improve match bracket matching (#10613)

This commit is contained in:
Pascal Kuthe 2024-05-07 16:26:04 +02:00 committed by GitHub
parent 0432d9cf14
commit 5b8b2f4b9b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 185 additions and 60 deletions

View File

@ -75,60 +75,64 @@ fn find_pair(
) -> Option<usize> { ) -> Option<usize> {
let pos = doc.char_to_byte(pos_); let pos = doc.char_to_byte(pos_);
let root = syntax.tree_for_byte_range(pos, pos + 1).root_node(); let root = syntax.tree_for_byte_range(pos, pos).root_node();
let mut node = root.descendant_for_byte_range(pos, pos + 1)?; let mut node = root.descendant_for_byte_range(pos, pos)?;
loop { loop {
if node.is_named() { if node.is_named() && node.child_count() >= 2 {
let (start_byte, end_byte) = surrounding_bytes(doc, &node)?; let open = node.child(0).unwrap();
let (start_char, end_char) = (doc.byte_to_char(start_byte), doc.byte_to_char(end_byte)); let close = node.child(node.child_count() - 1).unwrap();
if is_valid_pair_on_pos(doc, start_char, end_char) { if let (Some((start_pos, open)), Some((end_pos, close))) =
if end_byte == pos { (as_char(doc, &open), as_char(doc, &close))
return Some(start_char); {
if PAIRS.contains(&(open, close)) {
if start_pos == pos_ {
return Some(start_pos);
} }
// We return the end char if the cursor is either on the start char // We return the end char if the cursor is either on the start char
// or at some arbitrary position between start and end char. // or at some arbitrary position between start and end char.
if traverse_parents || start_byte == pos { if traverse_parents || end_pos == pos_ {
return Some(end_char); return Some(end_pos);
}
} }
} }
} }
// this node itselt wasn't a pair but maybe its siblings are // this node itselt wasn't a pair but maybe its siblings are
// check if we are *on* the pair (special cased so we don't look if let Some((start_char, end_char)) = as_close_pair(doc, &node) {
// at the current node twice and to jump to the start on that case) if let Some(pair_start) =
if let Some(open) = as_close_pair(doc, &node) { find_pair_end(doc, node.prev_sibling(), start_char, end_char, Backward)
if let Some(pair_start) = find_pair_end(doc, node.prev_sibling(), open, Backward) { {
return Some(pair_start); return Some(pair_start);
} }
} }
if let Some((start_char, end_char)) = as_open_pair(doc, &node) {
if !traverse_parents { if let Some(pair_end) =
// check if we are *on* the opening pair (special cased here as find_pair_end(doc, node.next_sibling(), start_char, end_char, Forward)
// an opptimization since we only care about bracket on the cursor {
// here)
if let Some(close) = as_open_pair(doc, &node) {
if let Some(pair_end) = find_pair_end(doc, node.next_sibling(), close, Forward) {
return Some(pair_end); return Some(pair_end);
} }
} }
if node.is_named() {
break;
}
}
for close in if traverse_parents {
for sibling in
iter::successors(node.next_sibling(), |node| node.next_sibling()).take(MATCH_LIMIT) iter::successors(node.next_sibling(), |node| node.next_sibling()).take(MATCH_LIMIT)
{ {
let Some(open) = as_close_pair(doc, &close) else { let Some((start_char, end_char)) = as_close_pair(doc, &sibling) else {
continue; continue;
}; };
if find_pair_end(doc, Some(node), open, Backward).is_some() { if find_pair_end(doc, sibling.prev_sibling(), start_char, end_char, Backward)
return doc.try_byte_to_char(close.start_byte()).ok(); .is_some()
{
return doc.try_byte_to_char(sibling.start_byte()).ok();
} }
} }
} else if node.is_named() {
break;
}
let Some(parent) = node.parent() else { let Some(parent) = node.parent() else {
break; break;
}; };
@ -241,29 +245,13 @@ pub fn is_valid_pair(ch: char) -> bool {
PAIRS.iter().any(|(l, r)| *l == ch || *r == ch) PAIRS.iter().any(|(l, r)| *l == ch || *r == ch)
} }
fn is_valid_pair_on_pos(doc: RopeSlice, start_char: usize, end_char: usize) -> bool {
PAIRS.contains(&(doc.char(start_char), doc.char(end_char)))
}
fn surrounding_bytes(doc: RopeSlice, node: &Node) -> Option<(usize, usize)> {
let len = doc.len_bytes();
let start_byte = node.start_byte();
let end_byte = node.end_byte().saturating_sub(1);
if start_byte >= len || end_byte >= len {
return None;
}
Some((start_byte, end_byte))
}
/// Tests if this node is a pair close char and returns the expected open char /// Tests if this node is a pair close char and returns the expected open char
fn as_close_pair(doc: RopeSlice, node: &Node) -> Option<char> { /// and close char contained in this node
fn as_close_pair(doc: RopeSlice, node: &Node) -> Option<(char, char)> {
let close = as_char(doc, node)?.1; let close = as_char(doc, node)?.1;
PAIRS PAIRS
.iter() .iter()
.find_map(|&(open, close_)| (close_ == close).then_some(open)) .find_map(|&(open, close_)| (close_ == close).then_some((close, open)))
} }
/// Checks if `node` or its siblings (at most MATCH_LIMIT nodes) is the specified closing char /// Checks if `node` or its siblings (at most MATCH_LIMIT nodes) is the specified closing char
@ -274,6 +262,7 @@ fn as_close_pair(doc: RopeSlice, node: &Node) -> Option<char> {
fn find_pair_end( fn find_pair_end(
doc: RopeSlice, doc: RopeSlice,
node: Option<Node>, node: Option<Node>,
start_char: char,
end_char: char, end_char: char,
direction: Direction, direction: Direction,
) -> Option<usize> { ) -> Option<usize> {
@ -281,20 +270,30 @@ fn find_pair_end(
Forward => Node::next_sibling, Forward => Node::next_sibling,
Backward => Node::prev_sibling, Backward => Node::prev_sibling,
}; };
let mut depth = 0;
iter::successors(node, advance) iter::successors(node, advance)
.take(MATCH_LIMIT) .take(MATCH_LIMIT)
.find_map(|node| { .find_map(|node| {
let (pos, c) = as_char(doc, &node)?; let (pos, c) = as_char(doc, &node)?;
(end_char == c).then_some(pos) if c == end_char {
if depth == 0 {
return Some(pos);
}
depth -= 1;
} else if c == start_char {
depth += 1;
}
None
}) })
} }
/// Tests if this node is a pair close char and returns the expected open char /// Tests if this node is a pair open char and returns the expected close char
fn as_open_pair(doc: RopeSlice, node: &Node) -> Option<char> { /// and open char contained in this node
fn as_open_pair(doc: RopeSlice, node: &Node) -> Option<(char, char)> {
let open = as_char(doc, node)?.1; let open = as_char(doc, node)?.1;
PAIRS PAIRS
.iter() .iter()
.find_map(|&(open_, close)| (open_ == open).then_some(close)) .find_map(|&(open_, close)| (open_ == open).then_some((open, close)))
} }
/// If node is a single char return it (and its char position) /// If node is a single char return it (and its char position)

View File

@ -806,3 +806,129 @@ fn ident(x: usize) -> usize { x }
Ok(()) Ok(())
} }
#[tokio::test(flavor = "multi_thread")]
async fn match_bracket() -> anyhow::Result<()> {
let rust_tests = vec![
// fwd
(
indoc! {r##"
fn foo(x: usize) -> usize { #[x|]# + 1 }
"##},
"mm",
indoc! {r##"
fn foo(x: usize) -> usize { x + 1 #[}|]#
"##},
),
// backward
(
indoc! {r##"
fn foo(x: usize) -> usize { #[x|]# + 1 }
"##},
"mmmm",
indoc! {r##"
fn foo(x: usize) -> usize #[{|]# x + 1 }
"##},
),
// avoid false positive inside string literal
(
indoc! {r##"
fn foo() -> &'static str { "(hello#[ |]#world)" }
"##},
"mm",
indoc! {r##"
fn foo() -> &'static str { "(hello world)#["|]# }
"##},
),
// make sure matching on quotes works
(
indoc! {r##"
fn foo() -> &'static str { "(hello#[ |]#world)" }
"##},
"mm",
indoc! {r##"
fn foo() -> &'static str { "(hello world)#["|]# }
"##},
),
// .. on both ends
(
indoc! {r##"
fn foo() -> &'static str { "(hello#[ |]#world)" }
"##},
"mmmm",
indoc! {r##"
fn foo() -> &'static str { #["|]#(hello world)" }
"##},
),
// match on siblings nodes
(
indoc! {r##"
fn foo(bar: Option<usize>) -> usize {
match bar {
Some(b#[a|]#r) => bar,
None => 42,
}
}
"##},
"mmmm",
indoc! {r##"
fn foo(bar: Option<usize>) -> usize {
match bar {
Some#[(|]#bar) => bar,
None => 42,
}
}
"##},
),
// gracefully handle multiple sibling brackets (usally for errors/incomplete syntax trees)
// in the past we selected the first > instead of the second > here
(
indoc! {r##"
fn foo() {
foo::<b#[a|]#r<>>
}
"##},
"mm",
indoc! {r##"
fn foo() {
foo::<bar<>#[>|]#
}
"##},
),
];
let python_tests = vec![
// python quotes have a slightly more complex syntax tree
// that triggerd a bug in an old implementation so we test
// them here
(
indoc! {r##"
foo_python = "mm does not#[ |]#work on this string"
"##},
"mm",
indoc! {r##"
foo_python = "mm does not work on this string#["|]#
"##},
),
(
indoc! {r##"
foo_python = "mm does not#[ |]#work on this string"
"##},
"mmmm",
indoc! {r##"
foo_python = #["|]#mm does not work on this string"
"##},
),
];
for test in rust_tests {
println!("{test:?}");
test_with_config(AppBuilder::new().with_file("foo.rs", None), test).await?;
}
for test in python_tests {
println!("{test:?}");
test_with_config(AppBuilder::new().with_file("foo.py", None), test).await?;
}
Ok(())
}