mirror of
https://github.com/helix-editor/helix.git
synced 2024-11-25 02:46:17 +04:00
Rebuild search by using regex-automata
This commit is contained in:
parent
557c63033c
commit
8ac2d50fec
10
Cargo.lock
generated
10
Cargo.lock
generated
@ -316,6 +316,7 @@ dependencies = [
|
||||
"once_cell",
|
||||
"quickcheck",
|
||||
"regex",
|
||||
"regex-automata",
|
||||
"ropey",
|
||||
"serde",
|
||||
"similar",
|
||||
@ -757,6 +758,15 @@ dependencies = [
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.1.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
|
||||
dependencies = [
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.6.25"
|
||||
|
@ -26,6 +26,7 @@ tree-sitter = "0.19"
|
||||
once_cell = "1.8"
|
||||
arc-swap = "1"
|
||||
regex = "1"
|
||||
regex-automata = "0.1"
|
||||
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
toml = "0.5"
|
||||
|
@ -43,3 +43,180 @@ pub fn find_nth_prev(text: RopeSlice, ch: char, mut pos: usize, n: usize) -> Opt
|
||||
|
||||
Some(pos)
|
||||
}
|
||||
|
||||
use crate::movement::Direction;
|
||||
use regex_automata::{dense, DenseDFA, Error as RegexError, DFA};
|
||||
use std::ops::Range;
|
||||
|
||||
pub struct Searcher {
|
||||
/// Locate end of match searching right.
|
||||
right_fdfa: DenseDFA<Vec<usize>, usize>,
|
||||
/// Locate start of match searching right.
|
||||
right_rdfa: DenseDFA<Vec<usize>, usize>,
|
||||
|
||||
/// Locate start of match searching left.
|
||||
left_fdfa: DenseDFA<Vec<usize>, usize>,
|
||||
/// Locate end of match searching left.
|
||||
left_rdfa: DenseDFA<Vec<usize>, usize>,
|
||||
}
|
||||
|
||||
impl Searcher {
|
||||
pub fn new(pattern: &str) -> Result<Searcher, RegexError> {
|
||||
// Check case info for smart case
|
||||
let has_uppercase = pattern.chars().any(|c| c.is_uppercase());
|
||||
|
||||
// Create Regex DFAs for all search directions.
|
||||
let mut builder = dense::Builder::new();
|
||||
let builder = builder.case_insensitive(!has_uppercase);
|
||||
|
||||
let left_fdfa = builder.clone().reverse(true).build(pattern)?;
|
||||
let left_rdfa = builder
|
||||
.clone()
|
||||
.anchored(true)
|
||||
.longest_match(true)
|
||||
.build(pattern)?;
|
||||
|
||||
let right_fdfa = builder.clone().build(pattern)?;
|
||||
let right_rdfa = builder
|
||||
.anchored(true)
|
||||
.longest_match(true)
|
||||
.reverse(true)
|
||||
.build(pattern)?;
|
||||
|
||||
Ok(Searcher {
|
||||
right_fdfa,
|
||||
right_rdfa,
|
||||
left_fdfa,
|
||||
left_rdfa,
|
||||
})
|
||||
}
|
||||
pub fn search_prev(&self, text: RopeSlice, offset: usize) -> Option<Range<usize>> {
|
||||
let text = text.slice(..offset);
|
||||
let start = self.rfind(text, &self.left_fdfa)?;
|
||||
let end = self.find(text.slice(start..), &self.left_rdfa)?;
|
||||
|
||||
Some(start..start + end)
|
||||
}
|
||||
|
||||
pub fn search_next(&self, text: RopeSlice, offset: usize) -> Option<Range<usize>> {
|
||||
let text = text.slice(offset..);
|
||||
let end = self.find(text, &self.right_fdfa)?;
|
||||
let start = self.rfind(text.slice(..end), &self.right_rdfa)?;
|
||||
|
||||
Some(offset + start..offset + end)
|
||||
}
|
||||
|
||||
/// Returns the end offset of the longest match. If no match exists, then None is returned.
|
||||
/// NOTE: based on DFA::find_at
|
||||
fn find(&self, text: RopeSlice, dfa: &impl DFA) -> Option<usize> {
|
||||
// TOOD: needs to change to rfind condition if searching reverse
|
||||
// TODO: check this inside main search
|
||||
// if dfa.is_anchored() && start > 0 {
|
||||
// return None;
|
||||
// }
|
||||
|
||||
let mut state = dfa.start_state();
|
||||
let mut last_match = if dfa.is_dead_state(state) {
|
||||
return None;
|
||||
} else if dfa.is_match_state(state) {
|
||||
Some(0)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
for chunk in text.chunks() {
|
||||
for (i, &b) in chunk.as_bytes().iter().enumerate() {
|
||||
state = unsafe { dfa.next_state_unchecked(state, b) };
|
||||
if dfa.is_match_or_dead_state(state) {
|
||||
if dfa.is_dead_state(state) {
|
||||
return last_match;
|
||||
}
|
||||
last_match = Some(i + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
last_match
|
||||
}
|
||||
|
||||
/// Returns the start offset of the longest match in reverse, by searching from the end of the
|
||||
/// input towards the start of the input. If no match exists, then None is returned. In other
|
||||
/// words, this has the same match semantics as find, but in reverse.
|
||||
///
|
||||
/// NOTE: based on DFA::rfind_at
|
||||
fn rfind(&self, text: RopeSlice, dfa: &impl DFA) -> Option<usize> {
|
||||
// if dfa.is_anchored() && start < bytes.len() {
|
||||
// return None;
|
||||
// }
|
||||
|
||||
let mut state = dfa.start_state();
|
||||
let mut last_match = if dfa.is_dead_state(state) {
|
||||
return None;
|
||||
} else if dfa.is_match_state(state) {
|
||||
Some(text.len_bytes())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// This is basically chunks().rev()
|
||||
let (mut chunks, _, _, _) = text.chunks_at_byte(text.len_bytes());
|
||||
|
||||
while let Some(chunk) = chunks.prev() {
|
||||
for (i, &b) in chunk.as_bytes().iter().enumerate().rev() {
|
||||
state = unsafe { dfa.next_state_unchecked(state, b) };
|
||||
if dfa.is_match_or_dead_state(state) {
|
||||
if dfa.is_dead_state(state) {
|
||||
return last_match;
|
||||
}
|
||||
last_match = Some(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
last_match
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_search_next() {
|
||||
use crate::Rope;
|
||||
let text = Rope::from("hello world!");
|
||||
|
||||
let searcher = Searcher::new(r"\w+").unwrap();
|
||||
|
||||
let result = searcher.search_next(text.slice(..), 0).unwrap();
|
||||
let fragment = text.slice(result.start..result.end);
|
||||
assert_eq!("hello", fragment);
|
||||
|
||||
let result = searcher.search_next(text.slice(..), result.end).unwrap();
|
||||
let fragment = text.slice(result.start..result.end);
|
||||
assert_eq!("world", fragment);
|
||||
|
||||
let result = searcher.search_next(text.slice(..), result.end);
|
||||
assert!(result.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_search_prev() {
|
||||
use crate::Rope;
|
||||
let text = Rope::from("hello world!");
|
||||
|
||||
let searcher = Searcher::new(r"\w+").unwrap();
|
||||
|
||||
let result = searcher
|
||||
.search_prev(text.slice(..), text.len_bytes())
|
||||
.unwrap();
|
||||
let fragment = text.slice(result.start..result.end);
|
||||
assert_eq!("world", fragment);
|
||||
|
||||
let result = searcher.search_prev(text.slice(..), result.start).unwrap();
|
||||
let fragment = text.slice(result.start..result.end);
|
||||
assert_eq!("hello", fragment);
|
||||
|
||||
let result = searcher.search_prev(text.slice(..), result.start);
|
||||
assert!(result.is_none());
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user