Optimize LSP snippet parsing

Urgau authored 2023-02-07 20:15:39 +01:00; committed by Blaž Hrastnik
parent 9c12e0fb76
commit e973b71c83
4 changed files with 57 additions and 19 deletions

Cargo.lock (generated)

@@ -1141,7 +1141,6 @@ dependencies = [
"helix-loader",
"log",
"lsp-types",
"once_cell",
"serde",
"serde_json",
"thiserror",

helix-lsp/Cargo.toml

@@ -27,4 +27,3 @@ thiserror = "1.0"
tokio = { version = "1.26", features = ["rt", "rt-multi-thread", "io-util", "io-std", "time", "process", "macros", "fs", "parking_lot", "sync"] }
tokio-stream = "0.1.12"
which = "4.4"
once_cell = "1.15"
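
For context only (not part of this commit): the once_cell dependency was only needed for the lazily compiled regexes that the snippet parser drops below. A minimal sketch of the removed pattern next to a plain character scan; this is illustrative, and it uses the regex crate directly rather than helix_core's re-export.

use once_cell::sync::Lazy;
use regex::Regex;

// The pattern this commit removes: a regex compiled lazily on first use and
// consulted on every parse of a digit run.
static DIGIT: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[0-9]+").unwrap());

fn main() {
    let input = "42:rest";
    // Regex-based match of the leading digits ...
    assert_eq!(Some("42"), DIGIT.find(input).map(|m| m.as_str()));
    // ... versus the direct character scan the parser switches to.
    let end = input.find(|c: char| !c.is_ascii_digit()).unwrap_or(input.len());
    assert_eq!("42", &input[..end]);
}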

helix-lsp/src/snippet.rs

@@ -50,14 +50,11 @@ pub struct Snippet<'a> {
elements: Vec<SnippetElement<'a>>,
}
pub fn parse<'a>(s: &'a str) -> Result<Snippet<'a>> {
pub fn parse(s: &str) -> Result<Snippet<'_>> {
parser::parse(s).map_err(|rest| anyhow!("Failed to parse snippet. Remaining input: {}", rest))
}
mod parser {
use helix_core::regex;
use once_cell::sync::Lazy;
use helix_parsec::*;
use super::{CaseChange, FormatItem, Regex, Snippet, SnippetElement};
@@ -86,17 +83,34 @@ mod parser {
else ::= text
*/
static DIGIT: Lazy<regex::Regex> = Lazy::new(|| regex::Regex::new(r"^[0-9]+").unwrap());
static VARIABLE: Lazy<regex::Regex> =
Lazy::new(|| regex::Regex::new(r"^[_a-zA-Z][_a-zA-Z0-9]*").unwrap());
static TEXT: Lazy<regex::Regex> = Lazy::new(|| regex::Regex::new(r"^[^\$]+").unwrap());
fn var<'a>() -> impl Parser<'a, Output = &'a str> {
pattern(&VARIABLE)
// var = [_a-zA-Z][_a-zA-Z0-9]*
move |input: &'a str| match input
.char_indices()
.take_while(|(p, c)| {
*c == '_'
|| if *p == 0 {
c.is_ascii_alphabetic()
} else {
c.is_ascii_alphanumeric()
}
})
.last()
{
Some((index, c)) if index >= 1 => {
let index = index + c.len_utf8();
Ok((&input[index..], &input[0..index]))
}
_ => Err(input),
}
}
fn text<'a>() -> impl Parser<'a, Output = &'a str> {
take_while(|c| c != '$')
}
fn digit<'a>() -> impl Parser<'a, Output = usize> {
filter_map(pattern(&DIGIT), |s| s.parse().ok())
filter_map(take_while(|c| c.is_ascii_digit()), |s| s.parse().ok())
}
fn case_change<'a>() -> impl Parser<'a, Output = CaseChange> {
@@ -152,7 +166,7 @@ fn format<'a>() -> impl Parser<'a, Output = FormatItem<'a>> {
|seq| { Conditional(seq.1, None, Some(seq.4)) }
),
// Any text
map(pattern(&TEXT), Text),
map(text(), Text),
)
}
@@ -245,12 +259,9 @@ fn variable<'a>() -> impl Parser<'a, Output = SnippetElement<'a>> {
)
}
fn text<'a>() -> impl Parser<'a, Output = SnippetElement<'a>> {
map(pattern(&TEXT), SnippetElement::Text)
}
fn anything<'a>() -> impl Parser<'a, Output = SnippetElement<'a>> {
choice!(tabstop(), placeholder(), choice(), variable(), text())
let text = map(text(), SnippetElement::Text);
choice!(tabstop(), placeholder(), choice(), variable(), text)
}
fn snippet<'a>() -> impl Parser<'a, Output = Snippet<'a>> {
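
For illustration (not part of the commit): a standalone sketch of the character-walking approach that replaces the VARIABLE regex in var() above. The function name match_var and the assertions are hypothetical; the body mirrors the closure added in the hunk, accepting [_a-zA-Z][_a-zA-Z0-9]* and splitting the input just past the last matching character.

// Hypothetical standalone sketch of the var() matching logic; not part of the diff.
fn match_var(input: &str) -> Result<(&str, &str), &str> {
    match input
        .char_indices()
        .take_while(|(p, c)| {
            // First character: '_' or an ASCII letter; afterwards also ASCII digits.
            *c == '_'
                || if *p == 0 {
                    c.is_ascii_alphabetic()
                } else {
                    c.is_ascii_alphanumeric()
                }
        })
        .last()
    {
        // `index` is the byte offset of the last matching char; extend it by that
        // char's UTF-8 length to split off the matched prefix. The guard mirrors
        // the committed code and requires the match to extend past the first byte.
        Some((index, c)) if index >= 1 => {
            let index = index + c.len_utf8();
            Ok((&input[index..], &input[0..index]))
        }
        _ => Err(input),
    }
}

fn main() {
    assert_eq!(Ok((":default}", "TM_FILENAME")), match_var("TM_FILENAME:default}"));
    assert_eq!(Err("1abc"), match_var("1abc"));
}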

helix-parsec/src/lib.rs

@@ -157,6 +157,35 @@ pub fn take_until<'a, F>(pattern: F) -> impl Parser<'a, Output = &'a str>
}
}
/// A parser which matches all values until the specified pattern no longer matches.
///
/// This parser fails if the pattern does not match the first character, including when
/// the input is empty.
///
/// # Examples
///
/// ```
/// use helix_parsec::{take_while, Parser};
/// let parser = take_while(|c| c == '1');
/// assert_eq!(Ok(("2", "11")), parser.parse("112"));
/// assert_eq!(Err("22"), parser.parse("22"));
/// ```
pub fn take_while<'a, F>(pattern: F) -> impl Parser<'a, Output = &'a str>
where
F: Fn(char) -> bool,
{
move |input: &'a str| match input
.char_indices()
.take_while(|(_p, c)| pattern(*c))
.last()
{
Some((index, c)) => {
let index = index + c.len_utf8();
Ok((&input[index..], &input[0..index]))
}
_ => Err(input),
}
}
// Variadic parser combinators
/// A parser combinator which matches a sequence of parsers in an all-or-nothing fashion.
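
As a usage sketch (not part of the diff), the new take_while composes with helix_parsec's existing filter_map in the same way the snippet parser's digit() now does; the main() wrapper and the sample inputs below are illustrative assumptions.

// Illustrative usage only; mirrors digit() in helix-lsp/src/snippet.rs.
use helix_parsec::{filter_map, take_while, Parser};

fn main() {
    // Match one or more ASCII digits, then convert the matched slice to usize.
    let digits = filter_map(take_while(|c| c.is_ascii_digit()), |s: &str| {
        s.parse::<usize>().ok()
    });
    // Ok((remaining_input, parsed_value)), as in the take_while doc examples above.
    assert_eq!(Ok((":rest", 42)), digits.parse("42:rest"));
    // No leading digit: take_while fails, so the composed parser fails as well.
    assert!(digits.parse("abc").is_err());
}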