Merge 5691cb833e into f305c7299d

Add support for Amber-lang (#12021)
Co-authored-by: Phoenix Himself <pkaras.it@gmail.com> Co-authored-by: Michael Davis <mcarsondavis@gmail.com>
2024-11-22 01:16:18 +04:00 · 2024-11-21 23:09:08 +03:00 · 2024-11-21 10:09:42 -06:00 · 2024-11-21 07:58:14 -06:00 · 2024-11-16 21:52:59 -08:00 · 2024-11-07 23:05:09 -08:00
9 changed files with 1052 additions and 700 deletions
--- a/book/src/generated/lang-support.md
+++ b/book/src/generated/lang-support.md
@@ -3,6 +3,7 @@
 | ada | ✓ | ✓ |  | `ada_language_server` |
 | adl | ✓ | ✓ | ✓ |  |
 | agda | ✓ |  |  |  |
+| amber | ✓ |  |  |  |
 | astro | ✓ |  |  |  |
 | awk | ✓ | ✓ |  | `awk-language-server` |
 | bash | ✓ | ✓ | ✓ | `bash-language-server` |
--- a/helix-core/src/shellwords.rs
+++ b/helix-core/src/shellwords.rs
@@ -1,6 +1,329 @@
+use smartstring::{LazyCompact, SmartString};
 use std::borrow::Cow;

+/// A utility for parsing shell-like command lines.
+///
+/// The `Shellwords` struct takes an input string and allows extracting the command and its arguments.
+///
+/// # Features
+///
+/// - Parses command and arguments from input strings.
+/// - Supports single, double, and backtick quoted arguments.
+/// - Respects backslash escaping in arguments.
+///
+/// # Examples
+///
+/// Basic usage:
+///
+/// ```
+/// # use helix_core::shellwords::Shellwords;
+/// let shellwords = Shellwords::from(":o helix-core/src/shellwords.rs");
+/// assert_eq!(":o", shellwords.command());
+/// assert_eq!("helix-core/src/shellwords.rs", shellwords.args().next().unwrap());
+/// ```
+///
+/// Empty command:
+///
+/// ```
+/// # use helix_core::shellwords::Shellwords;
+/// let shellwords = Shellwords::from(" ");
+/// assert!(shellwords.command().is_empty());
+/// ```
+///
+/// # Iterator
+///
+/// The `args` method returns a non-allocating iterator, `Args`, over the arguments of the input.
+///
+/// ```
+/// # use helix_core::shellwords::Shellwords;
+/// let shellwords = Shellwords::from(":o a b c");
+/// let mut args = shellwords.args();
+/// assert_eq!(Some("a"), args.next());
+/// assert_eq!(Some("b"), args.next());
+/// assert_eq!(Some("c"), args.next());
+/// assert_eq!(None, args.next());
+/// ```
+#[derive(Clone, Copy)]
+pub struct Shellwords<'a> {
+    input: &'a str,
+}
+
+impl<'a> From<&'a str> for Shellwords<'a> {
+    #[inline]
+    fn from(input: &'a str) -> Self {
+        Self { input }
+    }
+}
+
+impl<'a> From<&'a String> for Shellwords<'a> {
+    #[inline]
+    fn from(input: &'a String) -> Self {
+        Self { input }
+    }
+}
+
+impl<'a> From<&'a Cow<'a, str>> for Shellwords<'a> {
+    #[inline]
+    fn from(input: &'a Cow<str>) -> Self {
+        Self { input }
+    }
+}
+
+impl<'a> Shellwords<'a> {
+    #[inline]
+    #[must_use]
+    pub fn command(&self) -> &str {
+        self.input
+            .split_once(' ')
+            .map_or(self.input, |(command, _)| command)
+    }
+
+    #[inline]
+    #[must_use]
+    pub fn args(&self) -> Args<'a> {
+        let args = self.input.split_once(' ').map_or("", |(_, args)| args);
+        Args::parse(args)
+    }
+
+    #[inline]
+    pub fn input(&self) -> &str {
+        self.input
+    }
+
+    /// Checks whether the input ends with a space character, regardless of whether that space is escaped.
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// # use helix_core::shellwords::Shellwords;
+    /// assert_eq!(Shellwords::from(" ").ends_with_whitespace(), true);
+    /// assert_eq!(Shellwords::from(":open ").ends_with_whitespace(), true);
+    /// assert_eq!(Shellwords::from(":open foo.txt ").ends_with_whitespace(), true);
+    /// assert_eq!(Shellwords::from(":open").ends_with_whitespace(), false);
+    /// assert_eq!(Shellwords::from(":open a\\ ").ends_with_whitespace(), true);
+    /// assert_eq!(Shellwords::from(":open a\\ b.txt").ends_with_whitespace(), false);
+    /// ```
+    #[inline]
+    pub fn ends_with_whitespace(&self) -> bool {
+        self.input.ends_with(' ')
+    }
+}
+
+/// An iterator over an input string which yields arguments.
+///
+/// Splits on whitespace, but respects quoted substrings (using double quotes, single quotes, or backticks).
+#[derive(Debug, Clone, Copy)]
+pub struct Args<'a> {
+    input: &'a str,
+    idx: usize,
+    start: usize,
+}
+
+impl<'a> Args<'a> {
+    #[inline]
+    fn parse(input: &'a str) -> Self {
+        Self {
+            input,
+            idx: 0,
+            start: 0,
+        }
+    }
+
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.input.is_empty()
+    }
+
+    /// Returns the args exactly as input.
+    ///
+    /// # Examples
+    /// ```
+    /// # use helix_core::shellwords::Args;
+    /// let args = Args::from(r#"sed -n "s/test t/not /p""#);
+    /// assert_eq!(r#"sed -n "s/test t/not /p""#, args.raw());
+    ///
+    /// let args = Args::from(r#"cat "file name with space.txt""#);
+    /// assert_eq!(r#"cat "file name with space.txt""#, args.raw());
+    /// ```
+    #[inline]
+    pub fn raw(&self) -> &str {
+        self.input
+    }
+
+    /// Returns the remainder of the args exactly as input.
+    ///
+    /// # Examples
+    /// ```
+    /// # use helix_core::shellwords::Args;
+    /// let mut args = Args::from(r#"sed -n "s/test t/not /p""#);
+    /// assert_eq!("sed", args.next().unwrap());
+    /// assert_eq!(r#"-n "s/test t/not /p""#, args.rest());
+    /// ```
+    ///
+    /// Never calling `next` and using `rest` is functionally equivalent to calling `raw`.
+    #[inline]
+    pub fn rest(&self) -> &str {
+        &self.input[self.idx..]
+    }
+
+    /// Convenient function to return an empty `Args`.
+    ///
+    /// When used in any iteration, it will always return `None`.
+    #[inline(always)]
+    pub const fn empty() -> Self {
+        Self {
+            input: "",
+            idx: 0,
+            start: 0,
+        }
+    }
+}
+
+#[allow(clippy::copy_iterator)]
+impl<'a> Iterator for Args<'a> {
+    type Item = &'a str;
+
+    #[inline]
+    #[allow(clippy::too_many_lines)]
+    fn next(&mut self) -> Option<Self::Item> {
+        // The parser loop is split into three main blocks to handle different types of input processing:
+        //
+        // 1. Quote block:
+        //    - Detects an unescaped quote character, either starting an in-quote scan or, if already in-quote,
+        //      locating the closing quote to return the quoted argument.
+        //    - Handles cases where mismatched quotes are ignored and when quotes appear as the last character.
+        //
+        // 2. Whitespace block:
+        //    - Handles arguments separated by whitespace (space or tab), respecting quotes so quoted phrases
+        //      remain grouped together.
+        //    - Splits arguments by whitespace when outside of a quoted context and updates boundaries accordingly.
+        //
+        // 3. Catch-all block:
+        //    - Handles any other character, updating the `is_escaped` status if a backslash is encountered,
+        //      advancing the loop to the next character.
+
+        let bytes = self.input.as_bytes();
+        let mut in_quotes = false;
+        let mut quote = b'\0';
+        let mut is_escaped = false;
+
+        while self.idx < bytes.len() {
+            match bytes[self.idx] {
+                b'"' | b'\'' | b'`' if !is_escaped => {
+                    if in_quotes {
+                        // Found the proper closing quote, so can return the arg and advance the state along.
+                        if bytes[self.idx] == quote {
+                            let arg = Some(&self.input[self.start..self.idx]);
+                            self.idx += 1;
+                            self.start = self.idx;
+                            return arg;
+                        }
+                        // If quote does not match the type of the opening quote, then do nothing and advance.
+                        self.idx += 1;
+                    } else if self.idx == bytes.len() - 1 {
+                        // Special case for when a quote is the last input in args.
+                        // e.g: :read "file with space.txt""
+                        // This preserves the quote as an arg:
+                        // - `file with space`
+                        // - `"`
+                        let arg = Some(&self.input[self.idx..]);
+                        self.idx = bytes.len();
+                        self.start = bytes.len();
+                        return arg;
+                    } else {
+                        // Found opening quote.
+                        in_quotes = true;
+                        // Kind of quote that was found.
+                        quote = bytes[self.idx];
+
+                        if self.start < self.idx {
+                            // When part of the input ends in a quote, `one two" three`, this properly returns the `two`
+                            // before advancing to the quoted arg for the next iteration:
+                            // - `one` <- previous arg
+                            // - `two` <- this step
+                            // - ` three` <- next arg
+                            let arg = Some(&self.input[self.start..self.idx]);
+                            self.idx += 1;
+                            self.start = self.idx;
+                            return arg;
+                        }
+
+                        // Advance after quote.
+                        self.idx += 1;
+                        // Exclude quote from arg output.
+                        self.start = self.idx;
+                    }
+                }
+                b' ' | b'\t' if !in_quotes => {
+                    // Found a true whitespace separator that wasn't inside quotes.
+
+                    // Check if there is anything to return or if it's just advancing over whitespace.
+                    // `start` will only be less than `idx` when there is something to return.
+                    if self.start < self.idx {
+                        let arg = Some(&self.input[self.start..self.idx]);
+                        self.idx += 1;
+                        self.start = self.idx;
+                        return arg;
+                    }
+
+                    // Advance beyond the whitespace.
+                    self.idx += 1;
+
+                    // This is where `start` will be set to the start of an arg boundary, either encountering a word
+                    // boundary or a quote boundary. If it finds a quote, then it will be advanced again in that part
+                    // of the code. Either way, all that remains for the check above will be to return a full arg.
+                    self.start = self.idx;
+                }
+                _ => {
+                    // If the previous byte left `is_escaped` set, it is reset to false here, as the escape only
+                    // applies to the single byte that follows the backslash.
+                    //
+                    // If `is_escaped` was not set and this byte is a backslash, then it's the start of an
+                    // escape.
+                    is_escaped = match (is_escaped, bytes[self.idx]) {
+                        (false, b'\\') => true, // Set `is_escaped` if the current byte is a backslash
+                        _ => false, //Reset `is_escaped` if it was true, otherwise keep `is_escaped` as false
+                    };
+
+                    // Advance to next `char`.
+                    self.idx += 1;
+                }
+            }
+        }
+
+        // Fallback that catches when the loop would have exited but failed to return the arg between start and the end.
+        if self.start < bytes.len() {
+            let arg = Some(&self.input[self.start..]);
+            self.start = bytes.len();
+            return arg;
+        }
+
+        // All args have been parsed.
+        None
+    }
+}
+
+impl<'a> From<&'a String> for Args<'a> {
+    fn from(args: &'a String) -> Self {
+        Args::parse(args)
+    }
+}
+
+impl<'a> From<&'a str> for Args<'a> {
+    fn from(args: &'a str) -> Self {
+        Args::parse(args)
+    }
+}
+
+impl<'a> From<&'a Cow<'_, str>> for Args<'a> {
+    fn from(args: &'a Cow<str>) -> Self {
+        Args::parse(args)
+    }
+}
+
 /// Auto escape for shellwords usage.
+#[inline]
+#[must_use]
 pub fn escape(input: Cow<str>) -> Cow<str> {
    if !input.chars().any(|x| x.is_ascii_whitespace()) {
        input
@@ -13,186 +336,141 @@ pub fn escape(input: Cow<str>) -> Cow<str> {
            buf
        }))
    } else {
-        Cow::Owned(format!("\"{}\"", input))
+        Cow::Owned(format!("\"{input}\""))
    }
 }

-enum State {
-    OnWhitespace,
-    Unquoted,
-    UnquotedEscaped,
-    Quoted,
-    QuoteEscaped,
-    Dquoted,
-    DquoteEscaped,
-}
+/// Unescapes a string, converting escape sequences into their literal characters.
+///
+/// This function handles the following escape sequences:
+/// - `\\n` is converted to `\n` (newline)
+/// - `\\t` is converted to `\t` (tab)
+/// - `\\u{...}` is converted to the corresponding Unicode character
+///
+/// Other escape sequences, such as `\\` followed by any character not listed above, will remain unchanged.
+///
+/// If the input is invalid, for example if it contains an invalid unicode escape such as `\u{999999999}`, the input is returned as is.
+///
+/// # Examples
+///
+/// Basic usage:
+///
+/// ```
+/// # use helix_core::shellwords::unescape;
+/// let unescaped = unescape("hello\\nworld");
+/// assert_eq!("hello\nworld", unescaped);
+/// ```
+///
+/// Unescaping tabs:
+///
+/// ```
+/// # use helix_core::shellwords::unescape;
+/// let unescaped = unescape("hello\\tworld");
+/// assert_eq!("hello\tworld", unescaped);
+/// ```
+///
+/// Unescaping Unicode characters:
+///
+/// ```
+/// # use helix_core::shellwords::unescape;
+/// let unescaped = unescape("hello\\u{1f929}world");
+/// assert_eq!("hello\u{1f929}world", unescaped);
+/// assert_eq!("hello🤩world", unescaped);
+/// ```
+///
+/// Handling backslashes:
+///
+/// ```
+/// # use helix_core::shellwords::unescape;
+/// let unescaped = unescape(r"hello\\world");
+/// assert_eq!(r"hello\\world", unescaped);
+///
+/// let unescaped = unescape(r"hello\\\\world");
+/// assert_eq!(r"hello\\\\world", unescaped);
+/// ```
+///
+/// # Note
+///
+/// This function is opinionated: it is meant for handling user input rather than being a general-purpose unescaping utility, so sequences like `\\'` or `\\\"` are not unescaped and are left as is.
+#[inline]
+#[must_use]
+pub fn unescape(input: &str) -> Cow<'_, str> {
+    enum State {
+        Normal,
+        Escaped,
+        Unicode,
+    }

-pub struct Shellwords<'a> {
-    state: State,
-    /// Shellwords where whitespace and escapes has been resolved.
-    words: Vec<Cow<'a, str>>,
-    /// The parts of the input that are divided into shellwords. This can be
-    /// used to retrieve the original text for a given word by looking up the
-    /// same index in the Vec as the word in `words`.
-    parts: Vec<&'a str>,
-}
+    let mut unescaped = String::new();
+    let mut state = State::Normal;
+    let mut is_escaped = false;
+    // NOTE: Max unicode code point is U+10FFFF for a maximum of 6 chars
+    let mut unicode = SmartString::<LazyCompact>::new_const();

-impl<'a> From<&'a str> for Shellwords<'a> {
-    fn from(input: &'a str) -> Self {
-        use State::*;
-
-        let mut state = Unquoted;
-        let mut words = Vec::new();
-        let mut parts = Vec::new();
-        let mut escaped = String::with_capacity(input.len());
-
-        let mut part_start = 0;
-        let mut unescaped_start = 0;
-        let mut end = 0;
-
-        for (i, c) in input.char_indices() {
-            state = match state {
-                OnWhitespace => match c {
-                    '"' => {
-                        end = i;
-                        Dquoted
-                    }
-                    '\'' => {
-                        end = i;
-                        Quoted
-                    }
-                    '\\' => {
-                        if cfg!(unix) {
-                            escaped.push_str(&input[unescaped_start..i]);
-                            unescaped_start = i + 1;
-                            UnquotedEscaped
-                        } else {
-                            OnWhitespace
+    for (idx, ch) in input.char_indices() {
+        match state {
+            State::Normal => match ch {
+                '\\' => {
+                    if !is_escaped {
+                        // PERF: As not every input will contain an escape, we use `String::new` as that has no initial
+                        // allocation. If an escape is found, then we reserve capacity that's the len of the input,
+                        // as the new unescaped string will be at most that long.
+                        unescaped.reserve(input.len());
+                        if idx > 0 {
+                            // First time finding an escape, so all prior chars can be added to the new unescaped
+                            // version if it's not the very first char found.
+                            unescaped.push_str(&input[0..idx]);
                        }
                    }
-                    c if c.is_ascii_whitespace() => {
-                        end = i;
-                        OnWhitespace
-                    }
-                    _ => Unquoted,
-                },
-                Unquoted => match c {
-                    '\\' => {
-                        if cfg!(unix) {
-                            escaped.push_str(&input[unescaped_start..i]);
-                            unescaped_start = i + 1;
-                            UnquotedEscaped
-                        } else {
-                            Unquoted
-                        }
-                    }
-                    c if c.is_ascii_whitespace() => {
-                        end = i;
-                        OnWhitespace
-                    }
-                    _ => Unquoted,
-                },
-                UnquotedEscaped => Unquoted,
-                Quoted => match c {
-                    '\\' => {
-                        if cfg!(unix) {
-                            escaped.push_str(&input[unescaped_start..i]);
-                            unescaped_start = i + 1;
-                            QuoteEscaped
-                        } else {
-                            Quoted
-                        }
-                    }
-                    '\'' => {
-                        end = i;
-                        OnWhitespace
-                    }
-                    _ => Quoted,
-                },
-                QuoteEscaped => Quoted,
-                Dquoted => match c {
-                    '\\' => {
-                        if cfg!(unix) {
-                            escaped.push_str(&input[unescaped_start..i]);
-                            unescaped_start = i + 1;
-                            DquoteEscaped
-                        } else {
-                            Dquoted
-                        }
-                    }
-                    '"' => {
-                        end = i;
-                        OnWhitespace
-                    }
-                    _ => Dquoted,
-                },
-                DquoteEscaped => Dquoted,
-            };
-
-            let c_len = c.len_utf8();
-            if i == input.len() - c_len && end == 0 {
-                end = i + c_len;
-            }
-
-            if end > 0 {
-                let esc_trim = escaped.trim();
-                let inp = &input[unescaped_start..end];
-
-                if !(esc_trim.is_empty() && inp.trim().is_empty()) {
-                    if esc_trim.is_empty() {
-                        words.push(inp.into());
-                        parts.push(inp);
-                    } else {
-                        words.push([escaped, inp.into()].concat().into());
-                        parts.push(&input[part_start..end]);
-                        escaped = "".to_string();
+                    state = State::Escaped;
+                    is_escaped = true;
+                }
+                _ => {
+                    if is_escaped {
+                        unescaped.push(ch);
                    }
                }
-                unescaped_start = i + 1;
-                part_start = i + 1;
-                end = 0;
+            },
+            State::Escaped => {
+                match ch {
+                    'n' => unescaped.push('\n'),
+                    't' => unescaped.push('\t'),
+                    'u' => {
+                        state = State::Unicode;
+                        continue;
+                    }
+                    // Uncomment if you want to handle '\\' to '\'
+                    // '\\' => unescaped.push('\\'),
+                    _ => {
+                        unescaped.push('\\');
+                        unescaped.push(ch);
+                    }
+                }
+                state = State::Normal;
            }
-        }
-
-        debug_assert!(words.len() == parts.len());
-
-        Self {
-            state,
-            words,
-            parts,
+            State::Unicode => match ch {
+                '{' => continue,
+                '}' => {
+                    let Ok(digit) = u32::from_str_radix(&unicode, 16) else {
+                        return input.into();
+                    };
+                    let Some(point) = char::from_u32(digit) else {
+                        return input.into();
+                    };
+                    unescaped.push(point);
+                    // Might be more unicode to unescape so clear for reuse.
+                    unicode.clear();
+                    state = State::Normal;
+                }
+                _ => unicode.push(ch),
+            },
        }
    }
-}

-impl<'a> Shellwords<'a> {
-    /// Checks that the input ends with a whitespace character which is not escaped.
-    ///
-    /// # Examples
-    ///
-    /// ```rust
-    /// use helix_core::shellwords::Shellwords;
-    /// assert_eq!(Shellwords::from(" ").ends_with_whitespace(), true);
-    /// assert_eq!(Shellwords::from(":open ").ends_with_whitespace(), true);
-    /// assert_eq!(Shellwords::from(":open foo.txt ").ends_with_whitespace(), true);
-    /// assert_eq!(Shellwords::from(":open").ends_with_whitespace(), false);
-    /// #[cfg(unix)]
-    /// assert_eq!(Shellwords::from(":open a\\ ").ends_with_whitespace(), false);
-    /// #[cfg(unix)]
-    /// assert_eq!(Shellwords::from(":open a\\ b.txt").ends_with_whitespace(), false);
-    /// ```
-    pub fn ends_with_whitespace(&self) -> bool {
-        matches!(self.state, State::OnWhitespace)
-    }
-
-    /// Returns the list of shellwords calculated from the input string.
-    pub fn words(&self) -> &[Cow<'a, str>] {
-        &self.words
-    }
-
-    /// Returns a list of strings which correspond to [`Self::words`] but represent the original
-    /// text in the input string - including escape characters - without separating whitespace.
-    pub fn parts(&self) -> &[&'a str] {
-        &self.parts
+    if is_escaped {
+        unescaped.into()
+    } else {
+        input.into()
    }
 }

@@ -201,114 +479,191 @@ mod test {
    use super::*;

    #[test]
-    #[cfg(windows)]
-    fn test_normal() {
+    fn base() {
        let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"#;
        let shellwords = Shellwords::from(input);
-        let result = shellwords.words().to_vec();
-        let expected = vec![
-            Cow::from(":o"),
-            Cow::from("single_word"),
-            Cow::from("twó"),
-            Cow::from("wörds"),
-            Cow::from("\\three\\"),
-            Cow::from("\\"),
-            Cow::from("with\\ escaping\\\\"),
+        let args = vec![
+            "single_word",
+            "twó",
+            "wörds",
+            r"\three\",
+            r#"\"with\"#,
+            r"escaping\\",
        ];
-        // TODO test is_owned and is_borrowed, once they get stabilized.
-        assert_eq!(expected, result);
+
+        assert_eq!(":o", shellwords.command());
+        assert_eq!(args, shellwords.args().collect::<Vec<_>>());
    }

    #[test]
-    #[cfg(unix)]
-    fn test_normal() {
-        let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"#;
-        let shellwords = Shellwords::from(input);
-        let result = shellwords.words().to_vec();
-        let expected = vec![
-            Cow::from(":o"),
-            Cow::from("single_word"),
-            Cow::from("twó"),
-            Cow::from("wörds"),
-            Cow::from(r#"three "with escaping\"#),
-        ];
-        // TODO test is_owned and is_borrowed, once they get stabilized.
-        assert_eq!(expected, result);
+    fn should_have_empty_args() {
+        let shellwords = Shellwords::from(":quit");
+        assert!(
+            shellwords.args().is_empty(),
+            "args: `{}`",
+            shellwords.args().next().unwrap()
+        );
+        assert!(shellwords.args().next().is_none());
    }

    #[test]
-    #[cfg(unix)]
-    fn test_quoted() {
+    fn should_return_empty_command() {
+        let shellwords = Shellwords::from(" ");
+        assert!(shellwords.command().is_empty());
+    }
+
+    #[test]
+    fn should_support_unicode_args() {
+        assert_eq!(
+            Shellwords::from(":sh echo 𒀀").args().collect::<Vec<_>>(),
+            &["echo", "𒀀"]
+        );
+        assert_eq!(
+            Shellwords::from(":sh echo 𒀀 hello world𒀀")
+                .args()
+                .collect::<Vec<_>>(),
+            &["echo", "𒀀", "hello", "world𒀀"]
+        );
+    }
+
+    #[test]
+    fn should_preserve_quote_if_last_argument() {
+        let sh = Shellwords::from(r#":read "file with space.txt"""#);
+        let mut args = sh.args();
+        assert_eq!("file with space.txt", args.next().unwrap());
+        assert_eq!(r#"""#, args.next().unwrap());
+    }
+
+    #[test]
+    fn should_return_rest_of_non_closed_quote_as_one_argument() {
+        let sh = Shellwords::from(r":rename 'should be one \'argument");
+        assert_eq!(r"should be one \'argument", sh.args().next().unwrap());
+    }
+
+    #[test]
+    fn should_respect_escaped_quote_in_what_looks_like_non_closed_arg() {
+        let sh = Shellwords::from(r":rename 'should be one \\'argument");
+        let mut args = sh.args();
+        assert_eq!(r"should be one \\", args.next().unwrap());
+        assert_eq!(r"argument", args.next().unwrap());
+    }
+
+    #[test]
+    fn should_split_args() {
+        assert_eq!(Shellwords::from(":o a").args().collect::<Vec<_>>(), &["a"]);
+        assert_eq!(
+            Shellwords::from(":o a\\ ").args().collect::<Vec<_>>(),
+            &["a\\"]
+        );
+    }
+
+    #[test]
+    fn should_parse_args_even_with_leading_whitespace() {
+        // Three spaces
+        assert_eq!(
+            Shellwords::from(":o   a").args().collect::<Vec<_>>(),
+            &["a"]
+        );
+    }
+
+    #[test]
+    fn should_parse_single_quotes_while_respecting_escapes() {
        let quoted =
            r#":o 'single_word' 'twó wörds' '' ' ''\three\' \"with\ escaping\\' 'quote incomplete"#;
        let shellwords = Shellwords::from(quoted);
-        let result = shellwords.words().to_vec();
+        let result = shellwords.args().collect::<Vec<_>>();
        let expected = vec![
-            Cow::from(":o"),
-            Cow::from("single_word"),
-            Cow::from("twó wörds"),
-            Cow::from(r#"three' "with escaping\"#),
-            Cow::from("quote incomplete"),
+            "single_word",
+            "twó wörds",
+            "",
+            " ",
+            r#"\three\' \"with\ escaping\\"#,
+            "quote incomplete",
        ];
        assert_eq!(expected, result);
    }

    #[test]
-    #[cfg(unix)]
-    fn test_dquoted() {
+    fn should_parse_double_quotes_while_respecting_escapes() {
        let dquoted = r#":o "single_word" "twó wörds" "" "  ""\three\' \"with\ escaping\\" "dquote incomplete"#;
        let shellwords = Shellwords::from(dquoted);
-        let result = shellwords.words().to_vec();
+        let result = shellwords.args().collect::<Vec<_>>();
        let expected = vec![
-            Cow::from(":o"),
-            Cow::from("single_word"),
-            Cow::from("twó wörds"),
-            Cow::from(r#"three' "with escaping\"#),
-            Cow::from("dquote incomplete"),
+            "single_word",
+            "twó wörds",
+            "",
+            "  ",
+            r#"\three\' \"with\ escaping\\"#,
+            "dquote incomplete",
        ];
        assert_eq!(expected, result);
    }

    #[test]
-    #[cfg(unix)]
-    fn test_mixed() {
+    fn should_respect_escapes_with_mixed_quotes() {
        let dquoted = r#":o single_word 'twó wörds' "\three\' \"with\ escaping\\""no space before"'and after' $#%^@ "%^&(%^" ')(*&^%''a\\\\\b' '"#;
        let shellwords = Shellwords::from(dquoted);
-        let result = shellwords.words().to_vec();
+        let result = shellwords.args().collect::<Vec<_>>();
        let expected = vec![
-            Cow::from(":o"),
-            Cow::from("single_word"),
-            Cow::from("twó wörds"),
-            Cow::from("three' \"with escaping\\"),
-            Cow::from("no space before"),
-            Cow::from("and after"),
-            Cow::from("$#%^@"),
-            Cow::from("%^&(%^"),
-            Cow::from(")(*&^%"),
-            Cow::from(r#"a\\b"#),
-            //last ' just changes to quoted but since we dont have anything after it, it should be ignored
+            "single_word",
+            "twó wörds",
+            r#"\three\' \"with\ escaping\\"#,
+            "no space before",
+            "and after",
+            "$#%^@",
+            "%^&(%^",
+            r")(*&^%",
+            r"a\\\\\b",
+            // Last ' is important, as if the user input an accidental quote at the end, this should be checked in
+            // commands where there should only be one input and return an error rather than silently succeed.
+            "'",
        ];
        assert_eq!(expected, result);
    }

    #[test]
-    fn test_lists() {
-        let input =
-            r#":set statusline.center ["file-type","file-encoding"] '["list", "in", "quotes"]'"#;
+    fn should_return_rest() {
+        let input = r#":set statusline.center ["file-type","file-encoding"]"#;
        let shellwords = Shellwords::from(input);
-        let result = shellwords.words().to_vec();
-        let expected = vec![
-            Cow::from(":set"),
-            Cow::from("statusline.center"),
-            Cow::from(r#"["file-type","file-encoding"]"#),
-            Cow::from(r#"["list", "in", "quotes"]"#),
-        ];
-        assert_eq!(expected, result);
+        let mut args = shellwords.args();
+        assert_eq!(":set", shellwords.command());
+        assert_eq!(Some("statusline.center"), args.next());
+        assert_eq!(r#"["file-type","file-encoding"]"#, args.rest());
+    }
+
+    #[test]
+    fn should_return_no_args() {
+        let mut args = Args::parse("");
+        assert!(args.next().is_none());
+    }
+
+    #[test]
+    fn should_leave_escaped_quotes() {
+        let input = r#"\" \` \' \"with \'with \`with"#;
+        let result = Args::parse(input).collect::<Vec<_>>();
+        assert_eq!(r#"\""#, result[0]);
+        assert_eq!(r"\`", result[1]);
+        assert_eq!(r"\'", result[2]);
+        assert_eq!(r#"\"with"#, result[3]);
+        assert_eq!(r"\'with", result[4]);
+        assert_eq!(r"\`with", result[5]);
+    }
+
+    #[test]
+    fn should_leave_literal_newline_alone() {
+        let result = Args::parse(r"\n").collect::<Vec<_>>();
+        assert_eq!(r"\n", result[0]);
+    }
+
+    #[test]
+    fn should_leave_literal_unicode_alone() {
+        let result = Args::parse(r"\u{C}").collect::<Vec<_>>();
+        assert_eq!(r"\u{C}", result[0]);
    }

    #[test]
    #[cfg(unix)]
-    fn test_escaping_unix() {
+    fn should_escape_unix() {
        assert_eq!(escape("foobar".into()), Cow::Borrowed("foobar"));
        assert_eq!(escape("foo bar".into()), Cow::Borrowed("foo\\ bar"));
        assert_eq!(escape("foo\tbar".into()), Cow::Borrowed("foo\\\tbar"));
@ -316,35 +671,79 @@ fn test_escaping_unix() {

    #[test]
    #[cfg(windows)]
-    fn test_escaping_windows() {
+    fn should_escape_windows() {
        assert_eq!(escape("foobar".into()), Cow::Borrowed("foobar"));
        assert_eq!(escape("foo bar".into()), Cow::Borrowed("\"foo bar\""));
    }

    #[test]
-    #[cfg(unix)]
-    fn test_parts() {
-        assert_eq!(Shellwords::from(":o a").parts(), &[":o", "a"]);
-        assert_eq!(Shellwords::from(":o a\\ ").parts(), &[":o", "a\\ "]);
+    fn should_unescape_newline() {
+        let unescaped = unescape("hello\\nworld");
+        assert_eq!("hello\nworld", unescaped);
    }

    #[test]
-    #[cfg(windows)]
-    fn test_parts() {
-        assert_eq!(Shellwords::from(":o a").parts(), &[":o", "a"]);
-        assert_eq!(Shellwords::from(":o a\\ ").parts(), &[":o", "a\\"]);
+    fn should_unescape_tab() {
+        let unescaped = unescape("hello\\tworld");
+        assert_eq!("hello\tworld", unescaped);
    }

    #[test]
-    fn test_multibyte_at_end() {
-        assert_eq!(Shellwords::from("𒀀").parts(), &["𒀀"]);
-        assert_eq!(
-            Shellwords::from(":sh echo 𒀀").parts(),
-            &[":sh", "echo", "𒀀"]
-        );
-        assert_eq!(
-            Shellwords::from(":sh echo 𒀀 hello world𒀀").parts(),
-            &[":sh", "echo", "𒀀", "hello", "world𒀀"]
-        );
+    fn should_unescape_unicode() {
+        let unescaped = unescape("hello\\u{1f929}world");
+        assert_eq!("hello\u{1f929}world", unescaped, "char: 🤩 ");
+        assert_eq!("hello🤩world", unescaped);
+    }
+
+    #[test]
+    fn should_return_original_input_due_to_bad_unicode() {
+        let unescaped = unescape("hello\\u{999999999}world");
+        assert_eq!("hello\\u{999999999}world", unescaped);
+    }
+
+    #[test]
+    fn should_not_unescape_slash() {
+        let unescaped = unescape(r"hello\\world");
+        assert_eq!(r"hello\\world", unescaped);
+
+        let unescaped = unescape(r"hello\\\\world");
+        assert_eq!(r"hello\\\\world", unescaped);
+    }
+
+    #[test]
+    fn should_not_unescape_slash_single_quote() {
+        let unescaped = unescape("\\'");
+        assert_eq!(r"\'", unescaped);
+    }
+
+    #[test]
+    fn should_not_unescape_slash_double_quote() {
+        let unescaped = unescape("\\\"");
+        assert_eq!(r#"\""#, unescaped);
+    }
+
+    #[test]
+    fn should_not_change_anything() {
+        let unescaped = unescape("'");
+        assert_eq!("'", unescaped);
+        let unescaped = unescape(r#"""#);
+        assert_eq!(r#"""#, unescaped);
+    }
+
+    #[test]
+    fn should_only_unescape_newline_not_slash_single_quote() {
+        let unescaped = unescape("\\n\'");
+        assert_eq!("\n'", unescaped);
+        let unescaped = unescape("\\n\\'");
+        assert_eq!("\n\\'", unescaped);
+    }
+
+    #[test]
+    fn should_unescape_args() {
+        // 1f929: 🤩
+        let args = Args::parse(r#"'hello\u{1f929} world' '["hello", "\u{1f929}", "world"]'"#)
+            .collect::<Vec<_>>();
+        assert_eq!("hello\u{1f929} world", unescape(args[0]));
+        assert_eq!(r#"["hello", "🤩", "world"]"#, unescape(args[1]));
    }
 }
--- a/helix-term/src/commands.rs
+++ b/helix-term/src/commands.rs
@ -30,7 +30,9 @@
    object, pos_at_coords,
    regex::{self, Regex},
    search::{self, CharMatcher},
-    selection, shellwords, surround,
+    selection,
+    shellwords::{self, Args},
+    surround,
    syntax::{BlockCommentToken, LanguageServerFeature},
    text_annotations::{Overlay, TextAnnotations},
    textobject,
@ -190,7 +192,7 @@ fn make_job_callback<T, F>(
 pub enum MappableCommand {
    Typable {
        name: String,
-        args: Vec<String>,
+        args: String,
        doc: String,
    },
    Static {
@ -225,15 +227,17 @@ impl MappableCommand {
    pub fn execute(&self, cx: &mut Context) {
        match &self {
            Self::Typable { name, args, doc: _ } => {
-                let args: Vec<Cow<str>> = args.iter().map(Cow::from).collect();
                if let Some(command) = typed::TYPABLE_COMMAND_MAP.get(name.as_str()) {
                    let mut cx = compositor::Context {
                        editor: cx.editor,
                        jobs: cx.jobs,
                        scroll: None,
                    };
-                    if let Err(e) = (command.fun)(&mut cx, &args[..], PromptEvent::Validate) {
-                        cx.editor.set_error(format!("{}", e));
+
+                    if let Err(err) =
+                        (command.fun)(&mut cx, Args::from(args), PromptEvent::Validate)
+                    {
+                        cx.editor.set_error(format!("{err}"));
                    }
                }
            }
@ -601,21 +605,15 @@ impl std::str::FromStr for MappableCommand {

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        if let Some(suffix) = s.strip_prefix(':') {
-            let mut typable_command = suffix.split(' ').map(|arg| arg.trim());
-            let name = typable_command
-                .next()
-                .ok_or_else(|| anyhow!("Expected typable command name"))?;
-            let args = typable_command
-                .map(|s| s.to_owned())
-                .collect::<Vec<String>>();
+            let (name, args) = suffix.split_once(' ').unwrap_or((suffix, ""));
            typed::TYPABLE_COMMAND_MAP
                .get(name)
                .map(|cmd| MappableCommand::Typable {
                    name: cmd.name.to_owned(),
                    doc: format!(":{} {:?}", cmd.name, args),
-                    args,
+                    args: args.to_string(),
                })
-                .ok_or_else(|| anyhow!("No TypableCommand named '{}'", s))
+                .ok_or_else(|| anyhow!("No TypableCommand named '{}'", name))
        } else if let Some(suffix) = s.strip_prefix('@') {
            helix_view::input::parse_macro(suffix).map(|keys| Self::Macro {
                name: s.to_string(),
@ -3218,7 +3216,7 @@ pub fn command_palette(cx: &mut Context) {
                    .iter()
                    .map(|cmd| MappableCommand::Typable {
                        name: cmd.name.to_owned(),
-                        args: Vec::new(),
+                        args: String::new(),
                        doc: cmd.doc.to_owned(),
                    }),
            );
@ -4271,7 +4269,7 @@ fn yank_joined_impl(editor: &mut Editor, separator: &str, register: char) {
        .fragments(text)
        .fold(String::new(), |mut acc, fragment| {
            if !acc.is_empty() {
-                acc.push_str(separator);
+                acc.push_str(&helix_core::shellwords::unescape(separator));
            }
            acc.push_str(&fragment);
            acc
--- a/helix-term/src/commands/dap.rs
+++ b/helix-term/src/commands/dap.rs
@ -109,6 +109,7 @@ fn dap_callback<T, F>(
    jobs.callback(callback);
 }

+// TODO: transition to `shellwords::Args` instead of `Option<Vec<Cow<str>>>`
 pub fn dap_start_impl(
    cx: &mut compositor::Context,
    name: Option<&str>,
@ -312,6 +313,7 @@ pub fn dap_restart(cx: &mut Context) {
    );
 }

+// TODO: transition to `shellwords::Args` instead of `Vec<String>`
 fn debug_parameter_prompt(
    completions: Vec<DebugConfigCompletion>,
    config_name: String,
--- a/helix-term/src/commands/typed.rs
+++ b/helix-term/src/commands/typed.rs
--- a/helix-term/src/keymap.rs
+++ b/helix-term/src/keymap.rs
@ -597,18 +597,14 @@ fn escaped_keymap() {
        let expectation = KeyTrie::Node(KeyTrieNode::new(
            "",
            hashmap! {
-                key => KeyTrie::Sequence(vec!{
+                key => KeyTrie::Sequence(vec![
                    MappableCommand::select_all,
                    MappableCommand::Typable {
                        name: "pipe".to_string(),
-                        args: vec!{
-                            "sed".to_string(),
-                            "-E".to_string(),
-                            "'s/\\s+$//g'".to_string()
-                        },
-                        doc: "".to_string(),
+                        args: String::from("sed -E 's/\\s+$//g'"),
+                        doc: String::new(),
                    },
-                })
+                ])
            },
            vec![key],
        ));
--- a/languages.toml
+++ b/languages.toml
@ -3931,3 +3931,14 @@ indent = { tab-width = 4, unit = "    " }
 [[grammar]]
 name = "spade"
 source = { git = "https://gitlab.com/spade-lang/tree-sitter-spade/", rev = "4d5b141017c61fe7e168e0a5c5721ee62b0d9572" }
+
+[[language]]
+name = "amber"
+scope = "source.ab"
+file-types = ["ab"]
+comment-token = "//"
+indent = { tab-width = 4, unit = "    " }
+
+[[grammar]]
+name = "amber"
+source = { git = "https://github.com/amber-lang/tree-sitter-amber", rev = "c6df3ec2ec243ed76550c525e7ac3d9a10c6c814" }
--- a/runtime/queries/amber/highlights.scm
+++ b/runtime/queries/amber/highlights.scm
@ -0,0 +1,60 @@
+(comment) @comment
+
+[
+    "if"
+    "loop"
+    "for"
+    "return"
+    "fun"
+    "else"
+    "then"
+    "break"
+    "continue"
+    "and"
+    "or"
+    "not"
+    "let"
+    "pub"
+    "main"
+    "echo"
+    "exit"
+    "fun"
+    "import"
+    "from"
+    "as"
+    "in"
+    "fail"
+    "failed"
+    "silent"
+    "nameof"
+    "is"
+    "unsafe"
+    "trust"
+] @keyword
+
+; Literals
+(boolean) @constant.builtin.boolean
+(number) @constant.numeric
+(null) @constant.numeric
+(string) @string
+(status) @keyword
+(command) @string
+(handler) @keyword
+(block) @punctuation.delimiter
+(variable_init) @keyword
+(variable_assignment) @punctuation.delimiter
+(variable) @variable
+(escape_sequence) @constant.character.escape
+(type_name_symbol) @type
+(interpolation) @punctuation.delimiter
+(reference) @keyword
+(preprocessor_directive) @comment
+(shebang) @comment
+(function_definition
+    name: (variable) @function.method)
+(function_call
+    name: (variable) @function.method)
+(import_statement
+    "pub" @keyword
+    "import" @keyword
+    "from" @keyword)
--- a/runtime/queries/solidity/highlights.scm
+++ b/runtime/queries/solidity/highlights.scm
@ -12,6 +12,8 @@
 (unicode_string_literal)
 (yul_string_literal)
 ] @string
+(hex_string_literal "hex" @string.special.symbol)
+(unicode_string_literal "unicode" @string.special.symbol)
 [
 (number_literal)
 (yul_decimal_number)
@ -20,6 +22,7 @@
 [
 (true)
 (false)
+ (yul_boolean)
 ] @constant.builtin.boolean

 (comment) @comment
@ -44,18 +47,18 @@
 (type_name "(" @punctuation.bracket "=>" @punctuation.delimiter ")" @punctuation.bracket)

 ; Definitions
-(struct_declaration 
+(struct_declaration
  name: (identifier) @type)
-(enum_declaration 
+(enum_declaration
  name: (identifier) @type)
 (contract_declaration
-  name: (identifier) @type) 
+  name: (identifier) @type)
 (library_declaration
-  name: (identifier) @type) 
+  name: (identifier) @type)
 (interface_declaration
  name: (identifier) @type)
-(event_definition 
-  name: (identifier) @type) 
+(event_definition
+  name: (identifier) @type)

 (function_definition
  name:  (identifier) @function)
Author	SHA1	Message	Date
RoloEdits	98598a49fe	Merge `5691cb833e` into `f305c7299d`	2024-11-21 23:09:08 +03:00
Lens0021 / Leslie	f305c7299d	Add support for Amber-lang (#12021 ) Co-authored-by: Phoenix Himself <pkaras.it@gmail.com> Co-authored-by: Michael Davis <mcarsondavis@gmail.com>	2024-11-21 10:09:42 -06:00
Valentin B.	9e0d2d0a19	chore(solidity): add highlight queries (#12102 ) Add highlights for `hex` and `unicode` string prefixes and YUL booleans	2024-11-21 07:58:14 -06:00
Rolo	5691cb833e	refactor(commands): propagate `Args` changes refactor(commands): ignore unit pattern match refactor(commands): use `if let` over `matches!` refactor(commands): ignore unit pattern match perf(commands): remove unnecessary ref for a &str refactor(commands): change `MappableCommand` `args` from `Vec<String>` to String refactor(commands): use `Args::raw` over `fold`ing refactor: use `rest` in `set_option` command refactor: use `rest` in `toggle_option` command chore(dap): add TODOs to switch to `Args`	2024-11-16 21:52:59 -08:00
Rolo	ba026aaab4	refactor(shellwords): change arg handling strategy refactor: no longer special case for bracket lists refactor: no longer special case end space This was a hold over from before the `raw` function was added to `Args`. perf: remove `bytes` field to save 16 bytes From 56 bytes to 40, saving 16 bytes. perf: move `in_quotes` field to local variable perf: move `quote` field to local variable refactor: remove `is_finished` state from `Args` test: change example command to `read` `yank-join` now uses `raw` and thus would not be parsed with the `next` function so no longer applicable. refactor: remove unneeded range end for index refactor: remove backtracking escape check Instead, it can be tracked as the parser scans through the first time. refactor: clean up code and add more comments	2024-11-07 23:05:09 -08:00