Compare commits

...

5 Commits

Author SHA1 Message Date
RoloEdits
98598a49fe
Merge 5691cb833e into f305c7299d 2024-11-21 23:09:08 +03:00
Lens0021 / Leslie
f305c7299d
Add support for Amber-lang (#12021)
Co-authored-by: Phoenix Himself <pkaras.it@gmail.com>
Co-authored-by: Michael Davis <mcarsondavis@gmail.com>
2024-11-21 10:09:42 -06:00
Valentin B.
9e0d2d0a19
chore(solidity): add highlight queries (#12102)
Add highlights for `hex` and `unicode` string prefixes and YUL booleans
2024-11-21 07:58:14 -06:00
Rolo
5691cb833e refactor(commands): propagate Args changes
refactor(commands): ignore unit pattern match

refactor(commands): use `if let` over `matches!`

refactor(commands): ignore unit pattern match

perf(commands): remove unnecessary ref for a &str

refactor(commands): change `MappableCommand` `args` from `Vec<String>` to String

refactor(commands): use `Args::raw` over `fold`ing

refactor: use `rest` in `set_option` command

refactor: use `rest` in `toggle_option` command

chore(dap): add TODOs to switch to `Args`
2024-11-16 21:52:59 -08:00
Rolo
ba026aaab4 refactor(shellwords): change arg handling strategy
refactor: no longer special case for bracket lists

refactor: no longer special case end space

This was a holdover from before the `raw` function was added to `Args`.

perf: remove `bytes` field to save 16 bytes

From 56 bytes to 40, saving 16 bytes.

perf: move `in_quotes` field to local variable

perf: move `quote` field to local variable

refactor: remove `is_finished` state from `Args`

test: change example command to `read`

`yank-join` now uses `raw` and thus would not be parsed with the `next`
function so no longer applicable.

refactor: remove unneeded range end for index

refactor: remove backtracking escape check

Instead, it can be tracked as the parser scans through the first time.

refactor: clean up code and add more comments
2024-11-07 23:05:09 -08:00
9 changed files with 1052 additions and 700 deletions

View File

@ -3,6 +3,7 @@
| ada | ✓ | ✓ | | `ada_language_server` |
| adl | ✓ | ✓ | ✓ | |
| agda | ✓ | | | |
| amber | ✓ | | | |
| astro | ✓ | | | |
| awk | ✓ | ✓ | | `awk-language-server` |
| bash | ✓ | ✓ | ✓ | `bash-language-server` |

View File

@ -1,6 +1,329 @@
use smartstring::{LazyCompact, SmartString};
use std::borrow::Cow;
/// A utility for parsing shell-like command lines.
///
/// `Shellwords` borrows an input string and splits it, on demand, into a command (everything
/// before the first space) and its arguments (everything after it).
///
/// # Features
///
/// - Parses command and arguments from input strings.
/// - Supports single, double, and backtick quoted arguments.
/// - Respects backslash escaping in arguments.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// # use helix_core::shellwords::Shellwords;
/// let shellwords = Shellwords::from(":o helix-core/src/shellwords.rs");
/// assert_eq!(":o", shellwords.command());
/// assert_eq!("helix-core/src/shellwords.rs", shellwords.args().next().unwrap());
/// ```
///
/// Empty command:
///
/// ```
/// # use helix_core::shellwords::Shellwords;
/// let shellwords = Shellwords::from(" ");
/// assert!(shellwords.command().is_empty());
/// ```
///
/// # Iterator
///
/// The `args` method returns a non-allocating iterator, `Args`, over the arguments of the input.
///
/// ```
/// # use helix_core::shellwords::Shellwords;
/// let shellwords = Shellwords::from(":o a b c");
/// let mut args = shellwords.args();
/// assert_eq!(Some("a"), args.next());
/// assert_eq!(Some("b"), args.next());
/// assert_eq!(Some("c"), args.next());
/// assert_eq!(None, args.next());
/// ```
#[derive(Debug, Clone, Copy)]
pub struct Shellwords<'a> {
    /// The unparsed command line, exactly as it was handed to `from`.
    input: &'a str,
}
impl<'a> From<&'a str> for Shellwords<'a> {
#[inline]
fn from(input: &'a str) -> Self {
Self { input }
}
}
impl<'a> From<&'a String> for Shellwords<'a> {
#[inline]
fn from(input: &'a String) -> Self {
Self { input }
}
}
impl<'a> From<&'a Cow<'a, str>> for Shellwords<'a> {
#[inline]
fn from(input: &'a Cow<str>) -> Self {
Self { input }
}
}
impl<'a> Shellwords<'a> {
    /// Returns the command part of the input: everything before the first space, or the whole
    /// input when it contains no space.
    ///
    /// The returned slice borrows from the original input (`'a`), not from this `Shellwords`
    /// value, so it may outlive it.
    #[inline]
    #[must_use]
    pub fn command(&self) -> &'a str {
        self.input
            .split_once(' ')
            .map_or(self.input, |(command, _)| command)
    }

    /// Returns an iterator over the arguments: everything after the first space of the input.
    ///
    /// When the input contains no space the iterator is built over an empty string.
    #[inline]
    #[must_use]
    pub fn args(&self) -> Args<'a> {
        let args = self.input.split_once(' ').map_or("", |(_, args)| args);
        Args::parse(args)
    }

    /// Returns the complete, unparsed input.
    ///
    /// Like [`Self::command`], the slice borrows from the original input (`'a`).
    #[inline]
    pub fn input(&self) -> &'a str {
        self.input
    }

    /// Checks whether the input ends with a space character (`' '`).
    ///
    /// Only the final character is inspected: escaping is **not** taken into account, so a
    /// trailing space preceded by a backslash still counts. Other whitespace, such as a tab,
    /// does not count.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use helix_core::shellwords::Shellwords;
    /// assert_eq!(Shellwords::from(" ").ends_with_whitespace(), true);
    /// assert_eq!(Shellwords::from(":open ").ends_with_whitespace(), true);
    /// assert_eq!(Shellwords::from(":open foo.txt ").ends_with_whitespace(), true);
    /// assert_eq!(Shellwords::from(":open").ends_with_whitespace(), false);
    /// assert_eq!(Shellwords::from(":open a\\ ").ends_with_whitespace(), true);
    /// assert_eq!(Shellwords::from(":open a\\ b.txt").ends_with_whitespace(), false);
    /// ```
    #[inline]
    pub fn ends_with_whitespace(&self) -> bool {
        self.input.ends_with(' ')
    }
}
/// An iterator over an input string which yields arguments.
///
/// Splits on whitespace, but respects quoted substrings (using double quotes, single quotes, or backticks).
///
/// The iterator only stores the borrowed input plus two byte offsets into it, so every yielded
/// argument is a sub-slice of the input and nothing is allocated.
#[derive(Debug, Clone, Copy)]
pub struct Args<'a> {
/// The complete argument string being parsed; returned unchanged by `raw`.
input: &'a str,
/// Byte offset of the next byte that `next` will examine; `rest` returns the input from here.
idx: usize,
/// Byte offset at which the argument currently being scanned begins.
start: usize,
}
impl<'a> Args<'a> {
#[inline]
fn parse(input: &'a str) -> Self {
Self {
input,
idx: 0,
start: 0,
}
}
#[inline]
pub fn is_empty(&self) -> bool {
self.input.is_empty()
}
/// Returns the args exactly as input.
///
/// # Examples
/// ```
/// # use helix_core::shellwords::Args;
/// let args = Args::from(r#"sed -n "s/test t/not /p""#);
/// assert_eq!(r#"sed -n "s/test t/not /p""#, args.raw());
///
/// let args = Args::from(r#"cat "file name with space.txt""#);
/// assert_eq!(r#"cat "file name with space.txt""#, args.raw());
/// ```
#[inline]
pub fn raw(&self) -> &str {
self.input
}
/// Returns the remainder of the args exactly as input.
///
/// # Examples
/// ```
/// # use helix_core::shellwords::Args;
/// let mut args = Args::from(r#"sed -n "s/test t/not /p""#);
/// assert_eq!("sed", args.next().unwrap());
/// assert_eq!(r#"-n "s/test t/not /p""#, args.rest());
/// ```
///
/// Never calling `next` and using `rest` is functionally equivalent to calling `raw`.
#[inline]
pub fn rest(&self) -> &str {
&self.input[self.idx..]
}
/// Convenient function to return an empty `Args`.
///
/// When used in any iteration, it will always return `None`.
#[inline(always)]
pub const fn empty() -> Self {
Self {
input: "",
idx: 0,
start: 0,
}
}
}
// `Args` is deliberately `Copy` so callers can cheaply snapshot the parser position.
#[allow(clippy::copy_iterator)]
impl<'a> Iterator for Args<'a> {
    type Item = &'a str;

    /// Yields the next argument as a sub-slice of the input, or `None` once the input is
    /// exhausted.
    ///
    /// Rules, in the order the scanner applies them to each byte:
    ///
    /// 1. **Quotes** (`"`, `'`, `` ` ``) that are not preceded by an unescaped backslash:
    ///    - inside a quoted argument, the matching quote ends it and the text between the quotes
    ///      is returned (possibly empty); a different kind of quote is ordinary text;
    ///    - outside a quoted argument, any unquoted text collected so far is returned first and
    ///      the quote is left for the following call, so neither the text nor the quote state
    ///      is lost;
    ///    - a lone quote that is the very last byte of the input is returned as its own
    ///      argument, so callers can detect a stray trailing quote;
    ///    - otherwise the quote opens a quoted argument.
    /// 2. **Space or tab** outside quotes ends the current argument, or is skipped when no
    ///    argument has been started.
    /// 3. **Anything else** is part of the current argument. A backslash escapes the next byte;
    ///    a backslash that is itself escaped does not.
    ///
    /// If the input ends while an argument (quoted or not) is still open, the remaining text is
    /// returned as the final argument.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        let bytes = self.input.as_bytes();
        // Kind of quote that opened the argument currently being scanned, if any. This is local
        // on purpose: a call never returns while a quoted argument is still open, except at the
        // end of input.
        let mut quote: Option<u8> = None;
        // Whether the previous byte was a backslash that escapes the current byte.
        let mut is_escaped = false;

        while self.idx < bytes.len() {
            let byte = bytes[self.idx];

            match byte {
                b'"' | b'\'' | b'`' if !is_escaped => match quote {
                    // Proper closing quote: return what is between the quotes and step over it.
                    Some(open) if open == byte => {
                        let arg = &self.input[self.start..self.idx];
                        self.idx += 1;
                        self.start = self.idx;
                        return Some(arg);
                    }
                    // A different kind of quote inside a quoted argument is plain text.
                    Some(_) => self.idx += 1,
                    // Unquoted text directly followed by a quote, e.g. `two"three four"` or
                    // `foo"`: hand back the text first. `idx` stays on the quote so that the next
                    // call handles it with a clean state instead of forgetting it.
                    None if self.start < self.idx => {
                        let arg = &self.input[self.start..self.idx];
                        self.start = self.idx;
                        return Some(arg);
                    }
                    // A quote as the very last byte, e.g. `:read "file with space.txt""`, is
                    // preserved as an argument of its own:
                    // - `file with space.txt`
                    // - `"`
                    None if self.idx == bytes.len() - 1 => {
                        let arg = &self.input[self.idx..];
                        self.idx = bytes.len();
                        self.start = bytes.len();
                        return Some(arg);
                    }
                    // Opening quote: remember its kind and exclude it from the argument.
                    None => {
                        quote = Some(byte);
                        self.idx += 1;
                        self.start = self.idx;
                    }
                },
                b' ' | b'\t' if quote.is_none() => {
                    // A real separator. `start < idx` only holds when an argument was collected.
                    if self.start < self.idx {
                        let arg = &self.input[self.start..self.idx];
                        self.idx += 1;
                        self.start = self.idx;
                        return Some(arg);
                    }
                    // Nothing collected yet: skip the whitespace and move the argument boundary
                    // along with it.
                    self.idx += 1;
                    self.start = self.idx;
                }
                _ => {
                    // A backslash starts an escape unless it is itself escaped; any other byte
                    // ends a pending escape.
                    is_escaped = !is_escaped && byte == b'\\';
                    self.idx += 1;
                }
            }
        }

        // The input ended while an argument was still open (plain word or unterminated quote).
        if self.start < bytes.len() {
            let arg = &self.input[self.start..];
            self.start = bytes.len();
            return Some(arg);
        }

        // All args have been parsed.
        None
    }
}
impl<'a> From<&'a String> for Args<'a> {
fn from(args: &'a String) -> Self {
Args::parse(args)
}
}
impl<'a> From<&'a str> for Args<'a> {
fn from(args: &'a str) -> Self {
Args::parse(args)
}
}
impl<'a> From<&'a Cow<'_, str>> for Args<'a> {
fn from(args: &'a Cow<str>) -> Self {
Args::parse(args)
}
}
/// Auto escape for shellwords usage.
#[inline]
#[must_use]
pub fn escape(input: Cow<str>) -> Cow<str> {
if !input.chars().any(|x| x.is_ascii_whitespace()) {
input
@ -13,186 +336,141 @@ pub fn escape(input: Cow<str>) -> Cow<str> {
buf
}))
} else {
Cow::Owned(format!("\"{}\"", input))
Cow::Owned(format!("\"{input}\""))
}
}
enum State {
OnWhitespace,
Unquoted,
UnquotedEscaped,
Quoted,
QuoteEscaped,
Dquoted,
DquoteEscaped,
}
/// Unescapes a string, converting escape sequences into their literal characters.
///
/// This function handles the following escape sequences:
/// - `\\n` is converted to `\n` (newline)
/// - `\\t` is converted to `\t` (tab)
/// - `\\u{...}` is converted to the corresponding Unicode character
///
/// Other escape sequences, such as `\\` followed by any character not listed above, will remain unchanged.
///
/// If input is invalid, for example if there is invalid unicode, \u{999999999}, it will return the input as is.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// # use helix_core::shellwords::unescape;
/// let unescaped = unescape("hello\\nworld");
/// assert_eq!("hello\nworld", unescaped);
/// ```
///
/// Unescaping tabs:
///
/// ```
/// # use helix_core::shellwords::unescape;
/// let unescaped = unescape("hello\\tworld");
/// assert_eq!("hello\tworld", unescaped);
/// ```
///
/// Unescaping Unicode characters:
///
/// ```
/// # use helix_core::shellwords::unescape;
/// let unescaped = unescape("hello\\u{1f929}world");
/// assert_eq!("hello\u{1f929}world", unescaped);
/// assert_eq!("hello🤩world", unescaped);
/// ```
///
/// Handling backslashes:
///
/// ```
/// # use helix_core::shellwords::unescape;
/// let unescaped = unescape(r"hello\\world");
/// assert_eq!(r"hello\\world", unescaped);
///
/// let unescaped = unescape(r"hello\\\\world");
/// assert_eq!(r"hello\\\\world", unescaped);
/// ```
///
/// # Note
///
/// This function is opinionated, with a clear purpose of handling user input, not a general or generic unescaping utility, and does not unescape sequences like `\\'` or `\\\"`, leaving them as is.
#[inline]
#[must_use]
pub fn unescape(input: &str) -> Cow<'_, str> {
enum State {
Normal,
Escaped,
Unicode,
}
pub struct Shellwords<'a> {
state: State,
/// Shellwords where whitespace and escapes has been resolved.
words: Vec<Cow<'a, str>>,
/// The parts of the input that are divided into shellwords. This can be
/// used to retrieve the original text for a given word by looking up the
/// same index in the Vec as the word in `words`.
parts: Vec<&'a str>,
}
let mut unescaped = String::new();
let mut state = State::Normal;
let mut is_escaped = false;
// NOTE: Max unicode code point is U+10FFFF for a maximum of 6 chars
let mut unicode = SmartString::<LazyCompact>::new_const();
impl<'a> From<&'a str> for Shellwords<'a> {
fn from(input: &'a str) -> Self {
use State::*;
let mut state = Unquoted;
let mut words = Vec::new();
let mut parts = Vec::new();
let mut escaped = String::with_capacity(input.len());
let mut part_start = 0;
let mut unescaped_start = 0;
let mut end = 0;
for (i, c) in input.char_indices() {
state = match state {
OnWhitespace => match c {
'"' => {
end = i;
Dquoted
}
'\'' => {
end = i;
Quoted
}
'\\' => {
if cfg!(unix) {
escaped.push_str(&input[unescaped_start..i]);
unescaped_start = i + 1;
UnquotedEscaped
} else {
OnWhitespace
for (idx, ch) in input.char_indices() {
match state {
State::Normal => match ch {
'\\' => {
if !is_escaped {
// PERF: As not every separator will be escaped, we use `String::new` as that has no initial
// allocation. If an escape is found, then we reserve capacity thats the len of the separator,
// as the new unescaped string will be at least that long.
unescaped.reserve(input.len());
if idx > 0 {
// First time finding an escape, so all prior chars can be added to the new unescaped
// version if its not the very first char found.
unescaped.push_str(&input[0..idx]);
}
}
c if c.is_ascii_whitespace() => {
end = i;
OnWhitespace
}
_ => Unquoted,
},
Unquoted => match c {
'\\' => {
if cfg!(unix) {
escaped.push_str(&input[unescaped_start..i]);
unescaped_start = i + 1;
UnquotedEscaped
} else {
Unquoted
}
}
c if c.is_ascii_whitespace() => {
end = i;
OnWhitespace
}
_ => Unquoted,
},
UnquotedEscaped => Unquoted,
Quoted => match c {
'\\' => {
if cfg!(unix) {
escaped.push_str(&input[unescaped_start..i]);
unescaped_start = i + 1;
QuoteEscaped
} else {
Quoted
}
}
'\'' => {
end = i;
OnWhitespace
}
_ => Quoted,
},
QuoteEscaped => Quoted,
Dquoted => match c {
'\\' => {
if cfg!(unix) {
escaped.push_str(&input[unescaped_start..i]);
unescaped_start = i + 1;
DquoteEscaped
} else {
Dquoted
}
}
'"' => {
end = i;
OnWhitespace
}
_ => Dquoted,
},
DquoteEscaped => Dquoted,
};
let c_len = c.len_utf8();
if i == input.len() - c_len && end == 0 {
end = i + c_len;
}
if end > 0 {
let esc_trim = escaped.trim();
let inp = &input[unescaped_start..end];
if !(esc_trim.is_empty() && inp.trim().is_empty()) {
if esc_trim.is_empty() {
words.push(inp.into());
parts.push(inp);
} else {
words.push([escaped, inp.into()].concat().into());
parts.push(&input[part_start..end]);
escaped = "".to_string();
state = State::Escaped;
is_escaped = true;
}
_ => {
if is_escaped {
unescaped.push(ch);
}
}
unescaped_start = i + 1;
part_start = i + 1;
end = 0;
},
State::Escaped => {
match ch {
'n' => unescaped.push('\n'),
't' => unescaped.push('\t'),
'u' => {
state = State::Unicode;
continue;
}
// Uncomment if you want to handle '\\' to '\'
// '\\' => unescaped.push('\\'),
_ => {
unescaped.push('\\');
unescaped.push(ch);
}
}
state = State::Normal;
}
}
debug_assert!(words.len() == parts.len());
Self {
state,
words,
parts,
State::Unicode => match ch {
'{' => continue,
'}' => {
let Ok(digit) = u32::from_str_radix(&unicode, 16) else {
return input.into();
};
let Some(point) = char::from_u32(digit) else {
return input.into();
};
unescaped.push(point);
// Might be more unicode to unescape so clear for reuse.
unicode.clear();
state = State::Normal;
}
_ => unicode.push(ch),
},
}
}
}
impl<'a> Shellwords<'a> {
/// Checks that the input ends with a whitespace character which is not escaped.
///
/// # Examples
///
/// ```rust
/// use helix_core::shellwords::Shellwords;
/// assert_eq!(Shellwords::from(" ").ends_with_whitespace(), true);
/// assert_eq!(Shellwords::from(":open ").ends_with_whitespace(), true);
/// assert_eq!(Shellwords::from(":open foo.txt ").ends_with_whitespace(), true);
/// assert_eq!(Shellwords::from(":open").ends_with_whitespace(), false);
/// #[cfg(unix)]
/// assert_eq!(Shellwords::from(":open a\\ ").ends_with_whitespace(), false);
/// #[cfg(unix)]
/// assert_eq!(Shellwords::from(":open a\\ b.txt").ends_with_whitespace(), false);
/// ```
pub fn ends_with_whitespace(&self) -> bool {
matches!(self.state, State::OnWhitespace)
}
/// Returns the list of shellwords calculated from the input string.
pub fn words(&self) -> &[Cow<'a, str>] {
&self.words
}
/// Returns a list of strings which correspond to [`Self::words`] but represent the original
/// text in the input string - including escape characters - without separating whitespace.
pub fn parts(&self) -> &[&'a str] {
&self.parts
if is_escaped {
unescaped.into()
} else {
input.into()
}
}
@ -201,114 +479,191 @@ mod test {
use super::*;
#[test]
#[cfg(windows)]
fn test_normal() {
fn base() {
let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"#;
let shellwords = Shellwords::from(input);
let result = shellwords.words().to_vec();
let expected = vec![
Cow::from(":o"),
Cow::from("single_word"),
Cow::from("twó"),
Cow::from("wörds"),
Cow::from("\\three\\"),
Cow::from("\\"),
Cow::from("with\\ escaping\\\\"),
let args = vec![
"single_word",
"twó",
"wörds",
r"\three\",
r#"\"with\"#,
r"escaping\\",
];
// TODO test is_owned and is_borrowed, once they get stabilized.
assert_eq!(expected, result);
assert_eq!(":o", shellwords.command());
assert_eq!(args, shellwords.args().collect::<Vec<_>>());
}
#[test]
#[cfg(unix)]
fn test_normal() {
let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"#;
let shellwords = Shellwords::from(input);
let result = shellwords.words().to_vec();
let expected = vec![
Cow::from(":o"),
Cow::from("single_word"),
Cow::from("twó"),
Cow::from("wörds"),
Cow::from(r#"three "with escaping\"#),
];
// TODO test is_owned and is_borrowed, once they get stabilized.
assert_eq!(expected, result);
fn should_have_empty_args() {
let shellwords = Shellwords::from(":quit");
assert!(
shellwords.args().is_empty(),
"args: `{}`",
shellwords.args().next().unwrap()
);
assert!(shellwords.args().next().is_none());
}
#[test]
#[cfg(unix)]
fn test_quoted() {
fn should_return_empty_command() {
let shellwords = Shellwords::from(" ");
assert!(shellwords.command().is_empty());
}
#[test]
fn should_support_unicode_args() {
assert_eq!(
Shellwords::from(":sh echo 𒀀").args().collect::<Vec<_>>(),
&["echo", "𒀀"]
);
assert_eq!(
Shellwords::from(":sh echo 𒀀 hello world𒀀")
.args()
.collect::<Vec<_>>(),
&["echo", "𒀀", "hello", "world𒀀"]
);
}
#[test]
fn should_preserve_quote_if_last_argument() {
let sh = Shellwords::from(r#":read "file with space.txt"""#);
let mut args = sh.args();
assert_eq!("file with space.txt", args.next().unwrap());
assert_eq!(r#"""#, args.next().unwrap());
}
#[test]
fn should_return_rest_of_non_closed_quote_as_one_argument() {
let sh = Shellwords::from(r":rename 'should be one \'argument");
assert_eq!(r"should be one \'argument", sh.args().next().unwrap());
}
#[test]
fn should_respect_escaped_quote_in_what_looks_like_non_closed_arg() {
let sh = Shellwords::from(r":rename 'should be one \\'argument");
let mut args = sh.args();
assert_eq!(r"should be one \\", args.next().unwrap());
assert_eq!(r"argument", args.next().unwrap());
}
#[test]
fn should_split_args() {
assert_eq!(Shellwords::from(":o a").args().collect::<Vec<_>>(), &["a"]);
assert_eq!(
Shellwords::from(":o a\\ ").args().collect::<Vec<_>>(),
&["a\\"]
);
}
#[test]
fn should_parse_args_even_with_leading_whitespace() {
// Three spaces
assert_eq!(
Shellwords::from(":o a").args().collect::<Vec<_>>(),
&["a"]
);
}
#[test]
fn should_parse_single_quotes_while_respecting_escapes() {
let quoted =
r#":o 'single_word' 'twó wörds' '' ' ''\three\' \"with\ escaping\\' 'quote incomplete"#;
let shellwords = Shellwords::from(quoted);
let result = shellwords.words().to_vec();
let result = shellwords.args().collect::<Vec<_>>();
let expected = vec![
Cow::from(":o"),
Cow::from("single_word"),
Cow::from("twó wörds"),
Cow::from(r#"three' "with escaping\"#),
Cow::from("quote incomplete"),
"single_word",
"twó wörds",
"",
" ",
r#"\three\' \"with\ escaping\\"#,
"quote incomplete",
];
assert_eq!(expected, result);
}
#[test]
#[cfg(unix)]
fn test_dquoted() {
fn should_parse_double_quotes_while_respecting_escapes() {
let dquoted = r#":o "single_word" "twó wörds" "" " ""\three\' \"with\ escaping\\" "dquote incomplete"#;
let shellwords = Shellwords::from(dquoted);
let result = shellwords.words().to_vec();
let result = shellwords.args().collect::<Vec<_>>();
let expected = vec![
Cow::from(":o"),
Cow::from("single_word"),
Cow::from("twó wörds"),
Cow::from(r#"three' "with escaping\"#),
Cow::from("dquote incomplete"),
"single_word",
"twó wörds",
"",
" ",
r#"\three\' \"with\ escaping\\"#,
"dquote incomplete",
];
assert_eq!(expected, result);
}
#[test]
#[cfg(unix)]
fn test_mixed() {
fn should_respect_escapes_with_mixed_quotes() {
let dquoted = r#":o single_word 'twó wörds' "\three\' \"with\ escaping\\""no space before"'and after' $#%^@ "%^&(%^" ')(*&^%''a\\\\\b' '"#;
let shellwords = Shellwords::from(dquoted);
let result = shellwords.words().to_vec();
let result = shellwords.args().collect::<Vec<_>>();
let expected = vec![
Cow::from(":o"),
Cow::from("single_word"),
Cow::from("twó wörds"),
Cow::from("three' \"with escaping\\"),
Cow::from("no space before"),
Cow::from("and after"),
Cow::from("$#%^@"),
Cow::from("%^&(%^"),
Cow::from(")(*&^%"),
Cow::from(r#"a\\b"#),
//last ' just changes to quoted but since we dont have anything after it, it should be ignored
"single_word",
"twó wörds",
r#"\three\' \"with\ escaping\\"#,
"no space before",
"and after",
"$#%^@",
"%^&(%^",
r")(*&^%",
r"a\\\\\b",
// Last ' is important, as if the user input an accidental quote at the end, this should be checked in
// commands where there should only be one input and return an error rather than silently succeed.
"'",
];
assert_eq!(expected, result);
}
#[test]
fn test_lists() {
let input =
r#":set statusline.center ["file-type","file-encoding"] '["list", "in", "quotes"]'"#;
fn should_return_rest() {
let input = r#":set statusline.center ["file-type","file-encoding"]"#;
let shellwords = Shellwords::from(input);
let result = shellwords.words().to_vec();
let expected = vec![
Cow::from(":set"),
Cow::from("statusline.center"),
Cow::from(r#"["file-type","file-encoding"]"#),
Cow::from(r#"["list", "in", "quotes"]"#),
];
assert_eq!(expected, result);
let mut args = shellwords.args();
assert_eq!(":set", shellwords.command());
assert_eq!(Some("statusline.center"), args.next());
assert_eq!(r#"["file-type","file-encoding"]"#, args.rest());
}
#[test]
fn should_return_no_args() {
let mut args = Args::parse("");
assert!(args.next().is_none());
}
#[test]
fn should_leave_escaped_quotes() {
let input = r#"\" \` \' \"with \'with \`with"#;
let result = Args::parse(input).collect::<Vec<_>>();
assert_eq!(r#"\""#, result[0]);
assert_eq!(r"\`", result[1]);
assert_eq!(r"\'", result[2]);
assert_eq!(r#"\"with"#, result[3]);
assert_eq!(r"\'with", result[4]);
assert_eq!(r"\`with", result[5]);
}
#[test]
fn should_leave_literal_newline_alone() {
let result = Args::parse(r"\n").collect::<Vec<_>>();
assert_eq!(r"\n", result[0]);
}
#[test]
fn should_leave_literal_unicode_alone() {
let result = Args::parse(r"\u{C}").collect::<Vec<_>>();
assert_eq!(r"\u{C}", result[0]);
}
#[test]
#[cfg(unix)]
fn test_escaping_unix() {
fn should_escape_unix() {
assert_eq!(escape("foobar".into()), Cow::Borrowed("foobar"));
assert_eq!(escape("foo bar".into()), Cow::Borrowed("foo\\ bar"));
assert_eq!(escape("foo\tbar".into()), Cow::Borrowed("foo\\\tbar"));
@ -316,35 +671,79 @@ fn test_escaping_unix() {
#[test]
#[cfg(windows)]
fn test_escaping_windows() {
fn should_escape_windows() {
assert_eq!(escape("foobar".into()), Cow::Borrowed("foobar"));
assert_eq!(escape("foo bar".into()), Cow::Borrowed("\"foo bar\""));
}
#[test]
#[cfg(unix)]
fn test_parts() {
assert_eq!(Shellwords::from(":o a").parts(), &[":o", "a"]);
assert_eq!(Shellwords::from(":o a\\ ").parts(), &[":o", "a\\ "]);
fn should_unescape_newline() {
let unescaped = unescape("hello\\nworld");
assert_eq!("hello\nworld", unescaped);
}
#[test]
#[cfg(windows)]
fn test_parts() {
assert_eq!(Shellwords::from(":o a").parts(), &[":o", "a"]);
assert_eq!(Shellwords::from(":o a\\ ").parts(), &[":o", "a\\"]);
fn should_unescape_tab() {
let unescaped = unescape("hello\\tworld");
assert_eq!("hello\tworld", unescaped);
}
#[test]
fn test_multibyte_at_end() {
assert_eq!(Shellwords::from("𒀀").parts(), &["𒀀"]);
assert_eq!(
Shellwords::from(":sh echo 𒀀").parts(),
&[":sh", "echo", "𒀀"]
);
assert_eq!(
Shellwords::from(":sh echo 𒀀 hello world𒀀").parts(),
&[":sh", "echo", "𒀀", "hello", "world𒀀"]
);
fn should_unescape_unicode() {
let unescaped = unescape("hello\\u{1f929}world");
assert_eq!("hello\u{1f929}world", unescaped, "char: 🤩 ");
assert_eq!("hello🤩world", unescaped);
}
#[test]
fn should_return_original_input_due_to_bad_unicode() {
let unescaped = unescape("hello\\u{999999999}world");
assert_eq!("hello\\u{999999999}world", unescaped);
}
#[test]
fn should_not_unescape_slash() {
let unescaped = unescape(r"hello\\world");
assert_eq!(r"hello\\world", unescaped);
let unescaped = unescape(r"hello\\\\world");
assert_eq!(r"hello\\\\world", unescaped);
}
#[test]
fn should_not_unescape_slash_single_quote() {
let unescaped = unescape("\\'");
assert_eq!(r"\'", unescaped);
}
#[test]
fn should_not_unescape_slash_double_quote() {
let unescaped = unescape("\\\"");
assert_eq!(r#"\""#, unescaped);
}
#[test]
fn should_not_change_anything() {
let unescaped = unescape("'");
assert_eq!("'", unescaped);
let unescaped = unescape(r#"""#);
assert_eq!(r#"""#, unescaped);
}
#[test]
fn should_only_unescape_newline_not_slash_single_quote() {
let unescaped = unescape("\\n\'");
assert_eq!("\n'", unescaped);
let unescaped = unescape("\\n\\'");
assert_eq!("\n\\'", unescaped);
}
#[test]
fn should_unescape_args() {
// 1f929: 🤩
let args = Args::parse(r#"'hello\u{1f929} world' '["hello", "\u{1f929}", "world"]'"#)
.collect::<Vec<_>>();
assert_eq!("hello\u{1f929} world", unescape(args[0]));
assert_eq!(r#"["hello", "🤩", "world"]"#, unescape(args[1]));
}
}

View File

@ -30,7 +30,9 @@
object, pos_at_coords,
regex::{self, Regex},
search::{self, CharMatcher},
selection, shellwords, surround,
selection,
shellwords::{self, Args},
surround,
syntax::{BlockCommentToken, LanguageServerFeature},
text_annotations::{Overlay, TextAnnotations},
textobject,
@ -190,7 +192,7 @@ fn make_job_callback<T, F>(
pub enum MappableCommand {
Typable {
name: String,
args: Vec<String>,
args: String,
doc: String,
},
Static {
@ -225,15 +227,17 @@ impl MappableCommand {
pub fn execute(&self, cx: &mut Context) {
match &self {
Self::Typable { name, args, doc: _ } => {
let args: Vec<Cow<str>> = args.iter().map(Cow::from).collect();
if let Some(command) = typed::TYPABLE_COMMAND_MAP.get(name.as_str()) {
let mut cx = compositor::Context {
editor: cx.editor,
jobs: cx.jobs,
scroll: None,
};
if let Err(e) = (command.fun)(&mut cx, &args[..], PromptEvent::Validate) {
cx.editor.set_error(format!("{}", e));
if let Err(err) =
(command.fun)(&mut cx, Args::from(args), PromptEvent::Validate)
{
cx.editor.set_error(format!("{err}"));
}
}
}
@ -601,21 +605,15 @@ impl std::str::FromStr for MappableCommand {
fn from_str(s: &str) -> Result<Self, Self::Err> {
if let Some(suffix) = s.strip_prefix(':') {
let mut typable_command = suffix.split(' ').map(|arg| arg.trim());
let name = typable_command
.next()
.ok_or_else(|| anyhow!("Expected typable command name"))?;
let args = typable_command
.map(|s| s.to_owned())
.collect::<Vec<String>>();
let (name, args) = suffix.split_once(' ').unwrap_or((suffix, ""));
typed::TYPABLE_COMMAND_MAP
.get(name)
.map(|cmd| MappableCommand::Typable {
name: cmd.name.to_owned(),
doc: format!(":{} {:?}", cmd.name, args),
args,
args: args.to_string(),
})
.ok_or_else(|| anyhow!("No TypableCommand named '{}'", s))
.ok_or_else(|| anyhow!("No TypableCommand named '{}'", name))
} else if let Some(suffix) = s.strip_prefix('@') {
helix_view::input::parse_macro(suffix).map(|keys| Self::Macro {
name: s.to_string(),
@ -3218,7 +3216,7 @@ pub fn command_palette(cx: &mut Context) {
.iter()
.map(|cmd| MappableCommand::Typable {
name: cmd.name.to_owned(),
args: Vec::new(),
args: String::new(),
doc: cmd.doc.to_owned(),
}),
);
@ -4271,7 +4269,7 @@ fn yank_joined_impl(editor: &mut Editor, separator: &str, register: char) {
.fragments(text)
.fold(String::new(), |mut acc, fragment| {
if !acc.is_empty() {
acc.push_str(separator);
acc.push_str(&helix_core::shellwords::unescape(separator));
}
acc.push_str(&fragment);
acc

View File

@ -109,6 +109,7 @@ fn dap_callback<T, F>(
jobs.callback(callback);
}
// TODO: transition to `shellwords::Args` instead of `Option<Vec<Cow>>>`
pub fn dap_start_impl(
cx: &mut compositor::Context,
name: Option<&str>,
@ -312,6 +313,7 @@ pub fn dap_restart(cx: &mut Context) {
);
}
// TODO: transition to `shellwords::Args` instead of `Vec<String>`
fn debug_parameter_prompt(
completions: Vec<DebugConfigCompletion>,
config_name: String,

File diff suppressed because it is too large Load Diff

View File

@ -597,18 +597,14 @@ fn escaped_keymap() {
let expectation = KeyTrie::Node(KeyTrieNode::new(
"",
hashmap! {
key => KeyTrie::Sequence(vec!{
key => KeyTrie::Sequence(vec![
MappableCommand::select_all,
MappableCommand::Typable {
name: "pipe".to_string(),
args: vec!{
"sed".to_string(),
"-E".to_string(),
"'s/\\s+$//g'".to_string()
},
doc: "".to_string(),
args: String::from("sed -E 's/\\s+$//g'"),
doc: String::new(),
},
})
])
},
vec![key],
));

View File

@ -3931,3 +3931,14 @@ indent = { tab-width = 4, unit = " " }
[[grammar]]
name = "spade"
source = { git = "https://gitlab.com/spade-lang/tree-sitter-spade/", rev = "4d5b141017c61fe7e168e0a5c5721ee62b0d9572" }
[[language]]
name = "amber"
scope = "source.ab"
file-types = ["ab"]
comment-token = "//"
indent = { tab-width = 4, unit = " " }
[[grammar]]
name = "amber"
source = { git = "https://github.com/amber-lang/tree-sitter-amber", rev = "c6df3ec2ec243ed76550c525e7ac3d9a10c6c814" }

View File

@ -0,0 +1,60 @@
(comment) @comment
; Keywords. Each literal appears once: a repeated alternative inside `[...]` matches nothing
; extra.
[
  "if"
  "loop"
  "for"
  "return"
  "fun"
  "else"
  "then"
  "break"
  "continue"
  "and"
  "or"
  "not"
  "let"
  "pub"
  "main"
  "echo"
  "exit"
  "import"
  "from"
  "as"
  "in"
  "fail"
  "failed"
  "silent"
  "nameof"
  "is"
  "unsafe"
  "trust"
] @keyword
; Literals
(boolean) @constant.builtin.boolean
(number) @constant.numeric
(null) @constant.numeric
(string) @string
(status) @keyword
(command) @string
(handler) @keyword
(block) @punctuation.delimiter
(variable_init) @keyword
(variable_assignment) @punctuation.delimiter
(variable) @variable
(escape_sequence) @constant.character.escape
(type_name_symbol) @type
(interpolation) @punctuation.delimiter
(reference) @keyword
(preprocessor_directive) @comment
(shebang) @comment
(function_definition
name: (variable) @function.method)
(function_call
name: (variable) @function.method)
(import_statement
"pub" @keyword
"import" @keyword
"from" @keyword)

View File

@ -12,6 +12,8 @@
(unicode_string_literal)
(yul_string_literal)
] @string
(hex_string_literal "hex" @string.special.symbol)
(unicode_string_literal "unicode" @string.special.symbol)
[
(number_literal)
(yul_decimal_number)
@ -20,6 +22,7 @@
[
(true)
(false)
(yul_boolean)
] @constant.builtin.boolean
(comment) @comment
@ -44,18 +47,18 @@
(type_name "(" @punctuation.bracket "=>" @punctuation.delimiter ")" @punctuation.bracket)
; Definitions
(struct_declaration
(struct_declaration
name: (identifier) @type)
(enum_declaration
(enum_declaration
name: (identifier) @type)
(contract_declaration
name: (identifier) @type)
name: (identifier) @type)
(library_declaration
name: (identifier) @type)
name: (identifier) @type)
(interface_declaration
name: (identifier) @type)
(event_definition
name: (identifier) @type)
(event_definition
name: (identifier) @type)
(function_definition
name: (identifier) @function)