diff --git a/helix-core/src/movement.rs b/helix-core/src/movement.rs index 54eb02fd0..b1c76b758 100644 --- a/helix-core/src/movement.rs +++ b/helix-core/src/movement.rs @@ -1,7 +1,7 @@ use std::{cmp::Reverse, iter}; use ropey::iter::Chars; -use tree_sitter::{Node, QueryCursor}; +use tree_sitter::Node; use crate::{ char_idx_at_visual_offset, @@ -13,7 +13,6 @@ }, line_ending::rope_is_line_ending, position::char_idx_at_visual_block_offset, - syntax::LanguageConfiguration, text_annotations::TextAnnotations, textobject::TextObject, visual_offset_from_block, Range, RopeSlice, Selection, Syntax, @@ -500,29 +499,22 @@ fn reached_target(target: WordMotionTarget, prev_ch: char, next_ch: char) -> boo /// Finds the range of the next or previous textobject in the syntax sub-tree of `node`. /// Returns the range in the forwards direction. pub fn goto_treesitter_object( + syntax: &Syntax, slice: RopeSlice, range: Range, object_name: &str, dir: Direction, - slice_tree: Node, - lang_config: &LanguageConfiguration, count: usize, ) -> Range { let get_range = move |range: Range| -> Option { let byte_pos = slice.char_to_byte(range.cursor(slice)); let cap_name = |t: TextObject| format!("{}.{}", object_name, t); - let mut cursor = QueryCursor::new(); - let nodes = lang_config.textobject_query()?.capture_nodes_any( - &[ - &cap_name(TextObject::Movement), - &cap_name(TextObject::Around), - &cap_name(TextObject::Inside), - ], - slice_tree, - slice, - &mut cursor, - )?; + let movement = cap_name(TextObject::Movement); + let around = cap_name(TextObject::Around); + let inside = cap_name(TextObject::Inside); + let capture_names = &[movement.as_str(), around.as_str(), inside.as_str()]; + let nodes = syntax.textobject_nodes(capture_names, slice, None); let node = match dir { Direction::Forward => nodes diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs index a606f4c2e..6bd039f46 100644 --- a/helix-core/src/syntax.rs +++ b/helix-core/src/syntax.rs @@ -134,8 +134,6 @@ pub struct 
LanguageConfiguration { #[serde(skip)] pub(crate) indent_query: OnceCell>, - #[serde(skip)] - pub(crate) textobject_query: OnceCell>, #[serde(skip_serializing_if = "Option::is_none")] pub debugger: Option, @@ -534,11 +532,6 @@ fn from_str(s: &str) -> Result { } } -#[derive(Debug)] -pub struct TextObjectQuery { - pub query: Query, -} - #[derive(Debug)] pub enum CapturedNode<'a> { Single(Node<'a>), @@ -586,118 +579,57 @@ pub fn byte_range(&self) -> std::ops::Range { /// This number can be increased if new syntax highlight breakages are found, as long as the performance penalty is not too high. const TREE_SITTER_MATCH_LIMIT: u32 = 256; -impl TextObjectQuery { - /// Run the query on the given node and return sub nodes which match given - /// capture ("function.inside", "class.around", etc). - /// - /// Captures may contain multiple nodes by using quantifiers (+, *, etc), - /// and support for this is partial and could use improvement. - /// - /// ```query - /// (comment)+ @capture - /// - /// ; OR - /// ( - /// (comment)* - /// . - /// (function) - /// ) @capture - /// ``` - pub fn capture_nodes<'a>( - &'a self, - capture_name: &str, - node: Node<'a>, - slice: RopeSlice<'a>, - cursor: &'a mut QueryCursor, - ) -> Option>> { - self.capture_nodes_any(&[capture_name], node, slice, cursor) - } - - /// Find the first capture that exists out of all given `capture_names` - /// and return sub nodes that match this capture. 
- pub fn capture_nodes_any<'a>( - &'a self, - capture_names: &[&str], - node: Node<'a>, - slice: RopeSlice<'a>, - cursor: &'a mut QueryCursor, - ) -> Option<impl Iterator<Item = CapturedNode<'a>>> { - let capture_idx = capture_names - .iter() - .find_map(|cap| self.query.capture_index_for_name(cap))?; - - cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT); - - let nodes = cursor - .captures(&self.query, node, RopeProvider(slice)) - .filter_map(move |(mat, _)| { - let nodes: Vec<_> = mat - .captures - .iter() - .filter_map(|cap| (cap.index == capture_idx).then_some(cap.node)) - .collect(); - - if nodes.len() > 1 { - Some(CapturedNode::Grouped(nodes)) - } else { - nodes.into_iter().map(CapturedNode::Single).next() - } - }); - - Some(nodes) - } -} - -pub fn read_query(language: &str, filename: &str) -> String { +pub fn read_query(language: &str, filename: &str) -> Option<String> { static INHERITS_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r";+\s*inherits\s*:?\s*([a-z_,()-]+)\s*").unwrap()); - let query = load_runtime_file(language, filename).unwrap_or_default(); + let query = load_runtime_file(language, filename).ok()?; // replaces all "; inherits <language>(,<language>)*" with the queries of the given language(s) - INHERITS_REGEX + let contents = INHERITS_REGEX .replace_all(&query, |captures: &regex::Captures| { captures[1] .split(',') - .map(|language| format!("\n{}\n", read_query(language, filename))) + .filter_map(|language| Some(format!("\n{}\n", read_query(language, filename)?))) .collect::<String>() }) - .to_string() + .to_string(); + + Some(contents) } impl LanguageConfiguration { fn initialize_highlight(&self, scopes: &[String]) -> Option<Arc<HighlightConfiguration>> { - let highlights_query = read_query(&self.language_id, "highlights.scm"); + let highlights_query = read_query(&self.language_id, "highlights.scm")?; // always highlight syntax errors // highlights_query += "\n(ERROR) @error"; + let textobjects_query = read_query(&self.language_id, "textobjects.scm"); + let injections_query = read_query(&self.language_id, "injections.scm"); let locals_query = 
read_query(&self.language_id, "locals.scm"); - if highlights_query.is_empty() { - None - } else { - let language = get_language(self.grammar.as_deref().unwrap_or(&self.language_id)) - .map_err(|err| { - log::error!( - "Failed to load tree-sitter parser for language {:?}: {}", - self.language_id, - err - ) - }) - .ok()?; - let config = HighlightConfiguration::new( - language, - &highlights_query, - &injections_query, - &locals_query, - ) - .map_err(|err| log::error!("Could not parse queries for language {:?}. Are your grammars out of sync? Try running 'hx --grammar fetch' and 'hx --grammar build'. This query could not be parsed: {:?}", self.language_id, err)) + let language = get_language(self.grammar.as_deref().unwrap_or(&self.language_id)) + .map_err(|err| { + log::error!( + "Failed to load tree-sitter parser for language {:?}: {}", + self.language_id, + err + ) + }) .ok()?; + let config = HighlightConfiguration::new( + language, + &highlights_query, + textobjects_query.as_deref(), + &injections_query.unwrap_or_default(), + &locals_query.unwrap_or_default(), + ) + .map_err(|err| log::error!("Could not parse queries for language {:?}. Are your grammars out of sync? Try running 'hx --grammar fetch' and 'hx --grammar build'. 
This query could not be parsed: {:?}", self.language_id, err)) + .ok()?; - config.configure(scopes); - Some(Arc::new(config)) - } + config.configure(scopes); + Some(Arc::new(config)) } pub fn reconfigure(&self, scopes: &[String]) { @@ -722,24 +654,12 @@ pub fn indent_query(&self) -> Option<&Query> { .as_ref() } - pub fn textobject_query(&self) -> Option<&TextObjectQuery> { - self.textobject_query - .get_or_init(|| { - self.load_query("textobjects.scm") - .map(|query| TextObjectQuery { query }) - }) - .as_ref() - } - pub fn scope(&self) -> &str { &self.scope } fn load_query(&self, kind: &str) -> Option { - let query_text = read_query(&self.language_id, kind); - if query_text.is_empty() { - return None; - } + let query_text = read_query(&self.language_id, kind)?; let lang = self.highlight_config.get()?.as_ref()?.language; Query::new(lang, &query_text) .map_err(|e| { @@ -1457,6 +1377,42 @@ pub fn highlight_iter<'a>( } } + pub fn textobject_nodes<'a>( + &'a self, + capture_names: &'a [&str], + source: RopeSlice<'a>, + query_range: Option>, + ) -> impl Iterator> { + self.query_iter( + |config| config.textobjects_query.as_ref(), + source, + query_range, + ) + .filter_map(move |(layer, match_, _)| { + // TODO: cache this per-language with a hashmap? 
+ let capture_idx = capture_names.iter().find_map(|name| { + layer + .config + .textobjects_query + .as_ref() + .expect("layer must have textobjects query in order to match") + .capture_index_for_name(name) + })?; + + let nodes: Vec<_> = match_ + .captures + .iter() + .filter_map(|cap| (cap.index == capture_idx).then_some(cap.node)) + .collect(); + + if nodes.len() > 1 { + Some(CapturedNode::Grouped(nodes)) + } else { + nodes.into_iter().map(CapturedNode::Single).next() + } + }) + } + pub fn tree_for_byte_range(&self, start: usize, end: usize) -> &Tree { let mut container_id = self.root; @@ -1748,7 +1704,8 @@ pub enum HighlightEvent { #[derive(Debug)] pub struct HighlightConfiguration { pub language: Grammar, - pub query: Query, + query: Query, + textobjects_query: Option, injections_query: Query, combined_injections_patterns: Vec, highlights_pattern_index: usize, @@ -1846,6 +1803,7 @@ impl HighlightConfiguration { pub fn new( language: Grammar, highlights_query: &str, + textobjects_query: Option<&str>, injection_query: &str, locals_query: &str, ) -> Result { @@ -1865,6 +1823,9 @@ pub fn new( highlights_pattern_index += 1; } } + let textobjects_query = textobjects_query + .map(|source| Query::new(language, source)) + .transpose()?; let injections_query = Query::new(language, injection_query)?; let combined_injections_patterns = (0..injections_query.pattern_count()) @@ -1922,6 +1883,7 @@ pub fn new( Ok(Self { language, query, + textobjects_query, injections_query, combined_injections_patterns, highlights_pattern_index, @@ -2809,11 +2771,7 @@ fn test_textobject_queries() { .unwrap(); let language = get_language("rust").unwrap(); - let query = Query::new(language, query_str).unwrap(); - let textobject = TextObjectQuery { query }; - let mut cursor = QueryCursor::new(); - - let config = HighlightConfiguration::new(language, "", "", "").unwrap(); + let config = HighlightConfiguration::new(language, "", Some(query_str), "", "").unwrap(); let syntax = Syntax::new( 
source.slice(..), Arc::new(config), @@ -2821,11 +2779,10 @@ fn test_textobject_queries() { ) .unwrap(); - let root = syntax.tree().root_node(); - let mut test = |capture, range| { - let matches: Vec<_> = textobject - .capture_nodes(capture, root, source.slice(..), &mut cursor) - .unwrap() + let test = |capture, range| { + let capture_names = &[capture]; + let matches: Vec<_> = syntax + .textobject_nodes(capture_names, source.slice(..), None) .collect(); assert_eq!( @@ -2881,6 +2838,7 @@ fn test_parser() { language, &std::fs::read_to_string("../runtime/grammars/sources/rust/queries/highlights.scm") .unwrap(), + None, // textobjects.scm &std::fs::read_to_string("../runtime/grammars/sources/rust/queries/injections.scm") .unwrap(), "", // locals.scm @@ -2989,7 +2947,7 @@ fn assert_pretty_print( .unwrap(); let language = get_language(language_name).unwrap(); - let config = HighlightConfiguration::new(language, "", "", "").unwrap(); + let config = HighlightConfiguration::new(language, "", None, "", "").unwrap(); let syntax = Syntax::new( source.slice(..), Arc::new(config), diff --git a/helix-core/src/textobject.rs b/helix-core/src/textobject.rs index bf00a4580..86f423eb9 100644 --- a/helix-core/src/textobject.rs +++ b/helix-core/src/textobject.rs @@ -1,14 +1,12 @@ use std::fmt::Display; use ropey::RopeSlice; -use tree_sitter::{Node, QueryCursor}; use crate::chars::{categorize_char, char_is_whitespace, CharCategory}; use crate::graphemes::{next_grapheme_boundary, prev_grapheme_boundary}; use crate::line_ending::rope_is_line_ending; use crate::movement::Direction; use crate::surround; -use crate::syntax::LanguageConfiguration; use crate::Range; fn find_word_boundary(slice: RopeSlice, mut pos: usize, direction: Direction, long: bool) -> usize { @@ -254,22 +252,20 @@ fn textobject_pair_surround_impl( /// `object_name` is a query capture base name like "function", "class", etc. /// `slice_tree` is the tree-sitter node corresponding to given text slice. 
pub fn textobject_treesitter( + syntax: &crate::Syntax, slice: RopeSlice, range: Range, textobject: TextObject, object_name: &str, - slice_tree: Node, - lang_config: &LanguageConfiguration, _count: usize, ) -> Range { let get_range = move || -> Option { let byte_pos = slice.char_to_byte(range.cursor(slice)); let capture_name = format!("{}.{}", object_name, textobject); // eg. function.inner - let mut cursor = QueryCursor::new(); - let node = lang_config - .textobject_query()? - .capture_nodes(&capture_name, slice_tree, slice, &mut cursor)? + let capture_names = &[capture_name.as_str()]; + let node = syntax + .textobject_nodes(capture_names, slice, None) .filter(|node| node.byte_range().contains(&byte_pos)) .min_by_key(|node| node.byte_range().len())?; diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs index fdad31a81..65688106b 100644 --- a/helix-term/src/commands.rs +++ b/helix-term/src/commands.rs @@ -4905,20 +4905,12 @@ fn goto_ts_object_impl(cx: &mut Context, object: &'static str, direction: Direct let count = cx.count(); let motion = move |editor: &mut Editor| { let (view, doc) = current!(editor); - if let Some((lang_config, syntax)) = doc.language_config().zip(doc.syntax()) { + if let Some(syntax) = doc.syntax() { let text = doc.text().slice(..); - let root = syntax.tree().root_node(); let selection = doc.selection(view.id).clone().transform(|range| { - let new_range = movement::goto_treesitter_object( - text, - range, - object, - direction, - root, - lang_config, - count, - ); + let new_range = + movement::goto_treesitter_object(syntax, text, range, object, direction, count); if editor.mode == Mode::Select { let head = if new_range.head < range.anchor { @@ -5000,19 +4992,10 @@ fn select_textobject(cx: &mut Context, objtype: textobject::TextObject) { let text = doc.text().slice(..); let textobject_treesitter = |obj_name: &str, range: Range| -> Range { - let (lang_config, syntax) = match doc.language_config().zip(doc.syntax()) { - Some(t) => 
t, - None => return range, + let Some(syntax) = doc.syntax() else { + return range; }; - textobject::textobject_treesitter( - text, - range, - objtype, - obj_name, - syntax.tree().root_node(), - lang_config, - count, - ) + textobject::textobject_treesitter(syntax, text, range, objtype, obj_name, count) }; if ch == 'g' && doc.diff_handle().is_none() { diff --git a/xtask/src/querycheck.rs b/xtask/src/querycheck.rs index 454d0e5cd..c68c8a4f3 100644 --- a/xtask/src/querycheck.rs +++ b/xtask/src/querycheck.rs @@ -18,7 +18,7 @@ pub fn query_check() -> Result<(), DynError> { let grammar_name = language.grammar.as_ref().unwrap_or(language_name); for query_file in query_files { let language = get_language(grammar_name); - let query_text = read_query(language_name, query_file); + let Some(query_text) = read_query(language_name, query_file) else { continue }; if let Ok(lang) = language { if !query_text.is_empty() { if let Err(reason) = Query::new(lang, &query_text) {