Run textobject queries across injections

With this change textobjects work even within injection layers, so you
can use `]f` to jump to a function definition in a JavaScript `<script>`
tag within HTML for example.

This requires `Syntax::query_iter` - a utility function for running a
query from `HighlightConfiguration` across injection layers - which
comes from the rainbow brackets branch (merged into my driver).
We need to relocate the textobject query from the `LanguageConfiguration`
to the `HighlightConfiguration` in order to access it
per-injection-layer, like we do for the rainbow brackets query. With
that, the only necessary change is to port the contents of
`TextObjectQuery::capture_nodes_any` to a new function that uses
`query_iter` and update callers.

The callers end up being a bit cleaner: they now only need to take
`Syntax`, rather than both the `LanguageConfiguration` and the root
layer's root `tree_sitter::Node`.
This commit is contained in:
Michael Davis 2024-01-11 16:04:12 -05:00
parent 12e7d126b6
commit d3575dc0ff
No known key found for this signature in database
5 changed files with 98 additions and 169 deletions

View File

@ -1,7 +1,7 @@
use std::{cmp::Reverse, iter};
use ropey::iter::Chars;
use tree_sitter::{Node, QueryCursor};
use tree_sitter::Node;
use crate::{
char_idx_at_visual_offset,
@ -13,7 +13,6 @@
},
line_ending::rope_is_line_ending,
position::char_idx_at_visual_block_offset,
syntax::LanguageConfiguration,
text_annotations::TextAnnotations,
textobject::TextObject,
visual_offset_from_block, Range, RopeSlice, Selection, Syntax,
@ -500,29 +499,22 @@ fn reached_target(target: WordMotionTarget, prev_ch: char, next_ch: char) -> boo
/// Finds the range of the next or previous textobject named `object_name`, searching from the cursor of `range`.
/// Returns the range in the forwards direction.
pub fn goto_treesitter_object(
syntax: &Syntax,
slice: RopeSlice,
range: Range,
object_name: &str,
dir: Direction,
slice_tree: Node,
lang_config: &LanguageConfiguration,
count: usize,
) -> Range {
let get_range = move |range: Range| -> Option<Range> {
let byte_pos = slice.char_to_byte(range.cursor(slice));
let cap_name = |t: TextObject| format!("{}.{}", object_name, t);
let mut cursor = QueryCursor::new();
let nodes = lang_config.textobject_query()?.capture_nodes_any(
&[
&cap_name(TextObject::Movement),
&cap_name(TextObject::Around),
&cap_name(TextObject::Inside),
],
slice_tree,
slice,
&mut cursor,
)?;
let movement = cap_name(TextObject::Movement);
let around = cap_name(TextObject::Around);
let inside = cap_name(TextObject::Inside);
let capture_names = &[movement.as_str(), around.as_str(), inside.as_str()];
let nodes = syntax.textobject_nodes(capture_names, slice, None);
let node = match dir {
Direction::Forward => nodes

View File

@ -134,8 +134,6 @@ pub struct LanguageConfiguration {
#[serde(skip)]
pub(crate) indent_query: OnceCell<Option<Query>>,
#[serde(skip)]
pub(crate) textobject_query: OnceCell<Option<TextObjectQuery>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub debugger: Option<DebugAdapterConfig>,
@ -534,11 +532,6 @@ fn from_str(s: &str) -> Result<Self, Self::Err> {
}
}
#[derive(Debug)]
pub struct TextObjectQuery {
pub query: Query,
}
#[derive(Debug)]
pub enum CapturedNode<'a> {
Single(Node<'a>),
@ -586,97 +579,36 @@ pub fn byte_range(&self) -> std::ops::Range<usize> {
/// This number can be increased if new syntax highlight breakages are found, as long as the performance penalty is not too high.
const TREE_SITTER_MATCH_LIMIT: u32 = 256;
impl TextObjectQuery {
/// Run the query on the given node and return sub nodes which match given
/// capture ("function.inside", "class.around", etc).
///
/// Captures may contain multiple nodes by using quantifiers (+, *, etc),
/// and support for this is partial and could use improvement.
///
/// ```query
/// (comment)+ @capture
///
/// ; OR
/// (
/// (comment)*
/// .
/// (function)
/// ) @capture
/// ```
pub fn capture_nodes<'a>(
&'a self,
capture_name: &str,
node: Node<'a>,
slice: RopeSlice<'a>,
cursor: &'a mut QueryCursor,
) -> Option<impl Iterator<Item = CapturedNode<'a>>> {
self.capture_nodes_any(&[capture_name], node, slice, cursor)
}
/// Find the first capture that exists out of all given `capture_names`
/// and return sub nodes that match this capture.
pub fn capture_nodes_any<'a>(
&'a self,
capture_names: &[&str],
node: Node<'a>,
slice: RopeSlice<'a>,
cursor: &'a mut QueryCursor,
) -> Option<impl Iterator<Item = CapturedNode<'a>>> {
let capture_idx = capture_names
.iter()
.find_map(|cap| self.query.capture_index_for_name(cap))?;
cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT);
let nodes = cursor
.captures(&self.query, node, RopeProvider(slice))
.filter_map(move |(mat, _)| {
let nodes: Vec<_> = mat
.captures
.iter()
.filter_map(|cap| (cap.index == capture_idx).then_some(cap.node))
.collect();
if nodes.len() > 1 {
Some(CapturedNode::Grouped(nodes))
} else {
nodes.into_iter().map(CapturedNode::Single).next()
}
});
Some(nodes)
}
}
pub fn read_query(language: &str, filename: &str) -> String {
pub fn read_query(language: &str, filename: &str) -> Option<String> {
static INHERITS_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r";+\s*inherits\s*:?\s*([a-z_,()-]+)\s*").unwrap());
let query = load_runtime_file(language, filename).unwrap_or_default();
let query = load_runtime_file(language, filename).ok()?;
// replaces all "; inherits <language>(,<language>)*" with the queries of the given language(s)
INHERITS_REGEX
let contents = INHERITS_REGEX
.replace_all(&query, |captures: &regex::Captures| {
captures[1]
.split(',')
.map(|language| format!("\n{}\n", read_query(language, filename)))
.filter_map(|language| Some(format!("\n{}\n", read_query(language, filename)?)))
.collect::<String>()
})
.to_string()
.to_string();
Some(contents)
}
impl LanguageConfiguration {
fn initialize_highlight(&self, scopes: &[String]) -> Option<Arc<HighlightConfiguration>> {
let highlights_query = read_query(&self.language_id, "highlights.scm");
let highlights_query = read_query(&self.language_id, "highlights.scm")?;
// always highlight syntax errors
// highlights_query += "\n(ERROR) @error";
let textobjects_query = read_query(&self.language_id, "textobjects.scm");
let injections_query = read_query(&self.language_id, "injections.scm");
let locals_query = read_query(&self.language_id, "locals.scm");
if highlights_query.is_empty() {
None
} else {
let language = get_language(self.grammar.as_deref().unwrap_or(&self.language_id))
.map_err(|err| {
log::error!(
@ -689,8 +621,9 @@ fn initialize_highlight(&self, scopes: &[String]) -> Option<Arc<HighlightConfigu
let config = HighlightConfiguration::new(
language,
&highlights_query,
&injections_query,
&locals_query,
textobjects_query.as_deref(),
&injections_query.unwrap_or_default(),
&locals_query.unwrap_or_default(),
)
.map_err(|err| log::error!("Could not parse queries for language {:?}. Are your grammars out of sync? Try running 'hx --grammar fetch' and 'hx --grammar build'. This query could not be parsed: {:?}", self.language_id, err))
.ok()?;
@ -698,7 +631,6 @@ fn initialize_highlight(&self, scopes: &[String]) -> Option<Arc<HighlightConfigu
config.configure(scopes);
Some(Arc::new(config))
}
}
pub fn reconfigure(&self, scopes: &[String]) {
if let Some(Some(config)) = self.highlight_config.get() {
@ -722,24 +654,12 @@ pub fn indent_query(&self) -> Option<&Query> {
.as_ref()
}
pub fn textobject_query(&self) -> Option<&TextObjectQuery> {
self.textobject_query
.get_or_init(|| {
self.load_query("textobjects.scm")
.map(|query| TextObjectQuery { query })
})
.as_ref()
}
pub fn scope(&self) -> &str {
&self.scope
}
fn load_query(&self, kind: &str) -> Option<Query> {
let query_text = read_query(&self.language_id, kind);
if query_text.is_empty() {
return None;
}
let query_text = read_query(&self.language_id, kind)?;
let lang = self.highlight_config.get()?.as_ref()?.language;
Query::new(lang, &query_text)
.map_err(|e| {
@ -1457,6 +1377,42 @@ pub fn highlight_iter<'a>(
}
}
/// Runs each layer's textobjects query via `query_iter` and yields the nodes
/// captured under one of `capture_names` ("function.inside", "class.around", etc).
///
/// For every match, the capture used is the first entry of `capture_names`
/// that exists in the matching layer's textobjects query, so the lookup is
/// done per layer rather than once for the whole document. Matches from a
/// layer whose query defines none of the names are skipped.
///
/// A match that captured more than one node under that name (for example via
/// a quantifier such as `(comment)+ @capture`) is yielded as
/// `CapturedNode::Grouped`; a match with exactly one such node is yielded as
/// `CapturedNode::Single`; a match with none is skipped.
///
/// `query_range` is forwarded unchanged to `query_iter`.
/// NOTE(review): presumably a byte range restricting the query, with `None`
/// meaning the whole document - confirm against `query_iter`.
///
/// # Panics
///
/// Panics if a layer yielded by `query_iter` has no textobjects query. The
/// selector closure passed to `query_iter` returns that same query, so this
/// is treated as an invariant.
pub fn textobject_nodes<'a>(
&'a self,
capture_names: &'a [&str],
source: RopeSlice<'a>,
query_range: Option<std::ops::Range<usize>>,
) -> impl Iterator<Item = CapturedNode<'a>> {
self.query_iter(
|config| config.textobjects_query.as_ref(),
source,
query_range,
)
.filter_map(move |(layer, match_, _)| {
// Capture indices are specific to each query, so the name has to be
// resolved against the query of the layer that produced this match.
// TODO: cache this per-language with a hashmap?
let capture_idx = capture_names.iter().find_map(|name| {
layer
.config
.textobjects_query
.as_ref()
.expect("layer must have textobjects query in order to match")
.capture_index_for_name(name)
})?;
// Keep only the nodes captured under the resolved capture name.
let nodes: Vec<_> = match_
.captures
.iter()
.filter_map(|cap| (cap.index == capture_idx).then_some(cap.node))
.collect();
if nodes.len() > 1 {
Some(CapturedNode::Grouped(nodes))
} else {
// Zero nodes yields `None` (match skipped); one node yields `Single`.
nodes.into_iter().map(CapturedNode::Single).next()
}
})
}
pub fn tree_for_byte_range(&self, start: usize, end: usize) -> &Tree {
let mut container_id = self.root;
@ -1748,7 +1704,8 @@ pub enum HighlightEvent {
#[derive(Debug)]
pub struct HighlightConfiguration {
pub language: Grammar,
pub query: Query,
query: Query,
textobjects_query: Option<Query>,
injections_query: Query,
combined_injections_patterns: Vec<usize>,
highlights_pattern_index: usize,
@ -1846,6 +1803,7 @@ impl HighlightConfiguration {
pub fn new(
language: Grammar,
highlights_query: &str,
textobjects_query: Option<&str>,
injection_query: &str,
locals_query: &str,
) -> Result<Self, QueryError> {
@ -1865,6 +1823,9 @@ pub fn new(
highlights_pattern_index += 1;
}
}
let textobjects_query = textobjects_query
.map(|source| Query::new(language, source))
.transpose()?;
let injections_query = Query::new(language, injection_query)?;
let combined_injections_patterns = (0..injections_query.pattern_count())
@ -1922,6 +1883,7 @@ pub fn new(
Ok(Self {
language,
query,
textobjects_query,
injections_query,
combined_injections_patterns,
highlights_pattern_index,
@ -2809,11 +2771,7 @@ fn test_textobject_queries() {
.unwrap();
let language = get_language("rust").unwrap();
let query = Query::new(language, query_str).unwrap();
let textobject = TextObjectQuery { query };
let mut cursor = QueryCursor::new();
let config = HighlightConfiguration::new(language, "", "", "").unwrap();
let config = HighlightConfiguration::new(language, "", Some(query_str), "", "").unwrap();
let syntax = Syntax::new(
source.slice(..),
Arc::new(config),
@ -2821,11 +2779,10 @@ fn test_textobject_queries() {
)
.unwrap();
let root = syntax.tree().root_node();
let mut test = |capture, range| {
let matches: Vec<_> = textobject
.capture_nodes(capture, root, source.slice(..), &mut cursor)
.unwrap()
let test = |capture, range| {
let capture_names = &[capture];
let matches: Vec<_> = syntax
.textobject_nodes(capture_names, source.slice(..), None)
.collect();
assert_eq!(
@ -2881,6 +2838,7 @@ fn test_parser() {
language,
&std::fs::read_to_string("../runtime/grammars/sources/rust/queries/highlights.scm")
.unwrap(),
None, // textobjects.scm
&std::fs::read_to_string("../runtime/grammars/sources/rust/queries/injections.scm")
.unwrap(),
"", // locals.scm
@ -2989,7 +2947,7 @@ fn assert_pretty_print(
.unwrap();
let language = get_language(language_name).unwrap();
let config = HighlightConfiguration::new(language, "", "", "").unwrap();
let config = HighlightConfiguration::new(language, "", None, "", "").unwrap();
let syntax = Syntax::new(
source.slice(..),
Arc::new(config),

View File

@ -1,14 +1,12 @@
use std::fmt::Display;
use ropey::RopeSlice;
use tree_sitter::{Node, QueryCursor};
use crate::chars::{categorize_char, char_is_whitespace, CharCategory};
use crate::graphemes::{next_grapheme_boundary, prev_grapheme_boundary};
use crate::line_ending::rope_is_line_ending;
use crate::movement::Direction;
use crate::surround;
use crate::syntax::LanguageConfiguration;
use crate::Range;
fn find_word_boundary(slice: RopeSlice, mut pos: usize, direction: Direction, long: bool) -> usize {
@ -254,22 +252,20 @@ fn textobject_pair_surround_impl(
/// `object_name` is a query capture base name like "function", "class", etc.
/// `slice_tree` is the tree-sitter node corresponding to given text slice.
pub fn textobject_treesitter(
syntax: &crate::Syntax,
slice: RopeSlice,
range: Range,
textobject: TextObject,
object_name: &str,
slice_tree: Node,
lang_config: &LanguageConfiguration,
_count: usize,
) -> Range {
let get_range = move || -> Option<Range> {
let byte_pos = slice.char_to_byte(range.cursor(slice));
let capture_name = format!("{}.{}", object_name, textobject); // eg. function.inner
let mut cursor = QueryCursor::new();
let node = lang_config
.textobject_query()?
.capture_nodes(&capture_name, slice_tree, slice, &mut cursor)?
let capture_names = &[capture_name.as_str()];
let node = syntax
.textobject_nodes(capture_names, slice, None)
.filter(|node| node.byte_range().contains(&byte_pos))
.min_by_key(|node| node.byte_range().len())?;

View File

@ -4905,20 +4905,12 @@ fn goto_ts_object_impl(cx: &mut Context, object: &'static str, direction: Direct
let count = cx.count();
let motion = move |editor: &mut Editor| {
let (view, doc) = current!(editor);
if let Some((lang_config, syntax)) = doc.language_config().zip(doc.syntax()) {
if let Some(syntax) = doc.syntax() {
let text = doc.text().slice(..);
let root = syntax.tree().root_node();
let selection = doc.selection(view.id).clone().transform(|range| {
let new_range = movement::goto_treesitter_object(
text,
range,
object,
direction,
root,
lang_config,
count,
);
let new_range =
movement::goto_treesitter_object(syntax, text, range, object, direction, count);
if editor.mode == Mode::Select {
let head = if new_range.head < range.anchor {
@ -5000,19 +4992,10 @@ fn select_textobject(cx: &mut Context, objtype: textobject::TextObject) {
let text = doc.text().slice(..);
let textobject_treesitter = |obj_name: &str, range: Range| -> Range {
let (lang_config, syntax) = match doc.language_config().zip(doc.syntax()) {
Some(t) => t,
None => return range,
let Some(syntax) = doc.syntax() else {
return range;
};
textobject::textobject_treesitter(
text,
range,
objtype,
obj_name,
syntax.tree().root_node(),
lang_config,
count,
)
textobject::textobject_treesitter(syntax, text, range, objtype, obj_name, count)
};
if ch == 'g' && doc.diff_handle().is_none() {

View File

@ -18,7 +18,7 @@ pub fn query_check() -> Result<(), DynError> {
let grammar_name = language.grammar.as_ref().unwrap_or(language_name);
for query_file in query_files {
let language = get_language(grammar_name);
let query_text = read_query(language_name, query_file);
let Some(query_text) = read_query(language_name, query_file) else { continue };
if let Ok(lang) = language {
if !query_text.is_empty() {
if let Err(reason) = Query::new(lang, &query_text) {