mirror of
https://github.com/helix-editor/helix.git
synced 2024-11-22 01:16:18 +04:00
move syntax highlighting to separate crate
This commit is contained in:
parent
08ee8b9443
commit
c4b7b08809
18
Cargo.lock
generated
18
Cargo.lock
generated
@ -1311,6 +1311,7 @@ dependencies = [
|
||||
"hashbrown 0.14.5",
|
||||
"helix-loader",
|
||||
"helix-stdx",
|
||||
"helix-syntax",
|
||||
"imara-diff",
|
||||
"indoc",
|
||||
"log",
|
||||
@ -1425,6 +1426,23 @@ dependencies = [
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "helix-syntax"
|
||||
version = "24.7.0"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"arc-swap",
|
||||
"bitflags 2.6.0",
|
||||
"hashbrown 0.14.5",
|
||||
"helix-stdx",
|
||||
"log",
|
||||
"once_cell",
|
||||
"regex",
|
||||
"ropey",
|
||||
"slotmap",
|
||||
"tree-sitter",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "helix-term"
|
||||
version = "24.7.0"
|
||||
|
@ -12,6 +12,7 @@ members = [
|
||||
"helix-vcs",
|
||||
"helix-parsec",
|
||||
"helix-stdx",
|
||||
"helix-syntax",
|
||||
"xtask",
|
||||
]
|
||||
|
||||
|
@ -17,6 +17,7 @@ integration = []
|
||||
|
||||
[dependencies]
|
||||
helix-stdx = { path = "../helix-stdx" }
|
||||
helix-syntax = { path = "../helix-syntax" }
|
||||
helix-loader = { path = "../helix-loader" }
|
||||
|
||||
ropey = { version = "1.6.1", default-features = false, features = ["simd"] }
|
||||
|
@ -738,18 +738,9 @@ fn init_indent_query<'a, 'b>(
|
||||
.map(|prec| prec.byte_range().end - 1..byte_pos + 1)
|
||||
.unwrap_or(byte_pos..byte_pos + 1);
|
||||
|
||||
crate::syntax::PARSER.with(|ts_parser| {
|
||||
let mut ts_parser = ts_parser.borrow_mut();
|
||||
let mut cursor = ts_parser.cursors.pop().unwrap_or_default();
|
||||
let query_result = query_indents(
|
||||
query,
|
||||
syntax,
|
||||
&mut cursor,
|
||||
text,
|
||||
query_range,
|
||||
new_line_byte_pos,
|
||||
);
|
||||
ts_parser.cursors.push(cursor);
|
||||
crate::syntax::with_cursor(|cursor| {
|
||||
let query_result =
|
||||
query_indents(query, syntax, cursor, text, query_range, new_line_byte_pos);
|
||||
(query_result, deepest_preceding)
|
||||
})
|
||||
};
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,4 +1,3 @@
|
||||
use arc_swap::ArcSwap;
|
||||
use helix_core::{
|
||||
indent::{indent_level_for_line, treesitter_indent_for_pos, IndentStyle},
|
||||
syntax::{Configuration, Loader},
|
||||
@ -6,7 +5,7 @@
|
||||
};
|
||||
use helix_stdx::rope::RopeSliceExt;
|
||||
use ropey::Rope;
|
||||
use std::{ops::Range, path::PathBuf, process::Command, sync::Arc};
|
||||
use std::{ops::Range, path::PathBuf, process::Command};
|
||||
|
||||
#[test]
|
||||
fn test_treesitter_indent_rust() {
|
||||
@ -200,12 +199,7 @@ fn test_treesitter_indent(
|
||||
let indent_style = IndentStyle::from_str(&language_config.indent.as_ref().unwrap().unit);
|
||||
let highlight_config = language_config.highlight_config(&[]).unwrap();
|
||||
let text = doc.slice(..);
|
||||
let syntax = Syntax::new(
|
||||
text,
|
||||
highlight_config,
|
||||
Arc::new(ArcSwap::from_pointee(loader)),
|
||||
)
|
||||
.unwrap();
|
||||
let syntax = Syntax::new(text, highlight_config, |_| None).unwrap();
|
||||
let indent_query = language_config.indent_query().unwrap();
|
||||
|
||||
for i in 0..doc.len_lines() {
|
||||
|
28
helix-syntax/Cargo.toml
Normal file
28
helix-syntax/Cargo.toml
Normal file
@ -0,0 +1,28 @@
|
||||
[package]
|
||||
name = "helix-syntax"
|
||||
description = "Helix syntax highlighting "
|
||||
include = ["src/**/*", "README.md"]
|
||||
version.workspace = true
|
||||
authors.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
rust-version.workspace = true
|
||||
categories.workspace = true
|
||||
repository.workspace = true
|
||||
homepage.workspace = true
|
||||
|
||||
[features]
|
||||
|
||||
[dependencies]
|
||||
helix-stdx = { path = "../helix-stdx" }
|
||||
|
||||
ropey = { version = "1.6.1", default-features = false, features = ["simd"] }
|
||||
slotmap = "1.0"
|
||||
tree-sitter.workspace = true
|
||||
once_cell = "1.19"
|
||||
arc-swap = "1"
|
||||
regex = "1"
|
||||
bitflags = "2.4"
|
||||
ahash = "0.8.9"
|
||||
hashbrown = { version = "0.14.3", features = ["raw"] }
|
||||
log = "0.4"
|
331
helix-syntax/src/config.rs
Normal file
331
helix-syntax/src/config.rs
Normal file
@ -0,0 +1,331 @@
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arc_swap::ArcSwap;
|
||||
use helix_stdx::rope::{self, RopeSliceExt};
|
||||
use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
use ropey::RopeSlice;
|
||||
use tree_sitter::{Language as Grammar, Node, Query, QueryError, QueryMatch};
|
||||
|
||||
use crate::highlighter::Highlight;
|
||||
use crate::{byte_range_to_str, IncludedChildren, InjectionLanguageMarker, SHEBANG};
|
||||
|
||||
/// Contains the data needed to highlight code written in a particular language.
|
||||
///
|
||||
/// No method visible here takes `&mut self`, and the struct can be shared between threads.
/// The one piece of state that still changes is `highlight_indices`, which
/// `configure` replaces through the `ArcSwap` while only holding `&self`.
|
||||
#[derive(Debug)]
|
||||
pub struct HighlightConfiguration {
|
||||
/// Grammar passed to `new`; both queries below are compiled against it.
pub language: Grammar,
|
||||
/// Single query compiled from the locals query text followed by the
/// highlights query text.
pub query: Query,
|
||||
/// Query compiled from the injections query text on its own.
pub(crate) injections_query: Query,
|
||||
/// Indices of the patterns in `injections_query` that set the
/// `injection.combined` property.
pub(crate) combined_injections_patterns: Vec<usize>,
|
||||
/// Number of patterns in `query` that start before the highlights text.
/// Matches with a smaller pattern index are treated as locals patterns.
pub(crate) highlights_pattern_index: usize,
|
||||
/// One entry per capture name of `query`: the `Highlight` selected by
/// `configure`, or `None` (the state of every entry right after `new`).
pub(crate) highlight_indices: ArcSwap<Vec<Option<Highlight>>>,
|
||||
/// One flag per pattern of `query`: `true` when the pattern carries a
/// negated `local` property predicate.
pub(crate) non_local_variable_patterns: Vec<bool>,
|
||||
/// Index of the `injection.content` capture in `injections_query`, if present.
pub(crate) injection_content_capture_index: Option<u32>,
|
||||
/// Index of the `injection.language` capture in `injections_query`, if present.
pub(crate) injection_language_capture_index: Option<u32>,
|
||||
/// Index of the `injection.filename` capture in `injections_query`, if present.
pub(crate) injection_filename_capture_index: Option<u32>,
|
||||
/// Index of the `injection.shebang` capture in `injections_query`, if present.
pub(crate) injection_shebang_capture_index: Option<u32>,
|
||||
/// Index of the `local.scope` capture in `query`, if present.
pub(crate) local_scope_capture_index: Option<u32>,
|
||||
/// Index of the `local.definition` capture in `query`, if present.
pub(crate) local_def_capture_index: Option<u32>,
|
||||
/// Index of the `local.definition-value` capture in `query`, if present.
pub(crate) local_def_value_capture_index: Option<u32>,
|
||||
/// Index of the `local.reference` capture in `query`, if present.
pub(crate) local_ref_capture_index: Option<u32>,
|
||||
}
|
||||
|
||||
impl HighlightConfiguration {
|
||||
/// Creates a `HighlightConfiguration` for a given `Grammar` and set of highlighting
|
||||
/// queries.
|
||||
///
|
||||
/// # Parameters
|
||||
///
|
||||
/// * `language` - The Tree-sitter `Grammar` that should be used for parsing.
|
||||
/// * `highlights_query` - A string containing tree patterns for syntax highlighting. This
|
||||
/// should be non-empty, otherwise no syntax highlights will be added.
|
||||
/// * `injections_query` - A string containing tree patterns for injecting other languages
|
||||
/// into the document. This can be empty if no injections are desired.
|
||||
/// * `locals_query` - A string containing tree patterns for tracking local variable
|
||||
/// definitions and references. This can be empty if local variable tracking is not needed.
|
||||
///
|
||||
/// Returns a `HighlightConfiguration` that can then be used with the `highlight` method.
|
||||
pub fn new(
|
||||
language: Grammar,
|
||||
highlights_query: &str,
|
||||
injection_query: &str,
|
||||
locals_query: &str,
|
||||
) -> Result<Self, QueryError> {
|
||||
// Concatenate the query strings, keeping track of the start offset of each section.
|
||||
let mut query_source = String::new();
|
||||
query_source.push_str(locals_query);
|
||||
let highlights_query_offset = query_source.len();
|
||||
query_source.push_str(highlights_query);
|
||||
|
||||
// Construct a single query by concatenating the three query strings, but record the
|
||||
// range of pattern indices that belong to each individual string.
|
||||
let query = Query::new(&language, &query_source)?;
|
||||
let mut highlights_pattern_index = 0;
|
||||
for i in 0..(query.pattern_count()) {
|
||||
let pattern_offset = query.start_byte_for_pattern(i);
|
||||
if pattern_offset < highlights_query_offset {
|
||||
highlights_pattern_index += 1;
|
||||
}
|
||||
}
|
||||
|
||||
let injections_query = Query::new(&language, injection_query)?;
|
||||
let combined_injections_patterns = (0..injections_query.pattern_count())
|
||||
.filter(|&i| {
|
||||
injections_query
|
||||
.property_settings(i)
|
||||
.iter()
|
||||
.any(|s| &*s.key == "injection.combined")
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Find all of the highlighting patterns that are disabled for nodes that
|
||||
// have been identified as local variables.
|
||||
let non_local_variable_patterns = (0..query.pattern_count())
|
||||
.map(|i| {
|
||||
query
|
||||
.property_predicates(i)
|
||||
.iter()
|
||||
.any(|(prop, positive)| !*positive && prop.key.as_ref() == "local")
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Store the numeric ids for all of the special captures.
|
||||
let mut injection_content_capture_index = None;
|
||||
let mut injection_language_capture_index = None;
|
||||
let mut injection_filename_capture_index = None;
|
||||
let mut injection_shebang_capture_index = None;
|
||||
let mut local_def_capture_index = None;
|
||||
let mut local_def_value_capture_index = None;
|
||||
let mut local_ref_capture_index = None;
|
||||
let mut local_scope_capture_index = None;
|
||||
for (i, name) in query.capture_names().iter().enumerate() {
|
||||
let i = Some(i as u32);
|
||||
match *name {
|
||||
"local.definition" => local_def_capture_index = i,
|
||||
"local.definition-value" => local_def_value_capture_index = i,
|
||||
"local.reference" => local_ref_capture_index = i,
|
||||
"local.scope" => local_scope_capture_index = i,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
for (i, name) in injections_query.capture_names().iter().enumerate() {
|
||||
let i = Some(i as u32);
|
||||
match *name {
|
||||
"injection.content" => injection_content_capture_index = i,
|
||||
"injection.language" => injection_language_capture_index = i,
|
||||
"injection.filename" => injection_filename_capture_index = i,
|
||||
"injection.shebang" => injection_shebang_capture_index = i,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
let highlight_indices = ArcSwap::from_pointee(vec![None; query.capture_names().len()]);
|
||||
Ok(Self {
|
||||
language,
|
||||
query,
|
||||
injections_query,
|
||||
combined_injections_patterns,
|
||||
highlights_pattern_index,
|
||||
highlight_indices,
|
||||
non_local_variable_patterns,
|
||||
injection_content_capture_index,
|
||||
injection_language_capture_index,
|
||||
injection_filename_capture_index,
|
||||
injection_shebang_capture_index,
|
||||
local_scope_capture_index,
|
||||
local_def_capture_index,
|
||||
local_def_value_capture_index,
|
||||
local_ref_capture_index,
|
||||
})
|
||||
}
|
||||
|
||||
/// Get a slice containing all of the highlight names used in the configuration.
///
/// These are the capture names of `self.query` (locals and highlights); the
/// captures of the injections query are not included.
|
||||
pub fn names(&self) -> &[&str] {
|
||||
self.query.capture_names()
|
||||
}
|
||||
|
||||
/// Set the list of recognized highlight names.
|
||||
///
|
||||
/// Tree-sitter syntax-highlighting queries specify highlights in the form of dot-separated
|
||||
/// highlight names like `punctuation.bracket` and `function.method.builtin`. Consumers of
|
||||
/// these queries can choose to recognize highlights with different levels of specificity.
|
||||
/// For example, the string `function.builtin` will match against `function.builtin.constructor`
|
||||
/// but will not match `function.method.builtin` and `function.method`.
|
||||
///
|
||||
/// When highlighting, results are returned as `Highlight` values, which contain the index
|
||||
/// of the matched highlight this list of highlight names.
|
||||
pub fn configure(&self, recognized_names: &[String]) {
|
||||
let mut capture_parts = Vec::new();
|
||||
let indices: Vec<_> = self
|
||||
.query
|
||||
.capture_names()
|
||||
.iter()
|
||||
.map(move |capture_name| {
|
||||
capture_parts.clear();
|
||||
capture_parts.extend(capture_name.split('.'));
|
||||
|
||||
let mut best_index = None;
|
||||
let mut best_match_len = 0;
|
||||
for (i, recognized_name) in recognized_names.iter().enumerate() {
|
||||
let mut len = 0;
|
||||
let mut matches = true;
|
||||
for (i, part) in recognized_name.split('.').enumerate() {
|
||||
match capture_parts.get(i) {
|
||||
Some(capture_part) if *capture_part == part => len += 1,
|
||||
_ => {
|
||||
matches = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if matches && len > best_match_len {
|
||||
best_index = Some(i);
|
||||
best_match_len = len;
|
||||
}
|
||||
}
|
||||
best_index.map(Highlight)
|
||||
})
|
||||
.collect();
|
||||
|
||||
self.highlight_indices.store(Arc::new(indices));
|
||||
}
|
||||
|
||||
fn injection_pair<'a>(
|
||||
&self,
|
||||
query_match: &QueryMatch<'a, 'a>,
|
||||
source: RopeSlice<'a>,
|
||||
) -> (Option<InjectionLanguageMarker<'a>>, Option<Node<'a>>) {
|
||||
let mut injection_capture = None;
|
||||
let mut content_node = None;
|
||||
|
||||
for capture in query_match.captures {
|
||||
let index = Some(capture.index);
|
||||
if index == self.injection_language_capture_index {
|
||||
let name = byte_range_to_str(capture.node.byte_range(), source);
|
||||
injection_capture = Some(InjectionLanguageMarker::Name(name));
|
||||
} else if index == self.injection_filename_capture_index {
|
||||
let name = byte_range_to_str(capture.node.byte_range(), source);
|
||||
let path = Path::new(name.as_ref()).to_path_buf();
|
||||
injection_capture = Some(InjectionLanguageMarker::Filename(path.into()));
|
||||
} else if index == self.injection_shebang_capture_index {
|
||||
let node_slice = source.byte_slice(capture.node.byte_range());
|
||||
|
||||
// some languages allow space and newlines before the actual string content
|
||||
// so a shebang could be on either the first or second line
|
||||
let lines = if let Ok(end) = node_slice.try_line_to_byte(2) {
|
||||
node_slice.byte_slice(..end)
|
||||
} else {
|
||||
node_slice
|
||||
};
|
||||
|
||||
static SHEBANG_REGEX: Lazy<rope::Regex> =
|
||||
Lazy::new(|| rope::Regex::new(SHEBANG).unwrap());
|
||||
|
||||
injection_capture = SHEBANG_REGEX
|
||||
.captures_iter(lines.regex_input())
|
||||
.map(|cap| {
|
||||
let cap = lines.byte_slice(cap.get_group(1).unwrap().range());
|
||||
InjectionLanguageMarker::Shebang(cap.into())
|
||||
})
|
||||
.next()
|
||||
} else if index == self.injection_content_capture_index {
|
||||
content_node = Some(capture.node);
|
||||
}
|
||||
}
|
||||
(injection_capture, content_node)
|
||||
}
|
||||
|
||||
pub(super) fn injection_for_match<'a>(
|
||||
&self,
|
||||
query: &'a Query,
|
||||
query_match: &QueryMatch<'a, 'a>,
|
||||
source: RopeSlice<'a>,
|
||||
) -> (
|
||||
Option<InjectionLanguageMarker<'a>>,
|
||||
Option<Node<'a>>,
|
||||
IncludedChildren,
|
||||
) {
|
||||
let (mut injection_capture, content_node) = self.injection_pair(query_match, source);
|
||||
|
||||
let mut included_children = IncludedChildren::default();
|
||||
for prop in query.property_settings(query_match.pattern_index) {
|
||||
match prop.key.as_ref() {
|
||||
// In addition to specifying the language name via the text of a
|
||||
// captured node, it can also be hard-coded via a `#set!` predicate
|
||||
// that sets the injection.language key.
|
||||
"injection.language" if injection_capture.is_none() => {
|
||||
injection_capture = prop
|
||||
.value
|
||||
.as_ref()
|
||||
.map(|s| InjectionLanguageMarker::Name(s.as_ref().into()));
|
||||
}
|
||||
|
||||
// By default, injections do not include the *children* of an
|
||||
// `injection.content` node - only the ranges that belong to the
|
||||
// node itself. This can be changed using a `#set!` predicate that
|
||||
// sets the `injection.include-children` key.
|
||||
"injection.include-children" => included_children = IncludedChildren::All,
|
||||
|
||||
// Some queries might only exclude named children but include unnamed
|
||||
// children in their `injection.content` node. This can be enabled using
|
||||
// a `#set!` predicate that sets the `injection.include-unnamed-children` key.
|
||||
"injection.include-unnamed-children" => {
|
||||
included_children = IncludedChildren::Unnamed
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
(injection_capture, content_node, included_children)
|
||||
}
|
||||
pub fn load_query(
|
||||
&self,
|
||||
language: &str,
|
||||
filename: &str,
|
||||
read_query_text: impl FnMut(&str, &str) -> String,
|
||||
) -> Result<Option<Query>, QueryError> {
|
||||
let query_text = read_query(language, filename, read_query_text);
|
||||
if query_text.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
Query::new(&self.language, &query_text).map(Some)
|
||||
}
|
||||
}
|
||||
|
||||
/// reads a query by invoking `read_query_text`, handeles any `inherits` directives
|
||||
pub fn read_query(
|
||||
language: &str,
|
||||
filename: &str,
|
||||
mut read_query_text: impl FnMut(&str, &str) -> String,
|
||||
) -> String {
|
||||
fn read_query_impl(
|
||||
language: &str,
|
||||
filename: &str,
|
||||
read_query_text: &mut impl FnMut(&str, &str) -> String,
|
||||
) -> String {
|
||||
static INHERITS_REGEX: Lazy<Regex> =
|
||||
Lazy::new(|| Regex::new(r";+\s*inherits\s*:?\s*([a-z_,()-]+)\s*").unwrap());
|
||||
|
||||
let query = read_query_text(language, filename);
|
||||
|
||||
// replaces all "; inherits <language>(,<language>)*" with the queries of the given language(s)
|
||||
INHERITS_REGEX
|
||||
.replace_all(&query, |captures: ®ex::Captures| {
|
||||
captures[1]
|
||||
.split(',')
|
||||
.map(|language| {
|
||||
format!(
|
||||
"\n{}\n",
|
||||
read_query_impl(language, filename, &mut *read_query_text)
|
||||
)
|
||||
})
|
||||
.collect::<String>()
|
||||
})
|
||||
.to_string()
|
||||
}
|
||||
read_query_impl(language, filename, &mut read_query_text)
|
||||
}
|
439
helix-syntax/src/highlighter.rs
Normal file
439
helix-syntax/src/highlighter.rs
Normal file
@ -0,0 +1,439 @@
|
||||
use std::borrow::Cow;
|
||||
use std::cell::RefCell;
|
||||
use std::sync::atomic::{self, AtomicUsize};
|
||||
use std::{fmt, iter, mem, ops};
|
||||
|
||||
use ropey::RopeSlice;
|
||||
use tree_sitter::{QueryCaptures, QueryCursor, Tree};
|
||||
|
||||
use crate::ropey::RopeProvider;
|
||||
use crate::{
|
||||
byte_range_to_str, Error, HighlightConfiguration, Syntax, PARSER, TREE_SITTER_MATCH_LIMIT,
|
||||
};
|
||||
|
||||
const CANCELLATION_CHECK_INTERVAL: usize = 100;
|
||||
|
||||
/// Indicates which highlight should be applied to a region of source code.
///
/// The wrapped value is an index into the `recognized_names` slice that was
/// passed to `HighlightConfiguration::configure`.
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
||||
pub struct Highlight(pub usize);
|
||||
|
||||
/// Represents a single step in rendering a syntax-highlighted document.
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
pub enum HighlightEvent {
|
||||
/// A stretch of source text, given as byte offsets from `start` to `end`.
Source { start: usize, end: usize },
|
||||
/// The given highlight starts at the current position.
HighlightStart(Highlight),
|
||||
/// The most recently started highlight that is still open ends at the
/// current position.
HighlightEnd,
|
||||
}
|
||||
|
||||
/// A local definition recorded while processing locals patterns.
#[derive(Debug)]
|
||||
struct LocalDef<'a> {
|
||||
/// Source text of the node captured as `local.definition`.
name: Cow<'a, str>,
|
||||
/// Byte range of the `local.definition-value` capture of the same match,
/// or `0..0` when the match has no such capture.
value_range: ops::Range<usize>,
|
||||
/// Highlight assigned to the definition node; starts as `None` and is
/// filled in once a highlight pattern is processed for that node.
highlight: Option<Highlight>,
|
||||
}
|
||||
|
||||
/// A local scope on a layer's scope stack.
#[derive(Debug)]
|
||||
struct LocalScope<'a> {
|
||||
/// Whether a reference lookup continues into the enclosing scopes after
/// this one; the lookup stops at the first scope where this is `false`.
inherits: bool,
|
||||
/// Byte range covered by the scope.
range: ops::Range<usize>,
|
||||
/// Definitions recorded in this scope, in the order they were found.
local_defs: Vec<LocalDef<'a>>,
|
||||
}
|
||||
|
||||
/// Iterator state behind `Syntax::highlight_iter`.
#[derive(Debug)]
|
||||
struct HighlightIter<'a> {
|
||||
/// Text the byte offsets in emitted events refer to.
source: RopeSlice<'a>,
|
||||
/// Byte offset up to which `Source` events have been emitted.
byte_offset: usize,
|
||||
/// Optional flag polled periodically; a non-zero value makes the iterator
/// yield `Error::Cancelled`.
cancellation_flag: Option<&'a AtomicUsize>,
|
||||
/// Layers that still have captures or open highlights; `sort_layers` keeps
/// the layer to process next at index 0.
layers: Vec<HighlightIterLayer<'a>>,
|
||||
/// Iterations since the cancellation flag was last checked.
iter_count: usize,
|
||||
/// Event held back because a `Source` event had to be emitted first.
next_event: Option<HighlightEvent>,
|
||||
/// `(start byte, end byte, layer depth)` of the most recently started highlight.
last_highlight_range: Option<(usize, usize, u32)>,
|
||||
}
|
||||
|
||||
/// Per-layer state of a `HighlightIter`.
struct HighlightIterLayer<'a> {
|
||||
/// Always `None` where this struct is built in `highlight_iter`.
_tree: Option<Tree>,
|
||||
/// Query cursor backing `captures`; pushed back into the `PARSER` cursor
/// pool when `sort_layers` removes the layer.
cursor: QueryCursor,
|
||||
/// Remaining captures of the layer's query, peekable so the next capture's
/// start byte can be read without consuming it.
captures: RefCell<iter::Peekable<QueryCaptures<'a, 'a, RopeProvider<'a>, &'a [u8]>>>,
|
||||
/// Highlight configuration of the layer's language.
config: &'a HighlightConfiguration,
|
||||
/// End bytes of the highlights that are currently open, innermost last.
highlight_end_stack: Vec<usize>,
|
||||
/// Stack of local scopes; the bottom entry spans `0..usize::MAX`.
scope_stack: Vec<LocalScope<'a>>,
|
||||
/// Depth of the layer, copied from the syntax layer it was built from.
depth: u32,
|
||||
}
|
||||
|
||||
impl<'a> fmt::Debug for HighlightIterLayer<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.debug_struct("HighlightIterLayer").finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> HighlightIterLayer<'a> {
|
||||
// First, sort scope boundaries by their byte offset in the document. At a
|
||||
// given position, emit scope endings before scope beginnings. Finally, emit
|
||||
// scope boundaries from deeper layers first.
|
||||
fn sort_key(&self) -> Option<(usize, bool, isize)> {
|
||||
let depth = -(self.depth as isize);
|
||||
let next_start = self
|
||||
.captures
|
||||
.borrow_mut()
|
||||
.peek()
|
||||
.map(|(m, i)| m.captures[*i].node.start_byte());
|
||||
let next_end = self.highlight_end_stack.last().cloned();
|
||||
match (next_start, next_end) {
|
||||
(Some(start), Some(end)) => {
|
||||
if start < end {
|
||||
Some((start, true, depth))
|
||||
} else {
|
||||
Some((end, false, depth))
|
||||
}
|
||||
}
|
||||
(Some(i), None) => Some((i, true, depth)),
|
||||
(None, Some(j)) => Some((j, false, depth)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> HighlightIter<'a> {
|
||||
fn emit_event(
|
||||
&mut self,
|
||||
offset: usize,
|
||||
event: Option<HighlightEvent>,
|
||||
) -> Option<Result<HighlightEvent, Error>> {
|
||||
let result;
|
||||
if self.byte_offset < offset {
|
||||
result = Some(Ok(HighlightEvent::Source {
|
||||
start: self.byte_offset,
|
||||
end: offset,
|
||||
}));
|
||||
self.byte_offset = offset;
|
||||
self.next_event = event;
|
||||
} else {
|
||||
result = event.map(Ok);
|
||||
}
|
||||
self.sort_layers();
|
||||
result
|
||||
}
|
||||
|
||||
fn sort_layers(&mut self) {
|
||||
while !self.layers.is_empty() {
|
||||
if let Some(sort_key) = self.layers[0].sort_key() {
|
||||
let mut i = 0;
|
||||
while i + 1 < self.layers.len() {
|
||||
if let Some(next_offset) = self.layers[i + 1].sort_key() {
|
||||
if next_offset < sort_key {
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
let layer = self.layers.remove(i + 1);
|
||||
PARSER.with(|ts_parser| {
|
||||
let highlighter = &mut ts_parser.borrow_mut();
|
||||
highlighter.cursors.push(layer.cursor);
|
||||
});
|
||||
}
|
||||
break;
|
||||
}
|
||||
if i > 0 {
|
||||
self.layers[0..(i + 1)].rotate_left(1);
|
||||
}
|
||||
break;
|
||||
} else {
|
||||
let layer = self.layers.remove(0);
|
||||
PARSER.with(|ts_parser| {
|
||||
let highlighter = &mut ts_parser.borrow_mut();
|
||||
highlighter.cursors.push(layer.cursor);
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for HighlightIter<'a> {
|
||||
type Item = Result<HighlightEvent, Error>;
|
||||
|
||||
/// Yields the next highlight event.
///
/// Each pass of the main loop works on `self.layers[0]`, the layer that
/// `sort_layers` placed first. In order, a pass:
/// 1. returns an event parked in `next_event`, if any;
/// 2. checks the cancellation flag every `CANCELLATION_CHECK_INTERVAL` passes;
/// 3. once no layers remain, emits the rest of the source and then ends;
/// 4. closes highlights that end at or before the next capture starts;
/// 5. processes locals captures (scopes, definitions, references);
/// 6. emits a `HighlightStart` for a highlight capture, unless it is skipped.
fn next(&mut self) -> Option<Self::Item> {
|
||||
'main: loop {
|
||||
// If we've already determined the next highlight boundary, just return it.
|
||||
if let Some(e) = self.next_event.take() {
|
||||
return Some(Ok(e));
|
||||
}
|
||||
|
||||
// Periodically check for cancellation, returning `Cancelled` error if the
|
||||
// cancellation flag was flipped.
|
||||
if let Some(cancellation_flag) = self.cancellation_flag {
|
||||
self.iter_count += 1;
|
||||
if self.iter_count >= CANCELLATION_CHECK_INTERVAL {
|
||||
self.iter_count = 0;
|
||||
if cancellation_flag.load(atomic::Ordering::Relaxed) != 0 {
|
||||
return Some(Err(Error::Cancelled));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If none of the layers have any more highlight boundaries, terminate.
|
||||
if self.layers.is_empty() {
|
||||
let len = self.source.len_bytes();
|
||||
return if self.byte_offset < len {
|
||||
let result = Some(Ok(HighlightEvent::Source {
|
||||
start: self.byte_offset,
|
||||
end: len,
|
||||
}));
|
||||
self.byte_offset = len;
|
||||
result
|
||||
} else {
|
||||
None
|
||||
};
|
||||
}
|
||||
|
||||
// Get the next capture from whichever layer has the earliest highlight boundary.
|
||||
let range;
|
||||
let layer = &mut self.layers[0];
|
||||
let captures = layer.captures.get_mut();
|
||||
if let Some((next_match, capture_index)) = captures.peek() {
|
||||
let next_capture = next_match.captures[*capture_index];
|
||||
range = next_capture.node.byte_range();
|
||||
|
||||
// If any previous highlight ends before this node starts, then before
|
||||
// processing this capture, emit the source code up until the end of the
|
||||
// previous highlight, and an end event for that highlight.
|
||||
if let Some(end_byte) = layer.highlight_end_stack.last().cloned() {
|
||||
if end_byte <= range.start {
|
||||
layer.highlight_end_stack.pop();
|
||||
return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
|
||||
}
|
||||
}
|
||||
}
|
||||
// If there are no more captures, then emit any remaining highlight end events.
|
||||
// And if there are none of those, then just advance to the end of the document.
|
||||
else if let Some(end_byte) = layer.highlight_end_stack.last().cloned() {
|
||||
layer.highlight_end_stack.pop();
|
||||
return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
|
||||
} else {
|
||||
return self.emit_event(self.source.len_bytes(), None);
|
||||
};
|
||||
|
||||
// The peek above succeeded, so taking the capture cannot fail here.
let (mut match_, capture_index) = captures.next().unwrap();
|
||||
let mut capture = match_.captures[capture_index];
|
||||
|
||||
// Remove from the local scope stack any local scopes that have already ended.
// The `unwrap` relies on the bottom scope built in `highlight_iter`: its range
// ends at `usize::MAX`, so the condition is never true for it and it stays.
|
||||
while range.start > layer.scope_stack.last().unwrap().range.end {
|
||||
layer.scope_stack.pop();
|
||||
}
|
||||
|
||||
// If this capture is for tracking local variables, then process the
|
||||
// local variable info.
|
||||
let mut reference_highlight = None;
|
||||
let mut definition_highlight = None;
|
||||
while match_.pattern_index < layer.config.highlights_pattern_index {
|
||||
// If the node represents a local scope, push a new local scope onto
|
||||
// the scope stack.
|
||||
if Some(capture.index) == layer.config.local_scope_capture_index {
|
||||
definition_highlight = None;
|
||||
let mut scope = LocalScope {
|
||||
inherits: true,
|
||||
range: range.clone(),
|
||||
local_defs: Vec::new(),
|
||||
};
|
||||
for prop in layer.config.query.property_settings(match_.pattern_index) {
|
||||
if let "local.scope-inherits" = prop.key.as_ref() {
|
||||
scope.inherits =
|
||||
prop.value.as_ref().map_or(true, |r| r.as_ref() == "true");
|
||||
}
|
||||
}
|
||||
layer.scope_stack.push(scope);
|
||||
}
|
||||
// If the node represents a definition, add a new definition to the
|
||||
// local scope at the top of the scope stack.
|
||||
else if Some(capture.index) == layer.config.local_def_capture_index {
|
||||
reference_highlight = None;
|
||||
let scope = layer.scope_stack.last_mut().unwrap();
|
||||
|
||||
let mut value_range = 0..0;
|
||||
for capture in match_.captures {
|
||||
if Some(capture.index) == layer.config.local_def_value_capture_index {
|
||||
value_range = capture.node.byte_range();
|
||||
}
|
||||
}
|
||||
|
||||
let name = byte_range_to_str(range.clone(), self.source);
|
||||
scope.local_defs.push(LocalDef {
|
||||
name,
|
||||
value_range,
|
||||
highlight: None,
|
||||
});
|
||||
// Keep a handle to the new definition's highlight slot so it can be
// filled in once the highlight for this node is known.
definition_highlight = scope.local_defs.last_mut().map(|s| &mut s.highlight);
|
||||
}
|
||||
// If the node represents a reference, then try to find the corresponding
|
||||
// definition in the scope stack.
|
||||
else if Some(capture.index) == layer.config.local_ref_capture_index
|
||||
&& definition_highlight.is_none()
|
||||
{
|
||||
definition_highlight = None;
|
||||
let name = byte_range_to_str(range.clone(), self.source);
|
||||
// Search from the innermost scope outwards, newest definition first.
for scope in layer.scope_stack.iter().rev() {
|
||||
if let Some(highlight) = scope.local_defs.iter().rev().find_map(|def| {
|
||||
if def.name == name && range.start >= def.value_range.end {
|
||||
Some(def.highlight)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}) {
|
||||
reference_highlight = highlight;
|
||||
break;
|
||||
}
|
||||
if !scope.inherits {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Continue processing any additional matches for the same node.
|
||||
if let Some((next_match, next_capture_index)) = captures.peek() {
|
||||
let next_capture = next_match.captures[*next_capture_index];
|
||||
if next_capture.node == capture.node {
|
||||
capture = next_capture;
|
||||
match_ = captures.next().unwrap().0;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
self.sort_layers();
|
||||
continue 'main;
|
||||
}
|
||||
|
||||
// Otherwise, this capture must represent a highlight.
|
||||
// If this exact range has already been highlighted by an earlier pattern, or by
|
||||
// a different layer, then skip over this one.
// NOTE(review): the condition below only skips when this layer's depth is
// smaller than the depth recorded with the last highlight - confirm that
// this is the intended reading of "earlier pattern or different layer".
|
||||
if let Some((last_start, last_end, last_depth)) = self.last_highlight_range {
|
||||
if range.start == last_start && range.end == last_end && layer.depth < last_depth {
|
||||
self.sort_layers();
|
||||
continue 'main;
|
||||
}
|
||||
}
|
||||
|
||||
// If the current node was found to be a local variable, then skip over any
|
||||
// highlighting patterns that are disabled for local variables.
|
||||
if definition_highlight.is_some() || reference_highlight.is_some() {
|
||||
while layer.config.non_local_variable_patterns[match_.pattern_index] {
|
||||
match_.remove();
|
||||
if let Some((next_match, next_capture_index)) = captures.peek() {
|
||||
let next_capture = next_match.captures[*next_capture_index];
|
||||
if next_capture.node == capture.node {
|
||||
capture = next_capture;
|
||||
match_ = captures.next().unwrap().0;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
self.sort_layers();
|
||||
continue 'main;
|
||||
}
|
||||
}
|
||||
|
||||
// Once a highlighting pattern is found for the current node, skip over
|
||||
// any later highlighting patterns that also match this node. Captures
|
||||
// for a given node are ordered by pattern index, so these subsequent
|
||||
// captures are guaranteed to be for highlighting, not injections or
|
||||
// local variables.
|
||||
while let Some((next_match, next_capture_index)) = captures.peek() {
|
||||
let next_capture = next_match.captures[*next_capture_index];
|
||||
if next_capture.node == capture.node {
|
||||
captures.next();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// `None` when `configure` did not map this capture to a recognized name.
let current_highlight = layer.config.highlight_indices.load()[capture.index as usize];
|
||||
|
||||
// If this node represents a local definition, then store the current
|
||||
// highlight value on the local scope entry representing this node.
|
||||
if let Some(definition_highlight) = definition_highlight {
|
||||
*definition_highlight = current_highlight;
|
||||
}
|
||||
|
||||
// Emit a scope start event and push the node's end position to the stack.
// A highlight found through a local reference takes precedence.
|
||||
if let Some(highlight) = reference_highlight.or(current_highlight) {
|
||||
self.last_highlight_range = Some((range.start, range.end, layer.depth));
|
||||
layer.highlight_end_stack.push(range.end);
|
||||
return self
|
||||
.emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight)));
|
||||
}
|
||||
|
||||
self.sort_layers();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Syntax {
|
||||
/// Iterate over the highlighted regions for a given slice of source code.
///
/// * `source` - text that the byte offsets in the emitted events refer to.
/// * `range` - optional byte range the query cursors are restricted to; the
///   first `Source` event starts at `range.start` (or 0 without a range).
/// * `cancellation_flag` - optional flag; once it is non-zero the iterator
///   yields `Error::Cancelled` at its next periodic check.
///
/// Layers whose query produces no captures are left out.
|
||||
pub fn highlight_iter<'a>(
|
||||
&'a self,
|
||||
source: RopeSlice<'a>,
|
||||
range: Option<std::ops::Range<usize>>,
|
||||
cancellation_flag: Option<&'a AtomicUsize>,
|
||||
) -> impl Iterator<Item = Result<HighlightEvent, Error>> + 'a {
|
||||
let mut layers = self
|
||||
.layers
|
||||
.iter()
|
||||
.filter_map(|(_, layer)| {
|
||||
// TODO: if range doesn't overlap layer range, skip it
|
||||
|
||||
// Reuse a cursor from the pool if available.
|
||||
let mut cursor = PARSER.with(|ts_parser| {
|
||||
let highlighter = &mut ts_parser.borrow_mut();
|
||||
highlighter.cursors.pop().unwrap_or_else(QueryCursor::new)
|
||||
});
|
||||
|
||||
// The `captures` iterator borrows the `Tree` and the `QueryCursor`, which
|
||||
// prevents them from being moved. But both of these values are really just
|
||||
// pointers, so it's actually ok to move them.
// NOTE(review): the `'static` lifetime produced here is not checked by the
// compiler. `cursor` is moved into the same `HighlightIterLayer` as
// `captures` below, and is declared before it in that struct - confirm
// that nothing uses `captures` after `cursor` is dropped or recycled.
|
||||
let cursor_ref =
|
||||
unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) };
|
||||
|
||||
// if reusing cursors & no range this resets to whole range
|
||||
cursor_ref.set_byte_range(range.clone().unwrap_or(0..usize::MAX));
|
||||
cursor_ref.set_match_limit(TREE_SITTER_MATCH_LIMIT);
|
||||
|
||||
let mut captures = cursor_ref
|
||||
.captures(
|
||||
&layer.config.query,
|
||||
layer.tree().root_node(),
|
||||
RopeProvider(source),
|
||||
)
|
||||
.peekable();
|
||||
|
||||
// If there's no captures, skip the layer
// NOTE(review): on this early return `cursor` is dropped rather than
// pushed back into the pool - confirm that this is acceptable.
|
||||
captures.peek()?;
|
||||
|
||||
Some(HighlightIterLayer {
|
||||
highlight_end_stack: Vec::new(),
|
||||
// Bottom scope spanning everything; `next` relies on it never being popped.
scope_stack: vec![LocalScope {
|
||||
inherits: false,
|
||||
range: 0..usize::MAX,
|
||||
local_defs: Vec::new(),
|
||||
}],
|
||||
cursor,
|
||||
_tree: None,
|
||||
captures: RefCell::new(captures),
|
||||
config: layer.config.as_ref(), // TODO: just reuse `layer`
|
||||
depth: layer.depth, // TODO: just reuse `layer`
|
||||
})
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// Order the layers by their first scope boundary.
layers.sort_unstable_by_key(|layer| layer.sort_key());
|
||||
|
||||
let mut result = HighlightIter {
|
||||
source,
|
||||
byte_offset: range.map_or(0, |r| r.start),
|
||||
cancellation_flag,
|
||||
iter_count: 0,
|
||||
layers,
|
||||
next_event: None,
|
||||
last_highlight_range: None,
|
||||
};
|
||||
result.sort_layers();
|
||||
result
|
||||
}
|
||||
}
|
342
helix-syntax/src/lib.rs
Normal file
342
helix-syntax/src/lib.rs
Normal file
@ -0,0 +1,342 @@
|
||||
use ::ropey::RopeSlice;
|
||||
use slotmap::{DefaultKey as LayerId, HopSlotMap};
|
||||
use tree_sitter::{Node, Parser, Point, Query, QueryCursor, Range, Tree};
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::cell::RefCell;
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::path::Path;
|
||||
use std::str;
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::parse::LayerUpdateFlags;
|
||||
|
||||
pub use crate::config::{read_query, HighlightConfiguration};
|
||||
pub use crate::ropey::RopeProvider;
|
||||
pub use merge::merge;
|
||||
pub use pretty_print::pretty_print_tree;
|
||||
pub use tree_cursor::TreeCursor;
|
||||
|
||||
mod config;
|
||||
pub mod highlighter;
|
||||
mod merge;
|
||||
mod parse;
|
||||
mod pretty_print;
|
||||
mod ropey;
|
||||
mod tree_cursor;
|
||||
|
||||
/// The syntax state of one document: a set of [`LanguageLayer`]s keyed by
/// layer id.
///
/// `Syntax::new` inserts a single root layer (depth 0, covering the whole
/// byte range); `Syntax::update` adds or reuses further layers for language
/// injections.
#[derive(Debug)]
pub struct Syntax {
    /// All layers of the document, the root layer included.
    layers: HopSlotMap<LayerId, LanguageLayer>,
    /// Id of the root layer inside `layers`.
    root: LayerId,
}
|
||||
|
||||
impl Syntax {
|
||||
pub fn new(
|
||||
source: RopeSlice,
|
||||
config: Arc<HighlightConfiguration>,
|
||||
injection_callback: impl Fn(&InjectionLanguageMarker) -> Option<Arc<HighlightConfiguration>>,
|
||||
) -> Option<Self> {
|
||||
let root_layer = LanguageLayer {
|
||||
tree: None,
|
||||
config,
|
||||
depth: 0,
|
||||
flags: LayerUpdateFlags::empty(),
|
||||
ranges: vec![Range {
|
||||
start_byte: 0,
|
||||
end_byte: usize::MAX,
|
||||
start_point: Point::new(0, 0),
|
||||
end_point: Point::new(usize::MAX, usize::MAX),
|
||||
}],
|
||||
parent: None,
|
||||
};
|
||||
|
||||
// track scope_descriptor: a Vec of scopes for item in tree
|
||||
|
||||
let mut layers = HopSlotMap::default();
|
||||
let root = layers.insert(root_layer);
|
||||
|
||||
let mut syntax = Self { root, layers };
|
||||
|
||||
let res = syntax.update(source, Vec::new(), injection_callback);
|
||||
|
||||
if res.is_err() {
|
||||
log::error!("TS parser failed, disabling TS for the current buffer: {res:?}");
|
||||
return None;
|
||||
}
|
||||
Some(syntax)
|
||||
}
|
||||
|
||||
pub fn tree(&self) -> &Tree {
|
||||
self.layers[self.root].tree()
|
||||
}
|
||||
|
||||
pub fn tree_for_byte_range(&self, start: usize, end: usize) -> &Tree {
|
||||
let mut container_id = self.root;
|
||||
|
||||
for (layer_id, layer) in self.layers.iter() {
|
||||
if layer.depth > self.layers[container_id].depth
|
||||
&& layer.contains_byte_range(start, end)
|
||||
{
|
||||
container_id = layer_id;
|
||||
}
|
||||
}
|
||||
|
||||
self.layers[container_id].tree()
|
||||
}
|
||||
|
||||
pub fn named_descendant_for_byte_range(&self, start: usize, end: usize) -> Option<Node<'_>> {
|
||||
self.tree_for_byte_range(start, end)
|
||||
.root_node()
|
||||
.named_descendant_for_byte_range(start, end)
|
||||
}
|
||||
|
||||
pub fn descendant_for_byte_range(&self, start: usize, end: usize) -> Option<Node<'_>> {
|
||||
self.tree_for_byte_range(start, end)
|
||||
.root_node()
|
||||
.descendant_for_byte_range(start, end)
|
||||
}
|
||||
|
||||
pub fn walk(&self) -> TreeCursor<'_> {
|
||||
TreeCursor::new(&self.layers, self.root)
|
||||
}
|
||||
}
|
||||
|
||||
/// One parsed language inside a document: either the root language or an
/// injected one.
#[derive(Debug)]
pub struct LanguageLayer {
    // mode
    // grammar
    /// Configuration of this layer's language (grammar and queries).
    pub config: Arc<HighlightConfiguration>,
    /// The parsed tree; `None` until the layer has been parsed for the first
    /// time.
    pub(crate) tree: Option<Tree>,
    /// The document ranges this layer is parsed from (handed to the parser as
    /// its included ranges).
    pub ranges: Vec<Range>,
    /// Nesting depth: 0 for the root layer, parent depth + 1 for injections.
    pub depth: u32,
    /// Bookkeeping used by `Syntax::update`; reset to empty when it finishes
    /// successfully.
    flags: LayerUpdateFlags,
    /// The layer whose injection query produced this layer; `None` for the
    /// root layer.
    parent: Option<LayerId>,
}
|
||||
|
||||
/// This PartialEq implementation only checks if that
|
||||
/// two layers are theoretically identical (meaning they highlight the same text range with the same language).
|
||||
/// It does not check whether the layers have the same internal treesitter
|
||||
/// state.
|
||||
impl PartialEq for LanguageLayer {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.depth == other.depth
|
||||
&& self.config.language == other.config.language
|
||||
&& self.ranges == other.ranges
|
||||
}
|
||||
}
|
||||
|
||||
/// Hash implementation belongs to PartialEq implementation above.
|
||||
/// See its documentation for details.
|
||||
impl Hash for LanguageLayer {
|
||||
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||
self.depth.hash(state);
|
||||
self.config.language.hash(state);
|
||||
self.ranges.hash(state);
|
||||
}
|
||||
}
|
||||
|
||||
impl LanguageLayer {
|
||||
pub fn tree(&self) -> &Tree {
|
||||
// TODO: no unwrap
|
||||
self.tree.as_ref().unwrap()
|
||||
}
|
||||
|
||||
/// Whether the layer contains the given byte range.
|
||||
///
|
||||
/// If the layer has multiple ranges (i.e. combined injections), the
|
||||
/// given range is considered contained if it is within the start and
|
||||
/// end bytes of the first and last ranges **and** if the given range
|
||||
/// starts or ends within any of the layer's ranges.
|
||||
fn contains_byte_range(&self, start: usize, end: usize) -> bool {
|
||||
let layer_start = self
|
||||
.ranges
|
||||
.first()
|
||||
.expect("ranges should not be empty")
|
||||
.start_byte;
|
||||
let layer_end = self
|
||||
.ranges
|
||||
.last()
|
||||
.expect("ranges should not be empty")
|
||||
.end_byte;
|
||||
|
||||
layer_start <= start
|
||||
&& layer_end >= end
|
||||
&& self.ranges.iter().any(|range| {
|
||||
let byte_range = range.start_byte..range.end_byte;
|
||||
byte_range.contains(&start) || byte_range.contains(&end)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Describes which language an injection asks for. It is passed to the
/// injection callback of `Syntax::new` / `Syntax::update`, which resolves it
/// to a `HighlightConfiguration`.
// NOTE(review): the per-variant meanings below are inferred from the variant
// names; the code constructing these markers is not in this file.
#[derive(Debug, Clone)]
pub enum InjectionLanguageMarker<'a> {
    /// Presumably a language name.
    Name(Cow<'a, str>),
    /// Presumably a file name/path from which the language is derived.
    Filename(Cow<'a, Path>),
    /// Presumably the interpreter taken from a shebang line.
    Shebang(String),
}
|
||||
|
||||
/// Regular expression for a shebang line: `#!`, optional whitespace, an
/// optional path ending in `/` or `\` (optionally followed by `env` and any
/// `-flag` arguments), then one capture group holding a run of characters
/// that are not whitespace, `.` or digits.
const SHEBANG: &str = r"#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)";
|
||||
|
||||
/// The node(s) a single capture produced within one query match.
#[derive(Debug)]
pub enum CapturedNode<'a> {
    /// The capture matched exactly one node.
    Single(Node<'a>),
    /// The capture matched several nodes (e.g. through a quantifier).
    /// Guaranteed to be not empty
    Grouped(Vec<Node<'a>>),
}
|
||||
|
||||
impl<'a> CapturedNode<'a> {
|
||||
pub fn start_byte(&self) -> usize {
|
||||
match self {
|
||||
Self::Single(n) => n.start_byte(),
|
||||
Self::Grouped(ns) => ns[0].start_byte(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn end_byte(&self) -> usize {
|
||||
match self {
|
||||
Self::Single(n) => n.end_byte(),
|
||||
Self::Grouped(ns) => ns.last().unwrap().end_byte(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn byte_range(&self) -> std::ops::Range<usize> {
|
||||
self.start_byte()..self.end_byte()
|
||||
}
|
||||
}
|
||||
|
||||
/// The maximum number of in-progress matches a TS cursor can consider at once.
/// This is set to a constant in order to avoid performance problems for medium to large files. Set with `set_match_limit`.
/// Using such a limit means that we lose valid captures, so there is fundamentally a tradeoff here.
///
/// Old tree sitter versions used a limit of 32 by default until this limit was removed in version `0.19.5` (must now be set manually).
/// However, this causes performance issues for medium to large files.
/// In helix, this problem caused treesitter motions to take multiple seconds to complete in medium-sized rust files (3k loc).
///
/// Neovim also encountered this problem and reintroduced this limit after it was removed upstream
/// (see <https://github.com/neovim/neovim/issues/14897> and <https://github.com/neovim/neovim/pull/14915>).
/// The number used here is fundamentally a tradeoff between breaking some obscure edge cases and performance.
///
/// Neovim chose 64 for this value somewhat arbitrarily (<https://github.com/neovim/neovim/pull/18397>).
/// 64 is too low for some languages though. In particular, it breaks some highlighting for record fields in Erlang record definitions.
/// This number can be increased if new syntax highlight breakages are found, as long as the performance penalty is not too high.
const TREE_SITTER_MATCH_LIMIT: u32 = 256;
|
||||
|
||||
/// A tree-sitter query whose captures name text objects
/// ("function.inside", "class.around", etc).
#[derive(Debug)]
pub struct TextObjectQuery {
    /// The compiled query that is run by `capture_nodes` / `capture_nodes_any`.
    pub query: Query,
}
|
||||
|
||||
impl TextObjectQuery {
|
||||
/// Run the query on the given node and return sub nodes which match given
|
||||
/// capture ("function.inside", "class.around", etc).
|
||||
///
|
||||
/// Captures may contain multiple nodes by using quantifiers (+, *, etc),
|
||||
/// and support for this is partial and could use improvement.
|
||||
///
|
||||
/// ```query
|
||||
/// (comment)+ @capture
|
||||
///
|
||||
/// ; OR
|
||||
/// (
|
||||
/// (comment)*
|
||||
/// .
|
||||
/// (function)
|
||||
/// ) @capture
|
||||
/// ```
|
||||
pub fn capture_nodes<'a>(
|
||||
&'a self,
|
||||
capture_name: &str,
|
||||
node: Node<'a>,
|
||||
slice: RopeSlice<'a>,
|
||||
cursor: &'a mut QueryCursor,
|
||||
) -> Option<impl Iterator<Item = CapturedNode<'a>>> {
|
||||
self.capture_nodes_any(&[capture_name], node, slice, cursor)
|
||||
}
|
||||
|
||||
/// Find the first capture that exists out of all given `capture_names`
|
||||
/// and return sub nodes that match this capture.
|
||||
pub fn capture_nodes_any<'a>(
|
||||
&'a self,
|
||||
capture_names: &[&str],
|
||||
node: Node<'a>,
|
||||
slice: RopeSlice<'a>,
|
||||
cursor: &'a mut QueryCursor,
|
||||
) -> Option<impl Iterator<Item = CapturedNode<'a>>> {
|
||||
let capture_idx = capture_names
|
||||
.iter()
|
||||
.find_map(|cap| self.query.capture_index_for_name(cap))?;
|
||||
|
||||
cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT);
|
||||
|
||||
let nodes = cursor
|
||||
.captures(&self.query, node, RopeProvider(slice))
|
||||
.filter_map(move |(mat, _)| {
|
||||
let nodes: Vec<_> = mat
|
||||
.captures
|
||||
.iter()
|
||||
.filter_map(|cap| (cap.index == capture_idx).then_some(cap.node))
|
||||
.collect();
|
||||
|
||||
if nodes.len() > 1 {
|
||||
Some(CapturedNode::Grouped(nodes))
|
||||
} else {
|
||||
nodes.into_iter().map(CapturedNode::Single).next()
|
||||
}
|
||||
});
|
||||
|
||||
Some(nodes)
|
||||
}
|
||||
}
|
||||
|
||||
/// Represents the reason why syntax highlighting failed.
#[derive(Debug, PartialEq, Eq)]
pub enum Error {
    /// The parser did not produce a tree (this is what `LanguageLayer::parse`
    /// reports when `parse_with` returns `None`).
    Cancelled,
    /// The parser rejected the layer's language.
    InvalidLanguage,
    /// The parser rejected the layer's included ranges.
    InvalidRanges,
    /// Any other failure. NOTE(review): not produced by the code visible in
    /// this crate section — confirm where it is used.
    Unknown,
}
|
||||
|
||||
/// Controls which children of an injection's content node stay part of the
/// injected document.
///
/// `Default` is derived (yielding [`IncludedChildren::None`]) instead of being
/// implemented by hand.
#[derive(Clone, Default)]
enum IncludedChildren {
    /// No child is included: the range of every child is cut out of the
    /// injection. This is the default.
    #[default]
    None,
    /// Every child is included: nothing is cut out.
    All,
    /// Only unnamed children are included: the ranges of named children are
    /// cut out.
    Unnamed,
}
|
||||
|
||||
/// Returns the text of `source` within the byte `range` as a `Cow<str>`,
/// using ropey's `RopeSlice` to `Cow` conversion.
fn byte_range_to_str(range: std::ops::Range<usize>, source: RopeSlice) -> Cow<str> {
    let fragment = source.byte_slice(range);
    fragment.into()
}
|
||||
|
||||
/// Per-thread tree-sitter state: one parser plus a pool of reusable query
/// cursors.
struct TsParser {
    /// The parser used to (re)parse language layers.
    parser: tree_sitter::Parser,
    /// Idle query cursors; users pop one and push it back when done.
    pub cursors: Vec<QueryCursor>,
}
|
||||
|
||||
// Thread-local parser and query cursor pool. It lives in a `RefCell`, so it
// must not be borrowed again while a mutable borrow is still active.
// could also just use a pool, or a single instance?
thread_local! {
    static PARSER: RefCell<TsParser> = RefCell::new(TsParser {
        parser: Parser::new(),
        cursors: Vec::new(),
    })
}
|
||||
|
||||
pub fn with_cursor<T>(f: impl FnOnce(&mut QueryCursor) -> T) -> T {
|
||||
PARSER.with(|parser| {
|
||||
let mut parser = parser.borrow_mut();
|
||||
let mut cursor = parser.cursors.pop().unwrap_or_else(QueryCursor::new);
|
||||
let res = f(&mut cursor);
|
||||
parser.cursors.push(cursor);
|
||||
res
|
||||
})
|
||||
}
|
135
helix-syntax/src/merge.rs
Normal file
135
helix-syntax/src/merge.rs
Normal file
@ -0,0 +1,135 @@
|
||||
use crate::highlighter::{Highlight, HighlightEvent};
|
||||
|
||||
/// Iterator that weaves a list of highlight spans into an existing stream of
/// highlight events. Created by `merge`.
pub struct Merge<I> {
    /// The underlying event stream.
    iter: I,
    /// The remaining spans as `(highlight index, byte range)` pairs.
    spans: Box<dyn Iterator<Item = (usize, std::ops::Range<usize>)>>,

    /// Look-ahead: the next, not yet emitted event of `iter`.
    next_event: Option<HighlightEvent>,
    /// Look-ahead: the next, not yet (fully) emitted span.
    next_span: Option<(usize, std::ops::Range<usize>)>,

    /// Events staged for the following calls to `next`; used as a stack, so
    /// events are pushed in reverse emission order.
    queue: Vec<HighlightEvent>,
}
|
||||
|
||||
/// Merge a list of spans into the highlight event stream.
|
||||
pub fn merge<I: Iterator<Item = HighlightEvent>>(
|
||||
iter: I,
|
||||
spans: Vec<(usize, std::ops::Range<usize>)>,
|
||||
) -> Merge<I> {
|
||||
let spans = Box::new(spans.into_iter());
|
||||
let mut merge = Merge {
|
||||
iter,
|
||||
spans,
|
||||
next_event: None,
|
||||
next_span: None,
|
||||
queue: Vec::new(),
|
||||
};
|
||||
merge.next_event = merge.iter.next();
|
||||
merge.next_span = merge.spans.next();
|
||||
merge
|
||||
}
|
||||
|
||||
impl<I: Iterator<Item = HighlightEvent>> Iterator for Merge<I> {
|
||||
type Item = HighlightEvent;
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
use HighlightEvent::*;
|
||||
if let Some(event) = self.queue.pop() {
|
||||
return Some(event);
|
||||
}
|
||||
|
||||
loop {
|
||||
match (self.next_event, &self.next_span) {
|
||||
// this happens when range is partially or fully offscreen
|
||||
(Some(Source { start, .. }), Some((span, range))) if start > range.start => {
|
||||
if start > range.end {
|
||||
self.next_span = self.spans.next();
|
||||
} else {
|
||||
self.next_span = Some((*span, start..range.end));
|
||||
};
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
|
||||
match (self.next_event, &self.next_span) {
|
||||
(Some(HighlightStart(i)), _) => {
|
||||
self.next_event = self.iter.next();
|
||||
Some(HighlightStart(i))
|
||||
}
|
||||
(Some(HighlightEnd), _) => {
|
||||
self.next_event = self.iter.next();
|
||||
Some(HighlightEnd)
|
||||
}
|
||||
(Some(Source { start, end }), Some((_, range))) if start < range.start => {
|
||||
let intersect = range.start.min(end);
|
||||
let event = Source {
|
||||
start,
|
||||
end: intersect,
|
||||
};
|
||||
|
||||
if end == intersect {
|
||||
// the event is complete
|
||||
self.next_event = self.iter.next();
|
||||
} else {
|
||||
// subslice the event
|
||||
self.next_event = Some(Source {
|
||||
start: intersect,
|
||||
end,
|
||||
});
|
||||
};
|
||||
|
||||
Some(event)
|
||||
}
|
||||
(Some(Source { start, end }), Some((span, range))) if start == range.start => {
|
||||
let intersect = range.end.min(end);
|
||||
let event = HighlightStart(Highlight(*span));
|
||||
|
||||
// enqueue in reverse order
|
||||
self.queue.push(HighlightEnd);
|
||||
self.queue.push(Source {
|
||||
start,
|
||||
end: intersect,
|
||||
});
|
||||
|
||||
if end == intersect {
|
||||
// the event is complete
|
||||
self.next_event = self.iter.next();
|
||||
} else {
|
||||
// subslice the event
|
||||
self.next_event = Some(Source {
|
||||
start: intersect,
|
||||
end,
|
||||
});
|
||||
};
|
||||
|
||||
if intersect == range.end {
|
||||
self.next_span = self.spans.next();
|
||||
} else {
|
||||
self.next_span = Some((*span, intersect..range.end));
|
||||
}
|
||||
|
||||
Some(event)
|
||||
}
|
||||
(Some(event), None) => {
|
||||
self.next_event = self.iter.next();
|
||||
Some(event)
|
||||
}
|
||||
// Can happen if cursor at EOF and/or diagnostic reaches past the end.
|
||||
// We need to actually emit events for the cursor-at-EOF situation,
|
||||
// even though the range is past the end of the text. This needs to be
|
||||
// handled appropriately by the drawing code by not assuming that
|
||||
// all `Source` events point to valid indices in the rope.
|
||||
(None, Some((span, range))) => {
|
||||
let event = HighlightStart(Highlight(*span));
|
||||
self.queue.push(HighlightEnd);
|
||||
self.queue.push(Source {
|
||||
start: range.start,
|
||||
end: range.end,
|
||||
});
|
||||
self.next_span = self.spans.next();
|
||||
Some(event)
|
||||
}
|
||||
(None, None) => None,
|
||||
e => unreachable!("{:?}", e),
|
||||
}
|
||||
}
|
||||
}
|
429
helix-syntax/src/parse.rs
Normal file
429
helix-syntax/src/parse.rs
Normal file
@ -0,0 +1,429 @@
|
||||
use std::collections::VecDeque;
|
||||
use std::mem::replace;
|
||||
use std::sync::Arc;
|
||||
|
||||
use ahash::RandomState;
|
||||
use bitflags::bitflags;
|
||||
use hashbrown::raw::RawTable;
|
||||
use ropey::RopeSlice;
|
||||
use tree_sitter::{Node, Parser, Point, QueryCursor, Range};
|
||||
|
||||
use crate::ropey::RopeProvider;
|
||||
use crate::{
|
||||
Error, HighlightConfiguration, IncludedChildren, InjectionLanguageMarker, LanguageLayer,
|
||||
Syntax, PARSER, TREE_SITTER_MATCH_LIMIT,
|
||||
};
|
||||
|
||||
bitflags! {
    /// Flags that track the status of a layer
    /// in the `Syntax::update` function
    #[derive(Debug)]
    pub(crate) struct LayerUpdateFlags : u32{
        /// An edit changed the layer's content; the layer must be re-parsed.
        const MODIFIED = 0b001;
        /// An edit only shifted the layer's ranges.
        const MOVED = 0b010;
        /// The layer was visited during the update; untouched layers are
        /// removed at the end.
        const TOUCHED = 0b100;
    }
}
|
||||
|
||||
impl Syntax {
|
||||
pub fn update(
|
||||
&mut self,
|
||||
source: RopeSlice,
|
||||
edits: Vec<tree_sitter::InputEdit>,
|
||||
injection_callback: impl Fn(&InjectionLanguageMarker) -> Option<Arc<HighlightConfiguration>>,
|
||||
) -> Result<(), Error> {
|
||||
let mut queue = VecDeque::new();
|
||||
queue.push_back(self.root);
|
||||
|
||||
// This table allows inverse indexing of `layers`.
|
||||
// That is by hashing a `Layer` you can find
|
||||
// the `LayerId` of an existing equivalent `Layer` in `layers`.
|
||||
//
|
||||
// It is used to determine if a new layer exists for an injection
|
||||
// or if an existing layer needs to be updated.
|
||||
let mut layers_table = RawTable::with_capacity(self.layers.len());
|
||||
let layers_hasher = RandomState::new();
|
||||
// Use the edits to update all layers markers
|
||||
fn point_add(a: Point, b: Point) -> Point {
|
||||
if b.row > 0 {
|
||||
Point::new(a.row.saturating_add(b.row), b.column)
|
||||
} else {
|
||||
Point::new(0, a.column.saturating_add(b.column))
|
||||
}
|
||||
}
|
||||
fn point_sub(a: Point, b: Point) -> Point {
|
||||
if a.row > b.row {
|
||||
Point::new(a.row.saturating_sub(b.row), a.column)
|
||||
} else {
|
||||
Point::new(0, a.column.saturating_sub(b.column))
|
||||
}
|
||||
}
|
||||
|
||||
for (layer_id, layer) in self.layers.iter_mut() {
|
||||
// The root layer always covers the whole range (0..usize::MAX)
|
||||
if layer.depth == 0 {
|
||||
layer.flags = LayerUpdateFlags::MODIFIED;
|
||||
continue;
|
||||
}
|
||||
|
||||
if !edits.is_empty() {
|
||||
for range in &mut layer.ranges {
|
||||
// Roughly based on https://github.com/tree-sitter/tree-sitter/blob/ddeaa0c7f534268b35b4f6cb39b52df082754413/lib/src/subtree.c#L691-L720
|
||||
for edit in edits.iter().rev() {
|
||||
let is_pure_insertion = edit.old_end_byte == edit.start_byte;
|
||||
|
||||
// if edit is after range, skip
|
||||
if edit.start_byte > range.end_byte {
|
||||
// TODO: || (is_noop && edit.start_byte == range.end_byte)
|
||||
continue;
|
||||
}
|
||||
|
||||
// if edit is before range, shift entire range by len
|
||||
if edit.old_end_byte < range.start_byte {
|
||||
range.start_byte =
|
||||
edit.new_end_byte + (range.start_byte - edit.old_end_byte);
|
||||
range.start_point = point_add(
|
||||
edit.new_end_position,
|
||||
point_sub(range.start_point, edit.old_end_position),
|
||||
);
|
||||
|
||||
range.end_byte = edit
|
||||
.new_end_byte
|
||||
.saturating_add(range.end_byte - edit.old_end_byte);
|
||||
range.end_point = point_add(
|
||||
edit.new_end_position,
|
||||
point_sub(range.end_point, edit.old_end_position),
|
||||
);
|
||||
|
||||
layer.flags |= LayerUpdateFlags::MOVED;
|
||||
}
|
||||
// if the edit starts in the space before and extends into the range
|
||||
else if edit.start_byte < range.start_byte {
|
||||
range.start_byte = edit.new_end_byte;
|
||||
range.start_point = edit.new_end_position;
|
||||
|
||||
range.end_byte = range
|
||||
.end_byte
|
||||
.saturating_sub(edit.old_end_byte)
|
||||
.saturating_add(edit.new_end_byte);
|
||||
range.end_point = point_add(
|
||||
edit.new_end_position,
|
||||
point_sub(range.end_point, edit.old_end_position),
|
||||
);
|
||||
layer.flags = LayerUpdateFlags::MODIFIED;
|
||||
}
|
||||
// If the edit is an insertion at the start of the tree, shift
|
||||
else if edit.start_byte == range.start_byte && is_pure_insertion {
|
||||
range.start_byte = edit.new_end_byte;
|
||||
range.start_point = edit.new_end_position;
|
||||
layer.flags |= LayerUpdateFlags::MOVED;
|
||||
} else {
|
||||
range.end_byte = range
|
||||
.end_byte
|
||||
.saturating_sub(edit.old_end_byte)
|
||||
.saturating_add(edit.new_end_byte);
|
||||
range.end_point = point_add(
|
||||
edit.new_end_position,
|
||||
point_sub(range.end_point, edit.old_end_position),
|
||||
);
|
||||
layer.flags = LayerUpdateFlags::MODIFIED;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let hash = layers_hasher.hash_one(layer);
|
||||
// Safety: insert_no_grow is unsafe because it assumes that the table
|
||||
// has enough capacity to hold additional elements.
|
||||
// This is always the case as we reserved enough capacity above.
|
||||
unsafe { layers_table.insert_no_grow(hash, layer_id) };
|
||||
}
|
||||
|
||||
PARSER.with(|ts_parser| {
|
||||
let ts_parser = &mut ts_parser.borrow_mut();
|
||||
ts_parser.parser.set_timeout_micros(1000 * 500); // half a second is pretty generours
|
||||
let mut cursor = ts_parser.cursors.pop().unwrap_or_else(QueryCursor::new);
|
||||
// TODO: might need to set cursor range
|
||||
cursor.set_byte_range(0..usize::MAX);
|
||||
cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT);
|
||||
|
||||
let source_slice = source.slice(..);
|
||||
|
||||
while let Some(layer_id) = queue.pop_front() {
|
||||
let layer = &mut self.layers[layer_id];
|
||||
|
||||
// Mark the layer as touched
|
||||
layer.flags |= LayerUpdateFlags::TOUCHED;
|
||||
|
||||
// If a tree already exists, notify it of changes.
|
||||
if let Some(tree) = &mut layer.tree {
|
||||
if layer
|
||||
.flags
|
||||
.intersects(LayerUpdateFlags::MODIFIED | LayerUpdateFlags::MOVED)
|
||||
{
|
||||
for edit in edits.iter().rev() {
|
||||
// Apply the edits in reverse.
|
||||
// If we applied them in order then edit 1 would disrupt the positioning of edit 2.
|
||||
tree.edit(edit);
|
||||
}
|
||||
}
|
||||
|
||||
if layer.flags.contains(LayerUpdateFlags::MODIFIED) {
|
||||
// Re-parse the tree.
|
||||
layer.parse(&mut ts_parser.parser, source)?;
|
||||
}
|
||||
} else {
|
||||
// always parse if this layer has never been parsed before
|
||||
layer.parse(&mut ts_parser.parser, source)?;
|
||||
}
|
||||
|
||||
// Switch to an immutable borrow.
|
||||
let layer = &self.layers[layer_id];
|
||||
|
||||
// Process injections.
|
||||
let matches = cursor.matches(
|
||||
&layer.config.injections_query,
|
||||
layer.tree().root_node(),
|
||||
RopeProvider(source_slice),
|
||||
);
|
||||
let mut combined_injections = vec![
|
||||
(None, Vec::new(), IncludedChildren::default());
|
||||
layer.config.combined_injections_patterns.len()
|
||||
];
|
||||
let mut injections = Vec::new();
|
||||
let mut last_injection_end = 0;
|
||||
for mat in matches {
|
||||
let (injection_capture, content_node, included_children) = layer
|
||||
.config
|
||||
.injection_for_match(&layer.config.injections_query, &mat, source_slice);
|
||||
|
||||
// in case this is a combined injection save it for more processing later
|
||||
if let Some(combined_injection_idx) = layer
|
||||
.config
|
||||
.combined_injections_patterns
|
||||
.iter()
|
||||
.position(|&pattern| pattern == mat.pattern_index)
|
||||
{
|
||||
let entry = &mut combined_injections[combined_injection_idx];
|
||||
if injection_capture.is_some() {
|
||||
entry.0 = injection_capture;
|
||||
}
|
||||
if let Some(content_node) = content_node {
|
||||
if content_node.start_byte() >= last_injection_end {
|
||||
entry.1.push(content_node);
|
||||
last_injection_end = content_node.end_byte();
|
||||
}
|
||||
}
|
||||
entry.2 = included_children;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Explicitly remove this match so that none of its other captures will remain
|
||||
// in the stream of captures.
|
||||
mat.remove();
|
||||
|
||||
// If a language is found with the given name, then add a new language layer
|
||||
// to the highlighted document.
|
||||
if let (Some(injection_capture), Some(content_node)) =
|
||||
(injection_capture, content_node)
|
||||
{
|
||||
if let Some(config) = (injection_callback)(&injection_capture) {
|
||||
let ranges =
|
||||
intersect_ranges(&layer.ranges, &[content_node], included_children);
|
||||
|
||||
if !ranges.is_empty() {
|
||||
if content_node.start_byte() < last_injection_end {
|
||||
continue;
|
||||
}
|
||||
last_injection_end = content_node.end_byte();
|
||||
injections.push((config, ranges));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (lang_name, content_nodes, included_children) in combined_injections {
|
||||
if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) {
|
||||
if let Some(config) = (injection_callback)(&lang_name) {
|
||||
let ranges =
|
||||
intersect_ranges(&layer.ranges, &content_nodes, included_children);
|
||||
if !ranges.is_empty() {
|
||||
injections.push((config, ranges));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let depth = layer.depth + 1;
|
||||
// TODO: can't inline this since matches borrows self.layers
|
||||
for (config, ranges) in injections {
|
||||
let parent = Some(layer_id);
|
||||
let new_layer = LanguageLayer {
|
||||
tree: None,
|
||||
config,
|
||||
depth,
|
||||
ranges,
|
||||
flags: LayerUpdateFlags::empty(),
|
||||
parent: None,
|
||||
};
|
||||
|
||||
// Find an identical existing layer
|
||||
let layer = layers_table
|
||||
.get(layers_hasher.hash_one(&new_layer), |&it| {
|
||||
self.layers[it] == new_layer
|
||||
})
|
||||
.copied();
|
||||
|
||||
// ...or insert a new one.
|
||||
let layer_id = layer.unwrap_or_else(|| self.layers.insert(new_layer));
|
||||
self.layers[layer_id].parent = parent;
|
||||
|
||||
queue.push_back(layer_id);
|
||||
}
|
||||
|
||||
// TODO: pre-process local scopes at this time, rather than highlight?
|
||||
// would solve problems with locals not working across boundaries
|
||||
}
|
||||
|
||||
// Return the cursor back in the pool.
|
||||
ts_parser.cursors.push(cursor);
|
||||
|
||||
// Reset all `LayerUpdateFlags` and remove all untouched layers
|
||||
self.layers.retain(|_, layer| {
|
||||
replace(&mut layer.flags, LayerUpdateFlags::empty())
|
||||
.contains(LayerUpdateFlags::TOUCHED)
|
||||
});
|
||||
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute the ranges that should be included when parsing an injection.
|
||||
/// This takes into account three things:
|
||||
/// * `parent_ranges` - The ranges must all fall within the *current* layer's ranges.
|
||||
/// * `nodes` - Every injection takes place within a set of nodes. The injection ranges
|
||||
/// are the ranges of those nodes.
|
||||
/// * `includes_children` - For some injections, the content nodes' children should be
|
||||
/// excluded from the nested document, so that only the content nodes' *own* content
|
||||
/// is reparsed. For other injections, the content nodes' entire ranges should be
|
||||
/// reparsed, including the ranges of their children.
|
||||
fn intersect_ranges(
|
||||
parent_ranges: &[Range],
|
||||
nodes: &[Node],
|
||||
included_children: IncludedChildren,
|
||||
) -> Vec<Range> {
|
||||
let mut cursor = nodes[0].walk();
|
||||
let mut result = Vec::new();
|
||||
let mut parent_range_iter = parent_ranges.iter();
|
||||
let mut parent_range = parent_range_iter
|
||||
.next()
|
||||
.expect("Layers should only be constructed with non-empty ranges vectors");
|
||||
for node in nodes.iter() {
|
||||
let mut preceding_range = Range {
|
||||
start_byte: 0,
|
||||
start_point: Point::new(0, 0),
|
||||
end_byte: node.start_byte(),
|
||||
end_point: node.start_position(),
|
||||
};
|
||||
let following_range = Range {
|
||||
start_byte: node.end_byte(),
|
||||
start_point: node.end_position(),
|
||||
end_byte: usize::MAX,
|
||||
end_point: Point::new(usize::MAX, usize::MAX),
|
||||
};
|
||||
|
||||
for excluded_range in node
|
||||
.children(&mut cursor)
|
||||
.filter_map(|child| match included_children {
|
||||
IncludedChildren::None => Some(child.range()),
|
||||
IncludedChildren::All => None,
|
||||
IncludedChildren::Unnamed => {
|
||||
if child.is_named() {
|
||||
Some(child.range())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
})
|
||||
.chain([following_range].iter().cloned())
|
||||
{
|
||||
let mut range = Range {
|
||||
start_byte: preceding_range.end_byte,
|
||||
start_point: preceding_range.end_point,
|
||||
end_byte: excluded_range.start_byte,
|
||||
end_point: excluded_range.start_point,
|
||||
};
|
||||
preceding_range = excluded_range;
|
||||
|
||||
if range.end_byte < parent_range.start_byte {
|
||||
continue;
|
||||
}
|
||||
|
||||
while parent_range.start_byte <= range.end_byte {
|
||||
if parent_range.end_byte > range.start_byte {
|
||||
if range.start_byte < parent_range.start_byte {
|
||||
range.start_byte = parent_range.start_byte;
|
||||
range.start_point = parent_range.start_point;
|
||||
}
|
||||
|
||||
if parent_range.end_byte < range.end_byte {
|
||||
if range.start_byte < parent_range.end_byte {
|
||||
result.push(Range {
|
||||
start_byte: range.start_byte,
|
||||
start_point: range.start_point,
|
||||
end_byte: parent_range.end_byte,
|
||||
end_point: parent_range.end_point,
|
||||
});
|
||||
}
|
||||
range.start_byte = parent_range.end_byte;
|
||||
range.start_point = parent_range.end_point;
|
||||
} else {
|
||||
if range.start_byte < range.end_byte {
|
||||
result.push(range);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(next_range) = parent_range_iter.next() {
|
||||
parent_range = next_range;
|
||||
} else {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
impl LanguageLayer {
|
||||
fn parse(&mut self, parser: &mut Parser, source: RopeSlice) -> Result<(), Error> {
|
||||
parser
|
||||
.set_included_ranges(&self.ranges)
|
||||
.map_err(|_| Error::InvalidRanges)?;
|
||||
|
||||
parser
|
||||
.set_language(&self.config.language)
|
||||
.map_err(|_| Error::InvalidLanguage)?;
|
||||
|
||||
// unsafe { syntax.parser.set_cancellation_flag(cancellation_flag) };
|
||||
let tree = parser
|
||||
.parse_with(
|
||||
&mut |byte, _| {
|
||||
if byte <= source.len_bytes() {
|
||||
let (chunk, start_byte, _, _) = source.chunk_at_byte(byte);
|
||||
&chunk.as_bytes()[byte - start_byte..]
|
||||
} else {
|
||||
// out of range
|
||||
&[]
|
||||
}
|
||||
},
|
||||
self.tree.as_ref(),
|
||||
)
|
||||
.ok_or(Error::Cancelled)?;
|
||||
// unsafe { ts_parser.parser.set_cancellation_flag(None) };
|
||||
self.tree = Some(tree);
|
||||
Ok(())
|
||||
}
|
||||
}
|
65
helix-syntax/src/pretty_print.rs
Normal file
65
helix-syntax/src/pretty_print.rs
Normal file
@ -0,0 +1,65 @@
|
||||
use std::fmt;
|
||||
|
||||
use tree_sitter::{Node, TreeCursor};
|
||||
|
||||
pub fn pretty_print_tree<W: fmt::Write>(fmt: &mut W, node: Node) -> fmt::Result {
|
||||
if node.child_count() == 0 {
|
||||
if node_is_visible(&node) {
|
||||
write!(fmt, "({})", node.kind())
|
||||
} else {
|
||||
write!(fmt, "\"{}\"", node.kind())
|
||||
}
|
||||
} else {
|
||||
pretty_print_tree_impl(fmt, &mut node.walk(), 0)
|
||||
}
|
||||
}
|
||||
|
||||
fn pretty_print_tree_impl<W: fmt::Write>(
|
||||
fmt: &mut W,
|
||||
cursor: &mut TreeCursor,
|
||||
depth: usize,
|
||||
) -> fmt::Result {
|
||||
let node = cursor.node();
|
||||
let visible = node_is_visible(&node);
|
||||
|
||||
if visible {
|
||||
let indentation_columns = depth * 2;
|
||||
write!(fmt, "{:indentation_columns$}", "")?;
|
||||
|
||||
if let Some(field_name) = cursor.field_name() {
|
||||
write!(fmt, "{}: ", field_name)?;
|
||||
}
|
||||
|
||||
write!(fmt, "({}", node.kind())?;
|
||||
}
|
||||
|
||||
// Handle children.
|
||||
if cursor.goto_first_child() {
|
||||
loop {
|
||||
if node_is_visible(&cursor.node()) {
|
||||
fmt.write_char('\n')?;
|
||||
}
|
||||
|
||||
pretty_print_tree_impl(fmt, cursor, depth + 1)?;
|
||||
|
||||
if !cursor.goto_next_sibling() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let moved = cursor.goto_parent();
|
||||
// The parent of the first child must exist, and must be `node`.
|
||||
debug_assert!(moved);
|
||||
debug_assert!(cursor.node() == node);
|
||||
}
|
||||
|
||||
if visible {
|
||||
fmt.write_char(')')?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn node_is_visible(node: &Node) -> bool {
|
||||
node.is_missing() || (node.is_named() && node.language().node_kind_is_visible(node.kind_id()))
|
||||
}
|
29
helix-syntax/src/ropey.rs
Normal file
29
helix-syntax/src/ropey.rs
Normal file
@ -0,0 +1,29 @@
|
||||
// glue code for using TS with ropey, this should be put behind a feature flag
|
||||
// in the future (and potentially be partially removed)
|
||||
|
||||
use ropey::RopeSlice;
|
||||
use tree_sitter::{Node, TextProvider};
|
||||
|
||||
// Adapter to convert rope chunks to bytes
|
||||
pub struct ChunksBytes<'a> {
|
||||
chunks: ropey::iter::Chunks<'a>,
|
||||
}
|
||||
impl<'a> Iterator for ChunksBytes<'a> {
|
||||
type Item = &'a [u8];
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.chunks.next().map(str::as_bytes)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct RopeProvider<'a>(pub RopeSlice<'a>);
|
||||
|
||||
impl<'a> TextProvider<&'a [u8]> for RopeProvider<'a> {
|
||||
type I = ChunksBytes<'a>;
|
||||
|
||||
fn text(&mut self, node: Node) -> Self::I {
|
||||
let fragment = self.0.byte_slice(node.start_byte()..node.end_byte());
|
||||
ChunksBytes {
|
||||
chunks: fragment.chunks(),
|
||||
}
|
||||
}
|
||||
}
|
264
helix-syntax/src/tree_cursor.rs
Normal file
264
helix-syntax/src/tree_cursor.rs
Normal file
@ -0,0 +1,264 @@
|
||||
use std::{cmp::Reverse, ops::Range};
|
||||
|
||||
use super::{LanguageLayer, LayerId};
|
||||
|
||||
use slotmap::HopSlotMap;
|
||||
use tree_sitter::Node;
|
||||
|
||||
/// The byte range of an injection layer.
|
||||
///
|
||||
/// Injection ranges may overlap, but all overlapping parts are subsets of their parent ranges.
|
||||
/// This allows us to sort the ranges ahead of time in order to efficiently find a range that
|
||||
/// contains a point with maximum depth.
|
||||
#[derive(Debug)]
|
||||
struct InjectionRange {
|
||||
start: usize,
|
||||
end: usize,
|
||||
layer_id: LayerId,
|
||||
depth: u32,
|
||||
}
|
||||
|
||||
pub struct TreeCursor<'a> {
|
||||
layers: &'a HopSlotMap<LayerId, LanguageLayer>,
|
||||
root: LayerId,
|
||||
current: LayerId,
|
||||
injection_ranges: Vec<InjectionRange>,
|
||||
// TODO: Ideally this would be a `tree_sitter::TreeCursor<'a>` but
|
||||
// that returns very surprising results in testing.
|
||||
cursor: Node<'a>,
|
||||
}
|
||||
|
||||
impl<'a> TreeCursor<'a> {
|
||||
pub(super) fn new(layers: &'a HopSlotMap<LayerId, LanguageLayer>, root: LayerId) -> Self {
|
||||
let mut injection_ranges = Vec::new();
|
||||
|
||||
for (layer_id, layer) in layers.iter() {
|
||||
// Skip the root layer
|
||||
if layer.parent.is_none() {
|
||||
continue;
|
||||
}
|
||||
for byte_range in layer.ranges.iter() {
|
||||
let range = InjectionRange {
|
||||
start: byte_range.start_byte,
|
||||
end: byte_range.end_byte,
|
||||
layer_id,
|
||||
depth: layer.depth,
|
||||
};
|
||||
injection_ranges.push(range);
|
||||
}
|
||||
}
|
||||
|
||||
injection_ranges.sort_unstable_by_key(|range| (range.end, Reverse(range.depth)));
|
||||
|
||||
let cursor = layers[root].tree().root_node();
|
||||
|
||||
Self {
|
||||
layers,
|
||||
root,
|
||||
current: root,
|
||||
injection_ranges,
|
||||
cursor,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn node(&self) -> Node<'a> {
|
||||
self.cursor
|
||||
}
|
||||
|
||||
pub fn goto_parent(&mut self) -> bool {
|
||||
if let Some(parent) = self.node().parent() {
|
||||
self.cursor = parent;
|
||||
return true;
|
||||
}
|
||||
|
||||
// If we are already on the root layer, we cannot ascend.
|
||||
if self.current == self.root {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Ascend to the parent layer.
|
||||
let range = self.node().byte_range();
|
||||
let parent_id = self.layers[self.current]
|
||||
.parent
|
||||
.expect("non-root layers have a parent");
|
||||
self.current = parent_id;
|
||||
let root = self.layers[self.current].tree().root_node();
|
||||
self.cursor = root
|
||||
.descendant_for_byte_range(range.start, range.end)
|
||||
.unwrap_or(root);
|
||||
|
||||
true
|
||||
}
|
||||
|
||||
pub fn goto_parent_with<P>(&mut self, predicate: P) -> bool
|
||||
where
|
||||
P: Fn(&Node) -> bool,
|
||||
{
|
||||
while self.goto_parent() {
|
||||
if predicate(&self.node()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
/// Finds the injection layer that has exactly the same range as the given `range`.
|
||||
fn layer_id_of_byte_range(&self, search_range: Range<usize>) -> Option<LayerId> {
|
||||
let start_idx = self
|
||||
.injection_ranges
|
||||
.partition_point(|range| range.end < search_range.end);
|
||||
|
||||
self.injection_ranges[start_idx..]
|
||||
.iter()
|
||||
.take_while(|range| range.end == search_range.end)
|
||||
.find_map(|range| (range.start == search_range.start).then_some(range.layer_id))
|
||||
}
|
||||
|
||||
fn goto_first_child_impl(&mut self, named: bool) -> bool {
|
||||
// Check if the current node's range is an exact injection layer range.
|
||||
if let Some(layer_id) = self
|
||||
.layer_id_of_byte_range(self.node().byte_range())
|
||||
.filter(|&layer_id| layer_id != self.current)
|
||||
{
|
||||
// Switch to the child layer.
|
||||
self.current = layer_id;
|
||||
self.cursor = self.layers[self.current].tree().root_node();
|
||||
return true;
|
||||
}
|
||||
|
||||
let child = if named {
|
||||
self.cursor.named_child(0)
|
||||
} else {
|
||||
self.cursor.child(0)
|
||||
};
|
||||
|
||||
if let Some(child) = child {
|
||||
// Otherwise descend in the current tree.
|
||||
self.cursor = child;
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
pub fn goto_first_child(&mut self) -> bool {
|
||||
self.goto_first_child_impl(false)
|
||||
}
|
||||
|
||||
pub fn goto_first_named_child(&mut self) -> bool {
|
||||
self.goto_first_child_impl(true)
|
||||
}
|
||||
|
||||
fn goto_next_sibling_impl(&mut self, named: bool) -> bool {
|
||||
let sibling = if named {
|
||||
self.cursor.next_named_sibling()
|
||||
} else {
|
||||
self.cursor.next_sibling()
|
||||
};
|
||||
|
||||
if let Some(sibling) = sibling {
|
||||
self.cursor = sibling;
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
pub fn goto_next_sibling(&mut self) -> bool {
|
||||
self.goto_next_sibling_impl(false)
|
||||
}
|
||||
|
||||
pub fn goto_next_named_sibling(&mut self) -> bool {
|
||||
self.goto_next_sibling_impl(true)
|
||||
}
|
||||
|
||||
fn goto_prev_sibling_impl(&mut self, named: bool) -> bool {
|
||||
let sibling = if named {
|
||||
self.cursor.prev_named_sibling()
|
||||
} else {
|
||||
self.cursor.prev_sibling()
|
||||
};
|
||||
|
||||
if let Some(sibling) = sibling {
|
||||
self.cursor = sibling;
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
pub fn goto_prev_sibling(&mut self) -> bool {
|
||||
self.goto_prev_sibling_impl(false)
|
||||
}
|
||||
|
||||
pub fn goto_prev_named_sibling(&mut self) -> bool {
|
||||
self.goto_prev_sibling_impl(true)
|
||||
}
|
||||
|
||||
/// Finds the injection layer that contains the given start-end range.
|
||||
fn layer_id_containing_byte_range(&self, start: usize, end: usize) -> LayerId {
|
||||
let start_idx = self
|
||||
.injection_ranges
|
||||
.partition_point(|range| range.end < end);
|
||||
|
||||
self.injection_ranges[start_idx..]
|
||||
.iter()
|
||||
.take_while(|range| range.start < end)
|
||||
.find_map(|range| (range.start <= start).then_some(range.layer_id))
|
||||
.unwrap_or(self.root)
|
||||
}
|
||||
|
||||
pub fn reset_to_byte_range(&mut self, start: usize, end: usize) {
|
||||
self.current = self.layer_id_containing_byte_range(start, end);
|
||||
let root = self.layers[self.current].tree().root_node();
|
||||
self.cursor = root.descendant_for_byte_range(start, end).unwrap_or(root);
|
||||
}
|
||||
|
||||
/// Returns an iterator over the children of the node the TreeCursor is on
|
||||
/// at the time this is called.
|
||||
pub fn children(&'a mut self) -> ChildIter {
|
||||
let parent = self.node();
|
||||
|
||||
ChildIter {
|
||||
cursor: self,
|
||||
parent,
|
||||
named: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns an iterator over the named children of the node the TreeCursor is on
|
||||
/// at the time this is called.
|
||||
pub fn named_children(&'a mut self) -> ChildIter {
|
||||
let parent = self.node();
|
||||
|
||||
ChildIter {
|
||||
cursor: self,
|
||||
parent,
|
||||
named: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ChildIter<'n> {
|
||||
cursor: &'n mut TreeCursor<'n>,
|
||||
parent: Node<'n>,
|
||||
named: bool,
|
||||
}
|
||||
|
||||
impl<'n> Iterator for ChildIter<'n> {
|
||||
type Item = Node<'n>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
// first iteration, just visit the first child
|
||||
if self.cursor.node() == self.parent {
|
||||
self.cursor
|
||||
.goto_first_child_impl(self.named)
|
||||
.then(|| self.cursor.node())
|
||||
} else {
|
||||
self.cursor
|
||||
.goto_next_sibling_impl(self.named)
|
||||
.then(|| self.cursor.node())
|
||||
}
|
||||
}
|
||||
}
|
@ -54,7 +54,14 @@ pub fn highlighted_code_block<'a>(
|
||||
language.into(),
|
||||
))
|
||||
.and_then(|config| config.highlight_config(theme.scopes()))
|
||||
.and_then(|config| Syntax::new(ropeslice, config, Arc::clone(&config_loader)));
|
||||
.and_then(|config| {
|
||||
Syntax::new(ropeslice, config, |injection| {
|
||||
config_loader
|
||||
.load()
|
||||
.language_configuration_for_injection_string(injection)
|
||||
.and_then(|config| config.get_highlight_config())
|
||||
})
|
||||
});
|
||||
|
||||
let syntax = match syntax {
|
||||
Some(s) => s,
|
||||
|
@ -82,7 +82,12 @@ fn finish_debounce(&mut self) {
|
||||
let Some(syntax) = language_config
|
||||
.highlight_config(&loader.load().scopes())
|
||||
.and_then(|highlight_config| {
|
||||
helix_core::Syntax::new(text.slice(..), highlight_config, loader)
|
||||
helix_core::Syntax::new(text.slice(..), highlight_config, |injection| {
|
||||
loader
|
||||
.load()
|
||||
.language_configuration_for_injection_string(injection)
|
||||
.and_then(|config| config.get_highlight_config())
|
||||
})
|
||||
})
|
||||
else {
|
||||
log::info!("highlighting picker item failed");
|
||||
|
@ -7,7 +7,7 @@
|
||||
use helix_core::chars::char_is_word;
|
||||
use helix_core::doc_formatter::TextFormat;
|
||||
use helix_core::encoding::Encoding;
|
||||
use helix_core::syntax::{Highlight, LanguageServerFeature};
|
||||
use helix_core::syntax::{generate_edits, Highlight, LanguageServerFeature};
|
||||
use helix_core::text_annotations::{InlineAnnotation, Overlay};
|
||||
use helix_lsp::util::lsp_pos_to_pos;
|
||||
use helix_stdx::faccess::{copy_metadata, readonly};
|
||||
@ -156,6 +156,7 @@ pub struct Document {
|
||||
pub syntax: Option<Syntax>,
|
||||
/// Corresponding language scope name. Usually `source.<lang>`.
|
||||
pub language: Option<Arc<LanguageConfiguration>>,
|
||||
loader: Option<Arc<ArcSwap<helix_core::syntax::Loader>>>,
|
||||
|
||||
/// Pending changes since last history commit.
|
||||
changes: ChangeSet,
|
||||
@ -678,6 +679,7 @@ pub fn from(
|
||||
focused_at: std::time::Instant::now(),
|
||||
readonly: false,
|
||||
jump_labels: HashMap::new(),
|
||||
loader: None,
|
||||
}
|
||||
}
|
||||
|
||||
@ -1131,9 +1133,15 @@ pub fn set_language(
|
||||
if let Some(highlight_config) =
|
||||
language_config.highlight_config(&(*loader).load().scopes())
|
||||
{
|
||||
self.syntax = Syntax::new(self.text.slice(..), highlight_config, loader);
|
||||
let loader_ = loader.load_full();
|
||||
self.syntax = Syntax::new(self.text.slice(..), highlight_config, |injection| {
|
||||
loader_
|
||||
.language_configuration_for_injection_string(injection)
|
||||
.and_then(|config| config.get_highlight_config())
|
||||
});
|
||||
}
|
||||
|
||||
self.loader = Some(loader);
|
||||
self.language = Some(language_config);
|
||||
} else {
|
||||
self.syntax = None;
|
||||
@ -1275,11 +1283,16 @@ fn apply_impl(
|
||||
|
||||
// update tree-sitter syntax tree
|
||||
if let Some(syntax) = &mut self.syntax {
|
||||
let loader = self.loader.as_ref().unwrap().load_full();
|
||||
// TODO: no unwrap
|
||||
let res = syntax.update(
|
||||
old_doc.slice(..),
|
||||
self.text.slice(..),
|
||||
transaction.changes(),
|
||||
generate_edits(old_doc.slice(..), transaction.changes()),
|
||||
|injection| {
|
||||
loader
|
||||
.language_configuration_for_injection_string(injection)
|
||||
.and_then(|config| config.get_highlight_config())
|
||||
},
|
||||
);
|
||||
if res.is_err() {
|
||||
log::error!("TS parser failed, disabling TS for the current buffer: {res:?}");
|
||||
|
Loading…
Reference in New Issue
Block a user