significantly improve treesitter performance while editing large files (#4716)
* significantly improve treesitter performance while editing large files * Apply stylistic suggestions from code review Co-authored-by: Michael Davis <mcarsondavis@gmail.com> * use PartialEq and Hash instead of a freestanding function Co-authored-by: Michael Davis <mcarsondavis@gmail.com>
This commit is contained in:
parent
9059c65a53
commit
f538b69759
27
Cargo.lock
generated
27
Cargo.lock
generated
@ -13,6 +13,18 @@ dependencies = [
|
|||||||
"version_check",
|
"version_check",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ahash"
|
||||||
|
version = "0.8.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "bf6ccdb167abbf410dcb915cabd428929d7f6a04980b54a11f26a39f1c7f7107"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"getrandom",
|
||||||
|
"once_cell",
|
||||||
|
"version_check",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "aho-corasick"
|
name = "aho-corasick"
|
||||||
version = "0.7.18"
|
version = "0.7.18"
|
||||||
@ -400,18 +412,29 @@ version = "0.12.3"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
|
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"ahash",
|
"ahash 0.7.6",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "hashbrown"
|
||||||
|
version = "0.13.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "33ff8ae62cd3a9102e5637afc8452c55acf3844001bd5374e0b0bd7b6616c038"
|
||||||
|
dependencies = [
|
||||||
|
"ahash 0.8.2",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "helix-core"
|
name = "helix-core"
|
||||||
version = "0.6.0"
|
version = "0.6.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"ahash 0.8.2",
|
||||||
"arc-swap",
|
"arc-swap",
|
||||||
"bitflags",
|
"bitflags",
|
||||||
"chrono",
|
"chrono",
|
||||||
"encoding_rs",
|
"encoding_rs",
|
||||||
"etcetera",
|
"etcetera",
|
||||||
|
"hashbrown 0.13.1",
|
||||||
"helix-loader",
|
"helix-loader",
|
||||||
"log",
|
"log",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
@ -1288,7 +1311,7 @@ version = "0.1.4"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "c5faade31a542b8b35855fff6e8def199853b2da8da256da52f52f1316ee3137"
|
checksum = "c5faade31a542b8b35855fff6e8def199853b2da8da256da52f52f1316ee3137"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"hashbrown",
|
"hashbrown 0.12.3",
|
||||||
"regex",
|
"regex",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -30,6 +30,8 @@ once_cell = "1.16"
|
|||||||
arc-swap = "1"
|
arc-swap = "1"
|
||||||
regex = "1"
|
regex = "1"
|
||||||
bitflags = "1.3"
|
bitflags = "1.3"
|
||||||
|
ahash = "0.8.2"
|
||||||
|
hashbrown = { version = "0.13.1", features = ["raw"] }
|
||||||
|
|
||||||
log = "0.4"
|
log = "0.4"
|
||||||
serde = { version = "1.0", features = ["derive"] }
|
serde = { version = "1.0", features = ["derive"] }
|
||||||
|
@ -7,8 +7,10 @@
|
|||||||
Rope, RopeSlice, Tendril,
|
Rope, RopeSlice, Tendril,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
use ahash::RandomState;
|
||||||
use arc_swap::{ArcSwap, Guard};
|
use arc_swap::{ArcSwap, Guard};
|
||||||
use bitflags::bitflags;
|
use bitflags::bitflags;
|
||||||
|
use hashbrown::raw::RawTable;
|
||||||
use slotmap::{DefaultKey as LayerId, HopSlotMap};
|
use slotmap::{DefaultKey as LayerId, HopSlotMap};
|
||||||
|
|
||||||
use std::{
|
use std::{
|
||||||
@ -16,7 +18,8 @@
|
|||||||
cell::RefCell,
|
cell::RefCell,
|
||||||
collections::{HashMap, VecDeque},
|
collections::{HashMap, VecDeque},
|
||||||
fmt,
|
fmt,
|
||||||
mem::replace,
|
hash::{Hash, Hasher},
|
||||||
|
mem::{replace, transmute},
|
||||||
path::Path,
|
path::Path,
|
||||||
str::FromStr,
|
str::FromStr,
|
||||||
sync::Arc,
|
sync::Arc,
|
||||||
@ -770,30 +773,38 @@ pub fn update(
|
|||||||
// Convert the changeset into tree sitter edits.
|
// Convert the changeset into tree sitter edits.
|
||||||
let edits = generate_edits(old_source, changeset);
|
let edits = generate_edits(old_source, changeset);
|
||||||
|
|
||||||
|
// This table allows inverse indexing of `layers`.
|
||||||
|
// That is, by hashing a `Layer` you can find
|
||||||
|
// the `LayerId` of an existing equivalent `Layer` in `layers`.
|
||||||
|
//
|
||||||
|
// It is used to determine if a new layer exists for an injection
|
||||||
|
// or if an existing layer needs to be updated.
|
||||||
|
let mut layers_table = RawTable::with_capacity(self.layers.len());
|
||||||
|
let layers_hasher = RandomState::new();
|
||||||
// Use the edits to update all layers markers
|
// Use the edits to update all layers markers
|
||||||
if !edits.is_empty() {
|
fn point_add(a: Point, b: Point) -> Point {
|
||||||
fn point_add(a: Point, b: Point) -> Point {
|
if b.row > 0 {
|
||||||
if b.row > 0 {
|
Point::new(a.row.saturating_add(b.row), b.column)
|
||||||
Point::new(a.row.saturating_add(b.row), b.column)
|
} else {
|
||||||
} else {
|
Point::new(0, a.column.saturating_add(b.column))
|
||||||
Point::new(0, a.column.saturating_add(b.column))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
fn point_sub(a: Point, b: Point) -> Point {
|
}
|
||||||
if a.row > b.row {
|
fn point_sub(a: Point, b: Point) -> Point {
|
||||||
Point::new(a.row.saturating_sub(b.row), a.column)
|
if a.row > b.row {
|
||||||
} else {
|
Point::new(a.row.saturating_sub(b.row), a.column)
|
||||||
Point::new(0, a.column.saturating_sub(b.column))
|
} else {
|
||||||
}
|
Point::new(0, a.column.saturating_sub(b.column))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (layer_id, layer) in self.layers.iter_mut() {
|
||||||
|
// The root layer always covers the whole range (0..usize::MAX)
|
||||||
|
if layer.depth == 0 {
|
||||||
|
layer.flags = LayerUpdateFlags::MODIFIED;
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
for layer in self.layers.values_mut() {
|
if !edits.is_empty() {
|
||||||
// The root layer always covers the whole range (0..usize::MAX)
|
|
||||||
if layer.depth == 0 {
|
|
||||||
layer.flags = LayerUpdateFlags::MODIFIED;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
for range in &mut layer.ranges {
|
for range in &mut layer.ranges {
|
||||||
// Roughly based on https://github.com/tree-sitter/tree-sitter/blob/ddeaa0c7f534268b35b4f6cb39b52df082754413/lib/src/subtree.c#L691-L720
|
// Roughly based on https://github.com/tree-sitter/tree-sitter/blob/ddeaa0c7f534268b35b4f6cb39b52df082754413/lib/src/subtree.c#L691-L720
|
||||||
for edit in edits.iter().rev() {
|
for edit in edits.iter().rev() {
|
||||||
@ -858,6 +869,12 @@ fn point_sub(a: Point, b: Point) -> Point {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let hash = layers_hasher.hash_one(layer);
|
||||||
|
// Safety: insert_no_grow is unsafe because it assumes that the table
|
||||||
|
// has enough capacity to hold additional elements.
|
||||||
|
// This is always the case as we reserved enough capacity above.
|
||||||
|
unsafe { layers_table.insert_no_grow(hash, layer_id) };
|
||||||
}
|
}
|
||||||
|
|
||||||
PARSER.with(|ts_parser| {
|
PARSER.with(|ts_parser| {
|
||||||
@ -982,27 +999,23 @@ fn point_sub(a: Point, b: Point) -> Point {
|
|||||||
let depth = layer.depth + 1;
|
let depth = layer.depth + 1;
|
||||||
// TODO: can't inline this since matches borrows self.layers
|
// TODO: can't inline this since matches borrows self.layers
|
||||||
for (config, ranges) in injections {
|
for (config, ranges) in injections {
|
||||||
// Find an existing layer
|
let new_layer = LanguageLayer {
|
||||||
let layer = self
|
tree: None,
|
||||||
.layers
|
config,
|
||||||
.iter_mut()
|
depth,
|
||||||
.find(|(_, layer)| {
|
ranges,
|
||||||
layer.depth == depth && // TODO: track parent id instead
|
flags: LayerUpdateFlags::empty(),
|
||||||
layer.config.language == config.language && layer.ranges == ranges
|
};
|
||||||
|
|
||||||
|
// Find an identical existing layer
|
||||||
|
let layer = layers_table
|
||||||
|
.get(layers_hasher.hash_one(&new_layer), |&it| {
|
||||||
|
self.layers[it] == new_layer
|
||||||
})
|
})
|
||||||
.map(|(id, _layer)| id);
|
.copied();
|
||||||
|
|
||||||
// ...or insert a new one.
|
// ...or insert a new one.
|
||||||
let layer_id = layer.unwrap_or_else(|| {
|
let layer_id = layer.unwrap_or_else(|| self.layers.insert(new_layer));
|
||||||
self.layers.insert(LanguageLayer {
|
|
||||||
tree: None,
|
|
||||||
config,
|
|
||||||
depth,
|
|
||||||
ranges,
|
|
||||||
// set the modified flag to ensure the layer is parsed
|
|
||||||
flags: LayerUpdateFlags::empty(),
|
|
||||||
})
|
|
||||||
});
|
|
||||||
|
|
||||||
queue.push_back(layer_id);
|
queue.push_back(layer_id);
|
||||||
}
|
}
|
||||||
@ -1139,6 +1152,34 @@ pub struct LanguageLayer {
|
|||||||
flags: LayerUpdateFlags,
|
flags: LayerUpdateFlags,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// This PartialEq implementation only checks whether
|
||||||
|
/// two layers are theoretically identical (meaning they highlight the same text range with the same language).
|
||||||
|
/// It does not check whether the layers have the same internal treesitter
|
||||||
|
/// state.
|
||||||
|
impl PartialEq for LanguageLayer {
|
||||||
|
fn eq(&self, other: &Self) -> bool {
|
||||||
|
self.depth == other.depth
|
||||||
|
&& self.config.language == other.config.language
|
||||||
|
&& self.ranges == other.ranges
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// This Hash implementation is the counterpart of the PartialEq implementation above.
|
||||||
|
/// See its documentation for details.
|
||||||
|
impl Hash for LanguageLayer {
|
||||||
|
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||||
|
self.depth.hash(state);
|
||||||
|
// The transmute is necessary here because tree_sitter::Language does not derive Hash at the moment.
|
||||||
|
// However, it is declared `#[repr(transparent)]`, so the transmute here is safe
|
||||||
|
// as `Language` (which `Grammar` is an alias for) is just a newtype wrapper around a (thin) pointer.
|
||||||
|
// This is also compatible with the PartialEq implementation of language
|
||||||
|
// as that is just a pointer comparison.
|
||||||
|
let language: *const () = unsafe { transmute(self.config.language) };
|
||||||
|
language.hash(state);
|
||||||
|
self.ranges.hash(state);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl LanguageLayer {
|
impl LanguageLayer {
|
||||||
pub fn tree(&self) -> &Tree {
|
pub fn tree(&self) -> &Tree {
|
||||||
// TODO: no unwrap
|
// TODO: no unwrap
|
||||||
|
Loading…
Reference in New Issue
Block a user