Speed up ensure_next_boundary during render

This code:

    let start = ensure_grapheme_boundary_next(text, text.byte_to_char(start));
    let end = ensure_grapheme_boundary_next(text, text.byte_to_char(end));

Would convert byte to char index, but then internally immediately convert back
to byte index, operate on it, then convert it to char index.

This change reduces the amount of time spent in ensure_grapheme_boundary from
29% to 2%.
This commit is contained in:
Blaž Hrastnik 2022-01-12 10:22:16 +09:00
parent 9d41113ae0
commit 11c3ba9350
2 changed files with 61 additions and 3 deletions

View File

@ -120,6 +120,43 @@ pub fn nth_next_grapheme_boundary(slice: RopeSlice, char_idx: usize, n: usize) -
chunk_char_idx + tmp
}
#[must_use]
pub fn nth_next_grapheme_boundary_byte(slice: RopeSlice, mut byte_idx: usize, n: usize) -> usize {
// Bounds check
debug_assert!(byte_idx <= slice.len_bytes());
// Get the chunk with our byte index in it.
let (mut chunk, mut chunk_byte_idx, mut _chunk_char_idx, _) = slice.chunk_at_byte(byte_idx);
// Set up the grapheme cursor.
let mut gc = GraphemeCursor::new(byte_idx, slice.len_bytes(), true);
// Find the nth next grapheme cluster boundary.
for _ in 0..n {
loop {
match gc.next_boundary(chunk, chunk_byte_idx) {
Ok(None) => return slice.len_bytes(),
Ok(Some(n)) => {
byte_idx = n;
break;
}
Err(GraphemeIncomplete::NextChunk) => {
chunk_byte_idx += chunk.len();
let (a, _, _c, _) = slice.chunk_at_byte(chunk_byte_idx);
chunk = a;
// chunk_char_idx = c;
}
Err(GraphemeIncomplete::PreContext(n)) => {
let ctx_chunk = slice.chunk_at_byte(n - 1).0;
gc.provide_context(ctx_chunk, n - ctx_chunk.len());
}
_ => unreachable!(),
}
}
}
byte_idx
}
/// Finds the next grapheme boundary after the given char position.
#[must_use]
#[inline(always)]
@ -127,6 +164,13 @@ pub fn next_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize {
nth_next_grapheme_boundary(slice, char_idx, 1)
}
/// Finds the next grapheme boundary after the given byte position.
#[must_use]
#[inline(always)]
pub fn next_grapheme_boundary_byte(slice: RopeSlice, byte_idx: usize) -> usize {
nth_next_grapheme_boundary_byte(slice, byte_idx, 1)
}
/// Returns the passed char index if it's already a grapheme boundary,
/// or the next grapheme boundary char index if not.
#[must_use]
@ -151,6 +195,18 @@ pub fn ensure_grapheme_boundary_prev(slice: RopeSlice, char_idx: usize) -> usize
}
}
/// Returns the passed byte index if it's already a grapheme boundary,
/// or the next grapheme boundary byte index if not.
#[must_use]
#[inline]
pub fn ensure_grapheme_boundary_next_byte(slice: RopeSlice, byte_idx: usize) -> usize {
if byte_idx == 0 {
byte_idx
} else {
next_grapheme_boundary_byte(slice, byte_idx - 1)
}
}
/// Returns whether the given char position is a grapheme boundary.
#[must_use]
pub fn is_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> bool {

View File

@ -8,7 +8,9 @@
use helix_core::{
coords_at_pos, encoding,
graphemes::{ensure_grapheme_boundary_next, next_grapheme_boundary, prev_grapheme_boundary},
graphemes::{
ensure_grapheme_boundary_next_byte, next_grapheme_boundary, prev_grapheme_boundary,
},
movement::Direction,
syntax::{self, HighlightEvent},
unicode::segmentation::UnicodeSegmentation,
@ -154,8 +156,8 @@ pub fn doc_syntax_highlights<'doc>(
.map(move |event| match event {
// convert byte offsets to char offset
HighlightEvent::Source { start, end } => {
let start = ensure_grapheme_boundary_next(text, text.byte_to_char(start));
let end = ensure_grapheme_boundary_next(text, text.byte_to_char(end));
let start = text.byte_to_char(ensure_grapheme_boundary_next_byte(text, start));
let end = text.byte_to_char(ensure_grapheme_boundary_next_byte(text, end));
HighlightEvent::Source { start, end }
}
event => event,