Naive Spellbook integration

This commit is contained in:
Michael Davis 2024-09-08 10:04:14 -04:00
parent a1a5faebef
commit ffe121a4ac
No known key found for this signature in database
10 changed files with 289 additions and 11 deletions

31
Cargo.lock generated
View File

@ -272,7 +272,7 @@ checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf"
dependencies = [
"cfg-if",
"crossbeam-utils",
"hashbrown",
"hashbrown 0.14.5",
"lock_api",
"once_cell",
"parking_lot_core",
@ -783,7 +783,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ef65b256631078ef733bc5530c4e6b1c2e7d5c2830b75d4e9034ab3997d18fe"
dependencies = [
"gix-hash",
"hashbrown",
"hashbrown 0.14.5",
"parking_lot",
]
@ -819,7 +819,7 @@ dependencies = [
"gix-traverse",
"gix-utils",
"gix-validate",
"hashbrown",
"hashbrown 0.14.5",
"itoa",
"libc",
"memmap2",
@ -1214,6 +1214,12 @@ dependencies = [
"allocator-api2",
]
[[package]]
name = "hashbrown"
version = "0.15.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289"
[[package]]
name = "helix-core"
version = "24.7.0"
@ -1226,7 +1232,7 @@ dependencies = [
"encoding_rs",
"etcetera",
"globset",
"hashbrown",
"hashbrown 0.14.5",
"helix-loader",
"helix-stdx",
"imara-diff",
@ -1274,7 +1280,7 @@ dependencies = [
"ahash",
"anyhow",
"futures-executor",
"hashbrown",
"hashbrown 0.14.5",
"log",
"once_cell",
"parking_lot",
@ -1460,6 +1466,7 @@ dependencies = [
"serde",
"serde_json",
"slotmap",
"spellbook",
"tempfile",
"thiserror 2.0.3",
"tokio",
@ -1668,7 +1675,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc9da1a252bd44cd341657203722352efc9bc0c847d06ea6d2dc1cd1135e0a01"
dependencies = [
"ahash",
"hashbrown",
"hashbrown 0.14.5",
]
[[package]]
@ -1678,7 +1685,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68b900aa2f7301e21c36462b170ee99994de34dff39a4a6a528e80e7376d07e5"
dependencies = [
"equivalent",
"hashbrown",
"hashbrown 0.14.5",
]
[[package]]
@ -2325,6 +2332,16 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "spellbook"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "880af4c079784bfbc72d39cd8754bcd092937fbb07ee70a39d8e7f423ca7beaf"
dependencies = [
"ahash",
"hashbrown 0.15.2",
]
[[package]]
name = "stable_deref_trait"
version = "1.2.0"

View File

@ -42,6 +42,7 @@ tree-sitter = { version = "0.22" }
nucleo = "0.5.0"
slotmap = "1.0.7"
thiserror = "2.0"
spellbook = "0.2"
[workspace.package]
version = "24.7.0"

View File

@ -592,6 +592,6 @@ fn mtime(path: &Path) -> Result<SystemTime> {
/// Gives the contents of a file from a language's `runtime/queries/<lang>`
/// directory.
///
/// The relative path `queries/<language>/<filename>` is resolved through
/// `crate::runtime_file` and the resulting file is read into a `String`.
///
/// # Errors
///
/// Returns the `std::io::Error` produced by `std::fs::read_to_string` when the
/// resolved file cannot be read.
pub fn load_runtime_file(language: &str, filename: &str) -> Result<String, std::io::Error> {
// NOTE(review): the two `path` bindings below differ only in whether the
// relative path is passed by reference or by value. The second shadows the
// first, so only its result is read.
let path = crate::runtime_file(&PathBuf::new().join("queries").join(language).join(filename));
let path = crate::runtime_file(PathBuf::new().join("queries").join(language).join(filename));
std::fs::read_to_string(path)
}

View File

@ -107,7 +107,8 @@ fn find_runtime_file(rel_path: &Path) -> Option<PathBuf> {
/// The valid runtime directories are searched in priority order and the first
/// file found to exist is returned, otherwise the path to the final attempt
/// that failed.
pub fn runtime_file(rel_path: &Path) -> PathBuf {
pub fn runtime_file<P: AsRef<Path>>(rel_path: P) -> PathBuf {
let rel_path = rel_path.as_ref();
find_runtime_file(rel_path).unwrap_or_else(|| {
RUNTIME_DIRS
.last()
@ -132,6 +133,14 @@ pub fn cache_dir() -> PathBuf {
path
}
/// Returns the directory in which Helix keeps persistent state: the base
/// strategy's state directory with `helix` appended.
///
/// `BaseStrategy::state_dir` returns an `Option` because not every platform
/// convention defines a state directory. When it is `None` the strategy's data
/// directory is used instead of panicking.
///
/// # Panics
///
/// Panics if `choose_base_strategy` fails.
pub fn state_dir() -> PathBuf {
    let strategy = choose_base_strategy().expect("Unable to find the state directory!");
    let mut path = strategy
        .state_dir()
        .unwrap_or_else(|| strategy.data_dir());
    path.push("helix");
    path
}
/// Returns an owned copy of the path stored in `CONFIG_FILE`.
///
/// # Panics
///
/// Panics if `CONFIG_FILE.get()` returns `None`.
pub fn config_file() -> PathBuf {
    let configured = CONFIG_FILE.get().unwrap();
    configured.to_path_buf()
}
@ -152,6 +161,11 @@ pub fn default_log_file() -> PathBuf {
cache_dir().join("helix.log")
}
// HACK: there should be a personal dictionary per locale instead of a single
// shared file.
/// Returns the path of the personal dictionary: `personal-dictionary.txt`
/// inside the directory given by `state_dir`.
pub fn personal_dictionary_file() -> PathBuf {
    let mut path = state_dir();
    path.push("personal-dictionary.txt");
    path
}
/// Merge two TOML documents, merging values from `right` onto `left`
///
/// When an array exists in both `left` and `right`, `right`'s array is

View File

@ -568,6 +568,8 @@ pub fn doc(&self) -> &str {
command_palette, "Open command palette",
goto_word, "Jump to a two-character label",
extend_to_word, "Extend to a two-character label",
add_word_to_personal_dictionary, "Add the word under the primary cursor to the personal dictionary for the current locale",
suggest_spelling_correction, "Suggest a spelling correction for the mistake under the cursor",
);
}
@ -6414,3 +6416,160 @@ fn jump_to_word(cx: &mut Context, behaviour: Movement) {
}
jump_to_label(cx, words, behaviour)
}
// HACK: this should be folded into code actions.
/// Opens an `add-word:` prompt pre-filled with the word under the primary
/// cursor, or with the primary selection when it is wider than one character.
///
/// When the prompt is validated the input is added to the in-memory
/// dictionary and then appended to the on-disk personal dictionary. A failure
/// in either step is reported through the editor's error line.
fn add_word_to_personal_dictionary(cx: &mut Context) {
    let (view, doc) = current_ref!(cx.editor);
    let text = doc.text().slice(..);
    let selection = doc.selection(view.id).primary();
    // A one-character-wide selection is expanded to the word around it.
    let range = if selection.len() == 1 {
        textobject::textobject_word(text, selection, textobject::TextObject::Inside, 1, false)
    } else {
        selection
    };
    let word = range.fragment(text);

    let prompt = ui::Prompt::new(
        "add-word:".into(),
        None,
        ui::completers::none,
        move |cx, input: &str, event: PromptEvent| {
            /// Appends `word` followed by the native line ending to the
            /// personal dictionary file, creating the file and its parent
            /// directory if they do not exist yet.
            fn append_word(word: &str) -> std::io::Result<()> {
                use std::io::Write;

                // Use the shared helper so the path written here is the same
                // one the editor reads at startup.
                let path = helix_loader::personal_dictionary_file();
                // The state directory may not exist on a fresh install, and
                // `OpenOptions::create` only creates the file itself.
                if let Some(parent) = path.parent() {
                    std::fs::create_dir_all(parent)?;
                }
                let mut file = std::fs::OpenOptions::new()
                    .create(true)
                    .append(true)
                    .open(path)?;
                file.write_all(word.as_bytes())?;
                file.write_all(helix_core::NATIVE_LINE_ENDING.as_str().as_bytes())?;
                file.sync_data()?;
                Ok(())
            }

            if event != PromptEvent::Validate {
                return;
            }

            if let Err(err) = cx.editor.dictionary.add(input) {
                cx.editor.set_error(format!(
                    "Failed to add \"{input}\" to the dictionary: {err}"
                ));
                return;
            }

            if let Err(err) = append_word(input) {
                cx.editor.set_error(format!(
                    "Failed to persist \"{input}\" to the on-disk dictionary: {err}"
                ));
                return;
            }

            cx.editor
                .set_status(format!("Added \"{input}\" to the dictionary"));
        },
    )
    .with_line(word.into(), cx.editor);
    cx.push_layer(Box::new(prompt));
}
/// Shows a popup menu of spelling suggestions for the misspelled word under
/// the primary cursor and replaces that word with the chosen suggestion.
///
/// Sets an editor error when no misspelled word is found under the cursor or
/// when the dictionary produces no suggestions for it.
fn suggest_spelling_correction(cx: &mut Context) {
    use helix_stdx::rope::Regex;
    use tokio::time::Instant;

    let dictionary = &cx.editor.dictionary;
    let (view, doc) = current_ref!(cx.editor);
    let view_id = view.id;
    let doc_id = doc.id();
    let text = doc.text().slice(..);
    let selection = doc.selection(view.id).primary();
    let direction = selection.direction();
    let cursor = selection.cursor(text);
    let line_no = selection.cursor_line(text);
    let line = text.line(line_no);
    let line_start = text.line_to_char(line_no);

    /// Menu item wrapping a single suggested replacement.
    struct Suggestion(String);

    impl ui::menu::Item for Suggestion {
        type Data = ();

        fn format(&self, _data: &Self::Data) -> tui::widgets::Row {
            self.0.as_str().into()
        }
    }

    // This is a hack around not storing the spelling errors as diagnostics.
    // Re-find the spelling mistake under the cursor:
    static WORDS: Lazy<Regex> = Lazy::new(|| Regex::new(r#"[0-9A-Z]*(['-]?[a-z]+)*"#).unwrap());
    let current_mistake = WORDS.find_iter(line.regex_input_at(..)).find_map(|match_| {
        // The regex runs over `line`, so match offsets are byte offsets into
        // `line` and must be converted with `line`, not the whole document.
        let start = line.byte_to_char(match_.start());
        let end = line.byte_to_char(match_.end());
        // Document-wide char range of the match.
        let range = start + line_start..end + line_start;
        // Test the cursor position first so only the word under the cursor
        // is looked up in the dictionary.
        if !range.contains(&cursor) {
            return None;
        }
        let word = Cow::from(line.slice(start..end));
        (!dictionary.check(&word)).then_some((word, range))
    });

    let Some((word, range)) = current_mistake else {
        cx.editor
            .set_error("No spelling mistake under the primary cursor");
        return;
    };

    let mut suggestions = Vec::new();
    let start_time = Instant::now();
    dictionary.suggest(&word, &mut suggestions);
    let elapsed = start_time.elapsed();
    log::info!(
        "found {} suggestion{} for '{}' in {:?}",
        suggestions.len(),
        if suggestions.len() == 1 { "" } else { "s" },
        &word,
        elapsed
    );

    if suggestions.is_empty() {
        cx.editor
            .set_error(format!("No suggestions for '{}' found", &word));
        return;
    }

    // Wrap each suggestion with a safe conversion: the layout of `Vec<T>` is
    // not specified, so transmuting between `Vec` types is not sound.
    let suggestions: Vec<Suggestion> = suggestions.into_iter().map(Suggestion).collect();

    let mut menu = ui::Menu::new(suggestions, (), move |editor, action, event| {
        if event != PromptEvent::Validate {
            return;
        }

        // Because we `move_down` below, this is always Some:
        let suggestion = &action.unwrap().0;

        let view = view_mut!(editor, view_id);
        let doc = doc_mut!(editor, &doc_id);

        // Select the replacement, keeping the original selection direction.
        let new_range = Range::new(range.start, range.start + suggestion.chars().count())
            .with_direction(direction);

        let transaction = Transaction::change(
            doc.text(),
            [(range.start, range.end, Some(suggestion.into()))].into_iter(),
        )
        .with_selection(Selection::from(new_range));

        doc.apply(&transaction, view_id);
        doc.append_changes_to_history(view);
    });
    menu.move_down();

    let popup = Popup::new("suggestion", menu).with_scrollbar(false);
    cx.push_layer(Box::new(popup));
}

View File

@ -230,6 +230,8 @@ pub fn default() -> HashMap<Mode, KeyTrie> {
"D" => workspace_diagnostics_picker,
"g" => changed_file_picker,
"a" => code_action,
"A" => add_word_to_personal_dictionary,
"Z" => suggest_spelling_correction,
"'" => last_picker,
"G" => { "Debug (experimental)" sticky=true
"l" => dap_launch,

View File

@ -22,6 +22,7 @@
unicode::width::UnicodeWidthStr,
visual_offset_from_block, Change, Position, Range, Selection, Transaction,
};
use helix_stdx::rope::RopeSliceExt;
use helix_view::{
annotations::diagnostics::DiagnosticFilter,
document::{Mode, SavePoint, SCRATCH_BUFFER_NAME},
@ -29,7 +30,7 @@
graphics::{Color, CursorKind, Modifier, Rect, Style},
input::{KeyEvent, MouseButton, MouseEvent, MouseEventKind},
keyboard::{KeyCode, KeyModifiers},
Document, Editor, Theme, View,
Dictionary, Document, Editor, Theme, View,
};
use std::{mem::take, num::NonZeroUsize, path::PathBuf, rc::Rc, sync::Arc};
@ -145,6 +146,10 @@ pub fn render_view(
}
overlay_highlights = Box::new(syntax::merge(overlay_highlights, diagnostic));
}
let spell = Self::doc_spell_highlights(&editor.dictionary, doc, view, theme);
if !spell.is_empty() {
overlay_highlights = Box::new(syntax::merge(overlay_highlights, spell));
}
if is_focused {
let highlights = syntax::merge(
@ -461,6 +466,55 @@ pub fn doc_diagnostics_highlights(
]
}
/// Get highlight spans for spelling mistakes in the rendered part of a document view.
///
/// Every word between the first line of the view and the estimated last rendered line
/// that fails `dict.check` is reported as a char range under the theme's
/// `diagnostic.error` scope. Returns no spans when the theme does not define that scope.
pub fn doc_spell_highlights(
    dict: &Dictionary,
    doc: &Document,
    view: &View,
    theme: &Theme,
) -> Vec<(usize, std::ops::Range<usize>)> {
    // This is **very** ***very*** naive and not at all reflective of what the actual
    // integration will look like. Doing this per-render is very needlessly expensive.
    // Instead it should be done in the background and possibly incrementally (only
    // re-checking ranges that are affected by document changes). However regex-cursor
    // is very fast and so is spellbook (degenerate cases max out at 1μs in a release
    // build on my machine, i.e. a worst case throughput of 2 million words / second) so
    // this is suitable for my testing. I mostly want to find cases where spellbook's
    // results are surprising.
    // Also we want to use tree-sitter to mark nodes as ones that should be spellchecked
    // and maybe specify strategies for doing tokenization (try to tokenize prose vs.
    // programming languages).
    // Plus these should really be proper diagnostics so that we can pull them up in the
    // diagnostics picker and jump to them.
    use helix_stdx::rope::Regex;
    use once_cell::sync::Lazy;
    use std::borrow::Cow;
    static WORDS: Lazy<Regex> = Lazy::new(|| Regex::new(r#"[0-9A-Z]*(['-]?[a-z]+)*"#).unwrap());

    // A theme without this scope must not crash rendering; there is simply nothing to
    // highlight with.
    let Some(error) = theme.find_scope_index("diagnostic.error") else {
        return Vec::new();
    };

    let mut spans = Vec::new();

    let text = doc.text().slice(..);
    let start = text.line_to_char(text.char_to_line(doc.view_offset(view.id).anchor));
    let end = text.line_to_char(view.estimate_last_doc_line(doc) + 1);

    for match_ in WORDS.find_iter(text.regex_input_at(start..end)) {
        // The pattern can match the empty string; an empty match is not a word and
        // could only produce a zero-width span, so skip it before doing any work.
        if match_.start() == match_.end() {
            continue;
        }
        let range = text.byte_to_char(match_.start())..text.byte_to_char(match_.end());
        // TODO: consider how to allow passing the RopeSlice to spellbook:
        // * Use an Input trait like regex-cursor?
        // * Accept `impl Iterator<Item = char>`?
        // * Maybe spellbook should have an internal `String` buffer and it should try to copy
        //   the word into that? Only in the best case do you not have to allocate at all.
        //   Maybe we should use a single string buffer and perform all changes to the string
        //   in-place instead of using `replace` from the stdlib and Cows.
        let word = Cow::from(text.slice(range.clone()));
        if !dict.check(&word) {
            spans.push((error, range))
        }
    }

    spans
}
/// Get highlight spans for selections in a document view.
pub fn doc_selection_highlights(
mode: Mode,

View File

@ -52,6 +52,8 @@ log = "~0.4"
parking_lot = "0.12.3"
thiserror.workspace = true
spellbook.workspace = true
[target.'cfg(windows)'.dependencies]
clipboard-win = { version = "5.4", features = ["std"] }

View File

@ -11,7 +11,7 @@
register::Registers,
theme::{self, Theme},
tree::{self, Tree},
Document, DocumentId, View, ViewId,
Dictionary, Document, DocumentId, View, ViewId,
};
use dap::StackFrame;
use helix_vcs::DiffProviderRegistry;
@ -1095,6 +1095,9 @@ pub struct Editor {
pub mouse_down_range: Option<Range>,
pub cursor_cache: CursorCache,
/// HACK:
pub dictionary: Dictionary,
}
pub type Motion = Box<dyn Fn(&mut Editor)>;
@ -1174,6 +1177,30 @@ pub fn new(
// HAXX: offset the render area height by 1 to account for prompt/commandline
area.height -= 1;
// HACK: what's the right interface for Spellbook to expose so we don't have to
// read these entire files into strings? (See associated TODO in Spellbook.)
let aff =
std::fs::read_to_string(helix_loader::runtime_file("dictionaries/en_US/en_US.aff"))
.unwrap();
let dic =
std::fs::read_to_string(helix_loader::runtime_file("dictionaries/en_US/en_US.dic"))
.unwrap();
// HACK: All this stuff should happen off the main thread.
let mut dictionary = Dictionary::new(&aff, &dic).unwrap();
if let Ok(file) = std::fs::File::open(helix_loader::personal_dictionary_file()) {
use std::io::{BufRead as _, BufReader};
let reader = BufReader::with_capacity(8 * 1024, file);
for line in reader.lines() {
let line = line.unwrap();
let line = line.trim();
if line.is_empty() {
continue;
}
dictionary.add(line).unwrap();
}
}
Self {
mode: Mode::Normal,
tree: Tree::new(area),
@ -1216,6 +1243,7 @@ pub fn new(
handlers,
mouse_down_range: None,
cursor_cache: CursorCache::default(),
dictionary,
}
}

View File

@ -75,5 +75,6 @@ pub fn align_view(doc: &mut Document, view: &View, align: Align) {
pub use document::Document;
pub use editor::Editor;
use helix_core::char_idx_at_visual_offset;
pub use spellbook::Dictionary;
pub use theme::Theme;
pub use view::View;