From ffe121a4ac3de43518f43d25bf1e3dc46514a706 Mon Sep 17 00:00:00 2001 From: Michael Davis Date: Sun, 8 Sep 2024 10:04:14 -0400 Subject: [PATCH] Naive Spellbook integration --- Cargo.lock | 31 ++++-- Cargo.toml | 1 + helix-loader/src/grammar.rs | 2 +- helix-loader/src/lib.rs | 16 +++- helix-term/src/commands.rs | 159 +++++++++++++++++++++++++++++++ helix-term/src/keymap/default.rs | 2 + helix-term/src/ui/editor.rs | 56 ++++++++++- helix-view/Cargo.toml | 2 + helix-view/src/editor.rs | 30 +++++- helix-view/src/lib.rs | 1 + 10 files changed, 289 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9cae3e3ed..db5652ebd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -272,7 +272,7 @@ checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" dependencies = [ "cfg-if", "crossbeam-utils", - "hashbrown", + "hashbrown 0.14.5", "lock_api", "once_cell", "parking_lot_core", @@ -783,7 +783,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ef65b256631078ef733bc5530c4e6b1c2e7d5c2830b75d4e9034ab3997d18fe" dependencies = [ "gix-hash", - "hashbrown", + "hashbrown 0.14.5", "parking_lot", ] @@ -819,7 +819,7 @@ dependencies = [ "gix-traverse", "gix-utils", "gix-validate", - "hashbrown", + "hashbrown 0.14.5", "itoa", "libc", "memmap2", @@ -1214,6 +1214,12 @@ dependencies = [ "allocator-api2", ] +[[package]] +name = "hashbrown" +version = "0.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" + [[package]] name = "helix-core" version = "24.7.0" @@ -1226,7 +1232,7 @@ dependencies = [ "encoding_rs", "etcetera", "globset", - "hashbrown", + "hashbrown 0.14.5", "helix-loader", "helix-stdx", "imara-diff", @@ -1274,7 +1280,7 @@ dependencies = [ "ahash", "anyhow", "futures-executor", - "hashbrown", + "hashbrown 0.14.5", "log", "once_cell", "parking_lot", @@ -1460,6 +1466,7 @@ dependencies = [ "serde", "serde_json", "slotmap", + 
"spellbook", "tempfile", "thiserror 2.0.3", "tokio", @@ -1668,7 +1675,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc9da1a252bd44cd341657203722352efc9bc0c847d06ea6d2dc1cd1135e0a01" dependencies = [ "ahash", - "hashbrown", + "hashbrown 0.14.5", ] [[package]] @@ -1678,7 +1685,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68b900aa2f7301e21c36462b170ee99994de34dff39a4a6a528e80e7376d07e5" dependencies = [ "equivalent", - "hashbrown", + "hashbrown 0.14.5", ] [[package]] @@ -2325,6 +2332,16 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "spellbook" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "880af4c079784bfbc72d39cd8754bcd092937fbb07ee70a39d8e7f423ca7beaf" +dependencies = [ + "ahash", + "hashbrown 0.15.2", +] + [[package]] name = "stable_deref_trait" version = "1.2.0" diff --git a/Cargo.toml b/Cargo.toml index 753be4b46..6ade6b32b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,6 +42,7 @@ tree-sitter = { version = "0.22" } nucleo = "0.5.0" slotmap = "1.0.7" thiserror = "2.0" +spellbook = "0.2" [workspace.package] version = "24.7.0" diff --git a/helix-loader/src/grammar.rs b/helix-loader/src/grammar.rs index 99e911544..0ccd173ea 100644 --- a/helix-loader/src/grammar.rs +++ b/helix-loader/src/grammar.rs @@ -592,6 +592,6 @@ fn mtime(path: &Path) -> Result { /// Gives the contents of a file from a language's `runtime/queries/` /// directory pub fn load_runtime_file(language: &str, filename: &str) -> Result { - let path = crate::runtime_file(&PathBuf::new().join("queries").join(language).join(filename)); + let path = crate::runtime_file(PathBuf::new().join("queries").join(language).join(filename)); std::fs::read_to_string(path) } diff --git a/helix-loader/src/lib.rs b/helix-loader/src/lib.rs index 0e7c134d0..d18e2533f 100644 --- a/helix-loader/src/lib.rs +++ b/helix-loader/src/lib.rs @@ -107,7 +107,8 @@ fn find_runtime_file(rel_path: 
&Path) -> Option { /// The valid runtime directories are searched in priority order and the first /// file found to exist is returned, otherwise the path to the final attempt /// that failed. -pub fn runtime_file(rel_path: &Path) -> PathBuf { +pub fn runtime_file<P: AsRef<Path>>(rel_path: P) -> PathBuf { + let rel_path = rel_path.as_ref(); find_runtime_file(rel_path).unwrap_or_else(|| { RUNTIME_DIRS .last() @@ -132,6 +133,14 @@ pub fn cache_dir() -> PathBuf { path } +pub fn state_dir() -> PathBuf { + let strategy = choose_base_strategy().expect("Unable to find the state directory!"); + // `state_dir` is `None` for strategies without one (e.g. Windows); fall back to the data directory. + let mut path = strategy.state_dir().unwrap_or_else(|| strategy.data_dir()); + path.push("helix"); + path +} + pub fn config_file() -> PathBuf { CONFIG_FILE.get().map(|path| path.to_path_buf()).unwrap() } @@ -152,6 +161,11 @@ pub fn default_log_file() -> PathBuf { cache_dir().join("helix.log") } +// HACK: there should be a personal dictionary per-locale. +pub fn personal_dictionary_file() -> PathBuf { + state_dir().join("personal-dictionary.txt") +} + /// Merge two TOML documents, merging values from `right` onto `left` /// /// When an array exists in both `left` and `right`, `right`'s array is diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs index d08148362..e1df68534 100644 --- a/helix-term/src/commands.rs +++ b/helix-term/src/commands.rs @@ -568,6 +568,8 @@ pub fn doc(&self) -> &str { command_palette, "Open command palette", goto_word, "Jump to a two-character label", extend_to_word, "Extend to a two-character label", + add_word_to_personal_dictionary, "Add the word under the primary cursor to the personal dictionary for the current locale", + suggest_spelling_correction, "Suggest a spelling correction for the mistake under the cursor", ); } @@ -6414,3 +6416,160 @@ fn jump_to_word(cx: &mut Context, behaviour: Movement) { } jump_to_label(cx, words, behaviour) } + +// HACK: this should be folded into code actions. 
+fn add_word_to_personal_dictionary(cx: &mut Context) { + let (view, doc) = current_ref!(cx.editor); + let text = doc.text().slice(..); + let selection = doc.selection(view.id).primary(); + let range = if selection.len() == 1 { + textobject::textobject_word(text, selection, textobject::TextObject::Inside, 1, false) + } else { + selection + }; + let word = range.fragment(text); + + let prompt = ui::Prompt::new( + "add-word:".into(), + None, + ui::completers::none, + move |cx, input: &str, event: PromptEvent| { + fn append_word(word: &str) -> std::io::Result<()> { + use std::io::Write; + let path = helix_loader::personal_dictionary_file(); + let mut file = std::fs::OpenOptions::new() + .create(true) + .append(true) + .open(path)?; + file.write_all(word.as_bytes())?; + file.write_all(helix_core::NATIVE_LINE_ENDING.as_str().as_bytes())?; + file.sync_data()?; + + Ok(()) + } + + if event != PromptEvent::Validate { + return; + } + + if let Err(err) = cx.editor.dictionary.add(input) { + cx.editor.set_error(format!( + "Failed to add \"{input}\" to the dictionary: {err}" + )); + return; + } + + if let Err(err) = append_word(input) { + cx.editor.set_error(format!( + "Failed to persist \"{input}\" to the on-disk dictionary: {err}" + )); + return; + } + + cx.editor + .set_status(format!("Added \"{input}\" to the dictionary")); + }, + ) + .with_line(word.into(), cx.editor); + + cx.push_layer(Box::new(prompt)); +} + +fn suggest_spelling_correction(cx: &mut Context) { + use helix_stdx::rope::Regex; + use tokio::time::Instant; + + let dictionary = &cx.editor.dictionary; + let (view, doc) = current_ref!(cx.editor); + let view_id = view.id; + let doc_id = doc.id(); + let text = doc.text().slice(..); + + let selection = doc.selection(view.id).primary(); + let direction = selection.direction(); + let cursor = selection.cursor(text); + let line_no = selection.cursor_line(text); + let line = text.line(line_no); + let line_start = text.line_to_char(line_no); + + 
#[repr(transparent)] + struct Suggestion(String); + + impl ui::menu::Item for Suggestion { + type Data = (); + + fn format(&self, _data: &Self::Data) -> tui::widgets::Row { + self.0.as_str().into() + } + } + + // This is a hack around not storing the spelling errors as diagnostics. + // Re-find the spelling mistake under the cursor (match offsets are bytes relative to `line`): + static WORDS: Lazy<Regex> = Lazy::new(|| Regex::new(r#"[0-9A-Z]*(['-]?[a-z]+)*"#).unwrap()); + let current_mistake = WORDS.find_iter(line.regex_input_at(..)).find_map(|match_| { + let start = line.byte_to_char(match_.start()); + let end = line.byte_to_char(match_.end()); + let word = Cow::from(line.slice(start..end)); + let range = start + line_start..end + line_start; + if range.contains(&cursor) && !dictionary.check(&word) { + Some((word, range)) + } else { + None + } + }); + + let Some((word, range)) = current_mistake else { + cx.editor + .set_error("No spelling mistake under the primary cursor"); + return; + }; + + let mut suggestions = Vec::new(); + let start_time = Instant::now(); + dictionary.suggest(&word, &mut suggestions); + let end_time = Instant::now(); + log::info!( + "found {} suggestion{} for '{}' in {:?}", + suggestions.len(), + if suggestions.len() == 1 { "" } else { "s" }, + &word, + end_time.duration_since(start_time) + ); + + if suggestions.is_empty() { + cx.editor + .set_error(format!("No suggestions for '{}' found", &word)); + return; + } + + // Wrap each suggestion in the `Suggestion` newtype so that it can be rendered as a menu item. 
+ let suggestions: Vec<Suggestion> = suggestions.into_iter().map(Suggestion).collect(); + + let mut menu = ui::Menu::new(suggestions, (), move |editor, action, event| { + if event != PromptEvent::Validate { + return; + } + + // Because we `move_down` below, this is always Some: + let suggestion = &action.unwrap().0; + + let view = view_mut!(editor, view_id); + let doc = doc_mut!(editor, &doc_id); + + let new_range = Range::new(range.start, range.start + suggestion.chars().count()) + .with_direction(direction); + let transaction = Transaction::change( + doc.text(), + [(range.start, range.end, Some(suggestion.into()))].into_iter(), + ) + .with_selection(Selection::from(new_range)); + + doc.apply(&transaction, view_id); + doc.append_changes_to_history(view); + }); + menu.move_down(); + + let popup = Popup::new("suggestion", menu).with_scrollbar(false); + + cx.push_layer(Box::new(popup)); +} diff --git a/helix-term/src/keymap/default.rs b/helix-term/src/keymap/default.rs index c6cefd927..f19b070f5 100644 --- a/helix-term/src/keymap/default.rs +++ b/helix-term/src/keymap/default.rs @@ -230,6 +230,8 @@ pub fn default() -> HashMap { "D" => workspace_diagnostics_picker, "g" => changed_file_picker, "a" => code_action, + "A" => add_word_to_personal_dictionary, + "Z" => suggest_spelling_correction, "'" => last_picker, "G" => { "Debug (experimental)" sticky=true "l" => dap_launch, diff --git a/helix-term/src/ui/editor.rs b/helix-term/src/ui/editor.rs index 5179be4f4..a1dd42f70 100644 --- a/helix-term/src/ui/editor.rs +++ b/helix-term/src/ui/editor.rs @@ -22,6 +22,7 @@ unicode::width::UnicodeWidthStr, visual_offset_from_block, Change, Position, Range, Selection, Transaction, }; +use helix_stdx::rope::RopeSliceExt; use helix_view::{ annotations::diagnostics::DiagnosticFilter, document::{Mode, SavePoint, SCRATCH_BUFFER_NAME}, @@ -29,7 +30,7 @@ graphics::{Color, CursorKind, Modifier, Rect, Style}, input::{KeyEvent, MouseButton, MouseEvent, MouseEventKind}, keyboard::{KeyCode, 
KeyModifiers}, - 
Document, Editor, Theme, View, + Dictionary, Document, Editor, Theme, View, }; use std::{mem::take, num::NonZeroUsize, path::PathBuf, rc::Rc, sync::Arc}; @@ -145,6 +146,10 @@ pub fn render_view( } overlay_highlights = Box::new(syntax::merge(overlay_highlights, diagnostic)); } + let spell = Self::doc_spell_highlights(&editor.dictionary, doc, view, theme); + if !spell.is_empty() { + overlay_highlights = Box::new(syntax::merge(overlay_highlights, spell)); + } if is_focused { let highlights = syntax::merge( @@ -461,6 +466,55 @@ pub fn doc_diagnostics_highlights( ] } + pub fn doc_spell_highlights( + dict: &Dictionary, + doc: &Document, + view: &View, + theme: &Theme, + ) -> Vec<(usize, std::ops::Range<usize>)> { + // This is **very** ***very*** naive and not at all reflective of what the actual + // integration will look like. Doing this per-render is very needlessly expensive. + // Instead it should be done in the background and possibly incrementally (only + // re-checking ranges that are affected by document changes). However regex-cursor + // is very fast and so is spellbook (degenerate cases max out at 1μs in a release + // build on my machine, i.e. a worst case throughput of 2 million words / second) so + // this is suitable for my testing. I mostly want to find cases where spellbook's + // results are surprising. + // Also we want to use tree-sitter to mark nodes as ones that should be spellchecked + // and maybe specify strategies for doing tokenization (try to tokenize prose vs. + // programming languages). + // Plus these should really be proper diagnostics so that we can pull them up in the + // diagnostics picker and jump to them. 
+ use helix_stdx::rope::Regex; + use once_cell::sync::Lazy; + use std::borrow::Cow; + static WORDS: Lazy<Regex> = Lazy::new(|| Regex::new(r#"[0-9A-Z]*(['-]?[a-z]+)*"#).unwrap()); + + let mut spans = Vec::new(); + let error = theme.find_scope_index("diagnostic.error").unwrap(); + + let text = doc.text().slice(..); + let start = text.line_to_char(text.char_to_line(doc.view_offset(view.id).anchor)); + let end = text.line_to_char(view.estimate_last_doc_line(doc) + 1); + + for match_ in WORDS.find_iter(text.regex_input_at(start..end)) { + let range = text.byte_to_char(match_.start())..text.byte_to_char(match_.end()); + // TODO: consider how to allow passing the RopeSlice to spellbook: + // * Use an Input trait like regex-cursor? + // * Accept `impl Iterator`? + // * Maybe spellbook should have an internal `String` buffer and it should try to copy + // the word into that? Only in the best case do you not have to allocate at all. + // Maybe we should use a single string buffer and perform all changes to the string + // in-place instead of using `replace` from the stdlib and Cows. + let word = Cow::from(text.slice(range.clone())); + if !range.is_empty() && !dict.check(&word) { + spans.push((error, range)) + } + } + + spans + } + /// Get highlight spans for selections in a document view. 
pub fn doc_selection_highlights( mode: Mode, diff --git a/helix-view/Cargo.toml b/helix-view/Cargo.toml index 6f71fa052..86d375623 100644 --- a/helix-view/Cargo.toml +++ b/helix-view/Cargo.toml @@ -52,6 +52,8 @@ log = "~0.4" parking_lot = "0.12.3" thiserror.workspace = true +spellbook.workspace = true + [target.'cfg(windows)'.dependencies] clipboard-win = { version = "5.4", features = ["std"] } diff --git a/helix-view/src/editor.rs b/helix-view/src/editor.rs index 4fc3f4700..ce7f1ecd0 100644 --- a/helix-view/src/editor.rs +++ b/helix-view/src/editor.rs @@ -11,7 +11,7 @@ register::Registers, theme::{self, Theme}, tree::{self, Tree}, - Document, DocumentId, View, ViewId, + Dictionary, Document, DocumentId, View, ViewId, }; use dap::StackFrame; use helix_vcs::DiffProviderRegistry; @@ -1095,6 +1095,9 @@ pub struct Editor { pub mouse_down_range: Option, pub cursor_cache: CursorCache, + + /// HACK: + pub dictionary: Dictionary, } pub type Motion = Box; @@ -1174,6 +1177,30 @@ pub fn new( // HAXX: offset the render area height by 1 to account for prompt/commandline area.height -= 1; + // HACK: what's the right interface for Spellbook to expose so we don't have to + // read these entire files into strings? (See associated TODO in Spellbook.) + let aff = + std::fs::read_to_string(helix_loader::runtime_file("dictionaries/en_US/en_US.aff")) + .unwrap(); + let dic = + std::fs::read_to_string(helix_loader::runtime_file("dictionaries/en_US/en_US.dic")) + .unwrap(); + // HACK: All this stuff should happen off the main thread. 
+ let mut dictionary = Dictionary::new(&aff, &dic).unwrap(); + if let Ok(file) = std::fs::File::open(helix_loader::personal_dictionary_file()) { + use std::io::{BufRead as _, BufReader}; + let reader = BufReader::with_capacity(8 * 1024, file); + for line in reader.lines() { + let line = line.unwrap(); + let line = line.trim(); + if line.is_empty() { + continue; + } + + dictionary.add(line).unwrap(); + } + } + Self { mode: Mode::Normal, tree: Tree::new(area), @@ -1216,6 +1243,7 @@ pub fn new( handlers, mouse_down_range: None, cursor_cache: CursorCache::default(), + dictionary, } } diff --git a/helix-view/src/lib.rs b/helix-view/src/lib.rs index d54b49ef5..c57eff189 100644 --- a/helix-view/src/lib.rs +++ b/helix-view/src/lib.rs @@ -75,5 +75,6 @@ pub fn align_view(doc: &mut Document, view: &View, align: Align) { pub use document::Document; pub use editor::Editor; use helix_core::char_idx_at_visual_offset; +pub use spellbook::Dictionary; pub use theme::Theme; pub use view::View;