From 52c4a28faea49fa13e3c33729533a369d97f1d32 Mon Sep 17 00:00:00 2001 From: "Alexis (Poliorcetics) Bourget" Date: Sat, 27 Apr 2024 19:49:45 +0200 Subject: [PATCH] feat: Diff source caching and autodetection --- Cargo.lock | 11 ++ helix-term/src/commands.rs | 22 ++- helix-term/src/commands/typed.rs | 6 +- helix-vcs/Cargo.toml | 2 +- helix-vcs/src/git.rs | 39 ++--- helix-vcs/src/git/test.rs | 30 +++- helix-vcs/src/lib.rs | 262 ++++++++++++++++++++++++------- helix-view/src/document.rs | 4 +- helix-view/src/editor.rs | 6 + 9 files changed, 277 insertions(+), 105 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6cc73ebf0..9b48b20bc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -211,6 +211,15 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossbeam-channel" +version = "0.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-deque" version = "0.8.5" @@ -710,12 +719,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e0eb9efdf96c35c0bed7596d1bef2d4ce6360a1d09738001f9d3e402aa7ba3e" dependencies = [ "crc32fast", + "crossbeam-channel", "flate2", "gix-hash", "gix-trace", "gix-utils", "libc", "once_cell", + "parking_lot", "prodash", "sha1_smol", "thiserror 1.0.69", diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs index 628f6fd27..65a5291f8 100644 --- a/helix-term/src/commands.rs +++ b/helix-term/src/commands.rs @@ -71,6 +71,7 @@ future::Future, io::Read, num::NonZeroUsize, + sync::Arc, }; use std::{ @@ -3089,7 +3090,7 @@ struct JumpMeta { fn changed_file_picker(cx: &mut Context) { pub struct FileChangeData { - cwd: PathBuf, + cwd: Arc, style_untracked: Style, style_modified: Style, style_conflict: Style, @@ -3097,7 +3098,7 @@ pub struct FileChangeData { style_renamed: Style, } - let cwd = helix_stdx::env::current_working_dir(); + let cwd: Arc = Arc::from(helix_stdx::env::current_working_dir().as_path()); if !cwd.exists() { cx.editor .set_error("Current working directory does not exist"); @@ -3168,17 +3169,24 @@ pub struct FileChangeData { .with_preview(|_editor, meta| Some((meta.path().into(), None))); let injector = picker.injector(); - cx.editor - .diff_providers - .clone() - .for_each_changed_file(cwd, move |change| match change { + // Helix can be launched without arguments, in which case no diff provider will be loaded since + // there is no file to provide infos for. + // + // This ensures we have one to work with for cwd (and as a bonus it means any file opened + // from this picker will have its diff provider already in cache). + cx.editor.diff_providers.add(&cwd); + cx.editor.diff_providers.clone().for_each_changed_file( + cwd.clone(), + move |change| match change { Ok(change) => injector.push(change).is_ok(), Err(err) => { status::report_blocking(err); true } - }); + }, + ); cx.push_layer(Box::new(overlaid(picker))); + cx.editor.diff_providers.remove(&cwd); } pub fn command_palette(cx: &mut Context) { diff --git a/helix-term/src/commands/typed.rs b/helix-term/src/commands/typed.rs index 7402a06f3..e2067eb67 100644 --- a/helix-term/src/commands/typed.rs +++ b/helix-term/src/commands/typed.rs @@ -1279,7 +1279,7 @@ fn reload( let scrolloff = cx.editor.config().scrolloff; let (view, doc) = current!(cx.editor); - doc.reload(view, &cx.editor.diff_providers).map(|_| { + doc.reload(view, &mut cx.editor.diff_providers).map(|_| { view.ensure_cursor_in_view(doc, scrolloff); })?; if let Some(path) = doc.path() { @@ -1318,6 +1318,8 @@ fn reload_all( }) .collect(); + cx.editor.diff_providers.reset(); + for (doc_id, view_ids) in docs_view_ids { let doc = doc_mut!(cx.editor, &doc_id); @@ -1327,7 +1329,7 @@ fn reload_all( // Ensure that the view is synced with the document's history. view.sync_changes(doc); - if let Err(error) = doc.reload(view, &cx.editor.diff_providers) { + if let Err(error) = doc.reload(view, &mut cx.editor.diff_providers) { cx.editor.set_error(format!("{}", error)); continue; } diff --git a/helix-vcs/Cargo.toml b/helix-vcs/Cargo.toml index 919df04ac..6352eca7e 100644 --- a/helix-vcs/Cargo.toml +++ b/helix-vcs/Cargo.toml @@ -19,7 +19,7 @@ tokio = { version = "1", features = ["rt", "rt-multi-thread", "time", "sync", "p parking_lot = "0.12" arc-swap = { version = "1.7.1" } -gix = { version = "0.67.0", features = ["attributes", "status"], default-features = false, optional = true } +gix = { version = "0.67.0", features = ["attributes", "parallel", "status"], default-features = false, optional = true } imara-diff = "0.1.7" anyhow = "1" diff --git a/helix-vcs/src/git.rs b/helix-vcs/src/git.rs index 189f6e22b..bba87684c 100644 --- a/helix-vcs/src/git.rs +++ b/helix-vcs/src/git.rs @@ -22,22 +22,12 @@ #[cfg(test)] mod test; -#[inline] -fn get_repo_dir(file: &Path) -> Result<&Path> { - file.parent().context("file has no parent directory") -} - -pub fn get_diff_base(file: &Path) -> Result> { +pub fn get_diff_base(repo: &ThreadSafeRepository, file: &Path) -> Result> { debug_assert!(!file.exists() || file.is_file()); debug_assert!(file.is_absolute()); - let file = gix::path::realpath(file).context("resolve symlinks")?; + let file = gix::path::realpath(file).context("resolve symlink")?; - // TODO cache repository lookup - - let repo_dir = get_repo_dir(&file)?; - let repo = open_repo(repo_dir) - .context("failed to open git repo")? - .to_thread_local(); + let repo = repo.to_thread_local(); let head = repo.head_commit()?; let file_oid = find_file_in_commit(&repo, &head, &file)?; @@ -59,15 +49,8 @@ pub fn get_diff_base(file: &Path) -> Result> { } } -pub fn get_current_head_name(file: &Path) -> Result>>> { - debug_assert!(!file.exists() || file.is_file()); - debug_assert!(file.is_absolute()); - let file = gix::path::realpath(file).context("resolve symlinks")?; - - let repo_dir = get_repo_dir(&file)?; - let repo = open_repo(repo_dir) - .context("failed to open git repo")? - .to_thread_local(); +pub fn get_current_head_name(repo: &ThreadSafeRepository) -> Result>>> { + let repo = repo.to_thread_local(); let head_ref = repo.head_ref()?; let head_commit = repo.head_commit()?; @@ -79,11 +62,17 @@ pub fn get_current_head_name(file: &Path) -> Result>>> { Ok(Arc::new(ArcSwap::from_pointee(name.into_boxed_str()))) } -pub fn for_each_changed_file(cwd: &Path, f: impl Fn(Result) -> bool) -> Result<()> { - status(&open_repo(cwd)?.to_thread_local(), f) +pub fn for_each_changed_file( + repo: &ThreadSafeRepository, + f: impl Fn(Result) -> bool, +) -> Result<()> { + status(&repo.to_thread_local(), f) } -fn open_repo(path: &Path) -> Result { +pub(super) fn open_repo(path: &Path) -> Result { + // Ensure the repo itself is an absolute real path, else we'll not match prefixes with + // symlink-resolved files in `get_diff_base()` above. + let path = gix::path::realpath(path)?; // custom open options let mut git_open_opts_map = gix::sec::trust::Mapping::::default(); diff --git a/helix-vcs/src/git/test.rs b/helix-vcs/src/git/test.rs index 164040f50..069a08b50 100644 --- a/helix-vcs/src/git/test.rs +++ b/helix-vcs/src/git/test.rs @@ -54,7 +54,8 @@ fn missing_file() { let file = temp_git.path().join("file.txt"); File::create(&file).unwrap().write_all(b"foo").unwrap(); - assert!(git::get_diff_base(&file).is_err()); + let repo = git::open_repo(temp_git.path()).unwrap(); + assert!(git::get_diff_base(&repo, &file).is_err()); } #[test] @@ -64,7 +65,12 @@ fn unmodified_file() { let contents = b"foo".as_slice(); File::create(&file).unwrap().write_all(contents).unwrap(); create_commit(temp_git.path(), true); - assert_eq!(git::get_diff_base(&file).unwrap(), Vec::from(contents)); + + let repo = git::open_repo(temp_git.path()).unwrap(); + assert_eq!( + git::get_diff_base(&repo, &file).unwrap(), + Vec::from(contents) + ); } #[test] @@ -76,7 +82,11 @@ fn modified_file() { create_commit(temp_git.path(), true); File::create(&file).unwrap().write_all(b"bar").unwrap(); - assert_eq!(git::get_diff_base(&file).unwrap(), Vec::from(contents)); + let repo = git::open_repo(temp_git.path()).unwrap(); + assert_eq!( + git::get_diff_base(&repo, &file).unwrap(), + Vec::from(contents) + ); } /// Test that `get_file_head` does not return content for a directory. @@ -95,7 +105,9 @@ fn directory() { std::fs::remove_dir_all(&dir).unwrap(); File::create(&dir).unwrap().write_all(b"bar").unwrap(); - assert!(git::get_diff_base(&dir).is_err()); + + let repo = git::open_repo(temp_git.path()).unwrap(); + assert!(git::get_diff_base(&repo, &dir).is_err()); } /// Test that `get_diff_base` resolves symlinks so that the same diff base is @@ -122,8 +134,9 @@ fn symlink() { symlink("file.txt", &file_link).unwrap(); create_commit(temp_git.path(), true); - assert_eq!(git::get_diff_base(&file_link).unwrap(), contents); - assert_eq!(git::get_diff_base(&file).unwrap(), contents); + let repo = git::open_repo(temp_git.path()).unwrap(); + assert_eq!(git::get_diff_base(&repo, &file_link).unwrap(), contents); + assert_eq!(git::get_diff_base(&repo, &file).unwrap(), contents); } /// Test that `get_diff_base` returns content when the file is a symlink to @@ -147,6 +160,7 @@ fn symlink_to_git_repo() { let file_link = temp_dir.path().join("file_link.txt"); symlink(&file, &file_link).unwrap(); - assert_eq!(git::get_diff_base(&file_link).unwrap(), contents); - assert_eq!(git::get_diff_base(&file).unwrap(), contents); + let repo = git::open_repo(temp_git.path()).unwrap(); + assert_eq!(git::get_diff_base(&repo, &file_link).unwrap(), contents); + assert_eq!(git::get_diff_base(&repo, &file).unwrap(), contents); } diff --git a/helix-vcs/src/lib.rs b/helix-vcs/src/lib.rs index 539be779a..2508be6b3 100644 --- a/helix-vcs/src/lib.rs +++ b/helix-vcs/src/lib.rs @@ -1,9 +1,6 @@ -use anyhow::{anyhow, bail, Result}; +use anyhow::Result; use arc_swap::ArcSwap; -use std::{ - path::{Path, PathBuf}, - sync::Arc, -}; +use std::{collections::HashMap, path::Path, sync::Arc}; #[cfg(feature = "git")] mod git; @@ -16,67 +13,182 @@ pub use status::FileChange; -#[derive(Clone)] +#[derive(Default, Clone)] pub struct DiffProviderRegistry { - providers: Vec, + /// Repository root path mapped to their provider. + /// + /// When a root path cannot be found after having called `add_file`, it means there is no + /// provider to speak of. + providers: HashMap, DiffProvider>, + /// Count the number of files added for a specific provider path. + /// Providers themselves don't care about that, this is handled entirely in `Self::add_file`, + /// without knowledge from the `Self::add_file_` methods. + /// + /// Note: it *could* happen that a provider for a path is changed without the number of + /// associated files changing, e.g deleting a .git/ and initializing a .jj/ repo. + counters: HashMap, u32>, } +/// Diff-related methods impl DiffProviderRegistry { pub fn get_diff_base(&self, file: &Path) -> Option> { - self.providers - .iter() - .find_map(|provider| match provider.get_diff_base(file) { - Ok(res) => Some(res), - Err(err) => { - log::debug!("{err:#?}"); - log::debug!("failed to open diff base for {}", file.display()); - None - } - }) + match self.provider_for(file)?.get_diff_base(file) { + Ok(diff_base) => Some(diff_base), + Err(err) => { + log::debug!("{err:#?}"); + log::debug!("failed to open diff base for {}", file.display()); + None + } + } } pub fn get_current_head_name(&self, file: &Path) -> Option>>> { - self.providers - .iter() - .find_map(|provider| match provider.get_current_head_name(file) { - Ok(res) => Some(res), - Err(err) => { - log::debug!("{err:#?}"); - log::debug!("failed to obtain current head name for {}", file.display()); - None - } - }) + match self.provider_for(file)?.get_current_head_name() { + Ok(head_name) => Some(head_name), + Err(err) => { + log::debug!("{err:#?}"); + log::debug!("failed to obtain current head name for {}", file.display()); + None + } + } } /// Fire-and-forget changed file iteration. Runs everything in a background task. Keeps /// iteration until `on_change` returns `false`. pub fn for_each_changed_file( self, - cwd: PathBuf, + cwd: Arc, f: impl Fn(Result) -> bool + Send + 'static, ) { tokio::task::spawn_blocking(move || { - if self - .providers - .iter() - .find_map(|provider| provider.for_each_changed_file(&cwd, &f).ok()) - .is_none() - { - f(Err(anyhow!("no diff provider returns success"))); + let Some(diff_provider) = self.provider_for(&cwd) else { + return; + }; + if let Err(err) = diff_provider.for_each_changed_file(&f) { + f(Err(err)); } }); } } -impl Default for DiffProviderRegistry { - fn default() -> Self { - // currently only git is supported - // TODO make this configurable when more providers are added - let providers = vec![ +/// Creation and update methods +#[cfg_attr(not(feature = "git"), allow(unused))] +impl DiffProviderRegistry { + /// Register a provider (if any is found) for the given path. + pub fn add(&mut self, path: &Path) { + let Some((repo_path, provider)) = get_possible_provider(path) else { + // Do nothing here: there is no path to use and so the actual methods to get infos + // like `get_diff_base` just won't do anything since they won't find a source to + // work with. + log::debug!("Found no potential diff provider for {}", path.display()); + // Note: if a `./` dir is deleted, we may end up in a situation where we lose track + // of a now unused provider. This is acceptable because it doesn't happen that often in + // practice and people can just reload to force an update. + // + // If it becomes an issue in the future, we could fix it by recomputing the providers + // for each stored paths here. + return; + }; + + let result = match provider { #[cfg(feature = "git")] - DiffProvider::Git, - ]; - DiffProviderRegistry { providers } + PossibleDiffProvider::Git => self.add_file_git(repo_path), + }; + + match result { + Ok((key, prov)) => { + // Increase the count for this path. + let count = self.counters.entry(key).or_default(); + let created = *count == 0; + *count += 1; + + // Only log at info level when adding a new provider + if created { + log::info!( + "Added {prov:?} (repo: {}) from {}", + repo_path.display(), + path.display() + ) + } else { + log::debug!( + "Reused {prov:?} (repo: {}) for {}", + repo_path.display(), + path.display() + ); + } + } + Err(err) => log::debug!( + "Failed to open repo at {} for {}: {:?}", + repo_path.display(), + path.display(), + err + ), + } + } + + /// Reload the provider for the given path. + pub fn reload(&mut self, path: &Path) { + self.remove(path); + self.add(path); + } + + /// Remove the given path from the provider cache. If it was the last one using it, this will + /// free up the provider. + pub fn remove(&mut self, path: &Path) { + let Some((repo_path, _)) = get_possible_provider(path) else { + return; + }; + + let Some(count) = self.counters.get_mut(repo_path) else { + return; + }; + + *count -= 1; + if *count == 0 { + // Cleanup the provider when the last user disappears + self.counters.remove(repo_path); + self.providers.remove(repo_path); + + // While reallocating is costly, in most sessions of Helix there will be one main + // workspace and sometimes a jump to some temporary one (for example from a jump-to-def + // in an LSP) that will be closed after some time. We want to avoid keeping unused + // RAM for this. + self.providers.shrink_to_fit(); + self.counters.shrink_to_fit(); + } + } + + /// Clears the saved providers completely. + pub fn reset(&mut self) { + self.providers = Default::default(); + self.counters = Default::default(); + } +} + +/// Private methods +impl DiffProviderRegistry { + fn provider_for(&self, path: &Path) -> Option<&DiffProvider> { + let path = get_possible_provider(path)?.0; + self.providers.get(path) + } + + /// Add the git repo to the known providers *if* it isn't already known. + #[cfg(feature = "git")] + fn add_file_git(&mut self, repo_path: &Path) -> Result<(Arc, PossibleDiffProvider)> { + // Don't build a git repo object if there is already one for that path. + if let Some((key, DiffProvider::Git(_))) = self.providers.get_key_value(repo_path) { + return Ok((Arc::clone(key), PossibleDiffProvider::Git)); + } + + match git::open_repo(repo_path) { + Ok(repo) => { + let key = Arc::from(repo_path); + self.providers + .insert(Arc::clone(&key), DiffProvider::Git(repo)); + Ok((key, PossibleDiffProvider::Git)) + } + Err(err) => Err(err), + } } } @@ -84,39 +196,67 @@ fn default() -> Self { /// cloning [DiffProviderRegistry] as `Clone` cannot be used in trait objects. /// /// `Copy` is simply to ensure the `clone()` call is the simplest it can be. -#[derive(Copy, Clone)] +#[derive(Clone)] pub enum DiffProvider { #[cfg(feature = "git")] - Git, - None, + Git(gix::ThreadSafeRepository), } +#[cfg_attr(not(feature = "git"), allow(unused))] impl DiffProvider { fn get_diff_base(&self, file: &Path) -> Result> { - match self { + // We need the */ref else we're matching on a reference and Rust considers all references + // inhabited. In our case + match *self { #[cfg(feature = "git")] - Self::Git => git::get_diff_base(file), - Self::None => bail!("No diff support compiled in"), + Self::Git(ref repo) => git::get_diff_base(repo, file), } } - fn get_current_head_name(&self, file: &Path) -> Result>>> { - match self { + fn get_current_head_name(&self) -> Result>>> { + match *self { #[cfg(feature = "git")] - Self::Git => git::get_current_head_name(file), - Self::None => bail!("No diff support compiled in"), + Self::Git(ref repo) => git::get_current_head_name(repo), } } - fn for_each_changed_file( - &self, - cwd: &Path, - f: impl Fn(Result) -> bool, - ) -> Result<()> { - match self { + fn for_each_changed_file(&self, f: impl Fn(Result) -> bool) -> Result<()> { + match *self { #[cfg(feature = "git")] - Self::Git => git::for_each_changed_file(cwd, f), - Self::None => bail!("No diff support compiled in"), + Self::Git(ref repo) => git::for_each_changed_file(repo, f), } } } + +#[derive(Debug, Copy, Clone)] +pub enum PossibleDiffProvider { + /// Possibly a git repo rooted at the stored path (i.e. `/.git` exists) + #[cfg(feature = "git")] + Git, +} + +/// Does *possible* diff provider auto detection. Returns the 'root' of the workspace +/// +/// We say possible because this function doesn't open the actual repository to check if that's +/// actually the case. +fn get_possible_provider(path: &Path) -> Option<(&Path, PossibleDiffProvider)> { + if cfg!(feature = "git") { + #[cfg_attr(not(feature = "git"), allow(unused))] + fn check_path(path: &Path) -> Option<(&Path, PossibleDiffProvider)> { + #[cfg(feature = "git")] + if path.join(".git").try_exists().ok()? { + return Some((path, PossibleDiffProvider::Git)); + } + + None + } + + for parent in path.ancestors() { + if let Some(path_and_provider) = check_path(parent) { + return Some(path_and_provider); + } + } + } + + None +} diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs index fa089cdaf..6e8edb472 100644 --- a/helix-view/src/document.rs +++ b/helix-view/src/document.rs @@ -1111,7 +1111,7 @@ pub fn detect_readonly(&mut self) { pub fn reload( &mut self, view: &mut View, - provider_registry: &DiffProviderRegistry, + provider_registry: &mut DiffProviderRegistry, ) -> Result<(), Error> { let encoding = self.encoding; let path = match self.path() { @@ -1122,6 +1122,8 @@ pub fn reload( }, }; + provider_registry.reload(&path); + // Once we have a valid path we check if its readonly status has changed self.detect_readonly(); diff --git a/helix-view/src/editor.rs b/helix-view/src/editor.rs index 174190e5d..73326c2a7 100644 --- a/helix-view/src/editor.rs +++ b/helix-view/src/editor.rs @@ -1735,6 +1735,8 @@ pub fn open(&mut self, path: &Path, action: Action) -> Result Result<(), return Err(CloseError::BufferModified(doc.display_name().into_owned())); } + if let Some(path) = doc.path() { + self.diff_providers.remove(path); + } + // This will also disallow any follow-up writes self.saves.remove(&doc_id);