feat: Diff source caching and autodetection

This commit is contained in:
Alexis (Poliorcetics) Bourget 2024-04-27 19:49:45 +02:00
parent cbbeca6c52
commit 52c4a28fae
No known key found for this signature in database
9 changed files with 277 additions and 105 deletions

11
Cargo.lock generated
View File

@ -211,6 +211,15 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "crossbeam-channel"
version = "0.5.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.5"
@ -710,12 +719,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e0eb9efdf96c35c0bed7596d1bef2d4ce6360a1d09738001f9d3e402aa7ba3e"
dependencies = [
"crc32fast",
"crossbeam-channel",
"flate2",
"gix-hash",
"gix-trace",
"gix-utils",
"libc",
"once_cell",
"parking_lot",
"prodash",
"sha1_smol",
"thiserror 1.0.69",

View File

@ -71,6 +71,7 @@
future::Future,
io::Read,
num::NonZeroUsize,
sync::Arc,
};
use std::{
@ -3089,7 +3090,7 @@ struct JumpMeta {
fn changed_file_picker(cx: &mut Context) {
pub struct FileChangeData {
cwd: PathBuf,
cwd: Arc<Path>,
style_untracked: Style,
style_modified: Style,
style_conflict: Style,
@ -3097,7 +3098,7 @@ pub struct FileChangeData {
style_renamed: Style,
}
let cwd = helix_stdx::env::current_working_dir();
let cwd: Arc<Path> = Arc::from(helix_stdx::env::current_working_dir().as_path());
if !cwd.exists() {
cx.editor
.set_error("Current working directory does not exist");
@ -3168,17 +3169,24 @@ pub struct FileChangeData {
.with_preview(|_editor, meta| Some((meta.path().into(), None)));
let injector = picker.injector();
cx.editor
.diff_providers
.clone()
.for_each_changed_file(cwd, move |change| match change {
// Helix can be launched without arguments, in which case no diff provider will be loaded since
// there is no file to provide infos for.
//
// This ensures we have one to work with for cwd (and as a bonus it means any file opened
// from this picker will have its diff provider already in cache).
cx.editor.diff_providers.add(&cwd);
cx.editor.diff_providers.clone().for_each_changed_file(
cwd.clone(),
move |change| match change {
Ok(change) => injector.push(change).is_ok(),
Err(err) => {
status::report_blocking(err);
true
}
});
},
);
cx.push_layer(Box::new(overlaid(picker)));
cx.editor.diff_providers.remove(&cwd);
}
pub fn command_palette(cx: &mut Context) {

View File

@ -1279,7 +1279,7 @@ fn reload(
let scrolloff = cx.editor.config().scrolloff;
let (view, doc) = current!(cx.editor);
doc.reload(view, &cx.editor.diff_providers).map(|_| {
doc.reload(view, &mut cx.editor.diff_providers).map(|_| {
view.ensure_cursor_in_view(doc, scrolloff);
})?;
if let Some(path) = doc.path() {
@ -1318,6 +1318,8 @@ fn reload_all(
})
.collect();
cx.editor.diff_providers.reset();
for (doc_id, view_ids) in docs_view_ids {
let doc = doc_mut!(cx.editor, &doc_id);
@ -1327,7 +1329,7 @@ fn reload_all(
// Ensure that the view is synced with the document's history.
view.sync_changes(doc);
if let Err(error) = doc.reload(view, &cx.editor.diff_providers) {
if let Err(error) = doc.reload(view, &mut cx.editor.diff_providers) {
cx.editor.set_error(format!("{}", error));
continue;
}

View File

@ -19,7 +19,7 @@ tokio = { version = "1", features = ["rt", "rt-multi-thread", "time", "sync", "p
parking_lot = "0.12"
arc-swap = { version = "1.7.1" }
gix = { version = "0.67.0", features = ["attributes", "status"], default-features = false, optional = true }
gix = { version = "0.67.0", features = ["attributes", "parallel", "status"], default-features = false, optional = true }
imara-diff = "0.1.7"
anyhow = "1"

View File

@ -22,22 +22,12 @@
#[cfg(test)]
mod test;
#[inline]
fn get_repo_dir(file: &Path) -> Result<&Path> {
file.parent().context("file has no parent directory")
}
pub fn get_diff_base(file: &Path) -> Result<Vec<u8>> {
pub fn get_diff_base(repo: &ThreadSafeRepository, file: &Path) -> Result<Vec<u8>> {
debug_assert!(!file.exists() || file.is_file());
debug_assert!(file.is_absolute());
let file = gix::path::realpath(file).context("resolve symlinks")?;
let file = gix::path::realpath(file).context("resolve symlink")?;
// TODO cache repository lookup
let repo_dir = get_repo_dir(&file)?;
let repo = open_repo(repo_dir)
.context("failed to open git repo")?
.to_thread_local();
let repo = repo.to_thread_local();
let head = repo.head_commit()?;
let file_oid = find_file_in_commit(&repo, &head, &file)?;
@ -59,15 +49,8 @@ pub fn get_diff_base(file: &Path) -> Result<Vec<u8>> {
}
}
pub fn get_current_head_name(file: &Path) -> Result<Arc<ArcSwap<Box<str>>>> {
debug_assert!(!file.exists() || file.is_file());
debug_assert!(file.is_absolute());
let file = gix::path::realpath(file).context("resolve symlinks")?;
let repo_dir = get_repo_dir(&file)?;
let repo = open_repo(repo_dir)
.context("failed to open git repo")?
.to_thread_local();
pub fn get_current_head_name(repo: &ThreadSafeRepository) -> Result<Arc<ArcSwap<Box<str>>>> {
let repo = repo.to_thread_local();
let head_ref = repo.head_ref()?;
let head_commit = repo.head_commit()?;
@ -79,11 +62,17 @@ pub fn get_current_head_name(file: &Path) -> Result<Arc<ArcSwap<Box<str>>>> {
Ok(Arc::new(ArcSwap::from_pointee(name.into_boxed_str())))
}
pub fn for_each_changed_file(cwd: &Path, f: impl Fn(Result<FileChange>) -> bool) -> Result<()> {
status(&open_repo(cwd)?.to_thread_local(), f)
/// Runs `status` over `repo`, forwarding every result to `f`.
///
/// The shared repository handle is first converted into a thread-local one, which is what
/// `status` is called with.
pub fn for_each_changed_file(
    repo: &ThreadSafeRepository,
    f: impl Fn(Result<FileChange>) -> bool,
) -> Result<()> {
    let local_repo = repo.to_thread_local();
    status(&local_repo, f)
}
fn open_repo(path: &Path) -> Result<ThreadSafeRepository> {
pub(super) fn open_repo(path: &Path) -> Result<ThreadSafeRepository> {
// Ensure the repo itself is an absolute real path, else we'll not match prefixes with
// symlink-resolved files in `get_diff_base()` above.
let path = gix::path::realpath(path)?;
// custom open options
let mut git_open_opts_map = gix::sec::trust::Mapping::<gix::open::Options>::default();

View File

@ -54,7 +54,8 @@ fn missing_file() {
let file = temp_git.path().join("file.txt");
File::create(&file).unwrap().write_all(b"foo").unwrap();
assert!(git::get_diff_base(&file).is_err());
let repo = git::open_repo(temp_git.path()).unwrap();
assert!(git::get_diff_base(&repo, &file).is_err());
}
#[test]
@ -64,7 +65,12 @@ fn unmodified_file() {
let contents = b"foo".as_slice();
File::create(&file).unwrap().write_all(contents).unwrap();
create_commit(temp_git.path(), true);
assert_eq!(git::get_diff_base(&file).unwrap(), Vec::from(contents));
let repo = git::open_repo(temp_git.path()).unwrap();
assert_eq!(
git::get_diff_base(&repo, &file).unwrap(),
Vec::from(contents)
);
}
#[test]
@ -76,7 +82,11 @@ fn modified_file() {
create_commit(temp_git.path(), true);
File::create(&file).unwrap().write_all(b"bar").unwrap();
assert_eq!(git::get_diff_base(&file).unwrap(), Vec::from(contents));
let repo = git::open_repo(temp_git.path()).unwrap();
assert_eq!(
git::get_diff_base(&repo, &file).unwrap(),
Vec::from(contents)
);
}
/// Test that `get_diff_base` does not return content for a directory.
@ -95,7 +105,9 @@ fn directory() {
std::fs::remove_dir_all(&dir).unwrap();
File::create(&dir).unwrap().write_all(b"bar").unwrap();
assert!(git::get_diff_base(&dir).is_err());
let repo = git::open_repo(temp_git.path()).unwrap();
assert!(git::get_diff_base(&repo, &dir).is_err());
}
/// Test that `get_diff_base` resolves symlinks so that the same diff base is
@ -122,8 +134,9 @@ fn symlink() {
symlink("file.txt", &file_link).unwrap();
create_commit(temp_git.path(), true);
assert_eq!(git::get_diff_base(&file_link).unwrap(), contents);
assert_eq!(git::get_diff_base(&file).unwrap(), contents);
let repo = git::open_repo(temp_git.path()).unwrap();
assert_eq!(git::get_diff_base(&repo, &file_link).unwrap(), contents);
assert_eq!(git::get_diff_base(&repo, &file).unwrap(), contents);
}
/// Test that `get_diff_base` returns content when the file is a symlink to
@ -147,6 +160,7 @@ fn symlink_to_git_repo() {
let file_link = temp_dir.path().join("file_link.txt");
symlink(&file, &file_link).unwrap();
assert_eq!(git::get_diff_base(&file_link).unwrap(), contents);
assert_eq!(git::get_diff_base(&file).unwrap(), contents);
let repo = git::open_repo(temp_git.path()).unwrap();
assert_eq!(git::get_diff_base(&repo, &file_link).unwrap(), contents);
assert_eq!(git::get_diff_base(&repo, &file).unwrap(), contents);
}

View File

@ -1,9 +1,6 @@
use anyhow::{anyhow, bail, Result};
use anyhow::Result;
use arc_swap::ArcSwap;
use std::{
path::{Path, PathBuf},
sync::Arc,
};
use std::{collections::HashMap, path::Path, sync::Arc};
#[cfg(feature = "git")]
mod git;
@ -16,67 +13,182 @@
pub use status::FileChange;
#[derive(Clone)]
#[derive(Default, Clone)]
pub struct DiffProviderRegistry {
providers: Vec<DiffProvider>,
/// Repository root paths mapped to their provider.
///
/// When a root path cannot be found after having called `add`, it means there is no
/// provider to speak of.
providers: HashMap<Arc<Path>, DiffProvider>,
/// Count the number of files added for a specific provider path.
/// Providers themselves don't care about that, this is handled entirely in `Self::add`,
/// without knowledge from the `Self::add_file_<provider>` methods.
///
/// Note: it *could* happen that a provider for a path is changed without the number of
/// associated files changing, e.g. deleting a .git/ and initializing a .jj/ repo.
counters: HashMap<Arc<Path>, u32>,
}
/// Diff-related methods
impl DiffProviderRegistry {
pub fn get_diff_base(&self, file: &Path) -> Option<Vec<u8>> {
self.providers
.iter()
.find_map(|provider| match provider.get_diff_base(file) {
Ok(res) => Some(res),
Err(err) => {
log::debug!("{err:#?}");
log::debug!("failed to open diff base for {}", file.display());
None
}
})
match self.provider_for(file)?.get_diff_base(file) {
Ok(diff_base) => Some(diff_base),
Err(err) => {
log::debug!("{err:#?}");
log::debug!("failed to open diff base for {}", file.display());
None
}
}
}
pub fn get_current_head_name(&self, file: &Path) -> Option<Arc<ArcSwap<Box<str>>>> {
self.providers
.iter()
.find_map(|provider| match provider.get_current_head_name(file) {
Ok(res) => Some(res),
Err(err) => {
log::debug!("{err:#?}");
log::debug!("failed to obtain current head name for {}", file.display());
None
}
})
match self.provider_for(file)?.get_current_head_name() {
Ok(head_name) => Some(head_name),
Err(err) => {
log::debug!("{err:#?}");
log::debug!("failed to obtain current head name for {}", file.display());
None
}
}
}
/// Fire-and-forget changed file iteration. Runs everything in a background task. Keeps
/// iterating until `f` returns `false`.
pub fn for_each_changed_file(
self,
cwd: PathBuf,
cwd: Arc<Path>,
f: impl Fn(Result<FileChange>) -> bool + Send + 'static,
) {
tokio::task::spawn_blocking(move || {
if self
.providers
.iter()
.find_map(|provider| provider.for_each_changed_file(&cwd, &f).ok())
.is_none()
{
f(Err(anyhow!("no diff provider returns success")));
let Some(diff_provider) = self.provider_for(&cwd) else {
return;
};
if let Err(err) = diff_provider.for_each_changed_file(&f) {
f(Err(err));
}
});
}
}
impl Default for DiffProviderRegistry {
fn default() -> Self {
// currently only git is supported
// TODO make this configurable when more providers are added
let providers = vec![
/// Creation and update methods
#[cfg_attr(not(feature = "git"), allow(unused))]
impl DiffProviderRegistry {
/// Register a provider (if any is found) for the given path.
pub fn add(&mut self, path: &Path) {
let Some((repo_path, provider)) = get_possible_provider(path) else {
// Do nothing here: there is no path to use and so the actual methods to get infos
// like `get_diff_base` just won't do anything since they won't find a source to
// work with.
log::debug!("Found no potential diff provider for {}", path.display());
// Note: if a `.<vcs>/` dir is deleted, we may end up in a situation where we lose track
// of a now unused provider. This is acceptable because it doesn't happen that often in
// practice and people can just reload to force an update.
//
// If it becomes an issue in the future, we could fix it by recomputing the providers
// for each stored paths here.
return;
};
let result = match provider {
#[cfg(feature = "git")]
DiffProvider::Git,
];
DiffProviderRegistry { providers }
PossibleDiffProvider::Git => self.add_file_git(repo_path),
};
match result {
Ok((key, prov)) => {
// Increase the count for this path.
let count = self.counters.entry(key).or_default();
let created = *count == 0;
*count += 1;
// Only log at info level when adding a new provider
if created {
log::info!(
"Added {prov:?} (repo: {}) from {}",
repo_path.display(),
path.display()
)
} else {
log::debug!(
"Reused {prov:?} (repo: {}) for {}",
repo_path.display(),
path.display()
);
}
}
Err(err) => log::debug!(
"Failed to open repo at {} for {}: {:?}",
repo_path.display(),
path.display(),
err
),
}
}
/// Reload the provider for the given path.
///
/// Implemented as `remove(path)` followed by `add(path)`, so the usage counter for the
/// repository ends up unchanged when both steps succeed.
///
/// NOTE(review): `remove` only drops the cached provider when `path` was its last user, and
/// the git registration path reuses a provider that is already cached. A provider shared with
/// other registered paths is therefore kept as-is rather than rebuilt — confirm this is the
/// intended meaning of "reload".
pub fn reload(&mut self, path: &Path) {
// Unregister first so that a provider used only by this path is dropped...
self.remove(path);
// ...then register again, which re-runs provider detection for `path`.
self.add(path);
}
/// Unregister `path` from the provider cache.
///
/// The usage counter of the repository that `path` resolves to is decremented; once it reaches
/// zero both the counter and the provider are dropped. Paths that resolve to no repository, or
/// to a repository that has no counter, are ignored.
pub fn remove(&mut self, path: &Path) {
    let Some((root, _)) = get_possible_provider(path) else {
        return;
    };

    let remaining = match self.counters.get_mut(root) {
        Some(users) => {
            *users -= 1;
            *users
        }
        None => return,
    };
    if remaining != 0 {
        return;
    }

    // Last user gone: forget both the counter and the provider itself.
    self.counters.remove(root);
    self.providers.remove(root);

    // Reallocating has a cost, but a typical session has one main workspace plus the occasional
    // short-lived one (e.g. reached through an LSP jump-to-definition), and the memory of those
    // temporary entries should not be retained once they are closed.
    self.providers.shrink_to_fit();
    self.counters.shrink_to_fit();
}
/// Clears the saved providers completely.
pub fn reset(&mut self) {
self.providers = Default::default();
self.counters = Default::default();
}
}
/// Internal lookup and registration helpers
impl DiffProviderRegistry {
    /// Look up the cached provider for the repository that `path` resolves to, if any.
    ///
    /// Returns `None` when no possible repository root is detected for `path`, or when that
    /// root has no provider stored in the registry.
    fn provider_for(&self, path: &Path) -> Option<&DiffProvider> {
        let (root, _) = get_possible_provider(path)?;
        self.providers.get(root)
    }

    /// Make sure a git provider is cached for `repo_path` and return its map key.
    ///
    /// A git provider that is already cached under `repo_path` is reused and no repository
    /// is opened. Otherwise the repository is opened and stored under a new key.
    ///
    /// # Errors
    ///
    /// Forwards the error from `git::open_repo`; nothing is inserted in that case.
    #[cfg(feature = "git")]
    fn add_file_git(&mut self, repo_path: &Path) -> Result<(Arc<Path>, PossibleDiffProvider)> {
        // Fast path: share the existing key instead of allocating a new one.
        if let Some((known, DiffProvider::Git(_))) = self.providers.get_key_value(repo_path) {
            return Ok((Arc::clone(known), PossibleDiffProvider::Git));
        }

        let repo = git::open_repo(repo_path)?;
        let key: Arc<Path> = Arc::from(repo_path);
        self.providers
            .insert(Arc::clone(&key), DiffProvider::Git(repo));
        Ok((key, PossibleDiffProvider::Git))
    }
}
@ -84,39 +196,67 @@ fn default() -> Self {
/// cloning [DiffProviderRegistry] as `Clone` cannot be used in trait objects.
///
/// `Copy` is not derived: the `Git` variant owns a repository handle, so only `Clone` is available.
#[derive(Copy, Clone)]
#[derive(Clone)]
pub enum DiffProvider {
#[cfg(feature = "git")]
Git,
None,
Git(gix::ThreadSafeRepository),
}
#[cfg_attr(not(feature = "git"), allow(unused))]
impl DiffProvider {
fn get_diff_base(&self, file: &Path) -> Result<Vec<u8>> {
match self {
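// We need the `*`/`ref` else we're matching on a reference and Rust considers all references
// inhabited. In our case the enum has no variants when the `git` feature is disabled, so only a
// match on the dereferenced value is allowed to have zero arms.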
match *self {
#[cfg(feature = "git")]
Self::Git => git::get_diff_base(file),
Self::None => bail!("No diff support compiled in"),
Self::Git(ref repo) => git::get_diff_base(repo, file),
}
}
fn get_current_head_name(&self, file: &Path) -> Result<Arc<ArcSwap<Box<str>>>> {
match self {
fn get_current_head_name(&self) -> Result<Arc<ArcSwap<Box<str>>>> {
match *self {
#[cfg(feature = "git")]
Self::Git => git::get_current_head_name(file),
Self::None => bail!("No diff support compiled in"),
Self::Git(ref repo) => git::get_current_head_name(repo),
}
}
fn for_each_changed_file(
&self,
cwd: &Path,
f: impl Fn(Result<FileChange>) -> bool,
) -> Result<()> {
match self {
fn for_each_changed_file(&self, f: impl Fn(Result<FileChange>) -> bool) -> Result<()> {
match *self {
#[cfg(feature = "git")]
Self::Git => git::for_each_changed_file(cwd, f),
Self::None => bail!("No diff support compiled in"),
Self::Git(ref repo) => git::for_each_changed_file(repo, f),
}
}
}
/// Kind of diff provider that *may* apply to a path, as returned by `get_possible_provider`.
///
/// This is only a tag: it carries no repository handle. When the `git` feature is disabled the
/// enum has no variants at all.
#[derive(Debug, Copy, Clone)]
pub enum PossibleDiffProvider {
/// Possibly a git repo rooted at the stored path (i.e. `<path>/.git` exists)
#[cfg(feature = "git")]
Git,
}
/// Auto-detects which diff provider *might* apply to `path` and returns the workspace 'root'
/// together with the provider kind.
///
/// `path` itself and then each of its ancestors are probed in turn; the first one that contains
/// a `.git` entry wins. The result is only a *possible* provider because the repository is never
/// opened here to confirm that it is valid.
///
/// Returns `None` when no ancestor matches, or when no provider feature is compiled in.
fn get_possible_provider(path: &Path) -> Option<(&Path, PossibleDiffProvider)> {
    /// Checks a single directory candidate for a VCS marker.
    #[cfg_attr(not(feature = "git"), allow(unused))]
    fn probe(dir: &Path) -> Option<(&Path, PossibleDiffProvider)> {
        // An I/O error while checking for the marker counts as "not found" for this candidate.
        #[cfg(feature = "git")]
        if dir.join(".git").try_exists().ok()? {
            return Some((dir, PossibleDiffProvider::Git));
        }

        None
    }

    if !cfg!(feature = "git") {
        return None;
    }

    path.ancestors().find_map(probe)
}

View File

@ -1111,7 +1111,7 @@ pub fn detect_readonly(&mut self) {
pub fn reload(
&mut self,
view: &mut View,
provider_registry: &DiffProviderRegistry,
provider_registry: &mut DiffProviderRegistry,
) -> Result<(), Error> {
let encoding = self.encoding;
let path = match self.path() {
@ -1122,6 +1122,8 @@ pub fn reload(
},
};
provider_registry.reload(&path);
// Once we have a valid path we check if its readonly status has changed
self.detect_readonly();

View File

@ -1735,6 +1735,8 @@ pub fn open(&mut self, path: &Path, action: Action) -> Result<DocumentId, Docume
Editor::doc_diagnostics(&self.language_servers, &self.diagnostics, &doc);
doc.replace_diagnostics(diagnostics, &[], None);
// When opening a *new* file, ensure its diff provider is loaded.
self.diff_providers.add(&path);
if let Some(diff_base) = self.diff_providers.get_diff_base(&path) {
doc.set_diff_base(diff_base);
}
@ -1768,6 +1770,10 @@ pub fn close_document(&mut self, doc_id: DocumentId, force: bool) -> Result<(),
return Err(CloseError::BufferModified(doc.display_name().into_owned()));
}
if let Some(path) = doc.path() {
self.diff_providers.remove(path);
}
// This will also disallow any follow-up writes
self.saves.remove(&doc_id);