mirror of
https://github.com/zed-industries/zed.git
synced 2026-06-01 03:14:56 +07:00
project search: Skip loading of gitignored paths when their descendants will never match an inclusion/exclusion query (#42968)
Co-authored-by: dino <dinojoaocosta@gmail.com> Related-to: #38799 Release Notes: - Improved project search performance with "Also search files ignored by configuration" combined with file inclusion/exclusion queries. --------- Co-authored-by: dino <dinojoaocosta@gmail.com>
This commit is contained in:
parent
2a40dcfd77
commit
58fe19d55e
4 changed files with 292 additions and 13 deletions
45
Cargo.lock
generated
45
Cargo.lock
generated
|
|
@ -3668,6 +3668,26 @@ dependencies = [
|
|||
"tiny-keccak",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "const_format"
|
||||
version = "0.2.35"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7faa7469a93a566e9ccc1c73fe783b4a65c274c5ace346038dca9c39fe0030ad"
|
||||
dependencies = [
|
||||
"const_format_proc_macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "const_format_proc_macros"
|
||||
version = "0.2.34"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1d57c2eccfb16dbac1f4e61e206105db5820c9d26c3c472bc17c774259ef7744"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-xid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "constant_time_eq"
|
||||
version = "0.1.5"
|
||||
|
|
@ -12752,6 +12772,15 @@ version = "0.2.5"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5da3b0203fd7ee5720aa0b5e790b591aa5d3f41c3ed2c34a3a393382198af2f7"
|
||||
|
||||
[[package]]
|
||||
name = "pori"
|
||||
version = "0.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a4a63d338dec139f56dacc692ca63ad35a6be6a797442479b55acd611d79e906"
|
||||
dependencies = [
|
||||
"nom 7.1.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "portable-atomic"
|
||||
version = "1.11.1"
|
||||
|
|
@ -13068,6 +13097,7 @@ dependencies = [
|
|||
"url",
|
||||
"util",
|
||||
"watch",
|
||||
"wax",
|
||||
"which 6.0.3",
|
||||
"worktree",
|
||||
"zeroize",
|
||||
|
|
@ -19492,6 +19522,21 @@ dependencies = [
|
|||
"zlog",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wax"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8d12a78aa0bab22d2f26ed1a96df7ab58e8a93506a3e20adb47c51a93b4e1357"
|
||||
dependencies = [
|
||||
"const_format",
|
||||
"itertools 0.11.0",
|
||||
"nom 7.1.3",
|
||||
"pori",
|
||||
"regex",
|
||||
"thiserror 1.0.69",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wayland-backend"
|
||||
version = "0.3.11"
|
||||
|
|
|
|||
|
|
@ -719,6 +719,7 @@ wasmtime = { version = "29", default-features = false, features = [
|
|||
"parallel-compilation",
|
||||
] }
|
||||
wasmtime-wasi = "29"
|
||||
wax = "0.6"
|
||||
which = "6.0.0"
|
||||
windows-core = "0.61"
|
||||
wit-component = "0.221"
|
||||
|
|
|
|||
|
|
@ -86,6 +86,7 @@ toml.workspace = true
|
|||
url.workspace = true
|
||||
util.workspace = true
|
||||
watch.workspace = true
|
||||
wax.workspace = true
|
||||
which.workspace = true
|
||||
worktree.workspace = true
|
||||
zeroize.workspace = true
|
||||
|
|
|
|||
|
|
@ -1,7 +1,9 @@
|
|||
use std::{
|
||||
cell::LazyCell,
|
||||
collections::BTreeSet,
|
||||
io::{BufRead, BufReader},
|
||||
ops::Range,
|
||||
path::Path,
|
||||
path::{Path, PathBuf},
|
||||
pin::pin,
|
||||
sync::Arc,
|
||||
};
|
||||
|
|
@ -22,7 +24,7 @@ use smol::{
|
|||
|
||||
use text::BufferId;
|
||||
use util::{ResultExt, maybe, paths::compare_rel_paths};
|
||||
use worktree::{Entry, ProjectEntryId, Snapshot, Worktree};
|
||||
use worktree::{Entry, ProjectEntryId, Snapshot, Worktree, WorktreeSettings};
|
||||
|
||||
use crate::{
|
||||
Project, ProjectItem, ProjectPath, RemotelyCreatedModels,
|
||||
|
|
@ -178,7 +180,7 @@ impl Search {
|
|||
|
||||
let (find_all_matches_tx, find_all_matches_rx) =
|
||||
bounded(MAX_CONCURRENT_BUFFER_OPENS);
|
||||
|
||||
let query = Arc::new(query);
|
||||
let (candidate_searcher, tasks) = match self.kind {
|
||||
SearchKind::OpenBuffersOnly => {
|
||||
let Ok(open_buffers) = cx.update(|cx| self.all_loaded_buffers(&query, cx))
|
||||
|
|
@ -207,11 +209,10 @@ impl Search {
|
|||
let (sorted_search_results_tx, sorted_search_results_rx) = unbounded();
|
||||
|
||||
let (input_paths_tx, input_paths_rx) = unbounded();
|
||||
|
||||
let tasks = vec![
|
||||
cx.spawn(Self::provide_search_paths(
|
||||
std::mem::take(worktrees),
|
||||
query.include_ignored(),
|
||||
query.clone(),
|
||||
input_paths_tx,
|
||||
sorted_search_results_tx,
|
||||
))
|
||||
|
|
@ -366,26 +367,30 @@ impl Search {
|
|||
|
||||
fn provide_search_paths(
|
||||
worktrees: Vec<Entity<Worktree>>,
|
||||
include_ignored: bool,
|
||||
query: Arc<SearchQuery>,
|
||||
tx: Sender<InputPath>,
|
||||
results: Sender<oneshot::Receiver<ProjectPath>>,
|
||||
) -> impl AsyncFnOnce(&mut AsyncApp) {
|
||||
async move |cx| {
|
||||
_ = maybe!(async move {
|
||||
let gitignored_tracker = PathInclusionMatcher::new(query.clone());
|
||||
for worktree in worktrees {
|
||||
let (mut snapshot, worktree_settings) = worktree
|
||||
.read_with(cx, |this, _| {
|
||||
Some((this.snapshot(), this.as_local()?.settings()))
|
||||
})?
|
||||
.context("The worktree is not local")?;
|
||||
if include_ignored {
|
||||
if query.include_ignored() {
|
||||
// Pre-fetch all of the ignored directories as they're going to be searched.
|
||||
let mut entries_to_refresh = vec![];
|
||||
for entry in snapshot.entries(include_ignored, 0) {
|
||||
if entry.is_ignored && entry.kind.is_unloaded() {
|
||||
if !worktree_settings.is_path_excluded(&entry.path) {
|
||||
entries_to_refresh.push(entry.path.clone());
|
||||
}
|
||||
|
||||
for entry in snapshot.entries(query.include_ignored(), 0) {
|
||||
if gitignored_tracker.should_scan_gitignored_dir(
|
||||
entry,
|
||||
&snapshot,
|
||||
&worktree_settings,
|
||||
) {
|
||||
entries_to_refresh.push(entry.path.clone());
|
||||
}
|
||||
}
|
||||
let barrier = worktree.update(cx, |this, _| {
|
||||
|
|
@ -404,8 +409,9 @@ impl Search {
|
|||
cx.background_executor()
|
||||
.scoped(|scope| {
|
||||
scope.spawn(async {
|
||||
for entry in snapshot.files(include_ignored, 0) {
|
||||
for entry in snapshot.files(query.include_ignored(), 0) {
|
||||
let (should_scan_tx, should_scan_rx) = oneshot::channel();
|
||||
|
||||
let Ok(_) = tx
|
||||
.send(InputPath {
|
||||
entry: entry.clone(),
|
||||
|
|
@ -788,3 +794,229 @@ struct MatchingEntry {
|
|||
path: ProjectPath,
|
||||
should_scan_tx: oneshot::Sender<ProjectPath>,
|
||||
}
|
||||
|
||||
/// This struct encapsulates the logic to decide whether a given gitignored directory should be
|
||||
/// scanned based on include/exclude patterns of a search query (as include/exclude parameters may match paths inside it).
|
||||
/// It is kind-of doing an inverse of glob. Given a glob pattern like `src/**/` and a parent path like `src`, we need to decide whether the parent
|
||||
/// may contain glob hits.
|
||||
struct PathInclusionMatcher {
|
||||
included: BTreeSet<PathBuf>,
|
||||
query: Arc<SearchQuery>,
|
||||
}
|
||||
|
||||
impl PathInclusionMatcher {
|
||||
fn new(query: Arc<SearchQuery>) -> Self {
|
||||
let mut included = BTreeSet::new();
|
||||
// To do an inverse glob match, we split each glob into it's prefix and the glob part.
|
||||
// For example, `src/**/*.rs` becomes `src/` and `**/*.rs`. The glob part gets dropped.
|
||||
// Then, when checking whether a given directory should be scanned, we check whether it is a non-empty substring of any glob prefix.
|
||||
if query.filters_path() {
|
||||
included.extend(
|
||||
query
|
||||
.files_to_include()
|
||||
.sources()
|
||||
.iter()
|
||||
.flat_map(|glob| Some(wax::Glob::new(glob).ok()?.partition().0)),
|
||||
);
|
||||
}
|
||||
Self { included, query }
|
||||
}
|
||||
|
||||
fn should_scan_gitignored_dir(
|
||||
&self,
|
||||
entry: &Entry,
|
||||
snapshot: &Snapshot,
|
||||
worktree_settings: &WorktreeSettings,
|
||||
) -> bool {
|
||||
if !entry.is_ignored || !entry.kind.is_unloaded() {
|
||||
return false;
|
||||
}
|
||||
if !self.query.include_ignored() {
|
||||
return false;
|
||||
}
|
||||
if worktree_settings.is_path_excluded(&entry.path) {
|
||||
return false;
|
||||
}
|
||||
if !self.query.filters_path() {
|
||||
return true;
|
||||
}
|
||||
|
||||
let as_abs_path = LazyCell::new(move || snapshot.absolutize(&entry.path));
|
||||
let entry_path = entry.path.as_std_path();
|
||||
// 3. Check Exclusions (Pruning)
|
||||
// If the current path is a child of an excluded path, we stop.
|
||||
let is_excluded = self.path_is_definitely_excluded(entry_path, snapshot);
|
||||
|
||||
if is_excluded {
|
||||
return false;
|
||||
}
|
||||
|
||||
// 4. Check Inclusions (Traversal)
|
||||
if self.included.is_empty() {
|
||||
return true;
|
||||
}
|
||||
|
||||
// We scan if the current path is a descendant of an include prefix
|
||||
// OR if the current path is an ancestor of an include prefix (we need to go deeper to find it).
|
||||
let is_included = self.included.iter().any(|prefix| {
|
||||
let (prefix_matches_entry, entry_matches_prefix) = if prefix.is_absolute() {
|
||||
(
|
||||
prefix.starts_with(&**as_abs_path),
|
||||
as_abs_path.starts_with(prefix),
|
||||
)
|
||||
} else {
|
||||
(
|
||||
prefix.starts_with(entry_path),
|
||||
entry_path.starts_with(prefix),
|
||||
)
|
||||
};
|
||||
|
||||
// Logic:
|
||||
// 1. entry_matches_prefix: We are inside the target zone (e.g. glob: src/, current: src/lib/). Keep scanning.
|
||||
// 2. prefix_matches_entry: We are above the target zone (e.g. glob: src/foo/, current: src/). Keep scanning to reach foo.
|
||||
prefix_matches_entry || entry_matches_prefix
|
||||
});
|
||||
|
||||
is_included
|
||||
}
|
||||
fn path_is_definitely_excluded(&self, path: &Path, snapshot: &Snapshot) -> bool {
|
||||
if !self.query.files_to_exclude().sources().is_empty() {
|
||||
let mut path = if self.query.match_full_paths() {
|
||||
let mut full_path = snapshot.root_name().as_std_path().to_owned();
|
||||
full_path.push(path);
|
||||
full_path
|
||||
} else {
|
||||
path.to_owned()
|
||||
};
|
||||
loop {
|
||||
if self.query.files_to_exclude().is_match(&path) {
|
||||
return true;
|
||||
} else if !path.pop() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use fs::FakeFs;
    use serde_json::json;
    use settings::Settings;
    use util::{
        path,
        paths::{PathMatcher, PathStyle},
        rel_path::RelPath,
    };
    use worktree::{Entry, EntryKind, WorktreeSettings};

    use crate::{
        Project, project_search::PathInclusionMatcher, project_tests::init_test,
        search::SearchQuery,
    };

    /// Builds a matcher for a text search of `field` that includes ignored files, has no
    /// exclusion filter, does not match full paths, and uses `files_to_include` as its
    /// inclusion filter.
    fn matcher_including_ignored(files_to_include: PathMatcher) -> PathInclusionMatcher {
        let include_ignored = true;
        let files_to_exclude = PathMatcher::default();
        let match_full_paths = false;
        let search_query = SearchQuery::text(
            "field",
            false,
            false,
            include_ignored,
            files_to_include,
            files_to_exclude,
            match_full_paths,
            None,
        )
        .unwrap();

        PathInclusionMatcher::new(Arc::new(search_query))
    }

    #[gpui::test]
    async fn test_path_inclusion_matcher(cx: &mut gpui::TestAppContext) {
        init_test(cx);

        let fs = FakeFs::new(cx.background_executor.clone());
        fs.insert_tree(
            "/root",
            json!({
                ".gitignore": "src/data/\n",
                "src": {
                    "data": {
                        "main.csv": "field_1,field_2,field_3",
                    },
                    "lib": {
                        "main.txt": "Are you familiar with fields?",
                    },
                },
            }),
        )
        .await;

        let project = Project::test(fs.clone(), [path!("/root").as_ref()], cx).await;
        let worktree = project.update(cx, |project, cx| project.worktrees(cx).next().unwrap());
        let (worktree_settings, worktree_snapshot) = worktree.update(cx, |worktree, cx| {
            let settings_location = worktree.settings_location(cx);
            (
                WorktreeSettings::get(Some(settings_location), cx).clone(),
                worktree.snapshot(),
            )
        });

        // The gitignored directory is not loaded by the worktree, so its entry is built by
        // hand here.
        let entry = Entry {
            id: ProjectEntryId::from_proto(1),
            kind: EntryKind::UnloadedDir,
            path: Arc::from(RelPath::unix(Path::new("src/data")).unwrap()),
            inode: 0,
            mtime: None,
            canonical_path: None,
            is_ignored: true,
            is_hidden: false,
            is_always_included: false,
            is_external: false,
            is_private: false,
            size: 0,
            char_bag: Default::default(),
            is_fifo: false,
        };

        // 1. Searching for `field` with ignored files included and no inclusion or exclusion
        // filters: the ignored directory has to be scanned.
        let path_matcher = matcher_including_ignored(PathMatcher::default());
        assert!(path_matcher.should_scan_gitignored_dir(
            &entry,
            &worktree_snapshot,
            &worktree_settings
        ));

        // 2. Same search, but `files_to_include` only covers files under `src/lib`: the
        // ignored `src/data` directory must not be scanned.
        let path_matcher = matcher_including_ignored(
            PathMatcher::new(vec!["src/lib"], PathStyle::Posix).unwrap(),
        );
        assert!(!path_matcher.should_scan_gitignored_dir(
            &entry,
            &worktree_snapshot,
            &worktree_settings
        ));
    }
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue