project search: Skip loading of gitignored paths when their descendants will never match an inclusion/exclusion query (#42968)

Co-authored-by: dino <dinojoaocosta@gmail.com>

Related-to: #38799

Release Notes:

- Improved project search performance with "Also search files ignored by
configuration" combined with file inclusion/exclusion queries.

---------

Co-authored-by: dino <dinojoaocosta@gmail.com>
This commit is contained in:
Piotr Osiewicz 2025-11-20 18:44:55 +01:00 committed by GitHub
parent 2a40dcfd77
commit 58fe19d55e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 292 additions and 13 deletions

45
Cargo.lock generated
View file

@ -3668,6 +3668,26 @@ dependencies = [
"tiny-keccak",
]
[[package]]
name = "const_format"
version = "0.2.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7faa7469a93a566e9ccc1c73fe783b4a65c274c5ace346038dca9c39fe0030ad"
dependencies = [
"const_format_proc_macros",
]
[[package]]
name = "const_format_proc_macros"
version = "0.2.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d57c2eccfb16dbac1f4e61e206105db5820c9d26c3c472bc17c774259ef7744"
dependencies = [
"proc-macro2",
"quote",
"unicode-xid",
]
[[package]]
name = "constant_time_eq"
version = "0.1.5"
@ -12752,6 +12772,15 @@ version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5da3b0203fd7ee5720aa0b5e790b591aa5d3f41c3ed2c34a3a393382198af2f7"
[[package]]
name = "pori"
version = "0.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4a63d338dec139f56dacc692ca63ad35a6be6a797442479b55acd611d79e906"
dependencies = [
"nom 7.1.3",
]
[[package]]
name = "portable-atomic"
version = "1.11.1"
@ -13068,6 +13097,7 @@ dependencies = [
"url",
"util",
"watch",
"wax",
"which 6.0.3",
"worktree",
"zeroize",
@ -19492,6 +19522,21 @@ dependencies = [
"zlog",
]
[[package]]
name = "wax"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d12a78aa0bab22d2f26ed1a96df7ab58e8a93506a3e20adb47c51a93b4e1357"
dependencies = [
"const_format",
"itertools 0.11.0",
"nom 7.1.3",
"pori",
"regex",
"thiserror 1.0.69",
"walkdir",
]
[[package]]
name = "wayland-backend"
version = "0.3.11"

View file

@ -719,6 +719,7 @@ wasmtime = { version = "29", default-features = false, features = [
"parallel-compilation",
] }
wasmtime-wasi = "29"
wax = "0.6"
which = "6.0.0"
windows-core = "0.61"
wit-component = "0.221"

View file

@ -86,6 +86,7 @@ toml.workspace = true
url.workspace = true
util.workspace = true
watch.workspace = true
wax.workspace = true
which.workspace = true
worktree.workspace = true
zeroize.workspace = true

View file

@ -1,7 +1,9 @@
use std::{
cell::LazyCell,
collections::BTreeSet,
io::{BufRead, BufReader},
ops::Range,
path::Path,
path::{Path, PathBuf},
pin::pin,
sync::Arc,
};
@ -22,7 +24,7 @@ use smol::{
use text::BufferId;
use util::{ResultExt, maybe, paths::compare_rel_paths};
use worktree::{Entry, ProjectEntryId, Snapshot, Worktree};
use worktree::{Entry, ProjectEntryId, Snapshot, Worktree, WorktreeSettings};
use crate::{
Project, ProjectItem, ProjectPath, RemotelyCreatedModels,
@ -178,7 +180,7 @@ impl Search {
let (find_all_matches_tx, find_all_matches_rx) =
bounded(MAX_CONCURRENT_BUFFER_OPENS);
let query = Arc::new(query);
let (candidate_searcher, tasks) = match self.kind {
SearchKind::OpenBuffersOnly => {
let Ok(open_buffers) = cx.update(|cx| self.all_loaded_buffers(&query, cx))
@ -207,11 +209,10 @@ impl Search {
let (sorted_search_results_tx, sorted_search_results_rx) = unbounded();
let (input_paths_tx, input_paths_rx) = unbounded();
let tasks = vec![
cx.spawn(Self::provide_search_paths(
std::mem::take(worktrees),
query.include_ignored(),
query.clone(),
input_paths_tx,
sorted_search_results_tx,
))
@ -366,26 +367,30 @@ impl Search {
fn provide_search_paths(
worktrees: Vec<Entity<Worktree>>,
include_ignored: bool,
query: Arc<SearchQuery>,
tx: Sender<InputPath>,
results: Sender<oneshot::Receiver<ProjectPath>>,
) -> impl AsyncFnOnce(&mut AsyncApp) {
async move |cx| {
_ = maybe!(async move {
let gitignored_tracker = PathInclusionMatcher::new(query.clone());
for worktree in worktrees {
let (mut snapshot, worktree_settings) = worktree
.read_with(cx, |this, _| {
Some((this.snapshot(), this.as_local()?.settings()))
})?
.context("The worktree is not local")?;
if include_ignored {
if query.include_ignored() {
// Pre-fetch all of the ignored directories as they're going to be searched.
let mut entries_to_refresh = vec![];
for entry in snapshot.entries(include_ignored, 0) {
if entry.is_ignored && entry.kind.is_unloaded() {
if !worktree_settings.is_path_excluded(&entry.path) {
entries_to_refresh.push(entry.path.clone());
}
for entry in snapshot.entries(query.include_ignored(), 0) {
if gitignored_tracker.should_scan_gitignored_dir(
entry,
&snapshot,
&worktree_settings,
) {
entries_to_refresh.push(entry.path.clone());
}
}
let barrier = worktree.update(cx, |this, _| {
@ -404,8 +409,9 @@ impl Search {
cx.background_executor()
.scoped(|scope| {
scope.spawn(async {
for entry in snapshot.files(include_ignored, 0) {
for entry in snapshot.files(query.include_ignored(), 0) {
let (should_scan_tx, should_scan_rx) = oneshot::channel();
let Ok(_) = tx
.send(InputPath {
entry: entry.clone(),
@ -788,3 +794,229 @@ struct MatchingEntry {
path: ProjectPath,
should_scan_tx: oneshot::Sender<ProjectPath>,
}
/// Decides whether a gitignored directory should be scanned for a search query, based on the
/// query's include/exclude patterns (those patterns may match paths inside the directory).
///
/// This is roughly the inverse of a glob match: given a glob pattern like `src/**/` and a
/// parent path like `src`, we need to decide whether the parent may contain glob hits.
struct PathInclusionMatcher {
    /// Literal leading paths derived from the query's `files_to_include` globs
    /// (e.g. `src/` for `src/**/*.rs`); the variable glob part is not stored.
    included: BTreeSet<PathBuf>,
    /// The search query whose include/exclude filters and flags drive the decision.
    query: Arc<SearchQuery>,
}
impl PathInclusionMatcher {
    /// Builds a matcher for `query`.
    ///
    /// To do an inverse glob match, each include glob is split into its literal prefix and
    /// its glob part. For example, `src/**/*.rs` becomes `src/` and `**/*.rs`; the glob part
    /// gets dropped. Later, a directory is considered worth scanning when its path and one
    /// of these prefixes overlap (one is a path-prefix of the other).
    fn new(query: Arc<SearchQuery>) -> Self {
        let mut included = BTreeSet::new();
        if query.filters_path() {
            included.extend(query.files_to_include().sources().iter().map(|glob| {
                // A glob that `wax` cannot parse must not be dropped: doing so would make us
                // skip directories that the real include matcher could still accept. Fall back
                // to an empty prefix instead, which overlaps every path (the same outcome as a
                // glob without a literal prefix, such as `**/*.rs`).
                wax::Glob::new(glob)
                    .map(|parsed| parsed.partition().0)
                    .unwrap_or_default()
            }));
        }
        Self { included, query }
    }

    /// Returns `true` when `entry` is a gitignored, not-yet-loaded directory that has to be
    /// loaded because the query may match paths inside of it.
    ///
    /// Returns `false` when the entry is not ignored or not unloaded, when the query does not
    /// include ignored files, when the worktree settings exclude the path, or when the query's
    /// include/exclude filters rule the directory out.
    fn should_scan_gitignored_dir(
        &self,
        entry: &Entry,
        snapshot: &Snapshot,
        worktree_settings: &WorktreeSettings,
    ) -> bool {
        if !entry.is_ignored || !entry.kind.is_unloaded() {
            return false;
        }
        if !self.query.include_ignored() {
            return false;
        }
        if worktree_settings.is_path_excluded(&entry.path) {
            return false;
        }
        if !self.query.filters_path() {
            return true;
        }

        let entry_path = entry.path.as_std_path();

        // Exclusions (pruning): if the directory or one of its ancestors is excluded, nothing
        // below it can match.
        if self.path_is_definitely_excluded(entry_path, snapshot) {
            return false;
        }

        // Inclusions (traversal): with no include prefixes there is nothing to narrow down.
        if self.included.is_empty() {
            return true;
        }

        // Only computed if some include prefix is absolute.
        let as_abs_path = LazyCell::new(move || snapshot.absolutize(&entry.path));
        // When the query matches full paths, include globs may start with the worktree root
        // name, so also compare against the root-prefixed path (mirroring how exclusions are
        // checked). This can only cause more directories to be scanned, never fewer.
        let full_path = self.query.match_full_paths().then(|| {
            let mut full_path = snapshot.root_name().as_std_path().to_owned();
            full_path.push(entry_path);
            full_path
        });

        self.included.iter().any(|prefix| {
            if prefix.is_absolute() {
                Self::paths_overlap(prefix, &**as_abs_path)
            } else {
                Self::paths_overlap(prefix, entry_path)
                    || full_path
                        .as_deref()
                        .is_some_and(|full_path| Self::paths_overlap(prefix, full_path))
            }
        })
    }

    /// Returns `true` when `candidate` lies inside of `prefix` or is an ancestor of it.
    ///
    /// 1. `candidate` starts with `prefix`: we are inside the target zone
    ///    (e.g. glob: `src/`, current: `src/lib/`). Keep scanning.
    /// 2. `prefix` starts with `candidate`: we are above the target zone
    ///    (e.g. glob: `src/foo/`, current: `src/`). Keep scanning to reach `foo`.
    fn paths_overlap(prefix: &Path, candidate: &Path) -> bool {
        prefix.starts_with(candidate) || candidate.starts_with(prefix)
    }

    /// Returns `true` when `path` or any of its ancestors matches the query's exclusion
    /// filter. When the query matches full paths, the worktree root name is prepended first.
    fn path_is_definitely_excluded(&self, path: &Path, snapshot: &Snapshot) -> bool {
        let files_to_exclude = self.query.files_to_exclude();
        if files_to_exclude.sources().is_empty() {
            return false;
        }

        let mut path = if self.query.match_full_paths() {
            let mut full_path = snapshot.root_name().as_std_path().to_owned();
            full_path.push(path);
            full_path
        } else {
            path.to_owned()
        };

        // Walk up the ancestor chain until something matches or there is nothing left to pop.
        loop {
            if files_to_exclude.is_match(&path) {
                return true;
            }
            if !path.pop() {
                return false;
            }
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use fs::FakeFs;
    use serde_json::json;
    use settings::Settings;
    use util::{
        path,
        paths::{PathMatcher, PathStyle},
        rel_path::RelPath,
    };
    use worktree::{Entry, EntryKind, WorktreeSettings};

    use crate::{
        Project, project_search::PathInclusionMatcher, project_tests::init_test,
        search::SearchQuery,
    };

    /// Builds a matcher for a text search for `field` that includes ignored files, has no
    /// exclusion filter, does not match full paths, and uses the given inclusion filter.
    fn matcher_with_includes(files_to_include: PathMatcher) -> PathInclusionMatcher {
        let include_ignored = true;
        let files_to_exclude = PathMatcher::default();
        let match_full_paths = false;
        let query = SearchQuery::text(
            "field",
            false,
            false,
            include_ignored,
            files_to_include,
            files_to_exclude,
            match_full_paths,
            None,
        )
        .unwrap();
        PathInclusionMatcher::new(Arc::new(query))
    }

    /// Checks that a gitignored directory is scanned when no path filters are set, and
    /// skipped when the inclusion filter points at a sibling directory.
    #[gpui::test]
    async fn test_path_inclusion_matcher(cx: &mut gpui::TestAppContext) {
        init_test(cx);

        let fake_fs = FakeFs::new(cx.background_executor.clone());
        fake_fs
            .insert_tree(
                "/root",
                json!({
                    ".gitignore": "src/data/\n",
                    "src": {
                        "data": {
                            "main.csv": "field_1,field_2,field_3",
                        },
                        "lib": {
                            "main.txt": "Are you familiar with fields?",
                        },
                    },
                }),
            )
            .await;

        let project = Project::test(fake_fs.clone(), [path!("/root").as_ref()], cx).await;
        let worktree = project.update(cx, |project, cx| project.worktrees(cx).next().unwrap());
        let (settings, snapshot) = worktree.update(cx, |worktree, cx| {
            let location = worktree.settings_location(cx);
            (
                WorktreeSettings::get(Some(location), cx).clone(),
                worktree.snapshot(),
            )
        });

        // The gitignored directory is not loaded by the worktree, so its entry is
        // constructed by hand.
        let ignored_dir = Entry {
            id: ProjectEntryId::from_proto(1),
            kind: EntryKind::UnloadedDir,
            path: Arc::from(RelPath::unix(Path::new("src/data")).unwrap()),
            inode: 0,
            mtime: None,
            canonical_path: None,
            is_ignored: true,
            is_hidden: false,
            is_always_included: false,
            is_external: false,
            is_private: false,
            size: 0,
            char_bag: Default::default(),
            is_fifo: false,
        };

        // 1. Searching for `field`, including ignored files, without any inclusion or
        // exclusion filters: the ignored directory has to be scanned.
        let unfiltered = matcher_with_includes(PathMatcher::default());
        assert!(unfiltered.should_scan_gitignored_dir(&ignored_dir, &snapshot, &settings));

        // 2. Same search, but `files_to_include` only covers files under `src/lib`: the
        // ignored `src/data` directory can be skipped.
        let lib_only =
            matcher_with_includes(PathMatcher::new(vec!["src/lib"], PathStyle::Posix).unwrap());
        assert!(!lib_only.should_scan_gitignored_dir(&ignored_dir, &snapshot, &settings));
    }
}