mirror of
https://github.com/zed-industries/zed.git
synced 2026-06-01 03:14:56 +07:00
fuzzy_nucleo: Optimize path matching with CharBag prefilter and add benchmarks (#54112)
This PR was originally a part of https://github.com/zed-industries/zed/pull/53551, so there's more info about its motivation there. - Add a CharBag prefilter on path candidates to skip irrelevant entries before invoking nucleo's matcher. - Use binary_search on sorted matched char indices when reconstructing byte positions (perf improvement). - Add a criterion benchmark comparing `fuzzy_nucleo` path matching against the existing fuzzy crate. Performance Chart: | Benchmark | Size | Nucleo (before) | Nucleo (after) | Fuzzy | Before/Fuzzy | After/Fuzzy | |-----------|-----:|----------------:|---------------:|------:|-------------:|------------:| | 1-word | 100 | 14.14 µs | 9.12 µs | 9.06 µs | 1.56x | 1.01x | | 1-word | 1,000 | 164.37 µs | 114.11 µs | 110.43 µs | 1.49x | 1.03x | | 1-word | 10,000 | 1.83 ms | 1.39 ms | 1.41 ms | 1.30x | 0.99x | | 2-word | 100 | 12.83 µs | 3.51 µs | 979 ns | 13.10x | 3.59x | | 2-word | 1,000 | 131.65 µs | 33.46 µs | 6.37 µs | 20.67x | 5.25x | | 2-word | 10,000 | 1.24 ms | 338.84 µs | 52.46 µs | 23.64x | 6.46x | Exact Current State: | query | size | nucleo | fuzzy | nucleo/fuzzy | |---|---:|---:|---:|---:| | 1-word | 100 | 8.62 µs | 9.22 µs | 0.93× | | 1-word | 1000 | 102 µs | 111 µs | 0.92× | | 1-word | 10000 | 1.13 ms | 1.28 ms | 0.88× | | 2-word | 100 | 3.48 µs | 0.98 µs | 3.55× | | 2-word | 1000 | 29.9 µs | 6.39 µs | 4.68× | | 2-word | 10000 | 271 µs | 53.4 µs | 5.08× | | 4-word | 100 | 0.85 µs | 0.53 µs | 1.60× | | 4-word | 1000 | 2.99 µs | 1.66 µs | 1.80× | | 4-word | 10000 | 20.1 µs | 9.14 µs | 2.20× | Self-Review Checklist: - [x] I've reviewed my own diff for quality, security, and reliability - [x] Unsafe blocks (if any) have justifying comments - [x] The content is consistent with the [UI/UX checklist](https://github.com/zed-industries/zed/blob/main/CONTRIBUTING.md#uiux-checklist) - [x] Tests cover the new/changed behavior - [x] Performance impact has been considered and is acceptable Release Notes: - fuzzy_nucleo: improved the 
performance of path matching
This commit is contained in:
parent
80a053ed2a
commit
722f3089ed
8 changed files with 451 additions and 45 deletions
2
Cargo.lock
generated
2
Cargo.lock
generated
|
|
@ -6772,6 +6772,8 @@ dependencies = [
|
|||
name = "fuzzy_nucleo"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"criterion",
|
||||
"fuzzy",
|
||||
"gpui",
|
||||
"nucleo",
|
||||
"util",
|
||||
|
|
|
|||
|
|
@ -698,13 +698,18 @@ fn matching_history_items<'a>(
|
|||
.into_iter()
|
||||
.chain(currently_opened)
|
||||
.map(|found_path| {
|
||||
let candidate = PathMatchCandidate {
|
||||
is_dir: false, // You can't open directories as project items
|
||||
path: &found_path.project.path,
|
||||
// Only match history items names, otherwise their paths may match too many queries, producing false positives.
|
||||
// E.g. `foo` would match both `something/foo/bar.rs` and `something/foo/foo.rs` and if the former is a history item,
|
||||
// it would be shown first always, despite the latter being a better match.
|
||||
};
|
||||
// Only match history items names, otherwise their paths may match too many queries,
|
||||
// producing false positives. E.g. `foo` would match both `something/foo/bar.rs` and
|
||||
// `something/foo/foo.rs` and if the former is a history item, it would be shown first
|
||||
// always, despite the latter being a better match.
|
||||
let candidate = PathMatchCandidate::new(
|
||||
&found_path.project.path,
|
||||
false,
|
||||
worktree_name_by_id
|
||||
.as_ref()
|
||||
.and_then(|m| m.get(&found_path.project.worktree_id))
|
||||
.map(|prefix| prefix.as_ref()),
|
||||
);
|
||||
candidates_paths.insert(&found_path.project, found_path);
|
||||
(found_path.project.worktree_id, candidate)
|
||||
})
|
||||
|
|
@ -731,7 +736,7 @@ fn matching_history_items<'a>(
|
|||
worktree.to_usize(),
|
||||
worktree_root_name,
|
||||
query.path_query(),
|
||||
false,
|
||||
fuzzy_nucleo::Case::Ignore,
|
||||
max_results,
|
||||
path_style,
|
||||
)
|
||||
|
|
@ -914,7 +919,7 @@ impl FileFinderDelegate {
|
|||
candidate_sets.as_slice(),
|
||||
query.path_query(),
|
||||
&relative_to,
|
||||
false,
|
||||
fuzzy_nucleo::Case::Ignore,
|
||||
100,
|
||||
&cancel_flag,
|
||||
cx.background_executor().clone(),
|
||||
|
|
|
|||
|
|
@ -13,9 +13,15 @@ path = "src/fuzzy_nucleo.rs"
|
|||
doctest = false
|
||||
|
||||
[dependencies]
|
||||
fuzzy.workspace = true
|
||||
nucleo.workspace = true
|
||||
gpui.workspace = true
|
||||
util.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
util = {workspace = true, features = ["test-support"]}
|
||||
criterion.workspace = true
|
||||
util = { workspace = true, features = ["test-support"] }
|
||||
|
||||
[[bench]]
|
||||
name = "match_benchmark"
|
||||
harness = false
|
||||
|
|
|
|||
253
crates/fuzzy_nucleo/benches/match_benchmark.rs
Normal file
253
crates/fuzzy_nucleo/benches/match_benchmark.rs
Normal file
|
|
@ -0,0 +1,253 @@
|
|||
use criterion::{BatchSize, BenchmarkId, Criterion, criterion_group, criterion_main};
|
||||
use fuzzy::CharBag;
|
||||
use util::{paths::PathStyle, rel_path::RelPath};
|
||||
|
||||
const DIRS: &[&str] = &[
|
||||
"src",
|
||||
"crates/gpui/src",
|
||||
"crates/editor/src",
|
||||
"crates/fuzzy_nucleo/src",
|
||||
"crates/workspace/src",
|
||||
"crates/project/src",
|
||||
"crates/language/src",
|
||||
"crates/terminal/src",
|
||||
"crates/assistant/src",
|
||||
"crates/theme/src",
|
||||
"tests/integration",
|
||||
"tests/unit",
|
||||
"docs/architecture",
|
||||
"scripts",
|
||||
"assets/icons",
|
||||
"assets/fonts",
|
||||
"crates/git/src",
|
||||
"crates/rpc/src",
|
||||
"crates/settings/src",
|
||||
"crates/diagnostics/src",
|
||||
"crates/search/src",
|
||||
"crates/collab/src",
|
||||
"crates/db/src",
|
||||
"crates/lsp/src",
|
||||
];
|
||||
|
||||
const FILENAMES: &[&str] = &[
|
||||
"parser.rs",
|
||||
"main.rs",
|
||||
"executor.rs",
|
||||
"editor.rs",
|
||||
"strings.rs",
|
||||
"workspace.rs",
|
||||
"project.rs",
|
||||
"buffer.rs",
|
||||
"colors.rs",
|
||||
"panel.rs",
|
||||
"renderer.rs",
|
||||
"dispatcher.rs",
|
||||
"matcher.rs",
|
||||
"paths.rs",
|
||||
"context.rs",
|
||||
"toolbar.rs",
|
||||
"statusbar.rs",
|
||||
"keymap.rs",
|
||||
"config.rs",
|
||||
"settings.rs",
|
||||
"diagnostics.rs",
|
||||
"completion.rs",
|
||||
"hover.rs",
|
||||
"references.rs",
|
||||
"inlay_hints.rs",
|
||||
"git_blame.rs",
|
||||
"terminal.rs",
|
||||
"search.rs",
|
||||
"replace.rs",
|
||||
"outline.rs",
|
||||
"breadcrumbs.rs",
|
||||
"tab_bar.rs",
|
||||
"Cargo.toml",
|
||||
"README.md",
|
||||
"build.sh",
|
||||
"LICENSE",
|
||||
"overview.md",
|
||||
"string_helpers.rs",
|
||||
"test_helpers.rs",
|
||||
"fixtures.json",
|
||||
"schema.sql",
|
||||
];
|
||||
|
||||
const QUERY_WORDS: &[&str] = &[
|
||||
"par",
|
||||
"edi",
|
||||
"buf",
|
||||
"set",
|
||||
"mat",
|
||||
"con",
|
||||
"ren",
|
||||
"dis",
|
||||
"sea",
|
||||
"ter",
|
||||
"col",
|
||||
"hov",
|
||||
"out",
|
||||
"rep",
|
||||
"key",
|
||||
"too",
|
||||
"pan",
|
||||
"str",
|
||||
"dia",
|
||||
"com",
|
||||
"executor",
|
||||
"workspace",
|
||||
"settings",
|
||||
"terminal",
|
||||
"breadcrumbs",
|
||||
"git_blame",
|
||||
"fixtures",
|
||||
"schema",
|
||||
"config",
|
||||
"toolbar",
|
||||
];
|
||||
|
||||
/// Deterministic query generation from QUERY_WORDS using a simple LCG.
|
||||
/// Returns `count` queries of each arity: 1, 2, and 4 space-separated words.
|
||||
fn generate_queries(count: usize) -> (Vec<String>, Vec<String>, Vec<String>) {
|
||||
let mut state: u64 = 0xDEAD_BEEF;
|
||||
let mut next = || -> usize {
|
||||
// LCG: simple, fast, deterministic
|
||||
state = state.wrapping_mul(6364136223846793005).wrapping_add(1);
|
||||
(state >> 33) as usize
|
||||
};
|
||||
let mut n_word = |n: usize| -> Vec<String> {
|
||||
(0..count)
|
||||
.map(|_| {
|
||||
(0..n)
|
||||
.map(|_| QUERY_WORDS[next() % QUERY_WORDS.len()])
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ")
|
||||
})
|
||||
.collect()
|
||||
};
|
||||
|
||||
(n_word(1), n_word(2), n_word(4))
|
||||
}
|
||||
|
||||
fn generate_path_strings(count: usize) -> &'static [String] {
|
||||
let paths: Box<[String]> = (0..count)
|
||||
.map(|id| {
|
||||
let dir = DIRS[id % DIRS.len()];
|
||||
let file = FILENAMES[id / DIRS.len() % FILENAMES.len()];
|
||||
format!("{dir}/{file}")
|
||||
})
|
||||
.collect();
|
||||
Box::leak(paths)
|
||||
}
|
||||
|
||||
fn generate_nucleo_path_candidates(
|
||||
paths: &'static [String],
|
||||
) -> Vec<fuzzy_nucleo::PathMatchCandidate<'static>> {
|
||||
paths
|
||||
.iter()
|
||||
.map(|path| {
|
||||
fuzzy_nucleo::PathMatchCandidate::new(RelPath::unix(path).unwrap(), false, None)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn generate_fuzzy_path_candidates(
|
||||
paths: &'static [String],
|
||||
) -> Vec<fuzzy::PathMatchCandidate<'static>> {
|
||||
paths
|
||||
.iter()
|
||||
.map(|path| fuzzy::PathMatchCandidate {
|
||||
is_dir: false,
|
||||
path: RelPath::unix(path).unwrap(),
|
||||
char_bag: CharBag::from(path.as_str()),
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Returns `query` with the first character of every whitespace-separated word
/// ASCII-uppercased and the words re-joined by single spaces.
fn capitalize_each_word(query: &str) -> String {
    let mut capitalized = String::with_capacity(query.len());
    for word in query.split_whitespace() {
        // `split_whitespace` never yields empty words, so a non-empty output
        // means at least one word has already been written.
        if !capitalized.is_empty() {
            capitalized.push(' ');
        }
        let mut rest = word.chars();
        if let Some(first) = rest.next() {
            capitalized.push(first.to_ascii_uppercase());
            capitalized.push_str(rest.as_str());
        }
    }
    capitalized
}
|
||||
|
||||
fn bench_path_matching(criterion: &mut Criterion) {
|
||||
let sizes = [100, 1000, 10_000];
|
||||
let all_path_strings = sizes.map(generate_path_strings);
|
||||
let query_count = 200;
|
||||
let (q1, q2, q4) = generate_queries(query_count);
|
||||
let q1_upper: Vec<String> = q1.iter().map(|q| capitalize_each_word(q)).collect();
|
||||
let q2_upper: Vec<String> = q2.iter().map(|q| capitalize_each_word(q)).collect();
|
||||
let q4_upper: Vec<String> = q4.iter().map(|q| capitalize_each_word(q)).collect();
|
||||
|
||||
for (label, queries, case) in [
|
||||
("path/1-word", &q1, fuzzy_nucleo::Case::Ignore),
|
||||
("path/2-word", &q2, fuzzy_nucleo::Case::Ignore),
|
||||
("path/4-word", &q4, fuzzy_nucleo::Case::Ignore),
|
||||
("path_smart/1-word", &q1_upper, fuzzy_nucleo::Case::Smart),
|
||||
("path_smart/2-word", &q2_upper, fuzzy_nucleo::Case::Smart),
|
||||
("path_smart/4-word", &q4_upper, fuzzy_nucleo::Case::Smart),
|
||||
] {
|
||||
let mut group = criterion.benchmark_group(label);
|
||||
for (size_index, &size) in sizes.iter().enumerate() {
|
||||
let path_strings = all_path_strings[size_index];
|
||||
|
||||
let mut query_idx = 0usize;
|
||||
group.bench_function(BenchmarkId::new("nucleo", size), |b| {
|
||||
b.iter_batched(
|
||||
|| {
|
||||
let query = queries[query_idx % queries.len()].as_str();
|
||||
query_idx += 1;
|
||||
(generate_nucleo_path_candidates(path_strings), query)
|
||||
},
|
||||
|(candidates, query)| {
|
||||
fuzzy_nucleo::match_fixed_path_set(
|
||||
candidates,
|
||||
0,
|
||||
None,
|
||||
query,
|
||||
case,
|
||||
size,
|
||||
PathStyle::Posix,
|
||||
)
|
||||
},
|
||||
BatchSize::SmallInput,
|
||||
)
|
||||
});
|
||||
|
||||
let mut query_idx = 0usize;
|
||||
group.bench_function(BenchmarkId::new("fuzzy", size), |b| {
|
||||
b.iter_batched(
|
||||
|| {
|
||||
let query = queries[query_idx % queries.len()].as_str();
|
||||
query_idx += 1;
|
||||
(generate_fuzzy_path_candidates(path_strings), query)
|
||||
},
|
||||
|(candidates, query)| {
|
||||
fuzzy::match_fixed_path_set(
|
||||
candidates,
|
||||
0,
|
||||
None,
|
||||
query,
|
||||
false,
|
||||
size,
|
||||
PathStyle::Posix,
|
||||
)
|
||||
},
|
||||
BatchSize::SmallInput,
|
||||
)
|
||||
});
|
||||
}
|
||||
group.finish();
|
||||
}
|
||||
}
|
||||
|
||||
criterion_group!(benches, bench_path_matching);
|
||||
criterion_main!(benches);
|
||||
|
|
@ -3,3 +3,53 @@ mod paths;
|
|||
pub use paths::{
|
||||
PathMatch, PathMatchCandidate, PathMatchCandidateSet, match_fixed_path_set, match_path_sets,
|
||||
};
|
||||
|
||||
pub(crate) struct Cancelled;
|
||||
|
||||
/// Case-handling mode passed to the matchers in this crate.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum Case {
    /// Smart-case mode.
    Smart,
    /// Case is ignored.
    Ignore,
}

impl Case {
    /// Maps a boolean "smart case" flag onto the enum: `true` is [`Case::Smart`],
    /// `false` is [`Case::Ignore`].
    pub fn from_smart(smart: bool) -> Self {
        match smart {
            true => Self::Smart,
            false => Self::Ignore,
        }
    }

    /// Returns `true` only for [`Case::Smart`].
    pub fn is_smart(self) -> bool {
        self == Self::Smart
    }
}
|
||||
|
||||
/// Two-state `On`/`Off` flag with boolean conversions.
///
/// NOTE(review): presumably toggles a length-based score penalty; its consumers
/// are not visible here, so confirm against callers.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum LengthPenalty {
    On,
    Off,
}

impl LengthPenalty {
    /// Maps `true` to [`LengthPenalty::On`] and `false` to [`LengthPenalty::Off`].
    pub fn from_bool(on: bool) -> Self {
        match on {
            true => Self::On,
            false => Self::Off,
        }
    }

    /// Returns `true` only for [`LengthPenalty::On`].
    pub fn is_on(self) -> bool {
        self == Self::On
    }
}
|
||||
|
||||
/// Reconstruct byte-offset match positions from a list of matched char offsets.
///
/// `sorted_char_indices` is expected to be sorted ascending. The walk is a single
/// merge pass over `s` and the index list, so it is linear in both.
///
/// Duplicate indices collapse into a single position, and an index that is smaller
/// than the current char offset (a duplicate or an out-of-order entry) is skipped
/// instead of blocking the cursor, so later matches are still reported. Indices
/// past the end of `s` are ignored.
///
/// Returns the byte offsets, in ascending order, of the chars of `s` whose char
/// offset appears in `sorted_char_indices`.
pub(crate) fn positions_from_sorted(s: &str, sorted_char_indices: &[u32]) -> Vec<usize> {
    let mut pending = sorted_char_indices.iter().copied().peekable();
    let mut out = Vec::with_capacity(sorted_char_indices.len());
    for (char_offset, (byte_offset, _)) in s.char_indices().enumerate() {
        if pending.peek().is_none() {
            break;
        }
        // Consume every index at or before this char. Comparing in `usize` (a
        // widening of the `u32` index) avoids truncating `char_offset`.
        let mut matched = false;
        while let Some(index) = pending.next_if(|&m| m as usize <= char_offset) {
            matched |= index as usize == char_offset;
        }
        if matched {
            out.push(byte_offset);
        }
    }
    out
}
|
||||
|
|
|
|||
|
|
@ -4,8 +4,15 @@ static MATCHERS: Mutex<Vec<nucleo::Matcher>> = Mutex::new(Vec::new());
|
|||
|
||||
pub const LENGTH_PENALTY: f64 = 0.01;
|
||||
|
||||
/// Returns the size limit used for the pooled matchers: the parallelism reported by
/// `std::thread::available_parallelism`, or 8 when that query fails, and never
/// less than 1.
fn pool_cap() -> usize {
    let detected = match std::thread::available_parallelism() {
        Ok(parallelism) => parallelism.get(),
        Err(_) => 8,
    };
    std::cmp::max(detected, 1)
}
|
||||
|
||||
pub fn get_matcher(config: nucleo::Config) -> nucleo::Matcher {
|
||||
let mut matchers = MATCHERS.lock().unwrap();
|
||||
let mut matchers = MATCHERS.lock().unwrap_or_else(|e| e.into_inner());
|
||||
match matchers.pop() {
|
||||
Some(mut matcher) => {
|
||||
matcher.config = config;
|
||||
|
|
@ -16,12 +23,15 @@ pub fn get_matcher(config: nucleo::Config) -> nucleo::Matcher {
|
|||
}
|
||||
|
||||
pub fn return_matcher(matcher: nucleo::Matcher) {
|
||||
MATCHERS.lock().unwrap().push(matcher);
|
||||
let mut pool = MATCHERS.lock().unwrap_or_else(|e| e.into_inner());
|
||||
if pool.len() < pool_cap() {
|
||||
pool.push(matcher);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_matchers(n: usize, config: nucleo::Config) -> Vec<nucleo::Matcher> {
|
||||
let mut matchers: Vec<_> = {
|
||||
let mut pool = MATCHERS.lock().unwrap();
|
||||
let mut pool = MATCHERS.lock().unwrap_or_else(|e| e.into_inner());
|
||||
let available = pool.len().min(n);
|
||||
pool.drain(..available)
|
||||
.map(|mut matcher| {
|
||||
|
|
@ -34,6 +44,9 @@ pub fn get_matchers(n: usize, config: nucleo::Config) -> Vec<nucleo::Matcher> {
|
|||
matchers
|
||||
}
|
||||
|
||||
pub fn return_matchers(mut matchers: Vec<nucleo::Matcher>) {
|
||||
MATCHERS.lock().unwrap().append(&mut matchers);
|
||||
pub fn return_matchers(matchers: Vec<nucleo::Matcher>) {
|
||||
let cap = pool_cap();
|
||||
let mut pool = MATCHERS.lock().unwrap_or_else(|e| e.into_inner());
|
||||
let space = cap.saturating_sub(pool.len());
|
||||
pool.extend(matchers.into_iter().take(space));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,12 +11,35 @@ use util::{paths::PathStyle, rel_path::RelPath};
|
|||
use nucleo::Utf32Str;
|
||||
use nucleo::pattern::{Atom, AtomKind, CaseMatching, Normalization};
|
||||
|
||||
use fuzzy::CharBag;
|
||||
|
||||
use crate::matcher::{self, LENGTH_PENALTY};
|
||||
use crate::{Cancelled, Case, positions_from_sorted};
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct PathMatchCandidate<'a> {
|
||||
pub is_dir: bool,
|
||||
pub path: &'a RelPath,
|
||||
pub char_bag: CharBag,
|
||||
}
|
||||
|
||||
impl<'a> PathMatchCandidate<'a> {
|
||||
/// Build a candidate whose prefilter bag covers both the worktree prefix and the path.
|
||||
/// Pass `None` when matching against paths that have no worktree prefix.
|
||||
pub fn new(path: &'a RelPath, is_dir: bool, path_prefix: Option<&RelPath>) -> Self {
|
||||
let mut char_bag = CharBag::default();
|
||||
if let Some(prefix) = path_prefix
|
||||
&& !prefix.is_empty()
|
||||
{
|
||||
char_bag.extend(prefix.as_unix_str().chars().map(|c| c.to_ascii_lowercase()));
|
||||
}
|
||||
char_bag.extend(path.as_unix_str().chars().map(|c| c.to_ascii_lowercase()));
|
||||
Self {
|
||||
is_dir,
|
||||
path,
|
||||
char_bag,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
|
|
@ -62,8 +85,7 @@ impl PartialOrd for PathMatch {
|
|||
impl Ord for PathMatch {
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
self.score
|
||||
.partial_cmp(&other.score)
|
||||
.unwrap_or(Ordering::Equal)
|
||||
.total_cmp(&other.score)
|
||||
.then_with(|| self.worktree_id.cmp(&other.worktree_id))
|
||||
.then_with(|| {
|
||||
other
|
||||
|
|
@ -74,18 +96,47 @@ impl Ord for PathMatch {
|
|||
}
|
||||
}
|
||||
|
||||
fn make_atoms(query: &str, smart_case: bool) -> Vec<Atom> {
|
||||
let case = if smart_case {
|
||||
CaseMatching::Smart
|
||||
} else {
|
||||
CaseMatching::Ignore
|
||||
};
|
||||
// Path matching is always case-insensitive at the nucleo level. `Case::Smart`
|
||||
// is honored as a *scoring hint*: when the query contains uppercase, candidates
|
||||
// whose matched characters disagree in case are downranked by a factor per
|
||||
// mismatch rather than dropped. This keeps `"Editor: Backspace"` matching
|
||||
// `"editor: backspace"` while still preferring exact-case hits.
|
||||
const SMART_CASE_PENALTY_PER_MISMATCH: f64 = 0.9;
|
||||
|
||||
pub(crate) fn make_atoms(query: &str) -> Vec<Atom> {
|
||||
query
|
||||
.split_whitespace()
|
||||
.map(|word| Atom::new(word, case, Normalization::Smart, AtomKind::Fuzzy, false))
|
||||
.map(|word| {
|
||||
Atom::new(
|
||||
word,
|
||||
CaseMatching::Ignore,
|
||||
Normalization::Smart,
|
||||
AtomKind::Fuzzy,
|
||||
false,
|
||||
)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
// Only populated when we will actually charge a smart-case penalty, so the hot
|
||||
// path can iterate a plain `&[Atom]` and ignore this slice entirely.
|
||||
fn make_source_words(query: &str, case: Case) -> Option<Vec<Vec<char>>> {
|
||||
(case.is_smart() && query.chars().any(|c| c.is_uppercase())).then(|| {
|
||||
query
|
||||
.split_whitespace()
|
||||
.map(|word| word.chars().collect())
|
||||
.collect()
|
||||
})
|
||||
}
|
||||
|
||||
fn case_penalty(mismatches: u32) -> f64 {
|
||||
if mismatches == 0 {
|
||||
1.0
|
||||
} else {
|
||||
SMART_CASE_PENALTY_PER_MISMATCH.powi(mismatches as i32)
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn distance_between_paths(path: &RelPath, relative_to: &RelPath) -> usize {
|
||||
let mut path_components = path.components();
|
||||
let mut relative_components = relative_to.components();
|
||||
|
|
@ -121,11 +172,12 @@ fn get_filename_match_bonus(
|
|||
}
|
||||
total_score as f64 / filename.len().max(1) as f64
|
||||
}
|
||||
struct Cancelled;
|
||||
|
||||
fn path_match_helper<'a>(
|
||||
matcher: &mut nucleo::Matcher,
|
||||
atoms: &[Atom],
|
||||
source_words: Option<&[Vec<char>]>,
|
||||
query_bag: CharBag,
|
||||
candidates: impl Iterator<Item = PathMatchCandidate<'a>>,
|
||||
results: &mut Vec<PathMatch>,
|
||||
worktree_id: usize,
|
||||
|
|
@ -146,6 +198,7 @@ fn path_match_helper<'a>(
|
|||
let mut buf = Vec::new();
|
||||
let mut matched_chars: Vec<u32> = Vec::new();
|
||||
let mut atom_matched_chars = Vec::new();
|
||||
let mut candidate_chars: Vec<char> = Vec::new();
|
||||
for candidate in candidates {
|
||||
buf.clear();
|
||||
matched_chars.clear();
|
||||
|
|
@ -153,6 +206,10 @@ fn path_match_helper<'a>(
|
|||
return Err(Cancelled);
|
||||
}
|
||||
|
||||
if !candidate.char_bag.is_superset(query_bag) {
|
||||
continue;
|
||||
}
|
||||
|
||||
candidate_buf.truncate(path_prefix_len);
|
||||
if root_is_file {
|
||||
candidate_buf.push_str(path_prefix.as_unix_str());
|
||||
|
|
@ -162,18 +219,36 @@ fn path_match_helper<'a>(
|
|||
|
||||
let haystack = Utf32Str::new(&candidate_buf, &mut buf);
|
||||
|
||||
if source_words.is_some() {
|
||||
candidate_chars.clear();
|
||||
candidate_chars.extend(candidate_buf.chars());
|
||||
}
|
||||
|
||||
let mut total_score: u32 = 0;
|
||||
let mut case_mismatches: u32 = 0;
|
||||
let mut all_matched = true;
|
||||
|
||||
for atom in atoms {
|
||||
for (atom_idx, atom) in atoms.iter().enumerate() {
|
||||
atom_matched_chars.clear();
|
||||
if let Some(score) = atom.indices(haystack, matcher, &mut atom_matched_chars) {
|
||||
total_score = total_score.saturating_add(score as u32);
|
||||
matched_chars.extend_from_slice(&atom_matched_chars);
|
||||
} else {
|
||||
let Some(score) = atom.indices(haystack, matcher, &mut atom_matched_chars) else {
|
||||
all_matched = false;
|
||||
break;
|
||||
};
|
||||
total_score = total_score.saturating_add(score as u32);
|
||||
if let Some(source_words) = source_words {
|
||||
let query_chars = &source_words[atom_idx];
|
||||
if query_chars.len() == atom_matched_chars.len() {
|
||||
for (&query_char, &pos) in query_chars.iter().zip(&atom_matched_chars) {
|
||||
if let Some(&candidate_char) = candidate_chars.get(pos as usize)
|
||||
&& candidate_char != query_char
|
||||
&& candidate_char.eq_ignore_ascii_case(&query_char)
|
||||
{
|
||||
case_mismatches += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
matched_chars.extend_from_slice(&atom_matched_chars);
|
||||
}
|
||||
|
||||
if all_matched && !atoms.is_empty() {
|
||||
|
|
@ -182,17 +257,9 @@ fn path_match_helper<'a>(
|
|||
|
||||
let length_penalty = candidate_buf.len() as f64 * LENGTH_PENALTY;
|
||||
let filename_bonus = get_filename_match_bonus(&candidate_buf, atoms, matcher);
|
||||
let adjusted_score = total_score as f64 + filename_bonus - length_penalty;
|
||||
let mut positions: Vec<usize> = candidate_buf
|
||||
.char_indices()
|
||||
.enumerate()
|
||||
.filter_map(|(char_offset, (byte_offset, _))| {
|
||||
matched_chars
|
||||
.contains(&(char_offset as u32))
|
||||
.then_some(byte_offset)
|
||||
})
|
||||
.collect();
|
||||
positions.sort_unstable();
|
||||
let positive = (total_score as f64 + filename_bonus) * case_penalty(case_mismatches);
|
||||
let adjusted_score = positive - length_penalty;
|
||||
let positions = positions_from_sorted(&candidate_buf, &matched_chars);
|
||||
|
||||
results.push(PathMatch {
|
||||
score: adjusted_score,
|
||||
|
|
@ -225,7 +292,7 @@ pub fn match_fixed_path_set(
|
|||
worktree_id: usize,
|
||||
worktree_root_name: Option<Arc<RelPath>>,
|
||||
query: &str,
|
||||
smart_case: bool,
|
||||
case: Case,
|
||||
max_results: usize,
|
||||
path_style: PathStyle,
|
||||
) -> Vec<PathMatch> {
|
||||
|
|
@ -233,7 +300,9 @@ pub fn match_fixed_path_set(
|
|||
config.set_match_paths();
|
||||
let mut matcher = matcher::get_matcher(config);
|
||||
|
||||
let atoms = make_atoms(query, smart_case);
|
||||
let atoms = make_atoms(query);
|
||||
let source_words = make_source_words(query, case);
|
||||
let query_bag = CharBag::from(query);
|
||||
|
||||
let root_is_file = worktree_root_name.is_some() && candidates.iter().all(|c| c.path.is_empty());
|
||||
|
||||
|
|
@ -244,6 +313,8 @@ pub fn match_fixed_path_set(
|
|||
path_match_helper(
|
||||
&mut matcher,
|
||||
&atoms,
|
||||
source_words.as_deref(),
|
||||
query_bag,
|
||||
candidates.into_iter(),
|
||||
&mut results,
|
||||
worktree_id,
|
||||
|
|
@ -263,7 +334,7 @@ pub async fn match_path_sets<'a, Set: PathMatchCandidateSet<'a>>(
|
|||
candidate_sets: &'a [Set],
|
||||
query: &str,
|
||||
relative_to: &Option<Arc<RelPath>>,
|
||||
smart_case: bool,
|
||||
case: Case,
|
||||
max_results: usize,
|
||||
cancel_flag: &AtomicBool,
|
||||
executor: BackgroundExecutor,
|
||||
|
|
@ -281,7 +352,9 @@ pub async fn match_path_sets<'a, Set: PathMatchCandidateSet<'a>>(
|
|||
query.to_owned()
|
||||
};
|
||||
|
||||
let atoms = make_atoms(&query, smart_case);
|
||||
let atoms = make_atoms(&query);
|
||||
let source_words = make_source_words(&query, case);
|
||||
let query_bag = CharBag::from(query.as_str());
|
||||
|
||||
let num_cpus = executor.num_cpus().min(path_count);
|
||||
let segment_size = path_count.div_ceil(num_cpus);
|
||||
|
|
@ -299,6 +372,7 @@ pub async fn match_path_sets<'a, Set: PathMatchCandidateSet<'a>>(
|
|||
.enumerate()
|
||||
{
|
||||
let atoms = atoms.clone();
|
||||
let source_words = source_words.clone();
|
||||
let relative_to = relative_to.clone();
|
||||
scope.spawn(async move {
|
||||
let segment_start = segment_idx * segment_size;
|
||||
|
|
@ -316,6 +390,8 @@ pub async fn match_path_sets<'a, Set: PathMatchCandidateSet<'a>>(
|
|||
if path_match_helper(
|
||||
matcher,
|
||||
&atoms,
|
||||
source_words.as_deref(),
|
||||
query_bag,
|
||||
candidates,
|
||||
results,
|
||||
candidate_set.id(),
|
||||
|
|
|
|||
|
|
@ -6439,6 +6439,7 @@ impl<'a> Iterator for PathMatchCandidateSetNucleoIter<'a> {
|
|||
.map(|entry| fuzzy_nucleo::PathMatchCandidate {
|
||||
is_dir: entry.kind.is_dir(),
|
||||
path: &entry.path,
|
||||
char_bag: entry.char_bag,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue