fuzzy_nucleo: Refactor multi-atom code to use nucleo::Pattern (#55264)

Refactor of the fuzzy_nucleo string and path matching code: instead of
handling the multiple atoms ourselves, we can just use `nucleo::Pattern`
and abstract all of that away. This replaces the for loop in the
path/string_match_helper functions. All functionality is the same — or at
least basically the same, within some tiny margin of the original.

This could enable the use of `nucleo::Pattern::parse` in the future, if
that is wanted, which allows some extra syntax to activate different
matching modes. [More info from
deepwiki](https://deepwiki.com/search/how-do-the-different-atom-matc_37e510de-af27-44a1-a52f-3fc367462e6e?mode=fast).
I'm pretty sure that enabling it is as simple as replacing a
`Pattern::new(...)` call with `Pattern::parse(...)`.

Self-Review Checklist:

- [x] I've reviewed my own diff for quality, security, and reliability
- [x] Unsafe blocks (if any) have justifying comments
- [x] The content is consistent with the [UI/UX
checklist](https://github.com/zed-industries/zed/blob/main/CONTRIBUTING.md#uiux-checklist)
- [x] Tests cover the new/changed behavior
- [x] Performance impact has been considered and is acceptable

Release Notes:

- N/A
This commit is contained in:
Finn Eitreim 2026-05-06 03:56:20 -07:00 committed by GitHub
parent 759f027f8a
commit 7b5b0e4e95
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 168 additions and 240 deletions

View file

@ -2,6 +2,9 @@ mod matcher;
mod paths;
mod strings;
use fuzzy::CharBag;
use nucleo::pattern::{AtomKind, CaseMatching, Normalization, Pattern};
pub use paths::{
PathMatch, PathMatchCandidate, PathMatchCandidateSet, match_fixed_path_set, match_path_sets,
};
@ -45,6 +48,83 @@ impl LengthPenalty {
}
}
// Nucleo itself is always asked to match case-insensitively. Handing it
// `CaseMatching::Smart` would *reject* candidates whose capitalization differs
// from the query, which breaks pickers such as the command palette
// (`"Editor: Backspace"` typed against the action named `"editor: backspace"`).
// `Case::Smart` is therefore treated as a *scoring hint*: once the query
// contains an uppercase character, candidates whose matched characters
// disagree in case are downranked by a per-mismatch penalty instead of being
// dropped.
pub(crate) struct Query {
    /// Fuzzy pattern built from the whitespace-separated words of the query,
    /// with nucleo-level case matching set to `CaseMatching::Ignore`.
    pub(crate) pattern: Pattern,
    /// The query's non-whitespace chars in input order. Only `Some` when a
    /// smart-case penalty will actually be charged. Lines up 1:1 with the
    /// indices appended by `Pattern::indices` (atom order, then needle order
    /// within each atom).
    pub(crate) query_chars: Option<Vec<char>>,
    /// Character bag built from the raw query string.
    pub(crate) char_bag: CharBag,
}

impl Query {
    /// Builds the matching state for `query`.
    ///
    /// Returns `None` when the query is empty or consists solely of
    /// whitespace, i.e. when there is nothing to match against.
    pub(crate) fn build(query: &str, case: Case) -> Option<Self> {
        let words: Vec<&str> = query.split_whitespace().collect();
        if words.is_empty() {
            return None;
        }

        // Collapse every whitespace run to a single space before handing the
        // text to nucleo.
        let pattern = Pattern::new(
            &words.join(" "),
            CaseMatching::Ignore,
            Normalization::Smart,
            AtomKind::Fuzzy,
        );

        // The per-char list is only needed when case mismatches will be
        // penalized: smart-case mode plus at least one uppercase query char.
        let query_chars = if case.is_smart() && query.chars().any(char::is_uppercase) {
            Some(query.chars().filter(|c| !c.is_whitespace()).collect())
        } else {
            None
        };

        Some(Query {
            pattern,
            query_chars,
            char_bag: CharBag::from(query),
        })
    }
}
/// Counts the matched positions at which the candidate's character differs
/// from the corresponding query character *only* by letter case.
///
/// * `query_chars` — the query's non-whitespace chars, or `None` when no
///   smart-case penalty applies (the function then returns `0`).
/// * `matched_chars` — char offsets (not byte offsets) into `candidate`, one
///   per query char and in the same order. If the two lengths disagree the
///   alignment is unknown and `0` is returned.
/// * `candidate` — the string that was matched.
/// * `candidate_chars` — caller-owned scratch buffer; it is cleared and
///   refilled with `candidate`'s chars so the allocation can be reused across
///   calls.
///
/// Offsets that fall outside `candidate` are ignored. The case comparison is
/// Unicode-aware so that it agrees with the `char::is_uppercase` test that
/// switches smart-case scoring on: `'É'` against `'é'` counts as a mismatch
/// just like `'E'` against `'e'`.
#[inline]
pub(crate) fn count_case_mismatches(
    query_chars: Option<&[char]>,
    matched_chars: &[u32],
    candidate: &str,
    candidate_chars: &mut Vec<char>,
) -> u32 {
    let Some(query_chars) = query_chars else {
        return 0;
    };
    if query_chars.len() != matched_chars.len() {
        return 0;
    }

    // Materialize the chars once so each matched offset is an O(1) lookup.
    candidate_chars.clear();
    candidate_chars.extend(candidate.chars());

    query_chars
        .iter()
        .zip(matched_chars)
        .filter(|&(&query_char, &pos)| {
            candidate_chars
                .get(pos as usize)
                .is_some_and(|&candidate_char| differs_only_by_case(candidate_char, query_char))
        })
        .map(|_| 1u32)
        .sum()
}

/// Returns `true` when `a` and `b` are different chars that become equal once
/// letter case is ignored.
#[inline]
fn differs_only_by_case(a: char, b: char) -> bool {
    // ASCII fast path first; fall back to the full lowercase mapping so
    // non-ASCII letters are handled as well.
    a != b && (a.eq_ignore_ascii_case(&b) || a.to_lowercase().eq(b.to_lowercase()))
}
/// Factor applied to the positive part of a score once per smart-case
/// mismatch.
const SMART_CASE_PENALTY_PER_MISMATCH: f64 = 0.9;

/// Returns the multiplicative score factor for `mismatches` smart-case
/// disagreements: `1.0` for none, otherwise
/// `SMART_CASE_PENALTY_PER_MISMATCH` raised to the mismatch count.
#[inline]
pub(crate) fn case_penalty(mismatches: u32) -> f64 {
    if mismatches == 0 {
        return 1.0;
    }
    // `powi` takes an `i32`. A bare `as i32` would wrap counts above
    // `i32::MAX` to a negative exponent and turn the penalty into a boost, so
    // saturate instead.
    let exponent = mismatches.min(i32::MAX as u32) as i32;
    SMART_CASE_PENALTY_PER_MISMATCH.powi(exponent)
}
/// Reconstruct byte-offset match positions from a list of matched char offsets
/// that is already sorted ascending and deduplicated.
pub(crate) fn positions_from_sorted(s: &str, sorted_char_indices: &[u32]) -> Vec<usize> {

View file

@ -9,12 +9,12 @@ use std::{
use util::{paths::PathStyle, rel_path::RelPath};
use nucleo::Utf32Str;
use nucleo::pattern::{Atom, AtomKind, CaseMatching, Normalization};
use nucleo::pattern::Pattern;
use fuzzy::CharBag;
use crate::matcher::{self, LENGTH_PENALTY};
use crate::{Cancelled, Case, positions_from_sorted};
use crate::{Cancelled, Case, Query, case_penalty, count_case_mismatches, positions_from_sorted};
#[derive(Clone, Debug)]
pub struct PathMatchCandidate<'a> {
@ -96,47 +96,6 @@ impl Ord for PathMatch {
}
}
// Path matching is always case-insensitive at the nucleo level. `Case::Smart`
// is honored as a *scoring hint*: when the query contains uppercase, candidates
// whose matched characters disagree in case are downranked by a factor per
// mismatch rather than dropped. This keeps `"Editor: Backspace"` matching
// `"editor: backspace"` while still preferring exact-case hits.
const SMART_CASE_PENALTY_PER_MISMATCH: f64 = 0.9;
/// Builds one fuzzy nucleo atom per whitespace-separated word of `query`,
/// each configured with `CaseMatching::Ignore` and `Normalization::Smart`.
/// An empty or all-whitespace query yields an empty vector.
pub(crate) fn make_atoms(query: &str) -> Vec<Atom> {
    let mut atoms = Vec::new();
    for word in query.split_whitespace() {
        atoms.push(Atom::new(
            word,
            CaseMatching::Ignore,
            Normalization::Smart,
            AtomKind::Fuzzy,
            false,
        ));
    }
    atoms
}
// Returns the chars of every whitespace-separated query word, but only when a
// smart-case penalty is actually going to be charged (smart-case mode and at
// least one uppercase char in the query). In every other case this is `None`,
// so the hot path can walk a plain `&[Atom]` and never look at this data.
fn make_source_words(query: &str, case: Case) -> Option<Vec<Vec<char>>> {
    let charges_penalty = case.is_smart() && query.chars().any(char::is_uppercase);
    if !charges_penalty {
        return None;
    }

    let words = query
        .split_whitespace()
        .map(|word| word.chars().collect::<Vec<char>>())
        .collect();
    Some(words)
}
/// Returns the multiplicative score factor for `mismatches` smart-case
/// disagreements: `1.0` for none, otherwise
/// `SMART_CASE_PENALTY_PER_MISMATCH` raised to the mismatch count.
fn case_penalty(mismatches: u32) -> f64 {
    if mismatches == 0 {
        return 1.0;
    }
    // `powi` takes an `i32`. A bare `as i32` would wrap counts above
    // `i32::MAX` to a negative exponent and turn the penalty into a boost, so
    // saturate instead.
    let exponent = mismatches.min(i32::MAX as u32) as i32;
    SMART_CASE_PENALTY_PER_MISMATCH.powi(exponent)
}
pub(crate) fn distance_between_paths(path: &RelPath, relative_to: &RelPath) -> usize {
let mut path_components = path.components();
let mut relative_components = relative_to.components();
@ -150,34 +109,34 @@ pub(crate) fn distance_between_paths(path: &RelPath, relative_to: &RelPath) -> u
path_components.count() + relative_components.count() + 1
}
/// Extra score for matches that land in the final path component.
///
/// Takes the file name of `candidate_buf`, scores every query atom against
/// just that name, sums the scores of the atoms that match, and divides the
/// sum by the file name's byte length. Returns `0.0` when the path has no
/// file name, or when the file name is empty or not valid UTF-8.
#[inline]
fn get_filename_match_bonus(
candidate_buf: &str,
query_atoms: &[Atom],
pattern: &Pattern,
matcher: &mut nucleo::Matcher,
) -> f64 {
let filename = match std::path::Path::new(candidate_buf).file_name() {
Some(f) => f.to_str().unwrap_or(""),
None => return 0.0,
};
if filename.is_empty() || query_atoms.is_empty() {
let Some(filename) = std::path::Path::new(candidate_buf)
.file_name()
.and_then(|f| f.to_str())
.filter(|f| !f.is_empty())
else {
return 0.0;
}
};
let mut buf = Vec::new();
let haystack = Utf32Str::new(filename, &mut buf);
let mut total_score = 0u32;
for atom in query_atoms {
if let Some(score) = atom.score(haystack, matcher) {
total_score = total_score.saturating_add(score as u32);
}
}
total_score as f64 / filename.len().max(1) as f64
// NOTE(review): the iterator form below accumulates with `.sum()`, whereas
// the loop form above used `saturating_add` — confirm the sum cannot
// overflow `u32` for realistic atom counts.
let score: u32 = pattern
.atoms
.iter()
.filter_map(|atom| atom.score(haystack, matcher))
.map(|s| s as u32)
.sum();
score as f64 / filename.len().max(1) as f64
}
fn path_match_helper<'a>(
matcher: &mut nucleo::Matcher,
atoms: &[Atom],
source_words: Option<&[Vec<char>]>,
query_bag: CharBag,
query: &Query,
candidates: impl Iterator<Item = PathMatchCandidate<'a>>,
results: &mut Vec<PathMatch>,
worktree_id: usize,
@ -197,7 +156,6 @@ fn path_match_helper<'a>(
let path_prefix_len = candidate_buf.len();
let mut buf = Vec::new();
let mut matched_chars: Vec<u32> = Vec::new();
let mut atom_matched_chars = Vec::new();
let mut candidate_chars: Vec<char> = Vec::new();
for candidate in candidates {
buf.clear();
@ -206,7 +164,7 @@ fn path_match_helper<'a>(
return Err(Cancelled);
}
if !candidate.char_bag.is_superset(query_bag) {
if !candidate.char_bag.is_superset(query.char_bag) {
continue;
}
@ -219,70 +177,45 @@ fn path_match_helper<'a>(
let haystack = Utf32Str::new(&candidate_buf, &mut buf);
if source_words.is_some() {
candidate_chars.clear();
candidate_chars.extend(candidate_buf.chars());
}
let Some(score) = query.pattern.indices(haystack, matcher, &mut matched_chars) else {
continue;
};
let mut total_score: u32 = 0;
let mut case_mismatches: u32 = 0;
let mut all_matched = true;
let case_mismatches = count_case_mismatches(
query.query_chars.as_deref(),
&matched_chars,
&candidate_buf,
&mut candidate_chars,
);
for (atom_idx, atom) in atoms.iter().enumerate() {
atom_matched_chars.clear();
let Some(score) = atom.indices(haystack, matcher, &mut atom_matched_chars) else {
all_matched = false;
break;
};
total_score = total_score.saturating_add(score as u32);
if let Some(source_words) = source_words {
let query_chars = &source_words[atom_idx];
if query_chars.len() == atom_matched_chars.len() {
for (&query_char, &pos) in query_chars.iter().zip(&atom_matched_chars) {
if let Some(&candidate_char) = candidate_chars.get(pos as usize)
&& candidate_char != query_char
&& candidate_char.eq_ignore_ascii_case(&query_char)
{
case_mismatches += 1;
}
}
}
}
matched_chars.extend_from_slice(&atom_matched_chars);
}
matched_chars.sort_unstable();
matched_chars.dedup();
if all_matched && !atoms.is_empty() {
matched_chars.sort_unstable();
matched_chars.dedup();
let length_penalty = candidate_buf.len() as f64 * LENGTH_PENALTY;
let filename_bonus = get_filename_match_bonus(&candidate_buf, &query.pattern, matcher);
let positive = (score as f64 + filename_bonus) * case_penalty(case_mismatches);
let adjusted_score = positive - length_penalty;
let positions = positions_from_sorted(&candidate_buf, &matched_chars);
let length_penalty = candidate_buf.len() as f64 * LENGTH_PENALTY;
let filename_bonus = get_filename_match_bonus(&candidate_buf, atoms, matcher);
let positive = (total_score as f64 + filename_bonus) * case_penalty(case_mismatches);
let adjusted_score = positive - length_penalty;
let positions = positions_from_sorted(&candidate_buf, &matched_chars);
results.push(PathMatch {
score: adjusted_score,
positions,
worktree_id,
path: if root_is_file {
Arc::clone(path_prefix)
} else {
candidate.path.into()
},
path_prefix: if root_is_file {
RelPath::empty().into()
} else {
Arc::clone(path_prefix)
},
is_dir: candidate.is_dir,
distance_to_relative_ancestor: relative_to
.as_ref()
.map_or(usize::MAX, |relative_to| {
distance_between_paths(candidate.path, relative_to.as_ref())
}),
});
}
results.push(PathMatch {
score: adjusted_score,
positions,
worktree_id,
path: if root_is_file {
Arc::clone(path_prefix)
} else {
candidate.path.into()
},
path_prefix: if root_is_file {
RelPath::empty().into()
} else {
Arc::clone(path_prefix)
},
is_dir: candidate.is_dir,
distance_to_relative_ancestor: relative_to.as_ref().map_or(usize::MAX, |relative_to| {
distance_between_paths(candidate.path, relative_to.as_ref())
}),
});
}
Ok(())
}
@ -296,14 +229,14 @@ pub fn match_fixed_path_set(
max_results: usize,
path_style: PathStyle,
) -> Vec<PathMatch> {
let Some(query) = Query::build(query, case) else {
return Vec::new();
};
let mut config = nucleo::Config::DEFAULT;
config.set_match_paths();
let mut matcher = matcher::get_matcher(config);
let atoms = make_atoms(query);
let source_words = make_source_words(query, case);
let query_bag = CharBag::from(query);
let root_is_file = worktree_root_name.is_some() && candidates.iter().all(|c| c.path.is_empty());
let path_prefix = worktree_root_name.unwrap_or_else(|| RelPath::empty().into());
@ -312,9 +245,7 @@ pub fn match_fixed_path_set(
path_match_helper(
&mut matcher,
&atoms,
source_words.as_deref(),
query_bag,
&query,
candidates.into_iter(),
&mut results,
worktree_id,
@ -352,9 +283,9 @@ pub async fn match_path_sets<'a, Set: PathMatchCandidateSet<'a>>(
query.to_owned()
};
let atoms = make_atoms(&query);
let source_words = make_source_words(&query, case);
let query_bag = CharBag::from(query.as_str());
let Some(query) = Query::build(&query, case) else {
return Vec::new();
};
let num_cpus = executor.num_cpus().min(path_count);
let segment_size = path_count.div_ceil(num_cpus);
@ -371,8 +302,7 @@ pub async fn match_path_sets<'a, Set: PathMatchCandidateSet<'a>>(
.zip(matchers.iter_mut())
.enumerate()
{
let atoms = atoms.clone();
let source_words = source_words.clone();
let query = &query;
let relative_to = relative_to.clone();
scope.spawn(async move {
let segment_start = segment_idx * segment_size;
@ -389,9 +319,7 @@ pub async fn match_path_sets<'a, Set: PathMatchCandidateSet<'a>>(
if path_match_helper(
matcher,
&atoms,
source_words.as_deref(),
query_bag,
query,
candidates,
results,
candidate_set.id(),

View file

@ -8,61 +8,14 @@ use std::{
use gpui::{BackgroundExecutor, SharedString};
use nucleo::Utf32Str;
use nucleo::pattern::{Atom, AtomKind, CaseMatching, Normalization};
use crate::{
Cancelled, Case, LengthPenalty,
Cancelled, Case, LengthPenalty, Query, case_penalty, count_case_mismatches,
matcher::{self, LENGTH_PENALTY},
positions_from_sorted,
};
use fuzzy::CharBag;
// String matching is always case-insensitive at the nucleo level — using
// `CaseMatching::Smart` there would reject queries whose capitalization
// doesn't match the candidate, breaking pickers like the command palette
// (`"Editor: Backspace"` against the action named `"editor: backspace"`).
// `Case::Smart` is still honored as a *scoring hint*: when the query
// contains uppercase, candidates whose matched characters disagree in case
// are downranked rather than dropped.
const SMART_CASE_PENALTY_PER_MISMATCH: f64 = 0.9;
/// Pre-processed form of a string-match query.
struct Query {
    /// One fuzzy atom per whitespace-separated query word.
    atoms: Vec<Atom>,
    /// Chars of each query word, parallel to `atoms`. Only `Some` when a
    /// smart-case penalty will be charged.
    source_words: Option<Vec<Vec<char>>>,
    /// Character bag built from the raw query string.
    char_bag: CharBag,
}

impl Query {
    /// Builds the matching state for `query`, or returns `None` when the
    /// query contains no words (empty or whitespace only).
    fn build(query: &str, case: Case) -> Option<Self> {
        let atoms: Vec<Atom> = query
            .split_whitespace()
            .map(|word| {
                Atom::new(
                    word,
                    CaseMatching::Ignore,
                    Normalization::Smart,
                    AtomKind::Fuzzy,
                    false,
                )
            })
            .collect();
        if atoms.is_empty() {
            return None;
        }

        // Per-word chars are only kept when case mismatches will be
        // penalized: smart-case mode plus at least one uppercase query char.
        let source_words = if case.is_smart() && query.chars().any(char::is_uppercase) {
            Some(
                query
                    .split_whitespace()
                    .map(|word| word.chars().collect::<Vec<char>>())
                    .collect(),
            )
        } else {
            None
        };

        Some(Query {
            atoms,
            source_words,
            char_bag: CharBag::from(query),
        })
    }
}
#[derive(Clone, Debug)]
pub struct StringMatchCandidate {
pub id: usize,
@ -281,7 +234,6 @@ where
{
let mut buf = Vec::new();
let mut matched_chars: Vec<u32> = Vec::new();
let mut atom_matched_chars = Vec::new();
let mut candidate_chars: Vec<char> = Vec::new();
for candidate in candidates {
@ -297,69 +249,37 @@ where
continue;
}
let haystack: Utf32Str = Utf32Str::new(&borrowed.string, &mut buf);
let haystack: Utf32Str = Utf32Str::new(borrowed.string.as_ref(), &mut buf);
if query.source_words.is_some() {
candidate_chars.clear();
candidate_chars.extend(borrowed.string.chars());
}
let Some(score) = query.pattern.indices(haystack, matcher, &mut matched_chars) else {
continue;
};
let mut total_score: u32 = 0;
let mut case_mismatches: u32 = 0;
let mut all_matched = true;
let case_mismatches = count_case_mismatches(
query.query_chars.as_deref(),
&matched_chars,
borrowed.string.as_ref(),
&mut candidate_chars,
);
for (atom_idx, atom) in query.atoms.iter().enumerate() {
atom_matched_chars.clear();
let Some(score) = atom.indices(haystack, matcher, &mut atom_matched_chars) else {
all_matched = false;
break;
};
total_score = total_score.saturating_add(score as u32);
if let Some(source_words) = query.source_words.as_deref() {
let query_chars = &source_words[atom_idx];
if query_chars.len() == atom_matched_chars.len() {
for (&query_char, &pos) in query_chars.iter().zip(&atom_matched_chars) {
if let Some(&candidate_char) = candidate_chars.get(pos as usize)
&& candidate_char != query_char
&& candidate_char.eq_ignore_ascii_case(&query_char)
{
case_mismatches += 1;
}
}
}
}
matched_chars.extend_from_slice(&atom_matched_chars);
}
matched_chars.sort_unstable();
matched_chars.dedup();
if all_matched {
matched_chars.sort_unstable();
matched_chars.dedup();
let positive = score as f64 * case_penalty(case_mismatches);
let adjusted_score =
positive - length_penalty_for(borrowed.string.as_ref(), length_penalty);
let positions = positions_from_sorted(borrowed.string.as_ref(), &matched_chars);
let positive = total_score as f64 * case_penalty(case_mismatches);
let adjusted_score =
positive - length_penalty_for(borrowed.string.as_ref(), length_penalty);
let positions = positions_from_sorted(borrowed.string.as_ref(), &matched_chars);
results.push(StringMatch {
candidate_id: borrowed.id,
score: adjusted_score,
positions,
string: borrowed.string.clone(),
});
}
results.push(StringMatch {
candidate_id: borrowed.id,
score: adjusted_score,
positions,
string: borrowed.string.clone(),
});
}
Ok(())
}
/// Returns the multiplicative score factor for `mismatches` smart-case
/// disagreements: `1.0` for none, otherwise
/// `SMART_CASE_PENALTY_PER_MISMATCH` raised to the mismatch count.
#[inline]
fn case_penalty(mismatches: u32) -> f64 {
    if mismatches == 0 {
        return 1.0;
    }
    // `powi` takes an `i32`. A bare `as i32` would wrap counts above
    // `i32::MAX` to a negative exponent and turn the penalty into a boost, so
    // saturate instead.
    let exponent = mismatches.min(i32::MAX as u32) as i32;
    SMART_CASE_PENALTY_PER_MISMATCH.powi(exponent)
}
#[inline]
fn length_penalty_for(s: &str, length_penalty: LengthPenalty) -> f64 {
if length_penalty.is_on() {