fuzzy_nucleo: Refactor multi-atom code to use nucleo::Pattern (#55264)

Refactor of the fuzzy_nucleo string and path matching code: instead of handling the multiple atoms ourselves, we can use `nucleo::Pattern` and abstract all of that away. This replaces the per-atom for loop in the path/string match helper functions. Behavior is intended to be exactly the same; in practice it is basically the same, within some tiny margin of the original. This could enable the use of `nucleo::Pattern::parse` in the future if that is wanted, which allows some extra syntax to activate different matching modes. [more info from deepwiki](https://deepwiki.com/search/how-do-the-different-atom-matc_37e510de-af27-44a1-a52f-3fc367462e6e?mode=fast). I'm pretty sure that enabling it is as simple as replacing a `Pattern::new(...)` call with `Pattern::parse(...)`. Self-Review Checklist: - [x] I've reviewed my own diff for quality, security, and reliability - [x] Unsafe blocks (if any) have justifying comments - [x] The content is consistent with the [UI/UX checklist](https://github.com/zed-industries/zed/blob/main/CONTRIBUTING.md#uiux-checklist) - [x] Tests cover the new/changed behavior - [x] Performance impact has been considered and is acceptable Release Notes: - N/A
2026-05-31 19:05:00 +07:00 · 2026-05-06 03:56:20 -07:00 · 2026-05-06 03:56:20 -07:00 · 7b5b0e4e95
commit 7b5b0e4e95
parent 759f027f8a
3 changed files with 168 additions and 240 deletions
--- a/crates/fuzzy_nucleo/src/fuzzy_nucleo.rs
+++ b/crates/fuzzy_nucleo/src/fuzzy_nucleo.rs
@ -2,6 +2,9 @@ mod matcher;
 mod paths;
 mod strings;

+use fuzzy::CharBag;
+use nucleo::pattern::{AtomKind, CaseMatching, Normalization, Pattern};
+
 pub use paths::{
    PathMatch, PathMatchCandidate, PathMatchCandidateSet, match_fixed_path_set, match_path_sets,
 };
@ -45,6 +48,83 @@ impl LengthPenalty {
    }
 }

+// Matching is always case-insensitive at the nucleo level — using
+// `CaseMatching::Smart` there would *reject* candidates whose capitalization
+// doesn't match the query, breaking pickers like the command palette
+// (`"Editor: Backspace"` against the action named `"editor: backspace"`).
+// `Case::Smart` is honored as a *scoring hint* instead: when the query
+// contains uppercase, candidates whose matched characters disagree in case
+// are downranked by a per-mismatch penalty rather than dropped.
+pub(crate) struct Query {
+    pub(crate) pattern: Pattern,
+    /// Non-whitespace query chars in input order, populated only when a smart-case
+    /// penalty will actually be charged. Aligns 1:1 with the indices appended by
+    /// `Pattern::indices` (atom-order, needle-order within each atom).
+    pub(crate) query_chars: Option<Vec<char>>,
+    pub(crate) char_bag: CharBag,
+}
+
+impl Query {
+    pub(crate) fn build(query: &str, case: Case) -> Option<Self> {
+        if query.chars().all(char::is_whitespace) {
+            return None;
+        }
+        let normalized = query.split_whitespace().collect::<Vec<_>>().join(" ");
+        let pattern = Pattern::new(
+            &normalized,
+            CaseMatching::Ignore,
+            Normalization::Smart,
+            AtomKind::Fuzzy,
+        );
+        let wants_case_penalty = case.is_smart() && query.chars().any(|c| c.is_uppercase());
+        let query_chars =
+            wants_case_penalty.then(|| query.chars().filter(|c| !c.is_whitespace()).collect());
+        Some(Query {
+            pattern,
+            query_chars,
+            char_bag: CharBag::from(query),
+        })
+    }
+}
+
+#[inline]
+pub(crate) fn count_case_mismatches(
+    query_chars: Option<&[char]>,
+    matched_chars: &[u32],
+    candidate: &str,
+    candidate_chars: &mut Vec<char>,
+) -> u32 {
+    let Some(query_chars) = query_chars else {
+        return 0;
+    };
+    if query_chars.len() != matched_chars.len() {
+        return 0;
+    }
+    candidate_chars.clear();
+    candidate_chars.extend(candidate.chars());
+    let mut mismatches: u32 = 0;
+    for (&query_char, &pos) in query_chars.iter().zip(matched_chars) {
+        if let Some(&candidate_char) = candidate_chars.get(pos as usize)
+            && candidate_char != query_char
+            && candidate_char.eq_ignore_ascii_case(&query_char)
+        {
+            mismatches += 1;
+        }
+    }
+    mismatches
+}
+
+const SMART_CASE_PENALTY_PER_MISMATCH: f64 = 0.9;
+
+#[inline]
+pub(crate) fn case_penalty(mismatches: u32) -> f64 {
+    if mismatches == 0 {
+        1.0
+    } else {
+        SMART_CASE_PENALTY_PER_MISMATCH.powi(mismatches as i32)
+    }
+}
+
 /// Reconstruct byte-offset match positions from a list of matched char offsets
 /// that is already sorted ascending and deduplicated.
 pub(crate) fn positions_from_sorted(s: &str, sorted_char_indices: &[u32]) -> Vec<usize> {
--- a/crates/fuzzy_nucleo/src/paths.rs
+++ b/crates/fuzzy_nucleo/src/paths.rs
@ -9,12 +9,12 @@ use std::{
 use util::{paths::PathStyle, rel_path::RelPath};

 use nucleo::Utf32Str;
-use nucleo::pattern::{Atom, AtomKind, CaseMatching, Normalization};
+use nucleo::pattern::Pattern;

 use fuzzy::CharBag;

 use crate::matcher::{self, LENGTH_PENALTY};
-use crate::{Cancelled, Case, positions_from_sorted};
+use crate::{Cancelled, Case, Query, case_penalty, count_case_mismatches, positions_from_sorted};

 #[derive(Clone, Debug)]
 pub struct PathMatchCandidate<'a> {
@ -96,47 +96,6 @@ impl Ord for PathMatch {
    }
 }

-// Path matching is always case-insensitive at the nucleo level. `Case::Smart`
-// is honored as a *scoring hint*: when the query contains uppercase, candidates
-// whose matched characters disagree in case are downranked by a factor per
-// mismatch rather than dropped. This keeps `"Editor: Backspace"` matching
-// `"editor: backspace"` while still preferring exact-case hits.
-const SMART_CASE_PENALTY_PER_MISMATCH: f64 = 0.9;
-
-pub(crate) fn make_atoms(query: &str) -> Vec<Atom> {
-    query
-        .split_whitespace()
-        .map(|word| {
-            Atom::new(
-                word,
-                CaseMatching::Ignore,
-                Normalization::Smart,
-                AtomKind::Fuzzy,
-                false,
-            )
-        })
-        .collect()
-}
-
-// Only populated when we will actually charge a smart-case penalty, so the hot
-// path can iterate a plain `&[Atom]` and ignore this slice entirely.
-fn make_source_words(query: &str, case: Case) -> Option<Vec<Vec<char>>> {
-    (case.is_smart() && query.chars().any(|c| c.is_uppercase())).then(|| {
-        query
-            .split_whitespace()
-            .map(|word| word.chars().collect())
-            .collect()
-    })
-}
-
-fn case_penalty(mismatches: u32) -> f64 {
-    if mismatches == 0 {
-        1.0
-    } else {
-        SMART_CASE_PENALTY_PER_MISMATCH.powi(mismatches as i32)
-    }
-}
-
 pub(crate) fn distance_between_paths(path: &RelPath, relative_to: &RelPath) -> usize {
    let mut path_components = path.components();
    let mut relative_components = relative_to.components();
@ -150,34 +109,34 @@ pub(crate) fn distance_between_paths(path: &RelPath, relative_to: &RelPath) -> u
    path_components.count() + relative_components.count() + 1
 }

+#[inline]
 fn get_filename_match_bonus(
    candidate_buf: &str,
-    query_atoms: &[Atom],
+    pattern: &Pattern,
    matcher: &mut nucleo::Matcher,
 ) -> f64 {
-    let filename = match std::path::Path::new(candidate_buf).file_name() {
-        Some(f) => f.to_str().unwrap_or(""),
-        None => return 0.0,
-    };
-    if filename.is_empty() || query_atoms.is_empty() {
+    let Some(filename) = std::path::Path::new(candidate_buf)
+        .file_name()
+        .and_then(|f| f.to_str())
+        .filter(|f| !f.is_empty())
+    else {
        return 0.0;
-    }
+    };
    let mut buf = Vec::new();
    let haystack = Utf32Str::new(filename, &mut buf);
-    let mut total_score = 0u32;
-    for atom in query_atoms {
-        if let Some(score) = atom.score(haystack, matcher) {
-            total_score = total_score.saturating_add(score as u32);
-        }
-    }
-    total_score as f64 / filename.len().max(1) as f64
+    let score: u32 = pattern
+        .atoms
+        .iter()
+        .filter_map(|atom| atom.score(haystack, matcher))
+        .map(|s| s as u32)
+        .sum();
+
+    score as f64 / filename.len().max(1) as f64
 }

 fn path_match_helper<'a>(
    matcher: &mut nucleo::Matcher,
-    atoms: &[Atom],
-    source_words: Option<&[Vec<char>]>,
-    query_bag: CharBag,
+    query: &Query,
    candidates: impl Iterator<Item = PathMatchCandidate<'a>>,
    results: &mut Vec<PathMatch>,
    worktree_id: usize,
@ -197,7 +156,6 @@ fn path_match_helper<'a>(
    let path_prefix_len = candidate_buf.len();
    let mut buf = Vec::new();
    let mut matched_chars: Vec<u32> = Vec::new();
-    let mut atom_matched_chars = Vec::new();
    let mut candidate_chars: Vec<char> = Vec::new();
    for candidate in candidates {
        buf.clear();
@ -206,7 +164,7 @@ fn path_match_helper<'a>(
            return Err(Cancelled);
        }

-        if !candidate.char_bag.is_superset(query_bag) {
+        if !candidate.char_bag.is_superset(query.char_bag) {
            continue;
        }

@ -219,70 +177,45 @@ fn path_match_helper<'a>(

        let haystack = Utf32Str::new(&candidate_buf, &mut buf);

-        if source_words.is_some() {
-            candidate_chars.clear();
-            candidate_chars.extend(candidate_buf.chars());
-        }
+        let Some(score) = query.pattern.indices(haystack, matcher, &mut matched_chars) else {
+            continue;
+        };

-        let mut total_score: u32 = 0;
-        let mut case_mismatches: u32 = 0;
-        let mut all_matched = true;
+        let case_mismatches = count_case_mismatches(
+            query.query_chars.as_deref(),
+            &matched_chars,
+            &candidate_buf,
+            &mut candidate_chars,
+        );

-        for (atom_idx, atom) in atoms.iter().enumerate() {
-            atom_matched_chars.clear();
-            let Some(score) = atom.indices(haystack, matcher, &mut atom_matched_chars) else {
-                all_matched = false;
-                break;
-            };
-            total_score = total_score.saturating_add(score as u32);
-            if let Some(source_words) = source_words {
-                let query_chars = &source_words[atom_idx];
-                if query_chars.len() == atom_matched_chars.len() {
-                    for (&query_char, &pos) in query_chars.iter().zip(&atom_matched_chars) {
-                        if let Some(&candidate_char) = candidate_chars.get(pos as usize)
-                            && candidate_char != query_char
-                            && candidate_char.eq_ignore_ascii_case(&query_char)
-                        {
-                            case_mismatches += 1;
-                        }
-                    }
-                }
-            }
-            matched_chars.extend_from_slice(&atom_matched_chars);
-        }
+        matched_chars.sort_unstable();
+        matched_chars.dedup();

-        if all_matched && !atoms.is_empty() {
-            matched_chars.sort_unstable();
-            matched_chars.dedup();
+        let length_penalty = candidate_buf.len() as f64 * LENGTH_PENALTY;
+        let filename_bonus = get_filename_match_bonus(&candidate_buf, &query.pattern, matcher);
+        let positive = (score as f64 + filename_bonus) * case_penalty(case_mismatches);
+        let adjusted_score = positive - length_penalty;
+        let positions = positions_from_sorted(&candidate_buf, &matched_chars);

-            let length_penalty = candidate_buf.len() as f64 * LENGTH_PENALTY;
-            let filename_bonus = get_filename_match_bonus(&candidate_buf, atoms, matcher);
-            let positive = (total_score as f64 + filename_bonus) * case_penalty(case_mismatches);
-            let adjusted_score = positive - length_penalty;
-            let positions = positions_from_sorted(&candidate_buf, &matched_chars);
-
-            results.push(PathMatch {
-                score: adjusted_score,
-                positions,
-                worktree_id,
-                path: if root_is_file {
-                    Arc::clone(path_prefix)
-                } else {
-                    candidate.path.into()
-                },
-                path_prefix: if root_is_file {
-                    RelPath::empty().into()
-                } else {
-                    Arc::clone(path_prefix)
-                },
-                is_dir: candidate.is_dir,
-                distance_to_relative_ancestor: relative_to
-                    .as_ref()
-                    .map_or(usize::MAX, |relative_to| {
-                        distance_between_paths(candidate.path, relative_to.as_ref())
-                    }),
-            });
-        }
+        results.push(PathMatch {
+            score: adjusted_score,
+            positions,
+            worktree_id,
+            path: if root_is_file {
+                Arc::clone(path_prefix)
+            } else {
+                candidate.path.into()
+            },
+            path_prefix: if root_is_file {
+                RelPath::empty().into()
+            } else {
+                Arc::clone(path_prefix)
+            },
+            is_dir: candidate.is_dir,
+            distance_to_relative_ancestor: relative_to.as_ref().map_or(usize::MAX, |relative_to| {
+                distance_between_paths(candidate.path, relative_to.as_ref())
+            }),
+        });
    }
    Ok(())
 }
@ -296,14 +229,14 @@ pub fn match_fixed_path_set(
    max_results: usize,
    path_style: PathStyle,
 ) -> Vec<PathMatch> {
+    let Some(query) = Query::build(query, case) else {
+        return Vec::new();
+    };
+
    let mut config = nucleo::Config::DEFAULT;
    config.set_match_paths();
    let mut matcher = matcher::get_matcher(config);

-    let atoms = make_atoms(query);
-    let source_words = make_source_words(query, case);
-    let query_bag = CharBag::from(query);
-
    let root_is_file = worktree_root_name.is_some() && candidates.iter().all(|c| c.path.is_empty());

    let path_prefix = worktree_root_name.unwrap_or_else(|| RelPath::empty().into());
@ -312,9 +245,7 @@ pub fn match_fixed_path_set(

    path_match_helper(
        &mut matcher,
-        &atoms,
-        source_words.as_deref(),
-        query_bag,
+        &query,
        candidates.into_iter(),
        &mut results,
        worktree_id,
@ -352,9 +283,9 @@ pub async fn match_path_sets<'a, Set: PathMatchCandidateSet<'a>>(
        query.to_owned()
    };

-    let atoms = make_atoms(&query);
-    let source_words = make_source_words(&query, case);
-    let query_bag = CharBag::from(query.as_str());
+    let Some(query) = Query::build(&query, case) else {
+        return Vec::new();
+    };

    let num_cpus = executor.num_cpus().min(path_count);
    let segment_size = path_count.div_ceil(num_cpus);
@ -371,8 +302,7 @@ pub async fn match_path_sets<'a, Set: PathMatchCandidateSet<'a>>(
                .zip(matchers.iter_mut())
                .enumerate()
            {
-                let atoms = atoms.clone();
-                let source_words = source_words.clone();
+                let query = &query;
                let relative_to = relative_to.clone();
                scope.spawn(async move {
                    let segment_start = segment_idx * segment_size;
@ -389,9 +319,7 @@ pub async fn match_path_sets<'a, Set: PathMatchCandidateSet<'a>>(

                            if path_match_helper(
                                matcher,
-                                &atoms,
-                                source_words.as_deref(),
-                                query_bag,
+                                query,
                                candidates,
                                results,
                                candidate_set.id(),
--- a/crates/fuzzy_nucleo/src/strings.rs
+++ b/crates/fuzzy_nucleo/src/strings.rs
@ -8,61 +8,14 @@ use std::{

 use gpui::{BackgroundExecutor, SharedString};
 use nucleo::Utf32Str;
-use nucleo::pattern::{Atom, AtomKind, CaseMatching, Normalization};

 use crate::{
-    Cancelled, Case, LengthPenalty,
+    Cancelled, Case, LengthPenalty, Query, case_penalty, count_case_mismatches,
    matcher::{self, LENGTH_PENALTY},
    positions_from_sorted,
 };
 use fuzzy::CharBag;

-// String matching is always case-insensitive at the nucleo level — using
-// `CaseMatching::Smart` there would reject queries whose capitalization
-// doesn't match the candidate, breaking pickers like the command palette
-// (`"Editor: Backspace"` against the action named `"editor: backspace"`).
-// `Case::Smart` is still honored as a *scoring hint*: when the query
-// contains uppercase, candidates whose matched characters disagree in case
-// are downranked rather than dropped.
-const SMART_CASE_PENALTY_PER_MISMATCH: f64 = 0.9;
-
-struct Query {
-    atoms: Vec<Atom>,
-    source_words: Option<Vec<Vec<char>>>,
-    char_bag: CharBag,
-}
-
-impl Query {
-    fn build(query: &str, case: Case) -> Option<Self> {
-        let mut atoms = Vec::new();
-        let mut source_words = Vec::new();
-        let wants_case_penalty = case.is_smart() && query.chars().any(|c| c.is_uppercase());
-
-        for word in query.split_whitespace() {
-            atoms.push(Atom::new(
-                word,
-                CaseMatching::Ignore,
-                Normalization::Smart,
-                AtomKind::Fuzzy,
-                false,
-            ));
-            if wants_case_penalty {
-                source_words.push(word.chars().collect());
-            }
-        }
-
-        if atoms.is_empty() {
-            return None;
-        }
-
-        Some(Query {
-            atoms,
-            source_words: wants_case_penalty.then_some(source_words),
-            char_bag: CharBag::from(query),
-        })
-    }
-}
-
 #[derive(Clone, Debug)]
 pub struct StringMatchCandidate {
    pub id: usize,
@ -281,7 +234,6 @@ where
 {
    let mut buf = Vec::new();
    let mut matched_chars: Vec<u32> = Vec::new();
-    let mut atom_matched_chars = Vec::new();
    let mut candidate_chars: Vec<char> = Vec::new();

    for candidate in candidates {
@ -297,69 +249,37 @@ where
            continue;
        }

-        let haystack: Utf32Str = Utf32Str::new(&borrowed.string, &mut buf);
+        let haystack: Utf32Str = Utf32Str::new(borrowed.string.as_ref(), &mut buf);

-        if query.source_words.is_some() {
-            candidate_chars.clear();
-            candidate_chars.extend(borrowed.string.chars());
-        }
+        let Some(score) = query.pattern.indices(haystack, matcher, &mut matched_chars) else {
+            continue;
+        };

-        let mut total_score: u32 = 0;
-        let mut case_mismatches: u32 = 0;
-        let mut all_matched = true;
+        let case_mismatches = count_case_mismatches(
+            query.query_chars.as_deref(),
+            &matched_chars,
+            borrowed.string.as_ref(),
+            &mut candidate_chars,
+        );

-        for (atom_idx, atom) in query.atoms.iter().enumerate() {
-            atom_matched_chars.clear();
-            let Some(score) = atom.indices(haystack, matcher, &mut atom_matched_chars) else {
-                all_matched = false;
-                break;
-            };
-            total_score = total_score.saturating_add(score as u32);
-            if let Some(source_words) = query.source_words.as_deref() {
-                let query_chars = &source_words[atom_idx];
-                if query_chars.len() == atom_matched_chars.len() {
-                    for (&query_char, &pos) in query_chars.iter().zip(&atom_matched_chars) {
-                        if let Some(&candidate_char) = candidate_chars.get(pos as usize)
-                            && candidate_char != query_char
-                            && candidate_char.eq_ignore_ascii_case(&query_char)
-                        {
-                            case_mismatches += 1;
-                        }
-                    }
-                }
-            }
-            matched_chars.extend_from_slice(&atom_matched_chars);
-        }
+        matched_chars.sort_unstable();
+        matched_chars.dedup();

-        if all_matched {
-            matched_chars.sort_unstable();
-            matched_chars.dedup();
+        let positive = score as f64 * case_penalty(case_mismatches);
+        let adjusted_score =
+            positive - length_penalty_for(borrowed.string.as_ref(), length_penalty);
+        let positions = positions_from_sorted(borrowed.string.as_ref(), &matched_chars);

-            let positive = total_score as f64 * case_penalty(case_mismatches);
-            let adjusted_score =
-                positive - length_penalty_for(borrowed.string.as_ref(), length_penalty);
-            let positions = positions_from_sorted(borrowed.string.as_ref(), &matched_chars);
-
-            results.push(StringMatch {
-                candidate_id: borrowed.id,
-                score: adjusted_score,
-                positions,
-                string: borrowed.string.clone(),
-            });
-        }
+        results.push(StringMatch {
+            candidate_id: borrowed.id,
+            score: adjusted_score,
+            positions,
+            string: borrowed.string.clone(),
+        });
    }
    Ok(())
 }

-#[inline]
-fn case_penalty(mismatches: u32) -> f64 {
-    if mismatches == 0 {
-        1.0
-    } else {
-        SMART_CASE_PENALTY_PER_MISMATCH.powi(mismatches as i32)
-    }
-}
-
 #[inline]
 fn length_penalty_for(s: &str, length_penalty: LengthPenalty) -> f64 {
    if length_penalty.is_on() {