zeta2: Parse imports via Tree-sitter queries + improve zeta retrieval-stats (#39735)

Release Notes:

- N/A

---------

Co-authored-by: Max <max@zed.dev>
Co-authored-by: Agus <agus@zed.dev>
Co-authored-by: Oleksiy <oleksiy@zed.dev>
This commit is contained in:
Michael Sloan 2025-10-08 12:04:06 -06:00 committed by GitHub
parent 5fd187769d
commit bcef3b5010
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
35 changed files with 3079 additions and 511 deletions

7
Cargo.lock generated
View file

@ -5189,6 +5189,9 @@ dependencies = [
"strum 0.27.1",
"text",
"tree-sitter",
"tree-sitter-c",
"tree-sitter-cpp",
"tree-sitter-go",
"workspace-hack",
"zed-collections",
"zed-util",
@ -16964,8 +16967,7 @@ dependencies = [
[[package]]
name = "tree-sitter-typescript"
version = "0.23.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c5f76ed8d947a75cc446d5fccd8b602ebf0cde64ccf2ffa434d873d7a575eff"
source = "git+https://github.com/zed-industries/tree-sitter-typescript?rev=e2c53597d6a5d9cf7bbe8dccde576fe1e46c5899#e2c53597d6a5d9cf7bbe8dccde576fe1e46c5899"
dependencies = [
"cc",
"tree-sitter-language",
@ -20785,6 +20787,7 @@ dependencies = [
"terminal_view",
"watch",
"workspace-hack",
"zed-collections",
"zed-util",
"zeta",
"zeta2",

View file

@ -693,7 +693,7 @@ tree-sitter-python = "0.25"
tree-sitter-regex = "0.24"
tree-sitter-ruby = "0.23"
tree-sitter-rust = "0.24"
tree-sitter-typescript = "0.23"
tree-sitter-typescript = { git = "https://github.com/zed-industries/tree-sitter-typescript", rev = "e2c53597d6a5d9cf7bbe8dccde576fe1e46c5899" } # https://github.com/tree-sitter/tree-sitter-typescript/pull/347
tree-sitter-yaml = { git = "https://github.com/zed-industries/tree-sitter-yaml", rev = "baff0b51c64ef6a1fb1f8390f3ad6015b83ec13a" }
unicase = "2.6"
unicode-script = "0.5.7"

View file

@ -127,7 +127,6 @@ pub struct DeclarationScoreComponents {
pub declaration_count: usize,
pub reference_line_distance: u32,
pub declaration_line_distance: u32,
pub declaration_line_distance_rank: usize,
pub excerpt_vs_item_jaccard: f32,
pub excerpt_vs_signature_jaccard: f32,
pub adjacent_vs_item_jaccard: f32,
@ -136,6 +135,13 @@ pub struct DeclarationScoreComponents {
pub excerpt_vs_signature_weighted_overlap: f32,
pub adjacent_vs_item_weighted_overlap: f32,
pub adjacent_vs_signature_weighted_overlap: f32,
pub path_import_match_count: usize,
pub wildcard_path_import_match_count: usize,
pub import_similarity: f32,
pub max_import_similarity: f32,
pub normalized_import_similarity: f32,
pub wildcard_import_similarity: f32,
pub normalized_wildcard_import_similarity: f32,
}
#[derive(Debug, Clone, Serialize, Deserialize)]

View file

@ -19,6 +19,7 @@ collections.workspace = true
futures.workspace = true
gpui.workspace = true
hashbrown.workspace = true
indoc.workspace = true
itertools.workspace = true
language.workspace = true
log.workspace = true
@ -45,5 +46,8 @@ project = {workspace= true, features = ["test-support"]}
serde_json.workspace = true
settings = {workspace= true, features = ["test-support"]}
text = { workspace = true, features = ["test-support"] }
tree-sitter-c.workspace = true
tree-sitter-cpp.workspace = true
tree-sitter-go.workspace = true
util = { workspace = true, features = ["test-support"] }
zlog.workspace = true

View file

@ -1,9 +1,11 @@
use language::LanguageId;
use language::{Language, LanguageId};
use project::ProjectEntryId;
use std::borrow::Cow;
use std::ops::Range;
use std::sync::Arc;
use std::{borrow::Cow, path::Path};
use text::{Bias, BufferId, Rope};
use util::paths::{path_ends_with, strip_path_suffix};
use util::rel_path::RelPath;
use crate::outline::OutlineDeclaration;
@ -22,12 +24,14 @@ pub enum Declaration {
File {
project_entry_id: ProjectEntryId,
declaration: FileDeclaration,
cached_path: CachedDeclarationPath,
},
Buffer {
project_entry_id: ProjectEntryId,
buffer_id: BufferId,
rope: Rope,
declaration: BufferDeclaration,
cached_path: CachedDeclarationPath,
},
}
@ -73,6 +77,13 @@ impl Declaration {
}
}
/// Returns the cached path information stored alongside this declaration, regardless of
/// whether it came from a file or from an open buffer.
pub fn cached_path(&self) -> &CachedDeclarationPath {
    match self {
        Declaration::File { cached_path, .. } | Declaration::Buffer { cached_path, .. } => {
            cached_path
        }
    }
}
pub fn item_range(&self) -> Range<usize> {
match self {
Declaration::File { declaration, .. } => declaration.item_range.clone(),
@ -235,3 +246,69 @@ impl BufferDeclaration {
}
}
}
/// Path information for the file containing a declaration, used when matching declarations
/// against import paths.
#[derive(Debug, Clone)]
pub struct CachedDeclarationPath {
    /// Absolute path of the worktree the file belongs to.
    pub worktree_abs_path: Arc<Path>,
    /// Path of the file relative to its worktree.
    pub rel_path: Arc<RelPath>,
    /// The relative path of the file, possibly stripped according to `import_path_strip_regex`.
    pub rel_path_after_regex_stripping: Arc<RelPath>,
}

impl CachedDeclarationPath {
    /// Builds the cached path for the file at `path` inside the worktree at `worktree_abs_path`.
    ///
    /// When `language` is provided and its config has an `import_path_strip_regex`, every match
    /// of that regex is removed from the relative path to produce
    /// `rel_path_after_regex_stripping`. If there is no language, no regex, or the stripped
    /// text is rejected by `RelPath::unix`, the unstripped relative path is used instead.
    pub fn new(
        worktree_abs_path: Arc<Path>,
        path: &Arc<RelPath>,
        language: Option<&Arc<Language>>,
    ) -> Self {
        let rel_path = path.clone();
        let strip_regex =
            language.and_then(|language| language.config().import_path_strip_regex.as_ref());
        let rel_path_after_regex_stripping = match strip_regex {
            Some(strip_regex) => {
                let stripped_text = strip_regex.replace_all(rel_path.as_unix_str(), "");
                match RelPath::unix(&Path::new(stripped_text.as_ref())) {
                    Ok(stripped) => Arc::from(stripped),
                    // Stripping produced something that is not a valid relative path.
                    Err(_) => rel_path.clone(),
                }
            }
            None => rel_path.clone(),
        };
        Self {
            worktree_abs_path,
            rel_path,
            rel_path_after_regex_stripping,
        }
    }

    /// Test-only constructor that performs no regex stripping: both relative paths are
    /// `rel_path`.
    #[cfg(test)]
    pub fn new_for_test(worktree_abs_path: &str, rel_path: &str) -> Self {
        let rel_path: Arc<RelPath> = util::rel_path::rel_path(rel_path).into();
        Self {
            worktree_abs_path: std::path::PathBuf::from(worktree_abs_path).into(),
            rel_path: rel_path.clone(),
            rel_path_after_regex_stripping: rel_path,
        }
    }

    /// Returns whether the declaration's path ends with `path`.
    ///
    /// A `path` no longer than the regex-stripped relative path is compared against that
    /// relative path alone. A longer `path` must end with the whole relative path, and the
    /// part that precedes it must itself be a suffix of the worktree's absolute path.
    pub fn ends_with_posix_path(&self, path: &Path) -> bool {
        let rel_path = &self.rel_path_after_regex_stripping;
        if path.as_os_str().len() <= rel_path.as_unix_str().len() {
            return path_ends_with(rel_path.as_std_path(), path);
        }
        strip_path_suffix(path, rel_path.as_std_path())
            .is_some_and(|remaining| path_ends_with(&self.worktree_abs_path, remaining))
    }

    /// Returns whether `path` is exactly the worktree's absolute path followed by the
    /// regex-stripped relative path.
    pub fn equals_absolute_path(&self, path: &Path) -> bool {
        strip_path_suffix(path, self.rel_path_after_regex_stripping.as_std_path())
            .is_some_and(|remaining| self.worktree_abs_path.as_ref() == remaining)
    }
}

View file

@ -1,15 +1,15 @@
use cloud_llm_client::predict_edits_v3::DeclarationScoreComponents;
use collections::HashMap;
use itertools::Itertools as _;
use language::BufferSnapshot;
use ordered_float::OrderedFloat;
use serde::Serialize;
use std::{cmp::Reverse, ops::Range};
use std::{cmp::Reverse, ops::Range, path::Path, sync::Arc};
use strum::EnumIter;
use text::{Point, ToPoint};
use crate::{
Declaration, EditPredictionExcerpt, Identifier,
CachedDeclarationPath, Declaration, EditPredictionExcerpt, Identifier,
imports::{Import, Imports, Module},
reference::{Reference, ReferenceRegion},
syntax_index::SyntaxIndexState,
text_similarity::{Occurrences, jaccard_similarity, weighted_overlap_coefficient},
@ -17,12 +17,17 @@ use crate::{
const MAX_IDENTIFIER_DECLARATION_COUNT: usize = 16;
/// Options that control which declarations `scored_declarations` returns.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct EditPredictionScoreOptions {
/// When true, a declaration in the current buffer is left out if its item range intersects
/// the excerpt range or if it is one of the excerpt's parent declarations.
pub omit_excerpt_overlaps: bool,
}
#[derive(Clone, Debug)]
pub struct ScoredDeclaration {
/// identifier used by the local reference
pub identifier: Identifier,
pub declaration: Declaration,
pub score_components: DeclarationScoreComponents,
pub scores: DeclarationScores,
pub components: DeclarationScoreComponents,
}
#[derive(EnumIter, Clone, Copy, PartialEq, Eq, Hash, Debug)]
@ -31,12 +36,55 @@ pub enum DeclarationStyle {
Declaration,
}
#[derive(Clone, Debug, Serialize, Default)]
pub struct DeclarationScores {
pub signature: f32,
pub declaration: f32,
pub retrieval: f32,
}
impl ScoredDeclaration {
/// Returns the score for this declaration with the specified style.
pub fn score(&self, style: DeclarationStyle) -> f32 {
// TODO: handle truncation
// Score related to how likely this is the correct declaration. Not limited to 0..1: see `retrieval_score`.
let retrieval = self.retrieval_score();
// Score related to the distance between the reference and cursor, range 0 to 1
let distance_score = if self.components.is_referenced_nearby {
1.0 / (1.0 + self.components.reference_line_distance as f32 / 10.0).powf(2.0)
} else {
// same score as ~14 lines away, rationale is to not overly penalize references from parent signatures
0.5
};
// For now instead of linear combination, the scores are just multiplied together.
let combined_score = 10.0 * retrieval * distance_score;
match style {
DeclarationStyle::Signature => self.scores.signature,
DeclarationStyle::Declaration => self.scores.declaration,
DeclarationStyle::Signature => {
combined_score * self.components.excerpt_vs_signature_weighted_overlap
}
DeclarationStyle::Declaration => {
2.0 * combined_score * self.components.excerpt_vs_item_weighted_overlap
}
}
}
pub fn retrieval_score(&self) -> f32 {
if self.components.is_same_file {
10.0 / self.components.same_file_declaration_count as f32
} else if self.components.path_import_match_count > 0 {
3.0
} else if self.components.wildcard_path_import_match_count > 0 {
1.0
} else if self.components.normalized_import_similarity > 0.0 {
self.components.normalized_import_similarity
} else if self.components.normalized_wildcard_import_similarity > 0.0 {
0.5 * self.components.normalized_wildcard_import_similarity
} else {
1.0 / self.components.declaration_count as f32
}
}
@ -54,100 +102,215 @@ impl ScoredDeclaration {
}
pub fn score_density(&self, style: DeclarationStyle) -> f32 {
self.score(style) / (self.size(style)) as f32
self.score(style) / self.size(style) as f32
}
}
pub fn scored_declarations(
options: &EditPredictionScoreOptions,
index: &SyntaxIndexState,
excerpt: &EditPredictionExcerpt,
excerpt_occurrences: &Occurrences,
adjacent_occurrences: &Occurrences,
imports: &Imports,
identifier_to_references: HashMap<Identifier, Vec<Reference>>,
cursor_offset: usize,
current_buffer: &BufferSnapshot,
) -> Vec<ScoredDeclaration> {
let cursor_point = cursor_offset.to_point(&current_buffer);
let mut wildcard_import_occurrences = Vec::new();
let mut wildcard_import_paths = Vec::new();
for wildcard_import in imports.wildcard_modules.iter() {
match wildcard_import {
Module::Namespace(namespace) => {
wildcard_import_occurrences.push(namespace.occurrences())
}
Module::SourceExact(path) => wildcard_import_paths.push(path),
Module::SourceFuzzy(path) => {
wildcard_import_occurrences.push(Occurrences::from_path(&path))
}
}
}
let mut declarations = identifier_to_references
.into_iter()
.flat_map(|(identifier, references)| {
let declarations =
index.declarations_for_identifier::<MAX_IDENTIFIER_DECLARATION_COUNT>(&identifier);
let mut import_occurrences = Vec::new();
let mut import_paths = Vec::new();
let mut found_external_identifier: Option<&Identifier> = None;
if let Some(imports) = imports.identifier_to_imports.get(&identifier) {
// only use alias when it's the only import, could be generalized if some language
// has overlapping aliases
//
// TODO: when an aliased declaration is included in the prompt, should include the
// aliasing in the prompt.
//
// TODO: For SourceFuzzy consider having componentwise comparison that pays
// attention to ordering.
if let [
Import::Alias {
module,
external_identifier,
},
] = imports.as_slice()
{
match module {
Module::Namespace(namespace) => {
import_occurrences.push(namespace.occurrences())
}
Module::SourceExact(path) => import_paths.push(path),
Module::SourceFuzzy(path) => {
import_occurrences.push(Occurrences::from_path(&path))
}
}
found_external_identifier = Some(&external_identifier);
} else {
for import in imports {
match import {
Import::Direct { module } => match module {
Module::Namespace(namespace) => {
import_occurrences.push(namespace.occurrences())
}
Module::SourceExact(path) => import_paths.push(path),
Module::SourceFuzzy(path) => {
import_occurrences.push(Occurrences::from_path(&path))
}
},
Import::Alias { .. } => {}
}
}
}
}
let identifier_to_lookup = found_external_identifier.unwrap_or(&identifier);
// TODO: update this to be able to return more declarations? Especially if there is the
// ability to quickly filter a large list (based on imports)
let declarations = index
.declarations_for_identifier::<MAX_IDENTIFIER_DECLARATION_COUNT>(
&identifier_to_lookup,
);
let declaration_count = declarations.len();
declarations
.into_iter()
.filter_map(|(declaration_id, declaration)| match declaration {
if declaration_count == 0 {
return Vec::new();
}
// TODO: option to filter out other candidates when same file / import match
let mut checked_declarations = Vec::new();
for (declaration_id, declaration) in declarations {
match declaration {
Declaration::Buffer {
buffer_id,
declaration: buffer_declaration,
..
} => {
let is_same_file = buffer_id == &current_buffer.remote_id();
if is_same_file {
let overlaps_excerpt =
if buffer_id == &current_buffer.remote_id() {
let already_included_in_prompt =
range_intersection(&buffer_declaration.item_range, &excerpt.range)
.is_some();
if overlaps_excerpt
|| excerpt
.parent_declarations
.iter()
.any(|(excerpt_parent, _)| excerpt_parent == &declaration_id)
{
None
} else {
.is_some()
|| excerpt.parent_declarations.iter().any(
|(excerpt_parent, _)| excerpt_parent == &declaration_id,
);
if !options.omit_excerpt_overlaps || !already_included_in_prompt {
let declaration_line = buffer_declaration
.item_range
.start
.to_point(current_buffer)
.row;
Some((
true,
(cursor_point.row as i32 - declaration_line as i32)
.unsigned_abs(),
let declaration_line_distance = (cursor_point.row as i32
- declaration_line as i32)
.unsigned_abs();
checked_declarations.push(CheckedDeclaration {
declaration,
))
same_file_line_distance: Some(declaration_line_distance),
path_import_match_count: 0,
wildcard_path_import_match_count: 0,
});
}
continue;
} else {
Some((false, u32::MAX, declaration))
}
}
Declaration::File { .. } => {
// We can assume that a file declaration is in a different file,
// because the current one must be open
Some((false, u32::MAX, declaration))
}
})
.sorted_by_key(|&(_, distance, _)| distance)
.enumerate()
.map(
|(
declaration_line_distance_rank,
(is_same_file, declaration_line_distance, declaration),
)| {
let same_file_declaration_count = index.file_declaration_count(declaration);
Declaration::File { .. } => {}
}
let declaration_path = declaration.cached_path();
let path_import_match_count = import_paths
.iter()
.filter(|import_path| {
declaration_path_matches_import(&declaration_path, import_path)
})
.count();
let wildcard_path_import_match_count = wildcard_import_paths
.iter()
.filter(|import_path| {
declaration_path_matches_import(&declaration_path, import_path)
})
.count();
checked_declarations.push(CheckedDeclaration {
declaration,
same_file_line_distance: None,
path_import_match_count,
wildcard_path_import_match_count,
});
}
score_declaration(
&identifier,
&references,
declaration.clone(),
is_same_file,
declaration_line_distance,
declaration_line_distance_rank,
same_file_declaration_count,
declaration_count,
&excerpt_occurrences,
&adjacent_occurrences,
cursor_point,
current_buffer,
)
},
)
.collect::<Vec<_>>()
let mut max_import_similarity = 0.0;
let mut max_wildcard_import_similarity = 0.0;
let mut scored_declarations_for_identifier = checked_declarations
.into_iter()
.map(|checked_declaration| {
let same_file_declaration_count =
index.file_declaration_count(checked_declaration.declaration);
let declaration = score_declaration(
&identifier,
&references,
checked_declaration,
same_file_declaration_count,
declaration_count,
&excerpt_occurrences,
&adjacent_occurrences,
&import_occurrences,
&wildcard_import_occurrences,
cursor_point,
current_buffer,
);
if declaration.components.import_similarity > max_import_similarity {
max_import_similarity = declaration.components.import_similarity;
}
if declaration.components.wildcard_import_similarity
> max_wildcard_import_similarity
{
max_wildcard_import_similarity =
declaration.components.wildcard_import_similarity;
}
declaration
})
.collect::<Vec<_>>();
if max_import_similarity > 0.0 || max_wildcard_import_similarity > 0.0 {
for declaration in scored_declarations_for_identifier.iter_mut() {
if max_import_similarity > 0.0 {
declaration.components.max_import_similarity = max_import_similarity;
declaration.components.normalized_import_similarity =
declaration.components.import_similarity / max_import_similarity;
}
if max_wildcard_import_similarity > 0.0 {
declaration.components.normalized_wildcard_import_similarity =
declaration.components.wildcard_import_similarity
/ max_wildcard_import_similarity;
}
}
}
scored_declarations_for_identifier
})
.flatten()
.collect::<Vec<_>>();
declarations.sort_unstable_by_key(|declaration| {
@ -160,6 +323,24 @@ pub fn scored_declarations(
declarations
}
/// A candidate declaration for an identifier, together with the same-file and path-import
/// information gathered for it before it is scored.
struct CheckedDeclaration<'a> {
declaration: &'a Declaration,
/// Absolute difference between the cursor row and the row where the declaration's item
/// starts, set only for declarations in the current buffer; `None` otherwise.
same_file_line_distance: Option<u32>,
/// Number of the identifier's import paths that match the declaration's path (0 for
/// declarations in the current buffer).
path_import_match_count: usize,
/// Number of wildcard import paths that match the declaration's path (0 for declarations
/// in the current buffer).
wildcard_path_import_match_count: usize,
}
/// Returns whether `import_path` refers to the file described by `declaration_path`.
///
/// Absolute import paths must equal the declaration's absolute path; any other import path
/// only needs to be a trailing portion of it.
fn declaration_path_matches_import(
    declaration_path: &CachedDeclarationPath,
    import_path: &Arc<Path>,
) -> bool {
    if !import_path.is_absolute() {
        return declaration_path.ends_with_posix_path(import_path);
    }
    declaration_path.equals_absolute_path(import_path)
}
fn range_intersection<T: Ord + Clone>(a: &Range<T>, b: &Range<T>) -> Option<Range<T>> {
let start = a.start.clone().max(b.start.clone());
let end = a.end.clone().min(b.end.clone());
@ -173,17 +354,23 @@ fn range_intersection<T: Ord + Clone>(a: &Range<T>, b: &Range<T>) -> Option<Rang
fn score_declaration(
identifier: &Identifier,
references: &[Reference],
declaration: Declaration,
is_same_file: bool,
declaration_line_distance: u32,
declaration_line_distance_rank: usize,
checked_declaration: CheckedDeclaration,
same_file_declaration_count: usize,
declaration_count: usize,
excerpt_occurrences: &Occurrences,
adjacent_occurrences: &Occurrences,
import_occurrences: &[Occurrences],
wildcard_import_occurrences: &[Occurrences],
cursor: Point,
current_buffer: &BufferSnapshot,
) -> Option<ScoredDeclaration> {
) -> ScoredDeclaration {
let CheckedDeclaration {
declaration,
same_file_line_distance,
path_import_match_count,
wildcard_path_import_match_count,
} = checked_declaration;
let is_referenced_nearby = references
.iter()
.any(|r| r.region == ReferenceRegion::Nearby);
@ -200,6 +387,9 @@ fn score_declaration(
.min()
.unwrap();
let is_same_file = same_file_line_distance.is_some();
let declaration_line_distance = same_file_line_distance.unwrap_or(u32::MAX);
let item_source_occurrences = Occurrences::within_string(&declaration.item_text().0);
let item_signature_occurrences = Occurrences::within_string(&declaration.signature_text().0);
let excerpt_vs_item_jaccard = jaccard_similarity(excerpt_occurrences, &item_source_occurrences);
@ -219,6 +409,37 @@ fn score_declaration(
let adjacent_vs_signature_weighted_overlap =
weighted_overlap_coefficient(adjacent_occurrences, &item_signature_occurrences);
let mut import_similarity = 0f32;
let mut wildcard_import_similarity = 0f32;
if !import_occurrences.is_empty() || !wildcard_import_occurrences.is_empty() {
let cached_path = declaration.cached_path();
let path_occurrences = Occurrences::from_worktree_path(
cached_path
.worktree_abs_path
.file_name()
.map(|f| f.to_string_lossy()),
&cached_path.rel_path,
);
import_similarity = import_occurrences
.iter()
.map(|namespace_occurrences| {
OrderedFloat(jaccard_similarity(namespace_occurrences, &path_occurrences))
})
.max()
.map(|similarity| similarity.into_inner())
.unwrap_or_default();
// TODO: Consider something other than max
wildcard_import_similarity = wildcard_import_occurrences
.iter()
.map(|namespace_occurrences| {
OrderedFloat(jaccard_similarity(namespace_occurrences, &path_occurrences))
})
.max()
.map(|similarity| similarity.into_inner())
.unwrap_or_default();
}
// TODO: Consider adding declaration_file_count
let score_components = DeclarationScoreComponents {
is_same_file,
@ -226,7 +447,6 @@ fn score_declaration(
is_referenced_in_breadcrumb,
reference_line_distance,
declaration_line_distance,
declaration_line_distance_rank,
reference_count,
same_file_declaration_count,
declaration_count,
@ -238,52 +458,59 @@ fn score_declaration(
excerpt_vs_signature_weighted_overlap,
adjacent_vs_item_weighted_overlap,
adjacent_vs_signature_weighted_overlap,
path_import_match_count,
wildcard_path_import_match_count,
import_similarity,
max_import_similarity: 0.0,
normalized_import_similarity: 0.0,
wildcard_import_similarity,
normalized_wildcard_import_similarity: 0.0,
};
Some(ScoredDeclaration {
ScoredDeclaration {
identifier: identifier.clone(),
declaration: declaration,
scores: DeclarationScores::score(&score_components),
score_components,
})
}
#[derive(Clone, Debug, Serialize)]
pub struct DeclarationScores {
pub signature: f32,
pub declaration: f32,
pub retrieval: f32,
}
impl DeclarationScores {
fn score(components: &DeclarationScoreComponents) -> DeclarationScores {
// TODO: handle truncation
// Score related to how likely this is the correct declaration, range 0 to 1
let retrieval = if components.is_same_file {
// TODO: use declaration_line_distance_rank
1.0 / components.same_file_declaration_count as f32
} else {
1.0 / components.declaration_count as f32
};
// Score related to the distance between the reference and cursor, range 0 to 1
let distance_score = if components.is_referenced_nearby {
1.0 / (1.0 + components.reference_line_distance as f32 / 10.0).powf(2.0)
} else {
// same score as ~14 lines away, rationale is to not overly penalize references from parent signatures
0.5
};
// For now instead of linear combination, the scores are just multiplied together.
let combined_score = 10.0 * retrieval * distance_score;
DeclarationScores {
signature: combined_score * components.excerpt_vs_signature_weighted_overlap,
// declaration score gets boosted both by being multiplied by 2 and by there being more
// weighted overlap.
declaration: 2.0 * combined_score * components.excerpt_vs_item_weighted_overlap,
retrieval,
}
declaration: declaration.clone(),
components: score_components,
}
}
#[cfg(test)]
mod test {
    use super::*;

    /// Checks relative-suffix and absolute import paths against a single declaration path.
    #[test]
    fn test_declaration_path_matches() {
        let declaration_path =
            CachedDeclarationPath::new_for_test("/home/user/project", "src/maths.ts");

        let matching_imports = [
            "maths.ts",
            "project/src/maths.ts",
            "user/project/src/maths.ts",
            "/home/user/project/src/maths.ts",
        ];
        for import in matching_imports {
            let import_path: Arc<Path> = Path::new(import).into();
            assert!(
                declaration_path_matches_import(&declaration_path, &import_path),
                "expected {import:?} to match"
            );
        }

        let non_matching_imports = ["other.ts", "/home/user/project/src/other.ts"];
        for import in non_matching_imports {
            let import_path: Arc<Path> = Path::new(import).into();
            assert!(
                !declaration_path_matches_import(&declaration_path, &import_path),
                "expected {import:?} not to match"
            );
        }
    }
}

View file

@ -1,12 +1,13 @@
mod declaration;
mod declaration_scoring;
mod excerpt;
mod imports;
mod outline;
mod reference;
mod syntax_index;
pub mod text_similarity;
use std::sync::Arc;
use std::{path::Path, sync::Arc};
use collections::HashMap;
use gpui::{App, AppContext as _, Entity, Task};
@ -16,9 +17,17 @@ use text::{Point, ToOffset as _};
pub use declaration::*;
pub use declaration_scoring::*;
pub use excerpt::*;
pub use imports::*;
pub use reference::*;
pub use syntax_index::*;
/// Options for gathering an `EditPredictionContext`.
#[derive(Clone, Debug, PartialEq)]
pub struct EditPredictionContextOptions {
/// When true, imports are gathered from the buffer with `Imports::gather`; when false,
/// `Imports::default()` is used instead.
pub use_imports: bool,
/// Options passed to `EditPredictionExcerpt::select_from_buffer`.
pub excerpt: EditPredictionExcerptOptions,
/// Options passed to `scored_declarations`.
pub score: EditPredictionScoreOptions,
}
#[derive(Clone, Debug)]
pub struct EditPredictionContext {
pub excerpt: EditPredictionExcerpt,
@ -31,21 +40,34 @@ impl EditPredictionContext {
pub fn gather_context_in_background(
cursor_point: Point,
buffer: BufferSnapshot,
excerpt_options: EditPredictionExcerptOptions,
options: EditPredictionContextOptions,
syntax_index: Option<Entity<SyntaxIndex>>,
cx: &mut App,
) -> Task<Option<Self>> {
let parent_abs_path = project::File::from_dyn(buffer.file()).and_then(|f| {
let mut path = f.worktree.read(cx).absolutize(&f.path);
if path.pop() { Some(path) } else { None }
});
if let Some(syntax_index) = syntax_index {
let index_state =
syntax_index.read_with(cx, |index, _cx| Arc::downgrade(index.state()));
cx.background_spawn(async move {
let parent_abs_path = parent_abs_path.as_deref();
let index_state = index_state.upgrade()?;
let index_state = index_state.lock().await;
Self::gather_context(cursor_point, &buffer, &excerpt_options, Some(&index_state))
Self::gather_context(
cursor_point,
&buffer,
parent_abs_path,
&options,
Some(&index_state),
)
})
} else {
cx.background_spawn(async move {
Self::gather_context(cursor_point, &buffer, &excerpt_options, None)
let parent_abs_path = parent_abs_path.as_deref();
Self::gather_context(cursor_point, &buffer, parent_abs_path, &options, None)
})
}
}
@ -53,13 +75,20 @@ impl EditPredictionContext {
pub fn gather_context(
cursor_point: Point,
buffer: &BufferSnapshot,
excerpt_options: &EditPredictionExcerptOptions,
parent_abs_path: Option<&Path>,
options: &EditPredictionContextOptions,
index_state: Option<&SyntaxIndexState>,
) -> Option<Self> {
let imports = if options.use_imports {
Imports::gather(&buffer, parent_abs_path)
} else {
Imports::default()
};
Self::gather_context_with_references_fn(
cursor_point,
buffer,
excerpt_options,
&imports,
options,
index_state,
references_in_excerpt,
)
@ -68,7 +97,8 @@ impl EditPredictionContext {
pub fn gather_context_with_references_fn(
cursor_point: Point,
buffer: &BufferSnapshot,
excerpt_options: &EditPredictionExcerptOptions,
imports: &Imports,
options: &EditPredictionContextOptions,
index_state: Option<&SyntaxIndexState>,
get_references: impl FnOnce(
&EditPredictionExcerpt,
@ -79,7 +109,7 @@ impl EditPredictionContext {
let excerpt = EditPredictionExcerpt::select_from_buffer(
cursor_point,
buffer,
excerpt_options,
&options.excerpt,
index_state,
)?;
let excerpt_text = excerpt.text(buffer);
@ -101,10 +131,12 @@ impl EditPredictionContext {
let references = get_references(&excerpt, &excerpt_text, buffer);
scored_declarations(
&options.score,
&index_state,
&excerpt,
&excerpt_occurrences,
&adjacent_occurrences,
&imports,
references,
cursor_offset_in_file,
buffer,
@ -160,12 +192,18 @@ mod tests {
EditPredictionContext::gather_context_in_background(
cursor_point,
buffer_snapshot,
EditPredictionExcerptOptions {
max_bytes: 60,
min_bytes: 10,
target_before_cursor_over_total_bytes: 0.5,
EditPredictionContextOptions {
use_imports: true,
excerpt: EditPredictionExcerptOptions {
max_bytes: 60,
min_bytes: 10,
target_before_cursor_over_total_bytes: 0.5,
},
score: EditPredictionScoreOptions {
omit_excerpt_overlaps: true,
},
},
Some(index),
Some(index.clone()),
cx,
)
})

File diff suppressed because it is too large Load diff

View file

@ -5,6 +5,7 @@ use futures::lock::Mutex;
use futures::{FutureExt as _, StreamExt, future};
use gpui::{App, AppContext as _, AsyncApp, Context, Entity, Task, WeakEntity};
use itertools::Itertools;
use language::{Buffer, BufferEvent};
use postage::stream::Stream as _;
use project::buffer_store::{BufferStore, BufferStoreEvent};
@ -17,6 +18,7 @@ use std::sync::Arc;
use text::BufferId;
use util::{RangeExt as _, debug_panic, some_or_debug_panic};
use crate::CachedDeclarationPath;
use crate::declaration::{
BufferDeclaration, Declaration, DeclarationId, FileDeclaration, Identifier,
};
@ -28,6 +30,8 @@ use crate::outline::declarations_in_buffer;
// `buffer_declarations_containing_range` assumes that the index is always immediately up to date.
//
// * Add a per language configuration for skipping indexing.
//
// * Handle tsx / ts / js referencing each other
// Potential future improvements:
//
@ -61,6 +65,7 @@ pub struct SyntaxIndex {
state: Arc<Mutex<SyntaxIndexState>>,
project: WeakEntity<Project>,
initial_file_indexing_done_rx: postage::watch::Receiver<bool>,
_file_indexing_task: Option<Task<()>>,
}
pub struct SyntaxIndexState {
@ -70,7 +75,6 @@ pub struct SyntaxIndexState {
buffers: HashMap<BufferId, BufferState>,
dirty_files: HashMap<ProjectEntryId, ProjectPath>,
dirty_files_tx: mpsc::Sender<()>,
_file_indexing_task: Option<Task<()>>,
}
#[derive(Debug, Default)]
@ -102,12 +106,12 @@ impl SyntaxIndex {
buffers: HashMap::default(),
dirty_files: HashMap::default(),
dirty_files_tx,
_file_indexing_task: None,
};
let this = Self {
let mut this = Self {
project: project.downgrade(),
state: Arc::new(Mutex::new(initial_state)),
initial_file_indexing_done_rx,
_file_indexing_task: None,
};
let worktree_store = project.read(cx).worktree_store();
@ -116,75 +120,77 @@ impl SyntaxIndex {
.worktrees()
.map(|w| w.read(cx).snapshot())
.collect::<Vec<_>>();
if !initial_worktree_snapshots.is_empty() {
this.state.try_lock().unwrap()._file_indexing_task =
Some(cx.spawn(async move |this, cx| {
let snapshots_file_count = initial_worktree_snapshots
.iter()
.map(|worktree| worktree.file_count())
.sum::<usize>();
let chunk_size = snapshots_file_count.div_ceil(file_indexing_parallelism);
let chunk_count = snapshots_file_count.div_ceil(chunk_size);
let file_chunks = initial_worktree_snapshots
.iter()
.flat_map(|worktree| {
let worktree_id = worktree.id();
worktree.files(false, 0).map(move |entry| {
(
entry.id,
ProjectPath {
worktree_id,
path: entry.path.clone(),
},
)
})
this._file_indexing_task = Some(cx.spawn(async move |this, cx| {
let snapshots_file_count = initial_worktree_snapshots
.iter()
.map(|worktree| worktree.file_count())
.sum::<usize>();
if snapshots_file_count > 0 {
let chunk_size = snapshots_file_count.div_ceil(file_indexing_parallelism);
let chunk_count = snapshots_file_count.div_ceil(chunk_size);
let file_chunks = initial_worktree_snapshots
.iter()
.flat_map(|worktree| {
let worktree_id = worktree.id();
worktree.files(false, 0).map(move |entry| {
(
entry.id,
ProjectPath {
worktree_id,
path: entry.path.clone(),
},
)
})
.chunks(chunk_size);
})
.chunks(chunk_size);
let mut tasks = Vec::with_capacity(chunk_count);
for chunk in file_chunks.into_iter() {
tasks.push(Self::update_dirty_files(
&this,
chunk.into_iter().collect(),
cx.clone(),
));
}
futures::future::join_all(tasks).await;
let mut tasks = Vec::with_capacity(chunk_count);
for chunk in file_chunks.into_iter() {
tasks.push(Self::update_dirty_files(
&this,
chunk.into_iter().collect(),
cx.clone(),
));
}
futures::future::join_all(tasks).await;
log::info!("Finished initial file indexing");
}
log::info!("Finished initial file indexing");
*initial_file_indexing_done_tx.borrow_mut() = true;
*initial_file_indexing_done_tx.borrow_mut() = true;
let Ok(state) = this.read_with(cx, |this, _cx| this.state.clone()) else {
return;
};
while dirty_files_rx.next().await.is_some() {
let mut state = state.lock().await;
let was_underused = state.dirty_files.capacity() > 255
&& state.dirty_files.len() * 8 < state.dirty_files.capacity();
let dirty_files = state.dirty_files.drain().collect::<Vec<_>>();
if was_underused {
state.dirty_files.shrink_to_fit();
}
drop(state);
if dirty_files.is_empty() {
continue;
}
let Ok(state) = this.read_with(cx, |this, _cx| Arc::downgrade(&this.state)) else {
return;
};
while dirty_files_rx.next().await.is_some() {
let Some(state) = state.upgrade() else {
return;
};
let mut state = state.lock().await;
let was_underused = state.dirty_files.capacity() > 255
&& state.dirty_files.len() * 8 < state.dirty_files.capacity();
let dirty_files = state.dirty_files.drain().collect::<Vec<_>>();
if was_underused {
state.dirty_files.shrink_to_fit();
}
drop(state);
if dirty_files.is_empty() {
continue;
}
let chunk_size = dirty_files.len().div_ceil(file_indexing_parallelism);
let chunk_count = dirty_files.len().div_ceil(chunk_size);
let mut tasks = Vec::with_capacity(chunk_count);
let chunks = dirty_files.into_iter().chunks(chunk_size);
for chunk in chunks.into_iter() {
tasks.push(Self::update_dirty_files(
&this,
chunk.into_iter().collect(),
cx.clone(),
));
}
futures::future::join_all(tasks).await;
}
}));
}
let chunk_size = dirty_files.len().div_ceil(file_indexing_parallelism);
let chunk_count = dirty_files.len().div_ceil(chunk_size);
let mut tasks = Vec::with_capacity(chunk_count);
let chunks = dirty_files.into_iter().chunks(chunk_size);
for chunk in chunks.into_iter() {
tasks.push(Self::update_dirty_files(
&this,
chunk.into_iter().collect(),
cx.clone(),
));
}
futures::future::join_all(tasks).await;
}
}));
cx.subscribe(&worktree_store, Self::handle_worktree_store_event)
.detach();
@ -364,7 +370,9 @@ impl SyntaxIndex {
cx: &mut Context<Self>,
) {
match event {
BufferEvent::Edited => self.update_buffer(buffer, cx),
BufferEvent::Edited |
// paths are cached and so should be updated
BufferEvent::FileHandleChanged => self.update_buffer(buffer, cx),
_ => {}
}
}
@ -375,8 +383,16 @@ impl SyntaxIndex {
return;
}
let Some(project_entry_id) =
project::File::from_dyn(buffer.file()).and_then(|f| f.project_entry_id(cx))
let Some((project_entry_id, cached_path)) = project::File::from_dyn(buffer.file())
.and_then(|f| {
let project_entry_id = f.project_entry_id()?;
let cached_path = CachedDeclarationPath::new(
f.worktree.read(cx).abs_path(),
&f.path,
buffer.language(),
);
Some((project_entry_id, cached_path))
})
else {
return;
};
@ -440,6 +456,7 @@ impl SyntaxIndex {
buffer_id,
declaration,
project_entry_id,
cached_path: cached_path.clone(),
});
new_ids.push(declaration_id);
@ -507,13 +524,14 @@ impl SyntaxIndex {
let snapshot_task = worktree.update(cx, |worktree, cx| {
let load_task = worktree.load_file(&project_path.path, cx);
let worktree_abs_path = worktree.abs_path();
cx.spawn(async move |_this, cx| {
let loaded_file = load_task.await?;
let language = language.await?;
let buffer = cx.new(|cx| {
let mut buffer = Buffer::local(loaded_file.text, cx);
buffer.set_language(Some(language), cx);
buffer.set_language(Some(language.clone()), cx);
buffer
})?;
@ -522,14 +540,22 @@ impl SyntaxIndex {
parse_status.changed().await?;
}
buffer.read_with(cx, |buffer, _cx| buffer.snapshot())
let cached_path = CachedDeclarationPath::new(
worktree_abs_path,
&project_path.path,
Some(&language),
);
let snapshot = buffer.read_with(cx, |buffer, _cx| buffer.snapshot())?;
anyhow::Ok((snapshot, cached_path))
})
});
let state = Arc::downgrade(&self.state);
cx.background_spawn(async move {
// TODO: How to handle errors?
let Ok(snapshot) = snapshot_task.await else {
let Ok((snapshot, cached_path)) = snapshot_task.await else {
return;
};
let rope = snapshot.as_rope();
@ -567,6 +593,7 @@ impl SyntaxIndex {
let declaration_id = state.declarations.insert(Declaration::File {
project_entry_id: entry_id,
declaration,
cached_path: cached_path.clone(),
});
new_ids.push(declaration_id);
@ -921,6 +948,7 @@ mod tests {
if let Declaration::File {
declaration,
project_entry_id: file,
..
} = declaration
{
assert_eq!(

View file

@ -1,9 +1,12 @@
use hashbrown::HashTable;
use regex::Regex;
use std::{
borrow::Cow,
hash::{Hash, Hasher as _},
path::Path,
sync::LazyLock,
};
use util::rel_path::RelPath;
use crate::reference::Reference;
@ -45,19 +48,34 @@ impl Occurrences {
)
}
pub fn from_identifiers<'a>(identifiers: impl IntoIterator<Item = &'a str>) -> Self {
pub fn from_identifiers(identifiers: impl IntoIterator<Item = impl AsRef<str>>) -> Self {
let mut this = Self::default();
// TODO: Score matches that match case higher?
//
// TODO: Also include unsplit identifier?
for identifier in identifiers {
for identifier_part in split_identifier(identifier) {
for identifier_part in split_identifier(identifier.as_ref()) {
this.add_hash(fx_hash(&identifier_part.to_lowercase()));
}
}
this
}
/// Builds occurrences for a path inside a worktree.
///
/// When `worktree_name` is provided it is used as the first identifier, followed by the
/// components of `rel_path` with the final component reduced to its file stem. Without a
/// worktree name this is the same as calling [`Self::from_path`] on the relative path.
pub fn from_worktree_path(worktree_name: Option<Cow<'_, str>>, rel_path: &RelPath) -> Self {
    let std_path = rel_path.as_std_path();
    match worktree_name {
        Some(name) => {
            let identifiers =
                std::iter::once(name).chain(iter_path_without_extension(std_path));
            Self::from_identifiers(identifiers)
        }
        None => Self::from_path(std_path),
    }
}
/// Builds occurrences from the components of `path`, with the final component contributing its
/// file stem rather than its full file name (see `iter_path_without_extension`).
pub fn from_path(path: &Path) -> Self {
Self::from_identifiers(iter_path_without_extension(path))
}
fn add_hash(&mut self, hash: u64) {
self.table
.entry(
@ -82,6 +100,15 @@ impl Occurrences {
}
}
/// Yields each component of `path` as a lossily-converted string, with the final component
/// replaced by its file stem (when `Path::file_stem` returns one).
fn iter_path_without_extension(path: &Path) -> impl Iterator<Item = Cow<'_, str>> {
    let stem = path.file_stem().map(|stem| stem.to_string_lossy());

    // Drop the last component here; it is re-added below in its extension-less form.
    let mut leading = path.components();
    let _ = leading.next_back();

    leading
        .map(|component| component.as_os_str().to_string_lossy())
        .chain(stem)
}
pub fn fx_hash<T: Hash + ?Sized>(data: &T) -> u64 {
let mut hasher = collections::FxHasher::default();
data.hash(&mut hasher);
@ -269,4 +296,19 @@ mod test {
// the smaller set, 10.
assert_eq!(weighted_overlap_coefficient(&set_a, &set_b), 7.0 / 10.0);
}
#[test]
fn test_iter_path_without_extension() {
    // (input path, expected identifiers)
    let cases: [(&str, &[&str]); 4] = [
        ("", &[]),
        ("foo", &["foo"]),
        ("foo/bar.txt", &["foo", "bar"]),
        ("foo/bar/baz.txt", &["foo", "bar", "baz"]),
    ];

    for (input, expected) in cases {
        let actual = iter_path_without_extension(Path::new(input)).collect::<Vec<_>>();
        assert_eq!(actual, expected);
    }
}
}

View file

@ -5343,7 +5343,7 @@ impl Editor {
let buffer_worktree = project.worktree_for_id(buffer_file.worktree_id(cx), cx)?;
let worktree_entry = buffer_worktree
.read(cx)
.entry_for_id(buffer_file.project_entry_id(cx)?)?;
.entry_for_id(buffer_file.project_entry_id()?)?;
if worktree_entry.is_ignored {
return None;
}

View file

@ -777,6 +777,15 @@ pub struct LanguageConfig {
/// A list of preferred debuggers for this language.
#[serde(default)]
pub debuggers: IndexSet<SharedString>,
/// A list of import namespace segments that aren't expected to appear in file paths. For
/// example, "super" and "crate" in Rust.
#[serde(default)]
pub ignored_import_segments: HashSet<Arc<str>>,
/// Regular expression that matches substrings to omit from import paths, to make the paths more
/// similar to how they are specified when imported. For example, "/mod\.rs$" or "/__init__\.py$".
#[serde(default, deserialize_with = "deserialize_regex")]
#[schemars(schema_with = "regex_json_schema")]
pub import_path_strip_regex: Option<Regex>,
}
#[derive(Clone, Debug, Deserialize, Default, JsonSchema)]
@ -973,6 +982,8 @@ impl Default for LanguageConfig {
completion_query_characters: Default::default(),
linked_edit_characters: Default::default(),
debuggers: Default::default(),
ignored_import_segments: Default::default(),
import_path_strip_regex: None,
}
}
}
@ -1162,6 +1173,7 @@ pub struct Grammar {
pub(crate) injection_config: Option<InjectionConfig>,
pub(crate) override_config: Option<OverrideConfig>,
pub(crate) debug_variables_config: Option<DebugVariablesConfig>,
pub(crate) imports_config: Option<ImportsConfig>,
pub(crate) highlight_map: Mutex<HighlightMap>,
}
@ -1314,6 +1326,17 @@ pub struct DebugVariablesConfig {
pub objects_by_capture_ix: Vec<(u32, DebuggerTextObject)>,
}
/// A compiled imports query together with the indices of the captures it declares.
///
/// Populated by `Language::with_imports_query`, which requires an `@import` capture and treats
/// every other capture as optional.
pub struct ImportsConfig {
/// The compiled tree-sitter query.
pub query: Query,
/// Index of the required `@import` capture.
pub import_ix: u32,
/// Index of the `@name` capture, if the query declares one.
pub name_ix: Option<u32>,
/// Index of the `@namespace` capture, if the query declares one.
pub namespace_ix: Option<u32>,
/// Index of the `@source` capture, if the query declares one.
pub source_ix: Option<u32>,
/// Index of the `@list` capture, if the query declares one.
pub list_ix: Option<u32>,
/// Index of the `@wildcard` capture, if the query declares one.
pub wildcard_ix: Option<u32>,
/// Index of the `@alias` capture, if the query declares one.
pub alias_ix: Option<u32>,
}
impl Language {
pub fn new(config: LanguageConfig, ts_language: Option<tree_sitter::Language>) -> Self {
Self::new_with_id(LanguageId::new(), config, ts_language)
@ -1346,6 +1369,7 @@ impl Language {
runnable_config: None,
error_query: Query::new(&ts_language, "(ERROR) @error").ok(),
debug_variables_config: None,
imports_config: None,
ts_language,
highlight_map: Default::default(),
})
@ -1427,6 +1451,11 @@ impl Language {
.with_debug_variables_query(query.as_ref())
.context("Error loading debug variables query")?;
}
if let Some(query) = queries.imports {
self = self
.with_imports_query(query.as_ref())
.context("Error loading imports query")?;
}
Ok(self)
}
@ -1595,6 +1624,45 @@ impl Language {
Ok(self)
}
/// Compiles `source` as this language's imports query and stores it on the grammar.
///
/// The query must declare an `@import` capture; `@name`, `@namespace`, `@source`, `@list`,
/// `@wildcard` and `@alias` are optional. When `populate_capture_indices` reports failure the
/// grammar's imports config is left untouched and `self` is still returned.
///
/// # Errors
///
/// Propagates errors from `expect_grammar`, from compiling the query with `Query::new`, and
/// from `grammar_mut`.
pub fn with_imports_query(mut self, source: &str) -> Result<Self> {
    let query = Query::new(&self.expect_grammar()?.ts_language, source)?;

    let mut import_ix = 0;
    let mut name_ix = None;
    let mut namespace_ix = None;
    let mut source_ix = None;
    let mut list_ix = None;
    let mut wildcard_ix = None;
    let mut alias_ix = None;

    let captures_resolved = populate_capture_indices(
        &query,
        &self.config.name,
        "imports",
        &[],
        &mut [
            Capture::Required("import", &mut import_ix),
            Capture::Optional("name", &mut name_ix),
            Capture::Optional("namespace", &mut namespace_ix),
            Capture::Optional("source", &mut source_ix),
            Capture::Optional("list", &mut list_ix),
            Capture::Optional("wildcard", &mut wildcard_ix),
            Capture::Optional("alias", &mut alias_ix),
        ],
    );

    if captures_resolved {
        self.grammar_mut()?.imports_config = Some(ImportsConfig {
            query,
            import_ix,
            name_ix,
            namespace_ix,
            source_ix,
            list_ix,
            wildcard_ix,
            alias_ix,
        });
    }

    Ok(self)
}
pub fn with_brackets_query(mut self, source: &str) -> Result<Self> {
let query = Query::new(&self.expect_grammar()?.ts_language, source)?;
let mut open_capture_ix = 0;
@ -2149,6 +2217,10 @@ impl Grammar {
pub fn debug_variables_config(&self) -> Option<&DebugVariablesConfig> {
self.debug_variables_config.as_ref()
}
/// Returns the grammar's imports query configuration, or `None` if none has been set.
pub fn imports_config(&self) -> Option<&ImportsConfig> {
self.imports_config.as_ref()
}
}
impl CodeLabel {

View file

@ -229,6 +229,7 @@ pub const QUERY_FILENAME_PREFIXES: &[(
("runnables", |q| &mut q.runnables),
("debugger", |q| &mut q.debugger),
("textobjects", |q| &mut q.text_objects),
("imports", |q| &mut q.imports),
];
/// Tree-sitter language queries for a given language.
@ -245,6 +246,7 @@ pub struct LanguageQueries {
pub runnables: Option<Cow<'static, str>>,
pub text_objects: Option<Cow<'static, str>>,
pub debugger: Option<Cow<'static, str>>,
pub imports: Option<Cow<'static, str>>,
}
#[derive(Clone, Default)]

View file

@ -17,3 +17,4 @@ brackets = [
]
debuggers = ["CodeLLDB", "GDB"]
documentation_comment = { start = "/*", prefix = "* ", end = "*/", tab_size = 1 }
import_path_strip_regex = "^<|>$"

View file

@ -0,0 +1,7 @@
(preproc_include
path: [
(
(system_lib_string) @source @wildcard
(#strip! @source "[<>]"))
(string_literal (string_content) @source @wildcard)
]) @import

View file

@ -17,3 +17,4 @@ brackets = [
]
debuggers = ["CodeLLDB", "GDB"]
documentation_comment = { start = "/*", prefix = "* ", end = "*/", tab_size = 1 }
import_path_strip_regex = "^<|>$"

View file

@ -0,0 +1,5 @@
(preproc_include
path: [
((system_lib_string) @source @wildcard)
(string_literal (string_content) @source @wildcard)
]) @import

View file

@ -0,0 +1,14 @@
(import_spec
name: [
(dot)
(package_identifier)
]
path: (interpreted_string_literal
(interpreted_string_literal_content) @namespace)
) @wildcard @import
(import_spec
!name
path: (interpreted_string_literal
(interpreted_string_literal_content) @namespace)
) @wildcard @import

View file

@ -23,6 +23,7 @@ tab_size = 2
scope_opt_in_language_servers = ["tailwindcss-language-server", "emmet-language-server"]
prettier_parser_name = "babel"
debuggers = ["JavaScript"]
import_path_strip_regex = "(?:/index)?\\.[jt]s$"
[jsx_tag_auto_close]
open_tag_node_name = "jsx_opening_element"

View file

@ -0,0 +1,14 @@
(import_statement
import_clause: (import_clause
[
(identifier) @name
(named_imports
(import_specifier
name: (_) @name
alias: (_)? @alias))
])
source: (string (string_fragment) @source)) @import
(import_statement
!import_clause
source: (string (string_fragment) @source @wildcard)) @import

View file

@ -35,3 +35,4 @@ decrease_indent_patterns = [
{ pattern = "^\\s*except\\b.*:\\s*(#.*)?", valid_after = ["try", "except"] },
{ pattern = "^\\s*finally\\b.*:\\s*(#.*)?", valid_after = ["try", "except", "else"] },
]
import_path_strip_regex = "/__init__\\.py$"

View file

@ -0,0 +1,32 @@
(import_statement
name: [
(dotted_name
((identifier) @namespace ".")*
(identifier) @namespace .)
(aliased_import
name: (dotted_name
((identifier) @namespace ".")*
(identifier) @namespace .))
]) @wildcard @import
(import_from_statement
module_name: [
(dotted_name
((identifier) @namespace ".")*
(identifier) @namespace .)
(relative_import
(dotted_name
((identifier) @namespace ".")*
(identifier) @namespace .)?)
]
(wildcard_import)? @wildcard
name: [
(dotted_name
((identifier) @namespace ".")*
(identifier) @name .)
(aliased_import
name: (dotted_name
((identifier) @namespace ".")*
(identifier) @name .)
alias: (identifier) @alias)
]?) @import

View file

@ -17,3 +17,5 @@ brackets = [
collapsed_placeholder = " /* ... */ "
debuggers = ["CodeLLDB", "GDB"]
documentation_comment = { start = "/*", prefix = "* ", end = "*/", tab_size = 1 }
ignored_import_segments = ["crate", "super"]
import_path_strip_regex = "/(lib|mod)\\.rs$"

View file

@ -0,0 +1,27 @@
(use_declaration) @import
(scoped_use_list
path: (_) @namespace
list: (_) @list)
(scoped_identifier
path: (_) @namespace
name: (identifier) @name)
(use_list (identifier) @name)
(use_declaration (identifier) @name)
(use_as_clause
path: (scoped_identifier
path: (_) @namespace
name: (_) @name)
alias: (_) @alias)
(use_as_clause
path: (identifier) @name
alias: (_) @alias)
(use_wildcard
(_)? @namespace
"*" @wildcard)

View file

@ -0,0 +1,14 @@
(import_statement
import_clause: (import_clause
[
(identifier) @name
(named_imports
(import_specifier
name: (_) @name
alias: (_)? @alias))
])
source: (string (string_fragment) @source)) @import
(import_statement
!import_clause
source: (string (string_fragment) @source @wildcard)) @import

View file

@ -22,6 +22,7 @@ prettier_parser_name = "typescript"
tab_size = 2
debuggers = ["JavaScript"]
scope_opt_in_language_servers = ["tailwindcss-language-server"]
import_path_strip_regex = "(?:/index)?\\.[jt]s$"
[overrides.string]
completion_query_characters = ["-", "."]

View file

@ -0,0 +1,20 @@
(import_statement
import_clause: (import_clause
[
(identifier) @name
(named_imports
(import_specifier
name: (_) @name
alias: (_)? @alias))
(namespace_import) @wildcard
])
source: (string (string_fragment) @source)) @import
(import_statement
!source
import_clause: (import_require_clause
source: (string (string_fragment) @source))) @wildcard @import
(import_statement
!import_clause
source: (string (string_fragment) @source)) @wildcard @import

View file

@ -2668,7 +2668,7 @@ impl OutlinePanel {
|mut buffer_excerpts, (excerpt_id, buffer_snapshot, excerpt_range)| {
let buffer_id = buffer_snapshot.remote_id();
let file = File::from_dyn(buffer_snapshot.file());
let entry_id = file.and_then(|file| file.project_entry_id(cx));
let entry_id = file.and_then(|file| file.project_entry_id());
let worktree = file.map(|file| file.worktree.read(cx).snapshot());
let is_new = new_entries.contains(&excerpt_id)
|| !outline_panel.excerpts.contains_key(&buffer_id);

View file

@ -2571,8 +2571,8 @@ impl Project {
let task = self.open_buffer(path, cx);
cx.spawn(async move |_project, cx| {
let buffer = task.await?;
let project_entry_id = buffer.read_with(cx, |buffer, cx| {
File::from_dyn(buffer.file()).and_then(|file| file.project_entry_id(cx))
let project_entry_id = buffer.read_with(cx, |buffer, _cx| {
File::from_dyn(buffer.file()).and_then(|file| file.project_entry_id())
})?;
Ok((project_entry_id, buffer))
@ -5515,8 +5515,8 @@ impl ProjectItem for Buffer {
Some(project.update(cx, |project, cx| project.open_buffer(path.clone(), cx)))
}
fn entry_id(&self, cx: &App) -> Option<ProjectEntryId> {
File::from_dyn(self.file()).and_then(|file| file.project_entry_id(cx))
fn entry_id(&self, _cx: &App) -> Option<ProjectEntryId> {
File::from_dyn(self.file()).and_then(|file| file.project_entry_id())
}
fn project_path(&self, cx: &App) -> Option<ProjectPath> {

View file

@ -4,6 +4,7 @@ use itertools::Itertools;
use regex::Regex;
use serde::{Deserialize, Serialize};
use std::cmp::Ordering;
use std::error::Error;
use std::fmt::{Display, Formatter};
use std::mem;
use std::path::StripPrefixError;
@ -184,6 +185,31 @@ impl<T: AsRef<Path>> PathExt for T {
}
}
/// Returns `true` when [`strip_path_suffix`] can strip `suffix` from the end of `base`.
pub fn path_ends_with(base: &Path, suffix: &Path) -> bool {
strip_path_suffix(base, suffix).is_some()
}
/// Strips `suffix` from the end of `base`, returning the part of `base` that precedes it.
///
/// The comparison is done on the encoded bytes of both paths. A match only counts when the
/// bytes before the suffix are empty or end in a path separator; that separator is not included
/// in the returned path. Returns `None` otherwise.
pub fn strip_path_suffix<'a>(base: &'a Path, suffix: &Path) -> Option<&'a Path> {
    let base_bytes = base.as_os_str().as_encoded_bytes();
    let suffix_bytes = suffix.as_os_str().as_encoded_bytes();
    let remainder = base_bytes.strip_suffix(suffix_bytes)?;

    let prefix = match remainder.split_last() {
        // `base` and `suffix` are byte-for-byte equal: nothing precedes the suffix.
        None => remainder,
        Some((&last_byte, rest)) if std::path::is_separator(last_byte as char) => rest,
        // The suffix begins in the middle of a path component.
        Some(_) => return None,
    };

    // SAFETY: `prefix` starts at the beginning of the bytes returned by `as_encoded_bytes` and
    // is either empty or ends immediately before an ASCII separator byte, so it is split from
    // the original encoding at a boundary next to a valid non-empty UTF-8 substring.
    let os_str = unsafe { OsStr::from_encoded_bytes_unchecked(prefix) };
    Some(Path::new(os_str))
}
/// In memory, this is identical to `Path`. On non-Windows platforms, conversions to this type are
/// no-ops. On Windows, these conversions sanitize UNC paths by removing the `\\\\?\\` prefix.
#[derive(Eq, PartialEq, Hash, Ord, PartialOrd)]
@ -401,6 +427,82 @@ pub fn is_absolute(path_like: &str, path_style: PathStyle) -> bool {
.is_some_and(|path| path.starts_with('/') || path.starts_with('\\')))
}
/// Error returned by [`normalize_lexically`] when a path cannot be normalized.
#[derive(Debug, PartialEq)]
#[non_exhaustive]
pub struct NormalizeError;

impl Error for NormalizeError {}

impl std::fmt::Display for NormalizeError {
    /// Writes a fixed, human-readable description of the failure.
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            formatter,
            "parent reference `..` points outside of base directory"
        )
    }
}
/// Copied from stdlib where it's unstable.
///
/// Normalize a path, including `..` without traversing the filesystem.
///
/// Returns an error if normalization would leave leading `..` components.
///
/// <div class="warning">
///
/// This function always resolves `..` to the "lexical" parent.
/// That is "a/b/../c" will always resolve to `a/c` which can change the meaning of the path.
/// In particular, `a/c` and `a/b/../c` are distinct on many systems because `b` may be a symbolic link, so its parent isn't `a`.
///
/// </div>
///
/// [`std::path::absolute`] is an alternative that preserves `..`.
/// Or [`Path::canonicalize`] can be used to resolve any `..` by querying the filesystem.
///
/// # Errors
///
/// Returns [`NormalizeError`] when `path` starts with `..`, when a `..` would step above the
/// root established by the leading component(s), or when a prefix component appears anywhere
/// other than at the start of the path.
pub fn normalize_lexically(path: &Path) -> Result<PathBuf, NormalizeError> {
use std::path::Component;
let mut lexical = PathBuf::new();
let mut iter = path.components().peekable();
// Find the root, if any, and add it to the lexical path.
// Here we treat the Windows path "C:\" as a single "root" even though
// `components` splits it into two: (Prefix, RootDir).
// `root` is the length of `lexical` once the root has been pushed; it is compared against
// below to detect a `..` that would pop past the root.
let root = match iter.peek() {
Some(Component::ParentDir) => return Err(NormalizeError),
// A leading `.` is kept and treated like a root.
Some(p @ Component::RootDir) | Some(p @ Component::CurDir) => {
lexical.push(p);
iter.next();
lexical.as_os_str().len()
}
Some(Component::Prefix(prefix)) => {
lexical.push(prefix.as_os_str());
iter.next();
if let Some(p @ Component::RootDir) = iter.peek() {
lexical.push(p);
iter.next();
}
lexical.as_os_str().len()
}
None => return Ok(PathBuf::new()),
// A path starting with a normal component has no root, so the root length is zero.
Some(Component::Normal(_)) => 0,
};
for component in iter {
match component {
Component::RootDir => unreachable!(),
Component::Prefix(_) => return Err(NormalizeError),
Component::CurDir => continue,
Component::ParentDir => {
// It's an error if ParentDir causes us to go above the "root".
if lexical.as_os_str().len() == root {
return Err(NormalizeError);
} else {
lexical.pop();
}
}
Component::Normal(path) => lexical.push(path),
}
}
Ok(lexical)
}
/// A delimiter to use in `path_query:row_number:column_number` strings parsing.
pub const FILE_ROW_COLUMN_DELIMITER: char = ':';
@ -1798,4 +1900,35 @@ mod tests {
let path = Path::new("/a/b/c/long.app.tar.gz");
assert_eq!(path.multiple_extensions(), Some("app.tar.gz".to_string()));
}
#[test]
fn test_strip_path_suffix() {
    // (base, suffix, expected remainder)
    let cases: [(&str, &str, Option<&str>); 7] = [
        ("/a/b/c/file_name", "file_name", Some("/a/b/c")),
        ("/a/b/c/file_name.tsx", "file_name.tsx", Some("/a/b/c")),
        (
            "/a/b/c/file_name.stories.tsx",
            "c/file_name.stories.tsx",
            Some("/a/b"),
        ),
        ("/a/b/c/long.app.tar.gz", "b/c/long.app.tar.gz", Some("/a")),
        ("/a/b/c/long.app.tar.gz", "/a/b/c/long.app.tar.gz", Some("")),
        ("/a/b/c/long.app.tar.gz", "/a/b/c/no_match.app.tar.gz", None),
        // A suffix that starts in the middle of a component must not match.
        ("/a/b/c/long.app.tar.gz", "app.tar.gz", None),
    ];

    for (base, suffix, expected) in cases {
        assert_eq!(
            strip_path_suffix(Path::new(base), Path::new(suffix)),
            expected.map(Path::new)
        );
    }
}
}

View file

@ -3154,7 +3154,7 @@ impl File {
self.worktree.read(cx).id()
}
pub fn project_entry_id(&self, _: &App) -> Option<ProjectEntryId> {
pub fn project_entry_id(&self) -> Option<ProjectEntryId> {
match self.disk_state {
DiskState::Deleted => None,
_ => self.entry_id,

View file

@ -7,8 +7,8 @@ use cloud_llm_client::{
};
use cloud_zeta2_prompt::DEFAULT_MAX_PROMPT_BYTES;
use edit_prediction_context::{
DeclarationId, EditPredictionContext, EditPredictionExcerptOptions, SyntaxIndex,
SyntaxIndexState,
DeclarationId, DeclarationStyle, EditPredictionContext, EditPredictionContextOptions,
EditPredictionExcerptOptions, EditPredictionScoreOptions, SyntaxIndex, SyntaxIndexState,
};
use futures::AsyncReadExt as _;
use futures::channel::mpsc;
@ -43,14 +43,20 @@ const BUFFER_CHANGE_GROUPING_INTERVAL: Duration = Duration::from_secs(1);
/// Maximum number of events to track.
const MAX_EVENT_COUNT: usize = 16;
pub const DEFAULT_EXCERPT_OPTIONS: EditPredictionExcerptOptions = EditPredictionExcerptOptions {
max_bytes: 512,
min_bytes: 128,
target_before_cursor_over_total_bytes: 0.5,
pub const DEFAULT_CONTEXT_OPTIONS: EditPredictionContextOptions = EditPredictionContextOptions {
use_imports: true,
excerpt: EditPredictionExcerptOptions {
max_bytes: 512,
min_bytes: 128,
target_before_cursor_over_total_bytes: 0.5,
},
score: EditPredictionScoreOptions {
omit_excerpt_overlaps: true,
},
};
pub const DEFAULT_OPTIONS: ZetaOptions = ZetaOptions {
excerpt: DEFAULT_EXCERPT_OPTIONS,
context: DEFAULT_CONTEXT_OPTIONS,
max_prompt_bytes: DEFAULT_MAX_PROMPT_BYTES,
max_diagnostic_bytes: 2048,
prompt_format: PromptFormat::DEFAULT,
@ -75,7 +81,7 @@ pub struct Zeta {
#[derive(Debug, Clone, PartialEq)]
pub struct ZetaOptions {
pub excerpt: EditPredictionExcerptOptions,
pub context: EditPredictionContextOptions,
pub max_prompt_bytes: usize,
pub max_diagnostic_bytes: usize,
pub prompt_format: predict_edits_v3::PromptFormat,
@ -501,6 +507,11 @@ impl Zeta {
let diagnostics = snapshot.diagnostic_sets().clone();
let parent_abs_path = project::File::from_dyn(buffer.read(cx).file()).and_then(|f| {
let mut path = f.worktree.read(cx).absolutize(&f.path);
if path.pop() { Some(path) } else { None }
});
let request_task = cx.background_spawn({
let snapshot = snapshot.clone();
let buffer = buffer.clone();
@ -519,7 +530,8 @@ impl Zeta {
let Some(context) = EditPredictionContext::gather_context(
cursor_point,
&snapshot,
&options.excerpt,
parent_abs_path.as_deref(),
&options.context,
index_state.as_deref(),
) else {
return Ok(None);
@ -785,6 +797,11 @@ impl Zeta {
.map(|worktree| worktree.read(cx).snapshot())
.collect::<Vec<_>>();
let parent_abs_path = project::File::from_dyn(buffer.read(cx).file()).and_then(|f| {
let mut path = f.worktree.read(cx).absolutize(&f.path);
if path.pop() { Some(path) } else { None }
});
cx.background_spawn(async move {
let index_state = if let Some(index_state) = index_state {
Some(index_state.lock_owned().await)
@ -798,7 +815,8 @@ impl Zeta {
EditPredictionContext::gather_context(
cursor_point,
&snapshot,
&options.excerpt,
parent_abs_path.as_deref(),
&options.context,
index_state.as_deref(),
)
.context("Failed to select excerpt")
@ -893,9 +911,9 @@ fn make_cloud_request(
text_is_truncated,
signature_range: snippet.declaration.signature_range_in_item_text(),
parent_index,
score_components: snippet.score_components,
signature_score: snippet.scores.signature,
declaration_score: snippet.scores.declaration,
signature_score: snippet.score(DeclarationStyle::Signature),
declaration_score: snippet.score(DeclarationStyle::Declaration),
score_components: snippet.components,
});
}

View file

@ -16,7 +16,7 @@ use ui::{ContextMenu, ContextMenuEntry, DropdownMenu, prelude::*};
use ui_input::SingleLineInput;
use util::{ResultExt, paths::PathStyle, rel_path::RelPath};
use workspace::{Item, SplitDirection, Workspace};
use zeta2::{Zeta, ZetaOptions};
use zeta2::{DEFAULT_CONTEXT_OPTIONS, Zeta, ZetaOptions};
use edit_prediction_context::{DeclarationStyle, EditPredictionExcerptOptions};
@ -146,16 +146,19 @@ impl Zeta2Inspector {
cx: &mut Context<Self>,
) {
self.max_excerpt_bytes_input.update(cx, |input, cx| {
input.set_text(options.excerpt.max_bytes.to_string(), window, cx);
input.set_text(options.context.excerpt.max_bytes.to_string(), window, cx);
});
self.min_excerpt_bytes_input.update(cx, |input, cx| {
input.set_text(options.excerpt.min_bytes.to_string(), window, cx);
input.set_text(options.context.excerpt.min_bytes.to_string(), window, cx);
});
self.cursor_context_ratio_input.update(cx, |input, cx| {
input.set_text(
format!(
"{:.2}",
options.excerpt.target_before_cursor_over_total_bytes
options
.context
.excerpt
.target_before_cursor_over_total_bytes
),
window,
cx,
@ -236,7 +239,8 @@ impl Zeta2Inspector {
.unwrap_or_default()
}
let excerpt_options = EditPredictionExcerptOptions {
let mut context_options = DEFAULT_CONTEXT_OPTIONS.clone();
context_options.excerpt = EditPredictionExcerptOptions {
max_bytes: number_input_value(&this.max_excerpt_bytes_input, cx),
min_bytes: number_input_value(&this.min_excerpt_bytes_input, cx),
target_before_cursor_over_total_bytes: number_input_value(
@ -248,7 +252,7 @@ impl Zeta2Inspector {
let zeta_options = this.zeta.read(cx).options();
this.set_options(
ZetaOptions {
excerpt: excerpt_options,
context: context_options,
max_prompt_bytes: number_input_value(&this.max_prompt_bytes_input, cx),
max_diagnostic_bytes: zeta_options.max_diagnostic_bytes,
prompt_format: zeta_options.prompt_format,

View file

@ -18,6 +18,7 @@ clap.workspace = true
client.workspace = true
cloud_llm_client.workspace= true
cloud_zeta2_prompt.workspace= true
collections.workspace = true
debug_adapter_extension.workspace = true
edit_prediction_context.workspace = true
extension.workspace = true
@ -32,6 +33,7 @@ language_models.workspace = true
languages = { workspace = true, features = ["load-grammars"] }
log.workspace = true
node_runtime.workspace = true
ordered-float.workspace = true
paths.workspace = true
project.workspace = true
prompt_store.workspace = true
@ -49,4 +51,3 @@ workspace-hack.workspace = true
zeta.workspace = true
zeta2.workspace = true
zlog.workspace = true
ordered-float.workspace = true

File diff suppressed because it is too large Load diff