Prevent stale related excerpts by avoiding storing their contents as strings (#46666)

This fixes an issue, noticed in particular with Mercury edit predictions,
where related excerpts were stored as strings and could go stale.

* [x] fix storage to not go stale
* [x] exclude excerpts that intersect the cursor excerpt
* [x] see if string representation of excerpts can be cached, to avoid
rebuilding it on every prediction

Release Notes:

- N/A

---------

Co-authored-by: Ben Kunkle <ben@zed.dev>
This commit is contained in:
Max Brunsfeld 2026-01-13 13:31:23 -08:00 committed by GitHub
parent 1b416c71d6
commit d67c8f2884
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 303 additions and 99 deletions

1
Cargo.lock generated
View file

@ -5310,6 +5310,7 @@ name = "edit_prediction_context"
version = "0.1.0"
dependencies = [
"anyhow",
"clock",
"cloud_llm_client",
"collections",
"env_logger 0.11.8",

View file

@ -194,7 +194,7 @@ pub struct EditPredictionModelInput {
snapshot: BufferSnapshot,
position: Anchor,
events: Vec<Arc<zeta_prompt::Event>>,
related_files: Arc<[RelatedFile]>,
related_files: Vec<RelatedFile>,
recent_paths: VecDeque<ProjectPath>,
trigger: PredictEditsRequestTrigger,
diagnostic_search_range: Range<Point>,
@ -766,22 +766,31 @@ impl EditPredictionStore {
pub fn context_for_project<'a>(
&'a self,
project: &Entity<Project>,
cx: &'a App,
) -> Arc<[RelatedFile]> {
cx: &'a mut App,
) -> Vec<RelatedFile> {
self.projects
.get(&project.entity_id())
.map(|project| project.context.read(cx).related_files())
.unwrap_or_else(|| vec![].into())
.map(|project| {
project
.context
.update(cx, |context, cx| context.related_files(cx))
})
.unwrap_or_default()
}
pub fn context_for_project_with_buffers<'a>(
&'a self,
project: &Entity<Project>,
cx: &'a App,
) -> Option<impl 'a + Iterator<Item = (RelatedFile, Entity<Buffer>)>> {
cx: &'a mut App,
) -> Vec<(RelatedFile, Entity<Buffer>)> {
self.projects
.get(&project.entity_id())
.map(|project| project.context.read(cx).related_files_with_buffers())
.map(|project| {
project
.context
.update(cx, |context, cx| context.related_files_with_buffers(cx))
})
.unwrap_or_default()
}
pub fn usage(&self, cx: &App) -> Option<EditPredictionUsage> {
@ -1721,7 +1730,7 @@ impl EditPredictionStore {
let related_files = if self.use_context {
self.context_for_project(&project, cx)
} else {
Vec::new().into()
Vec::new()
};
let inputs = EditPredictionModelInput {
@ -2089,8 +2098,8 @@ impl EditPredictionStore {
) {
self.get_or_init_project(project, cx)
.context
.update(cx, |store, _| {
store.set_related_files(related_files);
.update(cx, |store, cx| {
store.set_related_files(related_files, cx);
});
}
@ -2200,6 +2209,23 @@ impl EditPredictionStore {
}
}
/// Drops related excerpts that the cursor excerpt already covers.
///
/// For every file whose path equals `cursor_path`, an excerpt is removed when its row
/// range lies entirely inside `cursor_row_range`. Excerpts that start before the cursor
/// range or end after it are kept, even if they partially overlap it. Files that end up
/// with no excerpts (including files that had none to begin with) are removed.
pub(crate) fn filter_redundant_excerpts(
    mut related_files: Vec<RelatedFile>,
    cursor_path: &Path,
    cursor_row_range: Range<u32>,
) -> Vec<RelatedFile> {
    related_files.retain_mut(|file| {
        if file.path.as_ref() == cursor_path {
            file.excerpts.retain(|excerpt| {
                let rows = &excerpt.row_range;
                let covered_by_cursor =
                    rows.start >= cursor_row_range.start && rows.end <= cursor_row_range.end;
                !covered_by_cursor
            });
        }
        !file.excerpts.is_empty()
    });
    related_files
}
#[derive(Error, Debug)]
#[error(
"You must update to Zed version {minimum_version} or higher to continue using edit predictions."

View file

@ -68,6 +68,12 @@ impl Mercury {
MAX_REWRITE_TOKENS,
);
let related_files = crate::filter_redundant_excerpts(
related_files,
full_path.as_ref(),
context_range.start.row..context_range.end.row,
);
let context_offset_range = context_range.to_offset(&snapshot);
let editable_offset_range = editable_range.to_offset(&snapshot);
@ -245,7 +251,7 @@ fn build_prompt(inputs: &ZetaPromptInput) -> String {
prompt.push_str(CODE_SNIPPET_FILE_PATH_PREFIX);
prompt.push_str(related_file.path.to_string_lossy().as_ref());
prompt.push('\n');
prompt.push_str(&related_excerpt.text.to_string());
prompt.push_str(related_excerpt.text.as_ref());
},
);
}

View file

@ -148,7 +148,7 @@ mod tests {
edit_preview,
inputs: ZetaPromptInput {
events: vec![],
related_files: vec![].into(),
related_files: vec![],
cursor_path: Path::new("path.txt").into(),
cursor_offset_in_excerpt: 0,
cursor_excerpt: "".into(),

View file

@ -133,7 +133,7 @@ pub(crate) fn request_prediction_with_zeta1(
let inputs = ZetaPromptInput {
events: included_events.into(),
related_files: vec![].into(),
related_files: vec![],
cursor_path: full_path,
cursor_excerpt: snapshot
.text_for_range(context_range)

View file

@ -189,7 +189,7 @@ pub fn request_prediction_with_zeta2(
pub fn zeta2_prompt_input(
snapshot: &language::BufferSnapshot,
related_files: Arc<[zeta_prompt::RelatedFile]>,
related_files: Vec<zeta_prompt::RelatedFile>,
events: Vec<Arc<zeta_prompt::Event>>,
excerpt_path: Arc<Path>,
cursor_offset: usize,
@ -204,6 +204,12 @@ pub fn zeta2_prompt_input(
MAX_CONTEXT_TOKENS,
);
let related_files = crate::filter_redundant_excerpts(
related_files,
excerpt_path.as_ref(),
context_range.start.row..context_range.end.row,
);
let context_start_offset = context_range.start.to_offset(snapshot);
let editable_offset_range = editable_range.to_offset(snapshot);
let cursor_offset_in_excerpt = cursor_offset - context_start_offset;

View file

@ -10,7 +10,6 @@ use language::{Anchor, Buffer};
use project::Project;
use serde::{Deserialize, Serialize};
use std::ops::Range;
use std::sync::Arc;
use std::{
borrow::Cow,
io::Read,
@ -61,7 +60,7 @@ pub struct ExampleState {
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ExampleContext {
pub files: Arc<[RelatedFile]>,
pub files: Vec<RelatedFile>,
}
#[derive(Clone, Debug, Serialize, Deserialize)]

View file

@ -218,7 +218,7 @@ impl TeacherPrompt {
}
let mut prompt = String::new();
for file in context.files.as_ref() {
for file in context.files.iter() {
let path_str = file.path.to_string_lossy();
writeln!(&mut prompt, "`````{path_str}").ok();
let mut prev_row = 0;

View file

@ -12,18 +12,20 @@ workspace = true
path = "src/edit_prediction_context.rs"
[dependencies]
parking_lot.workspace = true
anyhow.workspace = true
clock.workspace = true
cloud_llm_client.workspace = true
collections.workspace = true
futures.workspace = true
gpui.workspace = true
language.workspace = true
lsp.workspace = true
project.workspace = true
log.workspace = true
lsp.workspace = true
parking_lot.workspace = true
project.workspace = true
serde.workspace = true
smallvec.workspace = true
text.workspace = true
tree-sitter.workspace = true
util.workspace = true
zeta_prompt.workspace = true

View file

@ -1,16 +1,15 @@
use language::{BufferSnapshot, OffsetRangeExt as _, Point};
use std::ops::Range;
use zeta_prompt::RelatedExcerpt;
#[cfg(not(test))]
const MAX_OUTLINE_ITEM_BODY_SIZE: usize = 512;
#[cfg(test)]
const MAX_OUTLINE_ITEM_BODY_SIZE: usize = 24;
pub fn assemble_excerpts(
pub fn assemble_excerpt_ranges(
buffer: &BufferSnapshot,
mut input_ranges: Vec<Range<Point>>,
) -> Vec<RelatedExcerpt> {
) -> Vec<Range<u32>> {
merge_ranges(&mut input_ranges);
let mut outline_ranges = Vec::new();
@ -76,10 +75,7 @@ pub fn assemble_excerpts(
input_ranges
.into_iter()
.map(|range| RelatedExcerpt {
row_range: range.start.row..range.end.row,
text: buffer.text_for_range(range).collect(),
})
.map(|range| range.start.row..range.end.row)
.collect()
}

View file

@ -1,8 +1,8 @@
use crate::assemble_excerpts::assemble_excerpts;
use crate::assemble_excerpts::assemble_excerpt_ranges;
use anyhow::Result;
use collections::HashMap;
use futures::{FutureExt, StreamExt as _, channel::mpsc, future};
use gpui::{App, AppContext, AsyncApp, Context, Entity, EventEmitter, Task, WeakEntity};
use gpui::{App, AppContext, AsyncApp, Context, Entity, EntityId, EventEmitter, Task, WeakEntity};
use language::{Anchor, Buffer, BufferSnapshot, OffsetRangeExt as _, Point, ToOffset as _};
use project::{LocationLink, Project, ProjectPath};
use smallvec::SmallVec;
@ -13,6 +13,8 @@ use std::{
sync::Arc,
time::{Duration, Instant},
};
use util::paths::PathStyle;
use util::rel_path::RelPath;
use util::{RangeExt as _, ResultExt};
mod assemble_excerpts;
@ -30,13 +32,24 @@ const IDENTIFIER_LINE_COUNT: u32 = 3;
pub struct RelatedExcerptStore {
project: WeakEntity<Project>,
related_files: Arc<[RelatedFile]>,
related_file_buffers: Vec<Entity<Buffer>>,
related_buffers: Vec<RelatedBuffer>,
cache: HashMap<Identifier, Arc<CacheEntry>>,
update_tx: mpsc::UnboundedSender<(Entity<Buffer>, Anchor)>,
identifier_line_count: u32,
}
/// A buffer that contributes related excerpts, with the excerpt locations kept as
/// anchor ranges rather than as extracted text.
struct RelatedBuffer {
/// The buffer the excerpts are read from.
buffer: Entity<Buffer>,
/// Path that is copied into `RelatedFile::path` when a `RelatedFile` is built.
path: Arc<Path>,
/// One anchor range per excerpt; `fill_cache` resolves them to points against a snapshot.
anchor_ranges: Vec<Range<Anchor>>,
/// Excerpts built by the last `fill_cache` call, or `None` if none have been built yet.
cached_file: Option<CachedRelatedFile>,
}
/// Excerpts built from a `RelatedBuffer`, tagged with the buffer version they were built from.
struct CachedRelatedFile {
/// The excerpts (row range and text) produced from the buffer's anchor ranges.
excerpts: Vec<RelatedExcerpt>,
/// Buffer version at the time `excerpts` was built; compared against the buffer's
/// current version to decide whether the cache can be reused.
buffer_version: clock::Global,
}
pub enum RelatedExcerptStoreEvent {
StartedRefresh,
FinishedRefresh {
@ -105,8 +118,7 @@ impl RelatedExcerptStore {
RelatedExcerptStore {
project: project.downgrade(),
update_tx,
related_files: Vec::new().into(),
related_file_buffers: Vec::new(),
related_buffers: Vec::new(),
cache: Default::default(),
identifier_line_count: IDENTIFIER_LINE_COUNT,
}
@ -120,21 +132,64 @@ impl RelatedExcerptStore {
self.update_tx.unbounded_send((buffer, position)).ok();
}
pub fn related_files(&self) -> Arc<[RelatedFile]> {
self.related_files.clone()
/// Returns a `RelatedFile` for every stored related buffer, in stored order.
///
/// Takes `&mut self` because building each file may refresh that buffer's cached excerpts.
pub fn related_files(&mut self, cx: &App) -> Vec<RelatedFile> {
    let mut files = Vec::with_capacity(self.related_buffers.len());
    for related in &mut self.related_buffers {
        files.push(related.related_file(cx));
    }
    files
}
pub fn related_files_with_buffers(
&self,
) -> impl Iterator<Item = (RelatedFile, Entity<Buffer>)> {
self.related_files
.iter()
.cloned()
.zip(self.related_file_buffers.iter().cloned())
/// Returns each stored related buffer's `RelatedFile` paired with a handle to that buffer,
/// in stored order.
pub fn related_files_with_buffers(&mut self, cx: &App) -> Vec<(RelatedFile, Entity<Buffer>)> {
    let mut pairs = Vec::with_capacity(self.related_buffers.len());
    for related in &mut self.related_buffers {
        let file = related.related_file(cx);
        pairs.push((file, related.buffer.clone()));
    }
    pairs
}
pub fn set_related_files(&mut self, files: Vec<RelatedFile>) {
self.related_files = files.into();
/// Replaces the stored related buffers with ones resolved from `files`.
///
/// Each file is matched to a worktree whose root name equals the first component of the
/// file's path, and then to a buffer for the remaining relative path. A file is silently
/// skipped when the project has been dropped, no worktree matches, the relative path
/// cannot be built, or `get_open_buffer` returns `None`.
///
/// Only the excerpts' row ranges are used: they are converted to anchor ranges, and the
/// excerpt text is rebuilt from the buffer later.
pub fn set_related_files(&mut self, files: Vec<RelatedFile>, cx: &App) {
self.related_buffers = files
.into_iter()
.filter_map(|file| {
let project = self.project.upgrade()?;
let project = project.read(cx);
// Find the worktree whose root name is the first component of the file's path.
let worktree = project.worktrees(cx).find(|wt| {
let root_name = wt.read(cx).root_name().as_unix_str();
file.path
.components()
.next()
.is_some_and(|c| c.as_os_str() == root_name)
})?;
let worktree = worktree.read(cx);
// Strip the root name so the remainder is relative to the worktree.
let relative_path = file
.path
.strip_prefix(worktree.root_name().as_unix_str())
.ok()?;
let relative_path = RelPath::new(relative_path, PathStyle::Posix).ok()?;
let project_path = ProjectPath {
worktree_id: worktree.id(),
path: relative_path.into_owned().into(),
};
// NOTE(review): presumably this only finds buffers that are already open — confirm.
let buffer = project.get_open_buffer(&project_path, cx)?;
let snapshot = buffer.read(cx).snapshot();
// Each excerpt spans from column 0 of its start row to the end of its end row.
let anchor_ranges = file
.excerpts
.iter()
.map(|excerpt| {
let start = snapshot.anchor_before(Point::new(excerpt.row_range.start, 0));
let end_row = excerpt.row_range.end;
let end_col = snapshot.line_len(end_row);
let end = snapshot.anchor_after(Point::new(end_row, end_col));
start..end
})
.collect();
Some(RelatedBuffer {
buffer,
path: file.path.clone(),
anchor_ranges,
// No cached excerpts yet; `related_file` builds them when it finds none.
cached_file: None,
})
})
.collect();
}
async fn fetch_excerpts(
@ -236,8 +291,7 @@ impl RelatedExcerptStore {
}
mean_definition_latency /= cache_miss_count.max(1) as u32;
let (new_cache, related_files, related_file_buffers) =
rebuild_related_files(&project, new_cache, cx).await?;
let (new_cache, related_buffers) = rebuild_related_files(&project, new_cache, cx).await?;
if let Some(file) = &file {
log::debug!(
@ -249,8 +303,7 @@ impl RelatedExcerptStore {
this.update(cx, |this, cx| {
this.cache = new_cache;
this.related_files = related_files.into();
this.related_file_buffers = related_file_buffers;
this.related_buffers = related_buffers;
cx.emit(RelatedExcerptStoreEvent::FinishedRefresh {
cache_hit_count,
cache_miss_count,
@ -265,13 +318,9 @@ impl RelatedExcerptStore {
async fn rebuild_related_files(
project: &Entity<Project>,
new_entries: HashMap<Identifier, Arc<CacheEntry>>,
mut new_entries: HashMap<Identifier, Arc<CacheEntry>>,
cx: &mut AsyncApp,
) -> Result<(
HashMap<Identifier, Arc<CacheEntry>>,
Vec<RelatedFile>,
Vec<Entity<Buffer>>,
)> {
) -> Result<(HashMap<Identifier, Arc<CacheEntry>>, Vec<RelatedBuffer>)> {
let mut snapshots = HashMap::default();
let mut worktree_root_names = HashMap::default();
for entry in new_entries.values() {
@ -302,59 +351,109 @@ async fn rebuild_related_files(
Ok(cx
.background_spawn(async move {
let mut files = Vec::new();
let mut ranges_by_buffer = HashMap::<_, Vec<Range<Point>>>::default();
let mut ranges_by_buffer =
HashMap::<EntityId, (Entity<Buffer>, Vec<Range<Point>>)>::default();
let mut paths_by_buffer = HashMap::default();
for entry in new_entries.values() {
for entry in new_entries.values_mut() {
for definition in &entry.definitions {
let Some(snapshot) = snapshots.get(&definition.buffer.entity_id()) else {
continue;
};
paths_by_buffer.insert(definition.buffer.entity_id(), definition.path.clone());
ranges_by_buffer
.entry(definition.buffer.clone())
.or_default()
.entry(definition.buffer.entity_id())
.or_insert_with(|| (definition.buffer.clone(), Vec::new()))
.1
.push(definition.anchor_range.to_point(snapshot));
}
}
for (buffer, ranges) in ranges_by_buffer {
let Some(snapshot) = snapshots.get(&buffer.entity_id()) else {
continue;
};
let Some(project_path) = paths_by_buffer.get(&buffer.entity_id()) else {
continue;
};
let excerpts = assemble_excerpts(snapshot, ranges);
let Some(root_name) = worktree_root_names.get(&project_path.worktree_id) else {
continue;
};
let mut related_buffers: Vec<RelatedBuffer> = ranges_by_buffer
.into_iter()
.filter_map(|(entity_id, (buffer, ranges))| {
let snapshot = snapshots.get(&entity_id)?;
let project_path = paths_by_buffer.get(&entity_id)?;
let row_ranges = assemble_excerpt_ranges(snapshot, ranges);
let root_name = worktree_root_names.get(&project_path.worktree_id)?;
let path = Path::new(&format!(
"{}/{}",
root_name,
project_path.path.as_unix_str()
))
.into();
let path: Arc<Path> = Path::new(&format!(
"{}/{}",
root_name,
project_path.path.as_unix_str()
))
.into();
files.push((
buffer,
RelatedFile {
let anchor_ranges = row_ranges
.into_iter()
.map(|row_range| {
let start = snapshot.anchor_before(Point::new(row_range.start, 0));
let end_col = snapshot.line_len(row_range.end);
let end = snapshot.anchor_after(Point::new(row_range.end, end_col));
start..end
})
.collect();
let mut related_buffer = RelatedBuffer {
buffer,
path,
excerpts,
max_row: snapshot.max_point().row,
},
));
}
anchor_ranges,
cached_file: None,
};
related_buffer.fill_cache(snapshot);
Some(related_buffer)
})
.collect();
files.sort_by_key(|(_, file)| file.path.clone());
let (related_buffers, related_files) = files.into_iter().unzip();
related_buffers.sort_by_key(|related| related.path.clone());
(new_entries, related_files, related_buffers)
(new_entries, related_buffers)
})
.await)
}
impl RelatedBuffer {
    /// Builds the `RelatedFile` for this buffer from the buffer's current contents.
    ///
    /// The cached excerpts are reused when the buffer's current version equals the version
    /// they were built from; otherwise they are rebuilt with `fill_cache`. `max_row` is
    /// always read from the current buffer.
    fn related_file(&mut self, cx: &App) -> RelatedFile {
        let buffer = self.buffer.read(cx);
        // Clone the path up front: `cached` below may be a borrow obtained through
        // `&mut self`, which would conflict with reading `self.path` afterwards.
        let path = self.path.clone();
        let cached = if let Some(cached) = &self.cached_file
            && buffer.version() == cached.buffer_version
        {
            cached
        } else {
            self.fill_cache(buffer)
        };
        RelatedFile {
            path,
            excerpts: cached.excerpts.clone(),
            max_row: buffer.max_point().row,
        }
    }

    /// Rebuilds the cached excerpts from `buffer` and returns the new cache entry.
    ///
    /// Every anchor range is resolved to points in `buffer`; the excerpt records the
    /// resulting rows and the text between the two points. The entry is tagged with
    /// `buffer`'s version and replaces any previous cache.
    fn fill_cache(&mut self, buffer: &text::BufferSnapshot) -> &CachedRelatedFile {
        let excerpts = self
            .anchor_ranges
            .iter()
            .map(|range| {
                let start = range.start.to_point(buffer);
                let end = range.end.to_point(buffer);
                RelatedExcerpt {
                    row_range: start.row..end.row,
                    text: buffer.text_for_range(start..end).collect::<String>().into(),
                }
            })
            .collect::<Vec<_>>();
        // `Option::insert` stores the entry and hands back a reference to it, so no
        // `unwrap` is needed to read the value that was just written.
        self.cached_file.insert(CachedRelatedFile {
            excerpts,
            buffer_version: buffer.version().clone(),
        })
    }
}
use language::ToPoint as _;
const MAX_TARGET_LEN: usize = 128;
fn process_definition(

View file

@ -1,4 +1,5 @@
use super::*;
use crate::assemble_excerpts::assemble_excerpt_ranges;
use futures::channel::mpsc::UnboundedReceiver;
use gpui::TestAppContext;
use indoc::indoc;
@ -42,8 +43,8 @@ async fn test_edit_prediction_context(cx: &mut TestAppContext) {
});
cx.executor().advance_clock(DEBOUNCE_DURATION);
related_excerpt_store.update(cx, |store, _| {
let excerpts = store.related_files();
related_excerpt_store.update(cx, |store, cx| {
let excerpts = store.related_files(cx);
assert_related_files(
&excerpts,
&[
@ -84,6 +85,65 @@ async fn test_edit_prediction_context(cx: &mut TestAppContext) {
],
);
});
let company_buffer = related_excerpt_store.update(cx, |store, cx| {
store
.related_files_with_buffers(cx)
.into_iter()
.find(|(file, _)| file.path.to_str() == Some("root/src/company.rs"))
.map(|(_, buffer)| buffer)
.expect("company.rs buffer not found")
});
company_buffer.update(cx, |buffer, cx| {
let text = buffer.text();
let insert_pos = text.find("address: Address,").unwrap() + "address: Address,".len();
buffer.edit([(insert_pos..insert_pos, "\n name: String,")], None, cx);
});
related_excerpt_store.update(cx, |store, cx| {
let excerpts = store.related_files(cx);
assert_related_files(
&excerpts,
&[
(
"root/src/company.rs",
&[indoc! {"
pub struct Company {
owner: Arc<Person>,
address: Address,
name: String,
}"}],
),
(
"root/src/main.rs",
&[
indoc! {"
pub struct Session {
company: Arc<Company>,
}
impl Session {
pub fn set_company(&mut self, company: Arc<Company>) {"},
indoc! {"
}
}"},
],
),
(
"root/src/person.rs",
&[
indoc! {"
impl Person {
pub fn get_first_name(&self) -> &str {
&self.first_name
}"},
"}",
],
),
],
);
});
}
#[gpui::test]
@ -222,7 +282,18 @@ fn test_assemble_excerpts(cx: &mut TestAppContext) {
.map(|range| range.to_point(&buffer))
.collect();
let excerpts = assemble_excerpts(&buffer.snapshot(), ranges);
let row_ranges = assemble_excerpt_ranges(&buffer.snapshot(), ranges);
let excerpts: Vec<RelatedExcerpt> = row_ranges
.into_iter()
.map(|row_range| {
let start = Point::new(row_range.start, 0);
let end = Point::new(row_range.end, buffer.line_len(row_range.end));
RelatedExcerpt {
row_range,
text: buffer.text_for_range(start..end).collect::<String>().into(),
}
})
.collect();
let output = format_excerpts(buffer, &excerpts);
assert_eq!(output, expected_output);

View file

@ -153,11 +153,9 @@ impl EditPredictionContextView {
run.finished_at = Some(info.timestamp);
run.metadata = info.metadata;
let related_files = self
.store
.read(cx)
.context_for_project_with_buffers(&self.project, cx)
.map_or(Vec::new(), |files| files.collect());
let related_files = self.store.update(cx, |store, cx| {
store.context_for_project_with_buffers(&self.project, cx)
});
let editor = run.editor.clone();
let multibuffer = run.editor.read(cx).buffer().clone();

View file

@ -374,7 +374,7 @@ impl RatePredictionsModal {
write!(&mut formatted_inputs, "## Related files\n\n").unwrap();
for included_file in prediction.inputs.related_files.as_ref() {
for included_file in prediction.inputs.related_files.iter() {
write!(
&mut formatted_inputs,
"### {}\n\n",

View file

@ -13,7 +13,7 @@ pub struct ZetaPromptInput {
pub editable_range_in_excerpt: Range<usize>,
pub cursor_offset_in_excerpt: usize,
pub events: Vec<Arc<Event>>,
pub related_files: Arc<[RelatedFile]>,
pub related_files: Vec<RelatedFile>,
}
#[derive(Clone, Debug, Serialize, Deserialize)]
@ -66,7 +66,7 @@ pub struct RelatedFile {
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct RelatedExcerpt {
pub row_range: Range<u32>,
pub text: String,
pub text: Arc<str>,
}
pub fn format_zeta_prompt(input: &ZetaPromptInput) -> String {