use std::{ any::Any, future::Future, path::Path, sync::Arc, task::{Context, Poll}, }; use action_log::ActionLog; use agent::{ AgentTool, ContextServerRegistry, EditFileTool, EditFileToolInput, EditFileToolOutput, Templates, Thread, ToolCallEventStream, ToolInput, }; use agent_settings::{AgentSettings, ToolRules}; use criterion::{ BatchSize, BenchmarkId, Criterion, Throughput, black_box, criterion_group, criterion_main, }; use editor::{Editor, EditorStyle}; use futures::{StreamExt as _, pin_mut, task::noop_waker}; use gpui::{ AnyWindowHandle, AppContext as _, BackgroundExecutor, Entity, Focusable as _, TestAppContext, UpdateGlobal as _, }; use language::{FakeLspAdapter, rust_lang}; use language_model::fake_provider::FakeLanguageModel; use project::{FakeFs, Project}; use prompt_store::ProjectContext; use rand::{Rng as _, SeedableRng as _, rngs::StdRng}; use serde_json::{Value, json}; use settings::{Settings as _, SettingsStore}; use ui::IntoElement as _; const SEED: u64 = 0x5EED_5EED; const OLD_TEXT_CHUNK_SIZE: usize = 512; const NEW_TEXT_CHUNK_SIZE: usize = 512; const FILE_PROJECT_PATH: &str = "root/src/workspace_snapshot.rs"; const FILE_ABS_PATH: &str = "/root/src/workspace_snapshot.rs"; #[derive(Clone)] struct EditOp { old_text: String, new_text: String, } #[derive(Clone)] struct EditFixture { name: &'static str, old_file_text: String, expected_file_text: String, edits: Vec, } struct BenchmarkHarness { cx: Option, edit_tool: Option>, thread: Option>, partial_payloads: Vec, final_payload: Value, expected_file_text: String, editor: Option>, window: Option, // Keeps the LSP buffer-registration handle and the fake language server alive // for the lifetime of the benchmark so `didChange`/diagnostics keep flowing // while edits are applied. keep_alive: Vec>, } impl Drop for BenchmarkHarness { fn drop(&mut self) { // Release our handles to the entities first. self.edit_tool.take(); self.thread.take(); self.editor.take(); self.keep_alive.clear(); if let Some(mut cx) = self.cx.take() { // Close the editor window so the editor entity and the buffer handles // it holds are released, then pump the executor so cancelled editor / // action-log background tasks drop their captured handles before the // leak detector runs on `TestAppContext` drop. if let Some(window) = self.window.take() { cx.update_window(window, |_, window, _| window.remove_window()) .ok(); } cx.update(|_| {}); cx.executor().run_until_parked(); cx.quit(); } } } fn edit_file_tool_streaming(c: &mut Criterion) { let fixtures = fixtures(); let mut group = c.benchmark_group("edit_file_tool_streaming"); group.sample_size(10); for fixture in fixtures { let new_bytes: usize = fixture.edits.iter().map(|edit| edit.new_text.len()).sum(); group.throughput(Throughput::Bytes(new_bytes as u64)); group.bench_with_input( BenchmarkId::new(fixture.name, fixture.old_file_text.len()), &fixture, |bench, fixture| { bench.iter_batched( || setup_harness(fixture.clone()), |mut harness| { let output = run_streamed_edit(&mut harness); let EditFileToolOutput::Success { new_text, .. } = &output else { panic!("expected edit_file tool to succeed"); }; assert_eq!(new_text, &harness.expected_file_text); // Return the harness as part of the output so its teardown (which has // to pump the executor to release `Entity` handles captured by // background tasks) runs in criterion's drop phase after the timer has // stopped, rather than inside the timed region. (black_box(output), harness) }, BatchSize::SmallInput, ); }, ); } group.finish(); } fn setup_harness(fixture: EditFixture) -> BenchmarkHarness { let mut cx = init_context(); let executor = cx.executor(); let parts = block_on_executor( &executor, setup_editor_and_tool(&mut cx, fixture.old_file_text.clone()), ); // Let the LSP handshake, initial parse, and first layout settle before timing. cx.executor().run_until_parked(); let partial_payloads = streamed_partial_payloads(&fixture.edits); let final_payload = json!({ "path": FILE_PROJECT_PATH, "edits": fixture .edits .iter() .map(|edit| json!({ "old_text": edit.old_text, "new_text": edit.new_text })) .collect::>(), }); BenchmarkHarness { cx: Some(cx), edit_tool: Some(parts.edit_tool), thread: Some(parts.thread), partial_payloads, final_payload, expected_file_text: fixture.expected_file_text, editor: Some(parts.editor), window: Some(parts.window), keep_alive: parts.keep_alive, } } struct HarnessParts { edit_tool: Arc, thread: Entity, editor: Entity, window: AnyWindowHandle, keep_alive: Vec>, } /// Builds a project + edit tool, opens the target buffer in an editor view inside /// a window, and attaches a fake Rust language server. This mirrors the real app: /// the edited file is open in a pane with a language server, so each buffer edit /// drives the editor's observer cascade (matching brackets, code actions, outline, /// bracket colorization), a tree-sitter reparse, and an LSP `didChange` + /// diagnostics round-trip — the costs that dominate a real agent edit. async fn setup_editor_and_tool(cx: &mut TestAppContext, file_text: String) -> HarnessParts { let fs = FakeFs::new(cx.executor()); fs.insert_tree( "/root", json!({ "src": { "workspace_snapshot.rs": file_text, }, }), ) .await; let project = Project::test(fs, [Path::new("/root")], cx).await; let language_registry = project.read_with(cx, |project, _cx| project.languages().clone()); language_registry.add(rust_lang()); let mut fake_servers = language_registry.register_fake_lsp( "Rust", FakeLspAdapter { capabilities: lsp::ServerCapabilities { text_document_sync: Some(lsp::TextDocumentSyncCapability::Kind( lsp::TextDocumentSyncKind::INCREMENTAL, )), ..Default::default() }, ..Default::default() }, ); let context_server_registry = cx.new(|cx| ContextServerRegistry::new(project.read(cx).context_server_store(), cx)); let model = Arc::new(FakeLanguageModel::default()); let thread = cx.new(|cx| { Thread::new( project.clone(), cx.new(|_cx| ProjectContext::default()), context_server_registry, Templates::new(), Some(model), cx, ) }); let action_log: Entity = thread.read_with(cx, |thread, _cx| thread.action_log().clone()); let edit_tool = Arc::new(EditFileTool::new( project.clone(), thread.downgrade(), action_log, language_registry, )); // Open the same buffer the tool will edit and register it with the language // servers so edits produce `didChange` notifications. let buffer = project .update(cx, |project, cx| { project.open_local_buffer(FILE_ABS_PATH, cx) }) .await .expect("failed to open buffer"); let lsp_handle = project.update(cx, |project, cx| { project.register_buffer_with_language_servers(&buffer, cx) }); let fake_server = fake_servers .next() .await .expect("fake language server should start"); // Publish diagnostics on every edit, mirroring a real server reacting to // `didChange`, so the editor's diagnostics path runs per edit. let server = fake_server.clone(); fake_server.handle_notification::( move |params, _cx| { server.notify::(lsp::PublishDiagnosticsParams { uri: params.text_document.uri.clone(), version: Some(params.text_document.version), diagnostics: vec![lsp::Diagnostic { range: lsp::Range::new(lsp::Position::new(0, 0), lsp::Position::new(0, 1)), severity: Some(lsp::DiagnosticSeverity::WARNING), message: "bench diagnostic".to_string(), ..Default::default() }], }); }, ); // Attach an editor view in a window and lay it out once so the viewport-gated // observers (bracket colorization, selection highlights) have a visible range. let window = cx.add_window(|window, cx| { let mut editor = Editor::for_buffer(buffer.clone(), Some(project.clone()), window, cx); editor.set_style(EditorStyle::default(), window, cx); window.focus(&editor.focus_handle(cx), cx); editor }); let editor = window.root(cx).expect("window should have an editor root"); let window: AnyWindowHandle = window.into(); // Lay out and paint a real frame so the editor establishes a viewport (this // is what makes the viewport-gated observers like bracket colorization run). { let mut visual_cx = gpui::VisualTestContext::from_window(window, &*cx); visual_cx.draw( gpui::point(gpui::px(0.0), gpui::px(0.0)), gpui::size(gpui::px(1024.0), gpui::px(768.0)), |_, _| editor.clone().into_any_element(), ); } let keep_alive: Vec> = vec![ Box::new(lsp_handle), Box::new(fake_server), Box::new(fake_servers), Box::new(buffer), ]; HarnessParts { edit_tool, thread, editor, window, keep_alive, } } fn init_context() -> TestAppContext { let cx = TestAppContext::single(); cx.update(|cx| { let settings_store = SettingsStore::test(cx); cx.set_global(settings_store); assets::Assets.load_test_fonts(cx); theme_settings::init(theme::LoadThemes::JustBase, cx); editor::init(cx); SettingsStore::update_global(cx, |store: &mut SettingsStore, cx| { store.update_user_settings(cx, |settings| { settings .project .all_languages .defaults .ensure_final_newline_on_save = Some(false); settings.project.all_languages.defaults.colorize_brackets = Some(true); }); }); let mut agent_settings = AgentSettings::get_global(cx).clone(); agent_settings.tool_permissions.tools.insert( EditFileTool::NAME.into(), ToolRules { default: Some(settings::ToolPermissionMode::Allow), always_allow: vec![], always_deny: vec![], always_confirm: vec![], invalid_patterns: vec![], }, ); AgentSettings::override_global(agent_settings, cx); }); cx } fn run_streamed_edit(harness: &mut BenchmarkHarness) -> EditFileToolOutput { let (mut sender, input): (_, ToolInput) = ToolInput::test(); for payload in &harness.partial_payloads { sender.send_partial(payload.clone()); } sender.send_full(harness.final_payload.clone()); let (event_stream, _event_rx) = ToolCallEventStream::test(); let cx = harness .cx .as_ref() .expect("benchmark harness should have a cx"); let task = cx.update(|cx| { harness .edit_tool .as_ref() .expect("benchmark harness should have an edit tool") .clone() .run(input, event_stream, cx) }); let executor = harness .cx .as_ref() .expect("benchmark harness should have a cx") .executor(); block_on_executor(&executor, task).unwrap() } fn block_on_executor(executor: &BackgroundExecutor, future: impl Future) -> R { pin_mut!(future); let waker = noop_waker(); let mut task_context = Context::from_waker(&waker); for _ in 0..10_000 { if let Poll::Ready(output) = future.as_mut().poll(&mut task_context) { return output; } executor.run_until_parked(); } panic!("future did not complete while running edit_file_tool benchmark"); } /// Builds the streamed partial payloads for a (possibly multi-edit) session, /// mirroring how the agent reveals one edit at a time: earlier edits stay /// complete in the array while the current edit streams its `old_text` then its /// `new_text` in chunks. fn streamed_partial_payloads(edits: &[EditOp]) -> Vec { let path = FILE_PROJECT_PATH; let mut payloads = vec![json!({ "path": path }), json!({ "path": path })]; for index in 0..edits.len() { let completed: Vec = edits[..index] .iter() .map(|edit| json!({ "old_text": edit.old_text, "new_text": edit.new_text })) .collect(); let edit = &edits[index]; for old_end in chunk_ends(&edit.old_text, OLD_TEXT_CHUNK_SIZE) { let mut arr = completed.clone(); arr.push(json!({ "old_text": &edit.old_text[..old_end] })); payloads.push(json!({ "path": path, "edits": arr })); } let mut arr = completed.clone(); arr.push(json!({ "old_text": edit.old_text, "new_text": "" })); payloads.push(json!({ "path": path, "edits": arr })); for new_end in chunk_ends(&edit.new_text, NEW_TEXT_CHUNK_SIZE) { let mut arr = completed.clone(); arr.push(json!({ "old_text": edit.old_text, "new_text": &edit.new_text[..new_end] })); payloads.push(json!({ "path": path, "edits": arr })); } } payloads } fn chunk_ends(text: &str, chunk_size: usize) -> impl Iterator + '_ { let mut end = 0; std::iter::from_fn(move || { if end == text.len() { return None; } end = (end + chunk_size).min(text.len()); while !text.is_char_boundary(end) { end -= 1; } Some(end) }) } fn fixtures() -> Vec { vec![ make_fixture( "tiny_function_rewrite", 2, EditPattern::LocalizedRewrite { start_line: 12, line_count: 6, }, SEED, ), make_fixture( "small_function_rewrite", 5, EditPattern::LocalizedRewrite { start_line: 22, line_count: 12, }, SEED + 1, ), make_fixture( "medium_many_small_changes", 8, EditPattern::ManySmallChanges { every_nth_line: 7 }, SEED + 2, ), make_fixture( "medium_insertions", 8, EditPattern::InsertHelperBlocks { every_nth_line: 9 }, SEED + 3, ), make_large_multi_edit_fixture("large_multi_edit", 80, 16, SEED + 4), ] } enum EditPattern { LocalizedRewrite { start_line: usize, line_count: usize, }, ManySmallChanges { every_nth_line: usize, }, InsertHelperBlocks { every_nth_line: usize, }, } fn make_fixture( name: &'static str, function_count: usize, pattern: EditPattern, seed: u64, ) -> EditFixture { let mut rng = StdRng::seed_from_u64(seed); let old_lines = random_rust_module(&mut rng, function_count); let edit_range = edit_range(&old_lines, &pattern); let old_text = old_lines[edit_range.clone()].join("\n"); let mut new_lines = old_lines.clone(); match pattern { EditPattern::LocalizedRewrite { .. } => { rewrite_local_block(&mut new_lines[edit_range.clone()], &mut rng) } EditPattern::ManySmallChanges { every_nth_line } => { rewrite_many_small_lines(&mut new_lines[edit_range.clone()], every_nth_line, &mut rng) } EditPattern::InsertHelperBlocks { every_nth_line } => { insert_helper_blocks(&mut new_lines, edit_range.clone(), every_nth_line, &mut rng) } } let new_text_end = edit_range.end + new_lines.len().saturating_sub(old_lines.len()); let old_file_text = old_lines.join("\n"); let expected_file_text = new_lines.join("\n"); let new_text = new_lines[edit_range.start..new_text_end].join("\n"); EditFixture { name, old_file_text, expected_file_text, edits: vec![EditOp { old_text, new_text }], } } fn make_large_multi_edit_fixture( name: &'static str, function_count: usize, edit_count: usize, seed: u64, ) -> EditFixture { const HEADER_LINES: usize = 10; const FUNCTION_LINES: usize = 12; const FUNCTION_BODY_LINES: usize = 11; let mut rng = StdRng::seed_from_u64(seed); let old_lines = random_rust_module(&mut rng, function_count); let old_file_text = old_lines.join("\n"); let step = (function_count / edit_count).max(1); let mut picks: Vec = (0..edit_count) .map(|k| (k * step).min(function_count - 1)) .collect(); picks.dedup(); let replacements: Vec<(usize, Vec)> = picks .iter() .map(|&function_index| { ( function_index, large_function_lines(&mut rng, function_index), ) }) .collect(); let edits = replacements .iter() .map(|(function_index, new_function)| { let start = HEADER_LINES + function_index * FUNCTION_LINES; let end = start + FUNCTION_BODY_LINES; EditOp { old_text: old_lines[start..end].join("\n"), new_text: new_function.join("\n"), } }) .collect(); let mut new_lines = old_lines; for (function_index, new_function) in replacements.iter().rev() { let start = HEADER_LINES + function_index * FUNCTION_LINES; let end = start + FUNCTION_BODY_LINES; new_lines.splice(start..end, new_function.iter().cloned()); } let expected_file_text = new_lines.join("\n"); EditFixture { name, old_file_text, expected_file_text, edits, } } fn large_function_lines(rng: &mut StdRng, index: usize) -> Vec { let function_name = identifier(rng, index + 40_000); let argument_name = identifier(rng, index + 41_000); let mut lines = vec![ format!( " pub fn {function_name}(&mut self, {argument_name}: usize) -> Result {{" ), format!(" let mut accumulator = {argument_name};"), ]; let body_lines = rng.random_range(30..42); for body_index in 0..body_lines { let local_name = identifier(rng, index + 50_000 + body_index); let multiplier = rng.random_range(2..19); let offset = rng.random_range(1..256); match body_index % 4 { 0 => lines.push(format!( " let {local_name} = accumulator.saturating_mul({multiplier}).saturating_add({offset});" )), 1 => lines.push(format!( " accumulator = {local_name}.saturating_sub(self.version % {offset}.max(1));" )), 2 => lines.push(format!( " if {local_name} % {multiplier} == 0 {{ accumulator = accumulator.saturating_add({local_name}); }}" )), _ => lines.push(format!( " self.buffers.insert(\"{local_name}\".to_string(), accumulator);" )), } } lines.push(" self.version = self.version.saturating_add(accumulator);".to_string()); lines.push(" Ok(accumulator)".to_string()); lines.push(" }".to_string()); lines } fn edit_range(lines: &[String], pattern: &EditPattern) -> std::ops::Range { let mut range = match pattern { EditPattern::LocalizedRewrite { start_line, line_count, } => *start_line..(*start_line + *line_count).min(lines.len()), EditPattern::ManySmallChanges { .. } | EditPattern::InsertHelperBlocks { .. } => { 10..lines.len().saturating_sub(5) } }; while range.end > range.start && lines[range.end - 1].is_empty() { range.end -= 1; } range } fn random_rust_module(rng: &mut StdRng, function_count: usize) -> Vec { let mut lines = vec![ "use anyhow::{Context as _, Result};".to_string(), "use collections::HashMap;".to_string(), "".to_string(), "#[derive(Clone, Debug)]".to_string(), "pub struct WorkspaceSnapshot {".to_string(), " buffers: HashMap,".to_string(), " version: usize,".to_string(), "}".to_string(), "".to_string(), "impl WorkspaceSnapshot {".to_string(), ]; for function_index in 0..function_count { let function_name = identifier(rng, function_index); let argument_name = identifier(rng, function_index + 1_000); let local_name = identifier(rng, function_index + 2_000); let branch_name = identifier(rng, function_index + 3_000); let multiplier = rng.random_range(2..17); let offset = rng.random_range(1..128); lines.extend([ format!( " pub fn {function_name}(&mut self, {argument_name}: usize) -> Result {{" ), format!(" let mut {local_name} = {argument_name}.saturating_mul({multiplier});"), format!(" if {local_name} % 2 == 0 {{"), format!( " {local_name} = {local_name}.saturating_add(self.version + {offset});" ), " } else {".to_string(), format!(" {local_name} = {local_name}.saturating_sub({offset});"), " }".to_string(), format!(" let {branch_name} = self.buffers.len().saturating_add({local_name});"), format!(" self.version = self.version.saturating_add({branch_name});"), format!(" Ok({branch_name})"), " }".to_string(), "".to_string(), ]); } lines.push("}".to_string()); lines.push("".to_string()); lines.push("pub fn normalize_path(path: &str) -> String {".to_string()); lines.push(" path.replace('\\\\', \"/\")".to_string()); lines.push("}".to_string()); lines } fn rewrite_local_block(lines: &mut [String], rng: &mut StdRng) { for (line_index, line) in lines.iter_mut().enumerate() { let suffix = identifier(rng, line_index + 10_000); if line.contains("saturating_add") { *line = format!( " let {suffix} = self.version.checked_add({line_index}).context(\"version overflow\")?;" ); } else if line.contains("saturating_sub") { *line = format!( " {suffix}.saturating_sub({});", rng.random_range(8..256) ); } else if line.trim().is_empty() { *line = format!(" tracing::trace!(target: \"agent_bench\", value = {line_index});"); } else { *line = format!("{line} // updated {suffix}"); } } } fn rewrite_many_small_lines(lines: &mut [String], every_nth_line: usize, rng: &mut StdRng) { for (line_index, line) in lines.iter_mut().enumerate() { if line_index.is_multiple_of(every_nth_line) || line.trim().is_empty() { continue; } let suffix = identifier(rng, line_index + 20_000); *line = format!("{line} // audited {suffix}"); } } fn insert_helper_blocks( lines: &mut Vec, range: std::ops::Range, every_nth_line: usize, rng: &mut StdRng, ) { let mut line_index = range.start; while line_index < range.end.min(lines.len()) { if line_index.is_multiple_of(every_nth_line) && !lines[line_index].trim().is_empty() { let suffix = identifier(rng, line_index + 30_000); lines.splice( line_index..line_index, [ format!(" let {suffix}_before = self.version;"), format!(" tracing::debug!(version = {suffix}_before);"), ], ); line_index += 2; } line_index += 1; } } fn identifier(rng: &mut StdRng, salt: usize) -> String { const PARTS: &[&str] = &[ "alpha", "beta", "gamma", "delta", "epsilon", "zeta", "theta", "lambda", "sigma", "omega", ]; format!( "{}_{}_{}", PARTS[rng.random_range(0..PARTS.len())], salt, rng.random_range(0..10_000) ) } criterion_group!(benches, edit_file_tool_streaming); criterion_main!(benches);