mirror of
https://github.com/zed-industries/zed.git
synced 2026-06-01 03:14:56 +07:00
Speed up StreamingDiff::push_new by 30-36% (#57772)
## Motivation
Miniprofs of Zed dropping frames showed foreground thread stalls of
711.56 ms, 641.01 ms, and 221.99 ms from the `edit_file_tool`. The new
benchmark isolated `StreamingDiff::push_new` as one of the expensive
phases, so this PR speeds it up to help avoid dropping frames
in the future.
## Benchmark methodology
I had an agent create a Criterion suite with four deterministic Rust
edit fixtures: tiny localized rewrite, small localized rewrite, many
small changes, and helper block insertions.
Benchmarked with:
```sh
cargo bench -p streaming_diff --bench streaming_diff --profile release-fast -- --warm-up-time 1 --measurement-time 2
```
The benchmark binary was also recorded under `xctrace` CPU Counters to
inspect CPU samples before and after the change.
## Results
`StreamingDiff::push_new` improved across all fixtures:
| Benchmark | Before | After | Improvement |
|---|---:|---:|---:|
| `tiny_function_rewrite` | ~10.81 ms | ~6.91 ms | ~36% faster |
| `small_function_rewrite` | ~51.02 ms | ~35.39 ms | ~31% faster |
| `medium_many_small_changes` | ~130.71 ms | ~83.83 ms | ~36% faster |
| `medium_insertions` | ~120.52 ms | ~79.90 ms | ~34% faster |
The `xctrace` baseline showed samples in `Hash::hash`,
`RandomState::hash_one`, `HashMap::insert`, and
`RawTable::reserve_rehash`. After replacing the map with two `Vec<u32>`
row buffers, the hash table frames disappeared from the top samples.
The speedup comes from removing hash work from the DP inner loop and
replacing scattered hash table probes with contiguous row buffer access
(better cache hits 🏎️). The trace did not include direct cache miss
counts, but this layout is likely more cache friendly because it
replaces hash table lookups with accesses to a contiguous region of
memory (the `Vec`s).
Release Notes:
- N/A
This commit is contained in:
parent
9b4737b0d7
commit
6afac23e4a
4 changed files with 340 additions and 9 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
|
@ -17769,6 +17769,7 @@ checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520"
|
|||
name = "streaming_diff"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"criterion",
|
||||
"ordered-float 2.10.1",
|
||||
"rand 0.9.4",
|
||||
"rope",
|
||||
|
|
|
|||
|
|
@ -16,5 +16,10 @@ ordered-float.workspace = true
|
|||
rope.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
criterion.workspace = true
|
||||
rand.workspace = true
|
||||
util = { workspace = true, features = ["test-support"] }
|
||||
|
||||
[[bench]]
|
||||
name = "streaming_diff"
|
||||
harness = false
|
||||
|
|
|
|||
321
crates/streaming_diff/benches/streaming_diff.rs
Normal file
321
crates/streaming_diff/benches/streaming_diff.rs
Normal file
|
|
@ -0,0 +1,321 @@
|
|||
use criterion::{
|
||||
BatchSize, BenchmarkId, Criterion, Throughput, black_box, criterion_group, criterion_main,
|
||||
};
|
||||
use rand::{Rng as _, SeedableRng as _, rngs::StdRng};
|
||||
use streaming_diff::StreamingDiff;
|
||||
|
||||
/// Base seed for the fixture RNG; each fixture adds its own offset so runs stay deterministic.
const SEED: u64 = 0x5EED_5EED;
/// Target size, in bytes, of each chunk of new text fed to the diff.
const CHUNK_SIZE: usize = 512;
|
||||
|
||||
/// A named before/after text pair that drives one benchmark case.
#[derive(Clone)]
struct EditFixture {
    /// Label used as the benchmark function name.
    name: &'static str,
    /// Text the diff is constructed from.
    old_text: String,
    /// Text streamed into the diff chunk by chunk.
    new_text: String,
}
|
||||
|
||||
fn streaming_diff_push_new(criterion: &mut Criterion) {
|
||||
let fixtures = fixtures();
|
||||
let mut group = criterion.benchmark_group("streaming_diff_push_new");
|
||||
group.sample_size(10);
|
||||
|
||||
for fixture in fixtures {
|
||||
group.throughput(Throughput::Bytes(fixture.new_text.len() as u64));
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new(fixture.name, fixture.old_text.len()),
|
||||
&fixture,
|
||||
|bench, fixture| {
|
||||
bench.iter_batched(
|
||||
|| StreamingDiff::new(fixture.old_text.clone()),
|
||||
|mut diff| {
|
||||
let mut operation_count = 0;
|
||||
for chunk in chunk_text(&fixture.new_text, CHUNK_SIZE) {
|
||||
operation_count += black_box(diff.push_new(chunk)).len();
|
||||
}
|
||||
black_box(operation_count);
|
||||
},
|
||||
BatchSize::SmallInput,
|
||||
);
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn streaming_diff_finish(criterion: &mut Criterion) {
|
||||
let fixtures = fixtures();
|
||||
let mut group = criterion.benchmark_group("streaming_diff_finish");
|
||||
group.sample_size(10);
|
||||
|
||||
for fixture in fixtures {
|
||||
group.throughput(Throughput::Bytes(fixture.new_text.len() as u64));
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new(fixture.name, fixture.old_text.len()),
|
||||
&fixture,
|
||||
|bench, fixture| {
|
||||
bench.iter_batched(
|
||||
|| {
|
||||
let mut diff = StreamingDiff::new(fixture.old_text.clone());
|
||||
for chunk in chunk_text(&fixture.new_text, CHUNK_SIZE) {
|
||||
black_box(diff.push_new(chunk));
|
||||
}
|
||||
diff
|
||||
},
|
||||
|diff| {
|
||||
black_box(diff.finish());
|
||||
},
|
||||
BatchSize::SmallInput,
|
||||
);
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn fixtures() -> Vec<EditFixture> {
|
||||
// Keep fixtures modest because `StreamingDiff` is intentionally stressed here and
|
||||
// can become very slow on tens of kilobytes of replacement text. These sizes still
|
||||
// represent realistic `edit_file` old/new text blocks and are large enough to cross
|
||||
// frame-budget-sized CPU work.
|
||||
vec![
|
||||
make_fixture(
|
||||
"tiny_function_rewrite",
|
||||
2,
|
||||
EditPattern::LocalizedRewrite {
|
||||
start_line: 12,
|
||||
line_count: 6,
|
||||
},
|
||||
SEED,
|
||||
),
|
||||
make_fixture(
|
||||
"small_function_rewrite",
|
||||
5,
|
||||
EditPattern::LocalizedRewrite {
|
||||
start_line: 22,
|
||||
line_count: 12,
|
||||
},
|
||||
SEED + 1,
|
||||
),
|
||||
make_fixture(
|
||||
"medium_many_small_changes",
|
||||
8,
|
||||
EditPattern::ManySmallChanges { every_nth_line: 7 },
|
||||
SEED + 2,
|
||||
),
|
||||
make_fixture(
|
||||
"medium_insertions",
|
||||
8,
|
||||
EditPattern::InsertHelperBlocks { every_nth_line: 9 },
|
||||
SEED + 3,
|
||||
),
|
||||
]
|
||||
}
|
||||
|
||||
/// The kind of edit applied to a generated module to derive a fixture's new text.
enum EditPattern {
    /// Rewrite a run of consecutive lines.
    LocalizedRewrite {
        /// Index of the first line to rewrite.
        start_line: usize,
        /// Maximum number of lines to rewrite.
        line_count: usize,
    },
    /// Apply a small change to lines whose index is a multiple of `every_nth_line`.
    ManySmallChanges {
        /// Stride between candidate lines.
        every_nth_line: usize,
    },
    /// Insert helper lines ahead of closing braces found while stepping by `every_nth_line`.
    InsertHelperBlocks {
        /// Stride between inspected lines.
        every_nth_line: usize,
    },
}
|
||||
|
||||
fn make_fixture(
|
||||
name: &'static str,
|
||||
function_count: usize,
|
||||
pattern: EditPattern,
|
||||
seed: u64,
|
||||
) -> EditFixture {
|
||||
let mut rng = StdRng::seed_from_u64(seed);
|
||||
let mut lines = random_rust_module(&mut rng, function_count);
|
||||
let old_text = lines.join("\n");
|
||||
|
||||
match pattern {
|
||||
EditPattern::LocalizedRewrite {
|
||||
start_line,
|
||||
line_count,
|
||||
} => rewrite_local_block(&mut lines, start_line, line_count, &mut rng),
|
||||
EditPattern::ManySmallChanges { every_nth_line } => {
|
||||
rewrite_many_small_lines(&mut lines, every_nth_line, &mut rng)
|
||||
}
|
||||
EditPattern::InsertHelperBlocks { every_nth_line } => {
|
||||
insert_helper_blocks(&mut lines, every_nth_line, &mut rng)
|
||||
}
|
||||
}
|
||||
|
||||
EditFixture {
|
||||
name,
|
||||
old_text,
|
||||
new_text: lines.join("\n"),
|
||||
}
|
||||
}
|
||||
|
||||
fn random_rust_module(rng: &mut StdRng, function_count: usize) -> Vec<String> {
|
||||
let mut lines = vec![
|
||||
"use anyhow::{Context as _, Result};".to_string(),
|
||||
"use collections::HashMap;".to_string(),
|
||||
"".to_string(),
|
||||
"#[derive(Clone, Debug)]".to_string(),
|
||||
"pub struct WorkspaceSnapshot {".to_string(),
|
||||
" buffers: HashMap<String, usize>,".to_string(),
|
||||
" version: usize,".to_string(),
|
||||
"}".to_string(),
|
||||
"".to_string(),
|
||||
"impl WorkspaceSnapshot {".to_string(),
|
||||
];
|
||||
|
||||
for function_index in 0..function_count {
|
||||
let function_name = identifier(rng, function_index);
|
||||
let argument_name = identifier(rng, function_index + 1_000);
|
||||
let local_name = identifier(rng, function_index + 2_000);
|
||||
let branch_name = identifier(rng, function_index + 3_000);
|
||||
let multiplier = rng.random_range(2..17);
|
||||
let offset = rng.random_range(1..128);
|
||||
|
||||
lines.extend([
|
||||
format!(
|
||||
" pub fn {function_name}(&mut self, {argument_name}: usize) -> Result<usize> {{"
|
||||
),
|
||||
format!(" let mut {local_name} = {argument_name}.saturating_mul({multiplier});"),
|
||||
format!(" if {local_name} % 2 == 0 {{"),
|
||||
format!(
|
||||
" {local_name} = {local_name}.saturating_add(self.version + {offset});"
|
||||
),
|
||||
" } else {".to_string(),
|
||||
format!(" {local_name} = {local_name}.saturating_sub({offset});"),
|
||||
" }".to_string(),
|
||||
format!(" let {branch_name} = self.buffers.len().saturating_add({local_name});"),
|
||||
format!(" self.version = self.version.saturating_add({branch_name});"),
|
||||
format!(" Ok({branch_name})"),
|
||||
" }".to_string(),
|
||||
"".to_string(),
|
||||
]);
|
||||
}
|
||||
|
||||
lines.push("}".to_string());
|
||||
lines.push("".to_string());
|
||||
lines.push("pub fn normalize_path(path: &str) -> String {".to_string());
|
||||
lines.push(" path.replace('\\\\', \"/\")".to_string());
|
||||
lines.push("}".to_string());
|
||||
lines
|
||||
}
|
||||
|
||||
fn rewrite_local_block(
|
||||
lines: &mut [String],
|
||||
start_line: usize,
|
||||
line_count: usize,
|
||||
rng: &mut StdRng,
|
||||
) {
|
||||
let end_line = (start_line + line_count).min(lines.len());
|
||||
for (relative_index, line) in lines[start_line..end_line].iter_mut().enumerate() {
|
||||
let suffix = identifier(rng, relative_index + 10_000);
|
||||
if line.contains("saturating_add") {
|
||||
*line = format!(
|
||||
" let {suffix} = self.version.checked_add({relative_index}).context(\"version overflow\")?;"
|
||||
);
|
||||
} else if line.contains("saturating_sub") {
|
||||
*line = format!(
|
||||
" {suffix}.saturating_sub({});",
|
||||
rng.random_range(8..256)
|
||||
);
|
||||
} else if line.trim().is_empty() {
|
||||
*line = format!(
|
||||
" tracing::trace!(target: \"agent_bench\", value = {relative_index});"
|
||||
);
|
||||
} else {
|
||||
*line = format!("{line} // updated {suffix}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn rewrite_many_small_lines(lines: &mut [String], every_nth_line: usize, rng: &mut StdRng) {
|
||||
for (line_index, line) in lines.iter_mut().enumerate() {
|
||||
if line_index % every_nth_line != 0 || line.trim().is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
if line.contains("let mut") {
|
||||
*line = line.replace("let mut", "let mut updated");
|
||||
} else if line.contains("Ok(") {
|
||||
*line = line.replace("Ok(", "Ok(black_box_value(");
|
||||
} else if line.ends_with('{') {
|
||||
*line = format!("{line} // scenario {}", identifier(rng, line_index));
|
||||
} else {
|
||||
*line = format!("{line} // touched {}", identifier(rng, line_index));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn insert_helper_blocks(lines: &mut Vec<String>, every_nth_line: usize, rng: &mut StdRng) {
|
||||
let mut line_index = every_nth_line;
|
||||
while line_index < lines.len() {
|
||||
if lines[line_index].trim() == "}" {
|
||||
let helper_name = identifier(rng, line_index + 20_000);
|
||||
lines.splice(
|
||||
line_index..line_index,
|
||||
[
|
||||
format!(" let {helper_name} = self.buffers.len();"),
|
||||
format!(" tracing::trace!(target: \"agent_bench\", {helper_name});"),
|
||||
],
|
||||
);
|
||||
line_index += 2;
|
||||
}
|
||||
line_index += every_nth_line;
|
||||
}
|
||||
}
|
||||
|
||||
fn identifier(rng: &mut StdRng, salt: usize) -> String {
|
||||
const WORDS: &[&str] = &[
|
||||
"buffer",
|
||||
"workspace",
|
||||
"snapshot",
|
||||
"version",
|
||||
"project",
|
||||
"entry",
|
||||
"path",
|
||||
"cursor",
|
||||
"anchor",
|
||||
"edit",
|
||||
"thread",
|
||||
"message",
|
||||
"context",
|
||||
"store",
|
||||
"diff",
|
||||
"range",
|
||||
"token",
|
||||
"parser",
|
||||
"semantic",
|
||||
"format",
|
||||
"completion",
|
||||
"diagnostic",
|
||||
"terminal",
|
||||
"channel",
|
||||
];
|
||||
|
||||
let first = WORDS[(rng.random_range(0..WORDS.len()) + salt) % WORDS.len()];
|
||||
let second = WORDS[(rng.random_range(0..WORDS.len()) + salt / 3) % WORDS.len()];
|
||||
format!("{first}_{second}_{salt}")
|
||||
}
|
||||
|
||||
/// Splits `text` into consecutive slices of roughly `max_chunk_size` bytes.
///
/// A split point that would land inside a multi-byte character is moved forward to
/// the next character boundary, so a chunk may exceed `max_chunk_size` by up to three
/// bytes. Concatenating the returned chunks reproduces `text` exactly.
///
/// A `max_chunk_size` of zero is treated as one, so the function always makes
/// progress instead of looping forever on empty chunks.
fn chunk_text(text: &str, max_chunk_size: usize) -> Vec<&str> {
    let step = max_chunk_size.max(1);
    // Upper bound on the chunk count: chunks are never shorter than `step`, except the last.
    let mut chunks = Vec::with_capacity(text.len().div_ceil(step));

    let mut rest = text;
    while !rest.is_empty() {
        let mut split_at = step.min(rest.len());
        // `rest.len()` is always a boundary, so this terminates.
        while !rest.is_char_boundary(split_at) {
            split_at += 1;
        }
        let (chunk, tail) = rest.split_at(split_at);
        chunks.push(chunk);
        rest = tail;
    }

    chunks
}
|
||||
|
||||
criterion_group!(benches, streaming_diff_push_new, streaming_diff_finish);
|
||||
criterion_main!(benches);
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
use ordered_float::OrderedFloat;
|
||||
use rope::{Point, Rope, TextSummary};
|
||||
use std::collections::{BTreeSet, HashMap};
|
||||
use std::collections::BTreeSet;
|
||||
use std::{
|
||||
cmp,
|
||||
fmt::{self, Debug},
|
||||
|
|
@ -103,7 +103,8 @@ pub struct StreamingDiff {
|
|||
scores: Matrix,
|
||||
old_text_ix: usize,
|
||||
new_text_ix: usize,
|
||||
equal_runs: HashMap<(usize, usize), u32>,
|
||||
previous_equal_runs: Vec<u32>,
|
||||
current_equal_runs: Vec<u32>,
|
||||
}
|
||||
|
||||
impl StreamingDiff {
|
||||
|
|
@ -114,9 +115,10 @@ impl StreamingDiff {
|
|||
|
||||
pub fn new(old: String) -> Self {
|
||||
let old = old.chars().collect::<Vec<_>>();
|
||||
let old_len = old.len();
|
||||
let mut scores = Matrix::new();
|
||||
scores.resize(old.len() + 1, 1);
|
||||
for i in 0..=old.len() {
|
||||
scores.resize(old_len + 1, 1);
|
||||
for i in 0..=old_len {
|
||||
scores.set(i, 0, i as f64 * Self::DELETION_SCORE);
|
||||
}
|
||||
Self {
|
||||
|
|
@ -125,7 +127,8 @@ impl StreamingDiff {
|
|||
scores,
|
||||
old_text_ix: 0,
|
||||
new_text_ix: 0,
|
||||
equal_runs: Default::default(),
|
||||
previous_equal_runs: vec![0; old_len + 1],
|
||||
current_equal_runs: vec![0; old_len + 1],
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -134,9 +137,9 @@ impl StreamingDiff {
|
|||
self.scores.swap_columns(0, self.scores.cols - 1);
|
||||
self.scores
|
||||
.resize(self.old.len() + 1, self.new.len() - self.new_text_ix + 1);
|
||||
self.equal_runs.retain(|(_i, j), _| *j == self.new_text_ix);
|
||||
|
||||
for j in self.new_text_ix + 1..=self.new.len() {
|
||||
self.current_equal_runs.fill(0);
|
||||
let relative_j = j - self.new_text_ix;
|
||||
|
||||
self.scores
|
||||
|
|
@ -145,9 +148,8 @@ impl StreamingDiff {
|
|||
let insertion_score = self.scores.get(i, relative_j - 1) + Self::INSERTION_SCORE;
|
||||
let deletion_score = self.scores.get(i - 1, relative_j) + Self::DELETION_SCORE;
|
||||
let equality_score = if self.old[i - 1] == self.new[j - 1] {
|
||||
let mut equal_run = self.equal_runs.get(&(i - 1, j - 1)).copied().unwrap_or(0);
|
||||
equal_run += 1;
|
||||
self.equal_runs.insert((i, j), equal_run);
|
||||
let equal_run = self.previous_equal_runs[i - 1] + 1;
|
||||
self.current_equal_runs[i] = equal_run;
|
||||
|
||||
let exponent = cmp::min(equal_run as i32 / 4, Self::MAX_EQUALITY_EXPONENT);
|
||||
self.scores.get(i - 1, relative_j - 1) + Self::EQUALITY_BASE.powi(exponent)
|
||||
|
|
@ -158,6 +160,8 @@ impl StreamingDiff {
|
|||
let score = insertion_score.max(deletion_score).max(equality_score);
|
||||
self.scores.set(i, relative_j, score);
|
||||
}
|
||||
|
||||
std::mem::swap(&mut self.previous_equal_runs, &mut self.current_equal_runs);
|
||||
}
|
||||
|
||||
let mut max_score = f64::NEG_INFINITY;
|
||||
|
|
|
|||
Loading…
Reference in a new issue