perf: Greatly expand profiler (#38584)

Expands on #38543 (notably allows setting importance categories and
weights on tests, and a lot of internal refactoring) because I couldn't
help myself. Also allows exporting runs to json and comparing across them. See code for docs.

Release Notes:

- N/A
This commit is contained in:
Nia 2025-09-21 13:54:59 +02:00 committed by GitHub
parent 839c216620
commit 11041ef3b0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 919 additions and 168 deletions

View file

@ -5,6 +5,7 @@ rustflags = ["-C", "symbol-mangling-version=v0", "--cfg", "tokio_unstable"]
[alias]
xtask = "run --package xtask --"
perf-test = ["test", "--profile", "release-fast", "--lib", "--bins", "--tests", "--config", "target.'cfg(true)'.runner='target/release/perf'", "--config", "target.'cfg(true)'.rustflags=[\"--cfg\", \"perf_enabled\"]"]
perf-compare = ["run", "--release", "-p", "perf", "--", "compare"]
[target.x86_64-unknown-linux-gnu]
linker = "clang"
@ -24,3 +25,4 @@ rustflags = [
[env]
MACOSX_DEPLOYMENT_TARGET = "10.15.7"
CARGO_WORKSPACE_DIR = { value = "", relative = true }

1
.gitignore vendored
View file

@ -20,6 +20,7 @@
.venv
.vscode
.wrangler
.perf-runs
/assets/*licenses.*
/crates/collab/seed.json
/crates/theme/schemas/theme.json

4
Cargo.lock generated
View file

@ -12169,6 +12169,9 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
name = "perf"
version = "0.1.0"
dependencies = [
"collections",
"serde",
"serde_json",
"workspace-hack",
]
@ -18586,6 +18589,7 @@ dependencies = [
name = "util_macros"
version = "0.1.0"
dependencies = [
"perf",
"quote",
"syn 2.0.101",
"workspace-hack",

View file

@ -356,6 +356,7 @@ outline = { path = "crates/outline" }
outline_panel = { path = "crates/outline_panel" }
panel = { path = "crates/panel" }
paths = { path = "crates/paths" }
perf = { path = "tooling/perf" }
picker = { path = "crates/picker" }
plugin = { path = "crates/plugin" }
plugin_macros = { path = "crates/plugin_macros" }

View file

@ -16,6 +16,7 @@ doctest = false
[dependencies]
quote.workspace = true
syn.workspace = true
perf.workspace = true
workspace-hack.workspace = true
[features]

View file

@ -1,6 +1,7 @@
#![cfg_attr(not(target_os = "windows"), allow(unused))]
#![allow(clippy::test_attr_in_doctest)]
use perf::*;
use proc_macro::TokenStream;
use quote::{ToTokens, quote};
use syn::{ItemFn, LitStr, parse_macro_input, parse_quote};
@ -90,68 +91,81 @@ pub fn line_endings(input: TokenStream) -> TokenStream {
}
/// Inner data for the perf macro.
#[derive(Default)]
struct PerfArgs {
/// How many times to loop a test before rerunning the test binary.
/// If left empty, the test harness will auto-determine this value.
/// How many times to loop a test before rerunning the test binary. If left
/// empty, the test harness will auto-determine this value.
iterations: Option<syn::Expr>,
/// How much this test's results should be weighed when comparing across runs.
/// If unspecified, defaults to `WEIGHT_DEFAULT` (50).
weight: Option<syn::Expr>,
/// How relevant a benchmark is to overall performance. See docs on the enum
/// for details. If unspecified, `Average` is selected.
importance: Importance,
}
impl syn::parse::Parse for PerfArgs {
fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
if input.is_empty() {
return Ok(PerfArgs { iterations: None });
#[warn(clippy::all, clippy::pedantic)]
impl PerfArgs {
/// Parses attribute arguments into a `PerfArgs`.
fn parse_into(&mut self, meta: syn::meta::ParseNestedMeta) -> syn::Result<()> {
if meta.path.is_ident("iterations") {
self.iterations = Some(meta.value()?.parse()?);
} else if meta.path.is_ident("weight") {
self.weight = Some(meta.value()?.parse()?);
} else if meta.path.is_ident("critical") {
self.importance = Importance::Critical;
} else if meta.path.is_ident("important") {
self.importance = Importance::Important;
} else if meta.path.is_ident("average") {
// This shouldn't be specified manually, but oh well.
self.importance = Importance::Average;
} else if meta.path.is_ident("iffy") {
self.importance = Importance::Iffy;
} else if meta.path.is_ident("fluff") {
self.importance = Importance::Fluff;
} else {
return Err(syn::Error::new_spanned(meta.path, "unexpected identifier"));
}
let mut iterations = None;
// In principle we only have one possible argument, but leave this as
// a loop in case we expand this in the future.
for meta in
syn::punctuated::Punctuated::<syn::Meta, syn::Token![,]>::parse_terminated(input)?
{
match &meta {
syn::Meta::NameValue(meta_name_value) => {
if meta_name_value.path.is_ident("iterations") {
iterations = Some(meta_name_value.value.clone());
} else {
return Err(syn::Error::new_spanned(
&meta_name_value.path,
"unexpected argument, expected 'iterations'",
));
}
}
_ => {
return Err(syn::Error::new_spanned(
meta,
"expected name-value argument like 'iterations = 1'",
));
}
}
}
Ok(PerfArgs { iterations })
Ok(())
}
}
/// Marks a test as perf-sensitive, to be triaged when checking the performance
/// of a build. This also automatically applies `#[test]`.
///
/// By default, the number of iterations when profiling this test is auto-determined.
/// If this needs to be overwritten, pass the desired iteration count to the macro
/// as a parameter (`#[perf(iterations = n)]`). Note that the actual profiler may still
/// run the test an arbitrary number times; this flag just sets the number of executions
/// before the process is restarted and global state is reset.
///
/// # Usage notes
/// This should probably not be applied to tests that do any significant fs IO, as
/// locks on files may not be released in time when repeating a test many times. This
/// might lead to spurious failures.
/// # Usage
/// Applying this attribute to a test marks it as average importance by default.
/// There are 4 levels of importance (`Critical`, `Important`, `Average`, `Fluff`);
/// see the documentation on `Importance` for details. Add the importance as a
/// parameter to override the default (e.g. `#[perf(important)]`).
///
/// Each test also has a weight factor. This is irrelevant on its own, but is considered
/// when comparing results across different runs. By default, this is set to 50;
/// pass `weight = n` as a parameter to override this. Note that this value is only
/// relevant within its importance category.
///
/// By default, the number of iterations when profiling this test is auto-determined.
/// If this needs to be overwritten, pass the desired iteration count as a parameter
/// (`#[perf(iterations = n)]`). Note that the actual profiler may still run the test
/// an arbitrary number times; this flag just sets the number of executions before the
/// process is restarted and global state is reset.
///
/// This attribute should probably not be applied to tests that do any significant
/// disk IO, as locks on files may not be released in time when repeating a test many
/// times. This might lead to spurious failures.
///
/// # Examples
/// ```rust
/// use util_macros::perf;
///
/// #[perf]
/// fn expensive_computation_test() {
/// fn generic_test() {
/// // Test goes here.
/// }
///
/// #[perf(fluff, weight = 30)]
/// fn cold_path_test() {
/// // Test goes here.
/// }
/// ```
@ -161,72 +175,108 @@ impl syn::parse::Parse for PerfArgs {
/// ```rust,ignore
/// use util_macros::perf;
///
/// #[perf(iterations = 1)]
/// #[perf(iterations = 1, critical)]
/// #[gpui::test]
/// fn oneshot_test(_cx: &mut gpui::TestAppContext) {
/// // Test goes here.
/// }
/// ```
#[proc_macro_attribute]
#[warn(clippy::all, clippy::pedantic)]
pub fn perf(our_attr: TokenStream, input: TokenStream) -> TokenStream {
// If any of the below constants are changed, make sure to also update the perf
// profiler to match!
/// The suffix on tests marked with `#[perf]`.
const SUF_NORMAL: &str = "__ZED_PERF";
/// The suffix on tests marked with `#[perf(iterations = n)]`.
const SUF_FIXED: &str = "__ZED_PERF_FIXEDITER";
/// The env var in which we pass the iteration count to our tests.
const ITER_ENV_VAR: &str = "ZED_PERF_ITER";
let iter_count = parse_macro_input!(our_attr as PerfArgs).iterations;
let mut args = PerfArgs::default();
let parser = syn::meta::parser(|meta| PerfArgs::parse_into(&mut args, meta));
parse_macro_input!(our_attr with parser);
let ItemFn {
mut attrs,
attrs: mut attrs_main,
vis,
mut sig,
sig: mut sig_main,
block,
} = parse_macro_input!(input as ItemFn);
attrs.push(parse_quote!(#[test]));
attrs.push(parse_quote!(#[allow(non_snake_case)]));
attrs_main.push(parse_quote!(#[test]));
attrs_main.push(parse_quote!(#[allow(non_snake_case)]));
let fns = if cfg!(perf_enabled) {
#[allow(clippy::wildcard_imports, reason = "We control the other side")]
use consts::*;
let block: Box<syn::Block> = if cfg!(perf_enabled) {
// Make the ident obvious when calling, for the test parser.
let mut new_ident = sig.ident.to_string();
if iter_count.is_some() {
new_ident.push_str(SUF_FIXED);
} else {
new_ident.push_str(SUF_NORMAL);
}
// Also set up values for the second metadata-returning "test".
let mut new_ident_main = sig_main.ident.to_string();
let mut new_ident_meta = new_ident_main.clone();
new_ident_main.push_str(SUF_NORMAL);
new_ident_meta.push_str(SUF_MDATA);
let new_ident = syn::Ident::new(&new_ident, sig.ident.span());
sig.ident = new_ident;
// If we have a preset iteration count, just use that.
if let Some(iter_count) = iter_count {
parse_quote!({
for _ in 0..#iter_count {
#block
}
})
} else {
// Otherwise, the perf harness will pass us the value in an env var.
let new_ident_main = syn::Ident::new(&new_ident_main, sig_main.ident.span());
sig_main.ident = new_ident_main;
// We don't want any nonsense if the original test had a weird signature.
let new_ident_meta = syn::Ident::new(&new_ident_meta, sig_main.ident.span());
let sig_meta = parse_quote!(fn #new_ident_meta());
let attrs_meta = parse_quote!(#[test] #[allow(non_snake_case)]);
// Make the test loop as the harness instructs it to.
let block_main = {
// The perf harness will pass us the value in an env var. Even if we
// have a preset value, just do this to keep the code paths unified.
parse_quote!({
let iter_count = std::env::var(#ITER_ENV_VAR).unwrap().parse::<usize>().unwrap();
for _ in 0..iter_count {
#block
}
})
}
};
let importance = format!("{}", args.importance);
let block_meta = {
// This function's job is to just print some relevant info to stdout,
// based on the params this attr is passed. It's not an actual test.
// Since we use a custom attr set on our metadata fn, it shouldn't
// cause problems with xfail tests.
let q_iter = if let Some(iter) = args.iterations {
quote! {
println!("{} {} {}", #MDATA_LINE_PREF, #ITER_COUNT_LINE_NAME, #iter);
}
} else {
quote! {}
};
let weight = args
.weight
.unwrap_or_else(|| parse_quote! { #WEIGHT_DEFAULT });
parse_quote!({
#q_iter
println!("{} {} {}", #MDATA_LINE_PREF, #WEIGHT_LINE_NAME, #weight);
println!("{} {} {}", #MDATA_LINE_PREF, #IMPORTANCE_LINE_NAME, #importance);
println!("{} {} {}", #MDATA_LINE_PREF, #VERSION_LINE_NAME, #MDATA_VER);
})
};
vec![
// The real test.
ItemFn {
attrs: attrs_main,
vis: vis.clone(),
sig: sig_main,
block: block_main,
},
// The fake test.
ItemFn {
attrs: attrs_meta,
vis,
sig: sig_meta,
block: block_meta,
},
]
} else {
block
vec![ItemFn {
attrs: attrs_main,
vis,
sig: sig_main,
block,
}]
};
ItemFn {
attrs,
vis,
sig,
block,
}
.into_token_stream()
.into()
fns.into_iter()
.flat_map(|f| TokenStream::from(f.into_token_stream()))
.collect()
}

View file

@ -1,11 +1,30 @@
[package]
name = "perf"
version = "0.1.0"
description = "A tool for measuring Zed test performance, with too many Clippy lints"
publish.workspace = true
edition.workspace = true
[lints]
workspace = true
[lib]
# Some personal lint preferences :3
[lints.rust]
missing_docs = "warn"
[lints.clippy]
needless_continue = "allow" # For a convenience macro
all = "warn"
pedantic = "warn"
style = "warn"
missing_docs_in_private_items = "warn"
as_underscore = "deny"
allow_attributes_without_reason = "deny"
let_underscore_must_use = "forbid"
undocumented_unsafe_blocks = "forbid"
missing_safety_doc = "forbid"
[dependencies]
collections.workspace = true
serde.workspace = true
serde_json.workspace = true
workspace-hack.workspace = true

413
tooling/perf/src/lib.rs Normal file
View file

@ -0,0 +1,413 @@
//! Some constants and datatypes used in the Zed perf profiler. Should only be
//! consumed by the crate providing the matching macros.
use collections::HashMap;
use serde::{Deserialize, Serialize};
use std::time::Duration;
pub mod consts {
//! Preset idenitifiers and constants so that the profiler and proc macro agree
//! on their communication protocol.
/// The suffix on the actual test function.
pub const SUF_NORMAL: &str = "__ZED_PERF_FN";
/// The suffix on an extra function which prints metadata about a test to stdout.
pub const SUF_MDATA: &str = "__ZED_PERF_MDATA";
/// The env var in which we pass the iteration count to our tests.
pub const ITER_ENV_VAR: &str = "ZED_PERF_ITER";
/// The prefix printed on all benchmark test metadata lines, to distinguish it from
/// possible output by the test harness itself.
pub const MDATA_LINE_PREF: &str = "ZED_MDATA_";
/// The version number for the data returned from the test metadata function.
/// Increment on non-backwards-compatible changes.
pub const MDATA_VER: u32 = 0;
/// The default weight, if none is specified.
pub const WEIGHT_DEFAULT: u8 = 50;
/// How long a test must have run to be assumed to be reliable-ish.
pub const NOISE_CUTOFF: std::time::Duration = std::time::Duration::from_millis(250);
/// Identifier for the iteration count of a test metadata.
pub const ITER_COUNT_LINE_NAME: &str = "iter_count";
/// Identifier for the weight of a test metadata.
pub const WEIGHT_LINE_NAME: &str = "weight";
/// Identifier for importance in test metadata.
pub const IMPORTANCE_LINE_NAME: &str = "importance";
/// Identifier for the test metadata version.
pub const VERSION_LINE_NAME: &str = "version";
/// Where to save json run information.
pub const RUNS_DIR: &str = ".perf-runs";
}
/// How relevant a benchmark is.
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub enum Importance {
/// Regressions shouldn't be accepted without good reason.
Critical = 4,
/// Regressions should be paid extra attention.
Important = 3,
/// No extra attention should be paid to regressions, but they might still
/// be indicative of something happening.
#[default]
Average = 2,
/// Unclear if regressions are likely to be meaningful, but still worth keeping
/// an eye on. Lowest level that's checked by default by the profiler.
Iffy = 1,
/// Regressions are likely to be spurious or don't affect core functionality.
/// Only relevant if a lot of them happen, or as supplemental evidence for a
/// higher-importance benchmark regressing. Not checked by default.
Fluff = 0,
}
impl std::fmt::Display for Importance {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Importance::Critical => f.write_str("critical"),
Importance::Important => f.write_str("important"),
Importance::Average => f.write_str("average"),
Importance::Iffy => f.write_str("iffy"),
Importance::Fluff => f.write_str("fluff"),
}
}
}
/// Why or when did this test fail?
#[derive(Clone, Debug, Serialize, Deserialize)]
pub enum FailKind {
/// Failed while triaging it to determine the iteration count.
Triage,
/// Failed while profiling it.
Profile,
/// Failed due to an incompatible version for the test.
VersionMismatch,
/// Skipped due to filters applied on the perf run.
Skipped,
}
impl std::fmt::Display for FailKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
FailKind::Triage => f.write_str("failed in triage"),
FailKind::Profile => f.write_str("failed while profiling"),
FailKind::VersionMismatch => f.write_str("test version mismatch"),
FailKind::Skipped => f.write_str("skipped"),
}
}
}
/// Information about a given perf test.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct TestMdata {
/// A version number for when the test was generated. If this is greater
/// than the version this test handler expects, one of the following will
/// happen in an unspecified manner:
/// - The test is skipped silently.
/// - The handler exits with an error message indicating the version mismatch
/// or inability to parse the metadata.
///
/// INVARIANT: If `version` <= `MDATA_VER`, this tool *must* be able to
/// correctly parse the output of this test.
pub version: u32,
/// How many iterations to pass this test, if this is preset.
pub iterations: Option<usize>,
/// The importance of this particular test. See the docs on `Importance` for
/// details.
pub importance: Importance,
/// The weight of this particular test within its importance category. Used
/// when comparing across runs.
pub weight: u8,
}
/// The actual timings of a test, as measured by Hyperfine.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Timings {
/// Mean runtime for `self.iter_total` runs of this test.
pub mean: Duration,
/// Standard deviation for the above.
pub stddev: Duration,
}
impl Timings {
/// How many iterations does this test seem to do per second?
#[expect(
clippy::cast_precision_loss,
reason = "We only care about a couple sig figs anyways"
)]
#[must_use]
pub fn iters_per_sec(&self, total_iters: usize) -> f64 {
(1000. / self.mean.as_millis() as f64) * total_iters as f64
}
}
/// Aggregate output of all tests run by this handler.
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct Output {
/// A list of test outputs. Format is `(test_name, iter_count, timings)`.
/// The latter being set indicates the test succeeded.
///
/// INVARIANT: If the test succeeded, the second field is `Some(mdata)` and
/// `mdata.iterations` is `Some(_)`.
tests: Vec<(String, Option<TestMdata>, Result<Timings, FailKind>)>,
}
impl Output {
/// Instantiates an empty "output". Useful for merging.
#[must_use]
pub fn blank() -> Self {
Output { tests: Vec::new() }
}
/// Reports a success and adds it to this run's `Output`.
pub fn success(
&mut self,
name: impl AsRef<str>,
mut mdata: TestMdata,
iters: usize,
timings: Timings,
) {
mdata.iterations = Some(iters);
self.tests
.push((name.as_ref().to_string(), Some(mdata), Ok(timings)));
}
/// Reports a failure and adds it to this run's `Output`. If this test was tried
/// with some number of iterations (i.e. this was not a version mismatch or skipped
/// test), it should be reported also.
///
/// Using the `fail!()` macro is usually more convenient.
pub fn failure(
&mut self,
name: impl AsRef<str>,
mut mdata: Option<TestMdata>,
attempted_iters: Option<usize>,
kind: FailKind,
) {
if let Some(ref mut mdata) = mdata {
mdata.iterations = attempted_iters;
}
self.tests
.push((name.as_ref().to_string(), mdata, Err(kind)));
}
/// True if no tests executed this run.
#[must_use]
pub fn is_empty(&self) -> bool {
self.tests.is_empty()
}
/// Sorts the runs in the output in the order that we want it printed.
pub fn sort(&mut self) {
self.tests.sort_unstable_by(|a, b| match (a, b) {
// Tests where we got no metadata go at the end.
((_, Some(_), _), (_, None, _)) => std::cmp::Ordering::Greater,
((_, None, _), (_, Some(_), _)) => std::cmp::Ordering::Less,
// Then sort by importance, then weight.
((_, Some(a_mdata), _), (_, Some(b_mdata), _)) => {
let c = a_mdata.importance.cmp(&b_mdata.importance);
if matches!(c, std::cmp::Ordering::Equal) {
a_mdata.weight.cmp(&b_mdata.weight)
} else {
c
}
}
// Lastly by name.
((a_name, ..), (b_name, ..)) => a_name.cmp(b_name),
});
}
/// Merges the output of two runs, appending a prefix to the results of the new run.
/// To be used in conjunction with `Output::blank()`, or else only some tests will have
/// a prefix set.
pub fn merge(&mut self, other: Self, pref_other: impl AsRef<str>) {
self.tests = std::mem::take(&mut self.tests)
.into_iter()
.chain(other.tests.into_iter().map(|(name, md, tm)| {
let mut new_name = "crates/".to_string();
new_name.push_str(pref_other.as_ref());
new_name.push_str("::");
new_name.push_str(&name);
(new_name, md, tm)
}))
.collect();
}
/// Evaluates the performance of `self` against `baseline`. The latter is taken
/// as the comparison point, i.e. a positive resulting `PerfReport` means that
/// `self` performed better.
///
/// # Panics
/// `self` and `baseline` are assumed to have the iterations field on all
/// `TestMdata`s set to `Some(_)` if the `TestMdata` is present itself.
#[must_use]
pub fn compare_perf(self, baseline: Self) -> PerfReport {
let self_categories = self.collapse();
let mut other_categories = baseline.collapse();
let deltas = self_categories
.into_iter()
.filter_map(|(cat, self_data)| {
// Only compare categories where both meow
// runs have data. /
let mut other_data = other_categories.remove(&cat)?;
let mut max = 0.;
let mut min = 0.;
// Running totals for averaging out tests.
let mut r_total_numerator = 0.;
let mut r_total_denominator = 0;
// Yeah this is O(n^2), but realistically it'll hardly be a bottleneck.
for (name, (s_timings, s_iters, weight)) in self_data {
// Only use the new weights if they conflict.
let Some((o_timings, o_iters, _)) = other_data.remove(&name) else {
continue;
};
let shift =
(s_timings.iters_per_sec(s_iters) / o_timings.iters_per_sec(o_iters)) - 1.;
if shift > max {
max = shift;
}
if shift < min {
min = shift;
}
r_total_numerator += shift * f64::from(weight);
r_total_denominator += u32::from(weight);
}
let mean = r_total_numerator / f64::from(r_total_denominator);
// TODO: also aggregate standard deviation? that's harder to keep
// meaningful, though, since we dk which tests are correlated
Some((cat, PerfDelta { max, mean, min }))
})
.collect();
PerfReport { deltas }
}
/// Collapses the `PerfReport` into a `HashMap` of `Importance` <-> tests
/// each represented as a map of `name, (Timings, iterations, weight)`.
fn collapse(self) -> HashMap<Importance, HashMap<String, (Timings, usize, u8)>> {
let mut categories = HashMap::<Importance, HashMap<String, _>>::default();
for entry in self.tests {
if let Some(mdata) = entry.1
&& let Ok(timings) = entry.2
{
if let Some(handle) = categories.get_mut(&mdata.importance) {
handle.insert(entry.0, (timings, mdata.iterations.unwrap(), mdata.weight));
} else {
let mut new = HashMap::default();
new.insert(entry.0, (timings, mdata.iterations.unwrap(), mdata.weight));
categories.insert(mdata.importance, new);
}
}
}
categories
}
}
impl std::fmt::Display for Output {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
// Don't print the header for an empty run.
if self.tests.is_empty() {
return Ok(());
}
// We want to print important tests at the top, then alphabetical.
let mut sorted = self.clone();
sorted.sort();
// Markdown header for making a nice little table :>
writeln!(
f,
"| Command | Iter/sec | Mean [ms] | SD [ms] | Iterations | Importance (weight) |",
)?;
writeln!(f, "|:---|---:|---:|---:|---:|---:|")?;
for (name, metadata, timings) in &sorted.tests {
match metadata {
Some(metadata) => match timings {
// Happy path.
Ok(timings) => {
// If the test succeeded, then metadata.iterations is Some(_).
writeln!(
f,
"| {} | {:.2} | {} | {:.2} | {} | {} ({}) |",
name,
timings.iters_per_sec(metadata.iterations.unwrap()),
{
// Very small mean runtimes will give inaccurate
// results. Should probably also penalise weight.
let mean = timings.mean.as_secs_f64() * 1000.;
if mean < consts::NOISE_CUTOFF.as_secs_f64() * 1000. / 8. {
format!("{mean:.2} (unreliable)")
} else {
format!("{mean:.2}")
}
},
timings.stddev.as_secs_f64() * 1000.,
metadata.iterations.unwrap(),
metadata.importance,
metadata.weight,
)?;
}
// We have (some) metadata, but the test errored.
Err(err) => writeln!(
f,
"| ({}) {} | N/A | N/A | N/A | {} | {} ({}) |",
err,
name,
metadata
.iterations
.map_or_else(|| "N/A".to_owned(), |i| format!("{i}")),
metadata.importance,
metadata.weight
)?,
},
// No metadata, couldn't even parse the test output.
None => writeln!(
f,
"| ({}) {} | N/A | N/A | N/A | N/A | N/A |",
timings.as_ref().unwrap_err(),
name
)?,
}
}
writeln!(f)?;
Ok(())
}
}
/// The difference in performance between two runs within a given importance
/// category.
struct PerfDelta {
/// The biggest improvement / least bad regression.
max: f64,
/// The weighted average change in test times.
mean: f64,
/// The worst regression / smallest improvement.
min: f64,
}
/// Shim type for reporting all performance deltas across importance categories.
pub struct PerfReport {
/// Inner (group, diff) pairing.
deltas: HashMap<Importance, PerfDelta>,
}
impl std::fmt::Display for PerfReport {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
if self.deltas.is_empty() {
return write!(f, "(no matching tests)");
}
let sorted = self.deltas.iter().collect::<Vec<_>>();
writeln!(f, "| Category | Max | Mean | Min |")?;
// We don't want to print too many newlines at the end, so handle newlines
// a little jankily like this.
write!(f, "|:---|---:|---:|---:|")?;
for (cat, delta) in sorted.into_iter().rev() {
write!(
f,
"\n| {cat} | {:.3} | {:.3} | {:.3} |",
delta.max, delta.mean, delta.min
)?;
}
Ok(())
}
}

View file

@ -1,18 +1,15 @@
#![warn(clippy::all, clippy::pedantic, clippy::undocumented_unsafe_blocks)]
#![cfg_attr(release, deny(warnings))]
//! Perf profiler for Zed tests. Outputs timings of tests marked with the `#[perf]`
//! attribute to stdout in Markdown. See the documentation of `util_macros::perf`
//! for usage details on the actual attribute.
//!
//! # Setup
//! Make sure `hyperfine` is installed and in the shell path, then run
//! `cargo build --bin perf --workspace --release` to build the profiler.
//! `cargo build -p perf --release` to build the profiler.
//!
//! # Usage
//! Calling this tool rebuilds everything with some cfg flags set for the perf
//! proc macro *and* enables optimisations (`release-fast` profile), so expect it
//! to take a little while.
//! Calling this tool rebuilds the targeted crate(s) with some cfg flags set for the
//! perf proc macro *and* enables optimisations (`release-fast` profile), so expect
//! it to take a little while.
//!
//! To test an individual crate, run:
//! ```sh
@ -24,37 +21,221 @@
//! cargo perf-test --workspace
//! ```
//!
//! Some command-line parameters are also recognised by this profiler. To filter
//! out all tests below a certain importance (e.g. `important`), run:
//! ```sh
//! cargo perf-test $WHATEVER -- --important
//! ```
//!
//! Similarly, to skip outputting progress to the command line, pass `-- --quiet`.
//! These flags can be combined.
//!
//! ## Comparing runs
//! Passing `--json=ident` will save per-crate run files in `.perf-runs`, e.g.
//! `cargo perf-test -p gpui -- --json=blah` will result in `.perf-runs/blah.gpui.json`
//! being created (unless no tests were run). These results can be automatically
//! compared. To do so, run `cargo perf-compare new-ident old-ident`.
//!
//! NB: All files matching `.perf-runs/ident.*.json` will be considered when
//! doing this comparison, so ensure there aren't leftover files in your `.perf-runs`
//! directory that might match that!
//!
//! # Notes
//! This should probably not be called manually unless you're working on the profiler
//! itself; use the `cargo perf-test` alias (after building this crate) instead.
#[allow(clippy::wildcard_imports, reason = "Our crate")]
use perf::*;
use std::{
fs::OpenOptions,
io::Write,
path::{Path, PathBuf},
process::{Command, Stdio},
time::{Duration, Instant},
};
/// How many iterations to attempt the first time a test is run.
const DEFAULT_ITER_COUNT: usize = 12;
const DEFAULT_ITER_COUNT: usize = 3;
/// Multiplier for the iteration count when a test doesn't pass the noise cutoff.
const ITER_COUNT_MUL: usize = 4;
/// How long a test must have run to be assumed to be reliable-ish.
const NOISE_CUTOFF: Duration = Duration::from_millis(250);
// If any of the below constants are changed, make sure to also update the perf
// proc macro to match!
/// Report a failure into the output and skip an iteration.
macro_rules! fail {
($output:ident, $name:expr, $kind:expr) => {{
$output.failure($name, None, None, $kind);
continue;
}};
($output:ident, $name:expr, $mdata:expr, $kind:expr) => {{
$output.failure($name, Some($mdata), None, $kind);
continue;
}};
($output:ident, $name:expr, $mdata:expr, $count:expr, $kind:expr) => {{
$output.failure($name, Some($mdata), Some($count), $kind);
continue;
}};
}
/// The suffix on tests marked with `#[perf]`.
const SUF_NORMAL: &str = "__ZED_PERF";
/// The suffix on tests marked with `#[perf(iterations = n)]`.
const SUF_FIXED: &str = "__ZED_PERF_FIXEDITER";
/// The env var in which we pass the iteration count to our tests.
const ITER_ENV_VAR: &str = "ZED_PERF_ITER";
/// How does this perf run return its output?
enum OutputKind<'a> {
/// Print markdown to the terminal.
Markdown,
/// Save JSON to a file.
Json(&'a Path),
}
#[allow(clippy::too_many_lines)]
/// Runs a given metadata-returning function from a test handler, parsing its
/// output into a `TestMdata`.
fn parse_mdata(test_bin: &str, mdata_fn: &str) -> Result<TestMdata, FailKind> {
let mut cmd = Command::new(test_bin);
cmd.args([mdata_fn, "--exact", "--nocapture"]);
let out = cmd
.output()
.expect("FATAL: Could not run test binary {test_bin}");
assert!(out.status.success());
let stdout = String::from_utf8_lossy(&out.stdout);
let mut version = None;
let mut iterations = None;
let mut importance = Importance::default();
let mut weight = consts::WEIGHT_DEFAULT;
for line in stdout
.lines()
.filter_map(|l| l.strip_prefix(consts::MDATA_LINE_PREF))
{
let mut items = line.split_whitespace();
// For v0, we know the ident always comes first, then one field.
match items.next().unwrap() {
consts::VERSION_LINE_NAME => {
let v = items.next().unwrap().parse::<u32>().unwrap();
if v > consts::MDATA_VER {
return Err(FailKind::VersionMismatch);
}
version = Some(v);
}
consts::ITER_COUNT_LINE_NAME => {
iterations = Some(items.next().unwrap().parse::<usize>().unwrap());
}
consts::IMPORTANCE_LINE_NAME => {
importance = match items.next().unwrap() {
"critical" => Importance::Critical,
"important" => Importance::Important,
"average" => Importance::Average,
"iffy" => Importance::Iffy,
"fluff" => Importance::Fluff,
_ => unreachable!(),
};
}
consts::WEIGHT_LINE_NAME => {
weight = items.next().unwrap().parse::<u8>().unwrap();
}
_ => unreachable!(),
}
}
Ok(TestMdata {
version: version.unwrap(),
// Iterations may be determined by us and thus left unspecified.
iterations,
// In principle this should always be set, but just for the sake of
// stability allow the potentially-breaking change of not reporting the
// importance without erroring. Maybe we want to change this.
importance,
// Same with weight.
weight,
})
}
/// Compares the perf results of two profiles as per the arguments passed in.
fn compare_profiles(args: &[String]) {
let ident_new = args.first().expect("FATAL: missing identifier for new run");
let ident_old = args.get(1).expect("FATAL: missing identifier for old run");
// TODO: move this to a constant also tbh
let wspace_dir = std::env::var("CARGO_WORKSPACE_DIR").unwrap();
let runs_dir = PathBuf::from(&wspace_dir).join(consts::RUNS_DIR);
// Use the blank outputs initially, so we can merge into these with prefixes.
let mut outputs_new = Output::blank();
let mut outputs_old = Output::blank();
for e in runs_dir.read_dir().unwrap() {
let Ok(entry) = e else {
continue;
};
let Ok(metadata) = entry.metadata() else {
continue;
};
if metadata.is_file() {
let Ok(name) = entry.file_name().into_string() else {
continue;
};
// A little helper to avoid code duplication. Reads the `output` from
// a json file, then merges it into what we have so far.
let read_into = |output: &mut Output| {
let mut elems = name.split('.').skip(1);
let prefix = elems.next().unwrap();
assert_eq!("json", elems.next().unwrap());
assert!(elems.next().is_none());
let handle = OpenOptions::new().read(true).open(entry.path()).unwrap();
let o_other: Output = serde_json::from_reader(handle).unwrap();
output.merge(o_other, prefix);
};
if name.starts_with(ident_old) {
read_into(&mut outputs_old);
} else if name.starts_with(ident_new) {
read_into(&mut outputs_new);
}
}
}
let res = outputs_new.compare_perf(outputs_old);
println!("{res}");
}
#[expect(clippy::too_many_lines, reason = "This will be split up soon!")]
fn main() {
let args = std::env::args().collect::<Vec<_>>();
// We get passed the test we need to run as the 1st argument after our own name.
let test_bin = std::env::args().nth(1).unwrap();
let mut cmd = Command::new(&test_bin);
let test_bin = args
.get(1)
.expect("FATAL: No test binary or command; this shouldn't be manually invoked!");
// We're being asked to compare two results, not run the profiler.
if test_bin == "compare" {
compare_profiles(&args[2..]);
return;
}
// Whether to skip printing some information to stderr.
let mut quiet = false;
// Minimum test importance we care about this run.
let mut thresh = Importance::Iffy;
// Where to print the output of this run.
let mut out_kind = OutputKind::Markdown;
for arg in args.iter().skip(2) {
match arg.as_str() {
"--critical" => thresh = Importance::Critical,
"--important" => thresh = Importance::Important,
"--average" => thresh = Importance::Average,
"--iffy" => thresh = Importance::Iffy,
"--fluff" => thresh = Importance::Fluff,
"--quiet" => quiet = true,
s if s.starts_with("--json") => {
out_kind = OutputKind::Json(Path::new(
s.strip_prefix("--json=")
.expect("FATAL: Invalid json parameter; pass --json=filename"),
));
}
_ => (),
}
}
if !quiet {
eprintln!("Starting perf check");
}
let mut cmd = Command::new(test_bin);
// --format=json is nightly-only :(
cmd.args(["--list", "--format=terse"]);
let out = cmd
@ -64,6 +245,9 @@ fn main() {
out.status.success(),
"FATAL: Cannot do perf check - test binary {test_bin} returned an error"
);
if !quiet {
eprintln!("Test binary ran successfully; starting profile...");
}
// Parse the test harness output to look for tests we care about.
let stdout = String::from_utf8_lossy(&out.stdout);
let mut test_list: Vec<_> = stdout
@ -79,37 +263,62 @@ fn main() {
}
})
// Exclude tests that aren't marked for perf triage based on suffix.
.filter(|t_name| t_name.ends_with(SUF_NORMAL) || t_name.ends_with(SUF_FIXED))
.filter(|t_name| {
t_name.ends_with(consts::SUF_NORMAL) || t_name.ends_with(consts::SUF_MDATA)
})
.collect();
// Pulling itertools just for .dedup() would be quite a big dependency that's
// not used elsewhere, so do this on the vec instead.
// not used elsewhere, so do this on a vec instead.
test_list.sort_unstable();
test_list.dedup();
if !test_list.is_empty() {
// Print the markdown header which matches hyperfine's result.
// TODO: Support exporting JSON also.
println!(
"| Command | Mean [ms] | Min [ms] | Max [ms] | Iterations | Iter/sec |\n|:---|---:|---:|---:|---:|---:|"
);
}
let len = test_list.len();
// Tests should come in pairs with their mdata fn!
assert!(
len.is_multiple_of(2),
"Malformed tests in test binary {test_bin}"
);
let mut output = Output::default();
// Spawn and profile an instance of each perf-sensitive test, via hyperfine.
for t_name in test_list {
// Pretty-print the stripped name for the test.
let t_name_normal = t_name.replace(SUF_FIXED, "").replace(SUF_NORMAL, "");
// Each test is a pair of (test, metadata-returning-fn), so grab both. We also
// know the list is sorted.
for (idx, t_pair) in test_list.chunks_exact(2).enumerate() {
if !quiet {
eprint!("\rProfiling test {}/{}", idx + 1, len / 2);
}
// Be resilient against changes to these constants.
let (t_name, t_mdata) = if consts::SUF_NORMAL < consts::SUF_MDATA {
(t_pair[0], t_pair[1])
} else {
(t_pair[1], t_pair[0])
};
// Pretty-printable stripped name for the test.
let t_name_pretty = t_name.replace(consts::SUF_NORMAL, "");
// Get the metadata this test reports for us.
let t_mdata = match parse_mdata(test_bin, t_mdata) {
Ok(mdata) => mdata,
Err(err) => fail!(output, t_name_pretty, err),
};
if t_mdata.importance < thresh {
fail!(output, t_name_pretty, t_mdata, FailKind::Skipped);
}
// Time test execution to see how many iterations we need to do in order
// to account for random noise. This is skipped for tests with fixed
// iteration counts.
let final_iter_count = if t_name.ends_with(SUF_FIXED) {
None
} else {
let mut errored = false;
let final_iter_count = t_mdata.iterations.unwrap_or_else(|| {
let mut iter_count = DEFAULT_ITER_COUNT;
loop {
let mut cmd = Command::new(&test_bin);
let mut cmd = Command::new(test_bin);
cmd.args([t_name, "--exact"]);
cmd.env(ITER_ENV_VAR, format!("{iter_count}"));
cmd.env(consts::ITER_ENV_VAR, format!("{iter_count}"));
// Don't let the child muck up our stdin/out/err.
cmd.stdin(Stdio::null());
cmd.stdout(Stdio::null());
@ -119,28 +328,32 @@ fn main() {
let out = cmd.spawn().unwrap().wait();
let post = Instant::now();
if !out.unwrap().success() {
println!(
"| {t_name_normal} (ERRORED IN TRIAGE) | N/A | N/A | N/A | {iter_count} | N/A |"
);
return;
errored = true;
break iter_count;
}
if post - pre > NOISE_CUTOFF {
break Some(iter_count);
if post - pre > consts::NOISE_CUTOFF {
break iter_count;
} else if let Some(c) = iter_count.checked_mul(ITER_COUNT_MUL) {
iter_count = c;
} else {
// This should almost never happen, but maybe..?
eprintln!(
"WARNING: Running nearly usize::MAX iterations of test {t_name_normal}"
"WARNING: Running nearly usize::MAX iterations of test {t_name_pretty}"
);
break Some(iter_count);
break iter_count;
}
}
};
});
// Don't profile failing tests.
if errored {
fail!(output, t_name_pretty, t_mdata, FailKind::Triage);
}
// Now profile!
let mut perf_cmd = Command::new("hyperfine");
// Warm up the cache and print markdown output to stdout.
// TODO: json
perf_cmd.args([
"--style",
"none",
@ -150,42 +363,89 @@ fn main() {
"-",
&format!("{test_bin} {t_name}"),
]);
if let Some(final_iter_count) = final_iter_count {
perf_cmd.env(ITER_ENV_VAR, format!("{final_iter_count}"));
}
perf_cmd.env(consts::ITER_ENV_VAR, format!("{final_iter_count}"));
let p_out = perf_cmd.output().unwrap();
let fin_iter = match final_iter_count {
Some(i) => &format!("{i}"),
None => "(preset)",
};
if p_out.status.success() {
let output = String::from_utf8_lossy(&p_out.stdout);
// Strip the name of the test binary from the table (and the space after it!)
// + our extraneous test bits + the "Relative" column (which is always at the end and "1.00").
let output = output
.replace(&format!("{test_bin} "), "")
.replace(SUF_FIXED, "")
.replace(SUF_NORMAL, "")
.replace(" 1.00 |", "");
let cmd_output = String::from_utf8_lossy(&p_out.stdout);
// Can't use .last() since we have a trailing newline. Sigh.
let fin = output.lines().nth(3).unwrap();
let results_line = cmd_output.lines().nth(3).unwrap();
// Grab the values out of the pretty-print.
// TODO: Parse json instead.
let mut res_iter = results_line.split_whitespace();
// Durations are given in milliseconds, so account for that.
let mean =
Duration::from_secs_f64(res_iter.nth(4).unwrap().parse::<f64>().unwrap() / 1000.);
let stddev =
Duration::from_secs_f64(res_iter.nth(1).unwrap().parse::<f64>().unwrap() / 1000.);
// Calculate how many iterations this does per second, for easy comparison.
let ms = fin
.split_whitespace()
.nth(3)
.unwrap()
.parse::<f64>()
.unwrap();
let mul_fac = 1000.0 / ms;
let iter_sec = match final_iter_count {
#[allow(clippy::cast_precision_loss)]
Some(c) => &format!("{:.1}", mul_fac * c as f64),
None => "(unknown)",
};
println!("{fin} {fin_iter} | {iter_sec} |");
output.success(
t_name_pretty,
t_mdata,
final_iter_count,
Timings { mean, stddev },
);
} else {
println!("{t_name_normal} (ERRORED) | N/A | N/A | N/A | {fin_iter} | N/A |");
fail!(
output,
t_name_pretty,
t_mdata,
final_iter_count,
FailKind::Profile
);
}
}
if !quiet {
if output.is_empty() {
eprintln!("Nothing to do.");
} else {
// If stdout and stderr are on the same terminal, move us after the
// output from above.
eprintln!();
}
}
// No need making an empty json file on every empty test bin.
if output.is_empty() {
return;
}
match out_kind {
OutputKind::Markdown => print!("{output}"),
OutputKind::Json(user_path) => {
let wspace_dir = std::env::var("CARGO_WORKSPACE_DIR").unwrap();
let runs_dir = PathBuf::from(&wspace_dir).join(consts::RUNS_DIR);
std::fs::create_dir_all(&runs_dir).unwrap();
assert!(
!user_path.to_string_lossy().is_empty(),
"FATAL: Empty filename specified!"
);
// Get the test binary's crate's name; a path like
// target/release-fast/deps/gpui-061ff76c9b7af5d7
// would be reduced to just "gpui".
let test_bin_stripped = Path::new(test_bin)
.file_name()
.unwrap()
.to_str()
.unwrap()
.rsplit_once('-')
.unwrap()
.0;
let mut file_path = runs_dir.join(user_path);
file_path
.as_mut_os_string()
.push(format!(".{test_bin_stripped}.json"));
let mut out_file = OpenOptions::new()
.write(true)
.create(true)
.truncate(true)
.open(&file_path)
.unwrap();
out_file
.write_all(&serde_json::to_vec(&output).unwrap())
.unwrap();
if !quiet {
eprintln!("JSON output written to {}", file_path.display());
}
}
}
}