mirror of
https://github.com/zed-industries/zed.git
synced 2026-06-01 03:14:56 +07:00
perf: Greatly expand profiler (#38584)
Expands on #38543 (notably allows setting importance categories and weights on tests, and a lot of internal refactoring) because I couldn't help myself. Also allows exporting runs to json and comparing across them. See code for docs. Release Notes: - N/A
This commit is contained in:
parent
839c216620
commit
11041ef3b0
9 changed files with 919 additions and 168 deletions
|
|
@ -5,6 +5,7 @@ rustflags = ["-C", "symbol-mangling-version=v0", "--cfg", "tokio_unstable"]
|
|||
[alias]
|
||||
xtask = "run --package xtask --"
|
||||
perf-test = ["test", "--profile", "release-fast", "--lib", "--bins", "--tests", "--config", "target.'cfg(true)'.runner='target/release/perf'", "--config", "target.'cfg(true)'.rustflags=[\"--cfg\", \"perf_enabled\"]"]
|
||||
perf-compare = ["run", "--release", "-p", "perf", "--", "compare"]
|
||||
|
||||
[target.x86_64-unknown-linux-gnu]
|
||||
linker = "clang"
|
||||
|
|
@ -24,3 +25,4 @@ rustflags = [
|
|||
|
||||
[env]
|
||||
MACOSX_DEPLOYMENT_TARGET = "10.15.7"
|
||||
CARGO_WORKSPACE_DIR = { value = "", relative = true }
|
||||
|
|
|
|||
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -20,6 +20,7 @@
|
|||
.venv
|
||||
.vscode
|
||||
.wrangler
|
||||
.perf-runs
|
||||
/assets/*licenses.*
|
||||
/crates/collab/seed.json
|
||||
/crates/theme/schemas/theme.json
|
||||
|
|
|
|||
4
Cargo.lock
generated
4
Cargo.lock
generated
|
|
@ -12169,6 +12169,9 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
|
|||
name = "perf"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"collections",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"workspace-hack",
|
||||
]
|
||||
|
||||
|
|
@ -18586,6 +18589,7 @@ dependencies = [
|
|||
name = "util_macros"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"perf",
|
||||
"quote",
|
||||
"syn 2.0.101",
|
||||
"workspace-hack",
|
||||
|
|
|
|||
|
|
@ -356,6 +356,7 @@ outline = { path = "crates/outline" }
|
|||
outline_panel = { path = "crates/outline_panel" }
|
||||
panel = { path = "crates/panel" }
|
||||
paths = { path = "crates/paths" }
|
||||
perf = { path = "tooling/perf" }
|
||||
picker = { path = "crates/picker" }
|
||||
plugin = { path = "crates/plugin" }
|
||||
plugin_macros = { path = "crates/plugin_macros" }
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ doctest = false
|
|||
[dependencies]
|
||||
quote.workspace = true
|
||||
syn.workspace = true
|
||||
perf.workspace = true
|
||||
workspace-hack.workspace = true
|
||||
|
||||
[features]
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#![cfg_attr(not(target_os = "windows"), allow(unused))]
|
||||
#![allow(clippy::test_attr_in_doctest)]
|
||||
|
||||
use perf::*;
|
||||
use proc_macro::TokenStream;
|
||||
use quote::{ToTokens, quote};
|
||||
use syn::{ItemFn, LitStr, parse_macro_input, parse_quote};
|
||||
|
|
@ -90,68 +91,81 @@ pub fn line_endings(input: TokenStream) -> TokenStream {
|
|||
}
|
||||
|
||||
/// Inner data for the perf macro.
|
||||
#[derive(Default)]
|
||||
struct PerfArgs {
|
||||
/// How many times to loop a test before rerunning the test binary.
|
||||
/// If left empty, the test harness will auto-determine this value.
|
||||
/// How many times to loop a test before rerunning the test binary. If left
|
||||
/// empty, the test harness will auto-determine this value.
|
||||
iterations: Option<syn::Expr>,
|
||||
/// How much this test's results should be weighed when comparing across runs.
|
||||
/// If unspecified, defaults to `WEIGHT_DEFAULT` (50).
|
||||
weight: Option<syn::Expr>,
|
||||
/// How relevant a benchmark is to overall performance. See docs on the enum
|
||||
/// for details. If unspecified, `Average` is selected.
|
||||
importance: Importance,
|
||||
}
|
||||
|
||||
impl syn::parse::Parse for PerfArgs {
|
||||
fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
|
||||
if input.is_empty() {
|
||||
return Ok(PerfArgs { iterations: None });
|
||||
#[warn(clippy::all, clippy::pedantic)]
|
||||
impl PerfArgs {
|
||||
/// Parses attribute arguments into a `PerfArgs`.
|
||||
fn parse_into(&mut self, meta: syn::meta::ParseNestedMeta) -> syn::Result<()> {
|
||||
if meta.path.is_ident("iterations") {
|
||||
self.iterations = Some(meta.value()?.parse()?);
|
||||
} else if meta.path.is_ident("weight") {
|
||||
self.weight = Some(meta.value()?.parse()?);
|
||||
} else if meta.path.is_ident("critical") {
|
||||
self.importance = Importance::Critical;
|
||||
} else if meta.path.is_ident("important") {
|
||||
self.importance = Importance::Important;
|
||||
} else if meta.path.is_ident("average") {
|
||||
// This shouldn't be specified manually, but oh well.
|
||||
self.importance = Importance::Average;
|
||||
} else if meta.path.is_ident("iffy") {
|
||||
self.importance = Importance::Iffy;
|
||||
} else if meta.path.is_ident("fluff") {
|
||||
self.importance = Importance::Fluff;
|
||||
} else {
|
||||
return Err(syn::Error::new_spanned(meta.path, "unexpected identifier"));
|
||||
}
|
||||
|
||||
let mut iterations = None;
|
||||
// In principle we only have one possible argument, but leave this as
|
||||
// a loop in case we expand this in the future.
|
||||
for meta in
|
||||
syn::punctuated::Punctuated::<syn::Meta, syn::Token![,]>::parse_terminated(input)?
|
||||
{
|
||||
match &meta {
|
||||
syn::Meta::NameValue(meta_name_value) => {
|
||||
if meta_name_value.path.is_ident("iterations") {
|
||||
iterations = Some(meta_name_value.value.clone());
|
||||
} else {
|
||||
return Err(syn::Error::new_spanned(
|
||||
&meta_name_value.path,
|
||||
"unexpected argument, expected 'iterations'",
|
||||
));
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
return Err(syn::Error::new_spanned(
|
||||
meta,
|
||||
"expected name-value argument like 'iterations = 1'",
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(PerfArgs { iterations })
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Marks a test as perf-sensitive, to be triaged when checking the performance
|
||||
/// of a build. This also automatically applies `#[test]`.
|
||||
///
|
||||
/// By default, the number of iterations when profiling this test is auto-determined.
|
||||
/// If this needs to be overwritten, pass the desired iteration count to the macro
|
||||
/// as a parameter (`#[perf(iterations = n)]`). Note that the actual profiler may still
|
||||
/// run the test an arbitrary number times; this flag just sets the number of executions
|
||||
/// before the process is restarted and global state is reset.
|
||||
///
|
||||
/// # Usage notes
|
||||
/// This should probably not be applied to tests that do any significant fs IO, as
|
||||
/// locks on files may not be released in time when repeating a test many times. This
|
||||
/// might lead to spurious failures.
|
||||
/// # Usage
|
||||
/// Applying this attribute to a test marks it as average importance by default.
|
||||
/// There are 4 levels of importance (`Critical`, `Important`, `Average`, `Fluff`);
|
||||
/// see the documentation on `Importance` for details. Add the importance as a
|
||||
/// parameter to override the default (e.g. `#[perf(important)]`).
|
||||
///
|
||||
/// Each test also has a weight factor. This is irrelevant on its own, but is considered
|
||||
/// when comparing results across different runs. By default, this is set to 50;
|
||||
/// pass `weight = n` as a parameter to override this. Note that this value is only
|
||||
/// relevant within its importance category.
|
||||
///
|
||||
/// By default, the number of iterations when profiling this test is auto-determined.
|
||||
/// If this needs to be overwritten, pass the desired iteration count as a parameter
|
||||
/// (`#[perf(iterations = n)]`). Note that the actual profiler may still run the test
|
||||
/// an arbitrary number times; this flag just sets the number of executions before the
|
||||
/// process is restarted and global state is reset.
|
||||
///
|
||||
/// This attribute should probably not be applied to tests that do any significant
|
||||
/// disk IO, as locks on files may not be released in time when repeating a test many
|
||||
/// times. This might lead to spurious failures.
|
||||
///
|
||||
/// # Examples
|
||||
/// ```rust
|
||||
/// use util_macros::perf;
|
||||
///
|
||||
/// #[perf]
|
||||
/// fn expensive_computation_test() {
|
||||
/// fn generic_test() {
|
||||
/// // Test goes here.
|
||||
/// }
|
||||
///
|
||||
/// #[perf(fluff, weight = 30)]
|
||||
/// fn cold_path_test() {
|
||||
/// // Test goes here.
|
||||
/// }
|
||||
/// ```
|
||||
|
|
@ -161,72 +175,108 @@ impl syn::parse::Parse for PerfArgs {
|
|||
/// ```rust,ignore
|
||||
/// use util_macros::perf;
|
||||
///
|
||||
/// #[perf(iterations = 1)]
|
||||
/// #[perf(iterations = 1, critical)]
|
||||
/// #[gpui::test]
|
||||
/// fn oneshot_test(_cx: &mut gpui::TestAppContext) {
|
||||
/// // Test goes here.
|
||||
/// }
|
||||
/// ```
|
||||
#[proc_macro_attribute]
|
||||
#[warn(clippy::all, clippy::pedantic)]
|
||||
pub fn perf(our_attr: TokenStream, input: TokenStream) -> TokenStream {
|
||||
// If any of the below constants are changed, make sure to also update the perf
|
||||
// profiler to match!
|
||||
|
||||
/// The suffix on tests marked with `#[perf]`.
|
||||
const SUF_NORMAL: &str = "__ZED_PERF";
|
||||
/// The suffix on tests marked with `#[perf(iterations = n)]`.
|
||||
const SUF_FIXED: &str = "__ZED_PERF_FIXEDITER";
|
||||
/// The env var in which we pass the iteration count to our tests.
|
||||
const ITER_ENV_VAR: &str = "ZED_PERF_ITER";
|
||||
|
||||
let iter_count = parse_macro_input!(our_attr as PerfArgs).iterations;
|
||||
let mut args = PerfArgs::default();
|
||||
let parser = syn::meta::parser(|meta| PerfArgs::parse_into(&mut args, meta));
|
||||
parse_macro_input!(our_attr with parser);
|
||||
|
||||
let ItemFn {
|
||||
mut attrs,
|
||||
attrs: mut attrs_main,
|
||||
vis,
|
||||
mut sig,
|
||||
sig: mut sig_main,
|
||||
block,
|
||||
} = parse_macro_input!(input as ItemFn);
|
||||
attrs.push(parse_quote!(#[test]));
|
||||
attrs.push(parse_quote!(#[allow(non_snake_case)]));
|
||||
attrs_main.push(parse_quote!(#[test]));
|
||||
attrs_main.push(parse_quote!(#[allow(non_snake_case)]));
|
||||
|
||||
let fns = if cfg!(perf_enabled) {
|
||||
#[allow(clippy::wildcard_imports, reason = "We control the other side")]
|
||||
use consts::*;
|
||||
|
||||
let block: Box<syn::Block> = if cfg!(perf_enabled) {
|
||||
// Make the ident obvious when calling, for the test parser.
|
||||
let mut new_ident = sig.ident.to_string();
|
||||
if iter_count.is_some() {
|
||||
new_ident.push_str(SUF_FIXED);
|
||||
} else {
|
||||
new_ident.push_str(SUF_NORMAL);
|
||||
}
|
||||
// Also set up values for the second metadata-returning "test".
|
||||
let mut new_ident_main = sig_main.ident.to_string();
|
||||
let mut new_ident_meta = new_ident_main.clone();
|
||||
new_ident_main.push_str(SUF_NORMAL);
|
||||
new_ident_meta.push_str(SUF_MDATA);
|
||||
|
||||
let new_ident = syn::Ident::new(&new_ident, sig.ident.span());
|
||||
sig.ident = new_ident;
|
||||
// If we have a preset iteration count, just use that.
|
||||
if let Some(iter_count) = iter_count {
|
||||
parse_quote!({
|
||||
for _ in 0..#iter_count {
|
||||
#block
|
||||
}
|
||||
})
|
||||
} else {
|
||||
// Otherwise, the perf harness will pass us the value in an env var.
|
||||
let new_ident_main = syn::Ident::new(&new_ident_main, sig_main.ident.span());
|
||||
sig_main.ident = new_ident_main;
|
||||
|
||||
// We don't want any nonsense if the original test had a weird signature.
|
||||
let new_ident_meta = syn::Ident::new(&new_ident_meta, sig_main.ident.span());
|
||||
let sig_meta = parse_quote!(fn #new_ident_meta());
|
||||
let attrs_meta = parse_quote!(#[test] #[allow(non_snake_case)]);
|
||||
|
||||
// Make the test loop as the harness instructs it to.
|
||||
let block_main = {
|
||||
// The perf harness will pass us the value in an env var. Even if we
|
||||
// have a preset value, just do this to keep the code paths unified.
|
||||
parse_quote!({
|
||||
let iter_count = std::env::var(#ITER_ENV_VAR).unwrap().parse::<usize>().unwrap();
|
||||
for _ in 0..iter_count {
|
||||
#block
|
||||
}
|
||||
})
|
||||
}
|
||||
};
|
||||
let importance = format!("{}", args.importance);
|
||||
let block_meta = {
|
||||
// This function's job is to just print some relevant info to stdout,
|
||||
// based on the params this attr is passed. It's not an actual test.
|
||||
// Since we use a custom attr set on our metadata fn, it shouldn't
|
||||
// cause problems with xfail tests.
|
||||
let q_iter = if let Some(iter) = args.iterations {
|
||||
quote! {
|
||||
println!("{} {} {}", #MDATA_LINE_PREF, #ITER_COUNT_LINE_NAME, #iter);
|
||||
}
|
||||
} else {
|
||||
quote! {}
|
||||
};
|
||||
let weight = args
|
||||
.weight
|
||||
.unwrap_or_else(|| parse_quote! { #WEIGHT_DEFAULT });
|
||||
parse_quote!({
|
||||
#q_iter
|
||||
println!("{} {} {}", #MDATA_LINE_PREF, #WEIGHT_LINE_NAME, #weight);
|
||||
println!("{} {} {}", #MDATA_LINE_PREF, #IMPORTANCE_LINE_NAME, #importance);
|
||||
println!("{} {} {}", #MDATA_LINE_PREF, #VERSION_LINE_NAME, #MDATA_VER);
|
||||
})
|
||||
};
|
||||
|
||||
vec![
|
||||
// The real test.
|
||||
ItemFn {
|
||||
attrs: attrs_main,
|
||||
vis: vis.clone(),
|
||||
sig: sig_main,
|
||||
block: block_main,
|
||||
},
|
||||
// The fake test.
|
||||
ItemFn {
|
||||
attrs: attrs_meta,
|
||||
vis,
|
||||
sig: sig_meta,
|
||||
block: block_meta,
|
||||
},
|
||||
]
|
||||
} else {
|
||||
block
|
||||
vec![ItemFn {
|
||||
attrs: attrs_main,
|
||||
vis,
|
||||
sig: sig_main,
|
||||
block,
|
||||
}]
|
||||
};
|
||||
|
||||
ItemFn {
|
||||
attrs,
|
||||
vis,
|
||||
sig,
|
||||
block,
|
||||
}
|
||||
.into_token_stream()
|
||||
.into()
|
||||
fns.into_iter()
|
||||
.flat_map(|f| TokenStream::from(f.into_token_stream()))
|
||||
.collect()
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,11 +1,30 @@
|
|||
[package]
|
||||
name = "perf"
|
||||
version = "0.1.0"
|
||||
description = "A tool for measuring Zed test performance, with too many Clippy lints"
|
||||
publish.workspace = true
|
||||
edition.workspace = true
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
[lib]
|
||||
|
||||
# Some personal lint preferences :3
|
||||
[lints.rust]
|
||||
missing_docs = "warn"
|
||||
|
||||
[lints.clippy]
|
||||
needless_continue = "allow" # For a convenience macro
|
||||
all = "warn"
|
||||
pedantic = "warn"
|
||||
style = "warn"
|
||||
missing_docs_in_private_items = "warn"
|
||||
as_underscore = "deny"
|
||||
allow_attributes_without_reason = "deny"
|
||||
let_underscore_must_use = "forbid"
|
||||
undocumented_unsafe_blocks = "forbid"
|
||||
missing_safety_doc = "forbid"
|
||||
|
||||
[dependencies]
|
||||
collections.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
workspace-hack.workspace = true
|
||||
|
|
|
|||
413
tooling/perf/src/lib.rs
Normal file
413
tooling/perf/src/lib.rs
Normal file
|
|
@ -0,0 +1,413 @@
|
|||
//! Some constants and datatypes used in the Zed perf profiler. Should only be
|
||||
//! consumed by the crate providing the matching macros.
|
||||
|
||||
use collections::HashMap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::time::Duration;
|
||||
|
||||
pub mod consts {
|
||||
//! Preset idenitifiers and constants so that the profiler and proc macro agree
|
||||
//! on their communication protocol.
|
||||
|
||||
/// The suffix on the actual test function.
|
||||
pub const SUF_NORMAL: &str = "__ZED_PERF_FN";
|
||||
/// The suffix on an extra function which prints metadata about a test to stdout.
|
||||
pub const SUF_MDATA: &str = "__ZED_PERF_MDATA";
|
||||
/// The env var in which we pass the iteration count to our tests.
|
||||
pub const ITER_ENV_VAR: &str = "ZED_PERF_ITER";
|
||||
/// The prefix printed on all benchmark test metadata lines, to distinguish it from
|
||||
/// possible output by the test harness itself.
|
||||
pub const MDATA_LINE_PREF: &str = "ZED_MDATA_";
|
||||
/// The version number for the data returned from the test metadata function.
|
||||
/// Increment on non-backwards-compatible changes.
|
||||
pub const MDATA_VER: u32 = 0;
|
||||
/// The default weight, if none is specified.
|
||||
pub const WEIGHT_DEFAULT: u8 = 50;
|
||||
/// How long a test must have run to be assumed to be reliable-ish.
|
||||
pub const NOISE_CUTOFF: std::time::Duration = std::time::Duration::from_millis(250);
|
||||
|
||||
/// Identifier for the iteration count of a test metadata.
|
||||
pub const ITER_COUNT_LINE_NAME: &str = "iter_count";
|
||||
/// Identifier for the weight of a test metadata.
|
||||
pub const WEIGHT_LINE_NAME: &str = "weight";
|
||||
/// Identifier for importance in test metadata.
|
||||
pub const IMPORTANCE_LINE_NAME: &str = "importance";
|
||||
/// Identifier for the test metadata version.
|
||||
pub const VERSION_LINE_NAME: &str = "version";
|
||||
|
||||
/// Where to save json run information.
|
||||
pub const RUNS_DIR: &str = ".perf-runs";
|
||||
}
|
||||
|
||||
/// How relevant a benchmark is.
|
||||
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
|
||||
pub enum Importance {
|
||||
/// Regressions shouldn't be accepted without good reason.
|
||||
Critical = 4,
|
||||
/// Regressions should be paid extra attention.
|
||||
Important = 3,
|
||||
/// No extra attention should be paid to regressions, but they might still
|
||||
/// be indicative of something happening.
|
||||
#[default]
|
||||
Average = 2,
|
||||
/// Unclear if regressions are likely to be meaningful, but still worth keeping
|
||||
/// an eye on. Lowest level that's checked by default by the profiler.
|
||||
Iffy = 1,
|
||||
/// Regressions are likely to be spurious or don't affect core functionality.
|
||||
/// Only relevant if a lot of them happen, or as supplemental evidence for a
|
||||
/// higher-importance benchmark regressing. Not checked by default.
|
||||
Fluff = 0,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Importance {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Importance::Critical => f.write_str("critical"),
|
||||
Importance::Important => f.write_str("important"),
|
||||
Importance::Average => f.write_str("average"),
|
||||
Importance::Iffy => f.write_str("iffy"),
|
||||
Importance::Fluff => f.write_str("fluff"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Why or when did this test fail?
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub enum FailKind {
|
||||
/// Failed while triaging it to determine the iteration count.
|
||||
Triage,
|
||||
/// Failed while profiling it.
|
||||
Profile,
|
||||
/// Failed due to an incompatible version for the test.
|
||||
VersionMismatch,
|
||||
/// Skipped due to filters applied on the perf run.
|
||||
Skipped,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for FailKind {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
FailKind::Triage => f.write_str("failed in triage"),
|
||||
FailKind::Profile => f.write_str("failed while profiling"),
|
||||
FailKind::VersionMismatch => f.write_str("test version mismatch"),
|
||||
FailKind::Skipped => f.write_str("skipped"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Information about a given perf test.
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct TestMdata {
|
||||
/// A version number for when the test was generated. If this is greater
|
||||
/// than the version this test handler expects, one of the following will
|
||||
/// happen in an unspecified manner:
|
||||
/// - The test is skipped silently.
|
||||
/// - The handler exits with an error message indicating the version mismatch
|
||||
/// or inability to parse the metadata.
|
||||
///
|
||||
/// INVARIANT: If `version` <= `MDATA_VER`, this tool *must* be able to
|
||||
/// correctly parse the output of this test.
|
||||
pub version: u32,
|
||||
/// How many iterations to pass this test, if this is preset.
|
||||
pub iterations: Option<usize>,
|
||||
/// The importance of this particular test. See the docs on `Importance` for
|
||||
/// details.
|
||||
pub importance: Importance,
|
||||
/// The weight of this particular test within its importance category. Used
|
||||
/// when comparing across runs.
|
||||
pub weight: u8,
|
||||
}
|
||||
|
||||
/// The actual timings of a test, as measured by Hyperfine.
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct Timings {
|
||||
/// Mean runtime for `self.iter_total` runs of this test.
|
||||
pub mean: Duration,
|
||||
/// Standard deviation for the above.
|
||||
pub stddev: Duration,
|
||||
}
|
||||
|
||||
impl Timings {
|
||||
/// How many iterations does this test seem to do per second?
|
||||
#[expect(
|
||||
clippy::cast_precision_loss,
|
||||
reason = "We only care about a couple sig figs anyways"
|
||||
)]
|
||||
#[must_use]
|
||||
pub fn iters_per_sec(&self, total_iters: usize) -> f64 {
|
||||
(1000. / self.mean.as_millis() as f64) * total_iters as f64
|
||||
}
|
||||
}
|
||||
|
||||
/// Aggregate output of all tests run by this handler.
|
||||
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
|
||||
pub struct Output {
|
||||
/// A list of test outputs. Format is `(test_name, iter_count, timings)`.
|
||||
/// The latter being set indicates the test succeeded.
|
||||
///
|
||||
/// INVARIANT: If the test succeeded, the second field is `Some(mdata)` and
|
||||
/// `mdata.iterations` is `Some(_)`.
|
||||
tests: Vec<(String, Option<TestMdata>, Result<Timings, FailKind>)>,
|
||||
}
|
||||
|
||||
impl Output {
|
||||
/// Instantiates an empty "output". Useful for merging.
|
||||
#[must_use]
|
||||
pub fn blank() -> Self {
|
||||
Output { tests: Vec::new() }
|
||||
}
|
||||
|
||||
/// Reports a success and adds it to this run's `Output`.
|
||||
pub fn success(
|
||||
&mut self,
|
||||
name: impl AsRef<str>,
|
||||
mut mdata: TestMdata,
|
||||
iters: usize,
|
||||
timings: Timings,
|
||||
) {
|
||||
mdata.iterations = Some(iters);
|
||||
self.tests
|
||||
.push((name.as_ref().to_string(), Some(mdata), Ok(timings)));
|
||||
}
|
||||
|
||||
/// Reports a failure and adds it to this run's `Output`. If this test was tried
|
||||
/// with some number of iterations (i.e. this was not a version mismatch or skipped
|
||||
/// test), it should be reported also.
|
||||
///
|
||||
/// Using the `fail!()` macro is usually more convenient.
|
||||
pub fn failure(
|
||||
&mut self,
|
||||
name: impl AsRef<str>,
|
||||
mut mdata: Option<TestMdata>,
|
||||
attempted_iters: Option<usize>,
|
||||
kind: FailKind,
|
||||
) {
|
||||
if let Some(ref mut mdata) = mdata {
|
||||
mdata.iterations = attempted_iters;
|
||||
}
|
||||
self.tests
|
||||
.push((name.as_ref().to_string(), mdata, Err(kind)));
|
||||
}
|
||||
|
||||
/// True if no tests executed this run.
|
||||
#[must_use]
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.tests.is_empty()
|
||||
}
|
||||
|
||||
/// Sorts the runs in the output in the order that we want it printed.
|
||||
pub fn sort(&mut self) {
|
||||
self.tests.sort_unstable_by(|a, b| match (a, b) {
|
||||
// Tests where we got no metadata go at the end.
|
||||
((_, Some(_), _), (_, None, _)) => std::cmp::Ordering::Greater,
|
||||
((_, None, _), (_, Some(_), _)) => std::cmp::Ordering::Less,
|
||||
// Then sort by importance, then weight.
|
||||
((_, Some(a_mdata), _), (_, Some(b_mdata), _)) => {
|
||||
let c = a_mdata.importance.cmp(&b_mdata.importance);
|
||||
if matches!(c, std::cmp::Ordering::Equal) {
|
||||
a_mdata.weight.cmp(&b_mdata.weight)
|
||||
} else {
|
||||
c
|
||||
}
|
||||
}
|
||||
// Lastly by name.
|
||||
((a_name, ..), (b_name, ..)) => a_name.cmp(b_name),
|
||||
});
|
||||
}
|
||||
|
||||
/// Merges the output of two runs, appending a prefix to the results of the new run.
|
||||
/// To be used in conjunction with `Output::blank()`, or else only some tests will have
|
||||
/// a prefix set.
|
||||
pub fn merge(&mut self, other: Self, pref_other: impl AsRef<str>) {
|
||||
self.tests = std::mem::take(&mut self.tests)
|
||||
.into_iter()
|
||||
.chain(other.tests.into_iter().map(|(name, md, tm)| {
|
||||
let mut new_name = "crates/".to_string();
|
||||
new_name.push_str(pref_other.as_ref());
|
||||
new_name.push_str("::");
|
||||
new_name.push_str(&name);
|
||||
(new_name, md, tm)
|
||||
}))
|
||||
.collect();
|
||||
}
|
||||
|
||||
/// Evaluates the performance of `self` against `baseline`. The latter is taken
|
||||
/// as the comparison point, i.e. a positive resulting `PerfReport` means that
|
||||
/// `self` performed better.
|
||||
///
|
||||
/// # Panics
|
||||
/// `self` and `baseline` are assumed to have the iterations field on all
|
||||
/// `TestMdata`s set to `Some(_)` if the `TestMdata` is present itself.
|
||||
#[must_use]
|
||||
pub fn compare_perf(self, baseline: Self) -> PerfReport {
|
||||
let self_categories = self.collapse();
|
||||
let mut other_categories = baseline.collapse();
|
||||
|
||||
let deltas = self_categories
|
||||
.into_iter()
|
||||
.filter_map(|(cat, self_data)| {
|
||||
// Only compare categories where both meow
|
||||
// runs have data. /
|
||||
let mut other_data = other_categories.remove(&cat)?;
|
||||
let mut max = 0.;
|
||||
let mut min = 0.;
|
||||
|
||||
// Running totals for averaging out tests.
|
||||
let mut r_total_numerator = 0.;
|
||||
let mut r_total_denominator = 0;
|
||||
// Yeah this is O(n^2), but realistically it'll hardly be a bottleneck.
|
||||
for (name, (s_timings, s_iters, weight)) in self_data {
|
||||
// Only use the new weights if they conflict.
|
||||
let Some((o_timings, o_iters, _)) = other_data.remove(&name) else {
|
||||
continue;
|
||||
};
|
||||
let shift =
|
||||
(s_timings.iters_per_sec(s_iters) / o_timings.iters_per_sec(o_iters)) - 1.;
|
||||
if shift > max {
|
||||
max = shift;
|
||||
}
|
||||
if shift < min {
|
||||
min = shift;
|
||||
}
|
||||
r_total_numerator += shift * f64::from(weight);
|
||||
r_total_denominator += u32::from(weight);
|
||||
}
|
||||
let mean = r_total_numerator / f64::from(r_total_denominator);
|
||||
// TODO: also aggregate standard deviation? that's harder to keep
|
||||
// meaningful, though, since we dk which tests are correlated
|
||||
Some((cat, PerfDelta { max, mean, min }))
|
||||
})
|
||||
.collect();
|
||||
|
||||
PerfReport { deltas }
|
||||
}
|
||||
|
||||
/// Collapses the `PerfReport` into a `HashMap` of `Importance` <-> tests
|
||||
/// each represented as a map of `name, (Timings, iterations, weight)`.
|
||||
fn collapse(self) -> HashMap<Importance, HashMap<String, (Timings, usize, u8)>> {
|
||||
let mut categories = HashMap::<Importance, HashMap<String, _>>::default();
|
||||
for entry in self.tests {
|
||||
if let Some(mdata) = entry.1
|
||||
&& let Ok(timings) = entry.2
|
||||
{
|
||||
if let Some(handle) = categories.get_mut(&mdata.importance) {
|
||||
handle.insert(entry.0, (timings, mdata.iterations.unwrap(), mdata.weight));
|
||||
} else {
|
||||
let mut new = HashMap::default();
|
||||
new.insert(entry.0, (timings, mdata.iterations.unwrap(), mdata.weight));
|
||||
categories.insert(mdata.importance, new);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
categories
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Output {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
// Don't print the header for an empty run.
|
||||
if self.tests.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// We want to print important tests at the top, then alphabetical.
|
||||
let mut sorted = self.clone();
|
||||
sorted.sort();
|
||||
// Markdown header for making a nice little table :>
|
||||
writeln!(
|
||||
f,
|
||||
"| Command | Iter/sec | Mean [ms] | SD [ms] | Iterations | Importance (weight) |",
|
||||
)?;
|
||||
writeln!(f, "|:---|---:|---:|---:|---:|---:|")?;
|
||||
for (name, metadata, timings) in &sorted.tests {
|
||||
match metadata {
|
||||
Some(metadata) => match timings {
|
||||
// Happy path.
|
||||
Ok(timings) => {
|
||||
// If the test succeeded, then metadata.iterations is Some(_).
|
||||
writeln!(
|
||||
f,
|
||||
"| {} | {:.2} | {} | {:.2} | {} | {} ({}) |",
|
||||
name,
|
||||
timings.iters_per_sec(metadata.iterations.unwrap()),
|
||||
{
|
||||
// Very small mean runtimes will give inaccurate
|
||||
// results. Should probably also penalise weight.
|
||||
let mean = timings.mean.as_secs_f64() * 1000.;
|
||||
if mean < consts::NOISE_CUTOFF.as_secs_f64() * 1000. / 8. {
|
||||
format!("{mean:.2} (unreliable)")
|
||||
} else {
|
||||
format!("{mean:.2}")
|
||||
}
|
||||
},
|
||||
timings.stddev.as_secs_f64() * 1000.,
|
||||
metadata.iterations.unwrap(),
|
||||
metadata.importance,
|
||||
metadata.weight,
|
||||
)?;
|
||||
}
|
||||
// We have (some) metadata, but the test errored.
|
||||
Err(err) => writeln!(
|
||||
f,
|
||||
"| ({}) {} | N/A | N/A | N/A | {} | {} ({}) |",
|
||||
err,
|
||||
name,
|
||||
metadata
|
||||
.iterations
|
||||
.map_or_else(|| "N/A".to_owned(), |i| format!("{i}")),
|
||||
metadata.importance,
|
||||
metadata.weight
|
||||
)?,
|
||||
},
|
||||
// No metadata, couldn't even parse the test output.
|
||||
None => writeln!(
|
||||
f,
|
||||
"| ({}) {} | N/A | N/A | N/A | N/A | N/A |",
|
||||
timings.as_ref().unwrap_err(),
|
||||
name
|
||||
)?,
|
||||
}
|
||||
}
|
||||
writeln!(f)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// The difference in performance between two runs within a given importance
|
||||
/// category.
|
||||
struct PerfDelta {
|
||||
/// The biggest improvement / least bad regression.
|
||||
max: f64,
|
||||
/// The weighted average change in test times.
|
||||
mean: f64,
|
||||
/// The worst regression / smallest improvement.
|
||||
min: f64,
|
||||
}
|
||||
|
||||
/// Shim type for reporting all performance deltas across importance categories.
|
||||
pub struct PerfReport {
|
||||
/// Inner (group, diff) pairing.
|
||||
deltas: HashMap<Importance, PerfDelta>,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for PerfReport {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
if self.deltas.is_empty() {
|
||||
return write!(f, "(no matching tests)");
|
||||
}
|
||||
let sorted = self.deltas.iter().collect::<Vec<_>>();
|
||||
writeln!(f, "| Category | Max | Mean | Min |")?;
|
||||
// We don't want to print too many newlines at the end, so handle newlines
|
||||
// a little jankily like this.
|
||||
write!(f, "|:---|---:|---:|---:|")?;
|
||||
for (cat, delta) in sorted.into_iter().rev() {
|
||||
write!(
|
||||
f,
|
||||
"\n| {cat} | {:.3} | {:.3} | {:.3} |",
|
||||
delta.max, delta.mean, delta.min
|
||||
)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
@ -1,18 +1,15 @@
|
|||
#![warn(clippy::all, clippy::pedantic, clippy::undocumented_unsafe_blocks)]
|
||||
#![cfg_attr(release, deny(warnings))]
|
||||
|
||||
//! Perf profiler for Zed tests. Outputs timings of tests marked with the `#[perf]`
|
||||
//! attribute to stdout in Markdown. See the documentation of `util_macros::perf`
|
||||
//! for usage details on the actual attribute.
|
||||
//!
|
||||
//! # Setup
|
||||
//! Make sure `hyperfine` is installed and in the shell path, then run
|
||||
//! `cargo build --bin perf --workspace --release` to build the profiler.
|
||||
//! `cargo build -p perf --release` to build the profiler.
|
||||
//!
|
||||
//! # Usage
|
||||
//! Calling this tool rebuilds everything with some cfg flags set for the perf
|
||||
//! proc macro *and* enables optimisations (`release-fast` profile), so expect it
|
||||
//! to take a little while.
|
||||
//! Calling this tool rebuilds the targeted crate(s) with some cfg flags set for the
|
||||
//! perf proc macro *and* enables optimisations (`release-fast` profile), so expect
|
||||
//! it to take a little while.
|
||||
//!
|
||||
//! To test an individual crate, run:
|
||||
//! ```sh
|
||||
|
|
@ -24,37 +21,221 @@
|
|||
//! cargo perf-test --workspace
|
||||
//! ```
|
||||
//!
|
||||
//! Some command-line parameters are also recognised by this profiler. To filter
|
||||
//! out all tests below a certain importance (e.g. `important`), run:
|
||||
//! ```sh
|
||||
//! cargo perf-test $WHATEVER -- --important
|
||||
//! ```
|
||||
//!
|
||||
//! Similarly, to skip outputting progress to the command line, pass `-- --quiet`.
|
||||
//! These flags can be combined.
|
||||
//!
|
||||
//! ## Comparing runs
|
||||
//! Passing `--json=ident` will save per-crate run files in `.perf-runs`, e.g.
|
||||
//! `cargo perf-test -p gpui -- --json=blah` will result in `.perf-runs/blah.gpui.json`
|
||||
//! being created (unless no tests were run). These results can be automatically
|
||||
//! compared. To do so, run `cargo perf-compare new-ident old-ident`.
|
||||
//!
|
||||
//! NB: All files matching `.perf-runs/ident.*.json` will be considered when
|
||||
//! doing this comparison, so ensure there aren't leftover files in your `.perf-runs`
|
||||
//! directory that might match that!
|
||||
//!
|
||||
//! # Notes
|
||||
//! This should probably not be called manually unless you're working on the profiler
|
||||
//! itself; use the `cargo perf-test` alias (after building this crate) instead.
|
||||
|
||||
#[allow(clippy::wildcard_imports, reason = "Our crate")]
|
||||
use perf::*;
|
||||
|
||||
use std::{
|
||||
fs::OpenOptions,
|
||||
io::Write,
|
||||
path::{Path, PathBuf},
|
||||
process::{Command, Stdio},
|
||||
time::{Duration, Instant},
|
||||
};
|
||||
|
||||
/// How many iterations to attempt the first time a test is run.
|
||||
const DEFAULT_ITER_COUNT: usize = 12;
|
||||
const DEFAULT_ITER_COUNT: usize = 3;
|
||||
/// Multiplier for the iteration count when a test doesn't pass the noise cutoff.
|
||||
const ITER_COUNT_MUL: usize = 4;
|
||||
/// How long a test must have run to be assumed to be reliable-ish.
|
||||
const NOISE_CUTOFF: Duration = Duration::from_millis(250);
|
||||
|
||||
// If any of the below constants are changed, make sure to also update the perf
|
||||
// proc macro to match!
|
||||
/// Report a failure into the output and skip an iteration.
|
||||
macro_rules! fail {
|
||||
($output:ident, $name:expr, $kind:expr) => {{
|
||||
$output.failure($name, None, None, $kind);
|
||||
continue;
|
||||
}};
|
||||
($output:ident, $name:expr, $mdata:expr, $kind:expr) => {{
|
||||
$output.failure($name, Some($mdata), None, $kind);
|
||||
continue;
|
||||
}};
|
||||
($output:ident, $name:expr, $mdata:expr, $count:expr, $kind:expr) => {{
|
||||
$output.failure($name, Some($mdata), Some($count), $kind);
|
||||
continue;
|
||||
}};
|
||||
}
|
||||
|
||||
/// The suffix on tests marked with `#[perf]`.
|
||||
const SUF_NORMAL: &str = "__ZED_PERF";
|
||||
/// The suffix on tests marked with `#[perf(iterations = n)]`.
|
||||
const SUF_FIXED: &str = "__ZED_PERF_FIXEDITER";
|
||||
/// The env var in which we pass the iteration count to our tests.
|
||||
const ITER_ENV_VAR: &str = "ZED_PERF_ITER";
|
||||
/// How does this perf run return its output?
|
||||
enum OutputKind<'a> {
|
||||
/// Print markdown to the terminal.
|
||||
Markdown,
|
||||
/// Save JSON to a file.
|
||||
Json(&'a Path),
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_lines)]
|
||||
/// Runs a given metadata-returning function from a test handler, parsing its
|
||||
/// output into a `TestMdata`.
|
||||
fn parse_mdata(test_bin: &str, mdata_fn: &str) -> Result<TestMdata, FailKind> {
|
||||
let mut cmd = Command::new(test_bin);
|
||||
cmd.args([mdata_fn, "--exact", "--nocapture"]);
|
||||
let out = cmd
|
||||
.output()
|
||||
.expect("FATAL: Could not run test binary {test_bin}");
|
||||
assert!(out.status.success());
|
||||
let stdout = String::from_utf8_lossy(&out.stdout);
|
||||
let mut version = None;
|
||||
let mut iterations = None;
|
||||
let mut importance = Importance::default();
|
||||
let mut weight = consts::WEIGHT_DEFAULT;
|
||||
for line in stdout
|
||||
.lines()
|
||||
.filter_map(|l| l.strip_prefix(consts::MDATA_LINE_PREF))
|
||||
{
|
||||
let mut items = line.split_whitespace();
|
||||
// For v0, we know the ident always comes first, then one field.
|
||||
match items.next().unwrap() {
|
||||
consts::VERSION_LINE_NAME => {
|
||||
let v = items.next().unwrap().parse::<u32>().unwrap();
|
||||
if v > consts::MDATA_VER {
|
||||
return Err(FailKind::VersionMismatch);
|
||||
}
|
||||
version = Some(v);
|
||||
}
|
||||
consts::ITER_COUNT_LINE_NAME => {
|
||||
iterations = Some(items.next().unwrap().parse::<usize>().unwrap());
|
||||
}
|
||||
consts::IMPORTANCE_LINE_NAME => {
|
||||
importance = match items.next().unwrap() {
|
||||
"critical" => Importance::Critical,
|
||||
"important" => Importance::Important,
|
||||
"average" => Importance::Average,
|
||||
"iffy" => Importance::Iffy,
|
||||
"fluff" => Importance::Fluff,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
}
|
||||
consts::WEIGHT_LINE_NAME => {
|
||||
weight = items.next().unwrap().parse::<u8>().unwrap();
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(TestMdata {
|
||||
version: version.unwrap(),
|
||||
// Iterations may be determined by us and thus left unspecified.
|
||||
iterations,
|
||||
// In principle this should always be set, but just for the sake of
|
||||
// stability allow the potentially-breaking change of not reporting the
|
||||
// importance without erroring. Maybe we want to change this.
|
||||
importance,
|
||||
// Same with weight.
|
||||
weight,
|
||||
})
|
||||
}
|
||||
|
||||
/// Compares the perf results of two profiles as per the arguments passed in.
|
||||
fn compare_profiles(args: &[String]) {
|
||||
let ident_new = args.first().expect("FATAL: missing identifier for new run");
|
||||
let ident_old = args.get(1).expect("FATAL: missing identifier for old run");
|
||||
// TODO: move this to a constant also tbh
|
||||
let wspace_dir = std::env::var("CARGO_WORKSPACE_DIR").unwrap();
|
||||
let runs_dir = PathBuf::from(&wspace_dir).join(consts::RUNS_DIR);
|
||||
|
||||
// Use the blank outputs initially, so we can merge into these with prefixes.
|
||||
let mut outputs_new = Output::blank();
|
||||
let mut outputs_old = Output::blank();
|
||||
|
||||
for e in runs_dir.read_dir().unwrap() {
|
||||
let Ok(entry) = e else {
|
||||
continue;
|
||||
};
|
||||
let Ok(metadata) = entry.metadata() else {
|
||||
continue;
|
||||
};
|
||||
if metadata.is_file() {
|
||||
let Ok(name) = entry.file_name().into_string() else {
|
||||
continue;
|
||||
};
|
||||
|
||||
// A little helper to avoid code duplication. Reads the `output` from
|
||||
// a json file, then merges it into what we have so far.
|
||||
let read_into = |output: &mut Output| {
|
||||
let mut elems = name.split('.').skip(1);
|
||||
let prefix = elems.next().unwrap();
|
||||
assert_eq!("json", elems.next().unwrap());
|
||||
assert!(elems.next().is_none());
|
||||
let handle = OpenOptions::new().read(true).open(entry.path()).unwrap();
|
||||
let o_other: Output = serde_json::from_reader(handle).unwrap();
|
||||
output.merge(o_other, prefix);
|
||||
};
|
||||
|
||||
if name.starts_with(ident_old) {
|
||||
read_into(&mut outputs_old);
|
||||
} else if name.starts_with(ident_new) {
|
||||
read_into(&mut outputs_new);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let res = outputs_new.compare_perf(outputs_old);
|
||||
println!("{res}");
|
||||
}
|
||||
|
||||
#[expect(clippy::too_many_lines, reason = "This will be split up soon!")]
|
||||
fn main() {
|
||||
let args = std::env::args().collect::<Vec<_>>();
|
||||
// We get passed the test we need to run as the 1st argument after our own name.
|
||||
let test_bin = std::env::args().nth(1).unwrap();
|
||||
let mut cmd = Command::new(&test_bin);
|
||||
let test_bin = args
|
||||
.get(1)
|
||||
.expect("FATAL: No test binary or command; this shouldn't be manually invoked!");
|
||||
|
||||
// We're being asked to compare two results, not run the profiler.
|
||||
if test_bin == "compare" {
|
||||
compare_profiles(&args[2..]);
|
||||
return;
|
||||
}
|
||||
|
||||
// Whether to skip printing some information to stderr.
|
||||
let mut quiet = false;
|
||||
// Minimum test importance we care about this run.
|
||||
let mut thresh = Importance::Iffy;
|
||||
// Where to print the output of this run.
|
||||
let mut out_kind = OutputKind::Markdown;
|
||||
|
||||
for arg in args.iter().skip(2) {
|
||||
match arg.as_str() {
|
||||
"--critical" => thresh = Importance::Critical,
|
||||
"--important" => thresh = Importance::Important,
|
||||
"--average" => thresh = Importance::Average,
|
||||
"--iffy" => thresh = Importance::Iffy,
|
||||
"--fluff" => thresh = Importance::Fluff,
|
||||
"--quiet" => quiet = true,
|
||||
s if s.starts_with("--json") => {
|
||||
out_kind = OutputKind::Json(Path::new(
|
||||
s.strip_prefix("--json=")
|
||||
.expect("FATAL: Invalid json parameter; pass --json=filename"),
|
||||
));
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
if !quiet {
|
||||
eprintln!("Starting perf check");
|
||||
}
|
||||
|
||||
let mut cmd = Command::new(test_bin);
|
||||
// --format=json is nightly-only :(
|
||||
cmd.args(["--list", "--format=terse"]);
|
||||
let out = cmd
|
||||
|
|
@ -64,6 +245,9 @@ fn main() {
|
|||
out.status.success(),
|
||||
"FATAL: Cannot do perf check - test binary {test_bin} returned an error"
|
||||
);
|
||||
if !quiet {
|
||||
eprintln!("Test binary ran successfully; starting profile...");
|
||||
}
|
||||
// Parse the test harness output to look for tests we care about.
|
||||
let stdout = String::from_utf8_lossy(&out.stdout);
|
||||
let mut test_list: Vec<_> = stdout
|
||||
|
|
@ -79,37 +263,62 @@ fn main() {
|
|||
}
|
||||
})
|
||||
// Exclude tests that aren't marked for perf triage based on suffix.
|
||||
.filter(|t_name| t_name.ends_with(SUF_NORMAL) || t_name.ends_with(SUF_FIXED))
|
||||
.filter(|t_name| {
|
||||
t_name.ends_with(consts::SUF_NORMAL) || t_name.ends_with(consts::SUF_MDATA)
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Pulling itertools just for .dedup() would be quite a big dependency that's
|
||||
// not used elsewhere, so do this on the vec instead.
|
||||
// not used elsewhere, so do this on a vec instead.
|
||||
test_list.sort_unstable();
|
||||
test_list.dedup();
|
||||
|
||||
if !test_list.is_empty() {
|
||||
// Print the markdown header which matches hyperfine's result.
|
||||
// TODO: Support exporting JSON also.
|
||||
println!(
|
||||
"| Command | Mean [ms] | Min [ms] | Max [ms] | Iterations | Iter/sec |\n|:---|---:|---:|---:|---:|---:|"
|
||||
);
|
||||
}
|
||||
let len = test_list.len();
|
||||
|
||||
// Tests should come in pairs with their mdata fn!
|
||||
assert!(
|
||||
len.is_multiple_of(2),
|
||||
"Malformed tests in test binary {test_bin}"
|
||||
);
|
||||
|
||||
let mut output = Output::default();
|
||||
|
||||
// Spawn and profile an instance of each perf-sensitive test, via hyperfine.
|
||||
for t_name in test_list {
|
||||
// Pretty-print the stripped name for the test.
|
||||
let t_name_normal = t_name.replace(SUF_FIXED, "").replace(SUF_NORMAL, "");
|
||||
// Each test is a pair of (test, metadata-returning-fn), so grab both. We also
|
||||
// know the list is sorted.
|
||||
for (idx, t_pair) in test_list.chunks_exact(2).enumerate() {
|
||||
if !quiet {
|
||||
eprint!("\rProfiling test {}/{}", idx + 1, len / 2);
|
||||
}
|
||||
// Be resilient against changes to these constants.
|
||||
let (t_name, t_mdata) = if consts::SUF_NORMAL < consts::SUF_MDATA {
|
||||
(t_pair[0], t_pair[1])
|
||||
} else {
|
||||
(t_pair[1], t_pair[0])
|
||||
};
|
||||
// Pretty-printable stripped name for the test.
|
||||
let t_name_pretty = t_name.replace(consts::SUF_NORMAL, "");
|
||||
|
||||
// Get the metadata this test reports for us.
|
||||
let t_mdata = match parse_mdata(test_bin, t_mdata) {
|
||||
Ok(mdata) => mdata,
|
||||
Err(err) => fail!(output, t_name_pretty, err),
|
||||
};
|
||||
|
||||
if t_mdata.importance < thresh {
|
||||
fail!(output, t_name_pretty, t_mdata, FailKind::Skipped);
|
||||
}
|
||||
|
||||
// Time test execution to see how many iterations we need to do in order
|
||||
// to account for random noise. This is skipped for tests with fixed
|
||||
// iteration counts.
|
||||
let final_iter_count = if t_name.ends_with(SUF_FIXED) {
|
||||
None
|
||||
} else {
|
||||
let mut errored = false;
|
||||
let final_iter_count = t_mdata.iterations.unwrap_or_else(|| {
|
||||
let mut iter_count = DEFAULT_ITER_COUNT;
|
||||
loop {
|
||||
let mut cmd = Command::new(&test_bin);
|
||||
let mut cmd = Command::new(test_bin);
|
||||
cmd.args([t_name, "--exact"]);
|
||||
cmd.env(ITER_ENV_VAR, format!("{iter_count}"));
|
||||
cmd.env(consts::ITER_ENV_VAR, format!("{iter_count}"));
|
||||
// Don't let the child muck up our stdin/out/err.
|
||||
cmd.stdin(Stdio::null());
|
||||
cmd.stdout(Stdio::null());
|
||||
|
|
@ -119,28 +328,32 @@ fn main() {
|
|||
let out = cmd.spawn().unwrap().wait();
|
||||
let post = Instant::now();
|
||||
if !out.unwrap().success() {
|
||||
println!(
|
||||
"| {t_name_normal} (ERRORED IN TRIAGE) | N/A | N/A | N/A | {iter_count} | N/A |"
|
||||
);
|
||||
return;
|
||||
errored = true;
|
||||
break iter_count;
|
||||
}
|
||||
if post - pre > NOISE_CUTOFF {
|
||||
break Some(iter_count);
|
||||
if post - pre > consts::NOISE_CUTOFF {
|
||||
break iter_count;
|
||||
} else if let Some(c) = iter_count.checked_mul(ITER_COUNT_MUL) {
|
||||
iter_count = c;
|
||||
} else {
|
||||
// This should almost never happen, but maybe..?
|
||||
eprintln!(
|
||||
"WARNING: Running nearly usize::MAX iterations of test {t_name_normal}"
|
||||
"WARNING: Running nearly usize::MAX iterations of test {t_name_pretty}"
|
||||
);
|
||||
break Some(iter_count);
|
||||
break iter_count;
|
||||
}
|
||||
}
|
||||
};
|
||||
});
|
||||
|
||||
// Don't profile failing tests.
|
||||
if errored {
|
||||
fail!(output, t_name_pretty, t_mdata, FailKind::Triage);
|
||||
}
|
||||
|
||||
// Now profile!
|
||||
let mut perf_cmd = Command::new("hyperfine");
|
||||
// Warm up the cache and print markdown output to stdout.
|
||||
// TODO: json
|
||||
perf_cmd.args([
|
||||
"--style",
|
||||
"none",
|
||||
|
|
@ -150,42 +363,89 @@ fn main() {
|
|||
"-",
|
||||
&format!("{test_bin} {t_name}"),
|
||||
]);
|
||||
if let Some(final_iter_count) = final_iter_count {
|
||||
perf_cmd.env(ITER_ENV_VAR, format!("{final_iter_count}"));
|
||||
}
|
||||
perf_cmd.env(consts::ITER_ENV_VAR, format!("{final_iter_count}"));
|
||||
let p_out = perf_cmd.output().unwrap();
|
||||
let fin_iter = match final_iter_count {
|
||||
Some(i) => &format!("{i}"),
|
||||
None => "(preset)",
|
||||
};
|
||||
if p_out.status.success() {
|
||||
let output = String::from_utf8_lossy(&p_out.stdout);
|
||||
// Strip the name of the test binary from the table (and the space after it!)
|
||||
// + our extraneous test bits + the "Relative" column (which is always at the end and "1.00").
|
||||
let output = output
|
||||
.replace(&format!("{test_bin} "), "")
|
||||
.replace(SUF_FIXED, "")
|
||||
.replace(SUF_NORMAL, "")
|
||||
.replace(" 1.00 |", "");
|
||||
let cmd_output = String::from_utf8_lossy(&p_out.stdout);
|
||||
// Can't use .last() since we have a trailing newline. Sigh.
|
||||
let fin = output.lines().nth(3).unwrap();
|
||||
let results_line = cmd_output.lines().nth(3).unwrap();
|
||||
// Grab the values out of the pretty-print.
|
||||
// TODO: Parse json instead.
|
||||
let mut res_iter = results_line.split_whitespace();
|
||||
// Durations are given in milliseconds, so account for that.
|
||||
let mean =
|
||||
Duration::from_secs_f64(res_iter.nth(4).unwrap().parse::<f64>().unwrap() / 1000.);
|
||||
let stddev =
|
||||
Duration::from_secs_f64(res_iter.nth(1).unwrap().parse::<f64>().unwrap() / 1000.);
|
||||
|
||||
// Calculate how many iterations this does per second, for easy comparison.
|
||||
let ms = fin
|
||||
.split_whitespace()
|
||||
.nth(3)
|
||||
.unwrap()
|
||||
.parse::<f64>()
|
||||
.unwrap();
|
||||
let mul_fac = 1000.0 / ms;
|
||||
let iter_sec = match final_iter_count {
|
||||
#[allow(clippy::cast_precision_loss)]
|
||||
Some(c) => &format!("{:.1}", mul_fac * c as f64),
|
||||
None => "(unknown)",
|
||||
};
|
||||
println!("{fin} {fin_iter} | {iter_sec} |");
|
||||
output.success(
|
||||
t_name_pretty,
|
||||
t_mdata,
|
||||
final_iter_count,
|
||||
Timings { mean, stddev },
|
||||
);
|
||||
} else {
|
||||
println!("{t_name_normal} (ERRORED) | N/A | N/A | N/A | {fin_iter} | N/A |");
|
||||
fail!(
|
||||
output,
|
||||
t_name_pretty,
|
||||
t_mdata,
|
||||
final_iter_count,
|
||||
FailKind::Profile
|
||||
);
|
||||
}
|
||||
}
|
||||
if !quiet {
|
||||
if output.is_empty() {
|
||||
eprintln!("Nothing to do.");
|
||||
} else {
|
||||
// If stdout and stderr are on the same terminal, move us after the
|
||||
// output from above.
|
||||
eprintln!();
|
||||
}
|
||||
}
|
||||
|
||||
// No need making an empty json file on every empty test bin.
|
||||
if output.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
match out_kind {
|
||||
OutputKind::Markdown => print!("{output}"),
|
||||
OutputKind::Json(user_path) => {
|
||||
let wspace_dir = std::env::var("CARGO_WORKSPACE_DIR").unwrap();
|
||||
let runs_dir = PathBuf::from(&wspace_dir).join(consts::RUNS_DIR);
|
||||
std::fs::create_dir_all(&runs_dir).unwrap();
|
||||
assert!(
|
||||
!user_path.to_string_lossy().is_empty(),
|
||||
"FATAL: Empty filename specified!"
|
||||
);
|
||||
// Get the test binary's crate's name; a path like
|
||||
// target/release-fast/deps/gpui-061ff76c9b7af5d7
|
||||
// would be reduced to just "gpui".
|
||||
let test_bin_stripped = Path::new(test_bin)
|
||||
.file_name()
|
||||
.unwrap()
|
||||
.to_str()
|
||||
.unwrap()
|
||||
.rsplit_once('-')
|
||||
.unwrap()
|
||||
.0;
|
||||
let mut file_path = runs_dir.join(user_path);
|
||||
file_path
|
||||
.as_mut_os_string()
|
||||
.push(format!(".{test_bin_stripped}.json"));
|
||||
let mut out_file = OpenOptions::new()
|
||||
.write(true)
|
||||
.create(true)
|
||||
.truncate(true)
|
||||
.open(&file_path)
|
||||
.unwrap();
|
||||
out_file
|
||||
.write_all(&serde_json::to_vec(&output).unwrap())
|
||||
.unwrap();
|
||||
if !quiet {
|
||||
eprintln!("JSON output written to {}", file_path.display());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue