mirror of
https://github.com/zed-industries/zed.git
synced 2026-06-01 03:14:56 +07:00
Split out cron and non-cron unit evals (#42472)
Release Notes: - N/A --------- Co-authored-by: Bennet Bo Fenner <bennetbo@gmx.de>
This commit is contained in:
parent
5f4d0dbaab
commit
908ef03502
6 changed files with 178 additions and 25 deletions
5
.github/workflows/run_agent_evals.yml
vendored
5
.github/workflows/run_agent_evals.yml
vendored
|
|
@ -51,6 +51,11 @@ jobs:
|
|||
- name: run_agent_evals::agent_evals::run_eval
|
||||
run: cargo run --package=eval -- --repetitions=8 --concurrency=1 --model "${MODEL_NAME}"
|
||||
shell: bash -euxo pipefail {0}
|
||||
env:
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
GOOGLE_AI_API_KEY: ${{ secrets.GOOGLE_AI_API_KEY }}
|
||||
GOOGLE_CLOUD_PROJECT: ${{ secrets.GOOGLE_CLOUD_PROJECT }}
|
||||
- name: steps::cleanup_cargo_config
|
||||
if: always()
|
||||
run: |
|
||||
|
|
|
|||
78
.github/workflows/run_cron_unit_evals.yml
vendored
Normal file
78
.github/workflows/run_cron_unit_evals.yml
vendored
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
# Generated from xtask::workflows::run_cron_unit_evals
|
||||
# Rebuild with `cargo xtask workflows`.
|
||||
name: run_cron_unit_evals
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
CARGO_INCREMENTAL: '0'
|
||||
RUST_BACKTRACE: '1'
|
||||
ZED_CLIENT_CHECKSUM_SEED: ${{ secrets.ZED_CLIENT_CHECKSUM_SEED }}
|
||||
on:
|
||||
schedule:
|
||||
- cron: 47 1 * * 2
|
||||
workflow_dispatch: {}
|
||||
jobs:
|
||||
cron_unit_evals:
|
||||
runs-on: namespace-profile-16x32-ubuntu-2204
|
||||
steps:
|
||||
- name: steps::checkout_repo
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
clean: false
|
||||
- name: steps::setup_cargo_config
|
||||
run: |
|
||||
mkdir -p ./../.cargo
|
||||
cp ./.cargo/ci-config.toml ./../.cargo/config.toml
|
||||
shell: bash -euxo pipefail {0}
|
||||
- name: steps::cache_rust_dependencies_namespace
|
||||
uses: namespacelabs/nscloud-cache-action@v1
|
||||
with:
|
||||
cache: rust
|
||||
- name: steps::setup_linux
|
||||
run: ./script/linux
|
||||
shell: bash -euxo pipefail {0}
|
||||
- name: steps::install_mold
|
||||
run: ./script/install-mold
|
||||
shell: bash -euxo pipefail {0}
|
||||
- name: steps::download_wasi_sdk
|
||||
run: ./script/download-wasi-sdk
|
||||
shell: bash -euxo pipefail {0}
|
||||
- name: steps::cargo_install_nextest
|
||||
run: cargo install cargo-nextest --locked
|
||||
shell: bash -euxo pipefail {0}
|
||||
- name: steps::clear_target_dir_if_large
|
||||
run: ./script/clear-target-dir-if-larger-than 250
|
||||
shell: bash -euxo pipefail {0}
|
||||
- name: ./script/run-unit-evals
|
||||
run: ./script/run-unit-evals
|
||||
shell: bash -euxo pipefail {0}
|
||||
env:
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
GOOGLE_AI_API_KEY: ${{ secrets.GOOGLE_AI_API_KEY }}
|
||||
GOOGLE_CLOUD_PROJECT: ${{ secrets.GOOGLE_CLOUD_PROJECT }}
|
||||
- name: run_agent_evals::unit_evals::send_failure_to_slack
|
||||
if: ${{ failure() }}
|
||||
uses: slackapi/slack-github-action@b0fa283ad8fea605de13dc3f449259339835fc52
|
||||
with:
|
||||
method: chat.postMessage
|
||||
token: ${{ secrets.SLACK_APP_ZED_UNIT_EVALS_BOT_TOKEN }}
|
||||
payload: |
|
||||
channel: C04UDRNNJFQ
|
||||
text: "Unit Evals Failed: https://github.com/zed-industries/zed/actions/runs/${{ github.run_id }}"
|
||||
- name: steps::cleanup_cargo_config
|
||||
if: always()
|
||||
run: |
|
||||
rm -rf ./../.cargo
|
||||
shell: bash -euxo pipefail {0}
|
||||
- name: run_agent_evals::cron_unit_evals::send_failure_to_slack
|
||||
if: ${{ failure() }}
|
||||
uses: slackapi/slack-github-action@b0fa283ad8fea605de13dc3f449259339835fc52
|
||||
with:
|
||||
method: chat.postMessage
|
||||
token: ${{ secrets.SLACK_APP_ZED_UNIT_EVALS_BOT_TOKEN }}
|
||||
payload: |
|
||||
channel: C04UDRNNJFQ
|
||||
text: "Unit Evals Failed: https://github.com/zed-industries/zed/actions/runs/${{ github.run_id }}"
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
|
||||
cancel-in-progress: true
|
||||
21
.github/workflows/run_unit_evals.yml
vendored
21
.github/workflows/run_unit_evals.yml
vendored
|
|
@ -6,12 +6,21 @@ env:
|
|||
CARGO_INCREMENTAL: '0'
|
||||
RUST_BACKTRACE: '1'
|
||||
ZED_CLIENT_CHECKSUM_SEED: ${{ secrets.ZED_CLIENT_CHECKSUM_SEED }}
|
||||
ZED_EVAL_TELEMETRY: '1'
|
||||
MODEL_NAME: ${{ inputs.model_name }}
|
||||
on:
|
||||
schedule:
|
||||
- cron: 47 1 * * 2
|
||||
workflow_dispatch: {}
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
model_name:
|
||||
description: model_name
|
||||
required: true
|
||||
type: string
|
||||
commit_sha:
|
||||
description: commit_sha
|
||||
required: true
|
||||
type: string
|
||||
jobs:
|
||||
unit_evals:
|
||||
run_unit_evals:
|
||||
runs-on: namespace-profile-16x32-ubuntu-2204
|
||||
steps:
|
||||
- name: steps::checkout_repo
|
||||
|
|
@ -47,6 +56,10 @@ jobs:
|
|||
shell: bash -euxo pipefail {0}
|
||||
env:
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
GOOGLE_AI_API_KEY: ${{ secrets.GOOGLE_AI_API_KEY }}
|
||||
GOOGLE_CLOUD_PROJECT: ${{ secrets.GOOGLE_CLOUD_PROJECT }}
|
||||
UNIT_EVAL_COMMIT: ${{ inputs.commit_sha }}
|
||||
- name: run_agent_evals::unit_evals::send_failure_to_slack
|
||||
if: ${{ failure() }}
|
||||
uses: slackapi/slack-github-action@b0fa283ad8fea605de13dc3f449259339835fc52
|
||||
|
|
|
|||
|
|
@ -2,4 +2,8 @@
|
|||
|
||||
set -euxo pipefail
|
||||
|
||||
# When a specific commit is requested (UNIT_EVAL_COMMIT is set and non-empty),
# run the evals against that commit instead of whatever ref is checked out.
if [ -n "${UNIT_EVAL_COMMIT:-}" ]; then
    # The CI checkout may be shallow, in which case the requested commit is not
    # present locally and a bare `git checkout` fails. Fetch it explicitly first.
    # NOTE(review): assumes the remote is named `origin` — confirm for local use.
    git fetch --depth 1 origin "$UNIT_EVAL_COMMIT"
    git checkout "$UNIT_EVAL_COMMIT"
fi
|
||||
|
||||
GPUI_TEST_TIMEOUT=1500 cargo nextest run --workspace --no-fail-fast --features unit-eval --no-capture -E 'test(::eval_)'
|
||||
|
|
|
|||
|
|
@ -33,6 +33,10 @@ pub fn run_workflows(_: GenerateWorkflowArgs) -> Result<()> {
|
|||
("cherry_pick.yml", cherry_pick::cherry_pick()),
|
||||
("compare_perf.yml", compare_perf::compare_perf()),
|
||||
("run_unit_evals.yml", run_agent_evals::run_unit_evals()),
|
||||
(
|
||||
"run_cron_unit_evals.yml",
|
||||
run_agent_evals::run_cron_unit_evals(),
|
||||
),
|
||||
("run_agent_evals.yml", run_agent_evals::run_agent_evals()),
|
||||
("after_release.yml", after_release::after_release()),
|
||||
];
|
||||
|
|
|
|||
|
|
@ -28,6 +28,36 @@ pub(crate) fn run_agent_evals() -> Workflow {
|
|||
.add_job(agent_evals.name, agent_evals.job)
|
||||
}
|
||||
|
||||
pub(crate) fn run_unit_evals() -> Workflow {
|
||||
let model_name = Input::string("model_name", None);
|
||||
let commit_sha = Input::string("commit_sha", None);
|
||||
|
||||
let unit_evals = named::job(unit_evals(Some(&commit_sha)));
|
||||
|
||||
named::workflow()
|
||||
.name("run_unit_evals")
|
||||
.on(Event::default().workflow_dispatch(
|
||||
WorkflowDispatch::default()
|
||||
.add_input(model_name.name, model_name.input())
|
||||
.add_input(commit_sha.name, commit_sha.input()),
|
||||
))
|
||||
.concurrency(vars::one_workflow_per_non_main_branch())
|
||||
.add_env(("CARGO_TERM_COLOR", "always"))
|
||||
.add_env(("CARGO_INCREMENTAL", 0))
|
||||
.add_env(("RUST_BACKTRACE", 1))
|
||||
.add_env(("ZED_CLIENT_CHECKSUM_SEED", vars::ZED_CLIENT_CHECKSUM_SEED))
|
||||
.add_env(("ZED_EVAL_TELEMETRY", 1))
|
||||
.add_env(("MODEL_NAME", model_name.to_string()))
|
||||
.add_job(unit_evals.name, unit_evals.job)
|
||||
}
|
||||
|
||||
fn add_api_keys(step: Step<Run>) -> Step<Run> {
|
||||
step.add_env(("ANTHROPIC_API_KEY", vars::ANTHROPIC_API_KEY))
|
||||
.add_env(("OPENAI_API_KEY", vars::OPENAI_API_KEY))
|
||||
.add_env(("GOOGLE_AI_API_KEY", vars::GOOGLE_AI_API_KEY))
|
||||
.add_env(("GOOGLE_CLOUD_PROJECT", vars::GOOGLE_CLOUD_PROJECT))
|
||||
}
|
||||
|
||||
fn agent_evals() -> NamedJob {
|
||||
fn run_eval() -> Step<Run> {
|
||||
named::bash(
|
||||
|
|
@ -44,16 +74,16 @@ fn agent_evals() -> NamedJob {
|
|||
.map(steps::install_linux_dependencies)
|
||||
.add_step(setup_cargo_config(Platform::Linux))
|
||||
.add_step(steps::script("cargo build --package=eval"))
|
||||
.add_step(run_eval())
|
||||
.add_step(add_api_keys(run_eval()))
|
||||
.add_step(steps::cleanup_cargo_config(Platform::Linux)),
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) fn run_unit_evals() -> Workflow {
|
||||
let unit_evals = unit_evals();
|
||||
pub(crate) fn run_cron_unit_evals() -> Workflow {
|
||||
let unit_evals = cron_unit_evals();
|
||||
|
||||
named::workflow()
|
||||
.name("run_unit_evals")
|
||||
.name("run_cron_unit_evals")
|
||||
.on(Event::default()
|
||||
.schedule([
|
||||
// GitHub might drop jobs at busy times, so we choose a random time in the middle of the night.
|
||||
|
|
@ -68,7 +98,7 @@ pub(crate) fn run_unit_evals() -> Workflow {
|
|||
.add_job(unit_evals.name, unit_evals.job)
|
||||
}
|
||||
|
||||
fn unit_evals() -> NamedJob {
|
||||
fn cron_unit_evals() -> NamedJob {
|
||||
fn send_failure_to_slack() -> Step<Use> {
|
||||
named::uses(
|
||||
"slackapi",
|
||||
|
|
@ -84,20 +114,39 @@ fn unit_evals() -> NamedJob {
|
|||
"#}))
|
||||
}
|
||||
|
||||
named::job(
|
||||
Job::default()
|
||||
.runs_on(runners::LINUX_DEFAULT)
|
||||
.add_step(steps::checkout_repo())
|
||||
.add_step(steps::setup_cargo_config(Platform::Linux))
|
||||
.add_step(steps::cache_rust_dependencies_namespace())
|
||||
.map(steps::install_linux_dependencies)
|
||||
.add_step(steps::cargo_install_nextest(Platform::Linux))
|
||||
.add_step(steps::clear_target_dir_if_large(Platform::Linux))
|
||||
.add_step(
|
||||
steps::script("./script/run-unit-evals")
|
||||
.add_env(("ANTHROPIC_API_KEY", vars::ANTHROPIC_API_KEY)),
|
||||
)
|
||||
.add_step(send_failure_to_slack())
|
||||
.add_step(steps::cleanup_cargo_config(Platform::Linux)),
|
||||
)
|
||||
named::job(unit_evals(None).add_step(send_failure_to_slack()))
|
||||
}
|
||||
|
||||
fn unit_evals(commit: Option<&Input>) -> Job {
|
||||
fn send_failure_to_slack() -> Step<Use> {
|
||||
named::uses(
|
||||
"slackapi",
|
||||
"slack-github-action",
|
||||
"b0fa283ad8fea605de13dc3f449259339835fc52",
|
||||
)
|
||||
.if_condition(Expression::new("${{ failure() }}"))
|
||||
.add_with(("method", "chat.postMessage"))
|
||||
.add_with(("token", vars::SLACK_APP_ZED_UNIT_EVALS_BOT_TOKEN))
|
||||
.add_with(("payload", indoc::indoc!{r#"
|
||||
channel: C04UDRNNJFQ
|
||||
text: "Unit Evals Failed: https://github.com/zed-industries/zed/actions/runs/${{ github.run_id }}"
|
||||
"#}))
|
||||
}
|
||||
|
||||
let script_step = add_api_keys(steps::script("./script/run-unit-evals"));
|
||||
|
||||
Job::default()
|
||||
.runs_on(runners::LINUX_DEFAULT)
|
||||
.add_step(steps::checkout_repo())
|
||||
.add_step(steps::setup_cargo_config(Platform::Linux))
|
||||
.add_step(steps::cache_rust_dependencies_namespace())
|
||||
.map(steps::install_linux_dependencies)
|
||||
.add_step(steps::cargo_install_nextest(Platform::Linux))
|
||||
.add_step(steps::clear_target_dir_if_large(Platform::Linux))
|
||||
.add_step(match commit {
|
||||
Some(commit) => script_step.add_env(("UNIT_EVAL_COMMIT", commit)),
|
||||
None => script_step,
|
||||
})
|
||||
.add_step(send_failure_to_slack())
|
||||
.add_step(steps::cleanup_cargo_config(Platform::Linux))
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue