zed/.github/workflows/run_agent_evals.yml
Finn Evers 97f0ab77ca
ci: Define default shell for all workflows (#47883)
GitHub allows defining a default shell for all jobs on the workflow
level, which we did not use before, yet practically did when it comes
down to our usage of `named::bash`. Since this makes stuff quite
verbose, I decided on using the defaults instead so the workflows become
somewhat easier to audit when reading the generated files.

Powershell steps continue to use Powershell, only the default for bash
scripts was modified.


Release Notes:

- N/A
2026-01-28 14:07:06 -07:00

64 lines
2.1 KiB
YAML

# Generated from xtask::workflows::run_agent_evals
# Rebuild with `cargo xtask workflows`.
name: run_agent_evals
env:
CARGO_TERM_COLOR: always
CARGO_INCREMENTAL: '0'
RUST_BACKTRACE: '1'
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
GOOGLE_AI_API_KEY: ${{ secrets.GOOGLE_AI_API_KEY }}
GOOGLE_CLOUD_PROJECT: ${{ secrets.GOOGLE_CLOUD_PROJECT }}
ZED_CLIENT_CHECKSUM_SEED: ${{ secrets.ZED_CLIENT_CHECKSUM_SEED }}
ZED_EVAL_TELEMETRY: '1'
MODEL_NAME: ${{ inputs.model_name }}
on:
workflow_dispatch:
inputs:
model_name:
description: model_name
required: true
type: string
jobs:
agent_evals:
runs-on: namespace-profile-16x32-ubuntu-2204
steps:
- name: steps::checkout_repo
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
clean: false
- name: steps::cache_rust_dependencies_namespace
uses: namespacelabs/nscloud-cache-action@v1
with:
cache: rust
path: ~/.rustup
- name: steps::setup_linux
run: ./script/linux
- name: steps::install_mold
run: ./script/install-mold
- name: steps::download_wasi_sdk
run: ./script/download-wasi-sdk
- name: steps::setup_cargo_config
run: |
mkdir -p ./../.cargo
cp ./.cargo/ci-config.toml ./../.cargo/config.toml
- name: cargo build --package=eval
run: cargo build --package=eval
- name: run_agent_evals::agent_evals::run_eval
run: cargo run --package=eval -- --repetitions=8 --concurrency=1 --model "${MODEL_NAME}"
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
GOOGLE_AI_API_KEY: ${{ secrets.GOOGLE_AI_API_KEY }}
GOOGLE_CLOUD_PROJECT: ${{ secrets.GOOGLE_CLOUD_PROJECT }}
- name: steps::cleanup_cargo_config
if: always()
run: |
rm -rf ./../.cargo
timeout-minutes: 600
concurrency:
group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
cancel-in-progress: true
defaults:
run:
shell: bash -euxo pipefail {0}