mirror of
https://github.com/zed-industries/zed.git
synced 2026-06-01 03:14:56 +07:00
eval_cli: Simplify build setup for more datasets (#52686)
Cleans up the build setup and simplifies it considerably so that we can use the binary in more eval Docker containers. Release Notes: - N/A
This commit is contained in:
parent
46a0262dc5
commit
24ea5e98da
8 changed files with 223 additions and 131 deletions
|
|
@ -23,6 +23,7 @@ test-support = [
|
|||
"workspace/test-support",
|
||||
"agent/test-support",
|
||||
]
|
||||
audio = ["dep:audio"]
|
||||
unit-eval = []
|
||||
|
||||
[dependencies]
|
||||
|
|
@ -38,7 +39,7 @@ heapless.workspace = true
|
|||
assistant_text_thread.workspace = true
|
||||
assistant_slash_command.workspace = true
|
||||
assistant_slash_commands.workspace = true
|
||||
audio.workspace = true
|
||||
audio = { workspace = true, optional = true }
|
||||
base64.workspace = true
|
||||
buffer_diff.workspace = true
|
||||
chrono.workspace = true
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ use agent_servers::AgentServerDelegate;
|
|||
use agent_servers::{AgentServer, GEMINI_TERMINAL_AUTH_METHOD_ID};
|
||||
use agent_settings::{AgentProfileId, AgentSettings};
|
||||
use anyhow::{Result, anyhow};
|
||||
#[cfg(feature = "audio")]
|
||||
use audio::{Audio, Sound};
|
||||
use buffer_diff::BufferDiff;
|
||||
use client::zed_urls;
|
||||
|
|
@ -2278,6 +2279,7 @@ impl ConversationView {
|
|||
window: &mut Window,
|
||||
cx: &mut Context<Self>,
|
||||
) {
|
||||
#[cfg(feature = "audio")]
|
||||
self.play_notification_sound(window, cx);
|
||||
self.show_notification(caption, icon, window, cx);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,55 +7,44 @@
|
|||
# Or use the helper script:
|
||||
# crates/eval_cli/script/build-linux
|
||||
|
||||
FROM rust:1.93.1-bookworm AS builder
|
||||
FROM rust:1.93 AS builder
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install build dependencies (subset of script/linux needed for headless GPUI).
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
cmake \
|
||||
clang \
|
||||
g++ \
|
||||
libasound2-dev \
|
||||
libfontconfig-dev \
|
||||
libgit2-dev \
|
||||
libglib2.0-dev \
|
||||
libssl-dev \
|
||||
libwayland-dev \
|
||||
libx11-xcb-dev \
|
||||
libxkbcommon-x11-dev \
|
||||
libzstd-dev \
|
||||
libsqlite3-dev \
|
||||
build-essential \
|
||||
curl \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install wild linker for faster linking (built from source to match bookworm's glibc).
|
||||
RUN cargo install --locked wild-linker --version 0.8.0 --root /usr/local
|
||||
|
||||
# Download WASI SDK (needed by some dependencies).
|
||||
ARG TARGETARCH
|
||||
RUN mkdir -p /app/target && \
|
||||
WASI_ARCH=$([ "$TARGETARCH" = "arm64" ] && echo "arm64" || echo "x86_64") && \
|
||||
curl -L "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-25/wasi-sdk-25.0-${WASI_ARCH}-linux.tar.gz" \
|
||||
| tar -xz -C /app/target && \
|
||||
mv /app/target/wasi-sdk-25.0-${WASI_ARCH}-linux /app/target/wasi-sdk
|
||||
|
||||
# Pre-install the toolchain specified in rust-toolchain.toml so it is cached.
|
||||
# Pre-install the toolchain specified in rust-toolchain.toml so it is cached.
|
||||
RUN rustup toolchain install 1.93 --profile minimal \
|
||||
--component rustfmt --component clippy --component rust-analyzer --component rust-src \
|
||||
--target wasm32-wasip2 --target wasm32-unknown-unknown --target x86_64-unknown-linux-musl
|
||||
--target wasm32-wasip2 --target wasm32-unknown-unknown --target x86_64-unknown-linux-musl --target x86_64-unknown-linux-gnu
|
||||
|
||||
# Install build tools. cmake + build-essential are needed for vendored C
|
||||
# libraries (libgit2-sys, zstd-sys, libsqlite3-sys). No audio/GUI -dev
|
||||
# packages required — eval-cli runs headless with those features disabled.
|
||||
#
|
||||
# cargo-zigbuild cross-compiles against a specific glibc version (2.31 =
|
||||
# Debian Bullseye / Ubuntu Focal) so the resulting binary is portable to
|
||||
# any Linux distro with glibc >= 2.31.
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
cmake \
|
||||
build-essential \
|
||||
curl \
|
||||
xz-utils \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN mkdir -p /opt/zig \
|
||||
&& curl -fsSL https://ziglang.org/download/0.15.2/zig-x86_64-linux-0.15.2.tar.xz \
|
||||
| tar -xJ -C /opt/zig --strip-components=1 \
|
||||
&& ln -s /opt/zig/zig /usr/local/bin/zig
|
||||
|
||||
RUN cargo install --locked cargo-zigbuild
|
||||
|
||||
COPY . .
|
||||
|
||||
ENV CC=clang CXX=clang++
|
||||
ENV RUSTFLAGS="-C linker=clang -C link-arg=--ld-path=wild"
|
||||
|
||||
RUN --mount=type=cache,target=/usr/local/cargo/registry \
|
||||
--mount=type=cache,target=/usr/local/cargo/git \
|
||||
--mount=type=cache,target=/app/target \
|
||||
cargo build --release --package eval_cli && \
|
||||
cp /app/target/release/eval-cli /eval-cli && \
|
||||
cargo zigbuild --release --package eval_cli \
|
||||
--target x86_64-unknown-linux-gnu.2.31 && \
|
||||
cp /app/target/x86_64-unknown-linux-gnu/release/eval-cli /eval-cli && \
|
||||
strip /eval-cli
|
||||
|
||||
FROM scratch
|
||||
|
|
|
|||
|
|
@ -1,8 +1,10 @@
|
|||
#!/usr/bin/env bash
|
||||
#
|
||||
# Build eval-cli for x86_64 Linux from any host (macOS, Linux, etc.)
|
||||
# using Docker. The resulting binary is placed at the path printed on
|
||||
# completion (default: target/eval-cli).
|
||||
# using Docker + cargo-zigbuild. Targets glibc 2.31 (Debian Bullseye /
|
||||
# Ubuntu Focal) so the binary is portable to any modern Linux distro.
|
||||
# The resulting binary is placed at the path printed on completion
|
||||
# (default: target/eval-cli).
|
||||
#
|
||||
# Usage:
|
||||
# crates/eval_cli/script/build-linux [--output PATH]
|
||||
|
|
@ -36,7 +38,7 @@ cd "$REPO_ROOT"
|
|||
|
||||
IMAGE_TAG="eval-cli-builder"
|
||||
|
||||
echo "Building eval-cli for x86_64-unknown-linux-gnu..."
|
||||
echo "Building eval-cli for x86_64-unknown-linux-gnu (glibc >= 2.31)..."
|
||||
echo " Repo root: $REPO_ROOT"
|
||||
echo " Output: $OUTPUT"
|
||||
echo ""
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ import os
|
|||
import shlex
|
||||
from pathlib import Path
|
||||
|
||||
from harbor.agents.installed.base import BaseInstalledAgent, ExecInput
|
||||
from harbor.agents.installed.base import BaseInstalledAgent, with_prompt_template
|
||||
from harbor.environments.base import BaseEnvironment
|
||||
from harbor.models.agent.context import AgentContext
|
||||
|
||||
|
|
@ -51,12 +51,143 @@ class ZedAgent(BaseInstalledAgent):
|
|||
def name() -> str:
|
||||
return "zed"
|
||||
|
||||
@property
|
||||
def _install_agent_template_path(self) -> Path:
|
||||
return Path(__file__).parent / "install.sh.j2"
|
||||
async def _detect_workdir(self, environment: BaseEnvironment) -> str:
|
||||
"""Detect the repo working directory inside the container.
|
||||
|
||||
async def setup(self, environment: BaseEnvironment) -> None:
|
||||
await environment.exec(command="mkdir -p /installed-agent")
|
||||
Checks, in order:
|
||||
1. Explicit ``EVAL_CLI_WORKDIR`` extra-env override
|
||||
2. ``/app`` (SWE-bench Pro)
|
||||
3. ``/testbed`` (SWE-bench Verified)
|
||||
4. ``/repo``
|
||||
5. First git repo found under ``/`` (max depth 3)
|
||||
"""
|
||||
override = self._extra_env.get("EVAL_CLI_WORKDIR")
|
||||
if override:
|
||||
return override
|
||||
|
||||
result = await self.exec_as_agent(
|
||||
environment,
|
||||
command=(
|
||||
"for d in /app /testbed /repo; do "
|
||||
' if [ -d "$d/.git" ]; then echo "$d"; exit 0; fi; '
|
||||
"done; "
|
||||
"find / -maxdepth 3 -name .git -type d 2>/dev/null "
|
||||
'| head -1 | sed "s|/.git$||"'
|
||||
),
|
||||
)
|
||||
workdir = result.stdout.strip()
|
||||
if not workdir:
|
||||
raise RuntimeError(
|
||||
"Could not find a git repository in the container. "
|
||||
"Set EVAL_CLI_WORKDIR explicitly via --ae EVAL_CLI_WORKDIR=/path/to/repo"
|
||||
)
|
||||
return workdir
|
||||
|
||||
async def install(self, environment: BaseEnvironment) -> None:
|
||||
await self.exec_as_root(
|
||||
environment,
|
||||
command=(
|
||||
"apt-get update && "
|
||||
"apt-get install -y --no-install-recommends "
|
||||
"ca-certificates "
|
||||
"curl "
|
||||
"git"
|
||||
),
|
||||
env={"DEBIAN_FRONTEND": "noninteractive"},
|
||||
)
|
||||
|
||||
await self.exec_as_root(
|
||||
environment,
|
||||
command=(
|
||||
"curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && "
|
||||
"apt-get install -y --no-install-recommends nodejs"
|
||||
),
|
||||
env={"DEBIAN_FRONTEND": "noninteractive"},
|
||||
)
|
||||
|
||||
# Pre-install default LSPs so Zed doesn't have to download them at
|
||||
# runtime. Each gets its own subdirectory under $ZED_DATA_DIR/languages.
|
||||
await self.exec_as_agent(
|
||||
environment,
|
||||
command=(
|
||||
"set -euo pipefail; "
|
||||
'ZED_DATA_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/zed"; '
|
||||
# basedpyright (Python - default type checker)
|
||||
'BASEDPYRIGHT_DIR="$ZED_DATA_DIR/languages/basedpyright"; '
|
||||
'mkdir -p "$BASEDPYRIGHT_DIR"; '
|
||||
'npm install --prefix "$BASEDPYRIGHT_DIR" --save-exact basedpyright; '
|
||||
# typescript-language-server (TypeScript/JS - default LSP)
|
||||
'TSSERVER_DIR="$ZED_DATA_DIR/languages/typescript-language-server"; '
|
||||
'mkdir -p "$TSSERVER_DIR"; '
|
||||
'npm install --prefix "$TSSERVER_DIR" --save-exact typescript typescript-language-server; '
|
||||
# vtsls (VS Code TypeScript language features)
|
||||
'VTSLS_DIR="$ZED_DATA_DIR/languages/vtsls"; '
|
||||
'mkdir -p "$VTSLS_DIR"; '
|
||||
'npm install --prefix "$VTSLS_DIR" --save-exact @vtsls/language-server typescript; '
|
||||
# tailwindcss-language-server
|
||||
'TAILWIND_DIR="$ZED_DATA_DIR/languages/tailwindcss-language-server"; '
|
||||
'mkdir -p "$TAILWIND_DIR"; '
|
||||
'npm install --prefix "$TAILWIND_DIR" --save-exact @tailwindcss/language-server'
|
||||
),
|
||||
)
|
||||
|
||||
# eslint LSP (downloaded from zed-industries/vscode-eslint GitHub release,
|
||||
# then compiled — this mirrors what Zed does at runtime).
|
||||
await self.exec_as_agent(
|
||||
environment,
|
||||
command=(
|
||||
"set -euo pipefail; "
|
||||
'ZED_DATA_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/zed"; '
|
||||
'ESLINT_DIR="$ZED_DATA_DIR/languages/eslint/vscode-eslint-2.4.4"; '
|
||||
'mkdir -p "$ESLINT_DIR"; '
|
||||
'curl -fsSL "https://github.com/zed-industries/vscode-eslint/archive/refs/tags/release/2.4.4.tar.gz" '
|
||||
'| tar -xz -C "$ESLINT_DIR"; '
|
||||
'mv "$ESLINT_DIR"/vscode-eslint-release-2.4.4 "$ESLINT_DIR/vscode-eslint"; '
|
||||
'cd "$ESLINT_DIR/vscode-eslint" && npm install && npm run compile'
|
||||
),
|
||||
)
|
||||
|
||||
# gopls (Go - default LSP). Only install when Go is present in the
|
||||
# container (i.e. Go-related SWE-bench tasks).
|
||||
await self.exec_as_agent(
|
||||
environment,
|
||||
command=(
|
||||
"if command -v go >/dev/null 2>&1; then "
|
||||
"go install golang.org/x/tools/gopls@latest; "
|
||||
"fi"
|
||||
),
|
||||
)
|
||||
|
||||
await self.exec_as_agent(
|
||||
environment,
|
||||
command=(
|
||||
"curl -LsSf https://astral.sh/uv/install.sh | sh && "
|
||||
'. "$HOME/.local/bin/env"'
|
||||
),
|
||||
)
|
||||
|
||||
agent_home_result = await self.exec_as_agent(
|
||||
environment,
|
||||
command='printf %s "$HOME"',
|
||||
)
|
||||
agent_home = agent_home_result.stdout.strip()
|
||||
if not agent_home:
|
||||
raise RuntimeError("Could not determine agent home directory")
|
||||
|
||||
await self.exec_as_root(
|
||||
environment,
|
||||
command=(
|
||||
f"ln -sf {shlex.quote(agent_home + '/.local/bin/uv')} /usr/local/bin/uv && "
|
||||
f"ln -sf {shlex.quote(agent_home + '/.local/bin/uvx')} /usr/local/bin/uvx"
|
||||
),
|
||||
)
|
||||
|
||||
# Install a modern ruff so `ruff server` works without --preview.
|
||||
# This also makes it available as a CLI tool for the agent.
|
||||
await self.exec_as_agent(
|
||||
environment,
|
||||
command=('export PATH="$HOME/.local/bin:$PATH" && uv tool install ruff'),
|
||||
)
|
||||
|
||||
if self._binary_path:
|
||||
binary = Path(self._binary_path)
|
||||
|
|
@ -69,18 +200,29 @@ class ZedAgent(BaseInstalledAgent):
|
|||
source_path=binary,
|
||||
target_path="/usr/local/bin/eval-cli",
|
||||
)
|
||||
await environment.exec(command="chmod +x /usr/local/bin/eval-cli")
|
||||
await self.exec_as_root(
|
||||
environment,
|
||||
command="chmod +x /usr/local/bin/eval-cli && eval-cli --help",
|
||||
)
|
||||
return
|
||||
|
||||
await super().setup(environment)
|
||||
|
||||
@property
|
||||
def _template_variables(self) -> dict[str, str]:
|
||||
variables = super()._template_variables
|
||||
if self._binary_path:
|
||||
variables["binary_uploaded"] = "true"
|
||||
if self._download_url:
|
||||
variables["download_url"] = self._download_url
|
||||
return variables
|
||||
await self.exec_as_root(
|
||||
environment,
|
||||
command=(
|
||||
f"curl -fsSL {shlex.quote(self._download_url)} "
|
||||
"-o /usr/local/bin/eval-cli && "
|
||||
"chmod +x /usr/local/bin/eval-cli && "
|
||||
"eval-cli --help"
|
||||
),
|
||||
)
|
||||
return
|
||||
|
||||
raise ValueError(
|
||||
"No eval-cli binary provided. "
|
||||
"Either pass binary_path=/path/to/target/release/eval-cli "
|
||||
"or set download_url=/EVAL_CLI_DOWNLOAD_URL."
|
||||
)
|
||||
|
||||
def populate_context_post_run(self, context: AgentContext) -> None:
|
||||
result_data = None
|
||||
|
|
@ -131,18 +273,27 @@ class ZedAgent(BaseInstalledAgent):
|
|||
|
||||
return env
|
||||
|
||||
def create_run_agent_commands(self, instruction: str) -> list[ExecInput]:
|
||||
@with_prompt_template
|
||||
async def run(
|
||||
self, instruction: str, environment: BaseEnvironment, context: AgentContext
|
||||
) -> None:
|
||||
escaped_instruction = shlex.quote(instruction)
|
||||
env = self._get_api_env()
|
||||
|
||||
parts = ["eval-cli", "--workdir /testbed", "--output-dir /logs/agent"]
|
||||
workdir = await self._detect_workdir(environment)
|
||||
|
||||
parts = [
|
||||
"eval-cli",
|
||||
f"--workdir {shlex.quote(workdir)}",
|
||||
"--output-dir /logs/agent",
|
||||
]
|
||||
|
||||
if self.model_name:
|
||||
parts.append(f"--model {self.model_name}")
|
||||
parts.append(f"--model {shlex.quote(self.model_name)}")
|
||||
|
||||
timeout = self._extra_env.get("EVAL_CLI_TIMEOUT")
|
||||
if timeout:
|
||||
parts.append(f"--timeout {timeout}")
|
||||
parts.append(f"--timeout {shlex.quote(timeout)}")
|
||||
|
||||
staff = self._extra_env.get("EVAL_CLI_STAFF")
|
||||
if staff and staff.lower() == "false":
|
||||
|
|
@ -161,18 +312,20 @@ class ZedAgent(BaseInstalledAgent):
|
|||
|
||||
parts.append(f"--instruction {escaped_instruction}")
|
||||
|
||||
eval_cli_command = (
|
||||
" ".join(parts) + " 2>&1 | stdbuf -oL tee /logs/agent/eval-cli.txt"
|
||||
await self.exec_as_agent(
|
||||
environment,
|
||||
command=(
|
||||
" ".join(parts) + " 2>&1 | stdbuf -oL tee /logs/agent/eval-cli.txt"
|
||||
),
|
||||
env=env,
|
||||
)
|
||||
|
||||
patch_command = (
|
||||
"cd /testbed && "
|
||||
"git add -A && "
|
||||
"git diff --cached HEAD > /logs/agent/patch.diff && "
|
||||
'echo "Patch size: $(wc -c < /logs/agent/patch.diff) bytes"'
|
||||
await self.exec_as_agent(
|
||||
environment,
|
||||
command=(
|
||||
"git add -A && "
|
||||
"git diff --cached HEAD > /logs/agent/patch.diff && "
|
||||
'echo "Patch size: $(wc -c < /logs/agent/patch.diff) bytes"'
|
||||
),
|
||||
cwd=workdir,
|
||||
)
|
||||
|
||||
return [
|
||||
ExecInput(command=eval_cli_command, env=env),
|
||||
ExecInput(command=patch_command),
|
||||
]
|
||||
|
|
|
|||
|
|
@ -1,55 +0,0 @@
|
|||
#!/bin/bash
|
||||
set -euo pipefail
|
||||
|
||||
# Install runtime dependencies needed by the eval-cli binary (dynamically linked
|
||||
# against glibc + these shared libraries from its GPUI/terminal/language stacks).
|
||||
apt-get update
|
||||
apt-get install -y --no-install-recommends \
|
||||
ca-certificates \
|
||||
curl \
|
||||
git \
|
||||
libasound2 \
|
||||
libfontconfig1 \
|
||||
libglib2.0-0 \
|
||||
libsqlite3-0 \
|
||||
libssl3 \
|
||||
libwayland-client0 \
|
||||
libx11-xcb1 \
|
||||
libxkbcommon-x11-0 \
|
||||
libzstd1
|
||||
|
||||
# Install Node.js 22 LTS (needed by language servers like basedpyright).
|
||||
curl -fsSL https://deb.nodesource.com/setup_22.x | bash -
|
||||
apt-get install -y --no-install-recommends nodejs
|
||||
|
||||
# Preinstall basedpyright in Zed's language server cache to avoid first-run npm install latency.
|
||||
ZED_DATA_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/zed"
|
||||
BASEDPYRIGHT_DIR="$ZED_DATA_DIR/languages/basedpyright"
|
||||
mkdir -p "$BASEDPYRIGHT_DIR"
|
||||
npm install --prefix "$BASEDPYRIGHT_DIR" --save-exact basedpyright
|
||||
|
||||
# Install uv (needed for running Python tests in SWE-bench tasks).
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
. "$HOME/.local/bin/env"
|
||||
ln -sf "$HOME/.local/bin/uv" /usr/local/bin/uv
|
||||
ln -sf "$HOME/.local/bin/uvx" /usr/local/bin/uvx
|
||||
|
||||
{% if binary_uploaded is defined %}
|
||||
# Binary was uploaded directly via setup() — just verify it works.
|
||||
eval-cli --help
|
||||
{% elif download_url is defined %}
|
||||
curl -fsSL "{{ download_url }}" -o /usr/local/bin/eval-cli
|
||||
chmod +x /usr/local/bin/eval-cli
|
||||
eval-cli --help
|
||||
{% else %}
|
||||
echo "ERROR: No eval-cli binary provided."
|
||||
echo ""
|
||||
echo "Either pass binary_path= to upload a local build:"
|
||||
echo " --ae binary_path=/path/to/target/release/eval-cli"
|
||||
echo ""
|
||||
echo "Or set download_url= / EVAL_CLI_DOWNLOAD_URL:"
|
||||
echo " --ae download_url=https://example.com/eval-cli"
|
||||
exit 1
|
||||
{% endif %}
|
||||
|
||||
echo "INSTALL_SUCCESS"
|
||||
|
|
@ -20,7 +20,7 @@ action_log.workspace = true
|
|||
agent.workspace = true
|
||||
agent-client-protocol.workspace = true
|
||||
agent_settings.workspace = true
|
||||
agent_ui.workspace = true
|
||||
agent_ui = { workspace = true, features = ["audio"] }
|
||||
anyhow.workspace = true
|
||||
chrono.workspace = true
|
||||
collections.workspace = true
|
||||
|
|
|
|||
|
|
@ -68,7 +68,7 @@ activity_indicator.workspace = true
|
|||
agent.workspace = true
|
||||
agent-client-protocol.workspace = true
|
||||
agent_settings.workspace = true
|
||||
agent_ui.workspace = true
|
||||
agent_ui = { workspace = true, features = ["audio"] }
|
||||
anyhow.workspace = true
|
||||
askpass.workspace = true
|
||||
assets.workspace = true
|
||||
|
|
|
|||
Loading…
Reference in a new issue