mirror of
https://github.com/ZSeven-W/openpencil.git
synced 2026-06-01 03:14:29 +07:00
feat(smoke): headless op-orchestrator smoke runner (task #28)
New `op-smoke` binary crate — drives one `Orchestrator::run` against `AnthropicProvider` without the desktop UI / `DesignSession` actor model, single-threaded `block_on` on an inline `DocSink`, every event dumped to stderr.

Decouples task #28's "does the pipeline reach the LLM and apply EditorCommands correctly" verification from GUI smoke (canvas paint / chat panel rendering), which still needs the desktop binary.

Usage:

    export OPENPENCIL_ANTHROPIC_API_KEY=sk-ant-...
    cargo run -p op-smoke -- "design a login screen"

Optional `OPENPENCIL_ORCHESTRATOR_MODEL` (default `claude-sonnet-4-6`).

What's traced to stderr:
- `[SMOKE]` model + prompt
- `[LLM]` per-call system/user lengths + engine errors
- `[PROGRESS]` every `op_orchestrator::Progress` variant (Planning, ScaffoldDone, SubtaskStarted/Done/Failed, CleanupDone, validation + visual-ref variants)
- `[CMD]` per `EditorCommand::apply` one-line label + applied result (InsertSubtree shows parent_id + nodes.len(); SetNodeStrokeHex shows hex; etc.)
- `[UNDO]` begin / end batch boundaries
- `[FINAL]` Ok/Err + elapsed; on Ok prints root_frame_id, total_nodes, per-subtask outcomes

What this verifies vs the desktop GUI smoke (`superpowers/notes/2026-05-24-orchestrator-smoke-steps.md`):
- LLM client construction (`AnthropicProvider::new` + auth)
- Network reachability (200 / 401 / 429 surfaces as `LlmError`)
- Planner → scaffold → subtask → cleanup transitions
- `InsertSubtree` ID-remapping (via post-apply state)
- Terminal `RunSummary` / `OrchestratorError`

What it deliberately skips:
- Canvas paint correctness — run the desktop binary
- chat panel progress rendering — run the desktop binary
- Pre-validation fixes — smoke uses `SkippedPreValidator` so the trace stays focused on orchestrator behaviour; the desktop binary uses `LintPreValidator`

`op-host-desktop::chat_orchestrator::DesktopLlmClient` would have been ideal to reuse, but the host crate is binary-only (no lib target); the op-smoke `SmokeLlmClient` is a ~50-line copy that swaps `shared_runtime().spawn` for `tokio::spawn` (smoke owns its own tokio runtime via `#[tokio::main]`).

cargo fmt + clippy --workspace --all-targets -D warnings clean. Sanity-checked the no-prompt and no-API-key error paths.
This commit is contained in:
parent
743e8a902c
commit
e2c6e74b42
2 changed files with 333 additions and 0 deletions
28
crates/op-smoke/Cargo.toml
Normal file
28
crates/op-smoke/Cargo.toml
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
[package]
|
||||
name = "op-smoke"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
rust-version.workspace = true
|
||||
license.workspace = true
|
||||
description = "Headless smoke runner for op-orchestrator — drives one design turn against AnthropicProvider and dumps every event to stderr"
|
||||
|
||||
[[bin]]
|
||||
name = "op-smoke"
|
||||
path = "src/main.rs"
|
||||
|
||||
[dependencies]
|
||||
# AnthropicProvider — the `anthropic` feature gates the real HTTP client.
|
||||
agent = { path = "../../vendor/agent/crates/agent", default-features = false, features = ["anthropic"] }
|
||||
# The orchestrator + its types. Smoke calls `Orchestrator::new().run(...)`
|
||||
# directly on the current thread, bypassing the actor-channel model
|
||||
# `DesignSession` uses for UI safety — we want the simplest possible
|
||||
# trace so debugging the LLM/orchestrator side stays linear.
|
||||
op-orchestrator = { path = "../op-orchestrator" }
|
||||
# `EditorState` + `EditorCommand` — the smoke's own `InlineDocSink` owns
|
||||
# one and applies commands on it.
|
||||
op-editor-core = { path = "../op-editor-core" }
|
||||
# `futures` for `BoxStream` + `mpsc` channels in the LlmClient impl.
|
||||
futures = "0.3"
|
||||
# `tokio` for the runtime that `Orchestrator::run` (async) runs on.
|
||||
# `rt-multi-thread` so the LLM HTTP client can spawn its own tasks.
|
||||
tokio = { version = "1", features = ["rt-multi-thread", "macros"] }
|
||||
305
crates/op-smoke/src/main.rs
Normal file
305
crates/op-smoke/src/main.rs
Normal file
|
|
@ -0,0 +1,305 @@
|
|||
//! Headless smoke runner for `op-orchestrator`.
|
||||
//!
|
||||
//! Drives one design turn against `AnthropicProvider` without the
|
||||
//! desktop UI / `DesignSession` actor model — single-threaded
|
||||
//! `block_on(Orchestrator::run)` against an inline `DocSink`, with every
|
||||
//! progress event + every applied `EditorCommand` dumped to stderr.
|
||||
//!
|
||||
//! ## Usage
|
||||
//!
|
||||
//! ```sh
|
||||
//! export OPENPENCIL_ANTHROPIC_API_KEY=sk-ant-... # or ANTHROPIC_API_KEY
|
||||
//! cargo run -p op-smoke -- "design a login screen"
|
||||
//! ```
|
||||
//!
|
||||
//! Optional env overrides:
|
||||
//! - `OPENPENCIL_ORCHESTRATOR_MODEL` — default `claude-sonnet-4-6`.
|
||||
//!
|
||||
//! ## What this verifies vs the desktop GUI smoke
|
||||
//!
|
||||
//! - LLM client construction (`AnthropicProvider::new` + auth).
|
||||
//! - `Orchestrator::run` reaching the network (200 OK / 401 / 429 etc.
|
||||
//! surfaces as a `LlmError` in the streamed events).
|
||||
//! - Planner → scaffold → subtask → cleanup transitions
|
||||
//! (`Progress::*` enum, every variant rendered to stderr).
|
||||
//! - `EditorCommand` applied to the in-memory state, including
|
||||
//! `InsertSubtree` ID-remapping.
|
||||
//! - Terminal `RunSummary` (subtask outcomes + total node count) or
|
||||
//! `OrchestratorError`.
|
||||
//!
|
||||
//! What this does NOT verify (run the desktop binary for those):
|
||||
//! - Canvas rendering / paint correctness.
|
||||
//! - chat panel rendering of progress lines / streaming bubble.
|
||||
//! - Cross-session abort (mid-turn switch to chat — covered by
|
||||
//! `chat_session::launch_if_pending` host tests).
|
||||
//! - Pre-validation fixes — smoke runs with `SkippedPreValidator` so
|
||||
//! the trace stays focused on orchestrator behaviour. The host
|
||||
//! binary uses `LintPreValidator`; smoke skips that layer.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use agent::abort::AbortController;
|
||||
use agent::provider::anthropic::AnthropicProvider;
|
||||
use agent::provider::Provider;
|
||||
use agent::query::QueryEngine;
|
||||
use agent::stream::Event;
|
||||
use futures::channel::mpsc;
|
||||
use futures::StreamExt;
|
||||
use op_editor_core::{EditorCommand, EditorState};
|
||||
use op_orchestrator::{
|
||||
AbortFlag, CallRequest, DesignRequest, DocSink, LlmChunk, LlmClient, LlmError, Orchestrator,
|
||||
Progress, SkippedPreValidator, SkippedScreenshotProvider, SkippedVisionLlmClient,
|
||||
ValidationProviders,
|
||||
};
|
||||
|
||||
/// `LlmClient` impl for the smoke runner — `AnthropicProvider` under a
|
||||
/// `QueryEngine`, with every call spawned onto the current tokio runtime.
|
||||
/// Mirrors `op-host-desktop::chat_orchestrator::DesktopLlmClient` but
|
||||
/// uses `tokio::spawn` instead of a shared `Runtime::spawn` handle.
|
||||
struct SmokeLlmClient {
|
||||
provider: Arc<dyn Provider>,
|
||||
default_model: String,
|
||||
}
|
||||
|
||||
impl LlmClient for SmokeLlmClient {
|
||||
fn call(
|
||||
&self,
|
||||
req: CallRequest,
|
||||
) -> futures::stream::BoxStream<'static, Result<LlmChunk, LlmError>> {
|
||||
let (tx, rx) = mpsc::unbounded::<Result<LlmChunk, LlmError>>();
|
||||
if req.abort.is_set() {
|
||||
let _ = tx.unbounded_send(Err(LlmError {
|
||||
message: "aborted".into(),
|
||||
aborted: true,
|
||||
}));
|
||||
return Box::pin(rx);
|
||||
}
|
||||
let provider = self.provider.clone();
|
||||
let model = req
|
||||
.model
|
||||
.clone()
|
||||
.unwrap_or_else(|| self.default_model.clone());
|
||||
let system = req.system_prompt.clone();
|
||||
let user = req.user_prompt.clone();
|
||||
|
||||
eprintln!(
|
||||
"[LLM] call: model={model} system_len={} user_len={}",
|
||||
system.len(),
|
||||
user.len()
|
||||
);
|
||||
|
||||
tokio::spawn(async move {
|
||||
let engine = QueryEngine::new(provider, model).with_system(system);
|
||||
let abort = AbortController::new();
|
||||
let stream = match engine.run(user, abort).await {
|
||||
Ok(s) => s,
|
||||
Err(e) => {
|
||||
eprintln!("[LLM] engine.run error: {e}");
|
||||
let _ = tx.unbounded_send(Err(LlmError {
|
||||
message: e.to_string(),
|
||||
aborted: false,
|
||||
}));
|
||||
return;
|
||||
}
|
||||
};
|
||||
let mut stream = stream;
|
||||
while let Some(item) = stream.next().await {
|
||||
let sent = match item {
|
||||
Ok(Event::TextDelta { delta }) => tx.unbounded_send(Ok(LlmChunk::Text(delta))),
|
||||
Ok(Event::Thinking { delta }) => {
|
||||
tx.unbounded_send(Ok(LlmChunk::Thinking(delta)))
|
||||
}
|
||||
Ok(Event::Result { .. }) => break,
|
||||
Ok(Event::Error { code, message }) => {
|
||||
eprintln!("[LLM] event error: {code}: {message}");
|
||||
tx.unbounded_send(Err(LlmError {
|
||||
message: format!("{code}: {message}"),
|
||||
aborted: false,
|
||||
}))
|
||||
}
|
||||
Ok(_) => Ok(()),
|
||||
Err(e) => {
|
||||
eprintln!("[LLM] stream error: {e}");
|
||||
tx.unbounded_send(Err(LlmError {
|
||||
message: e.to_string(),
|
||||
aborted: false,
|
||||
}))
|
||||
}
|
||||
};
|
||||
if sent.is_err() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
Box::pin(rx)
|
||||
}
|
||||
}
|
||||
|
||||
/// Inline `DocSink` — owns the canonical state directly, no channel hop.
|
||||
/// Every `apply` echoes the command kind + result so the smoke trace
|
||||
/// shows the orchestrator's mutations linearly.
|
||||
struct InlineDocSink {
|
||||
state: EditorState,
|
||||
}
|
||||
|
||||
impl DocSink for InlineDocSink {
|
||||
fn state(&self) -> &EditorState {
|
||||
&self.state
|
||||
}
|
||||
|
||||
fn apply(&mut self, cmd: EditorCommand) -> bool {
|
||||
let label = describe_cmd(&cmd);
|
||||
let applied = self.state.apply(cmd);
|
||||
eprintln!("[CMD] {label} → applied={applied}");
|
||||
applied
|
||||
}
|
||||
|
||||
fn begin_undo_batch(&mut self) {
|
||||
eprintln!("[UNDO] begin");
|
||||
}
|
||||
|
||||
fn end_undo_batch(&mut self) {
|
||||
eprintln!("[UNDO] end");
|
||||
}
|
||||
}
|
||||
|
||||
/// One-line label for an `EditorCommand` variant. We don't dump the full
|
||||
/// payload (often kilobytes of node JSON) — just the variant + its key
|
||||
/// identifying field so the trace stays readable.
|
||||
fn describe_cmd(cmd: &EditorCommand) -> String {
|
||||
match cmd {
|
||||
EditorCommand::InsertSubtree { nodes, parent_id } => {
|
||||
format!("InsertSubtree(parent={parent_id:?}, nodes={})", nodes.len())
|
||||
}
|
||||
EditorCommand::UpdateNode { node_id, .. } => format!("UpdateNode({node_id:?})"),
|
||||
EditorCommand::DeleteNode { node_id } => format!("DeleteNode({node_id:?})"),
|
||||
EditorCommand::MoveNode { node_id, .. } => format!("MoveNode({node_id:?})"),
|
||||
EditorCommand::SetNodeLayoutProp {
|
||||
node_id, property, ..
|
||||
} => format!("SetNodeLayoutProp({node_id:?}, prop={property:?})"),
|
||||
EditorCommand::SetNodeStrokeHex { node_id, hex } => {
|
||||
format!("SetNodeStrokeHex({node_id:?}, {hex})")
|
||||
}
|
||||
EditorCommand::SetNodeStrokeWidth { node_id, .. } => {
|
||||
format!("SetNodeStrokeWidth({node_id:?})")
|
||||
}
|
||||
EditorCommand::SetNodeFillHex { node_id, hex } => {
|
||||
format!("SetNodeFillHex({node_id:?}, {hex})")
|
||||
}
|
||||
EditorCommand::RemoveNodeEffect { node_id, index } => {
|
||||
format!("RemoveNodeEffect({node_id:?}, [{index}])")
|
||||
}
|
||||
other => {
|
||||
let dbg = format!("{other:?}");
|
||||
// Truncate the Debug output so massive payloads don't blow up the trace.
|
||||
if dbg.len() > 120 {
|
||||
format!("{}...", &dbg[..117])
|
||||
} else {
|
||||
dbg
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::main(flavor = "multi_thread")]
|
||||
async fn main() -> std::process::ExitCode {
|
||||
let prompt = match std::env::args().nth(1) {
|
||||
Some(p) if !p.is_empty() => p,
|
||||
_ => {
|
||||
eprintln!(
|
||||
"usage: op-smoke <prompt>\n\nexport OPENPENCIL_ANTHROPIC_API_KEY=... (or ANTHROPIC_API_KEY)"
|
||||
);
|
||||
return std::process::ExitCode::from(2);
|
||||
}
|
||||
};
|
||||
|
||||
let api_key = std::env::var("OPENPENCIL_ANTHROPIC_API_KEY")
|
||||
.ok()
|
||||
.or_else(|| std::env::var("ANTHROPIC_API_KEY").ok())
|
||||
.filter(|k| !k.is_empty());
|
||||
let Some(api_key) = api_key else {
|
||||
eprintln!("error: neither OPENPENCIL_ANTHROPIC_API_KEY nor ANTHROPIC_API_KEY is set");
|
||||
return std::process::ExitCode::from(3);
|
||||
};
|
||||
|
||||
let model = std::env::var("OPENPENCIL_ORCHESTRATOR_MODEL")
|
||||
.unwrap_or_else(|_| "claude-sonnet-4-6".into());
|
||||
|
||||
eprintln!("[SMOKE] model={model}");
|
||||
eprintln!("[SMOKE] prompt={prompt:?}");
|
||||
|
||||
let provider: Arc<dyn Provider> = Arc::new(AnthropicProvider::new(api_key));
|
||||
let llm = SmokeLlmClient {
|
||||
provider,
|
||||
default_model: model.clone(),
|
||||
};
|
||||
|
||||
let mut sink = InlineDocSink {
|
||||
state: EditorState::new(),
|
||||
};
|
||||
let request = DesignRequest {
|
||||
prompt,
|
||||
model: Some(model),
|
||||
provider: None,
|
||||
design_md: sink.state.doc.design_md.clone(),
|
||||
append_context: None,
|
||||
concurrency: 1,
|
||||
validation_enabled: false,
|
||||
visual_ref_enabled: false,
|
||||
};
|
||||
let abort = AbortFlag::new();
|
||||
// Skip pre-validation in the smoke trace — keeps the orchestrator
|
||||
// signal clean. The desktop binary swaps this for `LintPreValidator`.
|
||||
let pre_validator = SkippedPreValidator;
|
||||
let screenshot = SkippedScreenshotProvider;
|
||||
let vision = SkippedVisionLlmClient;
|
||||
let providers = ValidationProviders {
|
||||
pre_validator: &pre_validator,
|
||||
screenshot: &screenshot,
|
||||
vision: &vision,
|
||||
system_prompt: String::new(),
|
||||
};
|
||||
|
||||
let mut on_progress = |p: Progress| {
|
||||
eprintln!("[PROGRESS] {p:?}");
|
||||
};
|
||||
|
||||
let started = std::time::Instant::now();
|
||||
let result = Orchestrator::new()
|
||||
.run(
|
||||
request,
|
||||
&mut sink,
|
||||
&llm,
|
||||
&mut on_progress,
|
||||
&abort,
|
||||
&providers,
|
||||
)
|
||||
.await;
|
||||
let elapsed = started.elapsed();
|
||||
|
||||
match result {
|
||||
Ok(summary) => {
|
||||
eprintln!("[FINAL] Ok in {elapsed:?}");
|
||||
eprintln!(" root_frame_id = {:?}", summary.root_frame_id);
|
||||
eprintln!(" total_nodes = {}", summary.total_nodes);
|
||||
eprintln!(" subtasks = {}", summary.subtasks.len());
|
||||
for s in &summary.subtasks {
|
||||
eprintln!(
|
||||
" - {}: {} node(s){}",
|
||||
s.id,
|
||||
s.node_count,
|
||||
s.error
|
||||
.as_deref()
|
||||
.map(|e| format!(" [error: {e}]"))
|
||||
.unwrap_or_default()
|
||||
);
|
||||
}
|
||||
std::process::ExitCode::SUCCESS
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("[FINAL] Err in {elapsed:?}: {e}");
|
||||
std::process::ExitCode::from(1)
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in a new issue