feat(ai): add thinking mode and Codex provider support to server API

- Support thinkingMode/effort params in chat and generate endpoints
- Add Codex (OpenAI) provider streaming via codex-client utility
- Forward thinking config to both Anthropic SDK and Agent SDK paths
This commit is contained in:
Fini 2026-02-22 08:18:09 +08:00
parent 1f1de83bf7
commit 5bc192e451
3 changed files with 465 additions and 5 deletions

View file

@ -1,11 +1,15 @@
import { defineEventHandler, readBody, setResponseHeaders } from 'h3'
import { resolveClaudeCli } from '../../utils/resolve-claude-cli'
import { runCodexExec } from '../../utils/codex-client'
/** JSON request body accepted by the chat endpoint. */
interface ChatBody {
  /** System prompt forwarded to the selected provider. */
  system: string
  /** Conversation turns, in the order supplied by the client. */
  messages: { role: 'user' | 'assistant'; content: string }[]
  /** Optional model identifier passed through to the provider. */
  model?: string
  /** Provider selector; `'opencode'` and `'openai'` are routed to dedicated paths. */
  provider?: string
  /** Requested thinking behaviour. */
  thinkingMode?: 'adaptive' | 'disabled' | 'enabled'
  /** Requested thinking token budget. */
  thinkingBudgetTokens?: number
  /** Requested reasoning effort level. */
  effort?: 'low' | 'medium' | 'high' | 'max'
}
/**
@ -31,6 +35,9 @@ export default defineEventHandler(async (event) => {
if (body.provider === 'opencode') {
return streamViaOpenCode(body, body.model)
}
if (body.provider === 'openai') {
return streamViaCodex(body, body.model)
}
// Default: existing behavior (backward-compatible)
const apiKey = process.env.ANTHROPIC_API_KEY
@ -47,6 +54,29 @@ export default defineEventHandler(async (event) => {
// Interval (ms) between keep-alive `ping` events written to the response stream —
// prevents client timeout while waiting for the API's time to first token (TTFT)
const KEEPALIVE_INTERVAL_MS = 15_000
/**
 * Translate the request's thinking options into the `thinking` object that is
 * spread into the Anthropic `messages.stream` call.
 *
 * @param body - Parsed chat request body.
 * @returns `undefined` when no thinking mode was requested; `{ type }` for
 *   `'adaptive'` / `'disabled'`; for `'enabled'`, `{ type, budget_tokens }`
 *   where the budget is always a finite whole number of at least 1024.
 */
function getAnthropicThinkingConfig(body: ChatBody):
  | { type: 'adaptive' | 'disabled' }
  | { type: 'enabled'; budget_tokens: number }
  | undefined {
  if (!body.thinkingMode) return undefined
  if (body.thinkingMode === 'enabled') {
    const minimumBudget = 1024
    // The body is unvalidated JSON, so the budget may be missing, fractional or
    // not a usable number at all. `Math.max(1024, NaN)` is NaN, which
    // JSON-serializes to `null`, so fall back to the minimum instead of
    // forwarding it, and drop any fractional part.
    const requested = Number(body.thinkingBudgetTokens)
    const budget = Number.isFinite(requested)
      ? Math.max(minimumBudget, Math.floor(requested))
      : minimumBudget
    return { type: 'enabled', budget_tokens: budget }
  }
  return { type: body.thinkingMode }
}
/**
 * Build the `thinking` option that is spread into the Agent SDK `query` options.
 *
 * @param body - Parsed chat request body.
 * @returns `undefined` when no thinking mode is set; otherwise `{ type }`,
 *   with the caller-supplied budget attached as-is for `'enabled'`.
 */
function getAgentThinkingConfig(body: ChatBody):
  | { type: 'adaptive' | 'disabled' }
  | { type: 'enabled'; budgetTokens?: number }
  | undefined {
  const { thinkingMode: mode, thinkingBudgetTokens } = body
  if (!mode) return undefined
  return mode === 'enabled'
    ? { type: mode, budgetTokens: thinkingBudgetTokens }
    : { type: mode }
}
/** Stream via Anthropic SDK (when API key is available) */
async function streamViaAnthropicSDK(apiKey: string, body: ChatBody, model?: string) {
const { default: Anthropic } = await import('@anthropic-ai/sdk')
@ -62,11 +92,14 @@ async function streamViaAnthropicSDK(apiKey: string, body: ChatBody, model?: str
} catch { /* stream already closed */ }
}, KEEPALIVE_INTERVAL_MS)
try {
const thinking = getAnthropicThinkingConfig(body)
const messageStream = client.messages.stream({
model: model || 'claude-sonnet-4-5-20250929',
max_tokens: 16384,
system: body.system,
messages: body.messages,
...(body.effort ? { effort: body.effort } : {}),
...(thinking ? { thinking } : {}),
})
for await (const ev of messageStream) {
@ -118,13 +151,14 @@ function streamViaAgentSDK(body: ChatBody, model?: string) {
// Build prompt from the last user message
const lastUserMsg = [...body.messages].reverse().find((m) => m.role === 'user')
const prompt = lastUserMsg?.content ?? ''
let prompt = lastUserMsg?.content ?? ''
// Remove CLAUDECODE env to allow running from within a CC terminal
const env = { ...process.env } as Record<string, string | undefined>
delete env.CLAUDECODE
const claudePath = resolveClaudeCli()
const thinking = getAgentThinkingConfig(body)
const q = query({
prompt,
@ -134,8 +168,11 @@ function streamViaAgentSDK(body: ChatBody, model?: string) {
maxTurns: 1,
includePartialMessages: true,
tools: [],
plugins: [],
permissionMode: 'plan',
persistSession: false,
...(body.effort ? { effort: body.effort } : {}),
...(thinking ? { thinking } : {}),
env,
...(claudePath ? { pathToClaudeCodeExecutable: claudePath } : {}),
},
@ -191,6 +228,106 @@ function parseOpenCodeModel(model?: string): { providerID: string; modelID: stri
return { providerID: model.slice(0, idx), modelID: model.slice(idx + 1) }
}
/**
 * Reduce the request's effort level to the three levels used for OpenCode.
 *
 * @param effort - Effort level from the request, if any.
 * @returns `'high'` for `'max'`, the level itself for the other three, and
 *   `undefined` when no effort was given.
 */
function mapOpenCodeEffort(
  effort?: 'low' | 'medium' | 'high' | 'max',
): 'low' | 'medium' | 'high' | undefined {
  if (effort === 'max') return 'high'
  return effort || undefined
}
/**
 * Assemble the optional `reasoning` object attached to an OpenCode prompt payload.
 *
 * Keys are only included when the request supplies them: `effort` (mapped via
 * `mapOpenCodeEffort`), `enabled` (for the `'enabled'` / `'disabled'` modes)
 * and `budgetTokens` (for a positive numeric budget).
 *
 * @param body - Parsed chat request body.
 * @returns The reasoning object, or `undefined` when it would be empty.
 */
function buildOpenCodeReasoning(
  body: ChatBody,
): Record<string, unknown> | undefined {
  const { thinkingMode, thinkingBudgetTokens: budget } = body
  const mappedEffort = mapOpenCodeEffort(body.effort)
  const hasExplicitToggle = thinkingMode === 'enabled' || thinkingMode === 'disabled'
  const hasBudget = typeof budget === 'number' && budget > 0

  const options: Record<string, unknown> = {
    ...(mappedEffort ? { effort: mappedEffort } : {}),
    ...(hasExplicitToggle ? { enabled: thinkingMode === 'enabled' } : {}),
    ...(hasBudget ? { budgetTokens: budget } : {}),
  }
  return Object.keys(options).length === 0 ? undefined : options
}
/**
 * Send a prompt to OpenCode, attaching reasoning options when the request has any.
 *
 * If the call with reasoning options comes back with an `error`, a warning is
 * logged and the prompt is sent once more with the base payload only.
 *
 * @param ocClient - OpenCode client exposing `session.prompt`.
 * @param basePayload - Prompt payload without reasoning options.
 * @param body - Parsed chat request body the reasoning options are derived from.
 * @returns Whatever `session.prompt` resolved with for the call that was kept.
 */
async function promptOpenCodeWithThinking(
  ocClient: any,
  basePayload: Record<string, unknown>,
  body: ChatBody,
): Promise<{ data: any; error: any }> {
  const reasoning = buildOpenCodeReasoning(body)
  if (reasoning) {
    const attempt = await ocClient.session.prompt({ ...basePayload, reasoning })
    if (!attempt.error) {
      return attempt
    }
    console.warn('[AI] OpenCode reasoning options rejected, retrying without reasoning.')
  }
  // Either no reasoning options were requested or the enhanced call failed.
  return await ocClient.session.prompt(basePayload)
}
/**
 * Answer a chat request through the Codex CLI and wrap the result in a
 * `data: {json}` event stream.
 *
 * Only the most recent user message is used as the prompt. While the CLI runs,
 * `ping` events are emitted every `KEEPALIVE_INTERVAL_MS`; afterwards the
 * stream carries either one `error` event, or an optional `text` event
 * followed by `done`, and is then closed.
 *
 * @param body - Parsed chat request body.
 * @param model - Optional model identifier forwarded to `runCodexExec`.
 * @returns A `Response` whose body is the event stream.
 */
function streamViaCodex(body: ChatBody, model?: string) {
  const stream = new ReadableStream({
    async start(controller) {
      const encoder = new TextEncoder()
      // Serialize one event frame and push it onto the stream.
      const send = (type: string, content: string): void => {
        controller.enqueue(encoder.encode(`data: ${JSON.stringify({ type, content })}\n\n`))
      }

      const keepAlive = setInterval(() => {
        try {
          send('ping', '')
        } catch { /* stream already closed */ }
      }, KEEPALIVE_INTERVAL_MS)

      try {
        const latestUserTurn = [...body.messages].reverse().find((m) => m.role === 'user')
        const { thinkingMode, thinkingBudgetTokens, effort } = body
        const result = await runCodexExec(latestUserTurn?.content ?? '', {
          model,
          systemPrompt: body.system,
          thinkingMode,
          thinkingBudgetTokens,
          effort,
        })
        // Stop pinging before the final events are written.
        clearInterval(keepAlive)

        if (result.error) {
          send('error', result.error)
          return
        }
        if (result.text) {
          send('text', result.text)
        }
        send('done', '')
      } catch (error) {
        send('error', error instanceof Error ? error.message : 'Unknown error')
      } finally {
        clearInterval(keepAlive)
        controller.close()
      }
    },
  })
  return new Response(stream)
}
/** Stream via OpenCode SDK (connects to a running OpenCode server) */
function streamViaOpenCode(body: ChatBody, model?: string) {
const stream = new ReadableStream({
@ -231,11 +368,17 @@ function streamViaOpenCode(body: ChatBody, model?: string) {
const parsed = parseOpenCodeModel(model)
// Send prompt and await full response
const { data: result, error: promptError } = await ocClient.session.prompt({
const promptPayload: Record<string, unknown> = {
sessionID: session.id,
...(parsed ? { model: parsed } : {}),
parts: [{ type: 'text', text: prompt }],
})
}
const { data: result, error: promptError } = await promptOpenCodeWithThinking(
ocClient,
promptPayload,
body,
)
if (promptError) {
throw new Error('OpenCode prompt failed')

View file

@ -1,11 +1,15 @@
import { defineEventHandler, readBody, setResponseHeaders } from 'h3'
import { resolveClaudeCli } from '../../utils/resolve-claude-cli'
import { runCodexExec } from '../../utils/codex-client'
/** JSON request body accepted by the generate endpoint. */
interface GenerateBody {
  /** System prompt forwarded to the selected provider. */
  system: string
  /** The single user message to generate a response for. */
  message: string
  /** Optional model identifier passed through to the provider. */
  model?: string
  /** Provider selector; `'opencode'` and `'openai'` are routed to dedicated paths. */
  provider?: string
  /** Requested thinking behaviour. */
  thinkingMode?: 'adaptive' | 'disabled' | 'enabled'
  /** Requested thinking token budget. */
  thinkingBudgetTokens?: number
  /** Requested reasoning effort level. */
  effort?: 'low' | 'medium' | 'high' | 'max'
}
/**
@ -25,6 +29,9 @@ export default defineEventHandler(async (event) => {
if (body.provider === 'opencode') {
return generateViaOpenCode(body, body.model)
}
if (body.provider === 'openai') {
return generateViaCodex(body, body.model)
}
// Default: existing behavior (backward-compatible)
const apiKey = process.env.ANTHROPIC_API_KEY
@ -76,6 +83,7 @@ async function generateViaAgentSDK(body: GenerateBody, model?: string): Promise<
model: model || 'claude-sonnet-4-6',
maxTurns: 1,
tools: [],
plugins: [],
permissionMode: 'plan',
persistSession: false,
env,
@ -100,6 +108,64 @@ async function generateViaAgentSDK(body: GenerateBody, model?: string): Promise<
}
}
/**
 * Generate a one-shot response through the Codex CLI.
 *
 * @param body - Parsed generate request body.
 * @param model - Optional model identifier forwarded to `runCodexExec`.
 * @returns `{ error }` when the run reported an error, otherwise `{ text }`
 *   (an empty string when no text came back).
 */
async function generateViaCodex(body: GenerateBody, model?: string): Promise<{ text?: string; error?: string }> {
  const { message, system, thinkingMode, thinkingBudgetTokens, effort } = body
  const outcome = await runCodexExec(message, {
    model,
    systemPrompt: system,
    thinkingMode,
    thinkingBudgetTokens,
    effort,
  })
  if (outcome.error) {
    return { error: outcome.error }
  }
  return { text: outcome.text ?? '' }
}
/**
 * Reduce the request's effort level to the three levels used for OpenCode.
 *
 * @param effort - Effort level from the request, if any.
 * @returns `'high'` for `'max'`, the level itself for the other three, and
 *   `undefined` when no effort was given.
 */
function mapOpenCodeEffort(
  effort?: 'low' | 'medium' | 'high' | 'max',
): 'low' | 'medium' | 'high' | undefined {
  if (effort === 'max') return 'high'
  return effort || undefined
}
/**
 * Assemble the optional `reasoning` object attached to an OpenCode prompt payload.
 *
 * Keys are only included when the request supplies them: `effort` (mapped via
 * `mapOpenCodeEffort`), `enabled` (for the `'enabled'` / `'disabled'` modes)
 * and `budgetTokens` (for a positive numeric budget).
 *
 * @param body - Parsed generate request body.
 * @returns The reasoning object, or `undefined` when it would be empty.
 */
function buildOpenCodeReasoning(
  body: GenerateBody,
): Record<string, unknown> | undefined {
  const { thinkingMode, thinkingBudgetTokens: budget } = body
  const mappedEffort = mapOpenCodeEffort(body.effort)
  const hasExplicitToggle = thinkingMode === 'enabled' || thinkingMode === 'disabled'
  const hasBudget = typeof budget === 'number' && budget > 0

  const options: Record<string, unknown> = {
    ...(mappedEffort ? { effort: mappedEffort } : {}),
    ...(hasExplicitToggle ? { enabled: thinkingMode === 'enabled' } : {}),
    ...(hasBudget ? { budgetTokens: budget } : {}),
  }
  return Object.keys(options).length === 0 ? undefined : options
}
/**
 * Send a prompt to OpenCode, attaching reasoning options when the request has any.
 *
 * If the call with reasoning options comes back with an `error`, a warning is
 * logged and the prompt is sent once more with the base payload only.
 *
 * @param ocClient - OpenCode client exposing `session.prompt`.
 * @param basePayload - Prompt payload without reasoning options.
 * @param body - Parsed generate request body the reasoning options are derived from.
 * @returns Whatever `session.prompt` resolved with for the call that was kept.
 */
async function promptOpenCodeWithThinking(
  ocClient: any,
  basePayload: Record<string, unknown>,
  body: GenerateBody,
): Promise<{ data: any; error: any }> {
  const reasoning = buildOpenCodeReasoning(body)
  if (reasoning) {
    const attempt = await ocClient.session.prompt({ ...basePayload, reasoning })
    if (!attempt.error) {
      return attempt
    }
    console.warn('[AI] OpenCode reasoning options rejected, retrying without reasoning.')
  }
  // Either no reasoning options were requested or the enhanced call failed.
  return await ocClient.session.prompt(basePayload)
}
/** Generate via OpenCode SDK (connects to a running OpenCode server) */
async function generateViaOpenCode(body: GenerateBody, model?: string): Promise<{ text?: string; error?: string }> {
let ocServer: { close(): void } | undefined
@ -131,11 +197,17 @@ async function generateViaOpenCode(body: GenerateBody, model?: string): Promise<
}
// Send main prompt and await full response
const { data: result, error: promptError } = await ocClient.session.prompt({
const promptPayload: Record<string, unknown> = {
sessionID: session.id,
...(modelOption ? { model: modelOption } : {}),
parts: [{ type: 'text', text: body.message }],
})
}
const { data: result, error: promptError } = await promptOpenCodeWithThinking(
ocClient,
promptPayload,
body,
)
if (promptError) {
return { error: 'OpenCode generation failed' }

View file

@ -0,0 +1,245 @@
import { spawn } from 'node:child_process'
import { mkdtemp, readFile, rm } from 'node:fs/promises'
import { tmpdir } from 'node:os'
import { join } from 'node:path'
/** Thinking behaviour requested by the caller. */
type ThinkingMode = 'adaptive' | 'disabled' | 'enabled'

/** Reasoning effort level requested by the caller. */
type ThinkingEffort = 'low' | 'medium' | 'high' | 'max'

/** Optional settings accepted by `runCodexExec`. */
interface CodexExecOptions {
  /** Model name passed to the CLI via `--model` when set. */
  model?: string
  /** System instructions prepended to the user prompt. */
  systemPrompt?: string
  /** Thinking behaviour; combined with `effort` to pick a reasoning effort. */
  thinkingMode?: ThinkingMode
  /** Accepted from callers; not read by the code visible in this file. */
  thinkingBudgetTokens?: number
  /** Requested reasoning effort level. */
  effort?: ThinkingEffort
  /** Upper bound for one CLI run, in milliseconds. */
  timeoutMs?: number
}

/** Outcome of one CLI run: either the final text or an error message. */
interface CodexCliResult {
  text?: string
  error?: string
}

/** Timeout used when `timeoutMs` is not supplied: 15 minutes, in milliseconds. */
const DEFAULT_CODEX_TIMEOUT_MS = 1000 * 60 * 15
/**
 * Run the Codex CLI once in `exec` mode and return its final message.
 *
 * The CLI is asked to write its last message to a file inside a throwaway temp
 * directory. That file's content is preferred; text collected from the JSON
 * event stream on stdout is the fallback. The temp directory is removed
 * afterwards regardless of the outcome.
 *
 * @param userPrompt - The user's request text.
 * @param options - Optional model, system prompt, thinking/effort and timeout settings.
 * @returns `{ text }` when any output was produced, otherwise `{ error }`.
 *   Failures while running the CLI are returned as `{ error }`; a failure to
 *   create the temp directory rejects the returned promise.
 */
export async function runCodexExec(
  userPrompt: string,
  options: CodexExecOptions = {},
): Promise<CodexCliResult> {
  const tempDir = await mkdtemp(join(tmpdir(), 'openpencil-codex-'))
  const outputPath = join(tempDir, 'last-message.txt')
  const prompt = buildPrompt(options.systemPrompt, userPrompt)
  const codexEffort = resolveCodexEffort(options.thinkingMode, options.effort)

  const args = [
    'exec',
    '--json',
    '--skip-git-repo-check',
    '--sandbox',
    'read-only',
    '--output-last-message',
    outputPath,
  ]
  if (options.model) {
    args.push('--model', options.model)
  }
  if (codexEffort) {
    args.push('--config', `model_reasoning_effort="${codexEffort}"`)
  }
  // The prompt is user-controlled and may start with "-" (for example a
  // markdown bullet). Mark the end of options first so it is always treated as
  // the positional prompt and never parsed as a CLI flag.
  // NOTE(review): relies on the CLI honouring the conventional `--`
  // end-of-options marker — confirm against the installed Codex version.
  args.push('--', prompt)

  try {
    const runResult = await executeCodexCommand(
      args,
      options.timeoutMs ?? DEFAULT_CODEX_TIMEOUT_MS,
    )
    // A missing or unreadable output file is not fatal: fall back to stdout text.
    const finalText = await readFile(outputPath, 'utf-8').catch(() => '')
    const normalizedText = finalText.trim() || runResult.text.trim()
    if (normalizedText) {
      return { text: normalizedText }
    }
    if (runResult.errors.length > 0) {
      return { error: runResult.errors.join('; ') }
    }
    return { error: 'Codex returned no output.' }
  } catch (error) {
    return { error: error instanceof Error ? error.message : 'Codex execution failed' }
  } finally {
    // Best-effort cleanup; a failed removal must not mask the result.
    await rm(tempDir, { recursive: true, force: true }).catch(() => {})
  }
}
/**
 * Combine the system instructions and the user request into one prompt string.
 *
 * @param systemPrompt - Optional system instructions.
 * @param userPrompt - The user's request text.
 * @returns The trimmed user text alone when there are no non-blank system
 *   instructions; otherwise both parts under labelled headings.
 */
function buildPrompt(systemPrompt: string | undefined, userPrompt: string): string {
  const request = userPrompt.trim()
  const instructions = systemPrompt?.trim()
  if (!instructions) {
    return request
  }
  return `SYSTEM INSTRUCTIONS:\n${instructions}\n\nUSER REQUEST:\n${request}`
}
/**
 * Pick the reasoning effort value passed to the Codex CLI.
 *
 * Precedence: a `'disabled'` thinking mode always yields `'low'`; otherwise an
 * explicit effort wins (`'max'` becomes `'high'`); otherwise an `'enabled'`
 * thinking mode yields `'medium'`.
 *
 * @param thinkingMode - Requested thinking behaviour, if any.
 * @param effort - Requested effort level, if any.
 * @returns The effort to pass on, or `undefined` to leave it unset.
 */
function resolveCodexEffort(
  thinkingMode: ThinkingMode | undefined,
  effort: ThinkingEffort | undefined,
): 'low' | 'medium' | 'high' | undefined {
  if (thinkingMode === 'disabled') return 'low'

  switch (effort) {
    case 'max':
      return 'high'
    case 'low':
    case 'medium':
    case 'high':
      return effort
    default:
      return thinkingMode === 'enabled' ? 'medium' : undefined
  }
}
/**
 * Spawn the `codex` binary with the given arguments and collect its output.
 *
 * stdout is read line by line; each non-empty line is handed to
 * `parseCodexJsonLine`, and any text or error it yields is accumulated.
 * stderr is buffered and only inspected when the process exits non-zero.
 *
 * @param args - Command-line arguments for `codex`.
 * @param timeoutMs - Time after which the child is sent SIGTERM and the promise rejects.
 * @returns Resolves with the accumulated text and event errors when the exit
 *   code is 0. Rejects on spawn failure, on timeout, or on a non-zero exit
 *   (using the stderr error, else the last event error, else a generic message).
 */
async function executeCodexCommand(
  args: string[],
  timeoutMs: number,
): Promise<{ text: string; errors: string[] }> {
  return await new Promise((resolve, reject) => {
    const child = spawn('codex', args, {
      env: process.env,
      stdio: ['ignore', 'pipe', 'pipe'],
    })

    // Decode at the stream level. Converting each Buffer chunk on its own
    // corrupts any multi-byte UTF-8 character that happens to be split across
    // two chunks; with an encoding set, the stream holds back incomplete
    // sequences until the rest arrives.
    child.stdout.setEncoding('utf8')
    child.stderr.setEncoding('utf8')

    let stdoutBuffer = ''
    let stderrBuffer = ''
    let textAccumulator = ''
    const errors: string[] = []

    const flushStdoutLine = (line: string) => {
      const event = parseCodexJsonLine(line)
      if (!event) return
      if (event.text) {
        textAccumulator += event.text
      }
      if (event.error) {
        errors.push(event.error)
      }
    }

    const timer = setTimeout(() => {
      child.kill('SIGTERM')
      reject(new Error(`Codex request timed out after ${Math.round(timeoutMs / 1000)}s.`))
    }, timeoutMs)

    child.stdout.on('data', (chunk: string) => {
      stdoutBuffer += chunk
      // Process every complete line; keep the trailing partial line buffered.
      let idx = stdoutBuffer.indexOf('\n')
      while (idx >= 0) {
        const line = stdoutBuffer.slice(0, idx).trim()
        stdoutBuffer = stdoutBuffer.slice(idx + 1)
        if (line) flushStdoutLine(line)
        idx = stdoutBuffer.indexOf('\n')
      }
    })

    child.stderr.on('data', (chunk: string) => {
      stderrBuffer += chunk
    })

    child.on('error', (err) => {
      clearTimeout(timer)
      reject(err)
    })

    child.on('close', (code) => {
      clearTimeout(timer)
      // The last line may not have been newline-terminated.
      const tail = stdoutBuffer.trim()
      if (tail) {
        flushStdoutLine(tail)
      }
      if (code === 0) {
        resolve({ text: textAccumulator, errors })
        return
      }
      const stderrError = extractCodexCliError(stderrBuffer)
      const fallback = errors[errors.length - 1]
      reject(
        new Error(
          stderrError
            || fallback
            || `Codex exited with code ${code ?? 'unknown'}.`,
        ),
      )
    })
  })
}
/**
 * Interpret one line of the CLI's JSON output.
 *
 * @param line - A single non-empty stdout line.
 * @returns `{ error }` for an object whose `type` is `'error'`; `{ text }`
 *   when the object carries a non-empty string under `delta`, `text` or
 *   `content` (checked in that order); `null` for anything else, including
 *   lines that are not JSON or are JSON but not a plain object.
 */
function parseCodexJsonLine(
  line: string,
): { text?: string; error?: string } | null {
  let decoded: unknown
  try {
    decoded = JSON.parse(line)
  } catch {
    return null
  }
  // Valid JSON is not necessarily an object: a bare `null` line would
  // otherwise throw on the property reads below, inside a stream handler.
  if (typeof decoded !== 'object' || decoded === null || Array.isArray(decoded)) {
    return null
  }
  const parsed = decoded as Record<string, unknown>

  const type = typeof parsed.type === 'string' ? parsed.type : ''
  if (type === 'error') {
    const message = getStringField(parsed, ['message'])
    return { error: message || 'Codex returned an unknown error.' }
  }

  // Take text from the first of the top-level "delta", "text" or "content" keys.
  const text =
    getStringField(parsed, ['delta'])
    || getStringField(parsed, ['text'])
    || getStringField(parsed, ['content'])
  if (!text) return null
  return { text }
}

/**
 * Return the first non-empty string value found under any of `keys`.
 *
 * @param obj - Object to read from.
 * @param keys - Property names to try, in order.
 * @returns The first matching string, or `null` when none of the keys holds one.
 */
function getStringField(
  obj: Record<string, unknown>,
  keys: string[],
): string | null {
  for (const key of keys) {
    const val = obj[key]
    if (typeof val === 'string' && val.length > 0) {
      return val
    }
  }
  return null
}
/**
 * Pull the most relevant error message out of the CLI's stderr output.
 *
 * @param stderr - Everything the process wrote to stderr.
 * @returns The text after the prefix of the last line starting with `error:`
 *   (case-insensitive); if there is no such line, the last non-blank line;
 *   `null` when stderr is blank.
 */
function extractCodexCliError(stderr: string): string | null {
  const cleaned = stderr.trim()
  if (cleaned.length === 0) return null

  const rows = cleaned
    .split('\n')
    .map((row) => row.trim())
    .filter((row) => row.length > 0)

  // Search from the bottom so the most recent error line wins.
  const flagged = [...rows].reverse().find((row) => row.toLowerCase().startsWith('error:'))
  if (flagged !== undefined) {
    return flagged.replace(/^error:\s*/i, '').trim()
  }
  return rows[rows.length - 1] ?? null
}