feat(ai): add thinking mode and Codex provider support to server API

- Support thinkingMode/effort params in chat and generate endpoints
- Add Codex (OpenAI) provider streaming via codex-client utility
- Forward thinking config to both Anthropic SDK and Agent SDK paths
2026-06-01 03:14:29 +07:00 · 2026-02-22 08:18:09 +08:00 · 2026-02-22 08:18:09 +08:00 · 5bc192e451
commit 5bc192e451
parent 1f1de83bf7
3 changed files with 465 additions and 5 deletions
--- a/server/api/ai/chat.ts
+++ b/server/api/ai/chat.ts
@ -1,11 +1,15 @@
 import { defineEventHandler, readBody, setResponseHeaders } from 'h3'
 import { resolveClaudeCli } from '../../utils/resolve-claude-cli'
+import { runCodexExec } from '../../utils/codex-client'

 interface ChatBody {
  system: string
  messages: Array<{ role: 'user' | 'assistant'; content: string }>
  model?: string
  provider?: string
+  /** Thinking mode requested by the client; the Anthropic and Agent SDK paths send no thinking config when it is omitted. */
+  thinkingMode?: 'adaptive' | 'disabled' | 'enabled'
+  /** Thinking token budget; the Anthropic SDK path raises it to at least 1024 when thinkingMode is 'enabled'. */
+  thinkingBudgetTokens?: number
+  /** Reasoning effort; the OpenCode and Codex paths map 'max' to 'high'. */
+  effort?: 'low' | 'medium' | 'high' | 'max'
 }

 /**
@ -31,6 +35,9 @@ export default defineEventHandler(async (event) => {
  if (body.provider === 'opencode') {
    return streamViaOpenCode(body, body.model)
  }
+  if (body.provider === 'openai') {
+    return streamViaCodex(body, body.model)
+  }

  // Default: existing behavior (backward-compatible)
  const apiKey = process.env.ANTHROPIC_API_KEY
@ -47,6 +54,29 @@ export default defineEventHandler(async (event) => {
 // Keep-alive ping interval (ms) — prevents client timeout while waiting for API TTFT
 const KEEPALIVE_INTERVAL_MS = 15_000

+/**
+ * Build the `thinking` option for the Anthropic Messages API from the request body.
+ *
+ * The body comes from client-supplied JSON, so the values are validated at runtime
+ * rather than trusted to match the `ChatBody` type:
+ * - an unrecognized or missing `thinkingMode` yields `undefined` (no thinking config is sent);
+ * - for 'enabled', the budget is coerced to a whole number and clamped to
+ *   `[1024, maxTokens - 1]`; a missing or non-numeric budget falls back to 1024.
+ *
+ * @param body - Parsed chat request body.
+ * @param maxTokens - The `max_tokens` value of the request the config is attached to.
+ *   Defaults to 16384, the value used by streamViaAnthropicSDK.
+ * @returns The thinking config, or `undefined` when none should be sent.
+ */
+function getAnthropicThinkingConfig(
+  body: ChatBody,
+  maxTokens = 16_384,
+):
+  | { type: 'adaptive' | 'disabled' }
+  | { type: 'enabled'; budget_tokens: number }
+  | undefined {
+  const mode = body.thinkingMode
+  if (mode === 'adaptive' || mode === 'disabled') return { type: mode }
+  if (mode !== 'enabled') return undefined
+
+  // Number(undefined) and Number('abc') are NaN; NaN would serialize to null in the request.
+  const requested = Number(body.thinkingBudgetTokens)
+  const wholeBudget = Number.isFinite(requested) ? Math.floor(requested) : 1024
+
+  // The thinking budget has to stay below max_tokens, but never drops under the 1024 floor.
+  const ceiling = Math.max(1024, maxTokens - 1)
+  const budget = Math.min(ceiling, Math.max(1024, wholeBudget))
+  return { type: 'enabled', budget_tokens: budget }
+}
+
+/**
+ * Translate the request's thinking settings into the `thinking` option passed to the Agent SDK query.
+ *
+ * @param body - Parsed chat request body.
+ * @returns `undefined` when no thinking mode was requested; for 'enabled' the mode plus the
+ *   caller's budget (passed through as-is, possibly undefined); otherwise just the mode.
+ */
+function getAgentThinkingConfig(body: ChatBody):
+  | { type: 'adaptive' | 'disabled' }
+  | { type: 'enabled'; budgetTokens?: number }
+  | undefined {
+  const { thinkingMode: mode, thinkingBudgetTokens } = body
+  if (!mode) return undefined
+  return mode === 'enabled'
+    ? { type: 'enabled', budgetTokens: thinkingBudgetTokens }
+    : { type: mode }
+}
+
 /** Stream via Anthropic SDK (when API key is available) */
 async function streamViaAnthropicSDK(apiKey: string, body: ChatBody, model?: string) {
  const { default: Anthropic } = await import('@anthropic-ai/sdk')
@ -62,11 +92,14 @@ async function streamViaAnthropicSDK(apiKey: string, body: ChatBody, model?: str
        } catch { /* stream already closed */ }
      }, KEEPALIVE_INTERVAL_MS)
      try {
+        const thinking = getAnthropicThinkingConfig(body)
        const messageStream = client.messages.stream({
          model: model || 'claude-sonnet-4-5-20250929',
          max_tokens: 16384,
          system: body.system,
          messages: body.messages,
+          ...(body.effort ? { effort: body.effort } : {}),
+          ...(thinking ? { thinking } : {}),
        })

        for await (const ev of messageStream) {
@ -118,13 +151,14 @@ function streamViaAgentSDK(body: ChatBody, model?: string) {

        // Build prompt from the last user message
        const lastUserMsg = [...body.messages].reverse().find((m) => m.role === 'user')
-        const prompt = lastUserMsg?.content ?? ''
+        let prompt = lastUserMsg?.content ?? ''

        // Remove CLAUDECODE env to allow running from within a CC terminal
        const env = { ...process.env } as Record<string, string | undefined>
        delete env.CLAUDECODE

        const claudePath = resolveClaudeCli()
+        const thinking = getAgentThinkingConfig(body)

        const q = query({
          prompt,
@ -134,8 +168,11 @@ function streamViaAgentSDK(body: ChatBody, model?: string) {
            maxTurns: 1,
            includePartialMessages: true,
            tools: [],
+            plugins: [],
            permissionMode: 'plan',
            persistSession: false,
+            ...(body.effort ? { effort: body.effort } : {}),
+            ...(thinking ? { thinking } : {}),
            env,
            ...(claudePath ? { pathToClaudeCodeExecutable: claudePath } : {}),
          },
@ -191,6 +228,106 @@ function parseOpenCodeModel(model?: string): { providerID: string; modelID: stri
  return { providerID: model.slice(0, idx), modelID: model.slice(idx + 1) }
 }

+/**
+ * Narrow the API's effort scale to the three levels sent to OpenCode.
+ *
+ * @param effort - Effort requested by the client, if any.
+ * @returns 'high' for 'max', the effort itself for the other levels, `undefined` when unset.
+ */
+function mapOpenCodeEffort(
+  effort?: 'low' | 'medium' | 'high' | 'max',
+): 'low' | 'medium' | 'high' | undefined {
+  if (effort === 'max') return 'high'
+  // Any falsy value (unset, empty string from loose JSON) collapses to undefined.
+  return effort || undefined
+}
+
+/**
+ * Assemble the optional `reasoning` object attached to an OpenCode prompt payload.
+ *
+ * Keys are only present when the request supplies them:
+ * - `effort`: the mapped effort level;
+ * - `enabled`: true/false for thinkingMode 'enabled'/'disabled' ('adaptive' sets nothing);
+ * - `budgetTokens`: the budget when it is a positive number.
+ *
+ * @param body - Parsed chat request body.
+ * @returns The reasoning object, or `undefined` when no key applies.
+ */
+function buildOpenCodeReasoning(
+  body: ChatBody,
+): Record<string, unknown> | undefined {
+  const { thinkingMode: mode, thinkingBudgetTokens: budget } = body
+  const effort = mapOpenCodeEffort(body.effort)
+  const hasExplicitMode = mode === 'enabled' || mode === 'disabled'
+  const hasBudget = typeof budget === 'number' && budget > 0
+
+  const reasoning: Record<string, unknown> = {
+    ...(effort ? { effort } : {}),
+    ...(hasExplicitMode ? { enabled: mode === 'enabled' } : {}),
+    ...(hasBudget ? { budgetTokens: budget } : {}),
+  }
+  return Object.keys(reasoning).length === 0 ? undefined : reasoning
+}
+
+/**
+ * Send a prompt to OpenCode, attaching reasoning options when the request asks for them.
+ *
+ * If the prompt with reasoning options returns an error, the prompt is retried once with
+ * the plain payload. The first error is logged so that failures unrelated to the reasoning
+ * options (which are retried as well) are not hidden behind the retry.
+ *
+ * @param ocClient - OpenCode client exposing `session.prompt(payload)`.
+ * @param basePayload - Prompt payload without reasoning options.
+ * @param body - Parsed chat request body the reasoning options are derived from.
+ * @returns The `{ data, error }` result of the last prompt attempt.
+ */
+async function promptOpenCodeWithThinking(
+  ocClient: any,
+  basePayload: Record<string, unknown>,
+  body: ChatBody,
+): Promise<{ data: any; error: any }> {
+  const reasoning = buildOpenCodeReasoning(body)
+  if (!reasoning) {
+    return await ocClient.session.prompt(basePayload)
+  }
+
+  const firstTry = await ocClient.session.prompt({ ...basePayload, reasoning })
+  if (!firstTry.error) {
+    return firstTry
+  }
+
+  // The error may or may not be caused by the reasoning options; keep it visible in the log.
+  console.warn(
+    '[AI] OpenCode prompt with reasoning options failed, retrying without reasoning:',
+    firstTry.error,
+  )
+  return await ocClient.session.prompt(basePayload)
+}
+
+/**
+ * Stream a Codex CLI completion to the client as server-sent `data:` frames.
+ *
+ * Only the last user message in `body.messages` is used as the prompt. While Codex runs,
+ * `ping` frames are emitted every KEEPALIVE_INTERVAL_MS; afterwards either an `error`
+ * frame, or an optional `text` frame followed by `done`, is sent and the stream is closed.
+ *
+ * If the client cancels the stream, the keep-alive timer is stopped immediately and any
+ * later writes are dropped instead of throwing on the closed controller.
+ *
+ * @param body - Parsed chat request body.
+ * @param model - Optional model name forwarded to the Codex CLI.
+ * @returns A Response whose body is the event stream.
+ */
+function streamViaCodex(body: ChatBody, model?: string) {
+  const encoder = new TextEncoder()
+  let pingTimer: ReturnType<typeof setInterval> | undefined
+  let closed = false
+
+  const stopPing = () => {
+    if (pingTimer !== undefined) {
+      clearInterval(pingTimer)
+      pingTimer = undefined
+    }
+  }
+
+  const stream = new ReadableStream({
+    async start(controller) {
+      // Single write path: no-op once the stream is closed or cancelled.
+      const send = (type: 'ping' | 'text' | 'error' | 'done', content: string) => {
+        if (closed) return
+        try {
+          controller.enqueue(encoder.encode(`data: ${JSON.stringify({ type, content })}\n\n`))
+        } catch { /* stream already closed */ }
+      }
+
+      pingTimer = setInterval(() => send('ping', ''), KEEPALIVE_INTERVAL_MS)
+
+      try {
+        const lastUserMsg = [...body.messages].reverse().find((m) => m.role === 'user')
+        const result = await runCodexExec(lastUserMsg?.content ?? '', {
+          model,
+          systemPrompt: body.system,
+          thinkingMode: body.thinkingMode,
+          thinkingBudgetTokens: body.thinkingBudgetTokens,
+          effort: body.effort,
+        })
+
+        stopPing()
+        if (result.error) {
+          send('error', result.error)
+          return
+        }
+
+        if (result.text) {
+          send('text', result.text)
+        }
+        send('done', '')
+      } catch (error) {
+        send('error', error instanceof Error ? error.message : 'Unknown error')
+      } finally {
+        stopPing()
+        if (!closed) {
+          closed = true
+          try {
+            controller.close()
+          } catch { /* stream already closed */ }
+        }
+      }
+    },
+    // Invoked when the consumer cancels (e.g. the client disconnects).
+    cancel() {
+      closed = true
+      stopPing()
+    },
+  })
+
+  return new Response(stream)
+}
+
 /** Stream via OpenCode SDK (connects to a running OpenCode server) */
 function streamViaOpenCode(body: ChatBody, model?: string) {
  const stream = new ReadableStream({
@ -231,11 +368,17 @@ function streamViaOpenCode(body: ChatBody, model?: string) {
        const parsed = parseOpenCodeModel(model)

        // Send prompt and await full response
-        const { data: result, error: promptError } = await ocClient.session.prompt({
+        const promptPayload: Record<string, unknown> = {
          sessionID: session.id,
          ...(parsed ? { model: parsed } : {}),
          parts: [{ type: 'text', text: prompt }],
-        })
+        }
+
+        const { data: result, error: promptError } = await promptOpenCodeWithThinking(
+          ocClient,
+          promptPayload,
+          body,
+        )

        if (promptError) {
          throw new Error('OpenCode prompt failed')
--- a/server/api/ai/generate.ts
+++ b/server/api/ai/generate.ts
@ -1,11 +1,15 @@
 import { defineEventHandler, readBody, setResponseHeaders } from 'h3'
 import { resolveClaudeCli } from '../../utils/resolve-claude-cli'
+import { runCodexExec } from '../../utils/codex-client'

 interface GenerateBody {
  system: string
  message: string
  model?: string
  provider?: string
+  /** Thinking mode requested by the client; forwarded to the Codex and OpenCode paths. */
+  thinkingMode?: 'adaptive' | 'disabled' | 'enabled'
+  /** Thinking token budget; the OpenCode path only sends it when it is a positive number. */
+  thinkingBudgetTokens?: number
+  /** Reasoning effort; the OpenCode and Codex paths map 'max' to 'high'. */
+  effort?: 'low' | 'medium' | 'high' | 'max'
 }

 /**
@ -25,6 +29,9 @@ export default defineEventHandler(async (event) => {
  if (body.provider === 'opencode') {
    return generateViaOpenCode(body, body.model)
  }
+  if (body.provider === 'openai') {
+    return generateViaCodex(body, body.model)
+  }

  // Default: existing behavior (backward-compatible)
  const apiKey = process.env.ANTHROPIC_API_KEY
@ -76,6 +83,7 @@ async function generateViaAgentSDK(body: GenerateBody, model?: string): Promise<
        model: model || 'claude-sonnet-4-6',
        maxTurns: 1,
        tools: [],
+        plugins: [],
        permissionMode: 'plan',
        persistSession: false,
        env,
@ -100,6 +108,64 @@ async function generateViaAgentSDK(body: GenerateBody, model?: string): Promise<
  }
 }

+/**
+ * Generate a single completion through the Codex CLI.
+ *
+ * @param body - Parsed generate request body; `message` is the prompt, `system` the system prompt.
+ * @param model - Optional model name forwarded to the Codex CLI.
+ * @returns `{ error }` when Codex reported an error, otherwise `{ text }` (empty string if Codex returned none).
+ */
+async function generateViaCodex(body: GenerateBody, model?: string): Promise<{ text?: string; error?: string }> {
+  const { system, message, thinkingMode, thinkingBudgetTokens, effort } = body
+  const { error, text } = await runCodexExec(message, {
+    model,
+    systemPrompt: system,
+    thinkingMode,
+    thinkingBudgetTokens,
+    effort,
+  })
+  if (error) {
+    return { error }
+  }
+  return { text: text ?? '' }
+}
+
+/**
+ * Convert the client-facing effort level into one OpenCode is given.
+ *
+ * @param effort - Effort requested by the client, if any.
+ * @returns `undefined` when no effort is set, 'high' for 'max', otherwise the level unchanged.
+ */
+function mapOpenCodeEffort(
+  effort?: 'low' | 'medium' | 'high' | 'max',
+): 'low' | 'medium' | 'high' | undefined {
+  const requested = effort ? effort : undefined
+  return requested === 'max' ? 'high' : requested
+}
+
+/**
+ * Derive the `reasoning` options for an OpenCode prompt from a generate request.
+ *
+ * `effort` is set from the mapped effort level, `enabled` from an explicit
+ * 'enabled'/'disabled' thinking mode, and `budgetTokens` from a positive numeric budget.
+ *
+ * @param body - Parsed generate request body.
+ * @returns The reasoning options, or `undefined` if the request specifies none of them.
+ */
+function buildOpenCodeReasoning(
+  body: GenerateBody,
+): Record<string, unknown> | undefined {
+  const options: Record<string, unknown> = {}
+  const { thinkingMode, thinkingBudgetTokens: budget } = body
+
+  const mappedEffort = mapOpenCodeEffort(body.effort)
+  if (mappedEffort) options.effort = mappedEffort
+
+  // 'adaptive' (or no mode) leaves `enabled` unset.
+  if (thinkingMode === 'enabled' || thinkingMode === 'disabled') {
+    options.enabled = thinkingMode === 'enabled'
+  }
+
+  if (typeof budget === 'number' && budget > 0) options.budgetTokens = budget
+
+  const isEmpty = Object.keys(options).length === 0
+  return isEmpty ? undefined : options
+}
+
+/**
+ * Prompt OpenCode, first with reasoning options (when the request has any) and, if that
+ * attempt returns an error, once more with the plain payload.
+ *
+ * The retry happens for any error, not only for rejected reasoning options, so the first
+ * error is included in the warning to keep the real cause diagnosable.
+ *
+ * @param ocClient - OpenCode client exposing `session.prompt(payload)`.
+ * @param basePayload - Prompt payload without reasoning options.
+ * @param body - Parsed generate request body the reasoning options are derived from.
+ * @returns The `{ data, error }` result of the last prompt attempt.
+ */
+async function promptOpenCodeWithThinking(
+  ocClient: any,
+  basePayload: Record<string, unknown>,
+  body: GenerateBody,
+): Promise<{ data: any; error: any }> {
+  const reasoning = buildOpenCodeReasoning(body)
+  if (!reasoning) {
+    return await ocClient.session.prompt(basePayload)
+  }
+
+  const firstTry = await ocClient.session.prompt({ ...basePayload, reasoning })
+  if (!firstTry.error) {
+    return firstTry
+  }
+
+  console.warn(
+    '[AI] OpenCode prompt with reasoning options failed, retrying without reasoning:',
+    firstTry.error,
+  )
+  return await ocClient.session.prompt(basePayload)
+}
+
 /** Generate via OpenCode SDK (connects to a running OpenCode server) */
 async function generateViaOpenCode(body: GenerateBody, model?: string): Promise<{ text?: string; error?: string }> {
  let ocServer: { close(): void } | undefined
@ -131,11 +197,17 @@ async function generateViaOpenCode(body: GenerateBody, model?: string): Promise<
    }

    // Send main prompt and await full response
-    const { data: result, error: promptError } = await ocClient.session.prompt({
+    const promptPayload: Record<string, unknown> = {
      sessionID: session.id,
      ...(modelOption ? { model: modelOption } : {}),
      parts: [{ type: 'text', text: body.message }],
-    })
+    }
+
+    const { data: result, error: promptError } = await promptOpenCodeWithThinking(
+      ocClient,
+      promptPayload,
+      body,
+    )

    if (promptError) {
      return { error: 'OpenCode generation failed' }
--- a/server/utils/codex-client.ts
+++ b/server/utils/codex-client.ts
@ -0,0 +1,245 @@
+import { spawn } from 'node:child_process'
+import { mkdtemp, readFile, rm } from 'node:fs/promises'
+import { tmpdir } from 'node:os'
+import { join } from 'node:path'
+
+/** Thinking modes accepted by runCodexExec. */
+type ThinkingMode = 'adaptive' | 'disabled' | 'enabled'
+/** Effort levels accepted by runCodexExec; resolveCodexEffort maps 'max' to 'high'. */
+type ThinkingEffort = 'low' | 'medium' | 'high' | 'max'
+
+/** Options accepted by runCodexExec. */
+interface CodexExecOptions {
+  /** Passed to the CLI as `--model` when set. */
+  model?: string
+  /** Prepended to the user prompt under a "SYSTEM INSTRUCTIONS:" header by buildPrompt. */
+  systemPrompt?: string
+  /** Combined with `effort` by resolveCodexEffort to pick the CLI reasoning effort. */
+  thinkingMode?: ThinkingMode
+  /** Accepted for parity with the other providers; not read by runCodexExec. */
+  thinkingBudgetTokens?: number
+  /** Requested effort; see resolveCodexEffort for how it maps to the CLI setting. */
+  effort?: ThinkingEffort
+  /** Timeout for the CLI run in milliseconds; defaults to DEFAULT_CODEX_TIMEOUT_MS. */
+  timeoutMs?: number
+}
+
+/** Result of a Codex CLI run: either the response text or an error message. */
+interface CodexCliResult {
+  text?: string
+  error?: string
+}
+
+/** Default CLI timeout: 15 minutes, in milliseconds. */
+const DEFAULT_CODEX_TIMEOUT_MS = 15 * 60 * 1000
+
+/**
+ * Run `codex exec` once and return its final message.
+ *
+ * The CLI is asked to write its last message to a file in a fresh temporary directory;
+ * that file is preferred, with the text accumulated from the JSON event stream as a
+ * fallback. The temporary directory is always removed afterwards.
+ *
+ * Failures never throw: every error, including a failure to create the temporary
+ * directory, is reported through the `error` field of the result.
+ *
+ * @param userPrompt - The user's request text.
+ * @param options - Model, system prompt, thinking/effort settings and timeout.
+ * @returns `{ text }` on success, `{ error }` otherwise.
+ */
+export async function runCodexExec(
+  userPrompt: string,
+  options: CodexExecOptions = {},
+): Promise<CodexCliResult> {
+  let tempDir: string | undefined
+
+  try {
+    tempDir = await mkdtemp(join(tmpdir(), 'openpencil-codex-'))
+    const outputPath = join(tempDir, 'last-message.txt')
+    const prompt = buildPrompt(options.systemPrompt, userPrompt)
+    const codexEffort = resolveCodexEffort(options.thinkingMode, options.effort)
+
+    const args = [
+      'exec',
+      '--json',
+      '--skip-git-repo-check',
+      '--sandbox',
+      'read-only',
+      '--output-last-message',
+      outputPath,
+    ]
+
+    if (options.model) {
+      args.push('--model', options.model)
+    }
+
+    if (codexEffort) {
+      args.push('--config', `model_reasoning_effort="${codexEffort}"`)
+    }
+
+    // `--` ends option parsing so a prompt that begins with '-' is not read as a CLI flag.
+    args.push('--', prompt)
+
+    const runResult = await executeCodexCommand(
+      args,
+      options.timeoutMs ?? DEFAULT_CODEX_TIMEOUT_MS,
+    )
+    // A missing or unreadable output file is not an error: fall back to the streamed text.
+    const finalText = await readFile(outputPath, 'utf-8').catch(() => '')
+    const normalizedText = finalText.trim() || runResult.text.trim()
+
+    if (normalizedText) {
+      return { text: normalizedText }
+    }
+
+    if (runResult.errors.length > 0) {
+      return { error: runResult.errors.join('; ') }
+    }
+
+    return { error: 'Codex returned no output.' }
+  } catch (error) {
+    return { error: error instanceof Error ? error.message : 'Codex execution failed' }
+  } finally {
+    if (tempDir) {
+      // Best-effort cleanup; a failure to remove the directory must not mask the result.
+      await rm(tempDir, { recursive: true, force: true }).catch(() => {})
+    }
+  }
+}
+
+/**
+ * Combine the optional system prompt and the user prompt into the single prompt string
+ * handed to the Codex CLI.
+ *
+ * @param systemPrompt - System instructions; ignored when missing or blank.
+ * @param userPrompt - The user's request.
+ * @returns The trimmed user prompt alone, or both prompts (trimmed) under
+ *   "SYSTEM INSTRUCTIONS:" / "USER REQUEST:" headers.
+ */
+function buildPrompt(systemPrompt: string | undefined, userPrompt: string): string {
+  const request = userPrompt.trim()
+  const instructions = systemPrompt?.trim()
+
+  if (!instructions) return request
+  return `SYSTEM INSTRUCTIONS:\n${instructions}\n\nUSER REQUEST:\n${request}`
+}
+
+/**
+ * Pick the `model_reasoning_effort` value for the Codex CLI.
+ *
+ * Precedence: a 'disabled' thinking mode always yields 'low'; otherwise an explicit
+ * effort wins ('max' is capped to 'high'); otherwise 'enabled' yields 'medium'.
+ *
+ * @param thinkingMode - Requested thinking mode, if any.
+ * @param effort - Requested effort, if any.
+ * @returns The effort to pass to the CLI, or `undefined` to leave the CLI default in place.
+ */
+function resolveCodexEffort(
+  thinkingMode: ThinkingMode | undefined,
+  effort: ThinkingEffort | undefined,
+): 'low' | 'medium' | 'high' | undefined {
+  if (thinkingMode === 'disabled') return 'low'
+
+  switch (effort) {
+    case 'max':
+      return 'high'
+    case 'low':
+    case 'medium':
+    case 'high':
+      return effort
+  }
+
+  return thinkingMode === 'enabled' ? 'medium' : undefined
+}
+
+/**
+ * Spawn the `codex` CLI and collect what it reports on stdout/stderr.
+ *
+ * stdout is consumed as newline-delimited JSON: each complete line is handed to
+ * parseCodexJsonLine and its text/error contributions are accumulated. Both pipes are
+ * decoded with `setEncoding('utf8')`, so a multi-byte character split across two chunks
+ * is reassembled instead of being decoded as two broken halves.
+ *
+ * @param args - Arguments passed to the `codex` executable.
+ * @param timeoutMs - After this many milliseconds the child is sent SIGTERM and the promise rejects.
+ * @returns Resolves with the accumulated text and error events when the process exits with code 0.
+ * @throws Rejects on spawn failure, on timeout, or on a non-zero exit (using the stderr
+ *   error line, else the last error event, else a generic exit-code message).
+ */
+async function executeCodexCommand(
+  args: string[],
+  timeoutMs: number,
+): Promise<{ text: string; errors: string[] }> {
+  return await new Promise((resolve, reject) => {
+    const child = spawn('codex', args, {
+      env: process.env,
+      stdio: ['ignore', 'pipe', 'pipe'],
+    })
+
+    // Let the streams decode UTF-8 statefully; 'data' events then deliver strings.
+    child.stdout.setEncoding('utf8')
+    child.stderr.setEncoding('utf8')
+
+    let stdoutBuffer = ''
+    let stderrBuffer = ''
+    let textAccumulator = ''
+    const errors: string[] = []
+
+    const flushStdoutLine = (line: string) => {
+      const event = parseCodexJsonLine(line)
+      if (!event) return
+      if (event.text) {
+        textAccumulator += event.text
+      }
+      if (event.error) {
+        errors.push(event.error)
+      }
+    }
+
+    const timer = setTimeout(() => {
+      child.kill('SIGTERM')
+      reject(new Error(`Codex request timed out after ${Math.round(timeoutMs / 1000)}s.`))
+    }, timeoutMs)
+
+    child.stdout.on('data', (chunk: string) => {
+      stdoutBuffer += chunk
+      // Process every complete line; keep the trailing partial line buffered.
+      let idx = stdoutBuffer.indexOf('\n')
+      while (idx >= 0) {
+        const line = stdoutBuffer.slice(0, idx).trim()
+        stdoutBuffer = stdoutBuffer.slice(idx + 1)
+        if (line) flushStdoutLine(line)
+        idx = stdoutBuffer.indexOf('\n')
+      }
+    })
+
+    child.stderr.on('data', (chunk: string) => {
+      stderrBuffer += chunk
+    })
+
+    child.on('error', (err) => {
+      clearTimeout(timer)
+      reject(err)
+    })
+
+    child.on('close', (code) => {
+      clearTimeout(timer)
+
+      // The final line may arrive without a trailing newline.
+      const tail = stdoutBuffer.trim()
+      if (tail) {
+        flushStdoutLine(tail)
+      }
+
+      if (code === 0) {
+        resolve({ text: textAccumulator, errors })
+        return
+      }
+
+      const stderrError = extractCodexCliError(stderrBuffer)
+      const fallback = errors[errors.length - 1]
+      reject(
+        new Error(
+          stderrError
+            || fallback
+            || `Codex exited with code ${code ?? 'unknown'}.`,
+        ),
+      )
+    })
+  })
+}
+
+/**
+ * Interpret one line of the Codex CLI's JSON output.
+ *
+ * @param line - A single stdout line.
+ * @returns `{ error }` for an event whose `type` is "error"; `{ text }` when the event has
+ *   a non-empty string in `delta`, `text` or `content` (checked in that order); `null`
+ *   when the line is not valid JSON, is not a JSON object, or carries none of those fields.
+ */
+function parseCodexJsonLine(
+  line: string,
+): { text?: string; error?: string } | null {
+  let decoded: unknown
+  try {
+    decoded = JSON.parse(line)
+  } catch {
+    return null
+  }
+
+  // Valid JSON is not necessarily an object: `null`, numbers, strings and arrays carry no event.
+  if (typeof decoded !== 'object' || decoded === null || Array.isArray(decoded)) {
+    return null
+  }
+  const parsed = decoded as Record<string, unknown>
+
+  const type = typeof parsed.type === 'string' ? parsed.type : ''
+  if (type === 'error') {
+    const message = getStringField(parsed, ['message'])
+    return { error: message || 'Codex returned an unknown error.' }
+  }
+
+  // Common Codex JSONL stream events include deltas in "delta" or "text".
+  const text =
+    getStringField(parsed, ['delta'])
+    || getStringField(parsed, ['text'])
+    || getStringField(parsed, ['content'])
+
+  if (!text) return null
+  return { text }
+}
+
+/**
+ * Look up the first of several keys whose value is a non-empty string.
+ *
+ * @param obj - Object to read from.
+ * @param keys - Candidate keys, in priority order.
+ * @returns The first matching string, or `null` when no key holds a non-empty string.
+ */
+function getStringField(
+  obj: Record<string, unknown>,
+  keys: string[],
+): string | null {
+  const match = keys
+    .map((key) => obj[key])
+    .find((candidate): candidate is string => typeof candidate === 'string' && candidate.length > 0)
+  return match ?? null
+}
+
+/**
+ * Pull the most relevant error message out of the CLI's stderr output.
+ *
+ * @param stderr - Everything the process wrote to stderr.
+ * @returns The last line starting with "error:" (case-insensitive) with that prefix
+ *   removed; otherwise the last non-blank line; `null` when stderr has no non-blank line.
+ */
+function extractCodexCliError(stderr: string): string | null {
+  const lines = stderr
+    .split('\n')
+    .map((raw) => raw.trim())
+    .filter((text) => text.length > 0)
+  if (lines.length === 0) return null
+
+  // Search from the end so the most recent error line wins.
+  const lastErrorLine = [...lines]
+    .reverse()
+    .find((text) => text.toLowerCase().startsWith('error:'))
+  if (lastErrorLine !== undefined) {
+    return lastErrorLine.replace(/^error:\s*/i, '').trim()
+  }
+
+  return lines[lines.length - 1] ?? null
+}