mirror of
https://github.com/nexu-io/open-design.git
synced 2026-05-31 19:04:39 +07:00
* fix(web,daemon): make max_tokens configurable (closes #29) BYOK users on custom Anthropic-compatible providers (e.g. Xiaomi MiMo) hit the hardcoded 8192 cap and saw artifacts truncated mid-stream. - AppConfig.maxTokens with Settings input (EN/CN + 8 other locales) - ProxyStreamRequest.maxTokens contract field - anthropic, anthropic-compatible, and openai-compatible providers all forward cfg.maxTokens - /api/proxy/anthropic/stream and /api/proxy/stream payloads honor it, defaulting to 8192 when unset so prior clients are unaffected Original sketch by @mashu in #78 (50a9d14); rebased to the apps/web layout and extended to the proxy paths actually used when baseUrl is set, which is where #29's user actually traffics. * feat(web): per-model max_tokens defaults Adds a hand-maintained MODEL_MAX_TOKENS table (Claude 4.5 line → 64k, mimo-v2.5-pro → 32k) and an effectiveMaxTokens helper layered over the override field added in 6a3ae5f, so #29's user — and others on supported models — don't have to discover Settings to avoid mid-stream truncation. - apps/web/src/state/maxTokens.ts: lookup + helpers - providers/{anthropic,anthropic-compatible,openai-compatible}.ts: forward effectiveMaxTokens(cfg) instead of cfg.maxTokens ?? 8192 - SettingsDialog: input becomes an optional override (blank = default, shown as placeholder) - 10 locale hint strings updated to the new semantics * feat(web): vendor LiteLLM model metadata for max_tokens defaults Replaces the 4-entry hand-rolled MODEL_MAX_TOKENS map from 544e67e with a vendored slice of BerriAI/litellm's model_prices_and_context_window JSON (1970 chat models, ~97KB raw / ~25KB gzip). Future model launches land in maxTokens.ts via `pnpm sync-litellm-models` instead of manual edits.
- scripts/sync-litellm-models.ts: fetches the upstream JSON, filters to chat-mode entries, projects each entry to its max_output_tokens (or max_tokens fallback), and writes a sorted, license-attributed JSON - apps/web/src/state/litellm-models.json: generated artifact, committed - apps/web/src/state/maxTokens.ts: lookup is now OVERRIDES → LITELLM_MODELS → FALLBACK_MAX_TOKENS. The OVERRIDES table shrinks to just `mimo-v2.5-pro` (LiteLLM only ships MiMo via OpenRouter/Novita aliases, not the canonical id Xiaomi's API uses). LiteLLM is MIT-licensed (BerriAI/litellm/blob/main/LICENSE); attribution is preserved in both the script header and the generated JSON's _license field. * test(web,docs): cover maxTokens lookup + document sync workflow - apps/web/src/state/maxTokens.test.ts: six vitest cases pinning the three-tier lookup (override → LiteLLM → fallback) and the effectiveMaxTokens user-override path. Guards against a future sync silently dropping the Anthropic 4.5 entries we rely on. - CONTRIBUTING.md / CONTRIBUTING.zh-CN.md: new "Updating model max_tokens metadata" section pointing future maintainers at scripts/sync-litellm-models.ts and explaining when OVERRIDES is appropriate (it's the rare exception, not the default). * fix(web): mark Max tokens label as optional in 10 locales The Settings field is optional (blank means "use the per-model default") but the label gave no visual cue, breaking the implicit pattern that every other API-mode field (key/model/baseUrl) is required. Append "(optional)" — using the locale's natural parenthetical convention (Chinese full-width brackets, Japanese 任意, Russian опционально, etc.) — so the field reads as discretionary at a glance. * fix(web): validate maxTokens override against advertised UI bounds Addresses Siri-Ray's review on commit 0d98185.
The Settings input declares min={1024}/max={200000}/step={1024}, but until now effectiveMaxTokens trusted any defined cfg.maxTokens, so a stale or hand-edited localStorage value (negative, zero, fractional, billions) would pass straight to the Anthropic SDK on the direct path while the daemon proxy quietly clamped it back to 8192 on the proxied path — same config, divergent behavior depending on route. - maxTokens.ts: add MIN_MAX_TOKENS / MAX_MAX_TOKENS exports and isValidOverride helper. effectiveMaxTokens only honors the override when it is a finite integer in [1024, 200000]; otherwise falls back to modelMaxTokensDefault. - SettingsDialog.tsx: input bounds now reference the same constants so the UI promise can't drift from the runtime check. - maxTokens.test.ts: six new cases pinning the rejection of negative, zero, sub-MIN, super-MAX, non-integer (fractional / NaN / Infinity) overrides plus the inclusive MIN/MAX boundaries. The daemon proxy's existing `> 0` fallback stays as defense-in-depth.
80 lines
2.6 KiB
JavaScript
80 lines
2.6 KiB
JavaScript
#!/usr/bin/env node
|
|
// Sync apps/web/src/state/litellm-models.json from BerriAI/litellm.
|
|
//
|
|
// LiteLLM (MIT, https://github.com/BerriAI/litellm) maintains the de-facto
|
|
// community catalog of model context/output caps and pricing across every
|
|
// major provider. We vendor a filtered slice (chat-mode max_output_tokens
|
|
// only) so the web client can default `max_tokens` per model without an
|
|
// extra network call at runtime.
|
|
//
|
|
// Usage:
|
|
// node --experimental-strip-types scripts/sync-litellm-models.ts
|
|
//
|
|
// Re-run periodically (or when a new model the user cares about lands) and
|
|
// commit the regenerated JSON. Coverage gaps (e.g. mimo-v2.5-pro) are
|
|
// filled by the hand-maintained override table in maxTokens.ts.
|
|
|
|
import { writeFileSync } from 'node:fs';
|
|
import path from 'node:path';
|
|
import { fileURLToPath } from 'node:url';
|
|
|
|
/** Raw-content URL of the LiteLLM model catalog that this script fetches and filters. */
const SOURCE_URL = 'https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json';
|
|
|
|
// ES modules expose no built-in __dirname, so derive it from this module's URL.
const scriptFile = fileURLToPath(import.meta.url);
const __dirname = path.dirname(scriptFile);

// The generated JSON is addressed relative to the parent of this script's
// directory, so the result does not depend on the current working directory.
const repoRoot = path.resolve(__dirname, '..');
const OUT_PATH = path.resolve(repoRoot, 'apps/web/src/state/litellm-models.json');
|
|
|
|
/**
 * The subset of one upstream catalog entry that the sync reads.
 *
 * Every field is optional, and the token fields are typed `number | string`,
 * so consumers must check the runtime type before using a value as a cap.
 */
interface LiteLLMEntry {
  /** Preferred source for the per-model output cap. */
  max_output_tokens?: number | string;
  /** Consulted for the cap when `max_output_tokens` is absent. */
  max_tokens?: number | string;
  /** Entry category; the sync only keeps entries whose mode is `'chat'`. */
  mode?: string;
}
|
|
|
|
/** True when `value` can serve as a token cap: a positive, finite number. */
function isUsableCap(value: unknown): value is number {
  return typeof value === 'number' && Number.isFinite(value) && value > 0;
}

/**
 * Extracts `[modelId, cap]` pairs for every chat-mode entry in the catalog.
 *
 * The cap is the first usable value among `max_output_tokens` and
 * `max_tokens`, in that order. Checking usability per field (rather than
 * `a ?? b`) lets a valid `max_tokens` still win when `max_output_tokens` is
 * present but unusable (a string, zero, negative, NaN).
 *
 * @param raw - Parsed upstream catalog keyed by model id.
 * @returns The collected pairs plus the number of chat-mode entries seen,
 *   including those skipped for lacking a usable cap.
 */
function collectChatModelCaps(raw: Record<string, unknown>): {
  caps: Array<[string, number]>;
  scanned: number;
} {
  // Pairs instead of a plain object so that no model id can collide with an
  // Object.prototype accessor such as `__proto__` during assignment.
  const caps: Array<[string, number]> = [];
  let scanned = 0;
  for (const [id, value] of Object.entries(raw)) {
    // `sample_spec` is skipped by key; presumably it is upstream's schema
    // example rather than a real model.
    if (id === 'sample_spec') continue;
    if (!value || typeof value !== 'object') continue;
    const entry = value as LiteLLMEntry;
    if (entry.mode !== 'chat') continue;
    scanned++;
    const cap = [entry.max_output_tokens, entry.max_tokens].find(isUsableCap);
    if (cap !== undefined) caps.push([id, cap]);
  }
  return { caps, scanned };
}

/**
 * Fetches the upstream catalog, reduces it to per-model output caps, and
 * writes the result to `OUT_PATH` as pretty-printed JSON with provenance
 * fields (`_source`, `_generated_at`, `_license`).
 *
 * @throws Error when the HTTP response is not OK, when the body is not a JSON
 *   object, or when no model with a usable cap was found. In the last case
 *   nothing is written, so an upstream schema change cannot silently replace
 *   the committed table with an empty one.
 */
async function main(): Promise<void> {
  console.log(`fetching ${SOURCE_URL}`);
  const res = await fetch(SOURCE_URL);
  if (!res.ok) throw new Error(`fetch ${res.status}: ${res.statusText}`);

  const body: unknown = await res.json();
  if (body === null || typeof body !== 'object' || Array.isArray(body)) {
    throw new Error(`unexpected payload from ${SOURCE_URL}: expected a JSON object`);
  }

  const { caps, scanned } = collectChatModelCaps(body as Record<string, unknown>);
  if (caps.length === 0) {
    throw new Error(
      `no chat-mode models with a usable token cap found (${scanned} scanned); refusing to overwrite ${OUT_PATH}`,
    );
  }

  // Sort keys so diffs stay readable when models churn. Plain code-unit
  // comparison is used because localeCompare ordering depends on the host's
  // locale/ICU data and could reorder the file between machines.
  caps.sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0));
  const sorted = Object.fromEntries(caps);

  const payload = {
    _source: SOURCE_URL,
    // Date only (UTC, YYYY-MM-DD), so the field changes at most once per day.
    _generated_at: new Date().toISOString().slice(0, 10),
    _license:
      'BerriAI/litellm is MIT-licensed; see https://github.com/BerriAI/litellm/blob/main/LICENSE',
    models: sorted,
  };

  const json = JSON.stringify(payload, null, 2) + '\n';
  writeFileSync(OUT_PATH, json);
  console.log(
    `wrote ${OUT_PATH} (${Object.keys(sorted).length} models / ${scanned} chat-mode scanned)`,
  );
}
|
|
|
|
/** Logs whatever made the sync fail and terminates the process with exit code 1. */
function exitWithFailure(reason: unknown): void {
  console.error(reason);
  process.exit(1);
}

// Script entry point: run the sync and route any rejection to the handler above.
main().catch(exitWithFailure);
|