mirror of
https://github.com/nexu-io/open-design.git
synced 2026-06-01 03:14:35 +07:00
* feat(senseaudio): BYOK chat with image + video generation tools
Adds SenseAudio as a first-class BYOK chat protocol and wires the daemon's
chat proxy with a tool loop so BYOK users can generate images and videos
without dropping to a CLI agent.
- BYOK protocol: new senseaudio tab + /api/proxy/senseaudio/stream route +
connection-test + provider-models discovery (OpenAI-compatible wire)
- Tool loop: generate_image (synchronous /v1/image/sync) and generate_video
(async /v1/video/create + 5s polling /v1/video/status, 10-min ceiling,
periodic progress log every 30s)
- Settings dropdown + chat-composer dropdown for the BYOK image model
default; generate_image's model enum lets the LLM override per call
- Seed-on-success: a successful BYOK chat call idempotently mirrors the
key into media-config (preserves env-resolved + already-stored keys)
- Generated artifacts land in <projectsRoot>/<projectId>/ so FileViewer,
DesignFilesPanel, and project export pick them up automatically;
legacy /api/byok-image/:id route kept for old conversation links
- Markdown renderer learns  image syntax with a scheme
allowlist (http(s) / data:image/ / blob: / relative paths)
- i18n key settings.byokImageModel across all 19 locales
- 3 SenseAudio image models registered (2.0, 1.0, doubao-seedream-5.0);
1 video model (doubao-seedance-2.0)
- Tests: byok-tools (29), media-senseaudio-image (8), media-config seed
(7), proxy-routes (47), markdown image rendering (8)
* fix(senseaudio): unblock image gen + design file preview switching
- SenseAudio /v1/image/sync rejected the previous size mapping with
`参数错误:size` (1664x936, 936x1664, 1280x960, 960x1280 are not in
the gateway's accepted set). Switched to standard HD / SD sizes that
every aspect bucket can hit: 1024×1024, 1280×720, 720×1280,
1024×768, 768×1024. Kept the byok-tools and media.ts tables in sync
so the BYOK chat tool and the CLI agent path both stop failing on
non-square aspects.
- DesignFilesPanel's <DfPreview> was missing a key prop, so React
reused the same iframe DOM node when the user picked a different
file — the src prop changed but the iframe never navigated. Added
key={previewFile.name} so the previous preview unmounts cleanly.
- Updated byok-tools + media-senseaudio-image tests for the new size
expectations.
* docs(senseaudio): clear stale provider hint + update README
- Settings → Media → SenseAudio: clear the auto-promoted
"Image · TTS · 70+ voices · clone" hint; the provider label alone is
enough now that the BYOK chat surface covers image + video tooling.
- README: list the new senseaudio (and missing ollama) proxy routes so
the BYOK section reflects what the daemon actually serves, and
mention the generate_image / generate_video chat tools that ship
with the SenseAudio path.
* fix(senseaudio): address PR #2065 review feedback
Three non-blocking review notes from @PerishCode on PR #2065:
1. Drop the dead /api/byok-image/:id route. The PR description claimed
it was "legacy fallback for old chat history" but that storage
layout never existed on main, so the route can only ever 400 or
404 — never 200. Removed the handler, the isSafeByokImageId
export, the unused createReadStream / stat / path / Request /
Response imports, and the two byok-image regression tests.
2. Add rejectProxyPluginContext guard to the senseaudio proxy
handler so it matches the invariant the other five proxy paths
already enforce (plugin runs must go through /api/runs for
snapshot pinning). Extended the existing "API fallback rejects
plugin runs" describe to also cover /api/proxy/senseaudio/stream
with the 409 PLUGIN_REQUIRES_DAEMON expectation.
3. Wrap the secondary image / video downloads (the URLs the
SenseAudio gateway hands back in /v1/image/sync .url and
/v1/video/status .video_url) in validateBaseUrlResolved so a
malicious gateway can't point us at 169.254.169.254 (AWS / Azure
metadata) or RFC1918 hosts via the response payload. Also passed
`redirect: 'error'` on both fetches to match the SSRF posture
the primary proxy fetch already uses. The new
assertExternalAssetUrl helper lives next to executeGenerateImage
so future tool downloads can reuse it.
Tests: 120/120 daemon tests pass; guard + typecheck green.
* fix(senseaudio): mirror SSRF guard onto renderSenseAudioImage CLI path
Follow-up to 01b1260a — the chat-tool fix in byok-tools.ts wasn't
mirrored onto the parallel renderSenseAudioImage path in media.ts.
Same attacker-controllable shape (gateway-returned `data.url`),
same one-line fix.
- Hoist assertExternalAssetUrl from byok-tools.ts into
connectionTest.ts next to validateBaseUrlResolved so both call
sites (the BYOK chat tool loop AND the CLI agent media dispatcher)
share one helper. Made the error strings provider-agnostic so a
future caller doesn't get a misleading "senseaudio" attribution
for a Volcengine / Grok / etc. download.
- renderSenseAudioImage now runs the response url through
assertExternalAssetUrl before fetching bytes, and passes
redirect: 'error' to block a 3xx hop into private space.
Scope intentionally limited to the senseaudio path PerishCode
flagged; the other unguarded fetch(entry.url) call sites in
media.ts (OpenAI / Volcengine / Grok / Nano-Banana) are pre-existing
patterns and belong in a separate follow-up if the daemon wants
defense-in-depth across every provider.
Tests: 127/127 daemon tests pass; guard + typecheck green.
---------
Co-authored-by: unknown <mazeliang@sensetime.com>
644 lines
21 KiB
TypeScript
644 lines
21 KiB
TypeScript
/**
|
||
* Single source of truth for the media-generation model registry.
|
||
*
|
||
* Both the frontend (NewProjectPanel model pickers, Settings dialog
|
||
* provider list) and the daemon (od media generate dispatcher) consume
|
||
* this registry. When you add a model entry here, the picker shows it,
|
||
* the daemon can dispatch to it, and the Settings dialog knows which
|
||
* API keys are needed.
|
||
*
|
||
* The model catalogue mirrors the breadth of lobehub's model-bank:
|
||
* every image / video model that lobehub natively supports is listed
|
||
* here so the user can pick from the same surface area without us
|
||
* re-implementing every provider's transport. For provider integrations
|
||
* we only ship the two flagship paths today — OpenAI (gpt-image-*) and
|
||
* Volcengine Ark (Seedance 2.0) — the rest fall back to a placeholder
|
||
* with a clear "no provider integration yet" note. The contract the
|
||
* code agent follows is identical regardless.
|
||
*
|
||
* The daemon imports the JS mirror of this file at
|
||
* daemon/media-models.js (kept in sync by review).
|
||
*/
|
||
|
||
import type { AudioKind, MediaAspect } from '../types';
|
||
|
||
/**
|
||
* Provider identifier — used both as a grouping key in the picker and as
|
||
* the lookup key for API-credentials in `AppConfig.mediaProviders`. New
|
||
* providers must be added to {@link MEDIA_PROVIDERS} below.
|
||
*/
|
||
export type MediaProviderId =
|
||
| 'openai'
|
||
| 'volcengine'
|
||
| 'grok'
|
||
| 'hyperframes'
|
||
| 'nanobanana'
|
||
| 'imagerouter'
|
||
| 'custom-image'
|
||
| 'bfl'
|
||
| 'fal'
|
||
| 'replicate'
|
||
| 'google'
|
||
| 'midjourney'
|
||
| 'kling'
|
||
| 'minimax'
|
||
| 'suno'
|
||
| 'udio'
|
||
| 'elevenlabs'
|
||
| 'fishaudio'
|
||
| 'senseaudio'
|
||
| 'tavily'
|
||
| 'leonardo'
|
||
| 'stub';
|
||
|
||
export interface MediaProvider {
|
||
id: MediaProviderId;
|
||
/** Display name shown in Settings + ModelPicker headers. */
|
||
label: string;
|
||
/** Short marketing-style sub-label. */
|
||
hint: string;
|
||
/** Whether the daemon ships a real integration for this provider. */
|
||
integrated: boolean;
|
||
/** Whether the provider needs user-supplied credentials. */
|
||
credentialsRequired?: boolean;
|
||
/** Whether the provider should appear in Settings -> Media. */
|
||
settingsVisible?: boolean;
|
||
/** Default base URL the daemon hits when no override is configured. */
|
||
defaultBaseUrl?: string;
|
||
/** Documentation URL for getting an API key. */
|
||
docsUrl?: string;
|
||
/** Whether Settings should expose a custom model override field. */
|
||
supportsCustomModel?: boolean;
|
||
/** Placeholder text for custom model override fields in Settings. */
|
||
customModelPlaceholder?: string;
|
||
}
|
||
|
||
/**
|
||
* Catalogue of providers. The Settings dialog renders one section per
|
||
* entry; the new-project model picker uses {@link integrated} to flag
|
||
* cards that will silently fall back to a stub if the user hasn't
|
||
* configured a key.
|
||
*/
|
||
export const MEDIA_PROVIDERS: MediaProvider[] = [
|
||
{
|
||
id: 'openai',
|
||
label: 'OpenAI',
|
||
hint: 'gpt-image-2 / dall-e-3',
|
||
integrated: true,
|
||
defaultBaseUrl: 'https://api.openai.com/v1',
|
||
docsUrl: 'https://platform.openai.com/api-keys',
|
||
},
|
||
{
|
||
id: 'volcengine',
|
||
label: 'Volcengine Ark (Doubao)',
|
||
hint: 'Seedance 2.0 / Seedream',
|
||
integrated: true,
|
||
defaultBaseUrl: 'https://ark.cn-beijing.volces.com/api/v3',
|
||
docsUrl: 'https://console.volcengine.com/ark',
|
||
},
|
||
{
|
||
id: 'grok',
|
||
label: 'xAI Grok Imagine',
|
||
hint: 'grok-imagine — image + video with native audio',
|
||
integrated: true,
|
||
defaultBaseUrl: 'https://api.x.ai/v1',
|
||
docsUrl: 'https://docs.x.ai/developers/model-capabilities/video/generation',
|
||
},
|
||
{
|
||
id: 'hyperframes',
|
||
label: 'HyperFrames',
|
||
hint: 'Local HTML -> MP4 renderer',
|
||
integrated: true,
|
||
credentialsRequired: false,
|
||
settingsVisible: false,
|
||
docsUrl: 'https://hyperframes.heygen.com',
|
||
},
|
||
{
|
||
id: 'nanobanana',
|
||
label: 'Nano Banana',
|
||
hint: 'Google official by default; custom gateway configurable',
|
||
integrated: true,
|
||
defaultBaseUrl: 'https://generativelanguage.googleapis.com',
|
||
docsUrl: 'https://ai.google.dev/gemini-api/docs/api-key',
|
||
supportsCustomModel: true,
|
||
},
|
||
{
|
||
id: 'imagerouter',
|
||
label: 'ImageRouter',
|
||
hint: 'OpenAI-compatible image + video routing',
|
||
integrated: true,
|
||
defaultBaseUrl: 'https://api.imagerouter.io/v1/openai',
|
||
docsUrl: 'https://docs.imagerouter.io/api-reference/image-generation/',
|
||
supportsCustomModel: true,
|
||
customModelPlaceholder: 'openai/gpt-image-2 or xAI/grok-imagine-video',
|
||
},
|
||
{
|
||
id: 'custom-image',
|
||
label: 'Custom Image API',
|
||
hint: 'OpenAI-compatible /v1/images/generations endpoint',
|
||
integrated: true,
|
||
docsUrl: 'https://platform.openai.com/docs/api-reference/images',
|
||
supportsCustomModel: true,
|
||
customModelPlaceholder: 'my-image-model',
|
||
},
|
||
{
|
||
id: 'bfl',
|
||
label: 'Black Forest Labs',
|
||
hint: 'FLUX 1.1 Pro / FLUX Pro / Dev',
|
||
integrated: false,
|
||
defaultBaseUrl: 'https://api.bfl.ai',
|
||
docsUrl: 'https://docs.bfl.ai/quick_start/create_account',
|
||
},
|
||
{
|
||
id: 'fal',
|
||
label: 'Fal.ai',
|
||
hint: 'Sora / Seedance / Veo / FLUX',
|
||
integrated: false,
|
||
defaultBaseUrl: 'https://fal.run',
|
||
docsUrl: 'https://fal.ai/dashboard/keys',
|
||
},
|
||
{
|
||
id: 'leonardo',
|
||
label: 'Leonardo.ai',
|
||
hint: 'Phoenix / Kino XL / FLUX',
|
||
integrated: true,
|
||
credentialsRequired: true,
|
||
settingsVisible: true,
|
||
defaultBaseUrl: 'https://cloud.leonardo.ai/api/rest/v1',
|
||
docsUrl: 'https://docs.leonardo.ai/docs/create-an-api-key',
|
||
},
|
||
{
|
||
id: 'replicate',
|
||
label: 'Replicate',
|
||
hint: 'FLUX / SDXL / Ideogram',
|
||
integrated: false,
|
||
defaultBaseUrl: 'https://api.replicate.com/v1',
|
||
docsUrl: 'https://replicate.com/account/api-tokens',
|
||
},
|
||
{
|
||
id: 'google',
|
||
label: 'Google AI / Vertex',
|
||
hint: 'Imagen 4 / Veo 3 / Lyria',
|
||
integrated: false,
|
||
docsUrl: 'https://ai.google.dev/gemini-api/docs/api-key',
|
||
},
|
||
{
|
||
id: 'kling',
|
||
label: 'Kuaishou Kling',
|
||
hint: 'Kling 1.6 / 2.0 video',
|
||
integrated: false,
|
||
docsUrl: 'https://klingai.com/dev-center',
|
||
},
|
||
{
|
||
id: 'midjourney',
|
||
label: 'Midjourney (proxy)',
|
||
hint: 'midjourney-v7',
|
||
integrated: false,
|
||
},
|
||
{
|
||
id: 'minimax',
|
||
label: 'MiniMax',
|
||
hint: 'TTS / video-01',
|
||
integrated: true,
|
||
defaultBaseUrl: 'https://api.minimaxi.chat/v1',
|
||
docsUrl: 'https://platform.minimaxi.com',
|
||
},
|
||
{
|
||
id: 'suno',
|
||
label: 'Suno',
|
||
hint: 'Music generation',
|
||
integrated: false,
|
||
},
|
||
{
|
||
id: 'udio',
|
||
label: 'Udio',
|
||
hint: 'Music generation',
|
||
integrated: false,
|
||
},
|
||
{
|
||
id: 'elevenlabs',
|
||
label: 'ElevenLabs',
|
||
hint: 'Voice / SFX',
|
||
integrated: true,
|
||
defaultBaseUrl: 'https://api.elevenlabs.io',
|
||
docsUrl: 'https://elevenlabs.io/app/settings/api-keys',
|
||
},
|
||
{
|
||
id: 'fishaudio',
|
||
label: 'FishAudio',
|
||
hint: 'Speech / voice clone',
|
||
integrated: true,
|
||
defaultBaseUrl: 'https://api.fish.audio',
|
||
docsUrl: 'https://fish.audio',
|
||
},
|
||
{
|
||
id: 'senseaudio',
|
||
label: 'SenseAudio',
|
||
hint: '',
|
||
integrated: true,
|
||
defaultBaseUrl: 'https://api.senseaudio.cn',
|
||
docsUrl: 'https://docs.senseaudio.cn',
|
||
},
|
||
{
|
||
id: 'tavily',
|
||
label: 'Tavily Search',
|
||
hint: 'Agent-callable web research',
|
||
integrated: true,
|
||
defaultBaseUrl: 'https://api.tavily.com',
|
||
docsUrl: 'https://app.tavily.com/home',
|
||
},
|
||
{
|
||
id: 'stub',
|
||
label: 'Stub (placeholder)',
|
||
hint: 'Deterministic local placeholder bytes',
|
||
integrated: true,
|
||
// Internal fixture provider used by the daemon for deterministic
|
||
// tests / offline demos. Hidden from Settings the same way
|
||
// HyperFrames is — end users have nothing to configure here, and
|
||
// exposing it pollutes the provider list.
|
||
settingsVisible: false,
|
||
},
|
||
];
|
||
|
||
export interface MediaModel {
|
||
/** Stable ID used in metadata.imageModel / videoModel / audioModel. */
|
||
id: string;
|
||
/** Short label shown in pickers. */
|
||
label: string;
|
||
/** Vendor / context hint shown under the label. */
|
||
hint: string;
|
||
/** Provider this model is dispatched through. */
|
||
provider: MediaProviderId;
|
||
/**
|
||
* Capabilities the agent may rely on when planning. Used downstream by
|
||
* the dispatcher to decide which provider call to make.
|
||
*/
|
||
caps?: string[];
|
||
/** Marks the default-checked card per surface in the picker. */
|
||
default?: boolean;
|
||
}
|
||
|
||
/**
|
||
* Image generation models. Mirrors the breadth of
|
||
* `packages/model-bank/src/aiModels/openai.ts` and friends in lobehub.
|
||
*/
|
||
export const IMAGE_MODELS: MediaModel[] = [
|
||
// OpenAI — fully integrated path.
|
||
{
|
||
id: 'gpt-image-2',
|
||
label: 'gpt-image-2',
|
||
hint: 'OpenAI · 4K, native multimodal',
|
||
provider: 'openai',
|
||
caps: ['t2i', 'i2i', 'inpaint'],
|
||
default: true,
|
||
},
|
||
{
|
||
id: 'gpt-image-1.5',
|
||
label: 'gpt-image-1.5',
|
||
hint: 'OpenAI · 4× faster than gpt-image-1',
|
||
provider: 'openai',
|
||
caps: ['t2i', 'i2i', 'inpaint'],
|
||
},
|
||
{
|
||
id: 'gpt-image-1',
|
||
label: 'gpt-image-1',
|
||
hint: 'OpenAI · ChatGPT native',
|
||
provider: 'openai',
|
||
caps: ['t2i', 'i2i', 'inpaint'],
|
||
},
|
||
{
|
||
id: 'gpt-image-1-mini',
|
||
label: 'gpt-image-1-mini',
|
||
hint: 'OpenAI · low-cost variant',
|
||
provider: 'openai',
|
||
caps: ['t2i', 'i2i'],
|
||
},
|
||
{
|
||
id: 'dall-e-3',
|
||
label: 'dall-e-3',
|
||
hint: 'OpenAI · classic',
|
||
provider: 'openai',
|
||
caps: ['t2i'],
|
||
},
|
||
{
|
||
id: 'dall-e-2',
|
||
label: 'dall-e-2',
|
||
hint: 'OpenAI · legacy',
|
||
provider: 'openai',
|
||
caps: ['t2i'],
|
||
},
|
||
|
||
// Volcengine — Doubao Seedream image generation.
|
||
{
|
||
id: 'doubao-seedream-3-0-t2i-250415',
|
||
label: 'seedream-3.0',
|
||
hint: 'ByteDance · Doubao image',
|
||
provider: 'volcengine',
|
||
caps: ['t2i'],
|
||
},
|
||
{
|
||
id: 'doubao-seededit-3-0-i2i-250628',
|
||
label: 'seededit-3.0',
|
||
hint: 'ByteDance · image edit',
|
||
provider: 'volcengine',
|
||
caps: ['i2i'],
|
||
},
|
||
|
||
// SenseAudio — synchronous /v1/image/sync, Bearer auth, reference URL or data URI.
|
||
{
|
||
id: 'senseaudio-image-2.0-260319',
|
||
label: 'senseaudio-image-2.0',
|
||
hint: 'SenseAudio · multi-aspect, latest',
|
||
provider: 'senseaudio',
|
||
caps: ['t2i', 'i2i'],
|
||
},
|
||
{
|
||
id: 'senseaudio-image-1.0-260319',
|
||
label: 'senseaudio-image-1.0',
|
||
hint: 'SenseAudio · standard',
|
||
provider: 'senseaudio',
|
||
caps: ['t2i', 'i2i'],
|
||
},
|
||
{
|
||
id: 'doubao-seedream-5-0-260128',
|
||
label: 'seedream-5.0',
|
||
hint: 'SenseAudio · ByteDance Seedream 5.0 hi-res',
|
||
provider: 'senseaudio',
|
||
caps: ['t2i', 'i2i'],
|
||
},
|
||
|
||
// xAI Grok Imagine — text-to-image (1k/2k, 11+ aspect ratios).
|
||
{
|
||
id: 'grok-imagine-image',
|
||
label: 'grok-imagine-image',
|
||
hint: 'xAI · 2K text-to-image',
|
||
provider: 'grok',
|
||
caps: ['t2i'],
|
||
},
|
||
|
||
// Nano Banana — Google-compatible generateContent image path.
|
||
{
|
||
id: 'gemini-3.1-flash-image-preview',
|
||
label: 'nano-banana-2',
|
||
hint: 'Nano Banana · text-to-image',
|
||
provider: 'nanobanana',
|
||
caps: ['t2i'],
|
||
},
|
||
|
||
// ImageRouter — OpenAI-compatible routed image models.
|
||
{
|
||
id: 'openai/gpt-image-2',
|
||
label: 'openai/gpt-image-2',
|
||
hint: 'ImageRouter · routed GPT Image',
|
||
provider: 'imagerouter',
|
||
caps: ['t2i'],
|
||
},
|
||
{
|
||
id: 'openai/gpt-image-1.5',
|
||
label: 'openai/gpt-image-1.5',
|
||
hint: 'ImageRouter · routed GPT Image',
|
||
provider: 'imagerouter',
|
||
caps: ['t2i'],
|
||
},
|
||
{
|
||
id: 'black-forest-labs/FLUX-1.1-pro',
|
||
label: 'FLUX-1.1-pro',
|
||
hint: 'ImageRouter · Black Forest Labs',
|
||
provider: 'imagerouter',
|
||
caps: ['t2i'],
|
||
},
|
||
|
||
// Custom OpenAI-compatible /v1/images/generations endpoint.
|
||
{
|
||
id: 'custom-image',
|
||
label: 'custom-image',
|
||
hint: 'Custom · OpenAI-compatible endpoint',
|
||
provider: 'custom-image',
|
||
caps: ['t2i'],
|
||
},
|
||
|
||
// Black Forest Labs FLUX family.
|
||
{ id: 'flux-1.1-pro', label: 'flux-1.1-pro', hint: 'BFL · flagship', provider: 'bfl', caps: ['t2i', 'i2i'] },
|
||
{ id: 'flux-pro', label: 'flux-pro', hint: 'BFL', provider: 'bfl', caps: ['t2i'] },
|
||
{ id: 'flux-dev', label: 'flux-dev', hint: 'BFL · open weights', provider: 'bfl', caps: ['t2i'] },
|
||
{ id: 'flux-schnell', label: 'flux-schnell', hint: 'BFL · fast', provider: 'bfl', caps: ['t2i'] },
|
||
{ id: 'flux-kontext-pro', label: 'flux-kontext-pro', hint: 'BFL · in-context edits', provider: 'bfl', caps: ['t2i', 'i2i'] },
|
||
|
||
// Google.
|
||
{ id: 'imagen-4', label: 'imagen-4', hint: 'Google · latest', provider: 'google', caps: ['t2i'] },
|
||
{ id: 'imagen-3', label: 'imagen-3', hint: 'Google', provider: 'google', caps: ['t2i'] },
|
||
{ id: 'gemini-3-pro-image-preview', label: 'gemini-3-pro-image', hint: 'Google · Nano Banana Pro', provider: 'google', caps: ['t2i', 'i2i'] },
|
||
|
||
// Replicate / Fal hosted image models.
|
||
{ id: 'ideogram-v2', label: 'ideogram-v2', hint: 'Replicate · typography', provider: 'replicate', caps: ['t2i'] },
|
||
{ id: 'sdxl', label: 'stable-diffusion-xl', hint: 'Replicate · SDXL', provider: 'replicate', caps: ['t2i'] },
|
||
{ id: 'sd-3.5', label: 'stable-diffusion-3.5', hint: 'Fal · SD 3.5', provider: 'fal', caps: ['t2i'] },
|
||
|
||
// Leonardo.ai models
|
||
{ id: 'leonardo-phoenix', label: 'Phoenix', hint: 'Leonardo · versatile', provider: 'leonardo', caps: ['t2i'] },
|
||
{ id: 'leonardo-kino-xl', label: 'Kino XL', hint: 'Leonardo · cinematic', provider: 'leonardo', caps: ['t2i'] },
|
||
{ id: 'leonardo-flux-dev', label: 'FLUX Dev', hint: 'Leonardo · FLUX', provider: 'leonardo', caps: ['t2i'] },
|
||
{ id: 'leonardo-flux-schnell', label: 'FLUX Schnell', hint: 'Leonardo · fast', provider: 'leonardo', caps: ['t2i'] },
|
||
{ id: 'leonardo-anime-pastel', label: 'Anime Pastel Dream', hint: 'Leonardo · anime', provider: 'leonardo', caps: ['t2i'] },
|
||
|
||
// Midjourney via community proxies.
|
||
{ id: 'midjourney-v7', label: 'midjourney-v7', hint: 'Midjourney · via proxy', provider: 'midjourney', caps: ['t2i'] },
|
||
];
|
||
|
||
/**
|
||
* Video generation models. Mirrors lobehub's volcengine.ts (Seedance,
|
||
* Seedance Lite), kling.ts and friends.
|
||
*/
|
||
export const VIDEO_MODELS: MediaModel[] = [
|
||
// Volcengine — Seedance 2.0 (integrated).
|
||
{
|
||
id: 'doubao-seedance-2-0-260128',
|
||
label: 'seedance-2.0',
|
||
hint: 'ByteDance · t2v + i2v + audio',
|
||
provider: 'volcengine',
|
||
caps: ['t2v', 'i2v', 'audio'],
|
||
default: true,
|
||
},
|
||
{
|
||
id: 'doubao-seedance-2-0-fast-260128',
|
||
label: 'seedance-2.0-fast',
|
||
hint: 'ByteDance · faster, cheaper',
|
||
provider: 'volcengine',
|
||
caps: ['t2v', 'i2v', 'audio'],
|
||
},
|
||
{
|
||
id: 'doubao-seedance-1-0-pro-250528',
|
||
label: 'seedance-1.0-pro',
|
||
hint: 'ByteDance · 1.0',
|
||
provider: 'volcengine',
|
||
caps: ['t2v', 'i2v'],
|
||
},
|
||
{
|
||
id: 'doubao-seedance-1-0-lite-i2v-250428',
|
||
label: 'seedance-1.0-lite-i2v',
|
||
hint: 'ByteDance · image-to-video',
|
||
provider: 'volcengine',
|
||
caps: ['i2v'],
|
||
},
|
||
{
|
||
id: 'doubao-seedance-1-0-lite-t2v-250428',
|
||
label: 'seedance-1.0-lite-t2v',
|
||
hint: 'ByteDance · text-to-video',
|
||
provider: 'volcengine',
|
||
caps: ['t2v'],
|
||
},
|
||
|
||
// xAI Grok Imagine — 720p t2v + i2v with natively generated audio.
|
||
{
|
||
id: 'grok-imagine-video',
|
||
label: 'grok-imagine-video',
|
||
hint: 'xAI · 720p t2v + i2v + native audio',
|
||
provider: 'grok',
|
||
caps: ['t2v', 'i2v', 'audio'],
|
||
},
|
||
|
||
// ImageRouter — routed video models.
|
||
{
|
||
id: 'xAI/grok-imagine-video',
|
||
label: 'xAI/grok-imagine-video',
|
||
hint: 'ImageRouter · routed video',
|
||
provider: 'imagerouter',
|
||
caps: ['t2v', 'audio'],
|
||
},
|
||
{
|
||
id: 'bytedance/seedance-1.5-pro',
|
||
label: 'seedance-1.5-pro',
|
||
hint: 'ImageRouter · Bytedance',
|
||
provider: 'imagerouter',
|
||
caps: ['t2v'],
|
||
},
|
||
{
|
||
id: 'google/veo-3.1-lite',
|
||
label: 'veo-3.1-lite',
|
||
hint: 'ImageRouter · Google',
|
||
provider: 'imagerouter',
|
||
caps: ['t2v'],
|
||
},
|
||
|
||
// Kuaishou Kling.
|
||
{ id: 'kling-2.0', label: 'kling-2.0', hint: 'Kuaishou · latest', provider: 'kling', caps: ['t2v', 'i2v'] },
|
||
{ id: 'kling-1.6', label: 'kling-1.6', hint: 'Kuaishou', provider: 'kling', caps: ['t2v', 'i2v'] },
|
||
{ id: 'kling-1.5', label: 'kling-1.5', hint: 'Kuaishou', provider: 'kling', caps: ['t2v', 'i2v'] },
|
||
|
||
// Google Veo.
|
||
{ id: 'veo-3', label: 'veo-3', hint: 'Google · sound-on', provider: 'google', caps: ['t2v', 'audio'] },
|
||
{ id: 'veo-2', label: 'veo-2', hint: 'Google', provider: 'google', caps: ['t2v'] },
|
||
|
||
// OpenAI Sora (via Fal hosting today).
|
||
{ id: 'sora-2', label: 'sora-2', hint: 'OpenAI · via Fal', provider: 'fal', caps: ['t2v'] },
|
||
{ id: 'sora-2-pro', label: 'sora-2-pro', hint: 'OpenAI · via Fal', provider: 'fal', caps: ['t2v'] },
|
||
|
||
// MiniMax video.
|
||
{ id: 'minimax-video-01', label: 'video-01', hint: 'MiniMax · Hailuo', provider: 'minimax', caps: ['t2v', 'i2v'] },
|
||
{ id: 'hyperframes-html', label: 'hyperframes-html', hint: 'HyperFrames · local HTML renderer', provider: 'hyperframes', caps: ['t2v'] },
|
||
];
|
||
|
||
export const AUDIO_MODELS_BY_KIND: Record<AudioKind, MediaModel[]> = {
|
||
music: [
|
||
{ id: 'suno-v5', label: 'suno-v5', hint: 'Suno · default', provider: 'suno', caps: ['music'], default: true },
|
||
{ id: 'suno-v4-5', label: 'suno-v4.5', hint: 'Suno', provider: 'suno', caps: ['music'] },
|
||
{ id: 'udio-v2', label: 'udio-v2', hint: 'Udio', provider: 'udio', caps: ['music'] },
|
||
{ id: 'lyria-2', label: 'lyria-2', hint: 'Google', provider: 'google', caps: ['music'] },
|
||
],
|
||
speech: [
|
||
{ id: 'minimax-tts', label: 'minimax-tts', hint: 'MiniMax', provider: 'minimax', caps: ['tts'], default: true },
|
||
{ id: 'fish-speech-2', label: 'fish-speech-2', hint: 'FishAudio', provider: 'fishaudio', caps: ['tts', 'voice-clone'] },
|
||
{ id: 'elevenlabs-v3', label: 'elevenlabs-v3', hint: 'ElevenLabs', provider: 'elevenlabs', caps: ['tts', 'voice-clone'] },
|
||
{ id: 'senseaudio-tts', label: 'senseaudio-tts', hint: 'SenseAudio', provider: 'senseaudio', caps: ['tts', 'voice-clone'] },
|
||
{ id: 'doubao-tts', label: 'doubao-tts', hint: 'Volcengine', provider: 'volcengine', caps: ['tts'] },
|
||
{ id: 'gpt-4o-mini-tts', label: 'gpt-4o-mini-tts', hint: 'OpenAI', provider: 'openai', caps: ['tts'] },
|
||
],
|
||
sfx: [
|
||
{ id: 'elevenlabs-sfx', label: 'elevenlabs-sfx', hint: 'ElevenLabs SFX', provider: 'elevenlabs', caps: ['sfx'], default: true },
|
||
{ id: 'audiocraft', label: 'audiocraft', hint: 'Meta · open', provider: 'replicate', caps: ['sfx', 'music'] },
|
||
],
|
||
};
|
||
|
||
export const MEDIA_ASPECTS: MediaAspect[] = ['1:1', '16:9', '9:16', '4:3', '3:4'];
|
||
|
||
export const VIDEO_LENGTHS_SEC: number[] = [3, 5, 8, 10, 15, 30];
|
||
export const AUDIO_DURATIONS_SEC: number[] = [5, 10, 15, 30, 60, 120];
|
||
|
||
export const DEFAULT_IMAGE_MODEL =
|
||
IMAGE_MODELS.find((m) => m.default)?.id ?? IMAGE_MODELS[0]!.id;
|
||
export const DEFAULT_VIDEO_MODEL =
|
||
VIDEO_MODELS.find((m) => m.default)?.id ?? VIDEO_MODELS[0]!.id;
|
||
export const DEFAULT_AUDIO_MODEL: Record<AudioKind, string> = {
|
||
music:
|
||
AUDIO_MODELS_BY_KIND.music.find((m) => m.default)?.id
|
||
?? AUDIO_MODELS_BY_KIND.music[0]!.id,
|
||
speech:
|
||
AUDIO_MODELS_BY_KIND.speech.find((m) => m.default)?.id
|
||
?? AUDIO_MODELS_BY_KIND.speech[0]!.id,
|
||
sfx:
|
||
AUDIO_MODELS_BY_KIND.sfx.find((m) => m.default)?.id
|
||
?? AUDIO_MODELS_BY_KIND.sfx[0]!.id,
|
||
};
|
||
|
||
/**
|
||
* Look up a model record across all surfaces by ID. Returns null if the
|
||
* agent passes an unknown model — the dispatcher rejects with a clear
|
||
* error so the agent re-plans instead of silently falling back.
|
||
*/
|
||
export function findMediaModel(id: string): MediaModel | null {
|
||
const all: MediaModel[] = [
|
||
...IMAGE_MODELS,
|
||
...VIDEO_MODELS,
|
||
...AUDIO_MODELS_BY_KIND.music,
|
||
...AUDIO_MODELS_BY_KIND.speech,
|
||
...AUDIO_MODELS_BY_KIND.sfx,
|
||
];
|
||
return all.find((m) => m.id === id) ?? null;
|
||
}
|
||
|
||
export function findProvider(id: MediaProviderId): MediaProvider | null {
|
||
return MEDIA_PROVIDERS.find((p) => p.id === id) ?? null;
|
||
}
|
||
|
||
/** All model IDs grouped by surface, used for prompt-side disclosure. */
|
||
export function modelIdsBySurface(): {
|
||
image: string[];
|
||
video: string[];
|
||
audio: { music: string[]; speech: string[]; sfx: string[] };
|
||
} {
|
||
return {
|
||
image: IMAGE_MODELS.map((m) => m.id),
|
||
video: VIDEO_MODELS.map((m) => m.id),
|
||
audio: {
|
||
music: AUDIO_MODELS_BY_KIND.music.map((m) => m.id),
|
||
speech: AUDIO_MODELS_BY_KIND.speech.map((m) => m.id),
|
||
sfx: AUDIO_MODELS_BY_KIND.sfx.map((m) => m.id),
|
||
},
|
||
};
|
||
}
|
||
|
||
/**
|
||
* Group a flat list of {@link MediaModel} by provider while preserving
|
||
* the catalogue order. Used by the picker to render section headers.
|
||
*/
|
||
export function groupByProvider(models: MediaModel[]): Array<{
|
||
provider: MediaProvider;
|
||
models: MediaModel[];
|
||
}> {
|
||
const order: MediaProviderId[] = [];
|
||
const map = new Map<MediaProviderId, MediaModel[]>();
|
||
for (const m of models) {
|
||
if (!map.has(m.provider)) {
|
||
order.push(m.provider);
|
||
map.set(m.provider, []);
|
||
}
|
||
map.get(m.provider)!.push(m);
|
||
}
|
||
return order
|
||
.map((id) => {
|
||
const provider = findProvider(id);
|
||
const list = map.get(id) ?? [];
|
||
return provider ? { provider, models: list } : null;
|
||
})
|
||
.filter((entry): entry is { provider: MediaProvider; models: MediaModel[] } => entry != null);
|
||
}
|