Wire web research provider fallback order

This commit is contained in:
a1chzt 2026-05-07 20:16:52 +08:00
parent 23630d1746
commit 4c9e16036b
5 changed files with 627 additions and 67 deletions

View file

@ -0,0 +1,115 @@
import type { ResearchSource } from '@open-design/contracts/api/research';
/** Exa API origin used when the caller does not supply `baseUrl`. */
const DEFAULT_BASE_URL = 'https://api.exa.ai';
/** Milliseconds after which the internal abort controller cancels the request. */
const DEFAULT_TIMEOUT_MS = 30_000;
/** Upper bound applied to the requested result count before it is sent as `numResults`. */
const EXA_MAX_RESULTS_LIMIT = 20;
/** Arguments accepted by `exaSearch`. */
export interface ExaSearchInput {
/** Sent as the `x-api-key` request header; an empty value makes `exaSearch` throw. */
apiKey: string;
/** Overrides the default API origin; trailing slashes are stripped before `/search` is appended. */
baseUrl?: string;
/** Search text, forwarded as the `query` field of the request body. */
query: string;
/** Desired number of results; defaults to 5 and is clamped to 0..`EXA_MAX_RESULTS_LIMIT`. */
maxResults?: number;
/** Optional caller signal whose `abort` event cancels the request. */
signal?: AbortSignal;
}
/**
 * Loosely typed shape of one entry of the response `results` array.
 * Every field is `unknown` because the payload is not validated; each one is
 * type-checked before it is copied into a `ResearchSource`.
 */
interface ExaRawResult {
title?: unknown;
url?: unknown;
/** Used for the snippet when `summary` is absent or blank. */
text?: unknown;
/** Preferred snippet text when it is a non-blank string. */
summary?: unknown;
/** Preferred publication date field. */
publishedDate?: unknown;
/** Snake-case date field consulted when `publishedDate` is not a non-blank string. */
published_date?: unknown;
}
/** Unvalidated top-level response body; only `results` is read. */
interface ExaRawResponse {
/** Treated as the list of hits when it is an array, otherwise ignored. */
results?: unknown;
}
/** Result of `exaSearch`. */
export interface ExaSearchOutput {
/** Synthesized answer text; `exaSearch` always returns an empty string here. */
answer: string;
/** Normalized hits, one per raw result that carried a string `url`. */
sources: ResearchSource[];
}
/**
 * Error thrown by `exaSearch` for a missing API key, a failed request,
 * or a non-OK HTTP response.
 */
export class ExaError extends Error {
constructor(
message: string,
/** HTTP status of the non-OK response; left undefined for other failures. */
public readonly status?: number,
) {
super(message);
// Replace the inherited "Error" name so the class is identifiable in logs.
this.name = 'ExaError';
}
}
/**
 * Sends one `POST {base}/search` request to Exa and normalizes the hits into
 * `ResearchSource` records.
 *
 * The request (including reading the response body) is cancelled after
 * `DEFAULT_TIMEOUT_MS`, or as soon as `input.signal` aborts. A signal that is
 * already aborted when the function is called cancels the request immediately.
 *
 * @param input API key, query and optional base URL / result cap / abort signal.
 * @returns An empty `answer` plus one source per raw result that has a usable URL.
 * @throws {ExaError} When the API key is empty, the request fails or times out,
 *   the response status is not OK (the status is attached), or the body is not JSON.
 */
export async function exaSearch(input: ExaSearchInput): Promise<ExaSearchOutput> {
  if (!input.apiKey) {
    throw new ExaError('Exa API key is not configured');
  }
  const base = (input.baseUrl || DEFAULT_BASE_URL).replace(/\/+$/, '');
  // NaN would pass straight through Math.min/Math.max and serialize as `null`.
  const requestedMax =
    input.maxResults === undefined || Number.isNaN(input.maxResults)
      ? 5
      : input.maxResults;
  const maxResults = Math.max(0, Math.min(requestedMax, EXA_MAX_RESULTS_LIMIT));
  const body = {
    query: input.query,
    text: true,
    numResults: maxResults,
  };

  const ctrl = new AbortController();
  let timedOut = false;
  const timer = setTimeout(() => {
    timedOut = true;
    ctrl.abort();
  }, DEFAULT_TIMEOUT_MS);
  const onCallerAbort = (): void => ctrl.abort();
  if (input.signal) {
    // The `abort` event never fires for a signal that is already aborted,
    // so that state has to be checked explicitly.
    if (input.signal.aborted) ctrl.abort();
    else input.signal.addEventListener('abort', onCallerAbort, { once: true });
  }

  try {
    let resp: Response;
    try {
      resp = await fetch(`${base}/search`, {
        method: 'POST',
        headers: {
          'content-type': 'application/json',
          'x-api-key': input.apiKey,
        },
        body: JSON.stringify(body),
        signal: ctrl.signal,
      });
    } catch (err) {
      const reason = timedOut
        ? `timed out after ${DEFAULT_TIMEOUT_MS}ms`
        : err instanceof Error && err.message
          ? err.message
          : String(err);
      throw new ExaError(`Exa request failed: ${reason}`);
    }

    if (!resp.ok) {
      const text = await resp.text().catch(() => '');
      throw new ExaError(
        `Exa ${resp.status}: ${text.slice(0, 200) || 'no body'}`,
        resp.status,
      );
    }

    let parsed: unknown;
    try {
      parsed = await resp.json();
    } catch (err) {
      const reason = timedOut
        ? `timed out after ${DEFAULT_TIMEOUT_MS}ms`
        : err instanceof Error && err.message
          ? err.message
          : String(err);
      throw new ExaError(`Exa response could not be read: ${reason}`, resp.status);
    }

    // A JSON body of `null` or a primitive has no `results` to read.
    const json: ExaRawResponse =
      parsed !== null && typeof parsed === 'object'
        ? (parsed as ExaRawResponse)
        : {};
    const rawResults: unknown[] = Array.isArray(json.results) ? json.results : [];

    const sources: ResearchSource[] = [];
    for (const entry of rawResults) {
      // Skip `null` / primitive entries instead of crashing on property access.
      if (entry === null || typeof entry !== 'object') continue;
      const r = entry as ExaRawResult;
      const url = typeof r.url === 'string' ? r.url.trim() : '';
      if (!url) continue;
      const publishedAt =
        typeof r.publishedDate === 'string' && r.publishedDate.trim()
          ? r.publishedDate.trim()
          : typeof r.published_date === 'string' && r.published_date.trim()
            ? r.published_date.trim()
            : null;
      const snippet =
        typeof r.summary === 'string' && r.summary.trim()
          ? r.summary.trim()
          : typeof r.text === 'string'
            ? r.text.trim().slice(0, 800)
            : '';
      sources.push({
        title:
          typeof r.title === 'string' && r.title.trim() ? r.title.trim() : url,
        url,
        snippet,
        provider: 'exa',
        ...(publishedAt ? { publishedAt } : {}),
      });
    }
    return { answer: '', sources };
  } finally {
    // Runs after the body has been consumed so the timeout also bounds the
    // download, and so the listener is not left attached to a long-lived signal.
    clearTimeout(timer);
    input.signal?.removeEventListener('abort', onCallerAbort);
  }
}

View file

@ -4,10 +4,14 @@ import type {
ResearchSource,
} from '@open-design/contracts/api/research';
import { resolveProviderConfig } from '../media-config.js';
import { exaSearch, ExaError } from './exa.js';
import { perplexitySearch, PerplexityError } from './perplexity.js';
import { tavilySearch, TavilyError } from './tavily.js';
const DEFAULT_MAX_SOURCES = 5;
const TAVILY_MAX_RESULTS_LIMIT = 20;
/**
 * Supported web research providers, in the order they are tried when the
 * caller does not name any providers.
 */
const WEB_RESEARCH_PROVIDER_ORDER = ['exa', 'perplexity', 'tavily'] as const;
/** Union of the provider ids listed in `WEB_RESEARCH_PROVIDER_ORDER`. */
type WebResearchProvider = (typeof WEB_RESEARCH_PROVIDER_ORDER)[number];
export class ResearchError extends Error {
constructor(
@ -36,64 +40,135 @@ export async function searchResearch(
throw new ResearchError('query required', 400, 'QUERY_REQUIRED');
}
const depth: ResearchDepth = 'shallow';
const requested = Array.isArray(input.providers) ? input.providers : [];
const providers = requested.filter(
(p: unknown): p is string => typeof p === 'string' && p.length > 0,
);
const provider = providers[0] ?? 'tavily';
const providers = resolveProviderOrder(input.providers);
const maxSources = clampMaxSources(input.maxSources);
const providerErrors: string[] = [];
let sawConfiguredProvider = false;
if (provider !== 'tavily') {
throw new ResearchError(
`provider "${provider}" not supported in Phase 1`,
400,
'UNSUPPORTED_RESEARCH_PROVIDER',
);
for (const provider of providers) {
const cfg = await resolveProviderConfig(input.projectRoot, provider);
if (!cfg.apiKey) continue;
sawConfiguredProvider = true;
try {
const out = await runProviderSearch(provider, {
apiKey: cfg.apiKey,
baseUrl: cfg.baseUrl,
query,
maxSources,
...(input.signal ? { signal: input.signal } : {}),
});
if (out.sources.length === 0) {
providerErrors.push(`${provider}: no sources found`);
continue;
}
return {
query,
summary: out.answer || synthesizeFallbackSummary(out.sources),
sources: out.sources,
provider,
depth,
fetchedAt: Date.now(),
};
} catch (err) {
providerErrors.push(`${provider}: ${providerErrorMessage(err)}`);
}
}
const cfg = await resolveProviderConfig(input.projectRoot, 'tavily');
if (!cfg.apiKey) {
if (!sawConfiguredProvider) {
throw new ResearchError(
'Tavily API key not configured (Settings -> Tavily Search)',
'No web research provider API key configured (configure Exa, Perplexity, or Tavily in Settings -> Media providers)',
400,
'TAVILY_API_KEY_MISSING',
'WEB_RESEARCH_PROVIDER_KEY_MISSING',
);
}
throw new ResearchError(
`All configured web research providers failed: ${providerErrors.join('; ')}`,
502,
'RESEARCH_PROVIDER_FAILED',
);
}
let answer = '';
let sources: ResearchSource[] = [];
try {
const out = await tavilySearch({
apiKey: cfg.apiKey,
query,
searchDepth: 'basic',
maxResults: maxSources,
includeAnswer: true,
...(cfg.baseUrl ? { baseUrl: cfg.baseUrl } : {}),
/**
 * Turns the caller-supplied provider preference into a validated, de-duplicated
 * list of supported provider ids.
 *
 * Non-string and blank entries are dropped; when nothing usable remains the
 * default `WEB_RESEARCH_PROVIDER_ORDER` is used. Names are matched after
 * trimming and lower-casing, and the first occurrence of each id wins.
 *
 * @throws {ResearchError} 400 `UNSUPPORTED_RESEARCH_PROVIDER` for
 *   `financialdatasets` or any other name outside the supported set.
 */
function resolveProviderOrder(providers: unknown): WebResearchProvider[] {
  const named: string[] = [];
  if (Array.isArray(providers)) {
    for (const candidate of providers as unknown[]) {
      if (typeof candidate === 'string' && candidate.trim().length > 0) {
        named.push(candidate);
      }
    }
  }

  const candidates: readonly string[] =
    named.length === 0 ? WEB_RESEARCH_PROVIDER_ORDER : named;

  // A Set keeps first-insertion order, which gives order-preserving de-duplication.
  const accepted = new Set<WebResearchProvider>();
  for (const original of candidates) {
    const id = original.trim().toLowerCase();
    if (id === 'financialdatasets') {
      throw new ResearchError(
        'Financial Datasets is not a web search provider; use Exa, Perplexity, or Tavily for research search',
        400,
        'UNSUPPORTED_RESEARCH_PROVIDER',
      );
    }
    if (!isWebResearchProvider(id)) {
      // The message echoes the caller's original spelling, not the normalized id.
      throw new ResearchError(
        `provider "${original}" not supported for web research`,
        400,
        'UNSUPPORTED_RESEARCH_PROVIDER',
      );
    }
    accepted.add(id);
  }
  return Array.from(accepted);
}
/** Type guard: true when `value` is one of the ids in `WEB_RESEARCH_PROVIDER_ORDER`. */
function isWebResearchProvider(value: string): value is WebResearchProvider {
  const supported: readonly string[] = WEB_RESEARCH_PROVIDER_ORDER;
  return supported.indexOf(value) !== -1;
}
/**
 * Dispatches one search to the client that matches `provider`.
 *
 * `exa` and `perplexity` are routed to their own clients; any other value
 * falls through to Tavily, which is called with `searchDepth: 'basic'` and
 * `includeAnswer: true`. `baseUrl` and `signal` are forwarded only when set,
 * and `maxSources` is passed to every client as `maxResults`.
 */
async function runProviderSearch(
provider: WebResearchProvider,
input: {
apiKey: string;
baseUrl?: string;
query: string;
maxSources: number;
signal?: AbortSignal;
},
): Promise<{ answer: string; sources: ResearchSource[] }> {
if (provider === 'exa') {
return exaSearch({
apiKey: input.apiKey,
query: input.query,
maxResults: input.maxSources,
...(input.baseUrl ? { baseUrl: input.baseUrl } : {}),
...(input.signal ? { signal: input.signal } : {}),
});
answer = out.answer;
sources = out.sources;
} catch (err) {
const message =
err instanceof TavilyError
? err.message
: `research failed: ${(err as Error).message || String(err)}`;
throw new ResearchError(message, 502, 'RESEARCH_PROVIDER_FAILED');
}
if (sources.length === 0) {
throw new ResearchError('no sources found', 404, 'NO_RESEARCH_SOURCES');
if (provider === 'perplexity') {
return perplexitySearch({
apiKey: input.apiKey,
query: input.query,
maxResults: input.maxSources,
...(input.baseUrl ? { baseUrl: input.baseUrl } : {}),
...(input.signal ? { signal: input.signal } : {}),
});
}
return tavilySearch({
apiKey: input.apiKey,
query: input.query,
searchDepth: 'basic',
maxResults: input.maxSources,
includeAnswer: true,
...(input.baseUrl ? { baseUrl: input.baseUrl } : {}),
...(input.signal ? { signal: input.signal } : {}),
});
}
return {
query,
summary: answer || synthesizeFallbackSummary(sources),
sources,
provider,
depth,
fetchedAt: Date.now(),
};
/**
 * Extracts a human-readable message from whatever a provider call threw.
 *
 * Provider errors return their own message. Any other value is handled
 * without assuming it is an `Error`, so a thrown `null`, `undefined` or
 * primitive is stringified instead of raising a `TypeError` from inside the
 * caller's `catch` block.
 */
function providerErrorMessage(err: unknown): string {
  if (
    err instanceof ExaError ||
    err instanceof PerplexityError ||
    err instanceof TavilyError
  ) {
    return err.message;
  }
  if (typeof err === 'object' && err !== null && 'message' in err) {
    const message = (err as { message?: unknown }).message;
    if (typeof message === 'string' && message) return message;
  }
  return String(err);
}
function synthesizeFallbackSummary(sources: ResearchSource[]): string {

View file

@ -0,0 +1,159 @@
import type { ResearchSource } from '@open-design/contracts/api/research';
/** Perplexity API origin used when the caller does not supply `baseUrl`. */
const DEFAULT_BASE_URL = 'https://api.perplexity.ai';
/** Milliseconds after which the internal abort controller cancels the request. */
const DEFAULT_TIMEOUT_MS = 30_000;
/**
 * Upper bound on the number of sources returned. The cap is applied to the
 * parsed response; it is not part of the request body.
 */
const PERPLEXITY_MAX_RESULTS_LIMIT = 20;
/** Arguments accepted by `perplexitySearch`. */
export interface PerplexitySearchInput {
/** Sent as a `Bearer` token in the `authorization` header; an empty value makes the call throw. */
apiKey: string;
/** Overrides the default API origin; trailing slashes are stripped before `/v1/sonar` is appended. */
baseUrl?: string;
/** Search text, sent as the content of a single `user` message. */
query: string;
/** Maximum number of sources to return; defaults to 5 and is clamped to 0..`PERPLEXITY_MAX_RESULTS_LIMIT`. */
maxResults?: number;
/** Optional caller signal whose `abort` event cancels the request. */
signal?: AbortSignal;
}
/**
 * Loosely typed shape of one entry of the response `search_results` array.
 * Every field is `unknown` and is type-checked before use.
 */
interface PerplexityRawSearchResult {
title?: unknown;
url?: unknown;
snippet?: unknown;
/** Preferred publication date field. */
date?: unknown;
/** Consulted for the publication date when `date` is not a non-blank string. */
last_updated?: unknown;
}
/** Unvalidated top-level response body; each field is checked with `Array.isArray` before use. */
interface PerplexityRawResponse {
/** Source of the answer text, read from `choices[].message.content`. */
choices?: unknown;
/** Plain URL list used for sources only when `search_results` yields none. */
citations?: unknown;
/** Preferred source list. */
search_results?: unknown;
}
/** Result of `perplexitySearch`. */
export interface PerplexitySearchOutput {
/** Trimmed content of the first choice with non-blank text, or an empty string. */
answer: string;
/** Normalized sources taken from `search_results`, or from `citations` as a fallback. */
sources: ResearchSource[];
}
/**
 * Error thrown by `perplexitySearch` for a missing API key, a failed request,
 * or a non-OK HTTP response.
 */
export class PerplexityError extends Error {
constructor(
message: string,
/** HTTP status of the non-OK response; left undefined for other failures. */
public readonly status?: number,
) {
super(message);
// Replace the inherited "Error" name so the class is identifiable in logs.
this.name = 'PerplexityError';
}
}
/**
 * Sends one `POST {base}/v1/sonar` request to Perplexity and returns the
 * answer text together with normalized sources.
 *
 * Sources come from `search_results`; when that yields nothing, the plain
 * `citations` URL list is used instead. At most `maxResults` sources are
 * returned. The request (including reading the response body) is cancelled
 * after `DEFAULT_TIMEOUT_MS`, or as soon as `input.signal` aborts. A signal
 * that is already aborted on entry cancels the request immediately.
 *
 * @param input API key, query and optional base URL / result cap / abort signal.
 * @throws {PerplexityError} When the API key is empty, the request fails or
 *   times out, the response status is not OK (the status is attached), or the
 *   body is not JSON.
 */
export async function perplexitySearch(
  input: PerplexitySearchInput,
): Promise<PerplexitySearchOutput> {
  if (!input.apiKey) {
    throw new PerplexityError('Perplexity API key is not configured');
  }
  const base = (input.baseUrl || DEFAULT_BASE_URL).replace(/\/+$/, '');
  const maxResults = Math.max(
    0,
    Math.min(input.maxResults ?? 5, PERPLEXITY_MAX_RESULTS_LIMIT),
  );
  const body = {
    model: 'sonar',
    messages: [{ role: 'user', content: input.query }],
  };

  const ctrl = new AbortController();
  let timedOut = false;
  const timer = setTimeout(() => {
    timedOut = true;
    ctrl.abort();
  }, DEFAULT_TIMEOUT_MS);
  const onCallerAbort = (): void => ctrl.abort();
  if (input.signal) {
    // The `abort` event never fires for a signal that is already aborted,
    // so that state has to be checked explicitly.
    if (input.signal.aborted) ctrl.abort();
    else input.signal.addEventListener('abort', onCallerAbort, { once: true });
  }

  try {
    let resp: Response;
    try {
      resp = await fetch(`${base}/v1/sonar`, {
        method: 'POST',
        headers: {
          'content-type': 'application/json',
          authorization: `Bearer ${input.apiKey}`,
        },
        body: JSON.stringify(body),
        signal: ctrl.signal,
      });
    } catch (err) {
      const reason = timedOut
        ? `timed out after ${DEFAULT_TIMEOUT_MS}ms`
        : err instanceof Error && err.message
          ? err.message
          : String(err);
      throw new PerplexityError(`Perplexity request failed: ${reason}`);
    }

    if (!resp.ok) {
      const text = await resp.text().catch(() => '');
      throw new PerplexityError(
        `Perplexity ${resp.status}: ${text.slice(0, 200) || 'no body'}`,
        resp.status,
      );
    }

    let parsed: unknown;
    try {
      parsed = await resp.json();
    } catch (err) {
      const reason = timedOut
        ? `timed out after ${DEFAULT_TIMEOUT_MS}ms`
        : err instanceof Error && err.message
          ? err.message
          : String(err);
      throw new PerplexityError(
        `Perplexity response could not be read: ${reason}`,
        resp.status,
      );
    }

    // A JSON body of `null` or a primitive has no fields to read.
    const json: PerplexityRawResponse =
      parsed !== null && typeof parsed === 'object'
        ? (parsed as PerplexityRawResponse)
        : {};

    const answer = extractAnswer(json);
    const rawSearchResults: unknown[] = Array.isArray(json.search_results)
      ? json.search_results
      : [];
    // The extra slice enforces the cap here regardless of how the helper treats a zero limit.
    const sources = normalizeSearchResults(rawSearchResults, maxResults).slice(
      0,
      maxResults,
    );
    if (sources.length > 0) {
      return { answer, sources };
    }

    // Fallback: bare citation URLs carry no title or snippet.
    const citations: unknown[] = Array.isArray(json.citations)
      ? json.citations
      : [];
    const fallback: ResearchSource[] = [];
    for (const value of citations) {
      if (fallback.length >= maxResults) break;
      if (typeof value !== 'string') continue;
      const url = value.trim();
      if (!url) continue;
      fallback.push({ title: url, url, snippet: '', provider: 'perplexity' });
    }
    return { answer, sources: fallback };
  } finally {
    // Runs after the body has been consumed so the timeout also bounds the
    // download, and so the listener is not left attached to a long-lived signal.
    clearTimeout(timer);
    input.signal?.removeEventListener('abort', onCallerAbort);
  }
}
/**
 * Returns the trimmed `message.content` of the first choice whose content is a
 * non-blank string, or an empty string when `choices` is not an array or no
 * choice qualifies.
 */
function extractAnswer(json: PerplexityRawResponse): string {
  const isObject = (value: unknown): value is Record<string, unknown> =>
    typeof value === 'object' && value !== null;

  const choices: unknown = json.choices;
  if (!Array.isArray(choices)) return '';

  for (const candidate of choices as unknown[]) {
    const message = isObject(candidate) ? candidate.message : undefined;
    const content = isObject(message) ? message.content : undefined;
    if (typeof content !== 'string') continue;
    const trimmed = content.trim();
    if (trimmed) return trimmed;
  }
  return '';
}
/**
 * Converts raw `search_results` entries into `ResearchSource` records.
 *
 * Entries that are not objects, or that lack a non-blank string `url`, are
 * skipped. The title falls back to the URL, the snippet is trimmed and capped
 * at 800 characters, and `publishedAt` is taken from `date` or, failing that,
 * `last_updated`.
 *
 * @param rawSearchResults Unvalidated entries from the response body.
 * @param maxResults Maximum number of sources to return; zero or less yields an empty list.
 */
function normalizeSearchResults(
  rawSearchResults: unknown[],
  maxResults: number,
): ResearchSource[] {
  const sources: ResearchSource[] = [];
  for (const entry of rawSearchResults) {
    // Check the cap before adding, so a limit of 0 really returns nothing.
    if (sources.length >= maxResults) break;
    // Skip `null` / primitive entries instead of crashing on property access.
    if (entry === null || typeof entry !== 'object') continue;
    const r = entry as PerplexityRawSearchResult;
    const url = typeof r.url === 'string' ? r.url.trim() : '';
    if (!url) continue;
    const publishedAt =
      typeof r.date === 'string' && r.date.trim()
        ? r.date.trim()
        : typeof r.last_updated === 'string' && r.last_updated.trim()
          ? r.last_updated.trim()
          : null;
    sources.push({
      title:
        typeof r.title === 'string' && r.title.trim() ? r.title.trim() : url,
      url,
      snippet:
        typeof r.snippet === 'string' ? r.snippet.trim().slice(0, 800) : '',
      provider: 'perplexity',
      ...(publishedAt ? { publishedAt } : {}),
    });
  }
  return sources;
}

View file

@ -1,23 +1,38 @@
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import path from 'node:path';
import { afterEach, describe, expect, it, vi } from 'vitest';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { searchResearch, ResearchError } from '../src/research/index.js';
const TAVILY_ENV_KEYS = ['OD_TAVILY_API_KEY', 'TAVILY_API_KEY'];
// Provider API-key environment variables that the suite snapshots up front,
// deletes before every test, and restores to their original values afterwards.
const RESEARCH_ENV_KEYS = [
'OD_EXASEARCH_API_KEY',
'EXASEARCH_API_KEY',
'EXA_API_KEY',
'OD_PERPLEXITY_API_KEY',
'PERPLEXITY_API_KEY',
'OD_TAVILY_API_KEY',
'TAVILY_API_KEY',
'OD_FINANCIAL_DATASETS_API_KEY',
'FINANCIAL_DATASETS_API_KEY',
];
type FetchInput = Parameters<typeof fetch>[0];
type FetchInit = Parameters<typeof fetch>[1];
describe('research search', () => {
const originalEnv = Object.fromEntries(
TAVILY_ENV_KEYS.map((key) => [key, process.env[key]]),
RESEARCH_ENV_KEYS.map((key) => [key, process.env[key]]),
);
let projectRoot: string | null = null;
beforeEach(() => {
for (const key of RESEARCH_ENV_KEYS) delete process.env[key];
});
afterEach(async () => {
vi.unstubAllGlobals();
for (const key of TAVILY_ENV_KEYS) {
for (const key of RESEARCH_ENV_KEYS) {
if (originalEnv[key] == null) delete process.env[key];
else process.env[key] = originalEnv[key];
}
@ -31,34 +46,150 @@ describe('research search', () => {
return projectRoot;
}
it('requires a Tavily API key', async () => {
for (const key of TAVILY_ENV_KEYS) delete process.env[key];
/** Builds a fresh 200 JSON `Response` shaped like an Exa reply with a single hit. */
function exaResponse() {
  const payload = {
    results: [
      {
        title: 'EV Exa report',
        url: 'https://example.com/exa-ev',
        text: 'Exa found EV market growth.',
        publishedDate: '2025-04-01T00:00:00.000Z',
      },
    ],
  };
  const init = { status: 200, headers: { 'content-type': 'application/json' } };
  return new Response(JSON.stringify(payload), init);
}
/**
 * Builds a fresh 200 JSON `Response` shaped like a Perplexity reply with one
 * answer choice and one search result.
 */
function perplexityResponse() {
  const payload = {
    choices: [
      {
        message: {
          content: 'Perplexity says EV adoption is rising.',
        },
      },
    ],
    search_results: [
      {
        title: 'Perplexity EV report',
        url: 'https://example.com/perplexity-ev',
        snippet: 'Perplexity found EV market adoption.',
        date: '2025-04-02',
      },
    ],
  };
  const init = { status: 200, headers: { 'content-type': 'application/json' } };
  return new Response(JSON.stringify(payload), init);
}
/** Builds a fresh 200 JSON `Response` shaped like a Tavily reply with an answer and one result. */
function tavilyResponse() {
  const payload = {
    answer: 'EV sales are growing.',
    results: [
      {
        title: 'EV report',
        url: 'https://example.com/ev',
        content: 'EV adoption increased in 2025.',
        published_date: '2025-05-01',
      },
    ],
  };
  const init = { status: 200, headers: { 'content-type': 'application/json' } };
  return new Response(JSON.stringify(payload), init);
}
it('requires a configured web research provider key', async () => {
await expect(
searchResearch({ projectRoot: await tempProjectRoot(), query: 'EV trends' }),
).rejects.toMatchObject({
code: 'TAVILY_API_KEY_MISSING',
code: 'WEB_RESEARCH_PROVIDER_KEY_MISSING',
status: 400,
} satisfies Partial<ResearchError>);
});
it('uses Exa first when an Exa key is configured', async () => {
process.env.OD_EXASEARCH_API_KEY = 'exa-test';
process.env.OD_TAVILY_API_KEY = 'tvly-test';
const fetchMock = vi.fn(async (_input: FetchInput, _init?: FetchInit) =>
exaResponse(),
);
vi.stubGlobal('fetch', fetchMock);
const findings = await searchResearch({
projectRoot: await tempProjectRoot(),
query: 'EV market 2025 trends',
maxSources: 5,
});
expect(findings).toMatchObject({
query: 'EV market 2025 trends',
provider: 'exa',
depth: 'shallow',
sources: [
{
title: 'EV Exa report',
url: 'https://example.com/exa-ev',
snippet: 'Exa found EV market growth.',
provider: 'exa',
publishedAt: '2025-04-01T00:00:00.000Z',
},
],
});
const [url, init] = fetchMock.mock.calls[0] as [FetchInput, FetchInit];
expect(String(url)).toBe('https://api.exa.ai/search');
expect((init!.headers as Record<string, string>)['x-api-key']).toBe('exa-test');
expect(JSON.parse(String(init!.body))).toMatchObject({
query: 'EV market 2025 trends',
text: true,
numResults: 5,
});
});
it('uses Perplexity when Exa is not configured', async () => {
process.env.PERPLEXITY_API_KEY = 'pplx-test';
process.env.OD_TAVILY_API_KEY = 'tvly-test';
const fetchMock = vi.fn(async (_input: FetchInput, _init?: FetchInit) =>
perplexityResponse(),
);
vi.stubGlobal('fetch', fetchMock);
const findings = await searchResearch({
projectRoot: await tempProjectRoot(),
query: 'EV market 2025 trends',
});
expect(findings).toMatchObject({
summary: 'Perplexity says EV adoption is rising.',
provider: 'perplexity',
sources: [
{
title: 'Perplexity EV report',
url: 'https://example.com/perplexity-ev',
snippet: 'Perplexity found EV market adoption.',
provider: 'perplexity',
publishedAt: '2025-04-02',
},
],
});
const [url, init] = fetchMock.mock.calls[0] as [FetchInput, FetchInit];
expect(String(url)).toBe('https://api.perplexity.ai/v1/sonar');
expect((init!.headers as Record<string, string>).authorization).toBe(
'Bearer pplx-test',
);
expect(JSON.parse(String(init!.body))).toMatchObject({
model: 'sonar',
messages: [{ role: 'user', content: 'EV market 2025 trends' }],
});
});
it('uses shallow Tavily search and normalizes JSON findings', async () => {
process.env.OD_TAVILY_API_KEY = 'tvly-test';
const fetchMock = vi.fn(async (_input: FetchInput, _init?: FetchInit) =>
new Response(
JSON.stringify({
answer: 'EV sales are growing.',
results: [
{
title: 'EV report',
url: 'https://example.com/ev',
content: 'EV adoption increased in 2025.',
published_date: '2025-05-01',
},
],
}),
{ status: 200, headers: { 'content-type': 'application/json' } },
),
tavilyResponse(),
);
vi.stubGlobal('fetch', fetchMock);
@ -83,7 +214,11 @@ describe('research search', () => {
},
],
});
const [, init] = fetchMock.mock.calls[0] as [FetchInput, FetchInit];
const [url, init] = fetchMock.mock.calls[0] as [FetchInput, FetchInit];
expect(String(url)).toBe('https://api.tavily.com/search');
expect((init!.headers as Record<string, string>).authorization).toBe(
'Bearer tvly-test',
);
const body = JSON.parse(String(init!.body));
expect(body).toMatchObject({
query: 'EV market 2025 trends',
@ -93,4 +228,80 @@ describe('research search', () => {
include_raw_content: false,
});
});
it('falls back from a failing Exa request to configured Perplexity', async () => {
process.env.EXASEARCH_API_KEY = 'exa-test';
process.env.PERPLEXITY_API_KEY = 'pplx-test';
const fetchMock = vi
.fn()
.mockResolvedValueOnce(new Response('bad exa key', { status: 401 }))
.mockResolvedValueOnce(perplexityResponse());
vi.stubGlobal('fetch', fetchMock);
const findings = await searchResearch({
projectRoot: await tempProjectRoot(),
query: 'EV market 2025 trends',
});
expect(findings.provider).toBe('perplexity');
expect(fetchMock).toHaveBeenCalledTimes(2);
expect(String(fetchMock.mock.calls[0]![0])).toBe('https://api.exa.ai/search');
expect(String(fetchMock.mock.calls[1]![0])).toBe(
'https://api.perplexity.ai/v1/sonar',
);
});
it('preserves explicit supported provider order', async () => {
process.env.EXASEARCH_API_KEY = 'exa-test';
process.env.OD_TAVILY_API_KEY = 'tvly-test';
const fetchMock = vi.fn(async (_input: FetchInput, _init?: FetchInit) =>
tavilyResponse(),
);
vi.stubGlobal('fetch', fetchMock);
const findings = await searchResearch({
projectRoot: await tempProjectRoot(),
query: 'EV market 2025 trends',
providers: ['tavily', 'exa'],
});
expect(findings.provider).toBe('tavily');
const [url] = fetchMock.mock.calls[0] as [FetchInput, FetchInit];
expect(String(url)).toBe('https://api.tavily.com/search');
});
it('returns a clear provider failure when every configured provider fails', async () => {
process.env.EXASEARCH_API_KEY = 'exa-test';
process.env.OD_TAVILY_API_KEY = 'tvly-test';
const fetchMock = vi.fn(async (_input: FetchInput, _init?: FetchInit) =>
new Response('provider unavailable', { status: 503 }),
);
vi.stubGlobal('fetch', fetchMock);
await expect(
searchResearch({
projectRoot: await tempProjectRoot(),
query: 'EV market 2025 trends',
}),
).rejects.toMatchObject({
code: 'RESEARCH_PROVIDER_FAILED',
status: 502,
} satisfies Partial<ResearchError>);
expect(fetchMock).toHaveBeenCalledTimes(2);
});
it('rejects Financial Datasets as unsupported for web research', async () => {
process.env.FINANCIAL_DATASETS_API_KEY = 'financial-data-test';
await expect(
searchResearch({
projectRoot: await tempProjectRoot(),
query: 'AAPL revenue',
providers: ['financialdatasets'],
}),
).rejects.toMatchObject({
code: 'UNSUPPORTED_RESEARCH_PROVIDER',
status: 400,
} satisfies Partial<ResearchError>);
});
});

View file

@ -14,7 +14,7 @@ export interface ResearchOptions {
depth?: ResearchDepth;
/** Cap on returned sources. Defaults follow the depth. */
maxSources?: number;
/** Provider preference order. Phase 1 supports ['tavily']. */
/** Web provider preference order. Supports ['exa', 'perplexity', 'tavily']. */
providers?: string[];
}