mirror of
https://github.com/zed-industries/zed.git
synced 2026-06-01 03:14:56 +07:00
Add OpenAI Responses API support with chat_completions capability flag (#39989)
Add support for OpenAI's `/responses` endpoint for models that don't support the `/chat/completions` API. This enables compatibility with newer model variants (`gpt-5-codex`, `gpt-5-pro`, `o3-pro`, etc.) while maintaining compatibility with existing configs. Changes: - Add a `supports_chat_completions` flag to model capabilities that defaults to `true`, preserving existing behavior - Implement a Responses API client with streaming support as per the [OpenAI documentation](https://app.stainless.com/api/spec/documented/openai/openapi.documented.yml) - Add `ResponseEventMapper` to convert Responses events to completion events, keeping maintenance simple - Update the UI to allow toggling the `chat_completions` capability - Add the `gpt-5-codex` model Closes #38858 Release Notes: - Added support for `gpt-5-codex` model --------- Co-authored-by: Bennet Bo Fenner <bennet@zed.dev>
This commit is contained in:
parent
e70d2524b3
commit
84017bca89
8 changed files with 1565 additions and 37 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
|
@ -8983,6 +8983,7 @@ dependencies = [
|
|||
"open_ai",
|
||||
"open_router",
|
||||
"partial-json-fixer",
|
||||
"pretty_assertions",
|
||||
"project",
|
||||
"release_channel",
|
||||
"schemars",
|
||||
|
|
|
|||
|
|
@ -102,6 +102,7 @@ struct ModelCapabilityToggles {
|
|||
pub supports_images: ToggleState,
|
||||
pub supports_parallel_tool_calls: ToggleState,
|
||||
pub supports_prompt_cache_key: ToggleState,
|
||||
pub supports_chat_completions: ToggleState,
|
||||
}
|
||||
|
||||
struct ModelInput {
|
||||
|
|
@ -154,6 +155,7 @@ impl ModelInput {
|
|||
images,
|
||||
parallel_tool_calls,
|
||||
prompt_cache_key,
|
||||
chat_completions,
|
||||
} = ModelCapabilities::default();
|
||||
|
||||
Self {
|
||||
|
|
@ -166,6 +168,7 @@ impl ModelInput {
|
|||
supports_images: images.into(),
|
||||
supports_parallel_tool_calls: parallel_tool_calls.into(),
|
||||
supports_prompt_cache_key: prompt_cache_key.into(),
|
||||
supports_chat_completions: chat_completions.into(),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
|
@ -203,6 +206,7 @@ impl ModelInput {
|
|||
images: self.capabilities.supports_images.selected(),
|
||||
parallel_tool_calls: self.capabilities.supports_parallel_tool_calls.selected(),
|
||||
prompt_cache_key: self.capabilities.supports_prompt_cache_key.selected(),
|
||||
chat_completions: self.capabilities.supports_chat_completions.selected(),
|
||||
},
|
||||
})
|
||||
}
|
||||
|
|
@ -426,6 +430,20 @@ impl AddLlmProviderModal {
|
|||
cx.notify();
|
||||
},
|
||||
)),
|
||||
)
|
||||
.child(
|
||||
Checkbox::new(
|
||||
("supports-chat-completions", ix),
|
||||
model.capabilities.supports_chat_completions,
|
||||
)
|
||||
.label("Supports /chat/completions")
|
||||
.on_click(cx.listener(
|
||||
move |this, checked, _window, cx| {
|
||||
this.input.models[ix].capabilities.supports_chat_completions =
|
||||
*checked;
|
||||
cx.notify();
|
||||
},
|
||||
)),
|
||||
),
|
||||
)
|
||||
.when(has_more_than_one_model, |this| {
|
||||
|
|
@ -724,12 +742,17 @@ mod tests {
|
|||
model_input.capabilities.supports_prompt_cache_key,
|
||||
ToggleState::Unselected
|
||||
);
|
||||
assert_eq!(
|
||||
model_input.capabilities.supports_chat_completions,
|
||||
ToggleState::Selected
|
||||
);
|
||||
|
||||
let parsed_model = model_input.parse(cx).unwrap();
|
||||
assert!(parsed_model.capabilities.tools);
|
||||
assert!(!parsed_model.capabilities.images);
|
||||
assert!(!parsed_model.capabilities.parallel_tool_calls);
|
||||
assert!(!parsed_model.capabilities.prompt_cache_key);
|
||||
assert!(parsed_model.capabilities.chat_completions);
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -749,12 +772,14 @@ mod tests {
|
|||
model_input.capabilities.supports_images = ToggleState::Unselected;
|
||||
model_input.capabilities.supports_parallel_tool_calls = ToggleState::Unselected;
|
||||
model_input.capabilities.supports_prompt_cache_key = ToggleState::Unselected;
|
||||
model_input.capabilities.supports_chat_completions = ToggleState::Unselected;
|
||||
|
||||
let parsed_model = model_input.parse(cx).unwrap();
|
||||
assert!(!parsed_model.capabilities.tools);
|
||||
assert!(!parsed_model.capabilities.images);
|
||||
assert!(!parsed_model.capabilities.parallel_tool_calls);
|
||||
assert!(!parsed_model.capabilities.prompt_cache_key);
|
||||
assert!(!parsed_model.capabilities.chat_completions);
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -774,6 +799,7 @@ mod tests {
|
|||
model_input.capabilities.supports_images = ToggleState::Unselected;
|
||||
model_input.capabilities.supports_parallel_tool_calls = ToggleState::Selected;
|
||||
model_input.capabilities.supports_prompt_cache_key = ToggleState::Unselected;
|
||||
model_input.capabilities.supports_chat_completions = ToggleState::Selected;
|
||||
|
||||
let parsed_model = model_input.parse(cx).unwrap();
|
||||
assert_eq!(parsed_model.name, "somemodel");
|
||||
|
|
@ -781,6 +807,7 @@ mod tests {
|
|||
assert!(!parsed_model.capabilities.images);
|
||||
assert!(parsed_model.capabilities.parallel_tool_calls);
|
||||
assert!(!parsed_model.capabilities.prompt_cache_key);
|
||||
assert!(parsed_model.capabilities.chat_completions);
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -66,4 +66,5 @@ x_ai = { workspace = true, features = ["schemars"] }
|
|||
[dev-dependencies]
|
||||
editor = { workspace = true, features = ["test-support"] }
|
||||
language_model = { workspace = true, features = ["test-support"] }
|
||||
pretty_assertions.workspace = true
|
||||
project = { workspace = true, features = ["test-support"] }
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -10,14 +10,20 @@ use language_model::{
|
|||
LanguageModelRequest, LanguageModelToolChoice, LanguageModelToolSchemaFormat, RateLimiter,
|
||||
};
|
||||
use menu;
|
||||
use open_ai::{ResponseStreamEvent, stream_completion};
|
||||
use open_ai::{
|
||||
ResponseStreamEvent,
|
||||
responses::{Request as ResponseRequest, StreamEvent as ResponsesStreamEvent, stream_response},
|
||||
stream_completion,
|
||||
};
|
||||
use settings::{Settings, SettingsStore};
|
||||
use std::sync::Arc;
|
||||
use ui::{ElevationIndex, Tooltip, prelude::*};
|
||||
use ui_input::InputField;
|
||||
use util::ResultExt;
|
||||
|
||||
use crate::provider::open_ai::{OpenAiEventMapper, into_open_ai};
|
||||
use crate::provider::open_ai::{
|
||||
OpenAiEventMapper, OpenAiResponseEventMapper, into_open_ai, into_open_ai_response,
|
||||
};
|
||||
pub use settings::OpenAiCompatibleAvailableModel as AvailableModel;
|
||||
pub use settings::OpenAiCompatibleModelCapabilities as ModelCapabilities;
|
||||
|
||||
|
|
@ -236,6 +242,43 @@ impl OpenAiCompatibleLanguageModel {
|
|||
|
||||
async move { Ok(future.await?.boxed()) }.boxed()
|
||||
}
|
||||
|
||||
fn stream_response(
|
||||
&self,
|
||||
request: ResponseRequest,
|
||||
cx: &AsyncApp,
|
||||
) -> BoxFuture<'static, Result<futures::stream::BoxStream<'static, Result<ResponsesStreamEvent>>>>
|
||||
{
|
||||
let http_client = self.http_client.clone();
|
||||
|
||||
let Ok((api_key, api_url)) = self.state.read_with(cx, |state, _cx| {
|
||||
let api_url = &state.settings.api_url;
|
||||
(
|
||||
state.api_key_state.key(api_url),
|
||||
state.settings.api_url.clone(),
|
||||
)
|
||||
}) else {
|
||||
return future::ready(Err(anyhow!("App state dropped"))).boxed();
|
||||
};
|
||||
|
||||
let provider = self.provider_name.clone();
|
||||
let future = self.request_limiter.stream(async move {
|
||||
let Some(api_key) = api_key else {
|
||||
return Err(LanguageModelCompletionError::NoApiKey { provider });
|
||||
};
|
||||
let request = stream_response(
|
||||
http_client.as_ref(),
|
||||
provider.0.as_str(),
|
||||
&api_url,
|
||||
&api_key,
|
||||
request,
|
||||
);
|
||||
let response = request.await?;
|
||||
Ok(response)
|
||||
});
|
||||
|
||||
async move { Ok(future.await?.boxed()) }.boxed()
|
||||
}
|
||||
}
|
||||
|
||||
impl LanguageModel for OpenAiCompatibleLanguageModel {
|
||||
|
|
@ -327,20 +370,37 @@ impl LanguageModel for OpenAiCompatibleLanguageModel {
|
|||
LanguageModelCompletionError,
|
||||
>,
|
||||
> {
|
||||
let request = into_open_ai(
|
||||
request,
|
||||
&self.model.name,
|
||||
self.model.capabilities.parallel_tool_calls,
|
||||
self.model.capabilities.prompt_cache_key,
|
||||
self.max_output_tokens(),
|
||||
None,
|
||||
);
|
||||
let completions = self.stream_completion(request, cx);
|
||||
async move {
|
||||
let mapper = OpenAiEventMapper::new();
|
||||
Ok(mapper.map_stream(completions.await?).boxed())
|
||||
if self.model.capabilities.chat_completions {
|
||||
let request = into_open_ai(
|
||||
request,
|
||||
&self.model.name,
|
||||
self.model.capabilities.parallel_tool_calls,
|
||||
self.model.capabilities.prompt_cache_key,
|
||||
self.max_output_tokens(),
|
||||
None,
|
||||
);
|
||||
let completions = self.stream_completion(request, cx);
|
||||
async move {
|
||||
let mapper = OpenAiEventMapper::new();
|
||||
Ok(mapper.map_stream(completions.await?).boxed())
|
||||
}
|
||||
.boxed()
|
||||
} else {
|
||||
let request = into_open_ai_response(
|
||||
request,
|
||||
&self.model.name,
|
||||
self.model.capabilities.parallel_tool_calls,
|
||||
self.model.capabilities.prompt_cache_key,
|
||||
self.max_output_tokens(),
|
||||
None,
|
||||
);
|
||||
let completions = self.stream_response(request, cx);
|
||||
async move {
|
||||
let mapper = OpenAiResponseEventMapper::new();
|
||||
Ok(mapper.map_stream(completions.await?).boxed())
|
||||
}
|
||||
.boxed()
|
||||
}
|
||||
.boxed()
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -81,6 +81,8 @@ pub enum Model {
|
|||
O4Mini,
|
||||
#[serde(rename = "gpt-5")]
|
||||
Five,
|
||||
#[serde(rename = "gpt-5-codex")]
|
||||
FiveCodex,
|
||||
#[serde(rename = "gpt-5-mini")]
|
||||
FiveMini,
|
||||
#[serde(rename = "gpt-5-nano")]
|
||||
|
|
@ -98,9 +100,15 @@ pub enum Model {
|
|||
max_output_tokens: Option<u64>,
|
||||
max_completion_tokens: Option<u64>,
|
||||
reasoning_effort: Option<ReasoningEffort>,
|
||||
#[serde(default = "default_supports_chat_completions")]
|
||||
supports_chat_completions: bool,
|
||||
},
|
||||
}
|
||||
|
||||
/// Serde default for a `supports_chat_completions` field.
///
/// Always returns `true`, so entries that omit the field are treated as
/// supporting chat completions.
const fn default_supports_chat_completions() -> bool {
    true
}
|
||||
|
||||
impl Model {
|
||||
pub fn default_fast() -> Self {
|
||||
// TODO: Replace with FiveMini since all other models are deprecated
|
||||
|
|
@ -122,6 +130,7 @@ impl Model {
|
|||
"o3" => Ok(Self::O3),
|
||||
"o4-mini" => Ok(Self::O4Mini),
|
||||
"gpt-5" => Ok(Self::Five),
|
||||
"gpt-5-codex" => Ok(Self::FiveCodex),
|
||||
"gpt-5-mini" => Ok(Self::FiveMini),
|
||||
"gpt-5-nano" => Ok(Self::FiveNano),
|
||||
"gpt-5.1" => Ok(Self::FivePointOne),
|
||||
|
|
@ -145,6 +154,7 @@ impl Model {
|
|||
Self::O3 => "o3",
|
||||
Self::O4Mini => "o4-mini",
|
||||
Self::Five => "gpt-5",
|
||||
Self::FiveCodex => "gpt-5-codex",
|
||||
Self::FiveMini => "gpt-5-mini",
|
||||
Self::FiveNano => "gpt-5-nano",
|
||||
Self::FivePointOne => "gpt-5.1",
|
||||
|
|
@ -168,6 +178,7 @@ impl Model {
|
|||
Self::O3 => "o3",
|
||||
Self::O4Mini => "o4-mini",
|
||||
Self::Five => "gpt-5",
|
||||
Self::FiveCodex => "gpt-5-codex",
|
||||
Self::FiveMini => "gpt-5-mini",
|
||||
Self::FiveNano => "gpt-5-nano",
|
||||
Self::FivePointOne => "gpt-5.1",
|
||||
|
|
@ -193,6 +204,7 @@ impl Model {
|
|||
Self::O3 => 200_000,
|
||||
Self::O4Mini => 200_000,
|
||||
Self::Five => 272_000,
|
||||
Self::FiveCodex => 272_000,
|
||||
Self::FiveMini => 272_000,
|
||||
Self::FiveNano => 272_000,
|
||||
Self::FivePointOne => 400_000,
|
||||
|
|
@ -219,6 +231,7 @@ impl Model {
|
|||
Self::O3 => Some(100_000),
|
||||
Self::O4Mini => Some(100_000),
|
||||
Self::Five => Some(128_000),
|
||||
Self::FiveCodex => Some(128_000),
|
||||
Self::FiveMini => Some(128_000),
|
||||
Self::FiveNano => Some(128_000),
|
||||
Self::FivePointOne => Some(128_000),
|
||||
|
|
@ -235,6 +248,17 @@ impl Model {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn supports_chat_completions(&self) -> bool {
|
||||
match self {
|
||||
Self::Custom {
|
||||
supports_chat_completions,
|
||||
..
|
||||
} => *supports_chat_completions,
|
||||
Self::FiveCodex => false,
|
||||
_ => true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns whether the given model supports the `parallel_tool_calls` parameter.
|
||||
///
|
||||
/// If the model does not support the parameter, do not pass it up, or the API will return an error.
|
||||
|
|
@ -249,6 +273,7 @@ impl Model {
|
|||
| Self::FourPointOneMini
|
||||
| Self::FourPointOneNano
|
||||
| Self::Five
|
||||
| Self::FiveCodex
|
||||
| Self::FiveMini
|
||||
| Self::FivePointOne
|
||||
| Self::FivePointTwo
|
||||
|
|
@ -624,3 +649,362 @@ pub fn embed<'a>(
|
|||
Ok(response)
|
||||
}
|
||||
}
|
||||
|
||||
pub mod responses {
|
||||
use anyhow::{Result, anyhow};
|
||||
use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
|
||||
use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::RequestError;
|
||||
|
||||
#[derive(Serialize, Debug)]
|
||||
pub struct Request {
|
||||
pub model: String,
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
pub input: Vec<Value>,
|
||||
#[serde(default)]
|
||||
pub stream: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub temperature: Option<f32>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub top_p: Option<f32>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub max_output_tokens: Option<u64>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub parallel_tool_calls: Option<bool>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub tool_choice: Option<super::ToolChoice>,
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
pub tools: Vec<ToolDefinition>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub prompt_cache_key: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub reasoning: Option<ReasoningConfig>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Debug)]
|
||||
pub struct ReasoningConfig {
|
||||
pub effort: super::ReasoningEffort,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Debug)]
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
pub enum ToolDefinition {
|
||||
Function {
|
||||
name: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
description: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
parameters: Option<Value>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
strict: Option<bool>,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
pub struct Error {
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
#[serde(tag = "type")]
|
||||
pub enum StreamEvent {
|
||||
#[serde(rename = "response.created")]
|
||||
Created { response: ResponseSummary },
|
||||
#[serde(rename = "response.in_progress")]
|
||||
InProgress { response: ResponseSummary },
|
||||
#[serde(rename = "response.output_item.added")]
|
||||
OutputItemAdded {
|
||||
output_index: usize,
|
||||
#[serde(default)]
|
||||
sequence_number: Option<u64>,
|
||||
item: ResponseOutputItem,
|
||||
},
|
||||
#[serde(rename = "response.output_item.done")]
|
||||
OutputItemDone {
|
||||
output_index: usize,
|
||||
#[serde(default)]
|
||||
sequence_number: Option<u64>,
|
||||
item: ResponseOutputItem,
|
||||
},
|
||||
#[serde(rename = "response.content_part.added")]
|
||||
ContentPartAdded {
|
||||
item_id: String,
|
||||
output_index: usize,
|
||||
content_index: usize,
|
||||
part: Value,
|
||||
},
|
||||
#[serde(rename = "response.content_part.done")]
|
||||
ContentPartDone {
|
||||
item_id: String,
|
||||
output_index: usize,
|
||||
content_index: usize,
|
||||
part: Value,
|
||||
},
|
||||
#[serde(rename = "response.output_text.delta")]
|
||||
OutputTextDelta {
|
||||
item_id: String,
|
||||
output_index: usize,
|
||||
#[serde(default)]
|
||||
content_index: Option<usize>,
|
||||
delta: String,
|
||||
},
|
||||
#[serde(rename = "response.output_text.done")]
|
||||
OutputTextDone {
|
||||
item_id: String,
|
||||
output_index: usize,
|
||||
#[serde(default)]
|
||||
content_index: Option<usize>,
|
||||
text: String,
|
||||
},
|
||||
#[serde(rename = "response.function_call_arguments.delta")]
|
||||
FunctionCallArgumentsDelta {
|
||||
item_id: String,
|
||||
output_index: usize,
|
||||
delta: String,
|
||||
#[serde(default)]
|
||||
sequence_number: Option<u64>,
|
||||
},
|
||||
#[serde(rename = "response.function_call_arguments.done")]
|
||||
FunctionCallArgumentsDone {
|
||||
item_id: String,
|
||||
output_index: usize,
|
||||
arguments: String,
|
||||
#[serde(default)]
|
||||
sequence_number: Option<u64>,
|
||||
},
|
||||
#[serde(rename = "response.completed")]
|
||||
Completed { response: ResponseSummary },
|
||||
#[serde(rename = "response.incomplete")]
|
||||
Incomplete { response: ResponseSummary },
|
||||
#[serde(rename = "response.failed")]
|
||||
Failed { response: ResponseSummary },
|
||||
#[serde(rename = "response.error")]
|
||||
Error { error: Error },
|
||||
#[serde(rename = "error")]
|
||||
GenericError { error: Error },
|
||||
#[serde(other)]
|
||||
Unknown,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug, Default, Clone)]
|
||||
pub struct ResponseSummary {
|
||||
#[serde(default)]
|
||||
pub id: Option<String>,
|
||||
#[serde(default)]
|
||||
pub status: Option<String>,
|
||||
#[serde(default)]
|
||||
pub status_details: Option<ResponseStatusDetails>,
|
||||
#[serde(default)]
|
||||
pub usage: Option<ResponseUsage>,
|
||||
#[serde(default)]
|
||||
pub output: Vec<ResponseOutputItem>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug, Default, Clone)]
|
||||
pub struct ResponseStatusDetails {
|
||||
#[serde(default)]
|
||||
pub reason: Option<String>,
|
||||
#[serde(default)]
|
||||
pub r#type: Option<String>,
|
||||
#[serde(default)]
|
||||
pub error: Option<Value>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug, Default, Clone)]
|
||||
pub struct ResponseUsage {
|
||||
#[serde(default)]
|
||||
pub input_tokens: Option<u64>,
|
||||
#[serde(default)]
|
||||
pub output_tokens: Option<u64>,
|
||||
#[serde(default)]
|
||||
pub total_tokens: Option<u64>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug, Clone)]
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
pub enum ResponseOutputItem {
|
||||
Message(ResponseOutputMessage),
|
||||
FunctionCall(ResponseFunctionToolCall),
|
||||
#[serde(other)]
|
||||
Unknown,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug, Clone)]
|
||||
pub struct ResponseOutputMessage {
|
||||
#[serde(default)]
|
||||
pub id: Option<String>,
|
||||
#[serde(default)]
|
||||
pub content: Vec<Value>,
|
||||
#[serde(default)]
|
||||
pub role: Option<String>,
|
||||
#[serde(default)]
|
||||
pub status: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug, Clone)]
|
||||
pub struct ResponseFunctionToolCall {
|
||||
#[serde(default)]
|
||||
pub id: Option<String>,
|
||||
#[serde(default)]
|
||||
pub arguments: String,
|
||||
#[serde(default)]
|
||||
pub call_id: Option<String>,
|
||||
#[serde(default)]
|
||||
pub name: Option<String>,
|
||||
#[serde(default)]
|
||||
pub status: Option<String>,
|
||||
}
|
||||
|
||||
pub async fn stream_response(
|
||||
client: &dyn HttpClient,
|
||||
provider_name: &str,
|
||||
api_url: &str,
|
||||
api_key: &str,
|
||||
request: Request,
|
||||
) -> Result<BoxStream<'static, Result<StreamEvent>>, RequestError> {
|
||||
let uri = format!("{api_url}/responses");
|
||||
let request_builder = HttpRequest::builder()
|
||||
.method(Method::POST)
|
||||
.uri(uri)
|
||||
.header("Content-Type", "application/json")
|
||||
.header("Authorization", format!("Bearer {}", api_key.trim()));
|
||||
|
||||
let is_streaming = request.stream;
|
||||
let request = request_builder
|
||||
.body(AsyncBody::from(
|
||||
serde_json::to_string(&request).map_err(|e| RequestError::Other(e.into()))?,
|
||||
))
|
||||
.map_err(|e| RequestError::Other(e.into()))?;
|
||||
|
||||
let mut response = client.send(request).await?;
|
||||
if response.status().is_success() {
|
||||
if is_streaming {
|
||||
let reader = BufReader::new(response.into_body());
|
||||
Ok(reader
|
||||
.lines()
|
||||
.filter_map(|line| async move {
|
||||
match line {
|
||||
Ok(line) => {
|
||||
let line = line
|
||||
.strip_prefix("data: ")
|
||||
.or_else(|| line.strip_prefix("data:"))?;
|
||||
if line == "[DONE]" || line.is_empty() {
|
||||
None
|
||||
} else {
|
||||
match serde_json::from_str::<StreamEvent>(line) {
|
||||
Ok(event) => Some(Ok(event)),
|
||||
Err(error) => {
|
||||
log::error!(
|
||||
"Failed to parse OpenAI responses stream event: `{}`\nResponse: `{}`",
|
||||
error,
|
||||
line,
|
||||
);
|
||||
Some(Err(anyhow!(error)))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(error) => Some(Err(anyhow!(error))),
|
||||
}
|
||||
})
|
||||
.boxed())
|
||||
} else {
|
||||
let mut body = String::new();
|
||||
response
|
||||
.body_mut()
|
||||
.read_to_string(&mut body)
|
||||
.await
|
||||
.map_err(|e| RequestError::Other(e.into()))?;
|
||||
|
||||
match serde_json::from_str::<ResponseSummary>(&body) {
|
||||
Ok(response_summary) => {
|
||||
let events = vec![
|
||||
StreamEvent::Created {
|
||||
response: response_summary.clone(),
|
||||
},
|
||||
StreamEvent::InProgress {
|
||||
response: response_summary.clone(),
|
||||
},
|
||||
];
|
||||
|
||||
let mut all_events = events;
|
||||
for (output_index, item) in response_summary.output.iter().enumerate() {
|
||||
all_events.push(StreamEvent::OutputItemAdded {
|
||||
output_index,
|
||||
sequence_number: None,
|
||||
item: item.clone(),
|
||||
});
|
||||
|
||||
match item {
|
||||
ResponseOutputItem::Message(message) => {
|
||||
for content_item in &message.content {
|
||||
if let Some(text) = content_item.get("text") {
|
||||
if let Some(text_str) = text.as_str() {
|
||||
if let Some(ref item_id) = message.id {
|
||||
all_events.push(StreamEvent::OutputTextDelta {
|
||||
item_id: item_id.clone(),
|
||||
output_index,
|
||||
content_index: None,
|
||||
delta: text_str.to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
ResponseOutputItem::FunctionCall(function_call) => {
|
||||
if let Some(ref item_id) = function_call.id {
|
||||
all_events.push(StreamEvent::FunctionCallArgumentsDone {
|
||||
item_id: item_id.clone(),
|
||||
output_index,
|
||||
arguments: function_call.arguments.clone(),
|
||||
sequence_number: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
ResponseOutputItem::Unknown => {}
|
||||
}
|
||||
|
||||
all_events.push(StreamEvent::OutputItemDone {
|
||||
output_index,
|
||||
sequence_number: None,
|
||||
item: item.clone(),
|
||||
});
|
||||
}
|
||||
|
||||
all_events.push(StreamEvent::Completed {
|
||||
response: response_summary,
|
||||
});
|
||||
|
||||
Ok(futures::stream::iter(all_events.into_iter().map(Ok)).boxed())
|
||||
}
|
||||
Err(error) => {
|
||||
log::error!(
|
||||
"Failed to parse OpenAI non-streaming response: `{}`\nResponse: `{}`",
|
||||
error,
|
||||
body,
|
||||
);
|
||||
Err(RequestError::Other(anyhow!(error)))
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let mut body = String::new();
|
||||
response
|
||||
.body_mut()
|
||||
.read_to_string(&mut body)
|
||||
.await
|
||||
.map_err(|e| RequestError::Other(e.into()))?;
|
||||
|
||||
Err(RequestError::HttpResponseError {
|
||||
provider: provider_name.to_owned(),
|
||||
status_code: response.status(),
|
||||
body,
|
||||
headers: response.headers().clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -208,6 +208,8 @@ pub struct OpenAiAvailableModel {
|
|||
pub max_output_tokens: Option<u64>,
|
||||
pub max_completion_tokens: Option<u64>,
|
||||
pub reasoning_effort: Option<OpenAiReasoningEffort>,
|
||||
#[serde(default)]
|
||||
pub capabilities: OpenAiModelCapabilities,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone, JsonSchema, MergeFrom)]
|
||||
|
|
@ -226,6 +228,21 @@ pub struct OpenAiCompatibleSettingsContent {
|
|||
pub available_models: Vec<OpenAiCompatibleAvailableModel>,
|
||||
}
|
||||
|
||||
#[with_fallible_options]
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema, MergeFrom)]
|
||||
pub struct OpenAiModelCapabilities {
|
||||
#[serde(default = "default_true")]
|
||||
pub chat_completions: bool,
|
||||
}
|
||||
|
||||
impl Default for OpenAiModelCapabilities {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
chat_completions: default_true(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[with_fallible_options]
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema, MergeFrom)]
|
||||
pub struct OpenAiCompatibleAvailableModel {
|
||||
|
|
@ -245,6 +262,8 @@ pub struct OpenAiCompatibleModelCapabilities {
|
|||
pub images: bool,
|
||||
pub parallel_tool_calls: bool,
|
||||
pub prompt_cache_key: bool,
|
||||
#[serde(default = "default_true")]
|
||||
pub chat_completions: bool,
|
||||
}
|
||||
|
||||
impl Default for OpenAiCompatibleModelCapabilities {
|
||||
|
|
@ -254,6 +273,7 @@ impl Default for OpenAiCompatibleModelCapabilities {
|
|||
images: false,
|
||||
parallel_tool_calls: false,
|
||||
prompt_cache_key: false,
|
||||
chat_completions: default_true(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -469,6 +469,14 @@ To use alternate models, perhaps a preview release, or if you wish to control th
|
|||
"name": "gpt-4o-2024-08-06",
|
||||
"display_name": "GPT 4o Summer 2024",
|
||||
"max_tokens": 128000
|
||||
},
|
||||
{
|
||||
"name": "gpt-5-codex",
|
||||
"display_name": "GPT-5 Codex",
|
||||
"max_tokens": 128000,
|
||||
"capabilities": {
|
||||
"chat_completions": false
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -478,7 +486,10 @@ To use alternate models, perhaps a preview release, or if you wish to control th
|
|||
|
||||
You must provide the model's context window in the `max_tokens` parameter; this can be found in the [OpenAI model documentation](https://platform.openai.com/docs/models).
|
||||
|
||||
OpenAI `o1` models should set `max_completion_tokens` as well to avoid incurring high reasoning token costs.
|
||||
OpenAI `o1` and `o`-class models should set `max_completion_tokens` as well to avoid incurring high reasoning token costs.
|
||||
|
||||
If a model does not support the `/chat/completions` endpoint (for example `gpt-5-codex`), set `capabilities.chat_completions` to `false` for that model. Zed will then send its requests to the `/responses` endpoint instead.
|
||||
|
||||
Custom models will be listed in the model dropdown in the Agent Panel.
|
||||
|
||||
### OpenAI API Compatible {#openai-api-compatible}
|
||||
|
|
@ -525,6 +536,9 @@ By default, OpenAI-compatible models inherit the following capabilities:
|
|||
- `images`: false (does not support image inputs)
|
||||
- `parallel_tool_calls`: false (does not support `parallel_tool_calls` parameter)
|
||||
- `prompt_cache_key`: false (does not support `prompt_cache_key` parameter)
|
||||
- `chat_completions`: true (calls the `/chat/completions` endpoint)
|
||||
|
||||
If a provider exposes models that only work with the Responses API, set `chat_completions` to `false` for those entries. Zed uses the Responses endpoint for these models.
|
||||
|
||||
Note that LLM API keys aren't stored in your settings file.
|
||||
So, ensure you have it set in your environment variables (`<PROVIDER_NAME>_API_KEY=<your api key>`) so your settings can pick it up. In the example above, it would be `TOGETHER_AI_API_KEY=<your api key>`.
|
||||
|
|
|
|||
Loading…
Reference in a new issue