mirror of
https://github.com/zed-industries/zed.git
synced 2026-06-01 03:14:56 +07:00
Add OpenAI Responses API support with chat_completions capability flag (#39989)
Add support for OpenAI's `/responses` endpoint for models that don't support the `/chat/completions` API. This enables compatibility with newer model variants (`gpt-5-codex`, `gpt-5-pro`, `o3-pro`, etc.) while maintaining compatibility with existing configs. Changes: - Add a `supports_chat_completions` flag to model capabilities that defaults to true, preserving existing behavior. - Implement a responses API client with streaming support as per the [OpenAI documentation](https://app.stainless.com/api/spec/documented/openai/openapi.documented.yml). - Add `ResponseEventMapper` to convert responses events to completion events, which keeps maintenance simple. - Update the UI to allow toggling the `chat_completions` capability. - Add the `gpt-5-codex` model. Closes #38858 Release Notes: - Added support for `gpt-5-codex` model --------- Co-authored-by: Bennet Bo Fenner <bennet@zed.dev>
This commit is contained in:
parent
e70d2524b3
commit
84017bca89
8 changed files with 1565 additions and 37 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
|
@ -8983,6 +8983,7 @@ dependencies = [
|
||||||
"open_ai",
|
"open_ai",
|
||||||
"open_router",
|
"open_router",
|
||||||
"partial-json-fixer",
|
"partial-json-fixer",
|
||||||
|
"pretty_assertions",
|
||||||
"project",
|
"project",
|
||||||
"release_channel",
|
"release_channel",
|
||||||
"schemars",
|
"schemars",
|
||||||
|
|
|
||||||
|
|
@ -102,6 +102,7 @@ struct ModelCapabilityToggles {
|
||||||
pub supports_images: ToggleState,
|
pub supports_images: ToggleState,
|
||||||
pub supports_parallel_tool_calls: ToggleState,
|
pub supports_parallel_tool_calls: ToggleState,
|
||||||
pub supports_prompt_cache_key: ToggleState,
|
pub supports_prompt_cache_key: ToggleState,
|
||||||
|
pub supports_chat_completions: ToggleState,
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ModelInput {
|
struct ModelInput {
|
||||||
|
|
@ -154,6 +155,7 @@ impl ModelInput {
|
||||||
images,
|
images,
|
||||||
parallel_tool_calls,
|
parallel_tool_calls,
|
||||||
prompt_cache_key,
|
prompt_cache_key,
|
||||||
|
chat_completions,
|
||||||
} = ModelCapabilities::default();
|
} = ModelCapabilities::default();
|
||||||
|
|
||||||
Self {
|
Self {
|
||||||
|
|
@ -166,6 +168,7 @@ impl ModelInput {
|
||||||
supports_images: images.into(),
|
supports_images: images.into(),
|
||||||
supports_parallel_tool_calls: parallel_tool_calls.into(),
|
supports_parallel_tool_calls: parallel_tool_calls.into(),
|
||||||
supports_prompt_cache_key: prompt_cache_key.into(),
|
supports_prompt_cache_key: prompt_cache_key.into(),
|
||||||
|
supports_chat_completions: chat_completions.into(),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -203,6 +206,7 @@ impl ModelInput {
|
||||||
images: self.capabilities.supports_images.selected(),
|
images: self.capabilities.supports_images.selected(),
|
||||||
parallel_tool_calls: self.capabilities.supports_parallel_tool_calls.selected(),
|
parallel_tool_calls: self.capabilities.supports_parallel_tool_calls.selected(),
|
||||||
prompt_cache_key: self.capabilities.supports_prompt_cache_key.selected(),
|
prompt_cache_key: self.capabilities.supports_prompt_cache_key.selected(),
|
||||||
|
chat_completions: self.capabilities.supports_chat_completions.selected(),
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
@ -426,6 +430,20 @@ impl AddLlmProviderModal {
|
||||||
cx.notify();
|
cx.notify();
|
||||||
},
|
},
|
||||||
)),
|
)),
|
||||||
|
)
|
||||||
|
.child(
|
||||||
|
Checkbox::new(
|
||||||
|
("supports-chat-completions", ix),
|
||||||
|
model.capabilities.supports_chat_completions,
|
||||||
|
)
|
||||||
|
.label("Supports /chat/completions")
|
||||||
|
.on_click(cx.listener(
|
||||||
|
move |this, checked, _window, cx| {
|
||||||
|
this.input.models[ix].capabilities.supports_chat_completions =
|
||||||
|
*checked;
|
||||||
|
cx.notify();
|
||||||
|
},
|
||||||
|
)),
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
.when(has_more_than_one_model, |this| {
|
.when(has_more_than_one_model, |this| {
|
||||||
|
|
@ -724,12 +742,17 @@ mod tests {
|
||||||
model_input.capabilities.supports_prompt_cache_key,
|
model_input.capabilities.supports_prompt_cache_key,
|
||||||
ToggleState::Unselected
|
ToggleState::Unselected
|
||||||
);
|
);
|
||||||
|
assert_eq!(
|
||||||
|
model_input.capabilities.supports_chat_completions,
|
||||||
|
ToggleState::Selected
|
||||||
|
);
|
||||||
|
|
||||||
let parsed_model = model_input.parse(cx).unwrap();
|
let parsed_model = model_input.parse(cx).unwrap();
|
||||||
assert!(parsed_model.capabilities.tools);
|
assert!(parsed_model.capabilities.tools);
|
||||||
assert!(!parsed_model.capabilities.images);
|
assert!(!parsed_model.capabilities.images);
|
||||||
assert!(!parsed_model.capabilities.parallel_tool_calls);
|
assert!(!parsed_model.capabilities.parallel_tool_calls);
|
||||||
assert!(!parsed_model.capabilities.prompt_cache_key);
|
assert!(!parsed_model.capabilities.prompt_cache_key);
|
||||||
|
assert!(parsed_model.capabilities.chat_completions);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -749,12 +772,14 @@ mod tests {
|
||||||
model_input.capabilities.supports_images = ToggleState::Unselected;
|
model_input.capabilities.supports_images = ToggleState::Unselected;
|
||||||
model_input.capabilities.supports_parallel_tool_calls = ToggleState::Unselected;
|
model_input.capabilities.supports_parallel_tool_calls = ToggleState::Unselected;
|
||||||
model_input.capabilities.supports_prompt_cache_key = ToggleState::Unselected;
|
model_input.capabilities.supports_prompt_cache_key = ToggleState::Unselected;
|
||||||
|
model_input.capabilities.supports_chat_completions = ToggleState::Unselected;
|
||||||
|
|
||||||
let parsed_model = model_input.parse(cx).unwrap();
|
let parsed_model = model_input.parse(cx).unwrap();
|
||||||
assert!(!parsed_model.capabilities.tools);
|
assert!(!parsed_model.capabilities.tools);
|
||||||
assert!(!parsed_model.capabilities.images);
|
assert!(!parsed_model.capabilities.images);
|
||||||
assert!(!parsed_model.capabilities.parallel_tool_calls);
|
assert!(!parsed_model.capabilities.parallel_tool_calls);
|
||||||
assert!(!parsed_model.capabilities.prompt_cache_key);
|
assert!(!parsed_model.capabilities.prompt_cache_key);
|
||||||
|
assert!(!parsed_model.capabilities.chat_completions);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -774,6 +799,7 @@ mod tests {
|
||||||
model_input.capabilities.supports_images = ToggleState::Unselected;
|
model_input.capabilities.supports_images = ToggleState::Unselected;
|
||||||
model_input.capabilities.supports_parallel_tool_calls = ToggleState::Selected;
|
model_input.capabilities.supports_parallel_tool_calls = ToggleState::Selected;
|
||||||
model_input.capabilities.supports_prompt_cache_key = ToggleState::Unselected;
|
model_input.capabilities.supports_prompt_cache_key = ToggleState::Unselected;
|
||||||
|
model_input.capabilities.supports_chat_completions = ToggleState::Selected;
|
||||||
|
|
||||||
let parsed_model = model_input.parse(cx).unwrap();
|
let parsed_model = model_input.parse(cx).unwrap();
|
||||||
assert_eq!(parsed_model.name, "somemodel");
|
assert_eq!(parsed_model.name, "somemodel");
|
||||||
|
|
@ -781,6 +807,7 @@ mod tests {
|
||||||
assert!(!parsed_model.capabilities.images);
|
assert!(!parsed_model.capabilities.images);
|
||||||
assert!(parsed_model.capabilities.parallel_tool_calls);
|
assert!(parsed_model.capabilities.parallel_tool_calls);
|
||||||
assert!(!parsed_model.capabilities.prompt_cache_key);
|
assert!(!parsed_model.capabilities.prompt_cache_key);
|
||||||
|
assert!(parsed_model.capabilities.chat_completions);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -66,4 +66,5 @@ x_ai = { workspace = true, features = ["schemars"] }
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
editor = { workspace = true, features = ["test-support"] }
|
editor = { workspace = true, features = ["test-support"] }
|
||||||
language_model = { workspace = true, features = ["test-support"] }
|
language_model = { workspace = true, features = ["test-support"] }
|
||||||
|
pretty_assertions.workspace = true
|
||||||
project = { workspace = true, features = ["test-support"] }
|
project = { workspace = true, features = ["test-support"] }
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -10,14 +10,20 @@ use language_model::{
|
||||||
LanguageModelRequest, LanguageModelToolChoice, LanguageModelToolSchemaFormat, RateLimiter,
|
LanguageModelRequest, LanguageModelToolChoice, LanguageModelToolSchemaFormat, RateLimiter,
|
||||||
};
|
};
|
||||||
use menu;
|
use menu;
|
||||||
use open_ai::{ResponseStreamEvent, stream_completion};
|
use open_ai::{
|
||||||
|
ResponseStreamEvent,
|
||||||
|
responses::{Request as ResponseRequest, StreamEvent as ResponsesStreamEvent, stream_response},
|
||||||
|
stream_completion,
|
||||||
|
};
|
||||||
use settings::{Settings, SettingsStore};
|
use settings::{Settings, SettingsStore};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use ui::{ElevationIndex, Tooltip, prelude::*};
|
use ui::{ElevationIndex, Tooltip, prelude::*};
|
||||||
use ui_input::InputField;
|
use ui_input::InputField;
|
||||||
use util::ResultExt;
|
use util::ResultExt;
|
||||||
|
|
||||||
use crate::provider::open_ai::{OpenAiEventMapper, into_open_ai};
|
use crate::provider::open_ai::{
|
||||||
|
OpenAiEventMapper, OpenAiResponseEventMapper, into_open_ai, into_open_ai_response,
|
||||||
|
};
|
||||||
pub use settings::OpenAiCompatibleAvailableModel as AvailableModel;
|
pub use settings::OpenAiCompatibleAvailableModel as AvailableModel;
|
||||||
pub use settings::OpenAiCompatibleModelCapabilities as ModelCapabilities;
|
pub use settings::OpenAiCompatibleModelCapabilities as ModelCapabilities;
|
||||||
|
|
||||||
|
|
@ -236,6 +242,43 @@ impl OpenAiCompatibleLanguageModel {
|
||||||
|
|
||||||
async move { Ok(future.await?.boxed()) }.boxed()
|
async move { Ok(future.await?.boxed()) }.boxed()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn stream_response(
|
||||||
|
&self,
|
||||||
|
request: ResponseRequest,
|
||||||
|
cx: &AsyncApp,
|
||||||
|
) -> BoxFuture<'static, Result<futures::stream::BoxStream<'static, Result<ResponsesStreamEvent>>>>
|
||||||
|
{
|
||||||
|
let http_client = self.http_client.clone();
|
||||||
|
|
||||||
|
let Ok((api_key, api_url)) = self.state.read_with(cx, |state, _cx| {
|
||||||
|
let api_url = &state.settings.api_url;
|
||||||
|
(
|
||||||
|
state.api_key_state.key(api_url),
|
||||||
|
state.settings.api_url.clone(),
|
||||||
|
)
|
||||||
|
}) else {
|
||||||
|
return future::ready(Err(anyhow!("App state dropped"))).boxed();
|
||||||
|
};
|
||||||
|
|
||||||
|
let provider = self.provider_name.clone();
|
||||||
|
let future = self.request_limiter.stream(async move {
|
||||||
|
let Some(api_key) = api_key else {
|
||||||
|
return Err(LanguageModelCompletionError::NoApiKey { provider });
|
||||||
|
};
|
||||||
|
let request = stream_response(
|
||||||
|
http_client.as_ref(),
|
||||||
|
provider.0.as_str(),
|
||||||
|
&api_url,
|
||||||
|
&api_key,
|
||||||
|
request,
|
||||||
|
);
|
||||||
|
let response = request.await?;
|
||||||
|
Ok(response)
|
||||||
|
});
|
||||||
|
|
||||||
|
async move { Ok(future.await?.boxed()) }.boxed()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl LanguageModel for OpenAiCompatibleLanguageModel {
|
impl LanguageModel for OpenAiCompatibleLanguageModel {
|
||||||
|
|
@ -327,20 +370,37 @@ impl LanguageModel for OpenAiCompatibleLanguageModel {
|
||||||
LanguageModelCompletionError,
|
LanguageModelCompletionError,
|
||||||
>,
|
>,
|
||||||
> {
|
> {
|
||||||
let request = into_open_ai(
|
if self.model.capabilities.chat_completions {
|
||||||
request,
|
let request = into_open_ai(
|
||||||
&self.model.name,
|
request,
|
||||||
self.model.capabilities.parallel_tool_calls,
|
&self.model.name,
|
||||||
self.model.capabilities.prompt_cache_key,
|
self.model.capabilities.parallel_tool_calls,
|
||||||
self.max_output_tokens(),
|
self.model.capabilities.prompt_cache_key,
|
||||||
None,
|
self.max_output_tokens(),
|
||||||
);
|
None,
|
||||||
let completions = self.stream_completion(request, cx);
|
);
|
||||||
async move {
|
let completions = self.stream_completion(request, cx);
|
||||||
let mapper = OpenAiEventMapper::new();
|
async move {
|
||||||
Ok(mapper.map_stream(completions.await?).boxed())
|
let mapper = OpenAiEventMapper::new();
|
||||||
|
Ok(mapper.map_stream(completions.await?).boxed())
|
||||||
|
}
|
||||||
|
.boxed()
|
||||||
|
} else {
|
||||||
|
let request = into_open_ai_response(
|
||||||
|
request,
|
||||||
|
&self.model.name,
|
||||||
|
self.model.capabilities.parallel_tool_calls,
|
||||||
|
self.model.capabilities.prompt_cache_key,
|
||||||
|
self.max_output_tokens(),
|
||||||
|
None,
|
||||||
|
);
|
||||||
|
let completions = self.stream_response(request, cx);
|
||||||
|
async move {
|
||||||
|
let mapper = OpenAiResponseEventMapper::new();
|
||||||
|
Ok(mapper.map_stream(completions.await?).boxed())
|
||||||
|
}
|
||||||
|
.boxed()
|
||||||
}
|
}
|
||||||
.boxed()
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -81,6 +81,8 @@ pub enum Model {
|
||||||
O4Mini,
|
O4Mini,
|
||||||
#[serde(rename = "gpt-5")]
|
#[serde(rename = "gpt-5")]
|
||||||
Five,
|
Five,
|
||||||
|
#[serde(rename = "gpt-5-codex")]
|
||||||
|
FiveCodex,
|
||||||
#[serde(rename = "gpt-5-mini")]
|
#[serde(rename = "gpt-5-mini")]
|
||||||
FiveMini,
|
FiveMini,
|
||||||
#[serde(rename = "gpt-5-nano")]
|
#[serde(rename = "gpt-5-nano")]
|
||||||
|
|
@ -98,9 +100,15 @@ pub enum Model {
|
||||||
max_output_tokens: Option<u64>,
|
max_output_tokens: Option<u64>,
|
||||||
max_completion_tokens: Option<u64>,
|
max_completion_tokens: Option<u64>,
|
||||||
reasoning_effort: Option<ReasoningEffort>,
|
reasoning_effort: Option<ReasoningEffort>,
|
||||||
|
#[serde(default = "default_supports_chat_completions")]
|
||||||
|
supports_chat_completions: bool,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Serde default for the `supports_chat_completions` field of `Model::Custom`:
/// a custom model that omits the field is treated as supporting chat completions.
const fn default_supports_chat_completions() -> bool {
    true
}
|
||||||
|
|
||||||
impl Model {
|
impl Model {
|
||||||
pub fn default_fast() -> Self {
|
pub fn default_fast() -> Self {
|
||||||
// TODO: Replace with FiveMini since all other models are deprecated
|
// TODO: Replace with FiveMini since all other models are deprecated
|
||||||
|
|
@ -122,6 +130,7 @@ impl Model {
|
||||||
"o3" => Ok(Self::O3),
|
"o3" => Ok(Self::O3),
|
||||||
"o4-mini" => Ok(Self::O4Mini),
|
"o4-mini" => Ok(Self::O4Mini),
|
||||||
"gpt-5" => Ok(Self::Five),
|
"gpt-5" => Ok(Self::Five),
|
||||||
|
"gpt-5-codex" => Ok(Self::FiveCodex),
|
||||||
"gpt-5-mini" => Ok(Self::FiveMini),
|
"gpt-5-mini" => Ok(Self::FiveMini),
|
||||||
"gpt-5-nano" => Ok(Self::FiveNano),
|
"gpt-5-nano" => Ok(Self::FiveNano),
|
||||||
"gpt-5.1" => Ok(Self::FivePointOne),
|
"gpt-5.1" => Ok(Self::FivePointOne),
|
||||||
|
|
@ -145,6 +154,7 @@ impl Model {
|
||||||
Self::O3 => "o3",
|
Self::O3 => "o3",
|
||||||
Self::O4Mini => "o4-mini",
|
Self::O4Mini => "o4-mini",
|
||||||
Self::Five => "gpt-5",
|
Self::Five => "gpt-5",
|
||||||
|
Self::FiveCodex => "gpt-5-codex",
|
||||||
Self::FiveMini => "gpt-5-mini",
|
Self::FiveMini => "gpt-5-mini",
|
||||||
Self::FiveNano => "gpt-5-nano",
|
Self::FiveNano => "gpt-5-nano",
|
||||||
Self::FivePointOne => "gpt-5.1",
|
Self::FivePointOne => "gpt-5.1",
|
||||||
|
|
@ -168,6 +178,7 @@ impl Model {
|
||||||
Self::O3 => "o3",
|
Self::O3 => "o3",
|
||||||
Self::O4Mini => "o4-mini",
|
Self::O4Mini => "o4-mini",
|
||||||
Self::Five => "gpt-5",
|
Self::Five => "gpt-5",
|
||||||
|
Self::FiveCodex => "gpt-5-codex",
|
||||||
Self::FiveMini => "gpt-5-mini",
|
Self::FiveMini => "gpt-5-mini",
|
||||||
Self::FiveNano => "gpt-5-nano",
|
Self::FiveNano => "gpt-5-nano",
|
||||||
Self::FivePointOne => "gpt-5.1",
|
Self::FivePointOne => "gpt-5.1",
|
||||||
|
|
@ -193,6 +204,7 @@ impl Model {
|
||||||
Self::O3 => 200_000,
|
Self::O3 => 200_000,
|
||||||
Self::O4Mini => 200_000,
|
Self::O4Mini => 200_000,
|
||||||
Self::Five => 272_000,
|
Self::Five => 272_000,
|
||||||
|
Self::FiveCodex => 272_000,
|
||||||
Self::FiveMini => 272_000,
|
Self::FiveMini => 272_000,
|
||||||
Self::FiveNano => 272_000,
|
Self::FiveNano => 272_000,
|
||||||
Self::FivePointOne => 400_000,
|
Self::FivePointOne => 400_000,
|
||||||
|
|
@ -219,6 +231,7 @@ impl Model {
|
||||||
Self::O3 => Some(100_000),
|
Self::O3 => Some(100_000),
|
||||||
Self::O4Mini => Some(100_000),
|
Self::O4Mini => Some(100_000),
|
||||||
Self::Five => Some(128_000),
|
Self::Five => Some(128_000),
|
||||||
|
Self::FiveCodex => Some(128_000),
|
||||||
Self::FiveMini => Some(128_000),
|
Self::FiveMini => Some(128_000),
|
||||||
Self::FiveNano => Some(128_000),
|
Self::FiveNano => Some(128_000),
|
||||||
Self::FivePointOne => Some(128_000),
|
Self::FivePointOne => Some(128_000),
|
||||||
|
|
@ -235,6 +248,17 @@ impl Model {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn supports_chat_completions(&self) -> bool {
|
||||||
|
match self {
|
||||||
|
Self::Custom {
|
||||||
|
supports_chat_completions,
|
||||||
|
..
|
||||||
|
} => *supports_chat_completions,
|
||||||
|
Self::FiveCodex => false,
|
||||||
|
_ => true,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns whether the given model supports the `parallel_tool_calls` parameter.
|
/// Returns whether the given model supports the `parallel_tool_calls` parameter.
|
||||||
///
|
///
|
||||||
/// If the model does not support the parameter, do not pass it up, or the API will return an error.
|
/// If the model does not support the parameter, do not pass it up, or the API will return an error.
|
||||||
|
|
@ -249,6 +273,7 @@ impl Model {
|
||||||
| Self::FourPointOneMini
|
| Self::FourPointOneMini
|
||||||
| Self::FourPointOneNano
|
| Self::FourPointOneNano
|
||||||
| Self::Five
|
| Self::Five
|
||||||
|
| Self::FiveCodex
|
||||||
| Self::FiveMini
|
| Self::FiveMini
|
||||||
| Self::FivePointOne
|
| Self::FivePointOne
|
||||||
| Self::FivePointTwo
|
| Self::FivePointTwo
|
||||||
|
|
@ -624,3 +649,362 @@ pub fn embed<'a>(
|
||||||
Ok(response)
|
Ok(response)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub mod responses {
|
||||||
|
use anyhow::{Result, anyhow};
|
||||||
|
use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
|
||||||
|
use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use serde_json::Value;
|
||||||
|
|
||||||
|
use crate::RequestError;
|
||||||
|
|
||||||
|
#[derive(Serialize, Debug)]
|
||||||
|
pub struct Request {
|
||||||
|
pub model: String,
|
||||||
|
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||||
|
pub input: Vec<Value>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub stream: bool,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub temperature: Option<f32>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub top_p: Option<f32>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub max_output_tokens: Option<u64>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub parallel_tool_calls: Option<bool>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub tool_choice: Option<super::ToolChoice>,
|
||||||
|
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||||
|
pub tools: Vec<ToolDefinition>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub prompt_cache_key: Option<String>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub reasoning: Option<ReasoningConfig>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Debug)]
|
||||||
|
pub struct ReasoningConfig {
|
||||||
|
pub effort: super::ReasoningEffort,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Debug)]
|
||||||
|
#[serde(tag = "type", rename_all = "snake_case")]
|
||||||
|
pub enum ToolDefinition {
|
||||||
|
Function {
|
||||||
|
name: String,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
description: Option<String>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
parameters: Option<Value>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
strict: Option<bool>,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize, Debug)]
|
||||||
|
pub struct Error {
|
||||||
|
pub message: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize, Debug)]
|
||||||
|
#[serde(tag = "type")]
|
||||||
|
pub enum StreamEvent {
|
||||||
|
#[serde(rename = "response.created")]
|
||||||
|
Created { response: ResponseSummary },
|
||||||
|
#[serde(rename = "response.in_progress")]
|
||||||
|
InProgress { response: ResponseSummary },
|
||||||
|
#[serde(rename = "response.output_item.added")]
|
||||||
|
OutputItemAdded {
|
||||||
|
output_index: usize,
|
||||||
|
#[serde(default)]
|
||||||
|
sequence_number: Option<u64>,
|
||||||
|
item: ResponseOutputItem,
|
||||||
|
},
|
||||||
|
#[serde(rename = "response.output_item.done")]
|
||||||
|
OutputItemDone {
|
||||||
|
output_index: usize,
|
||||||
|
#[serde(default)]
|
||||||
|
sequence_number: Option<u64>,
|
||||||
|
item: ResponseOutputItem,
|
||||||
|
},
|
||||||
|
#[serde(rename = "response.content_part.added")]
|
||||||
|
ContentPartAdded {
|
||||||
|
item_id: String,
|
||||||
|
output_index: usize,
|
||||||
|
content_index: usize,
|
||||||
|
part: Value,
|
||||||
|
},
|
||||||
|
#[serde(rename = "response.content_part.done")]
|
||||||
|
ContentPartDone {
|
||||||
|
item_id: String,
|
||||||
|
output_index: usize,
|
||||||
|
content_index: usize,
|
||||||
|
part: Value,
|
||||||
|
},
|
||||||
|
#[serde(rename = "response.output_text.delta")]
|
||||||
|
OutputTextDelta {
|
||||||
|
item_id: String,
|
||||||
|
output_index: usize,
|
||||||
|
#[serde(default)]
|
||||||
|
content_index: Option<usize>,
|
||||||
|
delta: String,
|
||||||
|
},
|
||||||
|
#[serde(rename = "response.output_text.done")]
|
||||||
|
OutputTextDone {
|
||||||
|
item_id: String,
|
||||||
|
output_index: usize,
|
||||||
|
#[serde(default)]
|
||||||
|
content_index: Option<usize>,
|
||||||
|
text: String,
|
||||||
|
},
|
||||||
|
#[serde(rename = "response.function_call_arguments.delta")]
|
||||||
|
FunctionCallArgumentsDelta {
|
||||||
|
item_id: String,
|
||||||
|
output_index: usize,
|
||||||
|
delta: String,
|
||||||
|
#[serde(default)]
|
||||||
|
sequence_number: Option<u64>,
|
||||||
|
},
|
||||||
|
#[serde(rename = "response.function_call_arguments.done")]
|
||||||
|
FunctionCallArgumentsDone {
|
||||||
|
item_id: String,
|
||||||
|
output_index: usize,
|
||||||
|
arguments: String,
|
||||||
|
#[serde(default)]
|
||||||
|
sequence_number: Option<u64>,
|
||||||
|
},
|
||||||
|
#[serde(rename = "response.completed")]
|
||||||
|
Completed { response: ResponseSummary },
|
||||||
|
#[serde(rename = "response.incomplete")]
|
||||||
|
Incomplete { response: ResponseSummary },
|
||||||
|
#[serde(rename = "response.failed")]
|
||||||
|
Failed { response: ResponseSummary },
|
||||||
|
#[serde(rename = "response.error")]
|
||||||
|
Error { error: Error },
|
||||||
|
#[serde(rename = "error")]
|
||||||
|
GenericError { error: Error },
|
||||||
|
#[serde(other)]
|
||||||
|
Unknown,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize, Debug, Default, Clone)]
|
||||||
|
pub struct ResponseSummary {
|
||||||
|
#[serde(default)]
|
||||||
|
pub id: Option<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub status: Option<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub status_details: Option<ResponseStatusDetails>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub usage: Option<ResponseUsage>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub output: Vec<ResponseOutputItem>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize, Debug, Default, Clone)]
|
||||||
|
pub struct ResponseStatusDetails {
|
||||||
|
#[serde(default)]
|
||||||
|
pub reason: Option<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub r#type: Option<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub error: Option<Value>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize, Debug, Default, Clone)]
|
||||||
|
pub struct ResponseUsage {
|
||||||
|
#[serde(default)]
|
||||||
|
pub input_tokens: Option<u64>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub output_tokens: Option<u64>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub total_tokens: Option<u64>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize, Debug, Clone)]
|
||||||
|
#[serde(tag = "type", rename_all = "snake_case")]
|
||||||
|
pub enum ResponseOutputItem {
|
||||||
|
Message(ResponseOutputMessage),
|
||||||
|
FunctionCall(ResponseFunctionToolCall),
|
||||||
|
#[serde(other)]
|
||||||
|
Unknown,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize, Debug, Clone)]
|
||||||
|
pub struct ResponseOutputMessage {
|
||||||
|
#[serde(default)]
|
||||||
|
pub id: Option<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub content: Vec<Value>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub role: Option<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub status: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize, Debug, Clone)]
|
||||||
|
pub struct ResponseFunctionToolCall {
|
||||||
|
#[serde(default)]
|
||||||
|
pub id: Option<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub arguments: String,
|
||||||
|
#[serde(default)]
|
||||||
|
pub call_id: Option<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub name: Option<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub status: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn stream_response(
|
||||||
|
client: &dyn HttpClient,
|
||||||
|
provider_name: &str,
|
||||||
|
api_url: &str,
|
||||||
|
api_key: &str,
|
||||||
|
request: Request,
|
||||||
|
) -> Result<BoxStream<'static, Result<StreamEvent>>, RequestError> {
|
||||||
|
let uri = format!("{api_url}/responses");
|
||||||
|
let request_builder = HttpRequest::builder()
|
||||||
|
.method(Method::POST)
|
||||||
|
.uri(uri)
|
||||||
|
.header("Content-Type", "application/json")
|
||||||
|
.header("Authorization", format!("Bearer {}", api_key.trim()));
|
||||||
|
|
||||||
|
let is_streaming = request.stream;
|
||||||
|
let request = request_builder
|
||||||
|
.body(AsyncBody::from(
|
||||||
|
serde_json::to_string(&request).map_err(|e| RequestError::Other(e.into()))?,
|
||||||
|
))
|
||||||
|
.map_err(|e| RequestError::Other(e.into()))?;
|
||||||
|
|
||||||
|
let mut response = client.send(request).await?;
|
||||||
|
if response.status().is_success() {
|
||||||
|
if is_streaming {
|
||||||
|
let reader = BufReader::new(response.into_body());
|
||||||
|
Ok(reader
|
||||||
|
.lines()
|
||||||
|
.filter_map(|line| async move {
|
||||||
|
match line {
|
||||||
|
Ok(line) => {
|
||||||
|
let line = line
|
||||||
|
.strip_prefix("data: ")
|
||||||
|
.or_else(|| line.strip_prefix("data:"))?;
|
||||||
|
if line == "[DONE]" || line.is_empty() {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
match serde_json::from_str::<StreamEvent>(line) {
|
||||||
|
Ok(event) => Some(Ok(event)),
|
||||||
|
Err(error) => {
|
||||||
|
log::error!(
|
||||||
|
"Failed to parse OpenAI responses stream event: `{}`\nResponse: `{}`",
|
||||||
|
error,
|
||||||
|
line,
|
||||||
|
);
|
||||||
|
Some(Err(anyhow!(error)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(error) => Some(Err(anyhow!(error))),
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.boxed())
|
||||||
|
} else {
|
||||||
|
let mut body = String::new();
|
||||||
|
response
|
||||||
|
.body_mut()
|
||||||
|
.read_to_string(&mut body)
|
||||||
|
.await
|
||||||
|
.map_err(|e| RequestError::Other(e.into()))?;
|
||||||
|
|
||||||
|
match serde_json::from_str::<ResponseSummary>(&body) {
|
||||||
|
Ok(response_summary) => {
|
||||||
|
let events = vec![
|
||||||
|
StreamEvent::Created {
|
||||||
|
response: response_summary.clone(),
|
||||||
|
},
|
||||||
|
StreamEvent::InProgress {
|
||||||
|
response: response_summary.clone(),
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
let mut all_events = events;
|
||||||
|
for (output_index, item) in response_summary.output.iter().enumerate() {
|
||||||
|
all_events.push(StreamEvent::OutputItemAdded {
|
||||||
|
output_index,
|
||||||
|
sequence_number: None,
|
||||||
|
item: item.clone(),
|
||||||
|
});
|
||||||
|
|
||||||
|
match item {
|
||||||
|
ResponseOutputItem::Message(message) => {
|
||||||
|
for content_item in &message.content {
|
||||||
|
if let Some(text) = content_item.get("text") {
|
||||||
|
if let Some(text_str) = text.as_str() {
|
||||||
|
if let Some(ref item_id) = message.id {
|
||||||
|
all_events.push(StreamEvent::OutputTextDelta {
|
||||||
|
item_id: item_id.clone(),
|
||||||
|
output_index,
|
||||||
|
content_index: None,
|
||||||
|
delta: text_str.to_string(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ResponseOutputItem::FunctionCall(function_call) => {
|
||||||
|
if let Some(ref item_id) = function_call.id {
|
||||||
|
all_events.push(StreamEvent::FunctionCallArgumentsDone {
|
||||||
|
item_id: item_id.clone(),
|
||||||
|
output_index,
|
||||||
|
arguments: function_call.arguments.clone(),
|
||||||
|
sequence_number: None,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ResponseOutputItem::Unknown => {}
|
||||||
|
}
|
||||||
|
|
||||||
|
all_events.push(StreamEvent::OutputItemDone {
|
||||||
|
output_index,
|
||||||
|
sequence_number: None,
|
||||||
|
item: item.clone(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
all_events.push(StreamEvent::Completed {
|
||||||
|
response: response_summary,
|
||||||
|
});
|
||||||
|
|
||||||
|
Ok(futures::stream::iter(all_events.into_iter().map(Ok)).boxed())
|
||||||
|
}
|
||||||
|
Err(error) => {
|
||||||
|
log::error!(
|
||||||
|
"Failed to parse OpenAI non-streaming response: `{}`\nResponse: `{}`",
|
||||||
|
error,
|
||||||
|
body,
|
||||||
|
);
|
||||||
|
Err(RequestError::Other(anyhow!(error)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
let mut body = String::new();
|
||||||
|
response
|
||||||
|
.body_mut()
|
||||||
|
.read_to_string(&mut body)
|
||||||
|
.await
|
||||||
|
.map_err(|e| RequestError::Other(e.into()))?;
|
||||||
|
|
||||||
|
Err(RequestError::HttpResponseError {
|
||||||
|
provider: provider_name.to_owned(),
|
||||||
|
status_code: response.status(),
|
||||||
|
body,
|
||||||
|
headers: response.headers().clone(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -208,6 +208,8 @@ pub struct OpenAiAvailableModel {
|
||||||
pub max_output_tokens: Option<u64>,
|
pub max_output_tokens: Option<u64>,
|
||||||
pub max_completion_tokens: Option<u64>,
|
pub max_completion_tokens: Option<u64>,
|
||||||
pub reasoning_effort: Option<OpenAiReasoningEffort>,
|
pub reasoning_effort: Option<OpenAiReasoningEffort>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub capabilities: OpenAiModelCapabilities,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone, JsonSchema, MergeFrom)]
|
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone, JsonSchema, MergeFrom)]
|
||||||
|
|
@ -226,6 +228,21 @@ pub struct OpenAiCompatibleSettingsContent {
|
||||||
pub available_models: Vec<OpenAiCompatibleAvailableModel>,
|
pub available_models: Vec<OpenAiCompatibleAvailableModel>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[with_fallible_options]
|
||||||
|
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema, MergeFrom)]
|
||||||
|
pub struct OpenAiModelCapabilities {
|
||||||
|
#[serde(default = "default_true")]
|
||||||
|
pub chat_completions: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Hand-written `Default` so that a missing `capabilities` value (the field is
// declared with `#[serde(default)]`) yields the same result as a present
// value with the `chat_completions` key omitted.
impl Default for OpenAiModelCapabilities {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
// Same helper the field's `#[serde(default = "default_true")]` uses.
chat_completions: default_true(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[with_fallible_options]
|
#[with_fallible_options]
|
||||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema, MergeFrom)]
|
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema, MergeFrom)]
|
||||||
pub struct OpenAiCompatibleAvailableModel {
|
pub struct OpenAiCompatibleAvailableModel {
|
||||||
|
|
@ -245,6 +262,8 @@ pub struct OpenAiCompatibleModelCapabilities {
|
||||||
pub images: bool,
|
pub images: bool,
|
||||||
pub parallel_tool_calls: bool,
|
pub parallel_tool_calls: bool,
|
||||||
pub prompt_cache_key: bool,
|
pub prompt_cache_key: bool,
|
||||||
|
#[serde(default = "default_true")]
|
||||||
|
pub chat_completions: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for OpenAiCompatibleModelCapabilities {
|
impl Default for OpenAiCompatibleModelCapabilities {
|
||||||
|
|
@ -254,6 +273,7 @@ impl Default for OpenAiCompatibleModelCapabilities {
|
||||||
images: false,
|
images: false,
|
||||||
parallel_tool_calls: false,
|
parallel_tool_calls: false,
|
||||||
prompt_cache_key: false,
|
prompt_cache_key: false,
|
||||||
|
chat_completions: default_true(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -469,6 +469,14 @@ To use alternate models, perhaps a preview release, or if you wish to control th
|
||||||
"name": "gpt-4o-2024-08-06",
|
"name": "gpt-4o-2024-08-06",
|
||||||
"display_name": "GPT 4o Summer 2024",
|
"display_name": "GPT 4o Summer 2024",
|
||||||
"max_tokens": 128000
|
"max_tokens": 128000
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "gpt-5-codex",
|
||||||
|
"display_name": "GPT-5 Codex",
|
||||||
|
"max_tokens": 128000,
|
||||||
|
"capabilities": {
|
||||||
|
"chat_completions": false
|
||||||
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
@ -478,7 +486,10 @@ To use alternate models, perhaps a preview release, or if you wish to control th
|
||||||
|
|
||||||
You must provide the model's context window in the `max_tokens` parameter; this can be found in the [OpenAI model documentation](https://platform.openai.com/docs/models).
|
You must provide the model's context window in the `max_tokens` parameter; this can be found in the [OpenAI model documentation](https://platform.openai.com/docs/models).
|
||||||
|
|
||||||
OpenAI `o1` models should set `max_completion_tokens` as well to avoid incurring high reasoning token costs.
|
OpenAI `o1` and other `o`-class models should set `max_completion_tokens` as well to avoid incurring high reasoning token costs.
|
||||||
|
|
||||||
|
If a model does not support the `/chat/completions` endpoint (for example `gpt-5-codex`), set `capabilities.chat_completions` to `false` for that model. Zed will then use the Responses endpoint (`/responses`) instead.
|
||||||
|
|
||||||
Custom models will be listed in the model dropdown in the Agent Panel.
|
Custom models will be listed in the model dropdown in the Agent Panel.
|
||||||
|
|
||||||
### OpenAI API Compatible {#openai-api-compatible}
|
### OpenAI API Compatible {#openai-api-compatible}
|
||||||
|
|
@ -525,6 +536,9 @@ By default, OpenAI-compatible models inherit the following capabilities:
|
||||||
- `images`: false (does not support image inputs)
|
- `images`: false (does not support image inputs)
|
||||||
- `parallel_tool_calls`: false (does not support `parallel_tool_calls` parameter)
|
- `parallel_tool_calls`: false (does not support `parallel_tool_calls` parameter)
|
||||||
- `prompt_cache_key`: false (does not support `prompt_cache_key` parameter)
|
- `prompt_cache_key`: false (does not support `prompt_cache_key` parameter)
|
||||||
|
- `chat_completions`: true (calls the `/chat/completions` endpoint)
|
||||||
|
|
||||||
|
If a provider exposes models that only work with the Responses API, set `chat_completions` to `false` for those entries. Zed uses the Responses endpoint for these models.
|
||||||
|
|
||||||
Note that LLM API keys aren't stored in your settings file.
|
Note that LLM API keys aren't stored in your settings file.
|
||||||
So, ensure you have it set in your environment variables (`<PROVIDER_NAME>_API_KEY=<your api key>`) so your settings can pick it up. In the example above, it would be `TOGETHER_AI_API_KEY=<your api key>`.
|
So, ensure you have it set in your environment variables (`<PROVIDER_NAME>_API_KEY=<your api key>`) so your settings can pick it up. In the example above, it would be `TOGETHER_AI_API_KEY=<your api key>`.
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue