mirror of
https://github.com/zed-industries/zed.git
synced 2026-06-01 03:14:56 +07:00
open_ai: Responses API improvements (#56476)
Release Notes:

- Removed deprecated OpenAI models
- Added support for gpt-5.4-nano/mini models for OpenAI provider
- Improved output quality when using OpenAI models

---------

Co-authored-by: Bennet Bo Fenner <bennetbo@gmx.de>
Co-authored-by: Smit Barmase <heysmitbarmase@gmail.com>
Co-authored-by: Gaauwe Rombouts <mail@grombouts.nl>
This commit is contained in:
parent
6f1409b31c
commit
78c889c21d
10 changed files with 1715 additions and 200 deletions
|
|
@ -3123,6 +3123,57 @@ async fn test_truncate_first_message(cx: &mut TestAppContext) {
|
|||
});
|
||||
}
|
||||
|
||||
// Checks that cache-creation and cache-read tokens are counted as input tokens,
// both in `Thread::latest_token_usage` and in `Thread::tokens_before_message`.
#[gpui::test]
|
||||
async fn test_latest_token_usage_counts_cached_input_tokens(cx: &mut TestAppContext) {
|
||||
let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
|
||||
let fake_model = model.as_fake();
|
||||
|
||||
let message_1_id = UserMessageId::new();
|
||||
thread
|
||||
.update(cx, |thread, cx| {
|
||||
thread.send(message_1_id, ["Message 1"], cx)
|
||||
})
|
||||
.unwrap();
|
||||
cx.run_until_parked();
|
||||
|
||||
fake_model.send_last_completion_stream_text_chunk("Response 1");
// Report usage where part of the prompt is accounted for in the two cache counters.
fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
|
||||
language_model::TokenUsage {
|
||||
input_tokens: 100,
|
||||
output_tokens: 50,
|
||||
cache_creation_input_tokens: 25,
|
||||
cache_read_input_tokens: 75,
|
||||
},
|
||||
));
|
||||
fake_model.end_last_completion_stream();
|
||||
cx.run_until_parked();
|
||||
|
||||
// input_tokens: 200 = 100 input + 25 cache creation + 75 cache read.
// used_tokens: 250 equals that sum plus the 50 output tokens
// (presumably what `total_tokens()` adds up — confirm against its definition).
thread.read_with(cx, |thread, _| {
|
||||
assert_eq!(
|
||||
thread.latest_token_usage(),
|
||||
Some(acp_thread::TokenUsage {
|
||||
used_tokens: 250,
|
||||
max_tokens: 1_000_000,
|
||||
max_output_tokens: None,
|
||||
input_tokens: 200,
|
||||
output_tokens: 50,
|
||||
})
|
||||
);
|
||||
});
|
||||
|
||||
let message_2_id = UserMessageId::new();
|
||||
thread
|
||||
.update(cx, |thread, cx| {
|
||||
thread.send(message_2_id.clone(), ["Message 2"], cx)
|
||||
})
|
||||
.unwrap();
|
||||
cx.run_until_parked();
|
||||
|
||||
// The tokens reported before the second message must match the cache-inclusive
// input total (200) recorded for the first message.
thread.read_with(cx, |thread, _| {
|
||||
assert_eq!(thread.tokens_before_message(&message_2_id), Some(200));
|
||||
});
|
||||
}
|
||||
|
||||
#[gpui::test]
|
||||
async fn test_truncate_second_message(cx: &mut TestAppContext) {
|
||||
let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
|
||||
|
|
|
|||
|
|
@ -1750,11 +1750,13 @@ impl Thread {
|
|||
pub fn latest_token_usage(&self) -> Option<acp_thread::TokenUsage> {
|
||||
let usage = self.latest_request_token_usage()?;
|
||||
let model = self.model.clone()?;
|
||||
let input_tokens = total_input_tokens(usage);
|
||||
|
||||
Some(acp_thread::TokenUsage {
|
||||
max_tokens: model.max_token_count(),
|
||||
max_output_tokens: model.max_output_tokens(),
|
||||
used_tokens: usage.total_tokens(),
|
||||
input_tokens: usage.input_tokens,
|
||||
input_tokens,
|
||||
output_tokens: usage.output_tokens,
|
||||
})
|
||||
}
|
||||
|
|
@ -1773,7 +1775,7 @@ impl Thread {
|
|||
if &user_msg.id == target_id {
|
||||
let prev_id = previous_user_message_id?;
|
||||
let usage = self.request_token_usage.get(prev_id)?;
|
||||
return Some(usage.input_tokens);
|
||||
return Some(total_input_tokens(*usage));
|
||||
}
|
||||
previous_user_message_id = Some(&user_msg.id);
|
||||
}
|
||||
|
|
@ -3224,6 +3226,13 @@ impl Thread {
|
|||
}
|
||||
}
|
||||
|
||||
/// Returns the total number of input tokens for `usage`: the plain
/// `input_tokens` plus `cache_creation_input_tokens` and
/// `cache_read_input_tokens`.
///
/// The additions saturate at `u64::MAX` instead of overflowing.
fn total_input_tokens(usage: language_model::TokenUsage) -> u64 {
|
||||
usage
|
||||
.input_tokens
|
||||
.saturating_add(usage.cache_creation_input_tokens)
|
||||
.saturating_add(usage.cache_read_input_tokens)
|
||||
}
|
||||
|
||||
struct RunningTurn {
|
||||
/// Holds the task that handles agent interaction until the end of the turn.
|
||||
/// Survives across multiple requests as the model performs tool calls and
|
||||
|
|
|
|||
|
|
@ -468,6 +468,7 @@ pub enum ModelMode {
|
|||
#[serde(rename_all = "lowercase")]
|
||||
#[strum(serialize_all = "lowercase")]
|
||||
pub enum ReasoningEffort {
|
||||
None,
|
||||
Minimal,
|
||||
Low,
|
||||
Medium,
|
||||
|
|
|
|||
|
|
@ -217,6 +217,107 @@ impl LanguageModelProvider for OpenAiLanguageModelProvider {
|
|||
}
|
||||
}
|
||||
|
||||
/// Picks the reasoning effort to use by default when thinking is enabled for `model`.
///
/// The result is never `Some(ReasoningEffort::None)`:
/// - the model's own `reasoning_effort()` is used when it is set to anything
///   other than `ReasoningEffort::None`;
/// - otherwise `Medium` is chosen if the model lists it as supported;
/// - otherwise the first supported effort that is not `None` is chosen.
///
/// Returns `None` when the model has no non-`None` effort at all.
fn default_thinking_reasoning_effort(model: &open_ai::Model) -> Option<open_ai::ReasoningEffort> {
|
||||
use open_ai::ReasoningEffort;
|
||||
|
||||
model
|
||||
.reasoning_effort()
|
||||
// A configured effort of `None` is not usable as a thinking default.
.filter(|effort| *effort != ReasoningEffort::None)
|
||||
.or_else(|| {
|
||||
let supported_efforts = model.supported_reasoning_efforts();
|
||||
if supported_efforts.contains(&ReasoningEffort::Medium) {
|
||||
Some(ReasoningEffort::Medium)
|
||||
} else {
|
||||
// Fall back to the first supported effort that actually enables reasoning.
supported_efforts
|
||||
.iter()
|
||||
.copied()
|
||||
.find(|effort| *effort != ReasoningEffort::None)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns `true` when `model` uses the Responses API and supports at least
/// one reasoning effort other than `ReasoningEffort::None`.
fn supports_selectable_thinking_effort(model: &open_ai::Model) -> bool {
|
||||
model.uses_responses_api()
|
||||
&& model
|
||||
.supported_reasoning_efforts()
|
||||
.iter()
|
||||
.any(|effort| *effort != open_ai::ReasoningEffort::None)
|
||||
}
|
||||
|
||||
/// Builds the list of effort levels that can be selected for `model`.
///
/// Returns an empty list when `supports_selectable_thinking_effort(model)` is
/// false. Otherwise yields one `LanguageModelEffortLevel` per supported
/// reasoning effort, in the order the model reports them, skipping
/// `ReasoningEffort::None`. The entry equal to
/// `default_thinking_reasoning_effort(model)` has `is_default` set.
fn supported_thinking_effort_levels(model: &open_ai::Model) -> Vec<LanguageModelEffortLevel> {
|
||||
if !supports_selectable_thinking_effort(model) {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let default_effort = default_thinking_reasoning_effort(model);
|
||||
model
|
||||
.supported_reasoning_efforts()
|
||||
.iter()
|
||||
.copied()
|
||||
.filter_map(|effort| {
|
||||
// Pair of (display name, value string) for each selectable effort.
let (name, value) = match effort {
|
||||
// `None` is not offered as a selectable level.
open_ai::ReasoningEffort::None => return None,
|
||||
open_ai::ReasoningEffort::Minimal => ("Minimal", "minimal"),
|
||||
open_ai::ReasoningEffort::Low => ("Low", "low"),
|
||||
open_ai::ReasoningEffort::Medium => ("Medium", "medium"),
|
||||
open_ai::ReasoningEffort::High => ("High", "high"),
|
||||
open_ai::ReasoningEffort::XHigh => ("Extra High", "xhigh"),
|
||||
};
|
||||
|
||||
Some(LanguageModelEffortLevel {
|
||||
name: name.into(),
|
||||
value: value.into(),
|
||||
is_default: Some(effort) == default_effort,
|
||||
})
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
// Unit tests for the thinking-effort helpers defined above.
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
// `FivePointTwo` must expose low/medium/high/xhigh with `none` hidden,
// and `medium` flagged as the default level.
#[test]
|
||||
fn supported_thinking_effort_levels_hide_none() {
|
||||
let effort_levels = supported_thinking_effort_levels(&open_ai::Model::FivePointTwo);
|
||||
let values = effort_levels
|
||||
.iter()
|
||||
.map(|level| level.value.as_ref())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
assert_eq!(values, ["low", "medium", "high", "xhigh"]);
|
||||
assert_eq!(
|
||||
effort_levels
|
||||
.iter()
|
||||
.find(|level| level.is_default)
|
||||
.map(|level| level.value.as_ref()),
|
||||
Some("medium")
|
||||
);
|
||||
}
|
||||
|
||||
// A custom model configured with `ReasoningEffort::None` must offer no
// selectable effort levels.
#[test]
|
||||
fn models_supporting_only_none_have_no_selectable_thinking_effort() {
|
||||
let model = open_ai::Model::Custom {
|
||||
name: "custom-model".to_string(),
|
||||
display_name: None,
|
||||
max_tokens: 128_000,
|
||||
max_output_tokens: None,
|
||||
max_completion_tokens: None,
|
||||
reasoning_effort: Some(open_ai::ReasoningEffort::None),
|
||||
supports_chat_completions: false,
|
||||
supports_images: true,
|
||||
};
|
||||
|
||||
assert!(!supports_selectable_thinking_effort(&model));
|
||||
assert!(supported_thinking_effort_levels(&model).is_empty());
|
||||
// `None` is still reported by the model itself; it is only hidden from selection.
assert!(
|
||||
model
|
||||
.supported_reasoning_efforts()
|
||||
.contains(&open_ai::ReasoningEffort::None)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
pub struct OpenAiLanguageModel {
|
||||
id: LanguageModelId,
|
||||
model: open_ai::Model,
|
||||
|
|
@ -316,22 +417,20 @@ impl LanguageModel for OpenAiLanguageModel {
|
|||
use open_ai::Model;
|
||||
match &self.model {
|
||||
Model::FourOmniMini
|
||||
| Model::FourPointOneNano
|
||||
| Model::Five
|
||||
| Model::FiveCodex
|
||||
| Model::FiveMini
|
||||
| Model::FiveNano
|
||||
| Model::FivePointOne
|
||||
| Model::FivePointTwo
|
||||
| Model::FivePointTwoCodex
|
||||
| Model::FivePointThreeCodex
|
||||
| Model::FivePointFour
|
||||
| Model::FivePointFourMini
|
||||
| Model::FivePointFourNano
|
||||
| Model::FivePointFourPro
|
||||
| Model::FivePointFive
|
||||
| Model::FivePointFivePro
|
||||
| Model::O1
|
||||
| Model::O3 => true,
|
||||
Model::ThreePointFiveTurbo | Model::Four | Model::FourTurbo | Model::O3Mini => false,
|
||||
Model::Four => false,
|
||||
Model::Custom {
|
||||
supports_images, ..
|
||||
} => *supports_images,
|
||||
|
|
@ -351,34 +450,11 @@ impl LanguageModel for OpenAiLanguageModel {
|
|||
}
|
||||
|
||||
fn supports_thinking(&self) -> bool {
|
||||
self.model.uses_responses_api() && self.model.reasoning_effort().is_some()
|
||||
supports_selectable_thinking_effort(&self.model)
|
||||
}
|
||||
|
||||
fn supported_effort_levels(&self) -> Vec<LanguageModelEffortLevel> {
|
||||
if !self.supports_thinking() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let default_effort = self.model.reasoning_effort();
|
||||
self.model
|
||||
.supported_reasoning_efforts()
|
||||
.iter()
|
||||
.map(|effort| {
|
||||
let (name, value) = match effort {
|
||||
open_ai::ReasoningEffort::Minimal => ("Minimal", "minimal"),
|
||||
open_ai::ReasoningEffort::Low => ("Low", "low"),
|
||||
open_ai::ReasoningEffort::Medium => ("Medium", "medium"),
|
||||
open_ai::ReasoningEffort::High => ("High", "high"),
|
||||
open_ai::ReasoningEffort::XHigh => ("Extra High", "xhigh"),
|
||||
};
|
||||
|
||||
LanguageModelEffortLevel {
|
||||
name: name.into(),
|
||||
value: value.into(),
|
||||
is_default: Some(*effort) == default_effort,
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
supported_thinking_effort_levels(&self.model)
|
||||
}
|
||||
|
||||
fn supports_split_token_display(&self) -> bool {
|
||||
|
|
@ -418,7 +494,10 @@ impl LanguageModel for OpenAiLanguageModel {
|
|||
self.model.supports_parallel_tool_calls(),
|
||||
self.model.supports_prompt_cache_key(),
|
||||
self.max_output_tokens(),
|
||||
self.model.reasoning_effort(),
|
||||
default_thinking_reasoning_effort(&self.model),
|
||||
self.model
|
||||
.supported_reasoning_efforts()
|
||||
.contains(&open_ai::ReasoningEffort::None),
|
||||
);
|
||||
let completions = self.stream_response(request, cx);
|
||||
async move {
|
||||
|
|
|
|||
|
|
@ -397,7 +397,10 @@ impl LanguageModel for OpenAiCompatibleLanguageModel {
|
|||
self.model.capabilities.parallel_tool_calls,
|
||||
self.model.capabilities.prompt_cache_key,
|
||||
self.max_output_tokens(),
|
||||
self.model.reasoning_effort,
|
||||
self.model
|
||||
.reasoning_effort
|
||||
.filter(|effort| *effort != open_ai::ReasoningEffort::None),
|
||||
self.model.reasoning_effort == Some(open_ai::ReasoningEffort::None),
|
||||
);
|
||||
let completions = self.stream_response(request, cx);
|
||||
async move {
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ use crate::provider::open_ai::{
|
|||
|
||||
fn normalize_reasoning_effort(effort: &str) -> Option<ReasoningEffort> {
|
||||
match effort.trim().to_ascii_lowercase().as_str() {
|
||||
"none" => Some(ReasoningEffort::None),
|
||||
"minimal" => Some(ReasoningEffort::Minimal),
|
||||
"low" => Some(ReasoningEffort::Low),
|
||||
"medium" => Some(ReasoningEffort::Medium),
|
||||
|
|
@ -43,6 +44,7 @@ fn normalize_reasoning_effort(effort: &str) -> Option<ReasoningEffort> {
|
|||
|
||||
fn reasoning_effort_display(effort: ReasoningEffort) -> (&'static str, &'static str) {
|
||||
match effort {
|
||||
ReasoningEffort::None => ("None", "none"),
|
||||
ReasoningEffort::Minimal => ("Minimal", "minimal"),
|
||||
ReasoningEffort::Low => ("Low", "low"),
|
||||
ReasoningEffort::Medium => ("Medium", "medium"),
|
||||
|
|
@ -549,13 +551,17 @@ impl LanguageModel for OpenCodeLanguageModel {
|
|||
fn supports_thinking(&self) -> bool {
|
||||
self.model
|
||||
.supported_reasoning_effort_levels()
|
||||
.is_some_and(|levels| !levels.is_empty())
|
||||
.is_some_and(|levels| levels.iter().any(|effort| *effort != ReasoningEffort::None))
|
||||
}
|
||||
|
||||
fn supported_effort_levels(&self) -> Vec<LanguageModelEffortLevel> {
|
||||
self.model
|
||||
.supported_reasoning_effort_levels()
|
||||
.map(|levels| {
|
||||
let levels = levels
|
||||
.into_iter()
|
||||
.filter(|effort| *effort != ReasoningEffort::None)
|
||||
.collect::<Vec<_>>();
|
||||
if levels.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
|
@ -675,21 +681,18 @@ impl LanguageModel for OpenCodeLanguageModel {
|
|||
.boxed()
|
||||
}
|
||||
ApiProtocol::OpenAiResponses => {
|
||||
let reasoning_effort = if request.thinking_allowed {
|
||||
request
|
||||
.thinking_effort
|
||||
.as_deref()
|
||||
.and_then(normalize_reasoning_effort)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let supports_none_reasoning_effort = self
|
||||
.model
|
||||
.supported_reasoning_effort_levels()
|
||||
.is_some_and(|levels| levels.contains(&ReasoningEffort::None));
|
||||
let response_request = into_open_ai_response(
|
||||
request,
|
||||
self.model.id(),
|
||||
false,
|
||||
false,
|
||||
self.model.max_output_tokens(),
|
||||
reasoning_effort,
|
||||
None,
|
||||
supports_none_reasoning_effort,
|
||||
);
|
||||
let stream = self.stream_openai_response(response_request, http_client, cx);
|
||||
async move {
|
||||
|
|
|
|||
|
|
@ -460,7 +460,13 @@ impl<TP: CloudLlmTokenProvider + 'static> LanguageModel for CloudLanguageModel<T
|
|||
let effort = request
|
||||
.thinking_effort
|
||||
.as_ref()
|
||||
.and_then(|effort| open_ai::ReasoningEffort::from_str(effort).ok());
|
||||
.and_then(|effort| open_ai::ReasoningEffort::from_str(effort).ok())
|
||||
.filter(|effort| *effort != open_ai::ReasoningEffort::None);
|
||||
let supports_none_reasoning_effort =
|
||||
self.model.supported_effort_levels.iter().any(|effort| {
|
||||
open_ai::ReasoningEffort::from_str(&effort.value)
|
||||
.is_ok_and(|effort| effort == open_ai::ReasoningEffort::None)
|
||||
});
|
||||
|
||||
let mut request = into_open_ai_response(
|
||||
request,
|
||||
|
|
@ -469,6 +475,7 @@ impl<TP: CloudLlmTokenProvider + 'static> LanguageModel for CloudLanguageModel<T
|
|||
true,
|
||||
None,
|
||||
None,
|
||||
supports_none_reasoning_effort,
|
||||
);
|
||||
|
||||
if enable_thinking && let Some(effort) = effort {
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -58,26 +58,14 @@ impl From<Role> for String {
|
|||
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
|
||||
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
|
||||
pub enum Model {
|
||||
#[serde(rename = "gpt-3.5-turbo")]
|
||||
ThreePointFiveTurbo,
|
||||
#[serde(rename = "gpt-4")]
|
||||
Four,
|
||||
#[serde(rename = "gpt-4-turbo")]
|
||||
FourTurbo,
|
||||
#[serde(rename = "gpt-4o-mini")]
|
||||
FourOmniMini,
|
||||
#[serde(rename = "gpt-4.1-nano")]
|
||||
FourPointOneNano,
|
||||
#[serde(rename = "o1")]
|
||||
O1,
|
||||
#[serde(rename = "o3-mini")]
|
||||
O3Mini,
|
||||
#[serde(rename = "o3")]
|
||||
O3,
|
||||
#[serde(rename = "gpt-5")]
|
||||
Five,
|
||||
#[serde(rename = "gpt-5-codex")]
|
||||
FiveCodex,
|
||||
#[serde(rename = "gpt-5-mini")]
|
||||
#[default]
|
||||
FiveMini,
|
||||
|
|
@ -87,10 +75,12 @@ pub enum Model {
|
|||
FivePointOne,
|
||||
#[serde(rename = "gpt-5.2")]
|
||||
FivePointTwo,
|
||||
#[serde(rename = "gpt-5.2-codex")]
|
||||
FivePointTwoCodex,
|
||||
#[serde(rename = "gpt-5.3-codex")]
|
||||
FivePointThreeCodex,
|
||||
#[serde(rename = "gpt-5.4-nano")]
|
||||
FivePointFourNano,
|
||||
#[serde(rename = "gpt-5.4-mini")]
|
||||
FivePointFourMini,
|
||||
#[serde(rename = "gpt-5.4")]
|
||||
FivePointFour,
|
||||
#[serde(rename = "gpt-5.4-pro")]
|
||||
|
|
@ -130,22 +120,17 @@ impl Model {
|
|||
|
||||
pub fn from_id(id: &str) -> Result<Self> {
|
||||
match id {
|
||||
"gpt-3.5-turbo" => Ok(Self::ThreePointFiveTurbo),
|
||||
"gpt-4" => Ok(Self::Four),
|
||||
"gpt-4-turbo-preview" => Ok(Self::FourTurbo),
|
||||
"gpt-4o-mini" => Ok(Self::FourOmniMini),
|
||||
"gpt-4.1-nano" => Ok(Self::FourPointOneNano),
|
||||
"o1" => Ok(Self::O1),
|
||||
"o3-mini" => Ok(Self::O3Mini),
|
||||
"o3" => Ok(Self::O3),
|
||||
"gpt-5" => Ok(Self::Five),
|
||||
"gpt-5-codex" => Ok(Self::FiveCodex),
|
||||
"gpt-5-mini" => Ok(Self::FiveMini),
|
||||
"gpt-5-nano" => Ok(Self::FiveNano),
|
||||
"gpt-5.1" => Ok(Self::FivePointOne),
|
||||
"gpt-5.2" => Ok(Self::FivePointTwo),
|
||||
"gpt-5.2-codex" => Ok(Self::FivePointTwoCodex),
|
||||
"gpt-5.3-codex" => Ok(Self::FivePointThreeCodex),
|
||||
"gpt-5.4-nano" => Ok(Self::FivePointFourNano),
|
||||
"gpt-5.4-mini" => Ok(Self::FivePointFourMini),
|
||||
"gpt-5.4" => Ok(Self::FivePointFour),
|
||||
"gpt-5.4-pro" => Ok(Self::FivePointFourPro),
|
||||
"gpt-5.5" => Ok(Self::FivePointFive),
|
||||
|
|
@ -156,22 +141,17 @@ impl Model {
|
|||
|
||||
pub fn id(&self) -> &str {
|
||||
match self {
|
||||
Self::ThreePointFiveTurbo => "gpt-3.5-turbo",
|
||||
Self::Four => "gpt-4",
|
||||
Self::FourTurbo => "gpt-4-turbo",
|
||||
Self::FourOmniMini => "gpt-4o-mini",
|
||||
Self::FourPointOneNano => "gpt-4.1-nano",
|
||||
Self::O1 => "o1",
|
||||
Self::O3Mini => "o3-mini",
|
||||
Self::O3 => "o3",
|
||||
Self::Five => "gpt-5",
|
||||
Self::FiveCodex => "gpt-5-codex",
|
||||
Self::FiveMini => "gpt-5-mini",
|
||||
Self::FiveNano => "gpt-5-nano",
|
||||
Self::FivePointOne => "gpt-5.1",
|
||||
Self::FivePointTwo => "gpt-5.2",
|
||||
Self::FivePointTwoCodex => "gpt-5.2-codex",
|
||||
Self::FivePointThreeCodex => "gpt-5.3-codex",
|
||||
Self::FivePointFourNano => "gpt-5.4-nano",
|
||||
Self::FivePointFourMini => "gpt-5.4-mini",
|
||||
Self::FivePointFour => "gpt-5.4",
|
||||
Self::FivePointFourPro => "gpt-5.4-pro",
|
||||
Self::FivePointFive => "gpt-5.5",
|
||||
|
|
@ -182,22 +162,17 @@ impl Model {
|
|||
|
||||
pub fn display_name(&self) -> &str {
|
||||
match self {
|
||||
Self::ThreePointFiveTurbo => "gpt-3.5-turbo",
|
||||
Self::Four => "gpt-4",
|
||||
Self::FourTurbo => "gpt-4-turbo",
|
||||
Self::FourOmniMini => "gpt-4o-mini",
|
||||
Self::FourPointOneNano => "gpt-4.1-nano",
|
||||
Self::O1 => "o1",
|
||||
Self::O3Mini => "o3-mini",
|
||||
Self::O3 => "o3",
|
||||
Self::Five => "gpt-5",
|
||||
Self::FiveCodex => "gpt-5-codex",
|
||||
Self::FiveMini => "gpt-5-mini",
|
||||
Self::FiveNano => "gpt-5-nano",
|
||||
Self::FivePointOne => "gpt-5.1",
|
||||
Self::FivePointTwo => "gpt-5.2",
|
||||
Self::FivePointTwoCodex => "gpt-5.2-codex",
|
||||
Self::FivePointThreeCodex => "gpt-5.3-codex",
|
||||
Self::FivePointFourNano => "gpt-5.4-nano",
|
||||
Self::FivePointFourMini => "gpt-5.4-mini",
|
||||
Self::FivePointFour => "gpt-5.4",
|
||||
Self::FivePointFourPro => "gpt-5.4-pro",
|
||||
Self::FivePointFive => "gpt-5.5",
|
||||
|
|
@ -208,22 +183,17 @@ impl Model {
|
|||
|
||||
pub fn max_token_count(&self) -> u64 {
|
||||
match self {
|
||||
Self::ThreePointFiveTurbo => 16_385,
|
||||
Self::Four => 8_192,
|
||||
Self::FourTurbo => 128_000,
|
||||
Self::FourOmniMini => 128_000,
|
||||
Self::FourPointOneNano => 1_047_576,
|
||||
Self::O1 => 200_000,
|
||||
Self::O3Mini => 200_000,
|
||||
Self::O3 => 200_000,
|
||||
Self::Five => 272_000,
|
||||
Self::FiveCodex => 272_000,
|
||||
Self::FiveMini => 400_000,
|
||||
Self::FiveNano => 400_000,
|
||||
Self::FivePointOne => 400_000,
|
||||
Self::FivePointTwo => 400_000,
|
||||
Self::FivePointTwoCodex => 400_000,
|
||||
Self::FivePointThreeCodex => 400_000,
|
||||
Self::FivePointFourNano => 400_000,
|
||||
Self::FivePointFourMini => 400_000,
|
||||
Self::FivePointFour => 1_050_000,
|
||||
Self::FivePointFourPro => 1_050_000,
|
||||
Self::FivePointFive => 1_050_000,
|
||||
|
|
@ -237,22 +207,17 @@ impl Model {
|
|||
Self::Custom {
|
||||
max_output_tokens, ..
|
||||
} => *max_output_tokens,
|
||||
Self::ThreePointFiveTurbo => Some(4_096),
|
||||
Self::Four => Some(8_192),
|
||||
Self::FourTurbo => Some(4_096),
|
||||
Self::FourOmniMini => Some(16_384),
|
||||
Self::FourPointOneNano => Some(32_768),
|
||||
Self::O1 => Some(100_000),
|
||||
Self::O3Mini => Some(100_000),
|
||||
Self::O3 => Some(100_000),
|
||||
Self::Five => Some(128_000),
|
||||
Self::FiveCodex => Some(128_000),
|
||||
Self::FiveMini => Some(128_000),
|
||||
Self::FiveNano => Some(128_000),
|
||||
Self::FivePointOne => Some(128_000),
|
||||
Self::FivePointTwo => Some(128_000),
|
||||
Self::FivePointTwoCodex => Some(128_000),
|
||||
Self::FivePointThreeCodex => Some(128_000),
|
||||
Self::FivePointFourNano => Some(128_000),
|
||||
Self::FivePointFourMini => Some(128_000),
|
||||
Self::FivePointFour => Some(128_000),
|
||||
Self::FivePointFourPro => Some(128_000),
|
||||
Self::FivePointFive => Some(128_000),
|
||||
|
|
@ -265,18 +230,16 @@ impl Model {
|
|||
Self::Custom {
|
||||
reasoning_effort, ..
|
||||
} => reasoning_effort.to_owned(),
|
||||
Self::O1
|
||||
| Self::O3
|
||||
| Self::O3Mini
|
||||
Self::FivePointOne
|
||||
| Self::FivePointTwo
|
||||
| Self::FivePointFour
|
||||
| Self::FivePointFourMini
|
||||
| Self::FivePointFourNano => Some(ReasoningEffort::None),
|
||||
Self::O3
|
||||
| Self::Five
|
||||
| Self::FiveCodex
|
||||
| Self::FiveMini
|
||||
| Self::FiveNano
|
||||
| Self::FivePointOne
|
||||
| Self::FivePointTwo
|
||||
| Self::FivePointTwoCodex
|
||||
| Self::FivePointThreeCodex
|
||||
| Self::FivePointFour
|
||||
| Self::FivePointFourPro
|
||||
| Self::FivePointFive
|
||||
| Self::FivePointFivePro => Some(ReasoningEffort::Medium),
|
||||
|
|
@ -290,13 +253,20 @@ impl Model {
|
|||
reasoning_effort: Some(effort),
|
||||
..
|
||||
} => match effort {
|
||||
ReasoningEffort::None => &[ReasoningEffort::None],
|
||||
ReasoningEffort::Minimal => &[ReasoningEffort::Minimal],
|
||||
ReasoningEffort::Low => &[ReasoningEffort::Low],
|
||||
ReasoningEffort::Medium => &[ReasoningEffort::Medium],
|
||||
ReasoningEffort::High => &[ReasoningEffort::High],
|
||||
ReasoningEffort::XHigh => &[ReasoningEffort::XHigh],
|
||||
},
|
||||
Self::O1 | Self::O3 | Self::O3Mini | Self::FivePointOne => &[
|
||||
Self::O3 => &[
|
||||
ReasoningEffort::Low,
|
||||
ReasoningEffort::Medium,
|
||||
ReasoningEffort::High,
|
||||
],
|
||||
Self::FivePointOne => &[
|
||||
ReasoningEffort::None,
|
||||
ReasoningEffort::Low,
|
||||
ReasoningEffort::Medium,
|
||||
ReasoningEffort::High,
|
||||
|
|
@ -307,10 +277,13 @@ impl Model {
|
|||
ReasoningEffort::Medium,
|
||||
ReasoningEffort::High,
|
||||
],
|
||||
Self::FiveCodex
|
||||
| Self::FivePointTwoCodex
|
||||
| Self::FivePointThreeCodex
|
||||
| Self::FivePointFourPro => &[
|
||||
Self::FivePointFourPro | Self::FivePointFivePro => &[
|
||||
ReasoningEffort::Medium,
|
||||
ReasoningEffort::High,
|
||||
ReasoningEffort::XHigh,
|
||||
],
|
||||
Self::FivePointThreeCodex => &[
|
||||
ReasoningEffort::Low,
|
||||
ReasoningEffort::Medium,
|
||||
ReasoningEffort::High,
|
||||
ReasoningEffort::XHigh,
|
||||
|
|
@ -318,7 +291,9 @@ impl Model {
|
|||
Self::FivePointTwo
|
||||
| Self::FivePointFour
|
||||
| Self::FivePointFive
|
||||
| Self::FivePointFivePro => &[
|
||||
| Self::FivePointFourMini
|
||||
| Self::FivePointFourNano => &[
|
||||
ReasoningEffort::None,
|
||||
ReasoningEffort::Low,
|
||||
ReasoningEffort::Medium,
|
||||
ReasoningEffort::High,
|
||||
|
|
@ -343,24 +318,21 @@ impl Model {
|
|||
/// If the model does not support the parameter, do not pass it up, or the API will return an error.
|
||||
pub fn supports_parallel_tool_calls(&self) -> bool {
|
||||
match self {
|
||||
Self::ThreePointFiveTurbo
|
||||
| Self::Four
|
||||
| Self::FourTurbo
|
||||
Self::Four
|
||||
| Self::FourOmniMini
|
||||
| Self::FourPointOneNano
|
||||
| Self::Five
|
||||
| Self::FiveCodex
|
||||
| Self::FiveMini
|
||||
| Self::FivePointOne
|
||||
| Self::FivePointTwo
|
||||
| Self::FivePointTwoCodex
|
||||
| Self::FivePointThreeCodex
|
||||
| Self::FivePointFour
|
||||
| Self::FivePointFourMini
|
||||
| Self::FivePointFourNano
|
||||
| Self::FivePointFourPro
|
||||
| Self::FivePointFive
|
||||
| Self::FivePointFivePro
|
||||
| Self::FiveNano => true,
|
||||
Self::O1 | Self::O3 | Self::O3Mini | Model::Custom { .. } => false,
|
||||
Self::O3 | Model::Custom { .. } => false,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -372,6 +344,81 @@ impl Model {
|
|||
}
|
||||
}
|
||||
|
||||
// Unit tests pinning the default and supported reasoning efforts of selected models.
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{Model, ReasoningEffort};
|
||||
|
||||
// `FivePointOne` defaults to `None` and supports none/low/medium/high.
#[test]
|
||||
fn gpt_5_1_uses_none_reasoning_by_default() {
|
||||
let expected_efforts = [
|
||||
ReasoningEffort::None,
|
||||
ReasoningEffort::Low,
|
||||
ReasoningEffort::Medium,
|
||||
ReasoningEffort::High,
|
||||
];
|
||||
|
||||
assert_eq!(
|
||||
Model::FivePointOne.reasoning_effort(),
|
||||
Some(ReasoningEffort::None)
|
||||
);
|
||||
assert_eq!(
|
||||
Model::FivePointOne.supported_reasoning_efforts(),
|
||||
expected_efforts.as_slice()
|
||||
);
|
||||
}
|
||||
|
||||
// `FivePointTwo`, `FivePointFour` and `FivePointFive` share the same supported
// list (none through xhigh). The first two default to `None`, while
// `FivePointFive` defaults to `Medium`.
#[test]
|
||||
fn newer_frontier_models_support_none_reasoning() {
|
||||
let expected_efforts = [
|
||||
ReasoningEffort::None,
|
||||
ReasoningEffort::Low,
|
||||
ReasoningEffort::Medium,
|
||||
ReasoningEffort::High,
|
||||
ReasoningEffort::XHigh,
|
||||
];
|
||||
|
||||
assert_eq!(
|
||||
Model::FivePointTwo.reasoning_effort(),
|
||||
Some(ReasoningEffort::None)
|
||||
);
|
||||
assert_eq!(
|
||||
Model::FivePointTwo.supported_reasoning_efforts(),
|
||||
expected_efforts.as_slice()
|
||||
);
|
||||
assert_eq!(
|
||||
Model::FivePointFour.reasoning_effort(),
|
||||
Some(ReasoningEffort::None)
|
||||
);
|
||||
assert_eq!(
|
||||
Model::FivePointFour.supported_reasoning_efforts(),
|
||||
expected_efforts.as_slice()
|
||||
);
|
||||
assert_eq!(
|
||||
Model::FivePointFive.reasoning_effort(),
|
||||
Some(ReasoningEffort::Medium)
|
||||
);
|
||||
assert_eq!(
|
||||
Model::FivePointFive.supported_reasoning_efforts(),
|
||||
expected_efforts.as_slice()
|
||||
);
|
||||
}
|
||||
|
||||
// `FivePointThreeCodex` supports low/medium/high/xhigh and does not list `None`.
#[test]
|
||||
fn newer_codex_models_support_low_reasoning_effort() {
|
||||
let expected_efforts = [
|
||||
ReasoningEffort::Low,
|
||||
ReasoningEffort::Medium,
|
||||
ReasoningEffort::High,
|
||||
ReasoningEffort::XHigh,
|
||||
];
|
||||
|
||||
assert_eq!(
|
||||
Model::FivePointThreeCodex.supported_reasoning_efforts(),
|
||||
expected_efforts.as_slice()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct StreamOptions {
|
||||
pub include_usage: bool,
|
||||
|
|
|
|||
|
|
@ -11,6 +11,9 @@ pub struct Request {
|
|||
pub model: String,
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
pub input: Vec<ResponseInputItem>,
|
||||
pub store: bool,
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
pub include: Vec<ResponseIncludable>,
|
||||
#[serde(default)]
|
||||
pub stream: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
|
|
@ -31,18 +34,28 @@ pub struct Request {
|
|||
pub reasoning: Option<ReasoningConfig>,
|
||||
}
|
||||
|
||||
/// Entries of the `include` list on a Responses API `Request`.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ResponseIncludable {
|
||||
/// Serialized as the literal string `"reasoning.encrypted_content"`
/// (explicit rename overriding the enum-level `snake_case` rule).
#[serde(rename = "reasoning.encrypted_content")]
|
||||
ReasoningEncryptedContent,
|
||||
}
|
||||
|
||||
/// One element of the `input` list on a Responses API `Request`.
///
/// Internally tagged through a `type` field whose value is the snake_case
/// variant name (`message`, `function_call`, `function_call_output`,
/// `reasoning`).
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
pub enum ResponseInputItem {
|
||||
Message(ResponseMessageItem),
|
||||
FunctionCall(ResponseFunctionCallItem),
|
||||
FunctionCallOutput(ResponseFunctionCallOutputItem),
|
||||
Reasoning(ResponseReasoningInputItem),
|
||||
}
|
||||
|
||||
/// Payload of `ResponseInputItem::Message`: a role together with its content parts.
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct ResponseMessageItem {
|
||||
pub role: Role,
|
||||
pub content: Vec<ResponseInputContent>,
|
||||
/// Optional phase label; defaults to `None` when missing on input and is
/// left out of the serialized output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub phase: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
|
|
@ -58,6 +71,26 @@ pub struct ResponseFunctionCallOutputItem {
|
|||
pub output: ResponseFunctionCallOutputContent,
|
||||
}
|
||||
|
||||
/// Payload of `ResponseInputItem::Reasoning`: a reasoning item passed back as request input.
///
/// Every field falls back to its default when absent during deserialization.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct ResponseReasoningInputItem {
|
||||
/// Omitted from the serialized output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub id: Option<String>,
|
||||
/// Has no `skip_serializing_if`, so it is serialized even when empty
/// (presumably because the API expects the field to be present — confirm).
#[serde(default)]
|
||||
pub summary: Vec<ResponseReasoningSummaryPart>,
|
||||
/// Untyped JSON content parts; omitted from the serialized output when empty.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub content: Vec<Value>,
|
||||
/// Omitted from the serialized output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub encrypted_content: Option<String>,
|
||||
/// Omitted from the serialized output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub status: Option<String>,
|
||||
}
|
||||
|
||||
/// One entry of `ResponseReasoningInputItem::summary`.
///
/// Internally tagged through a `type` field; `SummaryText` is written as
/// `"type": "summary_text"` next to its `text`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
pub enum ResponseReasoningSummaryPart {
|
||||
SummaryText { text: String },
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[serde(untagged)]
|
||||
pub enum ResponseFunctionCallOutputContent {
|
||||
|
|
@ -111,9 +144,13 @@ pub enum ToolDefinition {
|
|||
},
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
pub struct Error {
|
||||
#[derive(Deserialize, Debug, Clone)]
|
||||
pub struct ResponseError {
|
||||
#[serde(default)]
|
||||
pub code: Option<String>,
|
||||
pub message: String,
|
||||
#[serde(default)]
|
||||
pub param: Option<Value>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
|
|
@ -167,6 +204,24 @@ pub enum StreamEvent {
|
|||
content_index: Option<usize>,
|
||||
text: String,
|
||||
},
|
||||
#[serde(rename = "response.refusal.delta")]
|
||||
RefusalDelta {
|
||||
item_id: String,
|
||||
output_index: usize,
|
||||
content_index: usize,
|
||||
delta: String,
|
||||
#[serde(default)]
|
||||
sequence_number: Option<u64>,
|
||||
},
|
||||
#[serde(rename = "response.refusal.done")]
|
||||
RefusalDone {
|
||||
item_id: String,
|
||||
output_index: usize,
|
||||
content_index: usize,
|
||||
refusal: String,
|
||||
#[serde(default)]
|
||||
sequence_number: Option<u64>,
|
||||
},
|
||||
#[serde(rename = "response.reasoning_summary_part.added")]
|
||||
ReasoningSummaryPartAdded {
|
||||
item_id: String,
|
||||
|
|
@ -214,9 +269,12 @@ pub enum StreamEvent {
|
|||
#[serde(rename = "response.failed")]
|
||||
Failed { response: ResponseSummary },
|
||||
#[serde(rename = "response.error")]
|
||||
Error { error: Error },
|
||||
Error { error: ResponseError },
|
||||
#[serde(rename = "error")]
|
||||
GenericError { error: Error },
|
||||
GenericError {
|
||||
#[serde(flatten)]
|
||||
error: ResponseError,
|
||||
},
|
||||
#[serde(other)]
|
||||
Unknown,
|
||||
}
|
||||
|
|
@ -228,7 +286,9 @@ pub struct ResponseSummary {
|
|||
#[serde(default)]
|
||||
pub status: Option<String>,
|
||||
#[serde(default)]
|
||||
pub status_details: Option<ResponseStatusDetails>,
|
||||
pub incomplete_details: Option<ResponseIncompleteDetails>,
|
||||
#[serde(default)]
|
||||
pub error: Option<ResponseError>,
|
||||
#[serde(default)]
|
||||
pub usage: Option<ResponseUsage>,
|
||||
#[serde(default)]
|
||||
|
|
@ -236,13 +296,9 @@ pub struct ResponseSummary {
|
|||
}
|
||||
|
||||
#[derive(Deserialize, Debug, Default, Clone)]
|
||||
pub struct ResponseStatusDetails {
|
||||
pub struct ResponseIncompleteDetails {
|
||||
#[serde(default)]
|
||||
pub reason: Option<String>,
|
||||
#[serde(default)]
|
||||
pub r#type: Option<String>,
|
||||
#[serde(default)]
|
||||
pub error: Option<Value>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug, Default, Clone)]
|
||||
|
|
@ -250,11 +306,27 @@ pub struct ResponseUsage {
|
|||
#[serde(default)]
|
||||
pub input_tokens: Option<u64>,
|
||||
#[serde(default)]
|
||||
pub input_tokens_details: ResponseInputTokensDetails,
|
||||
#[serde(default)]
|
||||
pub output_tokens: Option<u64>,
|
||||
#[serde(default)]
|
||||
pub output_tokens_details: ResponseOutputTokensDetails,
|
||||
#[serde(default)]
|
||||
pub total_tokens: Option<u64>,
|
||||
}
|
||||
|
||||
/// Breakdown of input tokens, stored in `ResponseUsage::input_tokens_details`.
#[derive(Deserialize, Debug, Default, Clone)]
|
||||
pub struct ResponseInputTokensDetails {
|
||||
/// Defaults to 0 when the field is missing from the payload.
#[serde(default)]
|
||||
pub cached_tokens: u64,
|
||||
}
|
||||
|
||||
/// Breakdown of output tokens, stored in `ResponseUsage::output_tokens_details`.
#[derive(Deserialize, Debug, Default, Clone)]
|
||||
pub struct ResponseOutputTokensDetails {
|
||||
/// Defaults to 0 when the field is missing from the payload.
#[serde(default)]
|
||||
pub reasoning_tokens: u64,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug, Clone)]
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
pub enum ResponseOutputItem {
|
||||
|
|
@ -271,6 +343,12 @@ pub struct ResponseReasoningItem {
|
|||
pub id: Option<String>,
|
||||
#[serde(default)]
|
||||
pub summary: Vec<ReasoningSummaryPart>,
|
||||
#[serde(default)]
|
||||
pub content: Vec<Value>,
|
||||
#[serde(default)]
|
||||
pub encrypted_content: Option<String>,
|
||||
#[serde(default)]
|
||||
pub status: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug, Clone)]
|
||||
|
|
@ -293,6 +371,8 @@ pub struct ResponseOutputMessage {
|
|||
pub role: Option<String>,
|
||||
#[serde(default)]
|
||||
pub status: Option<String>,
|
||||
#[serde(default)]
|
||||
pub phase: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug, Clone)]
|
||||
|
|
@ -441,8 +521,17 @@ pub async fn stream_response(
|
|||
});
|
||||
}
|
||||
|
||||
all_events.push(StreamEvent::Completed {
|
||||
response: response_summary,
|
||||
let status = response_summary.status.clone();
|
||||
all_events.push(match status.as_deref() {
|
||||
Some("incomplete") => StreamEvent::Incomplete {
|
||||
response: response_summary,
|
||||
},
|
||||
Some("failed") => StreamEvent::Failed {
|
||||
response: response_summary,
|
||||
},
|
||||
_ => StreamEvent::Completed {
|
||||
response: response_summary,
|
||||
},
|
||||
});
|
||||
|
||||
Ok(futures::stream::iter(all_events.into_iter().map(Ok)).boxed())
|
||||
|
|
|
|||
Loading…
Reference in a new issue