open_ai: Responses API improvements (#56476)

Release Notes:

- Removed deprecated OpenAI models
- Added support for the gpt-5.4-nano and gpt-5.4-mini models in the OpenAI provider
- Improved output quality when using OpenAI models

---------

Co-authored-by: Bennet Bo Fenner <bennetbo@gmx.de>
Co-authored-by: Smit Barmase <heysmitbarmase@gmail.com>
Co-authored-by: Gaauwe Rombouts <mail@grombouts.nl>
This commit is contained in:
Ben Brandt 2026-05-12 16:47:16 +02:00 committed by GitHub
parent 6f1409b31c
commit 78c889c21d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 1715 additions and 200 deletions

View file

@ -3123,6 +3123,57 @@ async fn test_truncate_first_message(cx: &mut TestAppContext) {
});
}
/// Verifies that cached input tokens (both cache-creation and cache-read) are
/// folded into the input token count reported by `latest_token_usage` and by
/// `tokens_before_message`.
#[gpui::test]
async fn test_latest_token_usage_counts_cached_input_tokens(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // First turn: send a message and let the fake model answer it.
    let first_message_id = UserMessageId::new();
    thread
        .update(cx, |this, cx| this.send(first_message_id, ["Message 1"], cx))
        .unwrap();
    cx.run_until_parked();

    fake_model.send_last_completion_stream_text_chunk("Response 1");
    // 100 uncached + 25 cache-creation + 75 cache-read input tokens, 50 output tokens.
    let reported_usage = language_model::TokenUsage {
        input_tokens: 100,
        output_tokens: 50,
        cache_creation_input_tokens: 25,
        cache_read_input_tokens: 75,
    };
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
        reported_usage,
    ));
    fake_model.end_last_completion_stream();
    cx.run_until_parked();

    // The input figure must include all three input components (100 + 25 + 75).
    let expected_usage = acp_thread::TokenUsage {
        used_tokens: 250,
        max_tokens: 1_000_000,
        max_output_tokens: None,
        input_tokens: 200,
        output_tokens: 50,
    };
    thread.read_with(cx, |this, _| {
        assert_eq!(this.latest_token_usage(), Some(expected_usage));
    });

    // Second turn: the tokens recorded before it are the first turn's total input.
    let second_message_id = UserMessageId::new();
    thread
        .update(cx, |this, cx| {
            this.send(second_message_id.clone(), ["Message 2"], cx)
        })
        .unwrap();
    cx.run_until_parked();

    thread.read_with(cx, |this, _| {
        assert_eq!(this.tokens_before_message(&second_message_id), Some(200));
    });
}
#[gpui::test]
async fn test_truncate_second_message(cx: &mut TestAppContext) {
let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;

View file

@ -1750,11 +1750,13 @@ impl Thread {
/// Builds an `acp_thread::TokenUsage` snapshot from the most recent request's
/// token usage and the current model's limits.
///
/// Returns `None` when `latest_request_token_usage()` yields nothing or when
/// no model is set.
// NOTE(review): `input_tokens` is listed twice in the struct literal below.
// This looks like a diff view with its +/- markers stripped, where
// `input_tokens: usage.input_tokens,` is presumably the removed line and the
// shorthand `input_tokens,` (bound to `total_input_tokens(usage)`) its
// replacement — confirm against the repository.
pub fn latest_token_usage(&self) -> Option<acp_thread::TokenUsage> {
let usage = self.latest_request_token_usage()?;
let model = self.model.clone()?;
// Input count including the cache-creation and cache-read components.
let input_tokens = total_input_tokens(usage);
Some(acp_thread::TokenUsage {
max_tokens: model.max_token_count(),
max_output_tokens: model.max_output_tokens(),
used_tokens: usage.total_tokens(),
input_tokens: usage.input_tokens,
input_tokens,
output_tokens: usage.output_tokens,
})
}
@ -1773,7 +1775,7 @@ impl Thread {
if &user_msg.id == target_id {
let prev_id = previous_user_message_id?;
let usage = self.request_token_usage.get(prev_id)?;
return Some(usage.input_tokens);
return Some(total_input_tokens(*usage));
}
previous_user_message_id = Some(&user_msg.id);
}
@ -3224,6 +3226,13 @@ impl Thread {
}
}
/// Returns the full input-side token count for `usage`: the plain input
/// tokens plus the cache-creation and cache-read input tokens.
///
/// The sum saturates at `u64::MAX` instead of overflowing.
fn total_input_tokens(usage: language_model::TokenUsage) -> u64 {
    [
        usage.input_tokens,
        usage.cache_creation_input_tokens,
        usage.cache_read_input_tokens,
    ]
    .into_iter()
    .fold(0, u64::saturating_add)
}
struct RunningTurn {
/// Holds the task that handles agent interaction until the end of the turn.
/// Survives across multiple requests as the model performs tool calls and

View file

@ -468,6 +468,7 @@ pub enum ModelMode {
#[serde(rename_all = "lowercase")]
#[strum(serialize_all = "lowercase")]
pub enum ReasoningEffort {
None,
Minimal,
Low,
Medium,

View file

@ -217,6 +217,107 @@ impl LanguageModelProvider for OpenAiLanguageModelProvider {
}
}
/// Chooses the reasoning effort to use by default when thinking is enabled.
///
/// Resolution order:
/// 1. the model's own `reasoning_effort()`, unless that is `ReasoningEffort::None`;
/// 2. `ReasoningEffort::Medium`, if the model lists it as supported;
/// 3. the first supported effort that is not `ReasoningEffort::None`.
///
/// Returns `None` when none of these yields a value.
fn default_thinking_reasoning_effort(model: &open_ai::Model) -> Option<open_ai::ReasoningEffort> {
    use open_ai::ReasoningEffort;

    // A configured default wins, but `None` means "no thinking" and therefore
    // cannot serve as the default *thinking* effort.
    if let Some(configured) = model.reasoning_effort() {
        if configured != ReasoningEffort::None {
            return Some(configured);
        }
    }

    let supported = model.supported_reasoning_efforts();
    if supported.contains(&ReasoningEffort::Medium) {
        return Some(ReasoningEffort::Medium);
    }

    supported
        .iter()
        .copied()
        .find(|candidate| *candidate != ReasoningEffort::None)
}
/// Returns `true` when the model uses the Responses API and supports at least
/// one reasoning effort other than `ReasoningEffort::None`.
fn supports_selectable_thinking_effort(model: &open_ai::Model) -> bool {
    if !model.uses_responses_api() {
        return false;
    }

    model
        .supported_reasoning_efforts()
        .iter()
        .any(|candidate| *candidate != open_ai::ReasoningEffort::None)
}
fn supported_thinking_effort_levels(model: &open_ai::Model) -> Vec<LanguageModelEffortLevel> {
if !supports_selectable_thinking_effort(model) {
return Vec::new();
}
let default_effort = default_thinking_reasoning_effort(model);
model
.supported_reasoning_efforts()
.iter()
.copied()
.filter_map(|effort| {
let (name, value) = match effort {
open_ai::ReasoningEffort::None => return None,
open_ai::ReasoningEffort::Minimal => ("Minimal", "minimal"),
open_ai::ReasoningEffort::Low => ("Low", "low"),
open_ai::ReasoningEffort::Medium => ("Medium", "medium"),
open_ai::ReasoningEffort::High => ("High", "high"),
open_ai::ReasoningEffort::XHigh => ("Extra High", "xhigh"),
};
Some(LanguageModelEffortLevel {
name: name.into(),
value: value.into(),
is_default: Some(effort) == default_effort,
})
})
.collect()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `None` must not be offered as a selectable level, and the default falls
    /// back to "medium" for a model whose own default effort is `None`.
    #[test]
    fn supported_thinking_effort_levels_hide_none() {
        let levels = supported_thinking_effort_levels(&open_ai::Model::FivePointTwo);

        let offered_values: Vec<_> = levels.iter().map(|level| level.value.as_ref()).collect();
        assert_eq!(offered_values, ["low", "medium", "high", "xhigh"]);

        let default_value = levels
            .iter()
            .find(|level| level.is_default)
            .map(|level| level.value.as_ref());
        assert_eq!(default_value, Some("medium"));
    }

    /// A model whose only supported effort is `None` exposes no thinking
    /// controls, even though `None` itself is still reported as supported.
    #[test]
    fn models_supporting_only_none_have_no_selectable_thinking_effort() {
        let none_only_model = open_ai::Model::Custom {
            name: "custom-model".to_string(),
            display_name: None,
            max_tokens: 128_000,
            max_output_tokens: None,
            max_completion_tokens: None,
            reasoning_effort: Some(open_ai::ReasoningEffort::None),
            supports_chat_completions: false,
            supports_images: true,
        };

        assert!(!supports_selectable_thinking_effort(&none_only_model));
        assert!(supported_thinking_effort_levels(&none_only_model).is_empty());

        let supported = none_only_model.supported_reasoning_efforts();
        assert!(supported.contains(&open_ai::ReasoningEffort::None));
    }
}
pub struct OpenAiLanguageModel {
id: LanguageModelId,
model: open_ai::Model,
@ -316,22 +417,20 @@ impl LanguageModel for OpenAiLanguageModel {
use open_ai::Model;
match &self.model {
Model::FourOmniMini
| Model::FourPointOneNano
| Model::Five
| Model::FiveCodex
| Model::FiveMini
| Model::FiveNano
| Model::FivePointOne
| Model::FivePointTwo
| Model::FivePointTwoCodex
| Model::FivePointThreeCodex
| Model::FivePointFour
| Model::FivePointFourMini
| Model::FivePointFourNano
| Model::FivePointFourPro
| Model::FivePointFive
| Model::FivePointFivePro
| Model::O1
| Model::O3 => true,
Model::ThreePointFiveTurbo | Model::Four | Model::FourTurbo | Model::O3Mini => false,
Model::Four => false,
Model::Custom {
supports_images, ..
} => *supports_images,
@ -351,34 +450,11 @@ impl LanguageModel for OpenAiLanguageModel {
}
/// Reports whether this model exposes thinking (reasoning) to the user.
// NOTE(review): two tail expressions appear in this body. This looks like a
// diff view with its +/- markers stripped: presumably the first expression is
// the removed implementation and the call to
// `supports_selectable_thinking_effort` is its replacement — confirm against
// the repository.
fn supports_thinking(&self) -> bool {
self.model.uses_responses_api() && self.model.reasoning_effort().is_some()
supports_selectable_thinking_effort(&self.model)
}
/// Lists the effort levels that can be selected for this model.
// NOTE(review): this body appears to interleave the previous inline
// implementation (everything through `.collect()`) with its replacement (the
// final call to `supported_thinking_effort_levels`); diff markers are not
// visible here. Presumably only the final call remains after the change —
// confirm against the repository.
fn supported_effort_levels(&self) -> Vec<LanguageModelEffortLevel> {
if !self.supports_thinking() {
return Vec::new();
}
let default_effort = self.model.reasoning_effort();
self.model
.supported_reasoning_efforts()
.iter()
.map(|effort| {
// (display name, wire value) for each effort.
let (name, value) = match effort {
open_ai::ReasoningEffort::Minimal => ("Minimal", "minimal"),
open_ai::ReasoningEffort::Low => ("Low", "low"),
open_ai::ReasoningEffort::Medium => ("Medium", "medium"),
open_ai::ReasoningEffort::High => ("High", "high"),
open_ai::ReasoningEffort::XHigh => ("Extra High", "xhigh"),
};
LanguageModelEffortLevel {
name: name.into(),
value: value.into(),
is_default: Some(*effort) == default_effort,
}
})
.collect()
supported_thinking_effort_levels(&self.model)
}
fn supports_split_token_display(&self) -> bool {
@ -418,7 +494,10 @@ impl LanguageModel for OpenAiLanguageModel {
self.model.supports_parallel_tool_calls(),
self.model.supports_prompt_cache_key(),
self.max_output_tokens(),
self.model.reasoning_effort(),
default_thinking_reasoning_effort(&self.model),
self.model
.supported_reasoning_efforts()
.contains(&open_ai::ReasoningEffort::None),
);
let completions = self.stream_response(request, cx);
async move {

View file

@ -397,7 +397,10 @@ impl LanguageModel for OpenAiCompatibleLanguageModel {
self.model.capabilities.parallel_tool_calls,
self.model.capabilities.prompt_cache_key,
self.max_output_tokens(),
self.model.reasoning_effort,
self.model
.reasoning_effort
.filter(|effort| *effort != open_ai::ReasoningEffort::None),
self.model.reasoning_effort == Some(open_ai::ReasoningEffort::None),
);
let completions = self.stream_response(request, cx);
async move {

View file

@ -32,6 +32,7 @@ use crate::provider::open_ai::{
fn normalize_reasoning_effort(effort: &str) -> Option<ReasoningEffort> {
match effort.trim().to_ascii_lowercase().as_str() {
"none" => Some(ReasoningEffort::None),
"minimal" => Some(ReasoningEffort::Minimal),
"low" => Some(ReasoningEffort::Low),
"medium" => Some(ReasoningEffort::Medium),
@ -43,6 +44,7 @@ fn normalize_reasoning_effort(effort: &str) -> Option<ReasoningEffort> {
fn reasoning_effort_display(effort: ReasoningEffort) -> (&'static str, &'static str) {
match effort {
ReasoningEffort::None => ("None", "none"),
ReasoningEffort::Minimal => ("Minimal", "minimal"),
ReasoningEffort::Low => ("Low", "low"),
ReasoningEffort::Medium => ("Medium", "medium"),
@ -549,13 +551,17 @@ impl LanguageModel for OpenCodeLanguageModel {
/// Reports whether the model advertises any usable reasoning effort level.
// NOTE(review): two `.is_some_and(...)` calls are chained here, which would
// not type-check as written. This looks like a diff view with its +/- markers
// stripped: presumably the `!levels.is_empty()` line is the removed check and
// the line that ignores `ReasoningEffort::None` is its replacement — confirm
// against the repository.
fn supports_thinking(&self) -> bool {
self.model
.supported_reasoning_effort_levels()
.is_some_and(|levels| !levels.is_empty())
.is_some_and(|levels| levels.iter().any(|effort| *effort != ReasoningEffort::None))
}
fn supported_effort_levels(&self) -> Vec<LanguageModelEffortLevel> {
self.model
.supported_reasoning_effort_levels()
.map(|levels| {
let levels = levels
.into_iter()
.filter(|effort| *effort != ReasoningEffort::None)
.collect::<Vec<_>>();
if levels.is_empty() {
return Vec::new();
}
@ -675,21 +681,18 @@ impl LanguageModel for OpenCodeLanguageModel {
.boxed()
}
ApiProtocol::OpenAiResponses => {
let reasoning_effort = if request.thinking_allowed {
request
.thinking_effort
.as_deref()
.and_then(normalize_reasoning_effort)
} else {
None
};
let supports_none_reasoning_effort = self
.model
.supported_reasoning_effort_levels()
.is_some_and(|levels| levels.contains(&ReasoningEffort::None));
let response_request = into_open_ai_response(
request,
self.model.id(),
false,
false,
self.model.max_output_tokens(),
reasoning_effort,
None,
supports_none_reasoning_effort,
);
let stream = self.stream_openai_response(response_request, http_client, cx);
async move {

View file

@ -460,7 +460,13 @@ impl<TP: CloudLlmTokenProvider + 'static> LanguageModel for CloudLanguageModel<T
let effort = request
.thinking_effort
.as_ref()
.and_then(|effort| open_ai::ReasoningEffort::from_str(effort).ok());
.and_then(|effort| open_ai::ReasoningEffort::from_str(effort).ok())
.filter(|effort| *effort != open_ai::ReasoningEffort::None);
let supports_none_reasoning_effort =
self.model.supported_effort_levels.iter().any(|effort| {
open_ai::ReasoningEffort::from_str(&effort.value)
.is_ok_and(|effort| effort == open_ai::ReasoningEffort::None)
});
let mut request = into_open_ai_response(
request,
@ -469,6 +475,7 @@ impl<TP: CloudLlmTokenProvider + 'static> LanguageModel for CloudLanguageModel<T
true,
None,
None,
supports_none_reasoning_effort,
);
if enable_thinking && let Some(effort) = effort {

File diff suppressed because it is too large Load diff

View file

@ -58,26 +58,14 @@ impl From<Role> for String {
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
pub enum Model {
#[serde(rename = "gpt-3.5-turbo")]
ThreePointFiveTurbo,
#[serde(rename = "gpt-4")]
Four,
#[serde(rename = "gpt-4-turbo")]
FourTurbo,
#[serde(rename = "gpt-4o-mini")]
FourOmniMini,
#[serde(rename = "gpt-4.1-nano")]
FourPointOneNano,
#[serde(rename = "o1")]
O1,
#[serde(rename = "o3-mini")]
O3Mini,
#[serde(rename = "o3")]
O3,
#[serde(rename = "gpt-5")]
Five,
#[serde(rename = "gpt-5-codex")]
FiveCodex,
#[serde(rename = "gpt-5-mini")]
#[default]
FiveMini,
@ -87,10 +75,12 @@ pub enum Model {
FivePointOne,
#[serde(rename = "gpt-5.2")]
FivePointTwo,
#[serde(rename = "gpt-5.2-codex")]
FivePointTwoCodex,
#[serde(rename = "gpt-5.3-codex")]
FivePointThreeCodex,
#[serde(rename = "gpt-5.4-nano")]
FivePointFourNano,
#[serde(rename = "gpt-5.4-mini")]
FivePointFourMini,
#[serde(rename = "gpt-5.4")]
FivePointFour,
#[serde(rename = "gpt-5.4-pro")]
@ -130,22 +120,17 @@ impl Model {
pub fn from_id(id: &str) -> Result<Self> {
match id {
"gpt-3.5-turbo" => Ok(Self::ThreePointFiveTurbo),
"gpt-4" => Ok(Self::Four),
"gpt-4-turbo-preview" => Ok(Self::FourTurbo),
"gpt-4o-mini" => Ok(Self::FourOmniMini),
"gpt-4.1-nano" => Ok(Self::FourPointOneNano),
"o1" => Ok(Self::O1),
"o3-mini" => Ok(Self::O3Mini),
"o3" => Ok(Self::O3),
"gpt-5" => Ok(Self::Five),
"gpt-5-codex" => Ok(Self::FiveCodex),
"gpt-5-mini" => Ok(Self::FiveMini),
"gpt-5-nano" => Ok(Self::FiveNano),
"gpt-5.1" => Ok(Self::FivePointOne),
"gpt-5.2" => Ok(Self::FivePointTwo),
"gpt-5.2-codex" => Ok(Self::FivePointTwoCodex),
"gpt-5.3-codex" => Ok(Self::FivePointThreeCodex),
"gpt-5.4-nano" => Ok(Self::FivePointFourNano),
"gpt-5.4-mini" => Ok(Self::FivePointFourMini),
"gpt-5.4" => Ok(Self::FivePointFour),
"gpt-5.4-pro" => Ok(Self::FivePointFourPro),
"gpt-5.5" => Ok(Self::FivePointFive),
@ -156,22 +141,17 @@ impl Model {
pub fn id(&self) -> &str {
match self {
Self::ThreePointFiveTurbo => "gpt-3.5-turbo",
Self::Four => "gpt-4",
Self::FourTurbo => "gpt-4-turbo",
Self::FourOmniMini => "gpt-4o-mini",
Self::FourPointOneNano => "gpt-4.1-nano",
Self::O1 => "o1",
Self::O3Mini => "o3-mini",
Self::O3 => "o3",
Self::Five => "gpt-5",
Self::FiveCodex => "gpt-5-codex",
Self::FiveMini => "gpt-5-mini",
Self::FiveNano => "gpt-5-nano",
Self::FivePointOne => "gpt-5.1",
Self::FivePointTwo => "gpt-5.2",
Self::FivePointTwoCodex => "gpt-5.2-codex",
Self::FivePointThreeCodex => "gpt-5.3-codex",
Self::FivePointFourNano => "gpt-5.4-nano",
Self::FivePointFourMini => "gpt-5.4-mini",
Self::FivePointFour => "gpt-5.4",
Self::FivePointFourPro => "gpt-5.4-pro",
Self::FivePointFive => "gpt-5.5",
@ -182,22 +162,17 @@ impl Model {
pub fn display_name(&self) -> &str {
match self {
Self::ThreePointFiveTurbo => "gpt-3.5-turbo",
Self::Four => "gpt-4",
Self::FourTurbo => "gpt-4-turbo",
Self::FourOmniMini => "gpt-4o-mini",
Self::FourPointOneNano => "gpt-4.1-nano",
Self::O1 => "o1",
Self::O3Mini => "o3-mini",
Self::O3 => "o3",
Self::Five => "gpt-5",
Self::FiveCodex => "gpt-5-codex",
Self::FiveMini => "gpt-5-mini",
Self::FiveNano => "gpt-5-nano",
Self::FivePointOne => "gpt-5.1",
Self::FivePointTwo => "gpt-5.2",
Self::FivePointTwoCodex => "gpt-5.2-codex",
Self::FivePointThreeCodex => "gpt-5.3-codex",
Self::FivePointFourNano => "gpt-5.4-nano",
Self::FivePointFourMini => "gpt-5.4-mini",
Self::FivePointFour => "gpt-5.4",
Self::FivePointFourPro => "gpt-5.4-pro",
Self::FivePointFive => "gpt-5.5",
@ -208,22 +183,17 @@ impl Model {
pub fn max_token_count(&self) -> u64 {
match self {
Self::ThreePointFiveTurbo => 16_385,
Self::Four => 8_192,
Self::FourTurbo => 128_000,
Self::FourOmniMini => 128_000,
Self::FourPointOneNano => 1_047_576,
Self::O1 => 200_000,
Self::O3Mini => 200_000,
Self::O3 => 200_000,
Self::Five => 272_000,
Self::FiveCodex => 272_000,
Self::FiveMini => 400_000,
Self::FiveNano => 400_000,
Self::FivePointOne => 400_000,
Self::FivePointTwo => 400_000,
Self::FivePointTwoCodex => 400_000,
Self::FivePointThreeCodex => 400_000,
Self::FivePointFourNano => 400_000,
Self::FivePointFourMini => 400_000,
Self::FivePointFour => 1_050_000,
Self::FivePointFourPro => 1_050_000,
Self::FivePointFive => 1_050_000,
@ -237,22 +207,17 @@ impl Model {
Self::Custom {
max_output_tokens, ..
} => *max_output_tokens,
Self::ThreePointFiveTurbo => Some(4_096),
Self::Four => Some(8_192),
Self::FourTurbo => Some(4_096),
Self::FourOmniMini => Some(16_384),
Self::FourPointOneNano => Some(32_768),
Self::O1 => Some(100_000),
Self::O3Mini => Some(100_000),
Self::O3 => Some(100_000),
Self::Five => Some(128_000),
Self::FiveCodex => Some(128_000),
Self::FiveMini => Some(128_000),
Self::FiveNano => Some(128_000),
Self::FivePointOne => Some(128_000),
Self::FivePointTwo => Some(128_000),
Self::FivePointTwoCodex => Some(128_000),
Self::FivePointThreeCodex => Some(128_000),
Self::FivePointFourNano => Some(128_000),
Self::FivePointFourMini => Some(128_000),
Self::FivePointFour => Some(128_000),
Self::FivePointFourPro => Some(128_000),
Self::FivePointFive => Some(128_000),
@ -265,18 +230,16 @@ impl Model {
Self::Custom {
reasoning_effort, ..
} => reasoning_effort.to_owned(),
Self::O1
| Self::O3
| Self::O3Mini
Self::FivePointOne
| Self::FivePointTwo
| Self::FivePointFour
| Self::FivePointFourMini
| Self::FivePointFourNano => Some(ReasoningEffort::None),
Self::O3
| Self::Five
| Self::FiveCodex
| Self::FiveMini
| Self::FiveNano
| Self::FivePointOne
| Self::FivePointTwo
| Self::FivePointTwoCodex
| Self::FivePointThreeCodex
| Self::FivePointFour
| Self::FivePointFourPro
| Self::FivePointFive
| Self::FivePointFivePro => Some(ReasoningEffort::Medium),
@ -290,13 +253,20 @@ impl Model {
reasoning_effort: Some(effort),
..
} => match effort {
ReasoningEffort::None => &[ReasoningEffort::None],
ReasoningEffort::Minimal => &[ReasoningEffort::Minimal],
ReasoningEffort::Low => &[ReasoningEffort::Low],
ReasoningEffort::Medium => &[ReasoningEffort::Medium],
ReasoningEffort::High => &[ReasoningEffort::High],
ReasoningEffort::XHigh => &[ReasoningEffort::XHigh],
},
Self::O1 | Self::O3 | Self::O3Mini | Self::FivePointOne => &[
Self::O3 => &[
ReasoningEffort::Low,
ReasoningEffort::Medium,
ReasoningEffort::High,
],
Self::FivePointOne => &[
ReasoningEffort::None,
ReasoningEffort::Low,
ReasoningEffort::Medium,
ReasoningEffort::High,
@ -307,10 +277,13 @@ impl Model {
ReasoningEffort::Medium,
ReasoningEffort::High,
],
Self::FiveCodex
| Self::FivePointTwoCodex
| Self::FivePointThreeCodex
| Self::FivePointFourPro => &[
Self::FivePointFourPro | Self::FivePointFivePro => &[
ReasoningEffort::Medium,
ReasoningEffort::High,
ReasoningEffort::XHigh,
],
Self::FivePointThreeCodex => &[
ReasoningEffort::Low,
ReasoningEffort::Medium,
ReasoningEffort::High,
ReasoningEffort::XHigh,
@ -318,7 +291,9 @@ impl Model {
Self::FivePointTwo
| Self::FivePointFour
| Self::FivePointFive
| Self::FivePointFivePro => &[
| Self::FivePointFourMini
| Self::FivePointFourNano => &[
ReasoningEffort::None,
ReasoningEffort::Low,
ReasoningEffort::Medium,
ReasoningEffort::High,
@ -343,24 +318,21 @@ impl Model {
/// If the model does not support the parameter, do not pass it up, or the API will return an error.
pub fn supports_parallel_tool_calls(&self) -> bool {
match self {
Self::ThreePointFiveTurbo
| Self::Four
| Self::FourTurbo
Self::Four
| Self::FourOmniMini
| Self::FourPointOneNano
| Self::Five
| Self::FiveCodex
| Self::FiveMini
| Self::FivePointOne
| Self::FivePointTwo
| Self::FivePointTwoCodex
| Self::FivePointThreeCodex
| Self::FivePointFour
| Self::FivePointFourMini
| Self::FivePointFourNano
| Self::FivePointFourPro
| Self::FivePointFive
| Self::FivePointFivePro
| Self::FiveNano => true,
Self::O1 | Self::O3 | Self::O3Mini | Model::Custom { .. } => false,
Self::O3 | Model::Custom { .. } => false,
}
}
@ -372,6 +344,81 @@ impl Model {
}
}
#[cfg(test)]
mod tests {
    use super::{Model, ReasoningEffort};

    /// gpt-5.1 defaults to `None` and supports `None` through `High`.
    #[test]
    fn gpt_5_1_uses_none_reasoning_by_default() {
        let model = Model::FivePointOne;
        let expected_efforts = [
            ReasoningEffort::None,
            ReasoningEffort::Low,
            ReasoningEffort::Medium,
            ReasoningEffort::High,
        ];

        assert_eq!(model.reasoning_effort(), Some(ReasoningEffort::None));
        assert_eq!(model.supported_reasoning_efforts(), &expected_efforts[..]);
    }

    /// gpt-5.2, gpt-5.4 and gpt-5.5 share one supported-effort list (including
    /// `None` and `XHigh`) but differ in their default effort.
    #[test]
    fn newer_frontier_models_support_none_reasoning() {
        let expected_efforts = [
            ReasoningEffort::None,
            ReasoningEffort::Low,
            ReasoningEffort::Medium,
            ReasoningEffort::High,
            ReasoningEffort::XHigh,
        ];
        let expected_defaults = [
            (Model::FivePointTwo, ReasoningEffort::None),
            (Model::FivePointFour, ReasoningEffort::None),
            (Model::FivePointFive, ReasoningEffort::Medium),
        ];

        for (model, default_effort) in expected_defaults {
            assert_eq!(model.reasoning_effort(), Some(default_effort));
            assert_eq!(model.supported_reasoning_efforts(), &expected_efforts[..]);
        }
    }

    /// gpt-5.3-codex supports `Low` through `XHigh`.
    #[test]
    fn newer_codex_models_support_low_reasoning_effort() {
        let expected_efforts = [
            ReasoningEffort::Low,
            ReasoningEffort::Medium,
            ReasoningEffort::High,
            ReasoningEffort::XHigh,
        ];

        let supported = Model::FivePointThreeCodex.supported_reasoning_efforts();
        assert_eq!(supported, &expected_efforts[..]);
    }
}
#[derive(Debug, Serialize, Deserialize)]
pub struct StreamOptions {
pub include_usage: bool,

View file

@ -11,6 +11,9 @@ pub struct Request {
pub model: String,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub input: Vec<ResponseInputItem>,
pub store: bool,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub include: Vec<ResponseIncludable>,
#[serde(default)]
pub stream: bool,
#[serde(skip_serializing_if = "Option::is_none")]
@ -31,18 +34,28 @@ pub struct Request {
pub reasoning: Option<ReasoningConfig>,
}
/// A value that can be listed in a request's `include` field.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ResponseIncludable {
    /// Serialized as `"reasoning.encrypted_content"` (the explicit rename
    /// overrides the container-level `snake_case` rule for this variant).
#[serde(rename = "reasoning.encrypted_content")]
ReasoningEncryptedContent,
}
/// One item of a request's `input` list.
///
/// Serialized as an internally tagged enum: the variant is identified by a
/// `type` field holding the snake_case variant name (for example
/// `function_call_output`).
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ResponseInputItem {
Message(ResponseMessageItem),
FunctionCall(ResponseFunctionCallItem),
FunctionCallOutput(ResponseFunctionCallOutputItem),
Reasoning(ResponseReasoningInputItem),
}
/// A message input item: a role plus its content parts.
#[derive(Debug, Serialize, Deserialize)]
pub struct ResponseMessageItem {
pub role: Role,
pub content: Vec<ResponseInputContent>,
// Optional; omitted from the serialized form when `None` and defaulted to
// `None` when absent on deserialization.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub phase: Option<String>,
}
#[derive(Debug, Serialize, Deserialize)]
@ -58,6 +71,26 @@ pub struct ResponseFunctionCallOutputItem {
pub output: ResponseFunctionCallOutputContent,
}
/// A reasoning item supplied as request input.
///
/// Every field is defaulted when missing on deserialization. All fields except
/// `summary` are omitted from the serialized form when empty/`None`; `summary`
/// is always serialized, even when empty.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ResponseReasoningInputItem {
#[serde(default, skip_serializing_if = "Option::is_none")]
pub id: Option<String>,
// No `skip_serializing_if` here: an empty summary is still written out.
#[serde(default)]
pub summary: Vec<ResponseReasoningSummaryPart>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub content: Vec<Value>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub encrypted_content: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub status: Option<String>,
}
/// One part of a reasoning summary.
///
/// Internally tagged via a `type` field; the single variant serializes with
/// `type: "summary_text"` alongside its `text`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ResponseReasoningSummaryPart {
SummaryText { text: String },
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ResponseFunctionCallOutputContent {
@ -111,9 +144,13 @@ pub enum ToolDefinition {
},
}
// NOTE(review): two derive/struct headers appear back to back. This looks like
// a diff view with its +/- markers stripped: presumably `Error` (first header)
// was renamed to `ResponseError` (second header) and gained `Clone`; which of
// the fields below are new is not visible here — confirm against the
// repository.
#[derive(Deserialize, Debug)]
pub struct Error {
/// An error payload: a required `message`, plus an optional `code` and
/// `param` that default to `None` when absent.
#[derive(Deserialize, Debug, Clone)]
pub struct ResponseError {
#[serde(default)]
pub code: Option<String>,
pub message: String,
#[serde(default)]
pub param: Option<Value>,
}
#[derive(Deserialize, Debug)]
@ -167,6 +204,24 @@ pub enum StreamEvent {
content_index: Option<usize>,
text: String,
},
#[serde(rename = "response.refusal.delta")]
RefusalDelta {
item_id: String,
output_index: usize,
content_index: usize,
delta: String,
#[serde(default)]
sequence_number: Option<u64>,
},
#[serde(rename = "response.refusal.done")]
RefusalDone {
item_id: String,
output_index: usize,
content_index: usize,
refusal: String,
#[serde(default)]
sequence_number: Option<u64>,
},
#[serde(rename = "response.reasoning_summary_part.added")]
ReasoningSummaryPartAdded {
item_id: String,
@ -214,9 +269,12 @@ pub enum StreamEvent {
#[serde(rename = "response.failed")]
Failed { response: ResponseSummary },
#[serde(rename = "response.error")]
Error { error: Error },
Error { error: ResponseError },
#[serde(rename = "error")]
GenericError { error: Error },
GenericError {
#[serde(flatten)]
error: ResponseError,
},
#[serde(other)]
Unknown,
}
@ -228,7 +286,9 @@ pub struct ResponseSummary {
#[serde(default)]
pub status: Option<String>,
#[serde(default)]
pub status_details: Option<ResponseStatusDetails>,
pub incomplete_details: Option<ResponseIncompleteDetails>,
#[serde(default)]
pub error: Option<ResponseError>,
#[serde(default)]
pub usage: Option<ResponseUsage>,
#[serde(default)]
@ -236,13 +296,9 @@ pub struct ResponseSummary {
}
// NOTE(review): two struct headers appear back to back. This looks like a diff
// view with its +/- markers stripped: presumably `ResponseStatusDetails` was
// renamed to `ResponseIncompleteDetails`, and the fields after `reason` were
// removed (the hunk header shows the section shrinking) — confirm against the
// repository.
#[derive(Deserialize, Debug, Default, Clone)]
pub struct ResponseStatusDetails {
/// Details attached to a response summary's `incomplete_details` field.
pub struct ResponseIncompleteDetails {
// Defaults to `None` when the field is absent.
#[serde(default)]
pub reason: Option<String>,
#[serde(default)]
pub r#type: Option<String>,
#[serde(default)]
pub error: Option<Value>,
}
#[derive(Deserialize, Debug, Default, Clone)]
@ -250,11 +306,27 @@ pub struct ResponseUsage {
#[serde(default)]
pub input_tokens: Option<u64>,
#[serde(default)]
pub input_tokens_details: ResponseInputTokensDetails,
#[serde(default)]
pub output_tokens: Option<u64>,
#[serde(default)]
pub output_tokens_details: ResponseOutputTokensDetails,
#[serde(default)]
pub total_tokens: Option<u64>,
}
/// Breakdown of a usage report's input tokens.
#[derive(Deserialize, Debug, Default, Clone)]
pub struct ResponseInputTokensDetails {
// Defaults to 0 when the field is absent from the payload.
#[serde(default)]
pub cached_tokens: u64,
}
/// Breakdown of a usage report's output tokens.
#[derive(Deserialize, Debug, Default, Clone)]
pub struct ResponseOutputTokensDetails {
// Defaults to 0 when the field is absent from the payload.
#[serde(default)]
pub reasoning_tokens: u64,
}
#[derive(Deserialize, Debug, Clone)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ResponseOutputItem {
@ -271,6 +343,12 @@ pub struct ResponseReasoningItem {
pub id: Option<String>,
#[serde(default)]
pub summary: Vec<ReasoningSummaryPart>,
#[serde(default)]
pub content: Vec<Value>,
#[serde(default)]
pub encrypted_content: Option<String>,
#[serde(default)]
pub status: Option<String>,
}
#[derive(Deserialize, Debug, Clone)]
@ -293,6 +371,8 @@ pub struct ResponseOutputMessage {
pub role: Option<String>,
#[serde(default)]
pub status: Option<String>,
#[serde(default)]
pub phase: Option<String>,
}
#[derive(Deserialize, Debug, Clone)]
@ -441,8 +521,17 @@ pub async fn stream_response(
});
}
all_events.push(StreamEvent::Completed {
response: response_summary,
let status = response_summary.status.clone();
all_events.push(match status.as_deref() {
Some("incomplete") => StreamEvent::Incomplete {
response: response_summary,
},
Some("failed") => StreamEvent::Failed {
response: response_summary,
},
_ => StreamEvent::Completed {
response: response_summary,
},
});
Ok(futures::stream::iter(all_events.into_iter().map(Ok)).boxed())