open_ai: Responses API improvements (#56476)

Release Notes:

- Removed deprecated OpenAI models
- Added support for gpt-5.4-nano/mini models for OpenAI provider
- Improved output quality when using OpenAI models

---------

Co-authored-by: Bennet Bo Fenner <bennetbo@gmx.de>
Co-authored-by: Smit Barmase <heysmitbarmase@gmail.com>
Co-authored-by: Gaauwe Rombouts <mail@grombouts.nl>
2026-06-01 03:14:56 +07:00 · 2026-05-12 16:47:16 +02:00 · 2026-05-12 16:47:16 +02:00 · 78c889c21d
commit 78c889c21d
parent 6f1409b31c
10 changed files with 1715 additions and 200 deletions
--- a/crates/agent/src/tests/mod.rs
+++ b/crates/agent/src/tests/mod.rs
@ -3123,6 +3123,57 @@ async fn test_truncate_first_message(cx: &mut TestAppContext) {
    });
 }

+#[gpui::test]
+async fn test_latest_token_usage_counts_cached_input_tokens(cx: &mut TestAppContext) {
+    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
+    let fake_model = model.as_fake();
+
+    let message_1_id = UserMessageId::new();
+    thread
+        .update(cx, |thread, cx| {
+            thread.send(message_1_id, ["Message 1"], cx)
+        })
+        .unwrap();
+    cx.run_until_parked();
+
+    fake_model.send_last_completion_stream_text_chunk("Response 1");
+    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::UsageUpdate(
+        language_model::TokenUsage {
+            input_tokens: 100,
+            output_tokens: 50,
+            cache_creation_input_tokens: 25,
+            cache_read_input_tokens: 75,
+        },
+    ));
+    fake_model.end_last_completion_stream();
+    cx.run_until_parked();
+
+    thread.read_with(cx, |thread, _| {
+        assert_eq!(
+            thread.latest_token_usage(),
+            Some(acp_thread::TokenUsage {
+                used_tokens: 250,
+                max_tokens: 1_000_000,
+                max_output_tokens: None,
+                input_tokens: 200,
+                output_tokens: 50,
+            })
+        );
+    });
+
+    let message_2_id = UserMessageId::new();
+    thread
+        .update(cx, |thread, cx| {
+            thread.send(message_2_id.clone(), ["Message 2"], cx)
+        })
+        .unwrap();
+    cx.run_until_parked();
+
+    thread.read_with(cx, |thread, _| {
+        assert_eq!(thread.tokens_before_message(&message_2_id), Some(200));
+    });
+}
+
 #[gpui::test]
 async fn test_truncate_second_message(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
--- a/crates/agent/src/thread.rs
+++ b/crates/agent/src/thread.rs
@ -1750,11 +1750,13 @@ impl Thread {
    pub fn latest_token_usage(&self) -> Option<acp_thread::TokenUsage> {
        let usage = self.latest_request_token_usage()?;
        let model = self.model.clone()?;
+        let input_tokens = total_input_tokens(usage);
+
        Some(acp_thread::TokenUsage {
            max_tokens: model.max_token_count(),
            max_output_tokens: model.max_output_tokens(),
            used_tokens: usage.total_tokens(),
-            input_tokens: usage.input_tokens,
+            input_tokens,
            output_tokens: usage.output_tokens,
        })
    }
@ -1773,7 +1775,7 @@ impl Thread {
                if &user_msg.id == target_id {
                    let prev_id = previous_user_message_id?;
                    let usage = self.request_token_usage.get(prev_id)?;
-                    return Some(usage.input_tokens);
+                    return Some(total_input_tokens(*usage));
                }
                previous_user_message_id = Some(&user_msg.id);
            }
@ -3224,6 +3226,13 @@ impl Thread {
    }
 }

+fn total_input_tokens(usage: language_model::TokenUsage) -> u64 {
+    usage
+        .input_tokens
+        .saturating_add(usage.cache_creation_input_tokens)
+        .saturating_add(usage.cache_read_input_tokens)
+}
+
 struct RunningTurn {
    /// Holds the task that handles agent interaction until the end of the turn.
    /// Survives across multiple requests as the model performs tool calls and
--- a/crates/language_model_core/src/language_model_core.rs
+++ b/crates/language_model_core/src/language_model_core.rs
@ -468,6 +468,7 @@ pub enum ModelMode {
 #[serde(rename_all = "lowercase")]
 #[strum(serialize_all = "lowercase")]
 pub enum ReasoningEffort {
+    None,
    Minimal,
    Low,
    Medium,
--- a/crates/language_models/src/provider/open_ai.rs
+++ b/crates/language_models/src/provider/open_ai.rs
@ -217,6 +217,107 @@ impl LanguageModelProvider for OpenAiLanguageModelProvider {
    }
 }

+fn default_thinking_reasoning_effort(model: &open_ai::Model) -> Option<open_ai::ReasoningEffort> {
+    use open_ai::ReasoningEffort;
+
+    model
+        .reasoning_effort()
+        .filter(|effort| *effort != ReasoningEffort::None)
+        .or_else(|| {
+            let supported_efforts = model.supported_reasoning_efforts();
+            if supported_efforts.contains(&ReasoningEffort::Medium) {
+                Some(ReasoningEffort::Medium)
+            } else {
+                supported_efforts
+                    .iter()
+                    .copied()
+                    .find(|effort| *effort != ReasoningEffort::None)
+            }
+        })
+}
+
+fn supports_selectable_thinking_effort(model: &open_ai::Model) -> bool {
+    model.uses_responses_api()
+        && model
+            .supported_reasoning_efforts()
+            .iter()
+            .any(|effort| *effort != open_ai::ReasoningEffort::None)
+}
+
+fn supported_thinking_effort_levels(model: &open_ai::Model) -> Vec<LanguageModelEffortLevel> {
+    if !supports_selectable_thinking_effort(model) {
+        return Vec::new();
+    }
+
+    let default_effort = default_thinking_reasoning_effort(model);
+    model
+        .supported_reasoning_efforts()
+        .iter()
+        .copied()
+        .filter_map(|effort| {
+            let (name, value) = match effort {
+                open_ai::ReasoningEffort::None => return None,
+                open_ai::ReasoningEffort::Minimal => ("Minimal", "minimal"),
+                open_ai::ReasoningEffort::Low => ("Low", "low"),
+                open_ai::ReasoningEffort::Medium => ("Medium", "medium"),
+                open_ai::ReasoningEffort::High => ("High", "high"),
+                open_ai::ReasoningEffort::XHigh => ("Extra High", "xhigh"),
+            };
+
+            Some(LanguageModelEffortLevel {
+                name: name.into(),
+                value: value.into(),
+                is_default: Some(effort) == default_effort,
+            })
+        })
+        .collect()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn supported_thinking_effort_levels_hide_none() {
+        let effort_levels = supported_thinking_effort_levels(&open_ai::Model::FivePointTwo);
+        let values = effort_levels
+            .iter()
+            .map(|level| level.value.as_ref())
+            .collect::<Vec<_>>();
+
+        assert_eq!(values, ["low", "medium", "high", "xhigh"]);
+        assert_eq!(
+            effort_levels
+                .iter()
+                .find(|level| level.is_default)
+                .map(|level| level.value.as_ref()),
+            Some("medium")
+        );
+    }
+
+    #[test]
+    fn models_supporting_only_none_have_no_selectable_thinking_effort() {
+        let model = open_ai::Model::Custom {
+            name: "custom-model".to_string(),
+            display_name: None,
+            max_tokens: 128_000,
+            max_output_tokens: None,
+            max_completion_tokens: None,
+            reasoning_effort: Some(open_ai::ReasoningEffort::None),
+            supports_chat_completions: false,
+            supports_images: true,
+        };
+
+        assert!(!supports_selectable_thinking_effort(&model));
+        assert!(supported_thinking_effort_levels(&model).is_empty());
+        assert!(
+            model
+                .supported_reasoning_efforts()
+                .contains(&open_ai::ReasoningEffort::None)
+        );
+    }
+}
+
 pub struct OpenAiLanguageModel {
    id: LanguageModelId,
    model: open_ai::Model,
@ -316,22 +417,20 @@ impl LanguageModel for OpenAiLanguageModel {
        use open_ai::Model;
        match &self.model {
            Model::FourOmniMini
-            | Model::FourPointOneNano
            | Model::Five
-            | Model::FiveCodex
            | Model::FiveMini
            | Model::FiveNano
            | Model::FivePointOne
            | Model::FivePointTwo
-            | Model::FivePointTwoCodex
            | Model::FivePointThreeCodex
            | Model::FivePointFour
+            | Model::FivePointFourMini
+            | Model::FivePointFourNano
            | Model::FivePointFourPro
            | Model::FivePointFive
            | Model::FivePointFivePro
-            | Model::O1
            | Model::O3 => true,
-            Model::ThreePointFiveTurbo | Model::Four | Model::FourTurbo | Model::O3Mini => false,
+            Model::Four => false,
            Model::Custom {
                supports_images, ..
            } => *supports_images,
@ -351,34 +450,11 @@ impl LanguageModel for OpenAiLanguageModel {
    }

    fn supports_thinking(&self) -> bool {
-        self.model.uses_responses_api() && self.model.reasoning_effort().is_some()
+        supports_selectable_thinking_effort(&self.model)
    }

    fn supported_effort_levels(&self) -> Vec<LanguageModelEffortLevel> {
-        if !self.supports_thinking() {
-            return Vec::new();
-        }
-
-        let default_effort = self.model.reasoning_effort();
-        self.model
-            .supported_reasoning_efforts()
-            .iter()
-            .map(|effort| {
-                let (name, value) = match effort {
-                    open_ai::ReasoningEffort::Minimal => ("Minimal", "minimal"),
-                    open_ai::ReasoningEffort::Low => ("Low", "low"),
-                    open_ai::ReasoningEffort::Medium => ("Medium", "medium"),
-                    open_ai::ReasoningEffort::High => ("High", "high"),
-                    open_ai::ReasoningEffort::XHigh => ("Extra High", "xhigh"),
-                };
-
-                LanguageModelEffortLevel {
-                    name: name.into(),
-                    value: value.into(),
-                    is_default: Some(*effort) == default_effort,
-                }
-            })
-            .collect()
+        supported_thinking_effort_levels(&self.model)
    }

    fn supports_split_token_display(&self) -> bool {
@ -418,7 +494,10 @@ impl LanguageModel for OpenAiLanguageModel {
                self.model.supports_parallel_tool_calls(),
                self.model.supports_prompt_cache_key(),
                self.max_output_tokens(),
-                self.model.reasoning_effort(),
+                default_thinking_reasoning_effort(&self.model),
+                self.model
+                    .supported_reasoning_efforts()
+                    .contains(&open_ai::ReasoningEffort::None),
            );
            let completions = self.stream_response(request, cx);
            async move {
--- a/crates/language_models/src/provider/open_ai_compatible.rs
+++ b/crates/language_models/src/provider/open_ai_compatible.rs
@ -397,7 +397,10 @@ impl LanguageModel for OpenAiCompatibleLanguageModel {
                self.model.capabilities.parallel_tool_calls,
                self.model.capabilities.prompt_cache_key,
                self.max_output_tokens(),
-                self.model.reasoning_effort,
+                self.model
+                    .reasoning_effort
+                    .filter(|effort| *effort != open_ai::ReasoningEffort::None),
+                self.model.reasoning_effort == Some(open_ai::ReasoningEffort::None),
            );
            let completions = self.stream_response(request, cx);
            async move {
--- a/crates/language_models/src/provider/opencode.rs
+++ b/crates/language_models/src/provider/opencode.rs
@ -32,6 +32,7 @@ use crate::provider::open_ai::{

 fn normalize_reasoning_effort(effort: &str) -> Option<ReasoningEffort> {
    match effort.trim().to_ascii_lowercase().as_str() {
+        "none" => Some(ReasoningEffort::None),
        "minimal" => Some(ReasoningEffort::Minimal),
        "low" => Some(ReasoningEffort::Low),
        "medium" => Some(ReasoningEffort::Medium),
@ -43,6 +44,7 @@ fn normalize_reasoning_effort(effort: &str) -> Option<ReasoningEffort> {

 fn reasoning_effort_display(effort: ReasoningEffort) -> (&'static str, &'static str) {
    match effort {
+        ReasoningEffort::None => ("None", "none"),
        ReasoningEffort::Minimal => ("Minimal", "minimal"),
        ReasoningEffort::Low => ("Low", "low"),
        ReasoningEffort::Medium => ("Medium", "medium"),
@ -549,13 +551,17 @@ impl LanguageModel for OpenCodeLanguageModel {
    fn supports_thinking(&self) -> bool {
        self.model
            .supported_reasoning_effort_levels()
-            .is_some_and(|levels| !levels.is_empty())
+            .is_some_and(|levels| levels.iter().any(|effort| *effort != ReasoningEffort::None))
    }

    fn supported_effort_levels(&self) -> Vec<LanguageModelEffortLevel> {
        self.model
            .supported_reasoning_effort_levels()
            .map(|levels| {
+                let levels = levels
+                    .into_iter()
+                    .filter(|effort| *effort != ReasoningEffort::None)
+                    .collect::<Vec<_>>();
                if levels.is_empty() {
                    return Vec::new();
                }
@ -675,21 +681,18 @@ impl LanguageModel for OpenCodeLanguageModel {
                .boxed()
            }
            ApiProtocol::OpenAiResponses => {
-                let reasoning_effort = if request.thinking_allowed {
-                    request
-                        .thinking_effort
-                        .as_deref()
-                        .and_then(normalize_reasoning_effort)
-                } else {
-                    None
-                };
+                let supports_none_reasoning_effort = self
+                    .model
+                    .supported_reasoning_effort_levels()
+                    .is_some_and(|levels| levels.contains(&ReasoningEffort::None));
                let response_request = into_open_ai_response(
                    request,
                    self.model.id(),
                    false,
                    false,
                    self.model.max_output_tokens(),
-                    reasoning_effort,
+                    None,
+                    supports_none_reasoning_effort,
                );
                let stream = self.stream_openai_response(response_request, http_client, cx);
                async move {
--- a/crates/language_models_cloud/src/language_models_cloud.rs
+++ b/crates/language_models_cloud/src/language_models_cloud.rs
@ -460,7 +460,13 @@ impl<TP: CloudLlmTokenProvider + 'static> LanguageModel for CloudLanguageModel<T
                let effort = request
                    .thinking_effort
                    .as_ref()
-                    .and_then(|effort| open_ai::ReasoningEffort::from_str(effort).ok());
+                    .and_then(|effort| open_ai::ReasoningEffort::from_str(effort).ok())
+                    .filter(|effort| *effort != open_ai::ReasoningEffort::None);
+                let supports_none_reasoning_effort =
+                    self.model.supported_effort_levels.iter().any(|effort| {
+                        open_ai::ReasoningEffort::from_str(&effort.value)
+                            .is_ok_and(|effort| effort == open_ai::ReasoningEffort::None)
+                    });

                let mut request = into_open_ai_response(
                    request,
@ -469,6 +475,7 @@ impl<TP: CloudLlmTokenProvider + 'static> LanguageModel for CloudLanguageModel<T
                    true,
                    None,
                    None,
+                    supports_none_reasoning_effort,
                );

                if enable_thinking && let Some(effort) = effort {
--- a/crates/open_ai/src/completion.rs
+++ b/crates/open_ai/src/completion.rs
--- a/crates/open_ai/src/open_ai.rs
+++ b/crates/open_ai/src/open_ai.rs
@ -58,26 +58,14 @@ impl From<Role> for String {
 #[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
 #[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
 pub enum Model {
-    #[serde(rename = "gpt-3.5-turbo")]
-    ThreePointFiveTurbo,
    #[serde(rename = "gpt-4")]
    Four,
-    #[serde(rename = "gpt-4-turbo")]
-    FourTurbo,
    #[serde(rename = "gpt-4o-mini")]
    FourOmniMini,
-    #[serde(rename = "gpt-4.1-nano")]
-    FourPointOneNano,
-    #[serde(rename = "o1")]
-    O1,
-    #[serde(rename = "o3-mini")]
-    O3Mini,
    #[serde(rename = "o3")]
    O3,
    #[serde(rename = "gpt-5")]
    Five,
-    #[serde(rename = "gpt-5-codex")]
-    FiveCodex,
    #[serde(rename = "gpt-5-mini")]
    #[default]
    FiveMini,
@ -87,10 +75,12 @@ pub enum Model {
    FivePointOne,
    #[serde(rename = "gpt-5.2")]
    FivePointTwo,
-    #[serde(rename = "gpt-5.2-codex")]
-    FivePointTwoCodex,
    #[serde(rename = "gpt-5.3-codex")]
    FivePointThreeCodex,
+    #[serde(rename = "gpt-5.4-nano")]
+    FivePointFourNano,
+    #[serde(rename = "gpt-5.4-mini")]
+    FivePointFourMini,
    #[serde(rename = "gpt-5.4")]
    FivePointFour,
    #[serde(rename = "gpt-5.4-pro")]
@ -130,22 +120,17 @@ impl Model {

    pub fn from_id(id: &str) -> Result<Self> {
        match id {
-            "gpt-3.5-turbo" => Ok(Self::ThreePointFiveTurbo),
            "gpt-4" => Ok(Self::Four),
-            "gpt-4-turbo-preview" => Ok(Self::FourTurbo),
            "gpt-4o-mini" => Ok(Self::FourOmniMini),
-            "gpt-4.1-nano" => Ok(Self::FourPointOneNano),
-            "o1" => Ok(Self::O1),
-            "o3-mini" => Ok(Self::O3Mini),
            "o3" => Ok(Self::O3),
            "gpt-5" => Ok(Self::Five),
-            "gpt-5-codex" => Ok(Self::FiveCodex),
            "gpt-5-mini" => Ok(Self::FiveMini),
            "gpt-5-nano" => Ok(Self::FiveNano),
            "gpt-5.1" => Ok(Self::FivePointOne),
            "gpt-5.2" => Ok(Self::FivePointTwo),
-            "gpt-5.2-codex" => Ok(Self::FivePointTwoCodex),
            "gpt-5.3-codex" => Ok(Self::FivePointThreeCodex),
+            "gpt-5.4-nano" => Ok(Self::FivePointFourNano),
+            "gpt-5.4-mini" => Ok(Self::FivePointFourMini),
            "gpt-5.4" => Ok(Self::FivePointFour),
            "gpt-5.4-pro" => Ok(Self::FivePointFourPro),
            "gpt-5.5" => Ok(Self::FivePointFive),
@ -156,22 +141,17 @@ impl Model {

    pub fn id(&self) -> &str {
        match self {
-            Self::ThreePointFiveTurbo => "gpt-3.5-turbo",
            Self::Four => "gpt-4",
-            Self::FourTurbo => "gpt-4-turbo",
            Self::FourOmniMini => "gpt-4o-mini",
-            Self::FourPointOneNano => "gpt-4.1-nano",
-            Self::O1 => "o1",
-            Self::O3Mini => "o3-mini",
            Self::O3 => "o3",
            Self::Five => "gpt-5",
-            Self::FiveCodex => "gpt-5-codex",
            Self::FiveMini => "gpt-5-mini",
            Self::FiveNano => "gpt-5-nano",
            Self::FivePointOne => "gpt-5.1",
            Self::FivePointTwo => "gpt-5.2",
-            Self::FivePointTwoCodex => "gpt-5.2-codex",
            Self::FivePointThreeCodex => "gpt-5.3-codex",
+            Self::FivePointFourNano => "gpt-5.4-nano",
+            Self::FivePointFourMini => "gpt-5.4-mini",
            Self::FivePointFour => "gpt-5.4",
            Self::FivePointFourPro => "gpt-5.4-pro",
            Self::FivePointFive => "gpt-5.5",
@ -182,22 +162,17 @@ impl Model {

    pub fn display_name(&self) -> &str {
        match self {
-            Self::ThreePointFiveTurbo => "gpt-3.5-turbo",
            Self::Four => "gpt-4",
-            Self::FourTurbo => "gpt-4-turbo",
            Self::FourOmniMini => "gpt-4o-mini",
-            Self::FourPointOneNano => "gpt-4.1-nano",
-            Self::O1 => "o1",
-            Self::O3Mini => "o3-mini",
            Self::O3 => "o3",
            Self::Five => "gpt-5",
-            Self::FiveCodex => "gpt-5-codex",
            Self::FiveMini => "gpt-5-mini",
            Self::FiveNano => "gpt-5-nano",
            Self::FivePointOne => "gpt-5.1",
            Self::FivePointTwo => "gpt-5.2",
-            Self::FivePointTwoCodex => "gpt-5.2-codex",
            Self::FivePointThreeCodex => "gpt-5.3-codex",
+            Self::FivePointFourNano => "gpt-5.4-nano",
+            Self::FivePointFourMini => "gpt-5.4-mini",
            Self::FivePointFour => "gpt-5.4",
            Self::FivePointFourPro => "gpt-5.4-pro",
            Self::FivePointFive => "gpt-5.5",
@ -208,22 +183,17 @@ impl Model {

    pub fn max_token_count(&self) -> u64 {
        match self {
-            Self::ThreePointFiveTurbo => 16_385,
            Self::Four => 8_192,
-            Self::FourTurbo => 128_000,
            Self::FourOmniMini => 128_000,
-            Self::FourPointOneNano => 1_047_576,
-            Self::O1 => 200_000,
-            Self::O3Mini => 200_000,
            Self::O3 => 200_000,
            Self::Five => 272_000,
-            Self::FiveCodex => 272_000,
            Self::FiveMini => 400_000,
            Self::FiveNano => 400_000,
            Self::FivePointOne => 400_000,
            Self::FivePointTwo => 400_000,
-            Self::FivePointTwoCodex => 400_000,
            Self::FivePointThreeCodex => 400_000,
+            Self::FivePointFourNano => 400_000,
+            Self::FivePointFourMini => 400_000,
            Self::FivePointFour => 1_050_000,
            Self::FivePointFourPro => 1_050_000,
            Self::FivePointFive => 1_050_000,
@ -237,22 +207,17 @@ impl Model {
            Self::Custom {
                max_output_tokens, ..
            } => *max_output_tokens,
-            Self::ThreePointFiveTurbo => Some(4_096),
            Self::Four => Some(8_192),
-            Self::FourTurbo => Some(4_096),
            Self::FourOmniMini => Some(16_384),
-            Self::FourPointOneNano => Some(32_768),
-            Self::O1 => Some(100_000),
-            Self::O3Mini => Some(100_000),
            Self::O3 => Some(100_000),
            Self::Five => Some(128_000),
-            Self::FiveCodex => Some(128_000),
            Self::FiveMini => Some(128_000),
            Self::FiveNano => Some(128_000),
            Self::FivePointOne => Some(128_000),
            Self::FivePointTwo => Some(128_000),
-            Self::FivePointTwoCodex => Some(128_000),
            Self::FivePointThreeCodex => Some(128_000),
+            Self::FivePointFourNano => Some(128_000),
+            Self::FivePointFourMini => Some(128_000),
            Self::FivePointFour => Some(128_000),
            Self::FivePointFourPro => Some(128_000),
            Self::FivePointFive => Some(128_000),
@ -265,18 +230,16 @@ impl Model {
            Self::Custom {
                reasoning_effort, ..
            } => reasoning_effort.to_owned(),
-            Self::O1
-            | Self::O3
-            | Self::O3Mini
+            Self::FivePointOne
+            | Self::FivePointTwo
+            | Self::FivePointFour
+            | Self::FivePointFourMini
+            | Self::FivePointFourNano => Some(ReasoningEffort::None),
+            Self::O3
            | Self::Five
-            | Self::FiveCodex
            | Self::FiveMini
            | Self::FiveNano
-            | Self::FivePointOne
-            | Self::FivePointTwo
-            | Self::FivePointTwoCodex
            | Self::FivePointThreeCodex
-            | Self::FivePointFour
            | Self::FivePointFourPro
            | Self::FivePointFive
            | Self::FivePointFivePro => Some(ReasoningEffort::Medium),
@ -290,13 +253,20 @@ impl Model {
                reasoning_effort: Some(effort),
                ..
            } => match effort {
+                ReasoningEffort::None => &[ReasoningEffort::None],
                ReasoningEffort::Minimal => &[ReasoningEffort::Minimal],
                ReasoningEffort::Low => &[ReasoningEffort::Low],
                ReasoningEffort::Medium => &[ReasoningEffort::Medium],
                ReasoningEffort::High => &[ReasoningEffort::High],
                ReasoningEffort::XHigh => &[ReasoningEffort::XHigh],
            },
-            Self::O1 | Self::O3 | Self::O3Mini | Self::FivePointOne => &[
+            Self::O3 => &[
+                ReasoningEffort::Low,
+                ReasoningEffort::Medium,
+                ReasoningEffort::High,
+            ],
+            Self::FivePointOne => &[
+                ReasoningEffort::None,
                ReasoningEffort::Low,
                ReasoningEffort::Medium,
                ReasoningEffort::High,
@ -307,10 +277,13 @@ impl Model {
                ReasoningEffort::Medium,
                ReasoningEffort::High,
            ],
-            Self::FiveCodex
-            | Self::FivePointTwoCodex
-            | Self::FivePointThreeCodex
-            | Self::FivePointFourPro => &[
+            Self::FivePointFourPro | Self::FivePointFivePro => &[
+                ReasoningEffort::Medium,
+                ReasoningEffort::High,
+                ReasoningEffort::XHigh,
+            ],
+            Self::FivePointThreeCodex => &[
+                ReasoningEffort::Low,
                ReasoningEffort::Medium,
                ReasoningEffort::High,
                ReasoningEffort::XHigh,
@ -318,7 +291,9 @@ impl Model {
            Self::FivePointTwo
            | Self::FivePointFour
            | Self::FivePointFive
-            | Self::FivePointFivePro => &[
+            | Self::FivePointFourMini
+            | Self::FivePointFourNano => &[
+                ReasoningEffort::None,
                ReasoningEffort::Low,
                ReasoningEffort::Medium,
                ReasoningEffort::High,
@ -343,24 +318,21 @@ impl Model {
    /// If the model does not support the parameter, do not pass it up, or the API will return an error.
    pub fn supports_parallel_tool_calls(&self) -> bool {
        match self {
-            Self::ThreePointFiveTurbo
-            | Self::Four
-            | Self::FourTurbo
+            Self::Four
            | Self::FourOmniMini
-            | Self::FourPointOneNano
            | Self::Five
-            | Self::FiveCodex
            | Self::FiveMini
            | Self::FivePointOne
            | Self::FivePointTwo
-            | Self::FivePointTwoCodex
            | Self::FivePointThreeCodex
            | Self::FivePointFour
+            | Self::FivePointFourMini
+            | Self::FivePointFourNano
            | Self::FivePointFourPro
            | Self::FivePointFive
            | Self::FivePointFivePro
            | Self::FiveNano => true,
-            Self::O1 | Self::O3 | Self::O3Mini | Model::Custom { .. } => false,
+            Self::O3 | Model::Custom { .. } => false,
        }
    }

@ -372,6 +344,81 @@ impl Model {
    }
 }

+#[cfg(test)]
+mod tests {
+    use super::{Model, ReasoningEffort};
+
+    #[test]
+    fn gpt_5_1_uses_none_reasoning_by_default() {
+        let expected_efforts = [
+            ReasoningEffort::None,
+            ReasoningEffort::Low,
+            ReasoningEffort::Medium,
+            ReasoningEffort::High,
+        ];
+
+        assert_eq!(
+            Model::FivePointOne.reasoning_effort(),
+            Some(ReasoningEffort::None)
+        );
+        assert_eq!(
+            Model::FivePointOne.supported_reasoning_efforts(),
+            expected_efforts.as_slice()
+        );
+    }
+
+    #[test]
+    fn newer_frontier_models_support_none_reasoning() {
+        let expected_efforts = [
+            ReasoningEffort::None,
+            ReasoningEffort::Low,
+            ReasoningEffort::Medium,
+            ReasoningEffort::High,
+            ReasoningEffort::XHigh,
+        ];
+
+        assert_eq!(
+            Model::FivePointTwo.reasoning_effort(),
+            Some(ReasoningEffort::None)
+        );
+        assert_eq!(
+            Model::FivePointTwo.supported_reasoning_efforts(),
+            expected_efforts.as_slice()
+        );
+        assert_eq!(
+            Model::FivePointFour.reasoning_effort(),
+            Some(ReasoningEffort::None)
+        );
+        assert_eq!(
+            Model::FivePointFour.supported_reasoning_efforts(),
+            expected_efforts.as_slice()
+        );
+        assert_eq!(
+            Model::FivePointFive.reasoning_effort(),
+            Some(ReasoningEffort::Medium)
+        );
+        assert_eq!(
+            Model::FivePointFive.supported_reasoning_efforts(),
+            expected_efforts.as_slice()
+        );
+    }
+
+    #[test]
+    fn newer_codex_models_support_low_reasoning_effort() {
+        let expected_efforts = [
+            ReasoningEffort::Low,
+            ReasoningEffort::Medium,
+            ReasoningEffort::High,
+            ReasoningEffort::XHigh,
+        ];
+
+        assert_eq!(
+            Model::FivePointThreeCodex.supported_reasoning_efforts(),
+            expected_efforts.as_slice()
+        );
+    }
+}
+
 #[derive(Debug, Serialize, Deserialize)]
 pub struct StreamOptions {
    pub include_usage: bool,
--- a/crates/open_ai/src/responses.rs
+++ b/crates/open_ai/src/responses.rs
@ -11,6 +11,9 @@ pub struct Request {
    pub model: String,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub input: Vec<ResponseInputItem>,
+    pub store: bool,
+    #[serde(skip_serializing_if = "Vec::is_empty")]
+    pub include: Vec<ResponseIncludable>,
    #[serde(default)]
    pub stream: bool,
    #[serde(skip_serializing_if = "Option::is_none")]
@ -31,18 +34,28 @@ pub struct Request {
    pub reasoning: Option<ReasoningConfig>,
 }

+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
+#[serde(rename_all = "snake_case")]
+pub enum ResponseIncludable {
+    #[serde(rename = "reasoning.encrypted_content")]
+    ReasoningEncryptedContent,
+}
+
 #[derive(Debug, Serialize, Deserialize)]
 #[serde(tag = "type", rename_all = "snake_case")]
 pub enum ResponseInputItem {
    Message(ResponseMessageItem),
    FunctionCall(ResponseFunctionCallItem),
    FunctionCallOutput(ResponseFunctionCallOutputItem),
+    Reasoning(ResponseReasoningInputItem),
 }

 #[derive(Debug, Serialize, Deserialize)]
 pub struct ResponseMessageItem {
    pub role: Role,
    pub content: Vec<ResponseInputContent>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub phase: Option<String>,
 }

 #[derive(Debug, Serialize, Deserialize)]
@ -58,6 +71,26 @@ pub struct ResponseFunctionCallOutputItem {
    pub output: ResponseFunctionCallOutputContent,
 }

+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+pub struct ResponseReasoningInputItem {
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub id: Option<String>,
+    #[serde(default)]
+    pub summary: Vec<ResponseReasoningSummaryPart>,
+    #[serde(default, skip_serializing_if = "Vec::is_empty")]
+    pub content: Vec<Value>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub encrypted_content: Option<String>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub status: Option<String>,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum ResponseReasoningSummaryPart {
+    SummaryText { text: String },
+}
+
 #[derive(Debug, Serialize, Deserialize)]
 #[serde(untagged)]
 pub enum ResponseFunctionCallOutputContent {
@ -111,9 +144,13 @@ pub enum ToolDefinition {
    },
 }

-#[derive(Deserialize, Debug)]
-pub struct Error {
+#[derive(Deserialize, Debug, Clone)]
+pub struct ResponseError {
+    #[serde(default)]
+    pub code: Option<String>,
    pub message: String,
+    #[serde(default)]
+    pub param: Option<Value>,
 }

 #[derive(Deserialize, Debug)]
@ -167,6 +204,24 @@ pub enum StreamEvent {
        content_index: Option<usize>,
        text: String,
    },
+    #[serde(rename = "response.refusal.delta")]
+    RefusalDelta {
+        item_id: String,
+        output_index: usize,
+        content_index: usize,
+        delta: String,
+        #[serde(default)]
+        sequence_number: Option<u64>,
+    },
+    #[serde(rename = "response.refusal.done")]
+    RefusalDone {
+        item_id: String,
+        output_index: usize,
+        content_index: usize,
+        refusal: String,
+        #[serde(default)]
+        sequence_number: Option<u64>,
+    },
    #[serde(rename = "response.reasoning_summary_part.added")]
    ReasoningSummaryPartAdded {
        item_id: String,
@ -214,9 +269,12 @@ pub enum StreamEvent {
    #[serde(rename = "response.failed")]
    Failed { response: ResponseSummary },
    #[serde(rename = "response.error")]
-    Error { error: Error },
+    Error { error: ResponseError },
    #[serde(rename = "error")]
-    GenericError { error: Error },
+    GenericError {
+        #[serde(flatten)]
+        error: ResponseError,
+    },
    #[serde(other)]
    Unknown,
 }
@ -228,7 +286,9 @@ pub struct ResponseSummary {
    #[serde(default)]
    pub status: Option<String>,
    #[serde(default)]
-    pub status_details: Option<ResponseStatusDetails>,
+    pub incomplete_details: Option<ResponseIncompleteDetails>,
+    #[serde(default)]
+    pub error: Option<ResponseError>,
    #[serde(default)]
    pub usage: Option<ResponseUsage>,
    #[serde(default)]
@ -236,13 +296,9 @@ pub struct ResponseSummary {
 }

 #[derive(Deserialize, Debug, Default, Clone)]
-pub struct ResponseStatusDetails {
+pub struct ResponseIncompleteDetails { // renamed from `ResponseStatusDetails`; used by `ResponseSummary::incomplete_details`
    #[serde(default)]
    pub reason: Option<String>, // the only field kept after the rename; None when the key is absent
-    #[serde(default)]
-    pub r#type: Option<String>,
-    #[serde(default)]
-    pub error: Option<Value>,
 }

 #[derive(Deserialize, Debug, Default, Clone)]
@ -250,11 +306,27 @@ pub struct ResponseUsage {
    #[serde(default)]
    pub input_tokens: Option<u64>,
    #[serde(default)]
+    pub input_tokens_details: ResponseInputTokensDetails,
+    #[serde(default)]
    pub output_tokens: Option<u64>,
    #[serde(default)]
+    pub output_tokens_details: ResponseOutputTokensDetails,
+    #[serde(default)]
    pub total_tokens: Option<u64>,
 }

+#[derive(Deserialize, Debug, Default, Clone)]
+pub struct ResponseInputTokensDetails { // type of `ResponseUsage::input_tokens_details`
+    #[serde(default)]
+    pub cached_tokens: u64, // falls back to 0 when the key is missing (serde default for u64)
+}
+
+#[derive(Deserialize, Debug, Default, Clone)]
+pub struct ResponseOutputTokensDetails { // type of `ResponseUsage::output_tokens_details`
+    #[serde(default)]
+    pub reasoning_tokens: u64, // falls back to 0 when the key is missing (serde default for u64)
+}
+
 #[derive(Deserialize, Debug, Clone)]
 #[serde(tag = "type", rename_all = "snake_case")]
 pub enum ResponseOutputItem {
@ -271,6 +343,12 @@ pub struct ResponseReasoningItem {
    pub id: Option<String>,
    #[serde(default)]
    pub summary: Vec<ReasoningSummaryPart>,
+    #[serde(default)]
+    pub content: Vec<Value>,
+    #[serde(default)]
+    pub encrypted_content: Option<String>,
+    #[serde(default)]
+    pub status: Option<String>,
 }

 #[derive(Deserialize, Debug, Clone)]
@ -293,6 +371,8 @@ pub struct ResponseOutputMessage {
    pub role: Option<String>,
    #[serde(default)]
    pub status: Option<String>,
+    #[serde(default)]
+    pub phase: Option<String>,
 }

 #[derive(Deserialize, Debug, Clone)]
@ -441,8 +521,17 @@ pub async fn stream_response(
                        });
                    }

-                    all_events.push(StreamEvent::Completed {
-                        response: response_summary,
+                    let status = response_summary.status.clone();
+                    all_events.push(match status.as_deref() {
+                        Some("incomplete") => StreamEvent::Incomplete {
+                            response: response_summary,
+                        },
+                        Some("failed") => StreamEvent::Failed {
+                            response: response_summary,
+                        },
+                        _ => StreamEvent::Completed {
+                            response: response_summary,
+                        },
                    });

                    Ok(futures::stream::iter(all_events.into_iter().map(Ok)).boxed())