From aeb05899b364b41dfba0dea64d289d762791e56b Mon Sep 17 00:00:00 2001 From: Bennet Bo Fenner Date: Mon, 11 May 2026 15:48:33 +0200 Subject: [PATCH] open_ai: Support specifying reasoning effort (#56411) Closes #54875 Release Notes: - Added support for specifying effort level when using OpenAI models --- .../language_models/src/provider/open_ai.rs | 39 ++++++++++-- crates/open_ai/src/completion.rs | 60 +++++++++++++++--- crates/open_ai/src/open_ai.rs | 62 ++++++++++++++++++- 3 files changed, 143 insertions(+), 18 deletions(-) diff --git a/crates/language_models/src/provider/open_ai.rs b/crates/language_models/src/provider/open_ai.rs index 4957eea9635..81c3f07a31c 100644 --- a/crates/language_models/src/provider/open_ai.rs +++ b/crates/language_models/src/provider/open_ai.rs @@ -6,10 +6,10 @@ use gpui::{AnyView, App, AsyncApp, Context, Entity, SharedString, Task, TaskExt, use http_client::HttpClient; use language_model::{ ApiKeyState, AuthenticateError, EnvVar, IconOrSvg, LanguageModel, LanguageModelCompletionError, - LanguageModelCompletionEvent, LanguageModelId, LanguageModelName, LanguageModelProvider, - LanguageModelProviderId, LanguageModelProviderName, LanguageModelProviderState, - LanguageModelRequest, LanguageModelToolChoice, OPEN_AI_PROVIDER_ID, OPEN_AI_PROVIDER_NAME, - RateLimiter, env_var, + LanguageModelCompletionEvent, LanguageModelEffortLevel, LanguageModelId, LanguageModelName, + LanguageModelProvider, LanguageModelProviderId, LanguageModelProviderName, + LanguageModelProviderState, LanguageModelRequest, LanguageModelToolChoice, OPEN_AI_PROVIDER_ID, + OPEN_AI_PROVIDER_NAME, RateLimiter, env_var, }; use menu; use open_ai::{ @@ -351,7 +351,34 @@ impl LanguageModel for OpenAiLanguageModel { } fn supports_thinking(&self) -> bool { - self.model.reasoning_effort().is_some() + self.model.uses_responses_api() && self.model.reasoning_effort().is_some() + } + + fn supported_effort_levels(&self) -> Vec { + if !self.supports_thinking() { + return Vec::new(); + } + + let default_effort = self.model.reasoning_effort(); + self.model + .supported_reasoning_efforts() + .iter() + .map(|effort| { + let (name, value) = match effort { + open_ai::ReasoningEffort::Minimal => ("Minimal", "minimal"), + open_ai::ReasoningEffort::Low => ("Low", "low"), + open_ai::ReasoningEffort::Medium => ("Medium", "medium"), + open_ai::ReasoningEffort::High => ("High", "high"), + open_ai::ReasoningEffort::XHigh => ("Extra High", "xhigh"), + }; + + LanguageModelEffortLevel { + name: name.into(), + value: value.into(), + is_default: Some(*effort) == default_effort, + } + }) + .collect() } fn supports_split_token_display(&self) -> bool { @@ -406,7 +433,7 @@ impl LanguageModel for OpenAiLanguageModel { self.model.supports_parallel_tool_calls(), self.model.supports_prompt_cache_key(), self.max_output_tokens(), - self.model.reasoning_effort(), + None, false, ); let completions = self.stream_completion(request, cx); diff --git a/crates/open_ai/src/completion.rs b/crates/open_ai/src/completion.rs index 53f0f35054b..24c8f3b120e 100644 --- a/crates/open_ai/src/completion.rs +++ b/crates/open_ai/src/completion.rs @@ -190,8 +190,8 @@ pub fn into_open_ai_response( tool_choice, stop: _, temperature, - thinking_allowed: _, - thinking_effort: _, + thinking_allowed, + thinking_effort, speed: _, } = request; @@ -233,10 +233,18 @@ pub fn into_open_ai_response( } else { None }, - reasoning: reasoning_effort.map(|effort| crate::responses::ReasoningConfig { - effort, - summary: Some(crate::responses::ReasoningSummaryMode::Auto), - }), + reasoning: if thinking_allowed { + thinking_effort + .as_deref() + .and_then(|effort| effort.parse::().ok()) + .or(reasoning_effort) + .map(|effort| crate::responses::ReasoningConfig { + effort, + summary: Some(crate::responses::ReasoningSummaryMode::Auto), + }) + } else { + None + }, } } @@ -1018,8 +1026,8 @@ mod tests { tool_choice: Some(LanguageModelToolChoice::Any), stop: vec!["".into()], temperature: None, - thinking_allowed: false, - thinking_effort: None, + thinking_allowed: true, + thinking_effort: Some("high".into()), speed: None, }; @@ -1083,12 +1091,46 @@ mod tests { } ], "prompt_cache_key": "thread-123", - "reasoning": { "effort": "low", "summary": "auto" } + "reasoning": { "effort": "high", "summary": "auto" } }); assert_eq!(serialized, expected); } + #[test] + fn into_open_ai_response_omits_reasoning_when_thinking_is_disabled() { + let request = LanguageModelRequest { + thread_id: None, + prompt_id: None, + intent: None, + messages: vec![LanguageModelRequestMessage { + role: Role::User, + content: vec![MessageContent::Text("Hello".into())], + cache: false, + reasoning_details: None, + }], + tools: Vec::new(), + tool_choice: None, + stop: Vec::new(), + temperature: None, + thinking_allowed: false, + thinking_effort: Some("high".into()), + speed: None, + }; + + let response = into_open_ai_response( + request, + "gpt-5", + true, + true, + None, + Some(ReasoningEffort::Medium), + ); + + let serialized = serde_json::to_value(&response).unwrap(); + assert_eq!(serialized.get("reasoning"), None); + } + #[test] fn responses_stream_maps_tool_calls() { let events = vec![ diff --git a/crates/open_ai/src/open_ai.rs b/crates/open_ai/src/open_ai.rs index 5d313272548..009cdd897e2 100644 --- a/crates/open_ai/src/open_ai.rs +++ b/crates/open_ai/src/open_ai.rs @@ -265,13 +265,69 @@ impl Model { Self::Custom { reasoning_effort, .. } => reasoning_effort.to_owned(), - Self::FivePointThreeCodex | Self::FivePointFourPro | Self::FivePointFivePro => { - Some(ReasoningEffort::Medium) - } + Self::O1 + | Self::O3 + | Self::O3Mini + | Self::Five + | Self::FiveCodex + | Self::FiveMini + | Self::FiveNano + | Self::FivePointOne + | Self::FivePointTwo + | Self::FivePointTwoCodex + | Self::FivePointThreeCodex + | Self::FivePointFour + | Self::FivePointFourPro + | Self::FivePointFive + | Self::FivePointFivePro => Some(ReasoningEffort::Medium), _ => None, } } + pub fn supported_reasoning_efforts(&self) -> &'static [ReasoningEffort] { + match self { + Self::Custom { + reasoning_effort: Some(effort), + .. + } => match effort { + ReasoningEffort::Minimal => &[ReasoningEffort::Minimal], + ReasoningEffort::Low => &[ReasoningEffort::Low], + ReasoningEffort::Medium => &[ReasoningEffort::Medium], + ReasoningEffort::High => &[ReasoningEffort::High], + ReasoningEffort::XHigh => &[ReasoningEffort::XHigh], + }, + Self::O1 | Self::O3 | Self::O3Mini | Self::FivePointOne => &[ + ReasoningEffort::Low, + ReasoningEffort::Medium, + ReasoningEffort::High, + ], + Self::Five | Self::FiveMini | Self::FiveNano => &[ + ReasoningEffort::Minimal, + ReasoningEffort::Low, + ReasoningEffort::Medium, + ReasoningEffort::High, + ], + Self::FiveCodex + | Self::FivePointTwoCodex + | Self::FivePointThreeCodex + | Self::FivePointFourPro => &[ + ReasoningEffort::Medium, + ReasoningEffort::High, + ReasoningEffort::XHigh, + ], + Self::FivePointTwo + | Self::FivePointFour + | Self::FivePointFive + | Self::FivePointFivePro => &[ + ReasoningEffort::Low, + ReasoningEffort::Medium, + ReasoningEffort::High, + ReasoningEffort::XHigh, + ], + _ => &[], + } + } + pub fn uses_responses_api(&self) -> bool { match self { Self::Custom {