Add OpenAI Responses API support with chat_completions capability flag (#39989)

Add support for OpenAI's `/responses` endpoint for models that don't support the `/chat/completions` API. This enables compatibility with newer model variants (`gpt-5-codex`, `gpt-5-pro`, `o3-pro`, etc.) while maintaining compatibility with existing configs. Changes: - Add `supports_chat_completions` flag to model capabilities that defaults to true to preserve existing behavior - Implement a Responses API client with streaming support as per [OpenAI documentation](https://app.stainless.com/api/spec/documented/openai/openapi.documented.yml). - Add `OpenAiResponseEventMapper` to convert Responses events to completion events for maintainer simplicity - Update UI to allow toggling the `chat_completions` capability - Add `gpt-5-codex` model Closes #38858 Release Notes: - Added support for `gpt-5-codex` model --------- Co-authored-by: Bennet Bo Fenner <bennet@zed.dev>
2026-06-01 03:14:56 +07:00 · 2026-01-05 09:15:54 -08:00 · 2026-01-05 09:15:54 -08:00 · 84017bca89
commit 84017bca89
parent e70d2524b3
8 changed files with 1565 additions and 37 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -8983,6 +8983,7 @@ dependencies = [
 "open_ai",
 "open_router",
 "partial-json-fixer",
+ "pretty_assertions",
 "project",
 "release_channel",
 "schemars",
--- a/crates/agent_ui/src/agent_configuration/add_llm_provider_modal.rs
+++ b/crates/agent_ui/src/agent_configuration/add_llm_provider_modal.rs
@ -102,6 +102,7 @@ struct ModelCapabilityToggles {
    pub supports_images: ToggleState,
    pub supports_parallel_tool_calls: ToggleState,
    pub supports_prompt_cache_key: ToggleState,
+    pub supports_chat_completions: ToggleState,
 }

 struct ModelInput {
@ -154,6 +155,7 @@ impl ModelInput {
            images,
            parallel_tool_calls,
            prompt_cache_key,
+            chat_completions,
        } = ModelCapabilities::default();

        Self {
@ -166,6 +168,7 @@ impl ModelInput {
                supports_images: images.into(),
                supports_parallel_tool_calls: parallel_tool_calls.into(),
                supports_prompt_cache_key: prompt_cache_key.into(),
+                supports_chat_completions: chat_completions.into(),
            },
        }
    }
@ -203,6 +206,7 @@ impl ModelInput {
                images: self.capabilities.supports_images.selected(),
                parallel_tool_calls: self.capabilities.supports_parallel_tool_calls.selected(),
                prompt_cache_key: self.capabilities.supports_prompt_cache_key.selected(),
+                chat_completions: self.capabilities.supports_chat_completions.selected(),
            },
        })
    }
@ -426,6 +430,20 @@ impl AddLlmProviderModal {
                                cx.notify();
                            },
                        )),
+                    )
+                    .child(
+                        Checkbox::new(
+                            ("supports-chat-completions", ix),
+                            model.capabilities.supports_chat_completions,
+                        )
+                        .label("Supports /chat/completions")
+                        .on_click(cx.listener(
+                            move |this, checked, _window, cx| {
+                                this.input.models[ix].capabilities.supports_chat_completions =
+                                    *checked;
+                                cx.notify();
+                            },
+                        )),
                    ),
            )
            .when(has_more_than_one_model, |this| {
@ -724,12 +742,17 @@ mod tests {
                model_input.capabilities.supports_prompt_cache_key,
                ToggleState::Unselected
            );
+            assert_eq!(
+                model_input.capabilities.supports_chat_completions,
+                ToggleState::Selected
+            );

            let parsed_model = model_input.parse(cx).unwrap();
            assert!(parsed_model.capabilities.tools);
            assert!(!parsed_model.capabilities.images);
            assert!(!parsed_model.capabilities.parallel_tool_calls);
            assert!(!parsed_model.capabilities.prompt_cache_key);
+            assert!(parsed_model.capabilities.chat_completions);
        });
    }

@ -749,12 +772,14 @@ mod tests {
            model_input.capabilities.supports_images = ToggleState::Unselected;
            model_input.capabilities.supports_parallel_tool_calls = ToggleState::Unselected;
            model_input.capabilities.supports_prompt_cache_key = ToggleState::Unselected;
+            model_input.capabilities.supports_chat_completions = ToggleState::Unselected;

            let parsed_model = model_input.parse(cx).unwrap();
            assert!(!parsed_model.capabilities.tools);
            assert!(!parsed_model.capabilities.images);
            assert!(!parsed_model.capabilities.parallel_tool_calls);
            assert!(!parsed_model.capabilities.prompt_cache_key);
+            assert!(!parsed_model.capabilities.chat_completions);
        });
    }

@ -774,6 +799,7 @@ mod tests {
            model_input.capabilities.supports_images = ToggleState::Unselected;
            model_input.capabilities.supports_parallel_tool_calls = ToggleState::Selected;
            model_input.capabilities.supports_prompt_cache_key = ToggleState::Unselected;
+            model_input.capabilities.supports_chat_completions = ToggleState::Selected;

            let parsed_model = model_input.parse(cx).unwrap();
            assert_eq!(parsed_model.name, "somemodel");
@ -781,6 +807,7 @@ mod tests {
            assert!(!parsed_model.capabilities.images);
            assert!(parsed_model.capabilities.parallel_tool_calls);
            assert!(!parsed_model.capabilities.prompt_cache_key);
+            assert!(parsed_model.capabilities.chat_completions);
        });
    }

--- a/crates/language_models/Cargo.toml
+++ b/crates/language_models/Cargo.toml
@ -66,4 +66,5 @@ x_ai = { workspace = true, features = ["schemars"] }
 [dev-dependencies]
 editor = { workspace = true, features = ["test-support"] }
 language_model = { workspace = true, features = ["test-support"] }
+pretty_assertions.workspace = true
 project = { workspace = true, features = ["test-support"] }
--- a/crates/language_models/src/provider/open_ai.rs
+++ b/crates/language_models/src/provider/open_ai.rs
--- a/crates/language_models/src/provider/open_ai_compatible.rs
+++ b/crates/language_models/src/provider/open_ai_compatible.rs
@ -10,14 +10,20 @@ use language_model::{
    LanguageModelRequest, LanguageModelToolChoice, LanguageModelToolSchemaFormat, RateLimiter,
 };
 use menu;
-use open_ai::{ResponseStreamEvent, stream_completion};
+use open_ai::{
+    ResponseStreamEvent,
+    responses::{Request as ResponseRequest, StreamEvent as ResponsesStreamEvent, stream_response},
+    stream_completion,
+};
 use settings::{Settings, SettingsStore};
 use std::sync::Arc;
 use ui::{ElevationIndex, Tooltip, prelude::*};
 use ui_input::InputField;
 use util::ResultExt;

-use crate::provider::open_ai::{OpenAiEventMapper, into_open_ai};
+use crate::provider::open_ai::{
+    OpenAiEventMapper, OpenAiResponseEventMapper, into_open_ai, into_open_ai_response,
+};
 pub use settings::OpenAiCompatibleAvailableModel as AvailableModel;
 pub use settings::OpenAiCompatibleModelCapabilities as ModelCapabilities;

@ -236,6 +242,43 @@ impl OpenAiCompatibleLanguageModel {

        async move { Ok(future.await?.boxed()) }.boxed()
    }
+
+    fn stream_response(
+        &self,
+        request: ResponseRequest,
+        cx: &AsyncApp,
+    ) -> BoxFuture<'static, Result<futures::stream::BoxStream<'static, Result<ResponsesStreamEvent>>>>
+    {
+        let http_client = self.http_client.clone();
+
+        let Ok((api_key, api_url)) = self.state.read_with(cx, |state, _cx| {
+            let api_url = &state.settings.api_url;
+            (
+                state.api_key_state.key(api_url),
+                state.settings.api_url.clone(),
+            )
+        }) else {
+            return future::ready(Err(anyhow!("App state dropped"))).boxed();
+        };
+
+        let provider = self.provider_name.clone();
+        let future = self.request_limiter.stream(async move {
+            let Some(api_key) = api_key else {
+                return Err(LanguageModelCompletionError::NoApiKey { provider });
+            };
+            let request = stream_response(
+                http_client.as_ref(),
+                provider.0.as_str(),
+                &api_url,
+                &api_key,
+                request,
+            );
+            let response = request.await?;
+            Ok(response)
+        });
+
+        async move { Ok(future.await?.boxed()) }.boxed()
+    }
 }

 impl LanguageModel for OpenAiCompatibleLanguageModel {
@ -327,20 +370,37 @@ impl LanguageModel for OpenAiCompatibleLanguageModel {
            LanguageModelCompletionError,
        >,
    > {
-        let request = into_open_ai(
-            request,
-            &self.model.name,
-            self.model.capabilities.parallel_tool_calls,
-            self.model.capabilities.prompt_cache_key,
-            self.max_output_tokens(),
-            None,
-        );
-        let completions = self.stream_completion(request, cx);
-        async move {
-            let mapper = OpenAiEventMapper::new();
-            Ok(mapper.map_stream(completions.await?).boxed())
+        if self.model.capabilities.chat_completions {
+            let request = into_open_ai(
+                request,
+                &self.model.name,
+                self.model.capabilities.parallel_tool_calls,
+                self.model.capabilities.prompt_cache_key,
+                self.max_output_tokens(),
+                None,
+            );
+            let completions = self.stream_completion(request, cx);
+            async move {
+                let mapper = OpenAiEventMapper::new();
+                Ok(mapper.map_stream(completions.await?).boxed())
+            }
+            .boxed()
+        } else {
+            let request = into_open_ai_response(
+                request,
+                &self.model.name,
+                self.model.capabilities.parallel_tool_calls,
+                self.model.capabilities.prompt_cache_key,
+                self.max_output_tokens(),
+                None,
+            );
+            let completions = self.stream_response(request, cx);
+            async move {
+                let mapper = OpenAiResponseEventMapper::new();
+                Ok(mapper.map_stream(completions.await?).boxed())
+            }
+            .boxed()
        }
-        .boxed()
    }
 }

--- a/crates/open_ai/src/open_ai.rs
+++ b/crates/open_ai/src/open_ai.rs
@ -81,6 +81,8 @@ pub enum Model {
    O4Mini,
    #[serde(rename = "gpt-5")]
    Five,
+    #[serde(rename = "gpt-5-codex")]
+    FiveCodex,
    #[serde(rename = "gpt-5-mini")]
    FiveMini,
    #[serde(rename = "gpt-5-nano")]
@ -98,9 +100,15 @@ pub enum Model {
        max_output_tokens: Option<u64>,
        max_completion_tokens: Option<u64>,
        reasoning_effort: Option<ReasoningEffort>,
+        #[serde(default = "default_supports_chat_completions")]
+        supports_chat_completions: bool,
    },
 }

+const fn default_supports_chat_completions() -> bool {
+    true
+}
+
 impl Model {
    pub fn default_fast() -> Self {
        // TODO: Replace with FiveMini since all other models are deprecated
@ -122,6 +130,7 @@ impl Model {
            "o3" => Ok(Self::O3),
            "o4-mini" => Ok(Self::O4Mini),
            "gpt-5" => Ok(Self::Five),
+            "gpt-5-codex" => Ok(Self::FiveCodex),
            "gpt-5-mini" => Ok(Self::FiveMini),
            "gpt-5-nano" => Ok(Self::FiveNano),
            "gpt-5.1" => Ok(Self::FivePointOne),
@ -145,6 +154,7 @@ impl Model {
            Self::O3 => "o3",
            Self::O4Mini => "o4-mini",
            Self::Five => "gpt-5",
+            Self::FiveCodex => "gpt-5-codex",
            Self::FiveMini => "gpt-5-mini",
            Self::FiveNano => "gpt-5-nano",
            Self::FivePointOne => "gpt-5.1",
@ -168,6 +178,7 @@ impl Model {
            Self::O3 => "o3",
            Self::O4Mini => "o4-mini",
            Self::Five => "gpt-5",
+            Self::FiveCodex => "gpt-5-codex",
            Self::FiveMini => "gpt-5-mini",
            Self::FiveNano => "gpt-5-nano",
            Self::FivePointOne => "gpt-5.1",
@ -193,6 +204,7 @@ impl Model {
            Self::O3 => 200_000,
            Self::O4Mini => 200_000,
            Self::Five => 272_000,
+            Self::FiveCodex => 272_000,
            Self::FiveMini => 272_000,
            Self::FiveNano => 272_000,
            Self::FivePointOne => 400_000,
@ -219,6 +231,7 @@ impl Model {
            Self::O3 => Some(100_000),
            Self::O4Mini => Some(100_000),
            Self::Five => Some(128_000),
+            Self::FiveCodex => Some(128_000),
            Self::FiveMini => Some(128_000),
            Self::FiveNano => Some(128_000),
            Self::FivePointOne => Some(128_000),
@ -235,6 +248,17 @@ impl Model {
        }
    }

+    pub fn supports_chat_completions(&self) -> bool {
+        match self {
+            Self::Custom {
+                supports_chat_completions,
+                ..
+            } => *supports_chat_completions,
+            Self::FiveCodex => false,
+            _ => true,
+        }
+    }
+
    /// Returns whether the given model supports the `parallel_tool_calls` parameter.
    ///
    /// If the model does not support the parameter, do not pass it up, or the API will return an error.
@ -249,6 +273,7 @@ impl Model {
            | Self::FourPointOneMini
            | Self::FourPointOneNano
            | Self::Five
+            | Self::FiveCodex
            | Self::FiveMini
            | Self::FivePointOne
            | Self::FivePointTwo
@ -624,3 +649,362 @@ pub fn embed<'a>(
        Ok(response)
    }
 }
+
+pub mod responses {
+    use anyhow::{Result, anyhow};
+    use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
+    use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest};
+    use serde::{Deserialize, Serialize};
+    use serde_json::Value;
+
+    use crate::RequestError;
+
+    #[derive(Serialize, Debug)]
+    pub struct Request {
+        pub model: String,
+        #[serde(skip_serializing_if = "Vec::is_empty")]
+        pub input: Vec<Value>,
+        #[serde(default)]
+        pub stream: bool,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        pub temperature: Option<f32>,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        pub top_p: Option<f32>,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        pub max_output_tokens: Option<u64>,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        pub parallel_tool_calls: Option<bool>,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        pub tool_choice: Option<super::ToolChoice>,
+        #[serde(skip_serializing_if = "Vec::is_empty")]
+        pub tools: Vec<ToolDefinition>,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        pub prompt_cache_key: Option<String>,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        pub reasoning: Option<ReasoningConfig>,
+    }
+
+    #[derive(Serialize, Debug)]
+    pub struct ReasoningConfig {
+        pub effort: super::ReasoningEffort,
+    }
+
+    #[derive(Serialize, Debug)]
+    #[serde(tag = "type", rename_all = "snake_case")]
+    pub enum ToolDefinition {
+        Function {
+            name: String,
+            #[serde(skip_serializing_if = "Option::is_none")]
+            description: Option<String>,
+            #[serde(skip_serializing_if = "Option::is_none")]
+            parameters: Option<Value>,
+            #[serde(skip_serializing_if = "Option::is_none")]
+            strict: Option<bool>,
+        },
+    }
+
+    #[derive(Deserialize, Debug)]
+    pub struct Error {
+        pub message: String,
+    }
+
+    #[derive(Deserialize, Debug)]
+    #[serde(tag = "type")]
+    pub enum StreamEvent {
+        #[serde(rename = "response.created")]
+        Created { response: ResponseSummary },
+        #[serde(rename = "response.in_progress")]
+        InProgress { response: ResponseSummary },
+        #[serde(rename = "response.output_item.added")]
+        OutputItemAdded {
+            output_index: usize,
+            #[serde(default)]
+            sequence_number: Option<u64>,
+            item: ResponseOutputItem,
+        },
+        #[serde(rename = "response.output_item.done")]
+        OutputItemDone {
+            output_index: usize,
+            #[serde(default)]
+            sequence_number: Option<u64>,
+            item: ResponseOutputItem,
+        },
+        #[serde(rename = "response.content_part.added")]
+        ContentPartAdded {
+            item_id: String,
+            output_index: usize,
+            content_index: usize,
+            part: Value,
+        },
+        #[serde(rename = "response.content_part.done")]
+        ContentPartDone {
+            item_id: String,
+            output_index: usize,
+            content_index: usize,
+            part: Value,
+        },
+        #[serde(rename = "response.output_text.delta")]
+        OutputTextDelta {
+            item_id: String,
+            output_index: usize,
+            #[serde(default)]
+            content_index: Option<usize>,
+            delta: String,
+        },
+        #[serde(rename = "response.output_text.done")]
+        OutputTextDone {
+            item_id: String,
+            output_index: usize,
+            #[serde(default)]
+            content_index: Option<usize>,
+            text: String,
+        },
+        #[serde(rename = "response.function_call_arguments.delta")]
+        FunctionCallArgumentsDelta {
+            item_id: String,
+            output_index: usize,
+            delta: String,
+            #[serde(default)]
+            sequence_number: Option<u64>,
+        },
+        #[serde(rename = "response.function_call_arguments.done")]
+        FunctionCallArgumentsDone {
+            item_id: String,
+            output_index: usize,
+            arguments: String,
+            #[serde(default)]
+            sequence_number: Option<u64>,
+        },
+        #[serde(rename = "response.completed")]
+        Completed { response: ResponseSummary },
+        #[serde(rename = "response.incomplete")]
+        Incomplete { response: ResponseSummary },
+        #[serde(rename = "response.failed")]
+        Failed { response: ResponseSummary },
+        #[serde(rename = "response.error")]
+        Error { error: Error },
+        #[serde(rename = "error")]
+        GenericError { error: Error },
+        #[serde(other)]
+        Unknown,
+    }
+
+    #[derive(Deserialize, Debug, Default, Clone)]
+    pub struct ResponseSummary {
+        #[serde(default)]
+        pub id: Option<String>,
+        #[serde(default)]
+        pub status: Option<String>,
+        #[serde(default)]
+        pub status_details: Option<ResponseStatusDetails>,
+        #[serde(default)]
+        pub usage: Option<ResponseUsage>,
+        #[serde(default)]
+        pub output: Vec<ResponseOutputItem>,
+    }
+
+    #[derive(Deserialize, Debug, Default, Clone)]
+    pub struct ResponseStatusDetails {
+        #[serde(default)]
+        pub reason: Option<String>,
+        #[serde(default)]
+        pub r#type: Option<String>,
+        #[serde(default)]
+        pub error: Option<Value>,
+    }
+
+    #[derive(Deserialize, Debug, Default, Clone)]
+    pub struct ResponseUsage {
+        #[serde(default)]
+        pub input_tokens: Option<u64>,
+        #[serde(default)]
+        pub output_tokens: Option<u64>,
+        #[serde(default)]
+        pub total_tokens: Option<u64>,
+    }
+
+    #[derive(Deserialize, Debug, Clone)]
+    #[serde(tag = "type", rename_all = "snake_case")]
+    pub enum ResponseOutputItem {
+        Message(ResponseOutputMessage),
+        FunctionCall(ResponseFunctionToolCall),
+        #[serde(other)]
+        Unknown,
+    }
+
+    #[derive(Deserialize, Debug, Clone)]
+    pub struct ResponseOutputMessage {
+        #[serde(default)]
+        pub id: Option<String>,
+        #[serde(default)]
+        pub content: Vec<Value>,
+        #[serde(default)]
+        pub role: Option<String>,
+        #[serde(default)]
+        pub status: Option<String>,
+    }
+
+    #[derive(Deserialize, Debug, Clone)]
+    pub struct ResponseFunctionToolCall {
+        #[serde(default)]
+        pub id: Option<String>,
+        #[serde(default)]
+        pub arguments: String,
+        #[serde(default)]
+        pub call_id: Option<String>,
+        #[serde(default)]
+        pub name: Option<String>,
+        #[serde(default)]
+        pub status: Option<String>,
+    }
+
+    pub async fn stream_response(
+        client: &dyn HttpClient,
+        provider_name: &str,
+        api_url: &str,
+        api_key: &str,
+        request: Request,
+    ) -> Result<BoxStream<'static, Result<StreamEvent>>, RequestError> {
+        let uri = format!("{api_url}/responses");
+        let request_builder = HttpRequest::builder()
+            .method(Method::POST)
+            .uri(uri)
+            .header("Content-Type", "application/json")
+            .header("Authorization", format!("Bearer {}", api_key.trim()));
+
+        let is_streaming = request.stream;
+        let request = request_builder
+            .body(AsyncBody::from(
+                serde_json::to_string(&request).map_err(|e| RequestError::Other(e.into()))?,
+            ))
+            .map_err(|e| RequestError::Other(e.into()))?;
+
+        let mut response = client.send(request).await?;
+        if response.status().is_success() {
+            if is_streaming {
+                let reader = BufReader::new(response.into_body());
+                Ok(reader
+                    .lines()
+                    .filter_map(|line| async move {
+                        match line {
+                            Ok(line) => {
+                                let line = line
+                                    .strip_prefix("data: ")
+                                    .or_else(|| line.strip_prefix("data:"))?;
+                                if line == "[DONE]" || line.is_empty() {
+                                    None
+                                } else {
+                                    match serde_json::from_str::<StreamEvent>(line) {
+                                        Ok(event) => Some(Ok(event)),
+                                        Err(error) => {
+                                            log::error!(
+                                                "Failed to parse OpenAI responses stream event: `{}`\nResponse: `{}`",
+                                                error,
+                                                line,
+                                            );
+                                            Some(Err(anyhow!(error)))
+                                        }
+                                    }
+                                }
+                            }
+                            Err(error) => Some(Err(anyhow!(error))),
+                        }
+                    })
+                    .boxed())
+            } else {
+                let mut body = String::new();
+                response
+                    .body_mut()
+                    .read_to_string(&mut body)
+                    .await
+                    .map_err(|e| RequestError::Other(e.into()))?;
+
+                match serde_json::from_str::<ResponseSummary>(&body) {
+                    Ok(response_summary) => {
+                        let events = vec![
+                            StreamEvent::Created {
+                                response: response_summary.clone(),
+                            },
+                            StreamEvent::InProgress {
+                                response: response_summary.clone(),
+                            },
+                        ];
+
+                        let mut all_events = events;
+                        for (output_index, item) in response_summary.output.iter().enumerate() {
+                            all_events.push(StreamEvent::OutputItemAdded {
+                                output_index,
+                                sequence_number: None,
+                                item: item.clone(),
+                            });
+
+                            match item {
+                                ResponseOutputItem::Message(message) => {
+                                    for content_item in &message.content {
+                                        if let Some(text) = content_item.get("text") {
+                                            if let Some(text_str) = text.as_str() {
+                                                if let Some(ref item_id) = message.id {
+                                                    all_events.push(StreamEvent::OutputTextDelta {
+                                                        item_id: item_id.clone(),
+                                                        output_index,
+                                                        content_index: None,
+                                                        delta: text_str.to_string(),
+                                                    });
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                                ResponseOutputItem::FunctionCall(function_call) => {
+                                    if let Some(ref item_id) = function_call.id {
+                                        all_events.push(StreamEvent::FunctionCallArgumentsDone {
+                                            item_id: item_id.clone(),
+                                            output_index,
+                                            arguments: function_call.arguments.clone(),
+                                            sequence_number: None,
+                                        });
+                                    }
+                                }
+                                ResponseOutputItem::Unknown => {}
+                            }
+
+                            all_events.push(StreamEvent::OutputItemDone {
+                                output_index,
+                                sequence_number: None,
+                                item: item.clone(),
+                            });
+                        }
+
+                        all_events.push(StreamEvent::Completed {
+                            response: response_summary,
+                        });
+
+                        Ok(futures::stream::iter(all_events.into_iter().map(Ok)).boxed())
+                    }
+                    Err(error) => {
+                        log::error!(
+                            "Failed to parse OpenAI non-streaming response: `{}`\nResponse: `{}`",
+                            error,
+                            body,
+                        );
+                        Err(RequestError::Other(anyhow!(error)))
+                    }
+                }
+            }
+        } else {
+            let mut body = String::new();
+            response
+                .body_mut()
+                .read_to_string(&mut body)
+                .await
+                .map_err(|e| RequestError::Other(e.into()))?;
+
+            Err(RequestError::HttpResponseError {
+                provider: provider_name.to_owned(),
+                status_code: response.status(),
+                body,
+                headers: response.headers().clone(),
+            })
+        }
+    }
+}
--- a/crates/settings/src/settings_content/language_model.rs
+++ b/crates/settings/src/settings_content/language_model.rs
@ -208,6 +208,8 @@ pub struct OpenAiAvailableModel {
    pub max_output_tokens: Option<u64>,
    pub max_completion_tokens: Option<u64>,
    pub reasoning_effort: Option<OpenAiReasoningEffort>,
+    #[serde(default)]
+    pub capabilities: OpenAiModelCapabilities,
 }

 #[derive(Debug, Serialize, Deserialize, PartialEq, Clone, JsonSchema, MergeFrom)]
@ -226,6 +228,21 @@ pub struct OpenAiCompatibleSettingsContent {
    pub available_models: Vec<OpenAiCompatibleAvailableModel>,
 }

+#[with_fallible_options]
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema, MergeFrom)]
+pub struct OpenAiModelCapabilities {
+    #[serde(default = "default_true")]
+    pub chat_completions: bool,
+}
+
+impl Default for OpenAiModelCapabilities {
+    fn default() -> Self {
+        Self {
+            chat_completions: default_true(),
+        }
+    }
+}
+
 #[with_fallible_options]
 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema, MergeFrom)]
 pub struct OpenAiCompatibleAvailableModel {
@ -245,6 +262,8 @@ pub struct OpenAiCompatibleModelCapabilities {
    pub images: bool,
    pub parallel_tool_calls: bool,
    pub prompt_cache_key: bool,
+    #[serde(default = "default_true")]
+    pub chat_completions: bool,
 }

 impl Default for OpenAiCompatibleModelCapabilities {
@ -254,6 +273,7 @@ impl Default for OpenAiCompatibleModelCapabilities {
            images: false,
            parallel_tool_calls: false,
            prompt_cache_key: false,
+            chat_completions: default_true(),
        }
    }
 }
--- a/docs/src/ai/llm-providers.md
+++ b/docs/src/ai/llm-providers.md
@ -469,6 +469,14 @@ To use alternate models, perhaps a preview release, or if you wish to control th
          "name": "gpt-4o-2024-08-06",
          "display_name": "GPT 4o Summer 2024",
          "max_tokens": 128000
+        },
+        {
+          "name": "gpt-5-codex",
+          "display_name": "GPT-5 Codex",
+          "max_tokens": 272000,
+          "capabilities": {
+            "chat_completions": false
+          }
        }
      ]
    }
@ -478,7 +486,10 @@ To use alternate models, perhaps a preview release, or if you wish to control th

 You must provide the model's context window in the `max_tokens` parameter; this can be found in the [OpenAI model documentation](https://platform.openai.com/docs/models).

-OpenAI `o1` models should set `max_completion_tokens` as well to avoid incurring high reasoning token costs.
+OpenAI `o1` and other `o`-class models should also set `max_completion_tokens` to avoid incurring high reasoning token costs.
+
+If a model does not support the `/chat/completions` endpoint (for example `gpt-5-codex`), set `capabilities.chat_completions` to `false` in that model's entry. Zed will then send its requests to the `/responses` endpoint instead.
+
 Custom models will be listed in the model dropdown in the Agent Panel.

 ### OpenAI API Compatible {#openai-api-compatible}
@ -525,6 +536,9 @@ By default, OpenAI-compatible models inherit the following capabilities:
 - `images`: false (does not support image inputs)
 - `parallel_tool_calls`: false (does not support `parallel_tool_calls` parameter)
 - `prompt_cache_key`: false (does not support `prompt_cache_key` parameter)
+- `chat_completions`: true (calls the `/chat/completions` endpoint)
+
+If a provider exposes models that only work with the Responses API, set `chat_completions` to `false` in the `capabilities` of those entries. Zed then calls the `/responses` endpoint instead of `/chat/completions` for these models.

 Note that LLM API keys aren't stored in your settings file.
 So, ensure you have it set in your environment variables (`<PROVIDER_NAME>_API_KEY=<your api key>`) so your settings can pick it up. In the example above, it would be `TOGETHER_AI_API_KEY=<your api key>`.