Add OpenAI Responses API support with chat_completions capability flag (#39989)

Add support for OpenAI's `/responses` endpoint for models that don't
support the `/chat/completions` API. This enables compatibility with newer
model variants (`gpt-5-codex`, `gpt-5-pro`, `o3-pro`, etc.) while
maintaining compatibility with existing configs.

Changes:
- Add `supports_chat_completions` flag to model capabilities that
defaults to true for existing behavior
- Implement responses API client with streaming support as per [OpenAI
documentation](https://app.stainless.com/api/spec/documented/openai/openapi.documented.yml).
- Add `OpenAiResponseEventMapper` to convert Responses API events to completion
events, which simplifies maintenance
- Update UI to allow toggling `chat_completions` capability
- Add `gpt-5-codex` model

Closes #38858

Release Notes:
- Added support for `gpt-5-codex` model

---------

Co-authored-by: Bennet Bo Fenner <bennet@zed.dev>
This commit is contained in:
Matt Stallone 2026-01-05 09:15:54 -08:00 committed by GitHub
parent e70d2524b3
commit 84017bca89
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 1565 additions and 37 deletions

1
Cargo.lock generated
View file

@ -8983,6 +8983,7 @@ dependencies = [
"open_ai",
"open_router",
"partial-json-fixer",
"pretty_assertions",
"project",
"release_channel",
"schemars",

View file

@ -102,6 +102,7 @@ struct ModelCapabilityToggles {
pub supports_images: ToggleState,
pub supports_parallel_tool_calls: ToggleState,
pub supports_prompt_cache_key: ToggleState,
pub supports_chat_completions: ToggleState,
}
struct ModelInput {
@ -154,6 +155,7 @@ impl ModelInput {
images,
parallel_tool_calls,
prompt_cache_key,
chat_completions,
} = ModelCapabilities::default();
Self {
@ -166,6 +168,7 @@ impl ModelInput {
supports_images: images.into(),
supports_parallel_tool_calls: parallel_tool_calls.into(),
supports_prompt_cache_key: prompt_cache_key.into(),
supports_chat_completions: chat_completions.into(),
},
}
}
@ -203,6 +206,7 @@ impl ModelInput {
images: self.capabilities.supports_images.selected(),
parallel_tool_calls: self.capabilities.supports_parallel_tool_calls.selected(),
prompt_cache_key: self.capabilities.supports_prompt_cache_key.selected(),
chat_completions: self.capabilities.supports_chat_completions.selected(),
},
})
}
@ -426,6 +430,20 @@ impl AddLlmProviderModal {
cx.notify();
},
)),
)
.child(
Checkbox::new(
("supports-chat-completions", ix),
model.capabilities.supports_chat_completions,
)
.label("Supports /chat/completions")
.on_click(cx.listener(
move |this, checked, _window, cx| {
this.input.models[ix].capabilities.supports_chat_completions =
*checked;
cx.notify();
},
)),
),
)
.when(has_more_than_one_model, |this| {
@ -724,12 +742,17 @@ mod tests {
model_input.capabilities.supports_prompt_cache_key,
ToggleState::Unselected
);
assert_eq!(
model_input.capabilities.supports_chat_completions,
ToggleState::Selected
);
let parsed_model = model_input.parse(cx).unwrap();
assert!(parsed_model.capabilities.tools);
assert!(!parsed_model.capabilities.images);
assert!(!parsed_model.capabilities.parallel_tool_calls);
assert!(!parsed_model.capabilities.prompt_cache_key);
assert!(parsed_model.capabilities.chat_completions);
});
}
@ -749,12 +772,14 @@ mod tests {
model_input.capabilities.supports_images = ToggleState::Unselected;
model_input.capabilities.supports_parallel_tool_calls = ToggleState::Unselected;
model_input.capabilities.supports_prompt_cache_key = ToggleState::Unselected;
model_input.capabilities.supports_chat_completions = ToggleState::Unselected;
let parsed_model = model_input.parse(cx).unwrap();
assert!(!parsed_model.capabilities.tools);
assert!(!parsed_model.capabilities.images);
assert!(!parsed_model.capabilities.parallel_tool_calls);
assert!(!parsed_model.capabilities.prompt_cache_key);
assert!(!parsed_model.capabilities.chat_completions);
});
}
@ -774,6 +799,7 @@ mod tests {
model_input.capabilities.supports_images = ToggleState::Unselected;
model_input.capabilities.supports_parallel_tool_calls = ToggleState::Selected;
model_input.capabilities.supports_prompt_cache_key = ToggleState::Unselected;
model_input.capabilities.supports_chat_completions = ToggleState::Selected;
let parsed_model = model_input.parse(cx).unwrap();
assert_eq!(parsed_model.name, "somemodel");
@ -781,6 +807,7 @@ mod tests {
assert!(!parsed_model.capabilities.images);
assert!(parsed_model.capabilities.parallel_tool_calls);
assert!(!parsed_model.capabilities.prompt_cache_key);
assert!(parsed_model.capabilities.chat_completions);
});
}

View file

@ -66,4 +66,5 @@ x_ai = { workspace = true, features = ["schemars"] }
[dev-dependencies]
editor = { workspace = true, features = ["test-support"] }
language_model = { workspace = true, features = ["test-support"] }
pretty_assertions.workspace = true
project = { workspace = true, features = ["test-support"] }

File diff suppressed because it is too large Load diff

View file

@ -10,14 +10,20 @@ use language_model::{
LanguageModelRequest, LanguageModelToolChoice, LanguageModelToolSchemaFormat, RateLimiter,
};
use menu;
use open_ai::{ResponseStreamEvent, stream_completion};
use open_ai::{
ResponseStreamEvent,
responses::{Request as ResponseRequest, StreamEvent as ResponsesStreamEvent, stream_response},
stream_completion,
};
use settings::{Settings, SettingsStore};
use std::sync::Arc;
use ui::{ElevationIndex, Tooltip, prelude::*};
use ui_input::InputField;
use util::ResultExt;
use crate::provider::open_ai::{OpenAiEventMapper, into_open_ai};
use crate::provider::open_ai::{
OpenAiEventMapper, OpenAiResponseEventMapper, into_open_ai, into_open_ai_response,
};
pub use settings::OpenAiCompatibleAvailableModel as AvailableModel;
pub use settings::OpenAiCompatibleModelCapabilities as ModelCapabilities;
@ -236,6 +242,43 @@ impl OpenAiCompatibleLanguageModel {
async move { Ok(future.await?.boxed()) }.boxed()
}
/// Starts a Responses API request for this provider and returns its event stream.
///
/// The API key and base URL are read from the shared provider state. If that
/// state can no longer be read, the returned future resolves immediately to an
/// "App state dropped" error. The HTTP call itself is run through
/// `request_limiter`, and fails with `NoApiKey` when no key is configured.
fn stream_response(
    &self,
    request: ResponseRequest,
    cx: &AsyncApp,
) -> BoxFuture<'static, Result<futures::stream::BoxStream<'static, Result<ResponsesStreamEvent>>>>
{
    let Ok((api_key, api_url)) = self.state.read_with(cx, |state, _cx| {
        let url = state.settings.api_url.clone();
        (state.api_key_state.key(&url), url)
    }) else {
        return future::ready(Err(anyhow!("App state dropped"))).boxed();
    };

    let http_client = self.http_client.clone();
    let provider = self.provider_name.clone();
    let limited = self.request_limiter.stream(async move {
        let api_key = match api_key {
            Some(key) => key,
            None => return Err(LanguageModelCompletionError::NoApiKey { provider }),
        };
        let events = stream_response(
            http_client.as_ref(),
            provider.0.as_str(),
            &api_url,
            &api_key,
            request,
        )
        .await?;
        Ok(events)
    });

    async move { Ok(limited.await?.boxed()) }.boxed()
}
}
impl LanguageModel for OpenAiCompatibleLanguageModel {
@ -327,20 +370,37 @@ impl LanguageModel for OpenAiCompatibleLanguageModel {
LanguageModelCompletionError,
>,
> {
let request = into_open_ai(
request,
&self.model.name,
self.model.capabilities.parallel_tool_calls,
self.model.capabilities.prompt_cache_key,
self.max_output_tokens(),
None,
);
let completions = self.stream_completion(request, cx);
async move {
let mapper = OpenAiEventMapper::new();
Ok(mapper.map_stream(completions.await?).boxed())
if self.model.capabilities.chat_completions {
let request = into_open_ai(
request,
&self.model.name,
self.model.capabilities.parallel_tool_calls,
self.model.capabilities.prompt_cache_key,
self.max_output_tokens(),
None,
);
let completions = self.stream_completion(request, cx);
async move {
let mapper = OpenAiEventMapper::new();
Ok(mapper.map_stream(completions.await?).boxed())
}
.boxed()
} else {
let request = into_open_ai_response(
request,
&self.model.name,
self.model.capabilities.parallel_tool_calls,
self.model.capabilities.prompt_cache_key,
self.max_output_tokens(),
None,
);
let completions = self.stream_response(request, cx);
async move {
let mapper = OpenAiResponseEventMapper::new();
Ok(mapper.map_stream(completions.await?).boxed())
}
.boxed()
}
.boxed()
}
}

View file

@ -81,6 +81,8 @@ pub enum Model {
O4Mini,
#[serde(rename = "gpt-5")]
Five,
#[serde(rename = "gpt-5-codex")]
FiveCodex,
#[serde(rename = "gpt-5-mini")]
FiveMini,
#[serde(rename = "gpt-5-nano")]
@ -98,9 +100,15 @@ pub enum Model {
max_output_tokens: Option<u64>,
max_completion_tokens: Option<u64>,
reasoning_effort: Option<ReasoningEffort>,
#[serde(default = "default_supports_chat_completions")]
supports_chat_completions: bool,
},
}
/// Serde default for the `supports_chat_completions` field of `Model::Custom`:
/// a custom model whose configuration omits the field is treated as supporting
/// `/chat/completions`.
const fn default_supports_chat_completions() -> bool {
true
}
impl Model {
pub fn default_fast() -> Self {
// TODO: Replace with FiveMini since all other models are deprecated
@ -122,6 +130,7 @@ impl Model {
"o3" => Ok(Self::O3),
"o4-mini" => Ok(Self::O4Mini),
"gpt-5" => Ok(Self::Five),
"gpt-5-codex" => Ok(Self::FiveCodex),
"gpt-5-mini" => Ok(Self::FiveMini),
"gpt-5-nano" => Ok(Self::FiveNano),
"gpt-5.1" => Ok(Self::FivePointOne),
@ -145,6 +154,7 @@ impl Model {
Self::O3 => "o3",
Self::O4Mini => "o4-mini",
Self::Five => "gpt-5",
Self::FiveCodex => "gpt-5-codex",
Self::FiveMini => "gpt-5-mini",
Self::FiveNano => "gpt-5-nano",
Self::FivePointOne => "gpt-5.1",
@ -168,6 +178,7 @@ impl Model {
Self::O3 => "o3",
Self::O4Mini => "o4-mini",
Self::Five => "gpt-5",
Self::FiveCodex => "gpt-5-codex",
Self::FiveMini => "gpt-5-mini",
Self::FiveNano => "gpt-5-nano",
Self::FivePointOne => "gpt-5.1",
@ -193,6 +204,7 @@ impl Model {
Self::O3 => 200_000,
Self::O4Mini => 200_000,
Self::Five => 272_000,
Self::FiveCodex => 272_000,
Self::FiveMini => 272_000,
Self::FiveNano => 272_000,
Self::FivePointOne => 400_000,
@ -219,6 +231,7 @@ impl Model {
Self::O3 => Some(100_000),
Self::O4Mini => Some(100_000),
Self::Five => Some(128_000),
Self::FiveCodex => Some(128_000),
Self::FiveMini => Some(128_000),
Self::FiveNano => Some(128_000),
Self::FivePointOne => Some(128_000),
@ -235,6 +248,17 @@ impl Model {
}
}
/// Returns whether this model can be called through the `/chat/completions` endpoint.
///
/// Custom models report their configured flag. Among the built-in models only
/// `gpt-5-codex` answers `false`.
pub fn supports_chat_completions(&self) -> bool {
    if let Self::Custom {
        supports_chat_completions,
        ..
    } = self
    {
        return *supports_chat_completions;
    }
    !matches!(self, Self::FiveCodex)
}
/// Returns whether the given model supports the `parallel_tool_calls` parameter.
///
/// If the model does not support the parameter, do not pass it up, or the API will return an error.
@ -249,6 +273,7 @@ impl Model {
| Self::FourPointOneMini
| Self::FourPointOneNano
| Self::Five
| Self::FiveCodex
| Self::FiveMini
| Self::FivePointOne
| Self::FivePointTwo
@ -624,3 +649,362 @@ pub fn embed<'a>(
Ok(response)
}
}
pub mod responses {
use anyhow::{Result, anyhow};
use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use crate::RequestError;
/// JSON body that `stream_response` serializes and POSTs to `{api_url}/responses`.
///
/// Fields marked `skip_serializing_if` are left out of the JSON entirely when they
/// are `None` or empty.
#[derive(Serialize, Debug)]
pub struct Request {
pub model: String,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub input: Vec<Value>,
/// When `true`, `stream_response` reads the reply line by line as `data:` events;
/// when `false`, it parses the whole body as a single `ResponseSummary`.
// NOTE(review): `default` is a deserialization attribute and this type only derives
// `Serialize`, so the attribute below looks like it has no effect — confirm and drop.
#[serde(default)]
pub stream: bool,
#[serde(skip_serializing_if = "Option::is_none")]
pub temperature: Option<f32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub top_p: Option<f32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub max_output_tokens: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub parallel_tool_calls: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub tool_choice: Option<super::ToolChoice>,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub tools: Vec<ToolDefinition>,
#[serde(skip_serializing_if = "Option::is_none")]
pub prompt_cache_key: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub reasoning: Option<ReasoningConfig>,
}
/// Value of the `reasoning` field of [`Request`]; carries only the effort level.
#[derive(Serialize, Debug)]
pub struct ReasoningConfig {
pub effort: super::ReasoningEffort,
}
/// One entry of the `tools` list in [`Request`].
///
/// Serialized as an internally tagged object: the variant name becomes the
/// snake_case `type` field (`"function"`), next to the variant's own fields.
/// Fields that are `None` are omitted.
#[derive(Serialize, Debug)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ToolDefinition {
Function {
name: String,
#[serde(skip_serializing_if = "Option::is_none")]
description: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
parameters: Option<Value>,
#[serde(skip_serializing_if = "Option::is_none")]
strict: Option<bool>,
},
}
/// Error payload carried by the `Error` and `GenericError` variants of [`StreamEvent`].
/// Only the `message` field is read.
#[derive(Deserialize, Debug)]
pub struct Error {
pub message: String,
}
/// One event of a Responses stream, selected by the JSON `type` field.
///
/// Each variant is bound to one exact `type` string through `serde(rename)`.
/// Any other `type` value deserializes as [`StreamEvent::Unknown`] instead of
/// failing, so event kinds not listed here do not break parsing.
#[derive(Deserialize, Debug)]
#[serde(tag = "type")]
pub enum StreamEvent {
// Lifecycle events carrying a response object.
#[serde(rename = "response.created")]
Created { response: ResponseSummary },
#[serde(rename = "response.in_progress")]
InProgress { response: ResponseSummary },
// Output items (messages, function calls) being opened and closed.
#[serde(rename = "response.output_item.added")]
OutputItemAdded {
output_index: usize,
#[serde(default)]
sequence_number: Option<u64>,
item: ResponseOutputItem,
},
#[serde(rename = "response.output_item.done")]
OutputItemDone {
output_index: usize,
#[serde(default)]
sequence_number: Option<u64>,
item: ResponseOutputItem,
},
// Content parts are kept as raw JSON (`part: Value`).
#[serde(rename = "response.content_part.added")]
ContentPartAdded {
item_id: String,
output_index: usize,
content_index: usize,
part: Value,
},
#[serde(rename = "response.content_part.done")]
ContentPartDone {
item_id: String,
output_index: usize,
content_index: usize,
part: Value,
},
// Text output: incremental `delta` chunks, then the final `text`.
#[serde(rename = "response.output_text.delta")]
OutputTextDelta {
item_id: String,
output_index: usize,
#[serde(default)]
content_index: Option<usize>,
delta: String,
},
#[serde(rename = "response.output_text.done")]
OutputTextDone {
item_id: String,
output_index: usize,
#[serde(default)]
content_index: Option<usize>,
text: String,
},
// Function-call arguments: incremental `delta` chunks, then the final `arguments`.
#[serde(rename = "response.function_call_arguments.delta")]
FunctionCallArgumentsDelta {
item_id: String,
output_index: usize,
delta: String,
#[serde(default)]
sequence_number: Option<u64>,
},
#[serde(rename = "response.function_call_arguments.done")]
FunctionCallArgumentsDone {
item_id: String,
output_index: usize,
arguments: String,
#[serde(default)]
sequence_number: Option<u64>,
},
// Terminal lifecycle events.
#[serde(rename = "response.completed")]
Completed { response: ResponseSummary },
#[serde(rename = "response.incomplete")]
Incomplete { response: ResponseSummary },
#[serde(rename = "response.failed")]
Failed { response: ResponseSummary },
// Two spellings of an error event are accepted: `response.error` and plain `error`.
#[serde(rename = "response.error")]
Error { error: Error },
#[serde(rename = "error")]
GenericError { error: Error },
// Fallback for every `type` value not listed above.
#[serde(other)]
Unknown,
}
/// Response object attached to the lifecycle variants of [`StreamEvent`]
/// (`Created`, `InProgress`, `Completed`, `Incomplete`, `Failed`), and the shape
/// the non-streaming branch of `stream_response` parses the whole body into.
///
/// Every field falls back to its default when missing, so partial objects still
/// deserialize.
#[derive(Deserialize, Debug, Default, Clone)]
pub struct ResponseSummary {
#[serde(default)]
pub id: Option<String>,
#[serde(default)]
pub status: Option<String>,
#[serde(default)]
pub status_details: Option<ResponseStatusDetails>,
#[serde(default)]
pub usage: Option<ResponseUsage>,
#[serde(default)]
pub output: Vec<ResponseOutputItem>,
}
/// Optional detail object stored in `ResponseSummary::status_details`.
/// All fields are optional; `error` is kept as raw JSON.
#[derive(Deserialize, Debug, Default, Clone)]
pub struct ResponseStatusDetails {
#[serde(default)]
pub reason: Option<String>,
#[serde(default)]
pub r#type: Option<String>,
#[serde(default)]
pub error: Option<Value>,
}
/// Token counts stored in `ResponseSummary::usage`. Each count is optional and
/// is `None` when the field is missing.
#[derive(Deserialize, Debug, Default, Clone)]
pub struct ResponseUsage {
#[serde(default)]
pub input_tokens: Option<u64>,
#[serde(default)]
pub output_tokens: Option<u64>,
#[serde(default)]
pub total_tokens: Option<u64>,
}
/// One element of a response's `output` list, selected by the JSON `type` field
/// in snake_case: `"message"` or `"function_call"`. Any other `type` value
/// deserializes as `Unknown`.
#[derive(Deserialize, Debug, Clone)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ResponseOutputItem {
Message(ResponseOutputMessage),
FunctionCall(ResponseFunctionToolCall),
#[serde(other)]
Unknown,
}
/// Payload of `ResponseOutputItem::Message`.
///
/// `content` parts are kept as raw JSON. The non-streaming branch of
/// `stream_response` reads the string under each part's `"text"` key.
#[derive(Deserialize, Debug, Clone)]
pub struct ResponseOutputMessage {
#[serde(default)]
pub id: Option<String>,
#[serde(default)]
pub content: Vec<Value>,
#[serde(default)]
pub role: Option<String>,
#[serde(default)]
pub status: Option<String>,
}
/// Payload of `ResponseOutputItem::FunctionCall`.
///
/// `arguments` is a plain string that defaults to empty when missing
/// (presumably JSON-encoded call arguments — confirm against the consumer).
#[derive(Deserialize, Debug, Clone)]
pub struct ResponseFunctionToolCall {
#[serde(default)]
pub id: Option<String>,
#[serde(default)]
pub arguments: String,
#[serde(default)]
pub call_id: Option<String>,
#[serde(default)]
pub name: Option<String>,
#[serde(default)]
pub status: Option<String>,
}
pub async fn stream_response(
client: &dyn HttpClient,
provider_name: &str,
api_url: &str,
api_key: &str,
request: Request,
) -> Result<BoxStream<'static, Result<StreamEvent>>, RequestError> {
let uri = format!("{api_url}/responses");
let request_builder = HttpRequest::builder()
.method(Method::POST)
.uri(uri)
.header("Content-Type", "application/json")
.header("Authorization", format!("Bearer {}", api_key.trim()));
let is_streaming = request.stream;
let request = request_builder
.body(AsyncBody::from(
serde_json::to_string(&request).map_err(|e| RequestError::Other(e.into()))?,
))
.map_err(|e| RequestError::Other(e.into()))?;
let mut response = client.send(request).await?;
if response.status().is_success() {
if is_streaming {
let reader = BufReader::new(response.into_body());
Ok(reader
.lines()
.filter_map(|line| async move {
match line {
Ok(line) => {
let line = line
.strip_prefix("data: ")
.or_else(|| line.strip_prefix("data:"))?;
if line == "[DONE]" || line.is_empty() {
None
} else {
match serde_json::from_str::<StreamEvent>(line) {
Ok(event) => Some(Ok(event)),
Err(error) => {
log::error!(
"Failed to parse OpenAI responses stream event: `{}`\nResponse: `{}`",
error,
line,
);
Some(Err(anyhow!(error)))
}
}
}
}
Err(error) => Some(Err(anyhow!(error))),
}
})
.boxed())
} else {
let mut body = String::new();
response
.body_mut()
.read_to_string(&mut body)
.await
.map_err(|e| RequestError::Other(e.into()))?;
match serde_json::from_str::<ResponseSummary>(&body) {
Ok(response_summary) => {
let events = vec![
StreamEvent::Created {
response: response_summary.clone(),
},
StreamEvent::InProgress {
response: response_summary.clone(),
},
];
let mut all_events = events;
for (output_index, item) in response_summary.output.iter().enumerate() {
all_events.push(StreamEvent::OutputItemAdded {
output_index,
sequence_number: None,
item: item.clone(),
});
match item {
ResponseOutputItem::Message(message) => {
for content_item in &message.content {
if let Some(text) = content_item.get("text") {
if let Some(text_str) = text.as_str() {
if let Some(ref item_id) = message.id {
all_events.push(StreamEvent::OutputTextDelta {
item_id: item_id.clone(),
output_index,
content_index: None,
delta: text_str.to_string(),
});
}
}
}
}
}
ResponseOutputItem::FunctionCall(function_call) => {
if let Some(ref item_id) = function_call.id {
all_events.push(StreamEvent::FunctionCallArgumentsDone {
item_id: item_id.clone(),
output_index,
arguments: function_call.arguments.clone(),
sequence_number: None,
});
}
}
ResponseOutputItem::Unknown => {}
}
all_events.push(StreamEvent::OutputItemDone {
output_index,
sequence_number: None,
item: item.clone(),
});
}
all_events.push(StreamEvent::Completed {
response: response_summary,
});
Ok(futures::stream::iter(all_events.into_iter().map(Ok)).boxed())
}
Err(error) => {
log::error!(
"Failed to parse OpenAI non-streaming response: `{}`\nResponse: `{}`",
error,
body,
);
Err(RequestError::Other(anyhow!(error)))
}
}
}
} else {
let mut body = String::new();
response
.body_mut()
.read_to_string(&mut body)
.await
.map_err(|e| RequestError::Other(e.into()))?;
Err(RequestError::HttpResponseError {
provider: provider_name.to_owned(),
status_code: response.status(),
body,
headers: response.headers().clone(),
})
}
}
}

View file

@ -208,6 +208,8 @@ pub struct OpenAiAvailableModel {
pub max_output_tokens: Option<u64>,
pub max_completion_tokens: Option<u64>,
pub reasoning_effort: Option<OpenAiReasoningEffort>,
#[serde(default)]
pub capabilities: OpenAiModelCapabilities,
}
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone, JsonSchema, MergeFrom)]
@ -226,6 +228,21 @@ pub struct OpenAiCompatibleSettingsContent {
pub available_models: Vec<OpenAiCompatibleAvailableModel>,
}
/// Capability flags for a custom OpenAI model entry.
#[with_fallible_options]
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema, MergeFrom)]
pub struct OpenAiModelCapabilities {
/// Whether the model can be called through the `/chat/completions` endpoint.
/// Falls back to `default_true()` when the field is omitted.
#[serde(default = "default_true")]
pub chat_completions: bool,
}
// Written by hand rather than derived: a derived `Default` would set
// `chat_completions` to `false`, while this uses the same `default_true()`
// that serde applies when the field is omitted.
impl Default for OpenAiModelCapabilities {
fn default() -> Self {
Self {
chat_completions: default_true(),
}
}
}
#[with_fallible_options]
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema, MergeFrom)]
pub struct OpenAiCompatibleAvailableModel {
@ -245,6 +262,8 @@ pub struct OpenAiCompatibleModelCapabilities {
pub images: bool,
pub parallel_tool_calls: bool,
pub prompt_cache_key: bool,
#[serde(default = "default_true")]
pub chat_completions: bool,
}
impl Default for OpenAiCompatibleModelCapabilities {
@ -254,6 +273,7 @@ impl Default for OpenAiCompatibleModelCapabilities {
images: false,
parallel_tool_calls: false,
prompt_cache_key: false,
chat_completions: default_true(),
}
}
}

View file

@ -469,6 +469,14 @@ To use alternate models, perhaps a preview release, or if you wish to control th
"name": "gpt-4o-2024-08-06",
"display_name": "GPT 4o Summer 2024",
"max_tokens": 128000
},
{
"name": "gpt-5-codex",
"display_name": "GPT-5 Codex",
"max_tokens": 272000,
"capabilities": {
"chat_completions": false
}
}
]
}
@ -478,7 +486,10 @@ To use alternate models, perhaps a preview release, or if you wish to control th
You must provide the model's context window in the `max_tokens` parameter; this can be found in the [OpenAI model documentation](https://platform.openai.com/docs/models).
OpenAI `o1` models should set `max_completion_tokens` as well to avoid incurring high reasoning token costs.
OpenAI `o1` and `o`-class models should set `max_completion_tokens` as well to avoid incurring high reasoning token costs.
If a model does not support the `/chat/completions` endpoint (for example `gpt-5-codex`), set `capabilities.chat_completions` to `false` in its entry. Zed will then use the Responses endpoint (`/responses`) for that model instead.
Custom models will be listed in the model dropdown in the Agent Panel.
### OpenAI API Compatible {#openai-api-compatible}
@ -525,6 +536,9 @@ By default, OpenAI-compatible models inherit the following capabilities:
- `images`: false (does not support image inputs)
- `parallel_tool_calls`: false (does not support `parallel_tool_calls` parameter)
- `prompt_cache_key`: false (does not support `prompt_cache_key` parameter)
- `chat_completions`: true (calls the `/chat/completions` endpoint)
If a provider exposes models that only work with the Responses API, set `chat_completions` to `false` for those entries. Zed uses the Responses endpoint for these models.
Note that LLM API keys aren't stored in your settings file.
So, ensure you have it set in your environment variables (`<PROVIDER_NAME>_API_KEY=<your api key>`) so your settings can pick it up. In the example above, it would be `TOGETHER_AI_API_KEY=<your api key>`.