mirror of
https://github.com/zed-industries/zed.git
synced 2026-05-31 19:05:00 +07:00
language_models: Support fast mode on ChatGPT subscription provider (#57436)
This uses the same mechanism as for BYOK (bring-your-own-key): `service_tier == priority`. Most of the work was already done. When validating this in manual testing, I noticed that we get back `service_tier == auto` in the response, unlike in the regular OpenAI API scenario with BYOK, but apparently [that doesn't mean the priority tier wasn't applied](https://github.com/openai/codex/issues/14204#issuecomment-4033184620). It's not a hard confirmation, but the model does seem to respond faster when I toggle fast mode on. Release Notes: - Added Fast Mode (priority service tier) support to OpenAI models used through the ChatGPT subscription provider.
This commit is contained in:
parent
1965d69819
commit
a1d019bdd8
2 changed files with 18 additions and 1 deletions
|
|
@ -345,6 +345,13 @@ impl ChatGptModel {
|
|||
/// Always returns `true`, regardless of which model variant `self` is.
fn supports_prompt_cache_key(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
/// Returns whether this model variant supports priority: `true` for
/// `Gpt55` and `Gpt54`, `false` for `Gpt54Mini`, `Gpt53Codex` and `Gpt52`.
///
/// The match deliberately has no wildcard arm, so a newly added variant
/// fails to compile until its priority support is decided here.
fn supports_priority(&self) -> bool {
|
||||
match self {
|
||||
Self::Gpt55 | Self::Gpt54 => true,
|
||||
Self::Gpt54Mini | Self::Gpt53Codex | Self::Gpt52 => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct OpenAiSubscribedLanguageModel {
|
||||
|
|
@ -392,6 +399,10 @@ impl LanguageModel for OpenAiSubscribedLanguageModel {
|
|||
true
|
||||
}
|
||||
|
||||
/// Reports fast mode support by delegating to the wrapped model's
/// `supports_priority()`; the two answers are always the same.
fn supports_fast_mode(&self) -> bool {
|
||||
self.model.supports_priority()
|
||||
}
|
||||
|
||||
fn supported_effort_levels(&self) -> Vec<LanguageModelEffortLevel> {
|
||||
let default_effort = self.model.default_reasoning_effort();
|
||||
self.model
|
||||
|
|
@ -431,7 +442,7 @@ impl LanguageModel for OpenAiSubscribedLanguageModel {
|
|||
|
||||
fn stream_completion(
|
||||
&self,
|
||||
request: LanguageModelRequest,
|
||||
mut request: LanguageModelRequest,
|
||||
cx: &AsyncApp,
|
||||
) -> BoxFuture<
|
||||
'static,
|
||||
|
|
@ -443,6 +454,10 @@ impl LanguageModel for OpenAiSubscribedLanguageModel {
|
|||
LanguageModelCompletionError,
|
||||
>,
|
||||
> {
|
||||
if !self.model.supports_priority() {
|
||||
request.speed = None;
|
||||
}
|
||||
|
||||
// The Codex backend rejects `max_output_tokens` (`Unsupported parameter`),
|
||||
// unlike the public OpenAI Responses API. Pass `None` so the field is
|
||||
// omitted from the serialized request body entirely.
|
||||
|
|
|
|||
|
|
@ -335,6 +335,8 @@ pub struct ResponseSummary {
|
|||
pub usage: Option<ResponseUsage>,
|
||||
#[serde(default)]
|
||||
pub output: Vec<ResponseOutputItem>,
|
||||
#[serde(default)]
|
||||
pub service_tier: Option<crate::ServiceTier>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug, Default, Clone)]
|
||||
|
|
|
|||
Loading…
Reference in a new issue