mirror of
https://github.com/zed-industries/zed.git
synced 2026-06-01 03:14:56 +07:00
Add OpenAI native context compaction
This commit is contained in:
parent
568bf53639
commit
ce686f1069
12 changed files with 567 additions and 71 deletions
|
|
@ -37,12 +37,12 @@ use gpui::{
|
|||
};
|
||||
use heck::ToSnakeCase as _;
|
||||
use language_model::{
|
||||
CompletionIntent, LanguageModel, LanguageModelCompletionError, LanguageModelCompletionEvent,
|
||||
LanguageModelId, LanguageModelImage, LanguageModelProviderId, LanguageModelRegistry,
|
||||
LanguageModelRequest, LanguageModelRequestMessage, LanguageModelRequestTool,
|
||||
LanguageModelToolResult, LanguageModelToolResultContent, LanguageModelToolSchemaFormat,
|
||||
LanguageModelToolUse, LanguageModelToolUseId, Role, SelectedModel, Speed, StopReason,
|
||||
TokenUsage, ZED_CLOUD_PROVIDER_ID,
|
||||
CompactionStrategyKind, CompletionIntent, LanguageModel, LanguageModelCompletionError,
|
||||
LanguageModelCompletionEvent, LanguageModelId, LanguageModelImage, LanguageModelProviderId,
|
||||
LanguageModelRegistry, LanguageModelRequest, LanguageModelRequestMessage,
|
||||
LanguageModelRequestTool, LanguageModelToolResult, LanguageModelToolResultContent,
|
||||
LanguageModelToolSchemaFormat, LanguageModelToolUse, LanguageModelToolUseId, Role,
|
||||
SelectedModel, Speed, StopReason, TokenUsage, ZED_CLOUD_PROVIDER_ID,
|
||||
};
|
||||
use project::Project;
|
||||
use prompt_store::ProjectContext;
|
||||
|
|
@ -132,14 +132,23 @@ enum AutoCompactPhase {
|
|||
MidTurn,
|
||||
}
|
||||
|
||||
struct GenericCompactionInput {
|
||||
struct CompactionInput {
|
||||
id: ContextCompactionId,
|
||||
model: Arc<dyn LanguageModel>,
|
||||
request: LanguageModelRequest,
|
||||
retained_user_messages: Vec<RetainedUserMessage>,
|
||||
strategy: PreparedCompactionStrategy,
|
||||
held_message: Option<Message>,
|
||||
}
|
||||
|
||||
enum PreparedCompactionStrategy {
|
||||
GenericSummary {
|
||||
retained_user_messages: Vec<RetainedUserMessage>,
|
||||
},
|
||||
Native {
|
||||
source: NativeCompactionSource,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub enum Message {
|
||||
User(UserMessage),
|
||||
|
|
@ -2092,7 +2101,19 @@ impl Thread {
|
|||
|
||||
fn estimate_model_visible_tokens(&self, cx: &App) -> u64 {
|
||||
let messages = self.build_request_messages(Vec::new(), cx);
|
||||
estimate_request_tokens(&messages)
|
||||
let prefix_tokens = self
|
||||
.model
|
||||
.as_ref()
|
||||
.and_then(|model| {
|
||||
self.provider_native_prefix_for_conversation(
|
||||
self.current_conversation(),
|
||||
model.as_ref(),
|
||||
cx,
|
||||
)
|
||||
})
|
||||
.map(|prefix| estimate_native_seed_tokens(&prefix))
|
||||
.unwrap_or(0);
|
||||
estimate_request_tokens(&messages).saturating_add(prefix_tokens)
|
||||
}
|
||||
|
||||
fn generic_retained_user_messages(&self) -> Vec<RetainedUserMessage> {
|
||||
|
|
@ -2102,11 +2123,11 @@ impl Thread {
|
|||
)
|
||||
}
|
||||
|
||||
fn prepare_generic_compaction(
|
||||
fn prepare_compaction(
|
||||
&self,
|
||||
phase: AutoCompactPhase,
|
||||
cx: &App,
|
||||
) -> Result<Option<GenericCompactionInput>> {
|
||||
) -> Result<Option<CompactionInput>> {
|
||||
if !self.should_auto_compact(cx) {
|
||||
return Ok(None);
|
||||
}
|
||||
|
|
@ -2119,27 +2140,39 @@ impl Thread {
|
|||
return Ok(None);
|
||||
}
|
||||
|
||||
let retained_user_messages = retained_user_messages_from_messages(
|
||||
&conversation.messages,
|
||||
GENERIC_COMPACTION_RETAINED_USER_MESSAGE_TOKENS,
|
||||
);
|
||||
let mut request =
|
||||
let native_source = native_compaction_source_for_model(model.as_ref(), cx);
|
||||
let use_native = model.compaction_strategy(cx) == CompactionStrategyKind::Native;
|
||||
let mut request_messages =
|
||||
self.build_request_messages_for_conversation(&conversation, Vec::new(), cx, false);
|
||||
request.push(LanguageModelRequestMessage {
|
||||
role: Role::User,
|
||||
content: vec![SUMMARIZE_THREAD_DETAILED_PROMPT.into()],
|
||||
cache: false,
|
||||
reasoning_details: None,
|
||||
});
|
||||
let provider_native_prefix =
|
||||
self.provider_native_prefix_for_conversation(&conversation, model.as_ref(), cx);
|
||||
|
||||
Ok(Some(GenericCompactionInput {
|
||||
let strategy = if let (true, Some(source)) = (use_native, native_source) {
|
||||
PreparedCompactionStrategy::Native { source }
|
||||
} else {
|
||||
request_messages.push(LanguageModelRequestMessage {
|
||||
role: Role::User,
|
||||
content: vec![SUMMARIZE_THREAD_DETAILED_PROMPT.into()],
|
||||
cache: false,
|
||||
reasoning_details: None,
|
||||
});
|
||||
PreparedCompactionStrategy::GenericSummary {
|
||||
retained_user_messages: retained_user_messages_from_messages(
|
||||
&conversation.messages,
|
||||
GENERIC_COMPACTION_RETAINED_USER_MESSAGE_TOKENS,
|
||||
),
|
||||
}
|
||||
};
|
||||
|
||||
Ok(Some(CompactionInput {
|
||||
id: ContextCompactionId::new(),
|
||||
model: model.clone(),
|
||||
request: LanguageModelRequest {
|
||||
thread_id: Some(self.id.to_string()),
|
||||
prompt_id: Some(self.prompt_id.to_string()),
|
||||
intent: Some(CompletionIntent::ThreadContextSummarization),
|
||||
messages: request,
|
||||
provider_native_prefix,
|
||||
messages: request_messages,
|
||||
tools: Vec::new(),
|
||||
tool_choice: None,
|
||||
stop: Vec::new(),
|
||||
|
|
@ -2148,7 +2181,7 @@ impl Thread {
|
|||
thinking_effort: None,
|
||||
speed: self.speed(),
|
||||
},
|
||||
retained_user_messages,
|
||||
strategy,
|
||||
held_message,
|
||||
}))
|
||||
}
|
||||
|
|
@ -2171,11 +2204,30 @@ impl Thread {
|
|||
(conversation, held_message)
|
||||
}
|
||||
|
||||
fn install_generic_compaction(
|
||||
fn provider_native_prefix_for_conversation(
|
||||
&self,
|
||||
conversation: &Conversation,
|
||||
model: &dyn LanguageModel,
|
||||
cx: &App,
|
||||
) -> Option<Vec<serde_json::Value>> {
|
||||
let seed = conversation.seed.as_ref()?;
|
||||
let CompactionArtifact::ProviderNative { source, items } = &seed.artifact else {
|
||||
return None;
|
||||
};
|
||||
let model_source = native_compaction_source_for_model(model, cx)?;
|
||||
if native_compaction_sources_match(&model_source, source) {
|
||||
Some(items.clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn install_compaction(
|
||||
&mut self,
|
||||
id: ContextCompactionId,
|
||||
summary: SharedString,
|
||||
artifact: CompactionArtifact,
|
||||
retained_user_messages: Vec<RetainedUserMessage>,
|
||||
baseline_tokens: u64,
|
||||
held_message: Option<Message>,
|
||||
cx: &mut Context<Self>,
|
||||
) -> Result<()> {
|
||||
|
|
@ -2193,13 +2245,12 @@ impl Thread {
|
|||
self.current_messages_mut().pop();
|
||||
}
|
||||
|
||||
let baseline_tokens = estimate_generic_seed_tokens(&retained_user_messages, &summary);
|
||||
self.conversations.push(Conversation {
|
||||
marker: Some(ContextCompactionMarker { id }),
|
||||
messages: Vec::new(),
|
||||
request_token_usage: HashMap::default(),
|
||||
seed: Some(CompactionSeed {
|
||||
artifact: CompactionArtifact::Summary(summary),
|
||||
artifact,
|
||||
retained_user_messages,
|
||||
baseline_tokens,
|
||||
baseline_observed: false,
|
||||
|
|
@ -2219,7 +2270,7 @@ impl Thread {
|
|||
if self.should_auto_compact(cx) {
|
||||
let retained_user_message_count = self.generic_retained_user_messages().len();
|
||||
log::info!(
|
||||
"auto-compaction threshold reached at {phase}; handoff is not wired yet; retained_user_messages={retained_user_message_count}"
|
||||
"auto-compaction threshold reached at {phase}; retained_user_messages={retained_user_message_count}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -2686,45 +2737,89 @@ impl Thread {
|
|||
AutoCompactPhase::MidTurn => "mid-turn",
|
||||
};
|
||||
this.log_auto_compact_hook(phase_name, cx);
|
||||
this.prepare_generic_compaction(phase, cx)
|
||||
this.prepare_compaction(phase, cx)
|
||||
})??
|
||||
else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
event_stream.send_compaction_started(input.id.clone());
|
||||
let compaction_id = input.id.clone();
|
||||
let summary = match Self::run_generic_compaction(
|
||||
input.model.clone(),
|
||||
input.request,
|
||||
cancellation_rx.clone(),
|
||||
cx,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(Some(summary)) => summary,
|
||||
Ok(None) => {
|
||||
event_stream.send_compaction_failed(compaction_id);
|
||||
return Ok(());
|
||||
let CompactionInput {
|
||||
id,
|
||||
model,
|
||||
request,
|
||||
strategy,
|
||||
held_message,
|
||||
} = input;
|
||||
|
||||
event_stream.send_compaction_started(id.clone());
|
||||
let compaction_id = id.clone();
|
||||
let (artifact, retained_user_messages, baseline_tokens) = match strategy {
|
||||
PreparedCompactionStrategy::GenericSummary {
|
||||
retained_user_messages,
|
||||
} => {
|
||||
let summary = match Self::run_generic_compaction(
|
||||
model.clone(),
|
||||
request,
|
||||
cancellation_rx.clone(),
|
||||
cx,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(Some(summary)) => summary,
|
||||
Ok(None) => {
|
||||
event_stream.send_compaction_failed(compaction_id);
|
||||
return Ok(());
|
||||
}
|
||||
Err(error) => {
|
||||
event_stream.send_compaction_failed(compaction_id);
|
||||
return Err(error);
|
||||
}
|
||||
};
|
||||
let baseline_tokens =
|
||||
estimate_generic_seed_tokens(&retained_user_messages, &summary);
|
||||
(
|
||||
CompactionArtifact::Summary(summary),
|
||||
retained_user_messages,
|
||||
baseline_tokens,
|
||||
)
|
||||
}
|
||||
Err(error) => {
|
||||
event_stream.send_compaction_failed(compaction_id);
|
||||
return Err(error);
|
||||
PreparedCompactionStrategy::Native { source } => {
|
||||
let items =
|
||||
match Self::run_native_compaction(model, request, cancellation_rx.clone(), cx)
|
||||
.await
|
||||
{
|
||||
Ok(Some(items)) => items,
|
||||
Ok(None) => {
|
||||
event_stream.send_compaction_failed(compaction_id);
|
||||
return Ok(());
|
||||
}
|
||||
Err(error) => {
|
||||
event_stream.send_compaction_failed(compaction_id);
|
||||
return Err(error);
|
||||
}
|
||||
};
|
||||
let baseline_tokens = estimate_native_seed_tokens(&items);
|
||||
(
|
||||
CompactionArtifact::ProviderNative { source, items },
|
||||
Vec::new(),
|
||||
baseline_tokens,
|
||||
)
|
||||
}
|
||||
};
|
||||
|
||||
if *cancellation_rx.borrow() {
|
||||
event_stream.send_compaction_failed(input.id);
|
||||
event_stream.send_compaction_failed(id);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let succeeded_id = input.id.clone();
|
||||
let succeeded_id = id.clone();
|
||||
this.update(cx, |this, cx| {
|
||||
this.install_generic_compaction(
|
||||
input.id.clone(),
|
||||
summary,
|
||||
input.retained_user_messages,
|
||||
input.held_message,
|
||||
this.install_compaction(
|
||||
id.clone(),
|
||||
artifact,
|
||||
retained_user_messages,
|
||||
baseline_tokens,
|
||||
held_message,
|
||||
cx,
|
||||
)
|
||||
})??;
|
||||
|
|
@ -2763,6 +2858,24 @@ impl Thread {
|
|||
Ok(Some(summary.into()))
|
||||
}
|
||||
|
||||
async fn run_native_compaction(
|
||||
model: Arc<dyn LanguageModel>,
|
||||
request: LanguageModelRequest,
|
||||
mut cancellation_rx: watch::Receiver<bool>,
|
||||
cx: &mut AsyncApp,
|
||||
) -> Result<Option<Vec<serde_json::Value>>> {
|
||||
if *cancellation_rx.borrow() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let compacted = model.compact(request, cx).await?;
|
||||
if *cancellation_rx.borrow() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
Ok(Some(compacted.items))
|
||||
}
|
||||
|
||||
fn process_tool_result(
|
||||
this: &WeakEntity<Thread>,
|
||||
event_stream: &ThreadEventStream,
|
||||
|
|
@ -3497,12 +3610,18 @@ impl Thread {
|
|||
|
||||
log::debug!("Request includes {} tools", available_tools.len());
|
||||
let messages = self.build_request_messages(available_tools, cx);
|
||||
let provider_native_prefix = self.provider_native_prefix_for_conversation(
|
||||
self.current_conversation(),
|
||||
model.as_ref(),
|
||||
cx,
|
||||
);
|
||||
log::debug!("Request will include {} messages", messages.len());
|
||||
|
||||
let request = LanguageModelRequest {
|
||||
thread_id: Some(self.id.to_string()),
|
||||
prompt_id: Some(self.prompt_id.to_string()),
|
||||
intent: Some(completion_intent),
|
||||
provider_native_prefix,
|
||||
messages,
|
||||
tools,
|
||||
tool_choice: None,
|
||||
|
|
@ -3995,6 +4114,31 @@ fn estimate_generic_seed_tokens(
|
|||
estimate_request_tokens(&messages)
|
||||
}
|
||||
|
||||
fn estimate_native_seed_tokens(items: &[serde_json::Value]) -> u64 {
|
||||
serde_json::to_string(items)
|
||||
.map(|items| estimate_text_tokens(&items))
|
||||
.unwrap_or(0)
|
||||
}
|
||||
|
||||
fn native_compaction_source_for_model(
|
||||
model: &dyn LanguageModel,
|
||||
cx: &App,
|
||||
) -> Option<NativeCompactionSource> {
|
||||
model
|
||||
.native_compaction_source(cx)
|
||||
.map(|source| NativeCompactionSource {
|
||||
provider: source.provider.to_string(),
|
||||
api_url: source.api_url,
|
||||
})
|
||||
}
|
||||
|
||||
fn native_compaction_sources_match(
|
||||
current: &NativeCompactionSource,
|
||||
stored: &NativeCompactionSource,
|
||||
) -> bool {
|
||||
current.provider == stored.provider && current.api_url == stored.api_url
|
||||
}
|
||||
|
||||
fn retained_user_message_from_content(
|
||||
content: &[UserMessageContent],
|
||||
token_budget: u64,
|
||||
|
|
@ -5637,15 +5781,18 @@ mod tests {
|
|||
thread.current_messages_mut().push(live_message.clone());
|
||||
|
||||
let compaction_id = ContextCompactionId::new();
|
||||
let summary: SharedString = "summary text".into();
|
||||
let retained_user_messages = vec![RetainedUserMessage {
|
||||
content: vec![RetainedUserMessageContent::Text("retained text".to_string())],
|
||||
}];
|
||||
let baseline_tokens =
|
||||
estimate_generic_seed_tokens(&retained_user_messages, &summary);
|
||||
thread
|
||||
.install_generic_compaction(
|
||||
.install_compaction(
|
||||
compaction_id.clone(),
|
||||
"summary text".into(),
|
||||
vec![RetainedUserMessage {
|
||||
content: vec![RetainedUserMessageContent::Text(
|
||||
"retained text".to_string(),
|
||||
)],
|
||||
}],
|
||||
CompactionArtifact::Summary(summary),
|
||||
retained_user_messages,
|
||||
baseline_tokens,
|
||||
Some(live_message),
|
||||
cx,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -541,6 +541,7 @@ impl CodegenAlternative {
|
|||
thread_id: None,
|
||||
prompt_id: None,
|
||||
intent: Some(CompletionIntent::InlineAssist),
|
||||
provider_native_prefix: None,
|
||||
tools,
|
||||
tool_choice,
|
||||
stop: Vec::new(),
|
||||
|
|
@ -621,6 +622,7 @@ impl CodegenAlternative {
|
|||
thread_id: None,
|
||||
prompt_id: None,
|
||||
intent: Some(CompletionIntent::InlineAssist),
|
||||
provider_native_prefix: None,
|
||||
tools: Vec::new(),
|
||||
tool_choice: None,
|
||||
stop: Vec::new(),
|
||||
|
|
|
|||
|
|
@ -268,6 +268,7 @@ impl TerminalInlineAssistant {
|
|||
thread_id: None,
|
||||
prompt_id: None,
|
||||
intent: Some(CompletionIntent::TerminalInlineAssist),
|
||||
provider_native_prefix: None,
|
||||
messages: vec![request_message],
|
||||
tools: Vec::new(),
|
||||
tool_choice: None,
|
||||
|
|
|
|||
|
|
@ -551,6 +551,7 @@ mod tests {
|
|||
thread_id: None,
|
||||
prompt_id: None,
|
||||
intent: None,
|
||||
provider_native_prefix: None,
|
||||
stop: vec![],
|
||||
temperature: None,
|
||||
tools: vec![language_model_core::LanguageModelRequestTool {
|
||||
|
|
@ -655,6 +656,7 @@ mod tests {
|
|||
thread_id: None,
|
||||
prompt_id: None,
|
||||
intent: None,
|
||||
provider_native_prefix: None,
|
||||
stop: vec![],
|
||||
temperature: None,
|
||||
tools: vec![language_model_core::LanguageModelRequestTool {
|
||||
|
|
@ -725,6 +727,7 @@ mod tests {
|
|||
thread_id: None,
|
||||
prompt_id: None,
|
||||
intent: None,
|
||||
provider_native_prefix: None,
|
||||
stop: vec![],
|
||||
temperature: None,
|
||||
tools: vec![language_model_core::LanguageModelRequestTool {
|
||||
|
|
@ -768,6 +771,7 @@ mod tests {
|
|||
thread_id: None,
|
||||
prompt_id: None,
|
||||
intent: None,
|
||||
provider_native_prefix: None,
|
||||
stop: vec![],
|
||||
temperature: None,
|
||||
tools: vec![],
|
||||
|
|
|
|||
|
|
@ -2794,6 +2794,7 @@ impl GitPanel {
|
|||
thread_id: None,
|
||||
prompt_id: None,
|
||||
intent: Some(CompletionIntent::GenerateGitCommitMessage),
|
||||
provider_native_prefix: None,
|
||||
messages: vec![LanguageModelRequestMessage {
|
||||
role: Role::User,
|
||||
content: vec![content.into()],
|
||||
|
|
|
|||
|
|
@ -43,6 +43,23 @@ impl Default for LanguageModelTextStream {
|
|||
}
|
||||
}
|
||||
|
||||
/// How a model wants its conversation context to be compacted.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompactionStrategyKind {
    /// Compact through the provider itself, via `LanguageModel::compact`.
    Native,
    /// Compact by asking the model to write a summary of the conversation.
    GenericSummary,
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct LanguageModelNativeCompactionSource {
|
||||
pub provider: LanguageModelProviderId,
|
||||
pub api_url: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct LanguageModelNativeCompaction {
|
||||
pub items: Vec<serde_json::Value>,
|
||||
}
|
||||
|
||||
pub trait LanguageModel: Send + Sync {
|
||||
fn id(&self) -> LanguageModelId;
|
||||
fn name(&self) -> LanguageModelName;
|
||||
|
|
@ -121,6 +138,28 @@ pub trait LanguageModel: Send + Sync {
|
|||
None
|
||||
}
|
||||
|
||||
fn compaction_strategy(&self, _cx: &App) -> CompactionStrategyKind {
|
||||
CompactionStrategyKind::GenericSummary
|
||||
}
|
||||
|
||||
/// Source descriptor for provider-native compaction.
///
/// The default is `None`.
fn native_compaction_source(&self, _cx: &App) -> Option<LanguageModelNativeCompactionSource> {
    None
}
|
||||
|
||||
fn compact(
|
||||
&self,
|
||||
_request: LanguageModelRequest,
|
||||
_cx: &AsyncApp,
|
||||
) -> BoxFuture<'static, Result<LanguageModelNativeCompaction, LanguageModelCompletionError>>
|
||||
{
|
||||
async move {
|
||||
Err(LanguageModelCompletionError::Other(anyhow::anyhow!(
|
||||
"native compaction is not supported by this model"
|
||||
)))
|
||||
}
|
||||
.boxed()
|
||||
}
|
||||
|
||||
fn stream_completion(
|
||||
&self,
|
||||
request: LanguageModelRequest,
|
||||
|
|
|
|||
|
|
@ -362,6 +362,8 @@ pub struct LanguageModelRequest {
|
|||
pub thread_id: Option<String>,
|
||||
pub prompt_id: Option<String>,
|
||||
pub intent: Option<CompletionIntent>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub provider_native_prefix: Option<Vec<serde_json::Value>>,
|
||||
pub messages: Vec<LanguageModelRequestMessage>,
|
||||
pub tools: Vec<LanguageModelRequestTool>,
|
||||
pub tool_choice: Option<LanguageModelToolChoice>,
|
||||
|
|
|
|||
|
|
@ -1205,6 +1205,7 @@ fn into_copilot_responses(
|
|||
thread_id: _,
|
||||
prompt_id: _,
|
||||
intent: _,
|
||||
provider_native_prefix: _,
|
||||
messages,
|
||||
tools,
|
||||
tool_choice,
|
||||
|
|
|
|||
|
|
@ -978,6 +978,7 @@ mod tests {
|
|||
thread_id: Some("abcdef".into()),
|
||||
prompt_id: None,
|
||||
intent: None,
|
||||
provider_native_prefix: None,
|
||||
stop: vec![],
|
||||
thinking_allowed: true,
|
||||
thinking_effort: None,
|
||||
|
|
@ -1014,6 +1015,7 @@ mod tests {
|
|||
thread_id: None,
|
||||
prompt_id: None,
|
||||
intent: None,
|
||||
provider_native_prefix: None,
|
||||
stop: vec![],
|
||||
thinking_allowed: true,
|
||||
thinking_effort: None,
|
||||
|
|
|
|||
|
|
@ -5,16 +5,20 @@ use futures::{FutureExt, StreamExt, future::BoxFuture};
|
|||
use gpui::{AnyView, App, AsyncApp, Context, Entity, SharedString, Task, TaskExt, Window};
|
||||
use http_client::HttpClient;
|
||||
use language_model::{
|
||||
ApiKeyState, AuthenticateError, EnvVar, IconOrSvg, LanguageModel, LanguageModelCompletionError,
|
||||
LanguageModelCompletionEvent, LanguageModelEffortLevel, LanguageModelId, LanguageModelName,
|
||||
LanguageModelProvider, LanguageModelProviderId, LanguageModelProviderName,
|
||||
LanguageModelProviderState, LanguageModelRequest, LanguageModelToolChoice, OPEN_AI_PROVIDER_ID,
|
||||
OPEN_AI_PROVIDER_NAME, RateLimiter, env_var,
|
||||
ApiKeyState, AuthenticateError, CompactionStrategyKind, EnvVar, IconOrSvg, LanguageModel,
|
||||
LanguageModelCompletionError, LanguageModelCompletionEvent, LanguageModelEffortLevel,
|
||||
LanguageModelId, LanguageModelName, LanguageModelNativeCompaction,
|
||||
LanguageModelNativeCompactionSource, LanguageModelProvider, LanguageModelProviderId,
|
||||
LanguageModelProviderName, LanguageModelProviderState, LanguageModelRequest,
|
||||
LanguageModelToolChoice, OPEN_AI_PROVIDER_ID, OPEN_AI_PROVIDER_NAME, RateLimiter, env_var,
|
||||
};
|
||||
use menu;
|
||||
use open_ai::{
|
||||
OPEN_AI_API_URL, ResponseStreamEvent,
|
||||
responses::{Request as ResponseRequest, StreamEvent as ResponsesStreamEvent, stream_response},
|
||||
responses::{
|
||||
CompactRequest as ResponseCompactRequest, Request as ResponseRequest,
|
||||
StreamEvent as ResponsesStreamEvent, compact_response, stream_response,
|
||||
},
|
||||
stream_completion,
|
||||
};
|
||||
use settings::{OpenAiAvailableModel as AvailableModel, Settings, SettingsStore};
|
||||
|
|
@ -391,6 +395,38 @@ impl OpenAiLanguageModel {
|
|||
|
||||
async move { Ok(future.await?.boxed()) }.boxed()
|
||||
}
|
||||
|
||||
fn compact_response(
|
||||
&self,
|
||||
request: ResponseCompactRequest,
|
||||
cx: &AsyncApp,
|
||||
) -> BoxFuture<'static, Result<open_ai::responses::CompactedResponse>> {
|
||||
let http_client = self.http_client.clone();
|
||||
|
||||
let (api_key, api_url) = self.state.read_with(cx, |state, cx| {
|
||||
let api_url = OpenAiLanguageModelProvider::api_url(cx);
|
||||
(state.api_key_state.key(&api_url), api_url)
|
||||
});
|
||||
|
||||
let provider = PROVIDER_NAME;
|
||||
let future = self.request_limiter.run(async move {
|
||||
let Some(api_key) = api_key else {
|
||||
return Err(LanguageModelCompletionError::NoApiKey { provider });
|
||||
};
|
||||
let request = compact_response(
|
||||
http_client.as_ref(),
|
||||
provider.0.as_str(),
|
||||
&api_url,
|
||||
&api_key,
|
||||
request,
|
||||
vec![],
|
||||
);
|
||||
let response = request.await?;
|
||||
Ok(response)
|
||||
});
|
||||
|
||||
async move { Ok(future.await?) }.boxed()
|
||||
}
|
||||
}
|
||||
|
||||
impl LanguageModel for OpenAiLanguageModel {
|
||||
|
|
@ -474,6 +510,66 @@ impl LanguageModel for OpenAiLanguageModel {
|
|||
self.model.max_output_tokens()
|
||||
}
|
||||
|
||||
fn compaction_strategy(&self, cx: &App) -> CompactionStrategyKind {
|
||||
if self.native_compaction_source(cx).is_some() {
|
||||
CompactionStrategyKind::Native
|
||||
} else {
|
||||
CompactionStrategyKind::GenericSummary
|
||||
}
|
||||
}
|
||||
|
||||
fn native_compaction_source(&self, cx: &App) -> Option<LanguageModelNativeCompactionSource> {
|
||||
if !self.model.uses_responses_api() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let api_url = OpenAiLanguageModelProvider::api_url(cx);
|
||||
if api_url.as_ref() != OPEN_AI_API_URL {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(LanguageModelNativeCompactionSource {
|
||||
provider: PROVIDER_ID,
|
||||
api_url: Some(api_url.to_string()),
|
||||
})
|
||||
}
|
||||
|
||||
fn compact(
|
||||
&self,
|
||||
request: LanguageModelRequest,
|
||||
cx: &AsyncApp,
|
||||
) -> BoxFuture<'static, Result<LanguageModelNativeCompaction, LanguageModelCompletionError>>
|
||||
{
|
||||
if !self.model.uses_responses_api() {
|
||||
return async move {
|
||||
Err(LanguageModelCompletionError::Other(anyhow::anyhow!(
|
||||
"native compaction requires the OpenAI Responses API"
|
||||
)))
|
||||
}
|
||||
.boxed();
|
||||
}
|
||||
|
||||
let request = into_open_ai_response(
|
||||
request,
|
||||
self.model.id(),
|
||||
self.model.supports_parallel_tool_calls(),
|
||||
self.model.supports_prompt_cache_key(),
|
||||
self.max_output_tokens(),
|
||||
default_thinking_reasoning_effort(&self.model),
|
||||
self.model
|
||||
.supported_reasoning_efforts()
|
||||
.contains(&open_ai::ReasoningEffort::None),
|
||||
);
|
||||
let compacted = self.compact_response(request.into(), cx);
|
||||
|
||||
async move {
|
||||
Ok(LanguageModelNativeCompaction {
|
||||
items: compacted.await?.output,
|
||||
})
|
||||
}
|
||||
.boxed()
|
||||
}
|
||||
|
||||
fn stream_completion(
|
||||
&self,
|
||||
request: LanguageModelRequest,
|
||||
|
|
|
|||
|
|
@ -191,6 +191,7 @@ pub fn into_open_ai_response(
|
|||
thread_id,
|
||||
prompt_id: _,
|
||||
intent: _,
|
||||
provider_native_prefix,
|
||||
messages,
|
||||
tools,
|
||||
tool_choice,
|
||||
|
|
@ -260,6 +261,7 @@ pub fn into_open_ai_response(
|
|||
ResponseRequest {
|
||||
model: model_id.into(),
|
||||
instructions: None,
|
||||
native_input_prefix: provider_native_prefix.unwrap_or_default(),
|
||||
input: input_items,
|
||||
store: Some(false),
|
||||
include,
|
||||
|
|
@ -1347,6 +1349,7 @@ mod tests {
|
|||
thread_id: Some("thread-123".into()),
|
||||
prompt_id: None,
|
||||
intent: None,
|
||||
provider_native_prefix: None,
|
||||
messages: vec![
|
||||
LanguageModelRequestMessage {
|
||||
role: Role::System,
|
||||
|
|
@ -1479,6 +1482,7 @@ mod tests {
|
|||
thread_id: None,
|
||||
prompt_id: None,
|
||||
intent: None,
|
||||
provider_native_prefix: None,
|
||||
messages: vec![LanguageModelRequestMessage {
|
||||
role: Role::Assistant,
|
||||
content: vec![MessageContent::ToolUse(tool_use)],
|
||||
|
|
@ -1567,6 +1571,7 @@ mod tests {
|
|||
thread_id: None,
|
||||
prompt_id: None,
|
||||
intent: None,
|
||||
provider_native_prefix: None,
|
||||
messages: vec![LanguageModelRequestMessage {
|
||||
role: Role::Assistant,
|
||||
content: vec![MessageContent::Text("Done.".into())],
|
||||
|
|
@ -1631,12 +1636,45 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
/// A request's `provider_native_prefix` must be carried over verbatim into
/// the Responses request, and `store` must remain disabled.
#[test]
fn into_open_ai_response_carries_native_prefix() {
    let compaction_item = json!({
        "type": "compaction",
        "encrypted_content": "opaque"
    });
    let follow_up = LanguageModelRequestMessage {
        role: Role::User,
        content: vec![MessageContent::Text("Continue".into())],
        cache: false,
        reasoning_details: None,
    };
    let request = LanguageModelRequest {
        thread_id: None,
        prompt_id: None,
        intent: None,
        provider_native_prefix: Some(vec![compaction_item.clone()]),
        messages: vec![follow_up],
        tools: Vec::new(),
        tool_choice: None,
        stop: Vec::new(),
        temperature: None,
        thinking_allowed: true,
        thinking_effort: None,
        speed: None,
    };

    let converted = into_open_ai_response(request, "gpt-5", true, true, None, None, false);

    assert_eq!(converted.native_input_prefix, vec![compaction_item]);
    assert_eq!(converted.store, Some(false));
}
|
||||
|
||||
#[test]
|
||||
fn into_open_ai_response_omits_reasoning_when_thinking_is_disabled_and_none_is_unsupported() {
|
||||
let request = LanguageModelRequest {
|
||||
thread_id: None,
|
||||
prompt_id: None,
|
||||
intent: None,
|
||||
provider_native_prefix: None,
|
||||
messages: vec![LanguageModelRequestMessage {
|
||||
role: Role::User,
|
||||
content: vec![MessageContent::Text("Hello".into())],
|
||||
|
|
@ -1672,6 +1710,7 @@ mod tests {
|
|||
thread_id: None,
|
||||
prompt_id: None,
|
||||
intent: None,
|
||||
provider_native_prefix: None,
|
||||
messages: vec![LanguageModelRequestMessage {
|
||||
role: Role::User,
|
||||
content: vec![MessageContent::Text("Hello".into())],
|
||||
|
|
@ -1710,6 +1749,7 @@ mod tests {
|
|||
thread_id: None,
|
||||
prompt_id: None,
|
||||
intent: None,
|
||||
provider_native_prefix: None,
|
||||
messages: vec![LanguageModelRequestMessage {
|
||||
role: Role::User,
|
||||
content: vec![MessageContent::Text("Hello".into())],
|
||||
|
|
@ -1750,6 +1790,7 @@ mod tests {
|
|||
thread_id: None,
|
||||
prompt_id: None,
|
||||
intent: None,
|
||||
provider_native_prefix: None,
|
||||
messages: vec![LanguageModelRequestMessage {
|
||||
role: Role::Assistant,
|
||||
content: vec![MessageContent::Text("Done.".into())],
|
||||
|
|
@ -1841,6 +1882,7 @@ mod tests {
|
|||
thread_id: None,
|
||||
prompt_id: None,
|
||||
intent: None,
|
||||
provider_native_prefix: None,
|
||||
messages: vec![
|
||||
LanguageModelRequestMessage {
|
||||
role: Role::Assistant,
|
||||
|
|
@ -1916,6 +1958,7 @@ mod tests {
|
|||
thread_id: None,
|
||||
prompt_id: None,
|
||||
intent: None,
|
||||
provider_native_prefix: None,
|
||||
messages: vec![LanguageModelRequestMessage {
|
||||
role: Role::Assistant,
|
||||
content: vec![
|
||||
|
|
@ -2943,6 +2986,7 @@ mod tests {
|
|||
thread_id: None,
|
||||
prompt_id: None,
|
||||
intent: None,
|
||||
provider_native_prefix: None,
|
||||
messages: vec![
|
||||
LanguageModelRequestMessage {
|
||||
role: Role::User,
|
||||
|
|
|
|||
|
|
@ -11,6 +11,8 @@ pub struct Request {
|
|||
pub model: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub instructions: Option<String>,
|
||||
#[serde(skip)]
|
||||
pub native_input_prefix: Vec<Value>,
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
pub input: Vec<ResponseInputItem>,
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
|
|
@ -37,6 +39,43 @@ pub struct Request {
|
|||
pub store: Option<bool>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Debug)]
|
||||
pub struct CompactRequest {
|
||||
pub model: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub instructions: Option<String>,
|
||||
#[serde(skip)]
|
||||
pub native_input_prefix: Vec<Value>,
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
pub input: Vec<ResponseInputItem>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub parallel_tool_calls: Option<bool>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub tool_choice: Option<ToolChoice>,
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
pub tools: Vec<ToolDefinition>,
|
||||
}
|
||||
|
||||
impl From<Request> for CompactRequest {
|
||||
fn from(request: Request) -> Self {
|
||||
Self {
|
||||
model: request.model,
|
||||
instructions: request.instructions,
|
||||
native_input_prefix: request.native_input_prefix,
|
||||
input: request.input,
|
||||
parallel_tool_calls: request.parallel_tool_calls,
|
||||
tool_choice: request.tool_choice,
|
||||
tools: request.tools,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug, Clone)]
|
||||
pub struct CompactedResponse {
|
||||
#[serde(default)]
|
||||
pub output: Vec<Value>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ResponseIncludable {
|
||||
|
|
@ -411,10 +450,10 @@ pub async fn stream_response(
|
|||
}
|
||||
|
||||
let is_streaming = request.stream;
|
||||
let body = serialize_body_with_native_prefix(&request, &request.native_input_prefix)
|
||||
.map_err(|e| RequestError::Other(e.into()))?;
|
||||
let request = request_builder
|
||||
.body(AsyncBody::from(
|
||||
serde_json::to_string(&request).map_err(|e| RequestError::Other(e.into()))?,
|
||||
))
|
||||
.body(AsyncBody::from(body))
|
||||
.map_err(|e| RequestError::Other(e.into()))?;
|
||||
|
||||
let mut response = client.send(request).await?;
|
||||
|
|
@ -569,3 +608,121 @@ pub async fn stream_response(
|
|||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn compact_response(
|
||||
client: &dyn HttpClient,
|
||||
provider_name: &str,
|
||||
api_url: &str,
|
||||
api_key: &str,
|
||||
request: CompactRequest,
|
||||
extra_headers: Vec<(String, String)>,
|
||||
) -> Result<CompactedResponse, RequestError> {
|
||||
let uri = format!("{api_url}/responses/compact");
|
||||
let mut request_builder = HttpRequest::builder()
|
||||
.method(Method::POST)
|
||||
.uri(uri)
|
||||
.header("Content-Type", "application/json")
|
||||
.header("Authorization", format!("Bearer {}", api_key.trim()));
|
||||
for (name, value) in &extra_headers {
|
||||
request_builder = request_builder.header(name.as_str(), value.as_str());
|
||||
}
|
||||
|
||||
let body = serialize_body_with_native_prefix(&request, &request.native_input_prefix)
|
||||
.map_err(|e| RequestError::Other(e.into()))?;
|
||||
let request = request_builder
|
||||
.body(AsyncBody::from(body))
|
||||
.map_err(|e| RequestError::Other(e.into()))?;
|
||||
|
||||
let mut response = client.send(request).await?;
|
||||
let mut body = String::new();
|
||||
response
|
||||
.body_mut()
|
||||
.read_to_string(&mut body)
|
||||
.await
|
||||
.map_err(|e| RequestError::Other(e.into()))?;
|
||||
|
||||
if response.status().is_success() {
|
||||
serde_json::from_str::<CompactedResponse>(&body).map_err(|error| {
|
||||
log::error!(
|
||||
"Failed to parse OpenAI compact response: `{}`\nResponse: `{}`",
|
||||
error,
|
||||
body,
|
||||
);
|
||||
RequestError::Other(anyhow!(error))
|
||||
})
|
||||
} else {
|
||||
Err(RequestError::HttpResponseError {
|
||||
provider: provider_name.to_owned(),
|
||||
status_code: response.status(),
|
||||
body,
|
||||
headers: response.headers().clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn serialize_body_with_native_prefix<T: Serialize>(
|
||||
request: &T,
|
||||
native_input_prefix: &[Value],
|
||||
) -> serde_json::Result<String> {
|
||||
let mut body = serde_json::to_value(request)?;
|
||||
|
||||
if !native_input_prefix.is_empty() {
|
||||
if let Value::Object(object) = &mut body {
|
||||
let input = object
|
||||
.entry("input")
|
||||
.or_insert_with(|| Value::Array(Vec::new()));
|
||||
if let Value::Array(input) = input {
|
||||
let mut prefixed_input = native_input_prefix.to_vec();
|
||||
prefixed_input.append(input);
|
||||
*input = prefixed_input;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
serde_json::to_string(&body)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// The native prefix must become the first element of the serialized
    /// `input` array, and the other fields must serialize as usual.
    #[test]
    fn native_input_prefix_is_prepended_to_responses_body() {
        let compaction_item = json!({
            "type": "compaction",
            "encrypted_content": "opaque"
        });
        let user_turn = ResponseInputItem::Message(ResponseMessageItem {
            role: Role::User,
            content: vec![ResponseInputContent::Text {
                text: "Continue".to_string(),
            }],
            phase: None,
        });
        let request = Request {
            model: "gpt-5".to_string(),
            instructions: None,
            native_input_prefix: vec![compaction_item.clone()],
            input: vec![user_turn],
            include: Vec::new(),
            stream: true,
            temperature: None,
            top_p: None,
            max_output_tokens: None,
            parallel_tool_calls: None,
            tool_choice: None,
            tools: Vec::new(),
            prompt_cache_key: None,
            reasoning: None,
            store: Some(false),
        };

        let serialized = serialize_body_with_native_prefix(&request, &request.native_input_prefix)
            .expect("request should serialize");
        let parsed: Value = serde_json::from_str(&serialized).expect("body should be valid JSON");
        let input_items = parsed["input"]
            .as_array()
            .expect("input should be an array");

        assert_eq!(input_items.first(), Some(&compaction_item));
        assert_eq!(parsed["store"], json!(false));
    }
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue