mirror of
https://github.com/zed-industries/zed.git
synced 2026-06-01 03:14:56 +07:00
Add OpenAI native context compaction
This commit is contained in:
parent
568bf53639
commit
ce686f1069
12 changed files with 567 additions and 71 deletions
|
|
@ -37,12 +37,12 @@ use gpui::{
|
||||||
};
|
};
|
||||||
use heck::ToSnakeCase as _;
|
use heck::ToSnakeCase as _;
|
||||||
use language_model::{
|
use language_model::{
|
||||||
CompletionIntent, LanguageModel, LanguageModelCompletionError, LanguageModelCompletionEvent,
|
CompactionStrategyKind, CompletionIntent, LanguageModel, LanguageModelCompletionError,
|
||||||
LanguageModelId, LanguageModelImage, LanguageModelProviderId, LanguageModelRegistry,
|
LanguageModelCompletionEvent, LanguageModelId, LanguageModelImage, LanguageModelProviderId,
|
||||||
LanguageModelRequest, LanguageModelRequestMessage, LanguageModelRequestTool,
|
LanguageModelRegistry, LanguageModelRequest, LanguageModelRequestMessage,
|
||||||
LanguageModelToolResult, LanguageModelToolResultContent, LanguageModelToolSchemaFormat,
|
LanguageModelRequestTool, LanguageModelToolResult, LanguageModelToolResultContent,
|
||||||
LanguageModelToolUse, LanguageModelToolUseId, Role, SelectedModel, Speed, StopReason,
|
LanguageModelToolSchemaFormat, LanguageModelToolUse, LanguageModelToolUseId, Role,
|
||||||
TokenUsage, ZED_CLOUD_PROVIDER_ID,
|
SelectedModel, Speed, StopReason, TokenUsage, ZED_CLOUD_PROVIDER_ID,
|
||||||
};
|
};
|
||||||
use project::Project;
|
use project::Project;
|
||||||
use prompt_store::ProjectContext;
|
use prompt_store::ProjectContext;
|
||||||
|
|
@ -132,14 +132,23 @@ enum AutoCompactPhase {
|
||||||
MidTurn,
|
MidTurn,
|
||||||
}
|
}
|
||||||
|
|
||||||
struct GenericCompactionInput {
|
struct CompactionInput {
|
||||||
id: ContextCompactionId,
|
id: ContextCompactionId,
|
||||||
model: Arc<dyn LanguageModel>,
|
model: Arc<dyn LanguageModel>,
|
||||||
request: LanguageModelRequest,
|
request: LanguageModelRequest,
|
||||||
retained_user_messages: Vec<RetainedUserMessage>,
|
strategy: PreparedCompactionStrategy,
|
||||||
held_message: Option<Message>,
|
held_message: Option<Message>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// The compaction strategy resolved by `prepare_compaction` and carried in
/// `CompactionInput::strategy` until the compaction is actually run.
///
/// * `GenericSummary` carries the retained user messages that are installed
///   together with the generated summary.
/// * `Native` carries the `NativeCompactionSource` that is stored alongside
///   the returned items in `CompactionArtifact::ProviderNative`.
enum PreparedCompactionStrategy {
|
||||||
|
GenericSummary {
|
||||||
|
retained_user_messages: Vec<RetainedUserMessage>,
|
||||||
|
},
|
||||||
|
Native {
|
||||||
|
source: NativeCompactionSource,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||||
pub enum Message {
|
pub enum Message {
|
||||||
User(UserMessage),
|
User(UserMessage),
|
||||||
|
|
@ -2092,7 +2101,19 @@ impl Thread {
|
||||||
|
|
||||||
fn estimate_model_visible_tokens(&self, cx: &App) -> u64 {
|
fn estimate_model_visible_tokens(&self, cx: &App) -> u64 {
|
||||||
let messages = self.build_request_messages(Vec::new(), cx);
|
let messages = self.build_request_messages(Vec::new(), cx);
|
||||||
estimate_request_tokens(&messages)
|
let prefix_tokens = self
|
||||||
|
.model
|
||||||
|
.as_ref()
|
||||||
|
.and_then(|model| {
|
||||||
|
self.provider_native_prefix_for_conversation(
|
||||||
|
self.current_conversation(),
|
||||||
|
model.as_ref(),
|
||||||
|
cx,
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.map(|prefix| estimate_native_seed_tokens(&prefix))
|
||||||
|
.unwrap_or(0);
|
||||||
|
estimate_request_tokens(&messages).saturating_add(prefix_tokens)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn generic_retained_user_messages(&self) -> Vec<RetainedUserMessage> {
|
fn generic_retained_user_messages(&self) -> Vec<RetainedUserMessage> {
|
||||||
|
|
@ -2102,11 +2123,11 @@ impl Thread {
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn prepare_generic_compaction(
|
fn prepare_compaction(
|
||||||
&self,
|
&self,
|
||||||
phase: AutoCompactPhase,
|
phase: AutoCompactPhase,
|
||||||
cx: &App,
|
cx: &App,
|
||||||
) -> Result<Option<GenericCompactionInput>> {
|
) -> Result<Option<CompactionInput>> {
|
||||||
if !self.should_auto_compact(cx) {
|
if !self.should_auto_compact(cx) {
|
||||||
return Ok(None);
|
return Ok(None);
|
||||||
}
|
}
|
||||||
|
|
@ -2119,27 +2140,39 @@ impl Thread {
|
||||||
return Ok(None);
|
return Ok(None);
|
||||||
}
|
}
|
||||||
|
|
||||||
let retained_user_messages = retained_user_messages_from_messages(
|
let native_source = native_compaction_source_for_model(model.as_ref(), cx);
|
||||||
&conversation.messages,
|
let use_native = model.compaction_strategy(cx) == CompactionStrategyKind::Native;
|
||||||
GENERIC_COMPACTION_RETAINED_USER_MESSAGE_TOKENS,
|
let mut request_messages =
|
||||||
);
|
|
||||||
let mut request =
|
|
||||||
self.build_request_messages_for_conversation(&conversation, Vec::new(), cx, false);
|
self.build_request_messages_for_conversation(&conversation, Vec::new(), cx, false);
|
||||||
request.push(LanguageModelRequestMessage {
|
let provider_native_prefix =
|
||||||
role: Role::User,
|
self.provider_native_prefix_for_conversation(&conversation, model.as_ref(), cx);
|
||||||
content: vec![SUMMARIZE_THREAD_DETAILED_PROMPT.into()],
|
|
||||||
cache: false,
|
|
||||||
reasoning_details: None,
|
|
||||||
});
|
|
||||||
|
|
||||||
Ok(Some(GenericCompactionInput {
|
let strategy = if let (true, Some(source)) = (use_native, native_source) {
|
||||||
|
PreparedCompactionStrategy::Native { source }
|
||||||
|
} else {
|
||||||
|
request_messages.push(LanguageModelRequestMessage {
|
||||||
|
role: Role::User,
|
||||||
|
content: vec![SUMMARIZE_THREAD_DETAILED_PROMPT.into()],
|
||||||
|
cache: false,
|
||||||
|
reasoning_details: None,
|
||||||
|
});
|
||||||
|
PreparedCompactionStrategy::GenericSummary {
|
||||||
|
retained_user_messages: retained_user_messages_from_messages(
|
||||||
|
&conversation.messages,
|
||||||
|
GENERIC_COMPACTION_RETAINED_USER_MESSAGE_TOKENS,
|
||||||
|
),
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(Some(CompactionInput {
|
||||||
id: ContextCompactionId::new(),
|
id: ContextCompactionId::new(),
|
||||||
model: model.clone(),
|
model: model.clone(),
|
||||||
request: LanguageModelRequest {
|
request: LanguageModelRequest {
|
||||||
thread_id: Some(self.id.to_string()),
|
thread_id: Some(self.id.to_string()),
|
||||||
prompt_id: Some(self.prompt_id.to_string()),
|
prompt_id: Some(self.prompt_id.to_string()),
|
||||||
intent: Some(CompletionIntent::ThreadContextSummarization),
|
intent: Some(CompletionIntent::ThreadContextSummarization),
|
||||||
messages: request,
|
provider_native_prefix,
|
||||||
|
messages: request_messages,
|
||||||
tools: Vec::new(),
|
tools: Vec::new(),
|
||||||
tool_choice: None,
|
tool_choice: None,
|
||||||
stop: Vec::new(),
|
stop: Vec::new(),
|
||||||
|
|
@ -2148,7 +2181,7 @@ impl Thread {
|
||||||
thinking_effort: None,
|
thinking_effort: None,
|
||||||
speed: self.speed(),
|
speed: self.speed(),
|
||||||
},
|
},
|
||||||
retained_user_messages,
|
strategy,
|
||||||
held_message,
|
held_message,
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|
@ -2171,11 +2204,30 @@ impl Thread {
|
||||||
(conversation, held_message)
|
(conversation, held_message)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn install_generic_compaction(
|
/// Returns a clone of the provider-native items stored in `conversation`'s
/// seed, if they can be reused with `model`.
///
/// Yields `None` when the conversation has no seed, when the seed artifact is
/// not `CompactionArtifact::ProviderNative`, when `model` reports no native
/// compaction source, or when the model's source does not match the stored
/// one according to `native_compaction_sources_match`.
fn provider_native_prefix_for_conversation(
|
||||||
|
&self,
|
||||||
|
conversation: &Conversation,
|
||||||
|
model: &dyn LanguageModel,
|
||||||
|
cx: &App,
|
||||||
|
) -> Option<Vec<serde_json::Value>> {
|
||||||
|
let seed = conversation.seed.as_ref()?;
|
||||||
|
let CompactionArtifact::ProviderNative { source, items } = &seed.artifact else {
|
||||||
|
return None;
|
||||||
|
};
|
||||||
|
let model_source = native_compaction_source_for_model(model, cx)?;
|
||||||
|
if native_compaction_sources_match(&model_source, source) {
|
||||||
|
Some(items.clone())
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn install_compaction(
|
||||||
&mut self,
|
&mut self,
|
||||||
id: ContextCompactionId,
|
id: ContextCompactionId,
|
||||||
summary: SharedString,
|
artifact: CompactionArtifact,
|
||||||
retained_user_messages: Vec<RetainedUserMessage>,
|
retained_user_messages: Vec<RetainedUserMessage>,
|
||||||
|
baseline_tokens: u64,
|
||||||
held_message: Option<Message>,
|
held_message: Option<Message>,
|
||||||
cx: &mut Context<Self>,
|
cx: &mut Context<Self>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
|
|
@ -2193,13 +2245,12 @@ impl Thread {
|
||||||
self.current_messages_mut().pop();
|
self.current_messages_mut().pop();
|
||||||
}
|
}
|
||||||
|
|
||||||
let baseline_tokens = estimate_generic_seed_tokens(&retained_user_messages, &summary);
|
|
||||||
self.conversations.push(Conversation {
|
self.conversations.push(Conversation {
|
||||||
marker: Some(ContextCompactionMarker { id }),
|
marker: Some(ContextCompactionMarker { id }),
|
||||||
messages: Vec::new(),
|
messages: Vec::new(),
|
||||||
request_token_usage: HashMap::default(),
|
request_token_usage: HashMap::default(),
|
||||||
seed: Some(CompactionSeed {
|
seed: Some(CompactionSeed {
|
||||||
artifact: CompactionArtifact::Summary(summary),
|
artifact,
|
||||||
retained_user_messages,
|
retained_user_messages,
|
||||||
baseline_tokens,
|
baseline_tokens,
|
||||||
baseline_observed: false,
|
baseline_observed: false,
|
||||||
|
|
@ -2219,7 +2270,7 @@ impl Thread {
|
||||||
if self.should_auto_compact(cx) {
|
if self.should_auto_compact(cx) {
|
||||||
let retained_user_message_count = self.generic_retained_user_messages().len();
|
let retained_user_message_count = self.generic_retained_user_messages().len();
|
||||||
log::info!(
|
log::info!(
|
||||||
"auto-compaction threshold reached at {phase}; handoff is not wired yet; retained_user_messages={retained_user_message_count}"
|
"auto-compaction threshold reached at {phase}; retained_user_messages={retained_user_message_count}"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -2686,45 +2737,89 @@ impl Thread {
|
||||||
AutoCompactPhase::MidTurn => "mid-turn",
|
AutoCompactPhase::MidTurn => "mid-turn",
|
||||||
};
|
};
|
||||||
this.log_auto_compact_hook(phase_name, cx);
|
this.log_auto_compact_hook(phase_name, cx);
|
||||||
this.prepare_generic_compaction(phase, cx)
|
this.prepare_compaction(phase, cx)
|
||||||
})??
|
})??
|
||||||
else {
|
else {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
};
|
};
|
||||||
|
|
||||||
event_stream.send_compaction_started(input.id.clone());
|
let CompactionInput {
|
||||||
let compaction_id = input.id.clone();
|
id,
|
||||||
let summary = match Self::run_generic_compaction(
|
model,
|
||||||
input.model.clone(),
|
request,
|
||||||
input.request,
|
strategy,
|
||||||
cancellation_rx.clone(),
|
held_message,
|
||||||
cx,
|
} = input;
|
||||||
)
|
|
||||||
.await
|
event_stream.send_compaction_started(id.clone());
|
||||||
{
|
let compaction_id = id.clone();
|
||||||
Ok(Some(summary)) => summary,
|
let (artifact, retained_user_messages, baseline_tokens) = match strategy {
|
||||||
Ok(None) => {
|
PreparedCompactionStrategy::GenericSummary {
|
||||||
event_stream.send_compaction_failed(compaction_id);
|
retained_user_messages,
|
||||||
return Ok(());
|
} => {
|
||||||
|
let summary = match Self::run_generic_compaction(
|
||||||
|
model.clone(),
|
||||||
|
request,
|
||||||
|
cancellation_rx.clone(),
|
||||||
|
cx,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
Ok(Some(summary)) => summary,
|
||||||
|
Ok(None) => {
|
||||||
|
event_stream.send_compaction_failed(compaction_id);
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
Err(error) => {
|
||||||
|
event_stream.send_compaction_failed(compaction_id);
|
||||||
|
return Err(error);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let baseline_tokens =
|
||||||
|
estimate_generic_seed_tokens(&retained_user_messages, &summary);
|
||||||
|
(
|
||||||
|
CompactionArtifact::Summary(summary),
|
||||||
|
retained_user_messages,
|
||||||
|
baseline_tokens,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
Err(error) => {
|
PreparedCompactionStrategy::Native { source } => {
|
||||||
event_stream.send_compaction_failed(compaction_id);
|
let items =
|
||||||
return Err(error);
|
match Self::run_native_compaction(model, request, cancellation_rx.clone(), cx)
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
Ok(Some(items)) => items,
|
||||||
|
Ok(None) => {
|
||||||
|
event_stream.send_compaction_failed(compaction_id);
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
Err(error) => {
|
||||||
|
event_stream.send_compaction_failed(compaction_id);
|
||||||
|
return Err(error);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let baseline_tokens = estimate_native_seed_tokens(&items);
|
||||||
|
(
|
||||||
|
CompactionArtifact::ProviderNative { source, items },
|
||||||
|
Vec::new(),
|
||||||
|
baseline_tokens,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
if *cancellation_rx.borrow() {
|
if *cancellation_rx.borrow() {
|
||||||
event_stream.send_compaction_failed(input.id);
|
event_stream.send_compaction_failed(id);
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
||||||
let succeeded_id = input.id.clone();
|
let succeeded_id = id.clone();
|
||||||
this.update(cx, |this, cx| {
|
this.update(cx, |this, cx| {
|
||||||
this.install_generic_compaction(
|
this.install_compaction(
|
||||||
input.id.clone(),
|
id.clone(),
|
||||||
summary,
|
artifact,
|
||||||
input.retained_user_messages,
|
retained_user_messages,
|
||||||
input.held_message,
|
baseline_tokens,
|
||||||
|
held_message,
|
||||||
cx,
|
cx,
|
||||||
)
|
)
|
||||||
})??;
|
})??;
|
||||||
|
|
@ -2763,6 +2858,24 @@ impl Thread {
|
||||||
Ok(Some(summary.into()))
|
Ok(Some(summary.into()))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Runs provider-native compaction by awaiting `model.compact(request, cx)`.
///
/// Returns `Ok(Some(items))` with the compacted items on success, and
/// `Ok(None)` when the cancellation flag is set either before the call or
/// after it completes. An error from `compact` is propagated with `?`.
///
/// The cancellation flag is only sampled before and after the await; this
/// function does not itself race the in-flight request against cancellation.
async fn run_native_compaction(
|
||||||
|
model: Arc<dyn LanguageModel>,
|
||||||
|
request: LanguageModelRequest,
|
||||||
|
mut cancellation_rx: watch::Receiver<bool>,
|
||||||
|
cx: &mut AsyncApp,
|
||||||
|
) -> Result<Option<Vec<serde_json::Value>>> {
|
||||||
|
if *cancellation_rx.borrow() {
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
let compacted = model.compact(request, cx).await?;
|
||||||
|
if *cancellation_rx.borrow() {
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Some(compacted.items))
|
||||||
|
}
|
||||||
|
|
||||||
fn process_tool_result(
|
fn process_tool_result(
|
||||||
this: &WeakEntity<Thread>,
|
this: &WeakEntity<Thread>,
|
||||||
event_stream: &ThreadEventStream,
|
event_stream: &ThreadEventStream,
|
||||||
|
|
@ -3497,12 +3610,18 @@ impl Thread {
|
||||||
|
|
||||||
log::debug!("Request includes {} tools", available_tools.len());
|
log::debug!("Request includes {} tools", available_tools.len());
|
||||||
let messages = self.build_request_messages(available_tools, cx);
|
let messages = self.build_request_messages(available_tools, cx);
|
||||||
|
let provider_native_prefix = self.provider_native_prefix_for_conversation(
|
||||||
|
self.current_conversation(),
|
||||||
|
model.as_ref(),
|
||||||
|
cx,
|
||||||
|
);
|
||||||
log::debug!("Request will include {} messages", messages.len());
|
log::debug!("Request will include {} messages", messages.len());
|
||||||
|
|
||||||
let request = LanguageModelRequest {
|
let request = LanguageModelRequest {
|
||||||
thread_id: Some(self.id.to_string()),
|
thread_id: Some(self.id.to_string()),
|
||||||
prompt_id: Some(self.prompt_id.to_string()),
|
prompt_id: Some(self.prompt_id.to_string()),
|
||||||
intent: Some(completion_intent),
|
intent: Some(completion_intent),
|
||||||
|
provider_native_prefix,
|
||||||
messages,
|
messages,
|
||||||
tools,
|
tools,
|
||||||
tool_choice: None,
|
tool_choice: None,
|
||||||
|
|
@ -3995,6 +4114,31 @@ fn estimate_generic_seed_tokens(
|
||||||
estimate_request_tokens(&messages)
|
estimate_request_tokens(&messages)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Estimates the token cost of provider-native compaction `items`.
///
/// The items are serialized to a single JSON string and that text is passed
/// to `estimate_text_tokens`. If serialization fails the estimate is `0`.
fn estimate_native_seed_tokens(items: &[serde_json::Value]) -> u64 {
|
||||||
|
serde_json::to_string(items)
|
||||||
|
.map(|items| estimate_text_tokens(&items))
|
||||||
|
.unwrap_or(0)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Converts the source reported by `model.native_compaction_source(cx)` into
/// a `NativeCompactionSource`, turning the provider id into a `String` and
/// moving `api_url` across unchanged.
///
/// Returns `None` when the model reports no native compaction source.
fn native_compaction_source_for_model(
|
||||||
|
model: &dyn LanguageModel,
|
||||||
|
cx: &App,
|
||||||
|
) -> Option<NativeCompactionSource> {
|
||||||
|
model
|
||||||
|
.native_compaction_source(cx)
|
||||||
|
.map(|source| NativeCompactionSource {
|
||||||
|
provider: source.provider.to_string(),
|
||||||
|
api_url: source.api_url,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns `true` when `current` and `stored` have the same `provider` and
/// the same `api_url`.
fn native_compaction_sources_match(
|
||||||
|
current: &NativeCompactionSource,
|
||||||
|
stored: &NativeCompactionSource,
|
||||||
|
) -> bool {
|
||||||
|
current.provider == stored.provider && current.api_url == stored.api_url
|
||||||
|
}
|
||||||
|
|
||||||
fn retained_user_message_from_content(
|
fn retained_user_message_from_content(
|
||||||
content: &[UserMessageContent],
|
content: &[UserMessageContent],
|
||||||
token_budget: u64,
|
token_budget: u64,
|
||||||
|
|
@ -5637,15 +5781,18 @@ mod tests {
|
||||||
thread.current_messages_mut().push(live_message.clone());
|
thread.current_messages_mut().push(live_message.clone());
|
||||||
|
|
||||||
let compaction_id = ContextCompactionId::new();
|
let compaction_id = ContextCompactionId::new();
|
||||||
|
let summary: SharedString = "summary text".into();
|
||||||
|
let retained_user_messages = vec![RetainedUserMessage {
|
||||||
|
content: vec![RetainedUserMessageContent::Text("retained text".to_string())],
|
||||||
|
}];
|
||||||
|
let baseline_tokens =
|
||||||
|
estimate_generic_seed_tokens(&retained_user_messages, &summary);
|
||||||
thread
|
thread
|
||||||
.install_generic_compaction(
|
.install_compaction(
|
||||||
compaction_id.clone(),
|
compaction_id.clone(),
|
||||||
"summary text".into(),
|
CompactionArtifact::Summary(summary),
|
||||||
vec![RetainedUserMessage {
|
retained_user_messages,
|
||||||
content: vec![RetainedUserMessageContent::Text(
|
baseline_tokens,
|
||||||
"retained text".to_string(),
|
|
||||||
)],
|
|
||||||
}],
|
|
||||||
Some(live_message),
|
Some(live_message),
|
||||||
cx,
|
cx,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -541,6 +541,7 @@ impl CodegenAlternative {
|
||||||
thread_id: None,
|
thread_id: None,
|
||||||
prompt_id: None,
|
prompt_id: None,
|
||||||
intent: Some(CompletionIntent::InlineAssist),
|
intent: Some(CompletionIntent::InlineAssist),
|
||||||
|
provider_native_prefix: None,
|
||||||
tools,
|
tools,
|
||||||
tool_choice,
|
tool_choice,
|
||||||
stop: Vec::new(),
|
stop: Vec::new(),
|
||||||
|
|
@ -621,6 +622,7 @@ impl CodegenAlternative {
|
||||||
thread_id: None,
|
thread_id: None,
|
||||||
prompt_id: None,
|
prompt_id: None,
|
||||||
intent: Some(CompletionIntent::InlineAssist),
|
intent: Some(CompletionIntent::InlineAssist),
|
||||||
|
provider_native_prefix: None,
|
||||||
tools: Vec::new(),
|
tools: Vec::new(),
|
||||||
tool_choice: None,
|
tool_choice: None,
|
||||||
stop: Vec::new(),
|
stop: Vec::new(),
|
||||||
|
|
|
||||||
|
|
@ -268,6 +268,7 @@ impl TerminalInlineAssistant {
|
||||||
thread_id: None,
|
thread_id: None,
|
||||||
prompt_id: None,
|
prompt_id: None,
|
||||||
intent: Some(CompletionIntent::TerminalInlineAssist),
|
intent: Some(CompletionIntent::TerminalInlineAssist),
|
||||||
|
provider_native_prefix: None,
|
||||||
messages: vec![request_message],
|
messages: vec![request_message],
|
||||||
tools: Vec::new(),
|
tools: Vec::new(),
|
||||||
tool_choice: None,
|
tool_choice: None,
|
||||||
|
|
|
||||||
|
|
@ -551,6 +551,7 @@ mod tests {
|
||||||
thread_id: None,
|
thread_id: None,
|
||||||
prompt_id: None,
|
prompt_id: None,
|
||||||
intent: None,
|
intent: None,
|
||||||
|
provider_native_prefix: None,
|
||||||
stop: vec![],
|
stop: vec![],
|
||||||
temperature: None,
|
temperature: None,
|
||||||
tools: vec![language_model_core::LanguageModelRequestTool {
|
tools: vec![language_model_core::LanguageModelRequestTool {
|
||||||
|
|
@ -655,6 +656,7 @@ mod tests {
|
||||||
thread_id: None,
|
thread_id: None,
|
||||||
prompt_id: None,
|
prompt_id: None,
|
||||||
intent: None,
|
intent: None,
|
||||||
|
provider_native_prefix: None,
|
||||||
stop: vec![],
|
stop: vec![],
|
||||||
temperature: None,
|
temperature: None,
|
||||||
tools: vec![language_model_core::LanguageModelRequestTool {
|
tools: vec![language_model_core::LanguageModelRequestTool {
|
||||||
|
|
@ -725,6 +727,7 @@ mod tests {
|
||||||
thread_id: None,
|
thread_id: None,
|
||||||
prompt_id: None,
|
prompt_id: None,
|
||||||
intent: None,
|
intent: None,
|
||||||
|
provider_native_prefix: None,
|
||||||
stop: vec![],
|
stop: vec![],
|
||||||
temperature: None,
|
temperature: None,
|
||||||
tools: vec![language_model_core::LanguageModelRequestTool {
|
tools: vec![language_model_core::LanguageModelRequestTool {
|
||||||
|
|
@ -768,6 +771,7 @@ mod tests {
|
||||||
thread_id: None,
|
thread_id: None,
|
||||||
prompt_id: None,
|
prompt_id: None,
|
||||||
intent: None,
|
intent: None,
|
||||||
|
provider_native_prefix: None,
|
||||||
stop: vec![],
|
stop: vec![],
|
||||||
temperature: None,
|
temperature: None,
|
||||||
tools: vec![],
|
tools: vec![],
|
||||||
|
|
|
||||||
|
|
@ -2794,6 +2794,7 @@ impl GitPanel {
|
||||||
thread_id: None,
|
thread_id: None,
|
||||||
prompt_id: None,
|
prompt_id: None,
|
||||||
intent: Some(CompletionIntent::GenerateGitCommitMessage),
|
intent: Some(CompletionIntent::GenerateGitCommitMessage),
|
||||||
|
provider_native_prefix: None,
|
||||||
messages: vec![LanguageModelRequestMessage {
|
messages: vec![LanguageModelRequestMessage {
|
||||||
role: Role::User,
|
role: Role::User,
|
||||||
content: vec![content.into()],
|
content: vec![content.into()],
|
||||||
|
|
|
||||||
|
|
@ -43,6 +43,23 @@ impl Default for LanguageModelTextStream {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// The kind of context compaction a model asks for, as returned by
/// `LanguageModel::compaction_strategy`.
///
/// `GenericSummary` is what the trait's default implementation returns;
/// `Native` is returned by implementations that override it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
|
pub enum CompactionStrategyKind {
|
||||||
|
Native,
|
||||||
|
GenericSummary,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Describes where a native compaction comes from: the provider id and,
/// optionally, the API URL. Returned by
/// `LanguageModel::native_compaction_source`.
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
|
pub struct LanguageModelNativeCompactionSource {
|
||||||
|
pub provider: LanguageModelProviderId,
|
||||||
|
pub api_url: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The successful result of `LanguageModel::compact`: the compacted items as
/// raw JSON values.
#[derive(Debug, Clone, PartialEq)]
|
||||||
|
pub struct LanguageModelNativeCompaction {
|
||||||
|
pub items: Vec<serde_json::Value>,
|
||||||
|
}
|
||||||
|
|
||||||
pub trait LanguageModel: Send + Sync {
|
pub trait LanguageModel: Send + Sync {
|
||||||
fn id(&self) -> LanguageModelId;
|
fn id(&self) -> LanguageModelId;
|
||||||
fn name(&self) -> LanguageModelName;
|
fn name(&self) -> LanguageModelName;
|
||||||
|
|
@ -121,6 +138,28 @@ pub trait LanguageModel: Send + Sync {
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reports which compaction strategy this model wants. The default is
/// `CompactionStrategyKind::GenericSummary`.
fn compaction_strategy(&self, _cx: &App) -> CompactionStrategyKind {
|
||||||
|
CompactionStrategyKind::GenericSummary
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reports the source of this model's native compaction, if it has one.
/// The default is `None`.
fn native_compaction_source(&self, _cx: &App) -> Option<LanguageModelNativeCompactionSource> {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Performs provider-native compaction of `_request`.
///
/// The default implementation ignores its arguments and returns a future
/// that resolves to `LanguageModelCompletionError::Other` with the message
/// "native compaction is not supported by this model".
fn compact(
|
||||||
|
&self,
|
||||||
|
_request: LanguageModelRequest,
|
||||||
|
_cx: &AsyncApp,
|
||||||
|
) -> BoxFuture<'static, Result<LanguageModelNativeCompaction, LanguageModelCompletionError>>
|
||||||
|
{
|
||||||
|
async move {
|
||||||
|
Err(LanguageModelCompletionError::Other(anyhow::anyhow!(
|
||||||
|
"native compaction is not supported by this model"
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
.boxed()
|
||||||
|
}
|
||||||
|
|
||||||
fn stream_completion(
|
fn stream_completion(
|
||||||
&self,
|
&self,
|
||||||
request: LanguageModelRequest,
|
request: LanguageModelRequest,
|
||||||
|
|
|
||||||
|
|
@ -362,6 +362,8 @@ pub struct LanguageModelRequest {
|
||||||
pub thread_id: Option<String>,
|
pub thread_id: Option<String>,
|
||||||
pub prompt_id: Option<String>,
|
pub prompt_id: Option<String>,
|
||||||
pub intent: Option<CompletionIntent>,
|
pub intent: Option<CompletionIntent>,
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub provider_native_prefix: Option<Vec<serde_json::Value>>,
|
||||||
pub messages: Vec<LanguageModelRequestMessage>,
|
pub messages: Vec<LanguageModelRequestMessage>,
|
||||||
pub tools: Vec<LanguageModelRequestTool>,
|
pub tools: Vec<LanguageModelRequestTool>,
|
||||||
pub tool_choice: Option<LanguageModelToolChoice>,
|
pub tool_choice: Option<LanguageModelToolChoice>,
|
||||||
|
|
|
||||||
|
|
@ -1205,6 +1205,7 @@ fn into_copilot_responses(
|
||||||
thread_id: _,
|
thread_id: _,
|
||||||
prompt_id: _,
|
prompt_id: _,
|
||||||
intent: _,
|
intent: _,
|
||||||
|
provider_native_prefix: _,
|
||||||
messages,
|
messages,
|
||||||
tools,
|
tools,
|
||||||
tool_choice,
|
tool_choice,
|
||||||
|
|
|
||||||
|
|
@ -978,6 +978,7 @@ mod tests {
|
||||||
thread_id: Some("abcdef".into()),
|
thread_id: Some("abcdef".into()),
|
||||||
prompt_id: None,
|
prompt_id: None,
|
||||||
intent: None,
|
intent: None,
|
||||||
|
provider_native_prefix: None,
|
||||||
stop: vec![],
|
stop: vec![],
|
||||||
thinking_allowed: true,
|
thinking_allowed: true,
|
||||||
thinking_effort: None,
|
thinking_effort: None,
|
||||||
|
|
@ -1014,6 +1015,7 @@ mod tests {
|
||||||
thread_id: None,
|
thread_id: None,
|
||||||
prompt_id: None,
|
prompt_id: None,
|
||||||
intent: None,
|
intent: None,
|
||||||
|
provider_native_prefix: None,
|
||||||
stop: vec![],
|
stop: vec![],
|
||||||
thinking_allowed: true,
|
thinking_allowed: true,
|
||||||
thinking_effort: None,
|
thinking_effort: None,
|
||||||
|
|
|
||||||
|
|
@ -5,16 +5,20 @@ use futures::{FutureExt, StreamExt, future::BoxFuture};
|
||||||
use gpui::{AnyView, App, AsyncApp, Context, Entity, SharedString, Task, TaskExt, Window};
|
use gpui::{AnyView, App, AsyncApp, Context, Entity, SharedString, Task, TaskExt, Window};
|
||||||
use http_client::HttpClient;
|
use http_client::HttpClient;
|
||||||
use language_model::{
|
use language_model::{
|
||||||
ApiKeyState, AuthenticateError, EnvVar, IconOrSvg, LanguageModel, LanguageModelCompletionError,
|
ApiKeyState, AuthenticateError, CompactionStrategyKind, EnvVar, IconOrSvg, LanguageModel,
|
||||||
LanguageModelCompletionEvent, LanguageModelEffortLevel, LanguageModelId, LanguageModelName,
|
LanguageModelCompletionError, LanguageModelCompletionEvent, LanguageModelEffortLevel,
|
||||||
LanguageModelProvider, LanguageModelProviderId, LanguageModelProviderName,
|
LanguageModelId, LanguageModelName, LanguageModelNativeCompaction,
|
||||||
LanguageModelProviderState, LanguageModelRequest, LanguageModelToolChoice, OPEN_AI_PROVIDER_ID,
|
LanguageModelNativeCompactionSource, LanguageModelProvider, LanguageModelProviderId,
|
||||||
OPEN_AI_PROVIDER_NAME, RateLimiter, env_var,
|
LanguageModelProviderName, LanguageModelProviderState, LanguageModelRequest,
|
||||||
|
LanguageModelToolChoice, OPEN_AI_PROVIDER_ID, OPEN_AI_PROVIDER_NAME, RateLimiter, env_var,
|
||||||
};
|
};
|
||||||
use menu;
|
use menu;
|
||||||
use open_ai::{
|
use open_ai::{
|
||||||
OPEN_AI_API_URL, ResponseStreamEvent,
|
OPEN_AI_API_URL, ResponseStreamEvent,
|
||||||
responses::{Request as ResponseRequest, StreamEvent as ResponsesStreamEvent, stream_response},
|
responses::{
|
||||||
|
CompactRequest as ResponseCompactRequest, Request as ResponseRequest,
|
||||||
|
StreamEvent as ResponsesStreamEvent, compact_response, stream_response,
|
||||||
|
},
|
||||||
stream_completion,
|
stream_completion,
|
||||||
};
|
};
|
||||||
use settings::{OpenAiAvailableModel as AvailableModel, Settings, SettingsStore};
|
use settings::{OpenAiAvailableModel as AvailableModel, Settings, SettingsStore};
|
||||||
|
|
@ -391,6 +395,38 @@ impl OpenAiLanguageModel {
|
||||||
|
|
||||||
async move { Ok(future.await?.boxed()) }.boxed()
|
async move { Ok(future.await?.boxed()) }.boxed()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Sends `request` through the imported `compact_response` call and returns
/// the resulting `CompactedResponse` as a boxed future.
///
/// The API URL and the API key for that URL are read from `self.state`
/// before the future is built. The call itself runs inside
/// `self.request_limiter.run`. If no API key is available the future
/// resolves to `LanguageModelCompletionError::NoApiKey`.
///
/// NOTE(review): the trailing `vec![]` argument is passed empty; its meaning
/// is defined by the callee and is not visible here — confirm.
fn compact_response(
|
||||||
|
&self,
|
||||||
|
request: ResponseCompactRequest,
|
||||||
|
cx: &AsyncApp,
|
||||||
|
) -> BoxFuture<'static, Result<open_ai::responses::CompactedResponse>> {
|
||||||
|
let http_client = self.http_client.clone();
|
||||||
|
|
||||||
|
let (api_key, api_url) = self.state.read_with(cx, |state, cx| {
|
||||||
|
let api_url = OpenAiLanguageModelProvider::api_url(cx);
|
||||||
|
(state.api_key_state.key(&api_url), api_url)
|
||||||
|
});
|
||||||
|
|
||||||
|
let provider = PROVIDER_NAME;
|
||||||
|
let future = self.request_limiter.run(async move {
|
||||||
|
let Some(api_key) = api_key else {
|
||||||
|
return Err(LanguageModelCompletionError::NoApiKey { provider });
|
||||||
|
};
|
||||||
|
let request = compact_response(
|
||||||
|
http_client.as_ref(),
|
||||||
|
provider.0.as_str(),
|
||||||
|
&api_url,
|
||||||
|
&api_key,
|
||||||
|
request,
|
||||||
|
vec![],
|
||||||
|
);
|
||||||
|
let response = request.await?;
|
||||||
|
Ok(response)
|
||||||
|
});
|
||||||
|
|
||||||
|
async move { Ok(future.await?) }.boxed()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl LanguageModel for OpenAiLanguageModel {
|
impl LanguageModel for OpenAiLanguageModel {
|
||||||
|
|
@ -474,6 +510,66 @@ impl LanguageModel for OpenAiLanguageModel {
|
||||||
self.model.max_output_tokens()
|
self.model.max_output_tokens()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Chooses `CompactionStrategyKind::Native` exactly when
/// `self.native_compaction_source(cx)` returns a source, and
/// `CompactionStrategyKind::GenericSummary` otherwise.
fn compaction_strategy(&self, cx: &App) -> CompactionStrategyKind {
|
||||||
|
if self.native_compaction_source(cx).is_some() {
|
||||||
|
CompactionStrategyKind::Native
|
||||||
|
} else {
|
||||||
|
CompactionStrategyKind::GenericSummary
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the native compaction source for this model, or `None` when
/// native compaction is not available.
///
/// A source is only reported when the model uses the Responses API and the
/// configured API URL equals `OPEN_AI_API_URL`. The returned source carries
/// `PROVIDER_ID` and that URL.
fn native_compaction_source(&self, cx: &App) -> Option<LanguageModelNativeCompactionSource> {
|
||||||
|
if !self.model.uses_responses_api() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let api_url = OpenAiLanguageModelProvider::api_url(cx);
|
||||||
|
if api_url.as_ref() != OPEN_AI_API_URL {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(LanguageModelNativeCompactionSource {
|
||||||
|
provider: PROVIDER_ID,
|
||||||
|
api_url: Some(api_url.to_string()),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Performs native compaction of `request` for this OpenAI model.
///
/// If the model does not use the Responses API, the returned future resolves
/// to `LanguageModelCompletionError::Other` with the message
/// "native compaction requires the OpenAI Responses API".
///
/// Otherwise the request is built with `into_open_ai_response`, converted
/// with `.into()` into the compact request type, and sent via
/// `self.compact_response`. The response's `output` becomes the `items` of
/// the returned `LanguageModelNativeCompaction`.
fn compact(
|
||||||
|
&self,
|
||||||
|
request: LanguageModelRequest,
|
||||||
|
cx: &AsyncApp,
|
||||||
|
) -> BoxFuture<'static, Result<LanguageModelNativeCompaction, LanguageModelCompletionError>>
|
||||||
|
{
|
||||||
|
if !self.model.uses_responses_api() {
|
||||||
|
return async move {
|
||||||
|
Err(LanguageModelCompletionError::Other(anyhow::anyhow!(
|
||||||
|
"native compaction requires the OpenAI Responses API"
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
.boxed();
|
||||||
|
}
|
||||||
|
|
||||||
|
let request = into_open_ai_response(
|
||||||
|
request,
|
||||||
|
self.model.id(),
|
||||||
|
self.model.supports_parallel_tool_calls(),
|
||||||
|
self.model.supports_prompt_cache_key(),
|
||||||
|
self.max_output_tokens(),
|
||||||
|
default_thinking_reasoning_effort(&self.model),
|
||||||
|
self.model
|
||||||
|
.supported_reasoning_efforts()
|
||||||
|
.contains(&open_ai::ReasoningEffort::None),
|
||||||
|
);
|
||||||
|
let compacted = self.compact_response(request.into(), cx);
|
||||||
|
|
||||||
|
async move {
|
||||||
|
Ok(LanguageModelNativeCompaction {
|
||||||
|
items: compacted.await?.output,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
.boxed()
|
||||||
|
}
|
||||||
|
|
||||||
fn stream_completion(
|
fn stream_completion(
|
||||||
&self,
|
&self,
|
||||||
request: LanguageModelRequest,
|
request: LanguageModelRequest,
|
||||||
|
|
|
||||||
|
|
@ -191,6 +191,7 @@ pub fn into_open_ai_response(
|
||||||
thread_id,
|
thread_id,
|
||||||
prompt_id: _,
|
prompt_id: _,
|
||||||
intent: _,
|
intent: _,
|
||||||
|
provider_native_prefix,
|
||||||
messages,
|
messages,
|
||||||
tools,
|
tools,
|
||||||
tool_choice,
|
tool_choice,
|
||||||
|
|
@ -260,6 +261,7 @@ pub fn into_open_ai_response(
|
||||||
ResponseRequest {
|
ResponseRequest {
|
||||||
model: model_id.into(),
|
model: model_id.into(),
|
||||||
instructions: None,
|
instructions: None,
|
||||||
|
native_input_prefix: provider_native_prefix.unwrap_or_default(),
|
||||||
input: input_items,
|
input: input_items,
|
||||||
store: Some(false),
|
store: Some(false),
|
||||||
include,
|
include,
|
||||||
|
|
@ -1347,6 +1349,7 @@ mod tests {
|
||||||
thread_id: Some("thread-123".into()),
|
thread_id: Some("thread-123".into()),
|
||||||
prompt_id: None,
|
prompt_id: None,
|
||||||
intent: None,
|
intent: None,
|
||||||
|
provider_native_prefix: None,
|
||||||
messages: vec![
|
messages: vec![
|
||||||
LanguageModelRequestMessage {
|
LanguageModelRequestMessage {
|
||||||
role: Role::System,
|
role: Role::System,
|
||||||
|
|
@ -1479,6 +1482,7 @@ mod tests {
|
||||||
thread_id: None,
|
thread_id: None,
|
||||||
prompt_id: None,
|
prompt_id: None,
|
||||||
intent: None,
|
intent: None,
|
||||||
|
provider_native_prefix: None,
|
||||||
messages: vec![LanguageModelRequestMessage {
|
messages: vec![LanguageModelRequestMessage {
|
||||||
role: Role::Assistant,
|
role: Role::Assistant,
|
||||||
content: vec![MessageContent::ToolUse(tool_use)],
|
content: vec![MessageContent::ToolUse(tool_use)],
|
||||||
|
|
@ -1567,6 +1571,7 @@ mod tests {
|
||||||
thread_id: None,
|
thread_id: None,
|
||||||
prompt_id: None,
|
prompt_id: None,
|
||||||
intent: None,
|
intent: None,
|
||||||
|
provider_native_prefix: None,
|
||||||
messages: vec![LanguageModelRequestMessage {
|
messages: vec![LanguageModelRequestMessage {
|
||||||
role: Role::Assistant,
|
role: Role::Assistant,
|
||||||
content: vec![MessageContent::Text("Done.".into())],
|
content: vec![MessageContent::Text("Done.".into())],
|
||||||
|
|
@ -1631,12 +1636,45 @@ mod tests {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A provider-native prefix supplied on the request must come out of
/// `into_open_ai_response` unchanged in `native_input_prefix`, and the
/// converted request must still set `store` to `Some(false)`.
#[test]
fn into_open_ai_response_carries_native_prefix() {
    let compaction_item = json!({
        "type": "compaction",
        "encrypted_content": "opaque"
    });

    // A single plain user turn following the opaque prefix.
    let user_turn = LanguageModelRequestMessage {
        role: Role::User,
        content: vec![MessageContent::Text("Continue".into())],
        cache: false,
        reasoning_details: None,
    };

    let converted = into_open_ai_response(
        LanguageModelRequest {
            thread_id: None,
            prompt_id: None,
            intent: None,
            provider_native_prefix: Some(vec![compaction_item.clone()]),
            messages: vec![user_turn],
            tools: Vec::new(),
            tool_choice: None,
            stop: Vec::new(),
            temperature: None,
            thinking_allowed: true,
            thinking_effort: None,
            speed: None,
        },
        "gpt-5",
        true,
        true,
        None,
        None,
        false,
    );

    assert_eq!(converted.native_input_prefix, vec![compaction_item]);
    assert_eq!(converted.store, Some(false));
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn into_open_ai_response_omits_reasoning_when_thinking_is_disabled_and_none_is_unsupported() {
|
fn into_open_ai_response_omits_reasoning_when_thinking_is_disabled_and_none_is_unsupported() {
|
||||||
let request = LanguageModelRequest {
|
let request = LanguageModelRequest {
|
||||||
thread_id: None,
|
thread_id: None,
|
||||||
prompt_id: None,
|
prompt_id: None,
|
||||||
intent: None,
|
intent: None,
|
||||||
|
provider_native_prefix: None,
|
||||||
messages: vec![LanguageModelRequestMessage {
|
messages: vec![LanguageModelRequestMessage {
|
||||||
role: Role::User,
|
role: Role::User,
|
||||||
content: vec![MessageContent::Text("Hello".into())],
|
content: vec![MessageContent::Text("Hello".into())],
|
||||||
|
|
@ -1672,6 +1710,7 @@ mod tests {
|
||||||
thread_id: None,
|
thread_id: None,
|
||||||
prompt_id: None,
|
prompt_id: None,
|
||||||
intent: None,
|
intent: None,
|
||||||
|
provider_native_prefix: None,
|
||||||
messages: vec![LanguageModelRequestMessage {
|
messages: vec![LanguageModelRequestMessage {
|
||||||
role: Role::User,
|
role: Role::User,
|
||||||
content: vec![MessageContent::Text("Hello".into())],
|
content: vec![MessageContent::Text("Hello".into())],
|
||||||
|
|
@ -1710,6 +1749,7 @@ mod tests {
|
||||||
thread_id: None,
|
thread_id: None,
|
||||||
prompt_id: None,
|
prompt_id: None,
|
||||||
intent: None,
|
intent: None,
|
||||||
|
provider_native_prefix: None,
|
||||||
messages: vec![LanguageModelRequestMessage {
|
messages: vec![LanguageModelRequestMessage {
|
||||||
role: Role::User,
|
role: Role::User,
|
||||||
content: vec![MessageContent::Text("Hello".into())],
|
content: vec![MessageContent::Text("Hello".into())],
|
||||||
|
|
@ -1750,6 +1790,7 @@ mod tests {
|
||||||
thread_id: None,
|
thread_id: None,
|
||||||
prompt_id: None,
|
prompt_id: None,
|
||||||
intent: None,
|
intent: None,
|
||||||
|
provider_native_prefix: None,
|
||||||
messages: vec![LanguageModelRequestMessage {
|
messages: vec![LanguageModelRequestMessage {
|
||||||
role: Role::Assistant,
|
role: Role::Assistant,
|
||||||
content: vec![MessageContent::Text("Done.".into())],
|
content: vec![MessageContent::Text("Done.".into())],
|
||||||
|
|
@ -1841,6 +1882,7 @@ mod tests {
|
||||||
thread_id: None,
|
thread_id: None,
|
||||||
prompt_id: None,
|
prompt_id: None,
|
||||||
intent: None,
|
intent: None,
|
||||||
|
provider_native_prefix: None,
|
||||||
messages: vec![
|
messages: vec![
|
||||||
LanguageModelRequestMessage {
|
LanguageModelRequestMessage {
|
||||||
role: Role::Assistant,
|
role: Role::Assistant,
|
||||||
|
|
@ -1916,6 +1958,7 @@ mod tests {
|
||||||
thread_id: None,
|
thread_id: None,
|
||||||
prompt_id: None,
|
prompt_id: None,
|
||||||
intent: None,
|
intent: None,
|
||||||
|
provider_native_prefix: None,
|
||||||
messages: vec![LanguageModelRequestMessage {
|
messages: vec![LanguageModelRequestMessage {
|
||||||
role: Role::Assistant,
|
role: Role::Assistant,
|
||||||
content: vec![
|
content: vec![
|
||||||
|
|
@ -2943,6 +2986,7 @@ mod tests {
|
||||||
thread_id: None,
|
thread_id: None,
|
||||||
prompt_id: None,
|
prompt_id: None,
|
||||||
intent: None,
|
intent: None,
|
||||||
|
provider_native_prefix: None,
|
||||||
messages: vec![
|
messages: vec![
|
||||||
LanguageModelRequestMessage {
|
LanguageModelRequestMessage {
|
||||||
role: Role::User,
|
role: Role::User,
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,8 @@ pub struct Request {
|
||||||
pub model: String,
|
pub model: String,
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
pub instructions: Option<String>,
|
pub instructions: Option<String>,
|
||||||
|
#[serde(skip)]
|
||||||
|
pub native_input_prefix: Vec<Value>,
|
||||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||||
pub input: Vec<ResponseInputItem>,
|
pub input: Vec<ResponseInputItem>,
|
||||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||||
|
|
@ -37,6 +39,43 @@ pub struct Request {
|
||||||
pub store: Option<bool>,
|
pub store: Option<bool>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Debug)]
|
||||||
|
pub struct CompactRequest {
|
||||||
|
pub model: String,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub instructions: Option<String>,
|
||||||
|
#[serde(skip)]
|
||||||
|
pub native_input_prefix: Vec<Value>,
|
||||||
|
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||||
|
pub input: Vec<ResponseInputItem>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub parallel_tool_calls: Option<bool>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub tool_choice: Option<ToolChoice>,
|
||||||
|
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||||
|
pub tools: Vec<ToolDefinition>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<Request> for CompactRequest {
|
||||||
|
fn from(request: Request) -> Self {
|
||||||
|
Self {
|
||||||
|
model: request.model,
|
||||||
|
instructions: request.instructions,
|
||||||
|
native_input_prefix: request.native_input_prefix,
|
||||||
|
input: request.input,
|
||||||
|
parallel_tool_calls: request.parallel_tool_calls,
|
||||||
|
tool_choice: request.tool_choice,
|
||||||
|
tools: request.tools,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize, Debug, Clone)]
|
||||||
|
pub struct CompactedResponse {
|
||||||
|
#[serde(default)]
|
||||||
|
pub output: Vec<Value>,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
|
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
|
||||||
#[serde(rename_all = "snake_case")]
|
#[serde(rename_all = "snake_case")]
|
||||||
pub enum ResponseIncludable {
|
pub enum ResponseIncludable {
|
||||||
|
|
@ -411,10 +450,10 @@ pub async fn stream_response(
|
||||||
}
|
}
|
||||||
|
|
||||||
let is_streaming = request.stream;
|
let is_streaming = request.stream;
|
||||||
|
let body = serialize_body_with_native_prefix(&request, &request.native_input_prefix)
|
||||||
|
.map_err(|e| RequestError::Other(e.into()))?;
|
||||||
let request = request_builder
|
let request = request_builder
|
||||||
.body(AsyncBody::from(
|
.body(AsyncBody::from(body))
|
||||||
serde_json::to_string(&request).map_err(|e| RequestError::Other(e.into()))?,
|
|
||||||
))
|
|
||||||
.map_err(|e| RequestError::Other(e.into()))?;
|
.map_err(|e| RequestError::Other(e.into()))?;
|
||||||
|
|
||||||
let mut response = client.send(request).await?;
|
let mut response = client.send(request).await?;
|
||||||
|
|
@ -569,3 +608,121 @@ pub async fn stream_response(
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub async fn compact_response(
|
||||||
|
client: &dyn HttpClient,
|
||||||
|
provider_name: &str,
|
||||||
|
api_url: &str,
|
||||||
|
api_key: &str,
|
||||||
|
request: CompactRequest,
|
||||||
|
extra_headers: Vec<(String, String)>,
|
||||||
|
) -> Result<CompactedResponse, RequestError> {
|
||||||
|
let uri = format!("{api_url}/responses/compact");
|
||||||
|
let mut request_builder = HttpRequest::builder()
|
||||||
|
.method(Method::POST)
|
||||||
|
.uri(uri)
|
||||||
|
.header("Content-Type", "application/json")
|
||||||
|
.header("Authorization", format!("Bearer {}", api_key.trim()));
|
||||||
|
for (name, value) in &extra_headers {
|
||||||
|
request_builder = request_builder.header(name.as_str(), value.as_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
let body = serialize_body_with_native_prefix(&request, &request.native_input_prefix)
|
||||||
|
.map_err(|e| RequestError::Other(e.into()))?;
|
||||||
|
let request = request_builder
|
||||||
|
.body(AsyncBody::from(body))
|
||||||
|
.map_err(|e| RequestError::Other(e.into()))?;
|
||||||
|
|
||||||
|
let mut response = client.send(request).await?;
|
||||||
|
let mut body = String::new();
|
||||||
|
response
|
||||||
|
.body_mut()
|
||||||
|
.read_to_string(&mut body)
|
||||||
|
.await
|
||||||
|
.map_err(|e| RequestError::Other(e.into()))?;
|
||||||
|
|
||||||
|
if response.status().is_success() {
|
||||||
|
serde_json::from_str::<CompactedResponse>(&body).map_err(|error| {
|
||||||
|
log::error!(
|
||||||
|
"Failed to parse OpenAI compact response: `{}`\nResponse: `{}`",
|
||||||
|
error,
|
||||||
|
body,
|
||||||
|
);
|
||||||
|
RequestError::Other(anyhow!(error))
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
Err(RequestError::HttpResponseError {
|
||||||
|
provider: provider_name.to_owned(),
|
||||||
|
status_code: response.status(),
|
||||||
|
body,
|
||||||
|
headers: response.headers().clone(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn serialize_body_with_native_prefix<T: Serialize>(
|
||||||
|
request: &T,
|
||||||
|
native_input_prefix: &[Value],
|
||||||
|
) -> serde_json::Result<String> {
|
||||||
|
let mut body = serde_json::to_value(request)?;
|
||||||
|
|
||||||
|
if !native_input_prefix.is_empty() {
|
||||||
|
if let Value::Object(object) = &mut body {
|
||||||
|
let input = object
|
||||||
|
.entry("input")
|
||||||
|
.or_insert_with(|| Value::Array(Vec::new()));
|
||||||
|
if let Value::Array(input) = input {
|
||||||
|
let mut prefixed_input = native_input_prefix.to_vec();
|
||||||
|
prefixed_input.append(input);
|
||||||
|
*input = prefixed_input;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
serde_json::to_string(&body)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// The opaque native item must be the first element of the serialized
    /// `input` array, and `store` must still be emitted as `false`.
    #[test]
    fn native_input_prefix_is_prepended_to_responses_body() {
        let native_item = json!({
            "type": "compaction",
            "encrypted_content": "opaque"
        });

        // One typed user message that should end up after the native item.
        let user_message = ResponseInputItem::Message(ResponseMessageItem {
            role: Role::User,
            content: vec![ResponseInputContent::Text {
                text: "Continue".to_string(),
            }],
            phase: None,
        });

        let request = Request {
            model: "gpt-5".to_string(),
            instructions: None,
            native_input_prefix: vec![native_item.clone()],
            input: vec![user_message],
            include: Vec::new(),
            stream: true,
            temperature: None,
            top_p: None,
            max_output_tokens: None,
            parallel_tool_calls: None,
            tool_choice: None,
            tools: Vec::new(),
            prompt_cache_key: None,
            reasoning: None,
            store: Some(false),
        };

        let serialized = serialize_body_with_native_prefix(&request, &request.native_input_prefix)
            .expect("request should serialize");
        let parsed: Value = serde_json::from_str(&serialized).expect("body should be valid JSON");
        let input_items = parsed["input"]
            .as_array()
            .expect("input should be an array");

        assert_eq!(input_items.first(), Some(&native_item));
        assert_eq!(parsed["store"], json!(false));
    }
}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue