Add OpenAI native context compaction

This commit is contained in:
Richard Feldman 2026-05-29 00:41:47 -04:00
parent 568bf53639
commit ce686f1069
No known key found for this signature in database
12 changed files with 567 additions and 71 deletions

View file

@ -37,12 +37,12 @@ use gpui::{
};
use heck::ToSnakeCase as _;
use language_model::{
CompletionIntent, LanguageModel, LanguageModelCompletionError, LanguageModelCompletionEvent,
LanguageModelId, LanguageModelImage, LanguageModelProviderId, LanguageModelRegistry,
LanguageModelRequest, LanguageModelRequestMessage, LanguageModelRequestTool,
LanguageModelToolResult, LanguageModelToolResultContent, LanguageModelToolSchemaFormat,
LanguageModelToolUse, LanguageModelToolUseId, Role, SelectedModel, Speed, StopReason,
TokenUsage, ZED_CLOUD_PROVIDER_ID,
CompactionStrategyKind, CompletionIntent, LanguageModel, LanguageModelCompletionError,
LanguageModelCompletionEvent, LanguageModelId, LanguageModelImage, LanguageModelProviderId,
LanguageModelRegistry, LanguageModelRequest, LanguageModelRequestMessage,
LanguageModelRequestTool, LanguageModelToolResult, LanguageModelToolResultContent,
LanguageModelToolSchemaFormat, LanguageModelToolUse, LanguageModelToolUseId, Role,
SelectedModel, Speed, StopReason, TokenUsage, ZED_CLOUD_PROVIDER_ID,
};
use project::Project;
use prompt_store::ProjectContext;
@ -132,14 +132,23 @@ enum AutoCompactPhase {
MidTurn,
}
struct GenericCompactionInput {
struct CompactionInput {
id: ContextCompactionId,
model: Arc<dyn LanguageModel>,
request: LanguageModelRequest,
retained_user_messages: Vec<RetainedUserMessage>,
strategy: PreparedCompactionStrategy,
held_message: Option<Message>,
}
/// Compaction strategy chosen while preparing a compaction, together with the
/// data that strategy needs once the model call has finished.
enum PreparedCompactionStrategy {
/// Summary-based compaction; carries the user messages that are installed
/// alongside the generated summary.
GenericSummary {
retained_user_messages: Vec<RetainedUserMessage>,
},
/// Provider-native compaction; `source` is stored with the resulting
/// `CompactionArtifact::ProviderNative` artifact.
Native {
source: NativeCompactionSource,
},
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum Message {
User(UserMessage),
@ -2092,7 +2101,19 @@ impl Thread {
fn estimate_model_visible_tokens(&self, cx: &App) -> u64 {
let messages = self.build_request_messages(Vec::new(), cx);
estimate_request_tokens(&messages)
let prefix_tokens = self
.model
.as_ref()
.and_then(|model| {
self.provider_native_prefix_for_conversation(
self.current_conversation(),
model.as_ref(),
cx,
)
})
.map(|prefix| estimate_native_seed_tokens(&prefix))
.unwrap_or(0);
estimate_request_tokens(&messages).saturating_add(prefix_tokens)
}
fn generic_retained_user_messages(&self) -> Vec<RetainedUserMessage> {
@ -2102,11 +2123,11 @@ impl Thread {
)
}
fn prepare_generic_compaction(
fn prepare_compaction(
&self,
phase: AutoCompactPhase,
cx: &App,
) -> Result<Option<GenericCompactionInput>> {
) -> Result<Option<CompactionInput>> {
if !self.should_auto_compact(cx) {
return Ok(None);
}
@ -2119,27 +2140,39 @@ impl Thread {
return Ok(None);
}
let retained_user_messages = retained_user_messages_from_messages(
&conversation.messages,
GENERIC_COMPACTION_RETAINED_USER_MESSAGE_TOKENS,
);
let mut request =
let native_source = native_compaction_source_for_model(model.as_ref(), cx);
let use_native = model.compaction_strategy(cx) == CompactionStrategyKind::Native;
let mut request_messages =
self.build_request_messages_for_conversation(&conversation, Vec::new(), cx, false);
request.push(LanguageModelRequestMessage {
role: Role::User,
content: vec![SUMMARIZE_THREAD_DETAILED_PROMPT.into()],
cache: false,
reasoning_details: None,
});
let provider_native_prefix =
self.provider_native_prefix_for_conversation(&conversation, model.as_ref(), cx);
Ok(Some(GenericCompactionInput {
let strategy = if let (true, Some(source)) = (use_native, native_source) {
PreparedCompactionStrategy::Native { source }
} else {
request_messages.push(LanguageModelRequestMessage {
role: Role::User,
content: vec![SUMMARIZE_THREAD_DETAILED_PROMPT.into()],
cache: false,
reasoning_details: None,
});
PreparedCompactionStrategy::GenericSummary {
retained_user_messages: retained_user_messages_from_messages(
&conversation.messages,
GENERIC_COMPACTION_RETAINED_USER_MESSAGE_TOKENS,
),
}
};
Ok(Some(CompactionInput {
id: ContextCompactionId::new(),
model: model.clone(),
request: LanguageModelRequest {
thread_id: Some(self.id.to_string()),
prompt_id: Some(self.prompt_id.to_string()),
intent: Some(CompletionIntent::ThreadContextSummarization),
messages: request,
provider_native_prefix,
messages: request_messages,
tools: Vec::new(),
tool_choice: None,
stop: Vec::new(),
@ -2148,7 +2181,7 @@ impl Thread {
thinking_effort: None,
speed: self.speed(),
},
retained_user_messages,
strategy,
held_message,
}))
}
@ -2171,11 +2204,30 @@ impl Thread {
(conversation, held_message)
}
fn install_generic_compaction(
/// Returns a copy of the provider-native items stored in `conversation`'s
/// compaction seed, but only when they came from the same native compaction
/// source that `model` currently reports.
///
/// Yields `None` when the conversation has no seed, the seed's artifact is not
/// `CompactionArtifact::ProviderNative`, the model reports no native compaction
/// source, or the stored and current sources differ.
fn provider_native_prefix_for_conversation(
    &self,
    conversation: &Conversation,
    model: &dyn LanguageModel,
    cx: &App,
) -> Option<Vec<serde_json::Value>> {
    let (stored_source, stored_items) = match &conversation.seed.as_ref()?.artifact {
        CompactionArtifact::ProviderNative { source, items } => (source, items),
        _ => return None,
    };

    // The model is only consulted once a native artifact is actually present.
    let current_source = native_compaction_source_for_model(model, cx)?;

    native_compaction_sources_match(&current_source, stored_source)
        .then(|| stored_items.clone())
}
fn install_compaction(
&mut self,
id: ContextCompactionId,
summary: SharedString,
artifact: CompactionArtifact,
retained_user_messages: Vec<RetainedUserMessage>,
baseline_tokens: u64,
held_message: Option<Message>,
cx: &mut Context<Self>,
) -> Result<()> {
@ -2193,13 +2245,12 @@ impl Thread {
self.current_messages_mut().pop();
}
let baseline_tokens = estimate_generic_seed_tokens(&retained_user_messages, &summary);
self.conversations.push(Conversation {
marker: Some(ContextCompactionMarker { id }),
messages: Vec::new(),
request_token_usage: HashMap::default(),
seed: Some(CompactionSeed {
artifact: CompactionArtifact::Summary(summary),
artifact,
retained_user_messages,
baseline_tokens,
baseline_observed: false,
@ -2219,7 +2270,7 @@ impl Thread {
if self.should_auto_compact(cx) {
let retained_user_message_count = self.generic_retained_user_messages().len();
log::info!(
"auto-compaction threshold reached at {phase}; handoff is not wired yet; retained_user_messages={retained_user_message_count}"
"auto-compaction threshold reached at {phase}; retained_user_messages={retained_user_message_count}"
);
}
}
@ -2686,45 +2737,89 @@ impl Thread {
AutoCompactPhase::MidTurn => "mid-turn",
};
this.log_auto_compact_hook(phase_name, cx);
this.prepare_generic_compaction(phase, cx)
this.prepare_compaction(phase, cx)
})??
else {
return Ok(());
};
event_stream.send_compaction_started(input.id.clone());
let compaction_id = input.id.clone();
let summary = match Self::run_generic_compaction(
input.model.clone(),
input.request,
cancellation_rx.clone(),
cx,
)
.await
{
Ok(Some(summary)) => summary,
Ok(None) => {
event_stream.send_compaction_failed(compaction_id);
return Ok(());
let CompactionInput {
id,
model,
request,
strategy,
held_message,
} = input;
event_stream.send_compaction_started(id.clone());
let compaction_id = id.clone();
let (artifact, retained_user_messages, baseline_tokens) = match strategy {
PreparedCompactionStrategy::GenericSummary {
retained_user_messages,
} => {
let summary = match Self::run_generic_compaction(
model.clone(),
request,
cancellation_rx.clone(),
cx,
)
.await
{
Ok(Some(summary)) => summary,
Ok(None) => {
event_stream.send_compaction_failed(compaction_id);
return Ok(());
}
Err(error) => {
event_stream.send_compaction_failed(compaction_id);
return Err(error);
}
};
let baseline_tokens =
estimate_generic_seed_tokens(&retained_user_messages, &summary);
(
CompactionArtifact::Summary(summary),
retained_user_messages,
baseline_tokens,
)
}
Err(error) => {
event_stream.send_compaction_failed(compaction_id);
return Err(error);
PreparedCompactionStrategy::Native { source } => {
let items =
match Self::run_native_compaction(model, request, cancellation_rx.clone(), cx)
.await
{
Ok(Some(items)) => items,
Ok(None) => {
event_stream.send_compaction_failed(compaction_id);
return Ok(());
}
Err(error) => {
event_stream.send_compaction_failed(compaction_id);
return Err(error);
}
};
let baseline_tokens = estimate_native_seed_tokens(&items);
(
CompactionArtifact::ProviderNative { source, items },
Vec::new(),
baseline_tokens,
)
}
};
if *cancellation_rx.borrow() {
event_stream.send_compaction_failed(input.id);
event_stream.send_compaction_failed(id);
return Ok(());
}
let succeeded_id = input.id.clone();
let succeeded_id = id.clone();
this.update(cx, |this, cx| {
this.install_generic_compaction(
input.id.clone(),
summary,
input.retained_user_messages,
input.held_message,
this.install_compaction(
id.clone(),
artifact,
retained_user_messages,
baseline_tokens,
held_message,
cx,
)
})??;
@ -2763,6 +2858,24 @@ impl Thread {
Ok(Some(summary.into()))
}
async fn run_native_compaction(
model: Arc<dyn LanguageModel>,
request: LanguageModelRequest,
mut cancellation_rx: watch::Receiver<bool>,
cx: &mut AsyncApp,
) -> Result<Option<Vec<serde_json::Value>>> {
if *cancellation_rx.borrow() {
return Ok(None);
}
let compacted = model.compact(request, cx).await?;
if *cancellation_rx.borrow() {
return Ok(None);
}
Ok(Some(compacted.items))
}
fn process_tool_result(
this: &WeakEntity<Thread>,
event_stream: &ThreadEventStream,
@ -3497,12 +3610,18 @@ impl Thread {
log::debug!("Request includes {} tools", available_tools.len());
let messages = self.build_request_messages(available_tools, cx);
let provider_native_prefix = self.provider_native_prefix_for_conversation(
self.current_conversation(),
model.as_ref(),
cx,
);
log::debug!("Request will include {} messages", messages.len());
let request = LanguageModelRequest {
thread_id: Some(self.id.to_string()),
prompt_id: Some(self.prompt_id.to_string()),
intent: Some(completion_intent),
provider_native_prefix,
messages,
tools,
tool_choice: None,
@ -3995,6 +4114,31 @@ fn estimate_generic_seed_tokens(
estimate_request_tokens(&messages)
}
/// Estimates the token cost of provider-native seed items by serializing them
/// to JSON text and running the text estimator over the result.
///
/// Falls back to `0` if the items cannot be serialized.
fn estimate_native_seed_tokens(items: &[serde_json::Value]) -> u64 {
    match serde_json::to_string(items) {
        Ok(serialized) => estimate_text_tokens(&serialized),
        Err(_) => 0,
    }
}
/// Converts the native compaction source reported by `model` (if any) into the
/// thread-side `NativeCompactionSource`, storing the provider as a string.
///
/// Returns `None` when the model reports no native compaction source.
fn native_compaction_source_for_model(
    model: &dyn LanguageModel,
    cx: &App,
) -> Option<NativeCompactionSource> {
    let reported = model.native_compaction_source(cx)?;
    Some(NativeCompactionSource {
        provider: reported.provider.to_string(),
        api_url: reported.api_url,
    })
}
/// Returns `true` when both sources name the same provider and the same API URL.
fn native_compaction_sources_match(
    current: &NativeCompactionSource,
    stored: &NativeCompactionSource,
) -> bool {
    let current_key = (&current.provider, &current.api_url);
    let stored_key = (&stored.provider, &stored.api_url);
    current_key == stored_key
}
fn retained_user_message_from_content(
content: &[UserMessageContent],
token_budget: u64,
@ -5637,15 +5781,18 @@ mod tests {
thread.current_messages_mut().push(live_message.clone());
let compaction_id = ContextCompactionId::new();
let summary: SharedString = "summary text".into();
let retained_user_messages = vec![RetainedUserMessage {
content: vec![RetainedUserMessageContent::Text("retained text".to_string())],
}];
let baseline_tokens =
estimate_generic_seed_tokens(&retained_user_messages, &summary);
thread
.install_generic_compaction(
.install_compaction(
compaction_id.clone(),
"summary text".into(),
vec![RetainedUserMessage {
content: vec![RetainedUserMessageContent::Text(
"retained text".to_string(),
)],
}],
CompactionArtifact::Summary(summary),
retained_user_messages,
baseline_tokens,
Some(live_message),
cx,
)

View file

@ -541,6 +541,7 @@ impl CodegenAlternative {
thread_id: None,
prompt_id: None,
intent: Some(CompletionIntent::InlineAssist),
provider_native_prefix: None,
tools,
tool_choice,
stop: Vec::new(),
@ -621,6 +622,7 @@ impl CodegenAlternative {
thread_id: None,
prompt_id: None,
intent: Some(CompletionIntent::InlineAssist),
provider_native_prefix: None,
tools: Vec::new(),
tool_choice: None,
stop: Vec::new(),

View file

@ -268,6 +268,7 @@ impl TerminalInlineAssistant {
thread_id: None,
prompt_id: None,
intent: Some(CompletionIntent::TerminalInlineAssist),
provider_native_prefix: None,
messages: vec![request_message],
tools: Vec::new(),
tool_choice: None,

View file

@ -551,6 +551,7 @@ mod tests {
thread_id: None,
prompt_id: None,
intent: None,
provider_native_prefix: None,
stop: vec![],
temperature: None,
tools: vec![language_model_core::LanguageModelRequestTool {
@ -655,6 +656,7 @@ mod tests {
thread_id: None,
prompt_id: None,
intent: None,
provider_native_prefix: None,
stop: vec![],
temperature: None,
tools: vec![language_model_core::LanguageModelRequestTool {
@ -725,6 +727,7 @@ mod tests {
thread_id: None,
prompt_id: None,
intent: None,
provider_native_prefix: None,
stop: vec![],
temperature: None,
tools: vec![language_model_core::LanguageModelRequestTool {
@ -768,6 +771,7 @@ mod tests {
thread_id: None,
prompt_id: None,
intent: None,
provider_native_prefix: None,
stop: vec![],
temperature: None,
tools: vec![],

View file

@ -2794,6 +2794,7 @@ impl GitPanel {
thread_id: None,
prompt_id: None,
intent: Some(CompletionIntent::GenerateGitCommitMessage),
provider_native_prefix: None,
messages: vec![LanguageModelRequestMessage {
role: Role::User,
content: vec![content.into()],

View file

@ -43,6 +43,23 @@ impl Default for LanguageModelTextStream {
}
}
/// Identifies which context-compaction strategy a language model reports via
/// `LanguageModel::compaction_strategy`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompactionStrategyKind {
/// Compaction is delegated to the model's own `compact` implementation.
Native,
/// Summary-based compaction; the value returned by the trait's default
/// `compaction_strategy` implementation.
GenericSummary,
}
/// Describes where native compaction output comes from: the provider and,
/// optionally, the API URL in use.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LanguageModelNativeCompactionSource {
/// Provider that performs the native compaction.
pub provider: LanguageModelProviderId,
/// API URL associated with the source, if one is reported.
pub api_url: Option<String>,
}
/// Result of a successful `LanguageModel::compact` call.
#[derive(Debug, Clone, PartialEq)]
pub struct LanguageModelNativeCompaction {
/// Compacted items returned by the provider, kept as raw JSON values.
pub items: Vec<serde_json::Value>,
}
pub trait LanguageModel: Send + Sync {
fn id(&self) -> LanguageModelId;
fn name(&self) -> LanguageModelName;
@ -121,6 +138,28 @@ pub trait LanguageModel: Send + Sync {
None
}
/// Reports which compaction strategy this model uses.
///
/// The default implementation returns `CompactionStrategyKind::GenericSummary`.
fn compaction_strategy(&self, _cx: &App) -> CompactionStrategyKind {
CompactionStrategyKind::GenericSummary
}
/// Describes the native compaction source for this model, if it has one.
///
/// The default implementation returns `None`.
fn native_compaction_source(&self, _cx: &App) -> Option<LanguageModelNativeCompactionSource> {
None
}
/// Performs provider-native compaction of `_request`.
///
/// The default implementation resolves to a
/// `LanguageModelCompletionError::Other` error stating that native compaction
/// is not supported; models that support it override this method.
fn compact(
    &self,
    _request: LanguageModelRequest,
    _cx: &AsyncApp,
) -> BoxFuture<'static, Result<LanguageModelNativeCompaction, LanguageModelCompletionError>> {
    async {
        let unsupported = anyhow::anyhow!("native compaction is not supported by this model");
        Err(LanguageModelCompletionError::Other(unsupported))
    }
    .boxed()
}
fn stream_completion(
&self,
request: LanguageModelRequest,

View file

@ -362,6 +362,8 @@ pub struct LanguageModelRequest {
pub thread_id: Option<String>,
pub prompt_id: Option<String>,
pub intent: Option<CompletionIntent>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub provider_native_prefix: Option<Vec<serde_json::Value>>,
pub messages: Vec<LanguageModelRequestMessage>,
pub tools: Vec<LanguageModelRequestTool>,
pub tool_choice: Option<LanguageModelToolChoice>,

View file

@ -1205,6 +1205,7 @@ fn into_copilot_responses(
thread_id: _,
prompt_id: _,
intent: _,
provider_native_prefix: _,
messages,
tools,
tool_choice,

View file

@ -978,6 +978,7 @@ mod tests {
thread_id: Some("abcdef".into()),
prompt_id: None,
intent: None,
provider_native_prefix: None,
stop: vec![],
thinking_allowed: true,
thinking_effort: None,
@ -1014,6 +1015,7 @@ mod tests {
thread_id: None,
prompt_id: None,
intent: None,
provider_native_prefix: None,
stop: vec![],
thinking_allowed: true,
thinking_effort: None,

View file

@ -5,16 +5,20 @@ use futures::{FutureExt, StreamExt, future::BoxFuture};
use gpui::{AnyView, App, AsyncApp, Context, Entity, SharedString, Task, TaskExt, Window};
use http_client::HttpClient;
use language_model::{
ApiKeyState, AuthenticateError, EnvVar, IconOrSvg, LanguageModel, LanguageModelCompletionError,
LanguageModelCompletionEvent, LanguageModelEffortLevel, LanguageModelId, LanguageModelName,
LanguageModelProvider, LanguageModelProviderId, LanguageModelProviderName,
LanguageModelProviderState, LanguageModelRequest, LanguageModelToolChoice, OPEN_AI_PROVIDER_ID,
OPEN_AI_PROVIDER_NAME, RateLimiter, env_var,
ApiKeyState, AuthenticateError, CompactionStrategyKind, EnvVar, IconOrSvg, LanguageModel,
LanguageModelCompletionError, LanguageModelCompletionEvent, LanguageModelEffortLevel,
LanguageModelId, LanguageModelName, LanguageModelNativeCompaction,
LanguageModelNativeCompactionSource, LanguageModelProvider, LanguageModelProviderId,
LanguageModelProviderName, LanguageModelProviderState, LanguageModelRequest,
LanguageModelToolChoice, OPEN_AI_PROVIDER_ID, OPEN_AI_PROVIDER_NAME, RateLimiter, env_var,
};
use menu;
use open_ai::{
OPEN_AI_API_URL, ResponseStreamEvent,
responses::{Request as ResponseRequest, StreamEvent as ResponsesStreamEvent, stream_response},
responses::{
CompactRequest as ResponseCompactRequest, Request as ResponseRequest,
StreamEvent as ResponsesStreamEvent, compact_response, stream_response,
},
stream_completion,
};
use settings::{OpenAiAvailableModel as AvailableModel, Settings, SettingsStore};
@ -391,6 +395,38 @@ impl OpenAiLanguageModel {
async move { Ok(future.await?.boxed()) }.boxed()
}
/// Sends `request` to the compact endpoint through this model's request
/// limiter, using the API key and API URL currently held in provider state.
///
/// The returned future fails with `LanguageModelCompletionError::NoApiKey`
/// (converted into the outer error type) when no key is available for the URL.
fn compact_response(
    &self,
    request: ResponseCompactRequest,
    cx: &AsyncApp,
) -> BoxFuture<'static, Result<open_ai::responses::CompactedResponse>> {
    let http_client = self.http_client.clone();
    let (api_key, api_url) = self.state.read_with(cx, |state, cx| {
        let url = OpenAiLanguageModelProvider::api_url(cx);
        let key = state.api_key_state.key(&url);
        (key, url)
    });

    let limited = self.request_limiter.run(async move {
        let provider = PROVIDER_NAME;
        let Some(api_key) = api_key else {
            return Err(LanguageModelCompletionError::NoApiKey { provider });
        };

        let compacted = compact_response(
            http_client.as_ref(),
            provider.0.as_str(),
            &api_url,
            &api_key,
            request,
            vec![],
        )
        .await?;
        Ok(compacted)
    });

    async move { Ok(limited.await?) }.boxed()
}
}
impl LanguageModel for OpenAiLanguageModel {
@ -474,6 +510,66 @@ impl LanguageModel for OpenAiLanguageModel {
self.model.max_output_tokens()
}
/// Selects native compaction whenever this model reports a native compaction
/// source, and generic summarization otherwise.
fn compaction_strategy(&self, cx: &App) -> CompactionStrategyKind {
    match self.native_compaction_source(cx) {
        Some(_) => CompactionStrategyKind::Native,
        None => CompactionStrategyKind::GenericSummary,
    }
}
/// Reports a native compaction source only when the model uses the Responses
/// API and the configured API URL is exactly `OPEN_AI_API_URL`.
///
/// Returns `None` in every other case.
fn native_compaction_source(&self, cx: &App) -> Option<LanguageModelNativeCompactionSource> {
    if !self.model.uses_responses_api() {
        return None;
    }

    let api_url = OpenAiLanguageModelProvider::api_url(cx);
    let is_default_endpoint = api_url.as_ref() == OPEN_AI_API_URL;

    is_default_endpoint.then(|| LanguageModelNativeCompactionSource {
        provider: PROVIDER_ID,
        api_url: Some(api_url.to_string()),
    })
}
fn compact(
&self,
request: LanguageModelRequest,
cx: &AsyncApp,
) -> BoxFuture<'static, Result<LanguageModelNativeCompaction, LanguageModelCompletionError>>
{
if !self.model.uses_responses_api() {
return async move {
Err(LanguageModelCompletionError::Other(anyhow::anyhow!(
"native compaction requires the OpenAI Responses API"
)))
}
.boxed();
}
let request = into_open_ai_response(
request,
self.model.id(),
self.model.supports_parallel_tool_calls(),
self.model.supports_prompt_cache_key(),
self.max_output_tokens(),
default_thinking_reasoning_effort(&self.model),
self.model
.supported_reasoning_efforts()
.contains(&open_ai::ReasoningEffort::None),
);
let compacted = self.compact_response(request.into(), cx);
async move {
Ok(LanguageModelNativeCompaction {
items: compacted.await?.output,
})
}
.boxed()
}
fn stream_completion(
&self,
request: LanguageModelRequest,

View file

@ -191,6 +191,7 @@ pub fn into_open_ai_response(
thread_id,
prompt_id: _,
intent: _,
provider_native_prefix,
messages,
tools,
tool_choice,
@ -260,6 +261,7 @@ pub fn into_open_ai_response(
ResponseRequest {
model: model_id.into(),
instructions: None,
native_input_prefix: provider_native_prefix.unwrap_or_default(),
input: input_items,
store: Some(false),
include,
@ -1347,6 +1349,7 @@ mod tests {
thread_id: Some("thread-123".into()),
prompt_id: None,
intent: None,
provider_native_prefix: None,
messages: vec![
LanguageModelRequestMessage {
role: Role::System,
@ -1479,6 +1482,7 @@ mod tests {
thread_id: None,
prompt_id: None,
intent: None,
provider_native_prefix: None,
messages: vec![LanguageModelRequestMessage {
role: Role::Assistant,
content: vec![MessageContent::ToolUse(tool_use)],
@ -1567,6 +1571,7 @@ mod tests {
thread_id: None,
prompt_id: None,
intent: None,
provider_native_prefix: None,
messages: vec![LanguageModelRequestMessage {
role: Role::Assistant,
content: vec![MessageContent::Text("Done.".into())],
@ -1631,12 +1636,45 @@ mod tests {
);
}
// Checks that a `provider_native_prefix` set on the request is carried over
// unchanged into `native_input_prefix`, and that `store` stays `Some(false)`.
#[test]
fn into_open_ai_response_carries_native_prefix() {
let native_item = json!({
"type": "compaction",
"encrypted_content": "opaque"
});
let request = LanguageModelRequest {
thread_id: None,
prompt_id: None,
intent: None,
provider_native_prefix: Some(vec![native_item.clone()]),
messages: vec![LanguageModelRequestMessage {
role: Role::User,
content: vec![MessageContent::Text("Continue".into())],
cache: false,
reasoning_details: None,
}],
tools: Vec::new(),
tool_choice: None,
stop: Vec::new(),
temperature: None,
thinking_allowed: true,
thinking_effort: None,
speed: None,
};
let response = into_open_ai_response(request, "gpt-5", true, true, None, None, false);
assert_eq!(response.native_input_prefix, vec![native_item]);
assert_eq!(response.store, Some(false));
}
#[test]
fn into_open_ai_response_omits_reasoning_when_thinking_is_disabled_and_none_is_unsupported() {
let request = LanguageModelRequest {
thread_id: None,
prompt_id: None,
intent: None,
provider_native_prefix: None,
messages: vec![LanguageModelRequestMessage {
role: Role::User,
content: vec![MessageContent::Text("Hello".into())],
@ -1672,6 +1710,7 @@ mod tests {
thread_id: None,
prompt_id: None,
intent: None,
provider_native_prefix: None,
messages: vec![LanguageModelRequestMessage {
role: Role::User,
content: vec![MessageContent::Text("Hello".into())],
@ -1710,6 +1749,7 @@ mod tests {
thread_id: None,
prompt_id: None,
intent: None,
provider_native_prefix: None,
messages: vec![LanguageModelRequestMessage {
role: Role::User,
content: vec![MessageContent::Text("Hello".into())],
@ -1750,6 +1790,7 @@ mod tests {
thread_id: None,
prompt_id: None,
intent: None,
provider_native_prefix: None,
messages: vec![LanguageModelRequestMessage {
role: Role::Assistant,
content: vec![MessageContent::Text("Done.".into())],
@ -1841,6 +1882,7 @@ mod tests {
thread_id: None,
prompt_id: None,
intent: None,
provider_native_prefix: None,
messages: vec![
LanguageModelRequestMessage {
role: Role::Assistant,
@ -1916,6 +1958,7 @@ mod tests {
thread_id: None,
prompt_id: None,
intent: None,
provider_native_prefix: None,
messages: vec![LanguageModelRequestMessage {
role: Role::Assistant,
content: vec![
@ -2943,6 +2986,7 @@ mod tests {
thread_id: None,
prompt_id: None,
intent: None,
provider_native_prefix: None,
messages: vec![
LanguageModelRequestMessage {
role: Role::User,

View file

@ -11,6 +11,8 @@ pub struct Request {
pub model: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub instructions: Option<String>,
#[serde(skip)]
pub native_input_prefix: Vec<Value>,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub input: Vec<ResponseInputItem>,
#[serde(skip_serializing_if = "Vec::is_empty")]
@ -37,6 +39,43 @@ pub struct Request {
pub store: Option<bool>,
}
/// Request body for the `/responses/compact` endpoint used by `compact_response`.
#[derive(Serialize, Debug)]
pub struct CompactRequest {
pub model: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub instructions: Option<String>,
// Skipped by serde: `compact_response` hands this to
// `serialize_body_with_native_prefix`, which prepends it to `input`.
#[serde(skip)]
pub native_input_prefix: Vec<Value>,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub input: Vec<ResponseInputItem>,
#[serde(skip_serializing_if = "Option::is_none")]
pub parallel_tool_calls: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub tool_choice: Option<ToolChoice>,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub tools: Vec<ToolDefinition>,
}
impl From<Request> for CompactRequest {
fn from(request: Request) -> Self {
Self {
model: request.model,
instructions: request.instructions,
native_input_prefix: request.native_input_prefix,
input: request.input,
parallel_tool_calls: request.parallel_tool_calls,
tool_choice: request.tool_choice,
tools: request.tools,
}
}
}
/// Response body parsed by `compact_response`.
#[derive(Deserialize, Debug, Clone)]
pub struct CompactedResponse {
// `default` means a response without an `output` field deserializes to an
// empty list instead of failing to parse.
#[serde(default)]
pub output: Vec<Value>,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ResponseIncludable {
@ -411,10 +450,10 @@ pub async fn stream_response(
}
let is_streaming = request.stream;
let body = serialize_body_with_native_prefix(&request, &request.native_input_prefix)
.map_err(|e| RequestError::Other(e.into()))?;
let request = request_builder
.body(AsyncBody::from(
serde_json::to_string(&request).map_err(|e| RequestError::Other(e.into()))?,
))
.body(AsyncBody::from(body))
.map_err(|e| RequestError::Other(e.into()))?;
let mut response = client.send(request).await?;
@ -569,3 +608,121 @@ pub async fn stream_response(
})
}
}
pub async fn compact_response(
client: &dyn HttpClient,
provider_name: &str,
api_url: &str,
api_key: &str,
request: CompactRequest,
extra_headers: Vec<(String, String)>,
) -> Result<CompactedResponse, RequestError> {
let uri = format!("{api_url}/responses/compact");
let mut request_builder = HttpRequest::builder()
.method(Method::POST)
.uri(uri)
.header("Content-Type", "application/json")
.header("Authorization", format!("Bearer {}", api_key.trim()));
for (name, value) in &extra_headers {
request_builder = request_builder.header(name.as_str(), value.as_str());
}
let body = serialize_body_with_native_prefix(&request, &request.native_input_prefix)
.map_err(|e| RequestError::Other(e.into()))?;
let request = request_builder
.body(AsyncBody::from(body))
.map_err(|e| RequestError::Other(e.into()))?;
let mut response = client.send(request).await?;
let mut body = String::new();
response
.body_mut()
.read_to_string(&mut body)
.await
.map_err(|e| RequestError::Other(e.into()))?;
if response.status().is_success() {
serde_json::from_str::<CompactedResponse>(&body).map_err(|error| {
log::error!(
"Failed to parse OpenAI compact response: `{}`\nResponse: `{}`",
error,
body,
);
RequestError::Other(anyhow!(error))
})
} else {
Err(RequestError::HttpResponseError {
provider: provider_name.to_owned(),
status_code: response.status(),
body,
headers: response.headers().clone(),
})
}
}
/// Serializes `request` to a JSON string, placing `native_input_prefix` in
/// front of the body's `input` array.
///
/// With an empty prefix the request is serialized as-is. Otherwise, if the
/// serialized body is a JSON object, a missing `input` key is created as an
/// empty array and the prefix items are inserted before any existing entries.
/// A body that is not an object, or an `input` that is not an array, is left
/// untouched.
fn serialize_body_with_native_prefix<T: Serialize>(
    request: &T,
    native_input_prefix: &[Value],
) -> serde_json::Result<String> {
    let mut body = serde_json::to_value(request)?;
    if native_input_prefix.is_empty() {
        return serde_json::to_string(&body);
    }

    if let Some(fields) = body.as_object_mut() {
        let slot = fields
            .entry("input")
            .or_insert_with(|| Value::Array(Vec::new()));
        if let Some(items) = slot.as_array_mut() {
            // Rebuild the array as prefix followed by the original entries.
            let existing = std::mem::take(items);
            items.reserve(native_input_prefix.len() + existing.len());
            items.extend_from_slice(native_input_prefix);
            items.extend(existing);
        }
    }

    serde_json::to_string(&body)
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
// Checks that the native prefix item becomes the first element of the
// serialized `input` array and that regular fields such as `store` are kept.
#[test]
fn native_input_prefix_is_prepended_to_responses_body() {
let native_item = json!({
"type": "compaction",
"encrypted_content": "opaque"
});
let request = Request {
model: "gpt-5".to_string(),
instructions: None,
native_input_prefix: vec![native_item.clone()],
input: vec![ResponseInputItem::Message(ResponseMessageItem {
role: Role::User,
content: vec![ResponseInputContent::Text {
text: "Continue".to_string(),
}],
phase: None,
})],
include: Vec::new(),
stream: true,
temperature: None,
top_p: None,
max_output_tokens: None,
parallel_tool_calls: None,
tool_choice: None,
tools: Vec::new(),
prompt_cache_key: None,
reasoning: None,
store: Some(false),
};
let body = serialize_body_with_native_prefix(&request, &request.native_input_prefix)
.expect("request should serialize");
let body: Value = serde_json::from_str(&body).expect("body should be valid JSON");
let input = body["input"].as_array().expect("input should be an array");
assert_eq!(input.first(), Some(&native_item));
assert_eq!(body["store"], json!(false));
}
}