Add auto-compaction threshold settings

This commit is contained in:
Richard Feldman 2026-05-29 00:14:29 -04:00
parent dcd9da19b6
commit f851cbf286
No known key found for this signature in database
6 changed files with 297 additions and 1 deletions

View file

@ -1063,6 +1063,23 @@
// "temperature": 1.0
// }
],
// Settings for automatically compacting agent thread context near the model's context limit.
"auto_compact": {
// Whether to automatically compact agent thread context.
//
// Default: true
"enabled": true,
// Which part of the context window is measured against the threshold.
// "total" - Count the full active request window
// "body_after_prefix" - Count only growth after the compacted prefix baseline
//
// Default: body_after_prefix
"scope": "body_after_prefix",
// Fraction of the model's context window (e.g. 0.9 = 90%) at which auto-compaction should run.
//
// Default: 0.9
"threshold": 0.9,
},
// Permission rules for tool actions.
//
// The "default" setting applies when no tool-specific rules match.

View file

@ -16,7 +16,8 @@ use feature_flags::{
use agent_client_protocol::schema as acp;
use agent_settings::{
AgentProfileId, AgentSettings, SUMMARIZE_THREAD_DETAILED_PROMPT, SUMMARIZE_THREAD_PROMPT,
AgentProfileId, AgentSettings, AutoCompactScope, SUMMARIZE_THREAD_DETAILED_PROMPT,
SUMMARIZE_THREAD_PROMPT,
};
use anyhow::{Context as _, Result, anyhow};
use chrono::{DateTime, Local, Utc};
@ -1938,6 +1939,13 @@ impl Thread {
};
let last_user_message_id = last_user_message.id.clone();
if let Some(seed) = self.current_conversation_mut().seed.as_mut()
&& !seed.baseline_observed
{
seed.baseline_tokens = total_input_tokens(update);
seed.baseline_observed = true;
}
self.current_conversation_mut()
.request_token_usage
.insert(last_user_message_id, update);
@ -2008,6 +2016,66 @@ impl Thread {
})
}
/// Decides whether the thread's active context is large enough to warrant auto-compaction.
///
/// Returns `false` when `HandoffFeatureFlag` is off, when auto-compaction is disabled in the
/// agent settings, or when the thread has no model. Otherwise the active token count is checked
/// against the configured scope and threshold for the model's maximum token count.
fn should_auto_compact(&self, cx: &App) -> bool {
    if !cx.has_flag::<HandoffFeatureFlag>() {
        return false;
    }

    let auto_compact = &AgentSettings::get_global(cx).auto_compact;
    let model = match self.model.as_ref() {
        Some(model) if auto_compact.enabled => model,
        _ => return false,
    };

    // Prefer the usage recorded for the latest request (input plus output tokens); fall back to
    // a local estimate when no usage is available.
    let active_tokens = match self.latest_request_token_usage() {
        Some(usage) => total_input_tokens(usage).saturating_add(usage.output_tokens),
        None => self.estimate_model_visible_tokens(cx),
    };

    self.auto_compact_threshold_reached(
        auto_compact.scope,
        auto_compact.threshold,
        active_tokens,
        model.max_token_count(),
    )
}
/// Checks whether `active_tokens` has reached the auto-compaction limit for a model window.
///
/// The limit is `model_max_tokens * threshold`, rounded up to a whole token; a negative product
/// is treated as zero.
///
/// * `scope` - which token count is compared against the limit: the whole `active_tokens` value
///   (`Total`), or only the tokens beyond the current conversation seed's `baseline_tokens`
///   (`BodyAfterPrefix`; the baseline is zero when there is no seed).
/// * `threshold` - fraction of `model_max_tokens` at which compaction should run.
/// * `active_tokens` - tokens currently occupying the request window.
/// * `model_max_tokens` - size of the model's context window in tokens.
///
/// Returns `true` when the measured count reaches the limit, or when the window is completely
/// full regardless of scope.
fn auto_compact_threshold_reached(
    &self,
    scope: AutoCompactScope,
    threshold: f32,
    active_tokens: u64,
    model_max_tokens: u64,
) -> bool {
    // A full window must trigger compaction in every scope. Without this guard a threshold above
    // 1.0 places the limit past the window, and the `Total` scope could never fire.
    if active_tokens >= model_max_tokens {
        return true;
    }

    // Float-to-integer `as` casts saturate, so an oversized product cannot wrap around.
    let limit = ((model_max_tokens as f64) * f64::from(threshold))
        .max(0.0)
        .ceil() as u64;

    let measured_tokens = match scope {
        AutoCompactScope::Total => active_tokens,
        AutoCompactScope::BodyAfterPrefix => {
            let baseline = self
                .current_conversation()
                .seed
                .as_ref()
                .map_or(0, |seed| seed.baseline_tokens);
            // Only growth beyond the compacted prefix counts toward the limit.
            active_tokens.saturating_sub(baseline)
        }
    };

    measured_tokens >= limit
}
/// Returns a heuristic token estimate for the request messages built from this thread.
///
/// The messages come from `build_request_messages`, called with an empty list as its first
/// argument, and are sized with `estimate_request_tokens`.
fn estimate_model_visible_tokens(&self, cx: &App) -> u64 {
    estimate_request_tokens(&self.build_request_messages(Vec::new(), cx))
}
/// Logs an info message when the auto-compaction threshold has been reached.
///
/// `phase` is interpolated into the message to identify where in the turn the check ran. Nothing
/// besides logging happens here.
fn log_auto_compact_hook(&self, phase: &str, cx: &App) {
    if !self.should_auto_compact(cx) {
        return;
    }
    log::info!("auto-compaction threshold reached at {phase}; handoff is not wired yet");
}
/// Get the total input token count as of the message before the given message.
///
/// Returns `None` if:
@ -2239,6 +2307,17 @@ impl Thread {
.clone()
.ok_or_else(|| anyhow!(NoModelConfiguredError))?;
this.refresh_turn_tools(cx);
if attempt == 0 {
match intent {
CompletionIntent::UserPrompt | CompletionIntent::Subagent => {
this.log_auto_compact_hook("pre-turn", cx);
}
CompletionIntent::ToolResults => {
this.log_auto_compact_hook("mid-turn", cx);
}
_ => {}
}
}
let request = this.build_completion_request(intent, cx)?;
anyhow::Ok((model, request))
})??;
@ -3519,6 +3598,37 @@ fn total_input_tokens(usage: language_model::TokenUsage) -> u64 {
.saturating_add(usage.cache_read_input_tokens)
}
/// Produces a rough token estimate for a list of request messages.
///
/// Every content item of every message contributes to the total:
/// text, thinking and redacted-thinking items use `estimate_text_tokens`; an image counts as a
/// flat 1024 tokens; a tool use counts its raw input text plus 256 tokens; a tool result counts
/// the sum of its text and image parts. The running total saturates instead of overflowing.
fn estimate_request_tokens(messages: &[LanguageModelRequestMessage]) -> u64 {
    const IMAGE_TOKENS: u64 = 1024;
    const TOOL_USE_EXTRA_TOKENS: u64 = 256;

    let mut total = 0_u64;
    for message in messages {
        for content in &message.content {
            let estimate = match content {
                language_model::MessageContent::Text(text) => estimate_text_tokens(text),
                language_model::MessageContent::Thinking { text, .. } => {
                    estimate_text_tokens(text)
                }
                language_model::MessageContent::RedactedThinking(text) => {
                    estimate_text_tokens(text)
                }
                language_model::MessageContent::Image(_) => IMAGE_TOKENS,
                language_model::MessageContent::ToolUse(tool_use) => {
                    estimate_text_tokens(&tool_use.raw_input).saturating_add(TOOL_USE_EXTRA_TOKENS)
                }
                language_model::MessageContent::ToolResult(tool_result) => tool_result
                    .content
                    .iter()
                    .map(|part| match part {
                        LanguageModelToolResultContent::Text(text) => estimate_text_tokens(text),
                        LanguageModelToolResultContent::Image(_) => IMAGE_TOKENS,
                    })
                    .sum::<u64>(),
            };
            total = total.saturating_add(estimate);
        }
    }
    total
}
/// Approximates the token count of `text` as its UTF-8 byte length divided by four, rounded up.
///
/// An empty string yields zero.
fn estimate_text_tokens(text: &str) -> u64 {
    const BYTES_PER_TOKEN: usize = 4;
    let byte_len = text.len();
    byte_len.div_ceil(BYTES_PER_TOKEN) as u64
}
struct RunningTurn {
/// Holds the task that handles agent interaction until the end of the turn.
/// Survives across multiple requests as the model performs tool calls and
@ -4869,6 +4979,104 @@ mod tests {
});
}
/// Exercises `auto_compact_threshold_reached` for both scopes, including the exact boundary on
/// each side of the limit and the full-window fallback.
#[gpui::test]
async fn test_auto_compact_threshold_scopes(cx: &mut TestAppContext) {
    let (thread, _event_stream) = setup_thread_for_test(cx).await;
    cx.update(|cx| {
        thread.update(cx, |thread, _cx| {
            // `Total`: the limit for a 100-token window at 0.9 is 90 tokens.
            assert!(!thread.auto_compact_threshold_reached(
                AutoCompactScope::Total,
                0.9,
                89,
                100
            ));
            assert!(thread.auto_compact_threshold_reached(
                AutoCompactScope::Total,
                0.9,
                90,
                100
            ));

            // Install a seed so `BodyAfterPrefix` subtracts a 50-token baseline.
            thread.current_conversation_mut().seed = Some(CompactionSeed {
                artifact: CompactionArtifact::Summary("summary".into()),
                retained_user_messages: Vec::new(),
                baseline_tokens: 50,
                baseline_observed: true,
            });

            // Far below the 900-token limit of a 1000-token window.
            assert!(!thread.auto_compact_threshold_reached(
                AutoCompactScope::BodyAfterPrefix,
                0.9,
                139,
                1000
            ));
            // Tight boundary: 949 - 50 = 899 is one token short of the 900-token limit and the
            // window is not full, so nothing should trigger.
            assert!(!thread.auto_compact_threshold_reached(
                AutoCompactScope::BodyAfterPrefix,
                0.9,
                949,
                1000
            ));
            // 950 - 50 = 900 reaches the limit exactly, without relying on the full-window
            // fallback.
            assert!(thread.auto_compact_threshold_reached(
                AutoCompactScope::BodyAfterPrefix,
                0.9,
                950,
                1000
            ));
            // Body of 90 tokens reaches the 90-token limit of a 100-token window.
            assert!(thread.auto_compact_threshold_reached(
                AutoCompactScope::BodyAfterPrefix,
                0.9,
                140,
                100
            ));
            // Body of 50 tokens is below the limit, but the window itself is full.
            assert!(thread.auto_compact_threshold_reached(
                AutoCompactScope::BodyAfterPrefix,
                0.9,
                100,
                100
            ));
        });
    });
}
/// Verifies that the compaction seed's baseline is captured from the first usage update only
/// and is left untouched by later updates.
#[gpui::test]
async fn test_usage_update_observes_compaction_baseline_once(cx: &mut TestAppContext) {
    let (thread, _event_stream) = setup_thread_for_test(cx).await;
    let user_message_id = UserMessageId::new();
    cx.update(|cx| {
        thread.update(cx, |thread, cx| {
            let conversation = thread.current_conversation_mut();
            conversation.messages.push(Message::User(UserMessage {
                id: user_message_id,
                content: vec!["hello".into()],
            }));
            // Start with a placeholder baseline that has not been observed yet.
            conversation.seed = Some(CompactionSeed {
                artifact: CompactionArtifact::Summary("summary".into()),
                retained_user_messages: Vec::new(),
                baseline_tokens: 12,
                baseline_observed: false,
            });

            let first_report = TokenUsage {
                input_tokens: 30,
                output_tokens: 7,
                cache_creation_input_tokens: 5,
                cache_read_input_tokens: 0,
            };
            let second_report = TokenUsage {
                input_tokens: 80,
                output_tokens: 9,
                cache_creation_input_tokens: 0,
                cache_read_input_tokens: 0,
            };

            // The first report sets the baseline to 35 (30 input + 5 cache-creation tokens);
            // the second report must leave it unchanged.
            for report in [first_report, second_report] {
                thread.update_token_usage(report, cx);
                let seed = thread.current_conversation().seed.as_ref().unwrap();
                assert_eq!(seed.baseline_tokens, 35);
                assert!(seed.baseline_observed);
            }
        });
    });
}
struct ReplayImageTool;
impl AgentTool for ReplayImageTool {

View file

@ -589,6 +589,7 @@ mod tests {
play_sound_when_agent_done: PlaySoundWhenAgentDone::default(),
single_file_review: false,
model_parameters: vec![],
auto_compact: Default::default(),
enable_feedback: false,
expand_edit_card: true,
expand_terminal_card: true,

View file

@ -20,6 +20,7 @@ use settings::{
};
pub use crate::agent_profile::*;
pub use settings::AutoCompactScope;
pub const SUMMARIZE_THREAD_PROMPT: &str = include_str!("prompts/summarize_thread_prompt.txt");
pub const SUMMARIZE_THREAD_DETAILED_PROMPT: &str =
@ -158,6 +159,7 @@ pub struct AgentSettings {
pub play_sound_when_agent_done: PlaySoundWhenAgentDone,
pub single_file_review: bool,
pub model_parameters: Vec<LanguageModelParameters>,
pub auto_compact: AutoCompactSettings,
pub enable_feedback: bool,
pub expand_edit_card: bool,
pub expand_terminal_card: bool,
@ -170,6 +172,23 @@ pub struct AgentSettings {
pub tool_permissions: ToolPermissions,
}
/// Resolved auto-compaction settings, stored in the `auto_compact` field of `AgentSettings`.
///
/// Unlike the settings content type, every field here holds a concrete value.
#[derive(Debug, Clone, PartialEq)]
pub struct AutoCompactSettings {
/// Whether agent thread context is compacted automatically.
pub enabled: bool,
/// Which token count is measured against `threshold`.
pub scope: AutoCompactScope,
/// Fraction of the model's context window at which auto-compaction should run.
pub threshold: f32,
}
impl Default for AutoCompactSettings {
    /// Returns the built-in defaults: enabled, `BodyAfterPrefix` scope, and a 0.9 threshold.
    fn default() -> Self {
        let enabled = true;
        let scope = AutoCompactScope::BodyAfterPrefix;
        let threshold = 0.9;
        Self {
            enabled,
            scope,
            threshold,
        }
    }
}
impl AgentSettings {
pub fn enabled(&self, cx: &App) -> bool {
self.enabled && !DisableAiSettings::get_global(cx).disable_ai
@ -662,6 +681,14 @@ impl Settings for AgentSettings {
play_sound_when_agent_done: agent.play_sound_when_agent_done.unwrap_or_default(),
single_file_review: agent.single_file_review.unwrap(),
model_parameters: agent.model_parameters,
auto_compact: {
let auto_compact = agent.auto_compact.unwrap();
AutoCompactSettings {
enabled: auto_compact.enabled.unwrap(),
scope: auto_compact.scope.unwrap(),
threshold: auto_compact.threshold.unwrap(),
}
},
enable_feedback: agent.enable_feedback.unwrap(),
expand_edit_card: agent.expand_edit_card.unwrap(),
expand_terminal_card: agent.expand_terminal_card.unwrap(),

View file

@ -863,6 +863,7 @@ mod tests {
play_sound_when_agent_done: PlaySoundWhenAgentDone::Never,
single_file_review: false,
model_parameters: vec![],
auto_compact: Default::default(),
enable_feedback: false,
expand_edit_card: true,
expand_terminal_card: true,

View file

@ -71,6 +71,46 @@ pub enum ThinkingBlockDisplay {
AlwaysCollapsed,
}
/// Selects which token count is compared against the auto-compaction threshold.
///
/// Serialized in snake_case (`total`, `body_after_prefix`); `BodyAfterPrefix` is the default.
#[derive(
Clone,
Copy,
Debug,
Default,
PartialEq,
Eq,
Serialize,
Deserialize,
JsonSchema,
MergeFrom,
strum::VariantArray,
strum::VariantNames,
)]
#[serde(rename_all = "snake_case")]
pub enum AutoCompactScope {
/// Count the full active request window against the threshold.
Total,
/// Count only growth after the compacted prefix baseline against the threshold.
#[default]
BodyAfterPrefix,
}
/// Serialized form of the auto-compaction settings, in which every field is optional.
#[with_fallible_options]
#[derive(Clone, PartialEq, Serialize, Deserialize, JsonSchema, MergeFrom, Debug, Default)]
pub struct AutoCompactSettingsContent {
/// Whether to automatically compact agent thread context near the model limit.
///
/// Default: true
pub enabled: Option<bool>,
/// Which part of the context window is measured against the threshold.
///
/// Default: body_after_prefix
pub scope: Option<AutoCompactScope>,
/// Fraction of the model context window at which auto-compaction should run.
///
/// Default: 0.9
pub threshold: Option<f32>,
}
#[with_fallible_options]
#[derive(Clone, PartialEq, Serialize, Deserialize, JsonSchema, MergeFrom, Debug, Default)]
pub struct AgentSettingsContent {
@ -162,6 +202,8 @@ pub struct AgentSettingsContent {
/// Default: []
#[serde(default)]
pub model_parameters: Vec<LanguageModelParameters>,
/// Settings for automatic agent context compaction.
pub auto_compact: Option<AutoCompactSettingsContent>,
/// Whether to show thumb buttons for feedback in the agent panel.
///
/// Default: true