Add auto-compaction threshold settings

2026-05-31 19:05:00 +07:00 · 2026-05-29 00:14:29 -04:00 · 2026-05-29 00:14:29 -04:00 · f851cbf286
commit f851cbf286
parent dcd9da19b6
6 changed files with 297 additions and 1 deletions
--- a/assets/settings/default.json
+++ b/assets/settings/default.json
@ -1063,6 +1063,23 @@
      //   "temperature": 1.0
      // }
    ],
+    // Settings for automatically compacting agent thread context near the model's context limit.
+    "auto_compact": {
+      // Whether to automatically compact agent thread context.
+      //
+      // Default: true
+      "enabled": true,
+      // Which part of the context window is measured against the threshold.
+      // "total" - Count the full active request window
+      // "body_after_prefix" - Count only growth after the compacted prefix baseline
+      //
+      // Default: body_after_prefix
+      "scope": "body_after_prefix",
+      // Fraction of the model context window at which auto-compaction should run.
+      //
+      // Default: 0.9
+      "threshold": 0.9,
+    },
    // Permission rules for tool actions.
    //
    // The "default" setting applies when no tool-specific rules match.
--- a/crates/agent/src/thread.rs
+++ b/crates/agent/src/thread.rs
@ -16,7 +16,8 @@ use feature_flags::{

 use agent_client_protocol::schema as acp;
 use agent_settings::{
-    AgentProfileId, AgentSettings, SUMMARIZE_THREAD_DETAILED_PROMPT, SUMMARIZE_THREAD_PROMPT,
+    AgentProfileId, AgentSettings, AutoCompactScope, SUMMARIZE_THREAD_DETAILED_PROMPT,
+    SUMMARIZE_THREAD_PROMPT,
 };
 use anyhow::{Context as _, Result, anyhow};
 use chrono::{DateTime, Local, Utc};
@ -1938,6 +1939,13 @@ impl Thread {
        };
        let last_user_message_id = last_user_message.id.clone();

+        if let Some(seed) = self.current_conversation_mut().seed.as_mut()
+            && !seed.baseline_observed
+        {
+            seed.baseline_tokens = total_input_tokens(update);
+            seed.baseline_observed = true;
+        }
+
        self.current_conversation_mut()
            .request_token_usage
            .insert(last_user_message_id, update);
@ -2008,6 +2016,66 @@ impl Thread {
        })
    }

+    /// Reports whether this thread's context has grown enough to trigger auto-compaction.
+    ///
+    /// Always `false` when the `HandoffFeatureFlag` is off, when the `auto_compact` agent
+    /// setting is disabled, or when no model is configured. Otherwise the active token
+    /// count (input plus output tokens of the latest recorded request, or a local estimate
+    /// when no usage has been recorded) is checked against the configured scope/threshold.
+    fn should_auto_compact(&self, cx: &App) -> bool {
+        if !cx.has_flag::<HandoffFeatureFlag>() {
+            return false;
+        }
+
+        let auto_compact = &AgentSettings::get_global(cx).auto_compact;
+        let model = match self.model.as_ref() {
+            Some(model) if auto_compact.enabled => model,
+            _ => return false,
+        };
+
+        // Prefer provider-reported usage; only fall back to the heuristic estimate when
+        // nothing has been reported for this conversation yet.
+        let active_tokens = match self.latest_request_token_usage() {
+            Some(usage) => total_input_tokens(usage).saturating_add(usage.output_tokens),
+            None => self.estimate_model_visible_tokens(cx),
+        };
+        let context_window = model.max_token_count();
+
+        self.auto_compact_threshold_reached(
+            auto_compact.scope,
+            auto_compact.threshold,
+            active_tokens,
+            context_window,
+        )
+    }
+
+    /// Returns whether `active_tokens` has reached the auto-compaction limit for `scope`.
+    ///
+    /// The limit is `threshold * model_max_tokens`, rounded up. `threshold` is treated as a
+    /// fraction of the context window and clamped to `0.0..=1.0`, so the limit can never
+    /// exceed the window itself: without the upper bound, a threshold above 1.0 would make
+    /// the `Total` scope unreachable while `BodyAfterPrefix` still fired at the window limit.
+    ///
+    /// * `AutoCompactScope::Total` compares `active_tokens` directly against the limit.
+    /// * `AutoCompactScope::BodyAfterPrefix` first subtracts the current conversation seed's
+    ///   `baseline_tokens` (0 when there is no seed), and additionally reports `true` once
+    ///   `active_tokens` reaches `model_max_tokens` regardless of the baseline.
+    fn auto_compact_threshold_reached(
+        &self,
+        scope: AutoCompactScope,
+        threshold: f32,
+        active_tokens: u64,
+        model_max_tokens: u64,
+    ) -> bool {
+        // `clamp` passes NaN through; the float-to-int cast below then maps it to 0, which
+        // matches how negative thresholds are treated (limit of 0).
+        let fraction = f64::from(threshold).clamp(0.0, 1.0);
+        let limit = ((model_max_tokens as f64) * fraction).ceil() as u64;
+
+        match scope {
+            AutoCompactScope::Total => active_tokens >= limit,
+            AutoCompactScope::BodyAfterPrefix => {
+                let baseline = self
+                    .current_conversation()
+                    .seed
+                    .as_ref()
+                    .map_or(0, |seed| seed.baseline_tokens);
+                let body_tokens = active_tokens.saturating_sub(baseline);
+                // The body can stay under the limit while the full request already fills
+                // the window, so the hard window limit is checked separately.
+                body_tokens >= limit || active_tokens >= model_max_tokens
+            }
+        }
+    }
+
+    /// Heuristically estimates the token count of the request this thread would send.
+    ///
+    /// Builds the request messages (passing an empty `Vec` as the first argument) and sums
+    /// their estimated size via `estimate_request_tokens`.
+    fn estimate_model_visible_tokens(&self, cx: &App) -> u64 {
+        estimate_request_tokens(&self.build_request_messages(Vec::new(), cx))
+    }
+
+    /// Logs (at info level) that the auto-compaction threshold was reached at `phase`.
+    ///
+    /// This only logs; per the message itself, the handoff is not wired up yet.
+    fn log_auto_compact_hook(&self, phase: &str, cx: &App) {
+        if !self.should_auto_compact(cx) {
+            return;
+        }
+        log::info!("auto-compaction threshold reached at {phase}; handoff is not wired yet");
+    }
+
    /// Get the total input token count as of the message before the given message.
    ///
    /// Returns `None` if:
@ -2239,6 +2307,17 @@ impl Thread {
                    .clone()
                    .ok_or_else(|| anyhow!(NoModelConfiguredError))?;
                this.refresh_turn_tools(cx);
+                if attempt == 0 {
+                    match intent {
+                        CompletionIntent::UserPrompt | CompletionIntent::Subagent => {
+                            this.log_auto_compact_hook("pre-turn", cx);
+                        }
+                        CompletionIntent::ToolResults => {
+                            this.log_auto_compact_hook("mid-turn", cx);
+                        }
+                        _ => {}
+                    }
+                }
                let request = this.build_completion_request(intent, cx)?;
                anyhow::Ok((model, request))
            })??;
@ -3519,6 +3598,37 @@ fn total_input_tokens(usage: language_model::TokenUsage) -> u64 {
        .saturating_add(usage.cache_read_input_tokens)
 }

+/// Heuristically estimates how many tokens `messages` occupy in a request.
+///
+/// Every content item of every message contributes:
+/// * text, thinking and redacted-thinking content: `estimate_text_tokens` of the text;
+/// * images (top-level or inside a tool result): a flat 1024 tokens;
+/// * tool uses: `estimate_text_tokens` of the raw input plus a flat 256 tokens;
+/// * tool results: the sum over their content parts.
+///
+/// All additions saturate at `u64::MAX` instead of overflowing.
+fn estimate_request_tokens(messages: &[LanguageModelRequestMessage]) -> u64 {
+    // Flat charges used where the content has no text to measure.
+    const IMAGE_TOKENS: u64 = 1024;
+    const TOOL_USE_OVERHEAD_TOKENS: u64 = 256;
+
+    let mut total = 0_u64;
+    for content in messages.iter().flat_map(|message| &message.content) {
+        let tokens = match content {
+            language_model::MessageContent::Text(text) => estimate_text_tokens(text),
+            language_model::MessageContent::Thinking { text, .. } => estimate_text_tokens(text),
+            language_model::MessageContent::RedactedThinking(text) => estimate_text_tokens(text),
+            language_model::MessageContent::Image(_) => IMAGE_TOKENS,
+            language_model::MessageContent::ToolUse(tool_use) => {
+                estimate_text_tokens(&tool_use.raw_input).saturating_add(TOOL_USE_OVERHEAD_TOKENS)
+            }
+            // Saturate here as well so the whole estimate follows one overflow policy.
+            language_model::MessageContent::ToolResult(tool_result) => tool_result
+                .content
+                .iter()
+                .map(|part| match part {
+                    LanguageModelToolResultContent::Text(text) => estimate_text_tokens(text),
+                    LanguageModelToolResultContent::Image(_) => IMAGE_TOKENS,
+                })
+                .fold(0_u64, u64::saturating_add),
+        };
+        total = total.saturating_add(tokens);
+    }
+    total
+}
+
+/// Estimates the token count of `text` as one token per 4 bytes of UTF-8, rounded up.
+///
+/// An empty string yields 0.
+fn estimate_text_tokens(text: &str) -> u64 {
+    const BYTES_PER_TOKEN: usize = 4;
+    let byte_len = text.len();
+    byte_len.div_ceil(BYTES_PER_TOKEN) as u64
+}
+
 struct RunningTurn {
    /// Holds the task that handles agent interaction until the end of the turn.
    /// Survives across multiple requests as the model performs tool calls and
@ -4869,6 +4979,104 @@ mod tests {
        });
    }

+    // Exercises `auto_compact_threshold_reached` for both scopes around the limit boundary.
+    #[gpui::test]
+    async fn test_auto_compact_threshold_scopes(cx: &mut TestAppContext) {
+        let (thread, _event_stream) = setup_thread_for_test(cx).await;
+
+        cx.update(|cx| {
+            thread.update(cx, |thread, _cx| {
+                // Total scope: the limit is ceil(100 * 0.9) = 90, so 89 is below it...
+                assert!(!thread.auto_compact_threshold_reached(
+                    AutoCompactScope::Total,
+                    0.9,
+                    89,
+                    100
+                ));
+                // ...and 90 reaches it.
+                assert!(thread.auto_compact_threshold_reached(
+                    AutoCompactScope::Total,
+                    0.9,
+                    90,
+                    100
+                ));
+
+                // Install a seed with a 50-token baseline for the BodyAfterPrefix checks.
+                thread.current_conversation_mut().seed = Some(CompactionSeed {
+                    artifact: CompactionArtifact::Summary("summary".into()),
+                    retained_user_messages: Vec::new(),
+                    baseline_tokens: 50,
+                    baseline_observed: true,
+                });
+
+                // 139 - 50 = 89 body tokens, below the 900-token limit of a 1000-token
+                // window, and 139 is also below the window itself.
+                assert!(!thread.auto_compact_threshold_reached(
+                    AutoCompactScope::BodyAfterPrefix,
+                    0.9,
+                    139,
+                    1000
+                ));
+                // 140 - 50 = 90 body tokens reach the 90-token limit of a 100-token window
+                // (140 also exceeds the window).
+                assert!(thread.auto_compact_threshold_reached(
+                    AutoCompactScope::BodyAfterPrefix,
+                    0.9,
+                    140,
+                    100
+                ));
+                // Only 50 body tokens, but the active count equals the window size, so the
+                // hard window limit applies.
+                assert!(thread.auto_compact_threshold_reached(
+                    AutoCompactScope::BodyAfterPrefix,
+                    0.9,
+                    100,
+                    100
+                ));
+            });
+        });
+    }
+
+    // Verifies that the first usage update after compaction records the seed baseline and
+    // that later updates leave it untouched.
+    #[gpui::test]
+    async fn test_usage_update_observes_compaction_baseline_once(cx: &mut TestAppContext) {
+        let (thread, _event_stream) = setup_thread_for_test(cx).await;
+        let user_message_id = UserMessageId::new();
+
+        cx.update(|cx| {
+            thread.update(cx, |thread, cx| {
+                // A user message is pushed first so the usage updates below have a message
+                // to be recorded against.
+                thread
+                    .current_conversation_mut()
+                    .messages
+                    .push(Message::User(UserMessage {
+                        id: user_message_id,
+                        content: vec!["hello".into()],
+                    }));
+                // Seed whose baseline has not been observed yet; the placeholder value 12
+                // is expected to be overwritten.
+                thread.current_conversation_mut().seed = Some(CompactionSeed {
+                    artifact: CompactionArtifact::Summary("summary".into()),
+                    retained_user_messages: Vec::new(),
+                    baseline_tokens: 12,
+                    baseline_observed: false,
+                });
+
+                // First update: the baseline becomes 30 input + 5 cache-creation tokens
+                // (output tokens are not part of it).
+                thread.update_token_usage(
+                    TokenUsage {
+                        input_tokens: 30,
+                        output_tokens: 7,
+                        cache_creation_input_tokens: 5,
+                        cache_read_input_tokens: 0,
+                    },
+                    cx,
+                );
+                let seed = thread.current_conversation().seed.as_ref().unwrap();
+                assert_eq!(seed.baseline_tokens, 35);
+                assert!(seed.baseline_observed);
+
+                // Second update: the baseline is already observed and must not change.
+                thread.update_token_usage(
+                    TokenUsage {
+                        input_tokens: 80,
+                        output_tokens: 9,
+                        cache_creation_input_tokens: 0,
+                        cache_read_input_tokens: 0,
+                    },
+                    cx,
+                );
+                let seed = thread.current_conversation().seed.as_ref().unwrap();
+                assert_eq!(seed.baseline_tokens, 35);
+                assert!(seed.baseline_observed);
+            });
+        });
+    }
+
    struct ReplayImageTool;

    impl AgentTool for ReplayImageTool {
--- a/crates/agent/src/tool_permissions.rs
+++ b/crates/agent/src/tool_permissions.rs
@ -589,6 +589,7 @@ mod tests {
            play_sound_when_agent_done: PlaySoundWhenAgentDone::default(),
            single_file_review: false,
            model_parameters: vec![],
+            auto_compact: Default::default(),
            enable_feedback: false,
            expand_edit_card: true,
            expand_terminal_card: true,
--- a/crates/agent_settings/src/agent_settings.rs
+++ b/crates/agent_settings/src/agent_settings.rs
@ -20,6 +20,7 @@ use settings::{
 };

 pub use crate::agent_profile::*;
+pub use settings::AutoCompactScope;

 pub const SUMMARIZE_THREAD_PROMPT: &str = include_str!("prompts/summarize_thread_prompt.txt");
 pub const SUMMARIZE_THREAD_DETAILED_PROMPT: &str =
@ -158,6 +159,7 @@ pub struct AgentSettings {
    pub play_sound_when_agent_done: PlaySoundWhenAgentDone,
    pub single_file_review: bool,
    pub model_parameters: Vec<LanguageModelParameters>,
+    pub auto_compact: AutoCompactSettings,
    pub enable_feedback: bool,
    pub expand_edit_card: bool,
    pub expand_terminal_card: bool,
@ -170,6 +172,23 @@ pub struct AgentSettings {
    pub tool_permissions: ToolPermissions,
 }

+/// Resolved settings controlling automatic compaction of agent thread context.
+#[derive(Debug, Clone, PartialEq)]
+pub struct AutoCompactSettings {
+    /// Whether auto-compaction is enabled.
+    pub enabled: bool,
+    /// Which part of the context window is measured against `threshold`.
+    pub scope: AutoCompactScope,
+    /// Fraction of the model context window at which auto-compaction should run.
+    pub threshold: f32,
+}
+
+/// Defaults: enabled, `BodyAfterPrefix` scope, threshold `0.9`.
+impl Default for AutoCompactSettings {
+    fn default() -> Self {
+        Self {
+            enabled: true,
+            scope: AutoCompactScope::BodyAfterPrefix,
+            threshold: 0.9,
+        }
+    }
+}
+
 impl AgentSettings {
    pub fn enabled(&self, cx: &App) -> bool {
        self.enabled && !DisableAiSettings::get_global(cx).disable_ai
@ -662,6 +681,14 @@ impl Settings for AgentSettings {
            play_sound_when_agent_done: agent.play_sound_when_agent_done.unwrap_or_default(),
            single_file_review: agent.single_file_review.unwrap(),
            model_parameters: agent.model_parameters,
+            auto_compact: {
+                let auto_compact = agent.auto_compact.unwrap();
+                AutoCompactSettings {
+                    enabled: auto_compact.enabled.unwrap(),
+                    scope: auto_compact.scope.unwrap(),
+                    threshold: auto_compact.threshold.unwrap(),
+                }
+            },
            enable_feedback: agent.enable_feedback.unwrap(),
            expand_edit_card: agent.expand_edit_card.unwrap(),
            expand_terminal_card: agent.expand_terminal_card.unwrap(),
--- a/crates/agent_ui/src/agent_ui.rs
+++ b/crates/agent_ui/src/agent_ui.rs
@ -863,6 +863,7 @@ mod tests {
            play_sound_when_agent_done: PlaySoundWhenAgentDone::Never,
            single_file_review: false,
            model_parameters: vec![],
+            auto_compact: Default::default(),
            enable_feedback: false,
            expand_edit_card: true,
            expand_terminal_card: true,
--- a/crates/settings_content/src/agent.rs
+++ b/crates/settings_content/src/agent.rs
@ -71,6 +71,46 @@ pub enum ThinkingBlockDisplay {
    AlwaysCollapsed,
 }

+// Selects which token count is compared against the auto-compaction threshold.
+// Variants are (de)serialized in snake_case (`total`, `body_after_prefix`).
+#[derive(
+    Clone,
+    Copy,
+    Debug,
+    Default,
+    PartialEq,
+    Eq,
+    Serialize,
+    Deserialize,
+    JsonSchema,
+    MergeFrom,
+    strum::VariantArray,
+    strum::VariantNames,
+)]
+#[serde(rename_all = "snake_case")]
+pub enum AutoCompactScope {
+    /// Count the full active request window against the threshold.
+    Total,
+    /// Count only growth after the compacted prefix baseline against the threshold.
+    #[default]
+    BodyAfterPrefix,
+}
+
+// User-facing (settings file) shape of the `auto_compact` agent settings. Every field is
+// optional here; the documented defaults are the values shipped in the default settings.
+#[with_fallible_options]
+#[derive(Clone, PartialEq, Serialize, Deserialize, JsonSchema, MergeFrom, Debug, Default)]
+pub struct AutoCompactSettingsContent {
+    /// Whether to automatically compact agent thread context near the model limit.
+    ///
+    /// Default: true
+    pub enabled: Option<bool>,
+    /// Which part of the context window is measured against the threshold.
+    ///
+    /// Default: body_after_prefix
+    pub scope: Option<AutoCompactScope>,
+    /// Fraction of the model context window at which auto-compaction should run.
+    ///
+    /// Default: 0.9
+    pub threshold: Option<f32>,
+}
+
 #[with_fallible_options]
 #[derive(Clone, PartialEq, Serialize, Deserialize, JsonSchema, MergeFrom, Debug, Default)]
 pub struct AgentSettingsContent {
@ -162,6 +202,8 @@ pub struct AgentSettingsContent {
    /// Default: []
    #[serde(default)]
    pub model_parameters: Vec<LanguageModelParameters>,
+    /// Settings for automatic agent context compaction.
+    pub auto_compact: Option<AutoCompactSettingsContent>,
    /// Whether to show thumb buttons for feedback in the agent panel.
    ///
    /// Default: true