mirror of
https://github.com/zed-industries/zed.git
synced 2026-05-31 19:05:00 +07:00
Add auto-compaction threshold settings
This commit is contained in:
parent
dcd9da19b6
commit
f851cbf286
6 changed files with 297 additions and 1 deletions
|
|
@ -1063,6 +1063,23 @@
|
|||
// "temperature": 1.0
|
||||
// }
|
||||
],
|
||||
// Settings for automatically compacting agent thread context near the model's context limit.
|
||||
"auto_compact": {
|
||||
// Whether to automatically compact agent thread context.
|
||||
//
|
||||
// Default: true
|
||||
"enabled": true,
|
||||
// Which part of the context window is measured against the threshold.
|
||||
// "total" - Count the full active request window
|
||||
// "body_after_prefix" - Count only growth after the compacted prefix baseline
|
||||
//
|
||||
// Default: body_after_prefix
|
||||
"scope": "body_after_prefix",
|
||||
// Fraction of the model context window at which auto-compaction should run.
|
||||
//
|
||||
// Default: 0.9
|
||||
"threshold": 0.9,
|
||||
},
|
||||
// Permission rules for tool actions.
|
||||
//
|
||||
// The "default" setting applies when no tool-specific rules match.
|
||||
|
|
|
|||
|
|
@ -16,7 +16,8 @@ use feature_flags::{
|
|||
|
||||
use agent_client_protocol::schema as acp;
|
||||
use agent_settings::{
|
||||
AgentProfileId, AgentSettings, SUMMARIZE_THREAD_DETAILED_PROMPT, SUMMARIZE_THREAD_PROMPT,
|
||||
AgentProfileId, AgentSettings, AutoCompactScope, SUMMARIZE_THREAD_DETAILED_PROMPT,
|
||||
SUMMARIZE_THREAD_PROMPT,
|
||||
};
|
||||
use anyhow::{Context as _, Result, anyhow};
|
||||
use chrono::{DateTime, Local, Utc};
|
||||
|
|
@ -1938,6 +1939,13 @@ impl Thread {
|
|||
};
|
||||
let last_user_message_id = last_user_message.id.clone();
|
||||
|
||||
if let Some(seed) = self.current_conversation_mut().seed.as_mut()
|
||||
&& !seed.baseline_observed
|
||||
{
|
||||
seed.baseline_tokens = total_input_tokens(update);
|
||||
seed.baseline_observed = true;
|
||||
}
|
||||
|
||||
self.current_conversation_mut()
|
||||
.request_token_usage
|
||||
.insert(last_user_message_id, update);
|
||||
|
|
@ -2008,6 +2016,66 @@ impl Thread {
|
|||
})
|
||||
}
|
||||
|
||||
fn should_auto_compact(&self, cx: &App) -> bool {
|
||||
if !cx.has_flag::<HandoffFeatureFlag>() {
|
||||
return false;
|
||||
}
|
||||
let settings = &AgentSettings::get_global(cx).auto_compact;
|
||||
if !settings.enabled {
|
||||
return false;
|
||||
}
|
||||
let Some(model) = self.model.as_ref() else {
|
||||
return false;
|
||||
};
|
||||
let active_tokens = self
|
||||
.latest_request_token_usage()
|
||||
.map(|usage| total_input_tokens(usage).saturating_add(usage.output_tokens))
|
||||
.unwrap_or_else(|| self.estimate_model_visible_tokens(cx));
|
||||
|
||||
self.auto_compact_threshold_reached(
|
||||
settings.scope,
|
||||
settings.threshold,
|
||||
active_tokens,
|
||||
model.max_token_count(),
|
||||
)
|
||||
}
|
||||
|
||||
fn auto_compact_threshold_reached(
|
||||
&self,
|
||||
scope: AutoCompactScope,
|
||||
threshold: f32,
|
||||
active_tokens: u64,
|
||||
model_max_tokens: u64,
|
||||
) -> bool {
|
||||
let limit = ((model_max_tokens as f64) * f64::from(threshold))
|
||||
.max(0.0)
|
||||
.ceil() as u64;
|
||||
|
||||
match scope {
|
||||
AutoCompactScope::Total => active_tokens >= limit,
|
||||
AutoCompactScope::BodyAfterPrefix => {
|
||||
let baseline = self
|
||||
.current_conversation()
|
||||
.seed
|
||||
.as_ref()
|
||||
.map_or(0, |seed| seed.baseline_tokens);
|
||||
let body_tokens = active_tokens.saturating_sub(baseline);
|
||||
body_tokens >= limit || active_tokens >= model_max_tokens
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn estimate_model_visible_tokens(&self, cx: &App) -> u64 {
|
||||
let messages = self.build_request_messages(Vec::new(), cx);
|
||||
estimate_request_tokens(&messages)
|
||||
}
|
||||
|
||||
fn log_auto_compact_hook(&self, phase: &str, cx: &App) {
|
||||
if self.should_auto_compact(cx) {
|
||||
log::info!("auto-compaction threshold reached at {phase}; handoff is not wired yet");
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the total input token count as of the message before the given message.
|
||||
///
|
||||
/// Returns `None` if:
|
||||
|
|
@ -2239,6 +2307,17 @@ impl Thread {
|
|||
.clone()
|
||||
.ok_or_else(|| anyhow!(NoModelConfiguredError))?;
|
||||
this.refresh_turn_tools(cx);
|
||||
if attempt == 0 {
|
||||
match intent {
|
||||
CompletionIntent::UserPrompt | CompletionIntent::Subagent => {
|
||||
this.log_auto_compact_hook("pre-turn", cx);
|
||||
}
|
||||
CompletionIntent::ToolResults => {
|
||||
this.log_auto_compact_hook("mid-turn", cx);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
let request = this.build_completion_request(intent, cx)?;
|
||||
anyhow::Ok((model, request))
|
||||
})??;
|
||||
|
|
@ -3519,6 +3598,37 @@ fn total_input_tokens(usage: language_model::TokenUsage) -> u64 {
|
|||
.saturating_add(usage.cache_read_input_tokens)
|
||||
}
|
||||
|
||||
fn estimate_request_tokens(messages: &[LanguageModelRequestMessage]) -> u64 {
|
||||
messages
|
||||
.iter()
|
||||
.flat_map(|message| &message.content)
|
||||
.fold(0_u64, |tokens, content| {
|
||||
tokens.saturating_add(match content {
|
||||
language_model::MessageContent::Text(text) => estimate_text_tokens(text),
|
||||
language_model::MessageContent::Thinking { text, .. } => estimate_text_tokens(text),
|
||||
language_model::MessageContent::RedactedThinking(text) => {
|
||||
estimate_text_tokens(text)
|
||||
}
|
||||
language_model::MessageContent::Image(_) => 1024,
|
||||
language_model::MessageContent::ToolUse(tool_use) => {
|
||||
estimate_text_tokens(&tool_use.raw_input).saturating_add(256)
|
||||
}
|
||||
language_model::MessageContent::ToolResult(tool_result) => tool_result
|
||||
.content
|
||||
.iter()
|
||||
.map(|content| match content {
|
||||
LanguageModelToolResultContent::Text(text) => estimate_text_tokens(text),
|
||||
LanguageModelToolResultContent::Image(_) => 1024,
|
||||
})
|
||||
.sum(),
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
/// Rough token estimate for a piece of text: its UTF-8 byte length divided by four,
/// rounded up.
fn estimate_text_tokens(text: &str) -> u64 {
    const BYTES_PER_TOKEN: usize = 4;

    let byte_len = text.len();
    let whole_tokens = byte_len / BYTES_PER_TOKEN;
    // Any leftover bytes count as one additional token.
    let partial_token = usize::from(byte_len % BYTES_PER_TOKEN != 0);

    (whole_tokens + partial_token) as u64
}
|
||||
|
||||
struct RunningTurn {
|
||||
/// Holds the task that handles agent interaction until the end of the turn.
|
||||
/// Survives across multiple requests as the model performs tool calls and
|
||||
|
|
@ -4869,6 +4979,104 @@ mod tests {
|
|||
});
|
||||
}
|
||||
|
||||
#[gpui::test]
|
||||
async fn test_auto_compact_threshold_scopes(cx: &mut TestAppContext) {
|
||||
let (thread, _event_stream) = setup_thread_for_test(cx).await;
|
||||
|
||||
cx.update(|cx| {
|
||||
thread.update(cx, |thread, _cx| {
|
||||
assert!(!thread.auto_compact_threshold_reached(
|
||||
AutoCompactScope::Total,
|
||||
0.9,
|
||||
89,
|
||||
100
|
||||
));
|
||||
assert!(thread.auto_compact_threshold_reached(
|
||||
AutoCompactScope::Total,
|
||||
0.9,
|
||||
90,
|
||||
100
|
||||
));
|
||||
|
||||
thread.current_conversation_mut().seed = Some(CompactionSeed {
|
||||
artifact: CompactionArtifact::Summary("summary".into()),
|
||||
retained_user_messages: Vec::new(),
|
||||
baseline_tokens: 50,
|
||||
baseline_observed: true,
|
||||
});
|
||||
|
||||
assert!(!thread.auto_compact_threshold_reached(
|
||||
AutoCompactScope::BodyAfterPrefix,
|
||||
0.9,
|
||||
139,
|
||||
1000
|
||||
));
|
||||
assert!(thread.auto_compact_threshold_reached(
|
||||
AutoCompactScope::BodyAfterPrefix,
|
||||
0.9,
|
||||
140,
|
||||
100
|
||||
));
|
||||
assert!(thread.auto_compact_threshold_reached(
|
||||
AutoCompactScope::BodyAfterPrefix,
|
||||
0.9,
|
||||
100,
|
||||
100
|
||||
));
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
#[gpui::test]
|
||||
async fn test_usage_update_observes_compaction_baseline_once(cx: &mut TestAppContext) {
|
||||
let (thread, _event_stream) = setup_thread_for_test(cx).await;
|
||||
let user_message_id = UserMessageId::new();
|
||||
|
||||
cx.update(|cx| {
|
||||
thread.update(cx, |thread, cx| {
|
||||
thread
|
||||
.current_conversation_mut()
|
||||
.messages
|
||||
.push(Message::User(UserMessage {
|
||||
id: user_message_id,
|
||||
content: vec!["hello".into()],
|
||||
}));
|
||||
thread.current_conversation_mut().seed = Some(CompactionSeed {
|
||||
artifact: CompactionArtifact::Summary("summary".into()),
|
||||
retained_user_messages: Vec::new(),
|
||||
baseline_tokens: 12,
|
||||
baseline_observed: false,
|
||||
});
|
||||
|
||||
thread.update_token_usage(
|
||||
TokenUsage {
|
||||
input_tokens: 30,
|
||||
output_tokens: 7,
|
||||
cache_creation_input_tokens: 5,
|
||||
cache_read_input_tokens: 0,
|
||||
},
|
||||
cx,
|
||||
);
|
||||
let seed = thread.current_conversation().seed.as_ref().unwrap();
|
||||
assert_eq!(seed.baseline_tokens, 35);
|
||||
assert!(seed.baseline_observed);
|
||||
|
||||
thread.update_token_usage(
|
||||
TokenUsage {
|
||||
input_tokens: 80,
|
||||
output_tokens: 9,
|
||||
cache_creation_input_tokens: 0,
|
||||
cache_read_input_tokens: 0,
|
||||
},
|
||||
cx,
|
||||
);
|
||||
let seed = thread.current_conversation().seed.as_ref().unwrap();
|
||||
assert_eq!(seed.baseline_tokens, 35);
|
||||
assert!(seed.baseline_observed);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
struct ReplayImageTool;
|
||||
|
||||
impl AgentTool for ReplayImageTool {
|
||||
|
|
|
|||
|
|
@ -589,6 +589,7 @@ mod tests {
|
|||
play_sound_when_agent_done: PlaySoundWhenAgentDone::default(),
|
||||
single_file_review: false,
|
||||
model_parameters: vec![],
|
||||
auto_compact: Default::default(),
|
||||
enable_feedback: false,
|
||||
expand_edit_card: true,
|
||||
expand_terminal_card: true,
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ use settings::{
|
|||
};
|
||||
|
||||
pub use crate::agent_profile::*;
|
||||
pub use settings::AutoCompactScope;
|
||||
|
||||
pub const SUMMARIZE_THREAD_PROMPT: &str = include_str!("prompts/summarize_thread_prompt.txt");
|
||||
pub const SUMMARIZE_THREAD_DETAILED_PROMPT: &str =
|
||||
|
|
@ -158,6 +159,7 @@ pub struct AgentSettings {
|
|||
pub play_sound_when_agent_done: PlaySoundWhenAgentDone,
|
||||
pub single_file_review: bool,
|
||||
pub model_parameters: Vec<LanguageModelParameters>,
|
||||
pub auto_compact: AutoCompactSettings,
|
||||
pub enable_feedback: bool,
|
||||
pub expand_edit_card: bool,
|
||||
pub expand_terminal_card: bool,
|
||||
|
|
@ -170,6 +172,23 @@ pub struct AgentSettings {
|
|||
pub tool_permissions: ToolPermissions,
|
||||
}
|
||||
|
||||
/// Resolved auto-compaction settings, stored on `AgentSettings` as `auto_compact`.
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct AutoCompactSettings {
|
||||
/// Whether to automatically compact agent thread context.
pub enabled: bool,
|
||||
/// Which part of the context window is measured against the threshold.
pub scope: AutoCompactScope,
|
||||
/// Fraction of the model context window at which auto-compaction should run.
pub threshold: f32,
|
||||
}
|
||||
|
||||
impl Default for AutoCompactSettings {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
enabled: true,
|
||||
scope: AutoCompactScope::BodyAfterPrefix,
|
||||
threshold: 0.9,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl AgentSettings {
|
||||
pub fn enabled(&self, cx: &App) -> bool {
|
||||
self.enabled && !DisableAiSettings::get_global(cx).disable_ai
|
||||
|
|
@ -662,6 +681,14 @@ impl Settings for AgentSettings {
|
|||
play_sound_when_agent_done: agent.play_sound_when_agent_done.unwrap_or_default(),
|
||||
single_file_review: agent.single_file_review.unwrap(),
|
||||
model_parameters: agent.model_parameters,
|
||||
auto_compact: {
|
||||
let auto_compact = agent.auto_compact.unwrap();
|
||||
AutoCompactSettings {
|
||||
enabled: auto_compact.enabled.unwrap(),
|
||||
scope: auto_compact.scope.unwrap(),
|
||||
threshold: auto_compact.threshold.unwrap(),
|
||||
}
|
||||
},
|
||||
enable_feedback: agent.enable_feedback.unwrap(),
|
||||
expand_edit_card: agent.expand_edit_card.unwrap(),
|
||||
expand_terminal_card: agent.expand_terminal_card.unwrap(),
|
||||
|
|
|
|||
|
|
@ -863,6 +863,7 @@ mod tests {
|
|||
play_sound_when_agent_done: PlaySoundWhenAgentDone::Never,
|
||||
single_file_review: false,
|
||||
model_parameters: vec![],
|
||||
auto_compact: Default::default(),
|
||||
enable_feedback: false,
|
||||
expand_edit_card: true,
|
||||
expand_terminal_card: true,
|
||||
|
|
|
|||
|
|
@ -71,6 +71,46 @@ pub enum ThinkingBlockDisplay {
|
|||
AlwaysCollapsed,
|
||||
}
|
||||
|
||||
/// Selects which token count is compared against the auto-compaction threshold.
///
/// Variant names are serialized in snake_case (`total`, `body_after_prefix`);
/// the default is `BodyAfterPrefix`.
#[derive(
|
||||
Clone,
|
||||
Copy,
|
||||
Debug,
|
||||
Default,
|
||||
PartialEq,
|
||||
Eq,
|
||||
Serialize,
|
||||
Deserialize,
|
||||
JsonSchema,
|
||||
MergeFrom,
|
||||
strum::VariantArray,
|
||||
strum::VariantNames,
|
||||
)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum AutoCompactScope {
|
||||
/// Count the full active request window against the threshold.
|
||||
Total,
|
||||
/// Count only growth after the compacted prefix baseline against the threshold.
|
||||
#[default]
|
||||
BodyAfterPrefix,
|
||||
}
|
||||
|
||||
/// Serialized form of the auto-compaction settings, held in the `auto_compact`
/// field of `AgentSettingsContent`. Every field is optional.
#[with_fallible_options]
|
||||
#[derive(Clone, PartialEq, Serialize, Deserialize, JsonSchema, MergeFrom, Debug, Default)]
|
||||
pub struct AutoCompactSettingsContent {
|
||||
/// Whether to automatically compact agent thread context near the model limit.
|
||||
///
|
||||
/// Default: true
|
||||
pub enabled: Option<bool>,
|
||||
/// Which part of the context window is measured against the threshold.
|
||||
///
|
||||
/// Default: body_after_prefix
|
||||
pub scope: Option<AutoCompactScope>,
|
||||
/// Fraction of the model context window at which auto-compaction should run.
|
||||
///
|
||||
/// Default: 0.9
|
||||
pub threshold: Option<f32>,
|
||||
}
|
||||
|
||||
#[with_fallible_options]
|
||||
#[derive(Clone, PartialEq, Serialize, Deserialize, JsonSchema, MergeFrom, Debug, Default)]
|
||||
pub struct AgentSettingsContent {
|
||||
|
|
@ -162,6 +202,8 @@ pub struct AgentSettingsContent {
|
|||
/// Default: []
|
||||
#[serde(default)]
|
||||
pub model_parameters: Vec<LanguageModelParameters>,
|
||||
/// Settings for automatic agent context compaction.
|
||||
pub auto_compact: Option<AutoCompactSettingsContent>,
|
||||
/// Whether to show thumb buttons for feedback in the agent panel.
|
||||
///
|
||||
/// Default: true
|
||||
|
|
|
|||
Loading…
Reference in a new issue