Preserve ILM-style conversation continuity during auto compaction

Auto compaction was keying off cumulative usage and re-summarizing from the front of the session, which made long chats shed continuity after the first compaction. The runtime now compacts against the current turn's prompt pressure and preserves prior compacted context as retained summary state instead of treating it like disposable history.

Constraint: Existing /compact behavior and saved-session resume flow had to keep working without schema changes
Rejected: Keep using cumulative input tokens | caused repeat compaction after every subsequent turn once the threshold was crossed
Rejected: Re-summarize prior compacted system messages as ordinary history | degraded continuity and could drop earlier context
Confidence: high
Scope-risk: moderate
Reversibility: clean
Directive: Preserve compacted-summary boundaries when extending compaction again; do not fold prior compacted context back into raw-message removal
Tested: cargo fmt --check; cargo clippy -p runtime -p commands --tests -- -D warnings; cargo test -p runtime; cargo test -p commands
Not-tested: End-to-end interactive CLI auto-compaction against a live Anthropic session
This commit is contained in:
Yeachan-Heo
2026-04-01 07:55:25 +00:00
parent 97d725d5e5
commit 782d9cea71
2 changed files with 303 additions and 19 deletions

View File

@@ -1,5 +1,10 @@
use crate::session::{ContentBlock, ConversationMessage, MessageRole, Session}; use crate::session::{ContentBlock, ConversationMessage, MessageRole, Session};
const COMPACT_CONTINUATION_PREAMBLE: &str =
"This session is being continued from a previous conversation that ran out of context. The summary below covers the earlier portion of the conversation.\n\n";
const COMPACT_RECENT_MESSAGES_NOTE: &str = "Recent messages are preserved verbatim.";
const COMPACT_DIRECT_RESUME_INSTRUCTION: &str = "Continue the conversation from where it left off without asking the user any further questions. Resume directly — do not acknowledge the summary, do not recap what was happening, and do not preface with continuation text.";
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct CompactionConfig { pub struct CompactionConfig {
pub preserve_recent_messages: usize, pub preserve_recent_messages: usize,
@@ -30,8 +35,15 @@ pub fn estimate_session_tokens(session: &Session) -> usize {
#[must_use] #[must_use]
pub fn should_compact(session: &Session, config: CompactionConfig) -> bool { pub fn should_compact(session: &Session, config: CompactionConfig) -> bool {
session.messages.len() > config.preserve_recent_messages let start = compacted_summary_prefix_len(session);
&& estimate_session_tokens(session) >= config.max_estimated_tokens let compactable = &session.messages[start..];
compactable.len() > config.preserve_recent_messages
&& compactable
.iter()
.map(estimate_message_tokens)
.sum::<usize>()
>= config.max_estimated_tokens
} }
#[must_use] #[must_use]
@@ -56,16 +68,18 @@ pub fn get_compact_continuation_message(
recent_messages_preserved: bool, recent_messages_preserved: bool,
) -> String { ) -> String {
let mut base = format!( let mut base = format!(
"This session is being continued from a previous conversation that ran out of context. The summary below covers the earlier portion of the conversation.\n\n{}", "{COMPACT_CONTINUATION_PREAMBLE}{}",
format_compact_summary(summary) format_compact_summary(summary)
); );
if recent_messages_preserved { if recent_messages_preserved {
base.push_str("\n\nRecent messages are preserved verbatim."); base.push_str("\n\n");
base.push_str(COMPACT_RECENT_MESSAGES_NOTE);
} }
if suppress_follow_up_questions { if suppress_follow_up_questions {
base.push_str("\nContinue the conversation from where it left off without asking the user any further questions. Resume directly — do not acknowledge the summary, do not recap what was happening, and do not preface with continuation text."); base.push('\n');
base.push_str(COMPACT_DIRECT_RESUME_INSTRUCTION);
} }
base base
@@ -82,13 +96,19 @@ pub fn compact_session(session: &Session, config: CompactionConfig) -> Compactio
}; };
} }
let existing_summary = session
.messages
.first()
.and_then(extract_existing_compacted_summary);
let compacted_prefix_len = usize::from(existing_summary.is_some());
let keep_from = session let keep_from = session
.messages .messages
.len() .len()
.saturating_sub(config.preserve_recent_messages); .saturating_sub(config.preserve_recent_messages);
let removed = &session.messages[..keep_from]; let removed = &session.messages[compacted_prefix_len..keep_from];
let preserved = session.messages[keep_from..].to_vec(); let preserved = session.messages[keep_from..].to_vec();
let summary = summarize_messages(removed); let summary =
merge_compact_summaries(existing_summary.as_deref(), &summarize_messages(removed));
let formatted_summary = format_compact_summary(&summary); let formatted_summary = format_compact_summary(&summary);
let continuation = get_compact_continuation_message(&summary, true, !preserved.is_empty()); let continuation = get_compact_continuation_message(&summary, true, !preserved.is_empty());
@@ -110,6 +130,16 @@ pub fn compact_session(session: &Session, config: CompactionConfig) -> Compactio
} }
} }
/// Counts the leading compacted-summary message of `session`.
///
/// Yields `1` when the first message is recognized by
/// `extract_existing_compacted_summary`, and `0` otherwise (including when the
/// session has no messages at all).
fn compacted_summary_prefix_len(session: &Session) -> usize {
    let leading_summary = session
        .messages
        .first()
        .and_then(extract_existing_compacted_summary);
    leading_summary.map_or(0, |_| 1)
}
fn summarize_messages(messages: &[ConversationMessage]) -> String { fn summarize_messages(messages: &[ConversationMessage]) -> String {
let user_messages = messages let user_messages = messages
.iter() .iter()
@@ -197,6 +227,41 @@ fn summarize_messages(messages: &[ConversationMessage]) -> String {
lines.join("\n") lines.join("\n")
} }
fn merge_compact_summaries(existing_summary: Option<&str>, new_summary: &str) -> String {
let Some(existing_summary) = existing_summary else {
return new_summary.to_string();
};
let previous_highlights = extract_summary_highlights(existing_summary);
let new_formatted_summary = format_compact_summary(new_summary);
let new_highlights = extract_summary_highlights(&new_formatted_summary);
let new_timeline = extract_summary_timeline(&new_formatted_summary);
let mut lines = vec!["<summary>".to_string(), "Conversation summary:".to_string()];
if !previous_highlights.is_empty() {
lines.push("- Previously compacted context:".to_string());
lines.extend(
previous_highlights
.into_iter()
.map(|line| format!(" {line}")),
);
}
if !new_highlights.is_empty() {
lines.push("- Newly compacted context:".to_string());
lines.extend(new_highlights.into_iter().map(|line| format!(" {line}")));
}
if !new_timeline.is_empty() {
lines.push("- Key timeline:".to_string());
lines.extend(new_timeline.into_iter().map(|line| format!(" {line}")));
}
lines.push("</summary>".to_string());
lines.join("\n")
}
fn summarize_block(block: &ContentBlock) -> String { fn summarize_block(block: &ContentBlock) -> String {
let raw = match block { let raw = match block {
ContentBlock::Text { text } => text.clone(), ContentBlock::Text { text } => text.clone(),
@@ -374,11 +439,71 @@ fn collapse_blank_lines(content: &str) -> String {
result result
} }
/// Recovers the summary text embedded in a compaction continuation message.
///
/// Returns `None` unless `message` has the system role, has a first text
/// block, and that text starts with `COMPACT_CONTINUATION_PREAMBLE`. The
/// text after the preamble is cut at the first occurrence of the
/// recent-messages note and then at the first occurrence of the direct-resume
/// instruction (each with its leading newline separator), and the remainder
/// is returned trimmed.
fn extract_existing_compacted_summary(message: &ConversationMessage) -> Option<String> {
    if message.role != MessageRole::System {
        return None;
    }

    let text = first_text_block(message)?;
    let mut body = text.strip_prefix(COMPACT_CONTINUATION_PREAMBLE)?;

    let recent_note_marker = format!("\n\n{COMPACT_RECENT_MESSAGES_NOTE}");
    let resume_marker = format!("\n{COMPACT_DIRECT_RESUME_INSTRUCTION}");
    for marker in [recent_note_marker.as_str(), resume_marker.as_str()] {
        // Keep only what precedes the marker; leave the text alone if it is absent.
        if let Some((head, _)) = body.split_once(marker) {
            body = head;
        }
    }

    Some(body.trim().to_owned())
}
/// Collects the non-timeline lines of a summary.
///
/// The summary is passed through `format_compact_summary`, then every line
/// (with trailing whitespace removed) that appears before the first
/// `- Key timeline:` line is returned, except blank lines and the
/// `Summary:` / `Conversation summary:` header lines. Everything from the
/// timeline marker onward is dropped.
fn extract_summary_highlights(summary: &str) -> Vec<String> {
    let formatted = format_compact_summary(summary);
    formatted
        .lines()
        .map(str::trim_end)
        .take_while(|line| *line != "- Key timeline:")
        .filter(|line| {
            !line.is_empty() && !matches!(*line, "Summary:" | "Conversation summary:")
        })
        .map(str::to_string)
        .collect()
}
/// Collects the timeline entries of a summary.
///
/// The summary is passed through `format_compact_summary`; lines are then
/// skipped until the first `- Key timeline:` line, and the lines after it
/// (with trailing whitespace removed) are returned up to, but not including,
/// the first blank line. Marker lines themselves are never part of the result.
fn extract_summary_timeline(summary: &str) -> Vec<String> {
    const TIMELINE_MARKER: &str = "- Key timeline:";

    let formatted = format_compact_summary(summary);
    formatted
        .lines()
        .map(str::trim_end)
        .skip_while(|line| *line != TIMELINE_MARKER)
        .take_while(|line| !line.is_empty())
        // Drops the opening marker and any repeated marker inside the section.
        .filter(|line| *line != TIMELINE_MARKER)
        .map(str::to_string)
        .collect()
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::{ use super::{
collect_key_files, compact_session, estimate_session_tokens, format_compact_summary, collect_key_files, compact_session, estimate_session_tokens, format_compact_summary,
infer_pending_work, should_compact, CompactionConfig, get_compact_continuation_message, infer_pending_work, should_compact, CompactionConfig,
}; };
use crate::session::{ContentBlock, ConversationMessage, MessageRole, Session}; use crate::session::{ContentBlock, ConversationMessage, MessageRole, Session};
@@ -453,6 +578,98 @@ mod tests {
); );
} }
// Regression test: compacting a session that was already compacted once must
// keep the earlier summary content alongside the newly compacted content.
#[test]
fn keeps_previous_compacted_context_when_compacting_again() {
// Four-message session used as input for the first compaction.
let initial_session = Session {
version: 1,
messages: vec![
ConversationMessage::user_text("Investigate rust/crates/runtime/src/compact.rs"),
ConversationMessage::assistant(vec![ContentBlock::Text {
text: "I will inspect the compact flow.".to_string(),
}]),
ConversationMessage::user_text(
"Also update rust/crates/runtime/src/conversation.rs",
),
ConversationMessage::assistant(vec![ContentBlock::Text {
text: "Next: preserve prior summary context during auto compact.".to_string(),
}]),
],
};
// The same config is used for both compaction passes.
let config = CompactionConfig {
preserve_recent_messages: 2,
max_estimated_tokens: 1,
};
let first = compact_session(&initial_session, config);
// Extend the already-compacted session with one more user/assistant exchange.
let mut follow_up_messages = first.compacted_session.messages.clone();
follow_up_messages.extend([
ConversationMessage::user_text("Please add regression tests for compaction."),
ConversationMessage::assistant(vec![ContentBlock::Text {
text: "Working on regression coverage now.".to_string(),
}]),
]);
// Second compaction pass over the extended session.
let second = compact_session(
&Session {
version: 1,
messages: follow_up_messages,
},
config,
);
// The formatted summary must carry both the earlier and the new sections.
assert!(second
.formatted_summary
.contains("Previously compacted context:"));
assert!(second
.formatted_summary
.contains("Scope: 2 earlier messages compacted"));
assert!(second
.formatted_summary
.contains("Newly compacted context:"));
assert!(second
.formatted_summary
.contains("Also update rust/crates/runtime/src/conversation.rs"));
// The first message of the re-compacted session must contain both section headings.
assert!(matches!(
&second.compacted_session.messages[0].blocks[0],
ContentBlock::Text { text }
if text.contains("Previously compacted context:")
&& text.contains("Newly compacted context:")
));
// The message right after it must be the follow-up user request.
assert!(matches!(
&second.compacted_session.messages[1].blocks[0],
ContentBlock::Text { text } if text.contains("Please add regression tests for compaction.")
));
}
// Checks that `should_compact` returns false for a session made of a leading
// compaction continuation message plus two short messages, even with
// `max_estimated_tokens` set to 1.
#[test]
fn ignores_existing_compacted_summary_when_deciding_to_recompact() {
let summary = "<summary>Conversation summary:\n- Scope: earlier work preserved.\n- Key timeline:\n - user: large preserved context\n</summary>";
let session = Session {
version: 1,
messages: vec![
// System message built with the same helper that produces continuation messages.
ConversationMessage {
role: MessageRole::System,
blocks: vec![ContentBlock::Text {
text: get_compact_continuation_message(summary, true, true),
}],
usage: None,
},
ConversationMessage::user_text("tiny"),
ConversationMessage::assistant(vec![ContentBlock::Text {
text: "recent".to_string(),
}]),
],
};
// Only two messages follow the summary, matching `preserve_recent_messages`.
assert!(!should_compact(
&session,
CompactionConfig {
preserve_recent_messages: 2,
max_estimated_tokens: 1,
}
));
}
#[test] #[test]
fn truncates_long_blocks_in_summary() { fn truncates_long_blocks_in_summary() {
let summary = super::summarize_block(&ContentBlock::Text { let summary = super::summarize_block(&ContentBlock::Text {

View File

@@ -234,6 +234,7 @@ where
let mut assistant_messages = Vec::new(); let mut assistant_messages = Vec::new();
let mut tool_results = Vec::new(); let mut tool_results = Vec::new();
let mut iterations = 0; let mut iterations = 0;
let mut max_turn_input_tokens = 0;
loop { loop {
iterations += 1; iterations += 1;
@@ -250,6 +251,7 @@ where
let events = self.api_client.stream(request)?; let events = self.api_client.stream(request)?;
let (assistant_message, usage) = build_assistant_message(events)?; let (assistant_message, usage) = build_assistant_message(events)?;
if let Some(usage) = usage { if let Some(usage) = usage {
max_turn_input_tokens = max_turn_input_tokens.max(usage.input_tokens);
self.usage_tracker.record(usage); self.usage_tracker.record(usage);
} }
let pending_tool_uses = assistant_message let pending_tool_uses = assistant_message
@@ -365,7 +367,7 @@ where
} }
} }
let auto_compaction = self.maybe_auto_compact(); let auto_compaction = self.maybe_auto_compact(max_turn_input_tokens);
Ok(TurnSummary { Ok(TurnSummary {
assistant_messages, assistant_messages,
@@ -426,17 +428,16 @@ where
) )
} }
fn maybe_auto_compact(&mut self) -> Option<AutoCompactionEvent> { fn maybe_auto_compact(&mut self, turn_input_tokens: u32) -> Option<AutoCompactionEvent> {
if self.usage_tracker.cumulative_usage().input_tokens if turn_input_tokens < self.auto_compaction_input_tokens_threshold {
< self.auto_compaction_input_tokens_threshold
{
return None; return None;
} }
let result = compact_session( let result = compact_session(
&self.session, &self.session,
CompactionConfig { CompactionConfig {
max_estimated_tokens: 0, max_estimated_tokens: usize::try_from(self.auto_compaction_input_tokens_threshold)
.unwrap_or(usize::MAX),
..CompactionConfig::default() ..CompactionConfig::default()
}, },
); );
@@ -1204,7 +1205,7 @@ mod tests {
} }
#[test] #[test]
fn auto_compacts_when_cumulative_input_threshold_is_crossed() { fn auto_compacts_when_turn_input_threshold_is_crossed() {
struct SimpleApi; struct SimpleApi;
impl ApiClient for SimpleApi { impl ApiClient for SimpleApi {
fn stream( fn stream(
@@ -1227,13 +1228,13 @@ mod tests {
let session = Session { let session = Session {
version: 1, version: 1,
messages: vec![ messages: vec![
crate::session::ConversationMessage::user_text("one"), crate::session::ConversationMessage::user_text("one ".repeat(30_000)),
crate::session::ConversationMessage::assistant(vec![ContentBlock::Text { crate::session::ConversationMessage::assistant(vec![ContentBlock::Text {
text: "two".to_string(), text: "two ".repeat(30_000),
}]), }]),
crate::session::ConversationMessage::user_text("three"), crate::session::ConversationMessage::user_text("three ".repeat(30_000)),
crate::session::ConversationMessage::assistant(vec![ContentBlock::Text { crate::session::ConversationMessage::assistant(vec![ContentBlock::Text {
text: "four".to_string(), text: "four ".repeat(30_000),
}]), }]),
], ],
}; };
@@ -1260,6 +1261,72 @@ mod tests {
assert_eq!(runtime.session().messages[0].role, MessageRole::System); assert_eq!(runtime.session().messages[0].role, MessageRole::System);
} }
// Checks that auto compaction fires on a turn whose reported input tokens
// exceed the threshold, and does not fire again on a following turn whose
// reported input tokens are below it.
#[test]
fn auto_compaction_does_not_repeat_after_context_is_already_compacted() {
// Stub API client that counts calls so each call can report different usage.
struct SequentialUsageApi {
call_count: usize,
}
impl ApiClient for SequentialUsageApi {
fn stream(
&mut self,
_request: ApiRequest,
) -> Result<Vec<AssistantEvent>, RuntimeError> {
self.call_count += 1;
// First call reports 120_000 input tokens; every later call reports 64.
let input_tokens = if self.call_count == 1 { 120_000 } else { 64 };
Ok(vec![
AssistantEvent::TextDelta("done".to_string()),
AssistantEvent::Usage(TokenUsage {
input_tokens,
output_tokens: 4,
cache_creation_input_tokens: 0,
cache_read_input_tokens: 0,
}),
AssistantEvent::MessageStop,
])
}
}
// Starting session with four long messages (each a short word repeated 30_000 times).
let session = Session {
version: 1,
messages: vec![
crate::session::ConversationMessage::user_text("one ".repeat(30_000)),
crate::session::ConversationMessage::assistant(vec![ContentBlock::Text {
text: "two ".repeat(30_000),
}]),
crate::session::ConversationMessage::user_text("three ".repeat(30_000)),
crate::session::ConversationMessage::assistant(vec![ContentBlock::Text {
text: "four ".repeat(30_000),
}]),
],
};
let mut runtime = ConversationRuntime::new(
session,
SequentialUsageApi { call_count: 0 },
StaticToolExecutor::new(),
PermissionPolicy::new(PermissionMode::DangerFullAccess),
vec!["system".to_string()],
)
.with_auto_compaction_input_tokens_threshold(100_000);
// First turn: reported usage (120_000) is above the 100_000 threshold.
let first = runtime
.run_turn("trigger", None)
.expect("first turn should succeed");
assert_eq!(
first.auto_compaction,
Some(AutoCompactionEvent {
removed_message_count: 2,
})
);
// Second turn: reported usage (64) is below the threshold, so no compaction event.
let second = runtime
.run_turn("continue", None)
.expect("second turn should succeed");
assert_eq!(second.auto_compaction, None);
// The session still begins with a system message after the second turn.
assert_eq!(runtime.session().messages[0].role, MessageRole::System);
}
#[test] #[test]
fn skips_auto_compaction_below_threshold() { fn skips_auto_compaction_below_threshold() {
struct SimpleApi; struct SimpleApi;