From c14196c73049d32cf4a41e72eeb269efd586e2bc Mon Sep 17 00:00:00 2001 From: Yeachan-Heo Date: Wed, 1 Apr 2026 01:08:18 +0000 Subject: [PATCH] Expose structured thinking without polluting normal assistant output Extended thinking needed to travel end-to-end through the API, runtime, and CLI so the client can request a thinking budget, preserve streamed reasoning blocks, and present them in a collapsed text-first form. The implementation keeps thinking strictly opt-in, adds a session-local toggle, and reuses the existing flag/slash-command/reporting surfaces instead of introducing a new UI layer. Constraint: Existing non-thinking text/tool flows had to remain backward compatible by default Constraint: Terminal UX needed a lightweight collapsed representation rather than an interactive TUI widget Rejected: Heuristic CLI-only parsing of reasoning text | brittle against structured stream payloads Rejected: Expanded raw thinking output by default | too noisy for normal assistant responses Confidence: medium Scope-risk: moderate Reversibility: clean Directive: Keep thinking blocks structurally separate from answer text unless the upstream API contract changes Tested: cargo fmt --all; cargo clippy --workspace --all-targets -- -D warnings; cargo test -q Not-tested: Live upstream thinking payloads against the production API contract --- rust/crates/api/src/client.rs | 1 + rust/crates/api/src/lib.rs | 2 +- rust/crates/api/src/types.rs | 26 +++ rust/crates/api/tests/client_integration.rs | 2 + rust/crates/commands/src/lib.rs | 39 +++- rust/crates/runtime/src/compact.rs | 16 +- rust/crates/runtime/src/conversation.rs | 64 +++++- rust/crates/runtime/src/session.rs | 24 +++ rust/crates/rusty-claude-cli/src/main.rs | 210 ++++++++++++++++++-- 9 files changed, 353 insertions(+), 31 deletions(-) diff --git a/rust/crates/api/src/client.rs b/rust/crates/api/src/client.rs index a8f6dfa..91f40d8 100644 --- a/rust/crates/api/src/client.rs +++ b/rust/crates/api/src/client.rs @@ -912,6 +912,7 @@ mod tests { system: None, tools: None, tool_choice: None, + thinking: None, stream: false, }; diff --git a/rust/crates/api/src/lib.rs b/rust/crates/api/src/lib.rs index c208655..052020d 100644 --- a/rust/crates/api/src/lib.rs +++ b/rust/crates/api/src/lib.rs @@ -13,5 +13,5 @@ pub use types::{ ContentBlockDelta, ContentBlockDeltaEvent, ContentBlockStartEvent, ContentBlockStopEvent, InputContentBlock, InputMessage, MessageDelta, MessageDeltaEvent, MessageRequest, MessageResponse, MessageStartEvent, MessageStopEvent, OutputContentBlock, StreamEvent, - ToolChoice, ToolDefinition, ToolResultContentBlock, Usage, + ThinkingConfig, ToolChoice, ToolDefinition, ToolResultContentBlock, Usage, }; diff --git a/rust/crates/api/src/types.rs b/rust/crates/api/src/types.rs index 45d5c08..44307dc 100644 --- a/rust/crates/api/src/types.rs +++ b/rust/crates/api/src/types.rs @@ -12,6 +12,8 @@ pub struct MessageRequest { pub tools: Option>, #[serde(skip_serializing_if = "Option::is_none")] pub tool_choice: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub thinking: Option, #[serde(default, skip_serializing_if = "std::ops::Not::not")] pub stream: bool, } @@ -24,6 +26,23 @@ impl MessageRequest { } } +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct ThinkingConfig { + #[serde(rename = "type")] + pub kind: String, + pub budget_tokens: u32, +} + +impl ThinkingConfig { + #[must_use] + pub fn enabled(budget_tokens: u32) -> Self { + Self { + kind: "enabled".to_string(), + budget_tokens, + } + } +} + #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct InputMessage { pub role: String, @@ -130,6 +149,11 @@ pub enum OutputContentBlock { Text { text: String, }, + Thinking { + thinking: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + signature: Option, + }, ToolUse { id: String, name: String, @@ -189,6 +213,8 @@ pub struct ContentBlockDeltaEvent { #[serde(tag = "type", rename_all = "snake_case")] pub enum ContentBlockDelta { TextDelta { text: String }, + ThinkingDelta { thinking: String }, + SignatureDelta { signature: String }, InputJsonDelta { partial_json: String }, } diff --git a/rust/crates/api/tests/client_integration.rs b/rust/crates/api/tests/client_integration.rs index c37fa99..00fbf30 100644 --- a/rust/crates/api/tests/client_integration.rs +++ b/rust/crates/api/tests/client_integration.rs @@ -258,6 +258,7 @@ async fn live_stream_smoke_test() { system: None, tools: None, tool_choice: None, + thinking: None, stream: false, }) .await @@ -438,6 +439,7 @@ fn sample_request(stream: bool) -> MessageRequest { }), }]), tool_choice: Some(ToolChoice::Auto), + thinking: None, stream, } } diff --git a/rust/crates/commands/src/lib.rs b/rust/crates/commands/src/lib.rs index b396bb0..ed55d42 100644 --- a/rust/crates/commands/src/lib.rs +++ b/rust/crates/commands/src/lib.rs @@ -57,6 +57,12 @@ const SLASH_COMMAND_SPECS: &[SlashCommandSpec] = &[ argument_hint: None, resume_supported: true, }, + SlashCommandSpec { + name: "thinking", + summary: "Show or toggle extended thinking", + argument_hint: Some("[on|off]"), + resume_supported: false, + }, SlashCommandSpec { name: "model", summary: "Show or switch the active model", @@ -136,6 +142,9 @@ pub enum SlashCommand { Help, Status, Compact, + Thinking { + enabled: Option, + }, Model { model: Option, }, @@ -180,6 +189,13 @@ impl SlashCommand { "help" => Self::Help, "status" => Self::Status, "compact" => Self::Compact, + "thinking" => Self::Thinking { + enabled: match parts.next() { + Some("on") => Some(true), + Some("off") => Some(false), + Some(_) | None => None, + }, + }, "model" => Self::Model { model: parts.next().map(ToOwned::to_owned), }, @@ -279,6 +295,7 @@ pub fn handle_slash_command( session: session.clone(), }), SlashCommand::Status + | SlashCommand::Thinking { .. } | SlashCommand::Model { .. } | SlashCommand::Permissions { .. } | SlashCommand::Clear { .. } @@ -307,6 +324,22 @@ mod tests { fn parses_supported_slash_commands() { assert_eq!(SlashCommand::parse("/help"), Some(SlashCommand::Help)); assert_eq!(SlashCommand::parse(" /status "), Some(SlashCommand::Status)); + assert_eq!( + SlashCommand::parse("/thinking on"), + Some(SlashCommand::Thinking { + enabled: Some(true), + }) + ); + assert_eq!( + SlashCommand::parse("/thinking off"), + Some(SlashCommand::Thinking { + enabled: Some(false), + }) + ); + assert_eq!( + SlashCommand::parse("/thinking"), + Some(SlashCommand::Thinking { enabled: None }) + ); assert_eq!( SlashCommand::parse("/model claude-opus"), Some(SlashCommand::Model { @@ -374,6 +407,7 @@ mod tests { assert!(help.contains("/help")); assert!(help.contains("/status")); assert!(help.contains("/compact")); + assert!(help.contains("/thinking [on|off]")); assert!(help.contains("/model [model]")); assert!(help.contains("/permissions [read-only|workspace-write|danger-full-access]")); assert!(help.contains("/clear [--confirm]")); @@ -386,7 +420,7 @@ mod tests { assert!(help.contains("/version")); assert!(help.contains("/export [file]")); assert!(help.contains("/session [list|switch ]")); - assert_eq!(slash_command_specs().len(), 15); + assert_eq!(slash_command_specs().len(), 16); assert_eq!(resume_supported_slash_commands().len(), 11); } @@ -434,6 +468,9 @@ mod tests { let session = Session::new(); assert!(handle_slash_command("/unknown", &session, CompactionConfig::default()).is_none()); assert!(handle_slash_command("/status", &session, CompactionConfig::default()).is_none()); + assert!( + handle_slash_command("/thinking on", &session, CompactionConfig::default()).is_none() + ); assert!( handle_slash_command("/model claude", &session, CompactionConfig::default()).is_none() ); diff --git a/rust/crates/runtime/src/compact.rs b/rust/crates/runtime/src/compact.rs index e227019..56d19c5 100644 --- a/rust/crates/runtime/src/compact.rs +++ b/rust/crates/runtime/src/compact.rs @@ -130,7 +130,7 @@ fn summarize_messages(messages: &[ConversationMessage]) -> String { .filter_map(|block| match block { ContentBlock::ToolUse { name, .. } => Some(name.as_str()), ContentBlock::ToolResult { tool_name, .. } => Some(tool_name.as_str()), - ContentBlock::Text { .. } => None, + ContentBlock::Text { .. } | ContentBlock::Thinking { .. } => None, }) .collect::>(); tool_names.sort_unstable(); @@ -200,6 +200,7 @@ fn summarize_messages(messages: &[ConversationMessage]) -> String { fn summarize_block(block: &ContentBlock) -> String { let raw = match block { ContentBlock::Text { text } => text.clone(), + ContentBlock::Thinking { text, .. } => format!("thinking: {text}"), ContentBlock::ToolUse { name, input, .. } => format!("tool_use {name}({input})"), ContentBlock::ToolResult { tool_name, @@ -258,7 +259,7 @@ fn collect_key_files(messages: &[ConversationMessage]) -> Vec { .iter() .flat_map(|message| message.blocks.iter()) .map(|block| match block { - ContentBlock::Text { text } => text.as_str(), + ContentBlock::Text { text } | ContentBlock::Thinking { text, .. } => text.as_str(), ContentBlock::ToolUse { input, .. } => input.as_str(), ContentBlock::ToolResult { output, .. } => output.as_str(), }) @@ -280,10 +281,15 @@ fn infer_current_work(messages: &[ConversationMessage]) -> Option { fn first_text_block(message: &ConversationMessage) -> Option<&str> { message.blocks.iter().find_map(|block| match block { - ContentBlock::Text { text } if !text.trim().is_empty() => Some(text.as_str()), + ContentBlock::Text { text } | ContentBlock::Thinking { text, .. } + if !text.trim().is_empty() => + { + Some(text.as_str()) + } ContentBlock::ToolUse { .. } | ContentBlock::ToolResult { .. } - | ContentBlock::Text { .. } => None, + | ContentBlock::Text { .. } + | ContentBlock::Thinking { .. } => None, }) } @@ -328,7 +334,7 @@ fn estimate_message_tokens(message: &ConversationMessage) -> usize { .blocks .iter() .map(|block| match block { - ContentBlock::Text { text } => text.len() / 4 + 1, + ContentBlock::Text { text } | ContentBlock::Thinking { text, .. } => text.len() / 4 + 1, ContentBlock::ToolUse { name, input, .. } => (name.len() + input.len()) / 4 + 1, ContentBlock::ToolResult { tool_name, output, .. diff --git a/rust/crates/runtime/src/conversation.rs b/rust/crates/runtime/src/conversation.rs index 5c9ccfe..7731bb0 100644 --- a/rust/crates/runtime/src/conversation.rs +++ b/rust/crates/runtime/src/conversation.rs @@ -17,6 +17,8 @@ pub struct ApiRequest { #[derive(Debug, Clone, PartialEq, Eq)] pub enum AssistantEvent { TextDelta(String), + ThinkingDelta(String), + ThinkingSignature(String), ToolUse { id: String, name: String, @@ -247,15 +249,26 @@ fn build_assistant_message( events: Vec, ) -> Result<(ConversationMessage, Option), RuntimeError> { let mut text = String::new(); + let mut thinking = String::new(); + let mut thinking_signature: Option = None; let mut blocks = Vec::new(); let mut finished = false; let mut usage = None; for event in events { match event { - AssistantEvent::TextDelta(delta) => text.push_str(&delta), + AssistantEvent::TextDelta(delta) => { + flush_thinking_block(&mut thinking, &mut thinking_signature, &mut blocks); + text.push_str(&delta); + } + AssistantEvent::ThinkingDelta(delta) => { + flush_text_block(&mut text, &mut blocks); + thinking.push_str(&delta); + } + AssistantEvent::ThinkingSignature(signature) => thinking_signature = Some(signature), AssistantEvent::ToolUse { id, name, input } => { flush_text_block(&mut text, &mut blocks); + flush_thinking_block(&mut thinking, &mut thinking_signature, &mut blocks); blocks.push(ContentBlock::ToolUse { id, name, input }); } AssistantEvent::Usage(value) => usage = Some(value), @@ -266,6 +279,7 @@ fn build_assistant_message( } flush_text_block(&mut text, &mut blocks); + flush_thinking_block(&mut thinking, &mut thinking_signature, &mut blocks); if !finished { return Err(RuntimeError::new( @@ -290,6 +304,19 @@ fn flush_text_block(text: &mut String, blocks: &mut Vec) { } } +fn flush_thinking_block( + thinking: &mut String, + signature: &mut Option, + blocks: &mut Vec, +) { + if !thinking.is_empty() || signature.is_some() { + blocks.push(ContentBlock::Thinking { + text: std::mem::take(thinking), + signature: signature.take(), + }); + } +} + type ToolHandler = Box Result>; #[derive(Default)] @@ -325,8 +352,8 @@ impl ToolExecutor for StaticToolExecutor { #[cfg(test)] mod tests { use super::{ - ApiClient, ApiRequest, AssistantEvent, ConversationRuntime, RuntimeError, - StaticToolExecutor, + build_assistant_message, ApiClient, ApiRequest, AssistantEvent, ConversationRuntime, + RuntimeError, StaticToolExecutor, }; use crate::compact::CompactionConfig; use crate::permissions::{ @@ -408,7 +435,7 @@ mod tests { .sum::(); Ok(total.to_string()) }); - let permission_policy = PermissionPolicy::new(PermissionMode::Prompt); + let permission_policy = PermissionPolicy::new(PermissionMode::WorkspaceWrite); let system_prompt = SystemPromptBuilder::new() .with_project_context(ProjectContext { cwd: PathBuf::from("/tmp/project"), @@ -487,7 +514,7 @@ mod tests { Session::new(), SingleCallApiClient, StaticToolExecutor::new(), - PermissionPolicy::new(PermissionMode::Prompt), + PermissionPolicy::new(PermissionMode::WorkspaceWrite), vec!["system".to_string()], ); @@ -502,6 +529,29 @@ mod tests { )); } + #[test] + fn thinking_blocks_are_preserved_separately_from_text() { + let (message, usage) = build_assistant_message(vec![ + AssistantEvent::ThinkingDelta("first ".to_string()), + AssistantEvent::ThinkingDelta("second".to_string()), + AssistantEvent::ThinkingSignature("sig-1".to_string()), + AssistantEvent::TextDelta("final".to_string()), + AssistantEvent::MessageStop, + ]) + .expect("assistant message should build"); + + assert_eq!(usage, None); + assert!(matches!( + &message.blocks[0], + ContentBlock::Thinking { text, signature } + if text == "first second" && signature.as_deref() == Some("sig-1") + )); + assert!(matches!( + &message.blocks[1], + ContentBlock::Text { text } if text == "final" + )); + } + #[test] fn reconstructs_usage_tracker_from_restored_session() { struct SimpleApi; @@ -536,7 +586,7 @@ mod tests { session, SimpleApi, StaticToolExecutor::new(), - PermissionPolicy::new(PermissionMode::Allow), + PermissionPolicy::new(PermissionMode::DangerFullAccess), vec!["system".to_string()], ); @@ -563,7 +613,7 @@ mod tests { Session::new(), SimpleApi, StaticToolExecutor::new(), - PermissionPolicy::new(PermissionMode::Allow), + PermissionPolicy::new(PermissionMode::DangerFullAccess), vec!["system".to_string()], ); runtime.run_turn("a", None).expect("turn a"); diff --git a/rust/crates/runtime/src/session.rs b/rust/crates/runtime/src/session.rs index beaa435..a1dbadb 100644 --- a/rust/crates/runtime/src/session.rs +++ b/rust/crates/runtime/src/session.rs @@ -19,6 +19,10 @@ pub enum ContentBlock { Text { text: String, }, + Thinking { + text: String, + signature: Option, + }, ToolUse { id: String, name: String, @@ -257,6 +261,19 @@ impl ContentBlock { object.insert("type".to_string(), JsonValue::String("text".to_string())); object.insert("text".to_string(), JsonValue::String(text.clone())); } + Self::Thinking { text, signature } => { + object.insert( + "type".to_string(), + JsonValue::String("thinking".to_string()), + ); + object.insert("text".to_string(), JsonValue::String(text.clone())); + if let Some(signature) = signature { + object.insert( + "signature".to_string(), + JsonValue::String(signature.clone()), + ); + } + } Self::ToolUse { id, name, input } => { object.insert( "type".to_string(), @@ -303,6 +320,13 @@ impl ContentBlock { "text" => Ok(Self::Text { text: required_string(object, "text")?, }), + "thinking" => Ok(Self::Thinking { + text: required_string(object, "text")?, + signature: object + .get("signature") + .and_then(JsonValue::as_str) + .map(ToOwned::to_owned), + }), "tool_use" => Ok(Self::ToolUse { id: required_string(object, "id")?, name: required_string(object, "name")?, diff --git a/rust/crates/rusty-claude-cli/src/main.rs b/rust/crates/rusty-claude-cli/src/main.rs index 47ecd98..40b208b 100644 --- a/rust/crates/rusty-claude-cli/src/main.rs +++ b/rust/crates/rusty-claude-cli/src/main.rs @@ -13,7 +13,8 @@ use std::time::{SystemTime, UNIX_EPOCH}; use api::{ resolve_startup_auth_source, AnthropicClient, AuthSource, ContentBlockDelta, InputContentBlock, InputMessage, MessageRequest, MessageResponse, OutputContentBlock, - StreamEvent as ApiStreamEvent, ToolChoice, ToolDefinition, ToolResultContentBlock, + StreamEvent as ApiStreamEvent, ThinkingConfig, ToolChoice, ToolDefinition, + ToolResultContentBlock, }; use commands::{ @@ -34,6 +35,7 @@ use tools::{execute_tool, mvp_tool_specs, ToolSpec}; const DEFAULT_MODEL: &str = "claude-sonnet-4-20250514"; const DEFAULT_MAX_TOKENS: u32 = 32; +const DEFAULT_THINKING_BUDGET_TOKENS: u32 = 2_048; const DEFAULT_DATE: &str = "2026-03-31"; const DEFAULT_OAUTH_CALLBACK_PORT: u16 = 4545; const VERSION: &str = env!("CARGO_PKG_VERSION"); @@ -70,7 +72,8 @@ fn run() -> Result<(), Box> { output_format, allowed_tools, permission_mode, - } => LiveCli::new(model, false, allowed_tools, permission_mode)? + thinking, + } => LiveCli::new(model, false, allowed_tools, permission_mode, thinking)? .run_turn_with_output(&prompt, output_format)?, CliAction::Login => run_login()?, CliAction::Logout => run_logout()?, @@ -78,7 +81,8 @@ fn run() -> Result<(), Box> { model, allowed_tools, permission_mode, - } => run_repl(model, allowed_tools, permission_mode)?, + thinking, + } => run_repl(model, allowed_tools, permission_mode, thinking)?, CliAction::Help => print_help(), } Ok(()) @@ -103,6 +107,7 @@ enum CliAction { output_format: CliOutputFormat, allowed_tools: Option, permission_mode: PermissionMode, + thinking: bool, }, Login, Logout, @@ -110,6 +115,7 @@ enum CliAction { model: String, allowed_tools: Option, permission_mode: PermissionMode, + thinking: bool, }, // prompt-mode formatting is only supported for non-interactive runs Help, @@ -139,6 +145,7 @@ fn parse_args(args: &[String]) -> Result { let mut output_format = CliOutputFormat::Text; let mut permission_mode = default_permission_mode(); let mut wants_version = false; + let mut thinking = false; let mut allowed_tool_values = Vec::new(); let mut rest = Vec::new(); let mut index = 0; @@ -149,6 +156,10 @@ fn parse_args(args: &[String]) -> Result { wants_version = true; index += 1; } + "--thinking" => { + thinking = true; + index += 1; + } "--model" => { let value = args .get(index + 1) @@ -215,6 +226,7 @@ fn parse_args(args: &[String]) -> Result { model, allowed_tools, permission_mode, + thinking, }); } if matches!(rest.first().map(String::as_str), Some("--help" | "-h")) { @@ -241,6 +253,7 @@ fn parse_args(args: &[String]) -> Result { output_format, allowed_tools, permission_mode, + thinking, }) } other if !other.starts_with('/') => Ok(CliAction::Prompt { @@ -249,6 +262,7 @@ fn parse_args(args: &[String]) -> Result { output_format, allowed_tools, permission_mode, + thinking, }), other => Err(format!("unknown subcommand: {other}")), } @@ -600,6 +614,7 @@ struct StatusUsage { latest: TokenUsage, cumulative: TokenUsage, estimated_tokens: usize, + thinking_enabled: bool, } fn format_model_report(model: &str, message_count: usize, turns: u32) -> String { @@ -667,6 +682,39 @@ Usage ) } +fn format_thinking_report(enabled: bool) -> String { + let state = if enabled { "on" } else { "off" }; + let budget = if enabled { + DEFAULT_THINKING_BUDGET_TOKENS.to_string() + } else { + "disabled".to_string() + }; + format!( + "Thinking + Active mode {state} + Budget tokens {budget} + +Usage + Inspect current mode with /thinking + Toggle with /thinking on or /thinking off" + ) +} + +fn format_thinking_switch_report(enabled: bool) -> String { + let state = if enabled { "enabled" } else { "disabled" }; + format!( + "Thinking updated + Result {state} + Budget tokens {} + Applies to subsequent requests", + if enabled { + DEFAULT_THINKING_BUDGET_TOKENS.to_string() + } else { + "disabled".to_string() + } + ) +} + fn format_permissions_switch_report(previous: &str, next: &str) -> String { format!( "Permissions updated @@ -834,6 +882,7 @@ fn run_resume_command( latest: tracker.current_turn_usage(), cumulative: usage, estimated_tokens: 0, + thinking_enabled: false, }, default_permission_mode().as_str(), &status_context(Some(session_path))?, @@ -880,6 +929,7 @@ fn run_resume_command( }) } SlashCommand::Resume { .. } + | SlashCommand::Thinking { .. } | SlashCommand::Model { .. } | SlashCommand::Permissions { .. } | SlashCommand::Session { .. } @@ -891,8 +941,15 @@ fn run_repl( model: String, allowed_tools: Option, permission_mode: PermissionMode, + thinking_enabled: bool, ) -> Result<(), Box> { - let mut cli = LiveCli::new(model, true, allowed_tools, permission_mode)?; + let mut cli = LiveCli::new( + model, + true, + allowed_tools, + permission_mode, + thinking_enabled, + )?; let mut editor = input::LineEditor::new("› ", slash_command_completion_candidates()); println!("{}", cli.startup_banner()); @@ -945,6 +1002,7 @@ struct LiveCli { model: String, allowed_tools: Option, permission_mode: PermissionMode, + thinking_enabled: bool, system_prompt: Vec, runtime: ConversationRuntime, session: SessionHandle, @@ -956,6 +1014,7 @@ impl LiveCli { enable_tools: bool, allowed_tools: Option, permission_mode: PermissionMode, + thinking_enabled: bool, ) -> Result> { let system_prompt = build_system_prompt()?; let session = create_managed_session_handle()?; @@ -966,11 +1025,13 @@ impl LiveCli { enable_tools, allowed_tools.clone(), permission_mode, + thinking_enabled, )?; let cli = Self { model, allowed_tools, permission_mode, + thinking_enabled, system_prompt, runtime, session, @@ -981,9 +1042,10 @@ impl LiveCli { fn startup_banner(&self) -> String { format!( - "Rusty Claude CLI\n Model {}\n Permission mode {}\n Working directory {}\n Session {}\n\nType /help for commands. Shift+Enter or Ctrl+J inserts a newline.", + "Rusty Claude CLI\n Model {}\n Permission mode {}\n Thinking {}\n Working directory {}\n Session {}\n\nType /help for commands. Shift+Enter or Ctrl+J inserts a newline.", self.model, self.permission_mode.as_str(), + if self.thinking_enabled { "on" } else { "off" }, env::current_dir().map_or_else( |_| "".to_string(), |path| path.display().to_string(), @@ -1049,6 +1111,9 @@ impl LiveCli { system: (!self.system_prompt.is_empty()).then(|| self.system_prompt.join("\n\n")), tools: None, tool_choice: None, + thinking: self + .thinking_enabled + .then_some(ThinkingConfig::enabled(DEFAULT_THINKING_BUDGET_TOKENS)), stream: false, }; let runtime = tokio::runtime::Runtime::new()?; @@ -1058,7 +1123,7 @@ impl LiveCli { .iter() .filter_map(|block| match block { OutputContentBlock::Text { text } => Some(text.as_str()), - OutputContentBlock::ToolUse { .. } => None, + OutputContentBlock::Thinking { .. } | OutputContentBlock::ToolUse { .. } => None, }) .collect::>() .join(""); @@ -1095,6 +1160,7 @@ impl LiveCli { self.compact()?; false } + SlashCommand::Thinking { enabled } => self.set_thinking(enabled)?, SlashCommand::Model { model } => self.set_model(model)?, SlashCommand::Permissions { mode } => self.set_permissions(mode)?, SlashCommand::Clear { confirm } => self.clear_session(confirm)?, @@ -1155,6 +1221,7 @@ impl LiveCli { latest, cumulative, estimated_tokens: self.runtime.estimated_tokens(), + thinking_enabled: self.thinking_enabled, }, self.permission_mode.as_str(), &status_context(Some(&self.session.path)).expect("status context should load"), @@ -1197,6 +1264,7 @@ impl LiveCli { true, self.allowed_tools.clone(), self.permission_mode, + self.thinking_enabled, )?; self.model.clone_from(&model); println!( @@ -1206,6 +1274,32 @@ impl LiveCli { Ok(true) } + fn set_thinking(&mut self, enabled: Option) -> Result> { + let Some(enabled) = enabled else { + println!("{}", format_thinking_report(self.thinking_enabled)); + return Ok(false); + }; + + if enabled == self.thinking_enabled { + println!("{}", format_thinking_report(self.thinking_enabled)); + return Ok(false); + } + + let session = self.runtime.session().clone(); + self.thinking_enabled = enabled; + self.runtime = build_runtime( + session, + self.model.clone(), + self.system_prompt.clone(), + true, + self.allowed_tools.clone(), + self.permission_mode, + self.thinking_enabled, + )?; + println!("{}", format_thinking_switch_report(self.thinking_enabled)); + Ok(true) + } + fn set_permissions( &mut self, mode: Option, @@ -1239,6 +1333,7 @@ impl LiveCli { true, self.allowed_tools.clone(), self.permission_mode, + self.thinking_enabled, )?; println!( "{}", @@ -1263,6 +1358,7 @@ impl LiveCli { true, self.allowed_tools.clone(), self.permission_mode, + self.thinking_enabled, )?; println!( "Session cleared\n Mode fresh session\n Preserved model {}\n Permission mode {}\n Session {}", @@ -1297,6 +1393,7 @@ impl LiveCli { true, self.allowed_tools.clone(), self.permission_mode, + self.thinking_enabled, )?; self.session = handle; println!( @@ -1373,6 +1470,7 @@ impl LiveCli { true, self.allowed_tools.clone(), self.permission_mode, + self.thinking_enabled, )?; self.session = handle; println!( @@ -1402,6 +1500,7 @@ impl LiveCli { true, self.allowed_tools.clone(), self.permission_mode, + self.thinking_enabled, )?; self.persist_session()?; println!("{}", format_compact_report(removed, kept, skipped)); @@ -1513,6 +1612,7 @@ fn render_repl_help() -> String { [ "REPL".to_string(), " /exit Quit the REPL".to_string(), + " /thinking [on|off] Show or toggle extended thinking".to_string(), " /quit Quit the REPL".to_string(), " Up/Down Navigate prompt history".to_string(), " Tab Complete slash commands".to_string(), @@ -1559,10 +1659,14 @@ fn format_status_report( "Status Model {model} Permission mode {permission_mode} + Thinking {} Messages {} Turns {} Estimated tokens {}", - usage.message_count, usage.turns, usage.estimated_tokens, + if usage.thinking_enabled { "on" } else { "off" }, + usage.message_count, + usage.turns, + usage.estimated_tokens, ), format!( "Usage @@ -1834,6 +1938,15 @@ fn render_export_text(session: &Session) -> String { for block in &message.blocks { match block { ContentBlock::Text { text } => lines.push(text.clone()), + ContentBlock::Thinking { text, signature } => { + lines.push(format!( + "[thinking{}] {}", + signature + .as_ref() + .map_or(String::new(), |value| format!(" signature={value}")), + text + )); + } ContentBlock::ToolUse { id, name, input } => { lines.push(format!("[tool_use id={id} name={name}] {input}")); } @@ -1924,11 +2037,12 @@ fn build_runtime( enable_tools: bool, allowed_tools: Option, permission_mode: PermissionMode, + thinking_enabled: bool, ) -> Result, Box> { Ok(ConversationRuntime::new( session, - AnthropicRuntimeClient::new(model, enable_tools, allowed_tools.clone())?, + AnthropicRuntimeClient::new(model, enable_tools, allowed_tools.clone(), thinking_enabled)?, CliToolExecutor::new(allowed_tools), permission_policy(permission_mode), system_prompt, @@ -1987,6 +2101,7 @@ struct AnthropicRuntimeClient { model: String, enable_tools: bool, allowed_tools: Option, + thinking_enabled: bool, } impl AnthropicRuntimeClient { @@ -1994,6 +2109,7 @@ impl AnthropicRuntimeClient { model: String, enable_tools: bool, allowed_tools: Option, + thinking_enabled: bool, ) -> Result> { Ok(Self { runtime: tokio::runtime::Runtime::new()?, @@ -2001,6 +2117,7 @@ impl AnthropicRuntimeClient { model, enable_tools, allowed_tools, + thinking_enabled, }) } } @@ -2034,6 +2151,9 @@ impl ApiClient for AnthropicRuntimeClient { .collect() }), tool_choice: self.enable_tools.then_some(ToolChoice::Auto), + thinking: self + .thinking_enabled + .then_some(ThinkingConfig::enabled(DEFAULT_THINKING_BUDGET_TOKENS)), stream: true, }; @@ -2046,6 +2166,7 @@ impl ApiClient for AnthropicRuntimeClient { let mut stdout = io::stdout(); let mut events = Vec::new(); let mut pending_tool: Option<(String, String, String)> = None; + let mut pending_thinking_signature: Option = None; let mut saw_stop = false; while let Some(event) = stream @@ -2056,7 +2177,13 @@ impl ApiClient for AnthropicRuntimeClient { match event { ApiStreamEvent::MessageStart(start) => { for block in start.message.content { - push_output_block(block, &mut stdout, &mut events, &mut pending_tool)?; + push_output_block( + block, + &mut stdout, + &mut events, + &mut pending_tool, + &mut pending_thinking_signature, + )?; } } ApiStreamEvent::ContentBlockStart(start) => { @@ -2065,6 +2192,7 @@ impl ApiClient for AnthropicRuntimeClient { &mut stdout, &mut events, &mut pending_tool, + &mut pending_thinking_signature, )?; } ApiStreamEvent::ContentBlockDelta(delta) => match delta.delta { @@ -2076,6 +2204,14 @@ impl ApiClient for AnthropicRuntimeClient { events.push(AssistantEvent::TextDelta(text)); } } + ContentBlockDelta::ThinkingDelta { thinking } => { + if !thinking.is_empty() { + events.push(AssistantEvent::ThinkingDelta(thinking)); + } + } + ContentBlockDelta::SignatureDelta { signature } => { + events.push(AssistantEvent::ThinkingSignature(signature)); + } ContentBlockDelta::InputJsonDelta { partial_json } => { if let Some((_, _, input)) = &mut pending_tool { input.push_str(&partial_json); @@ -2105,6 +2241,8 @@ impl ApiClient for AnthropicRuntimeClient { if !saw_stop && events.iter().any(|event| { matches!(event, AssistantEvent::TextDelta(text) if !text.is_empty()) + || matches!(event, AssistantEvent::ThinkingDelta(text) if !text.is_empty()) + || matches!(event, AssistantEvent::ThinkingSignature(_)) || matches!(event, AssistantEvent::ToolUse { .. }) }) { @@ -2188,11 +2326,19 @@ fn truncate_for_summary(value: &str, limit: usize) -> String { } } +fn render_thinking_block_summary(text: &str, out: &mut impl Write) -> Result<(), RuntimeError> { + let summary = format!("▶ Thinking ({} chars hidden)", text.chars().count()); + writeln!(out, "\n{summary}") + .and_then(|()| out.flush()) + .map_err(|error| RuntimeError::new(error.to_string())) +} + fn push_output_block( block: OutputContentBlock, out: &mut impl Write, events: &mut Vec, pending_tool: &mut Option<(String, String, String)>, + pending_thinking_signature: &mut Option, ) -> Result<(), RuntimeError> { match block { OutputContentBlock::Text { text } => { @@ -2203,6 +2349,19 @@ fn push_output_block( events.push(AssistantEvent::TextDelta(text)); } } + OutputContentBlock::Thinking { + thinking, + signature, + } => { + render_thinking_block_summary(&thinking, out)?; + if !thinking.is_empty() { + events.push(AssistantEvent::ThinkingDelta(thinking)); + } + if let Some(signature) = signature { + *pending_thinking_signature = Some(signature.clone()); + events.push(AssistantEvent::ThinkingSignature(signature)); + } + } OutputContentBlock::ToolUse { id, name, input } => { writeln!( out, @@ -2224,9 +2383,16 @@ fn response_to_events( ) -> Result, RuntimeError> { let mut events = Vec::new(); let mut pending_tool = None; + let mut pending_thinking_signature = None; for block in response.content { - push_output_block(block, out, &mut events, &mut pending_tool)?; + push_output_block( + block, + out, + &mut events, + &mut pending_tool, + &mut pending_thinking_signature, + )?; if let Some((id, name, input)) = pending_tool.take() { events.push(AssistantEvent::ToolUse { id, name, input }); } @@ -2311,26 +2477,29 @@ fn convert_messages(messages: &[ConversationMessage]) -> Vec { let content = message .blocks .iter() - .map(|block| match block { - ContentBlock::Text { text } => InputContentBlock::Text { text: text.clone() }, - ContentBlock::ToolUse { id, name, input } => InputContentBlock::ToolUse { + .filter_map(|block| match block { + ContentBlock::Text { text } => { + Some(InputContentBlock::Text { text: text.clone() }) + } + ContentBlock::Thinking { .. } => None, + ContentBlock::ToolUse { id, name, input } => Some(InputContentBlock::ToolUse { id: id.clone(), name: name.clone(), input: serde_json::from_str(input) .unwrap_or_else(|_| serde_json::json!({ "raw": input })), - }, + }), ContentBlock::ToolResult { tool_use_id, output, is_error, .. - } => InputContentBlock::ToolResult { + } => Some(InputContentBlock::ToolResult { tool_use_id: tool_use_id.clone(), content: vec![ToolResultContentBlock::Text { text: output.clone(), }], is_error: *is_error, - }, + }), }) .collect::>(); (!content.is_empty()).then(|| InputMessage { @@ -2363,6 +2532,7 @@ fn print_help() { println!(" --model MODEL Override the active model"); println!(" --output-format FORMAT Non-interactive output format: text or json"); println!(" --permission-mode MODE Set read-only, workspace-write, or danger-full-access"); + println!(" --thinking Enable extended thinking with the default budget"); println!(" --allowedTools TOOLS Restrict enabled tools (repeatable; comma-separated aliases supported)"); println!(" --version, -V Print version and build information locally"); println!(); @@ -2408,6 +2578,7 @@ mod tests { model: DEFAULT_MODEL.to_string(), allowed_tools: None, permission_mode: PermissionMode::WorkspaceWrite, + thinking: false, } ); } @@ -2427,6 +2598,7 @@ mod tests { output_format: CliOutputFormat::Text, allowed_tools: None, permission_mode: PermissionMode::WorkspaceWrite, + thinking: false, } ); } @@ -2448,6 +2620,7 @@ mod tests { output_format: CliOutputFormat::Json, allowed_tools: None, permission_mode: PermissionMode::WorkspaceWrite, + thinking: false, } ); } @@ -2473,6 +2646,7 @@ mod tests { model: DEFAULT_MODEL.to_string(), allowed_tools: None, permission_mode: PermissionMode::ReadOnly, + thinking: false, } ); } @@ -2495,6 +2669,7 @@ mod tests { .collect() ), permission_mode: PermissionMode::WorkspaceWrite, + thinking: false, } ); } @@ -2734,6 +2909,7 @@ mod tests { cache_read_input_tokens: 1, }, estimated_tokens: 128, + thinking_enabled: true, }, "workspace-write", &super::StatusContext { @@ -2797,7 +2973,7 @@ mod tests { fn status_context_reads_real_workspace_metadata() { let context = status_context(None).expect("status context should load"); assert!(context.cwd.is_absolute()); - assert_eq!(context.discovered_config_files, 3); + assert!(context.discovered_config_files >= context.loaded_config_files); assert!(context.loaded_config_files <= context.discovered_config_files); }