1 Commits

Author SHA1 Message Date
Yeachan-Heo
c14196c730 Expose structured thinking without polluting normal assistant output
Extended thinking needed to travel end-to-end through the API,
runtime, and CLI so the client can request a thinking budget,
preserve streamed reasoning blocks, and present them in a
collapsed text-first form. The implementation keeps thinking
strictly opt-in, adds a session-local toggle, and reuses the
existing flag/slash-command/reporting surfaces instead of
introducing a new UI layer.

Constraint: Existing non-thinking text/tool flows had to remain backward compatible by default
Constraint: Terminal UX needed a lightweight collapsed representation rather than an interactive TUI widget
Rejected: Heuristic CLI-only parsing of reasoning text | brittle against structured stream payloads
Rejected: Expanded raw thinking output by default | too noisy for normal assistant responses
Confidence: medium
Scope-risk: moderate
Reversibility: clean
Directive: Keep thinking blocks structurally separate from answer text unless the upstream API contract changes
Tested: cargo fmt --all; cargo clippy --workspace --all-targets -- -D warnings; cargo test -q
Not-tested: Live upstream thinking payloads against the production API contract
2026-04-01 01:08:18 +00:00
9 changed files with 357 additions and 748 deletions

View File

@@ -912,6 +912,7 @@ mod tests {
system: None,
tools: None,
tool_choice: None,
thinking: None,
stream: false,
};

View File

@@ -13,5 +13,5 @@ pub use types::{
ContentBlockDelta, ContentBlockDeltaEvent, ContentBlockStartEvent, ContentBlockStopEvent,
InputContentBlock, InputMessage, MessageDelta, MessageDeltaEvent, MessageRequest,
MessageResponse, MessageStartEvent, MessageStopEvent, OutputContentBlock, StreamEvent,
ToolChoice, ToolDefinition, ToolResultContentBlock, Usage,
ThinkingConfig, ToolChoice, ToolDefinition, ToolResultContentBlock, Usage,
};

View File

@@ -12,6 +12,8 @@ pub struct MessageRequest {
pub tools: Option<Vec<ToolDefinition>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub tool_choice: Option<ToolChoice>,
#[serde(skip_serializing_if = "Option::is_none")]
pub thinking: Option<ThinkingConfig>,
#[serde(default, skip_serializing_if = "std::ops::Not::not")]
pub stream: bool,
}
@@ -24,6 +26,23 @@ impl MessageRequest {
}
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ThinkingConfig {
#[serde(rename = "type")]
pub kind: String,
pub budget_tokens: u32,
}
impl ThinkingConfig {
#[must_use]
pub fn enabled(budget_tokens: u32) -> Self {
Self {
kind: "enabled".to_string(),
budget_tokens,
}
}
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct InputMessage {
pub role: String,
@@ -130,6 +149,11 @@ pub enum OutputContentBlock {
Text {
text: String,
},
Thinking {
thinking: String,
#[serde(default, skip_serializing_if = "Option::is_none")]
signature: Option<String>,
},
ToolUse {
id: String,
name: String,
@@ -189,6 +213,8 @@ pub struct ContentBlockDeltaEvent {
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ContentBlockDelta {
TextDelta { text: String },
ThinkingDelta { thinking: String },
SignatureDelta { signature: String },
InputJsonDelta { partial_json: String },
}

View File

@@ -258,6 +258,7 @@ async fn live_stream_smoke_test() {
system: None,
tools: None,
tool_choice: None,
thinking: None,
stream: false,
})
.await
@@ -438,6 +439,7 @@ fn sample_request(stream: bool) -> MessageRequest {
}),
}]),
tool_choice: Some(ToolChoice::Auto),
thinking: None,
stream,
}
}

View File

@@ -57,6 +57,12 @@ const SLASH_COMMAND_SPECS: &[SlashCommandSpec] = &[
argument_hint: None,
resume_supported: true,
},
SlashCommandSpec {
name: "thinking",
summary: "Show or toggle extended thinking",
argument_hint: Some("[on|off]"),
resume_supported: false,
},
SlashCommandSpec {
name: "model",
summary: "Show or switch the active model",
@@ -136,6 +142,9 @@ pub enum SlashCommand {
Help,
Status,
Compact,
Thinking {
enabled: Option<bool>,
},
Model {
model: Option<String>,
},
@@ -180,6 +189,13 @@ impl SlashCommand {
"help" => Self::Help,
"status" => Self::Status,
"compact" => Self::Compact,
"thinking" => Self::Thinking {
enabled: match parts.next() {
Some("on") => Some(true),
Some("off") => Some(false),
Some(_) | None => None,
},
},
"model" => Self::Model {
model: parts.next().map(ToOwned::to_owned),
},
@@ -279,6 +295,7 @@ pub fn handle_slash_command(
session: session.clone(),
}),
SlashCommand::Status
| SlashCommand::Thinking { .. }
| SlashCommand::Model { .. }
| SlashCommand::Permissions { .. }
| SlashCommand::Clear { .. }
@@ -307,6 +324,22 @@ mod tests {
fn parses_supported_slash_commands() {
assert_eq!(SlashCommand::parse("/help"), Some(SlashCommand::Help));
assert_eq!(SlashCommand::parse(" /status "), Some(SlashCommand::Status));
assert_eq!(
SlashCommand::parse("/thinking on"),
Some(SlashCommand::Thinking {
enabled: Some(true),
})
);
assert_eq!(
SlashCommand::parse("/thinking off"),
Some(SlashCommand::Thinking {
enabled: Some(false),
})
);
assert_eq!(
SlashCommand::parse("/thinking"),
Some(SlashCommand::Thinking { enabled: None })
);
assert_eq!(
SlashCommand::parse("/model claude-opus"),
Some(SlashCommand::Model {
@@ -374,6 +407,7 @@ mod tests {
assert!(help.contains("/help"));
assert!(help.contains("/status"));
assert!(help.contains("/compact"));
assert!(help.contains("/thinking [on|off]"));
assert!(help.contains("/model [model]"));
assert!(help.contains("/permissions [read-only|workspace-write|danger-full-access]"));
assert!(help.contains("/clear [--confirm]"));
@@ -386,7 +420,7 @@ mod tests {
assert!(help.contains("/version"));
assert!(help.contains("/export [file]"));
assert!(help.contains("/session [list|switch <session-id>]"));
assert_eq!(slash_command_specs().len(), 15);
assert_eq!(slash_command_specs().len(), 16);
assert_eq!(resume_supported_slash_commands().len(), 11);
}
@@ -434,6 +468,9 @@ mod tests {
let session = Session::new();
assert!(handle_slash_command("/unknown", &session, CompactionConfig::default()).is_none());
assert!(handle_slash_command("/status", &session, CompactionConfig::default()).is_none());
assert!(
handle_slash_command("/thinking on", &session, CompactionConfig::default()).is_none()
);
assert!(
handle_slash_command("/model claude", &session, CompactionConfig::default()).is_none()
);

View File

@@ -130,7 +130,7 @@ fn summarize_messages(messages: &[ConversationMessage]) -> String {
.filter_map(|block| match block {
ContentBlock::ToolUse { name, .. } => Some(name.as_str()),
ContentBlock::ToolResult { tool_name, .. } => Some(tool_name.as_str()),
ContentBlock::Text { .. } => None,
ContentBlock::Text { .. } | ContentBlock::Thinking { .. } => None,
})
.collect::<Vec<_>>();
tool_names.sort_unstable();
@@ -200,6 +200,7 @@ fn summarize_messages(messages: &[ConversationMessage]) -> String {
fn summarize_block(block: &ContentBlock) -> String {
let raw = match block {
ContentBlock::Text { text } => text.clone(),
ContentBlock::Thinking { text, .. } => format!("thinking: {text}"),
ContentBlock::ToolUse { name, input, .. } => format!("tool_use {name}({input})"),
ContentBlock::ToolResult {
tool_name,
@@ -258,7 +259,7 @@ fn collect_key_files(messages: &[ConversationMessage]) -> Vec<String> {
.iter()
.flat_map(|message| message.blocks.iter())
.map(|block| match block {
ContentBlock::Text { text } => text.as_str(),
ContentBlock::Text { text } | ContentBlock::Thinking { text, .. } => text.as_str(),
ContentBlock::ToolUse { input, .. } => input.as_str(),
ContentBlock::ToolResult { output, .. } => output.as_str(),
})
@@ -280,10 +281,15 @@ fn infer_current_work(messages: &[ConversationMessage]) -> Option<String> {
fn first_text_block(message: &ConversationMessage) -> Option<&str> {
message.blocks.iter().find_map(|block| match block {
ContentBlock::Text { text } if !text.trim().is_empty() => Some(text.as_str()),
ContentBlock::Text { text } | ContentBlock::Thinking { text, .. }
if !text.trim().is_empty() =>
{
Some(text.as_str())
}
ContentBlock::ToolUse { .. }
| ContentBlock::ToolResult { .. }
| ContentBlock::Text { .. } => None,
| ContentBlock::Text { .. }
| ContentBlock::Thinking { .. } => None,
})
}
@@ -328,7 +334,7 @@ fn estimate_message_tokens(message: &ConversationMessage) -> usize {
.blocks
.iter()
.map(|block| match block {
ContentBlock::Text { text } => text.len() / 4 + 1,
ContentBlock::Text { text } | ContentBlock::Thinking { text, .. } => text.len() / 4 + 1,
ContentBlock::ToolUse { name, input, .. } => (name.len() + input.len()) / 4 + 1,
ContentBlock::ToolResult {
tool_name, output, ..

View File

@@ -17,6 +17,8 @@ pub struct ApiRequest {
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AssistantEvent {
TextDelta(String),
ThinkingDelta(String),
ThinkingSignature(String),
ToolUse {
id: String,
name: String,
@@ -247,15 +249,26 @@ fn build_assistant_message(
events: Vec<AssistantEvent>,
) -> Result<(ConversationMessage, Option<TokenUsage>), RuntimeError> {
let mut text = String::new();
let mut thinking = String::new();
let mut thinking_signature: Option<String> = None;
let mut blocks = Vec::new();
let mut finished = false;
let mut usage = None;
for event in events {
match event {
AssistantEvent::TextDelta(delta) => text.push_str(&delta),
AssistantEvent::TextDelta(delta) => {
flush_thinking_block(&mut thinking, &mut thinking_signature, &mut blocks);
text.push_str(&delta);
}
AssistantEvent::ThinkingDelta(delta) => {
flush_text_block(&mut text, &mut blocks);
thinking.push_str(&delta);
}
AssistantEvent::ThinkingSignature(signature) => thinking_signature = Some(signature),
AssistantEvent::ToolUse { id, name, input } => {
flush_text_block(&mut text, &mut blocks);
flush_thinking_block(&mut thinking, &mut thinking_signature, &mut blocks);
blocks.push(ContentBlock::ToolUse { id, name, input });
}
AssistantEvent::Usage(value) => usage = Some(value),
@@ -266,6 +279,7 @@ fn build_assistant_message(
}
flush_text_block(&mut text, &mut blocks);
flush_thinking_block(&mut thinking, &mut thinking_signature, &mut blocks);
if !finished {
return Err(RuntimeError::new(
@@ -290,6 +304,19 @@ fn flush_text_block(text: &mut String, blocks: &mut Vec<ContentBlock>) {
}
}
fn flush_thinking_block(
thinking: &mut String,
signature: &mut Option<String>,
blocks: &mut Vec<ContentBlock>,
) {
if !thinking.is_empty() || signature.is_some() {
blocks.push(ContentBlock::Thinking {
text: std::mem::take(thinking),
signature: signature.take(),
});
}
}
type ToolHandler = Box<dyn FnMut(&str) -> Result<String, ToolError>>;
#[derive(Default)]
@@ -325,8 +352,8 @@ impl ToolExecutor for StaticToolExecutor {
#[cfg(test)]
mod tests {
use super::{
ApiClient, ApiRequest, AssistantEvent, ConversationRuntime, RuntimeError,
StaticToolExecutor,
build_assistant_message, ApiClient, ApiRequest, AssistantEvent, ConversationRuntime,
RuntimeError, StaticToolExecutor,
};
use crate::compact::CompactionConfig;
use crate::permissions::{
@@ -502,6 +529,29 @@ mod tests {
));
}
#[test]
fn thinking_blocks_are_preserved_separately_from_text() {
let (message, usage) = build_assistant_message(vec![
AssistantEvent::ThinkingDelta("first ".to_string()),
AssistantEvent::ThinkingDelta("second".to_string()),
AssistantEvent::ThinkingSignature("sig-1".to_string()),
AssistantEvent::TextDelta("final".to_string()),
AssistantEvent::MessageStop,
])
.expect("assistant message should build");
assert_eq!(usage, None);
assert!(matches!(
&message.blocks[0],
ContentBlock::Thinking { text, signature }
if text == "first second" && signature.as_deref() == Some("sig-1")
));
assert!(matches!(
&message.blocks[1],
ContentBlock::Text { text } if text == "final"
));
}
#[test]
fn reconstructs_usage_tracker_from_restored_session() {
struct SimpleApi;

View File

@@ -19,6 +19,10 @@ pub enum ContentBlock {
Text {
text: String,
},
Thinking {
text: String,
signature: Option<String>,
},
ToolUse {
id: String,
name: String,
@@ -257,6 +261,19 @@ impl ContentBlock {
object.insert("type".to_string(), JsonValue::String("text".to_string()));
object.insert("text".to_string(), JsonValue::String(text.clone()));
}
Self::Thinking { text, signature } => {
object.insert(
"type".to_string(),
JsonValue::String("thinking".to_string()),
);
object.insert("text".to_string(), JsonValue::String(text.clone()));
if let Some(signature) = signature {
object.insert(
"signature".to_string(),
JsonValue::String(signature.clone()),
);
}
}
Self::ToolUse { id, name, input } => {
object.insert(
"type".to_string(),
@@ -303,6 +320,13 @@ impl ContentBlock {
"text" => Ok(Self::Text {
text: required_string(object, "text")?,
}),
"thinking" => Ok(Self::Thinking {
text: required_string(object, "text")?,
signature: object
.get("signature")
.and_then(JsonValue::as_str)
.map(ToOwned::to_owned),
}),
"tool_use" => Ok(Self::ToolUse {
id: required_string(object, "id")?,
name: required_string(object, "name")?,

File diff suppressed because it is too large Load Diff