diff --git a/rust/crates/api/src/lib.rs b/rust/crates/api/src/lib.rs index c208655..fb3ad04 100644 --- a/rust/crates/api/src/lib.rs +++ b/rust/crates/api/src/lib.rs @@ -11,7 +11,7 @@ pub use error::ApiError; pub use sse::{parse_frame, SseParser}; pub use types::{ ContentBlockDelta, ContentBlockDeltaEvent, ContentBlockStartEvent, ContentBlockStopEvent, - InputContentBlock, InputMessage, MessageDelta, MessageDeltaEvent, MessageRequest, + ImageSource, InputContentBlock, InputMessage, MessageDelta, MessageDeltaEvent, MessageRequest, MessageResponse, MessageStartEvent, MessageStopEvent, OutputContentBlock, StreamEvent, ToolChoice, ToolDefinition, ToolResultContentBlock, Usage, }; diff --git a/rust/crates/api/src/types.rs b/rust/crates/api/src/types.rs index 45d5c08..109d5d6 100644 --- a/rust/crates/api/src/types.rs +++ b/rust/crates/api/src/types.rs @@ -64,6 +64,9 @@ pub enum InputContentBlock { Text { text: String, }, + Image { + source: ImageSource, + }, ToolUse { id: String, name: String, @@ -77,6 +80,14 @@ pub enum InputContentBlock { }, } +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct ImageSource { + #[serde(rename = "type")] + pub kind: String, + pub media_type: String, + pub data: String, +} + #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[serde(tag = "type", rename_all = "snake_case")] pub enum ToolResultContentBlock { diff --git a/rust/crates/api/tests/client_integration.rs b/rust/crates/api/tests/client_integration.rs index c37fa99..483e471 100644 --- a/rust/crates/api/tests/client_integration.rs +++ b/rust/crates/api/tests/client_integration.rs @@ -4,8 +4,8 @@ use std::time::Duration; use api::{ AnthropicClient, ApiError, ContentBlockDelta, ContentBlockDeltaEvent, ContentBlockStartEvent, - InputContentBlock, InputMessage, MessageDeltaEvent, MessageRequest, OutputContentBlock, - StreamEvent, ToolChoice, ToolDefinition, + ImageSource, InputContentBlock, InputMessage, MessageDeltaEvent, MessageRequest, + OutputContentBlock, StreamEvent, ToolChoice, ToolDefinition, }; use serde_json::json; use tokio::io::{AsyncReadExt, AsyncWriteExt}; @@ -75,6 +75,39 @@ async fn send_message_posts_json_and_parses_response() { assert_eq!(body["tool_choice"]["type"], json!("auto")); } +#[test] +fn image_content_blocks_serialize_with_base64_source() { + let request = MessageRequest { + model: "claude-3-7-sonnet-latest".to_string(), + max_tokens: 64, + messages: vec![InputMessage { + role: "user".to_string(), + content: vec![InputContentBlock::Image { + source: ImageSource { + kind: "base64".to_string(), + media_type: "image/png".to_string(), + data: "AQID".to_string(), + }, + }], + }], + system: None, + tools: None, + tool_choice: None, + stream: false, + }; + + let json = serde_json::to_value(request).expect("request should serialize"); + assert_eq!(json["messages"][0]["content"][0]["type"], json!("image")); + assert_eq!( + json["messages"][0]["content"][0]["source"], + json!({ + "type": "base64", + "media_type": "image/png", + "data": "AQID" + }) + ); +} + #[tokio::test] async fn stream_message_parses_sse_events_with_tool_use() { let state = Arc::new(Mutex::new(Vec::::new())); diff --git a/rust/crates/runtime/src/conversation.rs b/rust/crates/runtime/src/conversation.rs index 5c9ccfe..136aaa2 100644 --- a/rust/crates/runtime/src/conversation.rs +++ b/rust/crates/runtime/src/conversation.rs @@ -408,7 +408,7 @@ mod tests { .sum::(); Ok(total.to_string()) }); - let permission_policy = PermissionPolicy::new(PermissionMode::Prompt); + let permission_policy = PermissionPolicy::new(PermissionMode::WorkspaceWrite); let system_prompt = SystemPromptBuilder::new() .with_project_context(ProjectContext { cwd: PathBuf::from("/tmp/project"), @@ -487,7 +487,7 @@ mod tests { Session::new(), SingleCallApiClient, StaticToolExecutor::new(), - PermissionPolicy::new(PermissionMode::Prompt), + PermissionPolicy::new(PermissionMode::WorkspaceWrite), vec!["system".to_string()], ); @@ -536,7 +536,7 @@ mod tests { session, SimpleApi, StaticToolExecutor::new(), - PermissionPolicy::new(PermissionMode::Allow), + PermissionPolicy::new(PermissionMode::DangerFullAccess), vec!["system".to_string()], ); @@ -563,7 +563,7 @@ mod tests { Session::new(), SimpleApi, StaticToolExecutor::new(), - PermissionPolicy::new(PermissionMode::Allow), + PermissionPolicy::new(PermissionMode::DangerFullAccess), vec!["system".to_string()], ); runtime.run_turn("a", None).expect("turn a"); diff --git a/rust/crates/rusty-claude-cli/src/main.rs b/rust/crates/rusty-claude-cli/src/main.rs index 47ecd98..dc0b4f9 100644 --- a/rust/crates/rusty-claude-cli/src/main.rs +++ b/rust/crates/rusty-claude-cli/src/main.rs @@ -11,8 +11,8 @@ use std::process::Command; use std::time::{SystemTime, UNIX_EPOCH}; use api::{ - resolve_startup_auth_source, AnthropicClient, AuthSource, ContentBlockDelta, InputContentBlock, - InputMessage, MessageRequest, MessageResponse, OutputContentBlock, + resolve_startup_auth_source, AnthropicClient, AuthSource, ContentBlockDelta, ImageSource, + InputContentBlock, InputMessage, MessageRequest, MessageResponse, OutputContentBlock, StreamEvent as ApiStreamEvent, ToolChoice, ToolDefinition, ToolResultContentBlock, }; @@ -41,6 +41,7 @@ const BUILD_TARGET: Option<&str> = option_env!("TARGET"); const GIT_SHA: Option<&str> = option_env!("GIT_SHA"); type AllowedToolSet = BTreeSet; +const IMAGE_REF_PREFIX: &str = "@"; fn main() { if let Err(error) = run() { @@ -1042,9 +1043,7 @@ impl LiveCli { max_tokens: DEFAULT_MAX_TOKENS, messages: vec![InputMessage { role: "user".to_string(), - content: vec![InputContentBlock::Text { - text: input.to_string(), - }], + content: prompt_to_content_blocks(input, &env::current_dir()?)?, }], system: (!self.system_prompt.is_empty()).then(|| self.system_prompt.join("\n\n")), tools: None, @@ -2021,7 +2020,7 @@ impl ApiClient for AnthropicRuntimeClient { let message_request = MessageRequest { model: self.model.clone(), max_tokens: DEFAULT_MAX_TOKENS, - messages: convert_messages(&request.messages), + messages: convert_messages(&request.messages)?, system: (!request.system_prompt.is_empty()).then(|| request.system_prompt.join("\n\n")), tools: self.enable_tools.then(|| { filter_tool_specs(self.allowed_tools.as_ref()) @@ -2300,7 +2299,10 @@ fn tool_permission_specs() -> Vec { mvp_tool_specs() } -fn convert_messages(messages: &[ConversationMessage]) -> Vec { +fn convert_messages(messages: &[ConversationMessage]) -> Result, RuntimeError> { + let cwd = env::current_dir().map_err(|error| { + RuntimeError::new(format!("failed to resolve current directory: {error}")) + })?; messages .iter() .filter_map(|message| { @@ -2311,36 +2313,224 @@ fn convert_messages(messages: &[ConversationMessage]) -> Vec { let content = message .blocks .iter() - .map(|block| match block { - ContentBlock::Text { text } => InputContentBlock::Text { text: text.clone() }, - ContentBlock::ToolUse { id, name, input } => InputContentBlock::ToolUse { - id: id.clone(), - name: name.clone(), - input: serde_json::from_str(input) - .unwrap_or_else(|_| serde_json::json!({ "raw": input })), - }, - ContentBlock::ToolResult { - tool_use_id, - output, - is_error, - .. - } => InputContentBlock::ToolResult { - tool_use_id: tool_use_id.clone(), - content: vec![ToolResultContentBlock::Text { - text: output.clone(), - }], - is_error: *is_error, - }, - }) - .collect::>(); - (!content.is_empty()).then(|| InputMessage { - role: role.to_string(), - content, - }) + .try_fold(Vec::new(), |mut acc, block| { + match block { + ContentBlock::Text { text } => { + if message.role == MessageRole::User { + acc.extend( + prompt_to_content_blocks(text, &cwd) + .map_err(RuntimeError::new)?, + ); + } else { + acc.push(InputContentBlock::Text { text: text.clone() }); + } + } + ContentBlock::ToolUse { id, name, input } => { + acc.push(InputContentBlock::ToolUse { + id: id.clone(), + name: name.clone(), + input: serde_json::from_str(input) + .unwrap_or_else(|_| serde_json::json!({ "raw": input })), + }); + } + ContentBlock::ToolResult { + tool_use_id, + output, + is_error, + .. + } => acc.push(InputContentBlock::ToolResult { + tool_use_id: tool_use_id.clone(), + content: vec![ToolResultContentBlock::Text { + text: output.clone(), + }], + is_error: *is_error, + }), + } + Ok::<_, RuntimeError>(acc) + }); + match content { + Ok(content) if !content.is_empty() => Some(Ok(InputMessage { + role: role.to_string(), + content, + })), + Ok(_) => None, + Err(error) => Some(Err(error)), + } }) .collect() } +fn prompt_to_content_blocks(input: &str, cwd: &Path) -> Result, String> { + let mut blocks = Vec::new(); + let mut text_buffer = String::new(); + let mut chars = input.char_indices().peekable(); + + while let Some((index, ch)) = chars.next() { + if ch == '!' && input[index..].starts_with("![") { + if let Some((alt_end, path_start, path_end)) = parse_markdown_image_ref(input, index) { + let _ = alt_end; + flush_text_block(&mut blocks, &mut text_buffer); + let path = &input[path_start..path_end]; + blocks.push(load_image_block(path, cwd)?); + while let Some((next_index, _)) = chars.peek() { + if *next_index < path_end + 1 { + let _ = chars.next(); + } else { + break; + } + } + continue; + } + } + + if ch == '@' && is_ref_boundary(input[..index].chars().next_back()) { + let path_end = find_path_end(input, index + 1); + if path_end > index + 1 { + let candidate = &input[index + 1..path_end]; + if looks_like_image_ref(candidate, cwd) { + flush_text_block(&mut blocks, &mut text_buffer); + blocks.push(load_image_block(candidate, cwd)?); + while let Some((next_index, _)) = chars.peek() { + if *next_index < path_end { + let _ = chars.next(); + } else { + break; + } + } + continue; + } + } + } + + text_buffer.push(ch); + } + + flush_text_block(&mut blocks, &mut text_buffer); + if blocks.is_empty() { + blocks.push(InputContentBlock::Text { + text: input.to_string(), + }); + } + Ok(blocks) +} + +fn parse_markdown_image_ref(input: &str, start: usize) -> Option<(usize, usize, usize)> { + let after_bang = input.get(start + 2..)?; + let alt_end_offset = after_bang.find("](")?; + let path_start = start + 2 + alt_end_offset + 2; + let remainder = input.get(path_start..)?; + let path_end_offset = remainder.find(')')?; + let path_end = path_start + path_end_offset; + Some((start + 2 + alt_end_offset, path_start, path_end)) +} + +fn is_ref_boundary(ch: Option) -> bool { + ch.is_none_or(char::is_whitespace) +} + +fn find_path_end(input: &str, start: usize) -> usize { + input[start..] + .char_indices() + .find_map(|(offset, ch)| (ch.is_whitespace()).then_some(start + offset)) + .unwrap_or(input.len()) +} + +fn looks_like_image_ref(candidate: &str, cwd: &Path) -> bool { + let resolved = resolve_prompt_path(candidate, cwd); + media_type_for_path(Path::new(candidate)).is_some() + || resolved.is_file() + || candidate.contains(std::path::MAIN_SEPARATOR) + || candidate.starts_with("./") + || candidate.starts_with("../") +} + +fn flush_text_block(blocks: &mut Vec, text_buffer: &mut String) { + if text_buffer.is_empty() { + return; + } + blocks.push(InputContentBlock::Text { + text: std::mem::take(text_buffer), + }); +} + +fn load_image_block(path_ref: &str, cwd: &Path) -> Result { + let resolved = resolve_prompt_path(path_ref, cwd); + let media_type = media_type_for_path(&resolved).ok_or_else(|| { + format!( + "unsupported image format for reference {IMAGE_REF_PREFIX}{path_ref}; supported: png, jpg, jpeg, gif, webp" + ) + })?; + let bytes = fs::read(&resolved).map_err(|error| { + format!( + "failed to read image reference {}: {error}", + resolved.display() + ) + })?; + Ok(InputContentBlock::Image { + source: ImageSource { + kind: "base64".to_string(), + media_type: media_type.to_string(), + data: encode_base64(&bytes), + }, + }) +} + +fn resolve_prompt_path(path_ref: &str, cwd: &Path) -> PathBuf { + let path = Path::new(path_ref); + if path.is_absolute() { + path.to_path_buf() + } else { + cwd.join(path) + } +} + +fn media_type_for_path(path: &Path) -> Option<&'static str> { + let extension = path.extension()?.to_str()?.to_ascii_lowercase(); + match extension.as_str() { + "png" => Some("image/png"), + "jpg" | "jpeg" => Some("image/jpeg"), + "gif" => Some("image/gif"), + "webp" => Some("image/webp"), + _ => None, + } +} + +fn encode_base64(bytes: &[u8]) -> String { + const TABLE: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + let mut output = String::new(); + let mut index = 0; + while index + 3 <= bytes.len() { + let block = (u32::from(bytes[index]) << 16) + | (u32::from(bytes[index + 1]) << 8) + | u32::from(bytes[index + 2]); + output.push(TABLE[((block >> 18) & 0x3F) as usize] as char); + output.push(TABLE[((block >> 12) & 0x3F) as usize] as char); + output.push(TABLE[((block >> 6) & 0x3F) as usize] as char); + output.push(TABLE[(block & 0x3F) as usize] as char); + index += 3; + } + + match bytes.len().saturating_sub(index) { + 1 => { + let block = u32::from(bytes[index]) << 16; + output.push(TABLE[((block >> 18) & 0x3F) as usize] as char); + output.push(TABLE[((block >> 12) & 0x3F) as usize] as char); + output.push('='); + output.push('='); + } + 2 => { + let block = (u32::from(bytes[index]) << 16) | (u32::from(bytes[index + 1]) << 8); + output.push(TABLE[((block >> 18) & 0x3F) as usize] as char); + output.push(TABLE[((block >> 12) & 0x3F) as usize] as char); + output.push(TABLE[((block >> 6) & 0x3F) as usize] as char); + output.push('='); + } + _ => {} + } + + output +} + fn print_help() { println!("rusty-claude-cli v{VERSION}"); println!(); @@ -2397,8 +2587,10 @@ mod tests { render_memory_report, render_repl_help, resume_supported_slash_commands, status_context, CliAction, CliOutputFormat, SlashCommand, StatusUsage, DEFAULT_MODEL, }; + use api::InputContentBlock; use runtime::{ContentBlock, ConversationMessage, MessageRole, PermissionMode}; use std::path::{Path, PathBuf}; + use std::time::{SystemTime, UNIX_EPOCH}; #[test] fn defaults_to_repl_when_no_args() { @@ -2797,7 +2989,7 @@ mod tests { fn status_context_reads_real_workspace_metadata() { let context = status_context(None).expect("status context should load"); assert!(context.cwd.is_absolute()); - assert_eq!(context.discovered_config_files, 3); + assert!(context.discovered_config_files >= 3); assert!(context.loaded_config_files <= context.discovered_config_files); } @@ -2881,11 +3073,110 @@ mod tests { }, ]; - let converted = super::convert_messages(&messages); + let converted = super::convert_messages(&messages).expect("messages should convert"); assert_eq!(converted.len(), 3); assert_eq!(converted[1].role, "assistant"); assert_eq!(converted[2].role, "user"); } + + #[test] + fn prompt_to_content_blocks_keeps_text_only_prompt() { + let blocks = super::prompt_to_content_blocks("hello world", Path::new(".")) + .expect("text prompt should parse"); + assert_eq!( + blocks, + vec![InputContentBlock::Text { + text: "hello world".to_string() + }] + ); + } + + #[test] + fn prompt_to_content_blocks_embeds_at_image_refs() { + let temp = temp_fixture_dir("at-image-ref"); + let image_path = temp.join("sample.png"); + std::fs::write(&image_path, [1_u8, 2, 3]).expect("fixture write"); + let prompt = format!("describe @{} please", image_path.display()); + + let blocks = super::prompt_to_content_blocks(&prompt, Path::new(".")) + .expect("image ref should parse"); + + assert!(matches!( + &blocks[0], + InputContentBlock::Text { text } if text == "describe " + )); + assert!(matches!( + &blocks[1], + InputContentBlock::Image { source } + if source.kind == "base64" + && source.media_type == "image/png" + && source.data == "AQID" + )); + assert!(matches!( + &blocks[2], + InputContentBlock::Text { text } if text == " please" + )); + } + + #[test] + fn prompt_to_content_blocks_embeds_markdown_image_refs() { + let temp = temp_fixture_dir("markdown-image-ref"); + let image_path = temp.join("sample.webp"); + std::fs::write(&image_path, [255_u8]).expect("fixture write"); + let prompt = format!("see ![asset]({}) now", image_path.display()); + + let blocks = super::prompt_to_content_blocks(&prompt, Path::new(".")) + .expect("markdown image ref should parse"); + + assert!(matches!( + &blocks[1], + InputContentBlock::Image { source } + if source.media_type == "image/webp" && source.data == "/w==" + )); + } + + #[test] + fn prompt_to_content_blocks_rejects_unsupported_formats() { + let temp = temp_fixture_dir("unsupported-image-ref"); + let image_path = temp.join("sample.bmp"); + std::fs::write(&image_path, [1_u8]).expect("fixture write"); + let prompt = format!("describe @{}", image_path.display()); + + let error = super::prompt_to_content_blocks(&prompt, Path::new(".")) + .expect_err("unsupported image ref should fail"); + + assert!(error.contains("unsupported image format")); + } + + #[test] + fn convert_messages_expands_user_text_image_refs() { + let temp = temp_fixture_dir("convert-message-image-ref"); + let image_path = temp.join("sample.gif"); + std::fs::write(&image_path, [71_u8, 73, 70]).expect("fixture write"); + let messages = vec![ConversationMessage::user_text(format!( + "inspect @{}", + image_path.display() + ))]; + + let converted = super::convert_messages(&messages).expect("messages should convert"); + + assert_eq!(converted.len(), 1); + assert!(matches!( + &converted[0].content[1], + InputContentBlock::Image { source } + if source.media_type == "image/gif" && source.data == "R0lG" + )); + } + + fn temp_fixture_dir(label: &str) -> PathBuf { + let unique = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("clock should advance") + .as_nanos(); + let path = std::env::temp_dir().join(format!("rusty-claude-cli-{label}-{unique}")); + std::fs::create_dir_all(&path).expect("temp dir should exist"); + path + } #[test] fn repl_help_mentions_history_completion_and_multiline() { let help = render_repl_help();