Improve WebFetch title prompts for HTML pages

Make title-focused WebFetch prompts prefer the real HTML <title> value when present instead of always falling back to the first rendered text line. Keep the behavior narrow and preserve the existing summary path for non-title prompts.

Constraint: Must not touch unrelated dirty api files in this worktree
Constraint: Keep the change limited to rust/crates/tools
Rejected: Broader HTML parsing dependency | not needed for this small parity slice
Confidence: high
Scope-risk: narrow
Reversibility: clean
Directive: Preserve lightweight HTML handling unless parity requires a materially more robust parser
Tested: cargo test -p tools
Not-tested: malformed HTML with mixed-case or nested title edge cases
This commit is contained in:
Yeachan-Heo
2026-03-31 20:26:06 +00:00
parent 4db21e9595
commit 67423d005a

View File

@@ -639,7 +639,7 @@ fn execute_web_fetch(input: &WebFetchInput) -> Result<WebFetchOutput, String> {
let body = response.text().map_err(|error| error.to_string())?;
let bytes = body.len();
let normalized = normalize_fetched_content(&body, &content_type);
let result = summarize_web_fetch(&final_url, &input.prompt, &normalized);
let result = summarize_web_fetch(&final_url, &input.prompt, &normalized, &body, &content_type);
Ok(WebFetchOutput {
bytes,
@@ -750,12 +750,18 @@ fn normalize_fetched_content(body: &str, content_type: &str) -> String {
}
}
fn summarize_web_fetch(url: &str, prompt: &str, content: &str) -> String {
fn summarize_web_fetch(
url: &str,
prompt: &str,
content: &str,
raw_body: &str,
content_type: &str,
) -> String {
let lower_prompt = prompt.to_lowercase();
let compact = collapse_whitespace(content);
let detail = if lower_prompt.contains("title") {
extract_title(content)
extract_title(content, raw_body, content_type)
.map(|title| format!("Title: {title}"))
.unwrap_or_else(|| preview_text(&compact, 600))
} else if lower_prompt.contains("summary") || lower_prompt.contains("summarize") {
@@ -768,7 +774,21 @@ fn summarize_web_fetch(url: &str, prompt: &str, content: &str) -> String {
format!("Fetched {url}\n{detail}")
}
fn extract_title(content: &str) -> Option<String> {
fn extract_title(content: &str, raw_body: &str, content_type: &str) -> Option<String> {
if content_type.contains("html") {
let lowered = raw_body.to_lowercase();
if let Some(start) = lowered.find("<title>") {
let after = start + "<title>".len();
if let Some(end_rel) = lowered[after..].find("</title>") {
let title =
collapse_whitespace(&decode_html_entities(&raw_body[after..after + end_rel]));
if !title.is_empty() {
return Some(title);
}
}
}
}
for line in content.lines() {
let trimmed = line.trim();
if !trimmed.is_empty() {
@@ -1798,6 +1818,18 @@ mod tests {
assert!(summary.contains("Fetched"));
assert!(summary.contains("Test Page"));
assert!(summary.contains("Hello world from local server"));
let titled = execute_tool(
"WebFetch",
&json!({
"url": format!("http://{}/page", server.addr()),
"prompt": "What is the page title?"
}),
)
.expect("WebFetch title query should succeed");
let titled_output: serde_json::Value = serde_json::from_str(&titled).expect("valid json");
let titled_summary = titled_output["result"].as_str().expect("result string");
assert!(titled_summary.contains("Title: Ignored"));
}
#[test]