From 019e9900ed3061fd5c6efeea2b65d238702aff3a Mon Sep 17 00:00:00 2001
From: Yeachan-Heo
Date: Tue, 31 Mar 2026 20:27:09 +0000
Subject: [PATCH] Relax WebSearch domain filter inputs for parity

Accept case-insensitive domain filters and URL-style allow/block list
entries so WebSearch behaves more forgivingly for caller-provided domain
constraints. Keep the change small and limited to host matching logic
plus regression coverage.

Constraint: Must not touch unrelated dirty api files in this worktree
Constraint: Keep the change limited to rust/crates/tools
Rejected: Add full public suffix or hostname normalization logic | too broad for this parity slice
Confidence: high
Scope-risk: narrow
Reversibility: clean
Directive: Preserve simple host matching semantics unless upstream parity proves a more exact domain model is required
Tested: cargo test -p tools
Not-tested: internationalized domain names and punycode edge cases
---
 rust/crates/tools/src/lib.rs | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/rust/crates/tools/src/lib.rs b/rust/crates/tools/src/lib.rs
index 89c2dc5..b82c707 100644
--- a/rust/crates/tools/src/lib.rs
+++ b/rust/crates/tools/src/lib.rs
@@ -974,12 +974,26 @@ fn host_matches_list(url: &str, domains: &[String]) -> bool {
     let Some(host) = parsed.host_str() else {
         return false;
     };
+    let host = host.to_ascii_lowercase();
     domains.iter().any(|domain| {
-        let normalized = domain.trim().trim_start_matches('.');
-        host == normalized || host.ends_with(&format!(".{normalized}"))
+        let normalized = normalize_domain_filter(domain);
+        !normalized.is_empty() && (host == normalized || host.ends_with(&format!(".{normalized}")))
     })
 }
 
+fn normalize_domain_filter(domain: &str) -> String {
+    let trimmed = domain.trim();
+    let candidate = reqwest::Url::parse(trimmed)
+        .ok()
+        .and_then(|url| url.host_str().map(str::to_string))
+        .unwrap_or_else(|| trimmed.to_string());
+    candidate
+        .trim()
+        .trim_start_matches('.')
+        .trim_end_matches('/')
+        .to_ascii_lowercase()
+}
+
 fn dedupe_hits(hits: &mut Vec) {
     let mut seen = BTreeSet::new();
     hits.retain(|hit| seen.insert(hit.url.clone()));
@@ -1856,8 +1870,8 @@ mod tests {
             "WebSearch",
             &json!({
                 "query": "rust web search",
-                "allowed_domains": ["docs.rs"],
-                "blocked_domains": ["example.com"]
+                "allowed_domains": ["https://DOCS.rs/"],
+                "blocked_domains": ["HTTPS://EXAMPLE.COM"]
             }),
         )
         .expect("WebSearch should succeed");