fix(browser): retry agent-browser clicks before JS fallback
Retry transient agent-browser click failures, refresh snapshot refs when the selector is a snapshot ref, and fall back to eval-based JavaScript clicking for CSS and text selectors. Add targeted tests for the retry heuristics and fallback script generation.
This commit is contained in:
parent
5f8521c137
commit
58800f5e4c
@ -9,6 +9,7 @@ use super::traits::{Tool, ToolResult};
|
||||
use crate::security::SecurityPolicy;
|
||||
use anyhow::Context;
|
||||
use async_trait::async_trait;
|
||||
use base64::Engine;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::{json, Value};
|
||||
use std::io::ErrorKind;
|
||||
@ -120,6 +121,8 @@ struct AgentBrowserResponse {
|
||||
error: Option<String>,
|
||||
}
|
||||
|
||||
const AGENT_BROWSER_CLICK_RETRY_DELAY_MS: u64 = 180;
|
||||
|
||||
/// Response format from computer-use sidecar.
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct ComputerUseResponse {
|
||||
@ -522,7 +525,9 @@ impl BrowserTool {
|
||||
}
|
||||
|
||||
BrowserAction::Click { selector } => {
|
||||
let resp = self.run_command(&["click", &selector]).await?;
|
||||
let resp = self
|
||||
.run_agent_browser_click_with_recovery(&selector)
|
||||
.await?;
|
||||
self.to_result(resp)
|
||||
}
|
||||
|
||||
@ -626,6 +631,62 @@ impl BrowserTool {
|
||||
}
|
||||
}
|
||||
|
||||
async fn run_agent_browser_click_with_recovery(
|
||||
&self,
|
||||
selector: &str,
|
||||
) -> anyhow::Result<AgentBrowserResponse> {
|
||||
let first = self.run_command(&["click", selector]).await?;
|
||||
if first.success || !is_agent_browser_click_retryable(first.error.as_deref()) {
|
||||
return Ok(first);
|
||||
}
|
||||
|
||||
tokio::time::sleep(Duration::from_millis(AGENT_BROWSER_CLICK_RETRY_DELAY_MS)).await;
|
||||
|
||||
if is_agent_browser_ref_selector(selector) {
|
||||
let _ = self.run_command(&["snapshot", "-i"]).await;
|
||||
}
|
||||
|
||||
let retry = self.run_command(&["click", selector]).await?;
|
||||
if retry.success || is_agent_browser_ref_selector(selector) {
|
||||
return Ok(retry);
|
||||
}
|
||||
|
||||
let js_fallback = self.run_agent_browser_js_click(selector).await?;
|
||||
if js_fallback.success {
|
||||
return Ok(AgentBrowserResponse {
|
||||
success: true,
|
||||
data: Some(json!({
|
||||
"backend": "agent_browser",
|
||||
"action": "click",
|
||||
"selector": selector,
|
||||
"fallback": "javascript_eval",
|
||||
"data": js_fallback.data.unwrap_or_else(|| json!({"clicked": true})),
|
||||
})),
|
||||
error: None,
|
||||
});
|
||||
}
|
||||
|
||||
Ok(AgentBrowserResponse {
|
||||
success: false,
|
||||
data: None,
|
||||
error: Some(format!(
|
||||
"agent-browser click failed for selector '{selector}'. direct_error={}; retry_error={}; js_fallback_error={}",
|
||||
first.error.as_deref().unwrap_or("unknown"),
|
||||
retry.error.as_deref().unwrap_or("unknown"),
|
||||
js_fallback.error.as_deref().unwrap_or("unknown"),
|
||||
)),
|
||||
})
|
||||
}
|
||||
|
||||
async fn run_agent_browser_js_click(
|
||||
&self,
|
||||
selector: &str,
|
||||
) -> anyhow::Result<AgentBrowserResponse> {
|
||||
let script = agent_browser_js_click_script(selector);
|
||||
let encoded = base64::engine::general_purpose::STANDARD.encode(script);
|
||||
self.run_command(&["eval", "-b", &encoded]).await
|
||||
}
|
||||
|
||||
#[allow(clippy::unused_async)]
|
||||
async fn execute_rust_native_action(
|
||||
&self,
|
||||
@ -998,6 +1059,84 @@ impl BrowserTool {
|
||||
}
|
||||
}
|
||||
|
||||
fn is_agent_browser_ref_selector(selector: &str) -> bool {
|
||||
let trimmed = selector.trim();
|
||||
trimmed.starts_with('@') && trimmed.len() > 1
|
||||
}
|
||||
|
||||
fn is_agent_browser_click_retryable(error: Option<&str>) -> bool {
|
||||
let Some(error) = error else {
|
||||
return false;
|
||||
};
|
||||
|
||||
let message = error.to_ascii_lowercase();
|
||||
[
|
||||
"timeout",
|
||||
"not found",
|
||||
"detached",
|
||||
"stale",
|
||||
"not attached",
|
||||
"intercept",
|
||||
"not clickable",
|
||||
"not visible",
|
||||
"element is outside of the viewport",
|
||||
"another element",
|
||||
]
|
||||
.iter()
|
||||
.any(|needle| message.contains(needle))
|
||||
}
|
||||
|
||||
fn agent_browser_js_click_script(selector: &str) -> String {
|
||||
let selector_literal = serde_json::to_string(selector).unwrap_or_else(|_| "\"\"".to_string());
|
||||
|
||||
format!(
|
||||
r#"(() => {{
|
||||
const raw = {selector_literal};
|
||||
const normalizedText = raw.startsWith('text=') ? raw.slice(5).trim().toLowerCase() : null;
|
||||
|
||||
const interactiveCandidates = () => Array.from(
|
||||
document.querySelectorAll('a,button,input,select,textarea,label,summary,[role],[tabindex],[onclick]')
|
||||
);
|
||||
|
||||
let el = null;
|
||||
if (normalizedText !== null) {{
|
||||
el = interactiveCandidates().find((candidate) => {{
|
||||
const text = (candidate.innerText || candidate.textContent || '').trim().toLowerCase();
|
||||
return text.includes(normalizedText);
|
||||
}}) || null;
|
||||
}} else {{
|
||||
try {{
|
||||
el = document.querySelector(raw);
|
||||
}} catch (error) {{
|
||||
return {{ clicked: false, error: `invalid selector: ${{error.message}}` }};
|
||||
}}
|
||||
}}
|
||||
|
||||
if (!el) {{
|
||||
return {{ clicked: false, error: 'element not found' }};
|
||||
}}
|
||||
|
||||
el.scrollIntoView({{ block: 'center', inline: 'center' }});
|
||||
const rect = el.getBoundingClientRect();
|
||||
const centerX = rect.left + rect.width / 2;
|
||||
const centerY = rect.top + rect.height / 2;
|
||||
const topElement = document.elementFromPoint(centerX, centerY);
|
||||
const target = topElement && (topElement === el || el.contains(topElement)) ? topElement : el;
|
||||
|
||||
target.dispatchEvent(new MouseEvent('mouseover', {{ bubbles: true, cancelable: true, view: window }}));
|
||||
target.dispatchEvent(new MouseEvent('mousedown', {{ bubbles: true, cancelable: true, view: window }}));
|
||||
target.dispatchEvent(new MouseEvent('mouseup', {{ bubbles: true, cancelable: true, view: window }}));
|
||||
target.click();
|
||||
|
||||
return {{
|
||||
clicked: true,
|
||||
tag: target.tagName ? target.tagName.toLowerCase() : null,
|
||||
text: (target.innerText || target.textContent || '').trim().slice(0, 120),
|
||||
}};
|
||||
}})()"#
|
||||
)
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Tool for BrowserTool {
|
||||
fn name(&self) -> &str {
|
||||
@ -2616,6 +2755,42 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn agent_browser_ref_selector_detection_matches_snapshot_refs() {
|
||||
assert!(is_agent_browser_ref_selector("@e1"));
|
||||
assert!(is_agent_browser_ref_selector(" @node-42 "));
|
||||
assert!(!is_agent_browser_ref_selector("#submit"));
|
||||
assert!(!is_agent_browser_ref_selector("text=Continue"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn agent_browser_click_retry_detection_matches_interactability_failures() {
|
||||
assert!(is_agent_browser_click_retryable(Some(
|
||||
"Element is outside of the viewport"
|
||||
)));
|
||||
assert!(is_agent_browser_click_retryable(Some(
|
||||
"element click intercepted by overlay"
|
||||
)));
|
||||
assert!(is_agent_browser_click_retryable(Some(
|
||||
"Node is detached from document"
|
||||
)));
|
||||
assert!(!is_agent_browser_click_retryable(Some(
|
||||
"Host 'localhost' not in browser.allowed_domains"
|
||||
)));
|
||||
assert!(!is_agent_browser_click_retryable(None));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn agent_browser_js_click_script_supports_text_and_css_selectors() {
|
||||
let text_script = agent_browser_js_click_script("text=Continue");
|
||||
assert!(text_script.contains("raw.startsWith('text=')"));
|
||||
assert!(text_script.contains("interactiveCandidates"));
|
||||
|
||||
let css_script = agent_browser_js_click_script("#submit");
|
||||
assert!(css_script.contains("document.querySelector(raw)"));
|
||||
assert!(css_script.contains("target.click()"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn computer_use_endpoint_rejects_public_http_by_default() {
|
||||
let security = Arc::new(SecurityPolicy::default());
|
||||
|
||||
Loading…
Reference in New Issue
Block a user