fix(channels): robust qq/feishu image delivery and multimodal proxy fetch routing
This commit is contained in:
parent
f18fac5b26
commit
61398eb900
@ -622,6 +622,11 @@ Notes:
|
||||
- Remote URL only when `allow_remote_fetch = true`
|
||||
- Allowed MIME types: `image/png`, `image/jpeg`, `image/webp`, `image/gif`, `image/bmp`.
|
||||
- When the active provider does not support vision, requests fail with a structured capability error (`capability=vision`) instead of silently dropping images.
|
||||
- In `proxy.scope = "services"` mode, remote image fetch uses service-key routing. For best compatibility include relevant selectors/keys such as:
|
||||
- `channel.qq` (QQ media hosts like `multimedia.nt.qq.com.cn`)
|
||||
- `tool.multimodal` (dedicated multimodal fetch path)
|
||||
- `tool.http_request` (compatibility fallback path)
|
||||
- `provider.*` or the active provider key (for example `provider.openai`)
|
||||
|
||||
## `[browser]`
|
||||
|
||||
|
||||
@ -113,14 +113,14 @@ Use when only part of the system should use proxy (for example specific provider
|
||||
### 5.1 Target specific services
|
||||
|
||||
```json
|
||||
{"action":"set","enabled":true,"scope":"services","services":["provider.openai","tool.http_request","channel.telegram"],"all_proxy":"socks5h://127.0.0.1:1080","no_proxy":["localhost","127.0.0.1",".internal"]}
|
||||
{"action":"set","enabled":true,"scope":"services","services":["provider.openai","tool.multimodal","tool.http_request","channel.telegram"],"all_proxy":"socks5h://127.0.0.1:1080","no_proxy":["localhost","127.0.0.1",".internal"]}
|
||||
{"action":"get"}
|
||||
```
|
||||
|
||||
### 5.2 Target by selectors
|
||||
|
||||
```json
|
||||
{"action":"set","enabled":true,"scope":"services","services":["provider.*","tool.*"],"http_proxy":"http://127.0.0.1:7890"}
|
||||
{"action":"set","enabled":true,"scope":"services","services":["provider.*","tool.*","channel.qq"],"http_proxy":"http://127.0.0.1:7890"}
|
||||
{"action":"get"}
|
||||
```
|
||||
|
||||
|
||||
@ -1146,8 +1146,12 @@ pub async fn run_tool_call_loop(
|
||||
.into());
|
||||
}
|
||||
|
||||
let prepared_messages =
|
||||
multimodal::prepare_messages_for_provider(history, multimodal_config).await?;
|
||||
let prepared_messages = multimodal::prepare_messages_for_provider_with_provider_hint(
|
||||
history,
|
||||
multimodal_config,
|
||||
Some(provider_name),
|
||||
)
|
||||
.await?;
|
||||
let mut request_messages = prepared_messages.messages.clone();
|
||||
if let Some(prompt) = missing_tool_call_retry_prompt.take() {
|
||||
request_messages.push(ChatMessage::user(prompt));
|
||||
|
||||
@ -5,6 +5,7 @@ use base64::Engine;
|
||||
use futures_util::{SinkExt, StreamExt};
|
||||
use prost::Message as ProstMessage;
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use std::sync::{Arc, RwLock as StdRwLock};
|
||||
use std::time::{Duration, Instant};
|
||||
use tokio::sync::RwLock;
|
||||
@ -304,6 +305,146 @@ fn parse_image_key_value(content: &serde_json::Value) -> Option<String> {
|
||||
}
|
||||
}
|
||||
|
||||
fn is_image_filename(path_like: &str) -> bool {
|
||||
let normalized = path_like
|
||||
.split('?')
|
||||
.next()
|
||||
.unwrap_or(path_like)
|
||||
.split('#')
|
||||
.next()
|
||||
.unwrap_or(path_like)
|
||||
.to_ascii_lowercase();
|
||||
|
||||
normalized.ends_with(".png")
|
||||
|| normalized.ends_with(".jpg")
|
||||
|| normalized.ends_with(".jpeg")
|
||||
|| normalized.ends_with(".gif")
|
||||
|| normalized.ends_with(".webp")
|
||||
|| normalized.ends_with(".bmp")
|
||||
|| normalized.ends_with(".heic")
|
||||
|| normalized.ends_with(".heif")
|
||||
|| normalized.ends_with(".svg")
|
||||
}
|
||||
|
||||
fn parse_image_marker_line(line: &str) -> Option<&str> {
|
||||
let trimmed = line.trim();
|
||||
let marker = trimmed.strip_prefix("[IMAGE:")?.strip_suffix(']')?.trim();
|
||||
if marker.is_empty() {
|
||||
return None;
|
||||
}
|
||||
Some(marker)
|
||||
}
|
||||
|
||||
fn is_data_image_uri(target: &str) -> bool {
|
||||
let lower = target.trim().to_ascii_lowercase();
|
||||
lower.starts_with("data:image/") && lower.contains(";base64,")
|
||||
}
|
||||
|
||||
fn extract_local_image_path_line(line: &str) -> Option<String> {
|
||||
let trimmed = line.trim();
|
||||
if trimmed.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let candidate = trimmed.trim_matches(|c| matches!(c, '`' | '"' | '\''));
|
||||
let candidate = candidate.strip_prefix("file://").unwrap_or(candidate);
|
||||
if candidate.is_empty() || candidate.contains('\0') {
|
||||
return None;
|
||||
}
|
||||
|
||||
if !is_image_filename(candidate) {
|
||||
return None;
|
||||
}
|
||||
|
||||
let path = Path::new(candidate);
|
||||
if !path.is_file() {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(candidate.to_string())
|
||||
}
|
||||
|
||||
fn parse_outgoing_content(content: &str) -> (String, Vec<String>) {
|
||||
let mut text_lines = Vec::new();
|
||||
let mut image_targets = Vec::new();
|
||||
|
||||
for line in content.lines() {
|
||||
if let Some(marker_target) = parse_image_marker_line(line) {
|
||||
image_targets.push(marker_target.to_string());
|
||||
continue;
|
||||
}
|
||||
|
||||
let trimmed = line.trim();
|
||||
if is_data_image_uri(trimmed) {
|
||||
image_targets.push(trimmed.to_string());
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(local_path) = extract_local_image_path_line(line) {
|
||||
image_targets.push(local_path);
|
||||
continue;
|
||||
}
|
||||
|
||||
text_lines.push(line);
|
||||
}
|
||||
|
||||
(text_lines.join("\n").trim().to_string(), image_targets)
|
||||
}
|
||||
|
||||
fn decode_data_image_uri(source: &str) -> anyhow::Result<(Vec<u8>, String)> {
|
||||
let trimmed = source.trim();
|
||||
let (header, payload) = trimmed
|
||||
.split_once(',')
|
||||
.ok_or_else(|| anyhow::anyhow!("invalid data URI: missing comma separator"))?;
|
||||
|
||||
let lower_header = header.to_ascii_lowercase();
|
||||
if !lower_header.starts_with("data:image/") {
|
||||
anyhow::bail!("unsupported data URI mime (expected image/*): {header}");
|
||||
}
|
||||
if !lower_header.contains(";base64") {
|
||||
anyhow::bail!("unsupported data URI encoding (expected base64): {header}");
|
||||
}
|
||||
|
||||
let mime = header
|
||||
.trim_start_matches("data:")
|
||||
.split(';')
|
||||
.next()
|
||||
.unwrap_or("image/png")
|
||||
.trim()
|
||||
.to_ascii_lowercase();
|
||||
|
||||
let bytes = base64::engine::general_purpose::STANDARD
|
||||
.decode(payload.trim())
|
||||
.map_err(|e| anyhow::anyhow!("invalid data URI base64 payload: {e}"))?;
|
||||
if bytes.is_empty() {
|
||||
anyhow::bail!("image payload is empty");
|
||||
}
|
||||
|
||||
Ok((bytes, mime))
|
||||
}
|
||||
|
||||
fn image_extension_from_mime(mime: &str) -> &'static str {
|
||||
match mime {
|
||||
"image/jpeg" => "jpg",
|
||||
"image/gif" => "gif",
|
||||
"image/webp" => "webp",
|
||||
"image/bmp" => "bmp",
|
||||
"image/svg+xml" => "svg",
|
||||
"image/heic" => "heic",
|
||||
"image/heif" => "heif",
|
||||
_ => "png",
|
||||
}
|
||||
}
|
||||
|
||||
fn display_image_target(target: &str) -> String {
|
||||
let trimmed = target.trim();
|
||||
if is_data_image_uri(trimmed) {
|
||||
"[inline image data]".to_string()
|
||||
} else {
|
||||
trimmed.to_string()
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_lark_token_ttl_seconds(body: &serde_json::Value) -> u64 {
|
||||
let ttl = body
|
||||
.get("expire")
|
||||
@ -1207,6 +1348,256 @@ impl LarkChannel {
|
||||
}
|
||||
}
|
||||
|
||||
fn image_upload_url(&self) -> String {
|
||||
format!("{}/im/v1/images", self.api_base())
|
||||
}
|
||||
|
||||
async fn send_image_once(
|
||||
&self,
|
||||
url: &str,
|
||||
token: &str,
|
||||
recipient: &str,
|
||||
image_key: &str,
|
||||
) -> anyhow::Result<(reqwest::StatusCode, serde_json::Value)> {
|
||||
let content = serde_json::json!({ "image_key": image_key }).to_string();
|
||||
let body = serde_json::json!({
|
||||
"receive_id": recipient,
|
||||
"msg_type": "image",
|
||||
"content": content,
|
||||
});
|
||||
|
||||
self.send_text_once(url, token, &body).await
|
||||
}
|
||||
|
||||
async fn upload_image_once(
|
||||
&self,
|
||||
url: &str,
|
||||
token: &str,
|
||||
bytes: Vec<u8>,
|
||||
file_name: &str,
|
||||
) -> anyhow::Result<(reqwest::StatusCode, serde_json::Value)> {
|
||||
let part = reqwest::multipart::Part::bytes(bytes).file_name(file_name.to_string());
|
||||
let form = reqwest::multipart::Form::new()
|
||||
.text("image_type", "message")
|
||||
.part("image", part);
|
||||
|
||||
let resp = self
|
||||
.http_client()
|
||||
.post(url)
|
||||
.header("Authorization", format!("Bearer {token}"))
|
||||
.multipart(form)
|
||||
.send()
|
||||
.await?;
|
||||
let status = resp.status();
|
||||
let raw = resp.text().await.unwrap_or_default();
|
||||
let parsed = serde_json::from_str::<serde_json::Value>(&raw)
|
||||
.unwrap_or_else(|_| serde_json::json!({ "raw": raw }));
|
||||
Ok((status, parsed))
|
||||
}
|
||||
|
||||
async fn resolve_outgoing_image_target(
|
||||
&self,
|
||||
target: &str,
|
||||
) -> anyhow::Result<(Vec<u8>, String, String)> {
|
||||
let trimmed = target.trim();
|
||||
|
||||
if is_data_image_uri(trimmed) {
|
||||
let (bytes, mime) = decode_data_image_uri(trimmed)?;
|
||||
let ext = image_extension_from_mime(&mime);
|
||||
return Ok((bytes, format!("image.{ext}"), mime));
|
||||
}
|
||||
|
||||
if trimmed.starts_with("http://") || trimmed.starts_with("https://") {
|
||||
let resp = self.http_client().get(trimmed).send().await?;
|
||||
if !resp.status().is_success() {
|
||||
let status = resp.status();
|
||||
let body = resp.text().await.unwrap_or_default();
|
||||
let sanitized = crate::providers::sanitize_api_error(&body);
|
||||
anyhow::bail!(
|
||||
"failed to fetch remote image {trimmed}: status={status}, body={sanitized}"
|
||||
);
|
||||
}
|
||||
|
||||
let content_type = resp
|
||||
.headers()
|
||||
.get(reqwest::header::CONTENT_TYPE)
|
||||
.and_then(|value| value.to_str().ok())
|
||||
.and_then(|value| value.split(';').next())
|
||||
.map(str::trim)
|
||||
.map(|value| value.to_ascii_lowercase());
|
||||
|
||||
let path_like = trimmed
|
||||
.split('?')
|
||||
.next()
|
||||
.unwrap_or(trimmed)
|
||||
.split('#')
|
||||
.next()
|
||||
.unwrap_or(trimmed);
|
||||
let guessed_mime = mime_guess::from_path(path_like)
|
||||
.first_raw()
|
||||
.unwrap_or("image/png")
|
||||
.to_string();
|
||||
|
||||
let mime = content_type.unwrap_or(guessed_mime);
|
||||
if !mime.starts_with("image/") {
|
||||
anyhow::bail!("remote target is not an image: {trimmed}");
|
||||
}
|
||||
|
||||
let file_name = path_like
|
||||
.rsplit('/')
|
||||
.next()
|
||||
.filter(|value| !value.trim().is_empty())
|
||||
.map(ToOwned::to_owned)
|
||||
.unwrap_or_else(|| format!("image.{}", image_extension_from_mime(&mime)));
|
||||
|
||||
let bytes = resp.bytes().await?.to_vec();
|
||||
if bytes.is_empty() {
|
||||
anyhow::bail!("remote image payload is empty: {trimmed}");
|
||||
}
|
||||
|
||||
return Ok((bytes, file_name, mime));
|
||||
}
|
||||
|
||||
let local_path = trimmed.strip_prefix("file://").unwrap_or(trimmed);
|
||||
let path = Path::new(local_path);
|
||||
if !path.is_file() {
|
||||
anyhow::bail!("local image path not found: {local_path}");
|
||||
}
|
||||
|
||||
let mime = mime_guess::from_path(path)
|
||||
.first_raw()
|
||||
.unwrap_or("image/png")
|
||||
.to_string();
|
||||
if !mime.starts_with("image/") {
|
||||
anyhow::bail!("local image path is not an image: {local_path}");
|
||||
}
|
||||
|
||||
let bytes = tokio::fs::read(path)
|
||||
.await
|
||||
.map_err(|e| anyhow::anyhow!("failed to read local image {local_path}: {e}"))?;
|
||||
if bytes.is_empty() {
|
||||
anyhow::bail!("local image payload is empty: {local_path}");
|
||||
}
|
||||
|
||||
let file_name = path
|
||||
.file_name()
|
||||
.and_then(|name| name.to_str())
|
||||
.filter(|name| !name.trim().is_empty())
|
||||
.map(ToOwned::to_owned)
|
||||
.unwrap_or_else(|| format!("image.{}", image_extension_from_mime(&mime)));
|
||||
|
||||
Ok((bytes, file_name, mime))
|
||||
}
|
||||
|
||||
async fn send_text_with_retry(
|
||||
&self,
|
||||
url: &str,
|
||||
body: &serde_json::Value,
|
||||
) -> anyhow::Result<()> {
|
||||
let token = self.get_tenant_access_token().await?;
|
||||
let (status, response) = self.send_text_once(url, &token, body).await?;
|
||||
|
||||
if should_refresh_lark_tenant_token(status, &response) {
|
||||
self.invalidate_token().await;
|
||||
let new_token = self.get_tenant_access_token().await?;
|
||||
let (retry_status, retry_response) = self.send_text_once(url, &new_token, body).await?;
|
||||
|
||||
if should_refresh_lark_tenant_token(retry_status, &retry_response) {
|
||||
let sanitized = sanitize_lark_body(&retry_response);
|
||||
anyhow::bail!(
|
||||
"Lark send failed after token refresh: status={retry_status}, body={sanitized}"
|
||||
);
|
||||
}
|
||||
|
||||
ensure_lark_send_success(retry_status, &retry_response, "after token refresh")?;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
ensure_lark_send_success(status, &response, "without token refresh")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn send_image_target_with_retry(
|
||||
&self,
|
||||
message_url: &str,
|
||||
recipient: &str,
|
||||
image_target: &str,
|
||||
) -> anyhow::Result<()> {
|
||||
let upload_url = self.image_upload_url();
|
||||
let (image_bytes, file_name, _mime) =
|
||||
self.resolve_outgoing_image_target(image_target).await?;
|
||||
|
||||
let mut token = self.get_tenant_access_token().await?;
|
||||
let (status, mut upload_response) = self
|
||||
.upload_image_once(&upload_url, &token, image_bytes.clone(), &file_name)
|
||||
.await?;
|
||||
|
||||
if should_refresh_lark_tenant_token(status, &upload_response) {
|
||||
self.invalidate_token().await;
|
||||
token = self.get_tenant_access_token().await?;
|
||||
let (retry_status, retry_response) = self
|
||||
.upload_image_once(&upload_url, &token, image_bytes, &file_name)
|
||||
.await?;
|
||||
upload_response = retry_response;
|
||||
|
||||
if should_refresh_lark_tenant_token(retry_status, &upload_response) {
|
||||
let sanitized = sanitize_lark_body(&upload_response);
|
||||
anyhow::bail!(
|
||||
"Lark image upload failed after token refresh: status={retry_status}, body={sanitized}"
|
||||
);
|
||||
}
|
||||
|
||||
ensure_lark_send_success(
|
||||
retry_status,
|
||||
&upload_response,
|
||||
"image upload after token refresh",
|
||||
)?;
|
||||
} else {
|
||||
ensure_lark_send_success(
|
||||
status,
|
||||
&upload_response,
|
||||
"image upload without token refresh",
|
||||
)?;
|
||||
}
|
||||
|
||||
let image_key = upload_response
|
||||
.pointer("/data/image_key")
|
||||
.and_then(|value| value.as_str())
|
||||
.map(str::trim)
|
||||
.filter(|value| !value.is_empty())
|
||||
.ok_or_else(|| anyhow::anyhow!("Lark image upload response missing data.image_key"))?;
|
||||
|
||||
let (send_status, send_response) = self
|
||||
.send_image_once(message_url, &token, recipient, image_key)
|
||||
.await?;
|
||||
if should_refresh_lark_tenant_token(send_status, &send_response) {
|
||||
self.invalidate_token().await;
|
||||
let new_token = self.get_tenant_access_token().await?;
|
||||
let (retry_status, retry_response) = self
|
||||
.send_image_once(message_url, &new_token, recipient, image_key)
|
||||
.await?;
|
||||
if should_refresh_lark_tenant_token(retry_status, &retry_response) {
|
||||
let sanitized = sanitize_lark_body(&retry_response);
|
||||
anyhow::bail!(
|
||||
"Lark image send failed after token refresh: status={retry_status}, body={sanitized}"
|
||||
);
|
||||
}
|
||||
ensure_lark_send_success(
|
||||
retry_status,
|
||||
&retry_response,
|
||||
"image send after token refresh",
|
||||
)?;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
ensure_lark_send_success(
|
||||
send_status,
|
||||
&send_response,
|
||||
"image send without token refresh",
|
||||
)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn send_text_once(
|
||||
&self,
|
||||
url: &str,
|
||||
@ -1509,37 +1900,41 @@ impl Channel for LarkChannel {
|
||||
}
|
||||
|
||||
async fn send(&self, message: &SendMessage) -> anyhow::Result<()> {
|
||||
let token = self.get_tenant_access_token().await?;
|
||||
let url = self.send_message_url();
|
||||
let (text_content, image_targets) = parse_outgoing_content(&message.content);
|
||||
|
||||
let content = serde_json::json!({ "text": message.content }).to_string();
|
||||
let body = serde_json::json!({
|
||||
"receive_id": message.recipient,
|
||||
"msg_type": "text",
|
||||
"content": content,
|
||||
});
|
||||
|
||||
let (status, response) = self.send_text_once(&url, &token, &body).await?;
|
||||
|
||||
if should_refresh_lark_tenant_token(status, &response) {
|
||||
// Token expired/invalid, invalidate and retry once.
|
||||
self.invalidate_token().await;
|
||||
let new_token = self.get_tenant_access_token().await?;
|
||||
let (retry_status, retry_response) =
|
||||
self.send_text_once(&url, &new_token, &body).await?;
|
||||
|
||||
if should_refresh_lark_tenant_token(retry_status, &retry_response) {
|
||||
let sanitized = sanitize_lark_body(&retry_response);
|
||||
anyhow::bail!(
|
||||
"Lark send failed after token refresh: status={retry_status}, body={sanitized}"
|
||||
);
|
||||
}
|
||||
|
||||
ensure_lark_send_success(retry_status, &retry_response, "after token refresh")?;
|
||||
return Ok(());
|
||||
if !text_content.is_empty() {
|
||||
let content = serde_json::json!({ "text": text_content }).to_string();
|
||||
let body = serde_json::json!({
|
||||
"receive_id": message.recipient,
|
||||
"msg_type": "text",
|
||||
"content": content,
|
||||
});
|
||||
self.send_text_with_retry(&url, &body).await?;
|
||||
}
|
||||
|
||||
for image_target in image_targets {
|
||||
if let Err(err) = self
|
||||
.send_image_target_with_retry(&url, &message.recipient, &image_target)
|
||||
.await
|
||||
{
|
||||
tracing::warn!(
|
||||
"Lark image send failed for target '{}': {err}",
|
||||
display_image_target(&image_target)
|
||||
);
|
||||
let fallback = serde_json::json!({
|
||||
"text": format!("Image: {}", display_image_target(&image_target))
|
||||
})
|
||||
.to_string();
|
||||
let body = serde_json::json!({
|
||||
"receive_id": message.recipient,
|
||||
"msg_type": "text",
|
||||
"content": fallback,
|
||||
});
|
||||
let _ = self.send_text_with_retry(&url, &body).await;
|
||||
}
|
||||
}
|
||||
|
||||
ensure_lark_send_success(status, &response, "without token refresh")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@ -2117,6 +2512,38 @@ mod tests {
|
||||
assert_eq!(ch.name(), "lark");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn lark_parse_outgoing_content_extracts_image_markers_and_local_path_lines() {
|
||||
let temp = tempfile::tempdir().expect("temp dir");
|
||||
let image_path = temp.path().join("capture.png");
|
||||
std::fs::write(&image_path, b"png-bytes").expect("write image");
|
||||
|
||||
let input = format!(
|
||||
"处理好了\n[IMAGE:https://cdn.example.com/a.png]\n{}\n/path/does/not/exist.png",
|
||||
image_path.display()
|
||||
);
|
||||
let (text, images) = parse_outgoing_content(&input);
|
||||
|
||||
assert_eq!(text, "处理好了\n/path/does/not/exist.png");
|
||||
assert_eq!(
|
||||
images,
|
||||
vec![
|
||||
"https://cdn.example.com/a.png".to_string(),
|
||||
image_path.display().to_string()
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn lark_parse_outgoing_content_extracts_data_uri_lines() {
|
||||
let data_uri = "data:image/png;base64,aGVsbG8=";
|
||||
let input = format!("这是一张图\n{data_uri}");
|
||||
let (text, images) = parse_outgoing_content(&input);
|
||||
|
||||
assert_eq!(text, "这是一张图");
|
||||
assert_eq!(images, vec![data_uri.to_string()]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn lark_ws_activity_refreshes_heartbeat_watchdog() {
|
||||
assert!(should_refresh_last_recv(&WsMsg::Binary(
|
||||
|
||||
@ -543,6 +543,24 @@ fn channel_delivery_instructions(channel_name: &str) -> Option<&'static str> {
|
||||
- You can combine text and media in one response — text is sent first, then each attachment.\n\
|
||||
- Use tool results silently: answer the latest user message directly, and do not narrate delayed/internal tool execution bookkeeping.",
|
||||
),
|
||||
"lark" | "feishu" => Some(
|
||||
"When responding on Lark/Feishu:\n\
|
||||
- For image attachments, use markers: [IMAGE:<path-or-url-or-data-uri>]\n\
|
||||
- Keep normal text outside markers and never wrap markers in code fences.\n\
|
||||
- Prefer one marker per line to keep delivery deterministic.\n\
|
||||
- If you include both text and images, put text first, then image markers.\n\
|
||||
- Be concise and direct. Skip filler phrases.\n\
|
||||
- Use tool results silently: answer the latest user message directly, and do not narrate delayed/internal tool execution bookkeeping.",
|
||||
),
|
||||
"qq" => Some(
|
||||
"When responding on QQ:\n\
|
||||
- For image attachments, use markers: [IMAGE:<path-or-url-or-data-uri>]\n\
|
||||
- Keep normal text outside markers and never wrap markers in code fences.\n\
|
||||
- Prefer one marker per line to keep delivery deterministic.\n\
|
||||
- If you include both text and images, put text first, then image markers.\n\
|
||||
- Be concise and direct. Skip filler phrases.\n\
|
||||
- Use tool results silently: answer the latest user message directly, and do not narrate delayed/internal tool execution bookkeeping.",
|
||||
),
|
||||
"bluebubbles" => Some(
|
||||
"You are responding on iMessage via BlueBubbles. Always complete your research before replying — use as many tool calls as needed to get a full, accurate answer.\n\
|
||||
\n\
|
||||
|
||||
@ -1,10 +1,12 @@
|
||||
use super::traits::{Channel, ChannelMessage, SendMessage};
|
||||
use crate::config::schema::QQEnvironment;
|
||||
use async_trait::async_trait;
|
||||
use base64::Engine;
|
||||
use futures_util::{SinkExt, StreamExt};
|
||||
use ring::signature::Ed25519KeyPair;
|
||||
use serde_json::{json, Map, Value};
|
||||
use std::collections::HashSet;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
use tokio::sync::RwLock;
|
||||
@ -29,6 +31,11 @@ fn is_remote_media_url(url: &str) -> bool {
|
||||
trimmed.starts_with("https://") || trimmed.starts_with("http://")
|
||||
}
|
||||
|
||||
fn is_data_image_uri(target: &str) -> bool {
|
||||
let lower = target.trim().to_ascii_lowercase();
|
||||
lower.starts_with("data:image/") && lower.contains(";base64,")
|
||||
}
|
||||
|
||||
fn is_image_filename(filename: &str) -> bool {
|
||||
let lower = filename.to_ascii_lowercase();
|
||||
lower.ends_with(".png")
|
||||
@ -42,6 +49,25 @@ fn is_image_filename(filename: &str) -> bool {
|
||||
|| lower.ends_with(".svg")
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
enum OutgoingImageTarget {
|
||||
RemoteUrl(String),
|
||||
LocalPath(String),
|
||||
DataUri(String),
|
||||
}
|
||||
|
||||
impl OutgoingImageTarget {
|
||||
fn display_target(&self) -> &str {
|
||||
match self {
|
||||
Self::RemoteUrl(url) | Self::LocalPath(url) | Self::DataUri(url) => url,
|
||||
}
|
||||
}
|
||||
|
||||
fn is_inline_data(&self) -> bool {
|
||||
matches!(self, Self::DataUri(_))
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_image_marker_from_attachment(attachment: &serde_json::Value) -> Option<String> {
|
||||
let url = attachment.get("url").and_then(|u| u.as_str())?.trim();
|
||||
if url.is_empty() {
|
||||
@ -75,21 +101,97 @@ fn parse_image_marker_line(line: &str) -> Option<&str> {
|
||||
Some(marker)
|
||||
}
|
||||
|
||||
fn parse_outgoing_content(content: &str) -> (String, Vec<String>) {
|
||||
fn parse_outgoing_image_target(
|
||||
candidate: &str,
|
||||
allow_extensionless_remote_url: bool,
|
||||
) -> Option<OutgoingImageTarget> {
|
||||
let trimmed = candidate.trim();
|
||||
if trimmed.is_empty() || trimmed.contains('\0') {
|
||||
return None;
|
||||
}
|
||||
|
||||
let normalized = trimmed.trim_matches(|c| matches!(c, '`' | '"' | '\''));
|
||||
let normalized = normalized.strip_prefix("file://").unwrap_or(normalized);
|
||||
if normalized.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
if is_data_image_uri(normalized) {
|
||||
return Some(OutgoingImageTarget::DataUri(normalized.to_string()));
|
||||
}
|
||||
|
||||
if is_remote_media_url(normalized) {
|
||||
if allow_extensionless_remote_url || is_image_filename(normalized) {
|
||||
return Some(OutgoingImageTarget::RemoteUrl(normalized.to_string()));
|
||||
}
|
||||
return None;
|
||||
}
|
||||
|
||||
if !is_image_filename(normalized) {
|
||||
return None;
|
||||
}
|
||||
|
||||
let path = Path::new(normalized);
|
||||
if !path.is_file() {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(OutgoingImageTarget::LocalPath(normalized.to_string()))
|
||||
}
|
||||
|
||||
fn parse_outgoing_content(content: &str) -> (String, Vec<OutgoingImageTarget>) {
|
||||
let mut passthrough_lines = Vec::new();
|
||||
let mut image_urls = Vec::new();
|
||||
let mut image_targets = Vec::new();
|
||||
|
||||
for line in content.lines() {
|
||||
if let Some(marker_target) = parse_image_marker_line(line) {
|
||||
if is_remote_media_url(marker_target) {
|
||||
image_urls.push(marker_target.to_string());
|
||||
if let Some(parsed) = parse_outgoing_image_target(marker_target, true) {
|
||||
image_targets.push(parsed);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(parsed) = parse_outgoing_image_target(line, false) {
|
||||
if matches!(
|
||||
parsed,
|
||||
OutgoingImageTarget::LocalPath(_) | OutgoingImageTarget::DataUri(_)
|
||||
) {
|
||||
image_targets.push(parsed);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
passthrough_lines.push(line);
|
||||
}
|
||||
|
||||
(passthrough_lines.join("\n").trim().to_string(), image_urls)
|
||||
(
|
||||
passthrough_lines.join("\n").trim().to_string(),
|
||||
image_targets,
|
||||
)
|
||||
}
|
||||
|
||||
fn decode_data_image_payload(data_uri: &str) -> anyhow::Result<String> {
|
||||
let trimmed = data_uri.trim();
|
||||
let (header, payload) = trimmed
|
||||
.split_once(',')
|
||||
.ok_or_else(|| anyhow::anyhow!("invalid data URI: missing comma separator"))?;
|
||||
|
||||
let lower_header = header.to_ascii_lowercase();
|
||||
if !lower_header.starts_with("data:image/") {
|
||||
anyhow::bail!("unsupported data URI mime (expected image/*): {header}");
|
||||
}
|
||||
if !lower_header.contains(";base64") {
|
||||
anyhow::bail!("unsupported data URI encoding (expected base64): {header}");
|
||||
}
|
||||
|
||||
let decoded = base64::engine::general_purpose::STANDARD
|
||||
.decode(payload.trim())
|
||||
.map_err(|e| anyhow::anyhow!("invalid data URI base64 payload: {e}"))?;
|
||||
if decoded.is_empty() {
|
||||
anyhow::bail!("image payload is empty");
|
||||
}
|
||||
|
||||
Ok(base64::engine::general_purpose::STANDARD.encode(decoded))
|
||||
}
|
||||
|
||||
fn compose_message_content(payload: &serde_json::Value) -> Option<String> {
|
||||
@ -480,6 +582,48 @@ impl QQChannel {
|
||||
Ok(file_info.to_string())
|
||||
}
|
||||
|
||||
async fn upload_media_file_data(
|
||||
&self,
|
||||
token: &str,
|
||||
files_url: &str,
|
||||
file_data_base64: &str,
|
||||
) -> anyhow::Result<String> {
|
||||
ensure_https(files_url)?;
|
||||
|
||||
let upload_body = json!({
|
||||
"file_type": 1,
|
||||
"file_data": file_data_base64,
|
||||
"srv_send_msg": false
|
||||
});
|
||||
|
||||
let resp = self
|
||||
.http_client()
|
||||
.post(files_url)
|
||||
.header("Authorization", format!("QQBot {token}"))
|
||||
.json(&upload_body)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
if !resp.status().is_success() {
|
||||
let status = resp.status();
|
||||
let err = resp.text().await.unwrap_or_default();
|
||||
let sanitized = crate::providers::sanitize_api_error(&err);
|
||||
anyhow::bail!("QQ upload media(file_data) failed ({status}): {sanitized}");
|
||||
}
|
||||
|
||||
let payload: Value = resp.json().await?;
|
||||
let file_info = payload
|
||||
.get("file_info")
|
||||
.and_then(Value::as_str)
|
||||
.map(str::trim)
|
||||
.filter(|value| !value.is_empty())
|
||||
.ok_or_else(|| {
|
||||
anyhow::anyhow!("QQ upload media(file_data) response missing file_info")
|
||||
})?;
|
||||
|
||||
Ok(file_info.to_string())
|
||||
}
|
||||
|
||||
/// Fetch an access token from QQ's OAuth2 endpoint.
|
||||
async fn fetch_access_token(&self) -> anyhow::Result<(String, u64)> {
|
||||
let body = json!({
|
||||
@ -627,15 +771,68 @@ impl Channel for QQChannel {
|
||||
}
|
||||
}
|
||||
|
||||
for image_url in image_urls {
|
||||
let file_info = self
|
||||
.upload_media_file_info(&token, &files_url, &image_url)
|
||||
.await?;
|
||||
let media_body = build_media_message_body(&file_info, passive_msg_id, msg_seq);
|
||||
self.post_json(&token, &message_url, &media_body, "send message")
|
||||
.await?;
|
||||
if passive_msg_id.is_some() {
|
||||
msg_seq += 1;
|
||||
for image_target in image_urls {
|
||||
let file_info = match &image_target {
|
||||
OutgoingImageTarget::RemoteUrl(image_url) => {
|
||||
self.upload_media_file_info(&token, &files_url, image_url)
|
||||
.await
|
||||
}
|
||||
OutgoingImageTarget::LocalPath(path) => match tokio::fs::read(path).await {
|
||||
Ok(bytes) => {
|
||||
if bytes.is_empty() {
|
||||
Err(anyhow::anyhow!("QQ local image payload is empty: {path}"))
|
||||
} else {
|
||||
let encoded = base64::engine::general_purpose::STANDARD.encode(bytes);
|
||||
self.upload_media_file_data(&token, &files_url, &encoded)
|
||||
.await
|
||||
}
|
||||
}
|
||||
Err(e) => Err(anyhow::anyhow!("QQ local image read failed ({path}): {e}")),
|
||||
},
|
||||
OutgoingImageTarget::DataUri(data_uri) => {
|
||||
match decode_data_image_payload(data_uri) {
|
||||
Ok(encoded) => {
|
||||
self.upload_media_file_data(&token, &files_url, &encoded)
|
||||
.await
|
||||
}
|
||||
Err(err) => Err(err),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
match file_info {
|
||||
Ok(file_info) => {
|
||||
let media_body = build_media_message_body(&file_info, passive_msg_id, msg_seq);
|
||||
self.post_json(&token, &message_url, &media_body, "send message")
|
||||
.await?;
|
||||
if passive_msg_id.is_some() {
|
||||
msg_seq += 1;
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
tracing::warn!(
|
||||
"QQ: failed to upload image target '{}': {err}",
|
||||
if image_target.is_inline_data() {
|
||||
"[inline image data]"
|
||||
} else {
|
||||
image_target.display_target()
|
||||
}
|
||||
);
|
||||
let fallback_text = if image_target.is_inline_data() {
|
||||
"Image attachment upload failed".to_string()
|
||||
} else {
|
||||
format!("Image: {}", image_target.display_target())
|
||||
};
|
||||
if let Some(body) =
|
||||
build_text_message_body(&fallback_text, passive_msg_id, msg_seq)
|
||||
{
|
||||
self.post_json(&token, &message_url, &body, "send message")
|
||||
.await?;
|
||||
if passive_msg_id.is_some() {
|
||||
msg_seq += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1073,12 +1270,26 @@ allowed_users = ["user1"]
|
||||
assert_eq!(
|
||||
images,
|
||||
vec![
|
||||
"https://cdn.example.com/a.png".to_string(),
|
||||
"http://cdn.example.com/b.jpg".to_string()
|
||||
OutgoingImageTarget::RemoteUrl("https://cdn.example.com/a.png".to_string()),
|
||||
OutgoingImageTarget::RemoteUrl("http://cdn.example.com/b.jpg".to_string())
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_outgoing_content_accepts_marker_remote_url_without_extension() {
|
||||
let input = "hello\n[IMAGE:https://multimedia.nt.qq.com.cn/download?appid=1406]\nbye";
|
||||
let (text, images) = parse_outgoing_content(input);
|
||||
|
||||
assert_eq!(text, "hello\nbye");
|
||||
assert_eq!(
|
||||
images,
|
||||
vec![OutgoingImageTarget::RemoteUrl(
|
||||
"https://multimedia.nt.qq.com.cn/download?appid=1406".to_string()
|
||||
)]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_outgoing_content_keeps_non_remote_image_marker_as_text() {
|
||||
let input = "[IMAGE:/tmp/a.png]\nhello";
|
||||
@ -1088,6 +1299,38 @@ allowed_users = ["user1"]
|
||||
assert!(images.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_outgoing_content_extracts_existing_local_path_lines() {
|
||||
let temp = tempfile::tempdir().expect("temp dir");
|
||||
let local_path = temp.path().join("capture.png");
|
||||
std::fs::write(&local_path, b"png-bytes").expect("write local image");
|
||||
|
||||
let input = format!("done\n{}\nnext", local_path.display());
|
||||
let (text, images) = parse_outgoing_content(&input);
|
||||
|
||||
assert_eq!(text, "done\nnext");
|
||||
assert_eq!(
|
||||
images,
|
||||
vec![OutgoingImageTarget::LocalPath(
|
||||
local_path.display().to_string()
|
||||
)]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_outgoing_content_extracts_data_uri_markers() {
|
||||
let input = "hello\n[IMAGE:data:image/png;base64,aGVsbG8=]\nbye";
|
||||
let (text, images) = parse_outgoing_content(input);
|
||||
|
||||
assert_eq!(text, "hello\nbye");
|
||||
assert_eq!(
|
||||
images,
|
||||
vec![OutgoingImageTarget::DataUri(
|
||||
"data:image/png;base64,aGVsbG8=".to_string()
|
||||
)]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_build_text_message_body_with_passive_fields() {
|
||||
let body = build_text_message_body("hello", Some("msg-123"), 2).expect("text body");
|
||||
|
||||
@ -139,6 +139,7 @@ const SUPPORTED_PROXY_SERVICE_KEYS: &[&str] = &[
|
||||
"tool.browser",
|
||||
"tool.composio",
|
||||
"tool.http_request",
|
||||
"tool.multimodal",
|
||||
"tool.pushover",
|
||||
"memory.embeddings",
|
||||
"tunnel.custom",
|
||||
|
||||
@ -1068,9 +1068,16 @@ async fn prepare_gateway_messages_for_provider(
|
||||
messages.push(ChatMessage::system(system_prompt));
|
||||
messages.extend(user_messages);
|
||||
|
||||
let multimodal_config = state.config.lock().multimodal.clone();
|
||||
let prepared =
|
||||
crate::multimodal::prepare_messages_for_provider(&messages, &multimodal_config).await?;
|
||||
let (multimodal_config, provider_hint) = {
|
||||
let config = state.config.lock();
|
||||
(config.multimodal.clone(), config.default_provider.clone())
|
||||
};
|
||||
let prepared = crate::multimodal::prepare_messages_for_provider_with_provider_hint(
|
||||
&messages,
|
||||
&multimodal_config,
|
||||
provider_hint.as_deref(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(prepared.messages)
|
||||
}
|
||||
|
||||
@ -8,6 +8,10 @@ use std::path::Path;
|
||||
const IMAGE_MARKER_PREFIX: &str = "[IMAGE:";
|
||||
const OPTIMIZED_IMAGE_MAX_DIMENSION: u32 = 512;
|
||||
const OPTIMIZED_IMAGE_TARGET_BYTES: usize = 256 * 1024;
|
||||
const REMOTE_FETCH_MULTIMODAL_SERVICE_KEY: &str = "tool.multimodal";
|
||||
const REMOTE_FETCH_TOOL_SERVICE_KEY: &str = "tool.http_request";
|
||||
const REMOTE_FETCH_QQ_SERVICE_KEY: &str = "channel.qq";
|
||||
const REMOTE_FETCH_LEGACY_SERVICE_KEY: &str = "provider.ollama";
|
||||
const ALLOWED_IMAGE_MIME_TYPES: &[&str] = &[
|
||||
"image/png",
|
||||
"image/jpeg",
|
||||
@ -118,6 +122,14 @@ pub fn extract_ollama_image_payload(image_ref: &str) -> Option<String> {
|
||||
pub async fn prepare_messages_for_provider(
|
||||
messages: &[ChatMessage],
|
||||
config: &MultimodalConfig,
|
||||
) -> anyhow::Result<PreparedMessages> {
|
||||
prepare_messages_for_provider_with_provider_hint(messages, config, None).await
|
||||
}
|
||||
|
||||
pub async fn prepare_messages_for_provider_with_provider_hint(
|
||||
messages: &[ChatMessage],
|
||||
config: &MultimodalConfig,
|
||||
provider_hint: Option<&str>,
|
||||
) -> anyhow::Result<PreparedMessages> {
|
||||
let (max_images, max_image_size_mb) = config.effective_limits();
|
||||
let max_bytes = max_image_size_mb.saturating_mul(1024 * 1024);
|
||||
@ -138,8 +150,6 @@ pub async fn prepare_messages_for_provider(
|
||||
});
|
||||
}
|
||||
|
||||
let remote_client = build_runtime_proxy_client_with_timeouts("provider.ollama", 30, 10);
|
||||
|
||||
let mut normalized_messages = Vec::with_capacity(messages.len());
|
||||
for message in messages {
|
||||
if message.role != "user" {
|
||||
@ -156,7 +166,7 @@ pub async fn prepare_messages_for_provider(
|
||||
let mut normalized_refs = Vec::with_capacity(refs.len());
|
||||
for reference in refs {
|
||||
let data_uri =
|
||||
normalize_image_reference(&reference, config, max_bytes, &remote_client).await?;
|
||||
normalize_image_reference(&reference, config, max_bytes, provider_hint).await?;
|
||||
normalized_refs.push(data_uri);
|
||||
}
|
||||
|
||||
@ -198,7 +208,7 @@ async fn normalize_image_reference(
|
||||
source: &str,
|
||||
config: &MultimodalConfig,
|
||||
max_bytes: usize,
|
||||
remote_client: &Client,
|
||||
provider_hint: Option<&str>,
|
||||
) -> anyhow::Result<String> {
|
||||
if source.starts_with("data:") {
|
||||
return normalize_data_uri(source, max_bytes).await;
|
||||
@ -212,7 +222,7 @@ async fn normalize_image_reference(
|
||||
.into());
|
||||
}
|
||||
|
||||
return normalize_remote_image(source, max_bytes, remote_client).await;
|
||||
return normalize_remote_image(source, max_bytes, provider_hint).await;
|
||||
}
|
||||
|
||||
normalize_local_image(source, max_bytes).await
|
||||
@ -266,24 +276,59 @@ async fn normalize_data_uri(source: &str, max_bytes: usize) -> anyhow::Result<St
|
||||
}
|
||||
|
||||
async fn normalize_remote_image(
|
||||
source: &str,
|
||||
max_bytes: usize,
|
||||
provider_hint: Option<&str>,
|
||||
) -> anyhow::Result<String> {
|
||||
let service_keys = build_remote_fetch_service_keys(source, provider_hint);
|
||||
let mut failures = Vec::new();
|
||||
|
||||
for service_key in service_keys {
|
||||
let client = build_runtime_proxy_client_with_timeouts(&service_key, 30, 10);
|
||||
match normalize_remote_image_once(source, max_bytes, &client).await {
|
||||
Ok(normalized) => return Ok(normalized),
|
||||
Err(error) => {
|
||||
let reason = error.to_string();
|
||||
tracing::debug!(
|
||||
service_key = %service_key,
|
||||
source = %source,
|
||||
"multimodal remote fetch attempt failed: {reason}"
|
||||
);
|
||||
failures.push(format!("{service_key}: {reason}"));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Err(MultimodalError::RemoteFetchFailed {
|
||||
input: source.to_string(),
|
||||
reason: format!(
|
||||
"{}; hint: when proxy.scope='services', include one of channel.qq/tool.multimodal/tool.http_request/provider.* as needed",
|
||||
failures.join(" | ")
|
||||
),
|
||||
}
|
||||
.into())
|
||||
}
|
||||
|
||||
async fn normalize_remote_image_once(
|
||||
source: &str,
|
||||
max_bytes: usize,
|
||||
remote_client: &Client,
|
||||
) -> anyhow::Result<String> {
|
||||
let response = remote_client.get(source).send().await.map_err(|error| {
|
||||
MultimodalError::RemoteFetchFailed {
|
||||
input: source.to_string(),
|
||||
reason: error.to_string(),
|
||||
}
|
||||
})?;
|
||||
let mut request = remote_client
|
||||
.get(source)
|
||||
.header(reqwest::header::USER_AGENT, "ZeroClaw/1.0");
|
||||
if source_looks_like_qq_media(source) {
|
||||
request = request.header(reqwest::header::REFERER, "https://qq.com/");
|
||||
}
|
||||
|
||||
let response = request
|
||||
.send()
|
||||
.await
|
||||
.map_err(|error| anyhow::anyhow!("error sending request for url ({source}): {error}"))?;
|
||||
|
||||
let status = response.status();
|
||||
if !status.is_success() {
|
||||
return Err(MultimodalError::RemoteFetchFailed {
|
||||
input: source.to_string(),
|
||||
reason: format!("HTTP {status}"),
|
||||
}
|
||||
.into());
|
||||
anyhow::bail!("HTTP {status}");
|
||||
}
|
||||
|
||||
if let Some(content_length) = response.content_length() {
|
||||
@ -300,10 +345,7 @@ async fn normalize_remote_image(
|
||||
let bytes = response
|
||||
.bytes()
|
||||
.await
|
||||
.map_err(|error| MultimodalError::RemoteFetchFailed {
|
||||
input: source.to_string(),
|
||||
reason: error.to_string(),
|
||||
})?;
|
||||
.map_err(|error| anyhow::anyhow!("failed to read response body: {error}"))?;
|
||||
|
||||
validate_size(source, bytes.len(), max_bytes)?;
|
||||
|
||||
@ -325,6 +367,72 @@ async fn normalize_remote_image(
|
||||
))
|
||||
}
|
||||
|
||||
fn normalize_provider_service_key_hint(provider_hint: Option<&str>) -> Option<String> {
|
||||
let raw = provider_hint
|
||||
.map(str::trim)
|
||||
.filter(|candidate| !candidate.is_empty())?
|
||||
.split('#')
|
||||
.next()
|
||||
.unwrap_or_default()
|
||||
.trim()
|
||||
.to_ascii_lowercase();
|
||||
|
||||
if raw.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let candidate = if raw.starts_with("provider.") {
|
||||
raw
|
||||
} else {
|
||||
format!("provider.{raw}")
|
||||
};
|
||||
|
||||
if !candidate
|
||||
.chars()
|
||||
.all(|ch| ch.is_ascii_lowercase() || ch.is_ascii_digit() || matches!(ch, '.' | '_' | '-'))
|
||||
{
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(candidate)
|
||||
}
|
||||
|
||||
fn source_looks_like_qq_media(source: &str) -> bool {
|
||||
let Ok(parsed) = reqwest::Url::parse(source) else {
|
||||
return false;
|
||||
};
|
||||
|
||||
let Some(host) = parsed.host_str() else {
|
||||
return false;
|
||||
};
|
||||
|
||||
let host = host.to_ascii_lowercase();
|
||||
host == "multimedia.nt.qq.com.cn" || host.ends_with(".qq.com.cn") || host.ends_with(".qq.com")
|
||||
}
|
||||
|
||||
fn push_service_key_once(keys: &mut Vec<String>, key: String) {
|
||||
if !key.trim().is_empty() && !keys.iter().any(|existing| existing == &key) {
|
||||
keys.push(key);
|
||||
}
|
||||
}
|
||||
|
||||
fn build_remote_fetch_service_keys(source: &str, provider_hint: Option<&str>) -> Vec<String> {
|
||||
let mut keys = Vec::new();
|
||||
|
||||
if source_looks_like_qq_media(source) {
|
||||
push_service_key_once(&mut keys, REMOTE_FETCH_QQ_SERVICE_KEY.to_string());
|
||||
}
|
||||
|
||||
if let Some(provider_service_key) = normalize_provider_service_key_hint(provider_hint) {
|
||||
push_service_key_once(&mut keys, provider_service_key);
|
||||
}
|
||||
|
||||
push_service_key_once(&mut keys, REMOTE_FETCH_MULTIMODAL_SERVICE_KEY.to_string());
|
||||
push_service_key_once(&mut keys, REMOTE_FETCH_TOOL_SERVICE_KEY.to_string());
|
||||
push_service_key_once(&mut keys, REMOTE_FETCH_LEGACY_SERVICE_KEY.to_string());
|
||||
keys
|
||||
}
|
||||
|
||||
async fn normalize_local_image(source: &str, max_bytes: usize) -> anyhow::Result<String> {
|
||||
let path = Path::new(source);
|
||||
if !path.exists() || !path.is_file() {
|
||||
@ -681,6 +789,63 @@ mod tests {
|
||||
assert!(optimized_image.height() <= OPTIMIZED_IMAGE_MAX_DIMENSION);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalize_provider_service_key_hint_builds_provider_prefix() {
|
||||
assert_eq!(
|
||||
normalize_provider_service_key_hint(Some("openai")),
|
||||
Some("provider.openai".to_string())
|
||||
);
|
||||
assert_eq!(
|
||||
normalize_provider_service_key_hint(Some("provider.gemini")),
|
||||
Some("provider.gemini".to_string())
|
||||
);
|
||||
assert_eq!(normalize_provider_service_key_hint(Some(" ")), None);
|
||||
assert_eq!(normalize_provider_service_key_hint(None), None);
|
||||
assert_eq!(
|
||||
normalize_provider_service_key_hint(Some("openai#fast-route")),
|
||||
Some("provider.openai".to_string())
|
||||
);
|
||||
assert_eq!(
|
||||
normalize_provider_service_key_hint(Some("provider.gemini#img")),
|
||||
Some("provider.gemini".to_string())
|
||||
);
|
||||
assert_eq!(
|
||||
normalize_provider_service_key_hint(Some("custom:https://api.example.com/v1")),
|
||||
None
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn build_remote_fetch_service_keys_prefers_qq_channel_for_qq_media_hosts() {
|
||||
let keys = build_remote_fetch_service_keys(
|
||||
"https://multimedia.nt.qq.com.cn/download?appid=1406",
|
||||
Some("openai"),
|
||||
);
|
||||
assert_eq!(
|
||||
keys,
|
||||
vec![
|
||||
"channel.qq".to_string(),
|
||||
"provider.openai".to_string(),
|
||||
"tool.multimodal".to_string(),
|
||||
"tool.http_request".to_string(),
|
||||
"provider.ollama".to_string(),
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn build_remote_fetch_service_keys_deduplicates_service_candidates() {
|
||||
let keys = build_remote_fetch_service_keys("https://example.com/a.png", Some("ollama"));
|
||||
assert_eq!(
|
||||
keys,
|
||||
vec![
|
||||
"provider.ollama".to_string(),
|
||||
"tool.multimodal".to_string(),
|
||||
"tool.http_request".to_string(),
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn extract_ollama_image_payload_supports_data_uris() {
|
||||
let payload = extract_ollama_image_payload("data:image/png;base64,abcd==")
|
||||
|
||||
@ -1098,7 +1098,12 @@ impl Provider for OpenAiCodexProvider {
|
||||
|
||||
// Normalize images: convert file paths to data URIs
|
||||
let config = crate::config::MultimodalConfig::default();
|
||||
let prepared = crate::multimodal::prepare_messages_for_provider(&messages, &config).await?;
|
||||
let prepared = crate::multimodal::prepare_messages_for_provider_with_provider_hint(
|
||||
&messages,
|
||||
&config,
|
||||
Some("openai"),
|
||||
)
|
||||
.await?;
|
||||
|
||||
let (instructions, input) = build_responses_input(&prepared.messages);
|
||||
self.send_responses_request(input, instructions, model)
|
||||
@ -1113,7 +1118,12 @@ impl Provider for OpenAiCodexProvider {
|
||||
) -> anyhow::Result<String> {
|
||||
// Normalize image markers: convert file paths to data URIs
|
||||
let config = crate::config::MultimodalConfig::default();
|
||||
let prepared = crate::multimodal::prepare_messages_for_provider(messages, &config).await?;
|
||||
let prepared = crate::multimodal::prepare_messages_for_provider_with_provider_hint(
|
||||
messages,
|
||||
&config,
|
||||
Some("openai"),
|
||||
)
|
||||
.await?;
|
||||
|
||||
let (instructions, input) = build_responses_input(&prepared.messages);
|
||||
self.send_responses_request(input, instructions, model)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user