Merge branch 'dev' into codex/supersede-pr-1639-20260225021812-271412-files

This commit is contained in:
Chum Yin 2026-02-25 11:06:52 +08:00 committed by GitHub
commit dc7cf36a0f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
19 changed files with 601 additions and 32 deletions

View File

@ -26,6 +26,14 @@ Schema export command:
| `provider_api` | unset | Optional API mode for `custom:<url>` providers: `openai-chat-completions` or `openai-responses` |
| `default_model` | `anthropic/claude-sonnet-4-6` | model routed through selected provider |
| `default_temperature` | `0.7` | model temperature |
| `model_support_vision` | unset (`None`) | Vision support override for active provider/model |
Notes:
- `model_support_vision = true` forces vision support on (e.g. Ollama running `llava`).
- `model_support_vision = false` forces vision support off.
- Unset keeps the provider's built-in default.
- Environment override: `ZEROCLAW_MODEL_SUPPORT_VISION` or `MODEL_SUPPORT_VISION` (values: `true`/`false`/`1`/`0`/`yes`/`no`/`on`/`off`).
## `[observability]`

View File

@ -25,6 +25,14 @@ Lệnh xuất schema:
| `default_provider` | `openrouter` | ID hoặc bí danh provider |
| `default_model` | `anthropic/claude-sonnet-4-6` | Model định tuyến qua provider đã chọn |
| `default_temperature` | `0.7` | Nhiệt độ model |
| `model_support_vision` | chưa đặt (`None`) | Ghi đè hỗ trợ vision cho provider/model đang dùng |
Lưu ý:
- `model_support_vision = true` bật vision (ví dụ Ollama chạy `llava`).
- `model_support_vision = false` tắt vision.
- Để trống giữ mặc định của provider.
- Biến môi trường: `ZEROCLAW_MODEL_SUPPORT_VISION` hoặc `MODEL_SUPPORT_VISION` (giá trị: `true`/`false`/`1`/`0`/`yes`/`no`/`on`/`off`).
## `[observability]`

View File

@ -94,6 +94,25 @@ Hành vi:
- `true`: gửi `think: true`.
- Không đặt: bỏ qua `think` và giữ nguyên mặc định của Ollama/model.
### Ghi đè Vision cho Ollama
Một số model Ollama hỗ trợ vision (ví dụ `llava`, `llama3.2-vision`) trong khi các model khác thì không.
Vì ZeroClaw không thể tự động phát hiện, bạn có thể ghi đè trong `config.toml`:
```toml
default_provider = "ollama"
default_model = "llava"
model_support_vision = true
```
Hành vi:
- `true`: bật xử lý hình ảnh đính kèm trong vòng lặp agent.
- `false`: tắt vision ngay cả khi provider báo hỗ trợ.
- Không đặt: dùng mặc định của provider.
Biến môi trường: `ZEROCLAW_MODEL_SUPPORT_VISION=true`
### Ghi chú về Kimi Code
- Provider ID: `kimi-code`

View File

@ -152,6 +152,25 @@ Behavior:
- `true`: sends `think: true`.
- Unset: omits `think` and keeps Ollama/model defaults.
### Ollama Vision Override
Some Ollama models support vision (e.g. `llava`, `llama3.2-vision`) while others do not.
Since ZeroClaw cannot auto-detect this, you can override it in `config.toml`:
```toml
default_provider = "ollama"
default_model = "llava"
model_support_vision = true
```
Behavior:
- `true`: enables image attachment processing in the agent loop.
- `false`: disables vision even if the provider reports support.
- Unset: uses the provider's built-in default.
Environment override: `ZEROCLAW_MODEL_SUPPORT_VISION=true`
### Kimi Code Notes
- Provider ID: `kimi-code`

View File

@ -25,6 +25,14 @@ Lệnh xuất schema:
| `default_provider` | `openrouter` | ID hoặc bí danh provider |
| `default_model` | `anthropic/claude-sonnet-4-6` | Model định tuyến qua provider đã chọn |
| `default_temperature` | `0.7` | Nhiệt độ model |
| `model_support_vision` | chưa đặt (`None`) | Ghi đè hỗ trợ vision cho provider/model đang dùng |
Lưu ý:
- `model_support_vision = true` bật vision (ví dụ Ollama chạy `llava`).
- `model_support_vision = false` tắt vision.
- Để trống giữ mặc định của provider.
- Biến môi trường: `ZEROCLAW_MODEL_SUPPORT_VISION` hoặc `MODEL_SUPPORT_VISION` (giá trị: `true`/`false`/`1`/`0`/`yes`/`no`/`on`/`off`).
## `[observability]`

View File

@ -94,6 +94,25 @@ Hành vi:
- `true`: gửi `think: true`.
- Không đặt: bỏ qua `think` và giữ nguyên mặc định của Ollama/model.
### Ghi đè Vision cho Ollama
Một số model Ollama hỗ trợ vision (ví dụ `llava`, `llama3.2-vision`) trong khi các model khác thì không.
Vì ZeroClaw không thể tự động phát hiện, bạn có thể ghi đè trong `config.toml`:
```toml
default_provider = "ollama"
default_model = "llava"
model_support_vision = true
```
Hành vi:
- `true`: bật xử lý hình ảnh đính kèm trong vòng lặp agent.
- `false`: tắt vision ngay cả khi provider báo hỗ trợ.
- Không đặt: dùng mặc định của provider.
Biến môi trường: `ZEROCLAW_MODEL_SUPPORT_VISION=true`
### Ghi chú về Kimi Code
- Provider ID: `kimi-code`

View File

@ -1137,6 +1137,7 @@ pub async fn run(
reasoning_enabled: config.runtime.reasoning_enabled,
custom_provider_api_mode: config.provider_api.map(|mode| mode.as_compatible_mode()),
max_tokens_override: None,
model_support_vision: config.model_support_vision,
};
let provider: Box<dyn Provider> = providers::create_routed_provider_with_options(
@ -1598,6 +1599,7 @@ pub async fn process_message(config: Config, message: &str) -> Result<String> {
reasoning_enabled: config.runtime.reasoning_enabled,
custom_provider_api_mode: config.provider_api.map(|mode| mode.as_compatible_mode()),
max_tokens_override: None,
model_support_vision: config.model_support_vision,
};
let provider: Box<dyn Provider> = providers::create_routed_provider_with_options(
provider_name,

View File

@ -413,6 +413,17 @@ fn channel_delivery_instructions(channel_name: &str) -> Option<&'static str> {
- Keep normal text outside markers and never wrap markers in code fences.\n\
- Use tool results silently: answer the latest user message directly, and do not narrate delayed/internal tool execution bookkeeping.",
),
"whatsapp" => Some(
"When responding on WhatsApp:\n\
- Use *bold* for emphasis (WhatsApp uses single asterisks).\n\
- Be concise. No markdown headers (## etc.) they don't render.\n\
- No markdown tables use bullet lists instead.\n\
- For sending images, documents, videos, or audio files use markers: [IMAGE:<absolute-path>], [DOCUMENT:<absolute-path>], [VIDEO:<absolute-path>], [AUDIO:<absolute-path>]\n\
- The path MUST be an absolute filesystem path to a local file (e.g. [IMAGE:/home/nicolas/.zeroclaw/workspace/images/chart.png]).\n\
- Keep normal text outside markers and never wrap markers in code fences.\n\
- You can combine text and media in one response text is sent first, then each attachment.\n\
- Use tool results silently: answer the latest user message directly, and do not narrate delayed/internal tool execution bookkeeping.",
),
_ => None,
}
}
@ -3047,6 +3058,7 @@ pub async fn start_channels(config: Config) -> Result<()> {
reasoning_enabled: config.runtime.reasoning_enabled,
custom_provider_api_mode: config.provider_api.map(|mode| mode.as_compatible_mode()),
max_tokens_override: None,
model_support_vision: config.model_support_vision,
};
let provider: Arc<dyn Provider> = Arc::from(
create_resilient_provider_nonblocking(

View File

@ -34,6 +34,127 @@ use parking_lot::Mutex;
use std::sync::Arc;
use tokio::select;
// ── Media attachment support ──────────────────────────────────────────
/// Supported WhatsApp media attachment kinds.
///
/// Each variant corresponds to one of the `[KIND:path]` markers the agent may
/// embed in a response (`[IMAGE:…]`, `[DOCUMENT:…]`, `[VIDEO:…]`, `[AUDIO:…]`).
#[cfg(feature = "whatsapp-web")]
#[derive(Debug, Clone, Copy)]
enum WaAttachmentKind {
    /// Photo/image payload (`[IMAGE:…]`).
    Image,
    /// Generic file payload, displayed with a file name (`[DOCUMENT:…]`).
    Document,
    /// Video payload (`[VIDEO:…]`).
    Video,
    /// Audio payload (`[AUDIO:…]`).
    Audio,
}
#[cfg(feature = "whatsapp-web")]
impl WaAttachmentKind {
    /// Parse from the marker prefix (case-insensitive), e.g. `"IMAGE"` or `"image"`.
    ///
    /// Returns `None` for unrecognized prefixes so the caller can keep the
    /// original bracketed text verbatim.
    fn from_marker(s: &str) -> Option<Self> {
        // Compare case-insensitively without allocating an uppercased copy
        // (the previous `to_ascii_uppercase()` built a fresh String per call).
        if s.eq_ignore_ascii_case("IMAGE") {
            Some(Self::Image)
        } else if s.eq_ignore_ascii_case("DOCUMENT") {
            Some(Self::Document)
        } else if s.eq_ignore_ascii_case("VIDEO") {
            Some(Self::Video)
        } else if s.eq_ignore_ascii_case("AUDIO") {
            Some(Self::Audio)
        } else {
            None
        }
    }

    /// Map to the wa-rs `MediaType` used for upload encryption.
    fn media_type(self) -> wa_rs_core::download::MediaType {
        match self {
            Self::Image => wa_rs_core::download::MediaType::Image,
            Self::Document => wa_rs_core::download::MediaType::Document,
            Self::Video => wa_rs_core::download::MediaType::Video,
            Self::Audio => wa_rs_core::download::MediaType::Audio,
        }
    }
}
/// A parsed media attachment from `[KIND:path]` markers in the response text.
#[cfg(feature = "whatsapp-web")]
#[derive(Debug, Clone)]
struct WaAttachment {
    // Which marker produced this attachment (IMAGE/DOCUMENT/VIDEO/AUDIO).
    kind: WaAttachmentKind,
    // Text after the colon, trimmed. Expected to be an absolute local file
    // path (the channel prompt instructs the model to send one), but this is
    // not validated here — the send path checks existence.
    target: String,
}
/// Parse `[IMAGE:/path]`, `[DOCUMENT:/path]`, etc. markers out of a message.
///
/// Returns the cleaned text (markers removed) and a vec of attachments in the
/// order they appeared. Bracketed spans with an unknown kind or an empty
/// target are kept in the text verbatim.
///
/// Scanning is byte-index based, which is UTF-8 safe here because `[`, `]`
/// and `:` are single-byte ASCII, so every index used lands on a char boundary.
#[cfg(feature = "whatsapp-web")]
fn parse_wa_attachment_markers(message: &str) -> (String, Vec<WaAttachment>) {
    let mut cleaned = String::with_capacity(message.len());
    let mut attachments = Vec::new();
    let mut cursor = 0;
    while cursor < message.len() {
        // Find the next candidate marker; everything before it is plain text.
        let Some(open_rel) = message[cursor..].find('[') else {
            cleaned.push_str(&message[cursor..]);
            break;
        };
        let open = cursor + open_rel;
        cleaned.push_str(&message[cursor..open]);
        let Some(close_rel) = message[open..].find(']') else {
            // Unterminated bracket — keep the rest of the text as-is.
            cleaned.push_str(&message[open..]);
            break;
        };
        let close = open + close_rel;
        let marker = &message[open + 1..close];
        let parsed = marker.split_once(':').and_then(|(kind, target)| {
            let kind = WaAttachmentKind::from_marker(kind)?;
            let target = target.trim();
            if target.is_empty() {
                return None;
            }
            Some(WaAttachment {
                kind,
                target: target.to_string(),
            })
        });
        if let Some(attachment) = parsed {
            attachments.push(attachment);
            // Removing an inline marker would otherwise leave the spaces on
            // both sides adjacent ("a [X] b" -> "a  b"); drop one of them so
            // the cleaned text reads naturally.
            if cleaned.ends_with(' ') && message[close + 1..].starts_with(' ') {
                cleaned.pop();
            }
        } else {
            // Not a valid media marker — keep the original text.
            cleaned.push_str(&message[open..=close]);
        }
        cursor = close + 1;
    }
    (cleaned.trim().to_string(), attachments)
}
/// Infer MIME type from file extension (lowercased before matching).
///
/// Unknown or missing extensions fall back to `application/octet-stream`.
#[cfg(feature = "whatsapp-web")]
fn mime_from_path(path: &std::path::Path) -> &'static str {
    match path
        .extension()
        .and_then(|e| e.to_str())
        .unwrap_or("")
        .to_ascii_lowercase()
        .as_str()
    {
        // Images
        "png" => "image/png",
        "jpg" | "jpeg" => "image/jpeg",
        "gif" => "image/gif",
        "webp" => "image/webp",
        // Video
        "mp4" => "video/mp4",
        "mov" => "video/quicktime",
        // Audio (m4a/aac/wav are common voice-note/export formats)
        "mp3" => "audio/mpeg",
        "ogg" | "opus" => "audio/ogg",
        "m4a" => "audio/mp4",
        "aac" => "audio/aac",
        "wav" => "audio/wav",
        // Documents
        "pdf" => "application/pdf",
        "txt" => "text/plain",
        "csv" => "text/csv",
        "zip" => "application/zip",
        "doc" => "application/msword",
        "docx" => "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "xls" => "application/vnd.ms-excel",
        "xlsx" => "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "ppt" => "application/vnd.ms-powerpoint",
        "pptx" => "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        _ => "application/octet-stream",
    }
}
/// WhatsApp Web channel using wa-rs with custom rusqlite storage
///
/// # Status: Functional Implementation
@ -233,6 +354,108 @@ impl WhatsAppWebChannel {
Ok(wa_rs_binary::jid::Jid::pn(digits))
}
/// Upload a file to WhatsApp media servers and send it as the appropriate message type.
#[cfg(feature = "whatsapp-web")]
async fn send_media_attachment(
&self,
client: &Arc<wa_rs::Client>,
to: &wa_rs_binary::jid::Jid,
attachment: &WaAttachment,
) -> Result<()> {
use std::path::Path;
let path = Path::new(&attachment.target);
if !path.exists() {
anyhow::bail!("Media file not found: {}", attachment.target);
}
let data = tokio::fs::read(path).await?;
let file_len = data.len() as u64;
let mimetype = mime_from_path(path).to_string();
tracing::info!(
"WhatsApp Web: uploading {:?} ({} bytes, {})",
attachment.kind,
file_len,
mimetype
);
let upload = client.upload(data, attachment.kind.media_type()).await?;
let outgoing = match attachment.kind {
WaAttachmentKind::Image => wa_rs_proto::whatsapp::Message {
image_message: Some(Box::new(wa_rs_proto::whatsapp::message::ImageMessage {
url: Some(upload.url),
direct_path: Some(upload.direct_path),
media_key: Some(upload.media_key),
file_enc_sha256: Some(upload.file_enc_sha256),
file_sha256: Some(upload.file_sha256),
file_length: Some(upload.file_length),
mimetype: Some(mimetype),
..Default::default()
})),
..Default::default()
},
WaAttachmentKind::Document => {
let file_name = path
.file_name()
.and_then(|n| n.to_str())
.unwrap_or("file")
.to_string();
wa_rs_proto::whatsapp::Message {
document_message: Some(Box::new(
wa_rs_proto::whatsapp::message::DocumentMessage {
url: Some(upload.url),
direct_path: Some(upload.direct_path),
media_key: Some(upload.media_key),
file_enc_sha256: Some(upload.file_enc_sha256),
file_sha256: Some(upload.file_sha256),
file_length: Some(upload.file_length),
mimetype: Some(mimetype),
file_name: Some(file_name),
..Default::default()
},
)),
..Default::default()
}
}
WaAttachmentKind::Video => wa_rs_proto::whatsapp::Message {
video_message: Some(Box::new(wa_rs_proto::whatsapp::message::VideoMessage {
url: Some(upload.url),
direct_path: Some(upload.direct_path),
media_key: Some(upload.media_key),
file_enc_sha256: Some(upload.file_enc_sha256),
file_sha256: Some(upload.file_sha256),
file_length: Some(upload.file_length),
mimetype: Some(mimetype),
..Default::default()
})),
..Default::default()
},
WaAttachmentKind::Audio => wa_rs_proto::whatsapp::Message {
audio_message: Some(Box::new(wa_rs_proto::whatsapp::message::AudioMessage {
url: Some(upload.url),
direct_path: Some(upload.direct_path),
media_key: Some(upload.media_key),
file_enc_sha256: Some(upload.file_enc_sha256),
file_sha256: Some(upload.file_sha256),
file_length: Some(upload.file_length),
mimetype: Some(mimetype),
..Default::default()
})),
..Default::default()
},
};
let msg_id = client.send_message(to.clone(), outgoing).await?;
tracing::info!(
"WhatsApp Web: sent {:?} media (id: {})",
attachment.kind,
msg_id
);
Ok(())
}
}
#[cfg(feature = "whatsapp-web")]
@ -261,17 +484,59 @@ impl Channel for WhatsAppWebChannel {
}
let to = self.recipient_to_jid(&message.recipient)?;
let outgoing = wa_rs_proto::whatsapp::Message {
conversation: Some(message.content.clone()),
..Default::default()
};
let message_id = client.send_message(to, outgoing).await?;
tracing::debug!(
"WhatsApp Web: sent message to {} (id: {})",
message.recipient,
message_id
);
// Parse media attachment markers from the response text.
let (text_without_markers, attachments) = parse_wa_attachment_markers(&message.content);
// Send any text portion first.
if !text_without_markers.is_empty() {
let text_msg = wa_rs_proto::whatsapp::Message {
conversation: Some(text_without_markers.clone()),
..Default::default()
};
let msg_id = client.send_message(to.clone(), text_msg).await?;
tracing::debug!(
"WhatsApp Web: sent text to {} (id: {})",
message.recipient,
msg_id
);
}
// Send each media attachment.
for attachment in &attachments {
if let Err(e) = self.send_media_attachment(&client, &to, attachment).await {
tracing::error!(
"WhatsApp Web: failed to send {:?} attachment {}: {}",
attachment.kind,
attachment.target,
e
);
// Fall back to sending the path as text so the user knows something went wrong.
let fallback = wa_rs_proto::whatsapp::Message {
conversation: Some(format!("[Failed to send media: {}]", attachment.target)),
..Default::default()
};
let _ = client.send_message(to.clone(), fallback).await;
}
}
// If there were no markers and no text (shouldn't happen), send original content.
if attachments.is_empty()
&& text_without_markers.is_empty()
&& !message.content.trim().is_empty()
{
let outgoing = wa_rs_proto::whatsapp::Message {
conversation: Some(message.content.clone()),
..Default::default()
};
let message_id = client.send_message(to, outgoing).await?;
tracing::debug!(
"WhatsApp Web: sent message to {} (id: {})",
message.recipient,
message_id
);
}
Ok(())
}
@ -720,4 +985,44 @@ mod tests {
let ch = make_channel();
assert!(!ch.health_check().await);
}
#[test]
#[cfg(feature = "whatsapp-web")]
fn parse_wa_markers_image() {
let msg = "Here is the timeline [IMAGE:/tmp/chart.png]";
let (text, attachments) = parse_wa_attachment_markers(msg);
assert_eq!(text, "Here is the timeline");
assert_eq!(attachments.len(), 1);
assert_eq!(attachments[0].target, "/tmp/chart.png");
assert!(matches!(attachments[0].kind, WaAttachmentKind::Image));
}
#[test]
#[cfg(feature = "whatsapp-web")]
fn parse_wa_markers_multiple() {
let msg = "Text [IMAGE:/a.png] more [DOCUMENT:/b.pdf]";
let (text, attachments) = parse_wa_attachment_markers(msg);
assert_eq!(text, "Text more");
assert_eq!(attachments.len(), 2);
assert!(matches!(attachments[0].kind, WaAttachmentKind::Image));
assert!(matches!(attachments[1].kind, WaAttachmentKind::Document));
}
#[test]
#[cfg(feature = "whatsapp-web")]
fn parse_wa_markers_no_markers() {
let msg = "Just regular text";
let (text, attachments) = parse_wa_attachment_markers(msg);
assert_eq!(text, "Just regular text");
assert!(attachments.is_empty());
}
#[test]
#[cfg(feature = "whatsapp-web")]
fn parse_wa_markers_unknown_kind_preserved() {
let msg = "Check [UNKNOWN:/foo] out";
let (text, attachments) = parse_wa_attachment_markers(msg);
assert_eq!(text, "Check [UNKNOWN:/foo] out");
assert!(attachments.is_empty());
}
}

View File

@ -252,6 +252,13 @@ pub struct Config {
/// Inter-process agent communication (`[agents_ipc]`).
#[serde(default)]
pub agents_ipc: AgentsIpcConfig,
/// Vision support override for the active provider/model.
/// - `None` (default): use provider's built-in default
/// - `Some(true)`: force vision support on (e.g. Ollama running llava)
/// - `Some(false)`: force vision support off
#[serde(default)]
pub model_support_vision: Option<bool>,
}
/// Named provider profile definition compatible with Codex app-server style config.
@ -3888,6 +3895,7 @@ impl Default for Config {
query_classification: QueryClassificationConfig::default(),
transcription: TranscriptionConfig::default(),
agents_ipc: AgentsIpcConfig::default(),
model_support_vision: None,
}
}
}
@ -4808,6 +4816,18 @@ impl Config {
}
}
// Vision support override: ZEROCLAW_MODEL_SUPPORT_VISION or MODEL_SUPPORT_VISION
if let Ok(flag) = std::env::var("ZEROCLAW_MODEL_SUPPORT_VISION")
.or_else(|_| std::env::var("MODEL_SUPPORT_VISION"))
{
let normalized = flag.trim().to_ascii_lowercase();
match normalized.as_str() {
"1" | "true" | "yes" | "on" => self.model_support_vision = Some(true),
"0" | "false" | "no" | "off" => self.model_support_vision = Some(false),
_ => {}
}
}
// Web search enabled: ZEROCLAW_WEB_SEARCH_ENABLED or WEB_SEARCH_ENABLED
if let Ok(enabled) = std::env::var("ZEROCLAW_WEB_SEARCH_ENABLED")
.or_else(|_| std::env::var("WEB_SEARCH_ENABLED"))
@ -5437,6 +5457,7 @@ default_temperature = 0.7
hardware: HardwareConfig::default(),
transcription: TranscriptionConfig::default(),
agents_ipc: AgentsIpcConfig::default(),
model_support_vision: None,
};
let toml_str = toml::to_string_pretty(&config).unwrap();
@ -5528,6 +5549,24 @@ reasoning_enabled = false
assert_eq!(parsed.runtime.reasoning_enabled, Some(false));
}
#[test]
async fn model_support_vision_deserializes() {
let raw = r#"
default_temperature = 0.7
model_support_vision = true
"#;
let parsed: Config = toml::from_str(raw).unwrap();
assert_eq!(parsed.model_support_vision, Some(true));
// Default (omitted) should be None
let raw_no_vision = r#"
default_temperature = 0.7
"#;
let parsed2: Config = toml::from_str(raw_no_vision).unwrap();
assert_eq!(parsed2.model_support_vision, None);
}
#[test]
async fn agent_config_defaults() {
let cfg = AgentConfig::default();
@ -5622,6 +5661,7 @@ tool_dispatcher = "xml"
hardware: HardwareConfig::default(),
transcription: TranscriptionConfig::default(),
agents_ipc: AgentsIpcConfig::default(),
model_support_vision: None,
};
config.save().await.unwrap();
@ -7394,6 +7434,28 @@ default_model = "legacy-model"
std::env::remove_var("ZEROCLAW_REASONING_ENABLED");
}
#[test]
async fn env_override_model_support_vision() {
let _env_guard = env_override_lock().await;
let mut config = Config::default();
assert_eq!(config.model_support_vision, None);
std::env::set_var("ZEROCLAW_MODEL_SUPPORT_VISION", "true");
config.apply_env_overrides();
assert_eq!(config.model_support_vision, Some(true));
std::env::set_var("ZEROCLAW_MODEL_SUPPORT_VISION", "false");
config.apply_env_overrides();
assert_eq!(config.model_support_vision, Some(false));
std::env::set_var("ZEROCLAW_MODEL_SUPPORT_VISION", "maybe");
config.model_support_vision = Some(true);
config.apply_env_overrides();
assert_eq!(config.model_support_vision, Some(true));
std::env::remove_var("ZEROCLAW_MODEL_SUPPORT_VISION");
}
#[test]
async fn env_override_invalid_port_ignored() {
let _env_guard = env_override_lock().await;

View File

@ -365,6 +365,7 @@ pub async fn run_gateway(host: &str, port: u16, config: Config) -> Result<()> {
reasoning_enabled: config.runtime.reasoning_enabled,
custom_provider_api_mode: config.provider_api.map(|mode| mode.as_compatible_mode()),
max_tokens_override: None,
model_support_vision: config.model_support_vision,
},
)?);
let model = config

View File

@ -176,6 +176,7 @@ pub async fn run_wizard(force: bool) -> Result<Config> {
query_classification: crate::config::QueryClassificationConfig::default(),
transcription: crate::config::TranscriptionConfig::default(),
agents_ipc: crate::config::AgentsIpcConfig::default(),
model_support_vision: None,
};
println!(
@ -530,6 +531,7 @@ async fn run_quick_setup_with_home(
query_classification: crate::config::QueryClassificationConfig::default(),
transcription: crate::config::TranscriptionConfig::default(),
agents_ipc: crate::config::AgentsIpcConfig::default(),
model_support_vision: None,
};
config.save().await?;

View File

@ -1,6 +1,6 @@
use crate::providers::traits::{
ChatMessage, ChatRequest as ProviderChatRequest, ChatResponse as ProviderChatResponse,
Provider, TokenUsage, ToolCall as ProviderToolCall,
Provider, ProviderCapabilities, TokenUsage, ToolCall as ProviderToolCall,
};
use crate::tools::ToolSpec;
use async_trait::async_trait;
@ -468,13 +468,6 @@ impl AnthropicProvider {
#[async_trait]
impl Provider for AnthropicProvider {
fn capabilities(&self) -> crate::providers::traits::ProviderCapabilities {
crate::providers::traits::ProviderCapabilities {
vision: true,
native_tool_calling: true,
}
}
async fn chat_with_system(
&self,
system_prompt: Option<&str>,
@ -566,6 +559,13 @@ impl Provider for AnthropicProvider {
true
}
    /// Static capability flags for this provider: native tool calling and
    /// image (vision) inputs are both enabled.
    fn capabilities(&self) -> ProviderCapabilities {
        ProviderCapabilities {
            native_tool_calling: true,
            vision: true,
        }
    }
async fn chat_with_tools(
&self,
messages: &[ChatMessage],

View File

@ -678,6 +678,7 @@ pub struct ProviderRuntimeOptions {
pub reasoning_enabled: Option<bool>,
pub custom_provider_api_mode: Option<CompatibleApiMode>,
pub max_tokens_override: Option<u32>,
pub model_support_vision: Option<bool>,
}
impl Default for ProviderRuntimeOptions {
@ -690,6 +691,7 @@ impl Default for ProviderRuntimeOptions {
reasoning_enabled: None,
custom_provider_api_mode: None,
max_tokens_override: None,
model_support_vision: None,
}
}
}
@ -1355,7 +1357,8 @@ pub fn create_resilient_provider_with_options(
reliability.provider_backoff_ms,
)
.with_api_keys(reliability.api_keys.clone())
.with_model_fallbacks(reliability.model_fallbacks.clone());
.with_model_fallbacks(reliability.model_fallbacks.clone())
.with_vision_override(options.model_support_vision);
Ok(Box::new(reliable))
}
@ -1427,8 +1430,7 @@ pub fn create_routed_provider_with_options(
.then_some(api_url)
.flatten();
let mut route_options = options.clone();
route_options.max_tokens_override = route.max_tokens;
let route_options = options.clone();
match create_resilient_provider_with_options(
&route.provider,
@ -1458,11 +1460,24 @@ pub fn create_routed_provider_with_options(
}
}
Ok(Box::new(router::RouterProvider::new(
providers,
routes,
default_model.to_string(),
)))
// Build route table
let routes: Vec<(String, router::Route)> = model_routes
.iter()
.map(|r| {
(
r.hint.clone(),
router::Route {
provider_name: r.provider.clone(),
model: r.model.clone(),
},
)
})
.collect();
Ok(Box::new(
router::RouterProvider::new(providers, routes, default_model.to_string())
.with_vision_override(options.model_support_vision),
))
}
/// Information about a supported provider for display purposes.

View File

@ -1034,6 +1034,7 @@ data: [DONE]
reasoning_enabled: None,
custom_provider_api_mode: None,
max_tokens_override: None,
model_support_vision: None,
};
let provider =
OpenAiCodexProvider::new(&options, None).expect("provider should initialize");

View File

@ -233,6 +233,8 @@ pub struct ReliableProvider {
model_fallbacks: HashMap<String, Vec<String>>,
/// Provider-scoped model remaps: provider_name → [model_1, model_2, ...]
provider_model_fallbacks: HashMap<String, Vec<String>>,
/// Vision support override from config (`None` = defer to provider).
vision_override: Option<bool>,
}
impl ReliableProvider {
@ -249,6 +251,7 @@ impl ReliableProvider {
key_index: AtomicUsize::new(0),
model_fallbacks: HashMap::new(),
provider_model_fallbacks: HashMap::new(),
vision_override: None,
}
}
@ -279,6 +282,12 @@ impl ReliableProvider {
self
}
    /// Set vision support override from runtime config.
    ///
    /// `Some(true)` / `Some(false)` force `supports_vision()` to that value;
    /// `None` (the default) defers to the wrapped providers.
    pub fn with_vision_override(mut self, vision_override: Option<bool>) -> Self {
        self.vision_override = vision_override;
        self
    }
/// Build the list of models to try: [original, fallback1, fallback2, ...]
fn model_chain<'a>(&'a self, model: &'a str) -> Vec<&'a str> {
let mut chain = vec![model];
@ -605,9 +614,11 @@ impl Provider for ReliableProvider {
}
fn supports_vision(&self) -> bool {
self.providers
.iter()
.any(|(_, provider)| provider.supports_vision())
self.vision_override.unwrap_or_else(|| {
self.providers
.iter()
.any(|(_, provider)| provider.supports_vision())
})
}
async fn chat_with_tools(
@ -2105,4 +2116,68 @@ mod tests {
assert_eq!(primary_calls.load(Ordering::SeqCst), 1);
assert_eq!(fallback_calls.load(Ordering::SeqCst), 1);
}
#[test]
fn vision_override_forces_true() {
let calls = Arc::new(AtomicUsize::new(0));
let provider = ReliableProvider::new(
vec![(
"primary".into(),
Box::new(MockProvider {
calls: Arc::clone(&calls),
fail_until_attempt: 0,
response: "ok",
error: "",
}) as Box<dyn Provider>,
)],
1,
100,
)
.with_vision_override(Some(true));
// MockProvider default capabilities → vision: false
// Override should force true
assert!(provider.supports_vision());
}
#[test]
fn vision_override_forces_false() {
let calls = Arc::new(AtomicUsize::new(0));
let provider = ReliableProvider::new(
vec![(
"primary".into(),
Box::new(MockProvider {
calls: Arc::clone(&calls),
fail_until_attempt: 0,
response: "ok",
error: "",
}) as Box<dyn Provider>,
)],
1,
100,
)
.with_vision_override(Some(false));
assert!(!provider.supports_vision());
}
#[test]
fn vision_override_none_defers_to_provider() {
let calls = Arc::new(AtomicUsize::new(0));
let provider = ReliableProvider::new(
vec![(
"primary".into(),
Box::new(MockProvider {
calls: Arc::clone(&calls),
fail_until_attempt: 0,
response: "ok",
error: "",
}) as Box<dyn Provider>,
)],
1,
100,
);
// No override set → should defer to provider default (false)
assert!(!provider.supports_vision());
}
}

View File

@ -23,6 +23,8 @@ pub struct RouterProvider {
providers: Vec<(String, Box<dyn Provider>)>,
default_index: usize,
default_model: String,
/// Vision support override from config (`None` = defer to providers).
vision_override: Option<bool>,
}
impl RouterProvider {
@ -66,9 +68,16 @@ impl RouterProvider {
providers,
default_index: 0,
default_model,
vision_override: None,
}
}
    /// Set vision support override from runtime config.
    ///
    /// `Some(true)` / `Some(false)` force `supports_vision()` to that value;
    /// `None` (the default) defers to the routed providers.
    pub fn with_vision_override(mut self, vision_override: Option<bool>) -> Self {
        self.vision_override = vision_override;
        self
    }
/// Resolve a model parameter to a (provider, actual_model) pair.
///
/// If the model starts with "hint:", look up the hint in the route table.
@ -159,9 +168,11 @@ impl Provider for RouterProvider {
}
fn supports_vision(&self) -> bool {
self.providers
.iter()
.any(|(_, provider)| provider.supports_vision())
self.vision_override.unwrap_or_else(|| {
self.providers
.iter()
.any(|(_, provider)| provider.supports_vision())
})
}
async fn warmup(&self) -> anyhow::Result<()> {

View File

@ -364,6 +364,7 @@ pub fn all_tools_with_runtime(
.provider_api
.map(|mode| mode.as_compatible_mode()),
max_tokens_override: None,
model_support_vision: root_config.model_support_vision,
},
)
.with_parent_tools(parent_tools)

View File

@ -153,6 +153,7 @@ async fn openai_codex_second_vision_support() -> Result<()> {
reasoning_enabled: None,
custom_provider_api_mode: None,
max_tokens_override: None,
model_support_vision: None,
};
let provider = zeroclaw::providers::create_provider_with_options("openai-codex", None, &opts)?;