feat(i18n): externalize tool descriptions for translation (#3912)

Add a locale-aware tool description system that loads translations from
TOML files in tool_descriptions/. This enables non-English users to see
tool descriptions in their language.

- Add src/i18n.rs module with ToolDescriptions loader, locale detection
  (ZEROCLAW_LOCALE, LANG, LC_ALL env vars), and English fallback chain
- Add locale config field to Config struct for explicit locale override
- Create tool_descriptions/en.toml with all 47 tool descriptions
- Create tool_descriptions/zh-CN.toml with Chinese translations
- Integrate with ToolsSection::build() and build_tool_instructions()
  to resolve descriptions from locale files before hardcoded fallback
- Add PromptContext.tool_descriptions field for prompt-time resolution
- Add AgentBuilder.tool_descriptions() setter for Agent construction
- Include tool_descriptions/ in Cargo.toml package include list
- Add 8 unit tests covering locale loading, fallback chains, env
  detection, and config override

Closes #3901
This commit is contained in:
Argenis
2026-03-18 17:01:39 -04:00
committed by Roman Tataurov
parent 3430f9bf1a
commit 81d99f513c
12 changed files with 522 additions and 8 deletions
+11
View File
@@ -4,6 +4,7 @@ use crate::agent::dispatcher::{
use crate::agent::memory_loader::{DefaultMemoryLoader, MemoryLoader};
use crate::agent::prompt::{PromptContext, SystemPromptBuilder};
use crate::config::Config;
use crate::i18n::ToolDescriptions;
use crate::memory::{self, Memory, MemoryCategory};
use crate::observability::{self, Observer, ObserverEvent};
use crate::providers::{self, ChatMessage, ChatRequest, ConversationMessage, Provider};
@@ -40,6 +41,7 @@ pub struct Agent {
route_model_by_hint: HashMap<String, String>,
allowed_tools: Option<Vec<String>>,
response_cache: Option<Arc<crate::memory::response_cache::ResponseCache>>,
tool_descriptions: Option<ToolDescriptions>,
}
pub struct AgentBuilder {
@@ -64,6 +66,7 @@ pub struct AgentBuilder {
route_model_by_hint: Option<HashMap<String, String>>,
allowed_tools: Option<Vec<String>>,
response_cache: Option<Arc<crate::memory::response_cache::ResponseCache>>,
tool_descriptions: Option<ToolDescriptions>,
}
impl AgentBuilder {
@@ -90,6 +93,7 @@ impl AgentBuilder {
route_model_by_hint: None,
allowed_tools: None,
response_cache: None,
tool_descriptions: None,
}
}
@@ -207,6 +211,11 @@ impl AgentBuilder {
self
}
/// Sets the locale-aware tool descriptions for the agent being built.
///
/// The value is stored as-is and handed to the `Agent` by `build`. Passing
/// `None` leaves the agent without a locale table, in which case prompt
/// rendering uses each tool's own `description()`.
pub fn tool_descriptions(mut self, tool_descriptions: Option<ToolDescriptions>) -> Self {
self.tool_descriptions = tool_descriptions;
self
}
pub fn build(self) -> Result<Agent> {
let mut tools = self
.tools
@@ -257,6 +266,7 @@ impl AgentBuilder {
route_model_by_hint: self.route_model_by_hint.unwrap_or_default(),
allowed_tools: allowed,
response_cache: self.response_cache,
tool_descriptions: self.tool_descriptions,
})
}
}
@@ -456,6 +466,7 @@ impl Agent {
skills_prompt_mode: self.skills_prompt_mode,
identity_config: Some(&self.identity_config),
dispatcher_instructions: &instructions,
tool_descriptions: self.tool_descriptions.as_ref(),
};
self.prompt_builder.build(&ctx)
}
+32 -5
View File
@@ -1,5 +1,6 @@
use crate::approval::{ApprovalManager, ApprovalRequest, ApprovalResponse};
use crate::config::Config;
use crate::i18n::ToolDescriptions;
use crate::memory::{self, Memory, MemoryCategory};
use crate::multimodal;
use crate::observability::{self, runtime_trace, Observer, ObserverEvent};
@@ -3078,7 +3079,10 @@ pub(crate) async fn run_tool_call_loop(
/// Build the tool instruction block for the system prompt so the LLM knows
/// how to invoke tools.
pub(crate) fn build_tool_instructions(tools_registry: &[Box<dyn Tool>]) -> String {
pub(crate) fn build_tool_instructions(
tools_registry: &[Box<dyn Tool>],
tool_descriptions: Option<&ToolDescriptions>,
) -> String {
let mut instructions = String::new();
instructions.push_str("\n## Tool Use Protocol\n\n");
instructions.push_str("To use a tool, wrap a JSON object in <tool_call></tool_call> tags:\n\n");
@@ -3094,11 +3098,14 @@ pub(crate) fn build_tool_instructions(tools_registry: &[Box<dyn Tool>]) -> Strin
instructions.push_str("### Available Tools\n\n");
for tool in tools_registry {
let desc = tool_descriptions
.and_then(|td| td.get(tool.name()))
.unwrap_or_else(|| tool.description());
let _ = writeln!(
instructions,
"**{}**: {}\nParameters: `{}`\n",
tool.name(),
tool.description(),
desc,
tool.parameters_schema()
);
}
@@ -3324,6 +3331,16 @@ pub async fn run(
.map(|b| b.board.clone())
.collect();
// ── Load locale-aware tool descriptions ────────────────────────
let i18n_locale = config
.locale
.as_deref()
.filter(|s| !s.is_empty())
.map(ToString::to_string)
.unwrap_or_else(crate::i18n::detect_locale);
let i18n_search_dirs = crate::i18n::default_search_dirs(&config.workspace_dir);
let i18n_descs = crate::i18n::ToolDescriptions::load(&i18n_locale, &i18n_search_dirs);
// ── Build system prompt from workspace MD files (OpenClaw framework) ──
let skills = crate::skills::load_skills_with_config(&config.workspace_dir, &config);
let mut tool_descs: Vec<(&str, &str)> = vec![
@@ -3453,7 +3470,7 @@ pub async fn run(
// Append structured tool-use instructions with schemas (only for non-native providers)
if !native_tools {
system_prompt.push_str(&build_tool_instructions(&tools_registry));
system_prompt.push_str(&build_tool_instructions(&tools_registry, Some(&i18n_descs)));
}
// Append deferred MCP tool names so the LLM knows what is available
@@ -3989,6 +4006,16 @@ pub async fn process_message(
.map(|b| b.board.clone())
.collect();
// ── Load locale-aware tool descriptions ────────────────────────
let i18n_locale = config
.locale
.as_deref()
.filter(|s| !s.is_empty())
.map(ToString::to_string)
.unwrap_or_else(crate::i18n::detect_locale);
let i18n_search_dirs = crate::i18n::default_search_dirs(&config.workspace_dir);
let i18n_descs = crate::i18n::ToolDescriptions::load(&i18n_locale, &i18n_search_dirs);
let skills = crate::skills::load_skills_with_config(&config.workspace_dir, &config);
let mut tool_descs: Vec<(&str, &str)> = vec![
("shell", "Execute terminal commands."),
@@ -4054,7 +4081,7 @@ pub async fn process_message(
config.skills.prompt_injection_mode,
);
if !native_tools {
system_prompt.push_str(&build_tool_instructions(&tools_registry));
system_prompt.push_str(&build_tool_instructions(&tools_registry, Some(&i18n_descs)));
}
if !deferred_section.is_empty() {
system_prompt.push('\n');
@@ -5764,7 +5791,7 @@ Tail"#;
std::path::Path::new("/tmp"),
));
let tools = tools::default_tools(security);
let instructions = build_tool_instructions(&tools);
let instructions = build_tool_instructions(&tools, None);
assert!(instructions.contains("## Tool Use Protocol"));
assert!(instructions.contains("<tool_call>"));
+15 -1
View File
@@ -1,4 +1,5 @@
use crate::config::IdentityConfig;
use crate::i18n::ToolDescriptions;
use crate::identity;
use crate::skills::Skill;
use crate::tools::Tool;
@@ -17,6 +18,9 @@ pub struct PromptContext<'a> {
pub skills_prompt_mode: crate::config::SkillsPromptInjectionMode,
pub identity_config: Option<&'a IdentityConfig>,
pub dispatcher_instructions: &'a str,
/// Locale-aware tool descriptions. When present, tool descriptions in
/// prompts are resolved from the locale file instead of hardcoded values.
pub tool_descriptions: Option<&'a ToolDescriptions>,
}
pub trait PromptSection: Send + Sync {
@@ -124,11 +128,15 @@ impl PromptSection for ToolsSection {
fn build(&self, ctx: &PromptContext<'_>) -> Result<String> {
let mut out = String::from("## Tools\n\n");
for tool in ctx.tools {
let desc = ctx
.tool_descriptions
.and_then(|td: &ToolDescriptions| td.get(tool.name()))
.unwrap_or_else(|| tool.description());
let _ = writeln!(
out,
"- **{}**: {}\n Parameters: `{}`",
tool.name(),
tool.description(),
desc,
tool.parameters_schema()
);
}
@@ -317,6 +325,7 @@ mod tests {
skills_prompt_mode: crate::config::SkillsPromptInjectionMode::Full,
identity_config: Some(&identity_config),
dispatcher_instructions: "",
tool_descriptions: None,
};
let section = IdentitySection;
@@ -345,6 +354,7 @@ mod tests {
skills_prompt_mode: crate::config::SkillsPromptInjectionMode::Full,
identity_config: None,
dispatcher_instructions: "instr",
tool_descriptions: None,
};
let prompt = SystemPromptBuilder::with_defaults().build(&ctx).unwrap();
assert!(prompt.contains("## Tools"));
@@ -380,6 +390,7 @@ mod tests {
skills_prompt_mode: crate::config::SkillsPromptInjectionMode::Full,
identity_config: None,
dispatcher_instructions: "",
tool_descriptions: None,
};
let output = SkillsSection.build(&ctx).unwrap();
@@ -418,6 +429,7 @@ mod tests {
skills_prompt_mode: crate::config::SkillsPromptInjectionMode::Compact,
identity_config: None,
dispatcher_instructions: "",
tool_descriptions: None,
};
let output = SkillsSection.build(&ctx).unwrap();
@@ -439,6 +451,7 @@ mod tests {
skills_prompt_mode: crate::config::SkillsPromptInjectionMode::Full,
identity_config: None,
dispatcher_instructions: "instr",
tool_descriptions: None,
};
let rendered = DateTimeSection.build(&ctx).unwrap();
@@ -477,6 +490,7 @@ mod tests {
skills_prompt_mode: crate::config::SkillsPromptInjectionMode::Full,
identity_config: None,
dispatcher_instructions: "",
tool_descriptions: None,
};
let prompt = SystemPromptBuilder::with_defaults().build(&ctx).unwrap();
+15 -2
View File
@@ -3939,6 +3939,16 @@ pub async fn start_channels(config: Config) -> Result<()> {
let skills = crate::skills::load_skills_with_config(&workspace, &config);
// ── Load locale-aware tool descriptions ────────────────────────
let i18n_locale = config
.locale
.as_deref()
.filter(|s| !s.is_empty())
.map(ToString::to_string)
.unwrap_or_else(crate::i18n::detect_locale);
let i18n_search_dirs = crate::i18n::default_search_dirs(&workspace);
let i18n_descs = crate::i18n::ToolDescriptions::load(&i18n_locale, &i18n_search_dirs);
// Collect tool descriptions for the prompt
let mut tool_descs: Vec<(&str, &str)> = vec![
(
@@ -4018,7 +4028,10 @@ pub async fn start_channels(config: Config) -> Result<()> {
config.skills.prompt_injection_mode,
);
if !native_tools {
system_prompt.push_str(&build_tool_instructions(tools_registry.as_ref()));
system_prompt.push_str(&build_tool_instructions(
tools_registry.as_ref(),
Some(&i18n_descs),
));
}
// Append deferred MCP tool names so the LLM knows what is available
@@ -6768,7 +6781,7 @@ BTC is currently around $65,000 based on latest tool output."#
"build_system_prompt should not emit protocol block directly"
);
prompt.push_str(&build_tool_instructions(&[]));
prompt.push_str(&build_tool_instructions(&[], None));
assert_eq!(
prompt.matches("## Tool Use Protocol").count(),
+14
View File
@@ -339,6 +339,17 @@ pub struct Config {
/// Plugin system configuration (`[plugins]`).
#[serde(default)]
pub plugins: PluginsConfig,
/// Locale for tool descriptions (e.g. `"en"`, `"zh-CN"`).
///
/// When set, tool descriptions shown in system prompts are loaded from
/// `tool_descriptions/<locale>.toml`. Falls back to English, then to
/// hardcoded descriptions.
///
/// If omitted or empty, the locale is auto-detected from `ZEROCLAW_LOCALE`,
/// `LANG`, or `LC_ALL` environment variables (defaulting to `"en"`).
#[serde(default)]
pub locale: Option<String>,
}
/// Multi-client workspace isolation configuration.
@@ -5996,6 +6007,7 @@ impl Default for Config {
knowledge: KnowledgeConfig::default(),
linkedin: LinkedInConfig::default(),
plugins: PluginsConfig::default(),
locale: None,
}
}
}
@@ -8433,6 +8445,7 @@ default_temperature = 0.7
knowledge: KnowledgeConfig::default(),
linkedin: LinkedInConfig::default(),
plugins: PluginsConfig::default(),
locale: None,
};
let toml_str = toml::to_string_pretty(&config).unwrap();
@@ -8766,6 +8779,7 @@ tool_dispatcher = "xml"
knowledge: KnowledgeConfig::default(),
linkedin: LinkedInConfig::default(),
plugins: PluginsConfig::default(),
locale: None,
};
config.save().await.unwrap();
+311
View File
@@ -0,0 +1,311 @@
//! Internationalization support for tool descriptions.
//!
//! Loads tool descriptions from TOML locale files in `tool_descriptions/`.
//! Falls back to English when a locale file or specific key is missing,
//! and ultimately falls back to the hardcoded `tool.description()` value
//! if no file-based description exists.
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use tracing::debug;
/// Container for locale-specific tool descriptions loaded from TOML files.
///
/// Lookups consult the requested locale first and the English table second;
/// see `ToolDescriptions::get`.
#[derive(Debug, Clone)]
pub struct ToolDescriptions {
/// Descriptions from the requested locale (empty if no usable file was found).
locale_descriptions: HashMap<String, String>,
/// English fallback descriptions. Loaded whenever the requested locale is
/// not "en"; left empty when it is "en".
english_fallback: HashMap<String, String>,
/// The locale tag that was requested at load time (e.g. "en", "zh-CN"),
/// stored verbatim even when no matching file exists.
locale: String,
}
/// On-disk shape of a locale file: a `[tools]` table mapping
/// tool name -> description string.
#[derive(Debug, serde::Deserialize)]
struct DescriptionFile {
/// Tool name -> description. `#[serde(default)]` makes a file without a
/// `[tools]` table deserialize to an empty map instead of failing.
#[serde(default)]
tools: HashMap<String, String>,
}
impl ToolDescriptions {
    /// Build the description tables for `locale` from files on disk.
    ///
    /// Each entry of `search_dirs` is probed for
    /// `tool_descriptions/<locale>.toml`; the first directory that yields a
    /// parseable file is used.
    ///
    /// Lookup order applied later by [`ToolDescriptions::get`]:
    /// 1. The requested locale's file.
    /// 2. `en.toml` (only loaded when `locale` is not `"en"`).
    /// 3. Nothing — the caller is expected to use `tool.description()`.
    pub fn load(locale: &str, search_dirs: &[PathBuf]) -> Self {
        let translated = load_locale_file(locale, search_dirs);

        // English is its own fallback, so a second table would be redundant.
        let english = match locale {
            "en" => HashMap::new(),
            _ => load_locale_file("en", search_dirs),
        };

        debug!(
            locale = locale,
            locale_keys = translated.len(),
            english_keys = english.len(),
            "tool descriptions loaded"
        );

        Self {
            locale_descriptions: translated,
            english_fallback: english,
            locale: locale.to_string(),
        }
    }

    /// Look up the description for `tool_name`.
    ///
    /// Yields the requested-locale text when present, otherwise the English
    /// text, otherwise `None` (the caller should use the hardcoded value).
    pub fn get(&self, tool_name: &str) -> Option<&str> {
        let hit = match self.locale_descriptions.get(tool_name) {
            Some(translated) => Some(translated),
            None => self.english_fallback.get(tool_name),
        };
        hit.map(|text| text.as_str())
    }

    /// The locale tag this instance was loaded for.
    pub fn locale(&self) -> &str {
        self.locale.as_str()
    }

    /// An instance with no entries at all: every `get` returns `None`, so
    /// callers always end up on their hardcoded descriptions.
    pub fn empty() -> Self {
        Self {
            locale: String::from("en"),
            locale_descriptions: HashMap::new(),
            english_fallback: HashMap::new(),
        }
    }
}
/// Detect the user's preferred locale from environment variables.
///
/// Checks `ZEROCLAW_LOCALE`, then `LANG`, then `LC_ALL`, and returns the
/// first value that normalizes to a usable tag:
///
/// * `ZEROCLAW_LOCALE` is an explicit override; it is accepted unless it is
///   blank or normalizes to an empty tag (e.g. `".UTF-8"`).
/// * `LANG` / `LC_ALL` are additionally skipped when they name the `C` or
///   `POSIX` locale, which carry no language information.
///
/// Returns `"en"` if no variable yields a usable tag.
///
/// NOTE(review): POSIX gives `LC_ALL` precedence over `LANG`; the order here
/// follows this module's documented behaviour — confirm that is intended.
pub fn detect_locale() -> String {
    if let Ok(raw) = std::env::var("ZEROCLAW_LOCALE") {
        // Check emptiness *after* normalization so an encoding-only value
        // cannot produce an empty locale tag (and a lookup for `.toml`).
        let tag = normalize_locale(raw.trim());
        if !tag.is_empty() {
            return tag;
        }
    }

    for var in &["LANG", "LC_ALL"] {
        if let Ok(raw) = std::env::var(var) {
            // Trim here as well so all three variables are treated alike.
            let tag = normalize_locale(raw.trim());
            if !matches!(tag.as_str(), "" | "C" | "POSIX") {
                return tag;
            }
        }
    }

    "en".to_string()
}
/// Normalize a raw locale string (e.g. "zh_CN.UTF-8") to a tag we use
/// for file lookup (e.g. "zh-CN").
///
/// Steps:
/// 1. Trim surrounding whitespace.
/// 2. Keep only the part before the first `.` (encoding, e.g. `.UTF-8`) or
///    `@` (modifier, e.g. `@euro`), whichever comes first, so that
///    `de_DE@euro` and `de_DE.UTF-8@euro` both map to the same tag.
/// 3. Replace underscores with hyphens for BCP-47-ish consistency.
///
/// Returns an empty string when nothing remains (e.g. `""` or `".UTF-8"`).
fn normalize_locale(raw: &str) -> String {
    let base = raw
        .trim()
        .split(|c: char| c == '.' || c == '@')
        .next()
        // `split` always yields at least one (possibly empty) segment.
        .unwrap_or("");
    base.replace('_', "-")
}
/// Assemble the ordered list of directories probed for locale files.
///
/// Order of precedence:
/// 1. `workspace_dir` — lets a project ship its own overrides.
/// 2. The directory containing the running executable, when it can be
///    determined — covers installed distributions.
/// 3. The crate root recorded at compile time (`CARGO_MANIFEST_DIR`) — a
///    development-time fallback, added only if not already listed.
pub fn default_search_dirs(workspace_dir: &Path) -> Vec<PathBuf> {
    let mut candidates = Vec::with_capacity(3);
    candidates.push(workspace_dir.to_path_buf());

    // `current_exe` can fail and a path can lack a parent; either way there
    // is simply nothing to add.
    let exe_dir = std::env::current_exe()
        .ok()
        .and_then(|exe| exe.parent().map(Path::to_path_buf));
    candidates.extend(exe_dir);

    // Project root (where Cargo.toml lives), useful when running from source.
    let project_root = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    if candidates.iter().all(|dir| dir != &project_root) {
        candidates.push(project_root);
    }

    candidates
}
/// Try to load and parse a locale TOML file from the first matching search dir.
fn load_locale_file(locale: &str, search_dirs: &[PathBuf]) -> HashMap<String, String> {
let filename = format!("tool_descriptions/{locale}.toml");
for dir in search_dirs {
let path = dir.join(&filename);
match std::fs::read_to_string(&path) {
Ok(contents) => match toml::from_str::<DescriptionFile>(&contents) {
Ok(parsed) => {
debug!(path = %path.display(), keys = parsed.tools.len(), "loaded locale file");
return parsed.tools;
}
Err(e) => {
debug!(path = %path.display(), error = %e, "failed to parse locale file");
}
},
Err(_) => {
// File not found in this directory, try next.
}
}
}
debug!(
locale = locale,
"no locale file found in any search directory"
);
HashMap::new()
}
// Unit tests for locale file loading, the fallback chain, and locale detection.
#[cfg(test)]
mod tests {
use super::*;
use std::fs;
/// Helper: write `content` to `<dir>/tool_descriptions/<locale>.toml`,
/// creating the `tool_descriptions` subdirectory if needed.
fn write_locale_file(dir: &Path, locale: &str, content: &str) {
let td = dir.join("tool_descriptions");
fs::create_dir_all(&td).unwrap();
fs::write(td.join(format!("{locale}.toml")), content).unwrap();
}
/// Loading "en" returns the file's entries, `None` for unknown keys, and
/// reports "en" as the locale.
#[test]
fn load_english_descriptions() {
let tmp = tempfile::tempdir().unwrap();
write_locale_file(
tmp.path(),
"en",
r#"[tools]
shell = "Execute a shell command"
file_read = "Read file contents"
"#,
);
let descs = ToolDescriptions::load("en", &[tmp.path().to_path_buf()]);
assert_eq!(descs.get("shell"), Some("Execute a shell command"));
assert_eq!(descs.get("file_read"), Some("Read file contents"));
assert_eq!(descs.get("nonexistent"), None);
assert_eq!(descs.locale(), "en");
}
/// A key missing from the requested locale's file is served from `en.toml`.
#[test]
fn fallback_to_english_when_locale_key_missing() {
let tmp = tempfile::tempdir().unwrap();
write_locale_file(
tmp.path(),
"en",
r#"[tools]
shell = "Execute a shell command"
file_read = "Read file contents"
"#,
);
write_locale_file(
tmp.path(),
"zh-CN",
r#"[tools]
shell = "在工作区目录中执行 shell 命令"
"#,
);
let descs = ToolDescriptions::load("zh-CN", &[tmp.path().to_path_buf()]);
// Translated key returns Chinese.
assert_eq!(descs.get("shell"), Some("在工作区目录中执行 shell 命令"));
// Missing key falls back to English.
assert_eq!(descs.get("file_read"), Some("Read file contents"));
assert_eq!(descs.locale(), "zh-CN");
}
/// With no file for the requested locale, every lookup is served from
/// English while `locale()` still reports the requested tag.
#[test]
fn fallback_when_locale_file_missing() {
let tmp = tempfile::tempdir().unwrap();
write_locale_file(
tmp.path(),
"en",
r#"[tools]
shell = "Execute a shell command"
"#,
);
// Request a locale that has no file.
let descs = ToolDescriptions::load("fr", &[tmp.path().to_path_buf()]);
// Falls back to English.
assert_eq!(descs.get("shell"), Some("Execute a shell command"));
assert_eq!(descs.locale(), "fr");
}
/// An empty search directory yields `None`, leaving the caller on its
/// hardcoded description.
#[test]
fn fallback_when_no_files_exist() {
let tmp = tempfile::tempdir().unwrap();
let descs = ToolDescriptions::load("en", &[tmp.path().to_path_buf()]);
assert_eq!(descs.get("shell"), None);
}
/// `ToolDescriptions::empty()` has no entries and reports "en".
#[test]
fn empty_always_returns_none() {
let descs = ToolDescriptions::empty();
assert_eq!(descs.get("shell"), None);
assert_eq!(descs.locale(), "en");
}
/// `ZEROCLAW_LOCALE` wins when set; otherwise `LANG` is normalized and used.
///
/// NOTE(review): this mutates process-wide environment variables. If other
/// tests read them concurrently it may be flaky, and a failing assertion
/// skips the restore at the end — consider serializing or using a guard.
#[test]
fn detect_locale_from_env() {
// Save and restore env.
let saved = std::env::var("ZEROCLAW_LOCALE").ok();
let saved_lang = std::env::var("LANG").ok();
std::env::set_var("ZEROCLAW_LOCALE", "ja-JP");
assert_eq!(detect_locale(), "ja-JP");
std::env::remove_var("ZEROCLAW_LOCALE");
std::env::set_var("LANG", "zh_CN.UTF-8");
assert_eq!(detect_locale(), "zh-CN");
// Restore.
match saved {
Some(v) => std::env::set_var("ZEROCLAW_LOCALE", v),
None => std::env::remove_var("ZEROCLAW_LOCALE"),
}
match saved_lang {
Some(v) => std::env::set_var("LANG", v),
None => std::env::remove_var("LANG"),
}
}
/// Encoding suffixes are dropped and `_` becomes `-`.
#[test]
fn normalize_locale_strips_encoding() {
assert_eq!(normalize_locale("en_US.UTF-8"), "en-US");
assert_eq!(normalize_locale("zh_CN.utf8"), "zh-CN");
assert_eq!(normalize_locale("fr"), "fr");
assert_eq!(normalize_locale("pt_BR"), "pt-BR");
}
/// An explicitly supplied locale tag is loaded as given.
#[test]
fn config_locale_overrides_env() {
// This tests the precedence logic: if config provides a locale,
// it should be used instead of detect_locale().
// The actual override happens at the call site in prompt.rs / loop_.rs,
// so here we just verify ToolDescriptions works with an explicit locale.
let tmp = tempfile::tempdir().unwrap();
write_locale_file(
tmp.path(),
"de",
r#"[tools]
shell = "Einen Shell-Befehl im Arbeitsverzeichnis ausführen"
"#,
);
let descs = ToolDescriptions::load("de", &[tmp.path().to_path_buf()]);
assert_eq!(
descs.get("shell"),
Some("Einen Shell-Befehl im Arbeitsverzeichnis ausführen")
);
}
}
+1
View File
@@ -54,6 +54,7 @@ pub(crate) mod hardware;
pub(crate) mod health;
pub(crate) mod heartbeat;
pub mod hooks;
pub mod i18n;
pub(crate) mod identity;
pub(crate) mod integrations;
pub mod memory;
+1
View File
@@ -89,6 +89,7 @@ mod hardware;
mod health;
mod heartbeat;
mod hooks;
mod i18n;
mod identity;
mod integrations;
mod memory;
+2
View File
@@ -193,6 +193,7 @@ pub async fn run_wizard(force: bool) -> Result<Config> {
knowledge: crate::config::KnowledgeConfig::default(),
linkedin: crate::config::LinkedInConfig::default(),
plugins: crate::config::PluginsConfig::default(),
locale: None,
};
println!(
@@ -567,6 +568,7 @@ async fn run_quick_setup_with_home(
knowledge: crate::config::KnowledgeConfig::default(),
linkedin: crate::config::LinkedInConfig::default(),
plugins: crate::config::PluginsConfig::default(),
locale: None,
};
config.save().await?;