fix(security): harden Microsoft 365 token cache and OAuth URL construction

- Restrict token cache file permissions to 0o600 on Unix to prevent
  other local users from reading cached OAuth access/refresh tokens
- Validate tenant_id format (alphanumeric, hyphens, dots) in config
  validation to prevent URL injection in OAuth endpoint construction
- Add custom Debug impl for CachedTokenState that redacts tokens
- Replace unstable DefaultHasher with deterministic fingerprint for
  cache filenames to prevent orphaned files on Rust toolchain updates
- Fix OneDrive path encoding to preserve '/' separators (encode each
  segment individually) so subdirectory traversal works correctly
- Check Content-Length header before buffering download response to
  prevent OOM on oversized files

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
simianastronaut 2026-03-15 16:29:04 -04:00
parent 97b9739320
commit 8e0f87fa3d
3 changed files with 78 additions and 12 deletions

View File

@ -5322,6 +5322,20 @@ impl Config {
"microsoft365.tenant_id must not be empty when microsoft365 is enabled"
);
}
// Validate tenant_id format to prevent URL injection in OAuth
// endpoints. Azure AD tenant IDs are either UUIDs or verified
// domain names (ASCII alphanumeric, hyphens, dots).
if let Some(tid) = tenant {
    // Use is_ascii_alphanumeric, not is_alphanumeric: the latter accepts
    // any Unicode letter/digit (e.g. Cyrillic homoglyphs), which would
    // weaken this guard on the URL we construct from tenant_id.
    if !tid
        .chars()
        .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '.')
    {
        anyhow::bail!(
            "microsoft365.tenant_id contains invalid characters; \
             expected UUID or domain format (alphanumeric, hyphens, dots)"
        );
    }
}
let client = self
.microsoft365
.client_id

View File

@ -1,13 +1,12 @@
use anyhow::Context;
use parking_lot::RwLock;
use serde::{Deserialize, Serialize};
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
use std::fmt;
use std::path::PathBuf;
use tokio::sync::Mutex;
/// Cached OAuth2 token state persisted to disk between runs.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[derive(Clone, Serialize, Deserialize)]
pub struct CachedTokenState {
pub access_token: String,
pub refresh_token: Option<String>,
@ -15,6 +14,16 @@ pub struct CachedTokenState {
pub expires_at: i64,
}
/// Manual `Debug` that redacts token material so an accidental `{:?}` of
/// the cached state never leaks credentials into logs or error output.
impl fmt::Debug for CachedTokenState {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Preserve Some/None shape so logs still show whether a refresh
        // token is present, without revealing its value.
        let refresh_redacted = self.refresh_token.as_ref().map(|_| "***");
        let mut out = f.debug_struct("CachedTokenState");
        out.field("access_token", &"***");
        out.field("refresh_token", &refresh_redacted);
        out.field("expires_at", &self.expires_at);
        out.finish()
    }
}
impl CachedTokenState {
/// Returns `true` when the token is expired or will expire within 60 seconds.
pub fn is_expired(&self) -> bool {
@ -48,11 +57,11 @@ impl TokenCache {
// Scope cache file to (tenant_id, client_id, auth_flow) so config
// changes never reuse tokens from a different account/flow.
let mut hasher = DefaultHasher::new();
config.tenant_id.hash(&mut hasher);
config.client_id.hash(&mut hasher);
config.auth_flow.hash(&mut hasher);
let fingerprint = format!("{:016x}", hasher.finish());
// Use a deterministic fingerprint (not DefaultHasher which is unstable
// across Rust versions and would orphan cache files on toolchain updates).
let fingerprint = format!(
"{}_{}_{}", config.tenant_id, config.client_id, config.auth_flow
).replace(|c: char| !c.is_alphanumeric() && c != '-' && c != '_', "_");
let cache_path = zeroclaw_dir.join(format!("ms365_token_cache_{fingerprint}.json"));
let cached = Self::load_from_disk(&cache_path);
@ -320,11 +329,33 @@ impl TokenCache {
fn persist_to_disk(&self, state: &CachedTokenState) {
if let Ok(json) = serde_json::to_string_pretty(state) {
if let Err(e) = std::fs::write(&self.cache_path, json) {
if let Err(e) = Self::write_restricted(&self.cache_path, &json) {
tracing::warn!("ms365: failed to persist token cache: {e}");
}
}
}
/// Write `content` to `path` with restricted permissions (0o600 on Unix) to
/// prevent other local users from reading cached OAuth tokens.
///
/// `OpenOptions::mode(0o600)` only takes effect when the file is *created*;
/// a cache file that already exists (e.g. written by an older version with
/// default 0o644) would keep its old permissions. We therefore also tighten
/// the permissions explicitly after opening, before writing the new tokens.
///
/// On non-Unix targets this falls back to a plain write; NTFS ACLs are not
/// adjusted here.
fn write_restricted(path: &std::path::Path, content: &str) -> std::io::Result<()> {
    #[cfg(unix)]
    {
        use std::io::Write;
        use std::os::unix::fs::{OpenOptionsExt, PermissionsExt};
        let mut file = std::fs::OpenOptions::new()
            .create(true)
            .write(true)
            .truncate(true)
            .mode(0o600)
            .open(path)?;
        // Cover the pre-existing-file case: mode() above is ignored when the
        // file already exists, so force 0o600 before the secrets land on disk.
        file.set_permissions(std::fs::Permissions::from_mode(0o600))?;
        file.write_all(content.as_bytes())?;
        Ok(())
    }
    #[cfg(not(unix))]
    {
        std::fs::write(path, content)
    }
}
}
#[derive(Deserialize)]

View File

@ -285,7 +285,13 @@ pub async fn onedrive_list(
let base = user_path(user_id);
let url = match path {
Some(p) if !p.is_empty() => {
let encoded = urlencoding::encode(p);
// Encode each path component individually, preserving '/' separators.
// Graph API expects literal '/' in the root:/{path}: syntax.
let encoded: String = p
.split('/')
.map(|seg| urlencoding::encode(seg).into_owned())
.collect::<Vec<_>>()
.join("/");
format!("{GRAPH_BASE}{base}/drive/root:/{encoded}:/children")
}
_ => format!("{GRAPH_BASE}{base}/drive/root/children"),
@ -330,6 +336,16 @@ pub async fn onedrive_download(
anyhow::bail!("ms365: onedrive_download failed ({status}, code={code})");
}
// Check Content-Length header first to reject oversized files before
// buffering the entire response body into memory.
if let Some(content_length) = resp.content_length() {
if usize::try_from(content_length).unwrap_or(usize::MAX) > max_size {
anyhow::bail!(
"ms365: file Content-Length exceeds max_size ({content_length} > {max_size})"
);
}
}
let bytes = resp
.bytes()
.await
@ -479,11 +495,16 @@ mod tests {
#[test]
fn onedrive_path_url() {
    let base = user_path("me");
    let path = "Documents/Reports";
    // Encode each path segment individually, preserving '/' separators —
    // mirrors onedrive_list, where Graph's root:/{path}: syntax requires
    // literal slashes between components.
    let encoded: String = path
        .split('/')
        .map(|seg| urlencoding::encode(seg).into_owned())
        .collect::<Vec<_>>()
        .join("/");
    let url = format!("{GRAPH_BASE}{base}/drive/root:/{encoded}:/children");
    assert_eq!(
        url,
        "https://graph.microsoft.com/v1.0/me/drive/root:/Documents/Reports:/children"
    );
}