feat(observability): add Hands dashboard metrics and events (#3595)

Add HandStarted, HandCompleted, and HandFailed event variants to
ObserverEvent, and HandRunDuration, HandFindingsCount, HandSuccessRate
metric variants to ObserverMetric. Update all observer backends (log,
noop, verbose, prometheus, otel) to handle the new variants with
appropriate instrumentation. Prometheus backend registers hand_runs
counter, hand_duration histogram, and hand_findings counter. OTel
backend creates spans and records metrics for hand runs.
This commit is contained in:
Giulio V 2026-03-16 23:24:47 +01:00 committed by GitHub
parent 595b81be41
commit 2deb91455d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 497 additions and 0 deletions

View File

@ -92,6 +92,23 @@ impl Observer for LogObserver {
"llm.response"
);
}
ObserverEvent::HandStarted { hand_name } => {
info!(hand = %hand_name, "hand.started");
}
ObserverEvent::HandCompleted {
hand_name,
duration_ms,
findings_count,
} => {
info!(hand = %hand_name, duration_ms = duration_ms, findings = findings_count, "hand.completed");
}
ObserverEvent::HandFailed {
hand_name,
error,
duration_ms,
} => {
info!(hand = %hand_name, error = %error, duration_ms = duration_ms, "hand.failed");
}
}
}
@ -110,6 +127,19 @@ impl Observer for LogObserver {
ObserverMetric::QueueDepth(d) => {
info!(depth = d, "metric.queue_depth");
}
ObserverMetric::HandRunDuration {
hand_name,
duration,
} => {
let ms = u64::try_from(duration.as_millis()).unwrap_or(u64::MAX);
info!(hand = %hand_name, duration_ms = ms, "metric.hand_run_duration");
}
ObserverMetric::HandFindingsCount { hand_name, count } => {
info!(hand = %hand_name, count = count, "metric.hand_findings_count");
}
ObserverMetric::HandSuccessRate { hand_name, success } => {
info!(hand = %hand_name, success = success, "metric.hand_success_rate");
}
}
}
@ -196,4 +226,39 @@ mod tests {
obs.record_metric(&ObserverMetric::ActiveSessions(1));
obs.record_metric(&ObserverMetric::QueueDepth(999));
}
#[test]
fn log_observer_hand_events_no_panic() {
let obs = LogObserver::new();
obs.record_event(&ObserverEvent::HandStarted {
hand_name: "review".into(),
});
obs.record_event(&ObserverEvent::HandCompleted {
hand_name: "review".into(),
duration_ms: 1500,
findings_count: 3,
});
obs.record_event(&ObserverEvent::HandFailed {
hand_name: "review".into(),
error: "timeout".into(),
duration_ms: 5000,
});
}
#[test]
fn log_observer_hand_metrics_no_panic() {
let obs = LogObserver::new();
obs.record_metric(&ObserverMetric::HandRunDuration {
hand_name: "review".into(),
duration: Duration::from_millis(1500),
});
obs.record_metric(&ObserverMetric::HandFindingsCount {
hand_name: "review".into(),
count: 5,
});
obs.record_metric(&ObserverMetric::HandSuccessRate {
hand_name: "review".into(),
success: true,
});
}
}

View File

@ -80,4 +80,39 @@ mod tests {
fn noop_flush_does_not_panic() {
NoopObserver.flush();
}
#[test]
fn noop_hand_events_do_not_panic() {
let obs = NoopObserver;
obs.record_event(&ObserverEvent::HandStarted {
hand_name: "review".into(),
});
obs.record_event(&ObserverEvent::HandCompleted {
hand_name: "review".into(),
duration_ms: 1500,
findings_count: 3,
});
obs.record_event(&ObserverEvent::HandFailed {
hand_name: "review".into(),
error: "timeout".into(),
duration_ms: 5000,
});
}
#[test]
fn noop_hand_metrics_do_not_panic() {
let obs = NoopObserver;
obs.record_metric(&ObserverMetric::HandRunDuration {
hand_name: "review".into(),
duration: Duration::from_millis(1500),
});
obs.record_metric(&ObserverMetric::HandFindingsCount {
hand_name: "review".into(),
count: 5,
});
obs.record_metric(&ObserverMetric::HandSuccessRate {
hand_name: "review".into(),
success: true,
});
}
}

View File

@ -27,6 +27,9 @@ pub struct OtelObserver {
tokens_used: Counter<u64>,
active_sessions: Gauge<u64>,
queue_depth: Gauge<u64>,
hand_runs: Counter<u64>,
hand_duration: Histogram<f64>,
hand_findings: Counter<u64>,
}
impl OtelObserver {
@ -152,6 +155,22 @@ impl OtelObserver {
.with_description("Current message queue depth")
.build();
let hand_runs = meter
.u64_counter("zeroclaw.hand.runs")
.with_description("Total hand runs")
.build();
let hand_duration = meter
.f64_histogram("zeroclaw.hand.duration")
.with_description("Hand run duration in seconds")
.with_unit("s")
.build();
let hand_findings = meter
.u64_counter("zeroclaw.hand.findings")
.with_description("Total findings produced by hand runs")
.build();
Ok(Self {
tracer_provider,
meter_provider: meter_provider_clone,
@ -168,6 +187,9 @@ impl OtelObserver {
tokens_used,
active_sessions,
queue_depth,
hand_runs,
hand_duration,
hand_findings,
})
}
}
@ -335,6 +357,77 @@ impl Observer for OtelObserver {
self.errors
.add(1, &[KeyValue::new("component", component.clone())]);
}
ObserverEvent::HandStarted { .. } => {}
ObserverEvent::HandCompleted {
hand_name,
duration_ms,
findings_count,
} => {
let secs = *duration_ms as f64 / 1000.0;
let duration = std::time::Duration::from_millis(*duration_ms);
let start_time = SystemTime::now()
.checked_sub(duration)
.unwrap_or(SystemTime::now());
let mut span = tracer.build(
opentelemetry::trace::SpanBuilder::from_name("hand.run")
.with_kind(SpanKind::Internal)
.with_start_time(start_time)
.with_attributes(vec![
KeyValue::new("hand.name", hand_name.clone()),
KeyValue::new("hand.success", true),
KeyValue::new("hand.findings", *findings_count as i64),
KeyValue::new("duration_s", secs),
]),
);
span.set_status(Status::Ok);
span.end();
let attrs = [
KeyValue::new("hand", hand_name.clone()),
KeyValue::new("success", "true"),
];
self.hand_runs.add(1, &attrs);
self.hand_duration
.record(secs, &[KeyValue::new("hand", hand_name.clone())]);
self.hand_findings.add(
*findings_count as u64,
&[KeyValue::new("hand", hand_name.clone())],
);
}
ObserverEvent::HandFailed {
hand_name,
error,
duration_ms,
} => {
let secs = *duration_ms as f64 / 1000.0;
let duration = std::time::Duration::from_millis(*duration_ms);
let start_time = SystemTime::now()
.checked_sub(duration)
.unwrap_or(SystemTime::now());
let mut span = tracer.build(
opentelemetry::trace::SpanBuilder::from_name("hand.run")
.with_kind(SpanKind::Internal)
.with_start_time(start_time)
.with_attributes(vec![
KeyValue::new("hand.name", hand_name.clone()),
KeyValue::new("hand.success", false),
KeyValue::new("error.message", error.clone()),
KeyValue::new("duration_s", secs),
]),
);
span.set_status(Status::error(error.clone()));
span.end();
let attrs = [
KeyValue::new("hand", hand_name.clone()),
KeyValue::new("success", "false"),
];
self.hand_runs.add(1, &attrs);
self.hand_duration
.record(secs, &[KeyValue::new("hand", hand_name.clone())]);
}
}
}
@ -352,6 +445,29 @@ impl Observer for OtelObserver {
ObserverMetric::QueueDepth(d) => {
self.queue_depth.record(*d as u64, &[]);
}
ObserverMetric::HandRunDuration {
hand_name,
duration,
} => {
self.hand_duration.record(
duration.as_secs_f64(),
&[KeyValue::new("hand", hand_name.clone())],
);
}
ObserverMetric::HandFindingsCount { hand_name, count } => {
self.hand_findings
.add(*count, &[KeyValue::new("hand", hand_name.clone())]);
}
ObserverMetric::HandSuccessRate { hand_name, success } => {
let success_str = if *success { "true" } else { "false" };
self.hand_runs.add(
1,
&[
KeyValue::new("hand", hand_name.clone()),
KeyValue::new("success", success_str),
],
);
}
}
}
@ -519,6 +635,41 @@ mod tests {
obs.record_metric(&ObserverMetric::QueueDepth(0));
}
#[test]
fn otel_hand_events_do_not_panic() {
let obs = test_observer();
obs.record_event(&ObserverEvent::HandStarted {
hand_name: "review".into(),
});
obs.record_event(&ObserverEvent::HandCompleted {
hand_name: "review".into(),
duration_ms: 1500,
findings_count: 3,
});
obs.record_event(&ObserverEvent::HandFailed {
hand_name: "review".into(),
error: "timeout".into(),
duration_ms: 5000,
});
}
#[test]
fn otel_hand_metrics_do_not_panic() {
let obs = test_observer();
obs.record_metric(&ObserverMetric::HandRunDuration {
hand_name: "review".into(),
duration: Duration::from_millis(1500),
});
obs.record_metric(&ObserverMetric::HandFindingsCount {
hand_name: "review".into(),
count: 5,
});
obs.record_metric(&ObserverMetric::HandSuccessRate {
hand_name: "review".into(),
success: true,
});
}
#[test]
fn otel_observer_creation_with_valid_endpoint_succeeds() {
// Even though endpoint is unreachable, creation should succeed

View File

@ -29,6 +29,11 @@ pub struct PrometheusObserver {
tokens_used: prometheus::IntGauge,
active_sessions: GaugeVec,
queue_depth: GaugeVec,
// Hands
hand_runs: IntCounterVec,
hand_duration: HistogramVec,
hand_findings: IntCounterVec,
}
impl PrometheusObserver {
@ -152,6 +157,31 @@ impl PrometheusObserver {
)
.expect("valid metric");
let hand_runs = IntCounterVec::new(
prometheus::Opts::new("zeroclaw_hand_runs_total", "Total hand runs by outcome"),
&["hand", "success"],
)
.expect("valid metric");
let hand_duration = HistogramVec::new(
HistogramOpts::new(
"zeroclaw_hand_duration_seconds",
"Hand run duration in seconds",
)
.buckets(vec![0.1, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0, 60.0]),
&["hand"],
)
.expect("valid metric");
let hand_findings = IntCounterVec::new(
prometheus::Opts::new(
"zeroclaw_hand_findings_total",
"Total findings produced by hand runs",
),
&["hand"],
)
.expect("valid metric");
// Register all metrics
registry.register(Box::new(agent_starts.clone())).ok();
registry.register(Box::new(llm_requests.clone())).ok();
@ -172,6 +202,9 @@ impl PrometheusObserver {
registry.register(Box::new(tokens_used.clone())).ok();
registry.register(Box::new(active_sessions.clone())).ok();
registry.register(Box::new(queue_depth.clone())).ok();
registry.register(Box::new(hand_runs.clone())).ok();
registry.register(Box::new(hand_duration.clone())).ok();
registry.register(Box::new(hand_findings.clone())).ok();
Self {
registry,
@ -192,6 +225,9 @@ impl PrometheusObserver {
tokens_used,
active_sessions,
queue_depth,
hand_runs,
hand_duration,
hand_findings,
}
}
@ -293,6 +329,38 @@ impl Observer for PrometheusObserver {
} => {
self.errors.with_label_values(&[component]).inc();
}
ObserverEvent::HandStarted { hand_name } => {
self.hand_runs
.with_label_values(&[hand_name.as_str(), "true"])
.inc_by(0); // touch the series so it appears in output
}
ObserverEvent::HandCompleted {
hand_name,
duration_ms,
findings_count,
} => {
self.hand_runs
.with_label_values(&[hand_name.as_str(), "true"])
.inc();
self.hand_duration
.with_label_values(&[hand_name.as_str()])
.observe(*duration_ms as f64 / 1000.0);
self.hand_findings
.with_label_values(&[hand_name.as_str()])
.inc_by(*findings_count as u64);
}
ObserverEvent::HandFailed {
hand_name,
duration_ms,
..
} => {
self.hand_runs
.with_label_values(&[hand_name.as_str(), "false"])
.inc();
self.hand_duration
.with_label_values(&[hand_name.as_str()])
.observe(*duration_ms as f64 / 1000.0);
}
}
}
@ -314,6 +382,25 @@ impl Observer for PrometheusObserver {
.with_label_values(&[] as &[&str])
.set(*d as f64);
}
ObserverMetric::HandRunDuration {
hand_name,
duration,
} => {
self.hand_duration
.with_label_values(&[hand_name.as_str()])
.observe(duration.as_secs_f64());
}
ObserverMetric::HandFindingsCount { hand_name, count } => {
self.hand_findings
.with_label_values(&[hand_name.as_str()])
.inc_by(*count);
}
ObserverMetric::HandSuccessRate { hand_name, success } => {
let success_str = if *success { "true" } else { "false" };
self.hand_runs
.with_label_values(&[hand_name.as_str(), success_str])
.inc();
}
}
}
@ -513,6 +600,61 @@ mod tests {
));
}
#[test]
fn hand_events_track_runs_and_duration() {
let obs = PrometheusObserver::new();
obs.record_event(&ObserverEvent::HandCompleted {
hand_name: "review".into(),
duration_ms: 1500,
findings_count: 3,
});
obs.record_event(&ObserverEvent::HandCompleted {
hand_name: "review".into(),
duration_ms: 2000,
findings_count: 1,
});
obs.record_event(&ObserverEvent::HandFailed {
hand_name: "review".into(),
error: "timeout".into(),
duration_ms: 5000,
});
let output = obs.encode();
assert!(output.contains(r#"zeroclaw_hand_runs_total{hand="review",success="true"} 2"#));
assert!(output.contains(r#"zeroclaw_hand_runs_total{hand="review",success="false"} 1"#));
assert!(output.contains(r#"zeroclaw_hand_findings_total{hand="review"} 4"#));
assert!(output.contains("zeroclaw_hand_duration_seconds"));
}
#[test]
fn hand_metrics_record_duration_and_findings() {
let obs = PrometheusObserver::new();
obs.record_metric(&ObserverMetric::HandRunDuration {
hand_name: "scan".into(),
duration: Duration::from_millis(800),
});
obs.record_metric(&ObserverMetric::HandFindingsCount {
hand_name: "scan".into(),
count: 5,
});
obs.record_metric(&ObserverMetric::HandSuccessRate {
hand_name: "scan".into(),
success: true,
});
obs.record_metric(&ObserverMetric::HandSuccessRate {
hand_name: "scan".into(),
success: false,
});
let output = obs.encode();
assert!(output.contains("zeroclaw_hand_duration_seconds"));
assert!(output.contains(r#"zeroclaw_hand_findings_total{hand="scan"} 5"#));
assert!(output.contains(r#"zeroclaw_hand_runs_total{hand="scan",success="true"} 1"#));
assert!(output.contains(r#"zeroclaw_hand_runs_total{hand="scan",success="false"} 1"#));
}
#[test]
fn llm_response_without_tokens_increments_request_only() {
let obs = PrometheusObserver::new();

View File

@ -80,6 +80,20 @@ pub enum ObserverEvent {
/// Human-readable error description. Must not contain secrets or tokens.
message: String,
},
/// A hand has started execution.
HandStarted { hand_name: String },
/// A hand has completed execution successfully.
HandCompleted {
hand_name: String,
duration_ms: u64,
findings_count: usize,
},
/// A hand has failed during execution.
HandFailed {
hand_name: String,
error: String,
duration_ms: u64,
},
}
/// Numeric metrics emitted by the agent runtime.
@ -96,6 +110,15 @@ pub enum ObserverMetric {
ActiveSessions(u64),
/// Current depth of the inbound message queue.
QueueDepth(u64),
/// Duration of a single hand run.
HandRunDuration {
hand_name: String,
duration: Duration,
},
/// Number of findings produced by a hand run.
HandFindingsCount { hand_name: String, count: u64 },
/// Records a hand run outcome for success-rate tracking.
HandSuccessRate { hand_name: String, success: bool },
}
/// Core observability trait for recording agent runtime telemetry.
@ -212,4 +235,67 @@ mod tests {
assert!(matches!(cloned_event, ObserverEvent::ToolCall { .. }));
assert!(matches!(cloned_metric, ObserverMetric::RequestLatency(_)));
}
#[test]
fn hand_events_recordable() {
let observer = DummyObserver::default();
observer.record_event(&ObserverEvent::HandStarted {
hand_name: "review".into(),
});
observer.record_event(&ObserverEvent::HandCompleted {
hand_name: "review".into(),
duration_ms: 1500,
findings_count: 3,
});
observer.record_event(&ObserverEvent::HandFailed {
hand_name: "review".into(),
error: "timeout".into(),
duration_ms: 5000,
});
assert_eq!(*observer.events.lock(), 3);
}
#[test]
fn hand_metrics_recordable() {
let observer = DummyObserver::default();
observer.record_metric(&ObserverMetric::HandRunDuration {
hand_name: "review".into(),
duration: Duration::from_millis(1500),
});
observer.record_metric(&ObserverMetric::HandFindingsCount {
hand_name: "review".into(),
count: 3,
});
observer.record_metric(&ObserverMetric::HandSuccessRate {
hand_name: "review".into(),
success: true,
});
assert_eq!(*observer.metrics.lock(), 3);
}
#[test]
fn hand_event_and_metric_are_cloneable() {
let event = ObserverEvent::HandCompleted {
hand_name: "review".into(),
duration_ms: 500,
findings_count: 2,
};
let metric = ObserverMetric::HandRunDuration {
hand_name: "review".into(),
duration: Duration::from_millis(500),
};
let cloned_event = event.clone();
let cloned_metric = metric.clone();
assert!(matches!(cloned_event, ObserverEvent::HandCompleted { .. }));
assert!(matches!(
cloned_metric,
ObserverMetric::HandRunDuration { .. }
));
}
}

View File

@ -101,4 +101,22 @@ mod tests {
});
obs.record_event(&ObserverEvent::TurnComplete);
}
#[test]
fn verbose_hand_events_do_not_panic() {
let obs = VerboseObserver::new();
obs.record_event(&ObserverEvent::HandStarted {
hand_name: "review".into(),
});
obs.record_event(&ObserverEvent::HandCompleted {
hand_name: "review".into(),
duration_ms: 1500,
findings_count: 3,
});
obs.record_event(&ObserverEvent::HandFailed {
hand_name: "review".into(),
error: "timeout".into(),
duration_ms: 5000,
});
}
}