Production Deployment & Observability
Monitoring and Metrics
3 min read
Production guardrails require comprehensive monitoring to track safety, performance, and business impact. This lesson covers essential metrics and monitoring strategies.
Key Guardrail Metrics
Safety Metrics
| Metric | Description | Target |
|---|---|---|
| Block Rate | % of requests blocked | < 5% for production |
| False Positive Rate | Safe content incorrectly blocked | < 1% |
| False Negative Rate | Unsafe content passed | < 0.1% |
| Category Distribution | Breakdown of violations by type | Varies |
| Escalation Rate | % requiring human review | < 2% |
Performance Metrics
| Metric | Description | Target |
|---|---|---|
| P50 Latency | Median guardrail time | < 50ms |
| P99 Latency | 99th percentile | < 200ms |
| Throughput | Requests per second | Varies |
| Error Rate | Guardrail failures | < 0.1% |
| Timeout Rate | Guardrail timeouts | < 0.01% |
Prometheus Metrics Implementation
from prometheus_client import Counter, Histogram, Gauge
import time
from functools import wraps
# Define metrics
# Total checks performed, labelled by which rail ran and its outcome
# (the middleware below emits "blocked" / "passed" / "error").
GUARDRAIL_REQUESTS = Counter(
    'guardrail_requests_total',
    'Total guardrail requests',
    ['rail_type', 'result']
)

# Per-rail processing-time histogram. Bucket edges (10ms .. 1s) bracket the
# stated targets of P50 < 50ms and P99 < 200ms.
GUARDRAIL_LATENCY = Histogram(
    'guardrail_latency_seconds',
    'Guardrail processing latency',
    ['rail_type'],
    buckets=[0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0]
)

# Blocked requests broken down by violation category; note a single blocked
# request may increment several categories.
GUARDRAIL_BLOCKS = Counter(
    'guardrail_blocks_total',
    'Total blocked requests',
    ['rail_type', 'category']
)

# In-flight checks per rail (incremented on entry, decremented on exit).
ACTIVE_GUARDRAIL_CHECKS = Gauge(
    'guardrail_active_checks',
    'Currently running guardrail checks',
    ['rail_type']
)
class MetricsMiddleware:
    """Decorator that records Prometheus metrics around an async guardrail check.

    The wrapped coroutine's return value is expected to expose a boolean
    ``blocked`` attribute and, when blocked, an iterable ``categories``
    attribute (assumed from usage here — confirm against the rail result type).

    Emits on every call:
      * GUARDRAIL_REQUESTS by outcome ("passed" / "blocked" / "error")
      * GUARDRAIL_BLOCKS once per violated category when blocked
      * GUARDRAIL_LATENCY — observed on every path, including errors
      * ACTIVE_GUARDRAIL_CHECKS gauge for in-flight checks
    """

    def __init__(self, rail_type: str):
        # Label value applied to every metric emitted by this instance.
        self.rail_type = rail_type

    def __call__(self, func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            ACTIVE_GUARDRAIL_CHECKS.labels(rail_type=self.rail_type).inc()
            start_time = time.time()
            try:
                result = await func(*args, **kwargs)
                # Track outcome of a check that completed without raising.
                outcome = "blocked" if result.blocked else "passed"
                GUARDRAIL_REQUESTS.labels(
                    rail_type=self.rail_type,
                    result=outcome
                ).inc()
                if result.blocked:
                    # One counter increment per violated category.
                    for category in result.categories:
                        GUARDRAIL_BLOCKS.labels(
                            rail_type=self.rail_type,
                            category=category
                        ).inc()
                return result
            except Exception:
                # Count the failure, then re-raise so the caller can apply
                # its own fallback policy (fail-open vs fail-closed).
                GUARDRAIL_REQUESTS.labels(
                    rail_type=self.rail_type,
                    result="error"
                ).inc()
                raise
            finally:
                # Latency and the in-flight gauge are updated on every path.
                duration = time.time() - start_time
                GUARDRAIL_LATENCY.labels(
                    rail_type=self.rail_type
                ).observe(duration)
                ACTIVE_GUARDRAIL_CHECKS.labels(rail_type=self.rail_type).dec()
        return wrapper
# Usage
@MetricsMiddleware(rail_type="toxicity")
async def check_toxicity(text: str):
    """Placeholder toxicity rail.

    NOTE(review): the middleware reads ``result.blocked`` on this function's
    return value, so returning ``None`` (as this stub does) would raise
    AttributeError — a real implementation must return an object exposing
    ``blocked`` and, when blocked, ``categories``.
    """
    # Toxicity check logic
    pass
Structured Logging
import structlog
from datetime import datetime
from typing import Dict, Any
import json
# Configure structured logging: each event is rendered as one JSON object
# carrying the logger name, level, and an ISO-8601 timestamp.
structlog.configure(
    processors=[
        structlog.stdlib.filter_by_level,             # honour stdlib log levels
        structlog.stdlib.add_logger_name,
        structlog.stdlib.add_log_level,
        structlog.processors.TimeStamper(fmt="iso"),  # ISO-8601 event time
        structlog.processors.JSONRenderer()           # final output is JSON
    ],
    wrapper_class=structlog.stdlib.BoundLogger,
    context_class=dict,
    logger_factory=structlog.stdlib.LoggerFactory(),
)

# Module-wide structlog logger used for all guardrail events.
logger = structlog.get_logger()
class GuardrailLogger:
    """Structured logging for guardrail events.

    Thin wrapper around the module-level structlog logger that enforces a
    consistent event schema for checks, blocks, and errors. Raw input text
    is never logged in full: checks record only its length, and blocks
    record at most a 100-character preview.
    """

    def log_check(
        self,
        request_id: str,
        rail_type: str,
        input_text: str,
        result: Dict[str, Any],
        latency_ms: float
    ):
        """Log a guardrail check result.

        ``result`` is expected to carry optional "decision", "categories",
        and "confidence" keys; missing keys are logged as None / [].
        """
        # datetime.utcnow() is deprecated (Python 3.12+) and returns a naive
        # timestamp; emit a timezone-aware UTC timestamp instead.
        from datetime import timezone
        logger.info(
            "guardrail_check",
            request_id=request_id,
            rail_type=rail_type,
            # Log the length, not the content, to avoid leaking user input.
            input_length=len(input_text),
            result=result.get("decision"),
            categories=result.get("categories", []),
            confidence=result.get("confidence"),
            latency_ms=latency_ms,
            timestamp=datetime.now(timezone.utc).isoformat()
        )

    def log_block(
        self,
        request_id: str,
        rail_type: str,
        reason: str,
        categories: list,
        input_preview: "str | None" = None
    ):
        """Log a blocked request at WARNING level.

        ``input_preview`` is truncated to 100 characters; pass None to omit
        the preview entirely. (Annotation fixed: the default is None, so the
        parameter is optional, not a plain str.)
        """
        logger.warning(
            "guardrail_block",
            request_id=request_id,
            rail_type=rail_type,
            reason=reason,
            categories=categories,
            input_preview=input_preview[:100] if input_preview else None
        )

    def log_error(
        self,
        request_id: str,
        rail_type: str,
        error: Exception,
        fallback_action: str
    ):
        """Log a guardrail failure and the fallback action that was taken
        (e.g. fail-open / fail-closed) at ERROR level."""
        logger.error(
            "guardrail_error",
            request_id=request_id,
            rail_type=rail_type,
            error_type=type(error).__name__,
            error_message=str(error),
            fallback_action=fallback_action
        )
Real-time Dashboard
from dataclasses import dataclass
from datetime import datetime, timedelta
from collections import defaultdict
import asyncio
@dataclass
class MetricWindow:
    """Rolling time window for metric aggregation.

    Stores (timestamp, value) samples per key and evicts samples older than
    ``window_seconds`` on every write.
    """
    window_seconds: int = 60
    # Replaced by a defaultdict in __post_init__; declared so the dataclass
    # machinery (repr/eq) knows about the field.
    buckets: dict = None

    def __post_init__(self):
        # defaultdict(list) avoids key-existence checks in record()/get_stats().
        self.buckets = defaultdict(list)

    def record(self, key: str, value: float):
        """Append a sample for *key*, then evict expired samples for it."""
        now = datetime.now()
        self.buckets[key].append((now, value))
        self._cleanup(key)

    def _cleanup(self, key: str):
        # Drop samples that have fallen outside the rolling window.
        cutoff = datetime.now() - timedelta(seconds=self.window_seconds)
        self.buckets[key] = [
            (ts, v) for ts, v in self.buckets[key]
            if ts > cutoff
        ]

    def get_stats(self, key: str) -> dict:
        """Return {count, avg, p50, p99} over the samples currently in window.

        Bug fix: the empty-window fallback previously omitted "p50", so any
        consumer reading stats["p50"] (e.g. the dashboard) raised KeyError
        once a key's window aged out completely.
        """
        values = [v for _, v in self.buckets[key]]
        if not values:
            return {"count": 0, "avg": 0, "p50": 0, "p99": 0}
        values.sort()
        return {
            "count": len(values),
            "avg": sum(values) / len(values),
            "p50": values[len(values) // 2],
            # int(n * 0.99) < n for all n >= 1, so the index is in range.
            "p99": values[int(len(values) * 0.99)]
        }
class RealTimeDashboard:
    """In-process aggregation of guardrail metrics for live display."""

    def __init__(self):
        # 60s window for latency; a wider 5-minute window smooths block rate.
        self.latency = MetricWindow(window_seconds=60)
        self.block_rates = MetricWindow(window_seconds=300)
        self.error_counts = defaultdict(int)

    def record_check(self, rail_type: str, latency_ms: float, blocked: bool):
        """Record one completed check: its latency and whether it blocked."""
        self.latency.record(rail_type, latency_ms)
        # Store 1/0 per check so the window average equals the block rate.
        self.block_rates.record(rail_type, 1 if blocked else 0)

    def record_error(self, rail_type: str):
        """Count a guardrail failure for *rail_type*."""
        self.error_counts[rail_type] += 1

    def get_dashboard_data(self) -> dict:
        """Build the per-rail snapshot served to the dashboard."""
        snapshot = {}
        for rail in self.latency.buckets:
            lat = self.latency.get_stats(rail)
            blocks = self.block_rates.get_stats(rail)
            snapshot[rail] = {
                "latency_ms": {
                    "avg": lat["avg"],
                    "p50": lat["p50"],
                    "p99": lat["p99"],
                },
                "requests_per_minute": lat["count"],
                "block_rate": blocks["avg"] * 100,  # percent
                "errors": self.error_counts[rail],
            }
        return snapshot
Alerting Rules
# prometheus_alerts.yml
groups:
  - name: guardrails
    rules:
      # P99 guardrail latency above the 200ms target for 5 minutes.
      - alert: HighGuardrailLatency
        expr: histogram_quantile(0.99, rate(guardrail_latency_seconds_bucket[5m])) > 0.2
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Guardrail P99 latency above 200ms"

      # Fix: guardrail_blocks_total carries {rail_type, category} labels while
      # guardrail_requests_total carries {rail_type, result}; dividing the raw
      # vectors never produces a match, so the original alert could not fire.
      # Aggregate both sides by rail_type before dividing.
      - alert: HighBlockRate
        expr: >
          sum by (rail_type) (rate(guardrail_blocks_total[5m]))
          / sum by (rail_type) (rate(guardrail_requests_total[5m])) > 0.1
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "Block rate above 10%"

      # Fix: the summary promises a 1% error *rate*, but the original expr
      # compared an absolute errors-per-second figure. Alert on the error
      # share of all requests instead.
      - alert: GuardrailErrors
        expr: >
          sum by (rail_type) (rate(guardrail_requests_total{result="error"}[5m]))
          / sum by (rail_type) (rate(guardrail_requests_total[5m])) > 0.01
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Guardrail error rate above 1%"

      # Current 5m hate-speech block rate more than double its 1h average
      # (subquery: rate evaluated every 5m over the trailing hour).
      - alert: UnusualViolationSpike
        expr: rate(guardrail_blocks_total{category="hate_speech"}[5m]) > 2 * avg_over_time(rate(guardrail_blocks_total{category="hate_speech"}[5m])[1h:5m])
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "Unusual spike in hate speech violations"
Monitoring Tip: Track both safety and performance metrics. A guardrail that's 100% accurate but adds 5 seconds of latency is unusable. Balance thoroughness with user experience.
Next: A/B testing guardrail configurations.