Guardrails AI Framework
LiteLLM Integration
LiteLLM provides a unified interface for 100+ LLM providers. Combined with Guardrails AI, you get validated outputs across any model with automatic fallback and load balancing.
LiteLLM Setup
pip install guardrails-ai litellm
import litellm
# Configure API keys
import os
os.environ["OPENAI_API_KEY"] = "sk-..."
os.environ["ANTHROPIC_API_KEY"] = "sk-ant-..."
os.environ["AZURE_API_KEY"] = "..."
os.environ["AZURE_API_BASE"] = "https://your-resource.openai.azure.com"
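Before adding Guardrails, a quick sanity check (a minimal sketch; the model name is just an example) confirms LiteLLM can reach a provider with the keys above:
# Minimal sanity check that the configured keys work
response = litellm.completion(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Reply with the single word: ready"}]
)
print(response.choices[0].message.content)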
Basic LiteLLM with Guardrails
from guardrails import Guard
from pydantic import BaseModel, Field
import litellm
class AnalysisResult(BaseModel):
summary: str = Field(description="Analysis summary")
key_findings: list[str] = Field(description="Key findings")
confidence: float = Field(ge=0, le=1, description="Confidence score")
guard = Guard.for_pydantic(AnalysisResult)
# Use any LiteLLM-supported model
result = guard(
model="gpt-4o", # OpenAI
messages=[{"role": "user", "content": "Analyze the Q3 earnings report"}]
)
# Switch providers easily
result = guard(
model="claude-3-5-sonnet-20241022", # Anthropic
messages=[{"role": "user", "content": "Analyze the Q3 earnings report"}]
)
result = guard(
model="azure/gpt-4o", # Azure OpenAI
messages=[{"role": "user", "content": "Analyze the Q3 earnings report"}]
)
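Whichever provider serves the call, the guard returns a ValidationOutcome. A small sketch of inspecting it (shown here with validated_output as a dict, the usual Guardrails return shape):
# Inspect the outcome of a guarded call
if result.validation_passed:
    analysis = result.validated_output  # fields defined by AnalysisResult
    print(analysis["summary"], analysis["confidence"])
else:
    print("Validation failed, raw output was:", result.raw_llm_output)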
Fallback Configuration
from guardrails import Guard
import litellm
from litellm import Router
# Configure fallback chain
router = Router(
model_list=[
{
"model_name": "gpt-4",
"litellm_params": {
"model": "gpt-4o",
"api_key": os.environ["OPENAI_API_KEY"]
}
},
{
"model_name": "gpt-4",
"litellm_params": {
"model": "azure/gpt-4o",
"api_key": os.environ["AZURE_API_KEY"],
"api_base": os.environ["AZURE_API_BASE"]
}
},
{
"model_name": "gpt-4",
"litellm_params": {
"model": "claude-3-5-sonnet-20241022",
"api_key": os.environ["ANTHROPIC_API_KEY"]
}
}
],
    fallbacks=[
        # Fallback targets are the model group names defined above, not raw litellm model strings
        {"gpt-4": ["gpt-4-azure", "gpt-4-claude"]}
    ],
retry_after=5 # Retry after 5 seconds
)
guard = Guard.for_pydantic(AnalysisResult)
# Use router for automatic fallback
async def analyze_with_fallback(content: str):
try:
response = await router.acompletion(
model="gpt-4",
messages=[{"role": "user", "content": content}]
)
return guard.parse(response.choices[0].message.content)
    except Exception as e:
        # Reached only after the router has exhausted every fallback deployment
        raise
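A short driver for the function above (illustrative prompt; asyncio.run is only needed outside an existing event loop):
import asyncio

async def main():
    # The router handles retries and fallbacks transparently
    outcome = await analyze_with_fallback("Analyze the Q3 earnings report")
    print(outcome.validated_output)

asyncio.run(main())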
Cost-Optimized Routing
from litellm import Router
from guardrails import Guard
# Route based on complexity
router = Router(
model_list=[
# Cheap models for simple tasks
{
"model_name": "simple",
"litellm_params": {
"model": "gpt-4o-mini",
"api_key": os.environ["OPENAI_API_KEY"]
}
},
# Expensive models for complex tasks
{
"model_name": "complex",
"litellm_params": {
"model": "gpt-4o",
"api_key": os.environ["OPENAI_API_KEY"]
}
}
]
)
class TaskRouter:
def __init__(self, guard: Guard):
self.guard = guard
self.router = router
async def route(self, task: str, complexity: str = "auto") -> dict:
"""Route task to appropriate model based on complexity."""
if complexity == "auto":
complexity = self._estimate_complexity(task)
        model = "simple" if complexity == "low" else "complex"
        # Look up the underlying litellm model string for the chosen group by name
        deployment = next(d for d in self.router.model_list if d["model_name"] == model)
        result = self.guard(
            model=deployment["litellm_params"]["model"],
            messages=[{"role": "user", "content": task}]
        )
return {
"result": result.validated_output,
"model_used": model,
"estimated_cost": self._estimate_cost(model, task)
}
def _estimate_complexity(self, task: str) -> str:
"""Simple complexity estimation."""
complex_keywords = ["analyze", "compare", "synthesize", "evaluate"]
return "high" if any(kw in task.lower() for kw in complex_keywords) else "low"
def _estimate_cost(self, model: str, task: str) -> float:
"""Estimate cost based on model and input length."""
costs = {"simple": 0.00015, "complex": 0.005} # Per 1K tokens
tokens = len(task.split()) * 1.3 # Rough token estimate
return costs[model] * tokens / 1000
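A hedged usage sketch (the task strings are placeholders) showing how the keyword-based complexity estimate picks a tier:
import asyncio

task_router = TaskRouter(guard=Guard.for_pydantic(AnalysisResult))

async def main():
    cheap = await task_router.route("Summarize this paragraph")            # no complex keywords -> "simple"
    costly = await task_router.route("Compare and evaluate both vendors")  # "compare"/"evaluate" -> "complex"
    print(cheap["model_used"], cheap["estimated_cost"])
    print(costly["model_used"], costly["estimated_cost"])

asyncio.run(main())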
Rate Limiting and Quotas
from litellm import Router
import asyncio
from datetime import datetime, timedelta
class RateLimitedRouter:
"""Router with rate limiting per model."""
def __init__(self, guard: Guard):
self.guard = guard
self.request_counts = {}
self.rate_limits = {
"gpt-4o": 60, # 60 RPM
"gpt-4o-mini": 200, # 200 RPM
"claude-3-5-sonnet-20241022": 40 # 40 RPM
}
        # Optional LiteLLM Router kept for a fallback deployment; the guarded
        # call in completion() below goes straight to the requested model
        self.router = Router(
model_list=[
{"model_name": "primary", "litellm_params": {"model": "gpt-4o"}},
{"model_name": "fallback", "litellm_params": {"model": "gpt-4o-mini"}},
]
)
    async def completion(self, model: str, messages: list):
"""Make completion with rate limit awareness."""
await self._wait_for_rate_limit(model)
result = self.guard(
model=model,
messages=messages
)
self._record_request(model)
return result
async def _wait_for_rate_limit(self, model: str):
"""Wait if rate limit would be exceeded."""
limit = self.rate_limits.get(model, 60)
current = self._get_request_count(model)
if current >= limit:
wait_time = 60 - (datetime.now().second)
await asyncio.sleep(wait_time)
def _record_request(self, model: str):
"""Record a request for rate limiting."""
now = datetime.now()
minute_key = now.strftime("%Y%m%d%H%M")
if model not in self.request_counts:
self.request_counts[model] = {}
self.request_counts[model][minute_key] = \
self.request_counts[model].get(minute_key, 0) + 1
def _get_request_count(self, model: str) -> int:
"""Get request count for current minute."""
minute_key = datetime.now().strftime("%Y%m%d%H%M")
return self.request_counts.get(model, {}).get(minute_key, 0)
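A minimal sketch of driving the wrapper; the model name matches the limits table above, and each call is throttled before the guarded request is made:
import asyncio

limiter = RateLimitedRouter(guard=Guard.for_pydantic(AnalysisResult))

async def main():
    for prompt in ["Analyze revenue", "Analyze churn", "Analyze costs"]:
        outcome = await limiter.completion(
            model="gpt-4o",
            messages=[{"role": "user", "content": prompt}]
        )
        print(outcome.validation_passed)

asyncio.run(main())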
Streaming with Guardrails
from guardrails import Guard
import litellm
guard = Guard.for_pydantic(AnalysisResult)
async def stream_with_validation(prompt: str):
"""Stream response and validate at the end."""
full_response = ""
async for chunk in await litellm.acompletion(
model="gpt-4o",
messages=[{"role": "user", "content": prompt}],
stream=True
):
if chunk.choices[0].delta.content:
content = chunk.choices[0].delta.content
full_response += content
yield {"type": "chunk", "content": content}
# Validate complete response
try:
validated = guard.parse(full_response)
yield {"type": "validated", "result": validated}
except Exception as e:
yield {"type": "error", "error": str(e)}
Production Wrapper
from guardrails import Guard
from pydantic import BaseModel
import litellm
from typing import Optional, Type, TypeVar
import logging
T = TypeVar("T", bound=BaseModel)
class GuardedLLM:
"""Production-ready LLM wrapper with Guardrails."""
def __init__(
self,
primary_model: str = "gpt-4o",
fallback_model: str = "gpt-4o-mini",
max_retries: int = 3
):
self.primary_model = primary_model
self.fallback_model = fallback_model
self.max_retries = max_retries
self.logger = logging.getLogger(__name__)
async def generate(
self,
schema: Type[T],
prompt: str,
        system_prompt: Optional[str] = None,
num_reasks: int = 2
) -> T:
"""Generate validated response."""
guard = Guard.for_pydantic(schema)
messages = []
if system_prompt:
messages.append({"role": "system", "content": system_prompt})
messages.append({"role": "user", "content": prompt})
# Try primary model
try:
result = guard(
model=self.primary_model,
messages=messages,
num_reasks=num_reasks
)
if result.validated_output:
return result.validated_output
except Exception as e:
self.logger.warning(f"Primary model failed: {e}")
# Fallback
try:
result = guard(
model=self.fallback_model,
messages=messages,
num_reasks=num_reasks + 1 # Extra retry for fallback
)
if result.validated_output:
return result.validated_output
except Exception as e:
self.logger.error(f"Fallback model failed: {e}")
raise
raise ValueError("Failed to generate valid response")
# Usage (inside an async context, e.g. wrapped in asyncio.run)
llm = GuardedLLM()
result = await llm.generate(
schema=AnalysisResult,
prompt="Analyze the market trends for Q4",
system_prompt="You are a financial analyst."
)
LiteLLM Tip: Set provider-prefixed environment variables for credentials: OPENAI_API_KEY, ANTHROPIC_API_KEY, AZURE_API_KEY, etc. LiteLLM picks them up automatically, so you rarely need to pass keys per call.
Next: Production deployment patterns and observability.