Error Handling & Recovery
Debugging Agents
Agent debugging is uniquely challenging due to non-deterministic behavior and complex tool chains. Here are proven strategies for finding and fixing issues.
Comprehensive Logging
Log everything the agent does:
import logging
from datetime import datetime

class AgentLogger:
    def __init__(self, agent_id):
        self.agent_id = agent_id
        self.logger = logging.getLogger(f"agent.{agent_id}")
        self.trace = []

    def log_step(self, step_type, data):
        entry = {
            "timestamp": datetime.now().isoformat(),
            "agent_id": self.agent_id,
            "step_type": step_type,
            "data": data
        }
        self.trace.append(entry)
        self.logger.info(f"[{step_type}] {data}")

    def log_llm_call(self, prompt, response, tokens_used):
        self.log_step("llm_call", {
            "prompt_preview": prompt[:500],
            "response_preview": response[:500],
            "tokens": tokens_used
        })

    def log_tool_call(self, tool_name, params, result, duration_ms):
        self.log_step("tool_call", {
            "tool": tool_name,
            "params": params,
            "result": str(result)[:500],
            "duration_ms": duration_ms
        })

    def log_error(self, error_type, details, stack_trace):
        self.log_step("error", {
            "type": error_type,
            "details": details,
            "stack": stack_trace
        })

    def get_trace(self):
        return self.trace
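A minimal usage sketch, assuming a hypothetical `call_llm` client and `run_tool` dispatcher (both are placeholders, not part of the logger):

import time
import traceback

logger = AgentLogger("support-agent-01")   # hypothetical agent id

def call_llm(prompt):
    # Placeholder for your LLM client; returns (response_text, tokens_used).
    return "I'll look that order up.", 412

def run_tool(name, params):
    # Placeholder for your tool dispatcher.
    return {"status": "shipped"}

prompt = "Where is order #1234?"
response, tokens = call_llm(prompt)
logger.log_llm_call(prompt, response, tokens)

start = time.time()
try:
    result = run_tool("order_lookup", {"order_id": "1234"})
    logger.log_tool_call("order_lookup", {"order_id": "1234"}, result,
                         int((time.time() - start) * 1000))
except Exception as exc:
    logger.log_error(type(exc).__name__, str(exc), traceback.format_exc())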
Trace Visualization
Create readable execution traces:
def visualize_trace(trace):
    """Generate a human-readable execution trace."""
    output = []
    for entry in trace:
        step_type = entry["step_type"]
        data = entry["data"]
        if step_type == "llm_call":
            output.append(f"🤖 LLM Call ({data['tokens']} tokens)")
            output.append(f"   Input: {data['prompt_preview'][:100]}...")
            output.append(f"   Output: {data['response_preview'][:100]}...")
        elif step_type == "tool_call":
            output.append(f"🔧 Tool: {data['tool']} ({data['duration_ms']}ms)")
            output.append(f"   Params: {data['params']}")
            output.append(f"   Result: {data['result'][:100]}...")
        elif step_type == "error":
            output.append(f"❌ Error: {data['type']}")
            output.append(f"   {data['details']}")
    return "\n".join(output)
Replay and Reproduce
Non-deterministic failures are hard to reproduce on demand, so record sessions and replay them against the agent later:
import json

class SessionRecorder:
    def __init__(self, session_id):
        self.session_id = session_id
        self.events = []

    def record(self, event_type, data):
        self.events.append({
            "event_type": event_type,
            "data": data,
            "timestamp": datetime.now().isoformat()
        })

    def save(self, path):
        with open(path, 'w') as f:
            json.dump({
                "session_id": self.session_id,
                "events": self.events
            }, f)

    @classmethod
    def replay(cls, path, agent):
        """Replay a recorded session against an agent"""
        with open(path) as f:
            session = json.load(f)
        for event in session["events"]:
            if event["event_type"] == "user_input":
                response = agent.process(event["data"]["message"])
                print(f"Original: {event['data'].get('original_response')}")
                print(f"Replayed: {response}")
Common Debug Scenarios
1. Agent Loops
def diagnose_loop(trace):
    """Detect repeated action sequences in a trace"""
    actions = [e["data"]["tool"] for e in trace if e["step_type"] == "tool_call"]
    # Look for a window of actions immediately followed by the same window
    for window_size in range(2, len(actions) // 2 + 1):
        for i in range(len(actions) - window_size * 2 + 1):
            window1 = actions[i:i + window_size]
            window2 = actions[i + window_size:i + window_size * 2]
            if window1 == window2:
                return f"Loop detected: {window1} repeated at position {i}"
    return None
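A quick sanity check with a hand-built trace (entries trimmed to the fields the detector reads):

fake_trace = [
    {"step_type": "tool_call", "data": {"tool": t}}
    for t in ["search", "fetch_page", "search", "fetch_page", "summarize"]
]
print(diagnose_loop(fake_trace))
# Loop detected: ['search', 'fetch_page'] repeated at position 0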
2. Tool Failures
def analyze_tool_failures(trace):
    """Summarize tool failure patterns"""
    # Assumes error entries include a "tool" field, e.g. logged via
    # log_step("error", {..., "tool": tool_name}).
    failures = {}
    for entry in trace:
        if entry["step_type"] == "error" and "tool" in entry["data"]:
            tool = entry["data"]["tool"]
            error = entry["data"]["type"]
            if tool not in failures:
                failures[tool] = {}
            if error not in failures[tool]:
                failures[tool][error] = 0
            failures[tool][error] += 1
    return failures
3. Context Overflow
def check_context_usage(trace):
    """Track token usage over time"""
    total_tokens = 0
    max_tokens = 128000  # Adjust per model
    for entry in trace:
        if entry["step_type"] == "llm_call":
            # Each llm_call logs the tokens used for that call, which
            # approximates the current context size.
            total_tokens = entry["data"]["tokens"]
            if total_tokens > max_tokens * 0.9:
                return {
                    "warning": "Context nearly full",
                    "usage": f"{total_tokens}/{max_tokens}",
                    "timestamp": entry["timestamp"]
                }
    return {"status": "ok", "final_usage": total_tokens}
Debugging Tools Integration
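Observability platforms such as LangSmith and Phoenix can capture and visualize these traces automatically, with minimal setup: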
# LangSmith tracing for LangChain (also requires LANGCHAIN_API_KEY to be set)
import os
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = "my-agent-debug"

# Phoenix for observability (the exact import path varies by Phoenix version;
# check the Arize Phoenix docs for your release)
from phoenix.trace import langchain as phoenix_langchain
phoenix_langchain.instrument()
Debug Checklist
| Check | Tool | When |
|---|---|---|
| Execution trace | Logger | Every run |
| Token usage | Token counter | Before deploy |
| Tool success rate | Failure analyzer | Weekly |
| Response quality | Evaluation suite | After changes |
| Latency | Performance monitor | Continuous |
Best Practices
- Log prompts and responses (with size limits)
- Record all tool calls with timing
- Enable replay for production issues
- Set up alerts for error spikes (see the sketch after this list)
- Use observability platforms (LangSmith, Phoenix, Arize)
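As a sketch of the alerting idea, assuming errors are logged through the `AgentLogger` above (the 50-step window and 20% threshold are illustrative, not prescriptive):

def error_rate(trace, window=50):
    """Fraction of the most recent `window` steps that were errors."""
    recent = trace[-window:]
    if not recent:
        return 0.0
    return sum(1 for e in recent if e["step_type"] == "error") / len(recent)

rate = error_rate(logger.get_trace())
if rate > 0.2:
    print(f"ALERT: error rate {rate:.0%} over the last 50 steps")
    # In production, route this to your alerting channel instead of stdout.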
Test your error handling knowledge in the module quiz!