Lesson 16 of 20

Error Handling & Recovery

Debugging Agents

3 min read

Agent debugging is uniquely challenging due to non-deterministic behavior and complex tool chains. Here are proven strategies for finding and fixing issues.

Comprehensive Logging

Log everything the agent does:

import json
import logging
from datetime import datetime

class AgentLogger:
    """Record an agent's steps as an in-memory trace and mirror them to `logging`.

    Every step is stored as a dict with the keys ``timestamp``, ``agent_id``,
    ``step_type`` and ``data``, appended to ``self.trace`` in call order, and
    also emitted at INFO level on the ``agent.<agent_id>`` logger.
    """

    # Maximum number of characters kept for prompt/response/result previews.
    PREVIEW_LIMIT = 500

    def __init__(self, agent_id):
        """Create a logger whose entries are tagged with ``agent_id``."""
        self.agent_id = agent_id
        self.logger = logging.getLogger(f"agent.{agent_id}")
        self.trace = []

    def _preview(self, value):
        """Return ``value`` as a string truncated to ``PREVIEW_LIMIT`` characters."""
        # str() first so None, dicts or message lists do not crash or get
        # sliced as sequences; for plain strings this is a no-op.
        return str(value)[:self.PREVIEW_LIMIT]

    def log_step(self, step_type, data):
        """Append one trace entry and emit it on the underlying logger.

        Args:
            step_type: Label for the kind of step (e.g. ``"llm_call"``).
            data: Payload stored under the entry's ``"data"`` key as-is.
        """
        entry = {
            "timestamp": datetime.now().isoformat(),
            "agent_id": self.agent_id,
            "step_type": step_type,
            "data": data,
        }
        self.trace.append(entry)
        # Lazy %-formatting: the message is only rendered if INFO is enabled.
        self.logger.info("[%s] %s", step_type, data)

    def log_llm_call(self, prompt, response, tokens_used):
        """Record an LLM call with truncated prompt/response previews."""
        self.log_step("llm_call", {
            "prompt_preview": self._preview(prompt),
            "response_preview": self._preview(response),
            "tokens": tokens_used,
        })

    def log_tool_call(self, tool_name, params, result, duration_ms):
        """Record a tool invocation, its parameters, a result preview and timing."""
        self.log_step("tool_call", {
            "tool": tool_name,
            "params": params,
            "result": self._preview(result),
            "duration_ms": duration_ms,
        })

    def log_error(self, error_type, details, stack_trace, tool=None):
        """Record an error entry.

        Args:
            error_type: Stored under ``"type"``.
            details: Stored under ``"details"``.
            stack_trace: Stored under ``"stack"``.
            tool: Optional name of the tool that failed. When given it is
                stored under ``"tool"`` so that per-tool failure analysis can
                attribute the error; when omitted the key is left out and the
                entry has the same shape as before.
        """
        data = {
            "type": error_type,
            "details": details,
            "stack": stack_trace,
        }
        if tool is not None:
            data["tool"] = tool
        self.log_step("error", data)

    def get_trace(self):
        """Return the list of recorded entries (the live list, not a copy)."""
        return self.trace

Trace Visualization

Create readable execution traces:

def visualize_trace(trace, indent=0):
    """Render a trace as a human-readable, newline-joined string.

    Args:
        trace: Iterable of entries, each a dict with ``"step_type"`` and
            ``"data"`` keys. ``llm_call`` entries need ``tokens``,
            ``prompt_preview`` and ``response_preview``; ``tool_call`` entries
            need ``tool``, ``duration_ms``, ``params`` and ``result``;
            ``error`` entries need ``type`` and ``details``.
        indent: Number of two-space indentation levels prefixed to every
            line. Defaults to 0 (no prefix).

    Returns:
        The rendered lines joined with ``"\\n"``; an empty string for an
        empty trace. Entries with any other ``step_type`` produce no output.
    """
    pad = "  " * indent
    output = []

    for entry in trace:
        step_type = entry["step_type"]
        data = entry["data"]

        if step_type == "llm_call":
            output.append(f"{pad}🤖 LLM Call ({data['tokens']} tokens)")
            # Previews are cut to 100 chars; the ellipsis is always appended.
            output.append(f"{pad}   Input: {data['prompt_preview'][:100]}...")
            output.append(f"{pad}   Output: {data['response_preview'][:100]}...")

        elif step_type == "tool_call":
            output.append(f"{pad}🔧 Tool: {data['tool']} ({data['duration_ms']}ms)")
            output.append(f"{pad}   Params: {data['params']}")
            output.append(f"{pad}   Result: {data['result'][:100]}...")

        elif step_type == "error":
            output.append(f"{pad}❌ Error: {data['type']}")
            output.append(f"{pad}   {data['details']}")

    return "\n".join(output)

Replay and Reproduce

Save sessions for replay:

class SessionRecorder:
    """Record session events in memory and persist them as JSON for replay."""

    def __init__(self, session_id):
        """Start an empty recording identified by ``session_id``."""
        self.session_id = session_id
        self.events = []

    def record(self, event_type, data):
        """Append one event with its type, payload and the current timestamp.

        ``data`` is stored as-is; it must be JSON-serializable for ``save``
        to succeed.
        """
        self.events.append({
            "event_type": event_type,
            "data": data,
            "timestamp": datetime.now().isoformat(),
        })

    def save(self, path):
        """Write the session id and all recorded events to ``path`` as JSON."""
        # Explicit encoding so the file does not depend on the platform's
        # default locale encoding.
        with open(path, "w", encoding="utf-8") as f:
            json.dump({
                "session_id": self.session_id,
                "events": self.events,
            }, f)

    @classmethod
    def replay(cls, path, agent):
        """Replay a recorded session against ``agent``.

        Each ``"user_input"`` event's ``data["message"]`` is passed to
        ``agent.process``; the recorded ``original_response`` (if any) and the
        new response are printed side by side. Other event types are skipped.

        Args:
            path: File previously written by ``save``.
            agent: Object exposing ``process(message)``.

        Returns:
            A list with one dict per replayed input, holding ``"message"``,
            ``"original"`` (``None`` when not recorded) and ``"replayed"``,
            so callers can diff the results programmatically.
        """
        with open(path, encoding="utf-8") as f:
            session = json.load(f)

        comparisons = []
        for event in session["events"]:
            if event["event_type"] != "user_input":
                continue
            message = event["data"]["message"]
            original = event["data"].get("original_response")
            response = agent.process(message)
            print(f"Original: {original}")
            print(f"Replayed: {response}")
            comparisons.append({
                "message": message,
                "original": original,
                "replayed": response,
            })
        return comparisons

Common Debug Scenarios

1. Agent Loops

def diagnose_loop(trace, min_window=2):
    """Detect a sequence of tool calls that is immediately repeated.

    Only ``tool_call`` entries are considered; their ``data["tool"]`` names
    form the action sequence. Window sizes are tried from smallest to largest
    and, for each size, start positions from left to right.

    Args:
        trace: Iterable of entries with ``"step_type"`` and, for tool calls,
            ``data["tool"]``.
        min_window: Smallest pattern length to look for. Defaults to 2, so a
            single tool called twice in a row is not reported; pass 1 to
            include that case.

    Returns:
        A message describing the first repeated window found, or ``None``
        when no window is followed directly by an identical one.
    """
    actions = [e["data"]["tool"] for e in trace if e["step_type"] == "tool_call"]
    count = len(actions)

    # A pattern of length w needs 2*w actions, so w == count // 2 is still
    # valid and must be included in the range.
    for window_size in range(max(1, min_window), count // 2 + 1):
        # The last valid start index is count - 2*w, hence the +1.
        for i in range(count - window_size * 2 + 1):
            window1 = actions[i:i + window_size]
            window2 = actions[i + window_size:i + window_size * 2]
            if window1 == window2:
                return f"Loop detected: {window1} repeated at position {i}"

    return None

2. Tool Failures

def analyze_tool_failures(trace):
    """Count errors per tool and per error type.

    Only entries whose ``step_type`` is ``"error"`` and whose ``data``
    contains a ``"tool"`` key are counted; everything else is ignored.

    Returns:
        A nested dict ``{tool: {error_type: count}}``; empty when no
        matching entries exist.
    """
    summary = {}

    for record in trace:
        if record["step_type"] != "error":
            continue
        payload = record["data"]
        if "tool" not in payload:
            continue

        tool_name = payload["tool"]
        error_kind = payload["type"]

        per_tool = summary.setdefault(tool_name, {})
        per_tool[error_kind] = per_tool.get(error_kind, 0) + 1

    return summary

3. Context Overflow

def check_context_usage(trace, max_tokens=128000, threshold=0.9):
    """Warn at the first LLM call whose token count nears the context limit.

    The token count of each ``llm_call`` entry is compared on its own against
    the limit; counts are not summed across calls.
    NOTE(review): this assumes ``data["tokens"]`` reflects the context size
    of that call rather than an increment - confirm against the caller.

    Args:
        trace: Iterable of entries; ``llm_call`` entries need
            ``data["tokens"]`` and a top-level ``"timestamp"``.
        max_tokens: Context window size of the model in use. Defaults to
            128000.
        threshold: Fraction of ``max_tokens`` above which a warning is
            returned. Defaults to 0.9.

    Returns:
        A warning dict (``warning``, ``usage``, ``timestamp``) for the first
        call that exceeds the threshold, otherwise
        ``{"status": "ok", "final_usage": <tokens of the last LLM call>}``
        (0 when the trace has no LLM calls).
    """
    limit = max_tokens * threshold
    total_tokens = 0

    for entry in trace:
        if entry["step_type"] != "llm_call":
            continue

        total_tokens = entry["data"]["tokens"]
        if total_tokens > limit:
            return {
                "warning": "Context nearly full",
                "usage": f"{total_tokens}/{max_tokens}",
                "timestamp": entry["timestamp"],
            }

    return {"status": "ok", "final_usage": total_tokens}

Debugging Tools Integration

# LangSmith integration for LangChain, configured through process
# environment variables.
# NOTE(review): presumably these must be set before LangChain is first used
# for tracing to take effect - confirm against the LangSmith documentation.
import os
os.environ["LANGCHAIN_TRACING_V2"] = "true"  # os.environ values must be strings, hence "true" not True
os.environ["LANGCHAIN_PROJECT"] = "my-agent-debug"  # presumably the project that traced runs are grouped under - confirm

# Phoenix for observability (third-party package, must be installed separately).
# NOTE(review): the import path and instrument() entry point may differ
# between Phoenix releases - verify against the installed version.
from phoenix.trace import langchain as phoenix_langchain
phoenix_langchain.instrument()

Debug Checklist

| Check | Tool | When |
|---|---|---|
| Execution trace | Logger | Every run |
| Token usage | Token counter | Before deploy |
| Tool success rate | Failure analyzer | Weekly |
| Response quality | Evaluation suite | After changes |
| Latency | Performance monitor | Continuous |

Best Practices

  • Log prompts and responses (with size limits)
  • Record all tool calls with timing
  • Enable replay for production issues
  • Set up alerts for error spikes
  • Use observability platforms (LangSmith, Phoenix, Arize)

Test your error handling knowledge in the module quiz!

Quiz

Module 4: Error Handling & Recovery

Take Quiz