The Future of Agents
Self-Improving Agents
4 min read
The most interesting agents don't just execute—they learn from their mistakes. Self-improvement is the frontier of agent development in 2025.
The Learning Loop
Execute → Observe Outcome → Analyze Failure → Update Strategy → Execute Better
This isn't just retry logic. It's agents that genuinely improve their approach based on what worked and what didn't.
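A minimal sketch of that loop, assuming hypothetical agent methods (initial_strategy, execute, analyze_failure, update_strategy) that stand in for an implementation's own:

async def learning_loop(agent, task: str, max_attempts: int = 3) -> dict:
    strategy = agent.initial_strategy(task)
    for _ in range(max_attempts):
        result = await agent.execute(task, strategy)            # Execute
        if result.get("success"):                               # Observe outcome
            return result
        diagnosis = await agent.analyze_failure(task, result)   # Analyze failure
        strategy = agent.update_strategy(strategy, diagnosis)   # Update strategy
    return result  # best effort after exhausting attempts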
Reflection Patterns
After each task, the agent reflects on its performance:
import json
from datetime import datetime

class ReflectiveAgent:
    def __init__(self, model: str = "claude-sonnet-4-20250514"):
        self.model = model
        self.experience_log = []

    async def execute_with_reflection(self, task: str) -> dict:
        """Execute a task and learn from the outcome."""
        # Step 1: Execute the task (execute_task is the agent's normal run loop)
        result = await self.execute_task(task)

        # Step 2: Reflect on the execution
        reflection = await self.reflect(task, result)

        # Step 3: Store the learning for future runs
        self.experience_log.append({
            "task": task,
            "result": result,
            "reflection": reflection,
            "timestamp": datetime.now()
        })
        return {"result": result, "reflection": reflection}

    async def reflect(self, task: str, result: dict) -> dict:
        """Generate a structured reflection on task execution."""
        response = await llm.chat(
            model=self.model,
            messages=[{
                "role": "user",
                "content": f"""Reflect on this task execution:

Task: {task}
Result: {json.dumps(result, indent=2)}
Success: {result.get('success', False)}

Analyze:
1. What went well?
2. What could be improved?
3. What would you do differently next time?
4. What patterns should be remembered?

Return JSON with keys: went_well, improvements, next_time, patterns"""
            }]
        )
        return json.loads(response.content)
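Usage is one call per task. A hypothetical driver, assuming execute_task and the llm client are wired up elsewhere and that this runs inside an async context:

agent = ReflectiveAgent()
outcome = await agent.execute_with_reflection("Triage the failing nightly build")
# The reflection is now in the log, available to future runs
print(outcome["reflection"].get("patterns"))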
Experience-Based Prompting
Use past experiences to improve future performance:
class ExperienceAugmentedAgent:
    def __init__(self):
        self.experience_store = []

    async def build_context_from_experience(self, task: str) -> str:
        """Find relevant past experiences to include in context."""
        # Find similar past tasks (find_similar_experiences is sketched below)
        similar = await self.find_similar_experiences(task, limit=3)
        if not similar:
            return ""

        context_parts = ["## Relevant Past Experiences\n"]
        for exp in similar:
            context_parts.append(f"""
### Similar Task: {exp['task'][:100]}...
- Outcome: {'Success' if exp['result']['success'] else 'Failed'}
- Key Learning: {exp['reflection'].get('patterns', 'None recorded')}
- What to avoid: {exp['reflection'].get('improvements', 'None recorded')}
""")
        return "\n".join(context_parts)

    async def execute(self, task: str) -> dict:
        """Execute with experience-augmented context."""
        experience_context = await self.build_context_from_experience(task)
        system_prompt = f"""You are an AI agent that learns from experience.

{experience_context}

Apply these learnings to the current task. Avoid past mistakes and build on successful patterns."""
        return await self.run_with_prompt(system_prompt, task)
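The find_similar_experiences call is left open above. A minimal, dependency-free sketch (added to ExperienceAugmentedAgent) that ranks stored experiences by keyword overlap; a production version would more likely use embedding similarity:

    async def find_similar_experiences(self, task: str, limit: int = 3) -> list[dict]:
        """Naive similarity: rank stored experiences by words shared with the task."""
        task_words = set(task.lower().split())
        scored = []
        for exp in self.experience_store:
            overlap = len(task_words & set(exp["task"].lower().split()))
            if overlap > 0:
                scored.append((overlap, exp))
        scored.sort(key=lambda pair: pair[0], reverse=True)
        return [exp for _, exp in scored[:limit]]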
Tool Selection Learning
Track which tools work best for which tasks:
class ToolLearner:
    def __init__(self):
        self.tool_success_rates = {}  # (task_type, tool) -> {"successes": int, "total": int}
        self.tool_sequences = {}      # task_type -> [successful tool sequences]

    def record_tool_usage(self, task_type: str, tools_used: list[str], success: bool):
        """Learn which tools work for which tasks."""
        for tool in tools_used:
            key = (task_type, tool)
            if key not in self.tool_success_rates:
                self.tool_success_rates[key] = {"successes": 0, "total": 0}
            self.tool_success_rates[key]["total"] += 1
            if success:
                self.tool_success_rates[key]["successes"] += 1

        # Record successful sequences for later replay
        if success:
            if task_type not in self.tool_sequences:
                self.tool_sequences[task_type] = []
            self.tool_sequences[task_type].append(tools_used)

    def get_recommended_tools(self, task_type: str) -> list[str]:
        """Recommend tools based on past success."""
        relevant = [
            (tool, stats["successes"] / stats["total"])
            for (t, tool), stats in self.tool_success_rates.items()
            if t == task_type and stats["total"] >= 3  # require a minimum sample size
        ]
        # Sort by success rate, best first
        relevant.sort(key=lambda x: x[1], reverse=True)
        return [tool for tool, _ in relevant[:5]]
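In practice the learner is fed after every run and consulted before the next one. The task type and tool names below are illustrative:

learner = ToolLearner()
# After each run, record what was used and whether the task succeeded
learner.record_tool_usage("web_research", ["search", "fetch_page", "summarize"], success=True)
learner.record_tool_usage("web_research", ["search", "scrape_raw"], success=False)
# Before the next web_research task (a tool appears once it has >= 3 samples)
preferred = learner.get_recommended_tools("web_research")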
Prompt Evolution
Agents that improve their own prompts:
class PromptEvolver:
    def __init__(self):
        self.prompt_versions = {}  # prompt_id -> [versions]
        self.prompt_scores = {}    # (prompt_id, version) -> score

    async def evolve_prompt(self, prompt_id: str, current_prompt: str, feedback: list[dict]) -> str:
        """Generate an improved prompt based on recent feedback."""
        feedback_summary = "\n".join([
            f"- Task: {f['task'][:50]}... | Success: {f['success']} | Issue: {f.get('issue', 'None')}"
            for f in feedback[-10:]  # Last 10 interactions
        ])

        response = await llm.chat(
            model="claude-sonnet-4-20250514",
            messages=[{
                "role": "user",
                "content": f"""Improve this system prompt based on feedback.

Current Prompt:
{current_prompt}

Recent Feedback:
{feedback_summary}

Generate an improved version that:
1. Addresses the failures mentioned
2. Reinforces successful patterns
3. Adds specific guidance for problem areas
4. Remains concise and clear

Return only the improved prompt text."""
            }]
        )

        new_prompt = response.content
        self.prompt_versions.setdefault(prompt_id, []).append(new_prompt)
        return new_prompt
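The prompt_scores map implies a selection step the class doesn't show: score each version after use, then roll forward only the winner. A sketch of that step (two methods added to PromptEvolver, assuming 1-indexed version numbers):

    def record_score(self, prompt_id: str, version: int, score: float):
        """Record an evaluation score for a specific prompt version."""
        self.prompt_scores[(prompt_id, version)] = score

    def best_version(self, prompt_id: str) -> str | None:
        """Return the highest-scoring stored version, if any exist."""
        versions = self.prompt_versions.get(prompt_id, [])
        scored = [
            (self.prompt_scores.get((prompt_id, i + 1), 0.0), text)
            for i, text in enumerate(versions)
        ]
        return max(scored, key=lambda pair: pair[0])[1] if scored else None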
The Meta-Learning Agent
An agent that learns how to learn:
import random

class MetaLearningAgent:
    def __init__(self):
        self.learning_strategies = [
            "reflection_after_failure",
            "experience_augmentation",
            "tool_preference_learning",
            "prompt_evolution"
        ]
        self.strategy_effectiveness = {s: [] for s in self.learning_strategies}

    async def select_learning_strategy(self, context: dict) -> str:
        """Choose which learning strategy to apply."""
        # Score each strategy by its recent effectiveness (last 10 outcomes)
        strategy_scores = {}
        for strategy, results in self.strategy_effectiveness.items():
            if results:
                strategy_scores[strategy] = sum(results[-10:]) / len(results[-10:])
            else:
                strategy_scores[strategy] = 0.5  # Neutral default for untried strategies

        # Epsilon-greedy: mostly exploit the best strategy, occasionally explore
        if random.random() < 0.1:  # 10% exploration
            return random.choice(self.learning_strategies)
        return max(strategy_scores, key=strategy_scores.get)
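For those scores to mean anything, the effectiveness log has to be fed after each task. A minimal recording hook (added to MetaLearningAgent; the method name is assumed, not prescribed):

    def record_strategy_result(self, strategy: str, success: bool):
        """Append an outcome to the log: 1.0 for success, 0.0 for failure."""
        self.strategy_effectiveness[strategy].append(1.0 if success else 0.0)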
Nerd Note: Self-improving agents are powerful but need guardrails. An agent that modifies its own prompts could drift in unexpected ways. Always log changes and maintain rollback capability.
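One way to satisfy that guardrail is an append-only change log where rollback re-appends an earlier version rather than rewriting history. Everything below is an illustrative sketch, not a prescribed API:

from datetime import datetime

class PromptChangeLog:
    """Append-only history of prompt edits with rollback."""
    def __init__(self, initial_prompt: str):
        self.history = [(datetime.now(), initial_prompt)]

    def update(self, new_prompt: str):
        self.history.append((datetime.now(), new_prompt))

    def current(self) -> str:
        return self.history[-1][1]

    def rollback(self, steps: int = 1) -> str:
        """Revert by re-appending an earlier version, keeping the full audit trail."""
        target = self.history[max(0, len(self.history) - 1 - steps)][1]
        self.history.append((datetime.now(), target))
        return target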
Next: How agents work together in ecosystems.