# Build a Research Agent: Next Steps
Congratulations! You've built a functional research agent. Here's how to extend it and continue your learning journey.
## Enhancements to Try
### 1. Add More Tools

Search snippets are often too shallow for a full report. A scraper tool lets the agent pull the complete text of a promising result (the import paths assume the project layout from the earlier chapters):

```python
# tools/scraper.py
import requests
from bs4 import BeautifulSoup

from config import Config        # MAX_CONTENT_LENGTH, defined in earlier lessons
from tools.base import BaseTool  # the base class your other tools extend


class WebScraperTool(BaseTool):
    name = "web_scraper"
    description = "Extract full content from a specific URL"

    def run(self, url: str) -> dict:
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()  # treat HTTP errors as failures
            soup = BeautifulSoup(response.text, 'html.parser')
            # Strip scripts, styles, and navigation chrome before extracting text
            for tag in soup(['script', 'style', 'nav', 'footer']):
                tag.decompose()
            text = soup.get_text(separator=' ', strip=True)
            return {
                "success": True,
                "content": text[:Config.MAX_CONTENT_LENGTH],
                "url": url,
            }
        except Exception as e:
            return {"success": False, "error": str(e)}
```
### 2. Implement Caching

Iterative research repeats queries often, and search APIs charge per call. A file-based cache with a time-to-live avoids paying twice for the same query:

```python
# utils/cache.py
import hashlib
import json
from pathlib import Path
from datetime import datetime, timedelta


class SearchCache:
    def __init__(self, cache_dir: str = ".cache", ttl_hours: int = 24):
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(exist_ok=True)
        self.ttl = timedelta(hours=ttl_hours)

    def _get_key(self, query: str) -> str:
        # MD5 is fine here: the hash is a filename, not a security boundary
        return hashlib.md5(query.encode()).hexdigest()

    def get(self, query: str) -> dict | None:
        key = self._get_key(query)
        cache_file = self.cache_dir / f"{key}.json"
        if cache_file.exists():
            data = json.loads(cache_file.read_text())
            cached_time = datetime.fromisoformat(data["timestamp"])
            if datetime.now() - cached_time < self.ttl:
                return data["results"]
        return None  # miss, or entry expired

    def set(self, query: str, results: dict):
        key = self._get_key(query)
        cache_file = self.cache_dir / f"{key}.json"
        data = {
            "timestamp": datetime.now().isoformat(),
            "query": query,
            "results": results,
        }
        cache_file.write_text(json.dumps(data))
```
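Wiring it in is just a lookup before the network call. A minimal sketch, assuming a search tool with the `run(query) -> dict` interface built earlier:

```python
# cached search (sketch) -- SearchTool is the hypothetical search tool
# from earlier lessons; the cache stays invisible to the rest of the agent.
from tools.search import SearchTool
from utils.cache import SearchCache

search_tool = SearchTool()
cache = SearchCache(ttl_hours=24)

def cached_search(query: str) -> dict:
    results = cache.get(query)
    if results is None:              # cache miss or expired entry
        results = search_tool.run(query)
        cache.set(query, results)
    return results
```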
### 3. Add Streaming Output

Research runs can take a while, and a silent agent feels broken. An async generator streams progress as each step completes:

```python
# agent.py (enhanced)
async def research_stream(self, topic: str):
    """Stream research progress to the user."""
    # Assumes async variants of the research loop and synthesis steps
    yield f"Starting research on: {topic}\n"
    self.memory = ResearchMemory()
    iteration = 0
    async for step in self._research_loop_async(topic):
        iteration += 1
        yield f"\n[Step {iteration}] {step['type']}\n"
        if step['type'] == 'search':
            yield f"  Searching: {step['query']}\n"
            yield f"  Found {len(step['results'])} results\n"
        elif step['type'] == 'thinking':
            yield f"  {step['thought'][:100]}...\n"
    yield "\n[Synthesizing report...]\n"
    report = await self._synthesize_report_async(topic)
    yield "\n" + "=" * 50 + "\n"
    yield report
```
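Consuming the stream is a plain `async for` loop. A minimal driver sketch, with `ResearchAgent` again standing in for your agent class:

```python
# run_stream.py (sketch)
import asyncio

from agent import ResearchAgent  # hypothetical import from earlier lessons

async def main():
    agent = ResearchAgent()
    async for chunk in agent.research_stream("history of transformers"):
        print(chunk, end="", flush=True)  # render progress as it arrives

asyncio.run(main())
```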
## Architecture Improvements
| Current | Enhanced |
|---|---|
| Single LLM | Primary + fallback LLM |
| Sync execution | Async with parallel search |
| Memory in RAM | Persistent vector store |
| Single search | Multi-source aggregation |
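Of these, the primary + fallback pattern is the quickest win: a thin wrapper that retries a second model when the first errors out. A sketch, assuming two client objects that share a `complete(prompt) -> str` method (both names are illustrative):

```python
# llm/fallback.py (sketch) -- `primary` and `fallback` are any two
# LLM clients exposing the same complete(prompt) -> str interface.
class FallbackLLM:
    def __init__(self, primary, fallback):
        self.primary = primary
        self.fallback = fallback

    def complete(self, prompt: str) -> str:
        try:
            return self.primary.complete(prompt)
        except Exception:
            # Primary failed (rate limit, outage): degrade to the fallback model
            return self.fallback.complete(prompt)
```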
## Production Considerations
### Monitoring

Collect basic counters so you can spot cost spikes and failure trends before your users do:

```python
# utils/monitoring.py
from dataclasses import dataclass


@dataclass
class AgentMetrics:
    total_searches: int = 0
    total_tokens: int = 0
    avg_response_time: float = 0.0
    error_count: int = 0


class MetricsCollector:
    def __init__(self):
        self.metrics = AgentMetrics()
        self.response_times = []  # raw durations, kept for the running average

    def record_search(self):
        self.metrics.total_searches += 1

    def record_tokens(self, count: int):
        self.metrics.total_tokens += count

    def record_response_time(self, duration: float):
        self.response_times.append(duration)
        self.metrics.avg_response_time = sum(self.response_times) / len(self.response_times)

    def record_error(self):
        self.metrics.error_count += 1
```
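A typical integration times each research call and records the outcome. A usage sketch, with a hypothetical `agent.research()` call:

```python
# usage sketch -- ResearchAgent is the hypothetical agent class from earlier
import time

from agent import ResearchAgent
from utils.monitoring import MetricsCollector

agent = ResearchAgent()
collector = MetricsCollector()

start = time.time()
try:
    report = agent.research("quantum error correction")
    collector.record_response_time(time.time() - start)
except Exception:
    collector.record_error()  # count the failure, then let it propagate
    raise

print(collector.metrics)
```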
### Rate Limiting

Search APIs and LLM providers enforce request quotas; a sliding-window limiter keeps the agent under them:

```python
# utils/rate_limiter.py
import time
from collections import deque


class RateLimiter:
    def __init__(self, max_requests: int, time_window: int):
        self.max_requests = max_requests
        self.time_window = time_window  # seconds
        self.requests = deque()         # timestamps of recent requests

    def acquire(self) -> bool:
        now = time.time()
        # Drop timestamps that have aged out of the window
        while self.requests and self.requests[0] < now - self.time_window:
            self.requests.popleft()
        if len(self.requests) < self.max_requests:
            self.requests.append(now)
            return True
        return False

    def wait(self):
        # Block until a slot opens in the window
        while not self.acquire():
            time.sleep(0.1)
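Put the limiter in front of any outbound call; for example, capping searches at 10 per minute:

```python
# usage sketch -- cap outbound searches at 10 per 60 seconds
from tools.search import SearchTool  # hypothetical module from earlier lessons
from utils.rate_limiter import RateLimiter

search_tool = SearchTool()
limiter = RateLimiter(max_requests=10, time_window=60)

def rate_limited_search(query: str) -> dict:
    limiter.wait()                 # blocks until a slot frees up
    return search_tool.run(query)
```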
## Continue Learning

### Advanced Topics
- Multi-agent systems: Build teams of specialized agents
- Human-in-the-loop: Add approval steps for critical actions (see the sketch after this list)
- Fine-tuning: Train models for specific research domains
- Evaluation frameworks: Build systematic quality assessment
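As a first taste of human-in-the-loop control, a console approval gate can guard any risky tool call. A minimal sketch, reusing the `WebScraperTool` from above:

```python
# approval gate (sketch)
from tools.scraper import WebScraperTool

scraper = WebScraperTool()

def approve(action: str) -> bool:
    # Ask the operator before the agent acts; default is to deny
    answer = input(f"Agent wants to: {action}. Allow? [y/N] ")
    return answer.strip().lower() == "y"

url = "https://example.com"
if approve(f"scrape {url}"):
    result = scraper.run(url)
else:
    result = {"success": False, "error": "denied by user"}
```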
### Resources
| Resource | Focus |
|---|---|
| LangChain Docs | Framework deep-dive |
| LangSmith | Production monitoring |
| Anthropic Cookbook | Claude best practices |
| OpenAI Guide | GPT optimization |
### Project Ideas
- Domain Expert Agent: Research agent specialized for a specific field (legal, medical, technical)
- Comparison Agent: Research that compares multiple options
- Trend Analyzer: Track and report on emerging topics
- Fact Checker: Verify claims against authoritative sources
## Course Recap
You've learned:
- Agentic patterns: ReAct, tool use, planning, multi-step workflows
- Frameworks: LangChain, CrewAI, OpenAI Agents SDK
- Memory systems: Context management, RAG, short/long-term memory
- Error handling: Graceful degradation, validation, debugging
- Building agents: Full implementation from setup to testing
Complete the final quiz to earn your course badge!