Build a Research Agent
Project Setup
3 min read
Let's build a research agent that can search the web, gather information, and produce structured reports. This capstone project brings together everything you've learned.
What We're Building
A research agent that:
- Takes a research topic from the user
- Searches multiple sources for information
- Synthesizes findings into a coherent report
- Cites sources properly
Project Structure
research_agent/
├── agent.py # Main agent logic
├── tools/
│ ├── __init__.py
│ ├── search.py # Web search tool
│ └── scraper.py # Content extraction
├── memory/
│ ├── __init__.py
│ └── store.py # Research memory
├── prompts/
│ └── templates.py # Prompt templates
├── utils/
│ └── validators.py # Output validation
├── config.py # Configuration
├── requirements.txt
└── main.py # Entry point
Dependencies
# requirements.txt
langchain>=0.1.0
langchain-openai>=0.0.5
duckduckgo-search>=4.0
beautifulsoup4>=4.12
pydantic>=2.0
python-dotenv>=1.0
Configuration
# config.py
import os
from dotenv import load_dotenv
load_dotenv()
class Config:
    """Central settings for the research agent.

    All values are plain class attributes, so they can be read from the
    class itself (``Config.MAX_SEARCH_RESULTS``) or from an instance
    (``Config()``). The two LLM values backed by ``os.getenv`` are
    evaluated once, when this class body runs.
    """

    # LLM settings
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")  # None when the variable is unset
    MODEL_NAME = os.getenv("MODEL_NAME", "gpt-4o")  # falls back to "gpt-4o"
    TEMPERATURE = 0.3

    # Search settings
    MAX_SEARCH_RESULTS = 5
    MAX_CONTENT_LENGTH = 4000

    # Agent settings
    MAX_ITERATIONS = 10
    TIMEOUT_SECONDS = 120

    # Output settings
    REPORT_MAX_LENGTH = 2000
    INCLUDE_SOURCES = True
Base Tool Structure
# tools/__init__.py
from abc import ABC, abstractmethod
class BaseTool(ABC):
    """Abstract interface shared by the agent's tools.

    Subclasses provide ``name`` and ``description`` and implement
    :meth:`run`. Instances are callable: calling one forwards the query
    to :meth:`run` and returns its result.
    """

    name: str
    description: str

    @abstractmethod
    def run(self, query: str) -> dict:
        """Execute the tool and return results."""

    def __call__(self, query: str) -> dict:
        """Invoke the tool like a function; equivalent to ``run(query)``."""
        return self.run(query)
Search Tool Implementation
# tools/search.py
from duckduckgo_search import DDGS

from config import Config
from tools import BaseTool
class WebSearchTool(BaseTool):
    """Web search tool backed by ``duckduckgo_search.DDGS``."""

    name = "web_search"
    description = "Search the web for current information on a topic"

    def __init__(self):
        self.ddgs = DDGS()

    def run(self, query: str) -> dict:
        """Search the web for *query* and return normalized results.

        At most ``Config.MAX_SEARCH_RESULTS`` results are requested.

        Returns:
            On success, ``{"success": True, "results": [...]}`` where each
            entry has ``title``, ``url`` and ``snippet`` keys. If anything
            raises, ``{"success": False, "error": <message>, "results": []}``.
        """
        try:
            raw_results = list(
                self.ddgs.text(query, max_results=Config.MAX_SEARCH_RESULTS)
            )
            return {
                "success": True,
                "results": [
                    {
                        # .get() keeps one result with a missing field from
                        # failing the whole search.
                        "title": r.get("title", ""),
                        "url": r.get("href", ""),
                        "snippet": r.get("body", ""),
                    }
                    for r in raw_results
                ],
            }
        except Exception as e:
            # Deliberate best-effort: report the failure to the caller in the
            # result dict instead of raising.
            return {
                "success": False,
                "error": str(e),
                "results": [],
            }
Entry Point
# main.py
from agent import ResearchAgent
from config import Config
def main():
    """Prompt for a research topic, run the agent on it, and print the report.

    Leading and trailing whitespace is stripped from the topic. If nothing
    remains, a message is printed and the agent is not run.
    """
    agent = ResearchAgent(config=Config())

    topic = input("Enter research topic: ").strip()
    if not topic:
        # Avoid spending a research run on an empty query.
        print("No research topic provided.")
        return

    print(f"\nResearching: {topic}\n")
    report = agent.research(topic)

    print("\n" + "=" * 50)
    print("RESEARCH REPORT")
    print("=" * 50)
    print(report)


if __name__ == "__main__":
    main()
Environment Setup
# Create a virtual environment in ./venv and activate it
python -m venv venv
source venv/bin/activate # On Windows: venv\Scripts\activate
# Install the packages listed in requirements.txt
pip install -r requirements.txt
# Create the .env file holding the API key
# (">" overwrites any existing .env; replace your-key-here with a real key)
echo "OPENAI_API_KEY=your-key-here" > .env
Next: Implement the core agent logic with the ReAct pattern.