Lesson 17 of 20

Build a Research Agent

Project Setup

3 min read

Let's build a research agent that can search the web, gather information, and produce structured reports. This capstone project brings together everything you've learned.

What We're Building

A research agent that:

  • Takes a research topic from the user
  • Searches multiple sources for information
  • Synthesizes findings into a coherent report
  • Cites sources properly

Project Structure

research_agent/
├── agent.py           # Main agent logic
├── tools/
│   ├── __init__.py
│   ├── search.py      # Web search tool
│   └── scraper.py     # Content extraction
├── memory/
│   ├── __init__.py
│   └── store.py       # Research memory
├── prompts/
│   └── templates.py   # Prompt templates
├── utils/
│   └── validators.py  # Output validation
├── config.py          # Configuration
├── requirements.txt
└── main.py            # Entry point

Dependencies

# requirements.txt
langchain>=0.1.0
langchain-openai>=0.0.5
duckduckgo-search>=4.0
beautifulsoup4>=4.12
pydantic>=2.0
python-dotenv>=1.0

Configuration

# config.py
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if present) so the environment
# lookups in Config below can see them.
load_dotenv()

class Config:
    """Central settings for the research agent.

    Environment-backed values are read once, when this class body executes;
    everything else is a fixed constant.
    """

    # --- LLM ---
    OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")  # None when unset
    MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-4o")  # env override, else default
    TEMPERATURE = 0.3

    # --- Search ---
    MAX_SEARCH_RESULTS = 5
    MAX_CONTENT_LENGTH = 4000

    # --- Agent loop ---
    MAX_ITERATIONS = 10
    TIMEOUT_SECONDS = 120

    # --- Report output ---
    REPORT_MAX_LENGTH = 2000
    INCLUDE_SOURCES = True

Base Tool Structure

# tools/__init__.py
from abc import ABC, abstractmethod

class BaseTool(ABC):
    """Common interface for tools the agent can invoke.

    Concrete tools are expected to provide ``name`` and ``description`` and
    must implement ``run``. Instances are callable: calling a tool forwards
    the query to ``run``.
    """

    name: str  # identifier of the tool
    description: str  # short summary of what the tool does

    @abstractmethod
    def run(self, query: str) -> dict:
        """Run the tool for ``query`` and return its results as a dict."""

    def __call__(self, query: str) -> dict:
        """Invoke the tool like a function; same as ``run(query)``."""
        outcome = self.run(query)
        return outcome

Search Tool Implementation

# tools/search.py
from duckduckgo_search import DDGS

from config import Config
from tools import BaseTool

class WebSearchTool(BaseTool):
    """Web search tool backed by ``duckduckgo_search.DDGS``."""

    name = "web_search"
    description = "Search the web for current information on a topic"

    def __init__(self):
        self.ddgs = DDGS()

    def run(self, query: str) -> dict:
        """Search the web for ``query`` and return normalized results.

        Args:
            query: The search string. Blank or empty queries are rejected
                without contacting the search backend.

        Returns:
            A dict with ``"success"`` (bool) and ``"results"`` (a list of
            ``{"title", "url", "snippet"}`` dicts). On failure ``"success"``
            is ``False``, ``"results"`` is empty, and ``"error"`` holds the
            error message. This method does not raise.
        """
        if not query or not query.strip():
            return {
                "success": False,
                "error": "Search query must not be empty",
                "results": []
            }

        try:
            raw_results = self.ddgs.text(
                query,
                max_results=Config.MAX_SEARCH_RESULTS
            )
            # Use .get() so a single result that lacks a field does not raise
            # KeyError and throw away every other result of the search.
            results = [
                {
                    "title": r.get("title", ""),
                    "url": r.get("href", ""),
                    "snippet": r.get("body", "")
                }
                for r in raw_results or []
            ]
        except Exception as e:
            # Deliberate best-effort: report the failure to the caller in the
            # result dict instead of propagating it.
            return {
                "success": False,
                "error": str(e),
                "results": []
            }

        return {"success": True, "results": results}

Entry Point

# main.py
from agent import ResearchAgent
from config import Config

def main():
    """Prompt for a research topic, run the agent on it, and print the report.

    Surrounding whitespace is stripped from the topic; if nothing remains,
    a short message is printed and the agent is not run.
    """
    agent = ResearchAgent(config=Config())

    topic = input("Enter research topic: ").strip()
    if not topic:
        # Skip the agent run entirely for a blank topic.
        print("No research topic provided.")
        return

    print(f"\nResearching: {topic}\n")

    report = agent.research(topic)

    print("\n" + "="*50)
    print("RESEARCH REPORT")
    print("="*50)
    print(report)

if __name__ == "__main__":
    main()

Environment Setup

# Create an isolated virtual environment in ./venv and activate it
python -m venv venv
source venv/bin/activate  # On Windows: venv\Scripts\activate

# Install the packages listed in requirements.txt into the active environment
pip install -r requirements.txt

# Create the .env file (note: ">" overwrites any existing .env);
# replace your-key-here with your real OpenAI API key
echo "OPENAI_API_KEY=your-key-here" > .env

Next: Implement the core agent logic with the ReAct pattern.

Quiz

Module 5: Build a Research Agent

Take Quiz