Lesson 18 of 20

Build a Research Agent

Core Logic

3 min read

Now let's implement the agent's brain—the ReAct loop that reasons, acts, and synthesizes research findings.

Prompt Templates

# prompts/templates.py

# System message for the research loop.
#
# This is a ``str.format`` template with a single placeholder, ``{tools}``,
# which the agent fills with one "- name: description" line per tool.
# Because it goes through ``str.format``, any literal brace added to this
# text must be doubled ("{{" / "}}").
#
# The agent parses replies that follow this layout: it looks for an
# "Action:" line immediately followed by an "Action Input:" line, and it
# stops looping once the reply contains the marker "Final Answer:". Keep
# those three labels in sync with the parsing code if this text is edited.
SYSTEM_PROMPT = """You are a research assistant that helps users understand topics thoroughly.

You have access to these tools:
{tools}

When researching, follow these steps:
1. Break down the topic into key questions
2. Search for information to answer each question
3. Synthesize findings into a coherent narrative
4. Always cite your sources

Use this format:
Thought: [Your reasoning about what to do next]
Action: [tool_name]
Action Input: [query for the tool]

After gathering enough information, provide your final answer with:
Final Answer: [Your comprehensive response with citations]
"""

# Prompt for the final report-writing call.
#
# This is a ``str.format`` template with four placeholders:
#   {topic}      - the topic string the user asked about
#   {findings}   - the text block of collected findings
#   {max_length} - the word limit for the report
#   {sources}    - one "[id] title: url" line per source
# The "[1], [2]" citation instruction refers to the ids used in {sources}.
# Any literal brace added to this text must be doubled ("{{" / "}}").
SYNTHESIS_PROMPT = """Based on the following research findings, write a comprehensive report on "{topic}".

Findings:
{findings}

Requirements:
- Start with a brief overview
- Organize into logical sections
- Include specific facts and data
- Cite sources using [1], [2], etc.
- End with key takeaways
- Keep under {max_length} words

Sources:
{sources}
"""

Memory Store

# memory/store.py
from dataclasses import dataclass
from typing import List, Optional
from datetime import datetime

@dataclass
class ResearchFinding:
    """One piece of information collected during a research run."""

    # The query string that produced this finding.
    query: str
    # The text of the finding itself.
    content: str
    # Where the finding came from; also used to de-duplicate sources.
    source_url: str
    # Human-readable title of the source.
    source_title: str
    # When the finding was recorded.
    timestamp: datetime

class ResearchMemory:
    """Accumulates the findings gathered during a single research run.

    Attributes:
        findings: Every stored finding, in insertion order.
        queries_made: The distinct query strings passed to ``add_finding``,
            with their original casing.
    """

    def __init__(self):
        self.findings: List[ResearchFinding] = []
        self.queries_made: set = set()

    def add_finding(self, query: str, content: str, url: str, title: str):
        """Store one finding and remember the query that produced it.

        Args:
            query: The query string that led to this finding.
            content: The text of the finding.
            url: URL of the source.
            title: Title of the source.
        """
        self.findings.append(
            ResearchFinding(
                query=query,
                content=content,
                source_url=url,
                source_title=title,
                timestamp=datetime.now(),
            )
        )
        self.queries_made.add(query)

    def get_all_findings(self) -> str:
        """Return all findings as one text block.

        Each finding is rendered as three lines (query, source title,
        content); findings are separated by a blank line. Returns an empty
        string when nothing has been stored.
        """
        return "\n\n".join(
            f"Query: {f.query}\n"
            f"Source: {f.source_title}\n"
            f"Content: {f.content}"
            for f in self.findings
        )

    def get_sources(self) -> List[dict]:
        """Return the unique sources in first-seen order.

        Sources are de-duplicated by URL. Each entry is a dict with the keys
        ``"id"``, ``"title"`` and ``"url"``. Ids are contiguous and start at
        1, so they can be used directly as ``[1]``, ``[2]``, ... citation
        markers; numbering by finding position would leave gaps whenever a
        URL appears more than once.
        """
        sources: List[dict] = []
        seen_urls = set()
        for finding in self.findings:
            if finding.source_url in seen_urls:
                continue
            seen_urls.add(finding.source_url)
            sources.append({
                # Number by position in the de-duplicated list, not by
                # position in ``self.findings``.
                "id": len(sources) + 1,
                "title": finding.source_title,
                "url": finding.source_url,
            })
        return sources

    def has_searched(self, query: str) -> bool:
        """Return True if ``query`` was already recorded, ignoring case."""
        wanted = query.lower()
        return any(made.lower() == wanted for made in self.queries_made)

Main Agent Class

# agent.py
import re
from langchain_openai import ChatOpenAI
from tools.search import WebSearchTool
from memory.store import ResearchMemory
from prompts.templates import SYSTEM_PROMPT, SYNTHESIS_PROMPT
from config import Config

class ResearchAgent:
    """Research agent that gathers findings in a ReAct loop, then writes a report.

    The agent alternates between asking the LLM what to do next and running
    the tool the LLM names. Successful tool results are stored in a
    ``ResearchMemory``; once the loop ends, a separate LLM call turns the
    stored findings into the final report.

    Tools are expected to expose a ``description`` attribute and a
    ``run(query)`` method returning a dict with a ``"success"`` flag, a
    ``"results"`` list of dicts (keys ``"title"``, ``"snippet"``, ``"url"``)
    and, on failure, an optional ``"error"`` message.
    """

    # Pulls the tool name and its input out of a model reply. The pattern
    # expects "Action:" to be directly followed by an "Action Input:" line.
    _ACTION_PATTERN = re.compile(
        r"Action:\s*(\w+)\s*\nAction Input:\s*(.+?)(?=\n|$)",
        re.DOTALL,
    )

    # Maximum number of snippet characters echoed back to the model per result.
    _SNIPPET_PREVIEW_CHARS = 200

    def __init__(self, config: "Config"):
        """Create the LLM client and the tool registry from ``config``.

        Args:
            config: Supplies ``MODEL_NAME``, ``TEMPERATURE`` and
                ``OPENAI_API_KEY`` here, and ``MAX_ITERATIONS`` /
                ``REPORT_MAX_LENGTH`` later during a run.
        """
        self.config = config
        self.llm = ChatOpenAI(
            model=config.MODEL_NAME,
            temperature=config.TEMPERATURE,
            api_key=config.OPENAI_API_KEY,
        )
        self.tools = {
            "web_search": WebSearchTool(),
        }
        # Created fresh at the start of every ``research`` call.
        self.memory = None

    def research(self, topic: str) -> str:
        """Research ``topic`` and return the synthesized report text.

        Starts from an empty memory, so findings from a previous call are
        not carried over.
        """
        self.memory = ResearchMemory()
        self._research_loop(topic)
        return self._synthesize_report(topic)

    def _research_loop(self, topic: str) -> None:
        """Run the ReAct loop, storing tool results in ``self.memory``.

        The loop ends when a reply contains "Final Answer:" or after
        ``config.MAX_ITERATIONS`` LLM calls, whichever comes first. The text
        of the final answer itself is not used; the report is produced
        separately from the stored findings.
        """
        tools_desc = "\n".join(
            f"- {name}: {tool.description}"
            for name, tool in self.tools.items()
        )

        messages = [
            {"role": "system", "content": SYSTEM_PROMPT.format(tools=tools_desc)},
            {"role": "user", "content": f"Research this topic thoroughly: {topic}"},
        ]

        for _ in range(self.config.MAX_ITERATIONS):
            content = self.llm.invoke(messages).content

            if "Final Answer:" in content:
                break

            # Record the model's turn on every path, so that whatever
            # feedback follows is read in the context of the reply that
            # triggered it.
            messages.append({"role": "assistant", "content": content})

            action_match = self._ACTION_PATTERN.search(content)
            if action_match is None:
                # Without feedback the next iteration would resend the exact
                # same conversation and burn the remaining iterations.
                messages.append({
                    "role": "user",
                    "content": (
                        "Error: No action found. Reply with 'Action: <tool_name>' "
                        "followed by 'Action Input: <query>' on the next line, "
                        "or give your 'Final Answer:'."
                    ),
                })
                continue

            tool_name = action_match.group(1).strip()
            tool_input = action_match.group(2).strip()

            tool = self.tools.get(tool_name)
            if tool is None:
                messages.append({
                    "role": "user",
                    "content": f"Error: Unknown tool '{tool_name}'",
                })
                continue

            result = tool.run(tool_input)

            # Only successful calls contribute findings; failures are still
            # reported back to the model through the observation below.
            if result["success"]:
                for item in result["results"]:
                    self.memory.add_finding(
                        query=tool_input,
                        content=item["snippet"],
                        url=item["url"],
                        title=item["title"],
                    )

            messages.append({
                "role": "user",
                "content": f"Observation: {self._format_results(result)}",
            })

    def _format_results(self, result: dict) -> str:
        """Render a tool result as the observation text shown to the model.

        Returns a failure message when ``result["success"]`` is falsy,
        "No results found" for an empty result list, and otherwise one
        "- title: snippet" line per result. Snippets longer than the preview
        limit are cut and marked with "..."; shorter ones are shown as-is.
        """
        if not result["success"]:
            return f"Search failed: {result.get('error', 'Unknown error')}"

        if not result["results"]:
            return "No results found"

        limit = self._SNIPPET_PREVIEW_CHARS
        lines = []
        for item in result["results"]:
            snippet = item["snippet"]
            # Add the ellipsis only when text was actually dropped.
            preview = snippet if len(snippet) <= limit else f"{snippet[:limit]}..."
            lines.append(f"- {item['title']}: {preview}")

        return "\n".join(lines)

    def _synthesize_report(self, topic: str) -> str:
        """Ask the LLM to write the final report from the stored findings.

        Fills ``SYNTHESIS_PROMPT`` with the topic, the findings text, the
        numbered source list and ``config.REPORT_MAX_LENGTH``, sends it as a
        single user message, and returns the reply text.
        """
        findings = self.memory.get_all_findings()
        sources_text = "\n".join(
            f"[{s['id']}] {s['title']}: {s['url']}"
            for s in self.memory.get_sources()
        )

        prompt = SYNTHESIS_PROMPT.format(
            topic=topic,
            findings=findings,
            sources=sources_text,
            max_length=self.config.REPORT_MAX_LENGTH,
        )

        response = self.llm.invoke([{"role": "user", "content": prompt}])
        return response.content

Key Design Decisions

| Decision | Rationale |
| --- | --- |
| ReAct pattern | Transparent reasoning, controllable |
| Separate memory | Persistent findings across iterations |
| Tool abstraction | Easy to add new search sources |
| Synthesis step | Better quality than streaming output |

Next: Add testing and validation to ensure reliable results.

Quick check: how does this lesson land for you?

Quiz

Module 5: Build a Research Agent

Take Quiz
FREE WEEKLY NEWSLETTER

Stay on the Nerd Track

One email per week — courses, deep dives, tools, and AI experiments.

No spam. Unsubscribe anytime.