Lesson 18 of 20

Build a Research Agent

Core Logic

3 min read

Now let's implement the agent's brain—the ReAct loop that reasons, acts, and synthesizes research findings.

Prompt Templates

# prompts/templates.py

# System prompt for the research (ReAct) loop.
# The single placeholder {tools} is filled via str.format with one
# "- name: description" line per registered tool.
# The "Action:" / "Action Input:" lines described below are what the agent's
# regular expression looks for in each model reply, and the literal text
# "Final Answer:" is what ends the loop.
SYSTEM_PROMPT = """You are a research assistant that helps users understand topics thoroughly.

You have access to these tools:
{tools}

When researching, follow these steps:
1. Break down the topic into key questions
2. Search for information to answer each question
3. Synthesize findings into a coherent narrative
4. Always cite your sources

Use this format:
Thought: [Your reasoning about what to do next]
Action: [tool_name]
Action Input: [query for the tool]

After gathering enough information, provide your final answer with:
Final Answer: [Your comprehensive response with citations]
"""

# Prompt for the final report-writing call.
# Placeholders filled via str.format:
#   {topic}      - the research topic as given by the caller
#   {findings}   - the text block of all collected findings
#   {max_length} - word limit for the report
#   {sources}    - one "[id] title: url" line per source
SYNTHESIS_PROMPT = """Based on the following research findings, write a comprehensive report on "{topic}".

Findings:
{findings}

Requirements:
- Start with a brief overview
- Organize into logical sections
- Include specific facts and data
- Cite sources using [1], [2], etc.
- End with key takeaways
- Keep under {max_length} words

Sources:
{sources}
"""

Memory Store

# memory/store.py
from dataclasses import dataclass
from typing import List, Optional
from datetime import datetime

@dataclass
class ResearchFinding:
    """One search result recorded during a research run."""

    query: str  # the query string that produced this result
    content: str  # result text (the agent stores the result's snippet here)
    source_url: str  # URL of the source; used to de-duplicate sources
    source_title: str  # human-readable title of the source
    timestamp: datetime  # when the finding was recorded

class ResearchMemory:
    """In-memory record of the findings and queries of one research run.

    Attributes:
        findings: Every recorded finding, in insertion order.
        queries_made: The distinct query strings that produced findings.
    """

    def __init__(self):
        self.findings: List[ResearchFinding] = []
        self.queries_made: set = set()

    def add_finding(self, query: str, content: str, url: str, title: str):
        """Record one search result and remember the query that produced it.

        Args:
            query: The query string that was sent to the tool.
            content: The result text to keep.
            url: URL of the source.
            title: Title of the source.
        """
        self.findings.append(
            ResearchFinding(
                query=query,
                content=content,
                source_url=url,
                source_title=title,
                timestamp=datetime.now(),
            )
        )
        self.queries_made.add(query)

    def get_all_findings(self) -> str:
        """Return all findings as one text block, separated by blank lines."""
        return "\n\n".join(
            f"Query: {f.query}\n"
            f"Source: {f.source_title}\n"
            f"Content: {f.content}"
            for f in self.findings
        )

    def get_sources(self) -> List[dict]:
        """Return the unique sources in first-seen order.

        Each entry is a dict with keys ``id``, ``title`` and ``url``. IDs are
        assigned sequentially (1, 2, 3, ...) over the *unique* sources, so
        the citation numbers have no gaps when several findings share a URL.
        """
        seen = set()
        sources = []
        for f in self.findings:
            if f.source_url in seen:
                continue
            seen.add(f.source_url)
            sources.append({
                # Number by position in the de-duplicated list, not by the
                # finding's index, so skipped duplicates do not leave gaps.
                "id": len(sources) + 1,
                "title": f.source_title,
                "url": f.source_url,
            })
        return sources

    def has_searched(self, query: str) -> bool:
        """Return True if ``query`` was already recorded, ignoring case."""
        needle = query.lower()
        return any(q.lower() == needle for q in self.queries_made)

Main Agent Class

# agent.py
import re
from langchain_openai import ChatOpenAI
from tools.search import WebSearchTool
from memory.store import ResearchMemory
from prompts.templates import SYSTEM_PROMPT, SYNTHESIS_PROMPT
from config import Config

class ResearchAgent:
    """Research agent: a ReAct-style search loop followed by report synthesis.

    The agent asks the model to reason and pick a tool, runs the tool,
    stores successful results in a ``ResearchMemory``, and finally asks the
    model to write a report from everything collected.
    """

    # An "Action: <tool>" line immediately followed by an
    # "Action Input: <text>" line; the input is captured to end of line.
    _ACTION_PATTERN = re.compile(
        r"Action:\s*(\w+)\s*\nAction Input:\s*([^\n]+)"
    )

    # Snippets longer than this are truncated in observations.
    _SNIPPET_LIMIT = 200

    def __init__(self, config: "Config"):
        """Create the chat model and tool registry from ``config``.

        Args:
            config: Provides MODEL_NAME, TEMPERATURE and OPENAI_API_KEY here,
                and MAX_ITERATIONS and REPORT_MAX_LENGTH later on.
        """
        self.config = config
        self.llm = ChatOpenAI(
            model=config.MODEL_NAME,
            temperature=config.TEMPERATURE,
            api_key=config.OPENAI_API_KEY
        )
        self.tools = {
            "web_search": WebSearchTool()
        }
        # Created fresh at the start of every research() call.
        self.memory = None

    def research(self, topic: str) -> str:
        """Research ``topic`` and return the synthesized report text."""
        self.memory = ResearchMemory()

        # Gather findings, then turn them into a report.
        self._research_loop(topic)
        return self._synthesize_report(topic)

    @staticmethod
    def _parse_action(content: str):
        """Extract ``(tool_name, tool_input)`` from a model reply.

        Returns:
            A 2-tuple of stripped strings, or ``None`` when the reply holds
            no "Action:" / "Action Input:" pair or the input is empty.
        """
        match = ResearchAgent._ACTION_PATTERN.search(content)
        if match is None:
            return None
        tool_name = match.group(1).strip()
        tool_input = match.group(2).strip()
        if not tool_input:
            return None
        return tool_name, tool_input

    def _research_loop(self, topic: str):
        """Run the ReAct loop, storing successful tool results in memory.

        The loop ends when the model emits "Final Answer:" or after
        ``config.MAX_ITERATIONS`` model calls, whichever comes first.
        """
        tools_desc = "\n".join(
            f"- {name}: {tool.description}"
            for name, tool in self.tools.items()
        )

        messages = [
            {"role": "system", "content": SYSTEM_PROMPT.format(tools=tools_desc)},
            {"role": "user", "content": f"Research this topic thoroughly: {topic}"}
        ]

        for _ in range(self.config.MAX_ITERATIONS):
            response = self.llm.invoke(messages)
            content = response.content

            if "Final Answer:" in content:
                break

            # Always record the model's turn. Without it the next call would
            # repeat the identical prompt, or the error/observation below
            # would lack the reply it refers to.
            messages.append({"role": "assistant", "content": content})

            action = self._parse_action(content)
            if action is None:
                # Nudge the model back onto the expected format rather than
                # silently spending the remaining iterations.
                messages.append({
                    "role": "user",
                    "content": (
                        "Error: Could not parse an action. Reply with an "
                        "'Action:' line followed by an 'Action Input:' line, "
                        "or with 'Final Answer:'."
                    )
                })
                continue

            tool_name, tool_input = action
            tool = self.tools.get(tool_name)
            if tool is None:
                messages.append({
                    "role": "user",
                    "content": f"Error: Unknown tool '{tool_name}'"
                })
                continue

            result = tool.run(tool_input)

            # Only successful searches contribute findings.
            if result["success"]:
                for r in result["results"]:
                    self.memory.add_finding(
                        query=tool_input,
                        content=r["snippet"],
                        url=r["url"],
                        title=r["title"]
                    )

            messages.append({
                "role": "user",
                "content": f"Observation: {self._format_results(result)}"
            })

    def _format_results(self, result: dict) -> str:
        """Render a tool result dict as observation text for the model.

        Args:
            result: Dict with ``success``; on success also ``results`` (a
                list of dicts with ``title`` and ``snippet``); on failure
                optionally ``error``.

        Returns:
            A failure message, "No results found", or one "- title: snippet"
            line per result. Snippets are cut to 200 characters, and "..."
            is appended only when a snippet was actually cut.
        """
        if not result["success"]:
            return f"Search failed: {result.get('error', 'Unknown error')}"

        if not result["results"]:
            return "No results found"

        limit = self._SNIPPET_LIMIT
        lines = []
        for r in result["results"]:
            snippet = r["snippet"]
            if len(snippet) > limit:
                snippet = f"{snippet[:limit]}..."
            lines.append(f"- {r['title']}: {snippet}")

        return "\n".join(lines)

    def _synthesize_report(self, topic: str) -> str:
        """Ask the model to write the final report from stored findings."""
        findings = self.memory.get_all_findings()
        sources_text = "\n".join(
            f"[{s['id']}] {s['title']}: {s['url']}"
            for s in self.memory.get_sources()
        )

        prompt = SYNTHESIS_PROMPT.format(
            topic=topic,
            findings=findings,
            sources=sources_text,
            max_length=self.config.REPORT_MAX_LENGTH
        )

        response = self.llm.invoke([{"role": "user", "content": prompt}])
        return response.content

Key Design Decisions

| Decision | Rationale |
| --- | --- |
| ReAct pattern | Transparent reasoning, controllable |
| Separate memory | Persistent findings across iterations |
| Tool abstraction | Easy to add new search sources |
| Synthesis step | Better quality than streaming output |

Next: Add testing and validation to ensure reliable results.

Quiz

Module 5: Build a Research Agent

Take Quiz