Build a Research Agent
Core Logic
3 min read
Now let's implement the agent's brain—the ReAct loop that reasons, acts, and synthesizes research findings.
Prompt Templates
# prompts/templates.py
"""Prompt templates for the research agent.

SYSTEM_PROMPT drives the ReAct loop (Thought / Action / Action Input turns);
SYNTHESIS_PROMPT turns the accumulated findings into the final cited report.
"""

# System prompt for the ReAct loop.
# Placeholder: {tools} -- newline-joined "- name: description" list of tools.
SYSTEM_PROMPT = """You are a research assistant that helps users understand topics thoroughly.
You have access to these tools:
{tools}
When researching, follow these steps:
1. Break down the topic into key questions
2. Search for information to answer each question
3. Synthesize findings into a coherent narrative
4. Always cite your sources
Use this format:
Thought: [Your reasoning about what to do next]
Action: [tool_name]
Action Input: [query for the tool]
After gathering enough information, provide your final answer with:
Final Answer: [Your comprehensive response with citations]
"""

# Prompt for the final synthesis pass.
# Placeholders: {topic} research subject, {findings} text from
# ResearchMemory.get_all_findings(), {sources} numbered source list,
# {max_length} word budget for the report.
SYNTHESIS_PROMPT = """Based on the following research findings, write a comprehensive report on "{topic}".
Findings:
{findings}
Requirements:
- Start with a brief overview
- Organize into logical sections
- Include specific facts and data
- Cite sources using [1], [2], etc.
- End with key takeaways
- Keep under {max_length} words
Sources:
{sources}
"""
Memory Store
# memory/store.py
from dataclasses import dataclass
from typing import List, Optional
from datetime import datetime


@dataclass
class ResearchFinding:
    """One piece of retrieved evidence, tied to the query that produced it."""
    query: str           # the search query that produced this finding
    content: str         # snippet/body text retrieved from the source
    source_url: str      # URL of the source page
    source_title: str    # human-readable title of the source
    timestamp: datetime  # when the finding was recorded


class ResearchMemory:
    """Accumulates findings across ReAct iterations and derives citation data."""

    def __init__(self) -> None:
        self.findings: List[ResearchFinding] = []
        # Raw queries as issued; has_searched() compares case-insensitively.
        self.queries_made: set = set()

    def add_finding(self, query: str, content: str, url: str, title: str) -> None:
        """Record one search result and remember the query that produced it."""
        self.findings.append(
            ResearchFinding(
                query=query,
                content=content,
                source_url=url,
                source_title=title,
                timestamp=datetime.now(),
            )
        )
        self.queries_made.add(query)

    def get_all_findings(self) -> str:
        """Render every finding as a text block for the synthesis prompt."""
        return "\n\n".join(
            f"Query: {f.query}\n"
            f"Source: {f.source_title}\n"
            f"Content: {f.content}"
            for f in self.findings
        )

    def get_sources(self) -> List[dict]:
        """Return unique sources with sequential citation ids (1, 2, 3, ...).

        Bug fix: ids were previously the finding's enumeration index, so any
        duplicate URL left gaps (e.g. [1], [3]) that did not match the
        sequential [1], [2] citation markers the synthesis prompt requests.
        """
        seen = set()
        sources = []
        for f in self.findings:
            if f.source_url in seen:
                continue  # keep only the first occurrence of each URL
            seen.add(f.source_url)
            sources.append({
                "id": len(sources) + 1,  # sequential per unique source
                "title": f.source_title,
                "url": f.source_url,
            })
        return sources

    def has_searched(self, query: str) -> bool:
        """True if an equivalent query (case-insensitive) was already issued."""
        return query.lower() in {q.lower() for q in self.queries_made}
Main Agent Class
# agent.py
import re
from langchain_openai import ChatOpenAI
from tools.search import WebSearchTool
from memory.store import ResearchMemory
from prompts.templates import SYSTEM_PROMPT, SYNTHESIS_PROMPT
from config import Config
class ResearchAgent:
    """ReAct-style research agent: gathers findings via tools, then synthesizes
    them into a cited report."""

    # Matches "Action: <tool>\nAction Input: <query>" emitted by the LLM;
    # the input capture stops at the first newline (or end of text).
    _ACTION_RE = re.compile(
        r"Action:\s*(\w+)\s*\nAction Input:\s*(.+?)(?=\n|$)",
        re.DOTALL,
    )

    def __init__(self, config: "Config"):  # quoted so the annotation stays lazy
        self.config = config
        self.llm = ChatOpenAI(
            model=config.MODEL_NAME,
            temperature=config.TEMPERATURE,
            api_key=config.OPENAI_API_KEY,
        )
        self.tools = {
            "web_search": WebSearchTool(),
        }
        # A fresh ResearchMemory is created per research() call.
        self.memory = None

    def research(self, topic: str) -> str:
        """Research *topic* end to end and return the synthesized report."""
        self.memory = ResearchMemory()
        self._research_loop(topic)          # gather findings via ReAct loop
        return self._synthesize_report(topic)

    def _research_loop(self, topic: str):
        """Run up to MAX_ITERATIONS Thought/Action/Observation turns."""
        tools_desc = "\n".join(
            f"- {name}: {tool.description}" for name, tool in self.tools.items()
        )
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT.format(tools=tools_desc)},
            {"role": "user", "content": f"Research this topic thoroughly: {topic}"},
        ]
        for _ in range(self.config.MAX_ITERATIONS):
            content = self.llm.invoke(messages).content
            # Stop as soon as the model declares a final answer.
            if "Final Answer:" in content:
                break
            match = self._ACTION_RE.search(content)
            if match is None:
                # Bug fix: the original appended nothing here, so the loop
                # re-sent an identical transcript every remaining iteration.
                # Record the reply and nudge the model toward valid output.
                messages.append({"role": "assistant", "content": content})
                messages.append({
                    "role": "user",
                    "content": (
                        "Could not parse an action. Reply with 'Action:' and "
                        "'Action Input:' lines, or give your 'Final Answer:'."
                    ),
                })
                continue
            tool_name = match.group(1).strip()
            tool_input = match.group(2).strip()
            # Always keep the model's own message in the transcript
            # (the original dropped it on the unknown-tool path).
            messages.append({"role": "assistant", "content": content})
            if tool_name not in self.tools:
                messages.append({
                    "role": "user",
                    "content": f"Error: Unknown tool '{tool_name}'",
                })
                continue
            # Consistency fix: use ResearchMemory.has_searched (previously
            # defined but never called) to avoid re-running identical queries.
            if self.memory.has_searched(tool_input):
                messages.append({
                    "role": "user",
                    "content": (
                        "Observation: that query was already searched; try a "
                        "different query or give your Final Answer."
                    ),
                })
                continue
            result = self.tools[tool_name].run(tool_input)
            # Store successful results so the synthesis step can cite them.
            if result["success"]:
                for r in result["results"]:
                    self.memory.add_finding(
                        query=tool_input,
                        content=r["snippet"],
                        url=r["url"],
                        title=r["title"],
                    )
            messages.append({
                "role": "user",
                "content": f"Observation: {self._format_results(result)}",
            })

    def _format_results(self, result: dict) -> str:
        """Render a tool-result dict as an Observation string for the LLM.

        Expects ``{"success": bool, "results": [...], "error": str?}`` as
        produced by WebSearchTool.run -- TODO confirm against tools/search.
        """
        if not result["success"]:
            return f"Search failed: {result.get('error', 'Unknown error')}"
        if not result["results"]:
            return "No results found"
        # Truncate snippets so observations stay small in the context window.
        return "\n".join(
            f"- {r['title']}: {r['snippet'][:200]}..." for r in result["results"]
        )

    def _synthesize_report(self, topic: str) -> str:
        """Generate the final cited report from everything in memory."""
        sources_text = "\n".join(
            f"[{s['id']}] {s['title']}: {s['url']}"
            for s in self.memory.get_sources()
        )
        prompt = SYNTHESIS_PROMPT.format(
            topic=topic,
            findings=self.memory.get_all_findings(),
            sources=sources_text,
            max_length=self.config.REPORT_MAX_LENGTH,
        )
        return self.llm.invoke([{"role": "user", "content": prompt}]).content
Key Design Decisions
| Decision | Rationale |
|---|---|
| ReAct pattern | Transparent reasoning, controllable |
| Separate memory | Persistent findings across iterations |
| Tool abstraction | Easy to add new search sources |
| Synthesis step | Better quality than streaming output |
Next: Add testing and validation to ensure reliable results.