Hybrid Search & Reranking
Implementing Hybrid Search
Combine BM25 keyword search with vector similarity search using Reciprocal Rank Fusion (RRF) to retrieve better results than either method achieves alone.
Architecture Overview
              Query
                │
     ┌──────────┴──────────┐
     ▼                     ▼
┌──────────┐          ┌──────────┐
│   BM25   │          │  Vector  │
│  Search  │          │  Search  │
└────┬─────┘          └────┬─────┘
     │                     │
     │   Results + Ranks   │
     └──────────┬──────────┘
                ▼
        ┌──────────────┐
        │     RRF      │
        │    Fusion    │
        └──────┬───────┘
               ▼
         Combined Results
BM25 Implementation
First, set up keyword search with BM25:
from rank_bm25 import BM25Okapi
import nltk
from nltk.tokenize import word_tokenize

nltk.download('punkt')  # newer NLTK releases may also require 'punkt_tab'

class BM25Retriever:
    def __init__(self, documents: list[str]):
        # Tokenize documents
        self.documents = documents
        self.tokenized_docs = [word_tokenize(doc.lower()) for doc in documents]

        # Build BM25 index
        self.bm25 = BM25Okapi(self.tokenized_docs)

    def search(self, query: str, k: int = 10) -> list[tuple[int, float]]:
        """Return (doc_index, score) tuples, best first."""
        tokenized_query = word_tokenize(query.lower())
        scores = self.bm25.get_scores(tokenized_query)

        # Get top-k indices, highest score first
        top_indices = scores.argsort()[-k:][::-1]
        return [(int(idx), float(scores[idx])) for idx in top_indices]
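A quick smoke test on a toy corpus (the documents and query here are made up for illustration):

docs = [
    "OAuth 2.0 authorization code flow",
    "Setting up PostgreSQL replication",
    "JWT token validation middleware",
]
bm25 = BM25Retriever(docs)
print(bm25.search("OAuth token", k=2))
# [(doc_index, score), ...] with the OAuth/JWT docs ranked first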
Vector Search Implementation
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

class VectorRetriever:
    def __init__(self, documents: list[str]):
        self.documents = documents
        self.embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

        # Build vector index
        self.vectorstore = Chroma.from_texts(
            texts=documents,
            embedding=self.embeddings
        )

    def search(self, query: str, k: int = 10) -> list[tuple[int, float]]:
        """Return (doc_index, score) tuples.

        Note: Chroma returns distances, so lower scores mean closer matches.
        RRF only uses rank order, so this doesn't matter for fusion.
        """
        results = self.vectorstore.similarity_search_with_score(query, k=k)

        # Map results back to indices (assumes document texts are unique)
        doc_to_idx = {doc: idx for idx, doc in enumerate(self.documents)}
        return [(doc_to_idx[doc.page_content], score) for doc, score in results]
Reciprocal Rank Fusion (RRF)
RRF combines rankings without needing score normalization:
def reciprocal_rank_fusion(
    rankings: list[list[tuple[int, float]]],
    k: int = 60,
    weights: list[float] | None = None
) -> list[tuple[int, float]]:
    """
    Combine multiple rankings using RRF.

    Args:
        rankings: List of rankings, each is [(doc_id, score), ...]
        k: RRF constant (default 60)
        weights: Optional per-ranking weights (default: all 1.0)

    Returns:
        Fused ranking [(doc_id, rrf_score), ...]
    """
    if weights is None:
        weights = [1.0] * len(rankings)

    rrf_scores = {}
    for ranking, weight in zip(rankings, weights):
        for rank, (doc_id, _) in enumerate(ranking):
            if doc_id not in rrf_scores:
                rrf_scores[doc_id] = 0.0
            # RRF formula: 1 / (k + rank), with 1-based ranks,
            # hence the +1 on the 0-based enumerate index
            rrf_scores[doc_id] += weight * (1 / (k + rank + 1))

    # Sort by RRF score descending
    return sorted(rrf_scores.items(), key=lambda x: x[1], reverse=True)
Why RRF works:
- Score-agnostic: No need to normalize different scoring systems
- Position-based: Documents ranked highly by multiple systems get boosted
- Constant k: Controls how strongly top positions dominate; smaller k favors top-ranked documents more, larger k flattens the differences between positions
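To make the formula concrete, here is a toy fusion of two three-document rankings (the doc IDs and scores are arbitrary):

bm25_ranking = [(3, 12.4), (1, 9.8), (7, 5.1)]      # doc 3 ranked first
vector_ranking = [(1, 0.12), (3, 0.25), (9, 0.31)]  # doc 1 ranked first

fused = reciprocal_rank_fusion([bm25_ranking, vector_ranking], k=60)
# Doc 1: 1/(60+2) + 1/(60+1) ≈ 0.0325
# Doc 3: 1/(60+1) + 1/(60+2) ≈ 0.0325
# Doc 7: 1/(60+3)            ≈ 0.0159
# Doc 9: 1/(60+3)            ≈ 0.0159
# Documents appearing in both lists outrank documents appearing in only one.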
Complete Hybrid Retriever
class HybridRetriever:
    def __init__(self, documents: list[str]):
        self.documents = documents
        self.bm25 = BM25Retriever(documents)
        self.vector = VectorRetriever(documents)

    def search(
        self,
        query: str,
        k: int = 10,
        bm25_weight: float = 0.5,
        vector_weight: float = 0.5
    ) -> list[dict]:
        """
        Hybrid search combining BM25 and vector search.

        Args:
            query: Search query
            k: Number of results to return
            bm25_weight: Weight for BM25 results
            vector_weight: Weight for vector results

        Returns:
            List of documents with scores
        """
        # Get results from both systems (over-fetch to give RRF more to fuse)
        bm25_results = self.bm25.search(query, k=k*2)
        vector_results = self.vector.search(query, k=k*2)

        # Apply weighted RRF fusion
        fused = reciprocal_rank_fusion(
            [bm25_results, vector_results],
            weights=[bm25_weight, vector_weight]
        )

        # Return top-k with document content
        results = []
        for doc_id, score in fused[:k]:
            results.append({
                "content": self.documents[doc_id],
                "score": score,
                "doc_id": doc_id
            })
        return results
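Putting it together, reusing the toy docs from the BM25 example above (note the vector side calls the OpenAI embeddings API, so a key must be configured):

retriever = HybridRetriever(docs)
for hit in retriever.search("OAuth authentication", k=3):
    print(f'{hit["score"]:.4f}  {hit["content"]}')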
LangChain Implementation
Using LangChain's built-in ensemble retriever, which applies weighted Reciprocal Rank Fusion under the hood:
from langchain.retrievers import EnsembleRetriever
from langchain_community.retrievers import BM25Retriever  # note: shadows the custom class above

# Create retrievers; from_documents expects LangChain Document objects
# (use BM25Retriever.from_texts for raw strings)
bm25_retriever = BM25Retriever.from_documents(documents)
bm25_retriever.k = 10

# vectorstore is the Chroma index built earlier
vector_retriever = vectorstore.as_retriever(search_kwargs={"k": 10})

# Combine with ensemble
ensemble_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, vector_retriever],
    weights=[0.5, 0.5]  # Equal weighting
)

# Search (invoke replaces the deprecated get_relevant_documents)
results = ensemble_retriever.invoke("OAuth authentication")
Database-Native Hybrid Search
Many vector databases support hybrid search natively:
Qdrant
from qdrant_client import QdrantClient, models

client = QdrantClient("localhost", port=6333)

# Hybrid search with built-in RRF fusion (qdrant-client >= 1.10 Query API).
# Assumes the collection defines named "dense" and "sparse" vectors, and that
# query_embedding (dense) and query_sparse (a models.SparseVector) are
# precomputed for the query text.
results = client.query_points(
    collection_name="documents",
    prefetch=[
        models.Prefetch(query=query_sparse, using="sparse", limit=20),
        models.Prefetch(query=query_embedding, using="dense", limit=20),
    ],
    query=models.FusionQuery(fusion=models.Fusion.RRF),
    with_payload=True,
    limit=10,
).points
Weaviate
import weaviate

client = weaviate.connect_to_local()
collection = client.collections.get("Document")

# Hybrid search (v4 client)
results = collection.query.hybrid(
    query="OAuth authentication",
    alpha=0.5,  # 0 = keyword only, 1 = vector only, 0.5 = balanced
    limit=10
)
Pinecone
from pinecone import Pinecone

pc = Pinecone(api_key="your-key")
index = pc.Index("hybrid-index")

# Pinecone hybrid with sparse-dense vectors
results = index.query(
    vector=dense_embedding,
    sparse_vector={
        "indices": sparse_indices,
        "values": sparse_values
    },
    top_k=10,
    include_metadata=True
)
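Pinecone expects you to produce the sparse vector yourself. One option (an assumption here, not part of the snippet above) is the BM25Encoder from the pinecone-text package:

from pinecone_text.sparse import BM25Encoder

# Fit BM25 statistics on the corpus once, then encode queries at search time
encoder = BM25Encoder()
encoder.fit(docs)  # docs: list of strings

sparse = encoder.encode_queries("OAuth authentication")
sparse_indices, sparse_values = sparse["indices"], sparse["values"]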
Tuning Hybrid Weights
A simple grid search over the weight split, scored by average recall@10 against a labeled evaluation set:
def find_optimal_weights(
    queries: list[str],
    ground_truth: list[list[int]],
    retriever: HybridRetriever
) -> tuple[float, float]:
    """Find the BM25/vector weight split that maximizes average recall@10."""
    best_weights = (0.5, 0.5)
    best_score = 0.0

    for bm25_w in [0.3, 0.4, 0.5, 0.6, 0.7]:
        vector_w = 1 - bm25_w
        total_recall = 0.0

        for query, truth in zip(queries, ground_truth):
            results = retriever.search(
                query,
                k=10,
                bm25_weight=bm25_w,
                vector_weight=vector_w
            )
            retrieved_ids = [r["doc_id"] for r in results]
            recall = len(set(retrieved_ids) & set(truth)) / len(truth)
            total_recall += recall

        avg_recall = total_recall / len(queries)
        if avg_recall > best_score:
            best_score = avg_recall
            best_weights = (bm25_w, vector_w)

    return best_weights
Implementation Tip: Start with equal weights (0.5/0.5), then tune based on your query patterns. Queries dominated by exact identifiers, error codes, and jargon usually benefit from a higher BM25 weight; paraphrase-heavy, conceptual queries favor the vector side.
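For example, with a small labeled evaluation set (the queries and relevant doc indices below are hypothetical):

eval_queries = ["OAuth token refresh", "postgres replication lag"]
eval_truth = [[0, 2], [1]]  # relevant doc indices per query

bm25_w, vector_w = find_optimal_weights(eval_queries, eval_truth, retriever)
print(f"Best split: BM25={bm25_w}, vector={vector_w}")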
Next, let's explore reranking strategies to further improve result quality.