Lesson 13 of 23

Hybrid Search & Reranking

Implementing Hybrid Search

4 min read

Combine BM25 keyword search with vector similarity search using Reciprocal Rank Fusion (RRF) for optimal results.

Architecture Overview

                    Query
          ┌───────────┴───────────┐
          ▼                       ▼
    ┌──────────┐           ┌──────────┐
    │   BM25   │           │  Vector  │
    │  Search  │           │  Search  │
    └────┬─────┘           └────┬─────┘
         │                      │
         │   Results + Ranks    │
         └──────────┬───────────┘
            ┌──────────────┐
            │     RRF      │
            │    Fusion    │
            └──────┬───────┘
            Combined Results

BM25 Implementation

First, set up keyword search with BM25:

from rank_bm25 import BM25Okapi
import nltk
from nltk.tokenize import word_tokenize

nltk.download('punkt')

class BM25Retriever:
    def __init__(self, documents: list[str]):
        # Tokenize documents
        self.documents = documents
        self.tokenized_docs = [word_tokenize(doc.lower()) for doc in documents]

        # Build BM25 index
        self.bm25 = BM25Okapi(self.tokenized_docs)

    def search(self, query: str, k: int = 10) -> list[tuple[int, float]]:
        """Return (doc_index, score) tuples."""
        tokenized_query = word_tokenize(query.lower())
        scores = self.bm25.get_scores(tokenized_query)

        # Get top-k indices and scores
        top_indices = scores.argsort()[-k:][::-1]
        return [(idx, scores[idx]) for idx in top_indices]

Vector Search Implementation

from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

class VectorRetriever:
    def __init__(self, documents: list[str]):
        self.documents = documents
        self.embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

        # Build vector index
        self.vectorstore = Chroma.from_texts(
            texts=documents,
            embedding=self.embeddings
        )

    def search(self, query: str, k: int = 10) -> list[tuple[int, float]]:
        """Return (doc_index, score) tuples."""
        results = self.vectorstore.similarity_search_with_score(query, k=k)

        # Map results back to indices
        doc_to_idx = {doc: idx for idx, doc in enumerate(self.documents)}
        return [(doc_to_idx[r[0].page_content], r[1]) for r in results]

Reciprocal Rank Fusion (RRF)

RRF combines rankings without needing score normalization:

def reciprocal_rank_fusion(
    rankings: list[list[tuple[int, float]]],
    k: int = 60
) -> list[tuple[int, float]]:
    """
    Combine multiple rankings using RRF.

    Args:
        rankings: List of rankings, each is [(doc_id, score), ...]
        k: RRF constant (default 60)

    Returns:
        Fused ranking [(doc_id, rrf_score), ...]
    """
    rrf_scores = {}

    for ranking in rankings:
        for rank, (doc_id, _) in enumerate(ranking):
            if doc_id not in rrf_scores:
                rrf_scores[doc_id] = 0
            # RRF formula: 1 / (k + rank)
            rrf_scores[doc_id] += 1 / (k + rank + 1)

    # Sort by RRF score descending
    sorted_results = sorted(rrf_scores.items(), key=lambda x: x[1], reverse=True)
    return sorted_results

Why RRF works:

  • Score-agnostic: No need to normalize different scoring systems
  • Position-based: Documents ranked highly by multiple systems get boosted
  • Constant k: Controls how much to favor top-ranked documents

Complete Hybrid Retriever

class HybridRetriever:
    def __init__(self, documents: list[str]):
        self.documents = documents
        self.bm25 = BM25Retriever(documents)
        self.vector = VectorRetriever(documents)

    def search(
        self,
        query: str,
        k: int = 10,
        bm25_weight: float = 0.5,
        vector_weight: float = 0.5
    ) -> list[dict]:
        """
        Hybrid search combining BM25 and vector search.

        Args:
            query: Search query
            k: Number of results to return
            bm25_weight: Weight for BM25 results
            vector_weight: Weight for vector results

        Returns:
            List of documents with scores
        """
        # Get results from both systems
        bm25_results = self.bm25.search(query, k=k*2)
        vector_results = self.vector.search(query, k=k*2)

        # Apply RRF fusion
        fused = reciprocal_rank_fusion([bm25_results, vector_results])

        # Return top-k with document content
        results = []
        for doc_id, score in fused[:k]:
            results.append({
                "content": self.documents[doc_id],
                "score": score,
                "doc_id": doc_id
            })

        return results

LangChain Implementation

Using LangChain's built-in ensemble retriever:

from langchain.retrievers import EnsembleRetriever
from langchain_community.retrievers import BM25Retriever
from langchain_community.vectorstores import Chroma

# Create retrievers
bm25_retriever = BM25Retriever.from_documents(documents)
bm25_retriever.k = 10

vector_retriever = vectorstore.as_retriever(search_kwargs={"k": 10})

# Combine with ensemble
ensemble_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, vector_retriever],
    weights=[0.5, 0.5]  # Equal weighting
)

# Search
results = ensemble_retriever.get_relevant_documents("OAuth authentication")

Many vector databases support hybrid search natively:

Qdrant

from qdrant_client import QdrantClient
from qdrant_client.models import models

client = QdrantClient("localhost", port=6333)

# Hybrid search with built-in fusion
results = client.query_points(
    collection_name="documents",
    query=query_embedding,
    using="dense",  # Vector field
    with_payload=True,
    limit=10,
    query_filter=None,
    # Enable hybrid with sparse vectors
    with_vectors=False,
).points

# Or use Qdrant's sparse vectors for BM25-like search

Weaviate

import weaviate

client = weaviate.connect_to_local()
collection = client.collections.get("Document")

# Hybrid search
results = collection.query.hybrid(
    query="OAuth authentication",
    alpha=0.5,  # 0 = keyword only, 1 = vector only, 0.5 = balanced
    limit=10
)

Pinecone

from pinecone import Pinecone

pc = Pinecone(api_key="your-key")
index = pc.Index("hybrid-index")

# Pinecone hybrid with sparse-dense vectors
results = index.query(
    vector=dense_embedding,
    sparse_vector={
        "indices": sparse_indices,
        "values": sparse_values
    },
    top_k=10,
    include_metadata=True
)

Tuning Hybrid Weights

def find_optimal_weights(
    queries: list[str],
    ground_truth: list[list[int]],
    retriever: HybridRetriever
) -> tuple[float, float]:
    """Find optimal BM25/vector weights."""
    best_weights = (0.5, 0.5)
    best_score = 0

    for bm25_w in [0.3, 0.4, 0.5, 0.6, 0.7]:
        vector_w = 1 - bm25_w

        total_recall = 0
        for query, truth in zip(queries, ground_truth):
            results = retriever.search(
                query,
                k=10,
                bm25_weight=bm25_w,
                vector_weight=vector_w
            )
            retrieved_ids = [r["doc_id"] for r in results]
            recall = len(set(retrieved_ids) & set(truth)) / len(truth)
            total_recall += recall

        avg_recall = total_recall / len(queries)
        if avg_recall > best_score:
            best_score = avg_recall
            best_weights = (bm25_w, vector_w)

    return best_weights

Implementation Tip: Start with equal weights (0.5/0.5), then tune based on your query patterns. Technical domains often benefit from higher BM25 weight; conceptual domains favor vector weight.

Next, let's explore reranking strategies to further improve result quality. :::

Quiz

Module 4: Hybrid Search & Reranking

Take Quiz