Lesson 10 of 22

Building Applications with Ollama

LangChain with Ollama

3 min read

LangChain provides high-level abstractions for building LLM applications. The langchain-ollama package is the official integration that lets those abstractions run against models served locally by Ollama.

Installation

# Install the official Ollama integration (not langchain-community)
pip install langchain-ollama langchain

Basic Chat Model

from langchain_ollama import ChatOllama

# Initialize the chat model
llm = ChatOllama(
    model="llama3.2",
    temperature=0.7
)

# Simple invocation
response = llm.invoke("What is the meaning of life?")
print(response.content)
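Besides the text, the returned AIMessage carries metadata from Ollama. A minimal sketch of inspecting it; the exact fields depend on your langchain-ollama version, so treat the comments as illustrative:

# Metadata returned alongside the content (field names vary by version)
print(response.response_metadata)   # e.g. model name, durations, eval counts
print(response.usage_metadata)      # e.g. input/output token counts (may be None)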

Using Message Types

from langchain_ollama import ChatOllama
from langchain_core.messages import HumanMessage, SystemMessage, AIMessage

llm = ChatOllama(model="llama3.2")

messages = [
    SystemMessage(content="You are a helpful Python tutor."),
    HumanMessage(content="What's a decorator?"),
]

response = llm.invoke(messages)
print(response.content)

# Continue the conversation
messages.append(response)  # Add AI response
messages.append(HumanMessage(content="Can you show me an example?"))
response2 = llm.invoke(messages)
print(response2.content)

Streaming Responses

from langchain_ollama import ChatOllama

llm = ChatOllama(model="llama3.2")

# Stream tokens
for chunk in llm.stream("Explain recursion step by step"):
    print(chunk.content, end="", flush=True)
print()
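ChatOllama is a standard LangChain Runnable, so every method also has an async counterpart. A minimal sketch of async streaming with astream, assuming you are running inside an asyncio program:

import asyncio

from langchain_ollama import ChatOllama

llm = ChatOllama(model="llama3.2")

async def main():
    # astream yields chunks as they arrive without blocking the event loop
    async for chunk in llm.astream("Explain recursion step by step"):
        print(chunk.content, end="", flush=True)
    print()

asyncio.run(main())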

Prompt Templates

from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate

llm = ChatOllama(model="llama3.2")

# Create a reusable prompt template
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a {role}. Be concise and helpful."),
    ("human", "{question}")
])

# Create the chain
chain = prompt | llm

# Use the chain
response = chain.invoke({
    "role": "senior Python developer",
    "question": "What's the difference between a list and a tuple?"
})
print(response.content)

Embeddings

from langchain_ollama import OllamaEmbeddings

# Initialize embeddings (a dedicated embedding model such as
# nomic-embed-text usually gives better results than a chat model)
embeddings = OllamaEmbeddings(model="llama3.2")

# Single text
vector = embeddings.embed_query("What is machine learning?")
print(f"Embedding dimension: {len(vector)}")

# Multiple texts
texts = ["Python programming", "JavaScript development", "AI research"]
vectors = embeddings.embed_documents(texts)
print(f"Generated {len(vectors)} embeddings")

Building a Simple RAG Chain

from langchain_ollama import ChatOllama, OllamaEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Components
llm = ChatOllama(model="llama3.2")
embeddings = OllamaEmbeddings(model="llama3.2")

# Simple in-memory document store (for demo)
documents = [
    "Python was created by Guido van Rossum in 1991.",
    "Python emphasizes code readability and simplicity.",
    "Python supports multiple programming paradigms."
]

# Embed documents
doc_embeddings = embeddings.embed_documents(documents)

def simple_retriever(query: str) -> str:
    """Find most relevant document using cosine similarity."""
    import numpy as np

    query_embedding = embeddings.embed_query(query)

    similarities = [
        np.dot(query_embedding, doc_emb) /
        (np.linalg.norm(query_embedding) * np.linalg.norm(doc_emb))
        for doc_emb in doc_embeddings
    ]

    best_idx = np.argmax(similarities)
    return documents[best_idx]

# RAG prompt
prompt = ChatPromptTemplate.from_template("""
Answer the question based only on the following context:

Context: {context}

Question: {question}

Answer:""")

# Build the chain
rag_chain = (
    {"context": lambda x: simple_retriever(x), "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Use the chain
answer = rag_chain.invoke("Who created Python?")
print(answer)
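The hand-rolled cosine-similarity retriever above is only for illustration; in practice you would usually delegate storage and search to a vector store. A minimal sketch continuing from the example above, assuming your langchain_core version provides InMemoryVectorStore:

from langchain_core.vectorstores import InMemoryVectorStore

# Index the same documents in a small in-memory vector store
vector_store = InMemoryVectorStore.from_texts(documents, embedding=embeddings)
retriever = vector_store.as_retriever(search_kwargs={"k": 1})

# Retrievers return Document objects, so join their page_content for the prompt
rag_chain2 = (
    {
        "context": retriever | (lambda docs: "\n".join(d.page_content for d in docs)),
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

print(rag_chain2.invoke("Who created Python?"))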

Output Parsing

from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from pydantic import BaseModel, Field

class CodeReview(BaseModel):
    """Structured code review output."""
    issues: list[str] = Field(description="List of issues found")
    suggestions: list[str] = Field(description="Improvement suggestions")
    score: int = Field(description="Code quality score 1-10")

# format="json" tells Ollama to constrain the output to valid JSON
llm = ChatOllama(model="llama3.2", format="json")

prompt = ChatPromptTemplate.from_messages([
    ("system", "Review the code and respond in JSON with: issues, suggestions, score (1-10)"),
    ("human", "Review this code:\n```python\n{code}\n```")
])

# Pass the Pydantic model so the parser knows the expected schema
chain = prompt | llm | JsonOutputParser(pydantic_object=CodeReview)

code = """
def calc(x):
    return x*2+1
"""

review = chain.invoke({"code": code})
print(f"Score: {review['score']}/10")
print(f"Issues: {review['issues']}")

Configuring Ollama Options

from langchain_ollama import ChatOllama

# Full configuration
llm = ChatOllama(
    model="llama3.2",
    temperature=0.8,
    num_ctx=4096,         # Context window size in tokens
    num_predict=256,      # Max tokens to generate
    top_k=40,
    top_p=0.9,
    repeat_penalty=1.1,
    stop=["Human:", "User:"],  # Stop sequences
    base_url="http://localhost:11434"  # Custom endpoint
)

Batch Processing

from langchain_ollama import ChatOllama

llm = ChatOllama(model="llama3.2")

# Process multiple prompts
prompts = [
    "What is Python?",
    "What is JavaScript?",
    "What is Rust?"
]

# Batch invoke
responses = llm.batch(prompts)

for prompt, response in zip(prompts, responses):
    print(f"Q: {prompt}")
    print(f"A: {response.content[:100]}...")
    print()
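Like invoke and stream, batch has an async counterpart. A minimal sketch continuing from the example above; abatch awaits all prompts concurrently and preserves input order:

import asyncio

async def run_batch():
    # Run the requests concurrently on the event loop
    results = await llm.abatch(prompts)
    for p, r in zip(prompts, results):
        print(f"Q: {p}\nA: {r.content[:100]}...\n")

asyncio.run(run_batch())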

Caching Responses

from langchain_ollama import ChatOllama
from langchain_core.caches import InMemoryCache
from langchain_core.globals import set_llm_cache

# Enable caching
set_llm_cache(InMemoryCache())

llm = ChatOllama(model="llama3.2")

# First call - hits the model
response1 = llm.invoke("What is 2+2?")

# Second call - returns cached result instantly
response2 = llm.invoke("What is 2+2?")
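A quick way to see the cache working is to time both calls; the second should return almost immediately because it never reaches the model. Continuing from the example above:

import time

start = time.perf_counter()
llm.invoke("What is 2+2?")            # hits the model
first = time.perf_counter() - start

start = time.perf_counter()
llm.invoke("What is 2+2?")            # served from the in-memory cache
second = time.perf_counter() - start

print(f"First call: {first:.2f}s, cached call: {second:.4f}s")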

Key Points

Feature     | LangChain class
Chat model  | ChatOllama
Embeddings  | OllamaEmbeddings
Package     | langchain-ollama
JSON output | format="json"

LangChain abstracts away the complexity of building LLM applications. In the next lesson, we'll use LangGraph for stateful, multi-step workflows.
