Building Applications with Ollama
LangChain with Ollama
3 min read
LangChain provides high-level abstractions for building LLM applications. The langchain-ollama package connects those abstractions to models served by a local Ollama instance.
Installation
# Install the official Ollama integration (not langchain-community)
pip install langchain-ollama langchain
Basic Chat Model
from langchain_ollama import ChatOllama
# Initialize the chat model
llm = ChatOllama(
    model="llama3.2",
    temperature=0.7
)
# Simple invocation
response = llm.invoke("What is the meaning of life?")
print(response.content)
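Note that invoke returns an AIMessage rather than a plain string. Besides .content it carries metadata about the call; the exact fields depend on your langchain-ollama version, so treat this as a rough sketch:
from langchain_ollama import ChatOllama

llm = ChatOllama(model="llama3.2", temperature=0.7)
response = llm.invoke("What is the meaning of life?")

# AIMessage fields beyond .content (names and values vary by version)
print(type(response).__name__)     # AIMessage
print(response.response_metadata)  # backend details, e.g. model and timings
print(response.usage_metadata)     # token counts, if reported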
Using Message Types
from langchain_ollama import ChatOllama
from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
llm = ChatOllama(model="llama3.2")
messages = [
    SystemMessage(content="You are a helpful Python tutor."),
    HumanMessage(content="What's a decorator?"),
]
response = llm.invoke(messages)
print(response.content)
# Continue the conversation
messages.append(response) # Add AI response
messages.append(HumanMessage(content="Can you show me an example?"))
response2 = llm.invoke(messages)
print(response2.content)
Streaming Responses
from langchain_ollama import ChatOllama
llm = ChatOllama(model="llama3.2")
# Stream tokens
for chunk in llm.stream("Explain recursion step by step"):
    print(chunk.content, end="", flush=True)
print()
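Inside an async application (for example a FastAPI handler), the same model exposes astream. A minimal sketch:
import asyncio
from langchain_ollama import ChatOllama

async def main():
    llm = ChatOllama(model="llama3.2")
    # astream yields chunks asynchronously instead of blocking the event loop
    async for chunk in llm.astream("Explain recursion step by step"):
        print(chunk.content, end="", flush=True)
    print()

asyncio.run(main())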
Prompt Templates
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate
llm = ChatOllama(model="llama3.2")
# Create a reusable prompt template
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a {role}. Be concise and helpful."),
    ("human", "{question}")
])
# Create the chain
chain = prompt | llm
# Use the chain
response = chain.invoke({
    "role": "senior Python developer",
    "question": "What's the difference between a list and a tuple?"
})
print(response.content)
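If you only want the text, append StrOutputParser so the chain returns a plain string instead of an AIMessage. A small sketch reusing the same template (the question is illustrative):
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

llm = ChatOllama(model="llama3.2")

prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a {role}. Be concise and helpful."),
    ("human", "{question}")
])

# StrOutputParser converts the AIMessage into a plain string
chain = prompt | llm | StrOutputParser()

answer = chain.invoke({
    "role": "senior Python developer",
    "question": "When should I use a dataclass?"
})
print(answer)  # already a str, no .content needed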
Embeddings
from langchain_ollama import OllamaEmbeddings
# Initialize embeddings
embeddings = OllamaEmbeddings(model="llama3.2")
# Single text
vector = embeddings.embed_query("What is machine learning?")
print(f"Embedding dimension: {len(vector)}")
# Multiple texts
texts = ["Python programming", "JavaScript development", "AI research"]
vectors = embeddings.embed_documents(texts)
print(f"Generated {len(vectors)} embeddings")
Building a Simple RAG Chain
from langchain_ollama import ChatOllama, OllamaEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
# Components
llm = ChatOllama(model="llama3.2")
embeddings = OllamaEmbeddings(model="llama3.2")
# Simple in-memory document store (for demo)
documents = [
    "Python was created by Guido van Rossum in 1991.",
    "Python emphasizes code readability and simplicity.",
    "Python supports multiple programming paradigms."
]
# Embed documents
doc_embeddings = embeddings.embed_documents(documents)
def simple_retriever(query: str) -> str:
    """Find most relevant document using cosine similarity."""
    import numpy as np
    query_embedding = embeddings.embed_query(query)
    similarities = [
        np.dot(query_embedding, doc_emb) /
        (np.linalg.norm(query_embedding) * np.linalg.norm(doc_emb))
        for doc_emb in doc_embeddings
    ]
    best_idx = np.argmax(similarities)
    return documents[best_idx]
# RAG prompt
prompt = ChatPromptTemplate.from_template("""
Answer the question based only on the following context:
Context: {context}
Question: {question}
Answer:""")
# Build the chain
rag_chain = (
    {"context": lambda x: simple_retriever(x), "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)
# Use the chain
answer = rag_chain.invoke("Who created Python?")
print(answer)
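For anything beyond a demo, you would swap the hand-rolled retriever for a vector store. A sketch using InMemoryVectorStore from langchain-core (available in recent releases); the k value and model choices are illustrative, not requirements:
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_ollama import ChatOllama, OllamaEmbeddings

llm = ChatOllama(model="llama3.2")
embeddings = OllamaEmbeddings(model="llama3.2")

documents = [
    "Python was created by Guido van Rossum in 1991.",
    "Python emphasizes code readability and simplicity.",
    "Python supports multiple programming paradigms."
]

# Index the texts and expose them as a retriever returning Document objects
store = InMemoryVectorStore.from_texts(documents, embedding=embeddings)
retriever = store.as_retriever(search_kwargs={"k": 1})

def format_docs(docs):
    return "\n".join(doc.page_content for doc in docs)

prompt = ChatPromptTemplate.from_template("""
Answer the question based only on the following context:
Context: {context}
Question: {question}
Answer:""")

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

print(rag_chain.invoke("Who created Python?"))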
Output Parsing
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from pydantic import BaseModel, Field
class CodeReview(BaseModel):
    """Structured code review output."""
    issues: list[str] = Field(description="List of issues found")
    suggestions: list[str] = Field(description="Improvement suggestions")
    score: int = Field(description="Code quality score 1-10")
llm = ChatOllama(model="llama3.2", format="json")
prompt = ChatPromptTemplate.from_messages([
    ("system", "Review the code and respond in JSON with: issues, suggestions, score (1-10)"),
    ("human", "Review this code:\n```python\n{code}\n```")
])
# Passing the Pydantic model tells the parser which schema to expect
chain = prompt | llm | JsonOutputParser(pydantic_object=CodeReview)
code = """
def calc(x):
return x*2+1
"""
review = chain.invoke({"code": code})
print(f"Score: {review['score']}/10")
print(f"Issues: {review['issues']}")
Configuring Ollama Options
from langchain_ollama import ChatOllama
# Full configuration
llm = ChatOllama(
    model="llama3.2",
    temperature=0.8,
    num_ctx=4096,       # Context window
    num_predict=256,    # Max tokens
    top_k=40,
    top_p=0.9,
    repeat_penalty=1.1,
    stop=["Human:", "User:"],          # Stop sequences
    base_url="http://localhost:11434"  # Custom endpoint
)
Batch Processing
from langchain_ollama import ChatOllama
llm = ChatOllama(model="llama3.2")
# Process multiple prompts
prompts = [
    "What is Python?",
    "What is JavaScript?",
    "What is Rust?"
]
# Batch invoke
responses = llm.batch(prompts)
for prompt, response in zip(prompts, responses):
    print(f"Q: {prompt}")
    print(f"A: {response.content[:100]}...")
    print()
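batch accepts a standard RunnableConfig, so you can cap concurrency to avoid overloading a local Ollama server. A small sketch:
from langchain_ollama import ChatOllama

llm = ChatOllama(model="llama3.2")

prompts = ["What is Python?", "What is JavaScript?", "What is Rust?"]

# max_concurrency limits how many requests run in parallel
responses = llm.batch(prompts, config={"max_concurrency": 2})
print(f"Received {len(responses)} responses")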
Caching Responses
from langchain_ollama import ChatOllama
from langchain_core.caches import InMemoryCache
from langchain.globals import set_llm_cache
# Enable caching
set_llm_cache(InMemoryCache())
llm = ChatOllama(model="llama3.2")
# First call - hits the model
response1 = llm.invoke("What is 2+2?")
# Second call - returns cached result instantly
response2 = llm.invoke("What is 2+2?")
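InMemoryCache only lives for the current process. If you want cached responses to survive restarts, langchain-community ships a SQLite-backed cache; a sketch assuming that package is installed (the database path is an arbitrary choice):
from langchain_ollama import ChatOllama
from langchain_community.cache import SQLiteCache
from langchain.globals import set_llm_cache

# Persist cached responses to a local SQLite file
set_llm_cache(SQLiteCache(database_path=".langchain.db"))

llm = ChatOllama(model="llama3.2")
llm.invoke("What is 2+2?")  # computed and written to the cache
llm.invoke("What is 2+2?")  # served from .langchain.db, even after a restart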
Key Points
| Feature | Details |
|---|---|
| Chat model | ChatOllama |
| Embeddings | OllamaEmbeddings |
| Package | langchain-ollama |
| JSON output | format="json" |
LangChain abstracts away the complexity of building LLM applications. In the next lesson, we'll use LangGraph for stateful, multi-step workflows.