
Advanced Local LLM Patterns

Function Calling Locally

Function calling (tool use) allows LLMs to interact with external systems. Let's implement this pattern with local models.

How Local Function Calling Works

┌─────────────────────────────────────────────────────────────────┐
│                  Local Function Calling Flow                     │
├─────────────────────────────────────────────────────────────────┤
│                                                                 │
│  1. User Query ──► LLM analyzes and decides to call a function  │
│                                                                 │
│  2. LLM Output ──► JSON with function name and arguments        │
│                                                                 │
│  3. Your Code ──► Parses JSON, executes the function            │
│                                                                 │
│  4. Result ──► Fed back to LLM for final response               │
│                                                                 │
│  Note: Local models output text. You parse it as function calls │
│                                                                 │
└─────────────────────────────────────────────────────────────────┘
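For example, at step 2 the model's raw output for "What's the weather in Paris?" would ideally be the single line {"function": "get_weather", "arguments": {"city": "Paris"}}, which your code at step 3 parses and dispatches. In practice, smaller models often wrap the JSON in extra prose, which is why the robust parser later in this lesson matters.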

Basic Implementation

import ollama
import json
from typing import Callable

# Define available functions
def get_weather(city: str) -> str:
    """Get weather for a city (mock implementation)."""
    weather_data = {
        "paris": "Sunny, 22°C",
        "london": "Cloudy, 15°C",
        "tokyo": "Rainy, 18°C"
    }
    return weather_data.get(city.lower(), f"Weather data not available for {city}")

def calculate(expression: str) -> str:
    """Safely evaluate a math expression."""
    try:
        # Restrict input to arithmetic characters before eval
        allowed = set("0123456789+-*/(). ")
        if all(c in allowed for c in expression):
            result = eval(expression)
            return str(result)
        return "Invalid expression"
    except Exception as e:
        return f"Error: {e}"

# Function registry
FUNCTIONS: dict[str, Callable[..., str]] = {
    "get_weather": get_weather,
    "calculate": calculate
}

# System prompt for function calling
SYSTEM_PROMPT = """You are a helpful assistant with access to these functions:

1. get_weather(city: str) - Get current weather for a city
2. calculate(expression: str) - Calculate a math expression

When you need to use a function, respond ONLY with JSON in this format:
{"function": "function_name", "arguments": {"arg_name": "value"}}

If you don't need a function, respond normally.
"""

def process_with_functions(user_query: str) -> str:
    """Process a query with function calling support."""
    # First LLM call - decide if function is needed
    response = ollama.chat(
        model="llama3.2",
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_query}
        ]
    )

    content = response["message"]["content"].strip()

    # Try to parse as function call
    try:
        if content.startswith("{"):
            call = json.loads(content)
            if "function" in call:
                func_name = call["function"]
                args = call.get("arguments", {})

                if func_name in FUNCTIONS:
                    # Execute the function
                    result = FUNCTIONS[func_name](**args)

                    # Second LLM call - generate final response
                    response = ollama.chat(
                        model="llama3.2",
                        messages=[
                            {"role": "system", "content": SYSTEM_PROMPT},
                            {"role": "user", "content": user_query},
                            {"role": "assistant", "content": content},
                            {"role": "user", "content": f"Function result: {result}"}
                        ]
                    )
                    return response["message"]["content"]
    except json.JSONDecodeError:
        pass

    return content

# Test
print(process_with_functions("What's the weather in Paris?"))
print(process_with_functions("Calculate 15 * 7 + 23"))
print(process_with_functions("Hello, how are you?"))

Robust Function Parser

import json
import re

def extract_function_call(text: str) -> dict | None:
    """Extract function call from LLM output, handling various formats."""
    # Try direct JSON parsing first; the regex allows one level of nested
    # braces so {"function": ..., "arguments": {...}} matches in full
    try:
        json_match = re.search(r'\{(?:[^{}]|\{[^{}]*\})*\}', text)
        if json_match:
            return json.loads(json_match.group())
    except json.JSONDecodeError:
        pass

    # Try parsing structured text
    patterns = [
        r'function:\s*(\w+)\s*arguments?:\s*(.+)',
        r'call\s+(\w+)\s+with\s+(.+)',
    ]

    for pattern in patterns:
        match = re.search(pattern, text, re.IGNORECASE)
        if match:
            return {
                "function": match.group(1),
                "arguments": {"input": match.group(2).strip()}
            }

    return None
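A quick check of the parser on two common output shapes (illustrative results, using the functions from the basic implementation):

# The parser tolerates chatty text around the JSON and falls back to
# pattern matching for loosely structured replies.
print(extract_function_call('Sure! {"function": "get_weather", "arguments": {"city": "Paris"}}'))
# -> {'function': 'get_weather', 'arguments': {'city': 'Paris'}}

print(extract_function_call("call calculate with 15 * 7"))
# -> {'function': 'calculate', 'arguments': {'input': '15 * 7'}}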

LangChain Tool Integration

from langchain_ollama import ChatOllama
from langchain_core.tools import tool
from langchain_core.messages import HumanMessage, ToolMessage

@tool
def search_database(query: str) -> str:
    """Search the internal database for information."""
    # Mock database search
    data = {
        "revenue": "Q4 2024 revenue was $1.2M",
        "employees": "Current employee count is 45",
        "products": "We offer 3 main products"
    }
    for key, value in data.items():
        if key in query.lower():
            return value
    return "No matching data found"

@tool
def send_email(to: str, subject: str, body: str) -> str:
    """Send an email to the specified recipient."""
    # Mock email sending
    return f"Email sent to {to} with subject: {subject}"

# Create LLM with tools
llm = ChatOllama(model="llama3.2")

# Bind tools (for models that support native tool calling)
llm_with_tools = llm.bind_tools([search_database, send_email])

# For models without native tool calling, fall back to a prompt-based approach
tools_description = """
Available tools:
1. search_database(query: str) - Search internal database
2. send_email(to: str, subject: str, body: str) - Send email

Respond with JSON: {"tool": "name", "args": {...}} when using a tool.
"""

def agent_step(user_input: str) -> str:
    """Single agent step with prompt-based tool use.

    The returned text may contain a JSON tool call; parse it with a helper
    like extract_function_call() from the previous section before executing.
    """
    response = llm.invoke(f"{tools_description}\n\nUser: {user_input}")
    return response.content
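When the model does support native tool calling (recent llama3.2 builds via Ollama do), the bound tools come back as structured tool_calls on the response message, and results are returned with ToolMessage. A minimal sketch of that loop:

# Native tool-calling loop: the model emits structured tool calls
# instead of raw JSON text, so no regex parsing is needed.
messages = [HumanMessage(content="What was our Q4 revenue?")]
ai_msg = llm_with_tools.invoke(messages)

if ai_msg.tool_calls:
    messages.append(ai_msg)
    tool_map = {"search_database": search_database, "send_email": send_email}
    for call in ai_msg.tool_calls:
        result = tool_map[call["name"]].invoke(call["args"])
        messages.append(ToolMessage(content=str(result), tool_call_id=call["id"]))
    # Second pass: the model sees the tool results and writes the final answer
    print(llm_with_tools.invoke(messages).content)
else:
    print(ai_msg.content)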

Multi-Turn Tool Agent

import ollama
import json
import re
from datetime import datetime
from typing import Callable

class LocalToolAgent:
    """Agent that can use tools across multiple turns."""

    def __init__(self, model: str = "llama3.2"):
        self.model = model
        self.tools = {}
        self.conversation = []

    def register_tool(self, name: str, func: Callable, description: str):
        """Register a tool for the agent to use."""
        self.tools[name] = {"function": func, "description": description}

    def _get_tools_prompt(self) -> str:
        """Generate tools description for the prompt."""
        lines = ["Available tools:"]
        for name, info in self.tools.items():
            lines.append(f"- {name}: {info['description']}")
        lines.append("")
        lines.append('To use a tool, respond with: {"tool": "name", "args": {...}}')
        lines.append("After using a tool, you'll see the result and can respond to the user.")
        return "\n".join(lines)

    def run(self, user_input: str) -> str:
        """Process user input, potentially using tools."""
        self.conversation.append({"role": "user", "content": user_input})

        # Build messages
        messages = [
            {"role": "system", "content": self._get_tools_prompt()}
        ] + self.conversation

        response = ollama.chat(model=self.model, messages=messages)
        content = response["message"]["content"]

        # Check for tool call
        tool_call = self._parse_tool_call(content)
        if tool_call:
            # Execute tool
            tool_name = tool_call["tool"]
            args = tool_call.get("args", {})

            if tool_name in self.tools:
                result = self.tools[tool_name]["function"](**args)

                # Add to conversation
                self.conversation.append({"role": "assistant", "content": content})
                self.conversation.append({
                    "role": "user",
                    "content": f"Tool result for {tool_name}: {result}"
                })

                # Get final response
                final_response = ollama.chat(
                    model=self.model,
                    messages=[
                        {"role": "system", "content": self._get_tools_prompt()}
                    ] + self.conversation
                )
                final_content = final_response["message"]["content"]
                self.conversation.append({"role": "assistant", "content": final_content})
                return final_content

        self.conversation.append({"role": "assistant", "content": content})
        return content

    def _parse_tool_call(self, text: str) -> dict | None:
        """Parse a tool call from the response, allowing nested braces in args."""
        try:
            match = re.search(r'\{(?:[^{}]|\{[^{}]*\})*"tool"(?:[^{}]|\{[^{}]*\})*\}', text)
            if match:
                return json.loads(match.group())
        except json.JSONDecodeError:
            pass
        return None

# Usage
agent = LocalToolAgent()

agent.register_tool(
    "get_time",
    lambda: datetime.now().strftime("%H:%M"),
    "Get the current time"
)

agent.register_tool(
    "search",
    lambda query: f"Search results for '{query}': [Result 1, Result 2]",
    "Search for information"
)

print(agent.run("What time is it?"))
print(agent.run("Search for Python tutorials"))

Best Practices

- Clear tool descriptions: help the LLM understand when to use each tool.
- Validate arguments: LLM output can be malformed (see the sketch below).
- Handle errors gracefully: tools can fail.
- Limit the tool count: too many tools confuse smaller models.
- Use JSON format: it is the most reliably parseable format for function calls.
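The argument-validation point deserves a concrete pattern. One lightweight approach (a sketch using Python's standard inspect module; validate_call is a helper name introduced here) is to bind the parsed arguments against the target function's signature before executing:

import inspect

def validate_call(func, args: dict) -> str | None:
    """Return an error message if args don't match func's signature, else None."""
    try:
        inspect.signature(func).bind(**args)  # raises TypeError on mismatch
        return None
    except TypeError as e:
        return f"Invalid arguments: {e}"

# Example: reject a bad call before it reaches the tool
error = validate_call(get_weather, {"town": "Paris"})  # wrong parameter name
if error:
    print(error)  # Invalid arguments: got an unexpected keyword argument 'town'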

Function calling extends local LLMs beyond text generation. Next, we'll explore multi-model workflows.
