Guardrails AI Framework
Pydantic Schema Validation
3 min read
Guardrails AI leverages Pydantic for type-safe schema definitions. This lesson covers building complex schemas with nested objects, custom validators, and production patterns.
Basic Schema Patterns
Simple Fields
from pydantic import BaseModel, Field
from typing import Optional, List
from datetime import datetime
class SimpleResponse(BaseModel):
    """Flat response schema showing typed fields with simple constraints."""

    # Required text, constrained to 1-100 characters.
    title: str = Field(description="Response title", min_length=1, max_length=100)

    # Required free-form text.
    content: str = Field(description="Main response content")

    # Required score, bounded to the closed interval [0.0, 1.0].
    confidence: float = Field(description="Confidence score", ge=0.0, le=1.0)

    # Optional; defaults to None when omitted.
    category: Optional[str] = Field(default=None, description="Optional category")

    # default_factory=list is used instead of a literal [] default.
    tags: List[str] = Field(default_factory=list, description="List of relevant tags")
Nested Objects
from pydantic import BaseModel, Field
from typing import List, Optional
from enum import Enum
class Priority(str, Enum):
    """Priority levels for an action item.

    Mixing in ``str`` makes every member a string equal to its value.
    """

    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"
class ActionItem(BaseModel):
    """A single follow-up task, intended to be nested inside a larger schema."""

    # The only required field.
    description: str = Field(description="What needs to be done")

    # Optional details; both default to None.
    assignee: Optional[str] = Field(default=None)
    due_date: Optional[str] = Field(default=None)

    # Defaults to Priority.MEDIUM when not supplied.
    priority: Priority = Field(default=Priority.MEDIUM)
class MeetingNotes(BaseModel):
    """Meeting record that nests a list of ``ActionItem`` objects."""

    title: str = Field(description="Meeting title")
    # Dates are carried as plain strings; the format is only stated in the description.
    date: str = Field(description="Meeting date (YYYY-MM-DD)")
    attendees: List[str] = Field(description="List of attendee names")
    summary: str = Field(description="Brief meeting summary")

    # Nested models: each entry is validated as an ActionItem.
    action_items: List[ActionItem] = Field(description="Action items from the meeting")

    # Optional; defaults to None.
    next_meeting: Optional[str] = Field(
        default=None, description="Next meeting date if scheduled"
    )
Custom Pydantic Validators
from pydantic import BaseModel, Field, field_validator, model_validator
from typing import List
import re
class CustomerTicket(BaseModel):
    """Support ticket schema whose fields are checked by custom validators.

    Field validators check (and for some fields normalize) individual values;
    the model validator runs afterwards and compares two fields.
    """

    ticket_id: str = Field(description="Ticket ID in format TKT-XXXXXX")
    customer_email: str = Field(description="Customer email address")
    issue_type: str = Field(description="Type of issue")
    description: str = Field(description="Issue description")
    suggested_resolution: str = Field(description="Suggested resolution")

    @field_validator("ticket_id")
    @classmethod
    def validate_ticket_id(cls, v: str) -> str:
        """Ensure the ticket ID is ``TKT-`` followed by exactly six ASCII digits.

        Raises:
            ValueError: If the value does not match the expected format.
        """
        # fullmatch rejects a trailing newline that "^...$" with re.match would
        # let through; [0-9] keeps non-ASCII digit characters out (\d allows them).
        if not re.fullmatch(r"TKT-[0-9]{6}", v):
            raise ValueError("Ticket ID must be in format TKT-XXXXXX")
        return v

    @field_validator("customer_email")
    @classmethod
    def validate_email(cls, v: str) -> str:
        """Apply a basic shape check to the email and return it lower-cased.

        The value must contain "@", have a non-empty part before the last "@",
        and have a "." somewhere after it. This is a sanity check only, not
        full address validation.

        Raises:
            ValueError: If the value fails the shape check.
        """
        local_part, at_sign, domain = v.rpartition("@")
        # An empty at_sign means no "@" at all; an empty local_part means the
        # address started with "@" (e.g. "@example.com").
        if not at_sign or not local_part or "." not in domain:
            raise ValueError("Invalid email format")
        return v.lower()

    @field_validator("issue_type")
    @classmethod
    def validate_issue_type(cls, v: str) -> str:
        """Ensure the issue type is in the allowed list (case-insensitive).

        Returns:
            The lower-cased issue type.

        Raises:
            ValueError: If the lower-cased value is not an allowed issue type.
        """
        allowed = ["billing", "technical", "account", "general"]
        normalized = v.lower()
        if normalized not in allowed:
            raise ValueError(f"Issue type must be one of: {allowed}")
        return normalized

    @model_validator(mode="after")
    def validate_resolution_length(self) -> "CustomerTicket":
        """Ensure the resolution is at least half as long as the description.

        The threshold is ``len(description) // 2`` characters (integer division).

        Raises:
            ValueError: If the suggested resolution is shorter than the threshold.
        """
        if len(self.suggested_resolution) < len(self.description) // 2:
            raise ValueError("Resolution should be detailed enough")
        return self
Guardrails Hub Validators
from guardrails import Guard
from guardrails.hub import (
ValidLength,
RegexMatch,
ValidChoices,
ToxicLanguage,
ReadingLevel,
SimilarToDocument
)
from pydantic import BaseModel, Field
class SupportResponse(BaseModel):
    """Customer-support reply schema with Guardrails Hub validators per field.

    Validators are attached to each field through ``json_schema_extra`` under
    the ``"validators"`` key; each one carries its own ``on_fail`` setting.
    """

    # Short greeting: length-bounded and checked for toxic language.
    greeting: str = Field(
        description="Professional greeting",
        json_schema_extra={
            "validators": [
                ValidLength(min=10, max=50, on_fail="fix"),
                ToxicLanguage(threshold=0.3, on_fail="exception"),
            ]
        },
    )

    # Main body: length, reading level, and a lower toxicity threshold than the greeting.
    response: str = Field(
        description="Main response to customer",
        json_schema_extra={
            "validators": [
                ValidLength(min=50, max=500, on_fail="reask"),
                ReadingLevel(level="8th grade", on_fail="fix"),
                ToxicLanguage(threshold=0.1, on_fail="exception"),
            ]
        },
    )

    # Tone is limited to a fixed set of choices.
    tone: str = Field(
        description="Response tone",
        json_schema_extra={
            "validators": [
                ValidChoices(
                    choices=["professional", "friendly", "empathetic"],
                    on_fail="fix",
                )
            ]
        },
    )

    # Reference pattern: "REF-", three uppercase letters, "-", four digits.
    reference_number: str = Field(
        description="Support reference number",
        json_schema_extra={
            "validators": [
                RegexMatch(regex=r"^REF-[A-Z]{3}-\d{4}$", on_fail="fix")
            ]
        },
    )
Complex Nested Schemas
from pydantic import BaseModel, Field
from typing import List, Optional, Dict, Any
from enum import Enum
class Severity(str, Enum):
    """Severity levels for an analysis finding.

    Mixing in ``str`` makes every member a string equal to its value.
    """

    INFO = "info"
    WARNING = "warning"
    ERROR = "error"
    CRITICAL = "critical"
class Finding(BaseModel):
    """One finding within an analysis report; every field is required."""

    title: str = Field(description="Finding title")
    # Must be one of the Severity enum members.
    severity: Severity = Field(description="Severity level")
    description: str = Field(description="Detailed description")
    recommendation: str = Field(description="Recommended action")
    affected_areas: List[str] = Field(description="Affected areas")
class RiskAssessment(BaseModel):
    """Overall risk score together with the factors behind it."""

    # Bounded to the closed interval [0, 10].
    overall_score: float = Field(ge=0, le=10, description="Risk score 0-10")
    # Maps a factor name to its numeric value.
    factors: Dict[str, float] = Field(description="Individual risk factors")
    mitigation_priority: str = Field(description="Priority level")
class AnalysisReport(BaseModel):
    """Top-level report schema composed of findings and a risk assessment."""

    report_id: str = Field(description="Unique report identifier")
    timestamp: str = Field(description="ISO timestamp")
    subject: str = Field(description="What was analyzed")

    # Length is checked by a Guardrails validator attached via json_schema_extra.
    executive_summary: str = Field(
        description="Brief summary for executives",
        json_schema_extra={"validators": [ValidLength(min=100, max=300)]},
    )

    # At least one nested Finding is required.
    findings: List[Finding] = Field(description="List of findings", min_length=1)

    risk_assessment: RiskAssessment = Field(description="Overall risk assessment")

    # Between 1 and 10 entries.
    recommendations: List[str] = Field(
        description="Prioritized recommendations", min_length=1, max_length=10
    )

    # Optional free-form mapping; defaults to None.
    metadata: Optional[Dict[str, Any]] = Field(
        default=None, description="Additional metadata"
    )
Dynamic Schema Generation
from typing import Any, Dict, Optional, Type

from pydantic import BaseModel, Field, create_model
def create_extraction_schema(
    fields: Dict[str, Dict[str, Any]],
    model_name: str = "DynamicExtraction",
) -> Type[BaseModel]:
    """
    Dynamically create a Pydantic schema from field definitions.

    Args:
        fields: Mapping of field name to its configuration, in the format:
            {
                "field_name": {
                    "type": str | int | float | bool,
                    "description": "Field description",  # Optional, default ""
                    "required": True | False,            # Optional, default True
                    "validators": [...]                  # Optional
                }
            }
        model_name: Name given to the generated model class.

    Returns:
        A new model class with one field per entry in ``fields``. Fields
        marked as not required are typed ``Optional[...]`` and default to None.

    Raises:
        KeyError: If a field configuration has no "type" entry.
    """
    field_definitions = {}

    for name, config in fields.items():
        field_type = config["type"]
        validators = config.get("validators", [])

        # Build the Field arguments once; only the optional case adds a default.
        field_kwargs: Dict[str, Any] = {"description": config.get("description", "")}
        if validators:
            field_kwargs["json_schema_extra"] = {"validators": validators}

        if not config.get("required", True):
            # Optional[...] matches the rest of the file and, unlike
            # `field_type | None`, does not depend on the type object
            # supporting the | operator.
            field_type = Optional[field_type]
            field_kwargs["default"] = None

        field_definitions[name] = (field_type, Field(**field_kwargs))

    return create_model(model_name, **field_definitions)
# Usage: two required text fields plus one optional numeric field,
# then wrap the generated model in a Guard.
schema = create_extraction_schema(
    {
        "company_name": {
            "type": str, "description": "Company name", "required": True,
        },
        "revenue": {
            "type": float, "description": "Annual revenue in millions", "required": False,
        },
        "industry": {
            "type": str, "description": "Industry category", "required": True,
        },
    }
)
guard = Guard.for_pydantic(schema)
Schema Composition
from pydantic import BaseModel, Field
from typing import Union
class EmailResponse(BaseModel):
    """Response payload for the email channel; all three parts are required."""

    subject: str = Field(description="Email subject line")
    body: str = Field(description="Email body")
    sign_off: str = Field(description="Email signature")
class ChatResponse(BaseModel):
    """Response payload for the chat channel."""

    message: str = Field(description="Chat message")
    # Required list of quick-reply strings.
    suggested_actions: List[str] = Field(description="Quick reply options")
class TicketResponse(BaseModel):
    """Response payload for the ticket channel."""

    ticket_update: str = Field(description="Ticket status update")
    # Internal and customer-facing text are kept in separate fields.
    internal_notes: str = Field(description="Internal notes")
    customer_message: str = Field(description="Customer-facing message")
class UnifiedResponse(BaseModel):
    """Envelope that carries one channel-specific response model."""

    # Free-form string; the allowed values are only listed in the description.
    channel: str = Field(description="Response channel: email, chat, or ticket")

    # Exactly one of the three channel payload models.
    response: Union[EmailResponse, ChatResponse, TicketResponse] = Field(
        description="Channel-specific response"
    )

    # Bounded to the closed interval [0, 1].
    confidence: float = Field(ge=0, le=1, description="Response confidence")
Schema Design Tip: Keep schemas focused and composable. Use inheritance and composition to build complex structures from simple, tested components.
Next: Exploring Guardrails Hub validators for common use cases.