Guardrails AI Framework
Hub Validators
Guardrails Hub provides pre-built validators for common validation needs. This lesson covers the most useful Hub validators and how to create custom validators.
Installing Hub Validators
# Install individual validators
guardrails hub install hub://guardrails/valid_length
guardrails hub install hub://guardrails/regex_match
guardrails hub install hub://guardrails/toxic_language
guardrails hub install hub://guardrails/detect_pii
guardrails hub install hub://guardrails/reading_level
guardrails hub install hub://guardrails/valid_choices
guardrails hub install hub://guardrails/similar_to_document
# List installed validators
guardrails hub list
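Once installed, a validator becomes importable from the guardrails.hub namespace. A minimal sanity check might look like the sketch below (the input string and limits are illustrative):
# Confirm an installed validator can be imported and applied
from guardrails import Guard
from guardrails.hub import ValidLength

guard = Guard().use(ValidLength(min=1, max=100))
print(guard.validate("hello hub").validation_passed)  # True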
Text Validators
ValidLength
from guardrails.hub import ValidLength
# Basic length validation
validator = ValidLength(min=10, max=500, on_fail="fix")
# With character or word count
validator = ValidLength(
    min=50,
    max=200,
    unit="words",  # "characters" or "words"
    on_fail="reask"
)
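To apply a validator, attach it to a Guard and validate a string. A minimal sketch (the text being validated is illustrative):
from guardrails import Guard

guard = Guard().use(ValidLength(min=10, max=500, on_fail="fix"))
outcome = guard.validate("This response is comfortably within the allowed length.")
print(outcome.validation_passed)  # True
print(outcome.validated_output)   # unchanged when the check passes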
RegexMatch
from guardrails.hub import RegexMatch
# Email validation
email_validator = RegexMatch(
    regex=r"^[\w\.-]+@[\w\.-]+\.\w+$",
    match_type="fullmatch",
    on_fail="exception"
)

# Phone number
phone_validator = RegexMatch(
    regex=r"^\+?1?\d{9,15}$",
    on_fail="fix"
)

# Custom format
order_id_validator = RegexMatch(
    regex=r"^ORD-[A-Z]{2}-\d{6}$",
    on_fail="fix"
)
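Because email_validator uses on_fail="exception", a non-matching value surfaces as an error rather than a silently fixed string. A rough sketch (the inputs are illustrative):
from guardrails import Guard

guard = Guard().use(email_validator)
guard.validate("support@example.com")  # passes
try:
    guard.validate("not-an-email")
except Exception as exc:  # validation error raised by the guard
    print(f"Rejected: {exc}")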
ValidChoices
from guardrails.hub import ValidChoices
# Enum-like validation
sentiment_validator = ValidChoices(
    choices=["positive", "negative", "neutral"],
    on_fail="fix"
)

# Case insensitive
category_validator = ValidChoices(
    choices=["Technology", "Finance", "Healthcare", "Retail"],
    case_sensitive=False,
    on_fail="reask"
)
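A quick check of the sentiment validator; a value outside the choices list would trigger the configured on_fail action (the exact fix behavior depends on the validator version):
from guardrails import Guard

guard = Guard().use(sentiment_validator)
outcome = guard.validate("positive")
print(outcome.validated_output)  # "positive" - already a valid choice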
Content Safety Validators
ToxicLanguage
from guardrails.hub import ToxicLanguage
# Basic toxicity check
toxicity_validator = ToxicLanguage(
    threshold=0.5,  # Score threshold (0-1)
    on_fail="exception"
)

# Stricter for customer-facing
strict_validator = ToxicLanguage(
    threshold=0.2,  # Lower = stricter
    validation_method="sentence",  # Check each sentence
    on_fail="exception"
)
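The same validator can screen raw model output before it reaches users. A minimal sketch (the output string is illustrative):
from guardrails import Guard

guard = Guard().use(strict_validator)
llm_output = "Thanks for reaching out - happy to help with your order."
result = guard.validate(llm_output)  # raises if any sentence exceeds the toxicity threshold
print(result.validation_passed)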
DetectPII
from guardrails.hub import DetectPII
# Block PII in outputs
pii_validator = DetectPII(
    pii_entities=[
        "EMAIL_ADDRESS",
        "PHONE_NUMBER",
        "CREDIT_CARD",
        "US_SSN",  # Presidio's entity name for US Social Security numbers
        "PERSON"
    ],
    on_fail="fix"  # Attempts to redact
)

# Anonymize rather than block - mask detected entities
mask_validator = DetectPII(
    pii_entities=["EMAIL_ADDRESS", "PHONE_NUMBER"],
    on_fail="fix",
    redact_with="[REDACTED]"
)
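Applied through a Guard, the fix action rewrites the offending spans. How the redaction is rendered depends on the installed validator version, so treat the output below as illustrative:
from guardrails import Guard

guard = Guard().use(mask_validator)
result = guard.validate("You can reach me at jane.doe@example.com or 555-010-1234.")
print(result.validated_output)  # email and phone number should come back masked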
Quality Validators
ReadingLevel
from guardrails.hub import ReadingLevel
# Ensure accessible content
reading_validator = ReadingLevel(
    level="8th grade",  # Flesch-Kincaid grade level
    on_fail="reask"
)

# For technical documentation
tech_docs_validator = ReadingLevel(
    level="12th grade",
    on_fail="fix"
)
SimilarToDocument
from guardrails.hub import SimilarToDocument
# Ensure response is grounded
grounding_validator = SimilarToDocument(
    document="Reference text or knowledge base content here...",
    threshold=0.7,  # Similarity threshold
    on_fail="reask"
)

# For RAG applications - retrieved_context holds the text returned by your retriever
rag_validator = SimilarToDocument(
    document=retrieved_context,
    threshold=0.6,
    on_fail="exception"
)
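In a RAG pipeline the reference document is only known at request time, so the validator is typically constructed per request from the retrieved chunks. A sketch, assuming retrieve() is your own (hypothetical) retriever returning a list of passage strings:
from guardrails import Guard

def grounded_guard(query: str) -> Guard:
    chunks = retrieve(query)  # hypothetical retriever call
    retrieved_context = "\n\n".join(chunks)
    return Guard().use(
        SimilarToDocument(
            document=retrieved_context,
            threshold=0.6,
            on_fail="exception"
        )
    )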
Combining Validators
from guardrails import Guard
from guardrails.hub import (
    ValidLength,
    ToxicLanguage,
    DetectPII,
    ReadingLevel,
    ValidChoices
)
from pydantic import BaseModel, Field
class CustomerSupportResponse(BaseModel):
    """Complete response schema with multiple validators."""

    greeting: str = Field(
        description="Professional greeting",
        json_schema_extra={
            "validators": [
                ValidLength(min=5, max=50, on_fail="fix"),
                ToxicLanguage(threshold=0.3, on_fail="exception")
            ]
        }
    )
    main_response: str = Field(
        description="Main response content",
        json_schema_extra={
            "validators": [
                ValidLength(min=50, max=500, on_fail="reask"),
                ToxicLanguage(threshold=0.2, on_fail="exception"),
                DetectPII(
                    pii_entities=["CREDIT_CARD", "US_SSN"],
                    on_fail="fix"
                ),
                ReadingLevel(level="8th grade", on_fail="fix")
            ]
        }
    )
    category: str = Field(
        description="Response category",
        json_schema_extra={
            "validators": [
                ValidChoices(
                    choices=["technical", "billing", "general"],
                    on_fail="fix"
                )
            ]
        }
    )

guard = Guard.for_pydantic(CustomerSupportResponse)
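Calling the guard runs the LLM request and validation in one step. The sketch below assumes Guardrails' LiteLLM-style calling convention; the model name and prompt are illustrative:
outcome = guard(
    model="gpt-4o-mini",  # illustrative; any LiteLLM-supported model id
    messages=[{
        "role": "user",
        "content": "I was charged twice for my subscription this month."
    }]
)
print(outcome.validated_output)  # dict matching CustomerSupportResponse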
Creating Custom Validators
from guardrails.validators import (
    Validator,
    register_validator,
    ValidationResult,
    PassResult,
    FailResult
)
from typing import Any, Dict

@register_validator(name="no_competitor_mentions", data_type="string")
class NoCompetitorMentions(Validator):
    """Custom validator to block competitor mentions."""

    def __init__(
        self,
        competitors: list[str],
        on_fail: str = "exception"
    ):
        super().__init__(on_fail=on_fail)
        self.competitors = [c.lower() for c in competitors]

    def validate(
        self,
        value: str,
        metadata: Dict[str, Any] = {}
    ) -> ValidationResult:
        """Check for competitor mentions."""
        value_lower = value.lower()
        for competitor in self.competitors:
            if competitor in value_lower:
                return FailResult(
                    error_message=f"Response mentions competitor: {competitor}",
                    fix_value=self._remove_mention(value, competitor)
                )
        return PassResult()

    def _remove_mention(self, text: str, competitor: str) -> str:
        """Remove competitor mention from text."""
        import re
        pattern = re.compile(re.escape(competitor), re.IGNORECASE)
        return pattern.sub("[COMPETITOR]", text)
# Usage
from pydantic import BaseModel, Field

class BrandSafeResponse(BaseModel):
    response: str = Field(
        json_schema_extra={
            "validators": [
                NoCompetitorMentions(
                    competitors=["CompetitorA", "CompetitorB"],
                    on_fail="fix"
                )
            ]
        }
    )
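The custom validator also works on its own, outside a Pydantic schema. A quick check using the guard's validate method (the sentence is illustrative):
from guardrails import Guard

guard = Guard().use(
    NoCompetitorMentions(competitors=["CompetitorA"], on_fail="fix")
)
result = guard.validate("CompetitorA offers a similar feature.")
print(result.validated_output)  # expect: "[COMPETITOR] offers a similar feature."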
ML-Based Custom Validator
from guardrails.validators import (
    Validator,
    register_validator,
    PassResult,
    FailResult
)
from transformers import pipeline

@register_validator(name="sentiment_check", data_type="string")
class SentimentCheck(Validator):
    """Ensure response has appropriate sentiment."""

    def __init__(
        self,
        required_sentiment: str = "positive",
        threshold: float = 0.7,
        on_fail: str = "reask"
    ):
        super().__init__(on_fail=on_fail)
        self.required_sentiment = required_sentiment
        self.threshold = threshold
        self._classifier = None  # model is loaded lazily on first use

    @property
    def classifier(self):
        if self._classifier is None:
            self._classifier = pipeline(
                "sentiment-analysis",
                model="distilbert-base-uncased-finetuned-sst-2-english"
            )
        return self._classifier

    def validate(self, value: str, metadata: dict = {}):
        result = self.classifier(value[:512])[0]  # truncate long inputs for the model
        if result["label"].lower() == self.required_sentiment:
            if result["score"] >= self.threshold:
                return PassResult()
        return FailResult(
            error_message=f"Response sentiment is {result['label']} "
                          f"(score: {result['score']:.2f}), "
                          f"expected {self.required_sentiment}"
        )
# Usage
class PositiveResponse(BaseModel):
    message: str = Field(
        json_schema_extra={
            "validators": [
                SentimentCheck(
                    required_sentiment="positive",
                    threshold=0.8,
                    on_fail="reask"
                )
            ]
        }
    )
Validator Execution Order
from guardrails import Guard
from guardrails.hub import ValidLength, ToxicLanguage, DetectPII
# Validators execute in the order they are defined
guard = Guard().use_many(
    ValidLength(min=10, on_fail="reask"),  # 1st: Check length
    ToxicLanguage(on_fail="exception"),    # 2nd: Check toxicity
    DetectPII(on_fail="fix")               # 3rd: Mask PII
)

# Short-circuit on failure:
# if a validator configured with on_fail="exception" fails (ToxicLanguage here),
# the guard raises immediately and subsequent validators don't run.
Validator Strategy: Order validators from cheapest to most expensive. Put fast regex checks first, ML-based validators last. Use "exception" for critical safety checks to fail fast.
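For example, a cheap structural check can reject malformed output before any ML model is even loaded. A sketch of that ordering (the regex and thresholds are illustrative):
from guardrails import Guard
from guardrails.hub import RegexMatch, ValidLength, ToxicLanguage

guard = Guard().use_many(
    RegexMatch(regex=r"^[A-Za-z0-9 .,!?'-]+$", on_fail="exception"),  # cheap: regex
    ValidLength(min=10, max=500, on_fail="reask"),                    # cheap: length
    ToxicLanguage(threshold=0.3, on_fail="exception")                 # expensive: ML model
)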
Next: LiteLLM integration for multi-provider support.