Guardrails AI Framework

Hub Validators

Guardrails Hub provides pre-built validators for common validation needs. This lesson covers the most useful Hub validators and how to create custom validators.

Installing Hub Validators

# Install individual validators
guardrails hub install hub://guardrails/valid_length
guardrails hub install hub://guardrails/regex_match
guardrails hub install hub://guardrails/toxic_language
guardrails hub install hub://guardrails/detect_pii
guardrails hub install hub://guardrails/reading_level
guardrails hub install hub://guardrails/valid_choices
guardrails hub install hub://guardrails/similar_to_document

# List installed validators
guardrails hub list
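
Once installed, a validator is importable from guardrails.hub. A minimal smoke test, assuming the valid_length validator from above has been installed:

from guardrails import Guard
from guardrails.hub import ValidLength

# Wrap a single installed validator in a Guard and validate a plain string
guard = Guard().use(ValidLength(min=10, max=500, on_fail="noop"))

outcome = guard.validate("This sentence is comfortably within the length limits.")
print(outcome.validation_passed)  # True if the string satisfies ValidLength
print(outcome.validated_output)   # the output text (possibly fixed by validators)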

Text Validation Validators

ValidLength

from guardrails.hub import ValidLength

# Basic length validation
validator = ValidLength(min=10, max=500, on_fail="fix")

# With character or word count
validator = ValidLength(
    min=50,
    max=200,
    unit="words",  # "characters" or "words"
    on_fail="reask"
)

RegexMatch

from guardrails.hub import RegexMatch

# Email validation
email_validator = RegexMatch(
    regex=r"^[\w\.-]+@[\w\.-]+\.\w+$",
    match_type="fullmatch",
    on_fail="exception"
)

# Phone number
phone_validator = RegexMatch(
    regex=r"^\+?1?\d{9,15}$",
    on_fail="fix"
)

# Custom format
order_id_validator = RegexMatch(
    regex=r"^ORD-[A-Z]{2}-\d{6}$",
    on_fail="fix"
)
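
With on_fail="exception", a non-matching value raises instead of being returned. A quick sketch of both paths using the email validator above (the exact exception class varies by Guardrails version, so it is caught broadly here):

from guardrails import Guard

guard = Guard().use(email_validator)

# A well-formed address passes the fullmatch
print(guard.validate("support@example.com").validation_passed)

# A malformed address fails the regex, so the validator raises
try:
    guard.validate("not-an-email")
except Exception as exc:
    print(f"Validation failed: {exc}")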

ValidChoices

from guardrails.hub import ValidChoices

# Enum-like validation
sentiment_validator = ValidChoices(
    choices=["positive", "negative", "neutral"],
    on_fail="fix"
)

# Case insensitive
category_validator = ValidChoices(
    choices=["Technology", "Finance", "Healthcare", "Retail"],
    case_sensitive=False,
    on_fail="reask"
)

Content Safety Validators

ToxicLanguage

from guardrails.hub import ToxicLanguage

# Basic toxicity check
toxicity_validator = ToxicLanguage(
    threshold=0.5,  # Score threshold (0-1)
    on_fail="exception"
)

# Stricter for customer-facing
strict_validator = ToxicLanguage(
    threshold=0.2,  # Lower = stricter
    validation_method="sentence",  # Check each sentence
    on_fail="exception"
)

DetectPII

from guardrails.hub import DetectPII

# Block PII in outputs
pii_validator = DetectPII(
    pii_entities=[
        "EMAIL_ADDRESS",
        "PHONE_NUMBER",
        "CREDIT_CARD",
        "SSN",
        "PERSON"
    ],
    on_fail="fix"  # Attempts to redact
)

# Anonymous mode - mask instead of block
mask_validator = DetectPII(
    pii_entities=["EMAIL_ADDRESS", "PHONE_NUMBER"],
    on_fail="fix",
    redact_with="[REDACTED]"
)
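
With on_fail="fix", DetectPII rewrites the output rather than rejecting it. A short sketch of the redaction path (the exact placeholder substituted for each entity depends on the validator's configuration):

from guardrails import Guard

guard = Guard().use(pii_validator)

outcome = guard.validate(
    "You can reach Jane at jane.doe@example.com or 555-867-5309."
)
# Detected entities (name, email, phone) are replaced with redaction placeholders
print(outcome.validated_output)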

Quality Validators

ReadingLevel

from guardrails.hub import ReadingLevel

# Ensure accessible content
reading_validator = ReadingLevel(
    level="8th grade",  # Flesch-Kincaid grade level
    on_fail="reask"
)

# For technical documentation
tech_docs_validator = ReadingLevel(
    level="12th grade",
    on_fail="fix"
)

SimilarToDocument

from guardrails.hub import SimilarToDocument

# Ensure response is grounded
grounding_validator = SimilarToDocument(
    document="Reference text or knowledge base content here...",
    threshold=0.7,  # Similarity threshold
    on_fail="reask"
)

# For RAG applications
# (retrieved_context holds the text of the chunks returned by your retriever)
rag_validator = SimilarToDocument(
    document=retrieved_context,
    threshold=0.6,
    on_fail="exception"
)
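
Because the reference document is only known at query time in a RAG pipeline, a common pattern is to build the guard per request from the retrieved chunks. A rough sketch, where retrieve_chunks stands in for your own retrieval function:

from guardrails import Guard
from guardrails.hub import SimilarToDocument

def build_grounding_guard(chunks: list[str]) -> Guard:
    """Construct a guard whose reference document is the retrieved context."""
    context = "\n\n".join(chunks)
    return Guard().use(
        SimilarToDocument(document=context, threshold=0.6, on_fail="exception")
    )

# chunks = retrieve_chunks(query)       # your retriever (placeholder)
# guard = build_grounding_guard(chunks)
# guard.validate(llm_response)          # raises if the answer drifts from the context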

Combining Validators

from guardrails import Guard
from guardrails.hub import (
    ValidLength,
    ToxicLanguage,
    DetectPII,
    ReadingLevel,
    ValidChoices
)
from pydantic import BaseModel, Field

class CustomerSupportResponse(BaseModel):
    """Complete response schema with multiple validators."""

    greeting: str = Field(
        description="Professional greeting",
        json_schema_extra={
            "validators": [
                ValidLength(min=5, max=50, on_fail="fix"),
                ToxicLanguage(threshold=0.3, on_fail="exception")
            ]
        }
    )

    main_response: str = Field(
        description="Main response content",
        json_schema_extra={
            "validators": [
                ValidLength(min=50, max=500, on_fail="reask"),
                ToxicLanguage(threshold=0.2, on_fail="exception"),
                DetectPII(
                    pii_entities=["CREDIT_CARD", "SSN"],
                    on_fail="fix"
                ),
                ReadingLevel(level="8th grade", on_fail="fix")
            ]
        }
    )

    category: str = Field(
        description="Response category",
        json_schema_extra={
            "validators": [
                ValidChoices(
                    choices=["technical", "billing", "general"],
                    on_fail="fix"
                )
            ]
        }
    )

guard = Guard.for_pydantic(CustomerSupportResponse)
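
The same guard can validate a raw JSON string the LLM has already produced. A sketch using guard.parse, assuming every field validator either passes or can fix the value; the validated output comes back structured to match the schema:

raw_llm_output = """{
  "greeting": "Hello, thanks for reaching out!",
  "main_response": "Your order has shipped and should arrive within three business days. You can track it from the link in your confirmation email, and we are happy to help if anything looks off.",
  "category": "general"
}"""

outcome = guard.parse(raw_llm_output)
print(outcome.validation_passed)
print(outcome.validated_output)  # structured output validated against CustomerSupportResponse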

Creating Custom Validators

from guardrails.validators import (
    Validator,
    register_validator,
    ValidationResult,
    PassResult,
    FailResult
)
from typing import Any, Dict

@register_validator(name="no_competitor_mentions", data_type="string")
class NoCompetitorMentions(Validator):
    """Custom validator to block competitor mentions."""

    def __init__(
        self,
        competitors: list[str],
        on_fail: str = "exception"
    ):
        super().__init__(on_fail=on_fail)
        self.competitors = [c.lower() for c in competitors]

    def validate(
        self,
        value: str,
        metadata: Dict[str, Any] = {}
    ) -> ValidationResult:
        """Check for competitor mentions."""
        value_lower = value.lower()

        for competitor in self.competitors:
            if competitor in value_lower:
                return FailResult(
                    error_message=f"Response mentions competitor: {competitor}",
                    fix_value=self._remove_mention(value, competitor)
                )

        return PassResult()

    def _remove_mention(self, text: str, competitor: str) -> str:
        """Remove competitor mention from text."""
        import re
        pattern = re.compile(re.escape(competitor), re.IGNORECASE)
        return pattern.sub("[COMPETITOR]", text)

# Usage
from pydantic import BaseModel, Field

class BrandSafeResponse(BaseModel):
    response: str = Field(
        json_schema_extra={
            "validators": [
                NoCompetitorMentions(
                    competitors=["CompetitorA", "CompetitorB"],
                    on_fail="fix"
                )
            ]
        }
    )
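
Registered custom validators can also run outside a Pydantic schema. A quick sketch of the fix behavior on a plain string:

from guardrails import Guard

guard = Guard().use(
    NoCompetitorMentions(competitors=["CompetitorA", "CompetitorB"], on_fail="fix")
)

outcome = guard.validate("Unlike CompetitorA, our plan includes free support.")
print(outcome.validated_output)  # "Unlike [COMPETITOR], our plan includes free support."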

ML-Based Custom Validator

from guardrails.validators import (
    Validator,
    register_validator,
    PassResult,
    FailResult
)
from transformers import pipeline

@register_validator(name="sentiment_check", data_type="string")
class SentimentCheck(Validator):
    """Ensure response has appropriate sentiment."""

    def __init__(
        self,
        required_sentiment: str = "positive",
        threshold: float = 0.7,
        on_fail: str = "reask"
    ):
        super().__init__(on_fail=on_fail)
        self.required_sentiment = required_sentiment
        self.threshold = threshold
        self._classifier = None

    @property
    def classifier(self):
        if self._classifier is None:
            self._classifier = pipeline(
                "sentiment-analysis",
                model="distilbert-base-uncased-finetuned-sst-2-english"
            )
        return self._classifier

    def validate(self, value: str, metadata: dict = {}):
        result = self.classifier(value[:512])[0]  # Truncate for model

        if result["label"].lower() == self.required_sentiment:
            if result["score"] >= self.threshold:
                return PassResult()

        return FailResult(
            error_message=f"Response sentiment is {result['label']} "
                         f"(score: {result['score']:.2f}), "
                         f"expected {self.required_sentiment}"
        )

# Usage
class PositiveResponse(BaseModel):
    message: str = Field(
        json_schema_extra={
            "validators": [
                SentimentCheck(
                    required_sentiment="positive",
                    threshold=0.8,
                    on_fail="reask"
                )
            ]
        }
    )

Validator Execution Order

from guardrails import Guard
from guardrails.hub import ValidLength, ToxicLanguage, DetectPII

# Validators execute in order defined
guard = Guard().use_many(
    ValidLength(min=10, on_fail="reask"),    # 1st: Check length
    ToxicLanguage(on_fail="exception"),       # 2nd: Check toxicity
    DetectPII(on_fail="fix")                  # 3rd: Mask PII
)

# Short-circuit on failure
# If a validator configured with on_fail="exception" fails (here, ToxicLanguage),
# the exception is raised immediately and later validators never run

Validator Strategy: Order validators from cheapest to most expensive. Put fast regex checks first, ML-based validators last. Use "exception" for critical safety checks to fail fast.
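
Putting that advice together, a guard for a customer-facing reply might be ordered like this sketch, with cheap structural checks first and model-backed checks last:

from guardrails import Guard
from guardrails.hub import ValidLength, RegexMatch, DetectPII, ToxicLanguage

guard = Guard().use_many(
    ValidLength(min=20, max=800, on_fail="reask"),       # cheapest: string length
    RegexMatch(regex=r"^[^<>]*$", on_fail="exception"),  # cheap: reject raw HTML tags
    DetectPII(                                           # moderate: NER-based detection
        pii_entities=["EMAIL_ADDRESS", "PHONE_NUMBER"],
        on_fail="fix"
    ),
    ToxicLanguage(threshold=0.3, on_fail="exception"),   # most expensive: ML classifier, fail fast
)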

Next: LiteLLM integration for multi-provider support.
