Input Filtering at Scale

Building Custom Input Validators

3 min read

Generic guardrails don't cover domain-specific requirements. This lesson shows how to build custom validators for your application's unique safety and business rules.

Validator Design Pattern

from abc import ABC, abstractmethod
from dataclasses import dataclass
from enum import Enum
from typing import Optional, List

class ValidationResult(Enum):
    """Verdict a validator can return for a piece of input.

    Within this file, ProductionInputValidator treats any FAIL as blocking
    and reports WARN messages without blocking.
    """

    # Validator found nothing to object to.
    PASS = "pass"
    # Validator rejects the input.
    FAIL = "fail"
    # Validator flags the input but does not reject it.
    WARN = "warn"

@dataclass
class ValidatorOutput:
    """Outcome produced by one validator for one input."""

    # Verdict for the checked input.
    result: ValidationResult
    # Human-readable explanation of the verdict.
    message: str
    # Optional extra data; validators in this file store things such as the
    # matched pattern or the offending amount here.
    details: Optional[dict] = None

class InputValidator(ABC):
    """Abstract interface every custom input validator implements.

    Subclasses must provide a ``name`` property and a ``validate`` method.
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Return the identifier used to label this validator's results."""
        pass

    @abstractmethod
    def validate(self, text: str, context: Optional[dict] = None) -> ValidatorOutput:
        """Check ``text`` and return the verdict.

        Args:
            text: The input to inspect.
            context: Optional caller-supplied data; may be ``None``.

        Returns:
            A ValidatorOutput carrying the verdict, a message and optional
            details.
        """
        pass

class ValidatorChain:
    """Apply an ordered group of validators to the same input."""

    def __init__(self, validators: List[InputValidator]):
        # Kept as given (no copy); results come back in this order.
        self.validators = validators

    def validate(self, text: str, context: dict = None) -> List[ValidatorOutput]:
        """Run every validator on ``text`` and collect the outputs in order.

        All validators are executed; there is no short-circuit on failure.
        """
        outputs = []
        for validator in self.validators:
            outputs.append(validator.validate(text, context))
        return outputs

    def validate_all_pass(self, text: str, context: dict = None) -> bool:
        """Return True only when every validator reports PASS.

        The full chain is run first, so WARN and FAIL both yield False.
        An empty chain yields True.
        """
        for outcome in self.validate(text, context):
            if outcome.result != ValidationResult.PASS:
                return False
        return True

Domain-Specific Validators

Financial Services

import re
from typing import List

class FinancialAdviceValidator(InputValidator):
    """Reject input that looks like a request for specific financial advice."""

    # Regexes searched against the lower-cased input; the first hit fails it.
    PROHIBITED_PATTERNS = [
        r"should i (buy|sell|invest)",
        r"which (stock|crypto|fund) (should|to)",
        r"(buy|sell) (now|today)",
        r"guarantee.*return",
        r"risk.free.*investment",
    ]

    @property
    def name(self) -> str:
        """Validator identifier."""
        return "financial_advice"

    def validate(self, text: str, context: dict = None) -> ValidatorOutput:
        """Fail on the first prohibited pattern found in ``text``.

        ``context`` is accepted for interface compatibility and not used.
        The matching pattern is reported under ``details["matched_pattern"]``.
        """
        lowered = text.lower()
        hit = next(
            (
                candidate
                for candidate in self.PROHIBITED_PATTERNS
                if re.search(candidate, lowered)
            ),
            None,
        )

        if hit is None:
            return ValidatorOutput(
                result=ValidationResult.PASS,
                message="No financial advice patterns detected"
            )

        return ValidatorOutput(
            result=ValidationResult.FAIL,
            message="Request appears to seek specific financial advice",
            details={"matched_pattern": hit}
        )

class TransactionAmountValidator(InputValidator):
    """Reject input that mentions a dollar amount above a configured limit.

    Amounts are recognised in two written forms: prefixed with ``$``
    ("$1,234.56") or followed by a unit ("1,234.56 dollars", "1234 USD").
    """

    # "$"-prefixed amounts; the "$" and commas are stripped before parsing.
    _PREFIXED_AMOUNT = re.compile(r'\$[\d,]+\.?\d*')

    # Unit-suffixed amounts (unit is case-insensitive). Any number of decimal
    # digits and any comma grouping is accepted, mirroring the "$" pattern.
    # Requiring exactly two decimals here would make "1000000.5 dollars"
    # parse as just "5 dollars" and slip under the limit.
    _SUFFIXED_AMOUNT = re.compile(
        r'(\d[\d,]*(?:\.\d*)?)\s*(?:dollars?|USD)', re.I
    )

    @property
    def name(self) -> str:
        """Validator identifier."""
        return "transaction_amount"

    def __init__(self, max_amount: float = 10000):
        """Store the limit; amounts strictly greater than it are rejected."""
        self.max_amount = max_amount

    def validate(self, text: str, context: dict = None) -> ValidatorOutput:
        """Fail on the first mentioned amount that exceeds ``max_amount``.

        Args:
            text: Input to scan for dollar amounts.
            context: Accepted for interface compatibility; not used.

        Returns:
            FAIL with ``details`` holding ``amount`` and ``limit`` for the
            first over-limit amount, otherwise PASS.
        """
        candidates = self._PREFIXED_AMOUNT.findall(text)
        candidates += self._SUFFIXED_AMOUNT.findall(text)

        for raw in candidates:
            cleaned = raw.replace('$', '').replace(',', '')
            try:
                amount = float(cleaned)
            except ValueError:
                # e.g. "$," leaves an empty string once the symbols are removed.
                continue

            if amount > self.max_amount:
                return ValidatorOutput(
                    result=ValidationResult.FAIL,
                    message=f"Transaction amount ${amount:,.2f} exceeds limit ${self.max_amount:,.2f}",
                    details={"amount": amount, "limit": self.max_amount}
                )

        return ValidatorOutput(
            result=ValidationResult.PASS,
            message="Transaction amounts within limits"
        )

Healthcare

class MedicalAdviceValidator(InputValidator):
    """Block requests for diagnosis or treatment advice; flag medical topics.

    Input matching any ``MEDICAL_TRIGGERS`` pattern fails. Otherwise, input
    matching any ``DISCLAIMER_REQUIRED`` pattern produces a warning.
    """

    @property
    def name(self) -> str:
        """Validator identifier."""
        return "medical_advice"

    # Regexes searched against the lower-cased input; any hit is a FAIL.
    MEDICAL_TRIGGERS = [
        r"(diagnose|diagnosis)",
        r"(prescribe|prescription)",
        r"should i (take|stop taking)",
        r"(dosage|dose) of",
        # Anchored at a word boundary so unrelated words that merely end in
        # "cure"/"treat" ("secure for", "retreat for") are not blocked.
        r"\b(cure|treat|treatment) for",
        r"is it (safe|dangerous) to",
    ]

    # Topic keywords that only trigger a WARN (checked after the triggers).
    DISCLAIMER_REQUIRED = [
        r"symptom",
        r"medicine",
        r"medication",
        r"drug",
        r"health",
        r"medical",
    ]

    def validate(self, text: str, context: dict = None) -> ValidatorOutput:
        """Classify ``text`` as FAIL, WARN or PASS.

        Args:
            text: Input to inspect.
            context: Accepted for interface compatibility; not used.

        Returns:
            FAIL with ``details["matched_pattern"]`` for the first trigger
            hit; else WARN with ``details["topic"]`` for the first topic
            keyword hit; else PASS.
        """
        text_lower = text.lower()

        # Prohibited advice requests take precedence over topic warnings.
        for pattern in self.MEDICAL_TRIGGERS:
            if re.search(pattern, text_lower):
                return ValidatorOutput(
                    result=ValidationResult.FAIL,
                    message="Request appears to seek medical diagnosis or treatment advice",
                    details={"matched_pattern": pattern}
                )

        # Medical topic without an advice request: allow, but flag it.
        for pattern in self.DISCLAIMER_REQUIRED:
            if re.search(pattern, text_lower):
                return ValidatorOutput(
                    result=ValidationResult.WARN,
                    message="Medical topics detected - response should include disclaimer",
                    details={"topic": pattern}
                )

        return ValidatorOutput(
            result=ValidationResult.PASS,
            message="No medical advice patterns detected"
        )

E-commerce

class CompetitorMentionValidator(InputValidator):
    """Warn when the input contains any configured competitor name.

    Matching is a case-insensitive substring test.
    """

    def __init__(self, competitors: List[str]):
        # Lower-cased once so validate() can compare against lower-cased input.
        self.competitors = [entry.lower() for entry in competitors]

    @property
    def name(self) -> str:
        """Validator identifier."""
        return "competitor_mention"

    def validate(self, text: str, context: dict = None) -> ValidatorOutput:
        """Return WARN listing every competitor found in ``text``, else PASS.

        ``context`` is accepted for interface compatibility and not used.
        """
        haystack = text.lower()
        found = [rival for rival in self.competitors if rival in haystack]

        if not found:
            return ValidatorOutput(
                result=ValidationResult.PASS,
                message="No competitor mentions"
            )

        return ValidatorOutput(
            result=ValidationResult.WARN,
            message="Competitor mentioned - ensure neutral response",
            details={"competitors": found}
        )

class ProductAvailabilityValidator(InputValidator):
    """Warn when products referenced by the request are not available."""

    def __init__(self, inventory_service):
        # Must expose is_available(product_id); that is the only call made.
        self.inventory = inventory_service

    @property
    def name(self) -> str:
        """Validator identifier."""
        return "product_availability"

    def validate(self, text: str, context: dict = None) -> ValidatorOutput:
        """Check the products listed in ``context["detected_products"]``.

        ``text`` itself is not parsed; product ids come solely from the
        context. A missing or empty context means there is nothing to check,
        which yields PASS.
        """
        if context:
            candidates = context.get("detected_products", [])
        else:
            candidates = []

        missing = [
            product_id
            for product_id in candidates
            if not self.inventory.is_available(product_id)
        ]

        if not missing:
            return ValidatorOutput(
                result=ValidationResult.PASS,
                message="All products available"
            )

        return ValidatorOutput(
            result=ValidationResult.WARN,
            message="Some products unavailable",
            details={"unavailable_products": missing}
        )

Composing Validators for Production

class ProductionInputValidator:
    """Domain-aware entry point that runs a validator chain and summarises it."""

    def __init__(self, domain: str):
        # Note: despite the plural name this attribute holds a ValidatorChain.
        self.validators = self._build_chain(domain)

    def _build_chain(self, domain: str) -> ValidatorChain:
        """Assemble the common validators plus those registered for ``domain``.

        A domain with no registered validators gets the common ones only.
        """
        shared = [
            LengthValidator(max_length=5000),
            LanguageValidator(allowed=["en", "es", "fr"]),
        ]

        finance = [
            FinancialAdviceValidator(),
            TransactionAmountValidator(max_amount=50000),
        ]
        healthcare = [
            MedicalAdviceValidator(),
        ]
        ecommerce = [
            CompetitorMentionValidator(["competitor1", "competitor2"]),
        ]
        per_domain = {
            "finance": finance,
            "healthcare": healthcare,
            "ecommerce": ecommerce,
        }

        extras = per_domain.get(domain, [])
        return ValidatorChain(shared + extras)

    async def validate(self, text: str, context: dict = None) -> dict:
        """Run the chain on ``text`` and return a summary dict.

        Keys: ``valid`` (no FAIL results), ``blocked`` (at least one FAIL),
        ``warnings`` and ``failures`` (messages of WARN / FAIL results), and
        ``all_results`` (one entry per validator, in chain order).
        """
        outcomes = self.validators.validate(text, context)

        failure_messages = [
            outcome.message
            for outcome in outcomes
            if outcome.result == ValidationResult.FAIL
        ]
        warning_messages = [
            outcome.message
            for outcome in outcomes
            if outcome.result == ValidationResult.WARN
        ]

        # Pair each validator with the output it produced (same ordering).
        per_validator = []
        for validator, outcome in zip(self.validators.validators, outcomes):
            per_validator.append(
                {
                    "validator": validator.name,
                    "result": outcome.result.value,
                    "message": outcome.message,
                }
            )

        blocked = bool(failure_messages)
        return {
            "valid": not blocked,
            "blocked": blocked,
            "warnings": warning_messages,
            "failures": failure_messages,
            "all_results": per_validator,
        }

# Usage
import asyncio


async def main() -> dict:
    """Validate a sample finance prompt and return the structured result."""
    validator = ProductionInputValidator(domain="finance")
    return await validator.validate("Should I buy Tesla stock now?")


if __name__ == "__main__":
    # `await` is only legal inside a coroutine, so drive it with asyncio.run().
    result = asyncio.run(main())
    # Expected (abridged, assuming the common validators pass):
    #   {"valid": False, "blocked": True,
    #    "failures": ["Request appears to seek specific financial advice"], ...}
    # The dict also carries "warnings" and "all_results".

Key Insight: Domain-specific validators encode your business rules in code. They complement generic guardrails by enforcing application-specific constraints that off-the-shelf tools can't provide.

Next module: A deep dive into safety classifiers such as Llama Guard 3 and ShieldGemma.

Quiz

Module 2: Input Filtering at Scale

Take Quiz