Input Filtering at Scale
Building Custom Input Validators
3 min read
Generic guardrails don't cover domain-specific requirements. This lesson shows how to build custom validators for your application's unique safety and business rules.
Validator Design Pattern
from abc import ABC, abstractmethod
from dataclasses import dataclass
from enum import Enum
from typing import Optional, List
class ValidationResult(Enum):
    """Verdict a validator can attach to a piece of input."""

    PASS = "pass"  # nothing objectionable was found
    FAIL = "fail"  # the input was rejected by the validator
    WARN = "warn"  # the input was flagged but not rejected
@dataclass
class ValidatorOutput:
    """Record returned by a single validator run."""

    # Overall verdict of the check.
    result: ValidationResult
    # Human-readable explanation of the verdict.
    message: str
    # Optional extra data about the verdict (e.g. which pattern matched).
    details: Optional[dict] = None
class InputValidator(ABC):
    """Base class for custom input validators.

    Concrete validators must provide a ``name`` property and a ``validate``
    method; the class cannot be instantiated until both are implemented.
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Validator identifier."""

    @abstractmethod
    def validate(
        self, text: str, context: Optional[dict] = None
    ) -> ValidatorOutput:
        """Validate input text.

        Args:
            text: The input to check.
            context: Optional extra data for the check. Defaults to ``None``,
                so implementations must handle a missing context.

        Returns:
            A ``ValidatorOutput`` describing the verdict.
        """
class ValidatorChain:
    """Apply an ordered list of validators to the same input."""

    def __init__(self, validators: List[InputValidator]):
        self.validators = validators

    def validate(self, text: str, context: dict = None) -> List[ValidatorOutput]:
        """Run every validator in order and return their outputs as a list."""
        outputs = []
        for validator in self.validators:
            outputs.append(validator.validate(text, context))
        return outputs

    def validate_all_pass(self, text: str, context: dict = None) -> bool:
        """Return True only if every validator reports PASS.

        All validators are run first; any result other than PASS
        (including WARN) makes the answer False.
        """
        for output in self.validate(text, context):
            if output.result == ValidationResult.PASS:
                continue
            return False
        return True
Domain-Specific Validators
Financial Services
import re
from typing import List
class FinancialAdviceValidator(InputValidator):
    """Reject input that looks like a request for specific financial advice."""

    # Regexes searched in the lower-cased input; the first hit fails validation.
    PROHIBITED_PATTERNS = [
        r"should i (buy|sell|invest)",
        r"which (stock|crypto|fund) (should|to)",
        r"(buy|sell) (now|today)",
        r"guarantee.*return",
        r"risk.free.*investment",
    ]

    @property
    def name(self) -> str:
        """Validator identifier."""
        return "financial_advice"

    def validate(self, text: str, context: dict = None) -> ValidatorOutput:
        """Fail on the first prohibited pattern found in ``text``.

        Matching is done on the lower-cased text; ``context`` is not used.
        On failure, ``details["matched_pattern"]`` holds the regex that hit.
        """
        lowered = text.lower()
        hit = next(
            (p for p in self.PROHIBITED_PATTERNS if re.search(p, lowered)),
            None,
        )
        if hit is None:
            return ValidatorOutput(
                result=ValidationResult.PASS,
                message="No financial advice patterns detected"
            )
        return ValidatorOutput(
            result=ValidationResult.FAIL,
            message="Request appears to seek specific financial advice",
            details={"matched_pattern": hit}
        )
class TransactionAmountValidator(InputValidator):
    """Fail input that mentions a dollar amount above a configured limit.

    Amounts are recognised in two written forms: prefixed with ``$``
    (``$1,234.50``) or followed by ``dollar``/``dollars``/``USD``
    (``1,234.50 dollars``, case-insensitive).
    """

    # "$" followed by digits/commas and an optional decimal part.
    _PREFIXED_AMOUNT = re.compile(r'\$[\d,]+\.?\d*')
    # Number followed by a currency word. The decimal part accepts any number
    # of digits: requiring exactly two made "15000.5 dollars" parse as
    # "5 dollars", which let over-limit amounts slip through.
    _SUFFIXED_AMOUNT = re.compile(
        r'(\d+(?:,\d{3})*(?:\.\d+)?)\s*(?:dollars?|USD)', re.I
    )

    def __init__(self, max_amount: float = 10000):
        """Store the largest amount that is still allowed."""
        self.max_amount = max_amount

    @property
    def name(self) -> str:
        """Validator identifier."""
        return "transaction_amount"

    def validate(
        self, text: str, context: Optional[dict] = None
    ) -> ValidatorOutput:
        """Fail on the first amount in ``text`` that exceeds ``max_amount``.

        ``context`` is not used. On failure, ``details`` carries the
        offending ``amount`` and the configured ``limit``.
        """
        candidates = self._PREFIXED_AMOUNT.findall(text)
        candidates += self._SUFFIXED_AMOUNT.findall(text)

        for raw in candidates:
            cleaned = raw.replace('$', '').replace(',', '')
            try:
                amount = float(cleaned)
            except ValueError:
                # Matches without any digits (e.g. "$,") are not amounts.
                continue
            if amount > self.max_amount:
                return ValidatorOutput(
                    result=ValidationResult.FAIL,
                    message=f"Transaction amount ${amount:,.2f} exceeds limit ${self.max_amount:,.2f}",
                    details={"amount": amount, "limit": self.max_amount}
                )

        return ValidatorOutput(
            result=ValidationResult.PASS,
            message="Transaction amounts within limits"
        )
Healthcare
class MedicalAdviceValidator(InputValidator):
    """Prevent requests for medical diagnosis or treatment.

    Input matching a trigger pattern fails; input that merely touches a
    medical topic passes with a warning that a disclaimer is needed.
    """

    # Regexes that mark a request for diagnosis/treatment advice (FAIL).
    MEDICAL_TRIGGERS = [
        r"(diagnose|diagnosis)",
        r"(prescribe|prescription)",
        r"should i (take|stop taking)",
        r"(dosage|dose) of",
        # Leading \b keeps "secure for" / "retreat for" from matching
        # "cure for" / "treat for" inside an unrelated word.
        r"\b(cure|treat|treatment) for",
        r"is it (safe|dangerous) to",
    ]

    # Regexes that mark a medical topic needing a disclaimer (WARN).
    DISCLAIMER_REQUIRED = [
        r"symptom",
        r"medicine",
        r"medication",
        r"drug",
        r"health",
        r"medical",
    ]

    @property
    def name(self) -> str:
        """Validator identifier."""
        return "medical_advice"

    @staticmethod
    def _first_match(patterns: List[str], text: str) -> Optional[str]:
        """Return the first pattern in ``patterns`` found in ``text``, else None."""
        return next((p for p in patterns if re.search(p, text)), None)

    def validate(
        self, text: str, context: Optional[dict] = None
    ) -> ValidatorOutput:
        """Classify ``text`` as FAIL, WARN or PASS.

        Matching is done on the lower-cased text; ``context`` is not used.
        Triggers are checked before disclaimer topics, so a FAIL takes
        precedence over a WARN.
        """
        lowered = text.lower()

        trigger = self._first_match(self.MEDICAL_TRIGGERS, lowered)
        if trigger is not None:
            return ValidatorOutput(
                result=ValidationResult.FAIL,
                message="Request appears to seek medical diagnosis or treatment advice",
                details={"matched_pattern": trigger}
            )

        topic = self._first_match(self.DISCLAIMER_REQUIRED, lowered)
        if topic is not None:
            return ValidatorOutput(
                result=ValidationResult.WARN,
                message="Medical topics detected - response should include disclaimer",
                details={"topic": topic}
            )

        return ValidatorOutput(
            result=ValidationResult.PASS,
            message="No medical advice patterns detected"
        )
E-commerce
class CompetitorMentionValidator(InputValidator):
    """Warn when the input mentions any configured competitor name."""

    def __init__(self, competitors: List[str]):
        """Store the competitor names lower-cased for case-insensitive lookup."""
        self.competitors = [entry.lower() for entry in competitors]

    @property
    def name(self) -> str:
        """Validator identifier."""
        return "competitor_mention"

    def validate(self, text: str, context: dict = None) -> ValidatorOutput:
        """Return WARN listing every competitor found in ``text``, else PASS.

        A competitor counts as mentioned when its lower-cased name occurs
        anywhere in the lower-cased text (plain substring test).
        ``context`` is not used.
        """
        lowered = text.lower()
        found = [rival for rival in self.competitors if rival in lowered]

        if not found:
            return ValidatorOutput(
                result=ValidationResult.PASS,
                message="No competitor mentions"
            )
        return ValidatorOutput(
            result=ValidationResult.WARN,
            message="Competitor mentioned - ensure neutral response",
            details={"competitors": found}
        )
class ProductAvailabilityValidator(InputValidator):
    """Warn when products referenced by the request are not available."""

    def __init__(self, inventory_service):
        """Keep the inventory service; it must offer ``is_available(product_id)``."""
        self.inventory = inventory_service

    @property
    def name(self) -> str:
        """Validator identifier."""
        return "product_availability"

    def validate(self, text: str, context: dict = None) -> ValidatorOutput:
        """Check each product in ``context["detected_products"]`` against inventory.

        ``text`` itself is not parsed (simplified): product IDs come only
        from the context, and a missing or empty context means no products.
        """
        if context:
            product_ids = context.get("detected_products", [])
        else:
            product_ids = []

        missing = [
            pid for pid in product_ids
            if not self.inventory.is_available(pid)
        ]

        if not missing:
            return ValidatorOutput(
                result=ValidationResult.PASS,
                message="All products available"
            )
        return ValidatorOutput(
            result=ValidationResult.WARN,
            message="Some products unavailable",
            details={"unavailable_products": missing}
        )
Composing Validators for Production
class ProductionInputValidator:
    """Complete input validation for production deployment.

    Combines validators shared by every domain with the validators of one
    selected domain, and reports the outcome as a plain dict.
    """

    def __init__(self, domain: str):
        # Note: despite the plural name, this attribute holds a ValidatorChain.
        self.validators = self._build_chain(domain)

    def _build_chain(self, domain: str) -> ValidatorChain:
        """Build the validator chain for ``domain``.

        Unknown domains get only the common validators.
        """
        # NOTE(review): LengthValidator and LanguageValidator are not defined
        # in this section -- presumably provided elsewhere; confirm.
        shared = [
            LengthValidator(max_length=5000),
            LanguageValidator(allowed=["en", "es", "fr"]),
        ]

        per_domain = {
            "finance": [
                FinancialAdviceValidator(),
                TransactionAmountValidator(max_amount=50000),
            ],
            "healthcare": [
                MedicalAdviceValidator(),
            ],
            "ecommerce": [
                CompetitorMentionValidator(["competitor1", "competitor2"]),
            ],
        }

        return ValidatorChain([*shared, *per_domain.get(domain, [])])

    async def validate(self, text: str, context: dict = None) -> dict:
        """Run the chain and summarise the outcome.

        Returns a dict with ``valid`` / ``blocked`` flags (driven by FAIL
        results only), the ``warnings`` and ``failures`` messages, and one
        ``all_results`` entry per validator.
        """
        results = self.validators.validate(text, context)

        failure_messages = []
        warning_messages = []
        for outcome in results:
            if outcome.result == ValidationResult.FAIL:
                failure_messages.append(outcome.message)
            elif outcome.result == ValidationResult.WARN:
                warning_messages.append(outcome.message)

        # Pair each result with the validator that produced it (same order).
        per_validator = []
        for checker, outcome in zip(self.validators.validators, results):
            per_validator.append(
                {
                    "validator": checker.name,
                    "result": outcome.result.value,
                    "message": outcome.message,
                }
            )

        return {
            "valid": not failure_messages,
            "blocked": bool(failure_messages),
            "warnings": warning_messages,
            "failures": failure_messages,
            "all_results": per_validator,
        }
# Usage
# A bare top-level ``await`` is a SyntaxError in a regular module, so the
# coroutine is driven with asyncio.run(). Inside an already-running event
# loop (e.g. a notebook), use ``await validator.validate(...)`` instead.
import asyncio

validator = ProductionInputValidator(domain="finance")
result = asyncio.run(validator.validate("Should I buy Tesla stock now?"))
# Returns a dict with "valid": False, "blocked": True and "failures" containing
# "Request appears to seek specific financial advice"; the dict also carries
# the "warnings" and "all_results" keys.
Key Insight: Domain-specific validators encode your business rules in code. They complement generic guardrails by enforcing application-specific constraints that off-the-shelf tools can't provide.
Next module: Deep dive into safety classifiers like LlamaGuard 3 and ShieldGemma.