Advanced NeMo Guardrails

Custom Rails Implementation

3 min read

Building custom rails allows you to enforce domain-specific policies that generic guardrails don't cover. This lesson covers implementing custom input, output, and dialog rails.

Custom Input Rail

Python Action Implementation

# actions/input_safety.py
from nemoguardrails.actions import action
from nemoguardrails.actions.actions import ActionResult
import re

@action(name="check_financial_advice")
async def check_financial_advice(user_message: str) -> ActionResult:
    """
    Screen a user message for requests for financial advice.

    The message is lowercased and searched against a fixed list of regex
    patterns describing personalized investment questions.

    Returns:
        ActionResult whose return_value is True when no pattern matches.
        On a match, return_value is False and the context update
        "blocked_reason" is set to "financial_advice".
    """
    advice_regexes = (
        r"should i (buy|sell|invest)",
        r"what stock",
        r"investment advice",
        r"financial recommendation",
        r"is .* a good investment",
    )

    lowered = user_message.lower()
    asks_for_advice = any(re.search(rx, lowered) for rx in advice_regexes)

    if not asks_for_advice:
        return ActionResult(return_value=True)

    return ActionResult(
        return_value=False,
        context_updates={"blocked_reason": "financial_advice"},
    )


@action(name="check_competitor_mention")
async def check_competitor_mention(user_message: str) -> ActionResult:
    """
    Block discussions about competitors.

    Performs a case-insensitive substring search for each known competitor
    name in the user message.

    Returns:
        ActionResult whose return_value is True when no competitor name is
        present. Otherwise return_value is False and the context update
        "blocked_reason" is set to "competitor_mention".
    """
    blocked_names = ("competitor_a", "competitor_b", "rival_corp")
    lowered = user_message.lower()

    if any(name in lowered for name in blocked_names):
        return ActionResult(
            return_value=False,
            context_updates={"blocked_reason": "competitor_mention"},
        )

    return ActionResult(return_value=True)


@action(name="extract_intent_entities")
async def extract_intent_entities(user_message: str) -> ActionResult:
    """
    Extract entities and intent from user message.

    Only urgency is detected here: it is "high" when the word "urgent"
    appears anywhere in the message (case-insensitive), otherwise "normal".
    The "product_mentioned" and "action_requested" slots are always None.

    Returns:
        ActionResult whose return_value is the entities dict. The return
        annotation matches the value actually returned, consistent with
        the other actions in this module.
    """
    # In production, use NER or intent classifier
    entities = {
        "product_mentioned": None,
        "action_requested": None,
        "urgency": "normal",
    }

    if "urgent" in user_message.lower():
        entities["urgency"] = "high"

    return ActionResult(return_value=entities)

Colang Flow

# rails/custom_input.co
define flow custom input checks
  # Input rail: runs the custom Python actions against $user_message and
  # stops the flow as soon as one of the two blocking checks fails.
  # Run all custom input validations
  $financial_ok = execute check_financial_advice(user_message=$user_message)

  # A falsy result means a financial-advice pattern matched
  if not $financial_ok
    bot decline financial advice
    stop

  $competitor_ok = execute check_competitor_mention(user_message=$user_message)

  # A falsy result means a competitor name was found in the message
  if not $competitor_ok
    bot redirect away from competitor
    stop

  # Extract entities for downstream use
  $entities = execute extract_intent_entities(user_message=$user_message)
  # Store in context for later flows

# Canned reply used by `custom input checks` when the financial-advice check fails
define bot decline financial advice
  "I'm not able to provide personalized financial advice. For investment decisions, please consult a licensed financial advisor."

# Canned reply used by `custom input checks` when a competitor is mentioned
define bot redirect away from competitor
  "I'd be happy to focus on how our solutions can help you. What specific needs are you looking to address?"

Custom Output Rail

Fact-Checking Action

# actions/output_safety.py
from nemoguardrails.actions import action
import httpx

@action(name="verify_factual_claims")
async def verify_factual_claims(bot_message: str) -> dict:
    """
    Check each claim found in the bot response against the knowledge base.

    Claims are checked one at a time, in the order extract_claims yields
    them. Every claim that fails the check is collected and halves the
    confidence score, which starts at 1.0.

    Returns:
        Dict with "verified" (True only when no claim was flagged),
        "flagged_claims" (the claims that failed) and "confidence".
    """
    flagged = []
    confidence = 1.0

    # Extract claims (in production, use NLP)
    for statement in extract_claims(bot_message):
        # Check against knowledge base
        if await check_against_kb(statement):
            continue
        flagged.append(statement)
        confidence *= 0.5

    return {
        "verified": not flagged,
        "flagged_claims": flagged,
        "confidence": confidence,
    }


@action(name="check_response_tone")
async def check_response_tone(bot_message: str) -> dict:
    """
    Ensure response maintains professional tone.

    The lowercased message is searched for informal slang, repeated
    punctuation and a few emoji.

    Args:
        bot_message: Candidate bot response to inspect.

    Returns:
        Dict with "professional" (True when nothing informal was found)
        and "issues" (one "Informal pattern: ..." entry per matched
        pattern, in the order the patterns are listed).
    """
    # Imported locally because this module's import header does not bring
    # `re` into scope; without it the first search raises NameError.
    import re

    # (label, regex) pairs. Slang words are anchored on word boundaries so
    # ordinary words that merely contain them (e.g. "lollipop",
    # "philology") are not flagged. Labels keep the original pattern text
    # so the reported issue strings stay the same.
    informal_patterns = [
        (r"lol", r"\blol\b"),
        (r"omg", r"\bomg\b"),
        (r"gonna", r"\bgonna\b"),
        (r"wanna", r"\bwanna\b"),
        (r"!!+", r"!!+"),
        (r"\?\?+", r"\?\?+"),
        (r"😀|😊|🙂", r"😀|😊|🙂"),  # Emoji check
    ]

    lowered = bot_message.lower()
    issues = [
        f"Informal pattern: {label}"
        for label, regex in informal_patterns
        if re.search(regex, lowered)
    ]

    return {
        "professional": not issues,
        "issues": issues,
    }


@action(name="sanitize_output")
async def sanitize_output(bot_message: str) -> str:
    """
    Strip sensitive details from a bot response.

    Two substitutions are applied in order: internal-id references
    (matched case-insensitively) are replaced with a placeholder, then
    bracketed "[DEBUG:...]" notes are deleted.

    Returns:
        The sanitized message text.
    """
    import re

    internal_ref = re.compile(r'internal[_-]?id[:\s]*\w+', re.IGNORECASE)
    debug_note = re.compile(r'\[DEBUG:.*?\]')

    # Mask leaked internal references first, then drop debug information
    without_refs = internal_ref.sub('[INTERNAL_REF_REMOVED]', bot_message)
    return debug_note.sub('', without_refs)

Output Verification Flow

# rails/custom_output.co
define flow custom output checks
  # Output rail: fact-checks, tone-checks and finally sanitizes
  # $bot_message, replacing it whenever a step rewrites the text.
  # Verify factual accuracy
  $fact_check = execute verify_factual_claims(bot_message=$bot_message)

  if not $fact_check.verified
    # Regenerate with caveats
    # NOTE(review): no action named add_uncertainty_caveats is shown in
    # actions/output_safety.py - confirm it is registered elsewhere.
    $bot_message = execute add_uncertainty_caveats(
      message=$bot_message,
      claims=$fact_check.flagged_claims
    )

  # Check professional tone
  $tone_check = execute check_response_tone(bot_message=$bot_message)

  if not $tone_check.professional
    # NOTE(review): rephrase_professionally is likewise not defined in the
    # action modules shown here - confirm it exists.
    $bot_message = execute rephrase_professionally(message=$bot_message)

  # Final sanitization
  $bot_message = execute sanitize_output(bot_message=$bot_message)

# NOTE(review): this is declared as a flow, while `custom output checks`
# invokes `execute add_uncertainty_caveats(...)`, which looks like an action
# call - confirm which of the two is intended.
define flow add uncertainty caveats
  """Add hedging language for unverified claims."""
  # Emits the hedging prefix, then the original message text
  bot say "Based on my understanding, though I'd recommend verifying: "
  bot say $bot_message

Custom Dialog Rail

Conversation Guardrails

# actions/dialog_controls.py
from nemoguardrails.actions import action
from datetime import datetime, timedelta

# Track conversation state.
# Module-level dict keyed by user_id; check_rate_limit creates each entry
# as {"messages": [...], "warnings": 0} and updates it on every call.
conversation_state = {}

@action(name="check_rate_limit")
async def check_rate_limit(user_id: str) -> bool:
    """
    Rate limit users to prevent abuse.

    Allows at most 10 accepted messages per user within a sliding
    one-minute window. A rejected call is not recorded in the window but
    increments the user's "warnings" counter.

    Args:
        user_id: Key identifying the user in conversation_state.

    Returns:
        True when the message is accepted (and recorded), False when the
        user is over the limit.
    """
    # Local import: the window is measured with the monotonic clock, which
    # cannot jump backwards on DST or system clock adjustments the way
    # naive wall-clock time can (that would keep stale entries alive).
    import time

    max_messages = 10
    window_seconds = 60.0
    now = time.monotonic()

    state = conversation_state.setdefault(
        user_id, {"messages": [], "warnings": 0}
    )

    # Remove old messages
    state["messages"] = [
        ts for ts in state["messages"]
        if now - ts < window_seconds
    ]

    # Check limit (10 messages per minute)
    if len(state["messages"]) >= max_messages:
        state["warnings"] += 1
        return False

    state["messages"].append(now)
    return True


@action(name="check_topic_allowed")
async def check_topic_allowed(
    topic: str,
    user_tier: str = "free"
) -> bool:
    """
    Decide whether a subscription tier may discuss a topic.

    Each tier maps to a list of permitted topics; a "*" entry grants every
    topic. A tier that is not in the table is allowed nothing.

    Returns:
        True when the topic is permitted for the tier, otherwise False.
    """
    basic_topics = ["general", "faq", "basic_support"]
    tier_topics = {
        "free": basic_topics,
        "pro": basic_topics + ["advanced_support"],
        "enterprise": ["*"],  # All topics
    }

    permitted = tier_topics.get(user_tier, [])
    if "*" in permitted:
        return True
    return topic in permitted


@action(name="track_conversation_metrics")
async def track_conversation_metrics(
    user_id: str,
    message: str,
    response: str
) -> None:
    """
    Record size and timing metrics for one exchange.

    Only the lengths of the user message and the response are reported,
    together with the user id and the current local time in ISO format.
    """
    metrics = {
        "user_id": user_id,
        "message_length": len(message),
        "response_length": len(response),
        "timestamp": datetime.now().isoformat(),
    }

    # Send to analytics system
    await log_to_analytics(metrics)

Dialog Control Flow

# rails/custom_dialog.co
define flow controlled conversation
  # Dialog rail: enforces the per-user rate limit first, then the
  # tier-based topic access check, stopping on the first failure.
  # Rate limiting
  $rate_ok = execute check_rate_limit(user_id=$user_id)

  if not $rate_ok
    bot rate limit warning
    stop

  # Topic access control
  # NOTE(review): classify_topic and get_user_tier are not defined in the
  # action modules shown in this lesson - confirm they are registered.
  $topic = execute classify_topic(text=$user_message)
  $user_tier = execute get_user_tier(user_id=$user_id)
  $topic_allowed = execute check_topic_allowed(
    topic=$topic,
    user_tier=$user_tier
  )

  if not $topic_allowed
    bot suggest upgrade
    stop

  # Proceed with normal flow
  # ... rest of conversation logic

# Canned reply used by `controlled conversation` when the rate limit check fails
define bot rate limit warning
  "You're sending messages too quickly. Please wait a moment before continuing."

# Canned reply used by `controlled conversation` when the topic is not allowed for the user's tier
define bot suggest upgrade
  "This topic is available with our Pro plan. Would you like to learn about upgrading?"

Integrating Custom Rails

Configuration

# config/config.yml
# Model entry of type `main`
models:
  - type: main
    engine: openai
    model: gpt-4o

rails:
  # Flows applied as input rails; `custom input checks` is the flow
  # defined in rails/custom_input.co
  input:
    flows:
      - self check input
      - custom input checks    # Our custom rail

  # Flows applied as output rails; `custom output checks` is the flow
  # defined in rails/custom_output.co
  output:
    flows:
      - self check output
      - custom output checks   # Our custom rail

  # `controlled conversation` is the flow defined in rails/custom_dialog.co
  dialog:
    flows:
      - controlled conversation  # Our custom dialog rail

# Register action modules
# One entry per Python module under actions/ in this lesson
actions:
  - actions.input_safety
  - actions.output_safety
  - actions.dialog_controls

Main Application

# main.py
from nemoguardrails import LLMRails, RailsConfig

# Import custom actions to register them
import actions.input_safety
import actions.output_safety
import actions.dialog_controls

# Load the rails configuration from the ./config directory and build the
# module-level rails instance that process_message uses for every request.
config = RailsConfig.from_path("./config")
rails = LLMRails(config)

async def process_message(user_id: str, message: str) -> str:
    """
    Process user message with all custom rails.

    Args:
        user_id: Identifier exposed to the rails as the context variable
            "user_id" and attached to the tracked metrics.
        message: Text of the user's message.

    Returns:
        The text content of the generated response.
    """
    # Context variables are supplied through a leading "context" message;
    # generate_async does not take a `context` keyword argument, so this
    # is how user_id is made available to the rails.
    response = await rails.generate_async(
        messages=[
            {"role": "context", "content": {"user_id": user_id}},
            {"role": "user", "content": message},
        ]
    )
    content = response["content"]

    # Track metrics
    await actions.dialog_controls.track_conversation_metrics(
        user_id=user_id,
        message=message,
        response=content
    )

    return content

Implementation Tip: Keep actions focused and testable. Each action should do one thing well. Compose complex behavior in Colang flows rather than in Python actions.

Next: Implementing RAG retrieval rails for knowledge-grounded responses.

Quiz

Module 4: Advanced NeMo Guardrails

Take Quiz