Setting Up Your Red Team Environment

Working with Garak

3 min read

Garak is NVIDIA's LLM vulnerability scanner with 100+ attack probes. Its CLI-first design makes it ideal for quick security assessments and CI/CD integration.

Installation

# Install Garak (all platforms)
# pip install garak

# Verify installation
import subprocess
import sys

def check_garak():
    """Verify Garak is installed and working.

    Runs ``python -m garak --version`` with the current interpreter and
    inspects the exit status. ``subprocess.run`` does not raise when the
    child exits non-zero (for example when the ``garak`` module cannot be
    imported), so the return code must be checked explicitly.

    Returns:
        True if the command ran and exited with status 0, otherwise False.
    """
    try:
        result = subprocess.run(
            [sys.executable, "-m", "garak", "--version"],
            capture_output=True,
            text=True
        )
    except Exception as e:
        # The interpreter itself could not be launched.
        print(f"Garak not found: {e}")
        return False

    if result.returncode != 0:
        # A missing module is reported on stderr, not via an exception.
        detail = result.stderr.strip() or f"exit code {result.returncode}"
        print(f"Garak not found: {detail}")
        return False

    print(f"Garak installed: {result.stdout.strip()}")
    return True

# Run the check right away; the boolean result is not used here.
check_garak()

CLI Quick Start

Garak's CLI is the fastest way to scan:

# Running Garak from Python (cross-platform)
import subprocess
import sys
from pathlib import Path

def run_garak_scan(
    model_type: str,
    model_name: str,
    probes: list,
    output_dir: Path
):
    """Launch a Garak scan as a subprocess and return the completed process.

    Args:
        model_type: Value passed to ``--model_type``.
        model_name: Value passed to ``--model_name``.
        probes: Probe names; joined with commas for ``--probes``.
        output_dir: Directory (created if missing) that holds the report;
            the report prefix is ``output_dir / "garak_report"``.

    Returns:
        The ``subprocess.CompletedProcess`` of the scan, with captured
        stdout/stderr as text.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    # Assemble the argument vector piece by piece
    argv = [sys.executable, "-m", "garak"]
    argv += ["--model_type", model_type]
    argv += ["--model_name", model_name]
    argv += ["--probes", ",".join(probes)]
    argv += ["--report_prefix", str(output_dir / "garak_report")]

    print(f"Running: {' '.join(argv)}")

    completed = subprocess.run(argv, capture_output=True, text=True)

    # Report the failure first, then fall through to the success path
    if completed.returncode != 0:
        print(f"Scan failed: {completed.stderr}")
        return completed

    print("Scan completed successfully")
    print(completed.stdout)
    return completed

# Example: Scan OpenAI model
# Results go under ./garak_results, resolved against the current working
# directory; run_garak_scan creates the directory if it does not exist.
output = Path.cwd() / "garak_results"
run_garak_scan(
    model_type="openai",
    model_name="gpt-4",
    probes=["promptinject", "encoding", "dan"],
    output_dir=output
)

Understanding Probe Categories

Garak organizes attacks into probe categories:

from dataclasses import dataclass
from typing import List

@dataclass
class GarakProbe:
    """Descriptive record for one Garak probe.

    Used in this section only to build and print a reference table of
    probes; it carries no behavior of its own.
    """
    # Probe identifier, e.g. "promptinject"
    name: str
    # Grouping label for the kind of attack, e.g. "injection" or "jailbreak"
    category: str
    # One-line summary of what the probe tests
    description: str
    # Free-text note on how effective the probe tends to be
    effectiveness: str

# Key probe categories, one row per probe:
# (name, category, description, effectiveness)
probes = [
    GarakProbe(*row)
    for row in (
        (
            "promptinject",
            "injection",
            "Test for prompt injection vulnerabilities",
            "High against unguarded systems",
        ),
        (
            "encoding",
            "evasion",
            "Encoded payloads (base64, rot13, hex)",
            "Medium, bypasses basic filters",
        ),
        (
            "dan",
            "jailbreak",
            "DAN (Do Anything Now) jailbreak prompts",
            "Low against modern models",
        ),
        (
            "continuation",
            "harmful",
            "Ask model to continue harmful text",
            "Medium to High",
        ),
        (
            "realtoxicityprompts",
            "toxic",
            "Test for toxic output generation",
            "High for toxicity testing",
        ),
        (
            "xss",
            "injection",
            "Cross-site scripting in outputs",
            "High for web applications",
        ),
    )
]

# Display probe info: three lines per probe, preceded by a blank line
for probe in probes:
    print(
        f"\n{probe.name} ({probe.category})",
        f"  {probe.description}",
        f"  Effectiveness: {probe.effectiveness}",
        sep="\n",
    )

Custom Probe Configuration

Create targeted scans with specific probes:

from pathlib import Path
import json

def create_garak_config(
    target_probes: List[str],
    attack_config: dict,
    output_file: Path
):
    """Build a Garak configuration dict and save it as indented JSON.

    Args:
        target_probes: Probe identifiers stored under ``"probes"``.
        attack_config: Optional settings; ``"repetitions"`` defaults to 3
            and ``"buffs"`` defaults to an empty list when absent.
        output_file: Path the JSON document is written to.

    Returns:
        The configuration dict that was written.
    """
    generator_opts = {"repetitions": attack_config.get("repetitions", 3)}
    detector_opts = {"always_trigger": False}
    buff_names = attack_config.get("buffs", [])

    config = {
        "probes": target_probes,
        "generators": generator_opts,
        "detectors": detector_opts,
        "buffs": buff_names,
    }

    # Serialize first, then write the finished text in one go
    serialized = json.dumps(config, indent=2)
    output_file.write_text(serialized)
    print(f"Config saved to: {output_file}")
    return config

# Create focused scan config
# The file is written to ./garak_config.json in the current working directory.
config_path = Path.cwd() / "garak_config.json"
create_garak_config(
    # Probes are named here in dotted "<module>.<ProbeClass>" form
    target_probes=[
        "promptinject.HijackHateHumansMini",
        "encoding.InjectBase64",
        "dan.Dan_11_0",
    ],
    # Overrides the function's defaults (3 repetitions, no buffs)
    attack_config={
        "repetitions": 5,
        "buffs": ["lowercase", "paraphrase"],
    },
    output_file=config_path
)

Parsing Garak Results

Process scan results for reporting:

from pathlib import Path
import json
from dataclasses import dataclass
from typing import List, Optional

@dataclass
class GarakFinding:
    """Parsed Garak scan finding."""
    # Name of the probe that produced the entry ("unknown" if absent)
    probe: str
    # Name of the detector that judged the output ("unknown" if absent)
    detector: str
    # True when the entry passed; entries without a "passed" key count as passed
    passed: bool
    # Model output, truncated to at most 200 characters
    output: str
    # "HIGH" for failed entries, "INFO" for passed ones
    severity: str

def parse_garak_results(report_file: Path) -> List[GarakFinding]:
    """Parse Garak JSON report into findings.

    The report is expected to be a JSON object with a ``"results"`` list
    of entry dicts. Missing keys fall back to defaults, and severity is
    always derived from the same ``passed`` value that is stored on the
    finding, so the two fields cannot disagree.

    Args:
        report_file: Path of the JSON report to read.

    Returns:
        One ``GarakFinding`` per entry, or an empty list if the report
        file does not exist.
    """
    if not report_file.exists():
        print(f"Report not found: {report_file}")
        return []

    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(report_file, encoding="utf-8") as f:
        data = json.load(f)

    findings = []
    for entry in data.get("results", []):
        # Resolve "passed" once so `passed` and `severity` stay consistent
        # even when the key is missing from the entry.
        passed = bool(entry.get("passed", True))

        # Tolerate a null or non-string output instead of crashing on the slice.
        raw_output = entry.get("output")
        output_text = "" if raw_output is None else str(raw_output)

        finding = GarakFinding(
            probe=entry.get("probe", "unknown"),
            detector=entry.get("detector", "unknown"),
            passed=passed,
            output=output_text[:200],  # Truncate
            severity="INFO" if passed else "HIGH"
        )
        findings.append(finding)

    return findings

def summarize_findings(findings: List[GarakFinding]) -> dict:
    """Aggregate pass/fail counts for a list of findings.

    Args:
        findings: Findings to tally; each must expose ``passed`` and
            ``severity``.

    Returns:
        A dict with ``total_tests``, ``passed``, ``failed``, ``pass_rate``
        (0 when there are no findings) and ``high_severity`` (the number
        of findings whose severity is ``"HIGH"``).
    """
    total = len(findings)

    # Tally both counters in a single pass
    failed = 0
    high_count = 0
    for item in findings:
        if not item.passed:
            failed += 1
        if item.severity == "HIGH":
            high_count += 1

    passed_count = total - failed
    if total > 0:
        rate = passed_count / total
    else:
        rate = 0

    return {
        "total_tests": total,
        "passed": passed_count,
        "failed": failed,
        "pass_rate": rate,
        "high_severity": high_count,
    }

# Parse and summarize
# NOTE(review): the scan example passes --report_prefix ".../garak_report",
# while the CI example reads "<prefix>.report.json"; confirm which filename
# Garak actually writes before relying on this path.
report = Path.cwd() / "garak_results" / "garak_report.json"
findings = parse_garak_results(report)
summary = summarize_findings(findings)
# pass_rate is 0 when no findings were parsed (e.g. the report is missing)
print(f"Pass rate: {summary['pass_rate']:.1%}")

Integrating with CI/CD

Add Garak to your pipeline:

import sys
import subprocess
from pathlib import Path

def ci_garak_scan(
    model_type: str,
    model_name: str,
    fail_threshold: float = 0.9
) -> bool:
    """Run Garak scan in CI/CD pipeline.

    Runs the ``promptinject`` and ``encoding`` probes, parses the report
    written under ``./security_reports`` and compares the pass rate to
    ``fail_threshold``. The gate fails closed: a scan that exits non-zero
    or a report with no findings is a failure, so a crashed scan or a
    stale or empty report can never produce a green build.

    Args:
        model_type: Value passed to ``--model_type``.
        model_name: Value passed to ``--model_name``.
        fail_threshold: Minimum acceptable pass rate (0.0 to 1.0).

    Returns:
        True only if the scan succeeded, produced at least one finding
        and met the threshold; otherwise False.
    """
    output_dir = Path.cwd() / "security_reports"
    output_dir.mkdir(parents=True, exist_ok=True)

    # Run focused security probes
    cmd = [
        sys.executable, "-m", "garak",
        "--model_type", model_type,
        "--model_name", model_name,
        "--probes", "promptinject,encoding",
        "--report_prefix", str(output_dir / "ci_scan"),
    ]

    result = subprocess.run(cmd, capture_output=True, text=True)

    # A failed scan must fail the gate; otherwise a report left over from
    # an earlier run could be parsed and judged instead.
    if result.returncode != 0:
        print(
            f"FAILED: Garak exited with code {result.returncode}: "
            f"{result.stderr.strip()}"
        )
        return False

    # Parse results
    report_file = output_dir / "ci_scan.report.json"
    findings = parse_garak_results(report_file)

    # No findings means nothing was verified; never treat that as a pass,
    # even when the threshold is 0.
    if not findings:
        print(f"FAILED: No findings parsed from {report_file}")
        return False

    summary = summarize_findings(findings)

    # Check threshold
    if summary["pass_rate"] < fail_threshold:
        print(f"FAILED: Pass rate {summary['pass_rate']:.1%} < {fail_threshold:.1%}")
        return False

    print(f"PASSED: Pass rate {summary['pass_rate']:.1%}")
    return True

# In CI pipeline
# Require a 95% pass rate here instead of the function's 0.9 default.
success = ci_garak_scan("openai", "gpt-4", fail_threshold=0.95)
# Exit status 0 marks the pipeline step as passed, 1 as failed.
sys.exit(0 if success else 1)

Key Insight: Garak's 100+ probes provide broad coverage quickly. Use it for initial assessments, then drill deeper with DeepTeam or PyRIT.

Next, we'll explore PyRIT for advanced multi-turn attack orchestration.

Quiz

Module 2: Setting Up Your Red Team Environment

Take Quiz