Setting Up Your Red Team Environment

Installing DeepTeam

3 min read

DeepTeam by Confident AI is a comprehensive LLM red teaming framework with 40+ vulnerability classes and 10+ attack strategies. This lesson covers installation, configuration, and running your first vulnerability scan.

Installation

DeepTeam is pip-installable on all platforms:

# Install DeepTeam (all platforms)
# pip install deepteam

# Verify installation
try:
    import deepteam
except ImportError:
    print("DeepTeam not installed. Run: pip install deepteam")
else:
    # Only the import itself is guarded, so an unrelated failure is never
    # misreported as "not installed". Fall back to "unknown" when the package
    # does not expose a __version__ attribute instead of crashing.
    print(f"DeepTeam version: {getattr(deepteam, '__version__', 'unknown')}")

Configuration Setup

Store your API keys in a .env file, then load them as environment variables and check that the required keys are present:

from pathlib import Path
from dotenv import load_dotenv
import os

def configure_deepteam():
    """Load the local .env file and confirm the required API keys exist.

    Reads ``.env`` from the current working directory, then checks that each
    required key resolves to a non-empty environment variable.

    Returns:
        True when every required key is set, False otherwise. The outcome is
        also printed.
    """
    # Pull variables from the .env file in the working directory
    load_dotenv(Path.cwd() / ".env")

    # Collect every required key that is unset or empty
    absent = []
    for key in ["OPENAI_API_KEY"]:
        if not os.getenv(key):
            absent.append(key)

    if not absent:
        print("Configuration valid. Ready to scan.")
        return True

    print(f"Missing API keys: {absent}")
    print("Add them to your .env file")
    return False

# Run configuration check (the outcome is printed; the returned bool is not used here)
configure_deepteam()

Your First Vulnerability Scan

Run a basic scan targeting common vulnerabilities:

from deepteam import RedTeamer, Vulnerability
from deepteam.models import OpenAIModel

# Model under test
scan_target = OpenAIModel(model_name="gpt-4")

# The four vulnerability classes probed by this first scan
scan_vulnerabilities = [
    Vulnerability.PROMPT_INJECTION,
    Vulnerability.PII_LEAKAGE,
    Vulnerability.JAILBREAK,
    Vulnerability.HARMFUL_CONTENT,
]

# NOTE(review): confirm that RedTeamer, Vulnerability and OpenAIModel are
# exported at these import paths by the deepteam release you have installed.
red_teamer = RedTeamer(
    target_model=scan_target,
    vulnerabilities=scan_vulnerabilities,
    attack_count=5,  # attack attempts per vulnerability
)

# Execute the scan
results = red_teamer.scan()

# Print the statistics reported for each vulnerability
for name, stats in results.items():
    print(f"\n{name}:")
    print(f"  Attempts: {stats['total_attempts']}")
    print(f"  Successful: {stats['successful_attacks']}")
    # ASR is the attack success rate, formatted as a percentage
    print(f"  ASR: {stats['attack_success_rate']:.1%}")

Understanding Vulnerability Classes

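DeepTeam tests for 40+ vulnerability classes. The snippet below is a small illustrative data model (not part of DeepTeam's API) that describes four key ones and their OWASP mappings: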

from enum import Enum
from dataclasses import dataclass
from typing import List

class VulnerabilityCategory(Enum):
    """Broad grouping used to classify each vulnerability in this lesson."""
    INJECTION = "injection"    # model is manipulated through crafted input
    LEAKAGE = "leakage"        # model reveals information it should not
    HARMFUL = "harmful"        # safety guardrails are bypassed
    ROBUSTNESS = "robustness"  # model output is unreliable, e.g. false information

@dataclass
class VulnerabilityInfo:
    """Information about a vulnerability class."""
    # Human-readable name, e.g. "Prompt Injection"
    name: str
    # Broad grouping this vulnerability belongs to
    category: VulnerabilityCategory
    # One-line explanation of the risk
    description: str
    # Identifier such as "LLM01" (presumably from the OWASP Top 10 for LLM
    # Applications; confirm which edition is intended)
    owasp_mapping: str

# Key vulnerabilities to understand, one row per class:
# (name, category, description, OWASP mapping)
# NOTE(review): the IDs look like the 2023 (v1.1) OWASP Top 10 for LLM
# numbering; the 2025 edition renumbers several entries. Confirm which
# edition this lesson targets.
vulnerabilities = [
    VulnerabilityInfo(
        name=title,
        category=group,
        description=summary,
        owasp_mapping=owasp_id,
    )
    for title, group, summary, owasp_id in (
        (
            "Prompt Injection",
            VulnerabilityCategory.INJECTION,
            "Attacker manipulates model via crafted input",
            "LLM01",
        ),
        (
            "PII Leakage",
            VulnerabilityCategory.LEAKAGE,
            "Model reveals personal information",
            "LLM06",
        ),
        (
            "Jailbreak",
            VulnerabilityCategory.HARMFUL,
            "Bypassing safety guardrails",
            "LLM01",
        ),
        (
            "Hallucination",
            VulnerabilityCategory.ROBUSTNESS,
            "Model generates false information",
            "LLM09",
        ),
    )
]

# One summary line per vulnerability
for info in vulnerabilities:
    print(f"{info.name} ({info.owasp_mapping}): {info.description}")

Attack Strategies

DeepTeam includes multiple attack strategies:

from deepteam import AttackStrategy

# Attack strategies covered in this lesson: identifier -> short description
strategies = dict(
    base64_injection="Encode payload in base64",
    role_play="Make model assume different persona",
    hypothetical="Frame as hypothetical scenario",
    multi_language="Use non-English to bypass filters",
    continuation="Ask model to continue harmful text",
    few_shot="Provide examples of desired behavior",
)

# Run a jailbreak-only scan restricted to two chosen attack strategies
jailbreak_target = OpenAIModel(model_name="gpt-4")
jailbreak_strategies = [
    AttackStrategy.ROLE_PLAY,
    AttackStrategy.HYPOTHETICAL,
]

red_teamer = RedTeamer(
    target_model=jailbreak_target,
    vulnerabilities=[Vulnerability.JAILBREAK],
    attack_strategies=jailbreak_strategies,
)

# Replaces the results of the earlier scan
results = red_teamer.scan()

Saving Results

Export results for reporting:

from pathlib import Path
import json
from datetime import datetime

def save_scan_results(results: dict, output_dir: Path) -> Path:
    """Write scan results and a short summary to a timestamped JSON file.

    Args:
        results: Mapping of vulnerability name to its findings mapping. An
            entry counts as a finding when its ``"successful_attacks"`` value
            is greater than zero; entries without that key count as zero.
        output_dir: Directory for the report, given as a ``Path`` or a path
            string. It is created, including parents, if it does not exist.

    Returns:
        Path of the JSON report that was written.

    Raises:
        TypeError: If ``results`` holds values that ``json`` cannot serialize.
    """
    # Accept plain strings as well as Path objects
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = output_dir / f"deepteam_scan_{timestamp}.json"

    # A single incomplete entry must not prevent the whole report from being
    # saved, so a missing counter is treated as "no successful attacks".
    vulns_with_findings = sum(
        1 for r in results.values()
        if r.get("successful_attacks", 0) > 0
    )

    # Prepare report
    report = {
        "scan_date": timestamp,
        "tool": "DeepTeam",
        "results": results,
        "summary": {
            "total_vulns_tested": len(results),
            "vulns_with_findings": vulns_with_findings,
        }
    }

    # Explicit encoding keeps the file identical across platforms
    filename.write_text(json.dumps(report, indent=2), encoding="utf-8")
    print(f"Results saved to: {filename}")
    return filename

# After scan: write the report into a "results" folder under the current working directory
output_path = Path.cwd() / "results"
save_scan_results(results, output_path)

Key Insight: DeepTeam's Python API makes it easy to integrate into existing test suites. Start with the four core vulnerabilities before expanding coverage.

Next, we'll explore Garak for rapid vulnerability scanning.

Quiz

Module 2: Setting Up Your Red Team Environment

Take Quiz