Setting Up Your Red Team Environment
Installing DeepTeam
3 min read
DeepTeam by Confident AI is a comprehensive LLM red teaming framework with 40+ vulnerability classes and 10+ attack strategies. This lesson covers installation, configuration, and running your first vulnerability scan.
Installation
DeepTeam is pip-installable on all platforms:
# Install DeepTeam first (same command on every platform):
#   pip install deepteam

# Verify the installation by importing the package.
try:
    import deepteam
except ImportError:
    print("DeepTeam not installed. Run: pip install deepteam")
else:
    # Report the version only once the import has succeeded, so the handler
    # above guards nothing but the import itself. The getattr fallback keeps
    # the check from crashing if the package exposes no __version__.
    print(f"DeepTeam version: {getattr(deepteam, '__version__', 'unknown')}")
Configuration Setup
Keep your API keys in a `.env` file and check that they load as environment variables:
from pathlib import Path
from dotenv import load_dotenv
import os
def configure_deepteam() -> bool:
    """Load ``.env`` from the working directory and check required API keys.

    Prints either the list of missing keys or a ready message.

    Returns:
        True when every required key has a non-empty value in the
        environment, False otherwise.
    """
    # Load environment variables from the .env file in the current
    # working directory.
    env_file = Path.cwd() / ".env"
    load_dotenv(env_file)

    # Verify required API keys. An unset variable and an empty string are
    # both treated as missing.
    required_keys = ["OPENAI_API_KEY"]
    missing = [k for k in required_keys if not os.getenv(k)]
    if missing:
        print(f"Missing API keys: {missing}")
        print("Add them to your .env file")
        return False

    print("Configuration valid. Ready to scan.")
    return True
# Run configuration check (prints the outcome; the True/False result is not used here)
configure_deepteam()
Your First Vulnerability Scan
Run a basic scan targeting common vulnerabilities:
from deepteam import RedTeamer, Vulnerability
from deepteam.models import OpenAIModel
# NOTE(review): RedTeamer, Vulnerability, OpenAIModel and the result keys
# read below are used exactly as this lesson presents them — confirm them
# against the deepteam release you have installed.

# Initialize the red teamer
red_teamer = RedTeamer(
    # Target model to test
    target_model=OpenAIModel(model_name="gpt-4"),
    # Vulnerabilities to test for
    vulnerabilities=[
        Vulnerability.PROMPT_INJECTION,
        Vulnerability.PII_LEAKAGE,
        Vulnerability.JAILBREAK,
        Vulnerability.HARMFUL_CONTENT,
    ],
    # Number of attack attempts per vulnerability
    attack_count=5,
)

# Run the scan
results = red_teamer.scan()

# Display results: one section per vulnerability type
for vuln_type, findings in results.items():
    print(f"\n{vuln_type}:")
    print(f" Attempts: {findings['total_attempts']}")
    print(f" Successful: {findings['successful_attacks']}")
    # ASR = attack success rate. The .1% format multiplies by 100, so this
    # assumes the rate is stored as a 0-1 fraction — TODO confirm.
    print(f" ASR: {findings['attack_success_rate']:.1%}")
Understanding Vulnerability Classes
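DeepTeam tests for 40+ vulnerability classes. The example below models four representative ones, grouped by category and mapped to OWASP LLM Top 10 IDs: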
from enum import Enum
from dataclasses import dataclass
from typing import List
class VulnerabilityCategory(Enum):
    """Broad families used to group vulnerability classes in this lesson."""

    INJECTION = "injection"
    LEAKAGE = "leakage"
    HARMFUL = "harmful"
    ROBUSTNESS = "robustness"
@dataclass
class VulnerabilityInfo:
    """Information about a vulnerability class."""

    # Human-readable name, e.g. "Prompt Injection"
    name: str
    # Family the vulnerability belongs to
    category: VulnerabilityCategory
    # One-line explanation of the risk
    description: str
    # OWASP LLM Top 10 identifier, e.g. "LLM01"
    owasp_mapping: str
# Key vulnerabilities to understand
# NOTE(review): the OWASP IDs look like the 2023 (v1.1) Top 10 for LLM
# Applications numbering — confirm which edition the lesson targets.
vulnerabilities = [
    VulnerabilityInfo(
        name="Prompt Injection",
        category=VulnerabilityCategory.INJECTION,
        description="Attacker manipulates model via crafted input",
        owasp_mapping="LLM01"
    ),
    VulnerabilityInfo(
        name="PII Leakage",
        category=VulnerabilityCategory.LEAKAGE,
        description="Model reveals personal information",
        owasp_mapping="LLM06"
    ),
    VulnerabilityInfo(
        name="Jailbreak",
        category=VulnerabilityCategory.HARMFUL,
        description="Bypassing safety guardrails",
        owasp_mapping="LLM01"
    ),
    VulnerabilityInfo(
        name="Hallucination",
        category=VulnerabilityCategory.ROBUSTNESS,
        description="Model generates false information",
        owasp_mapping="LLM09"
    ),
]

# Print one summary line per entry: name, OWASP ID, description
for vuln in vulnerabilities:
    print(f"{vuln.name} ({vuln.owasp_mapping}): {vuln.description}")
Attack Strategies
DeepTeam includes multiple attack strategies:
from deepteam import AttackStrategy
# Strategy identifiers paired with a one-line summary of each. This mapping
# is descriptive only; the scan below does not read it.
strategies = {
    "base64_injection": "Encode payload in base64",
    "role_play": "Make model assume different persona",
    "hypothetical": "Frame as hypothetical scenario",
    "multi_language": "Use non-English to bypass filters",
    "continuation": "Ask model to continue harmful text",
    "few_shot": "Provide examples of desired behavior",
}

# Re-create the red teamer, restricted to jailbreak testing with two
# explicitly chosen strategies, and run it.
red_teamer = RedTeamer(
    target_model=OpenAIModel(model_name="gpt-4"),
    vulnerabilities=[Vulnerability.JAILBREAK],
    attack_strategies=[AttackStrategy.ROLE_PLAY, AttackStrategy.HYPOTHETICAL],
)
results = red_teamer.scan()
Saving Results
Export results for reporting:
from pathlib import Path
import json
from datetime import datetime
def save_scan_results(results: dict, output_dir: Path) -> Path:
    """Write scan results and a short summary to a timestamped JSON file.

    Args:
        results: Mapping of vulnerability name to its findings. Each
            findings entry must provide a "successful_attacks" count, and
            the whole mapping must be JSON-serializable.
        output_dir: Directory for the report; created (with parents) if it
            does not exist yet.

    Returns:
        Path of the file that was written.

    Raises:
        KeyError: If a findings entry has no "successful_attacks" key.
        TypeError: If ``results`` holds values ``json`` cannot encode.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    # Second-resolution local timestamp: two saves within the same second
    # target the same file name, and the later one overwrites the earlier.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = output_dir / f"deepteam_scan_{timestamp}.json"

    # Prepare report
    report = {
        "scan_date": timestamp,
        "tool": "DeepTeam",
        "results": results,
        "summary": {
            "total_vulns_tested": len(results),
            # Count vulnerabilities where at least one attack succeeded
            "vulns_with_findings": sum(
                1 for r in results.values()
                if r["successful_attacks"] > 0
            ),
        }
    }

    # Explicit encoding so the output does not depend on the platform's
    # locale default.
    filename.write_text(json.dumps(report, indent=2), encoding="utf-8")
    print(f"Results saved to: {filename}")
    return filename
# After scan: write the report into a "results" folder under the current working directory
output_path = Path.cwd() / "results"
save_scan_results(results, output_path)
Key Insight: DeepTeam's Python API makes it easy to integrate into existing test suites. Start with the four core vulnerabilities before expanding coverage.
Next, we'll explore Garak for rapid vulnerability scanning.