DVC + CML for ML Automation

Automated Experiment Reports

Why Automated Reports?

Manual experiment tracking leads to:

  • Inconsistent documentation
  • Lost experiment context
  • Difficult code review for ML changes
  • No visibility into model performance changes

Automated reports solve these problems by generating consistent, comprehensive experiment documentation directly in your Git workflow.

Report Structure Best Practices

A good ML experiment report includes the following; a skeleton of the resulting Markdown is sketched after the list:

  1. Experiment metadata: Branch, commit, timestamp
  2. Metrics comparison: Current vs baseline
  3. Visualizations: Confusion matrix, ROC, learning curves
  4. Model details: Architecture, hyperparameters
  5. Data summary: Dataset size, splits, distributions
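
Those components map onto a generated report.md whose skeleton looks roughly like this (headings mirror the generation script further below; the values are placeholders, not real results):

# ML Experiment Report

## Experiment Metadata
- **Branch:** <branch>
- **Commit:** <short sha>
- **Timestamp:** <UTC timestamp>

## Metrics Comparison
| Metric | Baseline (main) | This PR | Change |
|--------|-----------------|---------|--------|
| <metric> | <baseline> | <current> | <delta> |

## Hyperparameters
<params.yaml contents>

## Visualizations
<published plot images>

## Model Summary
<model architecture / summary text>

## Data Summary
<dataset sizes and feature count>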

Complete Reporting Workflow

# .github/workflows/experiment-report.yml
name: ML Experiment Report
on:
  pull_request:
    branches: [main]
    paths:
      - 'src/**'
      - 'params.yaml'
      - 'data/**'

jobs:
  experiment:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: write

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - uses: actions/setup-node@v4
        with:
          node-version: '18'

      - name: Install dependencies
        run: |
          # dvc[s3] includes the S3 support that `dvc pull` needs for the AWS-backed remote
          pip install "dvc[s3]" pandas scikit-learn matplotlib seaborn jinja2
          npm install -g @dvcorg/cml

      - name: Pull data
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        run: dvc pull

      - name: Train model
        run: python train.py

      - name: Generate visualizations
        run: python scripts/generate_plots.py

      - name: Create report
        env:
          REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: python scripts/create_report.py

      - name: Post report
        env:
          REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: cml comment create report.md
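
The reporting step below assumes that train.py leaves evaluation metrics at metrics/eval_metrics.json. A minimal sketch of that hand-off, assuming scikit-learn-style evaluation variables (y_test, y_pred, y_proba and the metric names are illustrative):

# end of train.py (sketch): persist metrics for the reporting step
import json
import os

from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

os.makedirs("metrics", exist_ok=True)
metrics = {
    # cast to plain floats so json.dump accepts the values
    "accuracy": float(accuracy_score(y_test, y_pred)),
    "f1": float(f1_score(y_test, y_pred)),
    "roc_auc": float(roc_auc_score(y_test, y_proba)),
}
with open("metrics/eval_metrics.json", "w") as f:
    json.dump(metrics, f, indent=2)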

Report Generation Script

# scripts/create_report.py
import json
import os
import subprocess
from datetime import datetime, timezone

def run_command(cmd):
    """Run shell command and return output."""
    result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
    return result.stdout.strip()

def create_report():
    """Generate comprehensive ML experiment report."""
    report = []

    # Header
    report.append("# ML Experiment Report")
    report.append("")

    # Metadata
    report.append("## Experiment Metadata")
    report.append(f"- **Branch:** {os.environ.get('GITHUB_HEAD_REF', 'unknown')}")
    report.append(f"- **Commit:** {os.environ.get('GITHUB_SHA', 'unknown')[:7]}")
    report.append(f"- **Timestamp:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S UTC')}")
    report.append(f"- **Triggered by:** {os.environ.get('GITHUB_ACTOR', 'unknown')}")
    report.append("")

    # Metrics comparison
    report.append("## Metrics Comparison")
    report.append("")
    report.append("| Metric | Baseline (main) | This PR | Change |")
    report.append("|--------|-----------------|---------|--------|")

    # Load current metrics
    with open("metrics/eval_metrics.json") as f:
        current = json.load(f)

    # Get baseline metrics from the main branch (origin/main exists in the Actions checkout; a local main branch does not)
    baseline_str = run_command("git show origin/main:metrics/eval_metrics.json 2>/dev/null || echo '{}'")
    try:
        baseline = json.loads(baseline_str)
    except json.JSONDecodeError:
        baseline = {}

    for metric, value in current.items():
        baseline_val = baseline.get(metric, "N/A")
        if isinstance(baseline_val, (int, float)) and isinstance(value, (int, float)):
            change = value - baseline_val
            emoji = "" if abs(change) < 0.01 else ("" if change > 0 else "")
            report.append(f"| {metric} | {baseline_val:.4f} | {value:.4f} | {change:+.4f} {emoji} |")
        else:
            report.append(f"| {metric} | {baseline_val} | {value} | - |")

    report.append("")

    # Hyperparameters
    report.append("## Hyperparameters")
    report.append("")
    report.append("```yaml")
    with open("params.yaml") as f:
        report.append(f.read())
    report.append("```")
    report.append("")

    # Visualizations
    report.append("## Visualizations")
    report.append("")

    plots = [
        ("Confusion Matrix", "plots/confusion_matrix.png"),
        ("ROC Curve", "plots/roc_curve.png"),
        ("Precision-Recall Curve", "plots/pr_curve.png"),
        ("Feature Importance", "plots/feature_importance.png"),
        ("Learning Curves", "plots/learning_curves.png"),
    ]

    for title, path in plots:
        if os.path.exists(path):
            # Use CML to publish asset
            md_link = run_command(f"cml asset publish {path} --md")
            report.append(f"### {title}")
            report.append(md_link)
            report.append("")

    # Model summary
    report.append("## Model Summary")
    report.append("")

    if os.path.exists("metrics/model_summary.txt"):
        report.append("```")
        with open("metrics/model_summary.txt") as f:
            report.append(f.read())
        report.append("```")
    report.append("")

    # Data summary
    report.append("## Data Summary")
    report.append("")

    if os.path.exists("metrics/data_summary.json"):
        with open("metrics/data_summary.json") as f:
            data_summary = json.load(f)
        report.append(f"- **Training samples:** {data_summary.get('train_size', 'N/A')}")
        report.append(f"- **Validation samples:** {data_summary.get('val_size', 'N/A')}")
        report.append(f"- **Test samples:** {data_summary.get('test_size', 'N/A')}")
        report.append(f"- **Features:** {data_summary.get('num_features', 'N/A')}")
    report.append("")

    # Write report
    with open("report.md", "w") as f:
        f.write("\n".join(report))

    print("Report generated: report.md")

if __name__ == "__main__":
    create_report()
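
The Data Summary section reads metrics/data_summary.json, which has to be produced upstream in the pipeline. A minimal sketch of writing it, assuming a data-preparation step that already holds the split DataFrames (X_train, X_val, X_test are illustrative names):

# end of a data-preparation step (sketch): record dataset characteristics
import json
import os

os.makedirs("metrics", exist_ok=True)
summary = {
    "train_size": len(X_train),
    "val_size": len(X_val),
    "test_size": len(X_test),
    "num_features": X_train.shape[1],
}
with open("metrics/data_summary.json", "w") as f:
    json.dump(summary, f, indent=2)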

Visualization Generation

# scripts/generate_plots.py
import os

import joblib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.metrics import confusion_matrix, roc_curve, precision_recall_curve

os.makedirs("plots", exist_ok=True)

# Load model and data
model = joblib.load("models/model.pkl")
X_test = pd.read_parquet("data/features/X_test.parquet")
y_test = pd.read_parquet("data/features/y_test.parquet")["label"]
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]

# Confusion Matrix
plt.figure(figsize=(8, 6))
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues")
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.savefig("plots/confusion_matrix.png", dpi=100, bbox_inches="tight")
plt.close()

# ROC Curve
plt.figure(figsize=(8, 6))
fpr, tpr, _ = roc_curve(y_test, y_proba)
plt.plot(fpr, tpr, label="Model")
plt.plot([0, 1], [0, 1], "k--", label="Random")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve")
plt.legend()
plt.savefig("plots/roc_curve.png", dpi=100, bbox_inches="tight")
plt.close()

# Precision-Recall Curve
plt.figure(figsize=(8, 6))
precision, recall, _ = precision_recall_curve(y_test, y_proba)
plt.plot(recall, precision)
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title("Precision-Recall Curve")
plt.savefig("plots/pr_curve.png", dpi=100, bbox_inches="tight")
plt.close()

# Feature Importance
if hasattr(model, "feature_importances_"):
    plt.figure(figsize=(10, 8))
    importance = pd.DataFrame({
        "feature": X_test.columns,
        "importance": model.feature_importances_
    }).sort_values("importance", ascending=True).tail(20)

    plt.barh(importance["feature"], importance["importance"])
    plt.xlabel("Importance")
    plt.title("Top 20 Feature Importance")
    plt.savefig("plots/feature_importance.png", dpi=100, bbox_inches="tight")
    plt.close()

print("Plots generated successfully!")

Key Takeaways

Each report component serves a distinct purpose:

  • Metadata: track experiment context
  • Metrics comparison: show improvement or regression against the baseline
  • Visualizations: communicate model performance visually
  • Hyperparameters: document the configuration
  • Data summary: describe dataset characteristics
