DVC + CML for ML Automation
Automated Experiment Reports
5 min read
Why Automated Reports?
Manual experiment tracking leads to:
- Inconsistent documentation
- Lost experiment context
- Difficult code review for ML changes
- No visibility into model performance changes
Automated reports solve these problems by creating consistent, comprehensive experiment documentation directly in your Git workflow.
Report Structure Best Practices
A good ML experiment report includes (a rendered skeleton follows the list):
- Experiment metadata: branch, commit, timestamp
- Metrics comparison: current vs. baseline
- Visualizations: confusion matrix, ROC curve, learning curves
- Model details: architecture, hyperparameters
- Data summary: dataset size, splits, distributions
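For orientation, here is a condensed sketch of how the generated report.md might render in a pull request (section names match the scripts below; branch name and metric values are purely illustrative):

# ML Experiment Report

## Experiment Metadata
- Branch: feature/tune-depth
- Commit: a1b2c3d

## Metrics Comparison
| Metric | Baseline (main) | This PR | Change |
|--------|-----------------|---------|--------|
| accuracy | 0.9012 | 0.9134 | +0.0122 ↑ |

## Visualizations
...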
Complete Reporting Workflow
# .github/workflows/experiment-report.yml
name: ML Experiment Report
on:
pull_request:
branches: [main]
paths:
- 'src/**'
- 'params.yaml'
- 'data/**'
jobs:
experiment:
runs-on: ubuntu-latest
permissions:
contents: read
pull-requests: write
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- uses: actions/setup-python@v5
with:
python-version: '3.11'
- uses: actions/setup-node@v4
with:
node-version: '18'
- name: Install dependencies
run: |
          pip install 'dvc[s3]' pandas scikit-learn matplotlib seaborn jinja2
npm install -g @dvcorg/cml
- name: Pull data
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
run: dvc pull
- name: Train model
run: python train.py
- name: Generate visualizations
run: python scripts/generate_plots.py
- name: Create report
env:
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: python scripts/create_report.py
- name: Post report
env:
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: cml comment create report.md
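Note that every push to the PR re-runs this workflow, and cml comment create posts a fresh comment each time. If you would rather keep a single, continuously updated comment on the PR, the last step can use cml comment update, which edits the most recent CML comment (and creates one when none exists):

      - name: Post report
        env:
          REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: cml comment update report.md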
Report Generation Script
# scripts/create_report.py
import json
import os
import subprocess
from datetime import datetime, timezone
def run_command(cmd):
"""Run shell command and return output."""
result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
return result.stdout.strip()
def create_report():
"""Generate comprehensive ML experiment report."""
report = []
# Header
report.append("# ML Experiment Report")
report.append("")
# Metadata
report.append("## Experiment Metadata")
report.append(f"- **Branch:** {os.environ.get('GITHUB_HEAD_REF', 'unknown')}")
report.append(f"- **Commit:** {os.environ.get('GITHUB_SHA', 'unknown')[:7]}")
report.append(f"- **Timestamp:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S UTC')}")
report.append(f"- **Triggered by:** {os.environ.get('GITHUB_ACTOR', 'unknown')}")
report.append("")
# Metrics comparison
report.append("## Metrics Comparison")
report.append("")
report.append("| Metric | Baseline (main) | This PR | Change |")
report.append("|--------|-----------------|---------|--------|")
# Load current metrics
with open("metrics/eval_metrics.json") as f:
current = json.load(f)
    # Get baseline metrics from the default branch (use origin/main:
    # a PR checkout in CI usually has no local 'main' branch)
    baseline_str = run_command("git show origin/main:metrics/eval_metrics.json 2>/dev/null || echo '{}'")
try:
baseline = json.loads(baseline_str)
    except json.JSONDecodeError:
baseline = {}
for metric, value in current.items():
baseline_val = baseline.get(metric, "N/A")
if isinstance(baseline_val, (int, float)) and isinstance(value, (int, float)):
change = value - baseline_val
            # arrows show direction only; whether an increase is good depends on the metric
            emoji = "→" if abs(change) < 0.01 else ("↑" if change > 0 else "↓")
report.append(f"| {metric} | {baseline_val:.4f} | {value:.4f} | {change:+.4f} {emoji} |")
else:
report.append(f"| {metric} | {baseline_val} | {value} | - |")
report.append("")
# Hyperparameters
report.append("## Hyperparameters")
report.append("")
report.append("```yaml")
with open("params.yaml") as f:
report.append(f.read())
report.append("```")
report.append("")
# Visualizations
report.append("## Visualizations")
report.append("")
plots = [
("Confusion Matrix", "plots/confusion_matrix.png"),
("ROC Curve", "plots/roc_curve.png"),
("Precision-Recall Curve", "plots/pr_curve.png"),
("Feature Importance", "plots/feature_importance.png"),
("Learning Curves", "plots/learning_curves.png"),
]
for title, path in plots:
if os.path.exists(path):
# Use CML to publish asset
md_link = run_command(f"cml asset publish {path} --md")
report.append(f"### {title}")
report.append(md_link)
report.append("")
# Model summary
report.append("## Model Summary")
report.append("")
if os.path.exists("metrics/model_summary.txt"):
report.append("```")
with open("metrics/model_summary.txt") as f:
report.append(f.read())
report.append("```")
report.append("")
# Data summary
report.append("## Data Summary")
report.append("")
if os.path.exists("metrics/data_summary.json"):
with open("metrics/data_summary.json") as f:
data_summary = json.load(f)
report.append(f"- **Training samples:** {data_summary.get('train_size', 'N/A')}")
report.append(f"- **Validation samples:** {data_summary.get('val_size', 'N/A')}")
report.append(f"- **Test samples:** {data_summary.get('test_size', 'N/A')}")
report.append(f"- **Features:** {data_summary.get('num_features', 'N/A')}")
report.append("")
# Write report
with open("report.md", "w") as f:
f.write("\n".join(report))
print("Report generated: report.md")
if __name__ == "__main__":
create_report()
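One caveat worth knowing: on pull_request events, GITHUB_SHA is the SHA of the synthetic merge commit GitHub creates for the workflow run, not the PR's head commit. If the report should show the head commit instead, it can be read from the event payload via the standard GITHUB_EVENT_PATH mechanism (a small sketch; the helper name is ours):

# sketch: resolve the PR head commit instead of the merge commit
import json
import os

def pr_head_sha(default="unknown"):
    """Return the short PR head SHA on pull_request events, else `default`."""
    event_path = os.environ.get("GITHUB_EVENT_PATH", "")
    if not os.path.exists(event_path):
        return default
    with open(event_path) as f:
        event = json.load(f)
    return event.get("pull_request", {}).get("head", {}).get("sha", default)[:7]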
Visualization Generation
# scripts/generate_plots.py
import os

import joblib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.metrics import confusion_matrix, roc_curve, precision_recall_curve
os.makedirs("plots", exist_ok=True)
# Load model and data
model = joblib.load("models/model.pkl")
X_test = pd.read_parquet("data/features/X_test.parquet")
y_test = pd.read_parquet("data/features/y_test.parquet")["label"]
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]  # assumes a binary classifier with predict_proba
# Confusion Matrix
plt.figure(figsize=(8, 6))
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues")
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.savefig("plots/confusion_matrix.png", dpi=100, bbox_inches="tight")
plt.close()
# ROC Curve
plt.figure(figsize=(8, 6))
fpr, tpr, _ = roc_curve(y_test, y_proba)
plt.plot(fpr, tpr, label="Model")
plt.plot([0, 1], [0, 1], "k--", label="Random")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve")
plt.legend()
plt.savefig("plots/roc_curve.png", dpi=100, bbox_inches="tight")
plt.close()
# Precision-Recall Curve
plt.figure(figsize=(8, 6))
precision, recall, _ = precision_recall_curve(y_test, y_proba)
plt.plot(recall, precision)
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title("Precision-Recall Curve")
plt.savefig("plots/pr_curve.png", dpi=100, bbox_inches="tight")
plt.close()
# Feature Importance
if hasattr(model, "feature_importances_"):
plt.figure(figsize=(10, 8))
importance = pd.DataFrame({
"feature": X_test.columns,
"importance": model.feature_importances_
}).sort_values("importance", ascending=True).tail(20)
plt.barh(importance["feature"], importance["importance"])
plt.xlabel("Importance")
plt.title("Top 20 Feature Importance")
plt.savefig("plots/feature_importance.png", dpi=100, bbox_inches="tight")
plt.close()
print("Plots generated successfully!")
Key Takeaways
| Report Component | Purpose |
|---|---|
| Metadata | Track experiment context |
| Metrics comparison | Show improvement/regression |
| Visualizations | Visual evidence of model performance |
| Hyperparameters | Document configuration |
| Data summary | Dataset characteristics |