GitHub Actions for Machine Learning Workflows
Model Validation Workflows
3 min read
Training a model is only the first step. Before deploying it, you need to verify that it meets your quality criteria. Let's build a validation workflow that acts as a series of quality gates.
The Validation Pipeline
┌─────────────────────────────────────────────────────────────┐
│                  Model Validation Pipeline                  │
├─────────────────────────────────────────────────────────────┤
│                                                             │
│  Train ──▶ Accuracy ──▶ Latency ──▶ Fairness ──▶ Deploy     │
│            check        check       check                   │
│              │            │           │                     │
│              ▼            ▼           ▼                     │
│           >0.85?       <100ms?     bias OK?                 │
│                                                             │
└─────────────────────────────────────────────────────────────┘
Accuracy Threshold Gate
name: Model Validation
on:
workflow_run:
workflows: ["Training Pipeline"]
types: [completed]
jobs:
  validate-accuracy:
    runs-on: ubuntu-latest
    if: ${{ github.event.workflow_run.conclusion == 'success' }}
    outputs:
      accuracy: ${{ steps.evaluate.outputs.accuracy }}
steps:
- uses: actions/checkout@v4
      - name: Download trained model
        uses: actions/download-artifact@v4
        with:
          name: trained-model
          path: models/
          run-id: ${{ github.event.workflow_run.id }}
          github-token: ${{ github.token }}
- name: Evaluate model
id: evaluate
run: |
python scripts/evaluate.py \
--model models/model.pkl \
--test-data data/test.parquet \
--output metrics.json
ACCURACY=$(jq .accuracy metrics.json)
echo "accuracy=$ACCURACY" >> $GITHUB_OUTPUT
- name: Check accuracy threshold
run: |
ACCURACY=${{ steps.evaluate.outputs.accuracy }}
THRESHOLD=0.85
if (( $(echo "$ACCURACY < $THRESHOLD" | bc -l) )); then
echo "::error::Accuracy $ACCURACY below threshold $THRESHOLD"
exit 1
fi
echo "::notice::Accuracy $ACCURACY meets threshold $THRESHOLD"
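The job above assumes a scripts/evaluate.py that writes its metrics as JSON for jq to read. That script isn't shown in this guide; a minimal sketch, assuming a pickled scikit-learn-style model and a Parquet test set with a "label" column (both hypothetical details), could look like this:

# scripts/evaluate.py -- hypothetical sketch; adjust column names and metrics to your project
import argparse
import json
import pickle

import pandas as pd
from sklearn.metrics import accuracy_score

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", required=True)
    parser.add_argument("--test-data", required=True)
    parser.add_argument("--output", required=True)
    args = parser.parse_args()

    # Load the pickled model and the held-out test set
    with open(args.model, "rb") as f:
        model = pickle.load(f)
    test = pd.read_parquet(args.test_data)

    # Assumes the target column is named "label"; everything else is a feature
    X, y = test.drop(columns=["label"]), test["label"]
    predictions = model.predict(X)

    # Emit metrics as JSON so the workflow can read them with jq
    with open(args.output, "w") as f:
        json.dump({"accuracy": float(accuracy_score(y, predictions))}, f)

if __name__ == "__main__":
    main()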
Latency Validation
  validate-latency:
    needs: validate-accuracy
    runs-on: ubuntu-latest
    outputs:
      p99: ${{ steps.latency.outputs.p99 }}
    steps:
      - uses: actions/checkout@v4
      - name: Download model
        uses: actions/download-artifact@v4
        with:
          name: trained-model
          path: models/
          run-id: ${{ github.event.workflow_run.id }}
          github-token: ${{ github.token }}
- name: Measure inference latency
id: latency
run: |
python scripts/benchmark.py \
--model models/model.pkl \
--samples 1000 \
--output latency.json
P50=$(jq .p50_ms latency.json)
P99=$(jq .p99_ms latency.json)
echo "p50=$P50" >> $GITHUB_OUTPUT
echo "p99=$P99" >> $GITHUB_OUTPUT
- name: Check latency threshold
run: |
P99=${{ steps.latency.outputs.p99 }}
MAX_LATENCY=100 # 100ms
if (( $(echo "$P99 > $MAX_LATENCY" | bc -l) )); then
echo "::error::P99 latency ${P99}ms exceeds ${MAX_LATENCY}ms"
exit 1
fi
echo "::notice::P99 latency ${P99}ms within threshold"
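As with the evaluation step, scripts/benchmark.py is assumed rather than shown. A rough sketch that produces the p50_ms and p99_ms fields the workflow reads might time single-row predictions over synthetic inputs (the feature shape and random data here are stand-ins for production-shaped inputs):

# scripts/benchmark.py -- hypothetical sketch; real benchmarks should use production-shaped inputs
import argparse
import json
import pickle
import time

import numpy as np

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", required=True)
    parser.add_argument("--samples", type=int, default=1000)
    parser.add_argument("--output", required=True)
    args = parser.parse_args()

    with open(args.model, "rb") as f:
        model = pickle.load(f)

    # Assumes the model accepts a 2-D numeric array; swap in real feature vectors if available
    n_features = getattr(model, "n_features_in_", 10)
    rng = np.random.default_rng(42)

    latencies_ms = []
    for _ in range(args.samples):
        x = rng.normal(size=(1, n_features))
        start = time.perf_counter()
        model.predict(x)
        latencies_ms.append((time.perf_counter() - start) * 1000)

    # Report the percentiles the workflow's jq calls expect
    result = {
        "p50_ms": float(np.percentile(latencies_ms, 50)),
        "p99_ms": float(np.percentile(latencies_ms, 99)),
    }
    with open(args.output, "w") as f:
        json.dump(result, f)

if __name__ == "__main__":
    main()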
Fairness and Bias Testing
validate-fairness:
needs: validate-accuracy
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
      - name: Download model
        uses: actions/download-artifact@v4
        with:
          name: trained-model
          path: models/
          run-id: ${{ github.event.workflow_run.id }}
          github-token: ${{ github.token }}
- name: Run fairness analysis
id: fairness
run: |
python scripts/fairness_check.py \
--model models/model.pkl \
--test-data data/test.parquet \
--protected-attributes gender,age_group \
--output fairness.json
      - name: Check demographic parity
        run: |
          # Fail if any protected group receives significantly different outcomes
          python - <<'PY'
          import json
          import sys

          with open('fairness.json') as f:
              results = json.load(f)

          # The 80% rule: ratios outside [0.8, 1.25] indicate a parity violation
          for attr, metrics in results['demographic_parity'].items():
              ratio = metrics['ratio']
              if ratio < 0.8 or ratio > 1.25:
                  print(f'::error::Demographic parity violation for {attr}: {ratio}')
                  sys.exit(1)

          print('::notice::All fairness checks passed')
          PY
Regression Testing Against a Baseline
validate-regression:
needs: validate-accuracy
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
      - name: Download new model
        uses: actions/download-artifact@v4
        with:
          name: trained-model
          path: models/new/
          run-id: ${{ github.event.workflow_run.id }}
          github-token: ${{ github.token }}
- name: Download baseline model
run: |
          # Fetch the production model from the model registry
python scripts/fetch_baseline.py --output models/baseline/
- name: Compare models
id: compare
run: |
python scripts/compare_models.py \
--new models/new/model.pkl \
--baseline models/baseline/model.pkl \
--test-data data/test.parquet \
--output comparison.json
IMPROVEMENT=$(jq .accuracy_improvement comparison.json)
echo "improvement=$IMPROVEMENT" >> $GITHUB_OUTPUT
- name: Check for regression
run: |
IMPROVEMENT=${{ steps.compare.outputs.improvement }}
          MIN_IMPROVEMENT=-0.01  # allow at most a 1-point drop in accuracy
if (( $(echo "$IMPROVEMENT < $MIN_IMPROVEMENT" | bc -l) )); then
echo "::error::Model regression detected: ${IMPROVEMENT}"
exit 1
fi
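Finally, scripts/compare_models.py only needs to score both models on the same test set and report the difference the gate checks. A minimal sketch, reusing the same hypothetical data layout as the earlier scripts:

# scripts/compare_models.py -- hypothetical sketch comparing a candidate model against the baseline
import argparse
import json
import pickle

import pandas as pd
from sklearn.metrics import accuracy_score

def load_model(path):
    with open(path, "rb") as f:
        return pickle.load(f)

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--new", required=True)
    parser.add_argument("--baseline", required=True)
    parser.add_argument("--test-data", required=True)
    parser.add_argument("--output", required=True)
    args = parser.parse_args()

    test = pd.read_parquet(args.test_data)
    X, y = test.drop(columns=["label"]), test["label"]  # assumes a "label" column

    new_acc = accuracy_score(y, load_model(args.new).predict(X))
    baseline_acc = accuracy_score(y, load_model(args.baseline).predict(X))

    # Positive values mean the candidate beats the baseline; the workflow
    # fails the gate if this drops below -0.01 (a one-point regression)
    comparison = {
        "new_accuracy": float(new_acc),
        "baseline_accuracy": float(baseline_acc),
        "accuracy_improvement": float(new_acc - baseline_acc),
    }
    with open(args.output, "w") as f:
        json.dump(comparison, f)

if __name__ == "__main__":
    main()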
Posting Results as a PR Comment
  report-results:
    needs: [validate-accuracy, validate-latency, validate-fairness]
    runs-on: ubuntu-latest
    if: github.event.workflow_run.event == 'pull_request'
    steps:
      - name: Create validation report
        uses: actions/github-script@v7
        with:
          script: |
            const accuracy = '${{ needs.validate-accuracy.outputs.accuracy }}';
            const p99 = '${{ needs.validate-latency.outputs.p99 }}';
            // The workflow_run payload carries the PRs associated with the triggering run
            const [pr] = context.payload.workflow_run.pull_requests;
            if (!pr) return;
            const body = `## Model Validation Report
            | Metric | Value | Threshold | Status |
            |--------|-------|-----------|--------|
            | Accuracy | ${accuracy} | ≥ 0.85 | ✅ |
            | P99 latency | ${p99}ms | ≤ 100ms | ✅ |
            | Fairness | Passed | - | ✅ |
            **Ready to deploy**`;
            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: pr.number,
              body: body
            });
The Complete Validation Workflow
name: Model Validation
on:
workflow_run:
workflows: ["Training"]
types: [completed]
jobs:
gate-accuracy:
runs-on: ubuntu-latest
outputs:
accuracy: ${{ steps.eval.outputs.accuracy }}
steps:
- uses: actions/checkout@v4
      - uses: actions/download-artifact@v4
        with:
          name: model
          run-id: ${{ github.event.workflow_run.id }}
          github-token: ${{ github.token }}
- id: eval
run: |
python evaluate.py --output metrics.json
echo "accuracy=$(jq .accuracy metrics.json)" >> $GITHUB_OUTPUT
- name: Gate check
run: |
if (( $(echo "${{ steps.eval.outputs.accuracy }} < 0.85" | bc -l) )); then
exit 1
fi
gate-latency:
needs: gate-accuracy
runs-on: ubuntu-latest
steps:
- run: python benchmark.py --max-p99 100
gate-fairness:
needs: gate-accuracy
runs-on: ubuntu-latest
steps:
- run: python fairness.py --threshold 0.8
deploy:
needs: [gate-accuracy, gate-latency, gate-fairness]
runs-on: ubuntu-latest
steps:
- run: echo "All gates passed, deploying..."
- run: ./deploy.sh
Key insight: validation gates should cover accuracy, latency, fairness, and regression. An accurate model that is slow or biased should never reach production.
Next, we'll explore reusable workflows and best practices.