GitHub Actions for Machine Learning Workflows
Model Validation Workflows
3 min read
Training a model is only the first step. Before deploying it, you need to verify that it meets your quality criteria. Let's build a validation workflow that acts as a series of quality gates.
The Validation Pipeline
┌─────────────────────────────────────────────────────────────┐
│                  Model Validation Pipeline                  │
├─────────────────────────────────────────────────────────────┤
│                                                             │
│  Train ──▶ Accuracy ──▶ Latency ──▶ Fairness ──▶ Deploy     │
│            check        check       check                   │
│              │            │           │                     │
│              ▼            ▼           ▼                     │
│           >0.85?       <100ms?     bias OK?                 │
│                                                             │
└─────────────────────────────────────────────────────────────┘
Accuracy Threshold Gate
name: Model Validation
on:
workflow_run:
workflows: ["Training Pipeline"]
types: [completed]
jobs:
  validate-accuracy:
    runs-on: ubuntu-latest
    if: ${{ github.event.workflow_run.conclusion == 'success' }}
    outputs:
      accuracy: ${{ steps.evaluate.outputs.accuracy }}
steps:
- uses: actions/checkout@v4
      - name: Download trained model
        uses: actions/download-artifact@v4
        with:
          name: trained-model
          path: models/
          run-id: ${{ github.event.workflow_run.id }}
          github-token: ${{ github.token }}
- name: Evaluate model
id: evaluate
run: |
python scripts/evaluate.py \
--model models/model.pkl \
--test-data data/test.parquet \
--output metrics.json
ACCURACY=$(jq .accuracy metrics.json)
echo "accuracy=$ACCURACY" >> $GITHUB_OUTPUT
- name: Check accuracy threshold
run: |
ACCURACY=${{ steps.evaluate.outputs.accuracy }}
THRESHOLD=0.85
if (( $(echo "$ACCURACY < $THRESHOLD" | bc -l) )); then
echo "::error::Accuracy $ACCURACY below threshold $THRESHOLD"
exit 1
fi
echo "::notice::Accuracy $ACCURACY meets threshold $THRESHOLD"
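The job above assumes a scripts/evaluate.py that writes its metrics as JSON for jq to read. That script isn't shown in this guide; a minimal sketch, assuming a pickled scikit-learn-style model and a Parquet test set with a "label" column (both hypothetical details), could look like this:

# scripts/evaluate.py -- hypothetical sketch; adjust column names and metrics to your project
import argparse
import json
import pickle

import pandas as pd
from sklearn.metrics import accuracy_score

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", required=True)
    parser.add_argument("--test-data", required=True)
    parser.add_argument("--output", required=True)
    args = parser.parse_args()

    # Load the pickled model and the held-out test set
    with open(args.model, "rb") as f:
        model = pickle.load(f)
    test = pd.read_parquet(args.test_data)

    # Assumes the target column is named "label"; everything else is a feature
    X, y = test.drop(columns=["label"]), test["label"]
    predictions = model.predict(X)

    # Emit metrics as JSON so the workflow can read them with jq
    with open(args.output, "w") as f:
        json.dump({"accuracy": float(accuracy_score(y, predictions))}, f)

if __name__ == "__main__":
    main()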
Latency Validation
  validate-latency:
    needs: validate-accuracy
    runs-on: ubuntu-latest
    outputs:
      p99: ${{ steps.latency.outputs.p99 }}
    steps:
      - uses: actions/checkout@v4
      - name: Download model
        uses: actions/download-artifact@v4
        with:
          name: trained-model
          path: models/
          run-id: ${{ github.event.workflow_run.id }}
          github-token: ${{ github.token }}
- name: Measure inference latency
id: latency
run: |
python scripts/benchmark.py \
--model models/model.pkl \
--samples 1000 \
--output latency.json
P50=$(jq .p50_ms latency.json)
P99=$(jq .p99_ms latency.json)
echo "p50=$P50" >> $GITHUB_OUTPUT
echo "p99=$P99" >> $GITHUB_OUTPUT
- name: Check latency threshold
run: |
P99=${{ steps.latency.outputs.p99 }}
MAX_LATENCY=100 # 100ms
if (( $(echo "$P99 > $MAX_LATENCY" | bc -l) )); then
echo "::error::P99 latency ${P99}ms exceeds ${MAX_LATENCY}ms"
exit 1
fi
echo "::notice::P99 latency ${P99}ms within threshold"
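As with the evaluation step, scripts/benchmark.py is assumed rather than shown. A rough sketch that produces the p50_ms and p99_ms fields the workflow reads might time single-row predictions over synthetic inputs (the feature shape and random data here are stand-ins for production-shaped inputs):

# scripts/benchmark.py -- hypothetical sketch; real benchmarks should use production-shaped inputs
import argparse
import json
import pickle
import time

import numpy as np

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", required=True)
    parser.add_argument("--samples", type=int, default=1000)
    parser.add_argument("--output", required=True)
    args = parser.parse_args()

    with open(args.model, "rb") as f:
        model = pickle.load(f)

    # Assumes the model accepts a 2-D numeric array; swap in real feature vectors if available
    n_features = getattr(model, "n_features_in_", 10)
    rng = np.random.default_rng(42)

    latencies_ms = []
    for _ in range(args.samples):
        x = rng.normal(size=(1, n_features))
        start = time.perf_counter()
        model.predict(x)
        latencies_ms.append((time.perf_counter() - start) * 1000)

    # Report the percentiles the workflow's jq calls expect
    result = {
        "p50_ms": float(np.percentile(latencies_ms, 50)),
        "p99_ms": float(np.percentile(latencies_ms, 99)),
    }
    with open(args.output, "w") as f:
        json.dump(result, f)

if __name__ == "__main__":
    main()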
Fairness and Bias Testing
validate-fairness:
needs: validate-accuracy
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
      - name: Download model
        uses: actions/download-artifact@v4
        with:
          name: trained-model
          path: models/
          run-id: ${{ github.event.workflow_run.id }}
          github-token: ${{ github.token }}
- name: Run fairness analysis
id: fairness
run: |
python scripts/fairness_check.py \
--model models/model.pkl \
--test-data data/test.parquet \
--protected-attributes gender,age_group \
--output fairness.json
      - name: Check demographic parity
        run: |
          # Fail if any protected group receives significantly different outcomes
          python - <<'PY'
          import json
          import sys

          with open('fairness.json') as f:
              results = json.load(f)

          # The 80% rule: ratios outside [0.8, 1.25] indicate a parity violation
          for attr, metrics in results['demographic_parity'].items():
              ratio = metrics['ratio']
              if ratio < 0.8 or ratio > 1.25:
                  print(f'::error::Demographic parity violation for {attr}: {ratio}')
                  sys.exit(1)

          print('::notice::All fairness checks passed')
          PY
Regression Testing Against a Baseline
validate-regression:
needs: validate-accuracy
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
      - name: Download new model
        uses: actions/download-artifact@v4
        with:
          name: trained-model
          path: models/new/
          run-id: ${{ github.event.workflow_run.id }}
          github-token: ${{ github.token }}
- name: Download baseline model
run: |
          # Fetch the production model from the model registry
python scripts/fetch_baseline.py --output models/baseline/
- name: Compare models
id: compare
run: |
python scripts/compare_models.py \
--new models/new/model.pkl \
--baseline models/baseline/model.pkl \
--test-data data/test.parquet \
--output comparison.json
IMPROVEMENT=$(jq .accuracy_improvement comparison.json)
echo "improvement=$IMPROVEMENT" >> $GITHUB_OUTPUT
- name: Check for regression
run: |
IMPROVEMENT=${{ steps.compare.outputs.improvement }}
          MIN_IMPROVEMENT=-0.01  # allow at most a 1-point drop in accuracy
if (( $(echo "$IMPROVEMENT < $MIN_IMPROVEMENT" | bc -l) )); then
echo "::error::Model regression detected: ${IMPROVEMENT}"
exit 1
fi
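Finally, scripts/compare_models.py only needs to score both models on the same test set and report the difference the gate checks. A minimal sketch, reusing the same hypothetical data layout as the earlier scripts:

# scripts/compare_models.py -- hypothetical sketch comparing a candidate model against the baseline
import argparse
import json
import pickle

import pandas as pd
from sklearn.metrics import accuracy_score

def load_model(path):
    with open(path, "rb") as f:
        return pickle.load(f)

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--new", required=True)
    parser.add_argument("--baseline", required=True)
    parser.add_argument("--test-data", required=True)
    parser.add_argument("--output", required=True)
    args = parser.parse_args()

    test = pd.read_parquet(args.test_data)
    X, y = test.drop(columns=["label"]), test["label"]  # assumes a "label" column

    new_acc = accuracy_score(y, load_model(args.new).predict(X))
    baseline_acc = accuracy_score(y, load_model(args.baseline).predict(X))

    # Positive values mean the candidate beats the baseline; the workflow
    # fails the gate if this drops below -0.01 (a one-point regression)
    comparison = {
        "new_accuracy": float(new_acc),
        "baseline_accuracy": float(baseline_acc),
        "accuracy_improvement": float(new_acc - baseline_acc),
    }
    with open(args.output, "w") as f:
        json.dump(comparison, f)

if __name__ == "__main__":
    main()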
Posting Results as a PR Comment
  report-results:
    needs: [validate-accuracy, validate-latency, validate-fairness]
    runs-on: ubuntu-latest
    if: github.event.workflow_run.event == 'pull_request'
    steps:
      - name: Create validation report
        uses: actions/github-script@v7
        with:
          script: |
            const accuracy = '${{ needs.validate-accuracy.outputs.accuracy }}';
            const p99 = '${{ needs.validate-latency.outputs.p99 }}';
            // The workflow_run payload carries the PRs associated with the triggering run
            const [pr] = context.payload.workflow_run.pull_requests;
            if (!pr) return;
            const body = `## Model Validation Report
            | Metric | Value | Threshold | Status |
            |--------|-------|-----------|--------|
            | Accuracy | ${accuracy} | ≥ 0.85 | ✅ |
            | P99 latency | ${p99}ms | ≤ 100ms | ✅ |
            | Fairness | Passed | - | ✅ |
            **Ready to deploy**`;
            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: pr.number,
              body: body
            });
The Complete Validation Workflow
name: Model Validation
on:
workflow_run:
workflows: ["Training"]
types: [completed]
jobs:
gate-accuracy:
runs-on: ubuntu-latest
outputs:
accuracy: ${{ steps.eval.outputs.accuracy }}
steps:
- uses: actions/checkout@v4
      - uses: actions/download-artifact@v4
        with:
          name: model
          run-id: ${{ github.event.workflow_run.id }}
          github-token: ${{ github.token }}
- id: eval
run: |
python evaluate.py --output metrics.json
echo "accuracy=$(jq .accuracy metrics.json)" >> $GITHUB_OUTPUT
- name: Gate check
run: |
if (( $(echo "${{ steps.eval.outputs.accuracy }} < 0.85" | bc -l) )); then
exit 1
fi
gate-latency:
needs: gate-accuracy
runs-on: ubuntu-latest
steps:
- run: python benchmark.py --max-p99 100
gate-fairness:
needs: gate-accuracy
runs-on: ubuntu-latest
steps:
- run: python fairness.py --threshold 0.8
deploy:
needs: [gate-accuracy, gate-latency, gate-fairness]
runs-on: ubuntu-latest
steps:
- run: echo "All gates passed, deploying..."
- run: ./deploy.sh
Key insight: validation gates should cover accuracy, latency, fairness, and regression. An accurate model that is slow or biased should never reach production.
Next, we'll explore reusable workflows and best practices.