Istio لأعباء عمل ML

شبكة خدمات Istio توفر قدرات أساسية لأنظمة ML الإنتاجية: إدارة حركة المرور والأمان والمراقبة. في 2025-2026، يقدم Gateway API Inference Extension في Istio ميزات خاصة بـ ML للتوجيه الذكي.

بنية شبكة الخدمات لـ ML

┌─────────────────────────────────────────────────────────────────────┐
│                    بنية Istio لـ ML                                  │
├─────────────────────────────────────────────────────────────────────┤
│                                                                      │
│  ┌─────────────────────────────────────────────────────────────┐    │
│  │                   مستوى تحكم Istio                           │    │
│  │  ┌──────────┐  ┌──────────┐  ┌──────────┐                   │    │
│  │  │  Istiod  │  │  Pilot   │  │  Citadel │                   │    │
│  │  │ (التكوين)│  │ (المرور)│  │ (mTLS)   │                   │    │
│  │  └──────────┘  └──────────┘  └──────────┘                   │    │
│  └─────────────────────────────────────────────────────────────┘    │
│                              │                                       │
│  ┌─────────────────────────────────────────────────────────────┐    │
│  │                   مستوى البيانات (Envoy Sidecars)            │    │
│  │                                                               │    │
│  │  ┌─────────────┐  ┌─────────────┐  ┌─────────────┐          │    │
│  │  │ خدمة       │  │  مخزن      │  │  سجل       │          │    │
│  │  │ الاستدلال  │  │  الميزات   │  │  النماذج   │          │    │
│  │  │ [Envoy]    │  │  [Envoy]   │  │  [Envoy]   │          │    │
│  │  └─────────────┘  └─────────────┘  └─────────────┘          │    │
│  │                                                               │    │
│  │  ┌─────────────┐  ┌─────────────┐  ┌─────────────┐          │    │
│  │  │ مهام       │  │  متحكم    │  │  مكدس      │          │    │
│  │  │ التدريب    │  │  الخطوط   │  │  المراقبة  │          │    │
│  │  │ [Envoy]    │  │  [Envoy]   │  │  [Envoy]   │          │    │
│  │  └─────────────┘  └─────────────┘  └─────────────┘          │    │
│  └─────────────────────────────────────────────────────────────┘    │
│                                                                      │
└─────────────────────────────────────────────────────────────────────┘

تثبيت Istio لمنصة ML

# Install Istio from the default profile, with Envoy access logs written to
# stdout, tracing enabled, and pilot trace sampling set to 10
istioctl install \
  --set profile=default \
  --set meshConfig.accessLogFile=/dev/stdout \
  --set meshConfig.enableTracing=true \
  --set values.pilot.traceSampling=10

# Label each ML namespace so new pods get the sidecar injected automatically
for ns in ml-serving ml-training; do
  kubectl label namespace "$ns" istio-injection=enabled
done

# Confirm the installation and list the control-plane pods
istioctl verify-install
kubectl get pods -n istio-system

Gateway API Inference Extension (2025)

# Kubernetes Gateway API entry point for ML inference traffic, implemented by
# the "istio" GatewayClass.
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: ml-gateway
  namespace: ml-serving
spec:
  gatewayClassName: istio
  listeners:
  - name: http
    port: 80
    protocol: HTTP
  # "GRPC" is not a listener protocol defined by the Gateway API (the core
  # values are HTTP, HTTPS, TLS, TCP and UDP). gRPC runs over HTTP/2, and
  # GRPCRoute resources attach to HTTP/HTTPS listeners, so this listener is
  # declared as HTTP.
  - name: grpc
    port: 8081
    protocol: HTTP
---
# HTTPRoute that forwards model API calls (/v1/models...) to the KServe
# predictor Service.
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: inference-route
  namespace: ml-serving
spec:
  parentRefs:
  # Bind only to the port-80 "http" listener; without sectionName the route
  # would attach to every compatible listener on the Gateway, including the
  # gRPC one.
  - name: ml-gateway
    sectionName: http
  hostnames:
  - "inference.ml.example.com"
  rules:
  - matches:
    - path:
        type: PathPrefix
        value: /v1/models
    backendRefs:
    - name: kserve-predictor
      port: 8080
      weight: 100

تكوين mTLS لخدمات ML

# Enforce STRICT mutual TLS in the ml-serving namespace (no selector is set,
# so the policy is not limited to a single workload)
apiVersion: security.istio.io/v1beta1
kind: PeerAuthentication
metadata:
  namespace: ml-serving
  name: ml-mtls
spec:
  mtls:
    mode: STRICT
---
# Allow-list for workloads labelled app=inference-service: only the two listed
# service-account identities may call it, and only with POST/GET on the
# /v1 and /v2 model paths
apiVersion: security.istio.io/v1beta1
kind: AuthorizationPolicy
metadata:
  namespace: ml-serving
  name: inference-access
spec:
  action: ALLOW
  selector:
    matchLabels:
      app: inference-service
  rules:
  - to:
    - operation:
        paths:
        - "/v1/models/*"
        - "/v2/models/*"
        methods:
        - "POST"
        - "GET"
    from:
    - source:
        principals:
        - "cluster.local/ns/ml-frontend/sa/frontend-sa"
        - "cluster.local/ns/ml-serving/sa/gateway-sa"

إدارة حركة المرور لـ ML

# VirtualService for calls addressed to inference-service. It chooses a model
# version per request and routes to a subset of inference-service, so the
# DestinationRule below (which targets the same host) actually governs the
# routed traffic. A DestinationRule only applies to the host a request is
# routed to; routing to separate inference-v1 / inference-v2 hosts would
# bypass it.
apiVersion: networking.istio.io/v1beta1
kind: VirtualService
metadata:
  name: inference-routing
  namespace: ml-serving
spec:
  hosts:
  - inference-service
  http:
  # Header-based routing: "x-model-version: v2" pins the request to v2
  - match:
    - headers:
        x-model-version:
          exact: "v2"
    route:
    - destination:
        host: inference-service
        subset: v2
        port:
          number: 8080
    timeout: 30s
    retries:
      attempts: 3
      perTryTimeout: 10s
      retryOn: 5xx,reset,connect-failure

  # Default route: 90/10 traffic split between v1 and v2
  - route:
    - destination:
        host: inference-service
        subset: v1
        port:
          number: 8080
      weight: 90
    - destination:
        host: inference-service
        subset: v2
        port:
          number: 8080
      weight: 10
---
# DestinationRule for inference-service: connection-pool limits, outlier
# detection (circuit breaking) and load balancing, plus the version subsets
# referenced by the VirtualService above.
apiVersion: networking.istio.io/v1beta1
kind: DestinationRule
metadata:
  name: inference-destination
  namespace: ml-serving
spec:
  host: inference-service
  trafficPolicy:
    connectionPool:
      tcp:
        maxConnections: 1000
      http:
        h2UpgradePolicy: UPGRADE
        http1MaxPendingRequests: 1000
        http2MaxRequests: 1000
        maxRequestsPerConnection: 100
    # Eject a backend after 5 consecutive 5xx responses (evaluated every 30s)
    # for at least 60s, never ejecting more than half of the backends
    outlierDetection:
      consecutive5xxErrors: 5
      interval: 30s
      baseEjectionTime: 60s
      maxEjectionPercent: 50
    loadBalancer:
      simple: LEAST_REQUEST
  # NOTE(review): assumes the inference pods behind inference-service carry a
  # "version" label with value v1 or v2 — confirm against the Deployments
  subsets:
  - name: v1
    labels:
      version: v1
  - name: v2
    labels:
      version: v2

مهلات الطلب للاستدلال

# Per-endpoint timeouts and retries for LLM inference. Every HTTP rule must
# name a route (or a redirect / direct response) for Istio to accept the
# VirtualService, so each rule routes explicitly to llm-service.
# NOTE(review): the destination port is omitted, which assumes llm-service
# exposes a single port — add port.number if it exposes several.
apiVersion: networking.istio.io/v1beta1
kind: VirtualService
metadata:
  name: llm-timeouts
spec:
  hosts:
  - llm-service
  http:
  # Chat completions: long overall timeout, few retries
  - match:
    - uri:
        prefix: /v1/chat/completions
    route:
    - destination:
        host: llm-service
    timeout: 120s
    retries:
      attempts: 2
      perTryTimeout: 60s
      retryOn: 5xx,reset
  # Embeddings: shorter timeout, more retries
  - match:
    - uri:
        prefix: /v1/embeddings
    route:
    - destination:
        host: llm-service
    timeout: 30s
    retries:
      attempts: 3
      perTryTimeout: 10s

تكوين موارد Sidecar

# Mesh-wide sidecar (Envoy proxy) defaults for ML workloads: proxy metadata,
# proxy concurrency, and default CPU/memory requests and limits
apiVersion: install.istio.io/v1alpha1
kind: IstioOperator
spec:
  meshConfig:
    defaultConfig:
      proxyMetadata:
        # NOTE(review): ISTIO_META_HTTP10 turns on HTTP/1.0 support in the
        # sidecar; it does not change buffer sizes for large ML payloads.
        # Confirm this setting is actually intended here.
        ISTIO_META_HTTP10: "1"
      concurrency: 2
  values:
    global:
      proxy:
        resources:
          requests:
            cpu: 100m
            memory: 256Mi
          limits:
            cpu: 500m
            memory: 512Mi
---
# Per-pod sidecar resource overrides for inference workloads, set through
# sidecar.istio.io/* annotations (CPU/memory requests and limits).
# NOTE(review): this manifest has no name or spec, so it presumably only
# illustrates the annotations to add to a workload's pod template.
apiVersion: v1
kind: Pod
metadata:
  annotations:
    sidecar.istio.io/proxyCPU: "500m"
    sidecar.istio.io/proxyMemory: "512Mi"
    sidecar.istio.io/proxyCPULimit: "1000m"
    sidecar.istio.io/proxyMemoryLimit: "1Gi"

الدرس التالي: المراقبة والتتبع الموزع لخطوط ML.