الاختبار والتحقق

اختبار وكلاء الذكاء الاصطناعي يختلف عن اختبار البرمجيات التقليدية—نحتاج إلى التعامل مع عدم الحتمية وتقييم جودة المخرجات، وليس صحتها فقط.

هيكل الاختبار

# tests/test_agent.py
import pytest
from unittest.mock import Mock, patch
from agent import ResearchAgent
from config import Config

class TestResearchAgent:
    """Shared pytest fixtures for tests that exercise ResearchAgent."""

    @pytest.fixture
    def agent(self):
        """Provide a ResearchAgent built from a Config with MAX_ITERATIONS = 3."""
        test_config = Config()
        # Capped for tests.
        test_config.MAX_ITERATIONS = 3
        return ResearchAgent(test_config)

    @pytest.fixture
    def mock_search_results(self):
        """Provide a canned successful search payload holding one result."""
        sample_hit = {
            "title": "مقالة اختبار",
            "url": "https://example.com/test",
            "snippet": "هذا محتوى اختبار عن وكلاء الذكاء الاصطناعي.",
        }
        return {"success": True, "results": [sample_hit]}

اختبارات الوحدة

اختبر المكونات الفردية:

# tests/test_tools.py
from tools.search import WebSearchTool

class TestWebSearchTool:
    """Tests for WebSearchTool.run."""

    def test_search_returns_results(self):
        """A normal query reports success and yields at least one titled result."""
        tool = WebSearchTool()
        result = tool.run("Python programming")

        # Identity check rather than `== True` (PEP 8 / E712): a value that
        # merely compares equal to True, such as 1, must not pass.
        assert result["success"] is True
        assert len(result["results"]) > 0
        assert "title" in result["results"][0]

    def test_search_handles_errors(self):
        """An empty query must still return a result carrying a "success" key."""
        tool = WebSearchTool()
        # Exercise the tool with a problematic query.
        result = tool.run("")

        # Reaching this assertion already shows that run() did not raise.
        assert "success" in result

# tests/test_memory.py
from memory.store import ResearchMemory

class TestResearchMemory:
    """Tests for how ResearchMemory stores findings and lists sources."""

    def test_add_and_retrieve_finding(self):
        """One stored finding is counted and its query is reported as searched."""
        store = ResearchMemory()
        finding = {
            "query": "AI agents",
            "content": "الوكلاء هي أنظمة مستقلة",
            "url": "https://example.com",
            "title": "عن الوكلاء",
        }

        store.add_finding(**finding)

        assert len(store.findings) == 1
        assert store.has_searched("AI agents")
        assert not store.has_searched("موضوع غير معروف")

    def test_get_sources_deduplicates(self):
        """Adding the same source twice yields a single entry from get_sources."""
        store = ResearchMemory()
        duplicate = {
            "query": "test",
            "content": "محتوى",
            "url": "https://same-url.com",
            "title": "نفس العنوان",
        }

        # Record the same source twice.
        store.add_finding(**duplicate)
        store.add_finding(**duplicate)

        unique_sources = store.get_sources()
        assert len(unique_sources) == 1  # duplicates removed

اختبارات التكامل

اختبر تدفق الوكيل الكامل:

# tests/test_integration.py
import pytest
from agent import ResearchAgent
from config import Config

class TestAgentIntegration:
    """Integration tests that run the complete agent flow."""

    @pytest.mark.integration
    def test_full_research_flow(self):
        """Run a full research pass on a well-known topic."""
        topic = "What is machine learning?"
        report = ResearchAgent(Config()).research(topic)

        # Check the report structure.
        assert len(report) > 100
        assert "machine learning" in report.lower()

    @pytest.mark.integration
    def test_handles_unknown_topic(self):
        """The agent should still produce output for obscure topics."""
        obscure_topic = "xyznonexistenttopic123"
        report = ResearchAgent(Config()).research(obscure_topic)

        # Must not crash; it should indicate limited results.
        assert report is not None

التحقق من المخرجات

# utils/validators.py
from pydantic import BaseModel, validator
from typing import List, Optional
import re

class ResearchReport(BaseModel):
    """Pydantic model holding a report body and its list of sources."""

    content: str
    sources: List[str]

    @validator('content')
    def content_not_empty(cls, v):
        """Reject content shorter than 50 characters once whitespace is stripped."""
        stripped_length = len(v.strip())
        if stripped_length >= 50:
            return v
        raise ValueError('التقرير قصير جداً')

    @validator('content')
    def no_hallucinated_urls(cls, v):
        """Reject content containing an example.com URL that lacks 'real'."""
        # Look for suspicious URL patterns; only the first offender is reported.
        suspicious = (
            candidate
            for candidate in re.findall(r'https?://[^\s]+', v)
            if 'example.com' in candidate and 'real' not in candidate
        )
        url = next(suspicious, None)
        if url is not None:
            raise ValueError(f'URL مهلوس محتمل: {url}')
        return v

def validate_report(report: str, sources: List[dict]) -> dict:
    """Run structural checks on a generated report.

    Args:
        report: The report text to inspect.
        sources: Source records; each must provide an 'id' key, which is
            compared against the numbers cited in the report.

    Returns:
        A dict with "valid" (True when no issue was found) and "issues"
        (the list of issue messages, in the order the checks run).
    """
    problems = []

    # Minimum length check.
    if len(report) < 100:
        problems.append("التقرير قصير جداً")

    # Citation markers look like [1], [2], ...
    markers = re.findall(r'\[\d+\]', report)
    if not markers:
        problems.append("لم يتم العثور على استشهادات")

    # Every cited number must correspond to a known source id.
    known_ids = {source['id'] for source in sources}
    invalid_citations = {int(marker[1:-1]) for marker in markers} - known_ids
    if invalid_citations:
        problems.append(f"أرقام استشهاد غير صالحة: {invalid_citations}")

    return {"valid": not problems, "issues": problems}

تقييم الجودة

# tests/test_quality.py
from langchain_openai import ChatOpenAI

def evaluate_report_quality(report: str, topic: str) -> dict:
    """Ask an LLM to grade the quality of a report.

    Builds a grading prompt around the topic and report, sends it to a
    ChatOpenAI "gpt-4o" model as a single user message, and returns the
    reply content after passing it through parse_json.

    NOTE(review): parse_json is not defined or imported in the visible
    snippet; presumably a project helper. Confirm where it comes from.
    """
    judge = ChatOpenAI(model="gpt-4o")

    prompt = f"""
    قيّم تقرير البحث هذا عن "{topic}".

    التقرير:
    {report}

    قيّم كل معيار من 1-5:
    1. الصلة: هل يتناول الموضوع؟
    2. الدقة: هل الحقائق صحيحة (حسب معرفتك)؟
    3. الشمولية: هل الجوانب الرئيسية مغطاة؟
    4. الوضوح: هل هو منظم ومقروء؟
    5. المصادر: هل الادعاءات موثقة؟

    أرجع JSON: {{"relevance": X, "accuracy": X, "completeness": X, "clarity": X, "sources": X, "overall": X, "feedback": "..."}}
    """

    messages = [{"role": "user", "content": prompt}]
    reply = judge.invoke(messages)
    return parse_json(reply.content)

class TestReportQuality:
    """LLM-graded quality gate for generated reports."""

    @pytest.mark.quality
    def test_report_meets_quality_threshold(self):
        """The graded report must score overall >= 3.5 and relevance >= 4."""
        researcher = ResearchAgent(Config())
        generated = researcher.research("Benefits of renewable energy")

        grades = evaluate_report_quality(generated, "renewable energy")

        assert grades["overall"] >= 3.5
        assert grades["relevance"] >= 4

تشغيل الاختبارات

# Run all tests
pytest tests/

# Run unit tests only (skip tests marked integration or quality)
pytest tests/ -m "not integration and not quality"

# Run with coverage and write an HTML report
pytest tests/ --cov=. --cov-report=html

# Run the quality-evaluation tests, verbosely
pytest tests/ -m quality -v

التالي: تعلّم كيفية توسيع وكيل البحث الخاص بك ونشره.