All checks were successful
Deploy to Staging / Build Images (pull_request) Successful in 36s
Deploy to Staging / Deploy to Staging (pull_request) Successful in 51s
Deploy to Staging / Verify Staging (pull_request) Successful in 8s
Deploy to Staging / Notify Staging Ready (pull_request) Successful in 7s
Deploy to Staging / Notify Staging Failure (pull_request) Has been skipped
272 lines
9.2 KiB
Python
272 lines
9.2 KiB
Python
"""Tests for ManualExtractor Gemini-based maintenance schedule extraction.

Covers: normal extraction with subtype mapping, progress callbacks,
unusual service names, empty Gemini response, Gemini call failure, and
the result fields read by the job queue.
All GeminiEngine calls are mocked.
"""
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
from app.engines.gemini_engine import (
|
|
GeminiProcessingError,
|
|
MaintenanceExtractionResult,
|
|
MaintenanceItem,
|
|
)
|
|
from app.extractors.manual_extractor import (
|
|
ExtractedSchedule,
|
|
ManualExtractionResult,
|
|
ManualExtractor,
|
|
)
|
|
|
|
|
|
# --- Helpers ---
|
|
|
|
|
|
def _make_pdf_bytes(size: int = 1024) -> bytes:
|
|
"""Create fake PDF bytes of a given size."""
|
|
header = b"%PDF-1.4 fake"
|
|
return header + b"\x00" * max(0, size - len(header))
|
|
|
|
|
|
def _make_gemini_result(items: list[MaintenanceItem]) -> MaintenanceExtractionResult:
    """Wrap the given maintenance items in a MaintenanceExtractionResult.

    The result is what the mocked engine hands back in these tests; the
    model name is fixed to "gemini-2.5-flash".
    """
    model_name = "gemini-2.5-flash"
    return MaintenanceExtractionResult(items=items, model=model_name)
|
|
|
|
|
|
# --- Successful extraction ---
|
|
|
|
|
|
class TestNormalExtraction:
    """Happy path: Gemini items come back as schedules with mapped subtypes."""

    def test_pdf_with_maintenance_schedule_returns_mapped_items(self):
        """Normal: three extracted items yield three schedules with subtypes."""
        oil_item = MaintenanceItem(
            service_name="Engine Oil Change",
            interval_miles=5000,
            interval_months=6,
            details="Use 0W-20 full synthetic oil",
        )
        tire_item = MaintenanceItem(
            service_name="Tire Rotation",
            interval_miles=5000,
            interval_months=6,
            details=None,
        )
        cabin_item = MaintenanceItem(
            service_name="Cabin Filter",
            interval_miles=15000,
            interval_months=12,
            details=None,
        )

        # Swap the real engine for a mock so no Gemini call is made.
        extractor = ManualExtractor()
        extractor._engine = MagicMock()
        extractor._engine.extract_maintenance.return_value = _make_gemini_result(
            [oil_item, tire_item, cabin_item]
        )

        result = extractor.extract(_make_pdf_bytes())

        assert result.success is True
        assert result.error is None
        assert len(result.maintenance_schedules) == 3

        oil, tire, cabin = result.maintenance_schedules

        # Oil change: every field is carried over and the Engine Oil subtype is present.
        assert oil.service == "Engine Oil Change"
        assert oil.interval_miles == 5000
        assert oil.interval_months == 6
        assert oil.details == "Use 0W-20 full synthetic oil"
        assert "Engine Oil" in oil.subtypes
        assert oil.confidence > 0.0

        # Tire rotation maps to the Tires subtype.
        assert tire.service == "Tire Rotation"
        assert "Tires" in tire.subtypes

        # Cabin filter maps to Cabin Air Filter / Purifier.
        assert "Cabin Air Filter / Purifier" in cabin.subtypes

    def test_progress_callbacks_fire_at_intervals(self):
        """Progress callback receives the expected milestones in order."""
        extractor = ManualExtractor()
        extractor._engine = MagicMock()
        extractor._engine.extract_maintenance.return_value = _make_gemini_result(
            [MaintenanceItem(service_name="Oil Change", interval_miles=5000)]
        )

        recorded: list[tuple[int, str]] = []

        def track_progress(percent: int, message: str) -> None:
            recorded.append((percent, message))

        extractor.extract(_make_pdf_bytes(), progress_callback=track_progress)

        # Expected milestones are 10, 50 and 95 (100% is set by complete_manual_job).
        percents = [entry[0] for entry in recorded]
        for milestone in (10, 50, 95):
            assert milestone in percents

        # Reported percentages must never go backwards.
        assert all(
            earlier <= later for earlier, later in zip(percents, percents[1:])
        )
|
|
|
|
|
|
# --- Unusual service names ---
|
|
|
|
|
|
class TestUnusualServiceNames:
    """Non-standard service names: closest-subtype mapping and the no-match fallback."""

    def test_unusual_names_fuzzy_match_to_subtypes(self):
        """Edge: oddly worded service names still resolve to a subtype."""
        gemini_items = [
            MaintenanceItem(
                service_name="Replace engine air cleaner element",
                interval_miles=30000,
            ),
            MaintenanceItem(
                service_name="Inspect drive belt for cracks",
                interval_miles=60000,
            ),
        ]

        extractor = ManualExtractor()
        extractor._engine = MagicMock()
        extractor._engine.extract_maintenance.return_value = _make_gemini_result(
            gemini_items
        )

        result = extractor.extract(_make_pdf_bytes())

        assert result.success is True
        assert len(result.maintenance_schedules) == 2

        air_filter, belt = result.maintenance_schedules

        # "air cleaner element" should fuzzy match to Air Filter Element.
        assert "Air Filter Element" in air_filter.subtypes

        # "drive belt" should match to Drive Belt.
        assert "Drive Belt" in belt.subtypes

    def test_unmapped_service_uses_gemini_name_directly(self):
        """Edge: an unmatched name keeps the Gemini name and default confidence."""
        unknown_service = MaintenanceItem(
            service_name="Recalibrate Quantum Flux Capacitor",
            interval_miles=100000,
        )

        extractor = ManualExtractor()
        extractor._engine = MagicMock()
        extractor._engine.extract_maintenance.return_value = _make_gemini_result(
            [unknown_service]
        )

        result = extractor.extract(_make_pdf_bytes())

        assert result.success is True
        assert len(result.maintenance_schedules) == 1

        (item,) = result.maintenance_schedules
        assert item.service == "Recalibrate Quantum Flux Capacitor"
        assert item.subtypes == []
        assert item.confidence == ManualExtractor.DEFAULT_CONFIDENCE
|
|
|
|
|
|
# --- Empty response ---
|
|
|
|
|
|
class TestEmptyResponse:
    """Behaviour when Gemini returns no maintenance items."""

    def test_empty_gemini_response_returns_empty_schedules(self):
        """Edge: zero items still counts as success, with an empty schedule list."""
        no_items = _make_gemini_result([])

        extractor = ManualExtractor()
        extractor._engine = MagicMock()
        extractor._engine.extract_maintenance.return_value = no_items

        result = extractor.extract(_make_pdf_bytes())

        assert result.success is True
        assert result.maintenance_schedules == []
        assert result.error is None
        assert result.processing_time_ms >= 0
|
|
|
|
|
|
# --- Error handling ---
|
|
|
|
|
|
class TestErrorHandling:
    """Engine failures must surface as an error result, not an exception."""

    def test_gemini_failure_returns_error_result(self):
        """Error: a GeminiProcessingError is reported through result.error."""
        quota_error = GeminiProcessingError(
            "Gemini maintenance extraction failed: API quota exceeded"
        )

        extractor = ManualExtractor()
        extractor._engine = MagicMock()
        extractor._engine.extract_maintenance.side_effect = quota_error

        result = extractor.extract(_make_pdf_bytes())

        assert result.success is False
        assert result.maintenance_schedules == []
        assert result.error is not None
        assert "quota exceeded" in result.error.lower()

    def test_unexpected_exception_returns_error_result(self):
        """Error: a non-Gemini exception is also caught and reported."""
        surprise = RuntimeError("Unexpected failure")

        extractor = ManualExtractor()
        extractor._engine = MagicMock()
        extractor._engine.extract_maintenance.side_effect = surprise

        result = extractor.extract(_make_pdf_bytes())

        assert result.success is False
        assert result.error is not None
        assert "Unexpected failure" in result.error
|
|
|
|
|
|
# --- Job queue integration ---
|
|
|
|
|
|
class TestJobQueueIntegration:
    """Shape checks on the extraction result consumed by the job queue flow."""

    def test_extract_returns_all_required_fields(self):
        """Result and schedule objects expose every attribute process_manual_job reads."""
        extractor = ManualExtractor()
        extractor._engine = MagicMock()
        extractor._engine.extract_maintenance.return_value = _make_gemini_result(
            [MaintenanceItem(service_name="Oil Change", interval_miles=5000)]
        )

        result = extractor.extract(_make_pdf_bytes())

        # All fields used by process_manual_job must be present.
        result_fields = (
            "success",
            "vehicle_info",
            "maintenance_schedules",
            "raw_tables",
            "processing_time_ms",
            "total_pages",
            "pages_processed",
            "error",
        )
        for field_name in result_fields:
            assert hasattr(result, field_name)

        # Each schedule entry must expose its own required fields as well.
        schedule = result.maintenance_schedules[0]
        schedule_fields = (
            "service",
            "interval_miles",
            "interval_months",
            "details",
            "confidence",
            "subtypes",
        )
        for field_name in schedule_fields:
            assert hasattr(schedule, field_name)
|