"""Unit tests for ManualExtractor's Gemini-backed maintenance extraction.

Scenarios exercised: ordinary extraction with subtype mapping, progress
reporting, unusual or unmappable service names, an empty Gemini response,
Gemini call failures, and the shape of the result consumed by the job
queue. All GeminiEngine calls are mocked.
"""

from unittest.mock import MagicMock, patch

import pytest

from app.engines.gemini_engine import (
    GeminiProcessingError,
    MaintenanceExtractionResult,
    MaintenanceItem,
)
from app.extractors.manual_extractor import (
    ExtractedSchedule,
    ManualExtractionResult,
    ManualExtractor,
)


# --- Helpers ---


def _make_pdf_bytes(size: int = 1024) -> bytes:
    """Return placeholder PDF content padded with NUL bytes up to ``size``.

    The fake header is never truncated, so a ``size`` smaller than the
    header yields just the header.
    """
    return b"%PDF-1.4 fake".ljust(size, b"\x00")


def _make_gemini_result(items: list[MaintenanceItem]) -> MaintenanceExtractionResult:
    """Wrap ``items`` in the result object the engine would hand back."""
    return MaintenanceExtractionResult(items=items, model="gemini-2.5-flash")


def _mocked_extractor() -> ManualExtractor:
    """Build a ManualExtractor whose ``_engine`` is a MagicMock."""
    extractor = ManualExtractor()
    extractor._engine = MagicMock()
    return extractor


def _extractor_returning(items: list[MaintenanceItem]) -> ManualExtractor:
    """Build an extractor whose mocked engine returns ``items``."""
    extractor = _mocked_extractor()
    extractor._engine.extract_maintenance.return_value = _make_gemini_result(items)
    return extractor


def _extractor_raising(error: Exception) -> ManualExtractor:
    """Build an extractor whose mocked engine raises ``error`` when called."""
    extractor = _mocked_extractor()
    extractor._engine.extract_maintenance.side_effect = error
    return extractor


# --- Successful extraction ---


class TestNormalExtraction:
    """Happy path: extracted items come back as schedules with subtypes."""

    def test_pdf_with_maintenance_schedule_returns_mapped_items(self):
        """Normal: three ordinary services are returned with expected subtypes."""
        extractor = _extractor_returning(
            [
                MaintenanceItem(
                    service_name="Engine Oil Change",
                    interval_miles=5000,
                    interval_months=6,
                    details="Use 0W-20 full synthetic oil",
                ),
                MaintenanceItem(
                    service_name="Tire Rotation",
                    interval_miles=5000,
                    interval_months=6,
                    details=None,
                ),
                MaintenanceItem(
                    service_name="Cabin Filter",
                    interval_miles=15000,
                    interval_months=12,
                    details=None,
                ),
            ]
        )

        outcome = extractor.extract(_make_pdf_bytes())

        assert outcome.success is True
        assert outcome.error is None
        assert len(outcome.maintenance_schedules) == 3

        # The oil change keeps its intervals/details and maps to Engine Oil.
        oil_schedule = outcome.maintenance_schedules[0]
        assert oil_schedule.service == "Engine Oil Change"
        assert oil_schedule.interval_miles == 5000
        assert oil_schedule.interval_months == 6
        assert oil_schedule.details == "Use 0W-20 full synthetic oil"
        assert "Engine Oil" in oil_schedule.subtypes
        assert oil_schedule.confidence > 0.0

        # The tire rotation maps to the Tires subtype.
        tire_schedule = outcome.maintenance_schedules[1]
        assert tire_schedule.service == "Tire Rotation"
        assert "Tires" in tire_schedule.subtypes

        # The cabin filter maps to Cabin Air Filter / Purifier.
        cabin_schedule = outcome.maintenance_schedules[2]
        assert "Cabin Air Filter / Purifier" in cabin_schedule.subtypes

    def test_progress_callbacks_fire_at_intervals(self):
        """The progress callback is invoked at the expected checkpoints."""
        extractor = _extractor_returning(
            [MaintenanceItem(service_name="Oil Change", interval_miles=5000)]
        )
        recorded: list[tuple[int, str]] = []

        def record(percent: int, message: str) -> None:
            recorded.append((percent, message))

        extractor.extract(_make_pdf_bytes(), progress_callback=record)

        # Checkpoints expected at 10, 50 and 95
        # (100% is set by complete_manual_job).
        percents = [entry[0] for entry in recorded]
        for checkpoint in (10, 50, 95):
            assert checkpoint in percents

        # Reported progress must never move backwards.
        assert percents == sorted(percents)


# --- Unusual service names ---


class TestUnusualServiceNames:
    """Odd service names should still resolve to the closest subtype."""

    def test_unusual_names_fuzzy_match_to_subtypes(self):
        """Edge: unconventional wording still lands on the closest subtype."""
        extractor = _extractor_returning(
            [
                MaintenanceItem(
                    service_name="Replace engine air cleaner element",
                    interval_miles=30000,
                ),
                MaintenanceItem(
                    service_name="Inspect drive belt for cracks",
                    interval_miles=60000,
                ),
            ]
        )

        outcome = extractor.extract(_make_pdf_bytes())

        assert outcome.success is True
        assert len(outcome.maintenance_schedules) == 2

        # "air cleaner element" should fuzzy match to Air Filter Element.
        air_schedule = outcome.maintenance_schedules[0]
        assert "Air Filter Element" in air_schedule.subtypes

        # "drive belt" should match to Drive Belt.
        belt_schedule = outcome.maintenance_schedules[1]
        assert "Drive Belt" in belt_schedule.subtypes

    def test_unmapped_service_uses_gemini_name_directly(self):
        """Edge: an unmatched name is kept verbatim with default confidence."""
        extractor = _extractor_returning(
            [
                MaintenanceItem(
                    service_name="Recalibrate Quantum Flux Capacitor",
                    interval_miles=100000,
                ),
            ]
        )

        outcome = extractor.extract(_make_pdf_bytes())

        assert outcome.success is True
        assert len(outcome.maintenance_schedules) == 1

        schedule = outcome.maintenance_schedules[0]
        assert schedule.service == "Recalibrate Quantum Flux Capacitor"
        assert schedule.subtypes == []
        assert schedule.confidence == ManualExtractor.DEFAULT_CONFIDENCE


# --- Empty response ---


class TestEmptyResponse:
    """Behaviour when Gemini returns no maintenance items."""

    def test_empty_gemini_response_returns_empty_schedules(self):
        """Edge: no items still counts as success, with an empty schedule list."""
        extractor = _extractor_returning([])

        outcome = extractor.extract(_make_pdf_bytes())

        assert outcome.success is True
        assert outcome.maintenance_schedules == []
        assert outcome.error is None
        assert outcome.processing_time_ms >= 0


# --- Error handling ---


class TestErrorHandling:
    """Behaviour when the Gemini call raises."""

    def test_gemini_failure_returns_error_result(self):
        """Error: a GeminiProcessingError is reported through the result."""
        extractor = _extractor_raising(
            GeminiProcessingError(
                "Gemini maintenance extraction failed: API quota exceeded"
            )
        )

        outcome = extractor.extract(_make_pdf_bytes())

        assert outcome.success is False
        assert outcome.maintenance_schedules == []
        assert outcome.error is not None
        assert "quota exceeded" in outcome.error.lower()

    def test_unexpected_exception_returns_error_result(self):
        """Error: an arbitrary exception is reported through the result too."""
        extractor = _extractor_raising(RuntimeError("Unexpected failure"))

        outcome = extractor.extract(_make_pdf_bytes())

        assert outcome.success is False
        assert outcome.error is not None
        assert "Unexpected failure" in outcome.error


# --- Job queue integration ---


class TestJobQueueIntegration:
    """The result must expose what the existing job queue flow reads."""

    def test_extract_returns_all_required_fields(self):
        """The result has every field process_manual_job in extract.py needs."""
        extractor = _extractor_returning(
            [MaintenanceItem(service_name="Oil Change", interval_miles=5000)]
        )

        outcome = extractor.extract(_make_pdf_bytes())

        # Attributes of the result used by process_manual_job.
        for field_name in (
            "success",
            "vehicle_info",
            "maintenance_schedules",
            "raw_tables",
            "processing_time_ms",
            "total_pages",
            "pages_processed",
            "error",
        ):
            assert hasattr(outcome, field_name)

        # Attributes required on each individual schedule.
        first_schedule = outcome.maintenance_schedules[0]
        for field_name in (
            "service",
            "interval_miles",
            "interval_months",
            "details",
            "confidence",
            "subtypes",
        ):
            assert hasattr(first_schedule, field_name)