"""Tests for Gemini engine maintenance schedule extraction. Covers: GeminiEngine initialization, PDF size validation, successful extraction, empty results, and error handling. All google-genai SDK calls are mocked. """ import json from unittest.mock import MagicMock, patch import pytest from app.engines.gemini_engine import ( GeminiEngine, GeminiEngineError, GeminiProcessingError, GeminiUnavailableError, MaintenanceExtractionResult, MaintenanceItem, _MAX_PDF_BYTES, ) # --- Helpers --- def _make_pdf_bytes(size: int = 1024) -> bytes: """Create fake PDF bytes of a given size.""" # Minimal PDF header so it looks plausible, padded to size header = b"%PDF-1.4 fake" return header + b"\x00" * max(0, size - len(header)) def _make_gemini_response(schedule: list[dict]) -> MagicMock: """Create a mock Gemini generate_content response.""" response = MagicMock() response.text = json.dumps({"maintenanceSchedule": schedule}) return response # --- Exception hierarchy --- class TestExceptionHierarchy: """Verify the Gemini exception class relationships.""" def test_processing_error_is_engine_error(self): assert issubclass(GeminiProcessingError, GeminiEngineError) def test_unavailable_error_is_engine_error(self): assert issubclass(GeminiUnavailableError, GeminiEngineError) def test_engine_error_is_exception(self): assert issubclass(GeminiEngineError, Exception) # --- Data types --- class TestMaintenanceItem: """Verify MaintenanceItem dataclass construction.""" def test_required_fields_only(self): item = MaintenanceItem(service_name="Oil Change") assert item.service_name == "Oil Change" assert item.interval_miles is None assert item.interval_months is None assert item.details is None def test_all_fields(self): item = MaintenanceItem( service_name="Tire Rotation", interval_miles=5000, interval_months=6, details="Rotate front to rear on same side.", ) assert item.service_name == "Tire Rotation" assert item.interval_miles == 5000 assert item.interval_months == 6 assert item.details == "Rotate front to rear on same side." class TestMaintenanceExtractionResult: """Verify MaintenanceExtractionResult dataclass.""" def test_construction(self): result = MaintenanceExtractionResult( items=[MaintenanceItem(service_name="Oil Change")], model="gemini-2.5-flash", ) assert len(result.items) == 1 assert result.model == "gemini-2.5-flash" def test_empty_items(self): result = MaintenanceExtractionResult(items=[], model="gemini-2.5-flash") assert result.items == [] # --- PDF size validation --- class TestPdfSizeValidation: """Verify the 20 MB PDF size limit.""" def test_oversized_pdf_rejected(self): """PDFs exceeding 20 MB must be rejected with a clear error.""" engine = GeminiEngine() oversized = _make_pdf_bytes(_MAX_PDF_BYTES + 1) with pytest.raises(GeminiProcessingError, match="exceeds the 20 MB limit"): engine.extract_maintenance(oversized) def test_exactly_at_limit_accepted(self): """PDFs exactly at 20 MB should pass size validation. The engine will still fail at model init (mocked away in other tests), but the size check itself should pass. """ engine = GeminiEngine() exact = _make_pdf_bytes(_MAX_PDF_BYTES) # Should fail at _get_model, not at size check with pytest.raises(GeminiUnavailableError): engine.extract_maintenance(exact) # --- Successful extraction --- class TestExtractMaintenance: """Verify successful maintenance schedule extraction.""" @patch("app.engines.gemini_engine.settings") @patch("app.engines.gemini_engine.os.path.isfile", return_value=True) def test_valid_pdf_returns_structured_schedules( self, mock_isfile, mock_settings ): """Normal: Valid PDF returns structured maintenance schedules.""" mock_settings.google_vision_key_path = "/fake/creds.json" mock_settings.vertex_ai_project = "test-project" mock_settings.vertex_ai_location = "us-central1" mock_settings.gemini_model = "gemini-2.5-flash" schedule = [ { "serviceName": "Engine Oil Change", "intervalMiles": 5000, "intervalMonths": 6, "details": "Use 0W-20 full synthetic oil.", }, { "serviceName": "Tire Rotation", "intervalMiles": 5000, "intervalMonths": 6, "details": None, }, ] mock_client = MagicMock() mock_client.models.generate_content.return_value = _make_gemini_response(schedule) with patch.dict("sys.modules", { "google.genai": MagicMock(), "google.genai.types": MagicMock(), }): engine = GeminiEngine() engine._client = mock_client engine._model_name = "gemini-2.5-flash" result = engine.extract_maintenance(_make_pdf_bytes()) assert isinstance(result, MaintenanceExtractionResult) assert len(result.items) == 2 assert result.model == "gemini-2.5-flash" oil = result.items[0] assert oil.service_name == "Engine Oil Change" assert oil.interval_miles == 5000 assert oil.interval_months == 6 assert oil.details == "Use 0W-20 full synthetic oil." tire = result.items[1] assert tire.service_name == "Tire Rotation" assert tire.details is None @patch("app.engines.gemini_engine.settings") @patch("app.engines.gemini_engine.os.path.isfile", return_value=True) def test_no_maintenance_content_returns_empty_array( self, mock_isfile, mock_settings ): """Edge: PDF with no maintenance content returns empty array.""" mock_settings.google_vision_key_path = "/fake/creds.json" mock_settings.vertex_ai_project = "test-project" mock_settings.vertex_ai_location = "us-central1" mock_settings.gemini_model = "gemini-2.5-flash" mock_client = MagicMock() mock_client.models.generate_content.return_value = _make_gemini_response([]) engine = GeminiEngine() engine._client = mock_client engine._model_name = "gemini-2.5-flash" result = engine.extract_maintenance(_make_pdf_bytes()) assert isinstance(result, MaintenanceExtractionResult) assert result.items == [] @patch("app.engines.gemini_engine.settings") @patch("app.engines.gemini_engine.os.path.isfile", return_value=True) def test_nullable_fields_handled(self, mock_isfile, mock_settings): """Items with only serviceName (nullable fields omitted) parse correctly.""" mock_settings.google_vision_key_path = "/fake/creds.json" mock_settings.vertex_ai_project = "test-project" mock_settings.vertex_ai_location = "us-central1" mock_settings.gemini_model = "gemini-2.5-flash" schedule = [{"serviceName": "Brake Fluid Replacement"}] mock_client = MagicMock() mock_client.models.generate_content.return_value = _make_gemini_response(schedule) engine = GeminiEngine() engine._client = mock_client engine._model_name = "gemini-2.5-flash" result = engine.extract_maintenance(_make_pdf_bytes()) assert len(result.items) == 1 item = result.items[0] assert item.service_name == "Brake Fluid Replacement" assert item.interval_miles is None assert item.interval_months is None assert item.details is None # --- Error handling --- class TestErrorHandling: """Verify error handling for various failure modes.""" def test_missing_credential_file_raises_unavailable(self): """Auth failure: Missing credential file raises GeminiUnavailableError.""" engine = GeminiEngine() with ( patch("app.engines.gemini_engine.os.path.isfile", return_value=False), pytest.raises(GeminiUnavailableError, match="credential config not found"), ): engine.extract_maintenance(_make_pdf_bytes()) @patch("app.engines.gemini_engine.os.path.isfile", return_value=True) def test_missing_sdk_raises_unavailable(self, mock_isfile): """Auth failure: Missing SDK raises GeminiUnavailableError.""" engine = GeminiEngine() with ( patch("app.engines.gemini_engine.settings") as mock_settings, patch.dict("sys.modules", { "google": None, "google.genai": None, }), ): mock_settings.google_vision_key_path = "/fake/creds.json" with pytest.raises(GeminiUnavailableError): engine.extract_maintenance(_make_pdf_bytes()) @patch("app.engines.gemini_engine.settings") @patch("app.engines.gemini_engine.os.path.isfile", return_value=True) def test_generate_content_exception_raises_processing_error( self, mock_isfile, mock_settings ): """Runtime error from Gemini API is wrapped as GeminiProcessingError.""" mock_settings.google_vision_key_path = "/fake/creds.json" mock_settings.vertex_ai_project = "test-project" mock_settings.vertex_ai_location = "us-central1" mock_settings.gemini_model = "gemini-2.5-flash" mock_client = MagicMock() mock_client.models.generate_content.side_effect = RuntimeError("API quota exceeded") engine = GeminiEngine() engine._client = mock_client engine._model_name = "gemini-2.5-flash" with pytest.raises(GeminiProcessingError, match="maintenance extraction failed"): engine.extract_maintenance(_make_pdf_bytes()) @patch("app.engines.gemini_engine.settings") @patch("app.engines.gemini_engine.os.path.isfile", return_value=True) def test_invalid_json_response_raises_processing_error( self, mock_isfile, mock_settings ): """Gemini returning invalid JSON is caught and wrapped.""" mock_settings.google_vision_key_path = "/fake/creds.json" mock_settings.vertex_ai_project = "test-project" mock_settings.vertex_ai_location = "us-central1" mock_settings.gemini_model = "gemini-2.5-flash" mock_response = MagicMock() mock_response.text = "not valid json {{" mock_client = MagicMock() mock_client.models.generate_content.return_value = mock_response engine = GeminiEngine() engine._client = mock_client engine._model_name = "gemini-2.5-flash" with pytest.raises(GeminiProcessingError, match="invalid JSON"): engine.extract_maintenance(_make_pdf_bytes()) # --- Lazy initialization --- class TestLazyInitialization: """Verify the client is not created until first use.""" def test_client_is_none_after_construction(self): """GeminiEngine should not initialize the client in __init__.""" engine = GeminiEngine() assert engine._client is None @patch("app.engines.gemini_engine.settings") @patch("app.engines.gemini_engine.os.path.isfile", return_value=True) def test_client_reused_on_second_call(self, mock_isfile, mock_settings): """Once initialized, the same client instance is reused.""" mock_settings.google_vision_key_path = "/fake/creds.json" mock_settings.vertex_ai_project = "test-project" mock_settings.vertex_ai_location = "us-central1" mock_settings.gemini_model = "gemini-2.5-flash" schedule = [{"serviceName": "Oil Change", "intervalMiles": 5000}] mock_client = MagicMock() mock_client.models.generate_content.return_value = _make_gemini_response(schedule) engine = GeminiEngine() engine._client = mock_client engine._model_name = "gemini-2.5-flash" engine.extract_maintenance(_make_pdf_bytes()) engine.extract_maintenance(_make_pdf_bytes()) # Client's generate_content should have been called twice assert mock_client.models.generate_content.call_count == 2