""" Unit Tests for JsonExtractor Tests the JSON extraction functionality including: - JSON structure validation - Make/model/year/trim/engine extraction - Electric vehicle handling (empty engines arrays) - Data normalization and quality assurance - Error handling and reporting - Integration with MakeNameMapper and EngineSpecParser """ import unittest import tempfile import json import os from unittest.mock import patch, MagicMock # Import the classes we're testing from ..extractors.json_extractor import ( JsonExtractor, MakeData, ModelData, ExtractionResult, ValidationResult ) from ..utils.make_name_mapper import MakeNameMapper from ..utils.engine_spec_parser import EngineSpecParser, EngineSpec class TestJsonExtractor(unittest.TestCase): """Test cases for JsonExtractor functionality""" def setUp(self): """Set up test environment before each test""" self.make_mapper = MakeNameMapper() self.engine_parser = EngineSpecParser() self.extractor = JsonExtractor(self.make_mapper, self.engine_parser) def create_test_json_file(self, filename: str, content: dict) -> str: """Create a temporary JSON file for testing""" temp_dir = tempfile.mkdtemp() file_path = os.path.join(temp_dir, filename) with open(file_path, 'w', encoding='utf-8') as f: json.dump(content, f) return file_path def test_validate_json_structure_valid(self): """Test JSON structure validation with valid data""" valid_json = { "toyota": [ { "year": "2024", "models": [ { "name": "camry", "engines": ["2.5L I4", "3.5L V6"], "submodels": ["LE", "XLE", "XSE"] } ] } ] } result = self.extractor.validate_json_structure(valid_json, "toyota.json") self.assertTrue(result.is_valid) self.assertEqual(len(result.errors), 0) def test_validate_json_structure_invalid_top_level(self): """Test JSON validation with invalid top-level structure""" invalid_json = ["not", "a", "dict"] result = self.extractor.validate_json_structure(invalid_json, "test.json") self.assertFalse(result.is_valid) self.assertGreater(len(result.errors), 0) self.assertIn("must be a dictionary", result.errors[0]) def test_validate_json_structure_multiple_keys(self): """Test JSON validation with multiple top-level keys""" invalid_json = { "toyota": [], "honda": [] } result = self.extractor.validate_json_structure(invalid_json, "test.json") self.assertFalse(result.is_valid) self.assertIn("exactly one top-level key", result.errors[0]) def test_validate_json_structure_missing_required_fields(self): """Test JSON validation with missing required fields""" invalid_json = { "toyota": [ { # Missing 'year' field "models": [ { # Missing 'name' field "engines": ["2.5L I4"] } ] } ] } result = self.extractor.validate_json_structure(invalid_json, "test.json") self.assertFalse(result.is_valid) self.assertTrue(any("missing 'year' field" in error for error in result.errors)) self.assertTrue(any("missing 'name' field" in error for error in result.errors)) def test_extract_make_data_simple(self): """Test extraction of simple make data""" test_json = { "toyota": [ { "year": "2024", "models": [ { "name": "camry", "engines": ["2.5L I4", "3.5L V6"], "submodels": ["LE", "XLE"] } ] } ] } json_file = self.create_test_json_file("toyota.json", test_json) try: make_data = self.extractor.extract_make_data(json_file) self.assertEqual(make_data.name, "Toyota") self.assertEqual(make_data.filename, "toyota.json") self.assertEqual(len(make_data.models), 1) self.assertEqual(len(make_data.processing_errors), 0) # Check model data model = make_data.models[0] self.assertEqual(model.name, "camry") self.assertEqual(model.years, [2024]) self.assertEqual(len(model.engines), 2) self.assertEqual(len(model.trims), 2) self.assertFalse(model.is_electric) finally: os.unlink(json_file) def test_extract_make_data_electric_vehicle(self): """Test extraction with electric vehicle (empty engines array)""" test_json = { "tesla": [ { "year": "2024", "models": [ { "name": "model s", "engines": [], # Empty engines - electric vehicle "submodels": ["Base", "Plaid"] } ] } ] } json_file = self.create_test_json_file("tesla.json", test_json) try: make_data = self.extractor.extract_make_data(json_file) self.assertEqual(make_data.name, "Tesla") self.assertEqual(len(make_data.models), 1) model = make_data.models[0] self.assertTrue(model.is_electric) self.assertEqual(len(model.engines), 1) # Should get default electric motor self.assertEqual(model.engines[0].fuel_type, "Electric") self.assertEqual(model.engines[0].configuration, "Electric") finally: os.unlink(json_file) def test_extract_make_data_multiple_years(self): """Test extraction with model appearing across multiple years""" test_json = { "honda": [ { "year": "2023", "models": [ { "name": "civic", "engines": ["1.5L I4"], "submodels": ["LX", "EX"] } ] }, { "year": "2024", "models": [ { "name": "civic", "engines": ["1.5L I4", "2.0L I4"], "submodels": ["LX", "EX", "Type R"] } ] } ] } json_file = self.create_test_json_file("honda.json", test_json) try: make_data = self.extractor.extract_make_data(json_file) self.assertEqual(len(make_data.models), 1) # Should merge into one model model = make_data.models[0] self.assertEqual(model.name, "civic") self.assertEqual(sorted(model.years), [2023, 2024]) self.assertEqual(len(model.engines), 2) # Should have both engines self.assertEqual(len(model.trims), 3) # Should have unique trims finally: os.unlink(json_file) def test_extract_make_data_l_to_i_normalization(self): """Test that L→I normalization is applied during extraction""" test_json = { "geo": [ { "year": "1995", "models": [ { "name": "metro", "engines": ["1.0L L3", "1.3L I4"], # L3 should become I3 "submodels": ["Base", "LSi"] } ] } ] } json_file = self.create_test_json_file("geo.json", test_json) try: make_data = self.extractor.extract_make_data(json_file) model = make_data.models[0] # Find the L3 engine (should be normalized to I3) l3_engine = None for engine in model.engines: if engine.displacement_l == 1.0 and engine.cylinders == 3: l3_engine = engine break self.assertIsNotNone(l3_engine) self.assertEqual(l3_engine.configuration, "I") # Should be normalized from L finally: os.unlink(json_file) def test_extract_make_data_invalid_json(self): """Test extraction with invalid JSON file""" json_file = self.create_test_json_file("invalid.json", {"invalid": "structure"}) try: make_data = self.extractor.extract_make_data(json_file) # Should return make data with errors self.assertEqual(make_data.name, "Invalid") self.assertEqual(len(make_data.models), 0) self.assertGreater(len(make_data.processing_errors), 0) finally: os.unlink(json_file) def test_extract_all_makes_multiple_files(self): """Test extraction of multiple make files""" # Create temporary directory with multiple JSON files temp_dir = tempfile.mkdtemp() try: # Create test files toyota_json = {"toyota": [{"year": "2024", "models": [{"name": "camry", "engines": ["2.5L I4"], "submodels": ["LE"]}]}]} tesla_json = {"tesla": [{"year": "2024", "models": [{"name": "model s", "engines": [], "submodels": ["Base"]}]}]} toyota_file = os.path.join(temp_dir, "toyota.json") tesla_file = os.path.join(temp_dir, "tesla.json") with open(toyota_file, 'w') as f: json.dump(toyota_json, f) with open(tesla_file, 'w') as f: json.dump(tesla_json, f) # Extract all makes result = self.extractor.extract_all_makes(temp_dir) self.assertEqual(result.total_files_processed, 2) self.assertEqual(result.successful_extractions, 2) self.assertEqual(result.failed_extractions, 0) self.assertEqual(len(result.makes), 2) self.assertEqual(result.total_models, 2) self.assertEqual(result.total_engines, 2) # Toyota: 1, Tesla: 1 (electric) self.assertEqual(result.total_electric_models, 1) # Tesla # Check make names make_names = [make.name for make in result.makes] self.assertIn("Toyota", make_names) self.assertIn("Tesla", make_names) finally: # Clean up for file in os.listdir(temp_dir): os.unlink(os.path.join(temp_dir, file)) os.rmdir(temp_dir) def test_extract_all_makes_empty_directory(self): """Test extraction from empty directory""" temp_dir = tempfile.mkdtemp() try: result = self.extractor.extract_all_makes(temp_dir) self.assertEqual(result.total_files_processed, 0) self.assertEqual(result.successful_extractions, 0) self.assertEqual(result.failed_extractions, 0) self.assertEqual(len(result.makes), 0) finally: os.rmdir(temp_dir) def test_get_extraction_statistics(self): """Test extraction statistics generation""" # Create mock extraction result make1 = MakeData("Toyota", "toyota.json", [], [], []) make1.models = [ModelData("camry", [2024], [], [], False)] make2 = MakeData("Tesla", "tesla.json", [], [], []) make2.models = [ModelData("model s", [2024], [], [], True)] result = ExtractionResult( makes=[make1, make2], total_files_processed=2, successful_extractions=2, failed_extractions=0, total_models=2, total_engines=2, total_electric_models=1 ) stats = self.extractor.get_extraction_statistics(result) self.assertEqual(stats['files']['total_processed'], 2) self.assertEqual(stats['files']['successful'], 2) self.assertEqual(stats['files']['success_rate'], 1.0) self.assertEqual(stats['data']['total_makes'], 2) self.assertEqual(stats['data']['total_models'], 2) self.assertEqual(stats['data']['electric_models'], 1) self.assertEqual(len(stats['makes']), 2) class TestDataStructures(unittest.TestCase): """Test cases for data structure classes""" def test_validation_result(self): """Test ValidationResult properties""" result = ValidationResult(True, [], ["warning"]) self.assertTrue(result.is_valid) self.assertFalse(result.has_errors) self.assertTrue(result.has_warnings) def test_model_data_properties(self): """Test ModelData calculated properties""" # Create mock engine specs engines = [ EngineSpec(2.5, "I", 4, "Gasoline", "Natural", "2.5L I4"), EngineSpec(3.5, "V", 6, "Gasoline", "Natural", "3.5L V6") ] model = ModelData( name="camry", years=[2023, 2024], engines=engines, trims=["LE", "XLE", "XSE"], is_electric=False ) self.assertEqual(model.total_trims, 3) self.assertEqual(model.total_engines, 2) self.assertEqual(model.year_range, "2023-2024") def test_model_data_single_year(self): """Test ModelData with single year""" model = ModelData("camry", [2024], [], ["LE"]) self.assertEqual(model.year_range, "2024") def test_make_data_properties(self): """Test MakeData calculated properties""" model1 = ModelData("camry", [2024], [], ["LE", "XLE"], False) model2 = ModelData("prius", [2024], [], ["L", "LE"], True) # Electric make = MakeData("Toyota", "toyota.json", [model1, model2], [], []) self.assertEqual(make.total_models, 2) self.assertEqual(make.total_trims, 4) self.assertEqual(make.electric_models_count, 1) def test_extraction_result_properties(self): """Test ExtractionResult calculated properties""" result = ExtractionResult( makes=[], total_files_processed=10, successful_extractions=8, failed_extractions=2, total_models=100, total_engines=500, total_electric_models=25 ) self.assertEqual(result.success_rate, 0.8) if __name__ == '__main__': unittest.main(verbosity=2)