Initial Commit

2025-09-17 16:09:15 -05:00
parent 0cdb9803de
commit a052040e3a
373 changed files with 437090 additions and 6773 deletions
--- a/mvp-platform-services/vehicles/etl/tests/test_json_extractor.py
+++ b/mvp-platform-services/vehicles/etl/tests/test_json_extractor.py
@@ -0,0 +1,427 @@
+"""
+Unit Tests for JsonExtractor
+
+Tests the JSON extraction functionality including:
+- JSON structure validation  
+- Make/model/year/trim/engine extraction
+- Electric vehicle handling (empty engines arrays)
+- Data normalization and quality assurance
+- Error handling and reporting
+- Integration with MakeNameMapper and EngineSpecParser
+"""
+
+import unittest
+import tempfile
+import json
+import os
+from unittest.mock import patch, MagicMock
+
+# Import the classes we're testing
+from ..extractors.json_extractor import (
+    JsonExtractor, MakeData, ModelData, ExtractionResult, ValidationResult
+)
+from ..utils.make_name_mapper import MakeNameMapper
+from ..utils.engine_spec_parser import EngineSpecParser, EngineSpec
+
+
+class TestJsonExtractor(unittest.TestCase):
+    """Test cases for JsonExtractor functionality.
+
+    Fixture files are written into ``tempfile.TemporaryDirectory`` instances,
+    so both the files and the directories holding them are removed when a
+    test finishes, whether it passes or fails.
+    """
+
+    def setUp(self):
+        """Build a fresh extractor and its two collaborators before each test."""
+        self.make_mapper = MakeNameMapper()
+        self.engine_parser = EngineSpecParser()
+        self.extractor = JsonExtractor(self.make_mapper, self.engine_parser)
+
+    def create_test_json_file(self, filename: str, content: dict) -> str:
+        """Write ``content`` as JSON to ``filename`` inside a new temp directory.
+
+        The directory is registered with ``addCleanup``, so it and the file
+        inside it are deleted after the current test. Callers do not need to
+        remove anything themselves.
+
+        Args:
+            filename: Base name of the file to create, e.g. ``"toyota.json"``.
+            content: JSON-serializable object dumped into the file.
+
+        Returns:
+            Path of the file that was written.
+        """
+        temp_dir = tempfile.TemporaryDirectory()
+        # Clean up the whole directory: unlinking only the file would leave
+        # one orphaned temp directory behind per call.
+        self.addCleanup(temp_dir.cleanup)
+        file_path = os.path.join(temp_dir.name, filename)
+
+        with open(file_path, 'w', encoding='utf-8') as f:
+            json.dump(content, f)
+
+        return file_path
+
+    def test_validate_json_structure_valid(self):
+        """Test JSON structure validation with valid data"""
+        valid_json = {
+            "toyota": [
+                {
+                    "year": "2024",
+                    "models": [
+                        {
+                            "name": "camry",
+                            "engines": ["2.5L I4", "3.5L V6"],
+                            "submodels": ["LE", "XLE", "XSE"]
+                        }
+                    ]
+                }
+            ]
+        }
+
+        result = self.extractor.validate_json_structure(valid_json, "toyota.json")
+
+        self.assertTrue(result.is_valid)
+        self.assertEqual(len(result.errors), 0)
+
+    def test_validate_json_structure_invalid_top_level(self):
+        """Test JSON validation with invalid top-level structure"""
+        invalid_json = ["not", "a", "dict"]
+
+        result = self.extractor.validate_json_structure(invalid_json, "test.json")
+
+        self.assertFalse(result.is_valid)
+        self.assertGreater(len(result.errors), 0)
+        self.assertIn("must be a dictionary", result.errors[0])
+
+    def test_validate_json_structure_multiple_keys(self):
+        """Test JSON validation with multiple top-level keys"""
+        invalid_json = {
+            "toyota": [],
+            "honda": []
+        }
+
+        result = self.extractor.validate_json_structure(invalid_json, "test.json")
+
+        self.assertFalse(result.is_valid)
+        self.assertIn("exactly one top-level key", result.errors[0])
+
+    def test_validate_json_structure_missing_required_fields(self):
+        """Test JSON validation with missing required fields"""
+        invalid_json = {
+            "toyota": [
+                {
+                    # Missing 'year' field
+                    "models": [
+                        {
+                            # Missing 'name' field
+                            "engines": ["2.5L I4"]
+                        }
+                    ]
+                }
+            ]
+        }
+
+        result = self.extractor.validate_json_structure(invalid_json, "test.json")
+
+        self.assertFalse(result.is_valid)
+        self.assertTrue(any("missing 'year' field" in error for error in result.errors))
+        self.assertTrue(any("missing 'name' field" in error for error in result.errors))
+
+    def test_extract_make_data_simple(self):
+        """Test extraction of simple make data"""
+        test_json = {
+            "toyota": [
+                {
+                    "year": "2024",
+                    "models": [
+                        {
+                            "name": "camry",
+                            "engines": ["2.5L I4", "3.5L V6"],
+                            "submodels": ["LE", "XLE"]
+                        }
+                    ]
+                }
+            ]
+        }
+
+        json_file = self.create_test_json_file("toyota.json", test_json)
+        make_data = self.extractor.extract_make_data(json_file)
+
+        self.assertEqual(make_data.name, "Toyota")
+        self.assertEqual(make_data.filename, "toyota.json")
+        self.assertEqual(len(make_data.models), 1)
+        self.assertEqual(len(make_data.processing_errors), 0)
+
+        # Check model data
+        model = make_data.models[0]
+        self.assertEqual(model.name, "camry")
+        self.assertEqual(model.years, [2024])
+        self.assertEqual(len(model.engines), 2)
+        self.assertEqual(len(model.trims), 2)
+        self.assertFalse(model.is_electric)
+
+    def test_extract_make_data_electric_vehicle(self):
+        """Test extraction with electric vehicle (empty engines array)"""
+        test_json = {
+            "tesla": [
+                {
+                    "year": "2024",
+                    "models": [
+                        {
+                            "name": "model s",
+                            "engines": [],  # Empty engines - electric vehicle
+                            "submodels": ["Base", "Plaid"]
+                        }
+                    ]
+                }
+            ]
+        }
+
+        json_file = self.create_test_json_file("tesla.json", test_json)
+        make_data = self.extractor.extract_make_data(json_file)
+
+        self.assertEqual(make_data.name, "Tesla")
+        self.assertEqual(len(make_data.models), 1)
+
+        model = make_data.models[0]
+        self.assertTrue(model.is_electric)
+        self.assertEqual(len(model.engines), 1)  # Should get default electric motor
+        self.assertEqual(model.engines[0].fuel_type, "Electric")
+        self.assertEqual(model.engines[0].configuration, "Electric")
+
+    def test_extract_make_data_multiple_years(self):
+        """Test extraction with model appearing across multiple years"""
+        test_json = {
+            "honda": [
+                {
+                    "year": "2023",
+                    "models": [
+                        {
+                            "name": "civic",
+                            "engines": ["1.5L I4"],
+                            "submodels": ["LX", "EX"]
+                        }
+                    ]
+                },
+                {
+                    "year": "2024",
+                    "models": [
+                        {
+                            "name": "civic",
+                            "engines": ["1.5L I4", "2.0L I4"],
+                            "submodels": ["LX", "EX", "Type R"]
+                        }
+                    ]
+                }
+            ]
+        }
+
+        json_file = self.create_test_json_file("honda.json", test_json)
+        make_data = self.extractor.extract_make_data(json_file)
+
+        self.assertEqual(len(make_data.models), 1)  # Should merge into one model
+
+        model = make_data.models[0]
+        self.assertEqual(model.name, "civic")
+        self.assertEqual(sorted(model.years), [2023, 2024])
+        self.assertEqual(len(model.engines), 2)  # Should have both engines
+        self.assertEqual(len(model.trims), 3)  # Should have unique trims
+
+    def test_extract_make_data_l_to_i_normalization(self):
+        """Test that L→I normalization is applied during extraction"""
+        test_json = {
+            "geo": [
+                {
+                    "year": "1995",
+                    "models": [
+                        {
+                            "name": "metro",
+                            "engines": ["1.0L L3", "1.3L I4"],  # L3 should become I3
+                            "submodels": ["Base", "LSi"]
+                        }
+                    ]
+                }
+            ]
+        }
+
+        json_file = self.create_test_json_file("geo.json", test_json)
+        make_data = self.extractor.extract_make_data(json_file)
+
+        model = make_data.models[0]
+
+        # Find the L3 engine (should be normalized to I3)
+        l3_engine = next(
+            (
+                engine
+                for engine in model.engines
+                if engine.displacement_l == 1.0 and engine.cylinders == 3
+            ),
+            None,
+        )
+
+        self.assertIsNotNone(l3_engine)
+        self.assertEqual(l3_engine.configuration, "I")  # Should be normalized from L
+
+    def test_extract_make_data_invalid_json(self):
+        """Test extraction with invalid JSON file"""
+        json_file = self.create_test_json_file("invalid.json", {"invalid": "structure"})
+        make_data = self.extractor.extract_make_data(json_file)
+
+        # Should return make data with errors
+        self.assertEqual(make_data.name, "Invalid")
+        self.assertEqual(len(make_data.models), 0)
+        self.assertGreater(len(make_data.processing_errors), 0)
+
+    def test_extract_all_makes_multiple_files(self):
+        """Test extraction of multiple make files"""
+        # One combustion make and one electric make (empty engines list).
+        make_files = {
+            "toyota.json": {
+                "toyota": [
+                    {
+                        "year": "2024",
+                        "models": [
+                            {"name": "camry", "engines": ["2.5L I4"], "submodels": ["LE"]}
+                        ]
+                    }
+                ]
+            },
+            "tesla.json": {
+                "tesla": [
+                    {
+                        "year": "2024",
+                        "models": [
+                            {"name": "model s", "engines": [], "submodels": ["Base"]}
+                        ]
+                    }
+                ]
+            },
+        }
+
+        # The context manager removes the directory and everything in it,
+        # even if an assertion below fails.
+        with tempfile.TemporaryDirectory() as temp_dir:
+            for filename, content in make_files.items():
+                file_path = os.path.join(temp_dir, filename)
+                with open(file_path, 'w', encoding='utf-8') as f:
+                    json.dump(content, f)
+
+            # Extract all makes
+            result = self.extractor.extract_all_makes(temp_dir)
+
+            self.assertEqual(result.total_files_processed, 2)
+            self.assertEqual(result.successful_extractions, 2)
+            self.assertEqual(result.failed_extractions, 0)
+            self.assertEqual(len(result.makes), 2)
+            self.assertEqual(result.total_models, 2)
+            self.assertEqual(result.total_engines, 2)  # Toyota: 1, Tesla: 1 (electric)
+            self.assertEqual(result.total_electric_models, 1)  # Tesla
+
+            # Check make names
+            make_names = [make.name for make in result.makes]
+            self.assertIn("Toyota", make_names)
+            self.assertIn("Tesla", make_names)
+
+    def test_extract_all_makes_empty_directory(self):
+        """Test extraction from empty directory"""
+        with tempfile.TemporaryDirectory() as temp_dir:
+            result = self.extractor.extract_all_makes(temp_dir)
+
+            self.assertEqual(result.total_files_processed, 0)
+            self.assertEqual(result.successful_extractions, 0)
+            self.assertEqual(result.failed_extractions, 0)
+            self.assertEqual(len(result.makes), 0)
+
+    def test_get_extraction_statistics(self):
+        """Test extraction statistics generation"""
+        # Build an extraction result by hand: one regular and one electric make.
+        make1 = MakeData("Toyota", "toyota.json", [], [], [])
+        make1.models = [ModelData("camry", [2024], [], [], False)]
+
+        make2 = MakeData("Tesla", "tesla.json", [], [], [])
+        make2.models = [ModelData("model s", [2024], [], [], True)]
+
+        result = ExtractionResult(
+            makes=[make1, make2],
+            total_files_processed=2,
+            successful_extractions=2,
+            failed_extractions=0,
+            total_models=2,
+            total_engines=2,
+            total_electric_models=1
+        )
+
+        stats = self.extractor.get_extraction_statistics(result)
+
+        self.assertEqual(stats['files']['total_processed'], 2)
+        self.assertEqual(stats['files']['successful'], 2)
+        self.assertEqual(stats['files']['success_rate'], 1.0)
+        self.assertEqual(stats['data']['total_makes'], 2)
+        self.assertEqual(stats['data']['total_models'], 2)
+        self.assertEqual(stats['data']['electric_models'], 1)
+        self.assertEqual(len(stats['makes']), 2)
+
+
+class TestDataStructures(unittest.TestCase):
+    """Checks the derived properties exposed by the extractor's data classes."""
+
+    def test_validation_result(self):
+        """A valid result with one warning reports warnings but no errors."""
+        outcome = ValidationResult(True, [], ["warning"])
+
+        self.assertTrue(outcome.is_valid)
+        self.assertFalse(outcome.has_errors)
+        self.assertTrue(outcome.has_warnings)
+
+    def test_model_data_properties(self):
+        """ModelData reports trim/engine counts and a start-end year range."""
+        inline_four = EngineSpec(2.5, "I", 4, "Gasoline", "Natural", "2.5L I4")
+        v_six = EngineSpec(3.5, "V", 6, "Gasoline", "Natural", "3.5L V6")
+
+        camry = ModelData(
+            name="camry",
+            years=[2023, 2024],
+            engines=[inline_four, v_six],
+            trims=["LE", "XLE", "XSE"],
+            is_electric=False
+        )
+
+        self.assertEqual(camry.total_trims, 3)
+        self.assertEqual(camry.total_engines, 2)
+        self.assertEqual(camry.year_range, "2023-2024")
+
+    def test_model_data_single_year(self):
+        """A model with a single year reports that year alone as its range."""
+        single_year_model = ModelData("camry", [2024], [], ["LE"])
+
+        self.assertEqual(single_year_model.year_range, "2024")
+
+    def test_make_data_properties(self):
+        """MakeData aggregates model, trim and electric-model counts."""
+        models = [
+            ModelData("camry", [2024], [], ["LE", "XLE"], False),
+            ModelData("prius", [2024], [], ["L", "LE"], True),  # Electric
+        ]
+
+        toyota = MakeData("Toyota", "toyota.json", models, [], [])
+
+        self.assertEqual(toyota.total_models, 2)
+        self.assertEqual(toyota.total_trims, 4)
+        self.assertEqual(toyota.electric_models_count, 1)
+
+    def test_extraction_result_properties(self):
+        """8 successful extractions out of 10 files gives a 0.8 success rate."""
+        counters = {
+            "total_files_processed": 10,
+            "successful_extractions": 8,
+            "failed_extractions": 2,
+            "total_models": 100,
+            "total_engines": 500,
+            "total_electric_models": 25,
+        }
+
+        summary = ExtractionResult(makes=[], **counters)
+
+        self.assertEqual(summary.success_rate, 0.8)
+
+
+# Run the suite verbosely when this file is executed as a script.
+# NOTE(review): this module uses relative imports, which need package
+# context - running via `python -m unittest` is presumably the intended
+# entry point; confirm.
+if __name__ == "__main__":
+    unittest.main(verbosity=2)