Initial Commit

2025-09-17 16:09:15 -05:00
parent 0cdb9803de
commit a052040e3a
373 changed files with 437090 additions and 6773 deletions
--- a/docs/changes/vehicles-dropdown-v2/examples/sample-json-processing.py
+++ b/docs/changes/vehicles-dropdown-v2/examples/sample-json-processing.py
@@ -0,0 +1,449 @@
+#!/usr/bin/env python3
+"""
+Sample JSON Processing Examples
+
+This file demonstrates complete processing of JSON vehicle data,
+from file reading through database-ready output structures.
+
+Usage:
+    python sample-json-processing.py
+"""
+
+import json
+from typing import List, Dict, Any, Optional
+from dataclasses import dataclass
+from pathlib import Path
+
+
+@dataclass
+class EngineSpec:
+    """Parsed engine specification.
+
+    Declares the attributes the demo code in this file reads from each engine.
+    Nothing in this file instantiates it directly: the engine objects stored
+    on ModelData are whatever EngineSpecParser returns (presumably this same
+    shape - TODO confirm against engine_parsing_examples).
+    """
+    # Displacement; the demos print it followed by "L". None is allowed.
+    displacement_l: Optional[float]
+    # Layout letter; the demos compare it against 'H' and 'I'.
+    configuration: str
+    # Cylinder count. None is allowed.
+    cylinders: Optional[int]
+    # Fuel description; the demos test for 'Electric' and the substring 'Hybrid'.
+    fuel_type: str
+    # Aspiration label; only echoed into the sample engine INSERT statement.
+    aspiration: str
+    # Engine string the demos display as the parser input (e.g. "2.5L H4").
+    raw_string: str
+
+
+@dataclass
+class ModelData:
+    """Model information for a specific year.
+
+    Built by JsonProcessor.process_json_file, one instance per entry in a
+    year's 'models' list.
+    """
+    # Copied unchanged from the model entry's 'name' key.
+    name: str
+    # One parsed engine per engine string; when the entry lists no engines the
+    # processor stores a single electric-motor placeholder instead.
+    engines: List[EngineSpec]
+    trims: List[str]  # From submodels
+
+
+@dataclass
+class YearData:
+    """Vehicle data for a specific year.
+
+    Built by JsonProcessor.process_json_file, one instance per year entry
+    found under the make key of the JSON document.
+    """
+    # int() of the entry's 'year' value (the sample JSON stores it as a string).
+    year: int
+    # Models in the order they appear in the entry's 'models' list.
+    models: List[ModelData]
+
+
+@dataclass
+class MakeData:
+    """Everything processed for one make, plus aggregate counters."""
+    name: str           # Normalized display name
+    filename: str       # Original JSON filename
+    years: List[YearData]
+
+    def _iter_models(self):
+        """Yield every ModelData across all years, in stored order."""
+        for year_data in self.years:
+            yield from year_data.models
+
+    @property
+    def total_models(self) -> int:
+        """Number of model entries summed over all years."""
+        return sum(len(year_data.models) for year_data in self.years)
+
+    @property
+    def total_engines(self) -> int:
+        """Number of engine entries summed over every model of every year."""
+        return sum(len(model_data.engines) for model_data in self._iter_models())
+
+    @property
+    def total_trims(self) -> int:
+        """Number of trim names summed over every model of every year."""
+        return sum(len(model_data.trims) for model_data in self._iter_models())
+
+
+class JsonProcessor:
+    """Convert a per-make JSON document into MakeData/YearData/ModelData."""
+
+    def __init__(self):
+        # The helper modules are imported here rather than at module level, so
+        # they are only required once a processor is actually created.
+        # NOTE(review): this file is saved with hyphens
+        # (sample-json-processing.py); confirm the sibling modules use
+        # underscores in their filenames, otherwise these imports cannot resolve.
+        from engine_parsing_examples import EngineSpecParser
+        from make_mapping_examples import MakeNameMapper
+
+        self.engine_parser = EngineSpecParser()
+        self.make_mapper = MakeNameMapper()
+
+    def process_json_file(self, json_data: Dict[str, Any], filename: str) -> MakeData:
+        """Build a MakeData from one parsed JSON document.
+
+        Args:
+            json_data: Parsed JSON whose first key maps to a list of year
+                entries; each entry has a 'year' and optionally 'models'.
+            filename: Name of the source file; it is stored on the result and
+                passed to the make mapper to obtain the display name.
+
+        Returns:
+            MakeData holding one YearData per year entry, in input order.
+        """
+        # Only the first top-level key is read; its value is the year list.
+        root_key = list(json_data.keys())[0]
+        make_name = self.make_mapper.normalize_make_name(filename)
+
+        collected_years = []
+        for entry in json_data[root_key]:
+            year_number = int(entry['year'])
+            year_models = [
+                self._build_model(raw_model)
+                for raw_model in entry.get('models', [])
+            ]
+            collected_years.append(YearData(year=year_number, models=year_models))
+
+        return MakeData(name=make_name, filename=filename, years=collected_years)
+
+    def _build_model(self, model_entry):
+        """Translate one raw model entry into a ModelData."""
+        model_name = model_entry['name']
+
+        raw_engines = model_entry.get('engines', [])
+        if raw_engines:
+            parsed_engines = [
+                self.engine_parser.parse_engine_string(raw_engine)
+                for raw_engine in raw_engines
+            ]
+        else:
+            # No engines listed: treated as an electric vehicle and given the
+            # parser's default electric motor.
+            parsed_engines = [self.engine_parser.create_electric_motor()]
+
+        # Trims are the entry's 'submodels' list, passed through as-is.
+        return ModelData(
+            name=model_name,
+            engines=parsed_engines,
+            trims=model_entry.get('submodels', []),
+        )
+
+
+def demonstrate_tesla_processing():
+    """Run the processor on a small Tesla sample and print the result.
+
+    Every model in the sample has an empty engine list, which exercises the
+    processor's electric-vehicle branch.
+    """
+
+    def electric_model(name, submodels):
+        # Electric vehicles carry an empty 'engines' list in the source JSON
+        return {"name": name, "engines": [], "submodels": submodels}
+
+    # Sample Tesla data (simplified from actual tesla.json)
+    tesla_json = {
+        "tesla": [
+            {
+                "year": "2024",
+                "models": [
+                    electric_model("3", ["Long Range AWD", "Performance", "Standard Plus"]),
+                    electric_model("y", ["Long Range", "Performance"]),
+                ],
+            },
+            {
+                "year": "2023",
+                "models": [
+                    electric_model("s", ["Plaid", "Long Range Plus"]),
+                ],
+            },
+        ]
+    }
+
+    tesla = JsonProcessor().process_json_file(tesla_json, 'tesla.json')
+
+    # Headline figures
+    print("⚡ Tesla JSON Processing Example")
+    print("=" * 35)
+    print("Filename: tesla.json")
+    print(f"Display Name: {tesla.name}")
+    print(f"Years: {len(tesla.years)}")
+    print(f"Total Models: {tesla.total_models}")
+    print(f"Total Engines: {tesla.total_engines}")
+    print(f"Total Trims: {tesla.total_trims}")
+
+    # Per-year, per-model listing
+    print("\nDetailed Breakdown:")
+    for year_entry in tesla.years:
+        print(f"\n  {year_entry.year}:")
+        for model_entry in year_entry.models:
+            print(f"    Model: {model_entry.name}")
+            engine_labels = [motor.raw_string for motor in model_entry.engines]
+            print(f"    Engines: {engine_labels}")
+            print(f"    Trims: {model_entry.trims}")
+
+
+def demonstrate_subaru_processing():
+    """Run the processor on a Subaru sample and print each parsed engine.
+
+    Engines whose configuration is 'H' are tagged as Boxer, and engines whose
+    fuel type mentions 'Hybrid' are tagged as Hybrid.
+    """
+
+    # Sample Subaru data showing H4 engines
+    crosstrek = {
+        "name": "crosstrek",
+        "engines": ["2.0L H4", "2.0L H4 PLUG-IN HYBRID EV- (PHEV)", "2.5L H4"],
+        "submodels": ["Base", "Premium", "Limited", "Hybrid"],
+    }
+    forester = {
+        "name": "forester",
+        "engines": ["2.5L H4"],
+        "submodels": ["Base", "Premium", "Sport", "Limited"],
+    }
+    subaru_json = {"subaru": [{"year": "2024", "models": [crosstrek, forester]}]}
+
+    subaru = JsonProcessor().process_json_file(subaru_json, 'subaru.json')
+
+    print("\n\n🚗 Subaru JSON Processing Example (Boxer Engines)")
+    print("=" * 50)
+    print(f"Display Name: {subaru.name}")
+
+    for year_entry in subaru.years:
+        print(f"\n{year_entry.year}:")
+        for model_entry in year_entry.models:
+            print(f"  {model_entry.name}:")
+            for spec in model_entry.engines:
+                boxer_tag = ""
+                if spec.configuration == 'H':
+                    boxer_tag = " (Boxer)"
+                hybrid_tag = ""
+                if 'Hybrid' in spec.fuel_type:
+                    hybrid_tag = " (Hybrid)"
+                print(f"    Engine: {spec.raw_string}")
+                print(f"      → {spec.displacement_l}L {spec.configuration}{spec.cylinders}{boxer_tag}{hybrid_tag}")
+
+
+def demonstrate_l_to_i_processing():
+    """Show the configuration letter before and after parsing (L to I).
+
+    The "before" letter is guessed from the raw string: it is reported as 'L'
+    only when the text contains "L3", which is the only L-layout in the sample.
+    """
+
+    def model(name, engine, submodels):
+        # Each sample model carries exactly one engine string
+        return {"name": name, "engines": [engine], "submodels": submodels}
+
+    # Sample data with L-configuration engines
+    nissan_json = {
+        "nissan": [
+            {
+                "year": "2024",
+                "models": [
+                    model("versa", "1.6L I4", ["S", "SV", "SR"]),
+                    # L3 → I3
+                    model("kicks", "1.5L L3 PLUG-IN HYBRID EV- (PHEV)", ["S", "SV", "SR"]),
+                    # L3 → I3
+                    model("note", "1.2L L3 FULL HYBRID EV- (FHEV)", ["Base", "Premium"]),
+                ],
+            }
+        ]
+    }
+
+    nissan = JsonProcessor().process_json_file(nissan_json, 'nissan.json')
+
+    print("\n\n🎯 L→I Normalization Processing Example")
+    print("=" * 42)
+
+    for year_entry in nissan.years:
+        for model_entry in year_entry.models:
+            for spec in model_entry.engines:
+                if "L3" in spec.raw_string:
+                    letter_before = "L"
+                else:
+                    letter_before = "I"
+                letter_after = spec.configuration
+
+                print(f"Model: {model_entry.name}")
+                print(f'  Input: "{spec.raw_string}"')
+                print(f"  Configuration: {letter_before}{spec.cylinders} → {letter_after}{spec.cylinders}")
+
+                was_normalized = letter_before == "L" and letter_after == "I"
+                if was_normalized:
+                    print("  🎯 NORMALIZED: L→I conversion applied")
+                print()
+
+
+def demonstrate_database_ready_output():
+    """Show how processed data maps to database tables.
+
+    Processes a small Toyota sample and prints illustrative INSERT statements
+    for the make, model, model_year, engine and trim tables. Foreign keys are
+    hard-coded to 1, as the printed section headers state; the statements are
+    for display only and are never executed.
+
+    Values are rendered through a small literal formatter so the printed SQL
+    stays valid when a value is None (engine displacement and cylinders are
+    Optional) or when a name contains a single quote.
+    """
+
+    def sql_literal(value):
+        """Render a Python value as a SQL literal for the printed statements."""
+        if value is None:
+            return "NULL"
+        if isinstance(value, str):
+            # Standard SQL escapes a quote inside a string by doubling it
+            return "'" + value.replace("'", "''") + "'"
+        return str(value)
+
+    # Sample mixed data
+    sample_json = {
+        "toyota": [
+            {
+                "year": "2024",
+                "models": [
+                    {
+                        "name": "camry",
+                        "engines": [
+                            "2.5L I4",
+                            "2.5L I4 FULL HYBRID EV- (FHEV)"
+                        ],
+                        "submodels": [
+                            "LE",
+                            "XLE",
+                            "Hybrid LE"
+                        ]
+                    }
+                ]
+            }
+        ]
+    }
+
+    processor = JsonProcessor()
+    make_data = processor.process_json_file(sample_json, 'toyota.json')
+
+    print("\n\n💾 Database-Ready Output")
+    print("=" * 25)
+
+    # Show SQL INSERT statements
+    print("-- Make table")
+    print(f"INSERT INTO vehicles.make (name) VALUES ({sql_literal(make_data.name)});")
+
+    print("\n-- Model table (assuming make_id = 1)")
+    for year_data in make_data.years:
+        for model in year_data.models:
+            print(f"INSERT INTO vehicles.model (make_id, name) VALUES (1, {sql_literal(model.name)});")
+
+    print("\n-- Model Year table (assuming model_id = 1)")
+    for year_data in make_data.years:
+        print(f"INSERT INTO vehicles.model_year (model_id, year) VALUES (1, {sql_literal(year_data.year)});")
+
+    print("\n-- Engine table")
+    # An engine shared by several models or years is printed only once
+    unique_engines = set()
+    for year_data in make_data.years:
+        for model in year_data.models:
+            for engine in model.engines:
+                engine_key = (engine.raw_string, engine.displacement_l, engine.cylinders, engine.fuel_type)
+                if engine_key in unique_engines:
+                    continue
+                unique_engines.add(engine_key)
+                values = ", ".join(
+                    sql_literal(value)
+                    for value in (
+                        engine.raw_string,
+                        engine.displacement_l,
+                        engine.cylinders,
+                        engine.fuel_type,
+                        engine.aspiration,
+                    )
+                )
+                print("INSERT INTO vehicles.engine (name, displacement_l, cylinders, fuel_type, aspiration)")
+                print(f"  VALUES ({values});")
+
+    print("\n-- Trim table (assuming model_year_id = 1)")
+    for year_data in make_data.years:
+        for model in year_data.models:
+            for trim in model.trims:
+                print(f"INSERT INTO vehicles.trim (model_year_id, name) VALUES (1, {sql_literal(trim)});")
+
+
+def run_processing_validation():
+    """Validate that processing works correctly.
+
+    Runs three single-model documents (Tesla, Subaru, Nissan) through
+    JsonProcessor and checks the result. Stops at the first failure.
+
+    Returns:
+        True when every case passes, False as soon as one case fails
+        (the failure is printed, not raised).
+    """
+
+    def require(condition, message):
+        # Explicit check instead of `assert`: assert statements are removed
+        # when Python runs with -O, which would make this validation pass
+        # without checking anything.
+        if not condition:
+            raise AssertionError(message)
+
+    print("\n\n✅ Processing Validation")
+    print("=" * 25)
+
+    processor = JsonProcessor()
+
+    # Test cases
+    test_cases = [
+        # Tesla (electric, empty engines)
+        ('tesla.json', {"tesla": [{"year": "2024", "models": [{"name": "3", "engines": [], "submodels": ["Base"]}]}]}),
+        # Subaru (H4 engines)
+        ('subaru.json', {"subaru": [{"year": "2024", "models": [{"name": "crosstrek", "engines": ["2.0L H4"], "submodels": ["Base"]}]}]}),
+        # Nissan (L→I normalization)
+        ('nissan.json', {"nissan": [{"year": "2024", "models": [{"name": "kicks", "engines": ["1.5L L3"], "submodels": ["Base"]}]}]})
+    ]
+
+    for filename, json_data in test_cases:
+        # Broad catch is deliberate: any error in a case is reported as a
+        # failed validation rather than aborting the script.
+        try:
+            make_data = processor.process_json_file(json_data, filename)
+
+            # Basic validation
+            require(make_data.name is not None, "Make name should not be None")
+            require(len(make_data.years) > 0, "Should have at least one year")
+            require(make_data.total_models > 0, "Should have at least one model")
+
+            print(f"✅ {filename} processed successfully")
+            print(f"   Make: {make_data.name}, Models: {make_data.total_models}, Engines: {make_data.total_engines}")
+
+            # Special validations
+            if filename == 'tesla.json':
+                # Should have electric motors for empty engines
+                for year_data in make_data.years:
+                    for model in year_data.models:
+                        require(
+                            all(e.fuel_type == 'Electric' for e in model.engines),
+                            "Tesla should have electric engines",
+                        )
+
+            if filename == 'nissan.json':
+                # Should have L→I normalization
+                for year_data in make_data.years:
+                    for model in year_data.models:
+                        for engine in model.engines:
+                            if 'L3' in engine.raw_string:
+                                require(engine.configuration == 'I', "L3 should become I3")
+
+        except Exception as e:
+            print(f"❌ {filename} failed: {e}")
+            return False
+
+    print("\n🎉 All processing validation tests passed!")
+    return True
+
+
+if __name__ == "__main__":
+    # Run every demonstration in order, then the validation pass
+    for demo in (
+        demonstrate_tesla_processing,
+        demonstrate_subaru_processing,
+        demonstrate_l_to_i_processing,
+        demonstrate_database_ready_output,
+    ):
+        demo()
+
+    success = run_processing_validation()
+
+    # Fixed summary of what the examples cover
+    print("\n\n📋 Summary")
+    print("=" * 10)
+    for summary_line in (
+        "✅ JSON file processing implemented",
+        "✅ Electric vehicle handling (empty engines → Electric Motor)",
+        "✅ L→I normalization during processing",
+        "✅ Database-ready output structures",
+        "✅ Make name normalization integrated",
+        "✅ Engine specification parsing integrated",
+    ):
+        print(summary_line)
+
+    # Outcome depends on the validation result
+    outcome = "\n🚀 Ready for ETL pipeline integration!" if success else "\n⚠️  Review failed validations"
+    print(outcome)
+
+    print("\nNext Steps:")
+    for next_step in (
+        "• Integrate with PostgreSQL loader",
+        "• Add batch processing for all 55 files",
+        "• Implement clear/append modes",
+        "• Add CLI interface",
+        "• Create comprehensive test suite",
+    ):
+        print(next_step)