Initial Commit

2025-09-17 16:09:15 -05:00
parent 0cdb9803de
commit a052040e3a
373 changed files with 437090 additions and 6773 deletions
--- a/docs/changes/vehicles-dropdown-v2/examples/engine-parsing-examples.py
+++ b/docs/changes/vehicles-dropdown-v2/examples/engine-parsing-examples.py
@@ -0,0 +1,314 @@
+#!/usr/bin/env python3
+"""
+Engine Specification Parsing Examples
+
+This file contains comprehensive examples of engine parsing patterns
+found in the JSON vehicle data, demonstrating the L→I normalization
+and hybrid/electric detection requirements.
+
+Usage:
+    python engine-parsing-examples.py
+"""
+
+import re
+from dataclasses import dataclass
+from typing import Optional, List
+
+
+@dataclass
+class EngineSpec:
+    """Parsed engine specification.
+
+    Fields that cannot be determined, or that do not apply (for example the
+    displacement of an electric motor), are ``None``.
+    """
+    displacement_l: Optional[float]  # litres; None for fallback/electric specs
+    configuration: str               # I, V, H, Electric, Unknown
+    cylinders: Optional[int]         # None for fallback/electric specs
+    fuel_type: str      # Gasoline, Hybrid, Full Hybrid, Plug-in Hybrid, Flex Fuel, Electric, Unknown
+    aspiration: Optional[str]        # Natural, Turbocharged, Supercharged; None for electric motors
+    raw_string: str                  # the input string exactly as received
+
+
+class EngineSpecParser:
+    """Engine specification parser with L→I normalization.
+
+    Parses strings shaped like ``"{displacement}L {config}{cylinders} [extras]"``
+    (for example ``"1.5L L3 PLUG-IN HYBRID EV- (PHEV)"``) into ``EngineSpec``
+    records.  Strings that do not start with that shape produce a fallback spec.
+    """
+
+    def __init__(self):
+        # Primary pattern: {displacement}L {config}{cylinders}
+        self.engine_pattern = re.compile(r'(\d+\.?\d*)L\s+([IVHL])(\d+)')
+
+        # Hybrid patterns: a match on any of them marks the engine as a hybrid
+        self.hybrid_patterns = [
+            re.compile(r'PLUG-IN HYBRID EV-?\s*\(PHEV\)', re.IGNORECASE),
+            re.compile(r'FULL HYBRID EV-?\s*\(FHEV\)', re.IGNORECASE),
+            re.compile(r'HYBRID', re.IGNORECASE),
+        ]
+
+        # Other fuel type patterns
+        self.fuel_patterns = [
+            (re.compile(r'FLEX', re.IGNORECASE), 'Flex Fuel'),
+            (re.compile(r'ELECTRIC', re.IGNORECASE), 'Electric'),
+        ]
+
+        # Aspiration patterns.  "SC" only counts as a standalone token: an
+        # unanchored "SC" would also fire inside unrelated words (e.g. "DISC").
+        self.aspiration_patterns = [
+            (re.compile(r'TURBO', re.IGNORECASE), 'Turbocharged'),
+            (re.compile(r'SUPERCHARGED|\bSC\b', re.IGNORECASE), 'Supercharged'),
+        ]
+
+    def normalize_configuration(self, config: str) -> str:
+        """CRITICAL: Convert L to I (L-configuration becomes Inline).
+
+        Any other configuration letter is returned unchanged.
+        """
+        return 'I' if config == 'L' else config
+
+    def extract_fuel_type(self, engine_str: str) -> str:
+        """Return the fuel type named in ``engine_str``.
+
+        Hybrids are checked first and split into 'Plug-in Hybrid',
+        'Full Hybrid' or plain 'Hybrid'; then 'Flex Fuel' / 'Electric';
+        'Gasoline' is the default when nothing matches.
+        """
+        if any(pattern.search(engine_str) for pattern in self.hybrid_patterns):
+            upper = engine_str.upper()
+            if 'PLUG-IN' in upper:
+                return 'Plug-in Hybrid'
+            if 'FULL' in upper:
+                return 'Full Hybrid'
+            return 'Hybrid'
+
+        for pattern, fuel_type in self.fuel_patterns:
+            if pattern.search(engine_str):
+                return fuel_type
+
+        return 'Gasoline'  # Default
+
+    def extract_aspiration(self, engine_str: str) -> str:
+        """Return 'Turbocharged', 'Supercharged', or 'Natural' (the default)."""
+        for pattern, aspiration in self.aspiration_patterns:
+            if pattern.search(engine_str):
+                return aspiration
+        return 'Natural'  # Default
+
+    def parse_engine_string(self, engine_str: str) -> EngineSpec:
+        """Parse a complete engine specification.
+
+        Returns a fallback spec (see ``create_fallback_engine``) when the
+        string does not begin with ``{displacement}L {config}{cylinders}``.
+        ``raw_string`` always carries the input exactly as given.
+        """
+        # The pattern is anchored at the start, so tolerate stray padding.
+        match = self.engine_pattern.match(engine_str.strip())
+
+        if not match:
+            # Handle unparseable engines
+            return self.create_fallback_engine(engine_str)
+
+        return EngineSpec(
+            displacement_l=float(match.group(1)),
+            configuration=self.normalize_configuration(match.group(2)),  # L→I here!
+            cylinders=int(match.group(3)),
+            fuel_type=self.extract_fuel_type(engine_str),
+            aspiration=self.extract_aspiration(engine_str),
+            raw_string=engine_str
+        )
+
+    def create_fallback_engine(self, raw_string: str) -> EngineSpec:
+        """Create the placeholder spec used for unparseable engine strings."""
+        return EngineSpec(
+            displacement_l=None,
+            configuration="Unknown",
+            cylinders=None,
+            fuel_type="Unknown",
+            aspiration="Natural",
+            raw_string=raw_string
+        )
+
+    def create_electric_motor(self) -> EngineSpec:
+        """Create the default electric motor used for empty engines arrays."""
+        return EngineSpec(
+            displacement_l=None,
+            configuration="Electric",
+            cylinders=None,
+            fuel_type="Electric",
+            aspiration=None,  # no value is recorded for the default motor
+            raw_string="Electric Motor"
+        )
+
+
+def demonstrate_engine_parsing():
+    """Demonstrate engine parsing with real examples from JSON files.
+
+    Prints the parsed fields for each sample string and flags the ones whose
+    raw configuration letter was ``L`` and ended up as ``I``.
+    """
+
+    parser = EngineSpecParser()
+
+    # Test cases from actual JSON data
+    test_engines = [
+        # Standard engines
+        "2.0L I4",
+        "3.5L V6",
+        "5.6L V8",
+
+        # L→I normalization examples (CRITICAL)
+        "1.5L L3",
+        "2.0L L4",
+        "1.2L L3 FULL HYBRID EV- (FHEV)",
+
+        # Subaru Boxer engines
+        "2.4L H4",
+        "2.0L H4",
+
+        # Hybrid examples from Nissan
+        "2.5L I4 FULL HYBRID EV- (FHEV)",
+        "1.5L L3 PLUG-IN HYBRID EV- (PHEV)",
+
+        # Flex fuel examples
+        "5.6L V8 FLEX",
+        "4.0L V6 FLEX",
+
+        # Electric examples
+        "1.8L I4 ELECTRIC",
+
+        # Unparseable examples (should create fallback)
+        "Custom Hybrid System",
+        "V12 Twin-Turbo Custom",
+        "V10 Plus",
+    ]
+
+    # Matches a configuration token such as "L3".  The "L" that ends the
+    # displacement ("1.5L") follows a digit, so \b keeps it from matching;
+    # a plain `'L' in engine_str` test is true for every parsed engine.
+    l_config_token = re.compile(r'\bL\d')
+
+    print("🔧 Engine Specification Parsing Examples")
+    print("=" * 50)
+
+    for engine_str in test_engines:
+        spec = parser.parse_engine_string(engine_str)
+
+        # Fallback specs have no displacement; avoid printing "NoneL".
+        if spec.displacement_l is None:
+            displacement = "Unknown"
+        else:
+            displacement = f"{spec.displacement_l}L"
+
+        print(f"\nInput: \"{engine_str}\"")
+        print(f"  Displacement: {displacement}")
+        print(f"  Configuration: {spec.configuration}")
+        print(f"  Cylinders: {spec.cylinders}")
+        print(f"  Fuel Type: {spec.fuel_type}")
+        print(f"  Aspiration: {spec.aspiration}")
+
+        # Highlight L→I normalization
+        if spec.configuration == 'I' and l_config_token.search(engine_str):
+            print(f"  🎯 L→I NORMALIZED: L{spec.cylinders} became I{spec.cylinders}")
+
+    # Demonstrate electric vehicle handling
+    print(f"\n\n⚡ Electric Vehicle Default Engine:")
+    electric_spec = parser.create_electric_motor()
+    print(f"  Name: {electric_spec.raw_string}")
+    print(f"  Configuration: {electric_spec.configuration}")
+    print(f"  Fuel Type: {electric_spec.fuel_type}")
+
+
+def demonstrate_l_to_i_normalization():
+    """Print before/after configurations for engines written with an L prefix."""
+
+    engine_parser = EngineSpecParser()
+
+    for heading in (
+        "\n\n🎯 L→I Configuration Normalization",
+        "=" * 40,
+        "CRITICAL REQUIREMENT: All L-configurations must become I (Inline)",
+    ):
+        print(heading)
+
+    samples = (
+        "1.5L L3",
+        "2.0L L4",
+        "1.2L L3 FULL HYBRID EV- (FHEV)",
+        "1.5L L3 PLUG-IN HYBRID EV- (PHEV)",
+    )
+
+    for raw in samples:
+        parsed = engine_parser.parse_engine_string(raw)
+        # Second whitespace-separated token is e.g. "L3"; keep its letter.
+        config_token = raw.split()[1]
+        source_letter = config_token[0]
+
+        print(f"\nOriginal: \"{raw}\"")
+        print(f"  Input Configuration: {source_letter}{parsed.cylinders}")
+        print(f"  Output Configuration: {parsed.configuration}{parsed.cylinders}")
+        print(f"  ✅ Normalized: {source_letter}→{parsed.configuration}")
+
+
+def demonstrate_database_storage():
+    """Show how parsed engines map to database records.
+
+    The SQL is only printed, never executed.  Values are still rendered as
+    proper literals: ``None`` becomes ``NULL`` and single quotes are doubled.
+    """
+
+    def sql_literal(value):
+        # Render one Python value as a SQL literal for display.
+        if value is None:
+            return "NULL"
+        if isinstance(value, str):
+            return "'" + value.replace("'", "''") + "'"
+        return str(value)
+
+    def sql_values(spec):
+        # Column order: name, code, displacement_l, cylinders, fuel_type, aspiration
+        columns = (
+            spec.raw_string,
+            None,  # code is not derived from the engine string
+            spec.displacement_l,
+            spec.cylinders,
+            spec.fuel_type,
+            spec.aspiration,
+        )
+        return "(" + ", ".join(sql_literal(column) for column in columns) + ")"
+
+    parser = EngineSpecParser()
+
+    print("\n\n💾 Database Storage Examples")
+    print("=" * 35)
+    print("SQL: INSERT INTO vehicles.engine (name, code, displacement_l, cylinders, fuel_type, aspiration)")
+
+    examples = [
+        "2.0L I4",
+        "1.5L L3 PLUG-IN HYBRID EV- (PHEV)",  # L→I case
+        "2.4L H4",  # Subaru Boxer
+        "5.6L V8 FLEX",
+    ]
+
+    # Matches a configuration token such as "L3" but not the "L" that ends
+    # the displacement; `'L' in engine_str` would be true for every engine.
+    l_config_token = re.compile(r'\bL\d')
+
+    for engine_str in examples:
+        spec = parser.parse_engine_string(engine_str)
+
+        print(f"\nEngine: \"{engine_str}\"")
+        print(f"  SQL: VALUES {sql_values(spec)}")
+
+        if spec.configuration == 'I' and l_config_token.search(engine_str):
+            print(f"  🎯 Note: L{spec.cylinders} normalized to I{spec.cylinders}")
+
+    # Electric motor example
+    electric_spec = parser.create_electric_motor()
+    print(f"\nElectric Vehicle:")
+    print(f"  SQL: VALUES {sql_values(electric_spec)}")
+
+
+def run_validation_tests():
+    """Assert that L→I normalization and hybrid detection give the expected results."""
+
+    engine_parser = EngineSpecParser()
+
+    print("\n\n✅ Validation Tests")
+    print("=" * 20)
+
+    # (input, expected configuration, expected cylinder count)
+    normalization_cases = (
+        ("1.5L L3", "I", 3),
+        ("2.0L L4", "I", 4),
+        ("1.2L L3 FULL HYBRID EV- (FHEV)", "I", 3),
+    )
+
+    for case in normalization_cases:
+        engine_str, expected_config, expected_cylinders = case
+        spec = engine_parser.parse_engine_string(engine_str)
+
+        assert spec.configuration == expected_config, \
+            f"Expected {expected_config}, got {spec.configuration}"
+        assert spec.cylinders == expected_cylinders, \
+            f"Expected {expected_cylinders} cylinders, got {spec.cylinders}"
+
+        print(f"✅ {engine_str} → {spec.configuration}{spec.cylinders}")
+
+    # input → expected fuel type
+    hybrid_cases = {
+        "2.5L I4 FULL HYBRID EV- (FHEV)": "Full Hybrid",
+        "1.5L L3 PLUG-IN HYBRID EV- (PHEV)": "Plug-in Hybrid",
+    }
+
+    for engine_str, expected_fuel_type in hybrid_cases.items():
+        spec = engine_parser.parse_engine_string(engine_str)
+        assert spec.fuel_type == expected_fuel_type, \
+            f"Expected {expected_fuel_type}, got {spec.fuel_type}"
+        print(f"✅ {engine_str} → {spec.fuel_type}")
+
+    print("\n🎉 All validation tests passed!")
+
+
+if __name__ == "__main__":
+    # Run each demonstration in order, finishing with the assertion checks.
+    for step in (
+        demonstrate_engine_parsing,
+        demonstrate_l_to_i_normalization,
+        demonstrate_database_storage,
+        run_validation_tests,
+    ):
+        step()
+
+    print("\n\n📋 Summary")
+    print("=" * 10)
+    for summary_line in (
+        "✅ Engine parsing patterns implemented",
+        "✅ L→I normalization working correctly",
+        "✅ Hybrid/electric detection functional",
+        "✅ Database storage format validated",
+    ):
+        print(summary_line)
+    print("\n🚀 Ready for integration into ETL system!")
--- a/docs/changes/vehicles-dropdown-v2/examples/make-mapping-examples.py
+++ b/docs/changes/vehicles-dropdown-v2/examples/make-mapping-examples.py
@@ -0,0 +1,334 @@
+#!/usr/bin/env python3
+"""
+Make Name Mapping Examples
+
+This file demonstrates the complete make name normalization process,
+converting JSON filenames to proper display names for the database.
+
+Usage:
+    python make-mapping-examples.py
+"""
+
+import json
+import glob
+import os
+from typing import Dict, Set, List, Tuple
+from dataclasses import dataclass
+
+
+@dataclass
+class ValidationReport:
+    """Outcome of checking mapped make names against the authoritative list."""
+    total_files: int                   # number of filenames examined
+    valid_mappings: int                # how many mapped to an authoritative make
+    mismatches: List[Dict[str, str]]   # one record per filename that did not
+
+    @property
+    def success_rate(self) -> float:
+        """Fraction of files with a valid mapping; 0.0 when no files were counted."""
+        if self.total_files > 0:
+            return self.valid_mappings / self.total_files
+        return 0.0
+
+
+class MakeNameMapper:
+    """Convert JSON filenames to proper make display names"""
+
+    def __init__(self):
+        # Special capitalization cases, keyed by the title-cased name
+        self.special_cases = {
+            'Bmw': 'BMW',           # Bayerische Motoren Werke
+            'Gmc': 'GMC',           # General Motors Company
+            'Mini': 'MINI',         # Brand styling
+            'Mclaren': 'McLaren',   # Scottish naming convention
+        }
+
+        # Authoritative makes list (would be loaded from sources/makes.json)
+        self.authoritative_makes = {
+            'Acura', 'Alfa Romeo', 'Aston Martin', 'Audi', 'BMW', 'Bentley',
+            'Buick', 'Cadillac', 'Chevrolet', 'Chrysler', 'Dodge', 'Ferrari',
+            'Fiat', 'Ford', 'Genesis', 'Geo', 'GMC', 'Honda', 'Hummer',
+            'Hyundai', 'Infiniti', 'Isuzu', 'Jaguar', 'Jeep', 'Kia',
+            'Lamborghini', 'Land Rover', 'Lexus', 'Lincoln', 'Lotus', 'Lucid',
+            'MINI', 'Maserati', 'Mazda', 'McLaren', 'Mercury', 'Mitsubishi',
+            'Nissan', 'Oldsmobile', 'Plymouth', 'Polestar', 'Pontiac',
+            'Porsche', 'Ram', 'Rivian', 'Rolls Royce', 'Saab', 'Saturn',
+            'Scion', 'Smart', 'Subaru', 'Tesla', 'Toyota', 'Volkswagen',
+            'Volvo'
+        }
+
+    def normalize_make_name(self, filename: str) -> str:
+        """Convert a filename such as ``alfa_romeo.json`` to ``Alfa Romeo``.
+
+        Only a trailing ``.json`` is removed.  Underscores become spaces, runs
+        of whitespace collapse to one space, the result is title-cased, and
+        the special capitalization cases are applied last.
+        """
+        suffix = '.json'
+        # Strip the extension only at the end; str.replace would also remove
+        # a ".json" that happens to appear inside the name.
+        if filename.endswith(suffix):
+            base_name = filename[:-len(suffix)]
+        else:
+            base_name = filename
+
+        # split()/join collapses the double spaces left by repeated underscores
+        spaced_name = ' '.join(base_name.replace('_', ' ').split())
+
+        title_cased = spaced_name.title()
+
+        return self.special_cases.get(title_cased, title_cased)
+
+    def validate_mapping(self, filename: str, display_name: str) -> bool:
+        """Return True when ``display_name`` is in the authoritative list.
+
+        ``filename`` is accepted but not used in the check.
+        """
+        return display_name in self.authoritative_makes
+
+    def get_all_mappings(self) -> Dict[str, str]:
+        """Get complete filename → display name mapping"""
+        # Simulate the 55 JSON files found in the actual directory
+        json_files = [
+            'acura.json', 'alfa_romeo.json', 'aston_martin.json', 'audi.json',
+            'bentley.json', 'bmw.json', 'buick.json', 'cadillac.json',
+            'chevrolet.json', 'chrysler.json', 'dodge.json', 'ferrari.json',
+            'fiat.json', 'ford.json', 'genesis.json', 'geo.json', 'gmc.json',
+            'honda.json', 'hummer.json', 'hyundai.json', 'infiniti.json',
+            'isuzu.json', 'jaguar.json', 'jeep.json', 'kia.json',
+            'lamborghini.json', 'land_rover.json', 'lexus.json', 'lincoln.json',
+            'lotus.json', 'lucid.json', 'maserati.json', 'mazda.json',
+            'mclaren.json', 'mercury.json', 'mini.json', 'mitsubishi.json',
+            'nissan.json', 'oldsmobile.json', 'plymouth.json', 'polestar.json',
+            'pontiac.json', 'porsche.json', 'ram.json', 'rivian.json',
+            'rolls_royce.json', 'saab.json', 'saturn.json', 'scion.json',
+            'smart.json', 'subaru.json', 'tesla.json', 'toyota.json',
+            'volkswagen.json', 'volvo.json'
+        ]
+
+        return {
+            filename: self.normalize_make_name(filename)
+            for filename in json_files
+        }
+
+    def validate_all_mappings(self) -> ValidationReport:
+        """Validate all mappings against the authoritative list.
+
+        Returns a ``ValidationReport`` with one mismatch record per filename
+        whose display name is not in the authoritative list.
+        """
+        mappings = self.get_all_mappings()
+        mismatches = [
+            {
+                'filename': filename,
+                'mapped_name': display_name,
+                'status': 'NOT_FOUND_IN_AUTHORITATIVE'
+            }
+            for filename, display_name in mappings.items()
+            if not self.validate_mapping(filename, display_name)
+        ]
+
+        return ValidationReport(
+            total_files=len(mappings),
+            valid_mappings=len(mappings) - len(mismatches),
+            mismatches=mismatches
+        )
+
+
+def demonstrate_make_name_mapping():
+    """Print each sample filename next to its normalized and expected names."""
+
+    name_mapper = MakeNameMapper()
+
+    print("🏷️  Make Name Mapping Examples")
+    print("=" * 40)
+
+    # filename → expected display name, grouped by transformation type
+    expectations = {
+        # Single word makes (standard title case)
+        'toyota.json': 'Toyota',
+        'honda.json': 'Honda',
+        'ford.json': 'Ford',
+
+        # Multi-word makes (underscore → space + title case)
+        'alfa_romeo.json': 'Alfa Romeo',
+        'land_rover.json': 'Land Rover',
+        'rolls_royce.json': 'Rolls Royce',
+        'aston_martin.json': 'Aston Martin',
+
+        # Special capitalization cases
+        'bmw.json': 'BMW',
+        'gmc.json': 'GMC',
+        'mini.json': 'MINI',
+        'mclaren.json': 'McLaren',
+    }
+
+    for filename, expected in expectations.items():
+        result = name_mapper.normalize_make_name(filename)
+        matched = result == expected
+        marker = "✅" if matched else "❌"
+
+        print(f"{marker} {filename:20} → {result:15} (expected: {expected})")
+
+        if not matched:
+            print(f"   ⚠️  MISMATCH: Expected '{expected}', got '{result}'")
+
+
+def demonstrate_complete_mapping():
+    """Show the complete mapping of all make files, grouped by transformation.
+
+    A filename containing an underscore is listed as multi-word; otherwise a
+    display name produced by the mapper's special cases is listed as special
+    capitalization; everything else is a plain single-word make.
+    """
+
+    mapper = MakeNameMapper()
+    all_mappings = mapper.get_all_mappings()
+
+    # Take the special names from the mapper rather than repeating the list
+    # here, so the grouping follows any change to the mapper's special cases.
+    special_names = set(mapper.special_cases.values())
+
+    print(f"\n\n📋 Complete Make Name Mappings ({len(all_mappings)} files)")
+    print("=" * 50)
+
+    # Group by transformation type for clarity
+    single_words = []
+    multi_words = []
+    special_cases = []
+
+    for filename, display_name in sorted(all_mappings.items()):
+        if '_' in filename:
+            multi_words.append((filename, display_name))
+        elif display_name in special_names:
+            special_cases.append((filename, display_name))
+        else:
+            single_words.append((filename, display_name))
+
+    print("\n🔤 Single Word Makes (Standard Title Case):")
+    for filename, display_name in single_words:
+        print(f"  {filename:20} → {display_name}")
+
+    print(f"\n📝 Multi-Word Makes (Underscore → Space, {len(multi_words)} total):")
+    for filename, display_name in multi_words:
+        print(f"  {filename:20} → {display_name}")
+
+    print(f"\n⭐ Special Capitalization Cases ({len(special_cases)} total):")
+    for filename, display_name in special_cases:
+        print(f"  {filename:20} → {display_name}")
+
+
+def demonstrate_validation():
+    """Print the validation report for all simulated make files."""
+
+    report = MakeNameMapper().validate_all_mappings()
+
+    print(f"\n\n✅ Validation Report")
+    print("=" * 20)
+    print(f"Total files processed: {report.total_files}")
+    print(f"Valid mappings: {report.valid_mappings}")
+    print(f"Success rate: {report.success_rate:.1%}")
+
+    # Nothing else to list when every mapping validated.
+    if not report.mismatches:
+        print("\n🎉 All mappings valid!")
+        return
+
+    print(f"\n⚠️  Mismatches found ({len(report.mismatches)}):")
+    for entry in report.mismatches:
+        print(f"  {entry['filename']} → {entry['mapped_name']}")
+        print(f"    Status: {entry['status']}")
+
+
+def demonstrate_database_integration():
+    """Print a sample multi-row INSERT built from normalized make names."""
+
+    name_mapper = MakeNameMapper()
+
+    print(f"\n\n💾 Database Integration Example")
+    print("=" * 35)
+
+    sample_files = ['toyota.json', 'alfa_romeo.json', 'bmw.json', 'land_rover.json']
+    last_index = len(sample_files) - 1
+
+    print("SQL: INSERT INTO vehicles.make (name) VALUES")
+
+    for index, filename in enumerate(sample_files):
+        display_name = name_mapper.normalize_make_name(filename)
+        # Rows are comma-separated; the final row closes the statement.
+        terminator = ";" if index >= last_index else ","
+
+        print(f"  ('{display_name}'){terminator}")
+        print(f"    -- From file: {filename}")
+
+
+def demonstrate_error_handling():
+    """Run the mapper over awkward filenames and print how each one is handled."""
+
+    name_mapper = MakeNameMapper()
+
+    print(f"\n\n🛠️  Error Handling Examples")
+    print("=" * 30)
+
+    awkward_names = (
+        'unknown_brand.json',
+        'test__multiple__underscores.json',
+        'no_extension',
+        '.json',  # Only extension
+    )
+
+    for filename in awkward_names:
+        try:
+            display_name = name_mapper.normalize_make_name(filename)
+            if name_mapper.validate_mapping(filename, display_name):
+                status = "✅ Valid"
+            else:
+                status = "⚠️  Not in authoritative list"
+
+            print(f"  {filename:35} → {display_name:15} ({status})")
+        except Exception as e:
+            # Report any failure for this filename and move on to the next.
+            print(f"  {filename:35} → ERROR: {e}")
+
+
+def run_validation_tests():
+    """Check normalization and validation results; return True if all cases pass."""
+
+    mapper = MakeNameMapper()
+
+    print(f"\n\n🧪 Validation Tests")
+    print("=" * 20)
+
+    # (filename, expected display name, expected validity)
+    test_cases = (
+        ('toyota.json', 'Toyota', True),
+        ('alfa_romeo.json', 'Alfa Romeo', True),
+        ('bmw.json', 'BMW', True),
+        ('gmc.json', 'GMC', True),
+        ('mclaren.json', 'McLaren', True),
+        ('unknown_brand.json', 'Unknown Brand', False),
+    )
+    total = len(test_cases)
+
+    passed = 0
+    for filename, expected_name, expected_valid in test_cases:
+        actual_name = mapper.normalize_make_name(filename)
+        actual_valid = mapper.validate_mapping(filename, actual_name)
+
+        name_ok = actual_name == expected_name
+        valid_ok = actual_valid == expected_valid
+
+        if not (name_ok and valid_ok):
+            print(f"❌ {filename}")
+            if not name_ok:
+                print(f"   Name: Expected '{expected_name}', got '{actual_name}'")
+            if not valid_ok:
+                print(f"   Valid: Expected {expected_valid}, got {actual_valid}")
+            continue
+
+        print(f"✅ {filename} → {actual_name} (valid: {actual_valid})")
+        passed += 1
+
+    print(f"\n📊 Test Results: {passed}/{total} tests passed")
+
+    if passed != total:
+        print("⚠️  Some tests failed!")
+        return False
+
+    print("🎉 All validation tests passed!")
+    return True
+
+
+if __name__ == "__main__":
+    # Run the demonstrations first, then the pass/fail checks.
+    for demo in (
+        demonstrate_make_name_mapping,
+        demonstrate_complete_mapping,
+        demonstrate_validation,
+        demonstrate_database_integration,
+        demonstrate_error_handling,
+    ):
+        demo()
+
+    success = run_validation_tests()
+
+    print("\n\n📋 Summary")
+    print("=" * 10)
+    for summary_line in (
+        "✅ Make name normalization patterns implemented",
+        "✅ Special capitalization cases handled",
+        "✅ Multi-word make names (underscore → space) working",
+        "✅ Validation against authoritative list functional",
+        "✅ Database integration format demonstrated",
+    ):
+        print(summary_line)
+
+    if not success:
+        print("\n⚠️  Review failed tests before integration")
+    else:
+        print("\n🚀 Ready for integration into ETL system!")
+
+    print("\nKey Implementation Notes:")
+    for note in (
+        "• filename.replace('.json', '').replace('_', ' ').title()",
+        "• Special cases: BMW, GMC, MINI, McLaren",
+        "• Validation against sources/makes.json required",
+        "• Handle unknown makes gracefully (log warning, continue)",
+    ):
+        print(note)
--- a/docs/changes/vehicles-dropdown-v2/examples/sample-json-processing.py
+++ b/docs/changes/vehicles-dropdown-v2/examples/sample-json-processing.py
@@ -0,0 +1,449 @@
+#!/usr/bin/env python3
+"""
+Sample JSON Processing Examples
+
+This file demonstrates complete processing of JSON vehicle data,
+from file reading through database-ready output structures.
+
+Usage:
+    python sample-json-processing.py
+"""
+
+import json
+from typing import List, Dict, Any, Optional
+from dataclasses import dataclass
+from pathlib import Path
+
+
+@dataclass
+class EngineSpec:
+    """Parsed engine specification.
+
+    Fields that cannot be determined, or that do not apply (for example the
+    displacement of an electric motor), are ``None``.
+    """
+    displacement_l: Optional[float]  # litres; None when not available
+    configuration: str               # configuration letter, or a label such as "Electric"
+    cylinders: Optional[int]         # None when not available
+    fuel_type: str                   # e.g. "Gasoline", "Plug-in Hybrid", "Electric"
+    aspiration: Optional[str]        # None when no aspiration is recorded
+    raw_string: str                  # the engine string exactly as it appeared in the JSON
+
+
+@dataclass
+class ModelData:
+    """One model as listed under a single year of a make."""
+    name: str                  # model name taken from the JSON entry
+    engines: List[EngineSpec]  # parsed engine specifications for the model
+    trims: List[str]           # trim names, read from the entry's submodels
+
+
+@dataclass
+class YearData:
+    """All models of a make for one model year."""
+    year: int                 # model year as an integer
+    models: List[ModelData]   # the models listed for that year
+
+
+@dataclass
+class MakeData:
+    """Everything extracted from one make's JSON file."""
+    name: str               # Normalized display name
+    filename: str           # Original JSON filename
+    years: List[YearData]   # per-year model listings
+
+    def _all_models(self):
+        # Yield every model across every year, in listing order.
+        for year in self.years:
+            for model in year.models:
+                yield model
+
+    @property
+    def total_models(self) -> int:
+        """Number of model entries summed over all years."""
+        count = 0
+        for year in self.years:
+            count += len(year.models)
+        return count
+
+    @property
+    def total_engines(self) -> int:
+        """Number of engine entries summed over all models and years."""
+        return sum(len(model.engines) for model in self._all_models())
+
+    @property
+    def total_trims(self) -> int:
+        """Number of trim entries summed over all models and years."""
+        return sum(len(model.trims) for model in self._all_models())
+
+
+class JsonProcessor:
+    """Process JSON vehicle files into structured data"""
+
+    def __init__(self):
+        # The companion example scripts are named engine-parsing-examples.py
+        # and make-mapping-examples.py.  Hyphens are not valid in module
+        # names, so a plain ``import`` cannot find them; load them by path.
+        engine_module = self._load_sibling_module('engine-parsing-examples.py')
+        make_module = self._load_sibling_module('make-mapping-examples.py')
+
+        self.engine_parser = engine_module.EngineSpecParser()
+        self.make_mapper = make_module.MakeNameMapper()
+
+    @staticmethod
+    def _load_sibling_module(filename: str):
+        """Import a script located next to this file and return the module.
+
+        A regular import under the underscore form of the name is tried first
+        (so an importable ``engine_parsing_examples`` module still wins).
+        Raises ImportError when the script cannot be found either way.
+        """
+        import importlib
+        import importlib.util
+        import sys
+
+        module_name = Path(filename).stem.replace('-', '_')
+        try:
+            return importlib.import_module(module_name)
+        except ModuleNotFoundError:
+            pass  # fall through to loading the hyphenated file by path
+
+        module_path = Path(__file__).resolve().parent / filename
+        spec = importlib.util.spec_from_file_location(module_name, module_path)
+        if spec is None or spec.loader is None:
+            raise ImportError(f"Cannot load {filename} from {module_path.parent}")
+
+        module = importlib.util.module_from_spec(spec)
+        # Register before executing so code that looks the module up in
+        # sys.modules while it is being defined can find it.
+        sys.modules[module_name] = module
+        try:
+            spec.loader.exec_module(module)
+        except FileNotFoundError as err:
+            sys.modules.pop(module_name, None)
+            raise ImportError(f"Cannot load {filename} from {module_path.parent}") from err
+        except Exception:
+            sys.modules.pop(module_name, None)
+            raise
+        return module
+
+    def process_json_file(self, json_data: Dict[str, Any], filename: str) -> MakeData:
+        """Process a complete JSON document into structured data.
+
+        ``json_data`` is read as a single-key mapping whose value is a list of
+        year entries (``year`` plus optional ``models``); each model has a
+        ``name`` and optional ``engines`` / ``submodels`` lists.  The display
+        name comes from ``filename``, not from the JSON key.
+        """
+
+        # Get the make name (first key in JSON)
+        make_key = list(json_data.keys())[0]
+        display_name = self.make_mapper.normalize_make_name(filename)
+
+        years_data = []
+        for year_entry in json_data[make_key]:
+            models_data = []
+
+            for model_entry in year_entry.get('models', []):
+                engine_strings = model_entry.get('engines', [])
+
+                if engine_strings:
+                    engines = [
+                        self.engine_parser.parse_engine_string(engine_str)
+                        for engine_str in engine_strings
+                    ]
+                else:
+                    # Electric vehicle - create default engine
+                    engines = [self.engine_parser.create_electric_motor()]
+
+                models_data.append(ModelData(
+                    name=model_entry['name'],
+                    engines=engines,
+                    # Process trims (from submodels)
+                    trims=model_entry.get('submodels', [])
+                ))
+
+            years_data.append(YearData(
+                year=int(year_entry['year']),
+                models=models_data
+            ))
+
+        return MakeData(
+            name=display_name,
+            filename=filename,
+            years=years_data
+        )
+
+
+def demonstrate_tesla_processing():
+    """Process a small Tesla sample whose models all have empty engine lists."""
+
+    def electric_model(name, submodels):
+        # Every sample model carries an empty engines list (electric vehicle).
+        return {"name": name, "engines": [], "submodels": submodels}
+
+    # Sample Tesla data (simplified from actual tesla.json)
+    tesla_json = {
+        "tesla": [
+            {
+                "year": "2024",
+                "models": [
+                    electric_model("3", ["Long Range AWD", "Performance", "Standard Plus"]),
+                    electric_model("y", ["Long Range", "Performance"]),
+                ]
+            },
+            {
+                "year": "2023",
+                "models": [
+                    electric_model("s", ["Plaid", "Long Range Plus"]),
+                ]
+            }
+        ]
+    }
+
+    make_data = JsonProcessor().process_json_file(tesla_json, 'tesla.json')
+
+    print("⚡ Tesla JSON Processing Example")
+    print("=" * 35)
+    print(f"Filename: tesla.json")
+    print(f"Display Name: {make_data.name}")
+    print(f"Years: {len(make_data.years)}")
+    print(f"Total Models: {make_data.total_models}")
+    print(f"Total Engines: {make_data.total_engines}")
+    print(f"Total Trims: {make_data.total_trims}")
+
+    print(f"\nDetailed Breakdown:")
+    for year_data in make_data.years:
+        print(f"\n  {year_data.year}:")
+        for model in year_data.models:
+            engine_names = [engine.raw_string for engine in model.engines]
+            print(f"    Model: {model.name}")
+            print(f"    Engines: {engine_names}")
+            print(f"    Trims: {model.trims}")
+
+
+def demonstrate_subaru_processing():
+    """Process a small Subaru sample and annotate its H-configuration engines."""
+
+    crosstrek = {
+        "name": "crosstrek",
+        "engines": ["2.0L H4", "2.0L H4 PLUG-IN HYBRID EV- (PHEV)", "2.5L H4"],
+        "submodels": ["Base", "Premium", "Limited", "Hybrid"],
+    }
+    forester = {
+        "name": "forester",
+        "engines": ["2.5L H4"],
+        "submodels": ["Base", "Premium", "Sport", "Limited"],
+    }
+
+    # Sample Subaru data showing H4 engines
+    subaru_json = {"subaru": [{"year": "2024", "models": [crosstrek, forester]}]}
+
+    make_data = JsonProcessor().process_json_file(subaru_json, 'subaru.json')
+
+    print(f"\n\n🚗 Subaru JSON Processing Example (Boxer Engines)")
+    print("=" * 50)
+    print(f"Display Name: {make_data.name}")
+
+    for year_data in make_data.years:
+        print(f"\n{year_data.year}:")
+        for model in year_data.models:
+            print(f"  {model.name}:")
+            for engine in model.engines:
+                # Build the optional annotations appended to the summary line.
+                suffix = ""
+                if engine.configuration == 'H':
+                    suffix += " (Boxer)"
+                if 'Hybrid' in engine.fuel_type:
+                    suffix += " (Hybrid)"
+                print(f"    Engine: {engine.raw_string}")
+                print(f"      → {engine.displacement_l}L {engine.configuration}{engine.cylinders}{suffix}")
+
+
+def demonstrate_l_to_i_processing():
+    """Demonstrate L→I normalization during processing.
+
+    For each processed engine, prints the configuration letter found in the
+    raw string next to the configuration stored on the parsed spec.
+    """
+
+    # Sample data with L-configuration engines
+    nissan_json = {
+        "nissan": [
+            {
+                "year": "2024",
+                "models": [
+                    {
+                        "name": "versa",
+                        "engines": [
+                            "1.6L I4"
+                        ],
+                        "submodels": ["S", "SV", "SR"]
+                    },
+                    {
+                        "name": "kicks",
+                        "engines": [
+                            "1.5L L3 PLUG-IN HYBRID EV- (PHEV)"  # L3 → I3
+                        ],
+                        "submodels": ["S", "SV", "SR"]
+                    },
+                    {
+                        "name": "note",
+                        "engines": [
+                            "1.2L L3 FULL HYBRID EV- (FHEV)"  # L3 → I3
+                        ],
+                        "submodels": ["Base", "Premium"]
+                    }
+                ]
+            }
+        ]
+    }
+
+    processor = JsonProcessor()
+    make_data = processor.process_json_file(nissan_json, 'nissan.json')
+
+    print(f"\n\n🎯 L→I Normalization Processing Example")
+    print("=" * 42)
+
+    for year_data in make_data.years:
+        for model in year_data.models:
+            for engine in model.engines:
+                normalized_config = engine.configuration
+
+                # Read the letter from the raw configuration token ("L3" → "L")
+                # instead of searching for the literal "L3", which misses other
+                # cylinder counts and labels every non-L engine as "I".
+                tokens = engine.raw_string.split()
+                if engine.cylinders is not None and len(tokens) > 1:
+                    original_config = tokens[1][0]
+                else:
+                    # No cylinder count means no parsed configuration token.
+                    original_config = normalized_config
+
+                print(f"Model: {model.name}")
+                print(f"  Input: \"{engine.raw_string}\"")
+                print(f"  Configuration: {original_config}{engine.cylinders} → {normalized_config}{engine.cylinders}")
+
+                if original_config == "L" and normalized_config == "I":
+                    print(f"  🎯 NORMALIZED: L→I conversion applied")
+                print()
+
+
+def demonstrate_database_ready_output():
+    """Show how processed data maps to database tables"""
+    
+    # Sample mixed data
+    sample_json = {
+        "toyota": [
+            {
+                "year": "2024",
+                "models": [
+                    {
+                        "name": "camry",
+                        "engines": [
+                            "2.5L I4",
+                            "2.5L I4 FULL HYBRID EV- (FHEV)"
+                        ],
+                        "submodels": [
+                            "LE", 
+                            "XLE",
+                            "Hybrid LE"
+                        ]
+                    }
+                ]
+            }
+        ]
+    }
+    
+    processor = JsonProcessor()
+    make_data = processor.process_json_file(sample_json, 'toyota.json')
+    
+    print(f"\n\n💾 Database-Ready Output")
+    print("=" * 25)
+    
+    # Show SQL INSERT statements
+    print("-- Make table")
+    print(f"INSERT INTO vehicles.make (name) VALUES ('{make_data.name}');")
+    
+    print(f"\n-- Model table (assuming make_id = 1)")
+    for year_data in make_data.years:
+        for model in year_data.models:
+            print(f"INSERT INTO vehicles.model (make_id, name) VALUES (1, '{model.name}');")
+    
+    print(f"\n-- Model Year table (assuming model_id = 1)")
+    for year_data in make_data.years:
+        print(f"INSERT INTO vehicles.model_year (model_id, year) VALUES (1, {year_data.year});")
+    
+    print(f"\n-- Engine table")
+    unique_engines = set()
+    for year_data in make_data.years:
+        for model in year_data.models:
+            for engine in model.engines:
+                engine_key = (engine.raw_string, engine.displacement_l, engine.cylinders, engine.fuel_type)
+                if engine_key not in unique_engines:
+                    unique_engines.add(engine_key)
+                    print(f"INSERT INTO vehicles.engine (name, displacement_l, cylinders, fuel_type, aspiration)")
+                    print(f"  VALUES ('{engine.raw_string}', {engine.displacement_l}, {engine.cylinders}, '{engine.fuel_type}', '{engine.aspiration}');")
+    
+    print(f"\n-- Trim table (assuming model_year_id = 1)")
+    for year_data in make_data.years:
+        for model in year_data.models:
+            for trim in model.trims:
+                print(f"INSERT INTO vehicles.trim (model_year_id, name) VALUES (1, '{trim}');")
+
+
+def run_processing_validation():
+    """Validate that processing works correctly"""
+    
+    print(f"\n\n✅ Processing Validation")
+    print("=" * 25)
+    
+    processor = JsonProcessor()
+    
+    # Test cases
+    test_cases = [
+        # Tesla (electric, empty engines)
+        ('tesla.json', {"tesla": [{"year": "2024", "models": [{"name": "3", "engines": [], "submodels": ["Base"]}]}]}),
+        # Subaru (H4 engines)  
+        ('subaru.json', {"subaru": [{"year": "2024", "models": [{"name": "crosstrek", "engines": ["2.0L H4"], "submodels": ["Base"]}]}]}),
+        # Nissan (L→I normalization)
+        ('nissan.json', {"nissan": [{"year": "2024", "models": [{"name": "kicks", "engines": ["1.5L L3"], "submodels": ["Base"]}]}]})
+    ]
+    
+    for filename, json_data in test_cases:
+        try:
+            make_data = processor.process_json_file(json_data, filename)
+            
+            # Basic validation
+            assert make_data.name is not None, "Make name should not be None"
+            assert len(make_data.years) > 0, "Should have at least one year"
+            assert make_data.total_models > 0, "Should have at least one model"
+            
+            print(f"✅ {filename} processed successfully")
+            print(f"   Make: {make_data.name}, Models: {make_data.total_models}, Engines: {make_data.total_engines}")
+            
+            # Special validations
+            if filename == 'tesla.json':
+                # Should have electric motors for empty engines
+                for year_data in make_data.years:
+                    for model in year_data.models:
+                        assert all(e.fuel_type == 'Electric' for e in model.engines), "Tesla should have electric engines"
+            
+            if filename == 'nissan.json':
+                # Should have L→I normalization
+                for year_data in make_data.years:
+                    for model in year_data.models:
+                        for engine in model.engines:
+                            if 'L3' in engine.raw_string:
+                                assert engine.configuration == 'I', "L3 should become I3"
+            
+        except Exception as e:
+            print(f"❌ {filename} failed: {e}")
+            return False
+    
+    print(f"\n🎉 All processing validation tests passed!")
+    return True
+
+
+if __name__ == "__main__":
+    demonstrate_tesla_processing()
+    demonstrate_subaru_processing() 
+    demonstrate_l_to_i_processing()
+    demonstrate_database_ready_output()
+    
+    success = run_processing_validation()
+    
+    print("\n\n📋 Summary")
+    print("=" * 10)
+    print("✅ JSON file processing implemented")
+    print("✅ Electric vehicle handling (empty engines → Electric Motor)")
+    print("✅ L→I normalization during processing")
+    print("✅ Database-ready output structures")
+    print("✅ Make name normalization integrated")
+    print("✅ Engine specification parsing integrated")
+    
+    if success:
+        print("\n🚀 Ready for ETL pipeline integration!")
+    else:
+        print("\n⚠️  Review failed validations")
+    
+    print("\nNext Steps:")
+    print("• Integrate with PostgreSQL loader")
+    print("• Add batch processing for all 55 files")
+    print("• Implement clear/append modes")
+    print("• Add CLI interface")
+    print("• Create comprehensive test suite")