#!/usr/bin/env python3
"""
Engine Specification Parsing Examples

This file contains comprehensive examples of engine parsing patterns found
in the JSON vehicle data, demonstrating the L→I normalization and
hybrid/electric detection requirements.

Usage:
    python engine-parsing-examples.py
"""

import re
from dataclasses import dataclass
from typing import Optional, List


@dataclass
class EngineSpec:
    """Parsed engine specification"""

    # Displacement in litres; None for fallback and electric-motor specs.
    displacement_l: Optional[float]
    # 'I', 'V', 'H' from the parser ('L' is normalized to 'I'),
    # 'Electric' for the default motor, 'Unknown' for the fallback.
    configuration: str
    # Cylinder count; None for fallback and electric-motor specs.
    cylinders: Optional[int]
    # 'Gasoline', 'Hybrid', 'Full Hybrid', 'Plug-in Hybrid', 'Flex Fuel',
    # 'Electric', or 'Unknown' (fallback).
    fuel_type: str
    # 'Natural', 'Turbocharged', 'Supercharged'; None for the electric motor.
    aspiration: Optional[str]
    # The input string exactly as it was handed to the parser.
    raw_string: str


class EngineSpecParser:
    """Engine specification parser with L→I normalization"""

    def __init__(self):
        # Primary pattern: {displacement}L {config}{cylinders}
        self.engine_pattern = re.compile(r'(\d+\.?\d*)L\s+([IVHL])(\d+)')

        # Hybrid patterns
        self.hybrid_patterns = [
            re.compile(r'PLUG-IN HYBRID EV-?\s*\(PHEV\)', re.IGNORECASE),
            re.compile(r'FULL HYBRID EV-?\s*\(FHEV\)', re.IGNORECASE),
            re.compile(r'HYBRID', re.IGNORECASE),
        ]

        # Other fuel type patterns
        self.fuel_patterns = [
            (re.compile(r'FLEX', re.IGNORECASE), 'Flex Fuel'),
            (re.compile(r'ELECTRIC', re.IGNORECASE), 'Electric'),
        ]

        # Aspiration patterns.  "SC" is anchored on word boundaries so that it
        # only matches the stand-alone abbreviation, not the letters "sc"
        # inside an unrelated word.
        self.aspiration_patterns = [
            (re.compile(r'TURBO', re.IGNORECASE), 'Turbocharged'),
            (re.compile(r'SUPERCHARGED|\bSC\b', re.IGNORECASE), 'Supercharged'),
        ]

    def normalize_configuration(self, config: str) -> str:
        """CRITICAL: Convert L to I (L-configuration becomes Inline)"""
        return 'I' if config == 'L' else config

    def uses_l_notation(self, engine_str: str) -> bool:
        """Return True when the engine string spells its layout as 'L<n>'.

        Only the captured configuration letter is inspected.  The "L" that
        follows the displacement (as in "2.0L I4") does not count, so this is
        the reliable way to tell whether L→I normalization was applied.
        Unparseable strings return False.
        """
        match = self.engine_pattern.match(engine_str)
        return match is not None and match.group(2) == 'L'

    def extract_fuel_type(self, engine_str: str) -> str:
        """Extract fuel type from engine string.

        Hybrid markers win over the generic fuel patterns; anything that
        matches nothing is reported as 'Gasoline'.
        """
        # Check hybrid patterns first (most specific)
        if any(pattern.search(engine_str) for pattern in self.hybrid_patterns):
            upper = engine_str.upper()
            if 'PLUG-IN' in upper:
                return 'Plug-in Hybrid'
            if 'FULL' in upper:
                return 'Full Hybrid'
            return 'Hybrid'

        # Check other fuel types
        for pattern, fuel_type in self.fuel_patterns:
            if pattern.search(engine_str):
                return fuel_type

        return 'Gasoline'  # Default

    def extract_aspiration(self, engine_str: str) -> str:
        """Extract aspiration from engine string ('Natural' when unmarked)"""
        for pattern, aspiration in self.aspiration_patterns:
            if pattern.search(engine_str):
                return aspiration
        return 'Natural'  # Default

    def parse_engine_string(self, engine_str: str) -> EngineSpec:
        """Parse complete engine specification.

        The string must start with "{displacement}L {config}{cylinders}";
        otherwise the fallback spec from create_fallback_engine is returned.
        """
        match = self.engine_pattern.match(engine_str)
        if not match:
            # Handle unparseable engines
            return self.create_fallback_engine(engine_str)

        return EngineSpec(
            displacement_l=float(match.group(1)),
            configuration=self.normalize_configuration(match.group(2)),  # L→I here!
            cylinders=int(match.group(3)),
            fuel_type=self.extract_fuel_type(engine_str),
            aspiration=self.extract_aspiration(engine_str),
            raw_string=engine_str,
        )

    def create_fallback_engine(self, raw_string: str) -> EngineSpec:
        """Create fallback for unparseable engines"""
        return EngineSpec(
            displacement_l=None,
            configuration="Unknown",
            cylinders=None,
            fuel_type="Unknown",
            aspiration="Natural",
            raw_string=raw_string,
        )

    def create_electric_motor(self) -> EngineSpec:
        """Create default electric motor for empty engines arrays"""
        return EngineSpec(
            displacement_l=None,
            configuration="Electric",
            cylinders=None,
            fuel_type="Electric",
            aspiration=None,
            raw_string="Electric Motor",
        )


def _sql_literal(value) -> str:
    """Render a Python value as a SQL literal for the demo output.

    None becomes NULL, strings are single-quoted with embedded quotes
    doubled, everything else uses its str() form.
    """
    if value is None:
        return "NULL"
    if isinstance(value, str):
        return "'" + value.replace("'", "''") + "'"
    return str(value)


def _sql_values(spec: EngineSpec) -> str:
    """Format a spec as the VALUES tuple of the demo INSERT (code is NULL)."""
    return (
        f"({_sql_literal(spec.raw_string)}, NULL, "
        f"{_sql_literal(spec.displacement_l)}, "
        f"{_sql_literal(spec.cylinders)}, {_sql_literal(spec.fuel_type)}, "
        f"{_sql_literal(spec.aspiration)})"
    )


def demonstrate_engine_parsing():
    """Demonstrate engine parsing with real examples from JSON files"""
    parser = EngineSpecParser()

    # Test cases from actual JSON data
    test_engines = [
        # Standard engines
        "2.0L I4",
        "3.5L V6",
        "5.6L V8",
        # L→I normalization examples (CRITICAL)
        "1.5L L3",
        "2.0L L4",
        "1.2L L3 FULL HYBRID EV- (FHEV)",
        # Subaru Boxer engines
        "2.4L H4",
        "2.0L H4",
        # Hybrid examples from Nissan
        "2.5L I4 FULL HYBRID EV- (FHEV)",
        "1.5L L3 PLUG-IN HYBRID EV- (PHEV)",
        # Flex fuel examples
        "5.6L V8 FLEX",
        "4.0L V6 FLEX",
        # Electric examples
        "1.8L I4 ELECTRIC",
        # Unparseable examples (should create fallback)
        "Custom Hybrid System",
        "V12 Twin-Turbo Custom",
        "V10 Plus",
    ]

    print("šŸ”§ Engine Specification Parsing Examples")
    print("=" * 50)

    for engine_str in test_engines:
        spec = parser.parse_engine_string(engine_str)
        print(f"\nInput: \"{engine_str}\"")
        print(f" Displacement: {spec.displacement_l}L")
        print(f" Configuration: {spec.configuration}")
        print(f" Cylinders: {spec.cylinders}")
        print(f" Fuel Type: {spec.fuel_type}")
        print(f" Aspiration: {spec.aspiration}")

        # Highlight L→I normalization.  A plain "'L' in engine_str" test
        # would also fire for genuine inline engines, because the
        # displacement suffix is an "L" too.
        if parser.uses_l_notation(engine_str):
            print(f" šŸŽÆ L→I NORMALIZED: L{spec.cylinders} became I{spec.cylinders}")

    # Demonstrate electric vehicle handling
    print(f"\n\n⚔ Electric Vehicle Default Engine:")
    electric_spec = parser.create_electric_motor()
    print(f" Name: {electric_spec.raw_string}")
    print(f" Configuration: {electric_spec.configuration}")
    print(f" Fuel Type: {electric_spec.fuel_type}")


def demonstrate_l_to_i_normalization():
    """Specifically demonstrate L→I normalization requirement"""
    parser = EngineSpecParser()

    print("\n\nšŸŽÆ L→I Configuration Normalization")
    print("=" * 40)
    print("CRITICAL REQUIREMENT: All L-configurations must become I (Inline)")

    l_configuration_examples = [
        "1.5L L3",
        "2.0L L4",
        "1.2L L3 FULL HYBRID EV- (FHEV)",
        "1.5L L3 PLUG-IN HYBRID EV- (PHEV)",
    ]

    for engine_str in l_configuration_examples:
        spec = parser.parse_engine_string(engine_str)
        # Take the configuration letter from the parser's own capture group
        # rather than from whitespace-splitting the raw string.
        match = parser.engine_pattern.match(engine_str)
        original_config = match.group(2) if match else "?"
        print(f"\nOriginal: \"{engine_str}\"")
        print(f" Input Configuration: {original_config}{spec.cylinders}")
        print(f" Output Configuration: {spec.configuration}{spec.cylinders}")
        print(f" āœ… Normalized: {original_config}→{spec.configuration}")


def demonstrate_database_storage():
    """Show how parsed engines map to database records"""
    parser = EngineSpecParser()

    print("\n\nšŸ’¾ Database Storage Examples")
    print("=" * 35)
    print("SQL: INSERT INTO vehicles.engine (name, code, displacement_l, cylinders, fuel_type, aspiration)")

    examples = [
        "2.0L I4",
        "1.5L L3 PLUG-IN HYBRID EV- (PHEV)",  # L→I case
        "2.4L H4",  # Subaru Boxer
        "5.6L V8 FLEX",
    ]

    for engine_str in examples:
        spec = parser.parse_engine_string(engine_str)

        # Format as SQL INSERT values (None → NULL, quotes escaped)
        sql_values = _sql_values(spec)

        print(f"\nEngine: \"{engine_str}\"")
        print(f" SQL: VALUES {sql_values}")

        # Only flag strings that really used the L<n> spelling.
        if parser.uses_l_notation(engine_str):
            print(f" šŸŽÆ Note: L{spec.cylinders} normalized to I{spec.cylinders}")

    # Electric motor example
    electric_spec = parser.create_electric_motor()
    sql_values = _sql_values(electric_spec)
    print(f"\nElectric Vehicle:")
    print(f" SQL: VALUES {sql_values}")


def run_validation_tests():
    """Run validation tests to ensure parsing works correctly.

    Raises:
        AssertionError: when a parsed value differs from the expected one.
            Raised explicitly so the checks still run under ``python -O``.
    """
    parser = EngineSpecParser()

    print("\n\nāœ… Validation Tests")
    print("=" * 20)

    # Test L→I normalization
    test_cases = [
        ("1.5L L3", "I", 3),
        ("2.0L L4", "I", 4),
        ("1.2L L3 FULL HYBRID EV- (FHEV)", "I", 3),
    ]

    for engine_str, expected_config, expected_cylinders in test_cases:
        spec = parser.parse_engine_string(engine_str)
        if spec.configuration != expected_config:
            raise AssertionError(
                f"Expected {expected_config}, got {spec.configuration}"
            )
        if spec.cylinders != expected_cylinders:
            raise AssertionError(
                f"Expected {expected_cylinders} cylinders, got {spec.cylinders}"
            )
        print(f"āœ… {engine_str} → {spec.configuration}{spec.cylinders}")

    # Test hybrid detection
    hybrid_cases = [
        ("2.5L I4 FULL HYBRID EV- (FHEV)", "Full Hybrid"),
        ("1.5L L3 PLUG-IN HYBRID EV- (PHEV)", "Plug-in Hybrid"),
    ]

    for engine_str, expected_fuel_type in hybrid_cases:
        spec = parser.parse_engine_string(engine_str)
        if spec.fuel_type != expected_fuel_type:
            raise AssertionError(
                f"Expected {expected_fuel_type}, got {spec.fuel_type}"
            )
        print(f"āœ… {engine_str} → {spec.fuel_type}")

    print("\nšŸŽ‰ All validation tests passed!")


if __name__ == "__main__":
    demonstrate_engine_parsing()
    demonstrate_l_to_i_normalization()
    demonstrate_database_storage()
    run_validation_tests()

    print("\n\nšŸ“‹ Summary")
    print("=" * 10)
    print("āœ… Engine parsing patterns implemented")
    print("āœ… L→I normalization working correctly")
    print("āœ… Hybrid/electric detection functional")
    print("āœ… Database storage format validated")
    print("\nšŸš€ Ready for integration into ETL system!")