Files
motovaultpro/docs/changes/vehicles-dropdown-v2/examples/engine-parsing-examples.py
Eric Gullickson a052040e3a Initial Commit
2025-09-17 16:09:15 -05:00

314 lines
10 KiB
Python

#!/usr/bin/env python3
"""
Engine Specification Parsing Examples
This file contains comprehensive examples of engine parsing patterns
found in the JSON vehicle data, demonstrating the L→I normalization
and hybrid/electric detection requirements.
Usage:
python engine-parsing-examples.py
"""
import re
from dataclasses import dataclass
from typing import Optional, List
@dataclass
class EngineSpec:
    """Parsed engine specification.

    Optional fields are ``None`` when the value does not apply or could not
    be parsed: the default electric motor has no displacement, cylinder
    count or aspiration, and the fallback for unparseable strings has no
    displacement or cylinder count.
    """

    # Displacement in litres; None for electric motors and unparseable input.
    displacement_l: Optional[float]
    # Cylinder layout: "I", "V", "H", "Electric" or "Unknown".
    # A raw "L" layout is normalized to "I" by the parser.
    configuration: str
    # Number of cylinders; None for electric motors and unparseable input.
    cylinders: Optional[int]
    # "Gasoline", "Hybrid", "Plug-in Hybrid", "Full Hybrid", "Flex Fuel",
    # "Electric" or "Unknown".
    fuel_type: str
    # "Natural", "Turbocharged" or "Supercharged"; None for electric motors.
    aspiration: Optional[str]
    # The engine string this spec was built from, unmodified.
    raw_string: str
class EngineSpecParser:
    """Engine specification parser with L→I normalization.

    Parses strings of the form ``"{displacement}L {config}{cylinders}"``
    (for example ``"2.0L I4"``), optionally followed by fuel-type and
    aspiration keywords, into ``EngineSpec`` records.
    """

    def __init__(self):
        # Primary pattern: {displacement}L {config}{cylinders}
        self.engine_pattern = re.compile(r'(\d+\.?\d*)L\s+([IVHL])(\d+)')
        # Hybrid patterns
        self.hybrid_patterns = [
            re.compile(r'PLUG-IN HYBRID EV-?\s*\(PHEV\)', re.IGNORECASE),
            re.compile(r'FULL HYBRID EV-?\s*\(FHEV\)', re.IGNORECASE),
            re.compile(r'HYBRID', re.IGNORECASE),
        ]
        # Other fuel type patterns
        self.fuel_patterns = [
            (re.compile(r'FLEX', re.IGNORECASE), 'Flex Fuel'),
            (re.compile(r'ELECTRIC', re.IGNORECASE), 'Electric'),
        ]
        # Aspiration patterns. "SC" must stand alone as a word: a bare
        # substring match would also fire on unrelated tokens such as "SCR".
        self.aspiration_patterns = [
            (re.compile(r'TURBO', re.IGNORECASE), 'Turbocharged'),
            (re.compile(r'SUPERCHARGED|\bSC\b', re.IGNORECASE), 'Supercharged'),
        ]

    def normalize_configuration(self, config: str) -> str:
        """CRITICAL: Convert L to I (L-configuration becomes Inline).

        Any other configuration letter is returned unchanged.
        """
        return 'I' if config == 'L' else config

    def extract_fuel_type(self, engine_str: str) -> str:
        """Extract the fuel type from an engine string.

        Hybrid keywords take precedence over the other fuel patterns.
        Returns 'Plug-in Hybrid', 'Full Hybrid', 'Hybrid', 'Flex Fuel',
        'Electric', or 'Gasoline' when nothing matches.
        """
        # Check hybrid patterns first (most specific).
        if any(pattern.search(engine_str) for pattern in self.hybrid_patterns):
            upper = engine_str.upper()
            if 'PLUG-IN' in upper:
                return 'Plug-in Hybrid'
            if 'FULL' in upper:
                return 'Full Hybrid'
            return 'Hybrid'
        # Check other fuel types in declaration order.
        for pattern, fuel_type in self.fuel_patterns:
            if pattern.search(engine_str):
                return fuel_type
        return 'Gasoline'  # Default

    def extract_aspiration(self, engine_str: str) -> str:
        """Extract the aspiration from an engine string.

        Returns 'Turbocharged', 'Supercharged', or 'Natural' when no
        aspiration keyword is present.
        """
        for pattern, aspiration in self.aspiration_patterns:
            if pattern.search(engine_str):
                return aspiration
        return 'Natural'  # Default

    def parse_engine_string(self, engine_str: str) -> "EngineSpec":
        """Parse a complete engine specification.

        The string must start with ``{displacement}L {config}{cylinders}``
        (surrounding whitespace is ignored). Strings that do not match are
        turned into a fallback spec instead of raising. ``raw_string``
        always holds the input exactly as given.
        """
        # The pattern is anchored at the start, so tolerate padded input.
        match = self.engine_pattern.match(engine_str.strip())
        if not match:
            # Handle unparseable engines
            return self.create_fallback_engine(engine_str)
        return EngineSpec(
            displacement_l=float(match.group(1)),
            configuration=self.normalize_configuration(match.group(2)),  # L→I here!
            cylinders=int(match.group(3)),
            fuel_type=self.extract_fuel_type(engine_str),
            aspiration=self.extract_aspiration(engine_str),
            raw_string=engine_str,
        )

    def create_fallback_engine(self, raw_string: str) -> "EngineSpec":
        """Create the fallback spec used for unparseable engine strings.

        Displacement and cylinders are None, configuration and fuel type
        are "Unknown", and aspiration defaults to "Natural".
        """
        return EngineSpec(
            displacement_l=None,
            configuration="Unknown",
            cylinders=None,
            fuel_type="Unknown",
            aspiration="Natural",
            raw_string=raw_string,
        )

    def create_electric_motor(self) -> "EngineSpec":
        """Create the default electric motor for empty engines arrays.

        Displacement, cylinders and aspiration are None because they do
        not apply to an electric motor.
        """
        return EngineSpec(
            displacement_l=None,
            configuration="Electric",
            cylinders=None,
            fuel_type="Electric",
            aspiration=None,
            raw_string="Electric Motor",
        )
def demonstrate_engine_parsing():
    """Demonstrate engine parsing on a fixed list of sample engine strings.

    Prints the parsed fields for each sample, flags the samples whose raw
    configuration letter was ``L`` (normalized to ``I``), and finally shows
    the default spec used for electric vehicles.
    """
    parser = EngineSpecParser()
    # Test cases from actual JSON data
    test_engines = [
        # Standard engines
        "2.0L I4",
        "3.5L V6",
        "5.6L V8",
        # L→I normalization examples (CRITICAL)
        "1.5L L3",
        "2.0L L4",
        "1.2L L3 FULL HYBRID EV- (FHEV)",
        # Subaru Boxer engines
        "2.4L H4",
        "2.0L H4",
        # Hybrid examples from Nissan
        "2.5L I4 FULL HYBRID EV- (FHEV)",
        "1.5L L3 PLUG-IN HYBRID EV- (PHEV)",
        # Flex fuel examples
        "5.6L V8 FLEX",
        "4.0L V6 FLEX",
        # Electric examples
        "1.8L I4 ELECTRIC",
        # Unparseable examples (should create fallback)
        "Custom Hybrid System",
        "V12 Twin-Turbo Custom",
        "V10 Plus",
    ]
    print("🔧 Engine Specification Parsing Examples")
    print("=" * 50)
    for engine_str in test_engines:
        spec = parser.parse_engine_string(engine_str)
        print(f"\nInput: \"{engine_str}\"")
        print(f" Displacement: {spec.displacement_l}L")
        print(f" Configuration: {spec.configuration}")
        print(f" Cylinders: {spec.cylinders}")
        print(f" Fuel Type: {spec.fuel_type}")
        print(f" Aspiration: {spec.aspiration}")
        # Highlight L→I normalization. Look at the captured configuration
        # letter: a plain "'L' in engine_str" test is true for every parsed
        # string because of the displacement suffix (e.g. "2.0L I4").
        raw = parser.engine_pattern.match(engine_str)
        if raw and raw.group(2) == 'L' and spec.configuration == 'I':
            print(f" 🎯 L→I NORMALIZED: L{spec.cylinders} became I{spec.cylinders}")
    # Demonstrate electric vehicle handling
    print("\n\n⚡ Electric Vehicle Default Engine:")
    electric_spec = parser.create_electric_motor()
    print(f" Name: {electric_spec.raw_string}")
    print(f" Configuration: {electric_spec.configuration}")
    print(f" Fuel Type: {electric_spec.fuel_type}")
def demonstrate_l_to_i_normalization():
    """Specifically demonstrate the L→I normalization requirement.

    For each sample, prints the configuration as written in the input next
    to the configuration stored in the parsed spec.
    """
    parser = EngineSpecParser()
    print("\n\n🎯 L→I Configuration Normalization")
    print("=" * 40)
    print("CRITICAL REQUIREMENT: All L-configurations must become I (Inline)")
    l_configuration_examples = [
        "1.5L L3",
        "2.0L L4",
        "1.2L L3 FULL HYBRID EV- (FHEV)",
        "1.5L L3 PLUG-IN HYBRID EV- (PHEV)",
    ]
    for engine_str in l_configuration_examples:
        spec = parser.parse_engine_string(engine_str)
        # Second whitespace-separated token is e.g. "L3"; its first
        # character is the raw configuration letter.
        original_config = engine_str.split()[1][0]
        print(f"\nOriginal: \"{engine_str}\"")
        print(f" Input Configuration: {original_config}{spec.cylinders}")
        print(f" Output Configuration: {spec.configuration}{spec.cylinders}")
        # Separate the two letters so the line reads "L → I", not "LI".
        print(f" ✅ Normalized: {original_config} → {spec.configuration}")
def demonstrate_database_storage():
    """Show how parsed engines map to database records.

    Prints an illustrative ``VALUES (...)`` tuple for each sample engine and
    for the default electric motor. The SQL text is for display only; real
    ETL code should pass these values as query parameters rather than
    building SQL strings.
    """

    def sql_literal(value):
        """Render a Python value as a SQL literal for display."""
        if value is None:
            return "NULL"
        if isinstance(value, str):
            # Double embedded single quotes so the literal stays well-formed.
            return "'" + value.replace("'", "''") + "'"
        return str(value)

    def sql_row(spec):
        """Format a spec in (name, code, displacement_l, cylinders, fuel_type, aspiration) order."""
        columns = (
            spec.raw_string,
            None,  # code is not derived from the engine string
            spec.displacement_l,
            spec.cylinders,
            spec.fuel_type,
            spec.aspiration,
        )
        return "(" + ", ".join(sql_literal(column) for column in columns) + ")"

    parser = EngineSpecParser()
    print("\n\n💾 Database Storage Examples")
    print("=" * 35)
    print("SQL: INSERT INTO vehicles.engine (name, code, displacement_l, cylinders, fuel_type, aspiration)")
    examples = [
        "2.0L I4",
        "1.5L L3 PLUG-IN HYBRID EV- (PHEV)",  # L→I case
        "2.4L H4",  # Subaru Boxer
        "5.6L V8 FLEX",
    ]
    for engine_str in examples:
        spec = parser.parse_engine_string(engine_str)
        print(f"\nEngine: \"{engine_str}\"")
        print(f" SQL: VALUES {sql_row(spec)}")
        # Only flag inputs whose raw configuration letter really was "L";
        # every parsed string contains an "L" from the displacement suffix.
        raw = parser.engine_pattern.match(engine_str)
        if raw and raw.group(2) == 'L' and spec.configuration == 'I':
            print(f" 🎯 Note: L{spec.cylinders} normalized to I{spec.cylinders}")
    # Electric motor example
    electric_spec = parser.create_electric_motor()
    print("\nElectric Vehicle:")
    print(f" SQL: VALUES {sql_row(electric_spec)}")
def run_validation_tests():
    """Run validation tests to ensure parsing works correctly.

    Checks L→I normalization and hybrid fuel-type detection on fixed
    samples, printing one line per passing case.

    Raises:
        AssertionError: if a parsed value differs from the expected one.
            Note that ``assert`` statements are skipped when Python runs
            with ``-O``.
    """
    parser = EngineSpecParser()
    print("\n\n✅ Validation Tests")
    print("=" * 20)
    # Test L→I normalization
    test_cases = [
        ("1.5L L3", "I", 3),
        ("2.0L L4", "I", 4),
        ("1.2L L3 FULL HYBRID EV- (FHEV)", "I", 3),
    ]
    for engine_str, expected_config, expected_cylinders in test_cases:
        spec = parser.parse_engine_string(engine_str)
        assert spec.configuration == expected_config, \
            f"Expected {expected_config}, got {spec.configuration}"
        assert spec.cylinders == expected_cylinders, \
            f"Expected {expected_cylinders} cylinders, got {spec.cylinders}"
        # Separate input from result so the line is readable.
        print(f"{engine_str} → {spec.configuration}{spec.cylinders}")
    # Test hybrid detection
    hybrid_cases = [
        ("2.5L I4 FULL HYBRID EV- (FHEV)", "Full Hybrid"),
        ("1.5L L3 PLUG-IN HYBRID EV- (PHEV)", "Plug-in Hybrid"),
    ]
    for engine_str, expected_fuel_type in hybrid_cases:
        spec = parser.parse_engine_string(engine_str)
        assert spec.fuel_type == expected_fuel_type, \
            f"Expected {expected_fuel_type}, got {spec.fuel_type}"
        print(f"{engine_str} → {spec.fuel_type}")
    print("\n🎉 All validation tests passed!")
if __name__ == "__main__":
    # Run each demonstration in order; the validation tests go last.
    for step in (
        demonstrate_engine_parsing,
        demonstrate_l_to_i_normalization,
        demonstrate_database_storage,
        run_validation_tests,
    ):
        step()

    # Closing summary.
    print("\n\n📋 Summary")
    print("=" * 10)
    for summary_line in (
        "✅ Engine parsing patterns implemented",
        "✅ L→I normalization working correctly",
        "✅ Hybrid/electric detection functional",
        "✅ Database storage format validated",
    ):
        print(summary_line)
    print("\n🚀 Ready for integration into ETL system!")