#!/usr/bin/env python3
"""
Engine Specification Parsing Examples

This file contains comprehensive examples of engine parsing patterns
found in the JSON vehicle data, demonstrating the L→I normalization
and hybrid/electric detection requirements.

Usage:
    python engine-parsing-examples.py
"""
|
|
|
|
import re
|
|
from dataclasses import dataclass
|
|
from typing import Optional, List
|
|
|
|
|
|
@dataclass
class EngineSpec:
    """Parsed engine specification.

    Fields that cannot be determined are ``None``: the fallback and
    electric-motor records carry no displacement or cylinder count, and the
    electric-motor record carries no aspiration.
    """

    # Displacement in litres; None when unknown or not applicable.
    displacement_l: Optional[float]
    # Normalized layout: "I", "V", "H", "Electric" or "Unknown"
    # (an "L" layout in the input is stored as "I").
    configuration: str
    # Cylinder count; None when unknown or not applicable.
    cylinders: Optional[int]
    # "Gasoline", "Hybrid", "Full Hybrid", "Plug-in Hybrid", "Flex Fuel",
    # "Electric" or "Unknown".
    fuel_type: str
    # "Natural", "Turbocharged" or "Supercharged"; None for the default
    # electric motor, which has no intake aspiration.
    aspiration: Optional[str]
    # The engine string this record was built from.
    raw_string: str
|
|
|
|
|
|
class EngineSpecParser:
    """Engine specification parser with L→I normalization.

    Parses strings shaped like ``"{displacement}L {config}{cylinders}"``
    (for example ``"2.0L I4"``), optionally followed by fuel or aspiration
    keywords, into ``EngineSpec`` records. Strings that do not start with
    that shape produce a fallback record instead of raising.
    """

    def __init__(self):
        # Primary pattern: {displacement}L {config}{cylinders}
        self.engine_pattern = re.compile(r'(\d+\.?\d*)L\s+([IVHL])(\d+)')

        # Hybrid patterns
        self.hybrid_patterns = [
            re.compile(r'PLUG-IN HYBRID EV-?\s*\(PHEV\)', re.IGNORECASE),
            re.compile(r'FULL HYBRID EV-?\s*\(FHEV\)', re.IGNORECASE),
            re.compile(r'HYBRID', re.IGNORECASE),
        ]

        # Other fuel type patterns
        self.fuel_patterns = [
            (re.compile(r'FLEX', re.IGNORECASE), 'Flex Fuel'),
            (re.compile(r'ELECTRIC', re.IGNORECASE), 'Electric'),
        ]

        # Aspiration patterns. The "SC" abbreviation only counts when it is
        # not embedded in a longer word; a bare "SC" alternative would also
        # match words such as "DISCOVERY" and mislabel them as supercharged.
        self.aspiration_patterns = [
            (re.compile(r'TURBO', re.IGNORECASE), 'Turbocharged'),
            (
                re.compile(
                    r'SUPERCHARGED|(?<![A-Za-z])SC(?![A-Za-z])',
                    re.IGNORECASE,
                ),
                'Supercharged',
            ),
        ]

    def normalize_configuration(self, config: str) -> str:
        """CRITICAL: Convert L to I (L-configuration becomes Inline).

        Any other configuration letter is returned unchanged.
        """
        return 'I' if config == 'L' else config

    def extract_fuel_type(self, engine_str: str) -> str:
        """Extract the fuel type from an engine string.

        Hybrid keywords take precedence over the other fuel keywords. Once
        any hybrid pattern matches, the variant is chosen by looking for
        "PLUG-IN" and then "FULL" anywhere in the string. Returns
        ``'Gasoline'`` when no keyword is present.
        """
        # Check hybrid patterns first (most specific)
        if any(pattern.search(engine_str) for pattern in self.hybrid_patterns):
            upper = engine_str.upper()
            if 'PLUG-IN' in upper:
                return 'Plug-in Hybrid'
            if 'FULL' in upper:
                return 'Full Hybrid'
            return 'Hybrid'

        # Check other fuel types
        for pattern, fuel_type in self.fuel_patterns:
            if pattern.search(engine_str):
                return fuel_type

        return 'Gasoline'  # Default

    def extract_aspiration(self, engine_str: str) -> str:
        """Extract the aspiration from an engine string.

        Returns the label of the first matching aspiration pattern, or
        ``'Natural'`` when none matches.
        """
        for pattern, aspiration in self.aspiration_patterns:
            if pattern.search(engine_str):
                return aspiration
        return 'Natural'  # Default

    def parse_engine_string(self, engine_str: str) -> "EngineSpec":
        """Parse a complete engine specification.

        Args:
            engine_str: Raw engine description, e.g. ``"1.5L L3 PLUG-IN
                HYBRID EV- (PHEV)"``. Surrounding whitespace is ignored when
                matching.

        Returns:
            The parsed ``EngineSpec``. Empty, ``None`` or unrecognised input
            yields the fallback record from ``create_fallback_engine``.
        """
        if not engine_str:
            return self.create_fallback_engine(engine_str or "")

        # The pattern is anchored at the start of the string, so trim first;
        # otherwise a stray leading space would force the fallback path.
        match = self.engine_pattern.match(engine_str.strip())
        if not match:
            # Handle unparseable engines
            return self.create_fallback_engine(engine_str)

        return EngineSpec(
            displacement_l=float(match.group(1)),
            configuration=self.normalize_configuration(match.group(2)),  # L→I here!
            cylinders=int(match.group(3)),
            fuel_type=self.extract_fuel_type(engine_str),
            aspiration=self.extract_aspiration(engine_str),
            raw_string=engine_str,
        )

    def create_fallback_engine(self, raw_string: str) -> "EngineSpec":
        """Create the fallback record for unparseable engines.

        Only ``raw_string`` is carried over; configuration and fuel type are
        ``"Unknown"`` and aspiration defaults to ``"Natural"``.
        """
        return EngineSpec(
            displacement_l=None,
            configuration="Unknown",
            cylinders=None,
            fuel_type="Unknown",
            aspiration="Natural",
            raw_string=raw_string,
        )

    def create_electric_motor(self) -> "EngineSpec":
        """Create the default electric motor for empty engines arrays."""
        return EngineSpec(
            displacement_l=None,
            configuration="Electric",
            cylinders=None,
            fuel_type="Electric",
            aspiration=None,  # no intake aspiration for an electric motor
            raw_string="Electric Motor",
        )
|
|
|
|
|
|
def demonstrate_engine_parsing():
    """Demonstrate engine parsing with real examples from JSON files.

    Parses each sample string and prints the resulting fields, flags the
    samples whose configuration was written as ``L`` and normalized to ``I``,
    then prints the default electric-motor record.
    """
    parser = EngineSpecParser()

    # Test cases from actual JSON data
    test_engines = [
        # Standard engines
        "2.0L I4",
        "3.5L V6",
        "5.6L V8",

        # L→I normalization examples (CRITICAL)
        "1.5L L3",
        "2.0L L4",
        "1.2L L3 FULL HYBRID EV- (FHEV)",

        # Subaru Boxer engines
        "2.4L H4",
        "2.0L H4",

        # Hybrid examples from Nissan
        "2.5L I4 FULL HYBRID EV- (FHEV)",
        "1.5L L3 PLUG-IN HYBRID EV- (PHEV)",

        # Flex fuel examples
        "5.6L V8 FLEX",
        "4.0L V6 FLEX",

        # Electric examples
        "1.8L I4 ELECTRIC",

        # Unparseable examples (should create fallback)
        "Custom Hybrid System",
        "V12 Twin-Turbo Custom",
        "V10 Plus",
    ]

    # Matches a configuration token written as "L<n>" (e.g. "L3"). A plain
    # substring test for 'L' would also hit the litre suffix of "2.0L" and
    # report every inline engine as normalized.
    l_config_token = re.compile(r'\bL\d+\b')

    print("🔧 Engine Specification Parsing Examples")
    print("=" * 50)

    for engine_str in test_engines:
        spec = parser.parse_engine_string(engine_str)

        # Fallback records have no displacement; avoid printing "NoneL".
        if spec.displacement_l is None:
            displacement = "Unknown"
        else:
            displacement = f"{spec.displacement_l}L"

        print(f"\nInput: \"{engine_str}\"")
        print(f" Displacement: {displacement}")
        print(f" Configuration: {spec.configuration}")
        print(f" Cylinders: {spec.cylinders}")
        print(f" Fuel Type: {spec.fuel_type}")
        print(f" Aspiration: {spec.aspiration}")

        # Highlight L→I normalization
        if l_config_token.search(engine_str) and spec.configuration == 'I':
            print(f" 🎯 L→I NORMALIZED: L{spec.cylinders} became I{spec.cylinders}")

    # Demonstrate electric vehicle handling
    print("\n\n⚡ Electric Vehicle Default Engine:")
    electric_spec = parser.create_electric_motor()
    print(f" Name: {electric_spec.raw_string}")
    print(f" Configuration: {electric_spec.configuration}")
    print(f" Fuel Type: {electric_spec.fuel_type}")
|
|
|
|
|
|
def demonstrate_l_to_i_normalization():
    """Print before/after configurations for inputs written with an L layout.

    Each sample is parsed and its original configuration letter (taken from
    the second whitespace-separated token) is shown next to the normalized
    one.
    """
    samples = (
        "1.5L L3",
        "2.0L L4",
        "1.2L L3 FULL HYBRID EV- (FHEV)",
        "1.5L L3 PLUG-IN HYBRID EV- (PHEV)",
    )
    parser = EngineSpecParser()

    for banner_line in (
        "\n\n🎯 L→I Configuration Normalization",
        "=" * 40,
        "CRITICAL REQUIREMENT: All L-configurations must become I (Inline)",
    ):
        print(banner_line)

    for raw in samples:
        parsed = parser.parse_engine_string(raw)
        # Second token is the layout token, e.g. "L3"; its first character
        # is the configuration letter as written in the input.
        layout_token = raw.split()[1]
        before = layout_token[0]
        after = parsed.configuration
        count = parsed.cylinders

        print(f"\nOriginal: \"{raw}\"")
        print(f" Input Configuration: {before}{count}")
        print(f" Output Configuration: {after}{count}")
        print(f" ✅ Normalized: {before}→{after}")
|
|
|
|
|
|
def _sql_literal(value):
    """Render a Python value as a SQL literal for display (None → NULL)."""
    if value is None:
        return "NULL"
    if isinstance(value, str):
        # Double embedded single quotes so the printed literal stays valid.
        return "'" + value.replace("'", "''") + "'"
    return str(value)


def _sql_values(spec):
    """Format a spec as the VALUES tuple for the demo INSERT statement."""
    # Column order: name, code, displacement_l, cylinders, fuel_type,
    # aspiration. No engine code is derived here, so code is always NULL.
    columns = (
        spec.raw_string,
        None,
        spec.displacement_l,
        spec.cylinders,
        spec.fuel_type,
        spec.aspiration,
    )
    return "(" + ", ".join(_sql_literal(column) for column in columns) + ")"


def demonstrate_database_storage():
    """Show how parsed engines map to database records.

    Prints an illustrative ``VALUES (...)`` tuple for a few parsed engines
    and for the default electric motor. The SQL is only printed, never
    executed; real inserts should use parameterized queries.
    """
    parser = EngineSpecParser()

    print("\n\n💾 Database Storage Examples")
    print("=" * 35)
    print("SQL: INSERT INTO vehicles.engine (name, code, displacement_l, cylinders, fuel_type, aspiration)")

    examples = [
        "2.0L I4",
        "1.5L L3 PLUG-IN HYBRID EV- (PHEV)",  # L→I case
        "2.4L H4",  # Subaru Boxer
        "5.6L V8 FLEX",
    ]

    # Matches a configuration token written as "L<n>". Testing for a bare
    # 'L' would also match the litre suffix in "2.0L I4" and print a false
    # normalization note.
    l_config_token = re.compile(r'\bL\d+\b')

    for engine_str in examples:
        spec = parser.parse_engine_string(engine_str)

        print(f"\nEngine: \"{engine_str}\"")
        print(f" SQL: VALUES {_sql_values(spec)}")

        if l_config_token.search(engine_str) and spec.configuration == 'I':
            print(f" 🎯 Note: L{spec.cylinders} normalized to I{spec.cylinders}")

    # Electric motor example
    electric_spec = parser.create_electric_motor()
    print("\nElectric Vehicle:")
    print(f" SQL: VALUES {_sql_values(electric_spec)}")
|
|
|
|
|
|
def run_validation_tests():
    """Run validation tests to ensure parsing works correctly.

    Checks L→I normalization and hybrid detection on known inputs.

    Raises:
        AssertionError: If a parsed value differs from the expected one.
            The checks are explicit ``raise`` statements rather than
            ``assert`` so they still run under ``python -O``.
    """
    parser = EngineSpecParser()

    print("\n\n✅ Validation Tests")
    print("=" * 20)

    # Test L→I normalization
    test_cases = [
        ("1.5L L3", "I", 3),
        ("2.0L L4", "I", 4),
        ("1.2L L3 FULL HYBRID EV- (FHEV)", "I", 3),
    ]

    for engine_str, expected_config, expected_cylinders in test_cases:
        spec = parser.parse_engine_string(engine_str)

        if spec.configuration != expected_config:
            raise AssertionError(
                f"Expected {expected_config}, got {spec.configuration}"
            )
        if spec.cylinders != expected_cylinders:
            raise AssertionError(
                f"Expected {expected_cylinders} cylinders, got {spec.cylinders}"
            )

        print(f"✅ {engine_str} → {spec.configuration}{spec.cylinders}")

    # Test hybrid detection
    hybrid_cases = [
        ("2.5L I4 FULL HYBRID EV- (FHEV)", "Full Hybrid"),
        ("1.5L L3 PLUG-IN HYBRID EV- (PHEV)", "Plug-in Hybrid"),
    ]

    for engine_str, expected_fuel_type in hybrid_cases:
        spec = parser.parse_engine_string(engine_str)

        if spec.fuel_type != expected_fuel_type:
            raise AssertionError(
                f"Expected {expected_fuel_type}, got {spec.fuel_type}"
            )

        print(f"✅ {engine_str} → {spec.fuel_type}")

    print("\n🎉 All validation tests passed!")
|
|
|
|
|
|
if __name__ == "__main__":
    # Run every demonstration in order, then the self-checks.
    for demo in (
        demonstrate_engine_parsing,
        demonstrate_l_to_i_normalization,
        demonstrate_database_storage,
        run_validation_tests,
    ):
        demo()

    # Closing summary, emitted as one newline-joined write.
    summary_lines = [
        "\n\n📋 Summary",
        "=" * 10,
        "✅ Engine parsing patterns implemented",
        "✅ L→I normalization working correctly",
        "✅ Hybrid/electric detection functional",
        "✅ Database storage format validated",
        "\n🚀 Ready for integration into ETL system!",
    ]
    print("\n".join(summary_lines))