Initial Commit

This commit is contained in:
Eric Gullickson
2025-09-17 16:09:15 -05:00
parent 0cdb9803de
commit a052040e3a
373 changed files with 437090 additions and 6773 deletions

View File

@@ -0,0 +1,314 @@
#!/usr/bin/env python3
"""
Engine Specification Parsing Examples
This file contains comprehensive examples of engine parsing patterns
found in the JSON vehicle data, demonstrating the L→I normalization
and hybrid/electric detection requirements.
Usage:
python engine-parsing-examples.py
"""
import re
from dataclasses import dataclass
from typing import Optional, List
@dataclass
class EngineSpec:
    """Parsed engine specification.

    ``None`` in an optional field means the value was not parsed from the
    raw string or does not apply (as in the default electric motor record).
    """
    displacement_l: Optional[float]  # litres, taken from the "<n>L" prefix
    configuration: str  # I, V, H, Electric, or Unknown (fallback)
    cylinders: Optional[int]
    # Gasoline, Hybrid, Full Hybrid, Plug-in Hybrid, Flex Fuel, Electric, Unknown
    fuel_type: str
    # Natural, Turbocharged, Supercharged; None for the default electric motor
    aspiration: Optional[str]
    raw_string: str  # text the spec was built from
class EngineSpecParser:
    """Engine specification parser with L→I normalization.

    Turns strings such as ``"2.0L I4"`` or
    ``"1.5L L3 PLUG-IN HYBRID EV- (PHEV)"`` into ``EngineSpec`` records.
    Strings that do not start with ``{displacement}L {config}{cylinders}``
    yield a fallback record instead of raising.
    """

    def __init__(self):
        # Primary pattern: {displacement}L {config}{cylinders}
        self.engine_pattern = re.compile(r'(\d+\.?\d*)L\s+([IVHL])(\d+)')
        # Hybrid patterns
        self.hybrid_patterns = [
            re.compile(r'PLUG-IN HYBRID EV-?\s*\(PHEV\)', re.IGNORECASE),
            re.compile(r'FULL HYBRID EV-?\s*\(FHEV\)', re.IGNORECASE),
            re.compile(r'HYBRID', re.IGNORECASE),
        ]
        # Other fuel type patterns
        self.fuel_patterns = [
            (re.compile(r'FLEX', re.IGNORECASE), 'Flex Fuel'),
            (re.compile(r'ELECTRIC', re.IGNORECASE), 'Electric'),
        ]
        # Aspiration patterns. "SC" must be a standalone token: without the
        # word boundaries it would match inside unrelated words.
        self.aspiration_patterns = [
            (re.compile(r'TURBO', re.IGNORECASE), 'Turbocharged'),
            (re.compile(r'SUPERCHARGED|\bSC\b', re.IGNORECASE), 'Supercharged'),
        ]

    def normalize_configuration(self, config: str) -> str:
        """CRITICAL: Convert L to I (L-configuration becomes Inline)."""
        return 'I' if config == 'L' else config

    def extract_fuel_type(self, engine_str: str) -> str:
        """Return the fuel type named in ``engine_str``.

        Hybrid markers win over the other fuel markers; ``'Gasoline'`` is the
        default when nothing matches.
        """
        if any(pattern.search(engine_str) for pattern in self.hybrid_patterns):
            upper = engine_str.upper()
            if 'PLUG-IN' in upper:
                return 'Plug-in Hybrid'
            if 'FULL' in upper:
                return 'Full Hybrid'
            return 'Hybrid'
        # Check other fuel types
        for pattern, fuel_type in self.fuel_patterns:
            if pattern.search(engine_str):
                return fuel_type
        return 'Gasoline'  # Default

    def extract_aspiration(self, engine_str: str) -> str:
        """Return the aspiration named in ``engine_str`` (default ``'Natural'``)."""
        for pattern, aspiration in self.aspiration_patterns:
            if pattern.search(engine_str):
                return aspiration
        return 'Natural'  # Default

    def parse_engine_string(self, engine_str: str) -> "EngineSpec":
        """Parse a complete engine specification.

        The primary pattern is anchored at the start of ``engine_str``; when
        it does not match, the fallback record is returned.
        """
        match = self.engine_pattern.match(engine_str)
        if not match:
            # Handle unparseable engines
            return self.create_fallback_engine(engine_str)
        return EngineSpec(
            displacement_l=float(match.group(1)),
            configuration=self.normalize_configuration(match.group(2)),  # L→I here!
            cylinders=int(match.group(3)),
            fuel_type=self.extract_fuel_type(engine_str),
            aspiration=self.extract_aspiration(engine_str),
            raw_string=engine_str,
        )

    def create_fallback_engine(self, raw_string: str) -> "EngineSpec":
        """Create the placeholder record used for unparseable engine strings."""
        return EngineSpec(
            displacement_l=None,
            configuration="Unknown",
            cylinders=None,
            fuel_type="Unknown",
            aspiration="Natural",
            raw_string=raw_string,
        )

    def create_electric_motor(self) -> "EngineSpec":
        """Create the default electric motor used for empty engines arrays."""
        return EngineSpec(
            displacement_l=None,
            configuration="Electric",
            cylinders=None,
            fuel_type="Electric",
            aspiration=None,
            raw_string="Electric Motor",
        )
def demonstrate_engine_parsing():
    """Demonstrate engine parsing with real examples from JSON files.

    Prints the parsed fields for each sample string, flags the ones whose raw
    configuration letter was ``L`` (normalized to ``I``), and finally shows
    the default electric motor record.
    """
    parser = EngineSpecParser()
    # Test cases from actual JSON data
    test_engines = [
        # Standard engines
        "2.0L I4",
        "3.5L V6",
        "5.6L V8",
        # L→I normalization examples (CRITICAL)
        "1.5L L3",
        "2.0L L4",
        "1.2L L3 FULL HYBRID EV- (FHEV)",
        # Subaru Boxer engines
        "2.4L H4",
        "2.0L H4",
        # Hybrid examples from Nissan
        "2.5L I4 FULL HYBRID EV- (FHEV)",
        "1.5L L3 PLUG-IN HYBRID EV- (PHEV)",
        # Flex fuel examples
        "5.6L V8 FLEX",
        "4.0L V6 FLEX",
        # Electric examples
        "1.8L I4 ELECTRIC",
        # Unparseable examples (should create fallback)
        "Custom Hybrid System",
        "V12 Twin-Turbo Custom",
        "V10 Plus",
    ]
    print("🔧 Engine Specification Parsing Examples")
    print("=" * 50)
    for engine_str in test_engines:
        spec = parser.parse_engine_string(engine_str)
        # Fallback records carry no displacement; avoid printing "NoneL".
        if spec.displacement_l is None:
            displacement = "N/A"
        else:
            displacement = f"{spec.displacement_l}L"
        print(f"\nInput: \"{engine_str}\"")
        print(f" Displacement: {displacement}")
        print(f" Configuration: {spec.configuration}")
        print(f" Cylinders: {spec.cylinders}")
        print(f" Fuel Type: {spec.fuel_type}")
        print(f" Aspiration: {spec.aspiration}")
        # Highlight L→I normalization. Every parseable string contains an "L"
        # (the displacement unit), so inspect the raw configuration letter
        # rather than testing for the character anywhere in the string.
        raw_match = parser.engine_pattern.match(engine_str)
        if raw_match and raw_match.group(2) == 'L' and spec.configuration == 'I':
            print(f" 🎯 L→I NORMALIZED: L{spec.cylinders} became I{spec.cylinders}")
    # Demonstrate electric vehicle handling
    print(f"\n\n⚡ Electric Vehicle Default Engine:")
    electric_spec = parser.create_electric_motor()
    print(f" Name: {electric_spec.raw_string}")
    print(f" Configuration: {electric_spec.configuration}")
    print(f" Fuel Type: {electric_spec.fuel_type}")
def demonstrate_l_to_i_normalization():
    """Specifically demonstrate the L→I normalization requirement.

    Each sample has the shape ``"<disp>L L<n> ..."``; the configuration
    letter is read from the second whitespace-separated token.
    """
    parser = EngineSpecParser()
    print("\n\n🎯 L→I Configuration Normalization")
    print("=" * 40)
    print("CRITICAL REQUIREMENT: All L-configurations must become I (Inline)")
    l_configuration_examples = [
        "1.5L L3",
        "2.0L L4",
        "1.2L L3 FULL HYBRID EV- (FHEV)",
        "1.5L L3 PLUG-IN HYBRID EV- (PHEV)",
    ]
    for engine_str in l_configuration_examples:
        spec = parser.parse_engine_string(engine_str)
        original_config = engine_str.split()[1][0]  # Extract L from "L3"
        print(f"\nOriginal: \"{engine_str}\"")
        print(f" Input Configuration: {original_config}{spec.cylinders}")
        print(f" Output Configuration: {spec.configuration}{spec.cylinders}")
        # Separate the two letters; glued together they would print as "LI".
        print(f" ✅ Normalized: {original_config} → {spec.configuration}")
def demonstrate_database_storage():
    """Show how parsed engines map to database records.

    The SQL is printed for illustration only; nothing is executed.
    """
    parser = EngineSpecParser()

    def sql_literal(value):
        """Render a Python value as a SQL literal (None → NULL, quotes doubled)."""
        if value is None:
            return "NULL"
        if isinstance(value, str):
            return "'" + value.replace("'", "''") + "'"
        return str(value)

    def row_values(spec):
        """Format one engine as a VALUES tuple; the ``code`` column is always NULL."""
        columns = (
            spec.raw_string,
            None,
            spec.displacement_l,
            spec.cylinders,
            spec.fuel_type,
            spec.aspiration,
        )
        return "(" + ", ".join(sql_literal(column) for column in columns) + ")"

    print("\n\n💾 Database Storage Examples")
    print("=" * 35)
    print("SQL: INSERT INTO vehicles.engine (name, code, displacement_l, cylinders, fuel_type, aspiration)")
    examples = [
        "2.0L I4",
        "1.5L L3 PLUG-IN HYBRID EV- (PHEV)",  # L→I case
        "2.4L H4",  # Subaru Boxer
        "5.6L V8 FLEX",
    ]
    for engine_str in examples:
        spec = parser.parse_engine_string(engine_str)
        print(f"\nEngine: \"{engine_str}\"")
        print(f" SQL: VALUES {row_values(spec)}")
        # The displacement unit puts an "L" in every string, so check the raw
        # configuration letter instead of searching the whole string.
        raw_match = parser.engine_pattern.match(engine_str)
        if raw_match and raw_match.group(2) == 'L' and spec.configuration == 'I':
            print(f" 🎯 Note: L{spec.cylinders} normalized to I{spec.cylinders}")
    # Electric motor example
    electric_spec = parser.create_electric_motor()
    print(f"\nElectric Vehicle:")
    print(f" SQL: VALUES {row_values(electric_spec)}")
def run_validation_tests():
    """Run validation tests to ensure parsing works correctly.

    Raises:
        AssertionError: if a sample parses to an unexpected configuration,
            cylinder count, or fuel type.
    """
    parser = EngineSpecParser()
    print("\n\n✅ Validation Tests")
    print("=" * 20)
    # Test L→I normalization
    test_cases = [
        ("1.5L L3", "I", 3),
        ("2.0L L4", "I", 4),
        ("1.2L L3 FULL HYBRID EV- (FHEV)", "I", 3),
    ]
    for engine_str, expected_config, expected_cylinders in test_cases:
        spec = parser.parse_engine_string(engine_str)
        assert spec.configuration == expected_config, \
            f"Expected {expected_config}, got {spec.configuration}"
        assert spec.cylinders == expected_cylinders, \
            f"Expected {expected_cylinders} cylinders, got {spec.cylinders}"
        # Keep input and result apart; glued together they read "1.5L L3I3".
        print(f"✅ {engine_str} → {spec.configuration}{spec.cylinders}")
    # Test hybrid detection
    hybrid_cases = [
        ("2.5L I4 FULL HYBRID EV- (FHEV)", "Full Hybrid"),
        ("1.5L L3 PLUG-IN HYBRID EV- (PHEV)", "Plug-in Hybrid"),
    ]
    for engine_str, expected_fuel_type in hybrid_cases:
        spec = parser.parse_engine_string(engine_str)
        assert spec.fuel_type == expected_fuel_type, \
            f"Expected {expected_fuel_type}, got {spec.fuel_type}"
        print(f"✅ {engine_str} → {spec.fuel_type}")
    print("\n🎉 All validation tests passed!")
if __name__ == "__main__":
    # Run every demonstration in order, then print the closing summary.
    for demo in (
        demonstrate_engine_parsing,
        demonstrate_l_to_i_normalization,
        demonstrate_database_storage,
        run_validation_tests,
    ):
        demo()
    summary_lines = (
        "\n\n📋 Summary",
        "=" * 10,
        "✅ Engine parsing patterns implemented",
        "✅ L→I normalization working correctly",
        "✅ Hybrid/electric detection functional",
        "✅ Database storage format validated",
        "\n🚀 Ready for integration into ETL system!",
    )
    for line in summary_lines:
        print(line)

View File

@@ -0,0 +1,334 @@
#!/usr/bin/env python3
"""
Make Name Mapping Examples
This file demonstrates the complete make name normalization process,
converting JSON filenames to proper display names for the database.
Usage:
python make-mapping-examples.py
"""
import json
import glob
import os
from typing import Dict, Set, List, Tuple
from dataclasses import dataclass
@dataclass
class ValidationReport:
    """Outcome of checking mapped make names against the authoritative list."""
    total_files: int
    valid_mappings: int
    mismatches: List[Dict[str, str]]

    @property
    def success_rate(self) -> float:
        """Fraction of files that mapped to a valid make; 0.0 when no files."""
        if self.total_files > 0:
            return self.valid_mappings / self.total_files
        return 0.0
class MakeNameMapper:
    """Convert JSON filenames to proper make display names."""

    def __init__(self):
        # Special capitalization cases, keyed by the title-cased name
        self.special_cases = {
            'Bmw': 'BMW',  # Bayerische Motoren Werke
            'Gmc': 'GMC',  # General Motors Company
            'Mini': 'MINI',  # Brand styling
            'Mclaren': 'McLaren',  # Scottish naming convention
        }
        # Authoritative makes list (would be loaded from sources/makes.json)
        self.authoritative_makes = {
            'Acura', 'Alfa Romeo', 'Aston Martin', 'Audi', 'BMW', 'Bentley',
            'Buick', 'Cadillac', 'Chevrolet', 'Chrysler', 'Dodge', 'Ferrari',
            'Fiat', 'Ford', 'Genesis', 'Geo', 'GMC', 'Honda', 'Hummer',
            'Hyundai', 'Infiniti', 'Isuzu', 'Jaguar', 'Jeep', 'Kia',
            'Lamborghini', 'Land Rover', 'Lexus', 'Lincoln', 'Lotus', 'Lucid',
            'MINI', 'Maserati', 'Mazda', 'McLaren', 'Mercury', 'Mitsubishi',
            'Nissan', 'Oldsmobile', 'Plymouth', 'Polestar', 'Pontiac',
            'Porsche', 'Ram', 'Rivian', 'Rolls Royce', 'Saab', 'Saturn',
            'Scion', 'Smart', 'Subaru', 'Tesla', 'Toyota', 'Volkswagen',
            'Volvo'
        }

    def normalize_make_name(self, filename: str) -> str:
        """Convert a filename to its display name.

        Steps: drop a trailing ``.json``, turn underscores into spaces
        (collapsing runs so ``a__b`` does not yield a double space),
        title-case, then apply the special capitalization cases.
        """
        suffix = '.json'
        # Strip the extension only at the end; str.replace would also remove
        # a ".json" that appears in the middle of the name.
        base_name = filename[:-len(suffix)] if filename.endswith(suffix) else filename
        words = base_name.replace('_', ' ').split()
        title_cased = ' '.join(words).title()
        return self.special_cases.get(title_cased, title_cased)

    def validate_mapping(self, filename: str, display_name: str) -> bool:
        """Return True when ``display_name`` is in the authoritative list.

        ``filename`` is accepted for call-site symmetry and is not consulted.
        """
        return display_name in self.authoritative_makes

    def get_all_mappings(self) -> Dict[str, str]:
        """Get the complete filename → display name mapping."""
        # Simulate the 55 JSON files found in the actual directory
        json_files = [
            'acura.json', 'alfa_romeo.json', 'aston_martin.json', 'audi.json',
            'bentley.json', 'bmw.json', 'buick.json', 'cadillac.json',
            'chevrolet.json', 'chrysler.json', 'dodge.json', 'ferrari.json',
            'fiat.json', 'ford.json', 'genesis.json', 'geo.json', 'gmc.json',
            'honda.json', 'hummer.json', 'hyundai.json', 'infiniti.json',
            'isuzu.json', 'jaguar.json', 'jeep.json', 'kia.json',
            'lamborghini.json', 'land_rover.json', 'lexus.json', 'lincoln.json',
            'lotus.json', 'lucid.json', 'maserati.json', 'mazda.json',
            'mclaren.json', 'mercury.json', 'mini.json', 'mitsubishi.json',
            'nissan.json', 'oldsmobile.json', 'plymouth.json', 'polestar.json',
            'pontiac.json', 'porsche.json', 'ram.json', 'rivian.json',
            'rolls_royce.json', 'saab.json', 'saturn.json', 'scion.json',
            'smart.json', 'subaru.json', 'tesla.json', 'toyota.json',
            'volkswagen.json', 'volvo.json'
        ]
        return {filename: self.normalize_make_name(filename) for filename in json_files}

    def validate_all_mappings(self) -> "ValidationReport":
        """Validate every mapping and collect the names missing from the list."""
        mappings = self.get_all_mappings()
        mismatches = [
            {
                'filename': filename,
                'mapped_name': display_name,
                'status': 'NOT_FOUND_IN_AUTHORITATIVE'
            }
            for filename, display_name in mappings.items()
            if not self.validate_mapping(filename, display_name)
        ]
        return ValidationReport(
            total_files=len(mappings),
            valid_mappings=len(mappings) - len(mismatches),
            mismatches=mismatches
        )
def demonstrate_make_name_mapping():
    """Demonstrate the make name normalization process.

    Prints one line per sample with a pass/fail marker, plus a mismatch
    detail line when the result differs from the expectation.
    """
    mapper = MakeNameMapper()
    print("🏷️ Make Name Mapping Examples")
    print("=" * 40)
    # Test cases showing different transformation types
    test_cases = [
        # Single word makes (standard title case)
        ('toyota.json', 'Toyota'),
        ('honda.json', 'Honda'),
        ('ford.json', 'Ford'),
        # Multi-word makes (underscore → space + title case)
        ('alfa_romeo.json', 'Alfa Romeo'),
        ('land_rover.json', 'Land Rover'),
        ('rolls_royce.json', 'Rolls Royce'),
        ('aston_martin.json', 'Aston Martin'),
        # Special capitalization cases
        ('bmw.json', 'BMW'),
        ('gmc.json', 'GMC'),
        ('mini.json', 'MINI'),
        ('mclaren.json', 'McLaren'),
    ]
    for filename, expected in test_cases:
        result = mapper.normalize_make_name(filename)
        # The marker must differ between outcomes for the line to carry any
        # pass/fail information.
        status = "✅" if result == expected else "❌"
        print(f"{status} {filename:20} → {result:15} (expected: {expected})")
        if result != expected:
            print(f" ⚠️ MISMATCH: Expected '{expected}', got '{result}'")
def demonstrate_complete_mapping():
    """Print the full filename → display name table, grouped by rule applied."""
    mapper = MakeNameMapper()
    all_mappings = mapper.get_all_mappings()
    print(f"\n\n📋 Complete Make Name Mappings ({len(all_mappings)} files)")
    print("=" * 50)
    # Bucket each entry by the kind of transformation it went through.
    restyled_names = ('BMW', 'GMC', 'MINI', 'McLaren')
    single_words, multi_words, special_cases = [], [], []
    for entry in sorted(all_mappings.items()):
        filename, display_name = entry
        if '_' in filename:
            bucket = multi_words
        elif display_name in restyled_names:
            bucket = special_cases
        else:
            bucket = single_words
        bucket.append(entry)
    sections = (
        ("\n🔤 Single Word Makes (Standard Title Case):", single_words),
        (f"\n📝 Multi-Word Makes (Underscore → Space, {len(multi_words)} total):", multi_words),
        (f"\n⭐ Special Capitalization Cases ({len(special_cases)} total):", special_cases),
    )
    for heading, rows in sections:
        print(heading)
        for filename, display_name in rows:
            print(f" {filename:20}{display_name}")
def demonstrate_validation():
    """Demonstrate validation against the authoritative makes list.

    Prints the totals from the validation report, then either each mismatch
    or a success message.
    """
    mapper = MakeNameMapper()
    report = mapper.validate_all_mappings()
    print(f"\n\n✅ Validation Report")
    print("=" * 20)
    print(f"Total files processed: {report.total_files}")
    print(f"Valid mappings: {report.valid_mappings}")
    print(f"Success rate: {report.success_rate:.1%}")
    if report.mismatches:
        print(f"\n⚠️ Mismatches found ({len(report.mismatches)}):")
        for mismatch in report.mismatches:
            # Separate filename and mapped name so they do not run together.
            print(f" {mismatch['filename']} → {mismatch['mapped_name']}")
            print(f" Status: {mismatch['status']}")
    else:
        print("\n🎉 All mappings valid!")
def demonstrate_database_integration():
    """Print a sample multi-row INSERT built from normalized make names."""
    mapper = MakeNameMapper()
    print(f"\n\n💾 Database Integration Example")
    print("=" * 35)
    sample_files = ['toyota.json', 'alfa_romeo.json', 'bmw.json', 'land_rover.json']
    print("SQL: INSERT INTO vehicles.make (name) VALUES")
    last_index = len(sample_files) - 1
    for index, filename in enumerate(sample_files):
        display_name = mapper.normalize_make_name(filename)
        # Every row ends with a comma except the last, which closes the statement.
        terminator = ";" if index >= last_index else ","
        print(f" ('{display_name}'){terminator}")
        print(f" -- From file: {filename}")
def demonstrate_error_handling():
    """Demonstrate how unusual filenames are normalized and validated.

    Each edge case is printed with its display name and whether that name is
    in the authoritative list; an exception is reported on the same line
    layout instead of aborting the demo.
    """
    mapper = MakeNameMapper()
    print(f"\n\n🛠️ Error Handling Examples")
    print("=" * 30)
    edge_cases = [
        'unknown_brand.json',
        'test__multiple__underscores.json',
        'no_extension',
        '.json',  # Only extension
    ]
    for filename in edge_cases:
        try:
            display_name = mapper.normalize_make_name(filename)
            is_valid = mapper.validate_mapping(filename, display_name)
            status = "✅ Valid" if is_valid else "⚠️ Not in authoritative list"
            # Same " → " separator as the error line below.
            print(f" {filename:35} → {display_name:15} ({status})")
        except Exception as e:
            print(f" {filename:35} → ERROR: {e}")
def run_validation_tests():
    """Run the make-name validation tests.

    Returns:
        True when every case yields both the expected display name and the
        expected validity flag, otherwise False.
    """
    mapper = MakeNameMapper()
    print(f"\n\n🧪 Validation Tests")
    print("=" * 20)
    # Test cases with expected results
    test_cases = [
        ('toyota.json', 'Toyota', True),
        ('alfa_romeo.json', 'Alfa Romeo', True),
        ('bmw.json', 'BMW', True),
        ('gmc.json', 'GMC', True),
        ('mclaren.json', 'McLaren', True),
        ('unknown_brand.json', 'Unknown Brand', False),
    ]
    passed = 0
    for filename, expected_name, expected_valid in test_cases:
        actual_name = mapper.normalize_make_name(filename)
        actual_valid = mapper.validate_mapping(filename, actual_name)
        name_correct = actual_name == expected_name
        valid_correct = actual_valid == expected_valid
        if name_correct and valid_correct:
            # Mark the outcome and keep filename and result apart so passing
            # and failing lines can be told from each other.
            print(f"✅ {filename} → {actual_name} (valid: {actual_valid})")
            passed += 1
        else:
            print(f"❌ {filename}")
            if not name_correct:
                print(f" Name: Expected '{expected_name}', got '{actual_name}'")
            if not valid_correct:
                print(f" Valid: Expected {expected_valid}, got {actual_valid}")
    print(f"\n📊 Test Results: {passed}/{len(test_cases)} tests passed")
    if passed == len(test_cases):
        print("🎉 All validation tests passed!")
        return True
    print("⚠️ Some tests failed!")
    return False
if __name__ == "__main__":
    # Demonstrations first; the validation run decides the closing message.
    for demo in (
        demonstrate_make_name_mapping,
        demonstrate_complete_mapping,
        demonstrate_validation,
        demonstrate_database_integration,
        demonstrate_error_handling,
    ):
        demo()
    success = run_validation_tests()
    for line in (
        "\n\n📋 Summary",
        "=" * 10,
        "✅ Make name normalization patterns implemented",
        "✅ Special capitalization cases handled",
        "✅ Multi-word make names (underscore → space) working",
        "✅ Validation against authoritative list functional",
        "✅ Database integration format demonstrated",
    ):
        print(line)
    verdict = (
        "\n🚀 Ready for integration into ETL system!"
        if success
        else "\n⚠️ Review failed tests before integration"
    )
    print(verdict)
    for line in (
        "\nKey Implementation Notes:",
        "• filename.replace('.json', '').replace('_', ' ').title()",
        "• Special cases: BMW, GMC, MINI, McLaren",
        "• Validation against sources/makes.json required",
        "• Handle unknown makes gracefully (log warning, continue)",
    ):
        print(line)

View File

@@ -0,0 +1,449 @@
#!/usr/bin/env python3
"""
Sample JSON Processing Examples
This file demonstrates complete processing of JSON vehicle data,
from file reading through database-ready output structures.
Usage:
python sample-json-processing.py
"""
import json
from typing import List, Dict, Any, Optional
from dataclasses import dataclass
from pathlib import Path
@dataclass
class EngineSpec:
    """Parsed engine specification.

    ``None`` in an optional field means the value is unknown or does not
    apply.
    """
    displacement_l: Optional[float]
    configuration: str
    cylinders: Optional[int]
    fuel_type: str
    # Optional: records built for empty engine lists (electric vehicles)
    # carry no aspiration.
    aspiration: Optional[str]
    raw_string: str  # original engine text
@dataclass
class ModelData:
    """One model as listed under a single year entry.

    ``engines`` holds the parsed engine records and ``trims`` the strings
    taken from the entry's ``submodels`` list.
    """
    name: str
    engines: List[EngineSpec]
    trims: List[str]  # From submodels
@dataclass
class YearData:
    """All models recorded for one model year."""
    year: int
    models: List[ModelData]
@dataclass
class MakeData:
    """Everything processed for one make, with aggregate counters."""
    name: str  # Normalized display name
    filename: str  # Original JSON filename
    years: List[YearData]

    @property
    def total_models(self) -> int:
        """Number of model entries summed over every year."""
        count = 0
        for year in self.years:
            count += len(year.models)
        return count

    @property
    def total_engines(self) -> int:
        """Number of engine records summed over every model of every year."""
        count = 0
        for year in self.years:
            for model in year.models:
                count += len(model.engines)
        return count

    @property
    def total_trims(self) -> int:
        """Number of trims summed over every model of every year."""
        count = 0
        for year in self.years:
            for model in year.models:
                count += len(model.trims)
        return count
class JsonProcessor:
    """Process JSON vehicle files into structured data."""

    def __init__(self):
        # Import our utility classes
        # NOTE(review): the usage text of the sibling scripts names them with
        # hyphens (e.g. engine-parsing-examples.py); these imports need the
        # modules to be importable under underscore names — confirm.
        from engine_parsing_examples import EngineSpecParser
        from make_mapping_examples import MakeNameMapper
        self.engine_parser = EngineSpecParser()
        self.make_mapper = MakeNameMapper()

    def process_json_file(self, json_data: Dict[str, Any], filename: str) -> MakeData:
        """Process a complete JSON document into structured data.

        Args:
            json_data: Mapping whose first key holds the list of year entries;
                each entry has ``year`` and optionally ``models``, and each
                model has ``name`` and optionally ``engines`` / ``submodels``.
            filename: JSON filename; the display name is derived from it.

        Returns:
            A ``MakeData`` with one ``YearData`` per year entry. A model whose
            engine list is missing, empty or null gets the parser's default
            electric motor record.

        Raises:
            IndexError: if ``json_data`` is empty.
            KeyError: if a year entry lacks ``year`` or a model lacks ``name``.
        """
        # Get the make name (first key in JSON)
        make_key = list(json_data.keys())[0]
        display_name = self.make_mapper.normalize_make_name(filename)
        years_data = []
        for year_entry in json_data[make_key]:
            year = int(year_entry['year'])
            models_data = []
            # ``or []`` also covers an explicit JSON null, which .get() alone
            # would hand back as None.
            for model_entry in year_entry.get('models') or []:
                model_name = model_entry['name']
                engine_strings = model_entry.get('engines') or []
                if not engine_strings:
                    # Electric vehicle - create default engine
                    engines = [self.engine_parser.create_electric_motor()]
                else:
                    engines = [
                        self.engine_parser.parse_engine_string(engine_str)
                        for engine_str in engine_strings
                    ]
                # Process trims (from submodels); copy so the result does not
                # alias the caller's input list.
                trims = list(model_entry.get('submodels') or [])
                models_data.append(ModelData(
                    name=model_name,
                    engines=engines,
                    trims=trims
                ))
            years_data.append(YearData(
                year=year,
                models=models_data
            ))
        return MakeData(
            name=display_name,
            filename=filename,
            years=years_data
        )
def demonstrate_tesla_processing():
    """Process a small Tesla sample in which every model has no engines listed."""

    def electric_model(name, trims):
        # Empty engines list - electric vehicle
        return {"name": name, "engines": [], "submodels": trims}

    # Sample Tesla data (simplified from actual tesla.json)
    tesla_json = {
        "tesla": [
            {
                "year": "2024",
                "models": [
                    electric_model("3", ["Long Range AWD", "Performance", "Standard Plus"]),
                    electric_model("y", ["Long Range", "Performance"]),
                ],
            },
            {
                "year": "2023",
                "models": [
                    electric_model("s", ["Plaid", "Long Range Plus"]),
                ],
            },
        ]
    }
    make_data = JsonProcessor().process_json_file(tesla_json, 'tesla.json')
    overview = (
        "⚡ Tesla JSON Processing Example",
        "=" * 35,
        "Filename: tesla.json",
        f"Display Name: {make_data.name}",
        f"Years: {len(make_data.years)}",
        f"Total Models: {make_data.total_models}",
        f"Total Engines: {make_data.total_engines}",
        f"Total Trims: {make_data.total_trims}",
        "\nDetailed Breakdown:",
    )
    for line in overview:
        print(line)
    for year_data in make_data.years:
        print(f"\n {year_data.year}:")
        for model in year_data.models:
            engine_names = [engine.raw_string for engine in model.engines]
            print(f" Model: {model.name}")
            print(f" Engines: {engine_names}")
            print(f" Trims: {model.trims}")
def demonstrate_subaru_processing():
    """Process a Subaru sample whose engines all use the H4 configuration."""
    # Sample Subaru data showing H4 engines
    crosstrek = {
        "name": "crosstrek",
        "engines": ["2.0L H4", "2.0L H4 PLUG-IN HYBRID EV- (PHEV)", "2.5L H4"],
        "submodels": ["Base", "Premium", "Limited", "Hybrid"],
    }
    forester = {
        "name": "forester",
        "engines": ["2.5L H4"],
        "submodels": ["Base", "Premium", "Sport", "Limited"],
    }
    subaru_json = {"subaru": [{"year": "2024", "models": [crosstrek, forester]}]}
    make_data = JsonProcessor().process_json_file(subaru_json, 'subaru.json')
    print("\n\n🚗 Subaru JSON Processing Example (Boxer Engines)")
    print("=" * 50)
    print(f"Display Name: {make_data.name}")
    for year_data in make_data.years:
        print(f"\n{year_data.year}:")
        for model in year_data.models:
            print(f" {model.name}:")
            for engine in model.engines:
                # Annotate boxer layouts and any hybrid fuel type.
                config_note = ""
                if engine.configuration == 'H':
                    config_note = " (Boxer)"
                hybrid_note = ""
                if 'Hybrid' in engine.fuel_type:
                    hybrid_note = " (Hybrid)"
                summary = f"{engine.displacement_l}L {engine.configuration}{engine.cylinders}{config_note}{hybrid_note}"
                print(f" Engine: {engine.raw_string}")
                print(summary)
def demonstrate_l_to_i_processing():
    """Demonstrate L→I normalization during processing.

    For every engine of the sample, prints the configuration as written in
    the raw string next to the normalized one, and flags L→I conversions.
    """
    # Sample data with L-configuration engines
    nissan_json = {
        "nissan": [
            {
                "year": "2024",
                "models": [
                    {
                        "name": "versa",
                        "engines": [
                            "1.6L I4"
                        ],
                        "submodels": ["S", "SV", "SR"]
                    },
                    {
                        "name": "kicks",
                        "engines": [
                            "1.5L L3 PLUG-IN HYBRID EV- (PHEV)"  # L3 → I3
                        ],
                        "submodels": ["S", "SV", "SR"]
                    },
                    {
                        "name": "note",
                        "engines": [
                            "1.2L L3 FULL HYBRID EV- (FHEV)"  # L3 → I3
                        ],
                        "submodels": ["Base", "Premium"]
                    }
                ]
            }
        ]
    }
    processor = JsonProcessor()
    make_data = processor.process_json_file(nissan_json, 'nissan.json')
    print(f"\n\n🎯 L→I Normalization Processing Example")
    print("=" * 42)
    for year_data in make_data.years:
        for model in year_data.models:
            for engine in model.engines:
                # Read the configuration from the second token ("L3", "L4",
                # ...) rather than searching for the literal "L3" only.
                tokens = engine.raw_string.split()
                config_token = tokens[1] if len(tokens) > 1 else ""
                is_l_config = config_token[:1] == "L" and config_token[1:].isdigit()
                original_config = "L" if is_l_config else "I"
                normalized_config = engine.configuration
                print(f"Model: {model.name}")
                print(f" Input: \"{engine.raw_string}\"")
                # Keep before/after apart; glued together they read "L3I3".
                print(f" Configuration: {original_config}{engine.cylinders} → {normalized_config}{engine.cylinders}")
                if original_config == "L" and normalized_config == "I":
                    print(f" 🎯 NORMALIZED: L→I conversion applied")
                print()
def demonstrate_database_ready_output():
    """Print illustrative INSERT statements for one processed make.

    Nothing is executed; the ids in the statements are placeholders.
    """
    # Sample mixed data
    camry = {
        "name": "camry",
        "engines": ["2.5L I4", "2.5L I4 FULL HYBRID EV- (FHEV)"],
        "submodels": ["LE", "XLE", "Hybrid LE"],
    }
    sample_json = {"toyota": [{"year": "2024", "models": [camry]}]}
    make_data = JsonProcessor().process_json_file(sample_json, 'toyota.json')
    # Flatten once; every section below walks models in the same order.
    all_models = [model for year_data in make_data.years for model in year_data.models]
    print("\n\n💾 Database-Ready Output")
    print("=" * 25)
    # Show SQL INSERT statements
    print("-- Make table")
    print(f"INSERT INTO vehicles.make (name) VALUES ('{make_data.name}');")
    print("\n-- Model table (assuming make_id = 1)")
    for model in all_models:
        print(f"INSERT INTO vehicles.model (make_id, name) VALUES (1, '{model.name}');")
    print("\n-- Model Year table (assuming model_id = 1)")
    for year_data in make_data.years:
        print(f"INSERT INTO vehicles.model_year (model_id, year) VALUES (1, {year_data.year});")
    print("\n-- Engine table")
    seen_engines = set()
    for model in all_models:
        for engine in model.engines:
            engine_key = (engine.raw_string, engine.displacement_l, engine.cylinders, engine.fuel_type)
            if engine_key in seen_engines:
                continue  # emit each distinct engine once
            seen_engines.add(engine_key)
            print("INSERT INTO vehicles.engine (name, displacement_l, cylinders, fuel_type, aspiration)")
            print(f" VALUES ('{engine.raw_string}', {engine.displacement_l}, {engine.cylinders}, '{engine.fuel_type}', '{engine.aspiration}');")
    print("\n-- Trim table (assuming model_year_id = 1)")
    for model in all_models:
        for trim in model.trims:
            print(f"INSERT INTO vehicles.trim (model_year_id, name) VALUES (1, '{trim}');")
def run_processing_validation():
    """Process three small samples and check the results.

    Returns:
        True when every sample passes; False as soon as one raises or fails
        a check (the failure is printed).
    """
    print("\n\n✅ Processing Validation")
    print("=" * 25)
    processor = JsonProcessor()

    def single_model_file(make, model, engines):
        # One year, one model, one "Base" trim.
        return {make: [{"year": "2024", "models": [{"name": model, "engines": engines, "submodels": ["Base"]}]}]}

    # Test cases
    test_cases = [
        # Tesla (electric, empty engines)
        ('tesla.json', single_model_file("tesla", "3", [])),
        # Subaru (H4 engines)
        ('subaru.json', single_model_file("subaru", "crosstrek", ["2.0L H4"])),
        # Nissan (L→I normalization)
        ('nissan.json', single_model_file("nissan", "kicks", ["1.5L L3"])),
    ]
    for filename, json_data in test_cases:
        try:
            make_data = processor.process_json_file(json_data, filename)
            # Basic validation
            assert make_data.name is not None, "Make name should not be None"
            assert len(make_data.years) > 0, "Should have at least one year"
            assert make_data.total_models > 0, "Should have at least one model"
            print(f"{filename} processed successfully")
            print(f" Make: {make_data.name}, Models: {make_data.total_models}, Engines: {make_data.total_engines}")
            # Special validations
            models = [model for year_data in make_data.years for model in year_data.models]
            if filename == 'tesla.json':
                # Should have electric motors for empty engines
                for model in models:
                    assert all(e.fuel_type == 'Electric' for e in model.engines), "Tesla should have electric engines"
            if filename == 'nissan.json':
                # Should have L→I normalization
                for model in models:
                    for engine in model.engines:
                        if 'L3' in engine.raw_string:
                            assert engine.configuration == 'I', "L3 should become I3"
        except Exception as e:
            print(f"{filename} failed: {e}")
            return False
    print("\n🎉 All processing validation tests passed!")
    return True
if __name__ == "__main__":
    # Demonstrations first; the validation run decides the closing message.
    for demo in (
        demonstrate_tesla_processing,
        demonstrate_subaru_processing,
        demonstrate_l_to_i_processing,
        demonstrate_database_ready_output,
    ):
        demo()
    success = run_processing_validation()
    for line in (
        "\n\n📋 Summary",
        "=" * 10,
        "✅ JSON file processing implemented",
        "✅ Electric vehicle handling (empty engines → Electric Motor)",
        "✅ L→I normalization during processing",
        "✅ Database-ready output structures",
        "✅ Make name normalization integrated",
        "✅ Engine specification parsing integrated",
    ):
        print(line)
    verdict = (
        "\n🚀 Ready for ETL pipeline integration!"
        if success
        else "\n⚠️ Review failed validations"
    )
    print(verdict)
    for line in (
        "\nNext Steps:",
        "• Integrate with PostgreSQL loader",
        "• Add batch processing for all 55 files",
        "• Implement clear/append modes",
        "• Add CLI interface",
        "• Create comprehensive test suite",
    ):
        print(line)