449 lines
15 KiB
Python
449 lines
15 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Sample JSON Processing Examples
|
|
|
|
This file demonstrates complete processing of JSON vehicle data,
|
|
from file reading through database-ready output structures.
|
|
|
|
Usage:
|
|
python sample-json-processing.py
|
|
"""
|
|
|
|
import json
|
|
from typing import List, Dict, Any, Optional
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
|
|
|
|
@dataclass
class EngineSpec:
    """One engine option, broken out into individual attributes.

    The numeric attributes are optional and may be ``None``.
    """

    # Displacement; the demos in this file print it with an "L" suffix.
    displacement_l: Optional[float]
    # Layout letter; the demos in this file compare it against 'H' and 'I'.
    configuration: str
    # Number of cylinders, when known.
    cylinders: Optional[int]
    # Fuel label; this file checks for 'Electric' and for 'Hybrid' in it.
    fuel_type: str
    # Aspiration label, written out as text by the SQL demo.
    aspiration: str
    # Engine description string that the demos echo back as the input.
    raw_string: str
|
|
|
|
|
|
@dataclass
class ModelData:
    """A single model as it appears within one model year."""

    # Model name exactly as given in the JSON entry.
    name: str
    # Engine specs available for this model.
    engines: List[EngineSpec]
    # Trim names; taken from the JSON "submodels" list.
    trims: List[str]
|
|
|
|
|
|
@dataclass
class YearData:
    """All models recorded for one model year."""

    # Model year as an integer.
    year: int
    # Models offered in that year.
    models: List[ModelData]
|
|
|
|
|
|
@dataclass
class MakeData:
    """Everything known about one make, grouped by year.

    The ``total_*`` properties are computed on each access by walking
    ``years``; entries are counted once per year they appear in.
    """

    # Normalized display name
    name: str
    # Original JSON filename
    filename: str
    # Per-year model data
    years: List[YearData]

    @property
    def total_models(self) -> int:
        """Number of model entries summed over every year."""
        count = 0
        for year_entry in self.years:
            count += len(year_entry.models)
        return count

    @property
    def total_engines(self) -> int:
        """Number of engine specs summed over every model of every year."""
        count = 0
        for year_entry in self.years:
            for model_entry in year_entry.models:
                count += len(model_entry.engines)
        return count

    @property
    def total_trims(self) -> int:
        """Number of trims summed over every model of every year."""
        count = 0
        for year_entry in self.years:
            for model_entry in year_entry.models:
                count += len(model_entry.trims)
        return count
|
|
|
|
|
|
class JsonProcessor:
    """Process JSON vehicle files into structured data.

    The expected input is a dict with the make as its first key, mapping to a
    list of year entries. Each year entry has a ``'year'`` and an optional
    ``'models'`` list; each model has a ``'name'`` and optional ``'engines'``
    and ``'submodels'`` lists.
    """

    def __init__(self):
        # Import our utility classes. This happens at construction time, not
        # at module import, so importing this module does not itself require
        # the companion example modules.
        from engine_parsing_examples import EngineSpecParser
        from make_mapping_examples import MakeNameMapper

        self.engine_parser = EngineSpecParser()
        self.make_mapper = MakeNameMapper()

    def process_json_file(self, json_data: Dict[str, Any], filename: str) -> "MakeData":
        """Process a complete JSON file into structured data.

        Args:
            json_data: Parsed JSON content. Its first key selects the list of
                year entries to process.
            filename: Original JSON filename. It is passed to the make mapper
                to derive the display name and stored on the result.

        Returns:
            A ``MakeData`` holding one ``YearData`` per year entry, in input
            order.

        Raises:
            ValueError: If ``json_data`` is empty, or a year value cannot be
                converted to ``int``.
            KeyError: If a year entry lacks ``'year'`` or a model entry lacks
                ``'name'``.
        """
        # Checked first so an empty document produces a clear error instead of
        # an IndexError from indexing an empty key list.
        if not json_data:
            raise ValueError(
                f"JSON data for {filename!r} is empty; expected a top-level make key"
            )

        # Get the make name (first key in JSON)
        make_key = next(iter(json_data))
        display_name = self.make_mapper.normalize_make_name(filename)

        years_data = []
        for year_entry in json_data[make_key]:
            # `or []` also tolerates an explicit JSON null for "models".
            model_entries = year_entry.get('models') or []
            years_data.append(YearData(
                year=int(year_entry['year']),
                models=[self._build_model(entry) for entry in model_entries],
            ))

        return MakeData(
            name=display_name,
            filename=filename,
            years=years_data,
        )

    def _build_model(self, model_entry: Dict[str, Any]) -> "ModelData":
        """Convert one JSON model entry into a ``ModelData``."""
        return ModelData(
            name=model_entry['name'],
            engines=self._parse_engines(model_entry.get('engines', [])),
            # Trims come from "submodels". Copy the list so later changes to
            # the result never write through to the caller's JSON data.
            trims=list(model_entry.get('submodels') or []),
        )

    def _parse_engines(self, engine_strings: Optional[List[str]]) -> List["EngineSpec"]:
        """Parse engine strings; an empty or missing list yields one electric motor."""
        if not engine_strings:
            # Electric vehicle - create default engine
            return [self.engine_parser.create_electric_motor()]
        return [
            self.engine_parser.parse_engine_string(engine_str)
            for engine_str in engine_strings
        ]
|
|
|
|
|
|
def demonstrate_tesla_processing():
    """Run a small Tesla sample through JsonProcessor and print the result.

    Every model in the sample has an empty ``engines`` list, which is the
    electric-vehicle case of the processor.
    """

    def electric_model(name, submodels):
        # Empty engines list - electric vehicle
        return {"name": name, "engines": [], "submodels": submodels}

    # Sample Tesla data (simplified from actual tesla.json)
    tesla_json = {
        "tesla": [
            {
                "year": "2024",
                "models": [
                    electric_model("3", ["Long Range AWD", "Performance", "Standard Plus"]),
                    electric_model("y", ["Long Range", "Performance"]),
                ],
            },
            {
                "year": "2023",
                "models": [
                    electric_model("s", ["Plaid", "Long Range Plus"]),
                ],
            },
        ]
    }

    make_data = JsonProcessor().process_json_file(tesla_json, 'tesla.json')

    header_lines = [
        "⚡ Tesla JSON Processing Example",
        "=" * 35,
        "Filename: tesla.json",
        f"Display Name: {make_data.name}",
        f"Years: {len(make_data.years)}",
        f"Total Models: {make_data.total_models}",
        f"Total Engines: {make_data.total_engines}",
        f"Total Trims: {make_data.total_trims}",
    ]
    for line in header_lines:
        print(line)

    print("\nDetailed Breakdown:")
    for year_entry in make_data.years:
        print(f"\n {year_entry.year}:")
        for model_entry in year_entry.models:
            engine_names = [spec.raw_string for spec in model_entry.engines]
            print(f" Model: {model_entry.name}")
            print(f" Engines: {engine_names}")
            print(f" Trims: {model_entry.trims}")
|
|
|
|
|
|
def demonstrate_subaru_processing():
    """Run a Subaru sample (H4 engine strings) through JsonProcessor and print it.

    Engines whose parsed configuration is 'H' are labelled as Boxer, and
    engines whose fuel type contains 'Hybrid' are labelled as Hybrid.
    """
    # Sample Subaru data showing H4 engines
    crosstrek = {
        "name": "crosstrek",
        "engines": [
            "2.0L H4",
            "2.0L H4 PLUG-IN HYBRID EV- (PHEV)",
            "2.5L H4",
        ],
        "submodels": ["Base", "Premium", "Limited", "Hybrid"],
    }
    forester = {
        "name": "forester",
        "engines": ["2.5L H4"],
        "submodels": ["Base", "Premium", "Sport", "Limited"],
    }
    subaru_json = {
        "subaru": [
            {"year": "2024", "models": [crosstrek, forester]},
        ]
    }

    make_data = JsonProcessor().process_json_file(subaru_json, 'subaru.json')

    print("\n\n🚗 Subaru JSON Processing Example (Boxer Engines)")
    print("=" * 50)
    print(f"Display Name: {make_data.name}")

    for year_entry in make_data.years:
        print(f"\n{year_entry.year}:")
        for model_entry in year_entry.models:
            print(f" {model_entry.name}:")
            for spec in model_entry.engines:
                config_note = ""
                if spec.configuration == 'H':
                    config_note = " (Boxer)"

                hybrid_note = ""
                if 'Hybrid' in spec.fuel_type:
                    hybrid_note = " (Hybrid)"

                print(f" Engine: {spec.raw_string}")
                print(f" → {spec.displacement_l}L {spec.configuration}{spec.cylinders}{config_note}{hybrid_note}")
|
|
|
|
|
|
def demonstrate_l_to_i_processing():
    """Demonstrate L→I normalization during processing.

    Processes a Nissan sample containing both "I" and "L" engine strings and
    prints, for each engine, the configuration written in the input next to
    the configuration produced by the parser. When the input used "L" and the
    parsed configuration is "I", a normalization notice is printed.
    """
    # Local import: only this demo needs it.
    import re

    # A standalone "L<digit>" token, e.g. the "L3" in "1.5L L3". The word
    # boundary stops the displacement suffix ("1.5L") from matching, and any
    # cylinder count is accepted rather than only 3.
    l_config_token = re.compile(r"\bL(?=\d)")

    # Sample data with L-configuration engines
    nissan_json = {
        "nissan": [
            {
                "year": "2024",
                "models": [
                    {
                        "name": "versa",
                        "engines": [
                            "1.6L I4"
                        ],
                        "submodels": ["S", "SV", "SR"]
                    },
                    {
                        "name": "kicks",
                        "engines": [
                            "1.5L L3 PLUG-IN HYBRID EV- (PHEV)"  # L3 → I3
                        ],
                        "submodels": ["S", "SV", "SR"]
                    },
                    {
                        "name": "note",
                        "engines": [
                            "1.2L L3 FULL HYBRID EV- (FHEV)"  # L3 → I3
                        ],
                        "submodels": ["Base", "Premium"]
                    }
                ]
            }
        ]
    }

    processor = JsonProcessor()
    make_data = processor.process_json_file(nissan_json, 'nissan.json')

    print("\n\n🎯 L→I Normalization Processing Example")
    print("=" * 42)

    for year_data in make_data.years:
        for model in year_data.models:
            for engine in model.engines:
                # Configuration letter as written in the input string.
                original_config = "L" if l_config_token.search(engine.raw_string) else "I"
                normalized_config = engine.configuration

                print(f"Model: {model.name}")
                print(f" Input: \"{engine.raw_string}\"")
                print(f" Configuration: {original_config}{engine.cylinders} → {normalized_config}{engine.cylinders}")

                if original_config == "L" and normalized_config == "I":
                    print(" 🎯 NORMALIZED: L→I conversion applied")
                print()
|
|
|
|
|
|
def _sql_literal(value: Any) -> str:
    """Render a Python value as SQL literal text for the demo output.

    ``None`` becomes ``NULL``, numbers are written bare, and anything else is
    single-quoted with embedded single quotes doubled.
    """
    if value is None:
        return "NULL"
    if isinstance(value, (int, float)):
        return str(value)
    return "'" + str(value).replace("'", "''") + "'"


def demonstrate_database_ready_output():
    """Show how processed data maps to database tables.

    Processes a small Toyota sample and prints illustrative ``INSERT``
    statements for the make, model, model_year, engine and trim tables. The
    foreign-key ids are fixed placeholders (``1``), as the printed comments
    state.

    The statements are printed only, never executed. Values go through
    ``_sql_literal`` so optional fields print as ``NULL`` and quotes in names
    are escaped. A real loader should use parameterized queries rather than
    string-built SQL.
    """
    # Sample mixed data
    sample_json = {
        "toyota": [
            {
                "year": "2024",
                "models": [
                    {
                        "name": "camry",
                        "engines": [
                            "2.5L I4",
                            "2.5L I4 FULL HYBRID EV- (FHEV)"
                        ],
                        "submodels": [
                            "LE",
                            "XLE",
                            "Hybrid LE"
                        ]
                    }
                ]
            }
        ]
    }

    processor = JsonProcessor()
    make_data = processor.process_json_file(sample_json, 'toyota.json')

    print("\n\n💾 Database-Ready Output")
    print("=" * 25)

    # Show SQL INSERT statements
    print("-- Make table")
    print(f"INSERT INTO vehicles.make (name) VALUES ({_sql_literal(make_data.name)});")

    print("\n-- Model table (assuming make_id = 1)")
    for year_data in make_data.years:
        for model in year_data.models:
            print(f"INSERT INTO vehicles.model (make_id, name) VALUES (1, {_sql_literal(model.name)});")

    print("\n-- Model Year table (assuming model_id = 1)")
    for year_data in make_data.years:
        print(f"INSERT INTO vehicles.model_year (model_id, year) VALUES (1, {year_data.year});")

    print("\n-- Engine table")
    # Each distinct engine is emitted once, even if several models share it.
    unique_engines = set()
    for year_data in make_data.years:
        for model in year_data.models:
            for engine in model.engines:
                engine_key = (engine.raw_string, engine.displacement_l, engine.cylinders, engine.fuel_type)
                if engine_key in unique_engines:
                    continue
                unique_engines.add(engine_key)
                values = ", ".join(_sql_literal(v) for v in (
                    engine.raw_string,
                    engine.displacement_l,
                    engine.cylinders,
                    engine.fuel_type,
                    engine.aspiration,
                ))
                print("INSERT INTO vehicles.engine (name, displacement_l, cylinders, fuel_type, aspiration)")
                print(f" VALUES ({values});")

    print("\n-- Trim table (assuming model_year_id = 1)")
    for year_data in make_data.years:
        for model in year_data.models:
            for trim in model.trims:
                print(f"INSERT INTO vehicles.trim (model_year_id, name) VALUES (1, {_sql_literal(trim)});")
|
|
|
|
|
|
def run_processing_validation() -> bool:
    """Validate that processing works correctly.

    Runs three small inline JSON samples (Tesla, Subaru, Nissan) through
    ``JsonProcessor`` and checks the results. Progress is printed as it goes.

    Returns:
        ``True`` if every sample passes. ``False`` as soon as one sample
        fails, after printing the failure; later samples are not run.
    """

    def require(condition, message):
        # An explicit raise instead of `assert`: assert statements are
        # removed under `python -O`, which would make every check pass.
        if not condition:
            raise AssertionError(message)

    print("\n\n✅ Processing Validation")
    print("=" * 25)

    processor = JsonProcessor()

    # Test cases
    test_cases = [
        # Tesla (electric, empty engines)
        ('tesla.json', {"tesla": [{"year": "2024", "models": [{"name": "3", "engines": [], "submodels": ["Base"]}]}]}),
        # Subaru (H4 engines)
        ('subaru.json', {"subaru": [{"year": "2024", "models": [{"name": "crosstrek", "engines": ["2.0L H4"], "submodels": ["Base"]}]}]}),
        # Nissan (L→I normalization)
        ('nissan.json', {"nissan": [{"year": "2024", "models": [{"name": "kicks", "engines": ["1.5L L3"], "submodels": ["Base"]}]}]}),
    ]

    for filename, json_data in test_cases:
        # Deliberately broad: this is the reporting boundary of the harness,
        # so any failure (a failed check or a processing error) is printed
        # and turned into a False result.
        try:
            make_data = processor.process_json_file(json_data, filename)

            # Basic validation
            require(make_data.name is not None, "Make name should not be None")
            require(len(make_data.years) > 0, "Should have at least one year")
            require(make_data.total_models > 0, "Should have at least one model")

            print(f"✅ {filename} processed successfully")
            print(f" Make: {make_data.name}, Models: {make_data.total_models}, Engines: {make_data.total_engines}")

            models = [model for year_data in make_data.years for model in year_data.models]

            # Special validations
            if filename == 'tesla.json':
                # Should have electric motors for empty engines
                for model in models:
                    require(
                        all(e.fuel_type == 'Electric' for e in model.engines),
                        "Tesla should have electric engines",
                    )

            if filename == 'nissan.json':
                # Should have L→I normalization
                for model in models:
                    for engine in model.engines:
                        if 'L3' in engine.raw_string:
                            require(engine.configuration == 'I', "L3 should become I3")

        except Exception as e:
            print(f"❌ {filename} failed: {e}")
            return False

    print("\n🎉 All processing validation tests passed!")
    return True
|
|
|
|
|
|
if __name__ == "__main__":
    # Run each demonstration in turn, then the validation pass.
    for demo in (
        demonstrate_tesla_processing,
        demonstrate_subaru_processing,
        demonstrate_l_to_i_processing,
        demonstrate_database_ready_output,
    ):
        demo()

    success = run_processing_validation()

    print("\n\n📋 Summary")
    print("=" * 10)
    for summary_line in (
        "✅ JSON file processing implemented",
        "✅ Electric vehicle handling (empty engines → Electric Motor)",
        "✅ L→I normalization during processing",
        "✅ Database-ready output structures",
        "✅ Make name normalization integrated",
        "✅ Engine specification parsing integrated",
    ):
        print(summary_line)

    # The closing message depends on the validation result.
    print("\n🚀 Ready for ETL pipeline integration!" if success else "\n⚠️ Review failed validations")

    print("\nNext Steps:")
    for next_step in (
        "• Integrate with PostgreSQL loader",
        "• Add batch processing for all 55 files",
        "• Implement clear/append modes",
        "• Add CLI interface",
        "• Create comprehensive test suite",
    ):
        print(next_step)