Initial Commit

This commit is contained in:
Eric Gullickson
2025-09-17 16:09:15 -05:00
parent 0cdb9803de
commit a052040e3a
373 changed files with 437090 additions and 6773 deletions

View File

@@ -0,0 +1,449 @@
#!/usr/bin/env python3
"""
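Sample JSON Processing Examples

This file demonstrates complete processing of JSON vehicle data, from
already-parsed JSON input (the samples used here are dicts embedded in this
file, modelled on the per-make JSON files such as tesla.json) through
database-ready output structures.

Usage:
    python sample-json-processing.py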
"""
import json
from typing import List, Dict, Any, Optional
from dataclasses import dataclass
from pathlib import Path
@dataclass
class EngineSpec:
    """One engine description broken down into individual attributes."""

    # Displacement in litres; None when no value is available.
    displacement_l: Optional[float]
    # Layout letter placed in front of the cylinder count (e.g. 'H', 'I').
    configuration: str
    # Number of cylinders; None when no value is available.
    cylinders: Optional[int]
    # Fuel description, e.g. 'Electric' or a value containing 'Hybrid'.
    fuel_type: str
    # Aspiration description.
    aspiration: str
    # The engine string this spec was derived from.
    raw_string: str
@dataclass
class ModelData:
    """A single model as offered in one particular year."""

    # Model name exactly as it appears in the source JSON.
    name: str
    # Engines available for this model.
    engines: List[EngineSpec]
    # Trim names; these come from the JSON "submodels" list.
    trims: List[str]
@dataclass
class YearData:
    """All models recorded for one model year."""

    # The model year as an integer.
    year: int
    # Models available in that year.
    models: List[ModelData]
@dataclass
class MakeData:
    """Everything known about one make, grouped by year."""

    # Display name after normalization.
    name: str
    # Name of the JSON file the data came from.
    filename: str
    # One entry per model year.
    years: List[YearData]

    def _iter_models(self):
        """Yield every model of every year, in stored order."""
        for year_entry in self.years:
            yield from year_entry.models

    @property
    def total_models(self) -> int:
        """Number of models summed over all years."""
        return sum(1 for _ in self._iter_models())

    @property
    def total_engines(self) -> int:
        """Number of engine entries summed over all models and years."""
        return sum(len(entry.engines) for entry in self._iter_models())

    @property
    def total_trims(self) -> int:
        """Number of trim entries summed over all models and years."""
        return sum(len(entry.trims) for entry in self._iter_models())
class JsonProcessor:
    """Process JSON vehicle files into structured data.

    The document shape read by ``process_json_file`` is::

        {"<make>": [{"year": "2024",
                     "models": [{"name": "...",
                                 "engines": ["..."],
                                 "submodels": ["..."]}]}]}

    Engine strings are handed to ``EngineSpecParser`` and the display name is
    obtained from ``MakeNameMapper`` using the filename.
    """

    def __init__(self):
        # Imported here rather than at module level, so the helper modules
        # are only required once a processor is actually constructed.
        from engine_parsing_examples import EngineSpecParser
        from make_mapping_examples import MakeNameMapper

        self.engine_parser = EngineSpecParser()
        self.make_mapper = MakeNameMapper()

    def process_json_file(self, json_data: Dict[str, Any], filename: str) -> MakeData:
        """Process a complete, already-parsed JSON document into ``MakeData``.

        Args:
            json_data: Parsed JSON object. Its first key is taken as the make
                and must map to a list of year entries.
            filename: Name of the source file; it is stored on the result and
                passed to the make mapper to obtain the display name.

        Returns:
            A ``MakeData`` holding one ``YearData`` per year entry.

        Raises:
            IndexError: If ``json_data`` is empty (there is no make key).
            KeyError: If a year entry lacks ``"year"`` or a model entry
                lacks ``"name"``.
            ValueError: If a ``"year"`` value cannot be converted to ``int``.
        """
        if not json_data:
            # Same exception type the bare [0] lookup used to raise, but with
            # a message that identifies the offending file.
            raise IndexError(f"{filename}: JSON document has no make key")

        # The make is the first key of the document.
        make_key = next(iter(json_data))
        display_name = self.make_mapper.normalize_make_name(filename)

        years_data = [
            self._build_year(year_entry) for year_entry in json_data[make_key]
        ]
        return MakeData(name=display_name, filename=filename, years=years_data)

    def _build_year(self, year_entry: Dict[str, Any]) -> YearData:
        """Convert one year entry into ``YearData``."""
        year = int(year_entry['year'])
        # `or []` also covers an explicit JSON null, which .get(key, [])
        # would pass through as None.
        model_entries = year_entry.get('models') or []
        return YearData(
            year=year,
            models=[self._build_model(entry) for entry in model_entries],
        )

    def _build_model(self, model_entry: Dict[str, Any]) -> ModelData:
        """Convert one model entry into ``ModelData``."""
        model_name = model_entry['name']
        engines = self._build_engines(model_entry.get('engines'))
        # Trims come from "submodels". Copy the list so the result does not
        # alias the caller's JSON structure, and treat null as "no trims".
        trims = list(model_entry.get('submodels') or [])
        return ModelData(name=model_name, engines=engines, trims=trims)

    def _build_engines(self, engine_strings) -> list:
        """Parse engine strings; an empty or missing list means electric."""
        if not engine_strings:
            # Electric vehicle - create default engine
            return [self.engine_parser.create_electric_motor()]
        return [
            self.engine_parser.parse_engine_string(engine_str)
            for engine_str in engine_strings
        ]
def demonstrate_tesla_processing():
    """Run the Tesla sample through the processor and print a summary.

    Every model in the sample has an empty "engines" list, which is how the
    processor recognises an electric vehicle.
    """
    # Sample Tesla data (simplified from actual tesla.json); all "engines"
    # lists are intentionally empty.
    sample = {
        "tesla": [
            {
                "year": "2024",
                "models": [
                    {
                        "name": "3",
                        "engines": [],
                        "submodels": ["Long Range AWD", "Performance", "Standard Plus"],
                    },
                    {
                        "name": "y",
                        "engines": [],
                        "submodels": ["Long Range", "Performance"],
                    },
                ],
            },
            {
                "year": "2023",
                "models": [
                    {
                        "name": "s",
                        "engines": [],
                        "submodels": ["Plaid", "Long Range Plus"],
                    },
                ],
            },
        ]
    }

    result = JsonProcessor().process_json_file(sample, 'tesla.json')

    header_lines = (
        "⚡ Tesla JSON Processing Example",
        "=" * 35,
        "Filename: tesla.json",
        f"Display Name: {result.name}",
        f"Years: {len(result.years)}",
        f"Total Models: {result.total_models}",
        f"Total Engines: {result.total_engines}",
        f"Total Trims: {result.total_trims}",
        "\nDetailed Breakdown:",
    )
    for line in header_lines:
        print(line)

    for year_entry in result.years:
        print(f"\n {year_entry.year}:")
        for entry in year_entry.models:
            print(f" Model: {entry.name}")
            raw_engines = [spec.raw_string for spec in entry.engines]
            print(f" Engines: {raw_engines}")
            print(f" Trims: {entry.trims}")
def demonstrate_subaru_processing():
    """Run the Subaru sample through the processor and print each engine.

    Engines whose configuration is 'H' are annotated as Boxer, and engines
    whose fuel type contains 'Hybrid' are annotated as Hybrid.
    """
    # Sample Subaru data; every engine string uses the H4 layout.
    sample = {
        "subaru": [
            {
                "year": "2024",
                "models": [
                    {
                        "name": "crosstrek",
                        "engines": [
                            "2.0L H4",
                            "2.0L H4 PLUG-IN HYBRID EV- (PHEV)",
                            "2.5L H4",
                        ],
                        "submodels": ["Base", "Premium", "Limited", "Hybrid"],
                    },
                    {
                        "name": "forester",
                        "engines": ["2.5L H4"],
                        "submodels": ["Base", "Premium", "Sport", "Limited"],
                    },
                ],
            },
        ]
    }

    result = JsonProcessor().process_json_file(sample, 'subaru.json')

    print("\n\n🚗 Subaru JSON Processing Example (Boxer Engines)")
    print("=" * 50)
    print(f"Display Name: {result.name}")

    for year_entry in result.years:
        print(f"\n{year_entry.year}:")
        for entry in year_entry.models:
            print(f" {entry.name}:")
            for spec in entry.engines:
                # Build the optional annotations that follow the layout.
                notes = ""
                if spec.configuration == 'H':
                    notes += " (Boxer)"
                if 'Hybrid' in spec.fuel_type:
                    notes += " (Hybrid)"
                print(f" Engine: {spec.raw_string}")
                print(f"{spec.displacement_l}L {spec.configuration}{spec.cylinders}{notes}")
def demonstrate_l_to_i_processing():
    """Demonstrate L→I normalization during processing.

    Runs a Nissan sample containing ``L3`` engine strings through the
    processor and prints, per engine, the layout as written in the input
    next to the configuration reported by the parser.
    """
    # Sample data with L-configuration engines
    nissan_json = {
        "nissan": [
            {
                "year": "2024",
                "models": [
                    {
                        "name": "versa",
                        "engines": [
                            "1.6L I4"
                        ],
                        "submodels": ["S", "SV", "SR"]
                    },
                    {
                        "name": "kicks",
                        "engines": [
                            "1.5L L3 PLUG-IN HYBRID EV- (PHEV)"  # L3 → I3
                        ],
                        "submodels": ["S", "SV", "SR"]
                    },
                    {
                        "name": "note",
                        "engines": [
                            "1.2L L3 FULL HYBRID EV- (FHEV)"  # L3 → I3
                        ],
                        "submodels": ["Base", "Premium"]
                    }
                ]
            }
        ]
    }
    processor = JsonProcessor()
    make_data = processor.process_json_file(nissan_json, 'nissan.json')

    print("\n\n🎯 L→I Normalization Processing Example")
    print("=" * 42)
    for year_data in make_data.years:
        for model in year_data.models:
            for engine in model.engines:
                # Display-only heuristic: the input layout is reported as "L"
                # only when the raw string contains "L3"; anything else is
                # shown as "I".
                original_config = "L" if "L3" in engine.raw_string else "I"
                normalized_config = engine.configuration
                print(f"Model: {model.name}")
                print(f" Input: \"{engine.raw_string}\"")
                # Separate the two layouts with an arrow; without it the line
                # reads as one run-together token such as "L3I3".
                print(
                    f" Configuration: {original_config}{engine.cylinders}"
                    f" → {normalized_config}{engine.cylinders}"
                )
                if original_config == "L" and normalized_config == "I":
                    print(" 🎯 NORMALIZED: L→I conversion applied")
                print()
def _sql_literal(value):
    """Render *value* as a SQL literal for the illustrative INSERT output.

    ``None`` becomes ``NULL``, numbers are emitted bare, and anything else is
    single-quoted with embedded single quotes doubled.
    """
    if value is None:
        return "NULL"
    if isinstance(value, (int, float)):
        return str(value)
    return "'" + str(value).replace("'", "''") + "'"


def demonstrate_database_ready_output():
    """Show how processed data maps to database tables.

    Processes a small Toyota sample and prints example INSERT statements for
    the make, model, model_year, engine and trim tables. The statements are
    printed only, never executed; the foreign-key ids are fixed at 1 as the
    printed comments state.

    NOTE: this is demonstration output. Code that really talks to a database
    should pass values as query parameters instead of building SQL text.
    """
    # Sample mixed data
    sample_json = {
        "toyota": [
            {
                "year": "2024",
                "models": [
                    {
                        "name": "camry",
                        "engines": [
                            "2.5L I4",
                            "2.5L I4 FULL HYBRID EV- (FHEV)"
                        ],
                        "submodels": [
                            "LE",
                            "XLE",
                            "Hybrid LE"
                        ]
                    }
                ]
            }
        ]
    }
    processor = JsonProcessor()
    make_data = processor.process_json_file(sample_json, 'toyota.json')

    print("\n\n💾 Database-Ready Output")
    print("=" * 25)

    # Show SQL INSERT statements
    print("-- Make table")
    print(f"INSERT INTO vehicles.make (name) VALUES ({_sql_literal(make_data.name)});")

    print("\n-- Model table (assuming make_id = 1)")
    for year_data in make_data.years:
        for model in year_data.models:
            print(f"INSERT INTO vehicles.model (make_id, name) VALUES (1, {_sql_literal(model.name)});")

    print("\n-- Model Year table (assuming model_id = 1)")
    for year_data in make_data.years:
        print(f"INSERT INTO vehicles.model_year (model_id, year) VALUES (1, {year_data.year});")

    print("\n-- Engine table")
    # Emit each distinct engine once; aspiration is not part of the key.
    unique_engines = set()
    for year_data in make_data.years:
        for model in year_data.models:
            for engine in model.engines:
                engine_key = (engine.raw_string, engine.displacement_l, engine.cylinders, engine.fuel_type)
                if engine_key in unique_engines:
                    continue
                unique_engines.add(engine_key)
                # displacement_l and cylinders are Optional, so they must be
                # rendered through _sql_literal to print NULL rather than None.
                values = ", ".join(
                    _sql_literal(field)
                    for field in (
                        engine.raw_string,
                        engine.displacement_l,
                        engine.cylinders,
                        engine.fuel_type,
                        engine.aspiration,
                    )
                )
                print("INSERT INTO vehicles.engine (name, displacement_l, cylinders, fuel_type, aspiration)")
                print(f" VALUES ({values});")

    print("\n-- Trim table (assuming model_year_id = 1)")
    for year_data in make_data.years:
        for model in year_data.models:
            for trim in model.trims:
                print(f"INSERT INTO vehicles.trim (model_year_id, name) VALUES (1, {_sql_literal(trim)});")
def run_processing_validation() -> bool:
    """Validate that processing works correctly.

    Runs three small documents (Tesla, Subaru, Nissan) through a
    ``JsonProcessor`` and checks basic invariants plus two make-specific
    expectations.

    Returns:
        True when every case passes; False as soon as one case fails (the
        failure is printed and the remaining cases are not run).
    """

    def _require(condition, message):
        # Raise explicitly instead of using `assert`: assert statements are
        # removed when Python runs with -O, which would make every check a
        # no-op. AssertionError keeps the reported exception type unchanged.
        if not condition:
            raise AssertionError(message)

    print("\n\n✅ Processing Validation")
    print("=" * 25)
    processor = JsonProcessor()

    # Test cases
    test_cases = [
        # Tesla (electric, empty engines)
        ('tesla.json', {"tesla": [{"year": "2024", "models": [{"name": "3", "engines": [], "submodels": ["Base"]}]}]}),
        # Subaru (H4 engines)
        ('subaru.json', {"subaru": [{"year": "2024", "models": [{"name": "crosstrek", "engines": ["2.0L H4"], "submodels": ["Base"]}]}]}),
        # Nissan (L→I normalization)
        ('nissan.json', {"nissan": [{"year": "2024", "models": [{"name": "kicks", "engines": ["1.5L L3"], "submodels": ["Base"]}]}]})
    ]

    for filename, json_data in test_cases:
        try:
            make_data = processor.process_json_file(json_data, filename)

            # Basic validation
            _require(make_data.name is not None, "Make name should not be None")
            _require(len(make_data.years) > 0, "Should have at least one year")
            _require(make_data.total_models > 0, "Should have at least one model")
            print(f"{filename} processed successfully")
            print(f" Make: {make_data.name}, Models: {make_data.total_models}, Engines: {make_data.total_engines}")

            # Special validations look at every engine of every model.
            engines = [
                engine
                for year_data in make_data.years
                for model in year_data.models
                for engine in model.engines
            ]
            if filename == 'tesla.json':
                # Should have electric motors for empty engines
                _require(
                    all(e.fuel_type == 'Electric' for e in engines),
                    "Tesla should have electric engines",
                )
            if filename == 'nissan.json':
                # Should have L→I normalization
                _require(
                    all(e.configuration == 'I' for e in engines if 'L3' in e.raw_string),
                    "L3 should become I3",
                )
        except Exception as e:
            # Deliberately broad: any failure in a case (a failed check or an
            # error raised by the processor) is reported and ends the run.
            print(f"{filename} failed: {e}")
            return False

    print("\n🎉 All processing validation tests passed!")
    return True
if __name__ == "__main__":
    # Run the four demonstrations in order, then the validation pass, whose
    # boolean result selects the closing message.
    for demo in (
        demonstrate_tesla_processing,
        demonstrate_subaru_processing,
        demonstrate_l_to_i_processing,
        demonstrate_database_ready_output,
    ):
        demo()
    success = run_processing_validation()

    for line in (
        "\n\n📋 Summary",
        "=" * 10,
        "✅ JSON file processing implemented",
        "✅ Electric vehicle handling (empty engines → Electric Motor)",
        "✅ L→I normalization during processing",
        "✅ Database-ready output structures",
        "✅ Make name normalization integrated",
        "✅ Engine specification parsing integrated",
    ):
        print(line)

    print("\n🚀 Ready for ETL pipeline integration!" if success else "\n⚠️ Review failed validations")

    for line in (
        "\nNext Steps:",
        "• Integrate with PostgreSQL loader",
        "• Add batch processing for all 55 files",
        "• Implement clear/append modes",
        "• Add CLI interface",
        "• Create comprehensive test suite",
    ):
        print(line)