#!/usr/bin/env python3
"""
Sample JSON Processing Examples

This file demonstrates complete processing of JSON vehicle data,
from file reading through database-ready output structures.

Usage:
    python sample-json-processing.py
"""

import json
from typing import List, Dict, Any, Optional
from dataclasses import dataclass
from pathlib import Path


@dataclass
class EngineSpec:
    """Parsed engine specification"""
    displacement_l: Optional[float]  # Displacement in litres, None when not available
    configuration: str               # Layout letter as produced by the parser (e.g. 'H', 'I')
    cylinders: Optional[int]         # Cylinder count, None when not available
    fuel_type: str
    aspiration: str
    raw_string: str                  # Engine string the spec is associated with


@dataclass
class ModelData:
    """Model information for a specific year"""
    name: str
    engines: List[EngineSpec]
    trims: List[str]  # From submodels


@dataclass
class YearData:
    """Vehicle data for a specific year"""
    year: int
    models: List[ModelData]


@dataclass
class MakeData:
    """Complete make information"""
    name: str      # Normalized display name
    filename: str  # Original JSON filename
    years: List[YearData]

    @property
    def total_models(self) -> int:
        """Number of model entries summed over all years."""
        return sum(len(year.models) for year in self.years)

    @property
    def total_engines(self) -> int:
        """Number of engine entries summed over all models of all years."""
        return sum(len(model.engines)
                   for year in self.years
                   for model in year.models)

    @property
    def total_trims(self) -> int:
        """Number of trim entries summed over all models of all years."""
        return sum(len(model.trims)
                   for year in self.years
                   for model in year.models)


class JsonProcessor:
    """Process JSON vehicle files into structured data"""

    def __init__(self):
        # Import our utility classes here (not at module level) so the data
        # classes and helpers in this file stay importable without them.
        from engine_parsing_examples import EngineSpecParser
        from make_mapping_examples import MakeNameMapper

        self.engine_parser = EngineSpecParser()
        self.make_mapper = MakeNameMapper()

    def process_json_file(self, json_data: Dict[str, Any], filename: str) -> MakeData:
        """Process complete JSON file into structured data

        Args:
            json_data: Mapping whose first key holds a list of year entries.
                Each year entry needs a 'year' value and may carry 'models';
                each model needs a 'name' and may carry 'engines' and
                'submodels'.
            filename: Original JSON filename; it is passed to the make mapper
                to obtain the display name and stored on the result.

        Returns:
            MakeData with one YearData per year entry, in input order.

        Raises:
            IndexError: If json_data has no keys.
            KeyError: If a year entry lacks 'year' or a model lacks 'name'.
            ValueError: If a 'year' value cannot be converted to int.
        """
        # Get the make name (first key in JSON)
        make_key = list(json_data)[0]
        display_name = self.make_mapper.normalize_make_name(filename)

        years_data = []
        for year_entry in json_data[make_key]:
            year = int(year_entry['year'])

            models_data = []
            for model_entry in year_entry.get('models', []):
                model_name = model_entry['name']

                # Process engines
                engine_strings = model_entry.get('engines', [])
                if engine_strings:
                    engines = [
                        self.engine_parser.parse_engine_string(engine_str)
                        for engine_str in engine_strings
                    ]
                else:
                    # Electric vehicle - create default engine
                    engines = [self.engine_parser.create_electric_motor()]

                # Process trims (from submodels)
                trims = model_entry.get('submodels', [])

                models_data.append(ModelData(
                    name=model_name,
                    engines=engines,
                    trims=trims
                ))

            years_data.append(YearData(
                year=year,
                models=models_data
            ))

        return MakeData(
            name=display_name,
            filename=filename,
            years=years_data
        )


def demonstrate_tesla_processing():
    """Demonstrate processing Tesla JSON (electric vehicle example)"""

    # Sample Tesla data (simplified from actual tesla.json)
    tesla_json = {
        "tesla": [
            {
                "year": "2024",
                "models": [
                    {
                        "name": "3",
                        "engines": [],  # Empty - electric vehicle
                        "submodels": [
                            "Long Range AWD",
                            "Performance",
                            "Standard Plus"
                        ]
                    },
                    {
                        "name": "y",
                        "engines": [],  # Empty - electric vehicle
                        "submodels": [
                            "Long Range",
                            "Performance"
                        ]
                    }
                ]
            },
            {
                "year": "2023",
                "models": [
                    {
                        "name": "s",
                        "engines": [],  # Empty - electric vehicle
                        "submodels": [
                            "Plaid",
                            "Long Range Plus"
                        ]
                    }
                ]
            }
        ]
    }

    processor = JsonProcessor()
    make_data = processor.process_json_file(tesla_json, 'tesla.json')

    print("⚡ Tesla JSON Processing Example")
    print("=" * 35)
    print(f"Filename: {make_data.filename}")
    print(f"Display Name: {make_data.name}")
    print(f"Years: {len(make_data.years)}")
    print(f"Total Models: {make_data.total_models}")
    print(f"Total Engines: {make_data.total_engines}")
    print(f"Total Trims: {make_data.total_trims}")

    print("\nDetailed Breakdown:")
    for year_data in make_data.years:
        print(f"\n {year_data.year}:")
        for model in year_data.models:
            print(f" Model: {model.name}")
            print(f" Engines: {[e.raw_string for e in model.engines]}")
            print(f" Trims: {model.trims}")


def demonstrate_subaru_processing():
    """Demonstrate processing Subaru JSON (Boxer engines, H4 configuration)"""

    # Sample Subaru data showing H4 engines
    subaru_json = {
        "subaru": [
            {
                "year": "2024",
                "models": [
                    {
                        "name": "crosstrek",
                        "engines": [
                            "2.0L H4",
                            "2.0L H4 PLUG-IN HYBRID EV- (PHEV)",
                            "2.5L H4"
                        ],
                        "submodels": [
                            "Base",
                            "Premium",
                            "Limited",
                            "Hybrid"
                        ]
                    },
                    {
                        "name": "forester",
                        "engines": [
                            "2.5L H4"
                        ],
                        "submodels": [
                            "Base",
                            "Premium",
                            "Sport",
                            "Limited"
                        ]
                    }
                ]
            }
        ]
    }

    processor = JsonProcessor()
    make_data = processor.process_json_file(subaru_json, 'subaru.json')

    print("\n\n🚗 Subaru JSON Processing Example (Boxer Engines)")
    print("=" * 50)
    print(f"Display Name: {make_data.name}")

    for year_data in make_data.years:
        print(f"\n{year_data.year}:")
        for model in year_data.models:
            print(f" {model.name}:")
            for engine in model.engines:
                # Annotate the line when the parsed spec is an H layout
                # and/or reports a hybrid fuel type.
                config_note = " (Boxer)" if engine.configuration == 'H' else ""
                hybrid_note = " (Hybrid)" if 'Hybrid' in engine.fuel_type else ""
                print(f" Engine: {engine.raw_string}")
                print(f" → {engine.displacement_l}L {engine.configuration}{engine.cylinders}{config_note}{hybrid_note}")


def demonstrate_l_to_i_processing():
    """Demonstrate L→I normalization during processing"""

    # Sample data with L-configuration engines
    nissan_json = {
        "nissan": [
            {
                "year": "2024",
                "models": [
                    {
                        "name": "versa",
                        "engines": [
                            "1.6L I4"
                        ],
                        "submodels": ["S", "SV", "SR"]
                    },
                    {
                        "name": "kicks",
                        "engines": [
                            "1.5L L3 PLUG-IN HYBRID EV- (PHEV)"  # L3 → I3
                        ],
                        "submodels": ["S", "SV", "SR"]
                    },
                    {
                        "name": "note",
                        "engines": [
                            "1.2L L3 FULL HYBRID EV- (FHEV)"  # L3 → I3
                        ],
                        "submodels": ["Base", "Premium"]
                    }
                ]
            }
        ]
    }

    processor = JsonProcessor()
    make_data = processor.process_json_file(nissan_json, 'nissan.json')

    print("\n\n🎯 L→I Normalization Processing Example")
    print("=" * 42)

    for year_data in make_data.years:
        for model in year_data.models:
            for engine in model.engines:
                # The sample data only uses "L3" for the L layout, so a
                # substring test is enough to recover the input layout here.
                original_config = "L" if "L3" in engine.raw_string else "I"
                normalized_config = engine.configuration

                print(f"Model: {model.name}")
                print(f" Input: \"{engine.raw_string}\"")
                print(f" Configuration: {original_config}{engine.cylinders} → {normalized_config}{engine.cylinders}")

                if original_config == "L" and normalized_config == "I":
                    print(" 🎯 NORMALIZED: L→I conversion applied")
                print()


def _sql_literal(value: Any) -> str:
    """Render *value* as a SQL literal for the illustrative INSERT output.

    None becomes NULL, ints and floats are emitted bare, and everything else
    is emitted as a single-quoted string with embedded single quotes doubled.
    """
    if value is None:
        return "NULL"
    if isinstance(value, (int, float)) and not isinstance(value, bool):
        return str(value)
    return "'" + str(value).replace("'", "''") + "'"


def build_insert_statements(make_data: MakeData) -> List[str]:
    """Build the illustrative SQL output lines for a processed make.

    The result contains comment headers and INSERT statements for the make,
    model, model_year, engine and trim tables, in that order. Foreign keys
    are fixed at 1, as stated in the headers. Engines are emitted once per
    distinct (raw_string, displacement_l, cylinders, fuel_type) combination.

    Args:
        make_data: Processed make information.

    Returns:
        Lines ready to be printed one per ``print`` call. Header lines after
        the first start with a newline so sections are separated by a blank
        line.
    """
    lines = [
        "-- Make table",
        f"INSERT INTO vehicles.make (name) VALUES ({_sql_literal(make_data.name)});",
    ]

    lines.append("\n-- Model table (assuming make_id = 1)")
    for year_data in make_data.years:
        for model in year_data.models:
            lines.append(
                f"INSERT INTO vehicles.model (make_id, name) VALUES (1, {_sql_literal(model.name)});"
            )

    lines.append("\n-- Model Year table (assuming model_id = 1)")
    for year_data in make_data.years:
        lines.append(
            f"INSERT INTO vehicles.model_year (model_id, year) VALUES (1, {_sql_literal(year_data.year)});"
        )

    lines.append("\n-- Engine table")
    seen_engines = set()
    for year_data in make_data.years:
        for model in year_data.models:
            for engine in model.engines:
                engine_key = (engine.raw_string, engine.displacement_l,
                              engine.cylinders, engine.fuel_type)
                if engine_key in seen_engines:
                    continue
                seen_engines.add(engine_key)
                values = ", ".join(_sql_literal(v) for v in (
                    engine.raw_string,
                    engine.displacement_l,
                    engine.cylinders,
                    engine.fuel_type,
                    engine.aspiration,
                ))
                lines.append("INSERT INTO vehicles.engine (name, displacement_l, cylinders, fuel_type, aspiration)")
                lines.append(f" VALUES ({values});")

    lines.append("\n-- Trim table (assuming model_year_id = 1)")
    for year_data in make_data.years:
        for model in year_data.models:
            for trim in model.trims:
                lines.append(
                    f"INSERT INTO vehicles.trim (model_year_id, name) VALUES (1, {_sql_literal(trim)});"
                )

    return lines


def demonstrate_database_ready_output():
    """Show how processed data maps to database tables"""

    # Sample mixed data
    sample_json = {
        "toyota": [
            {
                "year": "2024",
                "models": [
                    {
                        "name": "camry",
                        "engines": [
                            "2.5L I4",
                            "2.5L I4 FULL HYBRID EV- (FHEV)"
                        ],
                        "submodels": [
                            "LE",
                            "XLE",
                            "Hybrid LE"
                        ]
                    }
                ]
            }
        ]
    }

    processor = JsonProcessor()
    make_data = processor.process_json_file(sample_json, 'toyota.json')

    print("\n\n💾 Database-Ready Output")
    print("=" * 25)

    # Show SQL INSERT statements
    for line in build_insert_statements(make_data):
        print(line)


def _check(condition: bool, message: str) -> None:
    """Raise AssertionError(message) unless *condition* is true.

    Used in place of the ``assert`` statement, which is removed when Python
    runs with -O and would make the validation pass unconditionally.
    """
    if not condition:
        raise AssertionError(message)


def run_processing_validation():
    """Validate that processing works correctly

    Returns:
        True when every test case passes; False as soon as one fails (the
        failure is printed and remaining cases are skipped).
    """

    print("\n\n✅ Processing Validation")
    print("=" * 25)

    processor = JsonProcessor()

    # Test cases
    test_cases = [
        # Tesla (electric, empty engines)
        ('tesla.json', {"tesla": [{"year": "2024", "models": [{"name": "3", "engines": [], "submodels": ["Base"]}]}]}),
        # Subaru (H4 engines)
        ('subaru.json', {"subaru": [{"year": "2024", "models": [{"name": "crosstrek", "engines": ["2.0L H4"], "submodels": ["Base"]}]}]}),
        # Nissan (L→I normalization)
        ('nissan.json', {"nissan": [{"year": "2024", "models": [{"name": "kicks", "engines": ["1.5L L3"], "submodels": ["Base"]}]}]})
    ]

    for filename, json_data in test_cases:
        try:
            make_data = processor.process_json_file(json_data, filename)

            # Basic validation
            _check(make_data.name is not None, "Make name should not be None")
            _check(len(make_data.years) > 0, "Should have at least one year")
            _check(make_data.total_models > 0, "Should have at least one model")

            print(f"✅ {filename} processed successfully")
            print(f" Make: {make_data.name}, Models: {make_data.total_models}, Engines: {make_data.total_engines}")

            # Special validations
            if filename == 'tesla.json':
                # Should have electric motors for empty engines
                for year_data in make_data.years:
                    for model in year_data.models:
                        _check(
                            all(e.fuel_type == 'Electric' for e in model.engines),
                            "Tesla should have electric engines",
                        )

            if filename == 'nissan.json':
                # Should have L→I normalization
                for year_data in make_data.years:
                    for model in year_data.models:
                        for engine in model.engines:
                            if 'L3' in engine.raw_string:
                                _check(engine.configuration == 'I', "L3 should become I3")

        except Exception as e:
            # Top-level boundary of the demo: report the failure and stop.
            print(f"❌ {filename} failed: {e}")
            return False

    print("\n🎉 All processing validation tests passed!")
    return True


def main():
    """Run every demonstration, then the validation, then print a summary."""
    demonstrate_tesla_processing()
    demonstrate_subaru_processing()
    demonstrate_l_to_i_processing()
    demonstrate_database_ready_output()

    success = run_processing_validation()

    print("\n\n📋 Summary")
    print("=" * 10)
    print("✅ JSON file processing implemented")
    print("✅ Electric vehicle handling (empty engines → Electric Motor)")
    print("✅ L→I normalization during processing")
    print("✅ Database-ready output structures")
    print("✅ Make name normalization integrated")
    print("✅ Engine specification parsing integrated")

    if success:
        print("\n🚀 Ready for ETL pipeline integration!")
    else:
        print("\n⚠️ Review failed validations")

    print("\nNext Steps:")
    print("• Integrate with PostgreSQL loader")
    print("• Add batch processing for all 55 files")
    print("• Implement clear/append modes")
    print("• Add CLI interface")
    print("• Create comprehensive test suite")


if __name__ == "__main__":
    main()