#!/usr/bin/env python3
"""
Make Name Mapping Examples

This file demonstrates the complete make name normalization process,
converting JSON filenames to proper display names for the database.

Usage: python make-mapping-examples.py
"""

import json
import glob
import os
from typing import Dict, Set, List, Tuple
from dataclasses import dataclass


@dataclass
class ValidationReport:
    """Make name validation report.

    Attributes:
        total_files: Number of filenames that were mapped.
        valid_mappings: Number of mapped names found in the authoritative list.
        mismatches: One dict per mapped name missing from the authoritative
            list, with the keys 'filename', 'mapped_name' and 'status'.
    """

    total_files: int
    valid_mappings: int
    mismatches: List[Dict[str, str]]

    @property
    def success_rate(self) -> float:
        """Return valid_mappings / total_files, or 0.0 when no files were processed."""
        return self.valid_mappings / self.total_files if self.total_files > 0 else 0.0


class MakeNameMapper:
    """Convert JSON filenames to proper make display names."""

    # Extension stripped from filenames before normalization.
    _JSON_SUFFIX = '.json'

    # Simulates the 55 JSON files found in the actual directory.
    _JSON_FILES = (
        'acura.json', 'alfa_romeo.json', 'aston_martin.json', 'audi.json',
        'bentley.json', 'bmw.json', 'buick.json', 'cadillac.json',
        'chevrolet.json', 'chrysler.json', 'dodge.json', 'ferrari.json',
        'fiat.json', 'ford.json', 'genesis.json', 'geo.json', 'gmc.json',
        'honda.json', 'hummer.json', 'hyundai.json', 'infiniti.json',
        'isuzu.json', 'jaguar.json', 'jeep.json', 'kia.json',
        'lamborghini.json', 'land_rover.json', 'lexus.json', 'lincoln.json',
        'lotus.json', 'lucid.json', 'maserati.json', 'mazda.json',
        'mclaren.json', 'mercury.json', 'mini.json', 'mitsubishi.json',
        'nissan.json', 'oldsmobile.json', 'plymouth.json', 'polestar.json',
        'pontiac.json', 'porsche.json', 'ram.json', 'rivian.json',
        'rolls_royce.json', 'saab.json', 'saturn.json', 'scion.json',
        'smart.json', 'subaru.json', 'tesla.json', 'toyota.json',
        'volkswagen.json', 'volvo.json',
    )

    def __init__(self):
        # Special capitalization cases, keyed by the title-cased form.
        self.special_cases = {
            'Bmw': 'BMW',          # Bayerische Motoren Werke
            'Gmc': 'GMC',          # General Motors Company
            'Mini': 'MINI',        # Brand styling
            'Mclaren': 'McLaren',  # Scottish naming convention
        }

        # Authoritative makes list (would be loaded from sources/makes.json)
        self.authoritative_makes = {
            'Acura', 'Alfa Romeo', 'Aston Martin', 'Audi', 'BMW', 'Bentley',
            'Buick', 'Cadillac', 'Chevrolet', 'Chrysler', 'Dodge', 'Ferrari',
            'Fiat', 'Ford', 'Genesis', 'Geo', 'GMC', 'Honda', 'Hummer',
            'Hyundai', 'Infiniti', 'Isuzu', 'Jaguar', 'Jeep', 'Kia',
            'Lamborghini', 'Land Rover', 'Lexus', 'Lincoln', 'Lotus', 'Lucid',
            'MINI', 'Maserati', 'Mazda', 'McLaren', 'Mercury', 'Mitsubishi',
            'Nissan', 'Oldsmobile', 'Plymouth', 'Polestar', 'Pontiac',
            'Porsche', 'Ram', 'Rivian', 'Rolls Royce', 'Saab', 'Saturn',
            'Scion', 'Smart', 'Subaru', 'Tesla', 'Toyota', 'Volkswagen',
            'Volvo',
        }

    def normalize_make_name(self, filename: str) -> str:
        """Convert a filename to its proper display name.

        Steps: strip a trailing '.json' extension (case-insensitive), turn
        underscores into spaces, collapse runs of whitespace, apply title
        case, then apply the special capitalization overrides.

        Args:
            filename: Make filename such as 'alfa_romeo.json'.

        Returns:
            The display name, e.g. 'Alfa Romeo'. An input that is only the
            extension ('.json') yields an empty string.
        """
        # Only a *trailing* extension is removed; a blanket str.replace would
        # also eat '.json' in the middle of a name ('kia.jsonl' -> 'kial').
        if filename.lower().endswith(self._JSON_SUFFIX):
            base_name = filename[:-len(self._JSON_SUFFIX)]
        else:
            base_name = filename

        # split()/join collapses repeated, leading and trailing underscores so
        # 'a__b' becomes 'A B' rather than 'A  B'.
        spaced_name = ' '.join(base_name.replace('_', ' ').split())

        title_cased = spaced_name.title()
        return self.special_cases.get(title_cased, title_cased)

    def validate_mapping(self, filename: str, display_name: str) -> bool:
        """Validate a mapped name against the authoritative list.

        Args:
            filename: Source filename (not consulted by the check itself).
            display_name: Normalized display name to look up.

        Returns:
            True if display_name is in self.authoritative_makes.
        """
        return display_name in self.authoritative_makes

    def get_all_mappings(self) -> Dict[str, str]:
        """Get the complete filename → display name mapping."""
        return {
            filename: self.normalize_make_name(filename)
            for filename in self._JSON_FILES
        }

    def validate_all_mappings(self) -> ValidationReport:
        """Validate all mappings against the authoritative list.

        Returns:
            A ValidationReport listing every mapped name that is missing
            from the authoritative list.
        """
        mappings = self.get_all_mappings()
        mismatches = [
            {
                'filename': filename,
                'mapped_name': display_name,
                'status': 'NOT_FOUND_IN_AUTHORITATIVE',
            }
            for filename, display_name in mappings.items()
            if not self.validate_mapping(filename, display_name)
        ]

        return ValidationReport(
            total_files=len(mappings),
            valid_mappings=len(mappings) - len(mismatches),
            mismatches=mismatches,
        )


def demonstrate_make_name_mapping():
    """Demonstrate the make name normalization process on sample filenames."""
    mapper = MakeNameMapper()

    print("🏷️ Make Name Mapping Examples")
    print("=" * 40)

    # Test cases showing different transformation types
    test_cases = [
        # Single word makes (standard title case)
        ('toyota.json', 'Toyota'),
        ('honda.json', 'Honda'),
        ('ford.json', 'Ford'),
        # Multi-word makes (underscore → space + title case)
        ('alfa_romeo.json', 'Alfa Romeo'),
        ('land_rover.json', 'Land Rover'),
        ('rolls_royce.json', 'Rolls Royce'),
        ('aston_martin.json', 'Aston Martin'),
        # Special capitalization cases
        ('bmw.json', 'BMW'),
        ('gmc.json', 'GMC'),
        ('mini.json', 'MINI'),
        ('mclaren.json', 'McLaren'),
    ]

    for filename, expected in test_cases:
        result = mapper.normalize_make_name(filename)
        status = "✅" if result == expected else "❌"
        print(f"{status} {filename:20} → {result:15} (expected: {expected})")

        if result != expected:
            print(f" ⚠️ MISMATCH: Expected '{expected}', got '{result}'")


def demonstrate_complete_mapping():
    """Show the complete mapping of all 55 make files, grouped by type."""
    mapper = MakeNameMapper()
    all_mappings = mapper.get_all_mappings()

    print(f"\n\n📋 Complete Make Name Mappings ({len(all_mappings)} files)")
    print("=" * 50)

    # Derive the special names from the mapper so this grouping cannot drift
    # from the overrides that normalize_make_name actually applies.
    special_names = set(mapper.special_cases.values())

    # Group by transformation type for clarity
    single_words = []
    multi_words = []
    special_cases = []

    for filename, display_name in sorted(all_mappings.items()):
        if '_' in filename:
            multi_words.append((filename, display_name))
        elif display_name in special_names:
            special_cases.append((filename, display_name))
        else:
            single_words.append((filename, display_name))

    print("\n🔤 Single Word Makes (Standard Title Case):")
    for filename, display_name in single_words:
        print(f" {filename:20} → {display_name}")

    print(f"\n📝 Multi-Word Makes (Underscore → Space, {len(multi_words)} total):")
    for filename, display_name in multi_words:
        print(f" {filename:20} → {display_name}")

    print(f"\n⭐ Special Capitalization Cases ({len(special_cases)} total):")
    for filename, display_name in special_cases:
        print(f" {filename:20} → {display_name}")


def demonstrate_validation():
    """Demonstrate validation against the authoritative makes list."""
    mapper = MakeNameMapper()
    report = mapper.validate_all_mappings()

    print("\n\n✅ Validation Report")
    print("=" * 20)
    print(f"Total files processed: {report.total_files}")
    print(f"Valid mappings: {report.valid_mappings}")
    print(f"Success rate: {report.success_rate:.1%}")

    if report.mismatches:
        print(f"\n⚠️ Mismatches found ({len(report.mismatches)}):")
        for mismatch in report.mismatches:
            print(f" {mismatch['filename']} → {mismatch['mapped_name']}")
            print(f" Status: {mismatch['status']}")
    else:
        print("\n🎉 All mappings valid!")


def demonstrate_database_integration():
    """Show how mappings integrate with database operations."""
    mapper = MakeNameMapper()

    print("\n\n💾 Database Integration Example")
    print("=" * 35)

    sample_files = ['toyota.json', 'alfa_romeo.json', 'bmw.json', 'land_rover.json']

    print("SQL: INSERT INTO vehicles.make (name) VALUES")
    last_index = len(sample_files) - 1
    for i, filename in enumerate(sample_files):
        display_name = mapper.normalize_make_name(filename)
        # Double any single quote so the printed SQL literal stays well-formed.
        # Real inserts should use parameterized queries instead.
        sql_literal = display_name.replace("'", "''")
        comma = "," if i < last_index else ";"
        print(f" ('{sql_literal}'){comma}")
        print(f" -- From file: {filename}")


def demonstrate_error_handling():
    """Demonstrate how edge-case filenames are normalized and validated."""
    mapper = MakeNameMapper()

    print("\n\n🛠️ Error Handling Examples")
    print("=" * 30)

    edge_cases = [
        'unknown_brand.json',
        'test__multiple__underscores.json',
        'no_extension',
        '.json',  # Only extension
    ]

    for filename in edge_cases:
        # Broad catch is deliberate: this is a demo boundary that reports any
        # failure for the offending filename and moves on to the next one.
        try:
            display_name = mapper.normalize_make_name(filename)
            is_valid = mapper.validate_mapping(filename, display_name)
            status = "✅ Valid" if is_valid else "⚠️ Not in authoritative list"
            print(f" {filename:35} → {display_name:15} ({status})")
        except Exception as e:
            print(f" {filename:35} → ERROR: {e}")


def run_validation_tests():
    """Run comprehensive validation tests.

    Returns:
        True if every test case produced both the expected display name and
        the expected validity flag, False otherwise.
    """
    mapper = MakeNameMapper()

    print("\n\n🧪 Validation Tests")
    print("=" * 20)

    # Test cases with expected results: (filename, display name, is valid)
    test_cases = [
        ('toyota.json', 'Toyota', True),
        ('alfa_romeo.json', 'Alfa Romeo', True),
        ('bmw.json', 'BMW', True),
        ('gmc.json', 'GMC', True),
        ('mclaren.json', 'McLaren', True),
        ('unknown_brand.json', 'Unknown Brand', False),
    ]

    passed = 0
    for filename, expected_name, expected_valid in test_cases:
        actual_name = mapper.normalize_make_name(filename)
        actual_valid = mapper.validate_mapping(filename, actual_name)

        name_correct = actual_name == expected_name
        valid_correct = actual_valid == expected_valid

        if name_correct and valid_correct:
            print(f"✅ {filename} → {actual_name} (valid: {actual_valid})")
            passed += 1
        else:
            print(f"❌ {filename}")
            if not name_correct:
                print(f" Name: Expected '{expected_name}', got '{actual_name}'")
            if not valid_correct:
                print(f" Valid: Expected {expected_valid}, got {actual_valid}")

    print(f"\n📊 Test Results: {passed}/{len(test_cases)} tests passed")

    if passed == len(test_cases):
        print("🎉 All validation tests passed!")
        return True

    print("⚠️ Some tests failed!")
    return False


def main():
    """Run every demonstration, the validation tests, and print a summary."""
    demonstrate_make_name_mapping()
    demonstrate_complete_mapping()
    demonstrate_validation()
    demonstrate_database_integration()
    demonstrate_error_handling()

    success = run_validation_tests()

    print("\n\n📋 Summary")
    print("=" * 10)
    print("✅ Make name normalization patterns implemented")
    print("✅ Special capitalization cases handled")
    print("✅ Multi-word make names (underscore → space) working")
    print("✅ Validation against authoritative list functional")
    print("✅ Database integration format demonstrated")

    if success:
        print("\n🚀 Ready for integration into ETL system!")
    else:
        print("\n⚠️ Review failed tests before integration")

    print("\nKey Implementation Notes:")
    print("• strip trailing '.json', replace '_' with ' ', collapse spaces, .title()")
    print("• Special cases: BMW, GMC, MINI, McLaren")
    print("• Validation against sources/makes.json required")
    print("• Handle unknown makes gracefully (log warning, continue)")


if __name__ == "__main__":
    main()