334 lines
12 KiB
Python
334 lines
12 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Make Name Mapping Examples

This file demonstrates the complete make name normalization process,
converting JSON filenames to proper display names for the database.

Usage:
    python make-mapping-examples.py
|
|
"""
|
|
|
|
import json
|
|
import glob
|
|
import os
|
|
from typing import Dict, Set, List, Tuple
|
|
from dataclasses import dataclass
|
|
|
|
|
|
@dataclass
class ValidationReport:
    """Outcome of checking generated make names against a reference list.

    The report stores raw counts plus one dict per rejected mapping; the
    pass ratio is derived on demand by ``success_rate``.
    """

    # Number of filenames that were examined.
    total_files: int
    # Number of those filenames whose display name was accepted.
    valid_mappings: int
    # One entry per rejected mapping (string keys and string values).
    mismatches: List[Dict[str, str]]

    @property
    def success_rate(self) -> float:
        """Return ``valid_mappings / total_files``, or 0.0 for an empty run."""
        # Guard against division by zero when nothing was processed.
        if self.total_files > 0:
            return self.valid_mappings / self.total_files
        return 0.0
|
|
|
|
|
|
class MakeNameMapper:
    """Convert JSON filenames to proper make display names.

    A filename such as ``alfa_romeo.json`` becomes ``Alfa Romeo``: the
    ``.json`` extension is dropped, underscores become spaces and the result
    is title-cased.  Title-cased names that are keys of ``special_cases``
    are replaced by the mapped brand spelling (e.g. ``Bmw`` -> ``BMW``).
    """

    def __init__(self):
        """Create the special-case table and the authoritative makes set."""
        # Special capitalization cases (title-cased form -> brand spelling)
        self.special_cases = {
            'Bmw': 'BMW',          # Bayerische Motoren Werke
            'Gmc': 'GMC',          # General Motors Company
            'Mini': 'MINI',        # Brand styling
            'Mclaren': 'McLaren',  # Scottish naming convention
        }

        # Authoritative makes list (would be loaded from sources/makes.json)
        self.authoritative_makes = {
            'Acura', 'Alfa Romeo', 'Aston Martin', 'Audi', 'BMW', 'Bentley',
            'Buick', 'Cadillac', 'Chevrolet', 'Chrysler', 'Dodge', 'Ferrari',
            'Fiat', 'Ford', 'Genesis', 'Geo', 'GMC', 'Honda', 'Hummer',
            'Hyundai', 'Infiniti', 'Isuzu', 'Jaguar', 'Jeep', 'Kia',
            'Lamborghini', 'Land Rover', 'Lexus', 'Lincoln', 'Lotus', 'Lucid',
            'MINI', 'Maserati', 'Mazda', 'McLaren', 'Mercury', 'Mitsubishi',
            'Nissan', 'Oldsmobile', 'Plymouth', 'Polestar', 'Pontiac',
            'Porsche', 'Ram', 'Rivian', 'Rolls Royce', 'Saab', 'Saturn',
            'Scion', 'Smart', 'Subaru', 'Tesla', 'Toyota', 'Volkswagen',
            'Volvo',
        }

    def normalize_make_name(self, filename: str) -> str:
        """Convert a filename to its display name.

        Args:
            filename: File name such as ``'land_rover.json'``.  The extension
                is optional and matched case-insensitively.

        Returns:
            The title-cased name with underscores turned into single spaces,
            or the ``special_cases`` spelling when one is registered.  An
            input consisting only of the extension yields ``''``.
        """
        extension = '.json'

        # Strip the extension only when it is the suffix; a blanket
        # str.replace would also delete '.json' from the middle of a name.
        base_name = filename
        if base_name.lower().endswith(extension):
            base_name = base_name[:-len(extension)]

        # split()/join collapses runs of underscores (and stray whitespace)
        # so the display name never contains doubled or edge spaces.
        spaced_name = ' '.join(base_name.replace('_', ' ').split())

        title_cased = spaced_name.title()

        # Fall back to the plain title-cased form when no override exists.
        return self.special_cases.get(title_cased, title_cased)

    def validate_mapping(self, filename: str, display_name: str) -> bool:
        """Return True when ``display_name`` is in ``authoritative_makes``.

        ``filename`` is accepted for interface compatibility but is not
        consulted by the check.
        """
        return display_name in self.authoritative_makes

    def get_all_mappings(self) -> Dict[str, str]:
        """Return the filename → display name mapping for the sample files.

        The dict preserves the order of the hard-coded filename list below.
        """
        # Simulate the 55 JSON files found in the actual directory
        json_files = [
            'acura.json', 'alfa_romeo.json', 'aston_martin.json', 'audi.json',
            'bentley.json', 'bmw.json', 'buick.json', 'cadillac.json',
            'chevrolet.json', 'chrysler.json', 'dodge.json', 'ferrari.json',
            'fiat.json', 'ford.json', 'genesis.json', 'geo.json', 'gmc.json',
            'honda.json', 'hummer.json', 'hyundai.json', 'infiniti.json',
            'isuzu.json', 'jaguar.json', 'jeep.json', 'kia.json',
            'lamborghini.json', 'land_rover.json', 'lexus.json', 'lincoln.json',
            'lotus.json', 'lucid.json', 'maserati.json', 'mazda.json',
            'mclaren.json', 'mercury.json', 'mini.json', 'mitsubishi.json',
            'nissan.json', 'oldsmobile.json', 'plymouth.json', 'polestar.json',
            'pontiac.json', 'porsche.json', 'ram.json', 'rivian.json',
            'rolls_royce.json', 'saab.json', 'saturn.json', 'scion.json',
            'smart.json', 'subaru.json', 'tesla.json', 'toyota.json',
            'volkswagen.json', 'volvo.json',
        ]

        return {
            filename: self.normalize_make_name(filename)
            for filename in json_files
        }

    def validate_all_mappings(self) -> "ValidationReport":
        """Validate every sample mapping against the authoritative set.

        Returns:
            A ``ValidationReport`` whose ``mismatches`` list has one dict
            (keys ``filename``, ``mapped_name``, ``status``) per display name
            that is missing from ``authoritative_makes``.
        """
        mappings = self.get_all_mappings()

        mismatches = [
            {
                'filename': filename,
                'mapped_name': display_name,
                'status': 'NOT_FOUND_IN_AUTHORITATIVE',
            }
            for filename, display_name in mappings.items()
            if not self.validate_mapping(filename, display_name)
        ]

        return ValidationReport(
            total_files=len(mappings),
            valid_mappings=len(mappings) - len(mismatches),
            mismatches=mismatches,
        )
|
|
|
|
|
|
def demonstrate_make_name_mapping():
    """Print a pass/fail line for each sample filename conversion.

    Every sample filename is normalized with ``MakeNameMapper`` and compared
    with the display name it is expected to produce; a mismatch gets an
    extra warning line.
    """
    name_mapper = MakeNameMapper()

    print("🏷️ Make Name Mapping Examples")
    print("=" * 40)

    # filename -> expected display name, grouped by transformation type
    expected_by_file = {
        # Single word makes (standard title case)
        'toyota.json': 'Toyota',
        'honda.json': 'Honda',
        'ford.json': 'Ford',

        # Multi-word makes (underscore → space + title case)
        'alfa_romeo.json': 'Alfa Romeo',
        'land_rover.json': 'Land Rover',
        'rolls_royce.json': 'Rolls Royce',
        'aston_martin.json': 'Aston Martin',

        # Special capitalization cases
        'bmw.json': 'BMW',
        'gmc.json': 'GMC',
        'mini.json': 'MINI',
        'mclaren.json': 'McLaren',
    }

    for source_file, wanted in expected_by_file.items():
        actual = name_mapper.normalize_make_name(source_file)
        matches = actual == wanted
        marker = "✅" if matches else "❌"

        print(f"{marker} {source_file:20} → {actual:15} (expected: {wanted})")

        if not matches:
            print(f" ⚠️ MISMATCH: Expected '{wanted}', got '{actual}'")
|
|
|
|
|
|
def demonstrate_complete_mapping():
    """Print every sample filename → display name pair, grouped by kind.

    Files are sorted by filename and placed in one of three groups:
    multi-word (filename contains an underscore), special capitalization
    (display name is one of the mapper's ``special_cases`` values), or
    plain single-word makes.
    """
    mapper = MakeNameMapper()
    all_mappings = mapper.get_all_mappings()

    # Take the special spellings from the mapper itself so this grouping
    # cannot drift from the table that normalization actually uses.
    special_names = set(mapper.special_cases.values())

    print(f"\n\n📋 Complete Make Name Mappings ({len(all_mappings)} files)")
    print("=" * 50)

    # Group by transformation type for clarity
    single_words = []
    multi_words = []
    special_cases = []

    for filename, display_name in sorted(all_mappings.items()):
        # The underscore test comes first, so a multi-word name is always
        # listed as multi-word even if it is also a special case.
        if '_' in filename:
            group = multi_words
        elif display_name in special_names:
            group = special_cases
        else:
            group = single_words
        group.append((filename, display_name))

    sections = (
        ("\n🔤 Single Word Makes (Standard Title Case):", single_words),
        (f"\n📝 Multi-Word Makes (Underscore → Space, {len(multi_words)} total):",
         multi_words),
        (f"\n⭐ Special Capitalization Cases ({len(special_cases)} total):",
         special_cases),
    )

    for heading, entries in sections:
        print(heading)
        for filename, display_name in entries:
            print(f" {filename:20} → {display_name}")
|
|
|
|
|
|
def demonstrate_validation():
    """Print the validation report produced by ``MakeNameMapper``.

    Shows the totals and success rate, followed by either the list of
    rejected mappings or a message that everything validated.
    """
    summary = MakeNameMapper().validate_all_mappings()

    print("\n\n✅ Validation Report")
    print("=" * 20)
    print(f"Total files processed: {summary.total_files}")
    print(f"Valid mappings: {summary.valid_mappings}")
    print(f"Success rate: {summary.success_rate:.1%}")

    failures = summary.mismatches
    if not failures:
        print("\n🎉 All mappings valid!")
        return

    print(f"\n⚠️ Mismatches found ({len(failures)}):")
    for entry in failures:
        print(f" {entry['filename']} → {entry['mapped_name']}")
        print(f" Status: {entry['status']}")
|
|
|
|
|
|
def demonstrate_database_integration():
    """Print an illustrative SQL INSERT built from a few normalized names.

    The statement is only printed, never executed; each value row is
    followed by a line naming the file it came from.
    """
    name_mapper = MakeNameMapper()

    print("\n\n💾 Database Integration Example")
    print("=" * 35)

    sample_files = ['toyota.json', 'alfa_romeo.json', 'bmw.json', 'land_rover.json']
    row_count = len(sample_files)

    print("SQL: INSERT INTO vehicles.make (name) VALUES")

    for position, source_file in enumerate(sample_files, start=1):
        make_name = name_mapper.normalize_make_name(source_file)

        # The final row closes the statement; every earlier row ends in a comma.
        if position == row_count:
            terminator = ";"
        else:
            terminator = ","

        print(f" ('{make_name}'){terminator}")
        print(f" -- From file: {source_file}")
|
|
|
|
|
|
def demonstrate_error_handling():
    """Print how unusual filenames are normalized and validated.

    Each edge-case filename is run through the mapper; the resulting name is
    shown with a note on whether it is in the authoritative list.  Any
    exception raised while handling a filename is reported on its own line.
    """
    name_mapper = MakeNameMapper()

    print("\n\n🛠️ Error Handling Examples")
    print("=" * 30)

    odd_inputs = (
        'unknown_brand.json',
        'test__multiple__underscores.json',
        'no_extension',
        '.json',  # Only extension
    )

    for candidate in odd_inputs:
        try:
            shown_name = name_mapper.normalize_make_name(candidate)

            if name_mapper.validate_mapping(candidate, shown_name):
                verdict = "✅ Valid"
            else:
                verdict = "⚠️ Not in authoritative list"

            print(f" {candidate:35} → {shown_name:15} ({verdict})")
        except Exception as e:
            print(f" {candidate:35} → ERROR: {e}")
|
|
|
|
|
|
def run_validation_tests():
    """Check normalization and validation against known expectations.

    Each scenario gives a filename, the display name it should normalize to
    and whether that name should pass validation.  A line is printed per
    scenario, followed by a tally.

    Returns:
        True when every scenario passed, otherwise False.
    """
    checker = MakeNameMapper()

    print("\n\n🧪 Validation Tests")
    print("=" * 20)

    # (filename, expected display name, expected validity)
    scenarios = [
        ('toyota.json', 'Toyota', True),
        ('alfa_romeo.json', 'Alfa Romeo', True),
        ('bmw.json', 'BMW', True),
        ('gmc.json', 'GMC', True),
        ('mclaren.json', 'McLaren', True),
        ('unknown_brand.json', 'Unknown Brand', False),
    ]

    pass_count = 0
    for source_file, want_name, want_valid in scenarios:
        got_name = checker.normalize_make_name(source_file)
        got_valid = checker.validate_mapping(source_file, got_name)

        name_ok = got_name == want_name
        valid_ok = got_valid == want_valid

        # Report failures first so the success path reads straight through.
        if not (name_ok and valid_ok):
            print(f"❌ {source_file}")
            if not name_ok:
                print(f" Name: Expected '{want_name}', got '{got_name}'")
            if not valid_ok:
                print(f" Valid: Expected {want_valid}, got {got_valid}")
            continue

        print(f"✅ {source_file} → {got_name} (valid: {got_valid})")
        pass_count += 1

    print(f"\n📊 Test Results: {pass_count}/{len(scenarios)} tests passed")

    everything_passed = pass_count == len(scenarios)
    if everything_passed:
        print("🎉 All validation tests passed!")
    else:
        print("⚠️ Some tests failed!")
    return everything_passed
|
|
|
|
|
|
def main():
    """Run every demonstration and the validation tests, then summarize.

    Returns:
        0 when ``run_validation_tests()`` reports success, 1 otherwise, so
        the value can be used directly as the process exit status.
    """
    demonstrate_make_name_mapping()
    demonstrate_complete_mapping()
    demonstrate_validation()
    demonstrate_database_integration()
    demonstrate_error_handling()

    success = run_validation_tests()

    print("\n\n📋 Summary")
    print("=" * 10)
    print("✅ Make name normalization patterns implemented")
    print("✅ Special capitalization cases handled")
    print("✅ Multi-word make names (underscore → space) working")
    print("✅ Validation against authoritative list functional")
    print("✅ Database integration format demonstrated")

    if success:
        print("\n🚀 Ready for integration into ETL system!")
    else:
        print("\n⚠️ Review failed tests before integration")

    print("\nKey Implementation Notes:")
    print("• filename.replace('.json', '').replace('_', ' ').title()")
    print("• Special cases: BMW, GMC, MINI, McLaren")
    print("• Validation against sources/makes.json required")
    print("• Handle unknown makes gracefully (log warning, continue)")

    # Report failure to the caller instead of always exiting with status 0.
    return 0 if success else 1


if __name__ == "__main__":
    import sys

    sys.exit(main())