Initial Commit
This commit is contained in:
@@ -0,0 +1,334 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Make Name Mapping Examples
|
||||
|
||||
This file demonstrates the complete make name normalization process,
|
||||
converting JSON filenames to proper display names for the database.
|
||||
|
||||
Usage:
|
||||
python make-mapping-examples.py
|
||||
"""
|
||||
|
||||
import json
|
||||
import glob
|
||||
import os
|
||||
from typing import Dict, Set, List, Tuple
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
class ValidationReport:
    """Summary of a make-name validation run.

    Attributes are documented inline; ``success_rate`` is derived from them.
    """

    # Number of filenames that were mapped and checked.
    total_files: int
    # Number of mapped names found in the authoritative list.
    valid_mappings: int
    # One dict per failed mapping (string keys and string values).
    mismatches: List[Dict[str, str]]

    @property
    def success_rate(self) -> float:
        """Return valid_mappings / total_files, or 0.0 when no files were processed."""
        if self.total_files <= 0:
            # Avoid dividing by zero for an empty run.
            return 0.0
        return self.valid_mappings / self.total_files
|
||||
|
||||
|
||||
class MakeNameMapper:
    """Convert JSON filenames to proper make display names.

    A filename such as ``alfa_romeo.json`` is turned into ``Alfa Romeo`` by
    stripping the extension, replacing underscores with spaces, title-casing,
    and finally applying a small table of brand-specific capitalizations.
    """

    # Extension removed from the END of a filename before normalization.
    _JSON_SUFFIX = '.json'

    def __init__(self):
        # Special capitalization cases, keyed by the plain title-cased form.
        self.special_cases = {
            'Bmw': 'BMW',          # Bayerische Motoren Werke
            'Gmc': 'GMC',          # General Motors Company
            'Mini': 'MINI',        # Brand styling
            'Mclaren': 'McLaren',  # Scottish naming convention
        }

        # Authoritative makes list (would be loaded from sources/makes.json)
        self.authoritative_makes = {
            'Acura', 'Alfa Romeo', 'Aston Martin', 'Audi', 'BMW', 'Bentley',
            'Buick', 'Cadillac', 'Chevrolet', 'Chrysler', 'Dodge', 'Ferrari',
            'Fiat', 'Ford', 'Genesis', 'Geo', 'GMC', 'Honda', 'Hummer',
            'Hyundai', 'Infiniti', 'Isuzu', 'Jaguar', 'Jeep', 'Kia',
            'Lamborghini', 'Land Rover', 'Lexus', 'Lincoln', 'Lotus', 'Lucid',
            'MINI', 'Maserati', 'Mazda', 'McLaren', 'Mercury', 'Mitsubishi',
            'Nissan', 'Oldsmobile', 'Plymouth', 'Polestar', 'Pontiac',
            'Porsche', 'Ram', 'Rivian', 'Rolls Royce', 'Saab', 'Saturn',
            'Scion', 'Smart', 'Subaru', 'Tesla', 'Toyota', 'Volkswagen',
            'Volvo',
        }

    def normalize_make_name(self, filename: str) -> str:
        """Convert a filename to its display name.

        Args:
            filename: File name such as ``'land_rover.json'``. A name without
                the ``.json`` extension is accepted as-is.

        Returns:
            The title-cased display name with underscores turned into single
            spaces, or the entry from ``special_cases`` when one exists.
            Returns ``''`` when nothing is left after stripping the extension.
        """
        base_name = filename
        # Strip only a trailing extension; str.replace would also delete
        # '.json' occurring in the middle of the name.
        if base_name.endswith(self._JSON_SUFFIX):
            base_name = base_name[:-len(self._JSON_SUFFIX)]

        # Underscores become spaces; split/join collapses runs of separators
        # and trims the ends so 'a__b' yields 'a b' rather than 'a  b'.
        spaced_name = ' '.join(base_name.replace('_', ' ').split())

        title_cased = spaced_name.title()

        # Brand-specific styling overrides plain title case.
        return self.special_cases.get(title_cased, title_cased)

    def validate_mapping(self, filename: str, display_name: str) -> bool:
        """Return True when ``display_name`` is in the authoritative makes list.

        ``filename`` is accepted for interface compatibility but is not used
        by the check.
        """
        return display_name in self.authoritative_makes

    def get_all_mappings(self) -> Dict[str, str]:
        """Return the filename → display name mapping for the built-in file list."""
        # Simulate the 55 JSON files found in the actual directory
        json_files = [
            'acura.json', 'alfa_romeo.json', 'aston_martin.json', 'audi.json',
            'bentley.json', 'bmw.json', 'buick.json', 'cadillac.json',
            'chevrolet.json', 'chrysler.json', 'dodge.json', 'ferrari.json',
            'fiat.json', 'ford.json', 'genesis.json', 'geo.json', 'gmc.json',
            'honda.json', 'hummer.json', 'hyundai.json', 'infiniti.json',
            'isuzu.json', 'jaguar.json', 'jeep.json', 'kia.json',
            'lamborghini.json', 'land_rover.json', 'lexus.json', 'lincoln.json',
            'lotus.json', 'lucid.json', 'maserati.json', 'mazda.json',
            'mclaren.json', 'mercury.json', 'mini.json', 'mitsubishi.json',
            'nissan.json', 'oldsmobile.json', 'plymouth.json', 'polestar.json',
            'pontiac.json', 'porsche.json', 'ram.json', 'rivian.json',
            'rolls_royce.json', 'saab.json', 'saturn.json', 'scion.json',
            'smart.json', 'subaru.json', 'tesla.json', 'toyota.json',
            'volkswagen.json', 'volvo.json',
        ]

        return {
            filename: self.normalize_make_name(filename)
            for filename in json_files
        }

    def validate_all_mappings(self) -> "ValidationReport":
        """Validate every built-in mapping against the authoritative list.

        Returns:
            A ``ValidationReport`` whose ``mismatches`` holds one dict per
            mapped name that is missing from ``authoritative_makes``.
        """
        mappings = self.get_all_mappings()

        mismatches = [
            {
                'filename': filename,
                'mapped_name': display_name,
                'status': 'NOT_FOUND_IN_AUTHORITATIVE',
            }
            for filename, display_name in mappings.items()
            if not self.validate_mapping(filename, display_name)
        ]

        return ValidationReport(
            total_files=len(mappings),
            valid_mappings=len(mappings) - len(mismatches),
            mismatches=mismatches,
        )
|
||||
|
||||
|
||||
def demonstrate_make_name_mapping():
    """Print the normalized name for a fixed set of sample filenames.

    Each line shows a pass/fail marker, the filename, the computed name and
    the expected name; a failing sample gets an extra mismatch line.
    """
    name_mapper = MakeNameMapper()

    print("🏷️ Make Name Mapping Examples")
    print("=" * 40)

    # Single word makes (standard title case)
    single_word_samples = [
        ('toyota.json', 'Toyota'),
        ('honda.json', 'Honda'),
        ('ford.json', 'Ford'),
    ]
    # Multi-word makes (underscore → space + title case)
    multi_word_samples = [
        ('alfa_romeo.json', 'Alfa Romeo'),
        ('land_rover.json', 'Land Rover'),
        ('rolls_royce.json', 'Rolls Royce'),
        ('aston_martin.json', 'Aston Martin'),
    ]
    # Special capitalization cases
    special_samples = [
        ('bmw.json', 'BMW'),
        ('gmc.json', 'GMC'),
        ('mini.json', 'MINI'),
        ('mclaren.json', 'McLaren'),
    ]

    for filename, expected in single_word_samples + multi_word_samples + special_samples:
        result = name_mapper.normalize_make_name(filename)
        matched = result == expected
        if matched:
            status = "✅"
        else:
            status = "❌"

        print(f"{status} {filename:20} → {result:15} (expected: {expected})")

        if not matched:
            print(f" ⚠️ MISMATCH: Expected '{expected}', got '{result}'")
|
||||
|
||||
|
||||
def demonstrate_complete_mapping():
    """Print every built-in filename → display name pair, grouped by kind.

    Pairs are sorted by filename and split into three groups: filenames
    containing an underscore, names with special capitalization, and the rest.
    """
    name_mapper = MakeNameMapper()
    all_mappings = name_mapper.get_all_mappings()

    print(f"\n\n📋 Complete Make Name Mappings ({len(all_mappings)} files)")
    print("=" * 50)

    # Group by transformation type for clarity
    specially_cased = ('BMW', 'GMC', 'MINI', 'McLaren')
    grouped = {'single': [], 'multi': [], 'special': []}
    for pair in sorted(all_mappings.items()):
        filename, display_name = pair
        if '_' in filename:
            bucket = 'multi'
        elif display_name in specially_cased:
            bucket = 'special'
        else:
            bucket = 'single'
        grouped[bucket].append(pair)

    multi_words = grouped['multi']
    special_cases = grouped['special']

    sections = (
        ("\n🔤 Single Word Makes (Standard Title Case):", grouped['single']),
        (f"\n📝 Multi-Word Makes (Underscore → Space, {len(multi_words)} total):", multi_words),
        (f"\n⭐ Special Capitalization Cases ({len(special_cases)} total):", special_cases),
    )
    for heading, pairs in sections:
        print(heading)
        for filename, display_name in pairs:
            print(f" {filename:20} → {display_name}")
|
||||
|
||||
|
||||
def demonstrate_validation():
    """Print a validation report for the built-in mappings.

    Shows the totals and success rate, followed by either the list of
    mismatches or a confirmation that every mapping is valid.
    """
    report = MakeNameMapper().validate_all_mappings()

    print("\n\n✅ Validation Report")
    print("=" * 20)
    print(f"Total files processed: {report.total_files}")
    print(f"Valid mappings: {report.valid_mappings}")
    print(f"Success rate: {report.success_rate:.1%}")

    if not report.mismatches:
        print("\n🎉 All mappings valid!")
        return

    print(f"\n⚠️ Mismatches found ({len(report.mismatches)}):")
    for entry in report.mismatches:
        print(f" {entry['filename']} → {entry['mapped_name']}")
        print(f" Status: {entry['status']}")
|
||||
|
||||
|
||||
def demonstrate_database_integration():
    """Print an illustrative SQL INSERT built from a few sample filenames.

    The statement is only printed, never executed; each value line is
    followed by a comment naming the file it came from.
    """
    name_mapper = MakeNameMapper()

    print("\n\n💾 Database Integration Example")
    print("=" * 35)

    sample_files = ['toyota.json', 'alfa_romeo.json', 'bmw.json', 'land_rover.json']

    print("SQL: INSERT INTO vehicles.make (name) VALUES")

    last_index = len(sample_files) - 1
    for position, filename in enumerate(sample_files):
        display_name = name_mapper.normalize_make_name(filename)
        # Every row ends with a comma except the last, which closes the statement.
        if position < last_index:
            comma = ","
        else:
            comma = ";"

        print(f" ('{display_name}'){comma}")
        print(f" -- From file: {filename}")
|
||||
|
||||
|
||||
def demonstrate_error_handling():
    """Print how unusual filenames are normalized and validated.

    Each edge case is processed inside its own try block so that a failure
    on one input is reported and the remaining inputs are still shown.
    """
    name_mapper = MakeNameMapper()

    print("\n\n🛠️ Error Handling Examples")
    print("=" * 30)

    edge_cases = (
        'unknown_brand.json',
        'test__multiple__underscores.json',
        'no_extension',
        '.json',  # Only extension
    )

    for filename in edge_cases:
        try:
            display_name = name_mapper.normalize_make_name(filename)
            if name_mapper.validate_mapping(filename, display_name):
                status = "✅ Valid"
            else:
                status = "⚠️ Not in authoritative list"

            print(f" {filename:35} → {display_name:15} ({status})")
        except Exception as e:
            # Demo boundary: report the problem and move on to the next case.
            print(f" {filename:35} → ERROR: {e}")
|
||||
|
||||
|
||||
def run_validation_tests():
    """Run the built-in normalization/validation checks and print the results.

    Returns:
        True when every test case produced both the expected display name and
        the expected validity flag, otherwise False.
    """
    name_mapper = MakeNameMapper()

    print("\n\n🧪 Validation Tests")
    print("=" * 20)

    # (filename, expected display name, expected validity)
    test_cases = [
        ('toyota.json', 'Toyota', True),
        ('alfa_romeo.json', 'Alfa Romeo', True),
        ('bmw.json', 'BMW', True),
        ('gmc.json', 'GMC', True),
        ('mclaren.json', 'McLaren', True),
        ('unknown_brand.json', 'Unknown Brand', False),
    ]

    passed = 0
    for filename, expected_name, expected_valid in test_cases:
        actual_name = name_mapper.normalize_make_name(filename)
        actual_valid = name_mapper.validate_mapping(filename, actual_name)

        name_correct = actual_name == expected_name
        valid_correct = actual_valid == expected_valid

        if not (name_correct and valid_correct):
            # Report exactly which expectation(s) failed for this case.
            print(f"❌ {filename}")
            if not name_correct:
                print(f" Name: Expected '{expected_name}', got '{actual_name}'")
            if not valid_correct:
                print(f" Valid: Expected {expected_valid}, got {actual_valid}")
            continue

        print(f"✅ {filename} → {actual_name} (valid: {actual_valid})")
        passed += 1

    print(f"\n📊 Test Results: {passed}/{len(test_cases)} tests passed")

    all_passed = passed == len(test_cases)
    if all_passed:
        print("🎉 All validation tests passed!")
    else:
        print("⚠️ Some tests failed!")
    return all_passed
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run every demonstration in order, then the self-checks.
    for demo in (
        demonstrate_make_name_mapping,
        demonstrate_complete_mapping,
        demonstrate_validation,
        demonstrate_database_integration,
        demonstrate_error_handling,
    ):
        demo()

    success = run_validation_tests()

    print("\n\n📋 Summary")
    print("=" * 10)
    for summary_line in (
        "✅ Make name normalization patterns implemented",
        "✅ Special capitalization cases handled",
        "✅ Multi-word make names (underscore → space) working",
        "✅ Validation against authoritative list functional",
        "✅ Database integration format demonstrated",
    ):
        print(summary_line)

    # The closing message depends on whether the self-checks passed.
    if success:
        print("\n🚀 Ready for integration into ETL system!")
    else:
        print("\n⚠️ Review failed tests before integration")

    print("\nKey Implementation Notes:")
    for note in (
        "• filename.replace('.json', '').replace('_', ' ').title()",
        "• Special cases: BMW, GMC, MINI, McLaren",
        "• Validation against sources/makes.json required",
        "• Handle unknown makes gracefully (log warning, continue)",
    ):
        print(note)
|
||||
Reference in New Issue
Block a user