317 lines
11 KiB
Python
317 lines
11 KiB
Python
"""
|
|
Make Name Mapper Utility
|
|
|
|
Converts JSON filenames to proper display names for database storage.
|
|
Handles underscore-to-space conversion, title casing, and special capitalization cases.
|
|
|
|
Critical for converting:
|
|
- alfa_romeo.json → "Alfa Romeo"
|
|
- bmw.json → "BMW"
|
|
- land_rover.json → "Land Rover"
|
|
|
|
Usage:
|
|
mapper = MakeNameMapper()
|
|
display_name = mapper.normalize_make_name('alfa_romeo.json') # Returns "Alfa Romeo"
|
|
"""
|
|
|
|
import json
|
|
import glob
|
|
import os
|
|
import logging
|
|
from typing import Set, Dict, List, Optional
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass
class ValidationReport:
    """Summary of checking make-name mappings against the authoritative list."""

    # Number of filename -> display-name mappings that were checked.
    total_files: int
    # Number of those mappings whose display name was accepted.
    valid_mappings: int
    # One dict per rejected mapping ('filename', 'mapped_name', 'status' keys).
    mismatches: List[Dict[str, str]]

    @property
    def success_rate(self) -> float:
        """Return valid_mappings / total_files as a fraction (not multiplied by 100).

        Yields 0.0 when total_files is zero or negative, so the division is
        never attempted with an empty report. Format with ``:.1%`` to display
        it as a percentage.
        """
        if self.total_files > 0:
            return self.valid_mappings / self.total_files
        return 0.0
|
|
|
|
|
|
class MakeNameMapper:
    """Convert make JSON filenames to display names and validate the results.

    A filename such as ``alfa_romeo.json`` is turned into ``Alfa Romeo`` by
    dropping the ``.json`` suffix, replacing underscores with spaces and
    title-casing. Names whose title-cased form is a key of ``special_cases``
    are replaced by the mapped spelling (e.g. ``Bmw`` -> ``BMW``).

    Attributes:
        sources_dir: Directory expected to contain ``makes.json``.
        special_cases: Title-cased name -> required display spelling.
        authoritative_makes: Set of display names accepted by validation.
    """

    def __init__(self, sources_dir: Optional[str] = None):
        """
        Initialize make name mapper

        Args:
            sources_dir: Directory containing makes.json for validation.
                Falls back to "sources" when omitted or empty.
        """
        self.sources_dir = sources_dir or "sources"

        # Special capitalization cases that don't follow standard title case.
        # Keys are the *title-cased* form produced by normalize_make_name.
        self.special_cases = {
            'Bmw': 'BMW',          # Bayerische Motoren Werke
            'Gmc': 'GMC',          # General Motors Company
            'Mini': 'MINI',        # Brand styling requirement
            'Mclaren': 'McLaren',  # Scottish naming convention
        }

        # Load authoritative makes list for validation
        self.authoritative_makes = self._load_authoritative_makes()

        logger.debug(f"MakeNameMapper initialized with {len(self.authoritative_makes)} authoritative makes")

    def _load_authoritative_makes(self) -> Set[str]:
        """Load the authoritative makes from ``<sources_dir>/makes.json``.

        The file is expected to be a JSON object whose ``manufacturers`` key
        holds a list of display names; a missing key yields an empty set.

        Returns:
            The set of names from the file, or the built-in fallback set when
            the file is missing, unreadable or malformed.
        """
        makes_file = os.path.join(self.sources_dir, 'makes.json')

        # Open directly instead of checking existence first: this avoids the
        # window in which the file could disappear between check and open.
        try:
            with open(makes_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
            makes_set = set(data.get('manufacturers', []))
        except FileNotFoundError:
            logger.warning(f"Authoritative makes file not found: {makes_file}")
            return self._get_fallback_makes()
        except Exception as e:
            # Deliberately broad: construction must never fail because of a
            # bad makes file (I/O error, invalid JSON, unexpected structure).
            logger.error(f"Failed to load authoritative makes from {makes_file}: {e}")
            return self._get_fallback_makes()

        logger.info(f"Loaded {len(makes_set)} authoritative makes from {makes_file}")
        return makes_set

    def _get_fallback_makes(self) -> Set[str]:
        """Fallback authoritative makes list if file is not available"""
        return {
            'Acura', 'Alfa Romeo', 'Aston Martin', 'Audi', 'BMW', 'Bentley',
            'Buick', 'Cadillac', 'Chevrolet', 'Chrysler', 'Dodge', 'Ferrari',
            'Fiat', 'Ford', 'Genesis', 'Geo', 'GMC', 'Honda', 'Hummer',
            'Hyundai', 'Infiniti', 'Isuzu', 'Jaguar', 'Jeep', 'Kia',
            'Lamborghini', 'Land Rover', 'Lexus', 'Lincoln', 'Lotus', 'Lucid',
            'MINI', 'Maserati', 'Mazda', 'McLaren', 'Mercury', 'Mitsubishi',
            'Nissan', 'Oldsmobile', 'Plymouth', 'Polestar', 'Pontiac',
            'Porsche', 'Ram', 'Rivian', 'Rolls Royce', 'Saab', 'Saturn',
            'Scion', 'Smart', 'Subaru', 'Tesla', 'Toyota', 'Volkswagen',
            'Volvo',
        }

    def normalize_make_name(self, filename: str) -> str:
        """
        Convert filename to proper display name

        Only a trailing ``.json`` extension is removed, and it is matched
        case-insensitively, so ``BMW.JSON`` normalizes the same way as
        ``bmw.json`` and a ``.json`` fragment elsewhere in the name is kept.

        Args:
            filename: JSON filename (e.g., 'alfa_romeo.json')

        Returns:
            Normalized display name (e.g., 'Alfa Romeo'), or "Unknown" when
            the name is empty after removing the extension or the input is
            not a string.
        """
        suffix = '.json'
        try:
            # Compare only the tail so the slice below always removes exactly
            # the characters that were matched.
            if filename[-len(suffix):].lower() == suffix:
                base_name = filename[:-len(suffix)]
            else:
                base_name = filename

            # Handle edge case of empty string
            if not base_name:
                logger.warning(f"Empty base name after removing .json from '{filename}'")
                return "Unknown"

            # Underscores become spaces, then each word is capitalized.
            title_cased = base_name.replace('_', ' ').title()

            # Apply special capitalization cases
            normalized = self.special_cases.get(title_cased, title_cased)
        except (AttributeError, TypeError) as e:
            # Non-string input (None, bytes, ...) ends up here.
            logger.error(f"Failed to normalize make name '{filename}': {e}")
            return "Unknown"

        logger.debug(f"Normalized '{filename}' → '{normalized}'")
        return normalized

    def validate_mapping(self, filename: str, display_name: str) -> bool:
        """
        Validate mapped name against authoritative list

        Args:
            filename: Original JSON filename (used only in the log message)
            display_name: Normalized display name

        Returns:
            True if display name is in authoritative list
        """
        if display_name in self.authoritative_makes:
            return True

        logger.warning(f"Make '{display_name}' from '{filename}' not found in authoritative list")
        return False

    def get_all_mappings(self, json_files_dir: str) -> Dict[str, str]:
        """
        Get complete filename → display name mapping for all JSON files

        Args:
            json_files_dir: Directory containing make JSON files. It is taken
                literally: glob metacharacters in the path are escaped.

        Returns:
            Dictionary mapping filenames to display names, in sorted filename
            order. Empty if the directory cannot be scanned.
        """
        try:
            # Escape the directory so names like "makes[2024]" are not
            # interpreted as glob character classes.
            pattern = os.path.join(glob.escape(os.fspath(json_files_dir)), '*.json')
            # glob returns entries in arbitrary order; sort for determinism.
            json_files = sorted(glob.glob(pattern))

            logger.info(f"Found {len(json_files)} JSON files in {json_files_dir}")

            mappings = {}
            for file_path in json_files:
                filename = os.path.basename(file_path)
                mappings[filename] = self.normalize_make_name(filename)
            return mappings
        except Exception as e:
            # Best-effort scan: report the problem and hand back no mappings.
            logger.error(f"Failed to get all mappings from {json_files_dir}: {e}")
            return {}

    def validate_all_mappings(self, json_files_dir: str) -> "ValidationReport":
        """
        Validate all mappings against authoritative list

        Args:
            json_files_dir: Directory containing make JSON files

        Returns:
            ValidationReport with results
        """
        mappings = self.get_all_mappings(json_files_dir)

        # validate_mapping logs a warning for every rejected name.
        mismatches = [
            {
                'filename': filename,
                'mapped_name': display_name,
                'status': 'NOT_FOUND_IN_AUTHORITATIVE',
            }
            for filename, display_name in mappings.items()
            if not self.validate_mapping(filename, display_name)
        ]

        report = ValidationReport(
            total_files=len(mappings),
            valid_mappings=len(mappings) - len(mismatches),
            mismatches=mismatches,
        )

        logger.info(f"Validation complete: {report.valid_mappings}/{report.total_files} valid ({report.success_rate:.1%})")

        return report

    def get_filename_for_display_name(self, display_name: str) -> Optional[str]:
        """
        Reverse lookup: derive the JSON filename for a display name

        The filename is computed, not looked up on disk, so a string is
        always returned; the Optional return type is kept for compatibility.

        Args:
            display_name: Make display name (e.g., 'Alfa Romeo')

        Returns:
            JSON filename (e.g., 'alfa_romeo.json')
        """
        # Handle special cases in reverse: display spelling -> title-cased key.
        reverse_special_cases = {v: k for k, v in self.special_cases.items()}

        if display_name in reverse_special_cases:
            # Special case: BMW → Bmw → bmw, etc.
            base_name = reverse_special_cases[display_name].lower()
        else:
            # Standard case: convert to lowercase, spaces to underscores
            base_name = display_name.lower().replace(' ', '_')

        filename = f"{base_name}.json"

        logger.debug(f"Reverse lookup: '{display_name}' → '{filename}'")
        return filename

    def print_validation_report(self, report: "ValidationReport") -> None:
        """
        Print formatted validation report to standard output

        Args:
            report: ValidationReport to display
        """
        print("📋 Make Name Validation Report")
        print("=" * 35)
        print(f"Total files: {report.total_files}")
        print(f"Valid mappings: {report.valid_mappings}")
        print(f"Success rate: {report.success_rate:.1%}")

        if report.mismatches:
            print(f"\n⚠️ Mismatches ({len(report.mismatches)}):")
            for mismatch in report.mismatches:
                print(f" {mismatch['filename']} → {mismatch['mapped_name']}")
                print(f" Status: {mismatch['status']}")
        else:
            print("\n🎉 All mappings are valid!")

    def get_make_statistics(self, json_files_dir: str) -> Dict[str, int]:
        """
        Get statistics about make name transformations

        Each display name is counted in exactly one bucket; the special-case
        check takes precedence over the single/multi-word split.

        Args:
            json_files_dir: Directory containing make JSON files

        Returns:
            Dictionary with 'total', 'single_words', 'multi_words' and
            'special_cases' counts
        """
        mappings = self.get_all_mappings(json_files_dir)

        # Built once so the membership test inside the loop is O(1).
        special_spellings = set(self.special_cases.values())

        single_words = 0
        multi_words = 0
        special_cases = 0

        for display_name in mappings.values():
            if display_name in special_spellings:
                special_cases += 1
            elif ' ' in display_name:
                multi_words += 1
            else:
                single_words += 1

        return {
            'total': len(mappings),
            'single_words': single_words,
            'multi_words': multi_words,
            'special_cases': special_cases,
        }
|
|
|
|
|
|
# Example usage and testing functions
|
|
def example_usage():
    """Print the normalized name and validation status for a few sample files."""
    print("🏷️ MakeNameMapper Example Usage")
    print("=" * 35)

    mapper = MakeNameMapper()

    # Sample filenames covering plain, multi-word and special-case makes.
    sample_filenames = (
        'toyota.json',
        'alfa_romeo.json',
        'bmw.json',
        'land_rover.json',
        'mclaren.json',
    )

    for filename in sample_filenames:
        display_name = mapper.normalize_make_name(filename)

        # A check mark means the name is in the mapper's authoritative set.
        if mapper.validate_mapping(filename, display_name):
            status = "✅"
        else:
            status = "⚠️"

        print(f"{status} {filename:20} → {display_name}")


# Run the demonstration only when the file is executed as a script.
if __name__ == "__main__":
    example_usage()