Files
motovaultpro/mvp-platform-services/vehicles/etl/utils/make_name_mapper.py
Eric Gullickson a052040e3a Initial Commit
2025-09-17 16:09:15 -05:00

317 lines
11 KiB
Python

"""
Make Name Mapper Utility
Converts JSON filenames to proper display names for database storage.
Handles underscore-to-space conversion, title casing, and special capitalization cases.
Critical for converting:
- alfa_romeo.json → "Alfa Romeo"
- bmw.json → "BMW"
- land_rover.json → "Land Rover"
Usage:
mapper = MakeNameMapper()
display_name = mapper.normalize_make_name('alfa_romeo.json') # Returns "Alfa Romeo"
"""
import json
import glob
import os
import logging
from typing import Set, Dict, List, Optional
from dataclasses import dataclass
from pathlib import Path
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
@dataclass
class ValidationReport:
    """Summary of checking make display names against the authoritative list.

    Attributes:
        total_files: Number of make files that were checked.
        valid_mappings: Number of files whose display name was accepted.
        mismatches: One dict per rejected file, with the keys 'filename',
            'mapped_name' and 'status'.
    """

    total_files: int
    valid_mappings: int
    mismatches: List[Dict[str, str]]

    @property
    def success_rate(self) -> float:
        """Return the share of valid mappings as a ratio, not a 0-100 value.

        The result is ``valid_mappings / total_files`` (e.g. 0.75 for three
        out of four), so render it with a percent format such as ``:.1%``.

        Returns:
            The ratio, or 0.0 when no files were checked (avoids dividing
            by zero).
        """
        if self.total_files <= 0:
            return 0.0
        return self.valid_mappings / self.total_files
class MakeNameMapper:
    """Convert make JSON filenames to display names and validate them.

    A filename such as ``alfa_romeo.json`` becomes ``Alfa Romeo``: the
    ``.json`` extension is dropped, underscores become spaces, the result is
    title-cased, and a small table of special spellings (``BMW``, ``GMC``,
    ``MINI``, ``McLaren``) is applied on top. Display names can be checked
    against an authoritative list loaded from ``<sources_dir>/makes.json``.
    """

    def __init__(self, sources_dir: Optional[str] = None):
        """
        Initialize make name mapper

        Args:
            sources_dir: Directory containing makes.json for validation.
                Defaults to "sources" (relative to the working directory).
        """
        self.sources_dir = sources_dir or "sources"
        # Special capitalization cases that don't follow standard title case.
        # Keys are the title-cased form, values the spelling to store.
        self.special_cases = {
            'Bmw': 'BMW',          # Bayerische Motoren Werke
            'Gmc': 'GMC',          # General Motors Company
            'Mini': 'MINI',        # Brand styling requirement
            'Mclaren': 'McLaren',  # Scottish naming convention
        }
        # Load authoritative makes list for validation
        self.authoritative_makes = self._load_authoritative_makes()
        logger.debug(
            "MakeNameMapper initialized with %d authoritative makes",
            len(self.authoritative_makes),
        )

    def _load_authoritative_makes(self) -> Set[str]:
        """Load the authoritative makes from ``<sources_dir>/makes.json``.

        The file is expected to hold a JSON object whose 'manufacturers' key
        is a list of make names. The built-in fallback list is returned when
        the file is missing, unreadable, not valid JSON, has a different
        shape, or lists no names. An empty authoritative set would make
        every validation fail without explanation, hence the fallback.

        Returns:
            Set of authoritative make display names.
        """
        makes_file = os.path.join(self.sources_dir, 'makes.json')
        try:
            with open(makes_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (FileNotFoundError, NotADirectoryError):
            logger.warning("Authoritative makes file not found: %s", makes_file)
            return self._get_fallback_makes()
        except (OSError, ValueError) as e:
            # ValueError covers both malformed JSON and undecodable bytes.
            logger.error(
                "Failed to load authoritative makes from %s: %s", makes_file, e
            )
            return self._get_fallback_makes()

        manufacturers = data.get('manufacturers') if isinstance(data, dict) else None
        if not isinstance(manufacturers, list):
            logger.error(
                "Failed to load authoritative makes from %s: %s",
                makes_file,
                "expected a JSON object with a 'manufacturers' list",
            )
            return self._get_fallback_makes()

        # Ignore non-string entries; they can never equal a display name.
        makes_set = {name for name in manufacturers if isinstance(name, str)}
        if not makes_set:
            logger.warning(
                "No manufacturers listed in %s; using fallback makes", makes_file
            )
            return self._get_fallback_makes()

        logger.info(
            "Loaded %d authoritative makes from %s", len(makes_set), makes_file
        )
        return makes_set

    def _get_fallback_makes(self) -> Set[str]:
        """Fallback authoritative makes list if file is not available"""
        return {
            'Acura', 'Alfa Romeo', 'Aston Martin', 'Audi', 'BMW', 'Bentley',
            'Buick', 'Cadillac', 'Chevrolet', 'Chrysler', 'Dodge', 'Ferrari',
            'Fiat', 'Ford', 'Genesis', 'Geo', 'GMC', 'Honda', 'Hummer',
            'Hyundai', 'Infiniti', 'Isuzu', 'Jaguar', 'Jeep', 'Kia',
            'Lamborghini', 'Land Rover', 'Lexus', 'Lincoln', 'Lotus', 'Lucid',
            'MINI', 'Maserati', 'Mazda', 'McLaren', 'Mercury', 'Mitsubishi',
            'Nissan', 'Oldsmobile', 'Plymouth', 'Polestar', 'Pontiac',
            'Porsche', 'Ram', 'Rivian', 'Rolls Royce', 'Saab', 'Saturn',
            'Scion', 'Smart', 'Subaru', 'Tesla', 'Toyota', 'Volkswagen',
            'Volvo',
        }

    def normalize_make_name(self, filename: str) -> str:
        """
        Convert filename to proper display name

        Only a trailing ``.json`` extension (any letter case) is removed, so
        a name that merely contains ".json" elsewhere is left intact. Runs of
        underscores and leading/trailing underscores collapse to single
        spaces, which keeps stray whitespace out of the stored name.

        Args:
            filename: JSON filename (e.g., 'alfa_romeo.json')

        Returns:
            Normalized display name (e.g., 'Alfa Romeo'), or "Unknown" when
            the input is not a string or no name is left after cleanup.
        """
        if not isinstance(filename, str):
            logger.error(
                "Failed to normalize make name '%s': %s",
                filename,
                "expected a str filename",
            )
            return "Unknown"

        suffix = '.json'
        if filename[-len(suffix):].lower() == suffix:
            base_name = filename[:-len(suffix)]
        else:
            base_name = filename

        # split()/join collapses repeated separators and trims both ends.
        spaced_name = ' '.join(base_name.replace('_', ' ').split())
        if not spaced_name:
            logger.warning(
                "Empty base name after removing .json from '%s'", filename
            )
            return "Unknown"

        title_cased = spaced_name.title()
        # Apply special capitalization cases
        normalized = self.special_cases.get(title_cased, title_cased)
        logger.debug("Normalized '%s' → '%s'", filename, normalized)
        return normalized

    def validate_mapping(self, filename: str, display_name: str) -> bool:
        """
        Validate mapped name against authoritative list

        Args:
            filename: Original JSON filename (used only in the log message)
            display_name: Normalized display name

        Returns:
            True if display name is in authoritative list
        """
        is_valid = display_name in self.authoritative_makes
        if not is_valid:
            logger.warning(
                "Make '%s' from '%s' not found in authoritative list",
                display_name,
                filename,
            )
        return is_valid

    def get_all_mappings(self, json_files_dir: str) -> Dict[str, str]:
        """
        Get complete filename → display name mapping for all JSON files

        The directory name is escaped before globbing so that characters such
        as '[' or '*' in the path are matched literally. Files are processed
        in sorted order so the resulting dict has a stable ordering.

        Args:
            json_files_dir: Directory containing make JSON files

        Returns:
            Dictionary mapping filenames to display names; empty when the
            directory cannot be scanned.
        """
        try:
            pattern = os.path.join(glob.escape(json_files_dir), '*.json')
            json_files = sorted(glob.glob(pattern))
        except (TypeError, ValueError, OSError) as e:
            logger.error(
                "Failed to get all mappings from %s: %s", json_files_dir, e
            )
            return {}

        logger.info("Found %d JSON files in %s", len(json_files), json_files_dir)
        mappings = {}
        for file_path in json_files:
            filename = os.path.basename(file_path)
            mappings[filename] = self.normalize_make_name(filename)
        return mappings

    def validate_all_mappings(self, json_files_dir: str) -> "ValidationReport":
        """
        Validate all mappings against authoritative list

        Args:
            json_files_dir: Directory containing make JSON files

        Returns:
            ValidationReport with results
        """
        mappings = self.get_all_mappings(json_files_dir)
        mismatches = [
            {
                'filename': filename,
                'mapped_name': display_name,
                'status': 'NOT_FOUND_IN_AUTHORITATIVE',
            }
            for filename, display_name in mappings.items()
            if not self.validate_mapping(filename, display_name)
        ]
        report = ValidationReport(
            total_files=len(mappings),
            valid_mappings=len(mappings) - len(mismatches),
            mismatches=mismatches,
        )
        logger.info(
            "Validation complete: %d/%d valid (%s)",
            report.valid_mappings,
            report.total_files,
            format(report.success_rate, ".1%"),
        )
        return report

    def get_filename_for_display_name(self, display_name: str) -> Optional[str]:
        """
        Reverse lookup: derive the JSON filename for a display name

        The name is computed, not looked up on disk: special spellings are
        mapped back to their title-cased form, then the name is lower-cased
        and spaces become underscores. The same conversion is applied to
        special cases, so a multi-word special spelling also yields an
        underscore-separated filename.

        Args:
            display_name: Make display name (e.g., 'Alfa Romeo')

        Returns:
            JSON filename (e.g., 'alfa_romeo.json'). This implementation
            always returns a string; the Optional annotation is kept for
            compatibility with existing callers.
        """
        # Handle special cases in reverse (BMW → Bmw, etc.)
        reverse_special_cases = {v: k for k, v in self.special_cases.items()}
        source_name = reverse_special_cases.get(display_name, display_name)
        base_name = source_name.lower().replace(' ', '_')
        filename = f"{base_name}.json"
        logger.debug("Reverse lookup: '%s' → '%s'", display_name, filename)
        return filename

    def print_validation_report(self, report: "ValidationReport") -> None:
        """
        Print formatted validation report

        Args:
            report: ValidationReport to display
        """
        print("📋 Make Name Validation Report")
        print("=" * 35)
        print(f"Total files: {report.total_files}")
        print(f"Valid mappings: {report.valid_mappings}")
        print(f"Success rate: {report.success_rate:.1%}")
        if report.mismatches:
            print(f"\n⚠️ Mismatches ({len(report.mismatches)}):")
            for mismatch in report.mismatches:
                print(f" {mismatch['filename']} → {mismatch['mapped_name']}")
                print(f" Status: {mismatch['status']}")
        else:
            print("\n🎉 All mappings are valid!")

    def get_make_statistics(self, json_files_dir: str) -> Dict[str, int]:
        """
        Get statistics about make name transformations

        Each display name is counted once: as a special case if it is one of
        the special spellings, otherwise as multi-word if it contains a
        space, otherwise as single-word.

        Args:
            json_files_dir: Directory containing make JSON files

        Returns:
            Dictionary with the keys 'total', 'single_words', 'multi_words'
            and 'special_cases'.
        """
        mappings = self.get_all_mappings(json_files_dir)
        # Built once so the membership test inside the loop is O(1).
        special_names = set(self.special_cases.values())
        stats = {
            'total': len(mappings),
            'single_words': 0,
            'multi_words': 0,
            'special_cases': 0,
        }
        for display_name in mappings.values():
            if display_name in special_names:
                stats['special_cases'] += 1
            elif ' ' in display_name:
                stats['multi_words'] += 1
            else:
                stats['single_words'] += 1
        return stats
# Example usage and testing functions
def example_usage():
    """Print sample filename conversions together with their validation status.

    Builds a MakeNameMapper with its default settings, normalizes a fixed
    list of sample filenames and prints one line per file: a status marker
    (valid or warning), the filename padded to 20 characters, and the
    resulting display name.
    """
    print("🏷️ MakeNameMapper Example Usage")
    print("=" * 35)
    mapper = MakeNameMapper()
    # Test individual conversions
    test_files = [
        'toyota.json',
        'alfa_romeo.json',
        'bmw.json',
        'land_rover.json',
        'mclaren.json',
    ]
    for filename in test_files:
        display_name = mapper.normalize_make_name(filename)
        is_valid = mapper.validate_mapping(filename, display_name)
        # Both outcomes get a visible marker so the output columns line up.
        status = "✅" if is_valid else "⚠️"
        print(f"{status} {filename:20} → {display_name}")


if __name__ == "__main__":
    example_usage()