"""
Make Name Mapper Utility

Converts JSON filenames to proper display names for database storage.
Handles underscore-to-space conversion, title casing, and special
capitalization cases.

Critical for converting:
- alfa_romeo.json → "Alfa Romeo"
- bmw.json → "BMW"
- land_rover.json → "Land Rover"

Usage:
    mapper = MakeNameMapper()
    display_name = mapper.normalize_make_name('alfa_romeo.json')
    # Returns "Alfa Romeo"
"""

import json
import glob
import os
import logging
from typing import Set, Dict, List, Optional
from dataclasses import dataclass
from pathlib import Path

logger = logging.getLogger(__name__)

# Extension carried by make files; only ever stripped from the END of a name.
_JSON_SUFFIX = '.json'


@dataclass
class ValidationReport:
    """Make name validation report"""

    # Number of JSON files that were mapped
    total_files: int
    # Number of mapped names present in the authoritative list
    valid_mappings: int
    # One dict per failed mapping: 'filename', 'mapped_name', 'status'
    mismatches: List[Dict[str, str]]

    @property
    def success_rate(self) -> float:
        """
        Fraction of files whose mapping is valid

        Returns:
            A value between 0.0 and 1.0 (NOT 0-100); render it with the
            ``:.1%`` format spec to display a percentage. 0.0 when there
            are no files.
        """
        return self.valid_mappings / self.total_files if self.total_files > 0 else 0.0


class MakeNameMapper:
    """Convert JSON filenames to proper make display names"""

    def __init__(self, sources_dir: Optional[str] = None):
        """
        Initialize make name mapper

        Args:
            sources_dir: Directory containing makes.json for validation
                (defaults to "sources")
        """
        self.sources_dir = sources_dir or "sources"

        # Special capitalization cases that don't follow standard title case.
        # Keys are the title-cased form, values the desired display name.
        self.special_cases = {
            'Bmw': 'BMW',          # Bayerische Motoren Werke
            'Gmc': 'GMC',          # General Motors Company
            'Mini': 'MINI',        # Brand styling requirement
            'Mclaren': 'McLaren',  # Scottish naming convention
        }

        # Load authoritative makes list for validation
        self.authoritative_makes = self._load_authoritative_makes()

        logger.debug(
            "MakeNameMapper initialized with %s authoritative makes",
            len(self.authoritative_makes),
        )

    def _load_authoritative_makes(self) -> Set[str]:
        """
        Load authoritative makes list from <sources_dir>/makes.json

        Reads the 'manufacturers' key of the JSON document. Loading is
        best-effort: a missing or unreadable/malformed file never raises,
        the built-in fallback list is used instead.

        Returns:
            Set of authoritative make display names
        """
        makes_file = os.path.join(self.sources_dir, 'makes.json')

        try:
            # EAFP: open directly instead of exists()-then-open, which
            # needs two filesystem hits and can race with file removal.
            with open(makes_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
            makes_set = set(data.get('manufacturers', []))
        except (FileNotFoundError, NotADirectoryError):
            logger.warning("Authoritative makes file not found: %s", makes_file)
            return self._get_fallback_makes()
        except Exception as e:
            # Deliberately broad: construction must not fail because of a
            # corrupt or oddly shaped makes.json; log and fall back.
            logger.error(
                "Failed to load authoritative makes from %s: %s", makes_file, e
            )
            return self._get_fallback_makes()

        logger.info(
            "Loaded %s authoritative makes from %s", len(makes_set), makes_file
        )
        return makes_set

    def _get_fallback_makes(self) -> Set[str]:
        """Fallback authoritative makes list if file is not available"""
        return {
            'Acura', 'Alfa Romeo', 'Aston Martin', 'Audi', 'BMW', 'Bentley',
            'Buick', 'Cadillac', 'Chevrolet', 'Chrysler', 'Dodge', 'Ferrari',
            'Fiat', 'Ford', 'Genesis', 'Geo', 'GMC', 'Honda', 'Hummer',
            'Hyundai', 'Infiniti', 'Isuzu', 'Jaguar', 'Jeep', 'Kia',
            'Lamborghini', 'Land Rover', 'Lexus', 'Lincoln', 'Lotus', 'Lucid',
            'MINI', 'Maserati', 'Mazda', 'McLaren', 'Mercury', 'Mitsubishi',
            'Nissan', 'Oldsmobile', 'Plymouth', 'Polestar', 'Pontiac',
            'Porsche', 'Ram', 'Rivian', 'Rolls Royce', 'Saab', 'Saturn',
            'Scion', 'Smart', 'Subaru', 'Tesla', 'Toyota', 'Volkswagen',
            'Volvo',
        }

    def normalize_make_name(self, filename: str) -> str:
        """
        Convert filename to proper display name

        Args:
            filename: JSON filename (e.g., 'alfa_romeo.json')

        Returns:
            Normalized display name (e.g., 'Alfa Romeo'), or "Unknown" when
            the input is not a string or contains no name at all
        """
        if not isinstance(filename, str):
            logger.error(
                "Failed to normalize make name '%s': expected str, got %s",
                filename, type(filename).__name__,
            )
            return "Unknown"

        # Strip the extension only when it is the actual suffix; a blanket
        # str.replace would also eat '.json' appearing mid-name. Comparison
        # is case-insensitive so 'BMW.JSON' is handled as well.
        base_name = filename
        if base_name.lower().endswith(_JSON_SUFFIX):
            base_name = base_name[:-len(_JSON_SUFFIX)]

        # Underscores become word separators; split/join drops leading,
        # trailing and repeated separators so no stray spaces survive.
        words = base_name.replace('_', ' ').split()
        if not words:
            logger.warning(
                "Empty base name after removing .json from '%s'", filename
            )
            return "Unknown"

        title_cased = ' '.join(words).title()

        # Apply special capitalization cases
        normalized = self.special_cases.get(title_cased, title_cased)

        logger.debug("Normalized '%s' → '%s'", filename, normalized)
        return normalized

    def validate_mapping(self, filename: str, display_name: str) -> bool:
        """
        Validate mapped name against authoritative list

        Args:
            filename: Original JSON filename (used for the log message only)
            display_name: Normalized display name

        Returns:
            True if display name is in authoritative list
        """
        is_valid = display_name in self.authoritative_makes

        if not is_valid:
            logger.warning(
                "Make '%s' from '%s' not found in authoritative list",
                display_name, filename,
            )

        return is_valid

    def get_all_mappings(self, json_files_dir: str) -> Dict[str, str]:
        """
        Get complete filename → display name mapping for all JSON files

        Args:
            json_files_dir: Directory containing make JSON files

        Returns:
            Dictionary mapping filenames to display names, in sorted
            filename order; empty if the directory cannot be scanned
        """
        try:
            # Escape the directory so glob metacharacters in its name
            # ('[', ']', '*', '?') are matched literally, not as a pattern.
            pattern = os.path.join(glob.escape(json_files_dir), '*.json')
            # glob order is filesystem-dependent; sort for stable output.
            json_files = sorted(glob.glob(pattern))
        except (TypeError, ValueError, OSError) as e:
            logger.error(
                "Failed to get all mappings from %s: %s", json_files_dir, e
            )
            return {}

        logger.info("Found %s JSON files in %s", len(json_files), json_files_dir)

        mappings = {}
        for file_path in json_files:
            filename = os.path.basename(file_path)
            mappings[filename] = self.normalize_make_name(filename)
        return mappings

    def validate_all_mappings(self, json_files_dir: str) -> ValidationReport:
        """
        Validate all mappings against authoritative list

        Args:
            json_files_dir: Directory containing make JSON files

        Returns:
            ValidationReport with results
        """
        mappings = self.get_all_mappings(json_files_dir)

        mismatches = [
            {
                'filename': filename,
                'mapped_name': display_name,
                'status': 'NOT_FOUND_IN_AUTHORITATIVE',
            }
            for filename, display_name in mappings.items()
            if not self.validate_mapping(filename, display_name)
        ]

        report = ValidationReport(
            total_files=len(mappings),
            valid_mappings=len(mappings) - len(mismatches),
            mismatches=mismatches,
        )

        logger.info(
            "Validation complete: %s/%s valid (%s)",
            report.valid_mappings, report.total_files,
            f"{report.success_rate:.1%}",
        )
        return report

    def get_filename_for_display_name(self, display_name: str) -> Optional[str]:
        """
        Reverse lookup: get JSON filename for a display name

        The filename is derived from the name; the filesystem is not
        consulted, so the returned file is not guaranteed to exist.

        Args:
            display_name: Make display name (e.g., 'Alfa Romeo')

        Returns:
            JSON filename (e.g., 'alfa_romeo.json'), or None when
            display_name is empty/blank or not a string
        """
        if not isinstance(display_name, str) or not display_name.strip():
            return None

        # Undo special capitalization first (BMW → Bmw, McLaren → Mclaren),
        # then apply the same lowercase/underscore transform to every name
        # so multi-word special cases are handled like any other make.
        reverse_special_cases = {v: k for k, v in self.special_cases.items()}
        title_form = reverse_special_cases.get(display_name, display_name)
        base_name = title_form.lower().replace(' ', '_')

        filename = f"{base_name}.json"
        logger.debug("Reverse lookup: '%s' → '%s'", display_name, filename)
        return filename

    def print_validation_report(self, report: ValidationReport) -> None:
        """
        Print formatted validation report

        Args:
            report: ValidationReport to display
        """
        print("📋 Make Name Validation Report")
        print("=" * 35)
        print(f"Total files: {report.total_files}")
        print(f"Valid mappings: {report.valid_mappings}")
        print(f"Success rate: {report.success_rate:.1%}")

        if report.mismatches:
            print(f"\n⚠️ Mismatches ({len(report.mismatches)}):")
            for mismatch in report.mismatches:
                print(f" {mismatch['filename']} → {mismatch['mapped_name']}")
                print(f" Status: {mismatch['status']}")
        else:
            print("\n🎉 All mappings are valid!")

    def get_make_statistics(self, json_files_dir: str) -> Dict[str, int]:
        """
        Get statistics about make name transformations

        Each display name is counted in exactly one bucket; a name that is
        a special-case value is counted as such even if it has spaces.

        Args:
            json_files_dir: Directory containing make JSON files

        Returns:
            Dictionary with keys 'total', 'single_words', 'multi_words'
            and 'special_cases'
        """
        mappings = self.get_all_mappings(json_files_dir)

        # Build the lookup once instead of scanning the values per name.
        special_names = set(self.special_cases.values())

        single_words = 0
        multi_words = 0
        special_cases = 0

        for display_name in mappings.values():
            if display_name in special_names:
                special_cases += 1
            elif ' ' in display_name:
                multi_words += 1
            else:
                single_words += 1

        return {
            'total': len(mappings),
            'single_words': single_words,
            'multi_words': multi_words,
            'special_cases': special_cases,
        }


# Example usage and testing functions
def example_usage():
    """Demonstrate MakeNameMapper usage"""
    print("🏷️ MakeNameMapper Example Usage")
    print("=" * 35)

    mapper = MakeNameMapper()

    # Test individual conversions
    test_files = [
        'toyota.json',
        'alfa_romeo.json',
        'bmw.json',
        'land_rover.json',
        'mclaren.json',
    ]

    for filename in test_files:
        display_name = mapper.normalize_make_name(filename)
        is_valid = mapper.validate_mapping(filename, display_name)
        status = "✅" if is_valid else "⚠️"
        print(f"{status} {filename:20} → {display_name}")


if __name__ == "__main__":
    example_usage()