# Make Name Mapping Documentation

## Overview

Rules and implementation for converting JSON filename conventions to proper display names in the database.

## Problem Statement

JSON files use lowercase filenames with underscores, but the database and API require proper display names:

- `alfa_romeo.json` → `"Alfa Romeo"`
- `land_rover.json` → `"Land Rover"`
- `rolls_royce.json` → `"Rolls Royce"`

## Normalization Rules

### Standard Transformation

1. **Remove .json extension**
2. **Replace underscores** with spaces
3. **Apply title case** to each word
4. **Apply special case exceptions**

### Implementation Algorithm

```python
def normalize_make_name(filename: str) -> str:
    # Remove .json extension
    base_name = filename.replace('.json', '')

    # Replace underscores with spaces
    spaced_name = base_name.replace('_', ' ')

    # Apply title case
    title_cased = spaced_name.title()

    # Apply special cases
    return apply_special_cases(title_cased)
```

## Complete Filename Mapping

### Multi-Word Makes (Underscore Conversion)

| Filename | Display Name | Notes |
|----------|-------------|-------|
| `alfa_romeo.json` | `"Alfa Romeo"` | Italian brand |
| `aston_martin.json` | `"Aston Martin"` | British luxury |
| `land_rover.json` | `"Land Rover"` | British SUV brand |
| `rolls_royce.json` | `"Rolls Royce"` | Ultra-luxury brand |

### Single-Word Makes (Standard Title Case)

| Filename | Display Name | Notes |
|----------|-------------|-------|
| `acura.json` | `"Acura"` | Honda luxury division |
| `audi.json` | `"Audi"` | German luxury |
| `bentley.json` | `"Bentley"` | British luxury |
| `bmw.json` | `"BMW"` | **Special case - all caps** |
| `buick.json` | `"Buick"` | GM luxury |
| `cadillac.json` | `"Cadillac"` | GM luxury |
| `chevrolet.json` | `"Chevrolet"` | GM mainstream |
| `chrysler.json` | `"Chrysler"` | Stellantis brand |
| `dodge.json` | `"Dodge"` | Stellantis performance |
| `ferrari.json` | `"Ferrari"` | Italian supercar |
| `fiat.json` | `"Fiat"` | Italian mainstream |
| `ford.json` | `"Ford"` | American mainstream |
| `genesis.json` | `"Genesis"` | Hyundai luxury |
| `geo.json` | `"Geo"` | GM defunct brand |
| `gmc.json` | `"GMC"` | **Special case - all caps** |
| `honda.json` | `"Honda"` | Japanese mainstream |
| `hummer.json` | `"Hummer"` | GM truck brand |
| `hyundai.json` | `"Hyundai"` | Korean mainstream |
| `infiniti.json` | `"Infiniti"` | Nissan luxury |
| `isuzu.json` | `"Isuzu"` | Japanese commercial |
| `jaguar.json` | `"Jaguar"` | British luxury |
| `jeep.json` | `"Jeep"` | Stellantis SUV |
| `kia.json` | `"Kia"` | Korean mainstream |
| `lamborghini.json` | `"Lamborghini"` | Italian supercar |
| `lexus.json` | `"Lexus"` | Toyota luxury |
| `lincoln.json` | `"Lincoln"` | Ford luxury |
| `lotus.json` | `"Lotus"` | British sports car |
| `lucid.json` | `"Lucid"` | American electric luxury |
| `maserati.json` | `"Maserati"` | Italian luxury |
| `mazda.json` | `"Mazda"` | Japanese mainstream |
| `mclaren.json` | `"McLaren"` | **Special case - capital L** |
| `mercury.json` | `"Mercury"` | Ford defunct luxury |
| `mini.json` | `"MINI"` | **Special case - all caps** |
| `mitsubishi.json` | `"Mitsubishi"` | Japanese mainstream |
| `nissan.json` | `"Nissan"` | Japanese mainstream |
| `oldsmobile.json` | `"Oldsmobile"` | GM defunct |
| `plymouth.json` | `"Plymouth"` | Chrysler defunct |
| `polestar.json` | `"Polestar"` | Volvo electric |
| `pontiac.json` | `"Pontiac"` | GM defunct performance |
| `porsche.json` | `"Porsche"` | German sports car |
| `ram.json` | `"Ram"` | Stellantis trucks |
| `rivian.json` | `"Rivian"` | American electric trucks |
| `saab.json` | `"Saab"` | Swedish defunct |
| `saturn.json` | `"Saturn"` | GM defunct |
| `scion.json` | `"Scion"` | Toyota defunct youth |
| `smart.json` | `"Smart"` | Mercedes micro car |
| `subaru.json` | `"Subaru"` | Japanese AWD |
| `tesla.json` | `"Tesla"` | American electric |
| `toyota.json` | `"Toyota"` | Japanese mainstream |
| `volkswagen.json` | `"Volkswagen"` | German mainstream |
| `volvo.json` | `"Volvo"` | Swedish luxury |

## Special Cases Implementation

### All Caps Brands

```python
SPECIAL_CASES = {
    'Bmw': 'BMW',    # Bayerische Motoren Werke
    'Gmc': 'GMC',    # General Motors Company
    'Mini': 'MINI',  # Brand stylization
}
```

### Custom Capitalizations

```python
CUSTOM_CAPS = {
    'Mclaren': 'McLaren',  # Scottish naming convention
}
```

### Complete Special Cases Function

```python
def apply_special_cases(title_cased_name: str) -> str:
    """Apply brand-specific capitalization rules"""
    special_cases = {
        'Bmw': 'BMW',
        'Gmc': 'GMC',
        'Mini': 'MINI',
        'Mclaren': 'McLaren'
    }

    return special_cases.get(title_cased_name, title_cased_name)
```

## Validation Strategy

### Cross-Reference with sources/makes.json

The existing `mvp-platform-services/vehicles/etl/sources/makes.json` contains the authoritative list:

```json
{
  "manufacturers": [
    "Acura", "Alfa Romeo", "Aston Martin", "Audi", "BMW",
    "Bentley", "Buick", "Cadillac", "Chevrolet", "Chrysler",
    ...
  ]
}
```

### Validation Implementation

```python
class MakeNameMapper:
    def __init__(self):
        self.authoritative_makes = self.load_authoritative_makes()

    def load_authoritative_makes(self) -> Set[str]:
        """Load makes list from sources/makes.json"""
        with open('sources/makes.json') as f:
            data = json.load(f)
        return set(data['manufacturers'])

    def validate_mapping(self, filename: str, display_name: str) -> bool:
        """Validate mapped name against authoritative list"""
        return display_name in self.authoritative_makes

    def get_validation_report(self) -> ValidationReport:
        """Generate complete validation report"""
        mismatches = []
        json_files = glob.glob('sources/makes/*.json')

        for file_path in json_files:
            filename = os.path.basename(file_path)
            mapped_name = self.normalize_make_name(filename)

            if not self.validate_mapping(filename, mapped_name):
                mismatches.append({
                    'filename': filename,
                    'mapped_name': mapped_name,
                    'status': 'NOT_FOUND_IN_AUTHORITATIVE'
                })

        return ValidationReport(mismatches=mismatches)
```

## Error Handling

### Unknown Files

For JSON files not in the authoritative list:

1. **Log warning** with filename and mapped name
2. **Proceed with mapping** (don't fail)
3. **Include in validation report**

### Filename Edge Cases

```python
def handle_edge_cases(filename: str) -> str:
    """Handle unusual filename patterns"""
    # Collapse repeated underscores into a single underscore
    cleaned = re.sub(r'_+', '_', filename)

    # Handle special characters (future-proofing)
    cleaned = re.sub(r'[^a-zA-Z0-9_]', '', cleaned)

    return cleaned
```

> **Note:** The character filter `[^a-zA-Z0-9_]` also removes the `.` in `.json`, so apply `handle_edge_cases` to the base name (after the extension has been stripped), not to the raw filename.

## Testing Requirements

### Unit Tests

```python
def test_standard_mapping():
    mapper = MakeNameMapper()
    assert mapper.normalize_make_name('toyota.json') == 'Toyota'
    assert mapper.normalize_make_name('alfa_romeo.json') == 'Alfa Romeo'

def test_special_cases():
    mapper = MakeNameMapper()
    assert mapper.normalize_make_name('bmw.json') == 'BMW'
    assert mapper.normalize_make_name('gmc.json') == 'GMC'
    assert mapper.normalize_make_name('mclaren.json') == 'McLaren'

def test_validation():
    mapper = MakeNameMapper()
    assert mapper.validate_mapping('toyota.json', 'Toyota') == True
    assert mapper.validate_mapping('fake.json', 'Fake Brand') == False
```

### Integration Tests

1. **Process all 55 files**: Ensure all map correctly
2. **Database integration**: Verify display names in database
3. **API response**: Confirm proper names in dropdown responses

## Implementation Class

### Complete MakeNameMapper Class

```python
import json
import glob
import os
from typing import Set, Dict, List
from dataclasses import dataclass

@dataclass
class ValidationReport:
    mismatches: List[Dict[str, str]]
    total_files: int
    valid_mappings: int

    @property
    def success_rate(self) -> float:
        return self.valid_mappings / self.total_files if self.total_files > 0 else 0.0

class MakeNameMapper:
    def __init__(self, sources_dir: str = 'sources'):
        self.sources_dir = sources_dir
        self.authoritative_makes = self.load_authoritative_makes()

        self.special_cases = {
            'Bmw': 'BMW',
            'Gmc': 'GMC',
            'Mini': 'MINI',
            'Mclaren': 'McLaren'
        }

    def normalize_make_name(self, filename: str) -> str:
        """Convert filename to display name"""
        # Remove .json extension
        base_name = filename.replace('.json', '')

        # Replace underscores with spaces
        spaced_name = base_name.replace('_', ' ')

        # Apply title case
        title_cased = spaced_name.title()

        # Apply special cases
        return self.special_cases.get(title_cased, title_cased)

    def get_all_mappings(self) -> Dict[str, str]:
        """Get complete filename → display name mapping"""
        mappings = {}
        json_files = glob.glob(f'{self.sources_dir}/makes/*.json')

        for file_path in json_files:
            filename = os.path.basename(file_path)
            display_name = self.normalize_make_name(filename)
            mappings[filename] = display_name

        return mappings

    def validate_all_mappings(self) -> ValidationReport:
        """Validate all mappings against authoritative list"""
        mappings = self.get_all_mappings()
        mismatches = []

        for filename, display_name in mappings.items():
            if display_name not in self.authoritative_makes:
                mismatches.append({
                    'filename': filename,
                    'mapped_name': display_name,
                    'status': 'NOT_FOUND_IN_AUTHORITATIVE'
                })

        return ValidationReport(
            mismatches=mismatches,
            total_files=len(mappings),
            valid_mappings=len(mappings) - len(mismatches)
        )
```

> **Note:** `__init__` calls `self.load_authoritative_makes()`, which is not repeated in this listing; include the method shown under "Validation Implementation" above when using this class.

## Usage Examples

### Basic Usage

```python
mapper = MakeNameMapper()

# Single conversion
display_name = mapper.normalize_make_name('alfa_romeo.json')
print(display_name)  # Output: "Alfa Romeo"

# Get all mappings
all_mappings = mapper.get_all_mappings()
print(all_mappings['bmw.json'])  # Output: "BMW"
```

### Validation Usage

```python
# Validate all mappings
report = mapper.validate_all_mappings()
print(f"Success rate: {report.success_rate:.1%}")
print(f"Mismatches: {len(report.mismatches)}")

for mismatch in report.mismatches:
    print(f"⚠️ {mismatch['filename']} → {mismatch['mapped_name']}")
```