11 KiB
11 KiB
Make Name Mapping Documentation
Overview
Rules and implementation for converting JSON filename conventions to proper display names in the database.
Problem Statement
JSON files use lowercase filenames with underscores, but the database and API require proper display names:
- alfa_romeo.json → "Alfa Romeo"
- land_rover.json → "Land Rover"
- rolls_royce.json → "Rolls Royce"
Normalization Rules
Standard Transformation
- Remove .json extension
- Replace underscores with spaces
- Apply title case to each word
- Apply special case exceptions
Implementation Algorithm
def normalize_make_name(filename: str) -> str:
    """Convert a make filename such as ``alfa_romeo.json`` to a display name.

    Steps: strip a trailing ``.json`` extension, replace underscores with
    spaces, title-case every word, then apply the brand-specific exceptions.

    Args:
        filename: File name, with or without the ``.json`` extension.

    Returns:
        The display name produced by ``apply_special_cases``.
    """
    extension = '.json'
    # Only strip the extension when it is a real suffix; str.replace would
    # also delete '.json' occurring in the middle of the name.
    if filename.endswith(extension):
        base_name = filename[:-len(extension)]
    else:
        base_name = filename
    # Replace underscores with spaces
    spaced_name = base_name.replace('_', ' ')
    # Apply title case
    title_cased = spaced_name.title()
    # Apply special cases
    return apply_special_cases(title_cased)
Complete Filename Mapping
Multi-Word Makes (Underscore Conversion)
| Filename | Display Name | Notes |
|---|---|---|
| `alfa_romeo.json` | "Alfa Romeo" | Italian brand |
| `aston_martin.json` | "Aston Martin" | British luxury |
| `land_rover.json` | "Land Rover" | British SUV brand |
| `rolls_royce.json` | "Rolls Royce" | Ultra-luxury brand |
Single-Word Makes (Standard Title Case)
| Filename | Display Name | Notes |
|---|---|---|
| `acura.json` | "Acura" | Honda luxury division |
| `audi.json` | "Audi" | German luxury |
| `bentley.json` | "Bentley" | British luxury |
| `bmw.json` | "BMW" | Special case - all caps |
| `buick.json` | "Buick" | GM luxury |
| `cadillac.json` | "Cadillac" | GM luxury |
| `chevrolet.json` | "Chevrolet" | GM mainstream |
| `chrysler.json` | "Chrysler" | Stellantis brand |
| `dodge.json` | "Dodge" | Stellantis performance |
| `ferrari.json` | "Ferrari" | Italian supercar |
| `fiat.json` | "Fiat" | Italian mainstream |
| `ford.json` | "Ford" | American mainstream |
| `genesis.json` | "Genesis" | Hyundai luxury |
| `geo.json` | "Geo" | GM defunct brand |
| `gmc.json` | "GMC" | Special case - all caps |
| `honda.json` | "Honda" | Japanese mainstream |
| `hummer.json` | "Hummer" | GM truck brand |
| `hyundai.json` | "Hyundai" | Korean mainstream |
| `infiniti.json` | "Infiniti" | Nissan luxury |
| `isuzu.json` | "Isuzu" | Japanese commercial |
| `jaguar.json` | "Jaguar" | British luxury |
| `jeep.json` | "Jeep" | Stellantis SUV |
| `kia.json` | "Kia" | Korean mainstream |
| `lamborghini.json` | "Lamborghini" | Italian supercar |
| `lexus.json` | "Lexus" | Toyota luxury |
| `lincoln.json` | "Lincoln" | Ford luxury |
| `lotus.json` | "Lotus" | British sports car |
| `lucid.json` | "Lucid" | American electric luxury |
| `maserati.json` | "Maserati" | Italian luxury |
| `mazda.json` | "Mazda" | Japanese mainstream |
| `mclaren.json` | "McLaren" | Special case - capital L |
| `mercury.json` | "Mercury" | Ford defunct luxury |
| `mini.json` | "MINI" | Special case - all caps |
| `mitsubishi.json` | "Mitsubishi" | Japanese mainstream |
| `nissan.json` | "Nissan" | Japanese mainstream |
| `oldsmobile.json` | "Oldsmobile" | GM defunct |
| `plymouth.json` | "Plymouth" | Chrysler defunct |
| `polestar.json` | "Polestar" | Volvo electric |
| `pontiac.json` | "Pontiac" | GM defunct performance |
| `porsche.json` | "Porsche" | German sports car |
| `ram.json` | "Ram" | Stellantis trucks |
| `rivian.json` | "Rivian" | American electric trucks |
| `saab.json` | "Saab" | Swedish defunct |
| `saturn.json` | "Saturn" | GM defunct |
| `scion.json` | "Scion" | Toyota defunct youth |
| `smart.json` | "Smart" | Mercedes micro car |
| `subaru.json` | "Subaru" | Japanese AWD |
| `tesla.json` | "Tesla" | American electric |
| `toyota.json` | "Toyota" | Japanese mainstream |
| `volkswagen.json` | "Volkswagen" | German mainstream |
| `volvo.json` | "Volvo" | Swedish luxury |
Special Cases Implementation
All Caps Brands
# Title-cased make names that must be rendered fully upper-cased.
SPECIAL_CASES = dict(
    Bmw='BMW',    # Bayerische Motoren Werke
    Gmc='GMC',    # General Motors Company
    Mini='MINI',  # Brand stylization
)
Custom Capitalizations
# Title-cased make names that need mixed-case capitalization.
CUSTOM_CAPS = dict(
    Mclaren='McLaren',  # Scottish naming convention
)
Complete Special Cases Function
def apply_special_cases(title_cased_name: str) -> str:
    """Return the brand-specific spelling of a title-cased make name.

    Names without an override are returned unchanged. The lookup is an
    exact, whole-string match.
    """
    overrides = {
        'Bmw': 'BMW',
        'Gmc': 'GMC',
        'Mini': 'MINI',
        'Mclaren': 'McLaren',
    }
    if title_cased_name in overrides:
        return overrides[title_cased_name]
    return title_cased_name
Validation Strategy
Cross-Reference with sources/makes.json
The existing mvp-platform-services/vehicles/etl/sources/makes.json contains the authoritative list:
{
"manufacturers": [
"Acura", "Alfa Romeo", "Aston Martin", "Audi", "BMW",
"Bentley", "Buick", "Cadillac", "Chevrolet", "Chrysler",
...
]
}
Validation Implementation
class MakeNameMapper:
    """Map make filenames to display names and validate them.

    Validation compares each mapped name against the ``manufacturers`` list
    read from ``sources/makes.json``.
    """

    # Title-cased names whose brand spelling differs from plain title case.
    _SPECIAL_CASES = {
        'Bmw': 'BMW',
        'Gmc': 'GMC',
        'Mini': 'MINI',
        'Mclaren': 'McLaren',
    }

    def __init__(self):
        self.authoritative_makes = self.load_authoritative_makes()

    def load_authoritative_makes(self) -> Set[str]:
        """Load makes list from sources/makes.json"""
        with open('sources/makes.json', encoding='utf-8') as f:
            data = json.load(f)
        return set(data['manufacturers'])

    def normalize_make_name(self, filename: str) -> str:
        """Convert a filename such as ``alfa_romeo.json`` to a display name."""
        extension = '.json'
        # Strip the extension only when it is a true suffix.
        if filename.endswith(extension):
            filename = filename[:-len(extension)]
        title_cased = filename.replace('_', ' ').title()
        return self._SPECIAL_CASES.get(title_cased, title_cased)

    def validate_mapping(self, filename: str, display_name: str) -> bool:
        """Validate mapped name against authoritative list"""
        # ``filename`` is accepted for interface symmetry; only the display
        # name takes part in the membership check.
        return display_name in self.authoritative_makes

    def get_validation_report(self) -> "ValidationReport":
        """Generate complete validation report

        Every ``*.json`` file under ``sources/makes/`` is mapped to a display
        name; names missing from the authoritative list are reported as
        mismatches along with the overall file and success counts.
        """
        mismatches = []
        json_files = glob.glob('sources/makes/*.json')
        for file_path in json_files:
            filename = os.path.basename(file_path)
            mapped_name = self.normalize_make_name(filename)
            if not self.validate_mapping(filename, mapped_name):
                mismatches.append({
                    'filename': filename,
                    'mapped_name': mapped_name,
                    'status': 'NOT_FOUND_IN_AUTHORITATIVE'
                })
        # ValidationReport requires the totals in addition to the mismatches.
        return ValidationReport(
            mismatches=mismatches,
            total_files=len(json_files),
            valid_mappings=len(json_files) - len(mismatches),
        )
Error Handling
Unknown Files
For JSON files not in the authoritative list:
- Log warning with filename and mapped name
- Proceed with mapping (don't fail)
- Include in validation report
Filename Edge Cases
def handle_edge_cases(filename: str) -> str:
    """Handle unusual filename patterns

    Removes every character other than ASCII letters, digits and underscores
    from the name, then collapses runs of underscores into a single one. A
    trailing ``.json`` extension is kept intact so the result is still a
    usable filename.

    Args:
        filename: File name, with or without the ``.json`` extension.

    Returns:
        The cleaned file name.
    """
    import re  # local import keeps this snippet self-contained

    extension = '.json'
    # Set the extension aside: its dot would otherwise be stripped as a
    # "special character", turning 'bmw.json' into 'bmwjson'.
    if filename.endswith(extension):
        stem, suffix = filename[:-len(extension)], extension
    else:
        stem, suffix = filename, ''
    # Handle special characters (future-proofing). Done first, because
    # removing a character can leave adjacent underscores behind.
    stem = re.sub(r'[^a-zA-Z0-9_]', '', stem)
    # Remove multiple underscores
    stem = re.sub(r'_+', '_', stem)
    return stem + suffix
Testing Requirements
Unit Tests
def test_standard_mapping():
    """Plain filenames map to title-cased, space-separated display names."""
    mapper = MakeNameMapper()
    expected_by_filename = (
        ('toyota.json', 'Toyota'),
        ('alfa_romeo.json', 'Alfa Romeo'),
    )
    for filename, expected in expected_by_filename:
        assert mapper.normalize_make_name(filename) == expected
def test_special_cases():
    """Brand-specific capitalizations override plain title case."""
    mapper = MakeNameMapper()
    assert mapper.normalize_make_name('bmw.json') == 'BMW'
    assert mapper.normalize_make_name('gmc.json') == 'GMC'
    # MINI is a documented special case as well, so cover it here too.
    assert mapper.normalize_make_name('mini.json') == 'MINI'
    assert mapper.normalize_make_name('mclaren.json') == 'McLaren'
def test_validation():
    """validate_mapping reports membership in the authoritative list."""
    mapper = MakeNameMapper()
    # Identity checks pin the bool return type instead of comparing with ==.
    assert mapper.validate_mapping('toyota.json', 'Toyota') is True
    assert mapper.validate_mapping('fake.json', 'Fake Brand') is False
Integration Tests
- Process all 55 files: Ensure all map correctly
- Database integration: Verify display names in database
- API response: Confirm proper names in dropdown responses
Implementation Class
Complete MakeNameMapper Class
import json
import glob
import os
from typing import Set, Dict, List
from dataclasses import dataclass
@dataclass
class ValidationReport:
    """Outcome of checking mapped display names against the authoritative list."""

    # One dict per mapping that failed validation.
    mismatches: List[Dict[str, str]]
    # Number of files that were mapped.
    total_files: int
    # Number of mappings that passed validation.
    valid_mappings: int

    @property
    def success_rate(self) -> float:
        """Fraction of valid mappings, or 0.0 when no files were processed."""
        if self.total_files > 0:
            return self.valid_mappings / self.total_files
        return 0.0
class MakeNameMapper:
    """Convert make filenames to display names and validate the results.

    Args:
        sources_dir: Directory that contains ``makes.json`` (the
            authoritative list) and a ``makes/`` folder of per-make JSON
            files.
    """

    def __init__(self, sources_dir: str = 'sources'):
        self.sources_dir = sources_dir
        self.authoritative_makes = self.load_authoritative_makes()
        # Title-cased names whose brand spelling differs from plain title case.
        self.special_cases = {
            'Bmw': 'BMW',
            'Gmc': 'GMC',
            'Mini': 'MINI',
            'Mclaren': 'McLaren'
        }

    def load_authoritative_makes(self) -> Set[str]:
        """Load the ``manufacturers`` list from ``<sources_dir>/makes.json``."""
        makes_path = os.path.join(self.sources_dir, 'makes.json')
        with open(makes_path, encoding='utf-8') as f:
            data = json.load(f)
        return set(data['manufacturers'])

    def normalize_make_name(self, filename: str) -> str:
        """Convert filename to display name"""
        extension = '.json'
        # Strip the extension only when it is a true suffix; str.replace
        # would also delete '.json' appearing in the middle of the name.
        if filename.endswith(extension):
            base_name = filename[:-len(extension)]
        else:
            base_name = filename
        # Replace underscores with spaces
        spaced_name = base_name.replace('_', ' ')
        # Apply title case
        title_cased = spaced_name.title()
        # Apply special cases
        return self.special_cases.get(title_cased, title_cased)

    def get_all_mappings(self) -> Dict[str, str]:
        """Get complete filename → display name mapping"""
        pattern = os.path.join(self.sources_dir, 'makes', '*.json')
        # glob returns files in arbitrary order; sort for a stable mapping.
        filenames = (os.path.basename(path) for path in sorted(glob.glob(pattern)))
        return {name: self.normalize_make_name(name) for name in filenames}

    def validate_all_mappings(self) -> "ValidationReport":
        """Validate all mappings against authoritative list"""
        mappings = self.get_all_mappings()
        mismatches = [
            {
                'filename': filename,
                'mapped_name': display_name,
                'status': 'NOT_FOUND_IN_AUTHORITATIVE'
            }
            for filename, display_name in mappings.items()
            if display_name not in self.authoritative_makes
        ]
        return ValidationReport(
            mismatches=mismatches,
            total_files=len(mappings),
            valid_mappings=len(mappings) - len(mismatches)
        )
Usage Examples
Basic Usage
# Build a mapper; the constructor loads the authoritative makes list.
mapper = MakeNameMapper()

# Single conversion: filename in, display name out
display_name = mapper.normalize_make_name('alfa_romeo.json')
print(display_name)  # Output: "Alfa Romeo"

# Get all mappings, keyed by filename
all_mappings = mapper.get_all_mappings()
print(all_mappings['bmw.json'])  # Output: "BMW"
Validation Usage
# Validate all mappings (assumes `mapper` is an existing MakeNameMapper)
report = mapper.validate_all_mappings()
# success_rate is a fraction; the .1% format renders it as a percentage
print(f"Success rate: {report.success_rate:.1%}")
print(f"Mismatches: {len(report.mismatches)}")
# List each file whose mapped name is missing from the authoritative list
for mismatch in report.mismatches:
    print(f"⚠️ {mismatch['filename']} → {mismatch['mapped_name']}")