Initial Commit

This commit is contained in:
Eric Gullickson
2025-09-17 16:09:15 -05:00
parent 0cdb9803de
commit a052040e3a
373 changed files with 437090 additions and 6773 deletions

View File

@@ -0,0 +1,331 @@
# Make Name Mapping Documentation
## Overview
Rules and implementation for converting JSON filename conventions to proper display names in the database.
## Problem Statement
JSON files use lowercase filenames with underscores, but the database and API require proper display names:
- `alfa_romeo.json` → `"Alfa Romeo"`
- `land_rover.json` → `"Land Rover"`
- `rolls_royce.json` → `"Rolls Royce"`
## Normalization Rules
### Standard Transformation
1. **Remove .json extension**
2. **Replace underscores** with spaces
3. **Apply title case** to each word
4. **Apply special case exceptions**
### Implementation Algorithm
```python
def normalize_make_name(filename: str) -> str:
    """Convert a make JSON filename into its display name.

    The trailing ``.json`` extension is dropped, underscores become
    spaces, every word is title-cased, and the result is passed through
    ``apply_special_cases`` for brand-specific capitalization.

    Args:
        filename: Base filename such as ``'alfa_romeo.json'``.

    Returns:
        Whatever ``apply_special_cases`` returns for the title-cased name.
    """
    extension = '.json'
    # Strip the extension only when it is a suffix; str.replace would also
    # delete a '.json' that happens to occur in the middle of the name.
    if filename.endswith(extension):
        base_name = filename[:-len(extension)]
    else:
        base_name = filename
    # Replace underscores with spaces
    spaced_name = base_name.replace('_', ' ')
    # Apply title case
    title_cased = spaced_name.title()
    # Apply special cases
    return apply_special_cases(title_cased)
```
## Complete Filename Mapping
### Multi-Word Makes (Underscore Conversion)
| Filename | Display Name | Notes |
|----------|-------------|-------|
| `alfa_romeo.json` | `"Alfa Romeo"` | Italian brand |
| `aston_martin.json` | `"Aston Martin"` | British luxury |
| `land_rover.json` | `"Land Rover"` | British SUV brand |
| `rolls_royce.json` | `"Rolls Royce"` | Ultra-luxury brand |
### Single-Word Makes (Standard Title Case)
| Filename | Display Name | Notes |
|----------|-------------|-------|
| `acura.json` | `"Acura"` | Honda luxury division |
| `audi.json` | `"Audi"` | German luxury |
| `bentley.json` | `"Bentley"` | British luxury |
| `bmw.json` | `"BMW"` | **Special case - all caps** |
| `buick.json` | `"Buick"` | GM luxury |
| `cadillac.json` | `"Cadillac"` | GM luxury |
| `chevrolet.json` | `"Chevrolet"` | GM mainstream |
| `chrysler.json` | `"Chrysler"` | Stellantis brand |
| `dodge.json` | `"Dodge"` | Stellantis performance |
| `ferrari.json` | `"Ferrari"` | Italian supercar |
| `fiat.json` | `"Fiat"` | Italian mainstream |
| `ford.json` | `"Ford"` | American mainstream |
| `genesis.json` | `"Genesis"` | Hyundai luxury |
| `geo.json` | `"Geo"` | GM defunct brand |
| `gmc.json` | `"GMC"` | **Special case - all caps** |
| `honda.json` | `"Honda"` | Japanese mainstream |
| `hummer.json` | `"Hummer"` | GM truck brand |
| `hyundai.json` | `"Hyundai"` | Korean mainstream |
| `infiniti.json` | `"Infiniti"` | Nissan luxury |
| `isuzu.json` | `"Isuzu"` | Japanese commercial |
| `jaguar.json` | `"Jaguar"` | British luxury |
| `jeep.json` | `"Jeep"` | Stellantis SUV |
| `kia.json` | `"Kia"` | Korean mainstream |
| `lamborghini.json` | `"Lamborghini"` | Italian supercar |
| `lexus.json` | `"Lexus"` | Toyota luxury |
| `lincoln.json` | `"Lincoln"` | Ford luxury |
| `lotus.json` | `"Lotus"` | British sports car |
| `lucid.json` | `"Lucid"` | American electric luxury |
| `maserati.json` | `"Maserati"` | Italian luxury |
| `mazda.json` | `"Mazda"` | Japanese mainstream |
| `mclaren.json` | `"McLaren"` | **Special case - capital L** |
| `mercury.json` | `"Mercury"` | Ford defunct luxury |
| `mini.json` | `"MINI"` | **Special case - all caps** |
| `mitsubishi.json` | `"Mitsubishi"` | Japanese mainstream |
| `nissan.json` | `"Nissan"` | Japanese mainstream |
| `oldsmobile.json` | `"Oldsmobile"` | GM defunct |
| `plymouth.json` | `"Plymouth"` | Chrysler defunct |
| `polestar.json` | `"Polestar"` | Volvo electric |
| `pontiac.json` | `"Pontiac"` | GM defunct performance |
| `porsche.json` | `"Porsche"` | German sports car |
| `ram.json` | `"Ram"` | Stellantis trucks |
| `rivian.json` | `"Rivian"` | American electric trucks |
| `saab.json` | `"Saab"` | Swedish defunct |
| `saturn.json` | `"Saturn"` | GM defunct |
| `scion.json` | `"Scion"` | Toyota defunct youth |
| `smart.json` | `"Smart"` | Mercedes micro car |
| `subaru.json` | `"Subaru"` | Japanese AWD |
| `tesla.json` | `"Tesla"` | American electric |
| `toyota.json` | `"Toyota"` | Japanese mainstream |
| `volkswagen.json` | `"Volkswagen"` | German mainstream |
| `volvo.json` | `"Volvo"` | Swedish luxury |
## Special Cases Implementation
### All Caps Brands
```python
# Maps the title-cased form produced by str.title() to the all-caps
# spelling the brand actually uses.
SPECIAL_CASES = {
    title_cased: title_cased.upper()
    for title_cased in (
        'Bmw',   # Bayerische Motoren Werke
        'Gmc',   # General Motors Company
        'Mini',  # Brand stylization
    )
}
```
### Custom Capitalizations
```python
# Title-cased key -> brand spelling with an interior capital letter.
CUSTOM_CAPS = dict(
    Mclaren='McLaren',  # Scottish naming convention
)
```
### Complete Special Cases Function
```python
def apply_special_cases(title_cased_name: str) -> str:
    """Apply brand-specific capitalization rules.

    Args:
        title_cased_name: A make name already run through ``str.title()``,
            e.g. ``'Bmw'`` or ``'Alfa Romeo'``.

    Returns:
        The brand's own spelling when the whole name is a known special
        case; otherwise ``title_cased_name`` unchanged.
    """
    # Keys are the title-cased forms; the lookup is an exact, whole-name
    # match, so multi-word names pass through untouched.
    special_cases = {
        'Bmw': 'BMW',
        'Gmc': 'GMC',
        'Mini': 'MINI',
        'Mclaren': 'McLaren',
    }
    return special_cases.get(title_cased_name, title_cased_name)
```
## Validation Strategy
### Cross-Reference with sources/makes.json
The existing `mvp-platform-services/vehicles/etl/sources/makes.json` contains the authoritative list:
```json
{
"manufacturers": [
"Acura", "Alfa Romeo", "Aston Martin", "Audi", "BMW",
"Bentley", "Buick", "Cadillac", "Chevrolet", "Chrysler",
...
]
}
```
### Validation Implementation
```python
class MakeNameMapper:
    """Validate filename-derived make names against the authoritative list.

    This excerpt shows only the validation pieces; ``normalize_make_name``
    is defined on the complete class in the "Implementation Class" section.
    """

    def __init__(self):
        self.authoritative_makes = self.load_authoritative_makes()

    def load_authoritative_makes(self) -> Set[str]:
        """Load makes list from sources/makes.json.

        Returns:
            The set of names stored under the ``manufacturers`` key.

        Raises:
            FileNotFoundError: If ``sources/makes.json`` cannot be found
                relative to the current working directory.
        """
        with open('sources/makes.json', encoding='utf-8') as f:
            data = json.load(f)
        return set(data['manufacturers'])

    def validate_mapping(self, filename: str, display_name: str) -> bool:
        """Validate mapped name against authoritative list.

        Args:
            filename: Source filename; not used by the check itself.
            display_name: Mapped display name to look up.

        Returns:
            True when ``display_name`` is in the authoritative set.
        """
        return display_name in self.authoritative_makes

    def get_validation_report(self) -> 'ValidationReport':
        """Generate complete validation report.

        Every ``sources/makes/*.json`` file is mapped to a display name and
        checked against the authoritative list; names that are absent are
        collected as mismatches.
        """
        # Sorted so the report order does not depend on directory order.
        json_files = sorted(glob.glob('sources/makes/*.json'))
        mismatches = []
        for file_path in json_files:
            filename = os.path.basename(file_path)
            mapped_name = self.normalize_make_name(filename)
            if not self.validate_mapping(filename, mapped_name):
                mismatches.append({
                    'filename': filename,
                    'mapped_name': mapped_name,
                    'status': 'NOT_FOUND_IN_AUTHORITATIVE'
                })
        # ValidationReport also carries the counts behind success_rate.
        return ValidationReport(
            mismatches=mismatches,
            total_files=len(json_files),
            valid_mappings=len(json_files) - len(mismatches),
        )
```
## Error Handling
### Unknown Files
For JSON files not in the authoritative list:
1. **Log warning** with filename and mapped name
2. **Proceed with mapping** (don't fail)
3. **Include in validation report**
### Filename Edge Cases
```python
import re


def handle_edge_cases(filename: str) -> str:
    """Handle unusual filename patterns.

    Only the part before the last ``.`` is cleaned, so the extension
    survives; filtering the whole filename would also delete the dot and
    turn ``'alfa__romeo.json'`` into ``'alfa_romeojson'``.

    Args:
        filename: Raw filename, with or without an extension.

    Returns:
        The filename with characters outside ``[a-zA-Z0-9_]`` removed from
        the stem and runs of underscores collapsed to one.
    """
    stem, dot, extension = filename.rpartition('.')
    if not dot:
        # No extension present: rpartition put the whole name in the tail.
        stem, extension = filename, ''
    # Handle special characters (future-proofing)
    cleaned = re.sub(r'[^a-zA-Z0-9_]', '', stem)
    # Remove multiple underscores; done after the filter because deleting
    # a character between two underscores creates a new run.
    cleaned = re.sub(r'_+', '_', cleaned)
    return f'{cleaned}{dot}{extension}'
```
## Testing Requirements
### Unit Tests
```python
def test_standard_mapping():
    """Single-word and underscore-separated filenames are title-cased."""
    expected_by_filename = {
        'toyota.json': 'Toyota',
        'alfa_romeo.json': 'Alfa Romeo',
    }
    mapper = MakeNameMapper()
    for filename, expected in expected_by_filename.items():
        assert mapper.normalize_make_name(filename) == expected
def test_special_cases():
    """Every brand in the special-case table gets its own capitalization."""
    mapper = MakeNameMapper()
    assert mapper.normalize_make_name('bmw.json') == 'BMW'
    assert mapper.normalize_make_name('gmc.json') == 'GMC'
    # MINI is listed as a special case alongside the others.
    assert mapper.normalize_make_name('mini.json') == 'MINI'
    assert mapper.normalize_make_name('mclaren.json') == 'McLaren'
def test_validation():
    """Known makes validate; names missing from the list do not."""
    mapper = MakeNameMapper()
    # Assert truthiness directly rather than comparing to True/False.
    assert mapper.validate_mapping('toyota.json', 'Toyota')
    assert not mapper.validate_mapping('fake.json', 'Fake Brand')
```
### Integration Tests
1. **Process all 55 files**: Ensure all map correctly
2. **Database integration**: Verify display names in database
3. **API response**: Confirm proper names in dropdown responses
## Implementation Class
### Complete MakeNameMapper Class
```python
import json
import glob
import os
from typing import Set, Dict, List
from dataclasses import dataclass
@dataclass
class ValidationReport:
    """Outcome of checking mapped make names against the authoritative list.

    ``total_files`` and ``valid_mappings`` default to 0 so a report can be
    built from the mismatch list alone.
    """

    # One dict per mapped name that was not found in the authoritative list.
    mismatches: List[Dict[str, str]]
    # Number of make files that were examined.
    total_files: int = 0
    # Number of files whose mapped name was found in the authoritative list.
    valid_mappings: int = 0

    @property
    def success_rate(self) -> float:
        """Fraction of examined files that mapped to a valid name.

        Returns 0.0 when no files were examined, avoiding division by zero.
        """
        return self.valid_mappings / self.total_files if self.total_files > 0 else 0.0
class MakeNameMapper:
    """Map make JSON filenames to display names and validate the result.

    Args:
        sources_dir: Directory containing ``makes.json`` (the authoritative
            list) and a ``makes/`` folder with one JSON file per make.
    """

    def __init__(self, sources_dir: str = 'sources'):
        self.sources_dir = sources_dir
        # Keys are the title-cased forms produced by str.title().
        self.special_cases = {
            'Bmw': 'BMW',
            'Gmc': 'GMC',
            'Mini': 'MINI',
            'Mclaren': 'McLaren'
        }
        self.authoritative_makes = self.load_authoritative_makes()

    def load_authoritative_makes(self) -> Set[str]:
        """Load the authoritative makes list from ``<sources_dir>/makes.json``.

        Returns:
            The set of names stored under the ``manufacturers`` key.

        Raises:
            FileNotFoundError: If ``makes.json`` is missing from
                ``sources_dir``.
        """
        makes_path = os.path.join(self.sources_dir, 'makes.json')
        with open(makes_path, encoding='utf-8') as f:
            data = json.load(f)
        return set(data['manufacturers'])

    def normalize_make_name(self, filename: str) -> str:
        """Convert filename to display name.

        Args:
            filename: Base filename such as ``'alfa_romeo.json'``.

        Returns:
            The title-cased name with underscores turned into spaces, or
            the brand's own spelling when the whole name is a special case.
        """
        extension = '.json'
        # Strip the extension only as a suffix; str.replace would also
        # delete a '.json' occurring in the middle of the name.
        if filename.endswith(extension):
            base_name = filename[:-len(extension)]
        else:
            base_name = filename
        # Replace underscores with spaces
        spaced_name = base_name.replace('_', ' ')
        # Apply title case
        title_cased = spaced_name.title()
        # Apply special cases
        return self.special_cases.get(title_cased, title_cased)

    def get_all_mappings(self) -> Dict[str, str]:
        """Get complete filename → display name mapping.

        Returns:
            A dict keyed by the base filename of every
            ``<sources_dir>/makes/*.json`` file, in sorted path order.
        """
        # Escape sources_dir so glob metacharacters in it are taken literally.
        pattern = os.path.join(glob.escape(self.sources_dir), 'makes', '*.json')
        mappings = {}
        for file_path in sorted(glob.glob(pattern)):
            filename = os.path.basename(file_path)
            mappings[filename] = self.normalize_make_name(filename)
        return mappings

    def validate_all_mappings(self) -> 'ValidationReport':
        """Validate all mappings against authoritative list.

        Returns:
            A ``ValidationReport`` listing every mapped name missing from
            the authoritative set, plus the total and valid counts.
        """
        mappings = self.get_all_mappings()
        mismatches = [
            {
                'filename': filename,
                'mapped_name': display_name,
                'status': 'NOT_FOUND_IN_AUTHORITATIVE'
            }
            for filename, display_name in mappings.items()
            if display_name not in self.authoritative_makes
        ]
        return ValidationReport(
            mismatches=mismatches,
            total_files=len(mappings),
            valid_mappings=len(mappings) - len(mismatches)
        )
```
## Usage Examples
### Basic Usage
```python
# Build the mapper; construction loads the authoritative makes list
mapper = MakeNameMapper()

# Convert one filename
display_name = mapper.normalize_make_name('alfa_romeo.json')
print(display_name)  # Output: "Alfa Romeo"

# Build the filename -> display name table for every make file
all_mappings = mapper.get_all_mappings()
bmw_display_name = all_mappings['bmw.json']
print(bmw_display_name)  # Output: "BMW"
```
### Validation Usage
```python
# Validate all mappings
report = mapper.validate_all_mappings()
print(f"Success rate: {report.success_rate:.1%}")
print(f"Mismatches: {len(report.mismatches)}")
for mismatch in report.mismatches:
print(f"⚠️ {mismatch['filename']}{mismatch['mapped_name']}")
```