Initial Commit

This commit is contained in:
Eric Gullickson
2025-09-17 16:09:15 -05:00
parent 0cdb9803de
commit a052040e3a
373 changed files with 437090 additions and 6773 deletions

View File

@@ -0,0 +1,3 @@
from .make_filter import MakeFilter
__all__ = ['MakeFilter']

View File

@@ -0,0 +1,392 @@
"""
Engine Specification Parser
Parses engine specifications from JSON vehicle data into structured components.
Handles displacement, configuration, cylinders, fuel type, and aspiration.
CRITICAL REQUIREMENT: L-configuration normalization
- L3 → I3 (L-configuration treated as Inline)
- L4 → I4 (L-configuration treated as Inline)
Standard format: {displacement}L {config}{cylinders} {modifiers}
Examples:
- "2.0L I4" → 2.0L, Inline, 4-cylinder
- "1.5L L3 PLUG-IN HYBRID EV- (PHEV)" → 1.5L, Inline (normalized), 3-cyl, Plug-in Hybrid
- "2.4L H4" → 2.4L, Horizontal (Subaru Boxer), 4-cylinder
Usage:
parser = EngineSpecParser()
spec = parser.parse_engine_string("1.5L L3 PLUG-IN HYBRID EV- (PHEV)")
# spec.configuration == "I" (normalized from L)
"""
import re
import logging
from typing import Optional, List, Pattern
from dataclasses import dataclass
logger = logging.getLogger(__name__)
@dataclass
class EngineSpec:
    """Structured result of parsing one engine specification string."""
    displacement_l: Optional[float]  # Engine displacement in liters (None for electric/unparsed)
    configuration: str               # I, V, H, W, Electric, or Unknown
    cylinders: Optional[int]         # Cylinder count (None for electric/unparsed)
    fuel_type: str                   # Gasoline, Hybrid variants, Electric, Flex Fuel
    aspiration: str                  # Natural, Turbocharged, Supercharged
    raw_string: str                  # Original engine string, preserved verbatim

    def __str__(self) -> str:
        # Compact human-readable summary, e.g. "EngineSpec(2.0L I4, Gasoline, Natural)"
        engine = "{}L {}{}".format(self.displacement_l, self.configuration, self.cylinders)
        return "EngineSpec({}, {}, {})".format(engine, self.fuel_type, self.aspiration)
class EngineSpecParser:
    """Parse engine specifications with L→I normalization.

    Parses strings of the form "{displacement}L {config}{cylinders} {modifiers}"
    (e.g. "2.0L I4", "1.5L L3 PLUG-IN HYBRID EV- (PHEV)") into EngineSpec
    objects. L-configurations are normalized to I (Inline); unparseable
    strings fall back to an EngineSpec with Unknown fields.
    """

    def __init__(self):
        """Compile all regex patterns used during parsing."""
        # Primary engine pattern: {displacement}L {config}{cylinders}
        # Supports I, V, H, L, W configurations
        self.engine_pattern = re.compile(r'(\d+\.?\d*)L\s+([IVHLW])(\d+)', re.IGNORECASE)
        # Hybrid detection patterns (most specific first, so PHEV/FHEV are
        # recognized before the generic HYBRID match)
        self.hybrid_patterns = [
            (re.compile(r'PLUG-IN HYBRID EV-?\s*\(PHEV\)', re.IGNORECASE), 'Plug-in Hybrid'),
            (re.compile(r'FULL HYBRID EV-?\s*\(FHEV\)', re.IGNORECASE), 'Full Hybrid'),
            (re.compile(r'HYBRID', re.IGNORECASE), 'Hybrid'),
        ]
        # Other fuel type patterns
        self.fuel_patterns = [
            (re.compile(r'FLEX', re.IGNORECASE), 'Flex Fuel'),
            (re.compile(r'ELECTRIC', re.IGNORECASE), 'Electric'),
        ]
        # Aspiration patterns.
        # FIX: the bare 'SC' alternative previously matched any 'sc'
        # substring inside longer words; word boundaries restrict it to the
        # standalone 'SC' abbreviation.
        self.aspiration_patterns = [
            (re.compile(r'TURBO', re.IGNORECASE), 'Turbocharged'),
            (re.compile(r'SUPERCHARGED|\bSC\b', re.IGNORECASE), 'Supercharged'),
        ]
        logger.debug("EngineSpecParser initialized with regex patterns")

    def normalize_configuration(self, config: str) -> str:
        """
        CRITICAL: Convert L-configuration to I (Inline)
        L-configurations are alternate notation for Inline engines.
        W-configurations are W-type engines (VW Group, Bentley, etc.)
        Args:
            config: Configuration character (I, V, H, L, W)
        Returns:
            Normalized configuration (L becomes I, others unchanged, upper-cased)
        """
        config_upper = config.upper()
        if config_upper == 'L':
            logger.debug("Normalizing L-configuration to I (Inline)")
            return 'I'
        return config_upper

    def extract_fuel_type(self, engine_str: str) -> str:
        """
        Extract fuel type from engine string
        Priority order:
        1. Hybrid patterns (PHEV, FHEV, HYBRID)
        2. Other fuel types (FLEX, ELECTRIC)
        3. Default to Gasoline
        Args:
            engine_str: Original engine string
        Returns:
            Detected fuel type
        """
        # Check hybrid patterns first (most specific)
        for pattern, fuel_type in self.hybrid_patterns:
            if pattern.search(engine_str):
                logger.debug(f"Detected fuel type '{fuel_type}' from '{engine_str}'")
                return fuel_type
        # Check other fuel types
        for pattern, fuel_type in self.fuel_patterns:
            if pattern.search(engine_str):
                logger.debug(f"Detected fuel type '{fuel_type}' from '{engine_str}'")
                return fuel_type
        # Default to gasoline
        return 'Gasoline'

    def extract_aspiration(self, engine_str: str) -> str:
        """
        Extract aspiration type from engine string
        Args:
            engine_str: Original engine string
        Returns:
            Detected aspiration type ('Natural' when no pattern matches)
        """
        for pattern, aspiration in self.aspiration_patterns:
            if pattern.search(engine_str):
                logger.debug(f"Detected aspiration '{aspiration}' from '{engine_str}'")
                return aspiration
        return 'Natural'  # Default to naturally aspirated

    def parse_engine_string(self, engine_str: str) -> EngineSpec:
        """
        Parse complete engine specification
        Args:
            engine_str: Engine specification string
        Returns:
            EngineSpec with parsed components (fallback spec if unparseable)
        """
        if not engine_str or not engine_str.strip():
            logger.warning("Empty engine string provided")
            return self.create_fallback_engine("Empty Engine String")
        engine_str = engine_str.strip()
        # Try to match standard engine pattern (anchored at string start)
        match = self.engine_pattern.match(engine_str)
        if not match:
            logger.warning(f"Could not parse engine string: '{engine_str}'")
            return self.create_fallback_engine(engine_str)
        try:
            # Extract basic components
            displacement = float(match.group(1))
            raw_config = match.group(2)
            cylinders = int(match.group(3))
            # CRITICAL: Apply L→I normalization
            config = self.normalize_configuration(raw_config)
            # Extract fuel type and aspiration from modifiers
            fuel_type = self.extract_fuel_type(engine_str)
            aspiration = self.extract_aspiration(engine_str)
            # Log L→I normalization when it occurs
            # FIX: restored the " → " separator that was missing between the
            # interpolations, which made the message unreadable.
            if raw_config.upper() == 'L' and config == 'I':
                logger.info(f"L→I normalization applied: '{engine_str}' → {displacement}L I{cylinders}")
            spec = EngineSpec(
                displacement_l=displacement,
                configuration=config,
                cylinders=cylinders,
                fuel_type=fuel_type,
                aspiration=aspiration,
                raw_string=engine_str
            )
            logger.debug(f"Parsed '{engine_str}' → {spec}")
            return spec
        except (ValueError, IndexError) as e:
            logger.error(f"Failed to parse matched components from '{engine_str}': {e}")
            return self.create_fallback_engine(engine_str)

    def create_fallback_engine(self, raw_string: str) -> EngineSpec:
        """
        Create fallback engine spec for unparseable strings
        Args:
            raw_string: Original engine string that couldn't be parsed
        Returns:
            EngineSpec with unknown values but preserved raw string
        """
        logger.debug(f"Creating fallback engine for '{raw_string}'")
        return EngineSpec(
            displacement_l=None,
            configuration="Unknown",
            cylinders=None,
            fuel_type="Unknown",
            aspiration="Natural",
            raw_string=raw_string
        )

    def create_electric_motor(self) -> EngineSpec:
        """
        Create default electric motor spec for empty engines arrays
        Common for Tesla, Lucid, and other electric vehicles that have
        empty engines arrays in their JSON data.
        Returns:
            EngineSpec configured for electric motor
        """
        logger.debug("Creating default electric motor spec")
        # NOTE(review): EngineSpec declares aspiration as str but None is
        # used here for N/A — callers may rely on None; confirm before
        # tightening the type.
        return EngineSpec(
            displacement_l=None,       # N/A for electric
            configuration="Electric",  # Special designation
            cylinders=None,            # N/A for electric
            fuel_type="Electric",
            aspiration=None,           # N/A for electric
            raw_string="Electric Motor"
        )

    def parse_multiple_engines(self, engine_strings: List[str]) -> List[EngineSpec]:
        """
        Parse multiple engine specifications
        Args:
            engine_strings: List of engine specification strings
        Returns:
            List of parsed EngineSpec objects (a single synthetic electric
            motor spec when the input list is empty)
        """
        if not engine_strings:
            # Handle empty engines array (common for electric vehicles)
            logger.info("Empty engines array detected - creating electric motor")
            return [self.create_electric_motor()]
        specs = [self.parse_engine_string(engine_str) for engine_str in engine_strings]
        logger.debug(f"Parsed {len(specs)} engines from {len(engine_strings)} strings")
        return specs

    def get_unique_engines(self, engine_specs: List[EngineSpec]) -> List[EngineSpec]:
        """
        Get unique engines based on key attributes
        Args:
            engine_specs: List of engine specifications
        Returns:
            List of unique engine specifications (first occurrence wins,
            input order preserved)
        """
        seen = set()
        unique_specs = []
        for spec in engine_specs:
            # Key deliberately excludes raw_string so cosmetic string
            # differences don't produce duplicates.
            key = (
                spec.displacement_l,
                spec.configuration,
                spec.cylinders,
                spec.fuel_type,
                spec.aspiration
            )
            if key not in seen:
                seen.add(key)
                unique_specs.append(spec)
            else:
                logger.debug(f"Skipping duplicate engine: {spec}")
        logger.info(f"Reduced {len(engine_specs)} engines to {len(unique_specs)} unique engines")
        return unique_specs

    def validate_engine_spec(self, spec: EngineSpec) -> List[str]:
        """
        Validate engine specification for data quality issues
        Args:
            spec: Engine specification to validate
        Returns:
            List of validation warnings (empty if no issues)
        """
        warnings = []
        # Check displacement
        if spec.displacement_l is not None:
            if spec.displacement_l <= 0:
                warnings.append(f"Invalid displacement: {spec.displacement_l}")
            elif spec.displacement_l > 20:  # Unrealistic for production cars
                warnings.append(f"Unusually large displacement: {spec.displacement_l}L")
        # Check cylinders
        if spec.cylinders is not None:
            if spec.cylinders <= 0:
                warnings.append(f"Invalid cylinder count: {spec.cylinders}")
            elif spec.cylinders > 16:  # Very rare in production
                warnings.append(f"Unusually high cylinder count: {spec.cylinders}")
        # Check configuration consistency
        if spec.configuration == "Electric" and spec.displacement_l is not None:
            warnings.append("Electric motor should not have displacement")
        if spec.configuration not in ["I", "V", "H", "W", "Electric", "Unknown"]:
            warnings.append(f"Unexpected configuration: {spec.configuration}")
        # Check fuel type consistency
        if spec.fuel_type == "Electric" and spec.configuration != "Electric":
            warnings.append("Electric fuel type should have Electric configuration")
        return warnings
# Example usage and testing functions
def example_usage():
    """Demonstrate EngineSpecParser usage"""
    print("🔧 EngineSpecParser Example Usage")
    print("=" * 40)

    parser = EngineSpecParser()

    # Representative inputs drawn from the actual JSON vehicle data,
    # grouped by the parsing behavior they exercise.
    demo_engines = [
        # Standard engines
        "2.0L I4",
        "3.5L V6",
        # L→I normalization examples (CRITICAL)
        "1.5L L3",
        "1.2L L3 FULL HYBRID EV- (FHEV)",
        # Subaru Boxer engines
        "2.4L H4",
        # W-configuration engines (VW Group, Bentley)
        "6.0L W12",
        "4.0L W8",
        # Hybrid examples
        "2.5L I4 FULL HYBRID EV- (FHEV)",
        "1.5L L3 PLUG-IN HYBRID EV- (PHEV)",
        # Flex fuel
        "5.6L V8 FLEX",
        # Electric
        "1.8L I4 ELECTRIC",
    ]

    for engine_str in demo_engines:
        spec = parser.parse_engine_string(engine_str)
        print(f"\nInput: \"{engine_str}\"")
        print(f"{spec.displacement_l}L {spec.configuration}{spec.cylinders}")
        print(f" → Fuel: {spec.fuel_type}, Aspiration: {spec.aspiration}")
        # Call out the cases where the L→I normalization rule actually fired.
        if 'L' in engine_str and spec.configuration == 'I' and 'ELECTRIC' not in engine_str.upper():
            print(f" 🎯 L→I NORMALIZED")

    # Empty engines arrays (Tesla, Lucid, ...) get a synthetic motor spec.
    print(f"\n⚡ Electric Vehicle Handling:")
    electric_spec = parser.create_electric_motor()
    print(f" Default: {electric_spec.raw_string}")
    print(f" → Config: {electric_spec.configuration}, Fuel: {electric_spec.fuel_type}")


if __name__ == "__main__":
    example_usage()

View File

@@ -0,0 +1,28 @@
import logging
import sys
from pathlib import Path
from datetime import datetime
def setup_logging(log_level: str = "INFO"):
    """Setup logging configuration"""
    # Make sure the log directory exists before attaching the file handler.
    log_dir = Path("logs")
    log_dir.mkdir(exist_ok=True)

    # One daily log file, named by date, plus console output on stdout.
    stamp = datetime.now().strftime('%Y%m%d')
    handlers = [
        logging.StreamHandler(sys.stdout),
        logging.FileHandler(log_dir / f"etl_{stamp}.log"),
    ]
    logging.basicConfig(
        level=getattr(logging, log_level.upper()),
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=handlers,
    )

    # Quiet down chatty third-party database/cache client loggers.
    for noisy in ("pymssql", "psycopg2", "redis"):
        logging.getLogger(noisy).setLevel(logging.WARNING)

View File

@@ -0,0 +1,112 @@
#!/usr/bin/env python3
"""
Make filtering service for ETL pipeline optimization.
Filters processing to only allowed mainstream makes/brands.
"""
import json
import logging
from typing import List, Set
from pathlib import Path
logger = logging.getLogger(__name__)
class MakeFilter:
    """Service to filter ETL processing by allowed makes/brands"""

    def __init__(self, config_path: str = '/app/etl/sources/makes.json'):
        """
        Initialize make filter
        Args:
            config_path: Path to makes.json configuration file
        """
        self.config_path = Path(config_path)
        self.allowed_makes: List[str] = []        # ordered list from config
        self.allowed_makes_set: Set[str] = set()  # O(1) membership checks
        self.allowed_makes_sql: str = ""          # precomputed SQL IN clause
        self._load_makes()

    def _load_makes(self):
        """Load allowed makes from JSON configuration.

        Tries several well-known locations so the same code works inside the
        container and in local development. Raises on total failure so the
        pipeline does not silently run unfiltered.
        """
        try:
            # Try multiple paths for container and local development
            config_paths = [
                self.config_path,
                Path('/app/etl/sources/makes.json'),  # Primary container path - administrator controlled
                Path(__file__).parent.parent / 'sources' / 'makes.json',  # Local development
                Path(__file__).parent.parent.parent / 'makes.json'  # Fallback to main makes.json if needed
            ]
            config_data = None
            used_path = None
            for path in config_paths:
                if path.exists():
                    with open(path, 'r') as f:
                        config_data = json.load(f)
                    used_path = path
                    break
            # FIX: compare against None — a valid-but-falsy JSON document
            # (e.g. {}) previously looked like "no file found".
            if config_data is None:
                raise FileNotFoundError(f"Could not find makes.json in any of: {config_paths}")
            self.allowed_makes = config_data.get('manufacturers', [])
            self.allowed_makes_set = set(self.allowed_makes)
            self.allowed_makes_sql = self._build_sql_in_clause()
            logger.info(f"Loaded {len(self.allowed_makes)} allowed makes from {used_path}")
            logger.debug(f"Allowed makes: {', '.join(sorted(self.allowed_makes[:10]))}{'...' if len(self.allowed_makes) > 10 else ''}")
        except Exception as e:
            logger.error(f"Failed to load make configuration: {e}")
            raise

    def _build_sql_in_clause(self) -> str:
        """Build the parenthesized SQL IN list for make filtering."""
        # FIX: an empty list previously produced "()", which is a SQL syntax
        # error in most dialects. "(NULL)" is valid SQL and matches no rows.
        if not self.allowed_makes:
            return "(NULL)"
        # Escape single quotes and build IN clause
        escaped_names = [name.replace("'", "''") for name in self.allowed_makes]
        return "(" + ",".join(f"'{name}'" for name in escaped_names) + ")"

    def is_make_allowed(self, make_name: str) -> bool:
        """
        Check if a make is in the allowed list
        Args:
            make_name: Name to check (exact, case-sensitive match)
        Returns:
            True if make is allowed, False otherwise
        """
        return make_name in self.allowed_makes_set

    def get_allowed_makes(self) -> List[str]:
        """Get list of allowed makes (a copy, so callers can't mutate state)"""
        return self.allowed_makes.copy()

    def get_sql_filter(self, column_name: str = 'Name') -> str:
        """
        Get SQL WHERE clause for make filtering
        Args:
            column_name: Name of the make column
        Returns:
            SQL WHERE clause fragment
        """
        return f"{column_name} IN {self.allowed_makes_sql}"

    def reload_configuration(self):
        """Reload makes configuration from file"""
        logger.info("Reloading make configuration...")
        self._load_makes()

    def get_filter_stats(self) -> dict:
        """Get filtering statistics for diagnostics/monitoring"""
        return {
            'total_allowed_makes': len(self.allowed_makes),
            'config_path': str(self.config_path),
            'sql_clause_length': len(self.allowed_makes_sql)
        }

View File

@@ -0,0 +1,317 @@
"""
Make Name Mapper Utility
Converts JSON filenames to proper display names for database storage.
Handles underscore-to-space conversion, title casing, and special capitalization cases.
Critical for converting:
- alfa_romeo.json → "Alfa Romeo"
- bmw.json → "BMW"
- land_rover.json → "Land Rover"
Usage:
mapper = MakeNameMapper()
display_name = mapper.normalize_make_name('alfa_romeo.json') # Returns "Alfa Romeo"
"""
import json
import glob
import os
import logging
from typing import Set, Dict, List, Optional
from dataclasses import dataclass
from pathlib import Path
logger = logging.getLogger(__name__)
@dataclass
class ValidationReport:
    """Summary of filename-to-display-name validation results."""
    total_files: int                  # number of JSON files examined
    valid_mappings: int               # mappings found in the authoritative list
    mismatches: List[Dict[str, str]]  # details for each failed mapping

    @property
    def success_rate(self) -> float:
        """Fraction of valid mappings (0.0 when no files were examined)."""
        if self.total_files == 0:
            return 0.0
        return self.valid_mappings / self.total_files
class MakeNameMapper:
    """Convert JSON filenames to proper make display names"""

    def __init__(self, sources_dir: Optional[str] = None):
        """
        Initialize make name mapper
        Args:
            sources_dir: Directory containing sources/makes.json for validation
        """
        self.sources_dir = sources_dir or "sources"
        # Special capitalization cases that don't follow standard title case
        self.special_cases = {
            'Bmw': 'BMW',          # Bayerische Motoren Werke
            'Gmc': 'GMC',          # General Motors Company
            'Mini': 'MINI',        # Brand styling requirement
            'Mclaren': 'McLaren',  # Scottish naming convention
        }
        # Load authoritative makes list for validation
        self.authoritative_makes = self._load_authoritative_makes()
        logger.debug(f"MakeNameMapper initialized with {len(self.authoritative_makes)} authoritative makes")

    def _load_authoritative_makes(self) -> Set[str]:
        """Load authoritative makes list from sources/makes.json.

        Falls back to a hard-coded list when the file is missing or
        unreadable, so the mapper stays usable in degraded environments.
        """
        makes_file = os.path.join(self.sources_dir, 'makes.json')
        try:
            if os.path.exists(makes_file):
                with open(makes_file, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                makes_set = set(data.get('manufacturers', []))
                logger.info(f"Loaded {len(makes_set)} authoritative makes from {makes_file}")
                return makes_set
            else:
                logger.warning(f"Authoritative makes file not found: {makes_file}")
                return self._get_fallback_makes()
        except Exception as e:
            logger.error(f"Failed to load authoritative makes from {makes_file}: {e}")
            return self._get_fallback_makes()

    def _get_fallback_makes(self) -> Set[str]:
        """Fallback authoritative makes list if file is not available"""
        return {
            'Acura', 'Alfa Romeo', 'Aston Martin', 'Audi', 'BMW', 'Bentley',
            'Buick', 'Cadillac', 'Chevrolet', 'Chrysler', 'Dodge', 'Ferrari',
            'Fiat', 'Ford', 'Genesis', 'Geo', 'GMC', 'Honda', 'Hummer',
            'Hyundai', 'Infiniti', 'Isuzu', 'Jaguar', 'Jeep', 'Kia',
            'Lamborghini', 'Land Rover', 'Lexus', 'Lincoln', 'Lotus', 'Lucid',
            'MINI', 'Maserati', 'Mazda', 'McLaren', 'Mercury', 'Mitsubishi',
            'Nissan', 'Oldsmobile', 'Plymouth', 'Polestar', 'Pontiac',
            'Porsche', 'Ram', 'Rivian', 'Rolls Royce', 'Saab', 'Saturn',
            'Scion', 'Smart', 'Subaru', 'Tesla', 'Toyota', 'Volkswagen',
            'Volvo'
        }

    def normalize_make_name(self, filename: str) -> str:
        """
        Convert filename to proper display name
        Args:
            filename: JSON filename (e.g., 'alfa_romeo.json')
        Returns:
            Normalized display name (e.g., 'Alfa Romeo'), or 'Unknown'
            on empty input or failure
        """
        try:
            # FIX: strip '.json' only as a suffix; replace() removed the
            # substring anywhere in the name (e.g. 'my.jsonmake.json').
            if filename.endswith('.json'):
                base_name = filename[:-len('.json')]
            else:
                base_name = filename
            # Handle edge case of empty string
            if not base_name:
                # FIX: restored the '{filename}' interpolation that had been
                # corrupted to a literal '(unknown)'.
                logger.warning(f"Empty base name after removing .json from '{filename}'")
                return "Unknown"
            # Replace underscores with spaces
            spaced_name = base_name.replace('_', ' ')
            # Apply title case
            title_cased = spaced_name.title()
            # Apply special capitalization cases (BMW, GMC, MINI, McLaren)
            normalized = self.special_cases.get(title_cased, title_cased)
            logger.debug(f"Normalized '{filename}' → '{normalized}'")
            return normalized
        except Exception as e:
            logger.error(f"Failed to normalize make name '{filename}': {e}")
            return "Unknown"

    def validate_mapping(self, filename: str, display_name: str) -> bool:
        """
        Validate mapped name against authoritative list
        Args:
            filename: Original JSON filename
            display_name: Normalized display name
        Returns:
            True if display name is in authoritative list
        """
        is_valid = display_name in self.authoritative_makes
        if not is_valid:
            logger.warning(f"Make '{display_name}' from '{filename}' not found in authoritative list")
        return is_valid

    def get_all_mappings(self, json_files_dir: str) -> Dict[str, str]:
        """
        Get complete filename → display name mapping for all JSON files
        Args:
            json_files_dir: Directory containing make JSON files
        Returns:
            Dictionary mapping filenames to display names (empty on failure)
        """
        mappings = {}
        try:
            pattern = os.path.join(json_files_dir, '*.json')
            json_files = glob.glob(pattern)
            logger.info(f"Found {len(json_files)} JSON files in {json_files_dir}")
            for file_path in json_files:
                filename = os.path.basename(file_path)
                mappings[filename] = self.normalize_make_name(filename)
            return mappings
        except Exception as e:
            logger.error(f"Failed to get all mappings from {json_files_dir}: {e}")
            return {}

    def validate_all_mappings(self, json_files_dir: str) -> ValidationReport:
        """
        Validate all mappings against authoritative list
        Args:
            json_files_dir: Directory containing make JSON files
        Returns:
            ValidationReport with results
        """
        mappings = self.get_all_mappings(json_files_dir)
        mismatches = []
        for filename, display_name in mappings.items():
            if not self.validate_mapping(filename, display_name):
                mismatches.append({
                    'filename': filename,
                    'mapped_name': display_name,
                    'status': 'NOT_FOUND_IN_AUTHORITATIVE'
                })
        report = ValidationReport(
            total_files=len(mappings),
            valid_mappings=len(mappings) - len(mismatches),
            mismatches=mismatches
        )
        logger.info(f"Validation complete: {report.valid_mappings}/{report.total_files} valid ({report.success_rate:.1%})")
        return report

    def get_filename_for_display_name(self, display_name: str) -> Optional[str]:
        """
        Reverse lookup: get JSON filename for a display name
        Args:
            display_name: Make display name (e.g., 'Alfa Romeo')
        Returns:
            JSON filename (e.g., 'alfa_romeo.json')
        """
        # Convert display name back to filename format.
        # Handle special cases in reverse (BMW → Bmw, etc.)
        reverse_special_cases = {v: k for k, v in self.special_cases.items()}
        if display_name in reverse_special_cases:
            base_name = reverse_special_cases[display_name].lower()
        else:
            # Standard case: convert to lowercase, spaces to underscores
            base_name = display_name.lower().replace(' ', '_')
        filename = f"{base_name}.json"
        logger.debug(f"Reverse lookup: '{display_name}' → '{filename}'")
        return filename

    def print_validation_report(self, report: ValidationReport) -> None:
        """
        Print formatted validation report
        Args:
            report: ValidationReport to display
        """
        print(f"📋 Make Name Validation Report")
        print(f"=" * 35)
        print(f"Total files: {report.total_files}")
        print(f"Valid mappings: {report.valid_mappings}")
        print(f"Success rate: {report.success_rate:.1%}")
        if report.mismatches:
            print(f"\n⚠️ Mismatches ({len(report.mismatches)}):")
            for mismatch in report.mismatches:
                # FIX: restored the ' → ' separator missing between the
                # filename and its mapped name.
                print(f" {mismatch['filename']} → {mismatch['mapped_name']}")
                print(f" Status: {mismatch['status']}")
        else:
            print(f"\n🎉 All mappings are valid!")

    def get_make_statistics(self, json_files_dir: str) -> Dict[str, int]:
        """
        Get statistics about make name transformations
        Args:
            json_files_dir: Directory containing make JSON files
        Returns:
            Dictionary with transformation statistics
        """
        mappings = self.get_all_mappings(json_files_dir)
        single_words = 0
        multi_words = 0
        special_cases = 0
        for filename, display_name in mappings.items():
            # Special cases are counted first so e.g. 'McLaren' isn't
            # double-counted as a single word.
            if display_name in self.special_cases.values():
                special_cases += 1
            elif ' ' in display_name:
                multi_words += 1
            else:
                single_words += 1
        return {
            'total': len(mappings),
            'single_words': single_words,
            'multi_words': multi_words,
            'special_cases': special_cases
        }
# Example usage and testing functions
def example_usage():
    """Demonstrate MakeNameMapper usage"""
    print("🏷️ MakeNameMapper Example Usage")
    print("=" * 35)

    mapper = MakeNameMapper()

    # Filenames covering plain, underscored, and special-case makes.
    sample_files = [
        'toyota.json',
        'alfa_romeo.json',
        'bmw.json',
        'land_rover.json',
        'mclaren.json'
    ]

    for filename in sample_files:
        display_name = mapper.normalize_make_name(filename)
        # Flag any mapping that is missing from the authoritative list.
        is_valid = mapper.validate_mapping(filename, display_name)
        status = "" if is_valid else "⚠️"
        print(f"{status} {filename:20}{display_name}")


if __name__ == "__main__":
    example_usage()