Initial Commit

Eric Gullickson
2025-09-17 16:09:15 -05:00
parent 0cdb9803de
commit a052040e3a
373 changed files with 437090 additions and 6773 deletions


@@ -0,0 +1 @@
# ETL Loaders
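
Loaders for the vehicles ETL pipeline: a JSON manual loader and a generic PostgreSQL loader, plus an MSSQL restore helper for `.bak` sources. A minimal usage sketch follows, assuming the package layout in this commit (the `JsonExtractor` import path and its `make_mapper`/`engine_parser` dependencies mirror the example in `json_manual_loader.py`):

```python
from extractors.json_extractor import JsonExtractor
from loaders.json_manual_loader import JsonManualLoader, LoadMode

# Extract per-make JSON data, then load it with safe conflict handling
extractor = JsonExtractor(make_mapper, engine_parser)
extraction_result = extractor.extract_all_makes('sources/makes')

loader = JsonManualLoader()
load_result = loader.load_all_makes(extraction_result.makes, LoadMode.APPEND)
loader.print_load_report(load_result)
```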


@@ -0,0 +1,716 @@
"""
JSON Manual Loader for Vehicles ETL
Loads extracted JSON data into PostgreSQL database with referential integrity.
Supports clear/append modes with duplicate handling and comprehensive progress tracking.
Database Schema:
- vehicles.make (id, name)
- vehicles.model (id, make_id, name)
- vehicles.model_year (id, model_id, year)
- vehicles.trim (id, model_year_id, name)
- vehicles.engine (id, name, code, displacement_l, cylinders, fuel_type, aspiration)
- vehicles.trim_engine (trim_id, engine_id)
Load Modes:
- CLEAR: Truncate all tables and reload (destructive)
- APPEND: Insert with conflict resolution (safe)
Usage:
loader = JsonManualLoader(postgres_loader)
result = loader.load_all_makes(extraction_result.makes, LoadMode.APPEND)
"""
import logging
from typing import List, Dict, Optional
from enum import Enum
from dataclasses import dataclass
# Import our components (handle both relative and direct imports)
try:
from .postgres_loader import PostgreSQLLoader
from ..extractors.json_extractor import MakeData, ModelData, ExtractionResult
from ..utils.engine_spec_parser import EngineSpec
from ..connections import db_connections
except ImportError:
# Fallback for direct execution
import sys
import os
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
# Import with fallback handling for nested imports
try:
from loaders.postgres_loader import PostgreSQLLoader
except ImportError:
# Mock PostgreSQLLoader for testing
class PostgreSQLLoader:
def __init__(self):
self.batch_size = 1000
from extractors.json_extractor import MakeData, ModelData, ExtractionResult
from utils.engine_spec_parser import EngineSpec
try:
from connections import db_connections
except ImportError:
# Mock db_connections for testing
class MockDBConnections:
def postgres_connection(self):
raise NotImplementedError("Database connection not available in test mode")
db_connections = MockDBConnections()
logger = logging.getLogger(__name__)
class LoadMode(Enum):
"""Data loading modes"""
CLEAR = "clear" # Truncate and reload (destructive)
APPEND = "append" # Insert with conflict handling (safe)
@dataclass
class LoadResult:
"""Result of loading operations"""
total_makes: int
total_models: int
total_model_years: int
total_trims: int
total_engines: int
total_trim_engine_mappings: int
failed_makes: List[str]
warnings: List[str]
load_mode: LoadMode
@property
def success_count(self) -> int:
return self.total_makes - len(self.failed_makes)
@property
def success_rate(self) -> float:
return self.success_count / self.total_makes if self.total_makes > 0 else 0.0
@dataclass
class LoadStatistics:
"""Detailed loading statistics"""
makes_processed: int = 0
makes_skipped: int = 0
models_inserted: int = 0
model_years_inserted: int = 0
skipped_model_years: int = 0
trims_inserted: int = 0
engines_inserted: int = 0
trim_engine_mappings_inserted: int = 0
duplicate_makes: int = 0
duplicate_models: int = 0
duplicate_engines: int = 0
errors: Optional[List[str]] = None
warnings: Optional[List[str]] = None
def __post_init__(self):
if self.errors is None:
self.errors = []
if self.warnings is None:
self.warnings = []
class JsonManualLoader:
"""Load JSON-extracted vehicle data into PostgreSQL"""
def _get_id_from_result(self, result, column_name='id'):
"""Helper to extract ID from query result, handling both tuple and dict cursors"""
if result is None:
return None
if isinstance(result, tuple):
return result[0]
# For RealDictCursor, try the column name first, fall back to key access
if column_name in result:
return result[column_name]
# For COUNT(*) queries, the key might be 'count'
if 'count' in result:
return result['count']
# Fall back to first value
return list(result.values())[0] if result else None
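# Behavior sketch (hypothetical rows, not executed at import time):
#   loader._get_id_from_result((42,))                 -> 42  # default tuple cursor
#   loader._get_id_from_result({'id': 42})            -> 42  # RealDictCursor row
#   loader._get_id_from_result({'count': 7}, 'count') -> 7   # COUNT(*) result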
def __init__(self, postgres_loader: Optional[PostgreSQLLoader] = None):
"""
Initialize JSON manual loader
Args:
postgres_loader: Existing PostgreSQL loader instance
"""
self.postgres_loader = postgres_loader or PostgreSQLLoader()
self.batch_size = 1000
logger.info("JsonManualLoader initialized")
def clear_all_tables(self) -> None:
"""
Clear all vehicles tables in dependency order
WARNING: This is destructive and will remove all data
"""
logger.warning("CLEARING ALL VEHICLES TABLES - This is destructive!")
tables_to_clear = [
'trim_engine', # Many-to-many mappings first
'trim_transmission',
'performance', # Tables with foreign keys
'trim',
'model_year',
'model',
'make',
'engine', # Independent tables last
'transmission'
]
with db_connections.postgres_connection() as conn:
cursor = conn.cursor()
for table in tables_to_clear:
try:
cursor.execute(f"TRUNCATE TABLE vehicles.{table} CASCADE")
logger.info(f"Cleared vehicles.{table}")
except Exception as e:
logger.warning(f"Failed to clear vehicles.{table}: {str(e)}")
conn.commit()
logger.info("All vehicles tables cleared")
def load_make(self, make_data: MakeData, mode: LoadMode, stats: LoadStatistics) -> int:
"""
Load a single make with all related data
Args:
make_data: Extracted make data
mode: Loading mode (clear/append)
stats: Statistics accumulator
Returns:
Make ID in database
"""
logger.debug(f"Loading make: {make_data.name}")
try:
with db_connections.postgres_connection() as conn:
cursor = conn.cursor()
# 1. Insert or get make (always check for existing to avoid constraint violations)
# Check if make exists (case-insensitive to match database constraint)
cursor.execute(
"SELECT id FROM vehicles.make WHERE lower(name) = lower(%s)",
(make_data.name,)
)
result = cursor.fetchone()
if result:
make_id = self._get_id_from_result(result)
stats.duplicate_makes += 1
logger.debug(f"Make {make_data.name} already exists with ID {make_id}")
else:
# Insert the new make inside a savepoint so a unique-constraint violation
# does not abort the enclosing transaction before the retry lookup below
try:
cursor.execute("SAVEPOINT sp_insert_make")
cursor.execute(
"INSERT INTO vehicles.make (name) VALUES (%s) RETURNING id",
(make_data.name,)
)
result = cursor.fetchone()
make_id = self._get_id_from_result(result)
logger.debug(f"Inserted make {make_data.name} with ID {make_id}")
except Exception as e:
if "duplicate key value violates unique constraint" in str(e):
# Roll back to the savepoint, then retry the lookup (race condition)
cursor.execute("ROLLBACK TO SAVEPOINT sp_insert_make")
cursor.execute(
"SELECT id FROM vehicles.make WHERE lower(name) = lower(%s)",
(make_data.name,)
)
result = cursor.fetchone()
if result:
make_id = self._get_id_from_result(result)
stats.duplicate_makes += 1
logger.debug(f"Make {make_data.name} found after retry with ID {make_id}")
else:
raise
else:
raise
# 2. Process models
for model_data in make_data.models:
model_id = self.load_model(cursor, make_id, model_data, mode, stats)
conn.commit()
stats.makes_processed += 1
return make_id
except Exception as e:
error_msg = f"Failed to load make {make_data.name}: {str(e)}"
logger.error(error_msg)
stats.errors.append(error_msg)
raise
def load_model(self, cursor, make_id: int, model_data: ModelData, mode: LoadMode, stats: LoadStatistics) -> int:
"""
Load a single model with all related data
Args:
cursor: Database cursor
make_id: Parent make ID
model_data: Extracted model data
mode: Loading mode
stats: Statistics accumulator
Returns:
Model ID in database
"""
# 1. Insert or get model
if mode == LoadMode.APPEND:
cursor.execute(
"SELECT id FROM vehicles.model WHERE make_id = %s AND name = %s",
(make_id, model_data.name)
)
result = cursor.fetchone()
if result:
model_id = self._get_id_from_result(result)
stats.duplicate_models += 1
else:
cursor.execute(
"INSERT INTO vehicles.model (make_id, name) VALUES (%s, %s) RETURNING id",
(make_id, model_data.name)
)
model_id = self._get_id_from_result(cursor.fetchone())
stats.models_inserted += 1
else:
# CLEAR mode - just insert
cursor.execute(
"INSERT INTO vehicles.model (make_id, name) VALUES (%s, %s) RETURNING id",
(make_id, model_data.name)
)
model_id = self._get_id_from_result(cursor.fetchone())
stats.models_inserted += 1
# 2. Insert model years and related data
for year in model_data.years:
model_year_id = self.load_model_year(cursor, model_id, year, model_data, mode, stats)
# Skip processing if year was outside valid range
if model_year_id is None:
continue
return model_id
def load_model_year(self, cursor, model_id: int, year: int, model_data: ModelData, mode: LoadMode, stats: LoadStatistics) -> Optional[int]:
"""
Load model year and associated trims/engines
Args:
cursor: Database cursor
model_id: Parent model ID
year: Model year
model_data: Model data with trims and engines
mode: Loading mode
stats: Statistics accumulator
Returns:
Model year ID in database, or None if the year falls outside the 1950-2100 constraint
"""
# Skip years that don't meet database constraints (must be 1950-2100)
if year < 1950 or year > 2100:
logger.warning(f"Skipping year {year} - outside valid range (1950-2100)")
stats.skipped_model_years += 1
return None
# 1. Insert or get model year
if mode == LoadMode.APPEND:
cursor.execute(
"SELECT id FROM vehicles.model_year WHERE model_id = %s AND year = %s",
(model_id, year)
)
result = cursor.fetchone()
if result:
model_year_id = self._get_id_from_result(result)
else:
cursor.execute(
"INSERT INTO vehicles.model_year (model_id, year) VALUES (%s, %s) RETURNING id",
(model_id, year)
)
model_year_id = self._get_id_from_result(cursor.fetchone())
stats.model_years_inserted += 1
else:
# CLEAR mode - just insert
cursor.execute(
"INSERT INTO vehicles.model_year (model_id, year) VALUES (%s, %s) RETURNING id",
(model_id, year)
)
model_year_id = self._get_id_from_result(cursor.fetchone())
stats.model_years_inserted += 1
# 2. Load engines and get their IDs
engine_ids = []
for engine_spec in model_data.engines:
engine_id = self.load_engine(cursor, engine_spec, mode, stats)
engine_ids.append(engine_id)
# 3. Load trims and connect to engines
for trim_name in model_data.trims:
trim_id = self.load_trim(cursor, model_year_id, trim_name, engine_ids, mode, stats)
return model_year_id
def load_engine(self, cursor, engine_spec: EngineSpec, mode: LoadMode, stats: LoadStatistics) -> int:
"""
Load engine specification
Args:
cursor: Database cursor
engine_spec: Parsed engine specification
mode: Loading mode
stats: Statistics accumulator
Returns:
Engine ID in database
"""
# Create a canonical engine name for database storage
if engine_spec.displacement_l and engine_spec.configuration != "Unknown" and engine_spec.cylinders:
engine_name = f"{engine_spec.displacement_l}L {engine_spec.configuration}{engine_spec.cylinders}"
else:
engine_name = engine_spec.raw_string
# Generate engine code from name (remove spaces, lowercase)
engine_code = engine_name.replace(" ", "").lower()
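# Example: displacement_l=2.0, configuration="I", cylinders=4 yields
# engine_name "2.0L I4" and engine_code "2.0li4"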
# Always check for existing engine by name or code to avoid constraint violations
cursor.execute("""
SELECT id FROM vehicles.engine
WHERE lower(name) = lower(%s) OR (code IS NOT NULL AND code = %s)
""", (engine_name, engine_code))
result = cursor.fetchone()
if result:
engine_id = self._get_id_from_result(result)
stats.duplicate_engines += 1
return engine_id
# Insert new engine inside a savepoint so a unique-constraint violation
# does not abort the enclosing transaction before the retry lookup
try:
cursor.execute("SAVEPOINT sp_insert_engine")
cursor.execute("""
INSERT INTO vehicles.engine (name, code, displacement_l, cylinders, fuel_type, aspiration)
VALUES (%s, %s, %s, %s, %s, %s)
RETURNING id
""", (
engine_name,
engine_code,
engine_spec.displacement_l,
engine_spec.cylinders,
engine_spec.fuel_type if engine_spec.fuel_type != "Unknown" else None,
engine_spec.aspiration if engine_spec.aspiration != "Natural" else None
))
engine_id = self._get_id_from_result(cursor.fetchone())
stats.engines_inserted += 1
return engine_id
except Exception as e:
if "duplicate key value violates unique constraint" in str(e):
# Roll back to the savepoint, then retry the lookup (race condition)
cursor.execute("ROLLBACK TO SAVEPOINT sp_insert_engine")
cursor.execute("""
SELECT id FROM vehicles.engine
WHERE lower(name) = lower(%s) OR (code IS NOT NULL AND code = %s)
""", (engine_name, engine_code))
result = cursor.fetchone()
if result:
engine_id = self._get_id_from_result(result)
stats.duplicate_engines += 1
return engine_id
raise
def load_trim(self, cursor, model_year_id: int, trim_name: str, engine_ids: List[int], mode: LoadMode, stats: LoadStatistics) -> int:
"""
Load trim and connect to engines
Args:
cursor: Database cursor
model_year_id: Parent model year ID
trim_name: Trim name
engine_ids: List of engine IDs to connect
mode: Loading mode
stats: Statistics accumulator
Returns:
Trim ID in database
"""
# 1. Insert or get trim
if mode == LoadMode.APPEND:
cursor.execute(
"SELECT id FROM vehicles.trim WHERE model_year_id = %s AND name = %s",
(model_year_id, trim_name)
)
result = cursor.fetchone()
if result:
trim_id = self._get_id_from_result(result)
else:
cursor.execute(
"INSERT INTO vehicles.trim (model_year_id, name) VALUES (%s, %s) RETURNING id",
(model_year_id, trim_name)
)
trim_id = self._get_id_from_result(cursor.fetchone())
stats.trims_inserted += 1
else:
# CLEAR mode - just insert
cursor.execute(
"INSERT INTO vehicles.trim (model_year_id, name) VALUES (%s, %s) RETURNING id",
(model_year_id, trim_name)
)
trim_id = self._get_id_from_result(cursor.fetchone())
stats.trims_inserted += 1
# 2. Connect trim to engines (always check for existing to avoid duplicates)
# Deduplicate engine_ids to prevent duplicate mappings within the same trim
unique_engine_ids = list(set(engine_ids))
for engine_id in unique_engine_ids:
# Check if mapping already exists
cursor.execute(
"SELECT 1 FROM vehicles.trim_engine WHERE trim_id = %s AND engine_id = %s",
(trim_id, engine_id)
)
if not cursor.fetchone():
try:
cursor.execute("SAVEPOINT sp_trim_engine")
cursor.execute(
"INSERT INTO vehicles.trim_engine (trim_id, engine_id) VALUES (%s, %s)",
(trim_id, engine_id)
)
stats.trim_engine_mappings_inserted += 1
except Exception as e:
if "duplicate key value violates unique constraint" in str(e):
# Another process may have inserted it; roll back to the savepoint and skip
cursor.execute("ROLLBACK TO SAVEPOINT sp_trim_engine")
logger.debug(f"Trim-engine mapping ({trim_id}, {engine_id}) already exists, skipping")
else:
raise
return trim_id
def load_all_makes(self, makes_data: List[MakeData], mode: LoadMode) -> LoadResult:
"""
Load all makes with complete data
Args:
makes_data: List of extracted make data
mode: Loading mode (clear/append)
Returns:
LoadResult with comprehensive statistics
"""
logger.info(f"Starting bulk load of {len(makes_data)} makes in {mode.value} mode")
# Clear tables if in CLEAR mode
if mode == LoadMode.CLEAR:
self.clear_all_tables()
stats = LoadStatistics()
failed_makes = []
for make_data in makes_data:
try:
if make_data.processing_errors:
logger.warning(f"Skipping make {make_data.name} due to extraction errors")
stats.makes_skipped += 1
failed_makes.append(make_data.name)
continue
make_id = self.load_make(make_data, mode, stats)
logger.info(f"Successfully loaded make {make_data.name} (ID: {make_id})")
except Exception as e:
logger.error(f"Failed to load make {make_data.name}: {str(e)}")
failed_makes.append(make_data.name)
continue
# Create result
result = LoadResult(
total_makes=len(makes_data),
total_models=stats.models_inserted,
total_model_years=stats.model_years_inserted,
total_trims=stats.trims_inserted,
total_engines=stats.engines_inserted,
total_trim_engine_mappings=stats.trim_engine_mappings_inserted,
failed_makes=failed_makes,
warnings=stats.warnings,
load_mode=mode
)
logger.info(f"Bulk load complete: {result.success_count}/{result.total_makes} makes loaded successfully")
logger.info(f"Data loaded: {result.total_models} models, {result.total_engines} engines, {result.total_trims} trims")
return result
def get_database_statistics(self) -> Dict[str, int]:
"""
Get current database record counts
Returns:
Dictionary with table counts
"""
stats = {}
tables = ['make', 'model', 'model_year', 'trim', 'engine', 'trim_engine']
with db_connections.postgres_connection() as conn:
cursor = conn.cursor()
for table in tables:
cursor.execute(f"SELECT COUNT(*) FROM vehicles.{table}")
result = cursor.fetchone()
stats[table] = result[0] if isinstance(result, tuple) else result['count']
return stats
def validate_referential_integrity(self) -> List[str]:
"""
Validate referential integrity of loaded data
Returns:
List of integrity issues found (empty if all good)
"""
issues = []
with db_connections.postgres_connection() as conn:
cursor = conn.cursor()
# Check for orphaned models
cursor.execute("""
SELECT COUNT(*) FROM vehicles.model m
LEFT JOIN vehicles.make mk ON m.make_id = mk.id
WHERE mk.id IS NULL
""")
orphaned_models = self._get_id_from_result(cursor.fetchone(), 'count')
if orphaned_models > 0:
issues.append(f"Found {orphaned_models} orphaned models")
# Check for orphaned model_years
cursor.execute("""
SELECT COUNT(*) FROM vehicles.model_year my
LEFT JOIN vehicles.model m ON my.model_id = m.id
WHERE m.id IS NULL
""")
orphaned_model_years = self._get_id_from_result(cursor.fetchone())
if orphaned_model_years > 0:
issues.append(f"Found {orphaned_model_years} orphaned model_years")
# Check for orphaned trims
cursor.execute("""
SELECT COUNT(*) FROM vehicles.trim t
LEFT JOIN vehicles.model_year my ON t.model_year_id = my.id
WHERE my.id IS NULL
""")
orphaned_trims = self._get_id_from_result(cursor.fetchone())
if orphaned_trims > 0:
issues.append(f"Found {orphaned_trims} orphaned trims")
# Check for broken trim_engine mappings
cursor.execute("""
SELECT COUNT(*) FROM vehicles.trim_engine te
LEFT JOIN vehicles.trim t ON te.trim_id = t.id
LEFT JOIN vehicles.engine e ON te.engine_id = e.id
WHERE t.id IS NULL OR e.id IS NULL
""")
broken_mappings = self._get_id_from_result(cursor.fetchone())
if broken_mappings > 0:
issues.append(f"Found {broken_mappings} broken trim_engine mappings")
if issues:
logger.warning(f"Referential integrity issues found: {issues}")
else:
logger.info("Referential integrity validation passed")
return issues
def print_load_report(self, result: LoadResult) -> None:
"""
Print comprehensive loading report
Args:
result: LoadResult from load operation
"""
print(f"🚀 JSON MANUAL LOADING REPORT")
print(f"=" * 50)
# Load summary
print(f"\n📊 LOADING SUMMARY")
print(f" Mode: {result.load_mode.value.upper()}")
print(f" Makes processed: {result.success_count}/{result.total_makes}")
print(f" Success rate: {result.success_rate:.1%}")
# Data counts
print(f"\n📈 DATA LOADED")
print(f" Models: {result.total_models}")
print(f" Model years: {result.total_model_years}")
print(f" Trims: {result.total_trims}")
print(f" Engines: {result.total_engines}")
print(f" Trim-engine mappings: {result.total_trim_engine_mappings}")
# Issues
if result.failed_makes:
print(f"\n⚠️ FAILED MAKES ({len(result.failed_makes)}):")
for make in result.failed_makes:
print(f" {make}")
if result.warnings:
print(f"\n⚠️ WARNINGS ({len(result.warnings)}):")
for warning in result.warnings[:5]: # Show first 5
print(f" {warning}")
if len(result.warnings) > 5:
print(f" ... and {len(result.warnings) - 5} more warnings")
# Database statistics
print(f"\n📋 DATABASE STATISTICS:")
db_stats = self.get_database_statistics()
for table, count in db_stats.items():
print(f" vehicles.{table}: {count:,} records")
# Referential integrity
integrity_issues = self.validate_referential_integrity()
if integrity_issues:
print(f"\n❌ REFERENTIAL INTEGRITY ISSUES:")
for issue in integrity_issues:
print(f" {issue}")
else:
print(f"\n✅ REFERENTIAL INTEGRITY: PASSED")
# Example usage and testing functions
def example_usage():
"""Demonstrate JsonManualLoader usage"""
print("🚀 JsonManualLoader Example Usage")
print("=" * 40)
# This would typically be called after JsonExtractor
# For demo purposes, we'll just show the structure
print("\n📋 Typical usage flow:")
print("1. Extract data with JsonExtractor")
print("2. Create JsonManualLoader")
print("3. Load data in APPEND or CLEAR mode")
print("4. Validate and report results")
print(f"\n💡 Example code:")
print("""
# Extract data
extractor = JsonExtractor(make_mapper, engine_parser)
extraction_result = extractor.extract_all_makes('sources/makes')
# Load data
loader = JsonManualLoader()
load_result = loader.load_all_makes(extraction_result.makes, LoadMode.APPEND)
# Report results
loader.print_load_report(load_result)
""")
if __name__ == "__main__":
example_usage()


@@ -0,0 +1,437 @@
#!/usr/bin/env python3
"""
MSSQL Database Loader
Handles loading .bak files into MSSQL Server for ETL processing
"""
import os
import logging
import pyodbc
import time
from pathlib import Path
from typing import Optional, List
from ..config import config
logger = logging.getLogger(__name__)
class MSSQLLoader:
"""Loads database files into MSSQL Server"""
def __init__(self):
self.server = config.MSSQL_HOST
self.port = config.MSSQL_PORT
self.database = config.MSSQL_DATABASE
self.username = config.MSSQL_USER
self.password = config.MSSQL_PASSWORD
def get_connection_string(self, database: str = "master") -> str:
"""Get MSSQL connection string"""
return (
f"DRIVER={{ODBC Driver 17 for SQL Server}};"
f"SERVER={self.server},{self.port};"
f"DATABASE={database};"
f"UID={self.username};"
f"PWD={self.password};"
f"TrustServerCertificate=yes;"
)
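# Resulting string shape (hypothetical host and credentials):
#   DRIVER={ODBC Driver 17 for SQL Server};SERVER=mssql,1433;DATABASE=master;
#   UID=sa;PWD=***;TrustServerCertificate=yes;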
def test_connection(self) -> bool:
"""Test MSSQL connection"""
try:
conn_str = self.get_connection_string()
logger.info(f"Testing MSSQL connection to: {self.server}")
with pyodbc.connect(conn_str, timeout=30) as conn:
cursor = conn.cursor()
cursor.execute("SELECT @@VERSION")
version = cursor.fetchone()[0]
logger.info(f"MSSQL connection successful: {version[:100]}...")
return True
except Exception as e:
logger.error(f"MSSQL connection failed: {e}")
return False
def database_exists(self, database_name: str) -> bool:
"""Check if database exists"""
try:
conn_str = self.get_connection_string()
with pyodbc.connect(conn_str, timeout=30) as conn:
cursor = conn.cursor()
cursor.execute(
"SELECT COUNT(*) FROM sys.databases WHERE name = ?",
(database_name,)
)
count = cursor.fetchone()[0]
return count > 0
except Exception as e:
logger.error(f"Failed to check if database exists: {e}")
return False
def get_database_state(self, database_name: str) -> Optional[str]:
"""Return the state_desc for a database or None if not found"""
try:
conn_str = self.get_connection_string()
with pyodbc.connect(conn_str, timeout=30) as conn:
cursor = conn.cursor()
cursor.execute(
"SELECT state_desc FROM sys.databases WHERE name = ?",
(database_name,)
)
row = cursor.fetchone()
return row[0] if row else None
except Exception as e:
logger.error(f"Failed to get database state: {e}")
return None
def drop_database(self, database_name: str) -> bool:
"""Drop database if it exists"""
try:
if not self.database_exists(database_name):
logger.info(f"Database {database_name} does not exist, skipping drop")
return True
logger.info(f"Dropping database: {database_name}")
conn_str = self.get_connection_string()
with pyodbc.connect(conn_str, timeout=30) as conn:
conn.autocommit = True
cursor = conn.cursor()
# Kill existing connections
cursor.execute(f"""
ALTER DATABASE [{database_name}] SET SINGLE_USER WITH ROLLBACK IMMEDIATE;
DROP DATABASE [{database_name}];
""")
logger.info(f"Successfully dropped database: {database_name}")
return True
except Exception as e:
logger.error(f"Failed to drop database {database_name}: {e}")
return False
def get_backup_file_info(self, bak_path: Path) -> Optional[dict]:
"""Get information about backup file"""
try:
# Use the MSSQL container's mounted backup directory
container_path = f"/backups/{bak_path.name}"
# For now, assume the file is accessible
# In production, this would copy the file into the MSSQL container
conn_str = self.get_connection_string()
with pyodbc.connect(conn_str, timeout=30) as conn:
cursor = conn.cursor()
# Get backup file information
cursor.execute(f"RESTORE HEADERONLY FROM DISK = '{container_path}'")
headers = cursor.fetchall()
if headers:
header = headers[0]
return {
"database_name": header.DatabaseName,
"server_name": header.ServerName,
"backup_start_date": header.BackupStartDate,
"backup_finish_date": header.BackupFinishDate,
"backup_size": header.BackupSize,
}
except Exception as e:
logger.warning(f"Could not get backup file info: {e}")
return None
def restore_database(self, bak_path: Path, target_database: Optional[str] = None) -> bool:
"""
Restore database from .bak file
Args:
bak_path: Path to .bak file
target_database: Target database name (defaults to the configured database, e.g. VPICList)
Returns:
True if successful
"""
if target_database is None:
target_database = self.database
if not bak_path.exists():
logger.error(f"Backup file does not exist: {bak_path}")
return False
logger.info(f"Starting database restore: {bak_path} -> {target_database}")
try:
# Copy backup file to MSSQL container
container_bak_path = self.copy_backup_to_container(bak_path)
if not container_bak_path:
logger.error("Failed to copy backup file to container")
return False
# If database exists, note the state; we will handle exclusivity in the same session below
if self.database_exists(target_database):
state = self.get_database_state(target_database)
logger.info(f"Existing database detected: {target_database} (state={state})")
else:
logger.info(f"Target database does not exist yet: {target_database} — proceeding with restore")
# Restore database using a single master connection for exclusivity
logger.info(f"Restoring database from: {container_bak_path}")
conn_str = self.get_connection_string()
with pyodbc.connect(conn_str, timeout=600) as conn: # 10 minute timeout
conn.autocommit = True
cursor = conn.cursor()
# Build the kill-sessions script up front so the RESTORE retry below can reuse it
kill_sql = f"""
DECLARE @db sysname = N'{target_database}';
DECLARE @kill nvarchar(max) = N'';
SELECT @kill = @kill + N'KILL ' + CONVERT(nvarchar(10), session_id) + N';'
FROM sys.dm_exec_sessions
WHERE database_id = DB_ID(@db) AND session_id <> @@SPID;
IF LEN(@kill) > 0 EXEC (@kill);
"""
# If DB exists, ensure exclusive access: kill sessions + SINGLE_USER in this session
if self.database_exists(target_database):
try:
logger.info(f"Preparing exclusive access for restore: killing active sessions on {target_database}")
cursor.execute(kill_sql)
# Force SINGLE_USER in current session
cursor.execute(f"ALTER DATABASE [{target_database}] SET SINGLE_USER WITH ROLLBACK IMMEDIATE;")
logger.info(f"Exclusive access prepared (SINGLE_USER) for {target_database}")
except Exception as e:
logger.warning(f"Could not fully prepare exclusive access: {e}")
# Get logical file names from backup
cursor.execute(f"RESTORE FILELISTONLY FROM DISK = '{container_bak_path}'")
files = cursor.fetchall()
if not files:
logger.error("No files found in backup")
return False
# Build RESTORE command with MOVE options
data_file = None
log_file = None
for file_info in files:
logical_name = file_info.LogicalName
file_type = file_info.Type
if file_type == 'D': # Data file
data_file = logical_name
elif file_type == 'L': # Log file
log_file = logical_name
if not data_file:
logger.error("No data file found in backup")
return False
# Construct restore command
restore_sql = f"""
RESTORE DATABASE [{target_database}]
FROM DISK = '{container_bak_path}'
WITH
MOVE '{data_file}' TO '/var/opt/mssql/data/{target_database}.mdf',
"""
if log_file:
restore_sql += f" MOVE '{log_file}' TO '/var/opt/mssql/data/{target_database}.ldf',"
restore_sql += """
REPLACE,
RECOVERY,
STATS = 10
"""
logger.info(f"Executing restore command for database: {target_database}")
logger.debug(f"Restore SQL: {restore_sql}")
try:
cursor.execute(restore_sql)
except Exception as e:
# If we hit exclusive access error, retry once after killing sessions again
if 'Exclusive access could not be obtained' in str(e):
logger.warning("Exclusive access error on RESTORE; retrying after killing sessions and reasserting SINGLE_USER...")
try:
cursor.execute(kill_sql)
cursor.execute(f"ALTER DATABASE [{target_database}] SET SINGLE_USER WITH ROLLBACK IMMEDIATE;")
except Exception as e2:
logger.warning(f"Retry exclusive prep failed: {e2}")
cursor.execute(restore_sql)
else:
raise
# Poll for database to be ONLINE
if not self._wait_for_database_online(target_database):
logger.error(f"Database did not come ONLINE in time: {target_database}")
return False
# Small retry around database_exists to handle late readiness
if self._retry_database_exists(target_database):
logger.info(f"Database restore successful and ONLINE: {target_database}")
# Get basic database info
cursor.execute(f"""
SELECT
name,
create_date,
compatibility_level,
state_desc
FROM sys.databases
WHERE name = '{target_database}'
""")
db_info = cursor.fetchone()
if db_info:
logger.info(f"Database info: Name={db_info.name}, Created={db_info.create_date}, Level={db_info.compatibility_level}, State={db_info.state_desc}")
# Optional: quick content verification with small retry window
if not self._retry_verify_content(target_database):
logger.warning("Database restored but content verification is inconclusive")
# Try to set MULTI_USER back in same session
try:
cursor.execute(f"ALTER DATABASE [{target_database}] SET MULTI_USER;")
logger.info(f"Set {target_database} back to MULTI_USER")
except Exception as e:
logger.warning(f"Could not set MULTI_USER on {target_database}: {e}")
return True
else:
logger.error(f"Database restore failed - database not found: {target_database}")
return False
except Exception as e:
logger.error(f"Database restore failed: {e}")
return False
def copy_backup_to_container(self, bak_path: Path) -> Optional[str]:
"""
Copy backup file to shared volume accessible by MSSQL container
Args:
bak_path: Local path to .bak file
Returns:
Container path to .bak file or None if failed
"""
try:
# Use shared volume instead of docker cp
shared_dir = Path("/app/shared")
shared_bak_path = shared_dir / bak_path.name
# If the file is already in the shared dir, skip copying
if bak_path.resolve().parent == shared_dir.resolve():
logger.info(f"Backup already in shared volume: {bak_path}")
else:
logger.info(f"Copying {bak_path} to shared volume...")
import shutil
shutil.copy2(bak_path, shared_bak_path)
# Container path from MSSQL perspective
container_path = f"/backups/{shared_bak_path.name}"
logger.info(f"Successfully copied to shared volume: {container_path}")
return container_path
except Exception as e:
logger.error(f"Failed to copy backup to shared volume: {e}")
return None
def _wait_for_database_online(self, database_name: str, timeout_seconds: int = 600, interval_seconds: int = 5) -> bool:
"""Poll MSSQL until the specified database state becomes ONLINE or timeout.
Returns True if ONLINE, False on timeout/error.
"""
logger.info(f"Waiting for database to become ONLINE: {database_name}")
deadline = time.time() + timeout_seconds
last_state = None
try:
conn_str = self.get_connection_string()
while time.time() < deadline:
with pyodbc.connect(conn_str, timeout=30) as conn:
cursor = conn.cursor()
cursor.execute("SELECT state_desc FROM sys.databases WHERE name = ?", (database_name,))
row = cursor.fetchone()
if row:
state = row[0]
if state != last_state:
logger.info(f"Database state: {state}")
last_state = state
if state == 'ONLINE':
# Optional: verify updateability is READ_WRITE
try:
cursor.execute("SELECT DATABASEPROPERTYEX(?, 'Updateability')", (database_name,))
up = cursor.fetchone()[0]
logger.info(f"Database updateability: {up}")
except Exception:
pass
return True
else:
logger.info("Database entry not found yet in sys.databases")
time.sleep(interval_seconds)
except Exception as e:
logger.error(f"Error while waiting for database ONLINE: {e}")
return False
logger.error("Timed out waiting for database to become ONLINE")
return False
def _retry_database_exists(self, database_name: str, attempts: int = 6, delay_seconds: int = 5) -> bool:
"""Retry wrapper for database existence checks."""
for i in range(1, attempts + 1):
if self.database_exists(database_name):
return True
logger.info(f"database_exists() false, retrying ({i}/{attempts})...")
time.sleep(delay_seconds)
return False
def _retry_verify_content(self, database_name: str, attempts: int = 3, delay_seconds: int = 5) -> bool:
"""Retry wrapper around verify_database_content to allow late readiness."""
for i in range(1, attempts + 1):
try:
counts = self.verify_database_content(database_name)
if counts:
logger.info(f"Content verification counts: {counts}")
return True
except Exception as e:
logger.info(f"Content verification attempt {i} failed: {e}")
time.sleep(delay_seconds)
return False
def verify_database_content(self, database_name: Optional[str] = None) -> dict:
"""
Verify database has expected content
Returns:
Dictionary with table counts
"""
if database_name is None:
database_name = self.database
try:
conn_str = self.get_connection_string(database_name)
with pyodbc.connect(conn_str, timeout=30) as conn:
cursor = conn.cursor()
# Get table counts for key tables
tables_to_check = ['Make', 'Model', 'VehicleType', 'Manufacturer']
counts = {}
for table in tables_to_check:
try:
cursor.execute(f"SELECT COUNT(*) FROM {table}")
count = cursor.fetchone()[0]
counts[table] = count
logger.info(f"Table {table}: {count:,} rows")
except Exception as e:
logger.warning(f"Could not count table {table}: {e}")
counts[table] = 0
return counts
except Exception as e:
logger.error(f"Failed to verify database content: {e}")
return {}
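# Minimal usage sketch (comments only; the relative `..config` import means this
# module runs as part of the package, and the .bak path below is an assumption):
#   loader = MSSQLLoader()
#   if loader.test_connection():
#       loader.restore_database(Path("/app/shared/VPICList.bak"))
#       loader.verify_database_content()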


@@ -0,0 +1,354 @@
import logging
from typing import List, Dict, Optional
from psycopg2.extras import execute_batch
from ..connections import db_connections
from tqdm import tqdm
logger = logging.getLogger(__name__)
class PostgreSQLLoader:
"""Load data into PostgreSQL target database"""
def __init__(self):
self.batch_size = 1000
def load_reference_table(self, table_name: str, data: List[Dict],
clear_existing: bool = True) -> int:
"""Load data into a reference table"""
if not data:
logger.warning(f"No data to load for table {table_name}")
return 0
logger.info(f"Loading {len(data)} records into vehicles.{table_name}")
# Column mapping from source (MS SQL) to target (PostgreSQL)
column_mappings = {
'Id': 'id',
'Name': 'name',
'Code': 'code',
'MakeId': 'make_id',
'CreateOn': 'created_at',
'CreatedOn': 'created_at',
'UpdateOn': 'updated_at',
'UpdatedOn': 'updated_at',
'Wmi': 'wmi',
'ManufacturerId': 'manufacturer_id',
'VehicleTypeId': 'vehicle_type_id',
'TruckTypeId': 'truck_type_id',
'CountryId': 'country_id',
'PublicAvailabilityDate': 'public_availability_date',
'NonCompliant': 'non_compliant',
'NonCompliantReason': 'non_compliant_reason',
'ProcessedOn': 'processed_on',
'DisplayOrder': 'display_order',
'FormType': 'form_type',
'Description': 'description',
'LookupTable': 'lookup_table',
'IsPrivate': 'is_private',
'GroupName': 'group_name',
'DataType': 'data_type',
'MinAllowedValue': 'min_allowed_value',
'MaxAllowedValue': 'max_allowed_value',
'IsQS': 'is_qs',
'Decode': 'decode',
'weight': 'weight',
# ErrorCode specific mappings
'ErrorCodeName': 'code',
'ErrorCodeDescription': 'description'
}
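# Example: a source row {'Id': 3, 'Name': 'Audi', 'CreatedOn': ...} lands in
# target columns (id, name, created_at); unmapped source columns fall back
# to their lowercased names below.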
with db_connections.postgres_connection() as conn:
cursor = conn.cursor()
if clear_existing:
cursor.execute(f"TRUNCATE TABLE vehicles.{table_name} CASCADE")
logger.info(f"Cleared existing data from vehicles.{table_name}")
# Get source columns and map them to target columns
source_columns = list(data[0].keys())
target_columns = []
valid_data = []
# Map columns and filter data
for source_col in source_columns:
if source_col in column_mappings:
target_columns.append(column_mappings[source_col])
else:
target_columns.append(source_col.lower())
# Check which columns exist in target table
cursor.execute(f"""
SELECT column_name
FROM information_schema.columns
WHERE table_schema = 'vehicles' AND table_name = '{table_name}'
""")
results = cursor.fetchall()
existing_columns = {row['column_name'] if isinstance(row, dict) else row[0] for row in results}
# Filter to only existing columns
final_columns = []
final_indices = []
for i, col in enumerate(target_columns):
if col in existing_columns:
final_columns.append(col)
final_indices.append(i)
if not final_columns:
logger.warning(f"No matching columns found for table {table_name}")
return 0
column_str = ','.join(final_columns)
placeholders = ','.join(['%s'] * len(final_columns))
# Prepare insert query
query = f"""
INSERT INTO vehicles.{table_name} ({column_str})
VALUES ({placeholders})
ON CONFLICT DO NOTHING
"""
# Prepare data tuples with only valid columns
data_tuples = []
for record in data:
values = []
skip_record = False
for i in final_indices:
source_col = source_columns[i]
value = record[source_col]
# Handle special cases for error_codes table
if table_name == 'error_codes' and source_col in ['ErrorCodeName', 'Code'] and (value is None or value == ''):
skip_record = True
break
values.append(value)
if not skip_record:
data_tuples.append(tuple(values))
# Execute batch insert
execute_batch(cursor, query, data_tuples, page_size=self.batch_size)
conn.commit()
# Get final count
cursor.execute(f"SELECT COUNT(*) FROM vehicles.{table_name}")
result = cursor.fetchone()
final_count = result['count'] if isinstance(result, dict) and 'count' in result else result[0]
logger.info(f"Successfully loaded {final_count} records into vehicles.{table_name}")
return final_count
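# Typical call (illustrative row shape, as extracted from the MSSQL Make table):
#   loader = PostgreSQLLoader()
#   loader.load_reference_table('make', [{'Id': 1, 'Name': 'Audi'}])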
def load_wmi_vin_schema_mappings(self, mappings: List[Dict]) -> int:
"""Load WMI to VIN Schema mappings"""
if not mappings:
return 0
logger.info(f"Loading {len(mappings)} WMI-VinSchema mappings")
with db_connections.postgres_connection() as conn:
cursor = conn.cursor()
# Clear existing mappings
cursor.execute("TRUNCATE TABLE vehicles.wmi_vin_schemas CASCADE")
query = """
INSERT INTO vehicles.wmi_vin_schemas
(wmi_id, vin_schema_id, year_from, year_to)
VALUES (%s, %s, %s, %s)
ON CONFLICT DO NOTHING
"""
data_tuples = []
for mapping in mappings:
data_tuples.append((
mapping['WmiId'],
mapping['VinSchemaId'],
mapping['YearFrom'] or 1980,
mapping['YearTo'] or 2999
))
execute_batch(cursor, query, data_tuples, page_size=self.batch_size)
conn.commit()
# Get final count
cursor.execute("SELECT COUNT(*) FROM vehicles.wmi_vin_schemas")
result = cursor.fetchone()
final_count = result['count'] if isinstance(result, dict) and 'count' in result else result[0]
logger.info(f"Successfully loaded {final_count} WMI-VinSchema mappings")
return final_count
def load_make_model_relationships(self, relationships: List[Dict]) -> int:
"""Load Make-Model relationships"""
if not relationships:
return 0
logger.info(f"Loading {len(relationships)} Make-Model relationships")
with db_connections.postgres_connection() as conn:
cursor = conn.cursor()
# Clear existing relationships
cursor.execute("TRUNCATE TABLE vehicles.make_models CASCADE")
query = """
INSERT INTO vehicles.make_models (make_id, model_id)
VALUES (%s, %s)
ON CONFLICT DO NOTHING
"""
data_tuples = []
for rel in relationships:
data_tuples.append((rel['MakeId'], rel['ModelId']))
execute_batch(cursor, query, data_tuples, page_size=self.batch_size)
conn.commit()
# Get final count
cursor.execute("SELECT COUNT(*) FROM vehicles.make_models")
result = cursor.fetchone()
final_count = result['count'] if isinstance(result, dict) and 'count' in result else result[0]
logger.info(f"Successfully loaded {final_count} Make-Model relationships")
return final_count
def load_wmi_make_relationships(self, relationships: List[Dict]) -> int:
"""Load WMI-Make relationships"""
if not relationships:
return 0
logger.info(f"Loading {len(relationships)} WMI-Make relationships")
with db_connections.postgres_connection() as conn:
cursor = conn.cursor()
# Clear existing relationships
cursor.execute("TRUNCATE TABLE vehicles.wmi_makes CASCADE")
query = """
INSERT INTO vehicles.wmi_makes (wmi_id, make_id)
VALUES (%s, %s)
ON CONFLICT DO NOTHING
"""
data_tuples = []
for rel in relationships:
data_tuples.append((rel['WmiId'], rel['MakeId']))
execute_batch(cursor, query, data_tuples, page_size=self.batch_size)
conn.commit()
# Get final count
cursor.execute("SELECT COUNT(*) FROM vehicles.wmi_makes")
result = cursor.fetchone()
final_count = result['count'] if isinstance(result, dict) and 'count' in result else result[0]
logger.info(f"Successfully loaded {final_count} WMI-Make relationships")
return final_count
def load_model_years(self, model_years: List[Dict]) -> int:
"""Load model year availability data"""
if not model_years:
return 0
logger.info(f"Loading {len(model_years)} model year records")
with db_connections.postgres_connection() as conn:
cursor = conn.cursor()
query = """
INSERT INTO vehicles.model_year (model_id, year)
VALUES (%s, %s)
ON CONFLICT (model_id, year) DO NOTHING
"""
data_tuples = [(my['model_id'], my['year']) for my in model_years]
execute_batch(cursor, query, data_tuples, page_size=self.batch_size)
conn.commit()
return len(model_years)
def load_trims(self, trims: List[Dict]) -> int:
"""Load trim data"""
if not trims:
return 0
logger.info(f"Loading {len(trims)} trim records")
with db_connections.postgres_connection() as conn:
cursor = conn.cursor()
query = """
INSERT INTO vehicles.trim (model_year_id, name)
VALUES (%s, %s)
ON CONFLICT DO NOTHING
"""
data_tuples = [(t['model_year_id'], t['name']) for t in trims]
execute_batch(cursor, query, data_tuples, page_size=self.batch_size)
conn.commit()
return len(trims)
def load_engines(self, engines: List[Dict]) -> int:
"""Load engine data"""
if not engines:
return 0
logger.info(f"Loading {len(engines)} engine records")
with db_connections.postgres_connection() as conn:
cursor = conn.cursor()
query = """
INSERT INTO vehicles.engine (name, code, displacement_l, cylinders, fuel_type, aspiration)
VALUES (%s, %s, %s, %s, %s, %s)
ON CONFLICT (lower(name)) DO NOTHING
RETURNING id
"""
for engine in engines:
cursor.execute(query, (
engine['name'],
engine.get('code'),
engine.get('displacement_l'),
engine.get('cylinders'),
engine.get('fuel_type'),
engine.get('aspiration')
))
conn.commit()
return len(engines)
def load_trim_engine_relationships(self, relationships: List[Dict]) -> int:
"""Load trim-engine relationships"""
if not relationships:
return 0
logger.info(f"Loading {len(relationships)} trim-engine relationships")
with db_connections.postgres_connection() as conn:
cursor = conn.cursor()
query = """
INSERT INTO vehicles.trim_engine (trim_id, engine_id)
VALUES (%s, %s)
ON CONFLICT (trim_id, engine_id) DO NOTHING
"""
data_tuples = [(rel['trim_id'], rel['engine_id']) for rel in relationships]
execute_batch(cursor, query, data_tuples, page_size=self.batch_size)
conn.commit()
return len(relationships)
def get_table_count(self, table_name: str) -> int:
"""Get count of records in a table"""
with db_connections.postgres_connection() as conn:
cursor = conn.cursor()
cursor.execute(f"SELECT COUNT(*) FROM vehicles.{table_name}")
result = cursor.fetchone()
return result['count'] if isinstance(result, dict) and 'count' in result else result[0]