"""
JSON Manual Loader for Vehicles ETL

Loads extracted JSON data into PostgreSQL database with referential integrity.
Supports clear/append modes with duplicate handling and comprehensive progress tracking.

Database Schema:
- vehicles.make (id, name)
- vehicles.model (id, make_id, name)
- vehicles.model_year (id, model_id, year)
- vehicles.trim (id, model_year_id, name)
- vehicles.engine (id, name, code, displacement_l, cylinders, fuel_type, aspiration)
- vehicles.trim_engine (trim_id, engine_id)

Load Modes:
- CLEAR: Truncate all tables and reload (destructive)
- APPEND: Insert with conflict resolution (safe)

Usage:
    loader = JsonManualLoader(postgres_loader)
    result = loader.load_all_makes(extraction_result.makes, LoadMode.APPEND)
"""

import logging
from typing import List, Dict, Optional, Tuple
from enum import Enum
from dataclasses import dataclass
from psycopg2.extras import execute_batch

# Import our components (handle both relative and direct imports)
try:
    from .postgres_loader import PostgreSQLLoader
    from ..extractors.json_extractor import MakeData, ModelData, ExtractionResult
    from ..utils.engine_spec_parser import EngineSpec
    from ..connections import db_connections
except ImportError:
    # Fallback for direct execution
    import sys
    import os
    sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
    
    # Import with fallback handling for nested imports
    try:
        from loaders.postgres_loader import PostgreSQLLoader
    except ImportError:
        # Mock PostgreSQLLoader for testing
        class PostgreSQLLoader:
            def __init__(self):
                self.batch_size = 1000
    
    from extractors.json_extractor import MakeData, ModelData, ExtractionResult
    from utils.engine_spec_parser import EngineSpec
    
    try:
        from connections import db_connections
    except ImportError:
        # Mock db_connections for testing
        class MockDBConnections:
            def postgres_connection(self):
                raise NotImplementedError("Database connection not available in test mode")
        db_connections = MockDBConnections()

logger = logging.getLogger(__name__)


class LoadMode(Enum):
    """How incoming data is reconciled with rows already in the database"""

    # Destructive: the vehicles tables are truncated before the reload.
    CLEAR = "clear"

    # Safe: rows are inserted alongside existing data with conflict handling.
    APPEND = "append"


@dataclass
class LoadResult:
    """
    Summary of a finished bulk load

    ``total_makes`` is the number of makes handed to the loader, while the
    remaining ``total_*`` fields count rows inserted during the run.
    ``failed_makes`` holds the names of makes that were skipped or failed.
    """
    total_makes: int
    total_models: int
    total_model_years: int
    total_trims: int
    total_engines: int
    total_trim_engine_mappings: int
    failed_makes: List[str]
    warnings: List[str]
    load_mode: LoadMode

    @property
    def success_count(self) -> int:
        """Number of makes that are not listed in ``failed_makes``"""
        failed = len(self.failed_makes)
        return self.total_makes - failed

    @property
    def success_rate(self) -> float:
        """Fraction of makes loaded successfully (0.0 when there were no makes)"""
        if self.total_makes > 0:
            return self.success_count / self.total_makes
        return 0.0


@dataclass
class LoadStatistics:
    """Running counters and messages accumulated while a load is in progress"""
    makes_processed: int = 0
    makes_skipped: int = 0
    models_inserted: int = 0
    model_years_inserted: int = 0
    skipped_model_years: int = 0
    trims_inserted: int = 0
    engines_inserted: int = 0
    trim_engine_mappings_inserted: int = 0
    duplicate_makes: int = 0
    duplicate_models: int = 0
    duplicate_engines: int = 0
    errors: List[str] = None
    warnings: List[str] = None

    def __post_init__(self):
        # None means "no list supplied": give every instance its own fresh
        # list instead of sharing one mutable default between instances.
        for attr in ("errors", "warnings"):
            if getattr(self, attr) is None:
                setattr(self, attr, [])


class JsonManualLoader:
    """Load JSON-extracted vehicle data into PostgreSQL"""
    
    def _get_id_from_result(self, result, column_name='id'):
        """Helper to extract ID from query result, handling both tuple and dict cursors"""
        if result is None:
            return None
        if isinstance(result, tuple):
            return result[0]
        # For RealDictCursor, try the column name first, fall back to key access
        if column_name in result:
            return result[column_name]
        # For COUNT(*) queries, the key might be 'count'
        if 'count' in result:
            return result['count']
        # Fall back to first value
        return list(result.values())[0] if result else None
    
    def __init__(self, postgres_loader: Optional[PostgreSQLLoader] = None):
        """
        Set up the loader

        Args:
            postgres_loader: PostgreSQL loader to reuse; when omitted (or
                falsy) a new PostgreSQLLoader is constructed instead
        """
        if not postgres_loader:
            postgres_loader = PostgreSQLLoader()

        self.postgres_loader = postgres_loader
        self.batch_size = 1000

        logger.info("JsonManualLoader initialized")
    
    def clear_all_tables(self) -> None:
        """
        Clear all vehicles tables in dependency order

        Clearing is best-effort per table: each TRUNCATE is committed on its
        own, and a failure is rolled back and logged before moving on to the
        next table. Tables that could not be cleared are reported in a final
        warning.

        WARNING: This is destructive and will remove all data
        """
        logger.warning("CLEARING ALL VEHICLES TABLES - This is destructive!")

        tables_to_clear = [
            'trim_engine',      # Many-to-many mappings first
            'trim_transmission',
            'performance',      # Tables with foreign keys
            'trim',
            'model_year',
            'model',
            'make',
            'engine',           # Independent tables last
            'transmission'
        ]
        failed_tables = []

        with db_connections.postgres_connection() as conn:
            cursor = conn.cursor()

            for table in tables_to_clear:
                try:
                    cursor.execute(f"TRUNCATE TABLE vehicles.{table} CASCADE")
                    # Commit per table so a later failure cannot undo this one
                    conn.commit()
                    logger.info(f"Cleared vehicles.{table}")
                except Exception as e:
                    # A failed statement leaves a PostgreSQL transaction in an
                    # aborted state in which every following statement is
                    # rejected. Roll back so the remaining tables can still
                    # be truncated.
                    conn.rollback()
                    failed_tables.append(table)
                    logger.warning(f"Failed to clear vehicles.{table}: {str(e)}")

        if failed_tables:
            logger.warning(
                f"Vehicles tables cleared with failures: {', '.join(failed_tables)}"
            )
        else:
            logger.info("All vehicles tables cleared")
    
    def load_make(self, make_data: MakeData, mode: LoadMode, stats: LoadStatistics) -> int:
        """
        Load a single make with all related data

        The make and everything below it (models, model years, trims,
        engines) are written through one connection and committed once at
        the end.

        Args:
            make_data: Extracted make data
            mode: Loading mode (clear/append), forwarded to the model loader
            stats: Statistics accumulator

        Returns:
            Make ID in database

        Raises:
            Exception: Any failure is logged, recorded in stats.errors and
                re-raised to the caller
        """
        logger.debug(f"Loading make: {make_data.name}")

        # Case-insensitive lookup to match the database constraint
        lookup_sql = "SELECT id FROM vehicles.make WHERE lower(name) = lower(%s)"

        try:
            with db_connections.postgres_connection() as conn:
                cursor = conn.cursor()

                # 1. Insert or get make (always check for existing first)
                cursor.execute(lookup_sql, (make_data.name,))
                result = cursor.fetchone()

                if result:
                    make_id = self._get_id_from_result(result)
                    stats.duplicate_makes += 1
                    logger.debug(f"Make {make_data.name} already exists with ID {make_id}")
                else:
                    # ON CONFLICT DO NOTHING (PostgreSQL 9.5+) makes a lost
                    # race return no row instead of raising. A raised
                    # unique violation would abort the transaction, and the
                    # follow-up lookup could then never succeed.
                    cursor.execute(
                        "INSERT INTO vehicles.make (name) VALUES (%s) "
                        "ON CONFLICT DO NOTHING RETURNING id",
                        (make_data.name,)
                    )
                    result = cursor.fetchone()

                    if result:
                        make_id = self._get_id_from_result(result)
                        logger.debug(f"Inserted make {make_data.name} with ID {make_id}")
                    else:
                        # Somebody else inserted the make in the meantime
                        cursor.execute(lookup_sql, (make_data.name,))
                        result = cursor.fetchone()
                        if not result:
                            raise RuntimeError(
                                f"Insert of make {make_data.name} conflicted "
                                "but no existing row was found"
                            )
                        make_id = self._get_id_from_result(result)
                        stats.duplicate_makes += 1
                        logger.debug(f"Make {make_data.name} found after retry with ID {make_id}")

                # 2. Process models
                for model_data in make_data.models:
                    self.load_model(cursor, make_id, model_data, mode, stats)

                conn.commit()
                stats.makes_processed += 1

                return make_id

        except Exception as e:
            error_msg = f"Failed to load make {make_data.name}: {str(e)}"
            logger.error(error_msg)
            stats.errors.append(error_msg)
            raise
    
    def load_model(self, cursor, make_id: int, model_data: ModelData, mode: LoadMode, stats: LoadStatistics) -> int:
        """
        Load a single model with all related data

        In APPEND mode an existing (make_id, name) row is reused and counted
        as a duplicate; otherwise, and always in CLEAR mode, a new row is
        inserted.

        Args:
            cursor: Database cursor
            make_id: Parent make ID
            model_data: Extracted model data
            mode: Loading mode
            stats: Statistics accumulator

        Returns:
            Model ID in database
        """
        # 1. Insert or get model
        existing = None
        if mode == LoadMode.APPEND:
            cursor.execute(
                "SELECT id FROM vehicles.model WHERE make_id = %s AND name = %s",
                (make_id, model_data.name)
            )
            existing = cursor.fetchone()

        if existing:
            # Shared helper copes with both tuple and dict style rows
            model_id = self._get_id_from_result(existing)
            stats.duplicate_models += 1
        else:
            cursor.execute(
                "INSERT INTO vehicles.model (make_id, name) VALUES (%s, %s) RETURNING id",
                (make_id, model_data.name)
            )
            model_id = self._get_id_from_result(cursor.fetchone())
            stats.models_inserted += 1

        # 2. Insert model years and related data. load_model_year returns
        # None for years it skips; nothing further is needed per year here.
        for year in model_data.years:
            self.load_model_year(cursor, model_id, year, model_data, mode, stats)

        return model_id
    
    def load_model_year(self, cursor, model_id: int, year: int, model_data: ModelData, mode: LoadMode, stats: LoadStatistics) -> Optional[int]:
        """
        Load model year and associated trims/engines

        Every engine in model_data is loaded (or looked up) and every trim
        of model_data is attached to this model year and linked to all of
        those engines.

        Args:
            cursor: Database cursor
            model_id: Parent model ID
            year: Model year
            model_data: Model data with trims and engines
            mode: Loading mode
            stats: Statistics accumulator

        Returns:
            Model year ID in database, or None when the year was skipped
            because it lies outside 1950-2100
        """
        # Skip years that don't meet database constraints (must be 1950-2100)
        if year < 1950 or year > 2100:
            logger.warning(f"Skipping year {year} - outside valid range (1950-2100)")
            stats.skipped_model_years += 1
            return None

        # 1. Insert or get model year (existing rows are only reused in APPEND mode)
        existing = None
        if mode == LoadMode.APPEND:
            cursor.execute(
                "SELECT id FROM vehicles.model_year WHERE model_id = %s AND year = %s",
                (model_id, year)
            )
            existing = cursor.fetchone()

        if existing:
            model_year_id = self._get_id_from_result(existing)
        else:
            cursor.execute(
                "INSERT INTO vehicles.model_year (model_id, year) VALUES (%s, %s) RETURNING id",
                (model_id, year)
            )
            model_year_id = self._get_id_from_result(cursor.fetchone())
            stats.model_years_inserted += 1

        # 2. Load engines and get their IDs
        engine_ids = [
            self.load_engine(cursor, engine_spec, mode, stats)
            for engine_spec in model_data.engines
        ]

        # 3. Load trims and connect to engines
        for trim_name in model_data.trims:
            self.load_trim(cursor, model_year_id, trim_name, engine_ids, mode, stats)

        return model_year_id
    
    def load_engine(self, cursor, engine_spec: EngineSpec, mode: LoadMode, stats: LoadStatistics) -> int:
        """
        Load engine specification
        
        Args:
            cursor: Database cursor
            engine_spec: Parsed engine specification
            mode: Loading mode
            stats: Statistics accumulator
            
        Returns:
            Engine ID in database
        """
        # Create a canonical engine name for database storage
        if engine_spec.displacement_l and engine_spec.configuration != "Unknown" and engine_spec.cylinders:
            engine_name = f"{engine_spec.displacement_l}L {engine_spec.configuration}{engine_spec.cylinders}"
        else:
            engine_name = engine_spec.raw_string
        
        # Generate engine code from name (remove spaces, lowercase)
        engine_code = engine_name.replace(" ", "").lower()
        
        # Always check for existing engine by name or code to avoid constraint violations
        cursor.execute("""
            SELECT id FROM vehicles.engine 
            WHERE lower(name) = lower(%s) OR (code IS NOT NULL AND code = %s)
        """, (engine_name, engine_code))
        result = cursor.fetchone()
        
        if result:
            engine_id = self._get_id_from_result(result)
            stats.duplicate_engines += 1
            return engine_id
        
        # Insert new engine
        try:
            cursor.execute("""
                INSERT INTO vehicles.engine (name, code, displacement_l, cylinders, fuel_type, aspiration)
                VALUES (%s, %s, %s, %s, %s, %s)
                RETURNING id
            """, (
                engine_name,
                engine_code,
                engine_spec.displacement_l,
                engine_spec.cylinders,
                engine_spec.fuel_type if engine_spec.fuel_type != "Unknown" else None,
                engine_spec.aspiration if engine_spec.aspiration != "Natural" else None
            ))
            
            engine_id = self._get_id_from_result(cursor.fetchone())
            stats.engines_inserted += 1
            
            return engine_id
        except Exception as e:
            if "duplicate key value violates unique constraint" in str(e):
                # Retry the lookup in case of race condition
                cursor.execute("""
                    SELECT id FROM vehicles.engine 
                    WHERE lower(name) = lower(%s) OR (code IS NOT NULL AND code = %s)
                """, (engine_name, engine_code))
                result = cursor.fetchone()
                if result:
                    engine_id = self._get_id_from_result(result)
                    stats.duplicate_engines += 1
                    return engine_id
            raise
    
    def load_trim(self, cursor, model_year_id: int, trim_name: str, engine_ids: List[int], mode: LoadMode, stats: LoadStatistics) -> int:
        """
        Load trim and connect to engines

        In APPEND mode an existing (model_year_id, name) row is reused;
        otherwise, and always in CLEAR mode, a new row is inserted. The trim
        is then linked to each distinct engine ID, skipping links that
        already exist.

        Args:
            cursor: Database cursor
            model_year_id: Parent model year ID
            trim_name: Trim name
            engine_ids: List of engine IDs to connect
            mode: Loading mode
            stats: Statistics accumulator

        Returns:
            Trim ID in database
        """
        # 1. Insert or get trim
        existing = None
        if mode == LoadMode.APPEND:
            cursor.execute(
                "SELECT id FROM vehicles.trim WHERE model_year_id = %s AND name = %s",
                (model_year_id, trim_name)
            )
            existing = cursor.fetchone()

        if existing:
            trim_id = self._get_id_from_result(existing)
        else:
            cursor.execute(
                "INSERT INTO vehicles.trim (model_year_id, name) VALUES (%s, %s) RETURNING id",
                (model_year_id, trim_name)
            )
            trim_id = self._get_id_from_result(cursor.fetchone())
            stats.trims_inserted += 1

        # 2. Connect trim to engines (always check for existing to avoid duplicates)
        # dict.fromkeys drops repeated engine IDs while keeping their original
        # order, so mappings are inserted in a deterministic sequence.
        for engine_id in dict.fromkeys(engine_ids):
            # Check if mapping already exists
            cursor.execute(
                "SELECT 1 FROM vehicles.trim_engine WHERE trim_id = %s AND engine_id = %s",
                (trim_id, engine_id)
            )
            if cursor.fetchone():
                continue

            # ON CONFLICT DO NOTHING (PostgreSQL 9.5+): if another process
            # inserted the same mapping in the meantime, the statement returns
            # no row instead of raising. A raised unique violation would abort
            # the surrounding transaction.
            cursor.execute(
                "INSERT INTO vehicles.trim_engine (trim_id, engine_id) VALUES (%s, %s) "
                "ON CONFLICT DO NOTHING RETURNING trim_id",
                (trim_id, engine_id)
            )
            if cursor.fetchone():
                stats.trim_engine_mappings_inserted += 1
            else:
                logger.debug(f"Trim-engine mapping ({trim_id}, {engine_id}) already exists, skipping")

        return trim_id
    
    def load_all_makes(self, makes_data: List[MakeData], mode: LoadMode) -> LoadResult:
        """
        Load all makes with complete data

        Makes are loaded one by one; a make that carries extraction errors is
        skipped, and a make whose load raises is recorded as failed without
        stopping the run. Skipped makes and skipped model years are reported
        through the warnings of the returned result.

        Args:
            makes_data: List of extracted make data
            mode: Loading mode (clear/append); CLEAR truncates the vehicles
                tables before anything is loaded

        Returns:
            LoadResult with comprehensive statistics
        """
        logger.info(f"Starting bulk load of {len(makes_data)} makes in {mode.value} mode")

        # Clear tables if in CLEAR mode
        if mode == LoadMode.CLEAR:
            self.clear_all_tables()

        stats = LoadStatistics()
        failed_makes = []

        for make_data in makes_data:
            try:
                if make_data.processing_errors:
                    skip_msg = f"Skipping make {make_data.name} due to extraction errors"
                    logger.warning(skip_msg)
                    # Record the skip so it shows up in LoadResult.warnings
                    stats.warnings.append(skip_msg)
                    stats.makes_skipped += 1
                    failed_makes.append(make_data.name)
                    continue

                make_id = self.load_make(make_data, mode, stats)
                logger.info(f"Successfully loaded make {make_data.name} (ID: {make_id})")

            except Exception as e:
                logger.error(f"Failed to load make {make_data.name}: {str(e)}")
                failed_makes.append(make_data.name)

        # Individual skipped years are only logged; surface the total as one warning
        if stats.skipped_model_years:
            stats.warnings.append(
                f"Skipped {stats.skipped_model_years} out-of-range model year(s)"
            )

        # Create result
        result = LoadResult(
            total_makes=len(makes_data),
            total_models=stats.models_inserted,
            total_model_years=stats.model_years_inserted,
            total_trims=stats.trims_inserted,
            total_engines=stats.engines_inserted,
            total_trim_engine_mappings=stats.trim_engine_mappings_inserted,
            failed_makes=failed_makes,
            warnings=stats.warnings,
            load_mode=mode
        )

        logger.info(f"Bulk load complete: {result.success_count}/{result.total_makes} makes loaded successfully")
        logger.info(f"Data loaded: {result.total_models} models, {result.total_engines} engines, {result.total_trims} trims")

        return result
    
    def get_database_statistics(self) -> Dict[str, int]:
        """
        Get current database record counts

        Runs one COUNT(*) query per table over make, model, model_year,
        trim, engine and trim_engine in the vehicles schema.

        Returns:
            Dictionary mapping each table name to its row count
        """
        stats = {}

        tables = ['make', 'model', 'model_year', 'trim', 'engine', 'trim_engine']

        with db_connections.postgres_connection() as conn:
            cursor = conn.cursor()

            for table in tables:
                # Table names come from the fixed list above, never from input
                cursor.execute(f"SELECT COUNT(*) FROM vehicles.{table}")
                # Same row decoding as the other COUNT(*) readers in this class
                stats[table] = self._get_id_from_result(cursor.fetchone(), 'count')

        return stats
    
    def validate_referential_integrity(self) -> List[str]:
        """
        Look for rows whose parent record is missing

        Four checks are run in order: models without a make, model years
        without a model, trims without a model year, and trim_engine
        mappings pointing at a missing trim or engine.

        Returns:
            List of integrity issues found (empty if all good)
        """
        # Each entry: (query counting offending rows, column hint for
        # mapping-style rows, message template for a non-zero count)
        checks = [
            ("""
                SELECT COUNT(*) FROM vehicles.model m
                LEFT JOIN vehicles.make mk ON m.make_id = mk.id
                WHERE mk.id IS NULL
            """, 'count', "Found {} orphaned models"),
            ("""
                SELECT COUNT(*) FROM vehicles.model_year my
                LEFT JOIN vehicles.model m ON my.model_id = m.id
                WHERE m.id IS NULL
            """, 'id', "Found {} orphaned model_years"),
            ("""
                SELECT COUNT(*) FROM vehicles.trim t
                LEFT JOIN vehicles.model_year my ON t.model_year_id = my.id
                WHERE my.id IS NULL
            """, 'id', "Found {} orphaned trims"),
            ("""
                SELECT COUNT(*) FROM vehicles.trim_engine te
                LEFT JOIN vehicles.trim t ON te.trim_id = t.id
                LEFT JOIN vehicles.engine e ON te.engine_id = e.id
                WHERE t.id IS NULL OR e.id IS NULL
            """, 'id', "Found {} broken trim_engine mappings"),
        ]

        issues = []

        with db_connections.postgres_connection() as conn:
            cursor = conn.cursor()

            for query, column, template in checks:
                cursor.execute(query)
                offending = self._get_id_from_result(cursor.fetchone(), column)
                if offending > 0:
                    issues.append(template.format(offending))

        if not issues:
            logger.info("Referential integrity validation passed")
        else:
            logger.warning(f"Referential integrity issues found: {issues}")

        return issues
    
    def print_load_report(self, result: LoadResult) -> None:
        """
        Print comprehensive loading report
        
        Args:
            result: LoadResult from load operation
        """
        print(f"🚀 JSON MANUAL LOADING REPORT")
        print(f"=" * 50)
        
        # Load summary
        print(f"\n📊 LOADING SUMMARY")
        print(f"   Mode: {result.load_mode.value.upper()}")
        print(f"   Makes processed: {result.success_count}/{result.total_makes}")
        print(f"   Success rate: {result.success_rate:.1%}")
        
        # Data counts
        print(f"\n📈 DATA LOADED")
        print(f"   Models: {result.total_models}")
        print(f"   Model years: {result.total_model_years}")
        print(f"   Trims: {result.total_trims}")
        print(f"   Engines: {result.total_engines}")
        print(f"   Trim-engine mappings: {result.total_trim_engine_mappings}")
        
        # Issues
        if result.failed_makes:
            print(f"\n⚠️  FAILED MAKES ({len(result.failed_makes)}):")
            for make in result.failed_makes:
                print(f"   {make}")
        
        if result.warnings:
            print(f"\n⚠️  WARNINGS ({len(result.warnings)}):")
            for warning in result.warnings[:5]:  # Show first 5
                print(f"   {warning}")
            if len(result.warnings) > 5:
                print(f"   ... and {len(result.warnings) - 5} more warnings")
        
        # Database statistics
        print(f"\n📋 DATABASE STATISTICS:")
        db_stats = self.get_database_statistics()
        for table, count in db_stats.items():
            print(f"   vehicles.{table}: {count:,} records")
        
        # Referential integrity
        integrity_issues = self.validate_referential_integrity()
        if integrity_issues:
            print(f"\n❌ REFERENTIAL INTEGRITY ISSUES:")
            for issue in integrity_issues:
                print(f"   {issue}")
        else:
            print(f"\n✅ REFERENTIAL INTEGRITY: PASSED")


# Example usage and testing functions
def example_usage():
    """Print a short walkthrough of how JsonManualLoader is meant to be used"""
    print("🚀 JsonManualLoader Example Usage")
    print("=" * 40)

    # Nothing is extracted or loaded here; the function only prints the
    # intended flow followed by a code sample.
    steps = (
        "Extract data with JsonExtractor",
        "Create JsonManualLoader",
        "Load data in APPEND or CLEAR mode",
        "Validate and report results",
    )

    print("\n📋 Typical usage flow:")
    for number, step in enumerate(steps, start=1):
        print(f"{number}. {step}")

    print("\n💡 Example code:")
    print("""
    # Extract data
    extractor = JsonExtractor(make_mapper, engine_parser)
    extraction_result = extractor.extract_all_makes('sources/makes')
    
    # Load data
    loader = JsonManualLoader()
    load_result = loader.load_all_makes(extraction_result.makes, LoadMode.APPEND)
    
    # Report results
    loader.print_load_report(load_result)
    """)


# When the module is run as a script, print the usage walkthrough.
if __name__ == "__main__":
    example_usage()