# Source file: mvp-platform-services/vehicles/etl/loaders/json_manual_loader.py
# (716 lines, added as a new file in the project's initial commit)
"""
|
||||
JSON Manual Loader for Vehicles ETL
|
||||
|
||||
Loads extracted JSON data into PostgreSQL database with referential integrity.
|
||||
Supports clear/append modes with duplicate handling and comprehensive progress tracking.
|
||||
|
||||
Database Schema:
|
||||
- vehicles.make (id, name)
|
||||
- vehicles.model (id, make_id, name)
|
||||
- vehicles.model_year (id, model_id, year)
|
||||
- vehicles.trim (id, model_year_id, name)
|
||||
- vehicles.engine (id, name, code, displacement_l, cylinders, fuel_type, aspiration)
|
||||
- vehicles.trim_engine (trim_id, engine_id)
|
||||
|
||||
Load Modes:
|
||||
- CLEAR: Truncate all tables and reload (destructive)
|
||||
- APPEND: Insert with conflict resolution (safe)
|
||||
|
||||
Usage:
|
||||
loader = JsonManualLoader(postgres_loader)
|
||||
result = loader.load_all_makes(extraction_result.makes, LoadMode.APPEND)
|
||||
"""
|
||||
|
||||
import logging
from dataclasses import dataclass, field
from enum import Enum
from typing import Dict, List, Optional, Tuple

from psycopg2.extras import execute_batch
|
||||
|
||||
# Import our components (handle both relative and direct imports)
|
||||
try:
|
||||
from .postgres_loader import PostgreSQLLoader
|
||||
from ..extractors.json_extractor import MakeData, ModelData, ExtractionResult
|
||||
from ..utils.engine_spec_parser import EngineSpec
|
||||
from ..connections import db_connections
|
||||
except ImportError:
|
||||
# Fallback for direct execution
|
||||
import sys
|
||||
import os
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
|
||||
|
||||
# Import with fallback handling for nested imports
|
||||
try:
|
||||
from loaders.postgres_loader import PostgreSQLLoader
|
||||
except ImportError:
|
||||
# Mock PostgreSQLLoader for testing
|
||||
class PostgreSQLLoader:
|
||||
def __init__(self):
|
||||
self.batch_size = 1000
|
||||
|
||||
from extractors.json_extractor import MakeData, ModelData, ExtractionResult
|
||||
from utils.engine_spec_parser import EngineSpec
|
||||
|
||||
try:
|
||||
from connections import db_connections
|
||||
except ImportError:
|
||||
# Mock db_connections for testing
|
||||
class MockDBConnections:
|
||||
def postgres_connection(self):
|
||||
raise NotImplementedError("Database connection not available in test mode")
|
||||
db_connections = MockDBConnections()
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class LoadMode(Enum):
    """Data loading modes"""
    # CLEAR truncates every vehicles.* table before reloading — destructive,
    # intended only for full rebuilds.
    CLEAR = "clear"    # Truncate and reload (destructive)
    # APPEND looks up existing rows and inserts only what is missing — safe
    # to re-run against a populated database.
    APPEND = "append"  # Insert with conflict handling (safe)
@dataclass
class LoadResult:
    """Summary of a bulk load run.

    Holds the row counts inserted during the run, the names of makes that
    failed to load, any warnings collected along the way, and the mode the
    run was executed in.
    """
    total_makes: int
    total_models: int
    total_model_years: int
    total_trims: int
    total_engines: int
    total_trim_engine_mappings: int
    failed_makes: List[str]
    warnings: List[str]
    load_mode: LoadMode

    @property
    def success_count(self) -> int:
        """Number of makes that loaded without being recorded as failed."""
        return self.total_makes - len(self.failed_makes)

    @property
    def success_rate(self) -> float:
        """Fraction of makes loaded successfully; 0.0 when nothing was attempted."""
        if self.total_makes > 0:
            return self.success_count / self.total_makes
        return 0.0
@dataclass
class LoadStatistics:
    """Detailed loading statistics accumulated across a bulk load run."""
    makes_processed: int = 0
    makes_skipped: int = 0
    models_inserted: int = 0
    model_years_inserted: int = 0
    skipped_model_years: int = 0
    trims_inserted: int = 0
    engines_inserted: int = 0
    trim_engine_mappings_inserted: int = 0
    duplicate_makes: int = 0
    duplicate_models: int = 0
    duplicate_engines: int = 0
    # default_factory gives each instance its own list. The previous
    # `= None` + __post_init__ dance worked but is the non-idiomatic way to
    # avoid the shared-mutable-default pitfall.
    errors: List[str] = field(default_factory=list)
    warnings: List[str] = field(default_factory=list)

    def __post_init__(self):
        # Backward compatibility: callers that explicitly pass errors=None or
        # warnings=None still end up with empty lists, as before.
        if self.errors is None:
            self.errors = []
        if self.warnings is None:
            self.warnings = []
class JsonManualLoader:
    """Load JSON-extracted vehicle data into PostgreSQL"""

    def _get_id_from_result(self, result, column_name='id'):
        """Extract a scalar ID from a fetchone() row.

        Works with both plain tuple cursors (positional access) and
        RealDictCursor rows (key access). For dict rows, the requested
        column is tried first, then 'count' (for COUNT(*) queries), then
        the row's first value.
        """
        if result is None:
            return None
        if isinstance(result, tuple):
            return result[0]
        # Dict-like row: probe the expected keys in priority order.
        for key in (column_name, 'count'):
            if key in result:
                return result[key]
        # Last resort: first value in the row, or None for an empty row.
        return next(iter(result.values())) if result else None
def __init__(self, postgres_loader: Optional[PostgreSQLLoader] = None):
|
||||
"""
|
||||
Initialize JSON manual loader
|
||||
|
||||
Args:
|
||||
postgres_loader: Existing PostgreSQL loader instance
|
||||
"""
|
||||
self.postgres_loader = postgres_loader or PostgreSQLLoader()
|
||||
self.batch_size = 1000
|
||||
|
||||
logger.info("JsonManualLoader initialized")
|
||||
|
||||
def clear_all_tables(self) -> None:
|
||||
"""
|
||||
Clear all vehicles tables in dependency order
|
||||
|
||||
WARNING: This is destructive and will remove all data
|
||||
"""
|
||||
logger.warning("CLEARING ALL VEHICLES TABLES - This is destructive!")
|
||||
|
||||
tables_to_clear = [
|
||||
'trim_engine', # Many-to-many mappings first
|
||||
'trim_transmission',
|
||||
'performance', # Tables with foreign keys
|
||||
'trim',
|
||||
'model_year',
|
||||
'model',
|
||||
'make',
|
||||
'engine', # Independent tables last
|
||||
'transmission'
|
||||
]
|
||||
|
||||
with db_connections.postgres_connection() as conn:
|
||||
cursor = conn.cursor()
|
||||
|
||||
for table in tables_to_clear:
|
||||
try:
|
||||
cursor.execute(f"TRUNCATE TABLE vehicles.{table} CASCADE")
|
||||
logger.info(f"Cleared vehicles.{table}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to clear vehicles.{table}: {str(e)}")
|
||||
|
||||
conn.commit()
|
||||
|
||||
logger.info("All vehicles tables cleared")
|
||||
|
||||
    def load_make(self, make_data: MakeData, mode: LoadMode, stats: LoadStatistics) -> int:
        """
        Load a single make with all related data.

        Opens its own connection, so each make is committed as one
        transaction; a failure rolls the whole make back (connection context
        manager semantics — presumably rollback on exception, TODO confirm in
        db_connections).

        Args:
            make_data: Extracted make data
            mode: Loading mode (clear/append)
            stats: Statistics accumulator

        Returns:
            Make ID in database

        Raises:
            Exception: re-raised after recording the error in stats.errors.
        """
        logger.debug(f"Loading make: {make_data.name}")

        try:
            with db_connections.postgres_connection() as conn:
                cursor = conn.cursor()

                # 1. Insert or get make. The lookup is done in every mode
                # (not just APPEND) to avoid unique-constraint violations;
                # lower() matches the database's case-insensitive constraint.
                cursor.execute(
                    "SELECT id FROM vehicles.make WHERE lower(name) = lower(%s)",
                    (make_data.name,)
                )
                result = cursor.fetchone()

                if result:
                    make_id = self._get_id_from_result(result)
                    stats.duplicate_makes += 1
                    logger.debug(f"Make {make_data.name} already exists with ID {make_id}")
                else:
                    # Insert new make; a concurrent writer may still beat us
                    # to it, so duplicate-key errors trigger a re-lookup.
                    try:
                        cursor.execute(
                            "INSERT INTO vehicles.make (name) VALUES (%s) RETURNING id",
                            (make_data.name,)
                        )
                        result = cursor.fetchone()
                        make_id = self._get_id_from_result(result)
                        logger.debug(f"Inserted make {make_data.name} with ID {make_id}")
                    except Exception as e:
                        if "duplicate key value violates unique constraint" in str(e):
                            # Race condition: another process inserted the
                            # same make between our SELECT and INSERT.
                            cursor.execute(
                                "SELECT id FROM vehicles.make WHERE lower(name) = lower(%s)",
                                (make_data.name,)
                            )
                            result = cursor.fetchone()
                            if result:
                                make_id = self._get_id_from_result(result)
                                stats.duplicate_makes += 1
                                logger.debug(f"Make {make_data.name} found after retry with ID {make_id}")
                            else:
                                # Duplicate reported but row not found —
                                # give up and surface the original error.
                                raise
                        else:
                            raise

                # 2. Process models (return value unused here; load_model
                # updates stats and cascades to years/trims/engines itself).
                for model_data in make_data.models:
                    model_id = self.load_model(cursor, make_id, model_data, mode, stats)

                conn.commit()
                stats.makes_processed += 1

                return make_id

        except Exception as e:
            error_msg = f"Failed to load make {make_data.name}: {str(e)}"
            logger.error(error_msg)
            stats.errors.append(error_msg)
            raise
def load_model(self, cursor, make_id: int, model_data: ModelData, mode: LoadMode, stats: LoadStatistics) -> int:
|
||||
"""
|
||||
Load a single model with all related data
|
||||
|
||||
Args:
|
||||
cursor: Database cursor
|
||||
make_id: Parent make ID
|
||||
model_data: Extracted model data
|
||||
mode: Loading mode
|
||||
stats: Statistics accumulator
|
||||
|
||||
Returns:
|
||||
Model ID in database
|
||||
"""
|
||||
# 1. Insert or get model
|
||||
if mode == LoadMode.APPEND:
|
||||
cursor.execute(
|
||||
"SELECT id FROM vehicles.model WHERE make_id = %s AND name = %s",
|
||||
(make_id, model_data.name)
|
||||
)
|
||||
result = cursor.fetchone()
|
||||
|
||||
if result:
|
||||
model_id = result[0] if isinstance(result, tuple) else result['id']
|
||||
stats.duplicate_models += 1
|
||||
else:
|
||||
cursor.execute(
|
||||
"INSERT INTO vehicles.model (make_id, name) VALUES (%s, %s) RETURNING id",
|
||||
(make_id, model_data.name)
|
||||
)
|
||||
model_id = self._get_id_from_result(cursor.fetchone())
|
||||
stats.models_inserted += 1
|
||||
else:
|
||||
# CLEAR mode - just insert
|
||||
cursor.execute(
|
||||
"INSERT INTO vehicles.model (make_id, name) VALUES (%s, %s) RETURNING id",
|
||||
(make_id, model_data.name)
|
||||
)
|
||||
model_id = self._get_id_from_result(cursor.fetchone())
|
||||
stats.models_inserted += 1
|
||||
|
||||
# 2. Insert model years and related data
|
||||
for year in model_data.years:
|
||||
model_year_id = self.load_model_year(cursor, model_id, year, model_data, mode, stats)
|
||||
# Skip processing if year was outside valid range
|
||||
if model_year_id is None:
|
||||
continue
|
||||
|
||||
return model_id
|
||||
|
||||
def load_model_year(self, cursor, model_id: int, year: int, model_data: ModelData, mode: LoadMode, stats: LoadStatistics) -> int:
|
||||
"""
|
||||
Load model year and associated trims/engines
|
||||
|
||||
Args:
|
||||
cursor: Database cursor
|
||||
model_id: Parent model ID
|
||||
year: Model year
|
||||
model_data: Model data with trims and engines
|
||||
mode: Loading mode
|
||||
stats: Statistics accumulator
|
||||
|
||||
Returns:
|
||||
Model year ID in database
|
||||
"""
|
||||
# Skip years that don't meet database constraints (must be 1950-2100)
|
||||
if year < 1950 or year > 2100:
|
||||
logger.warning(f"Skipping year {year} - outside valid range (1950-2100)")
|
||||
stats.skipped_model_years += 1
|
||||
return None
|
||||
|
||||
# 1. Insert or get model year
|
||||
if mode == LoadMode.APPEND:
|
||||
cursor.execute(
|
||||
"SELECT id FROM vehicles.model_year WHERE model_id = %s AND year = %s",
|
||||
(model_id, year)
|
||||
)
|
||||
result = cursor.fetchone()
|
||||
|
||||
if result:
|
||||
model_year_id = result[0] if isinstance(result, tuple) else result['id']
|
||||
else:
|
||||
cursor.execute(
|
||||
"INSERT INTO vehicles.model_year (model_id, year) VALUES (%s, %s) RETURNING id",
|
||||
(model_id, year)
|
||||
)
|
||||
model_year_id = self._get_id_from_result(cursor.fetchone())
|
||||
stats.model_years_inserted += 1
|
||||
else:
|
||||
# CLEAR mode - just insert
|
||||
cursor.execute(
|
||||
"INSERT INTO vehicles.model_year (model_id, year) VALUES (%s, %s) RETURNING id",
|
||||
(model_id, year)
|
||||
)
|
||||
model_year_id = self._get_id_from_result(cursor.fetchone())
|
||||
stats.model_years_inserted += 1
|
||||
|
||||
# 2. Load engines and get their IDs
|
||||
engine_ids = []
|
||||
for engine_spec in model_data.engines:
|
||||
engine_id = self.load_engine(cursor, engine_spec, mode, stats)
|
||||
engine_ids.append(engine_id)
|
||||
|
||||
# 3. Load trims and connect to engines
|
||||
for trim_name in model_data.trims:
|
||||
trim_id = self.load_trim(cursor, model_year_id, trim_name, engine_ids, mode, stats)
|
||||
|
||||
return model_year_id
|
||||
|
||||
    def load_engine(self, cursor, engine_spec: EngineSpec, mode: LoadMode, stats: LoadStatistics) -> int:
        """
        Load an engine specification, reusing an existing row when possible.

        The existence check runs in every mode, so `mode` is currently
        unused here; it is kept for signature parity with the other loaders.

        Args:
            cursor: Database cursor
            engine_spec: Parsed engine specification
            mode: Loading mode (unused — lookup always happens)
            stats: Statistics accumulator

        Returns:
            Engine ID in database
        """
        # Create a canonical engine name (e.g. "2.0L I4") when the spec has
        # enough parsed fields; otherwise fall back to the raw source string.
        if engine_spec.displacement_l and engine_spec.configuration != "Unknown" and engine_spec.cylinders:
            engine_name = f"{engine_spec.displacement_l}L {engine_spec.configuration}{engine_spec.cylinders}"
        else:
            engine_name = engine_spec.raw_string

        # Derive a compact code from the name (remove spaces, lowercase).
        engine_code = engine_name.replace(" ", "").lower()

        # Always check for an existing engine by name or code first to avoid
        # unique-constraint violations.
        cursor.execute("""
            SELECT id FROM vehicles.engine
            WHERE lower(name) = lower(%s) OR (code IS NOT NULL AND code = %s)
        """, (engine_name, engine_code))
        result = cursor.fetchone()

        if result:
            engine_id = self._get_id_from_result(result)
            stats.duplicate_engines += 1
            return engine_id

        # Insert new engine; "Unknown"/"Natural" sentinel values are stored
        # as NULL rather than literal strings.
        try:
            cursor.execute("""
                INSERT INTO vehicles.engine (name, code, displacement_l, cylinders, fuel_type, aspiration)
                VALUES (%s, %s, %s, %s, %s, %s)
                RETURNING id
            """, (
                engine_name,
                engine_code,
                engine_spec.displacement_l,
                engine_spec.cylinders,
                engine_spec.fuel_type if engine_spec.fuel_type != "Unknown" else None,
                engine_spec.aspiration if engine_spec.aspiration != "Natural" else None
            ))

            engine_id = self._get_id_from_result(cursor.fetchone())
            stats.engines_inserted += 1

            return engine_id
        except Exception as e:
            if "duplicate key value violates unique constraint" in str(e):
                # Race condition: another process inserted the same engine
                # between our SELECT and INSERT — retry the lookup.
                cursor.execute("""
                    SELECT id FROM vehicles.engine
                    WHERE lower(name) = lower(%s) OR (code IS NOT NULL AND code = %s)
                """, (engine_name, engine_code))
                result = cursor.fetchone()
                if result:
                    engine_id = self._get_id_from_result(result)
                    stats.duplicate_engines += 1
                    return engine_id
            # Not a duplicate, or the retry found nothing: surface the error.
            raise
def load_trim(self, cursor, model_year_id: int, trim_name: str, engine_ids: List[int], mode: LoadMode, stats: LoadStatistics) -> int:
|
||||
"""
|
||||
Load trim and connect to engines
|
||||
|
||||
Args:
|
||||
cursor: Database cursor
|
||||
model_year_id: Parent model year ID
|
||||
trim_name: Trim name
|
||||
engine_ids: List of engine IDs to connect
|
||||
mode: Loading mode
|
||||
stats: Statistics accumulator
|
||||
|
||||
Returns:
|
||||
Trim ID in database
|
||||
"""
|
||||
# 1. Insert or get trim
|
||||
if mode == LoadMode.APPEND:
|
||||
cursor.execute(
|
||||
"SELECT id FROM vehicles.trim WHERE model_year_id = %s AND name = %s",
|
||||
(model_year_id, trim_name)
|
||||
)
|
||||
result = cursor.fetchone()
|
||||
|
||||
if result:
|
||||
trim_id = result[0] if isinstance(result, tuple) else result['id']
|
||||
else:
|
||||
cursor.execute(
|
||||
"INSERT INTO vehicles.trim (model_year_id, name) VALUES (%s, %s) RETURNING id",
|
||||
(model_year_id, trim_name)
|
||||
)
|
||||
trim_id = self._get_id_from_result(cursor.fetchone())
|
||||
stats.trims_inserted += 1
|
||||
else:
|
||||
# CLEAR mode - just insert
|
||||
cursor.execute(
|
||||
"INSERT INTO vehicles.trim (model_year_id, name) VALUES (%s, %s) RETURNING id",
|
||||
(model_year_id, trim_name)
|
||||
)
|
||||
trim_id = self._get_id_from_result(cursor.fetchone())
|
||||
stats.trims_inserted += 1
|
||||
|
||||
# 2. Connect trim to engines (always check for existing to avoid duplicates)
|
||||
# Deduplicate engine_ids to prevent duplicate mappings within the same trim
|
||||
unique_engine_ids = list(set(engine_ids))
|
||||
for engine_id in unique_engine_ids:
|
||||
# Check if mapping already exists
|
||||
cursor.execute(
|
||||
"SELECT 1 FROM vehicles.trim_engine WHERE trim_id = %s AND engine_id = %s",
|
||||
(trim_id, engine_id)
|
||||
)
|
||||
|
||||
if not cursor.fetchone():
|
||||
try:
|
||||
cursor.execute(
|
||||
"INSERT INTO vehicles.trim_engine (trim_id, engine_id) VALUES (%s, %s)",
|
||||
(trim_id, engine_id)
|
||||
)
|
||||
stats.trim_engine_mappings_inserted += 1
|
||||
except Exception as e:
|
||||
if "duplicate key value violates unique constraint" in str(e):
|
||||
# Another process may have inserted it, skip
|
||||
logger.debug(f"Trim-engine mapping ({trim_id}, {engine_id}) already exists, skipping")
|
||||
else:
|
||||
raise
|
||||
|
||||
return trim_id
|
||||
|
||||
def load_all_makes(self, makes_data: List[MakeData], mode: LoadMode) -> LoadResult:
|
||||
"""
|
||||
Load all makes with complete data
|
||||
|
||||
Args:
|
||||
makes_data: List of extracted make data
|
||||
mode: Loading mode (clear/append)
|
||||
|
||||
Returns:
|
||||
LoadResult with comprehensive statistics
|
||||
"""
|
||||
logger.info(f"Starting bulk load of {len(makes_data)} makes in {mode.value} mode")
|
||||
|
||||
# Clear tables if in CLEAR mode
|
||||
if mode == LoadMode.CLEAR:
|
||||
self.clear_all_tables()
|
||||
|
||||
stats = LoadStatistics()
|
||||
failed_makes = []
|
||||
|
||||
for make_data in makes_data:
|
||||
try:
|
||||
if make_data.processing_errors:
|
||||
logger.warning(f"Skipping make {make_data.name} due to extraction errors")
|
||||
stats.makes_skipped += 1
|
||||
failed_makes.append(make_data.name)
|
||||
continue
|
||||
|
||||
make_id = self.load_make(make_data, mode, stats)
|
||||
logger.info(f"Successfully loaded make {make_data.name} (ID: {make_id})")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to load make {make_data.name}: {str(e)}")
|
||||
failed_makes.append(make_data.name)
|
||||
continue
|
||||
|
||||
# Create result
|
||||
result = LoadResult(
|
||||
total_makes=len(makes_data),
|
||||
total_models=stats.models_inserted,
|
||||
total_model_years=stats.model_years_inserted,
|
||||
total_trims=stats.trims_inserted,
|
||||
total_engines=stats.engines_inserted,
|
||||
total_trim_engine_mappings=stats.trim_engine_mappings_inserted,
|
||||
failed_makes=failed_makes,
|
||||
warnings=stats.warnings,
|
||||
load_mode=mode
|
||||
)
|
||||
|
||||
logger.info(f"Bulk load complete: {result.success_count}/{result.total_makes} makes loaded successfully")
|
||||
logger.info(f"Data loaded: {result.total_models} models, {result.total_engines} engines, {result.total_trims} trims")
|
||||
|
||||
return result
|
||||
|
||||
def get_database_statistics(self) -> Dict[str, int]:
|
||||
"""
|
||||
Get current database record counts
|
||||
|
||||
Returns:
|
||||
Dictionary with table counts
|
||||
"""
|
||||
stats = {}
|
||||
|
||||
tables = ['make', 'model', 'model_year', 'trim', 'engine', 'trim_engine']
|
||||
|
||||
with db_connections.postgres_connection() as conn:
|
||||
cursor = conn.cursor()
|
||||
|
||||
for table in tables:
|
||||
cursor.execute(f"SELECT COUNT(*) FROM vehicles.{table}")
|
||||
result = cursor.fetchone()
|
||||
stats[table] = result[0] if isinstance(result, tuple) else result['count']
|
||||
|
||||
return stats
|
||||
|
||||
def validate_referential_integrity(self) -> List[str]:
|
||||
"""
|
||||
Validate referential integrity of loaded data
|
||||
|
||||
Returns:
|
||||
List of integrity issues found (empty if all good)
|
||||
"""
|
||||
issues = []
|
||||
|
||||
with db_connections.postgres_connection() as conn:
|
||||
cursor = conn.cursor()
|
||||
|
||||
# Check for orphaned models
|
||||
cursor.execute("""
|
||||
SELECT COUNT(*) FROM vehicles.model m
|
||||
LEFT JOIN vehicles.make mk ON m.make_id = mk.id
|
||||
WHERE mk.id IS NULL
|
||||
""")
|
||||
orphaned_models = self._get_id_from_result(cursor.fetchone(), 'count')
|
||||
if orphaned_models > 0:
|
||||
issues.append(f"Found {orphaned_models} orphaned models")
|
||||
|
||||
# Check for orphaned model_years
|
||||
cursor.execute("""
|
||||
SELECT COUNT(*) FROM vehicles.model_year my
|
||||
LEFT JOIN vehicles.model m ON my.model_id = m.id
|
||||
WHERE m.id IS NULL
|
||||
""")
|
||||
orphaned_model_years = self._get_id_from_result(cursor.fetchone())
|
||||
if orphaned_model_years > 0:
|
||||
issues.append(f"Found {orphaned_model_years} orphaned model_years")
|
||||
|
||||
# Check for orphaned trims
|
||||
cursor.execute("""
|
||||
SELECT COUNT(*) FROM vehicles.trim t
|
||||
LEFT JOIN vehicles.model_year my ON t.model_year_id = my.id
|
||||
WHERE my.id IS NULL
|
||||
""")
|
||||
orphaned_trims = self._get_id_from_result(cursor.fetchone())
|
||||
if orphaned_trims > 0:
|
||||
issues.append(f"Found {orphaned_trims} orphaned trims")
|
||||
|
||||
# Check for broken trim_engine mappings
|
||||
cursor.execute("""
|
||||
SELECT COUNT(*) FROM vehicles.trim_engine te
|
||||
LEFT JOIN vehicles.trim t ON te.trim_id = t.id
|
||||
LEFT JOIN vehicles.engine e ON te.engine_id = e.id
|
||||
WHERE t.id IS NULL OR e.id IS NULL
|
||||
""")
|
||||
broken_mappings = self._get_id_from_result(cursor.fetchone())
|
||||
if broken_mappings > 0:
|
||||
issues.append(f"Found {broken_mappings} broken trim_engine mappings")
|
||||
|
||||
if issues:
|
||||
logger.warning(f"Referential integrity issues found: {issues}")
|
||||
else:
|
||||
logger.info("Referential integrity validation passed")
|
||||
|
||||
return issues
|
||||
|
||||
    def print_load_report(self, result: LoadResult) -> None:
        """
        Print comprehensive loading report.

        Besides the counts carried in `result`, this queries live table
        statistics and runs the referential-integrity checks, so it needs a
        working database connection.

        Args:
            result: LoadResult from load operation
        """
        print(f"🚀 JSON MANUAL LOADING REPORT")
        print(f"=" * 50)

        # Load summary
        print(f"\n📊 LOADING SUMMARY")
        print(f"   Mode: {result.load_mode.value.upper()}")
        print(f"   Makes processed: {result.success_count}/{result.total_makes}")
        print(f"   Success rate: {result.success_rate:.1%}")

        # Data counts
        print(f"\n📈 DATA LOADED")
        print(f"   Models: {result.total_models}")
        print(f"   Model years: {result.total_model_years}")
        print(f"   Trims: {result.total_trims}")
        print(f"   Engines: {result.total_engines}")
        print(f"   Trim-engine mappings: {result.total_trim_engine_mappings}")

        # Issues
        if result.failed_makes:
            print(f"\n⚠️ FAILED MAKES ({len(result.failed_makes)}):")
            for make in result.failed_makes:
                print(f"   {make}")

        if result.warnings:
            print(f"\n⚠️ WARNINGS ({len(result.warnings)}):")
            for warning in result.warnings[:5]:  # Show first 5
                print(f"   {warning}")
            if len(result.warnings) > 5:
                print(f"   ... and {len(result.warnings) - 5} more warnings")

        # Database statistics (live query)
        print(f"\n📋 DATABASE STATISTICS:")
        db_stats = self.get_database_statistics()
        for table, count in db_stats.items():
            print(f"   vehicles.{table}: {count:,} records")

        # Referential integrity (live query)
        integrity_issues = self.validate_referential_integrity()
        if integrity_issues:
            print(f"\n❌ REFERENTIAL INTEGRITY ISSUES:")
            for issue in integrity_issues:
                print(f"   {issue}")
        else:
            print(f"\n✅ REFERENTIAL INTEGRITY: PASSED")
# Example usage and testing functions
def example_usage():
    """Walk through the intended JsonManualLoader workflow (print-only demo)."""
    print("🚀 JsonManualLoader Example Usage")
    print("=" * 40)

    # No database work happens here; this function only documents the flow
    # that would normally follow a JsonExtractor run.
    print("\n📋 Typical usage flow:")
    for step in (
        "1. Extract data with JsonExtractor",
        "2. Create JsonManualLoader",
        "3. Load data in APPEND or CLEAR mode",
        "4. Validate and report results",
    ):
        print(step)

    print("\n💡 Example code:")
    print("""
# Extract data
extractor = JsonExtractor(make_mapper, engine_parser)
extraction_result = extractor.extract_all_makes('sources/makes')

# Load data
loader = JsonManualLoader()
load_result = loader.load_all_makes(extraction_result.makes, LoadMode.APPEND)

# Report results
loader.print_load_report(load_result)
""")
if __name__ == "__main__":
    # Running the module directly only prints the usage walkthrough; no
    # database work is performed.
    example_usage()
# (end of file — git web UI navigation residue removed)