716 lines
27 KiB
Python
716 lines
27 KiB
Python
"""
|
|
JSON Manual Loader for Vehicles ETL
|
|
|
|
Loads extracted JSON data into PostgreSQL database with referential integrity.
|
|
Supports clear/append modes with duplicate handling and comprehensive progress tracking.
|
|
|
|
Database Schema:
|
|
- vehicles.make (id, name)
|
|
- vehicles.model (id, make_id, name)
|
|
- vehicles.model_year (id, model_id, year)
|
|
- vehicles.trim (id, model_year_id, name)
|
|
- vehicles.engine (id, name, code, displacement_l, cylinders, fuel_type, aspiration)
|
|
- vehicles.trim_engine (trim_id, engine_id)
|
|
|
|
Load Modes:
|
|
- CLEAR: Truncate all tables and reload (destructive)
|
|
- APPEND: Insert with conflict resolution (safe)
|
|
|
|
Usage:
|
|
loader = JsonManualLoader(postgres_loader)
|
|
result = loader.load_all_makes(extraction_result.makes, LoadMode.APPEND)
|
|
"""
|
|
|
|
import logging
from dataclasses import dataclass, field
from enum import Enum
from typing import List, Dict, Optional, Tuple

from psycopg2.extras import execute_batch
|
|
|
|
# Import our components (handle both relative and direct imports)
|
|
try:
|
|
from .postgres_loader import PostgreSQLLoader
|
|
from ..extractors.json_extractor import MakeData, ModelData, ExtractionResult
|
|
from ..utils.engine_spec_parser import EngineSpec
|
|
from ..connections import db_connections
|
|
except ImportError:
|
|
# Fallback for direct execution
|
|
import sys
|
|
import os
|
|
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
|
|
|
|
# Import with fallback handling for nested imports
|
|
try:
|
|
from loaders.postgres_loader import PostgreSQLLoader
|
|
except ImportError:
|
|
# Mock PostgreSQLLoader for testing
|
|
class PostgreSQLLoader:
|
|
def __init__(self):
|
|
self.batch_size = 1000
|
|
|
|
from extractors.json_extractor import MakeData, ModelData, ExtractionResult
|
|
from utils.engine_spec_parser import EngineSpec
|
|
|
|
try:
|
|
from connections import db_connections
|
|
except ImportError:
|
|
# Mock db_connections for testing
|
|
class MockDBConnections:
|
|
def postgres_connection(self):
|
|
raise NotImplementedError("Database connection not available in test mode")
|
|
db_connections = MockDBConnections()
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class LoadMode(Enum):
    """Strategy used when writing extracted data into the database."""

    CLEAR = "clear"    # Truncate every table first, then reload (destructive)
    APPEND = "append"  # Insert with conflict handling, existing rows reused (safe)
|
|
|
|
|
|
@dataclass
class LoadResult:
    """Outcome summary of a load run: totals, failed makes, and warnings."""

    total_makes: int
    total_models: int
    total_model_years: int
    total_trims: int
    total_engines: int
    total_trim_engine_mappings: int
    failed_makes: List[str]
    warnings: List[str]
    load_mode: LoadMode

    @property
    def success_count(self) -> int:
        """Number of makes that were not recorded as failed."""
        return self.total_makes - len(self.failed_makes)

    @property
    def success_rate(self) -> float:
        """Fraction of makes loaded successfully; 0.0 when none were attempted."""
        if self.total_makes > 0:
            return self.success_count / self.total_makes
        return 0.0
|
|
|
|
|
|
@dataclass
class LoadStatistics:
    """Mutable counters accumulated while loading.

    The list fields use ``field(default_factory=list)`` so every instance
    owns its own list (the idiomatic dataclass pattern for mutable
    defaults). ``__post_init__`` still normalizes an explicitly passed
    ``None`` so older callers that relied on the previous ``= None``
    defaults keep working.
    """

    makes_processed: int = 0
    makes_skipped: int = 0
    models_inserted: int = 0
    model_years_inserted: int = 0
    skipped_model_years: int = 0
    trims_inserted: int = 0
    engines_inserted: int = 0
    trim_engine_mappings_inserted: int = 0
    duplicate_makes: int = 0
    duplicate_models: int = 0
    duplicate_engines: int = 0
    errors: List[str] = field(default_factory=list)
    warnings: List[str] = field(default_factory=list)

    def __post_init__(self):
        # Backward compatibility: callers may still pass errors=None / warnings=None.
        if self.errors is None:
            self.errors = []
        if self.warnings is None:
            self.warnings = []
|
|
|
|
|
|
class JsonManualLoader:
|
|
"""Load JSON-extracted vehicle data into PostgreSQL"""
|
|
|
|
def _get_id_from_result(self, result, column_name='id'):
|
|
"""Helper to extract ID from query result, handling both tuple and dict cursors"""
|
|
if result is None:
|
|
return None
|
|
if isinstance(result, tuple):
|
|
return result[0]
|
|
# For RealDictCursor, try the column name first, fall back to key access
|
|
if column_name in result:
|
|
return result[column_name]
|
|
# For COUNT(*) queries, the key might be 'count'
|
|
if 'count' in result:
|
|
return result['count']
|
|
# Fall back to first value
|
|
return list(result.values())[0] if result else None
|
|
|
|
def __init__(self, postgres_loader: Optional[PostgreSQLLoader] = None):
|
|
"""
|
|
Initialize JSON manual loader
|
|
|
|
Args:
|
|
postgres_loader: Existing PostgreSQL loader instance
|
|
"""
|
|
self.postgres_loader = postgres_loader or PostgreSQLLoader()
|
|
self.batch_size = 1000
|
|
|
|
logger.info("JsonManualLoader initialized")
|
|
|
|
def clear_all_tables(self) -> None:
|
|
"""
|
|
Clear all vehicles tables in dependency order
|
|
|
|
WARNING: This is destructive and will remove all data
|
|
"""
|
|
logger.warning("CLEARING ALL VEHICLES TABLES - This is destructive!")
|
|
|
|
tables_to_clear = [
|
|
'trim_engine', # Many-to-many mappings first
|
|
'trim_transmission',
|
|
'performance', # Tables with foreign keys
|
|
'trim',
|
|
'model_year',
|
|
'model',
|
|
'make',
|
|
'engine', # Independent tables last
|
|
'transmission'
|
|
]
|
|
|
|
with db_connections.postgres_connection() as conn:
|
|
cursor = conn.cursor()
|
|
|
|
for table in tables_to_clear:
|
|
try:
|
|
cursor.execute(f"TRUNCATE TABLE vehicles.{table} CASCADE")
|
|
logger.info(f"Cleared vehicles.{table}")
|
|
except Exception as e:
|
|
logger.warning(f"Failed to clear vehicles.{table}: {str(e)}")
|
|
|
|
conn.commit()
|
|
|
|
logger.info("All vehicles tables cleared")
|
|
|
|
    def load_make(self, make_data: MakeData, mode: LoadMode, stats: LoadStatistics) -> int:
        """
        Load a single make with all related data

        Opens one connection/transaction per make so the make and all of its
        models, years, trims, and engines commit (or roll back) together.

        Args:
            make_data: Extracted make data
            mode: Loading mode (clear/append)
            stats: Statistics accumulator

        Returns:
            Make ID in database

        Raises:
            Exception: re-raised after logging/recording if any step fails.
        """
        logger.debug(f"Loading make: {make_data.name}")

        try:
            with db_connections.postgres_connection() as conn:
                cursor = conn.cursor()

                # 1. Insert or get make (always check for existing to avoid constraint violations)
                # Check if make exists (case-insensitive to match database constraint)
                cursor.execute(
                    "SELECT id FROM vehicles.make WHERE lower(name) = lower(%s)",
                    (make_data.name,)
                )
                result = cursor.fetchone()

                if result:
                    make_id = self._get_id_from_result(result)
                    stats.duplicate_makes += 1
                    logger.debug(f"Make {make_data.name} already exists with ID {make_id}")
                else:
                    # Insert new make with error handling for constraint violations
                    try:
                        cursor.execute(
                            "INSERT INTO vehicles.make (name) VALUES (%s) RETURNING id",
                            (make_data.name,)
                        )
                        result = cursor.fetchone()
                        make_id = self._get_id_from_result(result)
                        logger.debug(f"Inserted make {make_data.name} with ID {make_id}")
                    except Exception as e:
                        # NOTE(review): assumes the driver's unique-violation
                        # error text contains this phrase (psycopg2 does) —
                        # matched by substring rather than exception type.
                        if "duplicate key value violates unique constraint" in str(e):
                            # Retry the lookup in case of race condition
                            cursor.execute(
                                "SELECT id FROM vehicles.make WHERE lower(name) = lower(%s)",
                                (make_data.name,)
                            )
                            result = cursor.fetchone()
                            if result:
                                make_id = self._get_id_from_result(result)
                                stats.duplicate_makes += 1
                                logger.debug(f"Make {make_data.name} found after retry with ID {make_id}")
                            else:
                                # Constraint fired but the row is not visible:
                                # give up and surface the original error.
                                raise
                        else:
                            raise

                # 2. Process models (return value kept for symmetry; unused here)
                for model_data in make_data.models:
                    model_id = self.load_model(cursor, make_id, model_data, mode, stats)

                conn.commit()
                stats.makes_processed += 1

                return make_id

        except Exception as e:
            # Record the failure for the run report, then propagate so the
            # caller decides whether to continue with the remaining makes.
            error_msg = f"Failed to load make {make_data.name}: {str(e)}"
            logger.error(error_msg)
            stats.errors.append(error_msg)
            raise
|
|
|
|
def load_model(self, cursor, make_id: int, model_data: ModelData, mode: LoadMode, stats: LoadStatistics) -> int:
|
|
"""
|
|
Load a single model with all related data
|
|
|
|
Args:
|
|
cursor: Database cursor
|
|
make_id: Parent make ID
|
|
model_data: Extracted model data
|
|
mode: Loading mode
|
|
stats: Statistics accumulator
|
|
|
|
Returns:
|
|
Model ID in database
|
|
"""
|
|
# 1. Insert or get model
|
|
if mode == LoadMode.APPEND:
|
|
cursor.execute(
|
|
"SELECT id FROM vehicles.model WHERE make_id = %s AND name = %s",
|
|
(make_id, model_data.name)
|
|
)
|
|
result = cursor.fetchone()
|
|
|
|
if result:
|
|
model_id = result[0] if isinstance(result, tuple) else result['id']
|
|
stats.duplicate_models += 1
|
|
else:
|
|
cursor.execute(
|
|
"INSERT INTO vehicles.model (make_id, name) VALUES (%s, %s) RETURNING id",
|
|
(make_id, model_data.name)
|
|
)
|
|
model_id = self._get_id_from_result(cursor.fetchone())
|
|
stats.models_inserted += 1
|
|
else:
|
|
# CLEAR mode - just insert
|
|
cursor.execute(
|
|
"INSERT INTO vehicles.model (make_id, name) VALUES (%s, %s) RETURNING id",
|
|
(make_id, model_data.name)
|
|
)
|
|
model_id = self._get_id_from_result(cursor.fetchone())
|
|
stats.models_inserted += 1
|
|
|
|
# 2. Insert model years and related data
|
|
for year in model_data.years:
|
|
model_year_id = self.load_model_year(cursor, model_id, year, model_data, mode, stats)
|
|
# Skip processing if year was outside valid range
|
|
if model_year_id is None:
|
|
continue
|
|
|
|
return model_id
|
|
|
|
def load_model_year(self, cursor, model_id: int, year: int, model_data: ModelData, mode: LoadMode, stats: LoadStatistics) -> int:
|
|
"""
|
|
Load model year and associated trims/engines
|
|
|
|
Args:
|
|
cursor: Database cursor
|
|
model_id: Parent model ID
|
|
year: Model year
|
|
model_data: Model data with trims and engines
|
|
mode: Loading mode
|
|
stats: Statistics accumulator
|
|
|
|
Returns:
|
|
Model year ID in database
|
|
"""
|
|
# Skip years that don't meet database constraints (must be 1950-2100)
|
|
if year < 1950 or year > 2100:
|
|
logger.warning(f"Skipping year {year} - outside valid range (1950-2100)")
|
|
stats.skipped_model_years += 1
|
|
return None
|
|
|
|
# 1. Insert or get model year
|
|
if mode == LoadMode.APPEND:
|
|
cursor.execute(
|
|
"SELECT id FROM vehicles.model_year WHERE model_id = %s AND year = %s",
|
|
(model_id, year)
|
|
)
|
|
result = cursor.fetchone()
|
|
|
|
if result:
|
|
model_year_id = result[0] if isinstance(result, tuple) else result['id']
|
|
else:
|
|
cursor.execute(
|
|
"INSERT INTO vehicles.model_year (model_id, year) VALUES (%s, %s) RETURNING id",
|
|
(model_id, year)
|
|
)
|
|
model_year_id = self._get_id_from_result(cursor.fetchone())
|
|
stats.model_years_inserted += 1
|
|
else:
|
|
# CLEAR mode - just insert
|
|
cursor.execute(
|
|
"INSERT INTO vehicles.model_year (model_id, year) VALUES (%s, %s) RETURNING id",
|
|
(model_id, year)
|
|
)
|
|
model_year_id = self._get_id_from_result(cursor.fetchone())
|
|
stats.model_years_inserted += 1
|
|
|
|
# 2. Load engines and get their IDs
|
|
engine_ids = []
|
|
for engine_spec in model_data.engines:
|
|
engine_id = self.load_engine(cursor, engine_spec, mode, stats)
|
|
engine_ids.append(engine_id)
|
|
|
|
# 3. Load trims and connect to engines
|
|
for trim_name in model_data.trims:
|
|
trim_id = self.load_trim(cursor, model_year_id, trim_name, engine_ids, mode, stats)
|
|
|
|
return model_year_id
|
|
|
|
def load_engine(self, cursor, engine_spec: EngineSpec, mode: LoadMode, stats: LoadStatistics) -> int:
|
|
"""
|
|
Load engine specification
|
|
|
|
Args:
|
|
cursor: Database cursor
|
|
engine_spec: Parsed engine specification
|
|
mode: Loading mode
|
|
stats: Statistics accumulator
|
|
|
|
Returns:
|
|
Engine ID in database
|
|
"""
|
|
# Create a canonical engine name for database storage
|
|
if engine_spec.displacement_l and engine_spec.configuration != "Unknown" and engine_spec.cylinders:
|
|
engine_name = f"{engine_spec.displacement_l}L {engine_spec.configuration}{engine_spec.cylinders}"
|
|
else:
|
|
engine_name = engine_spec.raw_string
|
|
|
|
# Generate engine code from name (remove spaces, lowercase)
|
|
engine_code = engine_name.replace(" ", "").lower()
|
|
|
|
# Always check for existing engine by name or code to avoid constraint violations
|
|
cursor.execute("""
|
|
SELECT id FROM vehicles.engine
|
|
WHERE lower(name) = lower(%s) OR (code IS NOT NULL AND code = %s)
|
|
""", (engine_name, engine_code))
|
|
result = cursor.fetchone()
|
|
|
|
if result:
|
|
engine_id = self._get_id_from_result(result)
|
|
stats.duplicate_engines += 1
|
|
return engine_id
|
|
|
|
# Insert new engine
|
|
try:
|
|
cursor.execute("""
|
|
INSERT INTO vehicles.engine (name, code, displacement_l, cylinders, fuel_type, aspiration)
|
|
VALUES (%s, %s, %s, %s, %s, %s)
|
|
RETURNING id
|
|
""", (
|
|
engine_name,
|
|
engine_code,
|
|
engine_spec.displacement_l,
|
|
engine_spec.cylinders,
|
|
engine_spec.fuel_type if engine_spec.fuel_type != "Unknown" else None,
|
|
engine_spec.aspiration if engine_spec.aspiration != "Natural" else None
|
|
))
|
|
|
|
engine_id = self._get_id_from_result(cursor.fetchone())
|
|
stats.engines_inserted += 1
|
|
|
|
return engine_id
|
|
except Exception as e:
|
|
if "duplicate key value violates unique constraint" in str(e):
|
|
# Retry the lookup in case of race condition
|
|
cursor.execute("""
|
|
SELECT id FROM vehicles.engine
|
|
WHERE lower(name) = lower(%s) OR (code IS NOT NULL AND code = %s)
|
|
""", (engine_name, engine_code))
|
|
result = cursor.fetchone()
|
|
if result:
|
|
engine_id = self._get_id_from_result(result)
|
|
stats.duplicate_engines += 1
|
|
return engine_id
|
|
raise
|
|
|
|
def load_trim(self, cursor, model_year_id: int, trim_name: str, engine_ids: List[int], mode: LoadMode, stats: LoadStatistics) -> int:
|
|
"""
|
|
Load trim and connect to engines
|
|
|
|
Args:
|
|
cursor: Database cursor
|
|
model_year_id: Parent model year ID
|
|
trim_name: Trim name
|
|
engine_ids: List of engine IDs to connect
|
|
mode: Loading mode
|
|
stats: Statistics accumulator
|
|
|
|
Returns:
|
|
Trim ID in database
|
|
"""
|
|
# 1. Insert or get trim
|
|
if mode == LoadMode.APPEND:
|
|
cursor.execute(
|
|
"SELECT id FROM vehicles.trim WHERE model_year_id = %s AND name = %s",
|
|
(model_year_id, trim_name)
|
|
)
|
|
result = cursor.fetchone()
|
|
|
|
if result:
|
|
trim_id = result[0] if isinstance(result, tuple) else result['id']
|
|
else:
|
|
cursor.execute(
|
|
"INSERT INTO vehicles.trim (model_year_id, name) VALUES (%s, %s) RETURNING id",
|
|
(model_year_id, trim_name)
|
|
)
|
|
trim_id = self._get_id_from_result(cursor.fetchone())
|
|
stats.trims_inserted += 1
|
|
else:
|
|
# CLEAR mode - just insert
|
|
cursor.execute(
|
|
"INSERT INTO vehicles.trim (model_year_id, name) VALUES (%s, %s) RETURNING id",
|
|
(model_year_id, trim_name)
|
|
)
|
|
trim_id = self._get_id_from_result(cursor.fetchone())
|
|
stats.trims_inserted += 1
|
|
|
|
# 2. Connect trim to engines (always check for existing to avoid duplicates)
|
|
# Deduplicate engine_ids to prevent duplicate mappings within the same trim
|
|
unique_engine_ids = list(set(engine_ids))
|
|
for engine_id in unique_engine_ids:
|
|
# Check if mapping already exists
|
|
cursor.execute(
|
|
"SELECT 1 FROM vehicles.trim_engine WHERE trim_id = %s AND engine_id = %s",
|
|
(trim_id, engine_id)
|
|
)
|
|
|
|
if not cursor.fetchone():
|
|
try:
|
|
cursor.execute(
|
|
"INSERT INTO vehicles.trim_engine (trim_id, engine_id) VALUES (%s, %s)",
|
|
(trim_id, engine_id)
|
|
)
|
|
stats.trim_engine_mappings_inserted += 1
|
|
except Exception as e:
|
|
if "duplicate key value violates unique constraint" in str(e):
|
|
# Another process may have inserted it, skip
|
|
logger.debug(f"Trim-engine mapping ({trim_id}, {engine_id}) already exists, skipping")
|
|
else:
|
|
raise
|
|
|
|
return trim_id
|
|
|
|
def load_all_makes(self, makes_data: List[MakeData], mode: LoadMode) -> LoadResult:
|
|
"""
|
|
Load all makes with complete data
|
|
|
|
Args:
|
|
makes_data: List of extracted make data
|
|
mode: Loading mode (clear/append)
|
|
|
|
Returns:
|
|
LoadResult with comprehensive statistics
|
|
"""
|
|
logger.info(f"Starting bulk load of {len(makes_data)} makes in {mode.value} mode")
|
|
|
|
# Clear tables if in CLEAR mode
|
|
if mode == LoadMode.CLEAR:
|
|
self.clear_all_tables()
|
|
|
|
stats = LoadStatistics()
|
|
failed_makes = []
|
|
|
|
for make_data in makes_data:
|
|
try:
|
|
if make_data.processing_errors:
|
|
logger.warning(f"Skipping make {make_data.name} due to extraction errors")
|
|
stats.makes_skipped += 1
|
|
failed_makes.append(make_data.name)
|
|
continue
|
|
|
|
make_id = self.load_make(make_data, mode, stats)
|
|
logger.info(f"Successfully loaded make {make_data.name} (ID: {make_id})")
|
|
|
|
except Exception as e:
|
|
logger.error(f"Failed to load make {make_data.name}: {str(e)}")
|
|
failed_makes.append(make_data.name)
|
|
continue
|
|
|
|
# Create result
|
|
result = LoadResult(
|
|
total_makes=len(makes_data),
|
|
total_models=stats.models_inserted,
|
|
total_model_years=stats.model_years_inserted,
|
|
total_trims=stats.trims_inserted,
|
|
total_engines=stats.engines_inserted,
|
|
total_trim_engine_mappings=stats.trim_engine_mappings_inserted,
|
|
failed_makes=failed_makes,
|
|
warnings=stats.warnings,
|
|
load_mode=mode
|
|
)
|
|
|
|
logger.info(f"Bulk load complete: {result.success_count}/{result.total_makes} makes loaded successfully")
|
|
logger.info(f"Data loaded: {result.total_models} models, {result.total_engines} engines, {result.total_trims} trims")
|
|
|
|
return result
|
|
|
|
def get_database_statistics(self) -> Dict[str, int]:
|
|
"""
|
|
Get current database record counts
|
|
|
|
Returns:
|
|
Dictionary with table counts
|
|
"""
|
|
stats = {}
|
|
|
|
tables = ['make', 'model', 'model_year', 'trim', 'engine', 'trim_engine']
|
|
|
|
with db_connections.postgres_connection() as conn:
|
|
cursor = conn.cursor()
|
|
|
|
for table in tables:
|
|
cursor.execute(f"SELECT COUNT(*) FROM vehicles.{table}")
|
|
result = cursor.fetchone()
|
|
stats[table] = result[0] if isinstance(result, tuple) else result['count']
|
|
|
|
return stats
|
|
|
|
def validate_referential_integrity(self) -> List[str]:
|
|
"""
|
|
Validate referential integrity of loaded data
|
|
|
|
Returns:
|
|
List of integrity issues found (empty if all good)
|
|
"""
|
|
issues = []
|
|
|
|
with db_connections.postgres_connection() as conn:
|
|
cursor = conn.cursor()
|
|
|
|
# Check for orphaned models
|
|
cursor.execute("""
|
|
SELECT COUNT(*) FROM vehicles.model m
|
|
LEFT JOIN vehicles.make mk ON m.make_id = mk.id
|
|
WHERE mk.id IS NULL
|
|
""")
|
|
orphaned_models = self._get_id_from_result(cursor.fetchone(), 'count')
|
|
if orphaned_models > 0:
|
|
issues.append(f"Found {orphaned_models} orphaned models")
|
|
|
|
# Check for orphaned model_years
|
|
cursor.execute("""
|
|
SELECT COUNT(*) FROM vehicles.model_year my
|
|
LEFT JOIN vehicles.model m ON my.model_id = m.id
|
|
WHERE m.id IS NULL
|
|
""")
|
|
orphaned_model_years = self._get_id_from_result(cursor.fetchone())
|
|
if orphaned_model_years > 0:
|
|
issues.append(f"Found {orphaned_model_years} orphaned model_years")
|
|
|
|
# Check for orphaned trims
|
|
cursor.execute("""
|
|
SELECT COUNT(*) FROM vehicles.trim t
|
|
LEFT JOIN vehicles.model_year my ON t.model_year_id = my.id
|
|
WHERE my.id IS NULL
|
|
""")
|
|
orphaned_trims = self._get_id_from_result(cursor.fetchone())
|
|
if orphaned_trims > 0:
|
|
issues.append(f"Found {orphaned_trims} orphaned trims")
|
|
|
|
# Check for broken trim_engine mappings
|
|
cursor.execute("""
|
|
SELECT COUNT(*) FROM vehicles.trim_engine te
|
|
LEFT JOIN vehicles.trim t ON te.trim_id = t.id
|
|
LEFT JOIN vehicles.engine e ON te.engine_id = e.id
|
|
WHERE t.id IS NULL OR e.id IS NULL
|
|
""")
|
|
broken_mappings = self._get_id_from_result(cursor.fetchone())
|
|
if broken_mappings > 0:
|
|
issues.append(f"Found {broken_mappings} broken trim_engine mappings")
|
|
|
|
if issues:
|
|
logger.warning(f"Referential integrity issues found: {issues}")
|
|
else:
|
|
logger.info("Referential integrity validation passed")
|
|
|
|
return issues
|
|
|
|
def print_load_report(self, result: LoadResult) -> None:
|
|
"""
|
|
Print comprehensive loading report
|
|
|
|
Args:
|
|
result: LoadResult from load operation
|
|
"""
|
|
print(f"🚀 JSON MANUAL LOADING REPORT")
|
|
print(f"=" * 50)
|
|
|
|
# Load summary
|
|
print(f"\n📊 LOADING SUMMARY")
|
|
print(f" Mode: {result.load_mode.value.upper()}")
|
|
print(f" Makes processed: {result.success_count}/{result.total_makes}")
|
|
print(f" Success rate: {result.success_rate:.1%}")
|
|
|
|
# Data counts
|
|
print(f"\n📈 DATA LOADED")
|
|
print(f" Models: {result.total_models}")
|
|
print(f" Model years: {result.total_model_years}")
|
|
print(f" Trims: {result.total_trims}")
|
|
print(f" Engines: {result.total_engines}")
|
|
print(f" Trim-engine mappings: {result.total_trim_engine_mappings}")
|
|
|
|
# Issues
|
|
if result.failed_makes:
|
|
print(f"\n⚠️ FAILED MAKES ({len(result.failed_makes)}):")
|
|
for make in result.failed_makes:
|
|
print(f" {make}")
|
|
|
|
if result.warnings:
|
|
print(f"\n⚠️ WARNINGS ({len(result.warnings)}):")
|
|
for warning in result.warnings[:5]: # Show first 5
|
|
print(f" {warning}")
|
|
if len(result.warnings) > 5:
|
|
print(f" ... and {len(result.warnings) - 5} more warnings")
|
|
|
|
# Database statistics
|
|
print(f"\n📋 DATABASE STATISTICS:")
|
|
db_stats = self.get_database_statistics()
|
|
for table, count in db_stats.items():
|
|
print(f" vehicles.{table}: {count:,} records")
|
|
|
|
# Referential integrity
|
|
integrity_issues = self.validate_referential_integrity()
|
|
if integrity_issues:
|
|
print(f"\n❌ REFERENTIAL INTEGRITY ISSUES:")
|
|
for issue in integrity_issues:
|
|
print(f" {issue}")
|
|
else:
|
|
print(f"\n✅ REFERENTIAL INTEGRITY: PASSED")
|
|
|
|
|
|
# Example usage and testing functions
|
|
def example_usage():
    """Walk through the intended JsonManualLoader workflow (demo only)."""
    print("🚀 JsonManualLoader Example Usage")
    print("=" * 40)

    # No real extraction or loading happens here — this only illustrates
    # the intended call sequence after JsonExtractor has run.
    print("\n📋 Typical usage flow:")
    steps = (
        "1. Extract data with JsonExtractor",
        "2. Create JsonManualLoader",
        "3. Load data in APPEND or CLEAR mode",
        "4. Validate and report results",
    )
    for step in steps:
        print(step)

    print(f"\n💡 Example code:")
    print("""
# Extract data
extractor = JsonExtractor(make_mapper, engine_parser)
extraction_result = extractor.extract_all_makes('sources/makes')

# Load data
loader = JsonManualLoader()
load_result = loader.load_all_makes(extraction_result.makes, LoadMode.APPEND)

# Report results
loader.print_load_report(load_result)
""")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
example_usage() |