Initial Commit
This commit is contained in:
354
mvp-platform-services/vehicles/etl/loaders/postgres_loader.py
Executable file
354
mvp-platform-services/vehicles/etl/loaders/postgres_loader.py
Executable file
@@ -0,0 +1,354 @@
|
||||
import logging
|
||||
from typing import List, Dict, Optional
|
||||
from psycopg2.extras import execute_batch
|
||||
from ..connections import db_connections
|
||||
from tqdm import tqdm
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class PostgreSQLLoader:
|
||||
"""Load data into PostgreSQL target database"""
|
||||
|
||||
def __init__(self):
|
||||
self.batch_size = 1000
|
||||
|
||||
def load_reference_table(self, table_name: str, data: List[Dict],
|
||||
clear_existing: bool = True) -> int:
|
||||
"""Load data into a reference table"""
|
||||
if not data:
|
||||
logger.warning(f"No data to load for table {table_name}")
|
||||
return 0
|
||||
|
||||
logger.info(f"Loading {len(data)} records into vehicles.{table_name}")
|
||||
|
||||
# Column mapping from source (MS SQL) to target (PostgreSQL)
|
||||
column_mappings = {
|
||||
'Id': 'id',
|
||||
'Name': 'name',
|
||||
'Code': 'code',
|
||||
'MakeId': 'make_id',
|
||||
'CreateOn': 'created_at',
|
||||
'CreatedOn': 'created_at',
|
||||
'UpdateOn': 'updated_at',
|
||||
'UpdatedOn': 'updated_at',
|
||||
'Wmi': 'wmi',
|
||||
'ManufacturerId': 'manufacturer_id',
|
||||
'MakeId': 'make_id',
|
||||
'VehicleTypeId': 'vehicle_type_id',
|
||||
'TruckTypeId': 'truck_type_id',
|
||||
'CountryId': 'country_id',
|
||||
'PublicAvailabilityDate': 'public_availability_date',
|
||||
'NonCompliant': 'non_compliant',
|
||||
'NonCompliantReason': 'non_compliant_reason',
|
||||
'ProcessedOn': 'processed_on',
|
||||
'DisplayOrder': 'display_order',
|
||||
'FormType': 'form_type',
|
||||
'Description': 'description',
|
||||
'LookupTable': 'lookup_table',
|
||||
'IsPrivate': 'is_private',
|
||||
'GroupName': 'group_name',
|
||||
'DataType': 'data_type',
|
||||
'MinAllowedValue': 'min_allowed_value',
|
||||
'MaxAllowedValue': 'max_allowed_value',
|
||||
'IsQS': 'is_qs',
|
||||
'Decode': 'decode',
|
||||
'weight': 'weight',
|
||||
# ErrorCode specific mappings
|
||||
'ErrorCodeName': 'code',
|
||||
'ErrorCodeDescription': 'description'
|
||||
}
|
||||
|
||||
with db_connections.postgres_connection() as conn:
|
||||
cursor = conn.cursor()
|
||||
|
||||
if clear_existing:
|
||||
cursor.execute(f"TRUNCATE TABLE vehicles.{table_name} CASCADE")
|
||||
logger.info(f"Cleared existing data from vehicles.{table_name}")
|
||||
|
||||
# Get source columns and map them to target columns
|
||||
source_columns = list(data[0].keys())
|
||||
target_columns = []
|
||||
valid_data = []
|
||||
|
||||
# Map columns and filter data
|
||||
for source_col in source_columns:
|
||||
if source_col in column_mappings:
|
||||
target_columns.append(column_mappings[source_col])
|
||||
else:
|
||||
target_columns.append(source_col.lower())
|
||||
|
||||
# Check which columns exist in target table
|
||||
cursor.execute(f"""
|
||||
SELECT column_name
|
||||
FROM information_schema.columns
|
||||
WHERE table_schema = 'vehicles' AND table_name = '{table_name}'
|
||||
""")
|
||||
results = cursor.fetchall()
|
||||
existing_columns = {row['column_name'] if isinstance(row, dict) else row[0] for row in results}
|
||||
|
||||
# Filter to only existing columns
|
||||
final_columns = []
|
||||
final_indices = []
|
||||
for i, col in enumerate(target_columns):
|
||||
if col in existing_columns:
|
||||
final_columns.append(col)
|
||||
final_indices.append(i)
|
||||
|
||||
if not final_columns:
|
||||
logger.warning(f"No matching columns found for table {table_name}")
|
||||
return 0
|
||||
|
||||
column_str = ','.join(final_columns)
|
||||
placeholders = ','.join(['%s'] * len(final_columns))
|
||||
|
||||
# Prepare insert query
|
||||
query = f"""
|
||||
INSERT INTO vehicles.{table_name} ({column_str})
|
||||
VALUES ({placeholders})
|
||||
ON CONFLICT DO NOTHING
|
||||
"""
|
||||
|
||||
# Prepare data tuples with only valid columns
|
||||
data_tuples = []
|
||||
for record in data:
|
||||
values = []
|
||||
skip_record = False
|
||||
|
||||
for i in final_indices:
|
||||
source_col = source_columns[i]
|
||||
value = record[source_col]
|
||||
|
||||
# Handle special cases for error_codes table
|
||||
if table_name == 'error_codes' and source_col in ['ErrorCodeName', 'Code'] and (value is None or value == ''):
|
||||
skip_record = True
|
||||
break
|
||||
|
||||
values.append(value)
|
||||
|
||||
if not skip_record:
|
||||
data_tuples.append(tuple(values))
|
||||
|
||||
# Execute batch insert
|
||||
execute_batch(cursor, query, data_tuples, page_size=self.batch_size)
|
||||
conn.commit()
|
||||
|
||||
# Get final count
|
||||
cursor.execute(f"SELECT COUNT(*) FROM vehicles.{table_name}")
|
||||
result = cursor.fetchone()
|
||||
final_count = result['count'] if isinstance(result, dict) and 'count' in result else result[0]
|
||||
|
||||
logger.info(f"Successfully loaded {final_count} records into vehicles.{table_name}")
|
||||
return final_count
|
||||
|
||||
def load_wmi_vin_schema_mappings(self, mappings: List[Dict]) -> int:
|
||||
"""Load WMI to VIN Schema mappings"""
|
||||
if not mappings:
|
||||
return 0
|
||||
|
||||
logger.info(f"Loading {len(mappings)} WMI-VinSchema mappings")
|
||||
|
||||
with db_connections.postgres_connection() as conn:
|
||||
cursor = conn.cursor()
|
||||
|
||||
# Clear existing mappings
|
||||
cursor.execute("TRUNCATE TABLE vehicles.wmi_vin_schemas CASCADE")
|
||||
|
||||
query = """
|
||||
INSERT INTO vehicles.wmi_vin_schemas
|
||||
(wmi_id, vin_schema_id, year_from, year_to)
|
||||
VALUES (%s, %s, %s, %s)
|
||||
ON CONFLICT DO NOTHING
|
||||
"""
|
||||
|
||||
data_tuples = []
|
||||
for mapping in mappings:
|
||||
data_tuples.append((
|
||||
mapping['WmiId'],
|
||||
mapping['VinSchemaId'],
|
||||
mapping['YearFrom'] or 1980,
|
||||
mapping['YearTo'] or 2999
|
||||
))
|
||||
|
||||
execute_batch(cursor, query, data_tuples, page_size=self.batch_size)
|
||||
conn.commit()
|
||||
|
||||
# Get final count
|
||||
cursor.execute("SELECT COUNT(*) FROM vehicles.wmi_vin_schemas")
|
||||
result = cursor.fetchone()
|
||||
final_count = result['count'] if isinstance(result, dict) and 'count' in result else result[0]
|
||||
|
||||
logger.info(f"Successfully loaded {final_count} WMI-VinSchema mappings")
|
||||
return final_count
|
||||
|
||||
def load_make_model_relationships(self, relationships: List[Dict]) -> int:
|
||||
"""Load Make-Model relationships"""
|
||||
if not relationships:
|
||||
return 0
|
||||
|
||||
logger.info(f"Loading {len(relationships)} Make-Model relationships")
|
||||
|
||||
with db_connections.postgres_connection() as conn:
|
||||
cursor = conn.cursor()
|
||||
|
||||
# Clear existing relationships
|
||||
cursor.execute("TRUNCATE TABLE vehicles.make_models CASCADE")
|
||||
|
||||
query = """
|
||||
INSERT INTO vehicles.make_models (make_id, model_id)
|
||||
VALUES (%s, %s)
|
||||
ON CONFLICT DO NOTHING
|
||||
"""
|
||||
|
||||
data_tuples = []
|
||||
for rel in relationships:
|
||||
data_tuples.append((rel['MakeId'], rel['ModelId']))
|
||||
|
||||
execute_batch(cursor, query, data_tuples, page_size=self.batch_size)
|
||||
conn.commit()
|
||||
|
||||
# Get final count
|
||||
cursor.execute("SELECT COUNT(*) FROM vehicles.make_models")
|
||||
result = cursor.fetchone()
|
||||
final_count = result['count'] if isinstance(result, dict) and 'count' in result else result[0]
|
||||
|
||||
logger.info(f"Successfully loaded {final_count} Make-Model relationships")
|
||||
return final_count
|
||||
|
||||
def load_wmi_make_relationships(self, relationships: List[Dict]) -> int:
|
||||
"""Load WMI-Make relationships"""
|
||||
if not relationships:
|
||||
return 0
|
||||
|
||||
logger.info(f"Loading {len(relationships)} WMI-Make relationships")
|
||||
|
||||
with db_connections.postgres_connection() as conn:
|
||||
cursor = conn.cursor()
|
||||
|
||||
# Clear existing relationships
|
||||
cursor.execute("TRUNCATE TABLE vehicles.wmi_makes CASCADE")
|
||||
|
||||
query = """
|
||||
INSERT INTO vehicles.wmi_makes (wmi_id, make_id)
|
||||
VALUES (%s, %s)
|
||||
ON CONFLICT DO NOTHING
|
||||
"""
|
||||
|
||||
data_tuples = []
|
||||
for rel in relationships:
|
||||
data_tuples.append((rel['WmiId'], rel['MakeId']))
|
||||
|
||||
execute_batch(cursor, query, data_tuples, page_size=self.batch_size)
|
||||
conn.commit()
|
||||
|
||||
# Get final count
|
||||
cursor.execute("SELECT COUNT(*) FROM vehicles.wmi_makes")
|
||||
result = cursor.fetchone()
|
||||
final_count = result['count'] if isinstance(result, dict) and 'count' in result else result[0]
|
||||
|
||||
logger.info(f"Successfully loaded {final_count} WMI-Make relationships")
|
||||
return final_count
|
||||
|
||||
def load_model_years(self, model_years: List[Dict]) -> int:
|
||||
"""Load model year availability data"""
|
||||
if not model_years:
|
||||
return 0
|
||||
|
||||
logger.info(f"Loading {len(model_years)} model year records")
|
||||
|
||||
with db_connections.postgres_connection() as conn:
|
||||
cursor = conn.cursor()
|
||||
|
||||
query = """
|
||||
INSERT INTO vehicles.model_year (model_id, year)
|
||||
VALUES (%s, %s)
|
||||
ON CONFLICT (model_id, year) DO NOTHING
|
||||
"""
|
||||
|
||||
data_tuples = [(my['model_id'], my['year']) for my in model_years]
|
||||
execute_batch(cursor, query, data_tuples, page_size=self.batch_size)
|
||||
conn.commit()
|
||||
|
||||
return len(model_years)
|
||||
|
||||
def load_trims(self, trims: List[Dict]) -> int:
|
||||
"""Load trim data"""
|
||||
if not trims:
|
||||
return 0
|
||||
|
||||
logger.info(f"Loading {len(trims)} trim records")
|
||||
|
||||
with db_connections.postgres_connection() as conn:
|
||||
cursor = conn.cursor()
|
||||
|
||||
query = """
|
||||
INSERT INTO vehicles.trim (model_year_id, name)
|
||||
VALUES (%s, %s)
|
||||
ON CONFLICT DO NOTHING
|
||||
"""
|
||||
|
||||
data_tuples = [(t['model_year_id'], t['name']) for t in trims]
|
||||
execute_batch(cursor, query, data_tuples, page_size=self.batch_size)
|
||||
conn.commit()
|
||||
|
||||
return len(trims)
|
||||
|
||||
def load_engines(self, engines: List[Dict]) -> int:
|
||||
"""Load engine data"""
|
||||
if not engines:
|
||||
return 0
|
||||
|
||||
logger.info(f"Loading {len(engines)} engine records")
|
||||
|
||||
with db_connections.postgres_connection() as conn:
|
||||
cursor = conn.cursor()
|
||||
|
||||
query = """
|
||||
INSERT INTO vehicles.engine (name, code, displacement_l, cylinders, fuel_type, aspiration)
|
||||
VALUES (%s, %s, %s, %s, %s, %s)
|
||||
ON CONFLICT (lower(name)) DO NOTHING
|
||||
RETURNING id
|
||||
"""
|
||||
|
||||
for engine in engines:
|
||||
cursor.execute(query, (
|
||||
engine['name'],
|
||||
engine.get('code'),
|
||||
engine.get('displacement_l'),
|
||||
engine.get('cylinders'),
|
||||
engine.get('fuel_type'),
|
||||
engine.get('aspiration')
|
||||
))
|
||||
|
||||
conn.commit()
|
||||
|
||||
return len(engines)
|
||||
|
||||
def load_trim_engine_relationships(self, relationships: List[Dict]) -> int:
|
||||
"""Load trim-engine relationships"""
|
||||
if not relationships:
|
||||
return 0
|
||||
|
||||
logger.info(f"Loading {len(relationships)} trim-engine relationships")
|
||||
|
||||
with db_connections.postgres_connection() as conn:
|
||||
cursor = conn.cursor()
|
||||
|
||||
query = """
|
||||
INSERT INTO vehicles.trim_engine (trim_id, engine_id)
|
||||
VALUES (%s, %s)
|
||||
ON CONFLICT (trim_id, engine_id) DO NOTHING
|
||||
"""
|
||||
|
||||
data_tuples = [(rel['trim_id'], rel['engine_id']) for rel in relationships]
|
||||
execute_batch(cursor, query, data_tuples, page_size=self.batch_size)
|
||||
conn.commit()
|
||||
|
||||
return len(relationships)
|
||||
|
||||
def get_table_count(self, table_name: str) -> int:
|
||||
"""Get count of records in a table"""
|
||||
with db_connections.postgres_connection() as conn:
|
||||
cursor = conn.cursor()
|
||||
cursor.execute(f"SELECT COUNT(*) FROM vehicles.{table_name}")
|
||||
result = cursor.fetchone()
|
||||
return result['count'] if isinstance(result, dict) and 'count' in result else result[0]
|
||||
Reference in New Issue
Block a user