import logging
from typing import List, Dict, Optional

from psycopg2.extras import execute_batch
from tqdm import tqdm  # NOTE(review): unused here; kept in case another chunk of this file uses it

from ..connections import db_connections

logger = logging.getLogger(__name__)


class PostgreSQLLoader:
    """Load data into the PostgreSQL target database (``vehicles`` schema).

    All loaders open a connection via ``db_connections.postgres_connection()``,
    insert with ``psycopg2.extras.execute_batch`` for round-trip efficiency,
    and commit once per call.
    """

    # Column mapping from source (MS SQL) to target (PostgreSQL).
    # FIX: the original dict literal listed 'MakeId' twice; Python silently
    # keeps only the last duplicate, so the redundant entry was removed.
    COLUMN_MAPPINGS: Dict[str, str] = {
        'Id': 'id',
        'Name': 'name',
        'Code': 'code',
        'MakeId': 'make_id',
        'CreateOn': 'created_at',
        'CreatedOn': 'created_at',
        'UpdateOn': 'updated_at',
        'UpdatedOn': 'updated_at',
        'Wmi': 'wmi',
        'ManufacturerId': 'manufacturer_id',
        'VehicleTypeId': 'vehicle_type_id',
        'TruckTypeId': 'truck_type_id',
        'CountryId': 'country_id',
        'PublicAvailabilityDate': 'public_availability_date',
        'NonCompliant': 'non_compliant',
        'NonCompliantReason': 'non_compliant_reason',
        'ProcessedOn': 'processed_on',
        'DisplayOrder': 'display_order',
        'FormType': 'form_type',
        'Description': 'description',
        'LookupTable': 'lookup_table',
        'IsPrivate': 'is_private',
        'GroupName': 'group_name',
        'DataType': 'data_type',
        'MinAllowedValue': 'min_allowed_value',
        'MaxAllowedValue': 'max_allowed_value',
        'IsQS': 'is_qs',
        'Decode': 'decode',
        'weight': 'weight',
        # ErrorCode specific mappings
        'ErrorCodeName': 'code',
        'ErrorCodeDescription': 'description',
    }

    def __init__(self):
        # Rows per server round-trip for execute_batch.
        self.batch_size = 1000

    @staticmethod
    def _count_rows(cursor, table_name: str) -> int:
        """Return COUNT(*) for ``vehicles.<table_name>``.

        Handles both tuple cursors and dict-like cursors (RealDictCursor),
        which is why the result is probed for a 'count' key first.
        """
        cursor.execute(f"SELECT COUNT(*) FROM vehicles.{table_name}")
        result = cursor.fetchone()
        if isinstance(result, dict) and 'count' in result:
            return result['count']
        return result[0]

    def load_reference_table(self, table_name: str, data: List[Dict],
                             clear_existing: bool = True) -> int:
        """Load records into a reference table under the ``vehicles`` schema.

        Columns are renamed via :data:`COLUMN_MAPPINGS` (unmapped columns are
        lowercased) and filtered to those that actually exist in the target
        table, so source rows with extra fields load cleanly.

        Args:
            table_name: Target table name (unqualified; schema is ``vehicles``).
            data: Source rows as dicts; the first row defines the column set.
            clear_existing: When True, TRUNCATE ... CASCADE the table first.

        Returns:
            The post-load row count of the target table (0 if nothing loaded).
        """
        if not data:
            logger.warning("No data to load for table %s", table_name)
            return 0

        logger.info("Loading %d records into vehicles.%s", len(data), table_name)

        with db_connections.postgres_connection() as conn:
            cursor = conn.cursor()

            if clear_existing:
                # CASCADE: dependent tables reference these via FKs.
                cursor.execute(f"TRUNCATE TABLE vehicles.{table_name} CASCADE")
                logger.info("Cleared existing data from vehicles.%s", table_name)

            # The first record defines the source column set; map each to its
            # target name (explicit mapping, else lowercased as-is).
            source_columns = list(data[0].keys())
            target_columns = [
                self.COLUMN_MAPPINGS.get(col, col.lower())
                for col in source_columns
            ]

            # FIX: table_name is now passed as a bind parameter instead of
            # being interpolated into the query text.
            cursor.execute(
                """
                SELECT column_name FROM information_schema.columns
                WHERE table_schema = 'vehicles' AND table_name = %s
                """,
                (table_name,),
            )
            existing_columns = {
                row['column_name'] if isinstance(row, dict) else row[0]
                for row in cursor.fetchall()
            }

            # Keep only columns that exist in the target table, remembering the
            # source index of each so values can be picked out per record.
            final_columns: List[str] = []
            final_indices: List[int] = []
            for i, col in enumerate(target_columns):
                if col in existing_columns:
                    final_columns.append(col)
                    final_indices.append(i)

            if not final_columns:
                logger.warning("No matching columns found for table %s", table_name)
                return 0

            column_str = ','.join(final_columns)
            placeholders = ','.join(['%s'] * len(final_columns))
            query = f"""
                INSERT INTO vehicles.{table_name} ({column_str})
                VALUES ({placeholders})
                ON CONFLICT DO NOTHING
            """

            data_tuples = []
            for record in data:
                values = []
                skip_record = False
                for i in final_indices:
                    source_col = source_columns[i]
                    # FIX: .get() tolerates records missing a key that the
                    # first record had (record[...] would raise KeyError).
                    value = record.get(source_col)
                    # error_codes rows without a code are unusable — skip them.
                    if (table_name == 'error_codes'
                            and source_col in ('ErrorCodeName', 'Code')
                            and (value is None or value == '')):
                        skip_record = True
                        break
                    values.append(value)
                if not skip_record:
                    data_tuples.append(tuple(values))

            execute_batch(cursor, query, data_tuples, page_size=self.batch_size)
            conn.commit()

            final_count = self._count_rows(cursor, table_name)
            logger.info("Successfully loaded %d records into vehicles.%s",
                        final_count, table_name)
            return final_count

    def load_wmi_vin_schema_mappings(self, mappings: List[Dict]) -> int:
        """Replace all WMI→VinSchema mappings.

        Null year bounds default to the open range 1980–2999.

        Returns the post-load row count of ``vehicles.wmi_vin_schemas``.
        """
        if not mappings:
            return 0

        logger.info("Loading %d WMI-VinSchema mappings", len(mappings))

        with db_connections.postgres_connection() as conn:
            cursor = conn.cursor()
            cursor.execute("TRUNCATE TABLE vehicles.wmi_vin_schemas CASCADE")

            query = """
                INSERT INTO vehicles.wmi_vin_schemas (wmi_id, vin_schema_id, year_from, year_to)
                VALUES (%s, %s, %s, %s)
                ON CONFLICT DO NOTHING
            """
            data_tuples = [
                (
                    mapping['WmiId'],
                    mapping['VinSchemaId'],
                    mapping['YearFrom'] or 1980,  # null/0 → open lower bound
                    mapping['YearTo'] or 2999,    # null/0 → open upper bound
                )
                for mapping in mappings
            ]

            execute_batch(cursor, query, data_tuples, page_size=self.batch_size)
            conn.commit()

            final_count = self._count_rows(cursor, 'wmi_vin_schemas')
            logger.info("Successfully loaded %d WMI-VinSchema mappings", final_count)
            return final_count

    def load_make_model_relationships(self, relationships: List[Dict]) -> int:
        """Replace all Make↔Model relationships.

        Returns the post-load row count of ``vehicles.make_models``.
        """
        if not relationships:
            return 0

        logger.info("Loading %d Make-Model relationships", len(relationships))

        with db_connections.postgres_connection() as conn:
            cursor = conn.cursor()
            cursor.execute("TRUNCATE TABLE vehicles.make_models CASCADE")

            query = """
                INSERT INTO vehicles.make_models (make_id, model_id)
                VALUES (%s, %s)
                ON CONFLICT DO NOTHING
            """
            data_tuples = [(rel['MakeId'], rel['ModelId']) for rel in relationships]

            execute_batch(cursor, query, data_tuples, page_size=self.batch_size)
            conn.commit()

            final_count = self._count_rows(cursor, 'make_models')
            logger.info("Successfully loaded %d Make-Model relationships", final_count)
            return final_count

    def load_wmi_make_relationships(self, relationships: List[Dict]) -> int:
        """Replace all WMI↔Make relationships.

        Returns the post-load row count of ``vehicles.wmi_makes``.
        """
        if not relationships:
            return 0

        logger.info("Loading %d WMI-Make relationships", len(relationships))

        with db_connections.postgres_connection() as conn:
            cursor = conn.cursor()
            cursor.execute("TRUNCATE TABLE vehicles.wmi_makes CASCADE")

            query = """
                INSERT INTO vehicles.wmi_makes (wmi_id, make_id)
                VALUES (%s, %s)
                ON CONFLICT DO NOTHING
            """
            data_tuples = [(rel['WmiId'], rel['MakeId']) for rel in relationships]

            execute_batch(cursor, query, data_tuples, page_size=self.batch_size)
            conn.commit()

            final_count = self._count_rows(cursor, 'wmi_makes')
            logger.info("Successfully loaded %d WMI-Make relationships", final_count)
            return final_count

    def load_model_years(self, model_years: List[Dict]) -> int:
        """Append model-year availability rows (existing rows kept; duplicates skipped).

        Returns the number of input records (not the number actually inserted).
        """
        if not model_years:
            return 0

        logger.info("Loading %d model year records", len(model_years))

        with db_connections.postgres_connection() as conn:
            cursor = conn.cursor()
            query = """
                INSERT INTO vehicles.model_year (model_id, year)
                VALUES (%s, %s)
                ON CONFLICT (model_id, year) DO NOTHING
            """
            data_tuples = [(my['model_id'], my['year']) for my in model_years]
            execute_batch(cursor, query, data_tuples, page_size=self.batch_size)
            conn.commit()

        return len(model_years)

    def load_trims(self, trims: List[Dict]) -> int:
        """Append trim rows (duplicates skipped via ON CONFLICT DO NOTHING).

        Returns the number of input records (not the number actually inserted).
        """
        if not trims:
            return 0

        logger.info("Loading %d trim records", len(trims))

        with db_connections.postgres_connection() as conn:
            cursor = conn.cursor()
            query = """
                INSERT INTO vehicles.trim (model_year_id, name)
                VALUES (%s, %s)
                ON CONFLICT DO NOTHING
            """
            data_tuples = [(t['model_year_id'], t['name']) for t in trims]
            execute_batch(cursor, query, data_tuples, page_size=self.batch_size)
            conn.commit()

        return len(trims)

    def load_engines(self, engines: List[Dict]) -> int:
        """Append engine rows, de-duplicated case-insensitively on name.

        Relies on a unique expression index on ``lower(name)`` in the target.
        FIX: the original executed one statement per row with a dead
        ``RETURNING id`` clause whose result was never fetched; this now uses
        execute_batch like every other loader.

        Returns the number of input records (not the number actually inserted).
        """
        if not engines:
            return 0

        logger.info("Loading %d engine records", len(engines))

        with db_connections.postgres_connection() as conn:
            cursor = conn.cursor()
            query = """
                INSERT INTO vehicles.engine (name, code, displacement_l, cylinders, fuel_type, aspiration)
                VALUES (%s, %s, %s, %s, %s, %s)
                ON CONFLICT (lower(name)) DO NOTHING
            """
            data_tuples = [
                (
                    engine['name'],
                    engine.get('code'),
                    engine.get('displacement_l'),
                    engine.get('cylinders'),
                    engine.get('fuel_type'),
                    engine.get('aspiration'),
                )
                for engine in engines
            ]
            execute_batch(cursor, query, data_tuples, page_size=self.batch_size)
            conn.commit()

        return len(engines)

    def load_trim_engine_relationships(self, relationships: List[Dict]) -> int:
        """Append trim↔engine relationship rows (duplicates skipped).

        Returns the number of input records (not the number actually inserted).
        """
        if not relationships:
            return 0

        logger.info("Loading %d trim-engine relationships", len(relationships))

        with db_connections.postgres_connection() as conn:
            cursor = conn.cursor()
            query = """
                INSERT INTO vehicles.trim_engine (trim_id, engine_id)
                VALUES (%s, %s)
                ON CONFLICT (trim_id, engine_id) DO NOTHING
            """
            data_tuples = [(rel['trim_id'], rel['engine_id']) for rel in relationships]
            execute_batch(cursor, query, data_tuples, page_size=self.batch_size)
            conn.commit()

        return len(relationships)

    def get_table_count(self, table_name: str) -> int:
        """Return the current row count of ``vehicles.<table_name>``."""
        with db_connections.postgres_connection() as conn:
            return self._count_rows(conn.cursor(), table_name)