"""Extraction of source data from the MS SQL Server database, restricted to allowed makes."""

import logging
import re
from typing import List, Dict, Optional, Generator

from tqdm import tqdm

from ..connections import db_connections
from ..utils.make_filter import MakeFilter

logger = logging.getLogger(__name__)

# A plain, unquoted SQL identifier. Used to validate caller-supplied table
# names before they are interpolated into a query string.
_SQL_IDENTIFIER_RE = re.compile(r"[A-Za-z_][A-Za-z0-9_]*")


class MSSQLExtractor:
    """Extract data from MS SQL Server source database"""

    # Element ids whose patterns are extracted by ``extract_patterns_data``.
    # NOTE(review): what each id stands for is not visible in this file.
    PATTERN_ELEMENT_IDS = (26, 27, 28, 18, 24)

    def __init__(self, make_filter: Optional[MakeFilter] = None):
        """Create an extractor.

        Args:
            make_filter: Filter that supplies the allowed makes and the SQL
                predicate used in every filtered query. A default
                ``MakeFilter()`` is created when omitted.
        """
        self.batch_size = 10000
        self.make_filter = make_filter or MakeFilter()
        logger.info(
            "Initialized MSSQL extractor with %d allowed makes",
            len(self.make_filter.get_allowed_makes()),
        )

    # ------------------------------------------------------------------
    # Shared query-building and execution helpers
    # ------------------------------------------------------------------

    def _manufacturer_ids_subquery(self, make_alias: str = "m") -> str:
        """Return a sub-select of Manufacturer ids linked to an allowed make.

        ``make_alias`` is the alias given to ``dbo.Make`` inside the
        sub-select; pass a different alias when the outer query already
        uses ``m``.
        """
        name_column = f"{make_alias}.Name"
        return f"""
                SELECT DISTINCT mfr.Id
                FROM dbo.Manufacturer mfr
                JOIN dbo.Manufacturer_Make mm ON mfr.Id = mm.ManufacturerId
                JOIN dbo.Make {make_alias} ON mm.MakeId = {make_alias}.Id
                WHERE {self.make_filter.get_sql_filter(name_column)}
        """

    def _make_ids_subquery(self) -> str:
        """Return a sub-select of the ids of the allowed makes."""
        return f"""
                SELECT Id FROM dbo.Make
                WHERE {self.make_filter.get_sql_filter('Name')}
        """

    def _fetch_all(self, query: str) -> List[Dict]:
        """Run ``query`` on a fresh connection and return all rows as dicts."""
        with db_connections.mssql_connection() as conn:
            cursor = conn.cursor()
            cursor.execute(query)
            rows = cursor.fetchall()
            return self._rows_to_dicts(cursor, rows)

    # ------------------------------------------------------------------
    # Extractors
    # ------------------------------------------------------------------

    def extract_wmi_data(self) -> List[Dict]:
        """Extract WMI (World Manufacturer Identifier) data with make filtering

        Only rows whose ``PublicAvailabilityDate`` is not in the future and
        whose manufacturer is linked to an allowed make are returned. Unlike
        the other WMI queries in this class, this one does not additionally
        filter on ``w.MakeId``.

        Returns:
            One dict per WMI row, keyed by column name, ordered by ``Id``.
        """
        logger.info("Extracting WMI data from source database with make filtering")

        query = f"""
            SELECT
                w.Id, w.Wmi, w.ManufacturerId, w.MakeId, w.VehicleTypeId,
                w.TruckTypeId, w.CountryId, w.PublicAvailabilityDate,
                w.NonCompliant, w.NonCompliantReason,
                w.CreatedOn, w.UpdatedOn, w.ProcessedOn
            FROM dbo.Wmi w
            WHERE w.PublicAvailabilityDate <= GETDATE()
              AND w.ManufacturerId IN ({self._manufacturer_ids_subquery()})
            ORDER BY w.Id
        """

        results = self._fetch_all(query)
        logger.info("Extracted %d WMI records", len(results))
        return results

    def extract_wmi_vin_schema_mappings(self) -> List[Dict]:
        """Extract WMI to VIN Schema mappings with year ranges and make filtering

        Returns:
            One dict per mapping row, keyed by column name, ordered by
            ``WmiId`` then ``VinSchemaId``.
        """
        logger.info("Extracting WMI-VinSchema mappings with make filtering")

        query = f"""
            SELECT
                wvs.WmiId, wvs.VinSchemaId, wvs.YearFrom, wvs.YearTo,
                w.Wmi, vs.Name as SchemaName
            FROM dbo.Wmi_VinSchema wvs
            JOIN dbo.Wmi w ON wvs.WmiId = w.Id
            JOIN dbo.VinSchema vs ON wvs.VinSchemaId = vs.Id
            WHERE w.PublicAvailabilityDate <= GETDATE()
              AND w.ManufacturerId IN ({self._manufacturer_ids_subquery()})
              AND w.MakeId IN ({self._make_ids_subquery()})
            ORDER BY wvs.WmiId, wvs.VinSchemaId
        """

        results = self._fetch_all(query)
        logger.info(
            "Extracted %d WMI-VinSchema mappings (filtered by allowed makes)",
            len(results),
        )
        return results

    def extract_patterns_data(self) -> Generator[List[Dict], None, None]:
        """Extract pattern data in batches with make filtering

        A count query is run first to size the progress bar; the rows are
        then read page by page with ``OFFSET``/``FETCH``.

        Yields:
            Lists of at most ``self.batch_size`` row dicts, keyed by column
            name.
        """
        logger.info("Extracting pattern data from source database with make filtering")

        element_ids = ", ".join(str(int(eid)) for eid in self.PATTERN_ELEMENT_IDS)

        # The count query and the data query share one FROM/WHERE so the
        # total always describes exactly the rows that are paged below.
        from_where = f"""
            FROM dbo.Pattern p
            JOIN dbo.Element e ON p.ElementId = e.Id
            JOIN dbo.VinSchema vs ON p.VinSchemaId = vs.Id
            JOIN dbo.Wmi_VinSchema wvs ON vs.Id = wvs.VinSchemaId
            JOIN dbo.Wmi w ON wvs.WmiId = w.Id
            JOIN dbo.Wmi_Make wm ON w.Id = wm.WmiId
            JOIN dbo.Make m ON wm.MakeId = m.Id
            WHERE {self.make_filter.get_sql_filter('m.Name')}
              AND e.Id IN ({element_ids})
        """

        # First get the total count with filtering
        count_query = f"SELECT COUNT(*) as total {from_where}"

        with db_connections.mssql_connection() as conn:
            cursor = conn.cursor()
            cursor.execute(count_query)
            total_row = self._row_to_dict(cursor, cursor.fetchone())
            total_count = total_row.get('total', 0) or 0

        logger.info("Total patterns to extract (filtered): %s", total_count)

        # The joins can repeat a pattern id once per matching WMI/make, so
        # ordering by p.Id alone is not a unique sort key and OFFSET/FETCH
        # pages could overlap or skip rows. With the w.Id and m.Id
        # tie-breakers, any rows that still tie are identical in every
        # selected column, which keeps the content of each page stable.
        select_query = f"""
            SELECT
                p.Id, p.VinSchemaId, p.Keys, p.ElementId, p.AttributeId,
                e.Name as ElementName, e.weight, e.GroupName,
                vs.Name as SchemaName,
                w.Wmi,
                m.Name as MakeName
            {from_where}
            ORDER BY p.Id, w.Id, m.Id
        """

        batch_size = int(self.batch_size)

        with db_connections.mssql_connection() as conn:
            cursor = conn.cursor()

            for offset in tqdm(
                range(0, total_count, batch_size),
                desc="Extracting filtered patterns",
            ):
                # Append the paging clause instead of calling str.format on
                # the whole query: the interpolated make filter may itself
                # contain '{' or '}' characters.
                page_query = (
                    f"{select_query}"
                    f" OFFSET {int(offset)} ROWS"
                    f" FETCH NEXT {batch_size} ROWS ONLY"
                )
                cursor.execute(page_query)
                rows = cursor.fetchall()

                if not rows:
                    break
                yield self._rows_to_dicts(cursor, rows)

    def extract_elements_data(self) -> List[Dict]:
        """Extract element definitions

        Returns:
            One dict per ``dbo.Element`` row, keyed by column name, ordered
            by ``Id``. No make filtering is applied.
        """
        logger.info("Extracting element data")

        query = """
            SELECT
                Id, Name, Code, LookupTable, Description, IsPrivate,
                GroupName, DataType, MinAllowedValue, MaxAllowedValue,
                IsQS, Decode, weight
            FROM dbo.Element
            ORDER BY Id
        """

        results = self._fetch_all(query)
        logger.info("Extracted %d element definitions", len(results))
        return results

    def extract_reference_table(self, table_name: str) -> List[Dict]:
        """Extract data from a reference table with make filtering

        ``Manufacturer``, ``Make``, ``Model`` and ``Wmi`` are restricted to
        the allowed makes; any other table is read in full, ordered by
        ``Id``.

        Args:
            table_name: Name of a table in the ``dbo`` schema.

        Returns:
            One dict per row, keyed by column name.

        Raises:
            ValueError: If ``table_name`` is not one of the filtered tables
                and is not a plain SQL identifier.
        """
        logger.info("Extracting data from %s with make filtering", table_name)

        # Apply make filtering - filter by Make brand names
        if table_name == 'Manufacturer':
            # Manufacturers linked to filtered makes only
            query = f"""
                SELECT DISTINCT mfr.*
                FROM dbo.Manufacturer mfr
                JOIN dbo.Manufacturer_Make mm ON mfr.Id = mm.ManufacturerId
                JOIN dbo.Make m ON mm.MakeId = m.Id
                WHERE {self.make_filter.get_sql_filter('m.Name')}
                ORDER BY mfr.Id
            """
        elif table_name == 'Make':
            # Filter makes directly by brand names
            query = f"""
                SELECT * FROM dbo.Make
                WHERE {self.make_filter.get_sql_filter('Name')}
                ORDER BY Id
            """
        elif table_name == 'Model':
            # Filter models by allowed make brand names
            query = f"""
                SELECT md.*
                FROM dbo.Model md
                JOIN dbo.Make_Model mm ON md.Id = mm.ModelId
                JOIN dbo.Make m ON mm.MakeId = m.Id
                WHERE {self.make_filter.get_sql_filter('m.Name')}
                ORDER BY md.Id
            """
        elif table_name == 'Wmi':
            # Filter WMI records by allowed manufacturers (linked to makes)
            # AND by allowed makes directly
            query = f"""
                SELECT w.*
                FROM dbo.Wmi w
                WHERE w.PublicAvailabilityDate <= GETDATE()
                  AND w.ManufacturerId IN ({self._manufacturer_ids_subquery()})
                  AND w.MakeId IN ({self._make_ids_subquery()})
                ORDER BY w.Id
            """
        else:
            # No filtering for other reference tables. The name goes straight
            # into the SQL text, so accept plain identifiers only and
            # bracket-quote the result.
            if not isinstance(table_name, str) or not _SQL_IDENTIFIER_RE.fullmatch(table_name):
                raise ValueError(f"Invalid reference table name: {table_name!r}")
            query = f"SELECT * FROM dbo.[{table_name}] ORDER BY Id"

        results = self._fetch_all(query)
        logger.info(
            "Extracted %d records from %s (filtered by allowed makes)",
            len(results),
            table_name,
        )
        return results

    def extract_make_model_relationships(self) -> List[Dict]:
        """Extract Make-Model relationships with make filtering

        Returns:
            One dict per relationship row, keyed by column name, ordered by
            ``MakeId`` then ``ModelId``.
        """
        logger.info("Extracting Make-Model relationships with make filtering")

        query = f"""
            SELECT
                mm.MakeId, mm.ModelId,
                m.Name as MakeName, md.Name as ModelName
            FROM dbo.Make_Model mm
            JOIN dbo.Make m ON mm.MakeId = m.Id
            JOIN dbo.Model md ON mm.ModelId = md.Id
            WHERE {self.make_filter.get_sql_filter('m.Name')}
            ORDER BY mm.MakeId, mm.ModelId
        """

        results = self._fetch_all(query)
        logger.info(
            "Extracted %d Make-Model relationships (filtered by allowed makes)",
            len(results),
        )
        return results

    def extract_wmi_make_relationships(self) -> List[Dict]:
        """Extract WMI-Make relationships with make filtering

        Returns:
            One dict per relationship row, keyed by column name, ordered by
            ``WmiId`` then ``MakeId``.
        """
        logger.info("Extracting WMI-Make relationships with make filtering")

        # The outer query already uses the alias ``m`` for dbo.Make, so the
        # manufacturer sub-select uses ``mk`` instead.
        query = f"""
            SELECT
                wm.WmiId, wm.MakeId,
                w.Wmi, m.Name as MakeName
            FROM dbo.Wmi_Make wm
            JOIN dbo.Wmi w ON wm.WmiId = w.Id
            JOIN dbo.Make m ON wm.MakeId = m.Id
            WHERE w.PublicAvailabilityDate <= GETDATE()
              AND w.ManufacturerId IN ({self._manufacturer_ids_subquery('mk')})
              AND w.MakeId IN ({self._make_ids_subquery()})
              AND m.Id IN ({self._make_ids_subquery()})
            ORDER BY wm.WmiId, wm.MakeId
        """

        results = self._fetch_all(query)
        logger.info(
            "Extracted %d WMI-Make relationships (filtered by allowed makes)",
            len(results),
        )
        return results

    # ------------------------------------------------------------------
    # Row conversion
    # ------------------------------------------------------------------

    def _rows_to_dicts(self, cursor, rows) -> List[Dict]:
        """Convert pyodbc rows to list of dicts using cursor description.

        Returns an empty list when ``rows`` is empty or ``None``.
        """
        if not rows:
            return []
        columns = [col[0] for col in cursor.description]
        return [dict(zip(columns, row)) for row in rows]

    def _row_to_dict(self, cursor, row) -> Dict:
        """Convert single pyodbc row to dict.

        Returns an empty dict when ``row`` is ``None``.
        """
        if row is None:
            return {}
        columns = [col[0] for col in cursor.description]
        return dict(zip(columns, row))