338 lines
12 KiB
Python
Executable File
338 lines
12 KiB
Python
Executable File
import logging
|
|
from typing import List, Dict, Optional, Generator
|
|
from ..connections import db_connections
|
|
from ..utils.make_filter import MakeFilter
|
|
from tqdm import tqdm
|
|
|
|
# Module-level logger named after this module; used by every extractor method below.
logger = logging.getLogger(__name__)
|
|
|
|
class MSSQLExtractor:
    """Extract data from MS SQL Server source database.

    Most extractors restrict their results to the makes allowed by the
    configured make filter. The filter contributes a SQL predicate through
    ``get_sql_filter(column)``, which is spliced into each query.
    """

    # Element ids whose patterns are extracted by ``extract_patterns_data``.
    # NOTE(review): the meaning of each id is not visible in this module --
    # confirm against dbo.Element before changing the list.
    PATTERN_ELEMENT_IDS = (26, 27, 28, 18, 24)

    def __init__(self, make_filter: "Optional[MakeFilter]" = None):
        """Create an extractor.

        Args:
            make_filter: Filter supplying the allowed makes and the SQL
                predicate used by the queries. A default ``MakeFilter()`` is
                created when none (or a falsy one) is given.
        """
        # Page size used by extract_patterns_data for OFFSET/FETCH paging.
        self.batch_size = 10000
        self.make_filter = make_filter or MakeFilter()
        logger.info(f"Initialized MSSQL extractor with {len(self.make_filter.get_allowed_makes())} allowed makes")

    def extract_wmi_data(self) -> List[Dict]:
        """Extract WMI (World Manufacturer Identifier) data with make filtering.

        Returns:
            One dict per dbo.Wmi row whose PublicAvailabilityDate is not in
            the future and whose manufacturer is linked to an allowed make,
            ordered by Id.
        """
        logger.info("Extracting WMI data from source database with make filtering")

        query = f"""
            SELECT
                w.Id,
                w.Wmi,
                w.ManufacturerId,
                w.MakeId,
                w.VehicleTypeId,
                w.TruckTypeId,
                w.CountryId,
                w.PublicAvailabilityDate,
                w.NonCompliant,
                w.NonCompliantReason,
                w.CreatedOn,
                w.UpdatedOn,
                w.ProcessedOn
            FROM dbo.Wmi w
            WHERE w.PublicAvailabilityDate <= GETDATE()
            AND w.ManufacturerId IN (
                SELECT DISTINCT mfr.Id
                FROM dbo.Manufacturer mfr
                JOIN dbo.Manufacturer_Make mm ON mfr.Id = mm.ManufacturerId
                JOIN dbo.Make m ON mm.MakeId = m.Id
                WHERE {self.make_filter.get_sql_filter('m.Name')}
            )
            ORDER BY w.Id
        """

        results = self._fetch_all(query)

        logger.info(f"Extracted {len(results)} WMI records")
        return results

    def extract_wmi_vin_schema_mappings(self) -> List[Dict]:
        """Extract WMI to VIN Schema mappings with year ranges and make filtering.

        Returns:
            One dict per dbo.Wmi_VinSchema row (with the WMI code and schema
            name joined in) whose WMI passes both the manufacturer-based and
            the direct make filter, ordered by WmiId, VinSchemaId.
        """
        logger.info("Extracting WMI-VinSchema mappings with make filtering")

        query = f"""
            SELECT
                wvs.WmiId,
                wvs.VinSchemaId,
                wvs.YearFrom,
                wvs.YearTo,
                w.Wmi,
                vs.Name as SchemaName
            FROM dbo.Wmi_VinSchema wvs
            JOIN dbo.Wmi w ON wvs.WmiId = w.Id
            JOIN dbo.VinSchema vs ON wvs.VinSchemaId = vs.Id
            WHERE w.PublicAvailabilityDate <= GETDATE()
            AND w.ManufacturerId IN (
                SELECT DISTINCT mfr.Id
                FROM dbo.Manufacturer mfr
                JOIN dbo.Manufacturer_Make mm ON mfr.Id = mm.ManufacturerId
                JOIN dbo.Make m ON mm.MakeId = m.Id
                WHERE {self.make_filter.get_sql_filter('m.Name')}
            )
            AND w.MakeId IN (
                SELECT Id FROM dbo.Make
                WHERE {self.make_filter.get_sql_filter('Name')}
            )
            ORDER BY wvs.WmiId, wvs.VinSchemaId
        """

        results = self._fetch_all(query)

        logger.info(f"Extracted {len(results)} WMI-VinSchema mappings (filtered by allowed makes)")
        return results

    def extract_patterns_data(self) -> Generator[List[Dict], None, None]:
        """Extract pattern data in batches with make filtering.

        A COUNT(*) query determines how many pages to request; the rows are
        then read with OFFSET/FETCH paging, ``self.batch_size`` rows at a time.

        Yields:
            A non-empty list of row dicts per page. Iteration stops early if
            a page comes back empty.
        """
        logger.info("Extracting pattern data from source database with make filtering")

        element_ids = ", ".join(str(element_id) for element_id in self.PATTERN_ELEMENT_IDS)

        # The count and the paged query share one FROM/WHERE fragment so the
        # two can never drift apart.
        from_where = f"""
            FROM dbo.Pattern p
            JOIN dbo.Element e ON p.ElementId = e.Id
            JOIN dbo.VinSchema vs ON p.VinSchemaId = vs.Id
            JOIN dbo.Wmi_VinSchema wvs ON vs.Id = wvs.VinSchemaId
            JOIN dbo.Wmi w ON wvs.WmiId = w.Id
            JOIN dbo.Wmi_Make wm ON w.Id = wm.WmiId
            JOIN dbo.Make m ON wm.MakeId = m.Id
            WHERE {self.make_filter.get_sql_filter('m.Name')}
            AND e.Id IN ({element_ids})
        """

        # First get the total count with filtering
        count_query = f"""
            SELECT COUNT(*) as total
            {from_where}
        """

        with db_connections.mssql_connection() as conn:
            cursor = conn.cursor()
            cursor.execute(count_query)
            total_row = self._row_to_dict(cursor, cursor.fetchone())
            total_count = total_row.get('total', 0)

        logger.info(f"Total patterns to extract (filtered): {total_count}")

        # The joins can produce several rows per pattern, so p.Id alone is not
        # a unique sort key and OFFSET/FETCH pages could overlap or skip rows.
        # w.Id and m.Id are added as tie-breakers: every selected column is
        # determined by (p.Id, w.Id, m.Id), so any remaining ties are
        # identical rows and paging becomes stable.
        select_query = f"""
            SELECT
                p.Id,
                p.VinSchemaId,
                p.Keys,
                p.ElementId,
                p.AttributeId,
                e.Name as ElementName,
                e.weight,
                e.GroupName,
                vs.Name as SchemaName,
                w.Wmi,
                m.Name as MakeName
            {from_where}
            ORDER BY p.Id, w.Id, m.Id
        """

        with db_connections.mssql_connection() as conn:
            cursor = conn.cursor()

            for offset in tqdm(range(0, total_count, self.batch_size), desc="Extracting filtered patterns"):
                # The paging clause is appended per batch rather than filled in
                # with str.format, so braces in the filter SQL cannot break it.
                cursor.execute(
                    f"{select_query} OFFSET {offset} ROWS FETCH NEXT {self.batch_size} ROWS ONLY"
                )
                rows = cursor.fetchall()

                if not rows:
                    break
                yield self._rows_to_dicts(cursor, rows)

    def extract_elements_data(self) -> List[Dict]:
        """Extract element definitions.

        Returns:
            One dict per dbo.Element row, ordered by Id. No make filtering is
            applied.
        """
        logger.info("Extracting element data")

        query = """
            SELECT
                Id,
                Name,
                Code,
                LookupTable,
                Description,
                IsPrivate,
                GroupName,
                DataType,
                MinAllowedValue,
                MaxAllowedValue,
                IsQS,
                Decode,
                weight
            FROM dbo.Element
            ORDER BY Id
        """

        results = self._fetch_all(query)

        logger.info(f"Extracted {len(results)} element definitions")
        return results

    def extract_reference_table(self, table_name: str) -> List[Dict]:
        """Extract data from a reference table with make filtering.

        ``Manufacturer``, ``Make``, ``Model`` and ``Wmi`` are restricted to the
        allowed makes; any other table is read in full, ordered by Id.

        Args:
            table_name: Bare table name inside the ``dbo`` schema.

        Returns:
            One dict per extracted row.

        Raises:
            ValueError: If ``table_name`` is not a plain identifier. The name
                is interpolated into SQL text, so anything else is rejected
                before a query is built.
        """
        # Validate before any logging or DB work: the fallback branch splices
        # the name directly into the statement.
        if not isinstance(table_name, str) or not table_name.isidentifier():
            raise ValueError(f"Invalid reference table name: {table_name!r}")

        logger.info(f"Extracting data from {table_name} with make filtering")

        # Apply make filtering - filter by Make brand names (simpler and more efficient)
        if table_name == 'Manufacturer':
            # Extract manufacturers linked to filtered makes only
            query = f"""
                SELECT DISTINCT mfr.* FROM dbo.Manufacturer mfr
                JOIN dbo.Manufacturer_Make mm ON mfr.Id = mm.ManufacturerId
                JOIN dbo.Make m ON mm.MakeId = m.Id
                WHERE {self.make_filter.get_sql_filter('m.Name')}
                ORDER BY mfr.Id
            """
        elif table_name == 'Make':
            # Filter makes directly by brand names (GMC, Ford, Toyota, etc.)
            query = f"""
                SELECT * FROM dbo.Make
                WHERE {self.make_filter.get_sql_filter('Name')}
                ORDER BY Id
            """
        elif table_name == 'Model':
            # Filter models by allowed make brand names
            query = f"""
                SELECT md.* FROM dbo.Model md
                JOIN dbo.Make_Model mm ON md.Id = mm.ModelId
                JOIN dbo.Make m ON mm.MakeId = m.Id
                WHERE {self.make_filter.get_sql_filter('m.Name')}
                ORDER BY md.Id
            """
        elif table_name == 'Wmi':
            # Filter WMI records by allowed manufacturers (linked to makes) AND makes directly
            query = f"""
                SELECT w.* FROM dbo.Wmi w
                WHERE w.PublicAvailabilityDate <= GETDATE()
                AND w.ManufacturerId IN (
                    SELECT DISTINCT mfr.Id
                    FROM dbo.Manufacturer mfr
                    JOIN dbo.Manufacturer_Make mm ON mfr.Id = mm.ManufacturerId
                    JOIN dbo.Make m ON mm.MakeId = m.Id
                    WHERE {self.make_filter.get_sql_filter('m.Name')}
                )
                AND w.MakeId IN (
                    SELECT Id FROM dbo.Make
                    WHERE {self.make_filter.get_sql_filter('Name')}
                )
                ORDER BY w.Id
            """
        else:
            # No filtering for other reference tables
            query = f"SELECT * FROM dbo.{table_name} ORDER BY Id"

        results = self._fetch_all(query)

        logger.info(f"Extracted {len(results)} records from {table_name} (filtered by allowed makes)")
        return results

    def extract_make_model_relationships(self) -> List[Dict]:
        """Extract Make-Model relationships with make filtering.

        Returns:
            One dict per dbo.Make_Model row whose make is allowed, with make
            and model names joined in, ordered by MakeId, ModelId.
        """
        logger.info("Extracting Make-Model relationships with make filtering")

        query = f"""
            SELECT
                mm.MakeId,
                mm.ModelId,
                m.Name as MakeName,
                md.Name as ModelName
            FROM dbo.Make_Model mm
            JOIN dbo.Make m ON mm.MakeId = m.Id
            JOIN dbo.Model md ON mm.ModelId = md.Id
            WHERE {self.make_filter.get_sql_filter('m.Name')}
            ORDER BY mm.MakeId, mm.ModelId
        """

        results = self._fetch_all(query)

        logger.info(f"Extracted {len(results)} Make-Model relationships (filtered by allowed makes)")
        return results

    def extract_wmi_make_relationships(self) -> List[Dict]:
        """Extract WMI-Make relationships with make filtering.

        Returns:
            One dict per dbo.Wmi_Make row where the WMI's manufacturer, the
            WMI's own MakeId and the related make all pass the make filter,
            ordered by WmiId, MakeId.
        """
        logger.info("Extracting WMI-Make relationships with make filtering")

        # The subquery aliases dbo.Make as "mk" because "m" is already taken
        # by the outer join.
        query = f"""
            SELECT
                wm.WmiId,
                wm.MakeId,
                w.Wmi,
                m.Name as MakeName
            FROM dbo.Wmi_Make wm
            JOIN dbo.Wmi w ON wm.WmiId = w.Id
            JOIN dbo.Make m ON wm.MakeId = m.Id
            WHERE w.PublicAvailabilityDate <= GETDATE()
            AND w.ManufacturerId IN (
                SELECT DISTINCT mfr.Id
                FROM dbo.Manufacturer mfr
                JOIN dbo.Manufacturer_Make mm ON mfr.Id = mm.ManufacturerId
                JOIN dbo.Make mk ON mm.MakeId = mk.Id
                WHERE {self.make_filter.get_sql_filter('mk.Name')}
            )
            AND w.MakeId IN (
                SELECT Id FROM dbo.Make
                WHERE {self.make_filter.get_sql_filter('Name')}
            )
            AND m.Id IN (
                SELECT Id FROM dbo.Make
                WHERE {self.make_filter.get_sql_filter('Name')}
            )
            ORDER BY wm.WmiId, wm.MakeId
        """

        results = self._fetch_all(query)

        logger.info(f"Extracted {len(results)} WMI-Make relationships (filtered by allowed makes)")
        return results

    def _fetch_all(self, query: str) -> List[Dict]:
        """Run *query* on a fresh MSSQL connection and return all rows as dicts."""
        with db_connections.mssql_connection() as conn:
            cursor = conn.cursor()
            cursor.execute(query)
            rows = cursor.fetchall()
            # Convert while the cursor (and its description) is still live.
            return self._rows_to_dicts(cursor, rows)

    def _rows_to_dicts(self, cursor, rows) -> List[Dict]:
        """Convert pyodbc rows to list of dicts using cursor description.

        Returns an empty list when *rows* is empty or None; in that case
        ``cursor.description`` is not consulted.
        """
        if not rows:
            return []
        # The first item of each description entry is the column name.
        columns = [col[0] for col in cursor.description]
        return [dict(zip(columns, row)) for row in rows]

    def _row_to_dict(self, cursor, row) -> Dict:
        """Convert single pyodbc row to dict; a missing row (None) gives ``{}``."""
        if row is None:
            return {}
        columns = [col[0] for col in cursor.description]
        return dict(zip(columns, row))
|