Initial Commit

This commit is contained in:
Eric Gullickson
2025-09-17 16:09:15 -05:00
parent 0cdb9803de
commit a052040e3a
373 changed files with 437090 additions and 6773 deletions

View File

@@ -0,0 +1,629 @@
"""
JSON Extractor for Manual Vehicle Data Processing
Extracts and normalizes vehicle data from JSON files into database-ready structures.
Integrates with MakeNameMapper and EngineSpecParser utilities for comprehensive
data processing with L→I normalization and make name conversion.
Key Features:
- Extract make/model/year/trim/engine data from JSON files
- Handle electric vehicles (empty engines → default motor)
- Data validation and quality assurance
- Progress tracking and error reporting
Usage:
extractor = JsonExtractor(make_mapper, engine_parser)
make_data = extractor.extract_make_data('sources/makes/toyota.json')
all_data = extractor.extract_all_makes('sources/makes/')
"""
import glob
import json
import logging
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Generator, List, Optional, Tuple
# Import our utilities (handle both relative and direct imports)
try:
from ..utils.make_name_mapper import MakeNameMapper
from ..utils.engine_spec_parser import EngineSpecParser, EngineSpec
except ImportError:
# Fallback for direct execution
import sys
import os
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
from utils.make_name_mapper import MakeNameMapper
from utils.engine_spec_parser import EngineSpecParser, EngineSpec
logger = logging.getLogger(__name__)
@dataclass
class ValidationResult:
    """Outcome of validating one JSON make file's structure."""
    is_valid: bool        # True when no blocking errors were recorded
    errors: List[str]     # blocking structural problems
    warnings: List[str]   # non-blocking observations

    @property
    def has_errors(self) -> bool:
        """True when at least one error was recorded."""
        return bool(self.errors)

    @property
    def has_warnings(self) -> bool:
        """True when at least one warning was recorded."""
        return bool(self.warnings)
@dataclass
class ModelData:
    """One model aggregated across all years it appears in."""
    name: str                   # model name as it appears in the JSON
    years: List[int]            # every year this model shows up in
    engines: List[EngineSpec]   # parsed and normalized engine specs
    trims: List[str]            # trim names collected from submodels
    is_electric: bool = False   # set when the source had an empty engines array

    @property
    def total_trims(self) -> int:
        return len(self.trims)

    @property
    def total_engines(self) -> int:
        return len(self.engines)

    @property
    def year_range(self) -> str:
        """Human-readable span, e.g. '2010-2015', a single year, or 'Unknown'."""
        if not self.years:
            return "Unknown"
        if len(self.years) == 1:
            return str(self.years[0])
        return f"{min(self.years)}-{max(self.years)}"
@dataclass
class MakeData:
    """Everything extracted for one make: models plus processing diagnostics."""
    name: str                       # normalized display name (e.g., "Alfa Romeo")
    filename: str                   # original JSON filename
    models: List[ModelData]
    processing_errors: List[str]    # errors hit during extraction
    processing_warnings: List[str]  # warnings hit during extraction

    @property
    def total_models(self) -> int:
        return len(self.models)

    @property
    def total_engines(self) -> int:
        return sum(m.total_engines for m in self.models)

    @property
    def total_trims(self) -> int:
        return sum(m.total_trims for m in self.models)

    @property
    def electric_models_count(self) -> int:
        return sum(1 for m in self.models if m.is_electric)

    @property
    def year_range(self) -> str:
        """Span across all models' years; 'Unknown' when no model has years."""
        years = [y for m in self.models for y in m.years]
        if not years:
            return "Unknown"
        # Single distinct year collapses to that year (even if repeated)
        if len(set(years)) > 1:
            return f"{min(years)}-{max(years)}"
        return str(years[0])
@dataclass
class ExtractionResult:
    """Aggregate results and counters for a full extraction run."""
    makes: List[MakeData]
    total_files_processed: int
    successful_extractions: int
    failed_extractions: int
    total_models: int
    total_engines: int
    total_electric_models: int

    @property
    def success_rate(self) -> float:
        """Fraction of files extracted cleanly; 0.0 when nothing was processed."""
        if self.total_files_processed == 0:
            return 0.0
        return self.successful_extractions / self.total_files_processed
class JsonExtractor:
    """Extract normalized vehicle data from JSON make files.

    Uses MakeNameMapper to turn filenames into display names and
    EngineSpecParser to parse engine strings, create the electric-motor
    default for models with an empty engines array, and de-duplicate specs.
    """

    def __init__(self, make_mapper: MakeNameMapper, engine_parser: EngineSpecParser):
        """
        Initialize JSON extractor with utilities

        Args:
            make_mapper: For normalizing make names from filenames
            engine_parser: For parsing engine specifications with L→I normalization
        """
        self.make_mapper = make_mapper
        self.engine_parser = engine_parser
        logger.info("JsonExtractor initialized with MakeNameMapper and EngineSpecParser")

    def validate_json_structure(self, json_data: dict, filename: str) -> ValidationResult:
        """
        Validate JSON structure before processing.

        Expected shape: {make_key: [{year, models: [{name, engines?, submodels?}]}]}

        Args:
            json_data: Loaded JSON data
            filename: Source filename for error context
        Returns:
            ValidationResult with validity status and any issues
        """
        errors = []
        warnings = []
        try:
            # Check top-level structure
            if not isinstance(json_data, dict):
                errors.append("JSON must be a dictionary")
                return ValidationResult(False, errors, warnings)
            # Should have exactly one key (the make name)
            if len(json_data.keys()) != 1:
                errors.append(f"JSON should have exactly one top-level key, found {len(json_data.keys())}")
                return ValidationResult(False, errors, warnings)
            make_key = list(json_data.keys())[0]
            make_data = json_data[make_key]
            # Make data should be a list of year entries
            if not isinstance(make_data, list):
                errors.append(f"Make data for '{make_key}' must be a list")
                return ValidationResult(False, errors, warnings)
            if len(make_data) == 0:
                warnings.append(f"Make '{make_key}' has no year entries")
            # Validate year entries
            for i, year_entry in enumerate(make_data):
                if not isinstance(year_entry, dict):
                    errors.append(f"Year entry {i} must be a dictionary")
                    continue
                # Check required fields
                if 'year' not in year_entry:
                    errors.append(f"Year entry {i} missing 'year' field")
                if 'models' not in year_entry:
                    errors.append(f"Year entry {i} missing 'models' field")
                    continue
                # Validate year
                try:
                    year = int(year_entry['year'])
                    if year < 1900 or year > 2030:
                        warnings.append(f"Unusual year value: {year}")
                except (KeyError, ValueError, TypeError):
                    # BUGFIX: KeyError added. An entry with 'models' but no
                    # 'year' previously raised KeyError here, which escaped
                    # to the outer handler and aborted validation of every
                    # remaining entry in the file.
                    errors.append(f"Invalid year value in entry {i}: {year_entry.get('year')}")
                # Validate models
                models = year_entry['models']
                if not isinstance(models, list):
                    errors.append(f"Models in year entry {i} must be a list")
                    continue
                for j, model in enumerate(models):
                    if not isinstance(model, dict):
                        errors.append(f"Model {j} in year {year_entry.get('year')} must be a dictionary")
                        continue
                    if 'name' not in model:
                        errors.append(f"Model {j} in year {year_entry.get('year')} missing 'name' field")
                    # Engines and submodels are optional but should be lists if present
                    if 'engines' in model and not isinstance(model['engines'], list):
                        errors.append(f"Engines for model {model.get('name')} must be a list")
                    if 'submodels' in model and not isinstance(model['submodels'], list):
                        errors.append(f"Submodels for model {model.get('name')} must be a list")
        except Exception as e:
            # Last-resort guard so validation itself never crashes the pipeline
            errors.append(f"Unexpected error during validation: {str(e)}")
        is_valid = len(errors) == 0
        # BUGFIX: these messages logged a literal "(unknown)"; include the
        # actual filename that is in scope for error context.
        if errors:
            logger.warning(f"JSON validation failed for {filename}: {len(errors)} errors")
        elif warnings:
            logger.info(f"JSON validation for {filename}: {len(warnings)} warnings")
        else:
            logger.debug(f"JSON validation passed for {filename}")
        return ValidationResult(is_valid, errors, warnings)

    def extract_make_data(self, json_file_path: str) -> MakeData:
        """
        Extract complete make data from a single JSON file.

        Never raises: on failure a MakeData with empty models and the
        error recorded in processing_errors is returned instead.

        Args:
            json_file_path: Path to JSON file
        Returns:
            MakeData with extracted and normalized data
        """
        filename = os.path.basename(json_file_path)
        logger.info(f"Extracting make data from {filename}")
        processing_errors = []
        processing_warnings = []
        try:
            # Load and validate JSON
            with open(json_file_path, 'r', encoding='utf-8') as f:
                json_data = json.load(f)
            validation = self.validate_json_structure(json_data, filename)
            processing_errors.extend(validation.errors)
            processing_warnings.extend(validation.warnings)
            if not validation.is_valid:
                logger.error(f"JSON validation failed for {filename}")
                return MakeData(
                    name=self.make_mapper.normalize_make_name(filename),
                    filename=filename,
                    models=[],
                    processing_errors=processing_errors,
                    processing_warnings=processing_warnings
                )
            # Get normalized make name
            make_name = self.make_mapper.normalize_make_name(filename)
            logger.debug(f"Normalized make name: {filename} -> {make_name}")
            # Extract data
            make_key = list(json_data.keys())[0]
            year_entries = json_data[make_key]
            # Group models by name across all years
            models_by_name = {}  # model_name -> {years: set, engines: set, trims: set}
            for year_entry in year_entries:
                try:
                    year = int(year_entry['year'])
                    models_list = year_entry.get('models', [])
                    for model_entry in models_list:
                        model_name = model_entry.get('name', '').strip()
                        if not model_name:
                            processing_warnings.append(f"Empty model name in year {year}")
                            continue
                        # Initialize model data if not seen before
                        if model_name not in models_by_name:
                            models_by_name[model_name] = {
                                'years': set(),
                                'engines': set(),
                                'trims': set()
                            }
                        # Add year
                        models_by_name[model_name]['years'].add(year)
                        # Add engines (sets de-duplicate raw strings per model)
                        engines_list = model_entry.get('engines', [])
                        for engine_str in engines_list:
                            if engine_str and engine_str.strip():
                                models_by_name[model_name]['engines'].add(engine_str.strip())
                        # Add trims (from submodels)
                        submodels_list = model_entry.get('submodels', [])
                        for trim in submodels_list:
                            if trim and trim.strip():
                                models_by_name[model_name]['trims'].add(trim.strip())
                except (KeyError, ValueError, TypeError) as e:
                    # BUGFIX: KeyError added so an entry without a 'year' key
                    # is skipped with an error instead of aborting the whole
                    # file via the outer except.
                    processing_errors.append(f"Error processing year entry: {str(e)}")
                    continue
            # Convert to ModelData objects
            models = []
            for model_name, model_info in models_by_name.items():
                try:
                    # Parse engines
                    engine_specs = []
                    is_electric = False
                    if not model_info['engines']:
                        # Empty engines array - electric vehicle
                        is_electric = True
                        electric_spec = self.engine_parser.create_electric_motor()
                        engine_specs = [electric_spec]
                        logger.debug(f"Created electric motor for {make_name} {model_name}")
                    else:
                        # Parse each engine string
                        for engine_str in model_info['engines']:
                            spec = self.engine_parser.parse_engine_string(engine_str)
                            engine_specs.append(spec)
                    # Remove duplicate engines based on key attributes
                    unique_engines = self.engine_parser.get_unique_engines(engine_specs)
                    # Create model data
                    model_data = ModelData(
                        name=model_name,
                        years=sorted(list(model_info['years'])),
                        engines=unique_engines,
                        trims=sorted(list(model_info['trims'])),
                        is_electric=is_electric
                    )
                    models.append(model_data)
                except Exception as e:
                    # One bad model must not sink the rest of the make
                    processing_errors.append(f"Error processing model {model_name}: {str(e)}")
                    continue
            # Sort models by name
            models.sort(key=lambda m: m.name)
            make_data = MakeData(
                name=make_name,
                filename=filename,
                models=models,
                processing_errors=processing_errors,
                processing_warnings=processing_warnings
            )
            # BUGFIX: logged a literal "(unknown)"; report the make name.
            logger.info(f"Extracted {make_name}: {len(models)} models, "
                        f"{make_data.total_engines} engines, {make_data.electric_models_count} electric models")
            return make_data
        except Exception as e:
            # BUGFIX: logged a literal "(unknown)"; report the source path.
            logger.error(f"Failed to extract make data from {json_file_path}: {str(e)}")
            processing_errors.append(f"Fatal error: {str(e)}")
            return MakeData(
                name=self.make_mapper.normalize_make_name(filename),
                filename=filename,
                models=[],
                processing_errors=processing_errors,
                processing_warnings=processing_warnings
            )

    def extract_all_makes(self, sources_dir: str) -> ExtractionResult:
        """
        Process all JSON files in the sources directory.

        Args:
            sources_dir: Directory containing JSON make files
        Returns:
            ExtractionResult with all extracted data and statistics
        """
        logger.info(f"Starting extraction of all makes from {sources_dir}")
        # Find all JSON files
        pattern = os.path.join(sources_dir, '*.json')
        json_files = glob.glob(pattern)
        if not json_files:
            logger.warning(f"No JSON files found in {sources_dir}")
            return ExtractionResult(
                makes=[],
                total_files_processed=0,
                successful_extractions=0,
                failed_extractions=0,
                total_models=0,
                total_engines=0,
                total_electric_models=0
            )
        logger.info(f"Found {len(json_files)} JSON files to process")
        makes = []
        successful_extractions = 0
        failed_extractions = 0
        # Sort files for consistent processing order
        json_files.sort()
        for json_file in json_files:
            try:
                make_data = self.extract_make_data(json_file)
                makes.append(make_data)
                # A file counts as failed when any processing error was recorded
                if make_data.processing_errors:
                    failed_extractions += 1
                    logger.error(f"Extraction completed with errors for {make_data.filename}")
                else:
                    successful_extractions += 1
                    logger.debug(f"Extraction successful for {make_data.filename}")
            except Exception as e:
                # extract_make_data should not raise; this is a belt-and-braces guard
                logger.error(f"Fatal error processing {os.path.basename(json_file)}: {str(e)}")
                failed_extractions += 1
                # Create minimal make data for failed file
                filename = os.path.basename(json_file)
                failed_make = MakeData(
                    name=self.make_mapper.normalize_make_name(filename),
                    filename=filename,
                    models=[],
                    processing_errors=[f"Fatal extraction error: {str(e)}"],
                    processing_warnings=[]
                )
                makes.append(failed_make)
        # Calculate statistics
        total_models = sum(make.total_models for make in makes)
        total_engines = sum(make.total_engines for make in makes)
        total_electric_models = sum(make.electric_models_count for make in makes)
        result = ExtractionResult(
            makes=makes,
            total_files_processed=len(json_files),
            successful_extractions=successful_extractions,
            failed_extractions=failed_extractions,
            total_models=total_models,
            total_engines=total_engines,
            total_electric_models=total_electric_models
        )
        logger.info(f"Extraction complete: {successful_extractions}/{len(json_files)} successful, "
                    f"{total_models} models, {total_engines} engines, {total_electric_models} electric models")
        return result

    def get_extraction_statistics(self, result: ExtractionResult) -> Dict[str, Any]:
        """
        Get detailed extraction statistics.

        Note: return annotation fixed from `Dict[str, any]` — `any` is the
        builtin function, not a type; `typing.Any` is what was intended.

        Args:
            result: ExtractionResult from extract_all_makes
        Returns:
            Dictionary with 'files', 'data', 'quality' and 'makes' sections
        """
        stats = {
            'files': {
                'total_processed': result.total_files_processed,
                'successful': result.successful_extractions,
                'failed': result.failed_extractions,
                'success_rate': result.success_rate
            },
            'data': {
                'total_makes': len(result.makes),
                'total_models': result.total_models,
                'total_engines': result.total_engines,
                'electric_models': result.total_electric_models
            },
            'quality': {
                'makes_with_errors': sum(1 for make in result.makes if make.processing_errors),
                'makes_with_warnings': sum(1 for make in result.makes if make.processing_warnings),
                'total_errors': sum(len(make.processing_errors) for make in result.makes),
                'total_warnings': sum(len(make.processing_warnings) for make in result.makes)
            }
        }
        # Add make-specific statistics
        make_stats = []
        for make in result.makes:
            make_stat = {
                'name': make.name,
                'filename': make.filename,
                'models': make.total_models,
                'engines': make.total_engines,
                'trims': make.total_trims,
                'electric_models': make.electric_models_count,
                'year_range': make.year_range,
                'errors': len(make.processing_errors),
                'warnings': len(make.processing_warnings)
            }
            make_stats.append(make_stat)
        stats['makes'] = make_stats
        return stats

    def print_extraction_report(self, result: ExtractionResult) -> None:
        """
        Print detailed extraction report to stdout.

        Args:
            result: ExtractionResult from extract_all_makes
        """
        stats = self.get_extraction_statistics(result)
        print(f"🚀 JSON EXTRACTION REPORT")
        print(f"=" * 50)
        # File processing summary
        print(f"\n📁 FILE PROCESSING")
        print(f" Files processed: {stats['files']['total_processed']}")
        print(f" Successful: {stats['files']['successful']}")
        print(f" Failed: {stats['files']['failed']}")
        print(f" Success rate: {stats['files']['success_rate']:.1%}")
        # Data summary
        print(f"\n📊 DATA EXTRACTED")
        print(f" Makes: {stats['data']['total_makes']}")
        print(f" Models: {stats['data']['total_models']}")
        print(f" Engines: {stats['data']['total_engines']}")
        print(f" Electric models: {stats['data']['electric_models']}")
        # Quality summary
        print(f"\n🔍 QUALITY ASSESSMENT")
        print(f" Makes with errors: {stats['quality']['makes_with_errors']}")
        print(f" Makes with warnings: {stats['quality']['makes_with_warnings']}")
        print(f" Total errors: {stats['quality']['total_errors']}")
        print(f" Total warnings: {stats['quality']['total_warnings']}")
        # Show problematic makes
        if stats['quality']['makes_with_errors'] > 0:
            print(f"\n⚠️ MAKES WITH ERRORS:")
            for make in result.makes:
                if make.processing_errors:
                    print(f" {make.name} ({make.filename}): {len(make.processing_errors)} errors")
        # Show top makes by data volume
        print(f"\n🏆 TOP MAKES BY MODEL COUNT:")
        top_makes = sorted(result.makes, key=lambda m: m.total_models, reverse=True)[:10]
        for make in top_makes:
            print(f" {make.name}: {make.total_models} models, {make.total_engines} engines")
# Example usage and testing functions
def example_usage():
    """Demonstrate JsonExtractor usage"""
    print("🚀 JsonExtractor Example Usage")
    print("=" * 40)
    # Use direct imports for example usage
    try:
        from ..utils.make_name_mapper import MakeNameMapper
        from ..utils.engine_spec_parser import EngineSpecParser
    except ImportError:
        # Fallback for direct execution
        import sys
        import os
        sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
        from utils.make_name_mapper import MakeNameMapper
        from utils.engine_spec_parser import EngineSpecParser
    # Wire the utilities into an extractor
    extractor = JsonExtractor(MakeNameMapper(), EngineSpecParser())
    sources_dir = "sources/makes"
    # Guard clause: nothing to demonstrate without the sources directory
    if not os.path.exists(sources_dir):
        print(f"Sources directory not found: {sources_dir}")
        return
    # Single-make extraction demo
    toyota_file = os.path.join(sources_dir, "toyota.json")
    if os.path.exists(toyota_file):
        print(f"\n📄 Extracting from toyota.json...")
        data = extractor.extract_make_data(toyota_file)
        print(f" Make: {data.name}")
        print(f" Models: {data.total_models}")
        print(f" Engines: {data.total_engines}")
        print(f" Electric models: {data.electric_models_count}")
        print(f" Year range: {data.year_range}")
        if data.processing_errors:
            print(f" Errors: {len(data.processing_errors)}")
        if data.processing_warnings:
            print(f" Warnings: {len(data.processing_warnings)}")
    # Full-directory extraction demo
    print(f"\n🔄 Extracting all makes...")
    all_result = extractor.extract_all_makes(sources_dir)
    extractor.print_extraction_report(all_result)


if __name__ == "__main__":
    example_usage()

View File

@@ -0,0 +1,337 @@
import logging
from typing import List, Dict, Optional, Generator
from ..connections import db_connections
from ..utils.make_filter import MakeFilter
from tqdm import tqdm
logger = logging.getLogger(__name__)
class MSSQLExtractor:
    """Extract data from MS SQL Server source database.

    Every extraction query is restricted to the makes allowed by the
    injected MakeFilter. The filter clause comes from
    make_filter.get_sql_filter(...) and is interpolated directly into the
    SQL text rather than bound as a parameter — NOTE(review): this assumes
    get_sql_filter emits a safe clause built from an internal whitelist;
    confirm it never embeds untrusted input.
    """
    def __init__(self, make_filter: Optional[MakeFilter] = None):
        # Rows fetched per page by extract_patterns_data()
        self.batch_size = 10000
        # Default to a MakeFilter with its built-in allowed-make list
        self.make_filter = make_filter or MakeFilter()
        logger.info(f"Initialized MSSQL extractor with {len(self.make_filter.get_allowed_makes())} allowed makes")
    def extract_wmi_data(self) -> List[Dict]:
        """Extract WMI (World Manufacturer Identifier) data with make filtering"""
        logger.info("Extracting WMI data from source database with make filtering")
        # Only publicly available WMIs, and only those whose manufacturer is
        # linked to at least one allowed make.
        query = f"""
            SELECT
                w.Id,
                w.Wmi,
                w.ManufacturerId,
                w.MakeId,
                w.VehicleTypeId,
                w.TruckTypeId,
                w.CountryId,
                w.PublicAvailabilityDate,
                w.NonCompliant,
                w.NonCompliantReason,
                w.CreatedOn,
                w.UpdatedOn,
                w.ProcessedOn
            FROM dbo.Wmi w
            WHERE w.PublicAvailabilityDate <= GETDATE()
            AND w.ManufacturerId IN (
                SELECT DISTINCT mfr.Id
                FROM dbo.Manufacturer mfr
                JOIN dbo.Manufacturer_Make mm ON mfr.Id = mm.ManufacturerId
                JOIN dbo.Make m ON mm.MakeId = m.Id
                WHERE {self.make_filter.get_sql_filter('m.Name')}
            )
            ORDER BY w.Id
        """
        with db_connections.mssql_connection() as conn:
            cursor = conn.cursor()
            cursor.execute(query)
            rows = cursor.fetchall()
            results = self._rows_to_dicts(cursor, rows)
            logger.info(f"Extracted {len(results)} WMI records")
            return results
    def extract_wmi_vin_schema_mappings(self) -> List[Dict]:
        """Extract WMI to VIN Schema mappings with year ranges and make filtering"""
        logger.info("Extracting WMI-VinSchema mappings with make filtering")
        # Filters both by manufacturer linkage AND by the WMI's own MakeId.
        query = f"""
            SELECT
                wvs.WmiId,
                wvs.VinSchemaId,
                wvs.YearFrom,
                wvs.YearTo,
                w.Wmi,
                vs.Name as SchemaName
            FROM dbo.Wmi_VinSchema wvs
            JOIN dbo.Wmi w ON wvs.WmiId = w.Id
            JOIN dbo.VinSchema vs ON wvs.VinSchemaId = vs.Id
            WHERE w.PublicAvailabilityDate <= GETDATE()
            AND w.ManufacturerId IN (
                SELECT DISTINCT mfr.Id
                FROM dbo.Manufacturer mfr
                JOIN dbo.Manufacturer_Make mm ON mfr.Id = mm.ManufacturerId
                JOIN dbo.Make m ON mm.MakeId = m.Id
                WHERE {self.make_filter.get_sql_filter('m.Name')}
            )
            AND w.MakeId IN (
                SELECT Id FROM dbo.Make
                WHERE {self.make_filter.get_sql_filter('Name')}
            )
            ORDER BY wvs.WmiId, wvs.VinSchemaId
        """
        with db_connections.mssql_connection() as conn:
            cursor = conn.cursor()
            cursor.execute(query)
            rows = cursor.fetchall()
            results = self._rows_to_dicts(cursor, rows)
            logger.info(f"Extracted {len(results)} WMI-VinSchema mappings (filtered by allowed makes)")
            return results
    def extract_patterns_data(self) -> Generator[List[Dict], None, None]:
        """Extract pattern data in batches with make filtering.

        Yields lists of row-dicts, batch_size rows at a time, using
        OFFSET/FETCH paging ordered by p.Id.
        """
        logger.info("Extracting pattern data from source database with make filtering")
        # First get the total count with filtering
        # e.Id IN (26, 27, 28, 18, 24): restricts to a fixed set of elements —
        # NOTE(review): presumably the VIN-decode-relevant attributes
        # (make/model/year/etc.); confirm against dbo.Element.
        count_query = f"""
            SELECT COUNT(*) as total
            FROM dbo.Pattern p
            JOIN dbo.Element e ON p.ElementId = e.Id
            JOIN dbo.VinSchema vs ON p.VinSchemaId = vs.Id
            JOIN dbo.Wmi_VinSchema wvs ON vs.Id = wvs.VinSchemaId
            JOIN dbo.Wmi w ON wvs.WmiId = w.Id
            JOIN dbo.Wmi_Make wm ON w.Id = wm.WmiId
            JOIN dbo.Make m ON wm.MakeId = m.Id
            WHERE {self.make_filter.get_sql_filter('m.Name')}
            AND e.Id IN (26, 27, 28, 18, 24)
        """
        with db_connections.mssql_connection() as conn:
            cursor = conn.cursor()
            cursor.execute(count_query)
            total_row = self._row_to_dict(cursor, cursor.fetchone())
            total_count = total_row.get('total', 0)
        logger.info(f"Total patterns to extract (filtered): {total_count}")
        # Extract in batches with manufacturer filtering
        # The OFFSET/FETCH placeholders are literal {} (escaped in the
        # f-string) and are filled in per batch via str.format below.
        query = f"""
            SELECT
                p.Id,
                p.VinSchemaId,
                p.Keys,
                p.ElementId,
                p.AttributeId,
                e.Name as ElementName,
                e.weight,
                e.GroupName,
                vs.Name as SchemaName,
                w.Wmi,
                m.Name as MakeName
            FROM dbo.Pattern p
            JOIN dbo.Element e ON p.ElementId = e.Id
            JOIN dbo.VinSchema vs ON p.VinSchemaId = vs.Id
            JOIN dbo.Wmi_VinSchema wvs ON vs.Id = wvs.VinSchemaId
            JOIN dbo.Wmi w ON wvs.WmiId = w.Id
            JOIN dbo.Wmi_Make wm ON w.Id = wm.WmiId
            JOIN dbo.Make m ON wm.MakeId = m.Id
            WHERE {self.make_filter.get_sql_filter('m.Name')}
            AND e.Id IN (26, 27, 28, 18, 24)
            ORDER BY p.Id
            OFFSET {{}} ROWS FETCH NEXT {{}} ROWS ONLY
        """
        with db_connections.mssql_connection() as conn:
            cursor = conn.cursor()
            for offset in tqdm(range(0, total_count, self.batch_size), desc="Extracting filtered patterns"):
                cursor.execute(query.format(offset, self.batch_size))
                rows = cursor.fetchall()
                if rows:
                    yield self._rows_to_dicts(cursor, rows)
                else:
                    # No rows before reaching total_count: stop paging early
                    break
    def extract_elements_data(self) -> List[Dict]:
        """Extract element definitions (unfiltered reference data)."""
        logger.info("Extracting element data")
        query = """
            SELECT
                Id,
                Name,
                Code,
                LookupTable,
                Description,
                IsPrivate,
                GroupName,
                DataType,
                MinAllowedValue,
                MaxAllowedValue,
                IsQS,
                Decode,
                weight
            FROM dbo.Element
            ORDER BY Id
        """
        with db_connections.mssql_connection() as conn:
            cursor = conn.cursor()
            cursor.execute(query)
            rows = cursor.fetchall()
            results = self._rows_to_dicts(cursor, rows)
            logger.info(f"Extracted {len(results)} element definitions")
            return results
    def extract_reference_table(self, table_name: str) -> List[Dict]:
        """Extract data from a reference table with make filtering.

        Manufacturer/Make/Model/Wmi get make-aware filtered queries; any
        other table name is selected wholesale (SELECT *). table_name is
        interpolated into the SQL, so callers must pass trusted,
        internally-defined table names only.
        """
        logger.info(f"Extracting data from {table_name} with make filtering")
        # Apply make filtering - filter by Make brand names (simpler and more efficient)
        if table_name == 'Manufacturer':
            # Extract manufacturers linked to filtered makes only
            query = f"""
                SELECT DISTINCT mfr.* FROM dbo.Manufacturer mfr
                JOIN dbo.Manufacturer_Make mm ON mfr.Id = mm.ManufacturerId
                JOIN dbo.Make m ON mm.MakeId = m.Id
                WHERE {self.make_filter.get_sql_filter('m.Name')}
                ORDER BY mfr.Id
            """
        elif table_name == 'Make':
            # Filter makes directly by brand names (GMC, Ford, Toyota, etc.)
            query = f"""
                SELECT * FROM dbo.Make
                WHERE {self.make_filter.get_sql_filter('Name')}
                ORDER BY Id
            """
        elif table_name == 'Model':
            # Filter models by allowed make brand names
            query = f"""
                SELECT md.* FROM dbo.Model md
                JOIN dbo.Make_Model mm ON md.Id = mm.ModelId
                JOIN dbo.Make m ON mm.MakeId = m.Id
                WHERE {self.make_filter.get_sql_filter('m.Name')}
                ORDER BY md.Id
            """
        elif table_name == 'Wmi':
            # Filter WMI records by allowed manufacturers (linked to makes) AND makes directly
            query = f"""
                SELECT w.* FROM dbo.Wmi w
                WHERE w.PublicAvailabilityDate <= GETDATE()
                AND w.ManufacturerId IN (
                    SELECT DISTINCT mfr.Id
                    FROM dbo.Manufacturer mfr
                    JOIN dbo.Manufacturer_Make mm ON mfr.Id = mm.ManufacturerId
                    JOIN dbo.Make m ON mm.MakeId = m.Id
                    WHERE {self.make_filter.get_sql_filter('m.Name')}
                )
                AND w.MakeId IN (
                    SELECT Id FROM dbo.Make
                    WHERE {self.make_filter.get_sql_filter('Name')}
                )
                ORDER BY w.Id
            """
        else:
            # No filtering for other reference tables
            query = f"SELECT * FROM dbo.{table_name} ORDER BY Id"
        with db_connections.mssql_connection() as conn:
            cursor = conn.cursor()
            cursor.execute(query)
            rows = cursor.fetchall()
            results = self._rows_to_dicts(cursor, rows)
            logger.info(f"Extracted {len(results)} records from {table_name} (filtered by allowed makes)")
            return results
    def extract_make_model_relationships(self) -> List[Dict]:
        """Extract Make-Model relationships with make filtering"""
        logger.info("Extracting Make-Model relationships with make filtering")
        query = f"""
            SELECT
                mm.MakeId,
                mm.ModelId,
                m.Name as MakeName,
                md.Name as ModelName
            FROM dbo.Make_Model mm
            JOIN dbo.Make m ON mm.MakeId = m.Id
            JOIN dbo.Model md ON mm.ModelId = md.Id
            WHERE {self.make_filter.get_sql_filter('m.Name')}
            ORDER BY mm.MakeId, mm.ModelId
        """
        with db_connections.mssql_connection() as conn:
            cursor = conn.cursor()
            cursor.execute(query)
            rows = cursor.fetchall()
            results = self._rows_to_dicts(cursor, rows)
            logger.info(f"Extracted {len(results)} Make-Model relationships (filtered by allowed makes)")
            return results
    def extract_wmi_make_relationships(self) -> List[Dict]:
        """Extract WMI-Make relationships with make filtering.

        Applies three filters: manufacturer linked to an allowed make, the
        WMI's own MakeId allowed, and the related Make itself allowed.
        """
        logger.info("Extracting WMI-Make relationships with make filtering")
        query = f"""
            SELECT
                wm.WmiId,
                wm.MakeId,
                w.Wmi,
                m.Name as MakeName
            FROM dbo.Wmi_Make wm
            JOIN dbo.Wmi w ON wm.WmiId = w.Id
            JOIN dbo.Make m ON wm.MakeId = m.Id
            WHERE w.PublicAvailabilityDate <= GETDATE()
            AND w.ManufacturerId IN (
                SELECT DISTINCT mfr.Id
                FROM dbo.Manufacturer mfr
                JOIN dbo.Manufacturer_Make mm ON mfr.Id = mm.ManufacturerId
                JOIN dbo.Make mk ON mm.MakeId = mk.Id
                WHERE {self.make_filter.get_sql_filter('mk.Name')}
            )
            AND w.MakeId IN (
                SELECT Id FROM dbo.Make
                WHERE {self.make_filter.get_sql_filter('Name')}
            )
            AND m.Id IN (
                SELECT Id FROM dbo.Make
                WHERE {self.make_filter.get_sql_filter('Name')}
            )
            ORDER BY wm.WmiId, wm.MakeId
        """
        with db_connections.mssql_connection() as conn:
            cursor = conn.cursor()
            cursor.execute(query)
            rows = cursor.fetchall()
            results = self._rows_to_dicts(cursor, rows)
            logger.info(f"Extracted {len(results)} WMI-Make relationships (filtered by allowed makes)")
            return results
    def _rows_to_dicts(self, cursor, rows) -> List[Dict]:
        """Convert pyodbc rows to list of dicts using cursor description."""
        if not rows:
            return []
        # cursor.description: sequence of 7-item column descriptors; [0] is the name
        columns = [col[0] for col in cursor.description]
        result: List[Dict] = []
        for row in rows:
            item = {columns[i]: row[i] for i in range(len(columns))}
            result.append(item)
        return result
    def _row_to_dict(self, cursor, row) -> Dict:
        """Convert single pyodbc row to dict."""
        if row is None:
            return {}
        columns = [col[0] for col in cursor.description]
        return {columns[i]: row[i] for i in range(len(columns))}

View File

@@ -0,0 +1,63 @@
import logging
from typing import Optional, Dict, Any, List
from ..connections import db_connections
logger = logging.getLogger(__name__)
class VinProcExtractor:
    """Utilities to inspect and sample the MSSQL VIN decode stored procedure."""

    def __init__(self, proc_name: str = 'dbo.spVinDecode'):
        # Fully-qualified proc name used by sample_execute()
        self.proc_name = proc_name

    def find_proc(self) -> Optional[Dict[str, Any]]:
        """Locate the VIN decode proc by name pattern, return basic metadata.

        Returns:
            Dict with object_name/schema_name/type_desc for the newest object
            matching '%Vin%Decode%', or None when nothing matches.
        """
        query = """
            SELECT TOP 1
                o.name AS object_name,
                s.name AS schema_name,
                o.type_desc
            FROM sys.objects o
            JOIN sys.schemas s ON s.schema_id = o.schema_id
            WHERE o.name LIKE '%Vin%Decode%'
            ORDER BY o.create_date DESC
        """
        with db_connections.mssql_connection() as conn:
            cur = conn.cursor()
            cur.execute(query)
            row = cur.fetchone()
            if not row:
                logger.warning("VIN decode stored procedure not found by pattern")
                return None
            return {'object_name': row[0], 'schema_name': row[1], 'type_desc': row[2]}

    def get_definition(self, schema: str, name: str) -> str:
        """Return the text definition of the proc via sp_helptext.

        Args:
            schema: Schema of the target proc (e.g. 'dbo').
            name: Proc name within that schema.
        Returns:
            The concatenated definition text (sp_helptext yields one
            NVARCHAR column of line segments).

        BUGFIX: the previous version executed "EXEC {schema}.sp_helptext ...",
        which wrongly schema-qualified the *system* procedure sp_helptext with
        the target proc's schema, and interpolated the object name into the
        SQL string. sp_helptext is now invoked unqualified and the target
        name is passed as a bound parameter (no quoting/injection issues).
        """
        definition_lines: List[str] = []
        with db_connections.mssql_connection() as conn:
            cur = conn.cursor()
            cur.execute("EXEC sp_helptext ?", (f"{schema}.{name}",))
            for row in cur.fetchall():
                # sp_helptext returns a single NVARCHAR column with line segments
                definition_lines.append(row[0])
        return ''.join(definition_lines)

    def sample_execute(self, vin: str) -> Optional[List[Dict[str, Any]]]:
        """Execute the VIN decode proc with a VIN to capture output shape.

        Args:
            vin: VIN string passed as the @VIN parameter.
        Returns:
            List of row-dicts from the proc's first result set, or None when
            execution fails (e.g. the proc requires additional parameters).
        """
        # Prefer proc signature with @VIN only; if it requires year, MSSQL will error.
        sql = f"EXEC {self.proc_name} @VIN=?"
        with db_connections.mssql_connection() as conn:
            cur = conn.cursor()
            try:
                cur.execute(sql, (vin,))
                # A proc that returns no result set leaves cur.description as None
                columns = [c[0] for c in cur.description] if cur.description else []
                rows = cur.fetchall() if cur.description else []
                results: List[Dict[str, Any]] = []
                for r in rows:
                    results.append({columns[i]: r[i] for i in range(len(columns))})
                return results
            except Exception as e:
                # Deliberate best-effort: sampling failure is reported, not raised
                logger.warning(f"VIN proc sample execution failed: {e}")
                return None