Initial Commit
This commit is contained in:
0
mvp-platform-services/vehicles/etl/transformers/__init__.py
Executable file
0
mvp-platform-services/vehicles/etl/transformers/__init__.py
Executable file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
117
mvp-platform-services/vehicles/etl/transformers/pattern_analyzer.py
Executable file
117
mvp-platform-services/vehicles/etl/transformers/pattern_analyzer.py
Executable file
@@ -0,0 +1,117 @@
|
||||
from typing import Dict, List, Set, Optional
|
||||
import re
|
||||
import logging
|
||||
from collections import defaultdict
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class PatternAnalyzer:
    """Analyze VIN patterns to extract vehicle information.

    Pattern keys are strings of the form ``"VDS"`` or ``"VDS|VIS"``:
    the part before the pipe is compared against the VIN starting at
    position 4 (Vehicle Descriptor Section) and the part after it against
    the VIN starting at position 10 (Vehicle Identifier Section).
    """

    # Element IDs recognised by extract_make_model_from_patterns() and the
    # result bucket each one feeds.
    _ELEMENT_BUCKETS = {
        26: 'makes',          # Make
        27: 'models',         # Model
        28: 'trims',          # Trim
        18: 'engines',        # Engine Model
        24: 'transmissions',  # Transmission
    }

    # 0-based string offsets of VIN positions 4 and 10.
    _VDS_OFFSET = 3
    _VIS_OFFSET = 9

    def __init__(self):
        # Initialised empty; no method in this class reads or writes it.
        self.pattern_cache = {}

    def analyze_pattern_keys(self, keys: str) -> Dict:
        """Split pattern keys into their VDS/VIS parts and VIN positions.

        Example: "ABCDE|FGHIJKLM" means positions 4-8 and 10-17.

        Args:
            keys: Pattern keys, optionally containing a '|' separator.
                ``None`` or an empty string yields an empty analysis.

        Returns:
            A dict with ``'vds'`` (text before the first '|'), ``'vis'``
            (text between the first and second '|', or ``''``) and
            ``'positions'`` (1-based VIN positions covered by both parts).
        """
        # str.split already returns [keys] when there is no separator.
        parts = (keys or '').split('|')
        vds = parts[0]
        vis = parts[1] if len(parts) > 1 else ''

        positions = []
        if vds:
            positions.extend(range(4, 4 + len(vds)))
        if vis:
            positions.extend(range(10, 10 + len(vis)))

        return {
            'vds': vds,  # Vehicle Descriptor Section
            'vis': vis,  # Vehicle Identifier Section
            'positions': positions,
        }

    def extract_make_model_from_patterns(self, patterns: List[Dict]) -> Dict:
        """Group pattern rows by (vin_schema_id, keys) and collect attributes.

        Args:
            patterns: Dicts read with ``.get`` for ``'vin_schema_id'``,
                ``'keys'``, ``'element_id'``, ``'attribute_id'`` and
                ``'weight'``.

        Returns:
            A plain dict keyed by ``(vin_schema_id, keys)``. Each value holds
            the sets ``'makes'``, ``'models'``, ``'trims'``, ``'engines'``,
            ``'transmissions'`` and an integer ``'confidence'`` that is the
            sum of the weights of every row in the group (recognised element
            or not).
        """
        vehicle_combinations = defaultdict(lambda: {
            'makes': set(),
            'models': set(),
            'trims': set(),
            'engines': set(),
            'transmissions': set(),
            'confidence': 0,
        })

        for pattern in patterns:
            key = (pattern.get('vin_schema_id'), pattern.get('keys'))
            combination = vehicle_combinations[key]

            bucket = self._ELEMENT_BUCKETS.get(pattern.get('element_id'))
            if bucket is not None:
                combination[bucket].add(pattern.get('attribute_id', ''))

            # A row may carry an explicit None weight; count it as 0 instead
            # of failing on `int += None`.
            combination['confidence'] += pattern.get('weight') or 0

        return dict(vehicle_combinations)

    @staticmethod
    def _segment_matches(pattern: str, vin: str, start: int) -> bool:
        """Match one pattern segment against the VIN slice beginning at start."""
        candidate = vin[start:start + len(pattern)]
        try:
            # '*' is the single-character wildcard; every other character is
            # handed to the regex engine unchanged.
            return re.match(pattern.replace('*', '.'), candidate) is not None
        except re.error:
            # A pattern that is not a valid regex cannot match anything.
            return False

    def match_vin_to_pattern(self, vin: str, pattern_keys: str) -> bool:
        """Check whether a VIN matches pattern keys.

        The pattern uses '*' as a single-character wildcard, which is
        converted to the regex '.' before matching.

        Args:
            vin: VIN to test. Needs at least 17 characters for a
                ``"VDS|VIS"`` pattern and at least 8 for a single segment.
            pattern_keys: ``"VDS"`` or ``"VDS|VIS"`` pattern.

        Returns:
            True when every segment matches at its VIN offset. False for
            empty input, a VIN that is too short, a pattern with more than
            one '|' separator, or a segment that is not a valid regex.
        """
        if not vin or not pattern_keys:
            return False

        segments = pattern_keys.split('|')

        if len(segments) == 1:
            # Single segment pattern (positions 4-8)
            if len(vin) < 8:
                return False
            return self._segment_matches(segments[0], vin, self._VDS_OFFSET)

        # More than one separator is malformed; report "no match" rather
        # than raising on tuple unpacking.
        if len(segments) != 2 or len(vin) < 17:
            return False

        vds_pattern, vis_pattern = segments
        return (self._segment_matches(vds_pattern, vin, self._VDS_OFFSET) and
                self._segment_matches(vis_pattern, vin, self._VIS_OFFSET))

    def calculate_pattern_confidence(self,
                                     pattern_matches: List[Dict],
                                     element_weights: Dict[int, int]) -> int:
        """Calculate a confidence score from matched patterns.

        Args:
            pattern_matches: Dicts whose ``'element_id'`` is read with ``.get``.
            element_weights: Weight per element ID; missing IDs count as 0.

        Returns:
            The sum of the weights of the distinct element IDs present in
            ``pattern_matches`` (each element is counted once).
        """
        matched_elements = {match.get('element_id') for match in pattern_matches}
        return sum(element_weights.get(element_id, 0)
                   for element_id in matched_elements)
|
||||
95
mvp-platform-services/vehicles/etl/transformers/vin_year_extractor.py
Executable file
95
mvp-platform-services/vehicles/etl/transformers/vin_year_extractor.py
Executable file
@@ -0,0 +1,95 @@
|
||||
from typing import Tuple, Optional, Dict
|
||||
import re
|
||||
from datetime import datetime
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class VINYearExtractor:
    """Extract model year from VIN following NHTSA logic."""

    # Year code mapping for position 10 (newer 30-year cycle). The same code
    # also stands for the year 30 years earlier.
    YEAR_CODES = {
        'A': 2010, 'B': 2011, 'C': 2012, 'D': 2013,
        'E': 2014, 'F': 2015, 'G': 2016, 'H': 2017,
        'J': 2018, 'K': 2019, 'L': 2020, 'M': 2021,
        'N': 2022, 'P': 2023, 'R': 2024, 'S': 2025,
        'T': 2026, 'V': 2027, 'W': 2028, 'X': 2029,
        'Y': 2030, '1': 2031, '2': 2032, '3': 2033,
        '4': 2034, '5': 2035, '6': 2036, '7': 2037,
        '8': 2038, '9': 2039,
    }

    def extract_year(self,
                     vin: str,
                     vehicle_type_id: Optional[int] = None,
                     truck_type_id: Optional[int] = None) -> Tuple[Optional[int], Optional[int]]:
        """Extract model year from VIN position 10.

        Args:
            vin: VIN to inspect; it is upper-cased and stripped of
                surrounding whitespace before use.
            vehicle_type_id: Vehicle type; 2 and 7 are treated as
                car/light truck, 3 only together with ``truck_type_id == 1``.
            truck_type_id: Truck type, consulted only when
                ``vehicle_type_id == 3``.

        Returns:
            ``(primary_year, alternative_year)``. Both are ``None`` when the
            VIN is empty, shorter than 10 characters after stripping, or has
            an unknown year code at position 10.
        """
        if not vin:
            return None, None

        # Normalise BEFORE the length check: whitespace padding must not let
        # a short VIN through to the position-10 lookup.
        vin = vin.upper().strip()
        if len(vin) < 10:
            return None, None

        pos10 = vin[9]
        year = self.YEAR_CODES.get(pos10)
        if year is None:
            logger.warning("Invalid year code '%s' in VIN %s", pos10, vin)
            return None, None

        alt_year = year - 30

        # Determine if vehicle is car or light truck
        is_car_lt = vehicle_type_id in (2, 7) or \
            (vehicle_type_id == 3 and truck_type_id == 1)

        # Use position 7 for disambiguation (always present: len(vin) >= 10)
        if is_car_lt:
            pos7 = vin[6]
            if pos7.isdigit():
                # Numeric = older cycle is primary
                return alt_year, year
            if pos7.isalpha():
                # Alpha = newer cycle is primary
                return year, alt_year

        # A model year more than one year ahead of today is implausible, so
        # prefer the older cycle in that case.
        if year > datetime.now().year + 1:
            return alt_year, year

        return year, alt_year

    def extract_from_descriptor(self, descriptor: str) -> Optional[int]:
        """Extract year from VIN descriptor if cached.

        Placeholder: the intended implementation would query the
        vin_descriptors table. Currently always returns ``None``.
        """
        return None
|
||||
|
||||
|
||||
class VINDescriptorBuilder:
    """Build VIN descriptor for caching."""

    def build_descriptor(self, vin: str) -> str:
        """Build VIN descriptor following NHTSA rules.

        - Upper-case and strip the VIN, then pad it to 17 characters with
          asterisks (longer input is truncated to 17)
        - Mask position 9 (check digit) with an asterisk
        - Use first 11 chars (or 14 if position 3 = '9')

        Args:
            vin: Full or partial VIN.

        Returns:
            The descriptor string, or ``""`` when ``vin`` is empty, ``None``
            or consists only of whitespace.
        """
        if not vin:
            return ""

        vin = vin.upper().strip()
        if not vin:
            # Whitespace-only input carries no information; do not turn it
            # into an all-asterisk descriptor.
            return ""

        # Pad to 17 characters with asterisks
        padded = vin.ljust(17, '*')[:17]

        # Mask position 9
        masked = padded[:8] + '*' + padded[9:]

        # Determine descriptor length (padding guarantees index 2 exists)
        descriptor_length = 14 if masked[2] == '9' else 11
        return masked[:descriptor_length]
|
||||
Reference in New Issue
Block a user