Initial Commit

This commit is contained in:
Eric Gullickson
2025-09-17 16:09:15 -05:00
parent 0cdb9803de
commit a052040e3a
373 changed files with 437090 additions and 6773 deletions

View File

@@ -0,0 +1,117 @@
from typing import Dict, List, Set, Optional
import re
import logging
from collections import defaultdict
logger = logging.getLogger(__name__)
class PatternAnalyzer:
    """Analyze VIN patterns to extract vehicle information.

    Pattern "keys" are strings of the form ``"VDS"`` or ``"VDS|VIS"``. The
    part before the pipe is compared against the VIN starting at position 4,
    the part after the pipe against the VIN starting at position 10
    (positions are 1-based).
    """

    # Element IDs handled by extract_make_model_from_patterns, mapped to the
    # result bucket their attribute values are collected into.
    _ELEMENT_BUCKETS: Dict[int, str] = {
        26: 'makes',          # Make
        27: 'models',         # Model
        28: 'trims',          # Trim
        18: 'engines',        # Engine Model
        24: 'transmissions',  # Transmission
    }

    def __init__(self):
        # Initialised empty; no method of this class reads or writes it.
        self.pattern_cache: Dict = {}

    def analyze_pattern_keys(self, keys: str) -> Dict:
        """Split pattern keys into their sections and the VIN positions covered.

        Example: "ABCDE|FGHIJKLM" means positions 4-8 and 10-17.

        Args:
            keys: Pattern keys, optionally containing a ``|`` separator.

        Returns:
            A dict with ``'vds'`` (text before the first pipe), ``'vis'``
            (text between the first and second pipe, or ``''``) and
            ``'positions'`` (1-based VIN positions covered by both sections).
            Anything after a second pipe is ignored.
        """
        parts = keys.split('|')
        vds = parts[0]  # Vehicle Descriptor Section
        vis = parts[1] if len(parts) > 1 else ''  # Vehicle Identifier Section

        # An empty section contributes an empty range, so no guards are needed.
        positions = list(range(4, 4 + len(vds)))
        positions.extend(range(10, 10 + len(vis)))
        return {'vds': vds, 'vis': vis, 'positions': positions}

    def extract_make_model_from_patterns(self, patterns: List[Dict]) -> Dict:
        """Group pattern rows into make/model/trim/engine/transmission sets.

        Rows are grouped by ``(vin_schema_id, keys)``. Each row's
        ``attribute_id`` is added to the bucket selected by its
        ``element_id``; rows with any other element ID only contribute their
        weight.

        Args:
            patterns: Rows read via ``vin_schema_id``, ``keys``,
                ``element_id``, ``attribute_id`` and ``weight``.

        Returns:
            A plain dict keyed by ``(vin_schema_id, keys)``. Each value holds
            the sets ``makes``, ``models``, ``trims``, ``engines`` and
            ``transmissions`` plus ``confidence``, the sum of the weights of
            every row in the group.
        """
        combinations = defaultdict(lambda: {
            'makes': set(),
            'models': set(),
            'trims': set(),
            'engines': set(),
            'transmissions': set(),
            'confidence': 0,
        })

        for pattern in patterns:
            key = (pattern.get('vin_schema_id'), pattern.get('keys'))
            entry = combinations[key]

            bucket = self._ELEMENT_BUCKETS.get(pattern.get('element_id'))
            if bucket is not None:
                entry[bucket].add(pattern.get('attribute_id', ''))

            # A row may carry an explicit None weight; count it as 0 instead
            # of failing on ``int += None``.
            entry['confidence'] += pattern.get('weight') or 0

        return dict(combinations)

    def match_vin_to_pattern(self, vin: str, pattern_keys: str) -> bool:
        """Check whether a VIN matches the given pattern keys.

        ``*`` in the pattern stands for any single character. After that
        substitution each section is used as a regular expression, so any
        other regex syntax present in the pattern is honoured as-is. Matching
        is anchored at the start of the VIN slice only.

        Args:
            vin: The VIN to test. It is compared as given (no case folding).
            pattern_keys: ``"VDS"`` or ``"VDS|VIS"``. Sections after a second
                pipe are ignored, as in ``analyze_pattern_keys``.

        Returns:
            True when every section matches. False when either argument is
            empty, or when the VIN is shorter than 17 characters (two-section
            pattern) or 8 characters (single-section pattern).
        """
        if not vin or not pattern_keys:
            return False

        if '|' in pattern_keys:
            if len(vin) < 17:
                return False
            parts = pattern_keys.split('|')
            # (section pattern, 0-based VIN offset); only the first two
            # sections are meaningful, extra pipes must not raise.
            sections = [(parts[0], 3), (parts[1], 9)]
        else:
            # Single segment pattern (positions 4-8)
            if len(vin) < 8:
                return False
            sections = [(pattern_keys, 3)]

        # all() short-circuits, so later sections are only evaluated when the
        # earlier ones matched.
        return all(
            re.match(section.replace('*', '.'),
                     vin[offset:offset + len(section)]) is not None
            for section, offset in sections
        )

    def calculate_pattern_confidence(self,
                                     pattern_matches: List[Dict],
                                     element_weights: Dict[int, int]) -> int:
        """Sum the weights of the distinct elements present in the matches.

        Args:
            pattern_matches: Matched rows; only ``element_id`` is read.
            element_weights: Weight per element ID; missing IDs count as 0.

        Returns:
            The total weight, counting each element ID once no matter how
            many matches carry it.
        """
        distinct_elements = {match.get('element_id') for match in pattern_matches}
        return sum(element_weights.get(element_id, 0)
                   for element_id in distinct_elements)

View File

@@ -0,0 +1,95 @@
from typing import Tuple, Optional, Dict
import re
from datetime import datetime
import logging
logger = logging.getLogger(__name__)
class VINYearExtractor:
    """Extract model year from VIN following NHTSA logic.

    The year code in VIN position 10 is mapped to a year between 2010 and
    2039; the other candidate is always that year minus 30. Position 7 and
    the current date are used to decide which candidate is reported first.
    """

    # Year code mapping for position 10 (the later of the two candidates).
    YEAR_CODES = {
        'A': 2010, 'B': 2011, 'C': 2012, 'D': 2013,
        'E': 2014, 'F': 2015, 'G': 2016, 'H': 2017,
        'J': 2018, 'K': 2019, 'L': 2020, 'M': 2021,
        'N': 2022, 'P': 2023, 'R': 2024, 'S': 2025,
        'T': 2026, 'V': 2027, 'W': 2028, 'X': 2029,
        'Y': 2030, '1': 2031, '2': 2032, '3': 2033,
        '4': 2034, '5': 2035, '6': 2036, '7': 2037,
        '8': 2038, '9': 2039,
    }

    def extract_year(self,
                     vin: str,
                     vehicle_type_id: Optional[int] = None,
                     truck_type_id: Optional[int] = None) -> Tuple[Optional[int], Optional[int]]:
        """Extract the model year from VIN position 10.

        Args:
            vin: The VIN; surrounding whitespace and letter case are ignored.
            vehicle_type_id: Vehicle type used to decide whether the
                position-7 rule applies (IDs 2 and 7, or 3 together with
                ``truck_type_id == 1``).
            truck_type_id: Truck type, only consulted when
                ``vehicle_type_id`` is 3.

        Returns:
            ``(primary_year, alternative_year)``. ``(None, None)`` when the
            VIN is empty, shorter than 10 characters after trimming, or has
            an unknown year code in position 10.
        """
        if not vin:
            return None, None

        # Normalize before measuring: a short VIN padded with whitespace must
        # not pass the length check and then fail on vin[9].
        vin = vin.upper().strip()
        if len(vin) < 10:
            return None, None

        pos10 = vin[9]
        year = self.YEAR_CODES.get(pos10)
        if year is None:
            logger.warning("Invalid year code '%s' in VIN %s", pos10, vin)
            return None, None
        alt_year = year - 30

        # Determine if vehicle is car or light truck.
        # NOTE(review): the numeric IDs are presumably vPIC vehicle/truck
        # type IDs - confirm against the lookup tables.
        is_car_lt = vehicle_type_id in (2, 7) or (
            vehicle_type_id == 3 and truck_type_id == 1
        )

        # Use position 7 for disambiguation (always present: len(vin) >= 10).
        if is_car_lt:
            pos7 = vin[6]
            if pos7.isdigit():
                # Numeric = older cycle is primary
                return alt_year, year
            if pos7.isalpha():
                # Alpha = newer cycle is primary.
                # NOTE(review): this branch returns without the
                # "too far in the future" check below - confirm intended.
                return year, alt_year

        # Without a position-7 verdict, prefer the older cycle when the newer
        # one lies more than a year in the future.
        if year > datetime.now().year + 1:
            return alt_year, year
        return year, alt_year

    def extract_from_descriptor(self, descriptor: str) -> Optional[int]:
        """Extract year from VIN descriptor if cached.

        Placeholder: the intended implementation would query the
        vin_descriptors table. Currently always returns None.
        """
        return None
class VINDescriptorBuilder:
    """Build VIN descriptor for caching"""

    def build_descriptor(self, vin: str) -> str:
        """Build VIN descriptor following NHTSA rules.

        - Upper-case and trim the VIN, then pad it to 17 characters with '*'
        - Mask position 9 (check digit) with '*'
        - Use first 11 chars (or 14 if position 3 = '9')

        Args:
            vin: Full or partial VIN.

        Returns:
            The descriptor string, or ``""`` when the VIN is empty or
            contains only whitespace.
        """
        if not vin:
            return ""

        vin = vin.upper().strip()
        # A whitespace-only VIN is as empty as "": do not turn it into an
        # all-asterisk descriptor.
        if not vin:
            return ""

        # Pad to 17 characters with asterisks (longer input is truncated).
        padded = vin.ljust(17, '*')[:17]

        # Mask position 9
        masked = padded[:8] + '*' + padded[9:]

        # Determine descriptor length; position 3 always exists after padding.
        length = 14 if masked[2] == '9' else 11
        return masked[:length]