117 lines
4.4 KiB
Python
Executable File
117 lines
4.4 KiB
Python
Executable File
from typing import Dict, List, Set, Optional
|
|
import re
|
|
import logging
|
|
from collections import defaultdict
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
class PatternAnalyzer:
    """Analyze VIN patterns to extract vehicle information.

    Pattern key strings have the form ``"<vds>"`` or ``"<vds>|<vis>"``:
    the part before the ``|`` is compared against the VIN starting at
    position 4 (Vehicle Descriptor Section) and the part after it against
    the VIN starting at position 10 (Vehicle Identifier Section).
    Positions are 1-based.
    """

    # 0-based offsets into the VIN string for the two segments.
    _VDS_OFFSET = 3   # VIN position 4
    _VIS_OFFSET = 9   # VIN position 10

    # Element IDs that are collected, mapped to the result bucket they fill.
    _ELEMENT_BUCKETS = {
        26: 'makes',          # Make
        27: 'models',         # Model
        28: 'trims',          # Trim
        18: 'engines',        # Engine Model
        24: 'transmissions',  # Transmission
    }

    def __init__(self):
        self.pattern_cache = {}

    def analyze_pattern_keys(self, keys: str) -> Dict:
        """Split a pattern key string into its VDS / VIS parts.

        Example: ``"ABCDE|FGHIJKLM"`` covers positions 4-8 and 10-17.

        Args:
            keys: Pattern key string, optionally containing ``|`` between
                the VDS and VIS parts. Anything after a second ``|`` is
                ignored.

        Returns:
            A dict with ``'vds'`` and ``'vis'`` (the two parts, ``''`` when
            absent) and ``'positions'`` (the 1-based VIN positions covered,
            one per pattern character).
        """
        # str.split already yields [keys] when no separator is present.
        parts = keys.split('|')
        vds = parts[0]
        vis = parts[1] if len(parts) > 1 else ''

        positions = []
        if vds:
            positions.extend(range(4, 4 + len(vds)))
        if vis:
            positions.extend(range(10, 10 + len(vis)))

        return {
            'vds': vds,  # Vehicle Descriptor Section
            'vis': vis,  # Vehicle Identifier Section
            'positions': positions,
        }

    def extract_make_model_from_patterns(self, patterns: List[Dict]) -> Dict:
        """Group pattern rows into per-(schema, keys) vehicle attribute sets.

        Args:
            patterns: Rows read via ``.get`` for ``'vin_schema_id'``,
                ``'keys'``, ``'element_id'``, ``'attribute_id'`` (default
                ``''``) and ``'weight'`` (default ``0``).

        Returns:
            A plain dict keyed by ``(vin_schema_id, keys)``. Each value
            holds the sets ``'makes'``, ``'models'``, ``'trims'``,
            ``'engines'``, ``'transmissions'`` and a ``'confidence'`` total
            that sums the weight of every row for that key.
        """
        def new_combination():
            # Fresh containers per key so the sets are never shared.
            return {
                'makes': set(),
                'models': set(),
                'trims': set(),
                'engines': set(),
                'transmissions': set(),
                'confidence': 0,
            }

        vehicle_combinations = defaultdict(new_combination)

        for pattern in patterns:
            key = (pattern.get('vin_schema_id'), pattern.get('keys'))
            # Looking the key up unconditionally creates an entry even for
            # rows whose element ID is not one of the collected ones.
            combination = vehicle_combinations[key]

            bucket = self._ELEMENT_BUCKETS.get(pattern.get('element_id'))
            if bucket is not None:
                combination[bucket].add(pattern.get('attribute_id', ''))

            combination['confidence'] += pattern.get('weight', 0)

        return dict(vehicle_combinations)

    @staticmethod
    def _segment_matches(pattern: str, segment: str) -> bool:
        """Return True if *segment* matches *pattern* from its first character.

        Each ``*`` in the pattern stands for exactly one arbitrary character.
        All other characters are handed to ``re`` unchanged.
        """
        return re.match(pattern.replace('*', '.'), segment) is not None

    def match_vin_to_pattern(self, vin: str, pattern_keys: str) -> bool:
        """Check whether a VIN matches a pattern key string.

        ``*`` in the pattern is a single-character wildcard. The remaining
        pattern text is used as a regular expression without escaping, and
        matching is anchored only at the start of each VIN segment.

        Args:
            vin: The VIN to test. It must have at least 17 characters for a
                two-part (``vds|vis``) pattern and at least 8 for a
                single-part pattern.
            pattern_keys: ``"<vds>"`` or ``"<vds>|<vis>"``. As in
                ``analyze_pattern_keys``, anything after a second ``|`` is
                ignored rather than raising.

        Returns:
            True when every pattern part matches its VIN segment; False for
            empty input, a VIN that is too short, or a mismatch.

        Raises:
            re.error: If a pattern part is not a valid regular expression.
        """
        if not vin or not pattern_keys:
            return False

        if '|' not in pattern_keys:
            # Single segment pattern (positions 4-8)
            if len(vin) < 8:
                return False
            start = self._VDS_OFFSET
            segment = vin[start:start + len(pattern_keys)]
            return self._segment_matches(pattern_keys, segment)

        if len(vin) < 17:
            return False

        # Only the first two parts are meaningful; slicing avoids the
        # ValueError that two-target unpacking raises on "A|B|C".
        vds_pattern, vis_pattern = pattern_keys.split('|')[:2]

        vds_actual = vin[self._VDS_OFFSET:self._VDS_OFFSET + len(vds_pattern)]
        vis_actual = vin[self._VIS_OFFSET:self._VIS_OFFSET + len(vis_pattern)]

        return (self._segment_matches(vds_pattern, vds_actual) and
                self._segment_matches(vis_pattern, vis_actual))

    def calculate_pattern_confidence(self,
                                     pattern_matches: List[Dict],
                                     element_weights: Dict[int, int]) -> int:
        """Calculate a confidence score from matched patterns.

        Each distinct ``'element_id'`` found in *pattern_matches* contributes
        its weight once, no matter how many matches share that ID.

        Args:
            pattern_matches: Match rows; only ``'element_id'`` is read.
            element_weights: Weight per element ID. IDs missing from this
                mapping contribute 0.

        Returns:
            The sum of the weights of the distinct matched element IDs.
        """
        total_weight = 0
        seen_elements = set()

        for match in pattern_matches:
            element_id = match.get('element_id')
            if element_id in seen_elements:
                continue
            seen_elements.add(element_id)
            total_weight += element_weights.get(element_id, 0)

        return total_weight