Files
motovaultpro/ocr/app/patterns/service_mapping.py
Eric Gullickson 3eb54211cb
All checks were successful
Deploy to Staging / Build Images (pull_request) Successful in 3m1s
Deploy to Staging / Deploy to Staging (pull_request) Successful in 31s
Deploy to Staging / Verify Staging (pull_request) Successful in 2m19s
Deploy to Staging / Notify Staging Ready (pull_request) Successful in 7s
Deploy to Staging / Notify Staging Failure (pull_request) Has been skipped
feat: add owner's manual OCR pipeline (refs #71)
Implement async PDF processing for owner's manuals with maintenance
schedule extraction:

- Add PDF preprocessor with PyMuPDF for text/scanned PDF handling
- Add maintenance pattern matching (mileage, time, fluid specs)
- Add service name mapping to maintenance subtypes
- Add table detection and parsing for schedule tables
- Add manual extractor orchestrating the complete pipeline
- Add POST /extract/manual endpoint for async job submission
- Add Redis job queue support for manual extraction jobs
- Add progress tracking during processing

Processing pipeline:
1. Analyze PDF structure (text layer vs scanned)
2. Find maintenance schedule sections
3. Extract text or OCR scanned pages at 300 DPI
4. Detect and parse maintenance tables
5. Normalize service names and extract intervals
6. Return structured maintenance schedules with confidence scores

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-01 21:30:20 -06:00

260 lines
15 KiB
Python

"""Service name normalization and mapping to maintenance subtypes."""
import re
from dataclasses import dataclass
from typing import Optional
@dataclass
class ServiceMapping:
    """Result of mapping extracted service text to maintenance subtypes.

    Attributes:
        normalized_name: Standardized service name.
        subtypes: Maintenance subtypes from the system.
        category: routine_maintenance, repair, or performance_upgrade.
        confidence: Confidence score attached to the mapping.
    """

    normalized_name: str
    subtypes: list[str]
    category: str
    confidence: float
# Routine-maintenance subtype names as defined by the system.
# The strings must match the system's values exactly; entries are listed
# alphabetically, one line per leading letter.
ROUTINE_MAINTENANCE_SUBTYPES = [
    "Accelerator Pedal", "Air Filter Element",
    "Brakes and Traction Control",
    "Cabin Air Filter / Purifier", "Coolant",
    "Doors", "Drive Belt",
    "Engine Oil", "Evaporative Emissions System", "Exhaust System",
    "Fluid - A/T", "Fluid - Differential", "Fluid - M/T", "Fluid Filter - A/T",
    "Fluids", "Fuel Delivery and Air Induction",
    "Hood Shock / Support",
    "Neutral Safety Switch",
    "Parking Brake System",
    "Restraints and Safety Systems",
    "Shift Interlock A/T", "Spark Plug", "Steering and Suspension",
    "Tires", "Trunk / Liftgate Shock / Support",
    "Washer Fluid", "Wiper Blade",
]
class ServiceMapper:
    """Map extracted service names to maintenance subtypes.

    Lookup happens in stages: literal keyword terms from ``SERVICE_MAPPINGS``
    are tried first, then the regular expressions in ``SERVICE_PATTERNS``.
    ``map_service_fuzzy`` adds a word-overlap fallback on top of both.
    """

    # Mapping from common service terms to system subtypes
    # Keys are lowercase terms, values are (normalized_name, subtypes, category, confidence)
    SERVICE_MAPPINGS: dict[str, tuple[str, list[str], str, float]] = {
        # Oil related
        "engine oil": ("Engine Oil Change", ["Engine Oil"], "routine_maintenance", 0.95),
        "oil change": ("Engine Oil Change", ["Engine Oil"], "routine_maintenance", 0.95),
        "motor oil": ("Engine Oil Change", ["Engine Oil"], "routine_maintenance", 0.93),
        "oil and filter": ("Engine Oil Change", ["Engine Oil"], "routine_maintenance", 0.95),
        "oil & filter": ("Engine Oil Change", ["Engine Oil"], "routine_maintenance", 0.95),
        "change engine oil": ("Engine Oil Change", ["Engine Oil"], "routine_maintenance", 0.95),
        "replace engine oil": ("Engine Oil Change", ["Engine Oil"], "routine_maintenance", 0.95),
        # Air filter
        "air filter": ("Air Filter Replacement", ["Air Filter Element"], "routine_maintenance", 0.90),
        "engine air filter": ("Air Filter Replacement", ["Air Filter Element"], "routine_maintenance", 0.95),
        "air cleaner": ("Air Filter Replacement", ["Air Filter Element"], "routine_maintenance", 0.88),
        "air cleaner element": ("Air Filter Replacement", ["Air Filter Element"], "routine_maintenance", 0.93),
        "replace air filter": ("Air Filter Replacement", ["Air Filter Element"], "routine_maintenance", 0.95),
        # Cabin filter
        "cabin air filter": ("Cabin Air Filter Replacement", ["Cabin Air Filter / Purifier"], "routine_maintenance", 0.95),
        "cabin filter": ("Cabin Air Filter Replacement", ["Cabin Air Filter / Purifier"], "routine_maintenance", 0.93),
        "a/c filter": ("Cabin Air Filter Replacement", ["Cabin Air Filter / Purifier"], "routine_maintenance", 0.88),
        "hvac filter": ("Cabin Air Filter Replacement", ["Cabin Air Filter / Purifier"], "routine_maintenance", 0.88),
        "interior air filter": ("Cabin Air Filter Replacement", ["Cabin Air Filter / Purifier"], "routine_maintenance", 0.90),
        "dust and pollen filter": ("Cabin Air Filter Replacement", ["Cabin Air Filter / Purifier"], "routine_maintenance", 0.90),
        # Tires
        "tire rotation": ("Tire Rotation", ["Tires"], "routine_maintenance", 0.98),
        "rotate tires": ("Tire Rotation", ["Tires"], "routine_maintenance", 0.95),
        "tire inspection": ("Tire Inspection", ["Tires"], "routine_maintenance", 0.93),
        "inspect tires": ("Tire Inspection", ["Tires"], "routine_maintenance", 0.93),
        "check tire pressure": ("Tire Pressure Check", ["Tires"], "routine_maintenance", 0.90),
        "tire pressure": ("Tire Pressure Check", ["Tires"], "routine_maintenance", 0.85),
        # Brakes
        "brake inspection": ("Brake Inspection", ["Brakes and Traction Control"], "routine_maintenance", 0.95),
        "inspect brakes": ("Brake Inspection", ["Brakes and Traction Control"], "routine_maintenance", 0.93),
        "brake fluid": ("Brake Fluid Service", ["Brakes and Traction Control"], "routine_maintenance", 0.93),
        "brake pads": ("Brake Pad Inspection", ["Brakes and Traction Control"], "routine_maintenance", 0.90),
        "parking brake": ("Parking Brake Inspection", ["Parking Brake System"], "routine_maintenance", 0.93),
        # Coolant
        "coolant": ("Coolant Service", ["Coolant"], "routine_maintenance", 0.90),
        "engine coolant": ("Coolant Service", ["Coolant"], "routine_maintenance", 0.93),
        "antifreeze": ("Coolant Service", ["Coolant"], "routine_maintenance", 0.90),
        "cooling system": ("Coolant Service", ["Coolant"], "routine_maintenance", 0.88),
        "radiator fluid": ("Coolant Service", ["Coolant"], "routine_maintenance", 0.88),
        "replace coolant": ("Coolant Replacement", ["Coolant"], "routine_maintenance", 0.95),
        # Transmission
        "transmission fluid": ("Transmission Fluid Service", ["Fluid - A/T"], "routine_maintenance", 0.93),
        "automatic transmission fluid": ("Transmission Fluid Service", ["Fluid - A/T"], "routine_maintenance", 0.95),
        "atf": ("Transmission Fluid Service", ["Fluid - A/T"], "routine_maintenance", 0.90),
        "manual transmission fluid": ("Manual Transmission Fluid", ["Fluid - M/T"], "routine_maintenance", 0.95),
        "cvt fluid": ("CVT Fluid Service", ["Fluid - A/T"], "routine_maintenance", 0.93),
        "transmission filter": ("Transmission Filter", ["Fluid Filter - A/T"], "routine_maintenance", 0.93),
        # Differential
        "differential fluid": ("Differential Fluid Service", ["Fluid - Differential"], "routine_maintenance", 0.95),
        "rear differential": ("Differential Fluid Service", ["Fluid - Differential"], "routine_maintenance", 0.93),
        "front differential": ("Differential Fluid Service", ["Fluid - Differential"], "routine_maintenance", 0.93),
        "transfer case": ("Transfer Case Fluid", ["Fluid - Differential"], "routine_maintenance", 0.90),
        # Spark plugs
        "spark plug": ("Spark Plug Replacement", ["Spark Plug"], "routine_maintenance", 0.95),
        "spark plugs": ("Spark Plug Replacement", ["Spark Plug"], "routine_maintenance", 0.95),
        "replace spark plugs": ("Spark Plug Replacement", ["Spark Plug"], "routine_maintenance", 0.95),
        "ignition plugs": ("Spark Plug Replacement", ["Spark Plug"], "routine_maintenance", 0.88),
        # Drive belt
        "drive belt": ("Drive Belt Inspection", ["Drive Belt"], "routine_maintenance", 0.93),
        "serpentine belt": ("Drive Belt Inspection", ["Drive Belt"], "routine_maintenance", 0.93),
        "accessory belt": ("Drive Belt Inspection", ["Drive Belt"], "routine_maintenance", 0.90),
        "timing belt": ("Timing Belt Service", ["Drive Belt"], "routine_maintenance", 0.90),
        "v-belt": ("Drive Belt Inspection", ["Drive Belt"], "routine_maintenance", 0.88),
        # Wipers
        "wiper blade": ("Wiper Blade Replacement", ["Wiper Blade"], "routine_maintenance", 0.95),
        "wiper blades": ("Wiper Blade Replacement", ["Wiper Blade"], "routine_maintenance", 0.95),
        "windshield wiper": ("Wiper Blade Replacement", ["Wiper Blade"], "routine_maintenance", 0.93),
        "replace wipers": ("Wiper Blade Replacement", ["Wiper Blade"], "routine_maintenance", 0.93),
        # Washer fluid
        "washer fluid": ("Washer Fluid", ["Washer Fluid"], "routine_maintenance", 0.95),
        "windshield washer": ("Washer Fluid", ["Washer Fluid"], "routine_maintenance", 0.90),
        # Steering/Suspension
        "steering": ("Steering Inspection", ["Steering and Suspension"], "routine_maintenance", 0.85),
        "suspension": ("Suspension Inspection", ["Steering and Suspension"], "routine_maintenance", 0.85),
        "power steering": ("Power Steering Fluid", ["Steering and Suspension"], "routine_maintenance", 0.90),
        "power steering fluid": ("Power Steering Fluid", ["Steering and Suspension"], "routine_maintenance", 0.93),
        # Exhaust
        "exhaust": ("Exhaust System Inspection", ["Exhaust System"], "routine_maintenance", 0.88),
        "exhaust system": ("Exhaust System Inspection", ["Exhaust System"], "routine_maintenance", 0.93),
        # Fuel system
        "fuel filter": ("Fuel Filter Replacement", ["Fuel Delivery and Air Induction"], "routine_maintenance", 0.93),
        "fuel system": ("Fuel System Inspection", ["Fuel Delivery and Air Induction"], "routine_maintenance", 0.88),
        "fuel injection": ("Fuel Injection Service", ["Fuel Delivery and Air Induction"], "routine_maintenance", 0.88),
        # Emissions
        "evaporative emissions": ("Evaporative Emissions Inspection", ["Evaporative Emissions System"], "routine_maintenance", 0.93),
        "evap system": ("Evaporative Emissions Inspection", ["Evaporative Emissions System"], "routine_maintenance", 0.90),
        "emissions": ("Evaporative Emissions Inspection", ["Evaporative Emissions System"], "routine_maintenance", 0.80),
        # Safety systems
        "seat belt": ("Safety Systems Inspection", ["Restraints and Safety Systems"], "routine_maintenance", 0.90),
        "airbag": ("Safety Systems Inspection", ["Restraints and Safety Systems"], "routine_maintenance", 0.85),
        "restraint": ("Safety Systems Inspection", ["Restraints and Safety Systems"], "routine_maintenance", 0.85),
        # Miscellaneous
        "battery": ("Battery Inspection", ["Fluids"], "routine_maintenance", 0.80),
        "inspect battery": ("Battery Inspection", ["Fluids"], "routine_maintenance", 0.85),
        "door hinges": ("Door Lubrication", ["Doors"], "routine_maintenance", 0.85),
        "hood shock": ("Hood Shock Inspection", ["Hood Shock / Support"], "routine_maintenance", 0.90),
        "trunk shock": ("Trunk Shock Inspection", ["Trunk / Liftgate Shock / Support"], "routine_maintenance", 0.90),
        "liftgate": ("Liftgate Inspection", ["Trunk / Liftgate Shock / Support"], "routine_maintenance", 0.88),
    }

    # Pattern-based mappings, consulted only when no keyword term matches.
    # NOTE(review): the two coolant patterns below require the literal word
    # "coolant", which the keyword stage always resolves first, so
    # "Coolant Flush" is currently never produced by map_service.
    SERVICE_PATTERNS: list[tuple[str, str, list[str], str, float]] = [
        # (regex_pattern, normalized_name, subtypes, category, confidence)
        (r"oil\s+(?:and|&)\s+filter", "Engine Oil Change", ["Engine Oil"], "routine_maintenance", 0.95),
        (r"(?:change|replace)\s+(?:the\s+)?oil", "Engine Oil Change", ["Engine Oil"], "routine_maintenance", 0.93),
        (r"(?:inspect|check)\s+(?:the\s+)?brakes?", "Brake Inspection", ["Brakes and Traction Control"], "routine_maintenance", 0.90),
        (r"(?:inspect|check)\s+(?:the\s+)?tires?", "Tire Inspection", ["Tires"], "routine_maintenance", 0.90),
        (r"(?:rotate|rotation)\s+(?:the\s+)?tires?", "Tire Rotation", ["Tires"], "routine_maintenance", 0.95),
        (r"(?:replace|change)\s+(?:the\s+)?(?:engine\s+)?air\s+filter", "Air Filter Replacement", ["Air Filter Element"], "routine_maintenance", 0.95),
        (r"(?:replace|change)\s+(?:the\s+)?cabin\s+(?:air\s+)?filter", "Cabin Air Filter Replacement", ["Cabin Air Filter / Purifier"], "routine_maintenance", 0.95),
        (r"(?:replace|change)\s+(?:the\s+)?spark\s+plugs?", "Spark Plug Replacement", ["Spark Plug"], "routine_maintenance", 0.95),
        (r"(?:replace|change)\s+(?:the\s+)?coolant", "Coolant Replacement", ["Coolant"], "routine_maintenance", 0.93),
        (r"(?:flush|drain)\s+(?:the\s+)?coolant", "Coolant Flush", ["Coolant"], "routine_maintenance", 0.93),
        (r"(?:replace|change)\s+(?:the\s+)?(?:a/?t|automatic\s+transmission)\s+fluid", "Transmission Fluid Service", ["Fluid - A/T"], "routine_maintenance", 0.93),
        (r"(?:inspect|check)\s+(?:the\s+)?(?:drive|serpentine|accessory)\s+belt", "Drive Belt Inspection", ["Drive Belt"], "routine_maintenance", 0.90),
    ]

    # Punctuation stripped from the ends of tokens before word-overlap scoring.
    _TOKEN_STRIP_CHARS = ".,;:!?()[]{}\"'"

    @staticmethod
    def _contains_term(text: str, term: str) -> bool:
        """Return True if *term* occurs in *text* beginning at a word start.

        Only the left edge is checked, so plural or suffixed forms
        ("restraints" for "restraint") still match, while hits buried inside
        another word ("atf" in "platform") are rejected.
        """
        start = text.find(term)
        while start != -1:
            if start == 0 or not text[start - 1].isalnum():
                return True
            start = text.find(term, start + 1)
        return False

    def _find_keyword_key(self, normalized_text: str) -> Optional[str]:
        """Return the most specific ``SERVICE_MAPPINGS`` key found in the text.

        The longest matching key wins so that a generic term ("air filter")
        cannot shadow a more specific one ("cabin air filter"). Keys of equal
        length keep their table order because ``max`` returns the first
        maximal item.
        """
        candidates = [
            key
            for key in self.SERVICE_MAPPINGS
            if self._contains_term(normalized_text, key)
        ]
        if not candidates:
            return None
        return max(candidates, key=len)

    @classmethod
    def _tokenize(cls, text: str) -> set[str]:
        """Split text into lowercase words with surrounding punctuation removed."""
        stripped = (
            token.strip(cls._TOKEN_STRIP_CHARS) for token in text.lower().split()
        )
        return {token for token in stripped if token}

    @staticmethod
    def _build_mapping(
        name: str, subtypes: list[str], category: str, confidence: float
    ) -> "ServiceMapping":
        """Create a ServiceMapping that owns its own copy of the subtype list."""
        return ServiceMapping(
            normalized_name=name,
            # Copy so callers mutating the result cannot corrupt the shared table.
            subtypes=list(subtypes),
            category=category,
            confidence=confidence,
        )

    def map_service(self, service_text: str) -> Optional["ServiceMapping"]:
        """
        Map extracted service text to maintenance subtypes.

        Keyword terms are checked first (most specific term wins), then the
        regular-expression patterns in table order.

        Args:
            service_text: Service name or description from the manual

        Returns:
            ServiceMapping or None if no mapping found
        """
        normalized_text = service_text.lower().strip()

        # Try keyword mapping first
        key = self._find_keyword_key(normalized_text)
        if key is not None:
            return self._build_mapping(*self.SERVICE_MAPPINGS[key])

        # Try pattern matching
        for pattern, name, subtypes, category, conf in self.SERVICE_PATTERNS:
            if re.search(pattern, normalized_text, re.IGNORECASE):
                return self._build_mapping(name, subtypes, category, conf)

        return None

    def map_service_fuzzy(
        self, service_text: str, threshold: float = 0.6
    ) -> Optional["ServiceMapping"]:
        """
        Map service text, falling back to word-overlap matching.

        The fallback scores each keyword term by the ratio of shared words to
        total distinct words, so it tolerates reordered or extra words; it
        does not correct misspelled words.

        Args:
            service_text: Service name or description
            threshold: Minimum similarity threshold (0.0-1.0)

        Returns:
            ServiceMapping or None
        """
        # First try the regular keyword/pattern lookup
        result = self.map_service(service_text)
        if result is not None:
            return result

        # Fall back to word overlap matching
        words = self._tokenize(service_text)
        best_entry: Optional[tuple[str, list[str], str, float]] = None
        best_score = 0.0

        for key, (name, subtypes, category, conf) in self.SERVICE_MAPPINGS.items():
            key_words = set(key.split())
            total = len(words | key_words)
            if total == 0:
                continue
            score = len(words & key_words) / total
            if score > best_score and score >= threshold:
                best_score = score
                # Reduce confidence by match quality
                best_entry = (name, subtypes, category, conf * score)

        if best_entry is None:
            return None
        return self._build_mapping(*best_entry)

    def get_all_service_keywords(self) -> list[str]:
        """Get all service keywords for table header detection.

        Returns:
            Every ``SERVICE_MAPPINGS`` key followed by common header terms.
        """
        return [
            *self.SERVICE_MAPPINGS,
            # Common header terms
            "service", "maintenance", "item", "operation",
            "inspection", "replacement", "interval", "schedule",
        ]
# Singleton instance: module-level ServiceMapper created once at import time.
service_mapper = ServiceMapper()