All checks were successful
Deploy to Staging / Build Images (pull_request) Successful in 3m1s
Deploy to Staging / Deploy to Staging (pull_request) Successful in 31s
Deploy to Staging / Verify Staging (pull_request) Successful in 2m19s
Deploy to Staging / Notify Staging Ready (pull_request) Successful in 7s
Deploy to Staging / Notify Staging Failure (pull_request) Has been skipped
Implement async PDF processing for owner's manuals with maintenance schedule extraction: - Add PDF preprocessor with PyMuPDF for text/scanned PDF handling - Add maintenance pattern matching (mileage, time, fluid specs) - Add service name mapping to maintenance subtypes - Add table detection and parsing for schedule tables - Add manual extractor orchestrating the complete pipeline - Add POST /extract/manual endpoint for async job submission - Add Redis job queue support for manual extraction jobs - Add progress tracking during processing Processing pipeline: 1. Analyze PDF structure (text layer vs scanned) 2. Find maintenance schedule sections 3. Extract text or OCR scanned pages at 300 DPI 4. Detect and parse maintenance tables 5. Normalize service names and extract intervals 6. Return structured maintenance schedules with confidence scores Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
260 lines
15 KiB
Python
260 lines
15 KiB
Python
"""Service name normalization and mapping to maintenance subtypes."""
|
|
import re
|
|
from dataclasses import dataclass
|
|
from typing import Optional
|
|
|
|
|
|
@dataclass
class ServiceMapping:
    """Result of resolving extracted service text to maintenance subtypes.

    Attributes:
        normalized_name: Standardized service name.
        subtypes: Maintenance subtypes from the system.
        category: One of routine_maintenance, repair, performance_upgrade.
        confidence: Confidence score attached to the mapping.
    """

    normalized_name: str
    subtypes: list[str]
    category: str
    confidence: float
|
|
|
|
|
|
# Canonical routine-maintenance subtype names used by the system.
# Subtype strings emitted elsewhere must match these entries exactly.
ROUTINE_MAINTENANCE_SUBTYPES = [
    "Accelerator Pedal",
    "Air Filter Element",
    "Brakes and Traction Control",
    "Cabin Air Filter / Purifier",
    "Coolant",
    "Doors",
    "Drive Belt",
    "Engine Oil",
    "Evaporative Emissions System",
    "Exhaust System",
    "Fluid - A/T",
    "Fluid - Differential",
    "Fluid - M/T",
    "Fluid Filter - A/T",
    "Fluids",
    "Fuel Delivery and Air Induction",
    "Hood Shock / Support",
    "Neutral Safety Switch",
    "Parking Brake System",
    "Restraints and Safety Systems",
    "Shift Interlock A/T",
    "Spark Plug",
    "Steering and Suspension",
    "Tires",
    "Trunk / Liftgate Shock / Support",
    "Washer Fluid",
    "Wiper Blade",
]
|
|
|
|
|
|
class ServiceMapper:
    """Map extracted service names to maintenance subtypes.

    Lookup happens in two stages:

    1. Keyword lookup: every ``SERVICE_MAPPINGS`` key that occurs as a
       substring of the (lower-cased, whitespace-collapsed) text is a
       candidate, and the *longest* candidate wins. This keeps a generic key
       such as ``"air filter"`` from shadowing the more specific
       ``"cabin air filter"``.
    2. Regex fallback: ``SERVICE_PATTERNS`` are tried in order, but only when
       no keyword matched.

    NOTE(review): keyword matching is plain substring containment with no
    word boundaries, so a short key like ``"atf"`` also matches inside
    unrelated words (e.g. "platform"). Tightening that would change which
    plural/inflected forms match, so it is left as-is here.
    """

    # Mapping from common service terms to system subtypes.
    # Keys are lowercase phrases; values are
    # (normalized_name, subtypes, category, confidence).
    SERVICE_MAPPINGS: dict[str, tuple[str, list[str], str, float]] = {
        # Oil related
        "engine oil": ("Engine Oil Change", ["Engine Oil"], "routine_maintenance", 0.95),
        "oil change": ("Engine Oil Change", ["Engine Oil"], "routine_maintenance", 0.95),
        "motor oil": ("Engine Oil Change", ["Engine Oil"], "routine_maintenance", 0.93),
        "oil and filter": ("Engine Oil Change", ["Engine Oil"], "routine_maintenance", 0.95),
        "oil & filter": ("Engine Oil Change", ["Engine Oil"], "routine_maintenance", 0.95),
        "change engine oil": ("Engine Oil Change", ["Engine Oil"], "routine_maintenance", 0.95),
        "replace engine oil": ("Engine Oil Change", ["Engine Oil"], "routine_maintenance", 0.95),
        # Air filter
        "air filter": ("Air Filter Replacement", ["Air Filter Element"], "routine_maintenance", 0.90),
        "engine air filter": ("Air Filter Replacement", ["Air Filter Element"], "routine_maintenance", 0.95),
        "air cleaner": ("Air Filter Replacement", ["Air Filter Element"], "routine_maintenance", 0.88),
        "air cleaner element": ("Air Filter Replacement", ["Air Filter Element"], "routine_maintenance", 0.93),
        "replace air filter": ("Air Filter Replacement", ["Air Filter Element"], "routine_maintenance", 0.95),
        # Cabin filter
        "cabin air filter": ("Cabin Air Filter Replacement", ["Cabin Air Filter / Purifier"], "routine_maintenance", 0.95),
        "cabin filter": ("Cabin Air Filter Replacement", ["Cabin Air Filter / Purifier"], "routine_maintenance", 0.93),
        "a/c filter": ("Cabin Air Filter Replacement", ["Cabin Air Filter / Purifier"], "routine_maintenance", 0.88),
        "hvac filter": ("Cabin Air Filter Replacement", ["Cabin Air Filter / Purifier"], "routine_maintenance", 0.88),
        "interior air filter": ("Cabin Air Filter Replacement", ["Cabin Air Filter / Purifier"], "routine_maintenance", 0.90),
        "dust and pollen filter": ("Cabin Air Filter Replacement", ["Cabin Air Filter / Purifier"], "routine_maintenance", 0.90),
        # Tires
        "tire rotation": ("Tire Rotation", ["Tires"], "routine_maintenance", 0.98),
        "rotate tires": ("Tire Rotation", ["Tires"], "routine_maintenance", 0.95),
        "tire inspection": ("Tire Inspection", ["Tires"], "routine_maintenance", 0.93),
        "inspect tires": ("Tire Inspection", ["Tires"], "routine_maintenance", 0.93),
        "check tire pressure": ("Tire Pressure Check", ["Tires"], "routine_maintenance", 0.90),
        "tire pressure": ("Tire Pressure Check", ["Tires"], "routine_maintenance", 0.85),
        # Brakes
        "brake inspection": ("Brake Inspection", ["Brakes and Traction Control"], "routine_maintenance", 0.95),
        "inspect brakes": ("Brake Inspection", ["Brakes and Traction Control"], "routine_maintenance", 0.93),
        "brake fluid": ("Brake Fluid Service", ["Brakes and Traction Control"], "routine_maintenance", 0.93),
        "brake pads": ("Brake Pad Inspection", ["Brakes and Traction Control"], "routine_maintenance", 0.90),
        "parking brake": ("Parking Brake Inspection", ["Parking Brake System"], "routine_maintenance", 0.93),
        # Coolant
        "coolant": ("Coolant Service", ["Coolant"], "routine_maintenance", 0.90),
        "engine coolant": ("Coolant Service", ["Coolant"], "routine_maintenance", 0.93),
        "antifreeze": ("Coolant Service", ["Coolant"], "routine_maintenance", 0.90),
        "cooling system": ("Coolant Service", ["Coolant"], "routine_maintenance", 0.88),
        "radiator fluid": ("Coolant Service", ["Coolant"], "routine_maintenance", 0.88),
        "replace coolant": ("Coolant Replacement", ["Coolant"], "routine_maintenance", 0.95),
        # Transmission
        "transmission fluid": ("Transmission Fluid Service", ["Fluid - A/T"], "routine_maintenance", 0.93),
        "automatic transmission fluid": ("Transmission Fluid Service", ["Fluid - A/T"], "routine_maintenance", 0.95),
        "atf": ("Transmission Fluid Service", ["Fluid - A/T"], "routine_maintenance", 0.90),
        "manual transmission fluid": ("Manual Transmission Fluid", ["Fluid - M/T"], "routine_maintenance", 0.95),
        "cvt fluid": ("CVT Fluid Service", ["Fluid - A/T"], "routine_maintenance", 0.93),
        "transmission filter": ("Transmission Filter", ["Fluid Filter - A/T"], "routine_maintenance", 0.93),
        # Differential
        "differential fluid": ("Differential Fluid Service", ["Fluid - Differential"], "routine_maintenance", 0.95),
        "rear differential": ("Differential Fluid Service", ["Fluid - Differential"], "routine_maintenance", 0.93),
        "front differential": ("Differential Fluid Service", ["Fluid - Differential"], "routine_maintenance", 0.93),
        "transfer case": ("Transfer Case Fluid", ["Fluid - Differential"], "routine_maintenance", 0.90),
        # Spark plugs
        "spark plug": ("Spark Plug Replacement", ["Spark Plug"], "routine_maintenance", 0.95),
        "spark plugs": ("Spark Plug Replacement", ["Spark Plug"], "routine_maintenance", 0.95),
        "replace spark plugs": ("Spark Plug Replacement", ["Spark Plug"], "routine_maintenance", 0.95),
        "ignition plugs": ("Spark Plug Replacement", ["Spark Plug"], "routine_maintenance", 0.88),
        # Drive belt
        "drive belt": ("Drive Belt Inspection", ["Drive Belt"], "routine_maintenance", 0.93),
        "serpentine belt": ("Drive Belt Inspection", ["Drive Belt"], "routine_maintenance", 0.93),
        "accessory belt": ("Drive Belt Inspection", ["Drive Belt"], "routine_maintenance", 0.90),
        "timing belt": ("Timing Belt Service", ["Drive Belt"], "routine_maintenance", 0.90),
        "v-belt": ("Drive Belt Inspection", ["Drive Belt"], "routine_maintenance", 0.88),
        # Wipers
        "wiper blade": ("Wiper Blade Replacement", ["Wiper Blade"], "routine_maintenance", 0.95),
        "wiper blades": ("Wiper Blade Replacement", ["Wiper Blade"], "routine_maintenance", 0.95),
        "windshield wiper": ("Wiper Blade Replacement", ["Wiper Blade"], "routine_maintenance", 0.93),
        "replace wipers": ("Wiper Blade Replacement", ["Wiper Blade"], "routine_maintenance", 0.93),
        # Washer fluid
        "washer fluid": ("Washer Fluid", ["Washer Fluid"], "routine_maintenance", 0.95),
        "windshield washer": ("Washer Fluid", ["Washer Fluid"], "routine_maintenance", 0.90),
        # Steering/Suspension
        "steering": ("Steering Inspection", ["Steering and Suspension"], "routine_maintenance", 0.85),
        "suspension": ("Suspension Inspection", ["Steering and Suspension"], "routine_maintenance", 0.85),
        "power steering": ("Power Steering Fluid", ["Steering and Suspension"], "routine_maintenance", 0.90),
        "power steering fluid": ("Power Steering Fluid", ["Steering and Suspension"], "routine_maintenance", 0.93),
        # Exhaust
        "exhaust": ("Exhaust System Inspection", ["Exhaust System"], "routine_maintenance", 0.88),
        "exhaust system": ("Exhaust System Inspection", ["Exhaust System"], "routine_maintenance", 0.93),
        # Fuel system
        "fuel filter": ("Fuel Filter Replacement", ["Fuel Delivery and Air Induction"], "routine_maintenance", 0.93),
        "fuel system": ("Fuel System Inspection", ["Fuel Delivery and Air Induction"], "routine_maintenance", 0.88),
        "fuel injection": ("Fuel Injection Service", ["Fuel Delivery and Air Induction"], "routine_maintenance", 0.88),
        # Emissions
        "evaporative emissions": ("Evaporative Emissions Inspection", ["Evaporative Emissions System"], "routine_maintenance", 0.93),
        "evap system": ("Evaporative Emissions Inspection", ["Evaporative Emissions System"], "routine_maintenance", 0.90),
        "emissions": ("Evaporative Emissions Inspection", ["Evaporative Emissions System"], "routine_maintenance", 0.80),
        # Safety systems
        "seat belt": ("Safety Systems Inspection", ["Restraints and Safety Systems"], "routine_maintenance", 0.90),
        "airbag": ("Safety Systems Inspection", ["Restraints and Safety Systems"], "routine_maintenance", 0.85),
        "restraint": ("Safety Systems Inspection", ["Restraints and Safety Systems"], "routine_maintenance", 0.85),
        # Miscellaneous
        "battery": ("Battery Inspection", ["Fluids"], "routine_maintenance", 0.80),
        "inspect battery": ("Battery Inspection", ["Fluids"], "routine_maintenance", 0.85),
        "door hinges": ("Door Lubrication", ["Doors"], "routine_maintenance", 0.85),
        "hood shock": ("Hood Shock Inspection", ["Hood Shock / Support"], "routine_maintenance", 0.90),
        "trunk shock": ("Trunk Shock Inspection", ["Trunk / Liftgate Shock / Support"], "routine_maintenance", 0.90),
        "liftgate": ("Liftgate Inspection", ["Trunk / Liftgate Shock / Support"], "routine_maintenance", 0.88),
    }

    # Regex fallbacks, tried in order when no SERVICE_MAPPINGS key matches.
    # Each entry: (regex_pattern, normalized_name, subtypes, category, confidence)
    SERVICE_PATTERNS: list[tuple[str, str, list[str], str, float]] = [
        (r"oil\s+(?:and|&)\s+filter", "Engine Oil Change", ["Engine Oil"], "routine_maintenance", 0.95),
        (r"(?:change|replace)\s+(?:the\s+)?oil", "Engine Oil Change", ["Engine Oil"], "routine_maintenance", 0.93),
        (r"(?:inspect|check)\s+(?:the\s+)?brakes?", "Brake Inspection", ["Brakes and Traction Control"], "routine_maintenance", 0.90),
        (r"(?:inspect|check)\s+(?:the\s+)?tires?", "Tire Inspection", ["Tires"], "routine_maintenance", 0.90),
        (r"(?:rotate|rotation)\s+(?:the\s+)?tires?", "Tire Rotation", ["Tires"], "routine_maintenance", 0.95),
        (r"(?:replace|change)\s+(?:the\s+)?(?:engine\s+)?air\s+filter", "Air Filter Replacement", ["Air Filter Element"], "routine_maintenance", 0.95),
        (r"(?:replace|change)\s+(?:the\s+)?cabin\s+(?:air\s+)?filter", "Cabin Air Filter Replacement", ["Cabin Air Filter / Purifier"], "routine_maintenance", 0.95),
        (r"(?:replace|change)\s+(?:the\s+)?spark\s+plugs?", "Spark Plug Replacement", ["Spark Plug"], "routine_maintenance", 0.95),
        (r"(?:replace|change)\s+(?:the\s+)?coolant", "Coolant Replacement", ["Coolant"], "routine_maintenance", 0.93),
        (r"(?:flush|drain)\s+(?:the\s+)?coolant", "Coolant Flush", ["Coolant"], "routine_maintenance", 0.93),
        (r"(?:replace|change)\s+(?:the\s+)?(?:a/?t|automatic\s+transmission)\s+fluid", "Transmission Fluid Service", ["Fluid - A/T"], "routine_maintenance", 0.93),
        (r"(?:inspect|check)\s+(?:the\s+)?(?:drive|serpentine|accessory)\s+belt", "Drive Belt Inspection", ["Drive Belt"], "routine_maintenance", 0.90),
    ]

    @staticmethod
    def _build_mapping(
        name: str, subtypes: list[str], category: str, confidence: float
    ) -> ServiceMapping:
        """Create a ServiceMapping that owns its own copy of ``subtypes``."""
        # Copy the list so callers mutating the result cannot corrupt the
        # shared class-level lookup tables.
        return ServiceMapping(
            normalized_name=name,
            subtypes=list(subtypes),
            category=category,
            confidence=confidence,
        )

    def map_service(self, service_text: str) -> Optional[ServiceMapping]:
        """
        Map extracted service text to maintenance subtypes.

        The text is lower-cased and runs of whitespace are collapsed to a
        single space before matching, so keys still match when the extracted
        text contains line breaks or repeated spaces.

        Args:
            service_text: Service name or description from the manual

        Returns:
            ServiceMapping or None if no mapping found (including for empty
            or whitespace-only input)
        """
        normalized_text = " ".join((service_text or "").lower().split())
        if not normalized_text:
            return None

        # Keyword lookup. Several keys can be substrings of the same text
        # ("air filter" and "cabin air filter"); the longest one is the most
        # specific. max() returns the first maximal item, so equal-length
        # keys still resolve in dictionary insertion order.
        matching_keys = [key for key in self.SERVICE_MAPPINGS if key in normalized_text]
        if matching_keys:
            best_key = max(matching_keys, key=len)
            return self._build_mapping(*self.SERVICE_MAPPINGS[best_key])

        # Regex fallback, first matching pattern wins.
        for pattern, name, subtypes, category, conf in self.SERVICE_PATTERNS:
            if re.search(pattern, normalized_text, re.IGNORECASE):
                return self._build_mapping(name, subtypes, category, conf)

        return None

    def map_service_fuzzy(self, service_text: str, threshold: float = 0.6) -> Optional[ServiceMapping]:
        """
        Map service text with fuzzy matching for typos and variations.

        ``map_service`` is tried first. If it finds nothing, each
        ``SERVICE_MAPPINGS`` key is scored by word overlap with the text
        (shared words divided by total distinct words) and the best key
        scoring at least ``threshold`` is used. The returned confidence is
        the key's confidence multiplied by that score.

        Args:
            service_text: Service name or description
            threshold: Minimum similarity threshold (0.0-1.0)

        Returns:
            ServiceMapping or None
        """
        # First try exact/pattern matching
        result = self.map_service(service_text)
        if result is not None:
            return result

        # Fall back to word overlap matching
        words = set((service_text or "").lower().split())
        if not words:
            return None

        best_entry: Optional[tuple[str, list[str], str, float]] = None
        best_score = 0.0

        for key, entry in self.SERVICE_MAPPINGS.items():
            key_words = set(key.split())
            # The union is never empty here because ``words`` is non-empty.
            score = len(words & key_words) / len(words | key_words)
            # Strict ">" keeps the earliest key when scores tie.
            if score > best_score and score >= threshold:
                best_score = score
                best_entry = entry

        if best_entry is None:
            return None

        name, subtypes, category, conf = best_entry
        # Reduce confidence by match quality
        return self._build_mapping(name, subtypes, category, conf * best_score)

    def get_all_service_keywords(self) -> list[str]:
        """Get all service keywords for table header detection.

        Returns:
            A new list with every SERVICE_MAPPINGS key followed by a fixed
            set of common header terms.
        """
        return [
            *self.SERVICE_MAPPINGS,
            # Common header terms
            "service", "maintenance", "item", "operation",
            "inspection", "replacement", "interval", "schedule",
        ]
|
|
|
|
|
|
# Shared module-level ServiceMapper instance.
service_mapper = ServiceMapper()
|