feat: add owner's manual OCR pipeline (refs #71)
All checks were successful
Deploy to Staging / Build Images (pull_request) Successful in 3m1s
Deploy to Staging / Deploy to Staging (pull_request) Successful in 31s
Deploy to Staging / Verify Staging (pull_request) Successful in 2m19s
Deploy to Staging / Notify Staging Ready (pull_request) Successful in 7s
Deploy to Staging / Notify Staging Failure (pull_request) Has been skipped

Implement async PDF processing for owner's manuals with maintenance
schedule extraction:

- Add PDF preprocessor with PyMuPDF for text/scanned PDF handling
- Add maintenance pattern matching (mileage, time, fluid specs)
- Add service name mapping to maintenance subtypes
- Add table detection and parsing for schedule tables
- Add manual extractor orchestrating the complete pipeline
- Add POST /extract/manual endpoint for async job submission
- Add Redis job queue support for manual extraction jobs
- Add progress tracking during processing

Processing pipeline:
1. Analyze PDF structure (text layer vs scanned)
2. Find maintenance schedule sections
3. Extract text or OCR scanned pages at 300 DPI
4. Detect and parse maintenance tables
5. Normalize service names and extract intervals
6. Return structured maintenance schedules with confidence scores

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Eric Gullickson
2026-02-01 21:30:20 -06:00
parent b226ca59de
commit 3eb54211cb
20 changed files with 2904 additions and 14 deletions

View File

@@ -1,7 +1,9 @@
"""Pattern matching modules for receipt field extraction."""
"""Pattern matching modules for receipt and manual field extraction."""
from app.patterns.date_patterns import DatePatternMatcher, date_matcher
from app.patterns.currency_patterns import CurrencyPatternMatcher, currency_matcher
from app.patterns.fuel_patterns import FuelPatternMatcher, fuel_matcher
from app.patterns.maintenance_patterns import MaintenancePatternMatcher, maintenance_matcher
from app.patterns.service_mapping import ServiceMapper, service_mapper
__all__ = [
"DatePatternMatcher",
@@ -10,4 +12,8 @@ __all__ = [
"currency_matcher",
"FuelPatternMatcher",
"fuel_matcher",
"MaintenancePatternMatcher",
"maintenance_matcher",
"ServiceMapper",
"service_mapper",
]

View File

@@ -0,0 +1,335 @@
"""Maintenance schedule pattern matching for owner's manual extraction."""
import re
from dataclasses import dataclass
from typing import Optional
@dataclass
class MileageIntervalMatch:
    """Result of mileage interval pattern matching."""

    value: int  # Interval in miles (thousands separators already removed)
    raw_match: str  # Matched span, taken from the lower-cased input text
    confidence: float  # Confidence attached to the pattern that matched
    pattern_name: str  # Name of the MILEAGE_PATTERNS entry that matched


@dataclass
class TimeIntervalMatch:
    """Result of time interval pattern matching."""

    value: int  # Interval in months (year-based phrases are converted)
    raw_match: str  # Matched span, taken from the lower-cased input text
    confidence: float  # Confidence attached to the pattern that matched
    pattern_name: str  # Name of the TIME_PATTERNS entry that matched


@dataclass
class FluidSpecMatch:
    """Result of fluid specification pattern matching."""

    value: str  # e.g., "0W-20", "ATF-Z1", "DOT 4"
    fluid_type: str  # e.g., "oil", "transmission", "brake"
    raw_match: str  # Matched span exactly as it appeared in the input text
    confidence: float  # Confidence attached to the pattern that matched


class MaintenancePatternMatcher:
    """Extract maintenance-specific data from owner's manual text.

    Each pattern table is a list of ``(regex, name_or_fluid_type, confidence)``
    tuples. Tables are scanned top to bottom and the first pattern that yields
    an acceptable match wins, so the ORDER of entries is significant: more
    specific patterns must come before the generic ones that would also match
    the same text.
    """

    # Mileage interval patterns: (regex, pattern_name, confidence).
    # The number group is ``\d[\d,]*`` so it always contains at least one
    # digit; a stray comma on its own can never reach ``int()``.
    MILEAGE_PATTERNS = [
        # "every 5,000 miles" or "every 5000 miles"
        (
            r"every\s+(\d[\d,]*)\s*(?:miles?|mi\.?)",
            "every_miles",
            0.95,
        ),
        # "at 30,000 mi" or "at 30000 miles"
        (
            r"at\s+(\d[\d,]*)\s*(?:miles?|mi\.?)",
            "at_miles",
            0.93,
        ),
        # "every 5,000-7,500 miles" (range - take lower). Must precede
        # "miles_or": otherwise "every 5,000-7,500 miles or 6 months" would
        # be captured as the UPPER bound. Hyphen, en dash and em dash are
        # accepted because PDF text layers commonly use typographic dashes.
        (
            r"every\s+(\d[\d,]*)\s*[-\u2013\u2014]\s*[\d,]+\s*(?:miles?|mi\.?)",
            "miles_range",
            0.88,
        ),
        # "5,000 miles or" (interval before "or")
        (
            r"(\d[\d,]*)\s*(?:miles?|mi\.?)\s*(?:or|/)",
            "miles_or",
            0.90,
        ),
        # "7,500 mi/12 months" (interval with slash)
        (
            r"(\d[\d,]*)\s*(?:miles?|mi\.?)\s*/",
            "miles_slash",
            0.87,
        ),
        # Standalone "X,XXX miles" in table context
        (
            r"(?<![0-9])(\d[\d,]*)\s*(?:miles?|mi\.?)(?![a-z])",
            "standalone_miles",
            0.75,
        ),
    ]

    # Time interval patterns: (regex, pattern_name, confidence).
    TIME_PATTERNS = [
        # "every 6 months"
        (
            r"every\s+(\d+)\s*months?",
            "every_months",
            0.95,
        ),
        # "6 months or" (interval before "or")
        (
            r"(\d+)\s*months?\s*(?:or|/)",
            "months_or",
            0.90,
        ),
        # "semi-annually" or "semi-annual" -> 6 months. Must precede
        # "annually": the hyphen in "semi-annually" is a word boundary, so
        # r"\bannually\b" also matches inside it and would report 12 months.
        (
            r"\bsemi-?annual(?:ly)?\b",
            "semi_annual",
            0.95,
        ),
        # "annually" -> 12 months
        (
            r"\bannually\b",
            "annually",
            0.95,
        ),
        # "every year" -> 12 months
        (
            r"every\s+year",
            "every_year",
            0.93,
        ),
        # "every 2 years" -> 24 months
        (
            r"every\s+(\d+)\s*years?",
            "every_years",
            0.93,
        ),
        # "12 mo/7,500 mi" or "12 months/"
        (
            r"(\d+)\s*(?:mo(?:nths?)?\.?)\s*/",
            "months_slash",
            0.87,
        ),
        # Standalone "X months" in table context
        (
            r"(?<![0-9])(\d+)\s*months?(?![a-z])",
            "standalone_months",
            0.75,
        ),
    ]

    # Patterns whose month count is implied by the phrase, not captured.
    _FIXED_MONTHS_BY_PATTERN = {
        "annually": 12,
        "semi_annual": 6,
        "every_year": 12,
    }

    # Fluid specification patterns: (regex, fluid_type, confidence).
    FLUID_PATTERNS = [
        # Oil viscosity: 0W-20, 5W-30, 10W-40
        (
            r"\b(\d+W-\d+)\b",
            "oil",
            0.95,
        ),
        # Full synthetic variants. NOTE: the bare viscosity pattern above
        # matches the same text first, so extract_fluid_spec() never returns
        # this entry; it only contributes to extract_all_fluid_specs().
        (
            r"(full\s+synthetic\s+\d+W-\d+)",
            "oil",
            0.93,
        ),
        # Transmission fluid: ATF-Z1, ATF+4, Dexron VI
        # The "\+\d+" alternative is what lets "ATF+4" match; "+" is not a
        # word character, so "[- ]?\w+" alone cannot cover it.
        (
            r"\b(ATF(?:\+\d+|[- ]?\w+))\b",
            "transmission",
            0.90,
        ),
        (
            r"\b(Dexron\s*(?:VI|IV|III)?)\b",
            "transmission",
            0.90,
        ),
        (
            r"\b(Mercon\s*(?:V|LV|SP)?)\b",
            "transmission",
            0.90,
        ),
        # Brake fluid: DOT 3, DOT 4, DOT 5.1
        (
            r"\b(DOT\s*\d(?:\.\d)?)\b",
            "brake",
            0.95,
        ),
        # Coolant types
        (
            r"\b((?:Type\s+)?(?:2|II)\s+(?:coolant|antifreeze))\b",
            "coolant",
            0.88,
        ),
        (
            r"\b((?:50/50|pre-mixed)\s+(?:coolant|antifreeze))\b",
            "coolant",
            0.85,
        ),
        # Power steering fluid
        (
            r"\b(power\s+steering\s+fluid)\b",
            "power_steering",
            0.90,
        ),
    ]

    def extract_mileage_interval(self, text: str) -> Optional[MileageIntervalMatch]:
        """
        Extract mileage interval from text.

        Patterns are tried in MILEAGE_PATTERNS order. Within one pattern every
        occurrence is examined, so an implausible first hit (e.g. "at 50 miles
        per hour") does not hide a later valid interval.

        Args:
            text: Text to search for mileage intervals

        Returns:
            MileageIntervalMatch or None if no interval found
        """
        text_lower = text.lower()

        for pattern, name, confidence in self.MILEAGE_PATTERNS:
            for match in re.finditer(pattern, text_lower, re.IGNORECASE):
                # Group 1 always starts with a digit, so int() cannot see "".
                mileage = int(match.group(1).replace(",", ""))
                if self._is_reasonable_mileage(mileage):
                    return MileageIntervalMatch(
                        value=mileage,
                        raw_match=match.group(0),
                        confidence=confidence,
                        pattern_name=name,
                    )

        return None

    def extract_time_interval(self, text: str) -> Optional[TimeIntervalMatch]:
        """
        Extract time interval from text.

        Patterns are tried in TIME_PATTERNS order and every occurrence of a
        pattern is examined before moving on to the next pattern.

        Args:
            text: Text to search for time intervals

        Returns:
            TimeIntervalMatch (value in months) or None if no interval found
        """
        text_lower = text.lower()

        for pattern, name, confidence in self.TIME_PATTERNS:
            for match in re.finditer(pattern, text_lower, re.IGNORECASE):
                if name in self._FIXED_MONTHS_BY_PATTERN:
                    # Phrase such as "annually" carries no number to parse.
                    months = self._FIXED_MONTHS_BY_PATTERN[name]
                elif name == "every_years":
                    months = int(match.group(1)) * 12
                else:
                    months = int(match.group(1))

                if self._is_reasonable_months(months):
                    return TimeIntervalMatch(
                        value=months,
                        raw_match=match.group(0),
                        confidence=confidence,
                        pattern_name=name,
                    )

        return None

    @staticmethod
    def _fluid_value(match: "re.Match[str]", fluid_type: str) -> str:
        """Return the captured spec, upper-cased for every type except coolant."""
        captured = match.group(1)
        return captured if fluid_type == "coolant" else captured.upper()

    def extract_fluid_spec(self, text: str) -> Optional[FluidSpecMatch]:
        """
        Extract fluid specification from text.

        Args:
            text: Text to search for fluid specs

        Returns:
            FluidSpecMatch for the first FLUID_PATTERNS entry that matches,
            or None if no spec found
        """
        for pattern, fluid_type, confidence in self.FLUID_PATTERNS:
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                return FluidSpecMatch(
                    value=self._fluid_value(match, fluid_type),
                    fluid_type=fluid_type,
                    raw_match=match.group(0),
                    confidence=confidence,
                )

        return None

    def extract_all_fluid_specs(self, text: str) -> list[FluidSpecMatch]:
        """
        Extract all fluid specifications from text.

        Results are ordered by FLUID_PATTERNS entry, then by position in the
        text, and de-duplicated on the normalized value.

        Args:
            text: Text to search for fluid specs

        Returns:
            List of FluidSpecMatch objects
        """
        results: list[FluidSpecMatch] = []
        seen_values: set[str] = set()

        for pattern, fluid_type, confidence in self.FLUID_PATTERNS:
            for match in re.finditer(pattern, text, re.IGNORECASE):
                value = self._fluid_value(match, fluid_type)
                if value in seen_values:
                    continue
                seen_values.add(value)
                results.append(
                    FluidSpecMatch(
                        value=value,
                        fluid_type=fluid_type,
                        raw_match=match.group(0),
                        confidence=confidence,
                    )
                )

        return results

    def extract_combined_interval(
        self, text: str
    ) -> tuple[Optional[MileageIntervalMatch], Optional[TimeIntervalMatch]]:
        """
        Extract both mileage and time intervals from a combined pattern.

        Many schedules use patterns like "every 5,000 miles or 6 months".
        The two extractions are independent; either element may be None.

        Args:
            text: Text to search

        Returns:
            Tuple of (mileage_match, time_match)
        """
        mileage = self.extract_mileage_interval(text)
        time = self.extract_time_interval(text)
        return mileage, time

    def _is_reasonable_mileage(self, mileage: int) -> bool:
        """Check if mileage interval is reasonable for maintenance."""
        # Accepted range: 500 to 150,000 miles (inclusive)
        return 500 <= mileage <= 150000

    def _is_reasonable_months(self, months: int) -> bool:
        """Check if month interval is reasonable for maintenance."""
        # Accepted range: 1 to 120 months (10 years), inclusive
        return 1 <= months <= 120


# Singleton instance
maintenance_matcher = MaintenancePatternMatcher()

View File

@@ -0,0 +1,259 @@
"""Service name normalization and mapping to maintenance subtypes."""
import re
from dataclasses import dataclass
from typing import Optional
@dataclass
class ServiceMapping:
    """Mapping result from extracted text to maintenance subtypes."""

    normalized_name: str  # Standardized service name
    subtypes: list[str]  # Maintenance subtypes from the system
    category: str  # routine_maintenance, repair, performance_upgrade
    confidence: float  # Confidence of the mapping entry (scaled for fuzzy hits)


# Maintenance subtypes from the system (must match exactly)
ROUTINE_MAINTENANCE_SUBTYPES = [
    "Accelerator Pedal",
    "Air Filter Element",
    "Brakes and Traction Control",
    "Cabin Air Filter / Purifier",
    "Coolant",
    "Doors",
    "Drive Belt",
    "Engine Oil",
    "Evaporative Emissions System",
    "Exhaust System",
    "Fluid - A/T",
    "Fluid - Differential",
    "Fluid - M/T",
    "Fluid Filter - A/T",
    "Fluids",
    "Fuel Delivery and Air Induction",
    "Hood Shock / Support",
    "Neutral Safety Switch",
    "Parking Brake System",
    "Restraints and Safety Systems",
    "Shift Interlock A/T",
    "Spark Plug",
    "Steering and Suspension",
    "Tires",
    "Trunk / Liftgate Shock / Support",
    "Washer Fluid",
    "Wiper Blade",
]


class ServiceMapper:
    """Map extracted service names to maintenance subtypes.

    Lookup is two-stage: a substring lookup against SERVICE_MAPPINGS keys
    (the longest matching key wins), then the regexes in SERVICE_PATTERNS.
    map_service_fuzzy() adds a whole-word overlap fallback on top of that.
    """

    # Mapping from common service terms to system subtypes
    # Keys are lowercase patterns, values are (normalized_name, subtypes, category, confidence)
    SERVICE_MAPPINGS: dict[str, tuple[str, list[str], str, float]] = {
        # Oil related
        "engine oil": ("Engine Oil Change", ["Engine Oil"], "routine_maintenance", 0.95),
        "oil change": ("Engine Oil Change", ["Engine Oil"], "routine_maintenance", 0.95),
        "motor oil": ("Engine Oil Change", ["Engine Oil"], "routine_maintenance", 0.93),
        "oil and filter": ("Engine Oil Change", ["Engine Oil"], "routine_maintenance", 0.95),
        "oil & filter": ("Engine Oil Change", ["Engine Oil"], "routine_maintenance", 0.95),
        "change engine oil": ("Engine Oil Change", ["Engine Oil"], "routine_maintenance", 0.95),
        "replace engine oil": ("Engine Oil Change", ["Engine Oil"], "routine_maintenance", 0.95),
        # Air filter
        "air filter": ("Air Filter Replacement", ["Air Filter Element"], "routine_maintenance", 0.90),
        "engine air filter": ("Air Filter Replacement", ["Air Filter Element"], "routine_maintenance", 0.95),
        "air cleaner": ("Air Filter Replacement", ["Air Filter Element"], "routine_maintenance", 0.88),
        "air cleaner element": ("Air Filter Replacement", ["Air Filter Element"], "routine_maintenance", 0.93),
        "replace air filter": ("Air Filter Replacement", ["Air Filter Element"], "routine_maintenance", 0.95),
        # Cabin filter
        "cabin air filter": ("Cabin Air Filter Replacement", ["Cabin Air Filter / Purifier"], "routine_maintenance", 0.95),
        "cabin filter": ("Cabin Air Filter Replacement", ["Cabin Air Filter / Purifier"], "routine_maintenance", 0.93),
        "a/c filter": ("Cabin Air Filter Replacement", ["Cabin Air Filter / Purifier"], "routine_maintenance", 0.88),
        "hvac filter": ("Cabin Air Filter Replacement", ["Cabin Air Filter / Purifier"], "routine_maintenance", 0.88),
        "interior air filter": ("Cabin Air Filter Replacement", ["Cabin Air Filter / Purifier"], "routine_maintenance", 0.90),
        "dust and pollen filter": ("Cabin Air Filter Replacement", ["Cabin Air Filter / Purifier"], "routine_maintenance", 0.90),
        # Tires
        "tire rotation": ("Tire Rotation", ["Tires"], "routine_maintenance", 0.98),
        "rotate tires": ("Tire Rotation", ["Tires"], "routine_maintenance", 0.95),
        "tire inspection": ("Tire Inspection", ["Tires"], "routine_maintenance", 0.93),
        "inspect tires": ("Tire Inspection", ["Tires"], "routine_maintenance", 0.93),
        "check tire pressure": ("Tire Pressure Check", ["Tires"], "routine_maintenance", 0.90),
        "tire pressure": ("Tire Pressure Check", ["Tires"], "routine_maintenance", 0.85),
        # Brakes
        "brake inspection": ("Brake Inspection", ["Brakes and Traction Control"], "routine_maintenance", 0.95),
        "inspect brakes": ("Brake Inspection", ["Brakes and Traction Control"], "routine_maintenance", 0.93),
        "brake fluid": ("Brake Fluid Service", ["Brakes and Traction Control"], "routine_maintenance", 0.93),
        "brake pads": ("Brake Pad Inspection", ["Brakes and Traction Control"], "routine_maintenance", 0.90),
        "parking brake": ("Parking Brake Inspection", ["Parking Brake System"], "routine_maintenance", 0.93),
        # Coolant
        "coolant": ("Coolant Service", ["Coolant"], "routine_maintenance", 0.90),
        "engine coolant": ("Coolant Service", ["Coolant"], "routine_maintenance", 0.93),
        "antifreeze": ("Coolant Service", ["Coolant"], "routine_maintenance", 0.90),
        "cooling system": ("Coolant Service", ["Coolant"], "routine_maintenance", 0.88),
        "radiator fluid": ("Coolant Service", ["Coolant"], "routine_maintenance", 0.88),
        "replace coolant": ("Coolant Replacement", ["Coolant"], "routine_maintenance", 0.95),
        # Transmission
        "transmission fluid": ("Transmission Fluid Service", ["Fluid - A/T"], "routine_maintenance", 0.93),
        "automatic transmission fluid": ("Transmission Fluid Service", ["Fluid - A/T"], "routine_maintenance", 0.95),
        "atf": ("Transmission Fluid Service", ["Fluid - A/T"], "routine_maintenance", 0.90),
        "manual transmission fluid": ("Manual Transmission Fluid", ["Fluid - M/T"], "routine_maintenance", 0.95),
        "cvt fluid": ("CVT Fluid Service", ["Fluid - A/T"], "routine_maintenance", 0.93),
        "transmission filter": ("Transmission Filter", ["Fluid Filter - A/T"], "routine_maintenance", 0.93),
        # Differential
        "differential fluid": ("Differential Fluid Service", ["Fluid - Differential"], "routine_maintenance", 0.95),
        "rear differential": ("Differential Fluid Service", ["Fluid - Differential"], "routine_maintenance", 0.93),
        "front differential": ("Differential Fluid Service", ["Fluid - Differential"], "routine_maintenance", 0.93),
        "transfer case": ("Transfer Case Fluid", ["Fluid - Differential"], "routine_maintenance", 0.90),
        # Spark plugs
        "spark plug": ("Spark Plug Replacement", ["Spark Plug"], "routine_maintenance", 0.95),
        "spark plugs": ("Spark Plug Replacement", ["Spark Plug"], "routine_maintenance", 0.95),
        "replace spark plugs": ("Spark Plug Replacement", ["Spark Plug"], "routine_maintenance", 0.95),
        "ignition plugs": ("Spark Plug Replacement", ["Spark Plug"], "routine_maintenance", 0.88),
        # Drive belt
        "drive belt": ("Drive Belt Inspection", ["Drive Belt"], "routine_maintenance", 0.93),
        "serpentine belt": ("Drive Belt Inspection", ["Drive Belt"], "routine_maintenance", 0.93),
        "accessory belt": ("Drive Belt Inspection", ["Drive Belt"], "routine_maintenance", 0.90),
        "timing belt": ("Timing Belt Service", ["Drive Belt"], "routine_maintenance", 0.90),
        "v-belt": ("Drive Belt Inspection", ["Drive Belt"], "routine_maintenance", 0.88),
        # Wipers
        "wiper blade": ("Wiper Blade Replacement", ["Wiper Blade"], "routine_maintenance", 0.95),
        "wiper blades": ("Wiper Blade Replacement", ["Wiper Blade"], "routine_maintenance", 0.95),
        "windshield wiper": ("Wiper Blade Replacement", ["Wiper Blade"], "routine_maintenance", 0.93),
        "replace wipers": ("Wiper Blade Replacement", ["Wiper Blade"], "routine_maintenance", 0.93),
        # Washer fluid
        "washer fluid": ("Washer Fluid", ["Washer Fluid"], "routine_maintenance", 0.95),
        "windshield washer": ("Washer Fluid", ["Washer Fluid"], "routine_maintenance", 0.90),
        # Steering/Suspension
        "steering": ("Steering Inspection", ["Steering and Suspension"], "routine_maintenance", 0.85),
        "suspension": ("Suspension Inspection", ["Steering and Suspension"], "routine_maintenance", 0.85),
        "power steering": ("Power Steering Fluid", ["Steering and Suspension"], "routine_maintenance", 0.90),
        "power steering fluid": ("Power Steering Fluid", ["Steering and Suspension"], "routine_maintenance", 0.93),
        # Exhaust
        "exhaust": ("Exhaust System Inspection", ["Exhaust System"], "routine_maintenance", 0.88),
        "exhaust system": ("Exhaust System Inspection", ["Exhaust System"], "routine_maintenance", 0.93),
        # Fuel system
        "fuel filter": ("Fuel Filter Replacement", ["Fuel Delivery and Air Induction"], "routine_maintenance", 0.93),
        "fuel system": ("Fuel System Inspection", ["Fuel Delivery and Air Induction"], "routine_maintenance", 0.88),
        "fuel injection": ("Fuel Injection Service", ["Fuel Delivery and Air Induction"], "routine_maintenance", 0.88),
        # Emissions
        "evaporative emissions": ("Evaporative Emissions Inspection", ["Evaporative Emissions System"], "routine_maintenance", 0.93),
        "evap system": ("Evaporative Emissions Inspection", ["Evaporative Emissions System"], "routine_maintenance", 0.90),
        "emissions": ("Evaporative Emissions Inspection", ["Evaporative Emissions System"], "routine_maintenance", 0.80),
        # Safety systems
        "seat belt": ("Safety Systems Inspection", ["Restraints and Safety Systems"], "routine_maintenance", 0.90),
        "airbag": ("Safety Systems Inspection", ["Restraints and Safety Systems"], "routine_maintenance", 0.85),
        "restraint": ("Safety Systems Inspection", ["Restraints and Safety Systems"], "routine_maintenance", 0.85),
        # Miscellaneous
        "battery": ("Battery Inspection", ["Fluids"], "routine_maintenance", 0.80),
        "inspect battery": ("Battery Inspection", ["Fluids"], "routine_maintenance", 0.85),
        "door hinges": ("Door Lubrication", ["Doors"], "routine_maintenance", 0.85),
        "hood shock": ("Hood Shock Inspection", ["Hood Shock / Support"], "routine_maintenance", 0.90),
        "trunk shock": ("Trunk Shock Inspection", ["Trunk / Liftgate Shock / Support"], "routine_maintenance", 0.90),
        "liftgate": ("Liftgate Inspection", ["Trunk / Liftgate Shock / Support"], "routine_maintenance", 0.88),
    }

    # Pattern-based mappings for fuzzy matching
    SERVICE_PATTERNS: list[tuple[str, str, list[str], str, float]] = [
        # (regex_pattern, normalized_name, subtypes, category, confidence)
        (r"oil\s+(?:and|&)\s+filter", "Engine Oil Change", ["Engine Oil"], "routine_maintenance", 0.95),
        (r"(?:change|replace)\s+(?:the\s+)?oil", "Engine Oil Change", ["Engine Oil"], "routine_maintenance", 0.93),
        (r"(?:inspect|check)\s+(?:the\s+)?brakes?", "Brake Inspection", ["Brakes and Traction Control"], "routine_maintenance", 0.90),
        (r"(?:inspect|check)\s+(?:the\s+)?tires?", "Tire Inspection", ["Tires"], "routine_maintenance", 0.90),
        (r"(?:rotate|rotation)\s+(?:the\s+)?tires?", "Tire Rotation", ["Tires"], "routine_maintenance", 0.95),
        (r"(?:replace|change)\s+(?:the\s+)?(?:engine\s+)?air\s+filter", "Air Filter Replacement", ["Air Filter Element"], "routine_maintenance", 0.95),
        (r"(?:replace|change)\s+(?:the\s+)?cabin\s+(?:air\s+)?filter", "Cabin Air Filter Replacement", ["Cabin Air Filter / Purifier"], "routine_maintenance", 0.95),
        (r"(?:replace|change)\s+(?:the\s+)?spark\s+plugs?", "Spark Plug Replacement", ["Spark Plug"], "routine_maintenance", 0.95),
        (r"(?:replace|change)\s+(?:the\s+)?coolant", "Coolant Replacement", ["Coolant"], "routine_maintenance", 0.93),
        (r"(?:flush|drain)\s+(?:the\s+)?coolant", "Coolant Flush", ["Coolant"], "routine_maintenance", 0.93),
        (r"(?:replace|change)\s+(?:the\s+)?(?:a/?t|automatic\s+transmission)\s+fluid", "Transmission Fluid Service", ["Fluid - A/T"], "routine_maintenance", 0.93),
        (r"(?:inspect|check)\s+(?:the\s+)?(?:drive|serpentine|accessory)\s+belt", "Drive Belt Inspection", ["Drive Belt"], "routine_maintenance", 0.90),
    ]

    @staticmethod
    def _build_mapping(
        name: str, subtypes: list[str], category: str, confidence: float
    ) -> ServiceMapping:
        """Build a ServiceMapping that owns its own copy of the subtype list."""
        return ServiceMapping(
            normalized_name=name,
            # Copy so callers mutating the result cannot corrupt the
            # class-level lookup tables shared by every instance.
            subtypes=list(subtypes),
            category=category,
            confidence=confidence,
        )

    def map_service(self, service_text: str) -> Optional[ServiceMapping]:
        """
        Map extracted service text to maintenance subtypes.

        Keys of SERVICE_MAPPINGS are matched as substrings of the lower-cased,
        stripped text. When several keys match, the LONGEST one is used so a
        specific entry ("cabin air filter", "manual transmission fluid") is
        not shadowed by a shorter key it contains ("air filter",
        "transmission fluid"). Keys of equal length keep table order.

        Args:
            service_text: Service name or description from the manual

        Returns:
            ServiceMapping or None if no mapping found
        """
        normalized_text = service_text.lower().strip()

        # Keyword lookup first: most specific (longest) matching key wins.
        matching_keys = [key for key in self.SERVICE_MAPPINGS if key in normalized_text]
        if matching_keys:
            # max() returns the first maximal item, preserving table order on ties.
            best_key = max(matching_keys, key=len)
            return self._build_mapping(*self.SERVICE_MAPPINGS[best_key])

        # Fall back to the regex patterns, in table order.
        for pattern, name, subtypes, category, conf in self.SERVICE_PATTERNS:
            if re.search(pattern, normalized_text, re.IGNORECASE):
                return self._build_mapping(name, subtypes, category, conf)

        return None

    def map_service_fuzzy(self, service_text: str, threshold: float = 0.6) -> Optional[ServiceMapping]:
        """
        Map service text, falling back to word-overlap matching.

        map_service() is tried first. If it finds nothing, each
        SERVICE_MAPPINGS key is scored by the Jaccard similarity between its
        whitespace-separated words and those of the lower-cased text; the
        best-scoring key at or above ``threshold`` is returned with its
        confidence multiplied by the score. Whole words are compared, so
        reordered or extra words are tolerated but misspellings are not.

        Args:
            service_text: Service name or description
            threshold: Minimum similarity threshold (0.0-1.0)

        Returns:
            ServiceMapping or None
        """
        # First try exact matching
        exact = self.map_service(service_text)
        if exact:
            return exact

        # Fall back to word overlap matching
        words = set(service_text.lower().split())
        best_match: Optional[ServiceMapping] = None
        best_score = 0.0

        for key, (name, subtypes, category, conf) in self.SERVICE_MAPPINGS.items():
            key_words = set(key.split())
            union_size = len(words | key_words)
            if union_size == 0:
                continue

            score = len(words & key_words) / union_size
            # Strict ">" keeps the earliest table entry when scores tie.
            if score > best_score and score >= threshold:
                best_score = score
                # Reduce confidence by match quality
                best_match = self._build_mapping(name, subtypes, category, conf * score)

        return best_match

    def get_all_service_keywords(self) -> list[str]:
        """Get all service keywords for table header detection.

        Returns:
            Every SERVICE_MAPPINGS key followed by a fixed set of generic
            header terms.
        """
        keywords = list(self.SERVICE_MAPPINGS)
        # Add common header terms
        keywords.extend([
            "service", "maintenance", "item", "operation",
            "inspection", "replacement", "interval", "schedule",
        ])
        return keywords


# Singleton instance
service_mapper = ServiceMapper()