"""Fuel-specific pattern matching for receipt extraction.""" import re from dataclasses import dataclass from typing import Optional @dataclass class FuelQuantityMatch: """Result of fuel quantity pattern matching.""" value: float # Gallons or liters unit: str # "GAL" or "L" raw_match: str confidence: float pattern_name: str @dataclass class FuelPriceMatch: """Result of fuel price per unit pattern matching.""" value: float unit: str # "GAL" or "L" raw_match: str confidence: float pattern_name: str @dataclass class FuelGradeMatch: """Result of fuel grade pattern matching.""" value: str # e.g., "87", "89", "93", "DIESEL" display_name: str # e.g., "Regular 87", "Premium 93" raw_match: str confidence: float class FuelPatternMatcher: """Extract fuel-specific data from receipt text.""" # Gallons patterns GALLONS_PATTERNS = [ # XX.XXX GAL or XX.XXX GALLONS ( r"(\d{1,3}\.\d{1,3})\s*(?:GAL(?:LON)?S?)", "gallons_suffix", 0.95, ), # GALLONS: XX.XXX or GAL: XX.XXX ( r"(?:GAL(?:LON)?S?)[:\s]+(\d{1,3}\.\d{1,3})", "gallons_prefix", 0.93, ), # VOLUME XX.XXX ( r"VOLUME[:\s]+(\d{1,3}\.\d{1,3})", "volume", 0.85, ), # QTY XX.XXX (near fuel context) ( r"QTY[:\s]+(\d{1,3}\.\d{1,3})", "qty", 0.70, ), ] # Liters patterns (for international receipts) LITERS_PATTERNS = [ # XX.XX L or XX.XX LITERS ( r"(\d{1,3}\.\d{1,3})\s*(?:L(?:ITERS?)?)", "liters_suffix", 0.95, ), # LITERS: XX.XX ( r"(?:L(?:ITERS?)?)[:\s]+(\d{1,3}\.\d{1,3})", "liters_prefix", 0.93, ), ] # Price per gallon patterns PRICE_PER_UNIT_PATTERNS = [ # $X.XXX/GAL or $X.XX/GAL ( r"\$?\s*(\d{1,2}\.\d{2,3})\s*/\s*GAL", "price_per_gal", 0.98, ), # PRICE/GAL $X.XXX ( r"PRICE\s*/\s*GAL[:\s]*\$?\s*(\d{1,2}\.\d{2,3})", "labeled_price_gal", 0.96, ), # UNIT PRICE $X.XXX ( r"UNIT\s*PRICE[:\s]*\$?\s*(\d{1,2}\.\d{2,3})", "unit_price", 0.90, ), # @ $X.XXX (per unit implied) ( r"@\s*\$?\s*(\d{1,2}\.\d{2,3})", "at_price", 0.85, ), # PPG $X.XXX (price per gallon) ( r"PPG[:\s]*\$?\s*(\d{1,2}\.\d{2,3})", "ppg", 0.92, ), ] # Fuel grade patterns GRADE_PATTERNS = [ # REGULAR 87, REG 87 (r"(?:REGULAR|REG)\s*(\d{2})", "regular", 0.95), # UNLEADED 87 (r"UNLEADED\s*(\d{2})", "unleaded", 0.93), # PLUS 89, MID 89, MIDGRADE 89 (r"(?:PLUS|MID(?:GRADE)?)\s*(\d{2})", "plus", 0.95), # PREMIUM 91/93, PREM 91/93, SUPER 91/93 (r"(?:PREMIUM|PREM|SUPER)\s*(\d{2})", "premium", 0.95), # Just the octane number near fuel context (87, 89, 91, 93) (r"(? Optional[FuelQuantityMatch]: """ Extract fuel quantity in gallons. Args: text: Receipt text to search Returns: FuelQuantityMatch or None """ text_upper = text.upper() for pattern, name, confidence in self.GALLONS_PATTERNS: match = re.search(pattern, text_upper) if match: quantity = float(match.group(1)) if self._is_reasonable_quantity(quantity): return FuelQuantityMatch( value=quantity, unit="GAL", raw_match=match.group(0), confidence=confidence, pattern_name=name, ) return None def extract_liters(self, text: str) -> Optional[FuelQuantityMatch]: """ Extract fuel quantity in liters. Args: text: Receipt text to search Returns: FuelQuantityMatch or None """ text_upper = text.upper() for pattern, name, confidence in self.LITERS_PATTERNS: match = re.search(pattern, text_upper) if match: quantity = float(match.group(1)) if self._is_reasonable_quantity(quantity, is_liters=True): return FuelQuantityMatch( value=quantity, unit="L", raw_match=match.group(0), confidence=confidence, pattern_name=name, ) return None def extract_quantity(self, text: str) -> Optional[FuelQuantityMatch]: """ Extract fuel quantity (gallons or liters). Prefers gallons for US receipts. Args: text: Receipt text to search Returns: FuelQuantityMatch or None """ # Try gallons first (more common in US) gallons = self.extract_gallons(text) if gallons: return gallons # Fall back to liters return self.extract_liters(text) def extract_price_per_unit(self, text: str) -> Optional[FuelPriceMatch]: """ Extract price per gallon/liter. Args: text: Receipt text to search Returns: FuelPriceMatch or None """ text_upper = text.upper() for pattern, name, confidence in self.PRICE_PER_UNIT_PATTERNS: match = re.search(pattern, text_upper) if match: price = float(match.group(1)) if self._is_reasonable_price(price): return FuelPriceMatch( value=price, unit="GAL", # Default to gallons for US raw_match=match.group(0), confidence=confidence, pattern_name=name, ) return None def extract_grade(self, text: str) -> Optional[FuelGradeMatch]: """ Extract fuel grade (octane rating or diesel). Args: text: Receipt text to search Returns: FuelGradeMatch or None """ text_upper = text.upper() for pattern, name, confidence in self.GRADE_PATTERNS: match = re.search(pattern, text_upper) if match: if name == "diesel": return FuelGradeMatch( value="DIESEL", display_name="Diesel", raw_match=match.group(0), confidence=confidence, ) elif name == "e85": return FuelGradeMatch( value="E85", display_name="E85 Ethanol", raw_match=match.group(0), confidence=confidence, ) else: octane = match.group(1) display = self._get_grade_display_name(octane, name) return FuelGradeMatch( value=octane, display_name=display, raw_match=match.group(0), confidence=confidence, ) return None def extract_merchant_name(self, text: str) -> Optional[tuple[str, float]]: """ Extract gas station/merchant name. Args: text: Receipt text to search Returns: Tuple of (merchant_name, confidence) or None """ text_upper = text.upper() # Check for known station names for station in self.STATION_NAMES: if station in text_upper: # Try to get the full line for context for line in text.split("\n"): if station in line.upper(): # Clean up the line cleaned = line.strip() if len(cleaned) <= 50: # Reasonable length return (cleaned, 0.90) return (station.title(), 0.85) # Fall back to first non-empty line (often the merchant) lines = [l.strip() for l in text.split("\n") if l.strip()] if lines: first_line = lines[0] # Skip if it looks like a date or number if not re.match(r"^\d+[/\-.]", first_line): return (first_line[:50], 0.50) # Low confidence return None def _is_reasonable_quantity( self, quantity: float, is_liters: bool = False ) -> bool: """Check if fuel quantity is reasonable.""" if is_liters: # Typical fill: 20-100 liters return 0.5 <= quantity <= 150.0 else: # Typical fill: 5-30 gallons return 0.1 <= quantity <= 50.0 def _is_reasonable_price(self, price: float) -> bool: """Check if price per unit is reasonable.""" # US gas prices: $1.50 - $8.00 per gallon (allowing for fluctuation) return 1.00 <= price <= 10.00 def _get_grade_display_name(self, octane: str, pattern_name: str) -> str: """Get display name for fuel grade.""" grade_names = { "87": "Regular 87", "89": "Plus 89", "91": "Premium 91", "93": "Premium 93", } if octane in grade_names: return grade_names[octane] # Use pattern hint if pattern_name == "premium": return f"Premium {octane}" elif pattern_name == "plus": return f"Plus {octane}" else: return f"Unleaded {octane}" # Singleton instance fuel_matcher = FuelPatternMatcher()