Files
motovaultpro/ocr/tests/test_fuel_patterns.py
Eric Gullickson 6319d50fb1
All checks were successful
Deploy to Staging / Build Images (pull_request) Successful in 32s
Deploy to Staging / Deploy to Staging (pull_request) Successful in 31s
Deploy to Staging / Verify Staging (pull_request) Successful in 2m20s
Deploy to Staging / Notify Staging Ready (pull_request) Successful in 8s
Deploy to Staging / Notify Staging Failure (pull_request) Has been skipped
feat: add receipt OCR pipeline (refs #69)
Implement receipt-specific OCR extraction for fuel receipts:

- Pattern matching modules for date, currency, and fuel data extraction
- Receipt-optimized image preprocessing for thermal receipts
- POST /extract/receipt endpoint with field extraction
- Confidence scoring per extracted field
- Cross-validation of fuel receipt data
- Unit tests for all pattern matchers

Extracted fields: merchantName, transactionDate, totalAmount,
fuelQuantity, pricePerUnit, fuelGrade

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-01 20:43:30 -06:00

328 lines
9.5 KiB
Python

"""Tests for fuel-specific pattern matching."""
import pytest
from app.patterns.fuel_patterns import FuelPatternMatcher, fuel_matcher
class TestFuelQuantityExtraction:
    """Exercise gallon/liter quantity extraction on ``fuel_matcher``.

    Numeric values are compared through ``pytest.approx`` so the assertions
    do not hinge on bit-exact float results.
    """

    def test_gallons_suffix(self) -> None:
        """'XX.XXX GAL' yields the value, the GAL unit and confidence > 0.9."""
        result = fuel_matcher.extract_gallons("10.500 GAL")
        assert result is not None
        assert result.value == pytest.approx(10.5)
        assert result.unit == "GAL"
        assert result.confidence > 0.9

    def test_gallons_full_word(self) -> None:
        """'XX.XXX GALLONS' (unit spelled out) yields the value."""
        result = fuel_matcher.extract_gallons("10.500 GALLONS")
        assert result is not None
        assert result.value == pytest.approx(10.5)

    def test_gallons_prefix(self) -> None:
        """'GALLONS: XX.XXX' (label before the number) yields the value."""
        result = fuel_matcher.extract_gallons("GALLONS: 10.500")
        assert result is not None
        assert result.value == pytest.approx(10.5)

    def test_gal_prefix(self) -> None:
        """'GAL: XX.XXX' (abbreviated label) yields the value."""
        result = fuel_matcher.extract_gallons("GAL: 10.500")
        assert result is not None
        assert result.value == pytest.approx(10.5)

    def test_volume_label(self) -> None:
        """'VOLUME: XX.XXX' is accepted by the gallons extractor."""
        result = fuel_matcher.extract_gallons("VOLUME: 10.500")
        assert result is not None
        assert result.value == pytest.approx(10.5)

    def test_liters_suffix(self) -> None:
        """'XX.X L' yields the value and the L unit."""
        result = fuel_matcher.extract_liters("40.5 L")
        assert result is not None
        assert result.value == pytest.approx(40.5)
        assert result.unit == "L"

    def test_liters_full_word(self) -> None:
        """'XX.X LITERS' (unit spelled out) yields the value."""
        result = fuel_matcher.extract_liters("40.5 LITERS")
        assert result is not None
        assert result.value == pytest.approx(40.5)

    def test_quantity_prefers_gallons(self) -> None:
        """extract_quantity reports the GAL unit for a gallons reading."""
        # NOTE(review): the input carries no liter reading, so this does not
        # exercise a gallons-over-liters preference; consider adding a
        # mixed-unit case once the expected behavior is confirmed.
        result = fuel_matcher.extract_quantity("10.500 GAL")
        assert result is not None
        assert result.unit == "GAL"

    # One case per parameter so a failure on one bound cannot mask the other.
    @pytest.mark.parametrize(
        "text",
        [
            pytest.param("0.001 GAL", id="too-small"),
            pytest.param("100.0 GAL", id="too-large"),
        ],
    )
    def test_reasonable_quantity_filter(self, text: str) -> None:
        """Implausibly small or large gallon readings produce no result."""
        assert fuel_matcher.extract_gallons(text) is None
class TestFuelPriceExtraction:
    """Exercise price-per-unit extraction on ``fuel_matcher``.

    Prices are compared through ``pytest.approx`` so the assertions do not
    hinge on bit-exact float results.
    """

    def test_price_per_gal_dollar_sign(self) -> None:
        """'$X.XXX/GAL' yields the price, the GAL unit and confidence > 0.95."""
        result = fuel_matcher.extract_price_per_unit("$3.679/GAL")
        assert result is not None
        assert result.value == pytest.approx(3.679)
        assert result.unit == "GAL"
        assert result.confidence > 0.95

    def test_price_per_gal_no_dollar(self) -> None:
        """'X.XXX/GAL' without a dollar sign yields the price."""
        result = fuel_matcher.extract_price_per_unit("3.679/GAL")
        assert result is not None
        assert result.value == pytest.approx(3.679)

    def test_labeled_price_gal(self) -> None:
        """'PRICE/GAL $X.XXX' (label before the amount) yields the price."""
        result = fuel_matcher.extract_price_per_unit("PRICE/GAL $3.679")
        assert result is not None
        assert result.value == pytest.approx(3.679)

    def test_unit_price(self) -> None:
        """'UNIT PRICE: $X.XXX' yields the price."""
        result = fuel_matcher.extract_price_per_unit("UNIT PRICE: $3.679")
        assert result is not None
        assert result.value == pytest.approx(3.679)

    def test_at_price(self) -> None:
        """'<qty> GAL @ $X.XXX' yields the price that follows the '@'."""
        result = fuel_matcher.extract_price_per_unit("10.500 GAL @ $3.679")
        assert result is not None
        assert result.value == pytest.approx(3.679)

    def test_ppg_pattern(self) -> None:
        """'PPG: $X.XXX' yields the price."""
        result = fuel_matcher.extract_price_per_unit("PPG: $3.679")
        assert result is not None
        assert result.value == pytest.approx(3.679)

    # One case per parameter so a failure on one bound cannot mask the other.
    @pytest.mark.parametrize(
        "text",
        [
            pytest.param("$0.50/GAL", id="too-low"),
            pytest.param("$15.00/GAL", id="too-high"),
        ],
    )
    def test_reasonable_price_filter(self, text: str) -> None:
        """Implausibly low or high per-gallon prices produce no result."""
        assert fuel_matcher.extract_price_per_unit(text) is None
class TestFuelGradeExtraction:
    """Checks for fuel grade / octane detection via ``extract_grade``."""

    def test_regular_87(self) -> None:
        """'REGULAR 87' gives grade "87" with 'Regular' in the display name."""
        grade = fuel_matcher.extract_grade("REGULAR 87")
        assert grade is not None
        assert grade.value == "87"
        assert "Regular" in grade.display_name

    def test_reg_87(self) -> None:
        """The abbreviated 'REG 87' gives grade "87"."""
        grade = fuel_matcher.extract_grade("REG 87")
        assert grade is not None
        assert grade.value == "87"

    def test_unleaded_87(self) -> None:
        """'UNLEADED 87' gives grade "87"."""
        grade = fuel_matcher.extract_grade("UNLEADED 87")
        assert grade is not None
        assert grade.value == "87"

    def test_plus_89(self) -> None:
        """'PLUS 89' gives grade "89" with 'Plus' in the display name."""
        grade = fuel_matcher.extract_grade("PLUS 89")
        assert grade is not None
        assert grade.value == "89"
        assert "Plus" in grade.display_name

    def test_midgrade_89(self) -> None:
        """'MIDGRADE 89' gives grade "89"."""
        grade = fuel_matcher.extract_grade("MIDGRADE 89")
        assert grade is not None
        assert grade.value == "89"

    def test_premium_93(self) -> None:
        """'PREMIUM 93' gives grade "93" with 'Premium' in the display name."""
        grade = fuel_matcher.extract_grade("PREMIUM 93")
        assert grade is not None
        assert grade.value == "93"
        assert "Premium" in grade.display_name

    def test_super_93(self) -> None:
        """'SUPER 93' gives grade "93"."""
        grade = fuel_matcher.extract_grade("SUPER 93")
        assert grade is not None
        assert grade.value == "93"

    def test_diesel(self) -> None:
        """'DIESEL #2' gives grade "DIESEL" with 'Diesel' in the display name."""
        grade = fuel_matcher.extract_grade("DIESEL #2")
        assert grade is not None
        assert grade.value == "DIESEL"
        assert "Diesel" in grade.display_name

    def test_e85(self) -> None:
        """The ethanol blend label 'E85' gives grade "E85"."""
        grade = fuel_matcher.extract_grade("E85")
        assert grade is not None
        assert grade.value == "E85"

    def test_octane_only(self) -> None:
        """'87 OCTANE' (no grade name) gives grade "87"."""
        grade = fuel_matcher.extract_grade("87 OCTANE")
        assert grade is not None
        assert grade.value == "87"
class TestMerchantExtraction:
    """Checks for gas station name detection via ``extract_merchant_name``.

    Each test unpacks the result as a ``(name, confidence)`` pair.
    """

    def test_shell_station(self) -> None:
        """A receipt headed 'SHELL' is named Shell with confidence > 0.8."""
        found = fuel_matcher.extract_merchant_name("SHELL\n123 MAIN ST")
        assert found is not None
        name, score = found
        assert "SHELL" in name.upper()
        assert score > 0.8

    def test_chevron_station(self) -> None:
        """A 'CHEVRON #12345' header is named Chevron."""
        found = fuel_matcher.extract_merchant_name("CHEVRON #12345\n456 OAK AVE")
        assert found is not None
        # Only the name is checked here; the confidence slot is ignored.
        name, _ = found
        assert "CHEVRON" in name.upper()

    def test_costco_gas(self) -> None:
        """A 'COSTCO GASOLINE' header is named Costco."""
        found = fuel_matcher.extract_merchant_name(
            "COSTCO GASOLINE\n789 WAREHOUSE BLVD"
        )
        assert found is not None
        # Only the name is checked here; the confidence slot is ignored.
        name, _ = found
        assert "COSTCO" in name.upper()

    def test_unknown_station_fallback(self) -> None:
        """An unrecognized first line is still returned, with confidence < 0.7."""
        found = fuel_matcher.extract_merchant_name("JOE'S GAS\n123 MAIN ST")
        assert found is not None
        name, score = found
        assert "JOE'S GAS" in name
        # Unknown stations are expected to score lower than known brands.
        assert score < 0.7
class TestReceiptContextExtraction:
    """Run the extractors against realistic multi-line receipt text."""

    def test_full_receipt_extraction(self) -> None:
        """Extract quantity, unit price, grade and merchant from one receipt.

        Numeric values are compared through ``pytest.approx`` so the
        assertions do not hinge on bit-exact float results.
        """
        # Receipt lines are flush-left so the literal contains no leading
        # indentation on any line.
        text = """
SHELL
123 MAIN STREET
ANYTOWN, USA 12345
DATE: 01/15/2024
TIME: 14:32
PUMP #5
REGULAR 87
10.500 GAL @ $3.679/GAL
FUEL TOTAL $38.63
TAX $0.00
TOTAL $38.63
DEBIT CARD
************1234
APPROVED
"""
        # Each extractor is run on the same text and checked in turn.
        quantity = fuel_matcher.extract_quantity(text)
        assert quantity is not None
        assert quantity.value == pytest.approx(10.5)

        price = fuel_matcher.extract_price_per_unit(text)
        assert price is not None
        assert price.value == pytest.approx(3.679)

        grade = fuel_matcher.extract_grade(text)
        assert grade is not None
        assert grade.value == "87"

        # The merchant result is indexed as a sequence; element 0 is the name.
        merchant = fuel_matcher.extract_merchant_name(text)
        assert merchant is not None
        assert "SHELL" in merchant[0].upper()