All checks were successful
Deploy to Staging / Build Images (pull_request) Successful in 32s
Deploy to Staging / Deploy to Staging (pull_request) Successful in 31s
Deploy to Staging / Verify Staging (pull_request) Successful in 2m20s
Deploy to Staging / Notify Staging Ready (pull_request) Successful in 8s
Deploy to Staging / Notify Staging Failure (pull_request) Has been skipped
Implement receipt-specific OCR extraction for fuel receipts:
- Pattern matching modules for date, currency, and fuel data extraction
- Receipt-optimized image preprocessing for thermal receipts
- POST /extract/receipt endpoint with field extraction
- Confidence scoring per extracted field
- Cross-validation of fuel receipt data
- Unit tests for all pattern matchers

Extracted fields: merchantName, transactionDate, totalAmount, fuelQuantity, pricePerUnit, fuelGrade

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
199 lines
5.9 KiB
Python
199 lines
5.9 KiB
Python
"""Tests for currency pattern matching."""
|
|
import pytest
|
|
|
|
from app.patterns.currency_patterns import CurrencyPatternMatcher, currency_matcher
|
|
|
|
|
|
class TestCurrencyPatternMatcher:
    """Exercise total and amount extraction through ``currency_matcher``."""

    def test_total_explicit(self) -> None:
        """A plain 'TOTAL $XX.XX' line yields a confident, labeled total."""
        total = currency_matcher.extract_total("TOTAL $45.67")

        assert total is not None
        assert total.value == 45.67
        assert total.confidence > 0.9
        assert total.label == "TOTAL"

    def test_total_with_colon(self) -> None:
        """A colon after the label ('TOTAL: $XX.XX') is accepted."""
        total = currency_matcher.extract_total("TOTAL: $45.67")

        assert total is not None
        assert total.value == 45.67

    def test_total_without_dollar_sign(self) -> None:
        """The dollar sign is optional ('TOTAL 45.67')."""
        total = currency_matcher.extract_total("TOTAL 45.67")

        assert total is not None
        assert total.value == 45.67

    def test_amount_due(self) -> None:
        """'AMOUNT DUE' is recognized and reported as the label."""
        total = currency_matcher.extract_total("AMOUNT DUE: $45.67")

        assert total is not None
        assert total.value == 45.67
        assert total.label == "AMOUNT DUE"

    def test_sale_pattern(self) -> None:
        """'SALE $XX.XX' is treated as a total."""
        total = currency_matcher.extract_total("SALE $45.67")

        assert total is not None
        assert total.value == 45.67

    def test_grand_total(self) -> None:
        """'GRAND TOTAL' is recognized and reported as the label."""
        total = currency_matcher.extract_total("GRAND TOTAL $45.67")

        assert total is not None
        assert total.value == 45.67
        assert total.label == "GRAND TOTAL"

    def test_total_sale(self) -> None:
        """'TOTAL SALE: $XX.XX' is treated as a total."""
        total = currency_matcher.extract_total("TOTAL SALE: $45.67")

        assert total is not None
        assert total.value == 45.67

    def test_balance_due(self) -> None:
        """'BALANCE DUE $XX.XX' is treated as a total."""
        total = currency_matcher.extract_total("BALANCE DUE $45.67")

        assert total is not None
        assert total.value == 45.67

    def test_multiple_amounts_picks_total(self) -> None:
        """The labeled TOTAL line wins over the other amounts on the receipt."""
        receipt_text = """
        REGULAR 87
        10.500 GAL @ $3.67
        SUBTOTAL $38.54
        TAX $0.00
        TOTAL $38.54
        """
        total = currency_matcher.extract_total(receipt_text)

        assert total is not None
        assert total.value == 38.54
        assert total.pattern_name == "total_explicit"

    def test_all_amounts(self) -> None:
        """``extract_all_amounts`` returns at least the TOTAL amount."""
        receipt_text = """
        SUBTOTAL $35.00
        TAX $3.54
        TOTAL $38.54
        """
        amounts = currency_matcher.extract_all_amounts(receipt_text)

        # The TOTAL line must be present; other amounts may or may not be.
        assert len(amounts) >= 1
        assert any(amount.value == 38.54 for amount in amounts)

    def test_comma_thousand_separator(self) -> None:
        """A comma thousands separator ('$1,234.56') parses to 1234.56."""
        total = currency_matcher.extract_total("TOTAL $1,234.56")

        assert total is not None
        assert total.value == 1234.56

    def test_reasonable_total_range(self) -> None:
        """A five-cent total is rejected while an ordinary one is accepted."""
        # Too small to be a plausible fuel receipt total.
        tiny_total = currency_matcher.extract_total("TOTAL $0.05")
        assert tiny_total is None

        # An ordinary amount must still be extracted.
        plausible_total = currency_matcher.extract_total("TOTAL $45.67")
        assert plausible_total is not None

    def test_receipt_context_extraction(self) -> None:
        """The total is found inside realistic multi-line receipt text."""
        receipt_text = """
        SHELL
        123 MAIN ST
        DATE: 01/15/2024

        UNLEADED 87
        10.500 GAL
        @ $3.679/GAL

        FUEL TOTAL $38.63
        TAX $0.00
        TOTAL $38.63

        DEBIT CARD
        ************1234
        """
        total = currency_matcher.extract_total(receipt_text)

        assert total is not None
        assert total.value == 38.63

    def test_no_total_returns_largest(self) -> None:
        """Without a labeled total, the largest amount is returned."""
        receipt_text = """
        $10.50
        $5.00
        $45.67
        """
        total = currency_matcher.extract_total(receipt_text)

        # An inferred total is expected to carry reduced confidence.
        assert total is not None
        assert total.value == 45.67
        assert total.confidence < 0.7

    def test_no_amounts_returns_none(self) -> None:
        """Text containing no monetary amount yields ``None``."""
        total = currency_matcher.extract_total("SHELL STATION\nPUMP 5")

        assert total is None
|
|
|
|
|
|
class TestEdgeCases:
    """Cover less common formatting of currency amounts."""

    def test_european_format(self) -> None:
        """A comma decimal separator ('45,67') is read as 45.67."""
        total = currency_matcher.extract_total("TOTAL 45,67")

        assert total is not None
        assert total.value == 45.67

    def test_spaces_in_amount(self) -> None:
        """A space between the dollar sign and the digits is tolerated."""
        total = currency_matcher.extract_total("TOTAL $ 45.67")

        assert total is not None
        assert total.value == 45.67

    def test_case_insensitive(self) -> None:
        """The TOTAL label is matched in upper, title and lower case."""
        for label in ("TOTAL", "Total", "total"):
            total = currency_matcher.extract_total(f"{label} $45.67")

            assert total is not None, f"Failed for {label}"
            assert total.value == 45.67
|