# motovaultpro/ocr/tests/test_currency_patterns.py

"""Tests for currency pattern matching."""
import pytest

from app.patterns.currency_patterns import CurrencyPatternMatcher, currency_matcher


class TestCurrencyPatternMatcher:
    """Exercise total/amount extraction through the shared ``currency_matcher``."""

    def test_total_explicit(self) -> None:
        """A plain 'TOTAL $XX.XX' line yields a labeled, high-confidence match."""
        match = currency_matcher.extract_total("TOTAL $45.67")

        assert match is not None
        assert match.value == 45.67
        assert match.confidence > 0.9
        assert match.label == "TOTAL"

    def test_total_with_colon(self) -> None:
        """A colon after the label ('TOTAL: $XX.XX') is accepted."""
        match = currency_matcher.extract_total("TOTAL: $45.67")

        assert match is not None
        assert match.value == 45.67

    def test_total_without_dollar_sign(self) -> None:
        """The dollar sign is optional ('TOTAL 45.67')."""
        match = currency_matcher.extract_total("TOTAL 45.67")

        assert match is not None
        assert match.value == 45.67

    def test_amount_due(self) -> None:
        """'AMOUNT DUE' is recognised and reported as the label."""
        match = currency_matcher.extract_total("AMOUNT DUE: $45.67")

        assert match is not None
        assert match.value == 45.67
        assert match.label == "AMOUNT DUE"

    def test_sale_pattern(self) -> None:
        """'SALE $XX.XX' is treated as a total."""
        match = currency_matcher.extract_total("SALE $45.67")

        assert match is not None
        assert match.value == 45.67

    def test_grand_total(self) -> None:
        """'GRAND TOTAL' is recognised and reported as the label."""
        match = currency_matcher.extract_total("GRAND TOTAL $45.67")

        assert match is not None
        assert match.value == 45.67
        assert match.label == "GRAND TOTAL"

    def test_total_sale(self) -> None:
        """'TOTAL SALE: $XX.XX' is treated as a total."""
        match = currency_matcher.extract_total("TOTAL SALE: $45.67")

        assert match is not None
        assert match.value == 45.67

    def test_balance_due(self) -> None:
        """'BALANCE DUE $XX.XX' is treated as a total."""
        match = currency_matcher.extract_total("BALANCE DUE $45.67")

        assert match is not None
        assert match.value == 45.67

    def test_multiple_amounts_picks_total(self) -> None:
        """With several amounts present, the explicitly labeled total wins."""
        receipt = """
        REGULAR 87
        10.500 GAL @ $3.67
        SUBTOTAL $38.54
        TAX $0.00
        TOTAL $38.54
        """
        match = currency_matcher.extract_total(receipt)

        assert match is not None
        assert match.value == 38.54
        assert match.pattern_name == "total_explicit"

    def test_all_amounts(self) -> None:
        """``extract_all_amounts`` returns at least the TOTAL amount."""
        receipt = """
        SUBTOTAL $35.00
        TAX $3.54
        TOTAL $38.54
        """
        amounts = currency_matcher.extract_all_amounts(receipt)

        # Only the TOTAL entry is required; further entries are tolerated.
        assert len(amounts) >= 1
        assert any(entry.value == 38.54 for entry in amounts)

    def test_comma_thousand_separator(self) -> None:
        """A comma thousands separator is stripped ('$1,234.56' -> 1234.56)."""
        match = currency_matcher.extract_total("TOTAL $1,234.56")

        assert match is not None
        assert match.value == 1234.56

    def test_reasonable_total_range(self) -> None:
        """Implausibly small totals are rejected; ordinary ones are kept."""
        # Five cents: too small to be a fuel receipt total, so no match.
        tiny = currency_matcher.extract_total("TOTAL $0.05")
        assert tiny is None

        # An ordinary amount must still be accepted.
        plausible = currency_matcher.extract_total("TOTAL $45.67")
        assert plausible is not None

    def test_receipt_context_extraction(self) -> None:
        """The total is found inside realistic multi-line receipt text."""
        receipt = """
        SHELL
        123 MAIN ST
        DATE: 01/15/2024

        UNLEADED 87
        10.500 GAL
        @ $3.679/GAL

        FUEL TOTAL    $38.63
        TAX           $0.00
        TOTAL         $38.63

        DEBIT CARD
        ************1234
        """
        match = currency_matcher.extract_total(receipt)

        assert match is not None
        assert match.value == 38.63

    def test_no_total_returns_largest(self) -> None:
        """Without a labeled total, the largest amount is used as a fallback."""
        receipt = """
        $10.50
        $5.00
        $45.67
        """
        match = currency_matcher.extract_total(receipt)

        # The biggest unlabeled amount is inferred to be the total ...
        assert match is not None
        assert match.value == 45.67
        # ... and, being inferred, it carries reduced confidence.
        assert match.confidence < 0.7

    def test_no_amounts_returns_none(self) -> None:
        """Text containing no monetary amounts produces no match."""
        match = currency_matcher.extract_total("SHELL STATION\nPUMP 5")

        assert match is None


class TestEdgeCases:
    """Currency parsing corner cases: locale, spacing and letter case."""

    def test_european_format(self) -> None:
        """A decimal comma is understood, so '45,67' is read as 45.67."""
        match = currency_matcher.extract_total("TOTAL 45,67")

        assert match is not None
        assert match.value == 45.67

    def test_spaces_in_amount(self) -> None:
        """Whitespace between the dollar sign and the digits is tolerated."""
        match = currency_matcher.extract_total("TOTAL $ 45.67")

        assert match is not None
        assert match.value == 45.67

    def test_case_insensitive(self) -> None:
        """The label matches in upper, title and lower case."""
        for label in ("TOTAL", "Total", "total"):
            match = currency_matcher.extract_total(f"{label} $45.67")

            assert match is not None, f"Failed for {label}"
            assert match.value == 45.67