"""Tests for date pattern matching.""" import pytest from app.patterns.date_patterns import DatePatternMatcher, date_matcher class TestDatePatternMatcher: """Test date pattern extraction.""" def test_mm_dd_yyyy_slash(self) -> None: """Test MM/DD/YYYY format.""" text = "DATE: 01/15/2024" result = date_matcher.extract_best_date(text) assert result is not None assert result.value == "2024-01-15" assert result.confidence > 0.9 def test_mm_dd_yy_slash(self) -> None: """Test MM/DD/YY format with 2-digit year.""" text = "01/15/24" result = date_matcher.extract_best_date(text) assert result is not None assert result.value == "2024-01-15" def test_mm_dd_yyyy_dash(self) -> None: """Test MM-DD-YYYY format.""" text = "01-15-2024" result = date_matcher.extract_best_date(text) assert result is not None assert result.value == "2024-01-15" def test_iso_format(self) -> None: """Test ISO YYYY-MM-DD format.""" text = "2024-01-15" result = date_matcher.extract_best_date(text) assert result is not None assert result.value == "2024-01-15" assert result.confidence > 0.95 def test_month_name_format(self) -> None: """Test 'Jan 15, 2024' format.""" text = "Jan 15, 2024" result = date_matcher.extract_best_date(text) assert result is not None assert result.value == "2024-01-15" def test_month_name_no_comma(self) -> None: """Test 'Jan 15 2024' format without comma.""" text = "Jan 15 2024" result = date_matcher.extract_best_date(text) assert result is not None assert result.value == "2024-01-15" def test_day_month_year_format(self) -> None: """Test '15 Jan 2024' format.""" text = "15 Jan 2024" result = date_matcher.extract_best_date(text) assert result is not None assert result.value == "2024-01-15" def test_full_month_name(self) -> None: """Test full month name like 'January'.""" text = "January 15, 2024" result = date_matcher.extract_best_date(text) assert result is not None assert result.value == "2024-01-15" def test_multiple_dates_returns_best(self) -> None: """Test that multiple dates returns highest confidence.""" text = "Date: 01/15/2024\nExpires: 01/15/2025" results = date_matcher.extract_dates(text) assert len(results) == 2 # Both should be valid assert all(r.confidence > 0.5 for r in results) def test_invalid_date_rejected(self) -> None: """Test that invalid dates are rejected.""" text = "13/45/2024" # Invalid month/day result = date_matcher.extract_best_date(text) assert result is None def test_receipt_context_text(self) -> None: """Test date extraction from realistic receipt text.""" text = """ SHELL STATION 123 MAIN ST DATE: 01/15/2024 TIME: 14:32 PUMP #5 REGULAR 87 10.500 GAL TOTAL $38.50 """ result = date_matcher.extract_best_date(text) assert result is not None assert result.value == "2024-01-15" def test_no_date_returns_none(self) -> None: """Test that text without dates returns None.""" text = "SHELL STATION\nTOTAL $38.50" result = date_matcher.extract_best_date(text) assert result is None def test_confidence_boost_near_keyword(self) -> None: """Test confidence boost when date is near DATE keyword.""" text_with_keyword = "DATE: 01/15/2024" text_without = "01/15/2024" result_with = date_matcher.extract_best_date(text_with_keyword) result_without = date_matcher.extract_best_date(text_without) assert result_with is not None assert result_without is not None # Keyword proximity should boost confidence assert result_with.confidence >= result_without.confidence class TestEdgeCases: """Test edge cases in date parsing.""" def test_year_2000(self) -> None: """Test 2-digit year 00 is parsed as 2000.""" text = "01/15/00" result = date_matcher.extract_best_date(text) assert result is not None assert result.value == "2000-01-15" def test_leap_year_date(self) -> None: """Test Feb 29 on leap year.""" text = "02/29/2024" # 2024 is a leap year result = date_matcher.extract_best_date(text) assert result is not None assert result.value == "2024-02-29" def test_leap_year_invalid(self) -> None: """Test Feb 29 on non-leap year is rejected.""" text = "02/29/2023" # 2023 is not a leap year result = date_matcher.extract_best_date(text) assert result is None def test_september_abbrev(self) -> None: """Test September abbreviation (Sept vs Sep).""" for abbrev in ["Sep", "Sept", "September"]: text = f"{abbrev} 15, 2024" result = date_matcher.extract_best_date(text) assert result is not None, f"Failed for {abbrev}" assert result.value == "2024-09-15"