feat: add owner's manual OCR pipeline (refs #71)
All checks were successful
Deploy to Staging / Build Images (pull_request) Successful in 3m1s
Deploy to Staging / Deploy to Staging (pull_request) Successful in 31s
Deploy to Staging / Verify Staging (pull_request) Successful in 2m19s
Deploy to Staging / Notify Staging Ready (pull_request) Successful in 7s
Deploy to Staging / Notify Staging Failure (pull_request) Has been skipped
All checks were successful
Deploy to Staging / Build Images (pull_request) Successful in 3m1s
Deploy to Staging / Deploy to Staging (pull_request) Successful in 31s
Deploy to Staging / Verify Staging (pull_request) Successful in 2m19s
Deploy to Staging / Notify Staging Ready (pull_request) Successful in 7s
Deploy to Staging / Notify Staging Failure (pull_request) Has been skipped
Implement async PDF processing for owner's manuals with maintenance schedule extraction:

- Add PDF preprocessor with PyMuPDF for text/scanned PDF handling
- Add maintenance pattern matching (mileage, time, fluid specs)
- Add service name mapping to maintenance subtypes
- Add table detection and parsing for schedule tables
- Add manual extractor orchestrating the complete pipeline
- Add POST /extract/manual endpoint for async job submission
- Add Redis job queue support for manual extraction jobs
- Add progress tracking during processing

Processing pipeline:

1. Analyze PDF structure (text layer vs scanned)
2. Find maintenance schedule sections
3. Extract text or OCR scanned pages at 300 DPI
4. Detect and parse maintenance tables
5. Normalize service names and extract intervals
6. Return structured maintenance schedules with confidence scores

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
116
ocr/tests/test_service_mapping.py
Normal file
116
ocr/tests/test_service_mapping.py
Normal file
@@ -0,0 +1,116 @@
|
||||
"""Tests for service name mapping."""
|
||||
import pytest
|
||||
|
||||
from app.patterns.service_mapping import service_mapper
|
||||
|
||||
|
||||
class TestServiceMapping:
    """Checks that ``service_mapper.map_service`` resolves known service names.

    Each test feeds one service phrase to the mapper and verifies the
    returned mapping (its subtypes and, where asserted, its normalized
    name, category, or confidence).
    """

    def test_engine_oil_mapping(self) -> None:
        """Map 'engine oil' to the Engine Oil Change routine-maintenance entry."""
        mapped = service_mapper.map_service("engine oil")
        assert mapped is not None
        assert mapped.normalized_name == "Engine Oil Change"
        assert "Engine Oil" in mapped.subtypes
        assert mapped.category == "routine_maintenance"

    def test_oil_change_mapping(self) -> None:
        """Map 'oil change' to a result carrying the Engine Oil subtype."""
        mapped = service_mapper.map_service("oil change")
        assert mapped is not None
        assert "Engine Oil" in mapped.subtypes

    def test_air_filter_mapping(self) -> None:
        """Map 'engine air filter' to the Air Filter Replacement entry."""
        mapped = service_mapper.map_service("engine air filter")
        assert mapped is not None
        assert mapped.normalized_name == "Air Filter Replacement"
        assert "Air Filter Element" in mapped.subtypes

    def test_cabin_filter_mapping(self) -> None:
        """Map 'cabin air filter' to the cabin filter/purifier subtype."""
        mapped = service_mapper.map_service("cabin air filter")
        assert mapped is not None
        assert "Cabin Air Filter / Purifier" in mapped.subtypes

    def test_tire_rotation_mapping(self) -> None:
        """Map 'tire rotation' to the Tires subtype with confidence >= 0.95."""
        mapped = service_mapper.map_service("tire rotation")
        assert mapped is not None
        assert "Tires" in mapped.subtypes
        assert mapped.confidence >= 0.95

    def test_brake_inspection_mapping(self) -> None:
        """Map 'brake inspection' to the brakes/traction-control subtype."""
        mapped = service_mapper.map_service("brake inspection")
        assert mapped is not None
        assert "Brakes and Traction Control" in mapped.subtypes

    def test_coolant_mapping(self) -> None:
        """Map 'engine coolant' to the Coolant subtype."""
        mapped = service_mapper.map_service("engine coolant")
        assert mapped is not None
        assert "Coolant" in mapped.subtypes

    def test_transmission_fluid_mapping(self) -> None:
        """Map 'automatic transmission fluid' to the A/T fluid subtype."""
        mapped = service_mapper.map_service("automatic transmission fluid")
        assert mapped is not None
        assert "Fluid - A/T" in mapped.subtypes

    def test_spark_plug_mapping(self) -> None:
        """Map 'spark plugs' to the Spark Plug subtype."""
        mapped = service_mapper.map_service("spark plugs")
        assert mapped is not None
        assert "Spark Plug" in mapped.subtypes

    def test_wiper_blade_mapping(self) -> None:
        """Map 'wiper blades' to the Wiper Blade subtype."""
        mapped = service_mapper.map_service("wiper blades")
        assert mapped is not None
        assert "Wiper Blade" in mapped.subtypes

    def test_unknown_service(self) -> None:
        """An unrecognized service phrase yields ``None``."""
        mapped = service_mapper.map_service("quantum flux capacitor")
        assert mapped is None

    def test_case_insensitive(self) -> None:
        """An upper-case service name still resolves to Engine Oil."""
        mapped = service_mapper.map_service("ENGINE OIL")
        assert mapped is not None
        assert "Engine Oil" in mapped.subtypes

    def test_partial_match(self) -> None:
        """A service name embedded in a longer sentence is still matched."""
        mapped = service_mapper.map_service("Replace engine oil and filter")
        assert mapped is not None
        assert "Engine Oil" in mapped.subtypes
|
||||
|
||||
|
||||
class TestFuzzyMapping:
    """Checks for ``service_mapper.map_service_fuzzy``."""

    def test_fuzzy_oil_change(self) -> None:
        """A reworded oil-change phrase resolves to the Engine Oil subtype."""
        fuzzy_hit = service_mapper.map_service_fuzzy("change the engine oil")
        assert fuzzy_hit is not None
        assert "Engine Oil" in fuzzy_hit.subtypes

    def test_fuzzy_low_threshold(self) -> None:
        """With ``threshold=0.3`` the bare word 'oil' still produces a match."""
        fuzzy_hit = service_mapper.map_service_fuzzy("oil", threshold=0.3)
        # Expected to be a partial match against "engine oil"; only the
        # presence of a result is asserted here.
        assert fuzzy_hit is not None
|
||||
|
||||
|
||||
class TestKeywords:
    """Checks for ``service_mapper.get_all_service_keywords``."""

    def test_get_keywords(self) -> None:
        """The keyword collection is non-empty and contains the expected terms."""
        all_keywords = service_mapper.get_all_service_keywords()
        assert len(all_keywords) > 0
        assert "engine oil" in all_keywords
        assert "service" in all_keywords
        assert "maintenance" in all_keywords
|
||||
Reference in New Issue
Block a user