fix: Build errors and tesseract removal
Some checks failed
Deploy to Staging / Build Images (pull_request) Failing after 4m14s
Deploy to Staging / Deploy to Staging (pull_request) Has been skipped
Deploy to Staging / Verify Staging (pull_request) Has been skipped
Deploy to Staging / Notify Staging Ready (pull_request) Has been skipped
Deploy to Staging / Notify Staging Failure (pull_request) Successful in 8s

This commit is contained in:
Eric Gullickson
2026-02-07 12:12:04 -06:00
parent cf114fad3c
commit b9fe222f12
16 changed files with 35 additions and 238 deletions

View File

@@ -1,7 +1,7 @@
"""Tests for OCR engine abstraction layer.
Covers: base types, exception hierarchy, PaddleOcrEngine,
TesseractEngine, CloudEngine, HybridEngine, and engine_factory.
CloudEngine, HybridEngine, and engine_factory.
"""
import io
@@ -124,7 +124,7 @@ class TestOcrEngineResult:
def test_empty_result(self) -> None:
result = OcrEngineResult(
text="", confidence=0.0, word_boxes=[], engine_name="tesseract"
text="", confidence=0.0, word_boxes=[], engine_name="paddleocr"
)
assert result.text == ""
assert result.word_boxes == []
@@ -303,85 +303,6 @@ class TestPaddleOcrEngine:
engine.recognize(_create_test_image_bytes(), OcrConfig())
# ---------------------------------------------------------------------------
# TesseractEngine
# ---------------------------------------------------------------------------
class TestTesseractEngine:
    """Tests for TesseractEngine using mocked pytesseract.

    ``pytesseract`` is never imported for real: every test either injects a
    ``MagicMock`` into ``sys.modules`` or forces the import to fail.
    """

    @pytest.fixture()
    def engine(self) -> "TesseractEngine":  # type: ignore[name-defined]
        """Create a TesseractEngine with mocked pytesseract dependency.

        Returns:
            A ``TesseractEngine`` constructed while ``pytesseract`` and the
            engine module's ``settings`` are patched. The mock module is
            also stored on the instance as ``_mock_pytesseract``.
        """
        mock_pytesseract = MagicMock()
        mock_pytesseract.Output.DICT = "dict"

        with patch.dict("sys.modules", {"pytesseract": mock_pytesseract}):
            with patch("app.engines.tesseract_engine.settings") as mock_settings:
                mock_settings.tesseract_cmd = "/usr/bin/tesseract"

                # Imported inside the patches so the engine module resolves
                # ``pytesseract`` to the mock installed above.
                from app.engines.tesseract_engine import TesseractEngine

                eng = TesseractEngine()
                eng._mock_pytesseract = mock_pytesseract  # type: ignore[attr-defined]
                return eng

    def test_name(self, engine: "TesseractEngine") -> None:  # type: ignore[name-defined]
        """The engine reports ``"tesseract"`` as its name."""
        assert engine.name == "tesseract"

    def test_build_config_default_psm(self, engine: "TesseractEngine") -> None:  # type: ignore[name-defined]
        """A default ``OcrConfig`` yields ``--psm 6``."""
        config_str = engine._build_config(OcrConfig())
        assert "--psm 6" in config_str

    def test_build_config_single_line(self, engine: "TesseractEngine") -> None:  # type: ignore[name-defined]
        """``single_line=True`` yields ``--psm 7``."""
        config_str = engine._build_config(OcrConfig(single_line=True))
        assert "--psm 7" in config_str

    def test_build_config_single_word(self, engine: "TesseractEngine") -> None:  # type: ignore[name-defined]
        """``single_word=True`` yields ``--psm 8``."""
        config_str = engine._build_config(OcrConfig(single_word=True))
        assert "--psm 8" in config_str

    def test_build_config_whitelist(self, engine: "TesseractEngine") -> None:  # type: ignore[name-defined]
        """``char_whitelist`` is forwarded as a ``tessedit_char_whitelist`` option."""
        config_str = engine._build_config(OcrConfig(char_whitelist="ABC123"))
        assert "-c tessedit_char_whitelist=ABC123" in config_str

    def test_build_config_psm_hint(self, engine: "TesseractEngine") -> None:  # type: ignore[name-defined]
        """An explicit ``psm`` hint appears in the config string."""
        config_str = engine._build_config(OcrConfig(hints={"psm": 11}))
        assert "--psm 11" in config_str

    def test_recognize_normalizes_confidence(self, engine: "TesseractEngine") -> None:  # type: ignore[name-defined]
        """Tesseract returns 0-100 confidence; engine normalizes to 0.0-1.0."""
        # NOTE(review): the geometry lists hold one entry while "text"/"conf"
        # hold two; presumably the engine skips the empty-text / -1 row before
        # reading geometry -- confirm against the engine implementation.
        engine._pytesseract.image_to_data.return_value = {
            "text": ["HELLO", ""],
            "conf": [92, -1],
            "left": [10],
            "top": [20],
            "width": [100],
            "height": [30],
        }

        result = engine.recognize(_create_test_image_bytes(), OcrConfig())

        assert result.text == "HELLO"
        assert abs(result.confidence - 0.92) < 0.01
        assert result.engine_name == "tesseract"

    def test_import_error_raises_unavailable(self) -> None:
        """A missing ``pytesseract`` surfaces as ``EngineUnavailableError``."""
        # Bind the genuine importer BEFORE ``builtins.__import__`` is patched.
        # Looking ``__import__`` up by name inside the side effect would
        # resolve to the patched mock and recurse without bound for every
        # module other than ``pytesseract``.
        real_import = __import__

        def mock_import(name, *args, **kwargs):
            if name == "pytesseract":
                raise ImportError("No module named 'pytesseract'")
            return real_import(name, *args, **kwargs)

        with patch.dict("sys.modules", {"pytesseract": None}):
            with patch("app.engines.tesseract_engine.settings") as mock_settings:
                mock_settings.tesseract_cmd = "/usr/bin/tesseract"

                with patch("builtins.__import__", side_effect=mock_import):
                    from app.engines.tesseract_engine import TesseractEngine

                    with pytest.raises(EngineUnavailableError, match="pytesseract"):
                        TesseractEngine()
# ---------------------------------------------------------------------------
# CloudEngine
# ---------------------------------------------------------------------------
@@ -637,8 +558,8 @@ class TestEngineFactory:
from app.engines.engine_factory import create_engine
create_engine("tesseract")
mock_create.assert_called_once_with("tesseract")
create_engine("google_vision")
mock_create.assert_called_once_with("google_vision")
@patch("app.engines.engine_factory.settings")
@patch("app.engines.engine_factory._create_single_engine")