fix: Update PaddleOCR API
All checks were successful
Deploy to Staging / Build Images (pull_request) Successful in 5m6s
Deploy to Staging / Deploy to Staging (pull_request) Successful in 51s
Deploy to Staging / Verify Staging (pull_request) Successful in 8s
Deploy to Staging / Notify Staging Ready (pull_request) Successful in 8s
Deploy to Staging / Notify Staging Failure (pull_request) Has been skipped

This commit is contained in:
Eric Gullickson
2026-02-07 14:44:06 -06:00
parent b9fe222f12
commit 639ca117f1
3 changed files with 86 additions and 44 deletions

View File

@@ -41,6 +41,19 @@ def _make_result(
)
def _mock_paddle_result(
    dt_polys: list, rec_texts: list[str], rec_scores: list[float]
) -> MagicMock:
    """Create a mock PaddleOCR v3.x predict() result object.

    The three arguments are stored, unmodified and uncopied, in a dict that
    is exposed as the mock's ``res`` attribute.

    Args:
        dt_polys: Value stored under ``res["dt_polys"]``. The tests in this
            file pass one four-point ``[x, y]`` quad per recognized text.
        rec_texts: Value stored under ``res["rec_texts"]``.
        rec_scores: Value stored under ``res["rec_scores"]``.

    Returns:
        A ``MagicMock`` whose ``res`` attribute is the dict described above.
        Every other attribute falls back to MagicMock's auto-created
        children.
    """
    result = MagicMock()
    # Assign a real dict so lookups by key return the supplied lists rather
    # than auto-generated child mocks.
    result.res = {
        "dt_polys": dt_polys,
        "rec_texts": rec_texts,
        "rec_scores": rec_scores,
    }
    return result
# ---------------------------------------------------------------------------
# Exception hierarchy
# ---------------------------------------------------------------------------
@@ -182,7 +195,9 @@ class TestPaddleOcrEngine:
engine = PaddleOcrEngine()
mock_ocr = MagicMock()
mock_ocr.ocr.return_value = [None]
mock_ocr.predict.return_value = iter([
_mock_paddle_result(dt_polys=[], rec_texts=[], rec_scores=[])
])
engine._ocr = mock_ocr
result = engine.recognize(_create_test_image_bytes(), OcrConfig())
@@ -196,12 +211,16 @@ class TestPaddleOcrEngine:
engine = PaddleOcrEngine()
mock_ocr = MagicMock()
mock_ocr.ocr.return_value = [
[
[[[10, 20], [110, 20], [110, 50], [10, 50]], ("HELLO", 0.95)],
[[[10, 60], [110, 60], [110, 90], [10, 90]], ("WORLD", 0.88)],
]
]
mock_ocr.predict.return_value = iter([
_mock_paddle_result(
dt_polys=[
[[10, 20], [110, 20], [110, 50], [10, 50]],
[[10, 60], [110, 60], [110, 90], [10, 90]],
],
rec_texts=["HELLO", "WORLD"],
rec_scores=[0.95, 0.88],
)
])
engine._ocr = mock_ocr
result = engine.recognize(_create_test_image_bytes(), OcrConfig())
@@ -218,11 +237,13 @@ class TestPaddleOcrEngine:
engine = PaddleOcrEngine()
mock_ocr = MagicMock()
mock_ocr.ocr.return_value = [
[
[[[0, 0], [100, 0], [100, 30], [0, 30]], ("1HG-BH4!", 0.9)],
]
]
mock_ocr.predict.return_value = iter([
_mock_paddle_result(
dt_polys=[[[0, 0], [100, 0], [100, 30], [0, 30]]],
rec_texts=["1HG-BH4!"],
rec_scores=[0.9],
)
])
engine._ocr = mock_ocr
config = OcrConfig(char_whitelist="ABCDEFGHJKLMNPRSTUVWXYZ0123456789")
@@ -237,11 +258,13 @@ class TestPaddleOcrEngine:
engine = PaddleOcrEngine()
mock_ocr = MagicMock()
# Slightly rotated quad: min x=8, min y=20, max x=110, max y=55
mock_ocr.ocr.return_value = [
[
[[[10, 20], [110, 25], [108, 55], [8, 50]], ("TEXT", 0.9)],
]
]
mock_ocr.predict.return_value = iter([
_mock_paddle_result(
dt_polys=[[[10, 20], [110, 25], [108, 55], [8, 50]]],
rec_texts=["TEXT"],
rec_scores=[0.9],
)
])
engine._ocr = mock_ocr
result = engine.recognize(_create_test_image_bytes(), OcrConfig())
@@ -257,11 +280,13 @@ class TestPaddleOcrEngine:
engine = PaddleOcrEngine()
mock_ocr = MagicMock()
mock_ocr.ocr.return_value = [
[
[[[0, 0], [50, 0], [50, 20], [0, 20]], ("---", 0.9)],
]
]
mock_ocr.predict.return_value = iter([
_mock_paddle_result(
dt_polys=[[[0, 0], [50, 0], [50, 20], [0, 20]]],
rec_texts=["---"],
rec_scores=[0.9],
)
])
engine._ocr = mock_ocr
config = OcrConfig(char_whitelist="ABC")
@@ -296,7 +321,7 @@ class TestPaddleOcrEngine:
engine = PaddleOcrEngine()
mock_ocr = MagicMock()
mock_ocr.ocr.side_effect = RuntimeError("OCR crashed")
mock_ocr.predict.side_effect = RuntimeError("OCR crashed")
engine._ocr = mock_ocr
with pytest.raises(EngineProcessingError, match="PaddleOCR recognition failed"):