fix: Update PaddleOCR API
All checks were successful
Deploy to Staging / Build Images (pull_request) Successful in 5m6s
Deploy to Staging / Deploy to Staging (pull_request) Successful in 51s
Deploy to Staging / Verify Staging (pull_request) Successful in 8s
Deploy to Staging / Notify Staging Ready (pull_request) Successful in 8s
Deploy to Staging / Notify Staging Failure (pull_request) Has been skipped
All checks were successful
Deploy to Staging / Build Images (pull_request) Successful in 5m6s
Deploy to Staging / Deploy to Staging (pull_request) Successful in 51s
Deploy to Staging / Verify Staging (pull_request) Successful in 8s
Deploy to Staging / Notify Staging Ready (pull_request) Successful in 8s
Deploy to Staging / Notify Staging Failure (pull_request) Has been skipped
This commit is contained in:
@@ -36,7 +36,8 @@ RUN pip install --no-cache-dir -r requirements.txt \
|
|||||||
# Pre-download PaddleOCR PP-OCRv4 models during build (not at runtime).
|
# Pre-download PaddleOCR PP-OCRv4 models during build (not at runtime).
|
||||||
# Models are baked into the image so container starts are fast and
|
# Models are baked into the image so container starts are fast and
|
||||||
# no network access is needed at runtime for model download.
|
# no network access is needed at runtime for model download.
|
||||||
RUN python -c "from paddleocr import PaddleOCR; PaddleOCR(use_angle_cls=True, lang='en', use_gpu=False, show_log=False)" \
|
ENV PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK=True
|
||||||
|
RUN python -c "from paddleocr import PaddleOCR; PaddleOCR(ocr_version='PP-OCRv4', use_textline_orientation=True, lang='en', device='cpu')" \
|
||||||
&& echo "PaddleOCR PP-OCRv4 models downloaded and verified"
|
&& echo "PaddleOCR PP-OCRv4 models downloaded and verified"
|
||||||
|
|
||||||
COPY . .
|
COPY . .
|
||||||
|
|||||||
@@ -34,12 +34,12 @@ class PaddleOcrEngine(OcrEngine):
|
|||||||
from paddleocr import PaddleOCR # type: ignore[import-untyped]
|
from paddleocr import PaddleOCR # type: ignore[import-untyped]
|
||||||
|
|
||||||
self._ocr = PaddleOCR(
|
self._ocr = PaddleOCR(
|
||||||
use_angle_cls=True,
|
ocr_version="PP-OCRv4",
|
||||||
|
use_textline_orientation=True,
|
||||||
lang="en",
|
lang="en",
|
||||||
use_gpu=False,
|
device="cpu",
|
||||||
show_log=False,
|
|
||||||
)
|
)
|
||||||
logger.info("PaddleOCR PP-OCRv4 initialized (CPU, angle_cls=True)")
|
logger.info("PaddleOCR PP-OCRv4 initialized (CPU, textline_orientation=True)")
|
||||||
return self._ocr
|
return self._ocr
|
||||||
except ImportError as exc:
|
except ImportError as exc:
|
||||||
raise EngineUnavailableError(
|
raise EngineUnavailableError(
|
||||||
@@ -54,8 +54,9 @@ class PaddleOcrEngine(OcrEngine):
|
|||||||
def recognize(self, image_bytes: bytes, config: OcrConfig) -> OcrEngineResult:
|
def recognize(self, image_bytes: bytes, config: OcrConfig) -> OcrEngineResult:
|
||||||
"""Run PaddleOCR on image bytes.
|
"""Run PaddleOCR on image bytes.
|
||||||
|
|
||||||
PaddleOCR returns: list of pages, each page is a list of
|
PaddleOCR v3.x ``predict()`` returns an iterator of result objects.
|
||||||
``[[box_coords], (text, confidence)]`` entries.
|
Each result has a ``res`` dict with ``dt_polys``, ``rec_texts``,
|
||||||
|
and ``rec_scores``.
|
||||||
"""
|
"""
|
||||||
ocr = self._get_ocr()
|
ocr = self._get_ocr()
|
||||||
|
|
||||||
@@ -66,10 +67,22 @@ class PaddleOcrEngine(OcrEngine):
|
|||||||
image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
|
image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
|
||||||
img_array = np.array(image)
|
img_array = np.array(image)
|
||||||
|
|
||||||
# PaddleOCR accepts numpy arrays
|
results = list(ocr.predict(img_array))
|
||||||
results = ocr.ocr(img_array, cls=config.use_angle_cls)
|
|
||||||
|
|
||||||
if not results or not results[0]:
|
if not results:
|
||||||
|
return OcrEngineResult(
|
||||||
|
text="",
|
||||||
|
confidence=0.0,
|
||||||
|
word_boxes=[],
|
||||||
|
engine_name=self.name,
|
||||||
|
)
|
||||||
|
|
||||||
|
res = results[0].res
|
||||||
|
dt_polys = res.get("dt_polys", [])
|
||||||
|
rec_texts = res.get("rec_texts", [])
|
||||||
|
rec_scores = res.get("rec_scores", [])
|
||||||
|
|
||||||
|
if not rec_texts:
|
||||||
return OcrEngineResult(
|
return OcrEngineResult(
|
||||||
text="",
|
text="",
|
||||||
confidence=0.0,
|
confidence=0.0,
|
||||||
@@ -81,10 +94,8 @@ class PaddleOcrEngine(OcrEngine):
|
|||||||
texts: list[str] = []
|
texts: list[str] = []
|
||||||
confidences: list[float] = []
|
confidences: list[float] = []
|
||||||
|
|
||||||
for line in results[0]:
|
for i, text in enumerate(rec_texts):
|
||||||
box_coords = line[0] # [[x1,y1],[x2,y2],[x3,y3],[x4,y4]]
|
conf = float(rec_scores[i]) if i < len(rec_scores) else 0.0
|
||||||
text = line[1][0]
|
|
||||||
conf = float(line[1][1])
|
|
||||||
|
|
||||||
# Apply character whitelist filter if configured
|
# Apply character whitelist filter if configured
|
||||||
if config.char_whitelist:
|
if config.char_whitelist:
|
||||||
@@ -94,11 +105,16 @@ class PaddleOcrEngine(OcrEngine):
|
|||||||
if not text.strip():
|
if not text.strip():
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Convert quadrilateral to bounding box
|
# Convert quadrilateral polygon to bounding box
|
||||||
xs = [pt[0] for pt in box_coords]
|
x_min, y_min, width, height = 0, 0, 0, 0
|
||||||
ys = [pt[1] for pt in box_coords]
|
if i < len(dt_polys):
|
||||||
|
poly = dt_polys[i]
|
||||||
|
xs = [pt[0] for pt in poly]
|
||||||
|
ys = [pt[1] for pt in poly]
|
||||||
x_min, y_min = int(min(xs)), int(min(ys))
|
x_min, y_min = int(min(xs)), int(min(ys))
|
||||||
x_max, y_max = int(max(xs)), int(max(ys))
|
x_max, y_max = int(max(xs)), int(max(ys))
|
||||||
|
width = x_max - x_min
|
||||||
|
height = y_max - y_min
|
||||||
|
|
||||||
word_boxes.append(
|
word_boxes.append(
|
||||||
WordBox(
|
WordBox(
|
||||||
@@ -106,8 +122,8 @@ class PaddleOcrEngine(OcrEngine):
|
|||||||
confidence=conf,
|
confidence=conf,
|
||||||
x=x_min,
|
x=x_min,
|
||||||
y=y_min,
|
y=y_min,
|
||||||
width=x_max - x_min,
|
width=width,
|
||||||
height=y_max - y_min,
|
height=height,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
texts.append(text.strip())
|
texts.append(text.strip())
|
||||||
|
|||||||
@@ -41,6 +41,19 @@ def _make_result(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _mock_paddle_result(
    dt_polys: list, rec_texts: list[str], rec_scores: list[float]
) -> MagicMock:
    """Create a mock PaddleOCR v3.x predict() result object.

    The mock carries a real ``res`` dict (not an auto-generated mock
    attribute), so code under test that calls ``result.res.get(...)``
    receives the plain lists supplied here.

    Args:
        dt_polys: Detected text polygons; each polygon is a list of
            ``[x, y]`` points.
        rec_texts: Recognized text strings.
        rec_scores: Recognition confidence values for the texts.

    Returns:
        A ``MagicMock`` whose ``res`` attribute is a dict with the keys
        ``"dt_polys"``, ``"rec_texts"`` and ``"rec_scores"``.
    """
    result = MagicMock()
    # Assign a concrete dict so ``.get()`` lookups return the given lists
    # instead of new MagicMock instances.
    result.res = {
        "dt_polys": dt_polys,
        "rec_texts": rec_texts,
        "rec_scores": rec_scores,
    }
    return result
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Exception hierarchy
|
# Exception hierarchy
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -182,7 +195,9 @@ class TestPaddleOcrEngine:
|
|||||||
|
|
||||||
engine = PaddleOcrEngine()
|
engine = PaddleOcrEngine()
|
||||||
mock_ocr = MagicMock()
|
mock_ocr = MagicMock()
|
||||||
mock_ocr.ocr.return_value = [None]
|
mock_ocr.predict.return_value = iter([
|
||||||
|
_mock_paddle_result(dt_polys=[], rec_texts=[], rec_scores=[])
|
||||||
|
])
|
||||||
engine._ocr = mock_ocr
|
engine._ocr = mock_ocr
|
||||||
|
|
||||||
result = engine.recognize(_create_test_image_bytes(), OcrConfig())
|
result = engine.recognize(_create_test_image_bytes(), OcrConfig())
|
||||||
@@ -196,12 +211,16 @@ class TestPaddleOcrEngine:
|
|||||||
|
|
||||||
engine = PaddleOcrEngine()
|
engine = PaddleOcrEngine()
|
||||||
mock_ocr = MagicMock()
|
mock_ocr = MagicMock()
|
||||||
mock_ocr.ocr.return_value = [
|
mock_ocr.predict.return_value = iter([
|
||||||
[
|
_mock_paddle_result(
|
||||||
[[[10, 20], [110, 20], [110, 50], [10, 50]], ("HELLO", 0.95)],
|
dt_polys=[
|
||||||
[[[10, 60], [110, 60], [110, 90], [10, 90]], ("WORLD", 0.88)],
|
[[10, 20], [110, 20], [110, 50], [10, 50]],
|
||||||
]
|
[[10, 60], [110, 60], [110, 90], [10, 90]],
|
||||||
]
|
],
|
||||||
|
rec_texts=["HELLO", "WORLD"],
|
||||||
|
rec_scores=[0.95, 0.88],
|
||||||
|
)
|
||||||
|
])
|
||||||
engine._ocr = mock_ocr
|
engine._ocr = mock_ocr
|
||||||
|
|
||||||
result = engine.recognize(_create_test_image_bytes(), OcrConfig())
|
result = engine.recognize(_create_test_image_bytes(), OcrConfig())
|
||||||
@@ -218,11 +237,13 @@ class TestPaddleOcrEngine:
|
|||||||
|
|
||||||
engine = PaddleOcrEngine()
|
engine = PaddleOcrEngine()
|
||||||
mock_ocr = MagicMock()
|
mock_ocr = MagicMock()
|
||||||
mock_ocr.ocr.return_value = [
|
mock_ocr.predict.return_value = iter([
|
||||||
[
|
_mock_paddle_result(
|
||||||
[[[0, 0], [100, 0], [100, 30], [0, 30]], ("1HG-BH4!", 0.9)],
|
dt_polys=[[[0, 0], [100, 0], [100, 30], [0, 30]]],
|
||||||
]
|
rec_texts=["1HG-BH4!"],
|
||||||
]
|
rec_scores=[0.9],
|
||||||
|
)
|
||||||
|
])
|
||||||
engine._ocr = mock_ocr
|
engine._ocr = mock_ocr
|
||||||
|
|
||||||
config = OcrConfig(char_whitelist="ABCDEFGHJKLMNPRSTUVWXYZ0123456789")
|
config = OcrConfig(char_whitelist="ABCDEFGHJKLMNPRSTUVWXYZ0123456789")
|
||||||
@@ -237,11 +258,13 @@ class TestPaddleOcrEngine:
|
|||||||
engine = PaddleOcrEngine()
|
engine = PaddleOcrEngine()
|
||||||
mock_ocr = MagicMock()
|
mock_ocr = MagicMock()
|
||||||
# Slightly rotated quad: min x=8, min y=20, max x=110, max y=55
|
# Slightly rotated quad: min x=8, min y=20, max x=110, max y=55
|
||||||
mock_ocr.ocr.return_value = [
|
mock_ocr.predict.return_value = iter([
|
||||||
[
|
_mock_paddle_result(
|
||||||
[[[10, 20], [110, 25], [108, 55], [8, 50]], ("TEXT", 0.9)],
|
dt_polys=[[[10, 20], [110, 25], [108, 55], [8, 50]]],
|
||||||
]
|
rec_texts=["TEXT"],
|
||||||
]
|
rec_scores=[0.9],
|
||||||
|
)
|
||||||
|
])
|
||||||
engine._ocr = mock_ocr
|
engine._ocr = mock_ocr
|
||||||
|
|
||||||
result = engine.recognize(_create_test_image_bytes(), OcrConfig())
|
result = engine.recognize(_create_test_image_bytes(), OcrConfig())
|
||||||
@@ -257,11 +280,13 @@ class TestPaddleOcrEngine:
|
|||||||
|
|
||||||
engine = PaddleOcrEngine()
|
engine = PaddleOcrEngine()
|
||||||
mock_ocr = MagicMock()
|
mock_ocr = MagicMock()
|
||||||
mock_ocr.ocr.return_value = [
|
mock_ocr.predict.return_value = iter([
|
||||||
[
|
_mock_paddle_result(
|
||||||
[[[0, 0], [50, 0], [50, 20], [0, 20]], ("---", 0.9)],
|
dt_polys=[[[0, 0], [50, 0], [50, 20], [0, 20]]],
|
||||||
]
|
rec_texts=["---"],
|
||||||
]
|
rec_scores=[0.9],
|
||||||
|
)
|
||||||
|
])
|
||||||
engine._ocr = mock_ocr
|
engine._ocr = mock_ocr
|
||||||
|
|
||||||
config = OcrConfig(char_whitelist="ABC")
|
config = OcrConfig(char_whitelist="ABC")
|
||||||
@@ -296,7 +321,7 @@ class TestPaddleOcrEngine:
|
|||||||
|
|
||||||
engine = PaddleOcrEngine()
|
engine = PaddleOcrEngine()
|
||||||
mock_ocr = MagicMock()
|
mock_ocr = MagicMock()
|
||||||
mock_ocr.ocr.side_effect = RuntimeError("OCR crashed")
|
mock_ocr.predict.side_effect = RuntimeError("OCR crashed")
|
||||||
engine._ocr = mock_ocr
|
engine._ocr = mock_ocr
|
||||||
|
|
||||||
with pytest.raises(EngineProcessingError, match="PaddleOCR recognition failed"):
|
with pytest.raises(EngineProcessingError, match="PaddleOCR recognition failed"):
|
||||||
|
|||||||
Reference in New Issue
Block a user