fix: update PaddleOCR engine for the PaddleOCR 3.x API (predict() and renamed constructor options)

2026-02-07 14:44:06 -06:00
parent b9fe222f12
commit 639ca117f1
3 changed files with 86 additions and 44 deletions
--- a/ocr/app/engines/paddle_engine.py
+++ b/ocr/app/engines/paddle_engine.py
@@ -34,12 +34,12 @@ class PaddleOcrEngine(OcrEngine):
            from paddleocr import PaddleOCR  # type: ignore[import-untyped]

            self._ocr = PaddleOCR(
-                use_angle_cls=True,
+                ocr_version="PP-OCRv4",
+                use_textline_orientation=True,
                lang="en",
-                use_gpu=False,
-                show_log=False,
+                device="cpu",
            )
-            logger.info("PaddleOCR PP-OCRv4 initialized (CPU, angle_cls=True)")
+            logger.info("PaddleOCR PP-OCRv4 initialized (CPU, textline_orientation=True)")
            return self._ocr
        except ImportError as exc:
            raise EngineUnavailableError(
@@ -54,8 +54,9 @@ class PaddleOcrEngine(OcrEngine):
    def recognize(self, image_bytes: bytes, config: OcrConfig) -> OcrEngineResult:
        """Run PaddleOCR on image bytes.

-        PaddleOCR returns: list of pages, each page is a list of
-        ``[[box_coords], (text, confidence)]`` entries.
+        PaddleOCR v3.x ``predict()`` returns an iterator of result objects.
+        Each result has a ``res`` dict with ``dt_polys``, ``rec_texts``,
+        and ``rec_scores``.
        """
        ocr = self._get_ocr()

@@ -66,10 +67,22 @@ class PaddleOcrEngine(OcrEngine):
            image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
            img_array = np.array(image)

-            # PaddleOCR accepts numpy arrays
-            results = ocr.ocr(img_array, cls=config.use_angle_cls)
+            results = list(ocr.predict(img_array))

-            if not results or not results[0]:
+            if not results:
+                return OcrEngineResult(
+                    text="",
+                    confidence=0.0,
+                    word_boxes=[],
+                    engine_name=self.name,
+                )
+
+            res = results[0].res
+            dt_polys = res.get("dt_polys", [])
+            rec_texts = res.get("rec_texts", [])
+            rec_scores = res.get("rec_scores", [])
+
+            if not rec_texts:
                return OcrEngineResult(
                    text="",
                    confidence=0.0,
@@ -81,10 +94,8 @@ class PaddleOcrEngine(OcrEngine):
            texts: list[str] = []
            confidences: list[float] = []

-            for line in results[0]:
-                box_coords = line[0]  # [[x1,y1],[x2,y2],[x3,y3],[x4,y4]]
-                text = line[1][0]
-                conf = float(line[1][1])
+            for i, text in enumerate(rec_texts):
+                conf = float(rec_scores[i]) if i < len(rec_scores) else 0.0

                # Apply character whitelist filter if configured
                if config.char_whitelist:
@@ -94,11 +105,16 @@ class PaddleOcrEngine(OcrEngine):
                if not text.strip():
                    continue

-                # Convert quadrilateral to bounding box
-                xs = [pt[0] for pt in box_coords]
-                ys = [pt[1] for pt in box_coords]
-                x_min, y_min = int(min(xs)), int(min(ys))
-                x_max, y_max = int(max(xs)), int(max(ys))
+                # Convert quadrilateral polygon to bounding box
+                x_min, y_min, width, height = 0, 0, 0, 0
+                if i < len(dt_polys):
+                    poly = dt_polys[i]
+                    xs = [pt[0] for pt in poly]
+                    ys = [pt[1] for pt in poly]
+                    x_min, y_min = int(min(xs)), int(min(ys))
+                    x_max, y_max = int(max(xs)), int(max(ys))
+                    width = x_max - x_min
+                    height = y_max - y_min

                word_boxes.append(
                    WordBox(
@@ -106,8 +122,8 @@ class PaddleOcrEngine(OcrEngine):
                        confidence=conf,
                        x=x_min,
                        y=y_min,
-                        width=x_max - x_min,
-                        height=y_max - y_min,
+                        width=width,
+                        height=height,
                    )
                )
                texts.append(text.strip())