fix: Update PaddleOCR API

2026-02-07 14:44:06 -06:00
parent b9fe222f12
commit 639ca117f1
3 changed files with 86 additions and 44 deletions
--- a/ocr/Dockerfile
+++ b/ocr/Dockerfile
@@ -36,7 +36,8 @@ RUN pip install --no-cache-dir -r requirements.txt \
 # Pre-download PaddleOCR PP-OCRv4 models during build (not at runtime).
 # Models are baked into the image so container starts are fast and
 # no network access is needed at runtime for model download.
-RUN python -c "from paddleocr import PaddleOCR; PaddleOCR(use_angle_cls=True, lang='en', use_gpu=False, show_log=False)" \
+ENV PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK=True
+RUN python -c "from paddleocr import PaddleOCR; PaddleOCR(ocr_version='PP-OCRv4', use_textline_orientation=True, lang='en', device='cpu')" \
    && echo "PaddleOCR PP-OCRv4 models downloaded and verified"

 COPY . .