feat: Improve OCR process - replace Tesseract with PaddleOCR (#115) #122

Merged
egullickson merged 16 commits from issue-115-improve-ocr-paddleocr into main 2026-02-08 01:13:35 +00:00
2 changed files with 799 additions and 1 deletions
Showing only changes of commit cf114fad3c - Show all commits

File diff suppressed because one or more lines are too long

View File

@@ -15,6 +15,7 @@ FROM ${REGISTRY_MIRRORS}/python:3.13-slim
# - libgomp1: OpenMP runtime required by PaddlePaddle
# - libheif1/libheif-dev: HEIF image support (iPhone photos)
# - libglib2.0-0: GLib shared library (OpenCV dependency)
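# - libgl1-mesa-glx: OpenGL runtime providing libGL.so.1 (required by OpenCV's cv2, which PaddleX pulls in)
#   NOTE(review): libgl1-mesa-glx is a transitional package that newer Debian releases no longer ship;
#   confirm it still resolves on this base image, otherwise install libgl1 instead.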
# - libmagic1: File type detection
# - curl: HTTP client used to probe the health check endpoint
RUN apt-get update && apt-get install -y --no-install-recommends \
@@ -24,6 +25,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
libheif1 \
libheif-dev \
libglib2.0-0 \
libgl1-mesa-glx \
libmagic1 \
curl \
&& rm -rf /var/lib/apt/lists/*