fix: Build errors and Tesseract removal

2026-02-07 12:12:04 -06:00
parent cf114fad3c
commit b9fe222f12
16 changed files with 35 additions and 238 deletions
--- a/ocr/app/preprocessors/vin_preprocessor.py
+++ b/ocr/app/preprocessors/vin_preprocessor.py
@@ -93,7 +93,7 @@ class VinPreprocessor:
            gray = cv_image
        steps_applied.append("grayscale")

-        # Upscale small images for better OCR (Tesseract needs ~300 DPI)
+        # Upscale small images for better OCR (~300 DPI recommended)
        gray = self._ensure_minimum_resolution(gray)
        steps_applied.append("resolution_check")

@@ -129,14 +129,14 @@ class VinPreprocessor:
        )

    # Minimum width in pixels for reliable VIN OCR.
-    # A 17-char VIN needs ~30px per character for Tesseract accuracy.
+    # A 17-char VIN needs ~30px per character for accurate OCR.
    MIN_WIDTH_FOR_VIN = 600

    def _ensure_minimum_resolution(self, image: np.ndarray) -> np.ndarray:
        """
        Upscale image if too small for reliable OCR.

-        Tesseract works best at ~300 DPI. Mobile photos of VINs may have
+        OCR works best at ~300 DPI. Mobile photos of VINs may have
        the text occupy only a small portion of the frame, resulting in
        low effective resolution for the VIN characters.
        """
@@ -160,7 +160,7 @@ class VinPreprocessor:
        Colored backgrounds have a low min value (e.g. green sticker:
        min(130,230,150) = 130) → inverted to 125 (medium gray).

-        The inversion ensures Tesseract always receives dark-text-on-
+        The inversion ensures the OCR engine always receives dark-text-on-
        light-background, which is the polarity it expects.
        """
        b_channel, g_channel, r_channel = cv2.split(bgr_image)
@@ -168,8 +168,8 @@ class VinPreprocessor:
        min_channel = np.minimum(np.minimum(b_channel, g_channel), r_channel)

        # Invert so white text (min=255) becomes black (0) and colored
-        # backgrounds (min~130) become lighter gray (~125).  Tesseract
-        # expects dark text on light background.
+        # backgrounds (min~130) become lighter gray (~125).  OCR engines
+        # expect dark text on light background.
        inverted = cv2.bitwise_not(min_channel)

        gray = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)