fix: Build errors and tesseract removal
Some checks failed
Deploy to Staging / Build Images (pull_request) Failing after 4m14s
Deploy to Staging / Deploy to Staging (pull_request) Has been skipped
Deploy to Staging / Verify Staging (pull_request) Has been skipped
Deploy to Staging / Notify Staging Ready (pull_request) Has been skipped
Deploy to Staging / Notify Staging Failure (pull_request) Successful in 8s

This commit is contained in:
Eric Gullickson
2026-02-07 12:12:04 -06:00
parent cf114fad3c
commit b9fe222f12
16 changed files with 35 additions and 238 deletions

View File

@@ -5,9 +5,9 @@ import time
from dataclasses import dataclass, field
from typing import Callable, Optional
-import pytesseract
from PIL import Image
+from app.engines import create_engine, OcrConfig
from app.preprocessors.pdf_preprocessor import pdf_preprocessor, PdfInfo
from app.table_extraction.detector import table_detector, DetectedTable
from app.table_extraction.parser import table_parser, ParsedScheduleRow
@@ -243,8 +243,9 @@ class ManualExtractor:
# OCR the full page
try:
-image = Image.open(io.BytesIO(image_bytes))
-ocr_text = pytesseract.image_to_string(image)
+engine = create_engine()
+ocr_result = engine.recognize(image_bytes, OcrConfig())
+ocr_text = ocr_result.text
# Mark tables as maintenance if page contains maintenance keywords
for table in detected_tables:
@@ -358,8 +359,9 @@ class ManualExtractor:
if not text and first_page.image_bytes:
# OCR first page
-image = Image.open(io.BytesIO(first_page.image_bytes))
-text = pytesseract.image_to_string(image)
+engine = create_engine()
+ocr_result = engine.recognize(first_page.image_bytes, OcrConfig())
+text = ocr_result.text
if text:
return self._parse_vehicle_from_text(text)

View File

@@ -316,8 +316,8 @@ class VinExtractor(BaseExtractor):
single-line - Treat as a single text line
single-word - Treat as a single word
-For PaddleOCR, angle classification handles rotated/angled text
-inherently, replacing the need for Tesseract PSM mode fallbacks.
+PaddleOCR angle classification handles rotated/angled text
+inherently, so no PSM mode fallbacks are needed.
Returns:
List of VIN candidates