diff --git a/ocr/app/extractors/vin_extractor.py b/ocr/app/extractors/vin_extractor.py index 66a694b..1edca3f 100644 --- a/ocr/app/extractors/vin_extractor.py +++ b/ocr/app/extractors/vin_extractor.py @@ -299,11 +299,12 @@ class VinExtractor(BaseExtractor): # Configure Tesseract for VIN extraction # OEM 1 = LSTM neural network engine (best accuracy) - # Disable dictionaries since VINs are not dictionary words + # NOTE: tessedit_char_whitelist does NOT work with OEM 1 (LSTM). + # Using it causes empty/erratic output. Character filtering is + # handled post-OCR by vin_validator.correct_ocr_errors() instead. config = ( f"--psm {psm} " f"--oem 1 " - f"-c tessedit_char_whitelist={self.VIN_WHITELIST} " f"-c load_system_dawg=false " f"-c load_freq_dawg=false" )