chore: UX design audit cleanup and receipt flow improvements #186
@@ -162,15 +162,18 @@ class ReceiptExtractor(BaseExtractor):
|
|||||||
def _extract_pdf_first_page(self, pdf_bytes: bytes) -> bytes:
|
def _extract_pdf_first_page(self, pdf_bytes: bytes) -> bytes:
|
||||||
"""Extract first page of PDF as PNG image for OCR processing."""
|
"""Extract first page of PDF as PNG image for OCR processing."""
|
||||||
try:
|
try:
|
||||||
from pdf2image import convert_from_bytes
|
import fitz # PyMuPDF
|
||||||
|
|
||||||
images = convert_from_bytes(pdf_bytes, first_page=1, last_page=1, dpi=300)
|
doc = fitz.open(stream=pdf_bytes, filetype="pdf")
|
||||||
if images:
|
page = doc[0]
|
||||||
buffer = io.BytesIO()
|
# Render at 300 DPI (default is 72, so scale factor = 300/72)
|
||||||
images[0].save(buffer, format="PNG")
|
mat = fitz.Matrix(300 / 72, 300 / 72)
|
||||||
return buffer.getvalue()
|
pix = page.get_pixmap(matrix=mat)
|
||||||
|
png_bytes = pix.tobytes("png")
|
||||||
|
doc.close()
|
||||||
|
return png_bytes
|
||||||
except ImportError:
|
except ImportError:
|
||||||
logger.warning("pdf2image not available, PDF support limited")
|
logger.warning("PyMuPDF not available, PDF support limited")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"PDF first page extraction failed: {e}")
|
logger.error(f"PDF first page extraction failed: {e}")
|
||||||
|
|
||||||
|
|||||||
@@ -141,16 +141,17 @@ class OcrService:
|
|||||||
def _extract_pdf_first_page(self, pdf_bytes: bytes) -> bytes:
|
def _extract_pdf_first_page(self, pdf_bytes: bytes) -> bytes:
|
||||||
"""Extract first page of PDF as PNG image."""
|
"""Extract first page of PDF as PNG image."""
|
||||||
try:
|
try:
|
||||||
# Use pdf2image if available, otherwise return empty
|
import fitz # PyMuPDF
|
||||||
from pdf2image import convert_from_bytes
|
|
||||||
|
|
||||||
images = convert_from_bytes(pdf_bytes, first_page=1, last_page=1, dpi=300)
|
doc = fitz.open(stream=pdf_bytes, filetype="pdf")
|
||||||
if images:
|
page = doc[0]
|
||||||
buffer = io.BytesIO()
|
mat = fitz.Matrix(300 / 72, 300 / 72)
|
||||||
images[0].save(buffer, format="PNG")
|
pix = page.get_pixmap(matrix=mat)
|
||||||
return buffer.getvalue()
|
png_bytes = pix.tobytes("png")
|
||||||
|
doc.close()
|
||||||
|
return png_bytes
|
||||||
except ImportError:
|
except ImportError:
|
||||||
logger.warning("pdf2image not available, PDF support limited")
|
logger.warning("PyMuPDF not available, PDF support limited")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"PDF extraction failed: {e}")
|
logger.error(f"PDF extraction failed: {e}")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user