# motovaultpro/ocr/app/models/schemas.py

"""Pydantic models for OCR API request/response validation."""
from enum import Enum
from typing import Optional

from pydantic import BaseModel, Field


class DocumentType(str, Enum):
    """Closed set of document categories recognised by the OCR API.

    The enum mixes in ``str``, so each member compares equal to (and can be
    looked up by) its lowercase string value.
    """

    # Members are listed in declaration order; iteration yields them this way.
    VIN = "vin"
    RECEIPT = "receipt"
    MANUAL = "manual"
    # Catch-all member; presumably used when no other category applies.
    UNKNOWN = "unknown"


class ExtractedField(BaseModel):
    """One extracted value paired with its confidence score.

    Attributes are documented inline; ``confidence`` has no default and must
    be supplied by the caller.
    """

    # The extracted value, always a string for this model.
    value: str
    # Validated to lie in the closed interval [0.0, 1.0].
    confidence: float = Field(
        ge=0.0,
        le=1.0,
    )


class BoundingBox(BaseModel):
    """Integer rectangle describing a detected region.

    All four attributes are required. No range constraints are declared, so
    any integer (including zero or negative) passes validation.
    """

    # Position of the rectangle.
    # NOTE(review): presumably the top-left corner in pixels -- confirm with
    # the code that produces these boxes.
    x: int
    y: int

    # Extent of the rectangle along each axis.
    width: int
    height: int


class VinAlternative(BaseModel):
    """A secondary VIN candidate together with its confidence score."""

    # Candidate VIN string; no format or length validation is declared here.
    vin: str
    # Validated to lie in the closed interval [0.0, 1.0].
    confidence: float = Field(
        ge=0.0,
        le=1.0,
    )


class VinExtractionResponse(BaseModel):
    """Payload returned by the VIN extraction endpoint.

    ``bounding_box`` and ``processing_time_ms`` carry camelCase aliases
    (``boundingBox``, ``processingTimeMs``); because ``populate_by_name`` is
    enabled, either the alias or the Python attribute name may be used when
    constructing the model.

    ``success``, ``confidence`` and ``processing_time_ms`` have no defaults
    and must always be supplied.
    """

    # Accept both the snake_case field names and their camelCase aliases.
    model_config = {"populate_by_name": True}

    success: bool
    # Extracted VIN, or None when absent.
    vin: Optional[str] = Field(default=None)
    # Validated to lie in the closed interval [0.0, 1.0].
    confidence: float = Field(
        ge=0.0,
        le=1.0,
    )
    bounding_box: Optional[BoundingBox] = Field(
        alias="boundingBox",
        default=None,
    )
    # Other candidates; each instance gets its own fresh empty list.
    alternatives: list[VinAlternative] = Field(
        default_factory=list,
    )
    # Elapsed processing time; milliseconds, per the field name.
    processing_time_ms: int = Field(
        alias="processingTimeMs",
    )
    # Error description, or None when absent.
    error: Optional[str] = Field(default=None)


class OcrResponse(BaseModel):
    """Payload describing the outcome of an OCR extraction.

    Multi-word fields carry camelCase aliases (``documentType``, ``rawText``,
    ``extractedFields``, ``processingTimeMs``); because ``populate_by_name``
    is enabled, either spelling may be used when constructing the model.

    Only ``extracted_fields`` has a default (an empty dict); every other
    field is required.
    """

    # Accept both the snake_case field names and their camelCase aliases.
    model_config = {"populate_by_name": True}

    success: bool
    document_type: DocumentType = Field(
        alias="documentType",
    )
    raw_text: str = Field(
        alias="rawText",
    )
    # Validated to lie in the closed interval [0.0, 1.0].
    confidence: float = Field(
        ge=0.0,
        le=1.0,
    )
    # Mapping of field name to extracted value; fresh empty dict by default.
    extracted_fields: dict[str, ExtractedField] = Field(
        alias="extractedFields",
        default_factory=dict,
    )
    # Elapsed processing time; milliseconds, per the field name.
    processing_time_ms: int = Field(
        alias="processingTimeMs",
    )


class JobStatus(str, Enum):
    """Lifecycle states reported for an asynchronous OCR job.

    The enum mixes in ``str``, so each member compares equal to (and can be
    looked up by) its lowercase string value.
    """

    # Declared in the order a job would presumably move through them; the
    # enum itself does not enforce any transition rules.
    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"


class JobResponse(BaseModel):
    """Status report for an asynchronous OCR job.

    ``job_id`` carries the camelCase alias ``jobId``; because
    ``populate_by_name`` is enabled, either spelling may be used when
    constructing the model. ``job_id`` and ``status`` are required; the
    remaining fields default to None.
    """

    # Accept both the snake_case field name and its camelCase alias.
    model_config = {"populate_by_name": True}

    job_id: str = Field(
        alias="jobId",
    )
    status: JobStatus
    # Optional integer validated to lie in [0, 100]; presumably a percentage.
    progress: Optional[int] = Field(
        default=None,
        ge=0,
        le=100,
    )
    # OCR result, or None when absent.
    result: Optional[OcrResponse] = Field(default=None)
    # Error description, or None when absent.
    error: Optional[str] = Field(default=None)


class JobSubmitRequest(BaseModel):
    """Request body for submitting an asynchronous OCR job.

    ``callback_url`` carries the camelCase alias ``callbackUrl``; because
    ``populate_by_name`` is enabled, either spelling may be used when
    constructing the model.
    """

    # Accept both the snake_case field name and its camelCase alias.
    model_config = {"populate_by_name": True}

    # Optional plain string; no URL validation is declared on this field.
    callback_url: Optional[str] = Field(
        alias="callbackUrl",
        default=None,
    )


class ReceiptExtractedField(BaseModel):
    """One value extracted from a receipt, paired with its confidence score.

    Unlike a text-only field, ``value`` may be either a string or a float.
    """

    # Either textual or numeric content.
    value: str | float
    # Validated to lie in the closed interval [0.0, 1.0].
    confidence: float = Field(
        ge=0.0,
        le=1.0,
    )


class ReceiptExtractionResponse(BaseModel):
    """Payload returned by the receipt extraction endpoint.

    Multi-word fields carry camelCase aliases (``receiptType``,
    ``extractedFields``, ``rawText``, ``processingTimeMs``); because
    ``populate_by_name`` is enabled, either spelling may be used when
    constructing the model.

    ``extracted_fields`` defaults to an empty dict and ``error`` to None;
    every other field is required.
    """

    # Accept both the snake_case field names and their camelCase aliases.
    model_config = {"populate_by_name": True}

    success: bool
    # Free-form string; the set of allowed receipt types is not constrained here.
    receipt_type: str = Field(
        alias="receiptType",
    )
    # Mapping of field name to extracted value; fresh empty dict by default.
    extracted_fields: dict[str, ReceiptExtractedField] = Field(
        alias="extractedFields",
        default_factory=dict,
    )
    raw_text: str = Field(
        alias="rawText",
    )
    # Elapsed processing time; milliseconds, per the field name.
    processing_time_ms: int = Field(
        alias="processingTimeMs",
    )
    # Error description, or None when absent.
    error: Optional[str] = Field(default=None)