"""Pydantic models for OCR API request/response validation.""" from enum import Enum from typing import Optional from pydantic import BaseModel, Field class DocumentType(str, Enum): """Types of documents that can be processed.""" VIN = "vin" RECEIPT = "receipt" MANUAL = "manual" UNKNOWN = "unknown" class ExtractedField(BaseModel): """A single extracted field with confidence score.""" value: str confidence: float = Field(ge=0.0, le=1.0) class BoundingBox(BaseModel): """Bounding box for detected region.""" x: int y: int width: int height: int class VinAlternative(BaseModel): """Alternative VIN candidate.""" vin: str confidence: float = Field(ge=0.0, le=1.0) class VinExtractionResponse(BaseModel): """Response from VIN extraction endpoint.""" success: bool vin: Optional[str] = None confidence: float = Field(ge=0.0, le=1.0) bounding_box: Optional[BoundingBox] = Field(default=None, alias="boundingBox") alternatives: list[VinAlternative] = Field(default_factory=list) processing_time_ms: int = Field(alias="processingTimeMs") error: Optional[str] = None model_config = {"populate_by_name": True} class OcrResponse(BaseModel): """Response from OCR extraction.""" success: bool document_type: DocumentType = Field(alias="documentType") raw_text: str = Field(alias="rawText") confidence: float = Field(ge=0.0, le=1.0) extracted_fields: dict[str, ExtractedField] = Field( default_factory=dict, alias="extractedFields" ) processing_time_ms: int = Field(alias="processingTimeMs") model_config = {"populate_by_name": True} class JobStatus(str, Enum): """Status of an async OCR job.""" PENDING = "pending" PROCESSING = "processing" COMPLETED = "completed" FAILED = "failed" class JobResponse(BaseModel): """Response for async job status.""" job_id: str = Field(alias="jobId") status: JobStatus progress: Optional[int] = Field(default=None, ge=0, le=100) result: Optional[OcrResponse] = None error: Optional[str] = None model_config = {"populate_by_name": True} class JobSubmitRequest(BaseModel): """Request to submit an async OCR job.""" callback_url: Optional[str] = Field(default=None, alias="callbackUrl") model_config = {"populate_by_name": True} class ReceiptExtractedField(BaseModel): """A single extracted field from a receipt with confidence.""" value: str | float confidence: float = Field(ge=0.0, le=1.0) class ReceiptExtractionResponse(BaseModel): """Response from receipt extraction endpoint.""" success: bool receipt_type: str = Field(alias="receiptType") extracted_fields: dict[str, ReceiptExtractedField] = Field( default_factory=dict, alias="extractedFields" ) raw_text: str = Field(alias="rawText") processing_time_ms: int = Field(alias="processingTimeMs") error: Optional[str] = None model_config = {"populate_by_name": True} # Manual extraction models class ManualVehicleInfo(BaseModel): """Vehicle information extracted from manual.""" make: Optional[str] = None model: Optional[str] = None year: Optional[int] = None class ManualMaintenanceSchedule(BaseModel): """A single maintenance schedule entry.""" service: str interval_miles: Optional[int] = Field(default=None, alias="intervalMiles") interval_months: Optional[int] = Field(default=None, alias="intervalMonths") details: Optional[str] = None confidence: float = Field(ge=0.0, le=1.0) subtypes: list[str] = Field(default_factory=list) model_config = {"populate_by_name": True} class ManualExtractionResponse(BaseModel): """Response from manual extraction endpoint.""" success: bool vehicle_info: Optional[ManualVehicleInfo] = Field(default=None, alias="vehicleInfo") maintenance_schedules: list[ManualMaintenanceSchedule] = Field( default_factory=list, alias="maintenanceSchedules" ) raw_tables: list[dict] = Field(default_factory=list, alias="rawTables") processing_time_ms: int = Field(alias="processingTimeMs") total_pages: int = Field(alias="totalPages") pages_processed: int = Field(alias="pagesProcessed") error: Optional[str] = None model_config = {"populate_by_name": True} class ManualJobResponse(BaseModel): """Response for async manual extraction job.""" job_id: str = Field(alias="jobId") status: JobStatus progress: Optional[int] = Field(default=None, ge=0, le=100) estimated_seconds: Optional[int] = Field(default=None, alias="estimatedSeconds") result: Optional[ManualExtractionResponse] = None error: Optional[str] = None model_config = {"populate_by_name": True}