feat: Maintenance Receipt Upload with OCR Auto-populate (#16) #161

Merged
egullickson merged 11 commits from issue-16-maintenance-receipt-upload-ocr into main 2026-02-13 22:19:45 +00:00
4 changed files with 234 additions and 32 deletions
Showing only changes of commit d9a40f7d37 - Show all commits

View File

@@ -16,6 +16,7 @@ import { TemplateService } from '../../notifications/domain/template.service';
import { EmailService } from '../../notifications/domain/email.service';
import { ocrService } from '../../ocr/domain/ocr.service';
import type { ReceiptExtractionResponse } from '../../ocr/domain/ocr.types';
import { ReceiptClassifier } from './receipt-classifier';
import type {
ResendWebhookEvent,
EmailProcessingResult,
@@ -51,6 +52,7 @@ export class EmailIngestionService {
private notificationsRepository: NotificationsRepository;
private templateService: TemplateService;
private emailService: EmailService;
private classifier: ReceiptClassifier;
constructor(dbPool?: Pool) {
const p = dbPool || pool;
@@ -61,6 +63,7 @@ export class EmailIngestionService {
this.notificationsRepository = new NotificationsRepository(p);
this.templateService = new TemplateService();
this.emailService = new EmailService();
this.classifier = new ReceiptClassifier();
}
// ========================
@@ -102,28 +105,38 @@ export class EmailIngestionService {
return;
}
// 5. Process first valid image attachment through OCR
const ocrResult = await this.processAttachmentsWithOcr(userId, validAttachments);
// 5. Classify receipt from email text first
const emailClassification = this.classifier.classifyFromText(subject, event.data.text);
logger.info('Email text classification result', {
emailId,
type: emailClassification.type,
confidence: emailClassification.confidence,
});
// 6. Process attachments through OCR using classification
const ocrResult = await this.processAttachmentsWithClassification(
userId, validAttachments, emailClassification, emailId
);
if (!ocrResult) {
await this.handleOcrFailure(emailId, senderEmail, userName, subject, 'No receipt data could be extracted from attachments');
return;
}
// 6. Build extracted data from OCR result
// 7. Build extracted data from OCR result
const extractedData = this.mapOcrToExtractedData(ocrResult.response);
const recordType = ocrResult.recordType;
// 7. Handle vehicle association
// 8. Handle vehicle association
const processingResult = await this.handleVehicleAssociation(
userId, userName, senderEmail, recordType, extractedData
);
// 8. Mark as completed
// 9. Mark as completed
await this.repository.updateQueueStatus(emailId, 'completed', {
processingResult,
});
// 9. Send confirmation email
// 10. Send confirmation email
await this.sendConfirmationEmail(senderEmail, userName, processingResult);
logger.info('Email processing completed successfully', {
@@ -239,51 +252,100 @@ export class EmailIngestionService {
// ========================
/**
* Process attachments through OCR, trying fuel then maintenance receipt extraction.
* Returns the first successful result.
* Process attachments using classifier-driven OCR extraction.
* If email text classification is confident, calls the specific OCR endpoint.
* If not, performs general OCR and classifies from rawText.
* Returns null if no usable result or receipt is unclassified.
*/
private async processAttachmentsWithOcr(
private async processAttachmentsWithClassification(
userId: string,
attachments: ParsedEmailAttachment[]
attachments: ParsedEmailAttachment[],
emailClassification: { type: string; confidence: number },
emailId: string
): Promise<{ response: ReceiptExtractionResponse; recordType: EmailRecordType } | null> {
// Process only image attachments that the receipt OCR supports
const imageAttachments = attachments.filter(att => OCR_RECEIPT_IMAGE_TYPES.has(att.contentType));
for (const attachment of imageAttachments) {
const result = await this.classifyAndExtract(userId, attachment);
if (result) return result;
// If email text gave a confident classification, call the specific OCR endpoint first
if (emailClassification.type === 'fuel') {
const result = await this.extractFuelReceipt(userId, attachment);
if (result?.success) return { response: result, recordType: 'fuel_log' };
// Fuel OCR failed, try maintenance as fallback
const fallbackResult = await this.extractMaintenanceReceipt(userId, attachment);
if (fallbackResult?.success) return { response: fallbackResult, recordType: 'maintenance_record' };
continue;
}
if (emailClassification.type === 'maintenance') {
const result = await this.extractMaintenanceReceipt(userId, attachment);
if (result?.success) return { response: result, recordType: 'maintenance_record' };
// Maintenance OCR failed, try fuel as fallback
const fallbackResult = await this.extractFuelReceipt(userId, attachment);
if (fallbackResult?.success) return { response: fallbackResult, recordType: 'fuel_log' };
continue;
}
// Email text was not confident - try both OCR endpoints and classify from rawText
const fuelResult = await this.extractFuelReceipt(userId, attachment);
const maintenanceResult = await this.extractMaintenanceReceipt(userId, attachment);
// Use rawText from whichever succeeded for secondary classification
const rawText = fuelResult?.rawText || maintenanceResult?.rawText || '';
if (rawText) {
const ocrClassification = this.classifier.classifyFromOcrRawText(rawText);
logger.info('OCR rawText classification result', {
emailId,
type: ocrClassification.type,
confidence: ocrClassification.confidence,
});
if (ocrClassification.type === 'fuel' && fuelResult?.success) {
return { response: fuelResult, recordType: 'fuel_log' };
}
if (ocrClassification.type === 'maintenance' && maintenanceResult?.success) {
return { response: maintenanceResult, recordType: 'maintenance_record' };
}
}
// Both classifiers failed - fall back to field-count heuristic
const fallback = this.selectBestResultByFields(fuelResult, maintenanceResult);
if (fallback) return fallback;
}
return null;
}
/**
* Try both fuel and maintenance OCR extractors, return the better result
* Extract fuel receipt via OCR. Returns null on failure.
*/
private async classifyAndExtract(
private async extractFuelReceipt(
userId: string,
attachment: ParsedEmailAttachment
): Promise<{ response: ReceiptExtractionResponse; recordType: EmailRecordType } | null> {
let fuelResult: ReceiptExtractionResponse | null = null;
let maintenanceResult: ReceiptExtractionResponse | null = null;
// Try fuel receipt extraction
): Promise<ReceiptExtractionResponse | null> {
try {
fuelResult = await ocrService.extractReceipt(userId, {
return await ocrService.extractReceipt(userId, {
fileBuffer: attachment.content,
contentType: attachment.contentType,
receiptType: 'fuel',
});
} catch (error) {
logger.info('Fuel receipt extraction failed, trying maintenance', {
logger.info('Fuel receipt extraction failed', {
filename: attachment.filename,
error: error instanceof Error ? error.message : String(error),
});
return null;
}
}
// Try maintenance receipt extraction
/**
* Extract maintenance receipt via OCR. Returns null on failure.
*/
private async extractMaintenanceReceipt(
userId: string,
attachment: ParsedEmailAttachment
): Promise<ReceiptExtractionResponse | null> {
try {
maintenanceResult = await ocrService.extractMaintenanceReceipt(userId, {
return await ocrService.extractMaintenanceReceipt(userId, {
fileBuffer: attachment.content,
contentType: attachment.contentType,
});
@@ -292,16 +354,15 @@ export class EmailIngestionService {
filename: attachment.filename,
error: error instanceof Error ? error.message : String(error),
});
return null;
}
// Compare results and pick the best one
return this.selectBestResult(fuelResult, maintenanceResult);
}
/**
* Select the better OCR result based on extracted field count and success
* Last-resort fallback: select the better OCR result based on domain-specific
* fields and field count when keyword classifiers could not decide.
*/
private selectBestResult(
private selectBestResultByFields(
fuelResult: ReceiptExtractionResponse | null,
maintenanceResult: ReceiptExtractionResponse | null
): { response: ReceiptExtractionResponse; recordType: EmailRecordType } | null {
@@ -316,7 +377,6 @@ export class EmailIngestionService {
return null;
}
// Check for fuel-specific fields to improve classification
const hasFuelFields = fuelResult?.extractedFields['gallons'] ||
fuelResult?.extractedFields['price_per_gallon'] ||
fuelResult?.extractedFields['fuel_type'];
@@ -325,7 +385,6 @@ export class EmailIngestionService {
maintenanceResult?.extractedFields['shop_name'] ||
maintenanceResult?.extractedFields['description'];
// Prefer the result with domain-specific fields
if (hasFuelFields && !hasMaintenanceFields) {
return { response: fuelResult!, recordType: 'fuel_log' };
}
@@ -333,7 +392,6 @@ export class EmailIngestionService {
return { response: maintenanceResult!, recordType: 'maintenance_record' };
}
// Fall back to field count comparison
if (fuelFieldCount >= maintenanceFieldCount && fuelResult?.success) {
return { response: fuelResult, recordType: 'fuel_log' };
}

View File

@@ -13,6 +13,17 @@ export type PendingAssociationStatus = 'pending' | 'resolved' | 'expired';
export type EmailRecordType = 'fuel_log' | 'maintenance_record';
// ========================
// Receipt Classification
// ========================
export type ReceiptClassificationType = 'fuel' | 'maintenance' | 'unclassified';
export interface ClassificationResult {
type: ReceiptClassificationType;
confidence: number;
}
// ========================
// Database Records
// ========================

View File

@@ -0,0 +1,130 @@
/**
* @ai-summary Classifies receipt type from email text or OCR raw text
* @ai-context Uses keyword matching to determine fuel vs maintenance receipts
* before falling back to OCR-based classification. Returns confidence score.
*/
import { logger } from '../../../core/logging/logger';
import type { ClassificationResult, ReceiptClassificationType } from './email-ingestion.types';
/** Fuel-related keywords (case-insensitive matching) */
const FUEL_KEYWORDS: string[] = [
'gas',
'fuel',
'gallons',
'octane',
'pump',
'diesel',
'unleaded',
'shell',
'chevron',
'exxon',
'bp',
];
/** Maintenance-related keywords (case-insensitive matching). Multi-word entries matched as phrases. */
const MAINTENANCE_KEYWORDS: string[] = [
'oil change',
'brake',
'alignment',
'tire',
'rotation',
'inspection',
'labor',
'parts',
'service',
'repair',
'transmission',
'coolant',
];
/** Minimum keyword matches required for a confident classification */
const CONFIDENCE_THRESHOLD = 2;
export class ReceiptClassifier {
/**
* Classify receipt type from email subject and body text.
* Returns a confident result if >= 2 keyword matches for one type.
*/
classifyFromText(subject: string | null, body: string | null): ClassificationResult {
const text = [subject || '', body || ''].join(' ');
return this.classifyText(text, 'email');
}
/**
* Classify receipt type from OCR raw text output.
* Uses same keyword matching as email text classification.
*/
classifyFromOcrRawText(rawText: string): ClassificationResult {
return this.classifyText(rawText, 'ocr');
}
/**
* Core keyword matching logic shared by email and OCR classification.
*/
private classifyText(text: string, source: 'email' | 'ocr'): ClassificationResult {
const normalizedText = text.toLowerCase();
const fuelMatches = this.countKeywordMatches(normalizedText, FUEL_KEYWORDS);
const maintenanceMatches = this.countKeywordMatches(normalizedText, MAINTENANCE_KEYWORDS);
logger.info('Receipt classification keyword analysis', {
source,
fuelMatches,
maintenanceMatches,
textLength: text.length,
});
// Both below threshold - unclassified
if (fuelMatches < CONFIDENCE_THRESHOLD && maintenanceMatches < CONFIDENCE_THRESHOLD) {
return { type: 'unclassified', confidence: 0 };
}
// Clear winner with threshold met
if (fuelMatches >= CONFIDENCE_THRESHOLD && fuelMatches > maintenanceMatches) {
return {
type: 'fuel',
confidence: Math.min(fuelMatches / (fuelMatches + maintenanceMatches), 1),
};
}
if (maintenanceMatches >= CONFIDENCE_THRESHOLD && maintenanceMatches > fuelMatches) {
return {
type: 'maintenance',
confidence: Math.min(maintenanceMatches / (fuelMatches + maintenanceMatches), 1),
};
}
// Tie with both meeting threshold - unclassified (ambiguous)
if (fuelMatches >= CONFIDENCE_THRESHOLD && maintenanceMatches >= CONFIDENCE_THRESHOLD) {
return { type: 'unclassified', confidence: 0 };
}
return { type: 'unclassified', confidence: 0 };
}
/**
* Count how many keywords from the list appear in the text.
* Multi-word keywords are matched as phrases.
*/
private countKeywordMatches(normalizedText: string, keywords: string[]): number {
let matches = 0;
for (const keyword of keywords) {
if (normalizedText.includes(keyword)) {
matches++;
}
}
return matches;
}
/**
* Map classifier type to the EmailRecordType used in the processing pipeline.
*/
static toRecordType(classificationType: ReceiptClassificationType): 'fuel_log' | 'maintenance_record' | null {
switch (classificationType) {
case 'fuel': return 'fuel_log';
case 'maintenance': return 'maintenance_record';
case 'unclassified': return null;
}
}
}

View File

@@ -6,14 +6,17 @@
export { emailIngestionWebhookRoutes } from './api/email-ingestion.routes';
export { EmailIngestionService } from './domain/email-ingestion.service';
export { EmailIngestionRepository } from './data/email-ingestion.repository';
export { ReceiptClassifier } from './domain/receipt-classifier';
export { ResendInboundClient } from './external/resend-inbound.client';
export type { ParsedEmailResult, ParsedEmailAttachment } from './external/resend-inbound.client';
export type {
ClassificationResult,
EmailIngestionQueueRecord,
EmailIngestionStatus,
EmailProcessingResult,
ExtractedReceiptData,
PendingVehicleAssociation,
ReceiptClassificationType,
ResendWebhookEvent,
ResendWebhookEventData,
} from './domain/email-ingestion.types';