feat: Maintenance Receipt Upload with OCR Auto-populate (#16) #161
@@ -16,6 +16,7 @@ import { TemplateService } from '../../notifications/domain/template.service';
|
|||||||
import { EmailService } from '../../notifications/domain/email.service';
|
import { EmailService } from '../../notifications/domain/email.service';
|
||||||
import { ocrService } from '../../ocr/domain/ocr.service';
|
import { ocrService } from '../../ocr/domain/ocr.service';
|
||||||
import type { ReceiptExtractionResponse } from '../../ocr/domain/ocr.types';
|
import type { ReceiptExtractionResponse } from '../../ocr/domain/ocr.types';
|
||||||
|
import { ReceiptClassifier } from './receipt-classifier';
|
||||||
import type {
|
import type {
|
||||||
ResendWebhookEvent,
|
ResendWebhookEvent,
|
||||||
EmailProcessingResult,
|
EmailProcessingResult,
|
||||||
@@ -51,6 +52,7 @@ export class EmailIngestionService {
|
|||||||
private notificationsRepository: NotificationsRepository;
|
private notificationsRepository: NotificationsRepository;
|
||||||
private templateService: TemplateService;
|
private templateService: TemplateService;
|
||||||
private emailService: EmailService;
|
private emailService: EmailService;
|
||||||
|
private classifier: ReceiptClassifier;
|
||||||
|
|
||||||
constructor(dbPool?: Pool) {
|
constructor(dbPool?: Pool) {
|
||||||
const p = dbPool || pool;
|
const p = dbPool || pool;
|
||||||
@@ -61,6 +63,7 @@ export class EmailIngestionService {
|
|||||||
this.notificationsRepository = new NotificationsRepository(p);
|
this.notificationsRepository = new NotificationsRepository(p);
|
||||||
this.templateService = new TemplateService();
|
this.templateService = new TemplateService();
|
||||||
this.emailService = new EmailService();
|
this.emailService = new EmailService();
|
||||||
|
this.classifier = new ReceiptClassifier();
|
||||||
}
|
}
|
||||||
|
|
||||||
// ========================
|
// ========================
|
||||||
@@ -102,28 +105,38 @@ export class EmailIngestionService {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 5. Process first valid image attachment through OCR
|
// 5. Classify receipt from email text first
|
||||||
const ocrResult = await this.processAttachmentsWithOcr(userId, validAttachments);
|
const emailClassification = this.classifier.classifyFromText(subject, event.data.text);
|
||||||
|
logger.info('Email text classification result', {
|
||||||
|
emailId,
|
||||||
|
type: emailClassification.type,
|
||||||
|
confidence: emailClassification.confidence,
|
||||||
|
});
|
||||||
|
|
||||||
|
// 6. Process attachments through OCR using classification
|
||||||
|
const ocrResult = await this.processAttachmentsWithClassification(
|
||||||
|
userId, validAttachments, emailClassification, emailId
|
||||||
|
);
|
||||||
if (!ocrResult) {
|
if (!ocrResult) {
|
||||||
await this.handleOcrFailure(emailId, senderEmail, userName, subject, 'No receipt data could be extracted from attachments');
|
await this.handleOcrFailure(emailId, senderEmail, userName, subject, 'No receipt data could be extracted from attachments');
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 6. Build extracted data from OCR result
|
// 7. Build extracted data from OCR result
|
||||||
const extractedData = this.mapOcrToExtractedData(ocrResult.response);
|
const extractedData = this.mapOcrToExtractedData(ocrResult.response);
|
||||||
const recordType = ocrResult.recordType;
|
const recordType = ocrResult.recordType;
|
||||||
|
|
||||||
// 7. Handle vehicle association
|
// 8. Handle vehicle association
|
||||||
const processingResult = await this.handleVehicleAssociation(
|
const processingResult = await this.handleVehicleAssociation(
|
||||||
userId, userName, senderEmail, recordType, extractedData
|
userId, userName, senderEmail, recordType, extractedData
|
||||||
);
|
);
|
||||||
|
|
||||||
// 8. Mark as completed
|
// 9. Mark as completed
|
||||||
await this.repository.updateQueueStatus(emailId, 'completed', {
|
await this.repository.updateQueueStatus(emailId, 'completed', {
|
||||||
processingResult,
|
processingResult,
|
||||||
});
|
});
|
||||||
|
|
||||||
// 9. Send confirmation email
|
// 10. Send confirmation email
|
||||||
await this.sendConfirmationEmail(senderEmail, userName, processingResult);
|
await this.sendConfirmationEmail(senderEmail, userName, processingResult);
|
||||||
|
|
||||||
logger.info('Email processing completed successfully', {
|
logger.info('Email processing completed successfully', {
|
||||||
@@ -239,51 +252,100 @@ export class EmailIngestionService {
|
|||||||
// ========================
|
// ========================
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Process attachments through OCR, trying fuel then maintenance receipt extraction.
|
* Process attachments using classifier-driven OCR extraction.
|
||||||
* Returns the first successful result.
|
* If email text classification is confident, calls the specific OCR endpoint.
|
||||||
|
* If not, performs general OCR and classifies from rawText.
|
||||||
|
* Returns null if no usable result or receipt is unclassified.
|
||||||
*/
|
*/
|
||||||
private async processAttachmentsWithOcr(
|
private async processAttachmentsWithClassification(
|
||||||
userId: string,
|
userId: string,
|
||||||
attachments: ParsedEmailAttachment[]
|
attachments: ParsedEmailAttachment[],
|
||||||
|
emailClassification: { type: string; confidence: number },
|
||||||
|
emailId: string
|
||||||
): Promise<{ response: ReceiptExtractionResponse; recordType: EmailRecordType } | null> {
|
): Promise<{ response: ReceiptExtractionResponse; recordType: EmailRecordType } | null> {
|
||||||
// Process only image attachments that the receipt OCR supports
|
|
||||||
const imageAttachments = attachments.filter(att => OCR_RECEIPT_IMAGE_TYPES.has(att.contentType));
|
const imageAttachments = attachments.filter(att => OCR_RECEIPT_IMAGE_TYPES.has(att.contentType));
|
||||||
|
|
||||||
for (const attachment of imageAttachments) {
|
for (const attachment of imageAttachments) {
|
||||||
const result = await this.classifyAndExtract(userId, attachment);
|
// If email text gave a confident classification, call the specific OCR endpoint first
|
||||||
if (result) return result;
|
if (emailClassification.type === 'fuel') {
|
||||||
|
const result = await this.extractFuelReceipt(userId, attachment);
|
||||||
|
if (result?.success) return { response: result, recordType: 'fuel_log' };
|
||||||
|
// Fuel OCR failed, try maintenance as fallback
|
||||||
|
const fallbackResult = await this.extractMaintenanceReceipt(userId, attachment);
|
||||||
|
if (fallbackResult?.success) return { response: fallbackResult, recordType: 'maintenance_record' };
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (emailClassification.type === 'maintenance') {
|
||||||
|
const result = await this.extractMaintenanceReceipt(userId, attachment);
|
||||||
|
if (result?.success) return { response: result, recordType: 'maintenance_record' };
|
||||||
|
// Maintenance OCR failed, try fuel as fallback
|
||||||
|
const fallbackResult = await this.extractFuelReceipt(userId, attachment);
|
||||||
|
if (fallbackResult?.success) return { response: fallbackResult, recordType: 'fuel_log' };
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Email text was not confident - try both OCR endpoints and classify from rawText
|
||||||
|
const fuelResult = await this.extractFuelReceipt(userId, attachment);
|
||||||
|
const maintenanceResult = await this.extractMaintenanceReceipt(userId, attachment);
|
||||||
|
|
||||||
|
// Use rawText from whichever succeeded for secondary classification
|
||||||
|
const rawText = fuelResult?.rawText || maintenanceResult?.rawText || '';
|
||||||
|
if (rawText) {
|
||||||
|
const ocrClassification = this.classifier.classifyFromOcrRawText(rawText);
|
||||||
|
logger.info('OCR rawText classification result', {
|
||||||
|
emailId,
|
||||||
|
type: ocrClassification.type,
|
||||||
|
confidence: ocrClassification.confidence,
|
||||||
|
});
|
||||||
|
|
||||||
|
if (ocrClassification.type === 'fuel' && fuelResult?.success) {
|
||||||
|
return { response: fuelResult, recordType: 'fuel_log' };
|
||||||
|
}
|
||||||
|
if (ocrClassification.type === 'maintenance' && maintenanceResult?.success) {
|
||||||
|
return { response: maintenanceResult, recordType: 'maintenance_record' };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Both classifiers failed - fall back to field-count heuristic
|
||||||
|
const fallback = this.selectBestResultByFields(fuelResult, maintenanceResult);
|
||||||
|
if (fallback) return fallback;
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Try both fuel and maintenance OCR extractors, return the better result
|
* Extract fuel receipt via OCR. Returns null on failure.
|
||||||
*/
|
*/
|
||||||
private async classifyAndExtract(
|
private async extractFuelReceipt(
|
||||||
userId: string,
|
userId: string,
|
||||||
attachment: ParsedEmailAttachment
|
attachment: ParsedEmailAttachment
|
||||||
): Promise<{ response: ReceiptExtractionResponse; recordType: EmailRecordType } | null> {
|
): Promise<ReceiptExtractionResponse | null> {
|
||||||
let fuelResult: ReceiptExtractionResponse | null = null;
|
|
||||||
let maintenanceResult: ReceiptExtractionResponse | null = null;
|
|
||||||
|
|
||||||
// Try fuel receipt extraction
|
|
||||||
try {
|
try {
|
||||||
fuelResult = await ocrService.extractReceipt(userId, {
|
return await ocrService.extractReceipt(userId, {
|
||||||
fileBuffer: attachment.content,
|
fileBuffer: attachment.content,
|
||||||
contentType: attachment.contentType,
|
contentType: attachment.contentType,
|
||||||
receiptType: 'fuel',
|
receiptType: 'fuel',
|
||||||
});
|
});
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.info('Fuel receipt extraction failed, trying maintenance', {
|
logger.info('Fuel receipt extraction failed', {
|
||||||
filename: attachment.filename,
|
filename: attachment.filename,
|
||||||
error: error instanceof Error ? error.message : String(error),
|
error: error instanceof Error ? error.message : String(error),
|
||||||
});
|
});
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Try maintenance receipt extraction
|
/**
|
||||||
|
* Extract maintenance receipt via OCR. Returns null on failure.
|
||||||
|
*/
|
||||||
|
private async extractMaintenanceReceipt(
|
||||||
|
userId: string,
|
||||||
|
attachment: ParsedEmailAttachment
|
||||||
|
): Promise<ReceiptExtractionResponse | null> {
|
||||||
try {
|
try {
|
||||||
maintenanceResult = await ocrService.extractMaintenanceReceipt(userId, {
|
return await ocrService.extractMaintenanceReceipt(userId, {
|
||||||
fileBuffer: attachment.content,
|
fileBuffer: attachment.content,
|
||||||
contentType: attachment.contentType,
|
contentType: attachment.contentType,
|
||||||
});
|
});
|
||||||
@@ -292,16 +354,15 @@ export class EmailIngestionService {
|
|||||||
filename: attachment.filename,
|
filename: attachment.filename,
|
||||||
error: error instanceof Error ? error.message : String(error),
|
error: error instanceof Error ? error.message : String(error),
|
||||||
});
|
});
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Compare results and pick the best one
|
|
||||||
return this.selectBestResult(fuelResult, maintenanceResult);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Select the better OCR result based on extracted field count and success
|
* Last-resort fallback: select the better OCR result based on domain-specific
|
||||||
|
* fields and field count when keyword classifiers could not decide.
|
||||||
*/
|
*/
|
||||||
private selectBestResult(
|
private selectBestResultByFields(
|
||||||
fuelResult: ReceiptExtractionResponse | null,
|
fuelResult: ReceiptExtractionResponse | null,
|
||||||
maintenanceResult: ReceiptExtractionResponse | null
|
maintenanceResult: ReceiptExtractionResponse | null
|
||||||
): { response: ReceiptExtractionResponse; recordType: EmailRecordType } | null {
|
): { response: ReceiptExtractionResponse; recordType: EmailRecordType } | null {
|
||||||
@@ -316,7 +377,6 @@ export class EmailIngestionService {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check for fuel-specific fields to improve classification
|
|
||||||
const hasFuelFields = fuelResult?.extractedFields['gallons'] ||
|
const hasFuelFields = fuelResult?.extractedFields['gallons'] ||
|
||||||
fuelResult?.extractedFields['price_per_gallon'] ||
|
fuelResult?.extractedFields['price_per_gallon'] ||
|
||||||
fuelResult?.extractedFields['fuel_type'];
|
fuelResult?.extractedFields['fuel_type'];
|
||||||
@@ -325,7 +385,6 @@ export class EmailIngestionService {
|
|||||||
maintenanceResult?.extractedFields['shop_name'] ||
|
maintenanceResult?.extractedFields['shop_name'] ||
|
||||||
maintenanceResult?.extractedFields['description'];
|
maintenanceResult?.extractedFields['description'];
|
||||||
|
|
||||||
// Prefer the result with domain-specific fields
|
|
||||||
if (hasFuelFields && !hasMaintenanceFields) {
|
if (hasFuelFields && !hasMaintenanceFields) {
|
||||||
return { response: fuelResult!, recordType: 'fuel_log' };
|
return { response: fuelResult!, recordType: 'fuel_log' };
|
||||||
}
|
}
|
||||||
@@ -333,7 +392,6 @@ export class EmailIngestionService {
|
|||||||
return { response: maintenanceResult!, recordType: 'maintenance_record' };
|
return { response: maintenanceResult!, recordType: 'maintenance_record' };
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fall back to field count comparison
|
|
||||||
if (fuelFieldCount >= maintenanceFieldCount && fuelResult?.success) {
|
if (fuelFieldCount >= maintenanceFieldCount && fuelResult?.success) {
|
||||||
return { response: fuelResult, recordType: 'fuel_log' };
|
return { response: fuelResult, recordType: 'fuel_log' };
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,6 +13,17 @@ export type PendingAssociationStatus = 'pending' | 'resolved' | 'expired';
|
|||||||
|
|
||||||
export type EmailRecordType = 'fuel_log' | 'maintenance_record';
|
export type EmailRecordType = 'fuel_log' | 'maintenance_record';
|
||||||
|
|
||||||
|
// ========================
|
||||||
|
// Receipt Classification
|
||||||
|
// ========================
|
||||||
|
|
||||||
|
export type ReceiptClassificationType = 'fuel' | 'maintenance' | 'unclassified';
|
||||||
|
|
||||||
|
export interface ClassificationResult {
|
||||||
|
type: ReceiptClassificationType;
|
||||||
|
confidence: number;
|
||||||
|
}
|
||||||
|
|
||||||
// ========================
|
// ========================
|
||||||
// Database Records
|
// Database Records
|
||||||
// ========================
|
// ========================
|
||||||
|
|||||||
@@ -0,0 +1,130 @@
|
|||||||
|
/**
|
||||||
|
* @ai-summary Classifies receipt type from email text or OCR raw text
|
||||||
|
* @ai-context Uses keyword matching to determine fuel vs maintenance receipts
|
||||||
|
* before falling back to OCR-based classification. Returns confidence score.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { logger } from '../../../core/logging/logger';
|
||||||
|
import type { ClassificationResult, ReceiptClassificationType } from './email-ingestion.types';
|
||||||
|
|
||||||
|
/** Fuel-related keywords (case-insensitive matching) */
|
||||||
|
const FUEL_KEYWORDS: string[] = [
|
||||||
|
'gas',
|
||||||
|
'fuel',
|
||||||
|
'gallons',
|
||||||
|
'octane',
|
||||||
|
'pump',
|
||||||
|
'diesel',
|
||||||
|
'unleaded',
|
||||||
|
'shell',
|
||||||
|
'chevron',
|
||||||
|
'exxon',
|
||||||
|
'bp',
|
||||||
|
];
|
||||||
|
|
||||||
|
/** Maintenance-related keywords (case-insensitive matching). Multi-word entries matched as phrases. */
|
||||||
|
const MAINTENANCE_KEYWORDS: string[] = [
|
||||||
|
'oil change',
|
||||||
|
'brake',
|
||||||
|
'alignment',
|
||||||
|
'tire',
|
||||||
|
'rotation',
|
||||||
|
'inspection',
|
||||||
|
'labor',
|
||||||
|
'parts',
|
||||||
|
'service',
|
||||||
|
'repair',
|
||||||
|
'transmission',
|
||||||
|
'coolant',
|
||||||
|
];
|
||||||
|
|
||||||
|
/** Minimum keyword matches required for a confident classification */
|
||||||
|
const CONFIDENCE_THRESHOLD = 2;
|
||||||
|
|
||||||
|
export class ReceiptClassifier {
|
||||||
|
/**
|
||||||
|
* Classify receipt type from email subject and body text.
|
||||||
|
* Returns a confident result if >= 2 keyword matches for one type.
|
||||||
|
*/
|
||||||
|
classifyFromText(subject: string | null, body: string | null): ClassificationResult {
|
||||||
|
const text = [subject || '', body || ''].join(' ');
|
||||||
|
return this.classifyText(text, 'email');
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Classify receipt type from OCR raw text output.
|
||||||
|
* Uses same keyword matching as email text classification.
|
||||||
|
*/
|
||||||
|
classifyFromOcrRawText(rawText: string): ClassificationResult {
|
||||||
|
return this.classifyText(rawText, 'ocr');
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Core keyword matching logic shared by email and OCR classification.
|
||||||
|
*/
|
||||||
|
private classifyText(text: string, source: 'email' | 'ocr'): ClassificationResult {
|
||||||
|
const normalizedText = text.toLowerCase();
|
||||||
|
|
||||||
|
const fuelMatches = this.countKeywordMatches(normalizedText, FUEL_KEYWORDS);
|
||||||
|
const maintenanceMatches = this.countKeywordMatches(normalizedText, MAINTENANCE_KEYWORDS);
|
||||||
|
|
||||||
|
logger.info('Receipt classification keyword analysis', {
|
||||||
|
source,
|
||||||
|
fuelMatches,
|
||||||
|
maintenanceMatches,
|
||||||
|
textLength: text.length,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Both below threshold - unclassified
|
||||||
|
if (fuelMatches < CONFIDENCE_THRESHOLD && maintenanceMatches < CONFIDENCE_THRESHOLD) {
|
||||||
|
return { type: 'unclassified', confidence: 0 };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clear winner with threshold met
|
||||||
|
if (fuelMatches >= CONFIDENCE_THRESHOLD && fuelMatches > maintenanceMatches) {
|
||||||
|
return {
|
||||||
|
type: 'fuel',
|
||||||
|
confidence: Math.min(fuelMatches / (fuelMatches + maintenanceMatches), 1),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
if (maintenanceMatches >= CONFIDENCE_THRESHOLD && maintenanceMatches > fuelMatches) {
|
||||||
|
return {
|
||||||
|
type: 'maintenance',
|
||||||
|
confidence: Math.min(maintenanceMatches / (fuelMatches + maintenanceMatches), 1),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Tie with both meeting threshold - unclassified (ambiguous)
|
||||||
|
if (fuelMatches >= CONFIDENCE_THRESHOLD && maintenanceMatches >= CONFIDENCE_THRESHOLD) {
|
||||||
|
return { type: 'unclassified', confidence: 0 };
|
||||||
|
}
|
||||||
|
|
||||||
|
return { type: 'unclassified', confidence: 0 };
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Count how many keywords from the list appear in the text.
|
||||||
|
* Multi-word keywords are matched as phrases.
|
||||||
|
*/
|
||||||
|
private countKeywordMatches(normalizedText: string, keywords: string[]): number {
|
||||||
|
let matches = 0;
|
||||||
|
for (const keyword of keywords) {
|
||||||
|
if (normalizedText.includes(keyword)) {
|
||||||
|
matches++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return matches;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Map classifier type to the EmailRecordType used in the processing pipeline.
|
||||||
|
*/
|
||||||
|
static toRecordType(classificationType: ReceiptClassificationType): 'fuel_log' | 'maintenance_record' | null {
|
||||||
|
switch (classificationType) {
|
||||||
|
case 'fuel': return 'fuel_log';
|
||||||
|
case 'maintenance': return 'maintenance_record';
|
||||||
|
case 'unclassified': return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -6,14 +6,17 @@
|
|||||||
export { emailIngestionWebhookRoutes } from './api/email-ingestion.routes';
|
export { emailIngestionWebhookRoutes } from './api/email-ingestion.routes';
|
||||||
export { EmailIngestionService } from './domain/email-ingestion.service';
|
export { EmailIngestionService } from './domain/email-ingestion.service';
|
||||||
export { EmailIngestionRepository } from './data/email-ingestion.repository';
|
export { EmailIngestionRepository } from './data/email-ingestion.repository';
|
||||||
|
export { ReceiptClassifier } from './domain/receipt-classifier';
|
||||||
export { ResendInboundClient } from './external/resend-inbound.client';
|
export { ResendInboundClient } from './external/resend-inbound.client';
|
||||||
export type { ParsedEmailResult, ParsedEmailAttachment } from './external/resend-inbound.client';
|
export type { ParsedEmailResult, ParsedEmailAttachment } from './external/resend-inbound.client';
|
||||||
export type {
|
export type {
|
||||||
|
ClassificationResult,
|
||||||
EmailIngestionQueueRecord,
|
EmailIngestionQueueRecord,
|
||||||
EmailIngestionStatus,
|
EmailIngestionStatus,
|
||||||
EmailProcessingResult,
|
EmailProcessingResult,
|
||||||
ExtractedReceiptData,
|
ExtractedReceiptData,
|
||||||
PendingVehicleAssociation,
|
PendingVehicleAssociation,
|
||||||
|
ReceiptClassificationType,
|
||||||
ResendWebhookEvent,
|
ResendWebhookEvent,
|
||||||
ResendWebhookEventData,
|
ResendWebhookEventData,
|
||||||
} from './domain/email-ingestion.types';
|
} from './domain/email-ingestion.types';
|
||||||
|
|||||||
Reference in New Issue
Block a user