feat: rewrite ManualExtractor progress to spec-aligned 10/50/95/100 pattern (refs #143)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Eric Gullickson
2026-02-11 14:40:11 -06:00
parent f9a650a4d7
commit 209425a908
3 changed files with 9 additions and 12 deletions

View File

@@ -82,11 +82,12 @@ class ManualExtractor:
logger.info(f"Progress {percent}%: {message}")
try:
- update_progress(5, "Sending PDF to Gemini for analysis")
+ update_progress(10, "Preparing extraction")
+ update_progress(50, "Processing with Gemini")
gemini_result = self._engine.extract_maintenance(pdf_bytes)
- update_progress(50, "Mapping service names to maintenance subtypes")
+ update_progress(95, "Mapping results")
schedules: list[ExtractedSchedule] = []
for item in gemini_result.items:
@@ -112,8 +113,6 @@ class ManualExtractor:
)
)
- update_progress(90, "Finalizing results")
processing_time_ms = int((time.time() - start_time) * 1000)
logger.info(

View File

@@ -280,11 +280,9 @@ async def extract_manual(
the time required for large documents.
Pipeline:
- 1. Analyze PDF structure (text layer vs scanned)
- 2. Find maintenance schedule sections
- 3. Extract text or perform OCR on scanned pages
- 4. Detect and parse maintenance tables
- 5. Extract service intervals and fluid specifications
+ 1. Send entire PDF to Gemini for semantic extraction
+ 2. Map extracted service names to system maintenance subtypes
+ 3. Return structured results with confidence scores
- **file**: Owner's manual PDF (max 200MB)
- **vehicle_id**: Optional vehicle ID for context

View File

@@ -108,11 +108,11 @@ class TestNormalExtraction:
extractor.extract(_make_pdf_bytes(), progress_callback=track_progress)
- # Should have progress calls at 5, 50, 90, 100
+ # Should have progress calls at 10, 50, 95, 100
percents = [p for p, _ in progress_calls]
- assert 5 in percents
+ assert 10 in percents
assert 50 in percents
- assert 90 in percents
+ assert 95 in percents
assert 100 in percents
# Percents should be non-decreasing
assert percents == sorted(percents)