feat: rewrite ManualExtractor progress to spec-aligned 10/50/95/100 pattern (refs #143)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Eric Gullickson
2026-02-11 14:40:11 -06:00
parent f9a650a4d7
commit 209425a908
3 changed files with 9 additions and 12 deletions

View File

@@ -82,11 +82,12 @@ class ManualExtractor:
 logger.info(f"Progress {percent}%: {message}")
 try:
-update_progress(5, "Sending PDF to Gemini for analysis")
+update_progress(10, "Preparing extraction")
+update_progress(50, "Processing with Gemini")
 gemini_result = self._engine.extract_maintenance(pdf_bytes)
-update_progress(50, "Mapping service names to maintenance subtypes")
+update_progress(95, "Mapping results")
 schedules: list[ExtractedSchedule] = []
 for item in gemini_result.items:
@@ -112,8 +113,6 @@ class ManualExtractor:
 )
 )
-update_progress(90, "Finalizing results")
 processing_time_ms = int((time.time() - start_time) * 1000)
 logger.info(

View File

@@ -280,11 +280,9 @@ async def extract_manual(
 the time required for large documents.
 Pipeline:
-1. Analyze PDF structure (text layer vs scanned)
-2. Find maintenance schedule sections
-3. Extract text or perform OCR on scanned pages
-4. Detect and parse maintenance tables
-5. Extract service intervals and fluid specifications
+1. Send entire PDF to Gemini for semantic extraction
+2. Map extracted service names to system maintenance subtypes
+3. Return structured results with confidence scores
 - **file**: Owner's manual PDF (max 200MB)
 - **vehicle_id**: Optional vehicle ID for context

View File

@@ -108,11 +108,11 @@ class TestNormalExtraction:
 extractor.extract(_make_pdf_bytes(), progress_callback=track_progress)
-# Should have progress calls at 5, 50, 90, 100
+# Should have progress calls at 10, 50, 95, 100
 percents = [p for p, _ in progress_calls]
-assert 5 in percents
+assert 10 in percents
 assert 50 in percents
-assert 90 in percents
+assert 95 in percents
 assert 100 in percents
 # Percents should be non-decreasing
 assert percents == sorted(percents)