feat: rewrite ManualExtractor progress to spec-aligned 10/50/95/100 pattern (refs #143)
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -82,11 +82,12 @@ class ManualExtractor:
|
|||||||
logger.info(f"Progress {percent}%: {message}")
|
logger.info(f"Progress {percent}%: {message}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
update_progress(5, "Sending PDF to Gemini for analysis")
|
update_progress(10, "Preparing extraction")
|
||||||
|
|
||||||
|
update_progress(50, "Processing with Gemini")
|
||||||
gemini_result = self._engine.extract_maintenance(pdf_bytes)
|
gemini_result = self._engine.extract_maintenance(pdf_bytes)
|
||||||
|
|
||||||
update_progress(50, "Mapping service names to maintenance subtypes")
|
update_progress(95, "Mapping results")
|
||||||
|
|
||||||
schedules: list[ExtractedSchedule] = []
|
schedules: list[ExtractedSchedule] = []
|
||||||
for item in gemini_result.items:
|
for item in gemini_result.items:
|
||||||
@@ -112,8 +113,6 @@ class ManualExtractor:
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
update_progress(90, "Finalizing results")
|
|
||||||
|
|
||||||
processing_time_ms = int((time.time() - start_time) * 1000)
|
processing_time_ms = int((time.time() - start_time) * 1000)
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
|
|||||||
@@ -280,11 +280,9 @@ async def extract_manual(
|
|||||||
the time required for large documents.
|
the time required for large documents.
|
||||||
|
|
||||||
Pipeline:
|
Pipeline:
|
||||||
1. Analyze PDF structure (text layer vs scanned)
|
1. Send entire PDF to Gemini for semantic extraction
|
||||||
2. Find maintenance schedule sections
|
2. Map extracted service names to system maintenance subtypes
|
||||||
3. Extract text or perform OCR on scanned pages
|
3. Return structured results with confidence scores
|
||||||
4. Detect and parse maintenance tables
|
|
||||||
5. Extract service intervals and fluid specifications
|
|
||||||
|
|
||||||
- **file**: Owner's manual PDF (max 200MB)
|
- **file**: Owner's manual PDF (max 200MB)
|
||||||
- **vehicle_id**: Optional vehicle ID for context
|
- **vehicle_id**: Optional vehicle ID for context
|
||||||
|
|||||||
@@ -108,11 +108,11 @@ class TestNormalExtraction:
|
|||||||
|
|
||||||
extractor.extract(_make_pdf_bytes(), progress_callback=track_progress)
|
extractor.extract(_make_pdf_bytes(), progress_callback=track_progress)
|
||||||
|
|
||||||
# Should have progress calls at 5, 50, 90, 100
|
# Should have progress calls at 10, 50, 95, 100
|
||||||
percents = [p for p, _ in progress_calls]
|
percents = [p for p, _ in progress_calls]
|
||||||
assert 5 in percents
|
assert 10 in percents
|
||||||
assert 50 in percents
|
assert 50 in percents
|
||||||
assert 90 in percents
|
assert 95 in percents
|
||||||
assert 100 in percents
|
assert 100 in percents
|
||||||
# Percents should be non-decreasing
|
# Percents should be non-decreasing
|
||||||
assert percents == sorted(percents)
|
assert percents == sorted(percents)
|
||||||
|
|||||||
Reference in New Issue
Block a user