All checks were successful
Deploy to Staging / Build Images (push) Successful in 23s
Deploy to Staging / Deploy to Staging (push) Successful in 36s
Deploy to Staging / Verify Staging (push) Successful in 6s
Deploy to Staging / Notify Staging Ready (push) Successful in 6s
Deploy to Staging / Notify Staging Failure (push) Has been skipped
1016 lines
50 KiB
Python
1016 lines
50 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Interactive Sequential Planner - Two-phase planning workflow
|
|
|
|
PLANNING PHASE: Step-based planning with forced reflection pauses.
|
|
REVIEW PHASE: Orchestrates TW scrub and QR validation before execution.
|
|
|
|
Usage:
|
|
# Planning phase (default)
|
|
python3 planner.py --step-number 1 --total-steps 4 --thoughts "Design auth system"
|
|
|
|
# Review phase (after plan is written)
|
|
python3 planner.py --phase review --step-number 1 --total-steps 4 --thoughts "Plan written to plans/auth.md"
|
|
"""
|
|
|
|
import argparse
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
|
|
def get_plan_format() -> str:
    """Read the plan format template from resources.

    The template is looked up at ``../resources/plan-format.md`` relative to
    the directory containing this script.

    Returns:
        The full text of the template file.

    Raises:
        OSError: If the template file is missing or unreadable.
        UnicodeDecodeError: If the template is not valid UTF-8.
    """
    format_path = Path(__file__).parent.parent / "resources" / "plan-format.md"
    # Pin the encoding: the default depends on the platform locale, which
    # would garble non-ASCII characters in the Markdown template on
    # systems whose locale is not UTF-8.
    return format_path.read_text(encoding="utf-8")
|
|
|
|
|
|
def get_planning_step_guidance(step_number: int, total_steps: int) -> dict:
    """Return the guidance payload for one planning-phase step.

    Args:
        step_number: 1-based index of the step being run.
        total_steps: Current estimate of the number of planning steps.

    Returns:
        A dict with two keys: ``"actions"`` (a list of instruction lines)
        and ``"next"`` (a string telling the caller what to invoke next).

    Branch order matters: the completion check runs first, so whenever
    ``step_number >= total_steps`` the final-verification guidance is
    returned, even for step numbers 1-4. Only that completion branch calls
    ``get_plan_format()``; any error it raises propagates to the caller.
    """
    is_complete = step_number >= total_steps
    next_step = step_number + 1

    if is_complete:
        return {
            "actions": [
                "FINAL VERIFICATION — complete each section before writing.",
                "",
                "<planning_context_verification>",
                "TW and QR consume this section VERBATIM. Quality here =",
                "quality of scrubbed content and risk detection downstream.",
                "",
                "Decision Log (major choices):",
                " - What major architectural choice did you make?",
                " - What is the multi-step reasoning chain for that choice?",
                "",
                "Micro-decisions (TW sources ALL code comments from Decision Log):",
                " - Time sources: wall clock vs monotonic? timezone handling?",
                " - Concurrency: mutex vs channel vs atomic? why?",
                " - Error granularity: specific error types vs generic? why?",
                " - Data structures: map vs slice vs custom? capacity assumptions?",
                " - Thresholds: why this specific value? (document all magic numbers)",
                "",
                "For each non-obvious implementation choice, ask: 'Would a future",
                "reader understand WHY without asking?' If no, add to Decision Log.",
                "",
                "Rejected Alternatives:",
                " - What approach did you NOT take?",
                " - What concrete reason ruled it out?",
                "",
                "Known Risks:",
                " - What failure modes exist?",
                " - What mitigation or acceptance rationale exists for each?",
                " - Which mitigations claim code behavior? (list them)",
                " - What file:line anchor verifies each behavioral claim?",
                " - Any behavioral claim lacking anchor? -> add anchor now",
                "</planning_context_verification>",
                "",
                "<invisible_knowledge_verification>",
                "This section sources README.md content. Skip if trivial.",
                "",
                "THE TEST: Would a new team member understand this from reading",
                "the source files? If no, it belongs here.",
                "",
                "Categories (not exhaustive -- apply the principle):",
                " 1. Architectural decisions: component diagrams, data flow, module boundaries",
                " 2. Business rules: domain constraints shaping implementation",
                " 3. System invariants: properties that must hold (not enforced by types)",
                " 4. Historical context: why alternatives were rejected (link to Decision Log)",
                " 5. Performance characteristics: non-obvious efficiency properties",
                " 6. Tradeoffs: costs and benefits of chosen approaches",
                "</invisible_knowledge_verification>",
                "",
                "<diff_format_checkpoint>",
                "BEFORE writing any code changes to the plan:",
                "",
                " 1. Re-read resources/diff-format.md (authoritative specification)",
                " 2. Re-read resources/temporal-contamination.md (comment hygiene)",
                "",
                "For EACH diff block you write, verify against diff-format.md:",
                " - [ ] File path: exact (src/auth/handler.py not 'auth files')?",
                " - [ ] Context lines: 2-3 lines copied VERBATIM from actual file?",
                " - [ ] WHY comments: explain rationale, not WHAT code does?",
                " - [ ] No location directives in comments (diff encodes location)?",
                " - [ ] No hidden baselines (test: '[adjective] compared to what?')?",
                "",
                "FORBIDDEN in context lines: '...', '[existing code]', summaries,",
                "placeholders, or any text not literally in the target file.",
                "",
                "If you have not read the target file to extract real context lines,",
                "read it now before writing the diff.",
                "</diff_format_checkpoint>",
                "",
                "<milestone_verification>",
                "For EACH milestone, verify:",
                " - File paths: exact (src/auth/handler.py) not vague?",
                " - Requirements: specific behaviors, not 'handle X'?",
                " - Acceptance criteria: testable pass/fail assertions?",
                " - Code changes: diff format for non-trivial logic?",
                " - Uncertainty flags: added where applicable?",
                " - Tests: specified with type, backing, and scenarios?",
                " (or explicit skip reason if tests not applicable)",
                "",
                "For EACH diff block, verify:",
                " - Context lines: 2-3 lines copied VERBATIM from actual file",
                " (FORBIDDEN: '...', '[existing code]', summaries, placeholders)",
                " - If you haven't read the target file, read it now to extract",
                " real anchors that Developer can match against",
                "",
                "Milestone-type specific criteria:",
                " - Implementation milestones: Tests section with type, backing,",
                " scenarios (normal, edge, error). Milestone is NOT complete",
                " until tests pass.",
                " - Doc milestones: reference specific Invisible Knowledge sections",
                " that MUST appear in README (e.g., 'README includes: data flow",
                " diagram, invariants section from Invisible Knowledge')",
                "</milestone_verification>",
                "",
                "<documentation_milestone_verification>",
                " - Does a Documentation milestone exist?",
                " - Does CLAUDE.md use TABULAR INDEX format (not prose)?",
                " - Is README.md included only if Invisible Knowledge has",
                " content?",
                "</documentation_milestone_verification>",
                "",
                "<comment_hygiene_verification>",
                "Comments in code snippets will be transcribed VERBATIM to code.",
                "Write in TIMELESS PRESENT -- describe what the code IS, not what",
                "you are changing.",
                "",
                "CONTAMINATED: '// Added mutex to fix race condition'",
                "CLEAN: '// Mutex serializes cache access from concurrent requests'",
                "",
                "CONTAMINATED: '// Replaces per-tag logging with summary'",
                "CLEAN: '// Single summary line; per-tag avoids 1500+ lines'",
                "",
                "CONTAMINATED: '// After the retry loop' (location directive)",
                "CLEAN: (delete -- diff context encodes location)",
                "",
                "TW will review, but starting clean reduces rework.",
                "</comment_hygiene_verification>",
                "",
                "<decision_audit_verification>",
                "Verify classification and assumption audit in steps 2-4:",
                "",
                " [ ] Step 2: Assumption audit completed?",
                # Step 2 defines six assumption categories (taxonomy and audit
                # table), so the final check must cover all six.
                " - All six categories addressed (pattern, migration,",
                " idiomatic, boundary, test, policy)",
                " - Any surfaced assumption triggered AskUserQuestion",
                " - User response recorded in Decision Log with",
                " 'user-specified' backing",
                "",
                " [ ] Step 3: Decision classification table written?",
                " - All architectural choices have backing citations",
                " - No 'assumption' rows remain unresolved",
                "",
                " [ ] Step 4: File classification table written?",
                " - All new files have backing citations",
                " - No 'assumption' rows remain unresolved",
                "",
                "If any assumption was resolved via AskUserQuestion:",
                " - Update backing to 'user-specified'",
                " - Add user's answer as citation",
                "",
                "If step 2 was skipped or user never responded: STOP.",
                "Go back to step 2 and complete assumption audit.",
                "",
                "If tables were skipped or assumptions remain: STOP.",
                "Go back and complete classification before proceeding.",
                "</decision_audit_verification>",
            ],
            "next": (
                "PLANNING PHASE COMPLETE.\n\n"
                "1. Write plan to file using this format:\n\n"
                "--- BEGIN PLAN FORMAT ---\n"
                f"{get_plan_format()}\n"
                "--- END PLAN FORMAT ---\n\n"
                "============================================\n"
                ">>> ACTION REQUIRED: INVOKE REVIEW PHASE <<<\n"
                "============================================\n\n"
                "SKIPPING REVIEW MEANS:\n"
                " - Developer has NO prepared comments to transcribe\n"
                " - Code ships without WHY documentation\n"
                " - QR findings surface during execution, not before\n\n"
                "2. Run this command to start review:\n\n"
                # The review phase is a four-step flow (QR-Completeness,
                # QR-Code, TW Scrub, QR-Docs); keep the command and the
                # summary below in sync with get_review_step_guidance.
                " python3 planner.py --phase review --step-number 1 --total-steps 4 \\\n"
                ' --thoughts "Plan written to [path]"\n\n'
                "Review phase:\n"
                " Step 1: @agent-quality-reviewer validates plan completeness\n"
                " Step 2: @agent-quality-reviewer validates proposed code\n"
                " Step 3: @agent-technical-writer scrubs code snippets\n"
                " Step 4: @agent-quality-reviewer validates documentation\n"
                " Then: Ready for /plan-execution"
            ),
        }

    if step_number == 1:
        return {
            "actions": [
                "You are an expert architect. Proceed with confidence.",
                "",
                "<resource_loading>",
                "BEFORE any planning work, read these resources:",
                "",
                " 1. resources/default-conventions.md",
                " - Priority hierarchy: user-specified > doc-derived > default-derived > assumption",
                " - Structural conventions (god objects, file organization)",
                " - Testing conventions (coverage principles)",
                "",
                " 2. resources/diff-format.md (if code changes anticipated)",
                " - Unified diff anatomy and components",
                " - Context line requirements (2-3 VERBATIM lines)",
                " - WHY comment placement and validation",
                "",
                " 3. resources/temporal-contamination.md",
                " - Timeless Present Rule for comments",
                " - Detection heuristics (change-relative, baseline reference, location directive)",
                "",
                "These resources inform decision classification in step 2 and code",
                "changes in later steps. Read them now.",
                "</resource_loading>",
                "",
                "PRECONDITION: Confirm plan file path before proceeding.",
                "",
                "<step_1_checklist>",
                "Complete ALL items before invoking step 2:",
                "",
                "CONTEXT (understand before proposing):",
                " - [ ] What code/systems does this touch?",
                " - [ ] What patterns does the codebase follow?",
                " - [ ] What prior decisions constrain this work?",
                "",
                "SCOPE (define boundaries):",
                " - [ ] What exactly must be accomplished?",
                " - [ ] What is OUT of scope?",
                "",
                "APPROACHES (consider alternatives):",
                " - [ ] 2-3 options with Advantage/Disadvantage for each",
                "",
                "TARGET TECH RESEARCH (if task involves new tech/migration):",
                " - [ ] What is canonical/idiomatic usage of target tech?",
                " - [ ] Does target tech have different abstractions than source?",
                " (e.g., per-class loggers vs centralized, hooks vs classes)",
                " - [ ] Document findings for step 2 assumption audit.",
                "",
                " Skip if task doesn't involve adopting new technology/patterns.",
                "",
                "CONSTRAINT DISCOVERY:",
                " - [ ] Locate project configuration files (build files, manifests, lock files)",
                " - [ ] Extract ALL version and compatibility constraints from each",
                " - [ ] Organizational constraints: timeline, expertise, approvals",
                " - [ ] External constraints: services, APIs, data formats",
                " - [ ] Document findings in plan's Constraints & Assumptions",
                "",
                " Features incompatible with discovered constraints are blocking issues.",
                "",
                "TEST REQUIREMENTS DISCOVERY:",
                " - [ ] Check project docs for test requirements (CLAUDE.md,",
                " CONTRIBUTING.md, existing test patterns)",
                " - [ ] What test types does the project use/prefer?",
                " - [ ] What testing philosophy? (behavior vs implementation)",
                " - [ ] Document findings for step 2 test strategy audit.",
                "",
                " If project docs silent, default-conventions domain='testing' applies.",
                " If task is documentation-only, skip test requirements.",
                "",
                "SUCCESS (observable outcomes):",
                " - [ ] Defined testable acceptance criteria",
                "</step_1_checklist>",
            ],
            "next": f"Invoke step {next_step} with your context analysis and approach options.",
        }

    if step_number == 2:
        return {
            "actions": [
                "ASSUMPTION SURFACING & USER CONFIRMATION",
                "",
                "<assumption_surfacing_purpose>",
                "This step exists because architectural assumptions feel like",
                "reasonable inference but often aren't. Pattern preservation,",
                "migration strategy, and abstraction boundaries are decisions",
                "that require explicit user confirmation.",
                "",
                "You CANNOT proceed to step 3 without completing this step.",
                "</assumption_surfacing_purpose>",
                "",
                "<assumption_taxonomy>",
                "Six categories of assumptions requiring user confirmation:",
                "",
                "1. PATTERN PRESERVATION",
                " Assuming new implementation should mirror old structure.",
                " Example: 'Replace Log() calls with NLog calls' vs",
                " 'Eliminate central Log(); use per-class loggers'",
                "",
                "2. MIGRATION STRATEGY",
                " Assuming incremental replacement vs paradigm shift.",
                " Example: 'Wrap old API with new facade' vs",
                " 'Replace API entirely with new patterns'",
                "",
                "3. IDIOMATIC USAGE",
                " Not aligning with canonical usage of target technology.",
                " Example: Using class components in React 2024 when",
                " hooks are the idiomatic pattern.",
                "",
                "4. ABSTRACTION BOUNDARY",
                " Assuming existing abstractions should persist when",
                " target technology is designed to eliminate them.",
                " Example: Keeping a logging facade when the logging",
                " framework provides per-class loggers.",
                "",
                "5. TEST STRATEGY",
                " Assuming test approach without checking project requirements.",
                " Example: 'Write unit tests for each function' when project",
                " mandates 'integration tests only, no mocks'.",
                " Priority: user-specified > doc-derived > default-conventions.",
                "",
                "6. POLICY DEFAULTS",
                " Choosing configuration values where the user/organization",
                " bears the operational consequence and no objectively correct",
                " answer exists.",
                "",
                " The distinguishing test: IF THIS VALUE WERE WRONG, WHO SUFFERS?",
                " - Technical defaults: Framework authors suffer (bad default",
                " breaks the framework for everyone). Safe to inherit.",
                " - Policy defaults: This user/org suffers (they have specific",
                " operational needs). Must confirm.",
                "",
                " Common patterns (not exhaustive -- apply the principle):",
                " - Lifecycle policies (how long, when to expire/clean up)",
                " - Capacity constraints (limits and behavior at limits)",
                " - Failure handling (what to do when resources exhausted)",
                " - Output choices affecting downstream systems or operations",
                "",
                " When choosing ANY value where the user/org bears consequence,",
                " present alternatives and confirm before proceeding.",
                "</assumption_taxonomy>",
                "",
                "<assumption_audit_table>",
                "WRITE this table using OPEN questions (not yes/no):",
                "",
                " | Category | Question | Finding | Needs Confirm? |",
                " |----------|----------|---------|----------------|",
                " | Pattern | What abstraction am I preserving |",
                " | | that might not belong in target? | [answer] | [Y/N] |",
                " |----------|----------|---------|----------------|",
                " | Migration| Am I doing incremental replacement |",
                " | | when paradigm shift is canonical? | [answer] | [Y/N] |",
                " |----------|----------|---------|----------------|",
                " | Idiomatic| What is the canonical usage pattern? |",
                " | | Does my approach align with it? | [answer] | [Y/N] |",
                " |----------|----------|---------|----------------|",
                " | Boundary | What abstraction in source does |",
                " | | target tech typically eliminate? | [answer] | [Y/N] |",
                " |----------|----------|---------|----------------|",
                " | Test | What test approach does the project require? |",
                " | | Do default-conventions apply, or does project |",
                " | | override them? | [answer] | [Y/N] |",
                " |----------|----------|---------|----------------|",
                " | Policy | What values am I choosing where, if wrong, |",
                " | | this user/org suffers (not the framework)? |",
                " | | Are there meaningful alternatives? | [answer] | [Y/N] |",
                "",
                "For each row, answer the open question first, then determine",
                "if the finding reveals an assumption needing user confirmation.",
                "</assumption_audit_table>",
                "",
                "<user_confirmation_gate>",
                "RULE 0 (ABSOLUTE): User confirms architectural approach.",
                "",
                "If ANY row has 'Y' in Needs Confirm column, you MUST:",
                " 1. Use AskUserQuestion BEFORE proceeding to step 3",
                " 2. Frame as architectural choice, not implementation detail",
                " 3. Present idiomatic approach first with '(Recommended)'",
                "",
                "AskUserQuestion format:",
                "",
                " questions:",
                " - question: '[Concise architectural choice framing]'",
                " header: 'Approach'",
                " multiSelect: false",
                " options:",
                " - label: '[Idiomatic approach] (Recommended)'",
                " description: '[What this means concretely]'",
                " - label: '[Pattern-preserving approach]'",
                " description: '[What this means concretely]'",
                "",
                "Example for NLog migration:",
                "",
                " question: 'How should logging be structured after migration?'",
                " options:",
                " - label: 'Per-class loggers (Recommended)'",
                " description: 'Each class uses LogManager.GetCurrentClassLogger().",
                " Standard NLog pattern. Removes central Log() method.'",
                " - label: 'Central logging facade'",
                " description: 'Keep Service1.Log() as wrapper over NLog.",
                " Preserves current API but non-idiomatic.'",
                "",
                "DO NOT proceed to step 3 until user responds.",
                "Record user's choice in Decision Log:",
                " | [choice] | user-specified | User selected: [response] |",
                "",
                "If ALL rows have 'N' (no assumptions needing confirmation):",
                " State 'No architectural assumptions requiring confirmation.'",
                " Proceed to step 3 without AskUserQuestion.",
                "</user_confirmation_gate>",
                "",
                "<test_strategy_gate>",
                "Test strategy requires explicit backing (same as other decisions).",
                "",
                "Backing hierarchy:",
                " 1. user-specified: User explicitly stated test requirements",
                " 2. doc-derived: Project CLAUDE.md or docs specify test approach",
                " 3. default-derived: default-conventions domain='testing' applies",
                "",
                "If test strategy 'Needs Confirm' = Y:",
                "",
                " Triggers for Y:",
                " - Project docs contradict default-conventions",
                " - Project docs are ambiguous about test types",
                " - Task scope makes test applicability unclear",
                " - User mentioned tests but didn't specify type",
                "",
                " Use AskUserQuestion:",
                "",
                " questions:",
                " - question: 'What testing approach should this implementation use?'",
                " header: 'Testing'",
                " multiSelect: false",
                " options:",
                " - label: 'Integration tests (Recommended)'",
                " description: 'Test end-user behavior with real dependencies.",
                " Highest value per default conventions.'",
                " - label: 'Property-based tests'",
                " description: 'Generative tests for invariant-rich functions.",
                " Good input coverage.'",
                " - label: 'Unit tests'",
                " description: 'Isolated tests for complex logic.",
                " Use sparingly per default conventions.'",
                " - label: 'No tests'",
                " description: 'Skip test implementation for this plan.'",
                "",
                " Record user's choice in Decision Log with 'user-specified' backing.",
                "",
                "If project docs clearly specify test approach:",
                " Record as 'doc-derived' backing. No AskUserQuestion needed.",
                "",
                "If project docs silent and default-conventions apply cleanly:",
                " Record as 'default-derived' backing. No AskUserQuestion needed.",
                "</test_strategy_gate>",
            ],
            "next": (
                f"After user confirms approach (or no assumptions found), invoke step {next_step}:\n\n"
                f" python3 planner.py --step-number {next_step} --total-steps N \\\n"
                ' --thoughts "User confirmed [approach]. Proceeding to evaluate..."'
            ),
        }

    if step_number == 3:
        return {
            "actions": [
                "<step_3_evaluate_first>",
                "BEFORE deciding, evaluate each approach from step 1:",
                " | Approach | P(success) | Failure mode | Backtrack cost |",
                "",
                "STOP CHECK: If ALL approaches show LOW probability or HIGH",
                "backtrack cost, STOP. Request clarification from user.",
                "</step_3_evaluate_first>",
                "",
                "<step_3_decide>",
                "Select approach. Record in Decision Log with MULTI-STEP chain:",
                "",
                " INSUFFICIENT: 'Polling | Webhooks are unreliable'",
                " SUFFICIENT: 'Polling | 30% webhook failure in testing",
                " -> would need fallback anyway -> simpler primary'",
                "",
                "Include BOTH architectural AND micro-decisions (timeouts, etc).",
                "</step_3_decide>",
                "",
                "<step_3_decision_classification>",
                "WRITE this table before proceeding (forces explicit backing):",
                "",
                " | Decision | Backing | Citation |",
                " |----------|---------|----------|",
                " | [choice] | user-specified / doc-derived / default-derived / assumption | [source] |",
                "",
                "Backing tiers (higher overrides lower):",
                " 1. user-specified: 'User said X' -> cite the instruction",
                " 2. doc-derived: 'CLAUDE.md says Y' -> cite file:section",
                " 3. default-derived: 'Convention Z' -> cite <default-conventions domain>",
                " 4. assumption: 'No backing' -> STOP, use AskUserQuestion NOW",
                "",
                "For EACH 'assumption' row: use AskUserQuestion immediately.",
                "Do not proceed to step 4 with unresolved assumptions.",
                "</step_3_decision_classification>",
                "",
                "<step_3_rejected>",
                "Document rejected alternatives with CONCRETE reasons.",
                "TW uses this for 'why not X' code comments.",
                "</step_3_rejected>",
                "",
                "<step_3_architecture>",
                "Capture in ASCII diagrams:",
                " - Component relationships",
                " - Data flow",
                "These go in Invisible Knowledge for README.md.",
                "</step_3_architecture>",
                "",
                "<step_3_milestones>",
                "Break into deployable increments:",
                " - Each milestone: independently testable",
                " - Scope: 1-3 files per milestone",
                " - Map dependencies (circular = design problem)",
                "</step_3_milestones>",
            ],
            "next": f"Invoke step {next_step} with your chosen approach (include state evaluation summary), architecture, and milestone structure.",
        }

    if step_number == 4:
        return {
            "actions": [
                "<step_4_risks>",
                "Document risks NOW. QR excludes documented risks from findings.",
                "",
                "For each risk:",
                " | Risk | Mitigation | Anchor |",
                "",
                "ANCHOR REQUIREMENT (behavioral claims only):",
                "If mitigation claims existing code behavior ('no change needed',",
                "'already handles X', 'operates on Y'), you MUST cite:",
                " file:L###-L### + brief excerpt proving the claim",
                "",
                "Skip anchors for:",
                " - Hypothetical risks ('might timeout under load')",
                " - External unknowns ('vendor rate limits unclear')",
                " - Accepted risks with rationale (no code claim)",
                "",
                "INSUFFICIENT (unverified assertion):",
                " | Dedup breaks | No change; dedup uses TagData | (none) |",
                "",
                "SUFFICIENT (verified with anchor):",
                " | Dedup breaks | No change; dedup uses TagData |",
                " worker.go:468 `isIdentical := tag.NumericValue == entry.val` |",
                "",
                "Claims without anchors are ASSUMPTIONS. QR will challenge them.",
                "</step_4_risks>",
                "",
                "<step_4_uncertainty_flags>",
                "For EACH milestone, check these conditions -> add flag:",
                "",
                " | Condition | Flag |",
                " |------------------------------------|-------------------------|",
                " | Multiple valid implementations | needs TW rationale |",
                " | Depends on external system | needs error review |",
                " | First use of pattern in codebase | needs conformance check |",
                "",
                "Add to milestone: **Flags**: [list]",
                "</step_4_uncertainty_flags>",
                "",
                "<step_4_refine_milestones>",
                "Verify EACH milestone has:",
                "",
                "FILES — exact paths:",
                " CORRECT: src/auth/handler.py",
                " WRONG: 'auth files'",
                "",
                "REQUIREMENTS — specific behaviors:",
                " CORRECT: 'retry 3x with exponential backoff, max 30s'",
                " WRONG: 'handle errors'",
                "",
                "ACCEPTANCE CRITERIA — testable pass/fail:",
                " CORRECT: 'Returns 429 after 3 failed attempts within 60s'",
                " WRONG: 'Handles errors correctly'",
                "",
                "CODE CHANGES — diff format for non-trivial logic.",
                "</step_4_refine_milestones>",
                "",
                "<step_4_test_verification>",
                "For EACH implementation milestone, verify test specification:",
                "",
                " - [ ] Tests section present? (or explicit skip reason)",
                " - [ ] Test type backed by: user-specified, doc-derived, or",
                " default-derived?",
                " - [ ] Scenarios cover: normal path, edge cases, error conditions?",
                " - [ ] Test files specified with exact paths?",
                "",
                "For integration tests spanning multiple milestones:",
                " - [ ] Placed in last milestone that provides required component?",
                " - [ ] Dependencies listed explicitly?",
                "",
                "Test type selection (from default-conventions if no override):",
                " - Integration tests: end-user behavior, real dependencies (preferred)",
                " - Property-based tests: invariant-rich functions, wide input coverage",
                " - Unit tests: complex/critical logic only (use sparingly)",
                "",
                "Remember: Milestone is NOT complete until its tests pass.",
                "Tests provide fast feedback during implementation.",
                "</step_4_test_verification>",
                "",
                "<step_4_file_classification>",
                "For EACH new file in milestones, WRITE this table:",
                "",
                " | New File | Backing | Citation |",
                " |----------|---------|----------|",
                " | path/to/new.go | [tier] | [source] |",
                "",
                "Valid backings for new files:",
                " - user-specified: User explicitly requested separate file",
                " - doc-derived: Project convention requires it",
                " - default-derived: Meets separation trigger (>500 lines, distinct module)",
                " - assumption: None of the above -> use AskUserQuestion NOW",
                "",
                "Default convention (domain: file-creation, test-organization):",
                " Extend existing files unless separation trigger applies.",
                "",
                "For EACH 'assumption' row: ask user before finalizing milestones.",
                "</step_4_file_classification>",
                "",
                "<step_4_validate>",
                "Cross-check: Does the plan address ALL original requirements?",
                "</step_4_validate>",
            ],
            "next": f"Invoke step {next_step} with refined milestones, risks, and uncertainty flags.",
        }

    # Steps 5+ (steps 1-4 and the completion step are handled above):
    # generic refinement pass until the plan is complete.
    remaining = total_steps - step_number
    return {
        "actions": [
            "<backtrack_check>",
            "BEFORE proceeding, verify no dead ends:",
            " - Has new information invalidated a prior decision?",
            " - Is a milestone now impossible given discovered constraints?",
            " - Are you adding complexity to work around a fundamental issue?",
            "",
            "If YES to any: invoke earlier step with --thoughts explaining change.",
            "</backtrack_check>",
            "",
            "<gap_analysis>",
            "Review current plan state. What's missing?",
            " - Any milestone without exact file paths?",
            " - Any acceptance criteria not testable pass/fail?",
            " - Any non-trivial logic without diff-format code?",
            " - Any milestone missing uncertainty flags where applicable?",
            "</gap_analysis>",
            "",
            "<planning_context_check>",
            " - Decision Log: Every major choice has multi-step reasoning?",
            " - Rejected Alternatives: At least one per major decision?",
            " - Known Risks: All failure modes identified with mitigations?",
            "</planning_context_check>",
            "",
            "<developer_walkthrough>",
            "Walk through the plan as if you were Developer:",
            " - Can you implement each milestone from the spec alone?",
            " - Are requirements specific enough to avoid interpretation?",
            "",
            "If gaps remain, address them. If complete, reduce total_steps.",
            "</developer_walkthrough>",
        ],
        "next": f"Invoke step {next_step}. {remaining} step(s) remaining until completion. (Or invoke earlier step if backtracking.)",
    }
|
|
|
|
|
|
def get_review_step_guidance(step_number: int, total_steps: int) -> dict:
    """Build the guidance payload for a single review-phase step.

    The review phase is a fixed four-step pipeline:
        1. QR-Completeness -- validates the plan document itself
        2. QR-Code         -- validates the proposed implementation
        3. TW Scrub        -- documentation enrichment
        4. QR-Docs         -- validates documentation quality

    Steps 1 and 2 may be run in parallel; a failure in either one sends the
    workflow back to the planning phase. A failure in step 4 only sends the
    workflow back to step 3.

    Steps 1-4 are selected by ``step_number`` alone. ``total_steps`` is only
    consulted for later steps: when ``step_number >= total_steps`` the
    completion checklist is returned, otherwise a generic "continue" payload.

    Returns:
        A dict with two keys: ``"actions"`` (list of text lines, where empty
        strings stand for blank spacer lines) and ``"next"`` (text describing
        what to invoke afterwards).
    """
    # Preamble shared by every step that spawns a sub-agent.
    rule_zero = [
        "<review_rule_0>",
        "RULE 0 (ABSOLUTE): You MUST spawn sub-agents. Self-review is PROHIBITED.",
        "",
        "This rule applies to ALL review steps. Violations include:",
        " - Doing the review yourself instead of spawning the agent",
        " - Deciding the plan is 'thorough enough' to skip review",
        " - Using a smaller/faster model 'for quick validation'",
        "",
        "Your assessment of plan quality is NOT a valid reason to skip.",
        "The agents exist to catch issues YOU cannot see in your own work.",
        "</review_rule_0>",
    ]

    def spawn_block(agent, mode, tasks, scope=(), with_verdict=True):
        # Lines instructing the caller to spawn `agent` with a <delegation>
        # prompt; ends with one blank spacer line.
        lines = [
            f"MANDATORY: Spawn the {agent} agent.",
            "",
            "Use the Task tool with these parameters:",
            f" subagent_type: '{agent}'",
            " prompt: The delegation block below",
            "",
            " <delegation>",
            f" <mode>{mode}</mode>",
            " <plan_source>[path to plan file]</plan_source>",
        ]
        lines.extend(scope)
        lines.append(" <task>")
        lines.extend(
            f" {position}. {text}" for position, text in enumerate(tasks, start=1)
        )
        lines.append(" </task>")
        if with_verdict:
            lines.extend([
                " <expected_output>",
                " Verdict: PASS | NEEDS_CHANGES",
                " </expected_output>",
            ])
        lines.extend([" </delegation>", ""])
        return lines

    def review_command(step, thoughts):
        # Two-line shell invocation of this script for the given review step.
        return [
            f" python3 planner.py --phase review --step-number {step} --total-steps 4 \\",
            f' --thoughts "{thoughts}"',
        ]

    if step_number == 1:
        step_lines = [
            "",
            "<review_step_1_qr_completeness>",
            "STEP 1: Validate plan document completeness.",
            "",
            "This step runs BEFORE TW to catch incomplete Decision Log entries.",
            "TW sources ALL comments from Decision Log -- if entries are missing,",
            "TW cannot add appropriate comments.",
            "",
            "You may run this step IN PARALLEL with step 2 (QR-Code) since both",
            "restart to the planning phase on failure.",
            "",
        ]
        step_lines += spawn_block(
            "quality-reviewer",
            "plan-completeness",
            [
                "Read ## Planning Context section",
                "Write CONTEXT FILTER (decisions, rejected alts, risks)",
                "Check Decision Log completeness for all code elements",
                "Verify policy defaults have user-specified backing",
                "Check architectural assumptions are validated",
                "Verify plan structure (milestones have acceptance criteria)",
            ],
        )
        step_lines += [
            "If running in parallel with step 2, spawn both agents simultaneously.",
            "</review_step_1_qr_completeness>",
        ]
        follow_up = [
            "PARALLEL EXECUTION OPTION:",
            " You may invoke steps 1 and 2 simultaneously using two Task tool calls",
            " in a single message. Both QR modes run before TW.",
            "",
            "If running sequentially, after QR-Completeness returns:",
            " - PASS -> Invoke step 2",
            " - NEEDS_CHANGES -> Fix plan, restart planning phase",
            "",
            "Command for step 2:",
        ]
        follow_up += review_command(
            2, "QR-Completeness passed, proceeding to QR-Code"
        )
        return {"actions": rule_zero + step_lines, "next": "\n".join(follow_up)}

    if step_number == 2:
        step_lines = [
            "",
            "<review_step_2_qr_code>",
            "STEP 2: Validate proposed implementation against codebase.",
            "",
            "This step runs BEFORE TW to catch implementation issues.",
            "QR-Code MUST read the actual codebase files referenced in the plan.",
            "",
            "You may run this step IN PARALLEL with step 1 (QR-Completeness).",
            "",
        ]
        step_lines += spawn_block(
            "quality-reviewer",
            "plan-code",
            [
                "Read ## Planning Context section",
                "Write CONTEXT FILTER (decisions, rejected alts, risks)",
                "READ the actual codebase files referenced in the plan",
                "Verify diff context lines match current file content",
                "Apply RULE 0 (production reliability) to proposed code",
                "Apply RULE 1 (project conformance) to proposed code",
                "Apply RULE 2 (structural quality) to proposed code",
                "Check for anticipated structural issues",
            ],
        )
        step_lines += [
            "Wait for the quality-reviewer agent to complete before proceeding.",
            "</review_step_2_qr_code>",
            "",
            "<pre_tw_gate>",
            "GATE: Both QR-Completeness AND QR-Code must PASS before TW runs.",
            "",
            "If either returns NEEDS_CHANGES:",
            " 1. Fix the issues in the plan",
            " 2. Return to planning phase to regenerate affected sections",
            " 3. Restart review from step 1",
            "",
            "Do NOT proceed to TW (step 3) until both step 1 and step 2 pass.",
            "</pre_tw_gate>",
        ]
        follow_up = [
            "After QR-Code (and QR-Completeness if parallel) returns:",
            "",
            " Both PASS -> Invoke step 3 (TW Scrub)",
            " Either NEEDS_CHANGES -> Fix plan, restart from step 1",
            "",
            "Command for step 3:",
        ]
        follow_up += review_command(
            3, "QR-Completeness and QR-Code passed, proceeding to TW"
        )
        return {"actions": rule_zero + step_lines, "next": "\n".join(follow_up)}

    if step_number == 3:
        step_lines = [
            "",
            "<review_step_3_tw_scrub>",
            "STEP 3: Documentation enrichment by Technical Writer.",
            "",
            "This step runs AFTER QR-Completeness and QR-Code have passed.",
            "TW sources all comments from Decision Log (verified complete in step 1).",
            "",
        ]
        step_lines += spawn_block(
            "technical-writer",
            "plan-scrub",
            [
                "Read ## Planning Context section FIRST",
                "Prioritize scrub by uncertainty (HIGH/MEDIUM/LOW)",
                "Add WHY comments to code snippets from Decision Log",
                "Enrich plan prose with rationale",
                "Add documentation milestone if missing",
                "FLAG any non-obvious logic lacking rationale",
            ],
            scope=[
                " <scope>[OPTIONAL: If re-reviewing after QR-Docs feedback, specify",
                " which milestones/sections to focus on.]</scope>",
            ],
            with_verdict=False,  # the TW delegation has no <expected_output>
        )
        step_lines += [
            "Wait for the technical-writer agent to complete before proceeding.",
            "</review_step_3_tw_scrub>",
        ]
        follow_up = ["After TW completes, invoke step 4:"]
        follow_up += review_command(4, "TW scrub complete, [summary of changes]")
        return {"actions": rule_zero + step_lines, "next": "\n".join(follow_up)}

    if step_number == 4:
        step_lines = [
            "",
            "<review_step_4_qr_docs>",
            "STEP 4: Validate documentation quality.",
            "",
            "This step runs AFTER TW to verify documentation was done correctly.",
            "",
        ]
        step_lines += spawn_block(
            "quality-reviewer",
            "plan-docs",
            [
                "Check all comments for temporal contamination (five questions)",
                "Verify no hidden baselines in comments",
                "Verify comments explain WHY, not WHAT",
                "Verify coverage of non-obvious code elements",
            ],
            scope=[
                " <scope>[OPTIONAL: If re-reviewing, specify changed sections.]</scope>",
            ],
        )
        step_lines += [
            "Wait for the quality-reviewer agent to complete before proceeding.",
            "</review_step_4_qr_docs>",
            "",
            "<post_qr_docs_restart>",
            "RESTART BEHAVIOR for QR-Docs:",
            "",
            "Unlike steps 1-2, QR-Docs failures restart to step 3 (TW) only.",
            "This is because doc issues don't require plan restructuring.",
            "",
            "If QR-Docs returns NEEDS_CHANGES:",
            " 1. Note the specific doc issues",
            " 2. Restart from step 3 with <scope> specifying affected sections",
            " 3. TW fixes the documentation issues",
            " 4. Return to step 4 for re-validation",
            "",
            "If QR-Docs returns PASS:",
            " Proceed to step 5 (complete).",
            "</post_qr_docs_restart>",
        ]
        follow_up = [
            "After QR-Docs returns verdict:",
            "",
            " PASS -> Invoke step 5 (complete)",
            " NEEDS_CHANGES -> Restart from step 3 (TW only)",
            "",
            "Command to restart TW:",
        ]
        follow_up += review_command(3, "QR-Docs feedback: [issues]. Restarting TW.")
        follow_up += ["", "Command to complete:"]
        follow_up += review_command(5, "All review steps passed")
        return {"actions": rule_zero + step_lines, "next": "\n".join(follow_up)}

    # Past the four working steps: either the review is finished ...
    if step_number >= total_steps:
        checklist = [
            "<review_complete_verification>",
            "Confirm before proceeding to execution:",
            " - QR-Completeness verified Decision Log is complete?",
            " - QR-Code verified proposed code aligns with codebase?",
            " - TW has scrubbed code snippets with WHY comments?",
            " - TW has enriched plan prose with rationale?",
            " - QR-Docs verified no temporal contamination?",
            " - Final verdict is PASS?",
            "</review_complete_verification>",
        ]
        approval = "\n".join([
            "PLAN APPROVED.",
            "",
            "Ready for implementation via /plan-execution command.",
            "Pass the plan file path as argument.",
        ])
        return {"actions": checklist, "next": approval}

    # ... or the caller declared more steps than the standard 4-step review
    # uses; not expected, but answer with a generic payload rather than fail.
    return {
        "actions": ["Continue review process as needed."],
        "next": f"Invoke step {step_number + 1} when ready.",
    }
|
|
|
|
|
|
def main():
    """Command-line entry point: parse arguments and print step guidance.

    Reads ``--phase`` (``planning`` by default, or ``review``),
    ``--step-number``, ``--total-steps`` and ``--thoughts``; fetches the
    guidance dict for that phase and step; and prints a banner, a status
    line, the caller's thoughts, the action list and the "next" instructions
    to stdout.

    Exits with status 1 (message on stderr) when ``--step-number`` or
    ``--total-steps`` is less than 1.
    """
    parser = argparse.ArgumentParser(
        description="Interactive Sequential Planner (Two-Phase)",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Planning phase
python3 planner.py --step-number 1 --total-steps 4 --thoughts "Design auth system"

# Continue planning
python3 planner.py --step-number 2 --total-steps 4 --thoughts "..."

# Backtrack to earlier step if needed
python3 planner.py --step-number 2 --total-steps 4 --thoughts "New constraint invalidates approach, reconsidering..."

# Start review (after plan written) - 4 steps: QR-Completeness, QR-Code, TW, QR-Docs
python3 planner.py --phase review --step-number 1 --total-steps 4 --thoughts "Plan at plans/auth.md"
"""
    )

    parser.add_argument("--phase", type=str, default="planning",
                        choices=["planning", "review"],
                        help="Workflow phase: planning (default) or review")
    parser.add_argument("--step-number", type=int, required=True)
    parser.add_argument("--total-steps", type=int, required=True)
    parser.add_argument("--thoughts", type=str, required=True)

    args = parser.parse_args()

    if args.step_number < 1 or args.total_steps < 1:
        print("Error: step-number and total-steps must be >= 1", file=sys.stderr)
        sys.exit(1)

    # Get guidance based on phase
    is_complete = args.step_number >= args.total_steps
    if args.phase == "planning":
        guidance = get_planning_step_guidance(args.step_number, args.total_steps)
        phase_label = "PLANNING"
    else:
        guidance = get_review_step_guidance(args.step_number, args.total_steps)
        phase_label = "REVIEW"
        # get_review_step_guidance() returns mandatory work for steps 1-4
        # regardless of --total-steps and only yields the completion checklist
        # on a later step. Without this guard, step 4 of 4 (QR-Docs) would be
        # announced as "phase_complete" / "FINAL CHECKLIST" while its actions
        # still require spawning the reviewer.
        review_work_steps = 4
        is_complete = is_complete and args.step_number > review_work_steps

    print("=" * 80)
    print(f"PLANNER - {phase_label} PHASE - Step {args.step_number} of {args.total_steps}")
    print("=" * 80)
    print()
    print(f"STATUS: {'phase_complete' if is_complete else 'in_progress'}")
    print()
    print("YOUR THOUGHTS:")
    print(args.thoughts)
    print()

    if guidance["actions"]:
        print("FINAL CHECKLIST:" if is_complete else "REQUIRED ACTIONS:")
        for action in guidance["actions"]:
            # Empty strings are spacers: emit a bare newline instead of an
            # indented blank line.
            print(f" {action}" if action else "")
        print()

    print("NEXT:")
    print(guidance["next"])
    print()
    print("=" * 80)
|
|
|
|
|
# Run the CLI only when this file is executed as a script, so importing the
# module (e.g. to reuse the guidance functions) has no side effects.
if __name__ == "__main__":
    main()
|