longcipher
diff --git a/docs/design.md b/docs/design.md
Lines changed: 4 additions & 1 deletion
diff --git a/pyproject.toml b/pyproject.toml
Lines changed: 5 additions & 5 deletions
diff --git a/skills/pb-build/SKILL.md b/skills/pb-build/SKILL.md
Lines changed: 2 additions & 0 deletions
diff --git a/skills/pb-build/references/evaluator_prompt.md b/skills/pb-build/references/evaluator_prompt.md
Lines changed: 7 additions & 0 deletions
@@ -181,6 +181,9 @@ The validator enforces the pb-spec markdown contract:
 - Required fields: Context, Verification, Scenario Coverage, Loop Type, Behavioral Contract, Simplification Focus, Status, BDD Verification, Advanced Test Verification, Runtime Verification
 - Valid statuses: 🔴 TODO, 🟡 IN PROGRESS, 🟢 DONE, ⏭️ SKIPPED, 🔄 DCR, ⛔ OBSOLETE
 - At least one checkbox step per task
+- Duplicate task IDs are rejected
+- `N/A` verification placeholders must include a reason
+- Markdown-carried `🛑 Build Blocked` and `🔄 Design Change Request` packets are checked for required sections when present
 
 **features/ Contract:**
 
@@ -252,7 +255,7 @@ All external operations are protected with appropriate timeouts:
 
 ## 10. Known Constraints and Future Enhancements
 
-1. **Markdown parsing**: Current implementation uses regex matching; future versions may adopt AST-based parsing for better accuracy
+1. **Markdown parsing**: Current implementation uses a contract-specific line parser with regex token boundaries; future versions may adopt AST-based parsing if lossless editing becomes a requirement
 2. **Language support**: Additional programming languages can be added by extending scanner patterns
 3. **Performance**: For very large codebases, consider implementing parallel scanning or incremental validation
 4. **Integration**: Future versions may integrate with CI/CD pipelines for automated quality gates
 
@@ -4,7 +4,7 @@ build-backend = "uv_build"
 
 [project]
 name = "pb-spec"
-version = "1.0.2"
+version = "1.0.3"
 description = "Plan-Build Spec (pb-spec): A CLI tool for managing AI coding assistant skills"
 readme = "README.md"
 license = "Apache-2.0"
@@ -23,7 +23,7 @@ classifiers = [
     "Topic :: Software Development :: Libraries :: Python Modules",
 ]
 dependencies = [
-    "click>=8.3.1",
+    "click>=8.3.3",
 ]
 
 [project.urls]
@@ -40,9 +40,9 @@ testpaths = ["tests"]
 [dependency-groups]
 dev = [
     "behave>=1.3.3",
-    "pytest>=9.0.2",
-    "ruff>=0.15.8",
-    "ty>=0.0.26",
+    "pytest>=9.0.3",
+    "ruff>=0.15.12",
+    "ty>=0.0.32",
 ]
 
 [tool.ruff]
 
@@ -192,6 +192,7 @@ After the Generator signals `READY_FOR_EVAL`, the orchestrator must perform an *
    - The task description from `tasks.md`
    - The relevant `.feature` file scenarios
    - The `design.md` architecture decisions for this task
+   - If the agent runtime exposes a context-forking option, keep it disabled for the Evaluator. Never reuse the Generator agent/session ID, and never pass the Generator transcript.
 
 2. **Spawn Evaluator Persona.** Use the `references/evaluator_prompt.md` template, filled with:
    - The full task description
@@ -211,6 +212,7 @@ After the Generator signals `READY_FOR_EVAL`, the orchestrator must perform an *
    - **Frontend tasks:** Use browser automation to navigate, screenshot, and interact with the running app.
    - **Backend tasks:** Use HTTP tools to hit real API endpoints, verify status codes and response bodies.
    - If tools are unavailable, fall back to CLI-based verification (curl, wget) and document the limitation.
+   - Clean up every live verification resource you start: close browser pages/contexts, terminate local dev servers, and release MCP/tool sessions even when verification fails.
 
    **Check C — Edge Case Probing:**
    - Test at least 2 boundary/edge cases not explicitly in the scenario
 
@@ -116,6 +116,13 @@ You MUST verify the implementation works at runtime. Do NOT rely on test logs al
 - Check that configuration files are syntactically valid
 - Verify no regressions in existing tooling
 
+**Resource cleanup requirement:**
+
+- Close any browser page/context/session you open for verification.
+- Stop any local server, subprocess, tunnel, or watcher you start for verification.
+- Release MCP/tool sessions even when a verification step fails.
+- If cleanup cannot be verified, report that limitation in the verdict.
+
 **Output:** Document every verification step taken and its result. Include MCP tool commands and responses.
 
 ### Check C — Edge Case Probing