feat(llm): extract adapter layer for standalone llm-connect package (S1+S2)

Stage 1 — Decouple:
- Move RunConfig + LLMResponse to markitect/llm/models.py (canonical)
- Move LLMAdapter + Mock/ErrorLLMAdapter to markitect/llm/adapter.py
- markitect/prompts/execution/models.py and llm_adapter.py become re-export shims
- All 4 adapters + factory.py updated to import from markitect.llm.*
- Parameterize app_name in toml_config.py (resolve_llm, get_default_layers, get_preference_layers): paths and env var now derived from app_name arg
- Add tests/test_llm_isolation.py: 7 isolation + backward-compat tests

Stage 2 — Extract:
- Standalone llm-connect package created at ~/llm-connect/
- All 18 llm files copied; markitect.* imports replaced with llm_connect.*
- LLMError base inlined in llm_connect/exceptions.py (no markitect dep)
- llm-connect installed into markitect-venv; declared in pyproject.toml

Smoke test: markitect llm-check succeeds (live Gemini API call).
Backward compat: markitect.prompts.execution.{models,llm_adapter} still work.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
docs(roadmap): add workplans for infospace S3 close-out and JSUI publication
2026-02-27 08:04:50 +01:00 · 2026-02-26 00:44:05 +01:00
15 changed files with 833 additions and 268 deletions
--- a/markitect/llm/__init__.py
+++ b/markitect/llm/__init__.py
@@ -12,6 +12,8 @@ Quick start::
    response = adapter.execute_prompt(prompt, run_config)
 """

+from markitect.llm.models import RunConfig, LLMResponse
+from markitect.llm.adapter import LLMAdapter, MockLLMAdapter, ErrorLLMAdapter
 from markitect.llm.factory import create_adapter
 from markitect.llm.openrouter import OpenRouterAdapter
 from markitect.llm.claude_code import ClaudeCodeAdapter
@@ -37,6 +39,11 @@ from markitect.llm.similarity import (
 )

 __all__ = [
+    "RunConfig",
+    "LLMResponse",
+    "LLMAdapter",
+    "MockLLMAdapter",
+    "ErrorLLMAdapter",
    "create_adapter",
    "OpenRouterAdapter",
    "ClaudeCodeAdapter",
--- a/markitect/llm/adapter.py
+++ b/markitect/llm/adapter.py
@@ -0,0 +1,169 @@
+"""
+LLM adapter interface for pluggable model providers.
+
+Implements abstraction layer for LLM integration, supporting
+multiple providers (OpenAI, Anthropic, local models, etc.).
+"""
+
+from abc import ABC, abstractmethod
+from typing import Dict, Any
+
+from markitect.llm.models import RunConfig, LLMResponse
+
+
+class LLMAdapter(ABC):
+    """
+    Abstract base class for LLM providers.
+
+    Enables pluggable LLM backends without prescribing implementation.
+    Implementations can wrap OpenAI, Anthropic, or other APIs.
+    """
+
+    @abstractmethod
+    def execute_prompt(
+        self,
+        prompt: str,
+        config: RunConfig,
+    ) -> LLMResponse:
+        """
+        Execute a prompt with the LLM.
+
+        Args:
+            prompt: Compiled prompt text
+            config: Execution configuration
+
+        Returns:
+            LLMResponse with generated content
+
+        Raises:
+            Exception: On LLM API errors
+        """
+        pass
+
+    @abstractmethod
+    def validate_config(self, config: RunConfig) -> bool:
+        """
+        Validate that configuration is supported.
+
+        Args:
+            config: Configuration to validate
+
+        Returns:
+            True if valid, False otherwise
+        """
+        pass
+
+
+class MockLLMAdapter(LLMAdapter):
+    """
+    Mock LLM adapter for testing.
+
+    Returns deterministic responses without calling external APIs.
+    """
+
+    def __init__(self, mock_response: str = "Mock LLM response"):
+        """
+        Initialize mock adapter.
+
+        Args:
+            mock_response: Response to return
+        """
+        self.mock_response = mock_response
+        self.call_count = 0
+        self.last_prompt = None
+        self.last_config = None
+
+    def execute_prompt(
+        self,
+        prompt: str,
+        config: RunConfig,
+    ) -> LLMResponse:
+        """
+        Return mock response.
+
+        Args:
+            prompt: Prompt (stored for inspection)
+            config: Config (stored for inspection)
+
+        Returns:
+            Mock LLMResponse
+        """
+        self.call_count += 1
+        self.last_prompt = prompt
+        self.last_config = config
+
+        return LLMResponse(
+            content=self.mock_response,
+            model=config.model_name,
+            usage={
+                "prompt_tokens": len(prompt.split()),
+                "completion_tokens": len(self.mock_response.split()),
+                "total_tokens": len(prompt.split()) + len(self.mock_response.split()),
+            },
+            finish_reason="stop",
+            metadata={"mock": True},
+        )
+
+    def validate_config(self, config: RunConfig) -> bool:
+        """
+        Mock validation always succeeds.
+
+        Args:
+            config: Configuration
+
+        Returns:
+            Always True
+        """
+        return True
+
+    def reset(self) -> None:
+        """Reset mock state."""
+        self.call_count = 0
+        self.last_prompt = None
+        self.last_config = None
+
+
+class ErrorLLMAdapter(LLMAdapter):
+    """
+    Mock adapter that always raises an error.
+
+    Useful for testing error handling.
+    """
+
+    def __init__(self, error_message: str = "Mock LLM error"):
+        """
+        Initialize error adapter.
+
+        Args:
+            error_message: Error message to raise
+        """
+        self.error_message = error_message
+
+    def execute_prompt(
+        self,
+        prompt: str,
+        config: RunConfig,
+    ) -> LLMResponse:
+        """
+        Raise error.
+
+        Args:
+            prompt: Prompt
+            config: Config
+
+        Raises:
+            RuntimeError: Always
+        """
+        raise RuntimeError(self.error_message)
+
+    def validate_config(self, config: RunConfig) -> bool:
+        """
+        Validation succeeds.
+
+        Args:
+            config: Configuration
+
+        Returns:
+            True
+        """
+        return True
--- a/markitect/llm/claude_code.py
+++ b/markitect/llm/claude_code.py
@@ -5,8 +5,8 @@ Claude Code CLI adapter — runs the ``claude`` CLI as a subprocess.
 import subprocess
 from typing import Optional

-from markitect.prompts.execution.llm_adapter import LLMAdapter
-from markitect.prompts.execution.models import RunConfig, LLMResponse
+from markitect.llm.adapter import LLMAdapter
+from markitect.llm.models import RunConfig, LLMResponse
 from markitect.llm.config import LLMConfig
 from markitect.llm._token_estimator import estimate_tokens
 from markitect.llm.exceptions import (
--- a/markitect/llm/factory.py
+++ b/markitect/llm/factory.py
@@ -4,7 +4,7 @@ Factory for creating LLM adapters by provider name.

 from typing import Optional, Dict, Any

-from markitect.prompts.execution.llm_adapter import LLMAdapter
+from markitect.llm.adapter import LLMAdapter
 from markitect.llm.exceptions import LLMConfigurationError

 # Lazy imports to avoid pulling in every adapter at module load time.
--- a/markitect/llm/gemini.py
+++ b/markitect/llm/gemini.py
@@ -5,8 +5,8 @@ Google Gemini adapter — calls the Generative Language REST API directly.
 import time
 from typing import Optional, Dict, Any

-from markitect.prompts.execution.llm_adapter import LLMAdapter
-from markitect.prompts.execution.models import RunConfig, LLMResponse
+from markitect.llm.adapter import LLMAdapter
+from markitect.llm.models import RunConfig, LLMResponse
 from markitect.llm.config import resolve_api_key, find_project_root
 from markitect.llm._http import post_json
 from markitect.llm.exceptions import LLMConfigurationError
--- a/markitect/llm/models.py
+++ b/markitect/llm/models.py
@@ -0,0 +1,86 @@
+"""
+Shared data models for LLM execution.
+
+These classes are the canonical definitions; they are re-exported by
+markitect.prompts.execution.models for backward compatibility.
+"""
+
+from dataclasses import dataclass, field
+from typing import Dict, Any
+
+
+@dataclass
+class RunConfig:
+    """
+    Configuration for prompt execution.
+
+    Attributes:
+        model_name: LLM model to use
+        temperature: Model temperature (0.0-1.0)
+        max_tokens: Maximum tokens to generate
+        model_params: Additional model parameters
+        max_depth: Maximum generation depth for nested runs
+        skip_if_exists: Skip if identical InputBundleHash exists
+        timeout_seconds: Execution timeout
+    """
+    model_name: str = "gpt-4"
+    temperature: float = 0.7
+    max_tokens: int = 2000
+    model_params: Dict[str, Any] = field(default_factory=dict)
+    max_depth: int = 3
+    skip_if_exists: bool = True
+    timeout_seconds: int = 300
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary."""
+        return {
+            "model_name": self.model_name,
+            "temperature": self.temperature,
+            "max_tokens": self.max_tokens,
+            "model_params": self.model_params,
+            "max_depth": self.max_depth,
+            "skip_if_exists": self.skip_if_exists,
+            "timeout_seconds": self.timeout_seconds,
+        }
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "RunConfig":
+        """Create from dictionary."""
+        return cls(
+            model_name=data.get("model_name", "gpt-4"),
+            temperature=data.get("temperature", 0.7),
+            max_tokens=data.get("max_tokens", 2000),
+            model_params=data.get("model_params", {}),
+            max_depth=data.get("max_depth", 3),
+            skip_if_exists=data.get("skip_if_exists", True),
+            timeout_seconds=data.get("timeout_seconds", 300),
+        )
+
+
+@dataclass
+class LLMResponse:
+    """
+    Response from LLM execution.
+
+    Attributes:
+        content: Generated content
+        model: Model used
+        usage: Token usage statistics
+        finish_reason: Why generation stopped
+        metadata: Additional response metadata
+    """
+    content: str
+    model: str
+    usage: Dict[str, int] = field(default_factory=dict)
+    finish_reason: str = "stop"
+    metadata: Dict[str, Any] = field(default_factory=dict)
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary."""
+        return {
+            "content": self.content,
+            "model": self.model,
+            "usage": self.usage,
+            "finish_reason": self.finish_reason,
+            "metadata": self.metadata,
+        }
--- a/markitect/llm/openai.py
+++ b/markitect/llm/openai.py
@@ -5,8 +5,8 @@ OpenAI (ChatGPT) adapter — calls the OpenAI chat completions API.
 import time
 from typing import Optional, Dict, Any

-from markitect.prompts.execution.llm_adapter import LLMAdapter
-from markitect.prompts.execution.models import RunConfig, LLMResponse
+from markitect.llm.adapter import LLMAdapter
+from markitect.llm.models import RunConfig, LLMResponse
 from markitect.llm.config import resolve_api_key, find_project_root
 from markitect.llm._http import post_json
 from markitect.llm.exceptions import (
--- a/markitect/llm/openrouter.py
+++ b/markitect/llm/openrouter.py
@@ -5,8 +5,8 @@ OpenRouter adapter — calls the OpenAI-compatible chat completions API.
 import time
 from typing import Optional, Dict, Any

-from markitect.prompts.execution.llm_adapter import LLMAdapter
-from markitect.prompts.execution.models import RunConfig, LLMResponse
+from markitect.llm.adapter import LLMAdapter
+from markitect.llm.models import RunConfig, LLMResponse
 from markitect.llm.config import LLMConfig, resolve_api_key, find_project_root
 from markitect.llm._http import post_json
 from markitect.llm.exceptions import (
--- a/markitect/llm/toml_config.py
+++ b/markitect/llm/toml_config.py
@@ -28,13 +28,28 @@ from markitect.llm.config import find_project_root

 HARDCODED_PROVIDER = "gemini"
 HARDCODED_MODEL = "gemini-2.5-flash"
-MODEL_ENV_VAR = "MARKITECT_HELPER_MODEL"

+# Default (markitect) values kept for backward compatibility.
+MODEL_ENV_VAR = "MARKITECT_HELPER_MODEL"
 USER_CONFIG_DIR = Path.home() / ".config" / "markitect"
 USER_CONFIG_PATH = USER_CONFIG_DIR / "config.toml"
 DIR_CONFIG_NAME = ".markitect.toml"


+# ── App-name helpers ───────────────────────────────────────────────────────
+
+def _model_env_var(app_name: str) -> str:
+    return f"{app_name.upper()}_HELPER_MODEL"
+
+
+def _user_config_path(app_name: str) -> Path:
+    return Path.home() / ".config" / app_name / "config.toml"
+
+
+def _dir_config_name(app_name: str) -> str:
+    return f".{app_name}.toml"
+
+
 # ── Data classes ──────────────────────────────────────────────────────────

@dataclass
@@ -114,11 +129,11 @@ def _clear_llm_section(path: Path, section: str) -> bool:

 # ── Directory config path helper ─────────────────────────────────────────

-def _dir_config_path() -> Optional[Path]:
+def _dir_config_path(app_name: str = "markitect") -> Optional[Path]:
    root = find_project_root()
    if root is None:
        return None
-    return root / DIR_CONFIG_NAME
+    return root / _dir_config_name(app_name)


 # ── Resolution ───────────────────────────────────────────────────────────
@@ -126,13 +141,23 @@ def _dir_config_path() -> Optional[Path]:
 def resolve_llm(
    cli_provider: Optional[str] = None,
    cli_model: Optional[str] = None,
+    app_name: str = "markitect",
 ) -> ResolvedLLM:
    """Walk the 7-level priority chain and return a fully resolved config.

    Provider and model are resolved independently — each takes the value
    from its highest-priority source.
+
+    Args:
+        cli_provider: Provider override from CLI.
+        cli_model: Model override from CLI.
+        app_name: Application name used to derive config paths and the
+            env-var prefix (e.g. ``"railiance"`` → ``RAILIANCE_HELPER_MODEL``
+            and ``~/.config/railiance/config.toml``).
    """
-    dir_path = _dir_config_path()
+    dir_path = _dir_config_path(app_name)
+    user_cfg = _user_config_path(app_name)
+    env_var = _model_env_var(app_name)

    # Build the layers (highest priority first).
    layers: list[tuple[str, LLMLayer]] = []
@@ -141,13 +166,13 @@ def resolve_llm(
    layers.append(("CLI flag", LLMLayer(provider=cli_provider, model=cli_model)))

    # 2. Env var (model only)
-    env_model = os.environ.get(MODEL_ENV_VAR) or None
-    layers.append(("env MARKITECT_HELPER_MODEL", LLMLayer(model=env_model)))
+    env_model = os.environ.get(env_var) or None
+    layers.append((f"env {env_var}", LLMLayer(model=env_model)))

    # 3. User preference
    layers.append((
        "user preference",
-        _read_llm_section(USER_CONFIG_PATH, "preference"),
+        _read_llm_section(user_cfg, "preference"),
    ))

    # 4. Directory preference
@@ -167,7 +192,7 @@ def resolve_llm(
    # 6. User default
    layers.append((
        "user default",
-        _read_llm_section(USER_CONFIG_PATH, "default"),
+        _read_llm_section(user_cfg, "default"),
    ))

    # 7. Hardcoded
@@ -199,20 +224,22 @@ def resolve_llm(
    )


-def get_default_layers() -> list[tuple[str, LLMLayer]]:
+def get_default_layers(app_name: str = "markitect") -> list[tuple[str, LLMLayer]]:
    """Return only the default layers for display."""
-    dir_path = _dir_config_path()
+    dir_path = _dir_config_path(app_name)
+    user_cfg = _user_config_path(app_name)
+    dir_cfg_name = _dir_config_name(app_name)
    layers: list[tuple[str, LLMLayer]] = []

    if dir_path:
        layers.append((
-            f"Directory default ({DIR_CONFIG_NAME})",
+            f"Directory default ({dir_cfg_name})",
            _read_llm_section(dir_path, "default"),
        ))

    layers.append((
-        f"User default ({USER_CONFIG_PATH})",
-        _read_llm_section(USER_CONFIG_PATH, "default"),
+        f"User default ({user_cfg})",
+        _read_llm_section(user_cfg, "default"),
    ))

    layers.append((
@@ -223,19 +250,21 @@ def get_default_layers() -> list[tuple[str, LLMLayer]]:
    return layers


-def get_preference_layers() -> list[tuple[str, LLMLayer]]:
+def get_preference_layers(app_name: str = "markitect") -> list[tuple[str, LLMLayer]]:
    """Return only the preference layers for display."""
-    dir_path = _dir_config_path()
+    dir_path = _dir_config_path(app_name)
+    user_cfg = _user_config_path(app_name)
+    dir_cfg_name = _dir_config_name(app_name)
    layers: list[tuple[str, LLMLayer]] = []

    layers.append((
-        f"User preference ({USER_CONFIG_PATH})",
-        _read_llm_section(USER_CONFIG_PATH, "preference"),
+        f"User preference ({user_cfg})",
+        _read_llm_section(user_cfg, "preference"),
    ))

    if dir_path:
        layers.append((
-            f"Directory preference ({DIR_CONFIG_NAME})",
+            f"Directory preference ({dir_cfg_name})",
            _read_llm_section(dir_path, "preference"),
        ))

--- a/markitect/prompts/execution/llm_adapter.py
+++ b/markitect/prompts/execution/llm_adapter.py
@@ -1,169 +1,9 @@
 """
-LLM adapter interface for pluggable model providers.
+Re-exports from markitect.llm.adapter for backward compatibility.

-Implements abstraction layer for LLM integration, supporting
-multiple providers (OpenAI, Anthropic, local models, etc.).
+The LLM adapter interface was moved to markitect.llm.adapter in v1.1.
 """

-from abc import ABC, abstractmethod
-from typing import Dict, Any
+from markitect.llm.adapter import LLMAdapter, MockLLMAdapter, ErrorLLMAdapter

-from markitect.prompts.execution.models import RunConfig, LLMResponse
-
-
-class LLMAdapter(ABC):
-    """
-    Abstract base class for LLM providers.
-
-    Enables pluggable LLM backends without prescribing implementation.
-    Implementations can wrap OpenAI, Anthropic, or other APIs.
-    """
-
-    @abstractmethod
-    def execute_prompt(
-        self,
-        prompt: str,
-        config: RunConfig,
-    ) -> LLMResponse:
-        """
-        Execute a prompt with the LLM.
-
-        Args:
-            prompt: Compiled prompt text
-            config: Execution configuration
-
-        Returns:
-            LLMResponse with generated content
-
-        Raises:
-            Exception: On LLM API errors
-        """
-        pass
-
-    @abstractmethod
-    def validate_config(self, config: RunConfig) -> bool:
-        """
-        Validate that configuration is supported.
-
-        Args:
-            config: Configuration to validate
-
-        Returns:
-            True if valid, False otherwise
-        """
-        pass
-
-
-class MockLLMAdapter(LLMAdapter):
-    """
-    Mock LLM adapter for testing.
-
-    Returns deterministic responses without calling external APIs.
-    """
-
-    def __init__(self, mock_response: str = "Mock LLM response"):
-        """
-        Initialize mock adapter.
-
-        Args:
-            mock_response: Response to return
-        """
-        self.mock_response = mock_response
-        self.call_count = 0
-        self.last_prompt = None
-        self.last_config = None
-
-    def execute_prompt(
-        self,
-        prompt: str,
-        config: RunConfig,
-    ) -> LLMResponse:
-        """
-        Return mock response.
-
-        Args:
-            prompt: Prompt (stored for inspection)
-            config: Config (stored for inspection)
-
-        Returns:
-            Mock LLMResponse
-        """
-        self.call_count += 1
-        self.last_prompt = prompt
-        self.last_config = config
-
-        return LLMResponse(
-            content=self.mock_response,
-            model=config.model_name,
-            usage={
-                "prompt_tokens": len(prompt.split()),
-                "completion_tokens": len(self.mock_response.split()),
-                "total_tokens": len(prompt.split()) + len(self.mock_response.split()),
-            },
-            finish_reason="stop",
-            metadata={"mock": True},
-        )
-
-    def validate_config(self, config: RunConfig) -> bool:
-        """
-        Mock validation always succeeds.
-
-        Args:
-            config: Configuration
-
-        Returns:
-            Always True
-        """
-        return True
-
-    def reset(self) -> None:
-        """Reset mock state."""
-        self.call_count = 0
-        self.last_prompt = None
-        self.last_config = None
-
-
-class ErrorLLMAdapter(LLMAdapter):
-    """
-    Mock adapter that always raises an error.
-
-    Useful for testing error handling.
-    """
-
-    def __init__(self, error_message: str = "Mock LLM error"):
-        """
-        Initialize error adapter.
-
-        Args:
-            error_message: Error message to raise
-        """
-        self.error_message = error_message
-
-    def execute_prompt(
-        self,
-        prompt: str,
-        config: RunConfig,
-    ) -> LLMResponse:
-        """
-        Raise error.
-
-        Args:
-            prompt: Prompt
-            config: Config
-
-        Raises:
-            RuntimeError: Always
-        """
-        raise RuntimeError(self.error_message)
-
-    def validate_config(self, config: RunConfig) -> bool:
-        """
-        Validation succeeds.
-
-        Args:
-            config: Configuration
-
-        Returns:
-            True
-        """
-        return True
+__all__ = ["LLMAdapter", "MockLLMAdapter", "ErrorLLMAdapter"]
--- a/markitect/prompts/execution/models.py
+++ b/markitect/prompts/execution/models.py
@@ -12,6 +12,7 @@ from typing import Dict, Any, List, Optional
 from enum import Enum

 from markitect.prompts.models import calculate_bundle_digest
+from markitect.llm.models import RunConfig, LLMResponse  # canonical; re-exported here


 class ExecutionStage(Enum):
@@ -37,54 +38,6 @@ class RunStatus(Enum):
    SKIPPED = "skipped"  # Skipped due to identical InputBundleHash


-@dataclass
-class RunConfig:
-    """
-    Configuration for prompt execution.
-
-    Attributes:
-        model_name: LLM model to use
-        temperature: Model temperature (0.0-1.0)
-        max_tokens: Maximum tokens to generate
-        model_params: Additional model parameters
-        max_depth: Maximum generation depth for nested runs
-        skip_if_exists: Skip if identical InputBundleHash exists (FR-4.4)
-        timeout_seconds: Execution timeout
-    """
-    model_name: str = "gpt-4"
-    temperature: float = 0.7
-    max_tokens: int = 2000
-    model_params: Dict[str, Any] = field(default_factory=dict)
-    max_depth: int = 3
-    skip_if_exists: bool = True
-    timeout_seconds: int = 300
-
-    def to_dict(self) -> Dict[str, Any]:
-        """Convert to dictionary."""
-        return {
-            "model_name": self.model_name,
-            "temperature": self.temperature,
-            "max_tokens": self.max_tokens,
-            "model_params": self.model_params,
-            "max_depth": self.max_depth,
-            "skip_if_exists": self.skip_if_exists,
-            "timeout_seconds": self.timeout_seconds,
-        }
-
-    @classmethod
-    def from_dict(cls, data: Dict[str, Any]) -> "RunConfig":
-        """Create from dictionary."""
-        return cls(
-            model_name=data.get("model_name", "gpt-4"),
-            temperature=data.get("temperature", 0.7),
-            max_tokens=data.get("max_tokens", 2000),
-            model_params=data.get("model_params", {}),
-            max_depth=data.get("max_depth", 3),
-            skip_if_exists=data.get("skip_if_exists", True),
-            timeout_seconds=data.get("timeout_seconds", 300),
-        )
-
-
@dataclass
 class InputBundle:
    """
@@ -151,35 +104,6 @@ class InputBundle:
        }


-@dataclass
-class LLMResponse:
-    """
-    Response from LLM execution.
-
-    Attributes:
-        content: Generated content
-        model: Model used
-        usage: Token usage statistics
-        finish_reason: Why generation stopped
-        metadata: Additional response metadata
-    """
-    content: str
-    model: str
-    usage: Dict[str, int] = field(default_factory=dict)
-    finish_reason: str = "stop"
-    metadata: Dict[str, Any] = field(default_factory=dict)
-
-    def to_dict(self) -> Dict[str, Any]:
-        """Convert to dictionary."""
-        return {
-            "content": self.content,
-            "model": self.model,
-            "usage": self.usage,
-            "finish_reason": self.finish_reason,
-            "metadata": self.metadata,
-        }
-
-
@dataclass
 class PromptRun:
    """
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,6 +18,9 @@ dependencies = [
    "aiohttp>=3.8.0",
    "toml",

+    # Extracted LLM adapter library (standalone repo)
+    "llm-connect @ file:///home/worsch/llm-connect",
+
    # Core capabilities (required for basic functionality)
    "release-management @ file:./capabilities/release-management",
    "testdrive-jsui @ file:./capabilities/testdrive-jsui",
--- a/roadmap/infospace-s3-closeout/PLAN.md
+++ b/roadmap/infospace-s3-closeout/PLAN.md
@@ -0,0 +1,172 @@
+# Infospace Tooling — Stage 3 Close-out
+
+## Context
+
+Stages 1 and 2 of the infospace tooling roadmap are complete. Stage 3 used the
+Wealth of Nations / VSM example to validate the tooling end-to-end. Most of S3
+is done; this workstream finishes the remaining tasks, addresses deferred cleanup,
+and formally closes the roadmap.
+
+**Parent roadmap:** `roadmap/infospace-tooling/PLAN.md`
+**Example location:** `examples/infospace-with-history/`
+
+### State at workstream open (2026-02-26)
+
+| Item | Status |
+|------|--------|
+| S3.1 Migrate example to infospace config | ✅ Done |
+| S3.3 Per-entity eval batch | ✅ 985/988 complete; metrics.yaml updated |
+| S3.4 Tutorial rewrite | ✅ Done |
+| S3.5 Supply-chain-vsm composition demo | ✅ Done |
+| S3.2 Clean per-chapter git history | ⏳ Deferred — included here |
+| 3 missing evaluations | ⏳ Outstanding |
+| 4 follow-up items (commit b055c8d7) | ⏳ Outstanding |
+
+---
+
+## Tasks
+
+### C.1 — Complete the 3 missing entity evaluations
+
+985 of 988 entities have evaluation files. Identify and evaluate the remaining 3.
+
+```bash
+cd examples/infospace-with-history
+# Identify missing slugs
+comm -23 \
+  <(ls output/entities/*.md | xargs -I{} basename {} .md | sort) \
+  <(ls output/evaluations/*.md | xargs -I{} basename {} .md | sort)
+# Evaluate each missing entity individually
+markitect infospace evaluate --entity <slug> --provider openrouter
+```
+
+**Acceptance:** `ls output/evaluations/*.md | wc -l` returns 988.
+
+---
+
+### C.2 — Run eval-summary and verify viability
+
+Run the aggregation command to update per_entity_mean from all 988 evaluations,
+then check all 6 viability gates pass.
+
+```bash
+cd examples/infospace-with-history
+unset OPENROUTER_API_KEY  # stale env var guard
+markitect infospace eval-summary --update-metrics
+markitect infospace viability
+```
+
+Current sample reading (985 entities): `per_entity_mean = 3.956` against threshold 3.5.
+Expected: all 6 metrics pass.
+
+**Acceptance:** `markitect infospace viability` exits 0 and shows 6/6 PASS.
+
+---
+
+### C.3 — Refresh the metrics report
+
+The metrics report was generated from chapters 1–4 only. Regenerate it from
+the full 988-entity set.
+
+```bash
+cd examples/infospace-with-history
+markitect infospace check --provider openrouter   # or reuse existing check outputs
+markitect infospace history                        # confirm snapshot recorded
+```
+
+**Acceptance:** `output/metrics/metrics.yaml` reflects all 988 entities; a dated
+snapshot exists in the metrics history.
+
+---
+
+### C.4 — Document advanced usage patterns
+
+Write `examples/infospace-with-history/docs/advanced-usage.md` covering:
+
+- Incremental evaluation (adding entities after initial run, skip-if-exists behaviour)
+- Re-evaluating after guideline changes (`--force` flag)
+- Interpreting per-entity score distributions and identifying outliers
+- Using `markitect infospace entities --sort-by score` to triage low scorers
+- Reading and acting on collection check outputs (redundancy pairs, coverage gaps)
+
+**Acceptance:** File exists with ≥ 4 documented patterns, each with a worked command example.
+
+---
+
+### C.5 — Add composition examples to documentation
+
+Document how the supply-chain-vsm example (`examples/supply-chain-vsm/`) demonstrates
+composition. Add a `docs/composition-guide.md` covering:
+
+- What composition means (discipline binding)
+- How supply-chain-vsm binds WoN as a discipline
+- How to create a new infospace that uses an existing one as a discipline
+- Viability requirement: the discipline must pass its own thresholds before binding
+
+Reference `examples/supply-chain-vsm/` throughout.
+
+**Acceptance:** `docs/composition-guide.md` exists and links to supply-chain-vsm.
+
+---
+
+### C.6 — Performance benchmarking note
+
+Rather than a full benchmarking guide (out of scope for a 988-entity example),
+record observed timings in `docs/performance-notes.md`:
+
+- Eval batch duration (~4 hrs for 988 entities via OpenRouter)
+- Tokens per entity (rough estimate from usage logs)
+- Embedding cache hit rate after first run
+- Recommendation: provider choice (OpenRouter vs Gemini) for different dataset sizes
+
+**Acceptance:** File exists with at least 4 concrete measurements or estimates.
+
+---
+
+### C.7 — S3.2: Clean per-chapter git history (deferred cleanup)
+
+Create a clean branch where each of the 35 processed chapters has its own commit.
+Chapters 1–8 are already done on branch `clean-example-history`; 27 remain.
+
+This is a cosmetic/archival task — it does not change output files.
+
+```bash
+git checkout clean-example-history
+# For each remaining chapter (9–35):
+#   cherry-pick or re-commit the chapter output files with a per-chapter message
+git log --oneline clean-example-history  # verify 35 chapter commits
+```
+
+**Acceptance:** Branch `clean-example-history` has exactly 35 chapter commits
+(one per chapter), rebased onto current main.
+
+**Note:** This task can be done independently of C.1–C.6. Low urgency — do last.
+
+---
+
+### C.8 — Formally close the S3 roadmap
+
+Update `roadmap/infospace-tooling/PLAN.md` to mark all S3 tasks as complete.
+Add a close-out summary at the top of the file with final metrics and date.
+Commit with a `docs(roadmap)` message.
+
+**Acceptance:** The header of `roadmap/infospace-tooling/PLAN.md` shows all stages complete, and the change is committed to main.
+
+---
+
+## Task order
+
+```
+C.1 → C.2 → C.3
+              ↓
+         C.4, C.5, C.6 (parallel)
+              ↓
+             C.8
+C.7 (independent, do last)
+```
+
+## Out of scope
+
+- Adding new entities or chapters (the WoN example is complete at 988 entities)
+- Re-running collection checks from scratch (existing results are valid)
+- Publishing the example as a standalone dataset
--- a/roadmap/testdrive-jsui-publication/PLAN.md
+++ b/roadmap/testdrive-jsui-publication/PLAN.md
@@ -0,0 +1,176 @@
+# TestDrive-JSUI — npm Publication
+
+## Context
+
+TestDrive-JSUI is a JavaScript-first markdown editor library living at
+`capabilities/testdrive-jsui/`. Phases 1–6 (build system, bundling, testing,
+migration) are complete. 84 tests pass (68 JS + 15 Python + 1 fixes).
+Single source of truth: `capabilities/testdrive-jsui/js/`.
+
+This workstream covers the remaining work to publish the library to npm and
+close out the capability.
+
+**Source:** `capabilities/testdrive-jsui/TODO.md` (Phases 7–9)
+**Package name:** `testdrive-jsui` (to be confirmed in P.1)
+**Current version:** 1.0.0
+
+---
+
+## Tasks
+
+### P.1 — Pre-publication: decide repository structure
+
+The library currently lives inside the markitect monorepo. Before publishing to
+npm, decide whether it ships from here or from a dedicated repo.
+
+**Options:**
+- A: Publish directly from `capabilities/testdrive-jsui/` — simpler, no repo split
+- B: Extract to a standalone `testdrive-jsui` repo — cleaner for npm consumers
+
+Record the decision and proceed accordingly.
+
+**Acceptance:** Decision recorded; if B, standalone repo created and code copied.
+
+---
+
+### P.2 — Pre-publication: verify Markitect integration
+
+Confirm the main Markitect application still works correctly with the current
+capability code before publishing.
+
+```bash
+cd "$(git rev-parse --show-toplevel)"   # markitect repo root, e.g. /home/worsch/markitect_project
+make testdrive-jsui-test-all   # 84 tests must pass
+# Manually verify view and edit modes in the running Markitect app
+```
+
+**Acceptance:** All 84 tests pass; view and edit modes confirmed working.
+
+---
+
+### P.3 — Pre-publication: decide STANDALONE_PLAN.md
+
+`STANDALONE_PLAN.md` exists in the capability but its status is unclear. Either:
+- Implement it (if it describes meaningful standalone work)
+- Explicitly archive it with a note that the standalone use case is covered by the npm package
+
+**Acceptance:** File updated with a clear status note; or deleted if obsolete.
+
+---
+
+### P.4 — Pre-publication: pack and dry-run
+
+Run the full pre-publish checklist.
+
+```bash
+cd capabilities/testdrive-jsui
+npm run lint          # zero errors
+npm test              # all 84 tests pass
+npm run build:prod    # clean production build
+npm pack              # creates testdrive-jsui-1.0.0.tgz
+npm install ./testdrive-jsui-1.0.0.tgz --dry-run   # verify install
+npm publish --dry-run  # verify what will be published
+```
+
+Review `--dry-run` output: confirm only intended files are included (check
+`.npmignore` or `files` field in `package.json`).
+
+**Acceptance:** `npm publish --dry-run` succeeds with expected file list; no
+test files, source maps, or internal docs included unintentionally.
+
+---
+
+### P.5 — Pre-publication: create release tag
+
+```bash
+git tag -a v1.0.0 -m "Release testdrive-jsui v1.0.0"
+# (push tag to remote when ready)
+```
+
+**Acceptance:** Tag `v1.0.0` exists on main; CHANGELOG.md entry present for 1.0.0.
+
+---
+
+### P.6 — Publication: publish to npm
+
+```bash
+cd capabilities/testdrive-jsui
+npm login   # if not already logged in
+npm publish
+```
+
+Then verify:
+- Package visible at `https://www.npmjs.com/package/testdrive-jsui`
+- Wait 5–10 minutes, then check CDN availability:
+  - `https://cdn.jsdelivr.net/npm/testdrive-jsui@1.0.0/dist/testdrive-jsui.min.js`
+  - `https://unpkg.com/testdrive-jsui@1.0.0/dist/testdrive-jsui.min.js`
+
+**Acceptance:** Package installable via `npm install testdrive-jsui`.
+
+---
+
+### P.7 — Publication: fresh install test
+
+In a clean temporary directory, install from npm and verify the library works
+with a minimal HTML file.
+
+```bash
+mkdir /tmp/testdrive-test && cd /tmp/testdrive-test
+npm install testdrive-jsui marked
+# Open standalone.html equivalent, confirm editor initialises
+```
+
+**Acceptance:** `new TestDriveJSUI({...})` works in a fresh install with no
+reference to the capability source directory.
+
+---
+
+### P.8 — Publication: GitHub release
+
+Create a GitHub release from the v1.0.0 tag with:
+- Release notes (summary from CHANGELOG.md 1.0.0 entry)
+- Link to npm package
+- Link to CDN URLs (jsdelivr, unpkg)
+
+**Acceptance:** GitHub release published and visible.
+
+---
+
+### P.9 — Post-publication: README badges and monitoring
+
+Add npm badges to `capabilities/testdrive-jsui/README.md`:
+
+```markdown
+[![npm version](https://badge.fury.io/js/testdrive-jsui.svg)](https://www.npmjs.com/package/testdrive-jsui)
+[![npm downloads](https://img.shields.io/npm/dm/testdrive-jsui.svg)](https://www.npmjs.com/package/testdrive-jsui)
+```
+
+Set a reminder to check download stats after 1 week.
+Demo page and GitHub Pages are optional — do them only if there's a specific
+audience to point at them.
+
+**Acceptance:** README has version and download count badges; committed.
+
+---
+
+## Task order
+
+```
+P.1 (repo decision)
+P.2 (Markitect integration check)   ← can run in parallel with P.1
+P.3 (STANDALONE_PLAN decision)      ← can run in parallel
+     ↓
+P.4 (pack + dry-run)   ← needs P.1, P.2, P.3 all done
+P.5 (release tag)      ← can run with P.4
+     ↓
+P.6 (publish)
+P.7 (fresh install test)
+P.8 (GitHub release)
+P.9 (badges + monitoring)
+```
+
+## Out of scope
+
+- Adding new features before publication (ship what's there)
+- Ruby or Java adapters (optional integrations, not blocking publication)
+- Paid npm features (keep on free tier)
--- a/tests/test_llm_isolation.py
+++ b/tests/test_llm_isolation.py
@@ -0,0 +1,159 @@
+"""
+S1.3 — LLM isolation gate.
+
+Confirms that markitect.llm.* has zero imports from markitect.prompts.*
+or markitect.infospace.*, making the module safe to extract into a
+standalone llm-connect library.
+
+These tests must pass before extraction (S2).
+"""
+
+import importlib
+import pkgutil
+import sys
+from pathlib import Path
+
+
+def _collect_llm_modules() -> list[str]:
+    """List the dotted names of the markitect.llm tree, package itself first.
+
+    The directory containing the package's ``__file__`` is walked with
+    ``pkgutil.walk_packages``; every name it yields (prefixed with
+    ``markitect.llm.``) follows the leading ``"markitect.llm"`` entry.
+    """
+    import markitect.llm as llm_package
+
+    package_dir = str(Path(llm_package.__file__).parent)
+    discovered = [
+        found.name
+        for found in pkgutil.walk_packages([package_dir], prefix="markitect.llm.")
+    ]
+    return ["markitect.llm", *discovered]
+
+
+def _direct_imports(module_name: str) -> set[str]:
+    """Return the module paths imported by the source of *module_name*.
+
+    The module is imported to locate its source file, which is then parsed
+    with ``ast`` so that only real import statements count (multi-line
+    imports are handled; text inside docstrings is ignored).
+
+    For every imported module path two entries are recorded:
+
+    * the full dotted path as written (``from a.b.c import x`` -> ``"a.b.c"``,
+      ``import a.b`` -> ``"a.b"``; relative imports keep their leading dots),
+    * its first two dotted components (``"a.b"``), or the bare name when the
+      path has no dot.
+
+    Returns an empty set when the module has no ``.py`` source file
+    (built-in or extension modules).
+    """
+    import ast
+
+    mod = importlib.import_module(module_name)
+    src_file = getattr(mod, "__file__", None)
+    if not src_file or not src_file.endswith(".py"):
+        return set()
+
+    # Read bytes so ast honours the file's own encoding declaration instead
+    # of depending on the locale's default text encoding.
+    with open(src_file, "rb") as f:
+        tree = ast.parse(f.read(), filename=src_file)
+
+    imports: set[str] = set()
+    for node in ast.walk(tree):
+        if isinstance(node, ast.ImportFrom):
+            # Rebuild the path as written, including relative-import dots.
+            targets = ["." * node.level + (node.module or "")]
+        elif isinstance(node, ast.Import):
+            # Plain ``import a.b as c, d`` statements: one target per alias.
+            targets = [alias.name for alias in node.names]
+        else:
+            continue
+        for target in targets:
+            # Full dotted path for cross-module checks ...
+            imports.add(target)
+            # ... plus the two-component prefix (or the bare name).
+            imports.add(".".join(target.split(".")[:2]))
+    return imports
+
+
+def _import_lines(src_file: str) -> list[str]:
+    """Return the stripped lines of *src_file* that look like import statements.
+
+    A line qualifies when, after stripping surrounding whitespace, it starts
+    with ``from `` or ``import ``.  This is a textual scan, not a parse:
+    continuation lines of a multi-line import are not returned, and a
+    docstring line that happens to start with one of those words is.
+
+    The file is decoded as UTF-8 (Python's default source encoding) instead
+    of the locale encoding, and undecodable bytes are replaced rather than
+    raising, so the scan gives the same result on every platform.
+    """
+    with open(src_file, encoding="utf-8", errors="replace") as f:
+        stripped_lines = (line.strip() for line in f)
+        return [s for s in stripped_lines if s.startswith(("from ", "import "))]
+
+
+def test_no_prompts_import_in_llm_tree():
+    """No module under markitect.llm may have an import line naming markitect.prompts.
+
+    Each module is imported to find its ``.py`` source, whose import-looking
+    lines are searched for the substring ``markitect.prompts``.  Modules
+    that raise ImportError or have no ``.py`` source are skipped.
+    """
+    offenders = []
+    for name in _collect_llm_modules():
+        try:
+            module = importlib.import_module(name)
+        except ImportError:
+            continue
+        path = getattr(module, "__file__", None)
+        if not (path and path.endswith(".py")):
+            continue
+        if any("markitect.prompts" in stmt for stmt in _import_lines(path)):
+            offenders.append(name)
+
+    assert offenders == [], (
+        f"These llm modules still import from markitect.prompts: {offenders}"
+    )
+
+
+def test_no_infospace_import_in_llm_tree():
+    """No module under markitect.llm may have an import line naming markitect.infospace.
+
+    Each module is imported to find its ``.py`` source, whose import-looking
+    lines are searched for the substring ``markitect.infospace``.  Modules
+    that raise ImportError or have no ``.py`` source are skipped.
+    """
+    offenders = []
+    for name in _collect_llm_modules():
+        try:
+            module = importlib.import_module(name)
+        except ImportError:
+            continue
+        path = getattr(module, "__file__", None)
+        if not (path and path.endswith(".py")):
+            continue
+        if any("markitect.infospace" in stmt for stmt in _import_lines(path)):
+            offenders.append(name)
+
+    assert offenders == [], (
+        f"These llm modules still import from markitect.infospace: {offenders}"
+    )
+
+
+def test_runconfig_and_llmresponse_canonical_in_llm():
+    """RunConfig and LLMResponse must report markitect.llm.models as their module."""
+    from markitect.llm.models import RunConfig, LLMResponse
+
+    expected = "markitect.llm.models"
+    # Checked in the same order as before: RunConfig first, then LLMResponse.
+    for label, cls in (("RunConfig", RunConfig), ("LLMResponse", LLMResponse)):
+        actual = cls.__module__
+        assert actual == expected, (
+            f"{label}.module = {actual!r}, expected '{expected}'"
+        )
+
+
+def test_llmadapter_canonical_in_llm():
+    """LLMAdapter must report markitect.llm.adapter as its defining module."""
+    from markitect.llm.adapter import LLMAdapter
+
+    defining_module = LLMAdapter.__module__
+    assert defining_module == "markitect.llm.adapter", (
+        f"LLMAdapter.module = {defining_module!r}, expected 'markitect.llm.adapter'"
+    )
+
+
+def test_backward_compat_prompts_reexport():
+    """The prompts.execution.models shim must hand out the llm.models classes themselves."""
+    from markitect.prompts.execution.models import (
+        RunConfig as shim_run_config,
+        LLMResponse as shim_response,
+    )
+    from markitect.llm.models import (
+        RunConfig as canonical_run_config,
+        LLMResponse as canonical_response,
+    )
+
+    # Identity, not equality: the shim must re-export, not redefine.
+    assert shim_run_config is canonical_run_config, "prompts re-export RunConfig must be the same object as llm.models.RunConfig"
+    assert shim_response is canonical_response, "prompts re-export LLMResponse must be the same object as llm.models.LLMResponse"
+
+
+def test_backward_compat_llmadapter_reexport():
+    """The prompts.execution.llm_adapter shim must hand out llm.adapter.LLMAdapter itself."""
+    from markitect.prompts.execution.llm_adapter import LLMAdapter as shim_adapter
+    from markitect.llm.adapter import LLMAdapter as canonical_adapter
+
+    # Identity, not equality: the shim must re-export, not redefine.
+    assert shim_adapter is canonical_adapter, "prompts re-export LLMAdapter must be the same object as llm.adapter.LLMAdapter"
+
+
+def test_app_name_parameterization():
+    """Config path, dir-config name and model env var must all derive from app_name.
+
+    Also smoke-tests ``resolve_llm`` with an app name for which no
+    configuration is expected to exist (presumably nothing in the test
+    environment defines one), checking that both sources report
+    ``"hardcoded"``.
+    """
+    from markitect.llm.toml_config import (
+        _model_env_var,
+        _user_config_path,
+        _dir_config_name,
+        resolve_llm,
+    )
+
+    expected_env_vars = (
+        ("railiance", "RAILIANCE_HELPER_MODEL"),
+        ("markitect", "MARKITECT_HELPER_MODEL"),
+    )
+    for app, env_var in expected_env_vars:
+        assert _model_env_var(app) == env_var
+
+    user_config = str(_user_config_path("railiance"))
+    assert user_config.endswith(".config/railiance/config.toml")
+    assert _dir_config_name("railiance") == ".railiance.toml"
+
+    # Smoke: resolve falls back to hardcoded for unknown app
+    resolved = resolve_llm(app_name="nonexistent_app_xyz")
+    assert resolved.provider_source == "hardcoded"
+    assert resolved.model_source == "hardcoded"