From 6f0facd744be5e94ac8c82daa62b894776d55dac Mon Sep 17 00:00:00 2001 From: tegwick Date: Mon, 4 May 2026 00:12:07 +0200 Subject: [PATCH] Workplan dependencies and prio for text research lab workplans --- docs/cache-backend-architecture-blueprint.md | 259 ++++++++++++++++++ docs/query-extraction.md | 76 +++++ docs/research-lab-cache-backend-research.md | 248 +++++++++++++++++ docs/workplan-planning-map.md | 68 +++++ src/markitect_tool/__init__.py | 10 + src/markitect_tool/cli/main.py | 78 ++++++ src/markitect_tool/query/__init__.py | 15 + src/markitect_tool/query/engine.py | 242 ++++++++++++++++ tests/test_query_extraction.py | 148 ++++++++++ workplans/MKTT-WP-0001-repo-foundation.md | 3 + .../MKTT-WP-0002-markitect-main-migration.md | 4 + ...KTT-WP-0003-core-toolkit-implementation.md | 12 +- ...TT-WP-0004-practical-contract-framework.md | 5 + ...-runtime-context-and-assessment-engines.md | 5 + ...WP-0006-cache-backend-architecture-core.md | 133 +++++++++ ...-advanced-query-and-local-index-backend.md | 125 +++++++++ ...0008-agent-working-memory-context-cache.md | 109 ++++++++ ...009-access-controlled-knowledge-gateway.md | 105 +++++++ 18 files changed, 1644 insertions(+), 1 deletion(-) create mode 100644 docs/cache-backend-architecture-blueprint.md create mode 100644 docs/query-extraction.md create mode 100644 docs/research-lab-cache-backend-research.md create mode 100644 docs/workplan-planning-map.md create mode 100644 src/markitect_tool/query/__init__.py create mode 100644 src/markitect_tool/query/engine.py create mode 100644 tests/test_query_extraction.py create mode 100644 workplans/MKTT-WP-0006-cache-backend-architecture-core.md create mode 100644 workplans/MKTT-WP-0007-advanced-query-and-local-index-backend.md create mode 100644 workplans/MKTT-WP-0008-agent-working-memory-context-cache.md create mode 100644 workplans/MKTT-WP-0009-access-controlled-knowledge-gateway.md diff --git a/docs/cache-backend-architecture-blueprint.md 
b/docs/cache-backend-architecture-blueprint.md new file mode 100644 index 0000000..03d6337 --- /dev/null +++ b/docs/cache-backend-architecture-blueprint.md @@ -0,0 +1,259 @@ +# Cache Backend Architecture Blueprint + +Date: 2026-05-03 + +## Purpose + +This blueprint defines an optional backend architecture for sophisticated +knowledge systems built on top of `markitect-tool`. + +It is a research-lab architecture: powerful enough to support cached ASTs, +advanced query backends, agent memory, and access control, but separated from +the slim core so one-off CLI use stays fast and simple. + +## Architectural Boundary + +The core package owns: + +- Markdown parsing +- document contracts +- simple selectors +- deterministic transforms and generation primitives +- unified diagnostics + +The optional backend fabric owns: + +- persistent snapshots +- indexes +- advanced query adapters +- memory/context packages +- policy enforcement +- provenance records +- trace and performance metadata + +The core must be able to run without the backend fabric. + +## Conceptual Layers + +```text +Markdown files + -> Core parser and contract layer + -> Content-addressed document snapshots + -> Index fabric + -> AST/JSON index + -> full-text index + -> vector/semantic index + -> analytical/index export + -> Query adapter registry + -> simple selectors + -> JSONPath + -> SQL/FTS + -> vector/hybrid retrieval + -> Context package registry + -> activated working sets + -> memory namespaces + -> agent-ready context bundles + -> Access policy gateway + -> labels/ACL/ReBAC/ABAC + -> result filtering and denial diagnostics + -> Provenance and observability +``` + +## Core Interfaces + +### Snapshot Backend + +Responsible for durable parsed-document snapshots. 
+ +Minimum protocol: + +```text +put_document(source_path, content, parse_options) -> snapshot_id +get_snapshot(snapshot_id) -> DocumentSnapshot +resolve_source(source_path) -> latest snapshot_id +diff_snapshot(old_id, new_id) -> SnapshotDiff +``` + +Snapshot identity should include: + +- source content hash +- parser version +- parse options +- contract version when relevant + +### Index Backend + +Responsible for derived lookup structures. + +Minimum protocol: + +```text +capabilities() -> IndexCapabilities +build(snapshot_ids, options) -> IndexBuildResult +refresh(changed_snapshots) -> IndexBuildResult +query(request) -> QueryResult +explain(request) -> QueryPlan +``` + +Capabilities should include: + +- `jsonpath` +- `sql` +- `fts` +- `vector` +- `hybrid` +- `inline_tokens` +- `section_graph` +- `policy_pushdown` + +### Query Adapter + +Translates a stable Markitect query request into backend-specific execution. + +Minimum protocol: + +```text +name +supports(selector_or_query, target) -> bool +execute(document_or_backend, request) -> QueryResult +explain(request) -> QueryExplanation +``` + +Adapters must return a common result envelope: + +- kind +- path +- value +- text +- source location +- snapshot id +- provenance +- policy decision +- backend metadata + +### Context Package Registry + +Responsible for agent-ready working memory. + +Minimum protocol: + +```text +create_package(query_or_manifest, budget, policy) -> context_package_id +activate(package_id, thread_or_workspace) -> activation_id +deactivate(activation_id) +refresh(package_id) -> package_id +explain(package_id) -> ContextPackageReport +``` + +Context packages should include: + +- included source spans +- summary layers +- token estimates +- provenance +- freshness +- policy labels +- retrieval recipe +- cache keys + +### Access Policy Gateway + +Responsible for authorization and redaction before results leave a backend. 
+ +Minimum protocol: + +```text +authorize(subject, action, object, context) -> PolicyDecision +filter_results(subject, action, results, context) -> FilteredResults +explain_decision(decision_id) -> PolicyExplanation +``` + +Policy should support a ladder: + +1. Labels and trust zones. +2. File/path ACLs. +3. Relationship-based access control. +4. Attribute/rule-based policies. +5. External authorization services. + +## Suggested Backend Manifest + +Backends should register through a Markdown/YAML manifest: + +````markdown +# Local SQLite Backend + +```yaml markitect-backend +id: local-sqlite-cache +kind: cache-backend +capabilities: + - snapshots + - json + - fts + - sql + - provenance +storage: + engine: sqlite + path: .markitect/cache/index.sqlite +policy: + mode: labels +``` +```` + +## CLI Direction + +The first backend CLI should be explicit: + +```text +mkt cache init +mkt cache build +mkt cache status +mkt cache query --backend +mkt ast show +mkt ast query +mkt context pack +mkt context activate +mkt policy check +``` + +Do not hide persistence behind `mkt query`. The user should know when the tool +is querying live files versus a persistent backend. + +## Recommended First Stack + +Start with: + +- content hashes in Python standard library +- SQLite for snapshot metadata, JSON, and FTS5 +- JSONPath as an optional extra +- local filesystem cache directory +- simple label policy +- provenance tables + +Defer: + +- vector search until text/structure cache works +- external authorization engines until local policy model is stable +- MCP server exposure until resources/tools are secure and explainable +- distributed cache until local invalidation is boring + +## Security Notes + +Cached data becomes a new data exposure surface. 
+ +Minimum requirements before secure use: + +- cache location is explicit +- cache entries know source path and content hash +- policy mode is visible +- query results report policy filtering +- context packages list what they include +- destructive cache operations require explicit command +- no backend silently sends document content to a network service + +## Architecture Decision + +Implement the backend fabric after deterministic transform/composition +primitives are underway, but before serious caching, agent memory, or advanced +query backends. This lets WP-0003 continue while reserving a clean path for the +research-lab track. diff --git a/docs/query-extraction.md b/docs/query-extraction.md new file mode 100644 index 0000000..0e29f90 --- /dev/null +++ b/docs/query-extraction.md @@ -0,0 +1,76 @@ +# Query And Extraction + +Date: 2026-05-03 + +## Purpose + +The first query layer keeps selection close to the structured Markdown model. +It is intentionally small and deterministic. JSONPath or another query backend +can be added later behind the same API if the simple selector language becomes +too limited. + +## CLI + +```text +mkt query FILE SELECTOR [--format json|yaml|text] +mkt extract FILE SELECTOR [--format text|json|yaml] +``` + +`query` returns structured matches. `extract` returns textual content from the +matches. 
+ +## Selectors + +Supported targets: + +- `document`, `$`, or `.`: full parsed document +- `frontmatter`: YAML frontmatter +- `headings`: heading objects +- `sections`: heading-led sections +- `blocks`: parsed content blocks +- `metrics`: document and section metrics + +Supported path examples: + +```text +frontmatter.status +frontmatter.owner.name +metrics.document.words +metrics.document.sections +``` + +Supported filters: + +```text +headings[level=2] +headings[text=Decision] +headings[text~=decision] +sections[heading=Context] +sections[heading~=risk] +sections[contains=problem] +sections[contains~=PROBLEM] +blocks[type=paragraph] +blocks[contains~=follow-up] +``` + +`=` is exact and case-sensitive. `~=` is substring matching and +case-insensitive. + +## Current Boundary + +This is not a full query language. It covers practical extraction from the +current parser model: + +- frontmatter values +- headings +- sections +- content blocks +- metrics + +Future query backend work should preserve this simple surface and add optional +adapters rather than forcing every user into a heavier language. + +Advanced query and cache backends are tracked in: + +- `docs/cache-backend-architecture-blueprint.md` +- `workplans/MKTT-WP-0007-advanced-query-and-local-index-backend.md` diff --git a/docs/research-lab-cache-backend-research.md b/docs/research-lab-cache-backend-research.md new file mode 100644 index 0000000..e032c58 --- /dev/null +++ b/docs/research-lab-cache-backend-research.md @@ -0,0 +1,248 @@ +# Research Lab: Sophisticated Cache Backends + +Date: 2026-05-03 + +## Purpose + +This research note explores how `markitect-tool` can keep its slim, +markdown-native core while allowing sophisticated optional backends for cached +ASTs, structured indexes, multiple query paradigms, agent working memory, and +access-controlled knowledge systems. + +The goal is not to rebuild `markitect-main` wholesale. 
The goal is to preserve +the useful insight behind it: once Markdown has been parsed into a trustworthy +structure, many higher-value operations become possible if that structure can +be cached, indexed, queried, reactivated, and governed. + +## Research Signals + +### Content Addressing And Reproducibility + +Git's object model is a practical reference for content-addressed storage: +content is written to an object database and retrieved by a hash-derived key. +Bazel remote caching similarly separates action outputs from metadata so work +can be reused when inputs are unchanged. + +Relevance: + +- Parse results should be keyed by content hash, parser version, and options. +- Derived indexes should declare their input snapshots and invalidation rules. +- Reproducible context packages need stable object identities. + +Sources: + +- https://git-scm.com/book/en/v2/Git-Internals-Git-Objects +- https://docs.bazel.build/versions/main/remote-caching.html + +### Structured Query And AST Introspection + +JSONPath is now standardized as RFC 9535. It defines selection and extraction +over JSON values and has security considerations around implementation behavior +and query construction. This makes it a good optional backend for power users +who need raw access to the full parsed structure. + +SQLite JSON and FTS5 provide a pragmatic local storage/query foundation. FTS5 +supports full-text search, relevance ranking, phrase/prefix/NEAR queries, and +external-content tables. These features map well to Markdown sections and +blocks while keeping local-first operation. + +Relevance: + +- Keep the current simple selector API as the common surface. +- Add JSONPath over `Document.to_dict()` as an optional advanced adapter. +- Add SQLite as the first local persistent index backend. +- Keep AST introspection as a debugging and research-lab capability, not as + the default user interface. 
+ +Sources: + +- https://www.rfc-editor.org/rfc/rfc9535.html +- https://www.sqlite.org/json1.html +- https://www.sqlite.org/fts5.html + +### Columnar And Vector Backends + +Apache Arrow defines a language-independent columnar memory format. DuckDB is +strong for local analytical SQL over structured data. Vector databases such as +Qdrant, LanceDB, and pgvector provide semantic retrieval primitives. + +Relevance: + +- The core should not depend on any vector database. +- Index backends should advertise capabilities: text search, SQL, JSONPath, + vector search, hybrid retrieval, analytical scans. +- Vector indexes should store provenance back to document, section, and content + hash, not just opaque chunks. + +Sources: + +- https://arrow.apache.org/docs/format/Columnar.html +- https://duckdb.org/docs/stable/data/json/overview +- https://qdrant.tech/documentation/manage-data/collections/ +- https://docs.lancedb.com/ +- https://github.com/pgvector/pgvector + +### Agent Context And Working Memory + +The Model Context Protocol gives a useful integration model: resources provide +context/data, tools execute actions, and roots define filesystem or URI +boundaries. LangChain/LangGraph memory docs distinguish short-term, +thread-scoped memory from long-term, namespace-scoped memory, and further split +long-term memory into semantic, episodic, and procedural forms. The MemGPT +paper frames memory management as an operating-system-like problem for LLMs. + +Relevance: + +- Markitect context caches should be namespace-scoped and explicitly + activatable. +- A context package should carry text, structure, provenance, policy, freshness, + and token-budget metadata. +- Agents should be able to drop and reactivate working context by stable id. +- Memory writes need hot-path and background modes. 
+ +Sources: + +- https://modelcontextprotocol.io/specification/2025-06-18 +- https://docs.langchain.com/oss/python/concepts/memory +- https://developers.llamaindex.ai/python/framework/module_guides/deploying/agents/memory/ +- https://arxiv.org/abs/2310.08560 + +### Provenance, Observability, And Debuggability + +W3C PROV provides a vocabulary for entities, activities, agents, and +derivations. OpenTelemetry traces provide spans and attributes for observing +distributed or multi-step operations. + +Relevance: + +- Cache entries should record what produced them. +- Query results should be explainable: source file, section, content hash, + index backend, policy decision, and transform chain. +- Agent context packs should be auditable. + +Sources: + +- https://www.w3.org/TR/prov-overview/ +- https://opentelemetry.io/docs/concepts/signals/traces/ + +### Access Control: Fluid To Rigid + +Zanzibar demonstrates a relationship-based authorization model at large scale. +OpenFGA and SpiceDB make Zanzibar-style relationship-based access control +available as productized systems. OPA/Rego and Cedar provide policy evaluation +models for attribute and rule-based decisions. + +Relevance: + +- Markitect should support a fluid-to-rigid access-control ladder. +- Local labs can start with labels and trust scopes. +- Secure deployments need policy checks before query results are returned to + agents or users. +- Policy decisions should be part of the diagnostic and provenance trail. + +Sources: + +- https://www.usenix.org/conference/atc19/presentation/pang +- https://openfga.dev/docs/concepts +- https://www.openpolicyagent.org/docs/policy-language +- https://docs.cedarpolicy.com/ + +## Main Finding + +The optional backend should be a **capability-oriented cache fabric**, not a +single database choice. + +The slim core should continue to parse, validate, query, transform, and +generate Markdown without persistent infrastructure. 
The research-lab backend +should attach through explicit interfaces: + +- content-addressed snapshots +- index manifests +- query adapter registry +- memory/context package registry +- access policy gateway +- provenance and trace records + +That lets the project support spontaneous one-time tool use and also grow into +high-performance, agentic, security-sensitive knowledge systems. + +## Most Promising Use Cases + +### UC-RL-001: AST Introspection And JSONPath Backend + +Expose raw parsed documents for advanced users: + +- `mkt ast show` +- `mkt ast query --backend jsonpath` +- raw token and inline query support +- adapter path from simple selectors to JSONPath where possible + +Utility: + +- debugging parser behavior +- developing transforms +- power-user structural extraction +- migration path for legacy `markitect-main` AST workflows + +### UC-RL-002: Local Persistent Knowledge Index + +Build a local cache/index for a repo or document collection: + +- content-addressed document snapshots +- SQLite JSON tables for structure +- SQLite FTS5 for section/block text search +- optional DuckDB/Arrow export for analytical work +- incremental refresh based on content hashes + +Utility: + +- fast repeated queries +- search across many Markdown files +- offline/local-first knowledge work +- foundation for batch transforms and generation pipelines + +### UC-RL-003: Agent Working Memory Cache + +Create activatable context packages for LLM agents: + +- namespace-scoped memories +- short-term working sets and long-term caches +- semantic/episodic/procedural memory categories +- drop/reactivate by stable id +- token-budget-aware context assembly +- provenance and freshness metadata + +Utility: + +- efficient agent work across long projects +- reusable context packs for recurring tasks +- controlled memory updates and recall +- bridge from Markitect documents to agent infrastructure + +### UC-RL-004: Access-Controlled Knowledge Gateway + +Add policy enforcement to cached 
retrieval: + +- labels/trust zones for local use +- ACL/ReBAC/ABAC adapters for stricter systems +- policy-aware query result filtering +- decision logs and diagnostics +- secure context packages for LLM use + +Utility: + +- enterprise and IT-security use cases +- multi-tenant knowledge bases +- agent access control +- auditable data exposure + +## Design Principles + +- The core remains infrastructure-free. +- Backends are optional and capability-declared. +- Every cached object is content-addressed or provenance-addressed. +- Query adapters return the same match/result envelope. +- Policy is checked before data leaves a backend boundary. +- Context packages are explicit, droppable, and reactivatable. +- LLM memory is data with provenance, not invisible prompt residue. +- Experimental backends belong behind stable contracts. diff --git a/docs/workplan-planning-map.md b/docs/workplan-planning-map.md new file mode 100644 index 0000000..7c200ea --- /dev/null +++ b/docs/workplan-planning-map.md @@ -0,0 +1,68 @@ +# Workplan Planning Map + +Date: 2026-05-03 + +## Purpose + +This document captures the current sequencing and priority view for +`markitect-tool` workplans. + +State Hub currently supports workstream dependency edges, but it does not yet +have native workstream priority/order fields and does not ingest dependency +metadata from workplan frontmatter. Until that exists, this file and the +workplan frontmatter are the repo source of truth; State Hub dependency edges +and descriptions mirror the operational view. + +## Priority Scale + +| Priority | Meaning | +| --- | --- | +| `P0` | Current mainline work. | +| `P1` | Next enabling architecture or implementation work. | +| `P2` | High-value follow-on work, start when trigger conditions are met. | +| `P3` | Research-lab or security-sensitive extension work. | +| `complete` | Finished foundation or completed decision work. 
| + +## Current Ordering + +| Workplan | Priority | Status | Depends On | Current View | +| --- | --- | --- | --- | --- | +| `MKTT-WP-0001` | complete | done | none | Repository foundation is complete. | +| `MKTT-WP-0002` | complete | done | `MKTT-WP-0001` | Legacy scope extraction is complete. | +| `MKTT-WP-0004` | complete | done | `MKTT-WP-0001`, `MKTT-WP-0002` | Contract framework is complete and informs later validation/generation work. | +| `MKTT-WP-0003` | P0 | active | `MKTT-WP-0001`, `MKTT-WP-0002`, `MKTT-WP-0004` | Mainline implementation. Continue with P3.5 transform/compose/include. | +| `MKTT-WP-0006` | P1 | todo | `MKTT-WP-0004`; task-level trigger: `MKTT-WP-0003-T005` | Start after transform/composition shape is clear and before serious cache work. | +| `MKTT-WP-0007` | P2 | todo | `MKTT-WP-0006` | First practical cache backend use case: AST/JSONPath/SQLite/FTS. | +| `MKTT-WP-0005` | P2 | todo | `MKTT-WP-0003`, `MKTT-WP-0004` | Pick up when generation/form/context or semantic assessment pressure appears. | +| `MKTT-WP-0009` | P2 | todo | `MKTT-WP-0006` | Establish access-control gateway before security-sensitive cache/context use. | +| `MKTT-WP-0008` | P3 | todo | `MKTT-WP-0006`, `MKTT-WP-0007`, `MKTT-WP-0009` | Agent working-memory cache after backend and policy floor are available. | + +## Dependency Notes + +The most important nuance is `MKTT-WP-0006`: it should not wait for every task +in `MKTT-WP-0003`, because it should shape cache architecture before `P3.7`. +It should wait until `MKTT-WP-0003-T005` gives transform/composition enough +shape to know what cached identities and invalidation rules must preserve. + +This is a mixed task/workstream dependency. State Hub does not currently model +that natively. 
+ +## State Hub Mirror + +Native State Hub dependency edges should mirror the whole-workstream +dependencies: + +- `MKTT-WP-0002 -> MKTT-WP-0001` +- `MKTT-WP-0004 -> MKTT-WP-0001` +- `MKTT-WP-0004 -> MKTT-WP-0002` +- `MKTT-WP-0003 -> MKTT-WP-0001` +- `MKTT-WP-0003 -> MKTT-WP-0002` +- `MKTT-WP-0003 -> MKTT-WP-0004` +- `MKTT-WP-0006 -> MKTT-WP-0004` +- `MKTT-WP-0007 -> MKTT-WP-0006` +- `MKTT-WP-0005 -> MKTT-WP-0003` +- `MKTT-WP-0005 -> MKTT-WP-0004` +- `MKTT-WP-0009 -> MKTT-WP-0006` +- `MKTT-WP-0008 -> MKTT-WP-0006` +- `MKTT-WP-0008 -> MKTT-WP-0007` +- `MKTT-WP-0008 -> MKTT-WP-0009` diff --git a/src/markitect_tool/__init__.py b/src/markitect_tool/__init__.py index 9bf97ec..c203a14 100644 --- a/src/markitect_tool/__init__.py +++ b/src/markitect_tool/__init__.py @@ -21,6 +21,12 @@ from markitect_tool.contract import ( validate_contract_file, ) from markitect_tool.diagnostics import Diagnostic, SourceLocation +from markitect_tool.query import ( + InvalidQueryError, + QueryMatch, + extract_document, + query_document, +) from markitect_tool.schema import ( MarkdownSchema, SchemaValidationResult, @@ -55,4 +61,8 @@ __all__ = [ "validate_contract_file", "Diagnostic", "SourceLocation", + "InvalidQueryError", + "QueryMatch", + "extract_document", + "query_document", ] diff --git a/src/markitect_tool/cli/main.py b/src/markitect_tool/cli/main.py index 331caa3..a92e855 100644 --- a/src/markitect_tool/cli/main.py +++ b/src/markitect_tool/cli/main.py @@ -16,6 +16,7 @@ from markitect_tool.contract import ( load_contract_file, validate_contract, ) +from markitect_tool.query import InvalidQueryError, extract_document, query_document from markitect_tool.schema import load_schema_file, validate_markdown_file, validate_schema @@ -65,6 +66,60 @@ def metrics(file: Path, output_format: str) -> None: _emit_metrics(data, output_format) +@main.command() +@click.argument("file", type=click.Path(exists=True, dir_okay=False, path_type=Path)) +@click.argument("selector") +@click.option( + 
"--format", + "output_format", + type=click.Choice(["json", "yaml", "text"], case_sensitive=False), + default="json", + show_default=True, +) +def query(file: Path, selector: str, output_format: str) -> None: + """Query structured Markdown content with a small selector.""" + + document = parse_markdown_file(file) + try: + matches = query_document(document, selector) + except InvalidQueryError as exc: + raise click.ClickException(str(exc)) from exc + data = { + "selector": selector, + "document_path": str(file), + "count": len(matches), + "matches": [match.to_dict() for match in matches], + } + _emit_query(data, output_format) + + +@main.command() +@click.argument("file", type=click.Path(exists=True, dir_okay=False, path_type=Path)) +@click.argument("selector") +@click.option( + "--format", + "output_format", + type=click.Choice(["text", "json", "yaml"], case_sensitive=False), + default="text", + show_default=True, +) +def extract(file: Path, selector: str, output_format: str) -> None: + """Extract text or Markdown content from structured Markdown.""" + + document = parse_markdown_file(file) + try: + items = extract_document(document, selector) + except InvalidQueryError as exc: + raise click.ClickException(str(exc)) from exc + data = { + "selector": selector, + "document_path": str(file), + "count": len(items), + "items": items, + } + _emit_extract(data, output_format) + + @main.command() @click.argument("file", type=click.Path(exists=True, dir_okay=False, path_type=Path)) @click.option( @@ -214,5 +269,28 @@ def _emit_metrics(data: dict, output_format: str) -> None: ) +def _emit_query(data: dict, output_format: str) -> None: + if output_format == "json": + click.echo(json.dumps(data, indent=2, ensure_ascii=False)) + elif output_format == "yaml": + click.echo(yaml.safe_dump(data, sort_keys=False)) + else: + click.echo(f"{data['count']} match(es)") + for match in data["matches"]: + location = f":{match['line']}" if match.get("line") else "" + click.echo(f"- 
{match['kind']} {match['path']}{location}") + if match.get("text"): + click.echo(f" {match['text'].splitlines()[0]}") + + +def _emit_extract(data: dict, output_format: str) -> None: + if output_format == "json": + click.echo(json.dumps(data, indent=2, ensure_ascii=False)) + elif output_format == "yaml": + click.echo(yaml.safe_dump(data, sort_keys=False)) + else: + click.echo("\n\n".join(data["items"])) + + if __name__ == "__main__": main() diff --git a/src/markitect_tool/query/__init__.py b/src/markitect_tool/query/__init__.py new file mode 100644 index 0000000..d545b4f --- /dev/null +++ b/src/markitect_tool/query/__init__.py @@ -0,0 +1,15 @@ +"""Query and extraction helpers for parsed Markdown documents.""" + +from markitect_tool.query.engine import ( + InvalidQueryError, + QueryMatch, + extract_document, + query_document, +) + +__all__ = [ + "InvalidQueryError", + "QueryMatch", + "extract_document", + "query_document", +] diff --git a/src/markitect_tool/query/engine.py b/src/markitect_tool/query/engine.py new file mode 100644 index 0000000..e267774 --- /dev/null +++ b/src/markitect_tool/query/engine.py @@ -0,0 +1,242 @@ +"""Small selector engine for structured Markdown documents.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + +from markitect_tool.contract import collect_metrics +from markitect_tool.core import ContentBlock, Document, Heading, Section + + +class InvalidQueryError(ValueError): + """Raised when a selector cannot be parsed or evaluated.""" + + +@dataclass(frozen=True) +class QueryMatch: + """One match returned by a selector.""" + + kind: str + path: str + value: Any + text: str | None = None + line: int | None = None + + def to_dict(self) -> dict[str, Any]: + data = { + "kind": self.kind, + "path": self.path, + "value": self.value, + "text": self.text, + "line": self.line, + } + return {key: value for key, value in data.items() if value is not None} + + +@dataclass(frozen=True) +class 
_Selector: + target: str + path: list[str] + filters: dict[str, str] + + +def query_document(document: Document, selector: str) -> list[QueryMatch]: + """Query a parsed document with a small Markitect selector.""" + + parsed = _parse_selector(selector) + if parsed.target in {"document", "$", "."}: + return [QueryMatch(kind="document", path="$", value=document.to_dict())] + if parsed.target == "frontmatter": + return _query_mapping(document.frontmatter, parsed.path, "frontmatter", "$.frontmatter") + if parsed.target == "headings": + return _query_headings(document.headings, parsed.filters) + if parsed.target == "sections": + return _query_sections(document.sections, parsed.filters) + if parsed.target == "blocks": + return _query_blocks(document.blocks, parsed.filters) + if parsed.target == "metrics": + return _query_mapping(collect_metrics(document).to_dict(), parsed.path, "metrics", "$.metrics") + raise InvalidQueryError(f"Unsupported selector target `{parsed.target}`") + + +def extract_document(document: Document, selector: str) -> list[str]: + """Extract text content from query matches.""" + + extracted: list[str] = [] + for match in query_document(document, selector): + if match.text is not None: + extracted.append(match.text) + elif isinstance(match.value, str): + extracted.append(match.value) + elif isinstance(match.value, int | float | bool): + extracted.append(str(match.value)) + return extracted + + +def _parse_selector(selector: str) -> _Selector: + raw = selector.strip() + if not raw: + raise InvalidQueryError("Selector cannot be empty") + + filters: dict[str, str] = {} + base = raw + if "[" in raw or "]" in raw: + if not raw.endswith("]") or raw.count("[") != 1 or raw.count("]") != 1: + raise InvalidQueryError(f"Invalid selector filter syntax `{selector}`") + base, raw_filter = raw[:-1].split("[", 1) + filters = _parse_filters(raw_filter) + + parts = [part for part in base.split(".") if part] + if not parts: + return _Selector(target="document", path=[], 
filters=filters) + return _Selector(target=parts[0], path=parts[1:], filters=filters) + + +def _parse_filters(raw_filter: str) -> dict[str, str]: + filters: dict[str, str] = {} + for raw_part in raw_filter.split(","): + part = raw_part.strip() + if not part: + continue + operator = "~=" if "~=" in part else "=" + if operator not in part: + raise InvalidQueryError(f"Invalid filter `{part}`") + key, value = part.split(operator, 1) + key = key.strip() + if operator == "~=": + key = f"{key}~" + if not key: + raise InvalidQueryError(f"Invalid filter `{part}`") + filters[key] = _strip_quotes(value.strip()) + return filters + + +def _query_mapping( + mapping: dict[str, Any], + path: list[str], + kind: str, + root_path: str, +) -> list[QueryMatch]: + if not path: + return [QueryMatch(kind=kind, path=root_path, value=mapping)] + value: Any = mapping + current_path = root_path + for part in path: + current_path = f"{current_path}.{part}" + if isinstance(value, dict) and part in value: + value = value[part] + else: + return [] + return [QueryMatch(kind=kind, path=current_path, value=value, text=_text_value(value))] + + +def _query_headings(headings: list[Heading], filters: dict[str, str]) -> list[QueryMatch]: + matches: list[QueryMatch] = [] + for index, heading in enumerate(headings): + if not _match_heading(heading, filters): + continue + matches.append( + QueryMatch( + kind="heading", + path=f"$.headings[{index}]", + value=heading.to_dict(), + text=f"{'#' * heading.level} {heading.text}", + line=heading.line, + ) + ) + return matches + + +def _query_sections(sections: list[Section], filters: dict[str, str]) -> list[QueryMatch]: + matches: list[QueryMatch] = [] + for index, section in enumerate(sections): + if not _match_section(section, filters): + continue + matches.append( + QueryMatch( + kind="section", + path=f"$.sections[{index}]", + value=section.to_dict(), + text=_section_markdown(section), + line=section.heading.line, + ) + ) + return matches + + +def 
_query_blocks(blocks: list[ContentBlock], filters: dict[str, str]) -> list[QueryMatch]: + matches: list[QueryMatch] = [] + for index, block in enumerate(blocks): + if not _match_block(block, filters): + continue + matches.append( + QueryMatch( + kind="block", + path=f"$.blocks[{index}]", + value=block.to_dict(), + text=block.text, + line=block.line_start, + ) + ) + return matches + + +def _match_heading(heading: Heading, filters: dict[str, str]) -> bool: + for key, expected in filters.items(): + if key == "level" and str(heading.level) != expected: + return False + if key in {"text", "heading", "title"} and heading.text != expected: + return False + if key in {"text~", "heading~", "title~"} and expected.lower() not in heading.text.lower(): + return False + return True + + +def _match_section(section: Section, filters: dict[str, str]) -> bool: + section_text = "\n".join(block.text for block in section.blocks if block.text) + for key, expected in filters.items(): + if key == "level" and str(section.heading.level) != expected: + return False + if key in {"heading", "title", "text"} and section.heading.text != expected: + return False + if key in {"heading~", "title~", "text~"} and expected.lower() not in section.heading.text.lower(): + return False + if key == "contains" and expected not in section_text: + return False + if key == "contains~" and expected.lower() not in section_text.lower(): + return False + return True + + +def _match_block(block: ContentBlock, filters: dict[str, str]) -> bool: + for key, expected in filters.items(): + if key == "type" and block.type != expected: + return False + if key == "contains" and expected not in block.text: + return False + if key == "contains~" and expected.lower() not in block.text.lower(): + return False + return True + + +def _section_markdown(section: Section) -> str: + lines = [f"{'#' * section.heading.level} {section.heading.text}"] + for block in section.blocks: + if block.text: + lines.extend(["", block.text]) + 
return "\n".join(lines).strip() + + +def _strip_quotes(value: str) -> str: + if len(value) >= 2 and value[0] == value[-1] and value[0] in {'"', "'"}: + return value[1:-1] + return value + + +def _text_value(value: Any) -> str | None: + if isinstance(value, str): + return value + if isinstance(value, int | float | bool): + return str(value) + return None diff --git a/tests/test_query_extraction.py b/tests/test_query_extraction.py new file mode 100644 index 0000000..7499eff --- /dev/null +++ b/tests/test_query_extraction.py @@ -0,0 +1,148 @@ +from pathlib import Path + +import pytest +from click.testing import CliRunner + +from markitect_tool.cli import main +from markitect_tool.core import parse_markdown +from markitect_tool.query import InvalidQueryError, extract_document, query_document + + +QUERY_DOC = """--- +document_type: adr +status: accepted +nested: + owner: Platform +--- + +# Use Query Selectors + +## Context + +The problem is that authors need predictable extraction from Markdown. + +## Decision + +We will use a small selector language before adopting a larger query backend. + +## Consequences + +- Queries remain readable. +- Extraction can feed later transforms. 
+""" + + +def test_query_frontmatter_path(): + document = parse_markdown(QUERY_DOC) + + matches = query_document(document, "frontmatter.nested.owner") + + assert len(matches) == 1 + assert matches[0].kind == "frontmatter" + assert matches[0].path == "$.frontmatter.nested.owner" + assert matches[0].text == "Platform" + + +def test_query_headings_by_level(): + document = parse_markdown(QUERY_DOC) + + matches = query_document(document, "headings[level=2]") + + assert [match.value["text"] for match in matches] == [ + "Context", + "Decision", + "Consequences", + ] + + +def test_query_sections_by_exact_heading(): + document = parse_markdown(QUERY_DOC) + + matches = query_document(document, "sections[heading=Decision]") + + assert len(matches) == 1 + assert matches[0].kind == "section" + assert matches[0].line == 14 + assert matches[0].text.startswith("## Decision") + assert "small selector language" in matches[0].text + + +def test_query_sections_by_case_insensitive_contains(): + document = parse_markdown(QUERY_DOC) + + matches = query_document(document, "sections[contains~=TRANSFORMS]") + + assert [match.value["heading"]["text"] for match in matches] == ["Consequences"] + + +def test_query_blocks_by_type(): + document = parse_markdown(QUERY_DOC) + + matches = query_document(document, "blocks[type=bullet_list]") + + assert len(matches) == 1 + assert "Queries remain readable" in matches[0].text + + +def test_query_metrics_path(): + document = parse_markdown(QUERY_DOC) + + matches = query_document(document, "metrics.document.sections") + + assert matches[0].value == 4 + assert matches[0].text == "4" + + +def test_extract_document_returns_textual_matches(): + document = parse_markdown(QUERY_DOC) + + extracted = extract_document(document, "sections[heading=Context]") + + assert extracted == [ + "## Context\n\nThe problem is that authors need predictable extraction from Markdown." 
+ ] + + +def test_invalid_query_reports_error(): + document = parse_markdown(QUERY_DOC) + + with pytest.raises(InvalidQueryError): + query_document(document, "sections[heading") + + +def test_mkt_query_outputs_json(tmp_path: Path): + source = tmp_path / "doc.md" + source.write_text(QUERY_DOC, encoding="utf-8") + + result = CliRunner().invoke( + main, ["query", str(source), "sections[heading=Decision]"] + ) + + assert result.exit_code == 0 + assert '"count": 1' in result.output + assert "Decision" in result.output + + +def test_mkt_query_outputs_text(tmp_path: Path): + source = tmp_path / "doc.md" + source.write_text(QUERY_DOC, encoding="utf-8") + + result = CliRunner().invoke( + main, ["query", str(source), "headings[level=2]", "--format", "text"] + ) + + assert result.exit_code == 0 + assert "3 match(es)" in result.output + assert "## Context" in result.output + + +def test_mkt_extract_outputs_text(tmp_path: Path): + source = tmp_path / "doc.md" + source.write_text(QUERY_DOC, encoding="utf-8") + + result = CliRunner().invoke( + main, ["extract", str(source), "frontmatter.status"] + ) + + assert result.exit_code == 0 + assert result.output.strip() == "accepted" diff --git a/workplans/MKTT-WP-0001-repo-foundation.md b/workplans/MKTT-WP-0001-repo-foundation.md index ae7598c..42286c5 100644 --- a/workplans/MKTT-WP-0001-repo-foundation.md +++ b/workplans/MKTT-WP-0001-repo-foundation.md @@ -6,6 +6,9 @@ domain: markitect status: done owner: markitect-tool topic_slug: markitect +planning_priority: complete +planning_order: 10 +depends_on_workplans: [] created: "2026-05-03" updated: "2026-05-03" state_hub_workstream_id: "4d405d74-faec-440e-873e-692ff9ca96e7" diff --git a/workplans/MKTT-WP-0002-markitect-main-migration.md b/workplans/MKTT-WP-0002-markitect-main-migration.md index b3cd77d..d70dc71 100644 --- a/workplans/MKTT-WP-0002-markitect-main-migration.md +++ b/workplans/MKTT-WP-0002-markitect-main-migration.md @@ -6,6 +6,10 @@ domain: markitect status: done owner: 
markitect-tool topic_slug: markitect +planning_priority: complete +planning_order: 20 +depends_on_workplans: + - MKTT-WP-0001 created: "2026-05-03" updated: "2026-05-03" state_hub_workstream_id: "0fe54d2c-d579-4b03-a647-7a15bb835893" diff --git a/workplans/MKTT-WP-0003-core-toolkit-implementation.md b/workplans/MKTT-WP-0003-core-toolkit-implementation.md index d949558..be7cc19 100644 --- a/workplans/MKTT-WP-0003-core-toolkit-implementation.md +++ b/workplans/MKTT-WP-0003-core-toolkit-implementation.md @@ -6,6 +6,12 @@ domain: markitect status: active owner: markitect-tool topic_slug: markitect +planning_priority: P0 +planning_order: 40 +depends_on_workplans: + - MKTT-WP-0001 + - MKTT-WP-0002 + - MKTT-WP-0004 created: "2026-05-03" updated: "2026-05-03" state_hub_workstream_id: "9fefb57d-985e-4125-8daf-03554844f45e" @@ -67,7 +73,7 @@ validation, structured violations, `mkt validate`, and `mkt schema validate`. ```task id: MKTT-WP-0003-T004 -status: todo +status: done priority: medium state_hub_task_id: "e4f72218-601e-488f-a5df-171b91a747d2" ``` @@ -75,6 +81,10 @@ state_hub_task_id: "e4f72218-601e-488f-a5df-171b91a747d2" Implement FR-030 and FR-031 over the structured representation. Start with a small query language or JSONPath-like adapter only if it remains simple. +Initial implementation complete for simple selectors over frontmatter, +headings, sections, blocks, and metrics, with API access plus `mkt query` and +`mkt extract`. 
+ ## P3.5 - Implement transform, compose, and include primitives ```task diff --git a/workplans/MKTT-WP-0004-practical-contract-framework.md b/workplans/MKTT-WP-0004-practical-contract-framework.md index d553c72..201265a 100644 --- a/workplans/MKTT-WP-0004-practical-contract-framework.md +++ b/workplans/MKTT-WP-0004-practical-contract-framework.md @@ -6,6 +6,11 @@ domain: markitect status: done owner: markitect-tool topic_slug: markitect +planning_priority: complete +planning_order: 30 +depends_on_workplans: + - MKTT-WP-0001 + - MKTT-WP-0002 created: "2026-05-03" updated: "2026-05-03" state_hub_workstream_id: "558787e1-d287-46a5-9214-634e8b90a858" diff --git a/workplans/MKTT-WP-0005-runtime-context-and-assessment-engines.md b/workplans/MKTT-WP-0005-runtime-context-and-assessment-engines.md index 60547fb..e8ab2ca 100644 --- a/workplans/MKTT-WP-0005-runtime-context-and-assessment-engines.md +++ b/workplans/MKTT-WP-0005-runtime-context-and-assessment-engines.md @@ -6,6 +6,11 @@ domain: markitect status: todo owner: markitect-tool topic_slug: markitect +planning_priority: P2 +planning_order: 70 +depends_on_workplans: + - MKTT-WP-0003 + - MKTT-WP-0004 created: "2026-05-03" updated: "2026-05-03" state_hub_workstream_id: "7918687e-2364-46b1-ab7e-65aa77cb8449" diff --git a/workplans/MKTT-WP-0006-cache-backend-architecture-core.md b/workplans/MKTT-WP-0006-cache-backend-architecture-core.md new file mode 100644 index 0000000..48b3f69 --- /dev/null +++ b/workplans/MKTT-WP-0006-cache-backend-architecture-core.md @@ -0,0 +1,133 @@ +--- +id: MKTT-WP-0006 +type: workplan +title: "Optional Cache Backend Architecture Core" +domain: markitect +status: todo +owner: markitect-tool +topic_slug: markitect +planning_priority: P1 +planning_order: 50 +depends_on_workplans: + - MKTT-WP-0004 +depends_on_tasks: + - MKTT-WP-0003-T005 +created: "2026-05-03" +updated: "2026-05-03" +state_hub_workstream_id: "0c585f8a-5c7e-4c89-b785-5b0089180256" +--- + +# MKTT-WP-0006: Optional Cache Backend 
Architecture Core + +## Purpose + +Create the optional backend fabric that lets `markitect-tool` attach cached +ASTs, indexes, query adapters, context packages, and policy gateways without +making persistent infrastructure mandatory for core CLI use. + +## Background + +Research and architecture are captured in: + +- `docs/research-lab-cache-backend-research.md` +- `docs/cache-backend-architecture-blueprint.md` + +## Decision + +Do not start this before the current deterministic transform/composition slice +has enough shape to show what cache invalidation must preserve. Start it before +WP-0003 P3.7 caching becomes implementation work. + +## P6.1 - Define backend capability model + +```task +id: MKTT-WP-0006-T001 +status: todo +priority: high +state_hub_task_id: "8c04f146-942c-45b8-9a7b-3bd61916aa4b" +``` + +Define capability names, backend manifests, and compatibility checks for: + +- snapshots +- JSON/AST query +- full-text search +- SQL +- vector/hybrid search +- context packages +- policy enforcement +- provenance + +## P6.2 - Define snapshot model and content identity + +```task +id: MKTT-WP-0006-T002 +status: todo +priority: high +state_hub_task_id: "5debc135-908a-47ed-ba15-564610970e38" +``` + +Specify content-addressed document snapshots keyed by source content hash, +parser version, parse options, and contract version where relevant. + +## P6.3 - Define backend interfaces + +```task +id: MKTT-WP-0006-T003 +status: todo +priority: high +state_hub_task_id: "a3e37112-1197-4f6f-8de8-7b3067ef060e" +``` + +Add protocol classes for snapshot backends, index backends, query adapters, +context package registries, and access policy gateways. + +## P6.4 - Implement local backend registry + +```task +id: MKTT-WP-0006-T004 +status: todo +priority: medium +state_hub_task_id: "6c9b8765-4d14-436d-a2c9-c028a31aaade" +``` + +Load backend manifests from project config and expose registered capabilities +without importing optional dependencies unless needed. 
+ +## P6.5 - Add provenance envelope + +```task +id: MKTT-WP-0006-T005 +status: todo +priority: medium +state_hub_task_id: "7b551eae-99c8-4c8a-b781-18d59d318707" +``` + +Add provenance metadata shared by snapshots, query results, context packages, +and diagnostics. + +## P6.6 - Add CLI scaffolding + +```task +id: MKTT-WP-0006-T006 +status: todo +priority: medium +state_hub_task_id: "921e589c-8b0d-4eeb-8834-4a4c6c73da65" +``` + +Add read-only commands: + +```text +mkt backend list +mkt backend inspect +mkt cache status +``` + +No persistent write behavior is required in this task. + +## Exit Criteria + +- Core CLI still works without any backend. +- Backends can declare capabilities in Markdown/YAML manifests. +- Query and future cache work can target backend interfaces. +- Provenance is represented consistently. diff --git a/workplans/MKTT-WP-0007-advanced-query-and-local-index-backend.md b/workplans/MKTT-WP-0007-advanced-query-and-local-index-backend.md new file mode 100644 index 0000000..da46783 --- /dev/null +++ b/workplans/MKTT-WP-0007-advanced-query-and-local-index-backend.md @@ -0,0 +1,125 @@ +--- +id: MKTT-WP-0007 +type: workplan +title: "Advanced Query and Local Index Backend" +domain: markitect +status: todo +owner: markitect-tool +topic_slug: markitect +planning_priority: P2 +planning_order: 60 +depends_on_workplans: + - MKTT-WP-0006 +created: "2026-05-03" +updated: "2026-05-03" +state_hub_workstream_id: "d61a82e4-651a-4df2-944a-9ff996b2e1f6" +--- + +# MKTT-WP-0007: Advanced Query and Local Index Backend + +## Purpose + +Implement the first practical backend use case: cached AST introspection, +JSONPath querying, SQLite metadata, and FTS5 search over Markdown documents. + +## P7.1 - Implement local snapshot store + +```task +id: MKTT-WP-0007-T001 +status: todo +priority: high +state_hub_task_id: "8894a9a4-586c-457b-b4e6-add8276ff5f2" +``` + +Persist parsed document snapshots and source metadata in a local cache +directory. 
+ +## P7.2 - Add AST introspection commands + +```task +id: MKTT-WP-0007-T002 +status: todo +priority: high +state_hub_task_id: "fb9eaa9d-5c20-49a9-a7a6-acae28ac5e20" +``` + +Add: + +```text +mkt ast show +mkt ast stats +``` + +Use the current parsed document and token model. Do not require cache presence +for single-file use. + +## P7.3 - Add optional JSONPath query adapter + +```task +id: MKTT-WP-0007-T003 +status: todo +priority: high +state_hub_task_id: "a7b46b32-f322-4fe0-a6fb-60b0b823593c" +``` + +Support JSONPath over `Document.to_dict()` behind an optional dependency and +shared query result envelope. + +## P7.4 - Build SQLite metadata and JSON index + +```task +id: MKTT-WP-0007-T004 +status: todo +priority: medium +state_hub_task_id: "479f11a3-4ab4-451b-991c-7f143f2bffea" +``` + +Persist source files, content hashes, frontmatter, headings, sections, blocks, +and metrics in SQLite. + +## P7.5 - Add FTS5 section/block search + +```task +id: MKTT-WP-0007-T005 +status: todo +priority: medium +state_hub_task_id: "0f03e9be-b6f0-4e4b-8220-3bbf638a892b" +``` + +Add full-text search over section and block text with source spans and +relevance ranking. + +## P7.6 - Add incremental refresh + +```task +id: MKTT-WP-0007-T006 +status: todo +priority: medium +state_hub_task_id: "7d9472e6-0716-435b-866c-d2c66ad786cf" +``` + +Refresh only changed files based on content hash and parser version. + +## P7.7 - Add local index CLI + +```task +id: MKTT-WP-0007-T007 +status: todo +priority: high +state_hub_task_id: "35cc63ff-3723-43d5-aaf6-f9312efa0f4b" +``` + +Add: + +```text +mkt cache init +mkt cache build +mkt cache query +mkt search +``` + +## Exit Criteria + +- Legacy AST/JSONPath value is recovered as an optional backend. +- Local repeated queries are faster and explainable. +- Simple selectors still work without cache. 
diff --git a/workplans/MKTT-WP-0008-agent-working-memory-context-cache.md b/workplans/MKTT-WP-0008-agent-working-memory-context-cache.md new file mode 100644 index 0000000..8944066 --- /dev/null +++ b/workplans/MKTT-WP-0008-agent-working-memory-context-cache.md @@ -0,0 +1,109 @@ +--- +id: MKTT-WP-0008 +type: workplan +title: "Agent Working Memory Context Cache" +domain: markitect +status: todo +owner: markitect-tool +topic_slug: markitect +planning_priority: P3 +planning_order: 90 +depends_on_workplans: + - MKTT-WP-0006 + - MKTT-WP-0007 + - MKTT-WP-0009 +created: "2026-05-03" +updated: "2026-05-03" +state_hub_workstream_id: "6269f338-4f5c-40ee-90e5-0371f5c3874c" +--- + +# MKTT-WP-0008: Agent Working Memory Context Cache + +## Purpose + +Create activatable context packages that let agents drop, reactivate, and +reuse project knowledge efficiently while preserving provenance and policy +metadata. + +## P8.1 - Define context package schema + +```task +id: MKTT-WP-0008-T001 +status: todo +priority: high +state_hub_task_id: "21ee9c37-4add-4886-bd03-a7bb4b20e957" +``` + +Represent source spans, summaries, token estimates, freshness, provenance, +policy labels, and retrieval recipes. + +## P8.2 - Implement package creation from queries + +```task +id: MKTT-WP-0008-T002 +status: todo +priority: high +state_hub_task_id: "4df06b93-13ce-41fb-a8c3-f04d4ad9d752" +``` + +Create context packages from simple selectors, cached search results, or +manifest files. + +## P8.3 - Implement activation lifecycle + +```task +id: MKTT-WP-0008-T003 +status: todo +priority: medium +state_hub_task_id: "9f3d9792-d655-482d-bae0-262df5fc0136" +``` + +Support activate, deactivate, refresh, and explain operations for a package. + +## P8.4 - Add memory namespaces + +```task +id: MKTT-WP-0008-T004 +status: todo +priority: medium +state_hub_task_id: "2d090494-0e10-44cd-8e2d-c418d7530b27" +``` + +Support project, user, agent, thread, and task namespaces without hard-coding +any external agent platform. 
+ +## P8.5 - Add summary layers + +```task +id: MKTT-WP-0008-T005 +status: todo +priority: medium +state_hub_task_id: "4d1cf970-3d6d-4bd5-8da9-ec2399aa7efe" +``` + +Support deterministic summaries first, then optional LLM-generated summaries +through an injected adapter. + +## P8.6 - Add CLI commands + +```task +id: MKTT-WP-0008-T006 +status: todo +priority: medium +state_hub_task_id: "2f18386c-9d2c-4af1-b8e2-75cb487c1692" +``` + +Add: + +```text +mkt context pack +mkt context activate +mkt context explain +mkt context refresh +``` + +## Exit Criteria + +- Agents can reactivate project context by stable id. +- Context packages show included sources and token budgets. +- Memory writes remain explicit and inspectable. diff --git a/workplans/MKTT-WP-0009-access-controlled-knowledge-gateway.md b/workplans/MKTT-WP-0009-access-controlled-knowledge-gateway.md new file mode 100644 index 0000000..aea5026 --- /dev/null +++ b/workplans/MKTT-WP-0009-access-controlled-knowledge-gateway.md @@ -0,0 +1,105 @@ +--- +id: MKTT-WP-0009 +type: workplan +title: "Access-Controlled Knowledge Gateway" +domain: markitect +status: todo +owner: markitect-tool +topic_slug: markitect +planning_priority: P2 +planning_order: 80 +depends_on_workplans: + - MKTT-WP-0006 +created: "2026-05-03" +updated: "2026-05-03" +state_hub_workstream_id: "f36acbc9-881d-46f2-9181-67de228df0c2" +--- + +# MKTT-WP-0009: Access-Controlled Knowledge Gateway + +## Purpose + +Add a policy boundary for cached retrieval and context packages so Markitect can +support security-sensitive knowledge systems and agent workflows. 
+ +## P9.1 - Define access-control ladder + +```task +id: MKTT-WP-0009-T001 +status: todo +priority: high +state_hub_task_id: "acf240b4-7210-4ee5-90b6-2f2fe1438439" +``` + +Specify supported modes: + +- labels and trust zones +- path/file ACLs +- relationship-based access control +- attribute/rule-based policies +- external policy engines + +## P9.2 - Implement local label policy + +```task +id: MKTT-WP-0009-T002 +status: todo +priority: high +state_hub_task_id: "9eb589d2-82f2-4282-9af0-3958826d397d" +``` + +Start with local policy labels and diagnostics for denied or redacted results. + +## P9.3 - Add policy-aware query filtering + +```task +id: MKTT-WP-0009-T003 +status: todo +priority: high +state_hub_task_id: "d78ab623-c472-4b24-ad84-08464b574886" +``` + +Ensure results are filtered before leaving the backend boundary. Result +metadata must report whether policy filtering occurred. + +## P9.4 - Add relationship policy adapter design + +```task +id: MKTT-WP-0009-T004 +status: todo +priority: medium +state_hub_task_id: "bd4c2b7a-6eac-4845-b5c8-9f9c64946f0c" +``` + +Design an adapter boundary for Zanzibar/OpenFGA/SpiceDB-style relationship +checks without binding the core package to any one service. + +## P9.5 - Add rule policy adapter design + +```task +id: MKTT-WP-0009-T005 +status: todo +priority: medium +state_hub_task_id: "752f1962-e83c-44cc-a1c1-0f89a4ea2a90" +``` + +Design an adapter boundary for OPA/Rego and Cedar-style rule policies. + +## P9.6 - Add decision logs and explainability + +```task +id: MKTT-WP-0009-T006 +status: todo +priority: medium +state_hub_task_id: "990f01fa-5008-4871-a887-1c6ab4375605" +``` + +Record policy decisions with subject, action, object, context, decision, +reason, and provenance. + +## Exit Criteria + +- Local caches can operate in an explicit policy mode. +- Query and context package results are policy-aware. +- More rigid authorization engines can attach later without replacing the + query/cache framework.