From d9236618528d59227e8714496432ac73533d6819 Mon Sep 17 00:00:00 2001 From: tegwick Date: Mon, 4 May 2026 15:00:16 +0200 Subject: [PATCH] Access controlled knowledge gateway functionality --- docs/access-control-policy-gateway.md | 173 +++++++ docs/backend-fabric.md | 7 + docs/internal-extension-framework.md | 1 + docs/local-index-backend.md | 17 +- docs/workplan-planning-map.md | 2 +- examples/backends/local-sqlite-backend.md | 1 + examples/policy/local-label-policy.yaml | 16 + examples/policy/private/internal-note.md | 11 + examples/policy/public-note.md | 11 + src/markitect_tool/__init__.py | 26 + src/markitect_tool/backend/local_store.py | 26 + src/markitect_tool/cli/main.py | 173 ++++++- src/markitect_tool/extension/builtins.py | 36 ++ src/markitect_tool/policy/__init__.py | 35 ++ src/markitect_tool/policy/adapters.py | 65 +++ src/markitect_tool/policy/local.py | 482 ++++++++++++++++++ src/markitect_tool/policy/models.py | 149 ++++++ tests/test_builtin_extension_catalog.py | 16 + tests/test_policy_gateway.py | 211 ++++++++ ...009-access-controlled-knowledge-gateway.md | 42 +- 20 files changed, 1486 insertions(+), 14 deletions(-) create mode 100644 docs/access-control-policy-gateway.md create mode 100644 examples/policy/local-label-policy.yaml create mode 100644 examples/policy/private/internal-note.md create mode 100644 examples/policy/public-note.md create mode 100644 src/markitect_tool/policy/__init__.py create mode 100644 src/markitect_tool/policy/adapters.py create mode 100644 src/markitect_tool/policy/local.py create mode 100644 src/markitect_tool/policy/models.py create mode 100644 tests/test_policy_gateway.py diff --git a/docs/access-control-policy-gateway.md b/docs/access-control-policy-gateway.md new file mode 100644 index 0000000..7cfe8b4 --- /dev/null +++ b/docs/access-control-policy-gateway.md @@ -0,0 +1,173 @@ +# Access-Controlled Knowledge Gateway + +Date: 2026-05-04 + +## Purpose + +The policy gateway adds an explicit boundary before cached 
query, search, and +future context-package results leave a backend. It starts with a local label +policy and keeps room for more rigid authorization systems later. + +This layer does not make Markitect a full identity platform. Core Markitect +answers a narrower question: + +```text +subject + action + object + context -> policy decision +``` + +Backend and workflow code can then filter, redact, log, and explain results +using the same decision envelope. + +## Access-Control Ladder + +The intended progression is: + +| Level | Mode | Purpose | +| --- | --- | --- | +| 1 | Labels and trust zones | Local labs, prototypes, and agent context safety. | +| 2 | Path/file ACLs | Repository-local restrictions and team boundaries. | +| 3 | Relationship policies | Zanzibar/OpenFGA/SpiceDB-style subject-object relations. | +| 4 | Attribute/rule policies | OPA/Rego, Cedar, and policy-as-data engines. | +| 5 | External policy services | Organization identity, audit, and authorization systems. | + +WP-0009 implements levels 1 and 2 directly and defines adapter boundaries for +levels 3 and 4. + +## Local Label Policy + +Local policy files are YAML: + +```yaml +id: local-label-policy +mode: enforce +default_labels: [public] +default_subject: public-agent +subjects: + public-agent: + allowed_labels: [public] + trust_zones: [public] + internal-agent: + allowed_labels: [public, internal] + trust_zones: [public, internal] +path_rules: + - id: private-path + pattern: private/** + labels: [internal] + trust_zone: internal +``` + +Policy modes: + +- `off`: allow every result and emit allow decisions. +- `audit`: keep results but mark decisions that would have been denied. +- `enforce`: deny or redact results before they leave the boundary. + +Denied behavior: + +- `on_denied: drop` removes denied results. This is the default. +- `on_denied: redact` keeps the envelope but replaces text and value. + +Object labels can come from document frontmatter, policy metadata, or path +rules. 
Supported frontmatter shapes: + +```yaml +--- +labels: [internal] +policy: + labels: [internal] + trust_zone: internal +--- +``` + +Path rules augment frontmatter labels. This lets a repository declare that +everything under `private/**` is internal even if an individual document forgets +its frontmatter label. + +## CLI + +Check one decision: + +```text +mkt policy check public-agent query private/doc.md \ + --policy examples/policy/local-label-policy.yaml \ + --path private/doc.md +``` + +Filter local FTS search results: + +```text +mkt search Knowledge \ + --policy examples/policy/local-label-policy.yaml \ + --subject public-agent +``` + +Filter indexed query results: + +```text +mkt cache query 'sections[heading=Decision]' \ + --policy examples/policy/local-label-policy.yaml \ + --subject public-agent +``` + +JSON and YAML outputs include: + +- `policy`: mode, subject, action, allowed, denied, redacted, audit counts +- `policy_decisions`: per-result decisions with stable ids and reasons +- `diagnostics`: denied/redacted result diagnostics + +Text output shows a compact policy summary before the filtered matches. + +## Decision Logs + +Every local decision contains: + +- stable `decision_id` +- subject +- action +- object id +- effect: `allow`, `deny`, `redact`, or `audit_denied` +- reason +- mode +- rule id +- labels +- trust zone +- metadata, including object path and policy id + +`LocalLabelPolicyGateway.explain_decision(decision_id)` returns a decision made +by the current gateway instance. Persistent decision logs are intentionally left +to future backend storage. + +## Adapter Boundaries + +Relationship policies use `RelationshipPolicyAdapter`: + +```text +RelationshipPolicyRequest(subject, relation, object_id, namespace, context) + -> PolicyDecision +``` + +This is the attachment point for Zanzibar/OpenFGA/SpiceDB-style systems. 
+ +Rule policies use `RulePolicyAdapter`: + +```text +RulePolicyRequest(subject, action, object, context, policy_id) + -> PolicyDecision +``` + +This is the attachment point for OPA/Rego, Cedar, or other +attribute/rule-based systems. + +Adapters must return the same `PolicyDecision` shape as the local label +gateway. That keeps query filtering, diagnostics, provenance, and future +context-package filtering independent from the concrete policy engine. + +## Extension Fit + +The local gateway is registered as `policy.local-label`. It is an internal +extension with no network dependency. Backends and workflows can request the +`policy` or `policy_filter` capability without importing an external service. + +The design intentionally stays close to Markdown: labels can live in document +frontmatter, path rules live in YAML, and external authorization languages are +extensions rather than replacements for the core contract. diff --git a/docs/backend-fabric.md b/docs/backend-fabric.md index 71e118e..87bdc05 100644 --- a/docs/backend-fabric.md +++ b/docs/backend-fabric.md @@ -136,6 +136,12 @@ Protocol interfaces are provided for: These are contracts for future implementations. They are intentionally light and do not force the current CLI through a persistent backend. +WP-0009 adds the first concrete `AccessPolicyGateway` implementation: +`LocalLabelPolicyGateway`. It supports labels, trust zones, path rules, enforce +and audit modes, denied-result diagnostics, redaction, and explainable decisions. +Relationship and rule-policy systems remain adapter protocols rather than core +dependencies. 
+ ## CLI Read-only inspection commands: @@ -145,6 +151,7 @@ mkt backend list --path examples/backends mkt backend inspect local-sqlite-cache --path examples/backends --require snapshots --require provenance mkt backend snapshot-id docs/content-references.md mkt backend refresh-plan docs --state examples/backend-state/snapshot-state.yaml +mkt policy check public-agent query private/doc.md --policy examples/policy/local-label-policy.yaml --path private/doc.md ``` The existing `mkt cache status` remains the lightweight file-manifest change diff --git a/docs/internal-extension-framework.md b/docs/internal-extension-framework.md index 5f37ed3..58f7061 100644 --- a/docs/internal-extension-framework.md +++ b/docs/internal-extension-framework.md @@ -36,6 +36,7 @@ framework organizes how Markitect itself exposes and composes capabilities. | `validator` | schema, contract, section assertion | document/context in, diagnostics out | | `runtime` | context loader, form state, dynamic rules | document/contract/context in, diagnostics and state out | | `assessment-runner` | provider-neutral rubric execution | assessment request in, normalized result out | +| `policy-gateway` | local label gateway, future external auth adapters | subject/action/object in, decision or filtered results out | | `template-engine` | deterministic templates | template/data in, Markdown out | | `generation-adapter` | provider-neutral assisted generation | request in, generated candidate out | | `cli-group` | cache, backend, ref, class | command descriptors or registration hook | diff --git a/docs/local-index-backend.md b/docs/local-index-backend.md index 4960b24..f030442 100644 --- a/docs/local-index-backend.md +++ b/docs/local-index-backend.md @@ -36,12 +36,14 @@ Query indexed snapshots: ```text mkt cache query 'sections[heading=Decision]' --root . mkt cache query '$.headings[*].text' --engine jsonpath --root . 
+mkt cache query 'sections[heading=Decision]' --policy examples/policy/local-label-policy.yaml --subject public-agent ``` Search indexed section/block text: ```text mkt search SQLite --root . +mkt search SQLite --policy examples/policy/local-label-policy.yaml --subject public-agent ``` Inspect a parsed AST without using the cache: @@ -90,8 +92,19 @@ This is enough to recover the useful markitect-main idea of keeping parsed structure available for faster and richer query backends, while keeping the normal CLI usable without a cache. +## Policy-Aware Retrieval + +`mkt cache query` and `mkt search` can run with a local label policy before +results leave the local backend boundary. When `--policy` is supplied, Markitect +extracts labels and trust zones from document frontmatter and applies any path +rules in the policy file. JSON/YAML output includes policy decisions and +diagnostics. + +See `docs/access-control-policy-gateway.md` for the policy vocabulary and +adapter boundaries. + ## Future Work Follow-on backend work can now focus on richer dependency extraction from -references, transclusion, and literate chunks; access-controlled query gateways; -and larger-scale memory/context packages. +references, transclusion, and literate chunks; persistent decision logs; and +larger-scale memory/context packages. diff --git a/docs/workplan-planning-map.md b/docs/workplan-planning-map.md index 5c305f3..e58a7d3 100644 --- a/docs/workplan-planning-map.md +++ b/docs/workplan-planning-map.md @@ -37,7 +37,7 @@ and descriptions mirror the operational view. | `MKTT-WP-0013` | complete | done | `MKTT-WP-0003`, `MKTT-WP-0004`, `MKTT-WP-0006`, `MKTT-WP-0007`, `MKTT-WP-0010` | Internal extension framework is complete: characterization tests, canonical processing model, descriptors, registries, lifecycle callbacks, query-engine registry, built-in extension catalog, CLI command specs, and authoring guide. 
| | `MKTT-WP-0005` | complete | done | `MKTT-WP-0003`, `MKTT-WP-0004` | Runtime context, form state, dynamic rules, workflow integration, and provider-neutral assessment boundary are complete. | | `MKTT-WP-0011` | complete | done | `MKTT-WP-0003`; task-level triggers: `MKTT-WP-0010-T001`, `MKTT-WP-0010-T005` | Markdown dataflow workflow layer is complete: workflow standard, source collectors, binding model, deterministic steps, assisted boundary, safe outputs, CLI, docs, and examples. | -| `MKTT-WP-0009` | P2 | todo | `MKTT-WP-0006` | Establish access-control gateway before security-sensitive cache/context use. | +| `MKTT-WP-0009` | complete | done | `MKTT-WP-0006` | Access-controlled knowledge gateway is complete: local labels, trust zones, path rules, policy-aware cache query/search, decisions, diagnostics, and external adapter boundaries. | | `MKTT-WP-0012` | P3 | todo | `MKTT-WP-0004`, `MKTT-WP-0010`, `MKTT-WP-0011` | Future Quarkdown-inspired document function layer: reusable Markdown-native function calls over processors, references, contracts, workflows, and later assisted steps. | | `MKTT-WP-0008` | P3 | todo | `MKTT-WP-0006`, `MKTT-WP-0007`, `MKTT-WP-0009` | Agent working-memory cache after backend and policy floor are available. 
| diff --git a/examples/backends/local-sqlite-backend.md b/examples/backends/local-sqlite-backend.md index b9028e8..4c431c6 100644 --- a/examples/backends/local-sqlite-backend.md +++ b/examples/backends/local-sqlite-backend.md @@ -12,6 +12,7 @@ capabilities: - fts - sql - provenance + - policy - reference_graph - processor_results storage: diff --git a/examples/policy/local-label-policy.yaml b/examples/policy/local-label-policy.yaml new file mode 100644 index 0000000..334b322 --- /dev/null +++ b/examples/policy/local-label-policy.yaml @@ -0,0 +1,16 @@ +id: markitect-local-label-policy +mode: enforce +default_labels: [public] +default_subject: public-agent +subjects: + public-agent: + allowed_labels: [public] + trust_zones: [public] + internal-agent: + allowed_labels: [public, internal] + trust_zones: [public, internal] +path_rules: + - id: private-path + pattern: private/** + labels: [internal] + trust_zone: internal diff --git a/examples/policy/private/internal-note.md b/examples/policy/private/internal-note.md new file mode 100644 index 0000000..b8630b6 --- /dev/null +++ b/examples/policy/private/internal-note.md @@ -0,0 +1,11 @@ +--- +policy: + labels: [internal] + trust_zone: internal +--- + +# Internal Note + +## Decision + +Keep implementation-specific security review notes in the internal trust zone. diff --git a/examples/policy/public-note.md b/examples/policy/public-note.md new file mode 100644 index 0000000..df20ef9 --- /dev/null +++ b/examples/policy/public-note.md @@ -0,0 +1,11 @@ +--- +labels: [public] +policy: + trust_zone: public +--- + +# Public Note + +## Decision + +Share the policy gateway overview with every documentation contributor. 
diff --git a/src/markitect_tool/__init__.py b/src/markitect_tool/__init__.py index 6e4cc03..9463313 100644 --- a/src/markitect_tool/__init__.py +++ b/src/markitect_tool/__init__.py @@ -123,6 +123,20 @@ from markitect_tool.processor import ( discover_fenced_processors, run_fenced_processors, ) +from markitect_tool.policy import ( + LocalLabelPolicy, + LocalLabelPolicyGateway, + LocalPathPolicyRule, + PolicyDecision, + PolicyFilterResult, + PolicyObject, + PolicySubject, + RelationshipPolicyAdapter, + RelationshipPolicyRequest, + RulePolicyAdapter, + RulePolicyRequest, + policy_metadata_from_document, +) from markitect_tool.query import ( InvalidQueryError, QueryMatch, @@ -294,6 +308,18 @@ __all__ = [ "default_processor_registry", "discover_fenced_processors", "run_fenced_processors", + "LocalLabelPolicy", + "LocalLabelPolicyGateway", + "LocalPathPolicyRule", + "PolicyDecision", + "PolicyFilterResult", + "PolicyObject", + "PolicySubject", + "RelationshipPolicyAdapter", + "RelationshipPolicyRequest", + "RulePolicyAdapter", + "RulePolicyRequest", + "policy_metadata_from_document", "InvalidQueryError", "QueryMatch", "extract_document", diff --git a/src/markitect_tool/backend/local_store.py b/src/markitect_tool/backend/local_store.py index 99ee724..8853259 100644 --- a/src/markitect_tool/backend/local_store.py +++ b/src/markitect_tool/backend/local_store.py @@ -294,6 +294,32 @@ class LocalSnapshotStore: for row in rows ] + def search_with_policy( + self, + query: str, + *, + subject: str, + gateway: Any, + action: str = "search", + limit: int = 20, + context: dict[str, Any] | None = None, + ) -> dict[str, Any]: + """Search and apply a policy gateway before returning result rows.""" + + matches = [] + for result in self.search(query, limit=limit): + item = result.to_dict() + item["policy"] = self.policy_metadata(result.path) + matches.append(item) + return gateway.filter_results(subject, action, matches, context=context) + + def policy_metadata(self, path: str) -> 
dict[str, Any]: + """Return document-derived policy metadata for an indexed source path.""" + + from markitect_tool.policy import policy_metadata_from_document + + return policy_metadata_from_document(self.get_document(path), path=path) + def build( self, paths: list[str | Path], diff --git a/src/markitect_tool/cli/main.py b/src/markitect_tool/cli/main.py index 9ccf7a4..bb6c775 100644 --- a/src/markitect_tool/cli/main.py +++ b/src/markitect_tool/cli/main.py @@ -52,6 +52,7 @@ from markitect_tool.generation import ( from markitect_tool.literate import tangle_markdown, weave_markdown, write_tangle_files from markitect_tool.ops import IncludeError, compose_files, resolve_includes, transform_markdown from markitect_tool.processor import ProcessorContext, run_fenced_processors +from markitect_tool.policy import LocalLabelPolicyGateway from markitect_tool.query import ( InvalidQueryError, extract_document, @@ -727,6 +728,69 @@ def backend_refresh_plan( raise click.exceptions.Exit(1 if plan.dirty else 0) +@main.group() +def policy() -> None: + """Check local access policy decisions.""" + + +@policy.command("check") +@click.argument("subject") +@click.argument("action") +@click.argument("object_id") +@click.option( + "--policy", + "policy_file", + type=click.Path(exists=True, dir_okay=False, path_type=Path), + help="Local label policy file.", +) +@click.option("--label", "labels", multiple=True, help="Object policy label. 
May be repeated.") +@click.option("--path", "object_path", help="Object path for path ACL and path-label rules.") +@click.option("--trust-zone", help="Object trust zone.") +@click.option( + "--policy-mode", + type=click.Choice(["off", "audit", "enforce"], case_sensitive=False), + help="Override policy mode for this check.", +) +@click.option( + "--format", + "output_format", + type=click.Choice(["json", "yaml", "text"], case_sensitive=False), + default="text", + show_default=True, +) +def policy_check( + subject: str, + action: str, + object_id: str, + policy_file: Path | None, + labels: tuple[str, ...], + object_path: str | None, + trust_zone: str | None, + policy_mode: str | None, + output_format: str, +) -> None: + """Authorize one subject/action/object tuple with local label policy.""" + + try: + gateway = _load_policy_gateway(policy_file, policy_mode) or LocalLabelPolicyGateway() + decision = gateway.authorize( + subject, + action, + object_id, + context={ + "object": { + "labels": list(labels), + "path": object_path, + "trust_zone": trust_zone, + } + }, + ) + except ValueError as exc: + raise click.ClickException(str(exc)) from exc + _emit_policy_result({"decision": decision}, output_format) + raise click.exceptions.Exit(0 if decision.get("allowed") else 1) + + @main.group("class") def class_group() -> None: """Resolve deterministic content classes.""" @@ -1028,6 +1092,18 @@ def cache_index( multiple=True, help="Restrict query to one or more indexed relative paths.", ) +@click.option( + "--policy", + "policy_file", + type=click.Path(exists=True, dir_okay=False, path_type=Path), + help="Local label policy file used to filter results.", +) +@click.option("--subject", default="anonymous", help="Policy subject id.") +@click.option( + "--policy-mode", + type=click.Choice(["off", "audit", "enforce"], case_sensitive=False), + help="Override policy mode for this query.", +) @click.option( "--engine", type=click.Choice(["selector", "jsonpath"], case_sensitive=False), 
@@ -1047,17 +1123,22 @@ def cache_query( root: Path, index_path: Path | None, paths: tuple[str, ...], + policy_file: Path | None, + subject: str, + policy_mode: str | None, engine: str, output_format: str, ) -> None: """Run a selector or JSONPath query over indexed document snapshots.""" store = LocalSnapshotStore(local_index_path_for(root, index_path)) + policy_gateway = _load_policy_gateway(policy_file, policy_mode) indexed_paths = sorted(paths or [state.path for state in store.load_state()]) all_matches = [] try: for indexed_path in indexed_paths: document = Document.from_dict(store.get_document(indexed_path)) + policy_metadata = store.policy_metadata(indexed_path) if policy_gateway else {} matches = ( query_document_jsonpath(document, selector) if engine == "jsonpath" @@ -1066,11 +1147,17 @@ def cache_query( for match in matches: item = match.to_dict() item["source_path"] = indexed_path + if policy_metadata: + item["policy"] = policy_metadata all_matches.append(item) except KeyError as exc: raise click.ClickException(str(exc)) from exc except InvalidQueryError as exc: raise click.ClickException(str(exc)) from exc + policy_result = None + if policy_gateway: + policy_result = policy_gateway.filter_results(subject, "query", all_matches) + all_matches = policy_result["results"] data = { "selector": selector, "engine": engine, @@ -1078,6 +1165,10 @@ def cache_query( "count": len(all_matches), "matches": all_matches, } + if policy_result: + data["policy"] = policy_result.get("policy") + data["policy_decisions"] = policy_result.get("decisions") + data["diagnostics"] = policy_result.get("diagnostics") _emit_query(data, output_format) @@ -1096,6 +1187,18 @@ def cache_query( help="SQLite index path. 
Defaults to .markitect/cache/index.sqlite3 under root.", ) @click.option("--limit", type=int, default=20, show_default=True) +@click.option( + "--policy", + "policy_file", + type=click.Path(exists=True, dir_okay=False, path_type=Path), + help="Local label policy file used to filter results.", +) +@click.option("--subject", default="anonymous", help="Policy subject id.") +@click.option( + "--policy-mode", + type=click.Choice(["off", "audit", "enforce"], case_sensitive=False), + help="Override policy mode for this search.", +) @click.option( "--format", "output_format", @@ -1108,21 +1211,39 @@ def search( root: Path, index_path: Path | None, limit: int, + policy_file: Path | None, + subject: str, + policy_mode: str | None, output_format: str, ) -> None: """Search the local SQLite index with FTS5.""" try: store = LocalSnapshotStore(local_index_path_for(root, index_path)) - results = store.search(text, limit=limit) + policy_gateway = _load_policy_gateway(policy_file, policy_mode) + if policy_gateway: + policy_result = store.search_with_policy( + text, + subject=subject, + gateway=policy_gateway, + limit=limit, + ) + matches = policy_result["results"] + else: + policy_result = None + matches = [result.to_dict() for result in store.search(text, limit=limit)] except ValueError as exc: raise click.ClickException(str(exc)) from exc data = { "query": text, "index_path": str(local_index_path_for(root, index_path)), - "count": len(results), - "matches": [result.to_dict() for result in results], + "count": len(matches), + "matches": matches, } + if policy_result: + data["policy"] = policy_result.get("policy") + data["policy_decisions"] = policy_result.get("decisions") + data["diagnostics"] = policy_result.get("diagnostics") _emit_search_results(data, output_format) @@ -1529,6 +1650,20 @@ def contract_form_state( raise click.exceptions.Exit(0 if form_state.valid else 1) +def _load_policy_gateway( + policy_file: Path | None, + policy_mode: str | None, +) -> 
LocalLabelPolicyGateway | None: + if policy_file is None and policy_mode is None: + return None + try: + if policy_file: + return LocalLabelPolicyGateway.from_file(policy_file, mode=policy_mode) + return LocalLabelPolicyGateway(mode=policy_mode) + except ValueError as exc: + raise click.ClickException(str(exc)) from exc + + def _emit_result(data: dict, output_format: str) -> None: if output_format == "json": click.echo(json.dumps(data, indent=2, ensure_ascii=False)) @@ -1588,6 +1723,19 @@ def _emit_form_state(data: dict, output_format: str) -> None: ) +def _emit_policy_result(data: dict, output_format: str) -> None: + if output_format == "json": + click.echo(json.dumps(data, indent=2, ensure_ascii=False)) + elif output_format == "yaml": + click.echo(yaml.safe_dump(data, sort_keys=False)) + else: + decision = data["decision"] + click.echo("allowed" if decision.get("allowed") else "denied") + click.echo(f"effect: {decision.get('effect')}") + click.echo(f"decision_id: {decision.get('decision_id')}") + click.echo(f"reason: {decision.get('reason')}") + + def _emit_metrics(data: dict, output_format: str) -> None: if output_format == "json": click.echo(json.dumps(data, indent=2, ensure_ascii=False)) @@ -1615,11 +1763,15 @@ def _emit_query(data: dict, output_format: str) -> None: click.echo(yaml.safe_dump(data, sort_keys=False)) else: click.echo(f"{data['count']} match(es)") + if data.get("policy"): + _emit_policy_summary(data["policy"]) for match in data["matches"]: location = f":{match['line']}" if match.get("line") else "" click.echo(f"- {match['kind']} {match['path']}{location}") if match.get("text"): click.echo(f" {match['text'].splitlines()[0]}") + for diagnostic in data.get("diagnostics", []): + click.echo(f"! 
[{diagnostic['severity']}] {diagnostic['code']}: {diagnostic['message']}") def _emit_extract(data: dict, output_format: str) -> None: @@ -1709,6 +1861,8 @@ def _emit_search_results(data: dict, output_format: str) -> None: click.echo(yaml.safe_dump(data, sort_keys=False)) else: click.echo(f"{data['count']} match(es)") + if data.get("policy"): + _emit_policy_summary(data["policy"]) for match in data["matches"]: span = "" if match.get("line_start"): @@ -1720,6 +1874,19 @@ def _emit_search_results(data: dict, output_format: str) -> None: preview = " ".join(str(match.get("text", "")).split()) if preview: click.echo(f" {preview[:160]}") + for diagnostic in data.get("diagnostics", []): + click.echo(f"! [{diagnostic['severity']}] {diagnostic['code']}: {diagnostic['message']}") + + +def _emit_policy_summary(policy_data: dict) -> None: + click.echo( + "policy: " + f"mode={policy_data.get('mode')} " + f"subject={policy_data.get('subject')} " + f"allowed={policy_data.get('allowed', 0)} " + f"denied={policy_data.get('denied', 0)} " + f"redacted={policy_data.get('redacted', 0)}" + ) def _emit_workflow_result(data: dict, output_format: str) -> None: diff --git a/src/markitect_tool/extension/builtins.py b/src/markitect_tool/extension/builtins.py index 8477209..cfde167 100644 --- a/src/markitect_tool/extension/builtins.py +++ b/src/markitect_tool/extension/builtins.py @@ -17,6 +17,7 @@ def builtin_extension_registry() -> ExtensionRegistry: _runtime_context_descriptor(), _runtime_form_state_descriptor(), _runtime_assessment_descriptor(), + _local_label_policy_descriptor(), ]: registry.register(descriptor) return registry @@ -86,6 +87,7 @@ def _local_sqlite_backend_descriptor() -> ExtensionDescriptor: ProcessingCapability(id="fts", kind="backend"), ProcessingCapability(id="sql", kind="backend"), ProcessingCapability(id="provenance", kind="backend"), + ProcessingCapability(id="policy_filter", kind="backend"), ], safety={"reads_files": True, "writes_local_cache": True, "network": 
False}, input_contract="Markdown files/directories", @@ -188,3 +190,37 @@ def _runtime_assessment_descriptor() -> ExtensionDescriptor: examples=["examples/runtime/concept-note-assessment.contract.md"], metadata={"provider_implementation": "external adapter required"}, ) + + +def _local_label_policy_descriptor() -> ExtensionDescriptor: + return ExtensionDescriptor( + id="policy.local-label", + kind="policy-gateway", + summary="Local label, trust-zone, and path policy gateway.", + capabilities=[ + ProcessingCapability(id="policy", kind="authorize"), + ProcessingCapability(id="policy_filter", kind="filter"), + ProcessingCapability(id="diagnostics", kind="emit"), + ProcessingCapability(id="provenance", kind="emit"), + ], + safety={"network": False, "external_policy_engine": False}, + input_contract="PolicySubject + PolicyObject + local label policy", + output_contract="PolicyDecision | PolicyFilterResult", + diagnostics_namespace="policy", + provenance_prefix="policy.local_label", + cli={ + "commands": [ + "mkt policy check", + "mkt cache query --policy", + "mkt search --policy", + ] + }, + docs=["docs/access-control-policy-gateway.md"], + examples=["examples/policy/local-label-policy.yaml"], + metadata={ + "external_adapters": [ + "RelationshipPolicyAdapter", + "RulePolicyAdapter", + ] + }, + ) diff --git a/src/markitect_tool/policy/__init__.py b/src/markitect_tool/policy/__init__.py new file mode 100644 index 0000000..139f27f --- /dev/null +++ b/src/markitect_tool/policy/__init__.py @@ -0,0 +1,35 @@ +"""Access policy gateways and adapter protocols.""" + +from markitect_tool.policy.adapters import ( + RelationshipPolicyAdapter, + RelationshipPolicyRequest, + RulePolicyAdapter, + RulePolicyRequest, +) +from markitect_tool.policy.local import ( + LocalLabelPolicy, + LocalLabelPolicyGateway, + LocalPathPolicyRule, + policy_metadata_from_document, +) +from markitect_tool.policy.models import ( + PolicyDecision, + PolicyFilterResult, + PolicyObject, + PolicySubject, +) + 
+__all__ = [ + "LocalLabelPolicy", + "LocalLabelPolicyGateway", + "LocalPathPolicyRule", + "PolicyDecision", + "PolicyFilterResult", + "PolicyObject", + "PolicySubject", + "RelationshipPolicyAdapter", + "RelationshipPolicyRequest", + "RulePolicyAdapter", + "RulePolicyRequest", + "policy_metadata_from_document", +] diff --git a/src/markitect_tool/policy/adapters.py b/src/markitect_tool/policy/adapters.py new file mode 100644 index 0000000..8f9a3a3 --- /dev/null +++ b/src/markitect_tool/policy/adapters.py @@ -0,0 +1,65 @@ +"""Protocol boundaries for external authorization engines.""" + +from __future__ import annotations + +from dataclasses import asdict, dataclass, field +from typing import Any, Protocol + +from markitect_tool.policy.models import PolicyDecision + + +@dataclass(frozen=True) +class RelationshipPolicyRequest: + """Relationship-based authorization request. + + This maps cleanly to Zanzibar/OpenFGA/SpiceDB-style checks without binding + Markitect core to one service or tuple schema. + """ + + subject: str + relation: str + object_id: str + namespace: str | None = None + context: dict[str, Any] = field(default_factory=dict) + + def to_dict(self) -> dict[str, Any]: + return _drop_empty(asdict(self)) + + +class RelationshipPolicyAdapter(Protocol): + """Adapter boundary for relationship authorization systems.""" + + def check(self, request: RelationshipPolicyRequest) -> PolicyDecision | dict[str, Any]: + """Return a policy decision for a relationship check.""" + + +@dataclass(frozen=True) +class RulePolicyRequest: + """Attribute/rule policy evaluation request. + + This can be mapped to OPA/Rego, Cedar, or local policy-as-data engines. 
+ """ + + subject: dict[str, Any] + action: str + object: dict[str, Any] + context: dict[str, Any] = field(default_factory=dict) + policy_id: str | None = None + + def to_dict(self) -> dict[str, Any]: + return _drop_empty(asdict(self)) + + +class RulePolicyAdapter(Protocol): + """Adapter boundary for rule/attribute policy systems.""" + + def evaluate(self, request: RulePolicyRequest) -> PolicyDecision | dict[str, Any]: + """Return a policy decision for a rule evaluation.""" + + +def _drop_empty(data: dict[str, Any]) -> dict[str, Any]: + return { + key: value + for key, value in data.items() + if value not in (None, [], {}, "") + } diff --git a/src/markitect_tool/policy/local.py b/src/markitect_tool/policy/local.py new file mode 100644 index 0000000..73be912 --- /dev/null +++ b/src/markitect_tool/policy/local.py @@ -0,0 +1,482 @@ +"""Local label policy gateway for cache, query, and context-package results.""" + +from __future__ import annotations + +import fnmatch +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any + +import yaml + +from markitect_tool.diagnostics import Diagnostic +from markitect_tool.policy.models import ( + PolicyDecision, + PolicyFilterResult, + PolicyObject, + PolicySubject, +) + + +POLICY_MODES = {"off", "audit", "enforce"} +DENIED_BEHAVIOR = {"drop", "redact"} + + +@dataclass(frozen=True) +class LocalPathPolicyRule: + """Path rule that can add labels, set trust zone, or deny directly.""" + + pattern: str + labels: list[str] = field(default_factory=list) + trust_zone: str | None = None + deny: bool = False + id: str | None = None + + @classmethod + def from_mapping(cls, raw: dict[str, Any], *, fallback_id: str) -> "LocalPathPolicyRule": + return cls( + pattern=str(raw.get("pattern") or raw.get("glob") or raw.get("path") or "*"), + labels=_string_list(raw.get("labels") or raw.get("label")), + trust_zone=raw.get("trust_zone") or raw.get("zone"), + deny=bool(raw.get("deny", False)), + id=raw.get("id") or 
@dataclass(frozen=True)
class LocalLabelPolicy:
    """Declarative local policy for labels, trust zones, and path ACLs.

    Instances are normally built with :meth:`from_mapping` or
    :meth:`from_file`. Direct construction is also supported: the default
    subject is always registered so the gateway's "unknown subject falls back
    to the default subject" lookup cannot fail.
    """

    id: str = "local-label-policy"
    mode: str = "enforce"
    default_labels: list[str] = field(default_factory=lambda: ["public"])
    default_trust_zone: str | None = None
    default_subject: str = "anonymous"
    on_denied: str = "drop"
    subjects: dict[str, PolicySubject] = field(default_factory=dict)
    path_rules: list[LocalPathPolicyRule] = field(default_factory=list)
    metadata: dict[str, Any] = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Register the default subject when the caller did not define it."""
        if self.default_subject in self.subjects:
            return
        fallback = PolicySubject(
            id=self.default_subject,
            allowed_labels=list(self.default_labels) or ["public"],
        )
        # The dataclass is frozen, so rebind through object.__setattr__ and
        # build a new dict rather than mutating the caller's mapping.
        object.__setattr__(
            self,
            "subjects",
            {**self.subjects, self.default_subject: fallback},
        )

    @classmethod
    def from_mapping(cls, raw: dict[str, Any]) -> "LocalLabelPolicy":
        """Build a policy from a parsed mapping.

        The policy may sit at the top level of *raw* or under a ``policy``
        key. An unrecognized ``mode`` falls back to ``"enforce"`` and an
        unrecognized ``on_denied`` falls back to ``"drop"``.
        """
        nested = raw.get("policy")
        policy = nested if isinstance(nested, dict) else raw

        subjects = _subjects_from_mapping(policy.get("subjects"))
        default_subject = str(policy.get("default_subject", "anonymous"))
        if default_subject not in subjects:
            subjects[default_subject] = PolicySubject(
                id=default_subject,
                allowed_labels=_string_list(
                    policy.get("default_allowed_labels") or policy.get("default_labels") or ["public"]
                ),
                trust_zones=_string_list(policy.get("default_trust_zones")),
            )

        raw_mode = policy.get("mode", "enforce")
        if isinstance(raw_mode, bool):
            # YAML 1.1 loaders (PyYAML) read an unquoted `off`/`on` as a
            # boolean. Without this mapping `mode: off` became "false" and
            # silently fell back to "enforce".
            raw_mode = "enforce" if raw_mode else "off"
        mode = str(raw_mode).strip().lower()
        if mode not in POLICY_MODES:
            mode = "enforce"

        on_denied = str(policy.get("on_denied", "drop")).strip().lower()
        if on_denied not in DENIED_BEHAVIOR:
            on_denied = "drop"

        return cls(
            id=str(policy.get("id", "local-label-policy")),
            mode=mode,
            default_labels=_string_list(policy.get("default_labels") or ["public"]),
            default_trust_zone=policy.get("default_trust_zone"),
            default_subject=default_subject,
            on_denied=on_denied,
            subjects=subjects,
            path_rules=_path_rules_from_value(policy.get("path_rules") or policy.get("paths")),
            metadata=dict(policy.get("metadata") or {}),
        )

    @classmethod
    def from_file(cls, path: str | Path) -> "LocalLabelPolicy":
        """Load a policy from a UTF-8 YAML file.

        An empty file yields the default policy.

        Raises:
            ValueError: if the YAML document is not a mapping.
        """
        policy_path = Path(path)
        data = yaml.safe_load(policy_path.read_text(encoding="utf-8")) or {}
        if not isinstance(data, dict):
            raise ValueError("Policy file must contain a mapping.")
        return cls.from_mapping(data)
def filter_results(
    self,
    subject: str,
    action: str,
    results: list[dict[str, Any]],
    context: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Filter results and return policy decisions.

    Each result is evaluated with its own context. Caller-supplied
    ``context["object"]`` values act as defaults that non-empty per-result
    policy metadata overrides. The caller's ``context`` is never mutated.

    Allowed and audit-denied results are kept (annotated with the decision),
    redacted results are kept with their payload replaced, and denied results
    are dropped. Redacted and denied results each add a diagnostic.

    Returns:
        The ``PolicyFilterResult`` envelope as a plain dict.
    """
    base_context = dict(context or {})
    shared_object = base_context.get("object")
    # Copy so per-item metadata cannot leak into the caller's dict or into
    # the next item; tolerate a missing or non-dict "object" entry.
    shared_object = dict(shared_object) if isinstance(shared_object, dict) else {}

    kept: list[dict[str, Any]] = []
    diagnostics: list[Diagnostic] = []
    decisions: list[PolicyDecision] = []
    for index, item in enumerate(results):
        object_id = _object_id_for_result(item, index)
        item_object = dict(shared_object)
        # Only non-empty per-item values override the shared defaults, so a
        # missing item label or path no longer erases caller-supplied data.
        item_object.update(
            {
                key: value
                for key, value in _policy_object_mapping(item).items()
                if value not in (None, [], {})
            }
        )
        item_context = {**base_context, "result": item, "object": item_object}
        decision = self.decide(subject, action, object_id, context=item_context)
        decisions.append(decision)

        annotated = _annotate_result(item, decision)
        if decision.effect in ("allow", "audit_denied"):
            kept.append(annotated)
        elif decision.effect == "redact":
            kept.append(_redact_result(annotated))
            diagnostics.append(_denied_diagnostic(decision, redacted=True))
        else:
            diagnostics.append(_denied_diagnostic(decision))

    result = PolicyFilterResult(
        results=kept,
        decisions=decisions,
        diagnostics=diagnostics,
        mode=self.policy.mode,
        subject=subject,
        action=action,
    )
    return result.to_dict()
override.get("labels"))), + trust_zones=_unique(merged.trust_zones + _string_list(override.get("trust_zones") or override.get("zones"))), + roles=_unique(merged.roles + _string_list(override.get("roles"))), + allowed_actions=_unique(merged.allowed_actions + _string_list(override.get("allowed_actions") or override.get("actions"))), + path_allow=_unique(merged.path_allow + _string_list(override.get("path_allow") or override.get("allow_paths"))), + path_deny=_unique(merged.path_deny + _string_list(override.get("path_deny") or override.get("deny_paths"))), + attributes=merged.attributes | dict(override.get("attributes") or {}), + ) + return self.policy.subjects.get(subject) or self.policy.subjects[self.policy.default_subject] + + def _object(self, object_id: str, context: dict[str, Any] | None) -> PolicyObject: + raw_object = context.get("object", {}) if context else {} + result = context.get("result", {}) if context else {} + path = raw_object.get("path") or result.get("source_path") or result.get("path") + labels = _unique( + self.policy.default_labels + + _string_list(raw_object.get("labels") or raw_object.get("label")) + + _string_list(result.get("labels")) + ) + trust_zone = raw_object.get("trust_zone") or self.policy.default_trust_zone + deny_by_path = False + matched_rules: list[str] = [] + for rule in self.policy.path_rules: + if not rule.matches(path): + continue + matched_rules.append(rule.id or rule.pattern) + labels = _unique(labels + rule.labels) + trust_zone = rule.trust_zone or trust_zone + deny_by_path = deny_by_path or rule.deny + attributes = dict(raw_object.get("attributes") or {}) + attributes["matched_path_rules"] = matched_rules + attributes["deny_by_path_rule"] = deny_by_path + return PolicyObject( + id=object_id, + path=path, + labels=labels, + trust_zone=trust_zone, + attributes=attributes, + ) + + def _evaluate( + self, + subject: PolicySubject, + action: str, + policy_object: PolicyObject, + ) -> PolicyDecision: + if self.policy.mode == 
"off": + return self._decision(subject, action, policy_object, "allow", "policy mode is off") + + denial_reason, rule_id = self._denial_reason(subject, action, policy_object) + if denial_reason is None: + return self._decision(subject, action, policy_object, "allow", "label policy allowed") + + if self.policy.mode == "audit": + return self._decision(subject, action, policy_object, "audit_denied", denial_reason, rule_id) + if self.policy.on_denied == "redact": + return self._decision(subject, action, policy_object, "redact", denial_reason, rule_id) + return self._decision(subject, action, policy_object, "deny", denial_reason, rule_id) + + def _denial_reason( + self, + subject: PolicySubject, + action: str, + policy_object: PolicyObject, + ) -> tuple[str | None, str | None]: + if policy_object.attributes.get("deny_by_path_rule"): + return "object path is denied by local path policy", "path.deny" + if subject.allowed_actions and action not in subject.allowed_actions: + return f"subject `{subject.id}` is not allowed to perform `{action}`", "subject.action" + if policy_object.path: + if any(fnmatch.fnmatch(policy_object.path, pattern) for pattern in subject.path_deny): + return "object path is denied for subject", "subject.path_deny" + if subject.path_allow and not any( + fnmatch.fnmatch(policy_object.path, pattern) for pattern in subject.path_allow + ): + return "object path is outside subject allow list", "subject.path_allow" + missing_labels = sorted(set(policy_object.labels) - set(subject.allowed_labels)) + if missing_labels: + return ( + f"subject `{subject.id}` lacks labels {missing_labels}", + "labels", + ) + if policy_object.trust_zone and subject.trust_zones and policy_object.trust_zone not in subject.trust_zones: + return ( + f"subject `{subject.id}` is outside trust zone `{policy_object.trust_zone}`", + "trust_zone", + ) + return None, None + + def _decision( + self, + subject: PolicySubject, + action: str, + policy_object: PolicyObject, + effect: str, + 
def policy_metadata_from_document(
    document: dict[str, Any],
    *,
    path: str | None = None,
) -> dict[str, Any]:
    """Extract stable policy metadata from parsed document frontmatter.

    Labels are collected, in order, from ``frontmatter.policy.labels``/``label``,
    ``frontmatter.labels``/``label``, and ``frontmatter.classification``. The
    trust zone comes from ``frontmatter.policy`` (``trust_zone`` or ``zone``)
    or from ``frontmatter.trust_zone``.

    A non-dict *document*, or a missing or non-dict ``frontmatter`` or
    ``policy`` section, is treated as empty instead of raising.

    Returns:
        A dict with any of ``path``, ``labels``, ``trust_zone``, and
        ``attributes``. Keys whose value is empty are omitted, including
        individual attributes that are ``None``.
    """
    doc = document if isinstance(document, dict) else {}
    frontmatter = doc.get("frontmatter")
    if not isinstance(frontmatter, dict):
        # Covers both a missing key and an explicit `frontmatter: null`.
        frontmatter = {}
    policy = frontmatter.get("policy")
    if not isinstance(policy, dict):
        policy = {}

    labels = _unique(
        _string_list(policy.get("labels") or policy.get("label"))
        + _string_list(frontmatter.get("labels") or frontmatter.get("label"))
        + _string_list(frontmatter.get("classification"))
    )
    attributes = {
        "document_type": frontmatter.get("document_type") or frontmatter.get("type"),
        "title": frontmatter.get("title"),
    }
    data = {
        "path": path or doc.get("source_path"),
        "labels": labels,
        "trust_zone": policy.get("trust_zone") or policy.get("zone") or frontmatter.get("trust_zone"),
        # Prune unset attributes so the final empty-value filter can drop the
        # whole section when nothing was declared.
        "attributes": {key: value for key, value in attributes.items() if value is not None},
    }
    return {key: value for key, value in data.items() if value not in (None, [], {})}
def _path_rules_from_value(value: Any) -> list[LocalPathPolicyRule]:
    """Parse the ``path_rules``/``paths`` policy section into rule objects.

    Two shapes are accepted:

    * a list of rule mappings; non-mapping entries are skipped and each rule
      gets the fallback id ``path-<position>``;
    * a mapping of ``pattern -> rule mapping``; the key is the rule's pattern
      unless the rule mapping names its own ``pattern``/``glob``/``path``,
      and the key is also the fallback id.

    Any other value yields no rules.
    """
    if value is None:
        return []
    if isinstance(value, dict):
        rules: list[LocalPathPolicyRule] = []
        for pattern, raw in value.items():
            mapping = dict(raw) if isinstance(raw, dict) else {}
            if not (mapping.get("pattern") or mapping.get("glob") or mapping.get("path")):
                # Without this the key was discarded and the rule fell back
                # to "*", applying its labels/deny to every path.
                mapping["pattern"] = pattern
            rules.append(LocalPathPolicyRule.from_mapping(mapping, fallback_id=str(pattern)))
        return rules
    if isinstance(value, list):
        return [
            LocalPathPolicyRule.from_mapping(raw, fallback_id=f"path-{index + 1}")
            for index, raw in enumerate(value)
            if isinstance(raw, dict)
        ]
    return []
redacted = dict(item) + if "text" in redacted: + redacted["text"] = "[redacted by policy]" + if "value" in redacted: + redacted["value"] = None + policy = dict(redacted.get("policy") or {}) + policy["redacted"] = True + redacted["policy"] = policy + return redacted + + +def _denied_diagnostic(decision: PolicyDecision, *, redacted: bool = False) -> Diagnostic: + return Diagnostic( + severity="warning", + code="policy.result.redacted" if redacted else "policy.result.denied", + message=( + f"Policy redacted `{decision.object_id}`: {decision.reason}" + if redacted + else f"Policy denied `{decision.object_id}`: {decision.reason}" + ), + rule_id=decision.rule_id, + details={"decision_id": decision.decision_id, "effect": decision.effect}, + ) + + +def _string_list(value: Any) -> list[str]: + if value is None: + return [] + if isinstance(value, list): + return [str(item) for item in value if item is not None] + return [str(value)] + + +def _unique(values: list[str]) -> list[str]: + seen: set[str] = set() + result: list[str] = [] + for value in values: + normalized = str(value).strip() + key = normalized.lower() + if normalized and key not in seen: + result.append(normalized) + seen.add(key) + return result diff --git a/src/markitect_tool/policy/models.py b/src/markitect_tool/policy/models.py new file mode 100644 index 0000000..08a9878 --- /dev/null +++ b/src/markitect_tool/policy/models.py @@ -0,0 +1,149 @@ +"""Policy gateway models shared by local and external policy adapters.""" + +from __future__ import annotations + +import hashlib +import json +from dataclasses import asdict, dataclass, field +from typing import Any + + +@dataclass(frozen=True) +class PolicySubject: + """Actor asking to read, query, search, or package knowledge.""" + + id: str + allowed_labels: list[str] = field(default_factory=list) + trust_zones: list[str] = field(default_factory=list) + roles: list[str] = field(default_factory=list) + allowed_actions: list[str] = field(default_factory=list) + 
@dataclass(frozen=True)
class PolicyFilterResult:
    """Results after policy filtering plus decision and diagnostic metadata."""

    # Results that survived filtering (allowed, audit-denied, or redacted).
    results: list[dict[str, Any]]
    # One decision per evaluated input result, in input order.
    decisions: list[PolicyDecision] = field(default_factory=list)
    # Diagnostic objects or already-serialized dicts for denied/redacted items.
    diagnostics: list[Any] = field(default_factory=list)
    mode: str = "enforce"
    subject: str | None = None
    action: str | None = None

    @property
    def filtered(self) -> bool:
        """Whether any result was removed or had its content redacted.

        Audit-denied results are returned unchanged, so they do not count.
        """
        return any(decision.effect in {"deny", "redact"} for decision in self.decisions)

    @property
    def denied_count(self) -> int:
        """Number of decisions with effect ``deny``."""
        return sum(1 for decision in self.decisions if decision.effect == "deny")

    @property
    def redacted_count(self) -> int:
        """Number of decisions with effect ``redact``."""
        return sum(1 for decision in self.decisions if decision.effect == "redact")

    @property
    def audit_denied_count(self) -> int:
        """Number of decisions with effect ``audit_denied``."""
        return sum(1 for decision in self.decisions if decision.effect == "audit_denied")

    def to_dict(self) -> dict[str, Any]:
        """Serialize the envelope to plain dicts and lists.

        ``results``, ``decisions``, ``diagnostics``, and ``policy`` are always
        present, even when empty, so callers can index them after every
        result was denied.
        """
        return {
            "results": self.results,
            "decisions": [decision.to_dict() for decision in self.decisions],
            "diagnostics": [
                diagnostic.to_dict() if hasattr(diagnostic, "to_dict") else diagnostic
                for diagnostic in self.diagnostics
            ],
            "policy": {
                "mode": self.mode,
                "subject": self.subject,
                "action": self.action,
                "filtered": self.filtered,
                "allowed": sum(1 for decision in self.decisions if decision.effect == "allow"),
                "denied": self.denied_count,
                "redacted": self.redacted_count,
                "audit_denied": self.audit_denied_count,
                "total_decisions": len(self.decisions),
            },
        }
"runtime.assessment" in ids + assert "policy.local-label" in ids def test_builtin_processor_descriptors_capture_safety_and_provenance(): @@ -81,3 +82,18 @@ def test_builtin_runtime_descriptors_expose_boundaries(): assert {capability.id for capability in form_state.capabilities} >= {"forms", "rules"} assert assessment.kind == "assessment-runner" assert assessment.safety["provider_calls"] == "adapter-only" + + +def test_builtin_policy_descriptor_exposes_cli_and_adapter_boundary(): + registry = builtin_extension_registry() + + descriptor = registry.get("policy.local-label") + + assert descriptor.kind == "policy-gateway" + assert descriptor.safety["network"] is False + assert {capability.id for capability in descriptor.capabilities} >= { + "policy", + "policy_filter", + } + assert "mkt policy check" in descriptor.cli["commands"] + assert "RelationshipPolicyAdapter" in descriptor.metadata["external_adapters"] diff --git a/tests/test_policy_gateway.py b/tests/test_policy_gateway.py new file mode 100644 index 0000000..d16010b --- /dev/null +++ b/tests/test_policy_gateway.py @@ -0,0 +1,211 @@ +import json +from pathlib import Path + +from click.testing import CliRunner + +from markitect_tool.cli import main +from markitect_tool.policy import LocalLabelPolicy, LocalLabelPolicyGateway + + +POLICY_TEXT = """id: example-policy +mode: enforce +default_labels: [public] +default_subject: public-agent +subjects: + public-agent: + allowed_labels: [public] + trust_zones: [public] + internal-agent: + allowed_labels: [public, internal] + trust_zones: [public, internal] +path_rules: + - id: private-path + pattern: private/** + labels: [internal] + trust_zone: internal +""" + + +def test_local_label_policy_authorizes_by_labels_and_path_rules(): + gateway = LocalLabelPolicyGateway(LocalLabelPolicy.from_mapping(_policy_mapping())) + + public = gateway.authorize( + "public-agent", + "query", + "public.md", + context={"object": {"path": "public.md", "labels": ["public"], "trust_zone": 
"public"}}, + ) + private = gateway.authorize( + "public-agent", + "query", + "private/doc.md", + context={"object": {"path": "private/doc.md"}}, + ) + internal = gateway.authorize( + "internal-agent", + "query", + "private/doc.md", + context={"object": {"path": "private/doc.md"}}, + ) + + assert public["allowed"] is True + assert private["allowed"] is False + assert private["effect"] == "deny" + assert "lacks labels" in private["reason"] + assert internal["allowed"] is True + + +def test_policy_filter_can_redact_denied_results(): + policy = LocalLabelPolicy.from_mapping(_policy_mapping() | {"on_denied": "redact"}) + gateway = LocalLabelPolicyGateway(policy) + + result = gateway.filter_results( + "public-agent", + "search", + [ + {"path": "public.md", "text": "Visible", "policy": {"labels": ["public"]}}, + {"path": "private/doc.md", "text": "Secret"}, + ], + ) + + assert result["policy"]["redacted"] == 1 + assert len(result["results"]) == 2 + assert result["results"][1]["text"] == "[redacted by policy]" + assert result["diagnostics"][0]["code"] == "policy.result.redacted" + + +def test_policy_audit_mode_keeps_results_but_records_would_deny(): + gateway = LocalLabelPolicyGateway(LocalLabelPolicy.from_mapping(_policy_mapping()), mode="audit") + + result = gateway.filter_results( + "public-agent", + "query", + [{"path": "private/doc.md", "text": "Internal"}], + ) + + assert len(result["results"]) == 1 + assert result["policy"]["audit_denied"] == 1 + assert result["results"][0]["policy"]["effect"] == "audit_denied" + + +def test_mkt_policy_check_reports_denied_decision(tmp_path: Path): + policy_file = tmp_path / "policy.yaml" + policy_file.write_text(POLICY_TEXT, encoding="utf-8") + + result = CliRunner().invoke( + main, + [ + "policy", + "check", + "public-agent", + "query", + "private/doc.md", + "--policy", + str(policy_file), + "--path", + "private/doc.md", + ], + ) + + assert result.exit_code == 1 + assert "denied" in result.output + assert "lacks labels" in 
def test_mkt_search_filters_local_index_results_by_policy(tmp_path: Path):
    """`mkt search --policy` drops hits the subject is not cleared for.

    Indexes one public and one `private/` document, searches as
    `public-agent`, and expects the private hit to be missing while the
    policy summary reports at least one denial.
    """
    policy_file = tmp_path / "policy.yaml"
    private_dir = tmp_path / "private"
    private_dir.mkdir()
    policy_file.write_text(POLICY_TEXT, encoding="utf-8")
    (tmp_path / "public.md").write_text("# Public\n\nKnowledge for everyone.\n", encoding="utf-8")
    (private_dir / "restricted.md").write_text(
        "# Restricted\n\nKnowledge for internal work.\n",
        encoding="utf-8",
    )
    runner = CliRunner()

    indexed = runner.invoke(main, ["cache", "index", str(tmp_path), "--root", str(tmp_path)])
    # Check exit codes before parsing output so a CLI failure is reported
    # with its own output instead of as a JSON decode error.
    assert indexed.exit_code == 0, indexed.output

    result = runner.invoke(
        main,
        [
            "search",
            "Knowledge",
            "--root",
            str(tmp_path),
            "--policy",
            str(policy_file),
            "--subject",
            "public-agent",
            "--format",
            "json",
        ],
    )
    assert result.exit_code == 0, result.output
    data = json.loads(result.output)

    assert data["count"] >= 1
    assert all("private/restricted.md" != match["path"] for match in data["matches"])
    assert data["policy"]["denied"] >= 1
data["matches"][0]["source_path"] == "public.md" + assert data["policy"]["denied"] == 1 + + +def _policy_mapping() -> dict: + return { + "id": "example-policy", + "mode": "enforce", + "default_labels": ["public"], + "default_subject": "public-agent", + "subjects": { + "public-agent": {"allowed_labels": ["public"], "trust_zones": ["public"]}, + "internal-agent": { + "allowed_labels": ["public", "internal"], + "trust_zones": ["public", "internal"], + }, + }, + "path_rules": [ + { + "id": "private-path", + "pattern": "private/**", + "labels": ["internal"], + "trust_zone": "internal", + } + ], + } diff --git a/workplans/MKTT-WP-0009-access-controlled-knowledge-gateway.md b/workplans/MKTT-WP-0009-access-controlled-knowledge-gateway.md index aea5026..22b849e 100644 --- a/workplans/MKTT-WP-0009-access-controlled-knowledge-gateway.md +++ b/workplans/MKTT-WP-0009-access-controlled-knowledge-gateway.md @@ -3,10 +3,10 @@ id: MKTT-WP-0009 type: workplan title: "Access-Controlled Knowledge Gateway" domain: markitect -status: todo +status: done owner: markitect-tool topic_slug: markitect -planning_priority: P2 +planning_priority: complete planning_order: 80 depends_on_workplans: - MKTT-WP-0006 @@ -22,11 +22,30 @@ state_hub_workstream_id: "f36acbc9-881d-46f2-9181-67de228df0c2" Add a policy boundary for cached retrieval and context packages so Markitect can support security-sensitive knowledge systems and agent workflows. 
+## Implementation Summary + +Implemented as an internal policy-gateway extension, centered on +`LocalLabelPolicyGateway`: + +- local labels, trust zones, and path rules +- enforce, audit, and off modes +- drop or redact behavior for denied results +- stable `PolicyDecision` ids and explainable decision envelopes +- denied/redacted result diagnostics +- policy-aware `mkt cache query` and `mkt search` +- `mkt policy check` +- relationship and rule policy adapter protocols for future + Zanzibar/OpenFGA/SpiceDB, OPA/Rego, and Cedar integrations + +Documentation: `docs/access-control-policy-gateway.md`. + +Examples: `examples/policy/`. + ## P9.1 - Define access-control ladder ```task id: MKTT-WP-0009-T001 -status: todo +status: done priority: high state_hub_task_id: "acf240b4-7210-4ee5-90b6-2f2fe1438439" ``` @@ -43,7 +62,7 @@ Specify supported modes: ```task id: MKTT-WP-0009-T002 -status: todo +status: done priority: high state_hub_task_id: "9eb589d2-82f2-4282-9af0-3958826d397d" ``` @@ -54,7 +73,7 @@ Start with local policy labels and diagnostics for denied or redacted results. ```task id: MKTT-WP-0009-T003 -status: todo +status: done priority: high state_hub_task_id: "d78ab623-c472-4b24-ad84-08464b574886" ``` @@ -66,7 +85,7 @@ metadata must report whether policy filtering occurred. ```task id: MKTT-WP-0009-T004 -status: todo +status: done priority: medium state_hub_task_id: "bd4c2b7a-6eac-4845-b5c8-9f9c64946f0c" ``` @@ -78,7 +97,7 @@ checks without binding the core package to any one service. ```task id: MKTT-WP-0009-T005 -status: todo +status: done priority: medium state_hub_task_id: "752f1962-e83c-44cc-a1c1-0f89a4ea2a90" ``` @@ -89,7 +108,7 @@ Design an adapter boundary for OPA/Rego and Cedar-style rule policies. ```task id: MKTT-WP-0009-T006 -status: todo +status: done priority: medium state_hub_task_id: "990f01fa-5008-4871-a887-1c6ab4375605" ``` @@ -103,3 +122,10 @@ reason, and provenance. - Query and context package results are policy-aware. 
- More rigid authorization engines can attach later without replacing the query/cache framework. + +## Notes + +Context package filtering is represented through the same gateway and adapter +protocols, but persistent context package storage remains future WP-0008 scope. +Persistent decision-log storage is also intentionally deferred to backend work; +the local gateway explains decisions made by the current instance.