Files
markitect-tool/src/markitect_tool/policy/local.py

483 lines
18 KiB
Python

"""Local label policy gateway for cache, query, and context-package results."""
from __future__ import annotations
import fnmatch
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
import yaml
from markitect_tool.diagnostics import Diagnostic
from markitect_tool.policy.models import (
PolicyDecision,
PolicyFilterResult,
PolicyObject,
PolicySubject,
)
# Policy modes accepted by this module. `LocalLabelPolicy.from_mapping` falls
# back to "enforce" for any other value, while the gateway constructor raises
# ValueError when an explicit `mode` override is not one of these.
POLICY_MODES = {"off", "audit", "enforce"}
# Accepted values for a policy's `on_denied` setting; `from_mapping` falls
# back to "drop" for anything else.
DENIED_BEHAVIOR = {"drop", "redact"}
@dataclass(frozen=True)
class LocalPathPolicyRule:
    """Glob rule over object paths carrying labels, a trust zone, and a deny flag."""

    pattern: str
    labels: list[str] = field(default_factory=list)
    trust_zone: str | None = None
    deny: bool = False
    id: str | None = None

    @classmethod
    def from_mapping(cls, raw: dict[str, Any], *, fallback_id: str) -> "LocalPathPolicyRule":
        """Build a rule from a raw mapping.

        The glob is read from ``pattern``, ``glob`` or ``path`` (first truthy
        one wins) and defaults to ``"*"``. Labels come from ``labels`` or
        ``label``, the zone from ``trust_zone`` or ``zone``, and the id from
        ``id`` or, failing that, ``fallback_id``.
        """
        glob = raw.get("pattern") or raw.get("glob") or raw.get("path") or "*"
        label_spec = raw.get("labels") or raw.get("label")
        zone = raw.get("trust_zone") or raw.get("zone")
        rule_id = raw.get("id") or fallback_id
        return cls(
            pattern=str(glob),
            labels=_string_list(label_spec),
            trust_zone=zone,
            deny=bool(raw.get("deny", False)),
            id=rule_id,
        )

    def matches(self, path: str | None) -> bool:
        """Return whether ``path`` is non-empty and matches this rule's glob."""
        if not path:
            return False
        return fnmatch.fnmatch(path, self.pattern)
@dataclass(frozen=True)
class LocalLabelPolicy:
    """Declarative local policy for labels, trust zones, and path ACLs."""

    id: str = "local-label-policy"
    mode: str = "enforce"
    default_labels: list[str] = field(default_factory=lambda: ["public"])
    default_trust_zone: str | None = None
    default_subject: str = "anonymous"
    on_denied: str = "drop"
    subjects: dict[str, PolicySubject] = field(default_factory=dict)
    path_rules: list[LocalPathPolicyRule] = field(default_factory=list)
    metadata: dict[str, Any] = field(default_factory=dict)

    @classmethod
    def from_mapping(cls, raw: dict[str, Any]) -> "LocalLabelPolicy":
        """Build a policy from a raw mapping.

        The settings are read from ``raw["policy"]`` when that value is a
        mapping, otherwise from ``raw`` itself. When the configured default
        subject has no explicit entry under ``subjects``, one is synthesised
        from ``default_allowed_labels`` (or ``default_labels``, or
        ``["public"]``) and ``default_trust_zones``.

        Unrecognised ``mode`` values fall back to ``"enforce"`` and
        unrecognised ``on_denied`` values fall back to ``"drop"``.
        """
        policy = raw.get("policy") if isinstance(raw.get("policy"), dict) else raw

        subjects = _subjects_from_mapping(policy.get("subjects"))
        default_subject = str(policy.get("default_subject", "anonymous"))
        if default_subject not in subjects:
            subjects[default_subject] = PolicySubject(
                id=default_subject,
                allowed_labels=_string_list(
                    policy.get("default_allowed_labels") or policy.get("default_labels") or ["public"]
                ),
                trust_zones=_string_list(policy.get("default_trust_zones")),
            )

        raw_mode = policy.get("mode", "enforce")
        if raw_mode is False:
            # YAML 1.1 loaders such as PyYAML's safe_load parse an unquoted
            # `off` as boolean False; without this the "off" mode could only
            # be selected by quoting it and would otherwise become "enforce".
            raw_mode = "off"
        mode = str(raw_mode).strip().lower()
        if mode not in POLICY_MODES:
            mode = "enforce"

        on_denied = str(policy.get("on_denied", "drop")).strip().lower()
        if on_denied not in DENIED_BEHAVIOR:
            on_denied = "drop"

        return cls(
            id=str(policy.get("id", "local-label-policy")),
            mode=mode,
            default_labels=_string_list(policy.get("default_labels") or ["public"]),
            default_trust_zone=policy.get("default_trust_zone"),
            default_subject=default_subject,
            on_denied=on_denied,
            subjects=subjects,
            path_rules=_path_rules_from_value(policy.get("path_rules") or policy.get("paths")),
            metadata=dict(policy.get("metadata") or {}),
        )

    @classmethod
    def from_file(cls, path: str | Path) -> "LocalLabelPolicy":
        """Load a policy from a UTF-8 YAML file.

        An empty document is treated as an empty mapping.

        Raises:
            ValueError: If the parsed document is not a mapping.
        """
        policy_path = Path(path)
        data = yaml.safe_load(policy_path.read_text(encoding="utf-8")) or {}
        if not isinstance(data, dict):
            raise ValueError("Policy file must contain a mapping.")
        return cls.from_mapping(data)
class LocalLabelPolicyGateway:
    """AccessPolicyGateway implementation for local label policies.

    Every decision produced by :meth:`decide` is remembered on the instance,
    keyed by its ``decision_id``, so :meth:`explain_decision` can return it.
    """

    gateway_id = "policy.local-label"

    def __init__(
        self,
        policy: LocalLabelPolicy | dict[str, Any] | None = None,
        *,
        mode: str | None = None,
    ) -> None:
        """Create a gateway from a policy object, a raw mapping, or defaults.

        Args:
            policy: A ``LocalLabelPolicy``, a mapping passed to
                ``LocalLabelPolicy.from_mapping``, or any other value
                (including ``None``) for a default-constructed policy.
            mode: Optional override for the policy mode; it is stripped and
                lower-cased before validation.

        Raises:
            ValueError: If ``mode`` is given but not in ``POLICY_MODES``.
        """
        if isinstance(policy, LocalLabelPolicy):
            loaded = policy
        elif isinstance(policy, dict):
            loaded = LocalLabelPolicy.from_mapping(policy)
        else:
            loaded = LocalLabelPolicy()
        if mode:
            normalized = mode.strip().lower()
            if normalized not in POLICY_MODES:
                raise ValueError(f"Unsupported policy mode `{mode}`.")
            # The policy dataclass is frozen, so build a copy with the new mode.
            loaded = LocalLabelPolicy(
                id=loaded.id,
                mode=normalized,
                default_labels=loaded.default_labels,
                default_trust_zone=loaded.default_trust_zone,
                default_subject=loaded.default_subject,
                on_denied=loaded.on_denied,
                subjects=loaded.subjects,
                path_rules=loaded.path_rules,
                metadata=loaded.metadata,
            )
        self.policy = loaded
        self._decisions: dict[str, PolicyDecision] = {}

    @classmethod
    def from_file(
        cls,
        path: str | Path,
        *,
        mode: str | None = None,
    ) -> "LocalLabelPolicyGateway":
        """Create a gateway from a policy file, optionally overriding its mode."""
        return cls(LocalLabelPolicy.from_file(path), mode=mode)

    def authorize(
        self,
        subject: str,
        action: str,
        object_id: str,
        context: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Authorize one action against one object and return the decision as a dict."""
        decision = self.decide(subject, action, object_id, context=context)
        return decision.to_dict()

    def decide(
        self,
        subject: str,
        action: str,
        object_id: str,
        context: dict[str, Any] | None = None,
    ) -> PolicyDecision:
        """Evaluate one action against one object and remember the decision.

        ``context`` may carry a ``subject`` mapping (per-call additions to the
        configured subject), an ``object`` mapping (path, labels, trust zone,
        attributes) and a ``result`` mapping (the raw result being checked).
        """
        subject_model = self._subject(subject, context)
        object_model = self._object(object_id, context)
        decision = self._evaluate(subject_model, action, object_model)
        self._decisions[decision.decision_id] = decision
        return decision

    def filter_results(
        self,
        subject: str,
        action: str,
        results: list[dict[str, Any]],
        context: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Filter results and return policy decisions.

        Results whose decision effect is ``allow`` or ``audit_denied`` are
        kept, annotated with the decision. ``redact`` results are kept after
        redaction and reported with a diagnostic. Any other effect drops the
        result and reports a diagnostic. The caller's ``context`` is not
        modified.
        """
        kept: list[dict[str, Any]] = []
        diagnostics: list[Diagnostic] = []
        decisions: list[PolicyDecision] = []
        for index, item in enumerate(results):
            object_id = _object_id_for_result(item, index)
            item_context = dict(context or {})
            item_context["result"] = item
            # `dict(context)` is a shallow copy, so the nested "object" mapping
            # is copied as well before it is updated; otherwise the caller's
            # dict would be mutated on every iteration.
            shared_object = item_context.get("object")
            object_mapping = dict(shared_object) if isinstance(shared_object, dict) else {}
            object_mapping.update(_policy_object_mapping(item))
            item_context["object"] = object_mapping
            decision = self.decide(subject, action, object_id, context=item_context)
            decisions.append(decision)
            annotated = _annotate_result(item, decision)
            if decision.effect == "allow" or decision.effect == "audit_denied":
                kept.append(annotated)
            elif decision.effect == "redact":
                kept.append(_redact_result(annotated))
                diagnostics.append(_denied_diagnostic(decision, redacted=True))
            else:
                diagnostics.append(_denied_diagnostic(decision))
        result = PolicyFilterResult(
            results=kept,
            decisions=decisions,
            diagnostics=diagnostics,
            mode=self.policy.mode,
            subject=subject,
            action=action,
        )
        return result.to_dict()

    def explain_decision(self, decision_id: str) -> dict[str, Any]:
        """Explain one policy decision made by this gateway instance.

        Raises:
            KeyError: If this instance never produced ``decision_id``.
        """
        try:
            return self._decisions[decision_id].to_dict()
        except KeyError as exc:
            raise KeyError(f"Unknown policy decision `{decision_id}`") from exc

    def _base_subject(self, subject: str) -> PolicySubject:
        """Return the configured subject, the default subject, or a synthesised default.

        A directly constructed ``LocalLabelPolicy`` may have no entry for its
        default subject (``from_mapping`` always adds one). In that case a
        subject limited to the policy's default labels is synthesised instead
        of raising ``KeyError``.
        """
        subjects = self.policy.subjects
        known = subjects.get(subject) or subjects.get(self.policy.default_subject)
        if known is not None:
            return known
        return PolicySubject(
            id=self.policy.default_subject,
            allowed_labels=list(self.policy.default_labels),
            trust_zones=[],
        )

    def _subject(self, subject: str, context: dict[str, Any] | None) -> PolicySubject:
        """Resolve the subject model, merging any ``context["subject"]`` additions.

        Context values are appended to the configured subject's lists (and
        merged into its attributes); they never remove configured entries.
        """
        base = self._base_subject(subject)
        if context and isinstance(context.get("subject"), dict):
            override = context["subject"]
            return PolicySubject(
                id=subject,
                allowed_labels=_unique(
                    base.allowed_labels
                    + _string_list(override.get("allowed_labels") or override.get("labels"))
                ),
                trust_zones=_unique(
                    base.trust_zones
                    + _string_list(override.get("trust_zones") or override.get("zones"))
                ),
                roles=_unique(base.roles + _string_list(override.get("roles"))),
                allowed_actions=_unique(
                    base.allowed_actions
                    + _string_list(override.get("allowed_actions") or override.get("actions"))
                ),
                path_allow=_unique(
                    base.path_allow
                    + _string_list(override.get("path_allow") or override.get("allow_paths"))
                ),
                path_deny=_unique(
                    base.path_deny
                    + _string_list(override.get("path_deny") or override.get("deny_paths"))
                ),
                attributes=base.attributes | dict(override.get("attributes") or {}),
            )
        return base

    def _object(self, object_id: str, context: dict[str, Any] | None) -> PolicyObject:
        """Build the object model from context and apply matching path rules.

        Labels are the union of the policy defaults, the object/result labels
        and the labels of every matching path rule. The trust zone comes from
        the object (or the policy default) unless a matching rule sets one, in
        which case the last such rule wins. Matched rule ids and the
        aggregated deny flag are recorded in the object's attributes.
        """
        # Missing, None, or non-mapping entries are treated as empty so a
        # sparse context cannot crash the evaluation.
        raw_object = context.get("object") if context else None
        if not isinstance(raw_object, dict):
            raw_object = {}
        result = context.get("result") if context else None
        if not isinstance(result, dict):
            result = {}

        path = raw_object.get("path") or result.get("source_path") or result.get("path")
        labels = _unique(
            self.policy.default_labels
            + _string_list(raw_object.get("labels") or raw_object.get("label"))
            + _string_list(result.get("labels"))
        )
        trust_zone = raw_object.get("trust_zone") or self.policy.default_trust_zone
        deny_by_path = False
        matched_rules: list[str] = []
        for rule in self.policy.path_rules:
            if not rule.matches(path):
                continue
            matched_rules.append(rule.id or rule.pattern)
            labels = _unique(labels + rule.labels)
            trust_zone = rule.trust_zone or trust_zone
            deny_by_path = deny_by_path or rule.deny
        attributes = dict(raw_object.get("attributes") or {})
        attributes["matched_path_rules"] = matched_rules
        attributes["deny_by_path_rule"] = deny_by_path
        return PolicyObject(
            id=object_id,
            path=path,
            labels=labels,
            trust_zone=trust_zone,
            attributes=attributes,
        )

    def _evaluate(
        self,
        subject: PolicySubject,
        action: str,
        policy_object: PolicyObject,
    ) -> PolicyDecision:
        """Turn the denial check into a decision according to the policy mode.

        ``off`` always allows. ``audit`` reports denials as ``audit_denied``.
        Otherwise a denial becomes ``redact`` or ``deny`` depending on the
        policy's ``on_denied`` setting.
        """
        if self.policy.mode == "off":
            return self._decision(subject, action, policy_object, "allow", "policy mode is off")
        denial_reason, rule_id = self._denial_reason(subject, action, policy_object)
        if denial_reason is None:
            return self._decision(subject, action, policy_object, "allow", "label policy allowed")
        if self.policy.mode == "audit":
            return self._decision(subject, action, policy_object, "audit_denied", denial_reason, rule_id)
        if self.policy.on_denied == "redact":
            return self._decision(subject, action, policy_object, "redact", denial_reason, rule_id)
        return self._decision(subject, action, policy_object, "deny", denial_reason, rule_id)

    def _denial_reason(
        self,
        subject: PolicySubject,
        action: str,
        policy_object: PolicyObject,
    ) -> tuple[str | None, str | None]:
        """Return ``(reason, rule_id)`` for the first failing check, or ``(None, None)``.

        Checks run in this order: path-rule deny, subject action allow list,
        subject path deny list, subject path allow list, labels, trust zone.
        """
        if policy_object.attributes.get("deny_by_path_rule"):
            return "object path is denied by local path policy", "path.deny"
        # An empty allowed_actions list places no restriction on actions.
        if subject.allowed_actions and action not in subject.allowed_actions:
            return f"subject `{subject.id}` is not allowed to perform `{action}`", "subject.action"
        if policy_object.path:
            if any(fnmatch.fnmatch(policy_object.path, pattern) for pattern in subject.path_deny):
                return "object path is denied for subject", "subject.path_deny"
            if subject.path_allow and not any(
                fnmatch.fnmatch(policy_object.path, pattern) for pattern in subject.path_allow
            ):
                return "object path is outside subject allow list", "subject.path_allow"
        # Every label on the object must be in the subject's allowed labels.
        missing_labels = sorted(set(policy_object.labels) - set(subject.allowed_labels))
        if missing_labels:
            return (
                f"subject `{subject.id}` lacks labels {missing_labels}",
                "labels",
            )
        # Trust zones are only enforced when both sides declare one.
        if policy_object.trust_zone and subject.trust_zones and policy_object.trust_zone not in subject.trust_zones:
            return (
                f"subject `{subject.id}` is outside trust zone `{policy_object.trust_zone}`",
                "trust_zone",
            )
        return None, None

    def _decision(
        self,
        subject: PolicySubject,
        action: str,
        policy_object: PolicyObject,
        effect: str,
        reason: str,
        rule_id: str | None = None,
    ) -> PolicyDecision:
        """Assemble a ``PolicyDecision`` for the given subject, action and object."""
        return PolicyDecision(
            subject=subject.id,
            action=action,
            object_id=policy_object.id,
            effect=effect,
            reason=reason,
            mode=self.policy.mode,
            rule_id=rule_id,
            labels=policy_object.labels,
            trust_zone=policy_object.trust_zone,
            metadata={"path": policy_object.path, "policy_id": self.policy.id},
        )
def policy_metadata_from_document(
    document: dict[str, Any],
    *,
    path: str | None = None,
) -> dict[str, Any]:
    """Extract stable policy metadata from parsed document frontmatter.

    Labels are collected from ``frontmatter.policy.labels``/``label``,
    ``frontmatter.labels``/``label`` and ``frontmatter.classification``. The
    trust zone comes from ``frontmatter.policy.trust_zone``/``zone`` or
    ``frontmatter.trust_zone``. Entries whose value is ``None``, ``[]`` or
    ``{}`` are left out of the returned mapping.

    A non-mapping ``document``, ``frontmatter`` or ``policy`` value (for
    example ``frontmatter: None`` on a document without frontmatter) is
    treated as empty instead of raising ``AttributeError``.

    Args:
        document: Parsed document mapping; ``frontmatter`` and ``source_path``
            are the keys read from it.
        path: Explicit path that takes precedence over ``source_path``.
    """
    doc = document if isinstance(document, dict) else {}
    frontmatter = doc.get("frontmatter")
    if not isinstance(frontmatter, dict):
        frontmatter = {}
    policy = frontmatter.get("policy")
    if not isinstance(policy, dict):
        policy = {}

    labels = _unique(
        _string_list(policy.get("labels") or policy.get("label"))
        + _string_list(frontmatter.get("labels") or frontmatter.get("label"))
        + _string_list(frontmatter.get("classification"))
    )
    data = {
        "path": path or doc.get("source_path"),
        "labels": labels,
        "trust_zone": policy.get("trust_zone") or policy.get("zone") or frontmatter.get("trust_zone"),
        "attributes": {
            "document_type": frontmatter.get("document_type") or frontmatter.get("type"),
            "title": frontmatter.get("title"),
        },
    }
    return {key: value for key, value in data.items() if value not in (None, [], {})}
def _subjects_from_mapping(value: Any) -> dict[str, PolicySubject]:
    """Convert a ``{subject_id: spec}`` mapping into ``PolicySubject`` objects.

    A non-mapping ``value`` yields an empty dict, and a non-mapping spec
    yields a subject whose lists and attributes are all empty.
    """
    if not isinstance(value, dict):
        return {}

    subjects: dict[str, PolicySubject] = {}
    for subject_id, raw in value.items():
        # Normalise once so every field below can read from a mapping.
        spec = raw if isinstance(raw, dict) else {}
        key = str(subject_id)
        subjects[key] = PolicySubject(
            id=key,
            allowed_labels=_string_list(
                spec.get("allowed_labels") or spec.get("labels") or spec.get("clearance")
            ),
            trust_zones=_string_list(spec.get("trust_zones") or spec.get("zones")),
            roles=_string_list(spec.get("roles")),
            allowed_actions=_string_list(spec.get("allowed_actions") or spec.get("actions")),
            path_allow=_string_list(spec.get("path_allow") or spec.get("allow_paths")),
            path_deny=_string_list(spec.get("path_deny") or spec.get("deny_paths")),
            attributes=dict(spec.get("attributes") or {}),
        )
    return subjects
def _path_rules_from_value(value: Any) -> list[LocalPathPolicyRule]:
    """Build path rules from the ``path_rules``/``paths`` policy setting.

    Two shapes are accepted:

    * a mapping ``{pattern: spec}``, where the key is used as the rule's
      pattern unless the spec names its own ``pattern``/``glob``/``path``,
      and also serves as the fallback rule id;
    * a list of spec mappings, whose fallback ids are ``path-1``, ``path-2``,
      ... by list position (non-mapping entries are skipped).

    Any other value yields no rules.
    """
    if value is None:
        return []
    if isinstance(value, dict):
        rules: list[LocalPathPolicyRule] = []
        for pattern, raw in value.items():
            spec = dict(raw) if isinstance(raw, dict) else {}
            # Without this, a spec lacking an explicit pattern would fall back
            # to "*" and apply its labels or deny flag to every path.
            if not (spec.get("pattern") or spec.get("glob") or spec.get("path")):
                spec["pattern"] = pattern
            rules.append(LocalPathPolicyRule.from_mapping(spec, fallback_id=str(pattern)))
        return rules
    if isinstance(value, list):
        return [
            LocalPathPolicyRule.from_mapping(raw, fallback_id=f"path-{index + 1}")
            for index, raw in enumerate(value)
            if isinstance(raw, dict)
        ]
    return []
def _policy_object_mapping(item: dict[str, Any]) -> dict[str, Any]:
policy = item.get("policy") if isinstance(item.get("policy"), dict) else {}
return {
"path": policy.get("path") or item.get("source_path") or item.get("path"),
"labels": policy.get("labels") or item.get("labels"),
"trust_zone": policy.get("trust_zone"),
"attributes": policy.get("attributes", {}),
}
def _object_id_for_result(item: dict[str, Any], index: int) -> str:
path = item.get("source_path") or item.get("path") or "<memory>"
unit = item.get("path") if item.get("source_path") else item.get("unit_index", index)
return f"{path}#{unit}"
def _annotate_result(item: dict[str, Any], decision: PolicyDecision) -> dict[str, Any]:
annotated = dict(item)
policy = dict(annotated.get("policy") or {})
policy.update(
{
"decision_id": decision.decision_id,
"effect": decision.effect,
"labels": decision.labels,
"trust_zone": decision.trust_zone,
}
)
annotated["policy"] = {key: value for key, value in policy.items() if value not in (None, [], {})}
return annotated
def _redact_result(item: dict[str, Any]) -> dict[str, Any]:
redacted = dict(item)
if "text" in redacted:
redacted["text"] = "[redacted by policy]"
if "value" in redacted:
redacted["value"] = None
policy = dict(redacted.get("policy") or {})
policy["redacted"] = True
redacted["policy"] = policy
return redacted
def _denied_diagnostic(decision: PolicyDecision, *, redacted: bool = False) -> Diagnostic:
    """Build the warning diagnostic reported for a denied or redacted result."""
    if redacted:
        code = "policy.result.redacted"
        message = f"Policy redacted `{decision.object_id}`: {decision.reason}"
    else:
        code = "policy.result.denied"
        message = f"Policy denied `{decision.object_id}`: {decision.reason}"

    details = {"decision_id": decision.decision_id, "effect": decision.effect}
    return Diagnostic(
        severity="warning",
        code=code,
        message=message,
        rule_id=decision.rule_id,
        details=details,
    )
def _string_list(value: Any) -> list[str]:
if value is None:
return []
if isinstance(value, list):
return [str(item) for item in value if item is not None]
return [str(value)]
def _unique(values: list[str]) -> list[str]:
seen: set[str] = set()
result: list[str] = []
for value in values:
normalized = str(value).strip()
key = normalized.lower()
if normalized and key not in seen:
result.append(normalized)
seen.add(key)
return result