markitect-tool/src/markitect_tool/policy/local.py

"""Local label policy gateway for cache, query, and context-package results."""

from __future__ import annotations

import fnmatch
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any

import yaml

from markitect_tool.diagnostics import Diagnostic
from markitect_tool.policy.models import (
    PolicyDecision,
    PolicyFilterResult,
    PolicyObject,
    PolicySubject,
)


# Evaluation modes a policy can run in. Unknown values in a policy mapping
# fall back to "enforce"; an unknown gateway-level override raises ValueError.
POLICY_MODES = {
    "audit",
    "enforce",
    "off",
}
# Accepted `on_denied` settings: what happens to a denied result when the
# policy is enforced. Unknown values in a policy mapping fall back to "drop".
DENIED_BEHAVIOR = {
    "drop",
    "redact",
}


@dataclass(frozen=True)
class LocalPathPolicyRule:
    """Glob-style path rule that adds labels, assigns a trust zone, or denies."""

    # fnmatch-style glob matched against an object's path.
    pattern: str
    # Labels attached to every object whose path matches.
    labels: list[str] = field(default_factory=list)
    # Trust zone assigned to matching objects (None leaves it unchanged).
    trust_zone: str | None = None
    # When true, matching objects are denied outright.
    deny: bool = False
    # Identifier reported for the rule when it matches.
    id: str | None = None

    @classmethod
    def from_mapping(cls, raw: dict[str, Any], *, fallback_id: str) -> "LocalPathPolicyRule":
        """Build a rule from a loosely-typed mapping.

        Accepted aliases: ``pattern``/``glob``/``path`` (default ``"*"``),
        ``labels``/``label`` and ``trust_zone``/``zone``. ``fallback_id`` is
        used when the mapping carries no truthy ``id``.
        """
        glob = raw.get("pattern") or raw.get("glob") or raw.get("path") or "*"
        label_value = raw.get("labels") or raw.get("label")
        zone = raw.get("trust_zone") or raw.get("zone")
        rule_id = raw.get("id") or fallback_id
        return cls(
            pattern=str(glob),
            labels=_string_list(label_value),
            trust_zone=zone,
            deny=bool(raw.get("deny", False)),
            id=rule_id,
        )

    def matches(self, path: str | None) -> bool:
        """Return True when ``path`` is non-empty and matches the rule's glob."""
        if not path:
            return False
        return fnmatch.fnmatch(path, self.pattern)


@dataclass(frozen=True)
class LocalLabelPolicy:
    """Declarative local policy for labels, trust zones, and path ACLs."""

    # Identifier of the policy.
    id: str = "local-label-policy"
    # One of POLICY_MODES: "off", "audit" or "enforce".
    mode: str = "enforce"
    # Labels every object starts with before path rules add more.
    default_labels: list[str] = field(default_factory=lambda: ["public"])
    # Trust zone used when neither the object nor a path rule sets one.
    default_trust_zone: str | None = None
    # Subject id used when the requested subject is not configured.
    default_subject: str = "anonymous"
    # One of DENIED_BEHAVIOR: "drop" or "redact".
    on_denied: str = "drop"
    # Configured subjects keyed by subject id.
    subjects: dict[str, PolicySubject] = field(default_factory=dict)
    # Path rules, applied in order.
    path_rules: list[LocalPathPolicyRule] = field(default_factory=list)
    # Free-form metadata carried along with the policy.
    metadata: dict[str, Any] = field(default_factory=dict)

    @classmethod
    def from_mapping(cls, raw: dict[str, Any]) -> "LocalLabelPolicy":
        """Build a policy from a parsed mapping (for example a YAML document).

        The settings may sit at the top level or under a ``policy`` key. When
        the default subject is not listed under ``subjects`` it is synthesized
        from ``default_allowed_labels`` (falling back to ``default_labels``,
        then ``["public"]``) and ``default_trust_zones``. An unrecognized
        ``mode`` falls back to ``"enforce"`` and an unrecognized ``on_denied``
        falls back to ``"drop"``.
        """
        policy = raw.get("policy") if isinstance(raw.get("policy"), dict) else raw
        subjects = _subjects_from_mapping(policy.get("subjects"))
        default_subject = str(policy.get("default_subject", "anonymous"))
        if default_subject not in subjects:
            subjects[default_subject] = PolicySubject(
                id=default_subject,
                allowed_labels=_string_list(
                    policy.get("default_allowed_labels") or policy.get("default_labels") or ["public"]
                ),
                trust_zones=_string_list(policy.get("default_trust_zones")),
            )
        raw_mode = policy.get("mode", "enforce")
        # YAML 1.1 loaders such as PyYAML parse an unquoted `off` as the
        # boolean False. Map it back so `mode: off` is honoured instead of
        # being rejected as "false" and silently replaced by "enforce".
        if raw_mode is False:
            raw_mode = "off"
        mode = str(raw_mode).strip().lower()
        if mode not in POLICY_MODES:
            mode = "enforce"
        on_denied = str(policy.get("on_denied", "drop")).strip().lower()
        if on_denied not in DENIED_BEHAVIOR:
            on_denied = "drop"
        return cls(
            id=str(policy.get("id", "local-label-policy")),
            mode=mode,
            default_labels=_string_list(policy.get("default_labels") or ["public"]),
            default_trust_zone=policy.get("default_trust_zone"),
            default_subject=default_subject,
            on_denied=on_denied,
            subjects=subjects,
            path_rules=_path_rules_from_value(policy.get("path_rules") or policy.get("paths")),
            metadata=dict(policy.get("metadata") or {}),
        )

    @classmethod
    def from_file(cls, path: str | Path) -> "LocalLabelPolicy":
        """Load a policy from a UTF-8 YAML file.

        An empty file yields the defaults of ``from_mapping({})``.

        Raises:
            ValueError: if the document's top level is not a mapping.
        """
        policy_path = Path(path)
        data = yaml.safe_load(policy_path.read_text(encoding="utf-8")) or {}
        if not isinstance(data, dict):
            raise ValueError("Policy file must contain a mapping.")
        return cls.from_mapping(data)


class LocalLabelPolicyGateway:
    """AccessPolicyGateway implementation for local label policies.

    Every decision produced by ``decide`` (and therefore by ``authorize`` and
    ``filter_results``) is kept on the instance, keyed by its ``decision_id``,
    so that ``explain_decision`` can return it later.
    """

    gateway_id = "policy.local-label"

    def __init__(
        self,
        policy: LocalLabelPolicy | dict[str, Any] | None = None,
        *,
        mode: str | None = None,
    ) -> None:
        """Create a gateway.

        Args:
            policy: A ready policy, a raw mapping passed to
                ``LocalLabelPolicy.from_mapping``, or None for the defaults.
            mode: Optional override for the policy's mode.

        Raises:
            ValueError: if ``mode`` is given but is not in ``POLICY_MODES``.
        """
        if isinstance(policy, LocalLabelPolicy):
            loaded = policy
        elif isinstance(policy, dict):
            loaded = LocalLabelPolicy.from_mapping(policy)
        else:
            loaded = LocalLabelPolicy()
        if mode:
            # Local import: the module only imports `dataclass` and `field`.
            from dataclasses import replace

            normalized = mode.strip().lower()
            if normalized not in POLICY_MODES:
                raise ValueError(f"Unsupported policy mode `{mode}`.")
            # `replace` carries every other field over unchanged, so fields
            # added to the policy later cannot be dropped by accident.
            loaded = replace(loaded, mode=normalized)
        self.policy = loaded
        self._decisions: dict[str, PolicyDecision] = {}

    @classmethod
    def from_file(
        cls,
        path: str | Path,
        *,
        mode: str | None = None,
    ) -> "LocalLabelPolicyGateway":
        """Create a gateway from a policy file, optionally overriding the mode."""
        return cls(LocalLabelPolicy.from_file(path), mode=mode)

    def authorize(
        self,
        subject: str,
        action: str,
        object_id: str,
        context: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Authorize one action against one object and return the decision as a dict."""

        decision = self.decide(subject, action, object_id, context=context)
        return decision.to_dict()

    def decide(
        self,
        subject: str,
        action: str,
        object_id: str,
        context: dict[str, Any] | None = None,
    ) -> PolicyDecision:
        """Evaluate one action against one object and remember the decision.

        ``context`` may carry a ``subject`` mapping (extra grants merged into
        the configured subject), an ``object`` mapping (path, labels, trust
        zone, attributes) and a ``result`` mapping (the raw result item).
        """
        subject_model = self._subject(subject, context)
        object_model = self._object(object_id, context)
        decision = self._evaluate(subject_model, action, object_model)
        self._decisions[decision.decision_id] = decision
        return decision

    def filter_results(
        self,
        subject: str,
        action: str,
        results: list[dict[str, Any]],
        context: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Filter results and return policy decisions.

        Allowed and audit-denied items are kept (annotated with the decision),
        redacted items are kept with their content masked, and denied items
        are dropped. Redactions and denials each add a warning diagnostic.
        The caller's ``context`` is never modified.
        """

        kept: list[dict[str, Any]] = []
        diagnostics: list[Diagnostic] = []
        decisions: list[PolicyDecision] = []

        base_context = dict(context or {})
        shared_object = base_context.get("object")
        if not isinstance(shared_object, dict):
            shared_object = {}

        for index, item in enumerate(results):
            object_id = _object_id_for_result(item, index)
            item_context = dict(base_context)
            item_context["result"] = item
            # Build a fresh mapping per item: updating the caller's "object"
            # dict in place would leak per-item values back to the caller.
            item_context["object"] = {**shared_object, **_policy_object_mapping(item)}
            decision = self.decide(subject, action, object_id, context=item_context)
            decisions.append(decision)

            annotated = _annotate_result(item, decision)
            if decision.effect in ("allow", "audit_denied"):
                kept.append(annotated)
            elif decision.effect == "redact":
                kept.append(_redact_result(annotated))
                diagnostics.append(_denied_diagnostic(decision, redacted=True))
            else:
                diagnostics.append(_denied_diagnostic(decision))

        result = PolicyFilterResult(
            results=kept,
            decisions=decisions,
            diagnostics=diagnostics,
            mode=self.policy.mode,
            subject=subject,
            action=action,
        )
        return result.to_dict()

    def explain_decision(self, decision_id: str) -> dict[str, Any]:
        """Explain one policy decision made by this gateway instance.

        Raises:
            KeyError: if this instance never produced ``decision_id``.
        """

        try:
            return self._decisions[decision_id].to_dict()
        except KeyError as exc:
            raise KeyError(f"Unknown policy decision `{decision_id}`") from exc

    def _base_subject(self, subject: str) -> PolicySubject:
        """Return the configured subject, the default subject, or a synthesized default.

        A policy built directly (for example ``LocalLabelPolicy()``) has no
        subjects at all; indexing the default subject would then raise
        KeyError, so a default limited to the policy's default labels is
        synthesized instead.
        """
        subjects = self.policy.subjects
        configured = subjects.get(subject) or subjects.get(self.policy.default_subject)
        if configured is not None:
            return configured
        return PolicySubject(
            id=self.policy.default_subject,
            allowed_labels=list(self.policy.default_labels),
            trust_zones=[],
        )

    def _subject(self, subject: str, context: dict[str, Any] | None) -> PolicySubject:
        """Resolve the subject model, merging any ``context["subject"]`` overrides.

        Overrides are additive: their values are appended to the configured
        subject's lists (de-duplicated) and their attributes are merged on top.
        """
        base = self._base_subject(subject)
        override = context.get("subject") if context else None
        if not isinstance(override, dict):
            return base

        def extra(primary: str, alias: str | None = None) -> list[str]:
            # Same precedence as `override.get(primary) or override.get(alias)`.
            value = override.get(primary)
            if not value and alias is not None:
                value = override.get(alias)
            return _string_list(value)

        return PolicySubject(
            id=subject,
            allowed_labels=_unique(base.allowed_labels + extra("allowed_labels", "labels")),
            trust_zones=_unique(base.trust_zones + extra("trust_zones", "zones")),
            roles=_unique(base.roles + extra("roles")),
            allowed_actions=_unique(base.allowed_actions + extra("allowed_actions", "actions")),
            path_allow=_unique(base.path_allow + extra("path_allow", "allow_paths")),
            path_deny=_unique(base.path_deny + extra("path_deny", "deny_paths")),
            attributes=base.attributes | dict(override.get("attributes") or {}),
        )

    def _object(self, object_id: str, context: dict[str, Any] | None) -> PolicyObject:
        """Build the object model from context and apply the policy's path rules.

        Labels start from the policy defaults plus any labels on the object or
        result. Each matching path rule then adds its labels, may replace the
        trust zone (later rules win) and may mark the object as denied.
        """
        # Tolerate a missing, None or non-mapping "object"/"result" entry.
        raw_object = context.get("object") if context else None
        if not isinstance(raw_object, dict):
            raw_object = {}
        result = context.get("result") if context else None
        if not isinstance(result, dict):
            result = {}

        path = raw_object.get("path") or result.get("source_path") or result.get("path")
        labels = _unique(
            self.policy.default_labels
            + _string_list(raw_object.get("labels") or raw_object.get("label"))
            + _string_list(result.get("labels"))
        )
        trust_zone = raw_object.get("trust_zone") or self.policy.default_trust_zone
        deny_by_path = False
        matched_rules: list[str] = []
        for rule in self.policy.path_rules:
            if not rule.matches(path):
                continue
            matched_rules.append(rule.id or rule.pattern)
            labels = _unique(labels + rule.labels)
            trust_zone = rule.trust_zone or trust_zone
            deny_by_path = deny_by_path or rule.deny
        attributes = dict(raw_object.get("attributes") or {})
        # Recorded on the object so `_denial_reason` can honour path denials.
        attributes["matched_path_rules"] = matched_rules
        attributes["deny_by_path_rule"] = deny_by_path
        return PolicyObject(
            id=object_id,
            path=path,
            labels=labels,
            trust_zone=trust_zone,
            attributes=attributes,
        )

    def _evaluate(
        self,
        subject: PolicySubject,
        action: str,
        policy_object: PolicyObject,
    ) -> PolicyDecision:
        """Turn the denial check into a decision according to the policy mode.

        Effects: ``allow`` (mode off, or nothing denies), ``audit_denied``
        (denied in audit mode), ``redact`` (denied with ``on_denied`` set to
        redact) and ``deny`` otherwise.
        """
        if self.policy.mode == "off":
            return self._decision(subject, action, policy_object, "allow", "policy mode is off")

        denial_reason, rule_id = self._denial_reason(subject, action, policy_object)
        if denial_reason is None:
            return self._decision(subject, action, policy_object, "allow", "label policy allowed")

        if self.policy.mode == "audit":
            return self._decision(subject, action, policy_object, "audit_denied", denial_reason, rule_id)
        if self.policy.on_denied == "redact":
            return self._decision(subject, action, policy_object, "redact", denial_reason, rule_id)
        return self._decision(subject, action, policy_object, "deny", denial_reason, rule_id)

    def _denial_reason(
        self,
        subject: PolicySubject,
        action: str,
        policy_object: PolicyObject,
    ) -> tuple[str | None, str | None]:
        """Return ``(reason, rule_id)`` for the first failing check, else ``(None, None)``.

        Checks run in this order: path-rule denial, subject action list,
        subject path deny/allow lists, labels, trust zone. An empty action
        list, allow list or trust-zone list on the subject means "no
        restriction" for that check.
        """
        if policy_object.attributes.get("deny_by_path_rule"):
            return "object path is denied by local path policy", "path.deny"
        if subject.allowed_actions and action not in subject.allowed_actions:
            return f"subject `{subject.id}` is not allowed to perform `{action}`", "subject.action"
        if policy_object.path:
            if any(fnmatch.fnmatch(policy_object.path, pattern) for pattern in subject.path_deny):
                return "object path is denied for subject", "subject.path_deny"
            if subject.path_allow and not any(
                fnmatch.fnmatch(policy_object.path, pattern) for pattern in subject.path_allow
            ):
                return "object path is outside subject allow list", "subject.path_allow"
        # NOTE(review): this comparison is case-sensitive, while `_unique`
        # de-duplicates labels case-insensitively -- confirm that is intended.
        missing_labels = sorted(set(policy_object.labels) - set(subject.allowed_labels))
        if missing_labels:
            return (
                f"subject `{subject.id}` lacks labels {missing_labels}",
                "labels",
            )
        if policy_object.trust_zone and subject.trust_zones and policy_object.trust_zone not in subject.trust_zones:
            return (
                f"subject `{subject.id}` is outside trust zone `{policy_object.trust_zone}`",
                "trust_zone",
            )
        return None, None

    def _decision(
        self,
        subject: PolicySubject,
        action: str,
        policy_object: PolicyObject,
        effect: str,
        reason: str,
        rule_id: str | None = None,
    ) -> PolicyDecision:
        """Assemble a PolicyDecision carrying the object's labels, zone and path."""
        return PolicyDecision(
            subject=subject.id,
            action=action,
            object_id=policy_object.id,
            effect=effect,
            reason=reason,
            mode=self.policy.mode,
            rule_id=rule_id,
            labels=policy_object.labels,
            trust_zone=policy_object.trust_zone,
            metadata={"path": policy_object.path, "policy_id": self.policy.id},
        )


def policy_metadata_from_document(
    document: dict[str, Any],
    *,
    path: str | None = None,
) -> dict[str, Any]:
    """Extract stable policy metadata from parsed document frontmatter.

    Labels are collected, in order and de-duplicated, from
    ``frontmatter.policy.labels``/``label``, ``frontmatter.labels``/``label``
    and ``frontmatter.classification``. The trust zone comes from
    ``frontmatter.policy.trust_zone``/``zone`` or ``frontmatter.trust_zone``.

    Args:
        document: Parsed document; a non-mapping value, or a missing or
            non-mapping ``frontmatter``, is treated as empty.
        path: Explicit path; falls back to ``document["source_path"]``.

    Returns:
        A mapping with the keys ``path``, ``labels`` and ``trust_zone`` when
        they are non-empty, plus ``attributes``, which is always present
        because it always holds the ``document_type`` and ``title`` keys
        (possibly set to None).
    """

    # Normalize once so every later lookup is safe, including `source_path`,
    # which used to be read from `document` without a type check.
    doc = document if isinstance(document, dict) else {}
    frontmatter = doc.get("frontmatter")
    if not isinstance(frontmatter, dict):
        frontmatter = {}
    policy = frontmatter.get("policy")
    if not isinstance(policy, dict):
        policy = {}

    labels = _unique(
        _string_list(policy.get("labels") or policy.get("label"))
        + _string_list(frontmatter.get("labels") or frontmatter.get("label"))
        + _string_list(frontmatter.get("classification"))
    )
    data = {
        "path": path or doc.get("source_path"),
        "labels": labels,
        "trust_zone": policy.get("trust_zone") or policy.get("zone") or frontmatter.get("trust_zone"),
        "attributes": {
            "document_type": frontmatter.get("document_type") or frontmatter.get("type"),
            "title": frontmatter.get("title"),
        },
    }
    return {key: value for key, value in data.items() if value not in (None, [], {})}


def _subjects_from_mapping(value: Any) -> dict[str, PolicySubject]:
    """Convert a ``{subject_id: settings}`` mapping into PolicySubject objects.

    Anything other than a mapping yields an empty dict. A subject whose
    settings are not a mapping is created with empty lists and attributes.
    Accepted aliases: ``allowed_labels``/``labels``/``clearance``,
    ``trust_zones``/``zones``, ``allowed_actions``/``actions``,
    ``path_allow``/``allow_paths`` and ``path_deny``/``deny_paths``.
    """
    if not isinstance(value, dict):
        return {}

    subjects: dict[str, PolicySubject] = {}
    for subject_id, raw in value.items():
        # Non-mapping settings behave exactly like an empty mapping.
        spec = raw if isinstance(raw, dict) else {}
        key = str(subject_id)
        subjects[key] = PolicySubject(
            id=key,
            allowed_labels=_string_list(
                spec.get("allowed_labels") or spec.get("labels") or spec.get("clearance")
            ),
            trust_zones=_string_list(spec.get("trust_zones") or spec.get("zones")),
            roles=_string_list(spec.get("roles")),
            allowed_actions=_string_list(spec.get("allowed_actions") or spec.get("actions")),
            path_allow=_string_list(spec.get("path_allow") or spec.get("allow_paths")),
            path_deny=_string_list(spec.get("path_deny") or spec.get("deny_paths")),
            attributes=dict(spec.get("attributes") or {}),
        )
    return subjects


def _path_rules_from_value(value: Any) -> list[LocalPathPolicyRule]:
    """Parse the ``path_rules``/``paths`` setting into rule objects.

    Two shapes are accepted:

    * a mapping ``{pattern: settings}``, where the key is the rule's pattern
      (unless the settings name their own ``pattern``/``glob``/``path``) and
      also its fallback id; non-mapping settings yield a pattern-only rule;
    * a list of settings mappings, with fallback ids ``path-1``, ``path-2``
      and so on by list position; non-mapping entries are skipped.

    Any other value yields an empty list.
    """
    if isinstance(value, dict):
        rules: list[LocalPathPolicyRule] = []
        for pattern, raw in value.items():
            spec = dict(raw) if isinstance(raw, dict) else {}
            # The mapping key is the pattern. Without this, a rule such as
            # {"secret/*": {"deny": True}} would fall back to "*" and apply
            # to every path.
            if not (spec.get("pattern") or spec.get("glob") or spec.get("path")):
                spec["pattern"] = pattern
            rules.append(LocalPathPolicyRule.from_mapping(spec, fallback_id=str(pattern)))
        return rules
    if isinstance(value, list):
        return [
            LocalPathPolicyRule.from_mapping(raw, fallback_id=f"path-{index + 1}")
            for index, raw in enumerate(value)
            if isinstance(raw, dict)
        ]
    return []


def _policy_object_mapping(item: dict[str, Any]) -> dict[str, Any]:
    policy = item.get("policy") if isinstance(item.get("policy"), dict) else {}
    return {
        "path": policy.get("path") or item.get("source_path") or item.get("path"),
        "labels": policy.get("labels") or item.get("labels"),
        "trust_zone": policy.get("trust_zone"),
        "attributes": policy.get("attributes", {}),
    }


def _object_id_for_result(item: dict[str, Any], index: int) -> str:
    path = item.get("source_path") or item.get("path") or "<memory>"
    unit = item.get("path") if item.get("source_path") else item.get("unit_index", index)
    return f"{path}#{unit}"


def _annotate_result(item: dict[str, Any], decision: PolicyDecision) -> dict[str, Any]:
    annotated = dict(item)
    policy = dict(annotated.get("policy") or {})
    policy.update(
        {
            "decision_id": decision.decision_id,
            "effect": decision.effect,
            "labels": decision.labels,
            "trust_zone": decision.trust_zone,
        }
    )
    annotated["policy"] = {key: value for key, value in policy.items() if value not in (None, [], {})}
    return annotated


def _redact_result(item: dict[str, Any]) -> dict[str, Any]:
    redacted = dict(item)
    if "text" in redacted:
        redacted["text"] = "[redacted by policy]"
    if "value" in redacted:
        redacted["value"] = None
    policy = dict(redacted.get("policy") or {})
    policy["redacted"] = True
    redacted["policy"] = policy
    return redacted


def _denied_diagnostic(decision: PolicyDecision, *, redacted: bool = False) -> Diagnostic:
    """Build the warning diagnostic for a denied or redacted result.

    ``redacted`` selects the ``policy.result.redacted`` code and wording;
    otherwise the ``policy.result.denied`` code and wording are used.
    """
    if redacted:
        code = "policy.result.redacted"
        message = f"Policy redacted `{decision.object_id}`: {decision.reason}"
    else:
        code = "policy.result.denied"
        message = f"Policy denied `{decision.object_id}`: {decision.reason}"
    details = {"decision_id": decision.decision_id, "effect": decision.effect}
    return Diagnostic(
        severity="warning",
        code=code,
        message=message,
        rule_id=decision.rule_id,
        details=details,
    )


def _string_list(value: Any) -> list[str]:
    if value is None:
        return []
    if isinstance(value, list):
        return [str(item) for item in value if item is not None]
    return [str(value)]


def _unique(values: list[str]) -> list[str]:
    seen: set[str] = set()
    result: list[str] = []
    for value in values:
        normalized = str(value).strip()
        key = normalized.lower()
        if normalized and key not in seen:
            result.append(normalized)
            seen.add(key)
    return result