Files
tegwick 9031e1162c feat(infospace): add schema compliance validator (S1.2)
Deterministic validation of EntityMeta against declarative schemas:
section presence/word counts, heading format, domain enum values.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-19 00:48:57 +01:00

145 lines
3.9 KiB
Python

"""
Declarative schema definitions for entity compliance validation.
A schema describes the expected structure of an entity: which sections
are required, word count bounds, heading format, and valid enum values.
Schemas are frozen (immutable once created).
"""
from dataclasses import dataclass
from enum import Enum
from typing import Optional, Tuple
class SectionRequirement(Enum):
    """Presence level that a schema assigns to a section.

    Each member's value is its lowercase name: ``"required"``,
    ``"recommended"`` or ``"optional"``.
    """

    REQUIRED = "required"
    RECOMMENDED = "recommended"
    OPTIONAL = "optional"
@dataclass(frozen=True)
class SectionRule:
    """Validation rule for a single H2 section.

    Parameters
    ----------
    slug:
        Section slug as it appears in entity metadata (e.g. ``definition``).
    label:
        Human-readable section name for diagnostics.
    requirement:
        Whether the section is required, recommended, or optional.
    min_words:
        Minimum word count (inclusive). ``None`` means no lower bound.
    max_words:
        Maximum word count (inclusive). ``None`` means no upper bound.

    Raises
    ------
    ValueError
        If ``min_words`` or ``max_words`` is negative, or if both are set
        and ``min_words`` is greater than ``max_words``.
    """

    slug: str
    label: str
    requirement: SectionRequirement
    min_words: Optional[int] = None
    max_words: Optional[int] = None

    def __post_init__(self) -> None:
        # Fail at construction time: a rule with impossible bounds would
        # otherwise only surface later as a section that can never pass.
        for bound_name in ("min_words", "max_words"):
            bound = getattr(self, bound_name)
            if bound is not None and bound < 0:
                raise ValueError(
                    f"{bound_name} for section {self.slug!r} must be >= 0, "
                    f"got {bound}"
                )
        if (
            self.min_words is not None
            and self.max_words is not None
            and self.min_words > self.max_words
        ):
            raise ValueError(
                f"min_words ({self.min_words}) exceeds max_words "
                f"({self.max_words}) for section {self.slug!r}"
            )
@dataclass(frozen=True)
class EnumConstraint:
    """Constraint limiting a field to a set of allowed values.

    Parameters
    ----------
    field_name:
        The ``EntityMeta`` field to check (e.g. ``domain``).
    allowed_values:
        Acceptable string values. Stored as a tuple: any other iterable is
        converted to one, and a bare string is treated as a single allowed
        value rather than being split into characters.
    severity:
        ``"error"`` or ``"warning"`` when the value is not in the set.
        The value is stored as given; this class does not validate it.
    """

    field_name: str
    allowed_values: Tuple[str, ...]
    severity: str = "warning"

    def __post_init__(self) -> None:
        values = self.allowed_values
        if isinstance(values, str):
            # ``("Production")`` without a trailing comma is just a str;
            # keep it as one value so membership tests stay exact matches.
            normalized: Tuple[str, ...] = (values,)
        else:
            # A list/set here would leave the frozen instance mutable and
            # unhashable; tuple() returns an existing tuple unchanged.
            normalized = tuple(values)
        # frozen=True blocks ordinary attribute assignment, so bypass it
        # for this one-time normalisation during construction.
        object.__setattr__(self, "allowed_values", normalized)
@dataclass(frozen=True)
class EntitySchema:
    """Full set of validation rules for one entity type.

    Instances are frozen: attributes cannot be reassigned after creation.

    Parameters
    ----------
    name:
        Human-readable schema name (e.g. ``"Economic Entity"``).
    section_rules:
        Tuple of :class:`SectionRule` objects, one per section.
    enum_constraints:
        Tuple of :class:`EnumConstraint` objects. Defaults to an empty
        tuple, i.e. no enum checks.
    h1_title_case_severity:
        Severity for non-title-case H1 headings (``"error"`` or
        ``"warning"``). Defaults to ``"warning"``.
    require_h1:
        Whether a non-empty slug (H1) is required. Defaults to ``True``.
    """

    # Identification.
    name: str

    # Per-section and per-field rules.
    section_rules: Tuple[SectionRule, ...]
    enum_constraints: Tuple[EnumConstraint, ...] = ()

    # H1 heading checks.
    h1_title_case_severity: str = "warning"
    require_h1: bool = True
# ── Default schema for the economic-entity infospace ──────────────
# Four required sections (only ``definition`` has word-count bounds),
# two optional sections, and a warning-level check on the ``domain`` field.
ECONOMIC_ENTITY_SCHEMA = EntitySchema(
    name="Economic Entity",
    section_rules=(
        SectionRule(
            "definition",
            "Definition",
            SectionRequirement.REQUIRED,
            min_words=20,
            max_words=150,
        ),
        SectionRule("source_chapter", "Source Chapter", SectionRequirement.REQUIRED),
        SectionRule("context", "Context", SectionRequirement.REQUIRED),
        SectionRule("economic_domain", "Economic Domain", SectionRequirement.REQUIRED),
        SectionRule(
            "smith_s_original_wording",
            "Smith's Original Wording",
            SectionRequirement.OPTIONAL,
        ),
        SectionRule(
            "modern_interpretation",
            "Modern Interpretation",
            SectionRequirement.OPTIONAL,
        ),
    ),
    enum_constraints=(
        EnumConstraint(
            field_name="domain",
            allowed_values=(
                "Production",
                "Exchange",
                "Distribution",
                "Regulation",
                "General Theory",
            ),
            severity="warning",
        ),
    ),
    h1_title_case_severity="warning",
    require_h1=True,
)