feat(infospace): add schema compliance validator (S1.2)
Deterministic validation of EntityMeta against declarative schemas: section presence/word counts, heading format, domain enum values. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -7,9 +7,35 @@ files and analysing infospace collections.
|
|||||||
|
|
||||||
from .models import EntityMeta
|
from .models import EntityMeta
|
||||||
from .entity_parser import parse_entity_file, parse_entity_directory
|
from .entity_parser import parse_entity_file, parse_entity_directory
|
||||||
|
from .schema import (
|
||||||
|
ECONOMIC_ENTITY_SCHEMA,
|
||||||
|
EntitySchema,
|
||||||
|
EnumConstraint,
|
||||||
|
SectionRequirement,
|
||||||
|
SectionRule,
|
||||||
|
)
|
||||||
|
from .validator import (
|
||||||
|
BatchComplianceResult,
|
||||||
|
ComplianceDiagnostic,
|
||||||
|
ComplianceResult,
|
||||||
|
validate_entities,
|
||||||
|
validate_entity,
|
||||||
|
)
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"EntityMeta",
|
"EntityMeta",
|
||||||
"parse_entity_file",
|
"parse_entity_file",
|
||||||
"parse_entity_directory",
|
"parse_entity_directory",
|
||||||
|
# Schema
|
||||||
|
"ECONOMIC_ENTITY_SCHEMA",
|
||||||
|
"EntitySchema",
|
||||||
|
"EnumConstraint",
|
||||||
|
"SectionRequirement",
|
||||||
|
"SectionRule",
|
||||||
|
# Validator
|
||||||
|
"BatchComplianceResult",
|
||||||
|
"ComplianceDiagnostic",
|
||||||
|
"ComplianceResult",
|
||||||
|
"validate_entities",
|
||||||
|
"validate_entity",
|
||||||
]
|
]
|
||||||
|
|||||||
144
markitect/infospace/schema.py
Normal file
144
markitect/infospace/schema.py
Normal file
@@ -0,0 +1,144 @@
|
|||||||
|
"""
|
||||||
|
Declarative schema definitions for entity compliance validation.
|
||||||
|
|
||||||
|
A schema describes the expected structure of an entity: which sections
|
||||||
|
are required, word count bounds, heading format, and valid enum values.
|
||||||
|
Schemas are frozen (immutable once created).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from enum import Enum
|
||||||
|
from typing import Optional, Tuple
|
||||||
|
|
||||||
|
|
||||||
|
class SectionRequirement(Enum):
    """Strictness level attached to a section rule.

    The member values are the lowercase spellings of the member names.
    """

    # An empty/missing section is reported as an error.
    REQUIRED = "required"
    # An empty/missing section is reported as a warning.
    RECOMMENDED = "recommended"
    # An empty/missing section produces no diagnostic.
    OPTIONAL = "optional"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class SectionRule:
    """Validation rule for a single H2 section.

    Parameters
    ----------
    slug:
        Section slug as it appears in entity metadata (e.g. ``definition``).
    label:
        Human-readable section name for diagnostics.
    requirement:
        Whether the section is required, recommended, or optional.
    min_words:
        Minimum word count (inclusive).  ``None`` means no lower bound.
    max_words:
        Maximum word count (inclusive).  ``None`` means no upper bound.

    Raises
    ------
    ValueError
        If a bound is negative, or if ``min_words`` exceeds ``max_words``.
    """

    slug: str
    label: str
    requirement: SectionRequirement
    min_words: Optional[int] = None
    max_words: Optional[int] = None

    def __post_init__(self) -> None:
        # Reject contradictory bounds up front: a rule with min > max can
        # never be satisfied and would flag every populated section.
        for bound_name in ("min_words", "max_words"):
            bound = getattr(self, bound_name)
            if bound is not None and bound < 0:
                raise ValueError(
                    f"{bound_name} must be non-negative, got {bound!r}"
                )
        if (
            self.min_words is not None
            and self.max_words is not None
            and self.min_words > self.max_words
        ):
            raise ValueError(
                f"min_words ({self.min_words}) must not exceed "
                f"max_words ({self.max_words})"
            )
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class EnumConstraint:
    """Constraint limiting a field to a set of allowed values.

    Parameters
    ----------
    field_name:
        The ``EntityMeta`` field to check (e.g. ``domain``).
    allowed_values:
        Tuple of acceptable string values.
    severity:
        ``"error"`` or ``"warning"`` when the value is not in the set.

    Raises
    ------
    ValueError
        If ``severity`` is neither ``"error"`` nor ``"warning"``.
    """

    field_name: str
    allowed_values: Tuple[str, ...]
    severity: str = "warning"

    def __post_init__(self) -> None:
        # Severity is compared by exact string match when diagnostics are
        # tallied, so a typo would otherwise produce findings that count as
        # neither errors nor warnings.
        if self.severity not in ("error", "warning"):
            raise ValueError(
                f"severity must be 'error' or 'warning', got {self.severity!r}"
            )
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class EntitySchema:
    """Complete validation schema for an entity type.

    Parameters
    ----------
    name:
        Human-readable schema name (e.g. ``"Economic Entity"``).
    section_rules:
        Tuple of :class:`SectionRule` objects.
    enum_constraints:
        Tuple of :class:`EnumConstraint` objects.
    h1_title_case_severity:
        Severity for non-title-case H1 headings (``"error"`` or ``"warning"``).
    require_h1:
        Whether a non-empty slug (H1) is required.

    Raises
    ------
    ValueError
        If ``h1_title_case_severity`` is neither ``"error"`` nor
        ``"warning"``.
    """

    name: str
    section_rules: Tuple[SectionRule, ...]
    enum_constraints: Tuple[EnumConstraint, ...] = ()
    h1_title_case_severity: str = "warning"
    require_h1: bool = True

    def __post_init__(self) -> None:
        # The severity string is copied verbatim onto diagnostics; an
        # unrecognised value would be counted as neither error nor warning.
        if self.h1_title_case_severity not in ("error", "warning"):
            raise ValueError(
                "h1_title_case_severity must be 'error' or 'warning', "
                f"got {self.h1_title_case_severity!r}"
            )
|
||||||
|
|
||||||
|
|
||||||
|
# ── Default schema for the economic-entity infospace ──────────────
|
||||||
|
|
||||||
|
# Four required sections (only "definition" carries word bounds), two
# optional sections, and a warning-level whitelist on the ``domain`` field.
ECONOMIC_ENTITY_SCHEMA = EntitySchema(
    name="Economic Entity",
    section_rules=(
        SectionRule(
            "definition",
            "Definition",
            SectionRequirement.REQUIRED,
            min_words=20,
            max_words=150,
        ),
        SectionRule("source_chapter", "Source Chapter", SectionRequirement.REQUIRED),
        SectionRule("context", "Context", SectionRequirement.REQUIRED),
        SectionRule("economic_domain", "Economic Domain", SectionRequirement.REQUIRED),
        SectionRule(
            "smith_s_original_wording",
            "Smith's Original Wording",
            SectionRequirement.OPTIONAL,
        ),
        SectionRule(
            "modern_interpretation",
            "Modern Interpretation",
            SectionRequirement.OPTIONAL,
        ),
    ),
    enum_constraints=(
        EnumConstraint(
            field_name="domain",
            allowed_values=(
                "Production",
                "Exchange",
                "Distribution",
                "Regulation",
                "General Theory",
            ),
            severity="warning",
        ),
    ),
    h1_title_case_severity="warning",
    require_h1=True,
)
|
||||||
261
markitect/infospace/validator.py
Normal file
261
markitect/infospace/validator.py
Normal file
@@ -0,0 +1,261 @@
|
|||||||
|
"""
|
||||||
|
Schema compliance validator for entity metadata.
|
||||||
|
|
||||||
|
Validates :class:`~markitect.infospace.models.EntityMeta` instances
|
||||||
|
against a declarative :class:`~markitect.infospace.schema.EntitySchema`.
|
||||||
|
All checks are deterministic — no LLM calls.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Dict, List, Optional, Sequence
|
||||||
|
|
||||||
|
from .models import EntityMeta
|
||||||
|
from .schema import EntitySchema, SectionRequirement
|
||||||
|
|
||||||
|
# Maps section slugs (as they appear in the schema) to EntityMeta field
|
||||||
|
# names. Most match directly; ``economic_domain`` maps to ``domain``.
|
||||||
|
_SECTION_FIELD_MAP: Dict[str, str] = {
|
||||||
|
"definition": "definition",
|
||||||
|
"source_chapter": "source_chapter",
|
||||||
|
"context": "context",
|
||||||
|
"economic_domain": "domain",
|
||||||
|
"smith_s_original_wording": "original_wording",
|
||||||
|
"modern_interpretation": "modern_interpretation",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ComplianceDiagnostic:
    """One finding produced while validating an entity.

    ``section`` and ``field`` are optional locators; when set they are
    appended to the string form of the diagnostic.
    """

    code: str
    message: str
    severity: str  # "error" or "warning"
    section: Optional[str] = None
    field: Optional[str] = None

    def __str__(self) -> str:
        # "[SEVERITY] CODE: message" followed by any locators that are set.
        rendered = f"[{self.severity.upper()}] {self.code}: {self.message}"
        if self.section:
            rendered += f" (section: {self.section})"
        if self.field:
            rendered += f" (field: {self.field})"
        return rendered
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ComplianceResult:
    """Outcome of validating one entity against one schema.

    An entity is compliant when none of its diagnostics has severity
    ``"error"``; warnings do not affect compliance.
    """

    entity_slug: str
    schema_name: str
    diagnostics: List[ComplianceDiagnostic] = field(default_factory=list)
    checks_run: int = 0

    @property
    def is_compliant(self) -> bool:
        """``True`` when there are no error-severity diagnostics."""
        return not self.errors

    @property
    def error_count(self) -> int:
        """Number of diagnostics whose severity is ``"error"``."""
        return len(self.errors)

    @property
    def warning_count(self) -> int:
        """Number of diagnostics whose severity is ``"warning"``."""
        return len(self.warnings)

    @property
    def errors(self) -> List[ComplianceDiagnostic]:
        """Error-severity diagnostics, in the order they were recorded."""
        return [diag for diag in self.diagnostics if diag.severity == "error"]

    @property
    def warnings(self) -> List[ComplianceDiagnostic]:
        """Warning-severity diagnostics, in the order they were recorded."""
        return [diag for diag in self.diagnostics if diag.severity == "warning"]

    def summary(self) -> str:
        """Return a one-line PASS/FAIL summary with check and finding counts."""
        verdict = "PASS" if self.is_compliant else "FAIL"
        counts = (
            f"{self.checks_run} checks, "
            f"{self.error_count} errors, "
            f"{self.warning_count} warnings"
        )
        return f"{self.entity_slug}: {verdict} ({counts})"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class BatchComplianceResult:
    """Roll-up of per-entity compliance results for one schema."""

    results: List[ComplianceResult] = field(default_factory=list)
    schema_name: str = ""

    @property
    def total_entities(self) -> int:
        """Number of entities that were validated."""
        return len(self.results)

    @property
    def compliant_count(self) -> int:
        """Number of entities with no error-severity diagnostics."""
        return len([entry for entry in self.results if entry.is_compliant])

    @property
    def non_compliant_count(self) -> int:
        """Number of entities with at least one error."""
        return self.total_entities - self.compliant_count

    @property
    def total_errors(self) -> int:
        """Sum of error counts across all entities."""
        return sum(entry.error_count for entry in self.results)

    @property
    def total_warnings(self) -> int:
        """Sum of warning counts across all entities."""
        return sum(entry.warning_count for entry in self.results)

    def summary(self) -> str:
        """Return a multi-line report: four header lines, then one line per entity."""
        header = [
            f"Schema: {self.schema_name}",
            f"Entities: {self.total_entities}",
            f"Compliant: {self.compliant_count}/{self.total_entities}",
            f"Errors: {self.total_errors}, Warnings: {self.total_warnings}",
        ]
        per_entity = [f" {entry.summary()}" for entry in self.results]
        return "\n".join(header + per_entity)
|
||||||
|
|
||||||
|
|
||||||
|
def _word_count(text: str) -> int:
|
||||||
|
"""Count whitespace-separated words."""
|
||||||
|
return len(text.split())
|
||||||
|
|
||||||
|
|
||||||
|
def validate_entity(
    entity: EntityMeta,
    schema: EntitySchema,
) -> ComplianceResult:
    """Validate a single entity against *schema*.

    Three groups of checks run in order:

    * **H1** - ``H1_MISSING`` (error) when ``schema.require_h1`` is set and
      the entity slug is empty; ``H1_NOT_TITLE_CASE`` (severity taken from
      ``schema.h1_title_case_severity``) when the slug is non-empty and
      ``entity.h1_is_title_case`` is false.
    * **Sections** - for every rule, an empty or whitespace-only value
      yields ``SECTION_MISSING`` (error) for required sections and
      ``SECTION_RECOMMENDED`` (warning) for recommended ones; optional
      sections yield nothing.  Non-empty sections are checked against the
      rule's word bounds: ``SECTION_TOO_SHORT`` (error) or
      ``SECTION_TOO_LONG`` (warning).
    * **Enum constraints** - ``ENUM_VALUE_UNKNOWN`` (severity taken from the
      constraint) when a non-empty field value is not in the allowed set.

    Returns a :class:`ComplianceResult` with all diagnostics found.
    ``checks_run`` counts every check that was evaluated, whether or not
    it produced a diagnostic.
    """
    result = ComplianceResult(
        entity_slug=entity.slug,
        schema_name=schema.name,
    )
    diagnostics = result.diagnostics
    checks = 0

    # -- H1 checks ---------------------------------------------------
    if schema.require_h1:
        checks += 1
        if not entity.slug:
            diagnostics.append(
                ComplianceDiagnostic(
                    code="H1_MISSING",
                    message="Entity has no H1 heading (empty slug).",
                    severity="error",
                )
            )

    checks += 1
    # Title case is only meaningful when there is a heading at all.
    if entity.slug and not entity.h1_is_title_case:
        diagnostics.append(
            ComplianceDiagnostic(
                code="H1_NOT_TITLE_CASE",
                message=f"H1 '{entity.h1_raw}' is not in title case.",
                severity=schema.h1_title_case_severity,
            )
        )

    # -- Section checks ----------------------------------------------
    for rule in schema.section_rules:
        checks += 1
        # Slugs without an explicit mapping are used as the attribute name.
        field_name = _SECTION_FIELD_MAP.get(rule.slug, rule.slug)
        value = getattr(entity, field_name, "")

        if not value or not value.strip():
            if rule.requirement == SectionRequirement.REQUIRED:
                diagnostics.append(
                    ComplianceDiagnostic(
                        code="SECTION_MISSING",
                        message=f"Required section '{rule.label}' is missing or empty.",
                        severity="error",
                        section=rule.slug,
                    )
                )
            elif rule.requirement == SectionRequirement.RECOMMENDED:
                diagnostics.append(
                    ComplianceDiagnostic(
                        code="SECTION_RECOMMENDED",
                        message=f"Recommended section '{rule.label}' is missing.",
                        severity="warning",
                        section=rule.slug,
                    )
                )
            # OPTIONAL + empty -> no diagnostic
            continue

        # Word count bounds (only if section has content and a bound is set)
        if rule.min_words is None and rule.max_words is None:
            continue

        # Count the bounds check whenever it is evaluated, not only when it
        # fails, so checks_run does not depend on the outcome.
        checks += 1
        wc = _word_count(value)
        if rule.min_words is not None and wc < rule.min_words:
            diagnostics.append(
                ComplianceDiagnostic(
                    code="SECTION_TOO_SHORT",
                    message=(
                        f"Section '{rule.label}' has {wc} words "
                        f"(minimum: {rule.min_words})."
                    ),
                    severity="error",
                    section=rule.slug,
                )
            )
        elif rule.max_words is not None and wc > rule.max_words:
            diagnostics.append(
                ComplianceDiagnostic(
                    code="SECTION_TOO_LONG",
                    message=(
                        f"Section '{rule.label}' has {wc} words "
                        f"(maximum: {rule.max_words})."
                    ),
                    severity="warning",
                    section=rule.slug,
                )
            )

    # -- Enum constraints --------------------------------------------
    for constraint in schema.enum_constraints:
        checks += 1
        value = getattr(entity, constraint.field_name, "")

        # An empty field is not an enum violation; a missing required
        # section is reported by the section checks above.
        if not value or not value.strip():
            continue

        candidate = value.strip()
        if candidate not in constraint.allowed_values:
            diagnostics.append(
                ComplianceDiagnostic(
                    code="ENUM_VALUE_UNKNOWN",
                    message=(
                        f"Field '{constraint.field_name}' has value "
                        f"'{candidate}' which is not in the allowed set."
                    ),
                    severity=constraint.severity,
                    field=constraint.field_name,
                )
            )

    result.checks_run = checks
    return result
|
||||||
|
|
||||||
|
|
||||||
|
def validate_entities(
    entities: Sequence[EntityMeta],
    schema: EntitySchema,
) -> BatchComplianceResult:
    """Run :func:`validate_entity` over every item in *entities*.

    Returns a :class:`BatchComplianceResult` carrying the schema name and
    one result per entity, in input order.
    """
    per_entity = [validate_entity(item, schema) for item in entities]
    return BatchComplianceResult(results=per_entity, schema_name=schema.name)
|
||||||
419
tests/unit/infospace/test_schema_validator.py
Normal file
419
tests/unit/infospace/test_schema_validator.py
Normal file
@@ -0,0 +1,419 @@
|
|||||||
|
"""Tests for markitect.infospace schema and validator modules."""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from markitect.infospace import (
|
||||||
|
ECONOMIC_ENTITY_SCHEMA,
|
||||||
|
BatchComplianceResult,
|
||||||
|
ComplianceDiagnostic,
|
||||||
|
ComplianceResult,
|
||||||
|
EntityMeta,
|
||||||
|
EntitySchema,
|
||||||
|
EnumConstraint,
|
||||||
|
SectionRequirement,
|
||||||
|
SectionRule,
|
||||||
|
validate_entities,
|
||||||
|
validate_entity,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Helpers ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _compliant_entity(**overrides) -> EntityMeta:
    """Build an EntityMeta that satisfies ECONOMIC_ENTITY_SCHEMA.

    Keyword arguments replace the corresponding baseline field values.
    """
    baseline = {
        "slug": "division_of_labour",
        "title": "Division of Labour",
        "h1_raw": "Division of Labour",
        "definition": (
            "The separation of a work process into a number of distinct "
            "tasks, each performed by a specialised worker, resulting in "
            "a significant increase in the productive powers of labour."
        ),
        "source_chapter": 'Book I, Chapter 1: "Of the Division of Labour"',
        "context": "The division of labour is the central argument of the chapter.",
        "domain": "Production",
        "original_wording": '"The greatest improvements in the productive powers…"',
        "modern_interpretation": "Remains foundational in economics.",
        "h1_is_title_case": True,
        "has_original_wording": True,
        "definition_word_count": 30,
        "total_word_count": 100,
        "section_slugs": [
            "definition",
            "source_chapter",
            "context",
            "economic_domain",
            "smith_s_original_wording",
            "modern_interpretation",
        ],
        "source_path": "/tmp/division-of-labour.md",
    }
    return EntityMeta(**{**baseline, **overrides})
|
||||||
|
|
||||||
|
|
||||||
|
# ── Single-entity validation ────────────────────────────────────────
|
||||||
|
|
||||||
|
class TestValidateEntityCompliant:
    """The baseline entity passes the default schema without findings."""

    def test_fully_compliant_zero_diagnostics(self):
        outcome = validate_entity(_compliant_entity(), ECONOMIC_ENTITY_SCHEMA)
        assert outcome.diagnostics == []
        assert outcome.is_compliant is True
        assert outcome.error_count == 0
        assert outcome.warning_count == 0
        assert outcome.checks_run > 0

    def test_summary_shows_pass(self):
        outcome = validate_entity(_compliant_entity(), ECONOMIC_ENTITY_SCHEMA)
        text = outcome.summary()
        assert "PASS" in text
        assert "division_of_labour" in text
|
||||||
|
|
||||||
|
|
||||||
|
class TestSectionMissing:
    """Empty sections: errors when required, silent when optional."""

    def test_missing_required_section_error(self):
        outcome = validate_entity(
            _compliant_entity(definition=""), ECONOMIC_ENTITY_SCHEMA
        )
        assert "SECTION_MISSING" in {diag.code for diag in outcome.diagnostics}
        assert not outcome.is_compliant

    def test_empty_required_section_error(self):
        # Whitespace-only content counts as empty.
        outcome = validate_entity(
            _compliant_entity(definition=" "), ECONOMIC_ENTITY_SCHEMA
        )
        assert "SECTION_MISSING" in {diag.code for diag in outcome.diagnostics}

    def test_optional_section_absent_no_diagnostic(self):
        stripped = _compliant_entity(original_wording="", modern_interpretation="")
        outcome = validate_entity(stripped, ECONOMIC_ENTITY_SCHEMA)
        # Dropping only optional sections must not affect compliance.
        assert outcome.is_compliant is True
        assert outcome.error_count == 0
        # Neither missing-section code may be raised for optional sections.
        seen = {diag.code for diag in outcome.diagnostics}
        assert "SECTION_MISSING" not in seen
        assert "SECTION_RECOMMENDED" not in seen
|
||||||
|
|
||||||
|
|
||||||
|
class TestSectionRecommended:
    """A missing recommended section warns but does not fail the entity."""

    def test_recommended_section_missing_warning(self):
        recommended_definition = SectionRule(
            slug="definition",
            label="Definition",
            requirement=SectionRequirement.RECOMMENDED,
        )
        schema = EntitySchema(
            name="Test Schema",
            section_rules=(recommended_definition,),
        )
        outcome = validate_entity(_compliant_entity(definition=""), schema)

        matching = [
            diag for diag in outcome.diagnostics
            if diag.code == "SECTION_RECOMMENDED"
        ]
        assert matching
        assert [diag.severity for diag in matching] == ["warning"]
        # Warnings don't break compliance
        assert outcome.is_compliant is True
|
||||||
|
|
||||||
|
|
||||||
|
class TestWordCountBounds:
    """Word-count limits on the definition section (20 to 150 inclusive)."""

    @staticmethod
    def _codes_for(definition):
        # Validate the baseline entity with a replaced definition and
        # return the diagnostic codes that were raised.
        outcome = validate_entity(
            _compliant_entity(definition=definition), ECONOMIC_ENTITY_SCHEMA
        )
        return [diag.code for diag in outcome.diagnostics]

    def test_definition_too_short_error(self):
        entity = _compliant_entity(
            definition="only ten words here to test the lower boundary check now"
        )
        outcome = validate_entity(entity, ECONOMIC_ENTITY_SCHEMA)
        too_short = [
            diag for diag in outcome.diagnostics if diag.code == "SECTION_TOO_SHORT"
        ]
        assert len(too_short) == 1
        assert too_short[0].severity == "error"
        assert not outcome.is_compliant

    def test_definition_too_long_warning(self):
        entity = _compliant_entity(definition=" ".join(["word"] * 200))
        outcome = validate_entity(entity, ECONOMIC_ENTITY_SCHEMA)
        too_long = [
            diag for diag in outcome.diagnostics if diag.code == "SECTION_TOO_LONG"
        ]
        assert len(too_long) == 1
        assert too_long[0].severity == "warning"
        # Warnings don't break compliance
        assert outcome.is_compliant is True

    def test_definition_at_min_boundary_passes(self):
        assert "SECTION_TOO_SHORT" not in self._codes_for(" ".join(["word"] * 20))

    def test_definition_at_max_boundary_passes(self):
        assert "SECTION_TOO_LONG" not in self._codes_for(" ".join(["word"] * 150))
|
||||||
|
|
||||||
|
|
||||||
|
class TestH1Checks:
    """H1 presence and title-case diagnostics."""

    def test_slug_format_h1_warning(self):
        entity = _compliant_entity(h1_raw="effectual-demand", h1_is_title_case=False)
        outcome = validate_entity(entity, ECONOMIC_ENTITY_SCHEMA)
        title_case = [
            diag for diag in outcome.diagnostics if diag.code == "H1_NOT_TITLE_CASE"
        ]
        assert len(title_case) == 1
        assert title_case[0].severity == "warning"
        # Still compliant (it's a warning)
        assert outcome.is_compliant is True

    def test_h1_missing_error(self):
        outcome = validate_entity(
            _compliant_entity(slug="", h1_raw=""), ECONOMIC_ENTITY_SCHEMA
        )
        assert "H1_MISSING" in {diag.code for diag in outcome.diagnostics}
        assert not outcome.is_compliant

    def test_h1_title_case_error_severity(self):
        strict = EntitySchema(
            name="Strict",
            section_rules=(),
            h1_title_case_severity="error",
        )
        outcome = validate_entity(_compliant_entity(h1_is_title_case=False), strict)
        title_case = [
            diag for diag in outcome.diagnostics if diag.code == "H1_NOT_TITLE_CASE"
        ]
        assert title_case[0].severity == "error"
        assert not outcome.is_compliant
|
||||||
|
|
||||||
|
|
||||||
|
class TestEnumConstraints:
    """Domain whitelist behaviour under the default schema."""

    def test_unknown_domain_warning(self):
        outcome = validate_entity(
            _compliant_entity(domain="Metaphysics"), ECONOMIC_ENTITY_SCHEMA
        )
        unknown = [
            diag for diag in outcome.diagnostics if diag.code == "ENUM_VALUE_UNKNOWN"
        ]
        assert len(unknown) == 1
        assert unknown[0].severity == "warning"
        assert outcome.is_compliant is True

    def test_empty_domain_no_enum_diagnostic(self):
        """Empty domain triggers SECTION_MISSING, not ENUM_VALUE_UNKNOWN."""
        outcome = validate_entity(_compliant_entity(domain=""), ECONOMIC_ENTITY_SCHEMA)
        raised = [diag.code for diag in outcome.diagnostics]
        assert raised.count("ENUM_VALUE_UNKNOWN") == 0
        # But SECTION_MISSING is raised for the required section
        assert raised.count("SECTION_MISSING") >= 1

    def test_valid_domain_no_diagnostic(self):
        accepted = ("Production", "Exchange", "Distribution", "Regulation", "General Theory")
        for domain in accepted:
            outcome = validate_entity(
                _compliant_entity(domain=domain), ECONOMIC_ENTITY_SCHEMA
            )
            unknown = [
                diag for diag in outcome.diagnostics
                if diag.code == "ENUM_VALUE_UNKNOWN"
            ]
            assert len(unknown) == 0, f"Unexpected enum diagnostic for domain '{domain}'"
|
||||||
|
|
||||||
|
|
||||||
|
class TestMultipleIssues:
    """Independent problems on one entity are all reported together."""

    def test_multiple_issues_on_one_entity(self):
        flawed = _compliant_entity(
            definition="too short",
            domain="UnknownDomain",
            h1_is_title_case=False,
        )
        outcome = validate_entity(flawed, ECONOMIC_ENTITY_SCHEMA)
        seen = {diag.code for diag in outcome.diagnostics}
        for expected in ("SECTION_TOO_SHORT", "ENUM_VALUE_UNKNOWN", "H1_NOT_TITLE_CASE"):
            assert expected in seen
        assert len(outcome.diagnostics) >= 3
|
||||||
|
|
||||||
|
|
||||||
|
class TestCustomSchema:
    """Schemas other than the default one drive the validator's behaviour."""

    def test_custom_schema_different_rules(self):
        definition_rule = SectionRule(
            slug="definition",
            label="Definition",
            requirement=SectionRequirement.REQUIRED,
            min_words=5,
            max_words=50,
        )
        domain_rule = EnumConstraint(
            field_name="domain",
            allowed_values=("Alpha", "Beta"),
            severity="error",
        )
        schema = EntitySchema(
            name="Custom",
            section_rules=(definition_rule,),
            enum_constraints=(domain_rule,),
            h1_title_case_severity="error",
            require_h1=False,
        )
        entity = _compliant_entity(
            definition="just five words here exactly",
            domain="Alpha",
        )
        outcome = validate_entity(entity, schema)
        assert outcome.is_compliant is True
        assert outcome.schema_name == "Custom"

    def test_custom_enum_error_severity(self):
        only_a = EnumConstraint(
            field_name="domain",
            allowed_values=("A",),
            severity="error",
        )
        schema = EntitySchema(
            name="Strict Enum",
            section_rules=(),
            enum_constraints=(only_a,),
        )
        outcome = validate_entity(_compliant_entity(domain="B"), schema)
        assert not outcome.is_compliant
        unknown = [
            diag for diag in outcome.diagnostics if diag.code == "ENUM_VALUE_UNKNOWN"
        ]
        assert unknown[0].severity == "error"
|
||||||
|
|
||||||
|
|
||||||
|
# ── Batch validation ────────────────────────────────────────────────
|
||||||
|
|
||||||
|
class TestBatchValidation:
    """Aggregation behaviour of validate_entities."""

    def test_empty_list(self):
        batch = validate_entities([], ECONOMIC_ENTITY_SCHEMA)
        assert batch.total_entities == 0
        assert batch.compliant_count == 0
        assert batch.total_errors == 0
        assert batch.total_warnings == 0

    def test_mixed_compliance(self):
        passing = _compliant_entity()
        failing = _compliant_entity(slug="bad", definition="")
        batch = validate_entities([passing, failing], ECONOMIC_ENTITY_SCHEMA)
        assert batch.total_entities == 2
        assert batch.compliant_count == 1
        assert batch.non_compliant_count == 1
        assert batch.total_errors >= 1

    def test_summary_format(self):
        passing = _compliant_entity()
        failing = _compliant_entity(slug="bad_entity", definition="too short")
        report = validate_entities([passing, failing], ECONOMIC_ENTITY_SCHEMA).summary()
        expected_fragments = (
            "Schema: Economic Entity",
            "Entities: 2",
            "Compliant: 1/2",
            "division_of_labour",
            "bad_entity",
        )
        for fragment in expected_fragments:
            assert fragment in report

    def test_aggregate_counts(self):
        batch = validate_entities(
            [
                _compliant_entity(slug="e1"),
                _compliant_entity(slug="e2", definition="short"),
                _compliant_entity(slug="e3", domain="Unknown", h1_is_title_case=False),
            ],
            ECONOMIC_ENTITY_SCHEMA,
        )
        assert batch.total_entities == 3
        first, second, third = batch.results
        assert batch.total_errors == (
            first.error_count + second.error_count + third.error_count
        )
        assert batch.total_warnings == (
            first.warning_count + second.warning_count + third.warning_count
        )

    def test_schema_name_propagated(self):
        batch = validate_entities([], ECONOMIC_ENTITY_SCHEMA)
        assert batch.schema_name == "Economic Entity"
|
||||||
|
|
||||||
|
|
||||||
|
# ── Default schema checks ──────────────────────────────────────────
|
||||||
|
|
||||||
|
class TestDefaultSchema:
    """Shape and immutability of ECONOMIC_ENTITY_SCHEMA."""

    @staticmethod
    def _slugs_with(requirement):
        # Slugs of the default schema's rules that carry *requirement*.
        return {
            rule.slug
            for rule in ECONOMIC_ENTITY_SCHEMA.section_rules
            if rule.requirement == requirement
        }

    def test_correct_section_count(self):
        assert len(ECONOMIC_ENTITY_SCHEMA.section_rules) == 6

    def test_required_sections(self):
        assert self._slugs_with(SectionRequirement.REQUIRED) == {
            "definition", "source_chapter", "context", "economic_domain",
        }

    def test_optional_sections(self):
        assert self._slugs_with(SectionRequirement.OPTIONAL) == {
            "smith_s_original_wording", "modern_interpretation",
        }

    def test_domain_enum_values(self):
        first_constraint = ECONOMIC_ENTITY_SCHEMA.enum_constraints[0]
        assert first_constraint.field_name == "domain"
        assert set(first_constraint.allowed_values) == {
            "Production", "Exchange", "Distribution", "Regulation", "General Theory",
        }

    def test_schema_is_frozen(self):
        with pytest.raises(AttributeError):
            ECONOMIC_ENTITY_SCHEMA.name = "Changed"

    def test_section_rule_is_frozen(self):
        first_rule = ECONOMIC_ENTITY_SCHEMA.section_rules[0]
        with pytest.raises(AttributeError):
            first_rule.slug = "changed"

    def test_enum_constraint_is_frozen(self):
        first_constraint = ECONOMIC_ENTITY_SCHEMA.enum_constraints[0]
        with pytest.raises(AttributeError):
            first_constraint.field_name = "changed"
|
||||||
|
|
||||||
|
|
||||||
|
# ── ComplianceDiagnostic __str__ ────────────────────────────────────
|
||||||
|
|
||||||
|
class TestDiagnosticStr:
    """String rendering of ComplianceDiagnostic."""

    def test_basic_str(self):
        rendered = str(
            ComplianceDiagnostic(code="TEST", message="test msg", severity="error")
        )
        assert "[ERROR] TEST: test msg" in rendered

    def test_str_with_section(self):
        diag = ComplianceDiagnostic(
            code="SECTION_MISSING",
            message="Missing.",
            severity="error",
            section="definition",
        )
        assert "(section: definition)" in str(diag)

    def test_str_with_field(self):
        diag = ComplianceDiagnostic(
            code="ENUM_VALUE_UNKNOWN",
            message="Unknown.",
            severity="warning",
            field="domain",
        )
        assert "(field: domain)" in str(diag)
|
||||||
|
|
||||||
|
|
||||||
|
# ── ComplianceResult properties ─────────────────────────────────────
|
||||||
|
|
||||||
|
class TestComplianceResultProperties:
    """Derived properties of a hand-built ComplianceResult."""

    def test_errors_property(self):
        outcome = ComplianceResult(entity_slug="test", schema_name="Test")
        outcome.diagnostics = [
            ComplianceDiagnostic(code=code, message=message, severity=severity)
            for code, message, severity in (
                ("A", "a", "error"),
                ("B", "b", "warning"),
                ("C", "c", "error"),
            )
        ]
        assert len(outcome.errors) == 2
        assert len(outcome.warnings) == 1
        assert outcome.error_count == 2
        assert outcome.warning_count == 1
        assert not outcome.is_compliant

    def test_summary_fail(self):
        outcome = ComplianceResult(entity_slug="test", schema_name="Test", checks_run=5)
        outcome.diagnostics = [
            ComplianceDiagnostic(code="A", message="a", severity="error"),
        ]
        assert "FAIL" in outcome.summary()
|
||||||
Reference in New Issue
Block a user