feat(infospace): add schema compliance validator (S1.2)
Deterministic validation of EntityMeta against declarative schemas: section presence/word counts, heading format, domain enum values. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
419
tests/unit/infospace/test_schema_validator.py
Normal file
419
tests/unit/infospace/test_schema_validator.py
Normal file
@@ -0,0 +1,419 @@
|
||||
"""Tests for markitect.infospace schema and validator modules."""
|
||||
|
||||
import pytest
|
||||
|
||||
from markitect.infospace import (
|
||||
ECONOMIC_ENTITY_SCHEMA,
|
||||
BatchComplianceResult,
|
||||
ComplianceDiagnostic,
|
||||
ComplianceResult,
|
||||
EntityMeta,
|
||||
EntitySchema,
|
||||
EnumConstraint,
|
||||
SectionRequirement,
|
||||
SectionRule,
|
||||
validate_entities,
|
||||
validate_entity,
|
||||
)
|
||||
|
||||
|
||||
# ── Helpers ──────────────────────────────────────────────────────────
|
||||
|
||||
def _compliant_entity(**overrides) -> EntityMeta:
    """Build the baseline EntityMeta fixture shared by the tests in this module.

    The baseline is the entity the tests expect to validate cleanly against
    ECONOMIC_ENTITY_SCHEMA.  Keyword overrides are merged over the baseline
    before construction, so a test can alter one field at a time.
    """
    baseline = {
        "slug": "division_of_labour",
        "title": "Division of Labour",
        "h1_raw": "Division of Labour",
        "definition": (
            "The separation of a work process into a number of distinct "
            "tasks, each performed by a specialised worker, resulting in "
            "a significant increase in the productive powers of labour."
        ),
        "source_chapter": 'Book I, Chapter 1: "Of the Division of Labour"',
        "context": "The division of labour is the central argument of the chapter.",
        "domain": "Production",
        "original_wording": '"The greatest improvements in the productive powers…"',
        "modern_interpretation": "Remains foundational in economics.",
        "h1_is_title_case": True,
        "has_original_wording": True,
        # NOTE(review): the definition text above splits into 29
        # whitespace-separated words, not 30 — confirm which is intended.
        "definition_word_count": 30,
        "total_word_count": 100,
        "section_slugs": [
            "definition",
            "source_chapter",
            "context",
            "economic_domain",
            "smith_s_original_wording",
            "modern_interpretation",
        ],
        "source_path": "/tmp/division-of-labour.md",
    }
    # Later keys win, so overrides replace baseline values of the same name.
    return EntityMeta(**{**baseline, **overrides})
|
||||
|
||||
|
||||
# ── Single-entity validation ────────────────────────────────────────
|
||||
|
||||
class TestValidateEntityCompliant:
    """Validation of the unmodified baseline fixture against the default schema."""

    def test_fully_compliant_zero_diagnostics(self):
        """The baseline entity produces no diagnostics and at least one check ran."""
        outcome = validate_entity(_compliant_entity(), ECONOMIC_ENTITY_SCHEMA)

        assert outcome.diagnostics == []
        assert outcome.is_compliant is True
        assert outcome.error_count == 0
        assert outcome.warning_count == 0
        assert outcome.checks_run > 0

    def test_summary_shows_pass(self):
        """The summary text mentions both the PASS marker and the entity slug."""
        outcome = validate_entity(_compliant_entity(), ECONOMIC_ENTITY_SCHEMA)

        for fragment in ("PASS", "division_of_labour"):
            assert fragment in outcome.summary()
|
||||
|
||||
|
||||
class TestSectionMissing:
    """SECTION_MISSING behaviour for required versus optional sections."""

    def test_missing_required_section_error(self):
        """An empty required definition is reported and breaks compliance."""
        outcome = validate_entity(
            _compliant_entity(definition=""), ECONOMIC_ENTITY_SCHEMA
        )

        assert any(d.code == "SECTION_MISSING" for d in outcome.diagnostics)
        assert not outcome.is_compliant

    def test_empty_required_section_error(self):
        """A whitespace-only definition is reported as SECTION_MISSING too."""
        outcome = validate_entity(
            _compliant_entity(definition=" "), ECONOMIC_ENTITY_SCHEMA
        )

        assert any(d.code == "SECTION_MISSING" for d in outcome.diagnostics)

    def test_optional_section_absent_no_diagnostic(self):
        """Blanking only the optional sections leaves the entity compliant."""
        stripped = _compliant_entity(original_wording="", modern_interpretation="")
        outcome = validate_entity(stripped, ECONOMIC_ENTITY_SCHEMA)

        # Only optional content was removed, so no errors are expected.
        assert outcome.is_compliant is True
        assert outcome.error_count == 0

        # Neither presence-related code may appear for optional sections.
        emitted = {d.code for d in outcome.diagnostics}
        assert {"SECTION_MISSING", "SECTION_RECOMMENDED"}.isdisjoint(emitted)
|
||||
|
||||
|
||||
class TestSectionRecommended:
    """Behaviour of sections declared with the RECOMMENDED requirement."""

    def test_recommended_section_missing_warning(self):
        """A missing recommended section yields one warning and stays compliant."""
        definition_rule = SectionRule(
            slug="definition",
            label="Definition",
            requirement=SectionRequirement.RECOMMENDED,
        )
        schema = EntitySchema(name="Test Schema", section_rules=(definition_rule,))

        outcome = validate_entity(_compliant_entity(definition=""), schema)

        emitted = [d.code for d in outcome.diagnostics]
        assert "SECTION_RECOMMENDED" in emitted

        recommended_severities = [
            d.severity for d in outcome.diagnostics if d.code == "SECTION_RECOMMENDED"
        ]
        assert recommended_severities == ["warning"]

        # A warning on its own does not make the entity non-compliant.
        assert outcome.is_compliant is True
|
||||
|
||||
|
||||
class TestWordCountBounds:
    """Word-count limits applied to the definition section by the default schema."""

    def test_definition_too_short_error(self):
        """A definition below the minimum yields one SECTION_TOO_SHORT error."""
        # NOTE(review): despite its wording, this literal holds eleven words.
        brief = "only ten words here to test the lower boundary check now"
        outcome = validate_entity(
            _compliant_entity(definition=brief), ECONOMIC_ENTITY_SCHEMA
        )

        too_short = [d for d in outcome.diagnostics if d.code == "SECTION_TOO_SHORT"]
        assert len(too_short) == 1
        assert too_short[0].severity == "error"
        assert not outcome.is_compliant

    def test_definition_too_long_warning(self):
        """A 200-word definition yields one SECTION_TOO_LONG warning only."""
        verbose = " ".join("word" for _ in range(200))
        outcome = validate_entity(
            _compliant_entity(definition=verbose), ECONOMIC_ENTITY_SCHEMA
        )

        too_long = [d for d in outcome.diagnostics if d.code == "SECTION_TOO_LONG"]
        assert len(too_long) == 1
        assert too_long[0].severity == "warning"
        # A warning on its own does not make the entity non-compliant.
        assert outcome.is_compliant is True

    def test_definition_at_min_boundary_passes(self):
        """Exactly 20 words is not reported as too short."""
        twenty_words = " ".join("word" for _ in range(20))
        outcome = validate_entity(
            _compliant_entity(definition=twenty_words), ECONOMIC_ENTITY_SCHEMA
        )

        emitted = [d.code for d in outcome.diagnostics]
        assert "SECTION_TOO_SHORT" not in emitted

    def test_definition_at_max_boundary_passes(self):
        """Exactly 150 words is not reported as too long."""
        one_fifty_words = " ".join("word" for _ in range(150))
        outcome = validate_entity(
            _compliant_entity(definition=one_fifty_words), ECONOMIC_ENTITY_SCHEMA
        )

        emitted = [d.code for d in outcome.diagnostics]
        assert "SECTION_TOO_LONG" not in emitted
|
||||
|
||||
|
||||
class TestH1Checks:
    """H1 heading diagnostics: title-case format and heading presence."""

    def test_slug_format_h1_warning(self):
        """A slug-style H1 yields one H1_NOT_TITLE_CASE warning by default."""
        entity = _compliant_entity(
            h1_raw="effectual-demand",
            h1_is_title_case=False,
        )
        result = validate_entity(entity, ECONOMIC_ENTITY_SCHEMA)

        h1_diags = [d for d in result.diagnostics if d.code == "H1_NOT_TITLE_CASE"]
        assert len(h1_diags) == 1
        assert h1_diags[0].severity == "warning"
        # Still compliant: the default schema reports this as a warning.
        assert result.is_compliant is True

    def test_h1_missing_error(self):
        """An entity with an empty slug and empty H1 reports H1_MISSING."""
        entity = _compliant_entity(slug="", h1_raw="")
        result = validate_entity(entity, ECONOMIC_ENTITY_SCHEMA)

        codes = [d.code for d in result.diagnostics]
        assert "H1_MISSING" in codes
        assert not result.is_compliant

    def test_h1_title_case_error_severity(self):
        """With h1_title_case_severity="error" the finding breaks compliance."""
        schema = EntitySchema(
            name="Strict",
            section_rules=(),
            h1_title_case_severity="error",
        )
        entity = _compliant_entity(h1_is_title_case=False)
        result = validate_entity(entity, schema)

        h1_diags = [d for d in result.diagnostics if d.code == "H1_NOT_TITLE_CASE"]
        # Guard before indexing so a missing diagnostic fails as a readable
        # assertion instead of an IndexError.
        assert h1_diags, "expected an H1_NOT_TITLE_CASE diagnostic"
        assert h1_diags[0].severity == "error"
        assert not result.is_compliant
|
||||
|
||||
|
||||
class TestEnumConstraints:
    """Enum checks on the domain field under the default schema."""

    def test_unknown_domain_warning(self):
        """An unlisted domain yields one ENUM_VALUE_UNKNOWN warning."""
        outcome = validate_entity(
            _compliant_entity(domain="Metaphysics"), ECONOMIC_ENTITY_SCHEMA
        )

        unknown = [d for d in outcome.diagnostics if d.code == "ENUM_VALUE_UNKNOWN"]
        assert len(unknown) == 1
        assert unknown[0].severity == "warning"
        assert outcome.is_compliant is True

    def test_empty_domain_no_enum_diagnostic(self):
        """Empty domain triggers SECTION_MISSING, not ENUM_VALUE_UNKNOWN."""
        outcome = validate_entity(_compliant_entity(domain=""), ECONOMIC_ENTITY_SCHEMA)
        emitted = [d.code for d in outcome.diagnostics]

        assert emitted.count("ENUM_VALUE_UNKNOWN") == 0
        # The required section itself is still reported as missing.
        assert emitted.count("SECTION_MISSING") >= 1

    def test_valid_domain_no_diagnostic(self):
        """None of the five listed domains produces an enum diagnostic."""
        accepted = ("Production", "Exchange", "Distribution", "Regulation", "General Theory")
        for domain in accepted:
            outcome = validate_entity(
                _compliant_entity(domain=domain), ECONOMIC_ENTITY_SCHEMA
            )
            unknown = [d for d in outcome.diagnostics if d.code == "ENUM_VALUE_UNKNOWN"]
            assert not unknown, f"Unexpected enum diagnostic for domain '{domain}'"
|
||||
|
||||
|
||||
class TestMultipleIssues:
    """Several independent problems on one entity are all reported together."""

    def test_multiple_issues_on_one_entity(self):
        """A short definition, unknown domain and non-title-case H1 all surface."""
        flawed = _compliant_entity(
            definition="too short",
            domain="UnknownDomain",
            h1_is_title_case=False,
        )
        outcome = validate_entity(flawed, ECONOMIC_ENTITY_SCHEMA)

        emitted = {d.code for d in outcome.diagnostics}
        expected = {"SECTION_TOO_SHORT", "ENUM_VALUE_UNKNOWN", "H1_NOT_TITLE_CASE"}
        assert expected.issubset(emitted)
        assert len(outcome.diagnostics) >= 3
|
||||
|
||||
|
||||
class TestCustomSchema:
    """Validation against schemas built in the test, not the default one."""

    def test_custom_schema_different_rules(self):
        """An entity matching custom bounds and enum values is compliant."""
        schema = EntitySchema(
            name="Custom",
            section_rules=(
                SectionRule(
                    slug="definition",
                    label="Definition",
                    requirement=SectionRequirement.REQUIRED,
                    min_words=5,
                    max_words=50,
                ),
            ),
            enum_constraints=(
                EnumConstraint(
                    field_name="domain",
                    allowed_values=("Alpha", "Beta"),
                    severity="error",
                ),
            ),
            h1_title_case_severity="error",
            require_h1=False,
        )
        # The definition is exactly five words, i.e. right at min_words.
        entity = _compliant_entity(
            definition="just five words here exactly",
            domain="Alpha",
        )
        result = validate_entity(entity, schema)

        assert result.is_compliant is True
        assert result.schema_name == "Custom"

    def test_custom_enum_error_severity(self):
        """An enum constraint with severity="error" breaks compliance."""
        schema = EntitySchema(
            name="Strict Enum",
            section_rules=(),
            enum_constraints=(
                EnumConstraint(
                    field_name="domain",
                    allowed_values=("A",),
                    severity="error",
                ),
            ),
        )
        entity = _compliant_entity(domain="B")
        result = validate_entity(entity, schema)

        assert not result.is_compliant
        enum_diags = [d for d in result.diagnostics if d.code == "ENUM_VALUE_UNKNOWN"]
        # Guard before indexing so a missing diagnostic fails as a readable
        # assertion instead of an IndexError.
        assert enum_diags, "expected an ENUM_VALUE_UNKNOWN diagnostic"
        assert enum_diags[0].severity == "error"
|
||||
|
||||
|
||||
# ── Batch validation ────────────────────────────────────────────────
|
||||
|
||||
class TestBatchValidation:
    """Batch validation through validate_entities and its aggregate result."""

    def test_empty_list(self):
        """An empty batch reports zero for every aggregate counter."""
        result = validate_entities([], ECONOMIC_ENTITY_SCHEMA)

        assert result.total_entities == 0
        assert result.compliant_count == 0
        assert result.total_errors == 0
        assert result.total_warnings == 0

    def test_mixed_compliance(self):
        """One good and one bad entity are counted separately."""
        good = _compliant_entity()
        bad = _compliant_entity(slug="bad", definition="")
        result = validate_entities([good, bad], ECONOMIC_ENTITY_SCHEMA)

        assert result.total_entities == 2
        assert result.compliant_count == 1
        assert result.non_compliant_count == 1
        assert result.total_errors >= 1

    def test_summary_format(self):
        """The batch summary names the schema, the totals and both slugs."""
        good = _compliant_entity()
        bad = _compliant_entity(slug="bad_entity", definition="too short")
        result = validate_entities([good, bad], ECONOMIC_ENTITY_SCHEMA)
        summary = result.summary()

        assert "Schema: Economic Entity" in summary
        assert "Entities: 2" in summary
        assert "Compliant: 1/2" in summary
        assert "division_of_labour" in summary
        assert "bad_entity" in summary

    def test_aggregate_counts(self):
        """Batch totals equal the sums of the per-entity counts."""
        entities = [
            _compliant_entity(slug="e1"),
            _compliant_entity(slug="e2", definition="short"),
            _compliant_entity(slug="e3", domain="Unknown", h1_is_title_case=False),
        ]
        result = validate_entities(entities, ECONOMIC_ENTITY_SCHEMA)

        assert result.total_entities == 3
        # One per-entity result per input; the sums below then cover every
        # result rather than three hard-coded indices.
        assert len(result.results) == 3
        assert result.total_errors == sum(r.error_count for r in result.results)
        assert result.total_warnings == sum(r.warning_count for r in result.results)

    def test_schema_name_propagated(self):
        """The batch result carries the name of the schema it was run with."""
        result = validate_entities([], ECONOMIC_ENTITY_SCHEMA)

        assert result.schema_name == "Economic Entity"
|
||||
|
||||
|
||||
# ── Default schema checks ──────────────────────────────────────────
|
||||
|
||||
class TestDefaultSchema:
    """Shape and immutability checks on ECONOMIC_ENTITY_SCHEMA itself."""

    def test_correct_section_count(self):
        """The default schema declares six section rules."""
        assert len(ECONOMIC_ENTITY_SCHEMA.section_rules) == 6

    def test_required_sections(self):
        """Exactly four section slugs are marked REQUIRED."""
        required_slugs = {
            rule.slug
            for rule in ECONOMIC_ENTITY_SCHEMA.section_rules
            if rule.requirement == SectionRequirement.REQUIRED
        }
        assert required_slugs == {
            "definition",
            "source_chapter",
            "context",
            "economic_domain",
        }

    def test_optional_sections(self):
        """Exactly two section slugs are marked OPTIONAL."""
        optional_slugs = {
            rule.slug
            for rule in ECONOMIC_ENTITY_SCHEMA.section_rules
            if rule.requirement == SectionRequirement.OPTIONAL
        }
        assert optional_slugs == {"smith_s_original_wording", "modern_interpretation"}

    def test_domain_enum_values(self):
        """The first enum constraint targets domain with five allowed values."""
        first_constraint = ECONOMIC_ENTITY_SCHEMA.enum_constraints[0]
        expected_values = {
            "Production",
            "Exchange",
            "Distribution",
            "Regulation",
            "General Theory",
        }

        assert first_constraint.field_name == "domain"
        assert set(first_constraint.allowed_values) == expected_values

    def test_schema_is_frozen(self):
        """Assigning to a schema attribute raises AttributeError."""
        with pytest.raises(AttributeError):
            ECONOMIC_ENTITY_SCHEMA.name = "Changed"

    def test_section_rule_is_frozen(self):
        """Assigning to a section rule attribute raises AttributeError."""
        first_rule = ECONOMIC_ENTITY_SCHEMA.section_rules[0]
        with pytest.raises(AttributeError):
            first_rule.slug = "changed"

    def test_enum_constraint_is_frozen(self):
        """Assigning to an enum constraint attribute raises AttributeError."""
        first_constraint = ECONOMIC_ENTITY_SCHEMA.enum_constraints[0]
        with pytest.raises(AttributeError):
            first_constraint.field_name = "changed"
|
||||
|
||||
|
||||
# ── ComplianceDiagnostic __str__ ────────────────────────────────────
|
||||
|
||||
class TestDiagnosticStr:
    """String rendering of ComplianceDiagnostic."""

    def test_basic_str(self):
        """The rendering contains the upper-cased severity, code and message."""
        diagnostic = ComplianceDiagnostic(code="TEST", message="test msg", severity="error")
        rendered = str(diagnostic)

        assert "[ERROR] TEST: test msg" in rendered

    def test_str_with_section(self):
        """A diagnostic with a section includes it in the rendering."""
        diagnostic = ComplianceDiagnostic(
            code="SECTION_MISSING",
            message="Missing.",
            severity="error",
            section="definition",
        )

        assert "(section: definition)" in str(diagnostic)

    def test_str_with_field(self):
        """A diagnostic with a field includes it in the rendering."""
        diagnostic = ComplianceDiagnostic(
            code="ENUM_VALUE_UNKNOWN",
            message="Unknown.",
            severity="warning",
            field="domain",
        )

        assert "(field: domain)" in str(diagnostic)
|
||||
|
||||
|
||||
# ── ComplianceResult properties ─────────────────────────────────────
|
||||
|
||||
class TestComplianceResultProperties:
    """Derived properties of a ComplianceResult populated by hand."""

    def test_errors_property(self):
        """Errors and warnings are split by severity; any error breaks compliance."""
        outcome = ComplianceResult(entity_slug="test", schema_name="Test")
        # Diagnostics are assigned after construction: two errors, one warning.
        outcome.diagnostics = [
            ComplianceDiagnostic(code=code, message=message, severity=severity)
            for code, message, severity in (
                ("A", "a", "error"),
                ("B", "b", "warning"),
                ("C", "c", "error"),
            )
        ]

        assert len(outcome.errors) == 2
        assert len(outcome.warnings) == 1
        assert outcome.error_count == 2
        assert outcome.warning_count == 1
        assert not outcome.is_compliant

    def test_summary_fail(self):
        """A result holding an error diagnostic summarises as FAIL."""
        outcome = ComplianceResult(entity_slug="test", schema_name="Test", checks_run=5)
        single_error = ComplianceDiagnostic(code="A", message="a", severity="error")
        outcome.diagnostics = [single_error]

        assert "FAIL" in outcome.summary()
|
||||
Reference in New Issue
Block a user