"""Tests for markitect.infospace schema and validator modules.""" import pytest from markitect.infospace import ( ECONOMIC_ENTITY_SCHEMA, BatchComplianceResult, ComplianceDiagnostic, ComplianceResult, EntityMeta, EntitySchema, EnumConstraint, SectionRequirement, SectionRule, validate_entities, validate_entity, ) # ── Helpers ────────────────────────────────────────────────────────── def _compliant_entity(**overrides) -> EntityMeta: """Return an EntityMeta that passes ECONOMIC_ENTITY_SCHEMA.""" defaults = dict( slug="division_of_labour", title="Division of Labour", h1_raw="Division of Labour", definition=( "The separation of a work process into a number of distinct " "tasks, each performed by a specialised worker, resulting in " "a significant increase in the productive powers of labour." ), source_chapter='Book I, Chapter 1: "Of the Division of Labour"', context="The division of labour is the central argument of the chapter.", domain="Production", original_wording='"The greatest improvements in the productive powers…"', modern_interpretation="Remains foundational in economics.", h1_is_title_case=True, has_original_wording=True, definition_word_count=30, total_word_count=100, section_slugs=[ "definition", "source_chapter", "context", "economic_domain", "smith_s_original_wording", "modern_interpretation", ], source_path="/tmp/division-of-labour.md", ) defaults.update(overrides) return EntityMeta(**defaults) # ── Single-entity validation ──────────────────────────────────────── class TestValidateEntityCompliant: def test_fully_compliant_zero_diagnostics(self): entity = _compliant_entity() result = validate_entity(entity, ECONOMIC_ENTITY_SCHEMA) assert result.diagnostics == [] assert result.is_compliant is True assert result.error_count == 0 assert result.warning_count == 0 assert result.checks_run > 0 def test_summary_shows_pass(self): entity = _compliant_entity() result = validate_entity(entity, ECONOMIC_ENTITY_SCHEMA) assert "PASS" in result.summary() assert "division_of_labour" in result.summary() class TestSectionMissing: def test_missing_required_section_error(self): entity = _compliant_entity(definition="") result = validate_entity(entity, ECONOMIC_ENTITY_SCHEMA) codes = [d.code for d in result.diagnostics] assert "SECTION_MISSING" in codes assert not result.is_compliant def test_empty_required_section_error(self): entity = _compliant_entity(definition=" ") result = validate_entity(entity, ECONOMIC_ENTITY_SCHEMA) codes = [d.code for d in result.diagnostics] assert "SECTION_MISSING" in codes def test_optional_section_absent_no_diagnostic(self): entity = _compliant_entity(original_wording="", modern_interpretation="") result = validate_entity(entity, ECONOMIC_ENTITY_SCHEMA) # Only optional sections removed — should still be fully compliant assert result.is_compliant is True assert result.error_count == 0 # No SECTION_MISSING or SECTION_RECOMMENDED for optional sections section_codes = {d.code for d in result.diagnostics} assert "SECTION_MISSING" not in section_codes assert "SECTION_RECOMMENDED" not in section_codes class TestSectionRecommended: def test_recommended_section_missing_warning(self): schema = EntitySchema( name="Test Schema", section_rules=( SectionRule( slug="definition", label="Definition", requirement=SectionRequirement.RECOMMENDED, ), ), ) entity = _compliant_entity(definition="") result = validate_entity(entity, schema) codes = [d.code for d in result.diagnostics] assert "SECTION_RECOMMENDED" in codes severities = [d.severity for d in result.diagnostics if d.code == "SECTION_RECOMMENDED"] assert severities == ["warning"] # Warnings don't break compliance assert result.is_compliant is True class TestWordCountBounds: def test_definition_too_short_error(self): entity = _compliant_entity(definition="only ten words here to test the lower boundary check now") result = validate_entity(entity, ECONOMIC_ENTITY_SCHEMA) short_diags = [d for d in result.diagnostics if d.code == "SECTION_TOO_SHORT"] assert len(short_diags) == 1 assert short_diags[0].severity == "error" assert not result.is_compliant def test_definition_too_long_warning(self): long_def = " ".join(["word"] * 200) entity = _compliant_entity(definition=long_def) result = validate_entity(entity, ECONOMIC_ENTITY_SCHEMA) long_diags = [d for d in result.diagnostics if d.code == "SECTION_TOO_LONG"] assert len(long_diags) == 1 assert long_diags[0].severity == "warning" # Warnings don't break compliance assert result.is_compliant is True def test_definition_at_min_boundary_passes(self): exactly_20 = " ".join(["word"] * 20) entity = _compliant_entity(definition=exactly_20) result = validate_entity(entity, ECONOMIC_ENTITY_SCHEMA) codes = [d.code for d in result.diagnostics] assert "SECTION_TOO_SHORT" not in codes def test_definition_at_max_boundary_passes(self): exactly_150 = " ".join(["word"] * 150) entity = _compliant_entity(definition=exactly_150) result = validate_entity(entity, ECONOMIC_ENTITY_SCHEMA) codes = [d.code for d in result.diagnostics] assert "SECTION_TOO_LONG" not in codes class TestH1Checks: def test_slug_format_h1_warning(self): entity = _compliant_entity( h1_raw="effectual-demand", h1_is_title_case=False, ) result = validate_entity(entity, ECONOMIC_ENTITY_SCHEMA) h1_diags = [d for d in result.diagnostics if d.code == "H1_NOT_TITLE_CASE"] assert len(h1_diags) == 1 assert h1_diags[0].severity == "warning" # Still compliant (it's a warning) assert result.is_compliant is True def test_h1_missing_error(self): entity = _compliant_entity(slug="", h1_raw="") result = validate_entity(entity, ECONOMIC_ENTITY_SCHEMA) codes = [d.code for d in result.diagnostics] assert "H1_MISSING" in codes assert not result.is_compliant def test_h1_title_case_error_severity(self): schema = EntitySchema( name="Strict", section_rules=(), h1_title_case_severity="error", ) entity = _compliant_entity(h1_is_title_case=False) result = validate_entity(entity, schema) h1_diags = [d for d in result.diagnostics if d.code == "H1_NOT_TITLE_CASE"] assert h1_diags[0].severity == "error" assert not result.is_compliant class TestEnumConstraints: def test_unknown_domain_warning(self): entity = _compliant_entity(domain="Metaphysics") result = validate_entity(entity, ECONOMIC_ENTITY_SCHEMA) enum_diags = [d for d in result.diagnostics if d.code == "ENUM_VALUE_UNKNOWN"] assert len(enum_diags) == 1 assert enum_diags[0].severity == "warning" assert result.is_compliant is True def test_empty_domain_no_enum_diagnostic(self): """Empty domain triggers SECTION_MISSING, not ENUM_VALUE_UNKNOWN.""" entity = _compliant_entity(domain="") result = validate_entity(entity, ECONOMIC_ENTITY_SCHEMA) enum_codes = [d.code for d in result.diagnostics if d.code == "ENUM_VALUE_UNKNOWN"] assert len(enum_codes) == 0 # But SECTION_MISSING is raised for the required section missing_codes = [d.code for d in result.diagnostics if d.code == "SECTION_MISSING"] assert len(missing_codes) >= 1 def test_valid_domain_no_diagnostic(self): for domain in ("Production", "Exchange", "Distribution", "Regulation", "General Theory"): entity = _compliant_entity(domain=domain) result = validate_entity(entity, ECONOMIC_ENTITY_SCHEMA) enum_diags = [d for d in result.diagnostics if d.code == "ENUM_VALUE_UNKNOWN"] assert len(enum_diags) == 0, f"Unexpected enum diagnostic for domain '{domain}'" class TestMultipleIssues: def test_multiple_issues_on_one_entity(self): entity = _compliant_entity( definition="too short", domain="UnknownDomain", h1_is_title_case=False, ) result = validate_entity(entity, ECONOMIC_ENTITY_SCHEMA) codes = {d.code for d in result.diagnostics} assert "SECTION_TOO_SHORT" in codes assert "ENUM_VALUE_UNKNOWN" in codes assert "H1_NOT_TITLE_CASE" in codes assert len(result.diagnostics) >= 3 class TestCustomSchema: def test_custom_schema_different_rules(self): schema = EntitySchema( name="Custom", section_rules=( SectionRule( slug="definition", label="Definition", requirement=SectionRequirement.REQUIRED, min_words=5, max_words=50, ), ), enum_constraints=( EnumConstraint( field_name="domain", allowed_values=("Alpha", "Beta"), severity="error", ), ), h1_title_case_severity="error", require_h1=False, ) entity = _compliant_entity( definition="just five words here exactly", domain="Alpha", ) result = validate_entity(entity, schema) assert result.is_compliant is True assert result.schema_name == "Custom" def test_custom_enum_error_severity(self): schema = EntitySchema( name="Strict Enum", section_rules=(), enum_constraints=( EnumConstraint( field_name="domain", allowed_values=("A",), severity="error", ), ), ) entity = _compliant_entity(domain="B") result = validate_entity(entity, schema) assert not result.is_compliant enum_diags = [d for d in result.diagnostics if d.code == "ENUM_VALUE_UNKNOWN"] assert enum_diags[0].severity == "error" # ── Batch validation ──────────────────────────────────────────────── class TestBatchValidation: def test_empty_list(self): result = validate_entities([], ECONOMIC_ENTITY_SCHEMA) assert result.total_entities == 0 assert result.compliant_count == 0 assert result.total_errors == 0 assert result.total_warnings == 0 def test_mixed_compliance(self): good = _compliant_entity() bad = _compliant_entity(slug="bad", definition="") result = validate_entities([good, bad], ECONOMIC_ENTITY_SCHEMA) assert result.total_entities == 2 assert result.compliant_count == 1 assert result.non_compliant_count == 1 assert result.total_errors >= 1 def test_summary_format(self): good = _compliant_entity() bad = _compliant_entity(slug="bad_entity", definition="too short") result = validate_entities([good, bad], ECONOMIC_ENTITY_SCHEMA) summary = result.summary() assert "Schema: Economic Entity" in summary assert "Entities: 2" in summary assert "Compliant: 1/2" in summary assert "division_of_labour" in summary assert "bad_entity" in summary def test_aggregate_counts(self): entities = [ _compliant_entity(slug="e1"), _compliant_entity(slug="e2", definition="short"), _compliant_entity(slug="e3", domain="Unknown", h1_is_title_case=False), ] result = validate_entities(entities, ECONOMIC_ENTITY_SCHEMA) assert result.total_entities == 3 assert result.total_errors == result.results[0].error_count + result.results[1].error_count + result.results[2].error_count assert result.total_warnings == result.results[0].warning_count + result.results[1].warning_count + result.results[2].warning_count def test_schema_name_propagated(self): result = validate_entities([], ECONOMIC_ENTITY_SCHEMA) assert result.schema_name == "Economic Entity" # ── Default schema checks ────────────────────────────────────────── class TestDefaultSchema: def test_correct_section_count(self): assert len(ECONOMIC_ENTITY_SCHEMA.section_rules) == 6 def test_required_sections(self): required = [ r.slug for r in ECONOMIC_ENTITY_SCHEMA.section_rules if r.requirement == SectionRequirement.REQUIRED ] assert set(required) == {"definition", "source_chapter", "context", "economic_domain"} def test_optional_sections(self): optional = [ r.slug for r in ECONOMIC_ENTITY_SCHEMA.section_rules if r.requirement == SectionRequirement.OPTIONAL ] assert set(optional) == {"smith_s_original_wording", "modern_interpretation"} def test_domain_enum_values(self): domain_constraint = ECONOMIC_ENTITY_SCHEMA.enum_constraints[0] assert domain_constraint.field_name == "domain" assert set(domain_constraint.allowed_values) == { "Production", "Exchange", "Distribution", "Regulation", "General Theory", } def test_schema_is_frozen(self): with pytest.raises(AttributeError): ECONOMIC_ENTITY_SCHEMA.name = "Changed" def test_section_rule_is_frozen(self): rule = ECONOMIC_ENTITY_SCHEMA.section_rules[0] with pytest.raises(AttributeError): rule.slug = "changed" def test_enum_constraint_is_frozen(self): constraint = ECONOMIC_ENTITY_SCHEMA.enum_constraints[0] with pytest.raises(AttributeError): constraint.field_name = "changed" # ── ComplianceDiagnostic __str__ ──────────────────────────────────── class TestDiagnosticStr: def test_basic_str(self): d = ComplianceDiagnostic(code="TEST", message="test msg", severity="error") assert "[ERROR] TEST: test msg" in str(d) def test_str_with_section(self): d = ComplianceDiagnostic( code="SECTION_MISSING", message="Missing.", severity="error", section="definition", ) s = str(d) assert "(section: definition)" in s def test_str_with_field(self): d = ComplianceDiagnostic( code="ENUM_VALUE_UNKNOWN", message="Unknown.", severity="warning", field="domain", ) s = str(d) assert "(field: domain)" in s # ── ComplianceResult properties ───────────────────────────────────── class TestComplianceResultProperties: def test_errors_property(self): result = ComplianceResult(entity_slug="test", schema_name="Test") result.diagnostics = [ ComplianceDiagnostic(code="A", message="a", severity="error"), ComplianceDiagnostic(code="B", message="b", severity="warning"), ComplianceDiagnostic(code="C", message="c", severity="error"), ] assert len(result.errors) == 2 assert len(result.warnings) == 1 assert result.error_count == 2 assert result.warning_count == 1 assert not result.is_compliant def test_summary_fail(self): result = ComplianceResult(entity_slug="test", schema_name="Test", checks_run=5) result.diagnostics = [ ComplianceDiagnostic(code="A", message="a", severity="error"), ] assert "FAIL" in result.summary()