Extract section-tree algorithm from SchemaGenerator into standalone core/section_tree.py and build markitect/infospace/ package with EntityMeta dataclass and parse_entity_file/parse_entity_directory. Foundation for schema compliance, coverage, and granularity metrics. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
231 lines
6.8 KiB
Python
231 lines
6.8 KiB
Python
"""Tests for markitect.infospace.entity_parser and EntityMeta."""
|
|
|
|
import logging
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from markitect.infospace import EntityMeta, parse_entity_file, parse_entity_directory
|
|
|
|
|
|
# ── Fixtures ────────────────────────────────────────────────────────
|
|
|
|
# Entity document with an H1 title and all six H2 sections exercised by the
# tests: Definition, Source Chapter, Context, Economic Domain,
# Smith's Original Wording and Modern Interpretation.
COMPLETE_ENTITY = """\
# Division of Labour

## Definition

The separation of a work process into a number of distinct tasks, each performed
by a specialised worker, resulting in a significant increase in the productive
powers of labour.

## Source Chapter

Book I, Chapter 1: "Of the Division of Labour"

## Context

The division of labour is the central argument of the chapter.

## Economic Domain

Production

## Smith's Original Wording

"The greatest improvements in the productive powers of labour…"

## Modern Interpretation

The division of labour remains a foundational concept in economics.
"""
|
|
|
|
# Entity document carrying only four H2 sections (Definition, Source Chapter,
# Context, Economic Domain); it has no "Smith's Original Wording" or
# "Modern Interpretation" section.
MINIMAL_ENTITY = """\
# Minimal Entity

## Definition

A brief definition.

## Source Chapter

Book I, Chapter 1

## Context

Some context.

## Economic Domain

Exchange
"""
|
|
|
|
# Entity document whose H1 is written as a hyphenated slug
# ("effectual-demand") rather than a title-cased heading; all six H2 sections
# are present.
SLUG_H1_ENTITY = """\
# effectual-demand

## Definition

Effectual demand is the demand by consumers who are willing and able to pay.

## Source Chapter

Book 1, Chapter 7

## Context

Context for effectual demand.

## Economic Domain

Exchange

## Smith's Original Wording

"Such people may be called the effectual demanders…"

## Modern Interpretation

Represents the intersection of desire and purchasing power.
"""
|
|
|
|
# Malformed document: starts at an H2 and contains no H1 heading at all.
NO_H1 = """\
## Only H2

Some content.
"""
|
|
|
|
|
|
# ── parse_entity_file ────────────────────────────────────────────────
|
|
|
|
class TestParseEntityFile:
    """Tests for ``parse_entity_file`` applied to single Markdown files.

    Every fixture is written with an explicit UTF-8 encoding: some fixtures
    contain non-ASCII characters ("…"), and ``Path.write_text`` otherwise
    falls back to the platform's locale encoding, which would make the bytes
    on disk differ between machines.
    """

    def test_complete_entity(self, tmp_path):
        """A document with all sections populates every checked field."""
        f = tmp_path / "division-of-labour.md"
        f.write_text(COMPLETE_ENTITY, encoding="utf-8")
        meta = parse_entity_file(f)

        assert meta.slug == "division_of_labour"
        assert meta.title == "Division of Labour"
        assert meta.h1_is_title_case is True
        assert meta.has_original_wording is True
        assert meta.domain == "Production"
        assert meta.definition_word_count > 20
        assert "separation" in meta.definition.lower()
        assert meta.source_path == str(f)
        assert "definition" in meta.section_slugs
        assert "smith_s_original_wording" in meta.section_slugs

    def test_minimal_entity(self, tmp_path):
        """A document without the optional sections reports them as empty."""
        f = tmp_path / "minimal-entity.md"
        f.write_text(MINIMAL_ENTITY, encoding="utf-8")
        meta = parse_entity_file(f)

        assert meta.slug == "minimal_entity"
        assert meta.has_original_wording is False
        assert meta.original_wording == ""
        assert meta.modern_interpretation == ""
        assert meta.domain == "Exchange"

    def test_slug_format_h1(self, tmp_path):
        """A hyphenated-slug H1 is kept raw and flagged as not title case."""
        f = tmp_path / "effectual-demand.md"
        f.write_text(SLUG_H1_ENTITY, encoding="utf-8")
        meta = parse_entity_file(f)

        assert meta.h1_raw == "effectual-demand"
        assert meta.h1_is_title_case is False
        assert meta.slug == "effectual_demand"
        assert meta.has_original_wording is True

    def test_missing_h1_raises(self, tmp_path):
        """A document with no H1 heading raises ``ValueError`` ("No H1")."""
        f = tmp_path / "no-h1.md"
        f.write_text(NO_H1, encoding="utf-8")
        with pytest.raises(ValueError, match="No H1"):
            parse_entity_file(f)

    def test_missing_sections_return_empty(self, tmp_path):
        """Absent optional sections come back as empty strings."""
        f = tmp_path / "minimal.md"
        f.write_text(MINIMAL_ENTITY, encoding="utf-8")
        meta = parse_entity_file(f)

        # Optional sections not present → empty string
        assert meta.original_wording == ""
        assert meta.modern_interpretation == ""

    def test_word_count_accuracy(self, tmp_path):
        """A five-word Definition body gives ``definition_word_count == 5``."""
        f = tmp_path / "test.md"
        f.write_text(
            "# Test\n\n## Definition\n\none two three four five\n",
            encoding="utf-8",
        )
        meta = parse_entity_file(f)
        assert meta.definition_word_count == 5
|
|
|
|
|
|
# ── parse_entity_directory ──────────────────────────────────────────
|
|
|
|
class TestParseEntityDirectory:
    """Tests for ``parse_entity_directory`` over a directory of Markdown files.

    Fixtures are written with an explicit UTF-8 encoding because
    ``COMPLETE_ENTITY`` contains non-ASCII characters and ``Path.write_text``
    would otherwise use the platform's locale encoding.
    """

    def _make_dir(self, tmp_path):
        """Create a temporary entity directory.

        The directory holds two entity files plus one ``*-entities.md`` and
        one ``*-prompt.md`` file, and is returned as a ``Path``.
        """
        d = tmp_path / "entities"
        d.mkdir()
        (d / "entity-a.md").write_text(COMPLETE_ENTITY, encoding="utf-8")
        (d / "entity-b.md").write_text(MINIMAL_ENTITY, encoding="utf-8")
        # files that should be excluded by default
        (d / "book-1-chapter-01-entities.md").write_text(
            "# View\n\nview file", encoding="utf-8"
        )
        (d / "book-1-chapter-01-prompt.md").write_text(
            "# Prompt\n\nprompt file", encoding="utf-8"
        )
        return d

    def test_excludes_view_and_prompt(self, tmp_path):
        """With default arguments only the two entity files are returned."""
        d = self._make_dir(tmp_path)
        results = parse_entity_directory(d)
        slugs = {e.slug for e in results}

        assert "division_of_labour" in slugs
        assert "minimal_entity" in slugs
        # Excluded files should not be parsed as entities
        assert len(results) == 2

    def test_custom_exclude_patterns(self, tmp_path):
        """Passing ``exclude_patterns`` replaces the default exclusions."""
        d = self._make_dir(tmp_path)
        # Only exclude prompt files, allow entity views
        results = parse_entity_directory(d, exclude_patterns=[r".*-prompt\.md$"])
        assert len(results) == 3  # entity-a, entity-b, chapter-01-entities

    def test_malformed_skipped_with_warning(self, tmp_path, caplog):
        """A file without an H1 is skipped and its name appears in the log."""
        d = tmp_path / "entities"
        d.mkdir()
        (d / "good.md").write_text(COMPLETE_ENTITY, encoding="utf-8")
        (d / "bad.md").write_text(NO_H1, encoding="utf-8")

        with caplog.at_level(logging.WARNING):
            results = parse_entity_directory(d)

        assert len(results) == 1
        assert "bad.md" in caplog.text
|
|
|
|
|
|
# ── EntityMeta round-trip ───────────────────────────────────────────
|
|
|
|
class TestEntityMetaRoundTrip:
    """Tests for ``EntityMeta.to_dict`` / ``EntityMeta.from_dict``."""

    def test_to_dict_from_dict(self, tmp_path):
        """``from_dict(to_dict(meta))`` preserves the fields compared here."""
        f = tmp_path / "entity.md"
        # The fixture contains non-ASCII text ("…"); pin the encoding so the
        # file bytes do not depend on the platform's locale default.
        f.write_text(COMPLETE_ENTITY, encoding="utf-8")
        original = parse_entity_file(f)

        data = original.to_dict()
        restored = EntityMeta.from_dict(data)

        assert restored.slug == original.slug
        assert restored.title == original.title
        assert restored.definition == original.definition
        assert restored.h1_is_title_case == original.h1_is_title_case
        assert restored.section_slugs == original.section_slugs
        assert restored.definition_word_count == original.definition_word_count

    def test_from_dict_ignores_unknown_keys(self):
        """Keys that are not ``EntityMeta`` fields are dropped by ``from_dict``."""
        data = {
            "slug": "test",
            "title": "Test",
            "h1_raw": "Test",
            "unknown_field": "should be ignored",
        }
        meta = EntityMeta.from_dict(data)
        assert meta.slug == "test"
        # A single hasattr check is sufficient: the unknown key must not
        # surface as an attribute at all. Probing ``meta.__dict__`` as well is
        # redundant and would raise AttributeError on a slotted class.
        assert not hasattr(meta, "unknown_field")
|