"""Tests for markitect.infospace.entity_parser and EntityMeta.""" import logging from pathlib import Path import pytest from markitect.infospace import EntityMeta, parse_entity_file, parse_entity_directory # ── Fixtures ──────────────────────────────────────────────────────── COMPLETE_ENTITY = """\ # Division of Labour ## Definition The separation of a work process into a number of distinct tasks, each performed by a specialised worker, resulting in a significant increase in the productive powers of labour. ## Source Chapter Book I, Chapter 1: "Of the Division of Labour" ## Context The division of labour is the central argument of the chapter. ## Economic Domain Production ## Smith's Original Wording "The greatest improvements in the productive powers of labour…" ## Modern Interpretation The division of labour remains a foundational concept in economics. """ MINIMAL_ENTITY = """\ # Minimal Entity ## Definition A brief definition. ## Source Chapter Book I, Chapter 1 ## Context Some context. ## Economic Domain Exchange """ SLUG_H1_ENTITY = """\ # effectual-demand ## Definition Effectual demand is the demand by consumers who are willing and able to pay. ## Source Chapter Book 1, Chapter 7 ## Context Context for effectual demand. ## Economic Domain Exchange ## Smith's Original Wording "Such people may be called the effectual demanders…" ## Modern Interpretation Represents the intersection of desire and purchasing power. """ NO_H1 = """\ ## Only H2 Some content. """ # ── parse_entity_file ──────────────────────────────────────────────── class TestParseEntityFile: def test_complete_entity(self, tmp_path): f = tmp_path / "division-of-labour.md" f.write_text(COMPLETE_ENTITY) meta = parse_entity_file(f) assert meta.slug == "division_of_labour" assert meta.title == "Division of Labour" assert meta.h1_is_title_case is True assert meta.has_original_wording is True assert meta.domain == "Production" assert meta.definition_word_count > 20 assert "separation" in meta.definition.lower() assert meta.source_path == str(f) assert "definition" in meta.section_slugs assert "smith_s_original_wording" in meta.section_slugs def test_minimal_entity(self, tmp_path): f = tmp_path / "minimal-entity.md" f.write_text(MINIMAL_ENTITY) meta = parse_entity_file(f) assert meta.slug == "minimal_entity" assert meta.has_original_wording is False assert meta.original_wording == "" assert meta.modern_interpretation == "" assert meta.domain == "Exchange" def test_slug_format_h1(self, tmp_path): f = tmp_path / "effectual-demand.md" f.write_text(SLUG_H1_ENTITY) meta = parse_entity_file(f) assert meta.h1_raw == "effectual-demand" assert meta.h1_is_title_case is False assert meta.slug == "effectual_demand" assert meta.has_original_wording is True def test_missing_h1_raises(self, tmp_path): f = tmp_path / "no-h1.md" f.write_text(NO_H1) with pytest.raises(ValueError, match="No H1"): parse_entity_file(f) def test_missing_sections_return_empty(self, tmp_path): f = tmp_path / "minimal.md" f.write_text(MINIMAL_ENTITY) meta = parse_entity_file(f) # Optional sections not present → empty string assert meta.original_wording == "" assert meta.modern_interpretation == "" def test_word_count_accuracy(self, tmp_path): f = tmp_path / "test.md" f.write_text("# Test\n\n## Definition\n\none two three four five\n") meta = parse_entity_file(f) assert meta.definition_word_count == 5 # ── parse_entity_directory ────────────────────────────────────────── class TestParseEntityDirectory: def _make_dir(self, tmp_path): """Create a temporary entity directory.""" d = tmp_path / "entities" d.mkdir() (d / "entity-a.md").write_text(COMPLETE_ENTITY) (d / "entity-b.md").write_text(MINIMAL_ENTITY) # files that should be excluded by default (d / "book-1-chapter-01-entities.md").write_text("# View\n\nview file") (d / "book-1-chapter-01-prompt.md").write_text("# Prompt\n\nprompt file") return d def test_excludes_view_and_prompt(self, tmp_path): d = self._make_dir(tmp_path) results = parse_entity_directory(d) slugs = {e.slug for e in results} assert "division_of_labour" in slugs assert "minimal_entity" in slugs # Excluded files should not be parsed as entities assert len(results) == 2 def test_custom_exclude_patterns(self, tmp_path): d = self._make_dir(tmp_path) # Only exclude prompt files, allow entity views results = parse_entity_directory(d, exclude_patterns=[r".*-prompt\.md$"]) assert len(results) == 3 # entity-a, entity-b, chapter-01-entities def test_malformed_skipped_with_warning(self, tmp_path, caplog): d = tmp_path / "entities" d.mkdir() (d / "good.md").write_text(COMPLETE_ENTITY) (d / "bad.md").write_text(NO_H1) with caplog.at_level(logging.WARNING): results = parse_entity_directory(d) assert len(results) == 1 assert "bad.md" in caplog.text # ── EntityMeta round-trip ─────────────────────────────────────────── class TestEntityMetaRoundTrip: def test_to_dict_from_dict(self, tmp_path): f = tmp_path / "entity.md" f.write_text(COMPLETE_ENTITY) original = parse_entity_file(f) data = original.to_dict() restored = EntityMeta.from_dict(data) assert restored.slug == original.slug assert restored.title == original.title assert restored.definition == original.definition assert restored.h1_is_title_case == original.h1_is_title_case assert restored.section_slugs == original.section_slugs assert restored.definition_word_count == original.definition_word_count def test_from_dict_ignores_unknown_keys(self): data = { "slug": "test", "title": "Test", "h1_raw": "Test", "unknown_field": "should be ignored", } meta = EntityMeta.from_dict(data) assert meta.slug == "test" assert not hasattr(meta, "unknown_field") or "unknown_field" not in meta.__dict__