"""Tests for markitect.core.section_tree.""" from markitect.core.parser import parse_markdown_to_ast from markitect.core.section_tree import ( build_section_tree, extract_heading_content, extract_heading_level, extract_section_text, slugify, ) class TestSlugify: def test_simple_text(self): assert slugify("Hello World") == "hello_world" def test_german_umlauts(self): assert slugify("Ärger mit Über") == "aerger_mit_ueber" def test_special_characters(self): assert slugify("Smith's Original Wording") == "smith_s_original_wording" def test_empty_string(self): assert slugify("") == "feld" def test_trailing_underscores_stripped(self): assert slugify("--hello--") == "hello" def test_multiple_spaces(self): assert slugify("a b") == "a_b" class TestExtractHeadingLevel: def test_h1(self): assert extract_heading_level("h1") == 1 def test_h6(self): assert extract_heading_level("h6") == 6 def test_invalid_tag(self): assert extract_heading_level("p") == 1 def test_empty(self): assert extract_heading_level("") == 1 class TestExtractHeadingContent: def test_finds_inline_token(self): tokens = [ {"type": "heading_open", "tag": "h1"}, {"type": "inline", "content": "Hello"}, {"type": "heading_close", "tag": "h1"}, ] assert extract_heading_content(tokens, 0) == "Hello" def test_no_inline(self): tokens = [ {"type": "heading_open", "tag": "h1"}, {"type": "heading_close", "tag": "h1"}, ] assert extract_heading_content(tokens, 0) == "" class TestBuildSectionTree: def test_single_heading(self): md = "# Title\n\nSome text." tokens = parse_markdown_to_ast(md) tree = build_section_tree(tokens) assert tree["level"] == 0 assert len(tree["children"]) == 1 assert tree["children"][0]["heading"] == "Title" assert tree["children"][0]["level"] == 1 def test_nested_headings(self): md = "# Top\n\n## Sub\n\ntext\n\n## Sub2\n\nmore" tokens = parse_markdown_to_ast(md) tree = build_section_tree(tokens) top = tree["children"][0] assert top["heading"] == "Top" assert len(top["children"]) == 2 assert top["children"][0]["heading"] == "Sub" assert top["children"][1]["heading"] == "Sub2" def test_max_depth(self): md = "# Top\n\n## Sub\n\n### Deep\n\ntext" tokens = parse_markdown_to_ast(md) tree = build_section_tree(tokens, max_depth=2) top = tree["children"][0] sub = top["children"][0] # H3 should be excluded from tree assert len(sub["children"]) == 0 def test_content_tokens_captured(self): md = "# Title\n\nParagraph text here." tokens = parse_markdown_to_ast(md) tree = build_section_tree(tokens) section = tree["children"][0] inline_tokens = [t for t in section["content_tokens"] if t.get("type") == "inline"] assert len(inline_tokens) == 1 assert "Paragraph text here" in inline_tokens[0]["content"] def test_slug_assigned(self): md = "# Economic Domain\n\ntext" tokens = parse_markdown_to_ast(md) tree = build_section_tree(tokens) assert tree["children"][0]["slug"] == "economic_domain" def test_empty_document(self): tokens = parse_markdown_to_ast("") tree = build_section_tree(tokens) assert tree["children"] == [] class TestExtractSectionText: def test_simple_paragraph(self): md = "# Title\n\nHello world." tokens = parse_markdown_to_ast(md) tree = build_section_tree(tokens) text = extract_section_text(tree["children"][0]) assert text == "Hello world." def test_multiple_paragraphs(self): md = "# Title\n\nFirst paragraph.\n\nSecond paragraph." tokens = parse_markdown_to_ast(md) tree = build_section_tree(tokens) text = extract_section_text(tree["children"][0]) assert "First paragraph." in text assert "Second paragraph." in text def test_empty_section(self): section = {"content_tokens": []} assert extract_section_text(section) == ""