Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
Asset Management System (Issue #142): - Add complete asset management framework with deduplication - Implement AssetManager, AssetRegistry, and AssetDeduplicator classes - Add AssetPackager for markdown document packaging - Create comprehensive test suite for all asset management components - Add asset constants and custom exceptions for robust error handling Markdown Processing Enhancements: - Update markdown_commands.py with improved functionality - Enhanced parsing and content aggregation capabilities - Improved filename encoding/decoding for special characters Test Suite Improvements: - Add comprehensive tests for Issue #138 markdown parsing - Enhance Issue #139 content aggregation and end-to-end testing - Complete test coverage for new asset management features Examples and Documentation: - Update BildungsKanonJon.md example with enhanced content - Generate corresponding HTML output for documentation - Add asset registry configuration Development Tools: - Add install script for simplified setup This commit represents a major enhancement to MarkiTect's asset handling capabilities with full test coverage and improved markdown processing. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
504 lines
17 KiB
Python
504 lines
17 KiB
Python
"""
|
|
Test content aggregation functionality for Issue #139: Implode directory to a markdown file.
|
|
|
|
This test module covers combining content from multiple files in correct order while
|
|
preserving all markdown formatting and handling index files appropriately.
|
|
"""
|
|
|
|
import pytest
|
|
import tempfile
|
|
import shutil
|
|
from pathlib import Path
|
|
from unittest.mock import Mock, patch
|
|
|
|
# Import will fail initially (RED phase) until implementation exists
|
|
# Guarded import of the code under test. While the implementation is missing
# the names are bound to None so this module can still be collected; each test
# that touches a missing name then fails when it is called.
try:
    from markitect.plugins.builtin.markdown_commands import (
        aggregate_content,
        combine_markdown_files,
        preserve_markdown_formatting,
        handle_index_files,
        process_front_matter,
        ContentAggregator,
        FrontMatterConsolidator,
    )
except ImportError:
    # Expected during RED phase - tests should fail initially
    aggregate_content = None
    combine_markdown_files = None
    preserve_markdown_formatting = None
    handle_index_files = None
    process_front_matter = None
    ContentAggregator = None
    FrontMatterConsolidator = None
|
|
|
|
|
|
class TestContentAggregation:
    """Test aggregating content from multiple markdown files.

    Every test works inside a fresh temporary directory created in
    ``setup_method`` and removed again in ``teardown_method``.
    """

    def setup_method(self):
        """Set up temporary directory for each test."""
        self.temp_dir = Path(tempfile.mkdtemp())

    def teardown_method(self):
        """Clean up temporary directory after each test."""
        if self.temp_dir.exists():
            shutil.rmtree(self.temp_dir)

    def test_combine_simple_markdown_files(self):
        """Test combining simple markdown files in correct order."""
        # This should fail initially (RED phase)

        # Create test files
        (self.temp_dir / "01_intro.md").write_text("# Introduction\nIntro content here.")
        (self.temp_dir / "02_chapter1.md").write_text("## Chapter 1\nChapter content here.")
        (self.temp_dir / "03_conclusion.md").write_text("# Conclusion\nConclusion content.")

        files = [
            self.temp_dir / "01_intro.md",
            self.temp_dir / "02_chapter1.md",
            self.temp_dir / "03_conclusion.md",
        ]

        combined_content = combine_markdown_files(files)

        # Should combine in order with proper spacing
        assert "# Introduction" in combined_content
        assert "## Chapter 1" in combined_content
        assert "# Conclusion" in combined_content

        # Check order is maintained (presence was asserted above, so find()
        # cannot return -1 here)
        intro_pos = combined_content.find("# Introduction")
        chapter_pos = combined_content.find("## Chapter 1")
        conclusion_pos = combined_content.find("# Conclusion")

        assert intro_pos < chapter_pos < conclusion_pos

    def test_preserve_markdown_formatting(self):
        """Test that all markdown formatting is preserved during aggregation."""
        # This should fail initially (RED phase)

        # The fixture is written flush-left on purpose: its lines are the
        # literal markdown file content.
        markdown_content = """# Test Section

## Subsection with **bold** and *italic*

Here's some code:

```python
def example():
    return "preserved"
```

| Table | Header |
|-------|--------|
| Cell | Data |

- List item 1
- List item 2
  - Nested item

> Blockquote text

[Link text](http://example.com)


"""

        (self.temp_dir / "formatted.md").write_text(markdown_content)

        preserved = preserve_markdown_formatting([self.temp_dir / "formatted.md"])

        # Should preserve all formatting elements
        assert "**bold**" in preserved
        assert "*italic*" in preserved
        assert "```python" in preserved
        assert "| Table | Header |" in preserved
        assert "- List item 1" in preserved
        assert "> Blockquote text" in preserved
        assert "[Link text]" in preserved
        assert "![Image alt]" in preserved

    def test_handle_index_files_as_parent_content(self):
        """Test handling index.md files as parent section content."""
        # This should fail initially (RED phase)

        # Create directory structure with index files
        part_dir = self.temp_dir / "part_1_introduction"
        part_dir.mkdir()
        (part_dir / "index.md").write_text("# Part 1: Introduction\nPart introduction content.")

        chapter_dir = part_dir / "chapter_1_overview"
        chapter_dir.mkdir()
        (chapter_dir / "index.md").write_text("## Chapter 1: Overview\nChapter overview content.")
        (chapter_dir / "section_1_1.md").write_text("### Section 1.1\nSection content.")

        aggregated = handle_index_files(self.temp_dir)

        # Should treat index.md files as parent section content
        assert "# Part 1: Introduction" in aggregated
        assert "Part introduction content." in aggregated
        assert "## Chapter 1: Overview" in aggregated
        assert "Chapter overview content." in aggregated
        assert "### Section 1.1" in aggregated

    def test_maintain_proper_spacing_between_sections(self):
        """Test maintaining appropriate whitespace between combined sections."""
        # This should fail initially (RED phase)

        files_content = [
            ("section1.md", "# Section 1\nContent 1"),
            ("section2.md", "# Section 2\nContent 2"),
            ("section3.md", "# Section 3\nContent 3"),
        ]

        files = []
        for filename, content in files_content:
            file_path = self.temp_dir / filename
            file_path.write_text(content)
            files.append(file_path)

        combined = combine_markdown_files(files)

        # Should have proper spacing between sections
        lines = combined.split('\n')

        # Find section boundaries and check spacing
        section1_end = None
        section2_start = None

        for i, line in enumerate(lines):
            if line == "Content 1":
                section1_end = i
            elif line == "# Section 2":
                section2_start = i
                break

        # Should have appropriate spacing between sections
        assert section2_start is not None
        assert section1_end is not None
        assert section2_start > section1_end + 1  # At least one empty line

    def test_process_files_in_hierarchical_order(self):
        """Test processing files in logical hierarchical order."""
        # This should fail initially (RED phase)

        # Create hierarchical structure
        structure = [
            ("part_1", "index.md", "# Part 1\nPart content"),
            ("part_1/chapter_1", "index.md", "## Chapter 1\nChapter content"),
            ("part_1/chapter_1", "section_1_1.md", "### Section 1.1\nSection content"),
            ("part_1/chapter_1", "section_1_2.md", "### Section 1.2\nMore section content"),
            ("part_1", "chapter_2.md", "## Chapter 2\nChapter 2 content"),
        ]

        for dir_path, filename, content in structure:
            full_dir = self.temp_dir / dir_path
            full_dir.mkdir(parents=True, exist_ok=True)
            (full_dir / filename).write_text(content)

        aggregated = aggregate_content(self.temp_dir)

        # Should maintain hierarchical order.
        # str.index (not str.find) is used so that a heading missing from the
        # output raises ValueError instead of yielding -1, which would let
        # the ordering chain below pass by accident.
        part_pos = aggregated.index("# Part 1")
        ch1_pos = aggregated.index("## Chapter 1")
        sec11_pos = aggregated.index("### Section 1.1")
        sec12_pos = aggregated.index("### Section 1.2")
        ch2_pos = aggregated.index("## Chapter 2")

        assert part_pos < ch1_pos < sec11_pos < sec12_pos < ch2_pos

    def test_handle_empty_files_gracefully(self):
        """Test handling empty markdown files during aggregation."""
        # This should fail initially (RED phase)

        # Create files with various content states
        (self.temp_dir / "empty.md").write_text("")
        (self.temp_dir / "whitespace_only.md").write_text(" \n\t\n ")
        (self.temp_dir / "content.md").write_text("# Real Content\nActual content here.")

        files = [
            self.temp_dir / "empty.md",
            self.temp_dir / "whitespace_only.md",
            self.temp_dir / "content.md",
        ]

        combined = combine_markdown_files(files)

        # Should handle empty files gracefully
        assert "# Real Content" in combined
        assert "Actual content here." in combined
        # Should not break or include excessive whitespace
        # NOTE(review): the whitespace expectation above is not asserted yet.
|
|
|
|
|
|
class TestFrontMatterHandling:
    """Test front matter detection, extraction, and consolidation.

    Every test works inside a fresh temporary directory created in
    ``setup_method`` and removed again in ``teardown_method``. Front matter
    fixtures are written flush-left because their lines are the literal file
    content.
    """

    def setup_method(self):
        """Set up temporary directory for each test."""
        self.temp_dir = Path(tempfile.mkdtemp())

    def teardown_method(self):
        """Clean up temporary directory after each test."""
        if self.temp_dir.exists():
            shutil.rmtree(self.temp_dir)

    def test_detect_and_extract_front_matter(self):
        """Test detecting and extracting YAML front matter."""
        # This should fail initially (RED phase)

        content_with_frontmatter = """---
title: "Chapter 1"
author: "John Doe"
date: "2023-01-01"
---

# Chapter 1 Content
Actual markdown content here.
"""

        (self.temp_dir / "chapter1.md").write_text(content_with_frontmatter)

        front_matter, content = process_front_matter(self.temp_dir / "chapter1.md")

        # Should extract front matter correctly
        assert front_matter is not None
        assert "title" in front_matter
        assert front_matter["title"] == "Chapter 1"
        assert front_matter["author"] == "John Doe"

        # Should separate content correctly
        assert content.strip().startswith("# Chapter 1 Content")
        assert "---" not in content

    def test_consolidate_multiple_front_matter_blocks(self):
        """Test consolidating front matter from multiple files."""
        # This should fail initially (RED phase)

        file1_content = """---
title: "My Document"
author: "Author Name"
---

# Section 1
Content 1"""

        file2_content = """---
version: "1.0"
tags: ["documentation", "guide"]
---

# Section 2
Content 2"""

        (self.temp_dir / "file1.md").write_text(file1_content)
        (self.temp_dir / "file2.md").write_text(file2_content)

        files = [self.temp_dir / "file1.md", self.temp_dir / "file2.md"]

        consolidator = FrontMatterConsolidator()
        consolidated_fm, content = consolidator.consolidate(files)

        # Should merge front matter appropriately
        assert "title" in consolidated_fm
        assert "author" in consolidated_fm
        assert "version" in consolidated_fm
        assert "tags" in consolidated_fm

        # Content should be combined without front matter blocks
        assert "# Section 1" in content
        assert "# Section 2" in content
        assert "---" not in content

    def test_handle_conflicting_front_matter(self):
        """Test handling conflicting front matter values."""
        # This should fail initially (RED phase)

        file1_content = """---
title: "Document Title"
author: "First Author"
---

# Content 1"""

        file2_content = """---
title: "Different Title"
author: "Second Author"
---

# Content 2"""

        (self.temp_dir / "file1.md").write_text(file1_content)
        (self.temp_dir / "file2.md").write_text(file2_content)

        files = [self.temp_dir / "file1.md", self.temp_dir / "file2.md"]

        consolidator = FrontMatterConsolidator(conflict_strategy="merge")
        consolidated_fm, content = consolidator.consolidate(files)

        # Should handle conflicts according to strategy
        assert "title" in consolidated_fm
        assert "author" in consolidated_fm

        # Could merge into lists, take first value, etc.
        # Exact behavior depends on implementation strategy

    def test_preserve_front_matter_in_output(self):
        """Test that consolidated front matter is properly placed in output."""
        # This should fail initially (RED phase)

        files_with_fm = [
            ("file1.md", """---
title: "Combined Document"
---
# Section 1
Content"""),
            ("file2.md", """---
tags: ["test"]
---
# Section 2
More content"""),
        ]

        # Only the directory is handed to aggregate_content, so the files are
        # written without collecting their paths.
        for filename, content in files_with_fm:
            (self.temp_dir / filename).write_text(content)

        aggregated = aggregate_content(self.temp_dir, preserve_front_matter=True)

        # Should have front matter at the beginning
        lines = aggregated.split('\n')
        assert lines[0] == "---"

        # Should find closing front matter delimiter
        closing_fm_index = None
        for i, line in enumerate(lines[1:], 1):
            if line == "---":
                closing_fm_index = i
                break

        assert closing_fm_index is not None

        # Content should follow front matter
        content_start = closing_fm_index + 1
        assert content_start < len(lines)
|
|
|
|
|
|
class TestContentAggregator:
    """Test the ContentAggregator class for comprehensive content processing.

    Every test works inside a fresh temporary directory created in
    ``setup_method`` and removed again in ``teardown_method``.
    """

    def setup_method(self):
        """Set up temporary directory for each test."""
        self.temp_dir = Path(tempfile.mkdtemp())

    def teardown_method(self):
        """Clean up temporary directory after each test."""
        if self.temp_dir.exists():
            shutil.rmtree(self.temp_dir)

    def test_content_aggregator_initialization(self):
        """Test creating ContentAggregator instances."""
        # This should fail initially (RED phase)

        aggregator = ContentAggregator()

        assert aggregator is not None
        assert hasattr(aggregator, 'preserve_formatting')
        assert hasattr(aggregator, 'handle_front_matter')
        assert hasattr(aggregator, 'section_spacing')

    def test_aggregator_with_custom_options(self):
        """Test aggregator with custom configuration."""
        # This should fail initially (RED phase)

        aggregator = ContentAggregator(
            preserve_formatting=True,
            handle_front_matter=True,
            section_spacing=2,
            include_toc=True,
        )

        # Create test structure
        (self.temp_dir / "chapter1.md").write_text("# Chapter 1\nContent 1")
        (self.temp_dir / "chapter2.md").write_text("# Chapter 2\nContent 2")

        result = aggregator.aggregate(self.temp_dir)

        assert result is not None
        assert "# Chapter 1" in result
        assert "# Chapter 2" in result

    def test_aggregator_processes_directory_recursively(self):
        """Test that aggregator processes nested directory structures."""
        # This should fail initially (RED phase)

        # Create nested structure
        part_dir = self.temp_dir / "part1"
        part_dir.mkdir()
        (part_dir / "index.md").write_text("# Part 1\nPart content")

        chapter_dir = part_dir / "chapter1"
        chapter_dir.mkdir()
        (chapter_dir / "content.md").write_text("## Chapter 1\nChapter content")

        aggregator = ContentAggregator(recursive=True)
        result = aggregator.aggregate(self.temp_dir)

        # Should process all nested content
        assert "# Part 1" in result
        assert "## Chapter 1" in result
        assert "Part content" in result
        assert "Chapter content" in result

    def test_aggregator_sorts_content_correctly(self):
        """Test that aggregator sorts content in logical order."""
        # This should fail initially (RED phase)

        # Create files that need sorting (deliberately written out of order)
        files_data = [
            ("03_conclusion.md", "# Conclusion"),
            ("01_introduction.md", "# Introduction"),
            ("02_main_content.md", "# Main Content"),
        ]

        for filename, content in files_data:
            (self.temp_dir / filename).write_text(content)

        aggregator = ContentAggregator(sort_files=True)
        result = aggregator.aggregate(self.temp_dir)

        # Should be in logical order.
        # str.index (not str.find) is used so that a heading missing from the
        # output raises ValueError instead of yielding -1, which would let
        # the ordering chain below pass by accident.
        intro_pos = result.index("# Introduction")
        main_pos = result.index("# Main Content")
        conclusion_pos = result.index("# Conclusion")

        assert intro_pos < main_pos < conclusion_pos

    def test_aggregator_handles_large_directory_structures(self):
        """Test aggregator performance with larger directory structures."""
        # This should fail initially (RED phase)

        # Create larger structure: 10 part directories, each holding an
        # index.md and 5 chapter files
        for i in range(10):
            part_dir = self.temp_dir / f"part_{i+1:02d}"
            part_dir.mkdir()
            (part_dir / "index.md").write_text(f"# Part {i+1}\nPart {i+1} content")

            for j in range(5):
                chapter_file = part_dir / f"chapter_{j+1:02d}.md"
                chapter_file.write_text(f"## Chapter {i+1}.{j+1}\nChapter content")

        aggregator = ContentAggregator()
        result = aggregator.aggregate(self.temp_dir)

        # Should process all content
        assert result is not None
        assert len(result) > 0

        # Should contain expected number of parts and chapters
        part_count = result.count("# Part")
        chapter_count = result.count("## Chapter")

        assert part_count >= 10
        assert chapter_count >= 50