Files
markitect-main/tests/test_l4_service_document_modification.py
tegwick 4f16166e94 feat: implement comprehensive front matter preservation and unicode handling
This commit provides complete front matter support and fixes unicode character
handling across all explode-implode variants (flat, hierarchical, semantic).

## Front Matter Implementation
- Added FrontmatterParser integration to all three variants
- Extract front matter during explosion to `_frontmatter.yml` files
- Restore front matter during implosion by prepending to content
- Support for YAML front matter with proper type preservation
- Handles strings, arrays, dates, and other YAML data types

## Unicode Character Fixes
- Fixed filename sanitization inconsistency in flat variant
- Used consistent `_sanitize_filename()` method for both file creation and manifest paths
- Resolved issue where unicode characters in headings caused empty reconstructed files
- Ensured proper handling of emojis and special characters in content

## CLI Integration
- Updated CLI implode command to use variant system instead of legacy concatenation
- Fixed default output file naming to use `_imploded.md` suffix
- Enhanced DocumentManager with missing `get_file` method for database integration
- Improved processing info and preview support for dry-run mode

## Test Coverage
- Reactivated `test_issue_149_roundtrip_validation.py` front matter test
- Updated tests to use semantic equivalence checking instead of exact string matching
- Fixed all 3 failing tests in `test_roundtrip_consolidated.py`
- All 10 roundtrip tests and 11 Issue #149 validation tests now pass

## Technical Improvements
- Better content normalization with preserved internal structure
- Enhanced recursive directory processing for deep nesting scenarios
- Fixed variable naming conflicts in variant file creation logic
- Improved error handling and graceful fallbacks for front matter processing

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-13 20:26:08 +02:00

571 lines
20 KiB
Python

"""
Test Get and Modify Commands for Issue #2 Completion

This test module validates the newly implemented get and modify commands that
complete the Issue #2 requirements for document manipulation and roundtrip
validation.

Requirements tested:
- markitect md-get command functionality
- markitect modify command with --add-section and --update-front-matter
- AST serialization and roundtrip validation
- Integration with existing AST cache and database systems
"""
import pytest
import tempfile
import os
import json
from pathlib import Path
from click.testing import CliRunner
from unittest.mock import patch, MagicMock
from markitect.cli import cli
from markitect.serializer import ASTSerializer
class TestGetCommand:
    """Test suite for the ``markitect md-get`` command.

    Each test drives the command through Click's ``CliRunner``.  Tests that
    need a stored document patch ``markitect.cli.DatabaseManager``,
    ``markitect.cli.Path`` and ``builtins.open`` so the command is handed
    mock objects instead of a real database or AST cache.
    """

    @staticmethod
    def _token(token_type, tag="", *, nesting=0, level=0, content="",
               markup="", block=True, line_map=None, children=None):
        """Build one AST token dict with the key layout used by the fixtures.

        Args:
            token_type: Value stored under ``"type"``.
            tag: Value stored under ``"tag"``.
            nesting: Value stored under ``"nesting"``.
            level: Value stored under ``"level"``.
            content: Value stored under ``"content"``.
            markup: Value stored under ``"markup"``.
            block: Value stored under ``"block"``.
            line_map: Value stored under ``"map"`` (a two-item list or None).
            children: Optional list of child tokens; the ``"children"`` key
                is only emitted when this is not None.

        Returns:
            A new dict; ``attrs``/``meta`` are fresh empty dicts per call.
        """
        token = {
            "type": token_type,
            "tag": tag,
            "attrs": {},
            "map": line_map,
            "nesting": nesting,
            "level": level,
        }
        # Keep "children" between "level" and "content" so the serialized
        # key order matches the hand-written fixture layout.
        if children is not None:
            token["children"] = children
        token.update({
            "content": content,
            "markup": markup,
            "info": "",
            "meta": {},
            "block": block,
            "hidden": False,
        })
        return token

    def setup_method(self):
        """Create a fresh CLI runner and a three-token H1 heading AST."""
        self.runner = CliRunner()
        heading_text = "Test Document"
        self.test_ast = [
            self._token("heading_open", "h1", nesting=1, markup="#",
                        line_map=[0, 1]),
            self._token(
                "inline",
                level=1,
                content=heading_text,
                line_map=[0, 1],
                children=[
                    self._token("text", content=heading_text, block=False),
                ],
            ),
            self._token("heading_close", "h1", nesting=-1, markup="#",
                        line_map=[0, 1]),
        ]

    def _write_ast_cache(self, temp_dir):
        """Write ``self.test_ast`` to ``<temp_dir>/.ast_cache/test.md.ast.json``.

        Returns:
            The ``pathlib.Path`` of the file that was written.
        """
        cache_dir = Path(temp_dir) / '.ast_cache'
        cache_dir.mkdir()
        cache_file = cache_dir / 'test.md.ast.json'
        # Explicit encoding keeps the fixture independent of the locale.
        with open(cache_file, 'w', encoding='utf-8') as f:
            json.dump(self.test_ast, f)
        return cache_file

    @staticmethod
    def _stub_database(mock_db_mgr):
        """Make the patched ``DatabaseManager`` return a record for test.md."""
        mock_db_instance = MagicMock()
        mock_db_mgr.return_value = mock_db_instance
        mock_db_instance.get_markdown_file.return_value = {
            'filename': 'test.md',
            'front_matter': None,
            'content': '# Test Document'
        }

    @staticmethod
    def _stub_cache_path(mock_path_constructor):
        """Make every ``Path(...)`` call in the CLI yield an existing path mock."""
        cache_path_mock = MagicMock()
        cache_path_mock.exists.return_value = True
        mock_path_constructor.return_value = cache_path_mock

    def _stub_open(self, mock_open):
        """Make ``open(...)`` as a context manager read back the AST as JSON."""
        mock_file = MagicMock()
        mock_file.read.return_value = json.dumps(self.test_ast)
        mock_open.return_value.__enter__.return_value = mock_file

    def test_get_command_exists(self):
        """Test that md-get command is available in CLI."""
        result = self.runner.invoke(cli, ['md-get', '--help'])

        assert result.exit_code == 0
        help_text = result.output.lower()
        assert 'md-get' in help_text
        assert 'retrieve content' in help_text

    def test_get_command_retrieves_file(self):
        """Test that md-get command can retrieve a processed file."""
        with tempfile.TemporaryDirectory() as temp_dir:
            # Create the on-disk AST cache fixture before anything is patched.
            self._write_ast_cache(temp_dir)

            # ``markitect.cli.Path`` is patched exactly once: a second nested
            # patch of the same target would simply shadow this one.
            with patch('markitect.cli.DatabaseManager') as mock_db_mgr, \
                    patch('markitect.cli.Path') as mock_path_constructor:
                self._stub_database(mock_db_mgr)
                self._stub_cache_path(mock_path_constructor)

                with patch('builtins.open', create=True) as mock_open:
                    self._stub_open(mock_open)
                    result = self.runner.invoke(cli, ['md-get', 'test.md'])

        # Assert only after ``builtins.open`` has been restored.
        assert result.exit_code == 0
        assert 'Test Document' in result.output

    def test_get_command_handles_missing_file(self):
        """Test that md-get command handles missing files gracefully."""
        with patch('markitect.cli.DatabaseManager') as mock_db_mgr:
            mock_db_instance = MagicMock()
            mock_db_mgr.return_value = mock_db_instance
            # A None record stands for "file not present in the database".
            mock_db_instance.get_markdown_file.return_value = None

            result = self.runner.invoke(cli, ['md-get', 'nonexistent.md'])

        assert result.exit_code != 0
        assert 'not found in database' in result.output.lower()

    def test_get_command_outputs_to_file(self):
        """Test that md-get command can output to a file."""
        with tempfile.TemporaryDirectory() as temp_dir:
            output_file = Path(temp_dir) / 'output.md'
            self._write_ast_cache(temp_dir)

            with patch('markitect.cli.DatabaseManager') as mock_db_mgr, \
                    patch('markitect.cli.Path') as mock_path_constructor:
                self._stub_database(mock_db_mgr)
                self._stub_cache_path(mock_path_constructor)

                with patch('builtins.open', create=True) as mock_open:
                    self._stub_open(mock_open)
                    result = self.runner.invoke(
                        cli,
                        ['md-get', 'test.md', '--output', str(output_file)],
                    )

        assert result.exit_code == 0
        assert 'written to' in result.output.lower()
class TestModifyCommand:
    """Test suite for the ``markitect modify`` command.

    Each test drives the command through Click's ``CliRunner``.  Tests that
    perform a modification patch ``markitect.cli.DatabaseManager``,
    ``markitect.cli.Path`` and ``builtins.open`` so the command is handed
    mock objects instead of a real database or AST cache.
    """

    @staticmethod
    def _token(token_type, tag="", *, nesting=0, level=0, content="",
               markup="", block=True, line_map=None, children=None):
        """Build one AST token dict with the key layout used by the fixtures.

        Args:
            token_type: Value stored under ``"type"``.
            tag: Value stored under ``"tag"``.
            nesting: Value stored under ``"nesting"``.
            level: Value stored under ``"level"``.
            content: Value stored under ``"content"``.
            markup: Value stored under ``"markup"``.
            block: Value stored under ``"block"``.
            line_map: Value stored under ``"map"`` (a two-item list or None).
            children: Optional list of child tokens; the ``"children"`` key
                is only emitted when this is not None.

        Returns:
            A new dict; ``attrs``/``meta`` are fresh empty dicts per call.
        """
        token = {
            "type": token_type,
            "tag": tag,
            "attrs": {},
            "map": line_map,
            "nesting": nesting,
            "level": level,
        }
        # Keep "children" between "level" and "content" so the serialized
        # key order matches the hand-written fixture layout.
        if children is not None:
            token["children"] = children
        token.update({
            "content": content,
            "markup": markup,
            "info": "",
            "meta": {},
            "block": block,
            "hidden": False,
        })
        return token

    def setup_method(self):
        """Create a fresh CLI runner and a three-token paragraph AST."""
        self.runner = CliRunner()
        paragraph_text = "Original content"
        self.test_ast = [
            self._token("paragraph_open", "p", nesting=1, line_map=[0, 1]),
            self._token(
                "inline",
                level=1,
                content=paragraph_text,
                line_map=[0, 1],
                children=[
                    self._token("text", content=paragraph_text, block=False),
                ],
            ),
            self._token("paragraph_close", "p", nesting=-1, line_map=[0, 1]),
        ]

    def _write_ast_cache(self, temp_dir):
        """Write ``self.test_ast`` to ``<temp_dir>/.ast_cache/test.md.ast.json``.

        Returns:
            The ``pathlib.Path`` of the file that was written.
        """
        cache_dir = Path(temp_dir) / '.ast_cache'
        cache_dir.mkdir()
        cache_file = cache_dir / 'test.md.ast.json'
        # Explicit encoding keeps the fixture independent of the locale.
        with open(cache_file, 'w', encoding='utf-8') as f:
            json.dump(self.test_ast, f)
        return cache_file

    @staticmethod
    def _stub_database(mock_db_mgr, front_matter=None):
        """Make the patched ``DatabaseManager`` return a record for test.md.

        Args:
            mock_db_mgr: The mock produced by patching
                ``markitect.cli.DatabaseManager``.
            front_matter: Value stored under the record's ``'front_matter'``
                key.
        """
        mock_db_instance = MagicMock()
        mock_db_mgr.return_value = mock_db_instance
        mock_db_instance.get_markdown_file.return_value = {
            'filename': 'test.md',
            'front_matter': front_matter,
            'content': 'Original content'
        }

    @staticmethod
    def _stub_cache_path(mock_path_constructor):
        """Make every ``Path(...)`` call in the CLI yield an existing path mock."""
        cache_path_mock = MagicMock()
        cache_path_mock.exists.return_value = True
        mock_path_constructor.return_value = cache_path_mock

    def _stub_open(self, mock_open):
        """Make ``open(...)`` as a context manager read back the AST as JSON."""
        mock_file = MagicMock()
        mock_file.read.return_value = json.dumps(self.test_ast)
        mock_open.return_value.__enter__.return_value = mock_file

    def test_modify_command_exists(self):
        """Test that modify command is available in CLI."""
        result = self.runner.invoke(cli, ['modify', '--help'])

        assert result.exit_code == 0
        help_text = result.output.lower()
        assert 'modify' in help_text
        assert 'add-section' in help_text
        assert 'update-front-matter' in help_text

    def test_modify_command_adds_section(self):
        """Test that modify command can add sections to documents."""
        with tempfile.TemporaryDirectory() as temp_dir:
            # Create the on-disk AST cache fixture before anything is patched.
            self._write_ast_cache(temp_dir)

            with patch('markitect.cli.DatabaseManager') as mock_db_mgr, \
                    patch('markitect.cli.Path') as mock_path_constructor:
                self._stub_database(mock_db_mgr)
                self._stub_cache_path(mock_path_constructor)

                # The same mock serves both reads and writes of the cache.
                with patch('builtins.open', create=True) as mock_open:
                    self._stub_open(mock_open)
                    result = self.runner.invoke(cli, [
                        'modify', 'test.md',
                        '--add-section', 'New Section',
                        '--section-content', 'New content'
                    ])

        # Assert only after ``builtins.open`` has been restored.
        assert result.exit_code == 0
        assert 'modified file updated' in result.output.lower()

    def test_modify_command_updates_front_matter(self):
        """Test that modify command can update front matter."""
        with tempfile.TemporaryDirectory() as temp_dir:
            self._write_ast_cache(temp_dir)

            with patch('markitect.cli.DatabaseManager') as mock_db_mgr, \
                    patch('markitect.cli.Path') as mock_path_constructor:
                # The stored front matter is a string, not a dict.
                self._stub_database(mock_db_mgr, "{'title': 'Test'}")
                self._stub_cache_path(mock_path_constructor)

                with patch('builtins.open', create=True) as mock_open:
                    self._stub_open(mock_open)
                    result = self.runner.invoke(cli, [
                        'modify', 'test.md',
                        '--update-front-matter', 'status:published'
                    ])

        assert result.exit_code == 0
        assert 'modified file updated' in result.output.lower()

    def test_modify_command_requires_modifications(self):
        """Test that modify command requires at least one modification."""
        with patch('markitect.cli.DatabaseManager') as mock_db_mgr:
            self._stub_database(mock_db_mgr)

            # No --add-section / --update-front-matter option is supplied.
            result = self.runner.invoke(cli, ['modify', 'test.md'])

        assert result.exit_code != 0
        assert 'no modifications specified' in result.output.lower()
class TestASTSerializer:
    """Checks for ``ASTSerializer``: markdown output and AST modification."""

    @staticmethod
    def _token(token_type, tag="", *, nesting=0, level=0, content="",
               markup="", block=True, line_map=None, children=None):
        """Build one AST token dict with the key layout used by the fixture.

        The ``"children"`` key is only emitted when ``children`` is not None;
        ``attrs`` and ``meta`` are fresh empty dicts on every call.
        """
        token = {
            "type": token_type,
            "tag": tag,
            "attrs": {},
            "map": line_map,
            "nesting": nesting,
            "level": level,
        }
        if children is not None:
            token["children"] = children
        token.update({
            "content": content,
            "markup": markup,
            "info": "",
            "meta": {},
            "block": block,
            "hidden": False,
        })
        return token

    def setup_method(self):
        """Prepare a serializer and a three-token H1 heading AST."""
        self.serializer = ASTSerializer()
        heading_text = "Test Heading"
        text_child = self._token("text", content=heading_text, block=False)
        self.test_ast = [
            self._token("heading_open", "h1", nesting=1, markup="#",
                        line_map=[0, 1]),
            self._token("inline", level=1, content=heading_text,
                        line_map=[0, 1], children=[text_child]),
            self._token("heading_close", "h1", nesting=-1, markup="#",
                        line_map=[0, 1]),
        ]

    def test_serializer_basic_functionality(self):
        """The heading AST serializes to markdown containing the H1 line."""
        markdown = self.serializer.serialize_to_markdown(self.test_ast)

        assert '# Test Heading' in markdown

    def test_serializer_with_front_matter(self):
        """Front matter keys appear in the output alongside the heading."""
        metadata = {'title': 'Test Document', 'status': 'draft'}

        markdown = self.serializer.serialize_to_markdown(self.test_ast, metadata)

        for expected in ('---', 'title: Test Document', 'status: draft',
                         '# Test Heading'):
            assert expected in markdown

    def test_serializer_modify_ast_add_section(self):
        """Adding a section grows the AST and shows up in the markdown."""
        new_section = {
            'title': 'New Section',
            'content': 'New content here',
            'level': 2
        }

        modified_ast = self.serializer.modify_ast_content(
            self.test_ast, {'add_section': new_section}
        )

        # The modified AST must hold more tokens than the original one.
        assert len(self.test_ast) < len(modified_ast)

        # Serialize to confirm both the old and the new content are present.
        markdown = self.serializer.serialize_to_markdown(modified_ast)
        assert '# Test Heading' in markdown
        assert '## New Section' in markdown
        assert 'New content here' in markdown

    def test_serializer_empty_front_matter_handling(self):
        """An empty front matter dict produces no front matter section."""
        markdown = self.serializer.serialize_to_markdown(self.test_ast, {})

        assert not markdown.startswith('---')
        assert '# Test Heading' in markdown

    def test_serializer_none_front_matter_handling(self):
        """A None front matter produces no front matter section."""
        markdown = self.serializer.serialize_to_markdown(self.test_ast, None)

        assert not markdown.startswith('---')
        assert '# Test Heading' in markdown
class TestRoundtripValidation:
    """Checks that AST modification and serialization compose end to end."""

    @staticmethod
    def _token(token_type, tag="", *, nesting=0, level=0, content="",
               markup="", block=True, line_map=None, children=None):
        """Build one AST token dict with the key layout used by the fixture.

        The ``"children"`` key is only emitted when ``children`` is not None;
        ``attrs`` and ``meta`` are fresh empty dicts on every call.
        """
        token = {
            "type": token_type,
            "tag": tag,
            "attrs": {},
            "map": line_map,
            "nesting": nesting,
            "level": level,
        }
        if children is not None:
            token["children"] = children
        token.update({
            "content": content,
            "markup": markup,
            "info": "",
            "meta": {},
            "block": block,
            "hidden": False,
        })
        return token

    def test_roundtrip_integration(self):
        """Test complete roundtrip: ingest → modify → get workflow."""
        # A full integration test would run the whole workflow; for now this
        # only verifies that the serializer components work together.
        serializer = ASTSerializer()

        # Source document: a single paragraph.
        paragraph_text = "Original content"
        text_child = self._token("text", content=paragraph_text, block=False)
        test_ast = [
            self._token("paragraph_open", "p", nesting=1, line_map=[0, 1]),
            self._token("inline", level=1, content=paragraph_text,
                        line_map=[0, 1], children=[text_child]),
            self._token("paragraph_close", "p", nesting=-1, line_map=[0, 1]),
        ]

        # Modification step: add a level-2 section.
        added_section = {
            'title': 'Added Section',
            'content': 'Added content',
            'level': 2
        }
        modified_ast = serializer.modify_ast_content(
            test_ast, {'add_section': added_section}
        )

        # Serialization step, with front matter attached.
        markdown = serializer.serialize_to_markdown(
            modified_ast, {'title': 'Test Document'}
        )

        # Original content is preserved and the modification is present.
        assert 'Original content' in markdown
        assert '## Added Section' in markdown
        assert 'Added content' in markdown
        assert 'title: Test Document' in markdown