Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
schema-generate now builds content-aware schemas from the document's section hierarchy instead of counting markdown syntax elements. Detects key-value tables, data tables, link lists, and mixed content patterns to produce schemas that reflect the actual document outline. Old behavior preserved via --mode syntactic. Validator and visualization tools pinned to syntactic mode for compatibility. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
269 lines
8.5 KiB
Python
269 lines
8.5 KiB
Python
"""
|
|
Test for Issue #5: Generate a Schema from a Markdown File.
|
|
|
|
Tests the schema generation service that creates JSON schemas from markdown
|
|
AST structures with configurable depth limitations - critical for arc42
|
|
architectural documentation compliance validation.
|
|
"""
|
|
|
|
import json
|
|
import pytest
|
|
from pathlib import Path
|
|
from tempfile import NamedTemporaryFile
|
|
|
|
from markitect.schema_generator import SchemaGenerator
|
|
from markitect.exceptions import FileNotFoundError, InvalidDepthError
|
|
|
|
|
|
class TestIssue5SchemaGeneration:
    """Test suite for schema generation from markdown files (Issue #5).

    Each test writes a small markdown document to a temporary file, runs
    ``SchemaGenerator.generate_schema_from_file`` on it and inspects the
    returned schema dictionary.
    """

    def setup_method(self):
        """Set up test environment with a fresh ``SchemaGenerator``."""
        self.schema_generator = SchemaGenerator()

    @staticmethod
    def _write_markdown(content):
        """Write *content* to a new temporary ``.md`` file and return its path.

        The file is created with ``delete=False`` so it outlives the ``with``
        block; the caller is responsible for unlinking it. UTF-8 is requested
        explicitly so the bytes on disk do not depend on the platform's
        default text encoding.
        """
        with NamedTemporaryFile(
            mode='w', suffix='.md', delete=False, encoding='utf-8'
        ) as handle:
            handle.write(content)
        return Path(handle.name)

    def test_generate_schema_from_simple_markdown_creates_valid_json_schema(self):
        """
        ISSUE #5: Test basic schema generation from simple markdown structure.

        Verifies that a simple markdown file generates a valid JSON schema
        that captures heading structure and basic elements for arc42 compliance.
        """
        # Arrange - Simple markdown with clear structure
        markdown_content = """# Main Heading

This is a paragraph.

## Sub Heading

- List item 1
- List item 2

Some text here.
"""
        temp_file = self._write_markdown(markdown_content)

        try:
            # Act - Generate schema in syntactic mode (element counting)
            result = self.schema_generator.generate_schema_from_file(temp_file, mode='syntactic')

            # Assert - Schema should be a draft-07 object schema
            assert isinstance(result, dict)
            assert "$schema" in result
            assert result["$schema"] == "http://json-schema.org/draft-07/schema#"
            assert "type" in result
            assert result["type"] == "object"

            # Should capture heading structure
            properties = result.get("properties", {})
            assert "headings" in properties

            # Should define heading levels found in the document
            heading_properties = properties["headings"]["properties"]
            assert "level_1" in heading_properties  # # Main Heading
            assert "level_2" in heading_properties  # ## Sub Heading

            # Should capture other structural elements
            assert "paragraphs" in properties
            assert "lists" in properties
            assert "metadata" in properties

        finally:
            # Always remove the temporary file, even when an assertion fails
            temp_file.unlink()

    def test_generate_schema_with_depth_limitation_excludes_deep_headings(self):
        """
        ISSUE #5: Test schema generation with depth limitation for arc42 templates.

        Verifies that depth parameter correctly limits which heading levels
        are included - essential for arc42 section-specific schema generation.
        """
        # Arrange - Markdown with multiple heading levels
        markdown_content = """# Level 1

Content here.

## Level 2

More content.

### Level 3

Deep content.

#### Level 4

Very deep content.
"""
        temp_file = self._write_markdown(markdown_content)

        try:
            # Act - Generate schema in syntactic mode with depth limit of 2
            result = self.schema_generator.generate_schema_from_file(
                temp_file, max_depth=2, mode='syntactic'
            )

            # Assert - Only levels 1 and 2 should be included
            properties = result.get("properties", {})
            heading_properties = properties["headings"]["properties"]

            assert "level_1" in heading_properties
            assert "level_2" in heading_properties
            assert "level_3" not in heading_properties  # Should be excluded
            assert "level_4" not in heading_properties  # Should be excluded

        finally:
            temp_file.unlink()

    def test_generate_schema_handles_file_not_found_error(self):
        """
        ISSUE #5: Test error handling when markdown file doesn't exist.
        """
        # Arrange - A path that does not exist: create a uniquely named
        # temporary file and delete it again, rather than relying on a
        # hard-coded name under /tmp being absent on every machine.
        non_existent_file = self._write_markdown("")
        non_existent_file.unlink()

        # Act & Assert - Should raise appropriate exception.
        # FileNotFoundError here is the name imported from
        # markitect.exceptions at the top of this module.
        with pytest.raises(FileNotFoundError):
            self.schema_generator.generate_schema_from_file(non_existent_file)

    def test_generate_schema_handles_invalid_depth_parameters(self):
        """
        ISSUE #5: Test error handling for invalid depth parameters.
        """
        # Arrange - Simple markdown file
        markdown_content = "# Test\n\nContent here."
        temp_file = self._write_markdown(markdown_content)

        try:
            # Act & Assert - Zero and negative depths should both be rejected
            for invalid_depth in (0, -1):
                with pytest.raises(InvalidDepthError):
                    self.schema_generator.generate_schema_from_file(
                        temp_file, max_depth=invalid_depth
                    )

        finally:
            temp_file.unlink()

    def test_generated_schema_is_json_serializable_and_valid(self):
        """
        ISSUE #5: Test that generated schema follows JSON Schema specification.

        Verifies the output can be used for validation by standard JSON Schema
        validators - critical for arc42 document compliance checking.
        """
        # Arrange - Standard markdown structure
        markdown_content = """# Title

## Section

Content with **formatting**.

- List item

### Subsection

More content.
"""
        temp_file = self._write_markdown(markdown_content)

        try:
            # Act - Generate schema (no explicit mode: uses the generator's default)
            result = self.schema_generator.generate_schema_from_file(temp_file)

            # Assert - Should be valid JSON Schema format
            assert result.get("$schema") == "http://json-schema.org/draft-07/schema#"
            assert result.get("type") == "object"
            assert "properties" in result
            assert "title" in result
            assert "description" in result

            # Should be serializable as JSON
            json_string = json.dumps(result, indent=2)
            assert json_string

            # Should be deserializable back to same structure
            deserialized = json.loads(json_string)
            assert deserialized == result

        finally:
            temp_file.unlink()

    def test_schema_generation_captures_structural_metadata(self):
        """
        ISSUE #5: Test that schema includes comprehensive structural metadata.

        Ensures generated schemas contain sufficient information for
        architectural analysis and arc42 compliance validation.
        """
        # Arrange - Complex document structure
        markdown_content = """# Documentation

## Overview

This document describes the **architecture**.

### Components

- Component A
- Component B
  - Sub-component B1

## API

```python
def api_function():
    pass
```

> Important architectural decision.

| Service | Purpose |
|---------|---------|
| Auth | Authentication |
"""
        temp_file = self._write_markdown(markdown_content)

        try:
            # Act - Generate schema in syntactic mode
            result = self.schema_generator.generate_schema_from_file(temp_file, mode='syntactic')

            # Assert - Should capture comprehensive structure
            properties = result.get("properties", {})

            # Should have metadata about the document structure
            assert "metadata" in properties
            metadata_props = properties["metadata"]["properties"]
            assert "total_elements" in metadata_props
            assert "structure_types" in metadata_props

            # Should capture heading hierarchy
            assert "headings" in properties
            heading_props = properties["headings"]["properties"]
            assert "level_1" in heading_props
            assert "level_2" in heading_props
            assert "level_3" in heading_props

            # Should identify structural elements present in document.
            # Code blocks, blockquotes, tables may vary in parsing, so only
            # paragraphs and lists are required here.
            expected_elements = ["paragraphs", "lists"]
            for element in expected_elements:
                assert element in properties

        finally:
            temp_file.unlink()
|
|
|
|
|