Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
schema-generate now builds content-aware schemas from the document's section hierarchy instead of counting markdown syntax elements. Detects key-value tables, data tables, link lists, and mixed content patterns to produce schemas that reflect the actual document outline. Old behavior preserved via --mode syntactic. Validator and visualization tools pinned to syntactic mode for compatibility. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
305 lines
9.2 KiB
Python
305 lines
9.2 KiB
Python
"""
|
|
Test for Issue #5: Generate a Schema from a Markdown File.
|
|
|
|
Tests the ability to create JSON schemas from markdown file AST structures
|
|
with configurable depth limitations for structural analysis.
|
|
"""
|
|
|
|
import json
from pathlib import Path
from tempfile import NamedTemporaryFile, TemporaryDirectory

import pytest

from markitect.exceptions import FileNotFoundError, InvalidDepthError
from markitect.schema_generator import SchemaGenerator


class TestIssue5SchemaGeneration:
    """Test suite for schema generation from markdown files.

    Tests that need a markdown fixture write it to a temporary file through
    ``_write_markdown``; ``teardown_method`` removes those files after each
    test, whether it passed or failed.
    """

    def setup_method(self):
        """Set up test environment."""
        # Initialise the registry first so it exists even if constructing
        # the generator fails part-way through setup.
        self._temp_files = []
        self.schema_generator = SchemaGenerator()

    def teardown_method(self):
        """Clean up after tests by deleting every registered temp file."""
        for temp_file in self._temp_files:
            temp_file.unlink(missing_ok=True)

    def _write_markdown(self, content: str) -> Path:
        """Write *content* to a temporary ``.md`` file and return its path.

        The file is created with ``delete=False`` so it survives being closed
        and can be re-opened by the schema generator; it is registered for
        removal in ``teardown_method``.
        """
        with NamedTemporaryFile(
            mode='w', suffix='.md', delete=False, encoding='utf-8'
        ) as f:
            temp_file = Path(f.name)
            # Register before writing so a failed write still gets cleaned up.
            self._temp_files.append(temp_file)
            f.write(content)
        return temp_file

    def test_generate_schema_from_simple_markdown(self):
        """
        ISSUE #5: Test basic schema generation from simple markdown structure.

        Verifies that a simple markdown file generates a valid JSON schema
        that captures heading structure and basic elements.
        """
        # Arrange - Simple markdown with clear structure
        markdown_content = """# Main Heading

This is a paragraph.

## Sub Heading

- List item 1
- List item 2

Some text here.
"""
        temp_file = self._write_markdown(markdown_content)

        # Act - Generate schema in syntactic mode (element counting)
        result = self.schema_generator.generate_schema_from_file(
            temp_file, mode='syntactic'
        )

        # Assert - Schema should be valid JSON and contain expected structure
        assert isinstance(result, dict)
        assert "$schema" in result
        assert "type" in result
        assert result["type"] == "object"

        # Should capture heading structure
        properties = result.get("properties", {})
        assert "headings" in properties

        # Should define heading levels found in the document
        heading_properties = properties["headings"]["properties"]
        assert "level_1" in heading_properties  # # Main Heading
        assert "level_2" in heading_properties  # ## Sub Heading

        # Should capture other structural elements
        assert "paragraphs" in properties
        assert "lists" in properties

    def test_generate_schema_with_depth_limitation(self):
        """
        ISSUE #5: Test schema generation with depth limitation.

        Verifies that depth parameter correctly limits which heading levels
        are included in the generated schema.
        """
        # Arrange - Markdown with multiple heading levels
        markdown_content = """# Level 1

Content here.

## Level 2

More content.

### Level 3

Deep content.

#### Level 4

Very deep content.
"""
        temp_file = self._write_markdown(markdown_content)

        # Act - Generate schema in syntactic mode with depth limit of 2
        result = self.schema_generator.generate_schema_from_file(
            temp_file, max_depth=2, mode='syntactic'
        )

        # Assert - Only levels 1 and 2 should be included
        properties = result.get("properties", {})
        heading_properties = properties["headings"]["properties"]

        assert "level_1" in heading_properties
        assert "level_2" in heading_properties
        assert "level_3" not in heading_properties  # Should be excluded
        assert "level_4" not in heading_properties  # Should be excluded

    def test_generate_schema_from_complex_document(self):
        """
        ISSUE #5: Test schema generation from complex markdown document.

        Verifies handling of complex markdown structures including
        code blocks, blockquotes, links, and nested lists.
        """
        # Arrange - Complex markdown with various elements
        markdown_content = """# Documentation

## Overview

This is an **important** document with *emphasis*.

### Features

- Feature 1 with [link](https://example.com)
- Feature 2
  - Nested item A
  - Nested item B

### Code Examples

```python
def hello():
    print("Hello, World!")
```

> This is a blockquote with important information.

## API Reference

| Method | Description |
|--------|-------------|
| GET | Retrieve data |
| POST | Create data |

### Error Handling

1. Check input parameters
2. Validate data types
3. Handle exceptions

#### Implementation Details

Some implementation notes here.
"""
        temp_file = self._write_markdown(markdown_content)

        # Act - Generate schema in syntactic mode
        result = self.schema_generator.generate_schema_from_file(
            temp_file, mode='syntactic'
        )

        # Assert - Schema should capture complex structures
        properties = result.get("properties", {})

        # Should have all major structural elements
        expected_elements = [
            "headings",
            "paragraphs",
            "lists",
            "code_blocks",
            "blockquotes",
            "tables",
        ]
        for element in expected_elements:
            assert element in properties, f"Missing {element} in schema"

        # Should capture heading hierarchy
        heading_properties = properties["headings"]["properties"]
        assert "level_1" in heading_properties
        assert "level_2" in heading_properties
        assert "level_3" in heading_properties
        assert "level_4" in heading_properties

    def test_generate_schema_file_not_found(self):
        """
        ISSUE #5: Test error handling when markdown file doesn't exist.
        """
        # Arrange - A path inside a freshly created, empty directory cannot
        # exist, unlike a hard-coded /tmp path that is platform-specific and
        # could be present on the machine running the tests.
        with TemporaryDirectory() as temp_dir:
            non_existent_file = Path(temp_dir) / "non_existent_file.md"

            # Act & Assert - Should raise appropriate exception.
            # FileNotFoundError here is the class this module imports from
            # markitect.exceptions, which shadows the builtin of the same name.
            with pytest.raises(FileNotFoundError):
                self.schema_generator.generate_schema_from_file(non_existent_file)

    def test_generate_schema_invalid_depth(self):
        """
        ISSUE #5: Test error handling for invalid depth parameters.
        """
        # Arrange - Simple markdown file
        markdown_content = "# Test\n\nContent here."
        temp_file = self._write_markdown(markdown_content)

        # Act & Assert - Invalid depth values should raise exceptions
        for invalid_depth in (0, -1):
            with pytest.raises(InvalidDepthError):
                self.schema_generator.generate_schema_from_file(
                    temp_file, max_depth=invalid_depth
                )

    def test_generate_schema_empty_file(self):
        """
        ISSUE #5: Test schema generation from empty markdown file.
        """
        # Arrange - Empty markdown file
        temp_file = self._write_markdown("")

        # Act - Generate schema from empty file
        result = self.schema_generator.generate_schema_from_file(temp_file)

        # Assert - Should generate valid but minimal schema
        assert isinstance(result, dict)
        assert "$schema" in result
        assert "type" in result

        # Should have empty or minimal structure.
        # NOTE(review): this runs in the generator's default mode and the
        # check below is skipped when the schema has no "headings" property;
        # confirm that is intended rather than pinning mode='syntactic'.
        properties = result.get("properties", {})
        if "headings" in properties:
            heading_properties = properties["headings"].get("properties", {})
            assert not heading_properties  # No headings in empty file

    def test_schema_format_compliance(self):
        """
        ISSUE #5: Test that generated schema follows JSON Schema specification.

        Verifies the output is a valid JSON Schema that could be used
        for validation by standard JSON Schema validators.
        """
        # Arrange - Standard markdown structure
        markdown_content = """# Title

## Section

Content with **formatting**.

- List item

### Subsection

More content.
"""
        temp_file = self._write_markdown(markdown_content)

        # Act - Generate schema
        result = self.schema_generator.generate_schema_from_file(temp_file)

        # Assert - Should be valid JSON Schema format
        assert result.get("$schema") == "http://json-schema.org/draft-07/schema#"
        assert result.get("type") == "object"
        assert "properties" in result
        assert "title" in result
        assert "description" in result

        # Should be serializable as JSON
        json_string = json.dumps(result, indent=2)
        assert json_string

        # Should be deserializable back to same structure
        deserialized = json.loads(json_string)
        assert deserialized == result