""" Test for Issue #5: Generate a Schema from a Markdown File. Tests the schema generation service that creates JSON schemas from markdown AST structures with configurable depth limitations - critical for arc42 architectural documentation compliance validation. """ import json import pytest from pathlib import Path from tempfile import NamedTemporaryFile from markitect.schema_generator import SchemaGenerator from markitect.exceptions import FileNotFoundError, InvalidDepthError class TestIssue5SchemaGeneration: """Test suite for schema generation from markdown files.""" def setup_method(self): """Set up test environment.""" self.schema_generator = SchemaGenerator() def test_generate_schema_from_simple_markdown_creates_valid_json_schema(self): """ ISSUE #5: Test basic schema generation from simple markdown structure. Verifies that a simple markdown file generates a valid JSON schema that captures heading structure and basic elements for arc42 compliance. """ # Arrange - Simple markdown with clear structure markdown_content = """# Main Heading This is a paragraph. ## Sub Heading - List item 1 - List item 2 Some text here. """ with NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f: f.write(markdown_content) temp_file = Path(f.name) try: # Act - Generate schema with unlimited depth result = self.schema_generator.generate_schema_from_file(temp_file) # Assert - Schema should be valid JSON and contain expected structure assert isinstance(result, dict) assert "$schema" in result assert result["$schema"] == "http://json-schema.org/draft-07/schema#" assert "type" in result assert result["type"] == "object" # Should capture heading structure properties = result.get("properties", {}) assert "headings" in properties # Should define heading levels found in the document heading_properties = properties["headings"]["properties"] assert "level_1" in heading_properties # # Main Heading assert "level_2" in heading_properties # ## Sub Heading # Should capture other structural elements assert "paragraphs" in properties assert "lists" in properties assert "metadata" in properties finally: temp_file.unlink() def test_generate_schema_with_depth_limitation_excludes_deep_headings(self): """ ISSUE #5: Test schema generation with depth limitation for arc42 templates. Verifies that depth parameter correctly limits which heading levels are included - essential for arc42 section-specific schema generation. """ # Arrange - Markdown with multiple heading levels markdown_content = """# Level 1 Content here. ## Level 2 More content. ### Level 3 Deep content. #### Level 4 Very deep content. """ with NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f: f.write(markdown_content) temp_file = Path(f.name) try: # Act - Generate schema with depth limit of 2 result = self.schema_generator.generate_schema_from_file(temp_file, max_depth=2) # Assert - Only levels 1 and 2 should be included properties = result.get("properties", {}) heading_properties = properties["headings"]["properties"] assert "level_1" in heading_properties assert "level_2" in heading_properties assert "level_3" not in heading_properties # Should be excluded assert "level_4" not in heading_properties # Should be excluded finally: temp_file.unlink() def test_generate_schema_handles_file_not_found_error(self): """ ISSUE #5: Test error handling when markdown file doesn't exist. """ # Arrange - Non-existent file path non_existent_file = Path("/tmp/non_existent_file.md") # Act & Assert - Should raise appropriate exception with pytest.raises(FileNotFoundError): self.schema_generator.generate_schema_from_file(non_existent_file) def test_generate_schema_handles_invalid_depth_parameters(self): """ ISSUE #5: Test error handling for invalid depth parameters. """ # Arrange - Simple markdown file markdown_content = "# Test\n\nContent here." with NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f: f.write(markdown_content) temp_file = Path(f.name) try: # Act & Assert - Invalid depth values should raise exceptions with pytest.raises(InvalidDepthError): self.schema_generator.generate_schema_from_file(temp_file, max_depth=0) with pytest.raises(InvalidDepthError): self.schema_generator.generate_schema_from_file(temp_file, max_depth=-1) finally: temp_file.unlink() def test_generated_schema_is_json_serializable_and_valid(self): """ ISSUE #5: Test that generated schema follows JSON Schema specification. Verifies the output can be used for validation by standard JSON Schema validators - critical for arc42 document compliance checking. """ # Arrange - Standard markdown structure markdown_content = """# Title ## Section Content with **formatting**. - List item ### Subsection More content. """ with NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f: f.write(markdown_content) temp_file = Path(f.name) try: # Act - Generate schema result = self.schema_generator.generate_schema_from_file(temp_file) # Assert - Should be valid JSON Schema format assert result.get("$schema") == "http://json-schema.org/draft-07/schema#" assert result.get("type") == "object" assert "properties" in result assert "title" in result assert "description" in result # Should be serializable as JSON json_string = json.dumps(result, indent=2) assert len(json_string) > 0 # Should be deserializable back to same structure deserialized = json.loads(json_string) assert deserialized == result finally: temp_file.unlink() def test_schema_generation_captures_structural_metadata(self): """ ISSUE #5: Test that schema includes comprehensive structural metadata. Ensures generated schemas contain sufficient information for architectural analysis and arc42 compliance validation. """ # Arrange - Complex document structure markdown_content = """# Documentation ## Overview This document describes the **architecture**. ### Components - Component A - Component B - Sub-component B1 ## API ```python def api_function(): pass ``` > Important architectural decision. | Service | Purpose | |---------|---------| | Auth | Authentication | """ with NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f: f.write(markdown_content) temp_file = Path(f.name) try: # Act - Generate schema result = self.schema_generator.generate_schema_from_file(temp_file) # Assert - Should capture comprehensive structure properties = result.get("properties", {}) # Should have metadata about the document structure assert "metadata" in properties metadata_props = properties["metadata"]["properties"] assert "total_elements" in metadata_props assert "structure_types" in metadata_props # Should capture heading hierarchy assert "headings" in properties heading_props = properties["headings"]["properties"] assert "level_1" in heading_props assert "level_2" in heading_props assert "level_3" in heading_props # Should identify structural elements present in document expected_elements = ["paragraphs", "lists"] # Code blocks, blockquotes, tables may vary in parsing for element in expected_elements: assert element in properties finally: temp_file.unlink()