"""
Unit tests for the markdown schema loader (schema_loader.py).

Areas exercised:
- YAML frontmatter extraction
- JSON schema extraction from fenced code blocks
- Merging frontmatter metadata into the schema
- Saving schemas back to markdown
- Error handling
"""

import json
from pathlib import Path

import pytest
import yaml

from markitect.schema_loader import (
    MarkdownSchemaLoader,
    SchemaLoaderError,
    InvalidSchemaFormatError,
    SchemaNotFoundError
)


def _write_markdown(directory, filename, text):
    """Write *text* to ``directory / filename`` and return the resulting path."""
    target = directory / filename
    target.write_text(text)
    return target


# Test fixtures

@pytest.fixture
def temp_schema_dir(tmp_path):
    """Provide a fresh ``schemas`` directory under pytest's tmp_path."""
    schema_dir = tmp_path / "schemas"
    schema_dir.mkdir()
    return schema_dir


@pytest.fixture
def simple_schema_md():
    """Markdown document with frontmatter and one valid JSON schema block."""
    return """---
schema-id: "https://markitect.dev/schemas/test/v1"
version: "1.0.0"
status: "stable"
---

# Test Schema v1.0

## Overview

This is a test schema for validation.

## Schema Definition

```json
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "$id": "https://markitect.dev/schemas/test/v1",
  "version": "1.0.0",
  "title": "Test Schema",
  "description": "Schema for testing",
  "type": "object",
  "properties": {
    "name": {"type": "string"}
  }
}
```

## Version History

### v1.0.0
- Initial version
"""


@pytest.fixture
def schema_without_frontmatter():
    """Markdown document that has a JSON schema block but no frontmatter."""
    return """# Test Schema v1.0

## Schema Definition

```json
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "Test Schema",
  "type": "object"
}
```
"""


@pytest.fixture
def schema_multiple_json_blocks():
    """Markdown document with three JSON blocks; only the middle one is the schema."""
    return """---
version: "1.0.0"
---

# Test Schema

## Example Usage

```json
{
  "example": "This is not the schema"
}
```

## Schema Definition

```json
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "Test Schema",
  "type": "object"
}
```

## More Examples

```json
{
  "another": "example"
}
```
"""


class TestMarkdownSchemaLoader:
    """End-to-end tests for MarkdownSchemaLoader.load_schema and construction."""

    def test_init(self):
        """A new loader exposes its two pattern attributes."""
        loader = MarkdownSchemaLoader()

        assert loader is not None
        assert hasattr(loader, 'frontmatter_pattern')
        assert hasattr(loader, 'json_code_block_pattern')

    def test_load_simple_schema(self, temp_schema_dir, simple_schema_md):
        """A well-formed document loads into schema, metadata and source info."""
        schema_file = _write_markdown(
            temp_schema_dir, "test-schema-v1.0.md", simple_schema_md
        )

        result = MarkdownSchemaLoader().load_schema(schema_file)

        for expected_key in ('schema', 'metadata', 'documentation', 'source_file'):
            assert expected_key in result

        # Check schema content
        loaded_schema = result['schema']
        assert loaded_schema['title'] == 'Test Schema'
        assert loaded_schema['version'] == '1.0.0'
        assert loaded_schema['type'] == 'object'

        # Check metadata
        loaded_metadata = result['metadata']
        assert loaded_metadata['version'] == '1.0.0'
        assert loaded_metadata['status'] == 'stable'

        # Check source tracking
        assert result['source_file'] == str(schema_file)
        assert 'x-markitect-source' in loaded_schema
        assert loaded_schema['x-markitect-source']['format'] == 'markdown'

    def test_load_schema_file_not_found(self):
        """A missing path is reported as FileNotFoundError."""
        missing_path = Path("/nonexistent/schema.md")
        loader = MarkdownSchemaLoader()

        with pytest.raises(FileNotFoundError, match="Schema file not found"):
            loader.load_schema(missing_path)

    def test_load_schema_without_json(self, temp_schema_dir):
        """Markdown lacking any JSON schema block raises SchemaNotFoundError."""
        schema_file = _write_markdown(
            temp_schema_dir, "no-schema.md", "# Just a heading\n\nNo schema here."
        )
        loader = MarkdownSchemaLoader()

        with pytest.raises(SchemaNotFoundError, match="No JSON schema found"):
            loader.load_schema(schema_file)

    def test_load_schema_invalid_json(self, temp_schema_dir):
        """A JSON block that does not parse raises InvalidSchemaFormatError."""
        content = """# Test

```json
{invalid json}
```
"""
        schema_file = _write_markdown(temp_schema_dir, "invalid.md", content)
        loader = MarkdownSchemaLoader()

        with pytest.raises(InvalidSchemaFormatError, match="Invalid JSON"):
            loader.load_schema(schema_file)


class TestExtractFrontmatter:
    """Tests for MarkdownSchemaLoader._extract_frontmatter."""

    def test_extract_valid_frontmatter(self, simple_schema_md):
        """Well-formed frontmatter is returned as a dictionary of its keys."""
        extracted = MarkdownSchemaLoader()._extract_frontmatter(simple_schema_md)

        assert extracted['schema-id'] == 'https://markitect.dev/schemas/test/v1'
        assert extracted['version'] == '1.0.0'
        assert extracted['status'] == 'stable'

    def test_extract_no_frontmatter(self, schema_without_frontmatter):
        """Content without frontmatter yields an empty dictionary."""
        extracted = MarkdownSchemaLoader()._extract_frontmatter(
            schema_without_frontmatter
        )

        assert extracted == {}

    def test_extract_invalid_yaml_frontmatter(self):
        """Frontmatter that is not valid YAML raises InvalidSchemaFormatError."""
        content = """---
invalid: yaml: syntax: error
---
# Content
"""
        loader = MarkdownSchemaLoader()

        with pytest.raises(InvalidSchemaFormatError, match="Invalid YAML"):
            loader._extract_frontmatter(content)

    def test_extract_non_dict_frontmatter(self):
        """Frontmatter that parses to a list raises InvalidSchemaFormatError."""
        content = """---
- list
- not
- dict
---
# Content
"""
        loader = MarkdownSchemaLoader()

        with pytest.raises(InvalidSchemaFormatError, match="must be a YAML dictionary"):
            loader._extract_frontmatter(content)

    def test_extract_complex_frontmatter(self):
        """Nested lists and mappings in frontmatter are preserved."""
        content = """---
schema-id: "https://example.com/schema"
version: "1.0.0"
tags:
  - documentation
  - schema
metadata:
  author: "Test Author"
  created: "2026-01-04"
---
# Content
"""
        extracted = MarkdownSchemaLoader()._extract_frontmatter(content)

        assert extracted['tags'] == ['documentation', 'schema']
        assert extracted['metadata']['author'] == 'Test Author'


class TestExtractJsonSchema:
    """Tests for MarkdownSchemaLoader._extract_json_schema."""

    def test_extract_single_json_block(self, schema_without_frontmatter):
        """The only JSON block in a document is returned as the schema."""
        extracted = MarkdownSchemaLoader()._extract_json_schema(
            schema_without_frontmatter
        )

        assert extracted is not None
        assert extracted['title'] == 'Test Schema'
        assert extracted['type'] == 'object'

    def test_extract_from_schema_definition_section(self, schema_multiple_json_blocks):
        """With several JSON blocks, the one under Schema Definition is chosen."""
        extracted = MarkdownSchemaLoader()._extract_json_schema(
            schema_multiple_json_blocks
        )

        assert extracted is not None
        # Should get the schema from Schema Definition section, not the example
        assert extracted['title'] == 'Test Schema'

    def test_extract_no_json_block(self):
        """Content without JSON blocks yields None."""
        content = "# Just text\n\nNo code blocks here."

        extracted = MarkdownSchemaLoader()._extract_json_schema(content)

        assert extracted is None

    def test_extract_invalid_json_block(self):
        """An unparseable JSON block raises InvalidSchemaFormatError."""
        content = """# Test

```json
{invalid}
```
"""
        loader = MarkdownSchemaLoader()

        with pytest.raises(InvalidSchemaFormatError, match="Invalid JSON"):
            loader._extract_json_schema(content)

    def test_extract_non_object_json(self):
        """A JSON array where an object is expected raises InvalidSchemaFormatError."""
        content = """# Test

```json
["array", "not", "object"]
```
"""
        loader = MarkdownSchemaLoader()

        with pytest.raises(InvalidSchemaFormatError, match="must be a JSON object"):
            loader._extract_json_schema(content)


class TestMergeMetadata:
    """Tests for MarkdownSchemaLoader._merge_metadata."""

    def test_merge_basic_metadata(self):
        """Frontmatter values and source tracking end up in the merged schema."""
        base_schema = {
            'title': 'Test Schema',
            'type': 'object'
        }
        frontmatter = {
            'version': '2.0.0',
            'schema-id': 'https://example.com/v2',
            'status': 'draft'
        }

        merged = MarkdownSchemaLoader()._merge_metadata(
            base_schema, frontmatter, Path('test.md')
        )

        # Version should be overridden
        assert merged['version'] == '2.0.0'

        # $id should be set from schema-id
        assert merged['$id'] == 'https://example.com/v2'

        # Status should be in x-markitect-metadata
        assert merged['x-markitect-metadata']['status'] == 'draft'

        # Source tracking should be added
        source_info = merged['x-markitect-source']
        assert source_info['file'] == 'test.md'
        assert source_info['format'] == 'markdown'

    def test_merge_preserves_schema_fields(self):
        """Merging empty metadata keeps the schema's existing fields."""
        base_schema = {
            'title': 'Test',
            'type': 'object',
            'properties': {'name': {'type': 'string'}}
        }

        merged = MarkdownSchemaLoader()._merge_metadata(
            base_schema, {}, Path('test.md')
        )

        assert merged['title'] == 'Test'
        assert merged['type'] == 'object'
        assert 'properties' in merged

    def test_merge_frontmatter_takes_precedence(self):
        """Frontmatter version and schema-id win over the schema's own values."""
        base_schema = {
            'version': '1.0.0',
            '$id': 'old-id'
        }
        frontmatter = {
            'version': '2.0.0',
            'schema-id': 'new-id'
        }

        merged = MarkdownSchemaLoader()._merge_metadata(
            base_schema, frontmatter, Path('test.md')
        )

        assert merged['version'] == '2.0.0'
        assert merged['$id'] == 'new-id'


class TestSaveSchema:
    """Tests for MarkdownSchemaLoader.save_schema."""

    def test_save_simple_schema(self, temp_schema_dir):
        """Saving writes a markdown file with frontmatter, title and JSON block."""
        schema = {
            '$schema': 'http://json-schema.org/draft-07/schema#',
            '$id': 'https://example.com/schema/v1',
            'version': '1.0.0',
            'title': 'Test Schema',
            'description': 'A test schema',
            'type': 'object'
        }
        output_file = temp_schema_dir / 'output-schema-v1.0.md'

        MarkdownSchemaLoader().save_schema(schema, output_file)

        assert output_file.exists()

        # Verify content
        written = output_file.read_text()
        assert '---' in written  # Frontmatter
        assert 'Test Schema v1.0.0' in written  # Title
        assert '```json' in written  # JSON block
        assert '"title": "Test Schema"' in written

    def test_save_creates_parent_directory(self, temp_schema_dir):
        """Saving to a nested path creates the missing parent directories."""
        schema = {'title': 'Test', 'type': 'object'}
        output_file = temp_schema_dir / 'nested' / 'dir' / 'schema.md'

        MarkdownSchemaLoader().save_schema(schema, output_file)

        assert output_file.exists()
        assert output_file.parent.exists()

    def test_save_with_custom_frontmatter(self, temp_schema_dir):
        """Caller-supplied frontmatter values appear in the saved file."""
        schema = {'title': 'Test', 'type': 'object'}
        frontmatter = {
            'schema-id': 'https://custom.com/schema',
            'status': 'experimental',
            'tags': ['test', 'custom']
        }
        output_file = temp_schema_dir / 'custom.md'

        MarkdownSchemaLoader().save_schema(
            schema, output_file, frontmatter=frontmatter
        )

        written = output_file.read_text()
        assert 'experimental' in written
        assert 'https://custom.com/schema' in written

    def test_save_and_reload_roundtrip(self, temp_schema_dir):
        """A saved schema reloads with the same title, type and properties."""
        original_schema = {
            '$schema': 'http://json-schema.org/draft-07/schema#',
            'version': '1.0.0',
            'title': 'Roundtrip Test',
            'type': 'object',
            'properties': {
                'name': {'type': 'string'},
                'age': {'type': 'integer'}
            }
        }
        schema_file = temp_schema_dir / 'roundtrip-schema-v1.0.md'
        loader = MarkdownSchemaLoader()

        loader.save_schema(original_schema, schema_file)

        # Reload
        loaded_schema = loader.load_schema(schema_file)['schema']

        # Compare key fields (ignoring x-markitect-source added during load)
        for field in ('title', 'type', 'properties'):
            assert loaded_schema[field] == original_schema[field]


class TestGenerateMarkdown:
    """Tests for MarkdownSchemaLoader._generate_markdown."""

    def test_generate_basic_markdown(self):
        """Generated markdown carries title, description, JSON and frontmatter."""
        schema = {
            'title': 'Test Schema',
            'version': '1.0.0',
            'description': 'Test description',
            'type': 'object'
        }

        generated = MarkdownSchemaLoader()._generate_markdown(schema)

        assert 'Test Schema v1.0.0' in generated
        assert 'Test description' in generated
        assert '```json' in generated
        assert '"title": "Test Schema"' in generated
        assert '---' in generated  # Frontmatter

    def test_generate_includes_frontmatter(self):
        """The generated frontmatter parses as YAML with version and schema-id."""
        schema = {
            '$id': 'https://example.com/schema',
            'title': 'Test',
            'version': '2.0.0',
            'type': 'object'
        }

        generated = MarkdownSchemaLoader()._generate_markdown(schema)

        # Parse frontmatter
        all_lines = generated.split('\n')
        assert all_lines[0] == '---'

        # Everything between the opening and the next '---' is frontmatter
        after_opening = all_lines[1:]
        closing_offset = after_opening.index('---')
        frontmatter = yaml.safe_load('\n'.join(after_opening[:closing_offset]))

        assert frontmatter['version'] == '2.0.0'
        assert frontmatter['schema-id'] == 'https://example.com/schema'


class TestListJsonBlocks:
    """Tests for MarkdownSchemaLoader.list_json_blocks."""

    def test_list_single_block(self, schema_without_frontmatter):
        """A document with one JSON block lists exactly that block."""
        found = MarkdownSchemaLoader().list_json_blocks(schema_without_frontmatter)

        assert len(found) == 1
        assert '"title": "Test Schema"' in found[0][1]

    def test_list_multiple_blocks(self, schema_multiple_json_blocks):
        """All three JSON blocks are listed in document order."""
        found = MarkdownSchemaLoader().list_json_blocks(schema_multiple_json_blocks)

        assert len(found) == 3

        # First block
        assert '"example"' in found[0][1]

        # Second block (schema)
        assert '"title": "Test Schema"' in found[1][1]

        # Third block
        assert '"another"' in found[2][1]

    def test_list_no_blocks(self):
        """A document without JSON blocks lists nothing."""
        found = MarkdownSchemaLoader().list_json_blocks(
            "# Just text\n\nNo code blocks."
        )

        assert len(found) == 0


class TestValidateSchemaStructure:
    """Tests for MarkdownSchemaLoader.validate_schema_structure."""

    def test_validate_complete_schema(self):
        """A schema with every expected field produces no issues."""
        schema = {
            '$schema': 'http://json-schema.org/draft-07/schema#',
            '$id': 'https://example.com/schema',
            'version': '1.0.0',
            'title': 'Test Schema',
            'description': 'Test description',
            'type': 'object'
        }

        reported = MarkdownSchemaLoader().validate_schema_structure(schema)

        assert len(reported) == 0

    def test_validate_missing_required_fields(self):
        """Missing $schema, title and description are each reported."""
        schema = {'type': 'object'}

        reported = MarkdownSchemaLoader().validate_schema_structure(schema)

        assert len(reported) > 0
        for field in ('$schema', 'title', 'description'):
            assert any(field in message for message in reported)

    def test_validate_missing_version(self):
        """A schema without a version field gets a version issue."""
        schema = {
            '$schema': 'http://json-schema.org/draft-07/schema#',
            'title': 'Test',
            'type': 'object'
        }

        reported = MarkdownSchemaLoader().validate_schema_structure(schema)

        assert any('version' in message for message in reported)

    def test_validate_invalid_id_format(self):
        """An $id using plain HTTP gets an issue mentioning HTTPS."""
        schema = {
            '$schema': 'http://json-schema.org/draft-07/schema#',
            '$id': 'http://example.com/schema',  # HTTP not HTTPS
            'version': '1.0.0',
            'title': 'Test',
            'type': 'object'
        }

        reported = MarkdownSchemaLoader().validate_schema_structure(schema)

        assert any('HTTPS' in message for message in reported)


class TestEdgeCases:
    """Tests for unusual inputs and error conditions."""

    def test_load_empty_file(self, temp_schema_dir):
        """An empty file raises SchemaNotFoundError."""
        schema_file = _write_markdown(temp_schema_dir, 'empty.md', '')
        loader = MarkdownSchemaLoader()

        with pytest.raises(SchemaNotFoundError):
            loader.load_schema(schema_file)

    def test_load_binary_file(self, temp_schema_dir):
        """A file whose bytes are not valid UTF-8 raises InvalidSchemaFormatError."""
        schema_file = temp_schema_dir / 'binary.md'
        # Use invalid UTF-8 sequences that will trigger UnicodeDecodeError
        schema_file.write_bytes(b'\xff\xfe\x00\x00\x80\x81\x82')
        loader = MarkdownSchemaLoader()

        with pytest.raises(InvalidSchemaFormatError):
            loader.load_schema(schema_file)

    def test_malformed_code_block(self, temp_schema_dir):
        """A JSON fence that is never closed raises SchemaNotFoundError."""
        content = """# Test

```json
{"valid": "json"
# Missing closing backticks
"""
        schema_file = _write_markdown(temp_schema_dir, 'malformed.md', content)
        loader = MarkdownSchemaLoader()

        with pytest.raises(SchemaNotFoundError):
            loader.load_schema(schema_file)

    def test_very_large_schema(self, temp_schema_dir):
        """A schema with 1000 properties loads with all of them intact."""
        # Create large schema with many properties
        generated_properties = {
            f'prop_{index}': {'type': 'string'}
            for index in range(1000)
        }
        large_schema = {
            '$schema': 'http://json-schema.org/draft-07/schema#',
            'title': 'Large Schema',
            'type': 'object',
            'properties': generated_properties
        }
        content = f"""# Large Schema

```json
{json.dumps(large_schema, indent=2)}
```
"""
        schema_file = _write_markdown(temp_schema_dir, 'large.md', content)

        result = MarkdownSchemaLoader().load_schema(schema_file)

        assert len(result['schema']['properties']) == 1000