feat(schema): add semantic schema generation as default mode
Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
schema-generate now builds content-aware schemas from the document's section hierarchy instead of counting Markdown syntax elements. It detects key-value tables, data tables, link lists, and mixed content patterns to produce schemas that reflect the actual document outline. The old behavior is preserved via --mode syntactic. The validator and visualization tools are pinned to syntactic mode for compatibility. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -290,7 +290,7 @@ This is a test document.
|
||||
output_file.unlink()
|
||||
|
||||
def test_cli_maintains_backward_compatibility_with_max_depth(self):
|
||||
"""Test that existing --max-depth option still works with default mode."""
|
||||
"""Test that existing --max-depth option still works with default (semantic) mode."""
|
||||
# Arrange
|
||||
markdown_content = """# Test Document
|
||||
|
||||
@@ -317,9 +317,9 @@ Some details here.
|
||||
assert result.exit_code == 0, f"CLI should maintain backward compatibility with --max-depth, got: {result.output}"
|
||||
schema = json.loads(result.output)
|
||||
|
||||
# Should use old title format for backward compatibility
|
||||
expected_title = f"Schema for {temp_file.name}"
|
||||
assert schema["title"] == expected_title, f"Default mode should use 'for' in title"
|
||||
# Default mode is now semantic, which uses 'from' in title
|
||||
expected_title = f"Schema from {temp_file.name}"
|
||||
assert schema["title"] == expected_title, f"Default (semantic) mode should use 'from' in title"
|
||||
|
||||
finally:
|
||||
temp_file.unlink()
|
||||
|
||||
@@ -50,8 +50,8 @@ Some text here.
|
||||
temp_file = Path(f.name)
|
||||
|
||||
try:
|
||||
# Act - Generate schema with unlimited depth
|
||||
result = self.schema_generator.generate_schema_from_file(temp_file)
|
||||
# Act - Generate schema in syntactic mode (element counting)
|
||||
result = self.schema_generator.generate_schema_from_file(temp_file, mode='syntactic')
|
||||
|
||||
# Assert - Schema should be valid JSON and contain expected structure
|
||||
assert isinstance(result, dict)
|
||||
@@ -105,8 +105,8 @@ Very deep content.
|
||||
temp_file = Path(f.name)
|
||||
|
||||
try:
|
||||
# Act - Generate schema with depth limit of 2
|
||||
result = self.schema_generator.generate_schema_from_file(temp_file, max_depth=2)
|
||||
# Act - Generate schema in syntactic mode with depth limit of 2
|
||||
result = self.schema_generator.generate_schema_from_file(temp_file, max_depth=2, mode='syntactic')
|
||||
|
||||
# Assert - Only levels 1 and 2 should be included
|
||||
properties = result.get("properties", {})
|
||||
@@ -173,8 +173,8 @@ Some implementation notes here.
|
||||
temp_file = Path(f.name)
|
||||
|
||||
try:
|
||||
# Act - Generate schema
|
||||
result = self.schema_generator.generate_schema_from_file(temp_file)
|
||||
# Act - Generate schema in syntactic mode
|
||||
result = self.schema_generator.generate_schema_from_file(temp_file, mode='syntactic')
|
||||
|
||||
# Assert - Schema should capture complex structures
|
||||
properties = result.get("properties", {})
|
||||
|
||||
@@ -47,8 +47,8 @@ Some text here.
|
||||
temp_file = Path(f.name)
|
||||
|
||||
try:
|
||||
# Act - Generate schema with unlimited depth
|
||||
result = self.schema_generator.generate_schema_from_file(temp_file)
|
||||
# Act - Generate schema in syntactic mode (element counting)
|
||||
result = self.schema_generator.generate_schema_from_file(temp_file, mode='syntactic')
|
||||
|
||||
# Assert - Schema should be valid JSON and contain expected structure
|
||||
assert isinstance(result, dict)
|
||||
@@ -104,8 +104,8 @@ Very deep content.
|
||||
temp_file = Path(f.name)
|
||||
|
||||
try:
|
||||
# Act - Generate schema with depth limit of 2
|
||||
result = self.schema_generator.generate_schema_from_file(temp_file, max_depth=2)
|
||||
# Act - Generate schema in syntactic mode with depth limit of 2
|
||||
result = self.schema_generator.generate_schema_from_file(temp_file, max_depth=2, mode='syntactic')
|
||||
|
||||
# Assert - Only levels 1 and 2 should be included
|
||||
properties = result.get("properties", {})
|
||||
@@ -238,8 +238,8 @@ def api_function():
|
||||
temp_file = Path(f.name)
|
||||
|
||||
try:
|
||||
# Act - Generate schema
|
||||
result = self.schema_generator.generate_schema_from_file(temp_file)
|
||||
# Act - Generate schema in syntactic mode
|
||||
result = self.schema_generator.generate_schema_from_file(temp_file, mode='syntactic')
|
||||
|
||||
# Assert - Should capture comprehensive structure
|
||||
properties = result.get("properties", {})
|
||||
|
||||
Reference in New Issue
Block a user