Implement comprehensive heading text capture functionality that allows schemas to enforce specific heading text requirements through enum constraints: • New CLI option: --capture-heading-text flag for exact text constraints • Schema generation with heading text as enum constraints (not just structure) • Advanced validation engine that enforces heading text requirements • Metaschema extension: x-markitect-heading-text-capture marker • Full integration with Issue #51 outline mode capabilities • Comprehensive error reporting for heading text mismatches • Complete backward compatibility with existing schema generation Technical implementation: - Extended SchemaGenerator with capture_heading_text parameter - Enhanced validation system to check enum constraints on heading content - Added _validate_heading_text_constraints_with_errors for detailed reporting - Integrated with existing metaschema validation from Issue #50 - Preserved document order of headings in enum constraints Key features: - Schemas can now specify required heading text via enum constraints - Validation rejects documents with incorrect heading text - Detailed error messages show expected vs actual heading text - Works seamlessly with outline mode depth controls - Maintains 100% compatibility with 513 existing tests Usage examples: markitect schema-generate --capture-heading-text document.md markitect schema-generate --mode outline --capture-heading-text --depth 2 document.md 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
682 lines
27 KiB
Python
682 lines
27 KiB
Python
"""
|
|
Schema Validator for Issue #7: Validate a Markdown File Against a Schema.
|
|
|
|
This module provides functionality to validate markdown documents against JSON schemas
|
|
for arc42 architecture documentation compliance checking - essential for intelligent
|
|
document analysis and plan-actual comparison capabilities.
|
|
"""
|
|
|
|
import json
|
|
from pathlib import Path
|
|
from typing import Dict, Any, Union
|
|
|
|
try:
|
|
import jsonschema
|
|
from jsonschema import validate, ValidationError, SchemaError
|
|
JSONSCHEMA_AVAILABLE = True
|
|
except ImportError:
|
|
# Fallback to basic validation without full JSON Schema validation
|
|
JSONSCHEMA_AVAILABLE = False
|
|
ValidationError = Exception
|
|
SchemaError = Exception
|
|
|
|
from .parser import parse_markdown_to_ast
|
|
from .schema_generator import SchemaGenerator
|
|
from .validation_error import ValidationErrorCollector, ValidationErrorType
|
|
from .exceptions import FileNotFoundError, SchemaValidationError, InvalidSchemaError
|
|
|
|
|
|
class SchemaValidator:
|
|
"""
|
|
Validates markdown documents against JSON schemas for arc42 compliance checking.
|
|
|
|
This service provides boolean validation results for markdown documents against
|
|
schemas, enabling strict compliance checking for architectural documentation
|
|
templates and intelligent plan-actual comparison.
|
|
"""
|
|
|
|
def __init__(self):
    """Set up the validator's collaborators.

    Records whether the optional ``jsonschema`` package could be imported
    and creates the ``SchemaGenerator`` used to derive a schema from a
    markdown document.
    """
    self.jsonschema_available = JSONSCHEMA_AVAILABLE
    self.schema_generator = SchemaGenerator()
|
|
|
|
def validate_file_against_schema(self, file_path: Path, schema: Dict[str, Any]) -> bool:
    """
    Check whether a markdown file conforms to a JSON schema.

    The schema itself is validated first. A schema is then generated from
    the document and compared with the expected one. When the expected
    schema puts an ``enum`` on heading content, the document's heading text
    is checked against the allowed values as well.

    Args:
        file_path: Path to the markdown file
        schema: JSON schema dictionary to validate against

    Returns:
        True if the document matches the schema, False otherwise

    Raises:
        FileNotFoundError: If the markdown file doesn't exist
        InvalidSchemaError: If the schema is invalid
        SchemaValidationError: If no schema could be generated from the document
    """
    if not file_path.exists():
        raise FileNotFoundError(f"Markdown file not found: {file_path}")

    self._validate_schema(schema)

    # Derive the document's own structure so it can be compared to the schema
    try:
        generated = self.schema_generator.generate_schema_from_file(file_path)
    except Exception as exc:
        raise SchemaValidationError(f"Failed to generate document schema: {exc}") from exc

    if not self._has_heading_text_constraints(schema):
        # Plain structure comparison, kept for backward compatibility
        return self._compare_structures(generated, schema)

    # Heading text is constrained: actual heading content must be inspected too
    return self._validate_with_heading_text_constraints(file_path, schema, generated)
|
|
|
|
def validate_file_against_schema_string(self, file_path: Path, schema_json: str) -> bool:
    """
    Validate a markdown file against a JSON schema supplied as text.

    The text is parsed as JSON and the result is handed to
    ``validate_file_against_schema``.

    Args:
        file_path: Path to the markdown file
        schema_json: JSON schema as a string

    Returns:
        True if the document matches the schema, False otherwise

    Raises:
        FileNotFoundError: If the markdown file doesn't exist
        InvalidSchemaError: If the schema is invalid JSON or schema
    """
    try:
        parsed_schema = json.loads(schema_json)
    except json.JSONDecodeError as exc:
        raise InvalidSchemaError(f"Invalid JSON schema string: {exc}") from exc
    else:
        return self.validate_file_against_schema(file_path, parsed_schema)
|
|
|
|
def validate_file_against_schema_file(self, file_path: Path, schema_file_path: Path) -> bool:
    """
    Validate a markdown file against a schema stored in a file.

    The schema file is read as UTF-8, parsed as JSON and the result is
    handed to ``validate_file_against_schema``.

    Args:
        file_path: Path to the markdown file
        schema_file_path: Path to the JSON schema file

    Returns:
        True if the document matches the schema, False otherwise

    Raises:
        FileNotFoundError: If either file doesn't exist
        InvalidSchemaError: If the schema file cannot be read, is not valid
            UTF-8, is not valid JSON, or is not a valid schema
    """
    if not schema_file_path.exists():
        raise FileNotFoundError(f"Schema file not found: {schema_file_path}")

    try:
        schema_content = schema_file_path.read_text(encoding='utf-8')
        schema = json.loads(schema_content)
    except (OSError, UnicodeDecodeError, json.JSONDecodeError) as e:
        # UnicodeDecodeError is not an OSError: unless it is listed here, a
        # schema file that is not valid UTF-8 escapes as a raw decoding
        # error instead of the documented InvalidSchemaError.
        raise InvalidSchemaError(f"Failed to load schema file {schema_file_path}: {e}") from e

    return self.validate_file_against_schema(file_path, schema)
|
|
|
|
def _validate_schema(self, schema: Dict[str, Any]) -> None:
    """
    Validate that a schema is a valid JSON Schema.

    Two basic checks always run: the schema must be a dictionary and must
    carry non-empty '$schema' and 'type' entries. When the jsonschema
    library is available, the schema is additionally checked against the
    validator class that jsonschema selects for it.

    Args:
        schema: Schema dictionary to validate

    Raises:
        InvalidSchemaError: If the schema is invalid
    """
    # These checks raise InvalidSchemaError themselves, so they are kept out
    # of the try block below. In the import fallback SchemaError is aliased
    # to Exception; a handler wrapped around them would catch the errors
    # raised here and re-wrap their messages.
    if not isinstance(schema, dict):
        raise InvalidSchemaError("Schema must be a dictionary")

    if not schema.get('$schema') or not schema.get('type'):
        raise InvalidSchemaError("Schema must have '$schema' and 'type' fields")

    if not self.jsonschema_available:
        return

    # Full validation; only the library call is guarded
    try:
        jsonschema.validators.validator_for(schema).check_schema(schema)
    except (SchemaError, TypeError, AttributeError) as e:
        raise InvalidSchemaError(f"Invalid JSON schema: {e}") from e
|
|
|
|
def _compare_structures(self, document_schema: Dict[str, Any], expected_schema: Dict[str, Any]) -> bool:
|
|
"""
|
|
Compare a document's actual structure against expected schema requirements.
|
|
|
|
This method performs the core validation logic by analyzing whether the
|
|
document's generated schema satisfies the requirements defined in the
|
|
expected schema.
|
|
|
|
Args:
|
|
document_schema: Schema generated from the actual document
|
|
expected_schema: Expected schema requirements
|
|
|
|
Returns:
|
|
True if the document satisfies the expected schema requirements
|
|
"""
|
|
try:
|
|
# Extract actual document structure
|
|
doc_properties = document_schema.get('properties', {})
|
|
expected_properties = expected_schema.get('properties', {})
|
|
|
|
# Check all required properties are present
|
|
required_properties = expected_schema.get('required', [])
|
|
for prop in required_properties:
|
|
if prop not in doc_properties:
|
|
return False
|
|
|
|
# Validate heading structure if specified
|
|
if 'headings' in expected_properties and 'headings' in doc_properties:
|
|
if not self._validate_heading_structure(
|
|
doc_properties['headings'],
|
|
expected_properties['headings']
|
|
):
|
|
return False
|
|
|
|
# Validate other structural elements
|
|
structural_elements = ['paragraphs', 'lists', 'code_blocks', 'blockquotes', 'tables']
|
|
for element in structural_elements:
|
|
if element in expected_properties:
|
|
if not self._validate_structural_element(
|
|
doc_properties.get(element),
|
|
expected_properties[element]
|
|
):
|
|
return False
|
|
|
|
return True
|
|
|
|
except Exception:
|
|
# If comparison fails for any reason, consider validation failed
|
|
return False
|
|
|
|
def _validate_heading_structure(self, actual_headings: Dict[str, Any], expected_headings: Dict[str, Any]) -> bool:
|
|
"""
|
|
Validate heading structure against expected requirements.
|
|
|
|
Args:
|
|
actual_headings: Actual heading structure from document
|
|
expected_headings: Expected heading requirements
|
|
|
|
Returns:
|
|
True if heading structure meets requirements
|
|
"""
|
|
actual_heading_props = actual_headings.get('properties', {})
|
|
expected_heading_props = expected_headings.get('properties', {})
|
|
required_heading_levels = expected_headings.get('required', [])
|
|
|
|
# Check required heading levels are present
|
|
for level in required_heading_levels:
|
|
if level not in actual_heading_props:
|
|
return False
|
|
|
|
# Check each expected heading level meets requirements
|
|
for level, expected_spec in expected_heading_props.items():
|
|
if level not in actual_heading_props:
|
|
# If level is not required, skip it
|
|
if level not in required_heading_levels:
|
|
continue
|
|
return False
|
|
|
|
actual_spec = actual_heading_props[level]
|
|
|
|
# Check minimum and maximum item requirements
|
|
if not self._validate_array_constraints(actual_spec, expected_spec):
|
|
return False
|
|
|
|
return True
|
|
|
|
def _validate_structural_element(self, actual_element: Dict[str, Any], expected_element: Dict[str, Any]) -> bool:
|
|
"""
|
|
Validate a structural element (paragraphs, lists, etc.) against requirements.
|
|
|
|
Args:
|
|
actual_element: Actual element structure from document
|
|
expected_element: Expected element requirements
|
|
|
|
Returns:
|
|
True if element meets requirements
|
|
"""
|
|
if actual_element is None:
|
|
# Element doesn't exist in document
|
|
return False
|
|
|
|
return self._validate_array_constraints(actual_element, expected_element)
|
|
|
|
def _validate_array_constraints(self, actual: Dict[str, Any], expected: Dict[str, Any]) -> bool:
|
|
"""
|
|
Validate array constraints (minItems, maxItems) for structural elements.
|
|
|
|
Args:
|
|
actual: Actual element specification
|
|
expected: Expected element specification
|
|
|
|
Returns:
|
|
True if constraints are satisfied
|
|
"""
|
|
# Get actual count from the schema specification
|
|
# For generated schemas, we use minItems/maxItems which represent actual counts
|
|
actual_min = actual.get('minItems', 0)
|
|
actual_max = actual.get('maxItems', actual_min)
|
|
actual_count = actual_max # In our generated schemas, min=max=actual count
|
|
|
|
# Check against expected constraints
|
|
expected_min = expected.get('minItems', 0)
|
|
expected_max = expected.get('maxItems', float('inf'))
|
|
|
|
return expected_min <= actual_count <= expected_max
|
|
|
|
# Issue #8: Detailed Error Reporting Methods
|
|
|
|
def validate_file_with_errors(self, file_path: Path, schema: Dict[str, Any]) -> ValidationErrorCollector:
    """
    Validate a markdown file against a JSON schema, collecting every deviation.

    Rather than a single pass/fail answer, this returns a collector holding
    one entry per problem found (Issue #8). A failure to generate a schema
    from the document is itself recorded as an error, not raised.

    Args:
        file_path: Path to the markdown file
        schema: JSON schema dictionary to validate against

    Returns:
        ValidationErrorCollector with all validation errors

    Raises:
        FileNotFoundError: If the markdown file doesn't exist
        InvalidSchemaError: If the schema is invalid
    """
    if not file_path.exists():
        raise FileNotFoundError(f"Markdown file not found: {file_path}")

    self._validate_schema(schema)

    collected = ValidationErrorCollector()

    # Derive the document's own structure; report a failure as a finding
    try:
        generated = self.schema_generator.generate_schema_from_file(file_path)
    except Exception as exc:
        collected.add_error(
            ValidationErrorType.STRUCTURAL_VIOLATION,
            f"Failed to generate document schema: {exc}",
            "document.structure",
            suggestion="Check if the markdown file is properly formatted"
        )
        return collected

    # The structure comparison always runs; heading text is checked on top
    # of it only when the schema carries enum constraints on heading content
    checks_heading_text = self._has_heading_text_constraints(schema)
    self._compare_structures_with_errors(generated, schema, collected)
    if checks_heading_text:
        self._validate_heading_text_constraints_with_errors(file_path, schema, collected)

    return collected
|
|
|
|
def validate_file_with_errors_string(self, file_path: Path, schema_json: str) -> ValidationErrorCollector:
    """
    Validate a markdown file against a JSON schema given as text, collecting errors.

    The text is parsed as JSON and the result is handed to
    ``validate_file_with_errors``.

    Args:
        file_path: Path to the markdown file
        schema_json: JSON schema as a string

    Returns:
        ValidationErrorCollector with all validation errors

    Raises:
        FileNotFoundError: If the markdown file doesn't exist
        InvalidSchemaError: If the schema is invalid JSON or schema
    """
    try:
        parsed_schema = json.loads(schema_json)
    except json.JSONDecodeError as exc:
        raise InvalidSchemaError(f"Invalid JSON schema string: {exc}") from exc
    else:
        return self.validate_file_with_errors(file_path, parsed_schema)
|
|
|
|
def validate_file_with_errors_file(self, file_path: Path, schema_file_path: Path) -> ValidationErrorCollector:
    """
    Validate a markdown file against a schema file and collect detailed errors.

    The schema file is read as UTF-8, parsed as JSON and the result is
    handed to ``validate_file_with_errors``.

    Args:
        file_path: Path to the markdown file
        schema_file_path: Path to the JSON schema file

    Returns:
        ValidationErrorCollector with all validation errors

    Raises:
        FileNotFoundError: If either file doesn't exist
        InvalidSchemaError: If the schema file cannot be read, is not valid
            UTF-8, is not valid JSON, or is not a valid schema
    """
    if not schema_file_path.exists():
        raise FileNotFoundError(f"Schema file not found: {schema_file_path}")

    try:
        schema_content = schema_file_path.read_text(encoding='utf-8')
        schema = json.loads(schema_content)
    except (OSError, UnicodeDecodeError, json.JSONDecodeError) as e:
        # UnicodeDecodeError is not an OSError: unless it is listed here, a
        # schema file that is not valid UTF-8 escapes as a raw decoding
        # error instead of the documented InvalidSchemaError.
        raise InvalidSchemaError(f"Failed to load schema file {schema_file_path}: {e}") from e

    return self.validate_file_with_errors(file_path, schema)
|
|
|
|
def _compare_structures_with_errors(
    self,
    document_schema: Dict[str, Any],
    expected_schema: Dict[str, Any],
    error_collector: ValidationErrorCollector
) -> None:
    """
    Compare a document's generated schema with the expected one, recording each problem.

    Missing required properties, heading rule violations and structural
    element violations are each added to the collector. An exception
    raised during the comparison is recorded as a structural violation
    instead of propagating.

    Args:
        document_schema: Schema generated from the actual document
        expected_schema: Expected schema requirements
        error_collector: Collector to accumulate validation errors
    """
    section_names = ('paragraphs', 'lists', 'code_blocks', 'blockquotes', 'tables')

    try:
        found = document_schema.get('properties', {})
        wanted = expected_schema.get('properties', {})

        # Report every required property the document does not have
        absent = (name for name in expected_schema.get('required', []) if name not in found)
        for name in absent:
            error_collector.add_error(
                ValidationErrorType.MISSING_REQUIRED_SECTION,
                f"Missing required section: '{name}'",
                f"document.{name}",
                expected=f"Section '{name}' is required by schema",
                actual="Section not found",
                suggestion=f"Add the '{name}' section to your document"
            )

        # Heading rules are only checked when both sides describe headings
        if 'headings' in wanted and 'headings' in found:
            self._validate_heading_structure_with_errors(
                found['headings'],
                wanted['headings'],
                error_collector
            )

        # Remaining structural elements named by the expected schema
        for name in section_names:
            if name not in wanted:
                continue
            self._validate_structural_element_with_errors(
                found.get(name),
                wanted[name],
                name,
                error_collector
            )

    except Exception as exc:
        error_collector.add_error(
            ValidationErrorType.STRUCTURAL_VIOLATION,
            f"Error during structure comparison: {exc}",
            "document.structure",
            suggestion="Check if both the document and schema are properly formatted"
        )
|
|
|
|
def _validate_heading_structure_with_errors(
    self,
    actual_headings: Dict[str, Any],
    expected_headings: Dict[str, Any],
    error_collector: ValidationErrorCollector
) -> None:
    """
    Validate heading structure and collect detailed errors.

    Adds one error for each required heading level missing from the
    document, then checks the item-count constraints of every expected
    level that the document does contain.

    Args:
        actual_headings: Actual heading structure from document
        expected_headings: Expected heading requirements
        error_collector: Collector for validation errors
    """
    actual_heading_props = actual_headings.get('properties', {})
    expected_heading_props = expected_headings.get('properties', {})
    required_heading_levels = expected_headings.get('required', [])

    # Check required heading levels are present
    for level in required_heading_levels:
        if level in actual_heading_props:
            continue

        level_num = level.replace('level_', '')
        # The markdown example needs a numeric level. A key that does not
        # reduce to a number must not raise here, because the caller's broad
        # handler would then abandon the rest of the comparison; in that
        # case the example is simply left out of the suggestion.
        try:
            example = f" (e.g., {'#' * int(level_num)} Heading)"
        except ValueError:
            example = ""

        error_collector.add_error(
            ValidationErrorType.MISSING_REQUIRED_HEADING,
            f"Missing required heading level {level_num}",
            f"headings.{level}",
            expected=f"At least one heading at level {level_num}",
            actual="No headings found at this level",
            suggestion=f"Add heading(s) at level {level_num}{example}"
        )

    # Check each expected heading level meets requirements
    for level, expected_spec in expected_heading_props.items():
        if level not in actual_heading_props:
            # Optional levels are skipped; required ones were reported above
            continue

        level_num = level.replace('level_', '')

        # Check minimum and maximum item requirements
        self._validate_array_constraints_with_errors(
            actual_heading_props[level],
            expected_spec,
            f"headings.{level}",
            f"level {level_num} headings",
            error_collector
        )
|
|
|
|
def _validate_structural_element_with_errors(
    self,
    actual_element: Dict[str, Any],
    expected_element: Dict[str, Any],
    element_name: str,
    error_collector: ValidationErrorCollector
) -> None:
    """
    Check one structural element and record any problem found.

    An absent element is reported as a missing required section; a present
    one has its item-count constraints checked.

    Args:
        actual_element: Actual element structure from document, or None
            when the document has no such element
        expected_element: Expected element requirements
        element_name: Name of the structural element (for error messages)
        error_collector: Collector for validation errors
    """
    if actual_element is not None:
        self._validate_array_constraints_with_errors(
            actual_element,
            expected_element,
            f"content.{element_name}",
            element_name,
            error_collector
        )
        return

    # The document has no such element at all
    error_collector.add_error(
        ValidationErrorType.MISSING_REQUIRED_SECTION,
        f"Missing required structural element: {element_name}",
        f"content.{element_name}",
        expected=f"Document should contain {element_name}",
        actual="Element not found",
        suggestion=f"Add {element_name} to your document"
    )
|
|
|
|
def _validate_array_constraints_with_errors(
    self,
    actual: Dict[str, Any],
    expected: Dict[str, Any],
    path: str,
    element_description: str,
    error_collector: ValidationErrorCollector
) -> None:
    """
    Check an element's item count against expected bounds, recording violations.

    The count is read from the actual specification's ``maxItems``, falling
    back to its ``minItems`` and then to 0. A count below the expected
    ``minItems`` and a count above an explicit expected ``maxItems`` each
    add one error.

    Args:
        actual: Actual element specification
        expected: Expected element specification
        path: JSON path for error location
        element_description: Human-readable element description
        error_collector: Collector for validation errors
    """
    # Generated schemas are expected to carry min == max == the real count
    found = actual.get('maxItems', actual.get('minItems', 0))

    lower = expected.get('minItems', 0)
    upper = expected.get('maxItems', float('inf'))

    # Too few items
    if found < lower:
        shortfall = lower - found
        error_collector.add_error(
            ValidationErrorType.INSUFFICIENT_CONTENT,
            f"Insufficient {element_description}: found {found}, required at least {lower}",
            path,
            expected=f"At least {lower} {element_description}",
            actual=f"{found} {element_description}",
            suggestion=f"Add {shortfall} more {element_description}"
        )

    # Too many items (only when the schema sets an upper bound)
    if upper != float('inf'):
        if found > upper:
            surplus = found - upper
            error_collector.add_error(
                ValidationErrorType.EXCESS_CONTENT,
                f"Too many {element_description}: found {found}, maximum allowed {upper}",
                path,
                expected=f"At most {upper} {element_description}",
                actual=f"{found} {element_description}",
                suggestion=f"Remove {surplus} {element_description}"
            )
|
|
|
|
def _has_heading_text_constraints(self, schema: Dict[str, Any]) -> bool:
|
|
"""
|
|
Check if the schema has heading text constraints (enum values on heading content).
|
|
|
|
Args:
|
|
schema: JSON schema to check
|
|
|
|
Returns:
|
|
True if schema has heading text constraints
|
|
"""
|
|
headings_props = schema.get('properties', {}).get('headings', {}).get('properties', {})
|
|
|
|
for level_props in headings_props.values():
|
|
items = level_props.get('items', {})
|
|
content_prop = items.get('properties', {}).get('content', {})
|
|
if 'enum' in content_prop:
|
|
return True
|
|
|
|
return False
|
|
|
|
def _validate_with_heading_text_constraints(
    self,
    file_path: Path,
    expected_schema: Dict[str, Any],
    document_schema: Dict[str, Any]
) -> bool:
    """
    Validate structure first, then heading text against the schema's enum values.

    The file is re-read and analysed to obtain the actual heading content.
    For every heading level whose content carries an ``enum``, each heading
    found at that level must have a ``content`` value in the enum.

    Args:
        file_path: Path to the markdown file
        expected_schema: Schema with heading text constraints
        document_schema: Generated schema from the actual document

    Returns:
        True if document meets all constraints including heading text
    """
    # Structural compliance is a precondition for the text check
    if not self._compare_structures(document_schema, expected_schema):
        return False

    level_specs = expected_schema.get('properties', {}).get('headings', {}).get('properties', {})

    # Re-analyse the document to get at the real heading text
    from .parser import parse_markdown_to_ast
    markdown_text = file_path.read_text(encoding='utf-8')
    analysis = self.schema_generator._analyze_ast_structure(
        parse_markdown_to_ast(markdown_text), None
    )

    for level_key, level_spec in level_specs.items():
        content_rule = level_spec.get('items', {}).get('properties', {}).get('content', {})
        if 'enum' not in content_rule:
            continue

        permitted = content_rule['enum']
        headings_at_level = analysis['headings'].get(level_key, [])

        # One heading outside the allowed texts fails the whole document
        if any(heading['content'] not in permitted for heading in headings_at_level):
            return False

    return True
|
|
|
|
def _validate_heading_text_constraints_with_errors(
    self,
    file_path: Path,
    expected_schema: Dict[str, Any],
    error_collector: ValidationErrorCollector
) -> None:
    """
    Validate heading text constraints and collect detailed errors.

    The file is re-read and analysed to obtain the actual heading content.
    For every heading level whose content carries an ``enum``, each heading
    at that level whose ``content`` is not in the enum adds one error that
    names the allowed values and the text found.

    Args:
        file_path: Path to the markdown file
        expected_schema: Schema with heading text constraints
        error_collector: Collector for validation errors
    """
    expected_headings = expected_schema.get('properties', {}).get('headings', {}).get('properties', {})

    # Generate document analysis with actual heading content
    from .parser import parse_markdown_to_ast
    content = file_path.read_text(encoding='utf-8')
    ast_tokens = parse_markdown_to_ast(content)
    structure_analysis = self.schema_generator._analyze_ast_structure(ast_tokens, None)

    for level_key, expected_level_spec in expected_headings.items():
        content_constraints = expected_level_spec.get('items', {}).get('properties', {}).get('content', {})
        if 'enum' not in content_constraints:
            continue

        allowed_texts = content_constraints['enum']
        # A JSON enum may hold non-string values; convert before joining so
        # that building the suggestion cannot raise TypeError.
        allowed_display = ', '.join(map(str, allowed_texts))
        actual_headings = structure_analysis['headings'].get(level_key, [])

        for i, heading in enumerate(actual_headings):
            actual_text = heading['content']
            if actual_text in allowed_texts:
                continue

            # NOTE(review): HEADING_COUNT_MISMATCH is used for a text
            # mismatch; whether a dedicated error type exists is not
            # visible from this file - confirm.
            error_collector.add_error(
                ValidationErrorType.HEADING_COUNT_MISMATCH,
                f"Heading text mismatch at {level_key.replace('_', ' ')} #{i+1}: expected one of {allowed_texts}, found '{actual_text}'",
                f"headings.{level_key}[{i}].content",
                expected=f"One of: {allowed_texts}",
                actual=actual_text,
                suggestion=f"Change heading text to one of the allowed values: {allowed_display}"
            )