Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
Major Features: - Implement comprehensive validation error reporting system (Issue #8) - Add direct CLI access with 'markitect' command - Create extensive makefile targets for CLI usage - Enhance schema validation with detailed error collection Components Added: - markitect/validation_error.py: ValidationError system with 8 error types - Enhanced markitect/schema_validator.py: Detailed error reporting methods - markitect/cli.py: Enhanced with --detailed-errors and --error-format options - visualize_schema.py: Schema visualization with ASCII and colorful modes - Comprehensive test suite for validation error reporting CLI Enhancements: - Direct 'markitect' command access for all operations - Makefile targets for typical CLI usage (cli-help, cli-ingest, etc.) - Support for text, JSON, and markdown error output formats - Backward compatibility with existing validation functionality Testing: - 11 comprehensive tests for Issue #8 validation error reporting - Tests for schema validation, visualization, and CLI integration - 100% test coverage for validation error scenarios 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
565 lines
22 KiB
Python
565 lines
22 KiB
Python
"""
|
|
Schema Validator for Issue #7: Validate a Markdown File Against a Schema.
|
|
|
|
This module provides functionality to validate markdown documents against JSON schemas
|
|
for arc42 architecture documentation compliance checking - essential for intelligent
|
|
document analysis and plan-actual comparison capabilities.
|
|
"""
|
|
|
|
import json
|
|
from pathlib import Path
|
|
from typing import Dict, Any, Union
|
|
|
|
try:
|
|
import jsonschema
|
|
from jsonschema import validate, ValidationError, SchemaError
|
|
JSONSCHEMA_AVAILABLE = True
|
|
except ImportError:
|
|
# Fallback to basic validation without full JSON Schema validation
|
|
JSONSCHEMA_AVAILABLE = False
|
|
ValidationError = Exception
|
|
SchemaError = Exception
|
|
|
|
from .parser import parse_markdown_to_ast
|
|
from .schema_generator import SchemaGenerator
|
|
from .validation_error import ValidationErrorCollector, ValidationErrorType
|
|
from .exceptions import FileNotFoundError, SchemaValidationError, InvalidSchemaError
|
|
|
|
|
|
class SchemaValidator:
    """
    Validates markdown documents against JSON schemas for arc42 compliance checking.

    Two families of entry points are offered:

    * ``validate_file_against_schema*`` return a plain boolean verdict.
    * ``validate_file_with_errors*`` return a ``ValidationErrorCollector``
      holding one entry per deviation that was found (Issue #8).

    In both families the markdown file is first converted to a schema by the
    schema generator; that generated schema is then compared with the
    expected schema (required properties, heading levels and item counts).
    """

    # Non-heading structural elements whose item counts are compared whenever
    # the expected schema declares them under 'properties'.
    _STRUCTURAL_ELEMENTS = ('paragraphs', 'lists', 'code_blocks', 'blockquotes', 'tables')

    def __init__(self):
        """Initialize the schema validator."""
        self.schema_generator = SchemaGenerator()
        # Remember whether the optional jsonschema package could be imported.
        self.jsonschema_available = JSONSCHEMA_AVAILABLE

    # ------------------------------------------------------------------
    # Boolean validation (Issue #7)
    # ------------------------------------------------------------------

    def validate_file_against_schema(self, file_path: Path, schema: Dict[str, Any]) -> bool:
        """
        Validate a markdown file against a JSON schema.

        Args:
            file_path: Path to the markdown file
            schema: JSON schema dictionary to validate against

        Returns:
            True if the document matches the schema, False otherwise

        Raises:
            FileNotFoundError: If the markdown file doesn't exist
            InvalidSchemaError: If the schema is invalid
            SchemaValidationError: If no schema could be generated from the file
        """
        if not file_path.exists():
            raise FileNotFoundError(f"Markdown file not found: {file_path}")

        self._validate_schema(schema)

        # Generate the document's current structure
        try:
            document_schema = self.schema_generator.generate_schema_from_file(file_path)
        except Exception as e:
            raise SchemaValidationError(f"Failed to generate document schema: {e}") from e

        return self._compare_structures(document_schema, schema)

    def validate_file_against_schema_string(self, file_path: Path, schema_json: str) -> bool:
        """
        Validate a markdown file against a JSON schema provided as a string.

        Args:
            file_path: Path to the markdown file
            schema_json: JSON schema as a string

        Returns:
            True if the document matches the schema, False otherwise

        Raises:
            FileNotFoundError: If the markdown file doesn't exist
            InvalidSchemaError: If the schema is invalid JSON or schema
        """
        return self.validate_file_against_schema(file_path, self._parse_schema_string(schema_json))

    def validate_file_against_schema_file(self, file_path: Path, schema_file_path: Path) -> bool:
        """
        Validate a markdown file against a schema stored in a file.

        Args:
            file_path: Path to the markdown file
            schema_file_path: Path to the JSON schema file

        Returns:
            True if the document matches the schema, False otherwise

        Raises:
            FileNotFoundError: If either file doesn't exist
            InvalidSchemaError: If the schema file is invalid
        """
        return self.validate_file_against_schema(file_path, self._load_schema_file(schema_file_path))

    # ------------------------------------------------------------------
    # Schema loading / checking helpers
    # ------------------------------------------------------------------

    def _parse_schema_string(self, schema_json: str) -> Dict[str, Any]:
        """
        Decode a JSON schema given as text.

        Args:
            schema_json: JSON schema as a string

        Returns:
            The decoded JSON value

        Raises:
            InvalidSchemaError: If the text is not valid JSON
        """
        try:
            return json.loads(schema_json)
        except json.JSONDecodeError as e:
            raise InvalidSchemaError(f"Invalid JSON schema string: {e}") from e

    def _load_schema_file(self, schema_file_path: Path) -> Dict[str, Any]:
        """
        Read and decode a JSON schema file (UTF-8).

        Args:
            schema_file_path: Path to the JSON schema file

        Returns:
            The decoded JSON value

        Raises:
            FileNotFoundError: If the schema file doesn't exist
            InvalidSchemaError: If the file cannot be read, is not valid
                UTF-8, or does not contain valid JSON
        """
        if not schema_file_path.exists():
            raise FileNotFoundError(f"Schema file not found: {schema_file_path}")

        try:
            return json.loads(schema_file_path.read_text(encoding='utf-8'))
        except (OSError, UnicodeDecodeError, json.JSONDecodeError) as e:
            # UnicodeDecodeError is included so a badly encoded file is reported
            # as an invalid schema instead of escaping as a raw decoding error.
            raise InvalidSchemaError(f"Failed to load schema file {schema_file_path}: {e}") from e

    def _validate_schema(self, schema: Dict[str, Any]) -> None:
        """
        Validate that a schema is a valid JSON Schema.

        Args:
            schema: Schema dictionary to validate

        Raises:
            InvalidSchemaError: If the schema is invalid
        """
        # These checks deliberately sit outside the try block below: when the
        # jsonschema package is missing, SchemaError is aliased to Exception,
        # and raising inside the try would re-wrap our own error message.
        if not isinstance(schema, dict):
            raise InvalidSchemaError("Schema must be a dictionary")

        if not schema.get('$schema') or not schema.get('type'):
            raise InvalidSchemaError("Schema must have '$schema' and 'type' fields")

        if not self.jsonschema_available:
            return

        # Full meta-schema validation through the jsonschema library
        try:
            jsonschema.validators.validator_for(schema).check_schema(schema)
        except (SchemaError, TypeError, AttributeError) as e:
            raise InvalidSchemaError(f"Invalid JSON schema: {e}") from e

    # ------------------------------------------------------------------
    # Count helpers shared by the boolean and error-collecting paths
    # ------------------------------------------------------------------

    @staticmethod
    def _actual_item_count(actual: Dict[str, Any]) -> Any:
        """
        Return the item count recorded in a generated element specification.

        In our generated schemas min=max=actual count, so 'maxItems' is used,
        falling back to 'minItems' and finally to 0.
        """
        return actual.get('maxItems', actual.get('minItems', 0))

    @staticmethod
    def _expected_bounds(expected: Dict[str, Any]) -> tuple:
        """
        Return ``(minItems, maxItems)`` of an expected element specification.

        A missing 'minItems' means 0; a missing 'maxItems' means unbounded.
        """
        return expected.get('minItems', 0), expected.get('maxItems', float('inf'))

    @staticmethod
    def _heading_suggestion(level_num: str) -> str:
        """
        Build the "add a heading" suggestion for a missing heading level.

        A markdown example is appended only when ``level_num`` is an integer
        between 1 and 6; any other key yields the plain suggestion instead of
        raising.
        """
        suggestion = f"Add heading(s) at level {level_num}"
        try:
            depth = int(level_num)
        except (TypeError, ValueError):
            return suggestion
        if not 1 <= depth <= 6:
            return suggestion
        return f"{suggestion} (e.g., {'#' * depth} Heading)"

    # ------------------------------------------------------------------
    # Boolean comparison
    # ------------------------------------------------------------------

    def _compare_structures(self, document_schema: Dict[str, Any], expected_schema: Dict[str, Any]) -> bool:
        """
        Compare a document's actual structure against expected schema requirements.

        Args:
            document_schema: Schema generated from the actual document
            expected_schema: Expected schema requirements

        Returns:
            True if the document satisfies the expected schema requirements.
            Any exception raised during the comparison also yields False.
        """
        try:
            doc_properties = document_schema.get('properties', {})
            expected_properties = expected_schema.get('properties', {})

            # Every required top-level property must be present
            for prop in expected_schema.get('required', []):
                if prop not in doc_properties:
                    return False

            # Headings are only compared when both sides describe them
            if 'headings' in expected_properties and 'headings' in doc_properties:
                if not self._validate_heading_structure(
                    doc_properties['headings'],
                    expected_properties['headings']
                ):
                    return False

            # Remaining structural elements declared by the expected schema
            for element in self._STRUCTURAL_ELEMENTS:
                if element not in expected_properties:
                    continue
                if not self._validate_structural_element(
                    doc_properties.get(element),
                    expected_properties[element]
                ):
                    return False

            return True

        except Exception:
            # If comparison fails for any reason, consider validation failed
            return False

    def _validate_heading_structure(self, actual_headings: Dict[str, Any], expected_headings: Dict[str, Any]) -> bool:
        """
        Validate heading structure against expected requirements.

        Args:
            actual_headings: Actual heading structure from document
            expected_headings: Expected heading requirements

        Returns:
            True if heading structure meets requirements
        """
        actual_heading_props = actual_headings.get('properties', {})
        expected_heading_props = expected_headings.get('properties', {})
        required_heading_levels = expected_headings.get('required', [])

        # Required heading levels must be present
        for level in required_heading_levels:
            if level not in actual_heading_props:
                return False

        # Each expected level that exists in the document must satisfy its
        # item-count constraints. Absent levels reaching this loop are
        # optional (absent required levels already returned above).
        for level, expected_spec in expected_heading_props.items():
            if level not in actual_heading_props:
                continue
            if not self._validate_array_constraints(actual_heading_props[level], expected_spec):
                return False

        return True

    def _validate_structural_element(self, actual_element: Dict[str, Any], expected_element: Dict[str, Any]) -> bool:
        """
        Validate a structural element (paragraphs, lists, etc.) against requirements.

        Args:
            actual_element: Actual element structure from document, or None
                when the document has no such element
            expected_element: Expected element requirements

        Returns:
            True if element meets requirements; False when it is absent
        """
        if actual_element is None:
            # Element doesn't exist in document
            return False

        return self._validate_array_constraints(actual_element, expected_element)

    def _validate_array_constraints(self, actual: Dict[str, Any], expected: Dict[str, Any]) -> bool:
        """
        Validate array constraints (minItems, maxItems) for structural elements.

        Args:
            actual: Actual element specification
            expected: Expected element specification

        Returns:
            True if constraints are satisfied
        """
        actual_count = self._actual_item_count(actual)
        expected_min, expected_max = self._expected_bounds(expected)
        return expected_min <= actual_count <= expected_max

    # ------------------------------------------------------------------
    # Issue #8: Detailed Error Reporting Methods
    # ------------------------------------------------------------------

    def validate_file_with_errors(self, file_path: Path, schema: Dict[str, Any]) -> "ValidationErrorCollector":
        """
        Validate a markdown file against a JSON schema and collect detailed errors.

        Args:
            file_path: Path to the markdown file
            schema: JSON schema dictionary to validate against

        Returns:
            ValidationErrorCollector with all validation errors. A failure to
            generate the document schema is reported as a single
            STRUCTURAL_VIOLATION entry rather than raised.

        Raises:
            FileNotFoundError: If the markdown file doesn't exist
            InvalidSchemaError: If the schema is invalid
        """
        if not file_path.exists():
            raise FileNotFoundError(f"Markdown file not found: {file_path}")

        self._validate_schema(schema)

        error_collector = ValidationErrorCollector()

        # Generate the document's current structure
        try:
            document_schema = self.schema_generator.generate_schema_from_file(file_path)
        except Exception as e:
            error_collector.add_error(
                ValidationErrorType.STRUCTURAL_VIOLATION,
                f"Failed to generate document schema: {e}",
                "document.structure",
                suggestion="Check if the markdown file is properly formatted"
            )
            return error_collector

        self._compare_structures_with_errors(document_schema, schema, error_collector)
        return error_collector

    def validate_file_with_errors_string(self, file_path: Path, schema_json: str) -> "ValidationErrorCollector":
        """
        Validate a markdown file against a JSON schema string and collect detailed errors.

        Args:
            file_path: Path to the markdown file
            schema_json: JSON schema as a string

        Returns:
            ValidationErrorCollector with all validation errors

        Raises:
            FileNotFoundError: If the markdown file doesn't exist
            InvalidSchemaError: If the schema is invalid JSON or schema
        """
        return self.validate_file_with_errors(file_path, self._parse_schema_string(schema_json))

    def validate_file_with_errors_file(self, file_path: Path, schema_file_path: Path) -> "ValidationErrorCollector":
        """
        Validate a markdown file against a schema file and collect detailed errors.

        Args:
            file_path: Path to the markdown file
            schema_file_path: Path to the JSON schema file

        Returns:
            ValidationErrorCollector with all validation errors

        Raises:
            FileNotFoundError: If either file doesn't exist
            InvalidSchemaError: If the schema file is invalid
        """
        return self.validate_file_with_errors(file_path, self._load_schema_file(schema_file_path))

    def _compare_structures_with_errors(
        self,
        document_schema: Dict[str, Any],
        expected_schema: Dict[str, Any],
        error_collector: "ValidationErrorCollector"
    ) -> None:
        """
        Compare document structure against expected schema and collect detailed errors.

        Performs the same checks as ``_compare_structures`` but keeps going
        after a failure so that every deviation is recorded. An unexpected
        exception is recorded as a STRUCTURAL_VIOLATION entry.

        Args:
            document_schema: Schema generated from the actual document
            expected_schema: Expected schema requirements
            error_collector: Collector to accumulate validation errors
        """
        try:
            doc_properties = document_schema.get('properties', {})
            expected_properties = expected_schema.get('properties', {})

            # Every required top-level property must be present
            for prop in expected_schema.get('required', []):
                if prop not in doc_properties:
                    error_collector.add_error(
                        ValidationErrorType.MISSING_REQUIRED_SECTION,
                        f"Missing required section: '{prop}'",
                        f"document.{prop}",
                        expected=f"Section '{prop}' is required by schema",
                        actual="Section not found",
                        suggestion=f"Add the '{prop}' section to your document"
                    )

            # Headings are only compared when both sides describe them
            if 'headings' in expected_properties and 'headings' in doc_properties:
                self._validate_heading_structure_with_errors(
                    doc_properties['headings'],
                    expected_properties['headings'],
                    error_collector
                )

            # Remaining structural elements declared by the expected schema
            for element in self._STRUCTURAL_ELEMENTS:
                if element in expected_properties:
                    self._validate_structural_element_with_errors(
                        doc_properties.get(element),
                        expected_properties[element],
                        element,
                        error_collector
                    )

        except Exception as e:
            error_collector.add_error(
                ValidationErrorType.STRUCTURAL_VIOLATION,
                f"Error during structure comparison: {e}",
                "document.structure",
                suggestion="Check if both the document and schema are properly formatted"
            )

    def _validate_heading_structure_with_errors(
        self,
        actual_headings: Dict[str, Any],
        expected_headings: Dict[str, Any],
        error_collector: "ValidationErrorCollector"
    ) -> None:
        """
        Validate heading structure and collect detailed errors.

        Args:
            actual_headings: Actual heading structure from document
            expected_headings: Expected heading requirements
            error_collector: Collector for validation errors
        """
        actual_heading_props = actual_headings.get('properties', {})
        expected_heading_props = expected_headings.get('properties', {})
        required_heading_levels = expected_headings.get('required', [])

        # Report every required heading level that is absent
        for level in required_heading_levels:
            if level in actual_heading_props:
                continue
            level_num = str(level).replace('level_', '')
            error_collector.add_error(
                ValidationErrorType.MISSING_REQUIRED_HEADING,
                f"Missing required heading level {level_num}",
                f"headings.{level}",
                expected=f"At least one heading at level {level_num}",
                actual="No headings found at this level",
                # The helper tolerates keys that are not of the form
                # 'level_<n>', so an odd key cannot abort the whole comparison.
                suggestion=self._heading_suggestion(level_num)
            )

        # Check item counts for each expected level present in the document.
        # Absent levels are either optional or were already reported above.
        for level, expected_spec in expected_heading_props.items():
            if level not in actual_heading_props:
                continue
            level_num = str(level).replace('level_', '')
            self._validate_array_constraints_with_errors(
                actual_heading_props[level],
                expected_spec,
                f"headings.{level}",
                f"level {level_num} headings",
                error_collector
            )

    def _validate_structural_element_with_errors(
        self,
        actual_element: Dict[str, Any],
        expected_element: Dict[str, Any],
        element_name: str,
        error_collector: "ValidationErrorCollector"
    ) -> None:
        """
        Validate a structural element and collect errors.

        Args:
            actual_element: Actual element structure from document, or None
                when the document has no such element
            expected_element: Expected element requirements
            element_name: Name of the structural element (for error messages)
            error_collector: Collector for validation errors
        """
        if actual_element is None:
            error_collector.add_error(
                ValidationErrorType.MISSING_REQUIRED_SECTION,
                f"Missing required structural element: {element_name}",
                f"content.{element_name}",
                expected=f"Document should contain {element_name}",
                actual="Element not found",
                suggestion=f"Add {element_name} to your document"
            )
            return

        self._validate_array_constraints_with_errors(
            actual_element,
            expected_element,
            f"content.{element_name}",
            element_name,
            error_collector
        )

    def _validate_array_constraints_with_errors(
        self,
        actual: Dict[str, Any],
        expected: Dict[str, Any],
        path: str,
        element_description: str,
        error_collector: "ValidationErrorCollector"
    ) -> None:
        """
        Validate array constraints and collect specific errors.

        Args:
            actual: Actual element specification
            expected: Expected element specification
            path: JSON path for error location
            element_description: Human-readable element description
            error_collector: Collector for validation errors
        """
        actual_count = self._actual_item_count(actual)
        expected_min, expected_max = self._expected_bounds(expected)

        # Check minimum constraint
        if actual_count < expected_min:
            error_collector.add_error(
                ValidationErrorType.INSUFFICIENT_CONTENT,
                f"Insufficient {element_description}: found {actual_count}, required at least {expected_min}",
                path,
                expected=f"At least {expected_min} {element_description}",
                actual=f"{actual_count} {element_description}",
                suggestion=f"Add {expected_min - actual_count} more {element_description}"
            )

        # Check maximum constraint (never triggers for the unbounded default)
        if actual_count > expected_max:
            error_collector.add_error(
                ValidationErrorType.EXCESS_CONTENT,
                f"Too many {element_description}: found {actual_count}, maximum allowed {expected_max}",
                path,
                expected=f"At most {expected_max} {element_description}",
                actual=f"{actual_count} {element_description}",
                suggestion=f"Remove {actual_count - expected_max} {element_description}"
            )