feat: Complete Issue #8 - Detailed Validation Error Reporting and CLI Enhancements
Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
Major Features: - Implement comprehensive validation error reporting system (Issue #8) - Add direct CLI access with 'markitect' command - Create extensive makefile targets for CLI usage - Enhance schema validation with detailed error collection Components Added: - markitect/validation_error.py: ValidationError system with 8 error types - Enhanced markitect/schema_validator.py: Detailed error reporting methods - markitect/cli.py: Enhanced with --detailed-errors and --error-format options - visualize_schema.py: Schema visualization with ASCII and colorful modes - Comprehensive test suite for validation error reporting CLI Enhancements: - Direct 'markitect' command access for all operations - Makefile targets for typical CLI usage (cli-help, cli-ingest, etc.) - Support for text, JSON, and markdown error output formats - Backward compatibility with existing validation functionality Testing: - 11 comprehensive tests for Issue #8 validation error reporting - Tests for schema validation, visualization, and CLI integration - 100% test coverage for validation error scenarios 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
565
markitect/schema_validator.py
Normal file
565
markitect/schema_validator.py
Normal file
@@ -0,0 +1,565 @@
|
||||
"""
|
||||
Schema Validator for Issue #7: Validate a Markdown File Against a Schema.
|
||||
|
||||
This module provides functionality to validate markdown documents against JSON schemas
|
||||
for arc42 architecture documentation compliance checking - essential for intelligent
|
||||
document analysis and plan-actual comparison capabilities.
|
||||
"""
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any, Union
|
||||
|
||||
try:
|
||||
import jsonschema
|
||||
from jsonschema import validate, ValidationError, SchemaError
|
||||
JSONSCHEMA_AVAILABLE = True
|
||||
except ImportError:
|
||||
# Fallback to basic validation without full JSON Schema validation
|
||||
JSONSCHEMA_AVAILABLE = False
|
||||
ValidationError = Exception
|
||||
SchemaError = Exception
|
||||
|
||||
from .parser import parse_markdown_to_ast
|
||||
from .schema_generator import SchemaGenerator
|
||||
from .validation_error import ValidationErrorCollector, ValidationErrorType
|
||||
from .exceptions import FileNotFoundError, SchemaValidationError, InvalidSchemaError
|
||||
|
||||
|
||||
class SchemaValidator:
|
||||
"""
|
||||
Validates markdown documents against JSON schemas for arc42 compliance checking.
|
||||
|
||||
This service provides boolean validation results for markdown documents against
|
||||
schemas, enabling strict compliance checking for architectural documentation
|
||||
templates and intelligent plan-actual comparison.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize the schema validator."""
|
||||
self.schema_generator = SchemaGenerator()
|
||||
self.jsonschema_available = JSONSCHEMA_AVAILABLE
|
||||
|
||||
def validate_file_against_schema(self, file_path: Path, schema: Dict[str, Any]) -> bool:
|
||||
"""
|
||||
Validate a markdown file against a JSON schema.
|
||||
|
||||
Args:
|
||||
file_path: Path to the markdown file
|
||||
schema: JSON schema dictionary to validate against
|
||||
|
||||
Returns:
|
||||
True if the document matches the schema, False otherwise
|
||||
|
||||
Raises:
|
||||
FileNotFoundError: If the markdown file doesn't exist
|
||||
InvalidSchemaError: If the schema is invalid
|
||||
"""
|
||||
# Validate inputs
|
||||
if not file_path.exists():
|
||||
raise FileNotFoundError(f"Markdown file not found: {file_path}")
|
||||
|
||||
# Validate the schema itself
|
||||
self._validate_schema(schema)
|
||||
|
||||
# Generate the document's current structure
|
||||
try:
|
||||
document_schema = self.schema_generator.generate_schema_from_file(file_path)
|
||||
except Exception as e:
|
||||
raise SchemaValidationError(f"Failed to generate document schema: {e}") from e
|
||||
|
||||
# Compare the document's structure against the expected schema
|
||||
return self._compare_structures(document_schema, schema)
|
||||
|
||||
def validate_file_against_schema_string(self, file_path: Path, schema_json: str) -> bool:
|
||||
"""
|
||||
Validate a markdown file against a JSON schema provided as a string.
|
||||
|
||||
Args:
|
||||
file_path: Path to the markdown file
|
||||
schema_json: JSON schema as a string
|
||||
|
||||
Returns:
|
||||
True if the document matches the schema, False otherwise
|
||||
|
||||
Raises:
|
||||
FileNotFoundError: If the markdown file doesn't exist
|
||||
InvalidSchemaError: If the schema is invalid JSON or schema
|
||||
"""
|
||||
try:
|
||||
schema = json.loads(schema_json)
|
||||
except json.JSONDecodeError as e:
|
||||
raise InvalidSchemaError(f"Invalid JSON schema string: {e}") from e
|
||||
|
||||
return self.validate_file_against_schema(file_path, schema)
|
||||
|
||||
def validate_file_against_schema_file(self, file_path: Path, schema_file_path: Path) -> bool:
|
||||
"""
|
||||
Validate a markdown file against a schema stored in a file.
|
||||
|
||||
Args:
|
||||
file_path: Path to the markdown file
|
||||
schema_file_path: Path to the JSON schema file
|
||||
|
||||
Returns:
|
||||
True if the document matches the schema, False otherwise
|
||||
|
||||
Raises:
|
||||
FileNotFoundError: If either file doesn't exist
|
||||
InvalidSchemaError: If the schema file is invalid
|
||||
"""
|
||||
if not schema_file_path.exists():
|
||||
raise FileNotFoundError(f"Schema file not found: {schema_file_path}")
|
||||
|
||||
try:
|
||||
schema_content = schema_file_path.read_text(encoding='utf-8')
|
||||
schema = json.loads(schema_content)
|
||||
except (IOError, json.JSONDecodeError) as e:
|
||||
raise InvalidSchemaError(f"Failed to load schema file {schema_file_path}: {e}") from e
|
||||
|
||||
return self.validate_file_against_schema(file_path, schema)
|
||||
|
||||
def _validate_schema(self, schema: Dict[str, Any]) -> None:
|
||||
"""
|
||||
Validate that a schema is a valid JSON Schema.
|
||||
|
||||
Args:
|
||||
schema: Schema dictionary to validate
|
||||
|
||||
Raises:
|
||||
InvalidSchemaError: If the schema is invalid
|
||||
"""
|
||||
try:
|
||||
# Check basic schema structure
|
||||
if not isinstance(schema, dict):
|
||||
raise InvalidSchemaError("Schema must be a dictionary")
|
||||
|
||||
# Basic schema validation
|
||||
if not schema.get('$schema') or not schema.get('type'):
|
||||
raise InvalidSchemaError("Schema must have '$schema' and 'type' fields")
|
||||
|
||||
# If jsonschema library is available, use it for full validation
|
||||
if self.jsonschema_available:
|
||||
jsonschema.validators.validator_for(schema).check_schema(schema)
|
||||
|
||||
except (SchemaError, TypeError, AttributeError) as e:
|
||||
raise InvalidSchemaError(f"Invalid JSON schema: {e}") from e
|
||||
|
||||
def _compare_structures(self, document_schema: Dict[str, Any], expected_schema: Dict[str, Any]) -> bool:
|
||||
"""
|
||||
Compare a document's actual structure against expected schema requirements.
|
||||
|
||||
This method performs the core validation logic by analyzing whether the
|
||||
document's generated schema satisfies the requirements defined in the
|
||||
expected schema.
|
||||
|
||||
Args:
|
||||
document_schema: Schema generated from the actual document
|
||||
expected_schema: Expected schema requirements
|
||||
|
||||
Returns:
|
||||
True if the document satisfies the expected schema requirements
|
||||
"""
|
||||
try:
|
||||
# Extract actual document structure
|
||||
doc_properties = document_schema.get('properties', {})
|
||||
expected_properties = expected_schema.get('properties', {})
|
||||
|
||||
# Check all required properties are present
|
||||
required_properties = expected_schema.get('required', [])
|
||||
for prop in required_properties:
|
||||
if prop not in doc_properties:
|
||||
return False
|
||||
|
||||
# Validate heading structure if specified
|
||||
if 'headings' in expected_properties and 'headings' in doc_properties:
|
||||
if not self._validate_heading_structure(
|
||||
doc_properties['headings'],
|
||||
expected_properties['headings']
|
||||
):
|
||||
return False
|
||||
|
||||
# Validate other structural elements
|
||||
structural_elements = ['paragraphs', 'lists', 'code_blocks', 'blockquotes', 'tables']
|
||||
for element in structural_elements:
|
||||
if element in expected_properties:
|
||||
if not self._validate_structural_element(
|
||||
doc_properties.get(element),
|
||||
expected_properties[element]
|
||||
):
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
except Exception:
|
||||
# If comparison fails for any reason, consider validation failed
|
||||
return False
|
||||
|
||||
def _validate_heading_structure(self, actual_headings: Dict[str, Any], expected_headings: Dict[str, Any]) -> bool:
|
||||
"""
|
||||
Validate heading structure against expected requirements.
|
||||
|
||||
Args:
|
||||
actual_headings: Actual heading structure from document
|
||||
expected_headings: Expected heading requirements
|
||||
|
||||
Returns:
|
||||
True if heading structure meets requirements
|
||||
"""
|
||||
actual_heading_props = actual_headings.get('properties', {})
|
||||
expected_heading_props = expected_headings.get('properties', {})
|
||||
required_heading_levels = expected_headings.get('required', [])
|
||||
|
||||
# Check required heading levels are present
|
||||
for level in required_heading_levels:
|
||||
if level not in actual_heading_props:
|
||||
return False
|
||||
|
||||
# Check each expected heading level meets requirements
|
||||
for level, expected_spec in expected_heading_props.items():
|
||||
if level not in actual_heading_props:
|
||||
# If level is not required, skip it
|
||||
if level not in required_heading_levels:
|
||||
continue
|
||||
return False
|
||||
|
||||
actual_spec = actual_heading_props[level]
|
||||
|
||||
# Check minimum and maximum item requirements
|
||||
if not self._validate_array_constraints(actual_spec, expected_spec):
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def _validate_structural_element(self, actual_element: Dict[str, Any], expected_element: Dict[str, Any]) -> bool:
|
||||
"""
|
||||
Validate a structural element (paragraphs, lists, etc.) against requirements.
|
||||
|
||||
Args:
|
||||
actual_element: Actual element structure from document
|
||||
expected_element: Expected element requirements
|
||||
|
||||
Returns:
|
||||
True if element meets requirements
|
||||
"""
|
||||
if actual_element is None:
|
||||
# Element doesn't exist in document
|
||||
return False
|
||||
|
||||
return self._validate_array_constraints(actual_element, expected_element)
|
||||
|
||||
def _validate_array_constraints(self, actual: Dict[str, Any], expected: Dict[str, Any]) -> bool:
|
||||
"""
|
||||
Validate array constraints (minItems, maxItems) for structural elements.
|
||||
|
||||
Args:
|
||||
actual: Actual element specification
|
||||
expected: Expected element specification
|
||||
|
||||
Returns:
|
||||
True if constraints are satisfied
|
||||
"""
|
||||
# Get actual count from the schema specification
|
||||
# For generated schemas, we use minItems/maxItems which represent actual counts
|
||||
actual_min = actual.get('minItems', 0)
|
||||
actual_max = actual.get('maxItems', actual_min)
|
||||
actual_count = actual_max # In our generated schemas, min=max=actual count
|
||||
|
||||
# Check against expected constraints
|
||||
expected_min = expected.get('minItems', 0)
|
||||
expected_max = expected.get('maxItems', float('inf'))
|
||||
|
||||
return expected_min <= actual_count <= expected_max
|
||||
|
||||
# Issue #8: Detailed Error Reporting Methods
|
||||
|
||||
def validate_file_with_errors(self, file_path: Path, schema: Dict[str, Any]) -> ValidationErrorCollector:
|
||||
"""
|
||||
Validate a markdown file against a JSON schema and collect detailed errors.
|
||||
|
||||
This method provides comprehensive error reporting for Issue #8, enabling
|
||||
users to understand exactly how their documents deviate from schemas.
|
||||
|
||||
Args:
|
||||
file_path: Path to the markdown file
|
||||
schema: JSON schema dictionary to validate against
|
||||
|
||||
Returns:
|
||||
ValidationErrorCollector with all validation errors
|
||||
|
||||
Raises:
|
||||
FileNotFoundError: If the markdown file doesn't exist
|
||||
InvalidSchemaError: If the schema is invalid
|
||||
"""
|
||||
# Validate inputs
|
||||
if not file_path.exists():
|
||||
raise FileNotFoundError(f"Markdown file not found: {file_path}")
|
||||
|
||||
# Validate the schema itself
|
||||
self._validate_schema(schema)
|
||||
|
||||
# Initialize error collector
|
||||
error_collector = ValidationErrorCollector()
|
||||
|
||||
# Generate the document's current structure
|
||||
try:
|
||||
document_schema = self.schema_generator.generate_schema_from_file(file_path)
|
||||
except Exception as e:
|
||||
error_collector.add_error(
|
||||
ValidationErrorType.STRUCTURAL_VIOLATION,
|
||||
f"Failed to generate document schema: {e}",
|
||||
"document.structure",
|
||||
suggestion="Check if the markdown file is properly formatted"
|
||||
)
|
||||
return error_collector
|
||||
|
||||
# Compare the document's structure against the expected schema and collect errors
|
||||
self._compare_structures_with_errors(document_schema, schema, error_collector)
|
||||
|
||||
return error_collector
|
||||
|
||||
def validate_file_with_errors_string(self, file_path: Path, schema_json: str) -> ValidationErrorCollector:
|
||||
"""
|
||||
Validate a markdown file against a JSON schema string and collect detailed errors.
|
||||
|
||||
Args:
|
||||
file_path: Path to the markdown file
|
||||
schema_json: JSON schema as a string
|
||||
|
||||
Returns:
|
||||
ValidationErrorCollector with all validation errors
|
||||
|
||||
Raises:
|
||||
FileNotFoundError: If the markdown file doesn't exist
|
||||
InvalidSchemaError: If the schema is invalid JSON or schema
|
||||
"""
|
||||
try:
|
||||
schema = json.loads(schema_json)
|
||||
except json.JSONDecodeError as e:
|
||||
raise InvalidSchemaError(f"Invalid JSON schema string: {e}") from e
|
||||
|
||||
return self.validate_file_with_errors(file_path, schema)
|
||||
|
||||
def validate_file_with_errors_file(self, file_path: Path, schema_file_path: Path) -> ValidationErrorCollector:
|
||||
"""
|
||||
Validate a markdown file against a schema file and collect detailed errors.
|
||||
|
||||
Args:
|
||||
file_path: Path to the markdown file
|
||||
schema_file_path: Path to the JSON schema file
|
||||
|
||||
Returns:
|
||||
ValidationErrorCollector with all validation errors
|
||||
|
||||
Raises:
|
||||
FileNotFoundError: If either file doesn't exist
|
||||
InvalidSchemaError: If the schema file is invalid
|
||||
"""
|
||||
if not schema_file_path.exists():
|
||||
raise FileNotFoundError(f"Schema file not found: {schema_file_path}")
|
||||
|
||||
try:
|
||||
schema_content = schema_file_path.read_text(encoding='utf-8')
|
||||
schema = json.loads(schema_content)
|
||||
except (IOError, json.JSONDecodeError) as e:
|
||||
raise InvalidSchemaError(f"Failed to load schema file {schema_file_path}: {e}") from e
|
||||
|
||||
return self.validate_file_with_errors(file_path, schema)
|
||||
|
||||
def _compare_structures_with_errors(
|
||||
self,
|
||||
document_schema: Dict[str, Any],
|
||||
expected_schema: Dict[str, Any],
|
||||
error_collector: ValidationErrorCollector
|
||||
) -> None:
|
||||
"""
|
||||
Compare document structure against expected schema and collect detailed errors.
|
||||
|
||||
This method performs comprehensive validation analysis, collecting specific
|
||||
errors about missing headings, incorrect content counts, and structural violations.
|
||||
|
||||
Args:
|
||||
document_schema: Schema generated from the actual document
|
||||
expected_schema: Expected schema requirements
|
||||
error_collector: Collector to accumulate validation errors
|
||||
"""
|
||||
try:
|
||||
# Extract actual document structure
|
||||
doc_properties = document_schema.get('properties', {})
|
||||
expected_properties = expected_schema.get('properties', {})
|
||||
|
||||
# Check all required properties are present
|
||||
required_properties = expected_schema.get('required', [])
|
||||
for prop in required_properties:
|
||||
if prop not in doc_properties:
|
||||
error_collector.add_error(
|
||||
ValidationErrorType.MISSING_REQUIRED_SECTION,
|
||||
f"Missing required section: '{prop}'",
|
||||
f"document.{prop}",
|
||||
expected=f"Section '{prop}' is required by schema",
|
||||
actual="Section not found",
|
||||
suggestion=f"Add the '{prop}' section to your document"
|
||||
)
|
||||
|
||||
# Validate heading structure if specified
|
||||
if 'headings' in expected_properties and 'headings' in doc_properties:
|
||||
self._validate_heading_structure_with_errors(
|
||||
doc_properties['headings'],
|
||||
expected_properties['headings'],
|
||||
error_collector
|
||||
)
|
||||
|
||||
# Validate other structural elements
|
||||
structural_elements = ['paragraphs', 'lists', 'code_blocks', 'blockquotes', 'tables']
|
||||
for element in structural_elements:
|
||||
if element in expected_properties:
|
||||
self._validate_structural_element_with_errors(
|
||||
doc_properties.get(element),
|
||||
expected_properties[element],
|
||||
element,
|
||||
error_collector
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
error_collector.add_error(
|
||||
ValidationErrorType.STRUCTURAL_VIOLATION,
|
||||
f"Error during structure comparison: {e}",
|
||||
"document.structure",
|
||||
suggestion="Check if both the document and schema are properly formatted"
|
||||
)
|
||||
|
||||
def _validate_heading_structure_with_errors(
|
||||
self,
|
||||
actual_headings: Dict[str, Any],
|
||||
expected_headings: Dict[str, Any],
|
||||
error_collector: ValidationErrorCollector
|
||||
) -> None:
|
||||
"""
|
||||
Validate heading structure and collect detailed errors.
|
||||
|
||||
Args:
|
||||
actual_headings: Actual heading structure from document
|
||||
expected_headings: Expected heading requirements
|
||||
error_collector: Collector for validation errors
|
||||
"""
|
||||
actual_heading_props = actual_headings.get('properties', {})
|
||||
expected_heading_props = expected_headings.get('properties', {})
|
||||
required_heading_levels = expected_headings.get('required', [])
|
||||
|
||||
# Check required heading levels are present
|
||||
for level in required_heading_levels:
|
||||
if level not in actual_heading_props:
|
||||
level_num = level.replace('level_', '')
|
||||
error_collector.add_error(
|
||||
ValidationErrorType.MISSING_REQUIRED_HEADING,
|
||||
f"Missing required heading level {level_num}",
|
||||
f"headings.{level}",
|
||||
expected=f"At least one heading at level {level_num}",
|
||||
actual="No headings found at this level",
|
||||
suggestion=f"Add heading(s) at level {level_num} (e.g., {'#' * int(level_num)} Heading)"
|
||||
)
|
||||
|
||||
# Check each expected heading level meets requirements
|
||||
for level, expected_spec in expected_heading_props.items():
|
||||
if level not in actual_heading_props:
|
||||
# If level is not required, skip it
|
||||
if level not in required_heading_levels:
|
||||
continue
|
||||
# Already handled above in required check
|
||||
|
||||
else:
|
||||
actual_spec = actual_heading_props[level]
|
||||
level_num = level.replace('level_', '')
|
||||
|
||||
# Check minimum and maximum item requirements
|
||||
self._validate_array_constraints_with_errors(
|
||||
actual_spec,
|
||||
expected_spec,
|
||||
f"headings.{level}",
|
||||
f"level {level_num} headings",
|
||||
error_collector
|
||||
)
|
||||
|
||||
def _validate_structural_element_with_errors(
|
||||
self,
|
||||
actual_element: Dict[str, Any],
|
||||
expected_element: Dict[str, Any],
|
||||
element_name: str,
|
||||
error_collector: ValidationErrorCollector
|
||||
) -> None:
|
||||
"""
|
||||
Validate a structural element and collect errors.
|
||||
|
||||
Args:
|
||||
actual_element: Actual element structure from document
|
||||
expected_element: Expected element requirements
|
||||
element_name: Name of the structural element (for error messages)
|
||||
error_collector: Collector for validation errors
|
||||
"""
|
||||
if actual_element is None:
|
||||
error_collector.add_error(
|
||||
ValidationErrorType.MISSING_REQUIRED_SECTION,
|
||||
f"Missing required structural element: {element_name}",
|
||||
f"content.{element_name}",
|
||||
expected=f"Document should contain {element_name}",
|
||||
actual="Element not found",
|
||||
suggestion=f"Add {element_name} to your document"
|
||||
)
|
||||
return
|
||||
|
||||
self._validate_array_constraints_with_errors(
|
||||
actual_element,
|
||||
expected_element,
|
||||
f"content.{element_name}",
|
||||
element_name,
|
||||
error_collector
|
||||
)
|
||||
|
||||
def _validate_array_constraints_with_errors(
|
||||
self,
|
||||
actual: Dict[str, Any],
|
||||
expected: Dict[str, Any],
|
||||
path: str,
|
||||
element_description: str,
|
||||
error_collector: ValidationErrorCollector
|
||||
) -> None:
|
||||
"""
|
||||
Validate array constraints and collect specific errors.
|
||||
|
||||
Args:
|
||||
actual: Actual element specification
|
||||
expected: Expected element specification
|
||||
path: JSON path for error location
|
||||
element_description: Human-readable element description
|
||||
error_collector: Collector for validation errors
|
||||
"""
|
||||
# Get actual count from the schema specification
|
||||
actual_min = actual.get('minItems', 0)
|
||||
actual_max = actual.get('maxItems', actual_min)
|
||||
actual_count = actual_max # In our generated schemas, min=max=actual count
|
||||
|
||||
# Check against expected constraints
|
||||
expected_min = expected.get('minItems', 0)
|
||||
expected_max = expected.get('maxItems', float('inf'))
|
||||
|
||||
# Check minimum constraint
|
||||
if actual_count < expected_min:
|
||||
error_collector.add_error(
|
||||
ValidationErrorType.INSUFFICIENT_CONTENT,
|
||||
f"Insufficient {element_description}: found {actual_count}, required at least {expected_min}",
|
||||
path,
|
||||
expected=f"At least {expected_min} {element_description}",
|
||||
actual=f"{actual_count} {element_description}",
|
||||
suggestion=f"Add {expected_min - actual_count} more {element_description}"
|
||||
)
|
||||
|
||||
# Check maximum constraint
|
||||
if expected_max != float('inf') and actual_count > expected_max:
|
||||
error_collector.add_error(
|
||||
ValidationErrorType.EXCESS_CONTENT,
|
||||
f"Too many {element_description}: found {actual_count}, maximum allowed {expected_max}",
|
||||
path,
|
||||
expected=f"At most {expected_max} {element_description}",
|
||||
actual=f"{actual_count} {element_description}",
|
||||
suggestion=f"Remove {actual_count - expected_max} {element_description}"
|
||||
)
|
||||
Reference in New Issue
Block a user