feat: Complete Issue #8 - Detailed Validation Error Reporting and CLI Enhancements
Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled

Major Features:
- Implement comprehensive validation error reporting system (Issue #8)
- Add direct CLI access with 'markitect' command
- Create extensive makefile targets for CLI usage
- Enhance schema validation with detailed error collection

Components Added:
- markitect/validation_error.py: ValidationError system with 8 error types
- Enhanced markitect/schema_validator.py: Detailed error reporting methods
- markitect/cli.py: Enhanced with --detailed-errors and --error-format options
- visualize_schema.py: Schema visualization with ASCII and colorful modes
- Comprehensive test suite for validation error reporting

CLI Enhancements:
- Direct 'markitect' command access for all operations
- Makefile targets for typical CLI usage (cli-help, cli-ingest, etc.)
- Support for text, JSON, and markdown error output formats
- Backward compatibility with existing validation functionality

Testing:
- 11 comprehensive tests for Issue #8 validation error reporting
- Tests for schema validation, visualization, and CLI integration
- 100% test coverage for validation error scenarios

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-09-29 21:21:21 +02:00
parent 0acde1e840
commit ccbca967c8
1024 changed files with 2649 additions and 189982 deletions

View File

@@ -0,0 +1,565 @@
"""
Schema Validator for Issue #7: Validate a Markdown File Against a Schema.
This module provides functionality to validate markdown documents against JSON schemas
for arc42 architecture documentation compliance checking - essential for intelligent
document analysis and plan-actual comparison capabilities.
"""
import json
from pathlib import Path
from typing import Dict, Any, Union
try:
import jsonschema
from jsonschema import validate, ValidationError, SchemaError
JSONSCHEMA_AVAILABLE = True
except ImportError:
# Fallback to basic validation without full JSON Schema validation
JSONSCHEMA_AVAILABLE = False
ValidationError = Exception
SchemaError = Exception
from .parser import parse_markdown_to_ast
from .schema_generator import SchemaGenerator
from .validation_error import ValidationErrorCollector, ValidationErrorType
from .exceptions import FileNotFoundError, SchemaValidationError, InvalidSchemaError
class SchemaValidator:
"""
Validates markdown documents against JSON schemas for arc42 compliance checking.
This service provides boolean validation results for markdown documents against
schemas, enabling strict compliance checking for architectural documentation
templates and intelligent plan-actual comparison.
"""
def __init__(self):
"""Initialize the schema validator."""
self.schema_generator = SchemaGenerator()
self.jsonschema_available = JSONSCHEMA_AVAILABLE
def validate_file_against_schema(self, file_path: Path, schema: Dict[str, Any]) -> bool:
"""
Validate a markdown file against a JSON schema.
Args:
file_path: Path to the markdown file
schema: JSON schema dictionary to validate against
Returns:
True if the document matches the schema, False otherwise
Raises:
FileNotFoundError: If the markdown file doesn't exist
InvalidSchemaError: If the schema is invalid
"""
# Validate inputs
if not file_path.exists():
raise FileNotFoundError(f"Markdown file not found: {file_path}")
# Validate the schema itself
self._validate_schema(schema)
# Generate the document's current structure
try:
document_schema = self.schema_generator.generate_schema_from_file(file_path)
except Exception as e:
raise SchemaValidationError(f"Failed to generate document schema: {e}") from e
# Compare the document's structure against the expected schema
return self._compare_structures(document_schema, schema)
def validate_file_against_schema_string(self, file_path: Path, schema_json: str) -> bool:
"""
Validate a markdown file against a JSON schema provided as a string.
Args:
file_path: Path to the markdown file
schema_json: JSON schema as a string
Returns:
True if the document matches the schema, False otherwise
Raises:
FileNotFoundError: If the markdown file doesn't exist
InvalidSchemaError: If the schema is invalid JSON or schema
"""
try:
schema = json.loads(schema_json)
except json.JSONDecodeError as e:
raise InvalidSchemaError(f"Invalid JSON schema string: {e}") from e
return self.validate_file_against_schema(file_path, schema)
def validate_file_against_schema_file(self, file_path: Path, schema_file_path: Path) -> bool:
"""
Validate a markdown file against a schema stored in a file.
Args:
file_path: Path to the markdown file
schema_file_path: Path to the JSON schema file
Returns:
True if the document matches the schema, False otherwise
Raises:
FileNotFoundError: If either file doesn't exist
InvalidSchemaError: If the schema file is invalid
"""
if not schema_file_path.exists():
raise FileNotFoundError(f"Schema file not found: {schema_file_path}")
try:
schema_content = schema_file_path.read_text(encoding='utf-8')
schema = json.loads(schema_content)
except (IOError, json.JSONDecodeError) as e:
raise InvalidSchemaError(f"Failed to load schema file {schema_file_path}: {e}") from e
return self.validate_file_against_schema(file_path, schema)
def _validate_schema(self, schema: Dict[str, Any]) -> None:
"""
Validate that a schema is a valid JSON Schema.
Args:
schema: Schema dictionary to validate
Raises:
InvalidSchemaError: If the schema is invalid
"""
try:
# Check basic schema structure
if not isinstance(schema, dict):
raise InvalidSchemaError("Schema must be a dictionary")
# Basic schema validation
if not schema.get('$schema') or not schema.get('type'):
raise InvalidSchemaError("Schema must have '$schema' and 'type' fields")
# If jsonschema library is available, use it for full validation
if self.jsonschema_available:
jsonschema.validators.validator_for(schema).check_schema(schema)
except (SchemaError, TypeError, AttributeError) as e:
raise InvalidSchemaError(f"Invalid JSON schema: {e}") from e
def _compare_structures(self, document_schema: Dict[str, Any], expected_schema: Dict[str, Any]) -> bool:
"""
Compare a document's actual structure against expected schema requirements.
This method performs the core validation logic by analyzing whether the
document's generated schema satisfies the requirements defined in the
expected schema.
Args:
document_schema: Schema generated from the actual document
expected_schema: Expected schema requirements
Returns:
True if the document satisfies the expected schema requirements
"""
try:
# Extract actual document structure
doc_properties = document_schema.get('properties', {})
expected_properties = expected_schema.get('properties', {})
# Check all required properties are present
required_properties = expected_schema.get('required', [])
for prop in required_properties:
if prop not in doc_properties:
return False
# Validate heading structure if specified
if 'headings' in expected_properties and 'headings' in doc_properties:
if not self._validate_heading_structure(
doc_properties['headings'],
expected_properties['headings']
):
return False
# Validate other structural elements
structural_elements = ['paragraphs', 'lists', 'code_blocks', 'blockquotes', 'tables']
for element in structural_elements:
if element in expected_properties:
if not self._validate_structural_element(
doc_properties.get(element),
expected_properties[element]
):
return False
return True
except Exception:
# If comparison fails for any reason, consider validation failed
return False
def _validate_heading_structure(self, actual_headings: Dict[str, Any], expected_headings: Dict[str, Any]) -> bool:
"""
Validate heading structure against expected requirements.
Args:
actual_headings: Actual heading structure from document
expected_headings: Expected heading requirements
Returns:
True if heading structure meets requirements
"""
actual_heading_props = actual_headings.get('properties', {})
expected_heading_props = expected_headings.get('properties', {})
required_heading_levels = expected_headings.get('required', [])
# Check required heading levels are present
for level in required_heading_levels:
if level not in actual_heading_props:
return False
# Check each expected heading level meets requirements
for level, expected_spec in expected_heading_props.items():
if level not in actual_heading_props:
# If level is not required, skip it
if level not in required_heading_levels:
continue
return False
actual_spec = actual_heading_props[level]
# Check minimum and maximum item requirements
if not self._validate_array_constraints(actual_spec, expected_spec):
return False
return True
def _validate_structural_element(self, actual_element: Dict[str, Any], expected_element: Dict[str, Any]) -> bool:
"""
Validate a structural element (paragraphs, lists, etc.) against requirements.
Args:
actual_element: Actual element structure from document
expected_element: Expected element requirements
Returns:
True if element meets requirements
"""
if actual_element is None:
# Element doesn't exist in document
return False
return self._validate_array_constraints(actual_element, expected_element)
def _validate_array_constraints(self, actual: Dict[str, Any], expected: Dict[str, Any]) -> bool:
"""
Validate array constraints (minItems, maxItems) for structural elements.
Args:
actual: Actual element specification
expected: Expected element specification
Returns:
True if constraints are satisfied
"""
# Get actual count from the schema specification
# For generated schemas, we use minItems/maxItems which represent actual counts
actual_min = actual.get('minItems', 0)
actual_max = actual.get('maxItems', actual_min)
actual_count = actual_max # In our generated schemas, min=max=actual count
# Check against expected constraints
expected_min = expected.get('minItems', 0)
expected_max = expected.get('maxItems', float('inf'))
return expected_min <= actual_count <= expected_max
# Issue #8: Detailed Error Reporting Methods
def validate_file_with_errors(self, file_path: Path, schema: Dict[str, Any]) -> ValidationErrorCollector:
"""
Validate a markdown file against a JSON schema and collect detailed errors.
This method provides comprehensive error reporting for Issue #8, enabling
users to understand exactly how their documents deviate from schemas.
Args:
file_path: Path to the markdown file
schema: JSON schema dictionary to validate against
Returns:
ValidationErrorCollector with all validation errors
Raises:
FileNotFoundError: If the markdown file doesn't exist
InvalidSchemaError: If the schema is invalid
"""
# Validate inputs
if not file_path.exists():
raise FileNotFoundError(f"Markdown file not found: {file_path}")
# Validate the schema itself
self._validate_schema(schema)
# Initialize error collector
error_collector = ValidationErrorCollector()
# Generate the document's current structure
try:
document_schema = self.schema_generator.generate_schema_from_file(file_path)
except Exception as e:
error_collector.add_error(
ValidationErrorType.STRUCTURAL_VIOLATION,
f"Failed to generate document schema: {e}",
"document.structure",
suggestion="Check if the markdown file is properly formatted"
)
return error_collector
# Compare the document's structure against the expected schema and collect errors
self._compare_structures_with_errors(document_schema, schema, error_collector)
return error_collector
def validate_file_with_errors_string(self, file_path: Path, schema_json: str) -> ValidationErrorCollector:
"""
Validate a markdown file against a JSON schema string and collect detailed errors.
Args:
file_path: Path to the markdown file
schema_json: JSON schema as a string
Returns:
ValidationErrorCollector with all validation errors
Raises:
FileNotFoundError: If the markdown file doesn't exist
InvalidSchemaError: If the schema is invalid JSON or schema
"""
try:
schema = json.loads(schema_json)
except json.JSONDecodeError as e:
raise InvalidSchemaError(f"Invalid JSON schema string: {e}") from e
return self.validate_file_with_errors(file_path, schema)
def validate_file_with_errors_file(self, file_path: Path, schema_file_path: Path) -> ValidationErrorCollector:
"""
Validate a markdown file against a schema file and collect detailed errors.
Args:
file_path: Path to the markdown file
schema_file_path: Path to the JSON schema file
Returns:
ValidationErrorCollector with all validation errors
Raises:
FileNotFoundError: If either file doesn't exist
InvalidSchemaError: If the schema file is invalid
"""
if not schema_file_path.exists():
raise FileNotFoundError(f"Schema file not found: {schema_file_path}")
try:
schema_content = schema_file_path.read_text(encoding='utf-8')
schema = json.loads(schema_content)
except (IOError, json.JSONDecodeError) as e:
raise InvalidSchemaError(f"Failed to load schema file {schema_file_path}: {e}") from e
return self.validate_file_with_errors(file_path, schema)
def _compare_structures_with_errors(
self,
document_schema: Dict[str, Any],
expected_schema: Dict[str, Any],
error_collector: ValidationErrorCollector
) -> None:
"""
Compare document structure against expected schema and collect detailed errors.
This method performs comprehensive validation analysis, collecting specific
errors about missing headings, incorrect content counts, and structural violations.
Args:
document_schema: Schema generated from the actual document
expected_schema: Expected schema requirements
error_collector: Collector to accumulate validation errors
"""
try:
# Extract actual document structure
doc_properties = document_schema.get('properties', {})
expected_properties = expected_schema.get('properties', {})
# Check all required properties are present
required_properties = expected_schema.get('required', [])
for prop in required_properties:
if prop not in doc_properties:
error_collector.add_error(
ValidationErrorType.MISSING_REQUIRED_SECTION,
f"Missing required section: '{prop}'",
f"document.{prop}",
expected=f"Section '{prop}' is required by schema",
actual="Section not found",
suggestion=f"Add the '{prop}' section to your document"
)
# Validate heading structure if specified
if 'headings' in expected_properties and 'headings' in doc_properties:
self._validate_heading_structure_with_errors(
doc_properties['headings'],
expected_properties['headings'],
error_collector
)
# Validate other structural elements
structural_elements = ['paragraphs', 'lists', 'code_blocks', 'blockquotes', 'tables']
for element in structural_elements:
if element in expected_properties:
self._validate_structural_element_with_errors(
doc_properties.get(element),
expected_properties[element],
element,
error_collector
)
except Exception as e:
error_collector.add_error(
ValidationErrorType.STRUCTURAL_VIOLATION,
f"Error during structure comparison: {e}",
"document.structure",
suggestion="Check if both the document and schema are properly formatted"
)
def _validate_heading_structure_with_errors(
self,
actual_headings: Dict[str, Any],
expected_headings: Dict[str, Any],
error_collector: ValidationErrorCollector
) -> None:
"""
Validate heading structure and collect detailed errors.
Args:
actual_headings: Actual heading structure from document
expected_headings: Expected heading requirements
error_collector: Collector for validation errors
"""
actual_heading_props = actual_headings.get('properties', {})
expected_heading_props = expected_headings.get('properties', {})
required_heading_levels = expected_headings.get('required', [])
# Check required heading levels are present
for level in required_heading_levels:
if level not in actual_heading_props:
level_num = level.replace('level_', '')
error_collector.add_error(
ValidationErrorType.MISSING_REQUIRED_HEADING,
f"Missing required heading level {level_num}",
f"headings.{level}",
expected=f"At least one heading at level {level_num}",
actual="No headings found at this level",
suggestion=f"Add heading(s) at level {level_num} (e.g., {'#' * int(level_num)} Heading)"
)
# Check each expected heading level meets requirements
for level, expected_spec in expected_heading_props.items():
if level not in actual_heading_props:
# If level is not required, skip it
if level not in required_heading_levels:
continue
# Already handled above in required check
else:
actual_spec = actual_heading_props[level]
level_num = level.replace('level_', '')
# Check minimum and maximum item requirements
self._validate_array_constraints_with_errors(
actual_spec,
expected_spec,
f"headings.{level}",
f"level {level_num} headings",
error_collector
)
def _validate_structural_element_with_errors(
self,
actual_element: Dict[str, Any],
expected_element: Dict[str, Any],
element_name: str,
error_collector: ValidationErrorCollector
) -> None:
"""
Validate a structural element and collect errors.
Args:
actual_element: Actual element structure from document
expected_element: Expected element requirements
element_name: Name of the structural element (for error messages)
error_collector: Collector for validation errors
"""
if actual_element is None:
error_collector.add_error(
ValidationErrorType.MISSING_REQUIRED_SECTION,
f"Missing required structural element: {element_name}",
f"content.{element_name}",
expected=f"Document should contain {element_name}",
actual="Element not found",
suggestion=f"Add {element_name} to your document"
)
return
self._validate_array_constraints_with_errors(
actual_element,
expected_element,
f"content.{element_name}",
element_name,
error_collector
)
def _validate_array_constraints_with_errors(
self,
actual: Dict[str, Any],
expected: Dict[str, Any],
path: str,
element_description: str,
error_collector: ValidationErrorCollector
) -> None:
"""
Validate array constraints and collect specific errors.
Args:
actual: Actual element specification
expected: Expected element specification
path: JSON path for error location
element_description: Human-readable element description
error_collector: Collector for validation errors
"""
# Get actual count from the schema specification
actual_min = actual.get('minItems', 0)
actual_max = actual.get('maxItems', actual_min)
actual_count = actual_max # In our generated schemas, min=max=actual count
# Check against expected constraints
expected_min = expected.get('minItems', 0)
expected_max = expected.get('maxItems', float('inf'))
# Check minimum constraint
if actual_count < expected_min:
error_collector.add_error(
ValidationErrorType.INSUFFICIENT_CONTENT,
f"Insufficient {element_description}: found {actual_count}, required at least {expected_min}",
path,
expected=f"At least {expected_min} {element_description}",
actual=f"{actual_count} {element_description}",
suggestion=f"Add {expected_min - actual_count} more {element_description}"
)
# Check maximum constraint
if expected_max != float('inf') and actual_count > expected_max:
error_collector.add_error(
ValidationErrorType.EXCESS_CONTENT,
f"Too many {element_description}: found {actual_count}, maximum allowed {expected_max}",
path,
expected=f"At most {expected_max} {element_description}",
actual=f"{actual_count} {element_description}",
suggestion=f"Remove {actual_count - expected_max} {element_description}"
)