diff --git a/TODO.md b/TODO.md index 9b282fef..156e6575 100644 --- a/TODO.md +++ b/TODO.md @@ -25,11 +25,11 @@ This section is for tasks currently being discussed with or worked on by the cod 5. ⏳ Build schema-for-schemas metaschema 6. ⏳ Migrate existing schemas to new format -**Phase 1 Tasks (Current):** -- [ ] Write `markitect/schema_naming.py` with validation logic -- [ ] Add unit tests for filename validation -- [ ] Update `schema-ingest` command with validation -- [ ] Create SCHEMA_NAMING_SPEC.md documentation +**Phase 1 Tasks (Completed ✅):** +- [x] Write `markitect/schema_naming.py` with validation logic +- [x] Add unit tests for filename validation (50 tests, 100% passing) +- [ ] Update `schema-ingest` command with validation (Next: Phase 2) +- [x] Create SCHEMA_NAMING_SPEC.md documentation **Next Phases:** - Phase 2: Markdown Schema Loader (2-3 days) diff --git a/markitect/schema_naming.py b/markitect/schema_naming.py new file mode 100644 index 00000000..cf3f0095 --- /dev/null +++ b/markitect/schema_naming.py @@ -0,0 +1,309 @@ +""" +Schema Naming Validation - Enforce filename conventions for schemas. + +This module provides validation and utilities for schema filename conventions +to ensure consistency across the MarkiTect schema ecosystem. 
+ +Naming Convention: + Format: {domain}-schema-v{major}.{minor}.md + + Components: + - domain: lowercase, hyphen-separated identifier (e.g., "manpage", "api-documentation") + - schema: literal string "schema" + - version: SemVer major.minor (e.g., "v1.0", "v2.1") + - extension: ".md" (markdown) + + Valid Examples: + ✓ manpage-schema-v1.0.md + ✓ terminology-schema-v1.0.md + ✓ api-documentation-schema-v1.0.md + ✓ my-custom-type-schema-v2.1.md + + Invalid Examples: + ✗ manpage.json (missing version and wrong extension) + ✗ manpage-v1.md (missing "schema" keyword) + ✗ ManPage-Schema-v1.0.md (wrong case - must be lowercase) + ✗ manpage-schema-1.0.md (missing 'v' prefix) + ✗ manpage-schema-v1.md (missing minor version) +""" + +import re +from pathlib import Path +from typing import Tuple, Optional, Dict, Any + + +# Regex pattern for schema filename validation +# Matches: {domain}-schema-v{major}.{minor}.md +# Where domain is lowercase letters/numbers/hyphens starting with letter +SCHEMA_FILENAME_PATTERN = re.compile( + r'^(?P<domain>[a-z][a-z0-9-]*)-schema-v(?P<major>\d+)\.(?P<minor>\d+)\.md$' +) + + +class SchemaFilenameError(Exception): + """Exception raised for invalid schema filenames.""" + pass + + +def validate_schema_filename(filename: str) -> Tuple[bool, Optional[Dict[str, Any]]]: + """ + Validate schema filename against naming convention. 
+ + Args: + filename: The filename to validate (e.g., "manpage-schema-v1.0.md") + + Returns: + Tuple of (is_valid, metadata_dict or None) + + If valid, metadata_dict contains: + - domain: str - The domain identifier + - version: str - Full version string (e.g., "1.0") + - major: int - Major version number + - minor: int - Minor version number + - filename: str - The original filename + + If invalid, metadata_dict is None + + Examples: + >>> validate_schema_filename("manpage-schema-v1.0.md") + (True, {'domain': 'manpage', 'version': '1.0', ...}) + + >>> validate_schema_filename("invalid.json") + (False, None) + """ + match = SCHEMA_FILENAME_PATTERN.match(filename) + + if not match: + return False, None + + return True, { + 'domain': match.group('domain'), + 'version': f"{match.group('major')}.{match.group('minor')}", + 'major': int(match.group('major')), + 'minor': int(match.group('minor')), + 'filename': filename + } + + +def suggest_schema_filename( + domain: str, + version: str = "1.0", + normalize: bool = True +) -> str: + """ + Generate a valid schema filename from domain and version. 
+ + Args: + domain: The schema domain (e.g., "manpage", "API Documentation") + version: Version string in format "major.minor" (default: "1.0") + normalize: Whether to normalize domain to lowercase/hyphenated + + Returns: + Valid schema filename + + Raises: + ValueError: If domain or version format is invalid + + Examples: + >>> suggest_schema_filename("manpage", "1.0") + 'manpage-schema-v1.0.md' + + >>> suggest_schema_filename("API Documentation", "2.1") + 'api-documentation-schema-v2.1.md' + + >>> suggest_schema_filename("My_Custom_Type", "1.0") + 'my-custom-type-schema-v1.0.md' + """ + if not domain: + raise ValueError("Domain cannot be empty") + + if normalize: + # Normalize domain: lowercase, replace spaces/underscores with hyphens + domain_clean = domain.lower() + domain_clean = domain_clean.replace(' ', '-').replace('_', '-') + # Remove consecutive hyphens + domain_clean = re.sub(r'-+', '-', domain_clean) + # Remove leading/trailing hyphens + domain_clean = domain_clean.strip('-') + else: + domain_clean = domain + + # Validate domain format (must start with letter, contain only lowercase, numbers, hyphens) + if not re.match(r'^[a-z][a-z0-9-]*$', domain_clean): + raise ValueError( + f"Invalid domain '{domain_clean}': must start with lowercase letter " + "and contain only lowercase letters, numbers, and hyphens" + ) + + # Parse and validate version + version_parts = version.split('.') + if len(version_parts) != 2: + raise ValueError( + f"Invalid version '{version}': must be in format 'major.minor' (e.g., '1.0')" + ) + + try: + major = int(version_parts[0]) + minor = int(version_parts[1]) + except ValueError: + raise ValueError( + f"Invalid version '{version}': major and minor must be integers" + ) + + if major < 0 or minor < 0: + raise ValueError( + f"Invalid version '{version}': major and minor must be non-negative" + ) + + return f"{domain_clean}-schema-v{major}.{minor}.md" + + +def extract_schema_metadata(filename: str) -> Dict[str, Any]: + """ + Extract 
metadata from a valid schema filename. + + Args: + filename: Schema filename to parse + + Returns: + Dictionary with metadata + + Raises: + SchemaFilenameError: If filename is invalid + + Examples: + >>> extract_schema_metadata("manpage-schema-v1.0.md") + {'domain': 'manpage', 'version': '1.0', 'major': 1, 'minor': 0, 'filename': 'manpage-schema-v1.0.md'} + """ + is_valid, metadata = validate_schema_filename(filename) + + if not is_valid: + raise SchemaFilenameError( + f"Invalid schema filename: {filename}\n" + f"Expected format: {{domain}}-schema-v{{major}}.{{minor}}.md" + ) + + return metadata + + +def get_validation_errors(filename: str) -> list: + """ + Get detailed validation errors for a filename. + + Args: + filename: Filename to validate + + Returns: + List of error messages (empty if valid) + + Examples: + >>> get_validation_errors("manpage-schema-v1.0.md") + [] + + >>> get_validation_errors("invalid.json") + ['Filename does not match pattern: {domain}-schema-v{major}.{minor}.md', ...] + """ + errors = [] + + # Check basic pattern match + is_valid, _ = validate_schema_filename(filename) + if is_valid: + return errors + + # Provide detailed feedback + errors.append( + f"Filename does not match pattern: {{domain}}-schema-v{{major}}.{{minor}}.md" + ) + + # Check extension + if not filename.endswith('.md'): + errors.append(f"Extension must be '.md', got: {Path(filename).suffix}") + + # Check for version + if '-v' not in filename: + errors.append("Missing version: filename must include '-v{major}.{minor}'") + elif not re.search(r'-v\d+\.\d+', filename): + errors.append( + "Invalid version format: must be '-v{major}.{minor}' (e.g., '-v1.0')" + ) + + # Check for schema keyword + if '-schema-' not in filename: + errors.append("Missing '-schema-' keyword in filename") + + # Check for uppercase (must be lowercase) + if any(c.isupper() for c in filename): + errors.append("Filename must be lowercase") + + # Check domain format (if we can isolate it) + parts = filename.split('-schema-') + if len(parts) >= 
1: + domain = parts[0] + if domain and not re.match(r'^[a-z][a-z0-9-]*$', domain): + errors.append( + f"Invalid domain '{domain}': must start with lowercase letter " + "and contain only lowercase letters, numbers, and hyphens" + ) + + return errors + + +def is_valid_schema_filename(filename: str) -> bool: + """ + Check if filename is valid (convenience function). + + Args: + filename: Filename to check + + Returns: + True if valid, False otherwise + + Examples: + >>> is_valid_schema_filename("manpage-schema-v1.0.md") + True + + >>> is_valid_schema_filename("invalid.json") + False + """ + is_valid, _ = validate_schema_filename(filename) + return is_valid + + +def format_validation_message(filename: str) -> str: + """ + Format a user-friendly validation message. + + Args: + filename: Filename that failed validation + + Returns: + Formatted error message with suggestions + + Examples: + >>> print(format_validation_message("manpage.json")) + ❌ Invalid schema filename: manpage.json + ... + """ + errors = get_validation_errors(filename) + + if not errors: + return f"✅ Valid schema filename: {filename}" + + message = f"❌ Invalid schema filename: {filename}\n\n" + message += "Errors:\n" + for i, error in enumerate(errors, 1): + message += f" {i}. 
{error}\n" + + message += "\nExpected format: {domain}-schema-v{major}.{minor}.md\n" + message += "Example: manpage-schema-v1.0.md\n" + + # Try to suggest a corrected filename + try: + # Extract domain guess (everything before first hyphen or dot) + domain_guess = filename.split('-')[0].split('.')[0] + suggestion = suggest_schema_filename(domain_guess, "1.0") + message += f"\nSuggested filename: {suggestion}\n" + except Exception: + pass + + return message diff --git a/roadmap/schema-of-schemas/SCHEMA_NAMING_SPEC.md b/roadmap/schema-of-schemas/SCHEMA_NAMING_SPEC.md new file mode 100644 index 00000000..62d1e2e9 --- /dev/null +++ b/roadmap/schema-of-schemas/SCHEMA_NAMING_SPEC.md @@ -0,0 +1,408 @@ +# Schema Naming Convention Specification + +**Version:** 1.0 +**Status:** Implemented +**Created:** 2026-01-04 + +## Overview + +This specification defines the filename convention for all MarkiTect schema files to ensure consistency, discoverability, and version tracking across the schema ecosystem. + +## Filename Format + +### Standard Format + +``` +{domain}-schema-v{major}.{minor}.md +``` + +### Components + +| Component | Description | Rules | Examples | +|-----------|-------------|-------|----------| +| **domain** | Schema domain identifier | - Lowercase only
- Start with letter
- Letters, numbers, hyphens
- No consecutive hyphens
- No leading/trailing hyphens | `manpage`
`api-documentation`
`arc42` | +| **schema** | Literal keyword | - Must be exactly `schema` | `schema` | +| **version** | SemVer major.minor | - Format: `v{major}.{minor}`
- Non-negative integers
- Must include both major and minor | `v1.0`
`v2.5`
`v10.25` | +| **extension** | File extension | - Must be `.md` (markdown) | `.md` | + +### Regular Expression + +```regex +^[a-z][a-z0-9-]*-schema-v\d+\.\d+\.md$ +``` + +**Breakdown:** +- `^[a-z]` - Start with lowercase letter +- `[a-z0-9-]*` - Followed by lowercase letters, numbers, or hyphens +- `-schema-` - Literal string +- `v\d+\.\d+` - Version (v + digits + dot + digits) +- `\.md$` - Extension + +## Valid Examples + +### Simple Domains + +``` +manpage-schema-v1.0.md +terminology-schema-v1.0.md +glossary-schema-v1.0.md +``` + +### Multi-Word Domains + +``` +api-documentation-schema-v1.0.md +architecture-decision-record-schema-v1.0.md +software-requirements-specification-schema-v1.0.md +``` + +### With Numbers + +``` +arc42-schema-v1.0.md +rfc2119-keywords-schema-v1.0.md +iso27001-schema-v1.0.md +``` + +### Version Variations + +``` +manpage-schema-v1.0.md # Initial version +manpage-schema-v1.1.md # Minor update +manpage-schema-v2.0.md # Breaking change +manpage-schema-v10.25.md # Double-digit versions +``` + +## Invalid Examples + +### Wrong Extension + +``` +❌ manpage-schema-v1.0.json # Must be .md +❌ manpage-schema-v1.0.yaml # Must be .md +❌ manpage-schema-v1.0 # Missing extension +``` + +### Missing Components + +``` +❌ manpage-v1.0.md # Missing "schema" keyword +❌ manpage-schema.md # Missing version +❌ manpage.md # Missing "schema" and version +``` + +### Version Format Errors + +``` +❌ manpage-schema-1.0.md # Missing 'v' prefix +❌ manpage-schema-v1.md # Missing minor version +❌ manpage-schema-v1.0.0.md # Too many version parts (patch not used) +❌ manpage-schema-v1-0.md # Hyphen instead of dot +``` + +### Case Errors + +``` +❌ ManPage-schema-v1.0.md # Uppercase in domain +❌ manpage-Schema-v1.0.md # Uppercase in keyword +❌ MANPAGE-SCHEMA-V1.0.MD # All uppercase +``` + +### Domain Format Errors + +``` +❌ 42answers-schema-v1.0.md # Starts with number +❌ -manpage-schema-v1.0.md # Starts with hyphen +❌ man_page-schema-v1.0.md # Underscore (use hyphen) +❌ man 
page-schema-v1.0.md # Space (use hyphen) +❌ my--schema-v1.0.md # Consecutive hyphens +``` + +## Version Numbering Guidelines + +### Semantic Versioning + +We use simplified SemVer with major.minor only: + +**Major Version (X.0):** +- Breaking changes to schema structure +- Incompatible with previous version +- Documents validated against v1.0 may fail v2.0 + +**Examples:** +- `manpage-schema-v1.0.md` → `manpage-schema-v2.0.md` (breaking change) +- `api-schema-v1.0.md` → `api-schema-v2.0.md` (new required sections) + +**Minor Version (X.Y):** +- Backward-compatible additions +- New optional sections or fields +- Relaxed constraints +- Documents validated against v1.0 still validate against v1.1 + +**Examples:** +- `manpage-schema-v1.0.md` → `manpage-schema-v1.1.md` (new optional section) +- `api-schema-v2.0.md` → `api-schema-v2.1.md` (additional metadata) + +### Version Incrementing + +``` +v1.0 → v1.1 → v1.2 → ... → v1.9 → v1.10 → v1.11 + ↓ + v2.0 (breaking change) +``` + +### Initial Version + +All new schemas start at `v1.0.md`: + +```bash +# New schema +my-new-type-schema-v1.0.md +``` + +## Domain Naming Guidelines + +### Good Domain Names + +**Descriptive and Specific:** +``` +✓ manpage-schema-v1.0.md # Clear: Unix manual pages +✓ api-documentation-schema-v1.0.md # Clear: API docs +✓ architecture-decision-record-schema-v1.0.md # Full ADR name +``` + +**Concise but Meaningful:** +``` +✓ adr-schema-v1.0.md # Common abbreviation +✓ rfc-schema-v1.0.md # Well-known acronym +✓ arc42-schema-v1.0.md # Standard name +``` + +### Poor Domain Names + +**Too Generic:** +``` +❌ document-schema-v1.0.md # Too vague +❌ markdown-schema-v1.0.md # All schemas are markdown +❌ schema-schema-v1.0.md # Redundant (use "metaschema") +``` + +**Too Verbose:** +``` +❌ my-custom-documentation-template-for-apis-v1.0.md # Too long + → api-documentation-schema-v1.0.md # Better +``` + +**Unclear Abbreviations:** +``` +❌ mt-schema-v1.0.md # What is "mt"? 
+❌ doc-schema-v1.0.md # Too generic +``` + +## Normalization Rules + +When converting arbitrary strings to valid domain names: + +1. **Convert to lowercase** + - `API Documentation` → `api documentation` + +2. **Replace separators with hyphens** + - Spaces: `api documentation` → `api-documentation` + - Underscores: `my_type` → `my-type` + - Multiple separators: `my _type` → `my--type` + +3. **Remove consecutive hyphens** + - `my--type` → `my-type` + +4. **Remove leading/trailing hyphens** + - `-my-type-` → `my-type` + +5. **Validate result** + - Must start with letter + - Only lowercase letters, numbers, hyphens + +### Example Normalizations + +```python +"API Documentation" → "api-documentation-schema-v1.0.md" +"My_Custom_Type" → "my-custom-type-schema-v1.0.md" +"arc42 Architecture" → "arc42-architecture-schema-v1.0.md" +"--leading-hyphen" → "leading-hyphen-schema-v1.0.md" +``` + +## Implementation + +### Validation Function + +The naming convention is enforced by `markitect.schema_naming.validate_schema_filename()`: + +```python +from markitect.schema_naming import validate_schema_filename + +is_valid, metadata = validate_schema_filename("manpage-schema-v1.0.md") + +if is_valid: + print(f"Domain: {metadata['domain']}") + print(f"Version: {metadata['version']}") + print(f"Major: {metadata['major']}, Minor: {metadata['minor']}") +``` + +### Suggestion Function + +Generate valid filenames from arbitrary input: + +```python +from markitect.schema_naming import suggest_schema_filename + +# From clean input +filename = suggest_schema_filename("manpage", "1.0") +# → "manpage-schema-v1.0.md" + +# From messy input (with normalization) +filename = suggest_schema_filename("API Documentation", "2.1") +# → "api-documentation-schema-v2.1.md" +``` + +### CLI Integration + +The `schema-ingest` command will validate filenames (planned for Phase 2; not yet implemented): + +```bash +# Valid filename - accepted +$ markitect schema-ingest manpage-schema-v1.0.md +✅ Schema stored successfully + +# Invalid filename - rejected (unless 
--force) +$ markitect schema-ingest manpage.json +❌ Invalid schema filename: manpage.json + +Expected format: {domain}-schema-v{major}.{minor}.md +Example: manpage-schema-v1.0.md + +Suggested filename: manpage-schema-v1.0.md + +Use --force to skip validation +``` + +## Migration from Legacy Naming + +### Current State Analysis + +Existing schemas with inconsistent naming: + +``` +terminology-schema.json # Has .json extension +api-documentation # No version, no extension +enhanced-manpage # No version, no extension, unclear name +markdown-manpage # No version, no extension +markdown-manpage-schema.json # Has .json extension +``` + +### Migration Strategy + +1. **Identify domain and version** +2. **Apply naming convention** +3. **Update database registration** +4. **Remove legacy entries** + +### Migration Mapping + +``` +Old Name → New Name +──────────────────────────────────────────────────────────────── +terminology-schema.json → terminology-schema-v1.0.md +api-documentation → api-documentation-schema-v1.0.md +enhanced-manpage → manpage-schema-v2.0.md +markdown-manpage → (DELETE - duplicate) +markdown-manpage-schema.json → (DELETE - duplicate) +``` + +**Rationale:** +- `enhanced-manpage` → v2.0 (has breaking changes: classification system) +- `markdown-manpage` variants → DELETE (superseded by v1.0 and v2.0) + +## Special Cases + +### Metaschema + +The schema-for-schemas follows the same convention: + +``` +schema-schema-v1.0.md +``` + +Domain is `schema`, indicating it validates schemas themselves. 
+ +### Multiple Schemas for Same Domain + +Use version numbers to distinguish: + +``` +manpage-schema-v1.0.md # Original +manpage-schema-v2.0.md # Enhanced with classifications +``` + +Or use more specific domain names: + +``` +manpage-simple-schema-v1.0.md # Simplified variant +manpage-extended-schema-v1.0.md # Extended variant +``` + +## Validation Testing + +All schemas should pass the naming convention validation: + +```bash +# Test a filename +python -c " +from markitect.schema_naming import is_valid_schema_filename +print(is_valid_schema_filename('manpage-schema-v1.0.md')) +" +# → True + +# Get detailed errors +python -c " +from markitect.schema_naming import get_validation_errors +errors = get_validation_errors('invalid.json') +for error in errors: + print(error) +" +``` + +## Benefits + +### Consistency +- All schemas follow same pattern +- Easy to recognize schema files +- Predictable naming + +### Versioning +- Clear version tracking +- Multiple versions can coexist +- Breaking changes explicit (major version bump) + +### Discoverability +- Glob patterns work: `*-schema-v*.md` +- Easy to list all schemas: `ls *-schema-*.md` +- Domain easily extractable + +### Tooling +- Programmatic validation +- Automatic suggestion +- Migration support + +## References + +- **Implementation:** `markitect/schema_naming.py` +- **Tests:** `tests/test_schema_naming.py` +- **Workplan:** `roadmap/schema-of-schemas/WORKPLAN.md` +- **Examples:** `examples/schemas/manpage-schema-v1.0.md` + +## Changelog + +### v1.0 (2026-01-04) +- Initial specification +- Implemented validation and suggestion functions +- 50 unit tests (100% passing) +- CLI integration planned diff --git a/tests/test_schema_naming.py b/tests/test_schema_naming.py new file mode 100644 index 00000000..1ebc2508 --- /dev/null +++ b/tests/test_schema_naming.py @@ -0,0 +1,390 @@ +""" +Unit tests for schema_naming.py - Schema filename validation. 
+ +Tests the schema naming convention enforcement including: +- Valid filename validation +- Invalid filename detection +- Metadata extraction +- Filename suggestion +- Error message generation +""" + +import pytest +from markitect.schema_naming import ( + validate_schema_filename, + suggest_schema_filename, + extract_schema_metadata, + get_validation_errors, + is_valid_schema_filename, + format_validation_message, + SchemaFilenameError, + SCHEMA_FILENAME_PATTERN +) + + +class TestValidateSchemaFilename: + """Tests for validate_schema_filename function.""" + + def test_valid_simple_schema(self): + """Test validation of simple valid schema filename.""" + is_valid, metadata = validate_schema_filename("manpage-schema-v1.0.md") + + assert is_valid is True + assert metadata is not None + assert metadata['domain'] == 'manpage' + assert metadata['version'] == '1.0' + assert metadata['major'] == 1 + assert metadata['minor'] == 0 + assert metadata['filename'] == 'manpage-schema-v1.0.md' + + def test_valid_hyphenated_domain(self): + """Test validation with multi-word hyphenated domain.""" + is_valid, metadata = validate_schema_filename("api-documentation-schema-v1.0.md") + + assert is_valid is True + assert metadata['domain'] == 'api-documentation' + assert metadata['version'] == '1.0' + + def test_valid_with_numbers_in_domain(self): + """Test validation with numbers in domain name.""" + is_valid, metadata = validate_schema_filename("arc42-schema-v1.0.md") + + assert is_valid is True + assert metadata['domain'] == 'arc42' + + def test_valid_higher_version(self): + """Test validation with version > 1.0.""" + is_valid, metadata = validate_schema_filename("manpage-schema-v2.5.md") + + assert is_valid is True + assert metadata['version'] == '2.5' + assert metadata['major'] == 2 + assert metadata['minor'] == 5 + + def test_valid_double_digit_version(self): + """Test validation with double-digit version numbers.""" + is_valid, metadata = 
validate_schema_filename("manpage-schema-v10.25.md") + + assert is_valid is True + assert metadata['major'] == 10 + assert metadata['minor'] == 25 + + def test_invalid_wrong_extension(self): + """Test that .json extension is invalid.""" + is_valid, metadata = validate_schema_filename("manpage-schema-v1.0.json") + + assert is_valid is False + assert metadata is None + + def test_invalid_no_extension(self): + """Test that filename without extension is invalid.""" + is_valid, metadata = validate_schema_filename("manpage-schema-v1.0") + + assert is_valid is False + assert metadata is None + + def test_invalid_missing_schema_keyword(self): + """Test that filename without 'schema' keyword is invalid.""" + is_valid, metadata = validate_schema_filename("manpage-v1.0.md") + + assert is_valid is False + assert metadata is None + + def test_invalid_missing_version(self): + """Test that filename without version is invalid.""" + is_valid, metadata = validate_schema_filename("manpage-schema.md") + + assert is_valid is False + assert metadata is None + + def test_invalid_wrong_version_format(self): + """Test that version without 'v' prefix is invalid.""" + is_valid, metadata = validate_schema_filename("manpage-schema-1.0.md") + + assert is_valid is False + assert metadata is None + + def test_invalid_missing_minor_version(self): + """Test that version without minor number is invalid.""" + is_valid, metadata = validate_schema_filename("manpage-schema-v1.md") + + assert is_valid is False + assert metadata is None + + def test_invalid_uppercase_letters(self): + """Test that uppercase letters make filename invalid.""" + is_valid, metadata = validate_schema_filename("ManPage-Schema-v1.0.md") + + assert is_valid is False + assert metadata is None + + def test_invalid_starting_with_number(self): + """Test that domain starting with number is invalid.""" + is_valid, metadata = validate_schema_filename("42answers-schema-v1.0.md") + + assert is_valid is False + assert metadata is None + + 
def test_invalid_starting_with_hyphen(self): + """Test that domain starting with hyphen is invalid.""" + is_valid, metadata = validate_schema_filename("-manpage-schema-v1.0.md") + + assert is_valid is False + assert metadata is None + + def test_invalid_special_characters(self): + """Test that special characters in domain are invalid.""" + is_valid, metadata = validate_schema_filename("man_page-schema-v1.0.md") + + assert is_valid is False + assert metadata is None + + +class TestSuggestSchemaFilename: + """Tests for suggest_schema_filename function.""" + + def test_suggest_simple_domain(self): + """Test suggestion for simple domain.""" + filename = suggest_schema_filename("manpage", "1.0") + assert filename == "manpage-schema-v1.0.md" + + def test_suggest_with_spaces(self): + """Test suggestion normalizes spaces to hyphens.""" + filename = suggest_schema_filename("API Documentation", "1.0") + assert filename == "api-documentation-schema-v1.0.md" + + def test_suggest_with_underscores(self): + """Test suggestion normalizes underscores to hyphens.""" + filename = suggest_schema_filename("my_custom_type", "1.0") + assert filename == "my-custom-type-schema-v1.0.md" + + def test_suggest_with_uppercase(self): + """Test suggestion converts to lowercase.""" + filename = suggest_schema_filename("MyCustomType", "1.0") + assert filename == "mycustomtype-schema-v1.0.md" + + def test_suggest_mixed_normalization(self): + """Test suggestion with mixed case and separators.""" + filename = suggest_schema_filename("My_Custom Type", "1.0") + assert filename == "my-custom-type-schema-v1.0.md" + + def test_suggest_higher_version(self): + """Test suggestion with version > 1.0.""" + filename = suggest_schema_filename("manpage", "2.5") + assert filename == "manpage-schema-v2.5.md" + + def test_suggest_double_digit_version(self): + """Test suggestion with double-digit version.""" + filename = suggest_schema_filename("manpage", "10.25") + assert filename == "manpage-schema-v10.25.md" + + 
def test_suggest_consecutive_hyphens(self): + """Test suggestion removes consecutive hyphens.""" + filename = suggest_schema_filename("my--custom---type", "1.0") + assert filename == "my-custom-type-schema-v1.0.md" + + def test_suggest_leading_trailing_hyphens(self): + """Test suggestion removes leading/trailing hyphens.""" + filename = suggest_schema_filename("-my-type-", "1.0") + assert filename == "my-type-schema-v1.0.md" + + def test_suggest_default_version(self): + """Test suggestion uses default version 1.0.""" + filename = suggest_schema_filename("manpage") + assert filename == "manpage-schema-v1.0.md" + + def test_suggest_empty_domain_raises_error(self): + """Test that empty domain raises ValueError.""" + with pytest.raises(ValueError, match="Domain cannot be empty"): + suggest_schema_filename("", "1.0") + + def test_suggest_invalid_version_format_raises_error(self): + """Test that invalid version format raises ValueError.""" + with pytest.raises(ValueError, match="must be in format 'major.minor'"): + suggest_schema_filename("manpage", "1") + + def test_suggest_invalid_version_parts_raises_error(self): + """Test that non-integer version parts raise ValueError.""" + with pytest.raises(ValueError, match="major and minor must be integers"): + suggest_schema_filename("manpage", "1.x") + + def test_suggest_negative_version_raises_error(self): + """Test that negative version numbers raise ValueError.""" + with pytest.raises(ValueError, match="must be non-negative"): + suggest_schema_filename("manpage", "-1.0") + + def test_suggest_without_normalization(self): + """Test suggestion without normalization (must already be valid).""" + filename = suggest_schema_filename("manpage", "1.0", normalize=False) + assert filename == "manpage-schema-v1.0.md" + + def test_suggest_without_normalization_invalid_raises_error(self): + """Test that invalid domain without normalization raises ValueError.""" + with pytest.raises(ValueError, match="Invalid domain"): + 
suggest_schema_filename("My Custom Type", "1.0", normalize=False) + + +class TestExtractSchemaMetadata: + """Tests for extract_schema_metadata function.""" + + def test_extract_valid_metadata(self): + """Test metadata extraction from valid filename.""" + metadata = extract_schema_metadata("manpage-schema-v1.0.md") + + assert metadata['domain'] == 'manpage' + assert metadata['version'] == '1.0' + assert metadata['major'] == 1 + assert metadata['minor'] == 0 + + def test_extract_invalid_raises_error(self): + """Test that invalid filename raises SchemaFilenameError.""" + with pytest.raises(SchemaFilenameError, match="Invalid schema filename"): + extract_schema_metadata("invalid.json") + + +class TestGetValidationErrors: + """Tests for get_validation_errors function.""" + + def test_valid_filename_no_errors(self): + """Test that valid filename returns empty error list.""" + errors = get_validation_errors("manpage-schema-v1.0.md") + assert errors == [] + + def test_wrong_extension_error(self): + """Test error for wrong file extension.""" + errors = get_validation_errors("manpage-schema-v1.0.json") + + assert len(errors) > 0 + assert any("Extension must be '.md'" in e for e in errors) + + def test_missing_version_error(self): + """Test error for missing version.""" + errors = get_validation_errors("manpage-schema.md") + + assert len(errors) > 0 + assert any("Missing version" in e for e in errors) + + def test_missing_schema_keyword_error(self): + """Test error for missing schema keyword.""" + errors = get_validation_errors("manpage-v1.0.md") + + assert len(errors) > 0 + assert any("Missing '-schema-'" in e for e in errors) + + def test_uppercase_letters_error(self): + """Test error for uppercase letters.""" + errors = get_validation_errors("ManPage-schema-v1.0.md") + + assert len(errors) > 0 + assert any("must be lowercase" in e for e in errors) + + def test_invalid_domain_error(self): + """Test error for invalid domain format.""" + errors = 
get_validation_errors("42answer-schema-v1.0.md") + + assert len(errors) > 0 + # Should detect that domain doesn't start with letter + + +class TestIsValidSchemaFilename: + """Tests for is_valid_schema_filename convenience function.""" + + def test_is_valid_returns_true(self): + """Test that valid filename returns True.""" + assert is_valid_schema_filename("manpage-schema-v1.0.md") is True + + def test_is_valid_returns_false(self): + """Test that invalid filename returns False.""" + assert is_valid_schema_filename("invalid.json") is False + + +class TestFormatValidationMessage: + """Tests for format_validation_message function.""" + + def test_format_message_valid_filename(self): + """Test formatting message for valid filename.""" + message = format_validation_message("manpage-schema-v1.0.md") + + assert "✅ Valid" in message + assert "manpage-schema-v1.0.md" in message + + def test_format_message_invalid_filename(self): + """Test formatting message for invalid filename.""" + message = format_validation_message("invalid.json") + + assert "❌ Invalid" in message + assert "Errors:" in message + assert "Expected format:" in message + assert "Example:" in message + + def test_format_message_includes_suggestion(self): + """Test that message includes filename suggestion.""" + message = format_validation_message("manpage.json") + + assert "Suggested filename:" in message + # Should suggest something like manpage-schema-v1.0.md + + +class TestSchemaFilenamePattern: + """Tests for the regex pattern itself.""" + + def test_pattern_matches_valid_filenames(self): + """Test that pattern matches all valid filename variations.""" + valid_filenames = [ + "manpage-schema-v1.0.md", + "api-documentation-schema-v1.0.md", + "arc42-schema-v1.0.md", + "a-schema-v1.0.md", # Single letter domain + "my-long-domain-name-schema-v1.0.md", + "manpage-schema-v10.25.md", # Double digit versions + ] + + for filename in valid_filenames: + match = SCHEMA_FILENAME_PATTERN.match(filename) + assert match 
is not None, f"Pattern should match {filename}" + + def test_pattern_rejects_invalid_filenames(self): + """Test that pattern rejects invalid filenames.""" + invalid_filenames = [ + "manpage-schema-v1.0.json", # Wrong extension + "manpage-v1.0.md", # Missing schema keyword + "manpage-schema.md", # Missing version + "ManPage-schema-v1.0.md", # Uppercase + "42answer-schema-v1.0.md", # Starts with number + "-manpage-schema-v1.0.md", # Starts with hyphen + "man_page-schema-v1.0.md", # Underscore in domain + "manpage-schema-1.0.md", # Missing 'v' prefix + "manpage-schema-v1.md", # Missing minor version + ] + + for filename in invalid_filenames: + match = SCHEMA_FILENAME_PATTERN.match(filename) + assert match is None, f"Pattern should not match {filename}" + + +class TestEdgeCases: + """Tests for edge cases and boundary conditions.""" + + def test_very_long_domain_name(self): + """Test with very long domain name.""" + long_domain = "a" * 100 + filename = suggest_schema_filename(long_domain, "1.0") + assert is_valid_schema_filename(filename) + + def test_domain_with_many_hyphens(self): + """Test domain with multiple hyphens.""" + filename = suggest_schema_filename("my-very-long-domain-name", "1.0") + assert filename == "my-very-long-domain-name-schema-v1.0.md" + assert is_valid_schema_filename(filename) + + def test_version_zero_zero(self): + """Test with version 0.0.""" + filename = suggest_schema_filename("manpage", "0.0") + assert filename == "manpage-schema-v0.0.md" + assert is_valid_schema_filename(filename) + + def test_large_version_numbers(self): + """Test with large version numbers.""" + filename = suggest_schema_filename("manpage", "999.999") + assert filename == "manpage-schema-v999.999.md" + assert is_valid_schema_filename(filename)