feat(spaces): implement Phase 0-1 of Information Space Service

Phase 0 - Project Organization:
- Create docs/PROJECT_STRUCTURE.md documenting codebase layout
- Create markitect/core/ with parser, serializer, document_manager, workspace
- Create markitect/schema/ consolidating 6 schema_*.py modules
- Create markitect/storage/ with database module
- Maintain backward compatibility via re-exports from original locations
- Add docs/roadmap/information-space-service/ with README and WORKPLAN

Phase 1 - Foundation (Weeks 1-3):
- Week 1: Core domain models (InformationSpace, SpaceDocument, SpaceConfig, SpaceMetadata, SpaceVariable, TransclusionReference, SpaceStatus)
- Week 2: Repository layer with interfaces (ISpaceRepository, IDocumentAssociationRepository, IVariableRepository, IReferenceRepository) and SQLite implementations with foreign key cascade deletes
- Week 3: SpaceService orchestration layer with full CRUD, document, variable, and reference tracking operations

Test coverage: 124 tests (25 model + 63 repository + 36 integration)

Capabilities delivered:
- CAP-001: InformationSpace entity with lifecycle management
- CAP-002: SpaceRepository CRUD with SQLite backing
- CAP-003: Document-Space associations with path-based organization
- CAP-004: Space metadata and configuration schemas
- CAP-005: Database schema with migrations

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-08 02:02:46 +01:00
parent 6ebcc0f60e
commit 9b12875681
45 changed files with 9818 additions and 4300 deletions
--- a/markitect/schema_loader.py
+++ b/markitect/schema_loader.py
@@ -1,610 +1,23 @@
 """
-Schema Loader - Extract JSON schemas from markdown files.
+Schema Loader - Backward Compatibility Module.

-This module provides functionality to load schemas from markdown files that
-contain embedded JSON schemas in code blocks, along with YAML frontmatter
-metadata and rich documentation.
-
-Markdown Schema Format:
-    ---
-    schema-id: "https://markitect.dev/schemas/domain/v1"
-    version: "1.0.0"
-    status: "stable|draft|deprecated"
-    ---
-
-    # Schema Title v1.0
-
-    ## Documentation sections...
-
-    ## Schema Definition
-
-    ```json
-    {
-      "$schema": "http://json-schema.org/draft-07/schema#",
-      ...
-    }
-    ```
-
-This enables:
- Rich documentation alongside schemas
- Version history in same file
- Human-readable schema files
- Markdown-first approach aligned with MarkiTect philosophy
+This module re-exports from markitect.schema.loader for backward compatibility.
+New code should import from markitect.schema.loader directly.
 """

-import re
-import json
-import yaml
-from pathlib import Path
-from typing import Dict, Any, Optional, List, Tuple
-
-
-class SchemaLoaderError(Exception):
-    """Base exception for schema loading errors."""
-    pass
-
-
-class InvalidSchemaFormatError(SchemaLoaderError):
-    """Schema file format is invalid."""
-    pass
-
-
-class SchemaNotFoundError(SchemaLoaderError):
-    """No JSON schema found in markdown file."""
-    pass
-
-
-class MarkdownSchemaLoader:
-    """
-    Load and parse markdown schema files.
-
-    Supports:
-    - YAML frontmatter for metadata
-    - JSON code blocks for schema definition
-    - Validation of schema structure
-    - Metadata merging
-
-    Example:
-        >>> loader = MarkdownSchemaLoader()
-        >>> schema_data = loader.load_schema(Path("manpage-schema-v1.0.md"))
-        >>> schema = schema_data['schema']
-        >>> metadata = schema_data['metadata']
-    """
-
-    def __init__(self):
-        """Initialize the schema loader with regex patterns."""
-        # Pattern to match YAML frontmatter
-        # Matches: --- ... --- at start of file
-        self.frontmatter_pattern = re.compile(
-            r'^---\s*\n(.*?)\n---\s*\n',
-            re.DOTALL | re.MULTILINE
-        )
-
-        # Pattern to match JSON code blocks
-        # Matches: ```json ... ```
-        self.json_code_block_pattern = re.compile(
-            r'```json\s*\n(.*?)\n```',
-            re.DOTALL | re.MULTILINE
-        )
-
-        # Pattern to find Schema Definition section
-        # This helps us find the right JSON block if there are multiple
-        self.schema_section_pattern = re.compile(
-            r'##\s+Schema Definition\s*\n',
-            re.MULTILINE
-        )
-
-    def load_schema(self, md_path: Path) -> Dict[str, Any]:
-        """
-        Load schema from markdown file.
-
-        Args:
-            md_path: Path to markdown schema file
-
-        Returns:
-            Dictionary containing:
-                - schema: Extracted JSON schema (dict)
-                - metadata: Frontmatter metadata (dict)
-                - documentation: Full markdown content (str)
-                - source_file: Source file path (str)
-
-        Raises:
-            FileNotFoundError: If schema file doesn't exist
-            InvalidSchemaFormatError: If file format is invalid
-            SchemaNotFoundError: If no JSON schema found
-
-        Example:
-            >>> loader = MarkdownSchemaLoader()
-            >>> data = loader.load_schema(Path("manpage-schema-v1.0.md"))
-            >>> print(data['schema']['title'])
-            'Unix Manual Page Schema'
-        """
-        if not md_path.exists():
-            raise FileNotFoundError(f"Schema file not found: {md_path}")
-
-        # Read file content
-        try:
-            content = md_path.read_text(encoding='utf-8')
-        except Exception as e:
-            raise InvalidSchemaFormatError(f"Failed to read schema file: {e}")
-
-        # Extract frontmatter
-        metadata = self._extract_frontmatter(content)
-
-        # Extract JSON schema
-        schema = self._extract_json_schema(content)
-
-        if not schema:
-            raise SchemaNotFoundError(
-                f"No JSON schema found in {md_path}. "
-                f"Expected a ```json code block with schema definition."
-            )
-
-        # Merge metadata into schema
-        schema = self._merge_metadata(schema, metadata, md_path)
-
-        return {
-            'schema': schema,
-            'metadata': metadata,
-            'documentation': content,
-            'source_file': str(md_path)
-        }
-
-    def _extract_frontmatter(self, content: str) -> Dict[str, Any]:
-        """
-        Extract YAML frontmatter from markdown content.
-
-        Args:
-            content: Markdown file content
-
-        Returns:
-            Dictionary of frontmatter metadata (empty if none found)
-
-        Raises:
-            InvalidSchemaFormatError: If YAML is malformed
-        """
-        match = self.frontmatter_pattern.search(content)
-        if not match:
-            return {}
-
-        yaml_content = match.group(1)
-        try:
-            metadata = yaml.safe_load(yaml_content) or {}
-            if not isinstance(metadata, dict):
-                raise InvalidSchemaFormatError(
-                    f"Frontmatter must be a YAML dictionary, got {type(metadata)}"
-                )
-            return metadata
-        except yaml.YAMLError as e:
-            raise InvalidSchemaFormatError(f"Invalid YAML frontmatter: {e}")
-
-    def _extract_json_schema(self, content: str) -> Optional[Dict[str, Any]]:
-        """
-        Extract JSON schema from markdown code blocks.
-
-        Prefers JSON blocks under "## Schema Definition" section,
-        but will use first JSON block if no Schema Definition section found.
-
-        Args:
-            content: Markdown file content
-
-        Returns:
-            JSON schema dictionary or None if not found
-
-        Raises:
-            InvalidSchemaFormatError: If JSON is malformed
-        """
-        # Find all JSON code blocks
-        json_blocks = self.json_code_block_pattern.findall(content)
-
-        if not json_blocks:
-            return None
-
-        # Try to find the Schema Definition section
-        schema_section_match = self.schema_section_pattern.search(content)
-
-        if schema_section_match:
-            # Find JSON block that comes after Schema Definition section
-            section_pos = schema_section_match.end()
-
-            # Re-search for JSON blocks starting from section position
-            remaining_content = content[section_pos:]
-            section_json_blocks = self.json_code_block_pattern.findall(remaining_content)
-
-            if section_json_blocks:
-                json_text = section_json_blocks[0]
-            else:
-                # Fallback to first JSON block in entire document
-                json_text = json_blocks[0]
-        else:
-            # No Schema Definition section, use first JSON block
-            json_text = json_blocks[0]
-
-        # Parse JSON
-        try:
-            schema = json.loads(json_text)
-            if not isinstance(schema, dict):
-                raise InvalidSchemaFormatError(
-                    f"Schema must be a JSON object, got {type(schema)}"
-                )
-            return schema
-        except json.JSONDecodeError as e:
-            raise InvalidSchemaFormatError(f"Invalid JSON schema: {e}")
-
-    def _merge_metadata(
-        self,
-        schema: Dict[str, Any],
-        metadata: Dict[str, Any],
-        source_file: Path
-    ) -> Dict[str, Any]:
-        """
-        Merge frontmatter metadata into schema.
-
-        Adds x-markitect-source extension with file info and metadata.
-        Optionally overrides schema fields with frontmatter values.
-
-        Args:
-            schema: JSON schema dictionary
-            metadata: Frontmatter metadata dictionary
-            source_file: Path to source file
-
-        Returns:
-            Schema with merged metadata
-        """
-        # Create a copy to avoid modifying original
-        merged_schema = schema.copy()
-
-        # Add MarkiTect-specific source metadata
-        merged_schema['x-markitect-source'] = {
-            'file': str(source_file),
-            'filename': source_file.name,
-            'format': 'markdown',
-            'frontmatter': metadata
-        }
-
-        # Override schema fields with frontmatter if present
-        # This allows frontmatter to be the source of truth for metadata
-        if 'version' in metadata:
-            merged_schema['version'] = metadata['version']
-
-        if 'schema-id' in metadata:
-            merged_schema['$id'] = metadata['schema-id']
-
-        if 'status' in metadata:
-            if 'x-markitect-metadata' not in merged_schema:
-                merged_schema['x-markitect-metadata'] = {}
-            merged_schema['x-markitect-metadata']['status'] = metadata['status']
-
-        return merged_schema
-
-    def save_schema(
-        self,
-        schema: Dict[str, Any],
-        md_path: Path,
-        template: Optional[str] = None,
-        frontmatter: Optional[Dict[str, Any]] = None
-    ):
-        """
-        Save schema as markdown file.
-
-        Args:
-            schema: JSON schema dictionary to save
-            md_path: Output path for markdown file
-            template: Optional markdown template string
-            frontmatter: Optional frontmatter metadata (extracted from schema if not provided)
-
-        Raises:
-            InvalidSchemaFormatError: If schema is invalid
-
-        Example:
-            >>> loader = MarkdownSchemaLoader()
-            >>> loader.save_schema(
-            ...     schema={'title': 'My Schema', ...},
-            ...     md_path=Path('my-schema-v1.0.md')
-            ... )
-        """
-        if template:
-            # Use provided template
-            content = self._render_template(template, schema, frontmatter)
-        else:
-            # Generate basic markdown
-            content = self._generate_markdown(schema, frontmatter)
-
-        # Create parent directory if needed
-        md_path.parent.mkdir(parents=True, exist_ok=True)
-
-        # Write file
-        try:
-            md_path.write_text(content, encoding='utf-8')
-        except Exception as e:
-            raise InvalidSchemaFormatError(f"Failed to write schema file: {e}")
-
-    def _generate_markdown(
-        self,
-        schema: Dict[str, Any],
-        frontmatter: Optional[Dict[str, Any]] = None
-    ) -> str:
-        """
-        Generate markdown from schema.
-
-        Args:
-            schema: JSON schema dictionary
-            frontmatter: Optional frontmatter metadata
-
-        Returns:
-            Markdown content as string
-        """
-        # Extract metadata from schema
-        title = schema.get('title', 'Untitled Schema')
-        version = schema.get('version', '1.0.0')
-        description = schema.get('description', '')
-        schema_id = schema.get('$id', '')
-
-        # Build frontmatter
-        if frontmatter is None:
-            frontmatter = {}
-
-        # Set defaults
-        if 'schema-id' not in frontmatter and schema_id:
-            frontmatter['schema-id'] = schema_id
-        if 'version' not in frontmatter:
-            frontmatter['version'] = version
-        if 'status' not in frontmatter:
-            frontmatter['status'] = 'draft'
-
-        # Generate frontmatter YAML
-        frontmatter_yaml = yaml.dump(
-            frontmatter,
-            default_flow_style=False,
-            allow_unicode=True
-        ).strip()
-
-        # Generate JSON (pretty-printed)
-        schema_json = json.dumps(schema, indent=2, ensure_ascii=False)
-
-        # Build markdown content
-        md_content = f"""---
-{frontmatter_yaml}
---
-
-# {title} v{version}
-
-## Overview
-
-{description}
-
-## Usage
-
-```bash
-markitect validate document.md --schema {Path(frontmatter.get('schema-id', 'schema')).name}
-```
-
-## Schema Definition
-
-```json
-{schema_json}
-```
-
-## Version History
-
-### v{version}
- Initial version
-"""
-
-        return md_content
-
-    def _render_template(
-        self,
-        template: str,
-        schema: Dict[str, Any],
-        frontmatter: Optional[Dict[str, Any]] = None
-    ) -> str:
-        """
-        Render markdown from template.
-
-        Simple template rendering using string formatting.
-        For complex templates, consider using Jinja2 or similar.
-
-        Args:
-            template: Template string
-            schema: JSON schema dictionary
-            frontmatter: Optional frontmatter metadata
-
-        Returns:
-            Rendered markdown content
-        """
-        # Build context for template
-        context = {
-            'title': schema.get('title', 'Untitled'),
-            'version': schema.get('version', '1.0.0'),
-            'description': schema.get('description', ''),
-            'schema_id': schema.get('$id', ''),
-            'schema_json': json.dumps(schema, indent=2, ensure_ascii=False),
-            'frontmatter': frontmatter or {},
-        }
-
-        # Simple template rendering
-        try:
-            return template.format(**context)
-        except KeyError as e:
-            raise InvalidSchemaFormatError(f"Template missing key: {e}")
-
-    def list_json_blocks(self, content: str) -> List[Tuple[int, str]]:
-        """
-        List all JSON code blocks in markdown content.
-
-        Useful for debugging or when multiple JSON blocks exist.
-
-        Args:
-            content: Markdown file content
-
-        Returns:
-            List of (position, json_content) tuples
-
-        Example:
-            >>> loader = MarkdownSchemaLoader()
-            >>> content = Path('schema.md').read_text()
-            >>> blocks = loader.list_json_blocks(content)
-            >>> print(f"Found {len(blocks)} JSON blocks")
-        """
-        blocks = []
-        for match in self.json_code_block_pattern.finditer(content):
-            blocks.append((match.start(), match.group(1)))
-        return blocks
-
-    def validate_schema_structure(self, schema: Dict[str, Any]) -> List[str]:
-        """
-        Validate basic schema structure.
-
-        Checks for required JSON Schema fields and MarkiTect conventions.
-
-        Args:
-            schema: JSON schema dictionary
-
-        Returns:
-            List of warning/error messages (empty if valid)
-
-        Example:
-            >>> loader = MarkdownSchemaLoader()
-            >>> issues = loader.validate_schema_structure(schema)
-            >>> if issues:
-            ...     print("Schema issues:", issues)
-        """
-        issues = []
-
-        # Check required JSON Schema fields
-        if '$schema' not in schema:
-            issues.append("Missing required field: $schema")
-
-        if 'type' not in schema:
-            issues.append("Missing recommended field: type")
-
-        if 'title' not in schema:
-            issues.append("Missing recommended field: title")
-
-        if 'description' not in schema:
-            issues.append("Missing recommended field: description")
-
-        # Check MarkiTect conventions
-        if 'version' not in schema:
-            issues.append("Missing MarkiTect convention: version field")
-
-        if '$id' not in schema:
-            issues.append("Missing recommended field: $id")
-
-        # Check $id format if present
-        if '$id' in schema:
-            schema_id = schema['$id']
-            if not isinstance(schema_id, str):
-                issues.append("$id must be a string")
-            elif not schema_id.startswith('https://'):
-                issues.append("$id should be a full HTTPS URL")
-
-        return issues
-
-
-def auto_ingest_schemas(db_manager=None, schema_dir: Optional[Path] = None, verbose: bool = False) -> Dict[str, Any]:
-    """Automatically ingest schemas from markitect/schemas/ directory.
-
-    This function scans the schemas directory for .md schema files and ingests
-    any that are not already in the database. Useful for post-install setup
-    or automatic schema registration.
-
-    Args:
-        db_manager: DatabaseManager instance (optional, will create if not provided)
-        schema_dir: Directory containing schemas (defaults to markitect/schemas/)
-        verbose: If True, print detailed progress messages
-
-    Returns:
-        Dictionary with ingestion results:
-        {
-            'ingested': [list of schema names that were ingested],
-            'skipped': [list of schema names that were already present],
-            'failed': [list of (schema_name, error) tuples for failures]
-        }
-
-    Example:
-        >>> from markitect.schema_loader import auto_ingest_schemas
-        >>> results = auto_ingest_schemas(verbose=True)
-        >>> print(f"Ingested {len(results['ingested'])} schemas")
-    """
-    # Determine schema directory
-    if schema_dir is None:
-        schema_dir = Path(__file__).parent / "schemas"
-
-    if not schema_dir.exists():
-        if verbose:
-            print(f"⚠️  Schema directory not found: {schema_dir}")
-        return {'ingested': [], 'skipped': [], 'failed': []}
-
-    # Initialize database manager if not provided
-    if db_manager is None:
-        from .database import DatabaseManager
-        db_path = Path.home() / '.markitect' / 'markitect.db'
-        db_manager = DatabaseManager(str(db_path))
-        db_manager.initialize_database()
-
-    # Get list of already ingested schemas
-    try:
-        existing_schemas = {schema['name'] for schema in db_manager.list_schemas()}
-    except Exception as e:
-        if verbose:
-            print(f"❌ Error listing existing schemas: {e}")
-        return {'ingested': [], 'skipped': [], 'failed': []}
-
-    results = {
-        'ingested': [],
-        'skipped': [],
-        'failed': []
-    }
-
-    # Find all schema files
-    schema_files = list(schema_dir.glob("*-schema-v*.md"))
-
-    if verbose and schema_files:
-        print(f"🔍 Found {len(schema_files)} schema file(s) in {schema_dir}")
-
-    loader = MarkdownSchemaLoader()
-
-    for schema_file in sorted(schema_files):
-        schema_name = schema_file.name
-
-        # Skip if already ingested
-        if schema_name in existing_schemas:
-            results['skipped'].append(schema_name)
-            if verbose:
-                print(f"⏭️  Skipping {schema_name} (already ingested)")
-            continue
-
-        # Try to ingest
-        try:
-            # Load schema
-            schema_data_full = loader.load_schema(schema_file)
-            schema_data = schema_data_full['schema']
-
-            # Store in database
-            schema_content = json.dumps(schema_data, indent=2)
-            record_id = db_manager.store_schema_file(schema_name, schema_content)
-
-            if record_id:
-                results['ingested'].append(schema_name)
-                if verbose:
-                    title = schema_data.get('title', schema_name)
-                    print(f"✅ Ingested {schema_name} (title: {title})")
-            else:
-                results['failed'].append((schema_name, "Failed to store in database"))
-                if verbose:
-                    print(f"❌ Failed to store {schema_name} in database")
-
-        except Exception as e:
-            results['failed'].append((schema_name, str(e)))
-            if verbose:
-                print(f"❌ Failed to ingest {schema_name}: {e}")
-
-    if verbose:
-        print(f"\n📊 Auto-ingestion complete:")
-        print(f"   Ingested: {len(results['ingested'])}")
-        print(f"   Skipped: {len(results['skipped'])}")
-        print(f"   Failed: {len(results['failed'])}")
-
-    return results
+# Re-export from schema package for backward compatibility
+from markitect.schema.loader import (
+    MarkdownSchemaLoader,
+    SchemaLoaderError,
+    InvalidSchemaFormatError,
+    SchemaNotFoundError,
+    auto_ingest_schemas,
+)
+
+__all__ = [
+    'MarkdownSchemaLoader',
+    'SchemaLoaderError',
+    'InvalidSchemaFormatError',
+    'SchemaNotFoundError',
+    'auto_ingest_schemas',
+]