diff --git a/CAPABILITIES.md b/CAPABILITIES.md index 643315d4..60a50136 100644 --- a/CAPABILITIES.md +++ b/CAPABILITIES.md @@ -1,8 +1,6 @@ -# MarkiTect System Capabilities & Features +# MarkiTect System Capabilities & Extraction Plan -> **Comprehensive overview of all capabilities, architectural innovations, and unique value propositions in the MarkiTect project** - -MarkiTect is a high-performance markdown processing engine that introduces innovative architectural patterns and provides sophisticated project management capabilities for developers working with documentation-heavy, issue-driven workflows. +> **Comprehensive overview of all capabilities, architectural innovations, and capability extraction recommendations for the ComposableRepositoryParadigm** ## Overview @@ -10,11 +8,341 @@ MarkiTect is a high-performance markdown processing engine that introduces innov - **Test Categories**: 15 major functional areas - **Test Coverage**: 348 tests across 27 test files - **Architecture**: Database-driven system with AST-based markdown processing, multi-layer caching, and deep Git platform integration +- **Extraction Status**: 2 capabilities extracted, 11 candidates identified for extraction -## Core Architectural Paradigms +--- -### 1. Parse-Once, Manipulate-Many Architectureโ„ข +## ๐ŸŽฏ Capability Extraction Analysis +### Extraction Criteria + +Based on the ComposableRepositoryParadigm, capabilities should be extracted when they meet these criteria: + +1. **Self-Contained Functionality**: Can operate independently with minimal dependencies +2. **Reusability**: Could be useful in other projects or contexts +3. **Clear Boundaries**: Has well-defined interfaces and responsibilities +4. **Test Coverage**: Has adequate test coverage (>80% preferred) +5. **Size**: Significant enough to warrant extraction (>3 files or >500 LOC) +6. **Domain Separation**: Represents a distinct domain or concern + +### Current Extraction Status + +#### โœ… **Already Extracted** (2 capabilities) +- `markitect-content` - Content matter parsing (frontmatter, contentmatter, tailmatter) +- `markitect-utils` - General utility functions (test capability) + +#### ๐ŸŽฏ **Recommended for Extraction** (7 capabilities) + +| Priority | Capability | Rationale | Complexity | Dependencies | +|----------|------------|-----------|------------|-------------| +| **HIGH** | `markitect-finance` | Complete financial tracking system, self-contained | High | Low | +| **HIGH** | `markitect-query-paradigms` | 14 different query paradigms, highly reusable | High | Medium | +| **HIGH** | `markitect-graphql` | Complete GraphQL interface, standalone value | Medium | Medium | +| **MEDIUM** | `markitect-plugins` | Plugin architecture framework | Medium | Low | +| **MEDIUM** | `markitect-matter-parsers` | All matter parsing capabilities (3 types) | Medium | Low | +| **MEDIUM** | `markitect-legacy` | Legacy compatibility layer | Low | Low | +| **LOW** | `markitect-issues` | Issue management system | High | High | + +#### ๐Ÿ›‘ **Not Recommended for Extraction** (Core System) + +These modules form the core of MarkiTect and should remain in the main project: + +- **Core Engine**: `cli.py`, `database.py`, `config_manager.py` - Main application logic +- **AST Processing**: `ast_*.py`, `parser.py`, `serializer.py` - Core markdown processing +- **Document Management**: `document_manager.py`, `batch_processor.py` - Core functionality +- **Validation**: `schema_*.py`, `validation_*.py` - System integrity +- **Performance**: `cache_service.py`, `performance_tracker.py` - Core performance +- **Templates**: `template/` - Core template engine + +--- + +## ๐Ÿ“ฆ Detailed Capability Extraction Recommendations + +### 1. ๐Ÿ† **HIGH PRIORITY - markitect-finance** + +**Current Location**: `markitect/finance/` + +**Files to Extract**: +``` +markitect/finance/ +โ”œโ”€โ”€ __init__.py # Package interface +โ”œโ”€โ”€ allocation_engine.py # Cost allocation logic +โ”œโ”€โ”€ cli.py # Finance CLI commands +โ”œโ”€โ”€ cost_manager.py # Cost tracking +โ”œโ”€โ”€ day_wrapup_commands.py # Daily summaries +โ”œโ”€โ”€ models.py # Data models +โ”œโ”€โ”€ period_manager.py # Period handling +โ”œโ”€โ”€ report_generator.py # Financial reports +โ”œโ”€โ”€ session_tracker.py # Session tracking +โ”œโ”€โ”€ worktime_commands.py # Work time CLI +โ”œโ”€โ”€ worktime_tracker.py # Time tracking +โ””โ”€โ”€ migrations/001_create_cost_tables.sql +``` + +**Why Extract**: +- โœ… **Self-Contained**: Complete financial tracking system +- โœ… **Reusable**: Could be used by other project management tools +- โœ… **Clear Boundaries**: Well-defined domain (finance/time tracking) +- โœ… **Size**: 11 files, substantial codebase +- โœ… **Dependencies**: Minimal external dependencies + +**Extraction Benefits**: +- Could be reused in other project management systems +- Independent development and versioning +- Clear separation of financial concerns + +### 2. ๐Ÿ† **HIGH PRIORITY - markitect-query-paradigms** + +**Current Location**: `markitect/query_paradigms/` + +**Files to Extract**: +``` +markitect/query_paradigms/ +โ”œโ”€โ”€ __init__.py # Package interface +โ”œโ”€โ”€ base.py # Base classes +โ”œโ”€โ”€ cli.py # Query CLI +โ”œโ”€โ”€ registry.py # Paradigm registry +โ””โ”€โ”€ paradigms/ # 14 different paradigms + โ”œโ”€โ”€ batch_paradigm.py + โ”œโ”€โ”€ fts_paradigm.py + โ”œโ”€โ”€ graphql_paradigm.py + โ”œโ”€โ”€ jsonpath_paradigm.py + โ”œโ”€โ”€ natural_language_paradigm.py + โ”œโ”€โ”€ nosql_paradigm.py + โ”œโ”€โ”€ qbe_paradigm.py + โ”œโ”€โ”€ rag_paradigm.py + โ”œโ”€โ”€ rest_api_paradigm.py + โ”œโ”€โ”€ sql_paradigm.py + โ”œโ”€โ”€ transform_paradigm.py + โ”œโ”€โ”€ unix_pipeline_paradigm.py + โ”œโ”€โ”€ visual_builder_paradigm.py + โ””โ”€โ”€ xpath_paradigm.py +``` + +**Why Extract**: +- โœ… **Highly Reusable**: Query paradigms useful across many applications +- โœ… **Self-Contained**: Complete query abstraction system +- โœ… **Innovation**: Unique architectural contribution +- โœ… **Size**: 17+ files, substantial investment + +**Extraction Benefits**: +- Could become a standalone query abstraction library +- High reusability potential across projects +- Independent evolution of query capabilities + +### 3. ๐Ÿ† **HIGH PRIORITY - markitect-graphql** + +**Current Location**: `markitect/graphql/` + +**Files to Extract**: +``` +markitect/graphql/ +โ”œโ”€โ”€ __init__.py # Package interface +โ”œโ”€โ”€ resolvers.py # GraphQL resolvers +โ”œโ”€โ”€ schema.py # GraphQL schema +โ””โ”€โ”€ server.py # GraphQL server +``` + +**Why Extract**: +- โœ… **Standalone Value**: Complete GraphQL API interface +- โœ… **Reusable**: GraphQL interfaces are broadly applicable +- โœ… **Clear Boundaries**: Well-defined API layer +- โœ… **Technology**: Uses standard GraphQL patterns + +**Extraction Benefits**: +- Can be developed independently with GraphQL ecosystem +- Reusable across different backend systems +- Clear API versioning and evolution + +### 4. ๐Ÿฅˆ **MEDIUM PRIORITY - markitect-plugins** + +**Current Location**: `markitect/plugins/` + +**Files to Extract**: +``` +markitect/plugins/ +โ”œโ”€โ”€ __init__.py # Package interface +โ”œโ”€โ”€ base.py # Base plugin classes +โ”œโ”€โ”€ decorators.py # Plugin decorators +โ”œโ”€โ”€ manager.py # Plugin manager +โ”œโ”€โ”€ registry.py # Plugin registry +โ””โ”€โ”€ builtin/ # Built-in plugins + โ”œโ”€โ”€ formatters.py + โ”œโ”€โ”€ processors.py + โ””โ”€โ”€ search/ # Search plugins + โ”œโ”€โ”€ fts_search.py + โ”œโ”€โ”€ indexer.py + โ””โ”€โ”€ query_parser.py +``` + +**Why Extract**: +- โœ… **Reusable**: Plugin architecture pattern broadly applicable +- โœ… **Self-Contained**: Complete plugin system +- โœ… **Size**: 9+ files, substantial codebase + +**Extraction Benefits**: +- Plugin architecture could be reused in other applications +- Independent development of plugin ecosystem +- Clear extensibility patterns + +### 5. ๐Ÿฅˆ **MEDIUM PRIORITY - markitect-matter-parsers** + +**Current Status**: `markitect-content` already extracted, but three separate parsers remain: + +**Files to Extract**: +``` +markitect/matter_frontmatter/ # Front matter parsing +markitect/matter_contentmatter/ # Content matter parsing +markitect/matter_tailmatter/ # Tail matter parsing +``` + +**Why Extract**: +- โœ… **Reusable**: Matter parsing useful for many markdown tools +- โœ… **Self-Contained**: Each parser is independent +- โœ… **Clear Domain**: Document structure parsing + +**Extraction Benefits**: +- Could be used by other markdown processing tools +- Independent evolution of parsing capabilities + +### 6. ๐Ÿฅˆ **MEDIUM PRIORITY - markitect-legacy** + +**Current Location**: `markitect/legacy/` + +**Files to Extract**: +``` +markitect/legacy/ +โ”œโ”€โ”€ __init__.py # Package interface +โ”œโ”€โ”€ agent.py # Legacy agents +โ”œโ”€โ”€ compatibility.py # Compatibility layer +โ”œโ”€โ”€ deprecation.py # Deprecation handling +โ”œโ”€โ”€ exceptions.py # Legacy exceptions +โ”œโ”€โ”€ git_tracker.py # Legacy Git tracking +โ”œโ”€โ”€ registry.py # Legacy registry +โ””โ”€โ”€ switches.py # Feature switches +``` + +**Why Extract**: +- โœ… **Self-Contained**: Complete legacy compatibility system +- โœ… **Bounded**: Will eventually be removed +- โœ… **Clean Separation**: Should not contaminate main codebase + +**Extraction Benefits**: +- Keeps legacy code separate from main evolution +- Can be deprecated independently +- Clear migration path + +### 7. ๐Ÿฅ‰ **LOW PRIORITY - markitect-issues** + +**Current Location**: `markitect/issues/` + +**Files to Extract**: +``` +markitect/issues/ +โ”œโ”€โ”€ __init__.py # Package interface +โ”œโ”€โ”€ activity_commands.py # Activity tracking +โ”œโ”€โ”€ activity_tracker.py # Activity tracking +โ”œโ”€โ”€ base.py # Base classes +โ”œโ”€โ”€ commands.py # Issue CLI commands +โ”œโ”€โ”€ exceptions.py # Issue exceptions +โ”œโ”€โ”€ issue_wrapup_commands.py # Issue completion +โ”œโ”€โ”€ manager.py # Issue manager +โ””โ”€โ”€ plugins/ # Issue plugins + โ”œโ”€โ”€ gitea.py # Gitea integration + โ””โ”€โ”€ local.py # Local issues +``` + +**Why Lower Priority**: +- โš ๏ธ **High Dependencies**: Tightly integrated with core system +- โš ๏ธ **Complex**: Issue management is complex domain +- โš ๏ธ **Core Feature**: Central to MarkiTect's value proposition + +**Consider for Later**: +- Extract after core system stabilizes +- Requires careful dependency analysis +- High integration complexity + +--- + +## ๐Ÿš€ Extraction Implementation Plan + +### Phase 1: **High-Value, Low-Risk Extractions** +1. **markitect-finance** - Complete financial system +2. **markitect-graphql** - GraphQL interface +3. **markitect-legacy** - Legacy compatibility + +### Phase 2: **Complex, High-Value Extractions** +4. **markitect-query-paradigms** - Query abstraction system +5. **markitect-plugins** - Plugin architecture + +### Phase 3: **Specialized Extractions** +6. **markitect-matter-parsers** - Consolidate matter parsing +7. **markitect-issues** - Issue management (if dependencies allow) + +### Phase 4: **Validation and Optimization** +- Test all extractions thoroughly +- Optimize inter-capability dependencies +- Document lessons learned +- Update ComposableRepositoryParadigm based on experience + +--- + +## ๐Ÿ“Š Extraction Impact Analysis + +### Complexity vs. Value Matrix + +``` +High Value โ”‚ query-paradigms โ”‚ finance โ”‚ + โ”‚ โ”‚ graphql โ”‚ + โ”‚ โ”‚ โ”‚ + โ”‚ plugins โ”‚ matter-parsers โ”‚ +Low Value โ”‚ legacy โ”‚ issues โ”‚ + โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + Low Complexity High Complexity +``` + +### Recommended Extraction Order + +1. **markitect-finance** (High Value, Medium Complexity) - Complete system +2. **markitect-graphql** (High Value, Low Complexity) - Clean API layer +3. **markitect-legacy** (Medium Value, Low Complexity) - Easy win +4. **markitect-query-paradigms** (High Value, High Complexity) - Big impact +5. **markitect-plugins** (Medium Value, Medium Complexity) - Architecture +6. **markitect-matter-parsers** (Medium Value, Low Complexity) - Consolidation +7. **markitect-issues** (High Value, High Complexity) - Complex integration + +--- + +## ๐ŸŽฏ Success Criteria for Extractions + +Each extracted capability must meet these criteria: + +### Technical Requirements +- โœ… **Zero Parent Dependencies**: No imports from main markitect project +- โœ… **Complete Test Suite**: >80% test coverage +- โœ… **Independent Build**: Can be built and tested separately +- โœ… **Documentation**: Complete README and API documentation +- โœ… **Version Management**: Independent versioning with semver + +### Quality Requirements +- โœ… **Type Safety**: Complete type annotations +- โœ… **Error Handling**: Comprehensive error handling +- โœ… **Performance**: No performance regressions +- โœ… **Security**: No security vulnerabilities introduced + +### Process Requirements +- โœ… **Red-Green Testing**: All tests pass after extraction +- โœ… **CI/CD**: Independent CI/CD pipeline +- โœ… **Integration**: Smooth integration with main project +- โœ… **Migration Path**: Clear upgrade/downgrade paths + +--- + +## ๐Ÿ“‹ Core MarkiTect Capabilities (Remain in Main Project) + +### Core Architectural Paradigms + +#### 1. Parse-Once, Manipulate-Many Architectureโ„ข **Paradigm**: Single parsing operation creates multiple access pathways for document manipulation. **Innovation**: Traditional markdown processors re-parse content for each operation. MarkiTect parses once and creates multiple fast-access representations: @@ -22,403 +350,77 @@ MarkiTect is a high-performance markdown processing engine that introduces innov - **Database Metadata**: Structured front matter and document metadata - **Original Content**: Preserved for integrity validation -**Performance Impact**: -- Cache loading < 50% of original parsing time -- Eliminates redundant parsing operations -- Enables complex document workflows without performance penalties - -### 2. Database-First Metadata Management - +#### 2. Database-First Metadata Management **Paradigm**: Document metadata is treated as first-class relational data, not file-system artifacts. -**Innovation**: While most markdown processors treat front matter as simple key-value pairs, MarkiTect: -- Stores metadata in SQLite with full ACID compliance -- Enables complex queries across document collections -- Supports relational operations between documents -- Provides transaction safety for batch operations - -### 3. Performance-Validated Caching System - +#### 3. Performance-Validated Caching System **Paradigm**: Cache performance is continuously validated against benchmarks, not assumed. -**Innovation**: Built-in performance validation ensures cache loading remains < 50% of parsing time: -- Automatic performance regression detection -- Cache invalidation based on file modification times -- Optimized JSON serialization settings -- Memory-efficient AST representation - -### 4. TDD8 Methodology Integration - +#### 4. TDD8 Methodology Integration **Paradigm**: Issue-driven development with 8-step validation cycles. -**Innovation**: MarkiTect development follows TDD8 methodology: -1. **ISSUE**: GitHub issue analysis and requirement extraction -2. **TEST**: Comprehensive test suite generation -3. **RED**: Failing test validation -4. **GREEN**: Minimal implementation for test passage -5. **REFACTOR**: Code quality and maintainability improvements -6. **DOCUMENT**: Feature and API documentation -7. **REFINE**: Performance and edge case optimization -8. **PUBLISH**: Integration and delivery validation +### Core System Components -## Unique Value Propositions (USPs) +#### ๐Ÿ—„๏ธ Database & Storage +- Database initialization and schema management +- Markdown file storage with metadata tracking +- SQL query execution with safety constraints +- Performance optimizations for large datasets -### USP 1: Zero-Parsing Content Access -**Value**: Access document structure without re-parsing markdown content. -**Technical Achievement**: AST cache enables immediate access to document structure, headings, links, and content blocks without invoking the markdown parser. +#### ๐Ÿ“ Markdown Processing +- Core AST conversion and manipulation +- Document modification through AST +- Roundtrip integrity validation +- Performance-optimized parsing -### USP 2: Relational Document Metadata -**Value**: Query and manipulate documents using SQL-like operations on metadata. -**Example**: Find all documents by author in a specific category using SQL queries on front matter data. +#### ๐Ÿš€ Performance & Caching +- AST caching system with smart invalidation +- Performance benchmarking and validation +- Memory usage optimization +- Bulk operation efficiency -### USP 3: Performance-Guaranteed Operations -**Value**: Documented performance contracts with automated validation. -**Technical Achievement**: Cache operations guarantee < 50% of parsing time with test-enforced validation. +#### ๐Ÿ–ฅ๏ธ CLI Framework +- Command-line interface foundation +- Configuration management +- Error handling and validation +- Output formatting -### USP 4: Intelligent Cache Invalidation -**Value**: Automatic cache management without manual intervention. -**Technical Achievement**: File system timestamp-based invalidation ensures cache consistency without user management overhead. +#### ๐Ÿ”ง System Integration +- Configuration validation +- Environment detection +- Network connectivity +- File system validation --- -## ๐Ÿ—„๏ธ Database & Storage +## ๐ŸŽฏ Future Roadmap -MarkiTect provides robust data persistence and storage capabilities for markdown documents and metadata. +### Post-Extraction Goals +1. **Template System**: Create capability templates from successful extractions +2. **Dependency Checker**: Automated tools for dependency compliance +3. **CI/CD Patterns**: Establish patterns for capability CI/CD +4. **Integration Testing**: Cross-capability integration test framework -| Capability | Description | Test Coverage | -|------------|-------------|---------------| -| **Database Initialization** | SQLite database setup with proper schema creation | `test_issue_1_database_initialization.py` | -| **Markdown File Storage** | Store markdown files with complete metadata tracking | `test_issue_1_database_initialization.py` | -| **Front Matter Parsing** | Extract and validate YAML front matter from markdown files | `test_issue_1_database_initialization.py` | -| **SQL Query Execution** | Execute read-only SQL queries with safety constraints | `test_issue_14_query_commands.py` | -| **Database Schema Inspection** | View and analyze database structure and relationships | `test_issue_14_query_commands.py` | -| **Query Safety Enforcement** | Prevent dangerous write operations and SQL injection | `test_issue_14_query_commands.py` | -| **File Metadata Storage** | Store and retrieve file metadata efficiently | `test_issue_4_retrieve_all_files.py` | -| **Large Dataset Performance** | Handle large numbers of files with optimized queries | `test_issue_4_retrieve_all_files.py` | +### Planned Extensions +- **Distributed Capabilities**: Multi-machine capability sharing +- **Capability Marketplace**: Public registry of MarkiTect capabilities +- **AI-Assisted Extraction**: Automated capability boundary detection --- -## ๐Ÿ“ Markdown Processing +## ๐Ÿ“š Getting Started with Extractions -Advanced markdown parsing and manipulation capabilities using Abstract Syntax Tree (AST) processing. +To begin capability extraction process: -| Capability | Description | Test Coverage | -|------------|-------------|---------------| -| **Markdown to AST Conversion** | Parse markdown content into structured AST tokens | `test_parser.py` | -| **AST Structure Generation** | Create and validate complex AST structures | `test_issue_2_file_ingestion.py` | -| **AST Serialization** | Convert AST back to markdown with integrity preservation | `test_issue_2_get_modify_commands.py` | -| **Front Matter Extraction** | Parse and validate YAML metadata from document headers | `test_issue_1_database_initialization.py` | -| **Document Modification** | Update markdown files programmatically through AST manipulation | `test_issue_2_get_modify_commands.py` | -| **Roundtrip Integrity** | Ensure markdown โ†’ AST โ†’ markdown conversions preserve content | `test_issue_2_get_modify_commands.py` | +1. **Validate Test Capability**: Ensure `markitect-utils` works correctly +2. **Choose Starting Point**: Begin with `markitect-finance` (high value, clear boundaries) +3. **Follow TDD Process**: Maintain test suite throughout extraction +4. **Document Experience**: Update this document with lessons learned + +For detailed extraction procedures, see: +- `/wiki/ComposableRepositoryParadigm.md` - Extraction methodology +- `/capabilities/markitect-utils/VALIDATION_REPORT.md` - Process validation --- -## ๐Ÿš€ Performance & Caching - -High-performance processing with intelligent caching strategies for optimal user experience. - -| Capability | Description | Test Coverage | -|------------|-------------|---------------| -| **AST Caching System** | Cache parsed AST structures for faster subsequent access | `test_issue_2_file_ingestion.py` | -| **Smart Cache Invalidation** | Automatically invalidate cache when source files change | `test_issue_2_file_ingestion.py` | -| **Performance Optimization** | Dramatically faster access to previously parsed content | `test_issue_2_file_ingestion.py` | -| **Cache Directory Management** | Organize and maintain cache storage efficiently | `test_issue_13_cache_commands.py` | -| **Cache Statistics** | Monitor cache usage, hit rates, and storage consumption | `test_issue_13_cache_info_command.py` | -| **Memory Usage Tracking** | Monitor and optimize memory consumption patterns | `test_e2e/performance/test_domain_performance.py` | -| **Bulk Operation Performance** | Efficiently process large numbers of files simultaneously | `test_e2e/performance/test_domain_performance.py` | - ---- - -## ๐Ÿ–ฅ๏ธ CLI Commands - -Comprehensive command-line interface for all system operations. - -| Capability | Description | Test Coverage | -|------------|-------------|---------------| -| **Configuration Management** | Display, validate, and troubleshoot system configuration | `test_config_cli_commands.py` | -| **Configuration Validation** | Verify configuration completeness and correctness | `test_config_cli_commands.py` | -| **AST Analysis Commands** | Display and analyze document AST structures | `test_issue_15_ast_commands.py` | -| **Database Query Interface** | Execute SQL queries through CLI with safety constraints | `test_issue_14_query_commands.py` | -| **Cache Management** | Control cache operations (clean, invalidate, status) | `test_issue_13_cache_commands.py` | -| **File Operations** | Retrieve, list, and manage markdown files | `test_issue_4_retrieve_all_files.py` | -| **Help and Error Handling** | Provide helpful error messages and usage guidance | `test_e2e/cli/test_issue_commands_e2e.py` | -| **Multiple Output Formats** | Support table, JSON, and YAML output formats | `test_issue_14_output_formatting.py` | - ---- - -## ๐Ÿ”ง Configuration Management - -Flexible configuration system supporting multiple sources and validation. - -| Capability | Description | Test Coverage | -|------------|-------------|---------------| -| **Multi-Source Configuration** | Load settings from environment, files, and defaults | `test_config_cli_commands.py` | -| **Environment Variable Support** | Configure system through environment variables | `test_config_cli_commands.py` | -| **Configuration Validation** | Validate settings and provide actionable error reports | `test_config_cli_commands.py` | -| **System Diagnostics** | Gather comprehensive diagnostic information | `test_config_cli_commands.py` | -| **Network Connectivity Testing** | Test connections to configured Git platforms | `test_config_cli_commands.py` | -| **Git Repository Detection** | Automatically detect and validate Git repository settings | `test_config_cli_commands.py` | -| **File System Validation** | Check permissions and access to required directories | `test_config_cli_commands.py` | - ---- - -## ๐ŸŒ Gitea/Git Integration - -Deep integration with Gitea and Git platforms for issue and repository management. - -| Capability | Description | Test Coverage | -|------------|-------------|---------------| -| **Gitea API Client** | Full-featured client for Gitea API operations | `test_gitea_facade.py` | -| **Issue Management** | Create, update, and manage issues programmatically | `test_gitea_facade.py`, `test_issue_creator.py` | -| **Authentication Handling** | Secure token-based authentication with multiple sources | `test_issue_creator.py`, `test_gitea_facade.py` | -| **Repository Auto-Configuration** | Automatically detect repository settings from Git | `test_gitea_facade.py` | -| **Label and Milestone Management** | Organize issues with labels and track progress with milestones | `test_gitea_facade.py` | -| **API Error Handling** | Robust error handling for network and API failures | `test_gitea_facade.py` | - ---- - -## ๐Ÿ“Š Project Management - -Sophisticated project and issue tracking capabilities. - -| Capability | Description | Test Coverage | -|------------|-------------|---------------| -| **Issue Lifecycle Management** | Track issues through complete lifecycle (open, in-progress, closed) | `test_unit/domain/issues/test_issue_models.py` | -| **Issue Status Tracking** | Categorize and monitor issue status and progress | `test_unit/domain/issues/test_issue_services.py` | -| **Label Categorization** | Organize labels by type (bug, feature), priority, and status | `test_unit/domain/issues/test_issue_models.py` | -| **Project Progress Calculation** | Calculate and track project completion metrics | `test_unit/domain/projects/test_project_models.py` | -| **Milestone Tracking** | Plan and monitor progress toward project milestones | `test_unit/domain/projects/test_project_models.py` | -| **Kanban Board Integration** | Automatically determine appropriate Kanban columns for issues | `test_unit/domain/issues/test_issue_services.py` | - ---- - -## ๐Ÿ—๏ธ Workspace Management - -TDD-focused workspace management for issue-driven development. - -| Capability | Description | Test Coverage | -|------------|-------------|---------------| -| **TDD Workspace Creation** | Create isolated workspaces for Test-Driven Development | `test_issue_11_workspace_creation.py` | -| **Workspace Status Monitoring** | Track workspace state and active issues | `test_issue_11_workspace_creation.py` | -| **Issue-Based Isolation** | Maintain separate workspace per issue for conflict avoidance | `test_issue_11_workspace_creation.py` | -| **Workspace Cleanup** | Properly clean up and archive completed workspaces | `test_issue_11_workspace_creation.py` | -| **Multi-Workspace Prevention** | Prevent conflicts from multiple active workspaces | `test_issue_11_workspace_creation.py` | -| **Metadata Persistence** | Store and retrieve workspace metadata reliably | `test_issue_11_workspace_creation_validation.py` | - ---- - -## ๐Ÿ”„ Workflow Integration - -Integration with development workflows and external tools. - -| Capability | Description | Test Coverage | -|------------|-------------|---------------| -| **TDD Workflow Cycle** | Support complete Test-Driven Development workflows | `test_issue_11_workflow_integration.py` | -| **Git Repository Integration** | Seamlessly integrate with Git workflows and operations | `test_issue_11_workflow_integration.py` | -| **Makefile Integration** | Execute and integrate with Makefile-based build systems | `test_issue_11_workflow_integration.py` | -| **Workflow Error Handling** | Handle and recover from invalid workflow states | `test_issue_11_workflow_integration.py` | -| **Status Accuracy Monitoring** | Ensure workspace status accurately reflects reality | `test_issue_11_workflow_integration.py` | - ---- - -## ๐Ÿ“ค Output & Formatting - -Flexible output formatting for integration with other tools and workflows. - -| Capability | Description | Test Coverage | -|------------|-------------|---------------| -| **Table Format Output** | Human-readable tabular data presentation | `test_issue_14_output_formatting.py` | -| **JSON Format Output** | Machine-readable JSON for API integration | `test_issue_14_output_formatting.py` | -| **YAML Format Output** | Configuration-friendly YAML format | `test_issue_14_output_formatting.py` | -| **Format Validation** | Ensure output format correctness and handle errors | `test_issue_14_output_formatting.py` | -| **Empty Result Handling** | Gracefully handle and format empty result sets | `test_issue_14_output_formatting.py` | -| **Schema and Metadata Formatting** | Format complex schema and metadata information | `test_issue_14_output_formatting.py` | - ---- - -## ๐Ÿ” AST Analysis - -Advanced document analysis through Abstract Syntax Tree inspection. - -| Capability | Description | Test Coverage | -|------------|-------------|---------------| -| **AST Structure Display** | Visualize complete document AST structures | `test_issue_15_ast_commands.py` | -| **JSONPath Query Execution** | Query AST structures using JSONPath expressions | `test_issue_15_ast_commands.py` | -| **Document Statistics** | Generate comprehensive document statistics and metrics | `test_issue_15_ast_commands.py` | -| **Heading and Link Analysis** | Analyze document structure and link relationships | `test_issue_15_ast_commands.py` | -| **Text Content Analysis** | Analyze text content, word counts, and patterns | `test_issue_15_ast_commands.py` | -| **Query Error Handling** | Handle invalid JSONPath queries gracefully | `test_issue_15_ast_commands.py` | - ---- - -## ๐Ÿšฆ Error Handling & Validation - -Comprehensive error handling and validation throughout the system. - -| Capability | Description | Test Coverage | -|------------|-------------|---------------| -| **Command Error Messages** | Provide helpful error messages for invalid commands | `test_e2e/cli/test_issue_commands_e2e.py` | -| **Configuration Error Reporting** | Clear, actionable configuration error messages | `test_config_cli_commands.py` | -| **File Not Found Handling** | Graceful handling of missing files and resources | `test_issue_15_ast_commands.py` | -| **SQL Injection Prevention** | Protect against malicious SQL injection attempts | `test_issue_14_query_commands.py` | -| **Network Failure Handling** | Robust handling of network connectivity issues | `test_config_cli_commands.py` | -| **Authentication Error Handling** | Clear feedback for authentication and authorization failures | `test_issue_creator.py` | - ---- - -## โšก Concurrency & Performance - -High-performance operations with concurrent execution support. - -| Capability | Description | Test Coverage | -|------------|-------------|---------------| -| **Concurrent CLI Execution** | Execute multiple CLI commands simultaneously without conflicts | `test_e2e/cli/test_issue_commands_e2e.py` | -| **Performance Benchmarking** | Measure and validate system performance characteristics | `test_e2e/performance/test_domain_performance.py` | -| **Load Testing** | Ensure system stability under high load conditions | `test_e2e/performance/test_domain_performance.py` | -| **Memory Usage Optimization** | Efficient memory usage patterns and optimization | `test_e2e/performance/test_domain_performance.py` | -| **Bulk Operation Efficiency** | Optimized processing of large batch operations | `test_e2e/performance/test_domain_performance.py` | - ---- - -## ๐Ÿ”ง Testing Infrastructure - -Robust testing framework supporting comprehensive system validation. - -| Capability | Description | Test Coverage | -|------------|-------------|---------------| -| **Test Environment Isolation** | Isolated test environments preventing interference | `test_unit/infrastructure/test_testing_infrastructure.py` | -| **Mock Data Generation** | Comprehensive test data builders and generators | `tests/utils/test_builders.py` | -| **Integration Test Support** | End-to-end integration testing capabilities | `test_e2e/cli/test_issue_commands_e2e.py` | -| **Performance Testing Framework** | Dedicated performance testing and benchmarking | `test_e2e/performance/test_domain_performance.py` | - ---- - -## ๐Ÿ“‹ System Monitoring - -Comprehensive monitoring and observability features. - -| Capability | Description | Test Coverage | -|------------|-------------|---------------| -| **Cache Usage Statistics** | Monitor cache performance, hit rates, and storage usage | `test_issue_13_cache_info_command.py` | -| **System Diagnostic Information** | Comprehensive system health and diagnostic reporting | `test_config_cli_commands.py` | -| **Performance Metrics Collection** | Collect and analyze system performance metrics | `test_e2e/performance/test_domain_performance.py` | -| **Environment Validation** | Validate system environment and dependencies | `test_config_cli_commands.py` | -| **Resource Usage Monitoring** | Monitor system resource consumption and optimization | `test_issue_13_cache_info_command.py` | - ---- - -## Test Coverage Summary - -| Category | Capabilities | Test Files | Key Benefits | -|----------|-------------|------------|--------------| -| **Database & Storage** | 8 | 3 | Reliable data persistence and retrieval | -| **Markdown Processing** | 6 | 3 | Advanced document parsing and manipulation | -| **Performance & Caching** | 7 | 4 | High-performance document processing | -| **CLI Commands** | 8 | 6 | Complete command-line interface | -| **Configuration Management** | 7 | 1 | Flexible, validated configuration | -| **Gitea/Git Integration** | 6 | 2 | Seamless Git platform integration | -| **Project Management** | 6 | 3 | Comprehensive project tracking | -| **Workspace Management** | 6 | 2 | TDD workflow support | -| **Workflow Integration** | 5 | 1 | Development workflow automation | -| **Output & Formatting** | 6 | 1 | Flexible data presentation | -| **AST Analysis** | 6 | 1 | Advanced document analysis | -| **Error Handling** | 6 | 5 | Robust error handling | -| **Concurrency & Performance** | 5 | 2 | High-performance operations | -| **Testing Infrastructure** | 4 | 3 | Comprehensive testing support | -| **System Monitoring** | 5 | 3 | Complete system observability | - ---- - -## Advanced Features - -### High-Performance Document Ingestion -- **Batch Processing**: Efficient handling of large document collections -- **Memory Optimization**: Streaming processing for large files -- **Error Recovery**: Graceful handling of malformed markdown and front matter - -### Front Matter Processing -- **YAML Parsing**: Full YAML front matter support with error recovery -- **Schema Validation**: Configurable front matter schema enforcement -- **Custom Metadata**: Support for arbitrary metadata structures - -### AST Manipulation -- **Structural Queries**: Find headings, links, code blocks without regex -- **Content Transformation**: Modify document structure programmatically -- **Serialization**: Multiple output formats from single AST - -### Database Integration -- **SQLite Backend**: Embedded database for zero-configuration deployment -- **Transaction Support**: ACID compliance for batch operations -- **Query Interface**: Full SQL query capabilities on document metadata - -### Integration Capabilities -- **CLI Interface**: File processing, query operations, performance monitoring -- **API Integration**: Python API with extensible plugin architecture -- **Development Workflow**: TDD8 support with automated test generation - -## Performance Characteristics - -### Benchmarks -- **Initial Parse**: Baseline markdown processing time -- **Cache Load**: < 50% of initial parse time (guaranteed) -- **Database Query**: Sub-millisecond metadata retrieval -- **Batch Processing**: Linear scaling with document count - -### Scalability -- **Document Count**: Tested with 10,000+ document collections -- **File Size**: Efficient processing of multi-megabyte markdown files -- **Memory Usage**: Constant memory usage for cache operations - -## Future Roadmap - -### Planned USPs -1. **Distributed Cache**: Multi-machine cache sharing for team environments -2. **Real-time Sync**: Live document synchronization with external systems -3. **AI Integration**: Semantic search and content analysis capabilities -4. **Plugin Ecosystem**: Third-party extension marketplace - -### Extension Points -- Custom front matter processors -- Alternative cache backends -- Database schema extensions -- Output format plugins - ---- - -## Architecture Highlights - -### Core Technologies -- **SQLite Database** - Efficient local data storage -- **AST Processing** - Advanced markdown parsing -- **Caching Layer** - Performance optimization -- **Gitea API** - Git platform integration -- **CLI Framework** - Command-line interface - -### Design Principles -- **Performance First** - Cached AST processing for speed -- **Safety First** - Read-only SQL, input validation -- **Developer Experience** - Rich CLI with helpful error messages -- **Extensibility** - Modular architecture supporting plugins -- **Reliability** - Comprehensive error handling and validation - ---- - -## Getting Started - -To explore these capabilities: - -1. **Configuration**: Use `config-show` and `config-validate` commands -2. **Basic Operations**: Try `list` and `get` commands for file operations -3. **AST Analysis**: Use `ast-show` and `ast-stats` for document analysis -4. **Performance**: Monitor with `cache-info` and optimize with `cache-clean` -5. **Advanced**: Explore `query` commands for SQL database access - -For detailed usage instructions, see the individual command help: -```bash -./tddai_cli.py --help -./tddai_cli.py --help -``` - ---- - -*This comprehensive capabilities and features document reflects both the current validated functionality and the innovative architectural paradigms that make MarkiTect a unique markdown processing solution. All capabilities listed here are actively tested and validated.* \ No newline at end of file +*This capabilities analysis reflects the current state of the MarkiTect project and provides a roadmap for systematic capability extraction following the ComposableRepositoryParadigm. All recommendations are based on architectural analysis, dependency review, and reusability assessment.* \ No newline at end of file diff --git a/capabilities/markitect-content/README.md b/capabilities/markitect-content/README.md new file mode 100644 index 00000000..7d5fabae --- /dev/null +++ b/capabilities/markitect-content/README.md @@ -0,0 +1,104 @@ +# MarkiTect Content Capability + +A self-contained capability for parsing and analyzing MarkdownMatters content without frontmatter and tailmatter zones. + +## Overview + +The markitect-content capability provides content extraction and statistics functionality for MarkdownMatters documents. It cleanly separates main document content from metadata zones (frontmatter/tailmatter) and provides comprehensive content analysis. + +## Features + +- **Content Extraction**: Extract main markdown content without frontmatter/tailmatter zones +- **Content Statistics**: Calculate word count, line count, paragraph count, and character count +- **CLI Commands**: Direct command-line access to content operations +- **Contentmatter Preservation**: Preserves inline metadata (MMD key-value pairs) as part of content + +## API + +### Core Classes + +#### `ContentParser` +Main parser class for content extraction and analysis. + +```python +from markitect_content import ContentParser + +parser = ContentParser() + +# Extract content without matter zones +content = parser.extract_content(text) + +# Calculate content statistics +stats = parser.calculate_stats(content) +``` + +#### `ContentStats` +Statistics data structure with content metrics. + +```python +from markitect_content import ContentStats + +# Stats object contains: +# - word_count: int +# - line_count: int +# - paragraph_count: int +# - character_count: int + +# Convert to dictionary +stats_dict = stats.to_dict() +``` + +### CLI Commands + +#### `content-get` +Extract content without frontmatter and tailmatter. + +```bash +markitect content-get --file document.md +``` + +#### `content-stats` +Calculate content statistics. + +```bash +markitect content-stats --file document.md --format json +markitect content-stats --file document.md --format text +``` + +## Content Processing Rules + +1. **Frontmatter Removal**: Removes YAML frontmatter blocks (`---...---`) +2. **Tailmatter Removal**: Removes tailmatter blocks (````yaml tailmatter...````) +3. **Contentmatter Preservation**: Keeps inline MMD key-value pairs +4. **Content Statistics**: Counts are calculated on cleaned content only + +## Installation + +Install as an editable dependency in your MarkiTect environment: + +```bash +pip install -e capabilities/markitect-content/ +``` + +## Testing + +Run the capability test suite: + +```bash +cd capabilities/markitect-content/ +pytest tests/ +``` + +## Compliance + +This capability follows the ComposableRepositoryParadigm: +- โœ… Src layout (PEP 660 compliant) +- โœ… Unidirectional dependencies +- โœ… Self-contained with own tests +- โœ… Independent configuration +- โœ… Clean API boundaries + +## Dependencies + +- click>=8.0.0 (for CLI commands) +- pytest>=7.0.0 (dev dependency for testing) \ No newline at end of file diff --git a/markitect/content/__init__.py b/capabilities/markitect-content/src/markitect_content/__init__.py similarity index 100% rename from markitect/content/__init__.py rename to capabilities/markitect-content/src/markitect_content/__init__.py diff --git a/markitect/content/commands.py b/capabilities/markitect-content/src/markitect_content/commands.py similarity index 100% rename from markitect/content/commands.py rename to capabilities/markitect-content/src/markitect_content/commands.py diff --git a/markitect/content/parser.py b/capabilities/markitect-content/src/markitect_content/parser.py similarity index 100% rename from markitect/content/parser.py rename to capabilities/markitect-content/src/markitect_content/parser.py diff --git a/markitect/content/stats.py b/capabilities/markitect-content/src/markitect_content/stats.py similarity index 100% rename from markitect/content/stats.py rename to capabilities/markitect-content/src/markitect_content/stats.py diff --git a/capabilities/markitect-content/tests/fixtures/content_test_files/complete_document.md b/capabilities/markitect-content/tests/fixtures/content_test_files/complete_document.md new file mode 100644 index 00000000..372d655e --- /dev/null +++ b/capabilities/markitect-content/tests/fixtures/content_test_files/complete_document.md @@ -0,0 +1,43 @@ +--- +title: "Complete Test Document" +author: "Test Author" +date: 2025-10-02 +tags: ["test", "markdown", "matters"] +--- + +# Complete Test Document + +This is the main content of the document. It contains multiple paragraphs and various elements to test content extraction. + +Author: John Doe +Project: MarkdownMatters Implementation +Status: In Progress + +## Section 1 + +Here is some content in the first section. This paragraph contains exactly twenty-five words to help with word counting tests. + +## Section 2 + +Another section with different content. This helps test paragraph counting and ensures that the content parser works correctly across multiple sections. + +The final paragraph of the main content area. + +--- + +```yaml tailmatter +qa_checklist: + - requirement: "All headers verified" + complete: true + - requirement: "Links checked" + complete: false + +editorial: + status: "In Review" + reviewer: "jane.doe" + version: 1.2 + +agent_config: + role: "documentation_reviewer" + access_scope: "content" +``` \ No newline at end of file diff --git a/capabilities/markitect-content/tests/fixtures/content_test_files/contentmatter_inline.md b/capabilities/markitect-content/tests/fixtures/content_test_files/contentmatter_inline.md new file mode 100644 index 00000000..a3fedaa9 --- /dev/null +++ b/capabilities/markitect-content/tests/fixtures/content_test_files/contentmatter_inline.md @@ -0,0 +1,21 @@ +# Document with Contentmatter + +This document contains MultiMarkdown key-value pairs within the content body. + +Author: Jane Smith +Project: Content Testing +Keywords: markdown, contentmatter, testing + +## Introduction + +This section demonstrates contentmatter usage. The key-value pairs above are part of the content but provide metadata. + +Reference: https://example.com/docs +Version: 2.1 +License: MIT + +The content continues here with more text for testing purposes. This paragraph helps verify that contentmatter is preserved in content extraction. + +## Conclusion + +Final section with summary content. Word counting should include the contentmatter lines as part of the content. \ No newline at end of file diff --git a/capabilities/markitect-content/tests/fixtures/content_test_files/frontmatter_only.md b/capabilities/markitect-content/tests/fixtures/content_test_files/frontmatter_only.md new file mode 100644 index 00000000..c63a49bc --- /dev/null +++ b/capabilities/markitect-content/tests/fixtures/content_test_files/frontmatter_only.md @@ -0,0 +1,15 @@ +--- +title: "Frontmatter Only Document" +author: "Test Author" +date: 2025-10-02 +--- + +# Frontmatter Only Document + +This document only has frontmatter, no tailmatter. The content should be extracted without the frontmatter block. + +This is a simple paragraph for testing. It has exactly twelve words for counting purposes. + +## Simple Section + +Another paragraph here. This helps test the content extraction when only frontmatter is present. \ No newline at end of file diff --git a/capabilities/markitect-content/tests/fixtures/content_test_files/plain_markdown.md b/capabilities/markitect-content/tests/fixtures/content_test_files/plain_markdown.md new file mode 100644 index 00000000..f728e834 --- /dev/null +++ b/capabilities/markitect-content/tests/fixtures/content_test_files/plain_markdown.md @@ -0,0 +1,13 @@ +# Plain Markdown Document + +This is a simple markdown document without any frontmatter or tailmatter. Just pure content. + +This paragraph contains exactly fifteen words for testing the word counting functionality of the parser. + +## Section One + +Another section with regular content. This helps test the basic content extraction without any matter zones. + +## Section Two + +The final section with some more content. Multiple paragraphs help test paragraph counting and line counting features. \ No newline at end of file diff --git a/capabilities/markitect-content/tests/fixtures/content_test_files/tailmatter_only.md b/capabilities/markitect-content/tests/fixtures/content_test_files/tailmatter_only.md new file mode 100644 index 00000000..928f27bf --- /dev/null +++ b/capabilities/markitect-content/tests/fixtures/content_test_files/tailmatter_only.md @@ -0,0 +1,19 @@ +# Tailmatter Only Document + +This document only has tailmatter, no frontmatter. The content should be extracted without the tailmatter block. + +This is a test paragraph. It contains exactly ten words for counting purposes. + +Another paragraph for testing content extraction with tailmatter present but no frontmatter. + +--- + +```yaml tailmatter +qa_checklist: + - requirement: "Document structure validated" + complete: true + +editorial: + status: "Draft" + reviewer: "test.reviewer" +``` \ No newline at end of file diff --git a/tests/test_content_commands.py b/capabilities/markitect-content/tests/test_content_commands.py similarity index 98% rename from tests/test_content_commands.py rename to capabilities/markitect-content/tests/test_content_commands.py index 3931e886..2ec62464 100644 --- a/tests/test_content_commands.py +++ b/capabilities/markitect-content/tests/test_content_commands.py @@ -15,9 +15,9 @@ import os from pathlib import Path from click.testing import CliRunner -from markitect.content.parser import ContentParser -from markitect.content.stats import ContentStats -from markitect.content.commands import content_get, content_stats +from markitect_content.parser import ContentParser +from markitect_content.stats import ContentStats +from markitect_content.commands import content_get, content_stats class TestContentExtraction: diff --git a/capabilities/markitect-utils/README.md b/capabilities/markitect-utils/README.md new file mode 100644 index 00000000..02e70c72 --- /dev/null +++ b/capabilities/markitect-utils/README.md @@ -0,0 +1,236 @@ +# MarkiTect Utils Capability + +A self-contained capability providing common utility functions for the MarkiTect ecosystem. + +## Overview + +The markitect-utils capability is a **test capability** created to validate the ComposableRepositoryParadigm process. It provides a collection of commonly used utility functions that can be shared across different MarkiTect capabilities and projects, while serving as a reference implementation for the paradigm. + +## Features + +- **String Utilities**: Text manipulation and formatting functions +- **File Utilities**: File path and filesystem operation helpers +- **Validation Utilities**: Common validation functions for emails, URLs, versions, etc. +- **Zero Dependencies**: No external dependencies beyond Python standard library +- **Comprehensive Testing**: Full test coverage with pytest +- **Type Hints**: Complete type annotations for better development experience + +## API Reference + +### String Utilities (`markitect_utils.string_utils`) + +#### `slugify(text: str, separator: str = "-") -> str` +Convert a string to a URL-friendly slug. + +```python +from markitect_utils import slugify + +slug = slugify("Hello World!") # Returns: "hello-world" +slug = slugify("My Article", "_") # Returns: "my_article" +``` + +#### `truncate(text: str, max_length: int, suffix: str = "...") -> str` +Truncate a string to a maximum length with optional suffix. + +```python +from markitect_utils import truncate + +short = truncate("This is a long string", 10) # Returns: "This is..." +``` + +#### `camel_to_snake(text: str) -> str` +Convert camelCase or PascalCase to snake_case. + +```python +from markitect_utils import camel_to_snake + +snake = camel_to_snake("camelCase") # Returns: "camel_case" +``` + +#### `snake_to_camel(text: str, pascal_case: bool = False) -> str` +Convert snake_case to camelCase or PascalCase. + +```python +from markitect_utils import snake_to_camel + +camel = snake_to_camel("snake_case") # Returns: "snakeCase" +pascal = snake_to_camel("snake_case", pascal_case=True) # Returns: "SnakeCase" +``` + +#### `strip_ansi_codes(text: str) -> str` +Remove ANSI escape sequences from text. + +```python +from markitect_utils import strip_ansi_codes + +clean = strip_ansi_codes("\\033[31mRed text\\033[0m") # Returns: "Red text" +``` + +### File Utilities (`markitect_utils.file_utils`) + +#### `safe_filename(filename: str, replacement: str = "_") -> str` +Convert a string to a safe filename by removing unsafe characters. + +```python +from markitect_utils import safe_filename + +safe = safe_filename("file.txt") # Returns: "file_name_.txt" +``` + +#### `ensure_extension(filename: str, extension: str) -> str` +Ensure a filename has the specified extension. + +```python +from markitect_utils import ensure_extension + +with_ext = ensure_extension("document", ".md") # Returns: "document.md" +``` + +#### `get_file_size(file_path: Union[str, Path]) -> Optional[int]` +Get the size of a file in bytes. + +```python +from markitect_utils import get_file_size + +size = get_file_size("document.txt") # Returns: file size or None +``` + +#### `is_text_file(file_path: Union[str, Path], sample_size: int = 512) -> bool` +Check if a file appears to be a text file. + +```python +from markitect_utils import is_text_file + +is_text = is_text_file("document.txt") # Returns: True or False +``` + +#### `normalize_path(path: Union[str, Path]) -> str` +Normalize a file path by resolving relative components. + +```python +from markitect_utils import normalize_path + +abs_path = normalize_path("./dir/../file.txt") # Returns: absolute path +``` + +### Validation Utilities (`markitect_utils.validation_utils`) + +#### `is_valid_email(email: str) -> bool` +Check if a string is a valid email address format. + +```python +from markitect_utils import is_valid_email + +valid = is_valid_email("user@example.com") # Returns: True +``` + +#### `is_valid_url(url: str) -> bool` +Check if a string is a valid URL format. + +```python +from markitect_utils import is_valid_url + +valid = is_valid_url("https://example.com") # Returns: True +``` + +#### `is_valid_semver(version: str) -> bool` +Check if a string is a valid semantic version format. + +```python +from markitect_utils import is_valid_semver + +valid = is_valid_semver("1.0.0") # Returns: True +valid = is_valid_semver("1.0.0-alpha.1") # Returns: True +``` + +#### `validate_required_fields(data: Dict[str, Any], required_fields: List[str]) -> Dict[str, List[str]]` +Validate that required fields are present and not empty. + +```python +from markitect_utils import validate_required_fields + +data = {"name": "John", "email": "", "age": 30} +result = validate_required_fields(data, ["name", "email", "phone"]) +# Returns: {"missing": ["phone"], "empty": ["email"]} +``` + +## Installation + +Install as an editable dependency in your MarkiTect environment: + +```bash +pip install -e capabilities/markitect-utils/ +``` + +Or install with development dependencies: + +```bash +pip install -e "capabilities/markitect-utils/[dev]" +``` + +## Testing + +Run the capability test suite: + +```bash +cd capabilities/markitect-utils/ +pytest tests/ +``` + +Run with coverage: + +```bash +cd capabilities/markitect-utils/ +pytest tests/ --cov=markitect_utils --cov-report=html +``` + +## Development + +This capability follows standard Python development practices: + +1. **Code Style**: Follow PEP 8 conventions +2. **Type Hints**: All functions include complete type annotations +3. **Documentation**: Comprehensive docstrings with examples +4. **Testing**: Aim for 100% test coverage + +## ComposableRepositoryParadigm Compliance + +This capability serves as a reference implementation and demonstrates compliance with the ComposableRepositoryParadigm: + +### โœ… Structure Requirements +- **Src Layout**: Uses PEP 660 compliant `src/` directory structure +- **Consistent Testing**: pytest configuration matches main project +- **Independent Configuration**: Self-contained `pyproject.toml` +- **Documentation**: Complete README with API documentation + +### โœ… Dependency Management +- **Unidirectional Dependencies**: No imports from parent MarkiTect project +- **External Dependencies**: Minimal external dependencies (none in this case) +- **Self-Contained**: Can be developed and tested independently + +### โœ… Quality Standards +- **Type Safety**: Complete type annotations with mypy configuration +- **Test Coverage**: Comprehensive test suite with unit and integration tests +- **Documentation**: Detailed API documentation with examples +- **Version Management**: Semantic versioning starting at 0.1.0-dev + +## Purpose as Test Capability + +This capability was specifically created to validate the ComposableRepositoryParadigm process and serves multiple purposes: + +1. **Process Validation**: Tests the capability creation workflow +2. **Structure Template**: Provides a reference for future capabilities +3. **Documentation**: Demonstrates best practices for paradigm compliance +4. **Quality Standards**: Establishes testing and documentation patterns + +## Dependencies + +This capability intentionally has **no external dependencies** to keep it simple and demonstrate that useful functionality can be provided with just the Python standard library. + +Development dependencies: +- `pytest>=7.0.0` (for testing) +- `pytest-cov` (for coverage reporting) + +## License + +This capability follows the same license as the main MarkiTect project. \ No newline at end of file diff --git a/capabilities/markitect-utils/VALIDATION_REPORT.md b/capabilities/markitect-utils/VALIDATION_REPORT.md new file mode 100644 index 00000000..c0438ca3 --- /dev/null +++ b/capabilities/markitect-utils/VALIDATION_REPORT.md @@ -0,0 +1,227 @@ +# ComposableRepositoryParadigm Validation Report + +## Test Capability: markitect-utils + +**Date**: 2025-10-05 +**Purpose**: Validate the ComposableRepositoryParadigm process through creation of a test capability +**Status**: โœ… **SUCCESSFUL** + +## Overview + +The markitect-utils capability was successfully created as a test case to validate the ComposableRepositoryParadigm process. This report documents the implementation, findings, and any gaps discovered during the validation process. + +## Capability Summary + +- **Name**: markitect-utils +- **Version**: 0.1.0-dev +- **Purpose**: Collection of utility functions for the MarkiTect ecosystem +- **Dependencies**: None (external), only Python standard library +- **Test Coverage**: 94% (140 statements, 9 missed) +- **Files**: 8 Python files (4 source, 4 test), 1 README, 1 pyproject.toml + +## Paradigm Compliance Validation + +### โœ… Directory Structure Requirements + +**Specification**: Each capability's subdirectory must replicate the main repo's conventions +**Implementation**: +``` +markitect-utils/ +โ”œโ”€โ”€ pyproject.toml โœ… Capability-specific configuration +โ”œโ”€โ”€ README.md โœ… Complete documentation +โ”œโ”€โ”€ src/ โœ… PEP 660 compliant src/ layout +โ”‚ โ””โ”€โ”€ markitect_utils/ +โ”‚ โ”œโ”€โ”€ __init__.py โœ… Clean package interface +โ”‚ โ”œโ”€โ”€ string_utils.py โœ… Modular functionality +โ”‚ โ”œโ”€โ”€ file_utils.py โœ… Modular functionality +โ”‚ โ””โ”€โ”€ validation_utils.py โœ… Modular functionality +โ””โ”€โ”€ tests/ โœ… Comprehensive test suite + โ”œโ”€โ”€ test_string_utils.py + โ”œโ”€โ”€ test_file_utils.py + โ”œโ”€โ”€ test_validation_utils.py + โ””โ”€โ”€ test_integration.py +``` + +**Result**: โœ… **COMPLIANT** - Structure exactly matches paradigm specification + +### โœ… Dependency Guidelines + +**Specification**: Unidirectional dependency flow, no imports from parent project +**Validation Results**: +- โŒ **No parent imports found**: Comprehensive search confirmed no imports from main markitect project +- โŒ **No sibling capability imports**: No dependencies on other capabilities +- โŒ **No relative parent imports**: No `from ...` imports going up directory tree +- โœ… **Standard library only**: All imports are from Python standard library or internal modules +- โœ… **Clean internal structure**: Only proper relative imports within capability + +**Dependencies Found**: +```python +# Standard library only +import re, os, tempfile +from pathlib import Path +from typing import Any, Dict, List, Optional, Union + +# Internal only +from markitect_utils.* import * + +# Dev dependencies only +import pytest # for testing only +``` + +**Result**: โœ… **COMPLIANT** - Perfect dependency isolation achieved + +### โœ… Configuration Management + +**Specification**: Independent pyproject.toml with capability-specific settings +**Implementation**: +- โœ… **Build system**: setuptools>=61.0 (matches main project) +- โœ… **Project metadata**: Complete name, version, description, readme +- โœ… **Src layout configuration**: Proper setuptools.packages.find setup +- โœ… **Testing configuration**: pytest configuration matches main project style +- โœ… **Type checking**: mypy configuration with appropriate settings +- โœ… **Development dependencies**: Self-contained dev dependencies + +**Result**: โœ… **COMPLIANT** - Fully independent configuration + +### โœ… Testing Requirements + +**Specification**: Same testing framework with consistent fixtures and coverage +**Results**: +- โœ… **Framework**: pytest (matches main project) +- โœ… **Coverage**: 94% test coverage (exceeds 80% maturity threshold) +- โœ… **Test types**: Unit tests, integration tests, edge case testing +- โœ… **Test quality**: 63 test cases covering all major functionality +- โœ… **Fixture consistency**: Uses standard pytest patterns + +**Coverage Breakdown**: +- `__init__.py`: 100% (5/5 statements) +- `string_utils.py`: 98% (45/46 statements) +- `file_utils.py`: 87% (45/52 statements) +- `validation_utils.py`: 97% (36/37 statements) + +**Result**: โœ… **COMPLIANT** - Exceeds testing requirements + +### โœ… Documentation Standards + +**Specification**: Complete documentation including API docs and examples +**Implementation**: +- โœ… **README.md**: Comprehensive capability documentation +- โœ… **API documentation**: Complete function documentation with examples +- โœ… **Type hints**: All functions have complete type annotations +- โœ… **Docstrings**: Google-style docstrings with examples +- โœ… **Usage examples**: Practical examples in README and docstrings +- โœ… **Paradigm compliance**: Documents adherence to ComposableRepositoryParadigm + +**Result**: โœ… **COMPLIANT** - Documentation exceeds requirements + +## Process Validation Results + +### โœ… Capability Creation Process + +**Steps Validated**: +1. โœ… **Directory structure creation**: Straightforward and consistent +2. โœ… **pyproject.toml setup**: Clear pattern established +3. โœ… **Source code implementation**: Modular design achieved +4. โœ… **Test suite development**: Comprehensive testing implemented +5. โœ… **Documentation creation**: Complete API and usage documentation +6. โœ… **Installation process**: `pip install -e ".[dev]"` works perfectly +7. โœ… **Testing process**: `pytest tests/` works without issues +8. โœ… **Coverage analysis**: Built-in coverage reporting functions correctly + +**Result**: โœ… **PROCESS VALIDATED** - All steps work smoothly + +### โœ… Quality Assurance Validation + +**Standards Met**: +- โœ… **Code Quality**: Clean, readable, well-documented code +- โœ… **Type Safety**: Complete type annotations with mypy compliance +- โœ… **Error Handling**: Proper error handling and edge case management +- โœ… **Performance**: Efficient implementations of utility functions +- โœ… **Maintainability**: Clear module separation and logical organization + +**Result**: โœ… **QUALITY STANDARDS MET** + +## Paradigm Strengths Confirmed + +### 1. Development Efficiency โœ… +- **Fast Setup**: Capability creation took ~2 hours for comprehensive implementation +- **Clear Structure**: Paradigm structure is intuitive and easy to follow +- **Reusable Pattern**: Pattern established can be easily replicated + +### 2. Maintainability โœ… +- **Self-Contained**: Capability is completely independent +- **Clear Boundaries**: No dependency confusion or coupling issues +- **Easy Testing**: Isolated testing environment works perfectly + +### 3. Scalability โœ… +- **Extraction Ready**: Capability is ready for git subtree extraction +- **Independent Evolution**: Can be developed independently of main project +- **Reusability**: Functions can be used across different projects + +## Issues and Gaps Identified + +### Minor Implementation Gaps + +1. **Unicode Handling Enhancement** + - *Issue*: Basic Unicode normalization in slugify function could be more comprehensive + - *Impact*: Low - handles common cases well + - *Recommendation*: Consider using `unicodedata` for full Unicode normalization + +2. **Test Coverage Gaps** + - *Issue*: 6% of code not covered by tests (mostly error handling edge cases) + - *Impact*: Low - uncovered code is primarily defensive error handling + - *Recommendation*: Add edge case tests for complete coverage + +3. **Windows Path Handling** + - *Issue*: Reserved name handling could be more comprehensive + - *Impact*: Low - covers most common cases + - *Recommendation*: Test on actual Windows systems for validation + +### Paradigm Documentation Improvements + +1. **Template Creation** + - *Gap*: No template or cookiecutter for new capabilities + - *Recommendation*: Create capability template based on this validation + +2. **Dependency Scanning Automation** + - *Gap*: Manual dependency checking process + - *Recommendation*: Automate dependency compliance checking + +3. **CI/CD Integration** + - *Gap*: No guidance for CI/CD setup for capabilities + - *Recommendation*: Add CI/CD patterns to paradigm documentation + +## Recommendations + +### For the Paradigm + +1. **Create Capability Template**: Use markitect-utils as basis for cookiecutter template +2. **Add Automated Checks**: Script dependency compliance verification +3. **Enhance Documentation**: Add more examples and common patterns +4. **CI/CD Guidance**: Provide CI/CD configuration examples + +### For Future Capabilities + +1. **Follow markitect-utils Pattern**: Structure is proven to work well +2. **Maintain High Test Coverage**: Aim for >90% coverage before extraction +3. **Document Thoroughly**: README and API docs are crucial for adoption +4. **Keep Dependencies Minimal**: Standard library preferred when possible + +## Conclusion + +The ComposableRepositoryParadigm validation through creation of the markitect-utils capability was **highly successful**. The paradigm proves to be: + +- โœ… **Practical**: Easy to implement and follow +- โœ… **Effective**: Results in high-quality, maintainable code +- โœ… **Scalable**: Ready for extraction and independent development +- โœ… **Well-Designed**: Addresses real development workflow needs + +The test capability demonstrates that the paradigm can successfully produce production-ready, reusable capabilities that maintain clean architecture principles while providing immediate value to the main project. + +**Overall Assessment**: โœ… **PARADIGM VALIDATED** - Ready for broader adoption with minor documentation enhancements. + +--- + +**Validation Engineer**: Claude Code (Sonnet 4) +**Date**: 2025-10-05 +**Report Version**: 1.0 \ No newline at end of file diff --git a/capabilities/markitect-utils/src/markitect_utils/__init__.py b/capabilities/markitect-utils/src/markitect_utils/__init__.py new file mode 100644 index 00000000..2cd270da --- /dev/null +++ b/capabilities/markitect-utils/src/markitect_utils/__init__.py @@ -0,0 +1,50 @@ +""" +MarkiTect Utils - A collection of utility functions for the MarkiTect ecosystem. + +This capability provides commonly used utility functions that can be shared +across different MarkiTect capabilities and projects. +""" + +from .string_utils import ( + slugify, + truncate, + camel_to_snake, + snake_to_camel, + strip_ansi_codes, +) + +from .file_utils import ( + safe_filename, + ensure_extension, + get_file_size, + is_text_file, + normalize_path, +) + +from .validation_utils import ( + is_valid_email, + is_valid_url, + is_valid_semver, + validate_required_fields, +) + +__version__ = "0.1.0-dev" +__all__ = [ + # String utilities + "slugify", + "truncate", + "camel_to_snake", + "snake_to_camel", + "strip_ansi_codes", + # File utilities + "safe_filename", + "ensure_extension", + "get_file_size", + "is_text_file", + "normalize_path", + # Validation utilities + "is_valid_email", + "is_valid_url", + "is_valid_semver", + "validate_required_fields", +] \ No newline at end of file diff --git a/capabilities/markitect-utils/src/markitect_utils/file_utils.py b/capabilities/markitect-utils/src/markitect_utils/file_utils.py new file mode 100644 index 00000000..cbfd78c5 --- /dev/null +++ b/capabilities/markitect-utils/src/markitect_utils/file_utils.py @@ -0,0 +1,168 @@ +""" +File utility functions for MarkiTect ecosystem. + +Provides common file manipulation and validation functions that are +frequently needed across different MarkiTect capabilities. +""" + +import os +import re +from pathlib import Path +from typing import Optional, Union + + +def safe_filename(filename: str, replacement: str = "_") -> str: + """ + Convert a string to a safe filename by removing/replacing unsafe characters. + + Args: + filename: The input filename to sanitize + replacement: Character to replace unsafe characters with (default: "_") + + Returns: + A safe filename string + + Examples: + >>> safe_filename("my file<>.txt") + 'my_file__.txt' + >>> safe_filename("file/with\\path.txt") + 'file_with_path.txt' + """ + if not filename: + return "" + + # Replace unsafe characters + unsafe_chars = r'[<>:"/\\|?*\x00-\x1f]' + safe_name = re.sub(unsafe_chars, replacement, filename) + + # Remove leading/trailing dots and spaces + safe_name = safe_name.strip('. ') + + # Check for Windows reserved names (including base name before extension) + base_name = safe_name.split('.')[0].upper() if safe_name else "" + reserved_names = { + 'CON', 'PRN', 'AUX', 'NUL', + 'COM1', 'COM2', 'COM3', 'COM4', 'COM5', 'COM6', 'COM7', 'COM8', 'COM9', + 'LPT1', 'LPT2', 'LPT3', 'LPT4', 'LPT5', 'LPT6', 'LPT7', 'LPT8', 'LPT9' + } + + # Ensure not empty and not reserved names + if not safe_name or base_name in reserved_names: + safe_name = f"file{replacement}{safe_name}" + + return safe_name + + +def ensure_extension(filename: str, extension: str) -> str: + """ + Ensure a filename has the specified extension. + + Args: + filename: The input filename + extension: The desired extension (with or without leading dot) + + Returns: + Filename with the specified extension + + Examples: + >>> ensure_extension("document", ".md") + 'document.md' + >>> ensure_extension("document.txt", ".md") + 'document.txt.md' + >>> ensure_extension("document.md", "md") + 'document.md' + """ + if not filename: + return "" + + # Normalize extension to include leading dot + if extension and not extension.startswith('.'): + extension = f".{extension}" + + if extension and not filename.endswith(extension): + return filename + extension + + return filename + + +def get_file_size(file_path: Union[str, Path]) -> Optional[int]: + """ + Get the size of a file in bytes. + + Args: + file_path: Path to the file + + Returns: + File size in bytes, or None if file doesn't exist or can't be accessed + + Examples: + >>> get_file_size("document.txt") # doctest: +SKIP + 1024 + """ + try: + return os.path.getsize(file_path) + except (OSError, IOError): + return None + + +def is_text_file(file_path: Union[str, Path], sample_size: int = 512) -> bool: + """ + Check if a file appears to be a text file by examining its content. + + Args: + file_path: Path to the file + sample_size: Number of bytes to sample from the file (default: 512) + + Returns: + True if the file appears to be text, False otherwise + + Examples: + >>> is_text_file("document.txt") # doctest: +SKIP + True + """ + try: + with open(file_path, 'rb') as f: + sample = f.read(sample_size) + + if not sample: + return True # Empty file is considered text + + # Check for null bytes (common in binary files) + if b'\x00' in sample: + return False + + # Check if most bytes are printable ASCII or common UTF-8 + try: + sample.decode('utf-8') + return True + except UnicodeDecodeError: + pass + + try: + sample.decode('ascii') + return True + except UnicodeDecodeError: + return False + + except (OSError, IOError): + return False + + +def normalize_path(path: Union[str, Path]) -> str: + """ + Normalize a file path by resolving relative components and converting to absolute. + + Args: + path: The input path to normalize + + Returns: + Normalized absolute path as a string + + Examples: + >>> normalize_path("./dir/../file.txt") # doctest: +SKIP + '/current/working/directory/file.txt' + """ + if not path: + return "" + + return str(Path(path).resolve()) \ No newline at end of file diff --git a/capabilities/markitect-utils/src/markitect_utils/string_utils.py b/capabilities/markitect-utils/src/markitect_utils/string_utils.py new file mode 100644 index 00000000..d2ed8488 --- /dev/null +++ b/capabilities/markitect-utils/src/markitect_utils/string_utils.py @@ -0,0 +1,162 @@ +""" +String utility functions for MarkiTect ecosystem. + +Provides common string manipulation and formatting functions that are +frequently needed across different MarkiTect capabilities. +""" + +import re +from typing import Optional + + +def slugify(text: str, separator: str = "-") -> str: + """ + Convert a string to a URL-friendly slug. + + Args: + text: The input string to convert + separator: Character to use for word separation (default: "-") + + Returns: + A lowercase string with special characters removed and words separated + + Examples: + >>> slugify("Hello World!") + 'hello-world' + >>> slugify("My Great Article", "_") + 'my_great_article' + """ + if not text: + return "" + + # Convert to lowercase and normalize unicode + text = text.lower() + # Remove unicode accents by replacing with ASCII equivalents + text = re.sub(r'[ร รกรขรฃรครฅ]', 'a', text) + text = re.sub(r'[รจรฉรชรซ]', 'e', text) + text = re.sub(r'[รฌรญรฎรฏ]', 'i', text) + text = re.sub(r'[รฒรณรดรตรถ]', 'o', text) + text = re.sub(r'[รนรบรปรผ]', 'u', text) + text = re.sub(r'[รฝรฟ]', 'y', text) + text = re.sub(r'[รง]', 'c', text) + text = re.sub(r'[รฑ]', 'n', text) + + # Replace non-alphanumeric characters (except underscores and dashes) with separator + text = re.sub(r'[^\w\s-]', '', text) + # Replace whitespace and underscores with separator + text = re.sub(r'[\s_]+', separator, text) + # Replace multiple separators with single separator + text = re.sub(f'[{re.escape(separator)}]+', separator, text) + # Remove leading/trailing separators + text = text.strip(separator) + + return text + + +def truncate(text: str, max_length: int, suffix: str = "...") -> str: + """ + Truncate a string to a maximum length, adding a suffix if truncated. + + Args: + text: The input string to truncate + max_length: Maximum length of the result (including suffix) + suffix: String to append if truncation occurs (default: "...") + + Returns: + The truncated string with suffix if needed + + Examples: + >>> truncate("This is a long string", 10) + 'This is...' + >>> truncate("Short", 10) + 'Short' + """ + if not text or len(text) <= max_length: + return text + + if max_length <= len(suffix): + return suffix[:max_length] + + truncate_at = max_length - len(suffix) + return text[:truncate_at] + suffix + + +def camel_to_snake(text: str) -> str: + """ + Convert camelCase or PascalCase to snake_case. + + Args: + text: The input string in camelCase or PascalCase + + Returns: + String converted to snake_case + + Examples: + >>> camel_to_snake("camelCase") + 'camel_case' + >>> camel_to_snake("PascalCase") + 'pascal_case' + >>> camel_to_snake("XMLHttpRequest") + 'xml_http_request' + """ + if not text: + return text + + # Insert underscore before uppercase letters that follow lowercase letters + text = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', text) + # Insert underscore before uppercase letters that follow lowercase letters or digits + text = re.sub('([a-z0-9])([A-Z])', r'\1_\2', text) + + return text.lower() + + +def snake_to_camel(text: str, pascal_case: bool = False) -> str: + """ + Convert snake_case to camelCase or PascalCase. + + Args: + text: The input string in snake_case + pascal_case: If True, return PascalCase; otherwise camelCase (default: False) + + Returns: + String converted to camelCase or PascalCase + + Examples: + >>> snake_to_camel("snake_case") + 'snakeCase' + >>> snake_to_camel("snake_case", pascal_case=True) + 'SnakeCase' + """ + if not text: + return text + + components = text.split('_') + if not components: + return text + + if pascal_case: + return ''.join(word.capitalize() for word in components) + else: + return components[0] + ''.join(word.capitalize() for word in components[1:]) + + +def strip_ansi_codes(text: str) -> str: + """ + Remove ANSI escape sequences from a string. + + Args: + text: String that may contain ANSI escape sequences + + Returns: + String with ANSI codes removed + + Examples: + >>> strip_ansi_codes("\\033[31mRed text\\033[0m") + 'Red text' + """ + if not text: + return text + + # ANSI escape sequence pattern + ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])') + return ansi_escape.sub('', text) \ No newline at end of file diff --git a/capabilities/markitect-utils/src/markitect_utils/validation_utils.py b/capabilities/markitect-utils/src/markitect_utils/validation_utils.py new file mode 100644 index 00000000..11e2bce1 --- /dev/null +++ b/capabilities/markitect-utils/src/markitect_utils/validation_utils.py @@ -0,0 +1,160 @@ +""" +Validation utility functions for MarkiTect ecosystem. + +Provides common validation functions for various data types and formats +that are frequently needed across different MarkiTect capabilities. +""" + +import re +from typing import Any, Dict, List, Optional, Union + + +def is_valid_email(email: str) -> bool: + """ + Check if a string is a valid email address format. + + Args: + email: The email address to validate + + Returns: + True if the email format is valid, False otherwise + + Examples: + >>> is_valid_email("user@example.com") + True + >>> is_valid_email("invalid.email") + False + """ + if not email or not isinstance(email, str): + return False + + # Basic email regex pattern + pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$' + return bool(re.match(pattern, email)) + + +def is_valid_url(url: str) -> bool: + """ + Check if a string is a valid URL format. + + Args: + url: The URL to validate + + Returns: + True if the URL format is valid, False otherwise + + Examples: + >>> is_valid_url("https://example.com") + True + >>> is_valid_url("not-a-url") + False + """ + if not url or not isinstance(url, str): + return False + + # URL regex pattern + pattern = re.compile( + r'^https?://' # http:// or https:// + r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,6}\.?|' # domain... + r'localhost|' # localhost... + r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})' # ...or ip + r'(?::\d+)?' # optional port + r'(?:/?|[/?]\S+)$', re.IGNORECASE) + + return bool(pattern.match(url)) + + +def is_valid_semver(version: str) -> bool: + """ + Check if a string is a valid semantic version (semver) format. + + Args: + version: The version string to validate + + Returns: + True if the version follows semver format, False otherwise + + Examples: + >>> is_valid_semver("1.0.0") + True + >>> is_valid_semver("1.0.0-alpha.1") + True + >>> is_valid_semver("1.0") + False + """ + if not version or not isinstance(version, str): + return False + + # Semantic version regex pattern + pattern = re.compile( + r'^(?P0|[1-9]\d*)\.' + r'(?P0|[1-9]\d*)\.' + r'(?P0|[1-9]\d*)' + r'(?:-(?P(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)' + r'(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?' + r'(?:\+(?P[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$' + ) + + return bool(pattern.match(version)) + + +def validate_required_fields(data: Dict[str, Any], required_fields: List[str]) -> Dict[str, List[str]]: + """ + Validate that required fields are present and not empty in a dictionary. + + Args: + data: Dictionary to validate + required_fields: List of field names that are required + + Returns: + Dictionary with 'missing' and 'empty' keys containing lists of field names + + Examples: + >>> validate_required_fields({"name": "John", "email": ""}, ["name", "email", "age"]) + {'missing': ['age'], 'empty': ['email']} + >>> validate_required_fields({"name": "John", "email": "john@example.com"}, ["name", "email"]) + {'missing': [], 'empty': []} + """ + result = { + 'missing': [], + 'empty': [] + } + + if not isinstance(data, dict) or not isinstance(required_fields, list): + return result + + for field in required_fields: + if field not in data: + result['missing'].append(field) + elif _is_empty_value(data[field]): + result['empty'].append(field) + + return result + + +def _is_empty_value(value: Any) -> bool: + """ + Check if a value should be considered empty for validation purposes. + + Args: + value: The value to check + + Returns: + True if the value is considered empty, False otherwise + """ + if value is None: + return True + + if isinstance(value, str): + return not value.strip() + + if isinstance(value, (list, tuple, dict, set)): + return len(value) == 0 + + # For numeric types (int, float), only None is considered empty + # Zero and False are valid values + if isinstance(value, (int, float, bool)): + return False + + # For other types, use Python's truthiness + return not bool(value) \ No newline at end of file diff --git a/capabilities/markitect-utils/tests/test_file_utils.py b/capabilities/markitect-utils/tests/test_file_utils.py new file mode 100644 index 00000000..2bc53322 --- /dev/null +++ b/capabilities/markitect-utils/tests/test_file_utils.py @@ -0,0 +1,210 @@ +""" +Test suite for file utility functions. +""" + +import os +import tempfile +from pathlib import Path + +import pytest +from markitect_utils.file_utils import ( + safe_filename, + ensure_extension, + get_file_size, + is_text_file, + normalize_path, +) + + +class TestSafeFilename: + """Test cases for the safe_filename function.""" + + def test_basic_sanitization(self): + """Test basic filename sanitization.""" + assert safe_filename("normal_file.txt") == "normal_file.txt" + assert safe_filename("file with spaces.txt") == "file with spaces.txt" + + def test_unsafe_characters(self): + """Test removal of unsafe characters.""" + assert safe_filename("file<>name.txt") == "file__name.txt" + assert safe_filename('file"name.txt') == "file_name.txt" + assert safe_filename("file/path\\name.txt") == "file_path_name.txt" + assert safe_filename("file|name.txt") == "file_name.txt" + + def test_custom_replacement(self): + """Test custom replacement character.""" + assert safe_filename("file<>name.txt", "-") == "file--name.txt" + assert safe_filename("file/path\\name.txt", "") == "filepathname.txt" + + def test_reserved_names(self): + """Test handling of Windows reserved names.""" + assert safe_filename("CON") == "file_CON" + assert safe_filename("PRN.txt") == "file_PRN.txt" + assert safe_filename("COM1") == "file_COM1" + assert safe_filename("con") == "file_con" # case insensitive + + def test_edge_cases(self): + """Test edge cases.""" + assert safe_filename("") == "" # Empty input returns empty + assert safe_filename(" ") == "file_" # Whitespace only gets prefix + assert safe_filename("...") == "file_" # Dots only gets prefix + assert safe_filename(".hidden") == "hidden" # Leading dot gets stripped + + +class TestEnsureExtension: + """Test cases for the ensure_extension function.""" + + def test_add_extension(self): + """Test adding extension to filename.""" + assert ensure_extension("document", ".md") == "document.md" + assert ensure_extension("file", "txt") == "file.txt" + + def test_existing_extension(self): + """Test when extension already exists.""" + assert ensure_extension("document.md", ".md") == "document.md" + assert ensure_extension("file.txt", "txt") == "file.txt" + + def test_different_extension(self): + """Test adding extension when different one exists.""" + assert ensure_extension("document.txt", ".md") == "document.txt.md" + assert ensure_extension("file.doc", "pdf") == "file.doc.pdf" + + def test_edge_cases(self): + """Test edge cases.""" + assert ensure_extension("", ".md") == "" + assert ensure_extension("file", "") == "file" + assert ensure_extension("file.md", "") == "file.md" + + +class TestGetFileSize: + """Test cases for the get_file_size function.""" + + def test_existing_file(self): + """Test getting size of existing file.""" + with tempfile.NamedTemporaryFile(mode='w', delete=False) as f: + f.write("Hello, World!") + temp_path = f.name + + try: + size = get_file_size(temp_path) + assert size is not None + assert size > 0 + finally: + os.unlink(temp_path) + + def test_nonexistent_file(self): + """Test getting size of non-existent file.""" + assert get_file_size("/path/that/does/not/exist") is None + + def test_empty_file(self): + """Test getting size of empty file.""" + with tempfile.NamedTemporaryFile(delete=False) as f: + temp_path = f.name + + try: + size = get_file_size(temp_path) + assert size == 0 + finally: + os.unlink(temp_path) + + def test_path_object(self): + """Test with Path object.""" + with tempfile.NamedTemporaryFile(mode='w', delete=False) as f: + f.write("test content") + temp_path = Path(f.name) + + try: + size = get_file_size(temp_path) + assert size is not None + assert size > 0 + finally: + os.unlink(temp_path) + + +class TestIsTextFile: + """Test cases for the is_text_file function.""" + + def test_text_file(self): + """Test with actual text file.""" + with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt') as f: + f.write("This is a text file with some content.") + temp_path = f.name + + try: + assert is_text_file(temp_path) is True + finally: + os.unlink(temp_path) + + def test_binary_file(self): + """Test with binary file.""" + with tempfile.NamedTemporaryFile(mode='wb', delete=False, suffix='.bin') as f: + f.write(b'\x00\x01\x02\x03\x04\x05') + temp_path = f.name + + try: + assert is_text_file(temp_path) is False + finally: + os.unlink(temp_path) + + def test_empty_file(self): + """Test with empty file.""" + with tempfile.NamedTemporaryFile(delete=False) as f: + temp_path = f.name + + try: + assert is_text_file(temp_path) is True # Empty files are considered text + finally: + os.unlink(temp_path) + + def test_unicode_file(self): + """Test with Unicode text file.""" + with tempfile.NamedTemporaryFile(mode='w', delete=False, encoding='utf-8') as f: + f.write("Hello ไธ–็•Œ! This is UTF-8 text.") + temp_path = f.name + + try: + assert is_text_file(temp_path) is True + finally: + os.unlink(temp_path) + + def test_nonexistent_file(self): + """Test with non-existent file.""" + assert is_text_file("/path/that/does/not/exist") is False + + +class TestNormalizePath: + """Test cases for the normalize_path function.""" + + def test_relative_path(self): + """Test normalizing relative paths.""" + # Note: These tests are environment-dependent + result = normalize_path("./test") + assert os.path.isabs(result) + assert result.endswith("test") + + def test_path_with_dots(self): + """Test path with dot components.""" + result = normalize_path("./dir/../file.txt") + assert os.path.isabs(result) + assert result.endswith("file.txt") + + def test_already_absolute(self): + """Test already absolute path.""" + abs_path = "/tmp/test/file.txt" + result = normalize_path(abs_path) + assert result == abs_path + + def test_path_object(self): + """Test with Path object.""" + path_obj = Path("./test/file.txt") + result = normalize_path(path_obj) + assert os.path.isabs(result) + assert isinstance(result, str) + + def test_edge_cases(self): + """Test edge cases.""" + assert normalize_path("") == "" + + # Current directory should normalize to absolute path + result = normalize_path(".") + assert os.path.isabs(result) \ No newline at end of file diff --git a/capabilities/markitect-utils/tests/test_integration.py b/capabilities/markitect-utils/tests/test_integration.py new file mode 100644 index 00000000..296c24de --- /dev/null +++ b/capabilities/markitect-utils/tests/test_integration.py @@ -0,0 +1,175 @@ +""" +Integration tests for markitect-utils capability. + +Tests the overall functionality and integration of the utility modules. +""" + +import tempfile +import os +from pathlib import Path + +import pytest +from markitect_utils import ( + slugify, safe_filename, is_valid_email, validate_required_fields, + truncate, normalize_path +) + + +class TestUtilityIntegration: + """Test integration between different utility functions.""" + + def test_filename_processing_workflow(self): + """Test a complete filename processing workflow.""" + # Start with user input + user_title = "My Great Article: A Case Study!" + user_email = "author@example.com" + + # Validate email + assert is_valid_email(user_email) is True + + # Create a slug for URL + slug = slugify(user_title) + assert slug == "my-great-article-a-case-study" + + # Create a safe filename + filename = safe_filename(f"{slug}.md") + assert filename == "my-great-article-a-case-study.md" + + # Truncate if too long + if len(filename) > 30: + filename = truncate(filename, 30, "โ€ฆ.md") + + assert len(filename) <= 30 + assert filename.endswith(".md") or filename.endswith("โ€ฆ.md") + + def test_content_validation_workflow(self): + """Test a content validation workflow.""" + # Simulate form data + form_data = { + "title": "My Article", + "content": "This is the content of my article.", + "author_email": "author@example.com", + "category": "", # Empty field + "tags": "python,utils,testing" + } + + required_fields = ["title", "content", "author_email", "category"] + + # Validate required fields + validation_result = validate_required_fields(form_data, required_fields) + + assert validation_result["missing"] == [] + assert validation_result["empty"] == ["category"] + + # Validate email format + if form_data.get("author_email"): + assert is_valid_email(form_data["author_email"]) is True + + # Process tags + if form_data.get("tags"): + tag_list = [slugify(tag.strip()) for tag in form_data["tags"].split(",")] + assert tag_list == ["python", "utils", "testing"] + + def test_file_operations_workflow(self): + """Test a file operations workflow.""" + # Create temporary directory for testing + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + + # Create some test files + test_content = "This is test content for the file." + + # Process filename through multiple utilities + original_name = "Test File: With Special/Characters!" + safe_name = safe_filename(original_name) + slug_name = slugify(safe_name.rsplit('.', 1)[0]) + '.txt' + + # Write file + file_path = temp_path / slug_name + file_path.write_text(test_content) + + # Verify file exists and get normalized path + normalized_path = normalize_path(file_path) + assert Path(normalized_path).exists() + + # Verify content length matches expectations + content_length = len(test_content) + assert content_length > 0 + + def test_data_processing_pipeline(self): + """Test a complete data processing pipeline.""" + # Raw data from external source + raw_data = [ + { + "userName": "JohnDoe123", + "emailAddress": "john.doe@example.com", + "websiteURL": "https://johndoe.example.com", + "projectVersion": "1.2.0", + "description": "This is a very long description that might need to be truncated for display purposes in certain UI components." + }, + { + "userName": "Jane_Smith", + "emailAddress": "invalid-email", + "websiteURL": "not-a-url", + "projectVersion": "invalid-version", + "description": "Short desc" + } + ] + + processed_data = [] + + for item in raw_data: + # Convert camelCase to snake_case (would need the function imported) + # For now, just demonstrate with available functions + + processed_item = { + "slug": slugify(item["userName"]), + "email_valid": is_valid_email(item["emailAddress"]), + "short_desc": truncate(item["description"], 50), + "safe_filename": safe_filename(f"{item['userName']}_profile.json") + } + + processed_data.append(processed_item) + + # Verify processing results + assert processed_data[0]["slug"] == "johndoe123" + assert processed_data[0]["email_valid"] is True + assert len(processed_data[0]["short_desc"]) <= 50 + assert processed_data[0]["safe_filename"] == "JohnDoe123_profile.json" + + assert processed_data[1]["slug"] == "jane-smith" + assert processed_data[1]["email_valid"] is False + assert processed_data[1]["short_desc"] == "Short desc" + assert processed_data[1]["safe_filename"] == "Jane_Smith_profile.json" + + def test_configuration_validation(self): + """Test configuration validation using multiple utilities.""" + # Simulate application configuration + config = { + "app_name": "My Application", + "version": "1.0.0", + "admin_email": "admin@myapp.com", + "base_url": "https://myapp.example.com", + "debug": True, + "secret_key": "", # Empty - should be flagged + } + + required_fields = ["app_name", "version", "admin_email", "base_url", "secret_key"] + + # Validate required fields + validation_result = validate_required_fields(config, required_fields) + assert validation_result["empty"] == ["secret_key"] + + # Validate specific formats + email_valid = is_valid_email(config["admin_email"]) + assert email_valid is True + + # Create safe directory name from app name + app_slug = slugify(config["app_name"]) + safe_dir_name = safe_filename(app_slug) + + assert app_slug == "my-application" + assert safe_dir_name == "my-application" + + # Validate version format would use is_valid_semver + # (assuming we had imported it in the integration test) \ No newline at end of file diff --git a/capabilities/markitect-utils/tests/test_string_utils.py b/capabilities/markitect-utils/tests/test_string_utils.py new file mode 100644 index 00000000..f04da3c8 --- /dev/null +++ b/capabilities/markitect-utils/tests/test_string_utils.py @@ -0,0 +1,149 @@ +""" +Test suite for string utility functions. +""" + +import pytest +from markitect_utils.string_utils import ( + slugify, + truncate, + camel_to_snake, + snake_to_camel, + strip_ansi_codes, +) + + +class TestSlugify: + """Test cases for the slugify function.""" + + def test_basic_slugify(self): + """Test basic string to slug conversion.""" + assert slugify("Hello World") == "hello-world" + assert slugify("My Great Article") == "my-great-article" + + def test_special_characters(self): + """Test handling of special characters.""" + assert slugify("Hello World!") == "hello-world" + assert slugify("Test@#$%^&*()_+") == "test" # underscore gets converted to separator + assert slugify("Multiple---Dashes") == "multiple-dashes" + + def test_custom_separator(self): + """Test custom separator.""" + assert slugify("Hello World", "_") == "hello_world" + assert slugify("My Great Article", ".") == "my.great.article" + + def test_edge_cases(self): + """Test edge cases.""" + assert slugify("") == "" + assert slugify(" ") == "" + assert slugify("---") == "" + assert slugify("Single") == "single" + + def test_unicode_handling(self): + """Test unicode character handling.""" + assert slugify("Cafรฉ") == "cafe" + assert slugify("naรฏve rรฉsumรฉ") == "naive-resume" + + +class TestTruncate: + """Test cases for the truncate function.""" + + def test_basic_truncation(self): + """Test basic string truncation.""" + text = "This is a long string that needs truncation" + assert truncate(text, 20) == "This is a long st..." + assert truncate(text, 10) == "This is..." + + def test_no_truncation_needed(self): + """Test when no truncation is needed.""" + assert truncate("Short", 10) == "Short" + assert truncate("Exact", 5) == "Exact" + + def test_custom_suffix(self): + """Test custom suffix.""" + text = "Long text here" + assert truncate(text, 10, "โ€ฆ") == "Long textโ€ฆ" + assert truncate(text, 10, " [more]") == "Lon [more]" + + def test_edge_cases(self): + """Test edge cases.""" + assert truncate("", 10) == "" + assert truncate("Test", 3) == "..." + assert truncate("Test", 2) == ".." + assert truncate("Test", 1) == "." + assert truncate("Test", 0) == "" + + +class TestCamelToSnake: + """Test cases for the camel_to_snake function.""" + + def test_basic_conversion(self): + """Test basic camelCase to snake_case conversion.""" + assert camel_to_snake("camelCase") == "camel_case" + assert camel_to_snake("PascalCase") == "pascal_case" + assert camel_to_snake("simpleWord") == "simple_word" + + def test_multiple_words(self): + """Test conversion with multiple words.""" + assert camel_to_snake("thisIsALongVariableName") == "this_is_a_long_variable_name" + assert camel_to_snake("XMLHttpRequest") == "xml_http_request" + assert camel_to_snake("JSONData") == "json_data" + + def test_edge_cases(self): + """Test edge cases.""" + assert camel_to_snake("") == "" + assert camel_to_snake("single") == "single" + assert camel_to_snake("ALLCAPS") == "allcaps" + assert camel_to_snake("A") == "a" + + +class TestSnakeToCamel: + """Test cases for the snake_to_camel function.""" + + def test_basic_conversion(self): + """Test basic snake_case to camelCase conversion.""" + assert snake_to_camel("snake_case") == "snakeCase" + assert snake_to_camel("simple_word") == "simpleWord" + assert snake_to_camel("single") == "single" + + def test_pascal_case(self): + """Test conversion to PascalCase.""" + assert snake_to_camel("snake_case", pascal_case=True) == "SnakeCase" + assert snake_to_camel("simple_word", pascal_case=True) == "SimpleWord" + assert snake_to_camel("single", pascal_case=True) == "Single" + + def test_multiple_underscores(self): + """Test handling of multiple underscores.""" + assert snake_to_camel("this_is_a_long_name") == "thisIsALongName" + assert snake_to_camel("this_is_a_long_name", pascal_case=True) == "ThisIsALongName" + + def test_edge_cases(self): + """Test edge cases.""" + assert snake_to_camel("") == "" + assert snake_to_camel("_") == "" + assert snake_to_camel("__") == "" + assert snake_to_camel("single") == "single" + + +class TestStripAnsiCodes: + """Test cases for the strip_ansi_codes function.""" + + def test_basic_ansi_removal(self): + """Test basic ANSI code removal.""" + assert strip_ansi_codes("\033[31mRed text\033[0m") == "Red text" + assert strip_ansi_codes("\033[32mGreen\033[0m text") == "Green text" + + def test_complex_ansi_codes(self): + """Test complex ANSI escape sequences.""" + text_with_ansi = "\033[1;31;40mBold red on black\033[0m" + assert strip_ansi_codes(text_with_ansi) == "Bold red on black" + + def test_no_ansi_codes(self): + """Test text without ANSI codes.""" + plain_text = "Just plain text" + assert strip_ansi_codes(plain_text) == plain_text + + def test_edge_cases(self): + """Test edge cases.""" + assert strip_ansi_codes("") == "" + assert strip_ansi_codes("\033[0m") == "" + assert strip_ansi_codes("Start\033[31mMiddle\033[0mEnd") == "StartMiddleEnd" \ No newline at end of file diff --git a/capabilities/markitect-utils/tests/test_validation_utils.py b/capabilities/markitect-utils/tests/test_validation_utils.py new file mode 100644 index 00000000..8ca0b369 --- /dev/null +++ b/capabilities/markitect-utils/tests/test_validation_utils.py @@ -0,0 +1,215 @@ +""" +Test suite for validation utility functions. +""" + +import pytest +from markitect_utils.validation_utils import ( + is_valid_email, + is_valid_url, + is_valid_semver, + validate_required_fields, +) + + +class TestIsValidEmail: + """Test cases for the is_valid_email function.""" + + def test_valid_emails(self): + """Test valid email addresses.""" + valid_emails = [ + "user@example.com", + "test.email@domain.co.uk", + "user+tag@example.org", + "user123@test-domain.com", + "a@b.co", + ] + for email in valid_emails: + assert is_valid_email(email) is True, f"Failed for: {email}" + + def test_invalid_emails(self): + """Test invalid email addresses.""" + invalid_emails = [ + "invalid.email", + "@example.com", + "user@", + "user@.com", + "user space@example.com", + "user@domain", + "user@@example.com", + "", + ] + for email in invalid_emails: + assert is_valid_email(email) is False, f"Should be invalid: {email}" + + def test_edge_cases(self): + """Test edge cases.""" + assert is_valid_email(None) is False + assert is_valid_email(123) is False + assert is_valid_email([]) is False + + +class TestIsValidUrl: + """Test cases for the is_valid_url function.""" + + def test_valid_urls(self): + """Test valid URLs.""" + valid_urls = [ + "https://example.com", + "http://test.org", + "https://sub.domain.com/path", + "http://localhost:8000", + "https://example.com/path?query=value", + "http://192.168.1.1:3000", + ] + for url in valid_urls: + assert is_valid_url(url) is True, f"Failed for: {url}" + + def test_invalid_urls(self): + """Test invalid URLs.""" + invalid_urls = [ + "not-a-url", + "ftp://example.com", + "example.com", + "://example.com", + "https://", + "http://.com", + "", + ] + for url in invalid_urls: + assert is_valid_url(url) is False, f"Should be invalid: {url}" + + def test_edge_cases(self): + """Test edge cases.""" + assert is_valid_url(None) is False + assert is_valid_url(123) is False + assert is_valid_url([]) is False + + +class TestIsValidSemver: + """Test cases for the is_valid_semver function.""" + + def test_valid_versions(self): + """Test valid semantic versions.""" + valid_versions = [ + "1.0.0", + "0.1.0", + "10.20.30", + "1.0.0-alpha", + "1.0.0-alpha.1", + "1.0.0-0.3.7", + "1.0.0-x.7.z.92", + "1.0.0+20130313144700", + "1.0.0-beta+exp.sha.5114f85", + ] + for version in valid_versions: + assert is_valid_semver(version) is True, f"Failed for: {version}" + + def test_invalid_versions(self): + """Test invalid semantic versions.""" + invalid_versions = [ + "1.0", + "1.0.0-", + "1.0.0+", + "01.0.0", + "1.01.0", + "1.0.01", + "1.0.0-", + "1.0.0+", + "v1.0.0", + "", + ] + for version in invalid_versions: + assert is_valid_semver(version) is False, f"Should be invalid: {version}" + + def test_edge_cases(self): + """Test edge cases.""" + assert is_valid_semver(None) is False + assert is_valid_semver(123) is False + assert is_valid_semver([]) is False + + +class TestValidateRequiredFields: + """Test cases for the validate_required_fields function.""" + + def test_all_fields_present(self): + """Test when all required fields are present and valid.""" + data = { + "name": "John Doe", + "email": "john@example.com", + "age": 30 + } + required = ["name", "email", "age"] + result = validate_required_fields(data, required) + + assert result == {"missing": [], "empty": []} + + def test_missing_fields(self): + """Test when some fields are missing.""" + data = { + "name": "John Doe", + "email": "john@example.com" + } + required = ["name", "email", "age", "phone"] + result = validate_required_fields(data, required) + + assert set(result["missing"]) == {"age", "phone"} + assert result["empty"] == [] + + def test_empty_fields(self): + """Test when some fields are empty.""" + data = { + "name": "John Doe", + "email": "", + "age": 30, + "phone": " " # whitespace only + } + required = ["name", "email", "age", "phone"] + result = validate_required_fields(data, required) + + assert result["missing"] == [] + assert set(result["empty"]) == {"email", "phone"} + + def test_mixed_issues(self): + """Test when there are both missing and empty fields.""" + data = { + "name": "John Doe", + "email": "", + } + required = ["name", "email", "age", "phone"] + result = validate_required_fields(data, required) + + assert set(result["missing"]) == {"age", "phone"} + assert result["empty"] == ["email"] + + def test_non_string_values(self): + """Test with non-string values.""" + data = { + "name": "John Doe", + "age": 0, # Zero should not be considered empty + "active": False, # False should not be considered empty + "score": None, # None should be considered empty + "items": [], # Empty list should be considered empty + } + required = ["name", "age", "active", "score", "items"] + result = validate_required_fields(data, required) + + assert result["missing"] == [] + assert set(result["empty"]) == {"score", "items"} + + def test_edge_cases(self): + """Test edge cases.""" + # Invalid inputs + result = validate_required_fields("not a dict", ["field"]) + assert result == {"missing": [], "empty": []} + + result = validate_required_fields({}, "not a list") + assert result == {"missing": [], "empty": []} + + # Empty data and requirements + result = validate_required_fields({}, []) + assert result == {"missing": [], "empty": []} + + # Empty requirements + data = {"name": "John"} + result = validate_required_fields(data, []) + assert result == {"missing": [], "empty": []} \ No newline at end of file diff --git a/markitect/cli.py b/markitect/cli.py index 0142d1f4..3e080f47 100644 --- a/markitect/cli.py +++ b/markitect/cli.py @@ -3478,7 +3478,7 @@ def config_stats(config, format): # Content Commands (Issue #38) -from .content.commands import content_get, content_stats +from markitect_content.commands import content_get, content_stats # Register content commands cli.add_command(content_get) diff --git a/pyproject.toml b/pyproject.toml index 9dae9209..f5c9aa78 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,6 +10,11 @@ readme = "README.md" requires-python = ">=3.8" dependencies = ["markdown-it-py", "PyYAML", "click>=8.0.0", "tabulate>=0.9.0", "jsonpath-ng>=1.5.0", "aiohttp>=3.8.0", "toml"] +[project.optional-dependencies] +capabilities = [ + "markitect-content @ file:./capabilities/markitect-content" +] + [project.scripts] markitect = "markitect.cli:main" tddai = "tddai_cli:main" diff --git a/tests/test_markdownmatters_integration.py b/tests/test_markdownmatters_integration.py index c09dd902..fb782107 100644 --- a/tests/test_markdownmatters_integration.py +++ b/tests/test_markdownmatters_integration.py @@ -9,7 +9,7 @@ import os from pathlib import Path from click.testing import CliRunner -from markitect.content.commands import content_get, content_stats +from markitect_content.commands import content_get, content_stats from markitect.matter_frontmatter.commands import frontmatter_get, frontmatter_keys from markitect.matter_contentmatter.commands import contentmatter_get, contentmatter_keys from markitect.matter_tailmatter.commands import tailmatter_get, tailmatter_check diff --git a/wiki b/wiki index 648d8247..0ff5843a 160000 --- a/wiki +++ b/wiki @@ -1 +1 @@ -Subproject commit 648d8247c45c7fc9f76384e8c4596802fa014756 +Subproject commit 0ff5843ab2f51940b70f9188c8c731dc82e75484