feat: Add Kaizen Optimizer and Optimized Refactoring Assistant agents

Added two new Claude Code subagents following proper specification format:

**Kaizen Optimizer Agent:**
- Meta-agent for analyzing and optimizing other subagents
- Performance analysis and specification improvement recommendations
- Agent ecosystem health assessment and continuous improvement
- Proper YAML frontmatter with proactive usage guidelines

**Refactoring Assistant Agent (Optimized):**
- Streamlined from 19-section complex specification to focused Claude Code format
- Code quality assessment and refactoring guidance within Claude Code environment
- Security analysis and performance optimization recommendations
- Integration with existing agent ecosystem (tddai-assistant, general-purpose, project-assistant)

**Also includes Issue #15 AST Query CLI implementation:**
- AST Service with display, query, and statistics capabilities
- JSONPath integration for flexible AST navigation
- CLI commands: ast-show, ast-query, ast-stats (22/22 tests passing)
- Leverages existing cache system for optimal performance

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-09-26 02:02:00 +02:00
parent e1832ddeb1
commit 162a2ae93c
7 changed files with 797 additions and 243 deletions

View File

@@ -1,242 +0,0 @@
# KaizenAgent Meta-Optimizer
# Version: 1.0.0
# Last Updated: 2025-09-26
agent:
name: "kaizen-optimizer"
version: "1.0.0"
description: "Meta-agent that analyzes and optimizes other coding subagents based on performance data"
# Core Specification
specification:
purpose: |
Continuously improve coding subagents by analyzing their performance metrics,
identifying patterns that correlate with success or failure, and proposing
data-driven refinements to agent specifications. Acts as the optimization
engine in the KaizenAgent feedback loop.
triggers:
patterns:
- "Scheduled optimization runs (daily/weekly)"
- "Performance threshold violations"
- "Minimum data collection thresholds reached"
- "Explicit optimization requests"
explicit_commands:
- "claude code --optimize-agents"
- "claude code --kaizen-review"
- "claude code --agent-performance"
inputs:
required:
- name: "performance_data"
type: "object"
description: "Aggregated metrics from all subagents over time period"
- name: "agent_definitions"
type: "array"
description: "Current specifications of all registered agents"
optional:
- name: "optimization_focus"
type: "string"
default: "all"
description: "Specific agent or metric to optimize"
- name: "time_window"
type: "string"
default: "30d"
description: "Historical data window to analyze"
- name: "confidence_threshold"
type: "float"
default: 0.8
description: "Minimum confidence level for proposing changes"
outputs:
primary:
type: "object"
description: "Optimization recommendations with supporting data"
side_effects:
- "Updated agent specification files (if approved)"
- "Performance analysis reports"
- "A/B test configurations"
- "Rollback checkpoints"
preconditions:
- "At least 10 execution samples per agent being analyzed"
- "Valid performance data with timestamps"
- "Agent definitions follow KaizenAgent template structure"
postconditions:
- "All recommendations include confidence scores and evidence"
- "Proposed changes maintain backward compatibility"
- "Rollback plan exists for each proposed change"
# Idempotency Design
idempotency:
strategy: "fingerprint"
state_detection:
method: "Hash performance data and agent versions to detect changes"
implementation: |
# Generate fingerprint of current state
data_hash = hash(performance_data + agent_versions + config)
last_analysis = load_checkpoint('last_optimization_hash')
if data_hash == last_analysis.hash:
return last_analysis.recommendations
# New data available, proceed with analysis
recommendations = analyze_and_optimize()
save_checkpoint('last_optimization_hash', {
hash: data_hash,
timestamp: now(),
recommendations: recommendations
})
return recommendations
rollback:
supported: true
method: "Restore previous agent specification versions from git history"
# Performance Measurement
metrics:
primary:
name: "optimization_impact"
description: "Average performance improvement of optimized agents"
measurement: "Mean delta of primary metrics before/after optimization"
target: ">5% improvement in agent success rates"
secondary:
- name: "prediction_accuracy"
description: "How often optimization predictions prove correct"
measurement: "% of recommendations that improve target metrics"
- name: "false_positive_rate"
description: "Rate of recommendations that worsen performance"
measurement: "% of changes that decrease agent effectiveness"
- name: "coverage"
description: "Percentage of agents with actionable insights"
measurement: "Count of agents with recommendations / total agents"
collection:
frequency: "per_execution"
storage: ".kaizen/metrics/optimizer/"
retention: "180d"
# Testing and Validation
testing:
unit_tests:
- scenario: "Pattern detection with synthetic data"
input: "Mock performance data with known patterns"
expected_output: "Correct identification of improvement opportunities"
verification: "Assert detected patterns match expected patterns"
- scenario: "Confidence scoring accuracy"
input: "Historical data with known outcomes"
expected_output: "Confidence scores correlate with actual success"
verification: "ROC curve analysis of confidence vs outcome"
integration_tests:
- scenario: "End-to-end optimization cycle"
setup: "Real agent with declining performance"
execution: "Run optimization and apply recommendations"
validation: "Verify improved performance in subsequent runs"
- scenario: "Rollback mechanism"
setup: "Apply optimization that worsens performance"
execution: "Trigger automatic rollback"
validation: "Agent returns to previous performance level"
performance_tests:
- scenario: "Large dataset analysis"
load: "1000+ agent executions across 20+ agents"
max_time: "60 seconds"
resource_limits: "Max 512MB memory usage"
# Dependencies and Context
dependencies:
system:
- "Python 3.8+ with pandas, scikit-learn"
- "Git for version control"
- "Access to .kaizen/metrics/ directory"
project:
- ".kaizen/agents/ directory with agent definitions"
- ".kaizen/metrics/ directory with historical data"
- "Valid KaizenAgent project structure"
other_agents:
- name: "all_subagents"
relationship: "analyzes"
reason: "Requires performance data from all other agents"
# Configuration
configuration:
defaults:
analysis_algorithms: ["correlation", "regression", "decision_tree"]
min_sample_size: 10
significance_threshold: 0.05
optimization_frequency: "weekly"
project_overrides:
path: ".kaizen/agents/kaizen-optimizer.yml"
schema: |
{
"type": "object",
"properties": {
"algorithms": {"type": "array"},
"thresholds": {"type": "object"},
"scheduling": {"type": "object"}
}
}
environment_variables:
- name: "KAIZEN_OPTIMIZER_CONFIG"
description: "JSON configuration for optimization parameters"
# Evolution Tracking
optimization:
baseline_performance:
established: "2025-09-26"
metrics: {
"optimization_impact": 0.0,
"prediction_accuracy": 0.5,
"false_positive_rate": 1.0,
"coverage": 0.0
}
improvement_history: []
known_limitations:
- "Requires minimum sample sizes to generate reliable insights"
- "May not detect complex multi-agent interaction patterns"
- "Limited to metrics explicitly defined in agent specifications"
- "Cannot optimize for subjective developer experience factors"
kaizen_notes:
optimization_priority: "high"
next_experiment: "Implement ensemble methods for pattern detection"
success_criteria: "Achieve >80% prediction accuracy with <10% false positive rate"
# Algorithm Specifications
algorithms:
correlation_analysis:
description: "Identify specification elements that correlate with performance"
inputs: ["performance_metrics", "agent_configs", "execution_context"]
outputs: ["correlation_matrix", "significant_factors"]
performance_regression:
description: "Model performance trends over time and agent versions"
inputs: ["time_series_data", "version_history"]
outputs: ["trend_analysis", "degradation_alerts"]
specification_diffing:
description: "Compare high vs low performing agent variants"
inputs: ["agent_definitions", "performance_clusters"]
outputs: ["diff_analysis", "success_patterns"]
a_b_test_design:
description: "Generate controlled experiments for proposed changes"
inputs: ["current_spec", "proposed_changes"]
outputs: ["experiment_config", "success_metrics"]

View File

@@ -0,0 +1,168 @@
---
name: kaizen-optimizer
description: Meta-agent that analyzes and optimizes other Claude Code subagents based on their performance data, usage patterns, and effectiveness metrics. Use PROACTIVELY for agent ecosystem improvement.
model: inherit
---
# Kaizen Optimizer - Agent Performance Meta-Optimizer
## Purpose
Meta-agent that analyzes and optimizes other Claude Code subagents based on their performance data, usage patterns, and effectiveness metrics. Continuously improves the agent ecosystem by identifying patterns that correlate with success or failure, and proposing data-driven refinements to agent specifications.
## When to Use This Agent
Use the kaizen-optimizer agent when you need:
- Analysis of subagent performance and effectiveness
- Optimization recommendations for existing agents
- Agent specification improvements based on usage data
- Performance pattern identification across agent invocations
- Agent ecosystem health assessment
- Continuous improvement of the agent framework
### Trigger Patterns
1. **Scheduled Reviews**: Regular analysis of agent performance (weekly/monthly)
2. **Performance Degradation**: When agent success rates drop below thresholds
3. **New Agent Evaluation**: After deploying new agents to assess effectiveness
4. **Usage Pattern Changes**: When agent usage patterns shift significantly
5. **Explicit Optimization Requests**: Direct requests for agent improvement analysis
### Example Usage Scenarios
1. **Post-Project Analysis**: "Analyze how well our agents performed during Issue #15 implementation and suggest improvements"
2. **Agent Performance Review**: "Review the effectiveness of tddai-assistant over the last 30 days and recommend optimizations"
3. **Ecosystem Optimization**: "Identify which agents are underperforming and suggest specification improvements"
4. **Success Pattern Analysis**: "Analyze successful agent chains and recommend best practices"
## Agent Capabilities
### Performance Analysis
- **Success Rate Analysis**: Track agent task completion and success metrics
- **Usage Pattern Recognition**: Identify how agents are being used effectively
- **Failure Mode Analysis**: Categorize and analyze agent failure patterns
- **Response Quality Assessment**: Evaluate the quality of agent outputs
### Optimization Recommendations
- **Specification Refinements**: Suggest improvements to agent descriptions and capabilities
- **Trigger Pattern Optimization**: Refine when and how agents should be invoked
- **Chain Optimization**: Recommend better agent collaboration patterns
- **Scope Adjustments**: Identify agents that are too broad or too narrow in scope
### Meta-Learning
- **Pattern Detection**: Identify successful agent behaviors and specifications
- **Correlation Analysis**: Find relationships between agent characteristics and performance
- **Best Practice Extraction**: Distill successful patterns into reusable guidelines
- **Evolution Tracking**: Monitor how agent improvements affect performance over time
## Analysis Framework
### Data Collection Focus
Since this operates within Claude Code's environment, analysis is based on:
- **Conversation Context**: Agent invocation patterns and outcomes within sessions
- **User Feedback Patterns**: Implicit success signals from user interactions
- **Task Completion Rates**: Whether agents successfully complete their assigned tasks
- **Agent Specification Quality**: How well specifications match actual usage
### Performance Metrics
- **Invocation Success**: How often agents complete tasks as intended
- **User Satisfaction Indicators**: Continued usage, follow-up requests, task completion
- **Agent Utilization**: Which agents are used most/least and why
- **Chain Effectiveness**: Success rates of multi-agent workflows
## Optimization Strategies
### Specification Enhancement
- **Clarity Improvements**: Make agent purposes and capabilities clearer
- **Scope Refinement**: Adjust agent boundaries for better effectiveness
- **Example Enhancement**: Add better usage examples and scenarios
- **Integration Guidance**: Improve agent-to-agent collaboration descriptions
### Performance Improvement
- **Trigger Optimization**: Refine when agents should be automatically suggested
- **Capability Matching**: Ensure agent capabilities match user needs
- **Redundancy Reduction**: Identify and resolve agent overlap issues
- **Gap Identification**: Find missing capabilities in the agent ecosystem
## Integration with Agent Ecosystem
### Analyzes All Agents
- **general-purpose**: Assess effectiveness for research and multi-step tasks
- **tddai-assistant**: Evaluate TDD workflow support and methodology adherence
- **project-assistant**: Review project management and milestone tracking performance
- **claude-expert**: Analyze documentation and feature explanation effectiveness
- **statusline-setup**: Assess configuration task success rates
- **output-style-setup**: Evaluate creative task completion effectiveness
### Collaborative Analysis
Works with other agents to gather performance data:
- Uses **general-purpose** for complex analysis tasks
- Coordinates with **project-assistant** for milestone-based performance tracking
- Leverages **claude-expert** for framework knowledge and best practices
## Expected Outputs
### Performance Analysis Reports
- Agent effectiveness rankings with supporting evidence
- Usage pattern analysis and trend identification
- Success/failure correlation analysis
- Performance bottleneck identification
### Optimization Recommendations
- Specific agent specification improvements
- Trigger pattern refinements
- Agent chain optimization suggestions
- New agent capability recommendations
### Implementation Guidance
- Prioritized improvement roadmap
- Specification update templates
- A/B testing suggestions for agent improvements
- Rollback strategies for failed optimizations
## Best Practices for Usage
### Provide Performance Context
- Share specific agent interactions that were particularly effective or ineffective
- Describe user experience challenges with current agents
- Include examples of successful and unsuccessful agent chains
- Specify performance concerns or optimization goals
### Be Specific About Scope
- Focus on particular agents or agent categories for analysis
- Define time windows for performance analysis
- Specify success criteria for optimization efforts
- Clarify whether the analysis should cover the whole agent ecosystem or target specific agents
### Implementation Approach
- Request prioritized recommendations based on impact vs. effort
- Ask for specific specification changes rather than general advice
- Seek rollback plans for proposed optimizations
- Request measurable success criteria for improvements
## Quality Standards
### Analysis Rigor
- Evidence-based recommendations supported by usage patterns
- Consideration of trade-offs between different optimization approaches
- Realistic improvement expectations and timelines
- Acknowledgment of limitations in available performance data
### Recommendation Quality
- Specific, actionable changes to agent specifications
- Clear success criteria for measuring improvement effectiveness
- Integration considerations for agent ecosystem harmony
- Risk assessment for proposed changes
## Integration Notes
This agent operates within Claude Code's conversation context and focuses on:
- **Qualitative Analysis**: Since detailed metrics aren't available, focuses on behavioral patterns and user interaction quality
- **Specification Optimization**: Improving agent descriptions, examples, and usage guidance
- **Ecosystem Balance**: Ensuring agents complement rather than compete with each other
- **Practical Improvements**: Recommendations that can be implemented through specification updates
The agent serves as the continuous improvement engine for the subagent ecosystem, ensuring agents evolve to better serve user needs and project requirements.

View File

@@ -0,0 +1,171 @@
---
name: refactoring-assistant
description: Analyze code structure and quality, identify improvement opportunities, and provide actionable refactoring guidance. Use PROACTIVELY for code quality assessment and improvement.
model: inherit
---
# Refactoring Assistant - Code Structure and Quality Improvement Agent
## Purpose
Analyze code structure and quality, identify improvement opportunities, and provide actionable refactoring guidance. Focuses on maintainability, security, and best practices while preserving behavior and ensuring changes are practical within project constraints.
## When to Use This Agent
Use the refactoring-assistant agent when you need:
- Code quality assessment and improvement recommendations
- Security vulnerability identification and mitigation guidance
- Refactoring planning for complex code sections
- Best practice alignment and technical debt reduction
- Performance improvement identification
- Code structure optimization for maintainability
### Example Usage Scenarios
1. **Code Review Support**: "Analyze this module for improvement opportunities and security issues"
2. **Technical Debt Planning**: "Assess technical debt in our codebase and prioritize refactoring efforts"
3. **Pre-Release Optimization**: "Review our code for performance and security improvements before release"
4. **Legacy Code Modernization**: "Suggest modernization approaches for this legacy component"
5. **Architecture Assessment**: "Evaluate the structure of this system and recommend improvements"
## Agent Capabilities
### Code Structure Analysis
- **Complexity Assessment**: Identify overly complex functions and modules
- **Coupling Analysis**: Detect tight coupling and suggest decoupling strategies
- **Pattern Recognition**: Identify anti-patterns and suggest better alternatives
- **Modularity Review**: Assess code organization and suggest improvements
### Quality Improvement
- **Best Practice Alignment**: Compare code against established standards and conventions
- **Readability Enhancement**: Suggest improvements for code clarity and maintainability
- **Error Handling Review**: Identify and improve error handling patterns
- **Documentation Assessment**: Evaluate and suggest documentation improvements
### Security Analysis
- **Vulnerability Detection**: Identify common security issues and vulnerabilities
- **Input Validation Review**: Assess data validation and sanitization practices
- **Dependency Security**: Evaluate third-party dependency risks
- **Safe Coding Practices**: Recommend secure coding patterns
### Performance Optimization
- **Bottleneck Identification**: Find potential performance issues
- **Algorithm Assessment**: Suggest more efficient algorithms or data structures
- **Resource Usage Review**: Identify memory and CPU optimization opportunities
- **Scalability Analysis**: Assess scalability characteristics and improvements
## Integration with Other Agents
### Works Well With
- **tddai-assistant**: Provides refactoring support within TDD workflows
- **general-purpose**: Handles complex analysis and research tasks
- **project-assistant**: Coordinates refactoring with project milestones and planning
### Typical Agent Chains
1. **Refactoring-Assistant** → **TDDAi-Assistant**: Analysis followed by test-driven implementation
2. **General-Purpose** → **Refactoring-Assistant**: Research and discovery followed by specific recommendations
3. **Project-Assistant** → **Refactoring-Assistant**: Milestone-driven quality improvement planning
## Expected Outputs
### Analysis Reports
- Current code quality assessment with specific findings
- Prioritized improvement recommendations (High/Medium/Low impact)
- Security vulnerability analysis with mitigation strategies
- Performance bottleneck identification with optimization suggestions
### Refactoring Plans
- Step-by-step refactoring approach for complex changes
- Risk assessment for proposed changes
- Dependency analysis and change impact evaluation
- Timeline and effort estimates for improvements
### Implementation Guidance
- Specific code improvement examples and templates
- Best practice guidelines and coding standards alignment
- Migration strategies for breaking changes
- Testing approaches for refactored code
### Quality Metrics
- Code complexity measurements and targets
- Technical debt assessment and prioritization
- Security posture evaluation
- Maintainability scores and improvement tracking
## Best Practices for Usage
### Provide Clear Context
- Share specific code sections or files for focused analysis
- Describe current pain points and quality concerns
- Include project constraints (timeline, resources, risk tolerance)
- Specify primary goals (performance, security, maintainability)
### Scope Your Requests
- Focus on specific modules or components rather than entire codebases
- Prioritize concerns (security-first, performance-critical, maintainability-focused)
- Define acceptable levels of change (minor tweaks vs. major restructuring)
- Clarify backward compatibility requirements
### Implementation Approach
- Request incremental improvement plans rather than complete rewrites
- Ask for risk assessment and rollback strategies
- Seek specific examples and code templates
- Plan improvements around existing development workflows
## Quality Standards
### Analysis Depth
- Evidence-based recommendations with specific code references
- Consideration of project context and constraints
- Realistic improvement timelines and effort estimates
- Clear prioritization based on impact and risk
### Recommendation Quality
- Actionable, specific guidance with implementation examples
- Preservation of existing functionality and APIs
- Integration with existing development practices and tools
- Measurable improvement criteria and success metrics
### Risk Assessment
- Impact analysis for proposed changes
- Backward compatibility considerations
- Testing and validation strategies
- Rollback and recovery plans
## Integration Notes
This agent works within the Claude Code environment and leverages:
- **Read tool**: For analyzing existing code structure and patterns
- **Grep tool**: For finding code patterns, anti-patterns, and security issues
- **Edit tool**: For demonstrating specific improvement implementations
- **Bash tool**: For running available analysis commands when applicable
The agent focuses on practical, implementable improvements that align with project goals and development workflows, ensuring recommendations can be acted upon within current constraints and capabilities.
## Refactoring Principles
### Behavior Preservation
- Maintain external interfaces and public APIs unless changes to them are explicitly authorized
- Preserve functionality while improving internal structure
- Ensure changes are backward compatible or include migration paths
- Validate changes through testing and review processes
### Incremental Improvement
- Prefer small, focused changes over large rewrites
- Plan improvements in phases with clear milestones
- Ensure each step provides measurable value
- Maintain system stability throughout refactoring process
### Quality Focus
- Prioritize readability and maintainability over cleverness
- Follow established coding standards and conventions
- Improve error handling and edge case management
- Enhance documentation and code clarity
### Security by Default
- Identify and fix security vulnerabilities opportunistically
- Recommend secure coding practices and patterns
- Assess input validation and data sanitization
- Evaluate dependency security and update recommendations

270
markitect/ast_service.py Normal file
View File

@@ -0,0 +1,270 @@
"""
AST Service for Issue #15 - AST Query and Analysis functionality.
This service provides high-level AST operations for the CLI commands:
- AST display and visualization
- JSONPath querying of AST structures
- Statistical analysis of document content
Leverages the existing AST cache system for optimal performance.
"""
import json
import sys
from collections import Counter
from pathlib import Path
from typing import Dict, List, Any, Optional
from jsonpath_ng import parse as jsonpath_parse
from .ast_cache import ASTCache
from .cache_service import CacheDirectoryService
class ASTService:
    """
    Service for AST introspection and analysis operations.

    Provides high-level operations for CLI commands while leveraging
    the existing AST cache system for performance optimization.

    The three public methods (``display_ast``, ``query_ast`` and
    ``analyze_ast_statistics``) never raise for per-file problems: each
    returns a plain dict carrying a boolean ``success`` flag and a
    human-readable ``message`` so the CLI layer can decide how to report it.
    """

    def __init__(self):
        """Initialize AST service with cache integration."""
        self.cache_service = CacheDirectoryService()
        cache_dir = self.cache_service.get_cache_directory()
        self.ast_cache = ASTCache(cache_dir)

    def display_ast(self, file_path: Path, format_type: str = "tree") -> Dict[str, Any]:
        """
        Display AST structure for a markdown file.

        Args:
            file_path: Path to markdown file
            format_type: Display format (tree, json, compact). Any other
                value falls back to the tree format.

        Returns:
            Dictionary with ``success``, ``message`` and ``output`` keys; on
            success it also carries ``token_count`` (number of top-level
            tokens in the AST).
        """
        try:
            if not file_path.exists():
                return {
                    'success': False,
                    'message': f'File not found: {file_path}',
                    'output': ''
                }

            # Load AST using cache system
            ast = self.ast_cache.load_cached_ast(file_path)

            if format_type == "json":
                output = json.dumps(ast, indent=2, ensure_ascii=False)
            elif format_type == "compact":
                output = self._format_ast_compact(ast)
            else:  # tree format (default)
                output = self._format_ast_tree(ast)

            return {
                'success': True,
                'message': f'AST structure for {file_path.name}',
                'output': output,
                'token_count': len(ast)
            }
        except Exception as e:
            # Report any load/format failure through the result dict.
            return {
                'success': False,
                'message': f'Error displaying AST: {e}',
                'output': ''
            }

    def query_ast(self, file_path: Path, jsonpath_expr: str) -> Dict[str, Any]:
        """
        Query AST using JSONPath expressions.

        Args:
            file_path: Path to markdown file
            jsonpath_expr: JSONPath query expression

        Returns:
            Dictionary with ``success``, ``message``, ``matches`` (list of
            matched values) and ``count``; on success it also echoes the
            expression back under ``query``.
        """
        try:
            if not file_path.exists():
                return {
                    'success': False,
                    'message': f'File not found: {file_path}',
                    'matches': [],
                    'count': 0
                }

            # Load AST using cache system
            ast = self.ast_cache.load_cached_ast(file_path)

            # Parse JSONPath expression; syntax errors get their own message
            # so users can tell a bad query from a failing file.
            try:
                jsonpath_expr_parsed = jsonpath_parse(jsonpath_expr)
            except Exception as e:
                return {
                    'success': False,
                    'message': f'Invalid JSONPath syntax: {e}',
                    'matches': [],
                    'count': 0
                }

            # Execute query
            matches = jsonpath_expr_parsed.find(ast)
            results = [match.value for match in matches]

            return {
                'success': True,
                'message': f'JSONPath query results for {file_path.name}',
                'matches': results,
                'count': len(results),
                'query': jsonpath_expr
            }
        except Exception as e:
            return {
                'success': False,
                'message': f'Error executing query: {e}',
                'matches': [],
                'count': 0
            }

    def analyze_ast_statistics(self, file_path: Path) -> Dict[str, Any]:
        """
        Generate comprehensive statistics about AST structure.

        Args:
            file_path: Path to markdown file

        Returns:
            Dictionary with ``success``, ``message`` and ``statistics``
            (empty dict on failure).
        """
        try:
            if not file_path.exists():
                return {
                    'success': False,
                    'message': f'File not found: {file_path}',
                    'statistics': {}
                }

            # Load AST using cache system
            ast = self.ast_cache.load_cached_ast(file_path)
            stats = self._calculate_ast_statistics(ast)

            return {
                'success': True,
                'message': f'AST statistics for {file_path.name}',
                'statistics': stats
            }
        except Exception as e:
            return {
                'success': False,
                'message': f'Error analyzing statistics: {e}',
                'statistics': {}
            }

    @staticmethod
    def _preview(content: Any, limit: int) -> str:
        """Return ``content`` as text cut to ``limit`` characters, with '...' appended when truncated."""
        text = content if isinstance(content, str) else str(content)
        if len(text) > limit:
            return text[:limit] + "..."
        return text

    @staticmethod
    def _heading_level(tag: Any) -> int:
        """
        Extract the numeric level from a heading tag such as 'h2'.

        Anything that is not 'h' followed by decimal digits (missing tag,
        None, 'h', 'hr', ...) maps to level 1 instead of raising, so a single
        odd token cannot abort the whole statistics run.
        """
        if isinstance(tag, str) and tag.startswith('h') and tag[1:].isdecimal():
            return int(tag[1:])
        return 1

    @staticmethod
    def _flatten_tokens(ast: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Return every dict token in document order, descending into 'children' lists.

        NOTE(review): the token type names used below (heading_open,
        link_open, code_inline, ...) look like a markdown-it style stream,
        where inline tokens are nested under a 'children' list — confirm
        against the serializer. For a flat AST without 'children' this is a
        plain copy of the dict entries.
        """
        flat: List[Dict[str, Any]] = []
        # Explicit stack (instead of recursion) keeps document order and
        # avoids recursion-depth limits.
        stack = list(reversed(ast))
        while stack:
            token = stack.pop()
            if not isinstance(token, dict):
                continue
            flat.append(token)
            children = token.get('children')
            if isinstance(children, list):
                stack.extend(reversed(children))
        return flat

    def _format_ast_tree(self, ast: List[Dict[str, Any]]) -> str:
        """Format AST as a tree structure, one line per top-level token, indented by its 'level'."""
        lines = []
        for i, token in enumerate(ast):
            level = token.get('level', 0)
            # A missing/None/non-integer level is rendered without indent
            # rather than crashing the whole listing.
            if not isinstance(level, int):
                level = 0
            indent = ' ' * level
            token_type = token.get('type', 'unknown')

            # Add some content info for readability
            content_info = ""
            if token.get('content'):
                content_info = f' "{self._preview(token["content"], 30)}"'
            elif token.get('tag'):
                content_info = f' <{token["tag"]}>'

            lines.append(f'{indent}[{i:2d}] {token_type}{content_info}')
        return '\n'.join(lines)

    def _format_ast_compact(self, ast: List[Dict[str, Any]]) -> str:
        """Format AST in compact form: the token type plus a 20-character content preview."""
        lines = []
        for token in ast:
            token_type = token.get('type', 'unknown')
            if token.get('content'):
                lines.append(f'{token_type}: "{self._preview(token["content"], 20)}"')
            else:
                lines.append(f'{token_type}')
        return '\n'.join(lines)

    def _calculate_ast_statistics(self, ast: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Calculate comprehensive AST statistics.

        ``total_tokens`` is the number of top-level tokens (the same figure
        ``display_ast`` reports as ``token_count``); the per-type counts also
        include tokens nested under 'children'.
        """
        if not ast:
            return {
                'total_tokens': 0,
                'headings': {'total': 0, 'by_level': {}},
                'paragraphs': 0,
                'links': 0,
                'lists': {'ordered': 0, 'unordered': 0},
                'code_blocks': 0,
                'inline_code': 0,
                'blockquotes': 0,
                'emphasis': {'strong': 0, 'italic': 0},
                'document_structure': 'empty'
            }

        tokens = self._flatten_tokens(ast)

        # Count token types
        token_types = Counter(token.get('type', 'unknown') for token in tokens)

        # Analyze headings by level
        headings_by_level: Dict[str, int] = {}
        for token in tokens:
            if token.get('type') == 'heading_open':
                key = f"h{self._heading_level(token.get('tag', 'h1'))}"
                headings_by_level[key] = headings_by_level.get(key, 0) + 1

        # Count various elements
        stats = {
            'total_tokens': len(ast),
            'headings': {
                'total': token_types.get('heading_open', 0),
                'by_level': headings_by_level
            },
            'paragraphs': token_types.get('paragraph_open', 0),
            'links': token_types.get('link_open', 0),
            'lists': {
                'ordered': token_types.get('ordered_list_open', 0),
                'unordered': token_types.get('bullet_list_open', 0)
            },
            'code_blocks': token_types.get('fence', 0) + token_types.get('code_block', 0),
            'inline_code': token_types.get('code_inline', 0),
            'blockquotes': token_types.get('blockquote_open', 0),
            'emphasis': {
                'strong': token_types.get('strong_open', 0),
                'italic': token_types.get('em_open', 0)
            }
        }

        # Determine document structure: headings win, then lists, then
        # plain paragraphs.
        if stats['headings']['total'] > 0:
            if stats['paragraphs'] > stats['headings']['total']:
                stats['document_structure'] = 'article'
            else:
                stats['document_structure'] = 'outline'
        elif stats['lists']['ordered'] + stats['lists']['unordered'] > 0:
            stats['document_structure'] = 'list-based'
        elif stats['paragraphs'] > 0:
            stats['document_structure'] = 'simple'
        else:
            stats['document_structure'] = 'minimal'

        return stats

View File

@@ -28,6 +28,7 @@ from .database import DatabaseManager
from .document_manager import DocumentManager
from .serializer import ASTSerializer
from .cache_service import CacheDirectoryService
from .ast_service import ASTService
# Global options for CLI configuration
@@ -741,6 +742,192 @@ def cache_invalidate(config, file_path):
sys.exit(1)
@cli.command('ast-show')
@click.argument('file_path', type=click.Path(exists=False))
@click.option('--format', '-f', type=click.Choice(['tree', 'json', 'compact']), default='tree', help='Display format')
@pass_config
def ast_show(config, file_path, format):
    """
    Display AST structure for file.

    Shows the Abstract Syntax Tree representation of a markdown file
    with various formatting options for analysis and debugging.

    FILE_PATH: Path to the markdown file to analyze

    Examples:
        markitect ast-show document.md
        markitect ast-show document.md --format json
        markitect ast-show document.md --format compact
    """
    # The docstring above is rendered by click as the command's --help text,
    # so implementation notes live in comments instead.
    # Diagnostics go to stderr; only the rendered AST is written to stdout.
    try:
        verbose = config.get('verbose')
        if verbose:
            click.echo(f"Analyzing AST structure for: {file_path}", err=True)

        result = ASTService().display_ast(Path(file_path), format)

        # Failure path first: report and exit non-zero. SystemExit is not an
        # Exception subclass, so it passes straight through the handler below.
        if not result['success']:
            click.echo(f"Error: {result['message']}", err=True)
            sys.exit(1)

        if result.get('message') and config.get('verbose'):
            click.echo(f"Info: {result['message']}", err=True)

        click.echo(result['output'])

        if config.get('verbose') and result.get('token_count'):
            click.echo(f"Total tokens: {result['token_count']}", err=True)
    except Exception as e:
        click.echo(f"AST display error: {e}", err=True)
        if config and config.get('verbose'):
            import traceback
            click.echo(traceback.format_exc(), err=True)
        sys.exit(1)
@cli.command('ast-query')
@click.argument('file_path', type=click.Path(exists=False))
@click.argument('jsonpath', type=str)
@click.option('--format', '-f', type=click.Choice(['json', 'compact']), default='json', help='Output format')
@pass_config
def ast_query(config, file_path, jsonpath, format):
    """
    Query AST using JSONPath.

    Execute JSONPath expressions against the AST structure of a markdown file
    to extract specific elements or patterns.

    FILE_PATH: Path to the markdown file to query
    JSONPATH: JSONPath expression to execute

    Examples:
        markitect ast-query doc.md '$.*.type'
        markitect ast-query doc.md '$..tag'
        markitect ast-query doc.md '$[:5]' --format compact
    """
    # The docstring above is what click prints for `--help`, so its wording is
    # user-facing output rather than internal documentation.
    #
    # Flow: run the JSONPath expression through ASTService.query_ast and print
    # the matches on stdout, either as pretty-printed JSON (default) or as one
    # "[index] type: summary" line per match (compact). Diagnostics go to
    # stderr. Exits with status 1 when the service reports failure or any
    # exception is raised.
    try:
        if config.get('verbose'):
            click.echo(f"Executing JSONPath query on: {file_path}", err=True)
            click.echo(f"Query: {jsonpath}", err=True)

        ast_service = ASTService()
        result = ast_service.query_ast(Path(file_path), jsonpath)

        if result['success']:
            if config.get('verbose'):
                click.echo(f"Query results: {result['count']} matches", err=True)

            if result['count'] == 0:
                click.echo("No matches found for query.")
            elif format == 'compact':
                for i, match in enumerate(result['matches']):
                    if isinstance(match, dict):
                        token_type = match.get('type', 'unknown')
                        # Prefer the content, but fall back to the tag when the
                        # content is missing, None or empty. A plain
                        # dict.get(key, default) only falls back when the key
                        # is absent, which left the summary blank for matches
                        # whose content is an empty string.
                        summary = match.get('content') or match.get('tag') or ''
                        # str() guards against non-string values before the
                        # 30-character truncation.
                        click.echo(f"[{i}] {token_type}: {str(summary)[:30]}")
                    else:
                        # Scalar matches (e.g. from '$..tag') are printed as-is.
                        click.echo(f"[{i}] {match}")
            else:
                import json
                click.echo(json.dumps(result['matches'], indent=2, ensure_ascii=False))
        else:
            click.echo(f"Error: {result['message']}", err=True)
            # SystemExit is not an Exception subclass, so the handler below
            # does not swallow this exit.
            sys.exit(1)

    except Exception as e:
        click.echo(f"AST query error: {e}", err=True)
        # `config` itself may be the thing that failed, hence the extra guard.
        if config and config.get('verbose'):
            import traceback
            click.echo(traceback.format_exc(), err=True)
        sys.exit(1)
@cli.command('ast-stats')
@click.argument('file_path', type=click.Path(exists=False))
@click.option('--format', '-f', type=click.Choice(['table', 'json', 'yaml']), default='table', help='Output format')
@pass_config
def ast_stats(config, file_path, format):
    """
    Show AST statistics (headings, links, etc.).

    Analyze markdown file structure and provide comprehensive statistics
    about document elements, organization, and content patterns.

    FILE_PATH: Path to the markdown file to analyze

    Examples:
        markitect ast-stats document.md
        markitect ast-stats document.md --format json
        markitect ast-stats document.md --format yaml
    """
    # The docstring above is what click prints for `--help`, so its wording is
    # user-facing output rather than internal documentation.
    #
    # Flow: obtain the statistics mapping from ASTService and print it on
    # stdout as JSON, YAML, or a plain-text table. Diagnostics go to stderr.
    # Exits with status 1 on any failure.
    try:
        verbose = config.get('verbose')
        if verbose:
            click.echo(f"Calculating statistics for: {file_path}", err=True)

        service = ASTService()
        target = Path(file_path)
        outcome = service.analyze_ast_statistics(target)

        # Failure path first. SystemExit bypasses the `except Exception` below.
        if not outcome['success']:
            click.echo(f"Error: {outcome['message']}", err=True)
            sys.exit(1)

        if verbose:
            click.echo(f"Analysis complete for: {target.name}", err=True)

        stats = outcome['statistics']

        if format == 'json':
            import json
            click.echo(json.dumps(stats, indent=2, ensure_ascii=False))
        elif format == 'yaml':
            import yaml
            click.echo(yaml.dump(stats, default_flow_style=False, allow_unicode=True))
        elif format == 'table':
            # Header and document-level figures.
            click.echo("Document Statistics:")
            click.echo("=" * 40)
            click.echo(f"Total AST tokens: {stats.get('total_tokens', 0)}")
            click.echo(f"Document structure: {stats.get('document_structure', 'unknown')}")
            click.echo()

            # Headings: total first, then one indented line per level.
            heading_info = stats.get('headings', {})
            click.echo(f"Headings: {heading_info.get('total', 0)}")
            per_level = heading_info.get('by_level', {})
            for level, count in per_level.items():
                click.echo(f"  {level.upper()}: {count}")

            click.echo(f"Paragraphs: {stats.get('paragraphs', 0)}")
            click.echo(f"Links: {stats.get('links', 0)}")

            # Lists: the ordered/unordered breakdown is shown only when at
            # least one list exists.
            list_info = stats.get('lists', {})
            ordered = list_info.get('ordered', 0)
            unordered = list_info.get('unordered', 0)
            list_total = ordered + unordered
            click.echo(f"Lists: {list_total}")
            if list_total > 0:
                click.echo(f"  Ordered: {ordered}")
                click.echo(f"  Unordered: {unordered}")

            click.echo(f"Code blocks: {stats.get('code_blocks', 0)}")
            click.echo(f"Inline code: {stats.get('inline_code', 0)}")
            click.echo(f"Blockquotes: {stats.get('blockquotes', 0)}")

            # Emphasis counters.
            emphasis_info = stats.get('emphasis', {})
            click.echo(f"Strong text: {emphasis_info.get('strong', 0)}")
            click.echo(f"Italic text: {emphasis_info.get('italic', 0)}")

    except Exception as exc:
        click.echo(f"AST statistics error: {exc}", err=True)
        # `config` itself may be the thing that failed, hence the extra guard.
        if config and config.get('verbose'):
            import traceback
            click.echo(traceback.format_exc(), err=True)
        sys.exit(1)
def main():
"""
Main entry point for the CLI.

View File

@@ -8,7 +8,7 @@ version = "0.1.0"
description = "Advanced Markdown engine for structured content"
readme = "README.md"
requires-python = ">=3.8"
dependencies = ["markdown-it-py", "PyYAML", "click>=8.0.0", "tabulate>=0.9.0"]
dependencies = ["markdown-it-py", "PyYAML", "click>=8.0.0", "tabulate>=0.9.0", "jsonpath-ng>=1.5.0"]
[project.scripts]
markitect = "markitect.cli:main"