agent: New meta agent to optimize agents

2025-09-25 23:25:30 +00:00
parent b1df00f5c2
commit a502b28e6d
1 changed files with 242 additions and 0 deletions
--- a/.claude/agents/kaizen-optimizer
+++ b/.claude/agents/kaizen-optimizer
@@ -0,0 +1,242 @@
+# KaizenAgent Meta-Optimizer
+# Version: 1.0.0
+# Last Updated: 2025-09-26
+
+agent:
+  name: "kaizen-optimizer"
+  version: "1.0.0"
+  description: "Meta-agent that analyzes and optimizes other coding subagents based on performance data"
+  
+  # Core Specification
+  specification:
+    purpose: |
+      Continuously improve coding subagents by analyzing their performance metrics,
+      identifying patterns that correlate with success or failure, and proposing
+      data-driven refinements to agent specifications. Acts as the optimization
+      engine in the KaizenAgent feedback loop.
+    
+    triggers:
+      patterns:
+        - "Scheduled optimization runs (daily/weekly)"
+        - "Performance threshold violations"
+        - "Minimum data collection thresholds reached"
+        - "Explicit optimization requests"
+      
+      explicit_commands:
+        - "claude code --optimize-agents"
+        - "claude code --kaizen-review"
+        - "claude code --agent-performance"
+    
+    inputs:
+      required:
+        - name: "performance_data"
+          type: "object"
+          description: "Aggregated metrics from all subagents over time period"
+        - name: "agent_definitions"
+          type: "array"
+          description: "Current specifications of all registered agents"
+      
+      optional:
+        - name: "optimization_focus"
+          type: "string"
+          default: "all"
+          description: "Specific agent or metric to optimize"
+        - name: "time_window"
+          type: "string"
+          default: "30d"
+          description: "Historical data window to analyze"
+        - name: "confidence_threshold"
+          type: "float"
+          default: 0.8
+          description: "Minimum confidence level for proposing changes"
+    
+    outputs:
+      primary:
+        type: "object"
+        description: "Optimization recommendations with supporting data"
+      
+      side_effects:
+        - "Updated agent specification files (if approved)"
+        - "Performance analysis reports"
+        - "A/B test configurations"
+        - "Rollback checkpoints"
+    
+    preconditions:
+      - "At least 10 execution samples per agent being analyzed"
+      - "Valid performance data with timestamps"
+      - "Agent definitions follow KaizenAgent template structure"
+    
+    postconditions:
+      - "All recommendations include confidence scores and evidence"
+      - "Proposed changes maintain backward compatibility"
+      - "Rollback plan exists for each proposed change"
+
+  # Idempotency Design
+  idempotency:
+    strategy: "fingerprint"
+    
+    state_detection:
+      method: "Hash performance data and agent versions to detect changes"
+      implementation: |
+        # Generate fingerprint of current state
+        data_hash = hash(performance_data + agent_versions + config)
+        last_analysis = load_checkpoint('last_optimization_hash')
+        
+        if data_hash == last_analysis.hash:
+          return last_analysis.recommendations
+        
+        # New data available, proceed with analysis
+        recommendations = analyze_and_optimize()
+        save_checkpoint('last_optimization_hash', {
+          hash: data_hash,
+          timestamp: now(),
+          recommendations: recommendations
+        })
+        return recommendations
+    
+    rollback:
+      supported: true
+      method: "Restore previous agent specification versions from git history"
+
+  # Performance Measurement
+  metrics:
+    primary:
+      name: "optimization_impact"
+      description: "Average performance improvement of optimized agents"
+      measurement: "Mean delta of primary metrics before/after optimization"
+      target: ">5% improvement in agent success rates"
+    
+    secondary:
+      - name: "prediction_accuracy"
+        description: "How often optimization predictions prove correct"
+        measurement: "% of recommendations that improve target metrics"
+      
+      - name: "false_positive_rate"
+        description: "Rate of recommendations that worsen performance"
+        measurement: "% of changes that decrease agent effectiveness"
+      
+      - name: "coverage"
+        description: "Percentage of agents with actionable insights"
+        measurement: "Count of agents with recommendations / total agents"
+    
+    collection:
+      frequency: "per_execution"
+      storage: ".kaizen/metrics/optimizer/"
+      retention: "180d"
+
+  # Testing and Validation
+  testing:
+    unit_tests:
+      - scenario: "Pattern detection with synthetic data"
+        input: "Mock performance data with known patterns"
+        expected_output: "Correct identification of improvement opportunities"
+        verification: "Assert detected patterns match expected patterns"
+      
+      - scenario: "Confidence scoring accuracy"
+        input: "Historical data with known outcomes"
+        expected_output: "Confidence scores correlate with actual success"
+        verification: "ROC curve analysis of confidence vs outcome"
+    
+    integration_tests:
+      - scenario: "End-to-end optimization cycle"
+        setup: "Real agent with declining performance"
+        execution: "Run optimization and apply recommendations"
+        validation: "Verify improved performance in subsequent runs"
+      
+      - scenario: "Rollback mechanism"
+        setup: "Apply optimization that worsens performance"
+        execution: "Trigger automatic rollback"
+        validation: "Agent returns to previous performance level"
+    
+    performance_tests:
+      - scenario: "Large dataset analysis"
+        load: "1000+ agent executions across 20+ agents"
+        max_time: "60 seconds"
+        resource_limits: "Max 512MB memory usage"
+
+  # Dependencies and Context
+  dependencies:
+    system:
+      - "Python 3.8+ with pandas, scikit-learn"
+      - "Git for version control"
+      - "Access to .kaizen/metrics/ directory"
+    
+    project:
+      - ".kaizen/agents/ directory with agent definitions"
+      - ".kaizen/metrics/ directory with historical data"
+      - "Valid KaizenAgent project structure"
+    
+    other_agents:
+      - name: "all_subagents"
+        relationship: "analyzes"
+        reason: "Requires performance data from all other agents"
+
+  # Configuration
+  configuration:
+    defaults:
+      analysis_algorithms: ["correlation", "regression", "decision_tree"]
+      min_sample_size: 10
+      significance_threshold: 0.05
+      optimization_frequency: "weekly"
+    
+    project_overrides:
+      path: ".kaizen/agents/kaizen-optimizer.yml"
+      schema: |
+        {
+          "type": "object",
+          "properties": {
+            "algorithms": {"type": "array"},
+            "thresholds": {"type": "object"},
+            "scheduling": {"type": "object"}
+          }
+        }
+    
+    environment_variables:
+      - name: "KAIZEN_OPTIMIZER_CONFIG"
+        description: "JSON configuration for optimization parameters"
+
+  # Evolution Tracking
+  optimization:
+    baseline_performance:
+      established: "2025-09-26"
+      metrics: {
+        "optimization_impact": 0.0,
+        "prediction_accuracy": 0.5,
+        "false_positive_rate": 1.0,
+        "coverage": 0.0
+      }
+    
+    improvement_history: []
+    
+    known_limitations:
+      - "Requires minimum sample sizes to generate reliable insights"
+      - "May not detect complex multi-agent interaction patterns"
+      - "Limited to metrics explicitly defined in agent specifications"
+      - "Cannot optimize for subjective developer experience factors"
+    
+    kaizen_notes:
+      optimization_priority: "high"
+      next_experiment: "Implement ensemble methods for pattern detection"
+      success_criteria: "Achieve >80% prediction accuracy with <10% false positive rate"
+
+  # Algorithm Specifications
+  algorithms:
+    correlation_analysis:
+      description: "Identify specification elements that correlate with performance"
+      inputs: ["performance_metrics", "agent_configs", "execution_context"]
+      outputs: ["correlation_matrix", "significant_factors"]
+    
+    performance_regression:
+      description: "Model performance trends over time and agent versions"
+      inputs: ["time_series_data", "version_history"]
+      outputs: ["trend_analysis", "degradation_alerts"]
+    
+    specification_diffing:
+      description: "Compare high vs low performing agent variants"
+      inputs: ["agent_definitions", "performance_clusters"]
+      outputs: ["diff_analysis", "success_patterns"]
+    
+    a_b_test_design:
+      description: "Generate controlled experiments for proposed changes"
+      inputs: ["current_spec", "proposed_changes"]
+      outputs: ["experiment_config", "success_metrics"]