kaizen-agentic/wiki/AgentKaizenOptimizer.md

AgentKaizenOptimizer

*One agent to improve them all*

# KaizenAgent Meta-Optimizer
# Version: 1.0.0
# Last Updated: 2025-09-26

agent:
  # Identity of this agent definition.
  name: 'kaizen-optimizer'
  # Quoted so the value is always read as a string; the header comment at the
  # top of the file carries the same version number.
  version: '1.0.0'
  description: 'Meta-agent that analyzes and optimizes other coding subagents based on performance data'

  # Core Specification
  # Covers the agent's purpose, what starts a run, what it consumes and
  # produces, and the conditions stated to hold before and after a run.
  specification:
    # Literal block scalar: the line breaks in the text below are preserved.
    purpose: |
      Continuously improve coding subagents by analyzing their performance metrics,
      identifying patterns that correlate with success or failure, and proposing
      data-driven refinements to agent specifications. Acts as the optimization
      engine in the KaizenAgent feedback loop.

    # What starts an optimization run.
    triggers:
      # Free-text descriptions of the triggering situations.
      patterns:
        - 'Scheduled optimization runs (daily/weekly)'
        - 'Performance threshold violations'
        - 'Minimum data collection thresholds reached'
        - 'Explicit optimization requests'

      # Command lines listed as explicit triggers.
      explicit_commands:
        - 'claude code --optimize-agents'
        - 'claude code --kaizen-review'
        - 'claude code --agent-performance'

    # Each input entry carries name/type/description; optional entries also
    # carry a default.
    inputs:
      required:
        - name: 'performance_data'
          type: 'object'
          description: 'Aggregated metrics from all subagents over time period'
        - name: 'agent_definitions'
          type: 'array'
          description: 'Current specifications of all registered agents'

      optional:
        - name: 'optimization_focus'
          type: 'string'
          default: 'all'
          description: 'Specific agent or metric to optimize'
        - name: 'time_window'
          type: 'string'
          default: '30d'
          description: 'Historical data window to analyze'
        - name: 'confidence_threshold'
          type: 'float'
          # Numeric default (unquoted), matching the declared float type.
          default: 0.8
          description: 'Minimum confidence level for proposing changes'

    outputs:
      # The value a run returns.
      primary:
        type: 'object'
        description: 'Optimization recommendations with supporting data'

      # Artifacts a run may create or change besides the primary output.
      side_effects:
        - 'Updated agent specification files (if approved)'
        - 'Performance analysis reports'
        - 'A/B test configurations'
        - 'Rollback checkpoints'

    # Stated requirements on the inputs before analysis starts.
    preconditions:
      - 'At least 10 execution samples per agent being analyzed'
      - 'Valid performance data with timestamps'
      - 'Agent definitions follow KaizenAgent template structure'

    # Stated guarantees about the recommendations a run yields.
    postconditions:
      - 'All recommendations include confidence scores and evidence'
      - 'Proposed changes maintain backward compatibility'
      - 'Rollback plan exists for each proposed change'

  # Idempotency Design
  # A run over unchanged inputs returns the previously stored recommendations
  # instead of re-analyzing; the decision is made by comparing fingerprints.
  idempotency:
    strategy: "fingerprint"

    state_detection:
      method: "Hash performance data and agent versions to detect changes"
      # Pseudocode (stored verbatim as a string). Two points matter here:
      #   * the fingerprint must be a content digest over a canonical
      #     serialization so it is identical across processes and runs;
      #   * on the first run there is no checkpoint yet, so the comparison
      #     must be guarded before reading the stored hash.
      implementation: |
        # Generate a stable fingerprint of the current state.
        # Serialize canonically (sorted keys, fixed encoding) before digesting
        # so equal inputs always produce the same fingerprint.
        data_hash = sha256(canonical_json([performance_data, agent_versions, config]))
        last_analysis = load_checkpoint('last_optimization_hash')

        # No checkpoint exists on the first run; only reuse stored results
        # when a checkpoint is present and its fingerprint matches.
        if last_analysis is not None and data_hash == last_analysis.hash:
          return last_analysis.recommendations

        # New data available (or first run), proceed with analysis
        recommendations = analyze_and_optimize()
        save_checkpoint('last_optimization_hash', {
          hash: data_hash,
          timestamp: now(),
          recommendations: recommendations
        })
        return recommendations

    rollback:
      supported: true
      method: "Restore previous agent specification versions from git history"

  # Performance Measurement
  # One primary metric, a list of secondary metrics, and the settings for how
  # the measurements are collected and kept.
  metrics:
    primary:
      name: 'optimization_impact'
      description: 'Average performance improvement of optimized agents'
      measurement: 'Mean delta of primary metrics before/after optimization'
      target: '>5% improvement in agent success rates'

    # Each secondary metric is described by name, description and measurement.
    secondary:
      - name: 'prediction_accuracy'
        description: 'How often optimization predictions prove correct'
        measurement: '% of recommendations that improve target metrics'

      - name: 'false_positive_rate'
        description: 'Rate of recommendations that worsen performance'
        measurement: '% of changes that decrease agent effectiveness'

      - name: 'coverage'
        description: 'Percentage of agents with actionable insights'
        measurement: 'Count of agents with recommendations / total agents'

    # Where and how often measurements are recorded, and how long they are kept.
    collection:
      frequency: 'per_execution'
      storage: '.kaizen/metrics/optimizer/'
      retention: '180d'

  # Testing and Validation
  # Scenario descriptions grouped by test level; all values are free text.
  testing:
    # Unit scenarios: scenario / input / expected_output / verification.
    unit_tests:
      - scenario: 'Pattern detection with synthetic data'
        input: 'Mock performance data with known patterns'
        expected_output: 'Correct identification of improvement opportunities'
        verification: 'Assert detected patterns match expected patterns'

      - scenario: 'Confidence scoring accuracy'
        input: 'Historical data with known outcomes'
        expected_output: 'Confidence scores correlate with actual success'
        verification: 'ROC curve analysis of confidence vs outcome'

    # Integration scenarios: scenario / setup / execution / validation.
    integration_tests:
      - scenario: 'End-to-end optimization cycle'
        setup: 'Real agent with declining performance'
        execution: 'Run optimization and apply recommendations'
        validation: 'Verify improved performance in subsequent runs'

      - scenario: 'Rollback mechanism'
        setup: 'Apply optimization that worsens performance'
        execution: 'Trigger automatic rollback'
        validation: 'Agent returns to previous performance level'

    # Performance scenarios: load description plus time and resource budgets.
    performance_tests:
      - scenario: 'Large dataset analysis'
        load: '1000+ agent executions across 20+ agents'
        max_time: '60 seconds'
        resource_limits: 'Max 512MB memory usage'

  # Dependencies and Context
  # What must be available for the agent: system tooling, project layout, and
  # relationships to other agents.
  dependencies:
    # Tooling and access expected on the host.
    system:
      - 'Python 3.8+ with pandas, scikit-learn'
      - 'Git for version control'
      - 'Access to .kaizen/metrics/ directory'

    # Directories and structure expected inside the project.
    project:
      - '.kaizen/agents/ directory with agent definitions'
      - '.kaizen/metrics/ directory with historical data'
      - 'Valid KaizenAgent project structure'

    # Relationships to other agents, each with a stated reason.
    other_agents:
      - name: 'all_subagents'
        relationship: 'analyzes'
        reason: 'Requires performance data from all other agents'

  # Configuration
  # Built-in defaults, the per-project override file with its schema, and the
  # environment variables that are read.
  configuration:
    defaults:
      # NOTE(review): these names do not match the keys of the `algorithms`
      # section in this file (e.g. correlation_analysis,
      # performance_regression) - confirm how the two are meant to map.
      analysis_algorithms:
        - 'correlation'
        - 'regression'
        - 'decision_tree'
      # Same figure as the "At least 10 execution samples" precondition.
      min_sample_size: 10
      significance_threshold: 0.05
      optimization_frequency: 'weekly'

    # Per-project override file and the shape its contents must have.
    project_overrides:
      path: '.kaizen/agents/kaizen-optimizer.yml'
      # The schema is stored as a JSON string inside a literal block scalar.
      schema: |
        {
          "type": "object",
          "properties": {
            "algorithms": {"type": "array"},
            "thresholds": {"type": "object"},
            "scheduling": {"type": "object"}
          }
        }

    # Environment variables listed for this agent.
    environment_variables:
      - name: 'KAIZEN_OPTIMIZER_CONFIG'
        description: 'JSON configuration for optimization parameters'

  # Evolution Tracking
  # Records the starting baseline, the history of improvements, known
  # limitations, and notes for the next iteration.
  optimization:
    baseline_performance:
      # Quoted so the date stays a string instead of being parsed as a date.
      established: '2025-09-26'
      # Baseline value per metric; keys match the metric names declared in the
      # `metrics` section of this file.
      metrics:
        optimization_impact: 0.0
        prediction_accuracy: 0.5
        false_positive_rate: 1.0
        coverage: 0.0

    # Explicit empty list: no improvements recorded yet.
    improvement_history: []

    known_limitations:
      - 'Requires minimum sample sizes to generate reliable insights'
      - 'May not detect complex multi-agent interaction patterns'
      - 'Limited to metrics explicitly defined in agent specifications'
      - 'Cannot optimize for subjective developer experience factors'

    # Planning notes for the next iteration of this agent.
    kaizen_notes:
      optimization_priority: 'high'
      next_experiment: 'Implement ensemble methods for pattern detection'
      success_criteria: 'Achieve >80% prediction accuracy with <10% false positive rate'

  # Algorithm Specifications
  # Each algorithm is described by a one-line description plus the names of
  # its inputs and outputs.
  algorithms:
    correlation_analysis:
      description: 'Identify specification elements that correlate with performance'
      inputs:
        - 'performance_metrics'
        - 'agent_configs'
        - 'execution_context'
      outputs:
        - 'correlation_matrix'
        - 'significant_factors'

    performance_regression:
      description: 'Model performance trends over time and agent versions'
      inputs:
        - 'time_series_data'
        - 'version_history'
      outputs:
        - 'trend_analysis'
        - 'degradation_alerts'

    specification_diffing:
      description: 'Compare high vs low performing agent variants'
      inputs:
        - 'agent_definitions'
        - 'performance_clusters'
      outputs:
        - 'diff_analysis'
        - 'success_patterns'

    a_b_test_design:
      description: 'Generate controlled experiments for proposed changes'
      inputs:
        - 'current_spec'
        - 'proposed_changes'
      outputs:
        - 'experiment_config'
        - 'success_metrics'

# End of kaizen-optimizer specification