# KaizenAgent Meta-Optimizer

Version: 1.0.0

Last Updated: 2025-09-26

# Agent identity: name, version, and a one-line description.
agent:
  name: "kaizen-optimizer"
  version: "1.0.0"
  description: "Meta-agent that analyzes and optimizes other coding subagents based on performance data"

## Core Specification

# Contract of the optimizer: purpose, triggers, inputs, outputs, and the
# conditions expected before and guaranteed after a run.
specification:
  # NOTE(review): line breaks inside this literal block were reconstructed
  # from a collapsed single line — confirm against the original wording.
  purpose: |
    Continuously improve coding subagents by analyzing their performance
    metrics, identifying patterns that correlate with success or failure,
    and proposing data-driven refinements to agent specifications. Acts as
    the optimization engine in the KaizenAgent feedback loop.

  # Situations and explicit commands that start an optimization run.
  triggers:
    patterns:
      - "Scheduled optimization runs (daily/weekly)"
      - "Performance threshold violations"
      - "Minimum data collection thresholds reached"
      - "Explicit optimization requests"

    explicit_commands:
      - "claude code --optimize-agents"
      - "claude code --kaizen-review"
      - "claude code --agent-performance"

  inputs:
    required:
      - name: "performance_data"
        type: "object"
        description: "Aggregated metrics from all subagents over time period"
      - name: "agent_definitions"
        type: "array"
        description: "Current specifications of all registered agents"

    # Optional inputs each declare the default used when omitted.
    optional:
      - name: "optimization_focus"
        type: "string"
        default: "all"
        description: "Specific agent or metric to optimize"
      - name: "time_window"
        type: "string"
        default: "30d"
        description: "Historical data window to analyze"
      - name: "confidence_threshold"
        type: "float"
        default: 0.8
        description: "Minimum confidence level for proposing changes"

  outputs:
    primary:
      type: "object"
      description: "Optimization recommendations with supporting data"

    side_effects:
      - "Updated agent specification files (if approved)"
      - "Performance analysis reports"
      - "A/B test configurations"
      - "Rollback checkpoints"

  preconditions:
    - "At least 10 execution samples per agent being analyzed"
    - "Valid performance data with timestamps"
    - "Agent definitions follow KaizenAgent template structure"

  postconditions:
    - "All recommendations include confidence scores and evidence"
    - "Proposed changes maintain backward compatibility"
    - "Rollback plan exists for each proposed change"

## Idempotency Design

# How repeated runs over unchanged data are detected and short-circuited,
# and how applied changes can be undone.
idempotency:
  strategy: "fingerprint"

  state_detection:
    method: "Hash performance data and agent versions to detect changes"
    # Pseudo-code: reuse the stored recommendations when the fingerprint of
    # the inputs matches the last checkpoint; otherwise analyze and save a
    # new checkpoint.
    implementation: |
      # Generate fingerprint of current state
      data_hash = hash(performance_data + agent_versions + config)
      last_analysis = load_checkpoint('last_optimization_hash')

      if data_hash == last_analysis.hash:
        return last_analysis.recommendations

      # New data available, proceed with analysis
      recommendations = analyze_and_optimize()
      save_checkpoint('last_optimization_hash', {
        hash: data_hash,
        timestamp: now(),
        recommendations: recommendations
      })
      return recommendations

  rollback:
    supported: true
    method: "Restore previous agent specification versions from git history"

## Performance Measurement

# Metrics tracked for the optimizer: one primary metric with a target,
# supporting secondary metrics, and how the data is collected and stored.
metrics:
  primary:
    name: "optimization_impact"
    description: "Average performance improvement of optimized agents"
    measurement: "Mean delta of primary metrics before/after optimization"
    target: ">5% improvement in agent success rates"

  secondary:
    - name: "prediction_accuracy"
      description: "How often optimization predictions prove correct"
      measurement: "% of recommendations that improve target metrics"

    - name: "false_positive_rate"
      description: "Rate of recommendations that worsen performance"
      measurement: "% of changes that decrease agent effectiveness"

    - name: "coverage"
      description: "Percentage of agents with actionable insights"
      measurement: "Count of agents with recommendations / total agents"

  collection:
    frequency: "per_execution"
    storage: ".kaizen/metrics/optimizer/"
    retention: "180d"

## Testing and Validation

# Test scenarios grouped by level: unit, integration, and performance.
testing:
  unit_tests:
    - scenario: "Pattern detection with synthetic data"
      input: "Mock performance data with known patterns"
      expected_output: "Correct identification of improvement opportunities"
      verification: "Assert detected patterns match expected patterns"

    - scenario: "Confidence scoring accuracy"
      input: "Historical data with known outcomes"
      expected_output: "Confidence scores correlate with actual success"
      verification: "ROC curve analysis of confidence vs outcome"

  integration_tests:
    - scenario: "End-to-end optimization cycle"
      setup: "Real agent with declining performance"
      execution: "Run optimization and apply recommendations"
      validation: "Verify improved performance in subsequent runs"

    - scenario: "Rollback mechanism"
      setup: "Apply optimization that worsens performance"
      execution: "Trigger automatic rollback"
      validation: "Agent returns to previous performance level"

  # Load scenario with explicit time and memory budgets.
  performance_tests:
    - scenario: "Large dataset analysis"
      load: "1000+ agent executions across 20+ agents"
      max_time: "60 seconds"
      resource_limits: "Max 512MB memory usage"

## Dependencies and Context

# What the optimizer needs from the system, the project layout, and
# other agents.
dependencies:
  system:
    - "Python 3.8+ with pandas, scikit-learn"
    - "Git for version control"
    - "Access to .kaizen/metrics/ directory"

  project:
    - ".kaizen/agents/ directory with agent definitions"
    - ".kaizen/metrics/ directory with historical data"
    - "Valid KaizenAgent project structure"

  other_agents:
    - name: "all_subagents"
      relationship: "analyzes"
      reason: "Requires performance data from all other agents"

## Configuration

# Default settings, the per-project override file with its schema, and
# environment variables read by the optimizer.
configuration:
  defaults:
    analysis_algorithms: ["correlation", "regression", "decision_tree"]
    min_sample_size: 10
    significance_threshold: 0.05
    optimization_frequency: "weekly"

  project_overrides:
    path: ".kaizen/agents/kaizen-optimizer.yml"
    # JSON Schema-style description of the override file, kept as a string.
    schema: |
      {
        "type": "object",
        "properties": {
          "algorithms": {"type": "array"},
          "thresholds": {"type": "object"},
          "scheduling": {"type": "object"}
        }
      }

  environment_variables:
    - name: "KAIZEN_OPTIMIZER_CONFIG"
      description: "JSON configuration for optimization parameters"

## Evolution Tracking

# Evolution record for the optimizer itself: baseline metric values,
# history of improvements, known limitations, and next steps.
optimization:
  baseline_performance:
    established: "2025-09-26"
    # Starting values for the metrics named in the metrics section.
    metrics:
      optimization_impact: 0.0
      prediction_accuracy: 0.5
      false_positive_rate: 1.0
      coverage: 0.0

  # No improvements recorded yet; explicit empty list rather than null.
  improvement_history: []

  known_limitations:
    - "Requires minimum sample sizes to generate reliable insights"
    - "May not detect complex multi-agent interaction patterns"
    - "Limited to metrics explicitly defined in agent specifications"
    - "Cannot optimize for subjective developer experience factors"

  kaizen_notes:
    optimization_priority: "high"
    next_experiment: "Implement ensemble methods for pattern detection"
    success_criteria: "Achieve >80% prediction accuracy with <10% false positive rate"

## Algorithm Specifications

# Analysis algorithms, each described by its purpose, inputs, and outputs.
algorithms:
  correlation_analysis:
    description: "Identify specification elements that correlate with performance"
    inputs: ["performance_metrics", "agent_configs", "execution_context"]
    outputs: ["correlation_matrix", "significant_factors"]

  performance_regression:
    description: "Model performance trends over time and agent versions"
    inputs: ["time_series_data", "version_history"]
    outputs: ["trend_analysis", "degradation_alerts"]

  specification_diffing:
    description: "Compare high vs low performing agent variants"
    inputs: ["agent_definitions", "performance_clusters"]
    outputs: ["diff_analysis", "success_patterns"]

  a_b_test_design:
    description: "Generate controlled experiments for proposed changes"
    inputs: ["current_spec", "proposed_changes"]
    outputs: ["experiment_config", "success_metrics"]

xxx

8.4 KiB Raw Blame History