---
# AgentKaizenOptimizer
# *One agent to improve them all*
#
# KaizenAgent Meta-Optimizer
# Version: 1.0.0
# Last Updated: 2025-09-26
#
# Specification of the "kaizen-optimizer" meta-agent: what triggers it, what
# it consumes and produces, how it stays idempotent, how it is measured and
# tested, and which analysis algorithms it declares.
#
# NOTE(review): this file was restored from a copy whose newlines and
# indentation had been collapsed. Nesting was rebuilt from key order and the
# section comments; line breaks inside the `|` block scalars are a best-effort
# reconstruction. Confirm both against the canonical copy if one exists.

# Identity of the agent described by this file.
agent:
  name: "kaizen-optimizer"
  version: "1.0.0"  # quoted so it stays a string rather than a number
  description: "Meta-agent that analyzes and optimizes other coding subagents based on performance data"

# Core Specification
specification:
  purpose: |
    Continuously improve coding subagents by analyzing their performance metrics,
    identifying patterns that correlate with success or failure, and proposing
    data-driven refinements to agent specifications. Acts as the optimization
    engine in the KaizenAgent feedback loop.

  # Conditions and commands that start an optimization run.
  triggers:
    patterns:
      - "Scheduled optimization runs (daily/weekly)"
      - "Performance threshold violations"
      - "Minimum data collection thresholds reached"
      - "Explicit optimization requests"
    explicit_commands:
      - "claude code --optimize-agents"
      - "claude code --kaizen-review"
      - "claude code --agent-performance"

  inputs:
    required:
      - name: "performance_data"
        type: "object"
        description: "Aggregated metrics from all subagents over time period"
      - name: "agent_definitions"
        type: "array"
        description: "Current specifications of all registered agents"
    optional:
      - name: "optimization_focus"
        type: "string"
        default: "all"
        description: "Specific agent or metric to optimize"
      - name: "time_window"
        type: "string"
        default: "30d"
        description: "Historical data window to analyze"
      - name: "confidence_threshold"
        type: "float"
        default: 0.8
        description: "Minimum confidence level for proposing changes"

  outputs:
    primary:
      type: "object"
      description: "Optimization recommendations with supporting data"
    side_effects:
      - "Updated agent specification files (if approved)"
      - "Performance analysis reports"
      - "A/B test configurations"
      - "Rollback checkpoints"

  preconditions:
    - "At least 10 execution samples per agent being analyzed"
    - "Valid performance data with timestamps"
    - "Agent definitions follow KaizenAgent template structure"

  postconditions:
    - "All recommendations include confidence scores and evidence"
    - "Proposed changes maintain backward compatibility"
    - "Rollback plan exists for each proposed change"

# Idempotency Design
idempotency:
  strategy: "fingerprint"
  state_detection:
    method: "Hash performance data and agent versions to detect changes"
    # Pseudocode stored as a literal string: an unchanged fingerprint returns
    # the checkpointed recommendations instead of re-running the analysis.
    # The `#` lines inside the scalar are part of the string, not YAML comments.
    implementation: |
      # Generate fingerprint of current state
      data_hash = hash(performance_data + agent_versions + config)
      last_analysis = load_checkpoint('last_optimization_hash')

      if data_hash == last_analysis.hash:
          return last_analysis.recommendations

      # New data available, proceed with analysis
      recommendations = analyze_and_optimize()
      save_checkpoint('last_optimization_hash', {
          hash: data_hash,
          timestamp: now(),
          recommendations: recommendations
      })
      return recommendations
  rollback:
    supported: true
    method: "Restore previous agent specification versions from git history"

# Performance Measurement
metrics:
  primary:
    name: "optimization_impact"
    description: "Average performance improvement of optimized agents"
    measurement: "Mean delta of primary metrics before/after optimization"
    target: ">5% improvement in agent success rates"
  secondary:
    - name: "prediction_accuracy"
      description: "How often optimization predictions prove correct"
      measurement: "% of recommendations that improve target metrics"
    - name: "false_positive_rate"
      description: "Rate of recommendations that worsen performance"
      measurement: "% of changes that decrease agent effectiveness"
    - name: "coverage"
      description: "Percentage of agents with actionable insights"
      measurement: "Count of agents with recommendations / total agents"
  collection:
    frequency: "per_execution"
    storage: ".kaizen/metrics/optimizer/"
    retention: "180d"

# Testing and Validation
testing:
  unit_tests:
    - scenario: "Pattern detection with synthetic data"
      input: "Mock performance data with known patterns"
      expected_output: "Correct identification of improvement opportunities"
      verification: "Assert detected patterns match expected patterns"
    - scenario: "Confidence scoring accuracy"
      input: "Historical data with known outcomes"
      expected_output: "Confidence scores correlate with actual success"
      verification: "ROC curve analysis of confidence vs outcome"

  integration_tests:
    - scenario: "End-to-end optimization cycle"
      setup: "Real agent with declining performance"
      execution: "Run optimization and apply recommendations"
      validation: "Verify improved performance in subsequent runs"
    - scenario: "Rollback mechanism"
      setup: "Apply optimization that worsens performance"
      execution: "Trigger automatic rollback"
      validation: "Agent returns to previous performance level"

  performance_tests:
    - scenario: "Large dataset analysis"
      load: "1000+ agent executions across 20+ agents"
      max_time: "60 seconds"
      resource_limits: "Max 512MB memory usage"

# Dependencies and Context
dependencies:
  system:
    - "Python 3.8+ with pandas, scikit-learn"
    - "Git for version control"
    - "Access to .kaizen/metrics/ directory"
  project:
    - ".kaizen/agents/ directory with agent definitions"
    - ".kaizen/metrics/ directory with historical data"
    - "Valid KaizenAgent project structure"
  other_agents:
    - name: "all_subagents"
      relationship: "analyzes"
      reason: "Requires performance data from all other agents"

# Configuration
configuration:
  defaults:
    analysis_algorithms: ["correlation", "regression", "decision_tree"]
    # Same value as the "At least 10 execution samples" precondition above.
    min_sample_size: 10
    significance_threshold: 0.05
    optimization_frequency: "weekly"
  project_overrides:
    path: ".kaizen/agents/kaizen-optimizer.yml"
    # JSON Schema text kept as a literal string.
    schema: |
      {
        "type": "object",
        "properties": {
          "algorithms": {"type": "array"},
          "thresholds": {"type": "object"},
          "scheduling": {"type": "object"}
        }
      }
  environment_variables:
    - name: "KAIZEN_OPTIMIZER_CONFIG"
      description: "JSON configuration for optimization parameters"

# Evolution Tracking
optimization:
  baseline_performance:
    established: "2025-09-26"  # quoted so it stays a string, not a date
    metrics:
      optimization_impact: 0.0
      prediction_accuracy: 0.5
      false_positive_rate: 1.0
      coverage: 0.0
  improvement_history: []
  known_limitations:
    - "Requires minimum sample sizes to generate reliable insights"
    - "May not detect complex multi-agent interaction patterns"
    - "Limited to metrics explicitly defined in agent specifications"
    - "Cannot optimize for subjective developer experience factors"
  # NOTE(review): nested under `optimization` because, unlike every top-level
  # key, it has no section comment of its own. Confirm it is not top-level.
  kaizen_notes:
    optimization_priority: "high"
    next_experiment: "Implement ensemble methods for pattern detection"
    success_criteria: "Achieve >80% prediction accuracy with <10% false positive rate"

# Algorithm Specifications
algorithms:
  correlation_analysis:
    description: "Identify specification elements that correlate with performance"
    inputs: ["performance_metrics", "agent_configs", "execution_context"]
    outputs: ["correlation_matrix", "significant_factors"]

  performance_regression:
    description: "Model performance trends over time and agent versions"
    inputs: ["time_series_data", "version_history"]
    outputs: ["trend_analysis", "degradation_alerts"]

  specification_diffing:
    description: "Compare high vs low performing agent variants"
    inputs: ["agent_definitions", "performance_clusters"]
    outputs: ["diff_analysis", "success_patterns"]

  a_b_test_design:
    description: "Generate controlled experiments for proposed changes"
    inputs: ["current_spec", "proposed_changes"]
    outputs: ["experiment_config", "success_metrics"]