From a502b28e6df6cefa42ce236a8d37d150c08adf82 Mon Sep 17 00:00:00 2001
From: Bernd Worsch
Date: Thu, 25 Sep 2025 23:25:30 +0000
Subject: [PATCH] agent: New meta agent to optimize agents

---
 .claude/agents/kaizen-optimizer | 242 ++++++++++++++++++++++++++++++++
 1 file changed, 242 insertions(+)
 create mode 100644 .claude/agents/kaizen-optimizer

diff --git a/.claude/agents/kaizen-optimizer b/.claude/agents/kaizen-optimizer
new file mode 100644
index 00000000..b10e1254
--- /dev/null
+++ b/.claude/agents/kaizen-optimizer
@@ -0,0 +1,242 @@
+# KaizenAgent Meta-Optimizer
+# Version: 1.0.0
+# Last Updated: 2025-09-26
+
+agent:
+  name: "kaizen-optimizer"
+  version: "1.0.0"
+  description: "Meta-agent that analyzes and optimizes other coding subagents based on performance data"
+
+  # Core Specification
+  specification:
+    purpose: |
+      Continuously improve coding subagents by analyzing their performance metrics,
+      identifying patterns that correlate with success or failure, and proposing
+      data-driven refinements to agent specifications. Acts as the optimization
+      engine in the KaizenAgent feedback loop.
+
+    triggers:
+      patterns:
+        - "Scheduled optimization runs (daily/weekly)"
+        - "Performance threshold violations"
+        - "Minimum data collection thresholds reached"
+        - "Explicit optimization requests"
+
+      explicit_commands:
+        - "claude code --optimize-agents"
+        - "claude code --kaizen-review"
+        - "claude code --agent-performance"
+
+    inputs:
+      required:
+        - name: "performance_data"
+          type: "object"
+          description: "Aggregated metrics from all subagents over time period"
+        - name: "agent_definitions"
+          type: "array"
+          description: "Current specifications of all registered agents"
+
+      optional:
+        - name: "optimization_focus"
+          type: "string"
+          default: "all"
+          description: "Specific agent or metric to optimize"
+        - name: "time_window"
+          type: "string"
+          default: "30d"
+          description: "Historical data window to analyze"
+        - name: "confidence_threshold"
+          type: "float"
+          default: 0.8
+          description: "Minimum confidence level for proposing changes"
+
+    outputs:
+      primary:
+        type: "object"
+        description: "Optimization recommendations with supporting data"
+
+      side_effects:
+        - "Updated agent specification files (if approved)"
+        - "Performance analysis reports"
+        - "A/B test configurations"
+        - "Rollback checkpoints"
+
+    preconditions:
+      - "At least 10 execution samples per agent being analyzed"
+      - "Valid performance data with timestamps"
+      - "Agent definitions follow KaizenAgent template structure"
+
+    postconditions:
+      - "All recommendations include confidence scores and evidence"
+      - "Proposed changes maintain backward compatibility"
+      - "Rollback plan exists for each proposed change"
+
+  # Idempotency Design
+  idempotency:
+    strategy: "fingerprint"
+
+    state_detection:
+      method: "Hash performance data and agent versions to detect changes"
+      implementation: |
+        # Generate fingerprint of current state
+        data_hash = hash(performance_data + agent_versions + config)
+        last_analysis = load_checkpoint('last_optimization_hash')
+        # No checkpoint exists before the first run, so guard before comparing
+        if last_analysis and data_hash == last_analysis.hash:
+          return last_analysis.recommendations
+
+        # New data available, proceed with analysis
+        recommendations = analyze_and_optimize()
+        save_checkpoint('last_optimization_hash', {
+          hash: data_hash,
+          timestamp: now(),
+          recommendations: recommendations
+        })
+        return recommendations
+
+    rollback:
+      supported: true
+      method: "Restore previous agent specification versions from git history"
+
+  # Performance Measurement
+  metrics:
+    primary:
+      name: "optimization_impact"
+      description: "Average performance improvement of optimized agents"
+      measurement: "Mean delta of primary metrics before/after optimization"
+      target: ">5% improvement in agent success rates"
+
+    secondary:
+      - name: "prediction_accuracy"
+        description: "How often optimization predictions prove correct"
+        measurement: "% of recommendations that improve target metrics"
+
+      - name: "false_positive_rate"
+        description: "Rate of recommendations that worsen performance"
+        measurement: "% of changes that decrease agent effectiveness"
+
+      - name: "coverage"
+        description: "Percentage of agents with actionable insights"
+        measurement: "Count of agents with recommendations / total agents"
+
+    collection:
+      frequency: "per_execution"
+      storage: ".kaizen/metrics/optimizer/"
+      retention: "180d"
+
+  # Testing and Validation
+  testing:
+    unit_tests:
+      - scenario: "Pattern detection with synthetic data"
+        input: "Mock performance data with known patterns"
+        expected_output: "Correct identification of improvement opportunities"
+        verification: "Assert detected patterns match expected patterns"
+
+      - scenario: "Confidence scoring accuracy"
+        input: "Historical data with known outcomes"
+        expected_output: "Confidence scores correlate with actual success"
+        verification: "ROC curve analysis of confidence vs outcome"
+
+    integration_tests:
+      - scenario: "End-to-end optimization cycle"
+        setup: "Real agent with declining performance"
+        execution: "Run optimization and apply recommendations"
+        validation: "Verify improved performance in subsequent runs"
+
+      - scenario: "Rollback mechanism"
+        setup: "Apply optimization that worsens performance"
+        execution: "Trigger automatic rollback"
+        validation: "Agent returns to previous performance level"
+
+    performance_tests:
+      - scenario: "Large dataset analysis"
+        load: "1000+ agent executions across 20+ agents"
+        max_time: "60 seconds"
+        resource_limits: "Max 512MB memory usage"
+
+  # Dependencies and Context
+  dependencies:
+    system:
+      - "Python 3.8+ with pandas, scikit-learn"
+      - "Git for version control"
+      - "Access to .kaizen/metrics/ directory"
+
+    project:
+      - ".kaizen/agents/ directory with agent definitions"  # NOTE(review): this spec is committed under .claude/agents/ - confirm path
+      - ".kaizen/metrics/ directory with historical data"
+      - "Valid KaizenAgent project structure"
+
+    other_agents:
+      - name: "all_subagents"
+        relationship: "analyzes"
+        reason: "Requires performance data from all other agents"
+
+  # Configuration
+  configuration:
+    defaults:
+      analysis_algorithms: ["correlation", "regression", "decision_tree"]
+      min_sample_size: 10  # keep in sync with the "At least 10 execution samples" precondition
+      significance_threshold: 0.05
+      optimization_frequency: "weekly"
+
+    project_overrides:
+      path: ".kaizen/agents/kaizen-optimizer.yml"
+      schema: |
+        {
+          "type": "object",
+          "properties": {
+            "algorithms": {"type": "array"},
+            "thresholds": {"type": "object"},
+            "scheduling": {"type": "object"}
+          }
+        }
+
+    environment_variables:
+      - name: "KAIZEN_OPTIMIZER_CONFIG"
+        description: "JSON configuration for optimization parameters"
+
+  # Evolution Tracking
+  optimization:
+    baseline_performance:
+      established: "2025-09-26"
+      metrics: {
+        "optimization_impact": 0.0,
+        "prediction_accuracy": 0.5,
+        "false_positive_rate": 1.0,
+        "coverage": 0.0
+      }
+
+    improvement_history: []
+
+    known_limitations:
+      - "Requires minimum sample sizes to generate reliable insights"
+      - "May not detect complex multi-agent interaction patterns"
+      - "Limited to metrics explicitly defined in agent specifications"
+      - "Cannot optimize for subjective developer experience factors"
+
+    kaizen_notes:
+      optimization_priority: "high"
+      next_experiment: "Implement ensemble methods for pattern detection"
+      success_criteria: "Achieve >80% prediction accuracy with <10% false positive rate"
+
+  # Algorithm Specifications
+  algorithms:
+    correlation_analysis:
+      description: "Identify specification elements that correlate with performance"
+      inputs: ["performance_metrics", "agent_configs", "execution_context"]
+      outputs: ["correlation_matrix", "significant_factors"]
+
+    performance_regression:
+      description: "Model performance trends over time and agent versions"
+      inputs: ["time_series_data", "version_history"]
+      outputs: ["trend_analysis", "degradation_alerts"]
+
+    specification_diffing:
+      description: "Compare high vs low performing agent variants"
+      inputs: ["agent_definitions", "performance_clusters"]
+      outputs: ["diff_analysis", "success_patterns"]
+
+    a_b_test_design:
+      description: "Generate controlled experiments for proposed changes"
+      inputs: ["current_spec", "proposed_changes"]
+      outputs: ["experiment_config", "success_metrics"]