markitect-main/tools/agent_tooling_optimizer.py

#!/usr/bin/env python3
"""
Agent Tooling Optimizer - Meta-agent for optimizing repository tooling usage.

This tool analyzes repository tooling, identifies missed optimization opportunities,
and provides actionable recommendations to improve agent effectiveness.
"""

import json
import os
import re
import subprocess
import sys
from dataclasses import dataclass, asdict
from pathlib import Path
from typing import Dict, List, Optional, Set, Tuple
import argparse
from datetime import datetime


@dataclass
class ToolMetadata:
    """Metadata for a discovered tool.

    One record is produced per tool found by ``ToolingDiscoveryEngine``
    (a Makefile target, a CLI command, a script or a workflow).
    """
    # Display name, e.g. a Makefile target name or "markitect <command>".
    name: str
    # Kind of tool; the values used in this file are
    # 'makefile', 'cli', 'script' and 'workflow'.
    type: str  # 'makefile', 'cli', 'script', 'workflow'
    # Human-readable summary (falls back to a generic placeholder text).
    description: str
    # How to invoke the tool, e.g. "make test" or "python scripts/x.py".
    usage: str
    # Path of the file the tool was discovered in, stored as a string.
    file_path: str
    # Example invocations; may be empty.
    examples: List[str]
    # Declared dependencies; every producer in this file passes [].
    dependencies: List[str]
    # Grouping label such as "testing", "build", "cli" or "utility".
    category: str


@dataclass
class MissedOpportunity:
    """Represents a missed tooling opportunity.

    Produced by ``SessionAnalyzer`` when it sees work that an existing
    repository tool could have done.
    """
    # Where the pattern was observed (e.g. a commit line or a file path).
    context: str
    # The existing tool or command that could have been used instead.
    available_tool: str
    # Short label for what was apparently done by hand.
    manual_approach: str
    # What using the tool would have gained.
    efficiency_gain: str
    # Actionable advice shown to the user.
    recommendation: str


@dataclass
class OptimizationReport:
    """Comprehensive optimization report.

    Assembled by the ``report`` command and serialised either as JSON
    (via ``dataclasses.asdict``) or as Markdown.
    """
    # Creation time as an ISO-8601 string (datetime.now().isoformat()).
    timestamp: str
    # Number of tools discovered.
    total_tools: int
    # Mapping of category name -> number of tools in that category.
    tools_by_category: Dict[str, int]
    # Findings from the session analysis.
    missed_opportunities: List[MissedOpportunity]
    # General recommendations (free-form text lines).
    recommendations: List[str]
    # Low-effort improvements (free-form text lines).
    quick_wins: List[str]


class ToolingDiscoveryEngine:
    """Discovers and catalogs repository tooling.

    Scans the repository rooted at ``repo_path`` for four kinds of tools:
    Makefile targets, Click CLI commands, utility scripts and workflow
    automation (GitHub Actions workflows and ``tddai_cli.py``).

    Discovery is best-effort: a file that cannot be read is reported on
    stderr (never stdout, which may carry JSON output) and skipped, so one
    bad file does not abort the whole scan.
    """

    def __init__(self, repo_path: str = "."):
        """Store the repository root; no scanning happens until requested.

        Args:
            repo_path: Directory to scan. Defaults to the current directory.
        """
        self.repo_path = Path(repo_path)
        self.tools: List[ToolMetadata] = []

    @staticmethod
    def _read_text(path: Path) -> str:
        """Read ``path`` as UTF-8 text.

        Undecodable bytes are replaced rather than raising, so the result
        does not depend on the platform's locale encoding.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        with open(path, 'r', encoding='utf-8', errors='replace') as f:
            return f.read()

    def discover_all_tools(self) -> List[ToolMetadata]:
        """Discover all available tools in the repository.

        Returns:
            The freshly built tool list, which is also stored in
            ``self.tools`` (any previous result is discarded).
        """
        self.tools = []

        # Discover different types of tools
        self.tools.extend(self._discover_makefile_targets())
        self.tools.extend(self._discover_cli_commands())
        self.tools.extend(self._discover_scripts())
        self.tools.extend(self._discover_workflow_automation())

        return self.tools

    def _discover_makefile_targets(self) -> List[ToolMetadata]:
        """Discover Makefile targets and their purposes.

        A target is a line of the form ``name: [prerequisites] [# comment]``
        in ``<repo>/Makefile``. The trailing comment (``#`` or the common
        ``##`` self-documenting form) becomes the description.

        Returns:
            One ``ToolMetadata`` per target, excluding ``all`` and ``clean``.
            Empty when there is no Makefile or it cannot be read.
        """
        makefile_path = self.repo_path / "Makefile"
        tools: List[ToolMetadata] = []

        if not makefile_path.exists():
            return tools

        # Everything is matched within a single line ([^#\n], [ \t]) so that a
        # comment on the following recipe line is never mistaken for the
        # target's description. The negative lookahead rejects variable
        # assignments such as ``CC:=gcc`` / ``CC::=gcc``. Names starting with
        # '.' (e.g. ``.PHONY``) cannot match the name character class.
        target_pattern = re.compile(
            r'^([a-zA-Z0-9_-]+):(?!:{0,2}=)[^#\n]*(?:#+[ \t]*([^\n]*))?$',
            re.MULTILINE,
        )

        try:
            content = self._read_text(makefile_path)
        except OSError as e:
            print(f"Error parsing Makefile: {e}", file=sys.stderr)
            return tools

        for match in target_pattern.finditer(content):
            target_name = match.group(1)

            # Skip conventional housekeeping targets
            if target_name in ('all', 'clean'):
                continue

            description = (match.group(2) or "").strip() or "No description available"
            usage = f"make {target_name}"

            tools.append(ToolMetadata(
                name=target_name,
                type="makefile",
                description=description,
                usage=usage,
                file_path=str(makefile_path),
                examples=[usage],
                dependencies=[],
                category=self._categorize_makefile_target(target_name)
            ))

        return tools

    def _discover_cli_commands(self) -> List[ToolMetadata]:
        """Discover CLI commands and subcommands.

        Looks at ``markitect/cli.py`` (main CLI) and
        ``markitect/issues/commands.py`` (issue management) under the
        repository root; each is skipped when the file does not exist.
        """
        tools = []

        # Discover main CLI
        cli_path = self.repo_path / "markitect" / "cli.py"
        if cli_path.exists():
            tools.extend(self._parse_click_cli(cli_path))

        # Discover issue management CLI
        issues_cli_path = self.repo_path / "markitect" / "issues" / "commands.py"
        if issues_cli_path.exists():
            tools.extend(self._parse_issues_cli(issues_cli_path))

        return tools

    def _discover_scripts(self) -> List[ToolMetadata]:
        """Discover utility scripts.

        Collects ``*.py`` files directly inside ``scripts/``, ``tools/``,
        ``bin/`` and ``utils/``. Files whose name starts with ``_`` are
        treated as private and skipped. Results are sorted per directory so
        the catalog is stable between runs.
        """
        tools = []

        for script_dir in ("scripts", "tools", "bin", "utils"):
            script_path = self.repo_path / script_dir
            if not script_path.is_dir():
                continue

            for script_file in sorted(script_path.glob("*.py")):
                if script_file.name.startswith("_"):
                    continue

                tools.append(ToolMetadata(
                    name=script_file.stem,
                    type="script",
                    description=self._extract_script_description(script_file),
                    usage=f"python {script_file}",
                    file_path=str(script_file),
                    examples=[f"python {script_file} --help"],
                    dependencies=[],
                    category="utility"
                ))

        return tools

    def _discover_workflow_automation(self) -> List[ToolMetadata]:
        """Discover workflow automation tools.

        Covers GitHub Actions workflow files (both ``.yml`` and ``.yaml``)
        in ``.github/workflows`` and the ``tddai_cli.py`` helper at the
        repository root.
        """
        tools = []

        # GitHub Actions
        gh_actions_path = self.repo_path / ".github" / "workflows"
        if gh_actions_path.is_dir():
            workflow_files = sorted(
                candidate for candidate in gh_actions_path.iterdir()
                if candidate.suffix in (".yml", ".yaml") and candidate.is_file()
            )
            for workflow_file in workflow_files:
                tools.append(ToolMetadata(
                    name=workflow_file.stem,
                    type="workflow",
                    description=self._extract_workflow_description(workflow_file),
                    usage=f"Triggered by: {self._get_workflow_triggers(workflow_file)}",
                    file_path=str(workflow_file),
                    examples=[],
                    dependencies=[],
                    category="automation"
                ))

        # TDD AI CLI
        tddai_path = self.repo_path / "tddai_cli.py"
        if tddai_path.exists():
            tools.append(ToolMetadata(
                name="tddai_cli",
                type="workflow",
                description="TDD8 workflow automation and issue management",
                usage="python tddai_cli.py [command]",
                file_path=str(tddai_path),
                examples=["python tddai_cli.py tdd-start 61", "python tddai_cli.py list-issues"],
                dependencies=[],
                category="development"
            ))

        return tools

    def _categorize_makefile_target(self, target_name: str) -> str:
        """Categorize a Makefile target by keywords found in its name.

        Matching is case-insensitive substring matching; the first category
        (in the order listed below) with a matching keyword wins.

        Returns:
            One of "testing", "build", "issue_management", "database",
            "code_quality", or "general" when nothing matches.
        """
        keyword_table = (
            ("testing", ('test', 'check')),
            ("build", ('build', 'compile', 'install')),
            ("issue_management", ('issue', 'gitea')),
            ("database", ('db', 'data', 'schema')),
            ("code_quality", ('lint', 'format', 'quality')),
        )
        lowered = target_name.lower()
        for category, keywords in keyword_table:
            if any(keyword in lowered for keyword in keywords):
                return category
        return "general"

    def _parse_click_cli(self, cli_path: Path) -> List[ToolMetadata]:
        """Parse a Click CLI module for commands.

        Recognises functions decorated with ``@click.command(...)`` or
        ``@<group>.command(...)``, optionally followed by further
        single-line decorators (e.g. ``@click.option``) before the ``def``.
        The command name is the explicit name given to ``command(...)`` if
        present; otherwise the function name with underscores replaced by
        dashes, which is how Click (7.0+) derives default command names.

        Returns:
            One ``ToolMetadata`` per command; empty if the file is unreadable.
        """
        tools: List[ToolMetadata] = []

        command_pattern = re.compile(
            r'^[ \t]*@[\w.]+\.command\((?P<args>[^)]*)\)\s*'
            r'(?:@[^\n]*\n\s*)*'
            r'(?P<def>(?:async\s+)?def\s+(?P<func>[a-zA-Z_][a-zA-Z0-9_]*))\s*\(',
            re.MULTILINE,
        )

        try:
            content = self._read_text(cli_path)
        except OSError as e:
            print(f"Error parsing CLI: {e}", file=sys.stderr)
            return tools

        for match in command_pattern.finditer(content):
            decorator_args = match.group("args")
            # Explicit name: first positional string or a name="..." keyword.
            explicit = (
                re.match(r'\s*["\']([^"\']+)["\']', decorator_args)
                or re.search(r'\bname\s*=\s*["\']([^"\']+)["\']', decorator_args)
            )
            if explicit:
                command_name = explicit.group(1)
            else:
                command_name = match.group("func").replace('_', '-')

            # Start at the ``def`` so decorator text is never read as a docstring.
            description = self._extract_function_docstring(content, match.start("def"))

            tools.append(ToolMetadata(
                name=f"markitect {command_name}",
                type="cli",
                description=description,
                usage=f"markitect {command_name}",
                file_path=str(cli_path),
                examples=[f"markitect {command_name} --help"],
                dependencies=[],
                category="cli"
            ))

        return tools

    def _parse_issues_cli(self, cli_path: Path) -> List[ToolMetadata]:
        """Describe the issue-management CLI commands.

        The command set is a fixed list (list, show, create, comment,
        close); the file at ``cli_path`` is not inspected and is only
        recorded as the location of these commands.
        """
        issue_commands = ('list', 'show', 'create', 'comment', 'close')

        return [
            ToolMetadata(
                name=f"markitect issues {command}",
                type="cli",
                description=f"Issue management: {command} operations",
                usage=f"markitect issues {command}",
                file_path=str(cli_path),
                examples=[f"markitect issues {command} --help"],
                dependencies=[],
                category="issue_management"
            )
            for command in issue_commands
        ]

    def _extract_script_description(self, script_path: Path) -> str:
        """Extract a one-line description from a script's first docstring.

        Returns the first line of the first ``\"\"\"``-delimited string in the
        file, or "Utility script" when none is found or the file cannot be
        read. The docstring may itself contain double-quote characters.
        """
        try:
            content = self._read_text(script_path)
        except OSError:
            return "Utility script"

        # Non-greedy + DOTALL: stop at the first closing delimiter while
        # allowing embedded '"' characters and newlines inside the docstring.
        match = re.search(r'"""(.+?)"""', content, re.DOTALL)
        if match:
            summary = match.group(1).strip().split('\n')[0].strip()
            if summary:
                return summary

        return "Utility script"

    def _extract_function_docstring(self, content: str, start_pos: int) -> str:
        """Extract a function docstring from source text.

        Scans forward from ``start_pos`` for the first ``\"\"\"`` string. For a
        multi-line docstring every non-empty line, including text on the
        opening and closing delimiter lines, is joined with single spaces.
        The scan gives up when a second ``def``/``class`` statement is
        reached first, so a function without a docstring does not inherit
        the docstring of the next definition.

        Args:
            content: Full source text.
            start_pos: Offset of the function's decorator or ``def`` line.

        Returns:
            The docstring text, or "No description available".
        """
        lines = content[start_pos:].split('\n')
        seen_def = False

        for i, line in enumerate(lines):
            if '"""' in line:
                pieces = line.split('"""')
                if len(pieces) >= 3:
                    # Opening and closing delimiter on the same line.
                    return pieces[1].strip()

                collected = [pieces[1].strip()]
                for follower in lines[i + 1:]:
                    if '"""' in follower:
                        collected.append(follower.split('"""')[0].strip())
                        break
                    collected.append(follower.strip())
                return ' '.join(part for part in collected if part)

            if line.lstrip().startswith(('def ', 'async def ', 'class ')):
                if seen_def:
                    break
                seen_def = True

        return "No description available"

    def _extract_workflow_description(self, workflow_path: Path) -> str:
        """Extract the workflow's display name from a GitHub workflow file.

        Only the top-level ``name:`` key (at column 0) is considered, so the
        name of a job or step is never reported as the workflow name.
        Surrounding matching quotes are removed.

        Returns:
            The name, or "GitHub workflow" if absent or the file is unreadable.
        """
        try:
            content = self._read_text(workflow_path)
        except OSError:
            return "GitHub workflow"

        match = re.search(r'^name:[ \t]*(.+?)[ \t]*$', content, re.MULTILINE)
        if match:
            value = match.group(1)
            if len(value) >= 2 and value[0] == value[-1] and value[0] in '"\'':
                value = value[1:-1]
            value = value.strip()
            if value:
                return value

        return "GitHub workflow"

    def _get_workflow_triggers(self, workflow_path: Path) -> str:
        """Summarise the top-level ``on:`` section of a workflow file.

        Handles both the inline form (``on: push`` / ``on: [push, pull_request]``)
        and the block form, where the indented lines below ``on:`` are
        stripped and joined with ", ". The block ends at the next
        non-indented line; blank lines and comment lines are ignored.

        Returns:
            The trigger summary, or "Unknown" when it cannot be determined.
        """
        try:
            lines = self._read_text(workflow_path).splitlines()
        except OSError:
            return "Unknown"

        on_key = re.compile(r'^(?:on|"on"|\'on\'):[ \t]*(.*)$')

        for index, line in enumerate(lines):
            match = on_key.match(line)
            if not match:
                continue

            inline_value = match.group(1).strip()
            if inline_value and not inline_value.startswith('#'):
                return inline_value

            block = []
            for follower in lines[index + 1:]:
                stripped = follower.strip()
                if not stripped or stripped.startswith('#'):
                    continue
                # A non-indented line starts the next top-level key, unless
                # it is a list item of an unindented YAML sequence.
                if not follower[0].isspace() and not follower.startswith('-'):
                    break
                block.append(stripped)

            return ', '.join(block) if block else "Unknown"

        return "Unknown"


class SessionAnalyzer:
    """Analyzes coding sessions for tooling optimization opportunities.

    The analysis is heuristic: it inspects recent git commit subjects and
    the text of the repository's Python files for patterns suggesting that
    an existing tool was bypassed. All checks are best-effort and return an
    empty list when their data source is unavailable.
    """

    # Directory names that never contain the project's own sources; scanning
    # them is slow and yields false positives from third-party code.
    _IGNORED_DIRS = frozenset({
        ".git", ".hg", ".svn", ".venv", "venv", "node_modules", "__pycache__",
        ".tox", ".nox", ".mypy_cache", ".pytest_cache", "site-packages",
    })

    def __init__(self, repo_path: str = "."):
        """Store the repository root to analyse.

        Args:
            repo_path: Directory to analyse. Defaults to the current directory.
        """
        self.repo_path = Path(repo_path)

    def analyze_recent_activities(self) -> List[MissedOpportunity]:
        """Analyze recent activities for missed tooling opportunities.

        Returns:
            Findings from the commit, file-pattern and manual-implementation
            checks, concatenated in that order.
        """
        opportunities = []

        # Analyze git commits for patterns
        opportunities.extend(self._analyze_git_commits())

        # Analyze file modifications
        opportunities.extend(self._analyze_file_patterns())

        # Look for manual implementations
        opportunities.extend(self._find_manual_implementations())

        return opportunities

    def _iter_python_sources(self):
        """Yield ``(path, text)`` for each Python file under the repo root.

        Skips the directories in ``_IGNORED_DIRS``, unreadable files, and
        this module's own file (its source contains the very literals the
        checks search for, so it would always flag itself). Directories and
        files are visited in sorted order for reproducible results.
        """
        own_file = globals().get("__file__")
        own_path = Path(own_file).resolve() if own_file else None

        for dirpath, dirnames, filenames in os.walk(self.repo_path):
            # Prune in place so os.walk does not descend into ignored dirs.
            dirnames[:] = sorted(name for name in dirnames if name not in self._IGNORED_DIRS)

            for filename in sorted(filenames):
                if not filename.endswith(".py"):
                    continue
                py_file = Path(dirpath) / filename
                try:
                    if own_path is not None and py_file.resolve() == own_path:
                        continue
                    content = py_file.read_text(encoding="utf-8", errors="replace")
                except OSError:
                    # Unreadable file (permissions, broken symlink): skip it.
                    continue
                yield py_file, content

    def _analyze_git_commits(self) -> List[MissedOpportunity]:
        """Analyze git commits for tooling opportunities.

        Looks at the subjects of the 20 most recent commits and flags each
        one that mentions "test" (case-insensitive) without mentioning
        ``make test``.

        Returns:
            One finding per flagged commit; empty when git is unavailable
            or the command fails (e.g. the directory is not a repository).
        """
        opportunities: List[MissedOpportunity] = []

        try:
            # Decode explicitly so commit subjects in any encoding cannot
            # raise during decoding.
            result = subprocess.run(
                ['git', 'log', '--oneline', '-20'],
                capture_output=True, encoding='utf-8', errors='replace',
                cwd=self.repo_path,
            )
        except (OSError, subprocess.SubprocessError) as e:
            print(f"Error analyzing git commits: {e}", file=sys.stderr)
            return opportunities

        if result.returncode != 0:
            return opportunities

        for commit in result.stdout.splitlines():
            if 'test' in commit.lower() and 'make test' not in commit:
                opportunities.append(MissedOpportunity(
                    context=f"Commit: {commit}",
                    available_tool="make test",
                    manual_approach="Manual test execution",
                    efficiency_gain="Standardized test running with proper setup",
                    recommendation="Use 'make test' for consistent test execution"
                ))

        return opportunities

    def _analyze_file_patterns(self) -> List[MissedOpportunity]:
        """Analyze file contents for direct database access.

        Returns:
            A single finding if any scanned Python file mentions
            ``sqlite3``; otherwise an empty list.
        """
        uses_sqlite = any('sqlite3' in content for _, content in self._iter_python_sources())
        if not uses_sqlite:
            return []

        return [MissedOpportunity(
            context="Direct SQLite usage found",
            available_tool="markitect db-query, markitect db-schema",
            manual_approach="Direct database queries",
            efficiency_gain="Standardized database operations with error handling",
            recommendation="Use CLI database commands for database operations"
        )]

    def _find_manual_implementations(self) -> List[MissedOpportunity]:
        """Find patterns of manual implementations.

        Per scanned file, flags (a) ``requests.get`` together with any
        mention of "gitea" (case-insensitive) and (b) ``json.dump``
        together with "schema". A file can produce both findings.
        """
        opportunities = []

        for py_file, content in self._iter_python_sources():
            # Look for manual HTTP requests to Gitea
            if 'requests.get' in content and 'gitea' in content.lower():
                opportunities.append(MissedOpportunity(
                    context=f"Manual HTTP requests in {py_file}",
                    available_tool="markitect issues list/show/create",
                    manual_approach="Direct HTTP requests",
                    efficiency_gain="Error handling, retry logic, authentication",
                    recommendation="Use issue management CLI commands"
                ))

            # Look for manual file operations that could use CLI
            if 'json.dump' in content and 'schema' in content:
                opportunities.append(MissedOpportunity(
                    context=f"Manual JSON schema operations in {py_file}",
                    available_tool="markitect schema-generate",
                    manual_approach="Manual JSON manipulation",
                    efficiency_gain="Validation, formatting, metaschema compliance",
                    recommendation="Use schema generation CLI commands"
                ))

        return opportunities


class AgentPrimingOptimizer:
    """Builds Markdown priming material that steers an agent toward existing tools."""

    def __init__(self, tools: List[ToolMetadata]):
        """Keep a reference to the discovered tools used by the generators."""
        self.tools = tools

    def generate_tool_context(self) -> str:
        """Render the tool inventory, grouped by category, plus usage guidelines.

        Categories appear in the order their first tool was encountered and
        each tool is listed with its usage and, when available, its first
        example. The pieces are joined with newlines.
        """
        # Bucket the tools per category, preserving first-seen order.
        grouped = {}
        for entry in self.tools:
            grouped.setdefault(entry.category, []).append(entry)

        lines = [
            "# Available Repository Tooling\n",
            "The following tools are available in this repository. ",
            "Always prefer using these existing tools over manual implementation.\n",
        ]

        for category_name, members in grouped.items():
            heading = category_name.replace('_', ' ').title()
            lines.append(f"\n## {heading} Tools\n")

            for entry in members:
                lines.append(f"- **{entry.name}**: {entry.description}")
                lines.append(f"  - Usage: `{entry.usage}`")
                if entry.examples:
                    first_example = entry.examples[0]
                    lines.append(f"  - Example: `{first_example}`")
                # Blank separator after every tool entry.
                lines.append("")

        # Closing section: general rules of thumb for picking a tool.
        lines.extend([
            "\n## Tool Usage Guidelines\n",
            "1. **Always check for existing tools** before implementing solutions manually",
            "2. **Use Makefile targets** for common development tasks",
            "3. **Prefer CLI commands** for data operations and issue management",
            "4. **Check scripts directory** for utility functions",
            "5. **Ask for help** if unsure which tool to use for a specific task",
        ])

        return "\n".join(lines)

    def create_decision_tree(self) -> str:
        """Return the fixed Markdown guide that maps task types to commands."""
        # Static text, returned verbatim (it starts with a blank line).
        return """
# Tool Selection Decision Tree

## For Testing Tasks
- Running tests → `make test`
- Validating specific component → `pytest specific_test.py`
- Code quality checks → `make lint` or `make format`

## For Database Operations
- Querying data → `markitect db-query "SELECT ..."`
- Schema inspection → `markitect db-schema`
- Database statistics → `markitect db-stats`

## For Issue Management
- Listing issues → `markitect issues list` or `make list-issues`
- Viewing issue details → `markitect issues show NUM` or `make show-issue NUM=X`
- Creating issues → `markitect issues create`
- Closing issues → `markitect issues close NUM` or `make close-issue NUM=X`

## For Schema Operations
- Generating schemas → `markitect schema-generate`
- Validating documents → `markitect validate`
- Creating stubs → `markitect generate-stub`

## For Development Workflow
- Starting TDD cycle → `make tdd-start NUM=X`
- Project status → `markitect db-stats`
- File analysis → `markitect analyze`
"""

    def generate_quick_reference(self) -> str:
        """Render a short Markdown cheat sheet of the most common commands."""
        # (command, one-line summary) pairs, in display order.
        cheat_sheet = (
            ("make test", "Run all tests"),
            ("make list-issues", "List all issues"),
            ("markitect issues show NUM", "Show issue details"),
            ("markitect schema-generate file.md", "Generate schema from markdown"),
            ("markitect db-query 'SQL'", "Query database"),
            ("make tdd-start NUM=X", "Start TDD cycle for issue X"),
        )

        headings = ["# Quick Tool Reference\n", "## Most Common Commands\n"]
        bullets = [f"- `{command}` - {summary}" for command, summary in cheat_sheet]

        return "\n".join(headings + bullets)


def _render_tool_catalog(tools: "List[ToolMetadata]") -> str:
    """Render the discovered tools as a Markdown catalog."""
    parts = [f"# Repository Tooling Catalog\n\nFound {len(tools)} tools:\n\n"]
    for tool in tools:
        parts.append(
            f"## {tool.name} ({tool.type})\n"
            f"- **Description**: {tool.description}\n"
            f"- **Usage**: `{tool.usage}`\n"
            f"- **Category**: {tool.category}\n\n"
        )
    return "".join(parts)


def _render_opportunities(opportunities: "List[MissedOpportunity]") -> str:
    """Render missed opportunities as a numbered Markdown list of sections."""
    parts = [f"# Tooling Optimization Opportunities\n\nFound {len(opportunities)} opportunities:\n\n"]
    for i, opp in enumerate(opportunities, 1):
        parts.append(
            f"## Opportunity {i}\n"
            f"- **Context**: {opp.context}\n"
            f"- **Available Tool**: {opp.available_tool}\n"
            f"- **Current Approach**: {opp.manual_approach}\n"
            f"- **Potential Gain**: {opp.efficiency_gain}\n"
            f"- **Recommendation**: {opp.recommendation}\n\n"
        )
    return "".join(parts)


def _build_report(tools: "List[ToolMetadata]",
                  opportunities: "List[MissedOpportunity]") -> "OptimizationReport":
    """Assemble the comprehensive report from discovery and analysis results."""
    tools_by_category: Dict[str, int] = {}
    for tool in tools:
        tools_by_category[tool.category] = tools_by_category.get(tool.category, 0) + 1

    return OptimizationReport(
        timestamp=datetime.now().isoformat(),
        total_tools=len(tools),
        tools_by_category=tools_by_category,
        missed_opportunities=opportunities,
        recommendations=[
            "Include tool inventory in agent context",
            "Use decision trees for tool selection",
            "Implement tool usage monitoring",
            "Create quick reference guides",
            "Establish tool usage best practices"
        ],
        quick_wins=[
            "Add Makefile targets to agent context",
            "Document CLI command patterns",
            "Create tool selection guidelines",
            "Implement tool discovery automation"
        ],
    )


def _render_report(report: "OptimizationReport") -> str:
    """Render the report as Markdown (only the first five opportunities are listed)."""
    parts = [
        "# Agent Tooling Optimization Report\n\n",
        f"**Generated**: {report.timestamp}\n",
        f"**Total Tools Found**: {report.total_tools}\n\n",
        "## Tools by Category\n",
    ]
    parts.extend(
        f"- {category.replace('_', ' ').title()}: {count}\n"
        for category, count in report.tools_by_category.items()
    )

    parts.append(f"\n## Missed Opportunities ({len(report.missed_opportunities)})\n")
    parts.extend(
        f"- {opp.context}: Use {opp.available_tool}\n"
        for opp in report.missed_opportunities[:5]  # Top 5
    )

    parts.append("\n## Recommendations\n")
    parts.extend(f"- {rec}\n" for rec in report.recommendations)

    parts.append("\n## Quick Wins\n")
    parts.extend(f"- {win}\n" for win in report.quick_wins)

    return "".join(parts)


def main(argv: Optional[List[str]] = None) -> None:
    """Main entry point for the agent tooling optimizer.

    Commands:
        discover: list every tool found in the repository.
        analyze:  list missed tooling opportunities.
        optimize: emit agent priming text (tool context, decision tree,
                  quick reference); always Markdown, ``--format`` is ignored.
        report:   combined summary of tools and opportunities.

    ``--format json`` produces JSON for discover/analyze/report; the
    ``markdown`` and ``text`` formats both produce the Markdown rendering.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``. ``None``
            (the default) keeps the normal command-line behaviour.
    """
    parser = argparse.ArgumentParser(description="Agent Tooling Optimizer")
    parser.add_argument("command", choices=["discover", "analyze", "optimize", "report"],
                        help="Command to execute")
    parser.add_argument("--format", choices=["json", "markdown", "text"], default="markdown",
                        help="Output format")
    parser.add_argument("--output", help="Output file (default: stdout)")
    parser.add_argument("--repo", default=".",
                        help="Repository root to inspect (default: current directory)")

    args = parser.parse_args(argv)

    # Initialize components
    discovery_engine = ToolingDiscoveryEngine(args.repo)
    session_analyzer = SessionAnalyzer(args.repo)
    as_json = args.format == "json"

    if args.command == "discover":
        tools = discovery_engine.discover_all_tools()
        if as_json:
            output = json.dumps([asdict(tool) for tool in tools], indent=2)
        else:
            output = _render_tool_catalog(tools)

    elif args.command == "analyze":
        opportunities = session_analyzer.analyze_recent_activities()
        if as_json:
            output = json.dumps([asdict(opp) for opp in opportunities], indent=2)
        else:
            output = _render_opportunities(opportunities)

    elif args.command == "optimize":
        optimizer = AgentPrimingOptimizer(discovery_engine.discover_all_tools())
        output = "\n\n".join([
            optimizer.generate_tool_context(),
            optimizer.create_decision_tree(),
            optimizer.generate_quick_reference(),
        ])

    else:  # "report" - argparse's choices guarantee no other value reaches here
        report = _build_report(
            discovery_engine.discover_all_tools(),
            session_analyzer.analyze_recent_activities(),
        )
        if as_json:
            output = json.dumps(asdict(report), indent=2)
        else:
            output = _render_report(report)

    # Output results
    if args.output:
        # Explicit UTF-8: the generated text contains non-ASCII characters
        # (e.g. the arrows in the decision tree) that a locale-default
        # encoding such as cp1252 cannot represent.
        with open(args.output, 'w', encoding='utf-8') as f:
            f.write(output)
        print(f"Output written to {args.output}")
    else:
        print(output)


# Run the command-line interface only when this file is executed as a script;
# importing the module has no side effects.
if __name__ == "__main__":
    main()