feat: Add comprehensive performance tracking system

🎯 Performance Index KPI System:
- Weighted 0-100 scale performance measurement
- Historical tracking with trend analysis
- Baseline established at 81.4/100

📊 New CLI Commands:
- perf-track: Record performance snapshots with git context
- perf-history: View trends and historical analysis
- perf-benchmark: Enhanced with database fixes
- perf-validate: Real-time threshold validation

🗄️ Performance Database:
- SQLite storage for historical performance data
- Comprehensive metadata capture (git commits, system info)
- Trend analysis with statistical insights

🔧 Critical Fixes:
- Resolved DatabaseManager connection issues in performance commands
- Updated database method calls to use correct API

✅ Implementation Details:
- markitect/performance_tracker.py: Complete tracking system
- Enhanced CLI with professional output formats
- Baseline performance: 78K template ops/sec, 678 DB ops/sec
- Memory usage monitoring with psutil integration

🚀 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-02 17:37:24 +02:00
parent 5a14b85c59
commit 3899ca9154
3 changed files with 1511 additions and 0 deletions
--- a/markitect/performance_tracker.py
+++ b/markitect/performance_tracker.py
@@ -0,0 +1,317 @@
+"""
+Performance Tracking System for MarkiTect
+
+This module provides historical performance tracking, trend analysis, and
+performance index calculation for monitoring system performance over time.
+"""
+
+import sqlite3
+import json
+import time
+import hashlib
+from datetime import datetime
+from pathlib import Path
+from typing import Dict, List, Optional, Any
+from dataclasses import dataclass
+
+
+@dataclass
+class PerformanceSnapshot:
+    """One recorded performance measurement together with its context.
+
+    Attributes:
+        timestamp: When the snapshot was taken, as an ISO-8601 string.
+        git_commit: Abbreviated commit hash of the measured checkout, or
+            ``None`` when it could not be determined.
+        system_info: Free-form description of the host (platform, Python
+            version, CPU count, total memory, ...).
+        template_ops_per_sec: Measured template throughput.
+        database_ops_per_sec: Measured database throughput.
+        ingestion_ops_per_sec: Measured ingestion throughput.
+        memory_usage_mb: Measured memory usage in megabytes.
+        performance_index: Aggregate score derived from the four
+            measurements above (higher is better).
+        notes: Optional free-text annotation; empty by default.
+    """
+
+    # --- context -----------------------------------------------------------
+    timestamp: str
+    git_commit: Optional[str]
+    system_info: Dict[str, Any]
+
+    # --- raw measurements --------------------------------------------------
+    template_ops_per_sec: float
+    database_ops_per_sec: float
+    ingestion_ops_per_sec: float
+    memory_usage_mb: float
+
+    # --- derived / optional ------------------------------------------------
+    performance_index: float
+    notes: str = ""
+
+
+class PerformanceTracker:
+    """Manager for historical performance tracking and analysis.
+
+    Snapshots are persisted in a SQLite database located at ``db_path``.
+    Every method opens its own short-lived connection and closes it before
+    returning, including when a statement raises.
+
+    The class-level constants below drive
+    :meth:`calculate_performance_index` and
+    :meth:`analyze_performance_trend`; a subclass may override them to
+    re-tune the scoring without touching the methods.
+    """
+
+    # Reference values that earn a component its full score: throughput in
+    # ops/sec for the first three, memory footprint in MB for the last.
+    TEMPLATE_BASELINE = 1000.0
+    DATABASE_BASELINE = 100.0
+    INGESTION_BASELINE = 1000.0
+    MEMORY_BASELINE_MB = 50.0
+
+    # Share of the 0-100 index contributed by each component (sums to 1.0).
+    TEMPLATE_WEIGHT = 0.40
+    DATABASE_WEIGHT = 0.30
+    INGESTION_WEIGHT = 0.20
+    MEMORY_WEIGHT = 0.10
+
+    # A change smaller than this many percent is reported as "stable".
+    STABLE_TREND_PERCENT = 2.0
+
+    def __init__(self, db_path: str):
+        """Initialize performance tracker with database path.
+
+        Args:
+            db_path: Filesystem path of the SQLite database. The file, its
+                parent directories and the schema are created if missing.
+        """
+        self.db_path = db_path
+        self.initialize_tracking_database()
+
+    def initialize_tracking_database(self) -> None:
+        """Create the database directory, file and tables if they are missing.
+
+        Safe to call repeatedly: the directory is created with
+        ``exist_ok=True`` and both tables use ``CREATE TABLE IF NOT EXISTS``.
+        """
+        Path(self.db_path).parent.mkdir(parents=True, exist_ok=True)
+
+        conn = sqlite3.connect(self.db_path)
+        try:
+            # ``with conn`` commits on success and rolls back on error; it
+            # does not close the connection, hence the outer ``finally``.
+            with conn:
+                # One row per recorded measurement.
+                conn.execute('''
+                    CREATE TABLE IF NOT EXISTS performance_snapshots (
+                        id INTEGER PRIMARY KEY AUTOINCREMENT,
+                        timestamp TEXT NOT NULL,
+                        git_commit TEXT,
+                        system_info TEXT,  -- JSON
+                        template_ops_per_sec REAL NOT NULL,
+                        database_ops_per_sec REAL NOT NULL,
+                        ingestion_ops_per_sec REAL NOT NULL,
+                        memory_usage_mb REAL NOT NULL,
+                        performance_index REAL NOT NULL,
+                        notes TEXT,
+                        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+                    )
+                ''')
+
+                # Aggregated data per period.
+                conn.execute('''
+                    CREATE TABLE IF NOT EXISTS performance_trends (
+                        id INTEGER PRIMARY KEY AUTOINCREMENT,
+                        period_start TEXT NOT NULL,
+                        period_end TEXT NOT NULL,
+                        avg_performance_index REAL NOT NULL,
+                        min_performance_index REAL NOT NULL,
+                        max_performance_index REAL NOT NULL,
+                        trend_direction TEXT,  -- 'improving', 'degrading', 'stable'
+                        snapshot_count INTEGER NOT NULL,
+                        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+                    )
+                ''')
+        finally:
+            conn.close()
+
+    @staticmethod
+    def _weighted_score(ratio: float, weight: float) -> float:
+        """Scale a value/baseline ratio to 0-100, clamp it, apply the weight."""
+        return max(0.0, min(100.0, ratio * 100.0)) * weight
+
+    def calculate_performance_index(self,
+                                   template_ops: float,
+                                   database_ops: float,
+                                   ingestion_ops: float,
+                                   memory_mb: float) -> float:
+        """
+        Calculate a normalized performance index (0-100 scale).
+
+        Higher values indicate better performance. With the default class
+        constants the index is composed of:
+        - Template performance (40%): normalized to baseline of 1000 ops/sec
+        - Database performance (30%): normalized to baseline of 100 ops/sec
+        - Ingestion performance (20%): normalized to baseline of 1000 ops/sec
+        - Memory efficiency (10%): inversely weighted, baseline 50MB
+
+        Each component is clamped to the 0-100 range before weighting, so
+        exceeding a baseline earns no extra credit and a negative
+        measurement cannot push the index below zero.
+
+        Args:
+            template_ops: Template operations per second.
+            database_ops: Database operations per second.
+            ingestion_ops: Ingestion operations per second.
+            memory_mb: Memory usage in megabytes; values below 1 MB are
+                treated as 1 MB to avoid division by zero.
+
+        Returns:
+            Performance index value (0-100, higher is better), rounded to
+            two decimal places.
+        """
+        template_score = self._weighted_score(
+            template_ops / self.TEMPLATE_BASELINE, self.TEMPLATE_WEIGHT
+        )
+        database_score = self._weighted_score(
+            database_ops / self.DATABASE_BASELINE, self.DATABASE_WEIGHT
+        )
+        ingestion_score = self._weighted_score(
+            ingestion_ops / self.INGESTION_BASELINE, self.INGESTION_WEIGHT
+        )
+
+        # Memory score is inverse - lower memory usage is better.
+        memory_score = self._weighted_score(
+            self.MEMORY_BASELINE_MB / max(memory_mb, 1.0), self.MEMORY_WEIGHT
+        )
+
+        performance_index = template_score + database_score + ingestion_score + memory_score
+        return round(performance_index, 2)
+
+    def get_system_info(self) -> Dict[str, Any]:
+        """Collect system information for context.
+
+        Returns:
+            A JSON-serializable dict with the keys ``platform``,
+            ``python_version``, ``cpu_count``, ``memory_total_gb`` and
+            ``markitect_version``. ``cpu_count`` and ``memory_total_gb`` are
+            the string ``"unknown"`` when ``psutil`` is not installed.
+        """
+        import platform
+        import sys
+
+        try:
+            import psutil
+        except ImportError:
+            # psutil is optional; record placeholders instead of failing.
+            memory_total = "unknown"
+            cpu_count = "unknown"
+        else:
+            memory_total = psutil.virtual_memory().total / (1024 * 1024 * 1024)  # GB
+            cpu_count = psutil.cpu_count()
+
+        return {
+            "platform": platform.platform(),
+            "python_version": sys.version,
+            "cpu_count": cpu_count,
+            "memory_total_gb": memory_total,
+            "markitect_version": "dev"  # Could be extracted from __version__
+        }
+
+    def get_git_commit(self) -> Optional[str]:
+        """Get current git commit hash if available.
+
+        Runs ``git rev-parse HEAD`` in the directory two levels above this
+        file. This is best-effort: a missing ``git`` executable, a failing
+        command or undecodable output all yield ``None`` instead of raising.
+
+        Returns:
+            The first 12 characters of the commit hash, or ``None``.
+        """
+        import subprocess
+
+        try:
+            result = subprocess.run(
+                ['git', 'rev-parse', 'HEAD'],
+                capture_output=True,
+                text=True,
+                cwd=Path(__file__).parent.parent
+            )
+        except (OSError, ValueError, subprocess.SubprocessError):
+            # OSError: git not installed / cwd missing.
+            # ValueError: covers UnicodeDecodeError from text-mode decoding.
+            return None
+
+        if result.returncode != 0:
+            return None
+        return result.stdout.strip()[:12]  # Short commit hash
+
+    def store_performance_snapshot(self,
+                                 template_ops: float,
+                                 database_ops: float,
+                                 ingestion_ops: float,
+                                 memory_mb: float,
+                                 notes: str = "") -> int:
+        """
+        Store a performance snapshot in the database.
+
+        The performance index, the local timestamp, the git commit and the
+        system information are derived here and stored alongside the
+        measurements.
+
+        Args:
+            template_ops: Template operations per second.
+            database_ops: Database operations per second.
+            ingestion_ops: Ingestion operations per second.
+            memory_mb: Memory usage in megabytes.
+            notes: Optional free-text annotation.
+
+        Returns:
+            The ID of the stored snapshot
+        """
+        performance_index = self.calculate_performance_index(
+            template_ops, database_ops, ingestion_ops, memory_mb
+        )
+
+        snapshot = PerformanceSnapshot(
+            # Naive local time; analyze_performance_trend compares against
+            # a local-time cutoff to match.
+            timestamp=datetime.now().isoformat(),
+            git_commit=self.get_git_commit(),
+            system_info=self.get_system_info(),
+            template_ops_per_sec=template_ops,
+            database_ops_per_sec=database_ops,
+            ingestion_ops_per_sec=ingestion_ops,
+            memory_usage_mb=memory_mb,
+            performance_index=performance_index,
+            notes=notes
+        )
+
+        conn = sqlite3.connect(self.db_path)
+        try:
+            # Commit on success, roll back if the INSERT fails.
+            with conn:
+                cursor = conn.execute('''
+                    INSERT INTO performance_snapshots
+                    (timestamp, git_commit, system_info, template_ops_per_sec,
+                     database_ops_per_sec, ingestion_ops_per_sec, memory_usage_mb,
+                     performance_index, notes)
+                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+                ''', (
+                    snapshot.timestamp,
+                    snapshot.git_commit,
+                    json.dumps(snapshot.system_info),
+                    snapshot.template_ops_per_sec,
+                    snapshot.database_ops_per_sec,
+                    snapshot.ingestion_ops_per_sec,
+                    snapshot.memory_usage_mb,
+                    snapshot.performance_index,
+                    snapshot.notes
+                ))
+                snapshot_id = cursor.lastrowid
+        finally:
+            conn.close()
+
+        return snapshot_id
+
+    def get_performance_history(self, limit: int = 50) -> List["PerformanceSnapshot"]:
+        """Get recent performance history.
+
+        Args:
+            limit: Maximum number of snapshots to return.
+
+        Returns:
+            Snapshots ordered newest first.
+        """
+        conn = sqlite3.connect(self.db_path)
+        try:
+            # created_at has one-second resolution, so the autoincrement id
+            # breaks ties between snapshots stored within the same second.
+            rows = conn.execute('''
+                SELECT timestamp, git_commit, system_info, template_ops_per_sec,
+                       database_ops_per_sec, ingestion_ops_per_sec, memory_usage_mb,
+                       performance_index, notes
+                FROM performance_snapshots
+                ORDER BY created_at DESC, id DESC
+                LIMIT ?
+            ''', (limit,)).fetchall()
+        finally:
+            conn.close()
+
+        return [
+            PerformanceSnapshot(
+                timestamp=row[0],
+                git_commit=row[1],
+                # system_info is stored as JSON text and may be NULL.
+                system_info=json.loads(row[2]) if row[2] else {},
+                template_ops_per_sec=row[3],
+                database_ops_per_sec=row[4],
+                ingestion_ops_per_sec=row[5],
+                memory_usage_mb=row[6],
+                performance_index=row[7],
+                notes=row[8] or ""
+            )
+            for row in rows
+        ]
+
+    def analyze_performance_trend(self, days: int = 30) -> Dict[str, Any]:
+        """Analyze performance trends over specified period.
+
+        The snapshots of the last ``days`` days are split into an older and
+        a newer half (by timestamp); the trend is the relative change
+        between the average index of the two halves.
+
+        Args:
+            days: Length of the analysis window in days.
+
+        Returns:
+            With fewer than two snapshots in the window, a dict with
+            ``trend == "insufficient_data"`` and a ``message``. Otherwise a
+            dict with ``trend`` (``"stable"``, ``"improving"`` or
+            ``"degrading"``), ``trend_change_points``,
+            ``trend_change_percent``, ``current_index``, ``period_min``,
+            ``period_max``, ``period_avg``, ``snapshot_count`` and
+            ``analysis_period_days``.
+        """
+        conn = sqlite3.connect(self.db_path)
+        try:
+            # The window is passed as a bound parameter rather than being
+            # formatted into the SQL text. 'localtime' is needed because the
+            # stored timestamps are naive local times while 'now' is UTC.
+            rows = conn.execute('''
+                SELECT performance_index, timestamp, template_ops_per_sec,
+                       database_ops_per_sec, ingestion_ops_per_sec
+                FROM performance_snapshots
+                WHERE datetime(timestamp) > datetime('now', 'localtime', ?)
+                ORDER BY timestamp ASC
+            ''', ('-{} days'.format(days),)).fetchall()
+        finally:
+            conn.close()
+
+        if len(rows) < 2:
+            return {
+                "trend": "insufficient_data",
+                "message": "Need at least 2 snapshots for trend analysis"
+            }
+
+        # Compare the average of the older half against the newer half.
+        indices = [row[0] for row in rows]
+        midpoint = len(indices) // 2
+        first_half = indices[:midpoint]
+        second_half = indices[midpoint:]
+
+        first_avg = sum(first_half) / len(first_half)
+        second_avg = sum(second_half) / len(second_half)
+
+        trend_change = second_avg - first_avg
+        # Guard against division by zero when the older half averages 0.
+        trend_percent = (trend_change / first_avg) * 100 if first_avg > 0 else 0
+
+        if abs(trend_percent) < self.STABLE_TREND_PERCENT:
+            trend_direction = "stable"
+        elif trend_percent > 0:
+            trend_direction = "improving"
+        else:
+            trend_direction = "degrading"
+
+        return {
+            "trend": trend_direction,
+            "trend_change_points": round(trend_change, 2),
+            "trend_change_percent": round(trend_percent, 2),
+            "current_index": indices[-1],
+            "period_min": min(indices),
+            "period_max": max(indices),
+            "period_avg": round(sum(indices) / len(indices), 2),
+            "snapshot_count": len(indices),
+            "analysis_period_days": days
+        }
+
+    def get_performance_summary(self) -> Dict[str, Any]:
+        """Get comprehensive performance summary.
+
+        Returns:
+            ``{"status": "no_data", "message": ...}`` when no snapshot has
+            been stored. Otherwise a dict with ``latest_snapshot`` (the
+            newest snapshot's key figures), ``trend_analysis`` (the 30-day
+            result of :meth:`analyze_performance_trend`) and
+            ``history_count`` (number of snapshots inspected, at most 10).
+        """
+        history = self.get_performance_history(limit=10)
+
+        # Bail out before running the trend query when there is nothing
+        # to summarize.
+        if not history:
+            return {"status": "no_data", "message": "No performance data available"}
+
+        trend_analysis = self.analyze_performance_trend(days=30)
+        latest = history[0]
+
+        return {
+            "latest_snapshot": {
+                "performance_index": latest.performance_index,
+                "timestamp": latest.timestamp,
+                "git_commit": latest.git_commit,
+                "template_ops_per_sec": latest.template_ops_per_sec,
+                "database_ops_per_sec": latest.database_ops_per_sec,
+                "ingestion_ops_per_sec": latest.ingestion_ops_per_sec,
+                "memory_usage_mb": latest.memory_usage_mb
+            },
+            "trend_analysis": trend_analysis,
+            "history_count": len(history)
+        }