# markitect-main/markitect/performance_tracker.py

"""
Performance Tracking System for MarkiTect

This module provides historical performance tracking, trend analysis, and
performance index calculation for monitoring system performance over time.
"""

import sqlite3
import json
import time
import hashlib
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Any
from dataclasses import dataclass


@dataclass
class PerformanceSnapshot:
    """A complete performance measurement snapshot.

    One instance mirrors one row of the ``performance_snapshots`` table:
    ``PerformanceTracker.store_performance_snapshot`` builds one before
    inserting, and ``PerformanceTracker.get_performance_history`` rebuilds one
    for every row it reads back.
    """
    # ISO-8601 string; the tracker fills it from ``datetime.now().isoformat()``.
    timestamp: str
    # First 12 characters of the HEAD commit hash, or None when the git lookup fails.
    git_commit: Optional[str]
    # Host description (platform, Python version, CPU count, total memory);
    # serialized to JSON when stored.
    system_info: Dict[str, Any]
    # Measured throughput figures, in operations per second.
    template_ops_per_sec: float
    database_ops_per_sec: float
    ingestion_ops_per_sec: float
    # Memory usage in megabytes.
    memory_usage_mb: float
    # Composite score produced by ``PerformanceTracker.calculate_performance_index``.
    performance_index: float
    # Free-form annotation; empty by default.
    notes: str = ""


class PerformanceTracker:
    """Manager for historical performance tracking and analysis."""

    def __init__(self, db_path: str) -> None:
        """Initialize performance tracker with database path.

        Args:
            db_path: Location of the SQLite database file. Its parent
                directory and the tracking tables are created immediately
                if they do not exist yet.
        """
        self.db_path = db_path
        # Create the schema eagerly so the other methods can rely on the tables.
        self.initialize_tracking_database()

    def initialize_tracking_database(self) -> None:
        """Initialize SQLite database for performance tracking."""
        # Ensure directory exists
        db_dir = Path(self.db_path).parent
        if not db_dir.exists():
            db_dir.mkdir(parents=True, exist_ok=True)

        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        # Create performance_snapshots table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS performance_snapshots (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                timestamp TEXT NOT NULL,
                git_commit TEXT,
                system_info TEXT,  -- JSON
                template_ops_per_sec REAL NOT NULL,
                database_ops_per_sec REAL NOT NULL,
                ingestion_ops_per_sec REAL NOT NULL,
                memory_usage_mb REAL NOT NULL,
                performance_index REAL NOT NULL,
                notes TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''')

        # Create performance_trends table for aggregated data
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS performance_trends (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                period_start TEXT NOT NULL,
                period_end TEXT NOT NULL,
                avg_performance_index REAL NOT NULL,
                min_performance_index REAL NOT NULL,
                max_performance_index REAL NOT NULL,
                trend_direction TEXT,  -- 'improving', 'degrading', 'stable'
                snapshot_count INTEGER NOT NULL,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''')

        conn.commit()
        conn.close()

    def calculate_performance_index(self,
                                   template_ops: float,
                                   database_ops: float,
                                   ingestion_ops: float,
                                   memory_mb: float) -> float:
        """
        Calculate a normalized performance index (0-100 scale).

        Higher values indicate better performance. The index is calculated as:
        - Template performance (40%): normalized to baseline of 1000 ops/sec
        - Database performance (30%): normalized to baseline of 100 ops/sec
        - Ingestion performance (20%): normalized to baseline of 1000 ops/sec
        - Memory efficiency (10%): inversely weighted, baseline 50MB

        Returns:
            Performance index value (0-100, higher is better)
        """
        # Define baseline values for normalization
        template_baseline = 1000.0
        database_baseline = 100.0
        ingestion_baseline = 1000.0
        memory_baseline = 50.0

        # Calculate component scores (capped at 100 for each)
        template_score = min(100.0, (template_ops / template_baseline) * 100.0) * 0.40
        database_score = min(100.0, (database_ops / database_baseline) * 100.0) * 0.30
        ingestion_score = min(100.0, (ingestion_ops / ingestion_baseline) * 100.0) * 0.20

        # Memory score is inverse - lower memory usage is better
        memory_score = min(100.0, (memory_baseline / max(memory_mb, 1.0)) * 100.0) * 0.10

        performance_index = template_score + database_score + ingestion_score + memory_score
        return round(performance_index, 2)

    def get_system_info(self) -> Dict[str, Any]:
        """Collect system information for context."""
        import platform
        import sys

        try:
            import psutil
            memory_total = psutil.virtual_memory().total / (1024 * 1024 * 1024)  # GB
            cpu_count = psutil.cpu_count()
        except ImportError:
            memory_total = "unknown"
            cpu_count = "unknown"

        return {
            "platform": platform.platform(),
            "python_version": sys.version,
            "cpu_count": cpu_count,
            "memory_total_gb": memory_total,
            "markitect_version": "dev"  # Could be extracted from __version__
        }

    def get_git_commit(self) -> Optional[str]:
        """Get current git commit hash if available."""
        try:
            import subprocess
            result = subprocess.run(
                ['git', 'rev-parse', 'HEAD'],
                capture_output=True,
                text=True,
                cwd=Path(__file__).parent.parent
            )
            if result.returncode == 0:
                return result.stdout.strip()[:12]  # Short commit hash
        except Exception:
            pass
        return None

    def store_performance_snapshot(self,
                                 template_ops: float,
                                 database_ops: float,
                                 ingestion_ops: float,
                                 memory_mb: float,
                                 notes: str = "") -> int:
        """
        Store a performance snapshot in the database.

        The performance index, current timestamp, git commit and system
        information are computed here and stored alongside the measurements.

        Args:
            template_ops: Template operations per second.
            database_ops: Database operations per second.
            ingestion_ops: Ingestion operations per second.
            memory_mb: Memory usage in megabytes.
            notes: Optional free-form annotation stored with the snapshot.

        Returns:
            The ID of the stored snapshot

        Raises:
            sqlite3.Error: If the insert fails. The connection is closed
                before the error propagates.
        """
        performance_index = self.calculate_performance_index(
            template_ops, database_ops, ingestion_ops, memory_mb
        )

        snapshot = PerformanceSnapshot(
            timestamp=datetime.now().isoformat(),
            git_commit=self.get_git_commit(),
            system_info=self.get_system_info(),
            template_ops_per_sec=template_ops,
            database_ops_per_sec=database_ops,
            ingestion_ops_per_sec=ingestion_ops,
            memory_usage_mb=memory_mb,
            performance_index=performance_index,
            notes=notes
        )

        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            cursor.execute('''
                INSERT INTO performance_snapshots
                (timestamp, git_commit, system_info, template_ops_per_sec,
                 database_ops_per_sec, ingestion_ops_per_sec, memory_usage_mb,
                 performance_index, notes)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
            ''', (
                snapshot.timestamp,
                snapshot.git_commit,
                json.dumps(snapshot.system_info),  # stored as JSON text
                snapshot.template_ops_per_sec,
                snapshot.database_ops_per_sec,
                snapshot.ingestion_ops_per_sec,
                snapshot.memory_usage_mb,
                snapshot.performance_index,
                snapshot.notes
            ))

            snapshot_id = cursor.lastrowid
            conn.commit()
        finally:
            # Always release the connection, even when the insert raised.
            conn.close()

        return snapshot_id

    def get_performance_history(self, limit: int = 50) -> List[PerformanceSnapshot]:
        """Get recent performance history."""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        cursor.execute('''
            SELECT timestamp, git_commit, system_info, template_ops_per_sec,
                   database_ops_per_sec, ingestion_ops_per_sec, memory_usage_mb,
                   performance_index, notes
            FROM performance_snapshots
            ORDER BY created_at DESC
            LIMIT ?
        ''', (limit,))

        snapshots = []
        for row in cursor.fetchall():
            snapshots.append(PerformanceSnapshot(
                timestamp=row[0],
                git_commit=row[1],
                system_info=json.loads(row[2]) if row[2] else {},
                template_ops_per_sec=row[3],
                database_ops_per_sec=row[4],
                ingestion_ops_per_sec=row[5],
                memory_usage_mb=row[6],
                performance_index=row[7],
                notes=row[8] or ""
            ))

        conn.close()
        return snapshots

    def analyze_performance_trend(self, days: int = 30) -> Dict[str, Any]:
        """Analyze performance trends over specified period."""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        # Get recent snapshots
        cursor.execute('''
            SELECT performance_index, timestamp, template_ops_per_sec,
                   database_ops_per_sec, ingestion_ops_per_sec
            FROM performance_snapshots
            WHERE datetime(timestamp) > datetime('now', '-{} days')
            ORDER BY timestamp ASC
        '''.format(days))

        rows = cursor.fetchall()
        conn.close()

        if len(rows) < 2:
            return {
                "trend": "insufficient_data",
                "message": "Need at least 2 snapshots for trend analysis"
            }

        # Calculate trends
        indices = [row[0] for row in rows]
        first_half = indices[:len(indices)//2]
        second_half = indices[len(indices)//2:]

        first_avg = sum(first_half) / len(first_half)
        second_avg = sum(second_half) / len(second_half)

        trend_change = second_avg - first_avg
        trend_percent = (trend_change / first_avg) * 100 if first_avg > 0 else 0

        if abs(trend_percent) < 2:
            trend_direction = "stable"
        elif trend_percent > 0:
            trend_direction = "improving"
        else:
            trend_direction = "degrading"

        return {
            "trend": trend_direction,
            "trend_change_points": round(trend_change, 2),
            "trend_change_percent": round(trend_percent, 2),
            "current_index": indices[-1],
            "period_min": min(indices),
            "period_max": max(indices),
            "period_avg": round(sum(indices) / len(indices), 2),
            "snapshot_count": len(indices),
            "analysis_period_days": days
        }

    def get_performance_summary(self) -> Dict[str, Any]:
        """Get comprehensive performance summary."""
        history = self.get_performance_history(limit=10)
        trend_analysis = self.analyze_performance_trend(days=30)

        if not history:
            return {"status": "no_data", "message": "No performance data available"}

        latest = history[0]

        return {
            "latest_snapshot": {
                "performance_index": latest.performance_index,
                "timestamp": latest.timestamp,
                "git_commit": latest.git_commit,
                "template_ops_per_sec": latest.template_ops_per_sec,
                "database_ops_per_sec": latest.database_ops_per_sec,
                "ingestion_ops_per_sec": latest.ingestion_ops_per_sec,
                "memory_usage_mb": latest.memory_usage_mb
            },
            "trend_analysis": trend_analysis,
            "history_count": len(history)
        }