Files
markitect-main/markitect/performance_tracker.py
tegwick 3899ca9154
Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
feat: Add comprehensive performance tracking system
🎯 Performance Index KPI System:
- Weighted 0-100 scale performance measurement
- Historical tracking with trend analysis
- Baseline established at 81.4/100

📊 New CLI Commands:
- perf-track: Record performance snapshots with git context
- perf-history: View trends and historical analysis
- perf-benchmark: Enhanced with database fixes
- perf-validate: Real-time threshold validation

🗄️ Performance Database:
- SQLite storage for historical performance data
- Comprehensive metadata capture (git commits, system info)
- Trend analysis with statistical insights

🔧 Critical Fixes:
- Resolved DatabaseManager connection issues in performance commands
- Updated database method calls to use correct API

Implementation Details:
- markitect/performance_tracker.py: Complete tracking system
- Enhanced CLI with professional output formats
- Baseline performance: 78K template ops/sec, 678 DB ops/sec
- Memory usage monitoring with psutil integration

🚀 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-02 17:37:24 +02:00

317 lines
11 KiB
Python

"""
Performance Tracking System for MarkiTect
This module provides historical performance tracking, trend analysis, and
performance index calculation for monitoring system performance over time.
"""
import sqlite3
import json
import time
import hashlib
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Any
from dataclasses import dataclass
@dataclass
class PerformanceSnapshot:
    """A complete performance measurement snapshot.

    Bundles the raw measurements of one benchmark run with the performance
    index derived from them and the context (time, commit, machine) in which
    the run took place.
    """

    # When the snapshot was taken, as a string; the tracker in this module
    # writes ``datetime.now().isoformat()`` (local, timezone-naive).
    timestamp: str
    # Git commit the measurement belongs to, or None when it is unknown.
    git_commit: Optional[str]
    # Free-form description of the host (platform, Python version, ...).
    system_info: Dict[str, Any]
    # Measured throughput figures, in operations per second.
    template_ops_per_sec: float
    database_ops_per_sec: float
    ingestion_ops_per_sec: float
    # Measured memory usage in megabytes.
    memory_usage_mb: float
    # Weighted score derived from the measurements above.
    performance_index: float
    # Optional human-readable annotation for the run.
    notes: str = ""
class PerformanceTracker:
"""Manager for historical performance tracking and analysis."""
def __init__(self, db_path: str):
"""Initialize performance tracker with database path."""
self.db_path = db_path
self.initialize_tracking_database()
def initialize_tracking_database(self) -> None:
"""Initialize SQLite database for performance tracking."""
# Ensure directory exists
db_dir = Path(self.db_path).parent
if not db_dir.exists():
db_dir.mkdir(parents=True, exist_ok=True)
conn = sqlite3.connect(self.db_path)
cursor = conn.cursor()
# Create performance_snapshots table
cursor.execute('''
CREATE TABLE IF NOT EXISTS performance_snapshots (
id INTEGER PRIMARY KEY AUTOINCREMENT,
timestamp TEXT NOT NULL,
git_commit TEXT,
system_info TEXT, -- JSON
template_ops_per_sec REAL NOT NULL,
database_ops_per_sec REAL NOT NULL,
ingestion_ops_per_sec REAL NOT NULL,
memory_usage_mb REAL NOT NULL,
performance_index REAL NOT NULL,
notes TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
''')
# Create performance_trends table for aggregated data
cursor.execute('''
CREATE TABLE IF NOT EXISTS performance_trends (
id INTEGER PRIMARY KEY AUTOINCREMENT,
period_start TEXT NOT NULL,
period_end TEXT NOT NULL,
avg_performance_index REAL NOT NULL,
min_performance_index REAL NOT NULL,
max_performance_index REAL NOT NULL,
trend_direction TEXT, -- 'improving', 'degrading', 'stable'
snapshot_count INTEGER NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
''')
conn.commit()
conn.close()
def calculate_performance_index(self,
template_ops: float,
database_ops: float,
ingestion_ops: float,
memory_mb: float) -> float:
"""
Calculate a normalized performance index (0-100 scale).
Higher values indicate better performance. The index is calculated as:
- Template performance (40%): normalized to baseline of 1000 ops/sec
- Database performance (30%): normalized to baseline of 100 ops/sec
- Ingestion performance (20%): normalized to baseline of 1000 ops/sec
- Memory efficiency (10%): inversely weighted, baseline 50MB
Returns:
Performance index value (0-100, higher is better)
"""
# Define baseline values for normalization
template_baseline = 1000.0
database_baseline = 100.0
ingestion_baseline = 1000.0
memory_baseline = 50.0
# Calculate component scores (capped at 100 for each)
template_score = min(100.0, (template_ops / template_baseline) * 100.0) * 0.40
database_score = min(100.0, (database_ops / database_baseline) * 100.0) * 0.30
ingestion_score = min(100.0, (ingestion_ops / ingestion_baseline) * 100.0) * 0.20
# Memory score is inverse - lower memory usage is better
memory_score = min(100.0, (memory_baseline / max(memory_mb, 1.0)) * 100.0) * 0.10
performance_index = template_score + database_score + ingestion_score + memory_score
return round(performance_index, 2)
def get_system_info(self) -> Dict[str, Any]:
"""Collect system information for context."""
import platform
import sys
try:
import psutil
memory_total = psutil.virtual_memory().total / (1024 * 1024 * 1024) # GB
cpu_count = psutil.cpu_count()
except ImportError:
memory_total = "unknown"
cpu_count = "unknown"
return {
"platform": platform.platform(),
"python_version": sys.version,
"cpu_count": cpu_count,
"memory_total_gb": memory_total,
"markitect_version": "dev" # Could be extracted from __version__
}
def get_git_commit(self) -> Optional[str]:
"""Get current git commit hash if available."""
try:
import subprocess
result = subprocess.run(
['git', 'rev-parse', 'HEAD'],
capture_output=True,
text=True,
cwd=Path(__file__).parent.parent
)
if result.returncode == 0:
return result.stdout.strip()[:12] # Short commit hash
except Exception:
pass
return None
def store_performance_snapshot(self,
template_ops: float,
database_ops: float,
ingestion_ops: float,
memory_mb: float,
notes: str = "") -> int:
"""
Store a performance snapshot in the database.
Returns:
The ID of the stored snapshot
"""
performance_index = self.calculate_performance_index(
template_ops, database_ops, ingestion_ops, memory_mb
)
snapshot = PerformanceSnapshot(
timestamp=datetime.now().isoformat(),
git_commit=self.get_git_commit(),
system_info=self.get_system_info(),
template_ops_per_sec=template_ops,
database_ops_per_sec=database_ops,
ingestion_ops_per_sec=ingestion_ops,
memory_usage_mb=memory_mb,
performance_index=performance_index,
notes=notes
)
conn = sqlite3.connect(self.db_path)
cursor = conn.cursor()
cursor.execute('''
INSERT INTO performance_snapshots
(timestamp, git_commit, system_info, template_ops_per_sec,
database_ops_per_sec, ingestion_ops_per_sec, memory_usage_mb,
performance_index, notes)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
''', (
snapshot.timestamp,
snapshot.git_commit,
json.dumps(snapshot.system_info),
snapshot.template_ops_per_sec,
snapshot.database_ops_per_sec,
snapshot.ingestion_ops_per_sec,
snapshot.memory_usage_mb,
snapshot.performance_index,
snapshot.notes
))
snapshot_id = cursor.lastrowid
conn.commit()
conn.close()
return snapshot_id
def get_performance_history(self, limit: int = 50) -> List[PerformanceSnapshot]:
"""Get recent performance history."""
conn = sqlite3.connect(self.db_path)
cursor = conn.cursor()
cursor.execute('''
SELECT timestamp, git_commit, system_info, template_ops_per_sec,
database_ops_per_sec, ingestion_ops_per_sec, memory_usage_mb,
performance_index, notes
FROM performance_snapshots
ORDER BY created_at DESC
LIMIT ?
''', (limit,))
snapshots = []
for row in cursor.fetchall():
snapshots.append(PerformanceSnapshot(
timestamp=row[0],
git_commit=row[1],
system_info=json.loads(row[2]) if row[2] else {},
template_ops_per_sec=row[3],
database_ops_per_sec=row[4],
ingestion_ops_per_sec=row[5],
memory_usage_mb=row[6],
performance_index=row[7],
notes=row[8] or ""
))
conn.close()
return snapshots
def analyze_performance_trend(self, days: int = 30) -> Dict[str, Any]:
"""Analyze performance trends over specified period."""
conn = sqlite3.connect(self.db_path)
cursor = conn.cursor()
# Get recent snapshots
cursor.execute('''
SELECT performance_index, timestamp, template_ops_per_sec,
database_ops_per_sec, ingestion_ops_per_sec
FROM performance_snapshots
WHERE datetime(timestamp) > datetime('now', '-{} days')
ORDER BY timestamp ASC
'''.format(days))
rows = cursor.fetchall()
conn.close()
if len(rows) < 2:
return {
"trend": "insufficient_data",
"message": "Need at least 2 snapshots for trend analysis"
}
# Calculate trends
indices = [row[0] for row in rows]
first_half = indices[:len(indices)//2]
second_half = indices[len(indices)//2:]
first_avg = sum(first_half) / len(first_half)
second_avg = sum(second_half) / len(second_half)
trend_change = second_avg - first_avg
trend_percent = (trend_change / first_avg) * 100 if first_avg > 0 else 0
if abs(trend_percent) < 2:
trend_direction = "stable"
elif trend_percent > 0:
trend_direction = "improving"
else:
trend_direction = "degrading"
return {
"trend": trend_direction,
"trend_change_points": round(trend_change, 2),
"trend_change_percent": round(trend_percent, 2),
"current_index": indices[-1],
"period_min": min(indices),
"period_max": max(indices),
"period_avg": round(sum(indices) / len(indices), 2),
"snapshot_count": len(indices),
"analysis_period_days": days
}
def get_performance_summary(self) -> Dict[str, Any]:
"""Get comprehensive performance summary."""
history = self.get_performance_history(limit=10)
trend_analysis = self.analyze_performance_trend(days=30)
if not history:
return {"status": "no_data", "message": "No performance data available"}
latest = history[0]
return {
"latest_snapshot": {
"performance_index": latest.performance_index,
"timestamp": latest.timestamp,
"git_commit": latest.git_commit,
"template_ops_per_sec": latest.template_ops_per_sec,
"database_ops_per_sec": latest.database_ops_per_sec,
"ingestion_ops_per_sec": latest.ingestion_ops_per_sec,
"memory_usage_mb": latest.memory_usage_mb
},
"trend_analysis": trend_analysis,
"history_count": len(history)
}