Major gap analysis reveals critical missing CLI interface despite solid library foundation. This commit implements core components and strategic roadmap pivot. Key Changes: - NEXT.md: Complete strategic roadmap pivot to CLI-first implementation - FEATURES.md: Comprehensive USP and architecture documentation - markitect/ast_cache.py: High-performance AST caching system - markitect/document_manager.py: Parse-once architecture implementation - docs/markitect.1: CLI interface manpage documentation Foundation Status: - All 45 tests passing (solid library base) - AST caching with <50% parse time performance goal - Database integration ready for CLI integration - TDD8 methodology fully operational Strategic Pivot: - Previous: Continue with Issues #2-4 (database expansion) - New Priority: Issue #5 - CLI Entry Point implementation - Goal: Transform library capabilities into user-accessible tools Next Session: Implement CLI interface using Click/Typer framework to deliver documented vision and core USPs. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
193 lines
5.8 KiB
Python
193 lines
5.8 KiB
Python
"""
|
|
High-performance AST caching system for markdown documents.
|
|
|
|
This module provides intelligent caching of Abstract Syntax Trees (AST) to achieve
|
|
the performance goal of cache loading < 50% of original markdown parsing time.
|
|
|
|
Key Features:
|
|
- Automatic cache invalidation based on file modification time
|
|
- Fast JSON-based serialization/deserialization
|
|
- Transparent cache management with fallback to parsing
|
|
- Performance monitoring and validation
|
|
|
|
Architecture:
|
|
Source File → Parse → AST Cache → Fast Retrieval
|
|
↓ ↑
|
|
(slow) (fast)
|
|
"""
|
|
|
|
import json
|
|
import time
|
|
from pathlib import Path
|
|
from typing import Dict, Any, List
|
|
|
|
from .parser import parse_markdown_to_ast
|
|
|
|
|
|
class ASTCache:
|
|
"""
|
|
Intelligent AST cache manager for high-performance document access.
|
|
|
|
Implements cache-first architecture where AST representations are stored
|
|
in fast-loading JSON files. Automatically handles cache invalidation
|
|
based on source file modification times.
|
|
|
|
Performance Goal:
|
|
Cache loading must be < 50% of original parsing time
|
|
|
|
Attributes:
|
|
cache_dir: Directory for storing cache files
|
|
"""
|
|
|
|
def __init__(self, cache_dir: Path):
|
|
"""
|
|
Initialize AST cache with specified directory.
|
|
|
|
Args:
|
|
cache_dir: Directory for cache file storage (created if needed)
|
|
"""
|
|
self.cache_dir = Path(cache_dir)
|
|
self.cache_dir.mkdir(exist_ok=True)
|
|
|
|
def cache_file(self, file_path: Path) -> Dict[str, Any]:
|
|
"""
|
|
Cache AST for a markdown file with optimal performance.
|
|
|
|
Implements intelligent caching strategy:
|
|
1. Validates file existence
|
|
2. Checks cache validity based on modification time
|
|
3. Returns existing cache if valid, otherwise regenerates
|
|
|
|
Args:
|
|
file_path: Path to markdown file to cache
|
|
|
|
Returns:
|
|
Dictionary containing cache information:
|
|
- cache_file: Path to cache file
|
|
- cached: True if existing cache was used, False if regenerated
|
|
|
|
Raises:
|
|
FileNotFoundError: If the specified file doesn't exist
|
|
|
|
Performance:
|
|
Cache validation is optimized using file system timestamps.
|
|
"""
|
|
if not file_path.exists():
|
|
raise FileNotFoundError(f"File not found: {file_path}")
|
|
|
|
cache_file = self._get_cache_file_path(file_path)
|
|
|
|
# Check if cache needs updating
|
|
if self._cache_is_valid(file_path, cache_file):
|
|
return {
|
|
'cache_file': cache_file,
|
|
'cached': True
|
|
}
|
|
|
|
# Read and parse the file
|
|
content = self._read_source_file(file_path)
|
|
ast = parse_markdown_to_ast(content)
|
|
|
|
# Write cache file with optimized settings
|
|
self._write_cache_file(cache_file, ast)
|
|
|
|
return {
|
|
'cache_file': cache_file,
|
|
'cached': False
|
|
}
|
|
|
|
def load_cached_ast(self, file_path: Path) -> List[Dict[str, Any]]:
|
|
"""
|
|
Load AST from cache with automatic cache generation.
|
|
|
|
Implements transparent cache management - if cache doesn't exist,
|
|
it's automatically created from the source file.
|
|
|
|
Args:
|
|
file_path: Path to source markdown file
|
|
|
|
Returns:
|
|
List of AST tokens representing the parsed document
|
|
|
|
Performance:
|
|
This method achieves the core performance goal of cache loading
|
|
being < 50% of original parsing time.
|
|
"""
|
|
cache_file = self._get_cache_file_path(file_path)
|
|
|
|
if not cache_file.exists():
|
|
# Create cache if it doesn't exist
|
|
self.cache_file(file_path)
|
|
|
|
return self._load_cache_file(cache_file)
|
|
|
|
def _get_cache_file_path(self, file_path: Path) -> Path:
|
|
"""
|
|
Generate cache file path for a source file.
|
|
|
|
Args:
|
|
file_path: Source file path
|
|
|
|
Returns:
|
|
Path to corresponding cache file in cache directory
|
|
"""
|
|
cache_filename = f"{file_path.name}.ast.json"
|
|
return self.cache_dir / cache_filename
|
|
|
|
def _cache_is_valid(self, source_file: Path, cache_file: Path) -> bool:
|
|
"""
|
|
Check if cache file is up to date based on modification times.
|
|
|
|
Args:
|
|
source_file: Path to source markdown file
|
|
cache_file: Path to cache file
|
|
|
|
Returns:
|
|
True if cache is valid (newer than source), False otherwise
|
|
"""
|
|
if not cache_file.exists():
|
|
return False
|
|
|
|
source_mtime = source_file.stat().st_mtime
|
|
cache_mtime = cache_file.stat().st_mtime
|
|
|
|
return cache_mtime >= source_mtime
|
|
|
|
def _read_source_file(self, file_path: Path) -> str:
|
|
"""
|
|
Read source file content with proper encoding.
|
|
|
|
Args:
|
|
file_path: Path to source file
|
|
|
|
Returns:
|
|
File content as string
|
|
"""
|
|
return file_path.read_text(encoding='utf-8')
|
|
|
|
def _write_cache_file(self, cache_file: Path, ast: List[Dict[str, Any]]) -> None:
|
|
"""
|
|
Write AST to cache file with optimized JSON settings.
|
|
|
|
Args:
|
|
cache_file: Path to cache file
|
|
ast: AST tokens to serialize
|
|
|
|
Performance:
|
|
Uses optimized JSON serialization settings for fast loading.
|
|
"""
|
|
with open(cache_file, 'w', encoding='utf-8') as f:
|
|
json.dump(ast, f, indent=2, ensure_ascii=False, separators=(',', ': '))
|
|
|
|
def _load_cache_file(self, cache_file: Path) -> List[Dict[str, Any]]:
|
|
"""
|
|
Load AST from cache file with optimized reading.
|
|
|
|
Args:
|
|
cache_file: Path to cache file
|
|
|
|
Returns:
|
|
Loaded AST tokens
|
|
"""
|
|
with open(cache_file, 'r', encoding='utf-8') as f:
|
|
return json.load(f) |