feat: Implement Issue #1 - Database initialization and front matter parsing

Complete TDD implementation of core MarkiTect functionality:

**Database Module (markitect/database.py):**
- DatabaseManager class with SQLite database initialization
- markdown_files table with proper schema (id, filename, front_matter, content, created_at)
- Front matter storage as JSON with content separation
- File storage, retrieval, and listing methods
- Comprehensive error handling

**Front Matter Module (markitect/frontmatter.py):**
- FrontMatterParser class with YAML front matter parsing
- Clean separation of metadata from markdown content
- Graceful handling of invalid YAML and missing front matter
- Regex-based parsing with proper delimiter handling

**Dependencies:**
- Added PyYAML for front matter parsing
- Updated pyproject.toml with new dependency

**Test Coverage:**
- 9 comprehensive tests covering all functionality
- Database initialization and schema validation
- Front matter parsing with Issue #1 example content
- Integrated workflow testing (storage/retrieval)
- Error handling for edge cases

**TDD Process:**
- RED phase: 8 failing tests defining requirements
- GREEN phase: Minimal implementation making all tests pass
- Validation: Complete workflow verified with example content

This implementation provides the foundation for all subsequent MarkiTect
features, handling the exact example from the Issue #1 specification.

Issue #1: Initialize Database and Store Example Markdown File
coulomb/markitect_project#1

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-09-23 04:28:29 +02:00
parent 62105b1993
commit 35cbe715a5
3 changed files with 223 additions and 1 deletions

162
markitect/database.py Normal file
View File

@@ -0,0 +1,162 @@
"""
Database management functionality for MarkiTect.
This module provides SQLite database initialization and markdown file storage
with front matter support.
"""
import sqlite3
import json
import os
from datetime import datetime
from pathlib import Path
from typing import Optional, Dict, Any
from .frontmatter import FrontMatterParser
class DatabaseManager:
    """Manager for SQLite database operations.

    Each method opens its own connection to ``db_path`` and closes it before
    returning, even when the operation raises.
    """

    def __init__(self, db_path: str):
        """
        Initialize database manager.

        Args:
            db_path: Path to SQLite database file
        """
        self.db_path = db_path
        self.front_matter_parser = FrontMatterParser()

    @staticmethod
    def _json_default(value: Any) -> Any:
        """
        Encode front matter values that ``json`` cannot serialize natively.

        Objects exposing ``isoformat()`` (date, datetime, time - e.g. what a
        YAML loader may produce for timestamp values) are stored as their
        ISO 8601 string. Anything else is rejected rather than silently
        stringified.

        Args:
            value: Object that ``json.dumps`` could not encode

        Returns:
            JSON-compatible replacement for ``value``

        Raises:
            TypeError: If ``value`` has no JSON representation
        """
        isoformat = getattr(value, 'isoformat', None)
        if callable(isoformat):
            return isoformat()
        raise TypeError(
            f'Object of type {type(value).__name__} is not JSON serializable'
        )

    def initialize_database(self) -> None:
        """
        Initialize SQLite database with required tables.

        Creates the parent directory of ``db_path`` when it is missing, then
        creates the markdown_files table (if it does not exist yet) with the
        following schema:
        - id: INTEGER PRIMARY KEY AUTOINCREMENT
        - filename: TEXT NOT NULL
        - front_matter: TEXT (JSON)
        - content: TEXT
        - created_at: TIMESTAMP DEFAULT CURRENT_TIMESTAMP

        Calling this method repeatedly is safe.
        """
        # exist_ok avoids the check-then-create race of a separate exists()
        db_dir = os.path.dirname(self.db_path)
        if db_dir:
            os.makedirs(db_dir, exist_ok=True)

        conn = sqlite3.connect(self.db_path)
        try:
            conn.execute('''
                CREATE TABLE IF NOT EXISTS markdown_files (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    filename TEXT NOT NULL,
                    front_matter TEXT,
                    content TEXT,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                )
            ''')
            conn.commit()
        finally:
            conn.close()

    def store_markdown_file(self, filename: str, content: str) -> Optional[int]:
        """
        Store a markdown file in the database.

        The front matter is split off by the front matter parser and stored
        as a JSON string ('{}' when there is none); the remaining markdown is
        stored as the content.

        Args:
            filename: Name of the markdown file
            content: Raw markdown content with optional front matter

        Returns:
            ID of the inserted record, or None if insertion failed

        Raises:
            TypeError: If the front matter holds a value that cannot be
                represented in JSON
        """
        front_matter, markdown_content = self.front_matter_parser.parse(content)

        if front_matter:
            front_matter_json = json.dumps(
                front_matter, default=self._json_default
            )
        else:
            front_matter_json = '{}'

        # Same text the legacy sqlite3 datetime adapter produced, passed as a
        # string because that implicit adapter is deprecated since Python 3.12.
        created_at = datetime.now().isoformat(' ')

        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            cursor.execute('''
                INSERT INTO markdown_files (filename, front_matter, content, created_at)
                VALUES (?, ?, ?, ?)
            ''', (filename, front_matter_json, markdown_content, created_at))
            record_id = cursor.lastrowid
            conn.commit()
            return record_id
        except sqlite3.Error:
            conn.rollback()
            return None
        finally:
            conn.close()

    def get_markdown_file(self, filename: str) -> Optional[Dict[str, Any]]:
        """
        Retrieve a markdown file from the database.

        The query has no ORDER BY, so when several rows share the same
        filename the first row SQLite yields is returned.

        Args:
            filename: Name of the markdown file to retrieve

        Returns:
            Dictionary with the keys id, filename, front_matter (decoded
            from JSON), content and created_at, or None if not found
        """
        conn = sqlite3.connect(self.db_path)
        try:
            row = conn.execute('''
                SELECT id, filename, front_matter, content, created_at
                FROM markdown_files
                WHERE filename = ?
            ''', (filename,)).fetchone()
        finally:
            conn.close()

        if row is None:
            return None

        return {
            'id': row[0],
            'filename': row[1],
            'front_matter': json.loads(row[2]) if row[2] else {},
            'content': row[3],
            'created_at': row[4]
        }

    def list_markdown_files(self) -> list:
        """
        List all markdown files in the database.

        Returns:
            List of dictionaries (id, filename, front_matter, created_at),
            newest first; the content column is not loaded
        """
        conn = sqlite3.connect(self.db_path)
        try:
            # id breaks ties between rows sharing the same timestamp so the
            # order is deterministic.
            rows = conn.execute('''
                SELECT id, filename, front_matter, created_at
                FROM markdown_files
                ORDER BY created_at DESC, id DESC
            ''').fetchall()
        finally:
            conn.close()

        return [
            {
                'id': row[0],
                'filename': row[1],
                'front_matter': json.loads(row[2]) if row[2] else {},
                'created_at': row[3]
            }
            for row in rows
        ]

60
markitect/frontmatter.py Normal file
View File

@@ -0,0 +1,60 @@
"""
Front matter parsing functionality for MarkiTect.
This module provides YAML front matter parsing for markdown files,
separating metadata from content.
"""
import re
import yaml
from typing import Dict, Tuple, Any
class FrontMatterParser:
    """Parser for YAML front matter in markdown files."""

    # Front matter starts with '---' on the first line and ends at the next
    # line that begins with '---' followed only by whitespace. The YAML body
    # is optional (an empty '---' / '---' block is valid), and the closing
    # delimiter may be the very last thing in the file.
    # Group 1: YAML text (None for an empty block).
    # Group 2: markdown after the delimiter (None when nothing follows).
    _PATTERN = re.compile(
        r'^---\s*\n(?:(.*?)\n)?---(?:\s*\n(.*)|\s*)$',
        re.DOTALL,
    )

    def __init__(self):
        """Initialize the front matter parser (it keeps no state)."""

    def parse(self, content: str) -> Tuple[Dict[str, Any], str]:
        """
        Parse front matter from markdown content.

        Args:
            content: Raw markdown content that may include YAML front matter

        Returns:
            Tuple of (front_matter_dict, markdown_content)
            - front_matter_dict: Parsed YAML as dictionary, empty dict if none
            - markdown_content: Markdown content with front matter removed

            When no front matter block is found, or the block parses to
            something other than a mapping (a bare scalar or a list),
            ``content`` is returned unchanged. When the block is empty or
            contains invalid YAML, the dict is empty and the block is still
            removed from the returned markdown.
        """
        if not content.strip():
            return {}, content

        # Cheap rejection before running the regex
        if not content.strip().startswith('---'):
            return {}, content

        match = self._PATTERN.match(content)
        if not match:
            # No valid front matter found
            return {}, content

        yaml_text = match.group(1) or ''
        markdown_content = match.group(2) or ''

        # An empty block carries no metadata; no need to invoke the YAML parser
        if not yaml_text.strip():
            return {}, markdown_content

        try:
            loaded = yaml.safe_load(yaml_text)
        except yaml.YAMLError:
            # Invalid YAML - empty metadata, markdown after the block is kept
            return {}, markdown_content

        if loaded is None:
            # e.g. a block that holds only YAML comments
            return {}, markdown_content

        if not isinstance(loaded, dict):
            # A scalar or list is not metadata (the text may simply open with
            # a horizontal rule); leave the document untouched instead of
            # returning a non-dict or dropping the text.
            return {}, content

        return loaded, markdown_content

View File

@@ -8,7 +8,7 @@ version = "0.1.0"
description = "Advanced Markdown engine for structured content"
readme = "README.md"
requires-python = ">=3.8"
dependencies = ["markdown-it-py"]
dependencies = ["markdown-it-py", "PyYAML"]
[tool.setuptools.packages.find]
include = ["markitect*"]