"""
Database management and front matter parsing for MarkiTect.

This text reproduces, in dependency order, the two modules introduced by the
change set:

* ``markitect/frontmatter.py`` -- YAML front matter parsing for markdown
  files, separating metadata from content.
* ``markitect/database.py`` -- SQLite database initialization and markdown
  file storage with front matter support.

Packaging note: the same change set lists ``PyYAML`` next to
``markdown-it-py`` in the ``dependencies`` entry of ``pyproject.toml``.
In the package layout ``database.py`` obtains the parser through
``from .frontmatter import FrontMatterParser``.
"""

import json
import os
import re
import sqlite3
from contextlib import closing
from datetime import date, datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union


# ---------------------------------------------------------------------------
# markitect/frontmatter.py
# ---------------------------------------------------------------------------

class FrontMatterParser:
    """Parser for YAML front matter in markdown files."""

    # Opening '---' must be the very first line.  The closing '---' must start
    # a line (look-behind for '\n') and may be followed by a newline plus the
    # markdown body, or by the end of the text.  Because the look-behind is
    # also satisfied directly after the opening line, an empty block
    # ('---\n---\n') is recognised as well.
    _FRONT_MATTER_RE = re.compile(
        r'\A---\s*\n(.*?)(?<=\n)---\s*(?:\n(.*))?\Z',
        re.DOTALL,
    )

    def __init__(self):
        """Initialize the front matter parser (it keeps no state)."""

    def parse(self, content: str) -> Tuple[Dict[str, Any], str]:
        """
        Parse front matter from markdown content.

        Args:
            content: Raw markdown content that may include YAML front matter

        Returns:
            Tuple of (front_matter_dict, markdown_content)
            - front_matter_dict: Parsed YAML mapping; empty dict if there is
              no front matter, if it is empty, if it is invalid YAML, or if
              it is not a mapping (for example a bare scalar or a list)
            - markdown_content: Markdown content with the front matter block
              removed.  When no well-formed block is found the input is
              returned unchanged.  A delimited block whose YAML is invalid is
              still removed from the returned content.
        """
        # Front matter has to start on the first character of the document;
        # this also covers empty and whitespace-only input.
        if not content.startswith('---'):
            return {}, content

        match = self._FRONT_MATTER_RE.match(content)
        if match is None:
            # Opening delimiter without a matching closing delimiter
            return {}, content

        yaml_text = match.group(1)
        # group(2) is None when the closing delimiter ends the document
        markdown_content = match.group(2) or ''

        if not yaml_text.strip():
            # Empty block: nothing to hand to the YAML parser
            return {}, markdown_content

        # Imported here so that documents without front matter can be handled
        # even where PyYAML is not importable.
        import yaml

        try:
            front_matter = yaml.safe_load(yaml_text)
        except yaml.YAMLError:
            return {}, markdown_content

        # safe_load may yield None, a scalar or a list; only a mapping honours
        # the declared return type.
        if not isinstance(front_matter, dict):
            return {}, markdown_content

        return front_matter, markdown_content


# ---------------------------------------------------------------------------
# markitect/database.py
# ---------------------------------------------------------------------------

def _json_default(value: Any) -> str:
    """Serialize the date/datetime values YAML produces; reject anything else."""
    if isinstance(value, (date, datetime)):
        return value.isoformat()
    raise TypeError(
        f'Object of type {type(value).__name__} is not JSON serializable'
    )


class DatabaseManager:
    """Manager for SQLite database operations."""

    def __init__(self, db_path: Union[str, 'os.PathLike[str]']):
        """
        Initialize database manager.

        No connection is opened here; every operation opens and closes its
        own connection.

        Args:
            db_path: Path to SQLite database file (string or path-like)
        """
        self.db_path = os.fspath(db_path)
        self.front_matter_parser = FrontMatterParser()

    @staticmethod
    def _load_front_matter(raw: Optional[str]) -> Any:
        """Decode the stored front matter JSON; NULL/empty decodes to {}."""
        return json.loads(raw) if raw else {}

    def initialize_database(self) -> None:
        """
        Initialize SQLite database with required tables.

        Creates the markdown_files table (if it does not exist yet) with the
        following schema:
        - id: INTEGER PRIMARY KEY AUTOINCREMENT
        - filename: TEXT NOT NULL
        - front_matter: TEXT (JSON)
        - content: TEXT
        - created_at: TIMESTAMP DEFAULT CURRENT_TIMESTAMP

        Missing parent directories of the database file are created first.
        """
        # exist_ok avoids the check-then-create race of a separate exists() test
        Path(self.db_path).parent.mkdir(parents=True, exist_ok=True)

        # closing() releases the connection even if the statement fails;
        # the inner "with conn" commits on success and rolls back on error.
        with closing(sqlite3.connect(self.db_path)) as conn:
            with conn:
                conn.execute('''
                    CREATE TABLE IF NOT EXISTS markdown_files (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        filename TEXT NOT NULL,
                        front_matter TEXT,
                        content TEXT,
                        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                    )
                ''')

    def store_markdown_file(self, filename: str, content: str) -> Optional[int]:
        """
        Store a markdown file in the database.

        Every call inserts a new row; an existing row with the same filename
        is neither replaced nor updated.

        Args:
            filename: Name of the markdown file
            content: Raw markdown content with optional front matter

        Returns:
            ID of the inserted record, or None if insertion failed

        Raises:
            TypeError: If the front matter holds a value that cannot be
                represented as JSON (date and datetime values are stored in
                ISO 8601 form).
        """
        # Parse front matter and content
        front_matter, markdown_content = self.front_matter_parser.parse(content)

        # Convert front matter to JSON string
        if front_matter:
            front_matter_json = json.dumps(front_matter, default=_json_default)
        else:
            front_matter_json = '{}'

        # Same text the implicit sqlite3 datetime adapter would store, written
        # explicitly because that adapter is deprecated since Python 3.12.
        # NOTE: this is naive local time, whereas the column default
        # CURRENT_TIMESTAMP is UTC.
        created_at = datetime.now().isoformat(sep=' ')

        with closing(sqlite3.connect(self.db_path)) as conn:
            try:
                with conn:
                    cursor = conn.execute('''
                        INSERT INTO markdown_files (filename, front_matter, content, created_at)
                        VALUES (?, ?, ?, ?)
                    ''', (filename, front_matter_json, markdown_content, created_at))
            except sqlite3.Error:
                # "with conn" has already rolled the transaction back
                return None
            return cursor.lastrowid

    def get_markdown_file(self, filename: str) -> Optional[Dict[str, Any]]:
        """
        Retrieve a markdown file from the database.

        Args:
            filename: Name of the markdown file to retrieve

        Returns:
            Dictionary with the keys id, filename, front_matter, content and
            created_at, or None if not found.  If the filename was stored
            more than once, the row with the lowest id is returned.
        """
        with closing(sqlite3.connect(self.db_path)) as conn:
            row = conn.execute('''
                SELECT id, filename, front_matter, content, created_at
                FROM markdown_files
                WHERE filename = ?
                ORDER BY id
                LIMIT 1
            ''', (filename,)).fetchone()

        if row is None:
            return None

        record_id, stored_name, raw_front_matter, body, created_at = row
        return {
            'id': record_id,
            'filename': stored_name,
            'front_matter': self._load_front_matter(raw_front_matter),
            'content': body,
            'created_at': created_at,
        }

    def list_markdown_files(self) -> List[Dict[str, Any]]:
        """
        List all markdown files in the database.

        Returns:
            List of dictionaries containing file metadata (id, filename,
            front_matter, created_at), newest first.  Rows sharing a
            timestamp are ordered by descending id.
        """
        with closing(sqlite3.connect(self.db_path)) as conn:
            rows = conn.execute('''
                SELECT id, filename, front_matter, created_at
                FROM markdown_files
                ORDER BY created_at DESC, id DESC
            ''').fetchall()

        return [
            {
                'id': record_id,
                'filename': stored_name,
                'front_matter': self._load_front_matter(raw_front_matter),
                'created_at': created_at,
            }
            for record_id, stored_name, raw_front_matter, created_at in rows
        ]