""" Test database initialization and markdown storage functionality for Issue #1. This test implements the requirements for initializing a SQLite database and storing markdown files with front matter parsing. Issue #1: Initialize Database and Store Example Markdown File https://gitea.coulomb.social/coulomb/markitect_project/issues/1 """ import pytest import sqlite3 import tempfile import os from pathlib import Path from datetime import datetime # Import will fail initially - this is expected for TDD red phase try: from markitect.database import DatabaseManager from markitect.frontmatter import FrontMatterParser except ImportError: # Expected to fail initially - we'll implement these modules pass class TestDatabaseInitialization: """Test database initialization and basic operations.""" def setup_method(self): """Set up temporary database for each test.""" self.temp_db = tempfile.NamedTemporaryFile(delete=False, suffix='.db') self.temp_db.close() self.db_path = self.temp_db.name def teardown_method(self): """Clean up temporary database after each test.""" if os.path.exists(self.db_path): os.unlink(self.db_path) def test_database_manager_can_be_created_for_markdown_file_storage(self): """Test that DatabaseManager can be created for markdown file storage.""" # This should fail initially (red phase) db_manager = DatabaseManager(self.db_path) assert db_manager is not None assert db_manager.db_path == self.db_path def test_database_creates_markdown_files_table_with_required_schema(self): """Test that database creates markdown_files table with required schema.""" db_manager = DatabaseManager(self.db_path) db_manager.initialize_database() # Verify database file was created assert os.path.exists(self.db_path) # Verify table structure conn = sqlite3.connect(self.db_path) cursor = conn.cursor() # Check if markdown_files table exists cursor.execute(""" SELECT name FROM sqlite_master WHERE type='table' AND name='markdown_files' """) table_exists = cursor.fetchone() assert table_exists is not None # Check table schema cursor.execute("PRAGMA table_info(markdown_files)") columns = cursor.fetchall() expected_columns = { 'id': 'INTEGER', 'filename': 'TEXT', 'front_matter': 'TEXT', 'content': 'TEXT', 'created_at': 'TIMESTAMP' } actual_columns = {col[1]: col[2] for col in columns} for col_name, col_type in expected_columns.items(): assert col_name in actual_columns assert col_type.upper() in actual_columns[col_name].upper() conn.close() class TestFrontMatterParsing: """Test front matter parsing functionality.""" def test_front_matter_parser_can_be_created_for_metadata_extraction(self): """Test that FrontMatterParser can be created for metadata extraction.""" parser = FrontMatterParser() assert parser is not None def test_parse_example_markdown_with_frontmatter(self): """Test parsing the specific example markdown from Issue #1.""" example_content = '''--- title: "My First Document" author: "Test User" created: "2024-01-01" tags: ["example", "test"] --- # Welcome to MarkiTect This is an example markdown document with **bold text** and *italic text*. ## Features - Front matter parsing - Content storage - Database initialization ## Code Example ```python print("Hello, MarkiTect!") ``` The system should handle this gracefully.''' parser = FrontMatterParser() front_matter, content = parser.parse(example_content) # Verify front matter parsing assert isinstance(front_matter, dict) assert front_matter['title'] == "My First Document" assert front_matter['author'] == "Test User" assert front_matter['created'] == "2024-01-01" assert front_matter['tags'] == ["example", "test"] # Verify content separation assert content.startswith('# Welcome to MarkiTect') assert 'title:' not in content # Front matter should be removed assert '---' not in content # YAML delimiters should be removed assert 'The system should handle this gracefully.' in content def test_parse_markdown_without_frontmatter(self): """Test parsing markdown without front matter.""" content_only = '''# Simple Document This is just markdown content without front matter.''' parser = FrontMatterParser() front_matter, content = parser.parse(content_only) assert front_matter == {} assert content == content_only class TestIntegratedWorkflow: """Test the complete workflow from initialization to storage.""" def setup_method(self): """Set up temporary database for each test.""" self.temp_db = tempfile.NamedTemporaryFile(delete=False, suffix='.db') self.temp_db.close() self.db_path = self.temp_db.name def teardown_method(self): """Clean up temporary database after each test.""" if os.path.exists(self.db_path): os.unlink(self.db_path) def test_complete_workflow_example_file(self): """Test the complete workflow with the example file from Issue #1.""" # Initialize database db_manager = DatabaseManager(self.db_path) db_manager.initialize_database() # Example content from Issue #1 example_content = '''--- title: "My First Document" author: "Test User" created: "2024-01-01" tags: ["example", "test"] --- # Welcome to MarkiTect This is an example markdown document with **bold text** and *italic text*. ## Features - Front matter parsing - Content storage - Database initialization ## Code Example ```python print("Hello, MarkiTect!") ``` The system should handle this gracefully.''' # Store the file result = db_manager.store_markdown_file("example.md", example_content) # Verify storage was successful assert result is not None # Verify data was stored correctly conn = sqlite3.connect(self.db_path) cursor = conn.cursor() cursor.execute("SELECT * FROM markdown_files WHERE filename = ?", ("example.md",)) row = cursor.fetchone() assert row is not None assert row[1] == "example.md" # filename # Verify front matter was stored as JSON import json stored_front_matter = json.loads(row[2]) assert stored_front_matter['title'] == "My First Document" assert stored_front_matter['author'] == "Test User" # Verify content was stored correctly (without front matter) stored_content = row[3] assert stored_content.startswith('# Welcome to MarkiTect') assert '---' not in stored_content # Verify timestamp was set assert row[4] is not None # created_at conn.close() def test_store_multiple_files(self): """Test storing multiple markdown files.""" db_manager = DatabaseManager(self.db_path) db_manager.initialize_database() # Store first file content1 = '''--- title: "File One" --- # First File''' db_manager.store_markdown_file("file1.md", content1) # Store second file content2 = '''--- title: "File Two" --- # Second File''' db_manager.store_markdown_file("file2.md", content2) # Verify both files were stored conn = sqlite3.connect(self.db_path) cursor = conn.cursor() cursor.execute("SELECT COUNT(*) FROM markdown_files") count = cursor.fetchone()[0] assert count == 2 cursor.execute("SELECT filename FROM markdown_files ORDER BY filename") filenames = [row[0] for row in cursor.fetchall()] assert filenames == ["file1.md", "file2.md"] conn.close() class TestErrorHandling: """Test error handling scenarios.""" def setup_method(self): """Set up temporary database for each test.""" self.temp_db = tempfile.NamedTemporaryFile(delete=False, suffix='.db') self.temp_db.close() self.db_path = self.temp_db.name def teardown_method(self): """Clean up temporary database after each test.""" if os.path.exists(self.db_path): os.unlink(self.db_path) def test_invalid_yaml_frontmatter(self): """Test handling of invalid YAML front matter.""" invalid_content = '''--- title: "Missing quotes author: Test User invalid: [unclosed list --- # Content''' parser = FrontMatterParser() # Should handle gracefully without crashing front_matter, content = parser.parse(invalid_content) # Should return empty dict for invalid YAML and preserve content assert isinstance(front_matter, dict) assert content.startswith('# Content') def test_database_path_with_invalid_directory(self): """Test database creation with invalid directory path.""" invalid_path = "/nonexistent/directory/test.db" with pytest.raises(Exception): # Should raise appropriate exception db_manager = DatabaseManager(invalid_path) db_manager.initialize_database()