# markitect-main/tests/test_l4_service_document_management.py

"""
Tests for Issue #4: Retrieve All Stored Files

This module tests the functionality to retrieve all Markdown files and schemas
currently stored in the temporary database.
"""

import pytest
import sqlite3
import tempfile
import os
from pathlib import Path

# Prepend an ancestor directory of this file to sys.path, presumably so the
# `markitect` import below resolves from a source checkout without the
# package being installed.
import sys
# NOTE(review): four `.parent` hops resolve to the directory three levels
# above the directory containing this file. If this file lives directly in
# `<project>/tests/`, the project root would be only two hops up
# (`Path(__file__).parent.parent`) -- TODO confirm against the repo layout.
project_root = Path(__file__).parent.parent.parent.parent
# Inserted at index 0 so this directory takes precedence over other entries.
sys.path.insert(0, str(project_root))

from markitect.database import DatabaseManager


class TestIssue4RetrieveAllFiles:
    """Test retrieval of all stored files and schemas.

    Every test runs against a fresh on-disk database file that is created in
    ``setup_method`` and removed again in ``teardown_method``.
    """

    def setup_method(self):
        """Create a temporary database file and an initialized manager for it.

        Sets ``self.db_fd``, ``self.db_path`` and ``self.db_manager``.
        """
        # mkstemp hands back an open descriptor; only the path is needed, so
        # the descriptor is closed right away.
        self.db_fd, self.db_path = tempfile.mkstemp(suffix='.db')
        os.close(self.db_fd)

        try:
            # Initialize database manager and create tables
            self.db_manager = DatabaseManager(self.db_path)
            self.db_manager.initialize_database()
        except Exception:
            # pytest does not call teardown_method when setup_method fails,
            # so the temp file has to be removed here or it would leak.
            self._remove_db_file()
            raise

    def teardown_method(self):
        """Clean up test database."""
        self._remove_db_file()

    def _remove_db_file(self):
        """Delete the temporary database file; a missing file is not an error."""
        try:
            os.unlink(self.db_path)
        except FileNotFoundError:
            pass

    def test_list_markdown_files_empty_database(self):
        """Test listing files when database is empty."""
        files = self.db_manager.list_markdown_files()

        assert isinstance(files, list)
        assert files == []

    def test_list_markdown_files_single_file(self):
        """Test listing files with a single stored file."""
        # Store a test file
        test_content = "# Test Document\n\nThis is a test."
        file_id = self.db_manager.store_markdown_file("test.md", test_content)

        assert file_id is not None

        # List files
        files = self.db_manager.list_markdown_files()

        assert len(files) == 1
        assert files[0]['filename'] == 'test.md'
        assert files[0]['id'] == file_id
        assert 'created_at' in files[0]
        assert 'front_matter' in files[0]

    def test_list_markdown_files_multiple_files(self):
        """Test listing files with multiple stored files."""
        # Store multiple test files
        test_files = [
            ("doc1.md", "# Document 1\n\nFirst document."),
            ("doc2.md", "# Document 2\n\nSecond document."),
            ("doc3.md", "# Document 3\n\nThird document."),
        ]

        stored_ids = []
        for filename, content in test_files:
            file_id = self.db_manager.store_markdown_file(filename, content)
            assert file_id is not None
            stored_ids.append(file_id)

        # List files
        files = self.db_manager.list_markdown_files()

        assert len(files) == 3

        # Check that all files are present
        filenames = [f['filename'] for f in files]
        assert 'doc1.md' in filenames
        assert 'doc2.md' in filenames
        assert 'doc3.md' in filenames

        # The listing must report exactly the ids handed out by the store
        # calls (same contract the single-file test checks for one row).
        assert {f['id'] for f in files} == set(stored_ids)

        # Verify ordering (should be by created_at DESC)
        # Since we created them in order, the last one should be first
        # NOTE(review): if created_at only has one-second resolution and the
        # query has no tie-breaker, rows inserted within the same second may
        # come back in any order -- confirm against DatabaseManager.
        assert files[0]['filename'] == 'doc3.md'

    def test_list_markdown_files_with_frontmatter(self):
        """Test listing files that contain front matter."""
        content_with_frontmatter = """---
title: Test Document
category: testing
tags: [test, example]
---

# Test Document

This document has front matter.
"""

        file_id = self.db_manager.store_markdown_file("frontmatter.md", content_with_frontmatter)
        assert file_id is not None

        # List files
        files = self.db_manager.list_markdown_files()

        assert len(files) == 1
        file_info = files[0]

        assert file_info['filename'] == 'frontmatter.md'
        assert 'front_matter' in file_info

        # Front matter should be parsed and stored as a dictionary
        front_matter = file_info['front_matter']
        assert isinstance(front_matter, dict)
        assert front_matter.get('title') == 'Test Document'
        assert front_matter.get('category') == 'testing'

    def test_get_database_schema(self):
        """Test retrieving database schema information."""
        schema = self.db_manager.get_schema()

        assert isinstance(schema, dict)
        assert 'markdown_files' in schema

        # Check markdown_files table schema
        markdown_table = schema['markdown_files']
        assert 'columns' in markdown_table

        columns = markdown_table['columns']
        assert len(columns) >= 5  # id, filename, front_matter, content, created_at

        # Verify expected columns exist
        column_names = [col['name'] for col in columns]
        expected_columns = ['id', 'filename', 'front_matter', 'content', 'created_at']

        for expected_col in expected_columns:
            assert expected_col in column_names

        # Check primary key ('id' is known to exist from the loop above)
        id_column = next(col for col in columns if col['name'] == 'id')
        assert id_column['primary_key'] is True
        assert id_column['type'] == 'INTEGER'

    def test_schema_after_data_insertion(self):
        """Test that schema remains consistent after inserting data."""
        # Get initial schema
        initial_schema = self.db_manager.get_schema()

        # Insert some data
        self.db_manager.store_markdown_file("test.md", "# Test")

        # Get schema again
        after_insert_schema = self.db_manager.get_schema()

        # Schema should be identical
        assert initial_schema == after_insert_schema

    def test_list_files_performance_with_many_files(self):
        """Test listing files performance with a larger number of files."""
        # Insert multiple files
        num_files = 50
        for i in range(num_files):
            content = f"# Document {i}\n\nThis is document number {i}."
            file_id = self.db_manager.store_markdown_file(f"doc_{i:03d}.md", content)
            assert file_id is not None

        # List all files
        files = self.db_manager.list_markdown_files()

        assert len(files) == num_files

        # Verify all files are present
        filenames = {f['filename'] for f in files}
        expected_filenames = {f"doc_{i:03d}.md" for i in range(num_files)}
        assert filenames == expected_filenames

    def test_list_files_returns_metadata_only(self):
        """Test that list_markdown_files returns metadata without content."""
        large_content = "# Large Document\n\n" + "This is a large content. " * 1000

        file_id = self.db_manager.store_markdown_file("large.md", large_content)
        assert file_id is not None

        # List files
        files = self.db_manager.list_markdown_files()

        assert len(files) == 1
        file_info = files[0]

        # Should have metadata but not content
        assert 'id' in file_info
        assert 'filename' in file_info
        assert 'created_at' in file_info
        assert 'front_matter' in file_info
        assert 'content' not in file_info  # Content should not be included in list

    def test_empty_filename_handling(self):
        """Test behavior with edge cases like empty filenames."""
        # Try to store file with empty filename
        file_id = self.db_manager.store_markdown_file("", "# Test content")

        # NOTE(review): when the store call returns None this test passes
        # without asserting anything; the intended contract for empty
        # filenames is not visible here -- confirm and tighten if possible.
        if file_id is not None:  # If the database allows empty filenames
            files = self.db_manager.list_markdown_files()
            assert len(files) == 1
            assert files[0]['filename'] == ""

    def test_special_characters_in_filename(self):
        """Test files with special characters in filenames."""
        special_filenames = [
            "file with spaces.md",
            "file-with-dashes.md",
            "file_with_underscores.md",
            "файл.md",  # Unicode characters
            "file.with.dots.md",
        ]

        for filename in special_filenames:
            content = f"# {filename}\n\nContent for {filename}"
            file_id = self.db_manager.store_markdown_file(filename, content)
            assert file_id is not None, f"Failed to store file: {filename}"

        # List all files
        files = self.db_manager.list_markdown_files()
        assert len(files) == len(special_filenames)

        # Verify all special filenames are present
        stored_filenames = {f['filename'] for f in files}
        expected_filenames = set(special_filenames)
        assert stored_filenames == expected_filenames


class TestIssue4CLIIntegration:
    """Check that the CLI exposes the commands backing Issue #4."""

    def setup_method(self):
        """Set up test environment (nothing to prepare at the moment)."""
        # Note: exercising the commands end to end would need a CLI testing
        # framework; for now only their registration is checked.

    @staticmethod
    def _registered_command(command_name):
        """Return the command registered under *command_name* on the CLI group.

        Asserts that the name is present in ``cli.commands`` before the lookup.
        """
        from markitect.cli import cli

        registry = cli.commands
        assert command_name in registry
        return registry[command_name]

    def test_cli_list_command_exists(self):
        """The 'list' command is registered, correctly named, and has help text."""
        command = self._registered_command('list')

        assert command.name == 'list'
        assert command.help is not None

    def test_cli_schema_command_exists(self):
        """The 'db-schema' command is registered, correctly named, and has help text."""
        command = self._registered_command('db-schema')

        assert command.name == 'db-schema'
        assert command.help is not None