# markitect-main/tests/test_issue_56_data_driven_draft_generation.py

"""
Tests for Issue #56: Data-driven multiple draft generation

This test module implements comprehensive tests for data-driven draft generation
that creates multiple documents from a schema and data source with field mapping.

Following TDD8 methodology - these tests are written before implementation.
"""

import json
import csv
import pytest
from pathlib import Path
from tempfile import NamedTemporaryFile, TemporaryDirectory
from click.testing import CliRunner

from markitect.cli import cli


class TestIssue56DataDrivenDraftGeneration:
    """Test suite for data-driven multiple draft generation functionality.

    Every test drives the CLI through click's ``CliRunner``. Input fixtures
    (schema and data files) are written inside a per-test
    ``TemporaryDirectory`` so they are removed together with the generated
    output, even when fixture creation or an assertion fails part-way through.
    """

    def setup_method(self):
        """Set up test fixtures: a fresh CLI runner for each test."""
        self.runner = CliRunner()

    # ------------------------------------------------------------------
    # Private fixture helpers (not collected by pytest: leading underscore)
    # ------------------------------------------------------------------

    @staticmethod
    def _heading_level(field, instructions=None, exactly_one=False):
        """Build the schema fragment for one heading level.

        Args:
            field: Data field name stored as the ``x-markitect-field-mapping``
                constant.
            instructions: Template stored as the
                ``x-markitect-content-instructions`` constant. When ``None``
                the instructions property is omitted entirely.
            exactly_one: When true, add ``minItems``/``maxItems`` of 1 to the
                array definition.

        Returns:
            A dict describing an array of heading objects.
        """
        item_properties = {"content": {"type": "string"}}
        if instructions is not None:
            item_properties["x-markitect-content-instructions"] = {
                "type": "string",
                "const": instructions,
            }
        item_properties["x-markitect-field-mapping"] = {
            "type": "string",
            "const": field,
        }

        level = {
            "type": "array",
            "items": {
                "type": "object",
                "properties": item_properties,
            },
        }
        if exactly_one:
            level["minItems"] = 1
            level["maxItems"] = 1
        return level

    @staticmethod
    def _build_schema(title, heading_levels, required_fields=None):
        """Build a complete schema document around the given heading levels.

        Args:
            title: Value of the schema's ``title`` key.
            heading_levels: Mapping of level name (e.g. ``"level_1"``) to the
                fragment returned by ``_heading_level``.
            required_fields: Optional list stored under the top-level
                ``x-markitect-required-fields`` key; omitted when ``None``.

        Returns:
            The schema as a JSON-serialisable dict.
        """
        schema = {
            "$schema": "http://json-schema.org/draft-07/schema#",
            "type": "object",
            "title": title,
            "x-markitect-content-instructions-enabled": True,
            "properties": {
                "headings": {
                    "type": "object",
                    "properties": heading_levels,
                }
            },
        }
        if required_fields is not None:
            schema["x-markitect-required-fields"] = list(required_fields)
        return schema

    @staticmethod
    def _write_json(path, payload):
        """Write *payload* as indented JSON to *path* and return *path*."""
        with path.open('w', encoding='utf-8') as handle:
            json.dump(payload, handle, indent=2)
        return path

    def _write_inputs(self, workspace, schema, data):
        """Write ``schema.json`` and ``data.json`` into *workspace*.

        Returns:
            Tuple ``(schema_file, data_file)`` of ``Path`` objects.
        """
        root = Path(workspace)
        schema_file = self._write_json(root / "schema.json", schema)
        data_file = self._write_json(root / "data.json", data)
        return schema_file, data_file

    @staticmethod
    def _make_output_dir(workspace):
        """Create and return an empty, already-existing output directory."""
        output_dir = Path(workspace) / "output"
        output_dir.mkdir()
        return output_dir

    def _run_generate_drafts(self, schema_file, data_file, output_dir):
        """Invoke ``generate-drafts`` and return the click ``Result``."""
        return self.runner.invoke(cli, [
            'generate-drafts',
            str(schema_file),
            str(data_file),
            '--output-dir', str(output_dir)
        ])

    # ------------------------------------------------------------------
    # Command discovery
    # ------------------------------------------------------------------

    def test_cli_has_generate_drafts_command(self):
        """Test that CLI has a generate-drafts command for data-driven generation."""
        # Act
        result = self.runner.invoke(cli, ['--help'])

        # Assert
        assert result.exit_code == 0
        assert 'generate-drafts' in result.output, "CLI should have generate-drafts command"

    def test_generate_drafts_command_help(self):
        """Test that generate-drafts command has proper help documentation."""
        # Act
        result = self.runner.invoke(cli, ['generate-drafts', '--help'])

        # Assert
        assert result.exit_code == 0
        help_text = result.output.lower()
        assert 'data source' in help_text
        assert 'schema' in help_text
        assert 'multiple' in help_text or 'batch' in help_text

    # ------------------------------------------------------------------
    # Data sources
    # ------------------------------------------------------------------

    def test_generate_drafts_supports_json_data_source(self):
        """Test that generate-drafts supports JSON data sources."""
        # Arrange
        schema = self._build_schema("Employee Profile", {
            "level_1": self._heading_level("name", "Employee name: {name}", exactly_one=True),
            "level_2": self._heading_level("role", "Role: {role}", exactly_one=True),
        })

        data = [
            {"name": "Alice Johnson", "role": "Software Engineer", "department": "Engineering"},
            {"name": "Bob Smith", "role": "Product Manager", "department": "Product"}
        ]

        with TemporaryDirectory() as workspace:
            schema_file, data_file = self._write_inputs(workspace, schema, data)
            output_dir = self._make_output_dir(workspace)

            # Act
            result = self._run_generate_drafts(schema_file, data_file, output_dir)

            # Assert
            assert result.exit_code == 0, f"Command should succeed, got: {result.output}"

            # Check that multiple files were generated
            generated_files = list(output_dir.glob('*.md'))
            assert len(generated_files) >= 2, "Should generate multiple draft files"

            # Check content of generated files
            for file in generated_files:
                content = file.read_text()
                # Should contain mapped data
                assert any(name in content for name in ["Alice Johnson", "Bob Smith"])
                assert any(role in content for role in ["Software Engineer", "Product Manager"])

    def test_generate_drafts_supports_csv_data_source(self):
        """Test that generate-drafts supports CSV data sources."""
        # Arrange
        schema = self._build_schema("Product Description", {
            "level_1": self._heading_level("product_name", "Product: {product_name}"),
        })

        with TemporaryDirectory() as workspace:
            root = Path(workspace)
            schema_file = self._write_json(root / "schema.json", schema)

            csv_file = root / "data.csv"
            # newline='' is required by the csv module; without it, platforms
            # that translate newlines would emit '\r\r\n' row terminators.
            with csv_file.open('w', newline='', encoding='utf-8') as csv_f:
                writer = csv.writer(csv_f)
                writer.writerow(['product_name', 'price', 'category'])
                writer.writerow(['Laptop Pro', '1299.99', 'Electronics'])
                writer.writerow(['Office Chair', '249.99', 'Furniture'])

            output_dir = self._make_output_dir(workspace)

            # Act
            result = self._run_generate_drafts(schema_file, csv_file, output_dir)

            # Assert
            assert result.exit_code == 0, f"CSV processing should work, got: {result.output}"

            # Check generated files
            generated_files = list(output_dir.glob('*.md'))
            assert len(generated_files) >= 2, "Should generate files for each CSV row"

            # Check content contains mapped CSV data
            all_content = "".join(file.read_text() for file in generated_files)

            assert "Laptop Pro" in all_content
            assert "Office Chair" in all_content

    # ------------------------------------------------------------------
    # Field mapping and schema references
    # ------------------------------------------------------------------

    def test_generate_drafts_field_mapping_functionality(self):
        """Test that field mapping works correctly between data and schema."""
        # Arrange
        schema = self._build_schema("Blog Post", {
            "level_1": self._heading_level("title", "{title}"),
            "level_2": self._heading_level("author_name", "Author: {author_name}"),
        })

        data = [
            {"title": "Getting Started with Python", "author_name": "Jane Doe", "tags": ["python", "beginner"]},
            {"title": "Advanced JavaScript Patterns", "author_name": "John Smith", "tags": ["javascript", "advanced"]}
        ]

        with TemporaryDirectory() as workspace:
            schema_file, data_file = self._write_inputs(workspace, schema, data)
            output_dir = self._make_output_dir(workspace)

            # Act
            result = self._run_generate_drafts(schema_file, data_file, output_dir)

            # Assert
            assert result.exit_code == 0

            # Verify field mapping worked correctly
            generated_files = list(output_dir.glob('*.md'))
            assert len(generated_files) == 2

            contents = [file.read_text() for file in generated_files]

            # Check that titles and authors are properly mapped
            assert any("Getting Started with Python" in content for content in contents)
            assert any("Advanced JavaScript Patterns" in content for content in contents)
            assert any("Author: Jane Doe" in content for content in contents)
            assert any("Author: John Smith" in content for content in contents)

    def test_generate_drafts_maintains_schema_references(self):
        """Test that generated drafts maintain schema references for validation."""
        # Arrange
        schema = self._build_schema("Meeting Notes", {
            "level_1": self._heading_level("meeting_title", "Meeting: {meeting_title}"),
        })

        data = [{"meeting_title": "Weekly Standup", "date": "2024-01-15"}]

        with TemporaryDirectory() as workspace:
            schema_file, data_file = self._write_inputs(workspace, schema, data)
            output_dir = self._make_output_dir(workspace)

            # Act
            result = self._run_generate_drafts(schema_file, data_file, output_dir)

            # Assert
            assert result.exit_code == 0

            # Check schema reference is maintained
            generated_files = list(output_dir.glob('*.md'))
            assert len(generated_files) >= 1

            for file in generated_files:
                content = file.read_text()
                assert f"Generated from schema: {schema_file}" in content

    # ------------------------------------------------------------------
    # Output location and naming
    # ------------------------------------------------------------------

    def test_generate_drafts_output_directory_specification(self):
        """Test that CLI supports output directory specification for batch generation."""
        # Arrange
        schema = self._build_schema("Test Document", {
            "level_1": self._heading_level("name", "{name}"),
        })

        data = [{"name": "Test1"}, {"name": "Test2"}]

        with TemporaryDirectory() as temp_dir:
            schema_file, data_file = self._write_inputs(temp_dir, schema, data)
            # Deliberately NOT created here: the command must create it.
            output_dir = Path(temp_dir) / "custom_output"

            # Act
            result = self._run_generate_drafts(schema_file, data_file, output_dir)

            # Assert
            assert result.exit_code == 0
            assert output_dir.exists(), "Output directory should be created"

            generated_files = list(output_dir.glob('*.md'))
            assert len(generated_files) >= 2, "Should generate files in specified directory"

    def test_generate_drafts_file_naming_convention(self):
        """Test that generated files follow a consistent naming convention."""
        # Arrange
        schema = self._build_schema("Item Description", {
            "level_1": self._heading_level("id", "Item: {id}"),
        })

        data = [
            {"id": "item-001", "name": "First Item"},
            {"id": "item-002", "name": "Second Item"}
        ]

        with TemporaryDirectory() as workspace:
            schema_file, data_file = self._write_inputs(workspace, schema, data)
            output_dir = self._make_output_dir(workspace)

            # Act
            result = self._run_generate_drafts(schema_file, data_file, output_dir)

            # Assert
            assert result.exit_code == 0

            generated_files = list(output_dir.glob('*.md'))
            assert len(generated_files) == 2

            # Check naming convention
            filenames = [f.name for f in generated_files]
            for filename in filenames:
                assert filename.endswith('.md')
                # Should contain identifier or be sequentially named
                assert len(filename) > 3  # At least "x.md"

    # ------------------------------------------------------------------
    # Validation and error handling
    # ------------------------------------------------------------------

    def test_generate_drafts_data_validation_compatibility(self):
        """Test that data validation ensures compatibility with schema requirements."""
        # Arrange - Schema requires specific fields
        schema = self._build_schema(
            "Validated Document",
            {
                "level_1": self._heading_level(
                    "required_field", "Required field: {required_field}"
                ),
            },
            required_fields=["required_field"],
        )

        # Data missing required field
        invalid_data = [{"optional_field": "value"}]

        with TemporaryDirectory() as workspace:
            schema_file, data_file = self._write_inputs(workspace, schema, invalid_data)
            output_dir = self._make_output_dir(workspace)

            # Act
            result = self._run_generate_drafts(schema_file, data_file, output_dir)

            # Assert - Should fail validation or provide warning
            # Could be exit code != 0 or warning in output
            assert result.exit_code != 0 or "warning" in result.output.lower() or "missing" in result.output.lower()

    def test_generate_drafts_error_handling_data_schema_mismatch(self):
        """Test error handling for data-schema mismatches."""
        # Arrange - heading has a field mapping but no content instructions
        schema = self._build_schema("Test Schema", {
            "level_1": self._heading_level("name"),
        })

        # Data with different field names
        mismatched_data = [{"different_field": "value"}]

        with TemporaryDirectory() as workspace:
            schema_file, data_file = self._write_inputs(workspace, schema, mismatched_data)
            output_dir = self._make_output_dir(workspace)

            # Act
            result = self._run_generate_drafts(schema_file, data_file, output_dir)

            # Assert - Should handle mismatch gracefully
            # Either succeed with warnings or fail with clear error
            if result.exit_code != 0:
                assert len(result.output) > 0  # Should have error message
            else:
                # If succeeded, should have warnings or default handling
                assert "warning" in result.output.lower() or len(list(output_dir.glob('*.md'))) > 0

    # ------------------------------------------------------------------
    # Integration with single-stub generation
    # ------------------------------------------------------------------

    def test_generate_drafts_integration_with_existing_stub_generation(self):
        """Test that generate-drafts integrates properly with existing stub generation from Issue #55."""
        # Arrange - Use schema that works with single draft generation
        schema = self._build_schema("Integration Test", {
            "level_1": self._heading_level("title", "Title: {title}"),
        })

        data = [{"title": "Test Document"}]

        with TemporaryDirectory() as workspace:
            schema_file, data_file = self._write_inputs(workspace, schema, data)
            batch_output_dir = self._make_output_dir(workspace)

            # The stub target exists (empty) before the command runs, so
            # generate-stub is exercised against a pre-existing output file.
            single_output_file = Path(workspace) / "single_stub.md"
            single_output_file.touch()

            # Act - Test both single and batch generation
            single_result = self.runner.invoke(cli, [
                'generate-stub',
                str(schema_file),
                '--output', str(single_output_file)
            ])

            batch_result = self._run_generate_drafts(schema_file, data_file, batch_output_dir)

            # Assert
            assert single_result.exit_code == 0
            assert batch_result.exit_code == 0

            # Check single output
            single_content = single_output_file.read_text()
            assert "Integration Test" in single_content

            # Check batch output
            batch_files = list(batch_output_dir.glob('*.md'))
            assert len(batch_files) >= 1

            batch_content = batch_files[0].read_text()
            assert "Test Document" in batch_content

            # Both should have schema references
            assert "Generated from schema:" in single_content
            assert "Generated from schema:" in batch_content