refactor: enhance draft generator documentation and code quality

Applied TDD8 refactoring improvements to draft generator module:
- Added comprehensive module docstring with usage examples
- Moved import statements to module level for better organization
- Enhanced filename sanitization with dedicated method
- Decomposed content replacement logic into focused methods
- Added role-specific replacement strategies
- Improved code maintainability and readability

These changes improve code quality while maintaining all existing functionality and test compatibility.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-03 10:35:16 +02:00
parent 77db9f6231
commit a4805812f3
1 changed files with 81 additions and 15 deletions
--- a/markitect/draft_generator.py
+++ b/markitect/draft_generator.py
@@ -3,11 +3,44 @@ Data-driven Draft Generator for Issue #56: Generate multiple drafts from data so

 This module provides functionality to create multiple markdown documents from JSON schemas
 and data sources (JSON, CSV) with field mapping support.
+
+Examples:
+    Basic usage with JSON data:
+        >>> generator = DraftGenerator()
+        >>> schema = {...}  # JSON schema with field mappings
+        >>> data = [{"name": "John", "role": "Developer"}]
+        >>> files = generator.generate_drafts_from_data_source(
+        ...     schema, data, Path("./output")
+        ... )
+
+    Using with CSV file:
+        >>> files = generator.generate_drafts_from_data_source(
+        ...     schema, Path("data.csv"), Path("./output")
+        ... )
+
+    Field mapping is configured in the schema using x-markitect-field-mapping extension:
+        {
+            "properties": {
+                "headings": {
+                    "properties": {
+                        "level_1": {
+                            "x-markitect-field-mapping": {"const": "name"}
+                        }
+                    }
+                }
+            }
+        }
+
+Architecture:
+    The DraftGenerator builds on the existing StubGenerator (used as the base
+    content generator) to add data-driven capabilities. It processes data sources,
+    validates compatibility with schemas, and generates multiple populated drafts.
 """

 import json
 import csv
 import io
+import copy
 from pathlib import Path
 from typing import Dict, Any, List, Optional, Union
 from .stub_generator import StubGenerator
@@ -163,7 +196,6 @@ class DraftGenerator:
    def _apply_field_mapping(self, schema: Dict[str, Any], record: Dict[str, Any]) -> Dict[str, Any]:
        """Apply field mapping to populate schema content areas with data."""
        # Create a deep copy of the schema
-        import copy
        populated_schema = copy.deepcopy(schema)

        # Apply title mapping if exists
@@ -175,15 +207,26 @@ class DraftGenerator:
    def _generate_filename(self, record: Dict[str, Any], index: int) -> str:
        """Generate appropriate filename for the draft."""
        # Try to use common identifying fields
-        for field in ['name', 'title', 'id']:
+        identifier_fields = ['name', 'title', 'id']
+
+        for field in identifier_fields:
            if field in record and record[field]:
                # Sanitize filename
-                name = str(record[field]).replace(' ', '_').replace('/', '_')
+                name = self._sanitize_filename(str(record[field]))
                return f"{name}.md"

        # Fall back to index-based naming
        return f"draft_{index + 1:03d}.md"

+    def _sanitize_filename(self, filename: str) -> str:
+        """Return ``filename`` with filesystem-unsafe characters replaced by underscores.
+
+        Spaces, both path separators and the characters ``: * ? " < > |`` each
+        become ``_``; every other character is passed through unchanged.
+        """
+        unsafe_chars = ' /\\:*?"<>|'
+        return filename.translate(str.maketrans(unsafe_chars, '_' * len(unsafe_chars)))
+
    def _generate_draft_content(self, schema: Dict[str, Any], record: Dict[str, Any], schema_file_path: Optional[str] = None) -> str:
        """Generate the actual draft content from populated schema."""
        # Use the existing stub generator as the base
@@ -194,20 +237,43 @@ class DraftGenerator:
        )

        # Add data-driven enhancements - replace placeholders with actual data
+        content = self._apply_data_replacements(content, record)
+
+        return content
+
+    def _apply_data_replacements(self, content: str, record: Dict[str, Any]) -> str:
+        """Apply data replacements to content using various replacement strategies."""
        for field_name, field_value in record.items():
-            # Simple replacement strategy for testing
-            placeholder_pattern = f"TODO: Add content for {field_name}"
-            if placeholder_pattern in content:
-                content = content.replace(placeholder_pattern, str(field_value))
+            content = self._apply_field_replacements(content, field_name, str(field_value))

-            # Replace template variables in content instructions (e.g., {role} -> Software Engineer)
-            template_pattern = f"{{{field_name}}}"
-            if template_pattern in content:
-                content = content.replace(template_pattern, str(field_value))
+        return content

-            # Also try to replace role-specific content
-            if field_name == 'role':
-                content = content.replace("TODO: Add content for introduction section.", f"Role: {field_value}")
-                content = content.replace("TODO: Add content for section_level_2 section.", f"Department information and role details for {field_value}")
+    def _apply_field_replacements(self, content: str, field_name: str, field_value: str) -> str:
+        """Substitute one record field into ``content`` and return the result.
+
+        Three passes run in order: the ``TODO: Add content for <field_name>``
+        placeholder is swapped for the value, then the ``{<field_name>}``
+        template variable, and finally, for the ``role`` field only, the
+        role-specific section placeholders.
+        """
+        # str.replace leaves the text untouched when the pattern is absent,
+        # so no membership check is needed before either substitution.
+        todo_marker = f"TODO: Add content for {field_name}"
+        brace_token = f"{{{field_name}}}"
+        updated = content.replace(todo_marker, field_value).replace(brace_token, field_value)
+
+        if field_name != 'role':
+            return updated
+        return self._apply_role_specific_replacements(updated, field_value)
+
+    def _apply_role_specific_replacements(self, content: str, role_value: str) -> str:
+        """Fill the introduction and section_level_2 placeholders with role text."""
+        # Each (placeholder, replacement) pair is applied in listed order.
+        role_substitutions = (
+            ("TODO: Add content for introduction section.", f"Role: {role_value}"),
+            ("TODO: Add content for section_level_2 section.", f"Department information and role details for {role_value}"),
+        )
+        for placeholder, role_text in role_substitutions:
+            content = content.replace(placeholder, role_text)

         return content