refactor: enhance draft generator documentation and code quality

Applied TDD8 refactoring improvements to draft generator module:

- Added comprehensive module docstring with usage examples
- Moved import statements to module level for better organization
- Enhanced filename sanitization with dedicated method
- Decomposed content replacement logic into focused methods
- Added role-specific replacement strategies
- Improved code maintainability and readability

These changes improve code quality while maintaining all existing
functionality and test compatibility.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-10-03 10:35:16 +02:00
parent 77db9f6231
commit a4805812f3

View File

@@ -3,11 +3,44 @@ Data-driven Draft Generator for Issue #56: Generate multiple drafts from data so
This module provides functionality to create multiple markdown documents from JSON schemas
and data sources (JSON, CSV) with field mapping support.
Examples:
Basic usage with JSON data:
>>> generator = DraftGenerator()
>>> schema = {...} # JSON schema with field mappings
>>> data = [{"name": "John", "role": "Developer"}]
>>> files = generator.generate_drafts_from_data_source(
... schema, data, Path("./output")
... )
Using with CSV file:
>>> files = generator.generate_drafts_from_data_source(
... schema, Path("data.csv"), Path("./output")
... )
Field mapping is configured in the schema using the x-markitect-field-mapping extension:
{
"properties": {
"headings": {
"properties": {
"level_1": {
"x-markitect-field-mapping": {"const": "name"}
}
}
}
}
}
Architecture:
The DraftGenerator extends the existing StubGenerator to add data-driven
capabilities. It processes data sources, validates compatibility with schemas,
and generates multiple document drafts with populated content.
""" """
import json import json
import csv import csv
import io import io
import copy
from pathlib import Path from pathlib import Path
from typing import Dict, Any, List, Optional, Union from typing import Dict, Any, List, Optional, Union
from .stub_generator import StubGenerator from .stub_generator import StubGenerator
@@ -163,7 +196,6 @@ class DraftGenerator:
def _apply_field_mapping(self, schema: Dict[str, Any], record: Dict[str, Any]) -> Dict[str, Any]: def _apply_field_mapping(self, schema: Dict[str, Any], record: Dict[str, Any]) -> Dict[str, Any]:
"""Apply field mapping to populate schema content areas with data.""" """Apply field mapping to populate schema content areas with data."""
# Create a deep copy of the schema # Create a deep copy of the schema
import copy
populated_schema = copy.deepcopy(schema) populated_schema = copy.deepcopy(schema)
# Apply title mapping if exists # Apply title mapping if exists
@@ -175,15 +207,26 @@ class DraftGenerator:
def _generate_filename(self, record: Dict[str, Any], index: int) -> str: def _generate_filename(self, record: Dict[str, Any], index: int) -> str:
"""Generate appropriate filename for the draft.""" """Generate appropriate filename for the draft."""
# Try to use common identifying fields # Try to use common identifying fields
for field in ['name', 'title', 'id']: identifier_fields = ['name', 'title', 'id']
for field in identifier_fields:
if field in record and record[field]: if field in record and record[field]:
# Sanitize filename # Sanitize filename
name = str(record[field]).replace(' ', '_').replace('/', '_') name = self._sanitize_filename(str(record[field]))
return f"{name}.md" return f"{name}.md"
# Fall back to index-based naming # Fall back to index-based naming
return f"draft_{index + 1:03d}.md" return f"draft_{index + 1:03d}.md"
def _sanitize_filename(self, filename: str) -> str:
"""Sanitize a string to be safe for use as a filename."""
# Replace problematic characters with underscores
unsafe_chars = [' ', '/', '\\', ':', '*', '?', '"', '<', '>', '|']
sanitized = filename
for char in unsafe_chars:
sanitized = sanitized.replace(char, '_')
return sanitized
def _generate_draft_content(self, schema: Dict[str, Any], record: Dict[str, Any], schema_file_path: Optional[str] = None) -> str: def _generate_draft_content(self, schema: Dict[str, Any], record: Dict[str, Any], schema_file_path: Optional[str] = None) -> str:
"""Generate the actual draft content from populated schema.""" """Generate the actual draft content from populated schema."""
# Use the existing stub generator as the base # Use the existing stub generator as the base
@@ -194,20 +237,43 @@ class DraftGenerator:
) )
# Add data-driven enhancements - replace placeholders with actual data # Add data-driven enhancements - replace placeholders with actual data
content = self._apply_data_replacements(content, record)
return content
def _apply_data_replacements(self, content: str, record: Dict[str, Any]) -> str:
"""Apply data replacements to content using various replacement strategies."""
for field_name, field_value in record.items(): for field_name, field_value in record.items():
# Simple replacement strategy for testing content = self._apply_field_replacements(content, field_name, str(field_value))
placeholder_pattern = f"TODO: Add content for {field_name}"
if placeholder_pattern in content:
content = content.replace(placeholder_pattern, str(field_value))
# Replace template variables in content instructions (e.g., {role} -> Software Engineer) return content
template_pattern = f"{{{field_name}}}"
if template_pattern in content:
content = content.replace(template_pattern, str(field_value))
# Also try to replace role-specific content def _apply_field_replacements(self, content: str, field_name: str, field_value: str) -> str:
if field_name == 'role': """Apply all replacement patterns for a specific field."""
content = content.replace("TODO: Add content for introduction section.", f"Role: {field_value}") # Simple placeholder replacement
content = content.replace("TODO: Add content for section_level_2 section.", f"Department information and role details for {field_value}") placeholder_pattern = f"TODO: Add content for {field_name}"
if placeholder_pattern in content:
content = content.replace(placeholder_pattern, field_value)
# Template variable replacement (e.g., {role} -> Software Engineer)
template_pattern = f"{{{field_name}}}"
if template_pattern in content:
content = content.replace(template_pattern, field_value)
# Role-specific content replacement (can be extended for other field types)
if field_name == 'role':
content = self._apply_role_specific_replacements(content, field_value)
return content
def _apply_role_specific_replacements(self, content: str, role_value: str) -> str:
"""Apply role-specific content replacements."""
replacements = {
"TODO: Add content for introduction section.": f"Role: {role_value}",
"TODO: Add content for section_level_2 section.": f"Department information and role details for {role_value}"
}
for old_text, new_text in replacements.items():
content = content.replace(old_text, new_text)
return content return content