feat: implement index page generation for HTML directories - Issue #136

Complete TDD8 implementation of index page generation functionality:

Core Features:
- HTML file discovery with optional recursive search (find_html_files)
- Smart title extraction from <title>, <h1>, or filename (extract_html_title)
- Template-integrated index page generation (generate_index_html)
- CLI command 'md-index' with output, template, and recursive options
- Comprehensive error handling for edge cases and malformed files

Implementation Details:
- Reuses existing TEMPLATE_STYLES for consistent styling across all templates
- Proper relative path resolution for cross-directory navigation
- Modular design with helper functions for maintainability
- HTML parsing patterns extracted as module-level constants for performance

Tests: 23 comprehensive tests covering discovery, generation, CLI integration, and edge cases
Files: markitect/plugins/builtin/markdown_commands.py, tests/test_issue_136_index_generation.py
Status: All tests passing, full TDD8 cycle completed (RED→GREEN→REFACTOR→DOCUMENT)

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-10-07 13:33:39 +02:00
parent 98fe3361af
commit 3b5d6eecda
4 changed files with 1026 additions and 2 deletions

View File

@@ -8,6 +8,7 @@ replacing the legacy unprefixed commands for better namespace consistency.
import click
import json
import os
import re
import tempfile
from pathlib import Path
from typing import Dict, Any
@@ -43,7 +44,8 @@ class MarkdownCommandsPlugin(CommandPlugin):
'md-ingest': md_ingest_command,
'md-get': md_get_command,
'md-list': md_list_command,
'md-render': md_render_command
'md-render': md_render_command,
'md-index': md_index_command
}
@@ -400,6 +402,81 @@ def md_render_command(ctx, input_file, output, template, css, edit, editor_theme
raise click.Abort()
@click.command()
@click.argument('directory', type=click.Path(exists=True))
@click.option('--output', '-o', type=click.Path(), help='Output index file path (defaults to directory/index.html)')
@click.option('--template', type=click.Choice(['basic', 'github', 'academic', 'dark']),
              default='basic', help='HTML template: basic (default), github, academic, or dark theme')
@click.option('--recursive', '-r', is_flag=True, help='Include HTML files from subdirectories')
@click.pass_context
def md_index_command(ctx, directory, output, template, recursive):
    """
    Generate an index page for HTML files in a directory.

    Creates an HTML index page that lists all HTML files found in the specified
    directory, providing navigation links to each file. The index page uses the
    same template system as md-render for consistent styling.

    DIRECTORY: Path to the directory containing HTML files

    Examples:
        # Generate index for current directory
        markitect md-index .

        # Generate index with custom output file
        markitect md-index docs/ --output docs/contents.html

        # Generate index with GitHub template
        markitect md-index notes/ --template github

        # Include subdirectories recursively
        markitect md-index docs/ --recursive
    """
    # NOTE: the docstring above is rendered by click as the command's --help
    # text, so its wording is user-facing output rather than a plain comment.
    config = ctx.obj or {}

    try:
        verbose = config.get('verbose', False)
        directory_path = Path(directory)

        if verbose:
            click.echo(f"Generating index for directory: {directory_path}")

        # Determine output file
        output_path = Path(output) if output else directory_path / "index.html"

        # Find HTML files, leaving out the index file itself. The comparison is
        # done on normalised absolute paths so that the index is still
        # recognised when --output is absolute (or contains "..") while the
        # discovered paths are relative to DIRECTORY, or vice versa.
        output_key = os.path.abspath(output_path)
        html_files = [
            candidate
            for candidate in find_html_files(directory_path, recursive=recursive)
            if os.path.abspath(candidate) != output_key
        ]

        if verbose:
            click.echo(f"Found {len(html_files)} HTML file(s)")

        # Prepare file info for template
        file_infos = _prepare_file_infos(html_files, output_path)

        # Path('.').name is empty, hence the generic fallback label.
        directory_name = directory_path.name or "Directory"
        index_title = f"{directory_name} - Index"
        index_html = generate_index_html(file_infos, index_title, template)

        # Ensure output directory exists and write file
        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text(index_html, encoding='utf-8')

        click.echo(f"✓ Index generated: {output_path}")

        if verbose:
            click.echo(f" Template: {template}")
            click.echo(f" Files indexed: {len(file_infos)}")
            if recursive:
                click.echo(" Recursive: enabled")

    except Exception as e:
        # Top-level CLI boundary: report the problem on stderr and abort.
        click.echo(f"Error generating index: {e}", err=True)
        raise click.Abort()
def _render_single_markdown_file(input_path, output_path, template, css, edit, editor_theme, keyboard_shortcuts, config):
"""Render a single markdown file to HTML."""
# Read markdown file
@@ -1020,4 +1097,205 @@ def process_directory(input_dir, use_publication_dir, publication_dir):
output_files.append(output_file)
return output_files
return output_files
# Index generation functions for Issue #136
def find_html_files(directory, recursive=False):
    """Find all HTML files in a directory.

    Args:
        directory: Directory to search (string or ``Path``).
        recursive: When true, subdirectories are searched as well.

    Returns:
        A sorted list of ``Path`` objects for every regular file whose name
        matches ``*.html`` or ``*.htm``.
    """
    directory = Path(directory)
    # Same patterns either way; only the traversal depth differs.
    search = directory.rglob if recursive else directory.glob

    html_files = []
    for pattern in ('*.html', '*.htm'):
        # glob()/rglob() also return directories whose name matches the
        # pattern (e.g. a folder called "site.html"); only real files can be
        # linked to and read for a title.
        html_files.extend(path for path in search(pattern) if path.is_file())

    return sorted(html_files)
# HTML parsing patterns for index generation.
# Compiled once at import time; DOTALL lets a title or heading span lines.
HTML_TITLE_PATTERN = re.compile(r'<title[^>]*>(.*?)</title>', re.IGNORECASE | re.DOTALL)
HTML_H1_PATTERN = re.compile(r'<h1[^>]*>(.*?)</h1>', re.IGNORECASE | re.DOTALL)
HTML_TAG_PATTERN = re.compile(r'<[^>]+>')


def extract_html_title(html_file):
    """Extract title from HTML file, falling back to H1 tag or filename.

    The sources are tried in order: the first ``<title>`` element, the first
    ``<h1>`` element (with nested tags removed), then the file name without
    its extension. A source that is present but empty or whitespace-only is
    skipped, so the result is never an empty string for a normally named file.
    Runs of whitespace (including newlines) inside the title are collapsed to
    single spaces.

    The text is returned as it appears in the markup; character references
    such as ``&amp;`` are not decoded.

    Args:
        html_file: ``Path`` of the HTML file to inspect.

    Returns:
        The extracted title, or ``html_file.stem`` when no usable title is
        found or the file cannot be read as UTF-8.
    """
    try:
        content = html_file.read_text(encoding='utf-8')
    except (OSError, ValueError):
        # Unreadable, missing or non-UTF-8 file (UnicodeDecodeError is a
        # ValueError): best effort, label the entry with the file name.
        return html_file.stem

    # Try to extract from title tag
    title_match = HTML_TITLE_PATTERN.search(content)
    if title_match:
        title_text = ' '.join(title_match.group(1).split())
        if title_text:
            return title_text

    # Try to extract from H1 tag
    h1_match = HTML_H1_PATTERN.search(content)
    if h1_match:
        # Remove HTML tags from H1 content before normalising whitespace
        h1_text = ' '.join(HTML_TAG_PATTERN.sub('', h1_match.group(1)).split())
        if h1_text:
            return h1_text

    # Fallback to filename
    return html_file.stem
def generate_index_html(html_files, title, template="basic"):
    """Generate HTML index page with links to HTML files.

    Args:
        html_files: Sequence of dicts, each providing a ``'relative_path'``
            (link target) and a ``'title'`` (link text).
        title: Page title, used for both ``<title>`` and the top heading. It
            is treated as plain text and HTML-escaped.
        template: Key into ``TEMPLATE_STYLES``; unknown names fall back to the
            ``'basic'`` styles.

    Returns:
        The complete HTML document as a string.
    """
    # Imported locally so the block does not depend on a module-level import.
    from html import escape

    # Get template styles from existing TEMPLATE_STYLES
    styles = TEMPLATE_STYLES.get(template, TEMPLATE_STYLES['basic'])

    # Generate links list
    if html_files:
        items = []
        for file_info in html_files:
            # Paths come from the file system and may contain '"' or '&',
            # which would otherwise terminate or corrupt the href attribute.
            href = escape(str(file_info['relative_path']), quote=True)
            # NOTE(review): link text is inserted unchanged. Titles taken from
            # existing markup may already contain character references
            # (e.g. "&amp;"), so escaping here would double-encode them;
            # titles derived from file names are therefore not escaped either.
            file_title = file_info['title']
            items.append(f' <li><a href="{href}">{file_title}</a></li>\n')
        links_html = "<ul>\n" + "".join(items) + " </ul>"
    else:
        links_html = "<p>No HTML files found in this directory.</p>"

    # Generate HTML template (doubled braces are literal CSS braces)
    html_template = '''<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{title}</title>
<style>
body {{
{body_bg}
color: {body_color};
font-family: {font_family};
line-height: 1.6;
max-width: {max_width};
margin: 0 auto;
padding: 20px;
{text_align}
}}
h1 {{
color: {heading_color};
{heading_border}
margin-bottom: 20px;
}}
h2 {{
color: {heading_color};
margin-top: 30px;
margin-bottom: 15px;
}}
ul {{
list-style-type: none;
padding: 0;
}}
li {{
margin: 10px 0;
padding: 8px 12px;
background: {code_bg};
border-radius: 4px;
{code_border}
}}
a {{
color: {heading_color};
text-decoration: none;
font-weight: 500;
}}
a:hover {{
text-decoration: underline;
}}
.directory-info {{
margin-bottom: 20px;
padding: 15px;
background: {code_bg};
border-radius: 8px;
border-left: 4px solid {blockquote_border};
color: {blockquote_color};
}}
</style>
</head>
<body>
<h1>{title}</h1>
<div class="directory-info">
<p>📁 Directory Index - Navigate through the available HTML pages</p>
</div>
<h2>Available Pages</h2>
{links_html}
<hr style="margin-top: 40px; border: 1px solid {blockquote_border};">
<p style="text-align: center; color: {blockquote_color}; font-size: 0.9em;">
Generated with MarkiTect • {file_count} file(s)
</p>
</body>
</html>'''

    return html_template.format(
        # Plain-text title (e.g. a directory name): escape '&', '<' and '>'.
        title=escape(str(title), quote=False),
        links_html=links_html,
        file_count=len(html_files),
        **styles
    )
def _prepare_file_infos(html_files, output_path):
    """Prepare file information for template generation.

    Args:
        html_files: Iterable of ``Path`` objects for the pages to list.
        output_path: ``Path`` of the index file being written; link targets
            are computed relative to its parent directory.

    Returns:
        A list of dicts with the keys ``'path'`` (the original ``Path``),
        ``'title'`` (from ``extract_html_title``) and ``'relative_path'``
        (link target as a string with forward slashes).
    """
    index_dir = output_path.parent
    file_infos = []

    for html_file in html_files:
        title = extract_html_title(html_file)

        # Calculate relative path from output directory to HTML file.
        # os.path.relpath (unlike Path.relative_to) can climb out of the index
        # directory with ".." and copes with one path being absolute while the
        # other is relative, so links stay valid when the index is written
        # outside the scanned directory.
        try:
            relative_path = Path(os.path.relpath(html_file, index_dir)).as_posix()
        except ValueError:
            # No relative path exists (e.g. different drives on Windows);
            # fall back to the bare file name.
            relative_path = html_file.name

        file_infos.append({
            'path': html_file,
            'title': title,
            'relative_path': relative_path,
        })

    return file_infos
def process_directory_for_index(directory, index_filename="index.html", template="basic", recursive=False):
    """Build an index page for ``directory`` and write it inside that directory.

    Args:
        directory: Directory whose HTML files should be listed.
        index_filename: Name of the index file, joined onto ``directory``.
        template: Style template name passed through to ``generate_index_html``.
        recursive: Whether subdirectories are searched too.

    Returns:
        The ``Path`` of the index file that was written.

    Raises:
        FileNotFoundError: If ``directory`` is missing or is not a directory.
    """
    root = Path(directory)
    index_path = root / index_filename

    if not (root.exists() and root.is_dir()):
        raise FileNotFoundError(f"Directory not found: {root}")

    # Collect the pages to list, leaving out the index file itself.
    pages = [
        found
        for found in find_html_files(root, recursive=recursive)
        if found != index_path
    ]
    entries = _prepare_file_infos(pages, index_path)

    # Path('.').name is empty, so a generic label stands in for it.
    page_title = f"{root.name or 'Directory'} - Index"
    document = generate_index_html(entries, page_title, template)

    # Create any missing parent folders before writing the page.
    index_path.parent.mkdir(parents=True, exist_ok=True)
    index_path.write_text(document, encoding='utf-8')
    return index_path