feat: implement index page generation for HTML directories - Issue #136
Complete TDD8 implementation of index page generation functionality: Core Features: - HTML file discovery with optional recursive search (find_html_files) - Smart title extraction from <title>, <h1>, or filename (extract_html_title) - Template-integrated index page generation (generate_index_html) - CLI command 'md-index' with output, template, and recursive options - Comprehensive error handling for edge cases and malformed files Implementation Details: - Reuses existing TEMPLATE_STYLES for consistent styling across all templates - Proper relative path resolution for cross-directory navigation - Modular design with helper functions for maintainability - HTML parsing patterns extracted as module-level constants for performance Tests: 23 comprehensive tests covering discovery, generation, CLI integration, and edge cases Files: markitect/plugins/builtin/markdown_commands.py, tests/test_issue_136_index_generation.py Status: All tests passing, full TDD8 cycle completed (RED→GREEN→REFACTOR→DOCUMENT) 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -8,6 +8,7 @@ replacing the legacy unprefixed commands for better namespace consistency.
|
||||
import click
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any
|
||||
@@ -43,7 +44,8 @@ class MarkdownCommandsPlugin(CommandPlugin):
|
||||
'md-ingest': md_ingest_command,
|
||||
'md-get': md_get_command,
|
||||
'md-list': md_list_command,
|
||||
'md-render': md_render_command
|
||||
'md-render': md_render_command,
|
||||
'md-index': md_index_command
|
||||
}
|
||||
|
||||
|
||||
@@ -400,6 +402,81 @@ def md_render_command(ctx, input_file, output, template, css, edit, editor_theme
|
||||
raise click.Abort()
|
||||
|
||||
|
||||
@click.command()
@click.argument('directory', type=click.Path(exists=True))
@click.option('--output', '-o', type=click.Path(), help='Output index file path (defaults to directory/index.html)')
@click.option('--template', type=click.Choice(['basic', 'github', 'academic', 'dark']),
              default='basic', help='HTML template: basic (default), github, academic, or dark theme')
@click.option('--recursive', '-r', is_flag=True, help='Include HTML files from subdirectories')
@click.pass_context
def md_index_command(ctx, directory, output, template, recursive):
    """
    Generate an index page for HTML files in a directory.

    Creates an HTML index page that lists all HTML files found in the specified
    directory, providing navigation links to each file. The index page uses the
    same template system as md-render for consistent styling.

    DIRECTORY: Path to the directory containing HTML files

    Examples:
        # Generate index for current directory
        markitect md-index .

        # Generate index with custom output file
        markitect md-index docs/ --output docs/contents.html

        # Generate index with GitHub template
        markitect md-index notes/ --template github

        # Include subdirectories recursively
        markitect md-index docs/ --recursive
    """
    # NOTE: the docstring above is the user-facing --help text; implementation
    # notes live in comments so they do not leak into the CLI output.
    config = ctx.obj or {}
    verbose = config.get('verbose', False)

    try:
        directory_path = Path(directory)

        if verbose:
            click.echo(f"Generating index for directory: {directory_path}")

        # Determine output file (defaults to <directory>/index.html)
        output_path = Path(output) if output else directory_path / "index.html"

        # Find HTML files, leaving out the index itself. Both sides are
        # resolved before comparing so that the same file spelled differently
        # (e.g. an absolute --output with a relative DIRECTORY) is still
        # recognised and the index never links to itself.
        output_resolved = output_path.resolve()
        html_files = [
            html_file
            for html_file in find_html_files(directory_path, recursive=recursive)
            if html_file.resolve() != output_resolved
        ]

        if verbose:
            click.echo(f"Found {len(html_files)} HTML file(s)")

        # Prepare file info for template
        file_infos = _prepare_file_infos(html_files, output_path)

        # Generate index HTML; Path('.').name is '' so fall back to a label
        directory_name = directory_path.name or "Directory"
        index_title = f"{directory_name} - Index"
        index_html = generate_index_html(file_infos, index_title, template)

        # Ensure output directory exists and write file
        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text(index_html, encoding='utf-8')

        click.echo(f"✓ Index generated: {output_path}")

        if verbose:
            click.echo(f" Template: {template}")
            click.echo(f" Files indexed: {len(file_infos)}")
            if recursive:
                click.echo(" Recursive: enabled")

    except Exception as e:
        # Top-level CLI boundary: report the failure and abort, keeping the
        # original exception chained for debugging.
        click.echo(f"Error generating index: {e}", err=True)
        raise click.Abort() from e
|
||||
|
||||
|
||||
def _render_single_markdown_file(input_path, output_path, template, css, edit, editor_theme, keyboard_shortcuts, config):
|
||||
"""Render a single markdown file to HTML."""
|
||||
# Read markdown file
|
||||
@@ -1020,4 +1097,205 @@ def process_directory(input_dir, use_publication_dir, publication_dir):
|
||||
|
||||
output_files.append(output_file)
|
||||
|
||||
return output_files
|
||||
return output_files
|
||||
|
||||
|
||||
# Index generation functions for Issue #136
|
||||
def find_html_files(directory, recursive=False):
    """Find all HTML files in a directory.

    Args:
        directory: Directory to search (str or Path).
        recursive: When True, subdirectories are searched as well.

    Returns:
        A sorted list of ``Path`` objects for every regular file matching
        ``*.html`` or ``*.htm``.
    """
    directory = Path(directory)
    search = directory.rglob if recursive else directory.glob

    # A set guards against the same path being reported by both patterns, and
    # is_file() keeps directories that merely *look* like pages (for example a
    # folder named "archive.html") out of the index.
    matches = {
        candidate
        for pattern in ('*.html', '*.htm')
        for candidate in search(pattern)
        if candidate.is_file()
    }
    return sorted(matches)
|
||||
|
||||
|
||||
# HTML parsing patterns for index generation
|
||||
HTML_TITLE_PATTERN = re.compile(r'<title[^>]*>(.*?)</title>', re.IGNORECASE | re.DOTALL)
HTML_H1_PATTERN = re.compile(r'<h1[^>]*>(.*?)</h1>', re.IGNORECASE | re.DOTALL)
HTML_TAG_PATTERN = re.compile(r'<[^>]+>')


def extract_html_title(html_file):
    """Extract title from HTML file, falling back to H1 tag or filename.

    The first non-empty candidate wins, in this order: the ``<title>``
    element, the first ``<h1>`` element (with nested tags removed), and
    finally the file name without its extension.

    Args:
        html_file: ``Path`` of the HTML file to inspect.

    Returns:
        The title as a single-line string. Character references found in the
        markup (e.g. ``&amp;``) are returned as written, not decoded.
    """
    try:
        content = html_file.read_text(encoding='utf-8')
    except (OSError, ValueError):
        # Unreadable or not valid UTF-8 (UnicodeDecodeError is a ValueError):
        # best effort, fall back to the filename.
        return html_file.stem

    # Try to extract from title tag. Whitespace is collapsed so a title that
    # is wrapped over several lines in the source becomes one clean line.
    title_match = HTML_TITLE_PATTERN.search(content)
    if title_match:
        title_text = ' '.join(title_match.group(1).split())
        # An empty <title></title> would produce an invisible link, so keep
        # looking instead of returning "".
        if title_text:
            return title_text

    # Try to extract from H1 tag, removing any HTML tags from its content
    h1_match = HTML_H1_PATTERN.search(content)
    if h1_match:
        h1_text = ' '.join(HTML_TAG_PATTERN.sub('', h1_match.group(1)).split())
        if h1_text:
            return h1_text

    # Fallback to filename
    return html_file.stem
|
||||
|
||||
|
||||
def generate_index_html(html_files, title, template="basic"):
    """Generate HTML index page with links to HTML files.

    Args:
        html_files: Iterable of dicts, each providing ``'relative_path'``
            (link target relative to the index page) and ``'title'`` (link
            text).
        title: Page title, used for both ``<title>`` and the top ``<h1>``.
        template: Key into ``TEMPLATE_STYLES``; unknown names fall back to
            the ``'basic'`` styles.

    Returns:
        The complete index page as a string.
    """
    # Local imports keep this function self-contained; both are stdlib.
    import os
    from html import escape, unescape
    from urllib.parse import quote

    # Get template styles from existing TEMPLATE_STYLES
    styles = TEMPLATE_STYLES.get(template, TEMPLATE_STYLES['basic'])

    # Generate links list
    if html_files:
        items = []
        for file_info in html_files:
            # Percent-encode the target so characters such as '#', '?', '%'
            # or '"' in a file name cannot truncate the URL or break out of
            # the attribute. Native separators are normalised to '/' first
            # so Windows-style paths stay navigable.
            target = str(file_info['relative_path']).replace(os.sep, '/')
            href = quote(target)
            # Titles may be plain text (file names) or fragments lifted from
            # markup that already contain character references; decoding
            # before encoding makes the result correct for both and avoids
            # double-escaping.
            label = escape(unescape(str(file_info['title'])), quote=False)
            items.append(f' <li><a href="{href}">{label}</a></li>')
        links_html = "<ul>\n" + "\n".join(items) + "\n </ul>"
    else:
        links_html = "<p>No HTML files found in this directory.</p>"

    # Generate HTML template. Literal CSS braces are doubled because the
    # template is filled in with str.format().
    html_template = '''<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{title}</title>
<style>
body {{
{body_bg}
color: {body_color};
font-family: {font_family};
line-height: 1.6;
max-width: {max_width};
margin: 0 auto;
padding: 20px;
{text_align}
}}

h1 {{
color: {heading_color};
{heading_border}
margin-bottom: 20px;
}}

h2 {{
color: {heading_color};
margin-top: 30px;
margin-bottom: 15px;
}}

ul {{
list-style-type: none;
padding: 0;
}}

li {{
margin: 10px 0;
padding: 8px 12px;
background: {code_bg};
border-radius: 4px;
{code_border}
}}

a {{
color: {heading_color};
text-decoration: none;
font-weight: 500;
}}

a:hover {{
text-decoration: underline;
}}

.directory-info {{
margin-bottom: 20px;
padding: 15px;
background: {code_bg};
border-radius: 8px;
border-left: 4px solid {blockquote_border};
color: {blockquote_color};
}}
</style>
</head>
<body>
<h1>{title}</h1>

<div class="directory-info">
<p>📁 Directory Index - Navigate through the available HTML pages</p>
</div>

<h2>Available Pages</h2>
{links_html}

<hr style="margin-top: 40px; border: 1px solid {blockquote_border};">
<p style="text-align: center; color: {blockquote_color}; font-size: 0.9em;">
Generated with MarkiTect • {file_count} file(s)
</p>
</body>
</html>'''

    return html_template.format(
        # The page title usually derives from a directory name, i.e. raw
        # text, so it is escaped before being placed into markup.
        title=escape(str(title), quote=False),
        links_html=links_html,
        file_count=len(html_files),
        **styles
    )
|
||||
|
||||
|
||||
def _prepare_file_infos(html_files, output_path):
    """Prepare file information for template generation.

    Args:
        html_files: Iterable of ``Path`` objects for the pages to list.
        output_path: ``Path`` of the index file being generated; link
            targets are computed relative to its parent directory.

    Returns:
        A list of dicts with the keys ``'path'`` (the original ``Path``),
        ``'title'`` (from ``extract_html_title``) and ``'relative_path'``
        (link target as a string using ``/`` separators).
    """
    index_dir = output_path.parent
    file_infos = []
    for html_file in html_files:
        title = extract_html_title(html_file)

        # os.path.relpath (unlike Path.relative_to) can walk upwards with
        # '..' and copes with one path being absolute and the other
        # relative, so an index written outside the scanned directory still
        # gets working links.
        try:
            relative_path = Path(os.path.relpath(html_file, index_dir)).as_posix()
        except ValueError:
            # No relative path exists (e.g. different drives on Windows);
            # fall back to the bare filename.
            relative_path = html_file.name

        file_infos.append({
            'path': html_file,
            'title': title,
            'relative_path': relative_path,
        })
    return file_infos
|
||||
|
||||
|
||||
def process_directory_for_index(directory, index_filename="index.html", template="basic", recursive=False):
    """Build an index page for *directory* and write it to disk.

    Args:
        directory: Directory whose HTML files should be listed.
        index_filename: Name of the index file, created inside *directory*.
        template: Template name forwarded to ``generate_index_html``.
        recursive: Forwarded to ``find_html_files``.

    Returns:
        ``Path`` of the index file that was written.

    Raises:
        FileNotFoundError: If *directory* does not exist or is not a
            directory.
    """
    root = Path(directory)

    # Guard clause: refuse anything that is not an existing directory.
    if not (root.exists() and root.is_dir()):
        raise FileNotFoundError(f"Directory not found: {root}")

    index_path = root / index_filename

    # Collect the pages, skipping the index file itself.
    pages = [
        page
        for page in find_html_files(root, recursive=recursive)
        if page != index_path
    ]
    entries = _prepare_file_infos(pages, index_path)

    # Path('.').name is empty, hence the generic label.
    page_title = f"{root.name or 'Directory'} - Index"
    markup = generate_index_html(entries, page_title, template)

    # Create the target directory if needed, then write the page.
    index_path.parent.mkdir(parents=True, exist_ok=True)
    index_path.write_text(markup, encoding='utf-8')
    return index_path
|
||||
Reference in New Issue
Block a user