chore: Issue closure 125 cleanup
This commit is contained in:
@@ -0,0 +1,9 @@
|
||||
"""
|
||||
Content module for MarkdownMatters CLI.
|
||||
Handles content extraction without frontmatter and tailmatter zones.
|
||||
"""
|
||||
|
||||
from .parser import ContentParser
|
||||
from .stats import ContentStats
|
||||
|
||||
__all__ = ['ContentParser', 'ContentStats']
|
||||
@@ -0,0 +1,57 @@
|
||||
"""
|
||||
CLI commands for content operations.
|
||||
"""
|
||||
|
||||
import click
|
||||
import json
|
||||
from pathlib import Path
|
||||
from .parser import ContentParser
|
||||
|
||||
|
||||
@click.command('content-get')
@click.option('--file', 'file_path', required=True,
              type=click.Path(exists=True, dir_okay=False),
              help='Path to markdown file')
def content_get(file_path):
    """Extract content without frontmatter and tailmatter.

    Reads the file as UTF-8, strips the matter zones with ``ContentParser``
    and writes the remaining content to stdout.

    Args:
        file_path: Path to an existing markdown file. Directories are
            rejected by click during option validation.

    Raises:
        click.ClickException: If the file cannot be read or the content
            cannot be extracted. The underlying error is chained as the
            cause and also echoed to stderr.
    """
    file_path = Path(file_path)

    try:
        text = file_path.read_text(encoding='utf-8')
        content = ContentParser().extract_content(text)
    except Exception as e:
        # CLI boundary: report the underlying cause on stderr, then let click
        # turn the failure into a non-zero exit with a short message.
        click.echo(f"Error: {e}", err=True)
        raise click.ClickException(
            f"Failed to extract content from {file_path}"
        ) from e

    click.echo(content)
|
||||
|
||||
|
||||
@click.command('content-stats')
@click.option('--file', 'file_path', required=True,
              type=click.Path(exists=True, dir_okay=False),
              help='Path to markdown file')
@click.option('--format', 'output_format', default='json',
              type=click.Choice(['json', 'text']),
              help='Output format (json or text)')
def content_stats(file_path, output_format):
    """Calculate content statistics.

    Reads the file as UTF-8, strips frontmatter and tailmatter with
    ``ContentParser`` and prints word, line, paragraph and character counts
    for the remaining content.

    Args:
        file_path: Path to an existing markdown file. Directories are
            rejected by click during option validation.
        output_format: ``'json'`` prints the stats dictionary as indented
            JSON; ``'text'`` prints one labelled count per line.

    Raises:
        click.ClickException: If the file cannot be read or the statistics
            cannot be computed. The underlying error is chained as the
            cause and also echoed to stderr.
    """
    file_path = Path(file_path)

    try:
        text = file_path.read_text(encoding='utf-8')
        parser = ContentParser()
        stats = parser.calculate_stats(parser.extract_content(text))

        if output_format == 'json':
            report_lines = [json.dumps(stats.to_dict(), indent=2)]
        else:
            report_lines = [
                f"Word count: {stats.word_count}",
                f"Line count: {stats.line_count}",
                f"Paragraph count: {stats.paragraph_count}",
                f"Character count: {stats.character_count}",
            ]
    except Exception as e:
        # CLI boundary: report the underlying cause on stderr, then let click
        # turn the failure into a non-zero exit with a short message.
        click.echo(f"Error: {e}", err=True)
        raise click.ClickException(
            f"Failed to calculate stats for {file_path}"
        ) from e

    for report_line in report_lines:
        click.echo(report_line)
|
||||
@@ -0,0 +1,90 @@
|
||||
"""
|
||||
Content parser for extracting markdown content without matter zones.
|
||||
"""
|
||||
|
||||
import re
|
||||
from typing import Optional
|
||||
from .stats import ContentStats
|
||||
|
||||
|
||||
class ContentParser:
    """Parser for extracting content from MarkdownMatters documents."""

    # YAML frontmatter: a `---` line at the very start of the document, any
    # lines, then a closing `---` line. Anchored with \A (not ^ + MULTILINE)
    # so that `---` horizontal rules later in the body are never treated as
    # frontmatter fences. The closing fence may be the last line of the text.
    _FRONTMATTER_RE = re.compile(
        r'\A---[ \t]*\r?\n(?:.*?\r?\n)?---[ \t]*(?:\r?\n|\Z)',
        re.DOTALL,
    )

    # Tailmatter fence introduced by a horizontal rule:
    #   ---
    #   ```yaml tailmatter   (or ```json tailmatter)
    #   ...
    #   ```
    # NOTE: with MULTILINE, `$` matches at the end of any line, so a fence
    # labelled "tailmatter" is removed even when more text follows it.
    _RULED_TAILMATTER_RE = re.compile(
        r'\n---\s*\n\s*```(?:yaml|json)\s+tailmatter\s*\n.*?```\s*$',
        re.DOTALL | re.MULTILINE,
    )

    # Same fence without the preceding horizontal rule.
    _BARE_TAILMATTER_RE = re.compile(
        r'\n\s*```(?:yaml|json)\s+tailmatter\s*\n.*?```\s*$',
        re.DOTALL | re.MULTILINE,
    )

    # Paragraph separator: a blank line, possibly containing only whitespace
    # (this also covers CRLF line endings).
    _PARAGRAPH_BREAK_RE = re.compile(r'\n\s*\n')

    def extract_content(self, text: str) -> str:
        """
        Extract main content without frontmatter and tailmatter.

        Args:
            text: Full markdown document text

        Returns:
            Content without frontmatter and tailmatter zones, with leading
            and trailing whitespace stripped
        """
        content = self._remove_frontmatter(text)
        content = self._remove_tailmatter(content)
        return content.strip()

    def calculate_stats(self, content: str) -> "ContentStats":
        """
        Calculate statistics for content.

        Args:
            content: The content text to analyze

        Returns:
            ContentStats object with calculated statistics. Empty content
            yields zero for every count.
        """
        # An empty string has no lines; otherwise lines are '\n'-separated.
        line_count = (content.count('\n') + 1) if content else 0

        # Words are whitespace-separated tokens.
        word_count = len(content.split())

        # Paragraphs are non-empty text blocks separated by blank lines.
        paragraph_count = sum(
            1
            for block in self._PARAGRAPH_BREAK_RE.split(content)
            if block.strip()
        )

        # Characters include whitespace and newlines.
        character_count = len(content)

        return ContentStats(
            word_count=word_count,
            line_count=line_count,
            paragraph_count=paragraph_count,
            character_count=character_count,
        )

    def _remove_frontmatter(self, text: str) -> str:
        """Remove a leading YAML frontmatter block (---...---) from text.

        Only a block that starts on the first line of the document is
        removed; text without such a block is returned unchanged.
        """
        # TODO: Add support for TOML and JSON frontmatter in future cycles
        return self._FRONTMATTER_RE.sub('', text, count=1)

    def _remove_tailmatter(self, text: str) -> str:
        """Remove tailmatter blocks from text.

        A tailmatter block is a fenced code block opened with
        ```yaml tailmatter or ```json tailmatter, optionally preceded by a
        horizontal rule (---).
        """
        # Strip the rule-prefixed form first so the `---` line goes with it.
        text = self._RULED_TAILMATTER_RE.sub('', text)

        # Then handle fences that have no preceding horizontal rule.
        return self._BARE_TAILMATTER_RE.sub('', text)
|
||||
@@ -0,0 +1,25 @@
|
||||
"""
|
||||
Content statistics data structures.
|
||||
"""
|
||||
|
||||
from dataclasses import asdict, dataclass
from typing import Dict, Any
|
||||
|
||||
|
||||
@dataclass
class ContentStats:
    """Statistics about markdown content."""

    # Number of words in the content.
    word_count: int
    # Number of lines in the content.
    line_count: int
    # Number of paragraphs in the content.
    paragraph_count: int
    # Number of characters in the content.
    character_count: int

    def to_dict(self) -> Dict[str, Any]:
        """Convert stats to dictionary.

        Returns:
            A new dict mapping each field name to its value, in field
            declaration order. Derived from the dataclass fields so that a
            newly added field is included automatically.
        """
        return asdict(self)
|
||||
Reference in New Issue
Block a user