""" CHANGELOG.md parser for extracting release notes. This module provides tools for parsing CHANGELOG.md files and extracting version-specific content for release notes. """ import re from pathlib import Path from typing import Optional class ChangelogParser: """Parse CHANGELOG.md files and extract release information.""" def __init__(self, changelog_path: Optional[Path] = None): """Initialize changelog parser. Args: changelog_path: Path to CHANGELOG.md file """ self.changelog_path = changelog_path or Path.cwd() / 'CHANGELOG.md' def extract_version_section(self, version: str, format: str = 'markdown') -> str: """Extract CHANGELOG section for a specific version. Args: version: Version to extract (e.g., "0.10.0") format: Output format ('markdown', 'plain', 'html') Returns: Formatted content of the version section """ if not self.changelog_path.exists(): return f"Error: CHANGELOG.md not found at {self.changelog_path}" try: version_clean = version.lstrip('v') with open(self.changelog_path) as f: content = f.read() # Find the version section using regex # Match: ## [VERSION] - DATE followed by content until next ## [ pattern = rf"## \[{re.escape(version_clean)}\].*?\n\n(.*?)(?=\n## \[|\Z)" match = re.search(pattern, content, re.DOTALL) if not match: return f"Error: No section found for version {version_clean} in CHANGELOG.md" section_content = match.group(1).strip() if not section_content: return f"Warning: Section for version {version_clean} exists but is empty" # Format based on requested format if format == 'plain': return self._to_plain(section_content) elif format == 'html': return self._to_html(section_content) else: return section_content # markdown (default) except Exception as e: return f"Error reading CHANGELOG: {e}" def get_latest_version(self) -> Optional[str]: """Get the latest version number from CHANGELOG. Returns: Latest version string or None if not found """ if not self.changelog_path.exists(): return None try: with open(self.changelog_path) as f: content = f.read() # Find first version section (skip Unreleased) pattern = r"## \[(\d+\.\d+\.\d+[^\]]*)\]" match = re.search(pattern, content) return match.group(1) if match else None except Exception: return None def list_versions(self) -> list: """List all versions in CHANGELOG. Returns: List of version strings """ if not self.changelog_path.exists(): return [] try: with open(self.changelog_path) as f: content = f.read() # Find all version sections (excluding Unreleased) pattern = r"## \[(\d+\.\d+\.\d+[^\]]*)\]" matches = re.findall(pattern, content) return matches except Exception: return [] def _to_plain(self, markdown_content: str) -> str: """Convert markdown content to plain text. Args: markdown_content: Markdown formatted content Returns: Plain text content """ # Remove markdown formatting plain = markdown_content # Remove bold/italic plain = re.sub(r'\*\*([^*]+)\*\*', r'\1', plain) # bold plain = re.sub(r'\*([^*]+)\*', r'\1', plain) # italic plain = re.sub(r'__([^_]+)__', r'\1', plain) # bold (underscores) plain = re.sub(r'_([^_]+)_', r'\1', plain) # italic (underscores) # Remove links but keep text plain = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', plain) # Remove inline code backticks plain = re.sub(r'`([^`]+)`', r'\1', plain) # Convert headers to plain text with spacing plain = re.sub(r'^### (.+)$', r'\n\1:', plain, flags=re.MULTILINE) plain = re.sub(r'^## (.+)$', r'\n\1\n' + '=' * 40, plain, flags=re.MULTILINE) return plain.strip() def _to_html(self, markdown_content: str) -> str: """Convert markdown content to HTML. Args: markdown_content: Markdown formatted content Returns: HTML formatted content """ try: import markdown return markdown.markdown(markdown_content) except ImportError: # Fallback to basic HTML conversion if markdown package not available html = markdown_content # Headers html = re.sub(r'^### (.+)$', r'

\1

', html, flags=re.MULTILINE) html = re.sub(r'^## (.+)$', r'

\1

', html, flags=re.MULTILINE) # Bold/italic html = re.sub(r'\*\*([^*]+)\*\*', r'\1', html) html = re.sub(r'\*([^*]+)\*', r'\1', html) # Links html = re.sub(r'\[([^\]]+)\]\(([^\)]+)\)', r'\1', html) # Code html = re.sub(r'`([^`]+)`', r'\1', html) # Lists html = re.sub(r'^- (.+)$', r'
  • \1
  • ', html, flags=re.MULTILINE) html = re.sub(r'(
  • .*
  • )', r'', html, flags=re.DOTALL) # Paragraphs html = re.sub(r'\n\n', '

    ', html) html = f'

    {html}

    ' return html