""" Tailmatter parser for extracting and manipulating YAML/JSON tailmatter blocks. """ import re import yaml import json from typing import Dict, Any, List, Optional from .stats import TailmatterStats class TailmatterParser: """Parser for tailmatter in MarkdownMatters documents.""" def extract_tailmatter(self, text: str) -> Dict[str, Any]: """ Extract tailmatter from markdown text. Args: text: Full markdown document text Returns: Dictionary containing tailmatter data """ tailmatter_content = self._extract_tailmatter_content(text) if not tailmatter_content: return {} # Detect format and parse if tailmatter_content.strip().startswith('```yaml tailmatter'): return self._parse_yaml_tailmatter(tailmatter_content) elif tailmatter_content.strip().startswith('```json tailmatter'): return self._parse_json_tailmatter(tailmatter_content) return {} def get_tailmatter_value(self, text: str, key: str) -> Any: """ Get specific tailmatter value by key. Args: text: Full markdown document text key: Key with dot notation support Returns: Value or None if not found """ tailmatter = self.extract_tailmatter(text) return self._get_nested_value(tailmatter, key) def set_tailmatter_value(self, text: str, key: str, value: Any) -> str: """ Set a tailmatter value in the document. Args: text: Full markdown document text key: Key to set (supports dot notation) value: Value to set Returns: Updated document text """ tailmatter = self.extract_tailmatter(text) self._set_nested_value(tailmatter, key, value) return self._update_tailmatter_in_text(text, tailmatter) def get_tailmatter_keys(self, text: str) -> List[str]: """ Get list of tailmatter keys. Args: text: Full markdown document text Returns: List of tailmatter keys """ tailmatter = self.extract_tailmatter(text) return self._get_all_keys_recursive(tailmatter) def calculate_tailmatter_stats(self, text: str) -> TailmatterStats: """ Calculate statistics for tailmatter. Args: text: Full markdown document text Returns: TailmatterStats object """ tailmatter = self.extract_tailmatter(text) if not tailmatter: return TailmatterStats( has_tailmatter=False, format=None, total_fields=0, qa_items=0, qa_completed=0, editorial_status=None, has_agent_config=False ) # Analyze tailmatter structure format_type = self._detect_tailmatter_format(text) total_fields = len(tailmatter) # Analyze QA checklist qa_items, qa_completed = self._analyze_qa_checklist(tailmatter) # Get editorial status editorial_status = self._get_editorial_status(tailmatter) # Check for agent config has_agent_config = "agent_config" in tailmatter return TailmatterStats( has_tailmatter=True, format=format_type, total_fields=total_fields, qa_items=qa_items, qa_completed=qa_completed, editorial_status=editorial_status, has_agent_config=has_agent_config ) def _extract_tailmatter_content(self, text: str) -> Optional[str]: """Extract the raw tailmatter content.""" # Look for tailmatter pattern at end of document pattern = r'\n---\s*\n\s*(```(?:yaml|json)\s+tailmatter\s*\n.*?```)\s*$' match = re.search(pattern, text, flags=re.DOTALL | re.MULTILINE) if match: return match.group(1) # Also check without preceding --- pattern = r'\n\s*(```(?:yaml|json)\s+tailmatter\s*\n.*?```)\s*$' match = re.search(pattern, text, flags=re.DOTALL | re.MULTILINE) if match: return match.group(1) return None def _parse_yaml_tailmatter(self, content: str) -> Dict[str, Any]: """Parse YAML tailmatter content.""" # Extract YAML content between delimiters match = re.search(r'```yaml\s+tailmatter\s*\n(.*?)\n```', content, flags=re.DOTALL) if not match: return {} yaml_content = match.group(1) try: return yaml.safe_load(yaml_content) or {} except yaml.YAMLError: return {} def _parse_json_tailmatter(self, content: str) -> Dict[str, Any]: """Parse JSON tailmatter content.""" # Extract JSON content between delimiters match = re.search(r'```json\s+tailmatter\s*\n(.*?)\n```', content, flags=re.DOTALL) if not match: return {} json_content = match.group(1) try: return json.loads(json_content) except json.JSONDecodeError: return {} def _detect_tailmatter_format(self, text: str) -> Optional[str]: """Detect the format of tailmatter.""" content = self._extract_tailmatter_content(text) if not content: return None if 'yaml tailmatter' in content: return "yaml" elif 'json tailmatter' in content: return "json" return None def _get_nested_value(self, data: Dict[str, Any], key: str) -> Any: """Get nested value using dot notation.""" keys = key.split('.') current = data for k in keys: if isinstance(current, dict) and k in current: current = current[k] else: return None return current def _set_nested_value(self, data: Dict[str, Any], key: str, value: Any) -> None: """Set nested value using dot notation.""" keys = key.split('.') current = data for k in keys[:-1]: if k not in current: current[k] = {} current = current[k] current[keys[-1]] = value def _get_all_keys_recursive(self, data: Dict[str, Any], prefix: str = "") -> List[str]: """Get all keys recursively with dot notation.""" keys = [] for key, value in data.items(): full_key = f"{prefix}.{key}" if prefix else key keys.append(full_key) if isinstance(value, dict): keys.extend(self._get_all_keys_recursive(value, full_key)) return keys def _analyze_qa_checklist(self, tailmatter: Dict[str, Any]) -> tuple: """Analyze QA checklist items.""" qa_checklist = tailmatter.get("qa_checklist", []) if not isinstance(qa_checklist, list): return 0, 0 total_items = len(qa_checklist) completed_items = sum(1 for item in qa_checklist if isinstance(item, dict) and item.get("complete", False)) return total_items, completed_items def _get_editorial_status(self, tailmatter: Dict[str, Any]) -> Optional[str]: """Get editorial status.""" editorial = tailmatter.get("editorial", {}) if isinstance(editorial, dict): return editorial.get("status") return None def _update_tailmatter_in_text(self, text: str, tailmatter: Dict[str, Any]) -> str: """Update tailmatter block in text.""" # Convert tailmatter to YAML tailmatter_yaml = yaml.dump(tailmatter, default_flow_style=False) # Check if text already has tailmatter pattern = r'\n---\s*\n\s*```(?:yaml|json)\s+tailmatter\s*\n.*?```\s*$' if re.search(pattern, text, flags=re.DOTALL | re.MULTILINE): # Replace existing tailmatter new_tailmatter = f"\n---\n\n```yaml tailmatter\n{tailmatter_yaml}```" return re.sub(pattern, new_tailmatter, text, flags=re.DOTALL | re.MULTILINE) else: # Add tailmatter to end new_tailmatter = f"\n\n---\n\n```yaml tailmatter\n{tailmatter_yaml}```" return text + new_tailmatter