""" Markdown document builders and sample generators for testing. """ from typing import Dict, List, Optional import random import string class MarkdownDocumentBuilder: """Builder pattern for creating test markdown documents.""" def __init__(self): self.content_parts: List[str] = [] self.metadata: Dict[str, str] = {} def with_heading(self, text: str, level: int = 1) -> "MarkdownDocumentBuilder": """Add a heading to the document.""" if level < 1 or level > 6: raise ValueError("Heading level must be between 1 and 6") heading_marker = "#" * level self.content_parts.append(f"{heading_marker} {text}") return self def with_paragraph(self, text: str) -> "MarkdownDocumentBuilder": """Add a paragraph to the document.""" self.content_parts.append(text) return self def with_list(self, items: List[str], ordered: bool = False) -> "MarkdownDocumentBuilder": """Add a list to the document.""" if ordered: list_items = [f"{i+1}. {item}" for i, item in enumerate(items)] else: list_items = [f"- {item}" for item in items] self.content_parts.append("\n".join(list_items)) return self def with_code_block(self, code: str, language: str = "python") -> "MarkdownDocumentBuilder": """Add a code block to the document.""" self.content_parts.append(f"```{language}\n{code}\n```") return self def with_link(self, text: str, url: str) -> "MarkdownDocumentBuilder": """Add a link to the document.""" self.content_parts.append(f"[{text}]({url})") return self def with_metadata(self, key: str, value: str) -> "MarkdownDocumentBuilder": """Add metadata (front matter) to the document.""" self.metadata[key] = value return self def with_table(self, headers: List[str], rows: List[List[str]]) -> "MarkdownDocumentBuilder": """Add a table to the document.""" table_lines = [] # Header row table_lines.append("| " + " | ".join(headers) + " |") # Separator row table_lines.append("| " + " | ".join(["-" * len(header) for header in headers]) + " |") # Data rows for row in rows: table_lines.append("| " + " | ".join(row) + " |") self.content_parts.append("\n".join(table_lines)) return self def with_blockquote(self, text: str) -> "MarkdownDocumentBuilder": """Add a blockquote to the document.""" quote_lines = [f"> {line}" for line in text.split("\n")] self.content_parts.append("\n".join(quote_lines)) return self def build(self) -> str: """Build the final markdown document.""" content = "\n\n".join(self.content_parts) if self.metadata: metadata_lines = [f"{k}: {v}" for k, v in self.metadata.items()] content = "---\n" + "\n".join(metadata_lines) + "\n---\n\n" + content return content class LargeMarkdownGenerator: """Generator for creating large markdown documents for performance testing.""" def __init__(self, seed: Optional[int] = None): self.random = random.Random(seed) def generate_document(self, size: str = "1mb") -> str: """Generate a large markdown document of specified size.""" size_bytes = self._parse_size(size) builder = MarkdownDocumentBuilder() # Add metadata builder.with_metadata("title", "Large Test Document") builder.with_metadata("author", "Test Generator") builder.with_metadata("size", size) # Add content until we reach target size current_size = 0 section_count = 0 while current_size < size_bytes: section_count += 1 section_title = f"Section {section_count}" builder.with_heading(section_title, level=2) # Add paragraphs for _ in range(self.random.randint(3, 8)): paragraph = self._generate_paragraph() builder.with_paragraph(paragraph) current_size += len(paragraph) + 2 # +2 for newlines if current_size >= size_bytes: break # Add a list occasionally if self.random.random() < 0.3: items = [self._generate_sentence() for _ in range(self.random.randint(3, 7))] builder.with_list(items) current_size += sum(len(item) for item in items) + len(items) * 3 # Approximate # Add a code block occasionally if self.random.random() < 0.2: code = self._generate_code_block() builder.with_code_block(code) current_size += len(code) + 10 # +10 for code block markers return builder.build() def _parse_size(self, size: str) -> int: """Parse size string (e.g., '1mb', '500kb') to bytes.""" size = size.lower() if size.endswith("kb"): return int(size[:-2]) * 1024 elif size.endswith("mb"): return int(size[:-2]) * 1024 * 1024 elif size.endswith("gb"): return int(size[:-2]) * 1024 * 1024 * 1024 else: return int(size) def _generate_paragraph(self) -> str: """Generate a paragraph of random text.""" sentences = [] for _ in range(self.random.randint(3, 8)): sentences.append(self._generate_sentence()) return " ".join(sentences) def _generate_sentence(self) -> str: """Generate a random sentence.""" words = [] for _ in range(self.random.randint(5, 15)): words.append(self._generate_word()) sentence = " ".join(words).capitalize() return sentence + "." def _generate_word(self) -> str: """Generate a random word.""" length = self.random.randint(3, 12) return "".join(self.random.choices(string.ascii_lowercase, k=length)) def _generate_code_block(self) -> str: """Generate a random code block.""" lines = [] for _ in range(self.random.randint(5, 15)): line = self._generate_code_line() lines.append(line) return "\n".join(lines) def _generate_code_line(self) -> str: """Generate a line of code-like text.""" templates = [ "def {func_name}({params}):", " return {expression}", "if {condition}:", " {statement}", "# {comment}", "class {class_name}:", " self.{attr} = {value}", "import {module}", "from {module} import {name}", ] template = self.random.choice(templates) variables = { "func_name": self._generate_word(), "params": ", ".join([self._generate_word() for _ in range(self.random.randint(0, 3))]), "expression": f"{self._generate_word()}({self._generate_word()})", "condition": f"{self._generate_word()} == {self.random.randint(1, 100)}", "statement": f"{self._generate_word()} = {self.random.randint(1, 100)}", "comment": " ".join([self._generate_word() for _ in range(self.random.randint(2, 6))]), "class_name": self._generate_word().capitalize(), "attr": self._generate_word(), "value": str(self.random.randint(1, 100)), "module": self._generate_word(), "name": self._generate_word(), } return template.format(**variables) # Pre-built sample documents SAMPLE_SIMPLE_DOCUMENT = """# Simple Document This is a simple test document. ## Features - Feature 1 - Feature 2 - Feature 3 """ SAMPLE_COMPLEX_DOCUMENT = ( MarkdownDocumentBuilder() .with_metadata("title", "Complex Test Document") .with_metadata("author", "Test Suite") .with_metadata("tags", "test, complex, sample") .with_heading("Complex Test Document") .with_paragraph("This is a complex test document with various markdown features.") .with_heading("Table of Contents", level=2) .with_list([ "Introduction", "Features", "Examples", "Conclusion" ], ordered=True) .with_heading("Introduction", level=2) .with_paragraph("This document demonstrates various markdown features.") .with_blockquote("This is an important note about the document.") .with_heading("Features", level=2) .with_list([ "**Bold text**", "*Italic text*", "`Code inline`", "[Links](https://example.com)" ]) .with_heading("Code Example", level=3) .with_code_block('''def hello_world(): """Print hello world message.""" print("Hello, World!") return "success"''') .with_heading("Data Table", level=3) .with_table( ["Name", "Type", "Description"], [ ["title", "string", "Document title"], ["author", "string", "Document author"], ["tags", "array", "Document tags"] ] ) .with_heading("Conclusion", level=2) .with_paragraph("This document shows the power of markdown for documentation.") .build() ) SAMPLE_TECHNICAL_DOCUMENT = ( MarkdownDocumentBuilder() .with_metadata("title", "API Documentation") .with_metadata("version", "1.0.0") .with_metadata("category", "technical") .with_heading("API Documentation") .with_paragraph("This document describes the REST API endpoints.") .with_heading("Authentication", level=2) .with_paragraph("All API requests require authentication via API key.") .with_code_block('''curl -H "Authorization: Bearer YOUR_API_KEY" \\ https://api.example.com/v1/endpoint''', "bash") .with_heading("Endpoints", level=2) .with_heading("GET /users", level=3) .with_paragraph("Retrieve a list of users.") .with_table( ["Parameter", "Type", "Required", "Description"], [ ["limit", "integer", "No", "Maximum number of results"], ["offset", "integer", "No", "Number of results to skip"], ["filter", "string", "No", "Filter criteria"] ] ) .with_heading("Response", level=4) .with_code_block('''{ "users": [ { "id": 1, "name": "John Doe", "email": "john@example.com" } ], "total": 1, "offset": 0, "limit": 10 }''', "json") .build() )