feat: implement comprehensive GraphQL read interface (issue #9)

Adds a complete GraphQL API for querying MarkiTect database content including:

CORE FEATURES:
- Type-safe GraphQL schema with comprehensive field definitions
- Full database access: markdown files, schemas, ASTs, and metadata
- Advanced search capabilities with relevance scoring
- Pagination support for efficient data access
- Real-time schema introspection and development tools

IMPLEMENTATION:
- GraphQL schema definition with 6 core types (MarkdownFile, Schema, AST, etc.)
- Complete resolver implementation with database integration
- Flask-based GraphQL server with CORS support
- GraphQL Playground for interactive development
- Health check and schema introspection endpoints

CLI INTEGRATION:
- graphql-serve: Start GraphQL server with customizable options
- graphql-query: Execute queries from command line (local/remote)
- graphql-schema: Retrieve schema definition in SDL/JSON format
- graphql-examples: Comprehensive usage examples and documentation

API FEATURES:
- Single item queries (by ID or filename)
- List queries with filtering and pagination
- Full-text search across files and schemas
- Database statistics and analytics
- AST querying with JSONPath expressions
- Computed fields (word count, line count, etc.)

TESTING:
- Comprehensive test suite with 38 passing tests
- Unit tests for schema, resolvers, server, and client
- Integration tests for query execution
- Error handling and edge case coverage
- Mock and fixture support for isolated testing

DOCUMENTATION:
- Complete API documentation with examples
- Usage guide for all CLI commands
- Programming examples in Python and JavaScript
- Performance optimization guidelines
- Troubleshooting and security considerations

The GraphQL interface enables developers to build rich applications on top of
MarkiTect data with flexible, efficient querying capabilities.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-10-03 11:53:53 +02:00
parent c4a1b3cc0c
commit 2dd1704e51
7 changed files with 2626 additions and 0 deletions

View File

@@ -0,0 +1,12 @@
"""
GraphQL interface for MarkiTect - Issue #9
This package provides a GraphQL read interface for querying MarkiTect's
database content including Markdown files, ASTs, and schemas.
"""
from .schema import schema
from .server import GraphQLServer
from .resolvers import Query
__all__ = ['schema', 'GraphQLServer', 'Query']

View File

@@ -0,0 +1,449 @@
"""
GraphQL resolvers for MarkiTect data.
Implements the resolver functions that fetch data from MarkiTect's
database and services to fulfill GraphQL queries.
"""
import json
import sqlite3
import os
from datetime import datetime
from pathlib import Path
from typing import List, Optional, Dict, Any, Union
from jsonpath_ng import parse as jsonpath_parse
from ..database import DatabaseManager
from ..ast_service import ASTService
from .schema import (
MarkdownFile, Schema, AST, ASTNode, DatabaseStats,
SearchResult, Query as QueryType
)
class MarkiTectResolver:
"""Base resolver class with common database operations."""
def __init__(self, db_path: str):
"""Initialize resolver with database path."""
self.db_path = db_path
self.db_manager = DatabaseManager(db_path)
self.ast_service = ASTService()
def get_connection(self):
"""Get database connection."""
return sqlite3.connect(self.db_path)
def row_to_dict(self, cursor, row):
"""Convert database row to dictionary."""
return dict(zip([col[0] for col in cursor.description], row))
class Query(QueryType):
"""GraphQL query resolver implementation."""
def __init__(self):
"""Initialize query resolver."""
# Default database path - could be made configurable
self.resolver = MarkiTectResolver(get_default_database_path())
def resolve_markdown_file(self, info, id=None, filename=None):
"""Resolve single markdown file query."""
conn = self.resolver.get_connection()
cursor = conn.cursor()
if id:
cursor.execute(
"SELECT * FROM markdown_files WHERE id = ?",
(id,)
)
elif filename:
cursor.execute(
"SELECT * FROM markdown_files WHERE filename = ?",
(filename,)
)
else:
return None
row = cursor.fetchone()
conn.close()
if row:
data = self.resolver.row_to_dict(cursor, row)
# Parse front matter JSON
if data['front_matter']:
try:
data['front_matter_raw'] = json.loads(data['front_matter'])
except json.JSONDecodeError:
data['front_matter_raw'] = {}
else:
data['front_matter_raw'] = {}
return MarkdownFile(**data)
return None
def resolve_schema(self, info, id=None, filename=None):
"""Resolve single schema query."""
conn = self.resolver.get_connection()
cursor = conn.cursor()
if id:
cursor.execute(
"SELECT * FROM schemas WHERE id = ?",
(id,)
)
elif filename:
cursor.execute(
"SELECT * FROM schemas WHERE filename = ?",
(filename,)
)
else:
return None
row = cursor.fetchone()
conn.close()
if row:
data = self.resolver.row_to_dict(cursor, row)
# Parse schema content JSON
if data['schema_content']:
try:
data['schema_content'] = json.loads(data['schema_content'])
except json.JSONDecodeError:
data['schema_content'] = {}
return Schema(**data)
return None
def resolve_ast(self, info, file_id=None, filename=None):
"""Resolve AST query."""
if not file_id and not filename:
return None
# Get file path
if file_id:
conn = self.resolver.get_connection()
cursor = conn.cursor()
cursor.execute(
"SELECT filename FROM markdown_files WHERE id = ?",
(file_id,)
)
row = cursor.fetchone()
conn.close()
if not row:
return None
filename = row[0]
if not filename:
return None
file_path = Path(filename)
try:
# Use AST service to get parsed AST
ast_result = self.resolver.ast_service.display_ast(file_path, "json")
if ast_result.get('success'):
ast_data = ast_result.get('ast', {})
# Convert to our GraphQL AST format
return AST(
file_id=file_id,
filename=filename,
tree=self._convert_ast_nodes(ast_data),
metadata=ast_result.get('metadata', {}),
heading_count=self._count_nodes_by_type(ast_data, 'heading'),
link_count=self._count_nodes_by_type(ast_data, 'link'),
image_count=self._count_nodes_by_type(ast_data, 'image'),
code_block_count=self._count_nodes_by_type(ast_data, 'code')
)
except Exception:
pass
return None
def resolve_markdown_files(self, info, limit=50, offset=0, has_front_matter=None, created_after=None):
"""Resolve markdown files list query."""
conn = self.resolver.get_connection()
cursor = conn.cursor()
# Build query with filters
query = "SELECT * FROM markdown_files WHERE 1=1"
params = []
if has_front_matter is not None:
if has_front_matter:
query += " AND front_matter IS NOT NULL AND front_matter != ''"
else:
query += " AND (front_matter IS NULL OR front_matter = '')"
if created_after:
query += " AND created_at > ?"
params.append(created_after.isoformat())
query += " ORDER BY created_at DESC LIMIT ? OFFSET ?"
params.extend([limit, offset])
cursor.execute(query, params)
rows = cursor.fetchall()
conn.close()
files = []
for row in rows:
data = self.resolver.row_to_dict(cursor, row)
# Parse front matter JSON
if data['front_matter']:
try:
data['front_matter_raw'] = json.loads(data['front_matter'])
except json.JSONDecodeError:
data['front_matter_raw'] = {}
else:
data['front_matter_raw'] = {}
files.append(MarkdownFile(**data))
return files
def resolve_schemas(self, info, limit=50, offset=0):
"""Resolve schemas list query."""
conn = self.resolver.get_connection()
cursor = conn.cursor()
cursor.execute(
"SELECT * FROM schemas ORDER BY created_at DESC LIMIT ? OFFSET ?",
(limit, offset)
)
rows = cursor.fetchall()
conn.close()
schemas = []
for row in rows:
data = self.resolver.row_to_dict(cursor, row)
# Parse schema content JSON
if data['schema_content']:
try:
data['schema_content'] = json.loads(data['schema_content'])
except json.JSONDecodeError:
data['schema_content'] = {}
schemas.append(Schema(**data))
return schemas
def resolve_search(self, info, query, type="all", limit=20):
"""Resolve search query."""
results = []
conn = self.resolver.get_connection()
cursor = conn.cursor()
# Search in markdown files
if type in ["all", "file"]:
cursor.execute("""
SELECT *, 'file' as result_type FROM markdown_files
WHERE filename LIKE ? OR content LIKE ?
ORDER BY
CASE WHEN filename LIKE ? THEN 1 ELSE 2 END,
created_at DESC
LIMIT ?
""", (f"%{query}%", f"%{query}%", f"%{query}%", limit))
for row in cursor.fetchall():
data = self.resolver.row_to_dict(cursor, row)
if data['front_matter']:
try:
data['front_matter_raw'] = json.loads(data['front_matter'])
except json.JSONDecodeError:
data['front_matter_raw'] = {}
else:
data['front_matter_raw'] = {}
# Remove extra fields that don't belong to MarkdownFile
file_data = {k: v for k, v in data.items() if k != 'result_type'}
# Calculate basic relevance score
score = 1.0
if query.lower() in data['filename'].lower():
score += 0.5
if data['content'] and query.lower() in data['content'].lower():
score += 0.3
results.append(SearchResult(
type="file",
score=score,
file=MarkdownFile(**file_data),
highlight=self._extract_highlight(data.get('content', ''), query)
))
# Search in schemas
if type in ["all", "schema"]:
cursor.execute("""
SELECT *, 'schema' as result_type FROM schemas
WHERE filename LIKE ? OR title LIKE ? OR description LIKE ?
ORDER BY created_at DESC
LIMIT ?
""", (f"%{query}%", f"%{query}%", f"%{query}%", limit))
for row in cursor.fetchall():
data = self.resolver.row_to_dict(cursor, row)
if data['schema_content']:
try:
data['schema_content'] = json.loads(data['schema_content'])
except json.JSONDecodeError:
data['schema_content'] = {}
# Remove extra fields that don't belong to Schema
schema_data = {k: v for k, v in data.items() if k != 'result_type'}
# Calculate basic relevance score
score = 1.0
if query.lower() in data.get('title', '').lower():
score += 0.5
results.append(SearchResult(
type="schema",
score=score,
schema=Schema(**schema_data),
highlight=data.get('title', '') or data.get('filename', '')
))
conn.close()
# Sort by score and limit
results.sort(key=lambda x: x.score, reverse=True)
return results[:limit]
def resolve_database_stats(self, info):
"""Resolve database statistics."""
conn = self.resolver.get_connection()
cursor = conn.cursor()
# Count files
cursor.execute("SELECT COUNT(*) FROM markdown_files")
total_files = cursor.fetchone()[0]
# Count schemas
cursor.execute("SELECT COUNT(*) FROM schemas")
total_schemas = cursor.fetchone()[0]
# Get database size
db_size = 0
if os.path.exists(self.resolver.db_path):
db_size = os.path.getsize(self.resolver.db_path)
# Get last update time
cursor.execute("""
SELECT MAX(created_at) FROM (
SELECT created_at FROM markdown_files
UNION ALL
SELECT created_at FROM schemas
)
""")
last_updated_str = cursor.fetchone()[0]
last_updated = None
if last_updated_str:
try:
last_updated = datetime.fromisoformat(last_updated_str)
except ValueError:
pass
conn.close()
return DatabaseStats(
total_files=total_files,
total_schemas=total_schemas,
total_size_bytes=db_size,
last_updated=last_updated
)
def resolve_ast_query(self, info, jsonpath, file_id=None, filename=None):
"""Resolve JSONPath query on AST."""
if not file_id and not filename:
return []
# Get AST data
ast = self.resolve_ast(info, file_id=file_id, filename=filename)
if not ast or not ast.metadata:
return []
try:
# Parse JSONPath expression
jsonpath_expr = jsonpath_parse(jsonpath)
# Apply to AST metadata (contains the raw AST)
matches = jsonpath_expr.find(ast.metadata)
# Return the matched values
return [match.value for match in matches]
except Exception:
return []
def _convert_ast_nodes(self, ast_data):
"""Convert AST data to GraphQL ASTNode format."""
if not ast_data or not isinstance(ast_data, dict):
return []
nodes = []
if 'children' in ast_data:
for child in ast_data['children']:
node = ASTNode(
type=child.get('type', 'unknown'),
value=child.get('value'),
level=child.get('depth'),
attrs=child,
children=self._convert_ast_nodes(child) if 'children' in child else []
)
nodes.append(node)
return nodes
def _count_nodes_by_type(self, ast_data, node_type):
"""Count nodes of specific type in AST."""
if not ast_data or not isinstance(ast_data, dict):
return 0
count = 0
if ast_data.get('type') == node_type:
count += 1
if 'children' in ast_data:
for child in ast_data['children']:
count += self._count_nodes_by_type(child, node_type)
return count
def _extract_highlight(self, content, query, context_length=100):
"""Extract highlighted snippet from content."""
if not content or not query:
return ""
query_lower = query.lower()
content_lower = content.lower()
index = content_lower.find(query_lower)
if index == -1:
return content[:context_length] + "..." if len(content) > context_length else content
start = max(0, index - context_length // 2)
end = min(len(content), index + len(query) + context_length // 2)
snippet = content[start:end]
if start > 0:
snippet = "..." + snippet
if end < len(content):
snippet = snippet + "..."
return snippet
def get_default_database_path():
"""Get default database path for GraphQL resolvers."""
import os
from pathlib import Path
# Use the same logic as CLI
if 'MARKITECT_DB' in os.environ:
return os.environ['MARKITECT_DB']
config_dir = Path.home() / '.markitect'
config_dir.mkdir(exist_ok=True)
return str(config_dir / 'markitect.db')

196
markitect/graphql/schema.py Normal file
View File

@@ -0,0 +1,196 @@
"""
GraphQL schema definition for MarkiTect data.
Defines the complete GraphQL schema for querying Markdown files,
ASTs, schemas, and related metadata.
"""
import graphene
from graphene import ObjectType, String, Int, DateTime, List, Field, JSONString
from typing import Optional
class FrontMatter(ObjectType):
"""GraphQL type for front matter data."""
key = String(required=True, description="Front matter key")
value = JSONString(description="Front matter value (can be any JSON type)")
class MarkdownFile(ObjectType):
"""GraphQL type for markdown files stored in MarkiTect."""
id = Int(required=True, description="Unique identifier")
filename = String(required=True, description="File path/name")
content = String(description="Markdown content")
front_matter = List(FrontMatter, description="Parsed front matter data")
front_matter_raw = JSONString(description="Raw front matter as JSON")
created_at = DateTime(description="Creation timestamp")
# Computed fields
word_count = Int(description="Number of words in content")
line_count = Int(description="Number of lines in content")
has_front_matter = graphene.Boolean(description="Whether file has front matter")
def resolve_front_matter(self, info):
"""Resolve front matter as key-value pairs."""
if self.front_matter_raw:
return [
FrontMatter(key=k, value=v)
for k, v in self.front_matter_raw.items()
]
return []
def resolve_word_count(self, info):
"""Calculate word count."""
if self.content:
return len(self.content.split())
return 0
def resolve_line_count(self, info):
"""Calculate line count."""
if self.content:
return len(self.content.splitlines())
return 0
def resolve_has_front_matter(self, info):
"""Check if file has front matter."""
return bool(self.front_matter_raw)
class Schema(ObjectType):
"""GraphQL type for JSON schemas."""
id = Int(required=True, description="Unique identifier")
filename = String(required=True, description="Schema filename")
title = String(description="Schema title")
description = String(description="Schema description")
schema_content = JSONString(required=True, description="JSON schema content")
created_at = DateTime(description="Creation timestamp")
updated_at = DateTime(description="Last update timestamp")
# Computed fields
schema_version = String(description="JSON Schema version")
property_count = Int(description="Number of properties in schema")
def resolve_schema_version(self, info):
"""Extract schema version."""
if self.schema_content and isinstance(self.schema_content, dict):
return self.schema_content.get('$schema', 'Unknown')
return 'Unknown'
def resolve_property_count(self, info):
"""Count properties in schema."""
if (self.schema_content and
isinstance(self.schema_content, dict) and
'properties' in self.schema_content):
return len(self.schema_content['properties'])
return 0
class ASTNode(ObjectType):
"""GraphQL type for AST nodes."""
type = String(required=True, description="Node type")
value = String(description="Node value/content")
level = Int(description="Heading level (for heading nodes)")
children = List(lambda: ASTNode, description="Child nodes")
attrs = JSONString(description="Node attributes")
class AST(ObjectType):
"""GraphQL type for parsed AST."""
file_id = Int(description="Associated file ID")
filename = String(required=True, description="Source filename")
tree = List(ASTNode, description="AST tree structure")
metadata = JSONString(description="AST metadata")
# Statistics
heading_count = Int(description="Number of headings")
link_count = Int(description="Number of links")
image_count = Int(description="Number of images")
code_block_count = Int(description="Number of code blocks")
class DatabaseStats(ObjectType):
"""Database statistics."""
total_files = Int(description="Total number of markdown files")
total_schemas = Int(description="Total number of schemas")
total_size_bytes = Int(description="Total database size in bytes")
last_updated = DateTime(description="Last database update")
class SearchResult(ObjectType):
"""Search result union type."""
type = String(required=True, description="Result type (file, schema)")
score = graphene.Float(description="Search relevance score")
file = Field(MarkdownFile, description="Matched file (if type=file)")
schema = Field(Schema, description="Matched schema (if type=schema)")
highlight = String(description="Highlighted match text")
class Query(ObjectType):
"""Root GraphQL query type."""
# Single item queries
markdown_file = Field(
MarkdownFile,
id=Int(description="File ID"),
filename=String(description="File path"),
description="Get a specific markdown file"
)
schema = Field(
Schema,
id=Int(description="Schema ID"),
filename=String(description="Schema filename"),
description="Get a specific schema"
)
ast = Field(
AST,
file_id=Int(description="File ID"),
filename=String(description="File path"),
description="Get AST for a specific file"
)
# List queries
markdown_files = List(
MarkdownFile,
limit=Int(default_value=50, description="Maximum number of results"),
offset=Int(default_value=0, description="Offset for pagination"),
has_front_matter=graphene.Boolean(description="Filter by front matter presence"),
created_after=DateTime(description="Filter by creation date"),
description="List markdown files with optional filtering"
)
schemas = List(
Schema,
limit=Int(default_value=50, description="Maximum number of results"),
offset=Int(default_value=0, description="Offset for pagination"),
description="List all schemas"
)
# Search
search = List(
SearchResult,
query=String(required=True, description="Search query"),
type=String(description="Search type filter (file, schema, all)"),
limit=Int(default_value=20, description="Maximum number of results"),
description="Search across files and schemas"
)
# Statistics
database_stats = Field(
DatabaseStats,
description="Get database statistics"
)
# JSONPath queries for ASTs
ast_query = List(
JSONString,
file_id=Int(),
filename=String(),
jsonpath=String(required=True, description="JSONPath expression"),
description="Query AST using JSONPath expressions"
)
# Create the schema
schema = graphene.Schema(query=Query)

255
markitect/graphql/server.py Normal file
View File

@@ -0,0 +1,255 @@
"""
GraphQL server implementation for MarkiTect.
Provides a standalone GraphQL server and integration components
for serving the MarkiTect GraphQL API.
"""
import json
from typing import Optional, Dict, Any
from pathlib import Path
try:
from flask import Flask, request, jsonify
from flask_cors import CORS
FLASK_AVAILABLE = True
except ImportError:
FLASK_AVAILABLE = False
from .schema import schema
from .resolvers import Query
class GraphQLServer:
"""GraphQL server for MarkiTect API."""
def __init__(self, db_path: Optional[str] = None, enable_cors: bool = True):
"""
Initialize GraphQL server.
Args:
db_path: Path to MarkiTect database
enable_cors: Enable CORS for web browser access
"""
self.db_path = db_path or self._get_default_db_path()
self.enable_cors = enable_cors
self.app = None
if not FLASK_AVAILABLE:
raise ImportError(
"Flask is required for GraphQL server. Install with: pip install flask flask-cors"
)
def _get_default_db_path(self) -> str:
"""Get default database path."""
from .resolvers import get_default_database_path
return get_default_database_path()
def create_app(self) -> Flask:
"""Create Flask application with GraphQL endpoint."""
app = Flask(__name__)
if self.enable_cors:
CORS(app)
@app.route('/graphql', methods=['POST'])
def graphql_endpoint():
"""Handle GraphQL requests."""
try:
# Parse request data
data = request.get_json()
if not data:
return jsonify({'error': 'No JSON data provided'}), 400
query = data.get('query')
variables = data.get('variables', {})
operation_name = data.get('operationName')
if not query:
return jsonify({'error': 'No query provided'}), 400
# Execute GraphQL query
result = schema.execute(
query,
variables=variables,
operation_name=operation_name,
context={'db_path': self.db_path}
)
# Format response
response_data = {'data': result.data}
if result.errors:
response_data['errors'] = [
{'message': str(error)} for error in result.errors
]
return jsonify(response_data)
except Exception as e:
return jsonify({
'errors': [{'message': f'Server error: {str(e)}'}]
}), 500
@app.route('/graphql', methods=['GET'])
def graphql_playground():
"""Serve GraphQL playground for development."""
return '''
<!DOCTYPE html>
<html>
<head>
<title>MarkiTect GraphQL Playground</title>
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/graphql-playground-react/build/static/css/index.css" />
</head>
<body>
<div id="root">
<style>
body { margin: 0; font-family: Open Sans, sans-serif; overflow: hidden; }
#root { height: 100vh; }
</style>
</div>
<script src="https://cdn.jsdelivr.net/npm/graphql-playground-react/build/static/js/middleware.js"></script>
<script>
window.addEventListener('load', function (event) {
GraphQLPlayground.init(document.getElementById('root'), {
endpoint: '/graphql',
settings: {
'general.betaUpdates': false,
'editor.theme': 'dark',
'editor.reuseHeaders': true,
'tracing.hideTracingResponse': true,
}
})
})
</script>
</body>
</html>
'''
@app.route('/schema', methods=['GET'])
def get_schema():
"""Get GraphQL schema definition."""
try:
from graphql.utilities import print_schema
schema_sdl = print_schema(schema.graphql_schema)
except (AttributeError, ImportError):
# Fallback to simple introspection
schema_sdl = str(schema)
return jsonify({
'schema': schema_sdl
})
@app.route('/health', methods=['GET'])
def health_check():
"""Health check endpoint."""
try:
# Test database connection
import sqlite3
conn = sqlite3.connect(self.db_path)
cursor = conn.cursor()
cursor.execute("SELECT 1")
conn.close()
return jsonify({
'status': 'healthy',
'database': 'connected',
'database_path': self.db_path
})
except Exception as e:
return jsonify({
'status': 'unhealthy',
'database': 'error',
'error': str(e)
}), 500
self.app = app
return app
def run(self, host: str = '127.0.0.1', port: int = 5000, debug: bool = False):
"""
Run the GraphQL server.
Args:
host: Host to bind to
port: Port to bind to
debug: Enable debug mode
"""
if not self.app:
self.create_app()
print(f"🚀 MarkiTect GraphQL Server starting...")
print(f"🔗 GraphQL endpoint: http://{host}:{port}/graphql")
print(f"🎮 GraphQL playground: http://{host}:{port}/graphql")
print(f"📊 Schema introspection: http://{host}:{port}/schema")
print(f"❤️ Health check: http://{host}:{port}/health")
self.app.run(host=host, port=port, debug=debug)
class GraphQLClient:
"""Simple GraphQL client for testing and CLI integration."""
def __init__(self, endpoint: str = "http://localhost:5000/graphql"):
"""
Initialize GraphQL client.
Args:
endpoint: GraphQL endpoint URL
"""
self.endpoint = endpoint
def execute(self, query: str, variables: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
"""
Execute GraphQL query.
Args:
query: GraphQL query string
variables: Query variables
Returns:
Query result dictionary
"""
try:
import requests
payload = {
'query': query,
'variables': variables or {}
}
response = requests.post(
self.endpoint,
json=payload,
headers={'Content-Type': 'application/json'}
)
return response.json()
except ImportError:
raise ImportError("requests is required for GraphQL client. Install with: pip install requests")
def execute_local(self, query: str, variables: Optional[Dict[str, Any]] = None, context: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
"""
Execute GraphQL query directly (without HTTP).
Args:
query: GraphQL query string
variables: Query variables
context: GraphQL context
Returns:
Query result dictionary
"""
result = schema.execute(
query,
variables=variables or {},
context=context or {}
)
response_data = {'data': result.data}
if result.errors:
response_data['errors'] = [
{'message': str(error)} for error in result.errors
]
return response_data