Files
reuse-surface/reuse_surface/llm_bridge.py
tegwick 70a5003f6e
Some checks failed
ci / validate-registry (push) Has been cancelled
Implement REUSE-WP-0013 registry establish, update, and stats
Add stats, establish (scaffold, publish-check, discover), and update CLI
commands with optional llm-connect bridge, validate --root for sibling repos,
pytest coverage, and documentation for sibling registry onboarding.
2026-06-16 01:21:01 +02:00

102 lines
3.1 KiB
Python

from __future__ import annotations
import json
import os
import re
import urllib.error
import urllib.request
from pathlib import Path
from typing import Any
from jsonschema import Draft202012Validator
from reuse_surface.registry import ROOT
# Path of the JSON Schema file that load_draft_schema() reads and that
# request_registry_draft() validates LLM-produced drafts against; it is
# resolved relative to ROOT imported from reuse_surface.registry.
DRAFT_SCHEMA_PATH = ROOT / "schemas" / "registry-draft.schema.json"
def llm_connect_url(explicit: str | None = None) -> str:
    """Return the llm-connect base URL with trailing slashes removed.

    Args:
        explicit: URL supplied by the caller. When it is ``None`` or empty,
            the ``LLM_CONNECT_URL`` environment variable is used instead.

    Returns:
        The chosen base URL without any trailing ``/`` characters.

    Raises:
        ValueError: If neither source yields a non-empty URL once the
            trailing slashes have been stripped.
    """
    candidate = explicit if explicit else os.environ.get("LLM_CONNECT_URL", "")
    trimmed = candidate.rstrip("/")
    if trimmed:
        return trimmed
    raise ValueError(
        "LLM backend not configured; set LLM_CONNECT_URL or pass --llm-url"
    )
def load_draft_schema() -> dict[str, Any]:
    """Read the file at ``DRAFT_SCHEMA_PATH`` as UTF-8 and return the parsed JSON."""
    with DRAFT_SCHEMA_PATH.open(encoding="utf-8") as schema_file:
        return json.load(schema_file)
def execute_prompt(
    prompt: str,
    *,
    base_url: str | None = None,
    config: dict[str, Any] | None = None,
    timeout: float = 120,
) -> str:
    """POST *prompt* to the llm-connect ``/execute`` endpoint and return its text.

    Args:
        prompt: Prompt text sent under the ``"prompt"`` key of the JSON body.
        base_url: Optional base URL; resolved through ``llm_connect_url``.
        config: Optional mapping sent under the ``"config"`` key. It is
            omitted from the request when ``None`` or empty.
        timeout: Timeout in seconds passed to ``urllib.request.urlopen``.

    Returns:
        The non-blank ``"content"`` string from the JSON response.

    Raises:
        ValueError: If no backend URL is configured, the server answers with
            an HTTP error status, the response body is not valid JSON, or
            the response is not a JSON object carrying a non-blank string
            under ``"content"``.

    Note:
        Other network-level failures raised by ``urlopen`` (for example
        ``urllib.error.URLError``) are not caught here and propagate to the
        caller.
    """
    url = f"{llm_connect_url(base_url)}/execute"
    body: dict[str, Any] = {"prompt": prompt}
    if config:
        body["config"] = config
    request = urllib.request.Request(
        url,
        data=json.dumps(body).encode("utf-8"),
        headers={
            "Content-Type": "application/json",
            "Accept": "application/json",
            "User-Agent": "reuse-surface/0.1",
        },
        method="POST",
    )
    try:
        with urllib.request.urlopen(request, timeout=timeout) as response:
            payload = json.loads(response.read().decode("utf-8"))
    except urllib.error.HTTPError as exc:
        # Decode leniently: an error body that is not valid UTF-8 must not
        # replace the HTTP status report with a UnicodeDecodeError.
        raw = exc.read().decode("utf-8", errors="replace")
        raise ValueError(f"llm-connect returned {exc.code}: {raw}") from exc
    # A valid JSON reply may still be a list, string, or number; only an
    # object can carry "content", so anything else counts as missing content
    # rather than crashing with AttributeError on .get().
    content = payload.get("content") if isinstance(payload, dict) else None
    if not isinstance(content, str) or not content.strip():
        raise ValueError("llm-connect response missing content")
    return content
def extract_json_object(text: str) -> dict[str, Any]:
    """Extract a JSON object from free-form LLM output.

    The text is first stripped and, when it starts with a Markdown code
    fence, the opening fence (optionally tagged ``json``) and a trailing
    closing fence are removed. If the remainder parses as JSON it is used
    directly. Otherwise the text is scanned for the first ``{`` from which
    a complete JSON object can be decoded, so prose before or after the
    object (including prose that itself contains braces) is tolerated.

    Args:
        text: Raw text returned by the LLM.

    Returns:
        The decoded JSON object.

    Raises:
        ValueError: If no JSON object can be decoded from the text, or the
            text parses as JSON whose top-level value is not an object.
    """
    stripped = text.strip()
    if stripped.startswith("```"):
        stripped = re.sub(r"^```(?:json)?\s*", "", stripped)
        stripped = re.sub(r"\s*```$", "", stripped)
    try:
        data = json.loads(stripped)
    except json.JSONDecodeError:
        # Try every "{" as a possible object start. raw_decode stops at the
        # end of the first complete value, so trailing text is ignored; a
        # greedy "{...}" regex would instead swallow everything up to the
        # last "}" in the text and fail on any later brace.
        decoder = json.JSONDecoder()
        start = stripped.find("{")
        while start != -1:
            try:
                data, _ = decoder.raw_decode(stripped, start)
            except json.JSONDecodeError:
                start = stripped.find("{", start + 1)
            else:
                break
        else:
            raise ValueError("llm response did not contain JSON object") from None
    if not isinstance(data, dict):
        raise ValueError("llm response JSON must be an object")
    return data
def request_registry_draft(
    prompt: str,
    *,
    base_url: str | None = None,
    config: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Ask the LLM for a registry draft and validate it against the draft schema.

    Args:
        prompt: Prompt forwarded to ``execute_prompt``.
        base_url: Optional llm-connect base URL forwarded to ``execute_prompt``.
        config: Optional config mapping forwarded to ``execute_prompt``.

    Returns:
        The JSON object extracted from the LLM reply, once it has passed
        validation against the schema returned by ``load_draft_schema``.

    Raises:
        ValueError: If the request or JSON extraction fails, or the draft
            violates the schema. In the latter case the message lists at
            most the first three violations, ordered by instance path.
    """
    reply_text = execute_prompt(prompt, base_url=base_url, config=config)
    candidate = extract_json_object(reply_text)
    schema_checker = Draft202012Validator(load_draft_schema())
    problems = list(schema_checker.iter_errors(candidate))
    problems.sort(key=lambda problem: list(problem.path))
    if not problems:
        return candidate
    # Only the first three violations are reported in the message.
    summary = "; ".join(problem.message for problem in problems[:3])
    raise ValueError(f"draft schema validation failed: {summary}")
def request_json_object(
    prompt: str,
    *,
    base_url: str | None = None,
    config: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Send *prompt* to the LLM and return the JSON object found in its reply.

    No schema validation is performed here; the reply only has to contain a
    JSON object that ``extract_json_object`` can decode.

    Args:
        prompt: Prompt forwarded to ``execute_prompt``.
        base_url: Optional llm-connect base URL forwarded to ``execute_prompt``.
        config: Optional config mapping forwarded to ``execute_prompt``.

    Returns:
        The JSON object extracted from the LLM reply.
    """
    reply_text = execute_prompt(prompt, base_url=base_url, config=config)
    return extract_json_object(reply_text)