Add structured logging around key workflows and docs for operational readiness

This commit is contained in:
2026-04-26 17:02:24 +02:00
parent 2902e362df
commit 99bb851ca8
8 changed files with 288 additions and 5 deletions

View File

@@ -0,0 +1,15 @@
from __future__ import annotations
import json
import logging
from typing import Any
LOGGER_NAME = "repo_registry.operations"


def log_operation(event: str, **fields: Any) -> None:
    """Emit one structured operational log record at INFO level.

    The record message is a JSON object containing ``event`` plus every
    keyword in ``fields``, with keys sorted. Values that ``json`` cannot
    encode natively are converted with ``str``.

    Args:
        event: Name of the operation being reported; stored under the
            ``"event"`` key of the payload.
        **fields: Additional key/value pairs merged into the payload.
    """
    logger = logging.getLogger(LOGGER_NAME)
    # Serializing the payload calls str() on arbitrary values; skip that work
    # (and any error it could raise) when INFO records would be dropped anyway.
    if not logger.isEnabledFor(logging.INFO):
        return
    payload = {"event": event, **fields}
    # Pass the finished string as the message with no args so that any "%"
    # inside the JSON is never treated as a format directive.
    logger.info(json.dumps(payload, sort_keys=True, default=str))

View File

@@ -25,6 +25,7 @@ from repo_registry.core.models import (
)
from repo_registry.candidate_graph.generator import CandidateGraphGenerator
from repo_registry.content_indexing.extractor import ContentExtractor
from repo_registry.core.logging import log_operation
from repo_registry.llm_extraction.extractor import LLMCandidateExtractor
from repo_registry.llm_extraction.mapper import LLMExtractionMapper
from repo_registry.repo_ingestion.git import GitIngestionService
@@ -67,13 +68,21 @@ class RegistryService:
metadata = self.metadata_extractor.extract(checkout.source_path, url)
else:
metadata = None
return self.store.create_repository(
repository = self.store.create_repository(
name=name or (metadata.name if metadata is not None else "repository"),
url=url,
description=description
or (metadata.description if metadata is not None else None),
branch=branch,
)
log_operation(
"repository_registered",
repository_id=repository.id,
repository_name=repository.name,
branch=repository.branch,
metadata_imported=metadata is not None,
)
return repository
def list_repositories(self) -> list[Repository]:
    """Return the repositories reported by the backing store."""
    repositories = self.store.list_repositories()
    return repositories
@@ -108,6 +117,12 @@ class RegistryService:
repository = self.store.get_repository(repository_id)
run = self.store.create_analysis_run(repository_id)
self.store.update_repository_status(repository_id, "analyzing")
log_operation(
"analysis_started",
repository_id=repository_id,
analysis_run_id=run.id,
source_override=source_path is not None,
)
try:
if source_path is None:
checkout = self.ingestion.resolve(repository.url, branch=repository.branch)
@@ -117,6 +132,12 @@ class RegistryService:
scan_result = self.scanner.scan(scan_source)
except Exception as exc:
failed_run = self.store.fail_analysis_run(repository_id, run.id, str(exc))
log_operation(
"analysis_failed",
repository_id=repository_id,
analysis_run_id=run.id,
error=str(exc),
)
return ScanSummary(analysis_run=failed_run, snapshot=None, facts=[])
completed_run = self.store.complete_analysis_run(
@@ -145,6 +166,12 @@ class RegistryService:
stored_chunks,
)
except Exception as exc:
log_operation(
"llm_extraction_failed",
repository_id=repository_id,
analysis_run_id=completed_run.id,
error=str(exc),
)
self.store.create_review_decision(
repository_id,
completed_run.id,
@@ -159,12 +186,27 @@ class RegistryService:
candidate_source = "deterministic"
self.store.replace_candidate_graph(repository_id, completed_run.id, candidates)
if candidate_source == "llm":
log_operation(
"llm_extraction_used",
repository_id=repository_id,
analysis_run_id=completed_run.id,
candidate_count=len(candidates),
)
self.store.create_review_decision(
repository_id,
completed_run.id,
action="llm_extraction_used",
notes=f"Generated {len(candidates)} candidate ability draft(s).",
)
log_operation(
"analysis_completed",
repository_id=repository_id,
analysis_run_id=completed_run.id,
fact_count=len(facts),
content_chunk_count=len(stored_chunks),
candidate_count=len(candidates),
candidate_source=candidate_source,
)
return ScanSummary(
analysis_run=completed_run,
snapshot=snapshot,

View File

@@ -27,6 +27,7 @@ from repo_registry.core.models import (
SourceReference,
confidence_label,
)
from repo_registry.core.logging import log_operation
from repo_registry.content_indexing.extractor import ContentChunkCandidate
from repo_registry.candidate_graph.generator import CandidateAbilityDraft
from repo_registry.repo_scanning.scanner import FactCandidate, ScanResult
@@ -1005,7 +1006,15 @@ class RegistryStore:
""",
(repository_id, analysis_run_id, action, notes),
)
return int(cursor.lastrowid)
decision_id = int(cursor.lastrowid)
log_operation(
"review_decision_recorded",
repository_id=repository_id,
analysis_run_id=analysis_run_id,
review_decision_id=decision_id,
action=action,
)
return decision_id
def list_review_decisions(
self,

View File

@@ -1,5 +1,6 @@
from __future__ import annotations
import logging
from dataclasses import asdict
from pathlib import Path
@@ -62,6 +63,7 @@ class Settings(BaseSettings):
llm_provider: str | None = Field(default=None)
llm_model: str | None = Field(default=None)
embedding_provider: str | None = Field(default=None)
log_level: str = Field(default="INFO")
def get_settings() -> Settings:
@@ -69,6 +71,9 @@ def get_settings() -> Settings:
def get_service(settings: Settings = Depends(get_settings)) -> RegistryService:
logging.getLogger("repo_registry.operations").setLevel(
getattr(logging, settings.log_level.upper(), logging.INFO)
)
database_path = Path(settings.database_path)
database_path.parent.mkdir(parents=True, exist_ok=True)
store = RegistryStore(database_path)
@@ -120,8 +125,33 @@ app.include_router(ui_router)
@app.get("/health", tags=["health"])
def health() -> dict[str, str]:
return {"status": "ok"}
def health(settings: Settings = Depends(get_settings)) -> dict[str, object]:
    """Report service health based on a live database probe.

    The probe ensures the database file's parent directory exists, builds a
    ``RegistryStore`` for the configured path, initializes it, and runs
    ``SELECT 1`` on a connection. Any exception raised along the way is
    captured and reported instead of propagating.

    Args:
        settings: Application settings supplying ``database_path`` and
            ``checkout_root``.

    Returns:
        A mapping with ``"status"`` (``"ok"`` when the probe succeeded,
        otherwise ``"degraded"``), a ``"database"`` section (path, reachable
        flag, error text or ``None``) and a ``"checkout_root"`` section
        (path and whether it currently exists).
    """
    db_file = Path(settings.database_path)
    checkouts_dir = Path(settings.checkout_root)

    failure = None
    try:
        db_file.parent.mkdir(parents=True, exist_ok=True)
        probe_store = RegistryStore(db_file)
        probe_store.initialize()
        with probe_store.connect() as conn:
            conn.execute("SELECT 1").fetchone()
    except Exception as exc:
        # Deliberately broad: every failure mode is surfaced in the response
        # body rather than turning the health endpoint into an error.
        is_reachable = False
        failure = str(exc)
    else:
        is_reachable = True

    overall_status = "ok" if is_reachable else "degraded"
    database_report = {
        "path": str(db_file),
        "reachable": is_reachable,
        "error": failure,
    }
    checkout_report = {
        "path": str(checkouts_dir),
        "exists": checkouts_dir.exists(),
    }
    return {
        "status": overall_status,
        "database": database_report,
        "checkout_root": checkout_report,
    }
@app.post(