feat(llm): add OpenAI adapter, entity archive policy, process chapters 5-7

Add OpenAIAdapter for the OpenAI chat completions API (API key read from
apikey-chatgpt.txt or OPENAI_API_KEY). Set the default model to
arcee-ai/trinity-large-preview:free for the infospace pipeline and raise the
default max_tokens from 4096 to 8192.

Reprocess chapter 05 with Trinity Large (was Gemini: 1 truncated entity,
now 19 complete entities). Process chapters 06 (Aurora Alpha, 10 entities)
and 07 (Trinity Large, 15 entities including regenerated violent-policy.md).
Canonical set now at 85 unique entities.

Add entity archive policy: entities are never silently deleted. Retired
entities move to output/entities/archive/ with a dated reason header.
New CLI option: --archive-entity <slug> --reason "...". The --list
output shows the archive count alongside the canonical set.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-11 23:39:44 +01:00
parent 880c1d1374
commit 41773f1320
68 changed files with 6500 additions and 136 deletions

View File

@@ -228,7 +228,7 @@ class ChapterProcessor:
# ── LLM Execution Helpers ─────────────────────────────────────────
def _call_llm(self, prompt: str, stage_label: str, max_tokens: int = 4096) -> Optional[str]:
def _call_llm(self, prompt: str, stage_label: str, max_tokens: int = 8192) -> Optional[str]:
"""Call the LLM and return the content string, or ``None`` on failure.
Retries up to 3 times on rate-limit (429) errors with exponential backoff.
@@ -273,7 +273,7 @@ class ChapterProcessor:
return content
def _execute_llm(self, prompt: str, output_file: Path, stage_label: str, max_tokens: int = 4096) -> Optional[str]:
def _execute_llm(self, prompt: str, output_file: Path, stage_label: str, max_tokens: int = 8192) -> Optional[str]:
"""Call the LLM, write the result to *output_file*, and return it."""
content = self._call_llm(prompt, stage_label, max_tokens=max_tokens)
if content:
@@ -296,6 +296,9 @@ class ChapterProcessor:
def _entities_dir(self) -> Path:
    """Return ``<example_dir>/output/entities``, where entity files live."""
    return self.example_dir.joinpath("output", "entities")
def _archive_dir(self) -> Path:
    """Return the ``archive`` subdirectory of the entities directory."""
    entities_root = self._entities_dir()
    return entities_root / "archive"
def _list_existing_entity_names(self) -> list[str]:
"""Return sorted slugs of all canonical entity files already on disk."""
return sorted(
@@ -305,6 +308,45 @@ class ChapterProcessor:
and not f.name.endswith("-prompt.md")
)
def archive_entity(self, slug: str, reason: str) -> None:
    """Move a canonical entity to the archive with a documented reason.

    The entity file is prepended with an archive header explaining why
    it was retired, then moved to ``output/entities/archive/<slug>.md``.
    If an archived file with that name already exists it is left
    untouched and the new copy is stored under a dated name
    (``<slug>-<YYYY-MM-DD>.md``, with a numeric suffix if that is taken
    too), so archiving never destroys an earlier archive entry.

    Chapter views that reference this entity are **not** updated
    automatically — review and update them manually.

    Args:
        slug: Entity file name without the ``.md`` extension.
        reason: Human-readable explanation recorded in the archive header.
    """
    from datetime import date

    src = self._entities_dir() / f"{slug}.md"
    if not src.exists():
        print(f" Entity not found: {slug}")
        return

    archive = self._archive_dir()
    archive.mkdir(parents=True, exist_ok=True)

    today = date.today().isoformat()

    # Never clobber a previously archived entity with the same slug.
    dest = archive / f"{slug}.md"
    if dest.exists():
        dest = archive / f"{slug}-{today}.md"
        attempt = 2
        while dest.exists():
            dest = archive / f"{slug}-{today}-{attempt}.md"
            attempt += 1

    # A literal "-->" in the reason would close the HTML comment early and
    # leak the rest of the header into the rendered document.
    safe_reason = reason.replace("-->", "-- >")
    header = (
        f"<!-- ARCHIVED {today}\n"
        f" Reason: {safe_reason}\n"
        f"-->\n\n"
    )

    # Explicit UTF-8 so the result does not depend on the platform locale.
    content = src.read_text(encoding="utf-8")
    dest.write_text(header + content, encoding="utf-8")
    # Remove the canonical copy only after the archived copy is on disk.
    src.unlink()

    # Report which chapter views still reference this entity
    include_marker = f'include "{slug}.md"'
    refs = [
        view.name
        for view in self._entities_dir().glob("*-entities.md")
        if include_marker in view.read_text(encoding="utf-8")
    ]

    print(f" Archived: {slug}.md -> archive/{dest.name}")
    print(f" Reason: {reason}")
    if refs:
        print(f" Referenced by: {', '.join(refs)} (update these views)")
    print(f" Canonical set: {len(self._list_existing_entity_names())} entities")
def _split_entities(
self, combined_content: str
) -> list[tuple[str, Path]]:
@@ -792,6 +834,11 @@ class ChapterProcessor:
total_entities = len(self._list_existing_entity_names())
if total_entities:
print(f"\n Canonical entity set: {total_entities} unique entities")
archive = self._archive_dir()
if archive.exists():
archived = len(list(archive.glob("*.md")))
if archived:
print(f" Archived entities: {archived}")
# ── Statistics ───────────────────────────────────────────────────
@@ -820,12 +867,16 @@ def main():
group.add_argument("--metrics", action="store_true", help="Assess metrics only")
group.add_argument("--list", action="store_true", help="List available chapters")
group.add_argument("--stats", action="store_true", help="Show dependency statistics")
group.add_argument("--archive-entity", type=str, metavar="SLUG",
help="Archive an entity (move to archive/ with reason)")
parser.add_argument("--reason", type=str, default=None,
help="Reason for archiving (used with --archive-entity)")
parser.add_argument("--no-commit", action="store_true", help="Skip git commits")
parser.add_argument(
"--provider",
type=str,
choices=["openrouter", "claude-code", "gemini"],
choices=["openrouter", "claude-code", "gemini", "openai"],
default=None,
help="LLM provider for auto-generating outputs (omit for manual mode)",
)
@@ -834,17 +885,25 @@ def main():
args = parser.parse_args()
# Build optional LLM adapter
_PROVIDER_DEFAULTS = {
"openrouter": "arcee-ai/trinity-large-preview:free",
}
llm_adapter = None
if args.provider:
from markitect.llm import create_adapter
llm_adapter = create_adapter(args.provider, model=args.model)
print(f"LLM: {args.provider}" + (f" ({args.model})" if args.model else ""))
model = args.model or _PROVIDER_DEFAULTS.get(args.provider)
llm_adapter = create_adapter(args.provider, model=model)
print(f"LLM: {args.provider} ({model or 'default'})")
example_dir = Path(__file__).parent
processor = ChapterProcessor(example_dir, llm_adapter=llm_adapter)
processor.setup()
if args.list:
if args.archive_entity:
if not args.reason:
parser.error("--archive-entity requires --reason")
processor.archive_entity(args.archive_entity, args.reason)
elif args.list:
processor.list_chapters()
elif args.stats:
processor.show_stats()