feat(llm): add OpenAI adapter, entity archive policy, process chapters 5-7
Add OpenAIAdapter for the OpenAI chat completions API (apikey-chatgpt.txt or OPENAI_API_KEY).

Set the default model to arcee-ai/trinity-large-preview:free for the infospace pipeline and increase max_tokens from 4096 to 8192.

Reprocess chapter 05 with Trinity Large (was Gemini: 1 truncated entity, now 19 complete entities). Process chapters 06 (Aurora Alpha, 10 entities) and 07 (Trinity Large, 15 entities, including the regenerated violent-policy.md). The canonical set is now at 85 unique entities.

Add an entity archive policy: entities are never silently deleted. Retired entities move to output/entities/archive/ with a dated reason header. New CLI option: --archive-entity <slug> --reason "...". The --list output shows the archive count alongside the canonical set.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -228,7 +228,7 @@ class ChapterProcessor:
|
||||
|
||||
# ── LLM Execution Helpers ─────────────────────────────────────────
|
||||
|
||||
def _call_llm(self, prompt: str, stage_label: str, max_tokens: int = 4096) -> Optional[str]:
|
||||
def _call_llm(self, prompt: str, stage_label: str, max_tokens: int = 8192) -> Optional[str]:
|
||||
"""Call the LLM and return the content string, or ``None`` on failure.
|
||||
|
||||
Retries up to 3 times on rate-limit (429) errors with exponential backoff.
|
||||
@@ -273,7 +273,7 @@ class ChapterProcessor:
|
||||
|
||||
return content
|
||||
|
||||
def _execute_llm(self, prompt: str, output_file: Path, stage_label: str, max_tokens: int = 4096) -> Optional[str]:
|
||||
def _execute_llm(self, prompt: str, output_file: Path, stage_label: str, max_tokens: int = 8192) -> Optional[str]:
|
||||
"""Call the LLM, write the result to *output_file*, and return it."""
|
||||
content = self._call_llm(prompt, stage_label, max_tokens=max_tokens)
|
||||
if content:
|
||||
@@ -296,6 +296,9 @@ class ChapterProcessor:
|
||||
def _entities_dir(self) -> Path:
    """Return ``<example_dir>/output/entities``, where entity files are kept."""
    return self.example_dir.joinpath("output", "entities")
|
||||
|
||||
def _archive_dir(self) -> Path:
    """Return the ``archive`` subdirectory of the entities directory.

    The directory is not created here; callers create it when needed.
    """
    entities_root = self._entities_dir()
    return entities_root / "archive"
|
||||
|
||||
def _list_existing_entity_names(self) -> list[str]:
|
||||
"""Return sorted slugs of all canonical entity files already on disk."""
|
||||
return sorted(
|
||||
@@ -305,6 +308,45 @@ class ChapterProcessor:
|
||||
and not f.name.endswith("-prompt.md")
|
||||
)
|
||||
|
||||
def archive_entity(self, slug: str, reason: str) -> None:
    """Move a canonical entity to the archive with a documented reason.

    The entity file ``<slug>.md`` is prepended with an HTML-comment header
    recording today's date and *reason*, written into the archive
    directory, and then removed from the entities directory.

    An existing archive entry is never overwritten: if ``<slug>.md`` is
    already archived, the new entry gets a dated (and, if needed,
    numbered) file name instead.

    Chapter views that reference this entity are **not** updated
    automatically; the views that still include it are printed so they
    can be reviewed and updated manually.

    Args:
        slug: Entity file name without the ``.md`` suffix. It must be a
            bare name with no path component.
        reason: Explanation recorded in the archive header.

    Returns:
        None. Progress and problems are reported on stdout.
    """
    from datetime import date

    # A slug names a file directly inside the entities directory; a path
    # component (e.g. "../x") could move files that are not entities.
    if not slug or Path(slug).name != slug:
        print(f" Invalid entity slug: {slug}")
        return

    src = self._entities_dir() / f"{slug}.md"
    if not src.is_file():
        print(f" Entity not found: {slug}")
        return

    archive = self._archive_dir()
    archive.mkdir(parents=True, exist_ok=True)

    today = date.today().isoformat()

    # Pick a destination that does not clobber an earlier archive entry
    # for the same slug.
    dest = archive / f"{slug}.md"
    attempt = 1
    while dest.exists():
        suffix = f"-{today}" if attempt == 1 else f"-{today}-{attempt}"
        dest = archive / f"{slug}{suffix}.md"
        attempt += 1

    # "-->" inside the reason would close the comment header early.
    safe_reason = reason.replace("-->", "-- >")
    header = (
        f"<!-- ARCHIVED {today}\n"
        f" Reason: {safe_reason}\n"
        f"-->\n\n"
    )

    # Copy the body as raw bytes so the entity content is preserved exactly,
    # whatever encoding it was written with. The source is removed only
    # after the archive copy has been written.
    dest.write_bytes(header.encode("utf-8") + src.read_bytes())
    src.unlink()

    # Report which chapter views still reference this entity.
    needle = f'include "{slug}.md"'.encode("utf-8")
    refs = sorted(
        view.name
        for view in self._entities_dir().glob("*-entities.md")
        if needle in view.read_bytes()
    )

    print(f" Archived: {slug}.md -> archive/{dest.name}")
    print(f" Reason: {reason}")
    if refs:
        print(f" Referenced by: {', '.join(refs)} (update these views)")
    print(f" Canonical set: {len(self._list_existing_entity_names())} entities")
|
||||
|
||||
def _split_entities(
|
||||
self, combined_content: str
|
||||
) -> list[tuple[str, Path]]:
|
||||
@@ -792,6 +834,11 @@ class ChapterProcessor:
|
||||
total_entities = len(self._list_existing_entity_names())
|
||||
if total_entities:
|
||||
print(f"\n Canonical entity set: {total_entities} unique entities")
|
||||
archive = self._archive_dir()
|
||||
if archive.exists():
|
||||
archived = len(list(archive.glob("*.md")))
|
||||
if archived:
|
||||
print(f" Archived entities: {archived}")
|
||||
|
||||
# ── Statistics ───────────────────────────────────────────────────
|
||||
|
||||
@@ -820,12 +867,16 @@ def main():
|
||||
group.add_argument("--metrics", action="store_true", help="Assess metrics only")
|
||||
group.add_argument("--list", action="store_true", help="List available chapters")
|
||||
group.add_argument("--stats", action="store_true", help="Show dependency statistics")
|
||||
group.add_argument("--archive-entity", type=str, metavar="SLUG",
|
||||
help="Archive an entity (move to archive/ with reason)")
|
||||
|
||||
parser.add_argument("--reason", type=str, default=None,
|
||||
help="Reason for archiving (used with --archive-entity)")
|
||||
parser.add_argument("--no-commit", action="store_true", help="Skip git commits")
|
||||
parser.add_argument(
|
||||
"--provider",
|
||||
type=str,
|
||||
choices=["openrouter", "claude-code", "gemini"],
|
||||
choices=["openrouter", "claude-code", "gemini", "openai"],
|
||||
default=None,
|
||||
help="LLM provider for auto-generating outputs (omit for manual mode)",
|
||||
)
|
||||
@@ -834,17 +885,25 @@ def main():
|
||||
args = parser.parse_args()
|
||||
|
||||
# Build optional LLM adapter
|
||||
_PROVIDER_DEFAULTS = {
|
||||
"openrouter": "arcee-ai/trinity-large-preview:free",
|
||||
}
|
||||
llm_adapter = None
|
||||
if args.provider:
|
||||
from markitect.llm import create_adapter
|
||||
llm_adapter = create_adapter(args.provider, model=args.model)
|
||||
print(f"LLM: {args.provider}" + (f" ({args.model})" if args.model else ""))
|
||||
model = args.model or _PROVIDER_DEFAULTS.get(args.provider)
|
||||
llm_adapter = create_adapter(args.provider, model=model)
|
||||
print(f"LLM: {args.provider} ({model or 'default'})")
|
||||
|
||||
example_dir = Path(__file__).parent
|
||||
processor = ChapterProcessor(example_dir, llm_adapter=llm_adapter)
|
||||
processor.setup()
|
||||
|
||||
if args.list:
|
||||
if args.archive_entity:
|
||||
if not args.reason:
|
||||
parser.error("--archive-entity requires --reason")
|
||||
processor.archive_entity(args.archive_entity, args.reason)
|
||||
elif args.list:
|
||||
processor.list_chapters()
|
||||
elif args.stats:
|
||||
processor.show_stats()
|
||||
|
||||
Reference in New Issue
Block a user