feat(pipeline): per-stage max_tokens, LLM provenance, processing log

- PipelineStage now supports max_tokens to override the 4096 default
- SourcePipeline records provider/model on each entity file as an HTML comment
- output/processing-log.yaml tracks tokens, cost, duration, retries, errors
- _call_llm returns (content, metadata) for downstream traceability
- _http.py wraps JSON parse errors with a body preview for debugging
- infospace.yaml stages: extract/map=6000 tokens, synthesize=3000 tokens

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-19 14:50:49 +01:00
parent 5ede1de4b8
commit df1fdf1842
4 changed files with 191 additions and 32 deletions
--- a/examples/infospace-with-history/infospace.yaml
+++ b/examples/infospace-with-history/infospace.yaml
@@ -45,6 +45,7 @@ pipeline:
      output_dir: output/entities
      output_macro: entities
      split_entities: true
+      max_tokens: 6000
      macros:
        extraction_rules: artifacts/guidelines/extraction-rules.md
        vsm_framework: artifacts/vsm-reference/vsm-framework.md
@@ -52,6 +53,7 @@ pipeline:
      template: templates/map-to-vsm.md
      output_dir: output/mappings
      output_macro: mappings
+      max_tokens: 6000
      macros:
        mapping_rules: artifacts/guidelines/mapping-rules.md
        vsm_framework: artifacts/vsm-reference/vsm-framework.md
@@ -59,6 +61,7 @@ pipeline:
      template: templates/synthesize-analysis.md
      output_dir: output/analyses
      output_macro: analysis
+      max_tokens: 3000
      macros:
        vsm_framework: artifacts/vsm-reference/vsm-framework.md
  post_batch: