diff --git a/docs/generic-source-generator.md b/docs/generic-source-generator.md index 50dd3d8..78267fb 100644 --- a/docs/generic-source-generator.md +++ b/docs/generic-source-generator.md @@ -48,6 +48,23 @@ infospace-bench generate status ./infospaces/book-space shows chunk counts, generated artifact counts, evaluations, metrics, history, and stale source/profile inputs. +### Profiles + +Two profiles ship today: + +- `general-knowledge` — durable concepts, claims, methods, people, + places, works, and objects across any source +- `trading-literature` — trading memoirs and market-structure texts; + tunes entity categories (`trader`, `market`, `strategy`, `error`, + `psychological_pattern`, `institution`, `instrument`, + `evidence_bearing_claim`), relation types (`cause_effect`, + `lesson_evidence`, `risk_mitigation`, `actor_venue`, + `strategy_outcome`), and evaluation criteria (`groundedness`, + `lesson_clarity`, `historical_context`, `overgeneralization_risk`) + +Select via `--profile trading-literature` on `generate init` or +`generate from-source`. The generic `general-knowledge` profile remains the default. 
+ ### Scale-aware plan `generate plan` returns a compact estimate by default — counts of selected diff --git a/src/infospace_bench/profiles/trading-literature/contracts/entity.contract.md b/src/infospace_bench/profiles/trading-literature/contracts/entity.contract.md new file mode 100644 index 0000000..a9b3390 --- /dev/null +++ b/src/infospace_bench/profiles/trading-literature/contracts/entity.contract.md @@ -0,0 +1,15 @@ +# Entity Contract — Trading Literature + +Each generated entity must be a Markdown artifact with: + +- one top-level heading containing the entity title +- a `## Category` line containing exactly one of: `trader`, `market`, + `strategy`, `error`, `psychological_pattern`, `institution`, + `instrument`, `evidence_bearing_claim` +- a `## Definition` section +- optional `## Context`, `## Source Evidence`, and `## Review Notes` + sections + +Entity titles should be stable, short, and reusable across chapters of +the same source. Do not include the chapter number in the title; that +provenance belongs in the source artifact, not the entity. diff --git a/src/infospace_bench/profiles/trading-literature/contracts/evaluation.contract.md b/src/infospace_bench/profiles/trading-literature/contracts/evaluation.contract.md new file mode 100644 index 0000000..0ca11af --- /dev/null +++ b/src/infospace_bench/profiles/trading-literature/contracts/evaluation.contract.md @@ -0,0 +1,19 @@ +# Evaluation Contract — Trading Literature + +Each evaluation must be Markdown with YAML frontmatter containing: + +- `artifact_id` +- `evaluator` +- `evaluated_at` +- `scores` + +Scores must include all four criteria on a 0 to 5 scale, with 5 best: + +- `groundedness` +- `lesson_clarity` +- `historical_context` +- `overgeneralization_risk` (higher = lower risk; an entity that + silently universalises a chapter-local claim scores low) + +Optional `## Review Notes` should quote any specific lines from the +entity body that drove a low score on any criterion. 
diff --git a/src/infospace_bench/profiles/trading-literature/contracts/relation.contract.md b/src/infospace_bench/profiles/trading-literature/contracts/relation.contract.md new file mode 100644 index 0000000..f0dff2e --- /dev/null +++ b/src/infospace_bench/profiles/trading-literature/contracts/relation.contract.md @@ -0,0 +1,16 @@ +# Relation Contract — Trading Literature + +Each generated relation must be a Markdown artifact with: + +- one top-level heading containing the relation title +- `## Subject` +- `## Predicate` +- `## Object` +- `## Relation Type` — exactly one of: `cause_effect`, `lesson_evidence`, + `risk_mitigation`, `actor_venue`, `strategy_outcome` +- optional `## Evidence` and `## Feedback Role` + +Subject and object values should match generated entity titles whenever +possible. A relation whose subject or object does not correspond to any +extracted entity must include an `## Evidence` section that quotes the +phrase from the source supporting the link. diff --git a/src/infospace_bench/profiles/trading-literature/contracts/summary.contract.md b/src/infospace_bench/profiles/trading-literature/contracts/summary.contract.md new file mode 100644 index 0000000..3629b77 --- /dev/null +++ b/src/infospace_bench/profiles/trading-literature/contracts/summary.contract.md @@ -0,0 +1,11 @@ +# Summary Contract — Trading Literature + +Each source summary should preserve: + +- the narrator's actions and the market events they reacted to +- named strategies, instruments, venues, and institutions present in + the chunk +- explicit lessons or rules of thumb the chunk states +- evidence phrases (dollar figures, dates, counter-party names, tape + behaviour) useful for later extraction +- unresolved ambiguities or anachronisms a reviewer should flag diff --git a/src/infospace_bench/profiles/trading-literature/profile.yaml b/src/infospace_bench/profiles/trading-literature/profile.yaml new file mode 100644 index 0000000..d0ee351 --- /dev/null +++ 
b/src/infospace_bench/profiles/trading-literature/profile.yaml @@ -0,0 +1,35 @@ +id: trading-literature +name: Trading Literature +description: | + Infospace generation profile for trading memoirs, market-structure texts, + and operator narratives. Tunes entity, relation, and evaluation prompts + for traders, markets, strategies, errors, psychological patterns, + institutions, instruments, and the lessons drawn from them. +terminology: + source_chunk: Chapter or chapter-part of a trading memoir or market-structure text + entity: Trader, market, strategy, error pattern, psychological habit, institution, instrument, or evidence-bearing claim + relation: Typed link between two trading-literature entities (cause/effect, lesson/evidence, risk/mitigation, actor/venue, strategy/outcome) +entity_categories: + - traders + - markets + - strategies + - errors + - psychological_patterns + - institutions + - instruments + - evidence_bearing_claims +relation_categories: + - cause_effect + - lesson_evidence + - risk_mitigation + - actor_venue + - strategy_outcome +granularity: + default: | + Prefer durable trading concepts and operator-level lessons over biographical + detail or stock-price trivia. Each entity should be reusable across chapters. +evaluation_criteria: + - groundedness + - lesson_clarity + - historical_context + - overgeneralization_risk diff --git a/src/infospace_bench/profiles/trading-literature/templates/evaluate-entity.md b/src/infospace_bench/profiles/trading-literature/templates/evaluate-entity.md new file mode 100644 index 0000000..5d37e8c --- /dev/null +++ b/src/infospace_bench/profiles/trading-literature/templates/evaluate-entity.md @@ -0,0 +1,34 @@ +# Evaluate Trading-Literature Entity + +Profile: {{ macros.profile }} + +Evaluate the generated entity as Markdown with YAML frontmatter. Include +`artifact_id`, `evaluator`, `evaluated_at`, and a `scores` list. Score +each criterion on a 0 to 5 scale where 5 is best. 
+ +Required score names: + +- `groundedness` — does the entity stay anchored to the source chunk, + with no invented dates, dollar figures, or quotes? +- `lesson_clarity` — for `strategy`, `error`, `psychological_pattern`, + and `evidence_bearing_claim` entities, is the operator-level lesson + stated crisply enough to be reused in later chapters? For purely + factual entities (trader, market, institution, instrument), score + this on the clarity of the definition. +- `historical_context` — is the entity placed correctly in the era and + venue of the source (e.g. early-1900s American equities) without + importing modern terminology or instruments? +- `overgeneralization_risk` — is the entity scoped narrowly enough to + resist becoming a vague universal claim? Higher score means lower + risk. Flag entities that quietly claim to apply to all markets or + all operators when the source restricts the claim. + +Add a short `## Review Notes` section listing any specific lines from +the entity body that drove a low score on any criterion. + +Entity artifact: {{ input.artifact_id }} +Entity title: {{ input.title }} + +## Entity + +{{ input.content }} diff --git a/src/infospace_bench/profiles/trading-literature/templates/extract-entities.md b/src/infospace_bench/profiles/trading-literature/templates/extract-entities.md new file mode 100644 index 0000000..a3f7524 --- /dev/null +++ b/src/infospace_bench/profiles/trading-literature/templates/extract-entities.md @@ -0,0 +1,37 @@ +# Extract Trading-Literature Entities + +Profile: {{ macros.profile }} + +Extract reusable infospace entities from the source chunk. Return one +Markdown bundle where each entity starts with `# Entity Title` and has a +`## Definition` section, plus a `## Category` line drawn from the list +below. Add `## Context` and `## Source Evidence` when the chunk gives +enough material; leave them out rather than inventing detail. 
+ +Allowed categories (use exactly one per entity): + +- `trader` — a named operator, broker, manipulator, or counter-party +- `market` — a market, exchange, pit, or named instrument family + (e.g. the New York Stock Exchange, the cotton market, the bucket-shop + circuit) +- `strategy` — a named tactic, system, or recurring playbook + (e.g. pyramiding, scale buying, tape reading) +- `error` — a recurring mistake, anti-pattern, or losing habit +- `psychological_pattern` — a named cognitive or emotional habit that + drives decisions (e.g. tip-following, hope-against-evidence) +- `institution` — a firm, regulator, news organisation, or social venue +- `instrument` — a specific security, commodity, or contract +- `evidence_bearing_claim` — a concrete operator-level claim the text + asserts and partially supports (e.g. "amateurs buy on tips, pros buy + on tape"); preserve the supporting evidence in the body + +Prefer entities that will recur across chapters. Avoid fictionalised +people whose role is purely narrative colour. Avoid wrapping a single +trade as an entity unless the trade is itself a teachable case. + +Source title: {{ input.title }} +Source artifact: {{ input.artifact_id }} + +## Source + +{{ input.content }} diff --git a/src/infospace_bench/profiles/trading-literature/templates/extract-relations.md b/src/infospace_bench/profiles/trading-literature/templates/extract-relations.md new file mode 100644 index 0000000..0689770 --- /dev/null +++ b/src/infospace_bench/profiles/trading-literature/templates/extract-relations.md @@ -0,0 +1,32 @@ +# Extract Trading-Literature Relations + +Profile: {{ macros.profile }} + +Extract a small set of important relations from the source chunk. Return +one Markdown relation artifact per relation. Each artifact uses sections +`## Subject`, `## Predicate`, `## Object`, and `## Relation Type`. Add +`## Evidence` whenever the chunk supplies a concrete supporting phrase. 
+ +Use exactly one of these relation types per relation: + +- `cause_effect` — one entity drives a measurable market or operator + outcome (e.g. a strategy causing a loss; a market event causing a + policy change) +- `lesson_evidence` — an `evidence_bearing_claim` is supported (or + undercut) by a concrete trade, event, or quote in the source +- `risk_mitigation` — a strategy, rule, or habit reduces a named risk +- `actor_venue` — a trader operates in a market, institution, or pit +- `strategy_outcome` — a named strategy is applied to a specific trade + or campaign and produces a labelled outcome (win, loss, scratch) + +Subject and object values should match entity titles you would (or did) +extract in the entities stage. Skip relations whose subject or object +would be a one-off fictional flourish. Skip implicit moralising; prefer +relations the chunk actually evidences. + +Source title: {{ input.title }} +Source artifact: {{ input.artifact_id }} + +## Source + +{{ input.content }} diff --git a/src/infospace_bench/profiles/trading-literature/templates/summarize-source.md b/src/infospace_bench/profiles/trading-literature/templates/summarize-source.md new file mode 100644 index 0000000..859d8af --- /dev/null +++ b/src/infospace_bench/profiles/trading-literature/templates/summarize-source.md @@ -0,0 +1,23 @@ +# Summarize Trading-Literature Source + +Profile: {{ macros.profile }} + +Summarize the source chunk as Markdown for a trading-literature infospace. 
+Preserve in this order: + +- the narrator's actions and the market events they reacted to +- named strategies, instruments, venues, and institutions +- explicit lessons, rules of thumb, or warnings the text states +- evidence phrases (dollar figures, dates, tape behaviour, counter-party + names) that should guide later entity and relation extraction +- ambiguities or anachronisms that a reviewer should flag + +Keep the summary to a single page; do not paraphrase the moral of the +chapter; include only the material a downstream extractor needs. + +Source title: {{ input.title }} +Source artifact: {{ input.artifact_id }} + +## Source + +{{ input.content }} diff --git a/src/infospace_bench/profiles/trading-literature/templates/synthesize-report.md b/src/infospace_bench/profiles/trading-literature/templates/synthesize-report.md new file mode 100644 index 0000000..f1c667d --- /dev/null +++ b/src/infospace_bench/profiles/trading-literature/templates/synthesize-report.md @@ -0,0 +1,13 @@ +# Synthesize Trading-Literature Report + +Profile: {{ macros.profile }} + +Synthesize a concise review report from the generated source summaries, +entities, relations, evaluations, and collection metrics. Group entities +by category (trader, market, strategy, error, psychological pattern, +institution, instrument, evidence-bearing claim). Surface the relations +whose `relation_type` is `lesson_evidence` or `strategy_outcome` first — +those are the operator-level findings a reviewer will want to read +before anything else. End the report with an explicit "Overgeneralization +risks" section that quotes any entities whose evaluation scored +`overgeneralization_risk` below 3. 
diff --git a/tests/test_trading_literature_profile.py b/tests/test_trading_literature_profile.py new file mode 100644 index 0000000..cdb3921 --- /dev/null +++ b/tests/test_trading_literature_profile.py @@ -0,0 +1,249 @@ +import json +import os +import subprocess +import sys +import zipfile +from pathlib import Path + +import yaml + +from infospace_bench.generator import ( + init_generation_infospace, + run_generation, + status_generation, +) + + +PROFILE_DIR = Path("src/infospace_bench/profiles/trading-literature") + + +def _fixture_responses(path: Path) -> None: + data = { + "responses": [ + { + "stage_id": "summarize-source", + "input_artifact_id": "*", + "markdown": "# Source Summary\n\nThe chapter introduces a bucket-shop apprenticeship.\n", + }, + { + "stage_id": "extract-entities", + "input_artifact_id": "*", + "markdown": ( + "# Tape Reading\n\n" + "## Category\n\nstrategy\n\n" + "## Definition\n\n" + "Inferring price intent from the ticker tape rather than fundamentals.\n\n" + "## Context\n\nFramed as a learnable pattern skill in the chapter.\n\n" + "# Bucket Shop\n\n" + "## Category\n\ninstitution\n\n" + "## Definition\n\n" + "A 1900s retail brokerage that took the other side of customer tape bets.\n\n" + ), + }, + { + "stage_id": "extract-relations", + "input_artifact_id": "*", + "markdown": ( + "# Tape Reading Reduces Tip Following\n\n" + "## Subject\n\nTape Reading\n\n" + "## Predicate\n\nreduces\n\n" + "## Object\n\nTip Following\n\n" + "## Relation Type\n\nrisk_mitigation\n\n" + "## Evidence\n\nThe narrator's profits track tape behaviour, not rumour.\n" + ), + }, + { + "stage_id": "evaluate-entity", + "input_artifact_id": "*", + "markdown": ( + "---\n" + "artifact_id: entity/tape-reading.md\n" + "evaluator: fixture\n" + "evaluated_at: '2026-05-17T00:00:00'\n" + "scores:\n" + " - name: groundedness\n value: 4.0\n max_value: 5.0\n" + " - name: lesson_clarity\n value: 4.0\n max_value: 5.0\n" + " - name: historical_context\n value: 4.0\n max_value: 5.0\n" 
+ " - name: overgeneralization_risk\n value: 4.0\n max_value: 5.0\n" + "---\n\n" + "# Evaluation: entity/tape-reading.md\n" + ), + }, + ] + } + path.write_text(yaml.safe_dump(data, sort_keys=False), encoding="utf-8") + + +CONTAINER_XML = """ + + + + + +""" + +PACKAGE_OPF = """ + + + urn:test:trading + Trading Memoir Fixture + Fixture Author + en + + + + + + + + + + +""" + + +def _write_two_chapter_epub(path: Path) -> None: + with zipfile.ZipFile(path, "w") as archive: + archive.writestr("mimetype", "application/epub+zip") + archive.writestr("META-INF/container.xml", CONTAINER_XML) + archive.writestr("OEBPS/content.opf", PACKAGE_OPF) + archive.writestr( + "OEBPS/ch1.xhtml", + "Book" + "

I

The narrator tries tape reading at a bucket shop.

", + ) + archive.writestr( + "OEBPS/ch2.xhtml", + "Book" + "

II

He learns the cost of acting on rumours.

", + ) + + +def test_trading_profile_declares_required_categories_and_criteria() -> None: + data = yaml.safe_load((PROFILE_DIR / "profile.yaml").read_text(encoding="utf-8")) + + assert data["id"] == "trading-literature" + assert set(data["entity_categories"]) == { + "traders", + "markets", + "strategies", + "errors", + "psychological_patterns", + "institutions", + "instruments", + "evidence_bearing_claims", + } + assert set(data["relation_categories"]) == { + "cause_effect", + "lesson_evidence", + "risk_mitigation", + "actor_venue", + "strategy_outcome", + } + assert data["evaluation_criteria"] == [ + "groundedness", + "lesson_clarity", + "historical_context", + "overgeneralization_risk", + ] + + +def test_trading_profile_evaluate_template_mentions_all_criteria() -> None: + template = (PROFILE_DIR / "templates" / "evaluate-entity.md").read_text(encoding="utf-8") + + for criterion in ( + "groundedness", + "lesson_clarity", + "historical_context", + "overgeneralization_risk", + ): + assert criterion in template, f"evaluate template should reference {criterion}" + + +def test_trading_profile_relation_template_lists_required_relation_types() -> None: + template = (PROFILE_DIR / "templates" / "extract-relations.md").read_text(encoding="utf-8") + + for relation_type in ( + "cause_effect", + "lesson_evidence", + "risk_mitigation", + "actor_venue", + "strategy_outcome", + ): + assert relation_type in template, f"relation template should reference {relation_type}" + + +def test_trading_profile_contracts_present() -> None: + contracts_dir = PROFILE_DIR / "contracts" + expected = {"entity.contract.md", "relation.contract.md", "evaluation.contract.md", "summary.contract.md"} + actual = {path.name for path in contracts_dir.glob("*.md")} + assert expected.issubset(actual) + + +def test_trading_profile_runs_end_to_end_with_fixture(tmp_path: Path) -> None: + book = tmp_path / "book.epub" + _write_two_chapter_epub(book) + fixture = tmp_path / "responses.yaml" + 
_fixture_responses(fixture) + + infospace = init_generation_infospace( + tmp_path, + book, + "trading-fixture", + name="Trading Fixture", + profile="trading-literature", + ) + result = run_generation(infospace.root, fixture_responses=fixture) + status = status_generation(infospace.root) + + assert result.status == "completed" + assert status["profile"] == "trading-literature" + assert status["source_chunk_count"] == 2 + assert status["entity_count"] >= 1 + assert status["relation_count"] >= 1 + assert status["evaluation_count"] >= 1 + # Installed profile should have copied templates and contracts into the infospace. + assert (infospace.root / "profiles" / "trading-literature" / "templates" / "evaluate-entity.md").is_file() + assert ( + infospace.root / "profiles" / "trading-literature" / "contracts" / "entity.contract.md" + ).is_file() + + +def test_trading_profile_selectable_via_cli(tmp_path: Path) -> None: + book = tmp_path / "book.epub" + _write_two_chapter_epub(book) + fixture = tmp_path / "responses.yaml" + _fixture_responses(fixture) + env = os.environ.copy() + env["PYTHONPATH"] = "src:/home/worsch/markitect-tool/src" + + result = subprocess.run( + [ + sys.executable, + "-m", + "infospace_bench", + "generate", + "from-source", + str(book), + "--workspace", + str(tmp_path), + "--slug", + "trading-cli", + "--name", + "Trading CLI", + "--profile", + "trading-literature", + "--fixture-responses", + str(fixture), + "--apply", + ], + check=False, + env=env, + text=True, + capture_output=True, + ) + + assert result.returncode == 0, result.stderr + payload = json.loads(result.stdout) + assert payload["status"] == "completed" + assert "trading-cli" in payload["root"] diff --git a/workplans/IB-WP-0016-lefevre-ebook-infospace-readiness.md b/workplans/IB-WP-0016-lefevre-ebook-infospace-readiness.md index 126a0c8..928f3de 100644 --- a/workplans/IB-WP-0016-lefevre-ebook-infospace-readiness.md +++ b/workplans/IB-WP-0016-lefevre-ebook-infospace-readiness.md @@ -157,7 +157,7 
@@ state_hub_task_id: "bee5c38a-f052-4edb-9313-b3a2ee5a6c26" ```task id: IB-WP-0016-T04 -status: todo +status: done priority: medium state_hub_task_id: "1a1b8fde-773f-46a6-887a-3c87a425d7a3" ```