feat(infospace): add process command for batch source file processing
- Extend PipelineStage with name, output_dir, output_macro,
split_entities, and macros fields for declarative pipeline config
- Add SourcePipeline class (pipeline.py) using simple @{macro}
substitution — no SQLite dependency, skip-if-exists per stage,
LLM retry on rate limits, git commit per source
- Add `markitect infospace process [GLOB_PATTERN]` CLI command with
--all, --provider, --model, --check-after-each, --no-commit flags
- Update infospace.yaml with output_dir, output_macro, split_entities,
and macros for each pipeline stage in the WoN example
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -155,13 +155,41 @@ class ViabilityThreshold:
|
||||
|
||||
@dataclass
class PipelineStage:
    """A single stage in the processing pipeline.

    Attributes:
        template: Path to the template file (relative to infospace root).
        name: Human-readable stage name used in progress output.
        output_dir: Directory for stage outputs (relative to root).
        output_macro: Macro name for this stage's output, also used as
            the filename suffix (e.g. ``entities`` → ``<id>-entities.md``).
        split_entities: If True, parse ``--- ENTITY: <name> ---`` delimiters
            from LLM output and write individual entity files.
        macros: Static macros loaded from files (macro name → relative path).
        spaces: Legacy space IDs for SQLite-based resolver (unused by
            :class:`SourcePipeline`).
    """

    template: str
    name: str = ""
    output_dir: str = ""
    output_macro: str = ""
    split_entities: bool = False
    macros: Dict[str, str] = field(default_factory=dict)
    spaces: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Serialise the stage to a plain dictionary.

        ``template`` is always emitted; every other field is emitted only
        when it holds a truthy value, which keeps the output minimal.

        Returns:
            A new dictionary. The ``macros`` and ``spaces`` values are
            copies, so mutating the result never alters this stage.
        """
        d: Dict[str, Any] = {"template": self.template}
        if self.name:
            d["name"] = self.name
        if self.output_dir:
            d["output_dir"] = self.output_dir
        if self.output_macro:
            d["output_macro"] = self.output_macro
        if self.split_entities:
            d["split_entities"] = self.split_entities
        if self.macros:
            # Copy so callers cannot mutate the stage through the result.
            d["macros"] = dict(self.macros)
        if self.spaces:
            d["spaces"] = list(self.spaces)
        return d

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> PipelineStage:
        """Build a stage from a dictionary such as one produced by ``to_dict``.

        Optional keys that are missing *or* explicitly null fall back to the
        field defaults, so a config entry like ``macros:`` with no value
        yields an empty mapping rather than ``None``.

        Args:
            data: Mapping with a required ``template`` key and optional
                ``name``, ``output_dir``, ``output_macro``,
                ``split_entities``, ``macros`` and ``spaces`` keys.

        Returns:
            A new :class:`PipelineStage`. Its ``macros`` and ``spaces`` are
            copies, so the stage never shares containers with ``data``.

        Raises:
            KeyError: If ``data`` has no ``template`` key.
        """
        return cls(
            template=data["template"],
            name=data.get("name") or "",
            output_dir=data.get("output_dir") or "",
            output_macro=data.get("output_macro") or "",
            split_entities=bool(data.get("split_entities", False)),
            # ``or`` covers both a missing key and an explicit null value;
            # dict()/list() detach the stage from the caller's containers.
            macros=dict(data.get("macros") or {}),
            spaces=list(data.get("spaces") or []),
        )
|
||||
|
||||
|
||||
Reference in New Issue
Block a user