generated from coulomb/repo-seed
85 lines
2.4 KiB
Python
85 lines
2.4 KiB
Python
from repo_registry.content_indexing.extractor import ContentExtractor
|
|
from repo_registry.core.models import ObservedFact
|
|
|
|
|
|
def fact(id, kind, name, path="", line=None):
    """Build an ``ObservedFact`` fixture for the extractor tests.

    The repository, analysis-run and snapshot ids are all fixed to ``1`` and
    the value is always the empty string. When ``line`` is given it is stored
    under the ``"line"`` metadata key; otherwise the metadata dict is empty.
    """
    extra = {} if line is None else {"line": line}
    return ObservedFact(
        id=id,
        repository_id=1,
        analysis_run_id=1,
        snapshot_id=1,
        kind=kind,
        path=path,
        name=name,
        value="",
        metadata=extra,
    )
|
|
|
|
|
|
def test_content_extractor_chunks_docs_and_interface_line_ranges(tmp_path):
    """Check the line ranges of chunks produced for a doc and an interface fact.

    A 45-line README is expected to yield chunks covering lines 1-40 and
    41-45. An interface fact anchored at line 10 of a 20-line ``app.py`` is
    expected to yield exactly one chunk spanning lines 5-20 that contains the
    anchored line's text.
    """
    repo = tmp_path / "repo"
    repo.mkdir()

    # README body: "readme line 1" .. "readme line 45", no trailing newline.
    readme_body = "\n".join(f"readme line {number}" for number in range(1, 46))
    (repo / "README.md").write_text(readme_body, encoding="utf-8")

    # Source body: "line 1" .. "line 20", no trailing newline.
    app_body = "\n".join(f"line {number}" for number in range(1, 21))
    (repo / "app.py").write_text(app_body, encoding="utf-8")

    extractor = ContentExtractor()
    facts = [
        fact(1, "documentation", "README", "README.md"),
        fact(2, "interface", "python route decorator", "app.py", line=10),
    ]
    chunks = extractor.extract(repo, facts)

    readme_chunks = [item for item in chunks if item.path == "README.md"]
    interface_chunks = [item for item in chunks if item.path == "app.py"]

    readme_ranges = [(item.start_line, item.end_line) for item in readme_chunks]
    assert readme_ranges == [(1, 40), (41, 45)]

    assert len(interface_chunks) == 1
    route_chunk = interface_chunks[0]
    assert route_chunk.start_line == 5
    assert route_chunk.end_line == 20
    assert "line 10" in route_chunk.text
|
|
|
|
|
|
def test_content_extractor_ignores_unindexed_and_missing_paths(tmp_path):
    """Expect no chunks for a path-less ``language`` fact or a missing file.

    The repository contains only ``README.md``; neither fact points at it.
    """
    repo = tmp_path / "repo"
    repo.mkdir()
    readme = repo / "README.md"
    readme.write_text("# ok\n", encoding="utf-8")

    extractor = ContentExtractor()
    facts = [
        # No path is given, so this fact carries the default empty path.
        fact(1, "language", "Python"),
        # Points at a file that was never written to the repo directory.
        fact(2, "documentation", "missing", "missing.md"),
    ]
    chunks = extractor.extract(repo, facts)

    assert chunks == []
|
|
|
|
|
|
def test_content_extractor_chunks_provider_related_config(tmp_path):
    """Expect one chunk for a ``credential_config`` fact on ``.env.example``.

    The chunk must report the ``.env.example`` path and include the variable
    name written to that file.
    """
    repo = tmp_path / "repo"
    repo.mkdir()
    env_example = repo / ".env.example"
    env_example.write_text("OPENROUTER_API_KEY=\n", encoding="utf-8")

    extractor = ContentExtractor()
    facts = [fact(1, "credential_config", "OpenRouter API key", ".env.example")]
    chunks = extractor.extract(repo, facts)

    assert len(chunks) == 1
    config_chunk = chunks[0]
    assert config_chunk.path == ".env.example"
    assert "OPENROUTER_API_KEY" in config_chunk.text
|