# Files
# repo-scoping/tests/test_content_indexing.py
# 2026-04-29 01:19:59 +02:00
#
# 85 lines
# 2.4 KiB
# Python

from repo_registry.content_indexing.extractor import ContentExtractor
from repo_registry.core.models import ObservedFact
def fact(id, kind, name, path="", line=None):
    """Build an ``ObservedFact`` fixture for the extractor tests.

    The repository, analysis-run and snapshot ids are all fixed to ``1`` and
    ``value`` is always the empty string; only the arguments below vary.

    Args:
        id: Identifier stored on the fact.
        kind: Fact kind, e.g. ``"documentation"`` or ``"interface"``.
        name: Name stored on the fact.
        path: Path stored on the fact; defaults to the empty string.
        line: Optional line number. When given it is stored under the
            ``"line"`` key of the fact's metadata; otherwise the metadata
            dict is empty.

    Returns:
        The constructed ``ObservedFact``.
    """
    # Only record a line number when the caller actually supplied one.
    extra = {} if line is None else {"line": line}
    return ObservedFact(
        id=id,
        repository_id=1,
        analysis_run_id=1,
        snapshot_id=1,
        kind=kind,
        path=path,
        name=name,
        value="",
        metadata=extra,
    )
def test_content_extractor_chunks_docs_and_interface_line_ranges(tmp_path):
    """Check the line ranges of chunks for a doc file and an interface fact.

    A 45-line ``README.md`` referenced by a ``documentation`` fact is expected
    to produce two chunks covering lines 1-40 and 41-45. A 20-line ``app.py``
    referenced by an ``interface`` fact at line 10 is expected to produce a
    single chunk covering lines 5-20 whose text includes ``"line 10"``.
    """
    repo = tmp_path / "repo"
    repo.mkdir()

    # Files are written without a trailing newline: lines are joined by "\n".
    readme_lines = [f"readme line {number}" for number in range(1, 46)]
    (repo / "README.md").write_text("\n".join(readme_lines), encoding="utf-8")
    app_lines = [f"line {number}" for number in range(1, 21)]
    (repo / "app.py").write_text("\n".join(app_lines), encoding="utf-8")

    facts = [
        fact(1, "documentation", "README", "README.md"),
        fact(2, "interface", "python route decorator", "app.py", line=10),
    ]
    chunks = ContentExtractor().extract(repo, facts)

    def chunks_for(path):
        # Select the extracted chunks that belong to a single file.
        return [chunk for chunk in chunks if chunk.path == path]

    readme_spans = [
        (chunk.start_line, chunk.end_line) for chunk in chunks_for("README.md")
    ]
    assert readme_spans == [(1, 40), (41, 45)]

    interface_chunks = chunks_for("app.py")
    assert len(interface_chunks) == 1
    interface_chunk = interface_chunks[0]
    assert interface_chunk.start_line == 5
    assert interface_chunk.end_line == 20
    assert "line 10" in interface_chunk.text
def test_content_extractor_ignores_unindexed_and_missing_paths(tmp_path):
    """Check that no chunks come back for a path-less or missing-file fact.

    The repository contains only ``README.md``. Neither fact refers to it:
    one is a ``language`` fact with the default empty path, the other is a
    ``documentation`` fact pointing at ``missing.md``, which is never created.
    The extractor is expected to return an empty list.
    """
    repo = tmp_path / "repo"
    repo.mkdir()
    (repo / "README.md").write_text("# ok\n", encoding="utf-8")

    facts = [
        fact(1, "language", "Python"),
        fact(2, "documentation", "missing", "missing.md"),
    ]
    chunks = ContentExtractor().extract(repo, facts)

    assert chunks == []
def test_content_extractor_chunks_provider_related_config(tmp_path):
    """Check that a ``credential_config`` fact yields one chunk for its file.

    ``.env.example`` holds a single ``OPENROUTER_API_KEY=`` line. Exactly one
    chunk is expected, carrying that file's path and text that includes the
    variable name.
    """
    repo = tmp_path / "repo"
    repo.mkdir()
    env_file = repo / ".env.example"
    env_file.write_text("OPENROUTER_API_KEY=\n", encoding="utf-8")

    credential_fact = fact(
        1, "credential_config", "OpenRouter API key", ".env.example"
    )
    chunks = ContentExtractor().extract(repo, [credential_fact])

    assert len(chunks) == 1
    env_chunk = chunks[0]
    assert env_chunk.path == ".env.example"
    assert "OPENROUTER_API_KEY" in env_chunk.text