generated from coulomb/repo-seed
epub3 inbound filter
This commit is contained in:
206
tests/test_epub3_adapter.py
Normal file
206
tests/test_epub3_adapter.py
Normal file
@@ -0,0 +1,206 @@
|
||||
from pathlib import Path
|
||||
import zipfile
|
||||
|
||||
from markitect_tool.source import (
|
||||
SourceAdapterMatchRequest,
|
||||
SourceAdapterRegistry,
|
||||
SourceAsset,
|
||||
SourceInspectRequest,
|
||||
SourceReadRequest,
|
||||
discover_source_adapters,
|
||||
inspect_source,
|
||||
normalize_source,
|
||||
)
|
||||
|
||||
from markitect_filter.adapters import epub3_adapter_descriptor
|
||||
|
||||
|
||||
class FakeEntryPoint:
    """Minimal stand-in for an entry point that advertises the EPUB3 adapter."""

    # Name reported by this fake entry point.
    name = "epub3"

    def load(self):
        """Return the descriptor factory itself (it is not called here)."""
        factory = epub3_adapter_descriptor
        return factory
|
||||
|
||||
|
||||
def test_epub3_descriptor_matches_contract():
    """Pin the static fields advertised by the EPUB3 adapter descriptor."""
    spec = epub3_adapter_descriptor()

    assert spec.id == "source.epub3"
    assert spec.operations == ["read"]
    assert spec.media_types == ["application/epub+zip"]
    assert spec.extensions == [".epub"]
    assert spec.safety["network"] is False

    # The skip_boilerplate option is expected to be switched on by default.
    skip_option = spec.option_schema["properties"]["skip_boilerplate"]
    assert skip_option["default"] is True
|
||||
|
||||
|
||||
def test_epub3_adapter_matches_epub_assets(tmp_path: Path):
    """An asset tagged ``application/epub+zip`` is matched with confidence 100."""
    book_file = _write_epub(tmp_path)
    book = SourceAsset.from_path(book_file, media_type="application/epub+zip")
    reader = epub3_adapter_descriptor().instantiate()

    verdict = reader.can_read(SourceAdapterMatchRequest(asset=book))

    assert verdict.matched
    assert verdict.confidence == 100
|
||||
|
||||
|
||||
def test_epub3_adapter_inspects_metadata(tmp_path: Path):
    """Inspecting the fixture book surfaces the metadata written into its package."""
    book_file = _write_epub(tmp_path)
    book = SourceAsset.from_path(book_file, media_type="application/epub+zip")
    reader = epub3_adapter_descriptor().instantiate()

    report = reader.inspect(SourceInspectRequest(asset=book))

    assert report.is_valid
    # These values mirror the dc:* elements of the fixture's content.opf.
    assert report.metadata.title == "Test Book"
    assert report.metadata.creators == ["Ada Lovelace"]
    assert report.metadata.language == "en"
    assert report.metadata.identifiers["bookid"] == "urn:test-book"
    assert report.quality.lossiness == "low"
|
||||
|
||||
|
||||
def test_epub3_adapter_normalizes_spine_to_markdown(tmp_path: Path):
    """Reading the fixture book yields one markdown document with two segments."""
    # Expected rendering of chapter1 followed by chapter2, in spine order.
    expected_markdown = (
        "# Opening\n\n"
        "First paragraph with emphasis.\n\n"
        "- First point\n\n"
        "- Second point\n\n"
        "## Continuation\n\n"
        "Second chapter text."
    )
    book_file = _write_epub(tmp_path)
    book = SourceAsset.from_path(book_file, media_type="application/epub+zip")
    reader = epub3_adapter_descriptor().instantiate()

    outcome = reader.read(SourceReadRequest(asset=book, options={"skip_boilerplate": True}))

    assert outcome.is_valid
    assert outcome.document is not None
    assert outcome.document.document_id == "source.epub3:urn:test-book"
    assert outcome.document.metadata.title == "Test Book"
    assert outcome.document.markdown == expected_markdown

    segment_ids = [part.segment_id for part in outcome.document.segments]
    assert segment_ids == ["opening", "continuation"]

    first_origin = outcome.document.segments[0].provenance[0]
    assert first_origin.package_path == "EPUB/chapter1.xhtml"
    assert outcome.document.quality.lossiness == "none"
|
||||
|
||||
|
||||
def test_markitect_api_can_use_epub3_registry(tmp_path: Path):
    """The top-level inspect/normalize helpers work with an explicit registry."""
    book_file = _write_epub(tmp_path)
    adapters = SourceAdapterRegistry([epub3_adapter_descriptor()])

    # Inspect first, then normalize, both against the same registry.
    inspection = inspect_source(book_file, registry=adapters)
    normalization = normalize_source(book_file, registry=adapters)

    assert inspection.is_valid
    assert inspection.metadata.title == "Test Book"
    assert normalization.is_valid
    assert normalization.document is not None
    second_segment = normalization.document.segments[1]
    assert second_segment.heading == "Continuation"
|
||||
|
||||
|
||||
def test_epub3_adapter_reports_malformed_missing_container(tmp_path: Path):
    """An archive holding only ``mimetype`` is reported as malformed."""
    broken_file = tmp_path / "broken.epub"
    # Deliberately omit META-INF/container.xml so the adapter has nothing to follow.
    with zipfile.ZipFile(broken_file, "w") as bundle:
        bundle.writestr("mimetype", "application/epub+zip")
    book = SourceAsset.from_path(broken_file, media_type="application/epub+zip")
    reader = epub3_adapter_descriptor().instantiate()

    outcome = reader.read(SourceReadRequest(asset=book))

    assert not outcome.is_valid
    leading = outcome.diagnostics[0]
    assert leading.code == "source.malformed"
    assert "container.xml" in leading.message
|
||||
|
||||
|
||||
def test_epub3_entry_point_discovery_shape():
    """Discovery over a fake entry point exposes the adapter under ``source.epub3``."""
    discovered = discover_source_adapters([FakeEntryPoint()])

    found = discovered.get("source.epub3")

    assert found.name == "EPUB3"
|
||||
|
||||
|
||||
def _epub_fixture_entries():
    """Return the fixture's XML members as ordered ``(archive name, text)`` pairs.

    The order is the order in which the members are written to the archive.
    The markup lines sit flush-left because they are part of the string
    payloads, not of the surrounding Python code.
    """
    container_xml = """<?xml version="1.0" encoding="utf-8"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
<rootfiles>
<rootfile full-path="EPUB/content.opf" media-type="application/oebps-package+xml"/>
</rootfiles>
</container>
"""
    package_opf = """<?xml version="1.0" encoding="utf-8"?>
<package version="3.0" unique-identifier="bookid" xmlns="http://www.idpf.org/2007/opf">
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
<dc:identifier id="bookid">urn:test-book</dc:identifier>
<dc:title>Test Book</dc:title>
<dc:creator>Ada Lovelace</dc:creator>
<dc:language>en</dc:language>
<dc:publisher>Markitect Fixtures</dc:publisher>
<dc:date>2026-05-14</dc:date>
</metadata>
<manifest>
<item id="nav" href="nav.xhtml" media-type="application/xhtml+xml" properties="nav"/>
<item id="chapter1" href="chapter1.xhtml" media-type="application/xhtml+xml"/>
<item id="chapter2" href="chapter2.xhtml" media-type="application/xhtml+xml"/>
</manifest>
<spine>
<itemref idref="chapter1"/>
<itemref idref="chapter2"/>
</spine>
</package>
"""
    nav_xhtml = """<?xml version="1.0" encoding="utf-8"?>
<html xmlns="http://www.w3.org/1999/xhtml">
<body>
<nav epub:type="toc" xmlns:epub="http://www.idpf.org/2007/ops">
<ol>
<li><a href="chapter1.xhtml#opening">Opening</a></li>
<li><a href="chapter2.xhtml#continuation">Continuation</a></li>
</ol>
</nav>
</body>
</html>
"""
    chapter1_xhtml = """<?xml version="1.0" encoding="utf-8"?>
<html xmlns="http://www.w3.org/1999/xhtml">
<body>
<section id="opening">
<h1>Opening</h1>
<p>First paragraph with <em>emphasis</em>.</p>
<ul>
<li>First point</li>
<li>Second point</li>
</ul>
</section>
</body>
</html>
"""
    chapter2_xhtml = """<?xml version="1.0" encoding="utf-8"?>
<html xmlns="http://www.w3.org/1999/xhtml">
<body>
<section id="continuation">
<h2>Continuation</h2>
<p>Second chapter text.</p>
</section>
</body>
</html>
"""
    return (
        ("META-INF/container.xml", container_xml),
        ("EPUB/content.opf", package_opf),
        ("EPUB/nav.xhtml", nav_xhtml),
        ("EPUB/chapter1.xhtml", chapter1_xhtml),
        ("EPUB/chapter2.xhtml", chapter2_xhtml),
    )


def _write_epub(tmp_path: Path) -> Path:
    """Write a small two-chapter EPUB fixture and return its path.

    The archive is created as ``test-book.epub`` inside ``tmp_path``. It holds
    the ``mimetype`` entry, the container file, the package document, a
    navigation document, and two chapter documents, in that order.

    Args:
        tmp_path: Existing directory in which the archive is created.

    Returns:
        Path to the written ``test-book.epub``.
    """
    epub_path = tmp_path / "test-book.epub"
    with zipfile.ZipFile(epub_path, "w") as archive:
        # The EPUB container format expects ``mimetype`` first and uncompressed;
        # spelling out ZIP_STORED keeps that true even if the archive-level
        # compression is ever changed.
        archive.writestr(
            "mimetype",
            "application/epub+zip",
            compress_type=zipfile.ZIP_STORED,
        )
        for member_name, payload in _epub_fixture_entries():
            archive.writestr(member_name, payload)
    return epub_path
|
||||
Reference in New Issue
Block a user