init: documentation and prototypes

This commit is contained in:
2025-12-01 22:01:19 +01:00
parent e936fb41fa
commit 45d60fc1a9
51 changed files with 2476 additions and 1 deletions

View File

@@ -0,0 +1,59 @@
# Implementation Guide – DirektVermittlungDe (FastAPI)
This guide explains how this codebase implements the architectural decisions
and API specification of DirektVermittlungDe.
## 1. Architecture Mapping
- **Belegorientierung**: `Document` is the central aggregate.
- Domain models in `app/domain/models.py`
- ORM model in `app/adapters/orm.py::Document`
- **Interaction Threads**: `Thread` and `Message` map to interaction threads and their logs.
- Cursor-based pagination implemented in `app/service/threads_service.py::list_messages`
- The `created_at` timestamp is used as the pagination cursor.
- **Routing Engine**:
- Implemented as an adapter in `app/adapters/routing.py`
- Operates solely on `DocumentMetadata` (plaintext) as required by the split-payload model.
- **Asynchronous Exports**:
- `POST /exports` → `start_export()` in `app/service/exports_service.py`
- Returns `202 Accepted` with `jobId` and uses a job registry (`app/adapters/jobs.py`)
- In a production system this would publish to Redis / RabbitMQ and be processed by workers.
## 2. Security
- **Auth**:
- OAuth2 / JWT is abstracted in `app/adapters/auth.py`.
- In this reference implementation, we parse unverified claims; in production, validate via JWKS.
- **Data Protection**:
- Encrypted payloads are treated as opaque strings and stored via `app/adapters/storage.py`.
- Only routing metadata is stored in PostgreSQL for server-side logic.
- **Retention**:
- Each `Document` gets a `retention_date`, set to a grace period in the future.
- Implement a periodic cleanup job that deletes rows where `retention_date < NOW()`.
## 3. Performance / Hybrid Concurrency
- All endpoints are `async def` and rely on the async SQLAlchemy engine.
- CPU-heavy operations (PDF merge, crypto) must not be run inside the event loop.
- To extend this, create a `ProcessPoolExecutor` in `workers/` and call via `loop.run_in_executor`.
## 4. Extending the System
- **Real Routing Rules**:
- Add a `routing_rules` table and adapt `app/adapters/routing.py` to query it.
- **Real Export Workers**:
- Replace `jobs.py` with a Redis-backed queue and a worker process in `workers/exports_worker.py`.
- **Authority Integration**:
- Call the authority's eAkte ingress API from the worker, using authority-specific keys.
## 5. Definition of Done Checklist
Before going to production:
- Load-test `POST /documents` and `GET /threads/{id}/messages`.
- Verify that logs never contain Aktenzeichen or other PII.
- Verify that retention cleanup jobs work correctly on staging data.

View File

@@ -0,0 +1,52 @@
# DirektVermittlungDe Backend (Reference Implementation)
This repository contains a **FastAPI-based** reference implementation of the
DirektVermittlungDe (DVD) backend.
DVD provides **document-centric communication** between citizens and authorities:
citizens upload a letter or provide an *Aktenzeichen*, the system auto-routes it to
the responsible unit and opens an interaction thread for clarification instead of
a phone-based "Schnitzeljagd".
## Features
- **Document Intake (`POST /documents`)**
- Split-payload model: plaintext metadata + encrypted payload.
- Auto-routing to assigned unit.
- **Interaction Threads**
- Create a thread per document (`/documents/{id}/threads`).
- Citizen/official messages via `/threads/{threadId}/messages`.
- Cursor-based pagination for message history.
- **Exports**
- Async export workflow (`POST /exports`) with job IDs and status polling.
- **Security & NFR Alignment**
- OAuth2 / JWT-based auth (scopes: `citizen:write`, `official:read`, `official:write`).
- Document retention date for GDPR-friendly cleanup.
- Architected for async I/O and offloading CPU-heavy work (hybrid concurrency).
## Quickstart
1. **Install dependencies**
```bash
pip install -e .
```
2. **Run DB migrations (simple metadata create)**
```bash
python -m app.scripts.init_db
```
3. **Run the API**
```bash
uvicorn app.main:app --reload
```
4. **Open Swagger UI**
Visit: `http://localhost:8000/docs`

View File

@@ -0,0 +1,28 @@
[project]
name = "dvd-backend"
version = "0.1.0"
description = "DirektVermittlungDe reference backend"
requires-python = ">=3.11"
dependencies = [
"fastapi>=0.115.0",
"uvicorn[standard]>=0.30.0",
"pydantic>=2.7.0",
"SQLAlchemy>=2.0.30",
"asyncpg>=0.29.0",
"aiobotocore>=2.15.0",
"python-jose[cryptography]>=3.3.0",
"redis>=5.0.0",
]
[project.optional-dependencies]
dev = [
"pytest",
"pytest-asyncio",
"httpx",
]
[tool.uvicorn]
factory = false
port = 8000
host = "0.0.0.0"
app = "app.main:app"

View File

View File

@@ -0,0 +1,43 @@
from typing import List
from fastapi import Depends, HTTPException, status
from fastapi.security import OAuth2AuthorizationCodeBearer
from jose import jwt, JWTError
# OAuth2 authorization-code bearer scheme used as the token dependency of
# get_current_user; the scopes mapping lists the three scopes this API uses.
OAUTH2_SCHEME = OAuth2AuthorizationCodeBearer(
    authorizationUrl="https://auth.example/authorize",
    tokenUrl="https://auth.example/token",
    scopes={
        "citizen:write": "Citizen can create documents and threads",
        "official:read": "Official can read documents and threads",
        "official:write": "Official can answer and export",
    },
)

# NOTE(review): issuer, audience and key are declared here, but the token
# parsing in this module never references them; the key is a demo placeholder.
JWT_ISSUER = "https://auth.example"
JWT_AUDIENCE = "dvd-api"
JWT_PUBLIC_KEY = "FAKE_PUBLIC_KEY_FOR_DEMO"
class UserContext:
    """Authenticated caller: the token subject plus its granted scopes."""

    def __init__(self, sub: str, scopes: List[str]):
        self.sub, self.scopes = sub, scopes

    def has_scope(self, scope: str) -> bool:
        """Return True when *scope* is one of the caller's granted scopes."""
        granted = self.scopes
        return scope in granted
async def get_current_user(token: str = Depends(OAUTH2_SCHEME)) -> UserContext:
    """Build the caller's ``UserContext`` from the bearer token's claims.

    The ``scope`` claim is accepted either as a space-delimited string or as a
    list of strings; a missing or null claim yields no scopes.

    Raises:
        HTTPException: 401 when the token cannot be decoded or carries no
            ``sub`` claim.
    """
    # SECURITY: get_unverified_claims() does not check the signature, expiry,
    # issuer or audience. This is acceptable only for the demo setup; real
    # deployments must verify the token before trusting any claim.
    bearer_challenge = {"WWW-Authenticate": "Bearer"}
    try:
        payload = jwt.get_unverified_claims(token)
    except JWTError as exc:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid token",
            headers=bearer_challenge,
        ) from exc

    sub = payload.get("sub")
    if not sub:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid token: no subject",
            headers=bearer_challenge,
        )

    # Some issuers send scopes as a JSON array or as null; calling .split()
    # on those would raise AttributeError and surface as a 500.
    raw_scope = payload.get("scope") or ""
    if isinstance(raw_scope, str):
        scopes = raw_scope.split()
    elif isinstance(raw_scope, (list, tuple)):
        scopes = [str(item) for item in raw_scope]
    else:
        scopes = []

    return UserContext(sub=sub, scopes=scopes)

View File

@@ -0,0 +1,17 @@
from typing import AsyncGenerator
from sqlalchemy.ext.asyncio import (
AsyncSession, async_sessionmaker, create_async_engine
)
from sqlalchemy.orm import DeclarativeBase
# NOTE(review): the connection URL (including credentials) is hard-coded.
DATABASE_URL = "postgresql+asyncpg://dvd:dvd@localhost:5432/dvd"
# Shared async engine; SQL statement echoing is switched off.
engine = create_async_engine(DATABASE_URL, echo=False, future=True)
# Session factory bound to the engine. expire_on_commit=False keeps loaded
# attribute values usable on instances after a commit.
async_session_factory = async_sessionmaker(engine, expire_on_commit=False)
class Base(DeclarativeBase):
    """Declarative base class that the ORM models derive from."""
async def get_session() -> AsyncGenerator[AsyncSession, None]:
    """Yield one ``AsyncSession`` from the shared session factory.

    The session is opened in an ``async with`` block, so it is released when
    the consumer of this generator is finished with it.
    """
    async with async_session_factory() as session:
        yield session

View File

@@ -0,0 +1,29 @@
import uuid
from typing import Dict
from datetime import datetime
from app.domain.models import ExportJobStatus
# In-memory job registry keyed by job id. It lives in this module only, so
# its contents are lost whenever the process ends.
_jobs: Dict[str, dict] = {}


def _utc_now() -> datetime:
    """Return the current time as a timezone-aware UTC datetime."""
    # Local import: the module's import block only brings in `datetime`.
    from datetime import timezone

    return datetime.now(timezone.utc)


def create_export_job(case_id: str, target_system: str, include_attachments: bool) -> str:
    """Register a new export job in state ``QUEUED`` and return its id.

    Args:
        case_id: Identifier of the case to export.
        target_system: Name of the system the export is destined for.
        include_attachments: Whether attachments are part of the export.

    Returns:
        The generated job id (a UUID4 string).
    """
    job_id = str(uuid.uuid4())
    # One timestamp for both fields so a fresh job has createdAt == updatedAt.
    now = _utc_now()
    _jobs[job_id] = {
        "jobId": job_id,
        "caseId": case_id,
        "targetSystem": target_system,
        "includeAttachments": include_attachments,
        "status": ExportJobStatus.QUEUED,
        "createdAt": now,
        "updatedAt": now,
    }
    return job_id


def set_job_status(job_id: str, status: ExportJobStatus) -> None:
    """Update the status of a known job; unknown job ids are ignored."""
    job = _jobs.get(job_id)
    if not job:
        return
    job["status"] = status
    job["updatedAt"] = _utc_now()


def get_job(job_id: str) -> dict | None:
    """Return the registry entry for *job_id*, or ``None`` if there is none."""
    return _jobs.get(job_id)

View File

@@ -0,0 +1,86 @@
import uuid
from datetime import datetime, timezone
from typing import Optional

from sqlalchemy import Boolean, DateTime, Enum as SAEnum, ForeignKey, String
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship

from app.domain.models import ExportJobStatus, SenderRole, ThreadType

from .db import Base
class Document(Base):
    """Row in ``documents``: routing metadata plus the key of the stored payload."""

    __tablename__ = "documents"

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
    )
    reference_number: Mapped[str] = mapped_column(String(50), index=True)
    authority_id: Mapped[str] = mapped_column(String(50), index=True)
    status: Mapped[str] = mapped_column(String(20), default="RECEIVED")
    assigned_unit: Mapped[Optional[str]] = mapped_column(String(100), nullable=True)
    # Key under which the encrypted payload was stored by the storage adapter.
    storage_path: Mapped[str] = mapped_column(String(255))
    # The column is timezone-aware, so the default must be an aware datetime;
    # a naive utcnow() value would be interpreted relative to the local zone.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
    )
    retention_date: Mapped[Optional[datetime]] = mapped_column(
        DateTime(timezone=True), nullable=True
    )
    personal_archive: Mapped[bool] = mapped_column(Boolean, default=False)

    threads: Mapped[list["Thread"]] = relationship(back_populates="document")
class Thread(Base):
    """Row in ``threads``: one interaction thread attached to a document."""

    __tablename__ = "threads"

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
    )
    document_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), ForeignKey("documents.id"), index=True
    )
    type: Mapped[ThreadType] = mapped_column(SAEnum(ThreadType))
    assigned_official_id: Mapped[Optional[str]] = mapped_column(
        String(100), nullable=True
    )
    # Aware UTC default to match the timezone-aware column type.
    last_activity_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=lambda: datetime.now(timezone.utc),
        index=True,
    )

    document: Mapped[Document] = relationship(back_populates="threads")
    messages: Mapped[list["Message"]] = relationship(back_populates="thread")
class Message(Base):
    """Row in ``messages``: a single entry in a thread's message log."""

    __tablename__ = "messages"

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
    )
    thread_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), ForeignKey("threads.id"), index=True
    )
    sender_role: Mapped[SenderRole] = mapped_column(SAEnum(SenderRole))
    # Message content exactly as received; unbounded string column.
    content_blob: Mapped[str] = mapped_column(String)
    # Aware UTC default to match the timezone-aware column type.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=lambda: datetime.now(timezone.utc),
        index=True,
    )

    thread: Mapped[Thread] = relationship(back_populates="messages")
class ExportJob(Base):
    """Row in ``export_jobs``: persistent representation of an export job.

    NOTE(review): the export service shown alongside this model works on the
    in-memory registry; whether this table is written anywhere is not visible
    from here.
    """

    __tablename__ = "export_jobs"

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
    )
    case_id: Mapped[str] = mapped_column(String(50), index=True)
    target_system: Mapped[str] = mapped_column(String(100))
    include_attachments: Mapped[bool] = mapped_column(Boolean, default=True)
    status: Mapped[ExportJobStatus] = mapped_column(
        SAEnum(ExportJobStatus), default=ExportJobStatus.QUEUED
    )
    # Aware UTC defaults to match the timezone-aware column type.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
    )
    # onupdate keeps the column current on every UPDATE issued through the ORM.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=lambda: datetime.now(timezone.utc),
        onupdate=lambda: datetime.now(timezone.utc),
    )

View File

@@ -0,0 +1,9 @@
from app.domain.models import DocumentMetadata
async def route_document(meta: DocumentMetadata) -> str:
    """Pick the unit responsible for a document (stub implementation).

    Documents whose type is ``NOTICE`` (compared case-insensitively) go to the
    authority's notice team; everything else goes to its default team. A real
    implementation would query a routing rules table or service instead.
    """
    is_notice = meta.docType.upper() == "NOTICE"
    return (
        f"{meta.authorityId}-NoticeTeam"
        if is_notice
        else f"{meta.authorityId}-DefaultTeam"
    )

View File

@@ -0,0 +1,15 @@
import uuid
from pathlib import Path
# Directory holding the stored payload blobs; the path is relative, so it is
# resolved against the process's current working directory.
STORAGE_ROOT = Path("data/blobstore")
async def save_encrypted_payload(payload_b64: str) -> str:
    """Store *payload_b64* as a new blob and return its storage key.

    The key is a fresh ``<uuid4>.blob`` file name below ``STORAGE_ROOT``. The
    file system work runs in a worker thread so the event loop is not blocked
    while the payload is written.
    """
    # Local import: the module's import block does not include asyncio.
    import asyncio

    key = f"{uuid.uuid4()}.blob"

    def _write() -> None:
        STORAGE_ROOT.mkdir(parents=True, exist_ok=True)
        (STORAGE_ROOT / key).write_text(payload_b64, encoding="utf-8")

    await asyncio.to_thread(_write)
    return key
async def load_encrypted_payload(path_key: str) -> str:
    """Return the stored payload text for *path_key*.

    The read runs in a worker thread so the event loop is not blocked.

    Raises:
        ValueError: if *path_key* resolves to a location outside
            ``STORAGE_ROOT`` (e.g. via ``..`` segments or an absolute path).
        FileNotFoundError: if no blob exists for the key.
    """
    # Local import: the module's import block does not include asyncio.
    import asyncio

    def _read() -> str:
        root = STORAGE_ROOT.resolve()
        target = (root / path_key).resolve()
        # Never read outside the blob store, whatever the key looks like.
        if not target.is_relative_to(root):
            raise ValueError(f"storage key escapes storage root: {path_key!r}")
        return target.read_text(encoding="utf-8")

    return await asyncio.to_thread(_read)

View File

@@ -0,0 +1,19 @@
from fastapi import Depends, HTTPException, status
from sqlalchemy.ext.asyncio import AsyncSession
from app.adapters.db import get_session
from app.adapters.auth import get_current_user, UserContext
async def citizen_user(user: UserContext = Depends(get_current_user)) -> UserContext:
    """Return the current user if they hold ``citizen:write``, else raise 403."""
    if user.has_scope("citizen:write"):
        return user
    raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden")
async def official_user(user: UserContext = Depends(get_current_user)) -> UserContext:
    """Return the current user if they hold any official scope, else raise 403.

    Either ``official:read`` or ``official:write`` is sufficient.
    """
    accepted_scopes = ("official:read", "official:write")
    if any(user.has_scope(scope) for scope in accepted_scopes):
        return user
    raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden")
# Pre-built dependency markers intended to be used as parameter defaults in
# route handlers: a database session, a citizen-scoped user and an
# official-scoped user respectively.
DBSessionDep = Depends(get_session)
CitizenDep = Depends(citizen_user)
OfficialDep = Depends(official_user)

View File

@@ -0,0 +1,38 @@
import uuid
from fastapi import APIRouter, Depends, HTTPException, status
from sqlalchemy.ext.asyncio import AsyncSession
from app.domain.models import DocumentCreateRequest, DocumentCreatedResponse
from app.service import documents_service
from app.api.dependencies import DBSessionDep, CitizenDep
# Router for the document endpoints; every route below is served under /documents.
router = APIRouter(prefix="/documents", tags=["documents"])
@router.post(
    "",
    response_model=DocumentCreatedResponse,
    status_code=status.HTTP_201_CREATED,
)
async def create_document(
    payload: DocumentCreateRequest,
    session: AsyncSession = DBSessionDep,
    citizen = CitizenDep,
):
    """Accept a new document and delegate its creation to the documents service.

    The ``citizen`` dependency only enforces the required scope; its value is
    not used in the handler body.
    """
    created = await documents_service.create_document(payload, session)
    return created
@router.get("/{document_id}", response_model=DocumentCreatedResponse)
async def get_document(
    document_id: str,
    session: AsyncSession = DBSessionDep,
    citizen = CitizenDep,
):
    """Return id, status and assigned unit of one document.

    Responds with 404 both when *document_id* is not a valid UUID and when no
    document with that id exists.
    """
    not_found = HTTPException(status_code=404, detail="Document not found")

    try:
        doc_uuid = uuid.UUID(document_id)
    except ValueError:
        raise not_found

    doc = await documents_service.get_document(doc_uuid, session)
    if doc is None:
        raise not_found

    return DocumentCreatedResponse(
        id=str(doc.id),
        status=doc.status,
        assignedUnit=doc.assigned_unit,
    )

View File

@@ -0,0 +1,31 @@
from fastapi import APIRouter, Depends, HTTPException, status
from app.api.dependencies import OfficialDep
from app.domain.models import ExportRequest, ExportCreatedResponse, ExportStatusResponse
from app.service import exports_service
# Router for the export endpoints; every route below is served under /exports.
router = APIRouter(prefix="/exports", tags=["exports"])
@router.post(
    "",
    response_model=ExportCreatedResponse,
    status_code=status.HTTP_202_ACCEPTED,
)
async def create_export(
    payload: ExportRequest,
    official = OfficialDep,
):
    """Queue an export job via the exports service and answer with 202.

    The ``official`` dependency only enforces the required scope; its value
    is not used in the handler body.
    """
    accepted = await exports_service.start_export(payload)
    return accepted
@router.get(
    "/{job_id}",
    response_model=ExportStatusResponse,
)
async def get_export_status(
    job_id: str,
    official = OfficialDep,
):
    """Return the status of one export job, or 404 if the job is unknown.

    The exports service signals an unknown job with ``KeyError``, which is
    translated into the HTTP error here.
    """
    try:
        job_status = await exports_service.get_export_status(job_id)
    except KeyError:
        raise HTTPException(status_code=404, detail="Export job not found")
    return job_status

View File

@@ -0,0 +1,85 @@
import uuid
from datetime import datetime
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, Query, status
from sqlalchemy.ext.asyncio import AsyncSession
from app.api.dependencies import DBSessionDep, CitizenDep
from app.domain.models import (
ThreadCreateRequest, ThreadCreatedResponse,
MessageCreateRequest, MessageListResponse, SenderRole,
)
from app.service import threads_service
# Router for thread endpoints; no prefix, each route spells out its full path.
router = APIRouter(tags=["threads"])
@router.post(
    "/documents/{document_id}/threads",
    response_model=ThreadCreatedResponse,
    status_code=status.HTTP_201_CREATED,
)
async def create_thread_for_document(
    document_id: str,
    payload: ThreadCreateRequest,
    session: AsyncSession = DBSessionDep,
    citizen = CitizenDep,
):
    """Open a new thread on a document.

    Responds with 404 when *document_id* is not a valid UUID or does not
    refer to an existing document.
    """
    # Local import: only needed to translate the database error below.
    from sqlalchemy.exc import IntegrityError

    try:
        doc_uuid = uuid.UUID(document_id)
    except ValueError:
        raise HTTPException(status_code=404, detail="Document not found")

    try:
        return await threads_service.create_thread(
            document_id=doc_uuid,
            req=payload,
            session=session,
            citizen_id=citizen.sub,
        )
    except IntegrityError:
        # The thread row references documents.id; inserting it for an unknown
        # document violates that foreign key. Report it as "not found"
        # instead of letting it surface as a 500.
        await session.rollback()
        raise HTTPException(status_code=404, detail="Document not found")
@router.get(
    "/threads/{thread_id}/messages",
    response_model=MessageListResponse,
)
async def get_thread_messages(
    thread_id: str,
    limit: int = Query(20, ge=1, le=100),
    before: Optional[datetime] = Query(None),
    session: AsyncSession = DBSessionDep,
    user = CitizenDep,
):
    """Return one page of a thread's messages.

    ``limit`` must be between 1 and 100 (default 20); ``before`` is the
    optional timestamp cursor handed to the threads service. A *thread_id*
    that is not a valid UUID yields 404.
    """
    try:
        thread_uuid = uuid.UUID(thread_id)
    except ValueError:
        raise HTTPException(status_code=404, detail="Thread not found")

    page = await threads_service.list_messages(
        thread_id=thread_uuid,
        session=session,
        limit=limit,
        before=before,
    )
    return page
@router.post(
    "/threads/{thread_id}/messages",
    # Item type of MessageListResponse.data (the message DTO), read through
    # Pydantic v2's `model_fields` rather than the deprecated `__fields__`.
    response_model=MessageListResponse.model_fields["data"].annotation.__args__[0],
    status_code=status.HTTP_201_CREATED,
)
async def post_thread_message(
    thread_id: str,
    payload: MessageCreateRequest,
    session: AsyncSession = DBSessionDep,
    user = CitizenDep,
):
    """Append a message to a thread on behalf of a citizen.

    The sender role is always ``CITIZEN``. Responds with 404 when *thread_id*
    is not a valid UUID or does not refer to an existing thread.
    """
    # Local import: only needed to translate the database errors below.
    from sqlalchemy.exc import IntegrityError, NoResultFound

    try:
        thread_uuid = uuid.UUID(thread_id)
    except ValueError:
        raise HTTPException(status_code=404, detail="Thread not found")

    try:
        msg = await threads_service.add_message(
            thread_id=thread_uuid,
            req=payload,
            session=session,
            sender_role=SenderRole.CITIZEN,
        )
    except (IntegrityError, NoResultFound):
        # An unknown thread shows up either as a foreign-key violation on the
        # message insert or as a failed thread lookup; both mean "not found".
        await session.rollback()
        raise HTTPException(status_code=404, detail="Thread not found")
    return msg

View File

@@ -0,0 +1,72 @@
from datetime import datetime
from enum import Enum
from typing import Optional, List
from pydantic import BaseModel, Field
class ThreadType(str, Enum):
    """Kinds of interaction thread; members are plain strings."""

    TEXT_CHAT = "TEXT_CHAT"
    CALLBACK_REQUEST = "CALLBACK_REQUEST"
    APPOINTMENT = "APPOINTMENT"
class SenderRole(str, Enum):
    """Who authored a message; members are plain strings."""

    CITIZEN = "CITIZEN"
    OFFICIAL = "OFFICIAL"
    SYSTEM = "SYSTEM"
class DocumentMetadata(BaseModel):
    """Plaintext metadata sent with a document; all fields are required."""

    # The three string fields are each limited to 50 characters.
    authorityId: str = Field(..., max_length=50)
    referenceNumber: str = Field(..., max_length=50)
    docType: str = Field(..., max_length=50)
    issuedAt: datetime
class DocumentCreateRequest(BaseModel):
    """Request body for creating a document: metadata plus opaque payload."""

    metadata: DocumentMetadata
    encryptedPayload: str  # base64-encoded opaque blob
class DocumentCreatedResponse(BaseModel):
    """Document summary: id, status and (optionally) the assigned unit."""

    id: str
    status: str
    assignedUnit: Optional[str] = None
class ThreadCreateRequest(BaseModel):
    """Request body for opening a thread; only ``type`` is required."""

    type: ThreadType
    initialMessage: Optional[str] = None
    preferredTimeSlot: Optional[datetime] = None
class ThreadCreatedResponse(BaseModel):
    """Response describing a newly created thread."""

    threadId: str
    status: str
    estimatedWaitTime: Optional[str] = None
class MessageCreateRequest(BaseModel):
    """Request body for posting a message to a thread."""

    content: str  # encrypted message string
class MessageDto(BaseModel):
    """One message as exposed by the API."""

    id: str
    senderRole: SenderRole
    content: str
    timestamp: datetime
class MessageListResponse(BaseModel):
    """One page of messages plus paging information."""

    data: List[MessageDto]
    # Free-form dict; its keys are not constrained by this model.
    paging: dict
class ExportRequest(BaseModel):
    """Request body for starting an export; attachments are included by default."""

    caseId: str
    targetSystem: str
    includeAttachments: bool = True
class ExportJobStatus(str, Enum):
    """Possible states of an export job; members are plain strings."""

    QUEUED = "QUEUED"
    RUNNING = "RUNNING"
    COMPLETED = "COMPLETED"
    FAILED = "FAILED"
class ExportCreatedResponse(BaseModel):
    """Response for an accepted export: the job id and its current status."""

    jobId: str
    status: ExportJobStatus
class ExportStatusResponse(BaseModel):
    """Status of an export job, optionally with a URL for polling it."""

    jobId: str
    status: ExportJobStatus
    statusUrl: Optional[str] = None

View File

@@ -0,0 +1,21 @@
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from app.api import documents, threads, exports
# Application object; root_path is set to "/v1".
app = FastAPI(
    title="DirektVermittlungDe API",
    version="0.1.0",
    root_path="/v1",
)

# NOTE(review): CORS is fully open here (any origin, method and header).
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Mount the three API routers on the application.
app.include_router(documents.router)
app.include_router(threads.router)
app.include_router(exports.router)

View File

@@ -0,0 +1,9 @@
# Development helper: create every table known to the ORM metadata.
import asyncio

from app.adapters.db import engine
# Importing Base from the ORM module ensures the model classes are defined,
# and therefore registered on Base.metadata, before create_all runs.
from app.adapters.orm import Base


async def _create() -> None:
    """Create all tables through the async engine, then release its pool."""
    # MetaData.create_all is synchronous; AsyncConnection.run_sync runs it on
    # the async connection. (sqlalchemy.ext.asyncio has no module-level
    # `run_sync`, so the previous import could never succeed.)
    async with engine.begin() as conn:
        await conn.run_sync(Base.metadata.create_all)
    await engine.dispose()


if __name__ == "__main__":
    asyncio.run(_create())

View File

@@ -0,0 +1,42 @@
import uuid
from datetime import datetime, timedelta
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select
from app.domain.models import DocumentCreateRequest, DocumentCreatedResponse
from app.adapters import storage, routing
from app.adapters.orm import Document
# Number of days after creation at which a new document's retention date is set.
DEFAULT_GRACE_PERIOD_DAYS = 90
async def create_document(
    req: DocumentCreateRequest,
    session: AsyncSession,
) -> DocumentCreatedResponse:
    """Store the payload, route the document and persist its record.

    Steps: the encrypted payload is handed to the storage adapter, the
    routing adapter picks the assigned unit from the plaintext metadata, and
    a ``Document`` row in status ``ROUTED`` is committed with a retention
    date ``DEFAULT_GRACE_PERIOD_DAYS`` days from now.

    Returns:
        Id, status and assigned unit of the new document.
    """
    # Local import: the module's import block only brings in datetime/timedelta.
    from datetime import timezone

    storage_key = await storage.save_encrypted_payload(req.encryptedPayload)
    assigned_unit = await routing.route_document(req.metadata)

    # retention_date is a timezone-aware column, so compute it from an aware
    # UTC "now" rather than the naive (and deprecated) utcnow().
    retention_date = datetime.now(timezone.utc) + timedelta(
        days=DEFAULT_GRACE_PERIOD_DAYS
    )

    doc = Document(
        reference_number=req.metadata.referenceNumber,
        authority_id=req.metadata.authorityId,
        status="ROUTED",
        assigned_unit=assigned_unit,
        storage_path=storage_key,
        retention_date=retention_date,
        personal_archive=False,
    )
    session.add(doc)
    await session.commit()
    await session.refresh(doc)

    return DocumentCreatedResponse(
        id=str(doc.id),
        status=doc.status,
        assignedUnit=doc.assigned_unit,
    )
async def get_document(doc_id: uuid.UUID, session: AsyncSession) -> Document | None:
    """Fetch the document with primary key *doc_id*, or ``None`` if absent."""
    by_id = select(Document).where(Document.id == doc_id)
    result = await session.execute(by_id)
    return result.scalar_one_or_none()

View File

@@ -0,0 +1,24 @@
from app.domain.models import ExportRequest, ExportCreatedResponse, ExportStatusResponse
from app.domain.models import ExportJobStatus
from app.adapters import jobs
async def start_export(req: ExportRequest) -> ExportCreatedResponse:
    """Register an export job for *req* and report it as ``QUEUED``."""
    job_fields = {
        "case_id": req.caseId,
        "target_system": req.targetSystem,
        "include_attachments": req.includeAttachments,
    }
    new_job_id = jobs.create_export_job(**job_fields)
    return ExportCreatedResponse(jobId=new_job_id, status=ExportJobStatus.QUEUED)
async def get_export_status(job_id: str) -> ExportStatusResponse:
    """Look up an export job and describe its current status.

    Raises:
        KeyError: if the job registry has no entry for *job_id*.
    """
    job = jobs.get_job(job_id)
    if job:
        return ExportStatusResponse(
            jobId=job["jobId"],
            status=job["status"],
            statusUrl=f"/exports/{job_id}",
        )
    raise KeyError(job_id)

View File

@@ -0,0 +1,106 @@
import uuid
from datetime import datetime
from typing import Optional
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, desc
from app.domain.models import (
ThreadCreateRequest, ThreadCreatedResponse,
MessageCreateRequest, MessageListResponse, MessageDto,
SenderRole,
)
from app.adapters.orm import Thread, Message
async def create_thread(
    document_id: uuid.UUID,
    req: ThreadCreateRequest,
    session: AsyncSession,
    citizen_id: str,
) -> ThreadCreatedResponse:
    """Persist a new thread for a document, optionally with a first message.

    ``citizen_id`` is accepted but not used by this function. The returned
    status and estimated wait time are fixed values.
    """
    new_thread = Thread(
        document_id=document_id,
        type=req.type,
        assigned_official_id=None,
    )
    session.add(new_thread)
    # Flush so the thread's primary key exists before a message refers to it.
    await session.flush()

    first_message = req.initialMessage
    if first_message:
        session.add(
            Message(
                thread_id=new_thread.id,
                sender_role=SenderRole.CITIZEN,
                content_blob=first_message,
            )
        )

    await session.commit()
    await session.refresh(new_thread)

    return ThreadCreatedResponse(
        threadId=str(new_thread.id),
        status="PENDING_OFFICIAL",
        estimatedWaitTime="4h",
    )
async def add_message(
    thread_id: uuid.UUID,
    req: MessageCreateRequest,
    session: AsyncSession,
    sender_role: SenderRole,
) -> MessageDto:
    """Append a message to an existing thread and bump its activity time.

    Raises:
        sqlalchemy.exc.NoResultFound: if no thread with *thread_id* exists.
    """
    # Local import: the module's import block only brings in `datetime`.
    from datetime import timezone

    # Load the thread before queueing the message: with the message already
    # pending, the autoflush triggered by this query would attempt the insert
    # first and fail on the foreign key for an unknown thread.
    thread_result = await session.execute(
        select(Thread).where(Thread.id == thread_id)
    )
    thread = thread_result.scalar_one()

    msg = Message(
        thread_id=thread_id,
        sender_role=sender_role,
        content_blob=req.content,
    )
    session.add(msg)

    # Aware UTC timestamp to match the timezone-aware column.
    thread.last_activity_at = datetime.now(timezone.utc)

    await session.commit()
    await session.refresh(msg)

    return MessageDto(
        id=str(msg.id),
        senderRole=msg.sender_role,
        content=msg.content_blob,
        timestamp=msg.created_at,
    )
async def list_messages(
    thread_id: uuid.UUID,
    session: AsyncSession,
    limit: int = 20,
    before: Optional[datetime] = None,
) -> MessageListResponse:
    """Return one page of a thread's messages, newest first.

    Args:
        thread_id: Thread whose messages are listed.
        session: Database session used for the query.
        limit: Maximum number of messages in the page.
        before: Cursor; when given, only messages created strictly before
            this instant are returned. ``None`` starts from the newest message.

    Returns:
        The page plus ``paging["nextCursor"]``: the ISO timestamp of the
        oldest message in the page, or ``None`` when the page is empty.
    """
    conditions = [Message.thread_id == thread_id]
    if before is not None:
        # Strict comparison: nextCursor is the timestamp of the last message
        # already delivered, so an inclusive bound would repeat that message
        # at the top of every following page.
        # NOTE: messages sharing the exact cursor timestamp across a page
        # boundary are skipped; a composite (created_at, id) cursor would be
        # needed to rule that out.
        conditions.append(Message.created_at < before)

    stmt = (
        select(Message)
        .where(*conditions)
        .order_by(desc(Message.created_at))
        .limit(limit)
    )
    res = await session.execute(stmt)
    messages = list(res.scalars())

    next_cursor = messages[-1].created_at.isoformat() if messages else None

    data = [
        MessageDto(
            id=str(m.id),
            senderRole=m.sender_role,
            content=m.content_blob,
            timestamp=m.created_at,
        )
        for m in messages
    ]
    return MessageListResponse(
        data=data,
        paging={"nextCursor": next_cursor},
    )

View File

@@ -0,0 +1,9 @@
"""Placeholder for a real export worker.
In production, this module would:
- Consume export jobs from a queue (e.g. Redis, RabbitMQ).
- Load the document and attachments.
- Call the target eAkte / DMS system.
- Update the ExportJob status via `jobs.set_job_status`.
"""