generated from coulomb/repo-seed
Improved datamodel and deterministic generation
This commit is contained in:
@@ -51,6 +51,7 @@ class RegistryStore:
|
||||
self._ensure_repository_scopes_table(connection)
|
||||
self._ensure_approved_source_ref_columns(connection)
|
||||
self._ensure_evidence_relationship_columns(connection)
|
||||
self._ensure_characteristic_classification_columns(connection)
|
||||
self._ensure_expectation_gaps_table(connection)
|
||||
|
||||
def connect(self) -> sqlite3.Connection:
|
||||
@@ -108,6 +109,60 @@ class RegistryStore:
|
||||
"""
|
||||
)
|
||||
|
||||
def _ensure_characteristic_classification_columns(
|
||||
self,
|
||||
connection: sqlite3.Connection,
|
||||
) -> None:
|
||||
defaults = {
|
||||
"candidate_abilities": "ability",
|
||||
"approved_abilities": "ability",
|
||||
"candidate_capabilities": "capability",
|
||||
"approved_capabilities": "capability",
|
||||
"candidate_features": "",
|
||||
"approved_features": "",
|
||||
}
|
||||
for table, default_class in defaults.items():
|
||||
columns = {
|
||||
row["name"]
|
||||
for row in connection.execute(f"PRAGMA table_info({table})").fetchall()
|
||||
}
|
||||
if "primary_class" not in columns:
|
||||
connection.execute(
|
||||
f"ALTER TABLE {table} ADD COLUMN primary_class TEXT NOT NULL DEFAULT '{default_class}'"
|
||||
)
|
||||
if "attributes" not in columns:
|
||||
connection.execute(
|
||||
f"ALTER TABLE {table} ADD COLUMN attributes TEXT NOT NULL DEFAULT '[]'"
|
||||
)
|
||||
|
||||
for table in ("candidate_abilities", "approved_abilities"):
|
||||
connection.execute(
|
||||
f"""
|
||||
UPDATE {table}
|
||||
SET primary_class = COALESCE(NULLIF(primary_class, ''), 'ability'),
|
||||
attributes = COALESCE(NULLIF(attributes, ''), '[]')
|
||||
WHERE primary_class = '' OR attributes = ''
|
||||
"""
|
||||
)
|
||||
for table in ("candidate_capabilities", "approved_capabilities"):
|
||||
connection.execute(
|
||||
f"""
|
||||
UPDATE {table}
|
||||
SET primary_class = COALESCE(NULLIF(primary_class, ''), 'capability'),
|
||||
attributes = COALESCE(NULLIF(attributes, ''), '[]')
|
||||
WHERE primary_class = '' OR attributes = ''
|
||||
"""
|
||||
)
|
||||
for table in ("candidate_features", "approved_features"):
|
||||
connection.execute(
|
||||
f"""
|
||||
UPDATE {table}
|
||||
SET primary_class = COALESCE(NULLIF(primary_class, ''), type),
|
||||
attributes = COALESCE(NULLIF(attributes, ''), json_array(type))
|
||||
WHERE primary_class = '' OR attributes = ''
|
||||
"""
|
||||
)
|
||||
|
||||
def _ensure_content_chunks_table(self, connection: sqlite3.Connection) -> None:
|
||||
connection.execute(
|
||||
"""
|
||||
@@ -361,14 +416,17 @@ class RegistryStore:
|
||||
ability_cursor = connection.execute(
|
||||
"""
|
||||
INSERT INTO candidate_abilities
|
||||
(repository_id, analysis_run_id, name, description, confidence, source_refs)
|
||||
VALUES (?, ?, ?, ?, ?, ?)
|
||||
(repository_id, analysis_run_id, name, description, primary_class,
|
||||
attributes, confidence, source_refs)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
(
|
||||
repository_id,
|
||||
analysis_run_id,
|
||||
ability.name,
|
||||
ability.description,
|
||||
ability.primary_class or "ability",
|
||||
self._attributes_to_json(ability.attributes),
|
||||
ability.confidence,
|
||||
self._source_refs_to_json(ability.source_refs),
|
||||
),
|
||||
@@ -379,8 +437,8 @@ class RegistryStore:
|
||||
"""
|
||||
INSERT INTO candidate_capabilities
|
||||
(repository_id, analysis_run_id, ability_id, name, description,
|
||||
inputs, outputs, confidence, source_refs)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
inputs, outputs, primary_class, attributes, confidence, source_refs)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
(
|
||||
repository_id,
|
||||
@@ -390,6 +448,8 @@ class RegistryStore:
|
||||
capability.description,
|
||||
json.dumps(capability.inputs),
|
||||
json.dumps(capability.outputs),
|
||||
capability.primary_class or "capability",
|
||||
self._attributes_to_json(capability.attributes),
|
||||
capability.confidence,
|
||||
self._source_refs_to_json(capability.source_refs),
|
||||
),
|
||||
@@ -400,8 +460,8 @@ class RegistryStore:
|
||||
"""
|
||||
INSERT INTO candidate_features
|
||||
(repository_id, analysis_run_id, capability_id, name, type,
|
||||
location, confidence, source_refs)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
||||
primary_class, attributes, location, confidence, source_refs)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
(
|
||||
repository_id,
|
||||
@@ -409,6 +469,10 @@ class RegistryStore:
|
||||
capability_id,
|
||||
feature.name,
|
||||
feature.type,
|
||||
feature.primary_class or feature.type,
|
||||
self._attributes_to_json(
|
||||
feature.attributes or [feature.type]
|
||||
),
|
||||
feature.location,
|
||||
feature.confidence,
|
||||
self._source_refs_to_json(feature.source_refs),
|
||||
@@ -448,7 +512,8 @@ class RegistryStore:
|
||||
with self.connect() as connection:
|
||||
ability_rows = connection.execute(
|
||||
"""
|
||||
SELECT id, name, description, confidence, status, source_refs
|
||||
SELECT id, name, description, primary_class, attributes, confidence,
|
||||
status, source_refs
|
||||
FROM candidate_abilities
|
||||
WHERE repository_id = ? AND analysis_run_id = ?
|
||||
ORDER BY id
|
||||
@@ -458,7 +523,7 @@ class RegistryStore:
|
||||
capability_rows = connection.execute(
|
||||
"""
|
||||
SELECT id, ability_id, name, description, inputs, outputs,
|
||||
confidence, status, source_refs
|
||||
primary_class, attributes, confidence, status, source_refs
|
||||
FROM candidate_capabilities
|
||||
WHERE repository_id = ? AND analysis_run_id = ?
|
||||
ORDER BY id
|
||||
@@ -467,8 +532,8 @@ class RegistryStore:
|
||||
).fetchall()
|
||||
feature_rows = connection.execute(
|
||||
"""
|
||||
SELECT id, capability_id, name, type, location, confidence,
|
||||
status, source_refs
|
||||
SELECT id, capability_id, name, type, primary_class, attributes,
|
||||
location, confidence, status, source_refs
|
||||
FROM candidate_features
|
||||
WHERE repository_id = ? AND analysis_run_id = ?
|
||||
ORDER BY id
|
||||
@@ -498,6 +563,8 @@ class RegistryStore:
|
||||
status=row["status"],
|
||||
source_refs=self._source_refs_from_json(row["source_refs"]),
|
||||
confidence_label=confidence_label(row["confidence"]),
|
||||
primary_class=row["primary_class"] or row["type"],
|
||||
attributes=self._attributes_from_json(row["attributes"]),
|
||||
)
|
||||
)
|
||||
|
||||
@@ -531,6 +598,8 @@ class RegistryStore:
|
||||
status=row["status"],
|
||||
source_refs=self._source_refs_from_json(row["source_refs"]),
|
||||
confidence_label=confidence_label(row["confidence"]),
|
||||
primary_class=row["primary_class"] or "capability",
|
||||
attributes=self._attributes_from_json(row["attributes"]),
|
||||
features=features_by_capability.get(row["id"], []),
|
||||
evidence=evidence_by_capability.get(row["id"], []),
|
||||
)
|
||||
@@ -545,6 +614,8 @@ class RegistryStore:
|
||||
status=row["status"],
|
||||
source_refs=self._source_refs_from_json(row["source_refs"]),
|
||||
confidence_label=confidence_label(row["confidence"]),
|
||||
primary_class=row["primary_class"] or "ability",
|
||||
attributes=self._attributes_from_json(row["attributes"]),
|
||||
capabilities=capabilities_by_ability.get(row["id"], []),
|
||||
)
|
||||
for row in ability_rows
|
||||
@@ -861,17 +932,22 @@ class RegistryStore:
|
||||
name: str,
|
||||
description: str,
|
||||
confidence: float,
|
||||
primary_class: str = "ability",
|
||||
attributes: list[str] | None = None,
|
||||
) -> None:
|
||||
with self.connect() as connection:
|
||||
cursor = connection.execute(
|
||||
"""
|
||||
UPDATE candidate_abilities
|
||||
SET name = ?, description = ?, confidence = ?
|
||||
SET name = ?, description = ?, primary_class = ?, attributes = ?,
|
||||
confidence = ?
|
||||
WHERE id = ? AND repository_id = ? AND analysis_run_id = ?
|
||||
""",
|
||||
(
|
||||
name,
|
||||
description,
|
||||
primary_class or "ability",
|
||||
self._attributes_to_json(attributes or []),
|
||||
confidence,
|
||||
candidate_ability_id,
|
||||
repository_id,
|
||||
@@ -894,17 +970,22 @@ class RegistryStore:
|
||||
name: str,
|
||||
description: str,
|
||||
confidence: float,
|
||||
primary_class: str = "capability",
|
||||
attributes: list[str] | None = None,
|
||||
) -> None:
|
||||
with self.connect() as connection:
|
||||
cursor = connection.execute(
|
||||
"""
|
||||
UPDATE candidate_capabilities
|
||||
SET name = ?, description = ?, confidence = ?
|
||||
SET name = ?, description = ?, primary_class = ?, attributes = ?,
|
||||
confidence = ?
|
||||
WHERE id = ? AND repository_id = ? AND analysis_run_id = ?
|
||||
""",
|
||||
(
|
||||
name,
|
||||
description,
|
||||
primary_class or "capability",
|
||||
self._attributes_to_json(attributes or []),
|
||||
confidence,
|
||||
candidate_capability_id,
|
||||
repository_id,
|
||||
@@ -918,6 +999,46 @@ class RegistryStore:
|
||||
f"{repository_id} analysis run {analysis_run_id}"
|
||||
)
|
||||
|
||||
def update_candidate_feature(
    self,
    repository_id: int,
    analysis_run_id: int,
    candidate_feature_id: int,
    *,
    name: str,
    type: str,
    location: str,
    confidence: float,
    primary_class: str | None = None,
    attributes: list[str] | None = None,
) -> None:
    """Overwrite the editable fields of one candidate feature row.

    The row is matched on its id together with ``repository_id`` and
    ``analysis_run_id``.

    Args:
        repository_id: Repository the feature must belong to.
        analysis_run_id: Analysis run the feature must belong to.
        candidate_feature_id: Primary key of the ``candidate_features`` row.
        name: New feature name.
        type: New feature type; also the fallback for the two fields below.
        location: New feature location.
        confidence: New confidence value.
        primary_class: New primary class; a falsy value stores ``type``.
        attributes: New attribute list; a falsy value stores ``[type]``.

    Raises:
        NotFoundError: If the UPDATE matched no row.
    """
    statement = """
        UPDATE candidate_features
        SET name = ?, type = ?, primary_class = ?, attributes = ?,
            location = ?, confidence = ?
        WHERE id = ? AND repository_id = ? AND analysis_run_id = ?
        """
    with self.connect() as connection:
        resolved_class = primary_class if primary_class else type
        resolved_attributes = attributes if attributes else [type]
        bindings = (
            name,
            type,
            resolved_class,
            self._attributes_to_json(resolved_attributes),
            location,
            confidence,
            candidate_feature_id,
            repository_id,
            analysis_run_id,
        )
        matched = connection.execute(statement, bindings).rowcount
        if matched != 0:
            return
        # No row matched the (id, repository, analysis run) triple.
        raise NotFoundError(
            "candidate feature "
            f"{candidate_feature_id} was not found for repository "
            f"{repository_id} analysis run {analysis_run_id}"
        )
|
||||
|
||||
def relink_candidate_capability(
|
||||
self,
|
||||
repository_id: int,
|
||||
@@ -1604,15 +1725,24 @@ class RegistryStore:
|
||||
name: str,
|
||||
description: str,
|
||||
confidence: float,
|
||||
primary_class: str = "ability",
|
||||
attributes: list[str] | None = None,
|
||||
) -> int:
|
||||
with self.connect() as connection:
|
||||
cursor = connection.execute(
|
||||
"""
|
||||
INSERT INTO approved_abilities
|
||||
(repository_id, name, description, confidence)
|
||||
VALUES (?, ?, ?, ?)
|
||||
(repository_id, name, description, primary_class, attributes, confidence)
|
||||
VALUES (?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
(repository_id, name, description, confidence),
|
||||
(
|
||||
repository_id,
|
||||
name,
|
||||
description,
|
||||
primary_class or "ability",
|
||||
self._attributes_to_json(attributes or []),
|
||||
confidence,
|
||||
),
|
||||
)
|
||||
return int(cursor.lastrowid)
|
||||
|
||||
@@ -1638,6 +1768,8 @@ class RegistryStore:
|
||||
name: str | None = None,
|
||||
description: str | None = None,
|
||||
confidence: float | None = None,
|
||||
primary_class: str | None = None,
|
||||
attributes: list[str] | None = None,
|
||||
) -> None:
|
||||
self._update_approved_row(
|
||||
table="approved_abilities",
|
||||
@@ -1648,6 +1780,12 @@ class RegistryStore:
|
||||
"name": name,
|
||||
"description": description,
|
||||
"confidence": confidence,
|
||||
"primary_class": primary_class,
|
||||
"attributes": (
|
||||
self._attributes_to_json(attributes)
|
||||
if attributes is not None
|
||||
else None
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
@@ -1669,13 +1807,16 @@ class RegistryStore:
|
||||
inputs: list[str],
|
||||
outputs: list[str],
|
||||
confidence: float,
|
||||
primary_class: str = "capability",
|
||||
attributes: list[str] | None = None,
|
||||
) -> int:
|
||||
with self.connect() as connection:
|
||||
cursor = connection.execute(
|
||||
"""
|
||||
INSERT INTO approved_capabilities
|
||||
(repository_id, ability_id, name, description, inputs, outputs, confidence)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?)
|
||||
(repository_id, ability_id, name, description, inputs, outputs,
|
||||
primary_class, attributes, confidence)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
(
|
||||
repository_id,
|
||||
@@ -1684,6 +1825,8 @@ class RegistryStore:
|
||||
description,
|
||||
json.dumps(inputs),
|
||||
json.dumps(outputs),
|
||||
primary_class or "capability",
|
||||
self._attributes_to_json(attributes or []),
|
||||
confidence,
|
||||
),
|
||||
)
|
||||
@@ -1713,6 +1856,8 @@ class RegistryStore:
|
||||
inputs: list[str] | None = None,
|
||||
outputs: list[str] | None = None,
|
||||
confidence: float | None = None,
|
||||
primary_class: str | None = None,
|
||||
attributes: list[str] | None = None,
|
||||
) -> None:
|
||||
self._update_approved_row(
|
||||
table="approved_capabilities",
|
||||
@@ -1725,6 +1870,12 @@ class RegistryStore:
|
||||
"inputs": json.dumps(inputs) if inputs is not None else None,
|
||||
"outputs": json.dumps(outputs) if outputs is not None else None,
|
||||
"confidence": confidence,
|
||||
"primary_class": primary_class,
|
||||
"attributes": (
|
||||
self._attributes_to_json(attributes)
|
||||
if attributes is not None
|
||||
else None
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
@@ -1746,19 +1897,24 @@ class RegistryStore:
|
||||
location: str,
|
||||
confidence: float,
|
||||
source_refs: list[SourceReference] | None = None,
|
||||
primary_class: str | None = None,
|
||||
attributes: list[str] | None = None,
|
||||
) -> int:
|
||||
with self.connect() as connection:
|
||||
cursor = connection.execute(
|
||||
"""
|
||||
INSERT INTO approved_features
|
||||
(repository_id, capability_id, name, type, location, confidence, source_refs)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?)
|
||||
(repository_id, capability_id, name, type, primary_class, attributes,
|
||||
location, confidence, source_refs)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
(
|
||||
repository_id,
|
||||
capability_id,
|
||||
name,
|
||||
type,
|
||||
primary_class or type,
|
||||
self._attributes_to_json(attributes or [type]),
|
||||
location,
|
||||
confidence,
|
||||
self._source_refs_to_json(source_refs or []),
|
||||
@@ -1775,6 +1931,8 @@ class RegistryStore:
|
||||
type: str | None = None,
|
||||
location: str | None = None,
|
||||
confidence: float | None = None,
|
||||
primary_class: str | None = None,
|
||||
attributes: list[str] | None = None,
|
||||
) -> None:
|
||||
self._update_approved_row(
|
||||
table="approved_features",
|
||||
@@ -1784,6 +1942,12 @@ class RegistryStore:
|
||||
values={
|
||||
"name": name,
|
||||
"type": type,
|
||||
"primary_class": primary_class,
|
||||
"attributes": (
|
||||
self._attributes_to_json(attributes)
|
||||
if attributes is not None
|
||||
else None
|
||||
),
|
||||
"location": location,
|
||||
"confidence": confidence,
|
||||
},
|
||||
@@ -1968,13 +2132,15 @@ class RegistryStore:
|
||||
ability_cursor = connection.execute(
|
||||
"""
|
||||
INSERT INTO approved_abilities
|
||||
(repository_id, name, description, confidence)
|
||||
VALUES (?, ?, ?, ?)
|
||||
(repository_id, name, description, primary_class, attributes, confidence)
|
||||
VALUES (?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
(
|
||||
repository_id,
|
||||
ability.name,
|
||||
ability.description,
|
||||
ability.primary_class or "ability",
|
||||
self._attributes_to_json(ability.attributes),
|
||||
ability.confidence,
|
||||
),
|
||||
)
|
||||
@@ -1986,8 +2152,8 @@ class RegistryStore:
|
||||
"""
|
||||
INSERT INTO approved_capabilities
|
||||
(repository_id, ability_id, name, description, inputs, outputs,
|
||||
confidence)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?)
|
||||
primary_class, attributes, confidence)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
(
|
||||
repository_id,
|
||||
@@ -1996,6 +2162,8 @@ class RegistryStore:
|
||||
capability.description,
|
||||
json.dumps(capability.inputs),
|
||||
json.dumps(capability.outputs),
|
||||
capability.primary_class or "capability",
|
||||
self._attributes_to_json(capability.attributes),
|
||||
capability.confidence,
|
||||
),
|
||||
)
|
||||
@@ -2006,15 +2174,19 @@ class RegistryStore:
|
||||
connection.execute(
|
||||
"""
|
||||
INSERT INTO approved_features
|
||||
(repository_id, capability_id, name, type, location,
|
||||
confidence, source_refs)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?)
|
||||
(repository_id, capability_id, name, type, primary_class,
|
||||
attributes, location, confidence, source_refs)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
(
|
||||
repository_id,
|
||||
approved_capability_id,
|
||||
feature.name,
|
||||
feature.type,
|
||||
feature.primary_class or feature.type,
|
||||
self._attributes_to_json(
|
||||
feature.attributes or [feature.type]
|
||||
),
|
||||
feature.location,
|
||||
feature.confidence,
|
||||
self._source_refs_to_json(feature.source_refs),
|
||||
@@ -2051,7 +2223,7 @@ class RegistryStore:
|
||||
with self.connect() as connection:
|
||||
ability_rows = connection.execute(
|
||||
"""
|
||||
SELECT id, name, description, confidence
|
||||
SELECT id, name, description, primary_class, attributes, confidence
|
||||
FROM approved_abilities
|
||||
WHERE repository_id = ?
|
||||
ORDER BY id
|
||||
@@ -2060,7 +2232,8 @@ class RegistryStore:
|
||||
).fetchall()
|
||||
capability_rows = connection.execute(
|
||||
"""
|
||||
SELECT id, ability_id, name, description, inputs, outputs, confidence
|
||||
SELECT id, ability_id, name, description, inputs, outputs,
|
||||
primary_class, attributes, confidence
|
||||
FROM approved_capabilities
|
||||
WHERE repository_id = ?
|
||||
ORDER BY id
|
||||
@@ -2069,7 +2242,8 @@ class RegistryStore:
|
||||
).fetchall()
|
||||
feature_rows = connection.execute(
|
||||
"""
|
||||
SELECT id, capability_id, name, type, location, confidence, source_refs
|
||||
SELECT id, capability_id, name, type, primary_class, attributes,
|
||||
location, confidence, source_refs
|
||||
FROM approved_features
|
||||
WHERE repository_id = ?
|
||||
ORDER BY id
|
||||
@@ -2098,6 +2272,8 @@ class RegistryStore:
|
||||
confidence=row["confidence"],
|
||||
confidence_label=confidence_label(row["confidence"]),
|
||||
source_refs=self._source_refs_from_json(row["source_refs"]),
|
||||
primary_class=row["primary_class"] or row["type"],
|
||||
attributes=self._attributes_from_json(row["attributes"]),
|
||||
)
|
||||
)
|
||||
|
||||
@@ -2128,6 +2304,8 @@ class RegistryStore:
|
||||
outputs=json.loads(row["outputs"]),
|
||||
confidence=row["confidence"],
|
||||
confidence_label=confidence_label(row["confidence"]),
|
||||
primary_class=row["primary_class"] or "capability",
|
||||
attributes=self._attributes_from_json(row["attributes"]),
|
||||
features=features_by_capability.get(row["id"], []),
|
||||
evidence=evidence_by_capability.get(row["id"], []),
|
||||
)
|
||||
@@ -2140,6 +2318,8 @@ class RegistryStore:
|
||||
description=row["description"],
|
||||
confidence=row["confidence"],
|
||||
confidence_label=confidence_label(row["confidence"]),
|
||||
primary_class=row["primary_class"] or "ability",
|
||||
attributes=self._attributes_from_json(row["attributes"]),
|
||||
capabilities=capabilities_by_ability.get(row["id"], []),
|
||||
)
|
||||
for row in ability_rows
|
||||
@@ -2578,6 +2758,17 @@ class RegistryStore:
|
||||
]
|
||||
)
|
||||
|
||||
def _attributes_to_json(self, attributes: list[str]) -> str:
|
||||
return json.dumps([item.strip() for item in attributes if item.strip()])
|
||||
|
||||
def _attributes_from_json(self, value: str) -> list[str]:
|
||||
if not value:
|
||||
return []
|
||||
parsed = json.loads(value)
|
||||
if not isinstance(parsed, list):
|
||||
return []
|
||||
return [str(item) for item in parsed if str(item).strip()]
|
||||
|
||||
def _source_refs_from_json(self, value: str) -> list[SourceReference]:
|
||||
return [
|
||||
SourceReference(
|
||||
|
||||
Reference in New Issue
Block a user