Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add CompiledModel class which can generate DataFrames and Tables #359

Draft
wants to merge 4 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
196 changes: 193 additions & 3 deletions buildingmotif/dataclasses/model.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
from dataclasses import dataclass
from functools import cached_property
from typing import TYPE_CHECKING, Dict, List, Optional

import pandas as pd
import rdflib
import rdflib.query
import rfc3987
from rdflib import URIRef

from buildingmotif import get_building_motif
from buildingmotif.dataclasses.shape_collection import ShapeCollection
from buildingmotif.dataclasses.validation import ValidationContext
from buildingmotif.namespaces import OWL, A
from buildingmotif.namespaces import OWL, SH, A
from buildingmotif.utils import (
Triple,
copy_graph,
Expand Down Expand Up @@ -207,7 +210,7 @@ def validate(
self,
)

def compile(self, shape_collections: List["ShapeCollection"]):
def compile(self, shape_collections: List["ShapeCollection"]) -> "CompiledModel":
"""Compile the graph of a model against a set of ShapeCollections.

:param shape_collections: list of ShapeCollections to compile the model
Expand All @@ -225,9 +228,10 @@ def compile(self, shape_collections: List["ShapeCollection"]):

model_graph = copy_graph(self.graph).skolemize()

return shacl_inference(
compiled_graph = shacl_inference(
model_graph, ontology_graph, engine=self._bm.shacl_engine
)
return CompiledModel(self, shape_collections, compiled_graph)

def test_model_against_shapes(
self,
Expand Down Expand Up @@ -307,3 +311,189 @@ def update_manifest(self, manifest: ShapeCollection):
:type manifest: ShapeCollection
"""
self.get_manifest().graph += manifest.graph


@dataclass
class CompiledModel:
    """
    This class represents a model that has been compiled against a set of ShapeCollections.

    Instances are returned by ``Model.compile``, which passes in the source
    model, the ShapeCollections used for compilation and the graph produced
    by SHACL inference.
    """

    # the Model this compilation was produced from
    model: Model
    # the ShapeCollections the model was compiled against
    shape_collections: List[ShapeCollection]
    # the graph produced by SHACL inference over the model
    _compiled_graph: rdflib.Graph

    @cached_property
    def graph(self) -> rdflib.Graph:
        """The compiled graph merged with the graph of every ShapeCollection.

        The merge is performed on a copy of the compiled graph, and the result
        is cached on the instance after the first access.

        :return: compiled graph plus all shape collection graphs
        :rtype: rdflib.Graph
        """
        g = copy_graph(self._compiled_graph)
        for shape_collection in self.shape_collections:
            g += shape_collection.graph
        return g

    def get_manifest(self) -> ShapeCollection:
        """Get the manifest ShapeCollection for this model.

        Delegates to the underlying (uncompiled) model.

        :return: model's shape collection
        :rtype: ShapeCollection
        """
        return self.model.get_manifest()

    def test_model_against_shapes(
        self,
        shapes_to_test: List[rdflib.URIRef],
        target_class: rdflib.URIRef,
    ) -> Dict[rdflib.URIRef, "ValidationContext"]:
        """Validates the model against a list of shapes and generates a
        validation report for each.

        :param shapes_to_test: list of shape URIs to validate the model against
        :type shapes_to_test: List[URIRef]
        :param target_class: the class upon which to run the selected shapes
        :type target_class: URIRef
        :return: a dictionary that relates each shape to test URIRef to a
            ValidationContext
        :rtype: Dict[URIRef, ValidationContext]
        """
        model_graph = copy_graph(self._compiled_graph)

        results = {}

        # find every instance of target_class (or of one of its subclasses)
        targets = model_graph.query(
            f"""
            PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            SELECT ?target
            WHERE {{
                ?target rdf:type/rdfs:subClassOf* <{target_class}>

            }}
            """
        )
        # skolemize so we have consistent identifiers across validation
        # through the interpretation of the validation report
        # NOTE(review): the graph handed to shacl_validate as the shape graph is
        # the skolemized *compiled model* graph; the ShapeCollections are not
        # merged in here. Confirm the compiled graph already carries the shape
        # definitions, otherwise the shapes under test will not be found.
        ontology_graph = model_graph.skolemize()

        for shape_uri in shapes_to_test:
            # work on a fresh copy so the type assertions added for one shape
            # do not leak into the validation of the next shape
            temp_model_graph = copy_graph(model_graph)
            for (s,) in targets:
                # declare each target an instance of the shape so it gets validated
                temp_model_graph.add((URIRef(s), A, shape_uri))

            valid, report_g, report_str = shacl_validate(
                temp_model_graph, ontology_graph, engine=self.model._bm.shacl_engine
            )

            results[shape_uri] = ValidationContext(
                self.shape_collections,
                ontology_graph,
                valid,
                report_g,
                report_str,
                self.model,
            )

        return results

    def validate(
        self,
        error_on_missing_imports: bool = True,
    ) -> "ValidationContext":
        """Validates this compiled model against the ShapeCollections it was
        compiled with.

        Loads the compiled graph and all of the ShapeCollections (with their
        imports resolved) into a single graph, which is then validated.

        :param error_on_missing_imports: if True, raises an error if any of the dependency
            ontologies are missing (i.e. they need to be loaded into BuildingMOTIF), defaults
            to True
        :type error_on_missing_imports: bool, optional
        :return: An object containing useful properties/methods to deal with
            the validation results
        :rtype: ValidationContext
        """
        # TODO: determine the return types; At least a bool for valid/invalid,
        # but also want a report. Is this the base pySHACL report? Or a useful
        # transformation, like a list of deltas for potential fixes?
        shapeg = copy_graph(self._compiled_graph)
        # aggregate shape graphs
        for sc in self.shape_collections:
            shapeg += sc.resolve_imports(
                error_on_missing_imports=error_on_missing_imports
            ).graph
        # inline sh:node for interpretability
        shapeg = rewrite_shape_graph(shapeg)

        # remove imports; the data and shape triples live in this one graph,
        # so a single pass covers both
        shapeg.remove((None, OWL.imports, None))

        # skolemize the shape graph so we have consistent identifiers across
        # validation through the interpretation of the validation report
        shapeg = skolemize_shapes(shapeg)

        # validate the combined graph
        valid, report_g, report_str = shacl_validate(
            shapeg, engine=self.model._bm.shacl_engine
        )
        return ValidationContext(
            self.shape_collections,
            shapeg,
            valid,
            report_g,
            report_str,
            self.model,
        )

    def defining_shape_collection(
        self, shape: rdflib.URIRef
    ) -> Optional[ShapeCollection]:
        """
        Given a shape, return the ShapeCollection that defines it. The search is limited to the
        ShapeCollections that were used to compile this model. A collection "defines" the shape
        when its graph declares the shape to be a sh:NodeShape; the first match is returned.

        :param shape: the shape to search for
        :type shape: rdflib.URIRef
        :return: the ShapeCollection that defines the shape, or None if the shape is not defined
        :rtype: Optional[ShapeCollection]
        """
        for sc in self.shape_collections:
            if (shape, A, SH.NodeShape) in sc.graph:
                return sc
        return None

    def shape_to_table(self, shape: rdflib.URIRef, table: str, conn) -> None:
        """
        Turn the shape into a SPARQL query and execute it on the model's graph, storing the results in a table.
        An existing table with the same name is replaced.

        :param shape: the shape to query
        :type shape: rdflib.URIRef
        :param table: the name of the table to store the results in
        :type table: str
        :param conn: the connection to the database
        :type conn: sqlalchemy.engine.base.Connection
        :raises ValueError: if the shape is not defined in any of the shape collections
        """
        metadata = self.shape_to_df(shape)
        metadata.to_sql(table, conn, if_exists="replace", index=False)

    def shape_to_df(self, shape: rdflib.URIRef) -> pd.DataFrame:
        """
        Turn the shape into a SPARQL query and execute it on the model's graph, storing the results in a dataframe.

        :param shape: the shape to query
        :type shape: rdflib.URIRef
        :return: the results of the query
        :rtype: pd.DataFrame
        :raises ValueError: if the shape is not defined in any of the shape collections
        """
        defining_sc = self.defining_shape_collection(shape)
        if defining_sc is None:
            raise ValueError(
                f"Shape {shape} is not defined in any of the shape collections"
            )
        query = defining_sc.shape_to_query(shape)
        # every binding is stored as a string column
        metadata = pd.DataFrame(
            self._compiled_graph.query(query).bindings, dtype="string"
        )
        return metadata
5 changes: 3 additions & 2 deletions buildingmotif/dataclasses/shape_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,10 +375,11 @@ def gensym():

for pshape in graph.objects(shape, SH.property):
# get the varname if we've already assigned one for this pshape above,
# or generate a new one. When generating a name, use the SH.name field
# or generate a new one. When generating a name, use the SH.name|RDFS.label field
# in the PropertyShape or generate a unique one
name = pshape_vars.get(
pshape, f"?{graph.value(pshape, SH.name) or gensym()}".replace(" ", "_")
pshape,
f"?{graph.value(pshape, SH.name|RDFS.label) or gensym()}".replace(" ", "_"),
)
path = _sh_path_to_path(graph, graph.value(pshape, SH.path))
qMinCount = graph.value(pshape, SH.qualifiedMinCount) or 0
Expand Down
Loading
Loading