diff --git a/CHANGELOG.md b/CHANGELOG.md index 8eae33c..504e4da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,29 @@ ------------------------------------------------------------------------------ +## Version 0.5.2 + +Date: 23/02/2022 +Contributors: @RMeli + +### Fixed + +* Inconsistent number of nodes with `graph-tool` for disconnected graphs [PR #61 | @RMeli] + +### Improved + +* Support for more types of node properties (including strings) with `graph-tool` [PR #64 | @RMeli] + +### Changed + +* `ValueError` exception into `NonIsomorphicGraphs(ValueError)` exception [PR #65 | @RMeli] + +### Added + +* Warning for disconnected graphs [PR #61| @RMeli] + +------------------------------------------------------------------------------ + ## Version 0.5.1 Date: 21/09/2021 @@ -10,16 +33,16 @@ Contributors: @RMeli ### Fixed -* Fixed wrong covalent radius in `graph.adjacency_matrix_from_atomic_coordinates()` [PR #58 | @RMeli] +* Wrong covalent radius in `graph.adjacency_matrix_from_atomic_coordinates()` [PR #58 | @RMeli] ### Added -* Added [pre-commit](https://pre-commit.com/) configuration file [PR #57 | @RMeli] -* Added support for gzip-compressed files (`.gz`) [PR #56 | @RMeli] +* [pre-commit](https://pre-commit.com/) configuration file [PR #57 | @RMeli] +* Support for gzip-compressed files (`.gz`) [PR #56 | @RMeli] ### Removed -* Removed dependency `QCElemental` [PR #58 | @RMeli] +* Dependency `QCElemental` [PR #58 | @RMeli] ------------------------------------------------------------------------------ @@ -31,15 +54,15 @@ Contributors: @RMeli ### Added -* Added `molecule.Molecule` constructor from RDKit molecule [PR #50 | @RMeli] -* Added `molecule.Molecule` constructor from Open Babel molecule [PR #50 | @RMeli] -* Added `--n-tests` option for `pytest` [PR #44 | @RMeli] +* `molecule.Molecule` constructor from RDKit molecule [PR #50 | @RMeli] +* `molecule.Molecule` constructor from Open Babel molecule [PR #50 | @RMeli] +* `--n-tests` option for `pytest` [PR #44 | @RMeli] ### Improved -* Improved `spyrmsd.rmsdwrapper` to deal with single molecule [PR #51 | @RMeli] -* Improved issue template [PR #46 | @RMeli] -* Improved speed of computation of squared pairwise distances [PR #45 | @RMeli] +* `spyrmsd.rmsdwrapper` to deal with single molecule [PR #51 | @RMeli] +* Issue template [PR #46 | @RMeli] +* Speed of computation of squared pairwise distances [PR #45 | @RMeli] ### Changed @@ -50,8 +73,8 @@ Contributors: @RMeli ### Removed -* Removed `spyrmsd` module [PR #52 | @RMeli] -* Removed Travis CI and AppVeyor bindings [PR #44 | @RMeli] -* Removed `--long` option for `pytest` [PR #44 | @RMeli] +* `spyrmsd` module [PR #52 | @RMeli] +* Travis CI and AppVeyor bindings [PR #44 | @RMeli] +* `--long` option for `pytest` [PR #44 | @RMeli] ------------------------------------------------------------------------------ diff --git a/README.md b/README.md index 8884ad2..9acddaf 100644 --- a/README.md +++ b/README.md @@ -122,14 +122,16 @@ The function `rmsd.rmsd` computes RMSD without symmetry correction. The atoms a def rmsd( coords1: np.ndarray, # Coordinates of molecule 1 coords2: np.ndarray, # Coordinates of molecule 2 - atomicn1: np.ndarray, # Atomic number of molecule 1 - atomicn2: np.ndarray, # Atomic number of molecule 2 + aprops1: np.ndarray, # Atomic properties of molecule 1 + aprops2: np.ndarray, # Atomic properties of molecule 2 center: bool = False, # Flag to center molecules at origin minimize: bool = False, # Flag to compute minimum RMSD atol: float = 1e-9, # Numerical tolerance for QCP method ) ``` +Note: atomic properties (`aprops`) can be any Python object when using [NetworkX](https://networkx.github.io/), or integers, floats, or strings when using [graph-tool](https://graph-tool.skewed.de/). + #### Symmetry-Corrected RMSD The function `rmsd.symmrmsd` computes symmetry-corrected RMSD using molecular graph isomorphisms. Symmetry correction requires molecular adjacency matrices describing the connectivity but needs not the atoms to be in the same order. @@ -140,8 +142,8 @@ Atom matching is performed according to the molecular graph. This function shoul def symmrmsd( coordsref: np.ndarray, # Reference coordinated coords: Union[np.ndarray, List[np.ndarray]], # Coordinates (one set or multiple sets) - atomicnumsref: np.ndarray, # Reference atomic numbers - atomicnums: np.ndarray, # Atomic numbers + apropsref: np.ndarray, # Reference atomic properties + aprops: np.ndarray, # Atomic properties amref: np.ndarray, # Reference adjacency matrix am: np.ndarray, # Adjacency matrix center: bool = False, # Flag to center molecules at origin @@ -151,6 +153,8 @@ def symmrmsd( ) ``` +Note: atomic properties (`aprops`) can be any Python object when using [NetworkX](https://networkx.github.io/), or integers, floats, or strings when using [graph-tool](https://graph-tool.skewed.de/). + ## Development To ensure code quality and consistency the following tools are used during development: diff --git a/docs/source/api/spyrmsd.exceptions.rst b/docs/source/api/spyrmsd.exceptions.rst new file mode 100644 index 0000000..1c2b6bc --- /dev/null +++ b/docs/source/api/spyrmsd.exceptions.rst @@ -0,0 +1,7 @@ +spyrmsd.exceptions module +========================= + +.. automodule:: spyrmsd.exceptions + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/spyrmsd.rst b/docs/source/api/spyrmsd.rst index 06cde76..706cd7c 100644 --- a/docs/source/api/spyrmsd.rst +++ b/docs/source/api/spyrmsd.rst @@ -23,6 +23,7 @@ Submodules spyrmsd.constants spyrmsd.due + spyrmsd.exceptions spyrmsd.graph spyrmsd.hungarian spyrmsd.io diff --git a/spyrmsd/exceptions.py b/spyrmsd/exceptions.py new file mode 100644 index 0000000..cf01f7b --- /dev/null +++ b/spyrmsd/exceptions.py @@ -0,0 +1,6 @@ +class NonIsomorphicGraphs(ValueError): + """ + Raised when graphs are not isomorphic + """ + + pass diff --git a/spyrmsd/graph.py b/spyrmsd/graph.py index 4bb897f..0235a25 100644 --- a/spyrmsd/graph.py +++ b/spyrmsd/graph.py @@ -40,15 +40,15 @@ def adjacency_matrix_from_atomic_coordinates( - atomicnums: np.ndarray, coordinates: np.ndarray + aprops: np.ndarray, coordinates: np.ndarray ) -> np.ndarray: """ Compute adjacency matrix from atomic coordinates. Parameters ---------- - atomicnums: numpy.ndarray - Atomic numbers + aprops: numpy.ndarray + Atomic properties coordinates: numpy.ndarray Atomic coordinates @@ -74,17 +74,17 @@ def adjacency_matrix_from_atomic_coordinates( (1991). """ - n = len(atomicnums) + n = len(aprops) assert coordinates.shape == (n, 3) A = np.zeros((n, n)) for i in range(n): - r_i = constants.anum_to_covalentradius[atomicnums[i]] + r_i = constants.anum_to_covalentradius[aprops[i]] for j in range(i + 1, n): - r_j = constants.anum_to_covalentradius[atomicnums[j]] + r_j = constants.anum_to_covalentradius[aprops[j]] distance = np.sqrt(np.sum((coordinates[i] - coordinates[j]) ** 2)) diff --git a/spyrmsd/graphs/_common.py b/spyrmsd/graphs/_common.py new file mode 100644 index 0000000..2a323ac --- /dev/null +++ b/spyrmsd/graphs/_common.py @@ -0,0 +1,8 @@ +warn_disconnected_graph: str = "Disconnected graph detected. Is this expected?" +warn_no_atomic_properties: str = ( + "No atomic property information stored on nodes. Node matching is not performed..." +) + +error_non_isomorphic_graphs: str = ( + "Graphs are not isomorphic.\nMake sure graphs have the same connectivity." +) diff --git a/spyrmsd/graphs/gt.py b/spyrmsd/graphs/gt.py index 8e725bb..a584c35 100644 --- a/spyrmsd/graphs/gt.py +++ b/spyrmsd/graphs/gt.py @@ -5,10 +5,53 @@ import numpy as np from graph_tool import generation, topology +from spyrmsd.exceptions import NonIsomorphicGraphs +from spyrmsd.graphs._common import ( + error_non_isomorphic_graphs, + warn_disconnected_graph, + warn_no_atomic_properties, +) + + +# TODO: Implement all graph-tool supported types +def _c_type(numpy_dtype): + """ + Get C type compatible with graph-tool from numpy dtype + + Parameters + ---------- + numpy_dtype: np.dtype + Numpy dtype + + Returns + ------- + str + C type + + Raises + ------ + ValueError + If the data type is not supported + + Notes + ----- + https://graph-tool.skewed.de/static/doc/quickstart.html#sec-property-maps + """ + name: str = numpy_dtype.name + + if "int" in name: + return "int" + elif "float" in name: + return "double" + elif "str" in name: + return "string" + else: + raise ValueError(f"Unsupported property type: {name}") + def graph_from_adjacency_matrix( adjacency_matrix: Union[np.ndarray, List[List[int]]], - atomicnums: Optional[Union[np.ndarray, List[int]]] = None, + aprops: Optional[Union[np.ndarray, List[Any]]] = None, ): """ Graph from adjacency matrix. @@ -17,8 +60,8 @@ def graph_from_adjacency_matrix( ---------- adjacency_matrix: Union[np.ndarray, List[List[int]]] Adjacency matrix - atomicnums: Union[np.ndarray, List[int]], optional - Atomic numbers + aprops: Union[np.ndarray, List[Any]], optional + Atomic properties Returns ------- @@ -33,13 +76,27 @@ def graph_from_adjacency_matrix( # Get upper triangular adjacency matrix adj = np.triu(adjacency_matrix) + assert adj.shape[0] == adj.shape[1] + num_vertices = adj.shape[0] + G = gt.Graph(directed=False) + G.add_vertex(n=num_vertices) G.add_edge_list(np.transpose(adj.nonzero())) - if atomicnums is not None: - vprop = G.new_vertex_property("short") # Create property map (of C type short) - vprop.a = atomicnums # Assign atomic numbers to property map array - G.vertex_properties["atomicnum"] = vprop # Set property map + # Check if graph is connected, for warning + cc, _ = topology.label_components(G) + if set(cc.a) != {0}: + warnings.warn(warn_disconnected_graph) + + if aprops is not None: + if not isinstance(aprops, np.ndarray): + aprops = np.array(aprops) + + assert aprops.shape[0] == num_vertices + + ptype: str = _c_type(aprops.dtype) # Get C type + vprop = G.new_vertex_property(ptype, vals=aprops) # Create property map + G.vertex_properties["aprops"] = vprop # Set property map return G @@ -62,7 +119,7 @@ def match_graphs(G1, G2) -> List[Tuple[List[int], List[int]]]: Raises ------ - ValueError + NonIsomorphicGraphs If the graphs `G1` and `G2` are not isomorphic """ @@ -71,26 +128,19 @@ def match_graphs(G1, G2) -> List[Tuple[List[int], List[int]]]: G1, G2, vertex_label=( - G1.vertex_properties["atomicnum"], - G2.vertex_properties["atomicnum"], + G1.vertex_properties["aprops"], + G2.vertex_properties["aprops"], ), subgraph=False, ) - except KeyError: # No "atomicnum" vertex property - warnings.warn( - "No atomic number information stored on nodes. " - + "Node matching is not performed..." - ) + except KeyError: + warnings.warn(warn_no_atomic_properties) maps = topology.subgraph_isomorphism(G1, G2, subgraph=False) # Check if graphs are actually isomorphic if len(maps) == 0: - # TODO: Create a new exception - raise ValueError( - "Graphs are not isomorphic." - "\nMake sure graphs have the same connectivity." - ) + raise NonIsomorphicGraphs(error_non_isomorphic_graphs) n = num_vertices(G1) diff --git a/spyrmsd/graphs/nx.py b/spyrmsd/graphs/nx.py index 64675bf..c1cf7ed 100644 --- a/spyrmsd/graphs/nx.py +++ b/spyrmsd/graphs/nx.py @@ -4,10 +4,17 @@ import networkx as nx import numpy as np +from spyrmsd.exceptions import NonIsomorphicGraphs +from spyrmsd.graphs._common import ( + error_non_isomorphic_graphs, + warn_disconnected_graph, + warn_no_atomic_properties, +) + def graph_from_adjacency_matrix( adjacency_matrix: Union[np.ndarray, List[List[int]]], - atomicnums: Optional[Union[np.ndarray, List[int]]] = None, + aprops: Optional[Union[np.ndarray, List[Any]]] = None, ) -> nx.Graph: """ Graph from adjacency matrix. @@ -16,8 +23,8 @@ def graph_from_adjacency_matrix( ---------- adjacency_matrix: Union[np.ndarray, List[List[int]]] Adjacency matrix - atomicnums: Union[np.ndarray, List[int]], optional - Atomic numbers + aprops: Union[np.ndarray, List[Any]], optional + Atomic properties Returns ------- @@ -31,9 +38,12 @@ def graph_from_adjacency_matrix( G = nx.Graph(adjacency_matrix) - if atomicnums is not None: - attributes = {idx: atomicnum for idx, atomicnum in enumerate(atomicnums)} - nx.set_node_attributes(G, attributes, "atomicnum") + if not nx.is_connected(G): + warnings.warn(warn_disconnected_graph) + + if aprops is not None: + attributes = {idx: aprops for idx, aprops in enumerate(aprops)} + nx.set_node_attributes(G, attributes, "aprops") return G @@ -56,38 +66,34 @@ def match_graphs(G1, G2) -> List[Tuple[List[int], List[int]]]: Raises ------ - ValueError + NonIsomorphicGraphs If the graphs `G1` and `G2` are not isomorphic """ - def match_atomicnum(node1, node2): - return node1["atomicnum"] == node2["atomicnum"] + def match_aprops(node1, node2): + """ + Check if atomic properties for two nodes match. + """ + return node1["aprops"] == node2["aprops"] if ( - nx.get_node_attributes(G1, "atomicnum") == {} - or nx.get_node_attributes(G2, "atomicnum") == {} + nx.get_node_attributes(G1, "aprops") == {} + or nx.get_node_attributes(G2, "aprops") == {} ): # Nodes without atomic number information # No node-matching check node_match = None - warnings.warn( - "No atomic number information stored on nodes. " - + "Node matching is not performed..." - ) + warnings.warn(warn_no_atomic_properties) else: - node_match = match_atomicnum + node_match = match_aprops GM = nx.algorithms.isomorphism.GraphMatcher(G1, G2, node_match) # Check if graphs are actually isomorphic if not GM.is_isomorphic(): - # TODO: Create a new exception - raise ValueError( - "Graphs are not isomorphic." - "\nMake sure graphs have the same connectivity." - ) + raise NonIsomorphicGraphs(error_non_isomorphic_graphs) return [ (list(isomorphism.keys()), list(isomorphism.values())) diff --git a/spyrmsd/hungarian.py b/spyrmsd/hungarian.py index 2258801..412e34a 100644 --- a/spyrmsd/hungarian.py +++ b/spyrmsd/hungarian.py @@ -61,7 +61,7 @@ def optimal_assignment(A: np.ndarray, B: np.ndarray): def hungarian_rmsd( - A: np.ndarray, B: np.ndarray, atomicnumsA: np.ndarray, atomicnumsB: np.ndarray + A: np.ndarray, B: np.ndarray, apropsA: np.ndarray, apropsB: np.ndarray ) -> float: """ Solve the optimal assignment problems between atomic coordinates of @@ -73,10 +73,10 @@ def hungarian_rmsd( Atomic coordinates of molecule A B: numpy.ndarray Atomic coordinates of molecule B - atomicnumsA: numpy.ndarray - Atomic numbers of molecule A - atomicnumsB: numpy.ndarray - Atomic numbers of molecule B + apropsA: numpy.ndarray + Atomic properties of molecule A + apropsB: numpy.ndarray + Atomic properties of molecule B Returns ------- @@ -96,19 +96,19 @@ def hungarian_rmsd( """ assert A.shape == B.shape - assert atomicnumsA.shape == atomicnumsB.shape + assert apropsA.shape == apropsB.shape - elements = set(atomicnumsA) + elements = set(apropsA) total_cost: float = 0.0 for t in elements: - atomicnumsA_idx = atomicnumsA == t - atomicnumsB_idx = atomicnumsB == t + apropsA_idx = apropsA == t + apropsB_idx = apropsB == t - assert atomicnumsA_idx.shape == atomicnumsB_idx.shape + assert apropsA_idx.shape == apropsB_idx.shape cost, row_idx, col_idx = optimal_assignment( - A[atomicnumsA_idx, :], B[atomicnumsB_idx, :] + A[apropsA_idx, :], B[apropsB_idx, :] ) total_cost += cost diff --git a/spyrmsd/rmsd.py b/spyrmsd/rmsd.py index ead85d3..1e417f8 100644 --- a/spyrmsd/rmsd.py +++ b/spyrmsd/rmsd.py @@ -117,8 +117,8 @@ def hrmsd( def _rmsd_isomorphic_core( coords1: np.ndarray, coords2: np.ndarray, - atomicnums1: np.ndarray, - atomicnums2: np.ndarray, + aprops1: np.ndarray, + aprops2: np.ndarray, am1: np.ndarray, am2: np.ndarray, center: bool = False, @@ -135,10 +135,10 @@ def _rmsd_isomorphic_core( Coordinate of molecule 1 coords2: np.ndarray Coordinates of molecule 2 - atomicnums1: npndarray - Atomic numbers for molecule 1 - atomicnums2: npndarray - Atomic numbers for molecule 2 + aprops1: np.ndarray + Atomic properties for molecule 1 + aprops2: np.ndarray + Atomic properties for molecule 2 am1: np.ndarray Adjacency matrix for molecule 1 am2: np.ndarray @@ -169,8 +169,8 @@ def _rmsd_isomorphic_core( # No cached isomorphisms if isomorphisms is None: # Convert molecules to graphs - G1 = graph.graph_from_adjacency_matrix(am1, atomicnums1) - G2 = graph.graph_from_adjacency_matrix(am2, atomicnums2) + G1 = graph.graph_from_adjacency_matrix(am1, aprops1) + G2 = graph.graph_from_adjacency_matrix(am2, aprops2) # Get all the possible graph isomorphisms isomorphisms = graph.match_graphs(G1, G2) @@ -207,8 +207,8 @@ def _rmsd_isomorphic_core( def symmrmsd( coordsref: np.ndarray, coords: Union[np.ndarray, List[np.ndarray]], - atomicnumsref: np.ndarray, - atomicnums: np.ndarray, + apropsref: np.ndarray, + aprops: np.ndarray, amref: np.ndarray, am: np.ndarray, center: bool = False, @@ -225,10 +225,10 @@ def symmrmsd( Coordinate of reference molecule coords: List[np.ndarray] Coordinates of other molecule - atomicnumsref: npndarray - Atomic numbers for reference - atomicnums: npndarray - Atomic numbers for other molecule + apropsref: np.ndarray + Atomic properties for reference + aprops: np.ndarray + Atomic properties for other molecule amref: np.ndarray Adjacency matrix for reference molecule am: np.ndarray @@ -271,8 +271,8 @@ def symmrmsd( srmsd, isomorphism = _rmsd_isomorphic_core( coordsref, c, - atomicnumsref, - atomicnums, + apropsref, + aprops, amref, am, center=center, @@ -288,8 +288,8 @@ def symmrmsd( RMSD, isomorphism = _rmsd_isomorphic_core( coordsref, coords, - atomicnumsref, - atomicnums, + apropsref, + aprops, amref, am, center=center, diff --git a/tests/test_graph.py b/tests/test_graph.py index 4462ec7..f965804 100644 --- a/tests/test_graph.py +++ b/tests/test_graph.py @@ -2,6 +2,7 @@ import pytest from spyrmsd import constants, graph, io, molecule +from spyrmsd.exceptions import NonIsomorphicGraphs from tests import molecules @@ -97,7 +98,7 @@ def test_graph_from_adjacency_matrix_atomicnums(rawmol, mol) -> None: assert graph.num_edges(G) == nbonds for idx, atomicnum in enumerate(mol.atomicnums): - assert graph.vertex_property(G, "atomicnum", idx) == atomicnum + assert graph.vertex_property(G, "aprops", idx) == atomicnum @pytest.mark.parametrize( @@ -109,7 +110,9 @@ def test_graph_from_adjacency_matrix_atomicnums(rawmol, mol) -> None: ) def test_match_graphs_isomorphic(G1, G2) -> None: - with pytest.warns(UserWarning): + with pytest.warns( + UserWarning, match="No atomic property information stored on nodes." + ): isomorphisms = graph.match_graphs(G1, G2) assert len(isomorphisms) != 0 @@ -124,5 +127,37 @@ def test_match_graphs_isomorphic(G1, G2) -> None: ) def test_match_graphs_not_isomorphic(G1, G2) -> None: - with pytest.raises(ValueError), pytest.warns(UserWarning): + with pytest.raises( + NonIsomorphicGraphs, match="Graphs are not isomorphic." + ), pytest.warns(UserWarning, match="No atomic number information stored on nodes."): graph.match_graphs(G1, G2) + + +@pytest.mark.parametrize( + "property", + [ + np.array([0, 1, 2], dtype=int), + np.array([0.1, 1.2, 2.3], dtype=float), + np.array(["H", "H", "H"], dtype=str), + np.array(["Csp3", "Csp3", "Csp3"], dtype=str), + ["LongProperty", "LongPropertyProperty", "LongPropertyProperty"], + ], +) +def test_build_graph_node_features(property) -> None: + A = np.array([[0, 1, 1], [1, 0, 0], [1, 0, 1]]) + G = graph.graph_from_adjacency_matrix(A, property) + + assert graph.num_edges(G) == 3 + + +def test_build_graph_node_features_unsupported() -> None: + pytest.importorskip( + "graph_tool", reason="NetworkX supports all Python objects as node properties." + ) + + A = np.array([[0, 1, 1], [1, 0, 0], [1, 0, 1]]) + + property = [True, False, True] + + with pytest.raises(ValueError, match="Unsupported property type:"): + _ = graph.graph_from_adjacency_matrix(A, property) diff --git a/tests/test_molecule.py b/tests/test_molecule.py index 08ef107..a3ea78a 100644 --- a/tests/test_molecule.py +++ b/tests/test_molecule.py @@ -167,7 +167,7 @@ def test_graph_from_adjacency_matrix(mol: molecule.Molecule, n_bonds: int) -> No assert graph.num_edges(G) == n_bonds for idx, atomicnum in enumerate(mol.atomicnums): - assert graph.vertex_property(G, "atomicnum", idx) == atomicnum + assert graph.vertex_property(G, "aprops", idx) == atomicnum @pytest.mark.parametrize( @@ -192,7 +192,7 @@ def test_graph_from_atomic_coordinates_perception( assert graph.num_edges(G) == n_bonds for idx, atomicnum in enumerate(mol.atomicnums): - assert graph.vertex_property(G, "atomicnum", idx) == atomicnum + assert graph.vertex_property(G, "aprops", idx) == atomicnum @pytest.mark.parametrize( diff --git a/tests/test_rmsd.py b/tests/test_rmsd.py index 77eec70..6a4e8bf 100644 --- a/tests/test_rmsd.py +++ b/tests/test_rmsd.py @@ -520,6 +520,18 @@ def test_rmsd_symmrmsd(index: int, RMSD: float, minimize: bool) -> None: ) +def test_rmsd_symmrmsd_disconnected_node() -> None: + + c = np.array([[0.0, 1.0, 2.0], [1.0, 2.0, 3.0], [2.0, 3.0, 4.0]]) + a = np.array([0, 1, 4]) + + # Adjacency matrix with disconnected node + A = np.array([[0, 1, 0], [1, 0, 0], [0, 0, 0]]) + + with pytest.warns(UserWarning, match="Disconnected graph detected."): + assert rmsd.symmrmsd(c, c, a, a, A, A) == pytest.approx(0.0, abs=1e-5) + + # Results obtained with OpenBabel @pytest.mark.parametrize( "minimize, referenceRMSDs",