From 4127e8d1b429d4eaf4b8a54530dbaecc986681cc Mon Sep 17 00:00:00 2001 From: "Jennifer A. Clark" Date: Thu, 12 Dec 2024 14:22:10 -0500 Subject: [PATCH 01/20] Resolve explicit Hs unrepresented in graph --- openff/toolkit/utils/rdkit_wrapper.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/openff/toolkit/utils/rdkit_wrapper.py b/openff/toolkit/utils/rdkit_wrapper.py index 4a99577a1..5c3b085b6 100644 --- a/openff/toolkit/utils/rdkit_wrapper.py +++ b/openff/toolkit/utils/rdkit_wrapper.py @@ -1580,14 +1580,26 @@ def from_smiles( elif hydrogens_are_explicit: for atom_idx in range(rdmol.GetNumAtoms()): atom = rdmol.GetAtomWithIdx(atom_idx) + # Issue #1697: GetTotalNumHs() counts the number of hydrogens that are + # not present in the graph, so explicit hydrogens are not counted. + # For some reason this isn't the same as the number of Implicit Hydrogens + # Likely for cases where only same hydrogens are defined. if atom.GetNumImplicitHs() != 0: raise ValueError( f"'hydrogens_are_explicit' was specified as True, but RDKit toolkit interpreted " - f"SMILES '{smiles}' as having implicit hydrogen. If this SMILES is intended to " + f"the following SMILES as having implicit hydrogens'{smiles}'\nIf this SMILES is intended to " f"express all explicit hydrogens in the molecule, then you should construct the " f"desired molecule as an RDMol with no implicit hydrogens, and then use " f"Molecule.from_rdkit() to create the desired OFFMol." ) + elif atom.GetTotalNumHs() != 0: + raise ValueError( + "'hydrogens_are_explicit' was specified as True, but RDKit toolkit interpreted " + f"the following SMILES as having some nonexplicit hydrogens (e.g., [NH+]): '{smiles}'\n" + "If this SMILES is intended to express all explicit hydrogens in the molecule, then" + " you should construct the desired molecule as an RDMol with no implicit hydrogens, " + "and then use Molecule.from_rdkit() to create the desired OFFMol." + ) molecule = self.from_rdkit( rdmol, From 5becc27a527791ad1d1b44984fa7c6c2710b8395 Mon Sep 17 00:00:00 2001 From: "Jennifer A. Clark" Date: Thu, 12 Dec 2024 15:06:25 -0500 Subject: [PATCH 02/20] Add test --- openff/toolkit/_tests/test_toolkits.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/openff/toolkit/_tests/test_toolkits.py b/openff/toolkit/_tests/test_toolkits.py index dd36b4d08..b816d78a6 100644 --- a/openff/toolkit/_tests/test_toolkits.py +++ b/openff/toolkit/_tests/test_toolkits.py @@ -2224,7 +2224,7 @@ def test_rdkit_from_smiles_hydrogens_are_explicit(self): smiles_impl = "C#C" with pytest.raises( ValueError, - match="but RDKit toolkit interpreted SMILES 'C#C' as having implicit hydrogen", + match="but RDKit toolkit interpreted the following SMILES as having implicit hydrogens: 'C#C'", ): offmol = Molecule.from_smiles( smiles_impl, @@ -2249,6 +2249,24 @@ def test_rdkit_from_smiles_hydrogens_are_explicit(self): smiles_expl, toolkit_registry=toolkit_wrapper, hydrogens_are_explicit=False ) assert offmol.n_atoms == 4 + + def test_rdkit_from_smiles_hydrogens_are_explicit_and_in_graph(self): + """ + Test to ensure that RDKitToolkitWrapper.from_smiles has the proper behavior with + respect to its hydrogens_are_explicit kwarg and that a CMILES entry has all hydrogens + in the graph. + """ + toolkit_wrapper = RDKitToolkitWrapper() + smiles_impl = "HC(H)(H)[NH+](H)(H)C(H)(H)H" + with pytest.raises( + ValueError, + match="following SMILES as having some nonexplicit hydrogens (e.g., [NH+]): 'HC(H)(H)[NH+](H)(H)C(H)(H)H'", + ): + offmol = Molecule.from_smiles( + smiles_impl, + toolkit_registry=toolkit_wrapper, + hydrogens_are_explicit=True, + ) @pytest.mark.parametrize("molecule", get_mini_drug_bank(RDKitToolkitWrapper)) def test_to_inchi(self, molecule): From 44e580809dc7dd4ce68ca61ebac591b687e21682 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 12 Dec 2024 20:16:38 +0000 Subject: [PATCH 03/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- openff/toolkit/_tests/test_toolkits.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openff/toolkit/_tests/test_toolkits.py b/openff/toolkit/_tests/test_toolkits.py index b816d78a6..0a5b57213 100644 --- a/openff/toolkit/_tests/test_toolkits.py +++ b/openff/toolkit/_tests/test_toolkits.py @@ -2249,7 +2249,7 @@ def test_rdkit_from_smiles_hydrogens_are_explicit(self): smiles_expl, toolkit_registry=toolkit_wrapper, hydrogens_are_explicit=False ) assert offmol.n_atoms == 4 - + def test_rdkit_from_smiles_hydrogens_are_explicit_and_in_graph(self): """ Test to ensure that RDKitToolkitWrapper.from_smiles has the proper behavior with From d2fa36fbcb78861c25ec7f95a005c0911506413f Mon Sep 17 00:00:00 2001 From: "Jennifer A. Clark" Date: Thu, 12 Dec 2024 15:16:52 -0500 Subject: [PATCH 04/20] black --- openff/toolkit/utils/rdkit_wrapper.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/openff/toolkit/utils/rdkit_wrapper.py b/openff/toolkit/utils/rdkit_wrapper.py index 5c3b085b6..26fe6087e 100644 --- a/openff/toolkit/utils/rdkit_wrapper.py +++ b/openff/toolkit/utils/rdkit_wrapper.py @@ -174,9 +174,7 @@ def from_object( obj, allow_undefined_stereo=allow_undefined_stereo, ) - raise NotImplementedError( - f"Cannot create Molecule from {type(obj)} object" - ) + raise NotImplementedError(f"Cannot create Molecule from {type(obj)} object") def from_pdb_and_smiles( self, @@ -2259,16 +2257,17 @@ def from_rdkit( frags = AllChem.GetMolFrags(rdmol) if len(frags) > 1: - warnings.warn("RDKit Molecule passed to from_rdkit consists of more than one molecule, consider running " - "rdkit.Chem.AllChem.GetMolFrags(rdmol, asMols=True) or splitting input SMILES at '.' to get " - "separate molecules and pass them to from_rdkit one at a time. While this is supported for " - "legacy reasons, OpenFF Molecule objects are not supposed to contain disconnected chemical " - "graphs and this may result in undefined behavior later on. The OpenFF ecosystem is built " - "to handle multiple molecules, but they should be in a Topology object, ex: " - "top = Topology.from_molecules([mol1, mol2])", - MultipleComponentsInMoleculeWarning, - stacklevel=2 - ) + warnings.warn( + "RDKit Molecule passed to from_rdkit consists of more than one molecule, consider running " + "rdkit.Chem.AllChem.GetMolFrags(rdmol, asMols=True) or splitting input SMILES at '.' to get " + "separate molecules and pass them to from_rdkit one at a time. While this is supported for " + "legacy reasons, OpenFF Molecule objects are not supposed to contain disconnected chemical " + "graphs and this may result in undefined behavior later on. The OpenFF ecosystem is built " + "to handle multiple molecules, but they should be in a Topology object, ex: " + "top = Topology.from_molecules([mol1, mol2])", + MultipleComponentsInMoleculeWarning, + stacklevel=2, + ) if not hydrogens_are_explicit: rdmol = Chem.AddHs(rdmol, addCoords=True) From 3f0826b87b00f430a9aa2fcbd1fd249a5d3e6dd9 Mon Sep 17 00:00:00 2001 From: "Jennifer A. Clark" Date: Thu, 12 Dec 2024 15:21:34 -0500 Subject: [PATCH 05/20] changelog --- docs/releasehistory.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/releasehistory.md b/docs/releasehistory.md index eef753aaa..51c560451 100644 --- a/docs/releasehistory.md +++ b/docs/releasehistory.md @@ -14,7 +14,8 @@ Releases follow the `major.minor.micro` scheme recommended by [PEP440](https://w ### Behavior changes ### Bugfixes -- [PR #1971](https://github.com/openforcefield/openff-toolkit/pull/1971): Fixes bug where OpenEyeToolkitWrapper would write coordinate-less PDB atoms if insertion_code or chain_id was an empty string ([Issue #1967](https://github.com/openforcefield/openff-toolkit/issues/1967)) +- [PR #1971](https://github.com/openforcefield/openff-toolkit/pull/1971): Fixes bug where OpenEyeToolkitWrapper would write coordinate-less PDB atoms if insertion_code or chain_id was an empty string ([Issue #1967](https://github.com/openforcefield/openff-toolkit/issues/1967)) +- [PR #1983](https://github.com/openforcefield/openff-toolkit/pull/1983): Fixes issue with implicit hydrogens slipping though checks to result in radicals later. [Issue #1696](https://github.com/openforcefield/openff-toolkit/issues/1696) and [Issue #1697](https://github.com/openforcefield/openff-toolkit/issues/1697) ### New features From 9d3c9e813559edbcd6a2055319206cefd6979002 Mon Sep 17 00:00:00 2001 From: "Jennifer A. Clark" Date: Thu, 12 Dec 2024 15:24:23 -0500 Subject: [PATCH 06/20] pre-commit --- openff/toolkit/_tests/test_toolkits.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openff/toolkit/_tests/test_toolkits.py b/openff/toolkit/_tests/test_toolkits.py index 0a5b57213..bc06924c4 100644 --- a/openff/toolkit/_tests/test_toolkits.py +++ b/openff/toolkit/_tests/test_toolkits.py @@ -2262,7 +2262,7 @@ def test_rdkit_from_smiles_hydrogens_are_explicit_and_in_graph(self): ValueError, match="following SMILES as having some nonexplicit hydrogens (e.g., [NH+]): 'HC(H)(H)[NH+](H)(H)C(H)(H)H'", ): - offmol = Molecule.from_smiles( + _ = Molecule.from_smiles( smiles_impl, toolkit_registry=toolkit_wrapper, hydrogens_are_explicit=True, From d5994d4cf0267cff8f279b2c4c1a4c99e5f235a8 Mon Sep 17 00:00:00 2001 From: "Jennifer A. Clark" Date: Thu, 12 Dec 2024 15:34:23 -0500 Subject: [PATCH 07/20] Fix Test --- openff/toolkit/_tests/test_toolkits.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openff/toolkit/_tests/test_toolkits.py b/openff/toolkit/_tests/test_toolkits.py index bc06924c4..5b476e523 100644 --- a/openff/toolkit/_tests/test_toolkits.py +++ b/openff/toolkit/_tests/test_toolkits.py @@ -2257,7 +2257,7 @@ def test_rdkit_from_smiles_hydrogens_are_explicit_and_in_graph(self): in the graph. """ toolkit_wrapper = RDKitToolkitWrapper() - smiles_impl = "HC(H)(H)[NH+](H)(H)C(H)(H)H" + smiles_impl = "HC(H)(H)[NH+](H)C(H)(H)H" with pytest.raises( ValueError, match="following SMILES as having some nonexplicit hydrogens (e.g., [NH+]): 'HC(H)(H)[NH+](H)(H)C(H)(H)H'", From 66bd7fd7459ec90d35b537fcdc49517463569be7 Mon Sep 17 00:00:00 2001 From: "Jennifer A. Clark" Date: Thu, 12 Dec 2024 15:43:39 -0500 Subject: [PATCH 08/20] fix test --- openff/toolkit/_tests/test_toolkits.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openff/toolkit/_tests/test_toolkits.py b/openff/toolkit/_tests/test_toolkits.py index 5b476e523..97ee4b6aa 100644 --- a/openff/toolkit/_tests/test_toolkits.py +++ b/openff/toolkit/_tests/test_toolkits.py @@ -2257,7 +2257,7 @@ def test_rdkit_from_smiles_hydrogens_are_explicit_and_in_graph(self): in the graph. """ toolkit_wrapper = RDKitToolkitWrapper() - smiles_impl = "HC(H)(H)[NH+](H)C(H)(H)H" + smiles_impl = "[H][C]([H])([H])[NH+]([H])[C]([H])([H])[H]" with pytest.raises( ValueError, match="following SMILES as having some nonexplicit hydrogens (e.g., [NH+]): 'HC(H)(H)[NH+](H)(H)C(H)(H)H'", From bc33ab0efa13da68a693431bdb0a8b6400a81954 Mon Sep 17 00:00:00 2001 From: "Jennifer A. Clark" Date: Thu, 12 Dec 2024 15:54:38 -0500 Subject: [PATCH 09/20] Fix test --- openff/toolkit/utils/rdkit_wrapper.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/openff/toolkit/utils/rdkit_wrapper.py b/openff/toolkit/utils/rdkit_wrapper.py index 26fe6087e..821c0602a 100644 --- a/openff/toolkit/utils/rdkit_wrapper.py +++ b/openff/toolkit/utils/rdkit_wrapper.py @@ -174,7 +174,9 @@ def from_object( obj, allow_undefined_stereo=allow_undefined_stereo, ) - raise NotImplementedError(f"Cannot create Molecule from {type(obj)} object") + raise NotImplementedError( + f"Cannot create Molecule from {type(obj)} object" + ) def from_pdb_and_smiles( self, @@ -1585,8 +1587,8 @@ def from_smiles( if atom.GetNumImplicitHs() != 0: raise ValueError( f"'hydrogens_are_explicit' was specified as True, but RDKit toolkit interpreted " - f"the following SMILES as having implicit hydrogens'{smiles}'\nIf this SMILES is intended to " - f"express all explicit hydrogens in the molecule, then you should construct the " + f"SMILES '{smiles}' as having implicit hydrogen. If this SMILES is intended to " + f" express all explicit hydrogens in the molecule, then you should construct the " f"desired molecule as an RDMol with no implicit hydrogens, and then use " f"Molecule.from_rdkit() to create the desired OFFMol." ) From 8ae44c0a81e5db8a2ad594f8822b563d0aed79db Mon Sep 17 00:00:00 2001 From: "Jennifer A. Clark" Date: Thu, 12 Dec 2024 17:19:39 -0500 Subject: [PATCH 10/20] Fix test --- openff/toolkit/_tests/test_toolkits.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openff/toolkit/_tests/test_toolkits.py b/openff/toolkit/_tests/test_toolkits.py index 97ee4b6aa..fb92194a5 100644 --- a/openff/toolkit/_tests/test_toolkits.py +++ b/openff/toolkit/_tests/test_toolkits.py @@ -2260,7 +2260,7 @@ def test_rdkit_from_smiles_hydrogens_are_explicit_and_in_graph(self): smiles_impl = "[H][C]([H])([H])[NH+]([H])[C]([H])([H])[H]" with pytest.raises( ValueError, - match="following SMILES as having some nonexplicit hydrogens (e.g., [NH+]): 'HC(H)(H)[NH+](H)(H)C(H)(H)H'", + match="following SMILES as having some nonexplicit hydrogens (e.g., [NH+]): '[H][C]([H])([H])[NH+]([H])[C]([H])([H])[H]'", ): _ = Molecule.from_smiles( smiles_impl, From 34982e51a51c960c751aafe0216e1d8905382957 Mon Sep 17 00:00:00 2001 From: "Jennifer A. Clark" Date: Thu, 12 Dec 2024 17:44:24 -0500 Subject: [PATCH 11/20] Fix test regex --- openff/toolkit/_tests/test_toolkits.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openff/toolkit/_tests/test_toolkits.py b/openff/toolkit/_tests/test_toolkits.py index fb92194a5..8f02197dc 100644 --- a/openff/toolkit/_tests/test_toolkits.py +++ b/openff/toolkit/_tests/test_toolkits.py @@ -2224,7 +2224,7 @@ def test_rdkit_from_smiles_hydrogens_are_explicit(self): smiles_impl = "C#C" with pytest.raises( ValueError, - match="but RDKit toolkit interpreted the following SMILES as having implicit hydrogens: 'C#C'", + match="but RDKit toolkit interpreted SMILES 'C#C' as having implicit hydrogen", ): offmol = Molecule.from_smiles( smiles_impl, @@ -2260,7 +2260,7 @@ def test_rdkit_from_smiles_hydrogens_are_explicit_and_in_graph(self): smiles_impl = "[H][C]([H])([H])[NH+]([H])[C]([H])([H])[H]" with pytest.raises( ValueError, - match="following SMILES as having some nonexplicit hydrogens (e.g., [NH+]): '[H][C]([H])([H])[NH+]([H])[C]([H])([H])[H]'", + match="the following SMILES as having some nonexplicit hydrogens", ): _ = Molecule.from_smiles( smiles_impl, From ea84b13bba99232585ba152ceed5e2cb4daeba2a Mon Sep 17 00:00:00 2001 From: "Jennifer A. Clark" Date: Thu, 12 Dec 2024 18:23:49 -0500 Subject: [PATCH 12/20] Update error message --- openff/toolkit/utils/rdkit_wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openff/toolkit/utils/rdkit_wrapper.py b/openff/toolkit/utils/rdkit_wrapper.py index 821c0602a..645f6e498 100644 --- a/openff/toolkit/utils/rdkit_wrapper.py +++ b/openff/toolkit/utils/rdkit_wrapper.py @@ -1588,7 +1588,7 @@ def from_smiles( raise ValueError( f"'hydrogens_are_explicit' was specified as True, but RDKit toolkit interpreted " f"SMILES '{smiles}' as having implicit hydrogen. If this SMILES is intended to " - f" express all explicit hydrogens in the molecule, then you should construct the " + f"express all explicit hydrogens in the molecule, then you should construct the " f"desired molecule as an RDMol with no implicit hydrogens, and then use " f"Molecule.from_rdkit() to create the desired OFFMol." ) From 10171523656c75458575a6a841594190a7cc1ec3 Mon Sep 17 00:00:00 2001 From: "Jennifer A. Clark" Date: Thu, 12 Dec 2024 18:34:18 -0500 Subject: [PATCH 13/20] Undo unwarranted change --- openff/toolkit/utils/rdkit_wrapper.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/openff/toolkit/utils/rdkit_wrapper.py b/openff/toolkit/utils/rdkit_wrapper.py index 645f6e498..52bddaa63 100644 --- a/openff/toolkit/utils/rdkit_wrapper.py +++ b/openff/toolkit/utils/rdkit_wrapper.py @@ -2259,17 +2259,16 @@ def from_rdkit( frags = AllChem.GetMolFrags(rdmol) if len(frags) > 1: - warnings.warn( - "RDKit Molecule passed to from_rdkit consists of more than one molecule, consider running " - "rdkit.Chem.AllChem.GetMolFrags(rdmol, asMols=True) or splitting input SMILES at '.' to get " - "separate molecules and pass them to from_rdkit one at a time. While this is supported for " - "legacy reasons, OpenFF Molecule objects are not supposed to contain disconnected chemical " - "graphs and this may result in undefined behavior later on. The OpenFF ecosystem is built " - "to handle multiple molecules, but they should be in a Topology object, ex: " - "top = Topology.from_molecules([mol1, mol2])", - MultipleComponentsInMoleculeWarning, - stacklevel=2, - ) + warnings.warn("RDKit Molecule passed to from_rdkit consists of more than one molecule, consider running " + "rdkit.Chem.AllChem.GetMolFrags(rdmol, asMols=True) or splitting input SMILES at '.' to get " + "separate molecules and pass them to from_rdkit one at a time. While this is supported for " + "legacy reasons, OpenFF Molecule objects are not supposed to contain disconnected chemical " + "graphs and this may result in undefined behavior later on. The OpenFF ecosystem is built " + "to handle multiple molecules, but they should be in a Topology object, ex: " + "top = Topology.from_molecules([mol1, mol2])", + MultipleComponentsInMoleculeWarning, + stacklevel=2 + ) if not hydrogens_are_explicit: rdmol = Chem.AddHs(rdmol, addCoords=True) From f605add0e9c7c082adcd4a2ba4cbcc0063b3c045 Mon Sep 17 00:00:00 2001 From: Jennifer Clark Date: Tue, 7 Jan 2025 11:22:39 -0500 Subject: [PATCH 14/20] Update docstrings and comments, replace GetTotalNumHs() with GetNumExplicitHs() --- docs/releasehistory.md | 3 ++- openff/toolkit/_tests/test_toolkits.py | 4 ++++ openff/toolkit/utils/rdkit_wrapper.py | 12 +++++++----- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/docs/releasehistory.md b/docs/releasehistory.md index 784d5e42b..63441e3c2 100644 --- a/docs/releasehistory.md +++ b/docs/releasehistory.md @@ -16,6 +16,7 @@ Releases follow the `major.minor.micro` scheme recommended by [PEP440](https://w ### Bugfixes - [PR #1971](https://github.com/openforcefield/openff-toolkit/pull/1971): Fixes bug where OpenEyeToolkitWrapper would write coordinate-less PDB atoms if insertion_code or chain_id was an empty string ([Issue #1967](https://github.com/openforcefield/openff-toolkit/issues/1967)) +- [PR #1983](https://github.com/openforcefield/openff-toolkit/pull/1983): Fixes issue with implicit hydrogens slipping though checks to result in radicals later. [Issue #936](https://github.com/openforcefield/openff-toolkit/issues/936), [Issue #1696](https://github.com/openforcefield/openff-toolkit/issues/1696), and [Issue #1697](https://github.com/openforcefield/openff-toolkit/issues/1697) ### Performance improvements @@ -30,8 +31,8 @@ Releases follow the `major.minor.micro` scheme recommended by [PEP440](https://w ## 0.16.7 ### Bugfixes + - [PR #1971](https://github.com/openforcefield/openff-toolkit/pull/1971): Fixes bug where OpenEyeToolkitWrapper would write coordinate-less PDB atoms if insertion_code or chain_id was an empty string ([Issue #1967](https://github.com/openforcefield/openff-toolkit/issues/1967)) -- [PR #1983](https://github.com/openforcefield/openff-toolkit/pull/1983): Fixes issue with implicit hydrogens slipping though checks to result in radicals later. [Issue #1696](https://github.com/openforcefield/openff-toolkit/issues/1696) and [Issue #1697](https://github.com/openforcefield/openff-toolkit/issues/1697) ### Tests updated diff --git a/openff/toolkit/_tests/test_toolkits.py b/openff/toolkit/_tests/test_toolkits.py index 8f02197dc..c443b5522 100644 --- a/openff/toolkit/_tests/test_toolkits.py +++ b/openff/toolkit/_tests/test_toolkits.py @@ -2255,6 +2255,10 @@ def test_rdkit_from_smiles_hydrogens_are_explicit_and_in_graph(self): Test to ensure that RDKitToolkitWrapper.from_smiles has the proper behavior with respect to its hydrogens_are_explicit kwarg and that a CMILES entry has all hydrogens in the graph. + + `Issue #936 `, + `Issue #1696 `, + `Issue #1697 ` """ toolkit_wrapper = RDKitToolkitWrapper() smiles_impl = "[H][C]([H])([H])[NH+]([H])[C]([H])([H])[H]" diff --git a/openff/toolkit/utils/rdkit_wrapper.py b/openff/toolkit/utils/rdkit_wrapper.py index 52bddaa63..adae10137 100644 --- a/openff/toolkit/utils/rdkit_wrapper.py +++ b/openff/toolkit/utils/rdkit_wrapper.py @@ -1580,10 +1580,6 @@ def from_smiles( elif hydrogens_are_explicit: for atom_idx in range(rdmol.GetNumAtoms()): atom = rdmol.GetAtomWithIdx(atom_idx) - # Issue #1697: GetTotalNumHs() counts the number of hydrogens that are - # not present in the graph, so explicit hydrogens are not counted. - # For some reason this isn't the same as the number of Implicit Hydrogens - # Likely for cases where only same hydrogens are defined. if atom.GetNumImplicitHs() != 0: raise ValueError( f"'hydrogens_are_explicit' was specified as True, but RDKit toolkit interpreted " @@ -1592,7 +1588,13 @@ def from_smiles( f"desired molecule as an RDMol with no implicit hydrogens, and then use " f"Molecule.from_rdkit() to create the desired OFFMol." ) - elif atom.GetTotalNumHs() != 0: + # Issue #936, #1696, #1697: GetNumExplicitHs() counts the number of hydrogens that are + # explicitly in the string but not present in the graph. This criteria would apply to + # hydrogens inside the brackets descibing a heavy atom, e.g. [NH+]. + # https://www.rdkit.org/docs/Cookbook.html + # This method is also used in GetTotalNumHs() = GetNumImplicitHs() + GetNumExplicitHs() + # + Optional number of neighboring hydrogens in the molecular graph. + elif atom.GetNumExplicitHs() != 0: raise ValueError( "'hydrogens_are_explicit' was specified as True, but RDKit toolkit interpreted " f"the following SMILES as having some nonexplicit hydrogens (e.g., [NH+]): '{smiles}'\n" From 66c1c8b7a1894a16f0e0020ec0f838f4026d0725 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 7 Jan 2025 16:23:41 +0000 Subject: [PATCH 15/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- openff/toolkit/utils/rdkit_wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openff/toolkit/utils/rdkit_wrapper.py b/openff/toolkit/utils/rdkit_wrapper.py index adae10137..9e8c5da9b 100644 --- a/openff/toolkit/utils/rdkit_wrapper.py +++ b/openff/toolkit/utils/rdkit_wrapper.py @@ -1592,7 +1592,7 @@ def from_smiles( # explicitly in the string but not present in the graph. This criteria would apply to # hydrogens inside the brackets descibing a heavy atom, e.g. [NH+]. # https://www.rdkit.org/docs/Cookbook.html - # This method is also used in GetTotalNumHs() = GetNumImplicitHs() + GetNumExplicitHs() + # This method is also used in GetTotalNumHs() = GetNumImplicitHs() + GetNumExplicitHs() # + Optional number of neighboring hydrogens in the molecular graph. elif atom.GetNumExplicitHs() != 0: raise ValueError( From acef5f781483bf40deb61fa050a83da6edb0e203 Mon Sep 17 00:00:00 2001 From: jaclark5 Date: Tue, 7 Jan 2025 11:28:22 -0500 Subject: [PATCH 16/20] Remove reference to issue 1697 --- docs/releasehistory.md | 2 +- openff/toolkit/_tests/test_toolkits.py | 3 +-- openff/toolkit/utils/rdkit_wrapper.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/docs/releasehistory.md b/docs/releasehistory.md index 63441e3c2..18cf8b10e 100644 --- a/docs/releasehistory.md +++ b/docs/releasehistory.md @@ -16,7 +16,7 @@ Releases follow the `major.minor.micro` scheme recommended by [PEP440](https://w ### Bugfixes - [PR #1971](https://github.com/openforcefield/openff-toolkit/pull/1971): Fixes bug where OpenEyeToolkitWrapper would write coordinate-less PDB atoms if insertion_code or chain_id was an empty string ([Issue #1967](https://github.com/openforcefield/openff-toolkit/issues/1967)) -- [PR #1983](https://github.com/openforcefield/openff-toolkit/pull/1983): Fixes issue with implicit hydrogens slipping though checks to result in radicals later. [Issue #936](https://github.com/openforcefield/openff-toolkit/issues/936), [Issue #1696](https://github.com/openforcefield/openff-toolkit/issues/1696), and [Issue #1697](https://github.com/openforcefield/openff-toolkit/issues/1697) +- [PR #1983](https://github.com/openforcefield/openff-toolkit/pull/1983): Fixes issue with implicit hydrogens slipping though checks to result in radicals later. [Issue #936](https://github.com/openforcefield/openff-toolkit/issues/936) and [Issue #1696](https://github.com/openforcefield/openff-toolkit/issues/1696) ### Performance improvements diff --git a/openff/toolkit/_tests/test_toolkits.py b/openff/toolkit/_tests/test_toolkits.py index c443b5522..1a2cf13ce 100644 --- a/openff/toolkit/_tests/test_toolkits.py +++ b/openff/toolkit/_tests/test_toolkits.py @@ -2257,8 +2257,7 @@ def test_rdkit_from_smiles_hydrogens_are_explicit_and_in_graph(self): in the graph. `Issue #936 `, - `Issue #1696 `, - `Issue #1697 ` + `Issue #1696 `, """ toolkit_wrapper = RDKitToolkitWrapper() smiles_impl = "[H][C]([H])([H])[NH+]([H])[C]([H])([H])[H]" diff --git a/openff/toolkit/utils/rdkit_wrapper.py b/openff/toolkit/utils/rdkit_wrapper.py index 9e8c5da9b..2109a2e8e 100644 --- a/openff/toolkit/utils/rdkit_wrapper.py +++ b/openff/toolkit/utils/rdkit_wrapper.py @@ -1588,7 +1588,7 @@ def from_smiles( f"desired molecule as an RDMol with no implicit hydrogens, and then use " f"Molecule.from_rdkit() to create the desired OFFMol." ) - # Issue #936, #1696, #1697: GetNumExplicitHs() counts the number of hydrogens that are + # Issue #936 and #1696: GetNumExplicitHs() counts the number of hydrogens that are # explicitly in the string but not present in the graph. This criteria would apply to # hydrogens inside the brackets descibing a heavy atom, e.g. [NH+]. # https://www.rdkit.org/docs/Cookbook.html From 5958e00007b3505c92fa3d597a85dee54536906e Mon Sep 17 00:00:00 2001 From: jaclark5 Date: Tue, 7 Jan 2025 11:46:42 -0500 Subject: [PATCH 17/20] ruff --- openff/toolkit/_tests/test_toolkits.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openff/toolkit/_tests/test_toolkits.py b/openff/toolkit/_tests/test_toolkits.py index 1a2cf13ce..8ec0d5ba7 100644 --- a/openff/toolkit/_tests/test_toolkits.py +++ b/openff/toolkit/_tests/test_toolkits.py @@ -2255,8 +2255,8 @@ def test_rdkit_from_smiles_hydrogens_are_explicit_and_in_graph(self): Test to ensure that RDKitToolkitWrapper.from_smiles has the proper behavior with respect to its hydrogens_are_explicit kwarg and that a CMILES entry has all hydrogens in the graph. - - `Issue #936 `, + + `Issue #936 `, `Issue #1696 `, """ toolkit_wrapper = RDKitToolkitWrapper() From 29cd940d6a6d0e97da3ec3f73071212e596960b8 Mon Sep 17 00:00:00 2001 From: Jennifer A Clark Date: Tue, 7 Jan 2025 13:40:34 -0500 Subject: [PATCH 18/20] Update openff/toolkit/utils/rdkit_wrapper.py Co-authored-by: Jeff Wagner --- openff/toolkit/utils/rdkit_wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openff/toolkit/utils/rdkit_wrapper.py b/openff/toolkit/utils/rdkit_wrapper.py index 2109a2e8e..3fcf30335 100644 --- a/openff/toolkit/utils/rdkit_wrapper.py +++ b/openff/toolkit/utils/rdkit_wrapper.py @@ -1591,7 +1591,7 @@ def from_smiles( # Issue #936 and #1696: GetNumExplicitHs() counts the number of hydrogens that are # explicitly in the string but not present in the graph. This criteria would apply to # hydrogens inside the brackets descibing a heavy atom, e.g. [NH+]. - # https://www.rdkit.org/docs/Cookbook.html + # https://www.rdkit.org/docs/Cookbook.html#explicit-valence-and-number-of-hydrogens # This method is also used in GetTotalNumHs() = GetNumImplicitHs() + GetNumExplicitHs() # + Optional number of neighboring hydrogens in the molecular graph. elif atom.GetNumExplicitHs() != 0: From e327e05e479268fc460b8e0fb197f5e9aee7bc61 Mon Sep 17 00:00:00 2001 From: Jennifer A Clark Date: Tue, 7 Jan 2025 13:41:25 -0500 Subject: [PATCH 19/20] Apply suggestions from code review Co-authored-by: Jeff Wagner --- openff/toolkit/utils/rdkit_wrapper.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/openff/toolkit/utils/rdkit_wrapper.py b/openff/toolkit/utils/rdkit_wrapper.py index 3fcf30335..f3fe6cdb8 100644 --- a/openff/toolkit/utils/rdkit_wrapper.py +++ b/openff/toolkit/utils/rdkit_wrapper.py @@ -1592,14 +1592,18 @@ def from_smiles( # explicitly in the string but not present in the graph. This criteria would apply to # hydrogens inside the brackets descibing a heavy atom, e.g. [NH+]. # https://www.rdkit.org/docs/Cookbook.html#explicit-valence-and-number-of-hydrogens - # This method is also used in GetTotalNumHs() = GetNumImplicitHs() + GetNumExplicitHs() - # + Optional number of neighboring hydrogens in the molecular graph. + # This method is also used in GetTotalNumHs, which returns + # GetNumImplicitHs() + GetNumExplicitHs() + Optionally the number of + # neighboring hydrogens in the molecular graph. + # (see the cookbook link above for more details) elif atom.GetNumExplicitHs() != 0: raise ValueError( - "'hydrogens_are_explicit' was specified as True, but RDKit toolkit interpreted " + "'hydrogens_are_explicit' was specified as True, but the RDKit wrapper interpreted " f"the following SMILES as having some nonexplicit hydrogens (e.g., [NH+]): '{smiles}'\n" - "If this SMILES is intended to express all explicit hydrogens in the molecule, then" - " you should construct the desired molecule as an RDMol with no implicit hydrogens, " + "If this SMILES is intended to express all explicit hydrogens in the molecule, then either: \n" + "1) Replace instances of implicit Hs (ex. `[NH+]` with `[N+]([H])` to make " + "the hydrogen explicit, or\n" + "2) construct the desired molecule as an RDMol with no implicit hydrogens, " "and then use Molecule.from_rdkit() to create the desired OFFMol." ) From 022401aff02d007500981c22079a60affdbf29ad Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 7 Jan 2025 18:42:37 +0000 Subject: [PATCH 20/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- openff/toolkit/utils/rdkit_wrapper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openff/toolkit/utils/rdkit_wrapper.py b/openff/toolkit/utils/rdkit_wrapper.py index f3fe6cdb8..ebf7eb8be 100644 --- a/openff/toolkit/utils/rdkit_wrapper.py +++ b/openff/toolkit/utils/rdkit_wrapper.py @@ -1592,8 +1592,8 @@ def from_smiles( # explicitly in the string but not present in the graph. This criteria would apply to # hydrogens inside the brackets descibing a heavy atom, e.g. [NH+]. # https://www.rdkit.org/docs/Cookbook.html#explicit-valence-and-number-of-hydrogens - # This method is also used in GetTotalNumHs, which returns - # GetNumImplicitHs() + GetNumExplicitHs() + Optionally the number of + # This method is also used in GetTotalNumHs, which returns + # GetNumImplicitHs() + GetNumExplicitHs() + Optionally the number of # neighboring hydrogens in the molecular graph. # (see the cookbook link above for more details) elif atom.GetNumExplicitHs() != 0: