diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 00000000..17e16271 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1 @@ +a5e89e407dd5b4ac988138af6870262d3a9e43fa # apply ruff formatter diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..7c48e7cf --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,12 @@ +--- +version: 2 +updates: + # Maintain dependencies for GitHub Actions + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "monthly" + groups: + gha-dependencies: + patterns: + - '*' diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 5f3b268f..3ed4cdf9 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -4,6 +4,9 @@ on: push: pull_request: +env: + FORCE_COLOR: 1 + jobs: pre-commit: env: @@ -15,4 +18,4 @@ jobs: uses: actions/checkout@v4 - name: Run pre-commit - uses: pre-commit/action@v3.0.0 + uses: pre-commit/action@v3.0.1 diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 92100d12..958dcd7a 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -6,25 +6,28 @@ on: branches: [main] pull_request: +env: + FORCE_COLOR: 1 + jobs: test: name: GAP (${{ matrix.python-version }}, ${{ matrix.os }}) runs-on: ${{ matrix.os }} - + strategy: fail-fast: true matrix: - os: ["ubuntu-latest"] + os: ["ubuntu-22.04"] python-version: ["3.9"] - + defaults: run: shell: bash -l {0} - + steps: - uses: actions/checkout@v4 - - - uses: mamba-org/setup-micromamba@v1 + + - uses: mamba-org/setup-micromamba@v2 with: # the create command looks like this: # `micromamba create -n test-env python=3.9 -f environment.yml` @@ -38,27 +41,75 @@ jobs: run: ./install_gap.sh - name: Test basic install - run: pytest + run: pytest --cov=mlptrain -k "not test_openmm" + + - name: Upload coverage reports to Codecov + uses: codecov/codecov-action@v4 + with: + flags: 
python-${{ matrix.python-version }} + token: ${{ secrets.CODECOV_TOKEN }} + slug: duartegroup/mlp-train + + test-ace: + name: ACE (${{ matrix.python-version }}, ${{ matrix.os }}) + runs-on: ${{ matrix.os }} + + strategy: + fail-fast: true + matrix: + os: ["ubuntu-22.04"] + python-version: ["3.9"] + + defaults: + run: + shell: bash -l {0} + + steps: + - uses: actions/checkout@v4 + + - uses: julia-actions/setup-julia@v2 + with: + version: '1.6' + + - uses: mamba-org/setup-micromamba@v2 + with: + environment-file: environment_ace.yml + environment-name: gha-test-env + cache-environment: true + create-args: >- + python=${{ matrix.python-version }} + - name: ACE install + run: ./install_ace.sh + + - name: Test ACE + run: pytest --cov=mlptrain -k "not test_openmm" + + - name: Upload coverage reports to Codecov + uses: codecov/codecov-action@v4 + with: + flags: python-${{ matrix.python-version }}-ace + token: ${{ secrets.CODECOV_TOKEN }} + slug: duartegroup/mlp-train test-mace: name: MACE (${{ matrix.python-version }}, ${{ matrix.os }}) runs-on: ${{ matrix.os }} - + strategy: fail-fast: true matrix: - os: ["ubuntu-latest"] + os: ["ubuntu-22.04"] python-version: ["3.9"] - + defaults: run: shell: bash -l {0} - + steps: - uses: actions/checkout@v4 - - - uses: mamba-org/setup-micromamba@v1 + + - uses: mamba-org/setup-micromamba@v2 with: environment-file: environment_mace.yml environment-name: gha-test-env @@ -70,4 +121,11 @@ jobs: run: ./install_mace.sh - name: Test MACE install - run: pytest + run: pytest --cov=mlptrain + + - name: Upload coverage reports to Codecov + uses: codecov/codecov-action@v4 + with: + flags: python-${{ matrix.python-version }}-mace + token: ${{ secrets.CODECOV_TOKEN }} + slug: duartegroup/mlp-train diff --git a/.gitignore b/.gitignore index 4ba3241d..42e886fd 100644 --- a/.gitignore +++ b/.gitignore @@ -5,5 +5,5 @@ dist/ *DS_store **/__pycache__/ **/__MACOSX/ -.coverage -.pytest_cache \ No newline at end of file +.coverage* +.pytest_cache diff --git 
a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ff40edfb..5c1d0ad9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,13 +10,14 @@ repos: - id: check-shebang-scripts-are-executable - id: check-added-large-files args: ['--maxkb=500', '--enforce-all'] - exclude: mlptrain/sampling/tests/data.zip + exclude: tests/data/data.zip - id: check-yaml - id: check-toml - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.1.3 + rev: v0.2.1 hooks: - id: ruff args: [--show-source, --fix] + - id: ruff-format diff --git a/README.md b/README.md index 60a20ecf..d92d7bd0 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,9 @@ -[![Test with pytest](https://github.com/duartegroup/mlp-train/actions/workflows/pytest.yml/badge.svg?event=push)](https://github.com/duartegroup/mlp-train/actions/workflows/pytest.yml) +[![DOI](https://img.shields.io/badge/doi-10.6084/m9.figshare.25816864.v1-blue.svg?style=flat&labelColor=whitesmoke&logo=data%3Aimage%2Fpng%3Bbase64%2CiVBORw0KGgoAAAANSUhEUgAAAB8AAAAfCAYAAAAfrhY5AAAJsklEQVR42qWXd1DTaRrHf%2BiB2Hdt5zhrAUKz4IKEYu9IGiGFFJJQ0gkJCAKiWFDWBRdFhCQUF3UVdeVcRQEBxUI3yY9iEnQHb3bdW1fPubnyz%2F11M7lvEHfOQee2ZOYzPyDv%2B3yf9%2Fk95YX4fx%2BltfUt08GcFEuPR4U9hDDZ%2FVngIlhb%2FSiI6InkTgLzgDcgfvtnovhH4BzoVlrbwr55QnhCtBW4QHXnFrZbPBaQoBh4%2FSYH2EnpBEtqcDMVzB93wA%2F8AFwa23XFGcc8CkT3mxz%2BfXWtq9T9IQlLIXYEuHojudb%2BCM7Hgdq8ydi%2FAHiBXyY%2BLjwFlAEnS6Jnar%2FvnQVhvdzasad0eKvWZKe8hvDB2ofLZ%2FZEcWsh%2BhyIuyO5Bxs2iZIE4nRv7NWAb0EO8AC%2FWPxjYAWuOEX2MSXZVgPxzmRL3xKz3ScGpx6p6QnOx4mDIFqO0w6Q4fEhO5IzwxlSwyD2FYHzwAW%2BAZ4fEsf74gCumykwNHskLM7taQxLYjjIyy8MUtraGhTWdkfhkFJqtvuVl%2F9l2ZquDfEyrH8B0W06nnpH3JtIyRGpH1iJ6SfxDIHjRXHJmdQjLpfHeN54gnfFx4W9QRnovx%2FN20aXZeTD2J84hn3%2BqoF2Tqr14VqTPUCIcP%2B5%2Fly4qC%2BUL3sYxSvNj1NwsVYPsWdMUfomsdkYm3Tj0nbV0N1wRKwFe1MgKACDIBdMAhPE%2FwicwNWxll8Ag40w%2BFfhibJkGHmutjYeQ8gVlaN%2BjO51nDysa9TwNUFMqaGbKdRJZFfOJSp6mkRKsv0rRIpEVWjAvyFkxNOEpwvcAVPfEe%2Bl8ojeNTx3nXLBcWRrYGxSRjDEk0VlpxYrbe1ZmaQ5xuT0u3r%2B2qe5j0J5uy
tiZPGsRL2Jm32AldpxPUNJ3jmmsN4x62z1cXrbedXBQf2yvIFCeZrtyicZZG2U2nrrBJzYorI2EXLrvTfCSB43s41PKEvbZDEfQby6L4JTj%2FfIwam%2B4%2BwucBu%2BDgNK05Nle1rSt9HvR%2FKPC4U6LTfvUIaip1mjIa8fPzykii23h2eanT57zQ7fsyYH5QjywwlooAUcAdOh5QumgTHx6aAO7%2FL52eaQNEShrxfhL6albEDmfhGflrsT4tps8gTHNOJbeDeBlt0WJWDHSgxs6cW6lQqyg1FpD5ZVDfhn1HYFF1y4Eiaqa18pQf3zzYMBhcanlBjYfgWNayAf%2FASOgklu8bmgD7hADrk4cRlOL7NSOewEcbqSmaivT33QuFdHXj5sdvjlN5yMDrAECmdgDWG2L8P%2BAKLs9ZLZ7dJda%2BB4Xl84t7QvnKfvpXJv9obz2KgK8dXyqISyV0sXGZ0U47hOA%2FAiigbEMECJxC9aoKp86re5O5prxOlHkcksutSQJzxZRlPZmrOKhsQBF5zEZKybUC0vVjG8PqOnhOq46qyDTDnj5gZBriWCk4DvXrudQnXQmnXblebhAC2cCB6zIbM4PYgGl0elPSgIf3iFEA21aLdHYLHUQuVkpgi02SxFdrG862Y8ymYGMvXDzUmiX8DS5vKZyZlGmsSgQqfLub5RyLNS4zfDiZc9Edzh%2FtCE%2BX8j9k%2FqWB071rcZyMImne1SLkL4GRw4UPHMV3jjwEYpPG5uW5fAEot0aTSJnsGAwHJi2nvF1Y5OIqWziVCQd5NT7t6Q8guOSpgS%2Fa1dSRn8JGGaCD3BPXDyQRG4Bqhu8XrgAp0yy8DMSvvyVXDgJcJTcr1wQ2BvFKf65jqhvmxXUuDpGBlRvV36XvGjQzLi8KAKT2lYOnmxQPGorURSV0NhyTIuIyqOmKTMhQ%2BieEsgOgpc4KBbfDM4B3SIgFljvfHF6cef7qpyLBXAiQcXvg5l3Iunp%2FWv4dH6qFziO%2BL9PbrimQ9RY6MQphEfGUpOmma7KkGzuS8sPUFnCtIYcKCaI9EXo4HlQLgGrBjbiK5EqMj2AKWt9QWcIFMtnVvQVDQV9lXJJqdPVtUQpbh6gCI2Ov1nvZts7yYdsnvRgxiWFOtNJcOMVLn1vgptVi6qrNiFOfEjHCDB3J%2BHDLqUB77YgQGwX%2Fb1eYna3hGKdlqJKIyiE4nSbV8VFgxmxR4b5mVkkeUhMgs5YTi4ja2XZ009xJRHdkfwMi%2BfocaancuO7h%2FMlcLOa0V%2FSw6Dq47CumRQAKhgbOP8t%2BMTjuxjJGhXCY6XpmDDFqWlVYbQ1aDJ5Cptdw4oLbf3Ck%2BdWkVP0LpH7s9XLPXI%2FQX8ws%2Bj2In63IcRvOOo%2BTTjiN%2BlssfRsanW%2B3REVKoavBOAPTXABW4AL7e4NygHdpAKBscmlDh9Jysp4wxbnUNna3L3xBvyE1jyrGIkUHaqQMuxhHElV6oj1picvgL1QEuS5PyZTEaivqh5vUCKJqOuIgPFGESns8kyFk7%2FDxyima3cYxi%2FYOQCj%2F%2B9Ms2Ll%2Bhn4FmKnl7JkGXQGDKDAz9rUGL1TIlBpuJr9Be2JjK6qPzyDg495UxXYF7JY1qKimw9jWjF0iV6DRIqE%2B%2FeWG0J2ofmZTk0mLYVd4GLiFCOoKR0Cg727tWq981InYynvCuKW43aXgEjofVbxIqrm0VL76zlH3gQzWP3R3Bv9oXxclrlO7VVtgBRpSP4hMFWJ8BrUSBCJXC07l40X4jWuvtc42ofNCxtlX2JH6bdeojXgTh5TxOBKEyY5wvBE%2BACh8BtOPNPkApjoxi5h%2B%2FFMQQNpWvZaMH7MKFu5Ax8HoCQdmGkJrtnOiLHwD3uS5y8%2F2xTSDrE%2F4PT1yqtt6vGe8ldMBVMEPd6KwqiYECHDlfbvzphcWP%
2BJiZuL5swoWQYlS%2Br7Yu5mNUiGD2retxBi9fl6RDGn4Ti9B1oyYy%2BMP5G87D%2FCpRlvdnuy0PY6RC8BzTA40NXqckQ9TaOUDywkYsudxJzPgyDoAWn%2BB6nEFbaVxxC6UXjJiuDkW9TWq7uRBOJocky9iMfUhGpv%2FdQuVVIuGjYqACbXf8aa%2BPeYNIHZsM7l4s5gAQuUAzRUoT51hnH3EWofXf2vkD5HJJ33vwE%2FaEWp36GHr6GpMaH4AAPuqM5eabH%2FhfG9zcCz4nN6cPinuAw6IHwtvyB%2FdO1toZciBaPh25U0ducR2PI3Zl7mokyLWKkSnEDOg1x5fCsJE9EKhH7HwFNhWMGMS7%2BqxyYsbHHRUDUH4I%2FAheQY7wujJNnFUH4KdCju83riuQeHU9WEqNzjsJFuF%2FdTDAZ%2FK7%2F1WaAU%2BAWymT59pVMT4g2AxcwNa0XEBDdBDpAPvgDIH73R25teeuAF5ime2Ul0OUIiG4GpSAEJeYW9wDTf43wfwHgHLKJoPznkwAAAABJRU5ErkJggg%3D%3D)](https://doi.org/10.6084/m9.figshare.25816864.v1) +[![pytest CI](https://github.com/duartegroup/mlp-train/actions/workflows/pytest.yml/badge.svg?event=push)](https://github.com/duartegroup/mlp-train/actions/workflows/pytest.yml) +[![codecov](https://codecov.io/gh/duartegroup/mlp-train/branch/main/graph/badge.svg)](https://codecov.io/gh/duartegroup/mlp-train) [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit) [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff) [![License](https://img.shields.io/badge/License-MIT%202.0-blue.svg)](https://opensource.org/licenses/mit) -[![GitHub issues](https://img.shields.io/github/issues/duartegroup/mlp-train.svg)](https://github.com/duartegroup/mlp-train/issues) # mlp-train General machine learning potentials (MLP) training for molecular systems in gas phase and solution @@ -15,7 +16,7 @@ Available models: ## Install -Each model is installed into individual conda environment: +Each model is installed into an individual conda environment: ``` # Install GAP @@ -32,14 +33,19 @@ Each model is installed into individual conda environment: - Units are: distance (Å), energy (eV), force (eV Å$`^{-1}`$), time (fs) -## Using with OpenMM (Experimental!) 
+## Using with OpenMM The OpenMM backend only works with MACE at the moment. The necessary dependencies are installed automatically via conda: -```console +``` ./install_mace.sh ``` +Depending on your machine, you might need to prefix the command above with something like `CONDA_OVERRIDE_CUDA="11.2"` in two scenarios: + +- To ensure an environment that is compatible with your CUDA driver. +- To force CUDA builds to be installed, even if the installation is being done from a CPU-only machine. This is typical in a situation where you are installing from a head node without GPUs but intend to run on GPUs and want to install the CUDA builds. + You should now be able to run `water_openmm.py` in `./examples` or run the jupyter notebook on Google Colab [`water_openmm_colab.ipynb`](./examples/water_openmm_colab.ipynb). You can use OpenMM during active learning by passing the keyword argument `md_program="OpenMM"` to the `al_train` method. diff --git a/environment.yml b/environment.yml index 9bfcbcc7..39c166f8 100644 --- a/environment.yml +++ b/environment.yml @@ -13,11 +13,13 @@ dependencies: - cython - dscribe=2.0 - matplotlib-base - - numpy - - pytest + - numpy<2 + - pytest=8 + - pytest-cov=5 - py-plumed - scipy - xtb + - scikit-learn - pip: - quippy-ase # GAP - ase@git+https://gitlab.com/ase/ase.git@f2615a6e9a # For PLUMED diff --git a/environment_ace.yml b/environment_ace.yml index 01b7c674..5441cd9f 100644 --- a/environment_ace.yml +++ b/environment_ace.yml @@ -13,11 +13,13 @@ dependencies: - cython - dscribe=2.0 - matplotlib-base - - numpy - - pytest + - numpy<2 + - pytest=8 + - pytest-cov=5 - py-plumed - scipy - xtb + - scikit-learn - pip: - julia # Python-Julia integration (this will not install Julia itself!) 
- pyjulip@git+https://github.com/casv2/pyjulip.git@72280a6ac3 # Integration with ACE diff --git a/environment_mace.yml b/environment_mace.yml index 15f078d1..70c2ede8 100644 --- a/environment_mace.yml +++ b/environment_mace.yml @@ -15,26 +15,18 @@ dependencies: - dscribe=2.0 - matplotlib-base - numpy - - pytest + - pytest=8 + - pytest-cov=5 - py-plumed - scipy - xtb - # MACE dependencies - - pytorch=2.0 - - openmm-torch=1.1 - - torchvision - - torchaudio - - torch-ema - # TODO: You might also need CUDA-specific libraries, - # but that depends on CUDA version - # https://pytorch.org/get-started/locally/ - # - pytorch-cuda=11.8 - # - pytorch-cuda=12.1 + - scikit-learn + - openmm + - openmm-torch + - nnpops + - openmm-ml + - git + - openmm-ml - pip: - # e3nn is available on conda as well, but it is a dependency of mace - # so needs to be installed together. - - e3nn - - mace@git+https://github.com/ACEsuit/mace.git@v0.2.0 - # The upstream PR to openmm-ml has not been merged yet: https://github.com/openmm/openmm-ml/pull/61) - - openmmml@git+https://github.com/sef43/openmm-ml.git@mace - ase@git+https://gitlab.com/ase/ase.git@f2615a6e9a # For PLUMED + - mace-torch diff --git a/examples/DA_paper/1d_fes/fes.py b/examples/DA_paper/1d_fes/fes.py index d33d30b0..2a7c1651 100644 --- a/examples/DA_paper/1d_fes/fes.py +++ b/examples/DA_paper/1d_fes/fes.py @@ -6,26 +6,39 @@ mlt.Config.n_cores = 10 if __name__ == '__main__': - us = mlt.UmbrellaSampling(zeta_func=mlt.AverageDistance((1,12), (6,11)), - kappa=10) + us = mlt.UmbrellaSampling( + zeta_func=mlt.AverageDistance((1, 12), (6, 11)), kappa=10 + ) temp = 300 neb = mlt.ConfigurationSet() neb.load_xyz(filename='neb_optimised.xyz', charge=0, mult=1) - + irc = mlt.ConfigurationSet() for config in neb: config.box = Box([18.5, 18.5, 18.5]) irc.append(config) - r112_reactant = np.linalg.norm(irc[0].atoms[1].coord-irc[0].atoms[12].coord) - r611_reactant = np.linalg.norm(irc[0].atoms[6].coord-irc[0].atoms[11].coord) + r112_reactant = 
np.linalg.norm( + irc[0].atoms[1].coord - irc[0].atoms[12].coord + ) + r611_reactant = np.linalg.norm( + irc[0].atoms[6].coord - irc[0].atoms[11].coord + ) - r112_product = np.linalg.norm(irc[-1].atoms[1].coord-irc[-1].atoms[12].coord) - r611_product = np.linalg.norm(irc[-1].atoms[6].coord-irc[-1].atoms[11].coord) + r112_product = np.linalg.norm( + irc[-1].atoms[1].coord - irc[-1].atoms[12].coord + ) + r611_product = np.linalg.norm( + irc[-1].atoms[6].coord - irc[-1].atoms[11].coord + ) - logger.info(f'average bond length in reactant is {(r112_reactant+r611_reactant)/2}') - logger.info(f'average bond length in product is {(r112_product+r611_product)/2}') + logger.info( + f'average bond length in reactant is {(r112_reactant+r611_reactant)/2}' + ) + logger.info( + f'average bond length in product is {(r112_product+r611_product)/2}' + ) irc.reverse() # Go product -> reactant, the NEB path is from reactant -> product @@ -37,31 +50,36 @@ endo = mlt.potentials.ACE('endo_in_water_ace_wB97M', system) - us.run_umbrella_sampling(irc, - mlp=endo, - temp=temp, - interval=5, - dt=0.5, - n_windows=15, - init_ref=1.55, - final_ref=4, - ps=10) + us.run_umbrella_sampling( + irc, + mlp=endo, + temp=temp, + interval=5, + dt=0.5, + n_windows=15, + init_ref=1.55, + final_ref=4, + ps=10, + ) us.save('wide_US') # Run a second, narrower US with a higher force constant us.kappa = 20 - us.run_umbrella_sampling(irc, - mlp=endo, - temp=temp, - interval=5, - dt=0.5, - n_windows=15, - init_ref=1.7, - final_ref=2.5, - ps=10) + us.run_umbrella_sampling( + irc, + mlp=endo, + temp=temp, + interval=5, + dt=0.5, + n_windows=15, + init_ref=1.7, + final_ref=2.5, + ps=10, + ) us.save('narrow_US') - total_us = mlt.UmbrellaSampling.from_folders('wide_US', 'narrow_US', - temp=temp) + total_us = mlt.UmbrellaSampling.from_folders( + 'wide_US', 'narrow_US', temp=temp + ) total_us.wham() diff --git a/examples/DA_paper/2D_pes/pes.py b/examples/DA_paper/2D_pes/pes.py index a0900e88..5b45a60a 100644 --- 
a/examples/DA_paper/2D_pes/pes.py +++ b/examples/DA_paper/2D_pes/pes.py @@ -27,22 +27,21 @@ def adjust_potential_energy(self, atoms): if self._type == 'plane': A, B, C, D = self.plane x, y, z = positions[self.index] - d = ((A * x + B * y + C * z + D) / - np.sqrt(A**2 + B**2 + C**2)) + d = (A * x + B * y + C * z + D) / np.sqrt(A**2 + B**2 + C**2) if d > 0: return 0.5 * self.spring * d**2 else: - return 0. - + return 0.0 + if self._type == 'two atoms': p1, p2 = positions[self.indices] - + elif self._type == 'point': p1 = positions[self.index] p2 = self.origin displace, _ = find_mic(p2 - p1, atoms.cell, atoms.pbc) bondlength = np.linalg.norm(displace) - return 0.5 * self.spring * (bondlength - self.threshold)**2 + return 0.5 * self.spring * (bondlength - self.threshold) ** 2 def adjust_forces(self, atoms, forces): @@ -50,49 +49,51 @@ def adjust_forces(self, atoms, forces): if self._type == 'plane': A, B, C, D = self.plane x, y, z = positions[self.index] - d = ((A * x + B * y + C * z + D) / - np.sqrt(A**2 + B**2 + C**2)) + d = (A * x + B * y + C * z + D) / np.sqrt(A**2 + B**2 + C**2) if d < 0: - return 0 + return 0 magnitude = self.spring * d - direction = - np.array((A, B, C)) / np.linalg.norm((A, B, C)) + direction = -np.array((A, B, C)) / np.linalg.norm((A, B, C)) forces[self.index] += direction * magnitude return None - + if self._type == 'two atoms': p1, p2 = positions[self.indices] - + elif self._type == 'point': p1 = positions[self.index] - p2 = self.origin + p2 = self.origin displace, _ = find_mic(p2 - p1, atoms.cell, atoms.pbc) bondlength = np.linalg.norm(displace) magnitude = self.spring * (bondlength - self.threshold) direction = displace / np.linalg.norm(displace) - + if self._type == 'two atoms': forces[self.indices[0]] += direction * magnitude forces[self.indices[1]] -= direction * magnitude - + else: forces[self.index] += direction * magnitude return None -def from_autode_to_ase(molecule, cell_size = 100): - """ convert autode.molecule to ase.atoms - 
maintain the constrain generated during ade.pes.RelaxedPESnD calculation""" +def from_autode_to_ase(molecule, cell_size=100): + """convert autode.molecule to ase.atoms + maintain the constrain generated during ade.pes.RelaxedPESnD calculation""" from ase.atoms import Atoms - atoms = Atoms(symbols=[atom.label for atom in molecule.atoms], - positions=molecule.coordinates, - pbc=True) + + atoms = Atoms( + symbols=[atom.label for atom in molecule.atoms], + positions=molecule.coordinates, + pbc=True, + ) atoms.set_cell([(cell_size, 0, 0), (0, cell_size, 0), (0, 0, cell_size)]) c = [] for (i, j), dist in molecule.constraints.distance.items(): c.append(Hookean(a1=i, a2=j, k=50, rt=dist)) - atoms.set_constraint(c) + atoms.set_constraint(c) return atoms @@ -109,10 +110,10 @@ def __repr__(self): def generate_input(self, calc, molecule): """Just print a .xyz file of the molecule, which can be read - as a gap-train configuration object""" + as a gap-train configuration object""" molecule.print_xyz_file(filename=calc.input.filename) - calc.input.additional_filenames=[self.path] + calc.input.additional_filenames = [self.path] return None def get_output_filename(self, calc): @@ -123,7 +124,7 @@ def get_input_filename(self, calc): def get_version(self, calc): return '1.0.0' - + def execute(self, calc): """ Execute the calculation @@ -133,8 +134,9 @@ def execute(self, calc): from ase.io.trajectory import Trajectory as ASETrajectory from ase.optimize import BFGS - @work_in_tmp_dir(filenames_to_copy=calc.input.filenames, - kept_file_exts=('.xyz')) + @work_in_tmp_dir( + filenames_to_copy=calc.input.filenames, kept_file_exts=('.xyz') + ) def execute_mlp(): if 'opt' in self.action: logger.info('start optimization') @@ -142,32 +144,30 @@ def execute_mlp(): logger.info('start optimise moelucle') logger.info(f'{ase_atoms.cell}, {ase_atoms.pbc}') ase_atoms.set_calculator(self.ase_calculator) - asetraj = ASETrajectory("tmp.traj", 'w', ase_atoms) + asetraj = ASETrajectory('tmp.traj', 'w', 
ase_atoms) dyn = BFGS(ase_atoms) dyn.attach(asetraj.write, interval=2) dyn.run(fmax=0.01) traj = _convert_ase_traj('tmp.traj') final_traj = traj.final_frame - final_traj.single_point(self.mlp, - n_cores=calc.n_cores) + final_traj.single_point(self.mlp, n_cores=calc.n_cores) name = self.get_output_filename(calc) final_traj.save_xyz(filename=name, predicted=True) - + else: configuration = mlt.Configuration() configuration.load(f'{calc.name}.xyz') configuration.box = Box(size=[100, 100, 100]) - configuration.single_point(self.mlp, - n_cores=calc.n_cores) + configuration.single_point(self.mlp, n_cores=calc.n_cores) name = self.get_output_filename(calc) configuration.save_xyz(filename=name, predicted=True) - + execute_mlp() return None - + def calculation_terminated_normally(self, calc): name = self.get_output_filename(calc) - + if os.path.exists(name): configuration = mlt.Configuration() configuration.load(name) @@ -186,7 +186,7 @@ def get_energy(self, calc): def get_free_energy(self, calc): return None - + def get_enthalpy(self, calc): return None @@ -218,52 +218,71 @@ def get_gradients(self, calc): return configuration.forces.true * ev_to_ha def __init__(self, mlp, action, path): - super().__init__(name='mlp', keywords_set=KeywordsSet(), - path='', - implicit_solvation_type=None) + super().__init__( + name='mlp', + keywords_set=KeywordsSet(), + path='', + implicit_solvation_type=None, + ) self.path = path self.mlp = mlp - self.action = deepcopy(action) + self.action = deepcopy(action) def get_final_species(TS, mlp): """get the optimised product after MD propogation""" - trajectory_product = mlt.md.run_mlp_md(configuration=TS, - mlp=mlp, - fs=500, - temp=300, - dt=0.5, - fbond_energy={(1,12) : 0.1, (6,11) : 0.1}, - interval=2) + trajectory_product = mlt.md.run_mlp_md( + configuration=TS, + mlp=mlp, + fs=500, + temp=300, + dt=0.5, + fbond_energy={(1, 12): 0.1, (6, 11): 0.1}, + interval=2, + ) final_traj_product = trajectory_product.final_frame - traj_product_optimised 
= optimise_with_fix_solute(solute=TS, - configuration=final_traj_product, - fmax=0.01, - mlp=mlp, - constraint=False) - - rt1 = np.linalg.norm(traj_product_optimised.atoms[1].coord-traj_product_optimised.atoms[12].coord) - rt2 = np.linalg.norm(traj_product_optimised.atoms[6].coord-traj_product_optimised.atoms[11].coord) + traj_product_optimised = optimise_with_fix_solute( + solute=TS, + configuration=final_traj_product, + fmax=0.01, + mlp=mlp, + constraint=False, + ) + + rt1 = np.linalg.norm( + traj_product_optimised.atoms[1].coord + - traj_product_optimised.atoms[12].coord + ) + rt2 = np.linalg.norm( + traj_product_optimised.atoms[6].coord + - traj_product_optimised.atoms[11].coord + ) logger.info(f'the forming carbon bonds length in product are {rt1}, {rt2}') product = mlt.Molecule(name='product', atoms=traj_product_optimised.atoms) - return product + return product @mlt.utils.work_in_tmp_dir(copied_exts=['.xml', '.json']) -def optimise_with_fix_solute(solute, configuration, fmax, mlp, constraint = True, **kwargs): +def optimise_with_fix_solute( + solute, configuration, fmax, mlp, constraint=True, **kwargs +): """optimised molecular geometries by MLP with or without constraint""" from ase.constraints import FixAtoms from ase.optimize import BFGS from ase.io.trajectory import Trajectory as ASETrajectory assert configuration.box is not None, 'configuration must have box' - logger.info('Optimise the configuration with fixed solute (solute coords should at the first in configuration coords) by MLP') + logger.info( + 'Optimise the configuration with fixed solute (solute coords should at the first in configuration coords) by MLP' + ) - n_cores = kwargs['n_cores'] if 'n_cores' in kwargs else min(Config.n_cores, 8) + n_cores = ( + kwargs['n_cores'] if 'n_cores' in kwargs else min(Config.n_cores, 8) + ) os.environ['OMP_NUM_THREADS'] = str(n_cores) logger.info(f'Using {n_cores} cores for MLP MD') @@ -276,7 +295,7 @@ def optimise_with_fix_solute(solute, configuration, fmax, 
mlp, constraint = True constraints = FixAtoms(indices=solute_idx) ase_atoms.set_constraint(constraints) - asetraj = ASETrajectory("tmp.traj", 'w', ase_atoms) + asetraj = ASETrajectory('tmp.traj', 'w', ase_atoms) dyn = BFGS(ase_atoms) dyn.attach(asetraj.write, interval=2) dyn.run(fmax=fmax) @@ -285,6 +304,7 @@ def optimise_with_fix_solute(solute, configuration, fmax, mlp, constraint = True final_traj = traj.final_frame return final_traj + Hookean.adjust_forces = adjust_forces Hookean.adjust_potential_energy = adjust_potential_energy @@ -296,7 +316,7 @@ def optimise_with_fix_solute(solute, configuration, fmax, mlp, constraint = True endo = mlt.potentials.ACE('endo_ace_wB97M_imwater', system) TS = mlt.ConfigurationSet() - TS.load_xyz(filename = 'cis_endo_TS_wB97M.xyz') + TS.load_xyz(filename='cis_endo_TS_wB97M.xyz', charge=0, mult=1) TS = TS[0] TS.box = Box([100, 100, 100]) TS.charge = 0 @@ -305,15 +325,18 @@ def optimise_with_fix_solute(solute, configuration, fmax, mlp, constraint = True cwd = os.getcwd() ade_endo = MLPEST(mlp=endo, action=['opt'], path=f'{cwd}/{endo.name}.json') - product = get_final_species(TS=TS[0], - mlp=endo) - + product = get_final_species(TS=TS[0], mlp=endo) + product.print_xyz_file(filename='product.xyz') - pes = ade.pes.RelaxedPESnD(ade.Molecule('product.xyz'), - rs={(1, 12): (1.55, 3, 20), # Current->3.0 Å in 8 steps - (6, 11): (1.55, 3, 20)}) + pes = ade.pes.RelaxedPESnD( + ade.Molecule('product.xyz'), + rs={ + (1, 12): (1.55, 3, 20), # Current->3.0 Å in 8 steps + (6, 11): (1.55, 3, 20), + }, + ) pes.calculate(method=ade_endo, keywords=['opt'], n_cores=8) - pes.save(filename='endo_in_water.npz') + pes.save(filename='endo_in_water.npz') pes.plot() diff --git a/examples/DA_paper/training/explicit/endo_ace_ex.py b/examples/DA_paper/training/explicit/endo_ace_ex.py index 5f58352f..7ead333e 100644 --- a/examples/DA_paper/training/explicit/endo_ace_ex.py +++ b/examples/DA_paper/training/explicit/endo_ace_ex.py @@ -3,40 +3,52 @@ from autode.atoms 
import Atom from mlptrain.log import logger from mlptrain.box import Box -from mlptrain.training.selection import MaxAtomicEnvDistance +from mlptrain.training.selection import AtomicEnvSimilarity mlt.Config.n_cores = 10 -mlt.Config.orca_keywords = ['wB97M-D3BJ', 'def2-TZVP','def2/J', 'RIJCOSX','EnGrad'] +mlt.Config.orca_keywords = [ + 'wB97M-D3BJ', + 'def2-TZVP', + 'def2/J', + 'RIJCOSX', + 'EnGrad', +] def from_ase_to_autode(atoms): - #atoms is ase.Atoms + # atoms is ase.Atoms autode_atoms = [] symbols = atoms.symbols for i in range(len(atoms)): - autode_atoms.append(Atom(symbols[i], - x=atoms.positions[i][0], - y=atoms.positions[i][1], - z=atoms.positions[i][2])) + autode_atoms.append( + Atom( + symbols[i], + x=atoms.positions[i][0], + y=atoms.positions[i][1], + z=atoms.positions[i][2], + ) + ) return autode_atoms -def add_water(solute, n = 2): +def add_water(solute, n=2): """add water molecules to the reactive species - solute: mlt.Configuration, the molecule to add water molecules, should including box - n: number of water molecules to add""" + solute: mlt.Configuration, the molecule to add water molecules, should including box + n: number of water molecules to add""" from ase import Atoms from ase.calculators.tip3p import rOH, angleHOH # water molecule x = angleHOH * np.pi / 180 / 2 - pos = [[0, 0, 0], - [0, rOH * np.cos(x), rOH * np.sin(x)], - [0, rOH * np.cos(x), -rOH * np.sin(x)]] + pos = [ + [0, 0, 0], + [0, rOH * np.cos(x), rOH * np.sin(x)], + [0, rOH * np.cos(x), -rOH * np.sin(x)], + ] water = Atoms('OH2', positions=pos) - H_origin = water[0].position-water[1].position + H_origin = water[0].position - water[1].position water.translate(H_origin) water0 = water.copy() @@ -48,15 +60,21 @@ def add_water(solute, n = 2): sol = solute.ase_atoms sol.center() sys = sol.copy() - + # randomly rotate water molecule - water0.rotate(np.random.uniform(0, 180), (0, np.random.uniform (-1, 0), np.random.uniform (0, 1))) + water0.rotate( + np.random.uniform(0, 180), + (0, 
np.random.uniform(-1, 0), np.random.uniform(0, 1)), + ) sys += water0 - water1.rotate(np.random.uniform(0, 180), ( np.random.uniform (-1, 0), np.random.uniform (0, 1), 0)) - if n >=2: - for i in range(n-1): + water1.rotate( + np.random.uniform(0, 180), + (np.random.uniform(-1, 0), np.random.uniform(0, 1), 0), + ) + if n >= 2: + for i in range(n - 1): sys += water1 - + len_sol = len(sol) sol_idx = list(range(len_sol)) idx = list(range(len(sys))) @@ -68,45 +86,55 @@ def add_water(solute, n = 2): C_idx.append(atm) if sys.numbers[atm] == 8: O_idx.append(atm) - + # the direction to add water molecules to avioding unphysical cases, system specific - C98 = (sys[C_idx[7]].position-sys[C_idx[8]].position)/np.linalg.norm(sys[C_idx[7]].position-sys[C_idx[8]].position) - C68 = (sys[C_idx[7]].position-sys[C_idx[5]].position)/np.linalg.norm(sys[C_idx[7]].position-sys[C_idx[5]].position) - C48 = (sys[C_idx[7]].position-sys[C_idx[3]].position)/np.linalg.norm(sys[C_idx[7]].position-sys[C_idx[3]].position) - C8O = (sys[O_idx[0]].position-sys[C_idx[7]].position)/np.linalg.norm(sys[O_idx[0]].position-sys[C_idx[7]].position) + C98 = (sys[C_idx[7]].position - sys[C_idx[8]].position) / np.linalg.norm( + sys[C_idx[7]].position - sys[C_idx[8]].position + ) + C68 = (sys[C_idx[7]].position - sys[C_idx[5]].position) / np.linalg.norm( + sys[C_idx[7]].position - sys[C_idx[5]].position + ) + C48 = (sys[C_idx[7]].position - sys[C_idx[3]].position) / np.linalg.norm( + sys[C_idx[7]].position - sys[C_idx[3]].position + ) + C8O = (sys[O_idx[0]].position - sys[C_idx[7]].position) / np.linalg.norm( + sys[O_idx[0]].position - sys[C_idx[7]].position + ) direction = [C68, C48, C8O, C98] water_idx = [] for atm in idx[22::3]: single_water = [] for i in range(3): - single_water.append(atm+i) + single_water.append(atm + i) water_idx.append(single_water) assert len(water_idx) == n for j in range(len(water_idx)): displacement = np.random.uniform(1.85, 2.4) - logger.info(f'distance between H in water and O is TS 
is {displacement} ') - vec = displacement*direction[j] + logger.info( + f'distance between H in water and O is TS is {displacement} ' + ) + vec = displacement * direction[j] water = water_idx[j] - trans = sys[O_idx[0]].position+vec + trans = sys[O_idx[0]].position + vec for mol in water_idx[j]: sys[mol].position += trans autode_atoms = from_ase_to_autode(atoms=sys) - added_water = mlt.Configuration(atoms=autode_atoms, box=solute.box) + added_water = mlt.Configuration(atoms=autode_atoms, box=solute.box) return added_water - - -def solvation(solute_config, solvent_config, apm, radius, enforce = True): + + +def solvation(solute_config, solvent_config, apm, radius, enforce=True): """function to generate solvated system by adding the solute at the center of box, - then remove the overlapped solvent molecules - adapted from https://doi.org/10.1002/qua.26343 - solute: mlt.Configuration() solute.box is not None - solvent: mlt.Configuration() solvent.box is not None - aps: number of atoms per solvent molecule - radius: cutout radius around each solute atom - enforce: True / False Wrap solvent regardless of previous solvent PBC choices""" + then remove the overlapped solvent molecules + adapted from https://doi.org/10.1002/qua.26343 + solute: mlt.Configuration() solute.box is not None + solvent: mlt.Configuration() solvent.box is not None + aps: number of atoms per solvent molecule + radius: cutout radius around each solute atom + enforce: True / False Wrap solvent regardless of previous solvent PBC choices""" assert solute_config.box is not None, 'configuration must have box' assert solvent_config.box is not None, 'configuration must have box' @@ -114,33 +142,35 @@ def solvation(solute_config, solvent_config, apm, radius, enforce = True): solvent = solvent_config.ase_atoms def wrap(D, cell, pbc): - """ wrap distance to nearest neighbor - D: distance""" - for i , periodic in enumerate(pbc): + """wrap distance to nearest neighbor + D: distance""" + for i, periodic in 
enumerate(pbc): if periodic: d = D[:, i] L = cell[i] - d[:] = (d+L/2)%L-L/2 + d[:] = (d + L / 2) % L - L / 2 return None def molwrap(atoms, n, idx=0): """Wrap to cell without breaking molecule - n: number of atoms per solvent molecule - idx: which atom in the solvent molecule to determine molecular distances from""" - center = atoms.cell.diagonal()/2 + n: number of atoms per solvent molecule + idx: which atom in the solvent molecule to determine molecular distances from""" + center = atoms.cell.diagonal() / 2 positions = atoms.positions.reshape((-1, n, 3)) - distances = positions[:, idx]-center + distances = positions[:, idx] - center old_distances = distances.copy() wrap(distances, atoms.cell.diagonal(), atoms.pbc) offsets = distances - old_distances positions += offsets[:, None] - atoms.set_positions(positions.reshape((-1,3))) + atoms.set_positions(positions.reshape((-1, 3))) return atoms - assert not (solvent.cell.diagonal()==0).any(), \ - 'solvent atoms have no cell' - assert (solvent.cell == np.diag(solvent.cell.diagonal())).all(), \ - 'sol cell not orthorhombic' + assert not ( + solvent.cell.diagonal() == 0 + ).any(), 'solvent atoms have no cell' + assert ( + solvent.cell == np.diag(solvent.cell.diagonal()) + ).all(), 'sol cell not orthorhombic' if enforce: solvent.pbc = True sol = molwrap(solvent, apm) @@ -156,19 +186,19 @@ def molwrap(atoms, n, idx=0): mask = np.zeros(len(sys), bool) mask[solute_idx] = True - # delete solvent molecules for whose atom is overlap with solute + # delete solvent molecules for whose atom is overlap with solute atoms_to_delete = [] for atm in solute_idx: mol_dists = sys[atm].position - sys[~mask][::].positions - idx = np.where((np.linalg.norm(mol_dists , axis = 1))8 cores - n_cores = kwargs['n_cores'] if 'n_cores' in kwargs else min(Config.n_cores, 8) + n_cores = ( + kwargs['n_cores'] if 'n_cores' in kwargs else min(Config.n_cores, 8) + ) n_steps = _n_simulation_steps(dt, kwargs) os.environ['OMP_NUM_THREADS'] = str(n_cores) @@ 
-264,22 +296,23 @@ def baised_md(configuration, mlp, temp, dt, interval, bias, **kwargs): ase_atoms.set_calculator(mlp.ase_calculator) ase_atoms.set_constraint(bias) - MaxwellBoltzmannDistribution(ase_atoms, temperature_K=temp, - rng=RandomState()) - - traj = ASETrajectory("tmp.traj", 'w', ase_atoms) + MaxwellBoltzmannDistribution( + ase_atoms, temperature_K=temp, rng=RandomState() + ) + + traj = ASETrajectory('tmp.traj', 'w', ase_atoms) energies = [] def append_energy(_atoms=ase_atoms): energies.append(_atoms.get_potential_energy()) - if temp > 0: # Default Langevin NVT - dyn = Langevin(ase_atoms, dt * ase_units.fs, - temperature_K=temp, - friction=0.02) - else: # Otherwise NVE + if temp > 0: # Default Langevin NVT + dyn = Langevin( + ase_atoms, dt * ase_units.fs, temperature_K=temp, friction=0.02 + ) + else: # Otherwise NVE dyn = VelocityVerlet(ase_atoms, dt * ase_units.fs) - + dyn.attach(traj.write, interval=interval) logger.info(f'Running {n_steps:.0f} steps with a timestep of {dt} fs') @@ -290,45 +323,51 @@ def append_energy(_atoms=ase_atoms): for i in range(10, len(traj)): trajectory.append(traj[i]) energies = energies[10:] - + for i, (frame, energy) in enumerate(zip(trajectory, energies)): frame.update_attr_from(configuration) frame.energy.predicted = energy frame.time = dt * interval * i - return trajectory + return trajectory def generate_rs(TS, solution, mlp, box_size): ref = [] reactants = mlt.ConfigurationSet() while len(reactants) < 10: - reactant = get_reactant_states(TS=TS, - solution=solution, - mlp=mlp) - rt1 = np.linalg.norm(reactant.atoms[1].coord-reactant.atoms[12].coord) - rt2 = np.linalg.norm(reactant.atoms[6].coord-reactant.atoms[11].coord) - if 38 cores - n_cores = kwargs['n_cores'] if 'n_cores' in kwargs else min(mlt.Config.n_cores, 8) + n_cores = ( + kwargs['n_cores'] + if 'n_cores' in kwargs + else min(mlt.Config.n_cores, 8) + ) n_steps = _n_simulation_steps(dt, kwargs) os.environ['OMP_NUM_THREADS'] = str(n_cores) @@ -81,37 +98,46 @@ def 
md_with_file(configuration, mlp, temp, dt, interval, init_temp = None, **kwa ase_atoms = configuration.ase_atoms ase_atoms.set_calculator(mlp.ase_calculator) - bias = mlt.Bias(zeta_func=mlt.AverageDistance((1,12), (6,11)), kappa=0.4, reference=1.6) + bias = mlt.Bias( + zeta_func=mlt.AverageDistance((1, 12), (6, 11)), + kappa=0.4, + reference=1.6, + ) ase_atoms.set_constraint(bias) - MaxwellBoltzmannDistribution(ase_atoms, temperature_K=temp, - rng=RandomState()) - traj = ASETrajectory("tmp.traj", 'w', ase_atoms) - + MaxwellBoltzmannDistribution( + ase_atoms, temperature_K=temp, rng=RandomState() + ) + traj = ASETrajectory('tmp.traj', 'w', ase_atoms) + energies = [] - def append_energy(_atoms = ase_atoms): + + def append_energy(_atoms=ase_atoms): energies.append(_atoms.get_potential_energy()) - + reaction_coords = [] - def get_reaction_coord(atoms = ase_atoms): - C2_C7 = np.linalg.norm(atoms[1].position-atoms[12].position) - C4_C6 = np.linalg.norm(atoms[6].position-atoms[11].position) - reaction_coord = 0.5*(C2_C7+C4_C6) + + def get_reaction_coord(atoms=ase_atoms): + C2_C7 = np.linalg.norm(atoms[1].position - atoms[12].position) + C4_C6 = np.linalg.norm(atoms[6].position - atoms[11].position) + reaction_coord = 0.5 * (C2_C7 + C4_C6) reaction_coords.append(reaction_coord) cavity_volumn = [] - def get_cavity_volume(atoms = ase_atoms): + + def get_cavity_volume(atoms=ase_atoms): volumn = cavity_volume(ase_system=atoms) cavity_volumn.append(volumn) - if temp > 0: # Default Langevin NVT - dyn = Langevin(ase_atoms, dt * ase_units.fs, - temperature_K=temp, - friction=0.02) - else: # Otherwise NVE + + if temp > 0: # Default Langevin NVT + dyn = Langevin( + ase_atoms, dt * ase_units.fs, temperature_K=temp, friction=0.02 + ) + else: # Otherwise NVE dyn = VelocityVerlet(ase_atoms, dt * ase_units.fs) dyn.attach(append_energy, interval=interval) - dyn.attach(get_reaction_coord,interval=interval) - dyn.attach(get_cavity_volume,interval=interval) + dyn.attach(get_reaction_coord, 
interval=interval) + dyn.attach(get_cavity_volume, interval=interval) dyn.attach(traj.write, interval=interval) logger.info(f'Running {n_steps:.0f} steps with a timestep of {dt} fs') @@ -127,17 +153,16 @@ def get_cavity_volume(atoms = ase_atoms): return traj, reaction_coords, cavity_volumn -def traj_study(configs, ml_potential, init_md_time_fs = 500, max_time_fs = 3000): - +def traj_study(configs, ml_potential, init_md_time_fs=500, max_time_fs=3000): C2_C7_initial_list = [] C4_C6_initial_list = [] for k in range(500): - config =configs[k] + config = configs[k] logger.info(f'start trajectory study for {k} th configuration') - C2_C7 = np.linalg.norm(config.atoms[1].coord-config.atoms[12].coord) - C4_C6 = np.linalg.norm(config.atoms[6].coord-config.atoms[11].coord) + C2_C7 = np.linalg.norm(config.atoms[1].coord - config.atoms[12].coord) + C4_C6 = np.linalg.norm(config.atoms[6].coord - config.atoms[11].coord) C2_C7_initial_list.append(C2_C7) C4_C6_initial_list.append(C4_C6) @@ -155,14 +180,16 @@ def traj_study(configs, ml_potential, init_md_time_fs = 500, max_time_fs = 300 C2_C7_list = [] C4_C6_list = [] - traj, reaction_coords, cavity_volume = md_with_file(config, - mlp=ml_potential, - temp=300, - dt=0.5, - interval=2, - fs=md_time_fs_f) + traj, reaction_coords, cavity_volume = md_with_file( + config, + mlp=ml_potential, + temp=300, + dt=0.5, + interval=2, + fs=md_time_fs_f, + ) ending = 0 - for (i, j) in zip (C2_C7_list, C4_C6_list): + for i, j in zip(C2_C7_list, C4_C6_list): logger.info(f'C2-C7 and C4-C6 bond lengths are {(i,j)}') if i <= 1.6 and j <= 1.6: ending += 1 @@ -170,12 +197,12 @@ def traj_study(configs, ml_potential, init_md_time_fs = 500, max_time_fs = 300 else: pass - if ending!= 0: + if ending != 0: traj.save_xyz(f'trajectoris/traj_{k}.xyz') - with open ('reaction_coords.txt','a') as f: + with open('reaction_coords.txt', 'a') as f: line = reaction_coords print(line, file=f) - with open ('cavity_volume.txt','a') as f: + with open('cavity_volume.txt', 
'a') as f: line = cavity_volume print(line, file=f) break @@ -186,6 +213,7 @@ def traj_study(configs, ml_potential, init_md_time_fs = 500, max_time_fs = 300 logger.info(f'current simulation time is {tol_md_time_f} fs') return None + if __name__ == '__main__': water_mol = mlt.Molecule(name='h2o.xyz') TS_mol = mlt.Molecule(name='cis_endo_TS_wB97M.xyz') @@ -196,15 +224,15 @@ def traj_study(configs, ml_potential, init_md_time_fs = 500, max_time_fs = 300 endo = mlt.potentials.ACE('endo_in_water_ace_wB97M', system) TS = mlt.ConfigurationSet() - TS.load_xyz(filename = 'cis_endo_TS_wB97M.xyz') + TS.load_xyz(filename='cis_endo_TS_wB97M.xyz', charge=0, mult=1) TS = TS[0] - TS.box = Box([21.5, 21.5,21.5]) + TS.box = Box([21.5, 21.5, 21.5]) TS.charge = 0 TS.mult = 1 - water_system = mlt.System(water_mol, box=Box([21.5, 21.5,21.5])) + water_system = mlt.System(water_mol, box=Box([21.5, 21.5, 21.5])) water_system.add_molecules(water_mol, num=331) rs = generate_rs(TS, water_system, endo, 21.5) - traj_study (rs, endo) + traj_study(rs, endo) diff --git a/examples/da_ts.py b/examples/da_ts.py index f0df1d54..7a5a18a9 100644 --- a/examples/da_ts.py +++ b/examples/da_ts.py @@ -5,23 +5,26 @@ if __name__ == '__main__': - - system = mlt.System(mlt.Molecule('ts_pbe0.xyz'), box=None) + system = mlt.System(mlt.Molecule('ts_pbe0.xyz'), box=None) gap = mlt.potentials.GAP('da', system=system) - gap.al_train(method_name='orca', - temp=300, # K - selection_method=mlt.selection.MaxAtomicEnvDistance(), - max_active_time=200, # fs - fix_init_config=True) + gap.al_train( + method_name='orca', + temp=300, # K + selection_method=mlt.selection.MaxAtomicEnvDistance(), + max_active_time=200, # fs + fix_init_config=True, + ) # Run some dynamics with the potential - trajectory = mlt.md.run_mlp_md(configuration=system.configuration, - mlp=gap, - fs=300, - temp=100, - dt=0.5, - interval=10) + trajectory = mlt.md.run_mlp_md( + configuration=system.configuration, + mlp=gap, + fs=300, + temp=100, + dt=0.5, + 
interval=10, + ) # and compare, plotting a parity plots and E_true, ∆E and ∆F trajectory.compare(gap, 'orca') diff --git a/examples/inherited_bias_active_learning/h2/h2.py b/examples/inherited_bias_active_learning/h2/h2.py index 73ec16e8..7dd96ad3 100644 --- a/examples/inherited_bias_active_learning/h2/h2.py +++ b/examples/inherited_bias_active_learning/h2/h2.py @@ -3,7 +3,6 @@ mlt.Config.n_cores = 10 if __name__ == '__main__': - # Initialise the system to train h2_system = mlt.System(mlt.Molecule('h2.xyz'), box=None) @@ -14,12 +13,12 @@ # Attach CVs to a bias and initialise it for metadynamics AL. - # By default, metadynamics bias is stored as a list of deposited - # gaussians, which results in every MD step scaling linearly with - # the total length of the simulation. To make the scaling constant, - # the bias can be stored on a grid. This requires to specify the - # bounds for the grid, and the bounds should be chosen such that - # during AL the system would not leave the grid (either by using + # By default, metadynamics bias is stored as a list of deposited + # gaussians, which results in every MD step scaling linearly with + # the total length of the simulation. To make the scaling constant, + # the bias can be stored on a grid. This requires to specify the + # bounds for the grid, and the bounds should be chosen such that + # during AL the system would not leave the grid (either by using # a large grid or attaching walls to constrain the system). 
# Other metadynamics parameters can also be set by the method, @@ -32,14 +31,16 @@ # Metadynamics bias starts being applied at iteration 2, at iterations 0 # and 1 the training is performed using unbiased MD with the attached walls ace = mlt.potentials.ACE('hydrogen', system=h2_system) - ace.al_train(method_name='xtb', - temp=300, - max_active_iters=50, - min_active_iters=10, - bias_start_iter=2, - inherit_metad_bias=True, - bias=bias) - - # NOTE: The same al_train() method works with arbitrary PLUMED biases - # (i.e. not only metadynamics) by initialising a PlumedBias using a + ace.al_train( + method_name='xtb', + temp=300, + max_active_iters=50, + min_active_iters=10, + bias_start_iter=2, + inherit_metad_bias=True, + bias=bias, + ) + + # NOTE: The same al_train() method works with arbitrary PLUMED biases + # (i.e. not only metadynamics) by initialising a PlumedBias using a # PLUMED input file, but then inheritance is unavailable diff --git a/examples/inherited_bias_active_learning/h2o/h2o.py b/examples/inherited_bias_active_learning/h2o/h2o.py index c12d3f75..02e03048 100644 --- a/examples/inherited_bias_active_learning/h2o/h2o.py +++ b/examples/inherited_bias_active_learning/h2o/h2o.py @@ -3,7 +3,6 @@ mlt.Config.n_cores = 10 if __name__ == '__main__': - # Initialise the system to train h2o_system = mlt.System(mlt.Molecule('h2o.xyz'), box=None) @@ -17,12 +16,12 @@ # Attach CVs to a bias and initialise it for metadynamics AL. - # By default, metadynamics bias is stored as a list of deposited - # gaussians, which results in every MD step scaling linearly with - # the total length of the simulation. To make the scaling constant, - # the bias can be stored on a grid. 
This requires to specify the - # bounds for the grid, and the bounds should be chosen such that - # during AL the system would not leave the grid (either by using + # By default, metadynamics bias is stored as a list of deposited + # gaussians, which results in every MD step scaling linearly with + # the total length of the simulation. To make the scaling constant, + # the bias can be stored on a grid. This requires to specify the + # bounds for the grid, and the bounds should be chosen such that + # during AL the system would not leave the grid (either by using # a large grid or attaching walls to constrain the system). # Other metadynamics parameters can also be set by the method, @@ -35,14 +34,16 @@ # Metadynamics bias starts being applied at iteration 2, at iterations 0 # and 1 the training is performed using unbiased MD with the attached walls ace = mlt.potentials.ACE('water', system=h2o_system) - ace.al_train(method_name='xtb', - temp=300, - max_active_iters=50, - min_active_iters=5, - bias_start_iter=2, - inherit_metad_bias=True, - bias=bias) - - # NOTE: The same al_train() method works with arbitrary PLUMED biases - # (i.e. not only metadynamics) by initialising a PlumedBias using a + ace.al_train( + method_name='xtb', + temp=300, + max_active_iters=50, + min_active_iters=5, + bias_start_iter=2, + inherit_metad_bias=True, + bias=bias, + ) + + # NOTE: The same al_train() method works with arbitrary PLUMED biases + # (i.e. 
not only metadynamics) by initialising a PlumedBias using a # PLUMED input file, but then inheritance is unavailable diff --git a/examples/metadynamics/h2/h2.py b/examples/metadynamics/h2/h2.py index f992eab2..3a8705b2 100644 --- a/examples/metadynamics/h2/h2.py +++ b/examples/metadynamics/h2/h2.py @@ -3,13 +3,12 @@ mlt.Config.n_cores = 8 if __name__ == '__main__': - # Initialise the system h2_system = mlt.System(mlt.Molecule('h2.xyz'), box=None) # Generate a starting metadynamics configuration h2_config = h2_system.random_configuration() - + # Define CVs, can also attach walls to them. More complicated CVs (i.e. # not DISTANCE, ANGLE, or TORSION; e.g. PATH) can be defined using # PlumedCustomCV that requires a PLUMED-like input file containing the @@ -23,45 +22,49 @@ # Active learning (can be commented out if the potential is loaded) al_bias = mlt.PlumedBias(cvs=cv1) al_bias.initialise_for_metad_al(width=0.05, biasfactor=100) - ace.al_train(method_name='xtb', - temp=300, - max_active_iters=50, - min_active_iters=10, - bias_start_iter=2, - inherit_metad_bias=True, - bias=al_bias) + ace.al_train( + method_name='xtb', + temp=300, + max_active_iters=50, + min_active_iters=10, + bias_start_iter=2, + inherit_metad_bias=True, + bias=al_bias, + ) # Attach CVs to the metadynamics object metad = mlt.Metadynamics(cvs=cv1) - # Can run optional methods (estimate_width() and try_multiple_biafactors()) + # Can run optional methods (estimate_width() and try_multiple_biafactors()) # to help choose appropriate metadynamics parameters (width and bias factor), - width = metad.estimate_width(configurations=h2_config, - mlp=ace, - plot=True) + width = metad.estimate_width(configurations=h2_config, mlp=ace, plot=True) - metad.try_multiple_biasfactors(configuration=h2_config, - mlp=ace, - temp=300, - interval=10, - dt=1, - width=width, - biasfactors=(5, 10, 15), - plotted_cvs=cv1, - ps=20) + metad.try_multiple_biasfactors( + configuration=h2_config, + mlp=ace, + temp=300, + interval=10, 
+ dt=1, + width=width, + biasfactors=(5, 10, 15), + plotted_cvs=cv1, + ps=20, + ) # Execute metadynamics production runs, 8 independent simulations are # performed in parallel - metad.run_metadynamics(configuration=h2_config, - mlp=ace, - temp=300, - interval=10, - dt=1, - width=width, - biasfactor=5, - n_runs=8, - restart=False, - ps=20) + metad.run_metadynamics( + configuration=h2_config, + mlp=ace, + temp=300, + interval=10, + dt=1, + width=width, + biasfactor=5, + n_runs=8, + restart=False, + ps=20, + ) # Plot the resulting free energy surface (FES), the same method can be used # to plot the FES from block analysis or FES from a previous simulation diff --git a/examples/metadynamics/h2o/h2o.py b/examples/metadynamics/h2o/h2o.py index 69d539fa..1f554f8d 100644 --- a/examples/metadynamics/h2o/h2o.py +++ b/examples/metadynamics/h2o/h2o.py @@ -3,13 +3,12 @@ mlt.Config.n_cores = 8 if __name__ == '__main__': - # Initialise the system h2o_system = mlt.System(mlt.Molecule('h2o.xyz'), box=None) # Generate a starting metadynamics configuration h2o_config = h2o_system.random_configuration() - + # Define CVs, can also attach walls to them. More complicated CVs (i.e. # not DISTANCE, ANGLE, or TORSION; e.g. 
PATH) can be defined using # PlumedCustomCV that requires a PLUMED-like input file containing the @@ -29,42 +28,46 @@ # Active learning (can be commented out if the potential is loaded) al_bias = mlt.PlumedBias(cvs=(cv1, cv2)) al_bias.initialise_for_metad_al(width=(0.05, 0.10), biasfactor=100) - ace.al_train(method_name='xtb', - temp=300, - max_active_iters=50, - min_active_iters=10, - bias_start_iter=2, - inherit_metad_bias=True, - bias=al_bias) + ace.al_train( + method_name='xtb', + temp=300, + max_active_iters=50, + min_active_iters=10, + bias_start_iter=2, + inherit_metad_bias=True, + bias=al_bias, + ) - # Can run optional methods (estimate_width() and try_multiple_biafactors()) + # Can run optional methods (estimate_width() and try_multiple_biafactors()) # to help choose appropriate metadynamics parameters (width and bias factor), - width = metad.estimate_width(configurations=h2o_config, - mlp=ace, - plot=True) + width = metad.estimate_width(configurations=h2o_config, mlp=ace, plot=True) - metad.try_multiple_biasfactors(configuration=h2o_config, - mlp=ace, - temp=300, - interval=10, - dt=1, - width=width, - biasfactors=(5, 10, 15), - plotted_cvs=(cv1, cv2), - ps=40) + metad.try_multiple_biasfactors( + configuration=h2o_config, + mlp=ace, + temp=300, + interval=10, + dt=1, + width=width, + biasfactors=(5, 10, 15), + plotted_cvs=(cv1, cv2), + ps=40, + ) # Execute metadynamics production runs, 8 independent simulations are # performed in parallel - metad.run_metadynamics(configuration=h2o_config, - mlp=ace, - temp=300, - interval=10, - dt=1, - width=width, - biasfactor=5, - n_runs=8, - restart=False, - ps=40) + metad.run_metadynamics( + configuration=h2o_config, + mlp=ace, + temp=300, + interval=10, + dt=1, + width=width, + biasfactor=5, + n_runs=8, + restart=False, + ps=40, + ) # Plot the resulting free energy surface (FES), the same method can be used # to plot the FES from block analysis or FES from a previous simulation diff --git a/examples/methane.py 
b/examples/methane.py index f290aca5..27644b5f 100644 --- a/examples/methane.py +++ b/examples/methane.py @@ -5,25 +5,24 @@ if __name__ == '__main__': - # Set up the system of a methane molecule without any periodic boundaries - system = mlt.System(mlt.Molecule('methane.xyz'), - box=None) + system = mlt.System(mlt.Molecule('methane.xyz'), box=None) # Initialise a Gaussian Approximation Potential for this system - gap = mlt.potentials.GAP('methane', - system=system) + gap = mlt.potentials.GAP('methane', system=system) # and train using active learning at 1000 K gap.al_train(method_name='orca', temp=1000) # Run some dynamics with the potential - trajectory = mlt.md.run_mlp_md(configuration=system.random_configuration(), - mlp=gap, - fs=200, - temp=300, - dt=0.5, - interval=10) + trajectory = mlt.md.run_mlp_md( + configuration=system.random_configuration(), + mlp=gap, + fs=200, + temp=300, + dt=0.5, + interval=10, + ) # and compare, plotting a parity diagram and E_true, ∆E and ∆F trajectory.compare(gap, 'orca') diff --git a/examples/paper_examples/r1_fig2/ace_parity/train.py b/examples/paper_examples/r1_fig2/ace_parity/train.py index 267db842..f88fd8a9 100644 --- a/examples/paper_examples/r1_fig2/ace_parity/train.py +++ b/examples/paper_examples/r1_fig2/ace_parity/train.py @@ -5,7 +5,6 @@ if __name__ == '__main__': - system = mlt.System(mlt.Molecule('ts_pbe0.xyz'), box=None) ace = mlt.potentials.ACE('da', system=system) @@ -13,12 +12,14 @@ ace.train(mlt.ConfigurationSet('da_data.npz')) # Run some dynamics with the potential - trajectory = mlt.md.run_mlp_md(configuration=system.random_configuration(), - mlp=ace, - fs=500, - temp=300, - dt=0.5, - interval=10) + trajectory = mlt.md.run_mlp_md( + configuration=system.random_configuration(), + mlp=ace, + fs=500, + temp=300, + dt=0.5, + interval=10, + ) # and compare, plotting a parity diagram and E_true, ∆E and ∆F trajectory.compare(ace, 'orca') diff --git a/examples/paper_examples/r1_fig2/mlp_comparison/ace/train.py 
b/examples/paper_examples/r1_fig2/mlp_comparison/ace/train.py index c5a1f257..38ca1eb4 100644 --- a/examples/paper_examples/r1_fig2/mlp_comparison/ace/train.py +++ b/examples/paper_examples/r1_fig2/mlp_comparison/ace/train.py @@ -5,25 +5,22 @@ if __name__ == '__main__': + system = mlt.System(mlt.Molecule('ts_pbe0.xyz'), box=None) - system = mlt.System(mlt.Molecule('ts_pbe0.xyz'), - box=None) + ace = mlt.potentials.ACE('da', system=system) - ace = mlt.potentials.ACE('da', - system=system) - - ace.al_train(method_name='orca', - temp=500, - max_active_time=500, - fix_init_config=True) + ace.al_train( + method_name='orca', temp=500, max_active_time=500, fix_init_config=True + ) # Run some dynamics with the potential - trajectory = mlt.md.run_mlp_md(configuration=system.random_configuration(), - mlp=ace, - fs=500, - temp=300, - dt=0.5, - interval=10) + trajectory = mlt.md.run_mlp_md( + configuration=system.random_configuration(), + mlp=ace, + fs=500, + temp=300, + dt=0.5, + interval=10, + ) # and compare, plotting a parity diagram and E_true, ∆E and ∆F trajectory.compare(ace, 'orca') - diff --git a/examples/paper_examples/r1_fig2/mlp_comparison/ace_uplift/train.py b/examples/paper_examples/r1_fig2/mlp_comparison/ace_uplift/train.py index 291fadda..55e328af 100644 --- a/examples/paper_examples/r1_fig2/mlp_comparison/ace_uplift/train.py +++ b/examples/paper_examples/r1_fig2/mlp_comparison/ace_uplift/train.py @@ -5,7 +5,6 @@ if __name__ == '__main__': - system = mlt.System(mlt.Molecule('ts_pbe0.xyz'), box=None) ace = mlt.potentials.ACE('da', system=system) @@ -19,12 +18,14 @@ ace.train(dft_al_data) # Run some dynamics with the potential - trajectory = mlt.md.run_mlp_md(configuration=system.random_configuration(), - mlp=ace, - fs=500, - temp=300, - dt=0.5, - interval=10) + trajectory = mlt.md.run_mlp_md( + configuration=system.random_configuration(), + mlp=ace, + fs=500, + temp=300, + dt=0.5, + interval=10, + ) # and compare, plotting a parity diagram and E_true, ∆E and ∆F 
trajectory.compare(ace, 'orca') diff --git a/examples/paper_examples/r1_fig2/mlp_comparison/ace_uplift/uplift/train.py b/examples/paper_examples/r1_fig2/mlp_comparison/ace_uplift/uplift/train.py index 5a472dcc..95742513 100644 --- a/examples/paper_examples/r1_fig2/mlp_comparison/ace_uplift/uplift/train.py +++ b/examples/paper_examples/r1_fig2/mlp_comparison/ace_uplift/uplift/train.py @@ -5,13 +5,12 @@ if __name__ == '__main__': - system = mlt.System(mlt.Molecule('ts_pbe0.xyz'), box=None) ace = mlt.potentials.ACE('da', system=system) - #dft_al_data = mlt.ConfigurationSet() - #dft_al_data.load_xyz('da_data.xyz', charge=0, mult=1) - #dft_al_data.save('da_data.npz') + # dft_al_data = mlt.ConfigurationSet() + # dft_al_data.load_xyz('da_data.xyz', charge=0, mult=1) + # dft_al_data.save('da_data.npz') # dft_al_data.single_point(method_name='orca') # dft_al_data.save('da_data.npz') @@ -20,12 +19,14 @@ ace.train(mlt.ConfigurationSet('da_data.npz')) # Run some dynamics with the potential - trajectory = mlt.md.run_mlp_md(configuration=system.random_configuration(), - mlp=ace, - fs=500, - temp=300, - dt=0.5, - interval=10) + trajectory = mlt.md.run_mlp_md( + configuration=system.random_configuration(), + mlp=ace, + fs=500, + temp=300, + dt=0.5, + interval=10, + ) # and compare, plotting a parity diagram and E_true, ∆E and ∆F trajectory.compare(ace, 'orca') diff --git a/examples/paper_examples/r1_fig2/mlp_comparison/gap/train.py b/examples/paper_examples/r1_fig2/mlp_comparison/gap/train.py index f04f9f24..532a4b2e 100644 --- a/examples/paper_examples/r1_fig2/mlp_comparison/gap/train.py +++ b/examples/paper_examples/r1_fig2/mlp_comparison/gap/train.py @@ -5,26 +5,23 @@ if __name__ == '__main__': + system = mlt.System(mlt.Molecule('ts_pbe0.xyz'), box=None) - system = mlt.System(mlt.Molecule('ts_pbe0.xyz'), - box=None) - - gap = mlt.potentials.GAP('da', - system=system) - - gap.al_train(method_name='orca', - temp=500, - max_active_time=500, - fix_init_config=True) + gap = 
mlt.potentials.GAP('da', system=system) + gap.al_train( + method_name='orca', temp=500, max_active_time=500, fix_init_config=True + ) # Run some dynamics with the potential - trajectory = mlt.md.run_mlp_md(configuration=system.random_configuration(), - mlp=gap, - fs=500, - temp=300, - dt=0.5, - interval=10) + trajectory = mlt.md.run_mlp_md( + configuration=system.random_configuration(), + mlp=gap, + fs=500, + temp=300, + dt=0.5, + interval=10, + ) # and compare, plotting a parity diagram and E_true, ∆E and ∆F trajectory.compare(gap, 'orca') diff --git a/examples/paper_examples/r1_fig2/mlp_comparison/nequip/train.py b/examples/paper_examples/r1_fig2/mlp_comparison/nequip/train.py index ea79dce1..a1af4538 100644 --- a/examples/paper_examples/r1_fig2/mlp_comparison/nequip/train.py +++ b/examples/paper_examples/r1_fig2/mlp_comparison/nequip/train.py @@ -5,24 +5,23 @@ if __name__ == '__main__': - - system = mlt.System(mlt.Molecule('ts_pbe0.xyz'), - box=None) + system = mlt.System(mlt.Molecule('ts_pbe0.xyz'), box=None) mlp = mlt.potentials.NeQUIP('da', system=system) - mlp.al_train(method_name='orca', - temp=500, - max_active_time=500, - fix_init_config=True) + mlp.al_train( + method_name='orca', temp=500, max_active_time=500, fix_init_config=True + ) # Run some dynamics with the potential - trajectory = mlt.md.run_mlp_md(configuration=system.random_configuration(), - mlp=mlp, - fs=500, - temp=300, - dt=0.5, - interval=10) + trajectory = mlt.md.run_mlp_md( + configuration=system.random_configuration(), + mlp=mlp, + fs=500, + temp=300, + dt=0.5, + interval=10, + ) # and compare, plotting a parity diagram and E_true, ∆E and ∆F trajectory.compare(mlp, 'orca') diff --git a/examples/paper_examples/r2_fig3/train_plus_ts7.py b/examples/paper_examples/r2_fig3/train_plus_ts7.py index 936539cd..a147b5ec 100644 --- a/examples/paper_examples/r2_fig3/train_plus_ts7.py +++ b/examples/paper_examples/r2_fig3/train_plus_ts7.py @@ -5,26 +5,27 @@ if __name__ == '__main__': + system = 
mlt.System(mlt.Molecule('ts7.xyz'), box=None) - system = mlt.System(mlt.Molecule('ts7.xyz'), - box=None) - - ace = mlt.potentials.ACE('da', - system=system) + ace = mlt.potentials.ACE('da', system=system) ace.training_data = mlt.ConfigurationSet('da_al_ts5.npz') - ace.al_train(method_name='orca', - temp=500, - max_active_time=1000, - fix_init_config=True) + ace.al_train( + method_name='orca', + temp=500, + max_active_time=1000, + fix_init_config=True, + ) # Run some dynamics with the potential - trajectory = mlt.md.run_mlp_md(configuration=system.random_configuration(), - mlp=ace, - fs=500, - temp=300, - dt=0.5, - interval=10) + trajectory = mlt.md.run_mlp_md( + configuration=system.random_configuration(), + mlp=ace, + fs=500, + temp=300, + dt=0.5, + interval=10, + ) # and compare, plotting a parity diagram and E_true, ∆E and ∆F trajectory.compare(ace, 'orca') diff --git a/examples/paper_examples/r2_fig3/train_ts5.py b/examples/paper_examples/r2_fig3/train_ts5.py index 4bda3dc2..e2e13f31 100644 --- a/examples/paper_examples/r2_fig3/train_ts5.py +++ b/examples/paper_examples/r2_fig3/train_ts5.py @@ -5,24 +5,25 @@ if __name__ == '__main__': + system = mlt.System(mlt.Molecule('ts5.xyz'), box=None) - system = mlt.System(mlt.Molecule('ts5.xyz'), - box=None) + ace = mlt.potentials.ACE('da', system=system) - ace = mlt.potentials.ACE('da', - system=system) - - ace.al_train(method_name='orca', - temp=500, - max_active_time=1000, - fix_init_config=True) + ace.al_train( + method_name='orca', + temp=500, + max_active_time=1000, + fix_init_config=True, + ) # Run some dynamics with the potential - trajectory = mlt.md.run_mlp_md(configuration=system.random_configuration(), - mlp=ace, - fs=500, - temp=300, - dt=0.5, - interval=10) + trajectory = mlt.md.run_mlp_md( + configuration=system.random_configuration(), + mlp=ace, + fs=500, + temp=300, + dt=0.5, + interval=10, + ) # and compare, plotting a parity diagram and E_true, ∆E and ∆F trajectory.compare(ace, 'orca') diff --git 
a/examples/paper_examples/r3_fig4/endo_ace_M06.py b/examples/paper_examples/r3_fig4/endo_ace_M06.py index 853669db..efac9dec 100644 --- a/examples/paper_examples/r3_fig4/endo_ace_M06.py +++ b/examples/paper_examples/r3_fig4/endo_ace_M06.py @@ -4,24 +4,28 @@ mlt.Config.orca_keywords = ['M062X', 'RIJCOSX', 'def2/J', 'def2-SVP', 'EnGrad'] if __name__ == '__main__': - - system = mlt.System(mlt.Molecule('cis_endo_TS_M06.xyz', charge=0, mult=1), - box=None) + system = mlt.System( + mlt.Molecule('cis_endo_TS_M06.xyz', charge=0, mult=1), box=None + ) ace = mlt.potentials.ACE('endo_ace', system=system) - ace.al_train(method_name='orca', - temp=500, - max_active_time=1000, - fix_init_config=True) + ace.al_train( + method_name='orca', + temp=500, + max_active_time=1000, + fix_init_config=True, + ) # Run some dynamics with the potential - trajectory = mlt.md.run_mlp_md(configuration=system.configuration, - mlp=ace, - fs=200, - temp=300, - dt=0.5, - interval=10) + trajectory = mlt.md.run_mlp_md( + configuration=system.configuration, + mlp=ace, + fs=200, + temp=300, + dt=0.5, + interval=10, + ) # and compare, plotting a parity diagram and E_true, ∆E and ∆F trajectory.compare(ace, 'orca') diff --git a/examples/paper_examples/r3_fig4/endo_ace_PBE0.py b/examples/paper_examples/r3_fig4/endo_ace_PBE0.py index 61c87bae..0f87ff14 100644 --- a/examples/paper_examples/r3_fig4/endo_ace_PBE0.py +++ b/examples/paper_examples/r3_fig4/endo_ace_PBE0.py @@ -4,25 +4,28 @@ mlt.Config.orca_keywords = ['PBE0', 'def2-SVP', 'EnGrad'] if __name__ == '__main__': - - system = mlt.System(mlt.Molecule('cis_endo_TS_PBE0.xyz', charge=0, mult=1), - box=None) + system = mlt.System( + mlt.Molecule('cis_endo_TS_PBE0.xyz', charge=0, mult=1), box=None + ) ace = mlt.potentials.ACE('endo_ace', system=system) - ace.al_train(method_name='orca', - temp=500, - max_active_time=1000, - fix_init_config=True) + ace.al_train( + method_name='orca', + temp=500, + max_active_time=1000, + fix_init_config=True, + ) # Run some 
dynamics with the potential - trajectory = mlt.md.run_mlp_md(configuration=system.configuration, - mlp=ace, - fs=200, - temp=300, - dt=0.5, - interval=10) + trajectory = mlt.md.run_mlp_md( + configuration=system.configuration, + mlp=ace, + fs=200, + temp=300, + dt=0.5, + interval=10, + ) # and compare, plotting a parity diagram and E_true, ∆E and ∆F trajectory.compare(ace, 'orca') - diff --git a/examples/paper_examples/r3_fig4/endo_ace_wB97X.py b/examples/paper_examples/r3_fig4/endo_ace_wB97X.py index 95910cf5..0fd035d0 100644 --- a/examples/paper_examples/r3_fig4/endo_ace_wB97X.py +++ b/examples/paper_examples/r3_fig4/endo_ace_wB97X.py @@ -1,27 +1,37 @@ import mlptrain as mlt mlt.Config.n_cores = 10 -mlt.Config.orca_keywords = ['wB97X-D3', 'RIJCOSX', 'def2/J', 'def2-SVP', 'EnGrad'] +mlt.Config.orca_keywords = [ + 'wB97X-D3', + 'RIJCOSX', + 'def2/J', + 'def2-SVP', + 'EnGrad', +] if __name__ == '__main__': - - system = mlt.System(mlt.Molecule('cis_endo_TS_wB97X.xyz', charge=0, mult=1), - box=None) + system = mlt.System( + mlt.Molecule('cis_endo_TS_wB97X.xyz', charge=0, mult=1), box=None + ) ace = mlt.potentials.ACE('endo_ace', system=system) - ace.al_train(method_name='orca', - temp=500, - max_active_time=1000, - fix_init_config=True) + ace.al_train( + method_name='orca', + temp=500, + max_active_time=1000, + fix_init_config=True, + ) # Run some dynamics with the potential - trajectory = mlt.md.run_mlp_md(configuration=system.configuration, - mlp=ace, - fs=200, - temp=300, - dt=0.5, - interval=10) + trajectory = mlt.md.run_mlp_md( + configuration=system.configuration, + mlp=ace, + fs=200, + temp=300, + dt=0.5, + interval=10, + ) # and compare, plotting a parity diagram and E_true, ∆E and ∆F trajectory.compare(ace, 'orca') diff --git a/examples/paper_examples/r3_fig5/train_endo.py b/examples/paper_examples/r3_fig5/train_endo.py index d60a6e93..7e3425be 100644 --- a/examples/paper_examples/r3_fig5/train_endo.py +++ b/examples/paper_examples/r3_fig5/train_endo.py @@ 
-6,23 +6,23 @@ if __name__ == '__main__': - system = mlt.System(mlt.Molecule('ts_endo.xyz'), box=None) ace = mlt.potentials.ACE('da', system=system) - ace.al_train(method_name='orca', - temp=500, - max_active_time=500, - fix_init_config=True) + ace.al_train( + method_name='orca', temp=500, max_active_time=500, fix_init_config=True + ) # Run some dynamics with the potential - trajectory = mlt.md.run_mlp_md(configuration=system.configuration, - mlp=ace, - fs=500, - temp=300, - dt=0.5, - interval=10) + trajectory = mlt.md.run_mlp_md( + configuration=system.configuration, + mlp=ace, + fs=500, + temp=300, + dt=0.5, + interval=10, + ) # and compare, plotting a parity diagram and E_true, ∆E and ∆F trajectory.compare(ace, 'orca') diff --git a/examples/paper_examples/r4_tab1/train.py b/examples/paper_examples/r4_tab1/train.py index 452ff64a..a3c3f4b5 100644 --- a/examples/paper_examples/r4_tab1/train.py +++ b/examples/paper_examples/r4_tab1/train.py @@ -6,27 +6,29 @@ if __name__ == '__main__': - system = mlt.System(mlt.Molecule('ts.xyz'), box=None) ace = mlt.potentials.ACE('da', system=system) - ace.al_train_then_bias(method_name='orca', - coordinate=mlt.AverageDistance((0, 1), (2, 3)), - max_coordinate=3.5, - selection_method=mlt.training.selection.AbsDiffE(0.043), - temp=500, - max_active_time=500, - fix_init_config=True) + ace.al_train_then_bias( + method_name='orca', + coordinate=mlt.AverageDistance((0, 1), (2, 3)), + max_coordinate=3.5, + selection_method=mlt.training.selection.AbsDiffE(0.043), + temp=500, + max_active_time=500, + fix_init_config=True, + ) # Run some dynamics with the potential - trajectory = mlt.md.run_mlp_md(configuration=system.configuration, - mlp=ace, - fs=500, - temp=300, - dt=0.5, - interval=10) + trajectory = mlt.md.run_mlp_md( + configuration=system.configuration, + mlp=ace, + fs=500, + temp=300, + dt=0.5, + interval=10, + ) # and compare, plotting a parity diagram and E_true, ∆E and ∆F trajectory.compare(ace, 'orca') - diff --git 
a/examples/umbrella/train.py b/examples/umbrella/train.py index aacee88c..36c2e2d7 100644 --- a/examples/umbrella/train.py +++ b/examples/umbrella/train.py @@ -4,7 +4,6 @@ if __name__ == '__main__': - system = mlt.System(mlt.Molecule('sn2.xyz', charge=-1), box=None) ace = mlt.potentials.ACE('sn2', system=system) diff --git a/examples/umbrella/umbrella.py b/examples/umbrella/umbrella.py index 3a7eb1c7..672cbfaf 100644 --- a/examples/umbrella/umbrella.py +++ b/examples/umbrella/umbrella.py @@ -1,28 +1,28 @@ import mlptrain as mlt + mlt.Config.n_cores = 4 if __name__ == '__main__': - # Define a reaction coordinate as R1 - R2 - umbrella = mlt.UmbrellaSampling(zeta_func=mlt.DifferenceDistance((0, 1), - (0, 5)), - kappa=20) + umbrella = mlt.UmbrellaSampling( + zeta_func=mlt.DifferenceDistance((0, 1), (0, 5)), kappa=20 + ) irc = mlt.ConfigurationSet() irc.load_xyz(filename='irc.xyz', charge=-1, mult=1) - system = mlt.System(mlt.Molecule('sn2.xyz', charge=-1, mult=1), - box=None) + system = mlt.System(mlt.Molecule('sn2.xyz', charge=-1, mult=1), box=None) # Run umbrella sampling across the IRC using GAP MD - umbrella.run_umbrella_sampling(irc, - mlp=mlt.potentials.GAP('sn2', - system=system), - temp=300, - interval=5, - dt=0.5, - n_windows=10, - ps=1) + umbrella.run_umbrella_sampling( + irc, + mlp=mlt.potentials.GAP('sn2', system=system), + temp=300, + interval=5, + dt=0.5, + n_windows=10, + ps=1, + ) # Use WHAM to calculate the free energy umbrella.wham() diff --git a/examples/water.py b/examples/water.py index 2e1ef9ed..cfb59370 100644 --- a/examples/water.py +++ b/examples/water.py @@ -4,18 +4,19 @@ if __name__ == '__main__': - system = mlt.System(mlt.Molecule('water.xyz'), box=None) ace = mlt.potentials.ACE('water', system=system) ace.al_train(method_name='xtb', temp=500) # Run some dynamics with the potential - trajectory = mlt.md.run_mlp_md(configuration=system.random_configuration(), - mlp=ace, - fs=200, - temp=300, - dt=0.5, - interval=10) + trajectory = 
mlt.md.run_mlp_md( + configuration=system.random_configuration(), + mlp=ace, + fs=200, + temp=300, + dt=0.5, + interval=10, + ) trajectory.save(filename='water_trajectory.xyz') diff --git a/install_ace.sh b/install_ace.sh index 7a4a0bfd..08dce964 100755 --- a/install_ace.sh +++ b/install_ace.sh @@ -16,8 +16,6 @@ if ! which julia; then fi source create_conda_environment.sh -# NOTE: `conda activate` does not work in scripts, we use `conda run` below. -# https://stackoverflow.com/a/72395091 echo "* Adding required registries and packages to Julia *" echo "using Pkg @@ -30,8 +28,10 @@ Pkg.add(\"IJulia\") Pkg.add(\"ASE\")" > add_julia_pkgs.jl julia add_julia_pkgs.jl +# NOTE: `conda activate` does not work in scripts, need to use `conda run`, see: +# https://stackoverflow.com/a/72395091 echo "* Setting up Python-Julia integration *" -conda run -n ${CONDA_ENV_NAME} python -c "import julia; julia.install()" +$CONDA_EXE run -n ${CONDA_ENV_NAME} python -c "import julia; julia.install()" echo "* Pointing PyCall to the version of Python in the new env *" diff --git a/mlptrain/__init__.py b/mlptrain/__init__.py index 0f594494..3458720b 100644 --- a/mlptrain/__init__.py +++ b/mlptrain/__init__.py @@ -10,39 +10,43 @@ from mlptrain import potentials from mlptrain import loss from mlptrain.training import selection -from mlptrain.sampling.reaction_coord import AverageDistance, DifferenceDistance +from mlptrain.sampling.reaction_coord import ( + AverageDistance, + DifferenceDistance, +) from mlptrain.sampling.plumed import ( PlumedAverageCV, PlumedDifferenceCV, PlumedCustomCV, ) -__version__ = '1.0.0a0' +__version__ = '1.0.0b0' -__all__ = ['Configuration', - 'ConfigurationSet', - 'Trajectory', - 'Config', - 'Molecule', - 'System', - 'Box', - 'Bias', - 'PlumedBias', - 'PlumedCalculator', - 'UmbrellaSampling', - 'Metadynamics', - 'AverageDistance', - 'DifferenceDistance', - 'PlumedAverageCV', - 'PlumedDifferenceCV', - 'PlumedCustomCV', - 'plot_cv_versus_time', - 'plot_cv1_and_cv2', - 
'convert_ase_time', - 'convert_ase_energy', - 'md', - 'md_openmm', - 'loss', - 'selection', - 'potentials' - ] +__all__ = [ + 'Configuration', + 'ConfigurationSet', + 'Trajectory', + 'Config', + 'Molecule', + 'System', + 'Box', + 'Bias', + 'PlumedBias', + 'PlumedCalculator', + 'UmbrellaSampling', + 'Metadynamics', + 'AverageDistance', + 'DifferenceDistance', + 'PlumedAverageCV', + 'PlumedDifferenceCV', + 'PlumedCustomCV', + 'plot_cv_versus_time', + 'plot_cv1_and_cv2', + 'convert_ase_time', + 'convert_ase_energy', + 'md', + 'md_openmm', + 'loss', + 'selection', + 'potentials', +] diff --git a/mlptrain/box.py b/mlptrain/box.py index 04a1eeaf..31456114 100644 --- a/mlptrain/box.py +++ b/mlptrain/box.py @@ -3,7 +3,6 @@ class Box: - def __init__(self, size: Sequence[float]): """ Periodic cuboidal box @@ -28,7 +27,7 @@ def volume(self) -> float: @property def has_zero_volume(self) -> bool: """Is this box essentially of zero size""" - return self.volume < 1E-10 + return self.volume < 1e-10 @property def midpoint(self) -> np.ndarray: @@ -38,5 +37,7 @@ def midpoint(self) -> np.ndarray: def __eq__(self, other): """Equality of two boxes""" - return (isinstance(other, Box) - and np.linalg.norm(other.size - self.size) < 1E-10) + return ( + isinstance(other, Box) + and np.linalg.norm(other.size - self.size) < 1e-10 + ) diff --git a/mlptrain/config.py b/mlptrain/config.py index 652d605b..0f8dd597 100644 --- a/mlptrain/config.py +++ b/mlptrain/config.py @@ -6,51 +6,69 @@ class _ConfigClass: n_cores = 4 _orca_keywords = ['PBE', 'def2-SVP', 'EnGrad'] - _gaussian_keywords = ['PBEPBE', 'Def2SVP', 'Force(NoStep)', 'integral=ultrafinegrid'] + _gaussian_keywords = [ + 'PBEPBE', + 'Def2SVP', + 'Force(NoStep)', + 'integral=ultrafinegrid', + ] # Default parameters for a GAP potential - gap_default_params = {'sigma_E': 10**(-4.0), # eV - 'sigma_F': 10**(-2.0)} # eV Å-1 + gap_default_params = { + 'sigma_E': 10 ** (-4.0), # eV + 'sigma_F': 10 ** (-2.0), + } # eV Å-1 # Default SOAP parameters - 
gap_default_soap_params = {'cutoff': 4.0, # Å - 'n_sparse': 1000, - 'l_max': 6, # n_max = 2 l_max - 'sigma_at': 0.5 # Å - } + gap_default_soap_params = { + 'cutoff': 4.0, # Å + 'n_sparse': 1000, + 'l_max': 6, # n_max = 2 l_max + 'sigma_at': 0.5, # Å + } + # ACE params + ace_params = { + 'N': 4, # maximum correlation order + 'r_cut': 4.0, # outer cutoff of ACE + 'deg_pair': 5, # Specify the pair potential + 'r_cut_pair': 5.0, + } # NeQUIP params - nequip_params = {'cutoff': 4.0, - 'train_fraction': 0.9} + nequip_params = {'cutoff': 4.0, 'train_fraction': 0.9} # MACE params try: import torch + mace_device = 'cuda' if torch.cuda.is_available() else 'cpu' except ImportError: mace_device = 'cpu' - mace_params = {'valid_fraction': 0.1, - 'config_type_weights': '{"Default":1.0}', - 'model': 'MACE', - 'loss': 'weighted', - 'energy_weight': 1.0, - 'forces_weight': 5.0, - 'hidden_irreps': '128x0e + 128x1o', - 'batch_size': 10, - 'r_max': 5, - 'correlation': 3, - 'device': mace_device , - 'calc_device': 'cpu', - 'error_table': 'TotalMAE', - 'swa': True, - 'start_swa': 800, - 'ema': False, - 'ema_decay': 0.99, - 'amsgrad': True, - 'restart_latest': False, - 'save_cpu': True} + mace_params = { + 'valid_fraction': 0.1, + 'config_type_weights': '{"Default":1.0}', + 'model': 'MACE', + 'loss': 'weighted', + 'energy_weight': 1.0, + 'forces_weight': 5.0, + 'hidden_irreps': '128x0e + 128x1o', + 'batch_size': 10, + 'r_max': 5.0, + 'correlation': 3, + 'device': mace_device, + 'calc_device': 'cpu', + 'error_table': 'TotalMAE', + 'swa': True, + 'start_swa': 800, + 'ema': False, + 'ema_decay': 0.99, + 'amsgrad': True, + 'restart_latest': False, + 'save_cpu': True, + 'dtype': 'float32', + } # --------------------- Internal properties --------------------------- diff --git a/mlptrain/configurations/__init__.py b/mlptrain/configurations/__init__.py index e04e8b7b..a86a184a 100644 --- a/mlptrain/configurations/__init__.py +++ b/mlptrain/configurations/__init__.py @@ -2,6 +2,4 @@ from 
mlptrain.configurations.configuration_set import ConfigurationSet from mlptrain.configurations.trajectory import Trajectory -__all__ = ['Configuration', - 'ConfigurationSet', - 'Trajectory'] +__all__ = ['Configuration', 'ConfigurationSet', 'Trajectory'] diff --git a/mlptrain/configurations/calculate.py b/mlptrain/configurations/calculate.py index 760bb579..d52f08eb 100644 --- a/mlptrain/configurations/calculate.py +++ b/mlptrain/configurations/calculate.py @@ -7,10 +7,9 @@ @work_in_tmp_dir() -def run_autode(configuration: 'mlptrain.Configuration', - method_name: str, - n_cores: int = 1 - ) -> None: +def run_autode( + configuration: 'mlptrain.Configuration', method_name: str, n_cores: int = 1 +) -> None: """ Run an autodE calculation @@ -29,14 +28,18 @@ def run_autode(configuration: 'mlptrain.Configuration', method, kwds = _method_and_keywords(method_name=method_name.lower()) logger.info(f'Running a {method_name} calculation at: {kwds}') - calc = Calculation(name='tmp', - molecule=Species(name='tmp', - atoms=configuration.atoms, - charge=configuration.charge, - mult=configuration.mult), - method=method, - keywords=kwds, - n_cores=n_cores) + calc = Calculation( + name='tmp', + molecule=Species( + name='tmp', + atoms=configuration.atoms, + charge=configuration.charge, + mult=configuration.mult, + ), + method=method, + keywords=kwds, + n_cores=n_cores, + ) calc.run() try: @@ -49,7 +52,7 @@ def run_autode(configuration: 'mlptrain.Configuration', if energy is None: logger.error('Failed to calculate the energy') if calc.output.exists: - print("".join(calc.output.file_lines[-50:])) + print(''.join(calc.output.file_lines[-50:])) return None @@ -58,8 +61,9 @@ def run_autode(configuration: 'mlptrain.Configuration', return None -def _method_and_keywords(method_name: str) -> Tuple['autode.wrappers.Method', - 'autode.wrappers.keywords.Keywords']: +def _method_and_keywords( + method_name: str, +) -> Tuple['autode.wrappers.Method', 'autode.wrappers.keywords.Keywords']: """Get the 
method and associated keywords to use in a QM calculation""" from autode.methods import ORCA, XTB, G16, G09 @@ -84,20 +88,23 @@ def _orca_keywords() -> 'autode.wrappers.keywords.Keywords': """Keywords e.g. functional and basis set to use for an ORCA calculation""" if Config.orca_keywords is None: - raise ValueError("For ORCA training GTConfig.orca_keywords must be" - " set. e.g.\nmlt.Config.orca_keywords " - "= ['PBE', 'def2-SVP', 'EnGrad'])") + raise ValueError( + 'For ORCA training GTConfig.orca_keywords must be' + ' set. e.g.\nmlt.Config.orca_keywords ' + "= ['PBE', 'def2-SVP', 'EnGrad'])" + ) return Config.orca_keywords def _gaussian_keywords() -> 'autode.wrappers.keywords.Keywords': """Keywords e.g. functional and basis set to use for an Gaussian - calculation, either Gaussian09 or Gaussian16""" + calculation, either Gaussian09 or Gaussian16""" if Config.gaussian_keywords is None: - raise ValueError("To train with Gaussian QM calculations " - "mlt.Config.gaussian_keywords must be set.") + raise ValueError( + 'To train with Gaussian QM calculations ' + 'mlt.Config.gaussian_keywords must be set.' 
+ ) return Config.gaussian_keywords - diff --git a/mlptrain/configurations/configuration.py b/mlptrain/configurations/configuration.py index cef59796..aa493d04 100644 --- a/mlptrain/configurations/configuration.py +++ b/mlptrain/configurations/configuration.py @@ -16,12 +16,13 @@ class Configuration(AtomCollection): """Configuration of atoms""" - def __init__(self, - atoms: Union[autode.atoms.Atoms, List[Atom], None] = None, - charge: int = 0, - mult: int = 1, - box: Optional[Box] = None - ): + def __init__( + self, + atoms: Union[autode.atoms.Atoms, List[Atom], None] = None, + charge: int = 0, + mult: int = 1, + box: Optional[Box] = None, + ): """ Set of atoms perhaps in a periodic box with an overall charge and spin multiplicity @@ -45,8 +46,8 @@ def __init__(self, # Collective variable values (obtained using PLUMED) self.plumed_coordinates: Optional[np.ndarray] = None - self.time: Optional[float] = None # Time in a trajectory - self.n_ref_evals = 0 # Number of reference evaluations + self.time: Optional[float] = None # Time in a trajectory + self.n_ref_evals = 0 # Number of reference evaluations @property def ase_atoms(self) -> 'ase.atoms.Atoms': @@ -58,18 +59,18 @@ def ase_atoms(self) -> 'ase.atoms.Atoms': Returns: (ase.atoms.Atoms): ASE atoms """ - _atoms = ase.atoms.Atoms(symbols=[atom.label for atom in self.atoms], - positions=self.coordinates, - pbc=self.box is not None) + _atoms = ase.atoms.Atoms( + symbols=[atom.label for atom in self.atoms], + positions=self.coordinates, + pbc=self.box is not None, + ) if self.box is not None: _atoms.set_cell(cell=self.box.size) return _atoms - def update_attr_from(self, - configuration: 'Configuration' - ) -> None: + def update_attr_from(self, configuration: 'Configuration') -> None: """ Update system attributes from a configuration @@ -84,12 +85,13 @@ def update_attr_from(self, return None - def save_xyz(self, - filename: str, - append: bool = False, - true: bool = False, - predicted: bool = False - ) -> None: + def 
save_xyz( + self, + filename: str, + append: bool = False, + true: bool = False, + predicted: bool = False, + ) -> None: """ Print this configuration as an extended xyz file where the first 4 columns are the atom symbol, x, y, z and, if this configuration @@ -108,11 +110,13 @@ def save_xyz(self, """ # logger.info(f'Saving configuration to {filename}') - a, b, c = [0., 0., 0.] if self.box is None else self.box.size + a, b, c = [0.0, 0.0, 0.0] if self.box is None else self.box.size if true and predicted: - raise ValueError('Cannot save both predicted and true ' - f'quantities to {filename}') + raise ValueError( + 'Cannot save both predicted and true ' + f'quantities to {filename}' + ) if not (true or predicted): prop_str = '' @@ -131,12 +135,14 @@ def save_xyz(self, filename += '.xyz' with open(filename, 'a' if append else 'w') as exyz_file: - print(f'{len(self.atoms)}\n' - f'Lattice="{a:.6f} 0.000000 0.000000 ' - f'0.000000 {b:.6f} 0.000000 ' - f'0.000000 0.000000 {c:.6f}" ' - f'{prop_str}', - file=exyz_file) + print( + f'{len(self.atoms)}\n' + f'Lattice="{a:.6f} 0.000000 0.000000 ' + f'0.000000 {b:.6f} 0.000000 ' + f'0.000000 0.000000 {c:.6f}" ' + f'{prop_str}', + file=exyz_file, + ) for i, atom in enumerate(self.atoms): x, y, z = atom.coord @@ -150,10 +156,11 @@ def save_xyz(self, return None - def single_point(self, - method: Union[str, 'mlptrain.potentials._base.MLPotential'], - n_cores: int = 1 - ) -> None: + def single_point( + self, + method: Union[str, 'mlptrain.potentials._base.MLPotential'], + n_cores: int = 1, + ) -> None: """ Run a single point energy and gradient (force) evaluation using either a reference method defined by a string (e.g. 
'orca') or a @@ -176,22 +183,25 @@ def single_point(self, method.predict(self) else: - raise ValueError(f'Cannot use {method} to predict energies and ' - f'forces') + raise ValueError( + f'Cannot use {method} to predict energies and ' f'forces' + ) return None def __eq__(self, other) -> bool: """Another configuration is identical to this one""" - eq = (isinstance(other, Configuration) - and other.n_atoms == self.n_atoms - and other.mult == self.mult - and other.charge == self.charge - and other.box == self.box) + eq = ( + isinstance(other, Configuration) + and other.n_atoms == self.n_atoms + and other.mult == self.mult + and other.charge == self.charge + and other.box == self.box + ) if eq and self.n_atoms > 0: rmsd = np.linalg.norm(self.coordinates - other.coordinates) - return eq and rmsd < 1E-10 + return eq and rmsd < 1e-10 return eq def copy(self) -> 'Configuration': diff --git a/mlptrain/configurations/configuration_set.py b/mlptrain/configurations/configuration_set.py index 360c7b61..16d8c8ac 100644 --- a/mlptrain/configurations/configuration_set.py +++ b/mlptrain/configurations/configuration_set.py @@ -8,16 +8,15 @@ from mlptrain.config import Config from mlptrain.log import logger from mlptrain.configurations.configuration import Configuration -from mlptrain.configurations.plotting import parity_plot from mlptrain.box import Box class ConfigurationSet(list): """A set of configurations""" - def __init__(self, - *args: Union[Configuration, str], - allow_duplicates: bool = False): + def __init__( + self, *args: Union[Configuration, str], allow_duplicates: bool = False + ): """ Construct a configuration set from Configurations, or a saved file. This is a set, thus no duplicates configurations are present. 
@@ -34,7 +33,6 @@ def __init__(self, self.allow_duplicates = allow_duplicates for arg in args: - if isinstance(arg, Configuration): self.append(arg) @@ -117,14 +115,17 @@ def lowest_biased_energy(self) -> 'mlptrain.Configuration': (mlptrain.Configuration): """ if len(self) == 0: - raise ValueError('No lowest biased energy configuration in an ' - 'empty set') + raise ValueError( + 'No lowest biased energy configuration in an ' 'empty set' + ) - true_energy = np.array([e if e is not None else np.inf - for e in self.true_energies]) + true_energy = np.array( + [e if e is not None else np.inf for e in self.true_energies] + ) - bias_energy = np.array([e if e is not None else 0 - for e in self.bias_energies]) + bias_energy = np.array( + [e if e is not None else 0 for e in self.bias_energies] + ) biased_energy = true_energy + bias_energy return self[np.argmin(biased_energy)] @@ -141,14 +142,17 @@ def lowest_inherited_biased_energy(self) -> 'mlptrain.Configuration': (mlptrain.Configuration): """ if len(self) == 0: - raise ValueError('No lowest biased energy configuration in an ' - 'empty set') + raise ValueError( + 'No lowest biased energy configuration in an ' 'empty set' + ) - true_energy = np.array([e if e is not None else np.inf - for e in self.true_energies]) + true_energy = np.array( + [e if e is not None else np.inf for e in self.true_energies] + ) - inherited_bias_energy = np.array([e if e is not None else 0 - for e in self.inherited_bias_energies]) + inherited_bias_energy = np.array( + [e if e is not None else 0 for e in self.inherited_bias_energies] + ) inherited_biased_energy = true_energy + inherited_bias_energy return self[np.argmin(inherited_biased_energy)] @@ -207,16 +211,17 @@ def t_min(self, from_idx: int) -> float: (float): Time in fs """ if len(self) < from_idx: - logger.warning('Insufficient data to determine minimum time ' - f'from index {from_idx}') + logger.warning( + 'Insufficient data to determine minimum time ' + f'from index {from_idx}' + ) return 
0.0 - return min(c.time if c.time is not None else 0.0 - for c in self[from_idx:]) + return min( + c.time if c.time is not None else 0.0 for c in self[from_idx:] + ) - def append(self, - value: Optional['mlptrain.Configuration'] - ) -> None: + def append(self, value: Optional['mlptrain.Configuration']) -> None: """ Append an item onto these set of configurations. None will not be appended @@ -230,15 +235,16 @@ def append(self, return if not self.allow_duplicates and value in self: - logger.warning('Not appending configuration to set - already ' - 'present') + logger.warning( + 'Not appending configuration to set - already ' 'present' + ) return return super().append(value) - def compare(self, - *args: Union['mlptrain.potentials.MLPotential', str] - ) -> None: + def compare( + self, *args: Union['mlptrain.potentials.MLPotential', str] + ) -> None: """ Compare methods e.g. a MLP to a ground truth reference method over these set of configurations. Will generate plots of total energies @@ -248,9 +254,13 @@ def compare(self, Arguments: *args: Strings defining the method or MLPs """ + from mlptrain.configurations.plotting import parity_plot + if _num_strings_in(args) > 1: - raise NotImplementedError('Compare currently only supports a ' - 'single reference method (string).') + raise NotImplementedError( + 'Compare currently only supports a ' + 'single reference method (string).' 
+ ) name = self._comparison_name(*args) @@ -273,11 +283,9 @@ def compare(self, parity_plot(self, name=name) return None - def save_xyz(self, - filename: str, - true: bool = False, - predicted: bool = False - ) -> None: + def save_xyz( + self, filename: str, true: bool = False, predicted: bool = False + ) -> None: """Save these configurations to a file ----------------------------------------------------------------------- @@ -295,25 +303,23 @@ def save_xyz(self, return None if self[0].energy.true is not None and not (predicted or true): - logger.warning('Save called without defining what energy and ' - 'forces to print. Had true energies to using those') + logger.warning( + 'Save called without defining what energy and ' + 'forces to print. Had true energies to using those' + ) true = True open(filename, 'w').close() # Empty the file for configuration in self: - configuration.save_xyz(filename, - true=true, - predicted=predicted, - append=True) + configuration.save_xyz( + filename, true=true, predicted=predicted, append=True + ) return None - def load_xyz(self, - filename: str, - charge: int, - mult: int, - box: Optional[Box] = None - ) -> None: + def load_xyz( + self, filename: str, charge: int, mult: int, box: Optional[Box] = None + ) -> None: """ Load configurations from a .xyz file. Will not load any energies or forces @@ -339,7 +345,6 @@ def append_configuration(_atoms): return None for idx, line in enumerate(file_lines): - if is_xyz_line(line): atoms.append(Atom(*line.split()[:4])) @@ -394,17 +399,20 @@ def load(self, filename: str) -> None: self._load_npz(filename) elif filename.endswith('.xyz'): - raise ValueError('Loading .xyz files is not supported. Call ' - 'load_xyz() with defined charge & multiplicity') + raise ValueError( + 'Loading .xyz files is not supported. Call ' + 'load_xyz() with defined charge & multiplicity' + ) else: - raise ValueError(f'Cannot load {filename}. 
Must be either a ' - f'.xyz or .npz file') + raise ValueError( + f'Cannot load {filename}. Must be either a ' + f'.xyz or .npz file' + ) return None - def single_point(self, - method: str) -> None: + def single_point(self, method: str) -> None: """ Evaluate energies and forces on all configuration in this set @@ -412,8 +420,9 @@ def single_point(self, Arguments: method: """ - return self._run_parallel_method(function=_single_point_eval, - method_name=method) + return self._run_parallel_method( + function=_single_point_eval, method_name=method + ) @property def _coordinates(self) -> np.ndarray: @@ -452,8 +461,10 @@ def plumed_coordinates(self) -> Optional[np.ndarray]: return None elif len(n_cvs_set) != 1: - logger.info('Number of CVs differ between configurations - ' - 'returning None') + logger.info( + 'Number of CVs differ between configurations - ' + 'returning None' + ) return None n_cvs = n_cvs_set.pop() @@ -474,7 +485,9 @@ def _atomic_numbers(self) -> np.ndarray: (np.ndarray): Atomic numbers matrix (n, n_atoms) """ - return np.array([[atom.atomic_number for atom in c.atoms] for c in self]) + return np.array( + [[atom.atomic_number for atom in c.atoms] for c in self] + ) @property def _box_sizes(self) -> np.ndarray: @@ -486,8 +499,9 @@ def _box_sizes(self) -> np.ndarray: Returns: (np.ndarray): Box sizes matrix (n, 3) """ - return np.array([c.box.size if c.box is not None else np.zeros(3) - for c in self]) + return np.array( + [c.box.size if c.box is not None else np.zeros(3) for c in self] + ) @property def _charges(self) -> np.ndarray: @@ -515,20 +529,22 @@ def _forces(self, kind: str) -> Optional[np.ndarray]: def _save_npz(self, filename: str) -> None: """Save a compressed numpy array of all the data in this set""" - np.savez(filename, - R=self._coordinates, - R_plumed=self.plumed_coordinates, - E_true=self.true_energies, - E_predicted=self.predicted_energies, - E_bias=self.bias_energies, - E_inherited_bias=self.inherited_bias_energies, - 
F_true=self.true_forces, - F_predicted=self.predicted_forces, - Z=self._atomic_numbers, - L=self._box_sizes, - C=self._charges, - M=self._multiplicities, - allow_pickle=True) + np.savez( + filename, + R=self._coordinates, + R_plumed=self.plumed_coordinates, + E_true=self.true_energies, + E_predicted=self.predicted_energies, + E_bias=self.bias_energies, + E_inherited_bias=self.inherited_bias_energies, + F_true=self.true_forces, + F_predicted=self.predicted_forces, + Z=self._atomic_numbers, + L=self._box_sizes, + C=self._charges, + M=self._multiplicities, + allow_pickle=True, + ) return None @@ -538,13 +554,14 @@ def _load_npz(self, filename: str) -> None: data = np.load(filename, allow_pickle=True) for i, coords in enumerate(data['R']): - box = Box(size=data['L'][i]) - config = Configuration(atoms=_atoms_from_z_r(data['Z'][i], coords), - charge=int(data['C'][i]), - mult=int(data['M'][i]), - box=None if box.has_zero_volume else box) + config = Configuration( + atoms=_atoms_from_z_r(data['Z'][i], coords), + charge=int(data['C'][i]), + mult=int(data['M'][i]), + box=None if box.has_zero_volume else box, + ) if data['R_plumed'].ndim > 0: config.plumed_coordinates = data['R_plumed'][i] @@ -571,9 +588,10 @@ def _load_npz(self, filename: str) -> None: return None - def __add__(self, - other: Union['mlptrain.Configuration', - 'mlptrain.ConfigurationSet']): + def __add__( + self, + other: Union['mlptrain.Configuration', 'mlptrain.ConfigurationSet'], + ): """Add another configuration or set of configurations onto this one""" if isinstance(other, Configuration): @@ -583,8 +601,10 @@ def __add__(self, self.extend(other) else: - raise TypeError('Can only add a Configuration or' - f' ConfigurationSet, not {type(other)}') + raise TypeError( + 'Can only add a Configuration or' + f' ConfigurationSet, not {type(other)}' + ) logger.info(f'Current number of configurations is {len(self)}') return self @@ -608,14 +628,15 @@ def _run_parallel_method(self, function, **kwargs): n_processes = 
min(len(self), Config.n_cores) n_cores_pp = max(Config.n_cores // len(self), 1) kwargs['n_cores'] = n_cores_pp - logger.info(f'Running {n_processes} processes; {n_cores_pp} cores each') + logger.info( + f'Running {n_processes} processes; {n_cores_pp} cores each' + ) with Pool(processes=n_processes) as pool: - for _, config in enumerate(self): - result = pool.apply_async(func=function, - args=(config,), - kwds=kwargs) + result = pool.apply_async( + func=function, args=(config,), kwds=kwargs + ) results.append(result) pool.close() @@ -647,8 +668,9 @@ def _single_point_eval(config, method_name, **kwargs): return config -def _atoms_from_z_r(atomic_numbers: np.ndarray, - coordinates: np.ndarray) -> List[Atom]: +def _atoms_from_z_r( + atomic_numbers: np.ndarray, coordinates: np.ndarray +) -> List[Atom]: """From a set of atomic numbers and coordinates create a set of atoms""" atoms = [] diff --git a/mlptrain/configurations/plotting.py b/mlptrain/configurations/plotting.py index fabfd290..177141f6 100644 --- a/mlptrain/configurations/plotting.py +++ b/mlptrain/configurations/plotting.py @@ -18,9 +18,9 @@ mpl.rcParams['axes.linewidth'] = 1.2 -def parity_plot(config_set: 'mlptrain.ConfigurationSet', - name: str = 'paritiy' - ) -> None: +def parity_plot( + config_set: 'mlptrain.ConfigurationSet', name: str = 'paritiy' +) -> None: """ Plot parity plots of energies, forces and temporal differences (if present) otherwise the residuals over the configuration index @@ -31,8 +31,7 @@ def parity_plot(config_set: 'mlptrain.ConfigurationSet', name: """ - fig, ax = plt.subplots(nrows=2, ncols=2, - figsize=(8, 7.5)) + fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(8, 7.5)) if _all_energies_are_defined(config_set): _add_energy_time_plot(config_set, axis=ax[0, 0]) @@ -49,7 +48,9 @@ def parity_plot(config_set: 'mlptrain.ConfigurationSet', def _all_energies_are_defined(cfgs) -> bool: """Are all the energies defined in a configuration set?""" - return all(e is not None for e in 
cfgs.true_energies + cfgs.predicted_energies) + return all( + e is not None for e in cfgs.true_energies + cfgs.predicted_energies + ) def _all_forces_are_defined(cfgs) -> bool: @@ -71,23 +72,23 @@ def _add_energy_time_plot(config_set, axis) -> None: true_Es = np.array(config_set.true_energies) min_E = np.min(true_Es) - axis.plot(xs, - np.array(config_set.predicted_energies) - min_E, - label='predicted', - lw=2) + axis.plot( + xs, + np.array(config_set.predicted_energies) - min_E, + label='predicted', + lw=2, + ) - axis.plot(xs, - true_Es - min_E, - label='true', - c='orange', - lw=2) + axis.plot(xs, true_Es - min_E, label='true', c='orange', lw=2) # plot the region of 'chemical accuracy' 1 kcal mol-1 = 0.043 eV - axis.fill_between(xs, - y1=true_Es - min_E - 0.043, - y2=true_Es - min_E + 0.043, - alpha=0.2, - color='orange') + axis.fill_between( + xs, + y1=true_Es - min_E - 0.043, + y2=true_Es - min_E + 0.043, + alpha=0.2, + color='orange', + ) axis.legend() axis.set_xlabel(xlabel) @@ -99,7 +100,7 @@ def _add_energy_time_plot(config_set, axis) -> None: def _add_energy_parity_plot(config_set, axis) -> None: """Plot true vs predicted energies""" xs = np.array(config_set.true_energies) - xs -= np.min(xs) # Only relative energies matter + xs -= np.min(xs) # Only relative energies matter ys = np.array(config_set.predicted_energies) ys -= np.min(ys) @@ -107,15 +108,9 @@ def _add_energy_parity_plot(config_set, axis) -> None: min_e = min([np.min(xs), np.min(ys)]) max_e = min([np.max(xs), np.max(ys)]) - axis.scatter(xs, ys, - marker='o', - s=20, - c='white', - edgecolors='blue') + axis.scatter(xs, ys, marker='o', s=20, c='white', edgecolors='blue') - axis.plot([min_e, max_e], [min_e, max_e], - c='k', - lw=1.0) + axis.plot([min_e, max_e], [min_e, max_e], c='k', lw=1.0) _add_r_sq_and_mad(axis, xs=xs, ys=ys) @@ -130,29 +125,34 @@ def _add_energy_parity_plot(config_set, axis) -> None: def _add_force_component_plot(config_set, axis) -> None: """Add a parity plot of the force 
components""" - cmaps = [plt.get_cmap('Blues'), - plt.get_cmap('Reds'), - plt.get_cmap('Purples')] + cmaps = [ + plt.get_cmap('Blues'), + plt.get_cmap('Reds'), + plt.get_cmap('Purples'), + ] - min_f = min([np.min(config_set.true_forces), - np.min(config_set.predicted_forces)]) + min_f = min( + [np.min(config_set.true_forces), np.min(config_set.predicted_forces)] + ) - max_f = min([np.max(config_set.true_forces), - np.max(config_set.predicted_forces)]) + max_f = min( + [np.max(config_set.true_forces), np.max(config_set.predicted_forces)] + ) for idx, k in enumerate(['x', 'y', 'z']): - xs, ys = [], [] for config in config_set: xs += config.forces.true[:, idx].tolist() ys += config.forces.predicted[:, idx].tolist() - axis.hist2d(xs, ys, - bins=40, - label='$F_{x}$', - cmap=cmaps[idx], - norm=mpl.colors.LogNorm() - ) + axis.hist2d( + xs, + ys, + bins=40, + label='$F_{x}$', + cmap=cmaps[idx], + norm=mpl.colors.LogNorm(), + ) axis.set_ylim(min_f, max_f) axis.set_xlim(min_f, max_f) @@ -174,11 +174,14 @@ def _add_force_magnitude_plot(config_set, axis) -> None: min_f = min([np.min(xs), np.min(ys)]) max_f = min([np.max(xs), np.max(ys)]) - axis.hist2d(xs, ys, - range=[[min_f, max_f], [min_f, max_f]], - bins=50, - cmap=plt.get_cmap('Blues'), - norm=mpl.colors.LogNorm()) + axis.hist2d( + xs, + ys, + range=[[min_f, max_f], [min_f, max_f]], + bins=50, + cmap=plt.get_cmap('Blues'), + norm=mpl.colors.LogNorm(), + ) _add_r_sq_and_mad(axis, xs=np.array(xs), ys=np.array(ys)) @@ -195,14 +198,15 @@ def _add_r_sq_and_mad(axis, xs, ys): """Add an annotation of the correlation and MAD between the data""" slope, intercept, r, p, se = linregress(xs, ys) - axis.annotate(f'$R^2$ = {r**2:.3f},\n' - f' MAD = {np.mean(np.abs(xs - ys)):.3f} eV', - xy=(1, 0), - xycoords='axes fraction', - fontsize=12, - xytext=(-5, 5), - textcoords='offset points', - ha='right', - va='bottom') + axis.annotate( + f'$R^2$ = {r**2:.3f},\n' f' MAD = {np.mean(np.abs(xs - ys)):.3f} eV', + xy=(1, 0), + xycoords='axes 
fraction', + fontsize=12, + xytext=(-5, 5), + textcoords='offset points', + ha='right', + va='bottom', + ) return None diff --git a/mlptrain/configurations/trajectory.py b/mlptrain/configurations/trajectory.py index 6310a1ba..cacbf985 100644 --- a/mlptrain/configurations/trajectory.py +++ b/mlptrain/configurations/trajectory.py @@ -20,7 +20,7 @@ def t0(self) -> float: Returns: (float): t_0 in fs """ - return 0. if len(self) == 0 else self[0].time + return 0.0 if len(self) == 0 else self[0].time @t0.setter def t0(self, value: float): @@ -28,8 +28,10 @@ def t0(self, value: float): for frame in self: if frame.time is None: - logger.warning('Attempted to set the initial time but a ' - f'time was note defined. Setting to {value}') + logger.warning( + 'Attempted to set the initial time but a ' + f'time was note defined. Setting to {value}' + ) frame.time = value else: diff --git a/mlptrain/descriptors.py b/mlptrain/descriptors.py index 5b87669f..297565d0 100644 --- a/mlptrain/descriptors.py +++ b/mlptrain/descriptors.py @@ -3,9 +3,10 @@ from typing import Union, Optional, Sequence -def soap_matrix(*args: Union[mlt.ConfigurationSet, mlt.Configuration], - elements: Optional[Sequence] = None - ) -> np.ndarray: +def soap_matrix( + *args: Union[mlt.ConfigurationSet, mlt.Configuration], + elements: Optional[Sequence] = None, +) -> np.ndarray: """ Create a SOAP matrix using dscribe (https://github.com/SINGROUP/dscribe) for a set of configurations @@ -46,16 +47,19 @@ def soap_matrix(*args: Union[mlt.ConfigurationSet, mlt.Configuration], # f' configuration(s)') if elements is None: - elements = list(set(atom.label for c in configurations - for atom in c.atoms)) + elements = list( + set(atom.label for c in configurations for atom in c.atoms) + ) # Compute the average SOAP vector where the expansion coefficients are # calculated over averages over each site - soap_desc = SOAP(species=elements, - r_cut=5, # Distance cutoff (Å) - n_max=6, # Maximum order of the radial - l_max=6, # 
Maximum order of the angular - average='inner') + soap_desc = SOAP( + species=elements, + r_cut=5, # Distance cutoff (Å) + n_max=6, # Maximum order of the radial + l_max=6, # Maximum order of the angular + average='inner', + ) soap_vec = soap_desc.create([conf.ase_atoms for conf in configurations]) # logger.info('SOAP calculation done') @@ -67,9 +71,11 @@ def soap_matrix(*args: Union[mlt.ConfigurationSet, mlt.Configuration], return soap_vec -def soap_kernel_vector(configuration: mlt.Configuration, - configurations: mlt.ConfigurationSet, - zeta: int = 4): +def soap_kernel_vector( + configuration: mlt.Configuration, + configurations: mlt.ConfigurationSet, + zeta: int = 4, +): """ Calculate the kernel matrix between a set of configurations where the kernel is: diff --git a/mlptrain/energy.py b/mlptrain/energy.py index adc6649d..07f59013 100644 --- a/mlptrain/energy.py +++ b/mlptrain/energy.py @@ -4,11 +4,13 @@ class Energy: """Energy in units of eV""" - def __init__(self, - predicted: Optional[float] = None, - true: Optional[float] = None, - bias: Optional[float] = None, - inherited_bias: Optional[float] = None): + def __init__( + self, + predicted: Optional[float] = None, + true: Optional[float] = None, + bias: Optional[float] = None, + inherited_bias: Optional[float] = None, + ): """ Energy diff --git a/mlptrain/forces.py b/mlptrain/forces.py index f6896c59..17b07b73 100644 --- a/mlptrain/forces.py +++ b/mlptrain/forces.py @@ -5,9 +5,11 @@ class Forces: """Forces in units of eV / Å""" - def __init__(self, - predicted: Optional[np.ndarray] = None, - true: Optional[np.ndarray] = None): + def __init__( + self, + predicted: Optional[np.ndarray] = None, + true: Optional[np.ndarray] = None, + ): """ Forces @@ -43,5 +45,3 @@ def delta(self) -> np.ndarray: raise ValueError('Cannot calculate ∆F. 
Shape mismatch') return self.true - self.predicted - - diff --git a/mlptrain/log.py b/mlptrain/log.py index 9d19e781..b013873a 100644 --- a/mlptrain/log.py +++ b/mlptrain/log.py @@ -3,13 +3,16 @@ ll = os.environ.get('MLT_LOG_LEVEL', default='INFO') -logging.basicConfig(level=getattr(logging, ll), - format='%(name)-12s: %(levelname)-8s %(message)s') +logging.basicConfig( + level=getattr(logging, ll), + format='%(name)-12s: %(levelname)-8s %(message)s', +) logger = logging.getLogger(__name__) # Try and use colourful logs try: import coloredlogs + coloredlogs.install(level=getattr(logging, ll), logger=logger) except ImportError: pass diff --git a/mlptrain/loss/__init__.py b/mlptrain/loss/__init__.py index b37a3586..2b16ef11 100644 --- a/mlptrain/loss/__init__.py +++ b/mlptrain/loss/__init__.py @@ -1,6 +1,4 @@ from mlptrain.loss.tau import TauCalculator from mlptrain.loss.mean_errors import RMSE, MAD -__all__ = ['TauCalculator', - 'RMSE', - 'MAD'] +__all__ = ['TauCalculator', 'RMSE', 'MAD'] diff --git a/mlptrain/loss/_base.py b/mlptrain/loss/_base.py index a0e43088..9afaa2fa 100644 --- a/mlptrain/loss/_base.py +++ b/mlptrain/loss/_base.py @@ -1,13 +1,10 @@ import mlptrain -from abc import ABC,abstractmethod +from abc import ABC, abstractmethod from typing import Optional class LossValue(ABC, float): - - def __init__(self, - x, - error: Optional[float] = None): + def __init__(self, x, error: Optional[float] = None): """ Loss value with a possible associated error @@ -33,9 +30,7 @@ def _err_str(self) -> str: class LossFunction(ABC): - - def __init__(self, - method_name: Optional[str] = None): + def __init__(self, method_name: Optional[str] = None): """ Construct a loss function @@ -48,8 +43,10 @@ def __init__(self, self.method_name = method_name @abstractmethod - def __call__(self, - configurations: 'mlptrain.ConfigurationSet', - mlp: 'mlptrain.potentials._base.MLPotential', - **kwargs) -> LossValue: + def __call__( + self, + configurations: 'mlptrain.ConfigurationSet', 
+ mlp: 'mlptrain.potentials._base.MLPotential', + **kwargs, + ) -> LossValue: """Compute a loss value""" diff --git a/mlptrain/loss/mean_errors.py b/mlptrain/loss/mean_errors.py index 6d4244ce..a74967e2 100644 --- a/mlptrain/loss/mean_errors.py +++ b/mlptrain/loss/mean_errors.py @@ -1,7 +1,6 @@ import mlptrain import numpy as np from abc import ABC, abstractmethod -from scipy.stats import bootstrap from mlptrain.loss._base import LossValue, LossFunction @@ -10,10 +9,12 @@ class _DeltaLossFunction(LossFunction, ABC): loss_type = None - def __call__(self, - configurations: 'mlptrain.ConfigurationSet', - mlp: 'mlptrain.potentials.MLPotential', - **kwargs) -> LossValue: + def __call__( + self, + configurations: 'mlptrain.ConfigurationSet', + mlp: 'mlptrain.potentials.MLPotential', + **kwargs, + ) -> LossValue: """Calculate the value of the loss ----------------------------------------------------------------------- @@ -22,6 +23,7 @@ def __call__(self, mlp: Potential to use """ + from scipy.stats import bootstrap if self.loss_type is None: raise NotImplementedError(f'{self} did not define loss_type') @@ -41,14 +43,15 @@ def _delta_energies(self, cfgs, mlp): """Evaluate E_true - E_predicted along a set of configurations""" for idx, configuration in enumerate(cfgs): - if configuration.energy.true is None: if self.method_name is not None: configuration.single_point(method=self.method_name) else: - raise RuntimeError(f'Cannot compute loss for configuration ' - f'{idx}, a true energy was not present') + raise RuntimeError( + f'Cannot compute loss for configuration ' + f'{idx}, a true energy was not present' + ) if configuration.energy.predicted is None: mlp.predict(configuration) @@ -62,13 +65,12 @@ def statistic(arr: np.ndarray) -> float: class RMSEValue(LossValue): - def __repr__(self): return f'RMSE({float.__repr__(self)}{self._err_str})' class RMSE(_DeltaLossFunction): - """ RMSE = √(1/N Σ_i (y_i^predicted - y_i^true)^2)""" + """RMSE = √(1/N Σ_i (y_i^predicted - 
y_i^true)^2)""" loss_type = RMSEValue @@ -78,13 +80,12 @@ def statistic(arr: np.ndarray) -> float: class MADValue(LossValue): - def __repr__(self): return f'MAD({float.__repr__(self)}{self._err_str})' class MAD(LossFunction): - """ MAD = 1/N √(Σ_i |y_i^predicted - y_i^true|)""" + """MAD = 1/N √(Σ_i |y_i^predicted - y_i^true|)""" loss_type = MADValue diff --git a/mlptrain/loss/tau.py b/mlptrain/loss/tau.py index 254bcee2..d017a0c4 100644 --- a/mlptrain/loss/tau.py +++ b/mlptrain/loss/tau.py @@ -8,20 +8,20 @@ class Tau(LossValue): - def __repr__(self): return f'τ_acc = {float.__repr__(self)}{self._err_str}' class TauCalculator(LossFunction): - - def __init__(self, - e_lower: float = 0.1, - e_thresh: Optional[float] = None, - max_time: float = 1000.0, - time_interval: float = 50.0, - temp: float = 300.0, - dt: float = 0.5): + def __init__( + self, + e_lower: float = 0.1, + e_thresh: Optional[float] = None, + max_time: float = 1000.0, + time_interval: float = 50.0, + temp: float = 300.0, + dt: float = 0.5, + ): """ τ_acc prospective error metric in fs @@ -49,8 +49,10 @@ def __init__(self, super().__init__() if time_interval < dt: - raise ValueError('The calculated interval must be more than a ' - 'single timestep') + raise ValueError( + 'The calculated interval must be more than a ' + 'single timestep' + ) self.dt = float(dt) self.temp = float(temp) @@ -60,15 +62,18 @@ def __init__(self, self.e_l = float(e_lower) self.e_t = 10 * self.e_l if e_thresh is None else float(e_thresh) - logger.info('Successfully initialised τ_acc, will do a maximum of ' - f'{int(self.max_time // self.time_interval)} reference ' - f'calculations') - - def __call__(self, - configurations: 'mlptrain.ConfigurationSet', - mlp: 'mlptrain.potentials._base.MLPotential', - **kwargs) -> Tau: - + logger.info( + 'Successfully initialised τ_acc, will do a maximum of ' + f'{int(self.max_time // self.time_interval)} reference ' + f'calculations' + ) + + def __call__( + self, + configurations: 
'mlptrain.ConfigurationSet', + mlp: 'mlptrain.potentials._base.MLPotential', + **kwargs, + ) -> Tau: """ Calculate τ_acc from a set of initial configurations @@ -83,19 +88,26 @@ def __call__(self, (Tau): τ_acc """ if len(configurations) < 2: - raise ValueError(f'Cannot calculate τ_acc over only ' - f'{len(configurations)} configurations. Need > 1') + raise ValueError( + f'Cannot calculate τ_acc over only ' + f'{len(configurations)} configurations. Need > 1' + ) if 'method_name' not in kwargs: - raise ValueError('Cannot compute τ_acc without a method. Please ' - 'specify e.g. calc(..., method_name="orca")') + raise ValueError( + 'Cannot compute τ_acc without a method. Please ' + 'specify e.g. calc(..., method_name="orca")' + ) - taus = [self._calculate_single(c, mlp, kwargs['method_name']) - for c in configurations] + taus = [ + self._calculate_single(c, mlp, kwargs['method_name']) + for c in configurations + ] # Calculate τ_acc as the average ± the standard error in the mean - return Tau(np.average(taus), - error=np.std(taus) / np.sqrt(len(taus) - 1)) + return Tau( + np.average(taus), error=np.std(taus) / np.sqrt(len(taus) - 1) + ) def _calculate_single(self, config, mlp, method_name): """Calculate a single τ_acc from one configuration""" @@ -104,28 +116,30 @@ def _calculate_single(self, config, mlp, method_name): block_time = self.time_interval * Config.n_cores while curr_time < self.max_time: - - traj = run_mlp_md(config, - mlp=mlp, - temp=self.temp, - dt=self.dt, - interval=int(self.time_interval / self.dt), - fs=block_time, - n_cores=min(Config.n_cores, 4)) + traj = run_mlp_md( + config, + mlp=mlp, + temp=self.temp, + dt=self.dt, + interval=int(self.time_interval / self.dt), + fs=block_time, + n_cores=min(Config.n_cores, 4), + ) try: traj.single_point(method_name) except (ValueError, TypeError): - logger.warning('Failed to calculate single point energies with' - f' {method_name}. 
τ_acc will be underestimated ' - f'by <{block_time}') + logger.warning( + 'Failed to calculate single point energies with' + f' {method_name}. τ_acc will be underestimated ' + f'by <{block_time}' + ) return curr_time logger.info(' ___ |E_true - E_GAP|/eV ___') logger.info(' t/fs err cumul(err)') for i, frame in enumerate(traj): - if frame.energy.true is None: logger.warning(f'Frame {i} had no energy') e_error = np.inf @@ -135,9 +149,11 @@ def _calculate_single(self, config, mlp, method_name): # Add any error above the allowed threshold cuml_error += max(e_error - self.e_l, 0) curr_time += self.time_interval - logger.info(f'{curr_time:5.0f} ' - f'{e_error:6.4f} ' - f'{cuml_error:6.4f}') + logger.info( + f'{curr_time:5.0f} ' + f'{e_error:6.4f} ' + f'{cuml_error:6.4f}' + ) if cuml_error > self.e_t: return curr_time diff --git a/mlptrain/molecule.py b/mlptrain/molecule.py index cf6d5dcb..20af0867 100644 --- a/mlptrain/molecule.py +++ b/mlptrain/molecule.py @@ -5,7 +5,6 @@ class Molecule(ade.Molecule): - @property def centroid(self) -> np.ndarray: """ @@ -17,9 +16,7 @@ def centroid(self) -> np.ndarray: """ return np.average(self.coordinates, axis=0) - def is_in_box(self, - box: 'mlptrain.box.Box' - ) -> bool: + def is_in_box(self, box: 'mlptrain.box.Box') -> bool: """Is this molecule totally inside a box with an origin at (0,0,0) and top right corner (a, b, c) = box.size @@ -41,9 +38,7 @@ def is_in_box(self, return True - def min_distance_to(self, - coords: np.ndarray - ) -> float: + def min_distance_to(self, coords: np.ndarray) -> float: """Calculate the minimum distance from this molecule to a set of coordinates @@ -60,9 +55,7 @@ def min_distance_to(self, return np.min(cdist(coords, self.coordinates)) - def random_normal_jiggle(self, - sigma: float = 0.01 - ) -> None: + def random_normal_jiggle(self, sigma: float = 0.01) -> None: """ Add a random displacement to each atoms position. 
@@ -70,9 +63,11 @@ def random_normal_jiggle(self, Arguments: sigma: Standard deviation of the standard deviation """ - dx = np.random.normal(scale=sigma, # Å - loc=0.0, - size=(self.n_atoms, 3)) + dx = np.random.normal( + scale=sigma, # Å + loc=0.0, + size=(self.n_atoms, 3), + ) self.coordinates += dx diff --git a/mlptrain/potentials/__init__.py b/mlptrain/potentials/__init__.py index d1d5205c..e1ff1042 100644 --- a/mlptrain/potentials/__init__.py +++ b/mlptrain/potentials/__init__.py @@ -3,7 +3,4 @@ from mlptrain.potentials.nequip._nequip import NequIP from mlptrain.potentials.mace.mace import MACE -__all__ = ['GAP', - 'ACE', - 'NequIP', - 'MACE'] +__all__ = ['GAP', 'ACE', 'NequIP', 'MACE'] diff --git a/mlptrain/potentials/_base.py b/mlptrain/potentials/_base.py index 964608e1..6e97fe34 100644 --- a/mlptrain/potentials/_base.py +++ b/mlptrain/potentials/_base.py @@ -11,10 +11,7 @@ class MLPotential(ABC): - - def __init__(self, - name: str, - system: 'mlt.System'): + def __init__(self, name: str, system: 'mlt.System'): """ Machine learnt potential. Name defines the name of the potential which will be saved. 
Training data is populated @@ -31,9 +28,9 @@ def __init__(self, self._training_data = mlt.ConfigurationSet() self.atomic_energies = {} - def train(self, - configurations: Optional['mlt.ConfigurationSet'] = None - ) -> None: + def train( + self, configurations: Optional['mlt.ConfigurationSet'] = None + ) -> None: """ Train this potential on a set of configurations @@ -49,18 +46,23 @@ def train(self, self._training_data = configurations if len(self.training_data) == 0: - raise RuntimeError(f'Failed to train {self.__class__.__name__}' - f'({self.name}) had no training configurations') + raise RuntimeError( + f'Failed to train {self.__class__.__name__}' + f'({self.name}) had no training configurations' + ) if any(c.energy.true is None for c in self.training_data): - raise RuntimeError('Cannot train on configurations, an ' - 'energy was undefined') + raise RuntimeError( + 'Cannot train on configurations, an ' 'energy was undefined' + ) if self.requires_atomic_energies and len(self.atomic_energies) == 0: - raise RuntimeError(f'Cannot train {self.__class__.__name__}' - f'({self.name}) required atomic energies that ' - f'are not set. Set e.g. mlp.atomic_energies ' - '= {"H": -13.}') + raise RuntimeError( + f'Cannot train {self.__class__.__name__}' + f'({self.name}) required atomic energies that ' + f'are not set. Set e.g. 
mlp.atomic_energies ' + '= {"H": -13.}' + ) self._train() return None @@ -83,8 +85,7 @@ def requires_atomic_energies(self) -> bool: def requires_non_zero_box_size(self) -> bool: """Can this potential be run in a box with side lengths = 0""" - def predict(self, - *args) -> None: + def predict(self, *args) -> None: """ Predict energies and forces using a MLP in serial @@ -102,11 +103,14 @@ def predict(self, all_configurations.append(arg) else: - raise ValueError('Cannot predict the energy and forces on ' - f'{type(arg)}') + raise ValueError( + 'Cannot predict the energy and forces on ' f'{type(arg)}' + ) - logger.info(f'Evaluating MLP energies over {len(all_configurations)} ' - f'configurations') + logger.info( + f'Evaluating MLP energies over {len(all_configurations)} ' + f'configurations' + ) calculator = self.ase_calculator logger.info('Loaded calculator successfully') @@ -131,8 +135,7 @@ def training_data(self) -> 'mlt.ConfigurationSet': return self._training_data @training_data.setter - def training_data(self, - value: Optional['mlt.ConfigurationSet']): + def training_data(self, value: Optional['mlt.ConfigurationSet']): """Set the training date for this MLP""" if value is None: @@ -142,8 +145,10 @@ def training_data(self, self._training_data = value else: - raise ValueError(f'Cannot set the training data for {self.name} ' - f'with {value}') + raise ValueError( + f'Cannot set the training data for {self.name} ' + f'with {value}' + ) @property def n_train(self) -> int: @@ -171,14 +176,13 @@ def _save_training_data_as_npz_and_xyz(self) -> None: """Save the training data""" for file_extension in ('npz', 'xyz'): - self.training_data.save(filename=f'{self.name}_al.{file_extension}') + self.training_data.save( + filename=f'{self.name}_al.{file_extension}' + ) return None - def al_train(self, - method_name: str, - **kwargs - ) -> None: + def al_train(self, method_name: str, **kwargs) -> None: """ Train this MLP using active learning (AL) using a defined reference method 
@@ -194,13 +198,14 @@ def al_train(self, return None - def al_train_then_bias(self, - method_name: str, - coordinate: 'mlt.sampling.ReactionCoordinate', - min_coordinate: Optional[float] = None, - max_coordinate: Optional[float] = None, - **kwargs - ) -> None: + def al_train_then_bias( + self, + method_name: str, + coordinate: 'mlt.sampling.ReactionCoordinate', + min_coordinate: Optional[float] = None, + max_coordinate: Optional[float] = None, + **kwargs, + ) -> None: r""" Active learning that ensures sufficient sampling over a coordinate. Adds a single harmonic bias to the least well sampled regions of a @@ -233,36 +238,44 @@ def al_train_then_bias(self, _max = np.max(coords) if max_coordinate is None else max_coordinate _min = np.min(coords) if min_coordinate is None else min_coordinate - hist, bin_edges = np.histogram(coords, bins=np.linspace(_min, _max, 10)) - bin_centres = bin_edges[:-1] + np.diff(bin_edges)/2 + hist, bin_edges = np.histogram( + coords, bins=np.linspace(_min, _max, 10) + ) + bin_centres = bin_edges[:-1] + np.diff(bin_edges) / 2 for idx, freq in enumerate(hist): if idx == 0 or idx == (len(hist) - 1): continue # Cannot be a minimum on the first or last point - if not (freq < hist[idx-1] and freq < hist[idx+1]): + if not (freq < hist[idx - 1] and freq < hist[idx + 1]): continue # Not a minimum in the frequency target_coord = bin_centres[idx] - logger.info('Have a minimum in the histogram of coordinates at ' - f'x = {target_coord:.2f}. Adding a harmonic bias and ' - f'running additional AL') - - kwargs['init_configs'] = self._best_bias_init_frame(target_coord, - coords) - self.al_train(method_name=method_name, - bias=mlt.Bias(coordinate, - kappa=10, # eV Å^-2 - reference=target_coord), - **kwargs) + logger.info( + 'Have a minimum in the histogram of coordinates at ' + f'x = {target_coord:.2f}. 
Adding a harmonic bias and ' + f'running additional AL' + ) + + kwargs['init_configs'] = self._best_bias_init_frame( + target_coord, coords + ) + self.al_train( + method_name=method_name, + bias=mlt.Bias( + coordinate, + kappa=10, # eV Å^-2 + reference=target_coord, + ), + **kwargs, + ) return None - def _best_bias_init_frame(self, - value: float, - values: np.ndarray - ) -> 'mlt.configurations.ConfigurationSet': + def _best_bias_init_frame( + self, value: float, values: np.ndarray + ) -> 'mlt.configurations.ConfigurationSet': """ Get the closest single frame as a configuration set to start a biased AL loop, where the closest distance from the value to any one of the @@ -278,9 +291,7 @@ def _best_bias_init_frame(self, return mlt.ConfigurationSet(self.training_data[best_idx]) - def set_atomic_energies(self, - method_name: str - ) -> None: + def set_atomic_energies(self, method_name: str) -> None: """ Set the atomic energies of all atoms in this system @@ -290,21 +301,23 @@ def set_atomic_energies(self, """ for symbol in self.system.unique_atomic_symbols: - config = Configuration(atoms=[Atom(symbol)], - charge=0, - mult=_spin_multiplicites[symbol]) + config = Configuration( + atoms=[Atom(symbol)], + charge=0, + mult=_spin_multiplicites[symbol], + ) config.single_point(method=method_name, n_cores=1) if config.energy.true is None: - if symbol == 'H': logger.warning('Using estimated H atom ground state E') - config.energy.true = -13.6056995 # -0.5 Ha + config.energy.true = -13.6056995 # -0.5 Ha else: - raise RuntimeError('Failed to calculate an energy for ' - f'{symbol}') + raise RuntimeError( + 'Failed to calculate an energy for ' f'{symbol}' + ) self.atomic_energies[symbol] = config.energy.true @@ -314,32 +327,35 @@ def copy(self) -> 'MLPotential': return deepcopy(self) -_spin_multiplicites = {'H': 2, - 'He': 1, - 'Li': 2, - 'Be': 1, - 'C': 3, - 'N': 4, - 'O': 3, - 'F': 2, - 'Ne': 1, - 'Na': 2, - 'Mg': 1, - 'Cl': 2, - 'Ar': 1, - 'K': 2, - 'Ca': 1, - 'Sc': 2, - 'Ti': 
3, - 'V': 4, - 'Cr': 7, - 'Mn': 6, - 'Fe': 5, - 'Co': 4, - 'Ni': 3, - 'Cu': 2, - 'Zn': 1, - 'Ga': 2, - 'Br': 2, - 'Kr': 1, - 'I': 2} +_spin_multiplicites = { + 'H': 2, + 'He': 1, + 'Li': 2, + 'Be': 1, + 'C': 3, + 'N': 4, + 'O': 3, + 'F': 2, + 'Ne': 1, + 'Na': 2, + 'Mg': 1, + 'S': 3, + 'Cl': 2, + 'Ar': 1, + 'K': 2, + 'Ca': 1, + 'Sc': 2, + 'Ti': 3, + 'V': 4, + 'Cr': 7, + 'Mn': 6, + 'Fe': 5, + 'Co': 4, + 'Ni': 3, + 'Cu': 2, + 'Zn': 1, + 'Ga': 2, + 'Br': 2, + 'Kr': 1, + 'I': 2, +} diff --git a/mlptrain/potentials/ace/ace.py b/mlptrain/potentials/ace/ace.py index e2e20b76..1751542b 100644 --- a/mlptrain/potentials/ace/ace.py +++ b/mlptrain/potentials/ace/ace.py @@ -12,7 +12,6 @@ class ACE(MLPotential): - def _train(self) -> None: """ Train this potential on the current training data by printing an @@ -30,26 +29,45 @@ def _train(self) -> None: _check_julia_install_exists() - logger.info(f'Training an ACE potential on *{len(self.training_data)}* ' - f'training data') + logger.info( + f'Training an ACE potential on *{len(self.training_data)}* ' + f'training data' + ) # Run the training using a specified number of total cores os.environ['JULIA_NUM_THREADS'] = str(Config.n_cores) - p = Popen([shutil.which('julia'), f'{self.name}.jl'], - shell=False, - stdout=PIPE, - stderr=PIPE) + p = Popen( + [shutil.which('julia'), f'{self.name}.jl'], + shell=False, + encoding='utf-8', + stdout=PIPE, + stderr=PIPE, + ) out, err = p.communicate(timeout=None) + filename_ace_out = 'ACE_output.out' + + with open(filename_ace_out, 'a') as f: + f.write(f'ACE training output:\n{out}') + if err: + f.write(f'ACE training error:\n{err}') + delta_time = time() - start_time logger.info(f'ACE training ran in {delta_time / 60:.1f} m') - if any((delta_time < 0.01, - b'SYSTEM ABORT' in err, - not os.path.exists(f'{self.name}.json'))): - - raise RuntimeError(f'ACE train errored with:\n{err.decode()}\n') + if any( + ( + delta_time < 0.01, + 'SYSTEM ABORT' in err, + p.returncode != 0, + not 
os.path.exists(f'{self.name}.json'), + ) + ): + raise RuntimeError( + f'ACE train errored with a return code:\n{p.returncode}\n' + f'and error:\n{err}\n' + ) for filename in (f'{self.name}_data.xyz', f'{self.name}.jl'): os.remove(filename) @@ -73,10 +91,12 @@ def ase_calculator(self) -> 'ase.calculators.calculator.Calculator': import pyjulip except ModuleNotFoundError: - raise RuntimeError('Failed to import pyjulip required for ' - 'generating ASE calculators from ACE ' - 'potentials.\n' - 'Install: https://github.com/casv2/pyjulip') + raise RuntimeError( + 'Failed to import pyjulip required for ' + 'generating ASE calculators from ACE ' + 'potentials.\n' + 'Install: https://github.com/casv2/pyjulip' + ) return pyjulip.ACE(f'./{self.name}.json') @@ -98,12 +118,11 @@ def pairwise_dists(_c): diag_shift = 9999.9 * np.eye(len(_c.coordinates)) return distance_matrix(_c.coordinates, _c.coordinates) + diag_shift - return min(np.min(pairwise_dists(c)) for c in self.training_data) + 0.05 + return ( + min(np.min(pairwise_dists(c)) for c in self.training_data) + 0.05 + ) - def _print_input(self, - filename: str, - **kwargs - ) -> None: + def _print_input(self, filename: str, **kwargs) -> None: """ Print an input file appropriate for a ACE potential @@ -115,123 +134,134 @@ def _print_input(self, """ inp_file = open(filename, 'w') - print('using IPFitting, ACE, JuLIP, LinearAlgebra\n' - 'using JuLIP.MLIPs: combine, SumIP\n' - 'using ACE: z2i, i2z, order\n' - f'BLAS.set_num_threads({Config.n_cores})\n', # number of threads for the LSQ solver - file=inp_file) + print( + 'using IPFitting, ACE, JuLIP, LinearAlgebra\n' + 'using JuLIP.MLIPs: combine, SumIP\n' + 'using ACE: z2i, i2z, order\n' + f'BLAS.set_num_threads({Config.n_cores})\n', # number of threads for the LSQ solver + file=inp_file, + ) # first define the ACE basis specification - _str = ", ".join([f':{s}' for s in self.system.unique_atomic_symbols]) + _str = ', '.join([f':{s}' for s in self.system.unique_atomic_symbols]) - 
print(f'species = [{_str}]\n' - 'N = 4', # maximum correlation order - file=inp_file) + print( + f'species = [{_str}]\n' + f"N = {Config.ace_params['N']}", # maximum correlation order + file=inp_file, + ) for species in self.system.unique_atomic_symbols: print(f'z{species} = AtomicNumber(:{species})', file=inp_file) # maximum degrees for each correlation order - print('Dd = Dict("default" => 10,\n' - '1 => 20,\n' - '2 => 16,\n', - file=inp_file) + print( + 'Dd = Dict("default" => 10,\n' '1 => 20,\n' '2 => 16,\n', + file=inp_file, + ) for species in self.system.unique_atomic_symbols: if species == 'H': logger.warning('Not adding basis functions on H') - print(f'(3, z{species}) => {16 if species != "H" else 0},', - f'(4, z{species}) => {12 if species != "H" else 0},', - file=inp_file) + print( + f'(3, z{species}) => {16 if species != "H" else 0},', + f'(4, z{species}) => {12 if species != "H" else 0},', + file=inp_file, + ) print(')', file=inp_file) # for the basis function specified by (n, l) # degree = n_weight * n + l_weight * l - #n_weights - print('Dn = Dict( "default" => 1.0 )\n' - 'Dl = Dict( "default" => 1.5 )', #l_weights - sep='\n', file=inp_file) + # n_weights + print( + 'Dn = Dict( "default" => 1.0 )\n' + 'Dl = Dict( "default" => 1.5 )', # l_weights + sep='\n', + file=inp_file, + ) # r0 is a typical length scale for the distance transform - print('r0 = 1.3\n' - f'r_in = {self._r_in_estimate:.4f}\n' # inner cutoff of ACE, choose a little more than min dist in dataset - 'r_cut = 4.0\n' # outer cutoff of ACE - '\n' - 'deg_pair = 5\n' # Specify the pair potential - 'r_cut_pair = 5.0\n', - file=inp_file) + print( + 'r0 = 1.3\n' + f'r_in = {self._r_in_estimate:.4f}\n' # inner cutoff of ACE, choose a little more than min dist in dataset + f"r_cut = {Config.ace_params['r_cut']}\n" # outer cutoff of ACE + '\n' + f"deg_pair = {Config.ace_params['deg_pair']}\n" # Specify the pair potential + f"r_cut_pair = {Config.ace_params['r_cut_pair']}\n", + file=inp_file, + ) 
print('Vref = OneBody(', file=inp_file) for symbol in self.system.unique_atomic_symbols: - print(f':{symbol} => {self.atomic_energies[symbol]},', file=inp_file) + print( + f':{symbol} => {self.atomic_energies[symbol]},', file=inp_file + ) print(');', file=inp_file) # load the training data - print(f'train_data = IPFitting.Data.read_xyz("{self.name}_data.xyz",\n' - ' energy_key="energy",\n' - ' force_key="forces",\n' - ' virial_key="dummy");\n', - file=inp_file) + print( + f'train_data = IPFitting.Data.read_xyz("{self.name}_data.xyz",\n' + ' energy_key="energy",\n' + ' force_key="forces",\n' + ' virial_key="dummy");\n', + file=inp_file, + ) # give weights for the different config_type-s - print('weights = Dict(\n' - ' "default" => Dict("E" => 20.0, "F" => 1.0 , "V" => 0.0 )\n' - ' );\n' - 'dbname = ""\n', # change this to something to save the design matrix - file=inp_file) + print( + 'weights = Dict(\n' + ' "default" => Dict("E" => 20.0, "F" => 1.0 , "V" => 0.0 )\n' + ' );\n' + 'dbname = ""\n', # change this to something to save the design matrix + file=inp_file, + ) # specify the least squares solver, there are many implemented in IPFitting, # here are two examples with sensible defaults # Iterative LSQR with Laplacian scaling - print('damp = 0.1 # weight in front of ridge penalty, range 0.5 - 0.01\n' - 'rscal = 2.0 # power of Laplacian scaling of basis functions, range is 1-4\n' - 'solver = (:itlsq, (damp, rscal, 1e-6, identity))\n' - + print( + 'damp = 0.1 # weight in front of ridge penalty, range 0.5 - 0.01\n' + 'rscal = 2.0 # power of Laplacian scaling of basis functions, range is 1-4\n' + 'solver = (:itlsq, (damp, rscal, 1e-6, identity))\n' # simple riddge regression # r = 1.05 # how much of the training error to sacrifise for regularisation # solver = (:rid, r) - - f'save_name = "{filename.replace(".jl", ".json")}"\n', - file=inp_file) + f'save_name = "{filename.replace(".jl", ".json")}"\n', + file=inp_file, + ) 
###################################################################### - print('Deg = ACE.RPI.SparsePSHDegreeM(Dn, Dl, Dd)\n' - - # construction of a basic basis for site energies - 'Bsite = rpi_basis(species = species,\n' - ' N = N,\n' - ' r0 = r0,\n' - ' D = Deg,\n' - ' rin = r_in, rcut = r_cut,\n' # domain for radial basis (cf documentation) - ' maxdeg = 1.0,\n' #maxdeg increases the entire basis size; - ' pin = 2)\n' # require smooth inner cutoff - - # pair potential basis - 'Bpair = pair_basis(species = species, r0 = r0, maxdeg = deg_pair,\n' - ' rcut = r_cut_pair, rin = 0.0,\n' - ' pin = 0 ) # pin = 0 means no inner cutoff\n' - - - 'B = JuLIP.MLIPs.IPSuperBasis([Bpair, Bsite]);\n' - - 'println("The total number of basis functions is")\n' - '@show length(B)\n' - - 'dB = LsqDB(dbname, B, train_data);\n' - - - 'IP, lsqinfo = IPFitting.Lsq.lsqfit(dB, Vref=Vref,\n' - ' solver=solver,\n' - ' asmerrs=true, weights=weights)\n' - 'save_dict(save_name,' - ' Dict("IP" => write_dict(IP), "info" => lsqinfo))\n' - 'rmse_table(lsqinfo["errors"])\n' - 'println("The L2 norm of the fit is ", round(norm(lsqinfo["c"]), digits=2))\n', - file=inp_file) + print( + 'Deg = ACE.RPI.SparsePSHDegreeM(Dn, Dl, Dd)\n' + # construction of a basic basis for site energies + 'Bsite = rpi_basis(species = species,\n' + ' N = N,\n' + ' r0 = r0,\n' + ' D = Deg,\n' + ' rin = r_in, rcut = r_cut,\n' # domain for radial basis (cf documentation) + ' maxdeg = 1.0,\n' # maxdeg increases the entire basis size; + ' pin = 2)\n' # require smooth inner cutoff + # pair potential basis + 'Bpair = pair_basis(species = species, r0 = r0, maxdeg = deg_pair,\n' + ' rcut = r_cut_pair, rin = 0.0,\n' + ' pin = 0 ) # pin = 0 means no inner cutoff\n' + 'B = JuLIP.MLIPs.IPSuperBasis([Bpair, Bsite]);\n' + 'println("The total number of basis functions is")\n' + '@show length(B)\n' + 'dB = LsqDB(dbname, B, train_data);\n' + 'IP, lsqinfo = IPFitting.Lsq.lsqfit(dB, Vref=Vref,\n' + ' solver=solver,\n' + ' asmerrs=true, 
weights=weights)\n' + 'save_dict(save_name,' + ' Dict("IP" => write_dict(IP), "info" => lsqinfo))\n' + '#rmse_table(lsqinfo["errors"])\n' + 'println("The L2 norm of the fit is ", round(norm(lsqinfo["c"]), digits=2))\n', + file=inp_file, + ) inp_file.close() @@ -242,5 +272,7 @@ def _check_julia_install_exists() -> None: """Ensure that a julia install is present""" if shutil.which('julia') is None: - exit("Failed to find a Julia installation. Make sure it's present " - "in your $PATH") + exit( + "Failed to find a Julia installation. Make sure it's present " + 'in your $PATH' + ) diff --git a/mlptrain/potentials/gap/gap.py b/mlptrain/potentials/gap/gap.py index 8942e5c7..c9d163fd 100644 --- a/mlptrain/potentials/gap/gap.py +++ b/mlptrain/potentials/gap/gap.py @@ -12,11 +12,12 @@ class GAP(MLPotential): - - def __init__(self, - name: str, - system: Optional['mlptrain.System'] = None, - default_params: bool = True): + def __init__( + self, + name: str, + system: Optional['mlptrain.System'] = None, + default_params: bool = True, + ): """ Gaussian Approximation Potential. Parameters default to using all unique pairs of SOAPs @@ -30,8 +31,10 @@ def __init__(self, default_params: Whether to use default parameters """ - super().__init__(name=name if not name.endswith('.xml') else name[:-4], - system=system) + super().__init__( + name=name if not name.endswith('.xml') else name[:-4], + system=system, + ) self.params = None @@ -54,8 +57,9 @@ def xml_filename(self): def _check_xml_exists(self): """Raise an exception if the parameter file (.xml) doesn't exist""" if not os.path.exists(self.xml_filename): - raise IOError(f'GAP parameter file ({self.xml_filename}) did not ' - f'exist') + raise IOError( + f'GAP parameter file ({self.xml_filename}) did not ' f'exist' + ) @property def ase_calculator(self): @@ -68,13 +72,15 @@ def ase_calculator(self): try: import quippy except ModuleNotFoundError: - raise ModuleNotFoundError('Quippy was not installed. 
Try\n' - 'pip install quippy-ase') + raise ModuleNotFoundError( + 'Quippy was not installed. Try\n' 'pip install quippy-ase' + ) self._check_xml_exists() - calculator = quippy.potential.Potential("IP GAP", - param_filename=self.xml_filename) + calculator = quippy.potential.Potential( + 'IP GAP', param_filename=self.xml_filename + ) calculator.name = self.name return calculator @@ -83,60 +89,74 @@ def _train_command(self): """Generate the teach_sparse function call for this system of atoms""" general = self.params.general - params = ('default_sigma={' - f'{general["sigma_E"]:.6f} {general["sigma_F"]:.6f} 0.0 0.0' - '} ') + params = ( + 'default_sigma={' + f'{general["sigma_E"]:.6f} {general["sigma_F"]:.6f} 0.0 0.0' + '} ' + ) params += 'e0_method=average gap={' # Likewise with all the SOAPs to be added for symbol, soap in self.params.soap.items(): logger.info(f'Adding SOAP: {symbol}') - other_atomic_ns = [Atom(s).atomic_number for s in soap["other"]] + other_atomic_ns = [Atom(s).atomic_number for s in soap['other']] logger.info(f'with neighbours {soap["other"]}') - params += ('soap sparse_method=cur_points ' - f'n_sparse={int(soap["n_sparse"])} ' - f'covariance_type=dot_product ' - f'zeta=4 ' - f'atom_sigma={soap["sigma_at"]} ' - f'cutoff={soap["cutoff"]} ' - f'delta=1.0 ' - f'add_species=F ' - f'n_Z=1 ' - f'n_species={len(soap["other"])} ' - 'species_Z={{' - # Remove the brackets from the ends of the list - f'{str(other_atomic_ns)[1:-1]}' - '}} ' - f'Z={Atom(symbol).atomic_number} ' - f'n_max={int(2 * soap["l_max"])} ' - f'l_max={int(soap["l_max"])}: ') + params += ( + 'soap sparse_method=cur_points ' + f'n_sparse={int(soap["n_sparse"])} ' + f'covariance_type=dot_product ' + f'zeta=4 ' + f'atom_sigma={soap["sigma_at"]} ' + f'cutoff={soap["cutoff"]} ' + f'delta=1.0 ' + f'add_species=F ' + f'n_Z=1 ' + f'n_species={len(soap["other"])} ' + 'species_Z={{' + # Remove the brackets from the ends of the list + f'{str(other_atomic_ns)[1:-1]}' + '}} ' + 
f'Z={Atom(symbol).atomic_number} ' + f'n_max={int(2 * soap["l_max"])} ' + f'l_max={int(soap["l_max"])}: ' + ) # Remove the final unnecessary colon params = params.rstrip(': ') # Reference energy and forces labels and don't separate xml files - params += ('} energy_parameter_name=energy ' - 'force_parameter_name=forces ' - 'sparse_separate_file=F') + params += ( + '} energy_parameter_name=energy ' + 'force_parameter_name=forces ' + 'sparse_separate_file=F' + ) # GAP needs the training data, some parameters and a file to save to - return [f'at_file={self.name}_data.xyz', params, f'gp_file={self.name}.xml'] + return [ + f'at_file={self.name}_data.xyz', + params, + f'gp_file={self.name}.xml', + ] def _train(self): """Train this GAP on its training data""" if self.params is None or len(self.params.soap) == 0: - raise RuntimeError(f'Cannot train a GAP({self.name}) - had no ' - f'parameters') + raise RuntimeError( + f'Cannot train a GAP({self.name}) - had no ' f'parameters' + ) if shutil.which('gap_fit') is None: - raise RuntimeError('Cannot train a GAP without a gap_fit ' - 'executable present') + raise RuntimeError( + 'Cannot train a GAP without a gap_fit ' 'executable present' + ) - logger.info('Training a Gaussian Approximation potential on ' - f'*{len(self.training_data)}* training data points') + logger.info( + 'Training a Gaussian Approximation potential on ' + f'*{len(self.training_data)}* training data points' + ) start_time = time() @@ -145,22 +165,29 @@ def _train(self): # Run the training using a specified number of total cores os.environ['OMP_NUM_THREADS'] = str(Config.n_cores) - p = Popen([shutil.which('gap_fit')] + self._train_command, - shell=False, - stdout=PIPE, - stderr=PIPE) + p = Popen( + [shutil.which('gap_fit')] + self._train_command, + shell=False, + stdout=PIPE, + stderr=PIPE, + ) out, err = p.communicate() delta_time = time() - start_time logger.info(f'GAP training ran in {delta_time/60:.1f} m') - if any((delta_time < 0.01, + if any( + ( + 
delta_time < 0.01, b'SYSTEM ABORT' in err, - not os.path.exists(f'{self.name}.xml'))): - - raise RuntimeError(f'GAP train errored with:\n ' - f'{err.decode()}\n' - f'{" ".join(self._train_command)}') + not os.path.exists(f'{self.name}.xml'), + ) + ): + raise RuntimeError( + f'GAP train errored with:\n ' + f'{err.decode()}\n' + f'{" ".join(self._train_command)}' + ) os.remove(f'{self.name}_data.xyz.idx') @@ -168,7 +195,6 @@ def _train(self): class _GAPParameters: - def __init__(self, atoms): """ Parameters for a GAP potential @@ -186,7 +212,6 @@ def _soap_dict(atom_symbols): soap_dict, added_pairs = {}, [] for symbol in set(atom_symbols): - if symbol == 'H': logger.warning('Not adding SOAP on H') continue @@ -195,19 +220,22 @@ def _soap_dict(atom_symbols): # Add all the atomic symbols that aren't this one, the neighbour # density for which also hasn't been added already - params["other"] = [s for s in set(atom_symbols) - if s+symbol not in added_pairs - and symbol+s not in added_pairs] + params['other'] = [ + s + for s in set(atom_symbols) + if s + symbol not in added_pairs + and symbol + s not in added_pairs + ] # If there are no other atoms of this type then remove the self # pair if atom_symbols.count(symbol) == 1: - params["other"].remove(symbol) + params['other'].remove(symbol) - for other_symbol in params["other"]: - added_pairs.append(symbol+other_symbol) + for other_symbol in params['other']: + added_pairs.append(symbol + other_symbol) - if len(params["other"]) == 0: + if len(params['other']) == 0: logger.info(f'Not adding SOAP to {symbol} - should be covered') continue diff --git a/mlptrain/potentials/mace/mace.py b/mlptrain/potentials/mace/mace.py index 372b37a0..14e30fb9 100644 --- a/mlptrain/potentials/mace/mace.py +++ b/mlptrain/potentials/mace/mace.py @@ -2,6 +2,7 @@ import mlptrain import argparse import os +import gc import ast import time import shutil @@ -9,7 +10,6 @@ import numpy as np from typing import Optional, Dict, List from ase.data import 
chemical_symbols -from ase.calculators.calculator import Calculator from mlptrain.potentials._base import MLPotential from mlptrain.config import Config from mlptrain.box import Box @@ -23,7 +23,8 @@ from e3nn import o3 from torch.optim.swa_utils import SWALR, AveragedModel from mace import data, modules, tools - from mace.tools import torch_geometric, torch_tools, utils + from mace.calculators import MACECalculator + from mace.tools import torch_geometric from mace.tools.scripts_utils import create_error_table except ModuleNotFoundError: pass @@ -32,10 +33,11 @@ class MACE(MLPotential): """@DynamicAttrs""" - def __init__(self, - name: str, - system: 'mlptrain.System', - ) -> None: + def __init__( + self, + name: str, + system: 'mlptrain.System', + ) -> None: """ MACE machine learning potential @@ -52,27 +54,37 @@ def __init__(self, try: import mace except ModuleNotFoundError: - raise ModuleNotFoundError('MACE install not found, install it ' - 'here: https://github.com/ACEsuit/mace') + raise ModuleNotFoundError( + 'MACE install not found, install it ' + 'here: https://github.com/ACEsuit/mace' + ) self.setup_logger() - logging.info(f"MACE version: {mace.__version__}") + logging.info(f'MACE version: {mace.__version__}') tools.set_seeds(self.args.seed) tools.set_default_dtype(self.args.default_dtype) - self._train_obj_names = ('_train_configs', '_valid_configs', - '_z_table', '_loss_fn', '_train_loader', - '_valid_loader', '_model', '_optimizer', - '_scheduler', '_checkpoint_handler', - '_start_epoch', '_swa', '_ema') + self._train_obj_names = ( + '_train_configs', + '_valid_configs', + '_z_table', + '_loss_fn', + '_train_loader', + '_valid_loader', + '_model', + '_optimizer', + '_scheduler', + '_checkpoint_handler', + '_start_epoch', + '_swa', + '_ema', + ) for obj in self._train_obj_names: setattr(self, obj, None) - def _train(self, - n_cores: Optional[int] = None - ) -> None: + def _train(self, n_cores: Optional[int] = None) -> None: """ Train a MACE potential 
using the data as .xyz file and save the final potential as .model file @@ -85,9 +97,11 @@ def _train(self, n_cores = n_cores if n_cores is not None else Config.n_cores os.environ['OMP_NUM_THREADS'] = str(n_cores) - logger.info('Training a MACE potential on ' - f'*{len(self.training_data)}* training data, ' - f'using {n_cores} cores for training') + logger.info( + 'Training a MACE potential on ' + f'*{len(self.training_data)}* training data, ' + f'using {n_cores} cores for training.' + ) for config in self.training_data: if self.requires_non_zero_box_size and config.box is None: @@ -99,7 +113,7 @@ def _train(self, self._run_train() delta_time = time.perf_counter() - start_time - logger.info(f'MACE training ran in {delta_time / 60:.1f} m') + logger.info(f'MACE training ran in {delta_time / 60:.1f} m.') self._load_latest_epoch() self._print_error_table() @@ -107,6 +121,10 @@ def _train(self, self._reset_train_objs() os.remove(f'{self.name}_data.xyz') + + gc.collect() + torch.cuda.empty_cache() + return None @property @@ -122,9 +140,10 @@ def requires_non_zero_box_size(self) -> bool: def ase_calculator(self) -> 'ase.calculators.calculator.Calculator': """ASE calculator for MACE potential""" - calculator = MACECalculator(model_path=self.filename, - device=Config.mace_params['calc_device'], - default_dtype="float64") + calculator = MACECalculator( + model_paths=self.filename, + device=Config.mace_params['calc_device'], + ) return calculator @property @@ -143,29 +162,32 @@ def _run_train(self) -> None: self._set_train_objs() - metrics_logger = tools.MetricsLogger(directory=self.args.results_dir, - tag=f'{self.name}_train') + metrics_logger = tools.MetricsLogger( + directory=self.args.results_dir, tag=f'{self.name}_train' + ) self.model.to(Config.mace_params['device']) - tools.train(model=self.model, - loss_fn=self.loss_fn, - train_loader=self.train_loader, - valid_loader=self.valid_loader, - optimizer=self.optimizer, - lr_scheduler=self.scheduler, - 
checkpoint_handler=self.checkpoint_handler, - eval_interval=self.args.eval_interval, - start_epoch=self.start_epoch, - max_num_epochs=self.max_num_epochs, - logger=metrics_logger, - patience=self.args.patience, - output_args=self.output_args, - device=Config.mace_params['device'], - swa=self.swa, - ema=self.ema, - max_grad_norm=self.args.clip_grad, - log_errors=Config.mace_params['error_table']) + tools.train( + model=self.model, + loss_fn=self.loss_fn, + train_loader=self.train_loader, + valid_loader=self.valid_loader, + optimizer=self.optimizer, + lr_scheduler=self.scheduler, + checkpoint_handler=self.checkpoint_handler, + eval_interval=self.args.eval_interval, + start_epoch=self.start_epoch, + max_num_epochs=self.max_num_epochs, + logger=metrics_logger, + patience=self.args.patience, + output_args=self.output_args, + device=Config.mace_params['device'], + swa=self.swa, + ema=self.ema, + max_grad_norm=self.args.clip_grad, + log_errors=Config.mace_params['error_table'], + ) return None @@ -173,62 +195,69 @@ def _load_latest_epoch(self) -> None: """Load the latest epoch of the training""" epoch = self.checkpoint_handler.load_latest( - state=tools.CheckpointState(self.model, - self.optimizer, - self.scheduler), - device=Config.mace_params['device']) + state=tools.CheckpointState( + self.model, self.optimizer, self.scheduler + ), + device=Config.mace_params['device'], + ) - logging.info(f"Loaded model from epoch {epoch}") + logging.info(f'Loaded model from epoch {epoch}') return None def _print_error_table(self) -> None: """Generate an error table and print it in logs""" - logging.info("Generating error table") + logging.info('Generating error table') - all_collections = [("train", self.train_configs), - ("valid", self.valid_configs)] + all_collections = { + 'training': self.train_loader, + 'validation': self.valid_loader, + } table = create_error_table( table_type=Config.mace_params['error_table'], - all_collections=all_collections, - z_table=self.z_table, - 
r_max=Config.mace_params['r_max'], - valid_batch_size=self.valid_batch_size, + all_data_loaders=all_collections, model=self.model, loss_fn=self.loss_fn, output_args=self.output_args, log_wandb=self.args.wandb, - device=Config.mace_params['device']) + device=Config.mace_params['device'], + ) - logging.info("\n" + str(table)) + logging.info('\n' + str(table)) return None def _save_model(self) -> None: """Save the trained model""" - model_path = os.path.join(self.args.checkpoints_dir, self.filename) + model_paths = os.path.join(self.args.checkpoints_dir, self.filename) if Config.mace_params['save_cpu']: self.model.to('cpu') - logging.info(f'Saving the model {self.filename} ' - f'to {self.args.checkpoints_dir} ' - 'and the current directory') + logging.info( + f'Saving the model {self.filename} ' + f'to {self.args.checkpoints_dir} ' + 'and the current directory.' + ) - torch.save(self.model, model_path) - shutil.copyfile(src=os.path.join(os.getcwd(), model_path), - dst=os.path.join(os.getcwd(), self.filename)) + torch.save(self.model, model_paths) + shutil.copyfile( + src=os.path.join(os.getcwd(), model_paths), + dst=os.path.join(os.getcwd(), self.filename), + ) return None def _set_train_objs(self) -> None: """Initialise and log training objects""" - logging.info(f'Total number of configurations: ' - f'valid={len(self.valid_configs)}, ' - f'train={len(self.train_configs)}') + logging.info( + f'Total number of configurations: ' + f'valid={len(self.valid_configs)}, ' + f'train={len(self.train_configs)}' + ) logging.info(self.z_table) logging.info(f'Chemical symbols: {self.z_table_symbol}') logging.info(f'Atomic energies: {self.atomic_energies}') @@ -236,12 +265,15 @@ def _set_train_objs(self) -> None: logging.info(f'Selected the following outputs: {self.output_args}') if self.args.compute_avg_num_neighbors: - logging.info(f'Average number of neighbors: ' - f'{self.avg_num_neighbors:.3f}') + logging.info( + f'Average number of neighbors: ' + 
f'{self.avg_num_neighbors:.3f}' + ) logging.info(f'Model: {self.model}') - logging.info(f'Number of parameters: ' - f'{tools.count_parameters(self.model)}') + logging.info( + f'Number of parameters: ' f'{tools.count_parameters(self.model)}' + ) logging.info(f'Optimizer: {self.optimizer}') return None @@ -261,11 +293,12 @@ def setup_logger(self) -> None: mace_logger.setLevel(self.args.log_level) formatter = logging.Formatter( - "%(asctime)s.%(msecs)03d %(levelname)s: %(message)s", - datefmt="%Y-%m-%d %H:%M:%S") + '%(asctime)s.%(msecs)03d %(levelname)s: %(message)s', + datefmt='%Y-%m-%d %H:%M:%S', + ) os.makedirs(name=self.args.log_dir, exist_ok=True) - path = os.path.join(self.args.log_dir, self.name + ".log") + path = os.path.join(self.args.log_dir, self.name + '.log') fh = logging.FileHandler(path) fh.setFormatter(formatter) @@ -288,10 +321,16 @@ def max_num_epochs(self) -> int: @property def args(self) -> 'argparse.Namespace': """Namespace containing mostly default MACE parameters""" - args = tools.build_default_arg_parser().parse_args([ - '--name', self.name, - '--train_file', f'{self.name}_data.xyz', - '--default_dtype', 'float64']) + args = tools.build_default_arg_parser().parse_args( + [ + '--name', + self.name, + '--train_file', + f'{self.name}_data.xyz', + '--default_dtype', + Config.mace_params['dtype'], + ] + ) return args @property @@ -303,12 +342,14 @@ def device(self) -> 'torch.device': def config_type_weights(self) -> Dict: """Dictionary containing the weights for each configuration type""" config_type_weights = ast.literal_eval( - Config.mace_params['config_type_weights']) + Config.mace_params['config_type_weights'] + ) if not isinstance(config_type_weights, dict): - logging.warning('Config type weights not specified correctly, ' - 'using Default') - config_type_weights = {"Default": 1.0} + logging.warning( + 'Config type weights not specified correctly, ' 'using Default' + ) + config_type_weights = {'Default': 1.0} return config_type_weights @@ 
-321,7 +362,8 @@ def z_table(self) -> 'mace.tools.AtomicNumberTable': z for configs in (self.train_configs, self.valid_configs) for config in configs - for z in config.atomic_numbers) + for z in config.atomic_numbers + ) return self._z_table @@ -333,8 +375,9 @@ def z_table_symbol(self) -> List[str]: @property def atomic_energies_array(self) -> np.ndarray: """List of atomic energies of the system""" - return np.array([self.atomic_energies[symbol] - for symbol in self.z_table_symbol]) + return np.array( + [self.atomic_energies[symbol] for symbol in self.z_table_symbol] + ) @property def valid_fraction(self) -> float: @@ -342,8 +385,9 @@ def valid_fraction(self) -> float: _min_dataset = -(1 // -Config.mace_params['valid_fraction']) if self.n_train == 1: - raise ValueError('MACE training requires at least ' - '2 configurations') + raise ValueError( + 'MACE training requires at least ' '2 configurations' + ) elif self.n_train >= _min_dataset: return Config.mace_params['valid_fraction'] @@ -363,7 +407,8 @@ def train_configs(self) -> 'mace.data.Configurations': config_type_weights=self.config_type_weights, energy_key=self.args.energy_key, forces_key=self.args.forces_key, - extract_atomic_energies=False) + extract_atomic_energies=False, + ) return self._train_configs @@ -372,26 +417,32 @@ def valid_configs(self) -> 'mace.data.Configurations': """Configurations in the validation dataset""" if self._valid_configs is None: - if self.args.valid_file is not None: _, self._valid_configs = data.load_from_xyz( file_path=self.args.valid_path, config_type_weights=self.config_type_weights, energy_key=self.args.energy_key, forces_key=self.args.forces_key, - extract_atomic_energies=False) + extract_atomic_energies=False, + ) - logging.info(f'Loaded {len(self._valid_configs)} validation' - f'configurations from "{self.args.valid_file}"') + logging.info( + f'Loaded {len(self._valid_configs)} validation' + f'configurations from "{self.args.valid_file}"' + ) else: - logging.info(f'Using 
{100 * self.valid_fraction}% of the ' - 'training set for validation') - - self._train_configs, self._valid_configs = data.random_train_valid_split( - self.train_configs, - self.valid_fraction, - self.args.seed) + logging.info( + f'Using {100 * self.valid_fraction}% of the ' + 'training set for validation' + ) + + ( + self._train_configs, + self._valid_configs, + ) = data.random_train_valid_split( + self.train_configs, self.valid_fraction, self.args.seed + ) return self._valid_configs @@ -400,22 +451,24 @@ def loss_fn(self) -> 'torch.nn.Module': """Loss function to use in the training""" if self._loss_fn is None: - - if Config.mace_params['loss'] == "weighted": + if Config.mace_params['loss'] == 'weighted': self._loss_fn = modules.WeightedEnergyForcesLoss( - energy_weight=1.0, - forces_weight=5.0) + energy_weight=1.0, forces_weight=5.0 + ) - elif Config.mace_params['loss'] == "forces_only": + elif Config.mace_params['loss'] == 'forces_only': self._loss_fn = modules.WeightedForcesLoss(forces_weight=5.0) else: - logging.info(f'{Config.mace_params["loss"]} is not allowed in ' - f'mlp-train, setting loss to EnergyForcesLoss') + logging.info( + f'{Config.mace_params["loss"]} is not allowed in ' + f'mlp-train, setting loss to EnergyForcesLoss' + ) self._loss_fn = modules.EnergyForcesLoss( energy_weight=Config.mace_params['energy_weight'], - forces_weight=Config.mace_params['forces_weight']) + forces_weight=Config.mace_params['forces_weight'], + ) return self._loss_fn @@ -436,38 +489,48 @@ def valid_batch_size(self) -> int: return Config.mace_params['batch_size'] @property - def train_loader(self) -> 'mace.tools.torch_geometric.dataloader.DataLoader': + def train_loader( + self, + ) -> 'mace.tools.torch_geometric.dataloader.DataLoader': """Torch dataloader with training configurations""" if self._train_loader is None: - self._train_loader = torch_geometric.dataloader.DataLoader( - dataset=[data.AtomicData.from_config( - config, - z_table=self.z_table, - 
cutoff=Config.mace_params['r_max']) - for config in self.train_configs], + dataset=[ + data.AtomicData.from_config( + config, + z_table=self.z_table, + cutoff=Config.mace_params['r_max'], + ) + for config in self.train_configs + ], batch_size=self.train_batch_size, shuffle=True, - drop_last=True) + drop_last=True, + ) return self._train_loader @property - def valid_loader(self) -> 'mace.tools.torch_geometric.dataloader.DataLoader': + def valid_loader( + self, + ) -> 'mace.tools.torch_geometric.dataloader.DataLoader': """Torch dataloader with validation configurations""" if self._valid_loader is None: - self._valid_loader = torch_geometric.dataloader.DataLoader( - dataset=[data.AtomicData.from_config( - config, - z_table=self.z_table, - cutoff=Config.mace_params['r_max']) - for config in self.valid_configs], + dataset=[ + data.AtomicData.from_config( + config, + z_table=self.z_table, + cutoff=Config.mace_params['r_max'], + ) + for config in self.valid_configs + ], batch_size=self.valid_batch_size, shuffle=False, - drop_last=False) + drop_last=False, + ) return self._valid_loader @@ -482,19 +545,20 @@ def avg_num_neighbors(self) -> float: @property def output_args(self) -> Dict: """Dictionary containing required outputs""" - return {"energy": True, - "forces": True, - "virials": False, - "stress": False, - "dipoles": False} + return { + 'energy': True, + 'forces': True, + 'virials': False, + 'stress': False, + 'dipoles': False, + } @property def model(self) -> 'torch.nn.Module': """Torch Module to use in training""" if self._model is None: - - logging.info("Building model") + logging.info('Building model') model_config = dict( r_max=Config.mace_params['r_max'], @@ -502,78 +566,86 @@ def model(self) -> 'torch.nn.Module': num_polynomial_cutoff=self.args.num_cutoff_basis, max_ell=self.args.max_ell, interaction_cls=modules.interaction_classes[ - self.args.interaction], + self.args.interaction + ], num_interactions=self.args.num_interactions, 
num_elements=len(self.z_table), hidden_irreps=o3.Irreps(Config.mace_params['hidden_irreps']), atomic_energies=self.atomic_energies_array, avg_num_neighbors=self.avg_num_neighbors, - atomic_numbers=self.z_table.zs) - - if Config.mace_params['model'] == "MACE": + atomic_numbers=self.z_table.zs, + ) - if self.args.scaling == "no_scaling": + if Config.mace_params['model'] == 'MACE': + if self.args.scaling == 'no_scaling': std = 1.0 - logging.info("No scaling selected") + logging.info('No scaling selected') else: mean, std = modules.scaling_classes[self.args.scaling]( - self.train_loader, - self.atomic_energies_array) + self.train_loader, self.atomic_energies_array + ) self._model = modules.ScaleShiftMACE( **model_config, correlation=Config.mace_params['correlation'], gate=modules.gate_dict[self.args.gate], interaction_cls_first=modules.interaction_classes[ - "RealAgnosticInteractionBlock"], + 'RealAgnosticInteractionBlock' + ], MLP_irreps=o3.Irreps(self.args.MLP_irreps), atomic_inter_scale=std, - atomic_inter_shift=0.0) - - elif Config.mace_params['model'] == "ScaleShiftMACE": + atomic_inter_shift=0.0, + ) + elif Config.mace_params['model'] == 'ScaleShiftMACE': mean, std = modules.scaling_classes[self.args.scaling]( - self.train_loader, - self.atomic_energies_array) + self.train_loader, self.atomic_energies_array + ) self._model = modules.ScaleShiftMACE( **model_config, correlation=Config.mace_params['correlation'], gate=modules.gate_dict[self.args.gate], interaction_cls_first=modules.interaction_classes[ - self.args.interaction_first], + self.args.interaction_first + ], MLP_irreps=o3.Irreps(self.args.MLP_irreps), atomic_inter_scale=std, - atomic_inter_shift=mean) - - elif Config.mace_params['model'] == "ScaleShiftBOTNet": + atomic_inter_shift=mean, + ) + elif Config.mace_params['model'] == 'ScaleShiftBOTNet': mean, std = modules.scaling_classes[self.args.scaling]( - self.train_loader, - self.atomic_energies_array) + self.train_loader, self.atomic_energies_array + ) 
self._model = modules.ScaleShiftBOTNet( **model_config, gate=modules.gate_dict[self.args.gate], interaction_cls_first=modules.interaction_classes[ - self.args.interaction_first], + self.args.interaction_first + ], MLP_irreps=o3.Irreps(self.args.MLP_irreps), atomic_inter_scale=std, - atomic_inter_shift=mean) - - elif Config.mace_params['model'] == "BOTNet": + atomic_inter_shift=mean, + ) + elif Config.mace_params['model'] == 'BOTNet': self._model = modules.BOTNet( **model_config, gate=modules.gate_dict[self.args.gate], interaction_cls_first=modules.interaction_classes[ - self.args.interaction_first], - MLP_irreps=o3.Irreps(self.args.MLP_irreps)) + self.args.interaction_first + ], + MLP_irreps=o3.Irreps(self.args.MLP_irreps), + ) else: - raise RuntimeError(f'{Config.mace_params["model"]} cannot be ' - 'used in mlp-train, please specify a ' - 'different model in Config.mace_params') + raise RuntimeError( + f'{Config.mace_params["model"]} cannot be ' + 'used in mlp-train, please specify a ' + 'different model in Config.mace_params' + ) return self._model @@ -584,7 +656,7 @@ def opt_param_options(self) -> Dict: decay_interactions = {} no_decay_interactions = {} for name, param in self.model.interactions.named_parameters(): - if "linear.weight" in name or "skip_tp_full.weight" in name: + if 'linear.weight' in name or 'skip_tp_full.weight' in name: decay_interactions[name] = param else: no_decay_interactions[name] = param @@ -592,33 +664,37 @@ def opt_param_options(self) -> Dict: param_options = dict( params=[ { - "name": "embedding", - "params": self.model.node_embedding.parameters(), - "weight_decay": 0.0, + 'name': 'embedding', + 'params': self.model.node_embedding.parameters(), + 'weight_decay': 0.0, }, { - "name": "interactions_decay", - "params": list(decay_interactions.values()), - "weight_decay": self.args.weight_decay, + 'name': 'interactions_decay', + 'params': list(decay_interactions.values()), + 'weight_decay': self.args.weight_decay, }, { - "name": 
"interactions_no_decay", - "params": list(no_decay_interactions.values()), - "weight_decay": 0.0, + 'name': 'interactions_no_decay', + 'params': list(no_decay_interactions.values()), + 'weight_decay': 0.0, }, { - "name": "products", - "params": self.model.products.parameters(), - "weight_decay": self.args.weight_decay, + 'name': 'products', + 'params': self.model.products.parameters(), + 'weight_decay': self.args.weight_decay, }, { - "name": "readouts", - "params": self.model.readouts.parameters(), - "weight_decay": 0.0 + 'name': 'readouts', + 'params': self.model.readouts.parameters(), + 'weight_decay': 0.0, }, ], lr=self.args.lr, - amsgrad=Config.mace_params['amsgrad']) + amsgrad=Config.mace_params['amsgrad'], + foreach=False + if torch.get_default_dtype() == torch.float64 + else True, + ) return param_options @@ -627,8 +703,7 @@ def optimizer(self) -> 'torch.optim.Optimizer': """Optimizer to use in training""" if self._optimizer is None: - - if self.args.optimizer == "adamw": + if self.args.optimizer == 'adamw': self._optimizer = torch.optim.AdamW(**self.opt_param_options) else: self._optimizer = torch.optim.Adam(**self.opt_param_options) @@ -640,21 +715,23 @@ def scheduler(self) -> 'torch.optim.lr_scheduler': """Torch scheduler for training""" if self._scheduler is None: - - if self.args.scheduler == "ExponentialLR": + if self.args.scheduler == 'ExponentialLR': self._scheduler = torch.optim.lr_scheduler.ExponentialLR( optimizer=self.optimizer, - gamma=self.args.lr_scheduler_gamma) + gamma=self.args.lr_scheduler_gamma, + ) - elif self.args.scheduler == "ReduceLROnPlateau": + elif self.args.scheduler == 'ReduceLROnPlateau': self._scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( optimizer=self.optimizer, factor=self.args.lr_factor, - patience=self.args.scheduler_patience) + patience=self.args.scheduler_patience, + ) else: - raise RuntimeError(f'Unknown scheduler: ' - f'{self.args.scheduler}') + raise RuntimeError( + f'Unknown scheduler: ' 
f'{self.args.scheduler}' + ) return self._scheduler @@ -666,7 +743,8 @@ def checkpoint_handler(self) -> 'mace.tools.CheckpointHandler': self._checkpoint_handler = tools.CheckpointHandler( directory=self.args.checkpoints_dir, tag=self.name, - keep=self.args.keep_checkpoints) + keep=self.args.keep_checkpoints, + ) return self._checkpoint_handler @@ -675,14 +753,14 @@ def start_epoch(self) -> int: """Start epoch of the training""" if self._start_epoch is None: - self._start_epoch = 0 if Config.mace_params['restart_latest']: opt_start_epoch = self.checkpoint_handler.load_latest( - state=tools.CheckpointState(self.model, - self.optimizer, - self.scheduler), - device=Config.mace_params['device']) + state=tools.CheckpointState( + self.model, self.optimizer, self.scheduler + ), + device=Config.mace_params['device'], + ) if opt_start_epoch is not None: self._start_epoch = opt_start_epoch @@ -694,21 +772,20 @@ def swa(self) -> Optional['mace.tools.SWAContainer']: """Object for stochastic weight averaging during training""" if self._swa is None: - if Config.mace_params['swa']: - if Config.mace_params['start_swa'] is None: # if not set start swa at 75% of training start_swa = self.max_num_epochs // (4 * 3) else: start_swa = Config.mace_params['start_swa'] - if Config.mace_params['loss'] == "forces_only": - logging.info("Can not select swa with forces only loss.") + if Config.mace_params['loss'] == 'forces_only': + logging.info('Can not select swa with forces only loss.') loss_fn_energy = modules.WeightedEnergyForcesLoss( energy_weight=self.args.swa_energy_weight, - forces_weight=self.args.swa_forces_weight) + forces_weight=self.args.swa_forces_weight, + ) self._swa = tools.SWAContainer( model=AveragedModel(self.model), @@ -716,15 +793,19 @@ def swa(self) -> Optional['mace.tools.SWAContainer']: optimizer=self.optimizer, swa_lr=self.args.swa_lr, anneal_epochs=1, - anneal_strategy="linear"), + anneal_strategy='linear', + ), start=start_swa, - loss_fn=loss_fn_energy) + 
loss_fn=loss_fn_energy, + ) - logging.info(f'Using stochastic weight averaging ' - f'(after {self._swa.start} epochs) with ' - f'energy weight : {self.args.swa_energy_weight}, ' - f'forces weight : {self.args.swa_forces_weight}, ' - f'learning rate : {self.args.swa_lr}') + logging.info( + f'Using stochastic weight averaging ' + f'(after {self._swa.start} epochs) with ' + f'energy weight : {self.args.swa_energy_weight}, ' + f'forces weight : {self.args.swa_forces_weight}, ' + f'learning rate : {self.args.swa_lr}' + ) else: self._swa = None @@ -736,45 +817,13 @@ def ema(self) -> Optional['torch_ema.ExponentialMovingAverage']: """Object for exponantial moving average during training""" if self._ema is None: - if Config.mace_params['ema']: self._ema = torch_ema.ExponentialMovingAverage( self.model.parameters(), - decay=Config.mace_params['ema_decay']) + decay=Config.mace_params['ema_decay'], + ) else: self._ema = None return self._ema - -try: - from mace.calculators import MACECalculator as _MACECalculator - - class MACECalculator(_MACECalculator): - - def __init__( - self, - model_path: str, - device: str, - energy_units_to_eV: float = 1.0, - length_units_to_A: float = 1.0, - default_dtype="float64", - **kwargs - ): - Calculator.__init__(self, **kwargs) - self.results = {} - - self.model = torch.load(f=model_path, map_location=device) - if device == 'cuda': - self.model = self.model.to(device) - - self.r_max = float(self.model.r_max) - self.device = torch_tools.init_device(device) - self.energy_units_to_eV = energy_units_to_eV - self.length_units_to_A = length_units_to_A - self.z_table = utils.AtomicNumberTable( - [int(z) for z in self.model.atomic_numbers] - ) - torch_tools.set_default_dtype(default_dtype) -except ModuleNotFoundError: - pass diff --git a/mlptrain/potentials/nequip/_nequip.py b/mlptrain/potentials/nequip/_nequip.py index f8dee82b..a3deba9d 100644 --- a/mlptrain/potentials/nequip/_nequip.py +++ b/mlptrain/potentials/nequip/_nequip.py @@ -11,7 +11,6 @@ 
class NequIP(MLPotential): - def _train(self) -> None: """ Train a NeQUIP potential on a set of data. Requires an .npz file @@ -45,13 +44,14 @@ def ase_calculator(self) -> 'ase.calculators.calculator.Calculator': from nequip.dynamics.nequip_calculator import NequIPCalculator except ModuleNotFoundError: - raise ModuleNotFoundError('NeQUIP install not found, install it ' - 'here: https://github.com/mir-group/nequip') + raise ModuleNotFoundError( + 'NeQUIP install not found, install it ' + 'here: https://github.com/mir-group/nequip' + ) calculator = NequIPCalculator.from_deployed_model( - f'{self.name}_deployed.pth', - device='cpu' - ) + f'{self.name}_deployed.pth', device='cpu' + ) return calculator @@ -91,7 +91,7 @@ def _print_input(self, filename): yml_file = open(filename, 'w') - train_frac = Config.nequip_params["train_fraction"] + train_frac = Config.nequip_params['train_fraction'] if train_frac >= 1 or train_frac <= 0: raise RuntimeError('Cannot train on a training fraction ∉ [0, 1]') @@ -154,7 +154,9 @@ def _print_input(self, filename): 'lr_scheduler_name: ReduceLROnPlateau', 'lr_scheduler_patience: 100', 'lr_scheduler_factor: 0.5', - sep='\n', file=yml_file) + sep='\n', + file=yml_file, + ) yml_file.close() return None @@ -167,14 +169,18 @@ def _run_train(self): if train_executable_path is None: raise RuntimeError('No NeQUIP install found!') - logger.info(f'Training a NeQUIP potential on ' - f'*{len(self.training_data)}* training data') - - p = Popen([train_executable_path, f'{self.name}.yml'], - shell=False, - stdout=PIPE, - stderr=PIPE, - env={**os.environ, 'OMP_NUM_THREADS': str(Config.n_cores)}) + logger.info( + f'Training a NeQUIP potential on ' + f'*{len(self.training_data)}* training data' + ) + + p = Popen( + [train_executable_path, f'{self.name}.yml'], + shell=False, + stdout=PIPE, + stderr=PIPE, + env={**os.environ, 'OMP_NUM_THREADS': str(Config.n_cores)}, + ) out, err = p.communicate(timeout=None) if b'SYSTEM ABORT' in err or b'raise' in err: @@ 
-186,11 +192,17 @@ def _run_deploy(self): """Deploy a NeQUIP model, i.e. save a TorchScript version of it""" logger.info('Deploying a NeQUIP potential') - p = Popen([shutil.which('nequip-deploy'), 'build', f'{self.name}/', - f'{self.name}_deployed.pth'], - shell=False, - stdout=PIPE, - stderr=PIPE) + p = Popen( + [ + shutil.which('nequip-deploy'), + 'build', + f'{self.name}/', + f'{self.name}_deployed.pth', + ], + shell=False, + stdout=PIPE, + stderr=PIPE, + ) _, _ = p.communicate(timeout=None) return None @@ -199,9 +211,9 @@ def _clean_up_dirs(self): """Clean up the directories created by NeQUIP train""" shutil.rmtree('processed') - shutil.make_archive(unique_name(f'{self.name}.zip')[:-4], - 'zip', - self.name) + shutil.make_archive( + unique_name(f'{self.name}.zip')[:-4], 'zip', self.name + ) try: shutil.rmtree(self.name) except OSError: diff --git a/mlptrain/sampling/__init__.py b/mlptrain/sampling/__init__.py index e5fab058..50d478a0 100644 --- a/mlptrain/sampling/__init__.py +++ b/mlptrain/sampling/__init__.py @@ -3,8 +3,10 @@ from mlptrain.sampling.umbrella import UmbrellaSampling from mlptrain.sampling.metadynamics import Metadynamics -__all__ = ['Bias', - 'PlumedBias', - 'PlumedCalculator', - 'UmbrellaSampling', - 'Metadynamics'] +__all__ = [ + 'Bias', + 'PlumedBias', + 'PlumedCalculator', + 'UmbrellaSampling', + 'Metadynamics', +] diff --git a/mlptrain/sampling/_base.py b/mlptrain/sampling/_base.py index 757bad5e..c7ac8956 100644 --- a/mlptrain/sampling/_base.py +++ b/mlptrain/sampling/_base.py @@ -28,4 +28,3 @@ def adjust_potential_energy(self, atoms): @abstractmethod def adjust_positions(self, atoms, newpositions): """Method required for ASE but not used in ml-train""" - diff --git a/mlptrain/sampling/bias.py b/mlptrain/sampling/bias.py index 95b1371c..3275ae52 100644 --- a/mlptrain/sampling/bias.py +++ b/mlptrain/sampling/bias.py @@ -5,10 +5,12 @@ class Bias(ASEConstraint, Function): """Modifies the forces and energy of a set of ASE atoms under a 
bias""" - def __init__(self, - zeta_func: 'mlptrain.sampling.reaction_coord.ReactionCoordinate', - kappa: float, - reference: float): + def __init__( + self, + zeta_func: 'mlptrain.sampling.reaction_coord.ReactionCoordinate', + kappa: float, + reference: float, + ): """ Bias that modifies the forces and energy of a set of atoms under a harmonic bias function. @@ -34,7 +36,7 @@ def __init__(self, def __call__(self, atoms): """Value of the bias for set of atom pairs in atoms""" - return 0.5 * self.kappa * (self.f(atoms) - self.ref)**2 + return 0.5 * self.kappa * (self.f(atoms) - self.ref) ** 2 def grad(self, atoms): """Gradient of the biasing potential a set of atom pairs in atoms""" diff --git a/mlptrain/sampling/md.py b/mlptrain/sampling/md.py index 13586f2a..ce2756a8 100644 --- a/mlptrain/sampling/md.py +++ b/mlptrain/sampling/md.py @@ -13,37 +13,43 @@ PlumedBias, PlumedCalculator, plumed_setup, - get_colvar_filename + get_colvar_filename, ) from mlptrain.log import logger from mlptrain.box import Box from mlptrain.utils import work_in_tmp_dir from ase.md.velocitydistribution import MaxwellBoltzmannDistribution from ase.io.trajectory import Trajectory as ASETrajectory +from ase.md.nptberendsen import NPTBerendsen from ase.md.langevin import Langevin from ase.md.verlet import VelocityVerlet from ase.io import read from ase import units as ase_units -def run_mlp_md(configuration: 'mlptrain.Configuration', - mlp: 'mlptrain.potentials._base.MLPotential', - temp: float, - dt: float, - interval: int, - init_temp: Optional[float] = None, - fbond_energy: Optional[dict] = None, - bbond_energy: Optional[dict] = None, - bias: Optional = None, - restart_files: Optional[List[str]] = None, - copied_substrings: Optional[Sequence[str]] = None, - kept_substrings: Optional[Sequence[str]] = None, - **kwargs - ) -> 'mlptrain.Trajectory': +def run_mlp_md( + configuration: 'mlptrain.Configuration', + mlp: 'mlptrain.potentials._base.MLPotential', + temp: float, + dt: float, + interval: 
int, + pressure: Optional[float] = None, + compress: Optional[float] = None, + init_temp: Optional[float] = None, + fbond_energy: Optional[dict] = None, + bbond_energy: Optional[dict] = None, + bias: Optional = None, + restart_files: Optional[List[str]] = None, + copied_substrings: Optional[Sequence[str]] = None, + kept_substrings: Optional[Sequence[str]] = None, + **kwargs, +) -> 'mlptrain.Trajectory': """ Run molecular dynamics on a system using a MLP to predict energies and forces and ASE to drive dynamics. The function is executed in a temporary - directory. + directory. Note that NPT simulations are currently only implemented in + production runs and not in active learning. + --------------------------------------------------------------------------- Arguments: @@ -63,6 +69,13 @@ def run_mlp_md(configuration: 'mlptrain.Configuration', interval: (int) Interval between saving the geometry + pressure: pressure in bar to run Berendsen NPT MD, temperature + and pressure must also be specified in order to run NPT dynamics. + + compress: compressibility in bar^-1 to run Berendsen NPT MD, + temperature and pressure must also be specified in order to + run NPT dynamics. + bbond_energy: (dict | None) Additional energy to add to a breaking bond. e.g. 
bbond_energy={(0, 1), 0.1} Adds 0.1 eV to the 'bond' between atoms 0 and 1 as velocities @@ -122,12 +135,16 @@ def run_mlp_md(configuration: 'mlptrain.Configuration', for file in restart_files: if not isinstance(file, str): - raise TypeError('Restart files must be a list of strings ' - 'specifying filenames') + raise TypeError( + 'Restart files must be a list of strings ' + 'specifying filenames' + ) if not any(file.endswith('.traj') for file in restart_files): - raise ValueError('Restaring a simulation requires a .traj file ' - 'from the previous simulation') + raise ValueError( + 'Restaring a simulation requires a .traj file ' + 'from the previous simulation' + ) copied_substrings_list.extend(restart_files) kept_substrings_list.extend(restart_files) @@ -135,37 +152,46 @@ def run_mlp_md(configuration: 'mlptrain.Configuration', else: logger.info('Running MLP MD') - decorator = work_in_tmp_dir(copied_substrings=copied_substrings_list, - kept_substrings=kept_substrings_list) + decorator = work_in_tmp_dir( + copied_substrings=copied_substrings_list, + kept_substrings=kept_substrings_list, + ) _run_mlp_md_decorated = decorator(_run_mlp_md) - traj = _run_mlp_md_decorated(configuration=configuration, - mlp=mlp, - temp=temp, - dt=dt, - interval=interval, - init_temp=init_temp, - fbond_energy=fbond_energy, - bbond_energy=bbond_energy, - bias=bias, - restart_files=restart_files, - **kwargs) + traj = _run_mlp_md_decorated( + configuration=configuration, + mlp=mlp, + temp=temp, + dt=dt, + interval=interval, + pressure=pressure, + compress=compress, + init_temp=init_temp, + fbond_energy=fbond_energy, + bbond_energy=bbond_energy, + bias=bias, + restart_files=restart_files, + **kwargs, + ) return traj -def _run_mlp_md(configuration: 'mlptrain.Configuration', - mlp: 'mlptrain.potentials._base.MLPotential', - temp: float, - dt: float, - interval: int, - init_temp: Optional[float] = None, - fbond_energy: Optional[dict] = None, - bbond_energy: Optional[dict] = None, - bias: Optional 
= None, - restart_files: Optional[List[str]] = None, - **kwargs - ) -> 'mlptrain.Trajectory': +def _run_mlp_md( + configuration: 'mlptrain.Configuration', + mlp: 'mlptrain.potentials._base.MLPotential', + temp: float, + dt: float, + interval: int, + pressure: Optional[float] = None, + compress: Optional[float] = None, + init_temp: Optional[float] = None, + fbond_energy: Optional[dict] = None, + bbond_energy: Optional[dict] = None, + bias: Optional = None, + restart_files: Optional[List[str]] = None, + **kwargs, +) -> 'mlptrain.Trajectory': """ Run molecular dynamics on a system using a MLP to predict energies and forces and ASE to drive dynamics @@ -173,40 +199,43 @@ def _run_mlp_md(configuration: 'mlptrain.Configuration', restart = restart_files is not None - n_cores = (kwargs['n_cores'] if 'n_cores' in kwargs - else min(Config.n_cores, 8)) + n_cores = ( + kwargs['n_cores'] if 'n_cores' in kwargs else min(Config.n_cores, 8) + ) os.environ['OMP_NUM_THREADS'] = str(n_cores) logger.info(f'Using {n_cores} core(s) for MLP MD') # Transform dt from fs into ASE time units (for dynamics only) dt_ase = dt * ase_units.fs - n_steps = _n_simulation_steps(dt=dt, - kwargs=kwargs) + n_steps = _n_simulation_steps(dt=dt, kwargs=kwargs) if restart and n_steps % interval != 0: - raise NotImplementedError('Current implementation requires the number ' - 'of steps to be divisible by the interval ' - 'if the simulation is restarted') + raise NotImplementedError( + 'Current implementation requires the number ' + 'of steps to be divisible by the interval ' + 'if the simulation is restarted' + ) if mlp.requires_non_zero_box_size and configuration.box is None: logger.warning('Assuming vaccum simulation. 
Box size = 1000 nm^3') configuration.box = Box([100, 100, 100]) ase_atoms = configuration.ase_atoms - traj_name = _get_traj_name(restart_files=restart_files, - **kwargs) + traj_name = _get_traj_name(restart_files=restart_files, **kwargs) - _set_momenta_and_geometry(ase_atoms=ase_atoms, - temp=init_temp if init_temp is not None else temp, - bbond_energy=bbond_energy, - fbond_energy=fbond_energy, - restart=restart, - traj_name=traj_name) + _set_momenta_and_geometry( + ase_atoms=ase_atoms, + temp=init_temp if init_temp is not None else temp, + bbond_energy=bbond_energy, + fbond_energy=fbond_energy, + restart=restart, + traj_name=traj_name, + ) - ase_traj = _initialise_traj(ase_atoms=ase_atoms, - restart=restart, - traj_name=traj_name) + ase_traj = _initialise_traj( + ase_atoms=ase_atoms, restart=restart, traj_name=traj_name + ) # If MD is restarted, energies of frames from the previous trajectory # are not loaded. Setting them to None @@ -215,43 +244,48 @@ def _run_mlp_md(configuration: 'mlptrain.Configuration', bias_energies = deepcopy(energies) n_previous_steps = interval * len(ase_traj) - _attach_calculator_and_constraints(ase_atoms=ase_atoms, - mlp=mlp, - bias=bias, - temp=temp, - interval=interval, - dt_ase=dt_ase, - restart=restart, - n_previous_steps=n_previous_steps, - **kwargs) - - _run_dynamics(ase_atoms=ase_atoms, - ase_traj=ase_traj, - traj_name=traj_name, - interval=interval, - temp=temp, - dt=dt, - dt_ase=dt_ase, - n_steps=n_steps, - energies=energies, - biased_energies=biased_energies, - **kwargs) + _attach_calculator_and_constraints( + ase_atoms=ase_atoms, + mlp=mlp, + bias=bias, + temp=temp, + interval=interval, + dt_ase=dt_ase, + restart=restart, + n_previous_steps=n_previous_steps, + **kwargs, + ) + + _run_dynamics( + ase_atoms=ase_atoms, + ase_traj=ase_traj, + traj_name=traj_name, + interval=interval, + temp=temp, + pressure=pressure, + compress=compress, + dt=dt, + dt_ase=dt_ase, + n_steps=n_steps, + energies=energies, + 
biased_energies=biased_energies, + **kwargs, + ) # Duplicate frames removed only if PLUMED bias is initialised not from file if restart and isinstance(bias, PlumedBias) and not bias.from_file: - _remove_colvar_duplicate_frames(bias=bias, - **kwargs) + _remove_colvar_duplicate_frames(bias=bias, **kwargs) - traj = _convert_ase_traj(traj_name=traj_name, - bias=bias, - **kwargs) + traj = _convert_ase_traj(traj_name=traj_name, bias=bias, **kwargs) for energy, biased_energy in zip(energies, biased_energies): if energy is not None and biased_energy is not None: bias_energy = biased_energy - energy bias_energies.append(bias_energy) - for i, (frame, energy, bias_energy) in enumerate(zip(traj, energies, bias_energies)): + for i, (frame, energy, bias_energy) in enumerate( + zip(traj, energies, bias_energies) + ): frame.update_attr_from(configuration) frame.energy.predicted = energy frame.energy.bias = bias_energy @@ -260,15 +294,17 @@ def _run_mlp_md(configuration: 'mlptrain.Configuration', return traj -def _attach_calculator_and_constraints(ase_atoms: 'ase.atoms.Atoms', - mlp: 'mlptrain.potentials._base.MLPotential', - bias: Optional[Union['mlptrain.Bias', 'mlptrain.PlumedBias']], - temp: float, - interval: int, - dt_ase: float, - restart: bool, - n_previous_steps: int, - **kwargs) -> None: +def _attach_calculator_and_constraints( + ase_atoms: 'ase.atoms.Atoms', + mlp: 'mlptrain.potentials._base.MLPotential', + bias: Optional[Union['mlptrain.Bias', 'mlptrain.PlumedBias']], + temp: float, + interval: int, + dt_ase: float, + restart: bool, + n_previous_steps: int, + **kwargs, +) -> None: """ Set up the calculator and attach it to the ase_atoms together with bias and constraints @@ -277,18 +313,17 @@ def _attach_calculator_and_constraints(ase_atoms: 'ase.atoms.Atoms', if isinstance(bias, PlumedBias): logger.info('Using PLUMED bias for MLP MD') - setup = plumed_setup(bias=bias, - temp=temp, - interval=interval, - **kwargs) + setup = plumed_setup(bias=bias, temp=temp, 
interval=interval, **kwargs) bias.write_cv_files() - plumed_calc = PlumedCalculator(calc=mlp.ase_calculator, - input=setup, - timestep=dt_ase, - atoms=ase_atoms, - kT=temp*ase_units.kB, - restart=restart) + plumed_calc = PlumedCalculator( + calc=mlp.ase_calculator, + input=setup, + timestep=dt_ase, + atoms=ase_atoms, + kT=temp * ase_units.kB, + restart=restart, + ) if restart: plumed_calc.istep = n_previous_steps @@ -311,25 +346,43 @@ def _attach_calculator_and_constraints(ase_atoms: 'ase.atoms.Atoms', return None -def _run_dynamics(ase_atoms: 'ase.atoms.Atoms', - ase_traj: 'ase.io.trajectory.Trajectory', - traj_name: str, - interval: int, - temp: float, - dt: float, - dt_ase: float, - n_steps: int, - energies: List, - biased_energies: List, - **kwargs) -> None: +def _run_dynamics( + ase_atoms: 'ase.atoms.Atoms', + ase_traj: 'ase.io.trajectory.Trajectory', + traj_name: str, + interval: int, + temp: float, + dt: float, + dt_ase: float, + n_steps: int, + energies: List, + biased_energies: List, + pressure: Optional[float] = None, + compress: Optional[float] = None, + **kwargs, +) -> None: """Initialise dynamics object and run dynamics""" - if temp > 0: # Default Langevin NVT - dyn = Langevin(ase_atoms, dt_ase, - temperature_K=temp, - friction=0.02) - else: # Otherwise NVE + if all([value is not None for value in [pressure, compress]]) and temp > 0: + # Run NPT dynamics if pressure and compressibility are specified + pressure = convert_pressure_to_ase_units(pressure) + compress = convert_compressibility_to_ase_units(compress) + dyn = NPTBerendsen( + ase_atoms, + dt_ase, + temperature_K=temp, + pressure_au=pressure, + compressibility_au=compress, + ) + logger.info( + f'Initialising NPT Berendsen dynamics at {pressure} bar and {temp} K' + ) + elif temp > 0: # Default Langevin NVT + dyn = Langevin(ase_atoms, dt_ase, temperature_K=temp, friction=0.02) + logger.info(f'Initialising NVT Langevin dynamics at {temp} K') + else: # Otherwise NVE dyn = VelocityVerlet(ase_atoms, 
dt_ase) + logger.info('Initialising NVE dynamics') def append_unbiased_energy(): energies.append(ase_atoms.calc.get_potential_energy(ase_atoms)) @@ -345,8 +398,7 @@ def save_trajectory(): dyn.attach(ase_traj.write, interval=interval) if any(key in kwargs for key in ['save_fs', 'save_ps', 'save_ns']): - dyn.attach(save_trajectory, - interval=_traj_saving_interval(dt, kwargs)) + dyn.attach(save_trajectory, interval=_traj_saving_interval(dt, kwargs)) logger.info(f'Running {n_steps:.0f} steps with a timestep of {dt} fs') dyn.run(steps=n_steps) @@ -358,9 +410,9 @@ def save_trajectory(): return None -def _save_trajectory(ase_traj: 'ase.io.trajectory.Trajectory', - traj_name: str, - **kwargs) -> None: +def _save_trajectory( + ase_traj: 'ase.io.trajectory.Trajectory', traj_name: str, **kwargs +) -> None: """ Save the trajectory with a unique name based on the current simulation time @@ -384,15 +436,14 @@ def _save_trajectory(ase_traj: 'ase.io.trajectory.Trajectory', while os.path.exists(f'{traj_basename}_{time}{time_units}.traj'): time += saving_interval - shutil.copyfile(src=traj_name, - dst=f'{traj_basename}_{time}{time_units}.traj') + shutil.copyfile( + src=traj_name, dst=f'{traj_basename}_{time}{time_units}.traj' + ) return None -def _get_traj_name(restart_files: Optional[List[str]] = None, - **kwargs - ) -> str: +def _get_traj_name(restart_files: Optional[List[str]] = None, **kwargs) -> str: """ Return the name of the trajectory which is going to be created (or on to which the new frames will be appended in the case of restart) @@ -414,9 +465,11 @@ def _get_traj_name(restart_files: Optional[List[str]] = None, return traj_name -def _convert_ase_traj(traj_name: str, - bias: Optional[Union['mlptrain.Bias', 'mlptrain.PlumedBias']], - **kwargs) -> 'mlptrain.Trajectory': +def _convert_ase_traj( + traj_name: str, + bias: Optional[Union['mlptrain.Bias', 'mlptrain.PlumedBias']], + **kwargs, +) -> 'mlptrain.Trajectory': """Convert an ASE trajectory into an mlptrain 
Trajectory""" ase_traj = ASETrajectory(traj_name, 'r') @@ -442,9 +495,9 @@ def _convert_ase_traj(traj_name: str, return mlt_traj -def _attach_plumed_coordinates(mlt_traj: 'mlptrain.Trajectory', - bias: 'mlptrain.PlumedBias', - **kwargs) -> None: +def _attach_plumed_coordinates( + mlt_traj: 'mlptrain.Trajectory', bias: 'mlptrain.PlumedBias', **kwargs +) -> None: """ Attach PLUMED collective variable values to configurations in the trajectory if all colvar files have been printed @@ -453,7 +506,6 @@ def _attach_plumed_coordinates(mlt_traj: 'mlptrain.Trajectory', colvar_filenames = [get_colvar_filename(cv, **kwargs) for cv in bias.cvs] if all(os.path.exists(fname) for fname in colvar_filenames): - for config in mlt_traj: config.plumed_coordinates = np.zeros(bias.n_cvs) @@ -467,30 +519,32 @@ def _attach_plumed_coordinates(mlt_traj: 'mlptrain.Trajectory', return None -def _set_momenta_and_geometry(ase_atoms: 'ase.atoms.Atoms', - temp: float, - bbond_energy: dict, - fbond_energy: dict, - restart: bool, - traj_name: str - ) -> None: +def _set_momenta_and_geometry( + ase_atoms: 'ase.atoms.Atoms', + temp: float, + bbond_energy: dict, + fbond_energy: dict, + restart: bool, + traj_name: str, +) -> None: """Set the initial momenta and geometry of the starting configuration""" if not restart: - if temp > 0: logger.info(f'Initialising initial velocities for {temp} K') - MaxwellBoltzmannDistribution(ase_atoms, temperature_K=temp, - rng=RandomState()) + MaxwellBoltzmannDistribution( + ase_atoms, temperature_K=temp, rng=RandomState() + ) else: # Set the momenta to zero ase_atoms.arrays['momenta'] = np.zeros((len(ase_atoms), 3)) def add_momenta(idx, vector, energy): masses = ase_atoms.get_masses() - ase_atoms.arrays['momenta'][idx] = (np.sqrt(masses[idx] * energy) - * vector) + ase_atoms.arrays['momenta'][idx] = ( + np.sqrt(masses[idx] * energy) * vector + ) return None coords = ase_atoms.positions @@ -505,7 +559,7 @@ def add_momenta(idx, vector, energy): # <--- i--j where i and j 
are two atoms # vec = coords[i] - coords[j] - vec /= np.linalg.norm(vec) # normalise + vec /= np.linalg.norm(vec) # normalise add_momenta(idx=i, vector=vec, energy=energy) add_momenta(idx=j, vector=-vec, energy=energy) @@ -525,8 +579,10 @@ def add_momenta(idx, vector, energy): add_momenta(idx=j, vector=-vec, energy=energy) else: - logger.info('Initialising starting geometry and momenta from the ' - 'last configuration') + logger.info( + 'Initialising starting geometry and momenta from the ' + 'last configuration' + ) last_configuration = read(traj_name) @@ -536,19 +592,25 @@ def add_momenta(idx, vector, energy): return None -def _initialise_traj(ase_atoms: 'ase.atoms.Atoms', - restart: bool, - traj_name: str - ) -> 'ase.io.trajectory.Trajectory': +def _initialise_traj( + ase_atoms: 'ase.atoms.Atoms', + restart: bool, + traj_name: str, + remove_last: bool = True, +) -> 'ase.io.trajectory.Trajectory': """Initialise ASE trajectory object""" if not restart: traj = ASETrajectory(traj_name, 'w', ase_atoms) else: - # Remove the last frame to avoid duplicate frames previous_traj = ASETrajectory(traj_name, 'r', ase_atoms) - previous_atoms = previous_traj[:-1] + + if remove_last: + # Remove the last frame to avoid duplicate frames + previous_atoms = previous_traj[:-1] + else: + previous_atoms = previous_traj os.remove(traj_name) @@ -559,9 +621,7 @@ def _initialise_traj(ase_atoms: 'ase.atoms.Atoms', return traj -def _n_simulation_steps(dt: float, - kwargs: dict - ) -> int: +def _n_simulation_steps(dt: float, kwargs: dict) -> int: """ Calculate the number of simulation steps from a set of keyword arguments e.g. 
kwargs = {'fs': 100} @@ -577,35 +637,33 @@ def _n_simulation_steps(dt: float, logger.warning('Unexpectedly small or large timestep - is it in fs?') if 'ps' in kwargs: - time_fs = 1E3 * kwargs['ps'] + time_fs = 1e3 * kwargs['ps'] elif 'fs' in kwargs: time_fs = kwargs['fs'] elif 'ns' in kwargs: - time_fs = 1E6 * kwargs['ns'] + time_fs = 1e6 * kwargs['ns'] else: raise ValueError('Simulation time not found') - n_steps = max(int(time_fs / dt), 1) # Run at least one step + n_steps = max(int(time_fs / dt), 1) # Run at least one step return n_steps -def _traj_saving_interval(dt: float, - kwargs: dict - ) -> int: +def _traj_saving_interval(dt: float, kwargs: dict) -> int: """Calculate the interval at which a trajectory is saved""" if 'save_ps' in kwargs: - time_fs = 1E3 * kwargs['save_ps'] + time_fs = 1e3 * kwargs['save_ps'] elif 'save_fs' in kwargs: time_fs = kwargs['save_fs'] elif 'save_ns' in kwargs: - time_fs = 1E6 * kwargs['save_ns'] + time_fs = 1e6 * kwargs['save_ns'] else: raise ValueError('Saving time not found') @@ -615,8 +673,9 @@ def _traj_saving_interval(dt: float, return saving_interval -def _remove_colvar_duplicate_frames(bias: 'mlptrain.PlumedBias', - **kwargs) -> None: +def _remove_colvar_duplicate_frames( + bias: 'mlptrain.PlumedBias', **kwargs +) -> None: """ Remove duplicate frames from generated colvar files when using PLUMED bias @@ -625,14 +684,12 @@ def _remove_colvar_duplicate_frames(bias: 'mlptrain.PlumedBias', colvar_filenames = [get_colvar_filename(cv, **kwargs) for cv in bias.cvs] for filename in colvar_filenames: - with open(filename, 'r') as f: lines = f.readlines() duplicate_index = None for i, line in enumerate(lines): if line.startswith('#!') and i != 0: - # First frame before redundant header is a duplicate duplicate_index = i - 1 break @@ -647,3 +704,21 @@ def _remove_colvar_duplicate_frames(bias: 'mlptrain.PlumedBias', f.write(line) return None + + +def convert_pressure_to_ase_units( + pressure: float, +) -> float: + """ + Converts 
pressure given in bar to ase units of eV/A^3 + """ + return pressure * 0.000006241509 + + +def convert_compressibility_to_ase_units( + compressibility: float, +) -> float: + """ + Converts pressure given in bar^-1 to ase units of A^3/eV + """ + return compressibility * 160217.66531138544 diff --git a/mlptrain/sampling/md_openmm.py b/mlptrain/sampling/md_openmm.py index 6d178944..ff1ab637 100644 --- a/mlptrain/sampling/md_openmm.py +++ b/mlptrain/sampling/md_openmm.py @@ -1,7 +1,39 @@ -import copy -from typing import Optional, Sequence, List +import os +from copy import deepcopy +from typing import List, Optional, Sequence, Union + +import ase import mlptrain as mlt +from mlptrain.log import logger +from mlptrain.utils import work_in_tmp_dir +from mlptrain.sampling.md import ( + _convert_ase_traj, + _get_traj_name, + _initialise_traj, + _n_simulation_steps, + _save_trajectory, + _traj_saving_interval, +) + +try: + import openmm as mm + import openmm.app as app + import openmm.unit as unit + + _HAS_OPENMM = True +except ImportError: + _HAS_OPENMM = False + +try: + from openmmml import MLPotential + + _HAS_OPENMM_ML = True +except ImportError: + _HAS_OPENMM_ML = False + +# Conversion factor from kJ/mol to eV +_KJ_PER_MOL_TO_EV = (ase.units.kJ / ase.units.mol) / ase.units.eV def run_mlp_md_openmm( @@ -13,10 +45,11 @@ def run_mlp_md_openmm( init_temp: Optional[float] = None, fbond_energy: Optional[dict] = None, bbond_energy: Optional[dict] = None, - bias: Optional = None, + bias: Optional[Union['mlt.Bias', 'mlt.PlumedBias']] = None, restart_files: Optional[List[str]] = None, copied_substrings: Optional[Sequence[str]] = None, kept_substrings: Optional[Sequence[str]] = None, + platform: Optional[str] = None, **kwargs, ) -> 'mlt.Trajectory': """ @@ -54,7 +87,7 @@ def run_mlp_md_openmm( to use in the dynamics restart_files: List of files which are needed for restarting the - simulation + simulation, e.g. 
'simulation.state.xml', 'trajectory.traj' kept_substrings: List of substrings with which files are copied back from the temporary directory @@ -63,6 +96,9 @@ def run_mlp_md_openmm( copied_substrings: List of substrings with which files are copied to the temporary directory. Files required for MLPs are added to the list automatically + + platform: (str) OpenMM platform to use. If None, the fastest available + platform is used in this order: 'CUDA', 'OpenCL', 'CPU', 'Reference'. --------------- Keyword Arguments: @@ -80,14 +116,37 @@ def run_mlp_md_openmm( (mlt.Trajectory): """ + if not _HAS_OPENMM: + raise ImportError( + 'OpenMM is not installed. Install it with ' + "'conda install -c conda-forge openmm'" + ) - restart = restart_files is not None + if not _HAS_OPENMM_ML: + raise ImportError( + 'openmm-ml is not installed. Install it with ' + "'conda install -c conda-forge openmm-ml'" + ) - # TODO: Implement this! - if copied_substrings is not None: - raise NotImplementedError("copied_substrings argument not supported yet") - if kept_substrings is not None: - raise NotImplementedError("kept_substrings argument not supported yet") + if not isinstance(mlp, mlt.potentials.MACE): + raise ValueError( + 'The OpenMM backend only supports the use of the MACE potential.' + ) + + if any( + [ + fbond_energy, + bbond_energy, + bias, + kwargs['constraints'] if 'constraints' in kwargs else None, + ] + ): + raise NotImplementedError( + "The OpenMM backend does not support the use of the 'bias', " + "'fbond_energy', 'bbond_energy', or 'constraints' arguments." 
+ ) + + restart = restart_files is not None if copied_substrings is None: copied_substrings = [] @@ -95,14 +154,13 @@ def run_mlp_md_openmm( kept_substrings = [] copied_substrings_list = list(copied_substrings) - # kept_substrings_list = list(kept_substrings) + kept_substrings_list = list(kept_substrings) copied_substrings_list.extend(['.xml', '.json', '.pth', '.model']) if restart: - msg = 'Restarting MLP MD with OpenMM not supported' - raise NotImplementedError(msg) - """ + logger.info('Restarting MLP OpenMM MD') + if not isinstance(restart_files, list): raise TypeError('Restart files must be a list') @@ -113,19 +171,31 @@ def run_mlp_md_openmm( 'specifying filenames' ) + if not any(file.endswith('.state.xml') for file in restart_files): + raise ValueError( + 'Restaring an OpenMM simulation requires a .state.xml file ' + 'from the previous simulation' + ) + if not any(file.endswith('.traj') for file in restart_files): raise ValueError( - 'Restaring a simulation requires a .traj file ' + 'Restaring an OpenMM simulation requires a .traj file ' 'from the previous simulation' ) copied_substrings_list.extend(restart_files) kept_substrings_list.extend(restart_files) - """ + else: + logger.info('Running MLP MD with OpenMM') - mlt.log.logger.info('Running MLP MD with OpenMM') + decorator = work_in_tmp_dir( + copied_substrings=copied_substrings_list, + kept_substrings=kept_substrings_list, + ) - traj_openmm = _run_mlp_md_openmm( + _run_mlp_md_decorated = decorator(_run_mlp_md_openmm) + + traj = _run_mlp_md_decorated( configuration=configuration, mlp=mlp, temp=temp, @@ -136,45 +206,10 @@ def run_mlp_md_openmm( bbond_energy=bbond_energy, bias=bias, restart_files=restart_files, + platform=platform, **kwargs, ) - - return traj_openmm - - -# TODO: This function is already defined in md.py! -def _n_simulation_steps(dt: float, kwargs: dict) -> int: - """ - Calculate the number of simulation steps from a set of keyword - arguments e.g. 
kwargs = {'fs': 100} - - --------------------------------------------------------------------------- - Arguments: - dt: Timestep in fs - - Returns: - (int): Number of simulation steps to perform - """ - if dt < 0.09 or dt > 5: - mlt.log.logger.warning( - 'Unexpectedly small or large timestep - is it in fs?' - ) - - if 'ps' in kwargs: - time_fs = 1e3 * kwargs['ps'] - - elif 'fs' in kwargs: - time_fs = kwargs['fs'] - - elif 'ns' in kwargs: - time_fs = 1e6 * kwargs['ns'] - - else: - raise ValueError('Simulation time not found') - - n_steps = max(int(time_fs / dt), 1) # Run at least one step - - return n_steps + return traj def _run_mlp_md_openmm( @@ -186,46 +221,127 @@ def _run_mlp_md_openmm( init_temp: Optional[float] = None, fbond_energy: Optional[dict] = None, bbond_energy: Optional[dict] = None, - bias: Optional = None, + bias: Optional[Union['mlt.Bias', 'mlt.PlumedBias']] = None, restart_files: Optional[List[str]] = None, + platform: Optional[str] = None, **kwargs, ) -> 'mlt.Trajectory': """ Run molecular dynamics on a system using a MLP to predict energies and forces and OpenMM to drive dynamics """ + restart = restart_files is not None - try: - import openmm as mm - import openmm.app as app - import openmm.unit as unit - except ImportError: - raise ImportError( - 'Cannot import OpenMM. Please make sure OpenMM is installed.' - ) - - try: - from openmmml import MLPotential - except ImportError: - raise ImportError('Cannot import OpenMM-ML') - - from sys import stdout - + # Calculate the number of steps to perform. n_steps = _n_simulation_steps(dt=dt, kwargs=kwargs) + # Set the box size if required if mlp.requires_non_zero_box_size and configuration.box is None: - mlt.log.logger.warning( - 'Assuming vaccum simulation. Box size = 1000 nm^3' - ) + logger.warning('Assuming vacuum simulation. Box size = 1000 nm^3') configuration.box = mlt.Box([100, 100, 100]) + # Get the ASE atoms object and positions. 
ase_atoms = configuration.ase_atoms - # create OpenMM topology + # Get the name of the trajectory and simulation state files. + traj_name = _get_traj_name(restart_files=restart_files, **kwargs) + simulation_name = _get_simulation_name( + restart_files=restart_files, **kwargs + ) + + # Create the OpenMM topology + topology = _create_openmm_topology(ase_atoms) + + # Get the OpenMM platform + platform = _get_openmm_platform(platform) + + # Create the OpenMM simulation object + simulation = _create_openmm_simulation( + mlp=mlp, + topology=topology, + temp=temp, + dt=dt, + platform=platform, + ) + + # Set the initial positions and velocities + _set_momenta_and_geometry( + simulation=simulation, + positions=ase_atoms.get_positions() * unit.angstrom, + temp=init_temp if init_temp is not None else temp, + restart_file=simulation_name if restart else None, + ) + + # Initialise the ASE trajectory with the last frame of the previous trajectory + ase_traj = _initialise_traj( + ase_atoms=ase_atoms, + restart=restart, + traj_name=traj_name, + ) + + # If MD is restarted, energies of frames from the previous trajectory + # are not loaded. Setting them to None + energies = [None for _ in range(len(ase_traj))] + biased_energies = deepcopy(energies) + bias_energies = deepcopy(energies) + + # Calculate the number of steps already performed. 
+ n_previous_steps = interval * len(ase_traj) + + logger.info( + f'Running OpenMM simulation for {n_steps} steps with saving interval {interval}' + ) + + # Run the dynamics + _run_dynamics( + simulation=simulation, + simulation_name=simulation_name, + ase_atoms=ase_atoms, + ase_traj=ase_traj, + traj_name=traj_name, + dt=dt, + interval=interval, + n_steps=n_steps, + n_previous_steps=n_previous_steps, + energies=energies, + biased_energies=biased_energies, + **kwargs, + ) + + # Close the ASE trajectory + ase_traj.close() + + # Duplicate frames removed only if PLUMED bias is initialised not from file + # if restart and isinstance(bias, PlumedBias) and not bias.from_file: + # _remove_colvar_duplicate_frames(bias=bias, **kwargs) + + traj = _convert_ase_traj(traj_name=traj_name, bias=bias, **kwargs) + + for energy, biased_energy in zip(energies, biased_energies): + if energy is not None and biased_energy is not None: + bias_energy = biased_energy - energy + bias_energies.append(bias_energy) + + for i, (frame, energy, bias_energy) in enumerate( + zip(traj, energies, bias_energies) + ): + frame.update_attr_from(configuration) + frame.energy.predicted = energy + frame.energy.bias = bias_energy + frame.time = dt * interval * i + + return traj + + +# ============================================================================= # +# Auxiliary functions to create the OpenMM Simulation # +# ============================================================================= # +def _create_openmm_topology(ase_atoms: 'ase.Atoms') -> 'app.Topology': + """Create an OpenMM topology from an ASE atoms object.""" + logger.info('Creating the OpenMM topology') topology = app.Topology() chain = topology.addChain() - positions = ase_atoms.get_positions() * unit.angstrom atomic_numbers = ase_atoms.get_atomic_numbers() for atomic_number in atomic_numbers: @@ -233,84 +349,216 @@ def _run_mlp_md_openmm( element = app.Element.getByAtomicNumber(atomic_number) topology.addAtom(element.name, element, 
residue) - # use the mace model with openmm-ml - # make sure total energy is used - potential = MLPotential('mace', model_path=mlp.filename) - system = potential.createSystem(topology, interaction_energy=False) - - # setup OpenMM simulation with Langevin dynamics - integrator = mm.LangevinIntegrator( - 300 * unit.kelvin, 1.0 / unit.picoseconds, dt * unit.femtoseconds + topology.setPeriodicBoxVectors( + ase_atoms.get_cell().array * 0.1 * unit.nanometer ) - simulation = app.Simulation(topology, system, integrator) - simulation.context.setPositions(positions) - simulation.context.setVelocitiesToTemperature(temp * unit.kelvin) - - interval = int(interval) - - simulation.reporters.append( - app.StateDataReporter( - stdout, - interval, - step=True, - potentialEnergy=True, - temperature=True, - speed=True, + + return topology + + +def _get_openmm_platform(platform: Optional[str] = None) -> 'mm.Platform': + """Get the OpenMM platform to use.""" + import torch + + available_platforms = [ + mm.Platform.getPlatform(i).getName() + for i in range(mm.Platform.getNumPlatforms()) + ] + + # OpenMM might have been built with CUDA support + # but the current system might not have a GPU available (typical in clusters) + if 'CUDA' in available_platforms and not torch.cuda.is_available(): + available_platforms.remove('CUDA') + + if platform is not None and platform in available_platforms: + platform = mm.Platform.getPlatformByName(platform) + else: + platform = next( + ( + p + for p in ['CUDA', 'OpenCL', 'CPU', 'Reference'] + if p in available_platforms + ), + None, ) - ) + if platform is None: + raise ValueError( + f'No suitable platform found. 
Available platforms are: {available_platforms}' + ) + platform = mm.Platform.getPlatformByName(platform) - # create MLP train trajectory to save frames into + logger.info(f'Using the OpenMM platform: {platform.getName()}') - mlt_traj = mlt.Trajectory() + return platform - mlt.log.logger.info( - 'Running using OpenMM for ', - n_steps, - ' steps with saving interval', - interval, - ) - # add the first config using energies from current MLP - state = simulation.context.getState(getPositions=True, getEnergy=True) +def _create_openmm_simulation( + mlp: 'mlt.potentials._base.MLPotential', + topology: 'app.Topology', + temp: float, + dt: float, + platform: 'mm.Platform', +) -> 'app.Simulation': + """Create an OpenMM simulation object.""" + logger.info('Creating the OpenMM simulation object') + + # Use the mace model with openmm-ml and make sure the total energy is used. + potential = MLPotential('mace', modelPath=mlp.filename) + system = potential.createSystem(topology, returnEnergyType='energy') + + # Use a Langevin integrator if temp>0 (NVT ensemble). + # Otherwise, use a Verlet integrator (NVE ensemble). 
+ if temp > 0: + logger.info( + f'Using Langevin integrator (NVT) with temperture={temp} K' + ) + integrator = mm.LangevinMiddleIntegrator( + temp * unit.kelvin, 1.0 / unit.picoseconds, dt * unit.femtoseconds + ) + else: + logger.info(f'Using Verlet integrator (NVE) as temperture is {temp} K') + integrator = mm.VerletIntegrator(dt * unit.femtoseconds) + + simulation = app.Simulation(topology, system, integrator, platform) + + return simulation - coordinates = state.getPositions(asNumpy=True).value_in_unit(unit.angstrom) - energy = state.getPotentialEnergy().value_in_unit(unit.kilojoules_per_mole) - config = mlt.Configuration() - config.atoms = copy.deepcopy(configuration.atoms) - config.box = copy.deepcopy(configuration.box) +def _set_momenta_and_geometry( + simulation: 'app.Simulation', + positions: 'unit.Quantity', + temp: float, + restart_file: Optional[str] = None, +) -> 'app.Simulation': + """Set the momenta and geometry for the OpenMM simulation.""" + + if restart_file is not None: + if os.path.isfile(restart_file): + logger.info( + f'Restarting the OpenMM simulation state from file {restart_file}' + ) + simulation.loadState(restart_file) + else: + raise FileNotFoundError(f'File {restart_file} not found') + else: + logger.info( + 'Setting the initial momenta and geometry for the OpenMM simulation' + ) + simulation.context.setPositions(positions) + simulation.context.setVelocitiesToTemperature(temp * unit.kelvin) - for i, position in enumerate(coordinates): - config.atoms[i].coord = position + return simulation - config.energy.predicted = energy / 96.48530749925793 # kj/mol -> eV - config.time = 0.0 - mlt_traj.append(config) +def _get_simulation_name( + restart_files: Optional[List[str]] = None, **kwargs +) -> str: + """Return the name of the OpenMM simulation to be created or restarted.""" + if restart_files is None: + if 'idx' in kwargs: + simulation_name = f'simulation_{kwargs["idx"]}.state.xml' + else: + simulation_name = 'simulation.state.xml' + + return 
simulation_name + else: + for filename in restart_files: + if filename.endswith('.state.xml'): + return filename + + raise FileNotFoundError( + 'Restart mode detected, but no simulation state files were found in restart_files. ' + ) - # now run for n_steps saving at every interval + +# ============================================================================= # +# Auxiliary functions to run the OpenMM Simulation # +# ============================================================================= # +def _run_dynamics( + simulation: 'app.Simulation', + simulation_name: str, + ase_atoms: 'ase.Atoms', + ase_traj: 'ase.io.trajectory.Trajectory', + traj_name: str, + dt: float, + interval: int, + n_steps: int, + n_previous_steps: int, + energies: List[Optional[float]], + biased_energies: List[Optional[float]], + **kwargs, +) -> None: + """Run the MD and save frames to the mlt.Trajectory.""" + + def append_unbiased_energy(): + """Append the unbiased potential energy to the energies list.""" + energies.append(potential_energy) + + def append_biased_energy(): + """Append the biased potential energy to the biased_energies list.""" + biased_energies.append(biased_energy) + + def save_trajectory(): + """Save the ASE trajectory to a file.""" + _save_trajectory(ase_traj, traj_name, **kwargs) + + def save_simulation_state(): + """Save the state of the OpenMM simulation.""" + simulation.saveState(simulation_name) + + def add_frame_to_ase_traj(): + """Add a new frame to the ASE train trajectory""" + # Create a new ASE atoms object. + new_ase_atoms = ase.Atoms( + symbols=ase_atoms.get_chemical_symbols(), + positions=coordinates, + cell=ase_atoms.get_cell(), + ) + + # Append the new frame to the trajectory. 
+ ase_traj.write(new_ase_atoms, energy=potential_energy) + + # Determine saving intervals + if any(key in kwargs for key in ['save_fs', 'save_ps', 'save_ns']): + traj_saving_interval = _traj_saving_interval(dt, kwargs) + else: + traj_saving_interval = 0 + + # Add the initial frame to the ASE trajectory. + state = simulation.context.getState(getPositions=True, getEnergy=True) + coordinates = state.getPositions(asNumpy=True).value_in_unit(unit.angstrom) + potential_energy = ( + state.getPotentialEnergy().value_in_unit(unit.kilojoules_per_mole) + * _KJ_PER_MOL_TO_EV + ) + add_frame_to_ase_traj() + + # Run the dynamics n_steps, performing interval steps at a time. for j in range(n_steps // interval): + logger.info(f'Step {j + 1} / {n_steps // interval}') simulation.step(interval) + time = dt * interval * (j + 1) + # Get the coordinates and energy of the system from the OpenMM simulation. state = simulation.context.getState(getPositions=True, getEnergy=True) - coordinates = state.getPositions(asNumpy=True).value_in_unit( unit.angstrom ) - energy = state.getPotentialEnergy().value_in_unit( - unit.kilojoules_per_mole + potential_energy = ( + state.getPotentialEnergy().value_in_unit(unit.kilojoules_per_mole) + * _KJ_PER_MOL_TO_EV ) - config = mlt.Configuration() - config.atoms = copy.deepcopy(configuration.atoms) - config.box = copy.deepcopy(configuration.box) - - for i, position in enumerate(coordinates): - config.atoms[i].coord = position + # TODO: Implement biased_energy when bias is implemented + biased_energy = potential_energy - config.energy.predicted = energy / 96.48530749925793 # kJ/mol -> eV - config.time = dt * interval * (j + 1) + # Add the frame to the ASE trajectory. 
+ add_frame_to_ase_traj() - mlt_traj.append(config) + # Store the energies + append_unbiased_energy() + append_biased_energy() + save_simulation_state() - return mlt_traj + if traj_saving_interval > 0 and time % traj_saving_interval == 0: + save_trajectory() diff --git a/mlptrain/sampling/metadynamics.py b/mlptrain/sampling/metadynamics.py index 90850931..2ab208bb 100644 --- a/mlptrain/sampling/metadynamics.py +++ b/mlptrain/sampling/metadynamics.py @@ -8,14 +8,11 @@ import warnings import numpy as np import multiprocessing as mp -import matplotlib.pyplot as plt import autode as ade from typing import Optional, Sequence, Union, Tuple, List from multiprocessing import Pool from subprocess import Popen from copy import deepcopy -from matplotlib.colors import ListedColormap -from scipy.stats import norm from ase import units as ase_units from ase.io import read as ase_read from ase.io import write as ase_write @@ -26,7 +23,7 @@ PlumedBias, plumed_setup, plot_cv_versus_time, - plot_cv1_and_cv2 + plot_cv1_and_cv2, ) from mlptrain.config import Config from mlptrain.log import logger @@ -36,7 +33,7 @@ move_files, convert_ase_time, convert_ase_energy, - convert_exponents + convert_exponents, ) @@ -44,11 +41,12 @@ class Metadynamics: """Metadynamics class for running biased molecular dynamics using metadynamics bias and analysing the results""" - def __init__(self, - cvs: Union[Sequence['mlptrain._PlumedCV'], - 'mlptrain._PlumedCV'], - bias: Optional['mlptrain.PlumedBias'] = None, - temp: Optional[float] = None): + def __init__( + self, + cvs: Union[Sequence['mlptrain._PlumedCV'], 'mlptrain._PlumedCV'], + bias: Optional['mlptrain.PlumedBias'] = None, + temp: Optional[float] = None, + ): """ Molecular dynamics using metadynamics bias. 
Used for calculating free energies (by using well-tempered metadynamics bias) and sampling @@ -67,10 +65,11 @@ def __init__(self, """ if bias is not None: - if bias.from_file: - raise ValueError('Cannot initialise Metadynamics using ' - 'PlumedBias initialised from a file') + raise ValueError( + 'Cannot initialise Metadynamics using ' + 'PlumedBias initialised from a file' + ) else: self.bias = bias @@ -93,16 +92,18 @@ def kbt(self) -> float: """Value of k_B*T in ASE units""" return ase_units.kB * self.temp - def estimate_width(self, - configurations: Union['mlptrain.Configuration', - 'mlptrain.ConfigurationSet'], - mlp: 'mlptrain.potentials._base.MLPotential', - temp: float = 300, - interval: int = 10, - dt: float = 1, - plot: bool = True, - **kwargs - ) -> List: + def estimate_width( + self, + configurations: Union[ + 'mlptrain.Configuration', 'mlptrain.ConfigurationSet' + ], + mlp: 'mlptrain.potentials._base.MLPotential', + temp: float = 300, + interval: int = 10, + dt: float = 1, + plot: bool = True, + **kwargs, + ) -> List: """ Estimate optimal widths (σ) to be used in metadynamics. 
@@ -153,21 +154,23 @@ def estimate_width(self, # Spawn is likely to make it slower, but fork in combination # with plotting is likely to give errors on MacOS > 10.13 with mp.get_context('spawn').Pool(processes=n_processes) as pool: - for idx, configuration in enumerate(configuration_set): - kwargs_single = deepcopy(kwargs) kwargs_single['idx'] = idx + 1 - width_process = pool.apply_async(func=self._get_width_for_single, - args=(configuration, - mlp, - temp, - dt, - interval, - self.bias, - plot), - kwds=kwargs_single) + width_process = pool.apply_async( + func=self._get_width_for_single, + args=( + configuration, + mlp, + temp, + dt, + interval, + self.bias, + plot, + ), + kwds=kwargs_single, + ) width_processes.append(width_process) pool.close() @@ -179,13 +182,15 @@ def estimate_width(self, finish = time.perf_counter() logger.info(f'Width estimation done in {(finish - start) / 60:.1f} m') - move_files([r'colvar_\w+_\d+\.dat'], - dst_folder='plumed_files/width_estimation', - regex=True) + move_files( + [r'colvar_\w+_\d+\.dat'], + dst_folder='plumed_files/width_estimation', + regex=True, + ) - move_files([r'\w+_config\d+\.pdf'], - dst_folder='width_estimation', - regex=True) + move_files( + [r'\w+_config\d+\.pdf'], dst_folder='width_estimation', regex=True + ) opt_widths = list(np.min(all_widths, axis=0)) opt_widths_strs = [] @@ -199,29 +204,33 @@ def estimate_width(self, logger.info(f'Estimated widths: {", ".join(opt_widths_strs)}') return opt_widths - def _get_width_for_single(self, - configuration: 'mlptrain.Configuration', - mlp: 'mlptrain.potentials._base.MLPotential', - temp: float, - dt: float, - interval: int, - bias: 'mlptrain.PlumedBias', - plot: bool, - **kwargs) -> List: + def _get_width_for_single( + self, + configuration: 'mlptrain.Configuration', + mlp: 'mlptrain.potentials._base.MLPotential', + temp: float, + dt: float, + interval: int, + bias: 'mlptrain.PlumedBias', + plot: bool, + **kwargs, + ) -> List: """Estimate optimal widths (σ) for a single 
configuration""" logger.info(f'Running MD simulation number {kwargs["idx"]}') kwargs['n_cores'] = 1 - run_mlp_md(configuration=configuration, - mlp=mlp, - temp=temp, - dt=dt, - interval=interval, - bias=bias, - kept_substrings=['.dat'], - **kwargs) + run_mlp_md( + configuration=configuration, + mlp=mlp, + temp=temp, + dt=dt, + interval=interval, + bias=bias, + kept_substrings=['.dat'], + **kwargs, + ) widths = [] @@ -234,29 +243,32 @@ def _get_width_for_single(self, widths.append(width) if plot is True: - plot_cv_versus_time(filename=colvar_filename, - cv_units=cv.units, - label=f'config{kwargs["idx"]}') + plot_cv_versus_time( + filename=colvar_filename, + cv_units=cv.units, + label=f'config{kwargs["idx"]}', + ) return widths - def run_metadynamics(self, - configuration: 'mlptrain.Configuration', - mlp: 'mlptrain.potentials._base.MLPotential', - temp: float, - interval: int, - dt: float, - pace: int = 100, - height: Optional[float] = None, - width: Optional = None, - biasfactor: Optional[float] = None, - al_iter: Optional[int] = None, - n_runs: int = 1, - save_sep: bool = True, - all_to_xyz: bool = False, - restart: bool = False, - **kwargs - ) -> None: + def run_metadynamics( + self, + configuration: 'mlptrain.Configuration', + mlp: 'mlptrain.potentials._base.MLPotential', + temp: float, + interval: int, + dt: float, + pace: int = 100, + height: Optional[float] = None, + width: Optional = None, + biasfactor: Optional[float] = None, + al_iter: Optional[int] = None, + n_runs: int = 1, + save_sep: bool = True, + all_to_xyz: bool = False, + restart: bool = False, + **kwargs, + ) -> None: """ Perform multiple metadynamics runs in parallel, generate .xyz and .traj files containing trajectories of the runs, generate PLUMED files @@ -325,15 +337,18 @@ def run_metadynamics(self, if height is None: if temp > 0: - logger.info('Height was not supplied, ' - 'setting height to 0.5*k_B*T') + logger.info( + 'Height was not supplied, ' 'setting height to 0.5*k_B*T' + ) height = 
0.5 * self.kbt else: raise ValueError('Height was not supplied') if biasfactor is not None and temp <= 0: - raise ValueError('Temperature must be positive and non-zero for ' - 'well-tempered metadynamics') + raise ValueError( + 'Temperature must be positive and non-zero for ' + 'well-tempered metadynamics' + ) if al_iter: self._initialise_inherited_bias(al_iter=al_iter, n_runs=n_runs) @@ -344,49 +359,58 @@ def run_metadynamics(self, else: if width is None: - logger.info('Width parameters were not supplied to the ' - 'metadynamics simulation, estimating widths ' - 'automatically by performing an unbiased ' - 'simulation for 10 ps') + logger.info( + 'Width parameters were not supplied to the ' + 'metadynamics simulation, estimating widths ' + 'automatically by performing an unbiased ' + 'simulation for 10 ps' + ) - width = self.estimate_width(configurations=configuration, - mlp=mlp) + width = self.estimate_width( + configurations=configuration, mlp=mlp + ) kwargs['kept_substrings'] = ['.traj', '.dat'] - self.bias._set_metad_params(width=width, - pace=pace, - height=height, - biasfactor=biasfactor, - **kwargs) + self.bias._set_metad_params( + width=width, + pace=pace, + height=height, + biasfactor=biasfactor, + **kwargs, + ) metad_processes, metad_trajs = [], [] n_processes = min(Config.n_cores, n_runs) - logger.info(f'Running {n_runs} independent Metadynamics ' - f'simulation(s), {n_processes} simulation(s) run ' - f'in parallel, 1 walker per simulation') + logger.info( + f'Running {n_runs} independent Metadynamics ' + f'simulation(s), {n_processes} simulation(s) run ' + f'in parallel, 1 walker per simulation' + ) start_metad = time.perf_counter() with Pool(processes=n_processes) as pool: - for idx in range(n_runs): - # Without copy kwargs is overwritten at every iteration kwargs_single = deepcopy(kwargs) kwargs_single['idx'] = idx + 1 - metad_process = pool.apply_async(func=self._run_single_metad, - args=(configuration, - mlp, - temp, - interval, - dt, - self.bias, 
- al_iter, - restart), - kwds=kwargs_single) + metad_process = pool.apply_async( + func=self._run_single_metad, + args=( + configuration, + mlp, + temp, + interval, + dt, + self.bias, + al_iter, + restart, + ), + kwds=kwargs_single, + ) metad_processes.append(metad_process) pool.close() @@ -395,23 +419,29 @@ def run_metadynamics(self, pool.join() finish_metad = time.perf_counter() - logger.info('Metadynamics done in ' - f'{(finish_metad - start_metad) / 60:.1f} m') + logger.info( + 'Metadynamics done in ' + f'{(finish_metad - start_metad) / 60:.1f} m' + ) # Move .traj files into 'trajectories' folder and compute .xyz files - self._move_and_save_files(metad_trajs=metad_trajs, - save_sep=save_sep, - all_to_xyz=all_to_xyz, - restart=restart) + self._move_and_save_files( + metad_trajs=metad_trajs, + save_sep=save_sep, + all_to_xyz=all_to_xyz, + restart=restart, + ) if al_iter is None: self.plot_gaussian_heights() - self._set_previous_parameters(configuration=configuration, - mlp=mlp, - temp=temp, - dt=dt, - interval=interval, - **kwargs) + self._set_previous_parameters( + configuration=configuration, + mlp=mlp, + temp=temp, + dt=dt, + interval=interval, + **kwargs, + ) return None @staticmethod @@ -427,8 +457,10 @@ def _initialise_inherited_bias(al_iter: int, n_runs: int) -> None: shutil.copyfile(src=bias_path, dst=f'HILLS_{idx+1}.dat') else: - raise FileNotFoundError('Inherited bias generated after AL ' - f'iteration {al_iter} not found') + raise FileNotFoundError( + 'Inherited bias generated after AL ' + f'iteration {al_iter} not found' + ) return None @@ -439,13 +471,17 @@ def _initialise_restart(self, width: Sequence, n_runs: int) -> None: """ if width is None: - raise ValueError('Make sure to use exactly the same width as ' - 'in the previous simulation') + raise ValueError( + 'Make sure to use exactly the same width as ' + 'in the previous simulation' + ) if not os.path.exists('plumed_files/metadynamics'): - raise FileNotFoundError('Metadynamics folder not found, 
make ' - 'sure to run metadynamics before ' - 'trying to restart') + raise FileNotFoundError( + 'Metadynamics folder not found, make ' + 'sure to run metadynamics before ' + 'trying to restart' + ) metad_path = os.path.join(os.getcwd(), 'plumed_files/metadynamics') traj_path = os.path.join(os.getcwd(), 'trajectories') @@ -454,39 +490,48 @@ def _initialise_restart(self, width: Sequence, n_runs: int) -> None: colvar_path = os.path.join(metad_path, f'colvar_{cv.name}_*.dat') n_previous_runs = len(glob.glob(colvar_path)) if n_previous_runs != n_runs: - raise NotImplementedError('Restart is implemented only if the ' - 'number of runs matches the number ' - 'of runs in the previous simulation') + raise NotImplementedError( + 'Restart is implemented only if the ' + 'number of runs matches the number ' + 'of runs in the previous simulation' + ) for filename in glob.glob(os.path.join(metad_path, 'fes_*.dat')): os.remove(filename) - move_files(['.dat'], - dst_folder=os.getcwd(), - src_folder=metad_path, - unique=False) + move_files( + ['.dat'], + dst_folder=os.getcwd(), + src_folder=metad_path, + unique=False, + ) - move_files(['.traj'], - dst_folder=os.getcwd(), - src_folder=traj_path, - unique=False) + move_files( + ['.traj'], + dst_folder=os.getcwd(), + src_folder=traj_path, + unique=False, + ) return None - def _run_single_metad(self, - configuration: 'mlptrain.Configuration', - mlp: 'mlptrain.potentials._base.MLPotential', - temp: float, - interval: int, - dt: float, - bias: 'mlptrain.PlumedBias', - al_iter: Optional[int] = None, - restart: Optional[bool] = False, - **kwargs) -> 'mlptrain.Trajectory': + def _run_single_metad( + self, + configuration: 'mlptrain.Configuration', + mlp: 'mlptrain.potentials._base.MLPotential', + temp: float, + interval: int, + dt: float, + bias: 'mlptrain.PlumedBias', + al_iter: Optional[int] = None, + restart: Optional[bool] = False, + **kwargs, + ) -> 'mlptrain.Trajectory': """Initiate a single metadynamics run""" - logger.info('Running 
Metadynamics simulation ' - f'number {kwargs["idx"]}') + logger.info( + 'Running Metadynamics simulation ' f'number {kwargs["idx"]}' + ) if al_iter is not None: kwargs['copied_substrings'] = [f'HILLS_{kwargs["idx"]}.dat'] @@ -505,36 +550,43 @@ def _run_single_metad(self, kwargs['n_cores'] = 1 - traj = run_mlp_md(configuration=configuration, - mlp=mlp, - temp=temp, - dt=dt, - interval=interval, - bias=bias, - restart_files=restart_files, - **kwargs) + traj = run_mlp_md( + configuration=configuration, + mlp=mlp, + temp=temp, + dt=dt, + interval=interval, + bias=bias, + restart_files=restart_files, + **kwargs, + ) return traj @staticmethod - def _move_and_save_files(metad_trajs: List['mlptrain.Trajectory'], - save_sep: bool, - all_to_xyz: bool, - restart: bool - ) -> None: + def _move_and_save_files( + metad_trajs: List['mlptrain.Trajectory'], + save_sep: bool, + all_to_xyz: bool, + restart: bool, + ) -> None: """ Save metadynamics trajectories, move them into trajectories folder and compute .xyz files """ - move_files(['.dat'], - dst_folder='plumed_files/metadynamics', - unique=True if not restart else False) + move_files( + ['.dat'], + dst_folder='plumed_files/metadynamics', + unique=True if not restart else False, + ) - move_files([r'trajectory_\d+\.traj', r'trajectory_\d+_\w+\.traj'], - dst_folder='trajectories', - regex=True, - unique=True if not restart else False) + move_files( + [r'trajectory_\d+\.traj', r'trajectory_\d+_\w+\.traj'], + dst_folder='trajectories', + regex=True, + unique=True if not restart else False, + ) os.chdir('trajectories') @@ -565,20 +617,24 @@ def _move_and_save_files(metad_trajs: List['mlptrain.Trajectory'], return None - def _set_previous_parameters(self, - configuration: 'mlptrain.Configuration', - mlp: 'mlptrain.potentials._base.MLPotential', - temp: float, - dt: float, - interval: int, - **kwargs) -> None: + def _set_previous_parameters( + self, + configuration: 'mlptrain.Configuration', + mlp: 
'mlptrain.potentials._base.MLPotential', + temp: float, + dt: float, + interval: int, + **kwargs, + ) -> None: """Set parameters in the _previous_run_parameters""" - self._previous_run_parameters = {'configuration': configuration, - 'mlp': mlp, - 'temp': temp, - 'dt': dt, - 'interval': interval} + self._previous_run_parameters = { + 'configuration': configuration, + 'mlp': mlp, + 'temp': temp, + 'dt': dt, + 'interval': interval, + } sim_time_dict = {} for key in ['ps', 'fs', 'ns']: @@ -589,11 +645,12 @@ def _set_previous_parameters(self, return None - def plot_gaussian_heights(self, - energy_units: str = 'kcal mol-1', - time_units: str = 'ps', - path: str = 'plumed_files/metadynamics' - ) -> None: + def plot_gaussian_heights( + self, + energy_units: str = 'kcal mol-1', + time_units: str = 'ps', + path: str = 'plumed_files/metadynamics', + ) -> None: """ Plot the height of deposited gaussians as a function of time (using HILLS_{idx}.dat files). @@ -611,33 +668,36 @@ def plot_gaussian_heights(self, """ if not os.path.exists(path): - raise FileNotFoundError('Directory with metadynamics files not ' - 'found. Make sure to run metadynamics ' - 'before using this method') + raise FileNotFoundError( + 'Directory with metadynamics files not ' + 'found. 
Make sure to run metadynamics ' + 'before using this method' + ) initial_path = os.getcwd() os.chdir(path) idx = 1 while os.path.exists(f'HILLS_{idx}.dat'): - self._plot_gaussian_heights_single(idx=idx, - energy_units=energy_units, - time_units=time_units) + self._plot_gaussian_heights_single( + idx=idx, energy_units=energy_units, time_units=time_units + ) idx += 1 os.chdir(initial_path) - move_files([r'gaussian_heights_\d+.pdf'], - src_folder=path, - dst_folder='gaussian_heights', - regex=True) + move_files( + [r'gaussian_heights_\d+.pdf'], + src_folder=path, + dst_folder='gaussian_heights', + regex=True, + ) return None @staticmethod - def _plot_gaussian_heights_single(idx: int, - energy_units: str = 'kcal mol-1', - time_units: str = 'ps' - ) -> None: + def _plot_gaussian_heights_single( + idx: int, energy_units: str = 'kcal mol-1', time_units: str = 'ps' + ) -> None: """ Plot the height of deposited gaussians as a function of time for a single metadynamics run. @@ -653,12 +713,14 @@ def _plot_gaussian_heights_single(idx: int, time_units: (str) Time units to be used in plotting, available units: 'fs', 'ps', 'ns' """ + import matplotlib.pyplot as plt filename = f'HILLS_{idx}.dat' deposit_time = np.loadtxt(filename, usecols=0) - deposit_time = convert_ase_time(time_array=deposit_time, - units=time_units) + deposit_time = convert_ase_time( + time_array=deposit_time, units=time_units + ) heights = np.loadtxt(filename, usecols=-2) heights = convert_ase_energy(energy_array=heights, units=energy_units) @@ -675,19 +737,20 @@ def _plot_gaussian_heights_single(idx: int, return None - def try_multiple_biasfactors(self, - configuration: 'mlptrain.Configuration', - mlp: 'mlptrain.potentials._base.MLPotential', - temp: float, - interval: int, - dt: float, - biasfactors: Sequence[float], - pace: int = 500, - height: Optional[float] = None, - width: Optional = None, - plotted_cvs: Optional = None, - **kwargs - ) -> None: + def try_multiple_biasfactors( + self, + configuration: 
'mlptrain.Configuration', + mlp: 'mlptrain.potentials._base.MLPotential', + temp: float, + interval: int, + dt: float, + biasfactors: Sequence[float], + pace: int = 500, + height: Optional[float] = None, + width: Optional = None, + plotted_cvs: Optional = None, + **kwargs, + ) -> None: """ Execute multiple well-tempered metadynamics runs in parallel with a provided sequence of biasfactors and plot the resulting trajectories, @@ -743,8 +806,10 @@ def try_multiple_biasfactors(self, raise TypeError('Supplied biasfactors variable must be a sequence') if temp <= 0: - raise ValueError('Temperature must be positive and non-zero for ' - 'well-tempered metadynamics') + raise ValueError( + 'Temperature must be positive and non-zero for ' + 'well-tempered metadynamics' + ) self.temp = temp if height is None: @@ -752,13 +817,14 @@ def try_multiple_biasfactors(self, height = 0.5 * self.kbt if width is None: - logger.info('Width parameters were not supplied to the multiple ' - 'biasfactor simulation, estimating widths ' - 'automatically by performing an unbiased simulation' - 'for 10 ps') + logger.info( + 'Width parameters were not supplied to the multiple ' + 'biasfactor simulation, estimating widths ' + 'automatically by performing an unbiased simulation' + 'for 10 ps' + ) - width = self.estimate_width(configurations=configuration, - mlp=mlp) + width = self.estimate_width(configurations=configuration, mlp=mlp) # Dummy bias which stores CVs, useful for checking CVs input if plotted_cvs is not None: @@ -768,116 +834,138 @@ def try_multiple_biasfactors(self, cvs_holder = self.bias if cvs_holder.n_cvs > 2: - raise NotImplementedError('Plotting using more than two CVs is ' - 'not implemented') + raise NotImplementedError( + 'Plotting using more than two CVs is ' 'not implemented' + ) if not all(cv in self.bias.metad_cvs for cv in cvs_holder.metad_cvs): - raise ValueError('At least one of the supplied CVs are not within ' - 'the set of CVs used to define the Metadynamics ' - 
'object') + raise ValueError( + 'At least one of the supplied CVs are not within ' + 'the set of CVs used to define the Metadynamics ' + 'object' + ) - self.bias._set_metad_params(width=width, - pace=pace, - height=height, - **kwargs) + self.bias._set_metad_params( + width=width, pace=pace, height=height, **kwargs + ) n_processes = min(Config.n_cores, len(biasfactors)) - logger.info('Running Well-Tempered Metadynamics simulations ' - f'with {len(biasfactors)} different biasfactors, ' - f'{n_processes} simulation(s) run in parallel, ' - f'1 walker per simulation') + logger.info( + 'Running Well-Tempered Metadynamics simulations ' + f'with {len(biasfactors)} different biasfactors, ' + f'{n_processes} simulation(s) run in parallel, ' + f'1 walker per simulation' + ) start = time.perf_counter() with mp.get_context('spawn').Pool(processes=n_processes) as pool: - for idx, biasfactor in enumerate(biasfactors): - bias = deepcopy(self.bias) bias.biasfactor = biasfactor kwargs_single = deepcopy(kwargs) kwargs_single['idx'] = idx + 1 - pool.apply_async(func=self._try_single_biasfactor, - args=(configuration, - mlp, - temp, - interval, - dt, - bias, - cvs_holder.cvs), - kwds=kwargs_single) + pool.apply_async( + func=self._try_single_biasfactor, + args=( + configuration, + mlp, + temp, + interval, + dt, + bias, + cvs_holder.cvs, + ), + kwds=kwargs_single, + ) pool.close() pool.join() finish = time.perf_counter() - logger.info('Simulations with multiple biasfactors done in ' - f'{(finish - start) / 60:.1f} m') - - move_files([r'colvar_\w+_\d+\.dat', r'HILLS_\d+\.dat'], - dst_folder='plumed_files/multiple_biasfactors', - regex=True) - - move_files([r'\w+_biasf\d+\.pdf'], - dst_folder='multiple_biasfactors', - regex=True) + logger.info( + 'Simulations with multiple biasfactors done in ' + f'{(finish - start) / 60:.1f} m' + ) + + move_files( + [r'colvar_\w+_\d+\.dat', r'HILLS_\d+\.dat'], + dst_folder='plumed_files/multiple_biasfactors', + regex=True, + ) + + move_files( + 
[r'\w+_biasf\d+\.pdf'], + dst_folder='multiple_biasfactors', + regex=True, + ) return None - def _try_single_biasfactor(self, - configuration: 'mlptrain.Configuration', - mlp: 'mlptrain.potentials._base.MLPotential', - temp: float, - interval: int, - dt: float, - bias: 'mlptrain.PlumedBias', - plotted_cvs: Optional, - **kwargs): + def _try_single_biasfactor( + self, + configuration: 'mlptrain.Configuration', + mlp: 'mlptrain.potentials._base.MLPotential', + temp: float, + interval: int, + dt: float, + bias: 'mlptrain.PlumedBias', + plotted_cvs: Optional, + **kwargs, + ): """ Execute a single well-tempered metadynamics run and plot the resulting trajectory """ - self._run_single_metad(configuration=configuration, - mlp=mlp, - temp=temp, - interval=interval, - dt=dt, - bias=bias, - kept_substrings=['.dat'], - **kwargs) - - filenames = [f'colvar_{cv.name}_{kwargs["idx"]}.dat' - for cv in plotted_cvs] + self._run_single_metad( + configuration=configuration, + mlp=mlp, + temp=temp, + interval=interval, + dt=dt, + bias=bias, + kept_substrings=['.dat'], + **kwargs, + ) + + filenames = [ + f'colvar_{cv.name}_{kwargs["idx"]}.dat' for cv in plotted_cvs + ] for filename, cv in zip(filenames, plotted_cvs): - plot_cv_versus_time(filename=filename, - cv_units=cv.units, - label=f'biasf{bias.biasfactor}') + plot_cv_versus_time( + filename=filename, + cv_units=cv.units, + label=f'biasf{bias.biasfactor}', + ) if len(plotted_cvs) == 2: - plot_cv1_and_cv2(filenames=filenames, - cvs_units=[cv.units for cv in plotted_cvs], - label=f'biasf{bias.biasfactor}') + plot_cv1_and_cv2( + filenames=filenames, + cvs_units=[cv.units for cv in plotted_cvs], + label=f'biasf{bias.biasfactor}', + ) return None - def block_analysis(self, - start_time: float, - idx: int = 1, - energy_units: str = 'kcal mol-1', - n_bins: int = 300, - min_n_blocks: int = 10, - min_blocksize: int = 10, - blocksize_interval: int = 10, - bandwidth: float = 0.02, - cvs_bounds: Optional[Sequence] = None, - temp: Optional[float] 
= None, - dt: Optional[float] = None, - interval: Optional[int] = None, - ) -> None: + def block_analysis( + self, + start_time: float, + idx: int = 1, + energy_units: str = 'kcal mol-1', + n_bins: int = 300, + min_n_blocks: int = 10, + min_blocksize: int = 10, + blocksize_interval: int = 10, + bandwidth: float = 0.02, + cvs_bounds: Optional[Sequence] = None, + temp: Optional[float] = None, + dt: Optional[float] = None, + interval: Optional[int] = None, + ) -> None: """ Perform block averaging analysis on the sliced trajectory of the most recent metadynamics run. Plot the block analysis and save mean FES @@ -925,30 +1013,36 @@ def block_analysis(self, start = time.perf_counter() - bias, temp, dt, interval = self._reweighting_params(temp=temp, - dt=dt, - interval=interval) - start_frame_index = int((start_time * 1E3) / (dt * interval)) + bias, temp, dt, interval = self._reweighting_params( + temp=temp, dt=dt, interval=interval + ) + start_frame_index = int((start_time * 1e3) / (dt * interval)) - sliced_traj = ase_read(f'trajectories/trajectory_{idx}.traj', - index=f'{start_frame_index}:') + sliced_traj = ase_read( + f'trajectories/trajectory_{idx}.traj', + index=f'{start_frame_index}:', + ) self._save_ase_traj_as_xyz(sliced_traj) - shutil.copyfile(src=f'plumed_files/metadynamics/HILLS_{idx}.dat', - dst=f'HILLS_{idx}.dat') + shutil.copyfile( + src=f'plumed_files/metadynamics/HILLS_{idx}.dat', + dst=f'HILLS_{idx}.dat', + ) # Writes plumed_setup.dat - plumed_setup(bias=bias, - temp=temp, - interval=interval, - idx=idx, - load_metad_bias=True, - remove_print=True, - write_plumed_setup=True) - - min_max_params = self._get_min_max_params(cvs_bounds=cvs_bounds, - path='plumed_files/' - 'metadynamics') + plumed_setup( + bias=bias, + temp=temp, + interval=interval, + idx=idx, + load_metad_bias=True, + remove_print=True, + write_plumed_setup=True, + ) + + min_max_params = self._get_min_max_params( + cvs_bounds=cvs_bounds, path='plumed_files/' 'metadynamics' + ) # The number 
of frames PLUMED driver takes into account # n_used_frames = n_total_frames - 1 @@ -956,27 +1050,33 @@ def block_analysis(self, max_blocksize = n_used_frames // min_n_blocks if max_blocksize < min_blocksize: - raise ValueError('The simulation is too short to perform ' - 'block analysis') + raise ValueError( + 'The simulation is too short to perform ' 'block analysis' + ) - logger.info('Performing block analysis in parallel using ' - f'{Config.n_cores} cores') + logger.info( + 'Performing block analysis in parallel using ' + f'{Config.n_cores} cores' + ) grid_procs, data_dict = [], {} - blocksizes = list(range(min_blocksize, max_blocksize + 1, - blocksize_interval)) + blocksizes = list( + range(min_blocksize, max_blocksize + 1, blocksize_interval) + ) with Pool(processes=Config.n_cores) as pool: - for blocksize in blocksizes: - - grid_proc = pool.apply_async(func=self._compute_grids_for_blocksize, - args=(blocksize, - temp, - min_max_params, - n_bins, - bandwidth, - energy_units)) + grid_proc = pool.apply_async( + func=self._compute_grids_for_blocksize, + args=( + blocksize, + temp, + min_max_params, + n_bins, + bandwidth, + energy_units, + ), + ) grid_procs.append(grid_proc) pool.close() @@ -992,20 +1092,20 @@ def block_analysis(self, os.remove('traj.xyz') os.remove(f'HILLS_{idx}.dat') - self._plot_block_analysis(blocksizes=blocksizes, - data_dict=data_dict, - energy_units=energy_units) + self._plot_block_analysis( + blocksizes=blocksizes, + data_dict=data_dict, + energy_units=energy_units, + ) finish = time.perf_counter() logger.info(f'Block analysis done in {(finish - start) / 60:.1f} m') return None - def _reweighting_params(self, - temp: float, - dt: float, - interval: int - ) -> Tuple: + def _reweighting_params( + self, temp: float, dt: float, interval: int + ) -> Tuple: """ Read parameters required for reweighting from the previous metadynamics simulation. 
If previous parameters are not set, read @@ -1014,9 +1114,9 @@ def _reweighting_params(self, # Bias with dummy width and height values, and very large pace bias = deepcopy(self.bias) - bias._set_metad_params(pace=int(1E9), - width=[1 for _ in range(self.n_cvs)], - height=0) + bias._set_metad_params( + pace=int(1e9), width=[1 for _ in range(self.n_cvs)], height=0 + ) _parameters = [temp, dt, interval] @@ -1026,11 +1126,12 @@ def _reweighting_params(self, interval = self._previous_run_parameters['interval'] elif any(param is None for param in _parameters): - - raise TypeError('Metadynamics object does not have all the ' - 'required parameters to run block analysis. ' - 'Please provide parameters from the previous ' - 'metadynamics run') + raise TypeError( + 'Metadynamics object does not have all the ' + 'required parameters to run block analysis. ' + 'Please provide parameters from the previous ' + 'metadynamics run' + ) return bias, temp, dt, interval @@ -1052,36 +1153,40 @@ def _save_ase_traj_as_xyz(ase_traj: 'ase.io.trajectory.Trajectory'): return None - @work_in_tmp_dir(copied_substrings=['traj.xyz', - 'plumed_setup.dat', - 'HILLS']) - def _compute_grids_for_blocksize(self, - blocksize: int, - temp: float, - min_max_params: Tuple, - n_bins: int, - bandwidth: float, - energy_units: str - ) -> Tuple[np.ndarray, np.ndarray]: + @work_in_tmp_dir( + copied_substrings=['traj.xyz', 'plumed_setup.dat', 'HILLS'] + ) + def _compute_grids_for_blocksize( + self, + blocksize: int, + temp: float, + min_max_params: Tuple, + n_bins: int, + bandwidth: float, + energy_units: str, + ) -> Tuple[np.ndarray, np.ndarray]: """Compute CV and FES error grids over blocks for a given block size and return both grids""" - self._generate_hist_files_by_reweighting(blocksize=blocksize, - temp=temp, - min_max_params=min_max_params, - n_bins=n_bins, - bandwidth=bandwidth) - - normal, hist, cvs_grid = self._read_histogram(filename='hist.dat', - n_bins=n_bins, - compute_cvs=True) + 
self._generate_hist_files_by_reweighting( + blocksize=blocksize, + temp=temp, + min_max_params=min_max_params, + n_bins=n_bins, + bandwidth=bandwidth, + ) + + normal, hist, cvs_grid = self._read_histogram( + filename='hist.dat', n_bins=n_bins, compute_cvs=True + ) normal_sq = normal**2 average = normal * hist average_sq = normal * hist**2 for hist_file in glob.glob('analysis.*.hist.dat'): - tnormal, new_hist, _ = self._read_histogram(filename=hist_file, - n_bins=n_bins) + tnormal, new_hist, _ = self._read_histogram( + filename=hist_file, n_bins=n_bins + ) normal += tnormal normal_sq += tnormal**2 average += tnormal * new_hist @@ -1089,25 +1194,27 @@ def _compute_grids_for_blocksize(self, average /= normal variance = (average_sq / normal) - average**2 - variance *= (normal / (normal - (normal_sq / normal))) + variance *= normal / (normal - (normal_sq / normal)) n_grids = 1 + len(glob.glob('analysis.*.hist.dat')) hist_error = np.sqrt(variance / n_grids) fes_error = ase_units.kB * temp * hist_error - fes_error = np.divide(fes_error, average, - out=np.zeros_like(fes_error), - where=average != 0) - fes_error = convert_ase_energy(energy_array=fes_error, - units=energy_units) + fes_error = np.divide( + fes_error, + average, + out=np.zeros_like(fes_error), + where=average != 0, + ) + fes_error = convert_ase_energy( + energy_array=fes_error, units=energy_units + ) return cvs_grid, fes_error - def _read_histogram(self, - filename: str, - n_bins: int, - compute_cvs: bool = False - ) -> Tuple: + def _read_histogram( + self, filename: str, n_bins: int, compute_cvs: bool = False + ) -> Tuple: """ Read the histogram file and return the normalisation together with the CVs and the histogram as numpy grids @@ -1128,19 +1235,20 @@ def _read_histogram(self, with open(filename, 'r') as f: for line in f: - if line.startswith("#! SET normalisation"): + if line.startswith('#! 
SET normalisation'): normal = line.split()[3] break return float(normal), hist, cvs_grid - def _generate_hist_files_by_reweighting(self, - blocksize: Optional[int], - temp: float, - min_max_params: Tuple, - n_bins: int, - bandwidth: float - ) -> None: + def _generate_hist_files_by_reweighting( + self, + blocksize: Optional[int], + temp: float, + min_max_params: Tuple, + n_bins: int, + bandwidth: float, + ) -> None: """ Generate analysis.*.hist.dat + hist.dat files which are required for block analysis @@ -1150,27 +1258,24 @@ def _generate_hist_files_by_reweighting(self, min_param_seq, max_param_seq = min_max_params bandwidth_seq = ','.join(str(bandwidth) for _ in range(self.n_cvs)) - bin_param_seq = ','.join(str(n_bins-1) for _ in range(self.n_cvs)) + bin_param_seq = ','.join(str(n_bins - 1) for _ in range(self.n_cvs)) clear_setup = f'CLEAR={blocksize} ' if blocksize is not None else '' stride_setup = f'STRIDE={blocksize} ' if blocksize is not None else '' - reweight_setup = ['as: REWEIGHT_BIAS ' - f'TEMP={temp} ' - 'ARG=metad.bias', - 'hist: HISTOGRAM ' - f'ARG={self.bias.metad_cv_sequence} ' - f'STRIDE=1 ' - f'{clear_setup}' - f'GRID_MIN={min_param_seq} ' - f'GRID_MAX={max_param_seq} ' - f'GRID_BIN={bin_param_seq} ' - f'BANDWIDTH={bandwidth_seq} ' - 'LOGWEIGHTS=as', - 'DUMPGRID ' - 'GRID=hist ' - f'{stride_setup}' - 'FILE=hist.dat'] + reweight_setup = [ + 'as: REWEIGHT_BIAS ' f'TEMP={temp} ' 'ARG=metad.bias', + 'hist: HISTOGRAM ' + f'ARG={self.bias.metad_cv_sequence} ' + f'STRIDE=1 ' + f'{clear_setup}' + f'GRID_MIN={min_param_seq} ' + f'GRID_MAX={max_param_seq} ' + f'GRID_BIN={bin_param_seq} ' + f'BANDWIDTH={bandwidth_seq} ' + 'LOGWEIGHTS=as', + 'DUMPGRID ' 'GRID=hist ' f'{stride_setup}' 'FILE=hist.dat', + ] os.rename('plumed_setup.dat', 'reweight.dat') with open('reweight.dat', 'a') as f: @@ -1179,30 +1284,42 @@ def _generate_hist_files_by_reweighting(self, self.bias.write_cv_files() - driver_process = Popen(['plumed', 'driver', - '--ixyz', 'traj.xyz', - 
'--plumed', 'reweight.dat', - '--length-units', 'A']) + driver_process = Popen( + [ + 'plumed', + 'driver', + '--ixyz', + 'traj.xyz', + '--plumed', + 'reweight.dat', + '--length-units', + 'A', + ] + ) driver_process.wait() return None @staticmethod - def _plot_block_analysis(blocksizes: List, - data_dict: dict, - energy_units: str - ) -> None: + def _plot_block_analysis( + blocksizes: List, data_dict: dict, energy_units: str + ) -> None: """Plot the standard deviation versus block size""" + import matplotlib.pyplot as plt data_dict.pop('CVs') - mean_errors = [np.mean(error_grid) for error_grid in data_dict.values()] + mean_errors = [ + np.mean(error_grid) for error_grid in data_dict.values() + ] fig, ax = plt.subplots() ax.plot(blocksizes, mean_errors, color='k') ax.set_xlabel('Block size') - ax.set_ylabel(r'$\left\langle\sigma_{G}\right\rangle$ / ' - f'{convert_exponents(energy_units)}') + ax.set_ylabel( + r'$\left\langle\sigma_{G}\right\rangle$ / ' + f'{convert_exponents(energy_units)}' + ) fig.tight_layout() @@ -1215,14 +1332,15 @@ def _plot_block_analysis(blocksizes: List, return None - def plot_fes(self, - energy_units: str = 'kcal mol-1', - confidence_level: float = 0.95, - n_bins: int = 300, - cvs_bounds: Optional[Sequence] = None, - fes_npy: Optional[str] = None, - blocksize: Optional[int] = None, - ) -> None: + def plot_fes( + self, + energy_units: str = 'kcal mol-1', + confidence_level: float = 0.95, + n_bins: int = 300, + cvs_bounds: Optional[Sequence] = None, + fes_npy: Optional[str] = None, + blocksize: Optional[int] = None, + ) -> None: """ Plot the free energy surface with a confidence interval. 
If the .npy file is not supplied, the file is computed (if metadynamics has been @@ -1264,8 +1382,10 @@ def plot_fes(self, fes = self.compute_fes(energy_units, n_bins, cvs_bounds) else: - logger.info('Using fes_raw.npy in the current directory for ' - 'plotting') + logger.info( + 'Using fes_raw.npy in the current directory for ' + 'plotting' + ) fes = np.load('fes_raw.npy') @@ -1273,34 +1393,41 @@ def plot_fes(self, fes = np.load(fes_npy) if self.n_cvs == 1: - self._plot_1d_fes(fes=fes, - energy_units=energy_units, - confidence_level=confidence_level, - blocksize=blocksize) + self._plot_1d_fes( + fes=fes, + energy_units=energy_units, + confidence_level=confidence_level, + blocksize=blocksize, + ) elif self.n_cvs == 2: - self._plot_2d_fes(fes=fes, - energy_units=energy_units, - confidence_level=confidence_level, - blocksize=blocksize) + self._plot_2d_fes( + fes=fes, + energy_units=energy_units, + confidence_level=confidence_level, + blocksize=blocksize, + ) else: - raise NotImplementedError('Plotting FES is available only for one ' - 'and two collective variables') + raise NotImplementedError( + 'Plotting FES is available only for one ' + 'and two collective variables' + ) return None - def compute_fes(self, - energy_units: str = 'kcal mol-1', - n_bins: int = 300, - cvs_bounds: Optional[Sequence] = None, - via_reweighting: bool = False, - start_time: float = 0.00, - bandwidth: float = 0.02, - temp: Optional[float] = None, - dt: Optional[float] = None, - interval: Optional[int] = None, - ) -> np.ndarray: + def compute_fes( + self, + energy_units: str = 'kcal mol-1', + n_bins: int = 300, + cvs_bounds: Optional[Sequence] = None, + via_reweighting: bool = False, + start_time: float = 0.00, + bandwidth: float = 0.02, + temp: Optional[float] = None, + dt: Optional[float] = None, + interval: Optional[int] = None, + ) -> np.ndarray: """ Compute a grid containing collective variable grids and free energy surface grids, which is saved in the current directory as .npy file. 
@@ -1345,67 +1472,76 @@ def compute_fes(self, logger.info('Computing and saving the free energy grid as fes_raw.npy') if via_reweighting: - fes_raw = self._compute_fes_via_reweighting(energy_units=energy_units, - n_bins=n_bins, - cvs_bounds=cvs_bounds, - temp=temp, - dt=dt, - interval=interval, - start_time=start_time, - bandwidth=bandwidth) + fes_raw = self._compute_fes_via_reweighting( + energy_units=energy_units, + n_bins=n_bins, + cvs_bounds=cvs_bounds, + temp=temp, + dt=dt, + interval=interval, + start_time=start_time, + bandwidth=bandwidth, + ) else: - fes_raw = self._compute_fes_via_hills(energy_units=energy_units, - n_bins=n_bins, - cvs_bounds=cvs_bounds) + fes_raw = self._compute_fes_via_hills( + energy_units=energy_units, n_bins=n_bins, cvs_bounds=cvs_bounds + ) np.save('fes_raw.npy', fes_raw) return fes_raw - def _compute_fes_via_reweighting(self, - temp: float, - dt: float, - interval: int, - start_time: float, - energy_units: str, - cvs_bounds: Optional[Sequence], - n_bins: int, - bandwidth: float - ) -> np.ndarray: + def _compute_fes_via_reweighting( + self, + temp: float, + dt: float, + interval: int, + start_time: float, + energy_units: str, + cvs_bounds: Optional[Sequence], + n_bins: int, + bandwidth: float, + ) -> np.ndarray: """ Compute the free energy surface grid by reweighting the biased distribution """ - bias, temp, dt, interval = self._reweighting_params(temp=temp, - dt=dt, - interval=interval) - start_frame_index = int((start_time * 1E3) / (dt * interval)) + bias, temp, dt, interval = self._reweighting_params( + temp=temp, dt=dt, interval=interval + ) + start_frame_index = int((start_time * 1e3) / (dt * interval)) - min_max_params = self._get_min_max_params(cvs_bounds=cvs_bounds, - path='plumed_files/' - 'metadynamics') + min_max_params = self._get_min_max_params( + cvs_bounds=cvs_bounds, path='plumed_files/' 'metadynamics' + ) n_runs = len(glob.glob('trajectories/trajectory_*.traj')) n_processes = min(Config.n_cores, n_runs) 
fes_processes, fes_grids = [], [] with Pool(processes=n_processes) as pool: - for idx in range(1, n_runs+1): - traj_path = os.path.join(os.getcwd(), - f'trajectories/trajectory_{idx}.traj') - hills_path = os.path.join(os.getcwd(), - f'plumed_files/metadynamics/HILLS_{idx}.dat') - - proc = pool.apply_async(func=self._compute_single_fes_via_reweighting, - args=(idx, - traj_path, - hills_path, - start_frame_index, - bias, - temp, - interval, - min_max_params, - n_bins, - bandwidth, - energy_units)) + for idx in range(1, n_runs + 1): + traj_path = os.path.join( + os.getcwd(), f'trajectories/trajectory_{idx}.traj' + ) + hills_path = os.path.join( + os.getcwd(), f'plumed_files/metadynamics/HILLS_{idx}.dat' + ) + + proc = pool.apply_async( + func=self._compute_single_fes_via_reweighting, + args=( + idx, + traj_path, + hills_path, + start_frame_index, + bias, + temp, + interval, + min_max_params, + n_bins, + bandwidth, + energy_units, + ), + ) fes_processes.append(proc) pool.close() @@ -1418,85 +1554,100 @@ def _compute_fes_via_reweighting(self, return fes_raw @work_in_tmp_dir() - def _compute_single_fes_via_reweighting(self, - idx: int, - traj_path: str, - hills_path: str, - start_frame_index: int, - bias: 'mlptrain.PlumedBias', - temp: float, - interval: int, - min_max_params: Tuple, - n_bins: int, - bandwidth: float, - energy_units: str - ) -> Tuple: + def _compute_single_fes_via_reweighting( + self, + idx: int, + traj_path: str, + hills_path: str, + start_frame_index: int, + bias: 'mlptrain.PlumedBias', + temp: float, + interval: int, + min_max_params: Tuple, + n_bins: int, + bandwidth: float, + energy_units: str, + ) -> Tuple: """Compute CVs and FES grids for a single run by reweighting""" sliced_traj = ase_read(traj_path, index=f'{start_frame_index}:') self._save_ase_traj_as_xyz(sliced_traj) shutil.copyfile(src=hills_path, dst=f'HILLS_{idx}.dat') - plumed_setup(bias=bias, - temp=temp, - interval=interval, - idx=idx, - load_metad_bias=True, - remove_print=True, - 
write_plumed_setup=True) - - self._generate_hist_files_by_reweighting(blocksize=None, - temp=temp, - min_max_params=min_max_params, - n_bins=n_bins, - bandwidth=bandwidth) - - _, hist, cvs_grid = self._read_histogram(filename='hist.dat', - n_bins=n_bins, - compute_cvs=True) + plumed_setup( + bias=bias, + temp=temp, + interval=interval, + idx=idx, + load_metad_bias=True, + remove_print=True, + write_plumed_setup=True, + ) + + self._generate_hist_files_by_reweighting( + blocksize=None, + temp=temp, + min_max_params=min_max_params, + n_bins=n_bins, + bandwidth=bandwidth, + ) + + _, hist, cvs_grid = self._read_histogram( + filename='hist.dat', n_bins=n_bins, compute_cvs=True + ) fes_grid = -ase_units.kB * temp * np.log(hist) fes_grid = fes_grid - np.min(fes_grid) - fes_grid = convert_ase_energy(energy_array=fes_grid, - units=energy_units) + fes_grid = convert_ase_energy( + energy_array=fes_grid, units=energy_units + ) return cvs_grid, fes_grid - def _compute_fes_via_hills(self, - energy_units: str = 'kcal mol-1', - n_bins: int = 300, - cvs_bounds: Optional[Sequence] = None, - ) -> np.ndarray: + def _compute_fes_via_hills( + self, + energy_units: str = 'kcal mol-1', + n_bins: int = 300, + cvs_bounds: Optional[Sequence] = None, + ) -> np.ndarray: """Compute the free energy surface grid using the deposited bias""" try: os.chdir('plumed_files/metadynamics') except FileNotFoundError: - raise FileNotFoundError('Metadynamics directory not found. Make ' - 'sure to run metadynamics before trying ' - 'to compute the FES') - - fes_files = [fname for fname in os.listdir() if fname.startswith('fes')] + raise FileNotFoundError( + 'Metadynamics directory not found. 
Make ' + 'sure to run metadynamics before trying ' + 'to compute the FES' + ) + + fes_files = [ + fname for fname in os.listdir() if fname.startswith('fes') + ] for fname in fes_files: os.remove(fname) self._generate_fes_files(n_bins=n_bins, cvs_bounds=cvs_bounds) - cv_grids, fes_grids = self._fes_files_to_grids(energy_units=energy_units, - n_bins=n_bins) + cv_grids, fes_grids = self._fes_files_to_grids( + energy_units=energy_units, n_bins=n_bins + ) os.chdir('../..') fes_raw = np.concatenate((cv_grids, fes_grids), axis=0) return fes_raw - def _plot_1d_fes(self, - fes: np.ndarray, - energy_units: str = 'kcal mol-1', - confidence_level: float = 0.95, - blocksize: Optional[int] = None, - ) -> None: + def _plot_1d_fes( + self, + fes: np.ndarray, + energy_units: str = 'kcal mol-1', + confidence_level: float = 0.95, + blocksize: Optional[int] = None, + ) -> None: """Plot 1D mean free energy surface with a confidence interval""" + import matplotlib.pyplot as plt + import scipy.stats logger.info('Plotting 1D FES') @@ -1508,23 +1659,28 @@ def _plot_1d_fes(self, mean_fes = np.mean(fes_grids, axis=0) ax.plot(cv_grid, mean_fes, label='Free energy') - fes_error = self._compute_fes_error(fes_grids=fes_grids, - blocksize=blocksize) + fes_error = self._compute_fes_error( + fes_grids=fes_grids, blocksize=blocksize + ) if np.any(fes_error): - with warnings.catch_warnings(): - warnings.filterwarnings('ignore', - message='invalid value encountered in multiply') - confidence_interval = norm.interval(confidence_level, - loc=mean_fes, - scale=fes_error) + warnings.filterwarnings( + 'ignore', message='invalid value encountered in multiply' + ) + confidence_interval = scipy.stats.norm.interval( + confidence_level, loc=mean_fes, scale=fes_error + ) lower_bound = confidence_interval[0] upper_bound = confidence_interval[1] - ax.fill_between(cv_grid, lower_bound, upper_bound, - alpha=0.3, - label='Confidence interval') + ax.fill_between( + cv_grid, + lower_bound, + upper_bound, + alpha=0.3, + 
label='Confidence interval', + ) cv = self.bias.metad_cvs[0] if cv.units is not None: @@ -1533,8 +1689,7 @@ def _plot_1d_fes(self, else: ax.set_xlabel(f'{cv.name}') - ax.set_ylabel(r'$\Delta G$ / ' - f'{convert_exponents(energy_units)}') + ax.set_ylabel(r'$\Delta G$ / ' f'{convert_exponents(energy_units)}') ax.legend() fig.tight_layout() @@ -1548,13 +1703,17 @@ def _plot_1d_fes(self, return None - def _plot_2d_fes(self, - fes: np.ndarray, - energy_units: str = 'kcal mol-1', - confidence_level: float = 0.95, - blocksize: Optional[int] = None, - ) -> None: + def _plot_2d_fes( + self, + fes: np.ndarray, + energy_units: str = 'kcal mol-1', + confidence_level: float = 0.95, + blocksize: Optional[int] = None, + ) -> None: """Plot 2D mean free energy surface with a confidence interval""" + from matplotlib.colors import ListedColormap + import matplotlib.pyplot as plt + import scipy.stats logger.info('Plotting 2D FES') @@ -1562,9 +1721,9 @@ def _plot_2d_fes(self, cv2_grid = fes[1] fes_grids = fes[2:] - fig, (ax_mean, ax_std_error) = plt.subplots(nrows=1, - ncols=2, - figsize=(12, 5)) + fig, (ax_mean, ax_std_error) = plt.subplots( + nrows=1, ncols=2, figsize=(12, 5) + ) jet_cmap = plt.get_cmap('jet') jet_cmap_matrix = jet_cmap(np.linspace(0, 1, 256)) @@ -1573,42 +1732,46 @@ def _plot_2d_fes(self, mean_fes = np.mean(fes_grids, axis=0) - mean_contourf = ax_mean.contourf(cv1_grid, cv2_grid, mean_fes, 256, - cmap=mod_jet_cmap) - ax_mean.contour(cv1_grid, cv2_grid, mean_fes, 20, - colors='k', - alpha=0.2) + mean_contourf = ax_mean.contourf( + cv1_grid, cv2_grid, mean_fes, 256, cmap=mod_jet_cmap + ) + ax_mean.contour( + cv1_grid, cv2_grid, mean_fes, 20, colors='k', alpha=0.2 + ) mean_cbar = fig.colorbar(mean_contourf, ax=ax_mean) - mean_cbar.set_label(label=r'$\Delta G$ / ' - f'{convert_exponents(energy_units)}') + mean_cbar.set_label( + label=r'$\Delta G$ / ' f'{convert_exponents(energy_units)}' + ) - fes_error = self._compute_fes_error(fes_grids=fes_grids, - 
blocksize=blocksize) + fes_error = self._compute_fes_error( + fes_grids=fes_grids, blocksize=blocksize + ) with warnings.catch_warnings(): - warnings.filterwarnings('ignore', - message='invalid value encountered in multiply') - confidence_interval = norm.interval(confidence_level, - loc=mean_fes, - scale=fes_error) + warnings.filterwarnings( + 'ignore', message='invalid value encountered in multiply' + ) + confidence_interval = scipy.stats.norm.interval( + confidence_level, loc=mean_fes, scale=fes_error + ) interval_range = confidence_interval[1] - confidence_interval[0] - std_error_contourf = ax_std_error.contourf(cv1_grid, cv2_grid, - interval_range, 256, - cmap='Blues') - ax_std_error.contour(cv1_grid, cv2_grid, mean_fes, 20, - colors='k', - alpha=0.2) + std_error_contourf = ax_std_error.contourf( + cv1_grid, cv2_grid, interval_range, 256, cmap='Blues' + ) + ax_std_error.contour( + cv1_grid, cv2_grid, mean_fes, 20, colors='k', alpha=0.2 + ) std_error_cbar = fig.colorbar(std_error_contourf, ax=ax_std_error) - std_error_cbar.set_label(label='Confidence interval / ' - f'{convert_exponents(energy_units)}') + std_error_cbar.set_label( + label='Confidence interval / ' f'{convert_exponents(energy_units)}' + ) cv1 = self.bias.metad_cvs[0] cv2 = self.bias.metad_cvs[1] for ax in (ax_mean, ax_std_error): - if cv1.units is not None: ax.set_xlabel(f'{cv1.name} / {cv1.units}') @@ -1622,9 +1785,9 @@ def _plot_2d_fes(self, ax.set_ylabel(f'{cv2.name}') for c in mean_contourf.collections: - c.set_edgecolor("face") + c.set_edgecolor('face') for c in std_error_contourf.collections: - c.set_edgecolor("face") + c.set_edgecolor('face') fig.tight_layout() @@ -1638,9 +1801,9 @@ def _plot_2d_fes(self, return None @staticmethod - def _compute_fes_error(fes_grids: np.ndarray, - blocksize: Optional[int] = None - ) -> np.ndarray: + def _compute_fes_error( + fes_grids: np.ndarray, blocksize: Optional[int] = None + ) -> np.ndarray: """Compute standard error of the free energy to use in 
plotting""" n_surfaces = len(fes_grids) @@ -1651,30 +1814,34 @@ def _compute_fes_error(fes_grids: np.ndarray, fes_error = fes_error.flatten() except (FileNotFoundError, KeyError): - raise FileNotFoundError('Block averaging analysis with block ' - f'size {blocksize} was not found. ' - 'Make sure to run block analysis ' - 'before using this option and use an ' - 'appropriate block size') + raise FileNotFoundError( + 'Block averaging analysis with block ' + f'size {blocksize} was not found. ' + 'Make sure to run block analysis ' + 'before using this option and use an ' + 'appropriate block size' + ) elif n_surfaces != 1: - fes_error = ((1 / np.sqrt(n_surfaces)) - * np.std(fes_grids, axis=0, ddof=1)) + fes_error = (1 / np.sqrt(n_surfaces)) * np.std( + fes_grids, axis=0, ddof=1 + ) else: fes_error = np.zeros_like(fes_grids[0]) return fes_error - def plot_fes_convergence(self, - stride: int, - n_surfaces: int = 5, - time_units: str = 'ps', - energy_units: str = 'kcal mol-1', - n_bins: int = 300, - cvs_bounds: Optional[Sequence] = None, - idx: int = 1 - ) -> None: + def plot_fes_convergence( + self, + stride: int, + n_surfaces: int = 5, + time_units: str = 'ps', + energy_units: str = 'kcal mol-1', + n_bins: int = 300, + cvs_bounds: Optional[Sequence] = None, + idx: int = 1, + ) -> None: """ Compute multiple fes.dat files from a HILLS_idx.dat file by summing the deposited gaussians using a stride. 
Use the computed files to plot @@ -1714,25 +1881,30 @@ def plot_fes_convergence(self, # List of times when a new gaussian is deposited deposit_time = np.loadtxt(f'HILLS_{idx}.dat', usecols=0) - fes_time = [deposit_time[i] - for i in range(stride - 1, len(deposit_time), stride)] + fes_time = [ + deposit_time[i] + for i in range(stride - 1, len(deposit_time), stride) + ] # sum_hills generates surfaces with the stride, # but it also always computes the final FES remove_duplicate = fes_time[-1] == deposit_time[-1] fes_time.append(deposit_time[-1]) - fes_time = convert_ase_time(time_array=np.array(fes_time), units=time_units) + fes_time = convert_ase_time( + time_array=np.array(fes_time), units=time_units + ) fes_time = np.round(fes_time, decimals=1) - self._generate_fes_files(n_bins=n_bins, - cvs_bounds=cvs_bounds, - stride=stride, - idx=idx) + self._generate_fes_files( + n_bins=n_bins, cvs_bounds=cvs_bounds, stride=stride, idx=idx + ) - move_files([fr'fes_{idx}_\d+\.dat'], - dst_folder='../fes_convergence', - regex=True) + move_files( + [rf'fes_{idx}_\d+\.dat'], + dst_folder='../fes_convergence', + regex=True, + ) os.chdir('../fes_convergence') # Remove the final FES if it has already been computed with the stride @@ -1741,40 +1913,49 @@ def plot_fes_convergence(self, os.remove(f'fes_{idx}_{len(fes_time)-1}.dat') fes_time = fes_time[:-1] - cv_grids, fes_grids = self._fes_files_to_grids(energy_units=energy_units, - n_bins=n_bins) + cv_grids, fes_grids = self._fes_files_to_grids( + energy_units=energy_units, n_bins=n_bins + ) - self._plot_surface_difference(fes_grids=fes_grids, - fes_time=fes_time, - time_units=time_units, - energy_units=energy_units) + self._plot_surface_difference( + fes_grids=fes_grids, + fes_time=fes_time, + time_units=time_units, + energy_units=energy_units, + ) if self.n_cvs == 1: - self._plot_multiple_1d_fes_surfaces(cv_grids=cv_grids, - fes_grids=fes_grids, - fes_time=fes_time, - n_surfaces=n_surfaces, - time_units=time_units, - 
energy_units=energy_units) + self._plot_multiple_1d_fes_surfaces( + cv_grids=cv_grids, + fes_grids=fes_grids, + fes_time=fes_time, + n_surfaces=n_surfaces, + time_units=time_units, + energy_units=energy_units, + ) os.chdir('../..') - move_files(['fes_convergence.pdf', 'fes_convergence_diff.pdf'], - dst_folder='fes_convergence', - src_folder='plumed_files/fes_convergence') + move_files( + ['fes_convergence.pdf', 'fes_convergence_diff.pdf'], + dst_folder='fes_convergence', + src_folder='plumed_files/fes_convergence', + ) return None @staticmethod - def _plot_surface_difference(fes_grids: np.ndarray, - fes_time: List, - time_units: str, - energy_units: str - ) -> None: + def _plot_surface_difference( + fes_grids: np.ndarray, + fes_time: List, + time_units: str, + energy_units: str, + ) -> None: """ Plot the root mean square difference between free energy surfaces as a function of time """ + import matplotlib.pyplot as plt fes_diff_grids = np.diff(fes_grids, axis=0) rms_diffs = [np.sqrt(np.mean(grid * grid)) for grid in fes_diff_grids] @@ -1783,9 +1964,11 @@ def _plot_surface_difference(fes_grids: np.ndarray, ax.plot(fes_time[:-1], rms_diffs) ax.set_xlabel(f'Time / {time_units}') - ax.set_ylabel(r'$\left\langle\Delta\Delta G^{2} ' - r'\right\rangle^{\frac{1}{2}}$ / ' - f'{convert_exponents(energy_units)}') + ax.set_ylabel( + r'$\left\langle\Delta\Delta G^{2} ' + r'\right\rangle^{\frac{1}{2}}$ / ' + f'{convert_exponents(energy_units)}' + ) fig.tight_layout() @@ -1794,28 +1977,33 @@ def _plot_surface_difference(fes_grids: np.ndarray, return None - def _plot_multiple_1d_fes_surfaces(self, - cv_grids: np.ndarray, - fes_grids: np.ndarray, - fes_time: List, - n_surfaces: int, - time_units: str, - energy_units: str - ) -> None: + def _plot_multiple_1d_fes_surfaces( + self, + cv_grids: np.ndarray, + fes_grids: np.ndarray, + fes_time: List, + n_surfaces: int, + time_units: str, + energy_units: str, + ) -> None: """ Plot multiple 1D free energy surfaces as a function of 
simulation time """ + import matplotlib.pyplot as plt plotted_cv = self.bias.metad_cvs[0] if n_surfaces > len(fes_grids): - raise ValueError('The number of surfaces requested to plot is ' - 'larger than the number of computed surfaces') + raise ValueError( + 'The number of surfaces requested to plot is ' + 'larger than the number of computed surfaces' + ) fig, ax = plt.subplots() for i in range(len(fes_grids) - n_surfaces, len(fes_grids)): - ax.plot(cv_grids[0], fes_grids[i], - label=f'{fes_time[i]} {time_units}') + ax.plot( + cv_grids[0], fes_grids[i], label=f'{fes_time[i]} {time_units}' + ) ax.legend() @@ -1825,20 +2013,20 @@ def _plot_multiple_1d_fes_surfaces(self, else: ax.set_xlabel(f'{plotted_cv.name}') - ax.set_ylabel(r'$\Delta G$ / ' - f'{convert_exponents(energy_units)}') + ax.set_ylabel(r'$\Delta G$ / ' f'{convert_exponents(energy_units)}') fig.tight_layout() fig.savefig('fes_convergence.pdf') plt.close(fig) - def _generate_fes_files(self, - n_bins: int, - cvs_bounds: Optional[Sequence] = None, - stride: Optional[int] = None, - idx: Optional[int] = None - ) -> None: + def _generate_fes_files( + self, + n_bins: int, + cvs_bounds: Optional[Sequence] = None, + stride: Optional[int] = None, + idx: Optional[int] = None, + ) -> None: """ Generate fes.dat files from a HILLS.dat file. 
@@ -1859,19 +2047,20 @@ def _generate_fes_files(self, """ if not any(filename.startswith('HILLS') for filename in os.listdir()): - raise FileNotFoundError('No HILLS.dat files were found in ' - 'plumed_files, make sure to run ' - 'metadynamics before computing the FES') + raise FileNotFoundError( + 'No HILLS.dat files were found in ' + 'plumed_files, make sure to run ' + 'metadynamics before computing the FES' + ) logger.info('Generating fes.dat files from HILLS.dat files') - bin_param_seq = ','.join(str(n_bins-1) for _ in range(self.n_cvs)) + bin_param_seq = ','.join(str(n_bins - 1) for _ in range(self.n_cvs)) min_param_seq, max_param_seq = self._get_min_max_params(cvs_bounds) label = '*' if idx is None else idx for filename in glob.glob(f'HILLS_{label}.dat'): - # HILLS_*.dat -> * index = filename.split('.')[0].split('_')[-1] @@ -1883,22 +2072,30 @@ def _generate_fes_files(self, fes_filename = f'fes_{index}_' stride_setup = ['--stride', f'{stride}'] - compute_fes = Popen(['plumed', 'sum_hills', - '--hills', filename, - '--outfile', fes_filename, - '--bin', bin_param_seq, - '--min', min_param_seq, - '--max', max_param_seq, - *stride_setup]) + compute_fes = Popen( + [ + 'plumed', + 'sum_hills', + '--hills', + filename, + '--outfile', + fes_filename, + '--bin', + bin_param_seq, + '--min', + min_param_seq, + '--max', + max_param_seq, + *stride_setup, + ] + ) compute_fes.wait() return None - def _fes_files_to_grids(self, - energy_units: str, - n_bins: int, - relative: bool = True - ) -> Tuple[np.ndarray, np.ndarray]: + def _fes_files_to_grids( + self, energy_units: str, n_bins: int, relative: bool = True + ) -> Tuple[np.ndarray, np.ndarray]: """ Use fes.dat files in a current directory to compute a grid containing collective variables and a grid containing free energy surfaces. 
@@ -1939,7 +2136,6 @@ def _get_combined_index(name): # Compute CV grids cv_grids = [] for filename in fes_files: - for idx in range(self.n_cvs): cv_vector = np.loadtxt(filename, usecols=idx) @@ -1952,13 +2148,12 @@ def _get_combined_index(name): # Compute fes grids fes_grids = [] for filename in fes_files: - - fes_vector = np.loadtxt(filename, - usecols=self.n_cvs) + fes_vector = np.loadtxt(filename, usecols=self.n_cvs) fes_grid = np.reshape(fes_vector, grid_shape) - fes_grid = convert_ase_energy(energy_array=fes_grid, - units=energy_units) + fes_grid = convert_ase_energy( + energy_array=fes_grid, units=energy_units + ) if relative: fes_grid -= np.min(fes_grid) @@ -1970,10 +2165,9 @@ def _get_combined_index(name): return total_cv_grid, total_fes_grid - def _get_min_max_params(self, - cvs_bounds: Optional[Sequence] = None, - path: Optional[str] = None - ) -> Tuple: + def _get_min_max_params( + self, cvs_bounds: Optional[Sequence] = None, path: Optional[str] = None + ) -> Tuple: """ Compute min and max parameters for generating fes.dat files from HILLS.dat files. 
@@ -1992,8 +2186,10 @@ def _get_min_max_params(self, """ if cvs_bounds is None: - logger.info('CVs bounds were not supplied, generating min and max ' - 'parameters automatically') + logger.info( + 'CVs bounds were not supplied, generating min and max ' + 'parameters automatically' + ) initial_path = os.getcwd() if path is not None: @@ -2027,14 +2223,16 @@ def _get_min_max_params(self, else: cvs_bounds_checked = self._check_cv_bounds(cvs_bounds) - min_params = [str(cv_bounds[0]) for cv_bounds in cvs_bounds_checked] - max_params = [str(cv_bounds[1]) for cv_bounds in cvs_bounds_checked] + min_params = [ + str(cv_bounds[0]) for cv_bounds in cvs_bounds_checked + ] + max_params = [ + str(cv_bounds[1]) for cv_bounds in cvs_bounds_checked + ] return ','.join(min_params), ','.join(max_params) - def _check_cv_bounds(self, - cvs_bounds: Sequence - ) -> Sequence: + def _check_cv_bounds(self, cvs_bounds: Sequence) -> Sequence: """ Check the validity of the supplied CVs bounds and returns the bounds in a universal format. 
@@ -2051,17 +2249,21 @@ def _check_cv_bounds(self, """ if isinstance(cvs_bounds, list) or isinstance(cvs_bounds, tuple): - if len(cvs_bounds) == 0: - raise TypeError('CVs bounds cannot be an empty list or ' - 'an empty tuple') - - elif all(isinstance(cv_bounds, list) or isinstance(cv_bounds, tuple) - for cv_bounds in cvs_bounds): + raise TypeError( + 'CVs bounds cannot be an empty list or ' 'an empty tuple' + ) + + elif all( + isinstance(cv_bounds, list) or isinstance(cv_bounds, tuple) + for cv_bounds in cvs_bounds + ): _cvs_bounds = cvs_bounds - elif all(isinstance(cv_bound, float) or isinstance(cv_bound, int) - for cv_bound in cvs_bounds): + elif all( + isinstance(cv_bound, float) or isinstance(cv_bound, int) + for cv_bound in cvs_bounds + ): _cvs_bounds = [cvs_bounds] else: @@ -2071,7 +2273,9 @@ def _check_cv_bounds(self, raise TypeError('CVs bounds are in incorrect format') if len(_cvs_bounds) != self.n_cvs: - raise ValueError('The number of supplied CVs bounds is not equal ' - 'to the number of CVs used in metadynamics') + raise ValueError( + 'The number of supplied CVs bounds is not equal ' + 'to the number of CVs used in metadynamics' + ) return _cvs_bounds diff --git a/mlptrain/sampling/plumed.py b/mlptrain/sampling/plumed.py index 69cf5ac6..04d2f61f 100644 --- a/mlptrain/sampling/plumed.py +++ b/mlptrain/sampling/plumed.py @@ -1,7 +1,6 @@ import os import mlptrain import numpy as np -import matplotlib.pyplot as plt from typing import Sequence, List, Tuple, Dict, Optional, Union from copy import deepcopy from ase import units as ase_units @@ -20,6 +19,7 @@ class PlumedCalculator(Plumed): this calculator computes unbiased energies and forces, and computes PLUMED energy and force biases separately. 
""" + implemented_properties = ['energy', 'forces', 'energy_bias', 'forces_bias'] def compute_energy_and_forces(self, pos, istep) -> Tuple: @@ -42,17 +42,19 @@ def compute_energy_and_forces(self, pos, istep) -> Tuple: return energy, forces, energy_bias[0], forces_bias - def calculate(self, - atoms=None, - properties=['energy', 'forces', 'energy_bias', 'forces_bias'], - system_changes=all_changes - ) -> None: + def calculate( + self, + atoms=None, + properties=['energy', 'forces', 'energy_bias', 'forces_bias'], + system_changes=all_changes, + ) -> None: """Compute the properties and attach them to the results""" Calculator.calculate(self, atoms, properties, system_changes) - comp = self.compute_energy_and_forces(self.atoms.get_positions(), - self.istep) + comp = self.compute_energy_and_forces( + self.atoms.get_positions(), self.istep + ) energy, forces, energy_bias, forces_bias = comp self.istep += 1 @@ -71,9 +73,11 @@ class PlumedBias(ASEConstraint): simulations """ - def __init__(self, - cvs: Union[Sequence['_PlumedCV'], '_PlumedCV'] = None, - filename: str = None): + def __init__( + self, + cvs: Union[Sequence['_PlumedCV'], '_PlumedCV'] = None, + filename: str = None, + ): """ Class for storing collective variables and parameters used in biased simulations, parameters are not initialised with the object and have @@ -89,15 +93,15 @@ def __init__(self, filename: (str) Complete PLUMED input file """ - self.setup: Optional[List[str]] = None - self.cv_files: Optional[Tuple[str, str]] = None + self.setup: Optional[List[str]] = None + self.cv_files: Optional[Tuple[str, str]] = None - self.pace: Optional[int] = None - self.width: Optional[Union[Sequence[float], float]] = None - self.height: Optional[float] = None - self.biasfactor: Optional[float] = None + self.pace: Optional[int] = None + self.width: Optional[Union[Sequence[float], float]] = None + self.height: Optional[float] = None + self.biasfactor: Optional[float] = None - self.metad_cvs: Optional[List['_PlumedCV']] 
= None + self.metad_cvs: Optional[List['_PlumedCV']] = None for param_name in ['min', 'max', 'bin', 'wstride', 'wfile', 'rfile']: setattr(self, f'metad_grid_{param_name}', None) @@ -111,9 +115,11 @@ def __init__(self, self.cvs = cvs else: - raise TypeError('PLUMED bias instantiation requires ' - 'a list of collective variables (CVs) ' - 'or a file containing PLUMED-type input') + raise TypeError( + 'PLUMED bias instantiation requires ' + 'a list of collective variables (CVs) ' + 'or a file containing PLUMED-type input' + ) @property def from_file(self) -> bool: @@ -180,7 +186,6 @@ def metad_grid_setup(self) -> str: param = getattr(self, f'metad_grid_{param_name}') if param is not None: - if isinstance(param, list) or isinstance(param, tuple): param_str = ','.join(str(p) for p in param) @@ -201,20 +206,21 @@ def biasfactor_setup(self) -> str: else: return '' - def _set_metad_params(self, - pace: int, - width: Union[Sequence[float], float], - height: float, - biasfactor: Optional[float] = None, - cvs: Optional = None, - grid_min: Union[Sequence[float], float] = None, - grid_max: Union[Sequence[float], float] = None, - grid_bin: Union[Sequence[float], float] = None, - grid_wstride: Optional[int] = None, - grid_wfile: Optional[str] = None, - grid_rfile: Optional[str] = None, - **kwargs - ) -> None: + def _set_metad_params( + self, + pace: int, + width: Union[Sequence[float], float], + height: float, + biasfactor: Optional[float] = None, + cvs: Optional = None, + grid_min: Union[Sequence[float], float] = None, + grid_max: Union[Sequence[float], float] = None, + grid_bin: Union[Sequence[float], float] = None, + grid_wstride: Optional[int] = None, + grid_wfile: Optional[str] = None, + grid_rfile: Optional[str] = None, + **kwargs, + ) -> None: """ Define parameters used in (well-tempered) metadynamics. 
@@ -263,7 +269,6 @@ def _set_metad_params(self, self.pace = pace if isinstance(width, list) or isinstance(width, tuple): - if len(width) == 0: raise TypeError('The provided width sequence is empty') @@ -281,8 +286,10 @@ def _set_metad_params(self, self.width = [width] if len(self.width) != self.n_metad_cvs: - raise ValueError('The number of supplied widths (σ) does not ' - 'match the number of collective variables') + raise ValueError( + 'The number of supplied widths (σ) does not ' + 'match the number of collective variables' + ) if height < 0: raise ValueError('Gaussian height (ω) must be non-negative float') @@ -296,17 +303,19 @@ def _set_metad_params(self, else: self.biasfactor = biasfactor - self._set_metad_grid_params(grid_min=grid_min, - grid_max=grid_max, - grid_bin=grid_bin, - grid_wstride=grid_wstride, - grid_wfile=grid_wfile, - grid_rfile=grid_rfile) + self._set_metad_grid_params( + grid_min=grid_min, + grid_max=grid_max, + grid_bin=grid_bin, + grid_wstride=grid_wstride, + grid_wfile=grid_wfile, + grid_rfile=grid_rfile, + ) return None - def _set_metad_cvs(self, - cvs: Union[Sequence['_PlumedCV'], '_PlumedCV'] = None - ) -> None: + def _set_metad_cvs( + self, cvs: Union[Sequence['_PlumedCV'], '_PlumedCV'] = None + ) -> None: """ Attach PLUMED collective variables to PlumedBias which will be used in metadynamics. 
@@ -326,8 +335,10 @@ def _set_metad_cvs(self, for cv in cvs: if cv not in self.cvs: - raise ValueError('Supplied CVs must be a subset of CVs ' - 'already attached to the PlumedBias') + raise ValueError( + 'Supplied CVs must be a subset of CVs ' + 'already attached to the PlumedBias' + ) self.metad_cvs = cvs @@ -339,14 +350,15 @@ def _set_metad_cvs(self, return None - def _set_metad_grid_params(self, - grid_min: Union[Sequence[float], float] = None, - grid_max: Union[Sequence[float], float] = None, - grid_bin: Union[Sequence[float], float] = None, - grid_wstride: Optional[int] = None, - grid_wfile: Optional[str] = None, - grid_rfile: Optional[str] = None - ) -> None: + def _set_metad_grid_params( + self, + grid_min: Union[Sequence[float], float] = None, + grid_max: Union[Sequence[float], float] = None, + grid_bin: Union[Sequence[float], float] = None, + grid_wstride: Optional[int] = None, + grid_wfile: Optional[str] = None, + grid_rfile: Optional[str] = None, + ) -> None: """ Define grid parameters used in (well-tempered) metadynamics. 
Grid bounds (min and max) must cover the whole configuration space that the @@ -373,25 +385,28 @@ def _set_metad_grid_params(self, grid_rfile: (str) Name of the file to read the grid from """ - _sequences = {'grid_min': grid_min, - 'grid_max': grid_max, - 'grid_bin': grid_bin} + _sequences = { + 'grid_min': grid_min, + 'grid_max': grid_max, + 'grid_bin': grid_bin, + } if grid_bin is None: _sequences.pop('grid_bin') for param_name, params in _sequences.items(): - if isinstance(params, list) or isinstance(params, tuple): - if len(params) == 0: - raise ValueError('The supplied parameter sequence ' - 'is empty') + raise ValueError( + 'The supplied parameter sequence ' 'is empty' + ) elif len(params) != self.n_metad_cvs: - raise ValueError('The length of the parameter sequence ' - 'does not match the number of CVs used ' - 'in metadynamics') + raise ValueError( + 'The length of the parameter sequence ' + 'does not match the number of CVs used ' + 'in metadynamics' + ) else: setattr(self, f'metad_{param_name}', params) @@ -399,9 +414,11 @@ def _set_metad_grid_params(self, elif params is not None and self.n_metad_cvs == 1: setattr(self, f'metad_{param_name}', [params]) - _single_params = {'grid_wstride': grid_wstride, - 'grid_wfile': grid_wfile, - 'grid_rfile': grid_rfile} + _single_params = { + 'grid_wstride': grid_wstride, + 'grid_wfile': grid_wfile, + 'grid_rfile': grid_rfile, + } for param_name, param in _single_params.items(): if param is not None: @@ -435,10 +452,12 @@ def _attach_cv_files(self, cv_filenames: List[str]) -> None: for filename in cv_filenames: if not os.path.exists(filename): - raise FileNotFoundError(f'File {filename}, which is ' - f'required for defining one of the ' - f'CVs was not found in the ' - 'current directory') + raise FileNotFoundError( + f'File {filename}, which is ' + f'required for defining one of the ' + f'CVs was not found in the ' + 'current directory' + ) with open(filename, 'r') as f: data = f.read() @@ -462,16 +481,17 @@ def 
write_cv_files(self) -> None: return None - def initialise_for_metad_al(self, - width: Union[Sequence[float], float], - pace: int = 20, - height: Optional[float] = None, - biasfactor: Optional[float] = None, - cvs: Optional = None, - grid_min: Union[Sequence[float], float] = None, - grid_max: Union[Sequence[float], float] = None, - grid_bin: Union[Sequence[float], float] = None - ) -> None: + def initialise_for_metad_al( + self, + width: Union[Sequence[float], float], + pace: int = 20, + height: Optional[float] = None, + biasfactor: Optional[float] = None, + cvs: Optional = None, + grid_min: Union[Sequence[float], float] = None, + grid_max: Union[Sequence[float], float] = None, + grid_bin: Union[Sequence[float], float] = None, + ) -> None: """ Initialise PlumedBias for metadynamics active learning by setting the required parameters. @@ -511,14 +531,16 @@ def initialise_for_metad_al(self, if height is None: height = 0 - self._set_metad_params(pace=pace, - width=width, - height=height, - biasfactor=biasfactor, - cvs=cvs, - grid_min=grid_min, - grid_max=grid_max, - grid_bin=grid_bin) + self._set_metad_params( + pace=pace, + width=width, + height=height, + biasfactor=biasfactor, + cvs=cvs, + grid_min=grid_min, + grid_max=grid_max, + grid_bin=grid_bin, + ) return None @@ -550,9 +572,11 @@ def _strip_setup(self) -> None: """ if self.setup is None: - raise TypeError('Setup of the bias is not initialised, if you ' - 'want to strip the setup make sure to use a bias ' - 'which was initialised using a PLUMED input file') + raise TypeError( + 'Setup of the bias is not initialised, if you ' + 'want to strip the setup make sure to use a bias ' + 'which was initialised using a PLUMED input file' + ) _stripped_setup = [] for line in self.setup: @@ -564,8 +588,9 @@ def _strip_setup(self) -> None: return None @staticmethod - def _check_cvs_format(cvs: Union[Sequence['_PlumedCV'], '_PlumedCV'] - ) -> List['_PlumedCV']: + def _check_cvs_format( + cvs: Union[Sequence['_PlumedCV'], 
'_PlumedCV'], + ) -> List['_PlumedCV']: """ Check if the supplied collective variables are in the correct format @@ -573,10 +598,10 @@ def _check_cvs_format(cvs: Union[Sequence['_PlumedCV'], '_PlumedCV'] # e.g. cvs == [cv1, cv2]; (cv1, cv2) if isinstance(cvs, list) or isinstance(cvs, tuple): - if len(cvs) == 0: - raise TypeError('The provided collective variable ' - 'sequence is empty') + raise TypeError( + 'The provided collective variable ' 'sequence is empty' + ) elif all(issubclass(cv.__class__, _PlumedCV) for cv in cvs): pass @@ -615,11 +640,13 @@ class _PlumedCV: """Parent class containing methods for initialising PLUMED collective variables""" - def __init__(self, - name: str = None, - atom_groups: Sequence = None, - filename: str = None, - component: Optional[str] = None): + def __init__( + self, + name: str = None, + atom_groups: Sequence = None, + filename: str = None, + component: Optional[str] = None, + ): """ This class contains methods to initialise PLUMED collective variables (CVs) and only acts as a parent class which should not be used to @@ -654,16 +681,16 @@ def __init__(self, e.g. 'spath' for PATH collective variable. 
""" - self.setup: List = [] - self.files: Optional[Tuple[str, str]] = None + self.setup: List = [] + self.files: Optional[Tuple[str, str]] = None - self.name: Optional[str] = None - self.units: Optional[str] = None - self.dof_names: Optional[List[str]] = None - self.dof_units: Optional[List[str]] = None + self.name: Optional[str] = None + self.units: Optional[str] = None + self.dof_names: Optional[List[str]] = None + self.dof_units: Optional[List[str]] = None - self.lower_wall: Optional[Dict] = None - self.upper_wall: Optional[Dict] = None + self.lower_wall: Optional[Dict] = None + self.upper_wall: Optional[Dict] = None if filename is not None: self._from_file(filename, component) @@ -672,9 +699,11 @@ def __init__(self, self._from_atom_groups(name, atom_groups) else: - raise TypeError('Collective variable instantiation requires ' - 'groups of atom indices (DOFs) ' - 'or a file containing PLUMED-type input') + raise TypeError( + 'Collective variable instantiation requires ' + 'groups of atom indices (DOFs) ' + 'or a file containing PLUMED-type input' + ) @property def dof_sequence(self) -> str: @@ -682,11 +711,9 @@ def dof_sequence(self) -> str: return ','.join(self.dof_names) - def attach_lower_wall(self, - location: Union[float, str], - kappa: float, - exp: float = 2 - ) -> None: + def attach_lower_wall( + self, location: Union[float, str], kappa: float, exp: float = 2 + ) -> None: """ Attach lower wall bias to the collective variable. 
@@ -702,23 +729,26 @@ def attach_lower_wall(self, """ if self.lower_wall is not None: - raise TypeError(f'Lower wall for {self.name} CV has already ' - 'been set') + raise TypeError( + f'Lower wall for {self.name} CV has already ' 'been set' + ) self.lower_wall = {'location': location, 'kappa': kappa, 'exp': exp} - self.setup.extend(['LOWER_WALLS ' - f'ARG={self.name} ' - f'AT={location} ' - f'KAPPA={kappa} ' - f'EXP={exp}']) + self.setup.extend( + [ + 'LOWER_WALLS ' + f'ARG={self.name} ' + f'AT={location} ' + f'KAPPA={kappa} ' + f'EXP={exp}' + ] + ) return None - def attach_upper_wall(self, - location: Union[float, str], - kappa: float, - exp: float = 2 - ) -> None: + def attach_upper_wall( + self, location: Union[float, str], kappa: float, exp: float = 2 + ) -> None: """ Attach upper wall bias to the collective variable. @@ -734,15 +764,20 @@ def attach_upper_wall(self, """ if self.upper_wall is not None: - raise TypeError(f'Upper wall for {self.name} CV has already ' - 'been set') + raise TypeError( + f'Upper wall for {self.name} CV has already ' 'been set' + ) self.upper_wall = {'location': location, 'kappa': kappa, 'exp': exp} - self.setup.extend(['UPPER_WALLS ' - f'ARG={self.name} ' - f'AT={location} ' - f'KAPPA={kappa} ' - f'EXP={exp}']) + self.setup.extend( + [ + 'UPPER_WALLS ' + f'ARG={self.name} ' + f'AT={location} ' + f'KAPPA={kappa} ' + f'EXP={exp}' + ] + ) return None @@ -759,8 +794,10 @@ def _from_file(self, filename: str, component: str) -> None: _last_line = self.setup[-1] if _last_line.find(':') == -1: - raise ValueError('Supply a name to the collective variable on ' - f'the last line of {filename} file.') + raise ValueError( + 'Supply a name to the collective variable on ' + f'the last line of {filename} file.' 
+ ) _name = _last_line.split(':')[0] @@ -785,10 +822,12 @@ def _attach_files(self, filenames: List[str]) -> None: for filename in filenames: if not os.path.exists(filename): - raise FileNotFoundError(f'File {filename}, which is ' - f'required for defining the CV ' - f'{self.name} was not found in the ' - 'current directory') + raise FileNotFoundError( + f'File {filename}, which is ' + f'required for defining the CV ' + f'{self.name} was not found in the ' + 'current directory' + ) with open(filename, 'r') as f: data = f.read() @@ -815,27 +854,28 @@ def _from_atom_groups(self, name: str, atom_groups: Sequence) -> None: self.dof_names, self.dof_units = [], [] if isinstance(atom_groups, list) or isinstance(atom_groups, tuple): - if len(atom_groups) == 0: - raise TypeError('Atom groups cannot be an empty list or an ' - 'empty tuple') + raise TypeError( + 'Atom groups cannot be an empty list or an ' 'empty tuple' + ) # e.g. atom_groups == [(1, 2), (3, 4)]; ([0, 1]) - elif all(isinstance(atom_group, list) - or isinstance(atom_group, tuple) - for atom_group in atom_groups): - + elif all( + isinstance(atom_group, list) or isinstance(atom_group, tuple) + for atom_group in atom_groups + ): for idx, atom_group in enumerate(atom_groups): self._atom_group_to_dof(idx=idx, atom_group=atom_group) # e.g. 
atom_groups = [0, 1] elif all(isinstance(idx, int) for idx in atom_groups): - self._atom_group_to_dof(idx=0, atom_group=atom_groups) else: - raise TypeError('Elements of atom_groups must all be ' - 'sequences or all be integers') + raise TypeError( + 'Elements of atom_groups must all be ' + 'sequences or all be integers' + ) else: raise TypeError('Atom groups are in incorrect format') @@ -850,8 +890,10 @@ def _check_name(self) -> None: _illegal_substrings = ['fes', 'colvar', 'HILLS'] if any(substr in self.name for substr in _illegal_substrings): - raise ValueError('Please do not use "fes", "colvar", "HILLS" in ' - 'your CV names') + raise ValueError( + 'Please do not use "fes", "colvar", "HILLS" in ' + 'your CV names' + ) return None @@ -869,27 +911,26 @@ def _atom_group_to_dof(self, idx: int, atom_group: Sequence) -> None: dof_name = f'{self.name}_dist{idx + 1}' self.dof_names.append(dof_name) self.dof_units.append('Å') - self.setup.extend([f'{dof_name}: ' - f'DISTANCE ATOMS={atoms}']) + self.setup.extend([f'{dof_name}: ' f'DISTANCE ATOMS={atoms}']) if len(atom_list) == 3: dof_name = f'{self.name}_ang{idx + 1}' self.dof_names.append(dof_name) self.dof_units.append('rad') - self.setup.extend([f'{dof_name}: ' - f'ANGLE ATOMS={atoms}']) + self.setup.extend([f'{dof_name}: ' f'ANGLE ATOMS={atoms}']) if len(atom_list) == 4: dof_name = f'{self.name}_tor{idx + 1}' self.dof_names.append(dof_name) self.dof_units.append('rad') - self.setup.extend([f'{dof_name}: ' - f'TORSION ATOMS={atoms}']) + self.setup.extend([f'{dof_name}: ' f'TORSION ATOMS={atoms}']) if len(atom_list) > 4: - raise NotImplementedError('Instatiation using atom groups ' - 'is only implemented for groups ' - 'not larger than four') + raise NotImplementedError( + 'Instatiation using atom groups ' + 'is only implemented for groups ' + 'not larger than four' + ) return None @@ -897,13 +938,14 @@ def _set_units(self, units: Optional[str] = None) -> None: """Set units of the collective variable as a string""" if 
self.dof_units is not None: - if len(set(self.dof_units)) == 1: self.units = set(self.dof_units).pop() else: - logger.warning('DOFs in a defined CV have different units, ' - 'setting units of this CV to None') + logger.warning( + 'DOFs in a defined CV have different units, ' + 'setting units of this CV to None' + ) else: self.units = units @@ -915,9 +957,7 @@ class PlumedAverageCV(_PlumedCV): """Class used to initialise a PLUMED collective variable as an average between multiple degrees of freedom""" - def __init__(self, - name: str, - atom_groups: Sequence = None): + def __init__(self, name: str, atom_groups: Sequence = None): """ PLUMED collective variable as an average between multiple degrees of freedom (distances, angles, torsions), @@ -933,28 +973,29 @@ def __init__(self, which are used to generate DOFs """ - super().__init__(name=name, - atom_groups=atom_groups) + super().__init__(name=name, atom_groups=atom_groups) self._set_units() dof_sum = '+'.join(self.dof_names) func = f'{1 / len(self.dof_names)}*({dof_sum})' - self.setup.extend([f'{self.name}: ' - f'CUSTOM ARG={self.dof_sequence} ' - f'VAR={self.dof_sequence} ' - f'FUNC={func} ' - f'PERIODIC=NO']) + self.setup.extend( + [ + f'{self.name}: ' + f'CUSTOM ARG={self.dof_sequence} ' + f'VAR={self.dof_sequence} ' + f'FUNC={func} ' + f'PERIODIC=NO' + ] + ) class PlumedDifferenceCV(_PlumedCV): """Class used to initialise a PLUMED collective variable as a difference between two degrees of freedom""" - def __init__(self, - name: str, - atom_groups: Sequence = None): + def __init__(self, name: str, atom_groups: Sequence = None): """ PLUMED collective variable as a difference between two degrees of freedom (distances, angles, torsions), @@ -970,31 +1011,37 @@ def __init__(self, which are used to generate DOFs """ - super().__init__(name=name, - atom_groups=atom_groups) + super().__init__(name=name, atom_groups=atom_groups) self._set_units() if len(self.dof_names) != 2: - raise ValueError('DifferenceCV must 
comprise exactly two ' - 'groups of atoms') + raise ValueError( + 'DifferenceCV must comprise exactly two ' 'groups of atoms' + ) func = f'{self.dof_names[0]}-{self.dof_names[-1]}' - self.setup.extend([f'{self.name}: ' - f'CUSTOM ARG={self.dof_sequence} ' - f'VAR={self.dof_sequence} ' - f'FUNC={func} ' - f'PERIODIC=NO']) + self.setup.extend( + [ + f'{self.name}: ' + f'CUSTOM ARG={self.dof_sequence} ' + f'VAR={self.dof_sequence} ' + f'FUNC={func} ' + f'PERIODIC=NO' + ] + ) class PlumedCustomCV(_PlumedCV): """Class used to initialise a PLUMED collective variable from a file""" - def __init__(self, - filename: str, - component: Optional[str] = None, - units: Optional[str] = None): + def __init__( + self, + filename: str, + component: Optional[str] = None, + units: Optional[str] = None, + ): """ PLUMED collective variable from a file. The file must be written in the style of a PLUMED input file, but only contain input used in the @@ -1016,8 +1063,7 @@ def __init__(self, units: (str) Units of the collective variable, used in plots """ - super().__init__(filename=filename, - component=component) + super().__init__(filename=filename, component=component) self.units = units @@ -1052,7 +1098,6 @@ def _find_files(setup: List[str]) -> List: filenames = [] for line in setup: - if _defines_cv(line): line = line.split() @@ -1084,13 +1129,14 @@ def _find_args(line: str) -> List: return _args -def plot_cv_versus_time(filename: str, - style: str = 'trajectory', - time_units: str = 'ps', - cv_units: Optional[str] = None, - cv_limits: Optional[Sequence[float]] = None, - label: Optional[str] = None, - ) -> None: +def plot_cv_versus_time( + filename: str, + style: str = 'trajectory', + time_units: str = 'ps', + cv_units: Optional[str] = None, + cv_limits: Optional[Sequence[float]] = None, + label: Optional[str] = None, +) -> None: """ Plot a collective variable as a function of time from a given colvar file. Only plot the first collective variable in the colvar file. 
@@ -1112,6 +1158,7 @@ def plot_cv_versus_time(filename: str, multiple plots of the same CVs are generated in the same directory """ + import matplotlib.pyplot as plt with open(filename, 'r') as f: header = f.readlines()[0] @@ -1154,12 +1201,13 @@ def plot_cv_versus_time(filename: str, return None -def plot_cv1_and_cv2(filenames: Sequence[str], - style: str = 'scatter', - cvs_units: Optional[Sequence[str]] = None, - cvs_limits: Optional[Sequence[Sequence[float]]] = None, - label: Optional[str] = None - ) -> None: +def plot_cv1_and_cv2( + filenames: Sequence[str], + style: str = 'scatter', + cvs_units: Optional[Sequence[str]] = None, + cvs_limits: Optional[Sequence[Sequence[float]]] = None, + label: Optional[str] = None, +) -> None: """ Plot the trajectory of the system by tracking two collective variables using two colvar files. The function only works for two collective @@ -1182,10 +1230,11 @@ def plot_cv1_and_cv2(filenames: Sequence[str], directory """ + import matplotlib.pyplot as plt + cvs_names, cvs_arrays = [], [] for filename in filenames: - with open(filename, 'r') as f: header = f.readlines()[0] @@ -1230,10 +1279,9 @@ def plot_cv1_and_cv2(filenames: Sequence[str], return None -def plumed_setup(bias: 'mlptrain.PlumedBias', - temp: float, - interval: int, - **kwargs) -> List[str]: +def plumed_setup( + bias: 'mlptrain.PlumedBias', temp: float, interval: int, **kwargs +) -> List[str]: """ Generate a list which represents the PLUMED input file @@ -1252,10 +1300,12 @@ def plumed_setup(bias: 'mlptrain.PlumedBias', # Converting PLUMED units to ASE units time_conversion = 1 / (ase_units.fs * 1000) energy_conversion = ase_units.mol / ase_units.kJ - units_setup = ['UNITS ' - 'LENGTH=A ' - f'TIME={time_conversion} ' - f'ENERGY={energy_conversion}'] + units_setup = [ + 'UNITS ' + 'LENGTH=A ' + f'TIME={time_conversion} ' + f'ENERGY={energy_conversion}' + ] if bias.from_file: setup = bias.setup @@ -1267,8 +1317,10 @@ def plumed_setup(bias: 'mlptrain.PlumedBias', return 
setup else: - logger.warning('Unit conversion not found in PLUMED input file, ' - 'adding conversion from PLUMED units to ASE units') + logger.warning( + 'Unit conversion not found in PLUMED input file, ' + 'adding conversion from PLUMED units to ASE units' + ) setup.insert(0, units_setup[0]) return setup @@ -1281,7 +1333,6 @@ def plumed_setup(bias: 'mlptrain.PlumedBias', # Metadynamics if bias.metadynamics: - hills_filename = get_hills_filename(**kwargs) if 'load_metad_bias' in kwargs and kwargs['load_metad_bias'] is True: @@ -1290,21 +1341,22 @@ def plumed_setup(bias: 'mlptrain.PlumedBias', else: load_metad_bias_setup = '' - metad_setup = ['metad: METAD ' - f'ARG={bias.metad_cv_sequence} ' - f'PACE={bias.pace} ' - f'HEIGHT={bias.height} ' - f'SIGMA={bias.width_sequence} ' - f'TEMP={temp} ' - f'{bias.biasfactor_setup}' - f'{bias.metad_grid_setup}' - f'{load_metad_bias_setup}' - f'FILE={hills_filename}'] + metad_setup = [ + 'metad: METAD ' + f'ARG={bias.metad_cv_sequence} ' + f'PACE={bias.pace} ' + f'HEIGHT={bias.height} ' + f'SIGMA={bias.width_sequence} ' + f'TEMP={temp} ' + f'{bias.biasfactor_setup}' + f'{bias.metad_grid_setup}' + f'{load_metad_bias_setup}' + f'FILE={hills_filename}' + ] setup.extend(metad_setup) # Printing trajectory in terms of DOFs and CVs for cv in bias.cvs: - colvar_filename = get_colvar_filename(cv, **kwargs) if cv.dof_names is not None: @@ -1313,10 +1365,12 @@ def plumed_setup(bias: 'mlptrain.PlumedBias', else: args = cv.name - print_setup = ['PRINT ' - f'ARG={args} ' - f'FILE={colvar_filename} ' - f'STRIDE={interval}'] + print_setup = [ + 'PRINT ' + f'ARG={args} ' + f'FILE={colvar_filename} ' + f'STRIDE={interval}' + ] setup.extend(print_setup) if 'remove_print' in kwargs and kwargs['remove_print'] is True: @@ -1335,8 +1389,7 @@ def plumed_setup(bias: 'mlptrain.PlumedBias', return setup -def get_colvar_filename(cv: '_PlumedCV', - **kwargs) -> str: +def get_colvar_filename(cv: '_PlumedCV', **kwargs) -> str: """ Return the name of the file 
where the trajectory in terms of collective variable values would be written @@ -1372,29 +1425,132 @@ def get_hills_filename(**kwargs) -> str: return filename -CVS = ['GROUP', 'CENTER', 'CENTER_OF_MULTICOLVAR', 'COM', 'FIXEDATOM', 'GHOST', - 'ADAPTIVE_PATH', 'ALPHABETA', 'ALPHARMSD', 'ANGLE', 'ANTIBETARMSD', - 'CELL', 'CONSTANT', 'CONTACTMAP', 'COORDINATION', 'DHENERGY', 'DIHCOR', - 'DIMER', 'DIPOLE', 'DISTANCE', 'DISTANCE_FROM_CONTOUR', 'EEFSOLV', - 'ENERGY', 'ERMSD', 'EXTRACV', 'FAKE', 'GHBFIX', 'GPROPERTYMAP', - 'GYRATION', 'PARABETARMSD', 'PATH', 'PATHMSD', 'PCAVARS', 'POSITION', - 'PROJECTION_ON_AXIS', 'PROPERTYMAP', 'PUCKERING', 'TEMPLATE', 'TORSION', - 'VOLUME', 'DRMSD', 'MULTI_RMSD', 'PCARMSD', 'RMSD', 'TARGET', 'COMBINE', - 'CUSTOM', 'EMSEMBLE', 'FUNCPATHGENERAL', 'FUNCPATHMSD', 'LOCALENSEMBLE', - 'MATHEVAL', 'PIECEWISE', 'SORT', 'STATS', 'ANGLES', 'BOND_DIRECTIONS', - 'BRIDGE', 'COORDINATIONNUMBER', 'DENSITY', 'DISTANCES', 'FCCUBIC', - 'ENVIRONMENTSIMILARITY', 'FCCUBIC', 'HBPAMM_SH', 'INPLANEDISTANCES', - 'MOLECULES', 'PLANES', 'Q3', 'Q4', 'Q6', 'SIMPLECUBIC', 'TETRAHEDRAL', - 'TORSIONS', 'XDISTANCES', 'XYDISTANCES', 'XYTORSIONS', 'XZDISTANCES', - 'XZTORSIONS', 'YANGLES', 'YDISTANCES', 'YXTORSIONS', 'YZDISTANCES', - 'YZTORSIONS', 'ZANGLES', 'ZDISTANCES', 'ZXTORSIONS', 'ZYTORSIONS', - 'MFILTER_BETWEEN', 'MFILTER_LESS', 'MFILTER_MORE', 'AROUND', 'CAVITY', - 'INCYLINDER', 'INENVELOPE', 'INSPHERE', 'TETRAHEDRALPORE', 'GRADIENT', - 'INTERMOLECULARTORSIONS', 'LOCAL_AVERAGE', 'LOCAL_Q3', 'LOCAL_Q4', - 'LOCAL_Q6', 'MCOLV_COMBINE', 'MCOLV_PRODUCT', 'NLINKS', 'PAMM', 'SMAC', - 'POLYMER_ANGLES', 'MTRANSFORM_BETWEEN', 'MTRANSFORM_LESS', - 'MTRANSFORM_MORE', 'ALIGNED_MATRIX', 'CONTACT_MATRIX', 'HBOND_MATRIX', - 'HBPAMM_MATRIX', 'SMAC_MATRIX', 'TOPOLOGY_MATRIX', 'COLUMNSUMS', - 'CLUSTER_WITHSURFACE', 'DFSCLUSTERING', 'ROWSUMS', 'SPRINT', - 'CLUSTER_DIAMETER', 'CLUSTER_DISTRIBUTION', 'CLUSTER_NATOMS', - 'CLUSTER_PROPERTIES'] +CVS = [ + 'GROUP', + 'CENTER', + 
'CENTER_OF_MULTICOLVAR', + 'COM', + 'FIXEDATOM', + 'GHOST', + 'ADAPTIVE_PATH', + 'ALPHABETA', + 'ALPHARMSD', + 'ANGLE', + 'ANTIBETARMSD', + 'CELL', + 'CONSTANT', + 'CONTACTMAP', + 'COORDINATION', + 'DHENERGY', + 'DIHCOR', + 'DIMER', + 'DIPOLE', + 'DISTANCE', + 'DISTANCE_FROM_CONTOUR', + 'EEFSOLV', + 'ENERGY', + 'ERMSD', + 'EXTRACV', + 'FAKE', + 'GHBFIX', + 'GPROPERTYMAP', + 'GYRATION', + 'PARABETARMSD', + 'PATH', + 'PATHMSD', + 'PCAVARS', + 'POSITION', + 'PROJECTION_ON_AXIS', + 'PROPERTYMAP', + 'PUCKERING', + 'TEMPLATE', + 'TORSION', + 'VOLUME', + 'DRMSD', + 'MULTI_RMSD', + 'PCARMSD', + 'RMSD', + 'TARGET', + 'COMBINE', + 'CUSTOM', + 'EMSEMBLE', + 'FUNCPATHGENERAL', + 'FUNCPATHMSD', + 'LOCALENSEMBLE', + 'MATHEVAL', + 'PIECEWISE', + 'SORT', + 'STATS', + 'ANGLES', + 'BOND_DIRECTIONS', + 'BRIDGE', + 'COORDINATIONNUMBER', + 'DENSITY', + 'DISTANCES', + 'FCCUBIC', + 'ENVIRONMENTSIMILARITY', + 'FCCUBIC', + 'HBPAMM_SH', + 'INPLANEDISTANCES', + 'MOLECULES', + 'PLANES', + 'Q3', + 'Q4', + 'Q6', + 'SIMPLECUBIC', + 'TETRAHEDRAL', + 'TORSIONS', + 'XDISTANCES', + 'XYDISTANCES', + 'XYTORSIONS', + 'XZDISTANCES', + 'XZTORSIONS', + 'YANGLES', + 'YDISTANCES', + 'YXTORSIONS', + 'YZDISTANCES', + 'YZTORSIONS', + 'ZANGLES', + 'ZDISTANCES', + 'ZXTORSIONS', + 'ZYTORSIONS', + 'MFILTER_BETWEEN', + 'MFILTER_LESS', + 'MFILTER_MORE', + 'AROUND', + 'CAVITY', + 'INCYLINDER', + 'INENVELOPE', + 'INSPHERE', + 'TETRAHEDRALPORE', + 'GRADIENT', + 'INTERMOLECULARTORSIONS', + 'LOCAL_AVERAGE', + 'LOCAL_Q3', + 'LOCAL_Q4', + 'LOCAL_Q6', + 'MCOLV_COMBINE', + 'MCOLV_PRODUCT', + 'NLINKS', + 'PAMM', + 'SMAC', + 'POLYMER_ANGLES', + 'MTRANSFORM_BETWEEN', + 'MTRANSFORM_LESS', + 'MTRANSFORM_MORE', + 'ALIGNED_MATRIX', + 'CONTACT_MATRIX', + 'HBOND_MATRIX', + 'HBPAMM_MATRIX', + 'SMAC_MATRIX', + 'TOPOLOGY_MATRIX', + 'COLUMNSUMS', + 'CLUSTER_WITHSURFACE', + 'DFSCLUSTERING', + 'ROWSUMS', + 'SPRINT', + 'CLUSTER_DIAMETER', + 'CLUSTER_DISTRIBUTION', + 'CLUSTER_NATOMS', + 'CLUSTER_PROPERTIES', +] diff --git 
a/mlptrain/sampling/reaction_coord.py b/mlptrain/sampling/reaction_coord.py index aa23ebf5..1d643999 100644 --- a/mlptrain/sampling/reaction_coord.py +++ b/mlptrain/sampling/reaction_coord.py @@ -8,12 +8,14 @@ class ReactionCoordinate(Function, ABC): - - def __call__(self, - arg: Union[ase.atoms.Atoms, - 'mlptrain.Configuration', - 'mlptrain.ConfigurationSet'] - ) -> Union[float, np.ndarray]: + def __call__( + self, + arg: Union[ + ase.atoms.Atoms, + 'mlptrain.Configuration', + 'mlptrain.ConfigurationSet', + ], + ) -> Union[float, np.ndarray]: """Value of this reaction coordinate""" if isinstance(arg, ase.atoms.Atoms): @@ -26,8 +28,10 @@ def __call__(self, return np.array([self._call(c.ase_atoms) for c in arg]) else: - raise ValueError('Reaction coordinate must be called using ase ' - 'atoms, a configuration or configuration set') + raise ValueError( + 'Reaction coordinate must be called using ase ' + 'atoms, a configuration or configuration set' + ) @abstractmethod def _call(self, atoms: ase.atoms.Atoms): @@ -37,8 +41,9 @@ def grad(self, atoms: ase.atoms.Atoms): """Gradient of this reaction coordinate for a set of ase atoms""" if not isinstance(atoms, ase.atoms.Atoms): - raise NotImplementedError('Grad must be called with a set of ' - 'ASE atoms') + raise NotImplementedError( + 'Grad must be called with a set of ' 'ASE atoms' + ) return self._grad(atoms) @@ -48,7 +53,6 @@ def _grad(self, atoms: ase.atoms.Atoms): class DummyCoordinate(ReactionCoordinate): - def _call(self, atoms: ase.atoms.Atoms): raise ValueError('Cannot call energy on a dummy coordinate') @@ -69,8 +73,10 @@ def __init__(self, *args): for arg in args: if len(arg) != 2: - raise ValueError('Distances must be initialised from a ' - '2-tuple of atom indices') + raise ValueError( + 'Distances must be initialised from a ' + '2-tuple of atom indices' + ) self.atom_pair_list.append(tuple(arg)) @@ -98,13 +104,18 @@ def __init__(self, *args): atom_idxs = [idx for pair in self.atom_pair_list for idx in pair] 
if len(set(atom_idxs)) != len(atom_idxs): - raise ValueError('All atoms in reaction coordinate must be ' - 'different') + raise ValueError( + 'All atoms in reaction coordinate must be ' 'different' + ) def _call(self, atoms: ase.atoms.Atoms): """Average distance between atom pairs""" - return np.mean([atoms.get_distance(i, j, mic=True) - for (i, j) in self.atom_pair_list]) + return np.mean( + [ + atoms.get_distance(i, j, mic=True) + for (i, j) in self.atom_pair_list + ] + ) def _grad(self, atoms: ase.atoms.Atoms): """Gradient of the average distance between atom pairs. Each component @@ -120,13 +131,15 @@ def _grad(self, atoms: ase.atoms.Atoms): derivative = np.zeros(shape=(len(atoms), 3)) - distances = [atoms.get_distance(i, j, mic=True) - for (i, j) in self.atom_pair_list] + distances = [ + atoms.get_distance(i, j, mic=True) + for (i, j) in self.atom_pair_list + ] for m, (i, j) in enumerate(self.atom_pair_list): - x_dist, y_dist, z_dist = [atoms[i].position[k] - - atoms[j].position[k] for k in - range(3)] + x_dist, y_dist, z_dist = [ + atoms[i].position[k] - atoms[j].position[k] for k in range(3) + ] x_i = x_dist / (self.n_pairs * distances[m]) y_i = y_dist / (self.n_pairs * distances[m]) @@ -163,13 +176,17 @@ def __init__(self, *args): _Distances.__init__(self, *args) if len(args) != 2: - raise ValueError('DifferenceDistance must comprise exactly two ' - 'pairs of atoms') + raise ValueError( + 'DifferenceDistance must comprise exactly two ' + 'pairs of atoms' + ) def _call(self, atoms: ase.atoms.Atoms): """Difference in distance between two atom pairs""" - dists = [atoms.get_distance(i, j, mic=True) - for (i, j) in self.atom_pair_list] + dists = [ + atoms.get_distance(i, j, mic=True) + for (i, j) in self.atom_pair_list + ] return dists[0] - dists[1] @@ -190,7 +207,6 @@ def _grad(self, atoms: ase.atoms.Atoms): derivative = np.zeros(shape=(len(atoms), 3)) for m, (i, j) in enumerate(self.atom_pair_list): - r = atoms[i].position - atoms[j].position r /= 
np.linalg.norm(r) diff --git a/mlptrain/sampling/tests/data.zip b/mlptrain/sampling/tests/data.zip deleted file mode 100644 index 6a2fff90..00000000 Binary files a/mlptrain/sampling/tests/data.zip and /dev/null differ diff --git a/mlptrain/sampling/tests/molecules.py b/mlptrain/sampling/tests/molecules.py deleted file mode 100644 index 69ef1c89..00000000 --- a/mlptrain/sampling/tests/molecules.py +++ /dev/null @@ -1,16 +0,0 @@ -import mlptrain as mlt -from autode.atoms import Atom - - -def _h2(): - """Dihydrogen molecule""" - atoms = [Atom('H', -0.80952, 2.49855, 0.), Atom('H', -0.34877, 1.961, 0.)] - return mlt.Molecule(atoms=atoms, charge=0, mult=1) - - -def _h2o(): - """Water molecule""" - atoms = [Atom('H', 2.32670, 0.51322, 0.), - Atom('H', 1.03337, 0.70894, -0.89333), - Atom('O', 1.35670, 0.51322, 0.)] - return mlt.Molecule(atoms=atoms, charge=0, mult=1) diff --git a/mlptrain/sampling/tests/test_md.py b/mlptrain/sampling/tests/test_md.py deleted file mode 100644 index bfd0fd5d..00000000 --- a/mlptrain/sampling/tests/test_md.py +++ /dev/null @@ -1,120 +0,0 @@ -import os -import numpy as np -import mlptrain as mlt -from ase.io.trajectory import Trajectory as ASETrajectory -from ase.constraints import Hookean -from .test_potential import TestPotential -from .molecules import _h2, _h2o -from .utils import work_in_zipped_dir -here = os.path.abspath(os.path.dirname(__file__)) - - -def _h2_configuration(): - system = mlt.System(_h2(), box=[50, 50, 50]) - config = system.random_configuration() - - return config - - -def _h2o_configuration(): - system = mlt.System(_h2o(), box=[50, 50, 50]) - config = system.random_configuration() - - return config - - -@work_in_zipped_dir(os.path.join(here, 'data.zip')) -def test_md_full_plumed_input(): - - bias = mlt.PlumedBias(filename='plumed_bias_nopath.dat') - - mlt.md.run_mlp_md(configuration=_h2o_configuration(), - mlp=TestPotential('1D'), - temp=300, - dt=1, - interval=10, - bias=bias, - kept_substrings=['.dat'], - ps=1) - - 
assert os.path.exists('colvar.dat') - assert os.path.exists('HILLS.dat') - - -@work_in_zipped_dir(os.path.join(here, 'data.zip')) -def test_md_restart(): - - atoms = _h2_configuration().ase_atoms - initial_trajectory = ASETrajectory('md_restart.traj', 'r', atoms) - - mlt.md.run_mlp_md(configuration=_h2_configuration(), - mlp=TestPotential('1D'), - temp=300, - dt=1, - interval=10, - restart_files=['md_restart.traj'], - ps=1) - - assert os.path.exists('md_restart.traj') - - final_trajectory = ASETrajectory('md_restart.traj', 'r', atoms) - - # 10 ps simulation with dt = 1 fs and interval of 10 -> 1001 frames - assert len(initial_trajectory) == 1001 - - # Adding 1 ps simulation with interval 10 -> 101 frames, but removing one - # duplicate frame - assert len(final_trajectory) == 1001 + 101 - 1 - - -@work_in_zipped_dir(os.path.join(here, 'data.zip')) -def test_md_save(): - - mlt.md.run_mlp_md(configuration=_h2_configuration(), - mlp=TestPotential('1D'), - temp=300, - dt=1, - interval=10, - kept_substrings=['.traj'], - ps=1, - save_fs=200) - - assert os.path.exists('trajectory.traj') - - assert not os.path.exists('trajectory_0fs.traj') - assert os.path.exists('trajectory_200fs.traj') - assert os.path.exists('trajectory_1000fs.traj') - assert not os.path.exists('trajectory_1200fs.traj') - - traj_200fs = ASETrajectory('trajectory_200fs.traj') - - # 200 ps / 10 interval == 20 frames; + 1 starting frame - assert len(traj_200fs) == 20 + 1 - - -@work_in_zipped_dir(os.path.join(here, 'data.zip')) -def test_md_traj_attachments(): - - cv1 = mlt.PlumedAverageCV('cv1', (0, 1)) - bias = mlt.PlumedBias(cvs=cv1) - - hookean_constraint = Hookean(a1=1, a2=2, k=100, rt=0.5) - - traj = mlt.md.run_mlp_md(configuration=_h2o_configuration(), - mlp=TestPotential('1D'), - temp=300, - dt=1, - interval=10, - bias=bias, - kept_substrings=['colvar_cv1.dat'], - constraints=[hookean_constraint], - ps=1) - - plumed_coordinates = np.loadtxt('colvar_cv1.dat', usecols=1) - - for i, config in 
enumerate(traj): - assert np.shape(config.plumed_coordinates) == (1,) - assert config.plumed_coordinates[0] == plumed_coordinates[i] - - assert all(bias_energy is not None for bias_energy in traj.bias_energies) - assert any(bias_energy != 0 for bias_energy in traj.bias_energies) diff --git a/mlptrain/sampling/tests/test_metadynamics.py b/mlptrain/sampling/tests/test_metadynamics.py deleted file mode 100644 index f0fbec3b..00000000 --- a/mlptrain/sampling/tests/test_metadynamics.py +++ /dev/null @@ -1,319 +0,0 @@ -import os -import numpy as np -import mlptrain as mlt -from ase.io.trajectory import Trajectory as ASETrajectory -from .test_potential import TestPotential -from .molecules import _h2, _h2o -from .utils import work_in_zipped_dir -mlt.Config.n_cores = 2 -here = os.path.abspath(os.path.dirname(__file__)) - - -def _h2_configuration(): - system = mlt.System(_h2(), box=[50, 50, 50]) - config = system.random_configuration() - - return config - - -def _h2o_configuration(): - system = mlt.System(_h2o(), box=[50, 50, 50]) - config = system.random_configuration() - - return config - - -def _run_metadynamics(metadynamics, - n_runs, - configuration=None, - al_iter=None, - save_sep=False, - all_to_xyz=False, - restart=False, - **kwargs): - - if configuration is None: - configuration = _h2_configuration() - - metadynamics.run_metadynamics(configuration=configuration, - mlp=TestPotential('1D'), - temp=300, - dt=1, - interval=10, - pace=100, - width=0.05, - height=0.1, - biasfactor=3, - al_iter=al_iter, - n_runs=n_runs, - save_sep=save_sep, - all_to_xyz=all_to_xyz, - restart=restart, - **kwargs) - - -@work_in_zipped_dir(os.path.join(here, 'data.zip')) -def test_run_metadynamics(): - - cv1 = mlt.PlumedAverageCV('cv1', (0, 1)) - metad = mlt.Metadynamics(cv1) - n_runs = 4 - - assert metad.bias is not None - - _run_metadynamics(metad, n_runs, all_to_xyz=True, save_fs=200, fs=500) - - assert os.path.exists('trajectories') - assert 
os.path.exists('trajectories/combined_trajectory.xyz') - - metad_dir = 'plumed_files/metadynamics' - for idx in range(1, n_runs + 1): - assert os.path.exists(f'trajectories/trajectory_{idx}.traj') - - for sim_time in [200, 400]: - assert os.path.exists(f'trajectories/' - f'trajectory_{idx}_{sim_time}fs.traj') - assert os.path.exists(f'trajectories/' - f'metad_{idx}_{sim_time}fs.xyz') - - assert os.path.exists(os.path.join(metad_dir, - f'colvar_cv1_{idx}.dat')) - assert os.path.exists(os.path.join(metad_dir, - f'HILLS_{idx}.dat')) - - assert os.path.exists(f'gaussian_heights/gaussian_heights_{idx}.pdf') - - metad.compute_fes(n_bins=100) - - for idx in range(1, n_runs + 1): - assert os.path.exists(f'plumed_files/metadynamics/fes_{idx}.dat') - - assert os.path.exists('fes_raw.npy') - fes_raw = np.load('fes_raw.npy') - - # 1 cv, 4 fes -> 5; 100 bins - assert np.shape(fes_raw) == (5, 100) - - metad.plot_fes('fes_raw.npy') - assert os.path.exists('metad_free_energy.pdf') - - metad.plot_fes_convergence(stride=2, n_surfaces=2) - - # 500 / 100: simulation time divided by the pace <=> number of gaussians - # Surfaces are computed every 2 gaussians - n_computed_surfaces = (500 / 100) // 2 - for idx in range(int(n_computed_surfaces)): - assert os.path.exists(f'plumed_files/fes_convergence/fes_1_{idx}.dat') - - assert os.path.exists('fes_convergence/fes_convergence_diff.pdf') - assert os.path.exists('fes_convergence/fes_convergence.pdf') - - -@work_in_zipped_dir(os.path.join(here, 'data.zip')) -def test_run_metadynamics_restart(): - - cv1 = mlt.PlumedAverageCV('cv1', (0, 1)) - metad = mlt.Metadynamics(cv1) - n_runs = 4 - - _run_metadynamics(metad, n_runs, fs=500) - - _run_metadynamics(metad, n_runs, restart=True, fs=500) - - n_steps = len(np.loadtxt('plumed_files/metadynamics/colvar_cv1_1.dat', - usecols=0)) - n_gaussians = len(np.loadtxt('plumed_files/metadynamics/HILLS_1.dat', - usecols=0)) - - # Adding two 500 fs simulations with interval 10 -> 51 frames each, but - # 
removing one duplicate frame - assert n_steps == 51 + 51 - 1 - assert n_gaussians == 5 + 5 - - assert os.path.exists('trajectories/trajectory_1.traj') - - trajectory = ASETrajectory('trajectories/trajectory_1.traj') - - # Adding two 1 ps simulations with interval 10 -> 101 frames each, but - # removing one duplicate frame (same as before, except testing this for - # the generated .traj file instead of .dat file) - assert len(trajectory) == 51 + 51 - 1 - - -@work_in_zipped_dir(os.path.join(here, 'data.zip')) -def test_run_metadynamics_with_inherited_bias(): - - cv1 = mlt.PlumedAverageCV('cv1', (0, 1)) - metad = mlt.Metadynamics(cv1) - n_runs = 4 - - _run_metadynamics(metad, n_runs, al_iter=3, fs=500) - - _run_metadynamics(metad, n_runs, al_iter=3, restart=True, fs=500) - - metad_dir = 'plumed_files/metadynamics' - for idx in range(1, n_runs + 1): - assert os.path.exists(f'trajectories/trajectory_{idx}.traj') - - assert os.path.exists(os.path.join(metad_dir, - f'colvar_cv1_{idx}.dat')) - assert os.path.exists(os.path.join(metad_dir, - f'HILLS_{idx}.dat')) - - metad.compute_fes(via_reweighting=True) - assert os.path.exists('fes_raw.npy') - - -@work_in_zipped_dir(os.path.join(here, 'data.zip')) -def test_run_metadynamics_with_component(): - - cv1 = mlt.PlumedCustomCV('plumed_cv_dist.dat', 'x') - metad = mlt.Metadynamics(cv1) - n_runs = 4 - - _run_metadynamics(metad, n_runs, fs=100) - - metad_dir = 'plumed_files/metadynamics' - for idx in range(1, n_runs + 1): - assert os.path.exists(os.path.join(metad_dir, f'colvar_cv1_x_{idx}.dat')) - - -@work_in_zipped_dir(os.path.join(here, 'data.zip')) -def test_run_metadynamics_with_additional_cvs(): - - cv1 = mlt.PlumedAverageCV('cv1', (0, 1)) - cv2 = mlt.PlumedAverageCV('cv2', (2, 1)) - cv2.attach_upper_wall(location=3.0, kappa=150.0) - - bias = mlt.PlumedBias(cvs=(cv1, cv2)) - - metad = mlt.Metadynamics(cvs=cv1, bias=bias) - - assert metad.bias == bias - assert metad.n_cvs == 1 - - n_runs = 1 - _run_metadynamics(metad, - 
configuration=_h2o_configuration(), - n_runs=n_runs, - write_plumed_setup=True, - fs=100) - - with open('plumed_files/metadynamics/plumed_setup.dat', 'r') as f: - plumed_setup = [line.strip() for line in f] - - # Not including the units - assert plumed_setup[1:] == ['cv1_dist1: DISTANCE ATOMS=1,2', - 'cv1: CUSTOM ARG=cv1_dist1 VAR=cv1_dist1 ' - f'FUNC={1/1}*(cv1_dist1) PERIODIC=NO', - 'cv2_dist1: DISTANCE ATOMS=3,2', - 'cv2: CUSTOM ARG=cv2_dist1 VAR=cv2_dist1 ' - f'FUNC={1/1}*(cv2_dist1) PERIODIC=NO', - 'UPPER_WALLS ARG=cv2 AT=3.0 KAPPA=150.0 EXP=2', - 'metad: METAD ARG=cv1 PACE=100 HEIGHT=0.1 ' - 'SIGMA=0.05 TEMP=300 BIASFACTOR=3 ' - 'FILE=HILLS_1.dat', - 'PRINT ARG=cv1,cv1_dist1 ' - 'FILE=colvar_cv1_1.dat STRIDE=10', - 'PRINT ARG=cv2,cv2_dist1 ' - 'FILE=colvar_cv2_1.dat STRIDE=10'] - - -@work_in_zipped_dir(os.path.join(here, 'data.zip')) -def test_estimate_width(): - - cv1 = mlt.PlumedAverageCV('cv1', (0, 1)) - metad = mlt.Metadynamics(cv1) - - width = metad.estimate_width(configurations=_h2_configuration(), - mlp=TestPotential('1D'), - plot=True, - fs=100) - - assert len(width) == 1 - - files_directory = 'plumed_files/width_estimation' - plots_directory = 'width_estimation' - - assert os.path.isdir(files_directory) - assert os.path.exists(os.path.join(files_directory, 'colvar_cv1_1.dat')) - - assert os.path.isdir(plots_directory) - assert os.path.exists(os.path.join(plots_directory, 'cv1_config1.pdf')) - - -@work_in_zipped_dir(os.path.join(here, 'data.zip')) -def test_try_multiple_biasfactors(): - - cv1 = mlt.PlumedAverageCV('cv1', (0, 1)) - metad = mlt.Metadynamics(cv1) - biasfactors = range(5, 11, 5) - - metad.try_multiple_biasfactors(configuration=_h2_configuration(), - mlp=TestPotential('1D'), - temp=300, - interval=10, - dt=1, - pace=100, - width=0.05, - height=0.1, - biasfactors=biasfactors, - plotted_cvs=cv1, - fs=100) - - files_dir = 'plumed_files/multiple_biasfactors' - assert os.path.isdir(files_dir) - - plots_dir = 'multiple_biasfactors' - assert 
os.path.isdir(plots_dir) - - for idx, biasf in enumerate(biasfactors, start=1): - assert os.path.exists(os.path.join(files_dir, f'colvar_cv1_{idx}.dat')) - assert os.path.exists(os.path.join(plots_dir, f'cv1_biasf{biasf}.pdf')) - - -@work_in_zipped_dir(os.path.join(here, 'data.zip')) -def test_block_analysis(): - - cv1 = mlt.PlumedAverageCV('cv1', (0, 1)) - metad = mlt.Metadynamics(cv1) - dt = 1 - interval = 10 - n_runs = 1 - ps = 2 - start_time = 0.5 - - metad.run_metadynamics(configuration=_h2_configuration(), - mlp=TestPotential('1D'), - temp=300, - dt=dt, - interval=interval, - pace=100, - width=0.05, - height=0.1, - biasfactor=3, - n_runs=n_runs, - ps=ps) - - metad.block_analysis(start_time=start_time) - - assert os.path.exists('block_analysis.pdf') - assert os.path.exists('block_analysis.npz') - - start_time_fs = start_time * 1E3 - n_steps = int(start_time_fs / dt) - n_used_frames = n_steps // interval - - min_n_blocks = 10 - min_blocksize = 10 - blocksize_interval = 10 - max_blocksize = n_used_frames // min_n_blocks - - data = np.load('block_analysis.npz') - - # axis 0: CV1; axis 1: 300 bins - assert np.shape(data['CVs']) == (1, 300) - for blocksize in range(min_blocksize, max_blocksize + 1, blocksize_interval): - - # axis 0: error; axis 1: 300 bins - assert np.shape(data[str(blocksize)]) == (3, 300) diff --git a/mlptrain/sampling/tests/test_plumed.py b/mlptrain/sampling/tests/test_plumed.py deleted file mode 100644 index eb873df6..00000000 --- a/mlptrain/sampling/tests/test_plumed.py +++ /dev/null @@ -1,205 +0,0 @@ -import os -import pytest -import mlptrain as mlt -from .utils import work_in_zipped_dir -here = os.path.abspath(os.path.dirname(__file__)) - - -def test_plumed_cv_from_atom_groups(): - - cv1 = mlt.PlumedDifferenceCV('cv1', ((0, 1), (2, 3))) - - assert cv1.name == 'cv1' - assert cv1.units == 'Å' - assert cv1.dof_names == ['cv1_dist1', 'cv1_dist2'] - assert cv1.setup == ['cv1_dist1: DISTANCE ATOMS=1,2', - 'cv1_dist2: DISTANCE ATOMS=3,4', - 'cv1: 
CUSTOM ' - 'ARG=cv1_dist1,cv1_dist2 ' - 'VAR=cv1_dist1,cv1_dist2 ' - 'FUNC=cv1_dist1-cv1_dist2 ' - 'PERIODIC=NO'] - - cv2 = mlt.PlumedAverageCV('cv2', (0, 1, 2)) - - assert cv2.name == 'cv2' - assert cv2.units == 'rad' - assert cv2.dof_names == ['cv2_ang1'] - assert cv2.setup == ['cv2_ang1: ANGLE ATOMS=1,2,3', - 'cv2: CUSTOM ' - 'ARG=cv2_ang1 ' - 'VAR=cv2_ang1 ' - 'FUNC=1.0*(cv2_ang1) ' - 'PERIODIC=NO'] - - with pytest.raises(TypeError): - mlt.PlumedAverageCV('') - - with pytest.raises(TypeError): - mlt.PlumedAverageCV('', 0) - - with pytest.raises(TypeError): - mlt.PlumedAverageCV('', ()) - - with pytest.raises(ValueError): - mlt.PlumedAverageCV('', (1,)) - - with pytest.raises(NotImplementedError): - mlt.PlumedAverageCV('', [(0, 1, 2, 3, 4, 5), (1, 2, 3)]) - - with pytest.raises(ValueError): - mlt.PlumedDifferenceCV('', ((0, 1), (2, 3), (4, 5))) - - -@work_in_zipped_dir(os.path.join(here, 'data.zip')) -def test_plumed_cv_from_file(): - - cv1 = mlt.PlumedCustomCV('plumed_cv_custom.dat', - component='spath', - units='Å') - - assert cv1.name == 'p1.spath' - assert cv1.units == 'Å' - assert cv1.setup == ['p1: PATH ' - 'REFERENCE=path.pdb ' - 'TYPE=OPTIMAL ' - 'LAMBDA=500.0'] - - with open('path.pdb', 'r') as f: - data1 = f.read() - - assert cv1.files == [('path.pdb', data1)] - - os.remove('path.pdb') - cv1.write_files() - - with open('path.pdb', 'r') as f: - data2 = f.read() - - assert data1 == data2 - - -def test_plumed_cv_walls(): - - cv1 = mlt.PlumedDifferenceCV('cv1', ((0, 1), (2, 3))) - - cv1.attach_lower_wall(location=1, kappa=150.0, exp=3) - cv1.attach_upper_wall(location=3, kappa=150.0, exp=3) - - assert cv1.setup == ['cv1_dist1: DISTANCE ATOMS=1,2', - 'cv1_dist2: DISTANCE ATOMS=3,4', - 'cv1: CUSTOM ' - 'ARG=cv1_dist1,cv1_dist2 ' - 'VAR=cv1_dist1,cv1_dist2 ' - 'FUNC=cv1_dist1-cv1_dist2 ' - 'PERIODIC=NO', - 'LOWER_WALLS ARG=cv1 AT=1 KAPPA=150.0 EXP=3', - 'UPPER_WALLS ARG=cv1 AT=3 KAPPA=150.0 EXP=3'] - - with pytest.raises(TypeError): - 
cv1.attach_lower_wall(location=0.5, kappa=150.0, exp=3) - - -def test_plumed_bias_from_cvs(): - - cv1 = mlt.PlumedAverageCV('cv1', [(0, 1, 2, 3)]) - cv2 = mlt.PlumedAverageCV('cv2', [(4, 5, 6, 7)]) - - bias = mlt.PlumedBias((cv1, cv2)) - - with pytest.raises(ValueError): - bias._set_metad_params(pace=10, width=(0.2, 0.3), height=0.5, biasfactor=0.5) - - with pytest.raises(ValueError): - bias._set_metad_params(pace=10, width=0.2, height=0.5, biasfactor=2) - - bias.initialise_for_metad_al(pace=10, - width=(0.2, 0.3), - height=0.5, - biasfactor=2, - grid_min=(0.5, 1.5), - grid_max=(0.6, 1.6)) - - assert bias.cvs == (cv1, cv2) - assert bias.pace == 10 - assert bias.width == (0.2, 0.3) - assert bias.height == 0.5 - assert bias.biasfactor == 2 - assert bias.metad_grid_min == (0.5, 1.5) - assert bias.metad_grid_max == (0.6, 1.6) - assert bias.metad_grid_bin is None - - assert bias.metad_grid_setup == 'GRID_MIN=0.5,1.5 GRID_MAX=0.6,1.6 ' - - bias.strip() - - for attribute, value in bias.__dict__.items(): - if attribute == 'cvs': - assert value is not None - - else: - assert value is None - - -@work_in_zipped_dir(os.path.join(here, 'data.zip')) -def test_plumed_bias_from_file(): - - bias = mlt.PlumedBias(filename='plumed_bias.dat') - - assert bias.setup == ['dof1: DISTANCE ATOMS=1,2', - 'dof2: DISTANCE ATOMS=2,3', - 'cv1: CUSTOM ARG=dof1,dof2 VAR=dof1,dof2 ' - 'FUNC=dof2-dof1 PERIODIC=NO', - 'lwall: LOWER_WALLS ARG=cv1 AT=1 KAPPA=150.0 EXP=3', - 'p1: PATH REFERENCE=path.pdb TYPE=OPTIMAL ' - 'LAMBDA=500.0', - 'UPPER_WALLS ARG=cv1 AT=3 KAPPA=150.0 EXP=3', - 'METAD ARG=cv1,p1.spath PACE=100 HEIGHT=0.1 ' - 'SIGMA=0.5 BIASFACTOR=4 FILE=HILLS.dat', - 'PRINT ARG=cv1,p1.spath FILE=colvar.dat STRIDE=10'] - - with open('path.pdb', 'r') as f: - data1 = f.read() - - assert bias.cv_files == [('path.pdb', data1)] - - os.remove('path.pdb') - bias.write_cv_files() - - with open('path.pdb', 'r') as f: - data2 = f.read() - - assert data1 == data2 - - bias.strip() - - assert bias.setup == 
['dof1: DISTANCE ATOMS=1,2', - 'dof2: DISTANCE ATOMS=2,3', - 'cv1: CUSTOM ARG=dof1,dof2 VAR=dof1,dof2 ' - 'FUNC=dof2-dof1 PERIODIC=NO', - 'lwall: LOWER_WALLS ARG=cv1 AT=1 KAPPA=150.0 EXP=3', - 'p1: PATH REFERENCE=path.pdb TYPE=OPTIMAL ' - 'LAMBDA=500.0', - 'UPPER_WALLS ARG=cv1 AT=3 KAPPA=150.0 EXP=3'] - - -@work_in_zipped_dir(os.path.join(here, 'data.zip')) -def test_plumed_plot(): - - colvar1 = 'test_plumed_plot/colvar1.dat' - colvar2 = 'test_plumed_plot/colvar2.dat' - - mlt.plot_cv_versus_time(filename=colvar1, - time_units='fs', - cv_units='Å', - cv_limits=(0.5, 1.5), - label='0') - - assert os.path.exists('cv1_0.pdf') - - mlt.plot_cv1_and_cv2(filenames=(colvar1, colvar2), - cvs_units=('Å', 'Å'), - cvs_limits=((0.5, 1.5), (0.5, 1.5)), - label='0') - - assert os.path.exists('cv1_cv2_0.pdf') diff --git a/mlptrain/sampling/tests/test_umbrella.py b/mlptrain/sampling/tests/test_umbrella.py deleted file mode 100644 index 6b8a0177..00000000 --- a/mlptrain/sampling/tests/test_umbrella.py +++ /dev/null @@ -1,183 +0,0 @@ -import os -import time - -import numpy as np -import pytest - -import mlptrain as mlt -from .test_potential import TestPotential -from .utils import work_in_zipped_dir -here = os.path.abspath(os.path.dirname(__file__)) - - -def _h2_umbrella(): - return mlt.UmbrellaSampling(zeta_func=mlt.AverageDistance([0, 1]), kappa=100) - - -def _h2_pulled_traj(): - traj = mlt.ConfigurationSet() - traj.load_xyz(os.path.join(here, 'data', 'h2_traj.xyz'), charge=0, mult=1) - - return traj - - -def _h2_sparse_traj(): - traj = _h2_pulled_traj() - sparse_traj = mlt.ConfigurationSet() - sparse_traj.append(traj[0]) - sparse_traj.append(traj[-1]) - - return sparse_traj - - -@work_in_zipped_dir(os.path.join(here, 'data.zip')) -def test_run_umbrella(): - - umbrella = _h2_umbrella() - traj = _h2_pulled_traj() - n_windows = 3 - - assert umbrella.kappa is not None and np.isclose(umbrella.kappa, 100.) 
- assert umbrella.zeta_refs is None - - # Zeta refs are now reset - umbrella.run_umbrella_sampling(traj, - mlp=TestPotential('1D'), - temp=300, - interval=5, - dt=0.5, - n_windows=n_windows, - save_sep=False, - all_to_xyz=True, - fs=1000, - save_fs=300) - - # Sampling with a high force constant should lead to fitted Gaussians - # that closely match the reference (target) values - for window in umbrella.windows: - assert window.gaussian_plotted is not None - assert np.isclose(window.gaussian_plotted.mean, window.zeta_ref, atol=0.1) - - assert os.path.exists('trajectories') - assert os.path.exists('trajectories/combined_trajectory.xyz') - - for idx in range(1, n_windows + 1): - assert os.path.exists(f'trajectories/trajectory_{idx}.traj') - - for sim_time in [300, 600, 900]: - assert os.path.exists(f'trajectories/trajectory_{idx}_{sim_time}fs.traj') - assert os.path.exists(f'trajectories/window_{idx}_{sim_time}fs.xyz') - - assert os.path.exists('fitted_data.pdf') - - -# TODO: This tests fails on GHA with MACE install, -# need to investigate more, for now skipping. 
-@work_in_zipped_dir(os.path.join(here, 'data.zip')) -@pytest.mark.skip(reason="Test fails on GHA with MACE") -def test_umbrella_parallel(): - - execution_time = {} - - for n_cores in (1, 2): - - mlt.Config.n_cores = n_cores - - umbrella = _h2_umbrella() - traj = _h2_pulled_traj() - - start = time.perf_counter() - umbrella.run_umbrella_sampling(traj, - mlp=TestPotential('1D'), - temp=300, - interval=5, - dt=0.5, - n_windows=4, - fs=500) - finish = time.perf_counter() - - execution_time[n_cores] = finish - start - - # Calculation with more cores should run faster - assert execution_time[2] < execution_time[1] - - -@work_in_zipped_dir(os.path.join(here, 'data.zip')) -def test_umbrella_sparse_traj(): - - umbrella = _h2_umbrella() - traj = _h2_sparse_traj() - n_windows = 9 - - # Indices from 1 to 9 - zeta_refs = umbrella._reference_values(traj=traj, - num=n_windows, - final_ref=None, - init_ref=None) - - middle_ref = zeta_refs[5] - middle_bias = mlt.Bias(zeta_func=umbrella.zeta_func, - kappa=umbrella.kappa, - reference=middle_ref) - - # There should be no good starting frame for the middle window (index 5) - # as the sparse trajectory only contains the initial and final frame - assert umbrella._no_ok_frame_in(traj, middle_ref) - - umbrella.run_umbrella_sampling(traj, - mlp=TestPotential('1D'), - temp=300, - interval=5, - dt=0.5, - n_windows=n_windows, - fs=100, - save_sep=True) - - assert os.path.exists('trajectories') - assert os.path.isdir('trajectories') - - previous_window_traj = mlt.ConfigurationSet() - previous_window_traj.load_xyz(filename='trajectories/window_4.xyz', - charge=0, - mult=1) - - middle_window_traj = mlt.ConfigurationSet() - middle_window_traj.load_xyz(filename='trajectories/window_5.xyz', - charge=0, - mult=1) - - closest_frame = umbrella._best_init_frame(bias=middle_bias, - traj=previous_window_traj) - starting_frame = middle_window_traj[0] - - # The starting frame for the middle window (index 5) should be - # the closest frame from the previous 
window (index 4) - assert starting_frame == closest_frame - - -@work_in_zipped_dir(os.path.join(here, 'data.zip')) -def test_umbrella_save_load(): - - umbrella = _h2_umbrella() - traj = _h2_pulled_traj() - - umbrella.run_umbrella_sampling(traj, - mlp=TestPotential('1D'), - temp=300, - interval=5, - dt=0.5, - n_windows=3, - fs=100, - save_sep=False) - - umbrella.save(folder_name='tmp_us') - assert os.path.exists('tmp_us') and os.path.isdir('tmp_us') - - loaded = mlt.UmbrellaSampling.from_folder(folder_name='tmp_us', temp=300) - assert len(loaded.windows) == 3 - assert np.allclose(loaded.zeta_refs, umbrella.zeta_refs) - - for idx, window in enumerate(loaded.windows): - assert np.isclose(window.zeta_ref, umbrella.zeta_refs[idx]) - assert np.isclose(window._bias.kappa, 100) - assert len(window._obs_zetas) == 41 diff --git a/mlptrain/sampling/umbrella.py b/mlptrain/sampling/umbrella.py index 56de2698..d89777a4 100644 --- a/mlptrain/sampling/umbrella.py +++ b/mlptrain/sampling/umbrella.py @@ -4,7 +4,6 @@ import time import glob import numpy as np -import matplotlib.pyplot as plt from scipy.optimize import curve_fit from scipy.integrate import simpson from typing import Optional, List, Callable, Tuple @@ -24,9 +23,7 @@ class _Window: """Contains the attributes belonging to an US window used for WHAM or UI""" - def __init__(self, - obs_zetas: np.ndarray, - bias: 'mlptrain.Bias'): + def __init__(self, obs_zetas: np.ndarray, bias: 'mlptrain.Bias'): """ Umbrella Window @@ -42,12 +39,12 @@ def __init__(self, self._bias = bias self._obs_zetas = obs_zetas - self._gaussian_pdf: Optional[_FittedGaussian] = None + self._gaussian_pdf: Optional[_FittedGaussian] = None self._gaussian_plotted: Optional[_FittedGaussian] = None - self.bin_edges: Optional[np.ndarray] = None + self.bin_edges: Optional[np.ndarray] = None self.bias_energies: Optional[np.ndarray] = None - self.hist: Optional[np.ndarray] = None + self.hist: Optional[np.ndarray] = None self.free_energy = 0.0 @@ -60,8 +57,9 @@ 
def bin(self) -> None: self.hist, _ = np.histogram(self._obs_zetas, bins=self.bin_edges) - self.bias_energies = ((self._bias.kappa/2) - * (self.bin_centres - self._bias.ref)**2) + self.bias_energies = (self._bias.kappa / 2) * ( + self.bin_centres - self._bias.ref + ) ** 2 return None @property @@ -89,8 +87,10 @@ def gaussian_plotted(self) -> '_FittedGaussian': """Gaussian which was plotted during umbrella sampling simulation""" if self._gaussian_plotted is None: - raise TypeError('No plotted gaussian is stored in the window, ' - 'make sure to run umbrella sampling first') + raise TypeError( + 'No plotted gaussian is stored in the window, ' + 'make sure to run umbrella sampling first' + ) return self._gaussian_plotted @@ -98,8 +98,10 @@ def gaussian_plotted(self) -> '_FittedGaussian': def n(self) -> int: """Number of samples in this window""" if self.hist is None: - raise ValueError('Cannot determine the number of samples - ' - 'window has not been binned') + raise ValueError( + 'Cannot determine the number of samples - ' + 'window has not been binned' + ) return int(np.sum(self.hist)) @@ -107,8 +109,10 @@ def dAu_dq(self, zetas, beta): """PMF from a single window""" if self.gaussian_pdf is None: - raise TypeError('Cannot estimate PMF if the window does not ' - 'contain a fitted probability density function') + raise TypeError( + 'Cannot estimate PMF if the window does not ' + 'contain a fitted probability density function' + ) mean_zeta_b = self.gaussian_pdf.mean std_zeta_b = self.gaussian_pdf.std @@ -116,8 +120,9 @@ def dAu_dq(self, zetas, beta): zeta_ref = self.zeta_ref # Equation 8.8.21 from Tuckerman, p. 
344 - _dAu_dq = ((1.0 / beta) * (zetas - mean_zeta_b) / (std_zeta_b**2) - - kappa * (zetas - zeta_ref)) + _dAu_dq = (1.0 / beta) * (zetas - mean_zeta_b) / ( + std_zeta_b**2 + ) - kappa * (zetas - zeta_ref) return _dAu_dq @@ -145,18 +150,23 @@ def from_file(cls, filename: str) -> '_Window': (mlptrain.sampling.umbrella._Window): """ file_lines = open(filename, 'r', errors='ignore').readlines() - header_line = file_lines.pop(0) # Pop the first line + header_line = file_lines.pop(0) # Pop the first line - ref_zeta = float(header_line.split()[0]) # Å - kappa = float(header_line.split()[1]) # eV / Å^2 + ref_zeta = float(header_line.split()[0]) # Å + kappa = float(header_line.split()[1]) # eV / Å^2 - obs_zeta = [float(line.split()[0]) for line in file_lines - if len(line.split()) > 0] + obs_zeta = [ + float(line.split()[0]) + for line in file_lines + if len(line.split()) > 0 + ] - window = cls(obs_zetas=np.array(obs_zeta), - bias=Bias(zeta_func=DummyCoordinate(), - kappa=kappa, - reference=ref_zeta)) + window = cls( + obs_zetas=np.array(obs_zeta), + bias=Bias( + zeta_func=DummyCoordinate(), kappa=kappa, reference=ref_zeta + ), + ) return window @@ -181,20 +191,26 @@ def _fit_gaussian(self, normalised) -> None: gaussian = _FittedGaussian() - a_0, mu_0, sigma_0 = (np.max(self.hist), - np.average(self._obs_zetas), - float(np.std(self._obs_zetas))) + a_0, mu_0, sigma_0 = ( + np.max(self.hist), + np.average(self._obs_zetas), + float(np.std(self._obs_zetas)), + ) try: - gaussian.params, _ = curve_fit(gaussian.value, - self.bin_centres, - self.hist, - p0=[1.0, 1.0, 1.0], # init guess - maxfev=10000) + gaussian.params, _ = curve_fit( + gaussian.value, + self.bin_centres, + self.hist, + p0=[1.0, 1.0, 1.0], # init guess + maxfev=10000, + ) except RuntimeError: - logger.warning('Could not fit gaussian to a histogram, using ' - 'parameters obtained without fitting instead') + logger.warning( + 'Could not fit gaussian to a histogram, using ' + 'parameters obtained without fitting 
instead' + ) gaussian.params = a_0, mu_0, sigma_0 @@ -207,16 +223,23 @@ def _fit_gaussian(self, normalised) -> None: def _plot_gaussian(self, hist, bin_centres) -> None: """Fit a Gaussian to a histogram of data and plot the result""" + import matplotlib.pyplot as plt + gaussian = _FittedGaussian() try: - gaussian.params, _ = curve_fit(gaussian.value, bin_centres, hist, - p0=[1.0, 1.0, 1.0], - maxfev=10000) + gaussian.params, _ = curve_fit( + gaussian.value, + bin_centres, + hist, + p0=[1.0, 1.0, 1.0], + maxfev=10000, + ) if np.min(np.abs(bin_centres - gaussian.mean)) > 1.0: - raise RuntimeError('Gaussian mean was not within the 1 Å of ' - 'the ζ range') + raise RuntimeError( + 'Gaussian mean was not within the 1 Å of ' 'the ζ range' + ) except RuntimeError: logger.error('Failed to fit a gaussian to this data') @@ -231,10 +254,9 @@ def _plot_gaussian(self, hist, bin_centres) -> None: self._gaussian_plotted = gaussian return None - def plot(self, - min_zeta: float, - max_zeta: float, - plot_gaussian: bool = True) -> None: + def plot( + self, min_zeta: float, max_zeta: float, plot_gaussian: bool = True + ) -> None: """ Plot this window along with a fitted Gaussian function if possible @@ -246,11 +268,17 @@ def plot(self, plot_gaussian: """ - hist, bin_edges = np.histogram(self._obs_zetas, - density=False, - bins=np.linspace(min_zeta - 0.1*abs(min_zeta), - max_zeta + 0.1*abs(max_zeta), - num=400)) + import matplotlib.pyplot as plt + + hist, bin_edges = np.histogram( + self._obs_zetas, + density=False, + bins=np.linspace( + min_zeta - 0.1 * abs(min_zeta), + max_zeta + 0.1 * abs(max_zeta), + num=400, + ), + ) bin_centres = (bin_edges[1:] + bin_edges[:-1]) / 2 plt.plot(bin_centres, hist, alpha=0.1) @@ -272,10 +300,12 @@ class UmbrellaSampling: umbrella sampling windows and running WHAM or umbrella integration. 
""" - def __init__(self, - zeta_func: 'mlptrain.sampling.reaction_coord.ReactionCoordinate', - kappa: float, - temp: Optional[float] = None): + def __init__( + self, + zeta_func: 'mlptrain.sampling.reaction_coord.ReactionCoordinate', + kappa: float, + temp: Optional[float] = None, + ): """ Umbrella sampling to predict free energy using an mlp under a harmonic bias: @@ -294,19 +324,21 @@ def __init__(self, kappa: Value of the spring constant, κ, used in umbrella sampling """ - self.kappa: float = kappa # eV Å^-2 - self.zeta_func: Callable = zeta_func # ζ(r) - self.temp: Optional[float] = temp # K + self.kappa: float = kappa # eV Å^-2 + self.zeta_func: Callable = zeta_func # ζ(r) + self.temp: Optional[float] = temp # K - self.windows: List[_Window] = [] + self.windows: List[_Window] = [] @staticmethod def _best_init_frame(bias, traj): """Find the frames whose bias value is the lowest, i.e. has the closest reaction coordinate to the desired""" if len(traj) == 0: - raise RuntimeError('Cannot determine the best frame from a ' - 'trajectory with length zero') + raise RuntimeError( + 'Cannot determine the best frame from a ' + 'trajectory with length zero' + ) min_e_idx = np.argmin([bias(frame.ase_atoms) for frame in traj]) @@ -341,19 +373,20 @@ def _no_ok_frame_in(self, traj, ref) -> bool: """ return np.min(np.abs(self.zeta_func(traj) - ref)) > 0.5 - def run_umbrella_sampling(self, - traj: 'mlptrain.ConfigurationSet', - mlp: 'mlptrain.potentials._base.MLPotential', - temp: float, - interval: int, - dt: float, - init_ref: Optional[float] = None, - final_ref: Optional[float] = None, - n_windows: int = 10, - save_sep: bool = True, - all_to_xyz: bool = False, - **kwargs - ) -> None: + def run_umbrella_sampling( + self, + traj: 'mlptrain.ConfigurationSet', + mlp: 'mlptrain.potentials._base.MLPotential', + temp: float, + interval: int, + dt: float, + init_ref: Optional[float] = None, + final_ref: Optional[float] = None, + n_windows: int = 10, + save_sep: bool = True, + 
all_to_xyz: bool = False, + **kwargs, + ) -> None: """ Run umbrella sampling across n_windows, fitting Gaussians to the sampled values of the reaction coordinate. @@ -370,17 +403,17 @@ def run_umbrella_sampling(self, temp: Temperature in K to initialise velocities and to run NVT MD. Must be positive - + interval: (int) Interval between saving the geometry - + dt: (float) Time-step in fs - + init_ref: (float | None) Value of reaction coordinate in Å for first window - + final_ref: (float | None) Value of reaction coordinate in Å for first window - + n_windows: (int) Number of windows to run in the umbrella sampling save_sep: (bool) If True saves trajectories of each window @@ -404,23 +437,27 @@ def run_umbrella_sampling(self, start_umbrella = time.perf_counter() if temp <= 0: - raise ValueError('Temperature must be positive and non-zero for ' - 'umbrella sampling') + raise ValueError( + 'Temperature must be positive and non-zero for ' + 'umbrella sampling' + ) self.temp = temp - zeta_refs = self._reference_values(traj, n_windows, init_ref, final_ref) + zeta_refs = self._reference_values( + traj, n_windows, init_ref, final_ref + ) # window_process.get() --> window_traj window_processes, window_trajs, biases = [], [], [] n_processes = min(n_windows, Config.n_cores) - logger.info(f'Running Umbrella Sampling with {n_windows} window(s), ' - f'{n_processes} window(s) are run in parallel') + logger.info( + f'Running Umbrella Sampling with {n_windows} window(s), ' + f'{n_processes} window(s) are run in parallel' + ) with Pool(processes=n_processes) as pool: - for idx, ref in enumerate(zeta_refs): - # Without copy kwargs is overwritten at every iteration kwargs_single = deepcopy(kwargs) kwargs_single['idx'] = idx + 1 @@ -431,88 +468,98 @@ def run_umbrella_sampling(self, if self._no_ok_frame_in(traj, ref): # Takes the trajectory of the previous window, .get() blocks # the main process until the previous window finishes - _traj = window_processes[idx-1].get() + _traj = 
window_processes[idx - 1].get() else: _traj = traj init_frame = self._best_init_frame(bias, _traj) - window_process = pool.apply_async(func=self._run_individual_window, - args=(init_frame, - mlp, - temp, - interval, - dt, - bias), - kwds=kwargs_single) + window_process = pool.apply_async( + func=self._run_individual_window, + args=(init_frame, mlp, temp, interval, dt, bias), + kwds=kwargs_single, + ) window_processes.append(window_process) biases.append(bias) pool.close() for window_process, bias in zip(window_processes, biases): - window_traj = window_process.get() - window = _Window(obs_zetas=self.zeta_func(window_traj), - bias=bias) - window.plot(min_zeta=min(zeta_refs), - max_zeta=max(zeta_refs), - plot_gaussian=True) + window = _Window( + obs_zetas=self.zeta_func(window_traj), bias=bias + ) + window.plot( + min_zeta=min(zeta_refs), + max_zeta=max(zeta_refs), + plot_gaussian=True, + ) self.windows.append(window) window_trajs.append(window_traj) pool.join() finish_umbrella = time.perf_counter() - logger.info('Umbrella sampling done in ' - f'{(finish_umbrella - start_umbrella) / 60:.1f} m') + logger.info( + 'Umbrella sampling done in ' + f'{(finish_umbrella - start_umbrella) / 60:.1f} m' + ) # Move .traj files into 'trajectories' folder and compute .xyz files - self._move_and_save_files(window_trajs=window_trajs, - save_sep=save_sep, - all_to_xyz=all_to_xyz) + self._move_and_save_files( + window_trajs=window_trajs, save_sep=save_sep, all_to_xyz=all_to_xyz + ) return None - def _run_individual_window(self, - frame: 'mlptrain.Configuration', - mlp: 'mlptrain.potentials._base.MLPotential', - temp: float, - interval: int, - dt: float, - bias: 'mlptrain.Bias', - **kwargs): + def _run_individual_window( + self, + frame: 'mlptrain.Configuration', + mlp: 'mlptrain.potentials._base.MLPotential', + temp: float, + interval: int, + dt: float, + bias: 'mlptrain.Bias', + **kwargs, + ): """Run an individual umbrella sampling window""" - logger.info(f'Running US window 
{kwargs["idx"]} with ' - f'ζ_ref={kwargs["ref"]:.2f} Å ' - f'and κ = {self.kappa:.3f} eV / Å^2') + logger.info( + f'Running US window {kwargs["idx"]} with ' + f'ζ_ref={kwargs["ref"]:.2f} Å ' + f'and κ = {self.kappa:.3f} eV / Å^2' + ) kwargs['n_cores'] = 1 - traj = run_mlp_md(configuration=frame, - mlp=mlp, - temp=temp, - dt=dt, - interval=interval, - bias=bias, - kept_substrings=['.traj'], - **kwargs) + traj = run_mlp_md( + configuration=frame, + mlp=mlp, + temp=temp, + dt=dt, + interval=interval, + bias=bias, + kept_substrings=['.traj'], + **kwargs, + ) return traj @staticmethod - def _move_and_save_files(window_trajs: List['mlptrain.Trajectory'], - save_sep: bool, - all_to_xyz: bool - ) -> None: + def _move_and_save_files( + window_trajs: List['mlptrain.Trajectory'], + save_sep: bool, + all_to_xyz: bool, + ) -> None: """ Save window trajectories, move them into trajectories folder and compute .xyz files """ - move_files([r'trajectory_\d+\.traj', r'trajectory_\d+_\w+\.traj'], - dst_folder='trajectories', - regex=True) + move_files( + [r'trajectory_\d+\.traj', r'trajectory_\d+_\w+\.traj'], + dst_folder='trajectories', + regex=True, + ) os.chdir('trajectories') @@ -551,7 +598,7 @@ def free_energies(self, prob_dist) -> np.ndarray: Returns: (np.ndarray): A(ζ) """ - return - (1.0 / self.beta) * np.log(prob_dist) + return -(1.0 / self.beta) * np.log(prob_dist) @property def zeta_refs(self) -> Optional[np.ndarray]: @@ -577,32 +624,37 @@ def beta(self) -> float: (float): β in units of eV^-1 """ if self.temp is None: - raise ValueError('Cannot calculate β without a defined temperature' - ' please set .temp') + raise ValueError( + 'Cannot calculate β without a defined temperature' + ' please set .temp' + ) - k_b = 8.617333262E-5 # Boltzmann constant in eV / K + k_b = 8.617333262e-5 # Boltzmann constant in eV / K return 1.0 / (k_b * self.temp) def _bin_windows(self, n_bins: int) -> None: """For each window bin the observed zetas into a histogram""" - bin_centres = 
np.linspace(self.zeta_refs[0], self.zeta_refs[-1], num=n_bins) + bin_centres = np.linspace( + self.zeta_refs[0], self.zeta_refs[-1], num=n_bins + ) bin_width = (bin_centres[-1] - bin_centres[0]) / (len(bin_centres) - 1) - logger.debug(f"Bin width: {bin_width} Å") + logger.debug(f'Bin width: {bin_width} Å') for window in self.windows: window.bin_edges = np.linspace( start=bin_centres[0] - bin_width / 2, stop=bin_centres[-1] + bin_width / 2, - num=len(bin_centres) + 1 + num=len(bin_centres) + 1, ) window.bin() - def wham(self, - tol: float = 1E-3, - max_iterations: int = 100000, - n_bins: int = 100 - ) -> Tuple[np.ndarray, np.ndarray]: + def wham( + self, + tol: float = 1e-3, + max_iterations: int = 100000, + n_bins: int = 100, + ) -> Tuple[np.ndarray, np.ndarray]: """ Construct an unbiased distribution (on a grid) from a set of windows @@ -621,29 +673,30 @@ def wham(self, (np.ndarray, np.ndarray): Tuple containing the reaction coordinate and values of the free energy """ - beta = self.beta # 1 / (k_B T) + beta = self.beta # 1 / (k_B T) self._bin_windows(n_bins=n_bins) # Discretised reaction coordinate zetas = np.linspace(self.zeta_refs[0], self.zeta_refs[-1], num=n_bins) p = np.ones_like(zetas) / len(zetas) # P(ζ) uniform distribution - p_prev = np.inf * np.ones_like(p) # Start with P(ζ)_(-1) = ∞ + p_prev = np.inf * np.ones_like(p) # Start with P(ζ)_(-1) = ∞ def converged(): return np.max(np.abs(p_prev - p)) < tol for iteration in range(max_iterations): - # Equation 8.8.18 from Tuckerman, p. 343 - p = (sum(w_k.hist for w_k in self.windows) - / sum(w_k.n * np.exp(beta * (w_k.free_energy - w_k.bias_energies)) - for w_k in self.windows)) + p = sum(w_k.hist for w_k in self.windows) / sum( + w_k.n * np.exp(beta * (w_k.free_energy - w_k.bias_energies)) + for w_k in self.windows + ) for w_k in self.windows: # Equation 8.8.19 from Tuckerman, p. 
343 - w_k.free_energy = (-(1.0/beta) - * np.log(np.sum(p * np.exp(-w_k.bias_energies * beta)))) + w_k.free_energy = -(1.0 / beta) * np.log( + np.sum(p * np.exp(-w_k.bias_energies * beta)) + ) if converged(): logger.info(f'WHAM converged in {iteration} iterations') @@ -651,13 +704,14 @@ def converged(): p_prev = p - _plot_and_save_free_energy(free_energies=self.free_energies(p), - zetas=zetas) + _plot_and_save_free_energy( + free_energies=self.free_energies(p), zetas=zetas + ) return zetas, self.free_energies(p) - def umbrella_integration(self, - n_bins: int = 100 - ) -> Tuple[np.ndarray, np.ndarray]: + def umbrella_integration( + self, n_bins: int = 100 + ) -> Tuple[np.ndarray, np.ndarray]: """ Perform umbrella integration on the umbrella windows to un-bias the probability distribution. Such that the PMF becomes @@ -703,12 +757,11 @@ def umbrella_integration(self, free_energies[i] = 0.0 else: - free_energies[i] = simpson(dA_dq[:i], - zetas[:i], - dx=zetas_spacing) + free_energies[i] = simpson( + dA_dq[:i], zetas[:i], dx=zetas_spacing + ) - _plot_and_save_free_energy(free_energies=free_energies, - zetas=zetas) + _plot_and_save_free_energy(free_energies=free_energies, zetas=zetas) return zetas, free_energies def save(self, folder_name: str = 'umbrella') -> None: @@ -723,8 +776,9 @@ def save(self, folder_name: str = 'umbrella') -> None: os.mkdir(folder_name) for idx, window in enumerate(self.windows): - window.save(filename=os.path.join(folder_name, - f'window_{idx+1}.txt')) + window.save( + filename=os.path.join(folder_name, f'window_{idx+1}.txt') + ) return None @@ -732,8 +786,10 @@ def load(self, folder_name: str) -> None: """Load data from a set of saved windows""" if not os.path.isdir(folder_name): - raise ValueError(f'Loading from a folder was not possible as ' - f'{folder_name} is not a valid folder') + raise ValueError( + f'Loading from a folder was not possible as ' + f'{folder_name} is not a valid folder' + ) for filename in 
glob.glob(os.path.join(folder_name, 'window_*.txt')): window = _Window.from_file(filename) @@ -742,9 +798,7 @@ def load(self, folder_name: str) -> None: return None @classmethod - def from_folder(cls, - folder_name: str, - temp: float) -> 'UmbrellaSampling': + def from_folder(cls, folder_name: str, temp: float) -> 'UmbrellaSampling': """ Create an umbrella sampling instance from a folder containing the window data @@ -765,9 +819,7 @@ def from_folder(cls, return us @classmethod - def from_folders(cls, - *args: str, - temp: float) -> 'UmbrellaSampling': + def from_folders(cls, *args: str, temp: float) -> 'UmbrellaSampling': """ Load a set of individual umbrella sampling simulations in to a single one @@ -796,11 +848,7 @@ def _order_windows_by_zeta_ref(self) -> None: class _FittedGaussian: - - def __init__(self, - a: float = 1.0, - b: float = 1.0, - c: float = 1.0): + def __init__(self, a: float = 1.0, b: float = 1.0, c: float = 1.0): """ Gaussian defined by three parameters: @@ -813,7 +861,7 @@ def __call__(self, x): @staticmethod def value(x, a, b, c): - return a * np.exp(-(x - b)**2 / (2. 
* c**2)) + return a * np.exp(-((x - b) ** 2) / (2.0 * c**2)) @property def mean(self) -> float: @@ -826,9 +874,9 @@ def std(self) -> float: return self.params[2] -def _plot_and_save_free_energy(free_energies, - zetas, - units='kcal mol-1') -> None: +def _plot_and_save_free_energy( + free_energies, zetas, units='kcal mol-1' +) -> None: """ Plots the free energy against the reaction coordinate and saves the corresponding values as a .txt file @@ -838,6 +886,7 @@ def _plot_and_save_free_energy(free_energies, zetas: Values of the reaction coordinate """ + import matplotlib.pyplot as plt free_energies = convert_ase_energy(energy_array=free_energies, units=units) diff --git a/mlptrain/system.py b/mlptrain/system.py index 05c132d4..d2546118 100644 --- a/mlptrain/system.py +++ b/mlptrain/system.py @@ -3,7 +3,6 @@ import numpy as np from typing import Union, Sequence, List from scipy.spatial.distance import cdist -from scipy.stats import special_ortho_group from mlptrain.configurations import Configuration, ConfigurationSet from mlptrain.log import logger from mlptrain.box import Box @@ -13,9 +12,9 @@ class System: """System with molecules but without any coordinates""" - def __init__(self, - *args: Molecule, - box: Union[Box, Sequence[float], None]): + def __init__( + self, *args: Molecule, box: Union[Box, Sequence[float], None] + ): """ System containing a set of molecules. 
@@ -38,11 +37,12 @@ def __init__(self, else: self.box = box if isinstance(box, Box) else Box(box) - def random_configuration(self, - min_dist: float = 2.0, - with_intra: bool = False, - intra_sigma: float = 0.01 - ) -> 'mlptrain.Configuration': + def random_configuration( + self, + min_dist: float = 2.0, + with_intra: bool = False, + intra_sigma: float = 0.01, + ) -> 'mlptrain.Configuration': """ Generate a random configuration of this system, where all the molecules in the system have been randomised @@ -63,31 +63,34 @@ def random_configuration(self, (RuntimeError): If all the molecules cannot be randomised while maintaining the required min. distance between them """ - configuration = Configuration(charge=self.charge, - mult=self.mult) + configuration = Configuration( + charge=self.charge, mult=self.mult, box=self.box + ) for molecule in self.molecules: - if with_intra: - logger.info(f'Adding random normal displacement with ' - f'σ={intra_sigma} Å') + logger.info( + f'Adding random normal displacement with ' + f'σ={intra_sigma} Å' + ) molecule.random_normal_jiggle(sigma=intra_sigma) self._shift_to_midpoint(molecule) if configuration.n_atoms > 0: self._rotate_randomly(molecule) - self._shift_randomly(molecule, - coords=configuration.coordinates, - min_dist=min_dist) + self._shift_randomly( + molecule, + coords=configuration.coordinates, + min_dist=min_dist, + ) configuration.atoms += molecule.atoms.copy() return configuration - def random_configurations(self, - num: int, - **kwargs - ) -> 'mlptrain.ConfigurationSet': + def random_configurations( + self, num: int, **kwargs + ) -> 'mlptrain.ConfigurationSet': """ Generate a number of random configurations of this system @@ -125,13 +128,13 @@ def configuration(self) -> 'mlptrain.Configuration': return self.random_configuration(with_intra=False) else: - raise NotImplementedError("A single configuration for a system " - "with > 1 molecule(s) is not implemented" - " Call random_configuration()") + raise 
NotImplementedError( + 'A single configuration for a system ' + 'with > 1 molecule(s) is not implemented' + ' Call random_configuration()' + ) - def add_molecule(self, - molecule: 'mlptrain.Molecule' - ) -> None: + def add_molecule(self, molecule: 'mlptrain.Molecule') -> None: """ Add a molecule to this system @@ -143,10 +146,9 @@ def add_molecule(self, self.molecules.append(molecule) return None - def add_molecules(self, - molecule: 'mlptrain.Molecule', - num: int = 1 - ) -> None: + def add_molecules( + self, molecule: 'mlptrain.Molecule', num: int = 1 + ) -> None: """ Add multiple versions of a molecule to this sytem @@ -204,6 +206,8 @@ def _shift_to_midpoint(self, molecule) -> None: @staticmethod def _rotate_randomly(molecule) -> None: """Rotate a molecule randomly around it's centroid""" + from scipy.stats import special_ortho_group + logger.info(f'Rotating {molecule.name} about its centroid') coords, centroid = molecule.coordinates, molecule.centroid @@ -214,7 +218,9 @@ def _rotate_randomly(molecule) -> None: return None - def _shift_randomly(self, molecule, coords, min_dist, max_iters=500) -> None: + def _shift_randomly( + self, molecule, coords, min_dist, max_iters=500 + ) -> None: """ Shift a molecule such that that there more than min_dist between each of a molecule's coordinates and a current set @@ -236,12 +242,13 @@ def in_box(_coords) -> bool: max_delta = np.max(np.max(_coords, axis=0) - np.array([a, b, c])) return np.min(_coords) > 0.0 and max_delta < 0 - for i in range(1, max_iters+1): - + for i in range(1, max_iters + 1): m_coords = np.copy(molecule_coords) - vec = [np.random.uniform(-a/2, a/2), # Random translation vector - np.random.uniform(-b/2, b/2), - np.random.uniform(-c/2, c/2)] + vec = [ + np.random.uniform(-a / 2, a / 2), # Random translation vector + np.random.uniform(-b / 2, b / 2), + np.random.uniform(-c / 2, c / 2), + ] # Shift by 0.1 increments in the random direction vec = 0.1 * np.array(vec) / np.linalg.norm(vec) @@ -254,9 +261,11 @@ 
def in_box(_coords) -> bool: break if i == max_iters: - raise RuntimeError(f'Failed to shift {molecule.formula} to a ' - f'random location in the box. ' - f'Tried {max_iters} times') + raise RuntimeError( + f'Failed to shift {molecule.formula} to a ' + f'random location in the box. ' + f'Tried {max_iters} times' + ) molecule.coordinates = m_coords return diff --git a/mlptrain/training/active.py b/mlptrain/training/active.py index 63b770a4..aa8f0575 100644 --- a/mlptrain/training/active.py +++ b/mlptrain/training/active.py @@ -16,31 +16,32 @@ from mlptrain.log import logger from mlptrain.box import Box +def train( + mlp: 'mlptrain.potentials._base.MLPotential', + method_name: str, + selection_method: SelectionMethod = AbsDiffE(), + max_active_time: float = 1000, + n_configs_iter: int = 10, + temp: float = 300.0, + max_e_threshold: Optional[float] = None, + max_active_iters: int = 50, + n_init_configs: int = 10, + init_configs: Optional['mlptrain.ConfigurationSet'] = None, + fix_init_config: bool = False, + bbond_energy: Optional[dict] = None, + fbond_energy: Optional[dict] = None, + init_active_temp: Optional[float] = None, + min_active_iters: int = 1, + bias_start_iter: int = 0, + restart_iter: Optional[int] = None, + inherit_metad_bias: bool = False, + constraints: Optional[List] = None, + bias: Optional = None, + md_program: str = 'ASE', + pbc: bool = False, + box_size: Optional[list] = None + ) -> None: -def train(mlp: 'mlptrain.potentials._base.MLPotential', - method_name: str, - selection_method: SelectionMethod = AbsDiffE(), - max_active_time: float = 1000, - n_configs_iter: int = 10, - temp: float = 300.0, - max_e_threshold: Optional[float] = None, - max_active_iters: int = 50, - n_init_configs: int = 10, - init_configs: Optional['mlptrain.ConfigurationSet'] = None, - fix_init_config: bool = False, - bbond_energy: Optional[dict] = None, - fbond_energy: Optional[dict] = None, - init_active_temp: Optional[float] = None, - min_active_iters: int = 1, - 
bias_start_iter: int = 0, - restart_iter: Optional[int] = None, - inherit_metad_bias: bool = False, - constraints: Optional[List] = None, - bias: Optional = None, - md_program: str = "ASE", - pbc: bool = False, - box_size: Optional[list] = None - ) -> None: """ Train a system using active learning, by propagating dynamics using ML driven molecular dynamics (MD) and adding configurations based on some @@ -133,11 +134,24 @@ def train(mlp: 'mlptrain.potentials._base.MLPotential', md_program: (str) 'ASE' or 'OpenMM' - pbc: (bool) If True, MLP-MD propagates with periodic boundary conditions. - However, the training data still lack PBC. + pbc: (bool) If True, MLP-MD propagates with periodic boundary conditions. + The solvent should be therefore placed ina box and not sphere. + The training data are still treated as clusters in + electronic structure computations. box_size: (List | None) Size of the box where MLP-MD propogated. """ + if md_program.lower() == 'openmm': + if not isinstance(mlp, mlptrain.potentials.MACE): + raise ValueError( + 'The OpenMM backend only supports the use of the MACE potential.' + ) + + if any([bias, fbond_energy, bbond_energy, constraints]): + raise NotImplementedError( + "The OpenMM backend does not support the use of the 'bias', " + "'fbond_energy', 'bbond_energy', or 'constraints' arguments." 
+ ) _check_bias(bias=bias, temp=temp, inherit_metad_bias=inherit_metad_bias) @@ -145,26 +159,29 @@ def train(mlp: 'mlptrain.potentials._base.MLPotential', assert box_size is not None, "to propagate with PBC, box_size cannot be None" if restart_iter is not None: - _initialise_restart(mlp=mlp, - restart_iter=restart_iter, - inherit_metad_bias=inherit_metad_bias) + _initialise_restart( + mlp=mlp, + restart_iter=restart_iter, + inherit_metad_bias=inherit_metad_bias, + ) init_config = mlp.training_data[0] elif init_configs is None: - init_configs = _gen_and_set_init_training_configs(mlp=mlp, - method_name=method_name, - num=n_init_configs) - init_config = init_configs[0] - + init_config = mlp.system.configuration + _gen_and_set_init_training_configs( + mlp=mlp, method_name=method_name, num=n_init_configs + ) + else: init_config = init_configs[0] - _set_init_training_configs(mlp=mlp, - init_configs=init_configs, - method_name=method_name) + _set_init_training_configs( + mlp=mlp, init_configs=init_configs, method_name=method_name + ) if isinstance(bias, PlumedBias) and not bias.from_file: - _attach_plumed_coords_to_init_configs(init_configs=mlp.training_data, - bias=bias) + _attach_plumed_coords_to_init_configs( + init_configs=mlp.training_data, bias=bias + ) if mlp.requires_atomic_energies: mlp.set_atomic_energies(method_name=method_name) @@ -173,7 +190,6 @@ def train(mlp: 'mlptrain.potentials._base.MLPotential', # Run the active learning loop, running iterative MLP-MD for iteration in range(max_active_iters): - if restart_iter is not None and iteration <= restart_iter: continue if isinstance(bias, PlumedBias) and iteration > bias_start_iter: @@ -183,37 +199,40 @@ def train(mlp: 'mlptrain.potentials._base.MLPotential', previous_n_train = mlp.n_train - init_config_iter = _update_init_config(init_config=init_config, - mlp=mlp, - fix_init_config=fix_init_config, - bias=bias, - inherit_metad_bias=inherit_metad_bias, - bias_start_iter=bias_start_iter, - iteration=iteration) - 
- _add_active_configs(mlp=mlp, - init_config=init_config_iter, - selection_method=selection_method, - n_configs=n_configs_iter, - method_name=method_name, - temp=temp, - max_time=max_active_time, - bbond_energy=bbond_energy, - fbond_energy=fbond_energy, - init_temp=init_active_temp, - extra_time= extra_time, - constraints=constraints, - bias=deepcopy(bias), - inherit_metad_bias=inherit_metad_bias, - bias_start_iter=bias_start_iter, - iteration=iteration, - md_program=md_program, - pbc=pbc, - box_size=box_size) + init_config_iter = _update_init_config( + init_config=init_config, + mlp=mlp, + fix_init_config=fix_init_config, + bias=bias, + inherit_metad_bias=inherit_metad_bias, + bias_start_iter=bias_start_iter, + iteration=iteration, + ) + + _add_active_configs( + mlp=mlp, + init_config=init_config_iter, + selection_method=selection_method, + n_configs=n_configs_iter, + method_name=method_name, + temp=temp, + max_time=max_active_time, + bbond_energy=bbond_energy, + fbond_energy=fbond_energy, + init_temp=init_active_temp, + extra_time= extra_time, + constraints=constraints, + bias=deepcopy(bias), + inherit_metad_bias=inherit_metad_bias, + bias_start_iter=bias_start_iter, + iteration=iteration, + md_program=md_program, + pbc=pbc, + box_size=box_size + ) # Active learning finds no configurations if mlp.n_train == previous_n_train: - if iteration >= min_active_iters: logger.info('No AL configurations found') break @@ -238,45 +257,55 @@ def train(mlp: 'mlptrain.potentials._base.MLPotential', return None -def _add_active_configs(mlp: 'mlptrain.potentials._base.MLPotential', - init_config: 'mlptrain.Configuration', - selection_method: 'mlptrain.training.selection.SelectionMethod', - n_configs: int = 10, - **kwargs - ) -> None: +def _add_active_configs( + mlp: 'mlptrain.potentials._base.MLPotential', + init_config: 'mlptrain.Configuration', + selection_method: 'mlptrain.training.selection.SelectionMethod', + n_configs: int = 10, + **kwargs, +) -> None: """ Add a number 
(n_configs) of configurations to the current training data based on active learning selection of MLP-MD generated configurations """ if Config.n_cores > n_configs and Config.n_cores % n_configs != 0: - raise NotImplementedError('Active learning is only implemented using ' - 'an multiple of the number n_configs_iter. ' - f'Please use n*{n_configs} cores.') + raise NotImplementedError( + 'Active learning is only implemented using ' + 'an multiple of the number n_configs_iter. ' + f'Please use n*{n_configs} cores.' + ) n_processes = min(n_configs, Config.n_cores) n_cores_pp = max(Config.n_cores // n_configs, 1) - logger.info('Searching for "active" configurations with ' - f'{n_processes} processes using {n_cores_pp} cores / process') + logger.info( + 'Searching for "active" configurations with ' + f'{n_processes} processes using {n_cores_pp} cores / process' + ) if 'bias' in kwargs and kwargs['iteration'] < kwargs['bias_start_iter']: - logger.info(f'Iteration {kwargs["iteration"]}: the bias potential ' - 'is not applied') + logger.info( + f'Iteration {kwargs["iteration"]}: the bias potential ' + 'is not applied' + ) kwargs['bias'] = _remove_bias_potential(kwargs['bias']) configs = ConfigurationSet() results = [] with mp.get_context('spawn').Pool(processes=n_processes) as pool: - for idx in range(n_configs): kwargs['idx'] = idx - result = pool.apply_async(_gen_active_config, - args=(init_config.copy(), - mlp.copy(), - selection_method.copy(), - n_cores_pp), - kwds=deepcopy(kwargs)) + result = pool.apply_async( + _gen_active_config, + args=( + init_config.copy(), + mlp.copy(), + selection_method.copy(), + n_cores_pp, + ), + kwds=deepcopy(kwargs), + ) results.append(result) pool.close() @@ -294,29 +323,35 @@ def _add_active_configs(mlp: 'mlptrain.potentials._base.MLPotential', if 'method_name' in kwargs and configs.has_a_none_energy: for config in configs: if config.energy.true is None: - config.single_point(kwargs['method_name'], n_cores=Config.n_cores) - - if 
(kwargs['inherit_metad_bias'] is True - and kwargs['iteration'] >= kwargs['bias_start_iter']): + config.single_point( + kwargs['method_name'], n_cores=Config.n_cores + ) + + if ( + kwargs['inherit_metad_bias'] is True + and kwargs['iteration'] >= kwargs['bias_start_iter'] + ): _generate_inheritable_metad_bias(n_configs=n_configs, kwargs=kwargs) mlp.training_data += configs os.makedirs('datasets', exist_ok=True) - mlp.training_data.save(f'datasets/' - f'dataset_after_iter_{kwargs["iteration"]}.npz') + mlp.training_data.save( + f'datasets/' f'dataset_after_iter_{kwargs["iteration"]}.npz' + ) return None -def _gen_active_config(config: 'mlptrain.Configuration', - mlp: 'mlptrain.potentials._base.MLPotential', - selector: 'mlptrain.training.selection.SelectionMethod', - n_cores: int, - max_time: float, - method_name: str, - **kwargs - ) -> Optional['mlptrain.Configuration']: +def _gen_active_config( + config: 'mlptrain.Configuration', + mlp: 'mlptrain.potentials._base.MLPotential', + selector: 'mlptrain.training.selection.SelectionMethod', + n_cores: int, + max_time: float, + method_name: str, + **kwargs, +) -> Optional['mlptrain.Configuration']: """ Generate a configuration based on 'active learning', by running MLP-MD until a configuration that satisfies the selection_method is found. @@ -370,12 +405,18 @@ def _gen_active_config(config: 'mlptrain.Configuration', dataset for the next iteration of active learning """ - curr_time = 0. if 'curr_time' not in kwargs else kwargs.pop('curr_time') - extra_time = 0. if 'extra_time' not in kwargs else kwargs.pop('extra_time') + curr_time = 0.0 if 'curr_time' not in kwargs else kwargs.pop('curr_time') + extra_time = ( + 0.0 if 'extra_time' not in kwargs else kwargs.pop('extra_time') + ) n_calls = 0 if 'n_calls' not in kwargs else kwargs.pop('n_calls') - temp = 300. 
if 'temp' not in kwargs else kwargs.pop('temp') - i_temp = temp if 'init_active_temp' not in kwargs else kwargs.pop('init_active_temp') + temp = 300.0 if 'temp' not in kwargs else kwargs.pop('temp') + i_temp = ( + temp + if 'init_active_temp' not in kwargs + else kwargs.pop('init_active_temp') + ) pbc = False if 'pbc' not in kwargs else kwargs.pop('pbc') box_size = None if 'box_size' not in kwargs else kwargs.pop('box_size') @@ -385,33 +426,36 @@ def _gen_active_config(config: 'mlptrain.Configuration', md_time = 2 + n_calls**3 + float(extra_time) - if (kwargs['inherit_metad_bias'] is True - and kwargs['iteration'] >= kwargs['bias_start_iter']): - + if ( + kwargs['inherit_metad_bias'] is True + and kwargs['iteration'] >= kwargs['bias_start_iter'] + ): kwargs = _modify_kwargs_for_metad_bias_inheritance(kwargs) if pbc: config.box = Box(box_size) - if kwargs['md_program'].lower() == 'openmm': - traj = run_mlp_md_openmm(config, - mlp=mlp, - temp=temp if curr_time > 0 else i_temp, - dt=0.5, - interval=max(1, 2*md_time//selector.n_backtrack), - fs=md_time, - n_cores=1, - **kwargs) + traj = run_mlp_md_openmm( + config, + mlp=mlp, + temp=temp if curr_time > 0 else i_temp, + dt=0.5, + interval=int(max(1, 2 * md_time // selector.n_backtrack)), + fs=md_time, + n_cores=1, + **kwargs, + ) else: - traj = run_mlp_md(config, - mlp=mlp, - temp=temp if curr_time > 0 else i_temp, - dt=0.5, - interval=max(1, 2*md_time//selector.n_backtrack), - fs=md_time, - n_cores=1, - **kwargs) - + traj = run_mlp_md( + config, + mlp=mlp, + temp=temp if curr_time > 0 else i_temp, + dt=0.5, + interval=int(max(1, 2 * md_time // selector.n_backtrack)), + fs=md_time, + n_cores=1, + **kwargs, + ) traj.t0 = curr_time # Increment the initial time (t0) @@ -421,16 +465,46 @@ def _gen_active_config(config: 'mlptrain.Configuration', selector(traj.final_frame, mlp, method_name=method_name, n_cores=n_cores) if selector.select: - if traj.final_frame.energy.true is None: - traj.final_frame.single_point(method_name, 
n_cores=n_cores) + if selector.check: + logger.info( + 'currently applying distance selector,' + 'to avoid un-physical structures,' + 'do backtracking in the trajectory to' + 'find the first configuration in ' + '{selector.n_backtrack} steps recognised as outlier' + ) + + stride = max(1, len(traj) // selector.n_backtrack) + + back_traj = ConfigurationSet() + for i in reversed(traj[::stride]): + back_traj.append(i) + + for i, frame in enumerate(back_traj): + logger.info( + f'Starting to check {i} th configuration' + 'to determine whether it is the first' + 'configurations selected by the distance selector' + ) + selector(frame, mlp, method_name=method_name, n_cores=n_cores) + if selector.select is False: + logger.info(f'Selecting {i-1} th configuration.') + frame = back_traj[i - 1] + break + else: + frame = traj.final_frame - return traj.final_frame + if frame.energy.true is None: + frame.single_point(method_name, n_cores=n_cores) - if selector.too_large: + return frame - logger.warning('Backtracking in the trajectory to find a suitable ' - f'configuration in {selector.n_backtrack} steps') - stride = max(1, len(traj)//selector.n_backtrack) + if selector.too_large: + logger.warning( + 'Backtracking in the trajectory to find a suitable ' + f'configuration in {selector.n_backtrack} steps' + ) + stride = max(1, len(traj) // selector.n_backtrack) for frame in reversed(traj[::stride]): selector(frame, mlp, method_name=method_name, n_cores=n_cores) @@ -452,26 +526,38 @@ def _gen_active_config(config: 'mlptrain.Configuration', curr_time += md_time # If the prediction is within the threshold then call this function again - return _gen_active_config(config, mlp, selector, n_cores, max_time, method_name, - temp=temp, - curr_time=curr_time, - n_calls=n_calls+1, - **kwargs) - - -def _set_init_training_configs(mlp: 'mlptrain.potentials._base.MLPotential', - init_configs: 'mlptrain.ConfigurationSet', - method_name: str - ) -> None: + return _gen_active_config( + config, + mlp, + 
selector, + n_cores, + max_time, + method_name, + temp=temp, + curr_time=curr_time, + n_calls=n_calls + 1, + **kwargs, + ) + + +def _set_init_training_configs( + mlp: 'mlptrain.potentials._base.MLPotential', + init_configs: 'mlptrain.ConfigurationSet', + method_name: str, +) -> None: """Set some initial training configurations""" if len(init_configs) == 0: - raise ValueError('Cannot set initial training configurations with a ' - 'set of size 0') + raise ValueError( + 'Cannot set initial training configurations with a ' + 'set of size 0' + ) if not all(cfg.energy.true is not None for cfg in init_configs): - logger.info(f'Initialised with {len(init_configs)} configurations ' - f'all with defined energy') + logger.info( + f'Initialised with {len(init_configs)} configurations ' + f'all with defined energy' + ) init_configs.single_point(method=method_name) mlp.training_data += init_configs @@ -479,10 +565,9 @@ def _set_init_training_configs(mlp: 'mlptrain.potentials._base.MLPotential', return None -def _gen_and_set_init_training_configs(mlp: 'mlptrain.potentials._base.MLPotential', - method_name: str, - num: int - ) -> None: +def _gen_and_set_init_training_configs( + mlp: 'mlptrain.potentials._base.MLPotential', method_name: str, num: int +) -> None: """ Generate a set of initial configurations for a system, if init_configs is undefined. 
Otherwise ensure all the true energies and forces are defined @@ -502,7 +587,7 @@ def _gen_and_set_init_training_configs(mlp: 'mlptrain.potentials._base.MLPotenti while p_acc < 0.1: n_generated_configs = 0 - dist -= 0.2 # Reduce the minimum distance requirement + dist -= 0.2 # Reduce the minimum distance requirement for _ in range(10): try: @@ -513,15 +598,18 @@ def _gen_and_set_init_training_configs(mlp: 'mlptrain.potentials._base.MLPotenti continue p_acc = n_generated_configs / 10 - logger.info(f'Generated configurations with p={p_acc:.2f} with a ' - f'minimum distance of {dist:.2f}') + logger.info( + f'Generated configurations with p={p_acc:.2f} with a ' + f'minimum distance of {dist:.2f}' + ) # Generate the initial configurations init_configs = ConfigurationSet() while len(init_configs) < num: try: - config = mlp.system.random_configuration(min_dist=dist, - with_intra=True) + config = mlp.system.random_configuration( + min_dist=dist, with_intra=True + ) config.box = Box([100, 100, 100]) init_configs.append(config) @@ -534,10 +622,11 @@ def _gen_and_set_init_training_configs(mlp: 'mlptrain.potentials._base.MLPotenti return init_configs -def _initialise_restart(mlp: 'mlptrain.potentials._base.MLPotential', - restart_iter: int, - inherit_metad_bias: bool - ) -> None: +def _initialise_restart( + mlp: 'mlptrain.potentials._base.MLPotential', + restart_iter: int, + inherit_metad_bias: bool, +) -> None: """Initialise initial configurations and inherited bias""" init_configs = ConfigurationSet() @@ -547,18 +636,19 @@ def _initialise_restart(mlp: 'mlptrain.potentials._base.MLPotential', if inherit_metad_bias: hills_path = f'accumulated_bias/bias_after_iter_{restart_iter}.dat' if os.path.exists(hills_path): - shutil.copyfile(src=hills_path, - dst=f'HILLS_{restart_iter}.dat') + shutil.copyfile(src=hills_path, dst=f'HILLS_{restart_iter}.dat') else: - raise FileNotFoundError('Inherited bias generated after iteration ' - f'{restart_iter} not found') + raise 
FileNotFoundError( + 'Inherited bias generated after iteration ' + f'{restart_iter} not found' + ) return None -def _attach_plumed_coords_to_init_configs(init_configs: 'mlptrain.ConfigurationSet', - bias: 'mlptrain.PlumedBias' - ) -> None: +def _attach_plumed_coords_to_init_configs( + init_configs: 'mlptrain.ConfigurationSet', bias: 'mlptrain.PlumedBias' +) -> None: """ Attach PLUMED collective variable values to the configurations in the initial training set @@ -573,10 +663,12 @@ def _attach_plumed_coords_to_init_configs(init_configs: 'mlptrain.ConfigurationS driver_setup = ['UNITS LENGTH=A'] for cv in bias.cvs: driver_setup.extend(cv.setup) - driver_setup.append('PRINT ' - f'ARG={cv.name} ' - f'FILE=colvar_{cv.name}_driver.dat ' - 'STRIDE=1') + driver_setup.append( + 'PRINT ' + f'ARG={cv.name} ' + f'FILE=colvar_{cv.name}_driver.dat ' + 'STRIDE=1' + ) # Remove duplicate lines driver_setup = list(dict.fromkeys(driver_setup)) @@ -585,10 +677,18 @@ def _attach_plumed_coords_to_init_configs(init_configs: 'mlptrain.ConfigurationS for line in driver_setup: f.write(f'{line}\n') - driver_process = Popen(['plumed', 'driver', - '--ixyz', 'init_configs_driver.xyz', - '--plumed', 'driver_setup.dat', - '--length-units', 'A']) + driver_process = Popen( + [ + 'plumed', + 'driver', + '--ixyz', + 'init_configs_driver.xyz', + '--plumed', + 'driver_setup.dat', + '--length-units', + 'A', + ] + ) driver_process.wait() os.remove('init_configs_driver.xyz') @@ -608,14 +708,15 @@ def _attach_plumed_coords_to_init_configs(init_configs: 'mlptrain.ConfigurationS return None -def _update_init_config(init_config: 'mlptrain.Configuration', - mlp: 'mlptrain.potentials._base.MLPotential', - fix_init_config: bool, - bias: Optional[Union['mlptrain.Bias', 'mlptrain.PlumedBias']], - inherit_metad_bias: bool, - bias_start_iter: int, - iteration: int - ) -> 'mlptrain.Configuration': +def _update_init_config( + init_config: 'mlptrain.Configuration', + mlp: 'mlptrain.potentials._base.MLPotential', + 
fix_init_config: bool, + bias: Optional[Union['mlptrain.Bias', 'mlptrain.PlumedBias']], + inherit_metad_bias: bool, + bias_start_iter: int, + iteration: int, +) -> 'mlptrain.Configuration': """Update initial configuration for an active learning iteration""" if fix_init_config: @@ -623,12 +724,13 @@ def _update_init_config(init_config: 'mlptrain.Configuration', else: if bias is not None: - if inherit_metad_bias and iteration >= bias_start_iter: - _attach_inherited_bias_energies(configurations=mlp.training_data, - iteration=iteration, - bias_start_iter=bias_start_iter, - bias=bias) + _attach_inherited_bias_energies( + configurations=mlp.training_data, + iteration=iteration, + bias_start_iter=bias_start_iter, + bias=bias, + ) return mlp.training_data.lowest_inherited_biased_energy @@ -639,10 +741,11 @@ def _update_init_config(init_config: 'mlptrain.Configuration', return mlp.training_data.lowest_energy -def _check_bias(bias: Optional[Union['mlptrain.Bias', 'mlptrain.PlumedBias']], - temp: float, - inherit_metad_bias: bool - ) -> None: +def _check_bias( + bias: Optional[Union['mlptrain.Bias', 'mlptrain.PlumedBias']], + temp: float, + inherit_metad_bias: bool, +) -> None: """ Check if the bias is suitable for running active learning with the requested parameters @@ -656,21 +759,21 @@ def _check_bias(bias: Optional[Union['mlptrain.Bias', 'mlptrain.PlumedBias']], return None -def _check_bias_parameters(bias: Optional[Union['mlptrain.Bias', 'mlptrain.PlumedBias']], - temp: float - ) -> None: +def _check_bias_parameters( + bias: Optional[Union['mlptrain.Bias', 'mlptrain.PlumedBias']], temp: float +) -> None: """ Check if all the required parameters of the bias are set (currently only checks PlumedBias initialised not from a file) """ if isinstance(bias, PlumedBias): - if bias.from_file is False and bias.metadynamics is True: - if bias.height == 0: - logger.info('Setting the height for metadynamics active ' - 'learning to 5*k_B*T') + logger.info( + 'Setting the height for 
metadynamics active ' + 'learning to 5*k_B*T' + ) bias.height = 5 * ase_units.kB * temp return None @@ -683,18 +786,22 @@ def _check_bias_for_metad_bias_inheritance(bias: Optional) -> None: """ if not isinstance(bias, PlumedBias): - raise TypeError('Metadynamics bias can only be inherited when ' - 'using PlumedBias') + raise TypeError( + 'Metadynamics bias can only be inherited when ' 'using PlumedBias' + ) if bias.from_file: - raise ValueError('Metadynamics bias cannot be inherited using ' - 'PlumedBias from a file') + raise ValueError( + 'Metadynamics bias cannot be inherited using ' + 'PlumedBias from a file' + ) return None -def _remove_bias_potential(bias: Optional - ) -> Union['mlptrain.sampling.PlumedBias', None]: +def _remove_bias_potential( + bias: Optional, +) -> Union['mlptrain.sampling.PlumedBias', None]: """ Remove bias potential from a bias, except LOWER_WALLS and UPPER_WALLS when the bias is PlumedBias @@ -731,9 +838,7 @@ def _modify_kwargs_for_metad_bias_inheritance(kwargs: dict) -> dict: return kwargs -def _generate_inheritable_metad_bias(n_configs: int, - kwargs: dict - ) -> None: +def _generate_inheritable_metad_bias(n_configs: int, kwargs: dict) -> None: """ Generate files containing metadynamics bias to be inherited in the next active learning iteration @@ -746,23 +851,28 @@ def _generate_inheritable_metad_bias(n_configs: int, using_hills = all(os.path.exists(fname) for fname in hills_files) if using_hills: - _generate_inheritable_metad_bias_hills(n_configs=n_configs, - hills_files=hills_files, - iteration=iteration, - bias_start_iter=bias_start_iter) + _generate_inheritable_metad_bias_hills( + n_configs=n_configs, + hills_files=hills_files, + iteration=iteration, + bias_start_iter=bias_start_iter, + ) else: - logger.error('All files required for generating inheritable ' - 'metadynamics bias could not be found') + logger.error( + 'All files required for generating inheritable ' + 'metadynamics bias could not be found' + ) return None -def 
_generate_inheritable_metad_bias_hills(n_configs: int, - hills_files: List[str], - iteration: int, - bias_start_iter: int - ) -> None: +def _generate_inheritable_metad_bias_hills( + n_configs: int, + hills_files: List[str], + iteration: int, + bias_start_iter: int, +) -> None: """ Generate HILLS_{iteration}.dat file containing metadynamics bias to be inherited in the next active learning iteration {iteration+1} @@ -773,12 +883,10 @@ def _generate_inheritable_metad_bias_hills(n_configs: int, if iteration == bias_start_iter: open(f'HILLS_{iteration-1}.dat', 'w').close() - shutil.move(src=f'HILLS_{iteration-1}.dat', - dst=f'HILLS_{iteration}.dat') + shutil.move(src=f'HILLS_{iteration-1}.dat', dst=f'HILLS_{iteration}.dat') # Remove inherited bias from files containing new bias for fname in hills_files: - with open(fname, 'r') as f: f_lines = f.readlines() @@ -797,10 +905,11 @@ def _generate_inheritable_metad_bias_hills(n_configs: int, prev_line = line with open(fname, 'w') as f: - # No new gaussians deposited - if (second_header_first_index == 0 - and os.path.getsize(f'HILLS_{iteration}.dat') != 0): + if ( + second_header_first_index == 0 + and os.path.getsize(f'HILLS_{iteration}.dat') != 0 + ): pass else: @@ -808,7 +917,6 @@ def _generate_inheritable_metad_bias_hills(n_configs: int, f.write(line) for idx, fname in enumerate(hills_files): - with open(fname, 'r') as f: f_lines = f.readlines() @@ -823,7 +931,6 @@ def _generate_inheritable_metad_bias_hills(n_configs: int, height_column_index = f_lines[0].split().index('height') - 2 with open(f'HILLS_{iteration}.dat', 'a') as final_hills_file: - # Attach the header to the final file if it's empty if os.path.getsize(f'HILLS_{iteration}.dat') == 0: for i in range(n_lines_in_header): @@ -846,24 +953,28 @@ def _generate_inheritable_metad_bias_hills(n_configs: int, os.remove(fname) os.makedirs('accumulated_bias', exist_ok=True) - shutil.copyfile(src=f'HILLS_{iteration}.dat', - 
dst=f'accumulated_bias/bias_after_iter_{iteration}.dat') + shutil.copyfile( + src=f'HILLS_{iteration}.dat', + dst=f'accumulated_bias/bias_after_iter_{iteration}.dat', + ) return None -def _attach_inherited_bias_energies(configurations: 'mlptrain.ConfigurationSet', - iteration: int, - bias_start_iter: int, - bias: 'mlptrain.PlumedBias' - ) -> None: +def _attach_inherited_bias_energies( + configurations: 'mlptrain.ConfigurationSet', + iteration: int, + bias_start_iter: int, + bias: 'mlptrain.PlumedBias', +) -> None: """ Attach inherited metadynamics bias energies from the previous active learning iteration to the configurations """ - logger.info('Attaching inherited bias energies to the whole training ' - 'data set') + logger.info( + 'Attaching inherited bias energies to the whole training ' 'data set' + ) if iteration == bias_start_iter: for config in configurations: @@ -877,17 +988,19 @@ def _attach_inherited_bias_energies(configurations: 'mlptrain.ConfigurationSet', return None else: - _generate_grid_from_hills(configurations=configurations, - iteration=iteration, - bias=bias) + _generate_grid_from_hills( + configurations=configurations, iteration=iteration, bias=bias + ) cvs_cols = range(0, bias.n_metad_cvs) - cvs_grid = np.loadtxt(f'bias_grid_{iteration-1}.dat', - usecols=cvs_cols, ndmin=2) + cvs_grid = np.loadtxt( + f'bias_grid_{iteration-1}.dat', usecols=cvs_cols, ndmin=2 + ) cvs_grid = np.flip(cvs_grid, axis=1) - bias_grid = np.loadtxt(f'bias_grid_{iteration-1}.dat', - usecols=bias.n_metad_cvs) + bias_grid = np.loadtxt( + f'bias_grid_{iteration-1}.dat', usecols=bias.n_metad_cvs + ) bias_grid = -bias_grid header = [] @@ -909,26 +1022,28 @@ def _attach_inherited_bias_energies(configurations: 'mlptrain.ConfigurationSet', metad_cv_idxs.reverse() for config in configurations: - start_idxs = [0] block_width = np.prod(n_bins) for i, cv in enumerate(bias.metad_cvs): - end_idx = start_idxs[i] + block_width - idx = np.searchsorted(a=cvs_grid[start_idxs[i]:end_idx, i], 
- v=config.plumed_coordinates[metad_cv_idxs[i]], - side='right') + idx = np.searchsorted( + a=cvs_grid[start_idxs[i] : end_idx, i], + v=config.plumed_coordinates[metad_cv_idxs[i]], + side='right', + ) start_idx = start_idxs[i] + idx start_idxs.append(start_idx) block_width = int(block_width / n_bins[i]) if start_idx == end_idx: - raise IndexError(f'CV {cv.name} value lies at the edge or ' - f'outside of the grid for at least one ' - f'of the configurations in the training ' - f'set.') + raise IndexError( + f'CV {cv.name} value lies at the edge or ' + f'outside of the grid for at least one ' + f'of the configurations in the training ' + f'set.' + ) config.energy.inherited_bias = bias_grid[start_idxs[-1]] @@ -937,10 +1052,11 @@ def _attach_inherited_bias_energies(configurations: 'mlptrain.ConfigurationSet', return None -def _generate_grid_from_hills(configurations: 'mlptrain.ConfigurationSet', - iteration: int, - bias: 'mlptrain.PlumedBias' - ) -> None: +def _generate_grid_from_hills( + configurations: 'mlptrain.ConfigurationSet', + iteration: int, + bias: 'mlptrain.PlumedBias', +) -> None: """ Generate bias_grid_{iteration-1}.dat from HILLS_{iteration-1}.dat """ @@ -958,27 +1074,39 @@ def _generate_grid_from_hills(configurations: 'mlptrain.ConfigurationSet', max_params.append(max_value + difference * extension_coefficient) bin_widths = [(width / 5) for width in bias.width] - n_bins = [int((max_params[i] - min_params[i]) / bin_widths[i]) - for i in range(bias.n_metad_cvs)] + n_bins = [ + int((max_params[i] - min_params[i]) / bin_widths[i]) + for i in range(bias.n_metad_cvs) + ] n_bins = [bins if bins != 0 else 1 for bins in n_bins] bin_sequence = ','.join(str(bins) for bins in n_bins) min_sequence = ','.join(str(param) for param in min_params) max_sequence = ','.join(str(param) for param in max_params) - sum_hills_process = Popen(['plumed', 'sum_hills', '--negbias', - '--hills', f'HILLS_{iteration-1}.dat', - '--outfile', f'bias_grid_{iteration-1}.dat', - '--bin', 
bin_sequence, - '--min', min_sequence, - '--max', max_sequence]) + sum_hills_process = Popen( + [ + 'plumed', + 'sum_hills', + '--negbias', + '--hills', + f'HILLS_{iteration-1}.dat', + '--outfile', + f'bias_grid_{iteration-1}.dat', + '--bin', + bin_sequence, + '--min', + min_sequence, + '--max', + max_sequence, + ] + ) sum_hills_process.wait() return None -def _remove_last_inherited_metad_bias_file(max_active_iters: int - ) -> None: +def _remove_last_inherited_metad_bias_file(max_active_iters: int) -> None: """Remove the last inherited metadynamics bias file""" for iteration in range(max_active_iters): diff --git a/mlptrain/training/selection.py b/mlptrain/training/selection.py index 82a9da19..bb87cb1a 100644 --- a/mlptrain/training/selection.py +++ b/mlptrain/training/selection.py @@ -5,12 +5,15 @@ from typing import Optional from mlptrain.descriptors import soap_kernel_vector from mlptrain.log import logger +from mlptrain.descriptors import soap_matrix +from sklearn.neighbors import LocalOutlierFactor +from sklearn.decomposition import PCA class SelectionMethod(ABC): """Active learning selection method - NOTE: Should execute in serial + NOTE: Should execute in serial """ def __init__(self): @@ -20,11 +23,12 @@ def __init__(self): self._configuration: Optional['mlptrain.Configuration'] = None @abstractmethod - def __call__(self, - configuration: 'mlptrain.Configuration', - mlp: 'mlptrain.potentials.MLPotential', - **kwargs - ) -> None: + def __call__( + self, + configuration: 'mlptrain.Configuration', + mlp: 'mlptrain.potentials.MLPotential', + **kwargs, + ) -> None: """Evaluate the selector""" @property @@ -49,14 +53,20 @@ def n_backtrack(self) -> int: (int): """ + @property + def check(self) -> bool: + """ + Should we keep checking configurations in the MLP-MD trajectory + until the first configuration that will be selected by the selector is found? 
+ """ + return False + def copy(self) -> 'SelectionMethod': return deepcopy(self) class AbsDiffE(SelectionMethod): - - def __init__(self, - e_thresh: float = 0.1): + def __init__(self, e_thresh: float = 0.1): """ Selection method based on the absolute difference between the true and predicted total energies. @@ -86,14 +96,17 @@ def __call__(self, configuration, mlp, **kwargs) -> None: self._configuration = configuration if method_name is None: - raise ValueError('Evaluating the absolute difference requires a ' - 'method name but None was present') + raise ValueError( + 'Evaluating the absolute difference requires a ' + 'method name but None was present' + ) if configuration.energy.predicted is None: self._configuration.single_point(mlp) - self._configuration.single_point(method_name, - n_cores=kwargs.get('n_cores', 1)) + self._configuration.single_point( + method_name, n_cores=kwargs.get('n_cores', 1) + ) return None @property @@ -117,10 +130,8 @@ def n_backtrack(self) -> int: return 10 -class MaxAtomicEnvDistance(SelectionMethod): - - def __init__(self, - threshold: float = 0.999): +class AtomicEnvSimilarity(SelectionMethod): + def __init__(self, threshold: float = 0.999): """ Selection criteria based on the maximum distance between any of the training set and a new configuration. 
Evaluated based on the similarity @@ -139,10 +150,12 @@ def __init__(self, self.threshold = float(threshold) self._k_vec = np.array([]) - def __call__(self, - configuration: 'mlptrain.Configuration', - mlp: 'mlptrain.potentials.MLPotential', - **kwargs) -> None: + def __call__( + self, + configuration: 'mlptrain.Configuration', + mlp: 'mlptrain.potentials.MLPotential', + **kwargs, + ) -> None: """ Evaluate the selection criteria @@ -155,9 +168,9 @@ def __call__(self, if len(mlp.training_data) == 0: return None - self._k_vec = soap_kernel_vector(configuration, - configurations=mlp.training_data, - zeta=8) + self._k_vec = soap_kernel_vector( + configuration, configurations=mlp.training_data, zeta=8 + ) return None @property @@ -187,3 +200,114 @@ def n_backtrack(self) -> int: def _n_training_envs(self) -> int: """Number of training environments available""" return len(self._k_vec) + + +def outlier_identifier( + configuration: 'mlptrain.Configuration', + configurations: 'mlptrain.ConfigurationSet', + dim_reduction: bool = False, + distance_metric: str = 'euclidean', + n_neighbors: int = 15, +) -> int: + """ + This function identifies whether a new data (configuration) + is the outlier in comparison with the existing data (configurations) by Local Outlier + Factor (LOF). For more details about the LOF method, please see the lit. + Breunig, M. M., Kriegel, H.-P., Ng, R. T. & Sander, J. LOF: Identifying + density-based local outliers. SIGMOD Rec. 29, 93–104 (2000). + + ----------------------------------------------------------------------- + Arguments: + + dim_reduction: if True, dimensionality reduction will + be performed before LOF calculation (so far only PCA available). + distance_metric: distance metric used in LOF, + which could be one of 'euclidean', + 'cosine' and 'manhattan'. + n_neighbors: number of neighbors considered when computing the LOF.
+ + ----------------------------------------------------------------------- + Returns: + + -1 for anomalies/outliers and +1 for inliers. + """ + + m1 = soap_matrix(configurations) + m1 /= np.linalg.norm(m1, axis=1).reshape(len(configurations), 1) + + v1 = soap_matrix(configuration) + v1 /= np.linalg.norm(v1, axis=1).reshape(1, -1) + + if dim_reduction: + pca = PCA(n_components=3) + m1 = pca.fit_transform(m1) + v1 = pca.transform(v1) + + clf = LocalOutlierFactor( + n_neighbors=n_neighbors, + metric=distance_metric, + novelty=True, + contamination=0.2, + ) + 'contamination: define the porpotional of outliner in the data, the higher, the less abnormal' + + clf.fit(m1) + + new = clf.predict(v1) + + return new + + +class AtomicEnvDistance(SelectionMethod): + def __init__( + self, + pca: bool = False, + distance_metric: str = 'euclidean', + n_neighbors: int = 15, + ): + """ + Selection criteria based on analysis whether the configuration is + outlier by outlier_identifier function + ----------------------------------------------------------------------- + Arguments: + pca: whether to do dimensionality reduction by PCA. + As the selected distance_metric may potentially suffer from + the curse of dimensionality, the dimensionality reduction step + (using PCA) could be applied before calculating the LOF. + This would ensure good performance in high-dimensional data space. 
+ For the other arguments, please see details in the outlier_identifier function + """ + super().__init__() + self.pca = pca + self.metric = distance_metric + self.n_neighbors = n_neighbors + + def __call__(self, configuration, mlp, **kwargs) -> None: + self.mlp = mlp + self._configuration = configuration + + @property + def select(self) -> bool: + metric = outlier_identifier( + self._configuration, + self.mlp.training_data, + self.pca, + self.metric, + self.n_neighbors, + ) + return metric == -1 + + @property + def too_large(self) -> bool: + return False + + @property + def n_backtrack(self) -> int: + return 10 + + @property + def check(self) -> bool: + if self.mlp.n_train > 30: + return True + else: + return False diff --git a/mlptrain/training/tests/__init__.py b/mlptrain/training/tests/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/mlptrain/training/tests/test_selection.py b/mlptrain/training/tests/test_selection.py deleted file mode 100644 index f9d20bef..00000000 --- a/mlptrain/training/tests/test_selection.py +++ /dev/null @@ -1,52 +0,0 @@ -import os -import mlptrain as mlt -from autode.atoms import Atom -from mlptrain.training.selection import MaxAtomicEnvDistance -here = os.path.abspath(os.path.dirname(__file__)) - - -def _similar_methane(): - - atoms = [ - Atom('C', - 0.83511, 2.41296, 0.00000), - Atom('H', 0.24737, 2.41296, 0.00000), - Atom('H', - 1.19178, 2.07309, 0.94983), - Atom('H', - 1.19178, 1.76033, - 0.76926), - Atom('H', - 1.28016, 3.36760, - 0.18057) - ] - - return mlt.Configuration(atoms=atoms) - - -def _distorted_methane(): - - atoms = [ - Atom('C', - 0.83511, 2.41296, 0.00000), - Atom('H', 0.34723, 2.42545, 0.00000), - Atom('H', - 1.19178, 2.07309, 0.94983), - Atom('H', - 1.50592, -0.01979, -0.76926), - Atom('H', - 1.28016, 3.36760, -0.18057) - ] - - return mlt.Configuration(atoms=atoms) - - -def test_selection_on_structures(): - - configs = mlt.ConfigurationSet() - - file_path = os.path.join(here, 'data', 
'methane.xyz') - configs.load_xyz(filename=file_path, - charge=0, mult=1, box=None) - - assert len(configs) == 3 - - selector = MaxAtomicEnvDistance(threshold=0.9) - mlp = mlt.potentials.GAP('blank') - mlp.training_data = configs - - selector(configuration=_similar_methane(), mlp=mlp) - assert not selector.select - - selector(configuration=_distorted_methane(), mlp=mlp) - assert selector.select diff --git a/mlptrain/utils.py b/mlptrain/utils.py index 193d387b..e1f0bd3c 100644 --- a/mlptrain/utils.py +++ b/mlptrain/utils.py @@ -8,8 +8,10 @@ from ase import units as ase_units -def work_in_tmp_dir(kept_substrings: Optional[Sequence[str]] = None, - copied_substrings: Optional[Sequence[str]] = None): +def work_in_tmp_dir( + kept_substrings: Optional[Sequence[str]] = None, + copied_substrings: Optional[Sequence[str]] = None, +): """ Execute a function in a temporary directory @@ -25,22 +27,22 @@ def work_in_tmp_dir(kept_substrings: Optional[Sequence[str]] = None, """ def func_decorator(func): - @wraps(func) def wrapped_function(*args, **kwargs): - here_path = os.getcwd() tmpdir_path = mkdtemp() if copied_substrings is not None: - for filename in os.listdir(here_path): - if _name_contains_substring(name=filename, - substrings=copied_substrings, - regex=False): - - shutil.copy(src=os.path.join(here_path, filename), - dst=os.path.join(tmpdir_path, filename)) + if _name_contains_substring( + name=filename, + substrings=copied_substrings, + regex=False, + ): + shutil.copy( + src=os.path.join(here_path, filename), + dst=os.path.join(tmpdir_path, filename), + ) # Move directories and execute os.chdir(tmpdir_path) @@ -50,14 +52,16 @@ def wrapped_function(*args, **kwargs): finally: if kept_substrings is not None: - for filename in os.listdir(tmpdir_path): - if _name_contains_substring(name=filename, - substrings=kept_substrings, - regex=False): - - shutil.copy(src=os.path.join(tmpdir_path, filename), - dst=os.path.join(here_path, filename)) + if _name_contains_substring( + 
name=filename, + substrings=kept_substrings, + regex=False, + ): + shutil.copy( + src=os.path.join(tmpdir_path, filename), + dst=os.path.join(here_path, filename), + ) os.chdir(here_path) @@ -67,13 +71,13 @@ def wrapped_function(*args, **kwargs): return out return wrapped_function + return func_decorator -def _name_contains_substring(name: str, - substrings: Sequence[str], - regex: bool - ) -> bool: +def _name_contains_substring( + name: str, substrings: Sequence[str], regex: bool +) -> bool: """Returns True if one of the regex or regular substrings are found in the name""" @@ -99,7 +103,6 @@ def work_in_dir(dirname: str): """ def func_decorator(func): - @wraps(func) def wrapped_function(*args, **kwargs): here_path = os.getcwd() @@ -112,12 +115,11 @@ def wrapped_function(*args, **kwargs): return out return wrapped_function + return func_decorator -def unique_name(name: str, - path: Optional[str] = None - ) -> str: +def unique_name(name: str, path: Optional[str] = None) -> str: """ Returns a unique name for a file or directory in the specified directory by adding bck0, bck1, ... to the front of the name until a unique name @@ -152,12 +154,13 @@ def _name_exists(): return name -def move_files(moved_substrings: List[str], - dst_folder: str, - src_folder: Optional[str] = None, - unique: bool = True, - regex: bool = False - ) -> None: +def move_files( + moved_substrings: List[str], + dst_folder: str, + src_folder: Optional[str] = None, + unique: bool = True, + regex: bool = False, +) -> None: """ Move files with given regex or regular substrings from a directory src_folder to a directory dst_folder. 
If dst_folder already exists @@ -184,7 +187,6 @@ def move_files(moved_substrings: List[str], src_folder = os.getcwd() if os.path.exists(dst_folder) and unique: - name = dst_folder.split('/')[-1] path = '/'.join(dst_folder.split('/')[:-1]) unique_dst_folder = os.path.join(path, unique_name(name, path)) @@ -196,10 +198,9 @@ def move_files(moved_substrings: List[str], os.makedirs(dst_folder) for filename in os.listdir(src_folder): - if _name_contains_substring(name=filename, - substrings=moved_substrings, - regex=regex): - + if _name_contains_substring( + name=filename, substrings=moved_substrings, regex=regex + ): source = os.path.join(src_folder, filename) destination = os.path.join(dst_folder, filename) shutil.move(src=source, dst=destination) @@ -230,9 +231,9 @@ def _modified_exponent(exponent): return re.sub(exponent_pattern, _modified_exponent, string) -def convert_ase_time(time_array: Union[np.ndarray, float], - units: str - ) -> np.ndarray: +def convert_ase_time( + time_array: Union[np.ndarray, float], units: str +) -> np.ndarray: """ Converts ASE time units to different time units. @@ -266,9 +267,9 @@ def convert_ase_time(time_array: Union[np.ndarray, float], return time_array -def convert_ase_energy(energy_array: Union[np.ndarray, float], - units: str - ) -> np.ndarray: +def convert_ase_energy( + energy_array: Union[np.ndarray, float], units: str +) -> np.ndarray: """ Converts ASE energy units to different energy units. 
diff --git a/pyproject.toml b/pyproject.toml index 02b85a3a..00591d84 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,3 +21,6 @@ line-length = 79 [tool.ruff.format] quote-style = "single" + +[tool.pytest.ini_options] +addopts = "--cov-report term --cov-report xml" diff --git a/mlptrain/configurations/tests/__init__.py b/tests/__init__.py similarity index 100% rename from mlptrain/configurations/tests/__init__.py rename to tests/__init__.py diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..611640cb --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,40 @@ +import mlptrain as mlt +import pytest +from autode.atoms import Atom + + +@pytest.fixture +def h2(): + """Dihydrogen molecule""" + atoms = [ + Atom('H', -0.80952, 2.49855, 0.0), + Atom('H', -0.34877, 1.961, 0.0), + ] + return mlt.Molecule(atoms=atoms, charge=0, mult=1) + + +@pytest.fixture +def h2o(): + """Water molecule""" + atoms = [ + Atom('H', 2.32670, 0.51322, 0.0), + Atom('H', 1.03337, 0.70894, -0.89333), + Atom('O', 1.35670, 0.51322, 0.0), + ] + return mlt.Molecule(atoms=atoms, charge=0, mult=1) + + +@pytest.fixture +def h2_configuration(h2): + system = mlt.System(h2, box=[50, 50, 50]) + config = system.random_configuration() + + return config + + +@pytest.fixture +def h2o_configuration(h2o): + system = mlt.System(h2o, box=[50, 50, 50]) + config = system.random_configuration() + + return config diff --git a/mlptrain/sampling/tests/__init__.py b/tests/data/__init__.py similarity index 100% rename from mlptrain/sampling/tests/__init__.py rename to tests/data/__init__.py diff --git a/tests/data/data.zip b/tests/data/data.zip new file mode 100644 index 00000000..bdb8509e Binary files /dev/null and b/tests/data/data.zip differ diff --git a/mlptrain/training/tests/data/methane.xyz b/tests/data/methane.xyz similarity index 100% rename from mlptrain/training/tests/data/methane.xyz rename to tests/data/methane.xyz diff --git a/mlptrain/sampling/tests/utils.py 
b/tests/data/utils.py similarity index 99% rename from mlptrain/sampling/tests/utils.py rename to tests/data/utils.py index cf45886e..02de4a0f 100644 --- a/mlptrain/sampling/tests/utils.py +++ b/tests/data/utils.py @@ -15,10 +15,8 @@ def work_in_zipped_dir(zip_path, chdir=True): assert zip_path.endswith('.zip') def func_decorator(func): - @wraps(func) def wrapped_function(*args, **kwargs): - dir_path = zip_path[:-4] # Remove the .zip extension extract_path = os.path.split(dir_path)[0] @@ -42,4 +40,5 @@ def wrapped_function(*args, **kwargs): return result return wrapped_function + return func_decorator diff --git a/mlptrain/sampling/tests/test_bias.py b/tests/test_bias.py similarity index 57% rename from mlptrain/sampling/tests/test_bias.py rename to tests/test_bias.py index cbc91808..40945e06 100644 --- a/mlptrain/sampling/tests/test_bias.py +++ b/tests/test_bias.py @@ -4,7 +4,7 @@ import mlptrain as mlt from mlptrain.utils import work_in_tmp_dir from .test_potential import TestPotential -from .molecules import _h2 + mlt.Config.n_cores = 1 here = os.path.abspath(os.path.dirname(__file__)) @@ -12,30 +12,27 @@ def _get_avg_dists(atoms, atom_pair_list): """Return the average distance between atoms in all m pairs""" - euclidean_dists = [atoms.get_distance(i, j, mic=True) - for (i, j) in atom_pair_list] + euclidean_dists = [ + atoms.get_distance(i, j, mic=True) for (i, j) in atom_pair_list + ] return np.mean(euclidean_dists) @work_in_tmp_dir() -def test_bias(): - - system = mlt.System(_h2(), box=[50, 50, 50]) +def test_bias(h2): + system = mlt.System(h2, box=[50, 50, 50]) pot = TestPotential('1D') config = system.random_configuration() - bias = mlt.Bias(mlt.AverageDistance([0, 1]), - reference=0.7, - kappa=100) - + bias = mlt.Bias(mlt.AverageDistance([0, 1]), reference=0.7, kappa=100) + assert bias.ref is not None assert bias.kappa is not None assert bias.f.atom_pair_list == [(0, 1)] - new_pos = [[0, 0, 0], - [0, 0, 1]] + new_pos = [[0, 0, 0], [0, 0, 1]] ase_atoms = 
config.ase_atoms ase_atoms.set_positions(new_pos, apply_constraint=False) @@ -44,19 +41,22 @@ def test_bias(): bias_force = bias.grad(ase_atoms) - assert bias_force[0][2] == - bias_force[1][[2]] + assert bias_force[0][2] == -bias_force[1][[2]] assert np.isclose(bias_force[0][2], -30) # kappa * (1-0.7) - trajectory = mlt.md.run_mlp_md(configuration=config, - mlp=pot, - fs=1000, - temp=300, - dt=0.5, - interval=10, - bias=bias) - - data = [_get_avg_dists(config.ase_atoms, [[0, 1]]) - for config in trajectory] + trajectory = mlt.md.run_mlp_md( + configuration=config, + mlp=pot, + fs=1000, + temp=300, + dt=0.5, + interval=10, + bias=bias, + ) + + data = [ + _get_avg_dists(config.ase_atoms, [[0, 1]]) for config in trajectory + ] hist, bin_edges = np.histogram(data, density=False, bins=500) mids = 0.5 * (bin_edges[1:] + bin_edges[:-1]) diff --git a/mlptrain/configurations/tests/test_configuration.py b/tests/test_configuration.py similarity index 99% rename from mlptrain/configurations/tests/test_configuration.py rename to tests/test_configuration.py index 27ebc95c..f1383f29 100644 --- a/mlptrain/configurations/tests/test_configuration.py +++ b/tests/test_configuration.py @@ -3,7 +3,6 @@ def test_equality(): - config1 = Configuration() assert config1 == config1 assert config1 == Configuration() diff --git a/mlptrain/configurations/tests/test_configuration_set.py b/tests/test_configuration_set.py similarity index 78% rename from mlptrain/configurations/tests/test_configuration_set.py rename to tests/test_configuration_set.py index 6ce7a6b1..a8ac68d1 100644 --- a/mlptrain/configurations/tests/test_configuration_set.py +++ b/tests/test_configuration_set.py @@ -8,7 +8,6 @@ @work_in_tmp_dir() def test_configurations_save(): - configs = ConfigurationSet() # Configuration sets should be constructable from nothing @@ -26,7 +25,6 @@ def test_configurations_save(): @work_in_tmp_dir() def test_configurations_load_default(): - configs = 
ConfigurationSet(Configuration(atoms=[Atom('H')])) assert len(configs) == 1 @@ -44,11 +42,11 @@ def test_configurations_load_default(): @work_in_tmp_dir() def test_configurations_load_alt_attrs(): - - configs = ConfigurationSet(Configuration(atoms=[Atom('H')], - charge=-1, - mult=3, - box=Box([1., 1., 1.]))) + configs = ConfigurationSet( + Configuration( + atoms=[Atom('H')], charge=-1, mult=3, box=Box([1.0, 1.0, 1.0]) + ) + ) configs.save('tmp.npz') new_configs = ConfigurationSet('tmp.npz') config = new_configs[0] @@ -61,7 +59,6 @@ def test_configurations_load_alt_attrs(): @work_in_tmp_dir() def test_configurations_load_with_energies_forces(): - config = Configuration(atoms=[Atom('H')]) config.energy.true = -1.0 config.energy.predicted = -0.9 @@ -76,24 +73,27 @@ def test_configurations_load_with_energies_forces(): for attr in ('energy', 'forces'): for kind in ('predicted', 'true'): - - assert np.allclose(getattr(getattr(loaded_config, attr), kind), - getattr(getattr(config, attr), kind)) + assert np.allclose( + getattr(getattr(loaded_config, attr), kind), + getattr(getattr(config, attr), kind), + ) @work_in_tmp_dir() def test_configurations_load_xyz(): - configs = ConfigurationSet() with open('tmp.xyz', 'w') as xyz_file: - print('1', - 'title line', - 'H 0.0 0.0 0.0', - '1', - 'title line', - 'H 1.0 0.0 0.0', - sep='\n', file=xyz_file) + print( + '1', + 'title line', + 'H 0.0 0.0 0.0', + '1', + 'title line', + 'H 1.0 0.0 0.0', + sep='\n', + file=xyz_file, + ) configs.load_xyz('tmp.xyz', charge=0, mult=2) diff --git a/tests/test_md.py b/tests/test_md.py new file mode 100644 index 00000000..f165c6d7 --- /dev/null +++ b/tests/test_md.py @@ -0,0 +1,110 @@ +import os +import numpy as np +import mlptrain as mlt +from ase.io.trajectory import Trajectory as ASETrajectory +from ase.constraints import Hookean +from .test_potential import TestPotential +from .data.utils import work_in_zipped_dir + +here = os.path.abspath(os.path.dirname(__file__)) + + 
+@work_in_zipped_dir(os.path.join(here, 'data/data.zip')) +def test_md_full_plumed_input(h2o_configuration): + bias = mlt.PlumedBias(filename='plumed_bias_nopath.dat') + + mlt.md.run_mlp_md( + configuration=h2o_configuration, + mlp=TestPotential('1D'), + temp=300, + dt=1, + interval=10, + bias=bias, + kept_substrings=['.dat'], + ps=1, + ) + + assert os.path.exists('colvar.dat') + assert os.path.exists('HILLS.dat') + + +@work_in_zipped_dir(os.path.join(here, 'data/data.zip')) +def test_md_restart(h2_configuration): + atoms = h2_configuration.ase_atoms + initial_trajectory = ASETrajectory('md_restart.traj', 'r', atoms) + + mlt.md.run_mlp_md( + configuration=h2_configuration, + mlp=TestPotential('1D'), + temp=300, + dt=1, + interval=10, + restart_files=['md_restart.traj'], + ps=1, + ) + + assert os.path.exists('md_restart.traj') + + final_trajectory = ASETrajectory('md_restart.traj', 'r', atoms) + + # 10 ps simulation with dt = 1 fs and interval of 10 -> 1001 frames + assert len(initial_trajectory) == 1001 + + # Adding 1 ps simulation with interval 10 -> 101 frames, but removing one + # duplicate frame + assert len(final_trajectory) == 1001 + 101 - 1 + + +@work_in_zipped_dir(os.path.join(here, 'data/data.zip')) +def test_md_save(h2_configuration): + mlt.md.run_mlp_md( + configuration=h2_configuration, + mlp=TestPotential('1D'), + temp=300, + dt=1, + interval=10, + kept_substrings=['.traj'], + ps=1, + save_fs=200, + ) + + assert os.path.exists('trajectory.traj') + + assert not os.path.exists('trajectory_0fs.traj') + assert os.path.exists('trajectory_200fs.traj') + assert os.path.exists('trajectory_1000fs.traj') + assert not os.path.exists('trajectory_1200fs.traj') + + traj_200fs = ASETrajectory('trajectory_200fs.traj') + + # 200 fs / 10 interval == 20 frames; + 1 starting frame + assert len(traj_200fs) == 20 + 1 + + +@work_in_zipped_dir(os.path.join(here, 'data/data.zip')) +def test_md_traj_attachments(h2o_configuration): + cv1 = mlt.PlumedAverageCV('cv1', (0, 1)) +
bias = mlt.PlumedBias(cvs=cv1) + + hookean_constraint = Hookean(a1=1, a2=2, k=100, rt=0.5) + + traj = mlt.md.run_mlp_md( + configuration=h2o_configuration, + mlp=TestPotential('1D'), + temp=300, + dt=1, + interval=10, + bias=bias, + kept_substrings=['colvar_cv1.dat'], + constraints=[hookean_constraint], + ps=1, + ) + + plumed_coordinates = np.loadtxt('colvar_cv1.dat', usecols=1) + + for i, config in enumerate(traj): + assert np.shape(config.plumed_coordinates) == (1,) + assert config.plumed_coordinates[0] == plumed_coordinates[i] + + assert all(bias_energy is not None for bias_energy in traj.bias_energies) + assert any(bias_energy != 0 for bias_energy in traj.bias_energies) diff --git a/tests/test_metadynamics.py b/tests/test_metadynamics.py new file mode 100644 index 00000000..8d196353 --- /dev/null +++ b/tests/test_metadynamics.py @@ -0,0 +1,321 @@ +import os +import numpy as np +import mlptrain as mlt +import pytest +from ase.io.trajectory import Trajectory as ASETrajectory +from .test_potential import TestPotential +from .data.utils import work_in_zipped_dir + +mlt.Config.n_cores = 2 +here = os.path.abspath(os.path.dirname(__file__)) + + +@pytest.fixture +def run_metadynamics(): + def _run_metadynamics( + metad, + n_runs, + configuration, + al_iter=None, + save_sep=False, + all_to_xyz=False, + restart=False, + **kwargs, + ): + metad.run_metadynamics( + configuration=configuration, + mlp=TestPotential('1D'), + temp=300, + dt=1, + interval=10, + pace=100, + width=0.05, + height=0.1, + biasfactor=3, + al_iter=al_iter, + n_runs=n_runs, + save_sep=save_sep, + all_to_xyz=all_to_xyz, + restart=restart, + **kwargs, + ) + + return _run_metadynamics + + +@work_in_zipped_dir(os.path.join(here, 'data/data.zip')) +def test_run_metadynamics(h2_configuration, run_metadynamics): + cv1 = mlt.PlumedAverageCV('cv1', (0, 1)) + metad = mlt.Metadynamics(cv1) + n_runs = 4 + + assert metad.bias is not None + + run_metadynamics( + metad, n_runs, h2_configuration, all_to_xyz=True, 
save_fs=200, fs=500 + ) + + assert os.path.exists('trajectories') + assert os.path.exists('trajectories/combined_trajectory.xyz') + + metad_dir = 'plumed_files/metadynamics' + for idx in range(1, n_runs + 1): + assert os.path.exists(f'trajectories/trajectory_{idx}.traj') + + for sim_time in [200, 400]: + assert os.path.exists( + f'trajectories/' f'trajectory_{idx}_{sim_time}fs.traj' + ) + assert os.path.exists( + f'trajectories/' f'metad_{idx}_{sim_time}fs.xyz' + ) + + assert os.path.exists(os.path.join(metad_dir, f'colvar_cv1_{idx}.dat')) + assert os.path.exists(os.path.join(metad_dir, f'HILLS_{idx}.dat')) + + assert os.path.exists(f'gaussian_heights/gaussian_heights_{idx}.pdf') + + metad.compute_fes(n_bins=100) + + for idx in range(1, n_runs + 1): + assert os.path.exists(f'plumed_files/metadynamics/fes_{idx}.dat') + + assert os.path.exists('fes_raw.npy') + fes_raw = np.load('fes_raw.npy') + + # 1 cv, 4 fes -> 5; 100 bins + assert np.shape(fes_raw) == (5, 100) + + metad.plot_fes('fes_raw.npy') + assert os.path.exists('metad_free_energy.pdf') + + metad.plot_fes_convergence(stride=2, n_surfaces=2) + + # 500 / 100: simulation time divided by the pace <=> number of gaussians + # Surfaces are computed every 2 gaussians + n_computed_surfaces = (500 / 100) // 2 + for idx in range(int(n_computed_surfaces)): + assert os.path.exists(f'plumed_files/fes_convergence/fes_1_{idx}.dat') + + assert os.path.exists('fes_convergence/fes_convergence_diff.pdf') + assert os.path.exists('fes_convergence/fes_convergence.pdf') + + +@work_in_zipped_dir(os.path.join(here, 'data/data.zip')) +def test_run_metadynamics_restart(h2_configuration, run_metadynamics): + cv1 = mlt.PlumedAverageCV('cv1', (0, 1)) + metad = mlt.Metadynamics(cv1) + n_runs = 4 + + run_metadynamics(metad, n_runs, h2_configuration, fs=500) + + run_metadynamics(metad, n_runs, h2_configuration, restart=True, fs=500) + + n_steps = len( + np.loadtxt('plumed_files/metadynamics/colvar_cv1_1.dat', usecols=0) + ) + n_gaussians = 
len( + np.loadtxt('plumed_files/metadynamics/HILLS_1.dat', usecols=0) + ) + + # Adding two 500 fs simulations with interval 10 -> 51 frames each, but + # removing one duplicate frame + assert n_steps == 51 + 51 - 1 + assert n_gaussians == 5 + 5 + + assert os.path.exists('trajectories/trajectory_1.traj') + + trajectory = ASETrajectory('trajectories/trajectory_1.traj') + + # Adding two 500 fs simulations with interval 10 -> 51 frames each, but + # removing one duplicate frame (same as before, except testing this for + # the generated .traj file instead of .dat file) + assert len(trajectory) == 51 + 51 - 1 + + +@work_in_zipped_dir(os.path.join(here, 'data/data.zip')) +def test_run_metadynamics_with_inherited_bias( + h2_configuration, run_metadynamics +): + cv1 = mlt.PlumedAverageCV('cv1', (0, 1)) + metad = mlt.Metadynamics(cv1) + n_runs = 4 + + run_metadynamics(metad, n_runs, h2_configuration, al_iter=3, fs=500) + + run_metadynamics( + metad, n_runs, h2_configuration, al_iter=3, restart=True, fs=500 + ) + + metad_dir = 'plumed_files/metadynamics' + for idx in range(1, n_runs + 1): + assert os.path.exists(f'trajectories/trajectory_{idx}.traj') + + assert os.path.exists(os.path.join(metad_dir, f'colvar_cv1_{idx}.dat')) + assert os.path.exists(os.path.join(metad_dir, f'HILLS_{idx}.dat')) + + metad.compute_fes(via_reweighting=True) + assert os.path.exists('fes_raw.npy') + + +@work_in_zipped_dir(os.path.join(here, 'data/data.zip')) +def test_run_metadynamics_with_component(h2_configuration, run_metadynamics): + cv1 = mlt.PlumedCustomCV('plumed_cv_dist.dat', 'x') + metad = mlt.Metadynamics(cv1) + n_runs = 4 + + run_metadynamics(metad, n_runs, h2_configuration, fs=100) + + metad_dir = 'plumed_files/metadynamics' + for idx in range(1, n_runs + 1): + assert os.path.exists( + os.path.join(metad_dir, f'colvar_cv1_x_{idx}.dat') + ) + + +@work_in_zipped_dir(os.path.join(here, 'data/data.zip')) +def test_run_metadynamics_with_additional_cvs( + h2o_configuration, run_metadynamics +):
+ cv1 = mlt.PlumedAverageCV('cv1', (0, 1)) + cv2 = mlt.PlumedAverageCV('cv2', (2, 1)) + cv2.attach_upper_wall(location=3.0, kappa=150.0) + + bias = mlt.PlumedBias(cvs=(cv1, cv2)) + + metad = mlt.Metadynamics(cvs=cv1, bias=bias) + + assert metad.bias == bias + assert metad.n_cvs == 1 + + n_runs = 1 + run_metadynamics( + metad, + configuration=h2o_configuration, + n_runs=n_runs, + write_plumed_setup=True, + fs=100, + ) + + with open('plumed_files/metadynamics/plumed_setup.dat', 'r') as f: + plumed_setup = [line.strip() for line in f] + + # Not including the units + assert plumed_setup[1:] == [ + 'cv1_dist1: DISTANCE ATOMS=1,2', + 'cv1: CUSTOM ARG=cv1_dist1 VAR=cv1_dist1 ' + f'FUNC={1/1}*(cv1_dist1) PERIODIC=NO', + 'cv2_dist1: DISTANCE ATOMS=3,2', + 'cv2: CUSTOM ARG=cv2_dist1 VAR=cv2_dist1 ' + f'FUNC={1/1}*(cv2_dist1) PERIODIC=NO', + 'UPPER_WALLS ARG=cv2 AT=3.0 KAPPA=150.0 EXP=2', + 'metad: METAD ARG=cv1 PACE=100 HEIGHT=0.1 ' + 'SIGMA=0.05 TEMP=300 BIASFACTOR=3 ' + 'FILE=HILLS_1.dat', + 'PRINT ARG=cv1,cv1_dist1 ' 'FILE=colvar_cv1_1.dat STRIDE=10', + 'PRINT ARG=cv2,cv2_dist1 ' 'FILE=colvar_cv2_1.dat STRIDE=10', + ] + + +@work_in_zipped_dir(os.path.join(here, 'data/data.zip')) +def test_estimate_width(h2_configuration): + cv1 = mlt.PlumedAverageCV('cv1', (0, 1)) + metad = mlt.Metadynamics(cv1) + + width = metad.estimate_width( + configurations=h2_configuration, + mlp=TestPotential('1D'), + plot=True, + fs=100, + ) + + assert len(width) == 1 + + files_directory = 'plumed_files/width_estimation' + plots_directory = 'width_estimation' + + assert os.path.isdir(files_directory) + assert os.path.exists(os.path.join(files_directory, 'colvar_cv1_1.dat')) + + assert os.path.isdir(plots_directory) + assert os.path.exists(os.path.join(plots_directory, 'cv1_config1.pdf')) + + +@work_in_zipped_dir(os.path.join(here, 'data/data.zip')) +def test_try_multiple_biasfactors(h2_configuration): + cv1 = mlt.PlumedAverageCV('cv1', (0, 1)) + metad = mlt.Metadynamics(cv1) + biasfactors = 
range(5, 11, 5) + + metad.try_multiple_biasfactors( + configuration=h2_configuration, + mlp=TestPotential('1D'), + temp=300, + interval=10, + dt=1, + pace=100, + width=0.05, + height=0.1, + biasfactors=biasfactors, + plotted_cvs=cv1, + fs=100, + ) + + files_dir = 'plumed_files/multiple_biasfactors' + assert os.path.isdir(files_dir) + + plots_dir = 'multiple_biasfactors' + assert os.path.isdir(plots_dir) + + for idx, biasf in enumerate(biasfactors, start=1): + assert os.path.exists(os.path.join(files_dir, f'colvar_cv1_{idx}.dat')) + assert os.path.exists(os.path.join(plots_dir, f'cv1_biasf{biasf}.pdf')) + + +@work_in_zipped_dir(os.path.join(here, 'data/data.zip')) +def test_block_analysis(h2_configuration): + cv1 = mlt.PlumedAverageCV('cv1', (0, 1)) + metad = mlt.Metadynamics(cv1) + dt = 1 + interval = 10 + n_runs = 1 + ps = 2 + start_time = 0.5 + + metad.run_metadynamics( + configuration=h2_configuration, + mlp=TestPotential('1D'), + temp=300, + dt=dt, + interval=interval, + pace=100, + width=0.05, + height=0.1, + biasfactor=3, + n_runs=n_runs, + ps=ps, + ) + + metad.block_analysis(start_time=start_time) + + assert os.path.exists('block_analysis.pdf') + assert os.path.exists('block_analysis.npz') + + start_time_fs = start_time * 1e3 + n_steps = int(start_time_fs / dt) + n_used_frames = n_steps // interval + + min_n_blocks = 10 + min_blocksize = 10 + blocksize_interval = 10 + max_blocksize = n_used_frames // min_n_blocks + + data = np.load('block_analysis.npz') + + # axis 0: CV1; axis 1: 300 bins + assert np.shape(data['CVs']) == (1, 300) + for blocksize in range( + min_blocksize, max_blocksize + 1, blocksize_interval + ): + # axis 0: error; axis 1: 300 bins + assert np.shape(data[str(blocksize)]) == (3, 300) diff --git a/tests/test_openmm_md.py b/tests/test_openmm_md.py new file mode 100644 index 00000000..83caee4f --- /dev/null +++ b/tests/test_openmm_md.py @@ -0,0 +1,200 @@ +import os +import numpy as np +import mlptrain as mlt +import pytest +from 
ase.io.trajectory import Trajectory as ASETrajectory + +from .data.utils import work_in_zipped_dir + +import ase.units + +here = os.path.abspath(os.path.dirname(__file__)) + +# All tests should have 'test_openmm' in their name so that they are skipped for the GAP CI run. + + +@pytest.fixture +def h2_system_config(h2): + system = mlt.System(h2, box=[50, 50, 50]) + config = system.random_configuration() + return system, config + + +@pytest.fixture +def h2o_system_config(h2o): + system = mlt.System(h2o, box=[50, 50, 50]) + config = system.random_configuration() + return system, config + + +@work_in_zipped_dir(os.path.join(here, 'data/data.zip')) +def test_openmm_topology(h2_system_config, h2o_system_config): + """Test the creation of an OpenMM Topology from an ASE Atoms object.""" + # H2 molecule + _, config = h2_system_config + atoms = config.ase_atoms + topology = mlt.md_openmm._create_openmm_topology(atoms) + + assert topology.getNumAtoms() == len(atoms) + assert topology.getNumResidues() == len(atoms) + assert topology.getNumChains() == 1 + assert topology.getNumBonds() == 0 + assert np.allclose( + topology.getPeriodicBoxVectors()._value * 10.0, atoms.get_cell().array + ) + + # H2O molecule + _, config = h2o_system_config + atoms = config.ase_atoms + topology = mlt.md_openmm._create_openmm_topology(atoms) + + assert topology.getNumAtoms() == len(atoms) + assert topology.getNumResidues() == len(atoms) + assert topology.getNumChains() == 1 + assert topology.getNumBonds() == 0 + assert np.allclose( + topology.getPeriodicBoxVectors()._value * 10.0, atoms.get_cell().array + ) + + +@work_in_zipped_dir(os.path.join(here, 'data/data.zip')) +def test_openmm_simulation(h2o_system_config): + """Test the OpenMM Simulation object.""" + # H2O molecule + system, config = h2o_system_config + atoms = config.ase_atoms + mace = mlt.potentials.MACE('water', system=system) + + topology = mlt.md_openmm._create_openmm_topology(atoms) + platform = mlt.md_openmm._get_openmm_platform() + + 
simulation = mlt.md_openmm._create_openmm_simulation( + mlp=mace, + topology=topology, + temp=300, + dt=1, + platform=platform, + ) + + assert np.isclose(simulation.integrator.getTemperature()._value, 300) + assert np.isclose(simulation.integrator.getStepSize()._value, 0.001) + + mlt.md_openmm._set_momenta_and_geometry( + simulation=simulation, + positions=atoms.get_positions() * 0.1, + temp=0.0, + restart_file=None, + ) + + # Check that potential and kinetic energies are correct + # and consistent with ASE + reference_pot_energy = -13310.4853515625 # kJ/mol + openmm_pot_energy = ( + simulation.context.getState(getEnergy=True).getPotentialEnergy()._value + ) + atoms.set_calculator(mace.ase_calculator) + ase_pot_energy = atoms.get_potential_energy() / ( + (ase.units.kJ / ase.units.mol) / ase.units.eV + ) + + assert np.isclose(openmm_pot_energy, reference_pot_energy) + assert np.isclose(ase_pot_energy, reference_pot_energy) + assert np.isclose(ase_pot_energy, openmm_pot_energy) + + +def test_openmm_simulation_name_generation(): + """Test the simulation name generation.""" + name = mlt.md_openmm._get_simulation_name() + assert name == 'simulation.state.xml' + + name = mlt.md_openmm._get_simulation_name(idx=2) + assert name == 'simulation_2.state.xml' + + state_file = 'file1.state.xml' + name = mlt.md_openmm._get_simulation_name(restart_files=[state_file]) + assert name == state_file + + +@work_in_zipped_dir(os.path.join(here, 'data/data.zip')) +def test_openmm_md(h2o_system_config): + """Test the OpenMM MD simulation.""" + # H2O molecule + system, config = h2o_system_config + + mace = mlt.potentials.MACE('water', system=system) + + # Run some dynamics with the potential + mlt.md_openmm.run_mlp_md_openmm( + configuration=config, + mlp=mace, + temp=300, + dt=1, + interval=10, + fs=100, + kept_substrings=['.state.xml', '.traj'], + ) + + traj = ASETrajectory('trajectory.traj') + assert os.path.exists('simulation.state.xml') + assert os.path.exists('trajectory.traj') + # 
100 fs simulation with dt = 1 fs and interval of 10 -> 11 frames + assert len(traj) == 11 + + +@work_in_zipped_dir(os.path.join(here, 'data/data.zip')) +def test_openmm_md_restart(h2o_system_config): + """Test the MD restart functionality.""" + system, config = h2o_system_config + atoms = config.ase_atoms + mace = mlt.potentials.MACE('water', system=system) + initial_trajectory = ASETrajectory('md_restart_h2o.traj', 'r', atoms) + + mlt.md_openmm.run_mlp_md_openmm( + configuration=config, + mlp=mace, + temp=300, + dt=1, + interval=10, + restart_files=['md_restart_h2o.traj', 'md_restart_h2o.state.xml'], + fs=100, + ) + + assert os.path.exists('md_restart_h2o.traj') + + final_trajectory = ASETrajectory('md_restart_h2o.traj', 'r', atoms) + + # 10 ps simulation with dt = 1 fs and interval of 10 -> 1001 frames + assert len(initial_trajectory) == 1001 + + # Adding 100 fs simulation with interval 10 -> 11 frames, but removing one + # duplicate frame + assert len(final_trajectory) == 1001 + 11 - 1 + + +@work_in_zipped_dir(os.path.join(here, 'data/data.zip')) +def test_openmm_md_save(h2o_system_config): + """Test the MD save functionality.""" + system, config = h2o_system_config + mace = mlt.potentials.MACE('water', system=system) + + mlt.md_openmm.run_mlp_md_openmm( + configuration=config, + mlp=mace, + temp=300, + dt=1, + interval=10, + kept_substrings=['.traj'], + fs=100, + save_fs=20, + ) + + assert os.path.exists('trajectory.traj') + assert not os.path.exists('trajectory_0fs.traj') + assert os.path.exists('trajectory_20fs.traj') + assert os.path.exists('trajectory_100fs.traj') + assert not os.path.exists('trajectory_120fs.traj') + + traj_20fs = ASETrajectory('trajectory_20fs.traj') + + # 20 fs / 10 interval == 2 frames; + 1 starting frame + assert len(traj_20fs) == 2 + 1 diff --git a/tests/test_plumed.py b/tests/test_plumed.py new file mode 100644 index 00000000..a315177e --- /dev/null +++ b/tests/test_plumed.py @@ -0,0 +1,215 @@ +import os +import pytest +import
mlptrain as mlt +from .data.utils import work_in_zipped_dir + +here = os.path.abspath(os.path.dirname(__file__)) + + +def test_plumed_cv_from_atom_groups(): + cv1 = mlt.PlumedDifferenceCV('cv1', ((0, 1), (2, 3))) + + assert cv1.name == 'cv1' + assert cv1.units == 'Å' + assert cv1.dof_names == ['cv1_dist1', 'cv1_dist2'] + assert cv1.setup == [ + 'cv1_dist1: DISTANCE ATOMS=1,2', + 'cv1_dist2: DISTANCE ATOMS=3,4', + 'cv1: CUSTOM ' + 'ARG=cv1_dist1,cv1_dist2 ' + 'VAR=cv1_dist1,cv1_dist2 ' + 'FUNC=cv1_dist1-cv1_dist2 ' + 'PERIODIC=NO', + ] + + cv2 = mlt.PlumedAverageCV('cv2', (0, 1, 2)) + + assert cv2.name == 'cv2' + assert cv2.units == 'rad' + assert cv2.dof_names == ['cv2_ang1'] + assert cv2.setup == [ + 'cv2_ang1: ANGLE ATOMS=1,2,3', + 'cv2: CUSTOM ' + 'ARG=cv2_ang1 ' + 'VAR=cv2_ang1 ' + 'FUNC=1.0*(cv2_ang1) ' + 'PERIODIC=NO', + ] + + with pytest.raises(TypeError): + mlt.PlumedAverageCV('') + + with pytest.raises(TypeError): + mlt.PlumedAverageCV('', 0) + + with pytest.raises(TypeError): + mlt.PlumedAverageCV('', ()) + + with pytest.raises(ValueError): + mlt.PlumedAverageCV('', (1,)) + + with pytest.raises(NotImplementedError): + mlt.PlumedAverageCV('', [(0, 1, 2, 3, 4, 5), (1, 2, 3)]) + + with pytest.raises(ValueError): + mlt.PlumedDifferenceCV('', ((0, 1), (2, 3), (4, 5))) + + +@work_in_zipped_dir(os.path.join(here, 'data/data.zip')) +def test_plumed_cv_from_file(): + cv1 = mlt.PlumedCustomCV( + 'plumed_cv_custom.dat', component='spath', units='Å' + ) + + assert cv1.name == 'p1.spath' + assert cv1.units == 'Å' + assert cv1.setup == [ + 'p1: PATH ' 'REFERENCE=path.pdb ' 'TYPE=OPTIMAL ' 'LAMBDA=500.0' + ] + + with open('path.pdb', 'r') as f: + data1 = f.read() + + assert cv1.files == [('path.pdb', data1)] + + os.remove('path.pdb') + cv1.write_files() + + with open('path.pdb', 'r') as f: + data2 = f.read() + + assert data1 == data2 + + +def test_plumed_cv_walls(): + cv1 = mlt.PlumedDifferenceCV('cv1', ((0, 1), (2, 3))) + + cv1.attach_lower_wall(location=1, 
kappa=150.0, exp=3) + cv1.attach_upper_wall(location=3, kappa=150.0, exp=3) + + assert cv1.setup == [ + 'cv1_dist1: DISTANCE ATOMS=1,2', + 'cv1_dist2: DISTANCE ATOMS=3,4', + 'cv1: CUSTOM ' + 'ARG=cv1_dist1,cv1_dist2 ' + 'VAR=cv1_dist1,cv1_dist2 ' + 'FUNC=cv1_dist1-cv1_dist2 ' + 'PERIODIC=NO', + 'LOWER_WALLS ARG=cv1 AT=1 KAPPA=150.0 EXP=3', + 'UPPER_WALLS ARG=cv1 AT=3 KAPPA=150.0 EXP=3', + ] + + with pytest.raises(TypeError): + cv1.attach_lower_wall(location=0.5, kappa=150.0, exp=3) + + +def test_plumed_bias_from_cvs(): + cv1 = mlt.PlumedAverageCV('cv1', [(0, 1, 2, 3)]) + cv2 = mlt.PlumedAverageCV('cv2', [(4, 5, 6, 7)]) + + bias = mlt.PlumedBias((cv1, cv2)) + + with pytest.raises(ValueError): + bias._set_metad_params( + pace=10, width=(0.2, 0.3), height=0.5, biasfactor=0.5 + ) + + with pytest.raises(ValueError): + bias._set_metad_params(pace=10, width=0.2, height=0.5, biasfactor=2) + + bias.initialise_for_metad_al( + pace=10, + width=(0.2, 0.3), + height=0.5, + biasfactor=2, + grid_min=(0.5, 1.5), + grid_max=(0.6, 1.6), + ) + + assert bias.cvs == (cv1, cv2) + assert bias.pace == 10 + assert bias.width == (0.2, 0.3) + assert bias.height == 0.5 + assert bias.biasfactor == 2 + assert bias.metad_grid_min == (0.5, 1.5) + assert bias.metad_grid_max == (0.6, 1.6) + assert bias.metad_grid_bin is None + + assert bias.metad_grid_setup == 'GRID_MIN=0.5,1.5 GRID_MAX=0.6,1.6 ' + + bias.strip() + + for attribute, value in bias.__dict__.items(): + if attribute == 'cvs': + assert value is not None + + else: + assert value is None + + +@work_in_zipped_dir(os.path.join(here, 'data/data.zip')) +def test_plumed_bias_from_file(): + bias = mlt.PlumedBias(filename='plumed_bias.dat') + + assert bias.setup == [ + 'dof1: DISTANCE ATOMS=1,2', + 'dof2: DISTANCE ATOMS=2,3', + 'cv1: CUSTOM ARG=dof1,dof2 VAR=dof1,dof2 ' + 'FUNC=dof2-dof1 PERIODIC=NO', + 'lwall: LOWER_WALLS ARG=cv1 AT=1 KAPPA=150.0 EXP=3', + 'p1: PATH REFERENCE=path.pdb TYPE=OPTIMAL ' 'LAMBDA=500.0', + 'UPPER_WALLS ARG=cv1 AT=3 
KAPPA=150.0 EXP=3', + 'METAD ARG=cv1,p1.spath PACE=100 HEIGHT=0.1 ' + 'SIGMA=0.5 BIASFACTOR=4 FILE=HILLS.dat', + 'PRINT ARG=cv1,p1.spath FILE=colvar.dat STRIDE=10', + ] + + with open('path.pdb', 'r') as f: + data1 = f.read() + + assert bias.cv_files == [('path.pdb', data1)] + + os.remove('path.pdb') + bias.write_cv_files() + + with open('path.pdb', 'r') as f: + data2 = f.read() + + assert data1 == data2 + + bias.strip() + + assert bias.setup == [ + 'dof1: DISTANCE ATOMS=1,2', + 'dof2: DISTANCE ATOMS=2,3', + 'cv1: CUSTOM ARG=dof1,dof2 VAR=dof1,dof2 ' + 'FUNC=dof2-dof1 PERIODIC=NO', + 'lwall: LOWER_WALLS ARG=cv1 AT=1 KAPPA=150.0 EXP=3', + 'p1: PATH REFERENCE=path.pdb TYPE=OPTIMAL ' 'LAMBDA=500.0', + 'UPPER_WALLS ARG=cv1 AT=3 KAPPA=150.0 EXP=3', + ] + + +@work_in_zipped_dir(os.path.join(here, 'data/data.zip')) +def test_plumed_plot(): + colvar1 = 'test_plumed_plot/colvar1.dat' + colvar2 = 'test_plumed_plot/colvar2.dat' + + mlt.plot_cv_versus_time( + filename=colvar1, + time_units='fs', + cv_units='Å', + cv_limits=(0.5, 1.5), + label='0', + ) + + assert os.path.exists('cv1_0.pdf') + + mlt.plot_cv1_and_cv2( + filenames=(colvar1, colvar2), + cvs_units=('Å', 'Å'), + cvs_limits=((0.5, 1.5), (0.5, 1.5)), + label='0', + ) + + assert os.path.exists('cv1_cv2_0.pdf') diff --git a/mlptrain/sampling/tests/test_potential.py b/tests/test_potential.py similarity index 76% rename from mlptrain/sampling/tests/test_potential.py rename to tests/test_potential.py index 9452e772..a66aa8b6 100644 --- a/mlptrain/sampling/tests/test_potential.py +++ b/tests/test_potential.py @@ -5,23 +5,21 @@ class HarmonicPotential(Calculator): - __test__ = False def get_potential_energy(self, atoms): - r = atoms.get_distance(0, 1) - return (r - 1)**2 + return (r - 1) ** 2 def get_forces(self, atoms): - derivative = np.zeros((len(atoms), 3)) r = atoms.get_distance(0, 1) - x_dist, y_dist, z_dist = [atoms[0].position[j] - atoms[1].position[j] - for j in range(3)] + x_dist, y_dist, z_dist = [ + 
atoms[0].position[j] - atoms[1].position[j] for j in range(3) + ] x_i, y_i, z_i = (x_dist / r), (y_dist / r), (z_dist / r) @@ -34,20 +32,14 @@ def get_forces(self, atoms): class TestPotential(MLPotential): - __test__ = False - def __init__(self, - name: str, - calculator='harmonic', - system=None): - + def __init__(self, name: str, calculator='harmonic', system=None): super().__init__(name=name, system=system) self.calculator = calculator.lower() @property def ase_calculator(self): - if self.calculator == 'harmonic': return HarmonicPotential() @@ -55,8 +47,9 @@ def ase_calculator(self): return LennardJones(rc=2.5, r0=3.0) else: - raise NotImplementedError(f'{self.calculator} is not implemented ' - f'as a test potential') + raise NotImplementedError( + f'{self.calculator} is not implemented ' f'as a test potential' + ) def _train(self) -> None: """ABC for MLPotential required but unused in TestPotential""" diff --git a/mlptrain/sampling/tests/test_reaction_coord.py b/tests/test_reaction_coord.py similarity index 72% rename from mlptrain/sampling/tests/test_reaction_coord.py rename to tests/test_reaction_coord.py index 56ad4bdb..5d8c5494 100644 --- a/mlptrain/sampling/tests/test_reaction_coord.py +++ b/tests/test_reaction_coord.py @@ -3,15 +3,12 @@ import mlptrain as mlt from ase.atoms import Atoms as ASEAtoms -from .molecules import _h2o -def test_differencedistance(): +def test_differencedistance(h2o_configuration): """Test the DifferenceDistance class for reaction coordinate""" - system = mlt.System(_h2o(), box=[50, 50, 50]) - - config = system.random_configuration() + config = h2o_configuration atoms = config.ase_atoms diff_dist = mlt.DifferenceDistance((0, 1), (0, 2)) @@ -41,14 +38,16 @@ def test_differencedistance(): mlt.DifferenceDistance((0, 1)) -@pytest.mark.parametrize('rs', [[(0, 1), (0, 2)], [(1, 0), (0, 2)], [(1, 0), (2, 0)]]) -def test_differencedistance_numerical_gradient(rs, h=1E-8): +@pytest.mark.parametrize( + 'rs', [[(0, 1), (0, 2)], [(1, 0), (0, 
2)], [(1, 0), (2, 0)]] +) +def test_differencedistance_numerical_gradient(rs, h=1e-8): """Test that the analytic gradient is correct for differencedistance""" - atoms = ASEAtoms(symbols=['H', 'H', 'H'], - positions=[[0.0, 0.0, 0.0], - [1.0, 0.1, 0.3], - [-2.0, 0.2, 0.4]]) + atoms = ASEAtoms( + symbols=['H', 'H', 'H'], + positions=[[0.0, 0.0, 0.0], [1.0, 0.1, 0.3], [-2.0, 0.2, 0.4]], + ) z = mlt.DifferenceDistance(*rs) grad = z.grad(atoms) @@ -56,7 +55,6 @@ def test_differencedistance_numerical_gradient(rs, h=1E-8): for i in range(3): for j in range(3): - # Shift to a new position, evaluate the energy and shift back atoms.positions[i, j] += h e_plus_h = z(atoms) @@ -64,4 +62,4 @@ def test_differencedistance_numerical_gradient(rs, h=1E-8): num_grad_ij = (e_plus_h - e) / h - assert np.isclose(grad[i, j], num_grad_ij, atol=1E-8) + assert np.isclose(grad[i, j], num_grad_ij, atol=1e-8) diff --git a/tests/test_selection.py b/tests/test_selection.py new file mode 100644 index 00000000..e1998d33 --- /dev/null +++ b/tests/test_selection.py @@ -0,0 +1,49 @@ +import os +import mlptrain as mlt +from autode.atoms import Atom +from mlptrain.training.selection import AtomicEnvSimilarity + +here = os.path.abspath(os.path.dirname(__file__)) + + +def _similar_methane(): + atoms = [ + Atom('C', -0.83511, 2.41296, 0.00000), + Atom('H', 0.24737, 2.41296, 0.00000), + Atom('H', -1.19178, 2.07309, 0.94983), + Atom('H', -1.19178, 1.76033, -0.76926), + Atom('H', -1.28016, 3.36760, -0.18057), + ] + + return mlt.Configuration(atoms=atoms) + + +def _distorted_methane(): + atoms = [ + Atom('C', -0.83511, 2.41296, 0.00000), + Atom('H', 0.34723, 2.42545, 0.00000), + Atom('H', -1.19178, 2.07309, 0.94983), + Atom('H', -1.50592, -0.01979, -0.76926), + Atom('H', -1.28016, 3.36760, -0.18057), + ] + + return mlt.Configuration(atoms=atoms) + + +def test_selection_on_structures(): + configs = mlt.ConfigurationSet() + + file_path = os.path.join(here, 'data', 'methane.xyz') + 
configs.load_xyz(filename=file_path, charge=0, mult=1, box=None) + + assert len(configs) == 3 + + selector = AtomicEnvSimilarity(threshold=0.9) + mlp = mlt.potentials.GAP('blank') + mlp.training_data = configs + + selector(configuration=_similar_methane(), mlp=mlp) + assert not selector.select + + selector(configuration=_distorted_methane(), mlp=mlp) + assert selector.select diff --git a/mlptrain/configurations/tests/test_trajectory.py b/tests/test_trajectory.py similarity index 99% rename from mlptrain/configurations/tests/test_trajectory.py rename to tests/test_trajectory.py index bb94543a..354cdaca 100644 --- a/mlptrain/configurations/tests/test_trajectory.py +++ b/tests/test_trajectory.py @@ -4,7 +4,6 @@ def test_trajectory_allows_duplicates(): - traj = Trajectory(Configuration(atoms=[Atom('H')])) traj.append(Configuration(atoms=[Atom('H')])) assert len(traj) == 2 diff --git a/tests/test_umbrella.py b/tests/test_umbrella.py new file mode 100644 index 00000000..95e36d60 --- /dev/null +++ b/tests/test_umbrella.py @@ -0,0 +1,199 @@ +import os +import time + +import numpy as np +import pytest + +import mlptrain as mlt +from .test_potential import TestPotential +from .data.utils import work_in_zipped_dir + +here = os.path.abspath(os.path.dirname(__file__)) + + +def _h2_umbrella(): + return mlt.UmbrellaSampling( + zeta_func=mlt.AverageDistance([0, 1]), kappa=100 + ) + + +def _h2_pulled_traj(): + traj = mlt.ConfigurationSet() + traj.load_xyz( + os.path.join(here, 'data/data', 'h2_traj.xyz'), charge=0, mult=1 + ) + + return traj + + +def _h2_sparse_traj(): + traj = _h2_pulled_traj() + sparse_traj = mlt.ConfigurationSet() + sparse_traj.append(traj[0]) + sparse_traj.append(traj[-1]) + + return sparse_traj + + +@work_in_zipped_dir(os.path.join(here, 'data/data.zip')) +def test_run_umbrella(): + umbrella = _h2_umbrella() + traj = _h2_pulled_traj() + n_windows = 3 + + assert umbrella.kappa is not None and np.isclose(umbrella.kappa, 100.0) + assert umbrella.zeta_refs is None 
+ + # Zeta refs are now reset + umbrella.run_umbrella_sampling( + traj, + mlp=TestPotential('1D'), + temp=300, + interval=5, + dt=0.5, + n_windows=n_windows, + save_sep=False, + all_to_xyz=True, + fs=1000, + save_fs=300, + ) + + # Sampling with a high force constant should lead to fitted Gaussians + # that closely match the reference (target) values + for window in umbrella.windows: + assert window.gaussian_plotted is not None + assert np.isclose( + window.gaussian_plotted.mean, window.zeta_ref, atol=0.1 + ) + + assert os.path.exists('trajectories') + assert os.path.exists('trajectories/combined_trajectory.xyz') + + for idx in range(1, n_windows + 1): + assert os.path.exists(f'trajectories/trajectory_{idx}.traj') + + for sim_time in [300, 600, 900]: + assert os.path.exists( + f'trajectories/trajectory_{idx}_{sim_time}fs.traj' + ) + assert os.path.exists( + f'trajectories/window_{idx}_{sim_time}fs.xyz' + ) + + assert os.path.exists('fitted_data.pdf') + + +# TODO: This test fails on GHA with MACE install, +# need to investigate more, for now skipping.
+@work_in_zipped_dir(os.path.join(here, 'data/data.zip')) +@pytest.mark.skip(reason='Test fails on GHA with MACE') +def test_umbrella_parallel(): + execution_time = {} + + for n_cores in (1, 2): + mlt.Config.n_cores = n_cores + + umbrella = _h2_umbrella() + traj = _h2_pulled_traj() + + start = time.perf_counter() + umbrella.run_umbrella_sampling( + traj, + mlp=TestPotential('1D'), + temp=300, + interval=5, + dt=0.5, + n_windows=4, + fs=500, + ) + finish = time.perf_counter() + + execution_time[n_cores] = finish - start + + # Calculation with more cores should run faster + assert execution_time[2] < execution_time[1] + + +@work_in_zipped_dir(os.path.join(here, 'data/data.zip')) +def test_umbrella_sparse_traj(): + umbrella = _h2_umbrella() + traj = _h2_sparse_traj() + n_windows = 9 + + # Indices from 1 to 9 + zeta_refs = umbrella._reference_values( + traj=traj, num=n_windows, final_ref=None, init_ref=None + ) + + middle_ref = zeta_refs[5] + middle_bias = mlt.Bias( + zeta_func=umbrella.zeta_func, + kappa=umbrella.kappa, + reference=middle_ref, + ) + + # There should be no good starting frame for the middle window (index 5) + # as the sparse trajectory only contains the initial and final frame + assert umbrella._no_ok_frame_in(traj, middle_ref) + + umbrella.run_umbrella_sampling( + traj, + mlp=TestPotential('1D'), + temp=300, + interval=5, + dt=0.5, + n_windows=n_windows, + fs=100, + save_sep=True, + ) + + assert os.path.exists('trajectories') + assert os.path.isdir('trajectories') + + previous_window_traj = mlt.ConfigurationSet() + previous_window_traj.load_xyz( + filename='trajectories/window_4.xyz', charge=0, mult=1 + ) + + middle_window_traj = mlt.ConfigurationSet() + middle_window_traj.load_xyz( + filename='trajectories/window_5.xyz', charge=0, mult=1 + ) + + closest_frame = umbrella._best_init_frame( + bias=middle_bias, traj=previous_window_traj + ) + starting_frame = middle_window_traj[0] + + # The starting frame for the middle window (index 5) should be + # the 
closest frame from the previous window (index 4) + assert starting_frame == closest_frame + + +@work_in_zipped_dir(os.path.join(here, 'data/data.zip')) +def test_umbrella_save_load(): + umbrella = _h2_umbrella() + traj = _h2_pulled_traj() + + umbrella.run_umbrella_sampling( + traj, + mlp=TestPotential('1D'), + temp=300, + interval=5, + dt=0.5, + n_windows=3, + fs=100, + save_sep=False, + ) + + umbrella.save(folder_name='tmp_us') + assert os.path.exists('tmp_us') and os.path.isdir('tmp_us') + + loaded = mlt.UmbrellaSampling.from_folder(folder_name='tmp_us', temp=300) + assert len(loaded.windows) == 3 + assert np.allclose(loaded.zeta_refs, umbrella.zeta_refs) + + for idx, window in enumerate(loaded.windows): + assert np.isclose(window.zeta_ref, umbrella.zeta_refs[idx]) + assert np.isclose(window._bias.kappa, 100) + assert len(window._obs_zetas) == 41 diff --git a/mlptrain/sampling/tests/test_wham.py b/tests/test_wham.py similarity index 53% rename from mlptrain/sampling/tests/test_wham.py rename to tests/test_wham.py index af325b19..635436ee 100644 --- a/mlptrain/sampling/tests/test_wham.py +++ b/tests/test_wham.py @@ -2,16 +2,15 @@ import numpy as np from mlptrain.sampling.bias import Bias from mlptrain.sampling.umbrella import _Window, UmbrellaSampling -from .utils import work_in_zipped_dir +from .data.utils import work_in_zipped_dir + here = os.path.dirname(os.path.abspath(__file__)) kj_to_ev = 0.0103642 def _initialised_us() -> UmbrellaSampling: - - us = UmbrellaSampling(zeta_func=lambda x: None, - kappa=0.0) + us = UmbrellaSampling(zeta_func=lambda x: None, kappa=0.0) us.temp = 300 zeta_refs = np.linspace(1.8245, 3.1100, num=20) # 20 windows @@ -21,54 +20,61 @@ def _initialised_us() -> UmbrellaSampling: # Ensure the data has the correct reference value for the hard # coded array - assert np.isclose(zeta_refs[window_idx], - float(data_lines[0].split()[1])) + assert np.isclose( + zeta_refs[window_idx], float(data_lines[0].split()[1]) + ) zeta_obs = 
[float(line.split()[1]) for line in data_lines[1:-1]] - window = _Window(obs_zetas=np.array(zeta_obs), - bias=Bias(zeta_func=None, - kappa=float(data_lines[0].split()[2]), - reference=zeta_refs[window_idx]) - ) + window = _Window( + obs_zetas=np.array(zeta_obs), + bias=Bias( + zeta_func=None, + kappa=float(data_lines[0].split()[2]), + reference=zeta_refs[window_idx], + ), + ) us.windows.append(window) return us -@work_in_zipped_dir(os.path.join(here, 'data.zip')) +@work_in_zipped_dir(os.path.join(here, 'data/data.zip')) def test_wham_is_close_to_ref(): - us = _initialised_us() zetas, free_energies = us.wham(n_bins=499) free_energies -= min(free_energies) - ref_zetas = np.array([float(line.split()[0]) for line in - open('ref_wham.txt', 'r').readlines()[1:-1]]) + ref_zetas = np.array( + [ + float(line.split()[0]) + for line in open('ref_wham.txt', 'r').readlines()[1:-1] + ] + ) - ref_free_energies = [float(line.split()[1]) * kj_to_ev for line in - open('ref_wham.txt', 'r').readlines()[1:-1]] + ref_free_energies = [ + float(line.split()[1]) * kj_to_ev + for line in open('ref_wham.txt', 'r').readlines()[1:-1] + ] # Ensure every free energy value, at a particular zeta, is close to the # reference, to within ~0.5 kcal mol-1 for zeta, free_energy in zip(zetas, free_energies): - close_idx = np.argmin(np.abs(ref_zetas - zeta)) - assert np.isclose(free_energy, - ref_free_energies[close_idx], - atol=0.02) + assert np.isclose(free_energy, ref_free_energies[close_idx], atol=0.02) -@work_in_zipped_dir(os.path.join(here, 'data.zip')) +@work_in_zipped_dir(os.path.join(here, 'data/data.zip')) def test_wham_is_somewhat_independent_of_nbins(): - us = _initialised_us() _, free_energies_500bins = us.wham(n_bins=500) _, free_energies_1000bins = us.wham(n_bins=1000) assert np.allclose( free_energies_500bins, - np.mean(free_energies_1000bins.reshape(-1, 2), axis=1), # block average - atol=5E-2 + np.mean( + free_energies_1000bins.reshape(-1, 2), axis=1 + ), # block average + atol=5e-2, )