Merge branch 'main' into Plum_CNCV

duartegroup · Nov 29, 2024 · 92de7d3 · 92de7d3
2 parents a57d709 + cbb38fd
commit 92de7d3
Show file tree

Hide file tree

Showing 24 changed files with 9,072 additions and 0 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -8,6 +8,7 @@ repos:
       - id: no-commit-to-branch
       - id: check-executables-have-shebangs
       - id: check-shebang-scripts-are-executable
+        exclude: examples/WTMetaD_paper/r2/free_energy/wtmetad_ib/accumulated_bias/bias_after_iter_15.dat
       - id: check-added-large-files
         args: ['--maxkb=500', '--enforce-all']
         exclude: tests/data/data.zip

diff --git a/examples/WTMetaD_paper/README.md b/examples/WTMetaD_paper/README.md
@@ -0,0 +1,9 @@
+# Active learning meets metadynamics: Automated workflow for reactive machine learning potentials
+
+This folder contains the code and initial files for the computational tasks discussed in the paper "Active learning meets metadynamics: Automated workflow for reactive machine learning potentials" ([ChemRxiv preprint](https://chemrxiv.org/engage/chemrxiv/article-details/671fe54b98c8527d9ea7647a)).
+
+The manuscript demonstrates three reactions: an SN2 reaction in implicit solvent (referred to as r1), a rearrangement reaction in the gas phase (r2), and a glycosylation reaction in explicit solvent (r3). Python scripts for each reaction can be found in the corresponding folders.
+
+For r1 and r2, active learning (AL) was utilised with both WTMetaD-IB and downhill sampling methods to train the potential. Input geometries (as `*.xyz` files) and training scripts are located within folders named after the training methods, such as `al_downhill`.
+
+In the case of r2, free energy calculations were performed using different enhanced sampling methods. All scripts can be found in the `free_energy` folder within the r2 directory. Within the `free_energy` folder, each enhanced sampling method has its own folder containing the Python scripts and necessary configuration files.
diff --git a/examples/WTMetaD_paper/r1/al_downhill/r1_ts.xyz b/examples/WTMetaD_paper/r1/al_downhill/r1_ts.xyz
@@ -0,0 +1,8 @@
+6
+Coordinates from ORCA-job sn2_nebts_NEB-CI_converged
+  C   0.02394151244587     -0.02273675265530      0.05167818806821
+  F   -0.65299103371021      0.72656624059729     -1.73488992550113
+  Cl  0.68562794924814     -0.78844305036752      1.89308430540763
+  H   0.99880860408280      0.16036474652930     -0.38536247269303
+  H   -0.52740777250600      0.82166542425637      0.44671920342826
+  H   -0.52797925956060     -0.89741660836014     -0.27122929870995
diff --git a/examples/WTMetaD_paper/r1/al_downhill/training.py b/examples/WTMetaD_paper/r1/al_downhill/training.py
@@ -0,0 +1,31 @@
+import mlptrain as mlt
+
+mlt.Config.n_cores = 30
+mlt.Config.orca_keywords = ['PBE0', 'def2-SVP', 'EnGrad', 'CPCM(Water)']
+
+if __name__ == '__main__':
+    # Load r1_ts.xyz as the system: charge -1, multiplicity 1, no box
+    system = mlt.System(mlt.Molecule('r1_ts.xyz', charge=-1, mult=1), box=None)
+
+    # avg_r uses atom pairs (0, 1)=C-F and (0, 2)=C-Cl; upper wall at 2.5
+    avg_r = mlt.PlumedAverageCV(name='avg_r', atom_groups=((0, 1), (0, 2)))
+    avg_r.attach_upper_wall(location=2.5, kappa=1000)
+
+    # r_f (C-F) and r_cl (C-Cl) single-pair CVs, kept for extra information
+    r_f = mlt.PlumedAverageCV(name='r_f', atom_groups=(0, 1))
+    r_cl = mlt.PlumedAverageCV(name='r_cl', atom_groups=(0, 2))
+
+    # Bundle the three CVs into one PlumedBias, handed to al_train below
+    bias = mlt.PlumedBias(cvs=(avg_r, r_f, r_cl))
+
+    # Train an ACE potential with downhill AL (fix_init_config=True) via ORCA
+    ace = mlt.potentials.ACE('r1_downhill', system=system)
+    ace.al_train(
+        method_name='orca',
+        temp=500,
+        n_configs_iter=5,
+        max_active_iters=50,
+        min_active_iters=20,
+        fix_init_config=True,
+        bias=bias,
+    )
diff --git a/examples/WTMetaD_paper/r1/al_wtmetad/ch3cl_f.xyz b/examples/WTMetaD_paper/r1/al_wtmetad/ch3cl_f.xyz
@@ -0,0 +1,8 @@
+6
+
+C          0.11150        0.23091        0.58288
+F         -0.88850        1.32101       -2.02704
+Cl         0.64512       -0.39439        2.08233
+H          1.02099        0.50449       -0.09172
+H         -0.45901        1.13911        0.74042
+H         -0.45901       -0.53348        0.04182
diff --git a/examples/WTMetaD_paper/r1/al_wtmetad/training.py b/examples/WTMetaD_paper/r1/al_wtmetad/training.py
@@ -0,0 +1,39 @@
+import mlptrain as mlt
+
+mlt.Config.n_cores = 20
+mlt.Config.orca_keywords = ['PBE0', 'def2-SVP', 'EnGrad', 'CPCM(Water)']
+
+if __name__ == '__main__':
+    system = mlt.System(
+        mlt.Molecule('ch3cl_f.xyz', charge=-1, mult=1), box=None
+    )
+
+    # avg_r uses atom pairs (0, 1)=C-F and (0, 2)=C-Cl; upper wall at 2.5
+    avg_r = mlt.PlumedAverageCV(name='avg_r', atom_groups=((0, 1), (0, 2)))
+    avg_r.attach_upper_wall(location=2.5, kappa=1000)
+
+    # r_f (C-F) and r_cl (C-Cl) single-pair CVs, kept for extra information
+    r_f = mlt.PlumedAverageCV(name='r_f', atom_groups=(0, 1))
+    r_cl = mlt.PlumedAverageCV(name='r_cl', atom_groups=(0, 2))
+
+    # diff_r lists (0, 2) before (0, 1), i.e. r_cl - r_f; the WTMetaD AL CV
+    diff_r = mlt.PlumedDifferenceCV(
+        name='diff_r', atom_groups=((0, 2), (0, 1))
+    )
+
+    # The bias holds all four CVs; initialise_for_metad_al gets diff_r only
+    bias = mlt.PlumedBias(cvs=(avg_r, r_f, r_cl, diff_r))
+    bias.initialise_for_metad_al(width=0.05, cvs=diff_r, biasfactor=100)
+
+    # Train an ACE potential with WTMetaD AL (inherit_metad_bias=True) via ORCA
+    ace = mlt.potentials.ACE('r1_wtmetad', system=system)
+    ace.al_train(
+        method_name='orca',
+        temp=300,
+        n_init_configs=5,
+        n_configs_iter=5,
+        max_active_iters=50,
+        min_active_iters=30,
+        inherit_metad_bias=True,
+        bias=bias,
+    )
diff --git a/examples/WTMetaD_paper/r2/al_downhill/al_downhill.py b/examples/WTMetaD_paper/r2/al_downhill/al_downhill.py
@@ -0,0 +1,35 @@
+import autode as ade
+import mlptrain as mlt
+
+# Path to the ORCA executable for autode; change it to match your machine
+ade.Config.ORCA.path = '/usr/local/orca_5_0_3/orca'
+
+mlt.Config.n_cores = 10
+mlt.Config.orca_keywords = ['PBE0', 'D3BJ', 'def2-SVP', 'EnGrad']
+
+if __name__ == '__main__':
+    system = mlt.System(mlt.Molecule('ts_r2.xyz', charge=0, mult=1), box=None)
+
+    # Extra-information CVs on pairs (14, 11) and (14, 10); no wall attached
+    avg_r = mlt.PlumedAverageCV(name='avg_r', atom_groups=((14, 11), (14, 10)))
+    r_1 = mlt.PlumedAverageCV(name='r_1', atom_groups=(14, 11))
+    r_2 = mlt.PlumedAverageCV(name='r_2', atom_groups=(14, 10))
+    diff_r = mlt.PlumedDifferenceCV(
+        name='diff_r', atom_groups=((14, 11), (14, 10))
+    )
+
+    # Bundle the four CVs into one PlumedBias, handed to al_train below
+    bias = mlt.PlumedBias(cvs=(avg_r, r_1, r_2, diff_r))
+
+    # Train an ACE potential with downhill AL (fix_init_config=True) via ORCA
+    ace = mlt.potentials.ACE('r2_downhill', system=system)
+    ace.al_train(
+        method_name='orca',
+        temp=500,
+        n_init_configs=10,
+        n_configs_iter=10,
+        max_active_iters=50,
+        min_active_iters=20,
+        fix_init_config=True,
+        bias=bias,
+    )
diff --git a/examples/WTMetaD_paper/r2/al_downhill/ts_r2.xyz b/examples/WTMetaD_paper/r2/al_downhill/ts_r2.xyz
@@ -0,0 +1,25 @@
+23
+Coordinates from ORCA-job ts_2
+  C   -2.66439005909114      2.65299207388647      0.78745998436255
+  C   -2.67887404989887      1.28747390765675      0.38852882506916
+  C   -1.56970666815882      0.67461786371524     -0.14641426702188
+  C   -0.37799042894344      1.42925812930736     -0.32382038603211
+  C   -0.37079634676906      2.81148083306055      0.09023385565607
+  C   -1.52125738464133      3.40795809751788      0.65254942674056
+  H   -1.50289375312190      4.45626539206397      0.96302768555986
+  H   -3.57070364836348      3.09792166406850      1.20441075664968
+  H   -3.60062126935202      0.71302763969736      0.51535519693843
+  H   -1.60258355173816     -0.37766602828367     -0.43967208064813
+  C   0.92456570544541      3.31753505809060     -0.15347633687560
+  C   1.72786411538397      2.23585108850567     -0.72575815854290
+  H   1.22817413754548      4.36380165541877     -0.12613291191324
+  C   0.88824740245950      1.09215485450898     -0.82943542615214
+  C   2.18463150524301      2.44443139726686      1.05085740248514
+  C   2.96854451026492      2.45002472048448     -1.54037911195286
+  H   1.19994619177669      0.12279718501542     -1.21659289133211
+  H   2.69452544821297      2.65557302244572     -2.58626430067515
+  H   3.60551208150717      1.55243185555685     -1.53156185106088
+  H   3.56503062112247      3.29922665297333     -1.17492648923220
+  H   1.49069905669169      1.84916547328124      1.65040376618190
+  H   2.42620003624003      3.40209098690239      1.53339211735138
+  H   3.13306634818493      1.90539647685923      0.90501519444448
diff --git a/examples/WTMetaD_paper/r2/al_wtmetad/al_wtmetad.py b/examples/WTMetaD_paper/r2/al_wtmetad/al_wtmetad.py
@@ -0,0 +1,37 @@
+import mlptrain as mlt
+
+mlt.Config.n_cores = 10
+mlt.Config.orca_keywords = ['PBE0', 'D3BJ', 'def2-SVP', 'EnGrad']
+
+if __name__ == '__main__':
+    system = mlt.System(mlt.Molecule('r_r2.xyz', charge=0, mult=1), box=None)
+
+    # avg_r uses atom pairs (14, 11) and (14, 10); upper wall at 2.5
+    avg_r = mlt.PlumedAverageCV(name='avg_r', atom_groups=((14, 11), (14, 10)))
+    avg_r.attach_upper_wall(location=2.5, kappa=1000)
+
+    # r_1 and r_2 single-pair CVs, kept for extra information
+    r_1 = mlt.PlumedAverageCV(name='r_1', atom_groups=(14, 11))
+    r_2 = mlt.PlumedAverageCV(name='r_2', atom_groups=(14, 10))
+
+    # diff_r lists (14, 11) before (14, 10), i.e. r_1 - r_2; the WTMetaD AL CV
+    diff_r = mlt.PlumedDifferenceCV(
+        name='diff_r', atom_groups=((14, 11), (14, 10))
+    )
+
+    # The bias holds all four CVs; initialise_for_metad_al gets diff_r only
+    bias = mlt.PlumedBias(cvs=(avg_r, r_1, r_2, diff_r))
+    bias.initialise_for_metad_al(width=0.05, cvs=diff_r, biasfactor=90)
+
+    # WTMetaD AL training. NOTE(review): name breaks the r2_<method> pattern?
+    ace = mlt.potentials.ACE('isoindene_2_metad_2', system=system)
+    ace.al_train(
+        method_name='orca',
+        temp=300,
+        n_init_configs=5,
+        n_configs_iter=10,
+        max_active_iters=40,
+        min_active_iters=20,
+        inherit_metad_bias=True,
+        bias=bias,
+    )
diff --git a/examples/WTMetaD_paper/r2/al_wtmetad/r_r2.xyz b/examples/WTMetaD_paper/r2/al_wtmetad/r_r2.xyz
@@ -0,0 +1,25 @@
+23
+Coordinates from ORCA-job reactant_2
+  C   -2.81578879620079      2.71939195713069      0.40597652743383
+  C   -2.75306801675332      1.29879637349104      0.12494353708397
+  C   -1.57543956286648      0.67007495572890     -0.12725319077628
+  C   -0.35564336599371      1.43708094671903     -0.11453115671888
+  C   -0.41953214675013      2.88547229814566      0.17378132549330
+  C   -1.69985337085700      3.49383364072311      0.43197139067454
+  H   -1.76038527053415      4.56397504901440      0.64444557198421
+  H   -3.79415372095779      3.16615899050664      0.60025014318963
+  H   -3.68677075140821      0.73077207382657      0.11837866310995
+  H   -1.54169132169281     -0.40154984311682     -0.33807153245421
+  C   0.83993724079311      3.39615214471678      0.13487848344868
+  C   1.82185831002678      2.30200252248213     -0.18701816957288
+  H   1.13022181850322      4.43459180951911      0.30501970498636
+  C   0.94166848737729      1.08930300007155     -0.32657183576441
+  C   2.83234594843814      2.11612229365514      0.95847709653150
+  C   2.56148744849290      2.59927910422309     -1.50322929151805
+  H   1.32235111859832      0.09373371278190     -0.56213839383041
+  H   1.84979611856880      2.73365377204159     -2.33076142860886
+  H   3.24183470046471      1.77249061784375     -1.76025485532520
+  H   3.16292499408300      3.51688104975674     -1.40914717372226
+  H   2.31566211120603      1.90230302413868      1.90547216568181
+  H   3.43825007798258      3.02608548619431      1.09016398584808
+  H   3.51580406856637      1.28144540475756      0.73822666157863