Skip to content

Commit

Permalink
Moredocs nico (#237)
Browse files Browse the repository at this point in the history
* mod_config bug fix

* Metrics plot clean up

* added config to documentation

* eval documentation

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
Tetracarbonylnickel and pre-commit-ci[bot] authored Mar 5, 2024
1 parent c361f0e commit b8bfb32
Show file tree
Hide file tree
Showing 5 changed files with 248 additions and 80 deletions.
6 changes: 3 additions & 3 deletions apax/utils/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def download_md22_stachyose(data_path):
return file_path


def download_md17_benzene_DFT(data_path):
def download_benzene_DFT(data_path):
url = "http://www.quantum-machine.org/gdml/data/xyz/benzene2018_dft.zip"
file_path = data_path / "benzene2018_dft.zip"

Expand All @@ -36,7 +36,7 @@ def download_md17_benzene_DFT(data_path):
return new_file_path


def download_md17_benzene_CCSDT(data_path):
def download_md22_benzene_CCSDT(data_path):
url = "http://www.quantum-machine.org/gdml/data/xyz/benzene_ccsd_t.zip"
file_path = data_path / "benzene_ccsdt.zip"

Expand All @@ -63,7 +63,7 @@ def modify_xyz_file(file_path, target_string, replacement_string):
return new_file_path


def mod_md17(file_path):
def mod_md_datasets(file_path):
new_file_path = file_path.with_name(file_path.stem + "_mod" + file_path.suffix)
with open(file_path, "r") as input_file, open(new_file_path, "w") as output_file:
for line in input_file:
Expand Down
3 changes: 3 additions & 0 deletions docs/source/_tutorials/05_Full_Config.nblink
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"path": "../../../examples/05_Full_Config.ipynb"
}
1 change: 1 addition & 0 deletions docs/source/_tutorials/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ Tutorials
02_Molecular_dynamics
03_Transfer_Learning
04_Batch_Data_Selection
05_Full_Config
207 changes: 130 additions & 77 deletions examples/01_Model_Training.ipynb

Large diffs are not rendered by default.

111 changes: 111 additions & 0 deletions examples/05_Full_Config.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Complete Configuration File\n",
" \n",
"```yaml\n",
"n_epochs: <NUMBER OF EPOCHS> # Number of training epochs.\n",
"seed: 1 # Seed for initialising random numbers\n",
"patience: None # Number of epochs without improvement before training gets terminated.\n",
"n_models: 1 # Number of models to be trained at once.\n",
"n_jitted_steps: 1 # Number of train batches to be processed in a compiled loop. \n",
" # Can yield significant speedups for small structures or small batch sizes.\n",
"\n",
"data:\n",
" directory: models/ # Path to the directory where the training results and checkpoints will be written.\n",
" experiment: apax # Name of the model. Distinguishes it from the other models trained in the same `directory`.\n",
" data_path: <PATH> # Path to a single dataset file. Set either this or `val_data_path` and `train_data_path`.\n",
" train_data_path: <PATH> # Path to a training dataset. Set this and `val_data_path` if your data comes pre-split.\n",
" val_data_path: <PATH> # Path to a validation dataset. Set this and `train_data_path` if your data comes pre-split.\n",
" test_data_path: <PATH> # Path to a test dataset. Set this, `train_data_path` and `val_data_path` if your data comes pre-split.\n",
"\n",
" n_train: 1000 # Number of training datapoints from `data_path`.\n",
" n_valid: 100 # Number of validation datapoints from `data_path`.\n",
"\n",
" batch_size: 32 # Number of training examples to be evaluated at once.\n",
" valid_batch_size: 100 # Number of validation examples to be evaluated at once.\n",
"\n",
" shift_method: \"per_element_regression_shift\"\n",
" shift_options:\n",
" energy_regularisation: 1.0 # Magnitude of the regularization in the per-element energy regression.\n",
" shuffle_buffer_size: 1000 # Size of the `tf.data` shuffle buffer.\n",
"\n",
" pos_unit: Ang\n",
" energy_unit: eV\n",
"\n",
" additional_properties_info: # Dict of property name, shape (ragged or fixed) pairs\n",
"\n",
"model:\n",
" n_basis: 7 # Number of uncontracted gaussian basis functions.\n",
" n_radial: 5 # Number of contracted basis functions.\n",
" nn: [512, 512] # Number of hidden layers and units in those layers.\n",
"\n",
" r_max: 6.0 # Cutoff radius of the descriptor.\n",
" r_min: 0.5 # Position of the first uncontracted basis function's mean.\n",
"\n",
" use_zbl: false # \n",
"\n",
" b_init: normal # Initialization scheme for the neural network biases. Either `normal` or `zeros`.\n",
" descriptor_dtype: fp64\n",
" readout_dtype: fp32\n",
" scale_shift_dtype: fp32\n",
"\n",
"loss:\n",
"- loss_type: structures # Weighting scheme for atomic contributions.\n",
" # See the MLIP package for reference 10.1088/2632-2153/abc9fe for details\n",
" name: energy # Keyword of the quantity e.g. `energy`.\n",
" weight: 1.0 # Weighting factor in the overall loss function.\n",
"- loss_type: structures\n",
" name: forces\n",
" weight: 4.0\n",
"\n",
"metrics:\n",
"- name: energy # Keyword of the quantity e.g. `energy`.\n",
" reductions: # List of reductions performed on the difference between target and predictions.\n",
" # Can be mae, mse, rmse for energies and forces. For forces it is also possible to use `angle`.\n",
" - mae\n",
"- name: forces\n",
" reductions:\n",
" - mae\n",
" - mse\n",
"\n",
"optimizer:\n",
" opt_name: adam # Name of the optimizer. Can be any `optax` optimizer.\n",
" opt_kwargs: {} # Optimizer keyword arguments. Passed to the `optax` optimizer.\n",
" emb_lr: 0.03 # Learning rate of the elemental embedding contraction coefficients.\n",
" nn_lr: 0.03 # Learning rate of the neural network parameters.\n",
" scale_lr: 0.001 # Learning rate of the elemental output scaling factors.\n",
" shift_lr: 0.05 # Learning rate of the elemental output shifts.\n",
" zbl_lr: 0.001 # \n",
" transition_begin: 0 # Number of training steps (not epochs) before the start of the linear learning rate schedule.\n",
"\n",
"callbacks:\n",
"- name: csv # Keyword of the callback used. Currently we implement \"csv\" and \"tensorboard\".\n",
"\n",
"progress_bar:\n",
" disable_epoch_pbar: false # Set to True to disable the epoch progress bar.\n",
" disable_nl_pbar: false # Set to True to disable the NL precomputation progress bar.\n",
"\n",
"\n",
"checkpoints:\n",
" ckpt_interval: 1 # Number of epochs between checkpoints.\n",
" \n",
" # The options below are used for transfer learning\n",
" base_model_checkpoint: null # Path to the folder containing a pre-trained model ckpt.\n",
" reset_layers: [] # List of layer names for which the parameters will be reinitialized.\n",
"\n",
"```"
]
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit b8bfb32

Please sign in to comment.