Skip to content

Commit

Permalink
Moredocs nico (#237)
Browse files Browse the repository at this point in the history
* mod_config bug fix

* Metrics plot clean up

* added config to documentation

* eval documentation

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
Tetracarbonylnickel and pre-commit-ci[bot] authored Mar 5, 2024
1 parent c361f0e commit b8bfb32
Show file tree
Hide file tree
Showing 5 changed files with 248 additions and 80 deletions.
6 changes: 3 additions & 3 deletions apax/utils/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def download_md22_stachyose(data_path):
return file_path


def download_md17_benzene_DFT(data_path):
def download_benzene_DFT(data_path):
url = "http://www.quantum-machine.org/gdml/data/xyz/benzene2018_dft.zip"
file_path = data_path / "benzene2018_dft.zip"

Expand All @@ -36,7 +36,7 @@ def download_md17_benzene_DFT(data_path):
return new_file_path


def download_md17_benzene_CCSDT(data_path):
def download_md22_benzene_CCSDT(data_path):
url = "http://www.quantum-machine.org/gdml/data/xyz/benzene_ccsd_t.zip"
file_path = data_path / "benzene_ccsdt.zip"

Expand All @@ -63,7 +63,7 @@ def modify_xyz_file(file_path, target_string, replacement_string):
return new_file_path


def mod_md17(file_path):
def mod_md_datasets(file_path):
new_file_path = file_path.with_name(file_path.stem + "_mod" + file_path.suffix)
with open(file_path, "r") as input_file, open(new_file_path, "w") as output_file:
for line in input_file:
Expand Down
3 changes: 3 additions & 0 deletions docs/source/_tutorials/05_Full_Config.nblink
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"path": "../../../examples/05_Full_Config.ipynb"
}
1 change: 1 addition & 0 deletions docs/source/_tutorials/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ Tutorials
02_Molecular_dynamics
03_Transfer_Learning
04_Batch_Data_Selection
05_Full_Config
207 changes: 130 additions & 77 deletions examples/01_Model_Training.ipynb

Large diffs are not rendered by default.

111 changes: 111 additions & 0 deletions examples/05_Full_Config.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Complete Configuration File\n",
" \n",
"```yaml\n",
"n_epochs: <NUMBER OF EPOCHS> # Number of training epochs.\n",
"seed: 1 # Seed for initialising random numbers\n",
"patience: None # Number of epochs without improvement before training gets terminated.\n",
"n_models: 1 # Number of models to be trained at once.\n",
"n_jitted_steps: 1 # Number of train batches to be processed in a compiled loop. \n",
" # Can yield significant speedups for small structures or small batch sizes.\n",
"\n",
"data:\n",
" directory: models/ # Path to the directory where the training results and checkpoints will be written.\n",
" experiment: apax # Name of the model. Distinguishes it from the other models trained in the same `directory`.\n",
" data_path: <PATH> # Path to a single dataset file. Set either this or `val_data_path` and `train_data_path`.\n",
" train_data_path: <PATH> # Path to a training dataset. Set this and `val_data_path` if your data comes pre-split.\n",
" val_data_path: <PATH> # Path to a validation dataset. Set this and `train_data_path` if your data comes pre-split.\n",
" test_data_path: <PATH> # Path to a test dataset. Set this, `train_data_path` and `val_data_path` if your data comes pre-split.\n",
"\n",
" n_train: 1000 # Number of training datapoints from `data_path`.\n",
" n_valid: 100 # Number of validation datapoints from `data_path`.\n",
"\n",
" batch_size: 32 # Number of training examples to be evaluated at once.\n",
" valid_batch_size: 100 # Number of validation examples to be evaluated at once.\n",
"\n",
" shift_method: \"per_element_regression_shift\"\n",
" shift_options:\n",
" energy_regularisation: 1.0 # Magnitude of the regularization in the per-element energy regression.\n",
" shuffle_buffer_size: 1000 # Size of the `tf.data` shuffle buffer.\n",
"\n",
" pos_unit: Ang\n",
" energy_unit: eV\n",
"\n",
" additional_properties_info: # Dict of property name, shape (ragged or fixed) pairs\n",
"\n",
"model:\n",
" n_basis: 7 # Number of uncontracted gaussian basis functions.\n",
" n_radial: 5 # Number of contracted basis functions.\n",
" nn: [512, 512] # Number of hidden layers and units in those layers.\n",
"\n",
" r_max: 6.0 # Cutoff radius of the descriptor.\n",
" r_min: 0.5 # Position of the first uncontracted basis function's mean.\n",
"\n",
" use_zbl: false # \n",
"\n",
" b_init: normal # Initialization scheme for the neural network biases. Either `normal` or `zeros`.\n",
" descriptor_dtype: fp64\n",
" readout_dtype: fp32\n",
" scale_shift_dtype: fp32\n",
"\n",
"loss:\n",
"- loss_type: structures # Weighting scheme for atomic contributions.\n",
" # See the MLIP package for reference 10.1088/2632-2153/abc9fe for details\n",
" name: energy # Keyword of the quantity e.g. `energy`.\n",
" weight: 1.0 # Weighting factor in the overall loss function.\n",
"- loss_type: structures\n",
" name: forces\n",
" weight: 4.0\n",
"\n",
"metrics:\n",
"- name: energy # Keyword of the quantity e.g. `energy`.\n",
" reductions: # List of reductions performed on the difference between target and predictions.\n",
" # Can be mae, mse, rmse for energies and forces. For forces it is also possible to use `angle`.\n",
" - mae\n",
"- name: forces\n",
" reductions:\n",
" - mae\n",
" - mse\n",
"\n",
"optimizer:\n",
" opt_name: adam # Name of the optimizer. Can be any `optax` optimizer.\n",
" opt_kwargs: {} # Optimizer keyword arguments. Passed to the `optax` optimizer.\n",
" emb_lr: 0.03 # Learning rate of the elemental embedding contraction coefficients.\n",
" nn_lr: 0.03 # Learning rate of the neural network parameters.\n",
" scale_lr: 0.001 # Learning rate of the elemental output scaling factors.\n",
" shift_lr: 0.05 # Learning rate of the elemental output shifts.\n",
" zbl_lr: 0.001 # \n",
" transition_begin: 0 # Number of training steps (not epochs) before the start of the linear learning rate schedule.\n",
"\n",
"callbacks:\n",
"- name: csv # Keyword of the callback used. Currently we implement \"csv\" and \"tensorboard\".\n",
"\n",
"progress_bar:\n",
" disable_epoch_pbar: false # Set to True to disable the epoch progress bar.\n",
" disable_nl_pbar: false # Set to True to disable the NL precomputation progress bar.\n",
"\n",
"\n",
"checkpoints:\n",
" ckpt_interval: 1 # Number of epochs between checkpoints.\n",
" \n",
" # The options below are used for transfer learning\n",
" base_model_checkpoint: null # Path to the folder containing a pre-trained model ckpt.\n",
" reset_layers: [] # List of layer names for which the parameters will be reinitialized.\n",
"\n",
"```"
]
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit b8bfb32

Please sign in to comment.