diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9e2cbf607..765db654c 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -22,7 +22,7 @@ jobs: - name: Install rdkit run: python -m pip install rdkit openbabel-wheel - name: Install dependencies - run: python -m pip install .[amber,ase,pymatgen] coverage + run: python -m pip install .[amber,ase,pymatgen] coverage ./tests/plugin - name: Test run: cd tests && coverage run --source=../dpdata -m unittest && cd .. && coverage combine tests/.coverage && coverage report - name: Run codecov diff --git a/.gitignore b/.gitignore index 41ee080cf..cd48a3e25 100644 --- a/.gitignore +++ b/.gitignore @@ -24,4 +24,7 @@ _version.py __pycache__ docs/_build docs/formats.csv +docs/drivers.csv +docs/minimizers.csv docs/api/ +docs/formats/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 14b08cef1..abaa911e6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,12 +18,12 @@ repos: - id: check-toml # Python - repo: https://github.com/psf/black - rev: 23.3.0 + rev: 23.7.0 hooks: - id: black-jupyter -- repo: https://github.com/charliermarsh/ruff-pre-commit +- repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.0.263 + rev: v0.0.287 hooks: - id: ruff args: ["--fix"] @@ -35,7 +35,7 @@ repos: args: ["--write"] # Python inside docs - repo: https://github.com/asottile/blacken-docs - rev: 1.13.0 + rev: 1.16.0 hooks: - id: blacken-docs ci: diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 000000000..87f8c7330 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,12 @@ +version: 2 + +build: + os: ubuntu-22.04 + tools: + python: "3.11" +sphinx: + configuration: docs/conf.py +formats: all +python: + install: + - requirements: docs/requirements.txt diff --git a/README.md b/README.md index cfa611d9c..1e86a7196 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,9 @@ +# dpdata + +[![conda-forge](https://img.shields.io/conda/dn/conda-forge/dpdata?color=red&label=conda-forge&logo=conda-forge)](https://anaconda.org/conda-forge/dpdata) +[![pip install](https://img.shields.io/pypi/dm/dpdata?label=pip%20install&logo=pypi)](https://pypi.org/project/dpdata) +[![Documentation Status](https://readthedocs.org/projects/dpdata/badge/)](https://dpdata.readthedocs.io/) + **dpdata** is a python package for manipulating data formats of software in computational science, including DeePMD-kit, VASP, LAMMPS, GROMACS, Gaussian. dpdata only works with python 3.7 or above. @@ -82,11 +88,12 @@ The `System` or `LabeledSystem` can be constructed from the following file forma | Amber | multi | True | True | LabeledSystem | 'amber/md' | | Amber/sqm | sqm.out | False | False | System | 'sqm/out' | | Gromacs | gro | True | False | System | 'gromacs/gro' | -| ABACUS | STRU | False | False | System | 'abacus/stru' | +| ABACUS | STRU | False | False | System | 'abacus/stru' | | ABACUS | STRU | False | True | LabeledSystem | 'abacus/scf' | | ABACUS | cif | True | True | LabeledSystem | 'abacus/md' | | ABACUS | STRU | True | True | LabeledSystem | 'abacus/relax' | | ase | structure | True | True | MultiSystems | 'ase/structure' | +| DFTB+ | dftbplus | False | True | LabeledSystem | 'dftbplus' | The Class `dpdata.MultiSystems` can read data from a dir which may contains many files of different systems, or from single xyz file which contains different systems. diff --git a/docs/formats.rst b/docs/formats.rst index c0ff5b8f9..41ca90916 100644 --- a/docs/formats.rst +++ b/docs/formats.rst @@ -6,3 +6,10 @@ dpdata supports the following formats: .. csv-table:: Supported Formats :file: formats.csv :header-rows: 1 + + +.. toctree:: + :maxdepth: 1 + :glob: + + formats/* diff --git a/docs/make_format.py b/docs/make_format.py index a208b21ed..62dd08793 100644 --- a/docs/make_format.py +++ b/docs/make_format.py @@ -1,9 +1,18 @@ import csv +import os from collections import defaultdict +from inspect import Parameter, Signature, cleandoc, signature +from typing import Literal + +from numpydoc.docscrape import Parameter as numpydoc_Parameter +from numpydoc.docscrape_sphinx import SphinxDocString + +from dpdata.bond_order_system import BondOrderSystem # ensure all plugins are loaded! from dpdata.driver import Driver, Minimizer from dpdata.format import Format +from dpdata.system import LabeledSystem, MultiSystems, System def get_formats() -> dict: @@ -64,7 +73,7 @@ def get_cls_link(cls: object) -> str: def check_supported(fmt: Format): - methods = set() + methods = {} for mtd in [ "from_system", "to_system", @@ -76,33 +85,255 @@ def check_supported(fmt: Format): "to_multi_systems", ]: if detect_overridden(fmt, mtd): - methods.add(mtd) + methods[mtd] = None if mtd == "to_system": - methods.add("to_labeled_system") + methods["to_labeled_system"] = None if fmt.MultiMode != fmt.MultiModes.NotImplemented: - methods.add("from_multi_systems") - methods.add("to_multi_systems") - return methods + methods["from_multi_systems"] = None + methods["to_multi_systems"] = None + return list(methods.keys()) method_links = { - "from_system": ":func:`System() `", - "to_system": ":func:`System.to() `", - "from_labeled_system": ":func:`LabeledSystem() `", - "to_labeled_system": ":func:`LabeledSystem.to() `", - "from_bond_order_system": ":func:`BondOrderSystem() `", - "to_bond_order_system": ":func:`BondOrderSystem.to() `", - "from_multi_systems": ":func:`MultiSystems.load_systems_from_file() `", - "to_multi_systems": ":func:`MultiSystems.to() `", + "from_system": ":ref:`System() <{}_{}>`", + "to_system": ":ref:`System.to() <{}_{}>`", + "from_labeled_system": ":ref:`LabeledSystem() <{}_{}>`", + "to_labeled_system": ":ref:`LabeledSystem.to() <{}_{}>`", + "from_bond_order_system": ":ref:`BondOrderSystem() <{}_{}>`", + "to_bond_order_system": ":ref:`BondOrderSystem.to() <{}_{}>`", + "from_multi_systems": ":ref:`MultiSystems.load_systems_from_file() <{}_{}>`", + "to_multi_systems": ":ref:`MultiSystems.to() <{}_{}>`", +} + +method_classes = { + "from_system": "System", + "to_system": "System", + "from_labeled_system": "LabeledSystem", + "to_labeled_system": "LabeledSystem", + "from_bond_order_system": "BondOrderSystem", + "to_bond_order_system": "BondOrderSystem", + "from_multi_systems": "MultiSystems", + "to_multi_systems": "MultiSystems", } +method_cls_obj = { + "from_system": System, + "to_system": System, + "from_labeled_system": LabeledSystem, + "to_labeled_system": LabeledSystem, + "from_bond_order_system": BondOrderSystem, + "to_bond_order_system": BondOrderSystem, + "from_multi_systems": MultiSystems, + "to_multi_systems": MultiSystems, +} + + +def generate_sub_format_pages(formats: dict): + """Generate sub format pages.""" + os.makedirs("formats", exist_ok=True) + for format, alias in formats.items(): + # format: Format, alias: list[str] + buff = [] + buff.append(".. _%s:" % format.__name__) + buff.append("") + for aa in alias: + buff.append("%s format" % aa) + buff.append("=" * len(buff[-1])) + buff.append("") + buff.append("Class: %s" % get_cls_link(format)) + buff.append("") + + docstring = format.__doc__ + if docstring is not None: + docstring = cleandoc(docstring) + rst = str(SphinxDocString(docstring)) + buff.append(rst) + buff.append("") + + buff.append("Conversions") + buff.append("-----------") + methods = check_supported(format) + for method in methods: + buff.append("") + buff.append(f".. _{format.__name__}_{method}:") + buff.append("") + if method.startswith("from_"): + buff.append("Convert from this format to %s" % method_classes[method]) + buff.append("`" * len(buff[-1])) + elif method.startswith("to_"): + buff.append("Convert from %s to this format" % method_classes[method]) + buff.append("`" * len(buff[-1])) + buff.append("") + method_obj = getattr(format, method) + if ( + method == "to_labeled_system" + and method not in format.__dict__ + and "to_system" in format.__dict__ + ): + method_obj = getattr(format, "to_system") + docstring = method_obj.__doc__ + if docstring is not None: + docstring = cleandoc(docstring) + sig = signature(method_obj) + parameters = dict(sig.parameters) + return_annotation = sig.return_annotation + # del self + del parameters[list(parameters)[0]] + # del data + if method.startswith("to_"): + del parameters[list(parameters)[0]] + if "args" in parameters: + del parameters["args"] + if "kwargs" in parameters: + del parameters["kwargs"] + if method == "to_multi_systems" or method.startswith("from_"): + sig = Signature( + list(parameters.values()), return_annotation=method_cls_obj[method] + ) + else: + sig = Signature( + list(parameters.values()), return_annotation=return_annotation + ) + sig = str(sig) + if method.startswith("from_"): + if method != "from_multi_systems": + for aa in alias: + parameters["fmt"] = Parameter( + "fmt", + Parameter.POSITIONAL_OR_KEYWORD, + default=None, + annotation=Literal[aa], + ) + sig_fmt = Signature( + list(parameters.values()), + return_annotation=method_cls_obj[method], + ) + sig_fmt = str(sig_fmt) + buff.append( + f""".. py:function:: dpdata.{method_classes[method]}{sig_fmt}""" + ) + buff.append(""" :noindex:""") + for aa in alias: + buff.append( + """.. py:function:: dpdata.{}.from_{}{}""".format( + method_classes[method], + aa.replace("/", "_").replace(".", ""), + sig, + ) + ) + buff.append(""" :noindex:""") + buff.append("") + if docstring is None or method not in format.__dict__: + docstring = """ Convert this format to :class:`%s`.""" % ( + method_classes[method] + ) + doc_obj = SphinxDocString(docstring) + if len(doc_obj["Parameters"]) > 0: + doc_obj["Parameters"] = [ + xx + for xx in doc_obj["Parameters"] + if xx.name not in ("*args", "**kwargs") + ] + else: + if method == "from_multi_systems": + doc_obj["Parameters"] = [ + numpydoc_Parameter( + "directory", + "str", + ["directory of systems"], + ) + ] + doc_obj["Yields"] = [] + doc_obj["Returns"] = [ + numpydoc_Parameter("", method_classes[method], ["converted system"]) + ] + rst = " " + str(doc_obj) + buff.append(rst) + buff.append("") + elif method.startswith("to_"): + for aa in alias: + parameters = { + "fmt": Parameter( + "fmt", + Parameter.POSITIONAL_OR_KEYWORD, + annotation=Literal[aa], + ), + **parameters, + } + if method == "to_multi_systems": + sig_fmt = Signature( + list(parameters.values()), + return_annotation=method_cls_obj[method], + ) + else: + sig_fmt = Signature( + list(parameters.values()), + return_annotation=return_annotation, + ) + sig_fmt = str(sig_fmt) + buff.append( + f""".. py:function:: dpdata.{method_classes[method]}.to{sig_fmt}""" + ) + buff.append(""" :noindex:""") + for aa in alias: + buff.append( + """.. py:function:: dpdata.{}.to_{}{}""".format( + method_classes[method], + aa.replace("/", "_").replace(".", ""), + sig, + ) + ) + buff.append(""" :noindex:""") + buff.append("") + if docstring is None or ( + method not in format.__dict__ + and not ( + method == "to_labeled_system" + and method not in format.__dict__ + and "to_system" in format.__dict__ + ) + ): + docstring = "Convert :class:`%s` to this format." % ( + method_classes[method] + ) + doc_obj = SphinxDocString(docstring) + if len(doc_obj["Parameters"]) > 0: + doc_obj["Parameters"] = [ + xx + for xx in doc_obj["Parameters"][1:] + if xx.name not in ("*args", "**kwargs") + ] + else: + if method == "to_multi_systems": + doc_obj["Parameters"] = [ + numpydoc_Parameter( + "directory", + "str", + ["directory to save systems"], + ) + ] + if method == "to_multi_systems": + doc_obj["Yields"] = [] + doc_obj["Returns"] = [ + numpydoc_Parameter("", method_classes[method], ["this system"]) + ] + rst = " " + str(doc_obj) + buff.append(rst) + buff.append("") + buff.append("") + buff.append("") + + with open("formats/%s.rst" % format.__name__, "w") as rstfile: + rstfile.write("\n".join(buff)) + + if __name__ == "__main__": formats = get_formats() with open("formats.csv", "w", newline="") as csvfile: fieldnames = [ - "Class", + "Format", "Alias", - "Supported Functions", + "Supported Conversions", ] writer = csv.DictWriter(csvfile, fieldnames=fieldnames) @@ -110,10 +341,11 @@ def check_supported(fmt: Format): for kk, vv in formats.items(): writer.writerow( { - "Class": get_cls_link(kk), + "Format": ":ref:`%s`" % kk.__name__, "Alias": "\n".join("``%s``" % vvv for vvv in vv), - "Supported Functions": "\n".join( - method_links[mtd] for mtd in check_supported(kk) + "Supported Conversions": "\n".join( + method_links[mtd].format(kk.__name__, mtd) + for mtd in check_supported(kk) ), } ) @@ -151,3 +383,4 @@ def check_supported(fmt: Format): "Alias": "\n".join("``%s``" % vvv for vvv in vv), } ) + generate_sub_format_pages(formats) diff --git a/dpdata/abacus/md.py b/dpdata/abacus/md.py index 7726cafeb..4b8ec0387 100644 --- a/dpdata/abacus/md.py +++ b/dpdata/abacus/md.py @@ -43,13 +43,20 @@ def get_coord_dump_freq(inlines): def get_coords_from_dump(dumplines, natoms): nlines = len(dumplines) total_natoms = sum(natoms) + # The output of VIRIAL, FORCE, and VELOCITY are controlled by INPUT parameters dump_virial, dump_force, and dump_vel, respectively. + # So the search of keywords can determine whether these datas are printed into MD_dump. calc_stress = False + calc_force = False + check_line = 6 if "VIRIAL" in dumplines[6]: calc_stress = True - else: - assert ( - "POSITIONS" in dumplines[6] and "FORCE" in dumplines[6] - ), "keywords 'POSITIONS' and 'FORCE' cannot be found in the 6th line. Please check." + check_line = 10 + assert ( + "POSITION" in dumplines[check_line] + ), "keywords 'POSITION' cannot be found in the 6th line. Please check." + if "FORCE" in dumplines[check_line]: + calc_force = True + nframes_dump = -1 if calc_stress: nframes_dump = int(nlines / (total_natoms + 13)) @@ -104,9 +111,13 @@ def get_coords_from_dump(dumplines, natoms): if not newversion: coords[iframe, iat] *= celldm - forces[iframe, iat] = np.array( - [float(i) for i in dumplines[iline + skipline + iat].split()[5:8]] - ) + if calc_force: + forces[iframe, iat] = np.array( + [ + float(i) + for i in dumplines[iline + skipline + iat].split()[5:8] + ] + ) iframe += 1 assert iframe == nframes_dump, ( "iframe=%d, nframe_dump=%d. Number of frames does not match number of lines in MD_dump." @@ -137,7 +148,7 @@ def get_energy(outlines, ndump, dump_freq): def get_frame(fname): - if type(fname) == str: + if isinstance(fname, str): # if the input parameter is only one string, it is assumed that it is the # base directory containing INPUT file; path_in = os.path.join(fname, "INPUT") @@ -195,7 +206,7 @@ def get_frame(fname): data["energies"] = energy data["forces"] = force data["virials"] = stress - if type(data["virials"]) != np.ndarray: + if not isinstance(data["virials"], np.ndarray): del data["virials"] data["orig"] = np.zeros(3) diff --git a/dpdata/abacus/relax.py b/dpdata/abacus/relax.py index 5dd77030b..db910fc7e 100644 --- a/dpdata/abacus/relax.py +++ b/dpdata/abacus/relax.py @@ -167,7 +167,7 @@ def get_coords_from_log(loglines, natoms): def get_frame(fname): - if type(fname) == str: + if isinstance(fname, str): # if the input parameter is only one string, it is assumed that it is the # base directory containing INPUT file; path_in = os.path.join(fname, "INPUT") diff --git a/dpdata/abacus/scf.py b/dpdata/abacus/scf.py index 1816913e5..fbd658b45 100644 --- a/dpdata/abacus/scf.py +++ b/dpdata/abacus/scf.py @@ -161,7 +161,7 @@ def get_frame(fname): "forces": [], } - if type(fname) == str: + if isinstance(fname, str): # if the input parameter is only one string, it is assumed that it is the # base directory containing INPUT file; path_in = os.path.join(fname, "INPUT") @@ -255,7 +255,7 @@ def get_nele_from_stru(geometry_inlines): def get_frame_from_stru(fname): - assert type(fname) == str + assert isinstance(fname, str) with open(fname) as fp: geometry_inlines = fp.read().split("\n") nele = get_nele_from_stru(geometry_inlines) @@ -304,7 +304,7 @@ def make_unlabeled_stru( out += "\n" if numerical_descriptor is not None: - assert type(numerical_descriptor) == str + assert isinstance(numerical_descriptor, str) out += "NUMERICAL_DESCRIPTOR\n%s\n" % numerical_descriptor out += "\n" @@ -327,10 +327,11 @@ def make_unlabeled_stru( out += "0.0\n" out += str(data["atom_numbs"][iele]) + "\n" for iatom in range(data["atom_numbs"][iele]): + iatomtype = np.nonzero(data["atom_types"] == iele)[0][iatom] out += "%.12f %.12f %.12f %d %d %d\n" % ( - data["coords"][frame_idx][natom_tot, 0], - data["coords"][frame_idx][natom_tot, 1], - data["coords"][frame_idx][natom_tot, 2], + data["coords"][frame_idx][iatomtype, 0], + data["coords"][frame_idx][iatomtype, 1], + data["coords"][frame_idx][iatomtype, 2], 1, 1, 1, diff --git a/dpdata/cp2k/cell.py b/dpdata/cp2k/cell.py index 7f5f3ef0e..7af73353e 100644 --- a/dpdata/cp2k/cell.py +++ b/dpdata/cp2k/cell.py @@ -28,24 +28,18 @@ def cell_to_low_triangle(A, B, C, alpha, beta, gamma): """ if not np.pi * 5 / 180 < alpha < np.pi * 175 / 180: raise RuntimeError( - "alpha=={}: must be a radian, and \ - must be in np.pi*5/180 < alpha < np.pi*175/180".format( - alpha - ) + f"alpha=={alpha}: must be a radian, and \ + must be in np.pi*5/180 < alpha < np.pi*175/180" ) if not np.pi * 5 / 180 < beta < np.pi * 175 / 180: raise RuntimeError( - "beta=={}: must be a radian, and \ - must be in np.pi*5/180 < beta < np.pi*175/180".format( - beta - ) + f"beta=={beta}: must be a radian, and \ + must be in np.pi*5/180 < beta < np.pi*175/180" ) if not np.pi * 5 / 180 < gamma < np.pi * 175 / 180: raise RuntimeError( - "gamma=={}: must be a radian, and \ - must be in np.pi*5/180 < gamma < np.pi*175/180".format( - gamma - ) + f"gamma=={gamma}: must be a radian, and \ + must be in np.pi*5/180 < gamma < np.pi*175/180" ) if not A > 0.2: raise RuntimeError(f"A=={A}, must be greater than 0.2") diff --git a/dpdata/cp2k/output.py b/dpdata/cp2k/output.py index 13d5be7c7..0b08c51ef 100644 --- a/dpdata/cp2k/output.py +++ b/dpdata/cp2k/output.py @@ -327,9 +327,7 @@ def handle_single_xyz_frame(self, lines): atom_num = int(lines[0].strip("\n").strip()) if len(lines) != atom_num + 2: raise RuntimeError( - "format error, atom_num=={}, {}!=atom_num+2".format( - atom_num, len(lines) - ) + f"format error, atom_num=={atom_num}, {len(lines)}!=atom_num+2" ) data_format_line = lines[1].strip("\n").strip() + " " prop_pattern = re.compile(r"(?P\w+)\s*=\s*(?P.*?)[, ]") @@ -411,18 +409,18 @@ def get_frames(fname): cell.append(ii.split()[4:7]) if "Atomic kind:" in ii: atom_symbol_list.append(ii.split()[3]) - if "Atom Kind Element" in ii: + + # beginning of coords block + if "Atom Kind Element" in ii or "Atom Kind Element" in ii: coord_flag = True - coord_idx = idx + # parse coords lines + elif coord_flag: + if ii == "\n": + coord_flag = len(coord) == 0 # skip empty line at the beginning + else: + coord.append(ii.split()[4:7]) + atom_symbol_idx_list.append(ii.split()[1]) - # get the coord block info - if coord_flag: - if idx > coord_idx + 1: - if ii == "\n": - coord_flag = False - else: - coord.append(ii.split()[4:7]) - atom_symbol_idx_list.append(ii.split()[1]) if "ENERGY|" in ii: energy = ii.split()[8] if " Atom Kind " in ii: @@ -488,7 +486,7 @@ def get_frames(fname): atom_types = [] atom_numbs = [] # preserve the atom_name order - atom_names = atom_symbol_list[np.sort(symbol_idx)] + atom_names = atom_symbol_list[np.sort(symbol_idx, kind="stable")] for jj in atom_symbol_list: for idx, ii in enumerate(atom_names): if jj == ii: diff --git a/dpdata/data_type.py b/dpdata/data_type.py new file mode 100644 index 000000000..b5141a4e0 --- /dev/null +++ b/dpdata/data_type.py @@ -0,0 +1,154 @@ +from enum import Enum, unique +from typing import TYPE_CHECKING, Tuple + +import numpy as np + +from dpdata.plugin import Plugin + +if TYPE_CHECKING: + from dpdata.system import System + + +@unique +class Axis(Enum): + """Data axis.""" + + NFRAMES = "nframes" + NATOMS = "natoms" + NTYPES = "ntypes" + NBONDS = "nbonds" + + +class AnyInt(int): + """AnyInt equals to any other integer.""" + + def __eq__(self, other): + return True + + +class DataError(Exception): + """Data is not correct.""" + + +class DataType: + """DataType represents a type of data, like coordinates, energies, etc. + + Parameters + ---------- + name : str + name of data + dtype : type or tuple[type] + data type, e.g. np.ndarray + shape : tuple[int], optional + shape of data. Used when data is list or np.ndarray. Use Axis to + represents numbers + required : bool, default=True + whether this data is required + """ + + def __init__( + self, + name: str, + dtype: type, + shape: Tuple[int, Axis] = None, + required: bool = True, + ) -> None: + self.name = name + self.dtype = dtype + self.shape = shape + self.required = required + + def real_shape(self, system: "System") -> Tuple[int]: + """Returns expected real shape of a system.""" + shape = [] + for ii in self.shape: + if ii is Axis.NFRAMES: + shape.append(system.get_nframes()) + elif ii is Axis.NTYPES: + shape.append(system.get_ntypes()) + elif ii is Axis.NATOMS: + shape.append(system.get_natoms()) + elif ii is Axis.NBONDS: + # BondOrderSystem + shape.append(system.get_nbonds()) + elif ii == -1: + shape.append(AnyInt(-1)) + elif isinstance(ii, int): + shape.append(ii) + else: + raise RuntimeError("Shape is not an int!") + return tuple(shape) + + def check(self, system: "System"): + """Check if a system has correct data of this type. + + Parameters + ---------- + system : System + checked system + + Raises + ------ + DataError + type or shape of data is not correct + """ + # check if exists + if self.name in system.data: + data = system.data[self.name] + # check dtype + # allow list for empty np.ndarray + if isinstance(data, list) and not len(data): + pass + elif not isinstance(data, self.dtype): + raise DataError( + f"Type of {self.name} is {type(data).__name__}, but expected {self.dtype.__name__}" + ) + # check shape + if self.shape is not None: + shape = self.real_shape(system) + # skip checking empty list of np.ndarray + if isinstance(data, np.ndarray): + if data.size and shape != data.shape: + raise DataError( + f"Shape of {self.name} is {data.shape}, but expected {shape}" + ) + elif isinstance(data, list): + if len(shape) and shape[0] != len(data): + raise DataError( + "Length of %s is %d, but expected %d" + % (self.name, len(data), shape[0]) + ) + else: + raise RuntimeError("Unsupported type to check shape") + elif self.required: + raise DataError("%s not found in data" % self.name) + + +__system_data_type_plugin = Plugin() +__labeled_system_data_type_plugin = Plugin() + + +def register_data_type(data_type: DataType, labeled: bool): + """Register a data type. + + Parameters + ---------- + data_type : DataType + data type to be registered + labeled : bool + whether this data type is for LabeledSystem + """ + plugin = __labeled_system_data_type_plugin if labeled else __system_data_type_plugin + plugin.register(data_type.name)(data_type) + + +def get_data_types(labeled: bool): + """Get all registered data types. + + Parameters + ---------- + labeled : bool + whether this data type is for LabeledSystem + """ + plugin = __labeled_system_data_type_plugin if labeled else __system_data_type_plugin + return tuple(plugin.plugins.values()) diff --git a/dpdata/deepmd/comp.py b/dpdata/deepmd/comp.py index 66bf4ee66..7b909b162 100644 --- a/dpdata/deepmd/comp.py +++ b/dpdata/deepmd/comp.py @@ -1,9 +1,12 @@ import glob import os import shutil +import warnings import numpy as np +import dpdata + from .raw import load_type @@ -60,6 +63,42 @@ def to_system_data(folder, type_map=None, labels=True): data["forces"] = np.concatenate(all_forces, axis=0) if len(all_virs) > 0: data["virials"] = np.concatenate(all_virs, axis=0) + # allow custom dtypes + if labels: + for dtype in dpdata.system.LabeledSystem.DTYPES: + if dtype.name in ( + "atom_numbs", + "atom_names", + "atom_types", + "orig", + "cells", + "coords", + "real_atom_types", + "real_atom_names", + "nopbc", + "energies", + "forces", + "virials", + ): + # skip as these data contains specific rules + continue + if not (len(dtype.shape) and dtype.shape[0] == dpdata.system.Axis.NFRAMES): + warnings.warn( + f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted from deepmd/npy format." + ) + continue + natoms = data["coords"].shape[1] + shape = [ + natoms if xx == dpdata.system.Axis.NATOMS else xx + for xx in dtype.shape[1:] + ] + all_data = [] + for ii in sets: + tmp = _cond_load_data(os.path.join(ii, dtype.name + ".npy")) + if tmp is not None: + all_data.append(np.reshape(tmp, [tmp.shape[0], *shape])) + if len(all_data) > 0: + data[dtype.name] = np.concatenate(all_data, axis=0) return data @@ -131,3 +170,34 @@ def dump(folder, data, set_size=5000, comp_prec=np.float32, remove_sets=True): if data.get("nopbc", False): with open(os.path.join(folder, "nopbc"), "w") as fw_nopbc: pass + # allow custom dtypes + for dtype in dpdata.system.LabeledSystem.DTYPES: + if dtype.name in ( + "atom_numbs", + "atom_names", + "atom_types", + "orig", + "cells", + "coords", + "real_atom_types", + "real_atom_names", + "nopbc", + "energies", + "forces", + "virials", + ): + # skip as these data contains specific rules + continue + if dtype.name not in data: + continue + if not (len(dtype.shape) and dtype.shape[0] == dpdata.system.Axis.NFRAMES): + warnings.warn( + f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted to deepmd/npy format." + ) + continue + ddata = np.reshape(data[dtype.name], [nframes, -1]).astype(comp_prec) + for ii in range(nsets): + set_stt = ii * set_size + set_end = (ii + 1) * set_size + set_folder = os.path.join(folder, "set.%03d" % ii) + np.save(os.path.join(set_folder, dtype.name), ddata[set_stt:set_end]) diff --git a/dpdata/deepmd/hdf5.py b/dpdata/deepmd/hdf5.py index e7e1abe6a..f2ee9c8a2 100644 --- a/dpdata/deepmd/hdf5.py +++ b/dpdata/deepmd/hdf5.py @@ -1,10 +1,13 @@ """Utils for deepmd/hdf5 format.""" +import warnings from typing import Optional, Union import h5py import numpy as np from wcmatch.glob import globfilter +import dpdata + __all__ = ["to_system_data", "dump"] @@ -30,6 +33,9 @@ def to_system_data( g = f[folder] if folder else f data = {} + # ignore empty files or groups + if "type.raw" not in g.keys(): + return data data["atom_types"] = g["type.raw"][:] ntypes = np.max(data["atom_types"]) + 1 natoms = data["atom_types"].size @@ -89,6 +95,39 @@ def to_system_data( "required": False, }, } + # allow custom dtypes + for dtype in dpdata.system.LabeledSystem.DTYPES: + if dtype.name in ( + "atom_numbs", + "atom_names", + "atom_types", + "orig", + "cells", + "coords", + "real_atom_types", + "real_atom_names", + "nopbc", + "energies", + "forces", + "virials", + ): + # skip as these data contains specific rules + continue + if not (len(dtype.shape) and dtype.shape[0] == dpdata.system.Axis.NFRAMES): + warnings.warn( + f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted from deepmd/hdf5 format." + ) + continue + shape = [ + natoms if xx == dpdata.system.Axis.NATOMS else xx for xx in dtype.shape[1:] + ] + + data_types[dtype.name] = { + "fn": dtype.name, + "labeled": True, + "shape": shape, + "required": False, + } for dt, prop in data_types.items(): all_data = [] @@ -140,6 +179,9 @@ def dump( g = f.create_group(folder) else: g = f + # ignore empty systems + if not len(data["coords"]): + return # dump raw (array in fact) g.create_dataset("type.raw", data=data["atom_types"]) g.create_dataset("type_map.raw", data=np.array(data["atom_names"], dtype="S")) @@ -161,6 +203,37 @@ def dump( "forces": {"fn": "force", "shape": (nframes, -1), "dump": True}, "virials": {"fn": "virial", "shape": (nframes, 9), "dump": True}, } + + # allow custom dtypes + for dtype in dpdata.system.LabeledSystem.DTYPES: + if dtype.name in ( + "atom_numbs", + "atom_names", + "atom_types", + "orig", + "cells", + "coords", + "real_atom_types", + "real_atom_names", + "nopbc", + "energies", + "forces", + "virials", + ): + # skip as these data contains specific rules + continue + if not (len(dtype.shape) and dtype.shape[0] == dpdata.system.Axis.NFRAMES): + warnings.warn( + f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted to deepmd/hdf5 format." + ) + continue + + data_types[dtype.name] = { + "fn": dtype.name, + "shape": (nframes, -1), + "dump": True, + } + for dt, prop in data_types.items(): if dt in data: if prop["dump"]: diff --git a/dpdata/deepmd/raw.py b/dpdata/deepmd/raw.py index 2f2021d44..c7a64ec47 100644 --- a/dpdata/deepmd/raw.py +++ b/dpdata/deepmd/raw.py @@ -1,7 +1,10 @@ import os +import warnings import numpy as np +import dpdata + def load_type(folder, type_map=None): data = {} @@ -57,6 +60,42 @@ def to_system_data(folder, type_map=None, labels=True): data["virials"] = np.reshape(data["virials"], [nframes, 3, 3]) if os.path.isfile(os.path.join(folder, "nopbc")): data["nopbc"] = True + # allow custom dtypes + if labels: + for dtype in dpdata.system.LabeledSystem.DTYPES: + if dtype.name in ( + "atom_numbs", + "atom_names", + "atom_types", + "orig", + "cells", + "coords", + "real_atom_types", + "real_atom_names", + "nopbc", + "energies", + "forces", + "virials", + ): + # skip as these data contains specific rules + continue + if not ( + len(dtype.shape) and dtype.shape[0] == dpdata.system.Axis.NFRAMES + ): + warnings.warn( + f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted from deepmd/raw format." + ) + continue + natoms = data["coords"].shape[1] + shape = [ + natoms if xx == dpdata.system.Axis.NATOMS else xx + for xx in dtype.shape[1:] + ] + if os.path.exists(os.path.join(folder, f"{dtype.name}.raw")): + data[dtype.name] = np.reshape( + np.loadtxt(os.path.join(folder, f"{dtype.name}.raw")), + [nframes, *shape], + ) return data else: raise RuntimeError("not dir " + folder) @@ -102,3 +141,30 @@ def dump(folder, data): if data.get("nopbc", False): with open(os.path.join(folder, "nopbc"), "w") as fw_nopbc: pass + # allow custom dtypes + for dtype in dpdata.system.LabeledSystem.DTYPES: + if dtype.name in ( + "atom_numbs", + "atom_names", + "atom_types", + "orig", + "cells", + "coords", + "real_atom_types", + "real_atom_names", + "nopbc", + "energies", + "forces", + "virials", + ): + # skip as these data contains specific rules + continue + if dtype.name not in data: + continue + if not (len(dtype.shape) and dtype.shape[0] == dpdata.system.Axis.NFRAMES): + warnings.warn( + f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted to deepmd/raw format." + ) + continue + ddata = np.reshape(data[dtype.name], [nframes, -1]) + np.savetxt(os.path.join(folder, f"{dtype.name}.raw"), ddata) diff --git a/dpdata/dftbplus/__init__.py b/dpdata/dftbplus/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/dpdata/dftbplus/output.py b/dpdata/dftbplus/output.py new file mode 100644 index 000000000..ba8f6c840 --- /dev/null +++ b/dpdata/dftbplus/output.py @@ -0,0 +1,74 @@ +from typing import Tuple + +import numpy as np + + +def read_dftb_plus(fn_1: str, fn_2: str) -> Tuple[str, np.ndarray, float, np.ndarray]: + """Read from DFTB+ input and output. + + Parameters + ---------- + fn_1 : str + DFTB+ input file name + fn_2 : str + DFTB+ output file name + + Returns + ------- + str + atomic symbols + np.ndarray + atomic coordinates + float + total potential energy + np.ndarray + atomic forces + + """ + coord = None + symbols = None + forces = None + energy = None + with open(fn_1) as f: + flag = 0 + for line in f: + if flag == 1: + flag += 1 + elif flag == 2: + components = line.split() + flag += 1 + elif line.startswith("Geometry"): + flag = 1 + coord = [] + symbols = [] + elif flag in (3, 4, 5, 6): + s = line.split() + components_num = int(s[1]) + symbols.append(components[components_num - 1]) + coord.append([float(s[2]), float(s[3]), float(s[4])]) + flag += 1 + if flag == 7: + flag = 0 + with open(fn_2) as f: + flag = 0 + for line in f: + if line.startswith("Total Forces"): + flag = 8 + forces = [] + elif flag in (8, 9, 10, 11): + s = line.split() + forces.append([float(s[1]), float(s[2]), float(s[3])]) + flag += 1 + if flag == 12: + flag = 0 + elif line.startswith("Total energy:"): + s = line.split() + energy = float(s[2]) + flag = 0 + + symbols = np.array(symbols) + forces = np.array(forces) + coord = np.array(coord) + assert coord.shape == forces.shape + + return symbols, coord, energy, forces diff --git a/dpdata/driver.py b/dpdata/driver.py index b446a5e83..de72d3fe7 100644 --- a/dpdata/driver.py +++ b/dpdata/driver.py @@ -125,6 +125,7 @@ class HybridDriver(Driver): ... {"type": "sqm", "qm_theory": "DFTB3"}, ... {"type": "dp", "dp": "frozen_model.pb"}, ... ]) + This driver is the hybrid of SQM and DP. """ diff --git a/dpdata/lammps/dump.py b/dpdata/lammps/dump.py index cdc28bdcd..017f75a34 100644 --- a/dpdata/lammps/dump.py +++ b/dpdata/lammps/dump.py @@ -215,7 +215,9 @@ def system_data(lines, type_map=None, type_idx_zero=True, unwrap=False): system["cells"].append(cell) atype = get_atype(array_lines[ii], type_idx_zero=type_idx_zero) # map atom type; a[as[a][as[as[b]]]] = b[as[b][as^{-1}[b]]] = b[id] - idx = np.argsort(atype)[np.argsort(np.argsort(system["atom_types"]))] + idx = np.argsort(atype, kind="stable")[ + np.argsort(np.argsort(system["atom_types"], kind="stable"), kind="stable") + ] system["coords"].append( safe_get_posi(array_lines[ii], cell, np.array(orig), unwrap)[idx] ) diff --git a/dpdata/md/rdf.py b/dpdata/md/rdf.py index 219543eb3..de8f1c746 100644 --- a/dpdata/md/rdf.py +++ b/dpdata/md/rdf.py @@ -57,14 +57,14 @@ def compute_rdf(box, posis, atype, sel_type=[None, None], max_r=5, nbins=100): def _compute_rdf_1frame(box, posis, atype, sel_type=[None, None], max_r=5, nbins=100): - all_types = list(set(list(np.sort(atype)))) + all_types = list(set(list(np.sort(atype, kind="stable")))) if sel_type[0] is None: sel_type[0] = all_types if sel_type[1] is None: sel_type[1] = all_types - if type(sel_type[0]) is not list: + if not isinstance(sel_type[0], list): sel_type[0] = [sel_type[0]] - if type(sel_type[1]) is not list: + if not isinstance(sel_type[1], list): sel_type[1] = [sel_type[1]] natoms = len(posis) import ase.neighborlist diff --git a/dpdata/plugins/deepmd.py b/dpdata/plugins/deepmd.py index 38eab85fd..6953aba53 100644 --- a/dpdata/plugins/deepmd.py +++ b/dpdata/plugins/deepmd.py @@ -82,10 +82,12 @@ class DeePMDMixedFormat(Format): Examples -------- Dump a MultiSystems into a mixed type numpy directory: + >>> import dpdata >>> dpdata.MultiSystems(*systems).to_deepmd_npy_mixed("mixed_dir") Load a mixed type data into a MultiSystems: + >>> import dpdata >>> dpdata.MultiSystems().load_systems_from_file("mixed_dir", fmt="deepmd/npy/mixed") """ @@ -95,7 +97,9 @@ def from_system_mix(self, file_name, type_map=None, **kwargs): file_name, type_map=type_map, labels=False ) - def to_system(self, data, file_name, prec=np.float64, **kwargs): + def to_system( + self, data, file_name, set_size: int = 2000, prec=np.float64, **kwargs + ): """Dump the system in deepmd mixed type format (numpy binary) to `folder`. The frames were already split to different systems, so these frames can be dumped to one single subfolders @@ -107,12 +111,14 @@ def to_system(self, data, file_name, prec=np.float64, **kwargs): System data file_name : str The output folder + set_size : int, default=2000 + set size prec : {numpy.float32, numpy.float64} The floating point precision of the compressed data **kwargs : dict other parameters """ - dpdata.deepmd.mixed.dump(file_name, data, comp_prec=prec) + dpdata.deepmd.mixed.dump(file_name, data, set_size=set_size, comp_prec=prec) def from_labeled_system_mix(self, file_name, type_map=None, **kwargs): return dpdata.deepmd.mixed.to_system_data( @@ -157,6 +163,7 @@ class DeePMDHDF5Format(Format): Examples -------- Dump a MultiSystems to a HDF5 file: + >>> import dpdata >>> dpdata.MultiSystems().from_deepmd_npy("data").to_deepmd_hdf5("data.hdf5") """ @@ -394,8 +401,10 @@ def label(self, data: dict) -> dict: type_map = self.dp.get_type_map() ori_sys = dpdata.System.from_dict({"data": data}) + ori_sys_copy = ori_sys.copy() ori_sys.sort_atom_names(type_map=type_map) atype = ori_sys["atom_types"] + ori_sys = ori_sys_copy if not self.enable_auto_batch_size: labeled_sys = dpdata.LabeledSystem() diff --git a/dpdata/plugins/dftbplus.py b/dpdata/plugins/dftbplus.py index e69de29bb..5c8b46828 100644 --- a/dpdata/plugins/dftbplus.py +++ b/dpdata/plugins/dftbplus.py @@ -0,0 +1,61 @@ +import numpy as np + +from dpdata.dftbplus.output import read_dftb_plus +from dpdata.format import Format +from dpdata.unit import EnergyConversion, ForceConversion + +energy_convert = EnergyConversion("hartree", "eV").value() +force_convert = ForceConversion("hartree/bohr", "eV/angstrom").value() + + +@Format.register("dftbplus") +class DFTBplusFormat(Format): + """The DFTBplusFormat class handles files in the DFTB+ format. + + This class provides a method to read DFTB+ files from a labeled system and + returns a dictionary containing various properties of the system.For more + information, please refer to the official documentation at the following URL: + https://dftbplus.org/documentation + + Attributes + ---------- + None + + Methods + ------- + from_labeled_system(file_paths, **kwargs): Reads system information from files. + + """ + + def from_labeled_system(self, file_paths, **kwargs): + """Reads system information from the given DFTB+ file paths. + + Parameters + ---------- + file_paths : tuple + A tuple containing the input and output file paths. + - Input file (file_in): Contains information about symbols and coord. + - Output file (file_out): Contains information about energy and force. + **kwargs : dict + other parameters + + """ + file_in, file_out = file_paths + symbols, coord, energy, forces = read_dftb_plus(file_in, file_out) + last_occurrence = {v: i for i, v in enumerate(symbols)} + atom_names = np.array(sorted(np.unique(symbols), key=last_occurrence.get)) + atom_types = np.array([np.where(atom_names == s)[0][0] for s in symbols]) + atom_numbs = np.array([np.sum(atom_types == i) for i in range(len(atom_names))]) + natoms = coord.shape[0] + + return { + "atom_types": atom_types, + "atom_names": list(atom_names), + "atom_numbs": list(atom_numbs), + "coords": coord.reshape((1, natoms, 3)), + "energies": np.array([energy * energy_convert]), + "forces": (forces * force_convert).reshape((1, natoms, 3)), + "cells": np.zeros((1, 3, 3)), + "orig": np.zeros(3), + "nopbc": True, + } diff --git a/dpdata/qe/scf.py b/dpdata/qe/scf.py index 659b18458..d2f5dead4 100755 --- a/dpdata/qe/scf.py +++ b/dpdata/qe/scf.py @@ -87,7 +87,7 @@ def get_coords(lines, cell): atom_types = [] atom_numbs = [] # preserve the atom_name order - atom_names = atom_symbol_list[np.sort(symbol_idx)] + atom_names = atom_symbol_list[np.sort(symbol_idx, kind="stable")] for jj in atom_symbol_list: for idx, ii in enumerate(atom_names): if jj == ii: @@ -129,13 +129,13 @@ def get_stress(lines): def get_frame(fname): - if type(fname) == str: + if isinstance(fname, str): path_out = fname outname = os.path.basename(path_out) # the name of the input file is assumed to be different from the output by 'in' and 'out' inname = outname.replace("out", "in") path_in = os.path.join(os.path.dirname(path_out), inname) - elif type(fname) == list and len(fname) == 2: + elif isinstance(fname, list) and len(fname) == 2: path_in = fname[0] path_out = fname[1] else: diff --git a/dpdata/qe/traj.py b/dpdata/qe/traj.py index ab1a790b5..e27990cbe 100644 --- a/dpdata/qe/traj.py +++ b/dpdata/qe/traj.py @@ -205,6 +205,7 @@ def to_system_data(input_name, prefix, begin=0, step=1): data["cells"], tmp_steps = load_data( prefix + ".cel", 3, begin=begin, step=step, convert=length_convert ) + data["cells"] = np.transpose(data["cells"], (0, 2, 1)) if csteps != tmp_steps: csteps.append(None) tmp_steps.append(None) diff --git a/dpdata/system.py b/dpdata/system.py index 743f3d057..8af8e4a59 100644 --- a/dpdata/system.py +++ b/dpdata/system.py @@ -2,7 +2,6 @@ import glob import os from copy import deepcopy -from enum import Enum, unique from typing import Any, Dict, Optional, Tuple, Union import numpy as np @@ -15,6 +14,7 @@ # ensure all plugins are loaded! import dpdata.plugins from dpdata.amber.mask import load_param_file, pick_by_amber_mask +from dpdata.data_type import Axis, DataError, DataType, get_data_types from dpdata.driver import Driver, Minimizer from dpdata.format import Format from dpdata.plugin import Plugin @@ -33,112 +33,6 @@ def load_format(fmt): ) -@unique -class Axis(Enum): - """Data axis.""" - - NFRAMES = "nframes" - NATOMS = "natoms" - NTYPES = "ntypes" - NBONDS = "nbonds" - - -class DataError(Exception): - """Data is not correct.""" - - -class DataType: - """DataType represents a type of data, like coordinates, energies, etc. - - Parameters - ---------- - name : str - name of data - dtype : type or tuple[type] - data type, e.g. np.ndarray - shape : tuple[int], optional - shape of data. Used when data is list or np.ndarray. Use Axis to - represents numbers - required : bool, default=True - whether this data is required - """ - - def __init__( - self, - name: str, - dtype: type, - shape: Tuple[int, Axis] = None, - required: bool = True, - ) -> None: - self.name = name - self.dtype = dtype - self.shape = shape - self.required = required - - def real_shape(self, system: "System") -> Tuple[int]: - """Returns expected real shape of a system.""" - shape = [] - for ii in self.shape: - if ii is Axis.NFRAMES: - shape.append(system.get_nframes()) - elif ii is Axis.NTYPES: - shape.append(system.get_ntypes()) - elif ii is Axis.NATOMS: - shape.append(system.get_natoms()) - elif ii is Axis.NBONDS: - # BondOrderSystem - shape.append(system.get_nbonds()) - elif isinstance(ii, int): - shape.append(ii) - else: - raise RuntimeError("Shape is not an int!") - return tuple(shape) - - def check(self, system: "System"): - """Check if a system has correct data of this type. - - Parameters - ---------- - system : System - checked system - - Raises - ------ - DataError - type or shape of data is not correct - """ - # check if exists - if self.name in system.data: - data = system.data[self.name] - # check dtype - # allow list for empty np.ndarray - if isinstance(data, list) and not len(data): - pass - elif not isinstance(data, self.dtype): - raise DataError( - f"Type of {self.name} is {type(data).__name__}, but expected {self.dtype.__name__}" - ) - # check shape - if self.shape is not None: - shape = self.real_shape(system) - # skip checking empty list of np.ndarray - if isinstance(data, np.ndarray): - if data.size and shape != data.shape: - raise DataError( - f"Shape of {self.name} is {data.shape}, but expected {shape}" - ) - elif isinstance(data, list): - if len(shape) and shape[0] != len(data): - raise DataError( - "Length of %s is %d, but expected %d" - % (self.name, len(data), shape[0]) - ) - else: - raise RuntimeError("Unsupported type to check shape") - elif self.required: - raise DataError("%s not found in data" % self.name) - - class System(MSONable): """The data System. @@ -521,17 +415,21 @@ def append(self, system): return False elif not len(self.data["atom_numbs"]): # this system is non-converged but the system to append is converged - self.data = system.data + self.data = system.data.copy() return False if system.uniq_formula != self.uniq_formula: raise RuntimeError( f"systems with inconsistent formula could not be append: {self.uniq_formula} v.s. {system.uniq_formula}" ) if system.data["atom_names"] != self.data["atom_names"]: + # prevent original system to be modified + system = system.copy() # allow to append a system with different atom_names order system.sort_atom_names() self.sort_atom_names() if (system.data["atom_types"] != self.data["atom_types"]).any(): + # prevent original system to be modified + system = system.copy() # allow to append a system with different atom_types order system.sort_atom_types() self.sort_atom_types() @@ -611,7 +509,7 @@ def check_type_map(self, type_map): self.sort_atom_names(type_map=type_map) def apply_type_map(self, type_map): - if type_map is not None and type(type_map) is list: + if type_map is not None and isinstance(type_map, list): self.check_type_map(type_map) else: raise RuntimeError("invalid type map, cannot be applied") @@ -624,7 +522,7 @@ def sort_atom_types(self) -> np.ndarray: idx : np.ndarray new atom index in the Axis.NATOMS """ - idx = np.argsort(self.data["atom_types"]) + idx = np.argsort(self.data["atom_types"], kind="stable") for tt in self.DTYPES: if tt.name not in self.data: # skip optional data @@ -756,7 +654,7 @@ def replicate(self, ncopy): if len(ncopy) != 3: raise RuntimeError("ncopy must be a list or tuple with 3 int") for ii in ncopy: - if type(ii) is not int: + if not isinstance(ii, int): raise RuntimeError("ncopy must be a list or tuple must with 3 int") tmp = System() @@ -767,7 +665,7 @@ def replicate(self, ncopy): np.array(np.copy(data["atom_numbs"])) * np.prod(ncopy) ) tmp.data["atom_types"] = np.sort( - np.tile(np.copy(data["atom_types"]), np.prod(ncopy)) + np.tile(np.copy(data["atom_types"]), np.prod(ncopy)), kind="stable" ) tmp.data["cells"] = np.copy(data["cells"]) for ii in range(3): @@ -792,18 +690,10 @@ def replace(self, initial_atom_type, end_atom_type, replace_num): raise RuntimeError( "Must use method replace() of the instance of class dpdata.System" ) - if type(replace_num) is not int: - raise ValueError( - "replace_num must be a integer. Now is {replace_num}".format( - replace_num=replace_num - ) - ) + if not isinstance(replace_num, int): + raise ValueError(f"replace_num must be a integer. Now is {replace_num}") if replace_num <= 0: - raise ValueError( - "replace_num must be larger than 0.Now is {replace_num}".format( - replace_num=replace_num - ) - ) + raise ValueError(f"replace_num must be larger than 0.Now is {replace_num}") try: initial_atom_index = self.data["atom_names"].index(initial_atom_type) @@ -941,6 +831,7 @@ def predict(self, *args: Any, driver: str = "dp", **kwargs: Any) -> "LabeledSyst Examples -------- The default driver is DP: + >>> labeled_sys = ori_sys.predict("frozen_model_compressed.pb") """ if not isinstance(driver, Driver): @@ -1063,6 +954,17 @@ def pick_by_amber_mask(self, param, maskstr, pass_coords=False, nopbc=None): idx = pick_by_amber_mask(parm, maskstr) return self.pick_atom_idx(idx, nopbc=nopbc) + @classmethod + def register_data_type(cls, *data_type: Tuple[DataType]): + """Register data type. + + Parameters + ---------- + *data_type : tuple[DataType] + data type to be regiestered + """ + cls.DTYPES = cls.DTYPES + tuple(data_type) + def get_cell_perturb_matrix(cell_pert_fraction): if cell_pert_fraction < 0: @@ -1440,6 +1342,8 @@ def append(self, *systems): def __append(self, system): if not system.formula: return + # prevent changing the original system + system = system.copy() self.check_atom_names(system) formula = system.formula if formula in self.systems: @@ -1514,9 +1418,10 @@ def minimize( Examples -------- Minimize a system using ASE BFGS along with a DP driver: + >>> from dpdata.driver import Driver >>> from ase.optimize import BFGS - >>> driver = driver.get_driver("dp")("some_model.pb") + >>> driver = Driver.get_driver("dp")("some_model.pb") >>> some_system.minimize(minimizer="ase", driver=driver, optimizer=BFGS, fmax=1e-5) """ if not isinstance(minimizer, Minimizer): @@ -1659,7 +1564,8 @@ def get_cls_name(cls: object) -> str: def add_format_methods(): - """Add format methods to System, LabeledSystem, and MultiSystems. + """Add format methods to System, LabeledSystem, and MultiSystems; add data types + to System and LabeledSystem. Notes ----- @@ -1703,5 +1609,10 @@ def to_format(self, *args, **kwargs): setattr(LabeledSystem, method, get_func(formatcls)) setattr(MultiSystems, method, get_func(formatcls)) + # at this point, System.DTYPES and LabeledSystem.DTYPES has been initialized + System.register_data_type(*get_data_types(labeled=False)) + LabeledSystem.register_data_type(*get_data_types(labeled=False)) + LabeledSystem.register_data_type(*get_data_types(labeled=True)) + add_format_methods() diff --git a/dpdata/utils.py b/dpdata/utils.py index 55cf41ef5..da7261790 100644 --- a/dpdata/utils.py +++ b/dpdata/utils.py @@ -63,14 +63,16 @@ def sort_atom_names(data, type_map=None): # a[as[a]] == b[as[b]] as == argsort # as[as[b]] == as^{-1}[b] # a[as[a][as[as[b]]]] = b[as[b][as^{-1}[b]]] = b[id] - idx = np.argsort(data["atom_names"])[np.argsort(np.argsort(type_map))] + idx = np.argsort(data["atom_names"], kind="stable")[ + np.argsort(np.argsort(type_map, kind="stable"), kind="stable") + ] else: # index that will sort an array by alphabetical order - idx = np.argsort(data["atom_names"]) + idx = np.argsort(data["atom_names"], kind="stable") # sort atom_names, atom_numbs, atom_types by idx data["atom_names"] = list(np.array(data["atom_names"])[idx]) data["atom_numbs"] = list(np.array(data["atom_numbs"])[idx]) - data["atom_types"] = np.argsort(idx)[data["atom_types"]] + data["atom_types"] = np.argsort(idx, kind="stable")[data["atom_types"]] return data diff --git a/dpdata/xyz/quip_gap_xyz.py b/dpdata/xyz/quip_gap_xyz.py index c2d4118c4..8b3f164be 100644 --- a/dpdata/xyz/quip_gap_xyz.py +++ b/dpdata/xyz/quip_gap_xyz.py @@ -47,9 +47,7 @@ def handle_single_xyz_frame(lines): atom_num = int(lines[0].strip("\n").strip()) if len(lines) != atom_num + 2: raise RuntimeError( - "format error, atom_num=={}, {}!=atom_num+2".format( - atom_num, len(lines) - ) + f"format error, atom_num=={atom_num}, {len(lines)}!=atom_num+2" ) data_format_line = lines[1].strip("\n").strip() + " " field_value_pattern = re.compile( diff --git a/plugin_example/README.md b/plugin_example/README.md index 322756f93..a506a4e88 100644 --- a/plugin_example/README.md +++ b/plugin_example/README.md @@ -6,6 +6,7 @@ pip install . ```py import dpdata + print(dpdata.System(12, fmt="random")) ``` diff --git a/pyproject.toml b/pyproject.toml index 5b555ad59..087d6ff9a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,6 +54,7 @@ docs = [ 'm2r2', 'deepmodeling-sphinx>=0.1.1', 'sphinx-argparse', + 'rdkit', ] [tool.setuptools.packages.find] diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 1c7c22936..000000000 --- a/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -monty==2.0.4 -pymatgen==2019.7.2 diff --git a/tests/dftbplus/detailed.out b/tests/dftbplus/detailed.out new file mode 100644 index 000000000..21fe93fc5 --- /dev/null +++ b/tests/dftbplus/detailed.out @@ -0,0 +1,88 @@ + Fermi distribution function + +Calculation with static geometry + + +******************************************************************************** + iSCC Total electronic Diff electronic SCC error + 88 -0.33554958E+01 -0.31884693E-07 0.11352275E-06 +******************************************************************************** + + Total charge: -0.00000000 + + Atomic gross charges (e) + Atom Charge + 1 -0.48336965 + 2 0.03575828 + 3 0.22380553 + 4 0.22380583 + +Nr. of electrons (up): 8.00000000 +Atom populations (up) + Atom Population + 1 5.48336965 + 2 0.96424172 + 3 0.77619447 + 4 0.77619417 + +l-shell populations (up) + Atom Sh. l Population + 1 1 0 1.65733979 + 1 2 1 3.82602986 + 2 1 0 0.96424172 + 2 2 1 0.00000000 + 3 1 0 0.77619447 + 3 2 1 0.00000000 + 4 1 0 0.77619417 + 4 2 1 0.00000000 + +Orbital populations (up) + Atom Sh. l m Population Label + 1 1 0 0 1.65733979 s + 1 2 1 -1 1.21678996 p_y + 1 2 1 0 1.46556336 p_z + 1 2 1 1 1.14367654 p_x + 2 1 0 0 0.96424172 s + 2 2 1 -1 0.00000000 p_y + 2 2 1 0 0.00000000 p_z + 2 2 1 1 0.00000000 p_x + 3 1 0 0 0.77619447 s + 3 2 1 -1 0.00000000 p_y + 3 2 1 0 0.00000000 p_z + 3 2 1 1 0.00000000 p_x + 4 1 0 0 0.77619417 s + 4 2 1 -1 0.00000000 p_y + 4 2 1 0 0.00000000 p_z + 4 2 1 1 0.00000000 p_x + +Fermi level: -0.2370222488 H -6.4497 eV +Band energy: -3.2187242078 H -87.5859 eV +TS: 0.0000000000 H 0.0000 eV +Band free energy (E-TS): -3.2187242078 H -87.5859 eV +Extrapolated E(0K): -3.2187242078 H -87.5859 eV +Input / Output electrons (q): 8.0000000000 8.0000000000 + +Energy H0: -3.3599884076 H -91.4299 eV +Energy SCC: 0.0056352830 H 0.1533 eV +Energy 3rd: -0.0011426808 H -0.0311 eV +Total Electronic energy: -3.3554958054 H -91.3077 eV +Repulsive energy: 0.0590974170 H 1.6081 eV +Total energy: -3.2963983884 H -89.6996 eV +Extrapolated to 0: -3.2963983884 H -89.6996 eV +Total Mermin free energy: -3.2963983884 H -89.6996 eV +Force related energy: -3.2963983884 H -89.6996 eV + +SCC converged + +Total Forces + 1 0.016567056203 0.002817951422 0.005634574270 + 2 -0.018803818530 -0.000002880649 -0.000006015442 + 3 0.001118562874 -0.005291070259 -0.000870711110 + 4 0.001118199454 0.002475999486 -0.004757847718 + +Maximal derivative component: 0.188038E-01 au + +Dipole moment: -0.06792979 -0.20495079 -0.40960550 au +Dipole moment: -0.17266032 -0.52093298 -1.04111341 Debye + + diff --git a/tests/dftbplus/dftb_pin.hsd b/tests/dftbplus/dftb_pin.hsd new file mode 100644 index 000000000..02e0ed228 --- /dev/null +++ b/tests/dftbplus/dftb_pin.hsd @@ -0,0 +1,93 @@ +Geometry = GenFormat { +4 C +N H +1 1 1.014150 0.112320 0.047370 +2 2 3.909390 0.037985 -0.101159 +3 2 0.702550 -0.851820 -0.060860 +4 2 0.702550 0.603740 -0.789160 +} +Driver = {} +Hamiltonian = DFTB { + SCC = Yes + MaxAngularMomentum = { + N = "p" + H = "p" + } + SlaterKosterFiles = { + N-N = "/home/jz748/devel/git/Programs/Amber18/dat/slko/3ob-3-1/N-N.skf" + H-H = "/home/jz748/devel/git/Programs/Amber18/dat/slko/3ob-3-1/H-H.skf" + H-N = "/home/jz748/devel/git/Programs/Amber18/dat/slko/3ob-3-1/H-N.skf" + N-H = "/home/jz748/devel/git/Programs/Amber18/dat/slko/3ob-3-1/N-H.skf" + } + ThirdOrderFull = Yes + HubbardDerivs = { + H = -0.1857 + N = -0.1535 + } + HCorrection = Damping { + Exponent = 4.0 + } + PolynomialRepulsive = {} + ShellResolvedSCC = No + OldSKInterpolation = No + RangeSeparated = None {} + ReadInitialCharges = No + InitialCharges = {} + SCCTolerance = 1.0000000000000001E-005 + ConvergentSCCOnly = Yes + SpinPolarisation = {} + ElectricField = {} + Solver = RelativelyRobust {} + Charge = 0.0000000000000000 + MaxSCCIterations = 100 + OnSiteCorrection = {} + Dispersion = {} + Solvation = {} + Electrostatics = GammaFunctional {} + ThirdOrder = No + Differentiation = FiniteDiff { + Delta = 1.2207031250000000E-004 + } + ForceEvaluation = "Traditional" + Mixer = Broyden { + MixingParameter = 0.20000000000000001 + InverseJacobiWeight = 1.0000000000000000E-002 + MinimalWeight = 1.0000000000000000 + MaximalWeight = 100000.00000000000 + WeightFactor = 1.0000000000000000E-002 + } + Filling = Fermi { + Temperature = 0.0000000000000000 + } +} +Options = { + WriteDetailedOut = Yes + WriteAutotestTag = No + WriteDetailedXML = No + WriteResultsTag = No + RestartFrequency = 20 + RandomSeed = 0 + WriteHS = No + WriteRealHS = No + MinimiseMemoryUsage = No + ShowFoldedCoords = No + TimingVerbosity = 1 + WriteChargesAsText = No +} +Analysis = { + CalculateForces = Yes + ProjectStates = {} + WriteEigenvectors = No + WriteBandOut = Yes + MullikenAnalysis = Yes + WriteNetCharges = No + AtomResolvedEnergies = No +} +ParserOptions = { + ParserVersion = 11 + WriteHSDInput = Yes + StopAfterParsing = No + IgnoreUnprocessedNodes = No +} +Reks = None {} +ExcitedState = {} diff --git a/tests/plugin/dpdata_plugin_test/__init__.py b/tests/plugin/dpdata_plugin_test/__init__.py new file mode 100644 index 000000000..b3821cb34 --- /dev/null +++ b/tests/plugin/dpdata_plugin_test/__init__.py @@ -0,0 +1,16 @@ +import numpy as np + +from dpdata.data_type import Axis, DataType, register_data_type + +# test data type + +register_data_type( + DataType("foo", np.ndarray, (Axis.NFRAMES, 2, 4), required=False), labeled=True +) + +register_data_type( + DataType("bar", np.ndarray, (Axis.NFRAMES, Axis.NATOMS, -1), required=False), + labeled=True, +) + +ep = None diff --git a/tests/plugin/pyproject.toml b/tests/plugin/pyproject.toml new file mode 100644 index 000000000..7ce1f854a --- /dev/null +++ b/tests/plugin/pyproject.toml @@ -0,0 +1,17 @@ +[build-system] +requires = ["setuptools>=61"] +build-backend = "setuptools.build_meta" + +[project] +name = "dpdata_plugin_test" +version = "0.0.0" +description = "A test for dpdata plugin" +dependencies = [ + 'numpy', + 'dpdata', +] +readme = "README.md" +requires-python = ">=3.7" + +[project.entry-points.'dpdata.plugins'] +random = "dpdata_plugin_test:ep" diff --git a/tests/poscars/POSCAR.P42nmc.replace b/tests/poscars/POSCAR.P42nmc.replace index e785d1286..5dd05d848 100644 --- a/tests/poscars/POSCAR.P42nmc.replace +++ b/tests/poscars/POSCAR.P42nmc.replace @@ -6,99 +6,99 @@ Hf24 O64 Zr8 Hf O Zr 24 64 8 Cartesian - 0.0000000000 0.0000000000 0.0000000000 - 7.5009600000 7.5009600000 5.0819200000 - 7.5009600000 7.5009600000 0.0000000000 - 7.5009600000 2.5003200000 10.1638400000 - 2.5003200000 7.5009600000 5.0819200000 - 2.5003200000 7.5009600000 10.1638400000 - 2.5003200000 2.5003200000 5.0819200000 - 2.5003200000 2.5003200000 10.1638400000 - 5.0006400000 2.5003200000 7.6228800000 - 10.0012800000 7.5009600000 2.5409600000 - 0.0000000000 2.5003200000 7.6228800000 - 0.0000000000 2.5003200000 2.5409600000 - 7.5009600000 5.0006400000 7.6228800000 - 7.5009600000 5.0006400000 2.5409600000 - 10.0012800000 7.5009600000 7.6228800000 - 7.5009600000 0.0000000000 2.5409600000 - 0.0000000000 0.0000000000 5.0819200000 - 0.0000000000 5.0006400000 5.0819200000 - 7.5009600000 0.0000000000 7.6228800000 - 5.0006400000 0.0000000000 5.0819200000 - 2.5003200000 0.0000000000 2.5409600000 - 5.0006400000 0.0000000000 10.1638400000 - 2.5003200000 0.0000000000 7.6228800000 - 2.5003200000 5.0006400000 7.6228800000 - 1.2501600000 8.7511200000 1.0558196992 - 1.2501600000 8.7511200000 3.5967796992 - 1.2501600000 3.7504800000 8.6786996992 - 1.2501600000 3.7504800000 3.5967796992 - 6.2508000000 8.7511200000 6.1377396992 - 6.2508000000 8.7511200000 1.0558196992 - 6.2508000000 3.7504800000 6.1377396992 - 8.7511200000 1.2501600000 8.6786996992 - 6.2508000000 3.7504800000 1.0558196992 - 1.2501600000 3.7504800000 1.0558196992 - 1.2501600000 8.7511200000 8.6786996992 - 8.7511200000 6.2508000000 3.5967796992 - 8.7511200000 6.2508000000 8.6786996992 - 1.2501600000 8.7511200000 6.1377396992 - 1.2501600000 3.7504800000 6.1377396992 - 6.2508000000 3.7504800000 3.5967796992 - 6.2508000000 6.2508000000 1.4851403008 - 6.2508000000 8.7511200000 3.5967796992 - 6.2508000000 1.2501600000 9.1080203008 - 6.2508000000 1.2501600000 4.0261003008 - 1.2501600000 6.2508000000 9.1080203008 - 1.2501600000 6.2508000000 4.0261003008 - 1.2501600000 1.2501600000 9.1080203008 - 1.2501600000 1.2501600000 4.0261003008 - 6.2508000000 6.2508000000 6.5670603008 - 8.7511200000 1.2501600000 3.5967796992 - 6.2508000000 1.2501600000 6.5670603008 - 6.2508000000 1.2501600000 1.4851403008 - 1.2501600000 6.2508000000 6.5670603008 - 1.2501600000 6.2508000000 1.4851403008 - 1.2501600000 1.2501600000 6.5670603008 - 1.2501600000 1.2501600000 1.4851403008 - 6.2508000000 8.7511200000 8.6786996992 - 6.2508000000 3.7504800000 8.6786996992 - 3.7504800000 6.2508000000 8.6786996992 - 8.7511200000 8.7511200000 9.1080203008 - 3.7504800000 1.2501600000 8.6786996992 - 3.7504800000 3.7504800000 1.4851403008 - 3.7504800000 3.7504800000 6.5670603008 - 3.7504800000 8.7511200000 1.4851403008 - 3.7504800000 8.7511200000 6.5670603008 - 8.7511200000 3.7504800000 1.4851403008 - 8.7511200000 3.7504800000 6.5670603008 - 8.7511200000 8.7511200000 1.4851403008 - 8.7511200000 8.7511200000 6.5670603008 - 3.7504800000 6.2508000000 3.5967796992 - 3.7504800000 3.7504800000 9.1080203008 - 3.7504800000 8.7511200000 4.0261003008 - 3.7504800000 8.7511200000 9.1080203008 - 3.7504800000 3.7504800000 4.0261003008 - 8.7511200000 3.7504800000 9.1080203008 - 3.7504800000 1.2501600000 3.5967796992 - 8.7511200000 3.7504800000 4.0261003008 - 8.7511200000 6.2508000000 1.0558196992 - 8.7511200000 1.2501600000 6.1377396992 - 8.7511200000 1.2501600000 1.0558196992 - 8.7511200000 6.2508000000 6.1377396992 - 3.7504800000 6.2508000000 1.0558196992 - 3.7504800000 1.2501600000 6.1377396992 - 3.7504800000 1.2501600000 1.0558196992 - 6.2508000000 6.2508000000 4.0261003008 - 8.7511200000 8.7511200000 4.0261003008 - 3.7504800000 6.2508000000 6.1377396992 - 6.2508000000 6.2508000000 9.1080203008 - 7.5009600000 2.5003200000 5.0819200000 - 5.0006400000 7.5009600000 7.6228800000 - 5.0006400000 7.5009600000 2.5409600000 - 5.0006400000 2.5003200000 2.5409600000 - 2.5003200000 5.0006400000 2.5409600000 - 5.0006400000 5.0006400000 5.0819200000 - 5.0006400000 5.0006400000 10.1638400000 - 0.0000000000 5.0006400000 10.1638400000 +0.000000 0.000000 0.000000 +0.000000 0.000000 5.081920 +0.000000 5.000640 5.081920 +5.000640 0.000000 10.163840 +5.000640 0.000000 5.081920 +2.500320 0.000000 2.540960 +2.500320 0.000000 7.622880 +2.500320 5.000640 7.622880 +7.500960 0.000000 2.540960 +7.500960 0.000000 7.622880 +7.500960 5.000640 2.540960 +7.500960 5.000640 7.622880 +0.000000 2.500320 2.540960 +0.000000 2.500320 7.622880 +10.001280 7.500960 2.540960 +10.001280 7.500960 7.622880 +5.000640 2.500320 7.622880 +2.500320 2.500320 10.163840 +2.500320 2.500320 5.081920 +2.500320 7.500960 10.163840 +2.500320 7.500960 5.081920 +7.500960 2.500320 10.163840 +7.500960 7.500960 0.000000 +7.500960 7.500960 5.081920 +3.750480 3.750480 1.485140 +3.750480 3.750480 6.567060 +3.750480 8.751120 1.485140 +3.750480 8.751120 6.567060 +8.751120 3.750480 1.485140 +8.751120 3.750480 6.567060 +8.751120 8.751120 1.485140 +8.751120 8.751120 6.567060 +3.750480 3.750480 4.026100 +3.750480 3.750480 9.108020 +3.750480 8.751120 4.026100 +3.750480 8.751120 9.108020 +8.751120 3.750480 4.026100 +8.751120 3.750480 9.108020 +8.751120 8.751120 4.026100 +8.751120 8.751120 9.108020 +3.750480 1.250160 1.055820 +3.750480 1.250160 6.137740 +3.750480 6.250800 1.055820 +3.750480 6.250800 6.137740 +8.751120 1.250160 1.055820 +8.751120 1.250160 6.137740 +8.751120 6.250800 1.055820 +8.751120 6.250800 6.137740 +3.750480 1.250160 3.596780 +3.750480 1.250160 8.678700 +3.750480 6.250800 3.596780 +3.750480 6.250800 8.678700 +8.751120 1.250160 3.596780 +8.751120 1.250160 8.678700 +8.751120 6.250800 3.596780 +8.751120 6.250800 8.678700 +1.250160 3.750480 1.055820 +1.250160 3.750480 6.137740 +1.250160 8.751120 1.055820 +1.250160 8.751120 6.137740 +6.250800 3.750480 1.055820 +6.250800 3.750480 6.137740 +6.250800 8.751120 1.055820 +6.250800 8.751120 6.137740 +1.250160 3.750480 3.596780 +1.250160 3.750480 8.678700 +1.250160 8.751120 3.596780 +1.250160 8.751120 8.678700 +6.250800 3.750480 3.596780 +6.250800 3.750480 8.678700 +6.250800 8.751120 3.596780 +6.250800 8.751120 8.678700 +1.250160 1.250160 1.485140 +1.250160 1.250160 6.567060 +1.250160 6.250800 1.485140 +1.250160 6.250800 6.567060 +6.250800 1.250160 1.485140 +6.250800 1.250160 6.567060 +6.250800 6.250800 1.485140 +6.250800 6.250800 6.567060 +1.250160 1.250160 4.026100 +1.250160 1.250160 9.108020 +1.250160 6.250800 4.026100 +1.250160 6.250800 9.108020 +6.250800 1.250160 4.026100 +6.250800 1.250160 9.108020 +6.250800 6.250800 4.026100 +6.250800 6.250800 9.108020 +0.000000 5.000640 10.163840 +5.000640 5.000640 10.163840 +5.000640 5.000640 5.081920 +2.500320 5.000640 2.540960 +5.000640 2.500320 2.540960 +5.000640 7.500960 2.540960 +5.000640 7.500960 7.622880 +7.500960 2.500320 5.081920 diff --git a/tests/qe.traj/traj6.cel b/tests/qe.traj/traj6.cel index bcba5c377..c2a735317 100644 --- a/tests/qe.traj/traj6.cel +++ b/tests/qe.traj/traj6.cel @@ -1,24 +1,24 @@ 195 -5.157620138324029213e+00 0 0 -8.882263018337732685e-02 5.782217839848955876e+00 0 -4.531494312205264219e-02 1.857154025688012577e-01 5.855553673164934025e+00 +5.157620138324029213e+00 8.882263018337732685e-02 4.531494312205264219e-02 +0 5.782217839848955876e+00 1.857154025688012577e-01 +0 0 5.855553673164934025e+00 200 -5.359985500701728967e+00 0 0 -3.585941820098031974e-01 5.317218997480877896e+00 0 -7.606780476053129902e-01 7.811107228901693622e-01 5.715864930517207121e+00 +5.359985500701728967e+00 3.585941820098031974e-01 7.606780476053129902e-01 +0 5.317218997480877896e+00 7.811107228901693622e-01 +0 0 5.715864930517207121e+00 201 -5.235484852416917079e+00 0 0 -5.612267823951659906e-01 5.640178212424978632e+00 0 -1.561235654197479228e-02 2.819721738692628765e-01 5.868536463888631260e+00 +5.235484852416917079e+00 5.612267823951659906e-01 1.561235654197479228e-02 +0 5.640178212424978632e+00 2.819721738692628765e-01 +0 0 5.868536463888631260e+00 202 -5.735200354152383717e+00 0 0 -8.818112752306928037e-02 5.003946763988647461e+00 0 -5.346029912365874992e-01 4.453576669882056693e-01 5.311832768820492490e+00 +5.735200354152383717e+00 8.818112752306928037e-02 5.346029912365874992e-01 +0 5.003946763988647461e+00 4.453576669882056693e-01 +0 0 5.311832768820492490e+00 203 -5.969007839117177916e+00 0 0 -7.974158979687617776e-02 5.086270090760550922e+00 0 -2.286579398249665163e-01 1.449088120223311904e-01 5.588188387987865546e+00 +5.969007839117177916e+00 7.974158979687617776e-02 2.286579398249665163e-01 +0 5.086270090760550922e+00 1.449088120223311904e-01 +0 0 5.588188387987865546e+00 204 -5.308510801020571712e+00 0 0 -3.076052782312116429e-01 5.279388982187173340e+00 0 -4.321921336152507731e-01 8.121110815096156399e-01 5.301664983741235737e+00 +5.308510801020571712e+00 3.076052782312116429e-01 4.321921336152507731e-01 +0 5.279388982187173340e+00 8.121110815096156399e-01 +0 0 5.301664983741235737e+00 diff --git a/tests/test_custom_data_type.py b/tests/test_custom_data_type.py new file mode 100644 index 000000000..5a0e2bab9 --- /dev/null +++ b/tests/test_custom_data_type.py @@ -0,0 +1,84 @@ +import unittest + +import h5py +import numpy as np + +import dpdata + + +class TestDeepmdLoadDumpComp(unittest.TestCase): + def setUp(self): + self.system = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") + self.foo = np.ones((len(self.system), 2, 4)) + self.system.data["foo"] = self.foo + self.system.check_data() + + def test_to_deepmd_raw(self): + self.system.to_deepmd_raw("data_foo") + foo = np.loadtxt("data_foo/foo.raw") + np.testing.assert_allclose(foo.reshape(self.foo.shape), self.foo) + + def test_from_deepmd_raw(self): + self.system.to_deepmd_raw("data_foo") + x = dpdata.LabeledSystem("data_foo", fmt="deepmd/raw") + np.testing.assert_allclose(x.data["foo"], self.foo) + + def test_to_deepmd_npy(self): + self.system.to_deepmd_npy("data_foo") + foo = np.load("data_foo/set.000/foo.npy") + np.testing.assert_allclose(foo.reshape(self.foo.shape), self.foo) + + def test_from_deepmd_npy(self): + self.system.to_deepmd_npy("data_foo") + x = dpdata.LabeledSystem("data_foo", fmt="deepmd/npy") + np.testing.assert_allclose(x.data["foo"], self.foo) + + def test_to_deepmd_hdf5(self): + self.system.to_deepmd_hdf5("data_foo.h5") + with h5py.File("data_foo.h5") as f: + foo = f["set.000/foo.npy"][:] + np.testing.assert_allclose(foo.reshape(self.foo.shape), self.foo) + + def test_from_deepmd_hdf5(self): + self.system.to_deepmd_hdf5("data_foo.h5") + x = dpdata.LabeledSystem("data_foo.h5", fmt="deepmd/hdf5") + np.testing.assert_allclose(x.data["foo"], self.foo) + + +class TestDeepmdLoadDumpCompAny(unittest.TestCase): + def setUp(self): + self.system = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") + self.bar = np.ones((len(self.system), self.system.get_natoms(), 2)) + self.system.data["bar"] = self.bar + self.system.check_data() + + def test_to_deepmd_raw(self): + self.system.to_deepmd_raw("data_bar") + bar = np.loadtxt("data_bar/bar.raw") + np.testing.assert_allclose(bar.reshape(self.bar.shape), self.bar) + + def test_from_deepmd_raw(self): + self.system.to_deepmd_raw("data_bar") + x = dpdata.LabeledSystem("data_bar", fmt="deepmd/raw") + np.testing.assert_allclose(x.data["bar"], self.bar) + + def test_to_deepmd_npy(self): + self.system.to_deepmd_npy("data_bar") + bar = np.load("data_bar/set.000/bar.npy") + np.testing.assert_allclose(bar.reshape(self.bar.shape), self.bar) + + def test_from_deepmd_npy(self): + self.system.to_deepmd_npy("data_bar") + x = dpdata.LabeledSystem("data_bar", fmt="deepmd/npy") + np.testing.assert_allclose(x.data["bar"], self.bar) + + def test_to_deepmd_hdf5(self): + self.system.to_deepmd_hdf5("data_bar.h5") + with h5py.File("data_bar.h5") as f: + bar = f["set.000/bar.npy"][:] + np.testing.assert_allclose(bar.reshape(self.bar.shape), self.bar) + + def test_from_deepmd_hdf5(self): + self.system.to_deepmd_hdf5("data_bar.h5") + x = dpdata.LabeledSystem("data_bar.h5", fmt="deepmd/hdf5") + np.testing.assert_allclose(x.data["bar"], self.bar) diff --git a/tests/test_deepmd_mixed.py b/tests/test_deepmd_mixed.py index 6311410ef..7e522e065 100644 --- a/tests/test_deepmd_mixed.py +++ b/tests/test_deepmd_mixed.py @@ -109,6 +109,107 @@ def test_str(self): ) +class TestMixedMultiSystemsDumpLoadSetSize( + unittest.TestCase, CompLabeledSys, MultiSystems, IsNoPBC +): + def setUp(self): + self.places = 6 + self.e_places = 6 + self.f_places = 6 + self.v_places = 6 + + # C1H4 + system_1 = dpdata.LabeledSystem( + "gaussian/methane.gaussianlog", fmt="gaussian/log" + ) + + # C1H3 + system_2 = dpdata.LabeledSystem( + "gaussian/methane_sub.gaussianlog", fmt="gaussian/log" + ) + + tmp_data = system_1.data.copy() + tmp_data["atom_numbs"] = [1, 1, 1, 2] + tmp_data["atom_names"] = ["C", "H", "A", "B"] + tmp_data["atom_types"] = np.array([0, 1, 2, 3, 3]) + # C1H1A1B2 + system_1_modified_type_1 = dpdata.LabeledSystem(data=tmp_data) + + tmp_data = system_1.data.copy() + tmp_data["atom_numbs"] = [1, 1, 2, 1] + tmp_data["atom_names"] = ["C", "H", "A", "B"] + tmp_data["atom_types"] = np.array([0, 1, 2, 2, 3]) + # C1H1A2B1 + system_1_modified_type_2 = dpdata.LabeledSystem(data=tmp_data) + + tmp_data = system_1.data.copy() + tmp_data["atom_numbs"] = [1, 1, 1, 2] + tmp_data["atom_names"] = ["C", "H", "A", "D"] + tmp_data["atom_types"] = np.array([0, 1, 2, 3, 3]) + # C1H1A1C2 + system_1_modified_type_3 = dpdata.LabeledSystem(data=tmp_data) + + self.ms = dpdata.MultiSystems( + system_1, + system_2, + system_1_modified_type_1, + system_1_modified_type_2, + system_1_modified_type_3, + ) + self.ms.to_deepmd_npy_mixed("tmp.deepmd.mixed", set_size=1) + self.place_holder_ms = dpdata.MultiSystems() + self.place_holder_ms.from_deepmd_npy("tmp.deepmd.mixed", fmt="deepmd/npy") + self.systems = dpdata.MultiSystems() + self.systems.from_deepmd_npy_mixed("tmp.deepmd.mixed", fmt="deepmd/npy/mixed") + self.system_1 = self.ms["C1H4A0B0D0"] + self.system_2 = self.systems["C1H4A0B0D0"] + mixed_sets = glob("tmp.deepmd.mixed/*/set.*") + self.assertEqual(len(mixed_sets), 5) + for i in mixed_sets: + self.assertEqual( + os.path.exists(os.path.join(i, "real_atom_types.npy")), True + ) + + self.system_names = [ + "C1H4A0B0D0", + "C1H3A0B0D0", + "C1H1A1B2D0", + "C1H1A2B1D0", + "C1H1A1B0D2", + ] + self.system_sizes = { + "C1H4A0B0D0": 1, + "C1H3A0B0D0": 1, + "C1H1A1B2D0": 1, + "C1H1A2B1D0": 1, + "C1H1A1B0D2": 1, + } + self.atom_names = ["C", "H", "A", "B", "D"] + + def tearDown(self): + if os.path.exists("tmp.deepmd.mixed"): + shutil.rmtree("tmp.deepmd.mixed") + + def test_len(self): + self.assertEqual(len(self.ms), 5) + self.assertEqual(len(self.place_holder_ms), 2) + self.assertEqual(len(self.systems), 5) + + def test_get_nframes(self): + self.assertEqual(self.ms.get_nframes(), 5) + self.assertEqual(self.place_holder_ms.get_nframes(), 5) + self.assertEqual(self.systems.get_nframes(), 5) + + def test_str(self): + self.assertEqual(str(self.ms), "MultiSystems (5 systems containing 5 frames)") + self.assertEqual( + str(self.place_holder_ms), "MultiSystems (2 systems containing 5 frames)" + ) + self.assertEqual( + str(self.systems), "MultiSystems (5 systems containing 5 frames)" + ) + + class TestMixedMultiSystemsTypeChange( unittest.TestCase, CompLabeledSys, MultiSystems, IsNoPBC ): diff --git a/tests/test_dftbplus.py b/tests/test_dftbplus.py new file mode 100644 index 000000000..2a2913a52 --- /dev/null +++ b/tests/test_dftbplus.py @@ -0,0 +1,58 @@ +import unittest + +import numpy as np +from comp_sys import CompLabeledSys, IsNoPBC +from context import dpdata + + +class TestDeepmdLoadAmmonia(unittest.TestCase, CompLabeledSys, IsNoPBC): + def setUp(self): + energy_convert = dpdata.unit.EnergyConversion("hartree", "eV").value() + force_convert = dpdata.unit.ForceConversion( + "hartree/bohr", "eV/angstrom" + ).value() + + self.system_1 = dpdata.LabeledSystem( + ("dftbplus/dftb_pin.hsd", "dftbplus/detailed.out"), fmt="dftbplus" + ) + + self.system_2 = dpdata.LabeledSystem( + data={ + "atom_types": np.array([0, 1, 1, 1]), + "atom_names": ["N", "H"], + "atom_numbs": [1, 3], + "coords": np.array( + [ + [ + [1.014150, 0.112320, 0.047370], + [3.909390, 0.037985, -0.101159], + [0.702550, -0.851820, -0.060860], + [0.702550, 0.603740, -0.789160], + ] + ] + ), + "energies": np.array([-3.2963983884]) * energy_convert, + "forces": np.array( + [ + [ + [0.016567056203, 0.002817951422, 0.005634574270], + [-0.018803818530, -0.000002880649, -0.000006015442], + [0.001118562874, -0.005291070259, -0.000870711110], + [0.001118199454, 0.002475999486, -0.004757847718], + ] + ] + ) + * force_convert, + "cells": np.zeros((1, 3, 3)), + "orig": np.zeros(3), + "nopbc": True, + } + ) + self.places = 6 + self.e_places = 6 + self.f_places = 6 + self.v_places = 6 + + +if __name__ == "__main__": + unittest.main()