diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 9e2cbf607..765db654c 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -22,7 +22,7 @@ jobs:
     - name: Install rdkit
       run: python -m pip install rdkit openbabel-wheel
     - name: Install dependencies
-      run: python -m pip install .[amber,ase,pymatgen] coverage
+      run: python -m pip install .[amber,ase,pymatgen] coverage ./tests/plugin
     - name: Test
       run: cd tests && coverage run --source=../dpdata -m unittest && cd .. && coverage combine tests/.coverage && coverage report
     - name: Run codecov
diff --git a/.gitignore b/.gitignore
index 41ee080cf..cd48a3e25 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,4 +24,7 @@ _version.py
 __pycache__
 docs/_build
 docs/formats.csv
+docs/drivers.csv
+docs/minimizers.csv
 docs/api/
+docs/formats/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 14b08cef1..abaa911e6 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -18,12 +18,12 @@ repos:
     -   id: check-toml
 # Python
 -   repo: https://github.com/psf/black
-    rev: 23.3.0
+    rev: 23.7.0
     hooks:
     -   id: black-jupyter
--   repo: https://github.com/charliermarsh/ruff-pre-commit
+-   repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: v0.0.263
+    rev: v0.0.287
     hooks:
     - id: ruff
       args: ["--fix"]
@@ -35,7 +35,7 @@ repos:
       args: ["--write"]
 # Python inside docs
 -   repo: https://github.com/asottile/blacken-docs
-    rev: 1.13.0
+    rev: 1.16.0
     hooks:
     -   id: blacken-docs
 ci:
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
new file mode 100644
index 000000000..87f8c7330
--- /dev/null
+++ b/.readthedocs.yaml
@@ -0,0 +1,12 @@
+version: 2
+
+build:
+  os: ubuntu-22.04
+  tools:
+    python: "3.11"
+sphinx:
+   configuration: docs/conf.py
+formats: all
+python:
+   install:
+   - requirements: docs/requirements.txt
diff --git a/README.md b/README.md
index cfa611d9c..1e86a7196 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,9 @@
+# dpdata
+
+[![conda-forge](https://img.shields.io/conda/dn/conda-forge/dpdata?color=red&label=conda-forge&logo=conda-forge)](https://anaconda.org/conda-forge/dpdata)
+[![pip install](https://img.shields.io/pypi/dm/dpdata?label=pip%20install&logo=pypi)](https://pypi.org/project/dpdata)
+[![Documentation Status](https://readthedocs.org/projects/dpdata/badge/)](https://dpdata.readthedocs.io/)
+
 **dpdata** is a python package for manipulating data formats of software in computational science, including DeePMD-kit, VASP, LAMMPS, GROMACS, Gaussian.
 dpdata only works with python 3.7 or above.
 
@@ -82,11 +88,12 @@ The `System` or `LabeledSystem` can be constructed from the following file forma
 | Amber   | multi       | True         | True    | LabeledSystem | 'amber/md'           |
 | Amber/sqm | sqm.out   | False        | False   | System        | 'sqm/out'            |
 | Gromacs | gro         | True         | False   | System        | 'gromacs/gro'        |
-| ABACUS  | STRU        | False        | False   | System        | 'abacus/stru'         |
+| ABACUS  | STRU        | False        | False   | System        | 'abacus/stru'        |
 | ABACUS  | STRU        | False        | True    | LabeledSystem | 'abacus/scf'         |
 | ABACUS  | cif         | True         | True    | LabeledSystem | 'abacus/md'          |
 | ABACUS  | STRU        | True         | True    | LabeledSystem | 'abacus/relax'       |
 | ase     | structure   | True         | True    | MultiSystems  | 'ase/structure'      |
+| DFTB+   | dftbplus    | False        | True    | LabeledSystem | 'dftbplus'           |
 
 
 The Class `dpdata.MultiSystems`  can read data  from a dir which may contains many files of different systems, or from single xyz file which contains different systems.
diff --git a/docs/formats.rst b/docs/formats.rst
index c0ff5b8f9..41ca90916 100644
--- a/docs/formats.rst
+++ b/docs/formats.rst
@@ -6,3 +6,10 @@ dpdata supports the following formats:
 .. csv-table:: Supported Formats
    :file: formats.csv
    :header-rows: 1
+
+
+.. toctree::
+   :maxdepth: 1
+   :glob:
+
+   formats/*
diff --git a/docs/make_format.py b/docs/make_format.py
index a208b21ed..62dd08793 100644
--- a/docs/make_format.py
+++ b/docs/make_format.py
@@ -1,9 +1,18 @@
 import csv
+import os
 from collections import defaultdict
+from inspect import Parameter, Signature, cleandoc, signature
+from typing import Literal
+
+from numpydoc.docscrape import Parameter as numpydoc_Parameter
+from numpydoc.docscrape_sphinx import SphinxDocString
+
+from dpdata.bond_order_system import BondOrderSystem
 
 # ensure all plugins are loaded!
 from dpdata.driver import Driver, Minimizer
 from dpdata.format import Format
+from dpdata.system import LabeledSystem, MultiSystems, System
 
 
 def get_formats() -> dict:
@@ -64,7 +73,7 @@ def get_cls_link(cls: object) -> str:
 
 
 def check_supported(fmt: Format):
-    methods = set()
+    methods = {}
     for mtd in [
         "from_system",
         "to_system",
@@ -76,33 +85,255 @@ def check_supported(fmt: Format):
         "to_multi_systems",
     ]:
         if detect_overridden(fmt, mtd):
-            methods.add(mtd)
+            methods[mtd] = None
             if mtd == "to_system":
-                methods.add("to_labeled_system")
+                methods["to_labeled_system"] = None
     if fmt.MultiMode != fmt.MultiModes.NotImplemented:
-        methods.add("from_multi_systems")
-        methods.add("to_multi_systems")
-    return methods
+        methods["from_multi_systems"] = None
+        methods["to_multi_systems"] = None
+    return list(methods.keys())
 
 
 method_links = {
-    "from_system": ":func:`System() <dpdata.system.System>`",
-    "to_system": ":func:`System.to() <dpdata.system.System.to>`",
-    "from_labeled_system": ":func:`LabeledSystem() <dpdata.system.LabeledSystem>`",
-    "to_labeled_system": ":func:`LabeledSystem.to() <dpdata.system.System.to>`",
-    "from_bond_order_system": ":func:`BondOrderSystem() <dpdata.bond_order_system.BondOrderSystem>`",
-    "to_bond_order_system": ":func:`BondOrderSystem.to() <dpdata.system.System.to>`",
-    "from_multi_systems": ":func:`MultiSystems.load_systems_from_file() <dpdata.system.MultiSystems.load_systems_from_file>`",
-    "to_multi_systems": ":func:`MultiSystems.to() <dpdata.system.MultiSystems.to>`",
+    "from_system": ":ref:`System() <{}_{}>`",
+    "to_system": ":ref:`System.to() <{}_{}>`",
+    "from_labeled_system": ":ref:`LabeledSystem() <{}_{}>`",
+    "to_labeled_system": ":ref:`LabeledSystem.to() <{}_{}>`",
+    "from_bond_order_system": ":ref:`BondOrderSystem() <{}_{}>`",
+    "to_bond_order_system": ":ref:`BondOrderSystem.to() <{}_{}>`",
+    "from_multi_systems": ":ref:`MultiSystems.load_systems_from_file() <{}_{}>`",
+    "to_multi_systems": ":ref:`MultiSystems.to() <{}_{}>`",
+}
+
+method_classes = {
+    "from_system": "System",
+    "to_system": "System",
+    "from_labeled_system": "LabeledSystem",
+    "to_labeled_system": "LabeledSystem",
+    "from_bond_order_system": "BondOrderSystem",
+    "to_bond_order_system": "BondOrderSystem",
+    "from_multi_systems": "MultiSystems",
+    "to_multi_systems": "MultiSystems",
 }
 
+method_cls_obj = {
+    "from_system": System,
+    "to_system": System,
+    "from_labeled_system": LabeledSystem,
+    "to_labeled_system": LabeledSystem,
+    "from_bond_order_system": BondOrderSystem,
+    "to_bond_order_system": BondOrderSystem,
+    "from_multi_systems": MultiSystems,
+    "to_multi_systems": MultiSystems,
+}
+
+
+def generate_sub_format_pages(formats: dict):
+    """Generate sub format pages."""
+    os.makedirs("formats", exist_ok=True)
+    for format, alias in formats.items():
+        # format: Format, alias: list[str]
+        buff = []
+        buff.append(".. _%s:" % format.__name__)
+        buff.append("")
+        for aa in alias:
+            buff.append("%s format" % aa)
+            buff.append("=" * len(buff[-1]))
+            buff.append("")
+        buff.append("Class: %s" % get_cls_link(format))
+        buff.append("")
+
+        docstring = format.__doc__
+        if docstring is not None:
+            docstring = cleandoc(docstring)
+            rst = str(SphinxDocString(docstring))
+            buff.append(rst)
+            buff.append("")
+
+        buff.append("Conversions")
+        buff.append("-----------")
+        methods = check_supported(format)
+        for method in methods:
+            buff.append("")
+            buff.append(f".. _{format.__name__}_{method}:")
+            buff.append("")
+            if method.startswith("from_"):
+                buff.append("Convert from this format to %s" % method_classes[method])
+                buff.append("`" * len(buff[-1]))
+            elif method.startswith("to_"):
+                buff.append("Convert from %s to this format" % method_classes[method])
+                buff.append("`" * len(buff[-1]))
+            buff.append("")
+            method_obj = getattr(format, method)
+            if (
+                method == "to_labeled_system"
+                and method not in format.__dict__
+                and "to_system" in format.__dict__
+            ):
+                method_obj = getattr(format, "to_system")
+            docstring = method_obj.__doc__
+            if docstring is not None:
+                docstring = cleandoc(docstring)
+            sig = signature(method_obj)
+            parameters = dict(sig.parameters)
+            return_annotation = sig.return_annotation
+            # del self
+            del parameters[list(parameters)[0]]
+            # del data
+            if method.startswith("to_"):
+                del parameters[list(parameters)[0]]
+            if "args" in parameters:
+                del parameters["args"]
+            if "kwargs" in parameters:
+                del parameters["kwargs"]
+            if method == "to_multi_systems" or method.startswith("from_"):
+                sig = Signature(
+                    list(parameters.values()), return_annotation=method_cls_obj[method]
+                )
+            else:
+                sig = Signature(
+                    list(parameters.values()), return_annotation=return_annotation
+                )
+            sig = str(sig)
+            if method.startswith("from_"):
+                if method != "from_multi_systems":
+                    for aa in alias:
+                        parameters["fmt"] = Parameter(
+                            "fmt",
+                            Parameter.POSITIONAL_OR_KEYWORD,
+                            default=None,
+                            annotation=Literal[aa],
+                        )
+                        sig_fmt = Signature(
+                            list(parameters.values()),
+                            return_annotation=method_cls_obj[method],
+                        )
+                        sig_fmt = str(sig_fmt)
+                        buff.append(
+                            f""".. py:function:: dpdata.{method_classes[method]}{sig_fmt}"""
+                        )
+                        buff.append("""   :noindex:""")
+                for aa in alias:
+                    buff.append(
+                        """.. py:function:: dpdata.{}.from_{}{}""".format(
+                            method_classes[method],
+                            aa.replace("/", "_").replace(".", ""),
+                            sig,
+                        )
+                    )
+                    buff.append("""   :noindex:""")
+                buff.append("")
+                if docstring is None or method not in format.__dict__:
+                    docstring = """   Convert this format to :class:`%s`.""" % (
+                        method_classes[method]
+                    )
+                doc_obj = SphinxDocString(docstring)
+                if len(doc_obj["Parameters"]) > 0:
+                    doc_obj["Parameters"] = [
+                        xx
+                        for xx in doc_obj["Parameters"]
+                        if xx.name not in ("*args", "**kwargs")
+                    ]
+                else:
+                    if method == "from_multi_systems":
+                        doc_obj["Parameters"] = [
+                            numpydoc_Parameter(
+                                "directory",
+                                "str",
+                                ["directory of systems"],
+                            )
+                        ]
+                doc_obj["Yields"] = []
+                doc_obj["Returns"] = [
+                    numpydoc_Parameter("", method_classes[method], ["converted system"])
+                ]
+                rst = "   " + str(doc_obj)
+                buff.append(rst)
+                buff.append("")
+            elif method.startswith("to_"):
+                for aa in alias:
+                    parameters = {
+                        "fmt": Parameter(
+                            "fmt",
+                            Parameter.POSITIONAL_OR_KEYWORD,
+                            annotation=Literal[aa],
+                        ),
+                        **parameters,
+                    }
+                    if method == "to_multi_systems":
+                        sig_fmt = Signature(
+                            list(parameters.values()),
+                            return_annotation=method_cls_obj[method],
+                        )
+                    else:
+                        sig_fmt = Signature(
+                            list(parameters.values()),
+                            return_annotation=return_annotation,
+                        )
+                    sig_fmt = str(sig_fmt)
+                    buff.append(
+                        f""".. py:function:: dpdata.{method_classes[method]}.to{sig_fmt}"""
+                    )
+                    buff.append("""   :noindex:""")
+                for aa in alias:
+                    buff.append(
+                        """.. py:function:: dpdata.{}.to_{}{}""".format(
+                            method_classes[method],
+                            aa.replace("/", "_").replace(".", ""),
+                            sig,
+                        )
+                    )
+                    buff.append("""   :noindex:""")
+                buff.append("")
+                if docstring is None or (
+                    method not in format.__dict__
+                    and not (
+                        method == "to_labeled_system"
+                        and method not in format.__dict__
+                        and "to_system" in format.__dict__
+                    )
+                ):
+                    docstring = "Convert :class:`%s` to this format." % (
+                        method_classes[method]
+                    )
+                doc_obj = SphinxDocString(docstring)
+                if len(doc_obj["Parameters"]) > 0:
+                    doc_obj["Parameters"] = [
+                        xx
+                        for xx in doc_obj["Parameters"][1:]
+                        if xx.name not in ("*args", "**kwargs")
+                    ]
+                else:
+                    if method == "to_multi_systems":
+                        doc_obj["Parameters"] = [
+                            numpydoc_Parameter(
+                                "directory",
+                                "str",
+                                ["directory to save systems"],
+                            )
+                        ]
+                if method == "to_multi_systems":
+                    doc_obj["Yields"] = []
+                    doc_obj["Returns"] = [
+                        numpydoc_Parameter("", method_classes[method], ["this system"])
+                    ]
+                rst = "   " + str(doc_obj)
+                buff.append(rst)
+                buff.append("")
+            buff.append("")
+            buff.append("")
+
+        with open("formats/%s.rst" % format.__name__, "w") as rstfile:
+            rstfile.write("\n".join(buff))
+
+
 if __name__ == "__main__":
     formats = get_formats()
     with open("formats.csv", "w", newline="") as csvfile:
         fieldnames = [
-            "Class",
+            "Format",
             "Alias",
-            "Supported Functions",
+            "Supported Conversions",
         ]
         writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
 
@@ -110,10 +341,11 @@ def check_supported(fmt: Format):
         for kk, vv in formats.items():
             writer.writerow(
                 {
-                    "Class": get_cls_link(kk),
+                    "Format": ":ref:`%s`" % kk.__name__,
                     "Alias": "\n".join("``%s``" % vvv for vvv in vv),
-                    "Supported Functions": "\n".join(
-                        method_links[mtd] for mtd in check_supported(kk)
+                    "Supported Conversions": "\n".join(
+                        method_links[mtd].format(kk.__name__, mtd)
+                        for mtd in check_supported(kk)
                     ),
                 }
             )
@@ -151,3 +383,4 @@ def check_supported(fmt: Format):
                     "Alias": "\n".join("``%s``" % vvv for vvv in vv),
                 }
             )
+    generate_sub_format_pages(formats)
diff --git a/dpdata/abacus/md.py b/dpdata/abacus/md.py
index 7726cafeb..4b8ec0387 100644
--- a/dpdata/abacus/md.py
+++ b/dpdata/abacus/md.py
@@ -43,13 +43,20 @@ def get_coord_dump_freq(inlines):
 def get_coords_from_dump(dumplines, natoms):
     nlines = len(dumplines)
     total_natoms = sum(natoms)
+    # The output of VIRIAL, FORCE, and VELOCITY are controlled by INPUT parameters dump_virial, dump_force, and dump_vel, respectively.
+    # So the search of keywords can determine whether these datas are printed into MD_dump.
     calc_stress = False
+    calc_force = False
+    check_line = 6
     if "VIRIAL" in dumplines[6]:
         calc_stress = True
-    else:
-        assert (
-            "POSITIONS" in dumplines[6] and "FORCE" in dumplines[6]
-        ), "keywords 'POSITIONS' and 'FORCE' cannot be found in the 6th line. Please check."
+        check_line = 10
+    assert (
+        "POSITION" in dumplines[check_line]
+    ), "keywords 'POSITION' cannot be found in the 6th line. Please check."
+    if "FORCE" in dumplines[check_line]:
+        calc_force = True
+
     nframes_dump = -1
     if calc_stress:
         nframes_dump = int(nlines / (total_natoms + 13))
@@ -104,9 +111,13 @@ def get_coords_from_dump(dumplines, natoms):
                 if not newversion:
                     coords[iframe, iat] *= celldm
 
-                forces[iframe, iat] = np.array(
-                    [float(i) for i in dumplines[iline + skipline + iat].split()[5:8]]
-                )
+                if calc_force:
+                    forces[iframe, iat] = np.array(
+                        [
+                            float(i)
+                            for i in dumplines[iline + skipline + iat].split()[5:8]
+                        ]
+                    )
             iframe += 1
     assert iframe == nframes_dump, (
         "iframe=%d, nframe_dump=%d. Number of frames does not match number of lines in MD_dump."
@@ -137,7 +148,7 @@ def get_energy(outlines, ndump, dump_freq):
 
 
 def get_frame(fname):
-    if type(fname) == str:
+    if isinstance(fname, str):
         # if the input parameter is only one string, it is assumed that it is the
         # base directory containing INPUT file;
         path_in = os.path.join(fname, "INPUT")
@@ -195,7 +206,7 @@ def get_frame(fname):
     data["energies"] = energy
     data["forces"] = force
     data["virials"] = stress
-    if type(data["virials"]) != np.ndarray:
+    if not isinstance(data["virials"], np.ndarray):
         del data["virials"]
     data["orig"] = np.zeros(3)
 
diff --git a/dpdata/abacus/relax.py b/dpdata/abacus/relax.py
index 5dd77030b..db910fc7e 100644
--- a/dpdata/abacus/relax.py
+++ b/dpdata/abacus/relax.py
@@ -167,7 +167,7 @@ def get_coords_from_log(loglines, natoms):
 
 
 def get_frame(fname):
-    if type(fname) == str:
+    if isinstance(fname, str):
         # if the input parameter is only one string, it is assumed that it is the
         # base directory containing INPUT file;
         path_in = os.path.join(fname, "INPUT")
diff --git a/dpdata/abacus/scf.py b/dpdata/abacus/scf.py
index 1816913e5..fbd658b45 100644
--- a/dpdata/abacus/scf.py
+++ b/dpdata/abacus/scf.py
@@ -161,7 +161,7 @@ def get_frame(fname):
         "forces": [],
     }
 
-    if type(fname) == str:
+    if isinstance(fname, str):
         # if the input parameter is only one string, it is assumed that it is the
         # base directory containing INPUT file;
         path_in = os.path.join(fname, "INPUT")
@@ -255,7 +255,7 @@ def get_nele_from_stru(geometry_inlines):
 
 
 def get_frame_from_stru(fname):
-    assert type(fname) == str
+    assert isinstance(fname, str)
     with open(fname) as fp:
         geometry_inlines = fp.read().split("\n")
     nele = get_nele_from_stru(geometry_inlines)
@@ -304,7 +304,7 @@ def make_unlabeled_stru(
         out += "\n"
 
     if numerical_descriptor is not None:
-        assert type(numerical_descriptor) == str
+        assert isinstance(numerical_descriptor, str)
         out += "NUMERICAL_DESCRIPTOR\n%s\n" % numerical_descriptor
         out += "\n"
 
@@ -327,10 +327,11 @@ def make_unlabeled_stru(
         out += "0.0\n"
         out += str(data["atom_numbs"][iele]) + "\n"
         for iatom in range(data["atom_numbs"][iele]):
+            iatomtype = np.nonzero(data["atom_types"] == iele)[0][iatom]
             out += "%.12f %.12f %.12f %d %d %d\n" % (
-                data["coords"][frame_idx][natom_tot, 0],
-                data["coords"][frame_idx][natom_tot, 1],
-                data["coords"][frame_idx][natom_tot, 2],
+                data["coords"][frame_idx][iatomtype, 0],
+                data["coords"][frame_idx][iatomtype, 1],
+                data["coords"][frame_idx][iatomtype, 2],
                 1,
                 1,
                 1,
diff --git a/dpdata/cp2k/cell.py b/dpdata/cp2k/cell.py
index 7f5f3ef0e..7af73353e 100644
--- a/dpdata/cp2k/cell.py
+++ b/dpdata/cp2k/cell.py
@@ -28,24 +28,18 @@ def cell_to_low_triangle(A, B, C, alpha, beta, gamma):
     """
     if not np.pi * 5 / 180 < alpha < np.pi * 175 / 180:
         raise RuntimeError(
-            "alpha=={}: must be a radian, and \
-            must be in np.pi*5/180 < alpha < np.pi*175/180".format(
-                alpha
-            )
+            f"alpha=={alpha}: must be a radian, and \
+            must be in np.pi*5/180 < alpha < np.pi*175/180"
         )
     if not np.pi * 5 / 180 < beta < np.pi * 175 / 180:
         raise RuntimeError(
-            "beta=={}: must be a radian, and \
-            must be in np.pi*5/180 < beta < np.pi*175/180".format(
-                beta
-            )
+            f"beta=={beta}: must be a radian, and \
+            must be in np.pi*5/180 < beta < np.pi*175/180"
         )
     if not np.pi * 5 / 180 < gamma < np.pi * 175 / 180:
         raise RuntimeError(
-            "gamma=={}: must be a radian, and \
-                must be in np.pi*5/180 < gamma < np.pi*175/180".format(
-                gamma
-            )
+            f"gamma=={gamma}: must be a radian, and \
+                must be in np.pi*5/180 < gamma < np.pi*175/180"
         )
     if not A > 0.2:
         raise RuntimeError(f"A=={A}, must be greater than 0.2")
diff --git a/dpdata/cp2k/output.py b/dpdata/cp2k/output.py
index 13d5be7c7..0b08c51ef 100644
--- a/dpdata/cp2k/output.py
+++ b/dpdata/cp2k/output.py
@@ -327,9 +327,7 @@ def handle_single_xyz_frame(self, lines):
         atom_num = int(lines[0].strip("\n").strip())
         if len(lines) != atom_num + 2:
             raise RuntimeError(
-                "format error, atom_num=={}, {}!=atom_num+2".format(
-                    atom_num, len(lines)
-                )
+                f"format error, atom_num=={atom_num}, {len(lines)}!=atom_num+2"
             )
         data_format_line = lines[1].strip("\n").strip() + " "
         prop_pattern = re.compile(r"(?P<prop>\w+)\s*=\s*(?P<number>.*?)[, ]")
@@ -411,18 +409,18 @@ def get_frames(fname):
                 cell.append(ii.split()[4:7])
             if "Atomic kind:" in ii:
                 atom_symbol_list.append(ii.split()[3])
-            if "Atom  Kind  Element" in ii:
+
+            # beginning of coords block
+            if "Atom  Kind  Element" in ii or "Atom Kind Element" in ii:
                 coord_flag = True
-                coord_idx = idx
+            # parse coords lines
+            elif coord_flag:
+                if ii == "\n":
+                    coord_flag = len(coord) == 0  # skip empty line at the beginning
+                else:
+                    coord.append(ii.split()[4:7])
+                    atom_symbol_idx_list.append(ii.split()[1])
 
-            # get the coord block info
-            if coord_flag:
-                if idx > coord_idx + 1:
-                    if ii == "\n":
-                        coord_flag = False
-                    else:
-                        coord.append(ii.split()[4:7])
-                        atom_symbol_idx_list.append(ii.split()[1])
             if "ENERGY|" in ii:
                 energy = ii.split()[8]
             if " Atom   Kind " in ii:
@@ -488,7 +486,7 @@ def get_frames(fname):
     atom_types = []
     atom_numbs = []
     # preserve the atom_name order
-    atom_names = atom_symbol_list[np.sort(symbol_idx)]
+    atom_names = atom_symbol_list[np.sort(symbol_idx, kind="stable")]
     for jj in atom_symbol_list:
         for idx, ii in enumerate(atom_names):
             if jj == ii:
diff --git a/dpdata/data_type.py b/dpdata/data_type.py
new file mode 100644
index 000000000..b5141a4e0
--- /dev/null
+++ b/dpdata/data_type.py
@@ -0,0 +1,154 @@
+from enum import Enum, unique
+from typing import TYPE_CHECKING, Tuple
+
+import numpy as np
+
+from dpdata.plugin import Plugin
+
+if TYPE_CHECKING:
+    from dpdata.system import System
+
+
+@unique
+class Axis(Enum):
+    """Data axis."""
+
+    NFRAMES = "nframes"
+    NATOMS = "natoms"
+    NTYPES = "ntypes"
+    NBONDS = "nbonds"
+
+
+class AnyInt(int):
+    """AnyInt equals to any other integer."""
+
+    def __eq__(self, other):
+        return True
+
+
+class DataError(Exception):
+    """Data is not correct."""
+
+
+class DataType:
+    """DataType represents a type of data, like coordinates, energies, etc.
+
+    Parameters
+    ----------
+    name : str
+        name of data
+    dtype : type or tuple[type]
+        data type, e.g. np.ndarray
+    shape : tuple[int], optional
+        shape of data. Used when data is list or np.ndarray. Use Axis to
+        represents numbers
+    required : bool, default=True
+        whether this data is required
+    """
+
+    def __init__(
+        self,
+        name: str,
+        dtype: type,
+        shape: Tuple[int, Axis] = None,
+        required: bool = True,
+    ) -> None:
+        self.name = name
+        self.dtype = dtype
+        self.shape = shape
+        self.required = required
+
+    def real_shape(self, system: "System") -> Tuple[int]:
+        """Returns expected real shape of a system."""
+        shape = []
+        for ii in self.shape:
+            if ii is Axis.NFRAMES:
+                shape.append(system.get_nframes())
+            elif ii is Axis.NTYPES:
+                shape.append(system.get_ntypes())
+            elif ii is Axis.NATOMS:
+                shape.append(system.get_natoms())
+            elif ii is Axis.NBONDS:
+                # BondOrderSystem
+                shape.append(system.get_nbonds())
+            elif ii == -1:
+                shape.append(AnyInt(-1))
+            elif isinstance(ii, int):
+                shape.append(ii)
+            else:
+                raise RuntimeError("Shape is not an int!")
+        return tuple(shape)
+
+    def check(self, system: "System"):
+        """Check if a system has correct data of this type.
+
+        Parameters
+        ----------
+        system : System
+            checked system
+
+        Raises
+        ------
+        DataError
+            type or shape of data is not correct
+        """
+        # check if exists
+        if self.name in system.data:
+            data = system.data[self.name]
+            # check dtype
+            # allow list for empty np.ndarray
+            if isinstance(data, list) and not len(data):
+                pass
+            elif not isinstance(data, self.dtype):
+                raise DataError(
+                    f"Type of {self.name} is {type(data).__name__}, but expected {self.dtype.__name__}"
+                )
+            # check shape
+            if self.shape is not None:
+                shape = self.real_shape(system)
+                # skip checking empty list of np.ndarray
+                if isinstance(data, np.ndarray):
+                    if data.size and shape != data.shape:
+                        raise DataError(
+                            f"Shape of {self.name} is {data.shape}, but expected {shape}"
+                        )
+                elif isinstance(data, list):
+                    if len(shape) and shape[0] != len(data):
+                        raise DataError(
+                            "Length of %s is %d, but expected %d"
+                            % (self.name, len(data), shape[0])
+                        )
+                else:
+                    raise RuntimeError("Unsupported type to check shape")
+        elif self.required:
+            raise DataError("%s not found in data" % self.name)
+
+
+__system_data_type_plugin = Plugin()
+__labeled_system_data_type_plugin = Plugin()
+
+
+def register_data_type(data_type: DataType, labeled: bool):
+    """Register a data type.
+
+    Parameters
+    ----------
+    data_type : DataType
+        data type to be registered
+    labeled : bool
+        whether this data type is for LabeledSystem
+    """
+    plugin = __labeled_system_data_type_plugin if labeled else __system_data_type_plugin
+    plugin.register(data_type.name)(data_type)
+
+
+def get_data_types(labeled: bool):
+    """Get all registered data types.
+
+    Parameters
+    ----------
+    labeled : bool
+        whether this data type is for LabeledSystem
+    """
+    plugin = __labeled_system_data_type_plugin if labeled else __system_data_type_plugin
+    return tuple(plugin.plugins.values())
diff --git a/dpdata/deepmd/comp.py b/dpdata/deepmd/comp.py
index 66bf4ee66..7b909b162 100644
--- a/dpdata/deepmd/comp.py
+++ b/dpdata/deepmd/comp.py
@@ -1,9 +1,12 @@
 import glob
 import os
 import shutil
+import warnings
 
 import numpy as np
 
+import dpdata
+
 from .raw import load_type
 
 
@@ -60,6 +63,42 @@ def to_system_data(folder, type_map=None, labels=True):
         data["forces"] = np.concatenate(all_forces, axis=0)
     if len(all_virs) > 0:
         data["virials"] = np.concatenate(all_virs, axis=0)
+    # allow custom dtypes
+    if labels:
+        for dtype in dpdata.system.LabeledSystem.DTYPES:
+            if dtype.name in (
+                "atom_numbs",
+                "atom_names",
+                "atom_types",
+                "orig",
+                "cells",
+                "coords",
+                "real_atom_types",
+                "real_atom_names",
+                "nopbc",
+                "energies",
+                "forces",
+                "virials",
+            ):
+                # skip as these data contains specific rules
+                continue
+            if not (len(dtype.shape) and dtype.shape[0] == dpdata.system.Axis.NFRAMES):
+                warnings.warn(
+                    f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted from deepmd/npy format."
+                )
+                continue
+            natoms = data["coords"].shape[1]
+            shape = [
+                natoms if xx == dpdata.system.Axis.NATOMS else xx
+                for xx in dtype.shape[1:]
+            ]
+            all_data = []
+            for ii in sets:
+                tmp = _cond_load_data(os.path.join(ii, dtype.name + ".npy"))
+                if tmp is not None:
+                    all_data.append(np.reshape(tmp, [tmp.shape[0], *shape]))
+            if len(all_data) > 0:
+                data[dtype.name] = np.concatenate(all_data, axis=0)
     return data
 
 
@@ -131,3 +170,34 @@ def dump(folder, data, set_size=5000, comp_prec=np.float32, remove_sets=True):
     if data.get("nopbc", False):
         with open(os.path.join(folder, "nopbc"), "w") as fw_nopbc:
             pass
+    # allow custom dtypes
+    for dtype in dpdata.system.LabeledSystem.DTYPES:
+        if dtype.name in (
+            "atom_numbs",
+            "atom_names",
+            "atom_types",
+            "orig",
+            "cells",
+            "coords",
+            "real_atom_types",
+            "real_atom_names",
+            "nopbc",
+            "energies",
+            "forces",
+            "virials",
+        ):
+            # skip as these data contains specific rules
+            continue
+        if dtype.name not in data:
+            continue
+        if not (len(dtype.shape) and dtype.shape[0] == dpdata.system.Axis.NFRAMES):
+            warnings.warn(
+                f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted to deepmd/npy format."
+            )
+            continue
+        ddata = np.reshape(data[dtype.name], [nframes, -1]).astype(comp_prec)
+        for ii in range(nsets):
+            set_stt = ii * set_size
+            set_end = (ii + 1) * set_size
+            set_folder = os.path.join(folder, "set.%03d" % ii)
+            np.save(os.path.join(set_folder, dtype.name), ddata[set_stt:set_end])
diff --git a/dpdata/deepmd/hdf5.py b/dpdata/deepmd/hdf5.py
index e7e1abe6a..f2ee9c8a2 100644
--- a/dpdata/deepmd/hdf5.py
+++ b/dpdata/deepmd/hdf5.py
@@ -1,10 +1,13 @@
 """Utils for deepmd/hdf5 format."""
+import warnings
 from typing import Optional, Union
 
 import h5py
 import numpy as np
 from wcmatch.glob import globfilter
 
+import dpdata
+
 __all__ = ["to_system_data", "dump"]
 
 
@@ -30,6 +33,9 @@ def to_system_data(
     g = f[folder] if folder else f
 
     data = {}
+    # ignore empty files or groups
+    if "type.raw" not in g.keys():
+        return data
     data["atom_types"] = g["type.raw"][:]
     ntypes = np.max(data["atom_types"]) + 1
     natoms = data["atom_types"].size
@@ -89,6 +95,39 @@ def to_system_data(
             "required": False,
         },
     }
+    # allow custom dtypes
+    for dtype in dpdata.system.LabeledSystem.DTYPES:
+        if dtype.name in (
+            "atom_numbs",
+            "atom_names",
+            "atom_types",
+            "orig",
+            "cells",
+            "coords",
+            "real_atom_types",
+            "real_atom_names",
+            "nopbc",
+            "energies",
+            "forces",
+            "virials",
+        ):
+            # skip as these data contains specific rules
+            continue
+        if not (len(dtype.shape) and dtype.shape[0] == dpdata.system.Axis.NFRAMES):
+            warnings.warn(
+                f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted from deepmd/hdf5 format."
+            )
+            continue
+        shape = [
+            natoms if xx == dpdata.system.Axis.NATOMS else xx for xx in dtype.shape[1:]
+        ]
+
+        data_types[dtype.name] = {
+            "fn": dtype.name,
+            "labeled": True,
+            "shape": shape,
+            "required": False,
+        }
 
     for dt, prop in data_types.items():
         all_data = []
@@ -140,6 +179,9 @@ def dump(
         g = f.create_group(folder)
     else:
         g = f
+    # ignore empty systems
+    if not len(data["coords"]):
+        return
     # dump raw (array in fact)
     g.create_dataset("type.raw", data=data["atom_types"])
     g.create_dataset("type_map.raw", data=np.array(data["atom_names"], dtype="S"))
@@ -161,6 +203,37 @@ def dump(
         "forces": {"fn": "force", "shape": (nframes, -1), "dump": True},
         "virials": {"fn": "virial", "shape": (nframes, 9), "dump": True},
     }
+
+    # allow custom dtypes
+    for dtype in dpdata.system.LabeledSystem.DTYPES:
+        if dtype.name in (
+            "atom_numbs",
+            "atom_names",
+            "atom_types",
+            "orig",
+            "cells",
+            "coords",
+            "real_atom_types",
+            "real_atom_names",
+            "nopbc",
+            "energies",
+            "forces",
+            "virials",
+        ):
+            # skip as these data contains specific rules
+            continue
+        if not (len(dtype.shape) and dtype.shape[0] == dpdata.system.Axis.NFRAMES):
+            warnings.warn(
+                f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted to deepmd/hdf5 format."
+            )
+            continue
+
+        data_types[dtype.name] = {
+            "fn": dtype.name,
+            "shape": (nframes, -1),
+            "dump": True,
+        }
+
     for dt, prop in data_types.items():
         if dt in data:
             if prop["dump"]:
diff --git a/dpdata/deepmd/raw.py b/dpdata/deepmd/raw.py
index 2f2021d44..c7a64ec47 100644
--- a/dpdata/deepmd/raw.py
+++ b/dpdata/deepmd/raw.py
@@ -1,7 +1,10 @@
 import os
+import warnings
 
 import numpy as np
 
+import dpdata
+
 
 def load_type(folder, type_map=None):
     data = {}
@@ -57,6 +60,42 @@ def to_system_data(folder, type_map=None, labels=True):
                 data["virials"] = np.reshape(data["virials"], [nframes, 3, 3])
         if os.path.isfile(os.path.join(folder, "nopbc")):
             data["nopbc"] = True
+        # allow custom dtypes
+        if labels:
+            for dtype in dpdata.system.LabeledSystem.DTYPES:
+                if dtype.name in (
+                    "atom_numbs",
+                    "atom_names",
+                    "atom_types",
+                    "orig",
+                    "cells",
+                    "coords",
+                    "real_atom_types",
+                    "real_atom_names",
+                    "nopbc",
+                    "energies",
+                    "forces",
+                    "virials",
+                ):
+                    # skip as these data contains specific rules
+                    continue
+                if not (
+                    len(dtype.shape) and dtype.shape[0] == dpdata.system.Axis.NFRAMES
+                ):
+                    warnings.warn(
+                        f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted from deepmd/raw format."
+                    )
+                    continue
+                natoms = data["coords"].shape[1]
+                shape = [
+                    natoms if xx == dpdata.system.Axis.NATOMS else xx
+                    for xx in dtype.shape[1:]
+                ]
+                if os.path.exists(os.path.join(folder, f"{dtype.name}.raw")):
+                    data[dtype.name] = np.reshape(
+                        np.loadtxt(os.path.join(folder, f"{dtype.name}.raw")),
+                        [nframes, *shape],
+                    )
         return data
     else:
         raise RuntimeError("not dir " + folder)
@@ -102,3 +141,30 @@ def dump(folder, data):
     if data.get("nopbc", False):
         with open(os.path.join(folder, "nopbc"), "w") as fw_nopbc:
             pass
+    # allow custom dtypes
+    for dtype in dpdata.system.LabeledSystem.DTYPES:
+        if dtype.name in (
+            "atom_numbs",
+            "atom_names",
+            "atom_types",
+            "orig",
+            "cells",
+            "coords",
+            "real_atom_types",
+            "real_atom_names",
+            "nopbc",
+            "energies",
+            "forces",
+            "virials",
+        ):
+            # skip as these data contains specific rules
+            continue
+        if dtype.name not in data:
+            continue
+        if not (len(dtype.shape) and dtype.shape[0] == dpdata.system.Axis.NFRAMES):
+            warnings.warn(
+                f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted to deepmd/raw format."
+            )
+            continue
+        ddata = np.reshape(data[dtype.name], [nframes, -1])
+        np.savetxt(os.path.join(folder, f"{dtype.name}.raw"), ddata)
diff --git a/dpdata/dftbplus/__init__.py b/dpdata/dftbplus/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/dpdata/dftbplus/output.py b/dpdata/dftbplus/output.py
new file mode 100644
index 000000000..ba8f6c840
--- /dev/null
+++ b/dpdata/dftbplus/output.py
@@ -0,0 +1,74 @@
+from typing import Tuple
+
+import numpy as np
+
+
+def read_dftb_plus(fn_1: str, fn_2: str) -> Tuple[str, np.ndarray, float, np.ndarray]:
+    """Read from DFTB+ input and output.
+
+    Parameters
+    ----------
+    fn_1 : str
+        DFTB+ input file name
+    fn_2 : str
+        DFTB+ output file name
+
+    Returns
+    -------
+    str
+        atomic symbols
+    np.ndarray
+        atomic coordinates
+    float
+        total potential energy
+    np.ndarray
+        atomic forces
+
+    """
+    coord = None
+    symbols = None
+    forces = None
+    energy = None
+    with open(fn_1) as f:
+        flag = 0
+        for line in f:
+            if flag == 1:
+                flag += 1
+            elif flag == 2:
+                components = line.split()
+                flag += 1
+            elif line.startswith("Geometry"):
+                flag = 1
+                coord = []
+                symbols = []
+            elif flag in (3, 4, 5, 6):
+                s = line.split()
+                components_num = int(s[1])
+                symbols.append(components[components_num - 1])
+                coord.append([float(s[2]), float(s[3]), float(s[4])])
+                flag += 1
+                if flag == 7:
+                    flag = 0
+    with open(fn_2) as f:
+        flag = 0
+        for line in f:
+            if line.startswith("Total Forces"):
+                flag = 8
+                forces = []
+            elif flag in (8, 9, 10, 11):
+                s = line.split()
+                forces.append([float(s[1]), float(s[2]), float(s[3])])
+                flag += 1
+                if flag == 12:
+                    flag = 0
+            elif line.startswith("Total energy:"):
+                s = line.split()
+                energy = float(s[2])
+                flag = 0
+
+    symbols = np.array(symbols)
+    forces = np.array(forces)
+    coord = np.array(coord)
+    assert coord.shape == forces.shape
+
+    return symbols, coord, energy, forces
diff --git a/dpdata/driver.py b/dpdata/driver.py
index b446a5e83..de72d3fe7 100644
--- a/dpdata/driver.py
+++ b/dpdata/driver.py
@@ -125,6 +125,7 @@ class HybridDriver(Driver):
     ...     {"type": "sqm", "qm_theory": "DFTB3"},
     ...     {"type": "dp", "dp": "frozen_model.pb"},
     ... ])
+
     This driver is the hybrid of SQM and DP.
     """
 
diff --git a/dpdata/lammps/dump.py b/dpdata/lammps/dump.py
index cdc28bdcd..017f75a34 100644
--- a/dpdata/lammps/dump.py
+++ b/dpdata/lammps/dump.py
@@ -215,7 +215,9 @@ def system_data(lines, type_map=None, type_idx_zero=True, unwrap=False):
         system["cells"].append(cell)
         atype = get_atype(array_lines[ii], type_idx_zero=type_idx_zero)
         # map atom type; a[as[a][as[as[b]]]] = b[as[b][as^{-1}[b]]] = b[id]
-        idx = np.argsort(atype)[np.argsort(np.argsort(system["atom_types"]))]
+        idx = np.argsort(atype, kind="stable")[
+            np.argsort(np.argsort(system["atom_types"], kind="stable"), kind="stable")
+        ]
         system["coords"].append(
             safe_get_posi(array_lines[ii], cell, np.array(orig), unwrap)[idx]
         )
diff --git a/dpdata/md/rdf.py b/dpdata/md/rdf.py
index 219543eb3..de8f1c746 100644
--- a/dpdata/md/rdf.py
+++ b/dpdata/md/rdf.py
@@ -57,14 +57,14 @@ def compute_rdf(box, posis, atype, sel_type=[None, None], max_r=5, nbins=100):
 
 
 def _compute_rdf_1frame(box, posis, atype, sel_type=[None, None], max_r=5, nbins=100):
-    all_types = list(set(list(np.sort(atype))))
+    all_types = list(set(list(np.sort(atype, kind="stable"))))
     if sel_type[0] is None:
         sel_type[0] = all_types
     if sel_type[1] is None:
         sel_type[1] = all_types
-    if type(sel_type[0]) is not list:
+    if not isinstance(sel_type[0], list):
         sel_type[0] = [sel_type[0]]
-    if type(sel_type[1]) is not list:
+    if not isinstance(sel_type[1], list):
         sel_type[1] = [sel_type[1]]
     natoms = len(posis)
     import ase.neighborlist
diff --git a/dpdata/plugins/deepmd.py b/dpdata/plugins/deepmd.py
index 38eab85fd..6953aba53 100644
--- a/dpdata/plugins/deepmd.py
+++ b/dpdata/plugins/deepmd.py
@@ -82,10 +82,12 @@ class DeePMDMixedFormat(Format):
     Examples
     --------
     Dump a MultiSystems into a mixed type numpy directory:
+
     >>> import dpdata
     >>> dpdata.MultiSystems(*systems).to_deepmd_npy_mixed("mixed_dir")
 
     Load a mixed type data into a MultiSystems:
+
     >>> import dpdata
     >>> dpdata.MultiSystems().load_systems_from_file("mixed_dir", fmt="deepmd/npy/mixed")
     """
@@ -95,7 +97,9 @@ def from_system_mix(self, file_name, type_map=None, **kwargs):
             file_name, type_map=type_map, labels=False
         )
 
-    def to_system(self, data, file_name, prec=np.float64, **kwargs):
+    def to_system(
+        self, data, file_name, set_size: int = 2000, prec=np.float64, **kwargs
+    ):
         """Dump the system in deepmd mixed type format (numpy binary) to `folder`.
 
         The frames were already split to different systems, so these frames can be dumped to one single subfolders
@@ -107,12 +111,14 @@ def to_system(self, data, file_name, prec=np.float64, **kwargs):
             System data
         file_name : str
             The output folder
+        set_size : int, default=2000
+            set size
         prec : {numpy.float32, numpy.float64}
             The floating point precision of the compressed data
         **kwargs : dict
             other parameters
         """
-        dpdata.deepmd.mixed.dump(file_name, data, comp_prec=prec)
+        dpdata.deepmd.mixed.dump(file_name, data, set_size=set_size, comp_prec=prec)
 
     def from_labeled_system_mix(self, file_name, type_map=None, **kwargs):
         return dpdata.deepmd.mixed.to_system_data(
@@ -157,6 +163,7 @@ class DeePMDHDF5Format(Format):
     Examples
     --------
     Dump a MultiSystems to a HDF5 file:
+
     >>> import dpdata
     >>> dpdata.MultiSystems().from_deepmd_npy("data").to_deepmd_hdf5("data.hdf5")
     """
@@ -394,8 +401,10 @@ def label(self, data: dict) -> dict:
         type_map = self.dp.get_type_map()
 
         ori_sys = dpdata.System.from_dict({"data": data})
+        ori_sys_copy = ori_sys.copy()
         ori_sys.sort_atom_names(type_map=type_map)
         atype = ori_sys["atom_types"]
+        ori_sys = ori_sys_copy
 
         if not self.enable_auto_batch_size:
             labeled_sys = dpdata.LabeledSystem()
diff --git a/dpdata/plugins/dftbplus.py b/dpdata/plugins/dftbplus.py
index e69de29bb..5c8b46828 100644
--- a/dpdata/plugins/dftbplus.py
+++ b/dpdata/plugins/dftbplus.py
@@ -0,0 +1,61 @@
+import numpy as np
+
+from dpdata.dftbplus.output import read_dftb_plus
+from dpdata.format import Format
+from dpdata.unit import EnergyConversion, ForceConversion
+
+energy_convert = EnergyConversion("hartree", "eV").value()
+force_convert = ForceConversion("hartree/bohr", "eV/angstrom").value()
+
+
+@Format.register("dftbplus")
+class DFTBplusFormat(Format):
+    """The DFTBplusFormat class handles files in the DFTB+ format.
+
+    This class provides a method to read DFTB+ files from a labeled system and
+    returns a dictionary containing various properties of the system.For more
+    information, please refer to the official documentation at the following URL:
+    https://dftbplus.org/documentation
+
+    Attributes
+    ----------
+        None
+
+    Methods
+    -------
+        from_labeled_system(file_paths, **kwargs): Reads system information from files.
+
+    """
+
+    def from_labeled_system(self, file_paths, **kwargs):
+        """Reads system information from the given DFTB+ file paths.
+
+        Parameters
+        ----------
+        file_paths : tuple
+            A tuple containing the input and output file paths.
+            - Input file (file_in): Contains information about symbols and coord.
+            - Output file (file_out): Contains information about energy and force.
+        **kwargs : dict
+            other parameters
+
+        """
+        file_in, file_out = file_paths
+        symbols, coord, energy, forces = read_dftb_plus(file_in, file_out)
+        last_occurrence = {v: i for i, v in enumerate(symbols)}
+        atom_names = np.array(sorted(np.unique(symbols), key=last_occurrence.get))
+        atom_types = np.array([np.where(atom_names == s)[0][0] for s in symbols])
+        atom_numbs = np.array([np.sum(atom_types == i) for i in range(len(atom_names))])
+        natoms = coord.shape[0]
+
+        return {
+            "atom_types": atom_types,
+            "atom_names": list(atom_names),
+            "atom_numbs": list(atom_numbs),
+            "coords": coord.reshape((1, natoms, 3)),
+            "energies": np.array([energy * energy_convert]),
+            "forces": (forces * force_convert).reshape((1, natoms, 3)),
+            "cells": np.zeros((1, 3, 3)),
+            "orig": np.zeros(3),
+            "nopbc": True,
+        }
diff --git a/dpdata/qe/scf.py b/dpdata/qe/scf.py
index 659b18458..d2f5dead4 100755
--- a/dpdata/qe/scf.py
+++ b/dpdata/qe/scf.py
@@ -87,7 +87,7 @@ def get_coords(lines, cell):
     atom_types = []
     atom_numbs = []
     # preserve the atom_name order
-    atom_names = atom_symbol_list[np.sort(symbol_idx)]
+    atom_names = atom_symbol_list[np.sort(symbol_idx, kind="stable")]
     for jj in atom_symbol_list:
         for idx, ii in enumerate(atom_names):
             if jj == ii:
@@ -129,13 +129,13 @@ def get_stress(lines):
 
 
 def get_frame(fname):
-    if type(fname) == str:
+    if isinstance(fname, str):
         path_out = fname
         outname = os.path.basename(path_out)
         # the name of the input file is assumed to be different from the output by 'in' and 'out'
         inname = outname.replace("out", "in")
         path_in = os.path.join(os.path.dirname(path_out), inname)
-    elif type(fname) == list and len(fname) == 2:
+    elif isinstance(fname, list) and len(fname) == 2:
         path_in = fname[0]
         path_out = fname[1]
     else:
diff --git a/dpdata/qe/traj.py b/dpdata/qe/traj.py
index ab1a790b5..e27990cbe 100644
--- a/dpdata/qe/traj.py
+++ b/dpdata/qe/traj.py
@@ -205,6 +205,7 @@ def to_system_data(input_name, prefix, begin=0, step=1):
         data["cells"], tmp_steps = load_data(
             prefix + ".cel", 3, begin=begin, step=step, convert=length_convert
         )
+        data["cells"] = np.transpose(data["cells"], (0, 2, 1))
         if csteps != tmp_steps:
             csteps.append(None)
             tmp_steps.append(None)
diff --git a/dpdata/system.py b/dpdata/system.py
index 743f3d057..8af8e4a59 100644
--- a/dpdata/system.py
+++ b/dpdata/system.py
@@ -2,7 +2,6 @@
 import glob
 import os
 from copy import deepcopy
-from enum import Enum, unique
 from typing import Any, Dict, Optional, Tuple, Union
 
 import numpy as np
@@ -15,6 +14,7 @@
 # ensure all plugins are loaded!
 import dpdata.plugins
 from dpdata.amber.mask import load_param_file, pick_by_amber_mask
+from dpdata.data_type import Axis, DataError, DataType, get_data_types
 from dpdata.driver import Driver, Minimizer
 from dpdata.format import Format
 from dpdata.plugin import Plugin
@@ -33,112 +33,6 @@ def load_format(fmt):
     )
 
 
-@unique
-class Axis(Enum):
-    """Data axis."""
-
-    NFRAMES = "nframes"
-    NATOMS = "natoms"
-    NTYPES = "ntypes"
-    NBONDS = "nbonds"
-
-
-class DataError(Exception):
-    """Data is not correct."""
-
-
-class DataType:
-    """DataType represents a type of data, like coordinates, energies, etc.
-
-    Parameters
-    ----------
-    name : str
-        name of data
-    dtype : type or tuple[type]
-        data type, e.g. np.ndarray
-    shape : tuple[int], optional
-        shape of data. Used when data is list or np.ndarray. Use Axis to
-        represents numbers
-    required : bool, default=True
-        whether this data is required
-    """
-
-    def __init__(
-        self,
-        name: str,
-        dtype: type,
-        shape: Tuple[int, Axis] = None,
-        required: bool = True,
-    ) -> None:
-        self.name = name
-        self.dtype = dtype
-        self.shape = shape
-        self.required = required
-
-    def real_shape(self, system: "System") -> Tuple[int]:
-        """Returns expected real shape of a system."""
-        shape = []
-        for ii in self.shape:
-            if ii is Axis.NFRAMES:
-                shape.append(system.get_nframes())
-            elif ii is Axis.NTYPES:
-                shape.append(system.get_ntypes())
-            elif ii is Axis.NATOMS:
-                shape.append(system.get_natoms())
-            elif ii is Axis.NBONDS:
-                # BondOrderSystem
-                shape.append(system.get_nbonds())
-            elif isinstance(ii, int):
-                shape.append(ii)
-            else:
-                raise RuntimeError("Shape is not an int!")
-        return tuple(shape)
-
-    def check(self, system: "System"):
-        """Check if a system has correct data of this type.
-
-        Parameters
-        ----------
-        system : System
-            checked system
-
-        Raises
-        ------
-        DataError
-            type or shape of data is not correct
-        """
-        # check if exists
-        if self.name in system.data:
-            data = system.data[self.name]
-            # check dtype
-            # allow list for empty np.ndarray
-            if isinstance(data, list) and not len(data):
-                pass
-            elif not isinstance(data, self.dtype):
-                raise DataError(
-                    f"Type of {self.name} is {type(data).__name__}, but expected {self.dtype.__name__}"
-                )
-            # check shape
-            if self.shape is not None:
-                shape = self.real_shape(system)
-                # skip checking empty list of np.ndarray
-                if isinstance(data, np.ndarray):
-                    if data.size and shape != data.shape:
-                        raise DataError(
-                            f"Shape of {self.name} is {data.shape}, but expected {shape}"
-                        )
-                elif isinstance(data, list):
-                    if len(shape) and shape[0] != len(data):
-                        raise DataError(
-                            "Length of %s is %d, but expected %d"
-                            % (self.name, len(data), shape[0])
-                        )
-                else:
-                    raise RuntimeError("Unsupported type to check shape")
-        elif self.required:
-            raise DataError("%s not found in data" % self.name)
-
-
 class System(MSONable):
     """The data System.
 
@@ -521,17 +415,21 @@ def append(self, system):
             return False
         elif not len(self.data["atom_numbs"]):
             # this system is non-converged but the system to append is converged
-            self.data = system.data
+            self.data = system.data.copy()
             return False
         if system.uniq_formula != self.uniq_formula:
             raise RuntimeError(
                 f"systems with inconsistent formula could not be append: {self.uniq_formula} v.s. {system.uniq_formula}"
             )
         if system.data["atom_names"] != self.data["atom_names"]:
+            # prevent original system to be modified
+            system = system.copy()
             # allow to append a system with different atom_names order
             system.sort_atom_names()
             self.sort_atom_names()
         if (system.data["atom_types"] != self.data["atom_types"]).any():
+            # prevent original system to be modified
+            system = system.copy()
             # allow to append a system with different atom_types order
             system.sort_atom_types()
             self.sort_atom_types()
@@ -611,7 +509,7 @@ def check_type_map(self, type_map):
             self.sort_atom_names(type_map=type_map)
 
     def apply_type_map(self, type_map):
-        if type_map is not None and type(type_map) is list:
+        if type_map is not None and isinstance(type_map, list):
             self.check_type_map(type_map)
         else:
             raise RuntimeError("invalid type map, cannot be applied")
@@ -624,7 +522,7 @@ def sort_atom_types(self) -> np.ndarray:
         idx : np.ndarray
             new atom index in the Axis.NATOMS
         """
-        idx = np.argsort(self.data["atom_types"])
+        idx = np.argsort(self.data["atom_types"], kind="stable")
         for tt in self.DTYPES:
             if tt.name not in self.data:
                 # skip optional data
@@ -756,7 +654,7 @@ def replicate(self, ncopy):
         if len(ncopy) != 3:
             raise RuntimeError("ncopy must be a list or tuple with 3 int")
         for ii in ncopy:
-            if type(ii) is not int:
+            if not isinstance(ii, int):
                 raise RuntimeError("ncopy must be a list or tuple must with 3 int")
 
         tmp = System()
@@ -767,7 +665,7 @@ def replicate(self, ncopy):
             np.array(np.copy(data["atom_numbs"])) * np.prod(ncopy)
         )
         tmp.data["atom_types"] = np.sort(
-            np.tile(np.copy(data["atom_types"]), np.prod(ncopy))
+            np.tile(np.copy(data["atom_types"]), np.prod(ncopy)), kind="stable"
         )
         tmp.data["cells"] = np.copy(data["cells"])
         for ii in range(3):
@@ -792,18 +690,10 @@ def replace(self, initial_atom_type, end_atom_type, replace_num):
             raise RuntimeError(
                 "Must use method replace() of the instance of class dpdata.System"
             )
-        if type(replace_num) is not int:
-            raise ValueError(
-                "replace_num must be a integer. Now is {replace_num}".format(
-                    replace_num=replace_num
-                )
-            )
+        if not isinstance(replace_num, int):
+            raise ValueError(f"replace_num must be a integer. Now is {replace_num}")
         if replace_num <= 0:
-            raise ValueError(
-                "replace_num must be larger than 0.Now is {replace_num}".format(
-                    replace_num=replace_num
-                )
-            )
+            raise ValueError(f"replace_num must be larger than 0.Now is {replace_num}")
 
         try:
             initial_atom_index = self.data["atom_names"].index(initial_atom_type)
@@ -941,6 +831,7 @@ def predict(self, *args: Any, driver: str = "dp", **kwargs: Any) -> "LabeledSyst
         Examples
         --------
         The default driver is DP:
+
         >>> labeled_sys = ori_sys.predict("frozen_model_compressed.pb")
         """
         if not isinstance(driver, Driver):
@@ -1063,6 +954,17 @@ def pick_by_amber_mask(self, param, maskstr, pass_coords=False, nopbc=None):
             idx = pick_by_amber_mask(parm, maskstr)
             return self.pick_atom_idx(idx, nopbc=nopbc)
 
+    @classmethod
+    def register_data_type(cls, *data_type: Tuple[DataType]):
+        """Register data type.
+
+        Parameters
+        ----------
+        *data_type : tuple[DataType]
+            data type to be regiestered
+        """
+        cls.DTYPES = cls.DTYPES + tuple(data_type)
+
 
 def get_cell_perturb_matrix(cell_pert_fraction):
     if cell_pert_fraction < 0:
@@ -1440,6 +1342,8 @@ def append(self, *systems):
     def __append(self, system):
         if not system.formula:
             return
+        # prevent changing the original system
+        system = system.copy()
         self.check_atom_names(system)
         formula = system.formula
         if formula in self.systems:
@@ -1514,9 +1418,10 @@ def minimize(
         Examples
         --------
         Minimize a system using ASE BFGS along with a DP driver:
+
         >>> from dpdata.driver import Driver
         >>> from ase.optimize import BFGS
-        >>> driver = driver.get_driver("dp")("some_model.pb")
+        >>> driver = Driver.get_driver("dp")("some_model.pb")
         >>> some_system.minimize(minimizer="ase", driver=driver, optimizer=BFGS, fmax=1e-5)
         """
         if not isinstance(minimizer, Minimizer):
@@ -1659,7 +1564,8 @@ def get_cls_name(cls: object) -> str:
 
 
 def add_format_methods():
-    """Add format methods to System, LabeledSystem, and MultiSystems.
+    """Add format methods to System, LabeledSystem, and MultiSystems; add data types
+    to System and LabeledSystem.
 
     Notes
     -----
@@ -1703,5 +1609,10 @@ def to_format(self, *args, **kwargs):
         setattr(LabeledSystem, method, get_func(formatcls))
         setattr(MultiSystems, method, get_func(formatcls))
 
+    # at this point, System.DTYPES and LabeledSystem.DTYPES has been initialized
+    System.register_data_type(*get_data_types(labeled=False))
+    LabeledSystem.register_data_type(*get_data_types(labeled=False))
+    LabeledSystem.register_data_type(*get_data_types(labeled=True))
+
 
 add_format_methods()
diff --git a/dpdata/utils.py b/dpdata/utils.py
index 55cf41ef5..da7261790 100644
--- a/dpdata/utils.py
+++ b/dpdata/utils.py
@@ -63,14 +63,16 @@ def sort_atom_names(data, type_map=None):
         # a[as[a]] == b[as[b]]  as == argsort
         # as[as[b]] == as^{-1}[b]
         # a[as[a][as[as[b]]]] = b[as[b][as^{-1}[b]]] = b[id]
-        idx = np.argsort(data["atom_names"])[np.argsort(np.argsort(type_map))]
+        idx = np.argsort(data["atom_names"], kind="stable")[
+            np.argsort(np.argsort(type_map, kind="stable"), kind="stable")
+        ]
     else:
         # index that will sort an array by alphabetical order
-        idx = np.argsort(data["atom_names"])
+        idx = np.argsort(data["atom_names"], kind="stable")
     # sort atom_names, atom_numbs, atom_types by idx
     data["atom_names"] = list(np.array(data["atom_names"])[idx])
     data["atom_numbs"] = list(np.array(data["atom_numbs"])[idx])
-    data["atom_types"] = np.argsort(idx)[data["atom_types"]]
+    data["atom_types"] = np.argsort(idx, kind="stable")[data["atom_types"]]
     return data
 
 
diff --git a/dpdata/xyz/quip_gap_xyz.py b/dpdata/xyz/quip_gap_xyz.py
index c2d4118c4..8b3f164be 100644
--- a/dpdata/xyz/quip_gap_xyz.py
+++ b/dpdata/xyz/quip_gap_xyz.py
@@ -47,9 +47,7 @@ def handle_single_xyz_frame(lines):
         atom_num = int(lines[0].strip("\n").strip())
         if len(lines) != atom_num + 2:
             raise RuntimeError(
-                "format error, atom_num=={}, {}!=atom_num+2".format(
-                    atom_num, len(lines)
-                )
+                f"format error, atom_num=={atom_num}, {len(lines)}!=atom_num+2"
             )
         data_format_line = lines[1].strip("\n").strip() + " "
         field_value_pattern = re.compile(
diff --git a/plugin_example/README.md b/plugin_example/README.md
index 322756f93..a506a4e88 100644
--- a/plugin_example/README.md
+++ b/plugin_example/README.md
@@ -6,6 +6,7 @@ pip install .
 
 ```py
 import dpdata
+
 print(dpdata.System(12, fmt="random"))
 ```
 
diff --git a/pyproject.toml b/pyproject.toml
index 5b555ad59..087d6ff9a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -54,6 +54,7 @@ docs = [
     'm2r2',
     'deepmodeling-sphinx>=0.1.1',
     'sphinx-argparse',
+    'rdkit',
 ]
 
 [tool.setuptools.packages.find]
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 1c7c22936..000000000
--- a/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-monty==2.0.4
-pymatgen==2019.7.2
diff --git a/tests/dftbplus/detailed.out b/tests/dftbplus/detailed.out
new file mode 100644
index 000000000..21fe93fc5
--- /dev/null
+++ b/tests/dftbplus/detailed.out
@@ -0,0 +1,88 @@
+ Fermi distribution function
+
+Calculation with static geometry
+
+
+********************************************************************************
+ iSCC Total electronic   Diff electronic      SCC error    
+   88   -0.33554958E+01   -0.31884693E-07    0.11352275E-06
+********************************************************************************
+
+ Total charge:    -0.00000000
+
+ Atomic gross charges (e)
+ Atom           Charge
+    1      -0.48336965
+    2       0.03575828
+    3       0.22380553
+    4       0.22380583
+
+Nr. of electrons (up):      8.00000000
+Atom populations (up)
+ Atom       Population
+    1       5.48336965
+    2       0.96424172
+    3       0.77619447
+    4       0.77619417
+
+l-shell populations (up)
+ Atom Sh.   l       Population
+    1   1   0       1.65733979
+    1   2   1       3.82602986
+    2   1   0       0.96424172
+    2   2   1       0.00000000
+    3   1   0       0.77619447
+    3   2   1       0.00000000
+    4   1   0       0.77619417
+    4   2   1       0.00000000
+
+Orbital populations (up)
+ Atom Sh.   l   m       Population  Label
+    1   1   0   0       1.65733979  s
+    1   2   1  -1       1.21678996  p_y
+    1   2   1   0       1.46556336  p_z
+    1   2   1   1       1.14367654  p_x
+    2   1   0   0       0.96424172  s
+    2   2   1  -1       0.00000000  p_y
+    2   2   1   0       0.00000000  p_z
+    2   2   1   1       0.00000000  p_x
+    3   1   0   0       0.77619447  s
+    3   2   1  -1       0.00000000  p_y
+    3   2   1   0       0.00000000  p_z
+    3   2   1   1       0.00000000  p_x
+    4   1   0   0       0.77619417  s
+    4   2   1  -1       0.00000000  p_y
+    4   2   1   0       0.00000000  p_z
+    4   2   1   1       0.00000000  p_x
+
+Fermi level:                        -0.2370222488 H           -6.4497 eV
+Band energy:                        -3.2187242078 H          -87.5859 eV
+TS:                                  0.0000000000 H            0.0000 eV
+Band free energy (E-TS):            -3.2187242078 H          -87.5859 eV
+Extrapolated E(0K):                 -3.2187242078 H          -87.5859 eV
+Input / Output electrons (q):      8.0000000000      8.0000000000
+
+Energy H0:                          -3.3599884076 H          -91.4299 eV
+Energy SCC:                          0.0056352830 H            0.1533 eV
+Energy 3rd:                         -0.0011426808 H           -0.0311 eV
+Total Electronic energy:            -3.3554958054 H          -91.3077 eV
+Repulsive energy:                    0.0590974170 H            1.6081 eV
+Total energy:                       -3.2963983884 H          -89.6996 eV
+Extrapolated to 0:                  -3.2963983884 H          -89.6996 eV
+Total Mermin free energy:           -3.2963983884 H          -89.6996 eV
+Force related energy:               -3.2963983884 H          -89.6996 eV
+
+SCC converged
+
+Total Forces
+    1      0.016567056203      0.002817951422      0.005634574270
+    2     -0.018803818530     -0.000002880649     -0.000006015442
+    3      0.001118562874     -0.005291070259     -0.000870711110
+    4      0.001118199454      0.002475999486     -0.004757847718
+
+Maximal derivative component:        0.188038E-01 au
+
+Dipole moment:   -0.06792979   -0.20495079   -0.40960550 au
+Dipole moment:   -0.17266032   -0.52093298   -1.04111341 Debye
+
+
diff --git a/tests/dftbplus/dftb_pin.hsd b/tests/dftbplus/dftb_pin.hsd
new file mode 100644
index 000000000..02e0ed228
--- /dev/null
+++ b/tests/dftbplus/dftb_pin.hsd
@@ -0,0 +1,93 @@
+Geometry = GenFormat {
+4 C
+N H
+1     1        1.014150        0.112320        0.047370
+2     2        3.909390        0.037985       -0.101159
+3     2        0.702550       -0.851820       -0.060860
+4     2        0.702550        0.603740       -0.789160
+}
+Driver = {}
+Hamiltonian = DFTB {
+  SCC = Yes
+  MaxAngularMomentum = {
+    N = "p"
+    H = "p"
+  }
+  SlaterKosterFiles = {
+    N-N = "/home/jz748/devel/git/Programs/Amber18/dat/slko/3ob-3-1/N-N.skf"
+    H-H = "/home/jz748/devel/git/Programs/Amber18/dat/slko/3ob-3-1/H-H.skf"
+    H-N = "/home/jz748/devel/git/Programs/Amber18/dat/slko/3ob-3-1/H-N.skf"
+    N-H = "/home/jz748/devel/git/Programs/Amber18/dat/slko/3ob-3-1/N-H.skf"
+  }
+  ThirdOrderFull = Yes
+  HubbardDerivs = {
+    H = -0.1857
+    N = -0.1535
+  }
+  HCorrection = Damping {
+    Exponent = 4.0
+  }
+  PolynomialRepulsive = {}
+  ShellResolvedSCC = No
+  OldSKInterpolation = No
+  RangeSeparated = None {}
+  ReadInitialCharges = No
+  InitialCharges = {}
+  SCCTolerance = 1.0000000000000001E-005
+  ConvergentSCCOnly = Yes
+  SpinPolarisation = {}
+  ElectricField = {}
+  Solver = RelativelyRobust {}
+  Charge = 0.0000000000000000
+  MaxSCCIterations = 100
+  OnSiteCorrection = {}
+  Dispersion = {}
+  Solvation = {}
+  Electrostatics = GammaFunctional {}
+  ThirdOrder = No
+  Differentiation = FiniteDiff {
+    Delta = 1.2207031250000000E-004
+  }
+  ForceEvaluation = "Traditional"
+  Mixer = Broyden {
+    MixingParameter = 0.20000000000000001
+    InverseJacobiWeight = 1.0000000000000000E-002
+    MinimalWeight = 1.0000000000000000
+    MaximalWeight = 100000.00000000000
+    WeightFactor = 1.0000000000000000E-002
+  }
+  Filling = Fermi {
+    Temperature = 0.0000000000000000
+  }
+}
+Options = {
+  WriteDetailedOut = Yes
+  WriteAutotestTag = No
+  WriteDetailedXML = No
+  WriteResultsTag = No
+  RestartFrequency = 20
+  RandomSeed = 0
+  WriteHS = No
+  WriteRealHS = No
+  MinimiseMemoryUsage = No
+  ShowFoldedCoords = No
+  TimingVerbosity = 1
+  WriteChargesAsText = No
+}
+Analysis = {
+  CalculateForces = Yes
+  ProjectStates = {}
+  WriteEigenvectors = No
+  WriteBandOut = Yes
+  MullikenAnalysis = Yes
+  WriteNetCharges = No
+  AtomResolvedEnergies = No
+}
+ParserOptions = {
+  ParserVersion = 11
+  WriteHSDInput = Yes
+  StopAfterParsing = No
+  IgnoreUnprocessedNodes = No
+}
+Reks = None {}
+ExcitedState = {}
diff --git a/tests/plugin/dpdata_plugin_test/__init__.py b/tests/plugin/dpdata_plugin_test/__init__.py
new file mode 100644
index 000000000..b3821cb34
--- /dev/null
+++ b/tests/plugin/dpdata_plugin_test/__init__.py
@@ -0,0 +1,16 @@
+import numpy as np
+
+from dpdata.data_type import Axis, DataType, register_data_type
+
+# test data type
+
+register_data_type(
+    DataType("foo", np.ndarray, (Axis.NFRAMES, 2, 4), required=False), labeled=True
+)
+
+register_data_type(
+    DataType("bar", np.ndarray, (Axis.NFRAMES, Axis.NATOMS, -1), required=False),
+    labeled=True,
+)
+
+ep = None
diff --git a/tests/plugin/pyproject.toml b/tests/plugin/pyproject.toml
new file mode 100644
index 000000000..7ce1f854a
--- /dev/null
+++ b/tests/plugin/pyproject.toml
@@ -0,0 +1,17 @@
+[build-system]
+requires = ["setuptools>=61"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "dpdata_plugin_test"
+version = "0.0.0"
+description = "A test for dpdata plugin"
+dependencies = [
+    'numpy',
+    'dpdata',
+]
+readme = "README.md"
+requires-python = ">=3.7"
+
+[project.entry-points.'dpdata.plugins']
+random = "dpdata_plugin_test:ep"
diff --git a/tests/poscars/POSCAR.P42nmc.replace b/tests/poscars/POSCAR.P42nmc.replace
index e785d1286..5dd05d848 100644
--- a/tests/poscars/POSCAR.P42nmc.replace
+++ b/tests/poscars/POSCAR.P42nmc.replace
@@ -6,99 +6,99 @@ Hf24 O64 Zr8
 Hf O Zr 
 24 64 8 
 Cartesian
-   0.0000000000    0.0000000000    0.0000000000
-   7.5009600000    7.5009600000    5.0819200000
-   7.5009600000    7.5009600000    0.0000000000
-   7.5009600000    2.5003200000   10.1638400000
-   2.5003200000    7.5009600000    5.0819200000
-   2.5003200000    7.5009600000   10.1638400000
-   2.5003200000    2.5003200000    5.0819200000
-   2.5003200000    2.5003200000   10.1638400000
-   5.0006400000    2.5003200000    7.6228800000
-  10.0012800000    7.5009600000    2.5409600000
-   0.0000000000    2.5003200000    7.6228800000
-   0.0000000000    2.5003200000    2.5409600000
-   7.5009600000    5.0006400000    7.6228800000
-   7.5009600000    5.0006400000    2.5409600000
-  10.0012800000    7.5009600000    7.6228800000
-   7.5009600000    0.0000000000    2.5409600000
-   0.0000000000    0.0000000000    5.0819200000
-   0.0000000000    5.0006400000    5.0819200000
-   7.5009600000    0.0000000000    7.6228800000
-   5.0006400000    0.0000000000    5.0819200000
-   2.5003200000    0.0000000000    2.5409600000
-   5.0006400000    0.0000000000   10.1638400000
-   2.5003200000    0.0000000000    7.6228800000
-   2.5003200000    5.0006400000    7.6228800000
-   1.2501600000    8.7511200000    1.0558196992
-   1.2501600000    8.7511200000    3.5967796992
-   1.2501600000    3.7504800000    8.6786996992
-   1.2501600000    3.7504800000    3.5967796992
-   6.2508000000    8.7511200000    6.1377396992
-   6.2508000000    8.7511200000    1.0558196992
-   6.2508000000    3.7504800000    6.1377396992
-   8.7511200000    1.2501600000    8.6786996992
-   6.2508000000    3.7504800000    1.0558196992
-   1.2501600000    3.7504800000    1.0558196992
-   1.2501600000    8.7511200000    8.6786996992
-   8.7511200000    6.2508000000    3.5967796992
-   8.7511200000    6.2508000000    8.6786996992
-   1.2501600000    8.7511200000    6.1377396992
-   1.2501600000    3.7504800000    6.1377396992
-   6.2508000000    3.7504800000    3.5967796992
-   6.2508000000    6.2508000000    1.4851403008
-   6.2508000000    8.7511200000    3.5967796992
-   6.2508000000    1.2501600000    9.1080203008
-   6.2508000000    1.2501600000    4.0261003008
-   1.2501600000    6.2508000000    9.1080203008
-   1.2501600000    6.2508000000    4.0261003008
-   1.2501600000    1.2501600000    9.1080203008
-   1.2501600000    1.2501600000    4.0261003008
-   6.2508000000    6.2508000000    6.5670603008
-   8.7511200000    1.2501600000    3.5967796992
-   6.2508000000    1.2501600000    6.5670603008
-   6.2508000000    1.2501600000    1.4851403008
-   1.2501600000    6.2508000000    6.5670603008
-   1.2501600000    6.2508000000    1.4851403008
-   1.2501600000    1.2501600000    6.5670603008
-   1.2501600000    1.2501600000    1.4851403008
-   6.2508000000    8.7511200000    8.6786996992
-   6.2508000000    3.7504800000    8.6786996992
-   3.7504800000    6.2508000000    8.6786996992
-   8.7511200000    8.7511200000    9.1080203008
-   3.7504800000    1.2501600000    8.6786996992
-   3.7504800000    3.7504800000    1.4851403008
-   3.7504800000    3.7504800000    6.5670603008
-   3.7504800000    8.7511200000    1.4851403008
-   3.7504800000    8.7511200000    6.5670603008
-   8.7511200000    3.7504800000    1.4851403008
-   8.7511200000    3.7504800000    6.5670603008
-   8.7511200000    8.7511200000    1.4851403008
-   8.7511200000    8.7511200000    6.5670603008
-   3.7504800000    6.2508000000    3.5967796992
-   3.7504800000    3.7504800000    9.1080203008
-   3.7504800000    8.7511200000    4.0261003008
-   3.7504800000    8.7511200000    9.1080203008
-   3.7504800000    3.7504800000    4.0261003008
-   8.7511200000    3.7504800000    9.1080203008
-   3.7504800000    1.2501600000    3.5967796992
-   8.7511200000    3.7504800000    4.0261003008
-   8.7511200000    6.2508000000    1.0558196992
-   8.7511200000    1.2501600000    6.1377396992
-   8.7511200000    1.2501600000    1.0558196992
-   8.7511200000    6.2508000000    6.1377396992
-   3.7504800000    6.2508000000    1.0558196992
-   3.7504800000    1.2501600000    6.1377396992
-   3.7504800000    1.2501600000    1.0558196992
-   6.2508000000    6.2508000000    4.0261003008
-   8.7511200000    8.7511200000    4.0261003008
-   3.7504800000    6.2508000000    6.1377396992
-   6.2508000000    6.2508000000    9.1080203008
-   7.5009600000    2.5003200000    5.0819200000
-   5.0006400000    7.5009600000    7.6228800000
-   5.0006400000    7.5009600000    2.5409600000
-   5.0006400000    2.5003200000    2.5409600000
-   2.5003200000    5.0006400000    2.5409600000
-   5.0006400000    5.0006400000    5.0819200000
-   5.0006400000    5.0006400000   10.1638400000
-   0.0000000000    5.0006400000   10.1638400000
+0.000000 0.000000 0.000000
+0.000000 0.000000 5.081920
+0.000000 5.000640 5.081920
+5.000640 0.000000 10.163840
+5.000640 0.000000 5.081920
+2.500320 0.000000 2.540960
+2.500320 0.000000 7.622880
+2.500320 5.000640 7.622880
+7.500960 0.000000 2.540960
+7.500960 0.000000 7.622880
+7.500960 5.000640 2.540960
+7.500960 5.000640 7.622880
+0.000000 2.500320 2.540960
+0.000000 2.500320 7.622880
+10.001280 7.500960 2.540960
+10.001280 7.500960 7.622880
+5.000640 2.500320 7.622880
+2.500320 2.500320 10.163840
+2.500320 2.500320 5.081920
+2.500320 7.500960 10.163840
+2.500320 7.500960 5.081920
+7.500960 2.500320 10.163840
+7.500960 7.500960 0.000000
+7.500960 7.500960 5.081920
+3.750480 3.750480 1.485140
+3.750480 3.750480 6.567060
+3.750480 8.751120 1.485140
+3.750480 8.751120 6.567060
+8.751120 3.750480 1.485140
+8.751120 3.750480 6.567060
+8.751120 8.751120 1.485140
+8.751120 8.751120 6.567060
+3.750480 3.750480 4.026100
+3.750480 3.750480 9.108020
+3.750480 8.751120 4.026100
+3.750480 8.751120 9.108020
+8.751120 3.750480 4.026100
+8.751120 3.750480 9.108020
+8.751120 8.751120 4.026100
+8.751120 8.751120 9.108020
+3.750480 1.250160 1.055820
+3.750480 1.250160 6.137740
+3.750480 6.250800 1.055820
+3.750480 6.250800 6.137740
+8.751120 1.250160 1.055820
+8.751120 1.250160 6.137740
+8.751120 6.250800 1.055820
+8.751120 6.250800 6.137740
+3.750480 1.250160 3.596780
+3.750480 1.250160 8.678700
+3.750480 6.250800 3.596780
+3.750480 6.250800 8.678700
+8.751120 1.250160 3.596780
+8.751120 1.250160 8.678700
+8.751120 6.250800 3.596780
+8.751120 6.250800 8.678700
+1.250160 3.750480 1.055820
+1.250160 3.750480 6.137740
+1.250160 8.751120 1.055820
+1.250160 8.751120 6.137740
+6.250800 3.750480 1.055820
+6.250800 3.750480 6.137740
+6.250800 8.751120 1.055820
+6.250800 8.751120 6.137740
+1.250160 3.750480 3.596780
+1.250160 3.750480 8.678700
+1.250160 8.751120 3.596780
+1.250160 8.751120 8.678700
+6.250800 3.750480 3.596780
+6.250800 3.750480 8.678700
+6.250800 8.751120 3.596780
+6.250800 8.751120 8.678700
+1.250160 1.250160 1.485140
+1.250160 1.250160 6.567060
+1.250160 6.250800 1.485140
+1.250160 6.250800 6.567060
+6.250800 1.250160 1.485140
+6.250800 1.250160 6.567060
+6.250800 6.250800 1.485140
+6.250800 6.250800 6.567060
+1.250160 1.250160 4.026100
+1.250160 1.250160 9.108020
+1.250160 6.250800 4.026100
+1.250160 6.250800 9.108020
+6.250800 1.250160 4.026100
+6.250800 1.250160 9.108020
+6.250800 6.250800 4.026100
+6.250800 6.250800 9.108020
+0.000000 5.000640 10.163840
+5.000640 5.000640 10.163840
+5.000640 5.000640 5.081920
+2.500320 5.000640 2.540960
+5.000640 2.500320 2.540960
+5.000640 7.500960 2.540960
+5.000640 7.500960 7.622880
+7.500960 2.500320 5.081920
diff --git a/tests/qe.traj/traj6.cel b/tests/qe.traj/traj6.cel
index bcba5c377..c2a735317 100644
--- a/tests/qe.traj/traj6.cel
+++ b/tests/qe.traj/traj6.cel
@@ -1,24 +1,24 @@
 195
-5.157620138324029213e+00 0 0
-8.882263018337732685e-02 5.782217839848955876e+00 0
-4.531494312205264219e-02 1.857154025688012577e-01 5.855553673164934025e+00
+5.157620138324029213e+00 8.882263018337732685e-02 4.531494312205264219e-02
+0 5.782217839848955876e+00 1.857154025688012577e-01
+0 0 5.855553673164934025e+00
 200
-5.359985500701728967e+00 0 0
-3.585941820098031974e-01 5.317218997480877896e+00 0
-7.606780476053129902e-01 7.811107228901693622e-01 5.715864930517207121e+00
+5.359985500701728967e+00 3.585941820098031974e-01 7.606780476053129902e-01
+0 5.317218997480877896e+00 7.811107228901693622e-01
+0 0 5.715864930517207121e+00
 201
-5.235484852416917079e+00 0 0
-5.612267823951659906e-01 5.640178212424978632e+00 0
-1.561235654197479228e-02 2.819721738692628765e-01 5.868536463888631260e+00
+5.235484852416917079e+00 5.612267823951659906e-01 1.561235654197479228e-02
+0 5.640178212424978632e+00 2.819721738692628765e-01
+0 0 5.868536463888631260e+00
 202
-5.735200354152383717e+00 0 0
-8.818112752306928037e-02 5.003946763988647461e+00 0
-5.346029912365874992e-01 4.453576669882056693e-01 5.311832768820492490e+00
+5.735200354152383717e+00 8.818112752306928037e-02 5.346029912365874992e-01
+0 5.003946763988647461e+00 4.453576669882056693e-01
+0 0 5.311832768820492490e+00
 203
-5.969007839117177916e+00 0 0
-7.974158979687617776e-02 5.086270090760550922e+00 0
-2.286579398249665163e-01 1.449088120223311904e-01 5.588188387987865546e+00
+5.969007839117177916e+00 7.974158979687617776e-02 2.286579398249665163e-01
+0 5.086270090760550922e+00 1.449088120223311904e-01
+0 0  5.588188387987865546e+00
 204
-5.308510801020571712e+00 0 0
-3.076052782312116429e-01 5.279388982187173340e+00 0
-4.321921336152507731e-01 8.121110815096156399e-01 5.301664983741235737e+00
+5.308510801020571712e+00 3.076052782312116429e-01 4.321921336152507731e-01
+0 5.279388982187173340e+00 8.121110815096156399e-01
+0 0 5.301664983741235737e+00
diff --git a/tests/test_custom_data_type.py b/tests/test_custom_data_type.py
new file mode 100644
index 000000000..5a0e2bab9
--- /dev/null
+++ b/tests/test_custom_data_type.py
@@ -0,0 +1,84 @@
+import unittest
+
+import h5py
+import numpy as np
+
+import dpdata
+
+
+class TestDeepmdLoadDumpComp(unittest.TestCase):
+    def setUp(self):
+        self.system = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar")
+        self.foo = np.ones((len(self.system), 2, 4))
+        self.system.data["foo"] = self.foo
+        self.system.check_data()
+
+    def test_to_deepmd_raw(self):
+        self.system.to_deepmd_raw("data_foo")
+        foo = np.loadtxt("data_foo/foo.raw")
+        np.testing.assert_allclose(foo.reshape(self.foo.shape), self.foo)
+
+    def test_from_deepmd_raw(self):
+        self.system.to_deepmd_raw("data_foo")
+        x = dpdata.LabeledSystem("data_foo", fmt="deepmd/raw")
+        np.testing.assert_allclose(x.data["foo"], self.foo)
+
+    def test_to_deepmd_npy(self):
+        self.system.to_deepmd_npy("data_foo")
+        foo = np.load("data_foo/set.000/foo.npy")
+        np.testing.assert_allclose(foo.reshape(self.foo.shape), self.foo)
+
+    def test_from_deepmd_npy(self):
+        self.system.to_deepmd_npy("data_foo")
+        x = dpdata.LabeledSystem("data_foo", fmt="deepmd/npy")
+        np.testing.assert_allclose(x.data["foo"], self.foo)
+
+    def test_to_deepmd_hdf5(self):
+        self.system.to_deepmd_hdf5("data_foo.h5")
+        with h5py.File("data_foo.h5") as f:
+            foo = f["set.000/foo.npy"][:]
+        np.testing.assert_allclose(foo.reshape(self.foo.shape), self.foo)
+
+    def test_from_deepmd_hdf5(self):
+        self.system.to_deepmd_hdf5("data_foo.h5")
+        x = dpdata.LabeledSystem("data_foo.h5", fmt="deepmd/hdf5")
+        np.testing.assert_allclose(x.data["foo"], self.foo)
+
+
+class TestDeepmdLoadDumpCompAny(unittest.TestCase):
+    def setUp(self):
+        self.system = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar")
+        self.bar = np.ones((len(self.system), self.system.get_natoms(), 2))
+        self.system.data["bar"] = self.bar
+        self.system.check_data()
+
+    def test_to_deepmd_raw(self):
+        self.system.to_deepmd_raw("data_bar")
+        bar = np.loadtxt("data_bar/bar.raw")
+        np.testing.assert_allclose(bar.reshape(self.bar.shape), self.bar)
+
+    def test_from_deepmd_raw(self):
+        self.system.to_deepmd_raw("data_bar")
+        x = dpdata.LabeledSystem("data_bar", fmt="deepmd/raw")
+        np.testing.assert_allclose(x.data["bar"], self.bar)
+
+    def test_to_deepmd_npy(self):
+        self.system.to_deepmd_npy("data_bar")
+        bar = np.load("data_bar/set.000/bar.npy")
+        np.testing.assert_allclose(bar.reshape(self.bar.shape), self.bar)
+
+    def test_from_deepmd_npy(self):
+        self.system.to_deepmd_npy("data_bar")
+        x = dpdata.LabeledSystem("data_bar", fmt="deepmd/npy")
+        np.testing.assert_allclose(x.data["bar"], self.bar)
+
+    def test_to_deepmd_hdf5(self):
+        self.system.to_deepmd_hdf5("data_bar.h5")
+        with h5py.File("data_bar.h5") as f:
+            bar = f["set.000/bar.npy"][:]
+        np.testing.assert_allclose(bar.reshape(self.bar.shape), self.bar)
+
+    def test_from_deepmd_hdf5(self):
+        self.system.to_deepmd_hdf5("data_bar.h5")
+        x = dpdata.LabeledSystem("data_bar.h5", fmt="deepmd/hdf5")
+        np.testing.assert_allclose(x.data["bar"], self.bar)
diff --git a/tests/test_deepmd_mixed.py b/tests/test_deepmd_mixed.py
index 6311410ef..7e522e065 100644
--- a/tests/test_deepmd_mixed.py
+++ b/tests/test_deepmd_mixed.py
@@ -109,6 +109,107 @@ def test_str(self):
         )
 
 
+class TestMixedMultiSystemsDumpLoadSetSize(
+    unittest.TestCase, CompLabeledSys, MultiSystems, IsNoPBC
+):
+    def setUp(self):
+        self.places = 6
+        self.e_places = 6
+        self.f_places = 6
+        self.v_places = 6
+
+        # C1H4
+        system_1 = dpdata.LabeledSystem(
+            "gaussian/methane.gaussianlog", fmt="gaussian/log"
+        )
+
+        # C1H3
+        system_2 = dpdata.LabeledSystem(
+            "gaussian/methane_sub.gaussianlog", fmt="gaussian/log"
+        )
+
+        tmp_data = system_1.data.copy()
+        tmp_data["atom_numbs"] = [1, 1, 1, 2]
+        tmp_data["atom_names"] = ["C", "H", "A", "B"]
+        tmp_data["atom_types"] = np.array([0, 1, 2, 3, 3])
+        # C1H1A1B2
+        system_1_modified_type_1 = dpdata.LabeledSystem(data=tmp_data)
+
+        tmp_data = system_1.data.copy()
+        tmp_data["atom_numbs"] = [1, 1, 2, 1]
+        tmp_data["atom_names"] = ["C", "H", "A", "B"]
+        tmp_data["atom_types"] = np.array([0, 1, 2, 2, 3])
+        # C1H1A2B1
+        system_1_modified_type_2 = dpdata.LabeledSystem(data=tmp_data)
+
+        tmp_data = system_1.data.copy()
+        tmp_data["atom_numbs"] = [1, 1, 1, 2]
+        tmp_data["atom_names"] = ["C", "H", "A", "D"]
+        tmp_data["atom_types"] = np.array([0, 1, 2, 3, 3])
+        # C1H1A1C2
+        system_1_modified_type_3 = dpdata.LabeledSystem(data=tmp_data)
+
+        self.ms = dpdata.MultiSystems(
+            system_1,
+            system_2,
+            system_1_modified_type_1,
+            system_1_modified_type_2,
+            system_1_modified_type_3,
+        )
+        self.ms.to_deepmd_npy_mixed("tmp.deepmd.mixed", set_size=1)
+        self.place_holder_ms = dpdata.MultiSystems()
+        self.place_holder_ms.from_deepmd_npy("tmp.deepmd.mixed", fmt="deepmd/npy")
+        self.systems = dpdata.MultiSystems()
+        self.systems.from_deepmd_npy_mixed("tmp.deepmd.mixed", fmt="deepmd/npy/mixed")
+        self.system_1 = self.ms["C1H4A0B0D0"]
+        self.system_2 = self.systems["C1H4A0B0D0"]
+        mixed_sets = glob("tmp.deepmd.mixed/*/set.*")
+        self.assertEqual(len(mixed_sets), 5)
+        for i in mixed_sets:
+            self.assertEqual(
+                os.path.exists(os.path.join(i, "real_atom_types.npy")), True
+            )
+
+        self.system_names = [
+            "C1H4A0B0D0",
+            "C1H3A0B0D0",
+            "C1H1A1B2D0",
+            "C1H1A2B1D0",
+            "C1H1A1B0D2",
+        ]
+        self.system_sizes = {
+            "C1H4A0B0D0": 1,
+            "C1H3A0B0D0": 1,
+            "C1H1A1B2D0": 1,
+            "C1H1A2B1D0": 1,
+            "C1H1A1B0D2": 1,
+        }
+        self.atom_names = ["C", "H", "A", "B", "D"]
+
+    def tearDown(self):
+        if os.path.exists("tmp.deepmd.mixed"):
+            shutil.rmtree("tmp.deepmd.mixed")
+
+    def test_len(self):
+        self.assertEqual(len(self.ms), 5)
+        self.assertEqual(len(self.place_holder_ms), 2)
+        self.assertEqual(len(self.systems), 5)
+
+    def test_get_nframes(self):
+        self.assertEqual(self.ms.get_nframes(), 5)
+        self.assertEqual(self.place_holder_ms.get_nframes(), 5)
+        self.assertEqual(self.systems.get_nframes(), 5)
+
+    def test_str(self):
+        self.assertEqual(str(self.ms), "MultiSystems (5 systems containing 5 frames)")
+        self.assertEqual(
+            str(self.place_holder_ms), "MultiSystems (2 systems containing 5 frames)"
+        )
+        self.assertEqual(
+            str(self.systems), "MultiSystems (5 systems containing 5 frames)"
+        )
+
+
 class TestMixedMultiSystemsTypeChange(
     unittest.TestCase, CompLabeledSys, MultiSystems, IsNoPBC
 ):
diff --git a/tests/test_dftbplus.py b/tests/test_dftbplus.py
new file mode 100644
index 000000000..2a2913a52
--- /dev/null
+++ b/tests/test_dftbplus.py
@@ -0,0 +1,58 @@
+import unittest
+
+import numpy as np
+from comp_sys import CompLabeledSys, IsNoPBC
+from context import dpdata
+
+
+class TestDeepmdLoadAmmonia(unittest.TestCase, CompLabeledSys, IsNoPBC):
+    def setUp(self):
+        energy_convert = dpdata.unit.EnergyConversion("hartree", "eV").value()
+        force_convert = dpdata.unit.ForceConversion(
+            "hartree/bohr", "eV/angstrom"
+        ).value()
+
+        self.system_1 = dpdata.LabeledSystem(
+            ("dftbplus/dftb_pin.hsd", "dftbplus/detailed.out"), fmt="dftbplus"
+        )
+
+        self.system_2 = dpdata.LabeledSystem(
+            data={
+                "atom_types": np.array([0, 1, 1, 1]),
+                "atom_names": ["N", "H"],
+                "atom_numbs": [1, 3],
+                "coords": np.array(
+                    [
+                        [
+                            [1.014150, 0.112320, 0.047370],
+                            [3.909390, 0.037985, -0.101159],
+                            [0.702550, -0.851820, -0.060860],
+                            [0.702550, 0.603740, -0.789160],
+                        ]
+                    ]
+                ),
+                "energies": np.array([-3.2963983884]) * energy_convert,
+                "forces": np.array(
+                    [
+                        [
+                            [0.016567056203, 0.002817951422, 0.005634574270],
+                            [-0.018803818530, -0.000002880649, -0.000006015442],
+                            [0.001118562874, -0.005291070259, -0.000870711110],
+                            [0.001118199454, 0.002475999486, -0.004757847718],
+                        ]
+                    ]
+                )
+                * force_convert,
+                "cells": np.zeros((1, 3, 3)),
+                "orig": np.zeros(3),
+                "nopbc": True,
+            }
+        )
+        self.places = 6
+        self.e_places = 6
+        self.f_places = 6
+        self.v_places = 6
+
+
+if __name__ == "__main__":
+    unittest.main()