From 80d3b6a890aa6822b3c7dbcb16f6307ae3b43037 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Mon, 4 Sep 2023 12:04:32 +0200
Subject: [PATCH 01/98] Added some temporary comments in workflow.py.

---
 src/esm_runscripts/workflow.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index b677ea507..af97f288c 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -336,7 +336,7 @@ def init_total_workflow(config):
         if not "compute" in config["general"]["workflow"]["subjobs"]:
             config["general"]["workflow"]["subjobs"].update(compute)
         if not "tidy" in config["general"]["workflow"]["subjobs"]:
-            config["general"]["workflow"]["subjobs"].update(tidy)    
+            config["general"]["workflow"]["subjobs"].update(tidy)
     if not "last_task_in_queue" in config["general"]["workflow"]:
         config["general"]["workflow"]["last_task_in_queue"] = "tidy"
     if not "first_task_in_queue" in config["general"]["workflow"]:
@@ -350,35 +350,47 @@ def init_total_workflow(config):
 
 def collect_all_workflow_information(config):
 
+    # For each component entry in config (can be a model or a new entry (e.g. 'flows')
     for model in config:
         if "workflow" in config[model]:
             w_config = config[model]["workflow"]
             gw_config = config["general"]["workflow"]
 
+            # looks for entry 'subjob_clusters' in config of each component
             if "subjob_clusters" in w_config:
                 for cluster in w_config["subjob_clusters"]:
+                    # if a cluster is also in the general config, this cluster will be merged together ...
                     if cluster in gw_config["subjob_clusters"]:
                         gw_config["subjob_clusters"][cluster] = merge_if_possible(
                             w_config["subjob_clusters"][cluster],
                             gw_config["subjob_clusters"][cluster],
                         )
+                    # if cluster is not in general config, it will copied into it.
                     else:
                         gw_config["subjob_clusters"][cluster] = copy.deepcopy(
                             w_config["subjob_clusters"][cluster],
                         )
 
+            # looks for entry 'subjobs' in config of each component
             if "subjobs" in w_config:
+                # copies component workflow config to new variable ref_config
                 ref_config = copy.deepcopy(w_config)
+                # ??? for every subjob in ???
                 for subjob in list(copy.deepcopy(w_config["subjobs"])):
 
                     # subjobs (other than clusters) should be model specific
+                    # subjobs that are defined in subjobs of components workflow configs and not in a subjob_cluster are copied to general with suffix of componet entry.
                     gw_config["subjobs"][subjob + "_" + model] = copy.deepcopy(
                         w_config["subjobs"][subjob]
                     )
+                    # if this copied subjobs is also n general workflow subjobs it will be deleted there
                     if subjob in gw_config["subjobs"]:
                         del gw_config["subjobs"][subjob]
+
                     # make sure that the run_after and run_before refer to that cluster
+                    # for all subjobs now in general workflow
                     for other_subjob in gw_config["subjobs"]:
+                        # sets run_after and run_before to correct subjob???
                         if "run_after" in gw_config["subjobs"][other_subjob]:
                             if (
                                 gw_config["subjobs"][other_subjob]["run_after"]

From b63e9a98854e69615f7dd14360022e0296272d9c Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Wed, 6 Sep 2023 11:35:20 +0200
Subject: [PATCH 02/98] Started to add docstrings to workflow.py

---
 src/esm_runscripts/workflow.py | 81 +++++++++++++++++++++++++++++++---
 1 file changed, 76 insertions(+), 5 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index af97f288c..56a46bb15 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -3,6 +3,13 @@
 
 
 def skip_cluster(cluster, config):
+    """
+    Arguments:
+        cluster
+        config
+    Returns:
+        True or False
+    """
     gw_config = config["general"]["workflow"]
     clusterconf = gw_config["subjob_clusters"][cluster]
 
@@ -37,7 +44,14 @@ def skip_cluster(cluster, config):
 
 
 def assemble_workflow(config):
-    #
+    """
+    Assembles the workflow tasks from the runscript.
+
+    Arguments:
+        config -- dictionary
+    Returns:
+        config
+    """
     config = init_total_workflow(config)
     config = collect_all_workflow_information(config)
     config = complete_clusters(config)
@@ -56,11 +70,25 @@ def assemble_workflow(config):
 
 
 def display_nicely(config):
+    """
+    Pretty prints the workflow configuration assembled in config["general"].
+
+    Arguments:
+        config -- dictionary
+    Returns:
+        config
+    """
     esm_parser.pprint_config(config["general"]["workflow"])
     return config
 
 
 def prepend_newrun_job(config):
+    """
+    Arguments:
+        config -- dictionary
+    Returns:
+        config
+    """
     gw_config = config["general"]["workflow"]
     first_cluster_name = gw_config["first_task_in_queue"]
     first_cluster = gw_config["subjob_clusters"][first_cluster_name]
@@ -103,10 +131,14 @@ def prepend_newrun_job(config):
 
     return config
 
-    #
-
 
 def order_clusters(config):
+    """
+    Arguments:
+        config -- dictionary
+    Returns:
+        config
+    """
     gw_config = config["general"]["workflow"]
 
     for subjob_cluster in gw_config["subjob_clusters"]:
@@ -190,6 +222,12 @@ def order_clusters(config):
 
 
 def complete_clusters(config):
+    """
+    Arguments:
+        config -- dictionary
+    Returns:
+        config
+    """
     gw_config = config["general"]["workflow"]
 
     # First, complete the matching subjobs <-> clusters
@@ -272,6 +310,16 @@ def complete_clusters(config):
 
 
 def merge_single_entry_if_possible(entry, sourceconf, targetconf):
+    """
+    Merges a dictionary entry into a target dictionary that has he same key.
+
+    Arguments:
+        entry -- dictionary key
+        sourceconf -- dictionary
+        targetconf -- dictionary
+    Returns:
+        targetconf
+    """
     if entry in sourceconf:
         if entry in targetconf and not sourceconf[entry] == targetconf[entry]:
             print(f"Mismatch found in {entry} for cluster {targetconf}")
@@ -281,7 +329,14 @@ def merge_single_entry_if_possible(entry, sourceconf, targetconf):
 
 
 def init_total_workflow(config):
-    # add compute, tidy etc information already here!
+    """
+    Add compute, tidy etc information already here!
+
+    Arguments:
+        config -- dictionary
+    Returns:
+        config
+    """
 
     tasks = 0
     for model in config["general"]["valid_model_names"]:
@@ -349,8 +404,14 @@ def init_total_workflow(config):
 
 
 def collect_all_workflow_information(config):
+    """
+    For each component entry in config (can be a model or a new entry (e.g. 'flows')
 
-    # For each component entry in config (can be a model or a new entry (e.g. 'flows')
+    Arguments:
+        config -- dictionary
+    Returns:
+        config
+    """
     for model in config:
         if "workflow" in config[model]:
             w_config = config[model]["workflow"]
@@ -433,6 +494,16 @@ def collect_all_workflow_information(config):
 
 
 def merge_if_possible(source, target):
+    """
+    Merges the entries of source dictionary into target dictionary, if not already in.
+    (Will not overwrite entries in target dictionary.)
+
+    Arguments:
+        source -- dictionary
+        target -- dictionary
+    Returns:
+        target
+    """
     for entry in source:
         if entry in target:
             if not source[entry] == target[entry]:

From f9c06594e1a3b88754445d74f3cec6dd46714084 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Thu, 7 Sep 2023 13:50:33 +0200
Subject: [PATCH 03/98] Added a docstring to esm_plugin_mamager

---
 src/esm_plugin_manager/esm_plugin_manager.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/esm_plugin_manager/esm_plugin_manager.py b/src/esm_plugin_manager/esm_plugin_manager.py
index 792d27502..8b52c6cd8 100644
--- a/src/esm_plugin_manager/esm_plugin_manager.py
+++ b/src/esm_plugin_manager/esm_plugin_manager.py
@@ -117,6 +117,17 @@ def check_plugin_availability(plugins):
 
 
 def work_through_recipe(recipe, plugins, config):
+    """
+    Works through the esm_runscripts recipes and plugin recipes.
+
+    Arguments:
+        recipe -- dictionary            # What is in these two dictionaries? Where do the entries are comming from?
+        plugins -- dictionary
+        config -- dictionary
+
+    Returns:
+        config
+    """
     if config.get("general", {}).get("debug_recipe", False):
         import pdb
 

From 0c4a96290c9dac1c72b9ede8433c55b60ce3098a Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Thu, 7 Sep 2023 13:51:35 +0200
Subject: [PATCH 04/98] Some docstring changes in workflow.py

---
 src/esm_runscripts/workflow.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index 56a46bb15..84e9769d2 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -405,7 +405,11 @@ def init_total_workflow(config):
 
 def collect_all_workflow_information(config):
     """
-    For each component entry in config (can be a model or a new entry (e.g. 'flows')
+    Collects all workflow information for each component entry in config
+    (can be a model/component or a new entry (e.g. 'flows')
+
+    Checks if there are "workflow" entries in the user runscript and copies or merges them into
+    config["general"]["workflow"]
 
     Arguments:
         config -- dictionary
@@ -414,13 +418,16 @@ def collect_all_workflow_information(config):
     """
     for model in config:
         if "workflow" in config[model]:
+            # looks for "workflow" in each entry of config (can be model/component, general, etc.)
             w_config = config[model]["workflow"]
+            # looks for "workflow" in "general" section of config.
             gw_config = config["general"]["workflow"]
 
-            # looks for entry 'subjob_clusters' in config of each component
+            # looks for entry 'subjob_clusters' in config of each component that has a "workflow"
             if "subjob_clusters" in w_config:
                 for cluster in w_config["subjob_clusters"]:
-                    # if a cluster is also in the general config, this cluster will be merged together ...
+                    # if a certain cluster is also in the general config, this cluster will be merged together ...
+                    # what cluster could this be?
                     if cluster in gw_config["subjob_clusters"]:
                         gw_config["subjob_clusters"][cluster] = merge_if_possible(
                             w_config["subjob_clusters"][cluster],

From f0bd3856c5c5964e31b6033f10a299bcaacc9db3 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Fri, 6 Oct 2023 12:40:07 +0200
Subject: [PATCH 05/98] Added comments to esm_plugin_manager

---
 src/esm_plugin_manager/esm_plugin_manager.py | 28 +++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/src/esm_plugin_manager/esm_plugin_manager.py b/src/esm_plugin_manager/esm_plugin_manager.py
index 8b52c6cd8..dbfcb4c59 100644
--- a/src/esm_plugin_manager/esm_plugin_manager.py
+++ b/src/esm_plugin_manager/esm_plugin_manager.py
@@ -18,11 +18,26 @@ def read_recipe(recipe, additional_dict, needs_parse=True):
 
 
 def read_plugin_information(plugins_bare, recipe, needs_parse=True):
-    # pluginfile = esm_plugins.yaml
+    """
+    Reads in plugin information from the pluginfile = esm_plugins.yaml
+
+    Arguments:
+        plugins_bare -- disctionary as it is read in by function 'read_recipe'
+        recipe -- dictionary of all workitems of an recipe
+        needs_parse -- True (default) or False
+
+    Returns:
+        plugins - dictionary that has information for each workitem of recipe:
+                    module: e.g. esm_runscripts
+                    submodule: e.g. prepare (this is the Python file where the workitem function is defined.
+                    type: e.g. core
+    """
     if needs_parse:
         plugins_bare = yaml_file_to_dict(plugins_bare)
     extra_info = ["location", "git-url"]
     plugins = {}
+    # loop over all recipe entries
+    # tries to find workitem in 'plugins_bare'
     for workitem in recipe["recipe"]:
         found = False
         for module_type in ["core", "plugins"]:
@@ -31,13 +46,22 @@ def read_plugin_information(plugins_bare, recipe, needs_parse=True):
                     for submodule in plugins_bare[module_type][module]:
                         if submodule in extra_info:
                             continue
+                        # functionlist is a list of workitems (Python function names)
                         functionlist = plugins_bare[module_type][module][submodule]
+                        # if the workitem of the recipe is found in this list
+                        # the dictionary plugins will be filled with fields for
+                        # - 'module' (e.g. esm_runscirpts)
+                        # - 'submodule' (e.g. prepare, this is basically the name
+                        #               of the python file this function is defined in)
+                        # - 'type' (core of plugin)
                         if workitem in functionlist:
                             plugins[workitem] = {
                                 "module": module,
                                 "submodule": submodule,
                                 "type": module_type,
                             }
+                            # add extra info ["location", "git-url"] if found in plugins_bare dict
+                            # is there a use case for this?
                             for extra in extra_info:
                                 if extra in plugins_bare[module_type][module]:
                                     plugins[workitem].update(
@@ -47,6 +71,7 @@ def read_plugin_information(plugins_bare, recipe, needs_parse=True):
                                             ]
                                         }
                                     )
+                            # if workitem is found, all loops including loop over module_type can be aborted.
                             found = True
                             break
                     if found:
@@ -133,6 +158,7 @@ def work_through_recipe(recipe, plugins, config):
 
         pdb.set_trace()
     recipes = recipe["recipe"]
+    # Loop over the recipe
     for index, workitem in enumerate(recipes, start=1):
         if config["general"].get("verbose", False):
             # diagnostic message of which recipe step is being executed

From 8c4b753c4abcdc37ed9199ee0955b3f63a85576b Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Fri, 6 Oct 2023 12:40:54 +0200
Subject: [PATCH 06/98] Start to refactor workflow.

---
 src/esm_runscripts/workflow.py | 637 +++++++++++++++++++++------------
 1 file changed, 402 insertions(+), 235 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index 84e9769d2..94828e44d 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -1,63 +1,186 @@
 import sys, copy, os
 import esm_parser
 
-
-def skip_cluster(cluster, config):
-    """
-    Arguments:
-        cluster
-        config
-    Returns:
-        True or False
-    """
-    gw_config = config["general"]["workflow"]
-    clusterconf = gw_config["subjob_clusters"][cluster]
-
-    """
-    print(f"run_only {clusterconf.get('run_only', 'Error') }")
-    print(f"skip_chunk_number {clusterconf.get('skip_chunk_number', -999)}")
-    print(f"skip_run_number {clusterconf.get('skip_run_number', -999)}")
-    print(f"chunk_number {config['general'].get('chunk_number', -998)}")
-    print(f"run_number {config['general'].get('run_number', -998)}")
-    print(f"last_run_in_chunk {config['general']['last_run_in_chunk']}")
-    print(f"first_run_in_chunk {config['general']['first_run_in_chunk']}")
-    """
-
-    if clusterconf.get("run_only", "Error") == "last_run_in_chunk" and not config[
-        "general"
-    ].get("last_run_in_chunk", False):
-        return True
-    if clusterconf.get("run_only", "Error") == "first_run_in_chunk" and not config[
-        "general"
-    ].get("first_run_in_chunk", False):
-        return True
-    if clusterconf.get("skip_chunk_number", -999) == config["general"].get(
-        "chunk_number", -998
-    ):
-        return True
-    if clusterconf.get("skip_run_number", -999) == config["general"].get(
-        "run_number", -998
-    ):
-        return True
-
-    return False
-
+#import pdb
+
+class Workflow:
+    """A workflow class."""
+    default_phases = []
+    user_phases = []
+    always_run_with = []
+    first_task_in_queue = ""
+    last_task_in_queue = ""
+    next_run_triggered_by = ""
+
+    def __init__(self, phases, always_run_with=[]):
+        """
+        Create a new workflow.
+
+        Arguments:
+            phases -- List of workflow phases
+            always_run_with -- List of phases that precedes each phase
+        """
+        # TODO: NW call here the phase object ???
+        self.phases = phases
+        self.always_run_with = always_run_with
+
+    def num_phases_in_workflow(self):
+        """
+        Return the number of phases in workflow.
+        """
+        return len(self.phases)
+
+    def write_to_config(self, config):
+        """
+        Write to config.
+        """
+        # NW: It is assumed here, that there are no workflows in config["general"]
+        # or that these are removed after collect_...
+        config["general"]["workflow"] = {}
+        config["general"]["workflow"].update(self.__dict__)
+        config["general"]["workflow"]["subjobs"] = {}
+        for phase in self.phases:
+            temp_dict = {phase.name: phase.__dict__}
+            config["general"]["workflow"]["subjobs"].update(temp_dict)
+
+        return config
+
+    def check_user_workflow_dependency(self):
+        """
+        Check whether the user defined workflow phases are independent from eachother or not.
+        """
+        independent = False
+        user_phases_names = [phase.name for phase in self.user_phases]
+        run_after_list = [phase.run_after for phase in self.user_phases]
+        run_before_list = [phase.run_before for phase in self.user_phases]
+        if not set(user_phases_names).intersection(set(run_after_list).union(set(run_before_list))):
+            independent = True
+        else:
+            independent = False
+
+        return independent
+
+    def check_unknown_phases(self):
+        """
+        Check if any user phase addresses an unknown workflow phase.
+        """
+        unknown_user_phase = True
+        phases_names = [phase.name for phase in self.phases]
+        user_phases_names = [phase.name for phase in self.user_phases]
+        # Filter out all falsy items (e.g. [], "", None)
+        run_after_list = list(filter(None, [phase.run_after for phase in self.user_phases]))
+        run_before_list = list(filter(None, [phase.run_before for phase in self.user_phases]))
+
+        unknown_user_phases = set(run_after_list).union(set(run_before_list)).difference(set(user_phases_names).union(set(phases_names)))
+        return unknown_user_phases
+
+    def skip_cluster(self, config):
+        """
+        Checks if a phase/cluster can be skipped.
+        Needed keywords: run_only, skip_chunk_number
+        Arguments:
+            self
+            config
+        Returns:
+            True or False
+        """
+        #gw_config = config["general"]["workflow"]
+        #clusterconf = gw_config["subjob_clusters"][cluster]
+
+        #"""
+        #print(f"run_only {clusterconf.get('run_only', 'Error') }")
+        #print(f"skip_chunk_number {clusterconf.get('skip_chunk_number', -999)}")
+        #print(f"skip_run_number {clusterconf.get('skip_run_number', -999)}")
+        #print(f"chunk_number {config['general'].get('chunk_number', -998)}")
+        #print(f"run_number {config['general'].get('run_number', -998)}")
+        #print(f"last_run_in_chunk {config['general']['last_run_in_chunk']}")
+        #print(f"first_run_in_chunk {config['general']['first_run_in_chunk']}")
+        #"""
+
+        #if clusterconf.get("run_only", "Error") == "last_run_in_chunk" and not config[
+        #    "general"
+        #].get("last_run_in_chunk", False):
+        #    return True
+        #if clusterconf.get("run_only", "Error") == "first_run_in_chunk" and not config[
+        #    "general"
+        #].get("first_run_in_chunk", False):
+        #    return True
+        #if clusterconf.get("skip_chunk_number", -999) == config["general"].get(
+        #    "chunk_number", -998
+        #):
+        #    return True
+        #if clusterconf.get("skip_run_number", -999) == config["general"].get(
+        #    "run_number", -998
+        #):
+        #    return True
+
+        return False
+
+class WorkflowPhase:
+    """A workflow phase class."""
+    name = None
+    nproc = 1
+    run_before = None
+    run_after = None
+    submit_to_batch_system = True
+    run_on_queue = None
+    cluster = None
+    next_submit = []
+    called_from = None
+
+    def __init__(self, phase_name):
+        self.name = phase_name
+
+class UserWorkflowPhase(WorkflowPhase):
+    """A user workflow phase class."""
+    batch_or_shell = "batch"
+    order_in_cluster = "concurrent"
+    script = None
+    script_dir = None
+    call_function = None
+    env_preparation = None
+    run_only = None
+    skip_chunk_number = None
+
+    def __init__(self, phase_name):
+        self.name = phase_name
 
 def assemble_workflow(config):
+    from . import Workflow
     """
-    Assembles the workflow tasks from the runscript.
+    Assembles the workflow tasks.
+    Is called from the plugin recipe prepcompute.
 
     Arguments:
         config -- dictionary
     Returns:
         config
     """
-    config = init_total_workflow(config)
-    config = collect_all_workflow_information(config)
-    config = complete_clusters(config)
-    config = order_clusters(config)
+
+    # 1. Generate default workflow object
+    #TODO: preset of default workflow phases should be set in some config file.
+    workflow = Workflow(["prepcompute","compute","tidy"],always_run_with=["prepare","prepexp"])
+    # initialize the default workflow as Workflow object
+    # TODO: NW where are these default phases defined???
+    # 2. Initialize default workflow phases
+    workflow = init_default_workflow(workflow, config)
+    # 3. Read in workflows from runscript and config files
+    workflow = collect_all_user_workflows(workflow, config)
+
+    #config = collect_all_workflow_information(config)
+
+# Why do I need to do the following function call?
+    # 4. Order user workflows into default workflow wrt. workflow attributs.
+    workflow = order_clusters(workflow, config)
+
+    workflow = complete_clusters(workflow, config)
+    breakpoint()
     config = prepend_newrun_job(config)
+    # 5. write the workflow to config
+    config = workflow.write_to_config(config)
+    # 6. Remove old worklow from config
 
+    # Set "jobtype" for the first task???
     if config["general"]["jobtype"] == "unknown":
         config["general"]["command_line_config"]["jobtype"] = config["general"][
             "workflow"
@@ -82,16 +205,24 @@ def display_nicely(config):
     return config
 
 
-def prepend_newrun_job(config):
+def prepend_newrun_job(workflow, config):
     """
+    Looks for subjob_cluster that are set by user workflow (not a 'SimulationSetup')
+    and do not follow a 'SimulationSetup' subjob_clusters.
+    E.g. if two user workflow are the last two subjob_clusters ???
+    Any other example cases when this is the case?
+
     Arguments:
         config -- dictionary
     Returns:
-        config
+        workflow
     """
     gw_config = config["general"]["workflow"]
     first_cluster_name = gw_config["first_task_in_queue"]
+    print(first_cluster_name)
+    breakpoint()
     first_cluster = gw_config["subjob_clusters"][first_cluster_name]
+    esm_parser.pprint_config(first_cluster)
 
     if not first_cluster.get("batch_or_shell", "Error") == "SimulationSetup":
 
@@ -131,98 +262,98 @@ def prepend_newrun_job(config):
 
     return config
 
-
-def order_clusters(config):
+def set_phase_attrib(workflow_phases, phase_name, attrib, value):
+    for phase in workflow_phases:
+        if phase.name == phase_name:
+            if type(getattr(phase, attrib)).__name__ == "list":
+                phase.__dict__[attrib].append(value)
+            else:
+                phase.__setattr__(attrib, value)
+
+def get_phase_attrib(workflow_phases, phase_name, attrib):
+    for phase in workflow_phases:
+        if phase.name == phase_name:
+            value = getattr(phase, attrib)
+    return value
+
+def order_clusters(workflow, config):
     """
+    Put the subjob_clusters in order ???
+
     Arguments:
         config -- dictionary
     Returns:
-        config
+        workflow
     """
-    gw_config = config["general"]["workflow"]
-
-    for subjob_cluster in gw_config["subjob_clusters"]:
-        if not "next_submit" in gw_config["subjob_clusters"][subjob_cluster]:
-            gw_config["subjob_clusters"][subjob_cluster]["next_submit"] = []
-
-    for subjob_cluster in gw_config["subjob_clusters"]:
-        if not "run_after" in gw_config["subjob_clusters"][subjob_cluster]:
-            if not ("run_before" in gw_config["subjob_clusters"][subjob_cluster]):
-
-                print(f"Don't know when to execute cluster {subjob_cluster}.")
-                print(gw_config)
-                sys.exit(-1)
-
-        if "run_after" in gw_config["subjob_clusters"][subjob_cluster]:
-            if "run_before" in gw_config["subjob_clusters"][subjob_cluster]:
-                print(
-                    f"Specifying both run_after and run_before for cluster {subjob_cluster} may lead to problems."
-                )
-                print(f"Please choose.")
-                sys.exit(-1)
-            if (
-                not gw_config["subjob_clusters"][subjob_cluster]["run_after"]
-                in gw_config["subjob_clusters"]
-            ):
-                print(f"Unknown cluster {gw_config['subjob_clusters'][subjob_cluster]['run_after']}.")
-                sys.exit(-1)
-
-            calling_cluster = gw_config["subjob_clusters"][subjob_cluster]["run_after"]
-
-            if (
-                not subjob_cluster
-                in gw_config["subjob_clusters"][calling_cluster]["next_submit"]
-            ):
-                gw_config["subjob_clusters"][calling_cluster]["next_submit"].append(
-                    subjob_cluster
-                )
-            gw_config["subjob_clusters"][subjob_cluster][
-                "called_from"
-            ] = calling_cluster
-
-            if calling_cluster == gw_config["last_task_in_queue"]:
-                gw_config["last_task_in_queue"] = subjob_cluster
-
-        if "run_before" in gw_config["subjob_clusters"][subjob_cluster]:
-            if (
-                not gw_config["subjob_clusters"][subjob_cluster]["run_before"]
-                in gw_config["subjob_clusters"]
-            ):
-                print(f"Unknown cluster {gw_config['subjob_clusters'][subjob_cluster]['run_before']}.")
-                sys.exit(-1)
-
-            called_cluster = gw_config["subjob_clusters"][subjob_cluster]["run_before"]
-
-            if (
-                not called_cluster
-                in gw_config["subjob_clusters"][subjob_cluster]["next_submit"]
-            ):
-                gw_config["subjob_clusters"][subjob_cluster]["next_submit"].append(
-                    called_cluster
-                )
-            gw_config["subjob_clusters"][called_cluster]["called_from"] = subjob_cluster
-
-            if called_cluster == gw_config["first_task_in_queue"]:
-                gw_config["first_task_in_queue"] = subjob_cluster
-
-    if "next_run_triggered_by" in gw_config:
-        gw_config["last_task_in_queue"] = gw_config["next_run_triggered_by"]
-
-    first_cluster_name = gw_config["first_task_in_queue"]
-    first_cluster = gw_config["subjob_clusters"][first_cluster_name]
-    last_cluster_name = gw_config["last_task_in_queue"]
-    last_cluster = gw_config["subjob_clusters"][last_cluster_name]
-
-    if not first_cluster_name in last_cluster.get("next_submit", ["Error"]):
-        last_cluster["next_submit"].append(first_cluster_name)
-    if not last_cluster_name in first_cluster.get("called_from", ["Error"]):
-        first_cluster["called_from"] = last_cluster_name
-
-    return config
-
-
-def complete_clusters(config):
+    independent = workflow.check_user_workflow_dependency()
+    unknown_phases = workflow.check_unknown_phases()
+
+    if unknown_phases:
+        esm_parser.user_error("ERROR", "Undefined subjob/phase.")
+
+    for user_phase in workflow.user_phases:
+# TODO: Check if run_after or run_before is set for each user phase
+        if not user_phase.run_before and not user_phase.run_after:
+            esm_parser.user_error("ERROR", "Don't know when to start user_phase. Please set run_after or run_before")
+# TODO: Check if not both run_after and run_before are set at the same time for each user phase
+        if user_phase.run_before and user_phase.run_after:
+            esm_parser.user_error("ERROR", "Both run_after and run_before are. Don't know when to start user_phase. Please only set run_after or run_before")
+
+# TODO: Correct for "last_task_in_queue" if necessary
+    # Collect all next_run_triggered_by entrie
+    next_triggered = []
+    run_after = []
+    for model in config:
+        if "workflow" in config[model]:
+            if "next_run_triggered_by" in config[model]["workflow"]:
+                next_triggered.append(config[model]["workflow"]["next_run_triggered_by"])
+    next_triggered = list(filter((workflow.next_run_triggered_by).__ne__, next_triggered))
+    if len(next_triggered) > 1:
+        esm_parser.user_error("ERROR", f"Mismatch found setting next_run_triggered_by for workflow.")
+    elif len(next_triggered) == 1:
+        workflow.next_run_triggered_by = next_triggered[0]
+    #else: let default
+
+# Fill up "next_submit" list
+    next_submits = {}
+    for phase in workflow.phases + workflow.user_phases:
+        next_submits[phase.name] = []
+    for phase2 in workflow.phases + workflow.user_phases:
+        if not phase2.run_after == None:
+            next_submits[phase2.run_after].append(phase2.name)
+            phase2.called_from = phase2.run_after
+    for phase3 in workflow.phases + workflow.user_phases:
+        phase3.next_submit = next_submits[phase3.name]
+
+    for phase4 in workflow.phases + workflow.user_phases:
+        calling_cluster = phase4.run_after
+#
+        if calling_cluster == workflow.last_task_in_queue:
+            workflow.last_task_in_queue = phase4.name
+#
+        called_cluster = phase4.run_before
+        set_phase_attrib(workflow.phases+workflow.user_phases, called_cluster, "called_from", phase4.name)
+        if called_cluster == workflow.first_task_in_queue:
+            workflow.first_task_in_queue = phase4.name
+#
+    first_cluster_name = workflow.first_task_in_queue
+    last_cluster_name = workflow.last_task_in_queue
+#
+    value = get_phase_attrib(workflow.phases+workflow.user_phases, last_cluster_name, "next_submit")
+    if not first_cluster_name in get_phase_attrib(workflow.phases+workflow.user_phases, last_cluster_name, "next_submit"):
+        set_phase_attrib(workflow.phases+workflow.user_phases, last_cluster_name, "next_submit", first_cluster_name)
+    if not last_cluster_name == get_phase_attrib(workflow.phases+workflow.user_phases, first_cluster_name, "called_from"):
+        set_phase_attrib(workflow.phases+workflow.user_phases, first_cluster_name, "called_from", last_cluster_name)
+#
+    return workflow
+
+
+def complete_clusters(workflow, config):
+    # all that are within a next_submit list are in a cluster if:
+    # run concurrently
+    # have the same cluster entry.
     """
+    Rearanges the subjobs to their subjobs_clusters ???
     Arguments:
         config -- dictionary
     Returns:
@@ -230,16 +361,32 @@ def complete_clusters(config):
     """
     gw_config = config["general"]["workflow"]
 
+    # sortiert alles in dict subjob_clusters
+    clusters = []
+    for phase in workflow.phases + workflow.user_phases:
+        if phase.cluster == None:
+            phase.cluster = phase.name
+        clusters.append(phase.cluster)
+
+    # Check if all subjobs of the same cluster have the same run_after
+
+
+    # TODO: calc nproc
+    # TODO: check for batch
+
     # First, complete the matching subjobs <-> clusters
 
     for subjob in gw_config["subjobs"]:
+        # Erstellt ein leeres dict im dict subjob_clusters
         subjob_cluster = gw_config["subjobs"][subjob]["subjob_cluster"]
         if not subjob_cluster in gw_config["subjob_clusters"]:
             gw_config["subjob_clusters"][subjob_cluster] = {}
 
+        # Erstellt leere Liste fuer den jeweiligen subjob_cluster
         if not "subjobs" in gw_config["subjob_clusters"][subjob_cluster]:
             gw_config["subjob_clusters"][subjob_cluster]["subjobs"] = []
 
+        # Haengt alle subjobs in diese Liste an.
         gw_config["subjob_clusters"][subjob_cluster]["subjobs"].append(subjob)
 
     # Then, complete the resource information per cluster
@@ -306,38 +453,18 @@ def complete_clusters(config):
             nproc = nproc_max
         clusterconf["nproc"] = nproc
 
+    # wie wird hier config angepasst?
+    breakpoint()
     return config
 
 
-def merge_single_entry_if_possible(entry, sourceconf, targetconf):
-    """
-    Merges a dictionary entry into a target dictionary that has he same key.
-
-    Arguments:
-        entry -- dictionary key
-        sourceconf -- dictionary
-        targetconf -- dictionary
-    Returns:
-        targetconf
-    """
-    if entry in sourceconf:
-        if entry in targetconf and not sourceconf[entry] == targetconf[entry]:
-            print(f"Mismatch found in {entry} for cluster {targetconf}")
-            sys.exit(-1)
-        targetconf[entry] = sourceconf[entry]
-    return targetconf
-
 
-def init_total_workflow(config):
+def calc_number_of_tasks(config):
     """
-    Add compute, tidy etc information already here!
-
-    Arguments:
-        config -- dictionary
-    Returns:
-        config
+    Calculates the total number of needed tasks
+    in phase compute
+    TODO: make this phase method??? Or recipe entry???
     """
-
     tasks = 0
     for model in config["general"]["valid_model_names"]:
         if "nproc" in config[model]:
@@ -350,63 +477,96 @@ def init_total_workflow(config):
                     and config[model]["nprocbr"] != "remove_from_namelist"
                 ):
                     tasks += config[model]["nprocar"] * config[model]["nprocbr"]
+    return tasks
 
-    prepcompute = {
-        "prepcompute": {
-            "nproc": 1,
-            "run_before": "compute",
-        }
-    }
-
-    compute = {
-        "compute": {
-            "nproc": tasks,
-            "run_before": "tidy",
-            "submit_to_batch_system": config["general"].get(
-                "submit_to_batch_system", True
-            ),
-            "run_on_queue": config["computer"]["partitions"]["compute"]["name"],
-        }
-    }
+def init_default_workflow(default_workflow, config):
+    """
+    Add workflow for precompute, compute, and tidy phases
+    etc information already here!
 
-    # das ist nur vorübergehend
-    tidy = {
-        "tidy": {
-            "nproc": 1,
-            "run_after": "compute",
-        }
-    }
+    Arguments:
+        default_workflow -- workflow object
+        config -- dictionary
+    Returns:
+        default_workflow
+    """
 
-    if not "workflow" in config["general"]:
-        config["general"]["workflow"] = {}
-    if not "subjob_clusters" in config["general"]["workflow"]:
-        config["general"]["workflow"]["subjob_clusters"] = {}
-    if not "subjobs" in config["general"]["workflow"]:
-        config["general"]["workflow"]["subjobs"] = prepcompute
-        config["general"]["workflow"]["subjobs"].update(compute)
-        config["general"]["workflow"]["subjobs"].update(tidy)
-    else:
-        if not "prepcompute" in config["general"]["workflow"]["subjobs"]:
-            config["general"]["workflow"]["subjobs"].update(prepcompute)
-        if not "compute" in config["general"]["workflow"]["subjobs"]:
-            config["general"]["workflow"]["subjobs"].update(compute)
-        if not "tidy" in config["general"]["workflow"]["subjobs"]:
-            config["general"]["workflow"]["subjobs"].update(tidy)
-    if not "last_task_in_queue" in config["general"]["workflow"]:
-        config["general"]["workflow"]["last_task_in_queue"] = "tidy"
-    if not "first_task_in_queue" in config["general"]["workflow"]:
-        config["general"]["workflow"]["first_task_in_queue"] = "prepcompute"
-
-    if not "next_run_triggered_by" in config["general"]["workflow"]:
-        config["general"]["workflow"]["next_run_triggered_by"] = "tidy"
+    # TODO: make a method of class Workflow
 
-    return config
+    # For testing only, set in some yaml config
+    workflow_phases = default_workflow.phases
 
+    # Calculating the number of tasks for each component/model
+    # needed for phase compute
+    tasks = calc_number_of_tasks(config)
+    # Create default workflow phase objects:
+    default_workflow.phases = []
+    for ind, phase in enumerate(workflow_phases):
+        default_workflow.phases.append(WorkflowPhase(phase))
+
+    for ind, phase in enumerate(default_workflow.phases):
+        if ind < default_workflow.num_phases_in_workflow() - 1:
+            phase.run_before = default_workflow.phases[ind+1].name
+        else:
+            phase.run_after = default_workflow.phases[ind-1].name
+        # TODO: this needs to be set somewhere else, or different.
+        phase.cluster = phase.name
+        if phase.name == "compute":
+            phase.nproc = tasks
+            phase.submit_to_batch_system = config["general"].get("submit_to_batch_system", True)
+            phase.run_on_queue = config["computer"]["partitions"]["compute"]["name"]
+
+    default_workflow.first_task_in_queue = default_workflow.phases[0].name      # prepcompute
+    default_workflow.last_task_in_queue = default_workflow.phases[-1].name      # tidy
+    # next_run_triggered_by only used to set last_task_in_queue
+    # TODO: why not set last_task_in_queue directly?
+    default_workflow.next_run_triggered_by = default_workflow.phases[-1].name   # tidy
+
+    return default_workflow
+
+def collect_all_user_workflows(user_workflow,config):
+    """
+    Collect all workflows set by config files.
+    """
+    user_workflow_phases = []
+    user_workflow_phases_names = []
+    for model in config:
+        if "workflow" in config[model]:
+            w_config = config[model]["workflow"]
+            if "subjobs" in w_config:
+                # copies component workflow config to new variable ref_config
+                ref_config = copy.deepcopy(w_config)
+                for subjob in list(copy.deepcopy(w_config["subjobs"])):
+                    # create a new phase object for subjob
+                    # new_phase_name = subjob + "_" + model
+                    # each subjob needs to have an unique name
+                    new_phase_name = subjob
+                    new_phase = UserWorkflowPhase(new_phase_name)
+                    if not new_phase_name in user_workflow_phases_names:
+                        user_workflow_phases_names.append(new_phase_name)
+                        # set attributes of user_workflow phases
+                        for key, value in w_config["subjobs"][subjob].items():
+                            new_phase.__setattr__(key, value)
+                        user_workflow_phases.append(new_phase)
+                    else:
+                        esm_parser.user_error("ERROR", "Two subjobs of the same name.")
+
+    user_workflow.user_phases = user_workflow_phases
+    return user_workflow
+
+
+
+
+
+
+
+################### Maybe outdated routines ######################
 
 def collect_all_workflow_information(config):
     """
     Collects all workflow information for each component entry in config
     (can be a model/component or a new entry (e.g. 'flows')
+    NOTE(NW): Should it be possible to set a workflow in the model section of the runscript? Why not?
 
     Checks if there are "workflow" entries in the user runscript and copies or merges them into
     config["general"]["workflow"]
@@ -448,6 +608,7 @@ def collect_all_workflow_information(config):
 
                     # subjobs (other than clusters) should be model specific
                     # subjobs that are defined in subjobs of components workflow configs and not in a subjob_cluster are copied to general with suffix of componet entry.
+                    # appends the model name to the subjob name and copy it to config["general"]
                     gw_config["subjobs"][subjob + "_" + model] = copy.deepcopy(
                         w_config["subjobs"][subjob]
                     )
@@ -459,49 +620,55 @@ def collect_all_workflow_information(config):
                     # for all subjobs now in general workflow
                     for other_subjob in gw_config["subjobs"]:
                         # sets run_after and run_before to correct subjob???
+                        # if a subjob of general workflow has run_after attribute to a user subjob (that has been renamed to subjob_model)
+                        # this run_after will be set to the new subjob name (subjob_model)
                         if "run_after" in gw_config["subjobs"][other_subjob]:
-                            if (
-                                gw_config["subjobs"][other_subjob]["run_after"]
-                                == subjob
-                            ):
-                                gw_config["subjobs"][other_subjob][
-                                    "run_after"
-                                ] == subjob + "_" + model
+                            if (gw_config["subjobs"][other_subjob]["run_after"] == subjob):
+                                gw_config["subjobs"][other_subjob]["run_after"] == subjob + "_" + model
                         if "run_before" in gw_config["subjobs"][other_subjob]:
-                            if (
-                                gw_config["subjobs"][other_subjob]["run_before"]
-                                == subjob
-                            ):
-                                gw_config["subjobs"][other_subjob][
-                                    "run_before"
-                                ] == subjob + "_" + model
+                            if (gw_config["subjobs"][other_subjob]["run_before"] == subjob):
+                                gw_config["subjobs"][other_subjob]["run_before"] == subjob + "_" + model
 
                     # if not in another cluster, each subjob gets its own
-                    if (
-                        not "subjob_cluster"
-                        in gw_config["subjobs"][subjob + "_" + model]
-                    ):
-                        gw_config["subjobs"][subjob + "_" + model][
-                            "subjob_cluster"
-                        ] = subjob  # + "_" + model
+                    if (not "subjob_cluster" in gw_config["subjobs"][subjob + "_" + model]):
+                        gw_config["subjobs"][subjob + "_" + model]["subjob_cluster"] = subjob  # + "_" + model
 
+            # checks if next_run:triggered_by is tidy or the one in user workflow, or empty?
             if "next_run_triggered_by" in w_config:
-                if not gw_config["next_run_triggered_by"] in [
-                    "tidy",
-                    w_config["next_run_triggered_by"],
-                ]:
+                if not gw_config["next_run_triggered_by"] in ["tidy", w_config["next_run_triggered_by"], ]:
                     print(f"Mismatch found setting next_run_triggered_by for workflow.")
                     sys.exit(-1)
                 else:
-                    gw_config["next_run_triggered_by"] = w_config[
-                        "next_run_triggered_by"
-                    ]
+                    gw_config["next_run_triggered_by"] = w_config["next_run_triggered_by"]
+                    # what if w_config["next_run_triggered_by"] is empty?
 
     return config
 
+def merge_single_entry_if_possible(entry, sourceconf, targetconf):
+    """
+    Merges a dictionary entry into a target dictionary that has he same key.
+
+    Arguments:
+        entry -- dictionary key
+        sourceconf -- dictionary
+        targetconf -- dictionary
+    Returns:
+        targetconf
+    """
+    if entry in sourceconf:
+        # Check if entry is already in targetconf AND different to sourceconf, then exit
+        if entry in targetconf and not sourceconf[entry] == targetconf[entry]:
+            print(f"Mismatch found in {entry} for cluster {targetconf}")
+            sys.exit(-1)
+        # Continues here if entry exists already in targetconf AND the same as sourceconf or
+        # not already in targetconf and set it to sourceconf
+        targetconf[entry] = sourceconf[entry]
+    return targetconf
 
 def merge_if_possible(source, target):
     """
+    Does the same as above but for a whole dict
+
     Merges the entries of source dictionary into target dictionary, if not already in.
     (Will not overwrite entries in target dictionary.)
 

From e329e146580c72c0f07fed4483a06d4d46571cd6 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Fri, 6 Oct 2023 18:02:51 +0200
Subject: [PATCH 07/98] Started to refactor workflow.py

---
 src/esm_runscripts/workflow.py             | 171 ++++++++-------------
 tests/test_esm_runscripts/test_workflow.py | 141 +++++++++++++++++
 2 files changed, 205 insertions(+), 107 deletions(-)
 create mode 100644 tests/test_esm_runscripts/test_workflow.py

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index 94828e44d..ce5e38e66 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -127,23 +127,26 @@ class WorkflowPhase:
     cluster = None
     next_submit = []
     called_from = None
+    batch_or_shell = "SimulationSetup"
+    order_in_cluster = "sequential"
+    run_only = None
+    skip_chunk_number = None
+    skip_run_number = None
 
     def __init__(self, phase_name):
         self.name = phase_name
 
 class UserWorkflowPhase(WorkflowPhase):
     """A user workflow phase class."""
-    batch_or_shell = "batch"
-    order_in_cluster = "concurrent"
     script = None
     script_dir = None
     call_function = None
     env_preparation = None
-    run_only = None
-    skip_chunk_number = None
 
     def __init__(self, phase_name):
         self.name = phase_name
+        batch_or_shell = "batch"
+        submit_to_batch_system = False
 
 def assemble_workflow(config):
     from . import Workflow
@@ -173,11 +176,11 @@ def assemble_workflow(config):
     # 4. Order user workflows into default workflow wrt. workflow attributs.
     workflow = order_clusters(workflow, config)
 
-    workflow = complete_clusters(workflow, config)
-    breakpoint()
-    config = prepend_newrun_job(config)
+    subjob_clusters = complete_clusters(workflow, config)
+    subjob_clusters = prepend_newrun_job(config)
     # 5. write the workflow to config
     config = workflow.write_to_config(config)
+    breakpoint()
     # 6. Remove old worklow from config
 
     # Set "jobtype" for the first task???
@@ -191,6 +194,9 @@ def assemble_workflow(config):
 
     return config
 
+def write_subjob_clusters_to_config(config, subjob_clusters):
+    config["general"]["subjob_clusters"] = subjob_clusters
+    return config
 
 def display_nicely(config):
     """
@@ -205,7 +211,7 @@ def display_nicely(config):
     return config
 
 
-def prepend_newrun_job(workflow, config):
+def prepend_newrun_job(workflow, config, subjob_clusters):
     """
     Looks for subjob_cluster that are set by user workflow (not a 'SimulationSetup')
     and do not follow a 'SimulationSetup' subjob_clusters.
@@ -216,18 +222,16 @@ def prepend_newrun_job(workflow, config):
         config -- dictionary
     Returns:
         workflow
+        subjob_clusters
     """
-    gw_config = config["general"]["workflow"]
-    first_cluster_name = gw_config["first_task_in_queue"]
-    print(first_cluster_name)
-    breakpoint()
-    first_cluster = gw_config["subjob_clusters"][first_cluster_name]
-    esm_parser.pprint_config(first_cluster)
+    first_cluster_name = workflow.first_task_in_queue
+    first_cluster = subjob_clusters[first_cluster_name]
+    #esm_parser.pprint_config(first_cluster)
 
     if not first_cluster.get("batch_or_shell", "Error") == "SimulationSetup":
 
-        last_cluster_name = gw_config["last_task_in_queue"]
-        last_cluster = gw_config["subjob_clusters"][last_cluster_name]
+        last_cluster_name = workflow.last_task_in_queue
+        last_cluster = subjob_clusters[last_cluster_name]
 
         new_first_cluster_name = "newrun"
         new_first_cluster = {
@@ -245,7 +249,7 @@ def prepend_newrun_job(workflow, config):
 
         first_cluster["called_from"] = "newrun"
 
-        gw_config["first_task_in_queue"] = "newrun"
+        workflow.first_task_in_queue = "newrun"
 
         new_subjob = {
             "newrun_general": {
@@ -257,10 +261,12 @@ def prepend_newrun_job(workflow, config):
             }
         }
 
-        gw_config["subjob_clusters"].update(new_first_cluster)
-        gw_config["subjobs"].update(new_subjob)
+        subjob_clusters.update(new_first_cluster)
 
-    return config
+# TODO: add new phase to workflow???
+        #gw_config["subjobs"].update(new_subjob)
+
+    return [workflow, subjob_clusters]
 
 def set_phase_attrib(workflow_phases, phase_name, attrib, value):
     for phase in workflow_phases:
@@ -335,6 +341,8 @@ def order_clusters(workflow, config):
         set_phase_attrib(workflow.phases+workflow.user_phases, called_cluster, "called_from", phase4.name)
         if called_cluster == workflow.first_task_in_queue:
             workflow.first_task_in_queue = phase4.name
+        if phase4.cluster == None:
+            phase4.cluster = phase4.name
 #
     first_cluster_name = workflow.first_task_in_queue
     last_cluster_name = workflow.last_task_in_queue
@@ -355,109 +363,58 @@ def complete_clusters(workflow, config):
     """
     Rearanges the subjobs to their subjobs_clusters ???
     Arguments:
+        workflow -- obj
         config -- dictionary
     Returns:
-        config
+        subjob_clusters -- dictionary
     """
-    gw_config = config["general"]["workflow"]
+    # sort into dict subjob_clusters
+    subjob_clusters = {}
 
-    # sortiert alles in dict subjob_clusters
-    clusters = []
     for phase in workflow.phases + workflow.user_phases:
-        if phase.cluster == None:
-            phase.cluster = phase.name
-        clusters.append(phase.cluster)
-
-    # Check if all subjobs of the same cluster have the same run_after
-
-
-    # TODO: calc nproc
-    # TODO: check for batch
-
-    # First, complete the matching subjobs <-> clusters
-
-    for subjob in gw_config["subjobs"]:
         # Erstellt ein leeres dict im dict subjob_clusters
-        subjob_cluster = gw_config["subjobs"][subjob]["subjob_cluster"]
-        if not subjob_cluster in gw_config["subjob_clusters"]:
-            gw_config["subjob_clusters"][subjob_cluster] = {}
+        if not phase.cluster in subjob_clusters:
+            subjob_clusters[phase.cluster] = {}
 
-        # Erstellt leere Liste fuer den jeweiligen subjob_cluster
-        if not "subjobs" in gw_config["subjob_clusters"][subjob_cluster]:
-            gw_config["subjob_clusters"][subjob_cluster]["subjobs"] = []
+        # Create empty list for each subjob_cluster
+        if not "subjobs" in subjob_clusters[phase.cluster]:
+            subjob_clusters[phase.cluster]["subjobs"] = []
 
-        # Haengt alle subjobs in diese Liste an.
-        gw_config["subjob_clusters"][subjob_cluster]["subjobs"].append(subjob)
+        # Append subjobs to list.
+        subjob_clusters[phase.cluster]["subjobs"].append(phase.name)
 
     # Then, complete the resource information per cluster
     # determine whether a cluster is to be submitted to a batch system
-
-    for subjob_cluster in gw_config["subjob_clusters"]:
+    for subjob_cluster in subjob_clusters:
         nproc_sum = nproc_max = 0
-        clusterconf = gw_config["subjob_clusters"][subjob_cluster]
-        for subjob in clusterconf["subjobs"]:
-            subjobconf = gw_config["subjobs"][subjob]
-
-            clusterconf = merge_single_entry_if_possible(
-                "submit_to_batch_system", subjobconf, clusterconf
-            )
-            clusterconf = merge_single_entry_if_possible(
-                "order_in_cluster", subjobconf, clusterconf
-            )
-
-            if subjobconf.get("submit_to_batch_system", False):
-                clusterconf["batch_or_shell"] = "batch"
-            elif subjobconf.get("script", False):
-                clusterconf["batch_or_shell"] = "shell"
-
-            clusterconf = merge_single_entry_if_possible(
-                "run_on_queue", subjobconf, clusterconf
-            )
-            clusterconf = merge_single_entry_if_possible(
-                "run_after", subjobconf, clusterconf
-            )
-            clusterconf = merge_single_entry_if_possible(
-                "run_before", subjobconf, clusterconf
-            )
-            clusterconf = merge_single_entry_if_possible(
-                "run_only", subjobconf, clusterconf
-            )
-            clusterconf = merge_single_entry_if_possible(
-                "skip_run_number", subjobconf, clusterconf
-            )
-            clusterconf = merge_single_entry_if_possible(
-                "skip_chunk_number", subjobconf, clusterconf
-            )
-
-            nproc_sum += subjobconf.get("nproc", 1)
-            nproc_max = max(subjobconf.get("nproc", 1), nproc_max)
-
-        if not "submit_to_batch_system" in clusterconf:
-            clusterconf["submit_to_batch_system"] = False
-        else:
-            if not "run_on_queue" in clusterconf:
-                print(
-                    f"Information on target queue is missing in cluster {clusterconf}."
-                )
-                sys.exit(-1)
-
-        if not clusterconf.get("batch_or_shell", False):
-            clusterconf["batch_or_shell"] = "SimulationSetup"
-
-        if not "order_in_cluster" in clusterconf:
-            clusterconf["order_in_cluster"] = "sequential"
+        attributes = ["submit_to_batch_system", "order_in_cluster", "run_on_queue", "run_after", "run_before", "run_only", "skip_run_number", "skip_chunk_number", "batch_or_shell"]
+        for attrib in attributes:
+            temp_list = []
+            for subjob in subjob_clusters[subjob_cluster]["subjobs"]:
+                if not get_phase_attrib(workflow.phases + workflow.user_phases, subjob, attrib) in temp_list:
+                    subjob_clusters[subjob_cluster][attrib] = get_phase_attrib(workflow.phases + workflow.user_phases, subjob, attrib)
+                else:
+                    print("Missmatch in attributes")
+                    sys.exit(-1)
+            nproc_sum += get_phase_attrib(workflow.phases + workflow.user_phases, subjob, "nproc")
+            nproc_max = max(get_phase_attrib(workflow.phases + workflow.user_phases, subjob, "nproc"), nproc_max)
 
-        if clusterconf["order_in_cluster"] == "concurrent":
+#        if subjob_clusters[subjob_cluster].get("submit_to_batch_system", False):
+#            subjob_clusters[subjob_cluster]["batch_or_shell"] = "batch"
+#        elif subjob_clusters[subjob_cluster].get("script", False):
+#            subjob_clusters[subjob_cluster]["batch_or_shell"] = "shell"
+#
+        if not "run_on_queue" in subjob_clusters[subjob_cluster]:
+            print(f"Information on target queue is missing in cluster {subjob_cluster}.")
+            sys.exit(-1)
+#
+# TODO: Check in nproc is calculated correctly
+        if subjob_clusters[subjob_cluster]["order_in_cluster"] == "concurrent":
             nproc = nproc_sum
         else:
             nproc = nproc_max
-        clusterconf["nproc"] = nproc
-
-    # wie wird hier config angepasst?
-    breakpoint()
-    return config
-
-
+        subjob_clusters[subjob_cluster]["nproc"] = nproc
+    return subjob_clusters
 
 def calc_number_of_tasks(config):
     """
diff --git a/tests/test_esm_runscripts/test_workflow.py b/tests/test_esm_runscripts/test_workflow.py
new file mode 100644
index 000000000..9ab44ac10
--- /dev/null
+++ b/tests/test_esm_runscripts/test_workflow.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env python
+
+""" Test for ``esm_runscripts.workflow``"""
+
+from esm_runscripts import workflow
+import pytest
+
+@pytest.fixture()
+def test_workflow_object():
+    test_workflow = workflow.Workflow(["prepcompute","compute","tidy"],always_run_with=["prepare","prepexp"])
+    return test_workflow
+
+@pytest.fixture()
+def test_config():
+    """Setup a test config dictionary."""
+    config = {
+        'computer': {'partitions': {'compute': {'name': 'test'}}},
+        'fesom': {
+            'nproc': 128},
+        'rnfmap': {
+            'nproc': 128},
+        'oasis3mct': {
+            'nproc': 128},
+        'xios': {
+            'nproc': 128},
+        'oifs': {
+            'workflow': {
+#                'next_run_triggered_by': 'tidy',
+                'subjobs': {
+                    'my_new_subjob_oifs': {
+                        'batch_or_shell': 'batch',
+                        'nproc': 1,
+                        'order_in_cluster': 'concurrent',
+                        'cluster': 'test_cluster',
+                        'run_after': 'tidy',
+                        'run_on_queue': 'compute',
+                        'script': 'helloworld.sh',
+                        'script_dir': '/work/ab0995/a270089/myrunscripts/',
+                        'submit_to_batch_system': True}}}},
+        'general': {
+            'valid_model_names': ['fesom', 'oifs', 'rnfmap', 'oasis3mct', 'xios'],
+            'workflow': {
+                'next_run_triggered_by': 'tidy',
+                'subjobs': {
+                    'my_new_subjob_general': {
+                        'batch_or_shell': 'batch',
+                        'order_in_cluster': 'concurrent',
+                        'run_on_queue': 'compute',
+                        'nproc': 1,
+                        'run_after': 'tidy',
+                        'script_dir': '/work/ab0995/a270089/myrunscripts/',
+                        'script': 'hallowelt.sh',
+                        'submit_to_batch_system': True}}}},
+        'flow': {
+            'workflow': {
+                'next_run_triggered_by': 'tidy',
+                'subjobs': {
+                    'my_new_subjob_flow': {
+                        'batch_or_shell': 'batch',
+                        'order_in_cluster': 'concurrent',
+                        'cluster': 'test_cluster',
+                        'run_on_queue': 'compute',
+                        'nproc': 1,
+                        'run_after': 'tidy',
+                        'script_dir': '/work/ab0995/a270089/myrunscripts/',
+                        'script': 'hallowelt.sh',
+                        'submit_to_batch_system': True}}}}}
+    return config
+
+def test_check_user_workflow_dependency(test_workflow_object, test_config):
+    test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config)
+    test_workflow_object = workflow.collect_all_user_workflows(test_workflow_object, test_config)
+    independent = test_workflow_object.check_user_workflow_dependency()
+    assert independent
+
+def test_check_user_workflow_dependency_2(test_workflow_object, test_config):
+    test_config['flow']['workflow']['subjobs']['my_new_subjob_flow']['run_after'] = 'my_new_subjob_oifs'
+    test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config)
+    test_workflow_object = workflow.collect_all_user_workflows(test_workflow_object, test_config)
+    independent = test_workflow_object.check_user_workflow_dependency()
+    assert not independent
+
+def test_check_unknown_phases(test_workflow_object, test_config):
+    test_config['flow']['workflow']['subjobs']['my_new_subjob_flow']['run_after'] = 'my_new_subjob'
+    test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config)
+    test_workflow_object = workflow.collect_all_user_workflows(test_workflow_object, test_config)
+    unknown_phases = test_workflow_object.check_unknown_phases()
+    assert unknown_phases
+
+def test_assemble_workflow():
+    pytest.fail("something wrong")
+
+def test_collect_all_user_workflow(test_config):
+    pytest.fail("something wrong")
+
+def test_calc_number_of_tasks():
+    pytest.fail("something wrong")
+
+def test_order_clusters(test_workflow_object, test_config):
+    test_config['flow']['workflow']['subjobs']['my_new_subjob_flow']['run_after'] = 'my_new_subjob_general'
+#    test_config['flow']['workflow']['next_run_triggered_by'] = 'my_new_subjob_flow'
+#    test_config['oifs']['workflow']['next_run_triggered_by'] = 'my_new_subjob_general'
+    #test_config['flow']['workflow']['subjobs']['my_new_subjob_flow']['run_before'] = 'my_new_subjob_oifs'
+    test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config)
+    test_workflow_object = workflow.collect_all_user_workflows(test_workflow_object, test_config)
+    test_workflow_object = workflow.order_clusters(test_workflow_object, test_config)
+    pytest.fail("something wrong")
+
+def test_complete_clusters(test_workflow_object, test_config):
+    test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config)
+    test_workflow_object = workflow.collect_all_user_workflows(test_workflow_object, test_config)
+    test_workflow_object = workflow.order_clusters(test_workflow_object, test_config)
+    subjob_clusters = workflow.complete_clusters(test_workflow_object, test_config)
+    pytest.fail("something wrong")
+
+def test_prepend_newrun_job(test_workflow_object, test_config):
+    test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config)
+    test_workflow_object = workflow.collect_all_user_workflows(test_workflow_object, test_config)
+    test_workflow_object = workflow.order_clusters(test_workflow_object, test_config)
+    subjob_clusters = workflow.complete_clusters(test_workflow_object, test_config)
+    [test_workflow_object, subjob_clusters] = workflow.prepend_newrun_job(test_workflow_object, test_config, subjob_clusters)
+    pytest.fail("something wrong")
+
+def test_write_to_config(test_workflow_object, test_config):
+    test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config)
+    test_workflow_object = workflow.collect_all_user_workflows(test_workflow_object, test_config)
+    test_workflow_object = workflow.order_clusters(test_workflow_object, test_config)
+    subjob_clusters = workflow.complete_clusters(test_workflow_object, test_config)
+    [test_workflow_object, subjob_clusters] = workflow.prepend_newrun_job(test_workflow_object, test_config, subjob_clusters)
+    config = test_workflow_object.write_to_config(test_config)
+    pytest.fail("something wrong")
+
+def test_write_subjob_clusters_to_config(test_workflow_object, test_config):
+    test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config)
+    test_workflow_object = workflow.collect_all_user_workflows(test_workflow_object, test_config)
+    test_workflow_object = workflow.order_clusters(test_workflow_object, test_config)
+    subjob_clusters = workflow.complete_clusters(test_workflow_object, test_config)
+    [test_workflow_object, subjob_clusters] = workflow.prepend_newrun_job(test_workflow_object, test_config, subjob_clusters)
+    test_config = workflow.write_subjob_clusters_to_config(test_config, subjob_clusters)
+    test_config = test_workflow_object.write_to_config(test_config)
+    pytest.fail("something wrong")

From d7f45659982a2ab05ddbb0560063c7ecc68862a9 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Mon, 16 Oct 2023 15:15:21 +0200
Subject: [PATCH 08/98] Placed the default definition of workflow phases in
 esm_software.../defauls.yaml .

---
 .../esm_software/esm_runscripts/defaults.yaml | 26 ++++++++++++-------
 .../esm_runscripts/esm_plugins.yaml           |  4 +--
 src/esm_runscripts/workflow.py                | 22 +++++++++++++---
 3 files changed, 37 insertions(+), 15 deletions(-)

diff --git a/configs/esm_software/esm_runscripts/defaults.yaml b/configs/esm_software/esm_runscripts/defaults.yaml
index 43b918aa7..50a0eb717 100644
--- a/configs/esm_software/esm_runscripts/defaults.yaml
+++ b/configs/esm_software/esm_runscripts/defaults.yaml
@@ -1,11 +1,17 @@
-
 per_model_defaults:
-        file_movements:
-                default:
-                        all_directions: copy
-                bin:
-                        init_to_exp:    copy
-                        exp_to_run:     copy
-                        run_to_work:    copy
-                        work_to_run:    copy
-
+    file_movements:
+        default:
+            all_directions: copy
+        bin:
+            init_to_exp:    copy
+            exp_to_run:     copy
+            run_to_work:    copy
+            work_to_run:    copy
+default_workflow_phases:
+    phases:
+        - prepcompute
+        - compute
+        - tidy
+    always_run_with:
+        - prepare
+        - prepexp
diff --git a/configs/esm_software/esm_runscripts/esm_plugins.yaml b/configs/esm_software/esm_runscripts/esm_plugins.yaml
index 4c2d2ae94..4e47c7dca 100644
--- a/configs/esm_software/esm_runscripts/esm_plugins.yaml
+++ b/configs/esm_software/esm_runscripts/esm_plugins.yaml
@@ -23,7 +23,7 @@ core:
                         - "initialize_experiment_logfile"
                         - "copy_tools_to_thisrun"
                         - "_copy_preliminary_files_from_experiment_to_thisrun"
-                        
+
 
 
                 prepcompute:
@@ -52,7 +52,7 @@ core:
                         - "throw_away_some_infiles"
 
 
-                observe:          
+                observe:
                         - "init_monitor_file"
                           #- "get_last_jobid"
                         - "wait_and_observe"
diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index ce5e38e66..6d3f64b0d 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -161,10 +161,26 @@ def assemble_workflow(config):
     """
 
     # 1. Generate default workflow object
-    #TODO: preset of default workflow phases should be set in some config file.
-    workflow = Workflow(["prepcompute","compute","tidy"],always_run_with=["prepare","prepexp"])
     # initialize the default workflow as Workflow object
-    # TODO: NW where are these default phases defined???
+    # TODO: NW where are these default phases defined? For now I placed it in
+    # esm_tools/configs/esm_software/esm_runscripts/defaults.yaml
+    phases = []
+    always_run_with = []
+    if "defaults.yaml" in config["general"]:
+        if "default_workflow_phases" in config["general"]["defaults.yaml"]:
+            phases = config["general"]["defaults.yaml"]["default_workflow_phases"]["phases"]
+            if "always_run_with" in config["general"]["defaults.yaml"]["default_workflow_phases"]:
+                always_run_with = config["general"]["defaults.yaml"]["default_workflow_phases"]["always_run_with"]
+
+    if phases and always_run_with:
+        workflow = Workflow(phases, always_run_with=always_run_with)
+    elif phases:
+        workflow = Workflow(phases)
+    else:
+        esm_parser.user_error("ERROR", "No default workflow phases defined.")
+        # Note: NW Should this work also if no default phases are set in such a config file, but
+        # instead all workflow phases are defined in different configs and/or runscripts?
+        # TODO: NW Where could a user define a different (default) phase list? Or should this be changed in defaults.yaml?
     # 2. Initialize default workflow phases
     workflow = init_default_workflow(workflow, config)
     # 3. Read in workflows from runscript and config files

From a2fc1a4c6ef8de0b95ca68935cdc3c33c034d980 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <31928930+nwieters@users.noreply.github.com>
Date: Tue, 17 Oct 2023 16:09:48 +0200
Subject: [PATCH 09/98] Update src/esm_plugin_manager/esm_plugin_manager.py

Co-authored-by: Miguel <63242832+mandresm@users.noreply.github.com>
---
 src/esm_plugin_manager/esm_plugin_manager.py | 26 ++++++++++++--------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/src/esm_plugin_manager/esm_plugin_manager.py b/src/esm_plugin_manager/esm_plugin_manager.py
index dbfcb4c59..ec355ec6f 100644
--- a/src/esm_plugin_manager/esm_plugin_manager.py
+++ b/src/esm_plugin_manager/esm_plugin_manager.py
@@ -21,16 +21,22 @@ def read_plugin_information(plugins_bare, recipe, needs_parse=True):
     """
     Reads in plugin information from the pluginfile = esm_plugins.yaml
 
-    Arguments:
-        plugins_bare -- disctionary as it is read in by function 'read_recipe'
-        recipe -- dictionary of all workitems of an recipe
-        needs_parse -- True (default) or False
-
-    Returns:
-        plugins - dictionary that has information for each workitem of recipe:
-                    module: e.g. esm_runscripts
-                    submodule: e.g. prepare (this is the Python file where the workitem function is defined.
-                    type: e.g. core
+    Parameters
+    ----------
+    plugins_bare : dict
+        Dictionary as it is read in by function ``read_recipe``
+    recipe : dict
+        Dictionary of all workitems of a recipe
+    needs_parse : bool
+        True (default) or False
+
+    Returns
+    -------
+    plugins : dict
+        Dictionary that has information for each workitem of the recipe:
+        - module: e.g. esm_runscripts
+        - submodule: e.g. prepare (this is the Python file where the workitem function is defined.
+        - type: e.g. core
     """
     if needs_parse:
         plugins_bare = yaml_file_to_dict(plugins_bare)

From fef151d4e821597883acd400c60a156a89064f33 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <31928930+nwieters@users.noreply.github.com>
Date: Tue, 17 Oct 2023 16:21:03 +0200
Subject: [PATCH 10/98] Update src/esm_plugin_manager/esm_plugin_manager.py

Co-authored-by: Miguel <63242832+mandresm@users.noreply.github.com>
---
 src/esm_plugin_manager/esm_plugin_manager.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/esm_plugin_manager/esm_plugin_manager.py b/src/esm_plugin_manager/esm_plugin_manager.py
index ec355ec6f..c465cb0ea 100644
--- a/src/esm_plugin_manager/esm_plugin_manager.py
+++ b/src/esm_plugin_manager/esm_plugin_manager.py
@@ -151,13 +151,15 @@ def work_through_recipe(recipe, plugins, config):
     """
     Works through the esm_runscripts recipes and plugin recipes.
 
-    Arguments:
-        recipe -- dictionary            # What is in these two dictionaries? Where do the entries are comming from?
-        plugins -- dictionary
-        config -- dictionary
+    Parameters
+    ----------
+        recipe : dict            # What is in these two dictionaries? Where do the entries are comming from?
+        plugins : dict
+        config : dict
 
-    Returns:
-        config
+    Returns
+    -------
+        config : dict
     """
     if config.get("general", {}).get("debug_recipe", False):
         import pdb

From c733a0041c9cb9fc20e179eeb5c794a90772126c Mon Sep 17 00:00:00 2001
From: Nadine Wieters <31928930+nwieters@users.noreply.github.com>
Date: Tue, 17 Oct 2023 16:21:31 +0200
Subject: [PATCH 11/98] Update src/esm_runscripts/workflow.py

Co-authored-by: Miguel <63242832+mandresm@users.noreply.github.com>
---
 src/esm_runscripts/workflow.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index 6d3f64b0d..96970b480 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -34,7 +34,7 @@ def write_to_config(self, config):
         """
         Write to config.
         """
-        # NW: It is assumed here, that there are no workflows in config["general"]
+        # It is assumed here, that there are no workflows in config["general"]
         # or that these are removed after collect_...
         config["general"]["workflow"] = {}
         config["general"]["workflow"].update(self.__dict__)

From 506c0a75c3b939d3cb2547e8a40ed507232eaeac Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Tue, 17 Oct 2023 16:40:56 +0200
Subject: [PATCH 12/98] Correcte syntax of docstrings.

---
 src/esm_runscripts/workflow.py | 129 ++++++++++++++++++++++-----------
 1 file changed, 85 insertions(+), 44 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index 96970b480..460c52e3d 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -16,9 +16,16 @@ def __init__(self, phases, always_run_with=[]):
         """
         Create a new workflow.
 
-        Arguments:
-            phases -- List of workflow phases
-            always_run_with -- List of phases that precedes each phase
+        Parameters
+        ----------
+        phases : list
+            List of workflow phases
+        always_run_with : list
+            List of phases that precedes each phase
+
+        Returns
+        -------
+        none
         """
         # TODO: NW call here the phase object ???
         self.phases = phases
@@ -78,10 +85,14 @@ def skip_cluster(self, config):
         """
         Checks if a phase/cluster can be skipped.
         Needed keywords: run_only, skip_chunk_number
-        Arguments:
+
+        Parameters
+        ----------
             self
-            config
-        Returns:
+            config : dict
+
+        Returns
+        -------
             True or False
         """
         #gw_config = config["general"]["workflow"]
@@ -154,10 +165,13 @@ def assemble_workflow(config):
     Assembles the workflow tasks.
     Is called from the plugin recipe prepcompute.
 
-    Arguments:
-        config -- dictionary
-    Returns:
-        config
+    Parameters
+    ----------
+        config : dict
+
+    Returns
+    -------
+        config : dict
     """
 
     # 1. Generate default workflow object
@@ -218,10 +232,13 @@ def display_nicely(config):
     """
     Pretty prints the workflow configuration assembled in config["general"].
 
-    Arguments:
-        config -- dictionary
-    Returns:
-        config
+    Parameters
+    ----------
+        config : dict
+
+    Returns
+    -------
+        config : dict
     """
     esm_parser.pprint_config(config["general"]["workflow"])
     return config
@@ -234,9 +251,12 @@ def prepend_newrun_job(workflow, config, subjob_clusters):
     E.g. if two user workflow are the last two subjob_clusters ???
     Any other example cases when this is the case?
 
-    Arguments:
-        config -- dictionary
-    Returns:
+    Parameters
+    ----------
+        config : dict
+
+    Returns
+    -------
         workflow
         subjob_clusters
     """
@@ -302,9 +322,12 @@ def order_clusters(workflow, config):
     """
     Put the subjob_clusters in order ???
 
-    Arguments:
-        config -- dictionary
-    Returns:
+    Parameters
+    ----------
+        config : dict
+
+    Returns
+    -------
         workflow
     """
     independent = workflow.check_user_workflow_dependency()
@@ -378,11 +401,15 @@ def complete_clusters(workflow, config):
     # have the same cluster entry.
     """
     Rearanges the subjobs to their subjobs_clusters ???
-    Arguments:
-        workflow -- obj
-        config -- dictionary
-    Returns:
-        subjob_clusters -- dictionary
+
+    Parameters
+    ----------
+        workflow
+        config : dict
+
+    Returns
+    -------
+        subjob_clusters : dict
     """
     # sort into dict subjob_clusters
     subjob_clusters = {}
@@ -457,10 +484,14 @@ def init_default_workflow(default_workflow, config):
     Add workflow for precompute, compute, and tidy phases
     etc information already here!
 
-    Arguments:
-        default_workflow -- workflow object
-        config -- dictionary
-    Returns:
+    Parameters
+    ----------
+        default_workflow
+            workflow object
+        config : dict
+
+    Returns
+    -------
         default_workflow
     """
 
@@ -544,10 +575,13 @@ def collect_all_workflow_information(config):
     Checks if there are "workflow" entries in the user runscript and copies or merges them into
     config["general"]["workflow"]
 
-    Arguments:
-        config -- dictionary
-    Returns:
-        config
+    Parameters
+    ----------
+        config : dict
+
+    Returns
+    -------
+        config : dict
     """
     for model in config:
         if "workflow" in config[model]:
@@ -621,12 +655,16 @@ def merge_single_entry_if_possible(entry, sourceconf, targetconf):
     """
     Merges a dictionary entry into a target dictionary that has he same key.
 
-    Arguments:
-        entry -- dictionary key
-        sourceconf -- dictionary
-        targetconf -- dictionary
-    Returns:
-        targetconf
+    Parameters
+    ----------
+        entry : str
+            dictionary key
+        sourceconf : dict
+        targetconf : dict
+
+    Returns
+    -------
+        targetconf : dict
     """
     if entry in sourceconf:
         # Check if entry is already in targetconf AND different to sourceconf, then exit
@@ -645,11 +683,14 @@ def merge_if_possible(source, target):
     Merges the entries of source dictionary into target dictionary, if not already in.
     (Will not overwrite entries in target dictionary.)
 
-    Arguments:
-        source -- dictionary
-        target -- dictionary
-    Returns:
-        target
+    Parameters
+    ----------
+        source : dict
+        target : dict
+
+    Returns
+    -------
+        target : dict
     """
     for entry in source:
         if entry in target:

From 03b6e24c61782a966495748801667fc139e8cffe Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Tue, 17 Oct 2023 16:57:17 +0200
Subject: [PATCH 13/98] Renamed default_workflow_phases entry in defaults.yaml

---
 .../esm_software/esm_runscripts/defaults.yaml  |  2 +-
 src/esm_runscripts/workflow.py                 | 18 +++++++++---------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/configs/esm_software/esm_runscripts/defaults.yaml b/configs/esm_software/esm_runscripts/defaults.yaml
index 50a0eb717..2aa0684dd 100644
--- a/configs/esm_software/esm_runscripts/defaults.yaml
+++ b/configs/esm_software/esm_runscripts/defaults.yaml
@@ -7,7 +7,7 @@ per_model_defaults:
             exp_to_run:     copy
             run_to_work:    copy
             work_to_run:    copy
-default_workflow_phases:
+workflow:
     phases:
         - prepcompute
         - compute
diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index 460c52e3d..0c8f215d6 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -27,7 +27,7 @@ def __init__(self, phases, always_run_with=[]):
         -------
         none
         """
-        # TODO: NW call here the phase object ???
+        # TODO: Call here the phase object ???
         self.phases = phases
         self.always_run_with = always_run_with
 
@@ -176,15 +176,15 @@ def assemble_workflow(config):
 
     # 1. Generate default workflow object
     # initialize the default workflow as Workflow object
-    # TODO: NW where are these default phases defined? For now I placed it in
+    # TODO: Where are these default phases defined? For now I placed it in
     # esm_tools/configs/esm_software/esm_runscripts/defaults.yaml
     phases = []
     always_run_with = []
     if "defaults.yaml" in config["general"]:
-        if "default_workflow_phases" in config["general"]["defaults.yaml"]:
-            phases = config["general"]["defaults.yaml"]["default_workflow_phases"]["phases"]
-            if "always_run_with" in config["general"]["defaults.yaml"]["default_workflow_phases"]:
-                always_run_with = config["general"]["defaults.yaml"]["default_workflow_phases"]["always_run_with"]
+        if "workflow" in config["general"]["defaults.yaml"]:
+            phases = config["general"]["defaults.yaml"]["workflow"]["phases"]
+            if "always_run_with" in config["general"]["defaults.yaml"]["workflow"]:
+                always_run_with = config["general"]["defaults.yaml"]["workflow"]["always_run_with"]
 
     if phases and always_run_with:
         workflow = Workflow(phases, always_run_with=always_run_with)
@@ -192,9 +192,9 @@ def assemble_workflow(config):
         workflow = Workflow(phases)
     else:
         esm_parser.user_error("ERROR", "No default workflow phases defined.")
-        # Note: NW Should this work also if no default phases are set in such a config file, but
+        # Note: Should this work also if no default phases are set in such a config file, but
         # instead all workflow phases are defined in different configs and/or runscripts?
-        # TODO: NW Where could a user define a different (default) phase list? Or should this be changed in defaults.yaml?
+        # TODO: Where could a user define a different (default) phase list? Or should this be changed in defaults.yaml?
     # 2. Initialize default workflow phases
     workflow = init_default_workflow(workflow, config)
     # 3. Read in workflows from runscript and config files
@@ -570,7 +570,7 @@ def collect_all_workflow_information(config):
     """
     Collects all workflow information for each component entry in config
     (can be a model/component or a new entry (e.g. 'flows')
-    NOTE(NW): Should it be possible to set a workflow in the model section of the runscript? Why not?
+    NOTE: Should it be possible to set a workflow in the model section of the runscript? Why not?
 
     Checks if there are "workflow" entries in the user runscript and copies or merges them into
     config["general"]["workflow"]

From b3b5fb187b60dbafd58e9cc1376cf19a2d1c1a9b Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Wed, 18 Oct 2023 09:22:28 +0200
Subject: [PATCH 14/98] Renamed workflow method, added property decorator and
 added test.

---
 src/esm_runscripts/workflow.py             | 5 +++--
 tests/test_esm_runscripts/test_workflow.py | 4 ++++
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index 0c8f215d6..fe838cf8b 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -31,7 +31,8 @@ def __init__(self, phases, always_run_with=[]):
         self.phases = phases
         self.always_run_with = always_run_with
 
-    def num_phases_in_workflow(self):
+    @property
+    def num_phases(self):
         """
         Return the number of phases in workflow.
         """
@@ -509,7 +510,7 @@ def init_default_workflow(default_workflow, config):
         default_workflow.phases.append(WorkflowPhase(phase))
 
     for ind, phase in enumerate(default_workflow.phases):
-        if ind < default_workflow.num_phases_in_workflow() - 1:
+        if ind < default_workflow.num_phases - 1:
             phase.run_before = default_workflow.phases[ind+1].name
         else:
             phase.run_after = default_workflow.phases[ind-1].name
diff --git a/tests/test_esm_runscripts/test_workflow.py b/tests/test_esm_runscripts/test_workflow.py
index 9ab44ac10..855228146 100644
--- a/tests/test_esm_runscripts/test_workflow.py
+++ b/tests/test_esm_runscripts/test_workflow.py
@@ -67,6 +67,10 @@ def test_config():
                         'submit_to_batch_system': True}}}}}
     return config
 
+def test_num_phases(test_workflow_object, test_config):
+    test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config)
+    assert test_workflow_object.num_phases == 3
+
 def test_check_user_workflow_dependency(test_workflow_object, test_config):
     test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config)
     test_workflow_object = workflow.collect_all_user_workflows(test_workflow_object, test_config)

From 6260278940d46b08f9a1a64f7bb2c2cc665d681c Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Wed, 18 Oct 2023 10:10:46 +0200
Subject: [PATCH 15/98] Added a temporary workflow to awicm3.

---
 configs/setups/awicm3/awicm3.yaml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/configs/setups/awicm3/awicm3.yaml b/configs/setups/awicm3/awicm3.yaml
index 2ac1f8722..e95af50ab 100644
--- a/configs/setups/awicm3/awicm3.yaml
+++ b/configs/setups/awicm3/awicm3.yaml
@@ -296,6 +296,11 @@ oifs:
         tl_o3_data_dir: ${input_dir}/${version}/climate/
         ICMGG_INIT_name: "_${fesom.resolution}"
 
+        workflow:
+            next_run_triggered_by: tidy
+            subjobs:
+                my_new_subjob:
+                    batch_or_shell: shell
 
         # Postprocessing
         choose_general.postprocessing:

From de0e598ccee1f4fa05ad8001bccffa0421a3eef5 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Fri, 20 Oct 2023 17:18:38 +0200
Subject: [PATCH 16/98] Added method to return a list of an attribute for all
 phases.

---
 src/esm_runscripts/workflow.py | 44 +++++++++++++++++++++++++++-------
 1 file changed, 36 insertions(+), 8 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index fe838cf8b..c22a708a5 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -38,6 +38,27 @@ def num_phases(self):
         """
         return len(self.phases)
 
+    def get_phases_attribs_list(self, phase_type, attrib):
+        """
+        Return the names of all phases as list.
+
+        Parameters
+        ----------
+            self: class Workflow
+            phase_type: str (default or user)
+            attribute: str
+
+        Returns
+        -------
+            phases_attribs : list
+        """
+        if phase_type == 'user':
+            phases_attribs = [getattr(phase, attrib) for phase in self.user_phases]
+        else:
+            phases_attribs = [getattr(phase, attrib) for phase in self.phases]
+
+        return phases_attribs
+
     def write_to_config(self, config):
         """
         Write to config.
@@ -55,15 +76,20 @@ def write_to_config(self, config):
 
     def check_user_workflow_dependency(self):
         """
-        Check whether the user defined workflow phases are independent from eachother or not.
+        Check whether the user defined workflow phases are independent from each other or not.
         """
         independent = False
-        user_phases_names = [phase.name for phase in self.user_phases]
-        run_after_list = [phase.run_after for phase in self.user_phases]
-        run_before_list = [phase.run_before for phase in self.user_phases]
+        user_phases_names = self.get_phases_attribs_list('user','name')
+        run_after_list = self.get_phases_attribs_list('user','run_after')
+        run_before_list = self.get_phases_attribs_list('user','run_before')
+
+        # All user phases are independent from each other, if
+        # none of the ``user_phases_names`` are found in the union of ``run_before_list`` and ``run_after_list``
+        # That means alls user phases can be run independent from each other.
         if not set(user_phases_names).intersection(set(run_after_list).union(set(run_before_list))):
             independent = True
         else:
+            # TODO: What todo in other case?
             independent = False
 
         return independent
@@ -73,11 +99,13 @@ def check_unknown_phases(self):
         Check if any user phase addresses an unknown workflow phase.
         """
         unknown_user_phase = True
-        phases_names = [phase.name for phase in self.phases]
-        user_phases_names = [phase.name for phase in self.user_phases]
+        phases_names = self.get_phases_attribs_list('default','name')
+        user_phases_names = self.get_phases_attribs_list('user','name')
+        run_after = self.get_phases_attribs_list('user','run_after')
+        run_before = self.get_phases_attribs_list('user','run_before')
         # Filter out all falsy items (e.g. [], "", None)
-        run_after_list = list(filter(None, [phase.run_after for phase in self.user_phases]))
-        run_before_list = list(filter(None, [phase.run_before for phase in self.user_phases]))
+        run_after_list = list(filter(None, run_after))
+        run_before_list = list(filter(None, run_before))
 
         unknown_user_phases = set(run_after_list).union(set(run_before_list)).difference(set(user_phases_names).union(set(phases_names)))
         return unknown_user_phases

From 11fa96b88592cf19883037b4d90f2564bcfa1c85 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Mon, 23 Oct 2023 17:11:22 +0200
Subject: [PATCH 17/98] Convert functions into methods, moved class variables
 to be instance variables.

---
 src/esm_runscripts/workflow.py             | 670 +++++++++++----------
 tests/test_esm_runscripts/test_workflow.py |  52 +-
 2 files changed, 369 insertions(+), 353 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index c22a708a5..3644d3503 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -5,12 +5,6 @@
 
 class Workflow:
     """A workflow class."""
-    default_phases = []
-    user_phases = []
-    always_run_with = []
-    first_task_in_queue = ""
-    last_task_in_queue = ""
-    next_run_triggered_by = ""
 
     def __init__(self, phases, always_run_with=[]):
         """
@@ -19,14 +13,20 @@ def __init__(self, phases, always_run_with=[]):
         Parameters
         ----------
         phases : list
-            List of workflow phases
+            List of workflow phases names
         always_run_with : list
-            List of phases that precedes each phase
+            List of phases that precedes each phase in phases
 
         Returns
         -------
         none
         """
+        self.default_phases = []
+        self.user_phases = []
+        self.always_run_with = []
+        self.first_task_in_queue = ""
+        self.last_task_in_queue = ""
+        self.next_run_triggered_by = ""
         # TODO: Call here the phase object ???
         self.phases = phases
         self.always_run_with = always_run_with
@@ -59,6 +59,86 @@ def get_phases_attribs_list(self, phase_type, attrib):
 
         return phases_attribs
 
+    def init_default_workflow(self, config):
+        """
+        Add workflow for precompute, compute, and tidy phases
+        etc information already here!
+    
+        Parameters
+        ----------
+            self : Workflow object
+            config : dict
+    
+        Returns
+        -------
+            self : Workflow object
+        """
+    
+        workflow_phases = self.phases
+    
+        # Calculating the number of tasks for each component/model
+        # needed for phase compute
+        tasks = calc_number_of_tasks(config)
+        # Initiate/create default workflow phase objects
+        # and reset/append to Workflow.phases variable
+        self.phases = []
+        for ind, phase in enumerate(workflow_phases):
+            self.phases.append(WorkflowPhase(phase))
+    
+        for ind, phase in enumerate(self.phases):
+            if ind < self.num_phases - 1:
+                # Set run_before attrib of all phases (except last on) to the next phase name
+                phase.run_before = self.phases[ind+1].name
+            else:
+                # Set run_after attrib of last phase to previous phase name
+                phase.run_after = self.phases[ind-1].name
+
+            # TODO: this needs to be set somewhere else, or different.
+            phase.cluster = phase.name
+            if phase.name == "compute":
+                phase.nproc = tasks
+                phase.submit_to_batch_system = config["general"].get("submit_to_batch_system", True)
+                phase.run_on_queue = config["computer"]["partitions"]["compute"]["name"]
+    
+        # Set default workflow values
+        self.first_task_in_queue = self.phases[0].name      # prepcompute
+        self.last_task_in_queue = self.phases[-1].name      # tidy
+        # next_run_triggered_by only used to set last_task_in_queue
+        # TODO: why not set last_task_in_queue directly?
+        self.next_run_triggered_by = self.phases[-1].name   # tidy
+    
+        return self
+
+    def collect_all_user_workflows(self,config):
+        """
+        Collect all workflows set by config files.
+        """
+        user_workflow_phases = []
+        user_workflow_phases_names = []
+        for model in config:
+            if "workflow" in config[model]:
+                w_config = config[model]["workflow"]
+                if "subjobs" in w_config:
+                    # copies component workflow config to new variable ref_config
+                    ref_config = copy.deepcopy(w_config)
+                    for subjob in list(copy.deepcopy(w_config["subjobs"])):
+                        # create a new phase object for subjob
+                        # new_phase_name = subjob + "_" + model
+                        # each subjob needs to have an unique name
+                        new_phase_name = subjob
+                        new_phase = UserWorkflowPhase(new_phase_name)
+                        if not new_phase_name in user_workflow_phases_names:
+                            user_workflow_phases_names.append(new_phase_name)
+                            # set attributes of user_workflow phases
+                            for key, value in w_config["subjobs"][subjob].items():
+                                new_phase.__setattr__(key, value)
+                            user_workflow_phases.append(new_phase)
+                        else:
+                            esm_parser.user_error("ERROR", "Two subjobs of the same name.")
+    
+        self.user_phases = user_workflow_phases
+        return self
+
     def write_to_config(self, config):
         """
         Write to config.
@@ -77,6 +157,14 @@ def write_to_config(self, config):
     def check_user_workflow_dependency(self):
         """
         Check whether the user defined workflow phases are independent from each other or not.
+
+        Arguments
+        ---------
+            self : Workflow object
+
+        Returns
+        -------
+            independent : bool (default: False)
         """
         independent = False
         user_phases_names = self.get_phases_attribs_list('user','name')
@@ -103,13 +191,218 @@ def check_unknown_phases(self):
         user_phases_names = self.get_phases_attribs_list('user','name')
         run_after = self.get_phases_attribs_list('user','run_after')
         run_before = self.get_phases_attribs_list('user','run_before')
-        # Filter out all falsy items (e.g. [], "", None)
+        # Filter out all elements that are None
+        # ``filter(None, anylist)`` will filter out all items of anylist, for which ``if item`` is false (e.g. [], "", None, {}, '').
+        # See also https://docs.python.org/3/library/functions.html#filter
         run_after_list = list(filter(None, run_after))
         run_before_list = list(filter(None, run_before))
 
         unknown_user_phases = set(run_after_list).union(set(run_before_list)).difference(set(user_phases_names).union(set(phases_names)))
         return unknown_user_phases
 
+    def order_clusters(self, config):
+        """
+        Put the subjob_clusters in order ???
+    
+        Parameters
+        ----------
+            config : dict
+    
+        Returns
+        -------
+            self : Workflow object
+        """
+        independent = self.check_user_workflow_dependency()
+        unknown_phases = self.check_unknown_phases()
+    
+        if unknown_phases:
+            esm_parser.user_error("ERROR", "Undefined subjob/phase.")
+    
+        for user_phase in self.user_phases:
+    # TODO: Check if run_after or run_before is set for each user phase
+            if not user_phase.run_before and not user_phase.run_after:
+                esm_parser.user_error("ERROR", "Don't know when to start user_phase. Please set run_after or run_before")
+    # TODO: Check if not both run_after and run_before are set at the same time for each user phase
+            if user_phase.run_before and user_phase.run_after:
+                esm_parser.user_error("ERROR", "Both run_after and run_before are. Don't know when to start user_phase. Please only set run_after or run_before")
+    
+    # TODO: Correct for "last_task_in_queue" if necessary
+        # Collect all next_run_triggered_by entrie
+        next_triggered = []
+        run_after = []
+        for model in config:
+            if "workflow" in config[model]:
+                if "next_run_triggered_by" in config[model]["workflow"]:
+                    next_triggered.append(config[model]["workflow"]["next_run_triggered_by"])
+        next_triggered = list(filter((self.next_run_triggered_by).__ne__, next_triggered))
+        if len(next_triggered) > 1:
+            esm_parser.user_error("ERROR", f"Mismatch found setting next_run_triggered_by for workflow.")
+        elif len(next_triggered) == 1:
+            self.next_run_triggered_by = next_triggered[0]
+        #else: let default
+    
+    # Fill up "next_submit" list
+        next_submits = {}
+        for phase in self.phases + self.user_phases:
+            next_submits[phase.name] = []
+        for phase2 in self.phases + self.user_phases:
+            if not phase2.run_after == None:
+                next_submits[phase2.run_after].append(phase2.name)
+                phase2.called_from = phase2.run_after
+        for phase3 in self.phases + self.user_phases:
+            phase3.next_submit = next_submits[phase3.name]
+    
+        for phase4 in self.phases + self.user_phases:
+            calling_cluster = phase4.run_after
+    #
+            if calling_cluster == self.last_task_in_queue:
+                self.last_task_in_queue = phase4.name
+    #
+            called_cluster = phase4.run_before
+            set_phase_attrib(self.phases+self.user_phases, called_cluster, "called_from", phase4.name)
+            if called_cluster == self.first_task_in_queue:
+                self.first_task_in_queue = phase4.name
+            if phase4.cluster == None:
+                phase4.cluster = phase4.name
+    #
+        first_cluster_name = self.first_task_in_queue
+        last_cluster_name = self.last_task_in_queue
+    #
+        value = get_phase_attrib(self.phases+self.user_phases, last_cluster_name, "next_submit")
+        if not first_cluster_name in get_phase_attrib(self.phases+self.user_phases, last_cluster_name, "next_submit"):
+            set_phase_attrib(self.phases+self.user_phases, last_cluster_name, "next_submit", first_cluster_name)
+        if not last_cluster_name == get_phase_attrib(self.phases+self.user_phases, first_cluster_name, "called_from"):
+            set_phase_attrib(self.phases+self.user_phases, first_cluster_name, "called_from", last_cluster_name)
+    #
+        return self
+
+    def complete_clusters(config):
+        # all that are within a next_submit list are in a cluster if:
+        # run concurrently
+        # have the same cluster entry.
+        """
+        Rearanges the subjobs to their subjobs_clusters ???
+    
+        Parameters
+        ----------
+            self : Workflow object
+            config : dict
+    
+        Returns
+        -------
+            subjob_clusters : dict
+        """
+        # sort into dict subjob_clusters
+        subjob_clusters = {}
+    
+        for phase in self.phases + self.user_phases:
+            # Erstellt ein leeres dict im dict subjob_clusters
+            if not phase.cluster in subjob_clusters:
+                subjob_clusters[phase.cluster] = {}
+    
+            # Create empty list for each subjob_cluster
+            if not "subjobs" in subjob_clusters[phase.cluster]:
+                subjob_clusters[phase.cluster]["subjobs"] = []
+    
+            # Append subjobs to list.
+            subjob_clusters[phase.cluster]["subjobs"].append(phase.name)
+    
+        # Then, complete the resource information per cluster
+        # determine whether a cluster is to be submitted to a batch system
+        for subjob_cluster in subjob_clusters:
+            nproc_sum = nproc_max = 0
+            attributes = ["submit_to_batch_system", "order_in_cluster", "run_on_queue", "run_after", "run_before", "run_only", "skip_run_number", "skip_chunk_number", "batch_or_shell"]
+            for attrib in attributes:
+                temp_list = []
+                for subjob in subjob_clusters[subjob_cluster]["subjobs"]:
+                    if not get_phase_attrib(self.phases + self.user_phases, subjob, attrib) in temp_list:
+                        subjob_clusters[subjob_cluster][attrib] = get_phase_attrib(self.phases + self.user_phases, subjob, attrib)
+                    else:
+                        print("Missmatch in attributes")
+                        sys.exit(-1)
+                nproc_sum += get_phase_attrib(self.phases + self.user_phases, subjob, "nproc")
+                nproc_max = max(get_phase_attrib(self.phases + self.user_phases, subjob, "nproc"), nproc_max)
+    
+    #        if subjob_clusters[subjob_cluster].get("submit_to_batch_system", False):
+    #            subjob_clusters[subjob_cluster]["batch_or_shell"] = "batch"
+    #        elif subjob_clusters[subjob_cluster].get("script", False):
+    #            subjob_clusters[subjob_cluster]["batch_or_shell"] = "shell"
+    #
+            if not "run_on_queue" in subjob_clusters[subjob_cluster]:
+                print(f"Information on target queue is missing in cluster {subjob_cluster}.")
+                sys.exit(-1)
+    #
+    # TODO: Check in nproc is calculated correctly
+            if subjob_clusters[subjob_cluster]["order_in_cluster"] == "concurrent":
+                nproc = nproc_sum
+            else:
+                nproc = nproc_max
+            subjob_clusters[subjob_cluster]["nproc"] = nproc
+        return subjob_clusters
+
+    def prepend_newrun_job(config, subjob_clusters):
+        """
+        Looks for subjob_cluster that are set by user workflow (not a 'SimulationSetup')
+        and do not follow a 'SimulationSetup' subjob_clusters.
+        E.g. if two user workflow are the last two subjob_clusters ???
+        Any other example cases when this is the case?
+    
+        Parameters
+        ----------
+            self : Workflow object
+            config : dict
+            subjob_clusters : dict
+    
+        Returns
+        -------
+            self : Workflow object
+            subjob_clusters
+        """
+        first_cluster_name = self.first_task_in_queue
+        first_cluster = subjob_clusters[first_cluster_name]
+        #esm_parser.pprint_config(first_cluster)
+    
+        if not first_cluster.get("batch_or_shell", "Error") == "SimulationSetup":
+    
+            last_cluster_name = self.last_task_in_queue
+            last_cluster = subjob_clusters[last_cluster_name]
+    
+            new_first_cluster_name = "newrun"
+            new_first_cluster = {
+                "newrun": {
+                    "called_from": last_cluster_name,
+                    "run_before": first_cluster_name,
+                    "next_submit": [first_cluster_name],
+                    "subjobs": ["newrun_general"],
+                    "batch_or_shell": "SimulationSetup",
+                }
+            }
+    
+            last_cluster["next_submit"].append("newrun")
+            last_cluster["next_submit"].remove(first_cluster_name)
+    
+            first_cluster["called_from"] = "newrun"
+    
+            self.first_task_in_queue = "newrun"
+    
+            new_subjob = {
+                "newrun_general": {
+                    "nproc": 1,
+                    "called_from": last_cluster_name,
+                    "run_before": first_cluster_name,
+                    "next_submit": [first_cluster_name],
+                    "subjob_cluster": "newrun",
+                }
+            }
+    
+            subjob_clusters.update(new_first_cluster)
+
+# TODO: add new phase to workflow???
+        #gw_config["subjobs"].update(new_subjob)
+
+        return [self, subjob_clusters]
+
+
     def skip_cluster(self, config):
         """
         Checks if a phase/cluster can be skipped.
@@ -158,35 +451,37 @@ def skip_cluster(self, config):
 
 class WorkflowPhase:
     """A workflow phase class."""
-    name = None
-    nproc = 1
-    run_before = None
-    run_after = None
-    submit_to_batch_system = True
-    run_on_queue = None
-    cluster = None
-    next_submit = []
-    called_from = None
-    batch_or_shell = "SimulationSetup"
-    order_in_cluster = "sequential"
-    run_only = None
-    skip_chunk_number = None
-    skip_run_number = None
 
     def __init__(self, phase_name):
+        self.name = None
+        self.nproc = 1
+        self.run_before = None
+        self.run_after = None
+        self.submit_to_batch_system = True
+        self.run_on_queue = None
+        self.cluster = None
+        self.next_submit = []
+        self.called_from = None
+        self.batch_or_shell = "SimulationSetup"
+        self.order_in_cluster = "sequential"
+        self.run_only = None
+        self.skip_chunk_number = None
+        self.skip_run_number = None
         self.name = phase_name
 
 class UserWorkflowPhase(WorkflowPhase):
     """A user workflow phase class."""
-    script = None
-    script_dir = None
-    call_function = None
-    env_preparation = None
 
     def __init__(self, phase_name):
-        self.name = phase_name
-        batch_or_shell = "batch"
-        submit_to_batch_system = False
+
+        WorkflowPhase.__init__(self, phase_name)
+
+        self.script = None
+        self.script_dir = None
+        self.call_function = None
+        self.env_preparation = None
+        self.batch_or_shell = "shell"
+        self.submit_to_batch_system = False
 
 def assemble_workflow(config):
     from . import Workflow
@@ -225,18 +520,18 @@ def assemble_workflow(config):
         # instead all workflow phases are defined in different configs and/or runscripts?
         # TODO: Where could a user define a different (default) phase list? Or should this be changed in defaults.yaml?
     # 2. Initialize default workflow phases
-    workflow = init_default_workflow(workflow, config)
+    workflow = workflow.init_default_workflow(config)
     # 3. Read in workflows from runscript and config files
-    workflow = collect_all_user_workflows(workflow, config)
+    workflow = workflow.collect_all_user_workflows(config)
 
     #config = collect_all_workflow_information(config)
 
 # Why do I need to do the following function call?
     # 4. Order user workflows into default workflow wrt. workflow attributs.
-    workflow = order_clusters(workflow, config)
+    workflow = workflow.order_clusters(config)
 
-    subjob_clusters = complete_clusters(workflow, config)
-    subjob_clusters = prepend_newrun_job(config)
+    subjob_clusters = workflow.complete_clusters(config)
+    subjob_clusters = workflow.prepend_newrun_job(config)
     # 5. write the workflow to config
     config = workflow.write_to_config(config)
     breakpoint()
@@ -257,82 +552,6 @@ def write_subjob_clusters_to_config(config, subjob_clusters):
     config["general"]["subjob_clusters"] = subjob_clusters
     return config
 
-def display_nicely(config):
-    """
-    Pretty prints the workflow configuration assembled in config["general"].
-
-    Parameters
-    ----------
-        config : dict
-
-    Returns
-    -------
-        config : dict
-    """
-    esm_parser.pprint_config(config["general"]["workflow"])
-    return config
-
-
-def prepend_newrun_job(workflow, config, subjob_clusters):
-    """
-    Looks for subjob_cluster that are set by user workflow (not a 'SimulationSetup')
-    and do not follow a 'SimulationSetup' subjob_clusters.
-    E.g. if two user workflow are the last two subjob_clusters ???
-    Any other example cases when this is the case?
-
-    Parameters
-    ----------
-        config : dict
-
-    Returns
-    -------
-        workflow
-        subjob_clusters
-    """
-    first_cluster_name = workflow.first_task_in_queue
-    first_cluster = subjob_clusters[first_cluster_name]
-    #esm_parser.pprint_config(first_cluster)
-
-    if not first_cluster.get("batch_or_shell", "Error") == "SimulationSetup":
-
-        last_cluster_name = workflow.last_task_in_queue
-        last_cluster = subjob_clusters[last_cluster_name]
-
-        new_first_cluster_name = "newrun"
-        new_first_cluster = {
-            "newrun": {
-                "called_from": last_cluster_name,
-                "run_before": first_cluster_name,
-                "next_submit": [first_cluster_name],
-                "subjobs": ["newrun_general"],
-                "batch_or_shell": "SimulationSetup",
-            }
-        }
-
-        last_cluster["next_submit"].append("newrun")
-        last_cluster["next_submit"].remove(first_cluster_name)
-
-        first_cluster["called_from"] = "newrun"
-
-        workflow.first_task_in_queue = "newrun"
-
-        new_subjob = {
-            "newrun_general": {
-                "nproc": 1,
-                "called_from": last_cluster_name,
-                "run_before": first_cluster_name,
-                "next_submit": [first_cluster_name],
-                "subjob_cluster": "newrun",
-            }
-        }
-
-        subjob_clusters.update(new_first_cluster)
-
-# TODO: add new phase to workflow???
-        #gw_config["subjobs"].update(new_subjob)
-
-    return [workflow, subjob_clusters]
-
 def set_phase_attrib(workflow_phases, phase_name, attrib, value):
     for phase in workflow_phases:
         if phase.name == phase_name:
@@ -347,146 +566,6 @@ def get_phase_attrib(workflow_phases, phase_name, attrib):
             value = getattr(phase, attrib)
     return value
 
-def order_clusters(workflow, config):
-    """
-    Put the subjob_clusters in order ???
-
-    Parameters
-    ----------
-        config : dict
-
-    Returns
-    -------
-        workflow
-    """
-    independent = workflow.check_user_workflow_dependency()
-    unknown_phases = workflow.check_unknown_phases()
-
-    if unknown_phases:
-        esm_parser.user_error("ERROR", "Undefined subjob/phase.")
-
-    for user_phase in workflow.user_phases:
-# TODO: Check if run_after or run_before is set for each user phase
-        if not user_phase.run_before and not user_phase.run_after:
-            esm_parser.user_error("ERROR", "Don't know when to start user_phase. Please set run_after or run_before")
-# TODO: Check if not both run_after and run_before are set at the same time for each user phase
-        if user_phase.run_before and user_phase.run_after:
-            esm_parser.user_error("ERROR", "Both run_after and run_before are. Don't know when to start user_phase. Please only set run_after or run_before")
-
-# TODO: Correct for "last_task_in_queue" if necessary
-    # Collect all next_run_triggered_by entrie
-    next_triggered = []
-    run_after = []
-    for model in config:
-        if "workflow" in config[model]:
-            if "next_run_triggered_by" in config[model]["workflow"]:
-                next_triggered.append(config[model]["workflow"]["next_run_triggered_by"])
-    next_triggered = list(filter((workflow.next_run_triggered_by).__ne__, next_triggered))
-    if len(next_triggered) > 1:
-        esm_parser.user_error("ERROR", f"Mismatch found setting next_run_triggered_by for workflow.")
-    elif len(next_triggered) == 1:
-        workflow.next_run_triggered_by = next_triggered[0]
-    #else: let default
-
-# Fill up "next_submit" list
-    next_submits = {}
-    for phase in workflow.phases + workflow.user_phases:
-        next_submits[phase.name] = []
-    for phase2 in workflow.phases + workflow.user_phases:
-        if not phase2.run_after == None:
-            next_submits[phase2.run_after].append(phase2.name)
-            phase2.called_from = phase2.run_after
-    for phase3 in workflow.phases + workflow.user_phases:
-        phase3.next_submit = next_submits[phase3.name]
-
-    for phase4 in workflow.phases + workflow.user_phases:
-        calling_cluster = phase4.run_after
-#
-        if calling_cluster == workflow.last_task_in_queue:
-            workflow.last_task_in_queue = phase4.name
-#
-        called_cluster = phase4.run_before
-        set_phase_attrib(workflow.phases+workflow.user_phases, called_cluster, "called_from", phase4.name)
-        if called_cluster == workflow.first_task_in_queue:
-            workflow.first_task_in_queue = phase4.name
-        if phase4.cluster == None:
-            phase4.cluster = phase4.name
-#
-    first_cluster_name = workflow.first_task_in_queue
-    last_cluster_name = workflow.last_task_in_queue
-#
-    value = get_phase_attrib(workflow.phases+workflow.user_phases, last_cluster_name, "next_submit")
-    if not first_cluster_name in get_phase_attrib(workflow.phases+workflow.user_phases, last_cluster_name, "next_submit"):
-        set_phase_attrib(workflow.phases+workflow.user_phases, last_cluster_name, "next_submit", first_cluster_name)
-    if not last_cluster_name == get_phase_attrib(workflow.phases+workflow.user_phases, first_cluster_name, "called_from"):
-        set_phase_attrib(workflow.phases+workflow.user_phases, first_cluster_name, "called_from", last_cluster_name)
-#
-    return workflow
-
-
-def complete_clusters(workflow, config):
-    # all that are within a next_submit list are in a cluster if:
-    # run concurrently
-    # have the same cluster entry.
-    """
-    Rearanges the subjobs to their subjobs_clusters ???
-
-    Parameters
-    ----------
-        workflow
-        config : dict
-
-    Returns
-    -------
-        subjob_clusters : dict
-    """
-    # sort into dict subjob_clusters
-    subjob_clusters = {}
-
-    for phase in workflow.phases + workflow.user_phases:
-        # Erstellt ein leeres dict im dict subjob_clusters
-        if not phase.cluster in subjob_clusters:
-            subjob_clusters[phase.cluster] = {}
-
-        # Create empty list for each subjob_cluster
-        if not "subjobs" in subjob_clusters[phase.cluster]:
-            subjob_clusters[phase.cluster]["subjobs"] = []
-
-        # Append subjobs to list.
-        subjob_clusters[phase.cluster]["subjobs"].append(phase.name)
-
-    # Then, complete the resource information per cluster
-    # determine whether a cluster is to be submitted to a batch system
-    for subjob_cluster in subjob_clusters:
-        nproc_sum = nproc_max = 0
-        attributes = ["submit_to_batch_system", "order_in_cluster", "run_on_queue", "run_after", "run_before", "run_only", "skip_run_number", "skip_chunk_number", "batch_or_shell"]
-        for attrib in attributes:
-            temp_list = []
-            for subjob in subjob_clusters[subjob_cluster]["subjobs"]:
-                if not get_phase_attrib(workflow.phases + workflow.user_phases, subjob, attrib) in temp_list:
-                    subjob_clusters[subjob_cluster][attrib] = get_phase_attrib(workflow.phases + workflow.user_phases, subjob, attrib)
-                else:
-                    print("Missmatch in attributes")
-                    sys.exit(-1)
-            nproc_sum += get_phase_attrib(workflow.phases + workflow.user_phases, subjob, "nproc")
-            nproc_max = max(get_phase_attrib(workflow.phases + workflow.user_phases, subjob, "nproc"), nproc_max)
-
-#        if subjob_clusters[subjob_cluster].get("submit_to_batch_system", False):
-#            subjob_clusters[subjob_cluster]["batch_or_shell"] = "batch"
-#        elif subjob_clusters[subjob_cluster].get("script", False):
-#            subjob_clusters[subjob_cluster]["batch_or_shell"] = "shell"
-#
-        if not "run_on_queue" in subjob_clusters[subjob_cluster]:
-            print(f"Information on target queue is missing in cluster {subjob_cluster}.")
-            sys.exit(-1)
-#
-# TODO: Check in nproc is calculated correctly
-        if subjob_clusters[subjob_cluster]["order_in_cluster"] == "concurrent":
-            nproc = nproc_sum
-        else:
-            nproc = nproc_max
-        subjob_clusters[subjob_cluster]["nproc"] = nproc
-    return subjob_clusters
 
 def calc_number_of_tasks(config):
     """
@@ -508,92 +587,29 @@ def calc_number_of_tasks(config):
                     tasks += config[model]["nprocar"] * config[model]["nprocbr"]
     return tasks
 
-def init_default_workflow(default_workflow, config):
-    """
-    Add workflow for precompute, compute, and tidy phases
-    etc information already here!
 
-    Parameters
-    ----------
-        default_workflow
-            workflow object
-        config : dict
 
-    Returns
-    -------
-        default_workflow
-    """
 
-    # TODO: make a method of class Workflow
-
-    # For testing only, set in some yaml config
-    workflow_phases = default_workflow.phases
-
-    # Calculating the number of tasks for each component/model
-    # needed for phase compute
-    tasks = calc_number_of_tasks(config)
-    # Create default workflow phase objects:
-    default_workflow.phases = []
-    for ind, phase in enumerate(workflow_phases):
-        default_workflow.phases.append(WorkflowPhase(phase))
-
-    for ind, phase in enumerate(default_workflow.phases):
-        if ind < default_workflow.num_phases - 1:
-            phase.run_before = default_workflow.phases[ind+1].name
-        else:
-            phase.run_after = default_workflow.phases[ind-1].name
-        # TODO: this needs to be set somewhere else, or different.
-        phase.cluster = phase.name
-        if phase.name == "compute":
-            phase.nproc = tasks
-            phase.submit_to_batch_system = config["general"].get("submit_to_batch_system", True)
-            phase.run_on_queue = config["computer"]["partitions"]["compute"]["name"]
-
-    default_workflow.first_task_in_queue = default_workflow.phases[0].name      # prepcompute
-    default_workflow.last_task_in_queue = default_workflow.phases[-1].name      # tidy
-    # next_run_triggered_by only used to set last_task_in_queue
-    # TODO: why not set last_task_in_queue directly?
-    default_workflow.next_run_triggered_by = default_workflow.phases[-1].name   # tidy
-
-    return default_workflow
-
-def collect_all_user_workflows(user_workflow,config):
-    """
-    Collect all workflows set by config files.
-    """
-    user_workflow_phases = []
-    user_workflow_phases_names = []
-    for model in config:
-        if "workflow" in config[model]:
-            w_config = config[model]["workflow"]
-            if "subjobs" in w_config:
-                # copies component workflow config to new variable ref_config
-                ref_config = copy.deepcopy(w_config)
-                for subjob in list(copy.deepcopy(w_config["subjobs"])):
-                    # create a new phase object for subjob
-                    # new_phase_name = subjob + "_" + model
-                    # each subjob needs to have an unique name
-                    new_phase_name = subjob
-                    new_phase = UserWorkflowPhase(new_phase_name)
-                    if not new_phase_name in user_workflow_phases_names:
-                        user_workflow_phases_names.append(new_phase_name)
-                        # set attributes of user_workflow phases
-                        for key, value in w_config["subjobs"][subjob].items():
-                            new_phase.__setattr__(key, value)
-                        user_workflow_phases.append(new_phase)
-                    else:
-                        esm_parser.user_error("ERROR", "Two subjobs of the same name.")
-
-    user_workflow.user_phases = user_workflow_phases
-    return user_workflow
 
 
 
+################### Maybe outdated routines ######################
 
+def display_nicely(config):
+    """
+    Pretty prints the workflow configuration assembled in config["general"].
 
+    Parameters
+    ----------
+        config : dict
 
+    Returns
+    -------
+        config : dict
+    """
+    esm_parser.pprint_config(config["general"]["workflow"])
+    return config
 
-################### Maybe outdated routines ######################
 
 def collect_all_workflow_information(config):
     """
diff --git a/tests/test_esm_runscripts/test_workflow.py b/tests/test_esm_runscripts/test_workflow.py
index 855228146..d13871db6 100644
--- a/tests/test_esm_runscripts/test_workflow.py
+++ b/tests/test_esm_runscripts/test_workflow.py
@@ -68,26 +68,26 @@ def test_config():
     return config
 
 def test_num_phases(test_workflow_object, test_config):
-    test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config)
+    test_workflow_object = test_workflow_object.init_default_workflow(test_config)
     assert test_workflow_object.num_phases == 3
 
 def test_check_user_workflow_dependency(test_workflow_object, test_config):
-    test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config)
-    test_workflow_object = workflow.collect_all_user_workflows(test_workflow_object, test_config)
+    test_workflow_object = test_workflow_object.init_default_workflow(test_config)
+    test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config)
     independent = test_workflow_object.check_user_workflow_dependency()
     assert independent
 
 def test_check_user_workflow_dependency_2(test_workflow_object, test_config):
     test_config['flow']['workflow']['subjobs']['my_new_subjob_flow']['run_after'] = 'my_new_subjob_oifs'
-    test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config)
-    test_workflow_object = workflow.collect_all_user_workflows(test_workflow_object, test_config)
+    test_workflow_object = test_workflow_object.init_default_workflow(test_config)
+    test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config)
     independent = test_workflow_object.check_user_workflow_dependency()
     assert not independent
 
 def test_check_unknown_phases(test_workflow_object, test_config):
     test_config['flow']['workflow']['subjobs']['my_new_subjob_flow']['run_after'] = 'my_new_subjob'
-    test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config)
-    test_workflow_object = workflow.collect_all_user_workflows(test_workflow_object, test_config)
+    test_workflow_object = test_workflow_object.init_default_workflow(test_config)
+    test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config)
     unknown_phases = test_workflow_object.check_unknown_phases()
     assert unknown_phases
 
@@ -105,40 +105,40 @@ def test_order_clusters(test_workflow_object, test_config):
 #    test_config['flow']['workflow']['next_run_triggered_by'] = 'my_new_subjob_flow'
 #    test_config['oifs']['workflow']['next_run_triggered_by'] = 'my_new_subjob_general'
     #test_config['flow']['workflow']['subjobs']['my_new_subjob_flow']['run_before'] = 'my_new_subjob_oifs'
-    test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config)
-    test_workflow_object = workflow.collect_all_user_workflows(test_workflow_object, test_config)
-    test_workflow_object = workflow.order_clusters(test_workflow_object, test_config)
+    test_workflow_object = test_workflow_object.init_default_workflow(test_config)
+    test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config)
+    test_workflow_object = test_workflow_object.order_clusters(test_config)
     pytest.fail("something wrong")
 
 def test_complete_clusters(test_workflow_object, test_config):
-    test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config)
-    test_workflow_object = workflow.collect_all_user_workflows(test_workflow_object, test_config)
-    test_workflow_object = workflow.order_clusters(test_workflow_object, test_config)
-    subjob_clusters = workflow.complete_clusters(test_workflow_object, test_config)
+    test_workflow_object = test_workflow_object.init_default_workflow(test_config)
+    test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config)
+    test_workflow_object = test_workflow_object.order_clusters(test_config)
+    subjob_clusters = test_workflow_object.complete_clusters(test_config)
     pytest.fail("something wrong")
 
 def test_prepend_newrun_job(test_workflow_object, test_config):
-    test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config)
-    test_workflow_object = workflow.collect_all_user_workflows(test_workflow_object, test_config)
-    test_workflow_object = workflow.order_clusters(test_workflow_object, test_config)
-    subjob_clusters = workflow.complete_clusters(test_workflow_object, test_config)
+    test_workflow_object = test_workflow_object.init_default_workflow(test_config)
+    test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config)
+    test_workflow_object = test_workflow_object.order_clusters(test_config)
+    subjob_clusters = test_workflow_object.complete_clusters(test_config)
     [test_workflow_object, subjob_clusters] = workflow.prepend_newrun_job(test_workflow_object, test_config, subjob_clusters)
     pytest.fail("something wrong")
 
 def test_write_to_config(test_workflow_object, test_config):
-    test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config)
-    test_workflow_object = workflow.collect_all_user_workflows(test_workflow_object, test_config)
-    test_workflow_object = workflow.order_clusters(test_workflow_object, test_config)
-    subjob_clusters = workflow.complete_clusters(test_workflow_object, test_config)
+    test_workflow_object = test_workflow_object.init_default_workflow(test_config)
+    test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config)
+    test_workflow_object = test_workflow_object.order_clusters(test_config)
+    subjob_clusters = test_workflow_object.complete_clusters(test_config)
     [test_workflow_object, subjob_clusters] = workflow.prepend_newrun_job(test_workflow_object, test_config, subjob_clusters)
     config = test_workflow_object.write_to_config(test_config)
     pytest.fail("something wrong")
 
 def test_write_subjob_clusters_to_config(test_workflow_object, test_config):
-    test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config)
-    test_workflow_object = workflow.collect_all_user_workflows(test_workflow_object, test_config)
-    test_workflow_object = workflow.order_clusters(test_workflow_object, test_config)
-    subjob_clusters = workflow.complete_clusters(test_workflow_object, test_config)
+    test_workflow_object = test_workflow_object.init_default_workflow(test_config)
+    test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config)
+    test_workflow_object = test_workflow_object.order_clusters(test_config)
+    subjob_clusters = test_workflow_object.complete_clusters(test_config)
     [test_workflow_object, subjob_clusters] = workflow.prepend_newrun_job(test_workflow_object, test_config, subjob_clusters)
     test_config = workflow.write_subjob_clusters_to_config(test_config, subjob_clusters)
     test_config = test_workflow_object.write_to_config(test_config)

From b8d1db041ad73d82121ec054b257e33381add3ca Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Fri, 27 Oct 2023 18:07:20 +0200
Subject: [PATCH 18/98] Further developments until order_cluster.

---
 src/esm_runscripts/workflow.py | 256 ++++++++++++++++++++++-----------
 1 file changed, 176 insertions(+), 80 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index 3644d3503..d35b2e8c3 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -1,7 +1,7 @@
-import sys, copy, os
+import sys, copy
 import esm_parser
 
-#import pdb
+import pdb
 
 class Workflow:
     """A workflow class."""
@@ -23,10 +23,9 @@ def __init__(self, phases, always_run_with=[]):
         """
         self.default_phases = []
         self.user_phases = []
-        self.always_run_with = []
-        self.first_task_in_queue = ""
-        self.last_task_in_queue = ""
-        self.next_run_triggered_by = ""
+        self.first_task_in_queue = []
+        self.last_task_in_queue = []
+        self.next_run_triggered_by = []
         # TODO: Call here the phase object ???
         self.phases = phases
         self.always_run_with = always_run_with
@@ -46,7 +45,7 @@ def get_phases_attribs_list(self, phase_type, attrib):
         ----------
             self: class Workflow
             phase_type: str (default or user)
-            attribute: str
+            attrib: str
 
         Returns
         -------
@@ -63,19 +62,19 @@ def init_default_workflow(self, config):
         """
         Add workflow for precompute, compute, and tidy phases
         etc information already here!
-    
+
         Parameters
         ----------
             self : Workflow object
             config : dict
-    
+
         Returns
         -------
             self : Workflow object
         """
-    
+
         workflow_phases = self.phases
-    
+
         # Calculating the number of tasks for each component/model
         # needed for phase compute
         tasks = calc_number_of_tasks(config)
@@ -84,7 +83,7 @@ def init_default_workflow(self, config):
         self.phases = []
         for ind, phase in enumerate(workflow_phases):
             self.phases.append(WorkflowPhase(phase))
-    
+
         for ind, phase in enumerate(self.phases):
             if ind < self.num_phases - 1:
                 # Set run_before attrib of all phases (except last on) to the next phase name
@@ -99,43 +98,92 @@ def init_default_workflow(self, config):
                 phase.nproc = tasks
                 phase.submit_to_batch_system = config["general"].get("submit_to_batch_system", True)
                 phase.run_on_queue = config["computer"]["partitions"]["compute"]["name"]
-    
+
         # Set default workflow values
-        self.first_task_in_queue = self.phases[0].name      # prepcompute
-        self.last_task_in_queue = self.phases[-1].name      # tidy
+        set_workflow_attrib(self, "first_task_in_queue", self.phases[0].name)   # prepcompute
+        set_workflow_attrib(self, "last_task_in_queue", self.phases[-1].name)   # tidy
         # next_run_triggered_by only used to set last_task_in_queue
         # TODO: why not set last_task_in_queue directly?
-        self.next_run_triggered_by = self.phases[-1].name   # tidy
-    
+        set_workflow_attrib(self, "next_run_triggered_by", self.phases[-1].name)   # tidy
+
         return self
 
+    def check_if_keyword_is_valid(self, keyword):
+        """
+        Checks if the key given for a user workflow is valie
+
+        Parameters
+        ----------
+            keyword : str
+
+        Returns
+        -------
+            true or false
+        """
+
+        if hasattr(self, keyword):
+            return True
+        else:
+            return False
+
     def collect_all_user_workflows(self,config):
         """
         Collect all workflows set by config files.
+
+        Parameters
+        ----------
+            self : Workflow object
+            config : dict
+
+        Returns
+        -------
+            self : Workflow object
         """
+
         user_workflow_phases = []
         user_workflow_phases_names = []
         for model in config:
             if "workflow" in config[model]:
                 w_config = config[model]["workflow"]
-                if "subjobs" in w_config:
+                #if "subjobs" in w_config:
+                if "phases" in w_config:
                     # copies component workflow config to new variable ref_config
                     ref_config = copy.deepcopy(w_config)
-                    for subjob in list(copy.deepcopy(w_config["subjobs"])):
-                        # create a new phase object for subjob
-                        # new_phase_name = subjob + "_" + model
-                        # each subjob needs to have an unique name
-                        new_phase_name = subjob
+                    # Set attributes of workflow
+                    # This will be overwritten by all user defined workflows???
+                    # Collect them in a list???
+                    # check if valid workflow keywords
+                    for key, value in w_config.items():
+                        if self.check_if_keyword_is_valid(key):
+                            # set here only workflow attributes
+                            if not key == "phases":
+                                set_workflow_attrib(self, key, value)
+                        else:
+                            esm_parser.user_error("ERROR", f"``{key}`` is not a valid keyword of a workflow.")
+                    #for subjob in list(copy.deepcopy(w_config["subjobs"])):
+                    for phase in list(copy.deepcopy(w_config["phases"])):
+                        new_phase_name = phase
+                        # create a new user phase object for ``phase``
                         new_phase = UserWorkflowPhase(new_phase_name)
+                        if phase in self.get_phases_attribs_list("default", "name"):
+                            esm_parser.user_error("ERROR", f"The user phase ``{new_phase_name}`` has the same name as a default workflow phase. This is not allowed.")
+                        # each subjob needs to have an unique name
+                        # check if the name of the new user phase does not already exist
                         if not new_phase_name in user_workflow_phases_names:
+                            # and append it to the list of user phases of the workflow
                             user_workflow_phases_names.append(new_phase_name)
                             # set attributes of user_workflow phases
-                            for key, value in w_config["subjobs"][subjob].items():
-                                new_phase.__setattr__(key, value)
+                            # check if valid workflow phase keywords
+                            for key, value in w_config["phases"][phase].items():
+                                if new_phase.check_if_keyword_is_valid(key):
+                                    set_phase_attrib([new_phase], new_phase_name, key, value)
+#                                    new_phase.__setattr__(key, value)
+                                else:
+                                    esm_parser.user_error("ERROR", f"``{key}`` of workflow phase ``{new_phase_name}`` is not a valid keyword of a workflow phase.")
                             user_workflow_phases.append(new_phase)
                         else:
-                            esm_parser.user_error("ERROR", "Two subjobs of the same name.")
-    
+                            esm_parser.user_error("ERROR", f"Two workflow phases have the same name {new_phase_name}.")
+
         self.user_phases = user_workflow_phases
         return self
 
@@ -184,9 +232,17 @@ def check_user_workflow_dependency(self):
 
     def check_unknown_phases(self):
         """
-        Check if any user phase addresses an unknown workflow phase.
+        Check if any user phase attributes points to any unknown workflow phase.
+
+        Parameters
+        ----------
+            self : Workflow object
+
+        Returns
+        -------
+            unknown_phases : set
         """
-        unknown_user_phase = True
+        unknown_phases = []
         phases_names = self.get_phases_attribs_list('default','name')
         user_phases_names = self.get_phases_attribs_list('user','name')
         run_after = self.get_phases_attribs_list('user','run_after')
@@ -196,62 +252,77 @@ def check_unknown_phases(self):
         # See also https://docs.python.org/3/library/functions.html#filter
         run_after_list = list(filter(None, run_after))
         run_before_list = list(filter(None, run_before))
+        # Get all phases that are defined as run_after or run_before, but do not exist as user or default phase.
+        # If unknown_phase is not empty, there is a user_phase that defines run_after or run_before for a not existing phase.
+        unknown_phases = set(run_after_list).union(set(run_before_list)).difference(set(user_phases_names).union(set(phases_names)))
 
-        unknown_user_phases = set(run_after_list).union(set(run_before_list)).difference(set(user_phases_names).union(set(phases_names)))
-        return unknown_user_phases
+        return unknown_phases
 
     def order_clusters(self, config):
         """
-        Put the subjob_clusters in order ???
-    
+        Put the subjob_clusters in order.
+
         Parameters
         ----------
             config : dict
-    
+
         Returns
         -------
             self : Workflow object
         """
+        # Check if user phases are independent from each other
+        # TODO: What if not independent?
         independent = self.check_user_workflow_dependency()
+        # Check if there are unknown phases, if yes, will give error exception
         unknown_phases = self.check_unknown_phases()
-    
         if unknown_phases:
-            esm_parser.user_error("ERROR", "Undefined subjob/phase.")
-    
+            unknowns = ', '.join(unknown_phases)
+            esm_parser.user_error("ERROR", f"Unknown phase(s) ``{unknowns}`` defined as ``run_before`` or ``run_after``.")
+
         for user_phase in self.user_phases:
-    # TODO: Check if run_after or run_before is set for each user phase
+            # Check if run_after or run_before is set for each user phase
             if not user_phase.run_before and not user_phase.run_after:
-                esm_parser.user_error("ERROR", "Don't know when to start user_phase. Please set run_after or run_before")
-    # TODO: Check if not both run_after and run_before are set at the same time for each user phase
+                esm_parser.user_error("ERROR", f"Don't know when to start user_phase. Please set run_after or run_before for user phase {user_phase.name}.")
+            # Check if not both run_after and run_before are set at the same time for each user phase
             if user_phase.run_before and user_phase.run_after:
-                esm_parser.user_error("ERROR", "Both run_after and run_before are. Don't know when to start user_phase. Please only set run_after or run_before")
-    
-    # TODO: Correct for "last_task_in_queue" if necessary
-        # Collect all next_run_triggered_by entrie
-        next_triggered = []
+                esm_parser.user_error("ERROR", f"Both run_after and run_before are set. Don't know when to start {user_phase.name}. Please only set run_after or run_before.")
+
+        # Correct for ``last_task_in_queue`` if necessary
+        # Collect all next_run_triggered_by entries
+        next_triggered = self.next_run_triggered_by
         run_after = []
-        for model in config:
-            if "workflow" in config[model]:
-                if "next_run_triggered_by" in config[model]["workflow"]:
-                    next_triggered.append(config[model]["workflow"]["next_run_triggered_by"])
-        next_triggered = list(filter((self.next_run_triggered_by).__ne__, next_triggered))
-        if len(next_triggered) > 1:
-            esm_parser.user_error("ERROR", f"Mismatch found setting next_run_triggered_by for workflow.")
-        elif len(next_triggered) == 1:
-            self.next_run_triggered_by = next_triggered[0]
-        #else: let default
-    
-    # Fill up "next_submit" list
+        #for model in config:
+        #    if "workflow" in config[model]:
+        #        if "next_run_triggered_by" in config[model]["workflow"]:
+        #            next_triggered.append(config[model]["workflow"]["next_run_triggered_by"])
+
+        # How needs the next_triggered_by be set??? Which to choose if several workflows are defined?
+
+        #next_triggered = list(filter((self.next_run_triggered_by).__ne__, next_triggered))
+        #if len(next_triggered) > 1:
+        #    esm_parser.user_error("ERROR", "Mismatch found setting next_run_triggered_by for workflow.")
+        #elif len(next_triggered) == 1:
+        #    self.next_run_triggered_by = next_triggered[0]
+        ##else: let default
+
+        # Set "next_submit" and "called_from"
+        # Create a dict of all phases with empty lists
         next_submits = {}
         for phase in self.phases + self.user_phases:
             next_submits[phase.name] = []
+
         for phase2 in self.phases + self.user_phases:
             if not phase2.run_after == None:
                 next_submits[phase2.run_after].append(phase2.name)
                 phase2.called_from = phase2.run_after
+
         for phase3 in self.phases + self.user_phases:
             phase3.next_submit = next_submits[phase3.name]
-    
+
+        print(self.last_task_in_queue)
+        # ich bin hier
+        breakpoint()
+
         for phase4 in self.phases + self.user_phases:
             calling_cluster = phase4.run_after
     #
@@ -276,37 +347,37 @@ def order_clusters(self, config):
     #
         return self
 
-    def complete_clusters(config):
+    def complete_clusters(self, config):
         # all that are within a next_submit list are in a cluster if:
         # run concurrently
         # have the same cluster entry.
         """
         Rearanges the subjobs to their subjobs_clusters ???
-    
+
         Parameters
         ----------
             self : Workflow object
             config : dict
-    
+
         Returns
         -------
             subjob_clusters : dict
         """
         # sort into dict subjob_clusters
         subjob_clusters = {}
-    
+
         for phase in self.phases + self.user_phases:
             # Erstellt ein leeres dict im dict subjob_clusters
             if not phase.cluster in subjob_clusters:
                 subjob_clusters[phase.cluster] = {}
-    
+
             # Create empty list for each subjob_cluster
             if not "subjobs" in subjob_clusters[phase.cluster]:
                 subjob_clusters[phase.cluster]["subjobs"] = []
-    
+
             # Append subjobs to list.
             subjob_clusters[phase.cluster]["subjobs"].append(phase.name)
-    
+
         # Then, complete the resource information per cluster
         # determine whether a cluster is to be submitted to a batch system
         for subjob_cluster in subjob_clusters:
@@ -322,7 +393,7 @@ def complete_clusters(config):
                         sys.exit(-1)
                 nproc_sum += get_phase_attrib(self.phases + self.user_phases, subjob, "nproc")
                 nproc_max = max(get_phase_attrib(self.phases + self.user_phases, subjob, "nproc"), nproc_max)
-    
+
     #        if subjob_clusters[subjob_cluster].get("submit_to_batch_system", False):
     #            subjob_clusters[subjob_cluster]["batch_or_shell"] = "batch"
     #        elif subjob_clusters[subjob_cluster].get("script", False):
@@ -340,19 +411,19 @@ def complete_clusters(config):
             subjob_clusters[subjob_cluster]["nproc"] = nproc
         return subjob_clusters
 
-    def prepend_newrun_job(config, subjob_clusters):
+    def prepend_newrun_job(self, config, subjob_clusters):
         """
         Looks for subjob_cluster that are set by user workflow (not a 'SimulationSetup')
         and do not follow a 'SimulationSetup' subjob_clusters.
         E.g. if two user workflow are the last two subjob_clusters ???
         Any other example cases when this is the case?
-    
+
         Parameters
         ----------
             self : Workflow object
             config : dict
             subjob_clusters : dict
-    
+
         Returns
         -------
             self : Workflow object
@@ -361,12 +432,12 @@ def prepend_newrun_job(config, subjob_clusters):
         first_cluster_name = self.first_task_in_queue
         first_cluster = subjob_clusters[first_cluster_name]
         #esm_parser.pprint_config(first_cluster)
-    
+
         if not first_cluster.get("batch_or_shell", "Error") == "SimulationSetup":
-    
+
             last_cluster_name = self.last_task_in_queue
             last_cluster = subjob_clusters[last_cluster_name]
-    
+
             new_first_cluster_name = "newrun"
             new_first_cluster = {
                 "newrun": {
@@ -377,14 +448,14 @@ def prepend_newrun_job(config, subjob_clusters):
                     "batch_or_shell": "SimulationSetup",
                 }
             }
-    
+
             last_cluster["next_submit"].append("newrun")
             last_cluster["next_submit"].remove(first_cluster_name)
-    
+
             first_cluster["called_from"] = "newrun"
-    
+
             self.first_task_in_queue = "newrun"
-    
+
             new_subjob = {
                 "newrun_general": {
                     "nproc": 1,
@@ -394,7 +465,7 @@ def prepend_newrun_job(config, subjob_clusters):
                     "subjob_cluster": "newrun",
                 }
             }
-    
+
             subjob_clusters.update(new_first_cluster)
 
 # TODO: add new phase to workflow???
@@ -483,6 +554,25 @@ def __init__(self, phase_name):
         self.batch_or_shell = "shell"
         self.submit_to_batch_system = False
 
+    def check_if_keyword_is_valid(self, keyword):
+        """
+        Checks if the key given for a user workflow is valie
+
+        Parameters
+        ----------
+            keyword : str
+
+        Returns
+        -------
+            true or false
+        """
+
+        if hasattr(self, keyword):
+            return True
+        else:
+            return False
+
+
 def assemble_workflow(config):
     from . import Workflow
     """
@@ -530,11 +620,11 @@ def assemble_workflow(config):
     # 4. Order user workflows into default workflow wrt. workflow attributs.
     workflow = workflow.order_clusters(config)
 
+    breakpoint()
     subjob_clusters = workflow.complete_clusters(config)
     subjob_clusters = workflow.prepend_newrun_job(config)
     # 5. write the workflow to config
     config = workflow.write_to_config(config)
-    breakpoint()
     # 6. Remove old worklow from config
 
     # Set "jobtype" for the first task???
@@ -552,6 +642,12 @@ def write_subjob_clusters_to_config(config, subjob_clusters):
     config["general"]["subjob_clusters"] = subjob_clusters
     return config
 
+def set_workflow_attrib(workflow, attrib, value):
+    if type(getattr(workflow, attrib)).__name__ == "list":
+        workflow.__dict__[attrib].append(value)
+    else:
+        workflow.__setattr__(attrib, value)
+
 def set_phase_attrib(workflow_phases, phase_name, attrib, value):
     for phase in workflow_phases:
         if phase.name == phase_name:
@@ -564,7 +660,7 @@ def get_phase_attrib(workflow_phases, phase_name, attrib):
     for phase in workflow_phases:
         if phase.name == phase_name:
             value = getattr(phase, attrib)
-    return value
+            return value
 
 
 def calc_number_of_tasks(config):
@@ -688,7 +784,7 @@ def collect_all_workflow_information(config):
             # checks if next_run:triggered_by is tidy or the one in user workflow, or empty?
             if "next_run_triggered_by" in w_config:
                 if not gw_config["next_run_triggered_by"] in ["tidy", w_config["next_run_triggered_by"], ]:
-                    print(f"Mismatch found setting next_run_triggered_by for workflow.")
+                    print("Mismatch found setting next_run_triggered_by for workflow.")
                     sys.exit(-1)
                 else:
                     gw_config["next_run_triggered_by"] = w_config["next_run_triggered_by"]

From 2cd0da6151319cf4a3198ba6107266a7a3be926a Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Fri, 3 Nov 2023 11:42:38 +0100
Subject: [PATCH 19/98] Further changes for workflow manager.

---
 src/esm_runscripts/batch_system.py |  4 +-
 src/esm_runscripts/workflow.py     | 91 ++++++++++++++++++++++--------
 2 files changed, 70 insertions(+), 25 deletions(-)

diff --git a/src/esm_runscripts/batch_system.py b/src/esm_runscripts/batch_system.py
index a8faec98e..4f8461168 100644
--- a/src/esm_runscripts/batch_system.py
+++ b/src/esm_runscripts/batch_system.py
@@ -374,6 +374,7 @@ def get_run_commands(config, subjob, batch_or_shell):  # here or in compute.py?
 
         commands = []
         if subjob.startswith("compute"):
+            # for batch jobs
             if config["general"].get("submit_to_batch_system", True):
                 batch_system = config["computer"]
                 if "execution_command" in batch_system:
@@ -384,6 +385,7 @@ def get_run_commands(config, subjob, batch_or_shell):  # here or in compute.py?
                     )
                     if config["general"].get("multi_srun"):
                         return self.bs.get_run_commands_multisrun(config, commands)
+            # for shell scrips
             else:
                 for model in config:
                     if model == "computer":
@@ -516,7 +518,7 @@ def write_simple_runscript(config, cluster, batch_or_shell="batch"):
             #    dummy = 0
             else:  # "normal" case
                 dummy = 0
-
+# was macht das hier? wo/wie wird submits_abother_job definiert?
             if submits_another_job(config, cluster):  # and batch_or_shell == "batch":
                 # -j ? is that used somewhere? I don't think so, replaced by workflow
                 #   " -j "+ config["general"]["jobtype"]
diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index d35b2e8c3..e20a84fc9 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -1,6 +1,8 @@
 import sys, copy
 import esm_parser
 
+from pprint import pprint
+
 import pdb
 
 class Workflow:
@@ -172,7 +174,7 @@ def collect_all_user_workflows(self,config):
                         if not new_phase_name in user_workflow_phases_names:
                             # and append it to the list of user phases of the workflow
                             user_workflow_phases_names.append(new_phase_name)
-                            # set attributes of user_workflow phases
+                            # set attributes of user_workflow phases from config settings
                             # check if valid workflow phase keywords
                             for key, value in w_config["phases"][phase].items():
                                 if new_phase.check_if_keyword_is_valid(key):
@@ -191,15 +193,37 @@ def write_to_config(self, config):
         """
         Write to config.
         """
+        cluster_att = []
+        for att in dir(self.phases[0]):
+            if(att[:2] != "__"):
+                cluster_att.append(att)
+        # 1. Delete unnecessary config workflow entries (e.g. in general)
+        if "workflow" in config["general"]:
+            del config["general"]["workflow"]
+
         # It is assumed here, that there are no workflows in config["general"]
         # or that these are removed after collect_...
         config["general"]["workflow"] = {}
         config["general"]["workflow"].update(self.__dict__)
+        # 3. Write clusters
+        config["general"]["workflow"]["subjob_clusters"] = {}
+        for cluster in self.get_phases_attribs_list("default", "cluster") + self.get_phases_attribs_list("user", "cluster"):
+            config["general"]["workflow"]["subjob_clusters"][cluster] = {}
+            config["general"]["workflow"]["subjob_clusters"][cluster]["subjobs"] = []
+            for phase in self.phases + self.user_phases:
+                if phase.cluster == cluster:
+                    config["general"]["workflow"]["subjob_clusters"][cluster]["subjobs"].append(phase.name)
+                    for att in cluster_att:
+                        config["general"]["workflow"]["subjob_clusters"][cluster][att] = getattr(phase, att)
+        # 2. Write subjobs/phases
         config["general"]["workflow"]["subjobs"] = {}
-        for phase in self.phases:
+        for phase in self.phases+self.user_phases:
             temp_dict = {phase.name: phase.__dict__}
             config["general"]["workflow"]["subjobs"].update(temp_dict)
 
+        # Todo: delete phases and user_phases
+        del config["general"]["workflow"]["phases"]
+        del config["general"]["workflow"]["user_phases"]
         return config
 
     def check_user_workflow_dependency(self):
@@ -306,6 +330,8 @@ def order_clusters(self, config):
         ##else: let default
 
         # Set "next_submit" and "called_from"
+        # "next_submit" which phase will be called next (run_after of the next phase)
+        # "called_from" name of previous phase, run_after of current phase
         # Create a dict of all phases with empty lists
         next_submits = {}
         for phase in self.phases + self.user_phases:
@@ -319,32 +345,51 @@ def order_clusters(self, config):
         for phase3 in self.phases + self.user_phases:
             phase3.next_submit = next_submits[phase3.name]
 
-        print(self.last_task_in_queue)
-        # ich bin hier
-        breakpoint()
+#        for phase6 in self.phases + self.user_phases:
+#            print(phase6.name, phase6.run_after, phase6.called_from, phase6.next_submit)
+
+
+# assign user phases to a cluster (tbd)
+        # - if all phases have the same run_after and run_before they can be in the cluster
+        # - in this cluster they will be run in parallel?
+
 
         for phase4 in self.phases + self.user_phases:
             calling_cluster = phase4.run_after
-    #
+
+    # set last_task_in_queue
             if calling_cluster == self.last_task_in_queue:
-                self.last_task_in_queue = phase4.name
-    #
+                self.last_task_in_queue.append(phase4.name)
+
             called_cluster = phase4.run_before
+#            print(f"calling_cluster: {calling_cluster} ->", phase4.name, f" -> called_cluster: {called_cluster}")
             set_phase_attrib(self.phases+self.user_phases, called_cluster, "called_from", phase4.name)
+
+    # set first_task_in_queue
             if called_cluster == self.first_task_in_queue:
-                self.first_task_in_queue = phase4.name
+                self.first_task_in_queue.append(phase4.name)
+
+    # set empty cluster entries to phase name
             if phase4.cluster == None:
                 phase4.cluster = phase4.name
-    #
-        first_cluster_name = self.first_task_in_queue
-        last_cluster_name = self.last_task_in_queue
-    #
-        value = get_phase_attrib(self.phases+self.user_phases, last_cluster_name, "next_submit")
+
+# todo: check if num list > 1, is this possible ???
+        first_cluster_name = self.first_task_in_queue[0]
+        last_cluster_name = self.last_task_in_queue[0]
+
+        # if first_cluster_name is not next_submit of last_cluster_name
         if not first_cluster_name in get_phase_attrib(self.phases+self.user_phases, last_cluster_name, "next_submit"):
             set_phase_attrib(self.phases+self.user_phases, last_cluster_name, "next_submit", first_cluster_name)
+        # if last_cluster_name is not called_from of first_cluster_name
         if not last_cluster_name == get_phase_attrib(self.phases+self.user_phases, first_cluster_name, "called_from"):
             set_phase_attrib(self.phases+self.user_phases, first_cluster_name, "called_from", last_cluster_name)
-    #
+
+#        for i in range(len(self.phases)):
+#            pprint(self.phases[i].__dict__)
+#
+#        for i in range(len(self.user_phases)):
+#            pprint(self.user_phases[i].__dict__)
+
         return self
 
     def complete_clusters(self, config):
@@ -528,7 +573,7 @@ def __init__(self, phase_name):
         self.nproc = 1
         self.run_before = None
         self.run_after = None
-        self.submit_to_batch_system = True
+        self.submit_to_batch_system = False
         self.run_on_queue = None
         self.cluster = None
         self.next_submit = []
@@ -554,6 +599,7 @@ def __init__(self, phase_name):
         self.batch_or_shell = "shell"
         self.submit_to_batch_system = False
 
+
     def check_if_keyword_is_valid(self, keyword):
         """
         Checks if the key given for a user workflow is valie
@@ -613,16 +659,13 @@ def assemble_workflow(config):
     workflow = workflow.init_default_workflow(config)
     # 3. Read in workflows from runscript and config files
     workflow = workflow.collect_all_user_workflows(config)
-
     #config = collect_all_workflow_information(config)
-
-# Why do I need to do the following function call?
     # 4. Order user workflows into default workflow wrt. workflow attributs.
     workflow = workflow.order_clusters(config)
 
-    breakpoint()
-    subjob_clusters = workflow.complete_clusters(config)
-    subjob_clusters = workflow.prepend_newrun_job(config)
+# What is the next functions needed for?
+#    subjob_clusters = workflow.complete_clusters(config)
+#    subjob_clusters = workflow.prepend_newrun_job(config)
     # 5. write the workflow to config
     config = workflow.write_to_config(config)
     # 6. Remove old worklow from config
@@ -631,10 +674,10 @@ def assemble_workflow(config):
     if config["general"]["jobtype"] == "unknown":
         config["general"]["command_line_config"]["jobtype"] = config["general"][
             "workflow"
-        ]["first_task_in_queue"]
+        ]["first_task_in_queue"][0]
         config["general"]["jobtype"] = config["general"]["workflow"][
             "first_task_in_queue"
-        ]
+        ][0]            # todo: this needs to be a string, not a list
 
     return config
 

From 273175f4db21ef0844d1bee8e5b5256ad67ce313 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Fri, 10 Nov 2023 12:45:30 +0100
Subject: [PATCH 20/98] Added prepend_newrun, skip_cluster, fixed next_submit
 entries.

---
 src/esm_runscripts/workflow.py | 550 +++++++++++++++++----------------
 1 file changed, 280 insertions(+), 270 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index e20a84fc9..f94030a33 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -25,9 +25,9 @@ def __init__(self, phases, always_run_with=[]):
         """
         self.default_phases = []
         self.user_phases = []
-        self.first_task_in_queue = []
-        self.last_task_in_queue = []
-        self.next_run_triggered_by = []
+        self.first_task_in_queue = None
+        self.last_task_in_queue = None
+        self.next_run_triggered_by = None
         # TODO: Call here the phase object ???
         self.phases = phases
         self.always_run_with = always_run_with
@@ -39,6 +39,23 @@ def num_phases(self):
         """
         return len(self.phases)
 
+    def get_workflow_phase_by_name(self, phase_name):
+        """
+        Returns phase of phase_name
+
+        Arguments
+        ---------
+            self : class Workflow
+            phase_name : str (name of the phase to be returned
+
+        Returns
+        -------
+            phase : class phase or user_phase
+        """
+        for phase in self.phases + self.user_phases:
+            if phase.name == phase_name:
+                return phase
+
     def get_phases_attribs_list(self, phase_type, attrib):
         """
         Return the names of all phases as list.
@@ -90,14 +107,19 @@ def init_default_workflow(self, config):
             if ind < self.num_phases - 1:
                 # Set run_before attrib of all phases (except last on) to the next phase name
                 phase.run_before = self.phases[ind+1].name
+                phase.next_submit.append(self.phases[ind+1].name)
+                phase.run_after = self.phases[ind-1].name
             else:
                 # Set run_after attrib of last phase to previous phase name
+                phase.run_before = self.phases[0].name
+                phase.next_submit.append(self.phases[0].name)
                 phase.run_after = self.phases[ind-1].name
 
             # TODO: this needs to be set somewhere else, or different.
             phase.cluster = phase.name
             if phase.name == "compute":
                 phase.nproc = tasks
+                phase.batch_or_shell = 'batch'
                 phase.submit_to_batch_system = config["general"].get("submit_to_batch_system", True)
                 phase.run_on_queue = config["computer"]["partitions"]["compute"]["name"]
 
@@ -308,9 +330,8 @@ def order_clusters(self, config):
             if not user_phase.run_before and not user_phase.run_after:
                 esm_parser.user_error("ERROR", f"Don't know when to start user_phase. Please set run_after or run_before for user phase {user_phase.name}.")
             # Check if not both run_after and run_before are set at the same time for each user phase
-            if user_phase.run_before and user_phase.run_after:
-                esm_parser.user_error("ERROR", f"Both run_after and run_before are set. Don't know when to start {user_phase.name}. Please only set run_after or run_before.")
-
+#            if user_phase.run_before and user_phase.run_after:
+#                esm_parser.user_error("ERROR", f"Both run_after and run_before are set. Don't know when to start {user_phase.name}. Please only set run_after or run_before.")
         # Correct for ``last_task_in_queue`` if necessary
         # Collect all next_run_triggered_by entries
         next_triggered = self.next_run_triggered_by
@@ -345,10 +366,6 @@ def order_clusters(self, config):
         for phase3 in self.phases + self.user_phases:
             phase3.next_submit = next_submits[phase3.name]
 
-#        for phase6 in self.phases + self.user_phases:
-#            print(phase6.name, phase6.run_after, phase6.called_from, phase6.next_submit)
-
-
 # assign user phases to a cluster (tbd)
         # - if all phases have the same run_after and run_before they can be in the cluster
         # - in this cluster they will be run in parallel?
@@ -357,25 +374,28 @@ def order_clusters(self, config):
         for phase4 in self.phases + self.user_phases:
             calling_cluster = phase4.run_after
 
-    # set last_task_in_queue
-            if calling_cluster == self.last_task_in_queue:
-                self.last_task_in_queue.append(phase4.name)
-
-            called_cluster = phase4.run_before
-#            print(f"calling_cluster: {calling_cluster} ->", phase4.name, f" -> called_cluster: {called_cluster}")
-            set_phase_attrib(self.phases+self.user_phases, called_cluster, "called_from", phase4.name)
-
-    # set first_task_in_queue
-            if called_cluster == self.first_task_in_queue:
-                self.first_task_in_queue.append(phase4.name)
+# brauch ich das hier noch???
+#    # set last_task_in_queue
+#            if calling_cluster == self.last_task_in_queue:
+#                #self.last_task_in_queue.append(phase4.name)
+#                self.last_task_in_queue = phase4.name
+#
+#            called_cluster = phase4.run_before
+##            print(f"calling_cluster: {calling_cluster} ->", phase4.name, f" -> called_cluster: {called_cluster}")
+#            set_phase_attrib(self.phases+self.user_phases, called_cluster, "called_from", phase4.name)
+#
+#    # set first_task_in_queue
+#            if called_cluster == self.first_task_in_queue:
+##                self.first_task_in_queue.append(phase4.name)
+#                self.first_task_in_queue = phase4.name
 
     # set empty cluster entries to phase name
             if phase4.cluster == None:
                 phase4.cluster = phase4.name
 
 # todo: check if num list > 1, is this possible ???
-        first_cluster_name = self.first_task_in_queue[0]
-        last_cluster_name = self.last_task_in_queue[0]
+        first_cluster_name = self.first_task_in_queue
+        last_cluster_name = self.last_task_in_queue
 
         # if first_cluster_name is not next_submit of last_cluster_name
         if not first_cluster_name in get_phase_attrib(self.phases+self.user_phases, last_cluster_name, "next_submit"):
@@ -384,12 +404,6 @@ def order_clusters(self, config):
         if not last_cluster_name == get_phase_attrib(self.phases+self.user_phases, first_cluster_name, "called_from"):
             set_phase_attrib(self.phases+self.user_phases, first_cluster_name, "called_from", last_cluster_name)
 
-#        for i in range(len(self.phases)):
-#            pprint(self.phases[i].__dict__)
-#
-#        for i in range(len(self.user_phases)):
-#            pprint(self.user_phases[i].__dict__)
-
         return self
 
     def complete_clusters(self, config):
@@ -456,114 +470,103 @@ def complete_clusters(self, config):
             subjob_clusters[subjob_cluster]["nproc"] = nproc
         return subjob_clusters
 
-    def prepend_newrun_job(self, config, subjob_clusters):
+    def prepend_newrun_job(self, config):
         """
+        - Creates a new cluster "newrun" if first_task_in_queue is not of
+          type 'SimulationSetup'
+        - Why is this needed? So that every first task is a SimulationSetup to init a config object???
+
         Looks for subjob_cluster that are set by user workflow (not a 'SimulationSetup')
-        and do not follow a 'SimulationSetup' subjob_clusters.
-        E.g. if two user workflow are the last two subjob_clusters ???
-        Any other example cases when this is the case?
+        and are not of type 'SimulationSetup'.
 
         Parameters
         ----------
             self : Workflow object
             config : dict
-            subjob_clusters : dict
 
         Returns
         -------
             self : Workflow object
-            subjob_clusters
         """
-        first_cluster_name = self.first_task_in_queue
-        first_cluster = subjob_clusters[first_cluster_name]
-        #esm_parser.pprint_config(first_cluster)
+        first_task_name = self.first_task_in_queue
+        first_phase = self.get_workflow_phase_by_name(first_task_name)
 
-        if not first_cluster.get("batch_or_shell", "Error") == "SimulationSetup":
+        if not get_phase_attrib(first_phase, first_phase.name, "batch_or_shell") == "SimulationSetup":
 
-            last_cluster_name = self.last_task_in_queue
-            last_cluster = subjob_clusters[last_cluster_name]
+            last_task_name = self.last_task_in_queue
+            last_phase = self.get_workflow_phase_by_name(last_task_name)
 
-            new_first_cluster_name = "newrun"
-            new_first_cluster = {
-                "newrun": {
-                    "called_from": last_cluster_name,
-                    "run_before": first_cluster_name,
-                    "next_submit": [first_cluster_name],
-                    "subjobs": ["newrun_general"],
-                    "batch_or_shell": "SimulationSetup",
-                }
-            }
+            new_first_phase_name = "newrun_general"
+            # Create new default phase object
+            new_first_phase = WorkflowPhase(new_first_phase_name)
+            set_phase_attrib(new_first_phase, new_first_phase_name, "called_from", last_task_name)
+            set_phase_attrib(new_first_phase, new_first_phase_name, "run_before", first_task_name)
+            set_phase_attrib(new_first_phase, new_first_phase_name, "next_submit", first_task_name)
+            set_phase_attrib(new_first_phase, new_first_phase_name, "cluster", "newrun")
+            set_phase_attrib(new_first_phase, new_first_phase_name, "batch_or_shell", "SimulationSetup")
+            set_phase_attrib(new_first_phase, new_first_phase_name, "nproc", 1)
 
-            last_cluster["next_submit"].append("newrun")
-            last_cluster["next_submit"].remove(first_cluster_name)
+            # reset last_task attributes
+            set_phase_attrib(last_phase, last_phase.name, "next_submit", new_first_phase_name)
+            last_phase.next_submit.remove(first_task_name)
 
-            first_cluster["called_from"] = "newrun"
+            # reset first_task attributes
+            first_phase.called_from = new_first_phase_name
 
-            self.first_task_in_queue = "newrun"
+            # reset workflow attributes
+            self.first_task_in_queue = new_first_phase_name
 
-            new_subjob = {
-                "newrun_general": {
-                    "nproc": 1,
-                    "called_from": last_cluster_name,
-                    "run_before": first_cluster_name,
-                    "next_submit": [first_cluster_name],
-                    "subjob_cluster": "newrun",
-                }
-            }
+            # Set new phase to beginning of default phase list
+            self.phases.insert(0, new_first_phase)
 
-            subjob_clusters.update(new_first_cluster)
+        return self
 
-# TODO: add new phase to workflow???
-        #gw_config["subjobs"].update(new_subjob)
 
-        return [self, subjob_clusters]
+def skip_cluster(cluster, config):
+    """
+    Checks if a phase/cluster can be skipped.
+    Needed keywords: run_only, skip_chunk_number
 
+    Parameters
+    ----------
+        self
+        config : dict
 
-    def skip_cluster(self, config):
-        """
-        Checks if a phase/cluster can be skipped.
-        Needed keywords: run_only, skip_chunk_number
+    Returns
+    -------
+        True or False
+    """
+    gw_config = config["general"]["workflow"]
+    clusterconf = gw_config["subjob_clusters"][cluster]
 
-        Parameters
-        ----------
-            self
-            config : dict
+    """
+    print(f"run_only {clusterconf.get('run_only', 'Error') }")
+    print(f"skip_chunk_number {clusterconf.get('skip_chunk_number', -999)}")
+    print(f"skip_run_number {clusterconf.get('skip_run_number', -999)}")
+    print(f"chunk_number {config['general'].get('chunk_number', -998)}")
+    print(f"run_number {config['general'].get('run_number', -998)}")
+    print(f"last_run_in_chunk {config['general']['last_run_in_chunk']}")
+    print(f"first_run_in_chunk {config['general']['first_run_in_chunk']}")
+    """
 
-        Returns
-        -------
-            True or False
-        """
-        #gw_config = config["general"]["workflow"]
-        #clusterconf = gw_config["subjob_clusters"][cluster]
-
-        #"""
-        #print(f"run_only {clusterconf.get('run_only', 'Error') }")
-        #print(f"skip_chunk_number {clusterconf.get('skip_chunk_number', -999)}")
-        #print(f"skip_run_number {clusterconf.get('skip_run_number', -999)}")
-        #print(f"chunk_number {config['general'].get('chunk_number', -998)}")
-        #print(f"run_number {config['general'].get('run_number', -998)}")
-        #print(f"last_run_in_chunk {config['general']['last_run_in_chunk']}")
-        #print(f"first_run_in_chunk {config['general']['first_run_in_chunk']}")
-        #"""
-
-        #if clusterconf.get("run_only", "Error") == "last_run_in_chunk" and not config[
-        #    "general"
-        #].get("last_run_in_chunk", False):
-        #    return True
-        #if clusterconf.get("run_only", "Error") == "first_run_in_chunk" and not config[
-        #    "general"
-        #].get("first_run_in_chunk", False):
-        #    return True
-        #if clusterconf.get("skip_chunk_number", -999) == config["general"].get(
-        #    "chunk_number", -998
-        #):
-        #    return True
-        #if clusterconf.get("skip_run_number", -999) == config["general"].get(
-        #    "run_number", -998
-        #):
-        #    return True
-
-        return False
+    if clusterconf.get("run_only", "Error") == "last_run_in_chunk" and not config[
+        "general"
+    ].get("last_run_in_chunk", False):
+        return True
+    if clusterconf.get("run_only", "Error") == "first_run_in_chunk" and not config[
+        "general"
+    ].get("first_run_in_chunk", False):
+        return True
+    if clusterconf.get("skip_chunk_number", -999) == config["general"].get(
+        "chunk_number", -998
+    ):
+        return True
+    if clusterconf.get("skip_run_number", -999) == config["general"].get(
+        "run_number", -998
+    ):
+        return True
+
+    return False
 
 class WorkflowPhase:
     """A workflow phase class."""
@@ -585,6 +588,7 @@ def __init__(self, phase_name):
         self.skip_run_number = None
         self.name = phase_name
 
+
 class UserWorkflowPhase(WorkflowPhase):
     """A user workflow phase class."""
 
@@ -663,21 +667,23 @@ def assemble_workflow(config):
     # 4. Order user workflows into default workflow wrt. workflow attributs.
     workflow = workflow.order_clusters(config)
 
-# What is the next functions needed for?
-#    subjob_clusters = workflow.complete_clusters(config)
-#    subjob_clusters = workflow.prepend_newrun_job(config)
-    # 5. write the workflow to config
+    # What is the next functions needed for?
+    # subjob_clusters = workflow.complete_clusters(config)
+
+    # 5. create new first phase of type SimulationSetup, if first_task_in_queue is user phase (type batch or shell)
+    workflow = workflow.prepend_newrun_job(config)
+    # 6. write the workflow to config
     config = workflow.write_to_config(config)
-    # 6. Remove old worklow from config
+    # 7. Remove old worklow from config
 
     # Set "jobtype" for the first task???
     if config["general"]["jobtype"] == "unknown":
         config["general"]["command_line_config"]["jobtype"] = config["general"][
             "workflow"
-        ]["first_task_in_queue"][0]
+        ]["first_task_in_queue"]
         config["general"]["jobtype"] = config["general"]["workflow"][
             "first_task_in_queue"
-        ][0]            # todo: this needs to be a string, not a list
+        ]
 
     return config
 
@@ -692,6 +698,8 @@ def set_workflow_attrib(workflow, attrib, value):
         workflow.__setattr__(attrib, value)
 
 def set_phase_attrib(workflow_phases, phase_name, attrib, value):
+    if not type(workflow_phases) is list:
+        workflow_phases = [workflow_phases]
     for phase in workflow_phases:
         if phase.name == phase_name:
             if type(getattr(phase, attrib)).__name__ == "list":
@@ -700,6 +708,8 @@ def set_phase_attrib(workflow_phases, phase_name, attrib, value):
                 phase.__setattr__(attrib, value)
 
 def get_phase_attrib(workflow_phases, phase_name, attrib):
+    if not type(workflow_phases) is list:
+        workflow_phases = [workflow_phases]
     for phase in workflow_phases:
         if phase.name == phase_name:
             value = getattr(phase, attrib)
@@ -734,155 +744,155 @@ def calc_number_of_tasks(config):
 
 ################### Maybe outdated routines ######################
 
-def display_nicely(config):
-    """
-    Pretty prints the workflow configuration assembled in config["general"].
-
-    Parameters
-    ----------
-        config : dict
-
-    Returns
-    -------
-        config : dict
-    """
-    esm_parser.pprint_config(config["general"]["workflow"])
-    return config
-
-
-def collect_all_workflow_information(config):
-    """
-    Collects all workflow information for each component entry in config
-    (can be a model/component or a new entry (e.g. 'flows')
-    NOTE: Should it be possible to set a workflow in the model section of the runscript? Why not?
-
-    Checks if there are "workflow" entries in the user runscript and copies or merges them into
-    config["general"]["workflow"]
-
-    Parameters
-    ----------
-        config : dict
-
-    Returns
-    -------
-        config : dict
-    """
-    for model in config:
-        if "workflow" in config[model]:
-            # looks for "workflow" in each entry of config (can be model/component, general, etc.)
-            w_config = config[model]["workflow"]
-            # looks for "workflow" in "general" section of config.
-            gw_config = config["general"]["workflow"]
-
-            # looks for entry 'subjob_clusters' in config of each component that has a "workflow"
-            if "subjob_clusters" in w_config:
-                for cluster in w_config["subjob_clusters"]:
-                    # if a certain cluster is also in the general config, this cluster will be merged together ...
-                    # what cluster could this be?
-                    if cluster in gw_config["subjob_clusters"]:
-                        gw_config["subjob_clusters"][cluster] = merge_if_possible(
-                            w_config["subjob_clusters"][cluster],
-                            gw_config["subjob_clusters"][cluster],
-                        )
-                    # if cluster is not in general config, it will copied into it.
-                    else:
-                        gw_config["subjob_clusters"][cluster] = copy.deepcopy(
-                            w_config["subjob_clusters"][cluster],
-                        )
-
-            # looks for entry 'subjobs' in config of each component
-            if "subjobs" in w_config:
-                # copies component workflow config to new variable ref_config
-                ref_config = copy.deepcopy(w_config)
-                # ??? for every subjob in ???
-                for subjob in list(copy.deepcopy(w_config["subjobs"])):
-
-                    # subjobs (other than clusters) should be model specific
-                    # subjobs that are defined in subjobs of components workflow configs and not in a subjob_cluster are copied to general with suffix of componet entry.
-                    # appends the model name to the subjob name and copy it to config["general"]
-                    gw_config["subjobs"][subjob + "_" + model] = copy.deepcopy(
-                        w_config["subjobs"][subjob]
-                    )
-                    # if this copied subjobs is also n general workflow subjobs it will be deleted there
-                    if subjob in gw_config["subjobs"]:
-                        del gw_config["subjobs"][subjob]
-
-                    # make sure that the run_after and run_before refer to that cluster
-                    # for all subjobs now in general workflow
-                    for other_subjob in gw_config["subjobs"]:
-                        # sets run_after and run_before to correct subjob???
-                        # if a subjob of general workflow has run_after attribute to a user subjob (that has been renamed to subjob_model)
-                        # this run_after will be set to the new subjob name (subjob_model)
-                        if "run_after" in gw_config["subjobs"][other_subjob]:
-                            if (gw_config["subjobs"][other_subjob]["run_after"] == subjob):
-                                gw_config["subjobs"][other_subjob]["run_after"] == subjob + "_" + model
-                        if "run_before" in gw_config["subjobs"][other_subjob]:
-                            if (gw_config["subjobs"][other_subjob]["run_before"] == subjob):
-                                gw_config["subjobs"][other_subjob]["run_before"] == subjob + "_" + model
-
-                    # if not in another cluster, each subjob gets its own
-                    if (not "subjob_cluster" in gw_config["subjobs"][subjob + "_" + model]):
-                        gw_config["subjobs"][subjob + "_" + model]["subjob_cluster"] = subjob  # + "_" + model
-
-            # checks if next_run:triggered_by is tidy or the one in user workflow, or empty?
-            if "next_run_triggered_by" in w_config:
-                if not gw_config["next_run_triggered_by"] in ["tidy", w_config["next_run_triggered_by"], ]:
-                    print("Mismatch found setting next_run_triggered_by for workflow.")
-                    sys.exit(-1)
-                else:
-                    gw_config["next_run_triggered_by"] = w_config["next_run_triggered_by"]
-                    # what if w_config["next_run_triggered_by"] is empty?
-
-    return config
-
-def merge_single_entry_if_possible(entry, sourceconf, targetconf):
-    """
-    Merges a dictionary entry into a target dictionary that has he same key.
-
-    Parameters
-    ----------
-        entry : str
-            dictionary key
-        sourceconf : dict
-        targetconf : dict
-
-    Returns
-    -------
-        targetconf : dict
-    """
-    if entry in sourceconf:
-        # Check if entry is already in targetconf AND different to sourceconf, then exit
-        if entry in targetconf and not sourceconf[entry] == targetconf[entry]:
-            print(f"Mismatch found in {entry} for cluster {targetconf}")
-            sys.exit(-1)
-        # Continues here if entry exists already in targetconf AND the same as sourceconf or
-        # not already in targetconf and set it to sourceconf
-        targetconf[entry] = sourceconf[entry]
-    return targetconf
-
-def merge_if_possible(source, target):
-    """
-    Does the same as above but for a whole dict
-
-    Merges the entries of source dictionary into target dictionary, if not already in.
-    (Will not overwrite entries in target dictionary.)
-
-    Parameters
-    ----------
-        source : dict
-        target : dict
-
-    Returns
-    -------
-        target : dict
-    """
-    for entry in source:
-        if entry in target:
-            if not source[entry] == target[entry]:
-                print(
-                    f"Mismatch while trying to merge subjob_clusters {source} into {target}"
-                )
-                sys.exit(-1)
-        else:
-            target[entry] = source[entry]
-    return target
+#def display_nicely(config):
+#    """
+#    Pretty prints the workflow configuration assembled in config["general"].
+#
+#    Parameters
+#    ----------
+#        config : dict
+#
+#    Returns
+#    -------
+#        config : dict
+#    """
+#    esm_parser.pprint_config(config["general"]["workflow"])
+#    return config
+#
+#
+#def collect_all_workflow_information(config):
+#    """
+#    Collects all workflow information for each component entry in config
+#    (can be a model/component or a new entry (e.g. 'flows')
+#    NOTE: Should it be possible to set a workflow in the model section of the runscript? Why not?
+#
+#    Checks if there are "workflow" entries in the user runscript and copies or merges them into
+#    config["general"]["workflow"]
+#
+#    Parameters
+#    ----------
+#        config : dict
+#
+#    Returns
+#    -------
+#        config : dict
+#    """
+#    for model in config:
+#        if "workflow" in config[model]:
+#            # looks for "workflow" in each entry of config (can be model/component, general, etc.)
+#            w_config = config[model]["workflow"]
+#            # looks for "workflow" in "general" section of config.
+#            gw_config = config["general"]["workflow"]
+#
+#            # looks for entry 'subjob_clusters' in config of each component that has a "workflow"
+#            if "subjob_clusters" in w_config:
+#                for cluster in w_config["subjob_clusters"]:
+#                    # if a certain cluster is also in the general config, this cluster will be merged together ...
+#                    # what cluster could this be?
+#                    if cluster in gw_config["subjob_clusters"]:
+#                        gw_config["subjob_clusters"][cluster] = merge_if_possible(
+#                            w_config["subjob_clusters"][cluster],
+#                            gw_config["subjob_clusters"][cluster],
+#                        )
+#                    # if cluster is not in general config, it will copied into it.
+#                    else:
+#                        gw_config["subjob_clusters"][cluster] = copy.deepcopy(
+#                            w_config["subjob_clusters"][cluster],
+#                        )
+#
+#            # looks for entry 'subjobs' in config of each component
+#            if "subjobs" in w_config:
+#                # copies component workflow config to new variable ref_config
+#                ref_config = copy.deepcopy(w_config)
+#                # ??? for every subjob in ???
+#                for subjob in list(copy.deepcopy(w_config["subjobs"])):
+#
+#                    # subjobs (other than clusters) should be model specific
+#                    # subjobs that are defined in subjobs of components workflow configs and not in a subjob_cluster are copied to general with suffix of componet entry.
+#                    # appends the model name to the subjob name and copy it to config["general"]
+#                    gw_config["subjobs"][subjob + "_" + model] = copy.deepcopy(
+#                        w_config["subjobs"][subjob]
+#                    )
+#                    # if this copied subjobs is also n general workflow subjobs it will be deleted there
+#                    if subjob in gw_config["subjobs"]:
+#                        del gw_config["subjobs"][subjob]
+#
+#                    # make sure that the run_after and run_before refer to that cluster
+#                    # for all subjobs now in general workflow
+#                    for other_subjob in gw_config["subjobs"]:
+#                        # sets run_after and run_before to correct subjob???
+#                        # if a subjob of general workflow has run_after attribute to a user subjob (that has been renamed to subjob_model)
+#                        # this run_after will be set to the new subjob name (subjob_model)
+#                        if "run_after" in gw_config["subjobs"][other_subjob]:
+#                            if (gw_config["subjobs"][other_subjob]["run_after"] == subjob):
+#                                gw_config["subjobs"][other_subjob]["run_after"] == subjob + "_" + model
+#                        if "run_before" in gw_config["subjobs"][other_subjob]:
+#                            if (gw_config["subjobs"][other_subjob]["run_before"] == subjob):
+#                                gw_config["subjobs"][other_subjob]["run_before"] == subjob + "_" + model
+#
+#                    # if not in another cluster, each subjob gets its own
+#                    if (not "subjob_cluster" in gw_config["subjobs"][subjob + "_" + model]):
+#                        gw_config["subjobs"][subjob + "_" + model]["subjob_cluster"] = subjob  # + "_" + model
+#
+#            # checks if next_run:triggered_by is tidy or the one in user workflow, or empty?
+#            if "next_run_triggered_by" in w_config:
+#                if not gw_config["next_run_triggered_by"] in ["tidy", w_config["next_run_triggered_by"], ]:
+#                    print("Mismatch found setting next_run_triggered_by for workflow.")
+#                    sys.exit(-1)
+#                else:
+#                    gw_config["next_run_triggered_by"] = w_config["next_run_triggered_by"]
+#                    # what if w_config["next_run_triggered_by"] is empty?
+#
+#    return config
+#
+#def merge_single_entry_if_possible(entry, sourceconf, targetconf):
+#    """
+#    Merges a dictionary entry into a target dictionary that has he same key.
+#
+#    Parameters
+#    ----------
+#        entry : str
+#            dictionary key
+#        sourceconf : dict
+#        targetconf : dict
+#
+#    Returns
+#    -------
+#        targetconf : dict
+#    """
+#    if entry in sourceconf:
+#        # Check if entry is already in targetconf AND different to sourceconf, then exit
+#        if entry in targetconf and not sourceconf[entry] == targetconf[entry]:
+#            print(f"Mismatch found in {entry} for cluster {targetconf}")
+#            sys.exit(-1)
+#        # Continues here if entry exists already in targetconf AND the same as sourceconf or
+#        # not already in targetconf and set it to sourceconf
+#        targetconf[entry] = sourceconf[entry]
+#    return targetconf
+#
+#def merge_if_possible(source, target):
+#    """
+#    Does the same as above but for a whole dict
+#
+#    Merges the entries of source dictionary into target dictionary, if not already in.
+#    (Will not overwrite entries in target dictionary.)
+#
+#    Parameters
+#    ----------
+#        source : dict
+#        target : dict
+#
+#    Returns
+#    -------
+#        target : dict
+#    """
+#    for entry in source:
+#        if entry in target:
+#            if not source[entry] == target[entry]:
+#                print(
+#                    f"Mismatch while trying to merge subjob_clusters {source} into {target}"
+#                )
+#                sys.exit(-1)
+#        else:
+#            target[entry] = source[entry]
+#    return target

From 3c72864d2edea36d70f59d42deafdecd095775ea Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Mon, 13 Nov 2023 11:29:40 +0100
Subject: [PATCH 21/98] Reactivated function display_nicely.

---
 src/esm_runscripts/workflow.py | 31 +++++++++++++------------------
 1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index f94030a33..3b1af7b4a 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -736,28 +736,23 @@ def calc_number_of_tasks(config):
                     tasks += config[model]["nprocar"] * config[model]["nprocbr"]
     return tasks
 
+def display_nicely(config):
+    """
+    Pretty prints the workflow configuration assembled in config["general"].
+    Is called by e.g. ``esm_runscripts runscript.yaml -e <expid> -i workflow``
 
+    Parameters
+    ----------
+        config : dict
 
-
-
-
+    Returns
+    -------
+        config : dict
+    """
+    esm_parser.pprint_config(config["general"]["workflow"])
+    return config
 
 ################### Maybe outdated routines ######################
-
-#def display_nicely(config):
-#    """
-#    Pretty prints the workflow configuration assembled in config["general"].
-#
-#    Parameters
-#    ----------
-#        config : dict
-#
-#    Returns
-#    -------
-#        config : dict
-#    """
-#    esm_parser.pprint_config(config["general"]["workflow"])
-#    return config
 #
 #
 #def collect_all_workflow_information(config):

From 547edb09f7524083bec0cbaeca111e71c54733b3 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Tue, 14 Nov 2023 12:05:45 +0100
Subject: [PATCH 22/98] Added some flake8 style optimization.

---
 src/esm_runscripts/workflow.py | 247 +++++++++++++++++++++++----------
 1 file changed, 177 insertions(+), 70 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index 3b1af7b4a..086cf64a9 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -1,9 +1,11 @@
-import sys, copy
+import sys
+import copy
 import esm_parser
 
-from pprint import pprint
+# from pprint import pprint
+
+# import pdb
 
-import pdb
 
 class Workflow:
     """A workflow class."""
@@ -105,7 +107,8 @@ def init_default_workflow(self, config):
 
         for ind, phase in enumerate(self.phases):
             if ind < self.num_phases - 1:
-                # Set run_before attrib of all phases (except last on) to the next phase name
+                # Set run_before attrib of all phases (except last on)
+                # to the next phase name
                 phase.run_before = self.phases[ind+1].name
                 phase.next_submit.append(self.phases[ind+1].name)
                 phase.run_after = self.phases[ind-1].name
@@ -120,15 +123,16 @@ def init_default_workflow(self, config):
             if phase.name == "compute":
                 phase.nproc = tasks
                 phase.batch_or_shell = 'batch'
-                phase.submit_to_batch_system = config["general"].get("submit_to_batch_system", True)
+                phase.submit_to_batch_system = config["general"].get(
+                    "submit_to_batch_system", True)
                 phase.run_on_queue = config["computer"]["partitions"]["compute"]["name"]
 
         # Set default workflow values
-        set_workflow_attrib(self, "first_task_in_queue", self.phases[0].name)   # prepcompute
-        set_workflow_attrib(self, "last_task_in_queue", self.phases[-1].name)   # tidy
+        set_workflow_attrib(self, "first_task_in_queue", self.phases[0].name)
+        set_workflow_attrib(self, "last_task_in_queue", self.phases[-1].name)
         # next_run_triggered_by only used to set last_task_in_queue
         # TODO: why not set last_task_in_queue directly?
-        set_workflow_attrib(self, "next_run_triggered_by", self.phases[-1].name)   # tidy
+        set_workflow_attrib(self, "next_run_triggered_by", self.phases[-1].name)
 
         return self
 
@@ -150,7 +154,7 @@ def check_if_keyword_is_valid(self, keyword):
         else:
             return False
 
-    def collect_all_user_workflows(self,config):
+    def collect_all_user_workflows(self, config):
         """
         Collect all workflows set by config files.
 
@@ -169,7 +173,8 @@ def collect_all_user_workflows(self,config):
         for model in config:
             if "workflow" in config[model]:
                 w_config = config[model]["workflow"]
-                #if "subjobs" in w_config:
+                # if "subjobs" in w_config:
+                # breakpoint()
                 if "phases" in w_config:
                     # copies component workflow config to new variable ref_config
                     ref_config = copy.deepcopy(w_config)
@@ -183,30 +188,56 @@ def collect_all_user_workflows(self,config):
                             if not key == "phases":
                                 set_workflow_attrib(self, key, value)
                         else:
-                            esm_parser.user_error("ERROR", f"``{key}`` is not a valid keyword of a workflow.")
-                    #for subjob in list(copy.deepcopy(w_config["subjobs"])):
+                            err_msg = f"``{key}`` is not a valid keyword of a workflow."
+                            esm_parser.user_error("ERROR", err_msg)
+                    # for subjob in list(copy.deepcopy(w_config["subjobs"])):
                     for phase in list(copy.deepcopy(w_config["phases"])):
                         new_phase_name = phase
                         # create a new user phase object for ``phase``
                         new_phase = UserWorkflowPhase(new_phase_name)
                         if phase in self.get_phases_attribs_list("default", "name"):
-                            esm_parser.user_error("ERROR", f"The user phase ``{new_phase_name}`` has the same name as a default workflow phase. This is not allowed.")
+                            err_msg = (
+                                f"The user phase ``{new_phase_name}`` "
+                                f"has the same name as a default workflow phase. "
+                                f"This is not allowed."
+                            )
+                            esm_parser.user_error("ERROR", err_msg)
                         # each subjob needs to have an unique name
                         # check if the name of the new user phase does not already exist
-                        if not new_phase_name in user_workflow_phases_names:
+                        if new_phase_name not in user_workflow_phases_names:
                             # and append it to the list of user phases of the workflow
                             user_workflow_phases_names.append(new_phase_name)
-                            # set attributes of user_workflow phases from config settings
+                            # set attributes of user_workflow phases from
+                            # config settings
                             # check if valid workflow phase keywords
                             for key, value in w_config["phases"][phase].items():
                                 if new_phase.check_if_keyword_is_valid(key):
-                                    set_phase_attrib([new_phase], new_phase_name, key, value)
+                                    set_phase_attrib(
+                                        [new_phase], new_phase_name, key, value
+                                    )
 #                                    new_phase.__setattr__(key, value)
                                 else:
-                                    esm_parser.user_error("ERROR", f"``{key}`` of workflow phase ``{new_phase_name}`` is not a valid keyword of a workflow phase.")
+                                    err_msg = (
+                                        f"``{key}`` of workflow phase "
+                                        f"``{new_phase_name}`` is not a valid keyword "
+                                        f"of a workflow phase."
+                                    )
+                                    esm_parser.user_error("ERROR", err_msg)
+                            if new_phase.submit_to_batch_system and new_phase.batch_or_shell == "shell":
+                                err_msg = (
+                                    f"Inconsistence attributes for keywords "
+                                    f"``submit_to_batch_system`` and "
+                                    f"``batch_or_shell`` for phase "
+                                    f"``{new_phase.name}``."
+                                )
+                                esm_parser.user_error("ERROR", err_msg)
                             user_workflow_phases.append(new_phase)
                         else:
-                            esm_parser.user_error("ERROR", f"Two workflow phases have the same name {new_phase_name}.")
+                            err_msg = (
+                                f"Two workflow phases have the same name "
+                                f"{new_phase_name}."
+                            )
+                            esm_parser.user_error("ERROR", err_msg)
 
         self.user_phases = user_workflow_phases
         return self
@@ -217,7 +248,7 @@ def write_to_config(self, config):
         """
         cluster_att = []
         for att in dir(self.phases[0]):
-            if(att[:2] != "__"):
+            if (att[:2] != "__"):
                 cluster_att.append(att)
         # 1. Delete unnecessary config workflow entries (e.g. in general)
         if "workflow" in config["general"]:
@@ -250,7 +281,8 @@ def write_to_config(self, config):
 
     def check_user_workflow_dependency(self):
         """
-        Check whether the user defined workflow phases are independent from each other or not.
+        Check whether the user defined workflow phases are independent
+        from each other or not.
 
         Arguments
         ---------
@@ -261,12 +293,13 @@ def check_user_workflow_dependency(self):
             independent : bool (default: False)
         """
         independent = False
-        user_phases_names = self.get_phases_attribs_list('user','name')
-        run_after_list = self.get_phases_attribs_list('user','run_after')
-        run_before_list = self.get_phases_attribs_list('user','run_before')
+        user_phases_names = self.get_phases_attribs_list('user', 'name')
+        run_after_list = self.get_phases_attribs_list('user', 'run_after')
+        run_before_list = self.get_phases_attribs_list('user', 'run_before')
 
         # All user phases are independent from each other, if
-        # none of the ``user_phases_names`` are found in the union of ``run_before_list`` and ``run_after_list``
+        # none of the ``user_phases_names`` are found in the union of
+        # ``run_before_list`` and ``run_after_list``
         # That means alls user phases can be run independent from each other.
         if not set(user_phases_names).intersection(set(run_after_list).union(set(run_before_list))):
             independent = True
@@ -289,17 +322,20 @@ def check_unknown_phases(self):
             unknown_phases : set
         """
         unknown_phases = []
-        phases_names = self.get_phases_attribs_list('default','name')
-        user_phases_names = self.get_phases_attribs_list('user','name')
-        run_after = self.get_phases_attribs_list('user','run_after')
-        run_before = self.get_phases_attribs_list('user','run_before')
+        phases_names = self.get_phases_attribs_list('default', 'name')
+        user_phases_names = self.get_phases_attribs_list('user', 'name')
+        run_after = self.get_phases_attribs_list('user', 'run_after')
+        run_before = self.get_phases_attribs_list('user', 'run_before')
         # Filter out all elements that are None
-        # ``filter(None, anylist)`` will filter out all items of anylist, for which ``if item`` is false (e.g. [], "", None, {}, '').
+        # ``filter(None, anylist)`` will filter out all items of anylist,
+        # for which ``if item`` is false (e.g. [], "", None, {}, '').
         # See also https://docs.python.org/3/library/functions.html#filter
         run_after_list = list(filter(None, run_after))
         run_before_list = list(filter(None, run_before))
-        # Get all phases that are defined as run_after or run_before, but do not exist as user or default phase.
-        # If unknown_phase is not empty, there is a user_phase that defines run_after or run_before for a not existing phase.
+        # Get all phases that are defined as run_after or run_before,
+        # but do not exist as user or default phase.
+        # If unknown_phase is not empty, there is a user_phase that defines run_after
+        # or run_before for a not existing phase.
         unknown_phases = set(run_after_list).union(set(run_before_list)).difference(set(user_phases_names).union(set(phases_names)))
 
         return unknown_phases
@@ -323,32 +359,47 @@ def order_clusters(self, config):
         unknown_phases = self.check_unknown_phases()
         if unknown_phases:
             unknowns = ', '.join(unknown_phases)
-            esm_parser.user_error("ERROR", f"Unknown phase(s) ``{unknowns}`` defined as ``run_before`` or ``run_after``.")
+            err_msg = (
+                f"Unknown phase(s) ``{unknowns}`` defined as ``run_before`` "
+                f"or ``run_after``."
+            )
+            esm_parser.user_error("ERROR", err_msg)
 
         for user_phase in self.user_phases:
             # Check if run_after or run_before is set for each user phase
             if not user_phase.run_before and not user_phase.run_after:
                 esm_parser.user_error("ERROR", f"Don't know when to start user_phase. Please set run_after or run_before for user phase {user_phase.name}.")
-            # Check if not both run_after and run_before are set at the same time for each user phase
+            # Check if not both run_after and run_before are set at the same
+            # time for each user phase
 #            if user_phase.run_before and user_phase.run_after:
-#                esm_parser.user_error("ERROR", f"Both run_after and run_before are set. Don't know when to start {user_phase.name}. Please only set run_after or run_before.")
+#                err_msg = (
+#                    f"Both run_after and run_before are set. Don't know when "
+#                    f"to start {user_phase.name}. Please only set run_after "
+#                    f"or run_before."
+#                )
+#                esm_parser.user_error("ERROR", err_msg)
         # Correct for ``last_task_in_queue`` if necessary
         # Collect all next_run_triggered_by entries
         next_triggered = self.next_run_triggered_by
-        run_after = []
-        #for model in config:
-        #    if "workflow" in config[model]:
-        #        if "next_run_triggered_by" in config[model]["workflow"]:
-        #            next_triggered.append(config[model]["workflow"]["next_run_triggered_by"])
-
-        # How needs the next_triggered_by be set??? Which to choose if several workflows are defined?
-
-        #next_triggered = list(filter((self.next_run_triggered_by).__ne__, next_triggered))
-        #if len(next_triggered) > 1:
-        #    esm_parser.user_error("ERROR", "Mismatch found setting next_run_triggered_by for workflow.")
-        #elif len(next_triggered) == 1:
+        # check if next_triggered is default or user phase
+        # if user phase
+        # get last default phase and correct next_submit
+        # get first default phase and correct run_after, called_from
+        # correct last_task_in_queue of workflow
+        if next_triggered not in self.get_phases_attribs_list("default", "name"):
+            self.phases[-1].next_submit.remove(self.phases[0].name)
+            self.phases[-1].next_submit.append(next_triggered)
+            self.phases[0].run_after = next_triggered
+            self.phases[0].called_from = next_triggered
+            self.last_task_in_queue = next_triggered
+
+        # next_triggered = list(filter((self.next_run_triggered_by).__ne__, next_triggered))
+        # if len(next_triggered) > 1:
+        #    err_msg = "Mismatch found setting next_run_triggered_by for workflow."
+        #    esm_parser.user_error("ERROR", err_msg)
+        # elif len(next_triggered) == 1:
         #    self.next_run_triggered_by = next_triggered[0]
-        ##else: let default
+        # # else: let default
 
         # Set "next_submit" and "called_from"
         # "next_submit" which phase will be called next (run_after of the next phase)
@@ -359,7 +410,7 @@ def order_clusters(self, config):
             next_submits[phase.name] = []
 
         for phase2 in self.phases + self.user_phases:
-            if not phase2.run_after == None:
+            if phase2.run_after is not None:
                 next_submits[phase2.run_after].append(phase2.name)
                 phase2.called_from = phase2.run_after
 
@@ -367,10 +418,10 @@ def order_clusters(self, config):
             phase3.next_submit = next_submits[phase3.name]
 
 # assign user phases to a cluster (tbd)
-        # - if all phases have the same run_after and run_before they can be in the cluster
+        # - if all phases have the same run_after and run_before they can be
+        #   in the cluster
         # - in this cluster they will be run in parallel?
 
-
         for phase4 in self.phases + self.user_phases:
             calling_cluster = phase4.run_after
 
@@ -381,16 +432,16 @@ def order_clusters(self, config):
 #                self.last_task_in_queue = phase4.name
 #
 #            called_cluster = phase4.run_before
-##            print(f"calling_cluster: {calling_cluster} ->", phase4.name, f" -> called_cluster: {called_cluster}")
+# #            print(f"calling_cluster: {calling_cluster} ->", phase4.name, f" -> called_cluster: {called_cluster}")
 #            set_phase_attrib(self.phases+self.user_phases, called_cluster, "called_from", phase4.name)
 #
 #    # set first_task_in_queue
 #            if called_cluster == self.first_task_in_queue:
-##                self.first_task_in_queue.append(phase4.name)
+# #                self.first_task_in_queue.append(phase4.name)
 #                self.first_task_in_queue = phase4.name
 
     # set empty cluster entries to phase name
-            if phase4.cluster == None:
+            if phase4.cluster is None:
                 phase4.cluster = phase4.name
 
 # todo: check if num list > 1, is this possible ???
@@ -398,7 +449,7 @@ def order_clusters(self, config):
         last_cluster_name = self.last_task_in_queue
 
         # if first_cluster_name is not next_submit of last_cluster_name
-        if not first_cluster_name in get_phase_attrib(self.phases+self.user_phases, last_cluster_name, "next_submit"):
+        if first_cluster_name not in get_phase_attrib(self.phases+self.user_phases, last_cluster_name, "next_submit"):
             set_phase_attrib(self.phases+self.user_phases, last_cluster_name, "next_submit", first_cluster_name)
         # if last_cluster_name is not called_from of first_cluster_name
         if not last_cluster_name == get_phase_attrib(self.phases+self.user_phases, first_cluster_name, "called_from"):
@@ -427,11 +478,11 @@ def complete_clusters(self, config):
 
         for phase in self.phases + self.user_phases:
             # Erstellt ein leeres dict im dict subjob_clusters
-            if not phase.cluster in subjob_clusters:
+            if phase.cluster not in subjob_clusters:
                 subjob_clusters[phase.cluster] = {}
 
             # Create empty list for each subjob_cluster
-            if not "subjobs" in subjob_clusters[phase.cluster]:
+            if "subjobs" not in subjob_clusters[phase.cluster]:
                 subjob_clusters[phase.cluster]["subjobs"] = []
 
             # Append subjobs to list.
@@ -458,7 +509,7 @@ def complete_clusters(self, config):
     #        elif subjob_clusters[subjob_cluster].get("script", False):
     #            subjob_clusters[subjob_cluster]["batch_or_shell"] = "shell"
     #
-            if not "run_on_queue" in subjob_clusters[subjob_cluster]:
+            if "run_on_queue" not in subjob_clusters[subjob_cluster]:
                 print(f"Information on target queue is missing in cluster {subjob_cluster}.")
                 sys.exit(-1)
     #
@@ -474,7 +525,8 @@ def prepend_newrun_job(self, config):
         """
         - Creates a new cluster "newrun" if first_task_in_queue is not of
           type 'SimulationSetup'
-        - Why is this needed? So that every first task is a SimulationSetup to init a config object???
+        - Why is this needed? So that every first task is a SimulationSetup to init
+          a config object???
 
         Looks for subjob_cluster that are set by user workflow (not a 'SimulationSetup')
         and are not of type 'SimulationSetup'.
@@ -568,6 +620,7 @@ def skip_cluster(cluster, config):
 
     return False
 
+
 class WorkflowPhase:
     """A workflow phase class."""
 
@@ -603,7 +656,6 @@ def __init__(self, phase_name):
         self.batch_or_shell = "shell"
         self.submit_to_batch_system = False
 
-
     def check_if_keyword_is_valid(self, keyword):
         """
         Checks if the key given for a user workflow is valie
@@ -656,21 +708,24 @@ def assemble_workflow(config):
         workflow = Workflow(phases)
     else:
         esm_parser.user_error("ERROR", "No default workflow phases defined.")
-        # Note: Should this work also if no default phases are set in such a config file, but
-        # instead all workflow phases are defined in different configs and/or runscripts?
-        # TODO: Where could a user define a different (default) phase list? Or should this be changed in defaults.yaml?
+        # Note: Should this work also if no default phases are set in such a config
+        # file, but instead all workflow phases are defined in different configs
+        # and/or runscripts?
+        # TODO: Where could a user define a different (default) phase list?
+        # Or should this be changed in defaults.yaml?
     # 2. Initialize default workflow phases
     workflow = workflow.init_default_workflow(config)
     # 3. Read in workflows from runscript and config files
     workflow = workflow.collect_all_user_workflows(config)
-    #config = collect_all_workflow_information(config)
+    # config = collect_all_workflow_information(config)
     # 4. Order user workflows into default workflow wrt. workflow attributs.
     workflow = workflow.order_clusters(config)
 
     # What is the next functions needed for?
     # subjob_clusters = workflow.complete_clusters(config)
 
-    # 5. create new first phase of type SimulationSetup, if first_task_in_queue is user phase (type batch or shell)
+    # 5. create new first phase of type SimulationSetup, if first_task_in_queue is
+    #    user phase (type batch or shell)
     workflow = workflow.prepend_newrun_job(config)
     # 6. write the workflow to config
     config = workflow.write_to_config(config)
@@ -687,16 +742,19 @@ def assemble_workflow(config):
 
     return config
 
+
 def write_subjob_clusters_to_config(config, subjob_clusters):
     config["general"]["subjob_clusters"] = subjob_clusters
     return config
 
+
 def set_workflow_attrib(workflow, attrib, value):
     if type(getattr(workflow, attrib)).__name__ == "list":
         workflow.__dict__[attrib].append(value)
     else:
         workflow.__setattr__(attrib, value)
 
+
 def set_phase_attrib(workflow_phases, phase_name, attrib, value):
     if not type(workflow_phases) is list:
         workflow_phases = [workflow_phases]
@@ -707,6 +765,7 @@ def set_phase_attrib(workflow_phases, phase_name, attrib, value):
             else:
                 phase.__setattr__(attrib, value)
 
+
 def get_phase_attrib(workflow_phases, phase_name, attrib):
     if not type(workflow_phases) is list:
         workflow_phases = [workflow_phases]
@@ -736,6 +795,52 @@ def calc_number_of_tasks(config):
                     tasks += config[model]["nprocar"] * config[model]["nprocbr"]
     return tasks
 
+
+def display_workflow(config):
+    """
+    Displays current workflow settings.
+
+    Parameters
+    ----------
+        config : dict
+
+    Returns
+    -------
+        config : dict (needed???)
+    """
+
+    esm_parser.pprint_config(config["general"]["workflow"])
+
+    first_phase = config["general"]["workflow"]["first_task_in_queue"]
+    second_phase = config["general"]["workflow"]["subjobs"][first_phase]["next_submit"]
+
+    workflow_order = f"{first_phase}"
+
+    while first_phase not in second_phase and second_phase:
+        sec_phase_str = ""
+        for sec_phase in second_phase:
+            if config["general"]["workflow"]["subjobs"][sec_phase]["next_submit"]:
+                second_phase = config["general"]["workflow"]["subjobs"][sec_phase]["next_submit"]
+            if sec_phase_str == "":
+                sec_phase_str = f"{sec_phase_str} {sec_phase}"
+            else:
+                sec_phase_str = f"{sec_phase_str}, {sec_phase}"
+        workflow_order = f"{workflow_order} -> {sec_phase_str}"
+    else:
+        # second_phase.remove(first_phase)
+        sec_phase_str = ""
+        for sec_phase in second_phase:
+            second_phase = config["general"]["workflow"]["subjobs"][sec_phase]["next_submit"]
+            if sec_phase_str == "":
+                sec_phase_str = f"{sec_phase_str} {sec_phase}"
+            else:
+                sec_phase_str = f"{sec_phase_str}, {sec_phase}"
+        workflow_order = f"{workflow_order} -> {sec_phase_str}"
+
+    esm_parser.user_note("Workflow sequence", f"{workflow_order}")
+    return config
+
+
 def display_nicely(config):
     """
     Pretty prints the workflow configuration assembled in config["general"].
@@ -752,16 +857,18 @@ def display_nicely(config):
     esm_parser.pprint_config(config["general"]["workflow"])
     return config
 
-################### Maybe outdated routines ######################
+# ################## Maybe outdated routines ######################
 #
 #
-#def collect_all_workflow_information(config):
+# def collect_all_workflow_information(config):
 #    """
 #    Collects all workflow information for each component entry in config
 #    (can be a model/component or a new entry (e.g. 'flows')
-#    NOTE: Should it be possible to set a workflow in the model section of the runscript? Why not?
+#    NOTE: Should it be possible to set a workflow in the model section of the
+#          runscript? Why not?
 #
-#    Checks if there are "workflow" entries in the user runscript and copies or merges them into
+#    Checks if there are "workflow" entries in the user runscript and copies or
+#    merges them into
 #    config["general"]["workflow"]
 #
 #    Parameters
@@ -840,7 +947,7 @@ def display_nicely(config):
 #
 #    return config
 #
-#def merge_single_entry_if_possible(entry, sourceconf, targetconf):
+# def merge_single_entry_if_possible(entry, sourceconf, targetconf):
 #    """
 #    Merges a dictionary entry into a target dictionary that has he same key.
 #
@@ -865,7 +972,7 @@ def display_nicely(config):
 #        targetconf[entry] = sourceconf[entry]
 #    return targetconf
 #
-#def merge_if_possible(source, target):
+# def merge_if_possible(source, target):
 #    """
 #    Does the same as above but for a whole dict
 #

From 465d8d342b12fedb87ee5ca42b6515c0e655b02f Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Thu, 16 Nov 2023 15:32:43 +0100
Subject: [PATCH 23/98] Adapted the output for inspect (-i) workflow.

---
 src/esm_runscripts/inspect.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/esm_runscripts/inspect.py b/src/esm_runscripts/inspect.py
index e01ed04d0..d6fb6519e 100644
--- a/src/esm_runscripts/inspect.py
+++ b/src/esm_runscripts/inspect.py
@@ -17,8 +17,8 @@ def run_job(config):
 
 def inspect_workflow(config):
     if config["general"]["inspect"] == "workflow":
-
-        config = workflow.display_nicely(config)
+        config = workflow.display_workflow(config)
+#        config = workflow.display_nicely(config)
         sys.exit(0)
     return config
 

From 6aee3956dc16c9d211582245bec5e15b27680e15 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Thu, 16 Nov 2023 15:35:02 +0100
Subject: [PATCH 24/98] Fix multiple phases in one cluster, fix batch_or_shell
 to be set by esm_tools.

---
 src/esm_runscripts/workflow.py | 351 ++++++++++++++++-----------------
 1 file changed, 173 insertions(+), 178 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index 086cf64a9..6341748af 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -4,13 +4,13 @@
 
 # from pprint import pprint
 
-# import pdb
+import pdb
 
 
 class Workflow:
     """A workflow class."""
 
-    def __init__(self, phases, always_run_with=[]):
+    def __init__(self, phases, always_run_with=[], phases_to_submit_to_batch_system=[]):
         """
         Create a new workflow.
 
@@ -25,14 +25,14 @@ def __init__(self, phases, always_run_with=[]):
         -------
         none
         """
-        self.default_phases = []
         self.user_phases = []
-        self.first_task_in_queue = None
+        self.first_task_in_queue = None                     # needed
         self.last_task_in_queue = None
         self.next_run_triggered_by = None
         # TODO: Call here the phase object ???
         self.phases = phases
         self.always_run_with = always_run_with
+        self.phases_to_submit_to_batch_system = phases_to_submit_to_batch_system
 
     @property
     def num_phases(self):
@@ -60,7 +60,7 @@ def get_workflow_phase_by_name(self, phase_name):
 
     def get_phases_attribs_list(self, phase_type, attrib):
         """
-        Return the names of all phases as list.
+        Returns a certain attribute for all phases as a list.
 
         Parameters
         ----------
@@ -118,27 +118,44 @@ def init_default_workflow(self, config):
                 phase.next_submit.append(self.phases[0].name)
                 phase.run_after = self.phases[ind-1].name
 
-            # TODO: this needs to be set somewhere else, or different.
             phase.cluster = phase.name
-            if phase.name == "compute":
-                phase.nproc = tasks
+            if phase.name in self.phases_to_submit_to_batch_system:
                 phase.batch_or_shell = 'batch'
-                phase.submit_to_batch_system = config["general"].get(
-                    "submit_to_batch_system", True)
+                phase.submit_to_batch_system = True
                 phase.run_on_queue = config["computer"]["partitions"]["compute"]["name"]
+                phase.nproc = tasks
 
         # Set default workflow values
-        set_workflow_attrib(self, "first_task_in_queue", self.phases[0].name)
-        set_workflow_attrib(self, "last_task_in_queue", self.phases[-1].name)
-        # next_run_triggered_by only used to set last_task_in_queue
-        # TODO: why not set last_task_in_queue directly?
-        set_workflow_attrib(self, "next_run_triggered_by", self.phases[-1].name)
+        self.set_workflow_attrib("first_task_in_queue", self.phases[0].name)
+        self.set_workflow_attrib("last_task_in_queue", self.phases[-1].name)
+        self.set_workflow_attrib("next_run_triggered_by", self.phases[-1].name)
 
         return self
 
+    def set_workflow_attrib(self, attrib, value):
+        """
+        Sets a workflow attribute.
+
+        Parameters
+        ----------
+            attrib : str
+            value :
+
+        Returns
+        -------
+            None
+        """
+
+        if type(getattr(self, attrib)).__name__ == "list":
+            self.__dict__[attrib].append(value)
+        else:
+            self.__setattr__(attrib, value)
+
     def check_if_keyword_is_valid(self, keyword):
         """
-        Checks if the key given for a user workflow is valie
+        Checks if the key given for a user workflow is valid.
+        Only keywords are allowed, that are already set during
+        initialization.
 
         Parameters
         ----------
@@ -156,7 +173,7 @@ def check_if_keyword_is_valid(self, keyword):
 
     def collect_all_user_workflows(self, config):
         """
-        Collect all workflows set by config files.
+        Collect all workflows defined in config files.
 
         Parameters
         ----------
@@ -174,10 +191,7 @@ def collect_all_user_workflows(self, config):
             if "workflow" in config[model]:
                 w_config = config[model]["workflow"]
                 # if "subjobs" in w_config:
-                # breakpoint()
                 if "phases" in w_config:
-                    # copies component workflow config to new variable ref_config
-                    ref_config = copy.deepcopy(w_config)
                     # Set attributes of workflow
                     # This will be overwritten by all user defined workflows???
                     # Collect them in a list???
@@ -186,7 +200,7 @@ def collect_all_user_workflows(self, config):
                         if self.check_if_keyword_is_valid(key):
                             # set here only workflow attributes
                             if not key == "phases":
-                                set_workflow_attrib(self, key, value)
+                                self.set_workflow_attrib(key, value)
                         else:
                             err_msg = f"``{key}`` is not a valid keyword of a workflow."
                             esm_parser.user_error("ERROR", err_msg)
@@ -195,6 +209,8 @@ def collect_all_user_workflows(self, config):
                         new_phase_name = phase
                         # create a new user phase object for ``phase``
                         new_phase = UserWorkflowPhase(new_phase_name)
+                        # each subjob needs to have an unique name
+                        # 1. check if ``new_phase`` is already defined as a default phase
                         if phase in self.get_phases_attribs_list("default", "name"):
                             err_msg = (
                                 f"The user phase ``{new_phase_name}`` "
@@ -202,20 +218,25 @@ def collect_all_user_workflows(self, config):
                                 f"This is not allowed."
                             )
                             esm_parser.user_error("ERROR", err_msg)
-                        # each subjob needs to have an unique name
-                        # check if the name of the new user phase does not already exist
-                        if new_phase_name not in user_workflow_phases_names:
-                            # and append it to the list of user phases of the workflow
+                        # 2. check if the name of the new user phase does not already exist
+                        if new_phase_name in user_workflow_phases_names:
+                            err_msg = (
+                                f"Two workflow phases have the same name "
+                                f"{new_phase_name}."
+                            )
+                            esm_parser.user_error("ERROR", err_msg)
+                        # 3. if user phase has a new and unique name
+                        else:
+                            # append it to the list of user phases of the workflow
                             user_workflow_phases_names.append(new_phase_name)
                             # set attributes of user_workflow phases from
                             # config settings
-                            # check if valid workflow phase keywords
+                            # check if valid phase keywords
                             for key, value in w_config["phases"][phase].items():
                                 if new_phase.check_if_keyword_is_valid(key):
                                     set_phase_attrib(
-                                        [new_phase], new_phase_name, key, value
+                                        new_phase, new_phase_name, key, value
                                     )
-#                                    new_phase.__setattr__(key, value)
                                 else:
                                     err_msg = (
                                         f"``{key}`` of workflow phase "
@@ -223,21 +244,14 @@ def collect_all_user_workflows(self, config):
                                         f"of a workflow phase."
                                     )
                                     esm_parser.user_error("ERROR", err_msg)
-                            if new_phase.submit_to_batch_system and new_phase.batch_or_shell == "shell":
-                                err_msg = (
-                                    f"Inconsistence attributes for keywords "
-                                    f"``submit_to_batch_system`` and "
-                                    f"``batch_or_shell`` for phase "
-                                    f"``{new_phase.name}``."
-                                )
-                                esm_parser.user_error("ERROR", err_msg)
+                            # Make sure that batch_or_shell is set to batch if submit_to_batch is true
+                            # TODO: remove/replace batch_or_shell by submit_to_batch_system? Is needed
+                            # for setting it to SimulationSetup and in other functions (resubmit, etc.)
+                            # Should not be set by user. TODO: Remove from dumentation.
+                            if new_phase.submit_to_batch_system:
+                                new_phase.batch_or_shell == "batch"
+
                             user_workflow_phases.append(new_phase)
-                        else:
-                            err_msg = (
-                                f"Two workflow phases have the same name "
-                                f"{new_phase_name}."
-                            )
-                            esm_parser.user_error("ERROR", err_msg)
 
         self.user_phases = user_workflow_phases
         return self
@@ -245,6 +259,7 @@ def collect_all_user_workflows(self, config):
     def write_to_config(self, config):
         """
         Write to config.
+        TODO: Rename subjobs to phases. Nees changes also in resubmit.py and other files???
         """
         cluster_att = []
         for att in dir(self.phases[0]):
@@ -254,8 +269,6 @@ def write_to_config(self, config):
         if "workflow" in config["general"]:
             del config["general"]["workflow"]
 
-        # It is assumed here, that there are no workflows in config["general"]
-        # or that these are removed after collect_...
         config["general"]["workflow"] = {}
         config["general"]["workflow"].update(self.__dict__)
         # 3. Write clusters
@@ -340,9 +353,9 @@ def check_unknown_phases(self):
 
         return unknown_phases
 
-    def order_clusters(self, config):
+    def order_phases(self):
         """
-        Put the subjob_clusters in order.
+        Put the phases in order.
 
         Parameters
         ----------
@@ -352,10 +365,10 @@ def order_clusters(self, config):
         -------
             self : Workflow object
         """
-        # Check if user phases are independent from each other
+        # check if user phases are independent from each other
         # TODO: What if not independent?
         independent = self.check_user_workflow_dependency()
-        # Check if there are unknown phases, if yes, will give error exception
+        # check if there are unknown phases, if yes, will give error exception
         unknown_phases = self.check_unknown_phases()
         if unknown_phases:
             unknowns = ', '.join(unknown_phases)
@@ -365,10 +378,19 @@ def order_clusters(self, config):
             )
             esm_parser.user_error("ERROR", err_msg)
 
+        # check if run_after or run_before is set for each user phase
+        # if not, run_after will be set to last default phase
         for user_phase in self.user_phases:
-            # Check if run_after or run_before is set for each user phase
             if not user_phase.run_before and not user_phase.run_after:
-                esm_parser.user_error("ERROR", f"Don't know when to start user_phase. Please set run_after or run_before for user phase {user_phase.name}.")
+                user_phase.run_after = self.phases[-1].name
+                err_msg = (
+                    f"No value given for ``run_after`` or ``run_before`` "
+                    f"of user phase ``{user_phase.name}``. "
+                    f"Set it to last default phase in workflow: "
+                    f"``{self.phases[-1].name}``."
+                )
+                esm_parser.user_note("NOTE", err_msg)
+
             # Check if not both run_after and run_before are set at the same
             # time for each user phase
 #            if user_phase.run_before and user_phase.run_after:
@@ -378,6 +400,7 @@ def order_clusters(self, config):
 #                    f"or run_before."
 #                )
 #                esm_parser.user_error("ERROR", err_msg)
+
         # Correct for ``last_task_in_queue`` if necessary
         # Collect all next_run_triggered_by entries
         next_triggered = self.next_run_triggered_by
@@ -393,13 +416,15 @@ def order_clusters(self, config):
             self.phases[0].called_from = next_triggered
             self.last_task_in_queue = next_triggered
 
-        # next_triggered = list(filter((self.next_run_triggered_by).__ne__, next_triggered))
+        # what does this do?
+        #next_triggered = list(filter((self.next_run_triggered_by).__ne__, next_triggered))
         # if len(next_triggered) > 1:
         #    err_msg = "Mismatch found setting next_run_triggered_by for workflow."
         #    esm_parser.user_error("ERROR", err_msg)
         # elif len(next_triggered) == 1:
         #    self.next_run_triggered_by = next_triggered[0]
         # # else: let default
+        #breakpoint()
 
         # Set "next_submit" and "called_from"
         # "next_submit" which phase will be called next (run_after of the next phase)
@@ -409,42 +434,23 @@ def order_clusters(self, config):
         for phase in self.phases + self.user_phases:
             next_submits[phase.name] = []
 
+        for phase4 in self.phases + self.user_phases:
+            # if a cluster is not set for a phase set it to the phase name,
+            # so that every phase belongs to a cluster
+            # default cluster has the same name as the phase itself
+            if phase4.cluster is None:
+                phase4.cluster = phase4.name
+
         for phase2 in self.phases + self.user_phases:
             if phase2.run_after is not None:
-                next_submits[phase2.run_after].append(phase2.name)
+                #next_submits[phase2.run_after].append(phase2.name)
+                if phase2.cluster not in next_submits[phase2.run_after]:
+                    next_submits[phase2.run_after].append(phase2.cluster)
                 phase2.called_from = phase2.run_after
 
         for phase3 in self.phases + self.user_phases:
             phase3.next_submit = next_submits[phase3.name]
 
-# assign user phases to a cluster (tbd)
-        # - if all phases have the same run_after and run_before they can be
-        #   in the cluster
-        # - in this cluster they will be run in parallel?
-
-        for phase4 in self.phases + self.user_phases:
-            calling_cluster = phase4.run_after
-
-# brauch ich das hier noch???
-#    # set last_task_in_queue
-#            if calling_cluster == self.last_task_in_queue:
-#                #self.last_task_in_queue.append(phase4.name)
-#                self.last_task_in_queue = phase4.name
-#
-#            called_cluster = phase4.run_before
-# #            print(f"calling_cluster: {calling_cluster} ->", phase4.name, f" -> called_cluster: {called_cluster}")
-#            set_phase_attrib(self.phases+self.user_phases, called_cluster, "called_from", phase4.name)
-#
-#    # set first_task_in_queue
-#            if called_cluster == self.first_task_in_queue:
-# #                self.first_task_in_queue.append(phase4.name)
-#                self.first_task_in_queue = phase4.name
-
-    # set empty cluster entries to phase name
-            if phase4.cluster is None:
-                phase4.cluster = phase4.name
-
-# todo: check if num list > 1, is this possible ???
         first_cluster_name = self.first_task_in_queue
         last_cluster_name = self.last_task_in_queue
 
@@ -457,69 +463,69 @@ def order_clusters(self, config):
 
         return self
 
-    def complete_clusters(self, config):
-        # all that are within a next_submit list are in a cluster if:
-        # run concurrently
-        # have the same cluster entry.
-        """
-        Rearanges the subjobs to their subjobs_clusters ???
-
-        Parameters
-        ----------
-            self : Workflow object
-            config : dict
-
-        Returns
-        -------
-            subjob_clusters : dict
-        """
-        # sort into dict subjob_clusters
-        subjob_clusters = {}
-
-        for phase in self.phases + self.user_phases:
-            # Erstellt ein leeres dict im dict subjob_clusters
-            if phase.cluster not in subjob_clusters:
-                subjob_clusters[phase.cluster] = {}
-
-            # Create empty list for each subjob_cluster
-            if "subjobs" not in subjob_clusters[phase.cluster]:
-                subjob_clusters[phase.cluster]["subjobs"] = []
-
-            # Append subjobs to list.
-            subjob_clusters[phase.cluster]["subjobs"].append(phase.name)
-
-        # Then, complete the resource information per cluster
-        # determine whether a cluster is to be submitted to a batch system
-        for subjob_cluster in subjob_clusters:
-            nproc_sum = nproc_max = 0
-            attributes = ["submit_to_batch_system", "order_in_cluster", "run_on_queue", "run_after", "run_before", "run_only", "skip_run_number", "skip_chunk_number", "batch_or_shell"]
-            for attrib in attributes:
-                temp_list = []
-                for subjob in subjob_clusters[subjob_cluster]["subjobs"]:
-                    if not get_phase_attrib(self.phases + self.user_phases, subjob, attrib) in temp_list:
-                        subjob_clusters[subjob_cluster][attrib] = get_phase_attrib(self.phases + self.user_phases, subjob, attrib)
-                    else:
-                        print("Missmatch in attributes")
-                        sys.exit(-1)
-                nproc_sum += get_phase_attrib(self.phases + self.user_phases, subjob, "nproc")
-                nproc_max = max(get_phase_attrib(self.phases + self.user_phases, subjob, "nproc"), nproc_max)
-
-    #        if subjob_clusters[subjob_cluster].get("submit_to_batch_system", False):
-    #            subjob_clusters[subjob_cluster]["batch_or_shell"] = "batch"
-    #        elif subjob_clusters[subjob_cluster].get("script", False):
-    #            subjob_clusters[subjob_cluster]["batch_or_shell"] = "shell"
-    #
-            if "run_on_queue" not in subjob_clusters[subjob_cluster]:
-                print(f"Information on target queue is missing in cluster {subjob_cluster}.")
-                sys.exit(-1)
-    #
-    # TODO: Check in nproc is calculated correctly
-            if subjob_clusters[subjob_cluster]["order_in_cluster"] == "concurrent":
-                nproc = nproc_sum
-            else:
-                nproc = nproc_max
-            subjob_clusters[subjob_cluster]["nproc"] = nproc
-        return subjob_clusters
+#    def complete_clusters(self, config):
+#        # all that are within a next_submit list are in a cluster if:
+#        # run concurrently
+#        # have the same cluster entry.
+#        """
+#        Rearanges the subjobs to their subjobs_clusters ???
+#
+#        Parameters
+#        ----------
+#            self : Workflow object
+#            config : dict
+#
+#        Returns
+#        -------
+#            subjob_clusters : dict
+#        """
+#        # sort into dict subjob_clusters
+#        subjob_clusters = {}
+#
+#        for phase in self.phases + self.user_phases:
+#            # Erstellt ein leeres dict im dict subjob_clusters
+#            if phase.cluster not in subjob_clusters:
+#                subjob_clusters[phase.cluster] = {}
+#
+#            # Create empty list for each subjob_cluster
+#            if "subjobs" not in subjob_clusters[phase.cluster]:
+#                subjob_clusters[phase.cluster]["subjobs"] = []
+#
+#            # Append subjobs to list.
+#            subjob_clusters[phase.cluster]["subjobs"].append(phase.name)
+#
+#        # Then, complete the resource information per cluster
+#        # determine whether a cluster is to be submitted to a batch system
+#        for subjob_cluster in subjob_clusters:
+#            nproc_sum = nproc_max = 0
+#            attributes = ["submit_to_batch_system", "order_in_cluster", "run_on_queue", "run_after", "run_before", "run_only", "skip_run_number", "skip_chunk_number", "batch_or_shell"]
+#            for attrib in attributes:
+#                temp_list = []
+#                for subjob in subjob_clusters[subjob_cluster]["subjobs"]:
+#                    if not get_phase_attrib(self.phases + self.user_phases, subjob, attrib) in temp_list:
+#                        subjob_clusters[subjob_cluster][attrib] = get_phase_attrib(self.phases + self.user_phases, subjob, attrib)
+#                    else:
+#                        print("Missmatch in attributes")
+#                        sys.exit(-1)
+#                nproc_sum += get_phase_attrib(self.phases + self.user_phases, subjob, "nproc")
+#                nproc_max = max(get_phase_attrib(self.phases + self.user_phases, subjob, "nproc"), nproc_max)
+#
+#    #        if subjob_clusters[subjob_cluster].get("submit_to_batch_system", False):
+#    #            subjob_clusters[subjob_cluster]["batch_or_shell"] = "batch"
+#    #        elif subjob_clusters[subjob_cluster].get("script", False):
+#    #            subjob_clusters[subjob_cluster]["batch_or_shell"] = "shell"
+#    #
+#            if "run_on_queue" not in subjob_clusters[subjob_cluster]:
+#                print(f"Information on target queue is missing in cluster {subjob_cluster}.")
+#                sys.exit(-1)
+#    #
+#    # TODO: Check in nproc is calculated correctly
+#            if subjob_clusters[subjob_cluster]["order_in_cluster"] == "concurrent":
+#                nproc = nproc_sum
+#            else:
+#                nproc = nproc_max
+#            subjob_clusters[subjob_cluster]["nproc"] = nproc
+#        return subjob_clusters
 
     def prepend_newrun_job(self, config):
         """
@@ -578,6 +584,7 @@ def skip_cluster(cluster, config):
     """
     Checks if a phase/cluster can be skipped.
     Needed keywords: run_only, skip_chunk_number
+    Is called from resubmit.py
 
     Parameters
     ----------
@@ -626,16 +633,16 @@ class WorkflowPhase:
 
     def __init__(self, phase_name):
         self.name = None
-        self.nproc = 1
+        self.nproc = 1                              # needed
         self.run_before = None
         self.run_after = None
-        self.submit_to_batch_system = False
+        self.submit_to_batch_system = False         # needed
         self.run_on_queue = None
         self.cluster = None
-        self.next_submit = []
-        self.called_from = None
-        self.batch_or_shell = "SimulationSetup"
-        self.order_in_cluster = "sequential"
+        self.next_submit = []                       # needed
+        self.called_from = None                     # needed
+        self.batch_or_shell = "SimulationSetup"     # needed
+        self.order_in_cluster = "sequential"        # needed ???
         self.run_only = None
         self.skip_chunk_number = None
         self.skip_run_number = None
@@ -694,32 +701,30 @@ def assemble_workflow(config):
     # initialize the default workflow as Workflow object
     # TODO: Where are these default phases defined? For now I placed it in
     # esm_tools/configs/esm_software/esm_runscripts/defaults.yaml
-    phases = []
-    always_run_with = []
     if "defaults.yaml" in config["general"]:
         if "workflow" in config["general"]["defaults.yaml"]:
-            phases = config["general"]["defaults.yaml"]["workflow"]["phases"]
-            if "always_run_with" in config["general"]["defaults.yaml"]["workflow"]:
-                always_run_with = config["general"]["defaults.yaml"]["workflow"]["always_run_with"]
-
-    if phases and always_run_with:
-        workflow = Workflow(phases, always_run_with=always_run_with)
-    elif phases:
-        workflow = Workflow(phases)
+            phases = config["general"]["defaults.yaml"]["workflow"].get("phases", [])
+            always_run_with = config["general"]["defaults.yaml"]["workflow"].get("always_run_with", [])
+            phases_to_submit_to_batch_system = config["general"]["defaults.yaml"]["workflow"].get("phases_to_submit_to_batch_system", [])
+
+    if phases:
+        workflow = Workflow(phases, always_run_with=always_run_with, phases_to_submit_to_batch_system=phases_to_submit_to_batch_system)
     else:
         esm_parser.user_error("ERROR", "No default workflow phases defined.")
         # Note: Should this work also if no default phases are set in such a config
         # file, but instead all workflow phases are defined in different configs
         # and/or runscripts?
-        # TODO: Where could a user define a different (default) phase list?
-        # Or should this be changed in defaults.yaml?
+        # Where could a user define a different (default) phase list?
+        # Or should this be changed in defaults.yaml as it is now?
+
     # 2. Initialize default workflow phases
     workflow = workflow.init_default_workflow(config)
+
     # 3. Read in workflows from runscript and config files
     workflow = workflow.collect_all_user_workflows(config)
-    # config = collect_all_workflow_information(config)
-    # 4. Order user workflows into default workflow wrt. workflow attributs.
-    workflow = workflow.order_clusters(config)
+
+    # 4. Order user workflows into default workflow wrt. workflow and phase attributs.
+    workflow = workflow.order_phases()
 
     # What is the next functions needed for?
     # subjob_clusters = workflow.complete_clusters(config)
@@ -727,9 +732,11 @@ def assemble_workflow(config):
     # 5. create new first phase of type SimulationSetup, if first_task_in_queue is
     #    user phase (type batch or shell)
     workflow = workflow.prepend_newrun_job(config)
+
     # 6. write the workflow to config
-    config = workflow.write_to_config(config)
     # 7. Remove old worklow from config
+    config = workflow.write_to_config(config)
+
 
     # Set "jobtype" for the first task???
     if config["general"]["jobtype"] == "unknown":
@@ -743,18 +750,6 @@ def assemble_workflow(config):
     return config
 
 
-def write_subjob_clusters_to_config(config, subjob_clusters):
-    config["general"]["subjob_clusters"] = subjob_clusters
-    return config
-
-
-def set_workflow_attrib(workflow, attrib, value):
-    if type(getattr(workflow, attrib)).__name__ == "list":
-        workflow.__dict__[attrib].append(value)
-    else:
-        workflow.__setattr__(attrib, value)
-
-
 def set_phase_attrib(workflow_phases, phase_name, attrib, value):
     if not type(workflow_phases) is list:
         workflow_phases = [workflow_phases]
@@ -830,7 +825,7 @@ def display_workflow(config):
         # second_phase.remove(first_phase)
         sec_phase_str = ""
         for sec_phase in second_phase:
-            second_phase = config["general"]["workflow"]["subjobs"][sec_phase]["next_submit"]
+            second_phase = config["general"]["workflow"]["subjob_clusters"][sec_phase]["next_submit"]
             if sec_phase_str == "":
                 sec_phase_str = f"{sec_phase_str} {sec_phase}"
             else:

From 138b3c2e6cf001f6b3ad9828d7b84e09ebd0ec9c Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Fri, 17 Nov 2023 13:10:32 +0100
Subject: [PATCH 25/98] Added some further comments and added cluster info in
 display_workflow for inspect argument.

---
 src/esm_runscripts/workflow.py | 41 +++++++++++++++++-----------------
 1 file changed, 20 insertions(+), 21 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index 6341748af..1730d631a 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -259,7 +259,7 @@ def collect_all_user_workflows(self, config):
     def write_to_config(self, config):
         """
         Write to config.
-        TODO: Rename subjobs to phases. Nees changes also in resubmit.py and other files???
+        TODO: Rename ``subjobs`` to ``phases``. But this needs changes also in resubmit.py and other files???
         """
         cluster_att = []
         for att in dir(self.phases[0]):
@@ -416,18 +416,8 @@ def order_phases(self):
             self.phases[0].called_from = next_triggered
             self.last_task_in_queue = next_triggered
 
-        # what does this do?
-        #next_triggered = list(filter((self.next_run_triggered_by).__ne__, next_triggered))
-        # if len(next_triggered) > 1:
-        #    err_msg = "Mismatch found setting next_run_triggered_by for workflow."
-        #    esm_parser.user_error("ERROR", err_msg)
-        # elif len(next_triggered) == 1:
-        #    self.next_run_triggered_by = next_triggered[0]
-        # # else: let default
-        #breakpoint()
-
         # Set "next_submit" and "called_from"
-        # "next_submit" which phase will be called next (run_after of the next phase)
+        # "next_submit" which phase/cluster will be called next (run_after of the next phase)
         # "called_from" name of previous phase, run_after of current phase
         # Create a dict of all phases with empty lists
         next_submits = {}
@@ -441,9 +431,9 @@ def order_phases(self):
             if phase4.cluster is None:
                 phase4.cluster = phase4.name
 
+        # set next_submits to the cluster name rather then to the phase name
         for phase2 in self.phases + self.user_phases:
             if phase2.run_after is not None:
-                #next_submits[phase2.run_after].append(phase2.name)
                 if phase2.cluster not in next_submits[phase2.run_after]:
                     next_submits[phase2.run_after].append(phase2.cluster)
                 phase2.called_from = phase2.run_after
@@ -455,9 +445,11 @@ def order_phases(self):
         last_cluster_name = self.last_task_in_queue
 
         # if first_cluster_name is not next_submit of last_cluster_name
+        # set 'next_submit' of last phase/cluster to first phase/cluster in workflow
         if first_cluster_name not in get_phase_attrib(self.phases+self.user_phases, last_cluster_name, "next_submit"):
             set_phase_attrib(self.phases+self.user_phases, last_cluster_name, "next_submit", first_cluster_name)
         # if last_cluster_name is not called_from of first_cluster_name
+        # set 'called_from' of first phase/cluster to last phase/cluster
         if not last_cluster_name == get_phase_attrib(self.phases+self.user_phases, first_cluster_name, "called_from"):
             set_phase_attrib(self.phases+self.user_phases, first_cluster_name, "called_from", last_cluster_name)
 
@@ -804,35 +796,42 @@ def display_workflow(config):
         config : dict (needed???)
     """
 
-    esm_parser.pprint_config(config["general"]["workflow"])
+    display_nicely(config)
 
     first_phase = config["general"]["workflow"]["first_task_in_queue"]
+    subjobs = config["general"]["workflow"]["subjob_clusters"][first_phase]["subjobs"]
+    # Note: next_submit points to the next cluster (not phase)
     second_phase = config["general"]["workflow"]["subjobs"][first_phase]["next_submit"]
 
-    workflow_order = f"{first_phase}"
+    workflow_order = f"``{first_phase}`` {subjobs}"
 
+    # While first_phase (first_task_in_queue) is not to be called by the next phase (next_submit).
+    # In other words: If not last phase/cluster is reached.
     while first_phase not in second_phase and second_phase:
         sec_phase_str = ""
         for sec_phase in second_phase:
             if config["general"]["workflow"]["subjobs"][sec_phase]["next_submit"]:
                 second_phase = config["general"]["workflow"]["subjobs"][sec_phase]["next_submit"]
+                subjobs = config["general"]["workflow"]["subjob_clusters"][sec_phase]["subjobs"]
             if sec_phase_str == "":
-                sec_phase_str = f"{sec_phase_str} {sec_phase}"
+                sec_phase_str = f"{sec_phase_str} ``{sec_phase}`` {subjobs}"
             else:
-                sec_phase_str = f"{sec_phase_str}, {sec_phase}"
+                sec_phase_str = f"{sec_phase_str}, ``{sec_phase}`` {subjobs}"
         workflow_order = f"{workflow_order} -> {sec_phase_str}"
+    # For last phase that would start the next run
     else:
-        # second_phase.remove(first_phase)
         sec_phase_str = ""
+        # for all cluster in next_submit
         for sec_phase in second_phase:
             second_phase = config["general"]["workflow"]["subjob_clusters"][sec_phase]["next_submit"]
+            subjobs = config["general"]["workflow"]["subjob_clusters"][sec_phase]["subjobs"]
             if sec_phase_str == "":
-                sec_phase_str = f"{sec_phase_str} {sec_phase}"
+                sec_phase_str = f"{sec_phase_str} ``{sec_phase}`` {subjobs}"
             else:
-                sec_phase_str = f"{sec_phase_str}, {sec_phase}"
+                sec_phase_str = f"{sec_phase_str} and ``{sec_phase}`` {subjobs}"
         workflow_order = f"{workflow_order} -> {sec_phase_str}"
 
-    esm_parser.user_note("Workflow sequence", f"{workflow_order}")
+    esm_parser.user_note("Workflow sequence (cluster [phases])", f"{workflow_order}")
     return config
 
 

From fcf5a410d406b43d01eb92e437ce53fae58774bd Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Fri, 17 Nov 2023 16:48:01 +0100
Subject: [PATCH 26/98] Add new workflow attribute in defaults.yaml.

---
 configs/esm_software/esm_runscripts/defaults.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/configs/esm_software/esm_runscripts/defaults.yaml b/configs/esm_software/esm_runscripts/defaults.yaml
index 2aa0684dd..812e32ad0 100644
--- a/configs/esm_software/esm_runscripts/defaults.yaml
+++ b/configs/esm_software/esm_runscripts/defaults.yaml
@@ -15,3 +15,5 @@ workflow:
     always_run_with:
         - prepare
         - prepexp
+    phases_to_submit_to_batch_system:
+        - compute

From 5a3bfb86595ba2cede94183c2f87815132f6c229 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Fri, 17 Nov 2023 16:49:14 +0100
Subject: [PATCH 27/98] Reactivated function complete_clusters.

---
 src/esm_runscripts/workflow.py | 104 ++++++++++++++++-----------------
 1 file changed, 50 insertions(+), 54 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index 1730d631a..1efaeb86d 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -278,18 +278,22 @@ def write_to_config(self, config):
             config["general"]["workflow"]["subjob_clusters"][cluster]["subjobs"] = []
             for phase in self.phases + self.user_phases:
                 if phase.cluster == cluster:
+                    # TODO: Are there more attributes to be merged from the different phases within a cluster???
+                    # nproc is calculated in complete_clusters -> can be placed here???
                     config["general"]["workflow"]["subjob_clusters"][cluster]["subjobs"].append(phase.name)
                     for att in cluster_att:
                         config["general"]["workflow"]["subjob_clusters"][cluster][att] = getattr(phase, att)
+                    config["general"]["workflow"]["subjob_clusters"][cluster]["name"] = cluster
         # 2. Write subjobs/phases
         config["general"]["workflow"]["subjobs"] = {}
         for phase in self.phases+self.user_phases:
             temp_dict = {phase.name: phase.__dict__}
             config["general"]["workflow"]["subjobs"].update(temp_dict)
 
-        # Todo: delete phases and user_phases
+        # delete phases and user_phases
         del config["general"]["workflow"]["phases"]
         del config["general"]["workflow"]["user_phases"]
+
         return config
 
     def check_user_workflow_dependency(self):
@@ -455,69 +459,61 @@ def order_phases(self):
 
         return self
 
-#    def complete_clusters(self, config):
-#        # all that are within a next_submit list are in a cluster if:
-#        # run concurrently
-#        # have the same cluster entry.
-#        """
-#        Rearanges the subjobs to their subjobs_clusters ???
-#
-#        Parameters
-#        ----------
-#            self : Workflow object
-#            config : dict
-#
-#        Returns
-#        -------
-#            subjob_clusters : dict
-#        """
-#        # sort into dict subjob_clusters
-#        subjob_clusters = {}
-#
-#        for phase in self.phases + self.user_phases:
-#            # Erstellt ein leeres dict im dict subjob_clusters
-#            if phase.cluster not in subjob_clusters:
-#                subjob_clusters[phase.cluster] = {}
-#
-#            # Create empty list for each subjob_cluster
-#            if "subjobs" not in subjob_clusters[phase.cluster]:
-#                subjob_clusters[phase.cluster]["subjobs"] = []
-#
-#            # Append subjobs to list.
-#            subjob_clusters[phase.cluster]["subjobs"].append(phase.name)
-#
-#        # Then, complete the resource information per cluster
-#        # determine whether a cluster is to be submitted to a batch system
-#        for subjob_cluster in subjob_clusters:
-#            nproc_sum = nproc_max = 0
+    def complete_clusters(self, config):
+        # all that are within a next_submit list are in a cluster if:
+        # run concurrently
+        # have the same cluster entry.
+        """
+        Rearanges the subjobs to their subjobs_clusters ???
+
+        TODO: Can this be put into other functions/methods?
+
+        Parameters
+        ----------
+            self : Workflow object
+            config : dict
+
+        Returns
+        -------
+            config : dict
+        """
+        subjob_clusters = config["general"]["workflow"]["subjob_clusters"]
+
+        # Then, complete the resource information per cluster
+        # determine whether a cluster is to be submitted to a batch system
+        for subjob_cluster in subjob_clusters:
+            nproc_sum = nproc_max = 0
+            # Check if the following attributes are set for each cluster???
 #            attributes = ["submit_to_batch_system", "order_in_cluster", "run_on_queue", "run_after", "run_before", "run_only", "skip_run_number", "skip_chunk_number", "batch_or_shell"]
 #            for attrib in attributes:
 #                temp_list = []
-#                for subjob in subjob_clusters[subjob_cluster]["subjobs"]:
+            for subjob in subjob_clusters[subjob_cluster]["subjobs"]:
+                # Check if the following attributes are set for each cluster???
 #                    if not get_phase_attrib(self.phases + self.user_phases, subjob, attrib) in temp_list:
 #                        subjob_clusters[subjob_cluster][attrib] = get_phase_attrib(self.phases + self.user_phases, subjob, attrib)
 #                    else:
 #                        print("Missmatch in attributes")
 #                        sys.exit(-1)
-#                nproc_sum += get_phase_attrib(self.phases + self.user_phases, subjob, "nproc")
-#                nproc_max = max(get_phase_attrib(self.phases + self.user_phases, subjob, "nproc"), nproc_max)
+                nproc_sum += get_phase_attrib(self.phases + self.user_phases, subjob, "nproc")
+                nproc_max = max(get_phase_attrib(self.phases + self.user_phases, subjob, "nproc"), nproc_max)
 #
-#    #        if subjob_clusters[subjob_cluster].get("submit_to_batch_system", False):
+            if subjob_clusters[subjob_cluster].get("submit_to_batch_system", False):
 #    #            subjob_clusters[subjob_cluster]["batch_or_shell"] = "batch"
+
+# Why setting batch_or_shell to shell if a script is given? Wouldn't now all phases be executed as shell and never as batch?
 #    #        elif subjob_clusters[subjob_cluster].get("script", False):
 #    #            subjob_clusters[subjob_cluster]["batch_or_shell"] = "shell"
 #    #
-#            if "run_on_queue" not in subjob_clusters[subjob_cluster]:
-#                print(f"Information on target queue is missing in cluster {subjob_cluster}.")
-#                sys.exit(-1)
-#    #
-#    # TODO: Check in nproc is calculated correctly
-#            if subjob_clusters[subjob_cluster]["order_in_cluster"] == "concurrent":
-#                nproc = nproc_sum
-#            else:
-#                nproc = nproc_max
-#            subjob_clusters[subjob_cluster]["nproc"] = nproc
-#        return subjob_clusters
+                if "run_on_queue" not in subjob_clusters[subjob_cluster]:
+                    err_msg = f"No value for target queue given by ``run_on_queue' for cluster {subjob_cluster}."
+                    esm_parser.user_error("ERROR", err_msg)
+
+                if subjob_clusters[subjob_cluster]["order_in_cluster"] == "concurrent":
+                    nproc = nproc_sum
+                else:
+                    nproc = nproc_max
+                subjob_clusters[subjob_cluster]["nproc"] = nproc
+        return config
 
     def prepend_newrun_job(self, config):
         """
@@ -718,9 +714,6 @@ def assemble_workflow(config):
     # 4. Order user workflows into default workflow wrt. workflow and phase attributs.
     workflow = workflow.order_phases()
 
-    # What is the next functions needed for?
-    # subjob_clusters = workflow.complete_clusters(config)
-
     # 5. create new first phase of type SimulationSetup, if first_task_in_queue is
     #    user phase (type batch or shell)
     workflow = workflow.prepend_newrun_job(config)
@@ -729,6 +722,9 @@ def assemble_workflow(config):
     # 7. Remove old worklow from config
     config = workflow.write_to_config(config)
 
+    # 8. complete some information in a cluster
+    #    e.g. if phases in cluster are submit to sbatch system
+    config = workflow.complete_clusters(config)
 
     # Set "jobtype" for the first task???
     if config["general"]["jobtype"] == "unknown":

From e6bada5540b473523a783715f6a93e5a9beb91fc Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Fri, 17 Nov 2023 16:50:44 +0100
Subject: [PATCH 28/98] Some changes for processing phases in awicm3.

---
 configs/setups/awicm3/awicm3.yaml | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/configs/setups/awicm3/awicm3.yaml b/configs/setups/awicm3/awicm3.yaml
index 30f6284c4..5c5f29716 100644
--- a/configs/setups/awicm3/awicm3.yaml
+++ b/configs/setups/awicm3/awicm3.yaml
@@ -325,18 +325,22 @@ oifs:
         tl_o3_data_dir: ${input_dir}/${version}/climate/
         ICMGG_INIT_name: "_${fesom.resolution}"
 
-        workflow:
-            next_run_triggered_by: tidy
-            subjobs:
-                my_new_subjob:
-                    batch_or_shell: shell
+#        workflow:
+#            next_run_triggered_by: tidy
+#            phases:
+#                my_new_subjob:
+#                    batch_or_shell: shell
+#                    run_before: tidy
+#                    run_after: compute
+#                    script_dir: "/work/ab0995/a270089/myrunscripts/"
+#                    script: "helloworld.sh"
 
         # Postprocessing
         choose_general.postprocessing:
                 True:
-                        workflow:
+                        add_workflow:
                                 next_run_triggered_by: tidy
-                                subjobs:
+                                phases:
                                         postprocessing:
                                                 batch_or_shell: batch
                                                 order_in_cluster: concurrent

From ccafa8b3441ff04612b045b9c5666924292fceb9 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <31928930+nwieters@users.noreply.github.com>
Date: Mon, 20 Nov 2023 10:18:23 +0100
Subject: [PATCH 29/98] Update src/esm_runscripts/inspect.py

Co-authored-by: Miguel <63242832+mandresm@users.noreply.github.com>
---
 src/esm_runscripts/inspect.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/esm_runscripts/inspect.py b/src/esm_runscripts/inspect.py
index d6fb6519e..5d6b89122 100644
--- a/src/esm_runscripts/inspect.py
+++ b/src/esm_runscripts/inspect.py
@@ -18,7 +18,6 @@ def run_job(config):
 def inspect_workflow(config):
     if config["general"]["inspect"] == "workflow":
         config = workflow.display_workflow(config)
-#        config = workflow.display_nicely(config)
         sys.exit(0)
     return config
 

From e107d99fa29f0f0d2c5eda073a7ebe251a8acf8f Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Mon, 20 Nov 2023 16:29:17 +0100
Subject: [PATCH 30/98] Merged default and user phase class, made phase a
 subclass of dict.

---
 src/esm_runscripts/workflow.py | 240 +++++++++++++++------------------
 1 file changed, 105 insertions(+), 135 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index 1efaeb86d..e20d6ab49 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -55,7 +55,7 @@ def get_workflow_phase_by_name(self, phase_name):
             phase : class phase or user_phase
         """
         for phase in self.phases + self.user_phases:
-            if phase.name == phase_name:
+            if phase["name"] == phase_name:
                 return phase
 
     def get_phases_attribs_list(self, phase_type, attrib):
@@ -64,7 +64,7 @@ def get_phases_attribs_list(self, phase_type, attrib):
 
         Parameters
         ----------
-            self: class Workflow
+            self: Workflow object
             phase_type: str (default or user)
             attrib: str
 
@@ -73,9 +73,9 @@ def get_phases_attribs_list(self, phase_type, attrib):
             phases_attribs : list
         """
         if phase_type == 'user':
-            phases_attribs = [getattr(phase, attrib) for phase in self.user_phases]
+            phases_attribs = [phase[attrib] for phase in self.user_phases]
         else:
-            phases_attribs = [getattr(phase, attrib) for phase in self.phases]
+            phases_attribs = [phase[attrib] for phase in self.phases]
 
         return phases_attribs
 
@@ -109,26 +109,26 @@ def init_default_workflow(self, config):
             if ind < self.num_phases - 1:
                 # Set run_before attrib of all phases (except last on)
                 # to the next phase name
-                phase.run_before = self.phases[ind+1].name
-                phase.next_submit.append(self.phases[ind+1].name)
-                phase.run_after = self.phases[ind-1].name
+                phase["run_before"] = self.phases[ind+1]["name"]
+                phase["next_submit"].append(self.phases[ind+1]["name"])
+                phase["run_after"] = self.phases[ind-1]["name"]
             else:
                 # Set run_after attrib of last phase to previous phase name
-                phase.run_before = self.phases[0].name
-                phase.next_submit.append(self.phases[0].name)
-                phase.run_after = self.phases[ind-1].name
+                phase["run_before"] = self.phases[0]["name"]
+                phase["next_submit"].append(self.phases[0]["name"])
+                phase["run_after"] = self.phases[ind-1]["name"]
 
-            phase.cluster = phase.name
-            if phase.name in self.phases_to_submit_to_batch_system:
-                phase.batch_or_shell = 'batch'
-                phase.submit_to_batch_system = True
-                phase.run_on_queue = config["computer"]["partitions"]["compute"]["name"]
-                phase.nproc = tasks
+            phase["cluster"] = phase["name"]
+            if phase["name"] in self.phases_to_submit_to_batch_system:
+                phase["batch_or_shell"] = 'batch'
+                phase["submit_to_batch_system"] = True
+                phase["run_on_queue"] = config["computer"]["partitions"]["compute"]["name"]
+                phase["nproc"] = tasks
 
         # Set default workflow values
-        self.set_workflow_attrib("first_task_in_queue", self.phases[0].name)
-        self.set_workflow_attrib("last_task_in_queue", self.phases[-1].name)
-        self.set_workflow_attrib("next_run_triggered_by", self.phases[-1].name)
+        self.set_workflow_attrib("first_task_in_queue", self.phases[0]["name"])
+        self.set_workflow_attrib("last_task_in_queue", self.phases[-1]["name"])
+        self.set_workflow_attrib("next_run_triggered_by", self.phases[-1]["name"])
 
         return self
 
@@ -206,9 +206,9 @@ def collect_all_user_workflows(self, config):
                             esm_parser.user_error("ERROR", err_msg)
                     # for subjob in list(copy.deepcopy(w_config["subjobs"])):
                     for phase in list(copy.deepcopy(w_config["phases"])):
-                        new_phase_name = phase
+#                        new_phase_name = phase
                         # create a new user phase object for ``phase``
-                        new_phase = UserWorkflowPhase(new_phase_name)
+                        new_phase = WorkflowPhase(phase)
                         # each subjob needs to have an unique name
                         # 1. check if ``new_phase`` is already defined as a default phase
                         if phase in self.get_phases_attribs_list("default", "name"):
@@ -219,7 +219,7 @@ def collect_all_user_workflows(self, config):
                             )
                             esm_parser.user_error("ERROR", err_msg)
                         # 2. check if the name of the new user phase does not already exist
-                        if new_phase_name in user_workflow_phases_names:
+                        if phase in user_workflow_phases_names:
                             err_msg = (
                                 f"Two workflow phases have the same name "
                                 f"{new_phase_name}."
@@ -228,15 +228,13 @@ def collect_all_user_workflows(self, config):
                         # 3. if user phase has a new and unique name
                         else:
                             # append it to the list of user phases of the workflow
-                            user_workflow_phases_names.append(new_phase_name)
+                            user_workflow_phases_names.append(phase)
                             # set attributes of user_workflow phases from
                             # config settings
                             # check if valid phase keywords
                             for key, value in w_config["phases"][phase].items():
-                                if new_phase.check_if_keyword_is_valid(key):
-                                    set_phase_attrib(
-                                        new_phase, new_phase_name, key, value
-                                    )
+                                if key in new_phase:
+                                    new_phase[key] = value
                                 else:
                                     err_msg = (
                                         f"``{key}`` of workflow phase "
@@ -247,10 +245,11 @@ def collect_all_user_workflows(self, config):
                             # Make sure that batch_or_shell is set to batch if submit_to_batch is true
                             # TODO: remove/replace batch_or_shell by submit_to_batch_system? Is needed
                             # for setting it to SimulationSetup and in other functions (resubmit, etc.)
-                            # Should not be set by user. TODO: Remove from dumentation.
-                            if new_phase.submit_to_batch_system:
-                                new_phase.batch_or_shell == "batch"
-
+                            # Should not be set by user. TODO: Remove from documentation.
+                            if new_phase["submit_to_batch_system"]:
+                                new_phase["batch_or_shell"] = "batch"
+                            else:
+                                new_phase["batch_or_shell"] = "shell"
                             user_workflow_phases.append(new_phase)
 
         self.user_phases = user_workflow_phases
@@ -261,10 +260,6 @@ def write_to_config(self, config):
         Write to config.
         TODO: Rename ``subjobs`` to ``phases``. But this needs changes also in resubmit.py and other files???
         """
-        cluster_att = []
-        for att in dir(self.phases[0]):
-            if (att[:2] != "__"):
-                cluster_att.append(att)
         # 1. Delete unnecessary config workflow entries (e.g. in general)
         if "workflow" in config["general"]:
             del config["general"]["workflow"]
@@ -277,18 +272,18 @@ def write_to_config(self, config):
             config["general"]["workflow"]["subjob_clusters"][cluster] = {}
             config["general"]["workflow"]["subjob_clusters"][cluster]["subjobs"] = []
             for phase in self.phases + self.user_phases:
-                if phase.cluster == cluster:
+                if phase["cluster"] == cluster:
                     # TODO: Are there more attributes to be merged from the different phases within a cluster???
                     # nproc is calculated in complete_clusters -> can be placed here???
-                    config["general"]["workflow"]["subjob_clusters"][cluster]["subjobs"].append(phase.name)
-                    for att in cluster_att:
-                        config["general"]["workflow"]["subjob_clusters"][cluster][att] = getattr(phase, att)
+                    config["general"]["workflow"]["subjob_clusters"][cluster]["subjobs"].append(phase["name"])
+                    for att in phase:
+                        config["general"]["workflow"]["subjob_clusters"][cluster][att] = phase[att]
                     config["general"]["workflow"]["subjob_clusters"][cluster]["name"] = cluster
         # 2. Write subjobs/phases
         config["general"]["workflow"]["subjobs"] = {}
-        for phase in self.phases+self.user_phases:
-            temp_dict = {phase.name: phase.__dict__}
-            config["general"]["workflow"]["subjobs"].update(temp_dict)
+        for phase in self.phases + self.user_phases:
+            temp_dict = phase
+            config["general"]["workflow"]["subjobs"][phase["name"]] = temp_dict
 
         # delete phases and user_phases
         del config["general"]["workflow"]["phases"]
@@ -385,22 +380,22 @@ def order_phases(self):
         # check if run_after or run_before is set for each user phase
         # if not, run_after will be set to last default phase
         for user_phase in self.user_phases:
-            if not user_phase.run_before and not user_phase.run_after:
-                user_phase.run_after = self.phases[-1].name
+            if not user_phase["run_before"] and not user_phase["run_after"]:
+                user_phase["run_after"] = self.phases[-1]["name"]
                 err_msg = (
                     f"No value given for ``run_after`` or ``run_before`` "
-                    f"of user phase ``{user_phase.name}``. "
+                    f"of user phase ``{user_phase['name']}``. "
                     f"Set it to last default phase in workflow: "
-                    f"``{self.phases[-1].name}``."
+                    f"``{self.phases[-1]['name']}``."
                 )
                 esm_parser.user_note("NOTE", err_msg)
 
             # Check if not both run_after and run_before are set at the same
             # time for each user phase
-#            if user_phase.run_before and user_phase.run_after:
+#            if user_phase['run_before'] and user_phase['run_after']:
 #                err_msg = (
 #                    f"Both run_after and run_before are set. Don't know when "
-#                    f"to start {user_phase.name}. Please only set run_after "
+#                    f"to start {user_phase['name']}. Please only set run_after "
 #                    f"or run_before."
 #                )
 #                esm_parser.user_error("ERROR", err_msg)
@@ -414,10 +409,10 @@ def order_phases(self):
         # get first default phase and correct run_after, called_from
         # correct last_task_in_queue of workflow
         if next_triggered not in self.get_phases_attribs_list("default", "name"):
-            self.phases[-1].next_submit.remove(self.phases[0].name)
-            self.phases[-1].next_submit.append(next_triggered)
-            self.phases[0].run_after = next_triggered
-            self.phases[0].called_from = next_triggered
+            self.phases[-1]["next_submit"].remove(self.phases[0]["name"])
+            self.phases[-1]["next_submit"].append(next_triggered)
+            self.phases[0]["run_after"] = next_triggered
+            self.phases[0]["called_from"] = next_triggered
             self.last_task_in_queue = next_triggered
 
         # Set "next_submit" and "called_from"
@@ -426,36 +421,38 @@ def order_phases(self):
         # Create a dict of all phases with empty lists
         next_submits = {}
         for phase in self.phases + self.user_phases:
-            next_submits[phase.name] = []
+            next_submits[phase["name"]] = []
 
         for phase4 in self.phases + self.user_phases:
             # if a cluster is not set for a phase set it to the phase name,
             # so that every phase belongs to a cluster
             # default cluster has the same name as the phase itself
-            if phase4.cluster is None:
-                phase4.cluster = phase4.name
+            if phase4["cluster"] is None:
+                phase4["cluster"] = phase4["name"]
 
         # set next_submits to the cluster name rather then to the phase name
         for phase2 in self.phases + self.user_phases:
-            if phase2.run_after is not None:
-                if phase2.cluster not in next_submits[phase2.run_after]:
-                    next_submits[phase2.run_after].append(phase2.cluster)
-                phase2.called_from = phase2.run_after
+            if phase2["run_after"] is not None:
+                if phase2["cluster"] not in next_submits[phase2["run_after"]]:
+                    next_submits[phase2["run_after"]].append(phase2["cluster"])
+                phase2["called_from"] = phase2["run_after"]
 
         for phase3 in self.phases + self.user_phases:
-            phase3.next_submit = next_submits[phase3.name]
+            phase3["next_submit"] = next_submits[phase3["name"]]
 
         first_cluster_name = self.first_task_in_queue
+        first_phase = self.get_workflow_phase_by_name(first_cluster_name)
         last_cluster_name = self.last_task_in_queue
+        last_phase = self.get_workflow_phase_by_name(last_cluster_name)
 
         # if first_cluster_name is not next_submit of last_cluster_name
         # set 'next_submit' of last phase/cluster to first phase/cluster in workflow
-        if first_cluster_name not in get_phase_attrib(self.phases+self.user_phases, last_cluster_name, "next_submit"):
-            set_phase_attrib(self.phases+self.user_phases, last_cluster_name, "next_submit", first_cluster_name)
+        if first_cluster_name not in last_phase["next_submit"]:
+            last_phase.set_attrib("next_submit", first_cluster_name)
         # if last_cluster_name is not called_from of first_cluster_name
         # set 'called_from' of first phase/cluster to last phase/cluster
-        if not last_cluster_name == get_phase_attrib(self.phases+self.user_phases, first_cluster_name, "called_from"):
-            set_phase_attrib(self.phases+self.user_phases, first_cluster_name, "called_from", last_cluster_name)
+        if not last_cluster_name == first_phase["called_from"]:
+            first_phase.set_attrib("called_from", last_cluster_name)
 
         return self
 
@@ -537,7 +534,7 @@ def prepend_newrun_job(self, config):
         first_task_name = self.first_task_in_queue
         first_phase = self.get_workflow_phase_by_name(first_task_name)
 
-        if not get_phase_attrib(first_phase, first_phase.name, "batch_or_shell") == "SimulationSetup":
+        if not first_phase["batch_or_shell"] == "SimulationSetup":
 
             last_task_name = self.last_task_in_queue
             last_phase = self.get_workflow_phase_by_name(last_task_name)
@@ -545,22 +542,24 @@ def prepend_newrun_job(self, config):
             new_first_phase_name = "newrun_general"
             # Create new default phase object
             new_first_phase = WorkflowPhase(new_first_phase_name)
-            set_phase_attrib(new_first_phase, new_first_phase_name, "called_from", last_task_name)
-            set_phase_attrib(new_first_phase, new_first_phase_name, "run_before", first_task_name)
-            set_phase_attrib(new_first_phase, new_first_phase_name, "next_submit", first_task_name)
-            set_phase_attrib(new_first_phase, new_first_phase_name, "cluster", "newrun")
-            set_phase_attrib(new_first_phase, new_first_phase_name, "batch_or_shell", "SimulationSetup")
-            set_phase_attrib(new_first_phase, new_first_phase_name, "nproc", 1)
+            new_first_phase.set_attrib("next_submit", first_phase["cluster"])
+            new_first_phase.set_attrib("called_from", last_phase["cluster"])
+            new_first_phase.set_attrib("run_before", first_phase["cluster"])
+            new_first_phase.set_attrib("next_submit", first_phase["cluster"])
+            new_first_phase.set_attrib("cluster", "newrun")
+            new_first_phase.set_attrib("batch_or_shell", "SimulationSetup")
+            new_first_phase.set_attrib("nproc", 1)
 
             # reset last_task attributes
-            set_phase_attrib(last_phase, last_phase.name, "next_submit", new_first_phase_name)
-            last_phase.next_submit.remove(first_task_name)
+            last_phase.set_attrib("next_submit", "newrun")
+            last_phase.remove_attrib("next_submit", first_phase["cluster"])
 
             # reset first_task attributes
-            first_phase.called_from = new_first_phase_name
+            first_phase.set_attrib("called_from", "newrun")
+            first_phase.set_attrib("run_after", "newrun")
 
             # reset workflow attributes
-            self.first_task_in_queue = new_first_phase_name
+            self.first_task_in_queue = "newrun"
 
             # Set new phase to beginning of default phase list
             self.phases.insert(0, new_first_phase)
@@ -616,58 +615,40 @@ def skip_cluster(cluster, config):
     return False
 
 
-class WorkflowPhase:
+class WorkflowPhase(dict):
     """A workflow phase class."""
 
     def __init__(self, phase_name):
-        self.name = None
-        self.nproc = 1                              # needed
-        self.run_before = None
-        self.run_after = None
-        self.submit_to_batch_system = False         # needed
-        self.run_on_queue = None
-        self.cluster = None
-        self.next_submit = []                       # needed
-        self.called_from = None                     # needed
-        self.batch_or_shell = "SimulationSetup"     # needed
-        self.order_in_cluster = "sequential"        # needed ???
-        self.run_only = None
-        self.skip_chunk_number = None
-        self.skip_run_number = None
-        self.name = phase_name
-
-
-class UserWorkflowPhase(WorkflowPhase):
-    """A user workflow phase class."""
-
-    def __init__(self, phase_name):
-
-        WorkflowPhase.__init__(self, phase_name)
-
-        self.script = None
-        self.script_dir = None
-        self.call_function = None
-        self.env_preparation = None
-        self.batch_or_shell = "shell"
-        self.submit_to_batch_system = False
-
-    def check_if_keyword_is_valid(self, keyword):
-        """
-        Checks if the key given for a user workflow is valie
-
-        Parameters
-        ----------
-            keyword : str
-
-        Returns
-        -------
-            true or false
-        """
+        self["nproc"] = 1                              # needed
+        self["run_before"] = None
+        self["run_after"] = None
+        self["submit_to_batch_system"] = False         # needed
+        self["run_on_queue"] = None
+        self["cluster"] = None
+        self["next_submit"] = []                       # needed
+        self["called_from"] = None                     # needed
+        self["batch_or_shell"] = "SimulationSetup"     # needed
+        self["order_in_cluster"] = "sequential"        # needed ???
+        self["run_only"] = None
+        self["skip_chunk_number"] = None
+        self["skip_run_number"] = None
+        self["name"] = phase_name
+        self["script"] = None
+        self["script_dir"] = None
+        self["call_function"] = None
+        self["env_preparation"] = None
+
+    def set_attrib(self, attrib, value):
+        if type(self[attrib]) == "list":
+            self[attrib].append(value)
+        else:
+            self[attrib] = value
 
-        if hasattr(self, keyword):
-            return True
+    def remove_attrib(self, attrib, value):
+        if type(self[attrib]) == "list":
+            self[attrib].remove(value)
         else:
-            return False
+            self[attrib] = None
 
 
 def assemble_workflow(config):
@@ -738,23 +719,12 @@ def assemble_workflow(config):
     return config
 
 
-def set_phase_attrib(workflow_phases, phase_name, attrib, value):
-    if not type(workflow_phases) is list:
-        workflow_phases = [workflow_phases]
-    for phase in workflow_phases:
-        if phase.name == phase_name:
-            if type(getattr(phase, attrib)).__name__ == "list":
-                phase.__dict__[attrib].append(value)
-            else:
-                phase.__setattr__(attrib, value)
-
-
 def get_phase_attrib(workflow_phases, phase_name, attrib):
     if not type(workflow_phases) is list:
         workflow_phases = [workflow_phases]
     for phase in workflow_phases:
-        if phase.name == phase_name:
-            value = getattr(phase, attrib)
+        if phase["name"] == phase_name:
+            value = phase[attrib]
             return value
 
 

From ba204e6a687f1a3af0359cbd82b27d26b3b82064 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Mon, 20 Nov 2023 16:30:29 +0100
Subject: [PATCH 31/98] Fixed some of the workflow tests.

---
 tests/test_esm_runscripts/test_workflow.py | 36 ++++++++++++----------
 1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/tests/test_esm_runscripts/test_workflow.py b/tests/test_esm_runscripts/test_workflow.py
index d13871db6..e39aadac2 100644
--- a/tests/test_esm_runscripts/test_workflow.py
+++ b/tests/test_esm_runscripts/test_workflow.py
@@ -26,7 +26,7 @@ def test_config():
         'oifs': {
             'workflow': {
 #                'next_run_triggered_by': 'tidy',
-                'subjobs': {
+                'phases': {
                     'my_new_subjob_oifs': {
                         'batch_or_shell': 'batch',
                         'nproc': 1,
@@ -41,7 +41,7 @@ def test_config():
             'valid_model_names': ['fesom', 'oifs', 'rnfmap', 'oasis3mct', 'xios'],
             'workflow': {
                 'next_run_triggered_by': 'tidy',
-                'subjobs': {
+                'phases': {
                     'my_new_subjob_general': {
                         'batch_or_shell': 'batch',
                         'order_in_cluster': 'concurrent',
@@ -54,7 +54,7 @@ def test_config():
         'flow': {
             'workflow': {
                 'next_run_triggered_by': 'tidy',
-                'subjobs': {
+                'phases': {
                     'my_new_subjob_flow': {
                         'batch_or_shell': 'batch',
                         'order_in_cluster': 'concurrent',
@@ -78,14 +78,14 @@ def test_check_user_workflow_dependency(test_workflow_object, test_config):
     assert independent
 
 def test_check_user_workflow_dependency_2(test_workflow_object, test_config):
-    test_config['flow']['workflow']['subjobs']['my_new_subjob_flow']['run_after'] = 'my_new_subjob_oifs'
+    test_config['flow']['workflow']['phases']['my_new_subjob_flow']['run_after'] = 'my_new_subjob_oifs'
     test_workflow_object = test_workflow_object.init_default_workflow(test_config)
     test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config)
     independent = test_workflow_object.check_user_workflow_dependency()
     assert not independent
 
 def test_check_unknown_phases(test_workflow_object, test_config):
-    test_config['flow']['workflow']['subjobs']['my_new_subjob_flow']['run_after'] = 'my_new_subjob'
+    test_config['flow']['workflow']['phases']['my_new_subjob_flow']['run_after'] = 'my_new_subjob'
     test_workflow_object = test_workflow_object.init_default_workflow(test_config)
     test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config)
     unknown_phases = test_workflow_object.check_unknown_phases()
@@ -100,27 +100,27 @@ def test_collect_all_user_workflow(test_config):
 def test_calc_number_of_tasks():
     pytest.fail("something wrong")
 
-def test_order_clusters(test_workflow_object, test_config):
-    test_config['flow']['workflow']['subjobs']['my_new_subjob_flow']['run_after'] = 'my_new_subjob_general'
+def test_order_phases(test_workflow_object, test_config):
+    test_config['flow']['workflow']['phases']['my_new_subjob_flow']['run_after'] = 'my_new_subjob_general'
 #    test_config['flow']['workflow']['next_run_triggered_by'] = 'my_new_subjob_flow'
 #    test_config['oifs']['workflow']['next_run_triggered_by'] = 'my_new_subjob_general'
-    #test_config['flow']['workflow']['subjobs']['my_new_subjob_flow']['run_before'] = 'my_new_subjob_oifs'
+    #test_config['flow']['workflow']['phases']['my_new_subjob_flow']['run_before'] = 'my_new_subjob_oifs'
     test_workflow_object = test_workflow_object.init_default_workflow(test_config)
     test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config)
-    test_workflow_object = test_workflow_object.order_clusters(test_config)
+    test_workflow_object = test_workflow_object.order_phases()
     pytest.fail("something wrong")
 
 def test_complete_clusters(test_workflow_object, test_config):
     test_workflow_object = test_workflow_object.init_default_workflow(test_config)
     test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config)
-    test_workflow_object = test_workflow_object.order_clusters(test_config)
+    test_workflow_object = test_workflow_object.order_phases()
     subjob_clusters = test_workflow_object.complete_clusters(test_config)
     pytest.fail("something wrong")
 
 def test_prepend_newrun_job(test_workflow_object, test_config):
     test_workflow_object = test_workflow_object.init_default_workflow(test_config)
     test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config)
-    test_workflow_object = test_workflow_object.order_clusters(test_config)
+    test_workflow_object = test_workflow_object.order_phases()
     subjob_clusters = test_workflow_object.complete_clusters(test_config)
     [test_workflow_object, subjob_clusters] = workflow.prepend_newrun_job(test_workflow_object, test_config, subjob_clusters)
     pytest.fail("something wrong")
@@ -128,7 +128,7 @@ def test_prepend_newrun_job(test_workflow_object, test_config):
 def test_write_to_config(test_workflow_object, test_config):
     test_workflow_object = test_workflow_object.init_default_workflow(test_config)
     test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config)
-    test_workflow_object = test_workflow_object.order_clusters(test_config)
+    test_workflow_object = test_workflow_object.order_phases()
     subjob_clusters = test_workflow_object.complete_clusters(test_config)
     [test_workflow_object, subjob_clusters] = workflow.prepend_newrun_job(test_workflow_object, test_config, subjob_clusters)
     config = test_workflow_object.write_to_config(test_config)
@@ -137,9 +137,13 @@ def test_write_to_config(test_workflow_object, test_config):
 def test_write_subjob_clusters_to_config(test_workflow_object, test_config):
     test_workflow_object = test_workflow_object.init_default_workflow(test_config)
     test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config)
-    test_workflow_object = test_workflow_object.order_clusters(test_config)
-    subjob_clusters = test_workflow_object.complete_clusters(test_config)
-    [test_workflow_object, subjob_clusters] = workflow.prepend_newrun_job(test_workflow_object, test_config, subjob_clusters)
-    test_config = workflow.write_subjob_clusters_to_config(test_config, subjob_clusters)
+    test_workflow_object = test_workflow_object.order_phases()
+    test_workflow_object = test_workflow_object.prepend_newrun_job(test_config)
     test_config = test_workflow_object.write_to_config(test_config)
+    test_workflow_object = test_workflow_object.complete_clusters(test_config)
+
+def test_prepend_newrun_job(test_workflow_object, test_config):
+    test_workflow_object = test_workflow_object.init_default_workflow(test_config)
+    test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config)
+    test_workflow_object = test_workflow_object.prepend_newrun_job(test_config)
     pytest.fail("something wrong")

From 6174fa3a558cc9554acfc3138218bbe98abf8127 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Mon, 20 Nov 2023 16:59:12 +0100
Subject: [PATCH 32/98] Added review suggestions.

---
 src/esm_runscripts/workflow.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index e20d6ab49..4140f1d45 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -64,9 +64,9 @@ def get_phases_attribs_list(self, phase_type, attrib):
 
         Parameters
         ----------
-            self: Workflow object
-            phase_type: str (default or user)
-            attrib: str
+            phase_type : str
+                ``default`` or ``user``
+            attrib : str
 
         Returns
         -------
@@ -86,7 +86,6 @@ def init_default_workflow(self, config):
 
         Parameters
         ----------
-            self : Workflow object
             config : dict
 
         Returns
@@ -96,8 +95,7 @@ def init_default_workflow(self, config):
 
         workflow_phases = self.phases
 
-        # Calculating the number of tasks for each component/model
-        # needed for phase compute
+        # Calculating the number of mpi tasks for each component/model/script
         tasks = calc_number_of_tasks(config)
         # Initiate/create default workflow phase objects
         # and reset/append to Workflow.phases variable

From 93b49b5d771eab6c9f4b998a5fdb8c2f28060f09 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Wed, 22 Nov 2023 12:04:23 +0100
Subject: [PATCH 33/98] Changed initialization of workflow and phases.

---
 src/esm_runscripts/workflow.py | 136 ++++++++++++++-------------------
 1 file changed, 57 insertions(+), 79 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index 4140f1d45..d19725b1c 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -10,29 +10,27 @@
 class Workflow:
     """A workflow class."""
 
-    def __init__(self, phases, always_run_with=[], phases_to_submit_to_batch_system=[]):
+    def __init__(self, workflow_yaml):
         """
         Create a new workflow.
 
         Parameters
         ----------
-        phases : list
-            List of workflow phases names
-        always_run_with : list
-            List of phases that precedes each phase in phases
+        workflow_yaml : dict
+            Dictionary from defaults.yaml to initialize workflow
+            for default phases.
 
         Returns
         -------
         none
         """
+        # TODO: check if key is in workflow_yaml dict
+        self.phases = []
         self.user_phases = []
-        self.first_task_in_queue = None                     # needed
-        self.last_task_in_queue = None
-        self.next_run_triggered_by = None
+        self.first_task_in_queue = workflow_yaml["first_task_in_queue"]
+        self.last_task_in_queue = workflow_yaml["last_task_in_queue"]
+        self.next_run_triggered_by = workflow_yaml["next_run_triggered_by"]
         # TODO: Call here the phase object ???
-        self.phases = phases
-        self.always_run_with = always_run_with
-        self.phases_to_submit_to_batch_system = phases_to_submit_to_batch_system
 
     @property
     def num_phases(self):
@@ -79,10 +77,10 @@ def get_phases_attribs_list(self, phase_type, attrib):
 
         return phases_attribs
 
-    def init_default_workflow(self, config):
+    def config_sbatch_phases(self, config):
         """
-        Add workflow for precompute, compute, and tidy phases
-        etc information already here!
+        Calculating the number of mpi tasks for each component/model/script
+        and set queue for default phases that run as batch jobs
 
         Parameters
         ----------
@@ -93,41 +91,16 @@ def init_default_workflow(self, config):
             self : Workflow object
         """
 
-        workflow_phases = self.phases
+        #workflow_phases = self["phases"]
 
-        # Calculating the number of mpi tasks for each component/model/script
         tasks = calc_number_of_tasks(config)
-        # Initiate/create default workflow phase objects
-        # and reset/append to Workflow.phases variable
-        self.phases = []
-        for ind, phase in enumerate(workflow_phases):
-            self.phases.append(WorkflowPhase(phase))
 
         for ind, phase in enumerate(self.phases):
-            if ind < self.num_phases - 1:
-                # Set run_before attrib of all phases (except last on)
-                # to the next phase name
-                phase["run_before"] = self.phases[ind+1]["name"]
-                phase["next_submit"].append(self.phases[ind+1]["name"])
-                phase["run_after"] = self.phases[ind-1]["name"]
-            else:
-                # Set run_after attrib of last phase to previous phase name
-                phase["run_before"] = self.phases[0]["name"]
-                phase["next_submit"].append(self.phases[0]["name"])
-                phase["run_after"] = self.phases[ind-1]["name"]
-
-            phase["cluster"] = phase["name"]
-            if phase["name"] in self.phases_to_submit_to_batch_system:
+            if phase["submit_to_batch_system"]:
                 phase["batch_or_shell"] = 'batch'
-                phase["submit_to_batch_system"] = True
                 phase["run_on_queue"] = config["computer"]["partitions"]["compute"]["name"]
                 phase["nproc"] = tasks
 
-        # Set default workflow values
-        self.set_workflow_attrib("first_task_in_queue", self.phases[0]["name"])
-        self.set_workflow_attrib("last_task_in_queue", self.phases[-1]["name"])
-        self.set_workflow_attrib("next_run_triggered_by", self.phases[-1]["name"])
-
         return self
 
     def set_workflow_attrib(self, attrib, value):
@@ -164,10 +137,7 @@ def check_if_keyword_is_valid(self, keyword):
             true or false
         """
 
-        if hasattr(self, keyword):
-            return True
-        else:
-            return False
+        return hasattr(self, keyword)
 
     def collect_all_user_workflows(self, config):
         """
@@ -202,12 +172,11 @@ def collect_all_user_workflows(self, config):
                         else:
                             err_msg = f"``{key}`` is not a valid keyword of a workflow."
                             esm_parser.user_error("ERROR", err_msg)
-                    # for subjob in list(copy.deepcopy(w_config["subjobs"])):
-                    for phase in list(copy.deepcopy(w_config["phases"])):
-#                        new_phase_name = phase
-                        # create a new user phase object for ``phase``
-                        new_phase = WorkflowPhase(phase)
-                        # each subjob needs to have an unique name
+                    for phase in w_config["phases"]:
+                        # each phase (of a model/setup) needs to have an unique name
+                        # same phases of the same model/setup defined in different config files
+                        # are overwritten by the usual config file hierarchy
+                        # user phases are not alowed to have the same name asdefault phases (e.g. compute)
                         # 1. check if ``new_phase`` is already defined as a default phase
                         if phase in self.get_phases_attribs_list("default", "name"):
                             err_msg = (
@@ -216,7 +185,8 @@ def collect_all_user_workflows(self, config):
                                 f"This is not allowed."
                             )
                             esm_parser.user_error("ERROR", err_msg)
-                        # 2. check if the name of the new user phase does not already exist
+                        # 2. check if the name of the new user phase (for a model/setup) does not already exist
+                        #    (for another model/setup).
                         if phase in user_workflow_phases_names:
                             err_msg = (
                                 f"Two workflow phases have the same name "
@@ -225,30 +195,22 @@ def collect_all_user_workflows(self, config):
                             esm_parser.user_error("ERROR", err_msg)
                         # 3. if user phase has a new and unique name
                         else:
-                            # append it to the list of user phases of the workflow
-                            user_workflow_phases_names.append(phase)
-                            # set attributes of user_workflow phases from
-                            # config settings
-                            # check if valid phase keywords
-                            for key, value in w_config["phases"][phase].items():
-                                if key in new_phase:
-                                    new_phase[key] = value
-                                else:
-                                    err_msg = (
-                                        f"``{key}`` of workflow phase "
-                                        f"``{new_phase_name}`` is not a valid keyword "
-                                        f"of a workflow phase."
-                                    )
-                                    esm_parser.user_error("ERROR", err_msg)
+                            phase_config = copy.deepcopy(w_config["phases"][phase])
+                            # add phase name
+                            phase_config["name"] = phase
                             # Make sure that batch_or_shell is set to batch if submit_to_batch is true
                             # TODO: remove/replace batch_or_shell by submit_to_batch_system? Is needed
                             # for setting it to SimulationSetup and in other functions (resubmit, etc.)
                             # Should not be set by user. TODO: Remove from documentation.
-                            if new_phase["submit_to_batch_system"]:
-                                new_phase["batch_or_shell"] = "batch"
+                            if phase_config["submit_to_batch_system"]:
+                                phase_config["batch_or_shell"] = "batch"
                             else:
-                                new_phase["batch_or_shell"] = "shell"
+                                phase_config["batch_or_shell"] = "shell"
+                            # create a new user phase object for ``phase``
+                            new_phase = WorkflowPhase(phase_config)
+                            # append it to the list of user phases of the workflow
                             user_workflow_phases.append(new_phase)
+                            user_workflow_phases_names.append(phase)
 
         self.user_phases = user_workflow_phases
         return self
@@ -356,14 +318,14 @@ def order_phases(self):
 
         Parameters
         ----------
-            config : dict
 
         Returns
         -------
             self : Workflow object
         """
         # check if user phases are independent from each other
-        # TODO: What if not independent?
+        # TODO: What if not independent???
+        # do not run in parallel in same cluster???
         independent = self.check_user_workflow_dependency()
         # check if there are unknown phases, if yes, will give error exception
         unknown_phases = self.check_unknown_phases()
@@ -616,7 +578,8 @@ def skip_cluster(cluster, config):
 class WorkflowPhase(dict):
     """A workflow phase class."""
 
-    def __init__(self, phase_name):
+    def __init__(self, phase):
+        # default
         self["nproc"] = 1                              # needed
         self["run_before"] = None
         self["run_after"] = None
@@ -630,12 +593,24 @@ def __init__(self, phase_name):
         self["run_only"] = None
         self["skip_chunk_number"] = None
         self["skip_run_number"] = None
-        self["name"] = phase_name
+        self["name"] = None
         self["script"] = None
         self["script_dir"] = None
         self["call_function"] = None
         self["env_preparation"] = None
 
+        # check if phase keywords are valid
+        for key, value in phase.items():
+            if key not in self:
+                err_msg = (
+                    f"``{key}`` of workflow phase "
+                    f"``{new_phase_name}`` is not a valid keyword "
+                    f"of a workflow phase."
+                )
+                esm_parser.user_error("ERROR", err_msg)
+
+        super().__init__(phase)
+
     def set_attrib(self, attrib, value):
         if type(self[attrib]) == "list":
             self[attrib].append(value)
@@ -670,12 +645,14 @@ def assemble_workflow(config):
     # esm_tools/configs/esm_software/esm_runscripts/defaults.yaml
     if "defaults.yaml" in config["general"]:
         if "workflow" in config["general"]["defaults.yaml"]:
+            workflow = config["general"]["defaults.yaml"]["workflow"]
             phases = config["general"]["defaults.yaml"]["workflow"].get("phases", [])
-            always_run_with = config["general"]["defaults.yaml"]["workflow"].get("always_run_with", [])
-            phases_to_submit_to_batch_system = config["general"]["defaults.yaml"]["workflow"].get("phases_to_submit_to_batch_system", [])
 
+    # 2. Initialize default workflow phases
     if phases:
-        workflow = Workflow(phases, always_run_with=always_run_with, phases_to_submit_to_batch_system=phases_to_submit_to_batch_system)
+        workflow = Workflow(workflow)
+        for phase in phases:
+            workflow.phases.append(WorkflowPhase(phases[phase]))
     else:
         esm_parser.user_error("ERROR", "No default workflow phases defined.")
         # Note: Should this work also if no default phases are set in such a config
@@ -684,8 +661,9 @@ def assemble_workflow(config):
         # Where could a user define a different (default) phase list?
         # Or should this be changed in defaults.yaml as it is now?
 
-    # 2. Initialize default workflow phases
-    workflow = workflow.init_default_workflow(config)
+    # 3. Calc mpi tasks and set queue for batch jobs for default phases
+    # TODO: Put it into other method?
+    workflow = workflow.config_sbatch_phases(config)
 
     # 3. Read in workflows from runscript and config files
     workflow = workflow.collect_all_user_workflows(config)

From afab16fc1fdc8c614323c093110cd8e227513e6b Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Wed, 22 Nov 2023 12:06:48 +0100
Subject: [PATCH 34/98] Changed initial config of default workflow phases.

---
 .../esm_software/esm_runscripts/defaults.yaml | 74 +++++++++++++++++--
 1 file changed, 66 insertions(+), 8 deletions(-)

diff --git a/configs/esm_software/esm_runscripts/defaults.yaml b/configs/esm_software/esm_runscripts/defaults.yaml
index 812e32ad0..31ffa1394 100644
--- a/configs/esm_software/esm_runscripts/defaults.yaml
+++ b/configs/esm_software/esm_runscripts/defaults.yaml
@@ -7,13 +7,71 @@ per_model_defaults:
             exp_to_run:     copy
             run_to_work:    copy
             work_to_run:    copy
+
 workflow:
+    user_phases: None
+    first_task_in_queue: prepcompute
+    last_task_in_queue: tidy
+    next_run_triggered_by: tidy
+
     phases:
-        - prepcompute
-        - compute
-        - tidy
-    always_run_with:
-        - prepare
-        - prepexp
-    phases_to_submit_to_batch_system:
-        - compute
+        prepcompute:
+            batch_or_shell: SimulationSetup
+            call_function: None
+            called_from: tidy
+            cluster: prepcompute
+            env_preparation: None
+            name: prepcompute
+            next_submit:
+                - compute
+            nproc: 1
+            order_in_cluster: sequential
+            run_after: tidy
+            run_before: compute
+            run_on_queue: None
+            run_only: None
+            script: None
+            script_dir: None
+            skip_chunk_number: None
+            skip_run_number: None
+            submit_to_batch_system: False
+        compute:
+            batch_or_shell: batch
+            call_function: None
+            called_from: prepcompute
+            cluster: compute
+            env_preparation: None
+            name: compute
+            next_submit:
+                - tidy
+            nproc: None
+            order_in_cluster: sequential
+            run_after: prepcompute
+            run_before: tidy
+            run_on_queue: None
+            run_only: None
+            script: None
+            script_dir: None
+            skip_chunk_number: None
+            skip_run_number: None
+            submit_to_batch_system: True
+        tidy:
+            batch_or_shell: SimulationSetup
+            call_function: None
+            called_from: compute
+            cluster: tidy
+            env_preparation: None
+            name: tidy
+            next_submit:
+                - prepcompute
+            nproc: 1
+            order_in_cluster: sequential
+            run_after: compute
+            run_before: prepcompute
+            run_on_queue: None
+            run_only: None
+            script: None
+            script_dir: None
+            skip_chunk_number: None
+            skip_run_number: None
+            submit_to_batch_system: False

From ad0e620becbfca33c94808f73dfe0fddbf261948 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Wed, 22 Nov 2023 12:40:35 +0100
Subject: [PATCH 35/98] Bugfix in error message.

---
 src/esm_runscripts/workflow.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index d19725b1c..411bb1ebf 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -180,7 +180,7 @@ def collect_all_user_workflows(self, config):
                         # 1. check if ``new_phase`` is already defined as a default phase
                         if phase in self.get_phases_attribs_list("default", "name"):
                             err_msg = (
-                                f"The user phase ``{new_phase_name}`` "
+                                f"The user phase ``{phase}`` "
                                 f"has the same name as a default workflow phase. "
                                 f"This is not allowed."
                             )
@@ -190,7 +190,7 @@ def collect_all_user_workflows(self, config):
                         if phase in user_workflow_phases_names:
                             err_msg = (
                                 f"Two workflow phases have the same name "
-                                f"{new_phase_name}."
+                                f"``{phase}``."
                             )
                             esm_parser.user_error("ERROR", err_msg)
                         # 3. if user phase has a new and unique name
@@ -202,7 +202,7 @@ def collect_all_user_workflows(self, config):
                             # TODO: remove/replace batch_or_shell by submit_to_batch_system? Is needed
                             # for setting it to SimulationSetup and in other functions (resubmit, etc.)
                             # Should not be set by user. TODO: Remove from documentation.
-                            if phase_config["submit_to_batch_system"]:
+                            if phase_config.get("submit_to_batch_system", False):
                                 phase_config["batch_or_shell"] = "batch"
                             else:
                                 phase_config["batch_or_shell"] = "shell"
@@ -327,6 +327,7 @@ def order_phases(self):
         # TODO: What if not independent???
         # do not run in parallel in same cluster???
         independent = self.check_user_workflow_dependency()
+
         # check if there are unknown phases, if yes, will give error exception
         unknown_phases = self.check_unknown_phases()
         if unknown_phases:

From 91f1af6bbace2823293fc0deb9503373a0e5c28f Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Wed, 22 Nov 2023 16:00:28 +0100
Subject: [PATCH 36/98] Make next_run_triggered_by a keyword of phase
 (trigger_next_run) not workflow.

---
 src/esm_runscripts/workflow.py | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index 411bb1ebf..342e8b5b7 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -91,8 +91,6 @@ def config_sbatch_phases(self, config):
             self : Workflow object
         """
 
-        #workflow_phases = self["phases"]
-
         tasks = calc_number_of_tasks(config)
 
         for ind, phase in enumerate(self.phases):
@@ -155,6 +153,7 @@ def collect_all_user_workflows(self, config):
 
         user_workflow_phases = []
         user_workflow_phases_names = []
+        user_workflow_next_run_triggered_by = []
         for model in config:
             if "workflow" in config[model]:
                 w_config = config[model]["workflow"]
@@ -211,6 +210,17 @@ def collect_all_user_workflows(self, config):
                             # append it to the list of user phases of the workflow
                             user_workflow_phases.append(new_phase)
                             user_workflow_phases_names.append(phase)
+                            if phase_config.get("trigger_next_run", False):
+                                user_workflow_next_run_triggered_by.append(phase)
+        if len(user_workflow_next_run_triggered_by) > 1:
+            err_msg = (
+                f"More than one phase is set to "
+                f"trigger the next run: ``{user_workflow_next_run_triggered_by}``. "
+                f"Only set ``trigger_next_run: True`` for one phase."
+            )
+            esm_parser.user_error("ERROR", err_msg)
+        else:
+            self.next_run_triggered_by = user_workflow_next_run_triggered_by[0]
 
         self.user_phases = user_workflow_phases
         return self
@@ -362,7 +372,7 @@ def order_phases(self):
 #                esm_parser.user_error("ERROR", err_msg)
 
         # Correct for ``last_task_in_queue`` if necessary
-        # Collect all next_run_triggered_by entries
+        # Collect all next_run_triggered_by entries???
         next_triggered = self.next_run_triggered_by
         # check if next_triggered is default or user phase
         # if user phase
@@ -580,10 +590,14 @@ class WorkflowPhase(dict):
     """A workflow phase class."""
 
     def __init__(self, phase):
-        # default
+        # defaults
+        self["name"] = None
+        self["script"] = None
+        self["script_dir"] = None
         self["nproc"] = 1                              # needed
         self["run_before"] = None
         self["run_after"] = None
+        self["trigger_next_run"] = False               # needed
         self["submit_to_batch_system"] = False         # needed
         self["run_on_queue"] = None
         self["cluster"] = None
@@ -594,9 +608,6 @@ def __init__(self, phase):
         self["run_only"] = None
         self["skip_chunk_number"] = None
         self["skip_run_number"] = None
-        self["name"] = None
-        self["script"] = None
-        self["script_dir"] = None
         self["call_function"] = None
         self["env_preparation"] = None
 

From aed23769f12252e68bedbb65c28bc1893d7a9531 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Wed, 22 Nov 2023 16:08:50 +0100
Subject: [PATCH 37/98] Bugfix if no phase trigger_next_run.

---
 src/esm_runscripts/workflow.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index 342e8b5b7..d5b022681 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -219,7 +219,7 @@ def collect_all_user_workflows(self, config):
                 f"Only set ``trigger_next_run: True`` for one phase."
             )
             esm_parser.user_error("ERROR", err_msg)
-        else:
+        elif user_workflow_next_run_triggered_by:
             self.next_run_triggered_by = user_workflow_next_run_triggered_by[0]
 
         self.user_phases = user_workflow_phases

From 40c9190d463ca0323dbc4fbd051a244c817908b2 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Thu, 23 Nov 2023 16:51:26 +0100
Subject: [PATCH 38/98] Resolved function complete_cluster into other
 functions, fix next_submit for clusters.

---
 src/esm_runscripts/workflow.py | 131 +++++++++++++++------------------
 1 file changed, 58 insertions(+), 73 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index d5b022681..e6336fe98 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -203,6 +203,10 @@ def collect_all_user_workflows(self, config):
                             # Should not be set by user. TODO: Remove from documentation.
                             if phase_config.get("submit_to_batch_system", False):
                                 phase_config["batch_or_shell"] = "batch"
+                                if not phase_config.get("run_on_queue", False):
+                                    breakpoint()
+                                    err_msg = f"No value for target queue given by ``run_on_queue' for phase {phase}."
+                                    esm_parser.user_error("ERROR", err_msg)
                             else:
                                 phase_config["batch_or_shell"] = "shell"
                             # create a new user phase object for ``phase``
@@ -249,6 +253,30 @@ def write_to_config(self, config):
                     for att in phase:
                         config["general"]["workflow"]["subjob_clusters"][cluster][att] = phase[att]
                     config["general"]["workflow"]["subjob_clusters"][cluster]["name"] = cluster
+                    # if a phase in a cluster triggers the next run, set next_submit in cluster conf
+        for cluster in self.get_phases_attribs_list("default", "cluster") + self.get_phases_attribs_list("user", "cluster"):
+            for phase in self.phases + self.user_phases:
+                if phase["cluster"] == cluster:
+                    if phase["name"] in self.next_run_triggered_by:
+                        if self.first_task_in_queue not in config["general"]["workflow"]["subjob_clusters"][cluster]["next_submit"]:
+                            config["general"]["workflow"]["subjob_clusters"][cluster]["next_submit"].append(self.first_task_in_queue)
+
+        for subjob_cluster in config["general"]["workflow"]["subjob_clusters"]:
+            nproc_sum = nproc_max = 0
+            for subjob in config["general"]["workflow"]["subjob_clusters"][subjob_cluster]["subjobs"]:
+                nproc_sum += get_phase_attrib(self.phases + self.user_phases, subjob, "nproc")
+                nproc_max = max(get_phase_attrib(self.phases + self.user_phases, subjob, "nproc"), nproc_max)
+            if config["general"]["workflow"]["subjob_clusters"][subjob_cluster].get("submit_to_batch_system", False):
+# Why setting batch_or_shell to shell if a script is given? Wouldn't now all phases be executed as shell and never as batch?
+#    #        elif subjob_clusters[subjob_cluster].get("script", False):
+#    #            subjob_clusters[subjob_cluster]["batch_or_shell"] = "shell"
+#    #
+                if config["general"]["workflow"]["subjob_clusters"][subjob_cluster]["order_in_cluster"] == "concurrent":
+                    nproc = nproc_sum
+                else:
+                    nproc = nproc_max
+                config["general"]["workflow"]["subjob_clusters"][subjob_cluster]["nproc"] = nproc
+
         # 2. Write subjobs/phases
         config["general"]["workflow"]["subjobs"] = {}
         for phase in self.phases + self.user_phases:
@@ -322,9 +350,9 @@ def check_unknown_phases(self):
 
         return unknown_phases
 
-    def order_phases(self):
+    def order_phases_and_clusters(self):
         """
-        Put the phases in order.
+        Put the phases and clusters in order.
 
         Parameters
         ----------
@@ -333,6 +361,7 @@ def order_phases(self):
         -------
             self : Workflow object
         """
+
         # check if user phases are independent from each other
         # TODO: What if not independent???
         # do not run in parallel in same cluster???
@@ -371,9 +400,11 @@ def order_phases(self):
 #                )
 #                esm_parser.user_error("ERROR", err_msg)
 
-        # Correct for ``last_task_in_queue`` if necessary
-        # Collect all next_run_triggered_by entries???
+        # Correct workflow attributes (``last_task_in_queue``, `next_run_triggered``)
+        # if necessary
+
         next_triggered = self.next_run_triggered_by
+
         # check if next_triggered is default or user phase
         # if user phase
         # get last default phase and correct next_submit
@@ -390,9 +421,9 @@ def order_phases(self):
         # "next_submit" which phase/cluster will be called next (run_after of the next phase)
         # "called_from" name of previous phase, run_after of current phase
         # Create a dict of all phases with empty lists
-        next_submits = {}
-        for phase in self.phases + self.user_phases:
-            next_submits[phase["name"]] = []
+
+        # Create a cluster dict:
+        clusters = {}
 
         for phase4 in self.phases + self.user_phases:
             # if a cluster is not set for a phase set it to the phase name,
@@ -400,16 +431,27 @@ def order_phases(self):
             # default cluster has the same name as the phase itself
             if phase4["cluster"] is None:
                 phase4["cluster"] = phase4["name"]
+            clusters[phase4["cluster"]] = {"name": phase4["cluster"]}
+
+
+        next_submits = {}
+        for phase in self.phases + self.user_phases:
+            next_submits[phase["name"]] = []
+            next_submits[phase["cluster"]] = []
 
         # set next_submits to the cluster name rather then to the phase name
         for phase2 in self.phases + self.user_phases:
             if phase2["run_after"] is not None:
                 if phase2["cluster"] not in next_submits[phase2["run_after"]]:
-                    next_submits[phase2["run_after"]].append(phase2["cluster"])
+                    if phase2["cluster"] not in next_submits[phase2["run_after"]]:
+                        next_submits[phase2["run_after"]].append(phase2["cluster"])
+                    if phase2["cluster"] not in next_submits[get_phase_attrib(self.phases + self.user_phases, phase2["run_after"], "cluster")]:
+                        next_submits[get_phase_attrib(self.phases + self.user_phases, phase2["run_after"], "cluster")].append(phase2["cluster"])
                 phase2["called_from"] = phase2["run_after"]
 
         for phase3 in self.phases + self.user_phases:
-            phase3["next_submit"] = next_submits[phase3["name"]]
+            phase3.set_attrib("next_submit", next_submits[phase3["name"]])
+#            phase3["next_submit"] = next_submits[phase3["name"]]
 
         first_cluster_name = self.first_task_in_queue
         first_phase = self.get_workflow_phase_by_name(first_cluster_name)
@@ -427,63 +469,8 @@ def order_phases(self):
 
         return self
 
-    def complete_clusters(self, config):
-        # all that are within a next_submit list are in a cluster if:
-        # run concurrently
-        # have the same cluster entry.
-        """
-        Rearanges the subjobs to their subjobs_clusters ???
-
-        TODO: Can this be put into other functions/methods?
 
-        Parameters
-        ----------
-            self : Workflow object
-            config : dict
-
-        Returns
-        -------
-            config : dict
-        """
-        subjob_clusters = config["general"]["workflow"]["subjob_clusters"]
-
-        # Then, complete the resource information per cluster
-        # determine whether a cluster is to be submitted to a batch system
-        for subjob_cluster in subjob_clusters:
-            nproc_sum = nproc_max = 0
-            # Check if the following attributes are set for each cluster???
-#            attributes = ["submit_to_batch_system", "order_in_cluster", "run_on_queue", "run_after", "run_before", "run_only", "skip_run_number", "skip_chunk_number", "batch_or_shell"]
-#            for attrib in attributes:
-#                temp_list = []
-            for subjob in subjob_clusters[subjob_cluster]["subjobs"]:
-                # Check if the following attributes are set for each cluster???
-#                    if not get_phase_attrib(self.phases + self.user_phases, subjob, attrib) in temp_list:
-#                        subjob_clusters[subjob_cluster][attrib] = get_phase_attrib(self.phases + self.user_phases, subjob, attrib)
-#                    else:
-#                        print("Missmatch in attributes")
-#                        sys.exit(-1)
-                nproc_sum += get_phase_attrib(self.phases + self.user_phases, subjob, "nproc")
-                nproc_max = max(get_phase_attrib(self.phases + self.user_phases, subjob, "nproc"), nproc_max)
-#
-            if subjob_clusters[subjob_cluster].get("submit_to_batch_system", False):
-#    #            subjob_clusters[subjob_cluster]["batch_or_shell"] = "batch"
-
-# Why setting batch_or_shell to shell if a script is given? Wouldn't now all phases be executed as shell and never as batch?
-#    #        elif subjob_clusters[subjob_cluster].get("script", False):
-#    #            subjob_clusters[subjob_cluster]["batch_or_shell"] = "shell"
-#    #
-                if "run_on_queue" not in subjob_clusters[subjob_cluster]:
-                    err_msg = f"No value for target queue given by ``run_on_queue' for cluster {subjob_cluster}."
-                    esm_parser.user_error("ERROR", err_msg)
-
-                if subjob_clusters[subjob_cluster]["order_in_cluster"] == "concurrent":
-                    nproc = nproc_sum
-                else:
-                    nproc = nproc_max
-                subjob_clusters[subjob_cluster]["nproc"] = nproc
-        return config
-
-    def prepend_newrun_job(self, config):
+    def prepend_newrun_job(self):
         """
         - Creates a new cluster "newrun" if first_task_in_queue is not of
           type 'SimulationSetup'
@@ -496,7 +483,6 @@ def prepend_newrun_job(self, config):
         Parameters
         ----------
             self : Workflow object
-            config : dict
 
         Returns
         -------
@@ -681,19 +667,16 @@ def assemble_workflow(config):
     workflow = workflow.collect_all_user_workflows(config)
 
     # 4. Order user workflows into default workflow wrt. workflow and phase attributs.
-    workflow = workflow.order_phases()
+    workflow = workflow.order_phases_and_clusters()
 
     # 5. create new first phase of type SimulationSetup, if first_task_in_queue is
     #    user phase (type batch or shell)
-    workflow = workflow.prepend_newrun_job(config)
+    workflow = workflow.prepend_newrun_job()
 
     # 6. write the workflow to config
     # 7. Remove old worklow from config
     config = workflow.write_to_config(config)
 
-    # 8. complete some information in a cluster
-    #    e.g. if phases in cluster are submit to sbatch system
-    config = workflow.complete_clusters(config)
 
     # Set "jobtype" for the first task???
     if config["general"]["jobtype"] == "unknown":
@@ -764,8 +747,10 @@ def display_workflow(config):
     while first_phase not in second_phase and second_phase:
         sec_phase_str = ""
         for sec_phase in second_phase:
-            if config["general"]["workflow"]["subjobs"][sec_phase]["next_submit"]:
-                second_phase = config["general"]["workflow"]["subjobs"][sec_phase]["next_submit"]
+            if config["general"]["workflow"]["subjob_clusters"][sec_phase]["next_submit"]:
+                second_phase = config["general"]["workflow"]["subjob_clusters"][sec_phase]["next_submit"]
+                subjobs = config["general"]["workflow"]["subjob_clusters"][sec_phase]["subjobs"]
+            else:
                 subjobs = config["general"]["workflow"]["subjob_clusters"][sec_phase]["subjobs"]
             if sec_phase_str == "":
                 sec_phase_str = f"{sec_phase_str} ``{sec_phase}`` {subjobs}"

From 1a8d12120f11656d35248123aec060c8cdb9da0c Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Thu, 23 Nov 2023 17:25:47 +0100
Subject: [PATCH 39/98] Removed obsolete functions.

---
 src/esm_runscripts/workflow.py | 146 +--------------------------------
 1 file changed, 2 insertions(+), 144 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index e6336fe98..4ce247c81 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -722,7 +722,7 @@ def calc_number_of_tasks(config):
 
 def display_workflow(config):
     """
-    Displays current workflow settings.
+    Displays workflow sequence.
 
     Parameters
     ----------
@@ -730,7 +730,7 @@ def display_workflow(config):
 
     Returns
     -------
-        config : dict (needed???)
+        config : dict
     """
 
     display_nicely(config)
@@ -789,145 +789,3 @@ def display_nicely(config):
     """
     esm_parser.pprint_config(config["general"]["workflow"])
     return config
-
-# ################## Maybe outdated routines ######################
-#
-#
-# def collect_all_workflow_information(config):
-#    """
-#    Collects all workflow information for each component entry in config
-#    (can be a model/component or a new entry (e.g. 'flows')
-#    NOTE: Should it be possible to set a workflow in the model section of the
-#          runscript? Why not?
-#
-#    Checks if there are "workflow" entries in the user runscript and copies or
-#    merges them into
-#    config["general"]["workflow"]
-#
-#    Parameters
-#    ----------
-#        config : dict
-#
-#    Returns
-#    -------
-#        config : dict
-#    """
-#    for model in config:
-#        if "workflow" in config[model]:
-#            # looks for "workflow" in each entry of config (can be model/component, general, etc.)
-#            w_config = config[model]["workflow"]
-#            # looks for "workflow" in "general" section of config.
-#            gw_config = config["general"]["workflow"]
-#
-#            # looks for entry 'subjob_clusters' in config of each component that has a "workflow"
-#            if "subjob_clusters" in w_config:
-#                for cluster in w_config["subjob_clusters"]:
-#                    # if a certain cluster is also in the general config, this cluster will be merged together ...
-#                    # what cluster could this be?
-#                    if cluster in gw_config["subjob_clusters"]:
-#                        gw_config["subjob_clusters"][cluster] = merge_if_possible(
-#                            w_config["subjob_clusters"][cluster],
-#                            gw_config["subjob_clusters"][cluster],
-#                        )
-#                    # if cluster is not in general config, it will copied into it.
-#                    else:
-#                        gw_config["subjob_clusters"][cluster] = copy.deepcopy(
-#                            w_config["subjob_clusters"][cluster],
-#                        )
-#
-#            # looks for entry 'subjobs' in config of each component
-#            if "subjobs" in w_config:
-#                # copies component workflow config to new variable ref_config
-#                ref_config = copy.deepcopy(w_config)
-#                # ??? for every subjob in ???
-#                for subjob in list(copy.deepcopy(w_config["subjobs"])):
-#
-#                    # subjobs (other than clusters) should be model specific
-#                    # subjobs that are defined in subjobs of components workflow configs and not in a subjob_cluster are copied to general with suffix of componet entry.
-#                    # appends the model name to the subjob name and copy it to config["general"]
-#                    gw_config["subjobs"][subjob + "_" + model] = copy.deepcopy(
-#                        w_config["subjobs"][subjob]
-#                    )
-#                    # if this copied subjobs is also n general workflow subjobs it will be deleted there
-#                    if subjob in gw_config["subjobs"]:
-#                        del gw_config["subjobs"][subjob]
-#
-#                    # make sure that the run_after and run_before refer to that cluster
-#                    # for all subjobs now in general workflow
-#                    for other_subjob in gw_config["subjobs"]:
-#                        # sets run_after and run_before to correct subjob???
-#                        # if a subjob of general workflow has run_after attribute to a user subjob (that has been renamed to subjob_model)
-#                        # this run_after will be set to the new subjob name (subjob_model)
-#                        if "run_after" in gw_config["subjobs"][other_subjob]:
-#                            if (gw_config["subjobs"][other_subjob]["run_after"] == subjob):
-#                                gw_config["subjobs"][other_subjob]["run_after"] == subjob + "_" + model
-#                        if "run_before" in gw_config["subjobs"][other_subjob]:
-#                            if (gw_config["subjobs"][other_subjob]["run_before"] == subjob):
-#                                gw_config["subjobs"][other_subjob]["run_before"] == subjob + "_" + model
-#
-#                    # if not in another cluster, each subjob gets its own
-#                    if (not "subjob_cluster" in gw_config["subjobs"][subjob + "_" + model]):
-#                        gw_config["subjobs"][subjob + "_" + model]["subjob_cluster"] = subjob  # + "_" + model
-#
-#            # checks if next_run:triggered_by is tidy or the one in user workflow, or empty?
-#            if "next_run_triggered_by" in w_config:
-#                if not gw_config["next_run_triggered_by"] in ["tidy", w_config["next_run_triggered_by"], ]:
-#                    print("Mismatch found setting next_run_triggered_by for workflow.")
-#                    sys.exit(-1)
-#                else:
-#                    gw_config["next_run_triggered_by"] = w_config["next_run_triggered_by"]
-#                    # what if w_config["next_run_triggered_by"] is empty?
-#
-#    return config
-#
-# def merge_single_entry_if_possible(entry, sourceconf, targetconf):
-#    """
-#    Merges a dictionary entry into a target dictionary that has he same key.
-#
-#    Parameters
-#    ----------
-#        entry : str
-#            dictionary key
-#        sourceconf : dict
-#        targetconf : dict
-#
-#    Returns
-#    -------
-#        targetconf : dict
-#    """
-#    if entry in sourceconf:
-#        # Check if entry is already in targetconf AND different to sourceconf, then exit
-#        if entry in targetconf and not sourceconf[entry] == targetconf[entry]:
-#            print(f"Mismatch found in {entry} for cluster {targetconf}")
-#            sys.exit(-1)
-#        # Continues here if entry exists already in targetconf AND the same as sourceconf or
-#        # not already in targetconf and set it to sourceconf
-#        targetconf[entry] = sourceconf[entry]
-#    return targetconf
-#
-# def merge_if_possible(source, target):
-#    """
-#    Does the same as above but for a whole dict
-#
-#    Merges the entries of source dictionary into target dictionary, if not already in.
-#    (Will not overwrite entries in target dictionary.)
-#
-#    Parameters
-#    ----------
-#        source : dict
-#        target : dict
-#
-#    Returns
-#    -------
-#        target : dict
-#    """
-#    for entry in source:
-#        if entry in target:
-#            if not source[entry] == target[entry]:
-#                print(
-#                    f"Mismatch while trying to merge subjob_clusters {source} into {target}"
-#                )
-#                sys.exit(-1)
-#        else:
-#            target[entry] = source[entry]
-#    return target

From 916a3459128b047c8fc62c0c9ddd22a6802dc38f Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Fri, 24 Nov 2023 11:57:44 +0100
Subject: [PATCH 40/98] (Re)moved redundant code, renamed function for
 collecting phases.

---
 src/esm_runscripts/workflow.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index 4ce247c81..c72298e93 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -77,10 +77,9 @@ def get_phases_attribs_list(self, phase_type, attrib):
 
         return phases_attribs
 
-    def config_sbatch_phases(self, config):
+    def set_default_nproc(self, config):
         """
         Calculating the number of mpi tasks for each component/model/script
-        and set queue for default phases that run as batch jobs
 
         Parameters
         ----------
@@ -95,8 +94,6 @@ def config_sbatch_phases(self, config):
 
         for ind, phase in enumerate(self.phases):
             if phase["submit_to_batch_system"]:
-                phase["batch_or_shell"] = 'batch'
-                phase["run_on_queue"] = config["computer"]["partitions"]["compute"]["name"]
                 phase["nproc"] = tasks
 
         return self
@@ -137,7 +134,7 @@ def check_if_keyword_is_valid(self, keyword):
 
         return hasattr(self, keyword)
 
-    def collect_all_user_workflows(self, config):
+    def collect_all_user_phases(self, config):
         """
         Collect all workflows defined in config files.
 
@@ -609,6 +606,9 @@ def __init__(self, phase):
 
         super().__init__(phase)
 
+        if self.get("submit_to_batch_system", False):
+            self["batch_or_shell"] = "batch"
+
     def set_attrib(self, attrib, value):
         if type(self[attrib]) == "list":
             self[attrib].append(value)
@@ -659,12 +659,12 @@ def assemble_workflow(config):
         # Where could a user define a different (default) phase list?
         # Or should this be changed in defaults.yaml as it is now?
 
-    # 3. Calc mpi tasks and set queue for batch jobs for default phases
+    # 3. Calc mpi tasks for batch jobs of default phases
     # TODO: Put it into other method?
-    workflow = workflow.config_sbatch_phases(config)
+    workflow = workflow.set_default_nproc(config)
 
-    # 3. Read in workflows from runscript and config files
-    workflow = workflow.collect_all_user_workflows(config)
+    # 3. Read in phases from runscript and config files
+    workflow = workflow.collect_all_user_phases(config)
 
     # 4. Order user workflows into default workflow wrt. workflow and phase attributs.
     workflow = workflow.order_phases_and_clusters()

From c264f1ac7a1d67e67ed10d9b44aacf2713f4d1fb Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Fri, 24 Nov 2023 12:18:37 +0100
Subject: [PATCH 41/98] Removed the possibility to set workflow keywords by
 user.

---
 src/esm_runscripts/workflow.py | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index c72298e93..a5ff8c513 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -156,17 +156,9 @@ def collect_all_user_phases(self, config):
                 w_config = config[model]["workflow"]
                 # if "subjobs" in w_config:
                 if "phases" in w_config:
-                    # Set attributes of workflow
-                    # This will be overwritten by all user defined workflows???
-                    # Collect them in a list???
-                    # check if valid workflow keywords
                     for key, value in w_config.items():
-                        if self.check_if_keyword_is_valid(key):
-                            # set here only workflow attributes
-                            if not key == "phases":
-                                self.set_workflow_attrib(key, value)
-                        else:
-                            err_msg = f"``{key}`` is not a valid keyword of a workflow."
+                        if not key == "phases":
+                            err_msg = f"``{key}`` is not allowed to be set for a workflow."
                             esm_parser.user_error("ERROR", err_msg)
                     for phase in w_config["phases"]:
                         # each phase (of a model/setup) needs to have an unique name

From cf9d0e344f00b4369ee43389cf29012ec8f11416 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Mon, 27 Nov 2023 17:26:24 +0100
Subject: [PATCH 42/98] Worked on workflow tests, and other minor changes to
 workflow.

---
 src/esm_runscripts/workflow.py             |  26 +-
 tests/test_esm_runscripts/test_workflow.py | 331 ++++++++++++++++++---
 2 files changed, 301 insertions(+), 56 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index a5ff8c513..d924d4c75 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -2,8 +2,6 @@
 import copy
 import esm_parser
 
-# from pprint import pprint
-
 import pdb
 
 
@@ -193,8 +191,7 @@ def collect_all_user_phases(self, config):
                             if phase_config.get("submit_to_batch_system", False):
                                 phase_config["batch_or_shell"] = "batch"
                                 if not phase_config.get("run_on_queue", False):
-                                    breakpoint()
-                                    err_msg = f"No value for target queue given by ``run_on_queue' for phase {phase}."
+                                    err_msg = f"No value for target queue given by ``run_on_queue`` for phase ``{phase}``."
                                     esm_parser.user_error("ERROR", err_msg)
                             else:
                                 phase_config["batch_or_shell"] = "shell"
@@ -237,7 +234,6 @@ def write_to_config(self, config):
             for phase in self.phases + self.user_phases:
                 if phase["cluster"] == cluster:
                     # TODO: Are there more attributes to be merged from the different phases within a cluster???
-                    # nproc is calculated in complete_clusters -> can be placed here???
                     config["general"]["workflow"]["subjob_clusters"][cluster]["subjobs"].append(phase["name"])
                     for att in phase:
                         config["general"]["workflow"]["subjob_clusters"][cluster][att] = phase[att]
@@ -591,7 +587,7 @@ def __init__(self, phase):
             if key not in self:
                 err_msg = (
                     f"``{key}`` of workflow phase "
-                    f"``{new_phase_name}`` is not a valid keyword "
+                    f"``{phase['name']}`` is not a valid keyword "
                     f"of a workflow phase."
                 )
                 esm_parser.user_error("ERROR", err_msg)
@@ -637,8 +633,13 @@ def assemble_workflow(config):
         if "workflow" in config["general"]["defaults.yaml"]:
             workflow = config["general"]["defaults.yaml"]["workflow"]
             phases = config["general"]["defaults.yaml"]["workflow"].get("phases", [])
+        else:
+            esm_parser.user_error("ERROR", "No default workflow defined.")
+    else:
+        workflow = []
+        phases = []
 
-    # 2. Initialize default workflow phases
+    # 2. Initialize default workflow phases from defaults.yaml
     if phases:
         workflow = Workflow(workflow)
         for phase in phases:
@@ -652,17 +653,17 @@ def assemble_workflow(config):
         # Or should this be changed in defaults.yaml as it is now?
 
     # 3. Calc mpi tasks for batch jobs of default phases
-    # TODO: Put it into other method?
+    # TODO: Put it into other method???
     workflow = workflow.set_default_nproc(config)
 
     # 3. Read in phases from runscript and config files
     workflow = workflow.collect_all_user_phases(config)
 
-    # 4. Order user workflows into default workflow wrt. workflow and phase attributs.
+    # 4. Order user workflows into default workflow wrt. phase attributs.
     workflow = workflow.order_phases_and_clusters()
 
     # 5. create new first phase of type SimulationSetup, if first_task_in_queue is
-    #    user phase (type batch or shell)
+    #    a user phase (type batch or shell)
     workflow = workflow.prepend_newrun_job()
 
     # 6. write the workflow to config
@@ -671,6 +672,8 @@ def assemble_workflow(config):
 
 
     # Set "jobtype" for the first task???
+    # NOTE: This is either first default phase or
+    #       newrun??? Can't this not be set in prepend_newrun then?
     if config["general"]["jobtype"] == "unknown":
         config["general"]["command_line_config"]["jobtype"] = config["general"][
             "workflow"
@@ -726,6 +729,9 @@ def display_workflow(config):
     """
 
     display_nicely(config)
+    display_workflow_sequence(config)
+
+def display_workflow_sequence(config):
 
     first_phase = config["general"]["workflow"]["first_task_in_queue"]
     subjobs = config["general"]["workflow"]["subjob_clusters"][first_phase]["subjobs"]
diff --git a/tests/test_esm_runscripts/test_workflow.py b/tests/test_esm_runscripts/test_workflow.py
index e39aadac2..e2bc49cfb 100644
--- a/tests/test_esm_runscripts/test_workflow.py
+++ b/tests/test_esm_runscripts/test_workflow.py
@@ -4,10 +4,55 @@
 
 from esm_runscripts import workflow
 import pytest
+import esm_parser
+
+@pytest.fixture()
+def test_default_phases_dict():
+    phases_dict = {
+        'compute': {
+            'called_from': 'prepcompute',
+            'cluster': 'compute',
+            'name': 'compute',
+            'next_submit': ['tidy'],
+            'nproc': 'None',
+            'order_in_cluster': 'sequential',
+            'run_after': 'prepcompute',
+            'run_before': 'tidy',
+            'run_on_queue': 'compute',
+            'submit_to_batch_system': True},
+        'prepcompute': {
+            'batch_or_shell': 'SimulationSetup',
+            'called_from': 'tidy',
+            'cluster': 'prepcompute',
+            'name': 'prepcompute',
+            'next_submit': ['compute'],
+            'nproc': 1,
+            'order_in_cluster': 'sequential',
+            'run_after': 'tidy',
+            'run_before': 'compute',
+            'submit_to_batch_system': False},
+        'tidy': {
+            'batch_or_shell': 'SimulationSetup',
+            'called_from': 'compute',
+            'cluster': 'tidy',
+            'name': 'tidy',
+            'next_submit': ['prepcompute'],
+            'nproc': 1,
+            'order_in_cluster': 'sequential',
+            'run_after': 'compute',
+            'run_before': 'prepcompute',
+            'submit_to_batch_system': False}
+    }
+    return phases_dict
 
 @pytest.fixture()
 def test_workflow_object():
-    test_workflow = workflow.Workflow(["prepcompute","compute","tidy"],always_run_with=["prepare","prepexp"])
+    workflow_dict = {
+        'first_task_in_queue': 'prepcompute',
+        'last_task_in_queue': 'tidy',
+        'next_run_triggered_by': 'tidy'
+    }
+    test_workflow = workflow.Workflow(workflow_dict)
     return test_workflow
 
 @pytest.fixture()
@@ -40,7 +85,7 @@ def test_config():
         'general': {
             'valid_model_names': ['fesom', 'oifs', 'rnfmap', 'oasis3mct', 'xios'],
             'workflow': {
-                'next_run_triggered_by': 'tidy',
+#                'next_run_triggered_by': 'tidy',
                 'phases': {
                     'my_new_subjob_general': {
                         'batch_or_shell': 'batch',
@@ -53,7 +98,7 @@ def test_config():
                         'submit_to_batch_system': True}}}},
         'flow': {
             'workflow': {
-                'next_run_triggered_by': 'tidy',
+#                'next_run_triggered_by': 'tidy',
                 'phases': {
                     'my_new_subjob_flow': {
                         'batch_or_shell': 'batch',
@@ -64,30 +109,145 @@ def test_config():
                         'run_after': 'tidy',
                         'script_dir': '/work/ab0995/a270089/myrunscripts/',
                         'script': 'hallowelt.sh',
-                        'submit_to_batch_system': True}}}}}
+                        'submit_to_batch_system': True}
+                }
+            }
+        }
+    }
     return config
 
-def test_num_phases(test_workflow_object, test_config):
-    test_workflow_object = test_workflow_object.init_default_workflow(test_config)
+@pytest.fixture()
+def test_config_2():
+    """Setup a test config dictionary."""
+    config = {
+        'computer': {'partitions': {'compute': {'name': 'test'}}},
+        'fesom': {
+            'nproc': 128},
+        'rnfmap': {
+            'nproc': 128},
+        'oasis3mct': {
+            'nproc': 128},
+        'xios': {
+            'nproc': 128},
+        'general': {
+            'valid_model_names': ['fesom', 'oifs', 'rnfmap', 'oasis3mct', 'xios'],
+            'jobtype': 'unknown',
+            'command_line_config': {
+                'jobtype': None
+            },
+            "defaults.yaml": {
+                'workflow': {
+                    'first_task_in_queue': 'prepcompute',
+                    'last_task_in_queue': 'tidy',
+                    'next_run_triggered_by': 'tidy',
+                    'phases': {
+                        'compute': {
+                            'called_from': 'prepcompute',
+                            'cluster': 'compute',
+                            'name': 'compute',
+                            'next_submit': ['tidy'],
+                            'nproc': 'None',
+                            'order_in_cluster': 'sequential',
+                            'run_after': 'prepcompute',
+                            'run_before': 'tidy',
+                            'run_on_queue': 'compute',
+                            'submit_to_batch_system': True},
+                        'prepcompute': {
+                            'batch_or_shell': 'SimulationSetup',
+                            'called_from': 'tidy',
+                            'cluster': 'prepcompute',
+                            'name': 'prepcompute',
+                            'next_submit': ['compute'],
+                            'nproc': 1,
+                            'order_in_cluster': 'sequential',
+                            'run_after': 'tidy',
+                            'run_before': 'compute',
+                            'submit_to_batch_system': False},
+                        'tidy': {
+                            'batch_or_shell': 'SimulationSetup',
+                            'called_from': 'compute',
+                            'cluster': 'tidy',
+                            'name': 'tidy',
+                            'next_submit': ['prepcompute'],
+                            'nproc': 1,
+                            'order_in_cluster': 'sequential',
+                            'run_after': 'compute',
+                            'run_before': 'prepcompute',
+                            'submit_to_batch_system': False}
+                    }
+                }
+            },
+            'workflow': {
+                'my_new_subjob_general': {
+                    'batch_or_shell': 'batch',
+                    'order_in_cluster': 'concurrent',
+                    'run_on_queue': 'compute',
+                    'nproc': 1,
+                    'run_after': 'tidy',
+                    'script_dir': '/work/ab0995/a270089/myrunscripts/',
+                    'script': 'hallowelt.sh',
+                    'submit_to_batch_system': True}
+            }
+        },
+        'flow': {
+            'workflow': {
+                'phases': {
+                    'my_new_subjob_flow': {
+                        'batch_or_shell': 'batch',
+                        'order_in_cluster': 'concurrent',
+                        'cluster': 'test_cluster',
+                        'run_on_queue': 'compute',
+                        'nproc': 1,
+                        'run_after': 'tidy',
+                        'script_dir': '/work/ab0995/a270089/myrunscripts/',
+                        'script': 'hallowelt.sh',
+                        'submit_to_batch_system': True,
+                        'trigger_next_run': True}
+                }
+            }
+        },
+        'oifs': {
+            'workflow': {
+                'phases': {
+                    'my_new_subjob_oifs': {
+                        'batch_or_shell': 'batch',
+                        'nproc': 1,
+                        'order_in_cluster': 'concurrent',
+                        'cluster': 'test_cluster',
+                        'run_after': 'tidy',
+                        'run_on_queue': 'compute',
+                        'script': 'helloworld.sh',
+                        'script_dir': '/work/ab0995/a270089/myrunscripts/',
+                        'submit_to_batch_system': True}
+                }
+            }
+        },
+    }
+    return config
+
+def test_num_phases(test_workflow_object, test_default_phases_dict, test_config):
+    for phase in test_default_phases_dict:
+        test_workflow_object.phases.append(workflow.WorkflowPhase(test_default_phases_dict[phase]))
     assert test_workflow_object.num_phases == 3
 
-def test_check_user_workflow_dependency(test_workflow_object, test_config):
-    test_workflow_object = test_workflow_object.init_default_workflow(test_config)
-    test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config)
+def test_check_user_workflow_dependency(test_workflow_object, test_default_phases_dict, test_config):
+    for phase in test_default_phases_dict:
+        test_workflow_object.phases.append(workflow.WorkflowPhase(test_default_phases_dict[phase]))
+    test_workflow_object = test_workflow_object.collect_all_user_phases(test_config)
     independent = test_workflow_object.check_user_workflow_dependency()
     assert independent
 
 def test_check_user_workflow_dependency_2(test_workflow_object, test_config):
     test_config['flow']['workflow']['phases']['my_new_subjob_flow']['run_after'] = 'my_new_subjob_oifs'
-    test_workflow_object = test_workflow_object.init_default_workflow(test_config)
-    test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config)
+#    test_workflow_object = test_workflow_object.init_default_workflow(test_config)
+    test_workflow_object = test_workflow_object.collect_all_user_phases(test_config)
     independent = test_workflow_object.check_user_workflow_dependency()
     assert not independent
 
 def test_check_unknown_phases(test_workflow_object, test_config):
     test_config['flow']['workflow']['phases']['my_new_subjob_flow']['run_after'] = 'my_new_subjob'
-    test_workflow_object = test_workflow_object.init_default_workflow(test_config)
-    test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config)
+#    test_workflow_object = test_workflow_object.init_default_workflow(test_config)
+    test_workflow_object = test_workflow_object.collect_all_user_phases(test_config)
     unknown_phases = test_workflow_object.check_unknown_phases()
     assert unknown_phases
 
@@ -100,50 +260,129 @@ def test_collect_all_user_workflow(test_config):
 def test_calc_number_of_tasks():
     pytest.fail("something wrong")
 
-def test_order_phases(test_workflow_object, test_config):
+def test_order_phases_and_clusters(test_workflow_object, test_default_phases_dict, test_config):
+    for phase in test_default_phases_dict:
+        test_workflow_object.phases.append(workflow.WorkflowPhase(test_default_phases_dict[phase]))
     test_config['flow']['workflow']['phases']['my_new_subjob_flow']['run_after'] = 'my_new_subjob_general'
 #    test_config['flow']['workflow']['next_run_triggered_by'] = 'my_new_subjob_flow'
 #    test_config['oifs']['workflow']['next_run_triggered_by'] = 'my_new_subjob_general'
     #test_config['flow']['workflow']['phases']['my_new_subjob_flow']['run_before'] = 'my_new_subjob_oifs'
-    test_workflow_object = test_workflow_object.init_default_workflow(test_config)
-    test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config)
-    test_workflow_object = test_workflow_object.order_phases()
+    test_workflow_object = test_workflow_object.collect_all_user_phases(test_config)
+    test_workflow_object = test_workflow_object.order_phases_and_clusters()
     pytest.fail("something wrong")
 
-def test_complete_clusters(test_workflow_object, test_config):
-    test_workflow_object = test_workflow_object.init_default_workflow(test_config)
-    test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config)
-    test_workflow_object = test_workflow_object.order_phases()
-    subjob_clusters = test_workflow_object.complete_clusters(test_config)
+def test_complete_clusters(test_workflow_object, test_default_phases_dict, test_config):
+    for phase in test_default_phases_dict:
+        test_workflow_object.phases.append(workflow.WorkflowPhase(test_default_phases_dict[phase]))
+    test_workflow_object = test_workflow_object.collect_all_user_phases(test_config)
+    test_workflow_object = test_workflow_object.order_phases_and_clusters()
     pytest.fail("something wrong")
 
-def test_prepend_newrun_job(test_workflow_object, test_config):
-    test_workflow_object = test_workflow_object.init_default_workflow(test_config)
-    test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config)
-    test_workflow_object = test_workflow_object.order_phases()
-    subjob_clusters = test_workflow_object.complete_clusters(test_config)
-    [test_workflow_object, subjob_clusters] = workflow.prepend_newrun_job(test_workflow_object, test_config, subjob_clusters)
+def test_prepend_newrun_job(test_workflow_object, test_default_phases_dict, test_config):
+    for phase in test_default_phases_dict:
+        test_workflow_object.phases.append(workflow.WorkflowPhase(test_default_phases_dict[phase]))
+    test_workflow_object = test_workflow_object.collect_all_user_phases(test_config)
+    test_workflow_object = test_workflow_object.order_phases_and_clusters()
+    test_workflow_object = test_workflow_object.prepend_newrun_job()
     pytest.fail("something wrong")
 
-def test_write_to_config(test_workflow_object, test_config):
-    test_workflow_object = test_workflow_object.init_default_workflow(test_config)
-    test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config)
-    test_workflow_object = test_workflow_object.order_phases()
-    subjob_clusters = test_workflow_object.complete_clusters(test_config)
-    [test_workflow_object, subjob_clusters] = workflow.prepend_newrun_job(test_workflow_object, test_config, subjob_clusters)
+def test_write_to_config(test_workflow_object, test_default_phases_dict, test_config):
+    for phase in test_default_phases_dict:
+        test_workflow_object.phases.append(workflow.WorkflowPhase(test_default_phases_dict[phase]))
+    test_workflow_object = test_workflow_object.set_default_nproc(test_config)
+    test_workflow_object = test_workflow_object.collect_all_user_phases(test_config)
+    test_workflow_object = test_workflow_object.order_phases_and_clusters()
+    test_workflow_object = test_workflow_object.prepend_newrun_job()
     config = test_workflow_object.write_to_config(test_config)
     pytest.fail("something wrong")
 
-def test_write_subjob_clusters_to_config(test_workflow_object, test_config):
-    test_workflow_object = test_workflow_object.init_default_workflow(test_config)
-    test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config)
-    test_workflow_object = test_workflow_object.order_phases()
-    test_workflow_object = test_workflow_object.prepend_newrun_job(test_config)
-    test_config = test_workflow_object.write_to_config(test_config)
-    test_workflow_object = test_workflow_object.complete_clusters(test_config)
-
-def test_prepend_newrun_job(test_workflow_object, test_config):
-    test_workflow_object = test_workflow_object.init_default_workflow(test_config)
-    test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config)
-    test_workflow_object = test_workflow_object.prepend_newrun_job(test_config)
+# Test scenarios
+# 1. Add one single phase at the end of the default workflow (Example 1 in documentation)
+def test_example_1(test_config_2):
+    test_config_2 = workflow.assemble_workflow(test_config_2)
+    workflow.display_workflow_sequence(test_config_2)
+#    esm_parser.pprint_config(test_config_2)
+
     pytest.fail("something wrong")
+
+# Test exceptions
+# 1. If still a workflow keyword is set by user.
+def test_exception_test_workflow_keyword(test_config_2):
+    test_config_2['flow']['workflow']['next_run_triggered_by'] = 'my_new_subjob_general'
+    with pytest.raises(SystemExit):
+        test_config_2 = workflow.assemble_workflow(test_config_2)
+
+# 2. If an invalid phase keyword is set.
+def test_exception_invalid_phase_keyword(test_config_2):
+    test_config_2['flow']['workflow']['phases']['my_new_subjob_flow']['wrong_keyword'] = 'wrong_value'
+    with pytest.raises(SystemExit):
+        test_config_2 = workflow.assemble_workflow(test_config_2)
+
+# 3. If an unknown phase is called for , e.g. in 'run_after'
+def test_exception_unknown_phase(test_config_2):
+    test_config_2['flow']['workflow']['phases']['my_new_subjob_flow']['run_after'] = 'trudy'
+    with pytest.raises(SystemExit):
+        test_config_2 = workflow.assemble_workflow(test_config_2)
+
+# 4. If a user phase has the same name as a default phase.
+def test_if_user_phase_has_default_phase_name(test_config_2):
+    test_config_2['flow']['workflow']['phases']['compute'] = {
+        'batch_or_shell': 'batch',
+        'order_in_cluster': 'concurrent',
+        'cluster': 'test_cluster',
+        'run_on_queue': 'compute',
+        'nproc': 1,
+        'run_after': 'tidy',
+        'script_dir': '/work/ab0995/a270089/myrunscripts/',
+        'script': 'hallowelt.sh',
+        'submit_to_batch_system': True}
+    with pytest.raises(SystemExit):
+        test_config_2 = workflow.assemble_workflow(test_config_2)
+
+# 5. If two user phases have the same name and are defined in different models/setups.
+def test_if_two_user_phase_have_the_same_name(test_config_2):
+    test_config_2['oifs']['workflow']['phases']['my_new_subjob_flow'] = {
+        'batch_or_shell': 'batch',
+        'order_in_cluster': 'concurrent',
+        'cluster': 'test_cluster',
+        'run_on_queue': 'compute',
+        'nproc': 1,
+        'run_after': 'tidy',
+        'script_dir': '/work/ab0995/a270089/myrunscripts/',
+        'script': 'hallowelt.sh',
+        'submit_to_batch_system': True}
+    with pytest.raises(SystemExit):
+        test_config_2 = workflow.assemble_workflow(test_config_2)
+
+# 6. If no queue is given for a phase that should be run on sbatch system.
+def test_if_queue_is_missing(test_config_2):
+    test_config_2['oifs']['workflow']['phases']['my_new_subjob_oifs'] = {
+        'batch_or_shell': 'batch',
+        'order_in_cluster': 'concurrent',
+        'cluster': 'test_cluster',
+        #'run_on_queue': 'compute',
+        'nproc': 1,
+        'run_after': 'tidy',
+        'script_dir': '/work/ab0995/a270089/myrunscripts/',
+        'script': 'hallowelt.sh',
+        'submit_to_batch_system': True}
+    with pytest.raises(SystemExit):
+        test_config_2 = workflow.assemble_workflow(test_config_2)
+
+# 7. If more than one phase trigger_next_run.
+def test_if_trigger_next_run_unclear(test_config_2):
+    test_config_2['oifs']['workflow']['phases']['my_new_subjob_oifs']['trigger_next_run'] = True
+    with pytest.raises(SystemExit):
+        test_config_2 = workflow.assemble_workflow(test_config_2)
+
+# 8. If no default phases are defined in defaults.yaml.
+def test_if_no_default_phases(test_config_2):
+    test_config_2['general']['defaults.yaml']['workflow'].pop('phases', None)
+    with pytest.raises(SystemExit):
+        test_config_2 = workflow.assemble_workflow(test_config_2)
+
+# 9. If no default workflow is defined in defaults.yaml.
+def test_inf_no_default_workflow(test_config_2):
+    test_config_2['general']['defaults.yaml'].pop('workflow', None)
+    with pytest.raises(SystemExit):
+        test_config_2 = workflow.assemble_workflow(test_config_2)

From f86202ddb20724408eb1f67c203cef51810e85c1 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Tue, 28 Nov 2023 16:33:49 +0100
Subject: [PATCH 43/98] Fixed a bug in order_phases_and_clusters

---
 src/esm_runscripts/workflow.py | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index d924d4c75..75aab8bf4 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -339,6 +339,10 @@ def order_phases_and_clusters(self):
         """
         Put the phases and clusters in order.
 
+        Tasks:
+        1. Correct for ``triggered_next_run`` if set by user phase
+            - next_submit, run_after, called_from, run_before???
+
         Parameters
         ----------
 
@@ -396,10 +400,16 @@ def order_phases_and_clusters(self):
         # get first default phase and correct run_after, called_from
         # correct last_task_in_queue of workflow
         if next_triggered not in self.get_phases_attribs_list("default", "name"):
-            self.phases[-1]["next_submit"].remove(self.phases[0]["name"])
-            self.phases[-1]["next_submit"].append(next_triggered)
-            self.phases[0]["run_after"] = next_triggered
-            self.phases[0]["called_from"] = next_triggered
+            first_task_name = self.first_task_in_queue
+            first_phase = self.get_workflow_phase_by_name(first_task_name)
+            last_task_name = self.last_task_in_queue
+            last_phase = self.get_workflow_phase_by_name(last_task_name)
+
+            last_phase["next_submit"].remove(first_phase["name"])
+            last_phase["next_submit"].append(next_triggered)
+            last_phase["run_before"] = next_triggered
+            first_phase["run_after"] = next_triggered
+            first_phase["called_from"] = next_triggered
             self.last_task_in_queue = next_triggered
 
         # Set "next_submit" and "called_from"
@@ -731,7 +741,7 @@ def display_workflow(config):
     display_nicely(config)
     display_workflow_sequence(config)
 
-def display_workflow_sequence(config):
+def display_workflow_sequence(config, display=True):
 
     first_phase = config["general"]["workflow"]["first_task_in_queue"]
     subjobs = config["general"]["workflow"]["subjob_clusters"][first_phase]["subjobs"]
@@ -768,8 +778,11 @@ def display_workflow_sequence(config):
                 sec_phase_str = f"{sec_phase_str} and ``{sec_phase}`` {subjobs}"
         workflow_order = f"{workflow_order} -> {sec_phase_str}"
 
-    esm_parser.user_note("Workflow sequence (cluster [phases])", f"{workflow_order}")
-    return config
+    if display:
+        esm_parser.user_note("Workflow sequence (cluster [phases])", f"{workflow_order}")
+    else:
+        workflow_order = workflow_order.replace("``", "")
+    return workflow_order
 
 
 def display_nicely(config):

From 59590ed1dcd4444945b4ba67fb922c2dda8fc773 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Tue, 28 Nov 2023 16:34:50 +0100
Subject: [PATCH 44/98] Added workflow tests for test examples (in
 documentation).

---
 tests/test_esm_runscripts/test_workflow.py | 148 ++++++++++++++++++++-
 1 file changed, 142 insertions(+), 6 deletions(-)

diff --git a/tests/test_esm_runscripts/test_workflow.py b/tests/test_esm_runscripts/test_workflow.py
index e2bc49cfb..742612325 100644
--- a/tests/test_esm_runscripts/test_workflow.py
+++ b/tests/test_esm_runscripts/test_workflow.py
@@ -173,7 +173,8 @@ def test_config_2():
                             'order_in_cluster': 'sequential',
                             'run_after': 'compute',
                             'run_before': 'prepcompute',
-                            'submit_to_batch_system': False}
+                            'submit_to_batch_system': False,
+                            'trigger_next_run': True}
                     }
                 }
             },
@@ -225,6 +226,74 @@ def test_config_2():
     }
     return config
 
+@pytest.fixture()
+def test_default_config_example():
+    """Setup a test config dictionary."""
+    config = {
+        'computer': {'partitions': {'compute': {'name': 'test'}}},
+        'fesom': {
+            'nproc': 128},
+        'oifs': {
+            'nproc': 128},
+        'rnfmap': {
+            'nproc': 128},
+        'oasis3mct': {
+            'nproc': 128},
+        'xios': {
+            'nproc': 128},
+        'general': {
+            'valid_model_names': ['fesom', 'oifs', 'rnfmap', 'oasis3mct', 'xios'],
+            'jobtype': 'unknown',
+            'command_line_config': {
+                'jobtype': None
+            },
+            "defaults.yaml": {
+                'workflow': {
+                    'first_task_in_queue': 'prepcompute',
+                    'last_task_in_queue': 'tidy',
+                    'next_run_triggered_by': 'tidy',
+                    'phases': {
+                        'compute': {
+                            'called_from': 'prepcompute',
+                            'cluster': 'compute',
+                            'name': 'compute',
+                            'next_submit': ['tidy'],
+                            'nproc': 'None',
+                            'order_in_cluster': 'sequential',
+                            'run_after': 'prepcompute',
+                            'run_before': 'tidy',
+                            'run_on_queue': 'compute',
+                            'submit_to_batch_system': True},
+                        'prepcompute': {
+                            'batch_or_shell': 'SimulationSetup',
+                            'called_from': 'tidy',
+                            'cluster': 'prepcompute',
+                            'name': 'prepcompute',
+                            'next_submit': ['compute'],
+                            'nproc': 1,
+                            'order_in_cluster': 'sequential',
+                            'run_after': 'tidy',
+                            'run_before': 'compute',
+                            'submit_to_batch_system': False},
+                        'tidy': {
+                            'batch_or_shell': 'SimulationSetup',
+                            'called_from': 'compute',
+                            'cluster': 'tidy',
+                            'name': 'tidy',
+                            'next_submit': ['prepcompute'],
+                            'nproc': 1,
+                            'order_in_cluster': 'sequential',
+                            'run_after': 'compute',
+                            'run_before': 'prepcompute',
+                            'submit_to_batch_system': False,
+                            'trigger_next_run': True}
+                    }
+                }
+            }
+        }
+    }
+    return config
+
 def test_num_phases(test_workflow_object, test_default_phases_dict, test_config):
     for phase in test_default_phases_dict:
         test_workflow_object.phases.append(workflow.WorkflowPhase(test_default_phases_dict[phase]))
@@ -297,13 +366,80 @@ def test_write_to_config(test_workflow_object, test_default_phases_dict, test_co
     pytest.fail("something wrong")
 
 # Test scenarios
+# 0. Default workflow
+def test_example_0(test_default_config_example):
+    test_default_config_example = workflow.assemble_workflow(test_default_config_example)
+    order = workflow.display_workflow_sequence(test_default_config_example, display=False)
+    assumption = "prepcompute ['prepcompute'] ->  compute ['compute'] ->  tidy ['tidy'] ->  prepcompute ['prepcompute']"
+    assert order == assumption
+
 # 1. Add one single phase at the end of the default workflow (Example 1 in documentation)
-def test_example_1(test_config_2):
-    test_config_2 = workflow.assemble_workflow(test_config_2)
-    workflow.display_workflow_sequence(test_config_2)
-#    esm_parser.pprint_config(test_config_2)
+def test_example_1(test_default_config_example):
+    test_default_config_example["general"]["workflow"] = {
+        'phases': {
+            'my_postprocessing': {
+                'script': 'helloworld.sh',
+                'script_dir': '/work/ab0995/a270089/myrunscripts/'}
+        }
+    }
+    assumption = "prepcompute ['prepcompute'] ->  compute ['compute'] ->  tidy ['tidy'] ->  prepcompute ['prepcompute'] and my_postprocessing ['my_postprocessing']"
+    test_default_config_example = workflow.assemble_workflow(test_default_config_example)
+    order = workflow.display_workflow_sequence(test_default_config_example, display=False)
+    assert order == assumption
+
+# 2. Prepend new phase at the beginning of workflow
+def test_example_2(test_default_config_example):
+    test_default_config_example["general"]["workflow"] = {
+        'phases': {
+            'my_preprocessing': {
+                'run_before': 'prepcompute',
+                'script': 'helloworld.sh',
+                'script_dir': '/work/ab0995/a270089/myrunscripts/'}
+        }
+    }
+    assumption = "newrun ['newrun'] -> my_preprocessing ['my_preprocessing'] -> prepcompute ['prepcompute'] ->  compute ['compute'] ->  tidy ['tidy'] ->  prepcompute ['prepcompute']"
+    test_default_config_example = workflow.assemble_workflow(test_default_config_example)
+    order = workflow.display_workflow_sequence(test_default_config_example, display=False)
+    assert order == assumption
+
+# 3. Append new phase at the beginning of workflow
+def test_example_3(test_default_config_example):
+    test_default_config_example["general"]["workflow"] = {
+        'phases': {
+            'my_new_last_phase': {
+                'script': 'helloworld.sh',
+                'script_dir': '/work/ab0995/a270089/myrunscripts/',
+                'trigger_next_run': True}
+        }
+    }
+    assumption = "prepcompute ['prepcompute'] ->  compute ['compute'] ->  tidy ['tidy'] ->  my_new_last_phase ['my_new_last_phase'] ->  prepcompute ['prepcompute']"
+    test_default_config_example = workflow.assemble_workflow(test_default_config_example)
+    order = workflow.display_workflow_sequence(test_default_config_example, display=False)
+    assert order == assumption
+
+# 4. Append two new phases in the same cluster
+def test_example_4(test_default_config_example):
+    test_default_config_example["general"]["workflow"] = {
+        'phases': {
+            'my_new_last_phase': {
+                'script': 'helloworld.sh',
+                'script_dir': '/work/ab0995/a270089/myrunscripts/',
+                'submit_to_batch_system': True,
+                'run_on_queue': 'compute',
+                'cluster': 'my_own_new_cluster'},
+            'my_second_new_phase': {
+                'script': 'halloworld.sh',
+                'script_dir': '/work/ab0995/a270089/myrunscripts/',
+                'submit_to_batch_system': True,
+                'run_on_queue': 'compute',
+                'cluster': 'my_own_new_cluster'}
+        }
+    }
+    assumption = "prepcompute ['prepcompute'] ->  compute ['compute'] ->  tidy ['tidy'] ->  prepcompute ['prepcompute'] and my_own_new_cluster ['my_new_last_phase', 'my_second_new_phase']"
+    test_default_config_example = workflow.assemble_workflow(test_default_config_example)
+    order = workflow.display_workflow_sequence(test_default_config_example, display=False)
+    assert order == assumption
 
-    pytest.fail("something wrong")
 
 # Test exceptions
 # 1. If still a workflow keyword is set by user.

From ff7145bf2f2b2c356fdf9cb1d06693263ab97b29 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Wed, 29 Nov 2023 14:00:59 +0100
Subject: [PATCH 45/98] Fixed test example_2, add preprocessing phase with
 adding newrun.

---
 src/esm_runscripts/workflow.py             | 38 +++++++++++++++-------
 tests/test_esm_runscripts/test_workflow.py |  3 +-
 2 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index 75aab8bf4..1f1912d8a 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -443,7 +443,17 @@ def order_phases_and_clusters(self):
                     if phase2["cluster"] not in next_submits[get_phase_attrib(self.phases + self.user_phases, phase2["run_after"], "cluster")]:
                         next_submits[get_phase_attrib(self.phases + self.user_phases, phase2["run_after"], "cluster")].append(phase2["cluster"])
                 phase2["called_from"] = phase2["run_after"]
-
+            else:
+                if phase2["run_before"] is not None:
+                    if phase2["run_before"] == self.first_task_in_queue:
+                        next_submits[phase2["name"]].append(self.first_task_in_queue)
+                        next_submits[self.last_task_in_queue].append(phase2["cluster"])
+                        next_submits[self.last_task_in_queue].remove(self.first_task_in_queue)
+                        phase2["run_after"] = self.last_task_in_queue
+                        last_phase = self.get_workflow_phase_by_name(self.last_task_in_queue)
+                        last_phase["run_before"] = phase2["name"]
+                        last_phase["next_submit"].append(phase2["name"])
+                        self.first_task_in_queue = phase2["name"]
         for phase3 in self.phases + self.user_phases:
             phase3.set_attrib("next_submit", next_submits[phase3["name"]])
 #            phase3["next_submit"] = next_submits[phase3["name"]]
@@ -493,18 +503,24 @@ def prepend_newrun_job(self):
 
             new_first_phase_name = "newrun_general"
             # Create new default phase object
-            new_first_phase = WorkflowPhase(new_first_phase_name)
-            new_first_phase.set_attrib("next_submit", first_phase["cluster"])
-            new_first_phase.set_attrib("called_from", last_phase["cluster"])
-            new_first_phase.set_attrib("run_before", first_phase["cluster"])
-            new_first_phase.set_attrib("next_submit", first_phase["cluster"])
-            new_first_phase.set_attrib("cluster", "newrun")
-            new_first_phase.set_attrib("batch_or_shell", "SimulationSetup")
-            new_first_phase.set_attrib("nproc", 1)
+            config_new_first_phase = {
+                "name": "newrun",
+                "next_submit": [first_phase["cluster"]],
+                "called_from": last_phase["cluster"],
+                "run_before": first_phase["cluster"],
+                "run_after": last_phase["cluster"],
+                "cluster": "newrun",
+                "batch_or_shell": "SimulationSetup",
+                "nproc": 1
+            }
+            new_first_phase = WorkflowPhase(config_new_first_phase)
 
             # reset last_task attributes
-            last_phase.set_attrib("next_submit", "newrun")
-            last_phase.remove_attrib("next_submit", first_phase["cluster"])
+            last_phase["next_submit"].append("newrun")
+            last_phase["next_submit"].remove(first_phase["cluster"])
+            # why does the next line not work???
+            #last_phase.set_attrib("next_submit", "newrun")
+            #last_phase.remove_attrib("next_submit", first_phase["cluster"])
 
             # reset first_task attributes
             first_phase.set_attrib("called_from", "newrun")
diff --git a/tests/test_esm_runscripts/test_workflow.py b/tests/test_esm_runscripts/test_workflow.py
index 742612325..a3eee06c1 100644
--- a/tests/test_esm_runscripts/test_workflow.py
+++ b/tests/test_esm_runscripts/test_workflow.py
@@ -397,7 +397,7 @@ def test_example_2(test_default_config_example):
                 'script_dir': '/work/ab0995/a270089/myrunscripts/'}
         }
     }
-    assumption = "newrun ['newrun'] -> my_preprocessing ['my_preprocessing'] -> prepcompute ['prepcompute'] ->  compute ['compute'] ->  tidy ['tidy'] ->  prepcompute ['prepcompute']"
+    assumption = "newrun ['newrun'] ->  my_preprocessing ['my_preprocessing'] ->  prepcompute ['prepcompute'] ->  compute ['compute'] ->  tidy ['tidy'] ->  newrun ['newrun']"
     test_default_config_example = workflow.assemble_workflow(test_default_config_example)
     order = workflow.display_workflow_sequence(test_default_config_example, display=False)
     assert order == assumption
@@ -440,6 +440,7 @@ def test_example_4(test_default_config_example):
     order = workflow.display_workflow_sequence(test_default_config_example, display=False)
     assert order == assumption
 
+# 5. Append two new phases in the same cluster, one of them triggers the next run
 
 # Test exceptions
 # 1. If still a workflow keyword is set by user.

From c784380d8ef7c83b8d10d9aa43b1b8929c06a53a Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Wed, 29 Nov 2023 14:54:08 +0100
Subject: [PATCH 46/98] Added test for workflow example 5.

---
 tests/test_esm_runscripts/test_workflow.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/tests/test_esm_runscripts/test_workflow.py b/tests/test_esm_runscripts/test_workflow.py
index a3eee06c1..883aac4b6 100644
--- a/tests/test_esm_runscripts/test_workflow.py
+++ b/tests/test_esm_runscripts/test_workflow.py
@@ -441,6 +441,28 @@ def test_example_4(test_default_config_example):
     assert order == assumption
 
 # 5. Append two new phases in the same cluster, one of them triggers the next run
+def test_example_5(test_default_config_example):
+    test_default_config_example["general"]["workflow"] = {
+        'phases': {
+            'my_new_last_phase': {
+                'script': 'helloworld.sh',
+                'script_dir': '/work/ab0995/a270089/myrunscripts/',
+                'submit_to_batch_system': True,
+                'run_on_queue': 'compute',
+                'cluster': 'my_own_new_cluster',
+                'trigger_next_run': True},
+            'my_second_new_phase': {
+                'script': 'halloworld.sh',
+                'script_dir': '/work/ab0995/a270089/myrunscripts/',
+                'submit_to_batch_system': True,
+                'run_on_queue': 'compute',
+                'cluster': 'my_own_new_cluster'}
+        }
+    }
+    assumption = "prepcompute ['prepcompute'] ->  compute ['compute'] ->  tidy ['tidy'] ->  my_own_new_cluster ['my_new_last_phase', 'my_second_new_phase'] ->  prepcompute ['prepcompute']"
+    test_default_config_example = workflow.assemble_workflow(test_default_config_example)
+    order = workflow.display_workflow_sequence(test_default_config_example, display=False)
+    assert order == assumption
 
 # Test exceptions
 # 1. If still a workflow keyword is set by user.

From 6aa7684bd21b028700e494567f0f6a64e1e750b3 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Thu, 30 Nov 2023 16:59:35 +0100
Subject: [PATCH 47/98] Added function to cluster phases after collecting them.

---
 src/esm_runscripts/workflow.py             | 175 ++++++++++++++-------
 tests/test_esm_runscripts/test_workflow.py |  43 ++++-
 2 files changed, 163 insertions(+), 55 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index 1f1912d8a..5fa8b2f27 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -25,6 +25,7 @@ def __init__(self, workflow_yaml):
         # TODO: check if key is in workflow_yaml dict
         self.phases = []
         self.user_phases = []
+        self.clusters = {}
         self.first_task_in_queue = workflow_yaml["first_task_in_queue"]
         self.last_task_in_queue = workflow_yaml["last_task_in_queue"]
         self.next_run_triggered_by = workflow_yaml["next_run_triggered_by"]
@@ -179,15 +180,14 @@ def collect_all_user_phases(self, config):
                                 f"``{phase}``."
                             )
                             esm_parser.user_error("ERROR", err_msg)
-                        # 3. if user phase has a new and unique name
+                        # 3. if user phase (for each setup/model) has a new and unique name
                         else:
                             phase_config = copy.deepcopy(w_config["phases"][phase])
                             # add phase name
                             phase_config["name"] = phase
                             # Make sure that batch_or_shell is set to batch if submit_to_batch is true
-                            # TODO: remove/replace batch_or_shell by submit_to_batch_system? Is needed
-                            # for setting it to SimulationSetup and in other functions (resubmit, etc.)
                             # Should not be set by user. TODO: Remove from documentation.
+                            # Check if run_on_queue is given if sbatch job
                             if phase_config.get("submit_to_batch_system", False):
                                 phase_config["batch_or_shell"] = "batch"
                                 if not phase_config.get("run_on_queue", False):
@@ -202,6 +202,7 @@ def collect_all_user_phases(self, config):
                             user_workflow_phases_names.append(phase)
                             if phase_config.get("trigger_next_run", False):
                                 user_workflow_next_run_triggered_by.append(phase)
+        # check if more than one user phase is set to trigger the next run
         if len(user_workflow_next_run_triggered_by) > 1:
             err_msg = (
                 f"More than one phase is set to "
@@ -215,6 +216,65 @@ def collect_all_user_phases(self, config):
         self.user_phases = user_workflow_phases
         return self
 
+    def cluster_phases(self):
+        """Merge phases into clusters."""
+
+        clusters = {}
+        for cluster in self.get_phases_attribs_list("default", "cluster") + self.get_phases_attribs_list("user", "cluster"):
+            clusters[cluster] = {"phases": []}
+        for phase in self.phases + self.user_phases:
+            clusters[phase["cluster"]]["phases"].append(phase["name"])
+
+        for cluster in clusters:
+            nproc = nproc_sum = nproc_max = 0
+            if len(clusters[cluster]["phases"]) == 1:
+                phase_name = clusters[cluster]["phases"][0]
+                phase = self.get_workflow_phase_by_name(phase_name)
+                clusters[cluster].update(phase)
+            else:
+                clusters[cluster].update(WorkflowPhase({}))
+                phases_list = []
+                for phase_name in clusters[cluster]["phases"]:
+                    phases_list.append(self.get_workflow_phase_by_name(phase_name))
+
+                # check for inconsistencies
+                attribs = {}
+                for attrib in WorkflowPhase({}):
+                    attribs[attrib] = []
+                    [attribs[attrib].append(item) for item in [phase[attrib] for phase in phases_list] if item not in attribs[attrib]]
+                    if len(attribs[attrib]) == 1:
+                        clusters[cluster][attrib] = attribs[attrib][0]
+                    else:
+                        if type(clusters[cluster][attrib]) is list:
+                            clusters[cluster][attrib] = attribs[attrib]
+                        else:
+                            if attrib not in ["name", "script", "scriptdir", "order_in_cluster", "nproc"]:
+                                err_msg = (
+                                    f"Mismatch for {attrib}")
+                                esm_parser.user_error("ERROR", err_msg)
+                            elif attrib == "name":
+                                clusters[cluster]["name"] = cluster
+                            else:
+                                clusters[cluster][attrib] = "check phase"
+
+                # calculate nproc if cluster is to be submitted to sbatch system
+                for phase in phases_list:
+                    nproc_sum += phase["nproc"]
+                    nproc_max = max(phase["nproc"], nproc_max)
+
+                    if clusters[cluster].get("submit_to_batch_system", False):
+                        if phase["order_in_cluster"] == "concurrent":
+                            if clusters[cluster]["order_in_cluster"] is None:
+                                clusters[cluster]["order_in_cluster"] = "concurrent"
+                            nproc = nproc_sum
+                        else:
+                            clusters[cluster]["order_in_cluster"] = "sequential"
+                            nproc = nproc_max
+                clusters[cluster]["nproc"] = nproc
+
+        self.clusters = clusters
+        return self
+
     def write_to_config(self, config):
         """
         Write to config.
@@ -246,22 +306,6 @@ def write_to_config(self, config):
                         if self.first_task_in_queue not in config["general"]["workflow"]["subjob_clusters"][cluster]["next_submit"]:
                             config["general"]["workflow"]["subjob_clusters"][cluster]["next_submit"].append(self.first_task_in_queue)
 
-        for subjob_cluster in config["general"]["workflow"]["subjob_clusters"]:
-            nproc_sum = nproc_max = 0
-            for subjob in config["general"]["workflow"]["subjob_clusters"][subjob_cluster]["subjobs"]:
-                nproc_sum += get_phase_attrib(self.phases + self.user_phases, subjob, "nproc")
-                nproc_max = max(get_phase_attrib(self.phases + self.user_phases, subjob, "nproc"), nproc_max)
-            if config["general"]["workflow"]["subjob_clusters"][subjob_cluster].get("submit_to_batch_system", False):
-# Why setting batch_or_shell to shell if a script is given? Wouldn't now all phases be executed as shell and never as batch?
-#    #        elif subjob_clusters[subjob_cluster].get("script", False):
-#    #            subjob_clusters[subjob_cluster]["batch_or_shell"] = "shell"
-#    #
-                if config["general"]["workflow"]["subjob_clusters"][subjob_cluster]["order_in_cluster"] == "concurrent":
-                    nproc = nproc_sum
-                else:
-                    nproc = nproc_max
-                config["general"]["workflow"]["subjob_clusters"][subjob_cluster]["nproc"] = nproc
-
         # 2. Write subjobs/phases
         config["general"]["workflow"]["subjobs"] = {}
         for phase in self.phases + self.user_phases:
@@ -351,10 +395,13 @@ def order_phases_and_clusters(self):
             self : Workflow object
         """
 
+
         # check if user phases are independent from each other
-        # TODO: What if not independent???
-        # do not run in parallel in same cluster???
-        independent = self.check_user_workflow_dependency()
+        # independent = self.check_user_workflow_dependency()
+
+# 1. Check for exceptions
+#    - Unknown phase set by user
+#    - Missing keywords in user phases
 
         # check if there are unknown phases, if yes, will give error exception
         unknown_phases = self.check_unknown_phases()
@@ -389,64 +436,72 @@ def order_phases_and_clusters(self):
 #                )
 #                esm_parser.user_error("ERROR", err_msg)
 
-        # Correct workflow attributes (``last_task_in_queue``, `next_run_triggered``)
-        # if necessary
+# 3. Correct workflow attributes (``last_task_in_queue``, ``first_task_in_queue``, ``next_run_triggered``)
 
-        next_triggered = self.next_run_triggered_by
+        # next_run_triggered_by is always the last phase
 
         # check if next_triggered is default or user phase
         # if user phase
-        # get last default phase and correct next_submit
+        # get last default phase and correct next_submit and run_before
         # get first default phase and correct run_after, called_from
         # correct last_task_in_queue of workflow
+
+        next_triggered = self.next_run_triggered_by
+        triggered_next_run_phase = self.get_workflow_phase_by_name(next_triggered)
         if next_triggered not in self.get_phases_attribs_list("default", "name"):
             first_task_name = self.first_task_in_queue
             first_phase = self.get_workflow_phase_by_name(first_task_name)
-            last_task_name = self.last_task_in_queue
-            last_phase = self.get_workflow_phase_by_name(last_task_name)
+            old_last_task_name = self.last_task_in_queue
+            old_last_phase = self.get_workflow_phase_by_name(old_last_task_name)
+
+            old_last_phase["next_submit"].remove(first_phase["name"])
+            old_last_phase["next_submit"].append(next_triggered)
+            old_last_phase["run_before"] = next_triggered
+            old_last_phase["trigger_next_run"] = False
+            if triggered_next_run_phase["cluster"] not in self.clusters[old_last_phase["cluster"]]["next_submit"]:
+                self.clusters[old_last_phase["cluster"]]["next_submit"].append(triggered_next_run_phase["cluster"])
+            self.clusters[old_last_phase["cluster"]]["run_before"] = triggered_next_run_phase["cluster"]
+            self.clusters[old_last_phase["cluster"]]["trigger_next_run"] = False
 
-            last_phase["next_submit"].remove(first_phase["name"])
-            last_phase["next_submit"].append(next_triggered)
-            last_phase["run_before"] = next_triggered
             first_phase["run_after"] = next_triggered
             first_phase["called_from"] = next_triggered
+            self.clusters[first_phase["cluster"]]["run_after"] = triggered_next_run_phase["cluster"]
+            self.clusters[first_phase["cluster"]]["called_from"] = triggered_next_run_phase["cluster"]
+
+            self.clusters[triggered_next_run_phase["cluster"]]["next_submit"].append(first_phase["cluster"])
+            self.clusters[triggered_next_run_phase["cluster"]]["run_before"] = first_phase["cluster"]
+            self.clusters[triggered_next_run_phase["cluster"]]["run_after"] = old_last_phase["cluster"]
+
             self.last_task_in_queue = next_triggered
 
+
+# 4. Intergrate new user phases by correcting next_submit, called_from, run_after, run_before
+
         # Set "next_submit" and "called_from"
         # "next_submit" which phase/cluster will be called next (run_after of the next phase)
         # "called_from" name of previous phase, run_after of current phase
-        # Create a dict of all phases with empty lists
-
-        # Create a cluster dict:
-        clusters = {}
-
-        for phase4 in self.phases + self.user_phases:
-            # if a cluster is not set for a phase set it to the phase name,
-            # so that every phase belongs to a cluster
-            # default cluster has the same name as the phase itself
-            if phase4["cluster"] is None:
-                phase4["cluster"] = phase4["name"]
-            clusters[phase4["cluster"]] = {"name": phase4["cluster"]}
-
 
+        # Create a dict of all phases with empty lists
         next_submits = {}
         for phase in self.phases + self.user_phases:
             next_submits[phase["name"]] = []
             next_submits[phase["cluster"]] = []
 
-        # set next_submits to the cluster name rather then to the phase name
+
         for phase2 in self.phases + self.user_phases:
-            if phase2["run_after"] is not None:
+            if phase2.get("run_after", None):
                 if phase2["cluster"] not in next_submits[phase2["run_after"]]:
-                    if phase2["cluster"] not in next_submits[phase2["run_after"]]:
-                        next_submits[phase2["run_after"]].append(phase2["cluster"])
-                    if phase2["cluster"] not in next_submits[get_phase_attrib(self.phases + self.user_phases, phase2["run_after"], "cluster")]:
-                        next_submits[get_phase_attrib(self.phases + self.user_phases, phase2["run_after"], "cluster")].append(phase2["cluster"])
+                    next_submits[phase2["run_after"]].append(phase2["cluster"])
+                if phase2["cluster"] not in next_submits[get_phase_attrib(self.phases + self.user_phases, phase2["run_after"], "cluster")]:
+                    next_submits[get_phase_attrib(self.phases + self.user_phases, phase2["run_after"], "cluster")].append(phase2["cluster"])
                 phase2["called_from"] = phase2["run_after"]
             else:
-                if phase2["run_before"] is not None:
+                # if only run_before is set, e.g. to add a phase at the beginning of a run
+                if phase2.get("run_before", None):
                     if phase2["run_before"] == self.first_task_in_queue:
                         next_submits[phase2["name"]].append(self.first_task_in_queue)
+                        if self.first_task_in_queue not in next_submits[phase2["cluster"]]:
+                            next_submits[phase2["cluster"]].append(self.first_task_in_queue)
                         next_submits[self.last_task_in_queue].append(phase2["cluster"])
                         next_submits[self.last_task_in_queue].remove(self.first_task_in_queue)
                         phase2["run_after"] = self.last_task_in_queue
@@ -454,10 +509,15 @@ def order_phases_and_clusters(self):
                         last_phase["run_before"] = phase2["name"]
                         last_phase["next_submit"].append(phase2["name"])
                         self.first_task_in_queue = phase2["name"]
+
+        breakpoint()
+
         for phase3 in self.phases + self.user_phases:
             phase3.set_attrib("next_submit", next_submits[phase3["name"]])
 #            phase3["next_submit"] = next_submits[phase3["name"]]
 
+# 5. Correct first and last new phases of whole workflow
+
         first_cluster_name = self.first_task_in_queue
         first_phase = self.get_workflow_phase_by_name(first_cluster_name)
         last_cluster_name = self.last_task_in_queue
@@ -479,8 +539,6 @@ def prepend_newrun_job(self):
         """
         - Creates a new cluster "newrun" if first_task_in_queue is not of
           type 'SimulationSetup'
-        - Why is this needed? So that every first task is a SimulationSetup to init
-          a config object???
 
         Looks for subjob_cluster that are set by user workflow (not a 'SimulationSetup')
         and are not of type 'SimulationSetup'.
@@ -601,7 +659,7 @@ def __init__(self, phase):
         self["next_submit"] = []                       # needed
         self["called_from"] = None                     # needed
         self["batch_or_shell"] = "SimulationSetup"     # needed
-        self["order_in_cluster"] = "sequential"        # needed ???
+        self["order_in_cluster"] = None                # needed ???
         self["run_only"] = None
         self["skip_chunk_number"] = None
         self["skip_run_number"] = None
@@ -620,9 +678,14 @@ def __init__(self, phase):
 
         super().__init__(phase)
 
+        # make sure batch_or_shell is batch for sbatch jobs
         if self.get("submit_to_batch_system", False):
             self["batch_or_shell"] = "batch"
 
+        # set cluster to phase name, if not given
+        if self.get("cluster", None) is None:
+            self["cluster"] = self["name"]
+
     def set_attrib(self, attrib, value):
         if type(self[attrib]) == "list":
             self[attrib].append(value)
@@ -685,8 +748,12 @@ def assemble_workflow(config):
     # 3. Read in phases from runscript and config files
     workflow = workflow.collect_all_user_phases(config)
 
+    # 4. Cluster phases
+    workflow = workflow.cluster_phases()
+
     # 4. Order user workflows into default workflow wrt. phase attributs.
     workflow = workflow.order_phases_and_clusters()
+    breakpoint()
 
     # 5. create new first phase of type SimulationSetup, if first_task_in_queue is
     #    a user phase (type batch or shell)
diff --git a/tests/test_esm_runscripts/test_workflow.py b/tests/test_esm_runscripts/test_workflow.py
index 883aac4b6..6d9d49343 100644
--- a/tests/test_esm_runscripts/test_workflow.py
+++ b/tests/test_esm_runscripts/test_workflow.py
@@ -367,6 +367,7 @@ def test_write_to_config(test_workflow_object, test_default_phases_dict, test_co
 
 # Test scenarios
 # 0. Default workflow
+@pytest.mark.example
 def test_example_0(test_default_config_example):
     test_default_config_example = workflow.assemble_workflow(test_default_config_example)
     order = workflow.display_workflow_sequence(test_default_config_example, display=False)
@@ -374,6 +375,7 @@ def test_example_0(test_default_config_example):
     assert order == assumption
 
 # 1. Add one single phase at the end of the default workflow (Example 1 in documentation)
+@pytest.mark.example
 def test_example_1(test_default_config_example):
     test_default_config_example["general"]["workflow"] = {
         'phases': {
@@ -388,6 +390,7 @@ def test_example_1(test_default_config_example):
     assert order == assumption
 
 # 2. Prepend new phase at the beginning of workflow
+@pytest.mark.example
 def test_example_2(test_default_config_example):
     test_default_config_example["general"]["workflow"] = {
         'phases': {
@@ -400,9 +403,13 @@ def test_example_2(test_default_config_example):
     assumption = "newrun ['newrun'] ->  my_preprocessing ['my_preprocessing'] ->  prepcompute ['prepcompute'] ->  compute ['compute'] ->  tidy ['tidy'] ->  newrun ['newrun']"
     test_default_config_example = workflow.assemble_workflow(test_default_config_example)
     order = workflow.display_workflow_sequence(test_default_config_example, display=False)
-    assert order == assumption
+    print(assumption)
+    print(order)
+    pytest.fail("something wrong")
+    #assert order == assumption
 
 # 3. Append new phase at the beginning of workflow
+@pytest.mark.example
 def test_example_3(test_default_config_example):
     test_default_config_example["general"]["workflow"] = {
         'phases': {
@@ -418,6 +425,7 @@ def test_example_3(test_default_config_example):
     assert order == assumption
 
 # 4. Append two new phases in the same cluster
+@pytest.mark.example
 def test_example_4(test_default_config_example):
     test_default_config_example["general"]["workflow"] = {
         'phases': {
@@ -426,12 +434,14 @@ def test_example_4(test_default_config_example):
                 'script_dir': '/work/ab0995/a270089/myrunscripts/',
                 'submit_to_batch_system': True,
                 'run_on_queue': 'compute',
+                'order_in_cluster': 'concurrent',
                 'cluster': 'my_own_new_cluster'},
             'my_second_new_phase': {
                 'script': 'halloworld.sh',
                 'script_dir': '/work/ab0995/a270089/myrunscripts/',
                 'submit_to_batch_system': True,
                 'run_on_queue': 'compute',
+                'order_in_cluster': 'concurrent',
                 'cluster': 'my_own_new_cluster'}
         }
     }
@@ -441,6 +451,7 @@ def test_example_4(test_default_config_example):
     assert order == assumption
 
 # 5. Append two new phases in the same cluster, one of them triggers the next run
+@pytest.mark.example
 def test_example_5(test_default_config_example):
     test_default_config_example["general"]["workflow"] = {
         'phases': {
@@ -449,6 +460,7 @@ def test_example_5(test_default_config_example):
                 'script_dir': '/work/ab0995/a270089/myrunscripts/',
                 'submit_to_batch_system': True,
                 'run_on_queue': 'compute',
+                'order_in_cluster': 'concurrent',
                 'cluster': 'my_own_new_cluster',
                 'trigger_next_run': True},
             'my_second_new_phase': {
@@ -456,6 +468,7 @@ def test_example_5(test_default_config_example):
                 'script_dir': '/work/ab0995/a270089/myrunscripts/',
                 'submit_to_batch_system': True,
                 'run_on_queue': 'compute',
+                'order_in_cluster': 'concurrent',
                 'cluster': 'my_own_new_cluster'}
         }
     }
@@ -464,6 +477,34 @@ def test_example_5(test_default_config_example):
     order = workflow.display_workflow_sequence(test_default_config_example, display=False)
     assert order == assumption
 
+# 6. Append two new phases in the same cluster at the beginning of run
+#@pytest.mark.example
+#def test_example_6(test_default_config_example):
+#    test_default_config_example["general"]["workflow"] = {
+#        'phases': {
+#            'my_new_last_phase': {
+#                'script': 'helloworld.sh',
+#                'run_before': 'prepcompute',
+#                'script_dir': '/work/ab0995/a270089/myrunscripts/',
+#                'submit_to_batch_system': True,
+#                'run_on_queue': 'compute',
+#                'cluster': 'my_own_new_cluster'},
+#            'my_second_new_phase': {
+#                'script': 'halloworld.sh',
+#                'script_dir': '/work/ab0995/a270089/myrunscripts/',
+#                'run_before': 'prepcompute',
+#                'submit_to_batch_system': True,
+#                'run_on_queue': 'compute',
+#                'cluster': 'my_own_new_cluster'}
+#        }
+#    }
+#    assumption = "prepcompute ['prepcompute'] ->  compute ['compute'] ->  tidy ['tidy'] ->  my_own_new_cluster ['my_new_last_phase', 'my_second_new_phase'] ->  prepcompute ['prepcompute']"
+#    test_default_config_example = workflow.assemble_workflow(test_default_config_example)
+#    order = workflow.display_workflow_sequence(test_default_config_example, display=False)
+#    assert order == assumption
+
+
+
 # Test exceptions
 # 1. If still a workflow keyword is set by user.
 def test_exception_test_workflow_keyword(test_config_2):

From 58e3729dc1e405fa653b7cc06fa098248a2921f0 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Fri, 1 Dec 2023 13:36:25 +0100
Subject: [PATCH 48/98] Changes to pass all example tests.

---
 src/esm_runscripts/workflow.py             | 130 +++++++++++----------
 tests/test_esm_runscripts/test_workflow.py |   5 +-
 2 files changed, 70 insertions(+), 65 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index 5fa8b2f27..0f34388df 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -214,6 +214,30 @@ def collect_all_user_phases(self, config):
             self.next_run_triggered_by = user_workflow_next_run_triggered_by[0]
 
         self.user_phases = user_workflow_phases
+
+        # check if there are unknown phases, if yes, will give error exception
+        unknown_phases = self.check_unknown_phases()
+        if unknown_phases:
+            unknowns = ', '.join(unknown_phases)
+            err_msg = (
+                f"Unknown phase(s) ``{unknowns}`` defined as ``run_before`` "
+                f"or ``run_after``."
+            )
+            esm_parser.user_error("ERROR", err_msg)
+
+        # check if run_after or run_before is set for each user phase
+        # if not, run_after will be set to last default phase
+        for user_phase in self.user_phases:
+            if not user_phase["run_before"] and not user_phase["run_after"]:
+                user_phase["run_after"] = self.phases[-1]["name"]
+                err_msg = (
+                    f"No value given for ``run_after`` or ``run_before`` "
+                    f"of user phase ``{user_phase['name']}``. "
+                    f"Set it to last default phase in workflow: "
+                    f"``{self.phases[-1]['name']}``."
+                )
+                esm_parser.user_note("NOTE", err_msg)
+
         return self
 
     def cluster_phases(self):
@@ -248,12 +272,14 @@ def cluster_phases(self):
                         if type(clusters[cluster][attrib]) is list:
                             clusters[cluster][attrib] = attribs[attrib]
                         else:
-                            if attrib not in ["name", "script", "scriptdir", "order_in_cluster", "nproc"]:
+                            if attrib not in ["name", "script", "scriptdir", "order_in_cluster", "nproc", "trigger_next_run"]:
                                 err_msg = (
                                     f"Mismatch for {attrib}")
                                 esm_parser.user_error("ERROR", err_msg)
                             elif attrib == "name":
                                 clusters[cluster]["name"] = cluster
+                            elif attrib == "trigger_next_run":
+                                clusters[cluster][attrib] = any(attribs[attrib])
                             else:
                                 clusters[cluster][attrib] = "check phase"
 
@@ -286,25 +312,16 @@ def write_to_config(self, config):
 
         config["general"]["workflow"] = {}
         config["general"]["workflow"].update(self.__dict__)
+
         # 3. Write clusters
         config["general"]["workflow"]["subjob_clusters"] = {}
-        for cluster in self.get_phases_attribs_list("default", "cluster") + self.get_phases_attribs_list("user", "cluster"):
+        for cluster in self.clusters:
             config["general"]["workflow"]["subjob_clusters"][cluster] = {}
             config["general"]["workflow"]["subjob_clusters"][cluster]["subjobs"] = []
-            for phase in self.phases + self.user_phases:
-                if phase["cluster"] == cluster:
-                    # TODO: Are there more attributes to be merged from the different phases within a cluster???
-                    config["general"]["workflow"]["subjob_clusters"][cluster]["subjobs"].append(phase["name"])
-                    for att in phase:
-                        config["general"]["workflow"]["subjob_clusters"][cluster][att] = phase[att]
-                    config["general"]["workflow"]["subjob_clusters"][cluster]["name"] = cluster
-                    # if a phase in a cluster triggers the next run, set next_submit in cluster conf
-        for cluster in self.get_phases_attribs_list("default", "cluster") + self.get_phases_attribs_list("user", "cluster"):
-            for phase in self.phases + self.user_phases:
-                if phase["cluster"] == cluster:
-                    if phase["name"] in self.next_run_triggered_by:
-                        if self.first_task_in_queue not in config["general"]["workflow"]["subjob_clusters"][cluster]["next_submit"]:
-                            config["general"]["workflow"]["subjob_clusters"][cluster]["next_submit"].append(self.first_task_in_queue)
+            for phase_name in self.clusters[cluster]["phases"]:
+                config["general"]["workflow"]["subjob_clusters"][cluster]["subjobs"].append(phase_name)
+            for att in self.clusters[cluster]:
+                config["general"]["workflow"]["subjob_clusters"][cluster][att] = self.clusters[cluster][att]
 
         # 2. Write subjobs/phases
         config["general"]["workflow"]["subjobs"] = {}
@@ -399,33 +416,6 @@ def order_phases_and_clusters(self):
         # check if user phases are independent from each other
         # independent = self.check_user_workflow_dependency()
 
-# 1. Check for exceptions
-#    - Unknown phase set by user
-#    - Missing keywords in user phases
-
-        # check if there are unknown phases, if yes, will give error exception
-        unknown_phases = self.check_unknown_phases()
-        if unknown_phases:
-            unknowns = ', '.join(unknown_phases)
-            err_msg = (
-                f"Unknown phase(s) ``{unknowns}`` defined as ``run_before`` "
-                f"or ``run_after``."
-            )
-            esm_parser.user_error("ERROR", err_msg)
-
-        # check if run_after or run_before is set for each user phase
-        # if not, run_after will be set to last default phase
-        for user_phase in self.user_phases:
-            if not user_phase["run_before"] and not user_phase["run_after"]:
-                user_phase["run_after"] = self.phases[-1]["name"]
-                err_msg = (
-                    f"No value given for ``run_after`` or ``run_before`` "
-                    f"of user phase ``{user_phase['name']}``. "
-                    f"Set it to last default phase in workflow: "
-                    f"``{self.phases[-1]['name']}``."
-                )
-                esm_parser.user_note("NOTE", err_msg)
-
             # Check if not both run_after and run_before are set at the same
             # time for each user phase
 #            if user_phase['run_before'] and user_phase['run_after']:
@@ -482,39 +472,52 @@ def order_phases_and_clusters(self):
         # "called_from" name of previous phase, run_after of current phase
 
         # Create a dict of all phases with empty lists
-        next_submits = {}
+        next_submits_phases = {}
+        next_submits_clusters = {}
         for phase in self.phases + self.user_phases:
-            next_submits[phase["name"]] = []
-            next_submits[phase["cluster"]] = []
-
+            next_submits_phases[phase["name"]] = []
+            next_submits_clusters[phase["cluster"]] = []
 
+#        for cluster in self.clusters:
         for phase2 in self.phases + self.user_phases:
             if phase2.get("run_after", None):
-                if phase2["cluster"] not in next_submits[phase2["run_after"]]:
-                    next_submits[phase2["run_after"]].append(phase2["cluster"])
-                if phase2["cluster"] not in next_submits[get_phase_attrib(self.phases + self.user_phases, phase2["run_after"], "cluster")]:
-                    next_submits[get_phase_attrib(self.phases + self.user_phases, phase2["run_after"], "cluster")].append(phase2["cluster"])
+                if phase2["name"] not in next_submits_phases[phase2["run_after"]]:
+                    next_submits_phases[phase2["run_after"]].append(phase2["name"])
                 phase2["called_from"] = phase2["run_after"]
+            if self.clusters[phase2["cluster"]].get("run_after", None):
+                if phase2["cluster"] not in next_submits_clusters[self.clusters[phase2["cluster"]]["run_after"]]:
+                    next_submits_clusters[self.clusters[phase2["cluster"]]["run_after"]].append(phase2["cluster"])
+                self.clusters[phase2["cluster"]]["called_from"] = self.clusters[phase2["cluster"]]["run_after"]
             else:
                 # if only run_before is set, e.g. to add a phase at the beginning of a run
                 if phase2.get("run_before", None):
                     if phase2["run_before"] == self.first_task_in_queue:
-                        next_submits[phase2["name"]].append(self.first_task_in_queue)
-                        if self.first_task_in_queue not in next_submits[phase2["cluster"]]:
-                            next_submits[phase2["cluster"]].append(self.first_task_in_queue)
-                        next_submits[self.last_task_in_queue].append(phase2["cluster"])
-                        next_submits[self.last_task_in_queue].remove(self.first_task_in_queue)
-                        phase2["run_after"] = self.last_task_in_queue
+                        old_first_phase = self.get_workflow_phase_by_name(self.first_task_in_queue)
                         last_phase = self.get_workflow_phase_by_name(self.last_task_in_queue)
+                        next_submits_phases[phase2["name"]].append(self.first_task_in_queue)
+                        if self.first_task_in_queue not in next_submits_clusters[phase2["cluster"]]:
+                            next_submits_clusters[phase2["cluster"]].append(self.first_task_in_queue)
+                        next_submits_clusters[self.last_task_in_queue].append(phase2["cluster"])
+                        next_submits_phases[self.last_task_in_queue].append(phase2["name"])
+                        next_submits_phases[self.last_task_in_queue].remove(self.first_task_in_queue)
+                        next_submits_clusters[last_phase["cluster"]].remove(old_first_phase["cluster"])
+                        phase2["run_after"] = self.last_task_in_queue
                         last_phase["run_before"] = phase2["name"]
+                        self.clusters[last_phase["cluster"]]["run_before"] = phase2["name"]
+                        self.clusters[old_first_phase["cluster"]]["run_after"] = phase2["name"]
+                        self.clusters[old_first_phase["cluster"]]["called_from"] = phase2["name"]
+                        self.clusters[phase2["cluster"]]["called_from"] = last_phase["cluster"]
+                        self.clusters[phase2["cluster"]]["run_after"] = last_phase["cluster"]
                         last_phase["next_submit"].append(phase2["name"])
                         self.first_task_in_queue = phase2["name"]
 
-        breakpoint()
+        for cluster in self.clusters:
+            if next_submits_clusters[cluster]:
+                self.clusters[cluster]["next_submit"] = next_submits_clusters[cluster]
 
         for phase3 in self.phases + self.user_phases:
-            phase3.set_attrib("next_submit", next_submits[phase3["name"]])
-#            phase3["next_submit"] = next_submits[phase3["name"]]
+            if next_submits_phases[phase3["name"]]:
+                phase3.set_attrib("next_submit", next_submits_phases[phase3["name"]])
 
 # 5. Correct first and last new phases of whole workflow
 
@@ -575,6 +578,10 @@ def prepend_newrun_job(self):
 
             # reset last_task attributes
             last_phase["next_submit"].append("newrun")
+            self.clusters[last_phase["cluster"]]["next_submit"] = ["newrun"]
+            self.clusters[last_phase["cluster"]]["run_before"] = "newrun"
+            self.clusters[new_first_phase["cluster"]] = new_first_phase
+            self.clusters[new_first_phase["cluster"]]["phases"] = ["newrun"]
             last_phase["next_submit"].remove(first_phase["cluster"])
             # why does the next line not work???
             #last_phase.set_attrib("next_submit", "newrun")
@@ -583,6 +590,8 @@ def prepend_newrun_job(self):
             # reset first_task attributes
             first_phase.set_attrib("called_from", "newrun")
             first_phase.set_attrib("run_after", "newrun")
+            self.clusters[first_phase["cluster"]]["called_from"] = "newrun"
+            self.clusters[first_phase["cluster"]]["run_after"] = "newrun"
 
             # reset workflow attributes
             self.first_task_in_queue = "newrun"
@@ -753,7 +762,6 @@ def assemble_workflow(config):
 
     # 4. Order user workflows into default workflow wrt. phase attributs.
     workflow = workflow.order_phases_and_clusters()
-    breakpoint()
 
     # 5. create new first phase of type SimulationSetup, if first_task_in_queue is
     #    a user phase (type batch or shell)
diff --git a/tests/test_esm_runscripts/test_workflow.py b/tests/test_esm_runscripts/test_workflow.py
index 6d9d49343..93d3c84e3 100644
--- a/tests/test_esm_runscripts/test_workflow.py
+++ b/tests/test_esm_runscripts/test_workflow.py
@@ -403,10 +403,7 @@ def test_example_2(test_default_config_example):
     assumption = "newrun ['newrun'] ->  my_preprocessing ['my_preprocessing'] ->  prepcompute ['prepcompute'] ->  compute ['compute'] ->  tidy ['tidy'] ->  newrun ['newrun']"
     test_default_config_example = workflow.assemble_workflow(test_default_config_example)
     order = workflow.display_workflow_sequence(test_default_config_example, display=False)
-    print(assumption)
-    print(order)
-    pytest.fail("something wrong")
-    #assert order == assumption
+    assert order == assumption
 
 # 3. Append new phase at the beginning of workflow
 @pytest.mark.example

From ecf6bbb4046fc69c030e9afd19e5fc0be4129ee4 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Fri, 1 Dec 2023 15:50:12 +0100
Subject: [PATCH 49/98] Code optimizations

---
 src/esm_runscripts/workflow.py | 129 ++++++++++++++-------------------
 1 file changed, 54 insertions(+), 75 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index 0f34388df..32ef95741 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -1,4 +1,3 @@
-import sys
 import copy
 import esm_parser
 
@@ -153,8 +152,8 @@ def collect_all_user_phases(self, config):
         for model in config:
             if "workflow" in config[model]:
                 w_config = config[model]["workflow"]
-                # if "subjobs" in w_config:
                 if "phases" in w_config:
+                    # check if still tries to set workflow keywords
                     for key, value in w_config.items():
                         if not key == "phases":
                             err_msg = f"``{key}`` is not allowed to be set for a workflow."
@@ -163,8 +162,11 @@ def collect_all_user_phases(self, config):
                         # each phase (of a model/setup) needs to have an unique name
                         # same phases of the same model/setup defined in different config files
                         # are overwritten by the usual config file hierarchy
-                        # user phases are not alowed to have the same name asdefault phases (e.g. compute)
-                        # 1. check if ``new_phase`` is already defined as a default phase
+                        # user phases are not alowed to have the same name as default phases (e.g. compute)
+
+                        # check if ``new_phase`` is already defined as a default phase
+                        # look for the name of the current phase in the list of default phase names
+                        # if found, raise exception
                         if phase in self.get_phases_attribs_list("default", "name"):
                             err_msg = (
                                 f"The user phase ``{phase}`` "
@@ -172,24 +174,26 @@ def collect_all_user_phases(self, config):
                                 f"This is not allowed."
                             )
                             esm_parser.user_error("ERROR", err_msg)
-                        # 2. check if the name of the new user phase (for a model/setup) does not already exist
-                        #    (for another model/setup).
+
+                        # check if the name of the new user phase (for a model/setup) does not already exist
+                        # (for another model/setup).
                         if phase in user_workflow_phases_names:
                             err_msg = (
                                 f"Two workflow phases have the same name "
                                 f"``{phase}``."
                             )
                             esm_parser.user_error("ERROR", err_msg)
-                        # 3. if user phase (for each setup/model) has a new and unique name
+
+                        # if user phase (for each setup/model) has a new and unique name
                         else:
                             phase_config = copy.deepcopy(w_config["phases"][phase])
                             # add phase name
                             phase_config["name"] = phase
                             # Make sure that batch_or_shell is set to batch if submit_to_batch is true
                             # Should not be set by user. TODO: Remove from documentation.
-                            # Check if run_on_queue is given if sbatch job
                             if phase_config.get("submit_to_batch_system", False):
                                 phase_config["batch_or_shell"] = "batch"
+                                # Check if run_on_queue is given if submit_to_sbatch is true
                                 if not phase_config.get("run_on_queue", False):
                                     err_msg = f"No value for target queue given by ``run_on_queue`` for phase ``{phase}``."
                                     esm_parser.user_error("ERROR", err_msg)
@@ -202,7 +206,7 @@ def collect_all_user_phases(self, config):
                             user_workflow_phases_names.append(phase)
                             if phase_config.get("trigger_next_run", False):
                                 user_workflow_next_run_triggered_by.append(phase)
-        # check if more than one user phase is set to trigger the next run
+        # check if more than one user phase has set trigger_next_run to true
         if len(user_workflow_next_run_triggered_by) > 1:
             err_msg = (
                 f"More than one phase is set to "
@@ -244,44 +248,55 @@ def cluster_phases(self):
         """Merge phases into clusters."""
 
         clusters = {}
+        # create an empty phases list for each cluster
         for cluster in self.get_phases_attribs_list("default", "cluster") + self.get_phases_attribs_list("user", "cluster"):
             clusters[cluster] = {"phases": []}
+        # append all phases that are within the same cluster
         for phase in self.phases + self.user_phases:
             clusters[phase["cluster"]]["phases"].append(phase["name"])
 
         for cluster in clusters:
             nproc = nproc_sum = nproc_max = 0
+            # if only one phase in cluster
             if len(clusters[cluster]["phases"]) == 1:
                 phase_name = clusters[cluster]["phases"][0]
                 phase = self.get_workflow_phase_by_name(phase_name)
                 clusters[cluster].update(phase)
+            # if more than one phase are within the same cluster
             else:
+                # fill in default phase keys for each cluster to cluster dictionary
                 clusters[cluster].update(WorkflowPhase({}))
+                # create a list of all phases (dicts) that are within the same cluster
                 phases_list = []
                 for phase_name in clusters[cluster]["phases"]:
                     phases_list.append(self.get_workflow_phase_by_name(phase_name))
 
-                # check for inconsistencies
-                attribs = {}
-                for attrib in WorkflowPhase({}):
-                    attribs[attrib] = []
-                    [attribs[attrib].append(item) for item in [phase[attrib] for phase in phases_list] if item not in attribs[attrib]]
-                    if len(attribs[attrib]) == 1:
-                        clusters[cluster][attrib] = attribs[attrib][0]
+                # check for inconsistencies of phase keywords within a cluster
+                keywords = {}
+                for key in WorkflowPhase({}):
+                    keywords[key] = []
+                    # append keyword of a phase only if not already in keywords[key]
+                    [keywords[key].append(item) for item in [phase[key] for phase in phases_list] if item not in keywords[key]]
+                    # if there are no inconsistencies, all phases have the same values for keyword
+                    if len(keywords[key]) == 1:
+                        clusters[cluster][key] = keywords[key][0]
+                    # if different phases have set different values for the same keyword/attrib
                     else:
-                        if type(clusters[cluster][attrib]) is list:
-                            clusters[cluster][attrib] = attribs[attrib]
+                        if type(clusters[cluster][key]) is list:
+                            clusters[cluster][key] = keywords[key]
                         else:
-                            if attrib not in ["name", "script", "scriptdir", "order_in_cluster", "nproc", "trigger_next_run"]:
+                            if key not in ["name", "script", "scriptdir", "order_in_cluster", "nproc", "trigger_next_run"]:
                                 err_msg = (
-                                    f"Mismatch for {attrib}")
+                                    f"Mismatch for {key}")
                                 esm_parser.user_error("ERROR", err_msg)
-                            elif attrib == "name":
+                            elif key == "name":
                                 clusters[cluster]["name"] = cluster
-                            elif attrib == "trigger_next_run":
-                                clusters[cluster][attrib] = any(attribs[attrib])
+                            elif key == "trigger_next_run":
+                                # set key of cluster to True if key for any (at least one) of the phases is set to True
+                                clusters[cluster][key] = any(keywords[key])
                             else:
-                                clusters[cluster][attrib] = "check phase"
+                                # if key is set different for each phase in same cluster set to fill value (e.g. for script, scriptdir)
+                                clusters[cluster][key] = "check phase"
 
                 # calculate nproc if cluster is to be submitted to sbatch system
                 for phase in phases_list:
@@ -297,7 +312,7 @@ def cluster_phases(self):
                             clusters[cluster]["order_in_cluster"] = "sequential"
                             nproc = nproc_max
                 clusters[cluster]["nproc"] = nproc
-
+        # write clusters dictionary to workflow object attribute
         self.clusters = clusters
         return self
 
@@ -367,7 +382,7 @@ def check_user_workflow_dependency(self):
 
     def check_unknown_phases(self):
         """
-        Check if any user phase attributes points to any unknown workflow phase.
+        Check if any user phase keyword (run_afteer, run_before) points to an unknown workflow phase.
 
         Parameters
         ----------
@@ -378,10 +393,10 @@ def check_unknown_phases(self):
             unknown_phases : set
         """
         unknown_phases = []
-        phases_names = self.get_phases_attribs_list('default', 'name')
-        user_phases_names = self.get_phases_attribs_list('user', 'name')
-        run_after = self.get_phases_attribs_list('user', 'run_after')
-        run_before = self.get_phases_attribs_list('user', 'run_before')
+        phases_names = self.get_phases_attribs_list('default', 'name')          # list of names of all default phases
+        user_phases_names = self.get_phases_attribs_list('user', 'name')        # list of name of all user phases
+        run_after = self.get_phases_attribs_list('user', 'run_after')           # list of all run_after values for all user phases
+        run_before = self.get_phases_attribs_list('user', 'run_before')         # list of all run_before values for all user phases
         # Filter out all elements that are None
         # ``filter(None, anylist)`` will filter out all items of anylist,
         # for which ``if item`` is false (e.g. [], "", None, {}, '').
@@ -398,39 +413,22 @@ def check_unknown_phases(self):
 
     def order_phases_and_clusters(self):
         """
-        Put the phases and clusters in order.
-
-        Tasks:
-        1. Correct for ``triggered_next_run`` if set by user phase
-            - next_submit, run_after, called_from, run_before???
+        Put the phases and clusters in the right order.
 
         Parameters
         ----------
+            self : Workflow object
 
         Returns
         -------
             self : Workflow object
         """
 
-
-        # check if user phases are independent from each other
-        # independent = self.check_user_workflow_dependency()
-
-            # Check if not both run_after and run_before are set at the same
-            # time for each user phase
-#            if user_phase['run_before'] and user_phase['run_after']:
-#                err_msg = (
-#                    f"Both run_after and run_before are set. Don't know when "
-#                    f"to start {user_phase['name']}. Please only set run_after "
-#                    f"or run_before."
-#                )
-#                esm_parser.user_error("ERROR", err_msg)
-
-# 3. Correct workflow attributes (``last_task_in_queue``, ``first_task_in_queue``, ``next_run_triggered``)
+# Correct workflow attributes (``last_task_in_queue``, ``first_task_in_queue``, ``next_run_triggered``)
 
         # next_run_triggered_by is always the last phase
 
-        # check if next_triggered is default or user phase
+        # check if next_triggered is set to a default or user phase
         # if user phase
         # get last default phase and correct next_submit and run_before
         # get first default phase and correct run_after, called_from
@@ -465,20 +463,19 @@ def order_phases_and_clusters(self):
             self.last_task_in_queue = next_triggered
 
 
-# 4. Intergrate new user phases by correcting next_submit, called_from, run_after, run_before
+# Intergrate new user phases by correcting next_submit, called_from, run_after, run_before
 
         # Set "next_submit" and "called_from"
         # "next_submit" which phase/cluster will be called next (run_after of the next phase)
         # "called_from" name of previous phase, run_after of current phase
 
-        # Create a dict of all phases with empty lists
+        # Create a dict of all phases and for all clusters with empty lists
         next_submits_phases = {}
         next_submits_clusters = {}
         for phase in self.phases + self.user_phases:
             next_submits_phases[phase["name"]] = []
             next_submits_clusters[phase["cluster"]] = []
 
-#        for cluster in self.clusters:
         for phase2 in self.phases + self.user_phases:
             if phase2.get("run_after", None):
                 if phase2["name"] not in next_submits_phases[phase2["run_after"]]:
@@ -519,25 +516,8 @@ def order_phases_and_clusters(self):
             if next_submits_phases[phase3["name"]]:
                 phase3.set_attrib("next_submit", next_submits_phases[phase3["name"]])
 
-# 5. Correct first and last new phases of whole workflow
-
-        first_cluster_name = self.first_task_in_queue
-        first_phase = self.get_workflow_phase_by_name(first_cluster_name)
-        last_cluster_name = self.last_task_in_queue
-        last_phase = self.get_workflow_phase_by_name(last_cluster_name)
-
-        # if first_cluster_name is not next_submit of last_cluster_name
-        # set 'next_submit' of last phase/cluster to first phase/cluster in workflow
-        if first_cluster_name not in last_phase["next_submit"]:
-            last_phase.set_attrib("next_submit", first_cluster_name)
-        # if last_cluster_name is not called_from of first_cluster_name
-        # set 'called_from' of first phase/cluster to last phase/cluster
-        if not last_cluster_name == first_phase["called_from"]:
-            first_phase.set_attrib("called_from", last_cluster_name)
-
         return self
 
-
     def prepend_newrun_job(self):
         """
         - Creates a new cluster "newrun" if first_task_in_queue is not of
@@ -562,7 +542,6 @@ def prepend_newrun_job(self):
             last_task_name = self.last_task_in_queue
             last_phase = self.get_workflow_phase_by_name(last_task_name)
 
-            new_first_phase_name = "newrun_general"
             # Create new default phase object
             config_new_first_phase = {
                 "name": "newrun",
@@ -584,8 +563,8 @@ def prepend_newrun_job(self):
             self.clusters[new_first_phase["cluster"]]["phases"] = ["newrun"]
             last_phase["next_submit"].remove(first_phase["cluster"])
             # why does the next line not work???
-            #last_phase.set_attrib("next_submit", "newrun")
-            #last_phase.remove_attrib("next_submit", first_phase["cluster"])
+            # last_phase.set_attrib("next_submit", "newrun")
+            # last_phase.remove_attrib("next_submit", first_phase["cluster"])
 
             # reset first_task attributes
             first_phase.set_attrib("called_from", "newrun")
@@ -771,7 +750,6 @@ def assemble_workflow(config):
     # 7. Remove old worklow from config
     config = workflow.write_to_config(config)
 
-
     # Set "jobtype" for the first task???
     # NOTE: This is either first default phase or
     #       newrun??? Can't this not be set in prepend_newrun then?
@@ -832,6 +810,7 @@ def display_workflow(config):
     display_nicely(config)
     display_workflow_sequence(config)
 
+
 def display_workflow_sequence(config, display=True):
 
     first_phase = config["general"]["workflow"]["first_task_in_queue"]

From 97d5d28c5dccc75ed0bd66c9bd06ab29ebf793bc Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Mon, 4 Dec 2023 10:41:21 +0100
Subject: [PATCH 50/98] Removed unused function.

---
 src/esm_runscripts/workflow.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index 32ef95741..e52320c78 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -30,13 +30,6 @@ def __init__(self, workflow_yaml):
         self.next_run_triggered_by = workflow_yaml["next_run_triggered_by"]
         # TODO: Call here the phase object ???
 
-    @property
-    def num_phases(self):
-        """
-        Return the number of phases in workflow.
-        """
-        return len(self.phases)
-
     def get_workflow_phase_by_name(self, phase_name):
         """
         Returns phase of phase_name

From 04d8ff0813bb18628b8e5011efab0e5e6e84f303 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Mon, 4 Dec 2023 12:03:53 +0100
Subject: [PATCH 51/98] Finished workflow tests.

---
 tests/test_esm_runscripts/test_workflow.py | 244 +++------------------
 1 file changed, 36 insertions(+), 208 deletions(-)

diff --git a/tests/test_esm_runscripts/test_workflow.py b/tests/test_esm_runscripts/test_workflow.py
index 93d3c84e3..eb59efb9c 100644
--- a/tests/test_esm_runscripts/test_workflow.py
+++ b/tests/test_esm_runscripts/test_workflow.py
@@ -6,118 +6,8 @@
 import pytest
 import esm_parser
 
-@pytest.fixture()
-def test_default_phases_dict():
-    phases_dict = {
-        'compute': {
-            'called_from': 'prepcompute',
-            'cluster': 'compute',
-            'name': 'compute',
-            'next_submit': ['tidy'],
-            'nproc': 'None',
-            'order_in_cluster': 'sequential',
-            'run_after': 'prepcompute',
-            'run_before': 'tidy',
-            'run_on_queue': 'compute',
-            'submit_to_batch_system': True},
-        'prepcompute': {
-            'batch_or_shell': 'SimulationSetup',
-            'called_from': 'tidy',
-            'cluster': 'prepcompute',
-            'name': 'prepcompute',
-            'next_submit': ['compute'],
-            'nproc': 1,
-            'order_in_cluster': 'sequential',
-            'run_after': 'tidy',
-            'run_before': 'compute',
-            'submit_to_batch_system': False},
-        'tidy': {
-            'batch_or_shell': 'SimulationSetup',
-            'called_from': 'compute',
-            'cluster': 'tidy',
-            'name': 'tidy',
-            'next_submit': ['prepcompute'],
-            'nproc': 1,
-            'order_in_cluster': 'sequential',
-            'run_after': 'compute',
-            'run_before': 'prepcompute',
-            'submit_to_batch_system': False}
-    }
-    return phases_dict
-
-@pytest.fixture()
-def test_workflow_object():
-    workflow_dict = {
-        'first_task_in_queue': 'prepcompute',
-        'last_task_in_queue': 'tidy',
-        'next_run_triggered_by': 'tidy'
-    }
-    test_workflow = workflow.Workflow(workflow_dict)
-    return test_workflow
-
 @pytest.fixture()
 def test_config():
-    """Setup a test config dictionary."""
-    config = {
-        'computer': {'partitions': {'compute': {'name': 'test'}}},
-        'fesom': {
-            'nproc': 128},
-        'rnfmap': {
-            'nproc': 128},
-        'oasis3mct': {
-            'nproc': 128},
-        'xios': {
-            'nproc': 128},
-        'oifs': {
-            'workflow': {
-#                'next_run_triggered_by': 'tidy',
-                'phases': {
-                    'my_new_subjob_oifs': {
-                        'batch_or_shell': 'batch',
-                        'nproc': 1,
-                        'order_in_cluster': 'concurrent',
-                        'cluster': 'test_cluster',
-                        'run_after': 'tidy',
-                        'run_on_queue': 'compute',
-                        'script': 'helloworld.sh',
-                        'script_dir': '/work/ab0995/a270089/myrunscripts/',
-                        'submit_to_batch_system': True}}}},
-        'general': {
-            'valid_model_names': ['fesom', 'oifs', 'rnfmap', 'oasis3mct', 'xios'],
-            'workflow': {
-#                'next_run_triggered_by': 'tidy',
-                'phases': {
-                    'my_new_subjob_general': {
-                        'batch_or_shell': 'batch',
-                        'order_in_cluster': 'concurrent',
-                        'run_on_queue': 'compute',
-                        'nproc': 1,
-                        'run_after': 'tidy',
-                        'script_dir': '/work/ab0995/a270089/myrunscripts/',
-                        'script': 'hallowelt.sh',
-                        'submit_to_batch_system': True}}}},
-        'flow': {
-            'workflow': {
-#                'next_run_triggered_by': 'tidy',
-                'phases': {
-                    'my_new_subjob_flow': {
-                        'batch_or_shell': 'batch',
-                        'order_in_cluster': 'concurrent',
-                        'cluster': 'test_cluster',
-                        'run_on_queue': 'compute',
-                        'nproc': 1,
-                        'run_after': 'tidy',
-                        'script_dir': '/work/ab0995/a270089/myrunscripts/',
-                        'script': 'hallowelt.sh',
-                        'submit_to_batch_system': True}
-                }
-            }
-        }
-    }
-    return config
-
-@pytest.fixture()
-def test_config_2():
     """Setup a test config dictionary."""
     config = {
         'computer': {'partitions': {'compute': {'name': 'test'}}},
@@ -294,77 +184,6 @@ def test_default_config_example():
     }
     return config
 
-def test_num_phases(test_workflow_object, test_default_phases_dict, test_config):
-    for phase in test_default_phases_dict:
-        test_workflow_object.phases.append(workflow.WorkflowPhase(test_default_phases_dict[phase]))
-    assert test_workflow_object.num_phases == 3
-
-def test_check_user_workflow_dependency(test_workflow_object, test_default_phases_dict, test_config):
-    for phase in test_default_phases_dict:
-        test_workflow_object.phases.append(workflow.WorkflowPhase(test_default_phases_dict[phase]))
-    test_workflow_object = test_workflow_object.collect_all_user_phases(test_config)
-    independent = test_workflow_object.check_user_workflow_dependency()
-    assert independent
-
-def test_check_user_workflow_dependency_2(test_workflow_object, test_config):
-    test_config['flow']['workflow']['phases']['my_new_subjob_flow']['run_after'] = 'my_new_subjob_oifs'
-#    test_workflow_object = test_workflow_object.init_default_workflow(test_config)
-    test_workflow_object = test_workflow_object.collect_all_user_phases(test_config)
-    independent = test_workflow_object.check_user_workflow_dependency()
-    assert not independent
-
-def test_check_unknown_phases(test_workflow_object, test_config):
-    test_config['flow']['workflow']['phases']['my_new_subjob_flow']['run_after'] = 'my_new_subjob'
-#    test_workflow_object = test_workflow_object.init_default_workflow(test_config)
-    test_workflow_object = test_workflow_object.collect_all_user_phases(test_config)
-    unknown_phases = test_workflow_object.check_unknown_phases()
-    assert unknown_phases
-
-def test_assemble_workflow():
-    pytest.fail("something wrong")
-
-def test_collect_all_user_workflow(test_config):
-    pytest.fail("something wrong")
-
-def test_calc_number_of_tasks():
-    pytest.fail("something wrong")
-
-def test_order_phases_and_clusters(test_workflow_object, test_default_phases_dict, test_config):
-    for phase in test_default_phases_dict:
-        test_workflow_object.phases.append(workflow.WorkflowPhase(test_default_phases_dict[phase]))
-    test_config['flow']['workflow']['phases']['my_new_subjob_flow']['run_after'] = 'my_new_subjob_general'
-#    test_config['flow']['workflow']['next_run_triggered_by'] = 'my_new_subjob_flow'
-#    test_config['oifs']['workflow']['next_run_triggered_by'] = 'my_new_subjob_general'
-    #test_config['flow']['workflow']['phases']['my_new_subjob_flow']['run_before'] = 'my_new_subjob_oifs'
-    test_workflow_object = test_workflow_object.collect_all_user_phases(test_config)
-    test_workflow_object = test_workflow_object.order_phases_and_clusters()
-    pytest.fail("something wrong")
-
-def test_complete_clusters(test_workflow_object, test_default_phases_dict, test_config):
-    for phase in test_default_phases_dict:
-        test_workflow_object.phases.append(workflow.WorkflowPhase(test_default_phases_dict[phase]))
-    test_workflow_object = test_workflow_object.collect_all_user_phases(test_config)
-    test_workflow_object = test_workflow_object.order_phases_and_clusters()
-    pytest.fail("something wrong")
-
-def test_prepend_newrun_job(test_workflow_object, test_default_phases_dict, test_config):
-    for phase in test_default_phases_dict:
-        test_workflow_object.phases.append(workflow.WorkflowPhase(test_default_phases_dict[phase]))
-    test_workflow_object = test_workflow_object.collect_all_user_phases(test_config)
-    test_workflow_object = test_workflow_object.order_phases_and_clusters()
-    test_workflow_object = test_workflow_object.prepend_newrun_job()
-    pytest.fail("something wrong")
-
-def test_write_to_config(test_workflow_object, test_default_phases_dict, test_config):
-    for phase in test_default_phases_dict:
-        test_workflow_object.phases.append(workflow.WorkflowPhase(test_default_phases_dict[phase]))
-    test_workflow_object = test_workflow_object.set_default_nproc(test_config)
-    test_workflow_object = test_workflow_object.collect_all_user_phases(test_config)
-    test_workflow_object = test_workflow_object.order_phases_and_clusters()
-    test_workflow_object = test_workflow_object.prepend_newrun_job()
-    config = test_workflow_object.write_to_config(test_config)
-    pytest.fail("something wrong")
-
 # Test scenarios
 # 0. Default workflow
 @pytest.mark.example
@@ -504,26 +323,30 @@ def test_example_5(test_default_config_example):
 
 # Test exceptions
 # 1. If still a workflow keyword is set by user.
-def test_exception_test_workflow_keyword(test_config_2):
-    test_config_2['flow']['workflow']['next_run_triggered_by'] = 'my_new_subjob_general'
+@pytest.mark.exceptions
+def test_exception_test_workflow_keyword(test_config):
+    test_config['flow']['workflow']['next_run_triggered_by'] = 'my_new_subjob_general'
     with pytest.raises(SystemExit):
-        test_config_2 = workflow.assemble_workflow(test_config_2)
+        test_config = workflow.assemble_workflow(test_config)
 
 # 2. If an invalid phase keyword is set.
-def test_exception_invalid_phase_keyword(test_config_2):
-    test_config_2['flow']['workflow']['phases']['my_new_subjob_flow']['wrong_keyword'] = 'wrong_value'
+@pytest.mark.exceptions
+def test_exception_invalid_phase_keyword(test_config):
+    test_config['flow']['workflow']['phases']['my_new_subjob_flow']['wrong_keyword'] = 'wrong_value'
     with pytest.raises(SystemExit):
-        test_config_2 = workflow.assemble_workflow(test_config_2)
+        test_config = workflow.assemble_workflow(test_config)
 
 # 3. If an unknown phase is called for , e.g. in 'run_after'
-def test_exception_unknown_phase(test_config_2):
-    test_config_2['flow']['workflow']['phases']['my_new_subjob_flow']['run_after'] = 'trudy'
+@pytest.mark.exceptions
+def test_exception_unknown_phase(test_config):
+    test_config['flow']['workflow']['phases']['my_new_subjob_flow']['run_after'] = 'trudy'
     with pytest.raises(SystemExit):
-        test_config_2 = workflow.assemble_workflow(test_config_2)
+        test_config = workflow.assemble_workflow(test_config)
 
 # 4. If a user phase has the same name as a default phase.
-def test_if_user_phase_has_default_phase_name(test_config_2):
-    test_config_2['flow']['workflow']['phases']['compute'] = {
+@pytest.mark.exceptions
+def test_if_user_phase_has_default_phase_name(test_config):
+    test_config['flow']['workflow']['phases']['compute'] = {
         'batch_or_shell': 'batch',
         'order_in_cluster': 'concurrent',
         'cluster': 'test_cluster',
@@ -534,11 +357,12 @@ def test_if_user_phase_has_default_phase_name(test_config_2):
         'script': 'hallowelt.sh',
         'submit_to_batch_system': True}
     with pytest.raises(SystemExit):
-        test_config_2 = workflow.assemble_workflow(test_config_2)
+        test_config = workflow.assemble_workflow(test_config)
 
 # 5. If two user phases have the same name and are defined in different models/setups.
-def test_if_two_user_phase_have_the_same_name(test_config_2):
-    test_config_2['oifs']['workflow']['phases']['my_new_subjob_flow'] = {
+@pytest.mark.exceptions
+def test_if_two_user_phase_have_the_same_name(test_config):
+    test_config['oifs']['workflow']['phases']['my_new_subjob_flow'] = {
         'batch_or_shell': 'batch',
         'order_in_cluster': 'concurrent',
         'cluster': 'test_cluster',
@@ -549,11 +373,12 @@ def test_if_two_user_phase_have_the_same_name(test_config_2):
         'script': 'hallowelt.sh',
         'submit_to_batch_system': True}
     with pytest.raises(SystemExit):
-        test_config_2 = workflow.assemble_workflow(test_config_2)
+        test_config = workflow.assemble_workflow(test_config)
 
 # 6. If no queue is given for a phase that should be run on sbatch system.
-def test_if_queue_is_missing(test_config_2):
-    test_config_2['oifs']['workflow']['phases']['my_new_subjob_oifs'] = {
+@pytest.mark.exceptions
+def test_if_queue_is_missing(test_config):
+    test_config['oifs']['workflow']['phases']['my_new_subjob_oifs'] = {
         'batch_or_shell': 'batch',
         'order_in_cluster': 'concurrent',
         'cluster': 'test_cluster',
@@ -564,22 +389,25 @@ def test_if_queue_is_missing(test_config_2):
         'script': 'hallowelt.sh',
         'submit_to_batch_system': True}
     with pytest.raises(SystemExit):
-        test_config_2 = workflow.assemble_workflow(test_config_2)
+        test_config = workflow.assemble_workflow(test_config)
 
 # 7. If more than one phase trigger_next_run.
-def test_if_trigger_next_run_unclear(test_config_2):
-    test_config_2['oifs']['workflow']['phases']['my_new_subjob_oifs']['trigger_next_run'] = True
+@pytest.mark.exceptions
+def test_if_trigger_next_run_unclear(test_config):
+    test_config['oifs']['workflow']['phases']['my_new_subjob_oifs']['trigger_next_run'] = True
     with pytest.raises(SystemExit):
-        test_config_2 = workflow.assemble_workflow(test_config_2)
+        test_config = workflow.assemble_workflow(test_config)
 
 # 8. If no default phases are defined in defaults.yaml.
-def test_if_no_default_phases(test_config_2):
-    test_config_2['general']['defaults.yaml']['workflow'].pop('phases', None)
+@pytest.mark.exceptions
+def test_if_no_default_phases(test_config):
+    test_config['general']['defaults.yaml']['workflow'].pop('phases', None)
     with pytest.raises(SystemExit):
-        test_config_2 = workflow.assemble_workflow(test_config_2)
+        test_config = workflow.assemble_workflow(test_config)
 
 # 9. If no default workflow is defined in defaults.yaml.
-def test_inf_no_default_workflow(test_config_2):
-    test_config_2['general']['defaults.yaml'].pop('workflow', None)
+@pytest.mark.exceptions
+def test_inf_no_default_workflow(test_config):
+    test_config['general']['defaults.yaml'].pop('workflow', None)
     with pytest.raises(SystemExit):
-        test_config_2 = workflow.assemble_workflow(test_config_2)
+        test_config = workflow.assemble_workflow(test_config)

From 16187d923f01e952a8ddec0a03f8bb53be82cf06 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Mon, 4 Dec 2023 12:09:42 +0100
Subject: [PATCH 52/98] Changes in default workflow definition.

---
 .../esm_software/esm_runscripts/defaults.yaml | 26 +------------------
 1 file changed, 1 insertion(+), 25 deletions(-)

diff --git a/configs/esm_software/esm_runscripts/defaults.yaml b/configs/esm_software/esm_runscripts/defaults.yaml
index 31ffa1394..d8234199c 100644
--- a/configs/esm_software/esm_runscripts/defaults.yaml
+++ b/configs/esm_software/esm_runscripts/defaults.yaml
@@ -17,10 +17,8 @@ workflow:
     phases:
         prepcompute:
             batch_or_shell: SimulationSetup
-            call_function: None
             called_from: tidy
             cluster: prepcompute
-            env_preparation: None
             name: prepcompute
             next_submit:
                 - compute
@@ -28,19 +26,10 @@ workflow:
             order_in_cluster: sequential
             run_after: tidy
             run_before: compute
-            run_on_queue: None
-            run_only: None
-            script: None
-            script_dir: None
-            skip_chunk_number: None
-            skip_run_number: None
             submit_to_batch_system: False
         compute:
-            batch_or_shell: batch
-            call_function: None
             called_from: prepcompute
             cluster: compute
-            env_preparation: None
             name: compute
             next_submit:
                 - tidy
@@ -48,19 +37,12 @@ workflow:
             order_in_cluster: sequential
             run_after: prepcompute
             run_before: tidy
-            run_on_queue: None
-            run_only: None
-            script: None
-            script_dir: None
-            skip_chunk_number: None
-            skip_run_number: None
+            run_on_queue: ${computer.partitions.pp.name}
             submit_to_batch_system: True
         tidy:
             batch_or_shell: SimulationSetup
-            call_function: None
             called_from: compute
             cluster: tidy
-            env_preparation: None
             name: tidy
             next_submit:
                 - prepcompute
@@ -68,10 +50,4 @@ workflow:
             order_in_cluster: sequential
             run_after: compute
             run_before: prepcompute
-            run_on_queue: None
-            run_only: None
-            script: None
-            script_dir: None
-            skip_chunk_number: None
-            skip_run_number: None
             submit_to_batch_system: False

From f26a115180e1be705383f9e7d817a27ebe1ae4f8 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Mon, 4 Dec 2023 12:11:28 +0100
Subject: [PATCH 53/98] Revive init_workflow funtion.

---
 src/esm_runscripts/workflow.py | 63 +++++++++++++++++++---------------
 1 file changed, 36 insertions(+), 27 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index e52320c78..66e831a0f 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -68,6 +68,7 @@ def get_phases_attribs_list(self, phase_type, attrib):
 
         return phases_attribs
 
+
     def set_default_nproc(self, config):
         """
         Calculating the number of mpi tasks for each component/model/script
@@ -694,33 +695,9 @@ def assemble_workflow(config):
     -------
         config : dict
     """
-
-    # 1. Generate default workflow object
-    # initialize the default workflow as Workflow object
-    # TODO: Where are these default phases defined? For now I placed it in
-    # esm_tools/configs/esm_software/esm_runscripts/defaults.yaml
-    if "defaults.yaml" in config["general"]:
-        if "workflow" in config["general"]["defaults.yaml"]:
-            workflow = config["general"]["defaults.yaml"]["workflow"]
-            phases = config["general"]["defaults.yaml"]["workflow"].get("phases", [])
-        else:
-            esm_parser.user_error("ERROR", "No default workflow defined.")
-    else:
-        workflow = []
-        phases = []
-
-    # 2. Initialize default workflow phases from defaults.yaml
-    if phases:
-        workflow = Workflow(workflow)
-        for phase in phases:
-            workflow.phases.append(WorkflowPhase(phases[phase]))
-    else:
-        esm_parser.user_error("ERROR", "No default workflow phases defined.")
-        # Note: Should this work also if no default phases are set in such a config
-        # file, but instead all workflow phases are defined in different configs
-        # and/or runscripts?
-        # Where could a user define a different (default) phase list?
-        # Or should this be changed in defaults.yaml as it is now?
+    # 1. Generate default workflow object and
+    # 2. initialize default workflow phases from defaults.yaml
+    workflow = init_default_workflow(config)
 
     # 3. Calc mpi tasks for batch jobs of default phases
     # TODO: Put it into other method???
@@ -756,6 +733,38 @@ def assemble_workflow(config):
 
     return config
 
+def init_default_workflow(config):
+    """
+    Initialize workflow and default phases from defauls.yaml
+    """
+    # 1. Generate default workflow object
+    # initialize the default workflow as Workflow object
+    # TODO: Where are these default phases defined? For now I placed it in
+    # esm_tools/configs/esm_software/esm_runscripts/defaults.yaml
+    if "defaults.yaml" in config["general"]:
+        if "workflow" in config["general"]["defaults.yaml"]:
+            workflow = config["general"]["defaults.yaml"]["workflow"]
+            phases = config["general"]["defaults.yaml"]["workflow"].get("phases", [])
+        else:
+            esm_parser.user_error("ERROR", "No default workflow defined.")
+    else:
+        workflow = []
+        phases = []
+
+    # 2. Initialize default workflow phases from defaults.yaml
+    if phases:
+        workflow = Workflow(workflow)
+        for phase in phases:
+            workflow.phases.append(WorkflowPhase(phases[phase]))
+    else:
+        esm_parser.user_error("ERROR", "No default workflow phases defined.")
+        # Note: Should this work also if no default phases are set in such a config
+        # file, but instead all workflow phases are defined in different configs
+        # and/or runscripts?
+        # Where could a user define a different (default) phase list?
+        # Or should this be changed in defaults.yaml as it is now?
+
+    return workflow
 
 def get_phase_attrib(workflow_phases, phase_name, attrib):
     if not type(workflow_phases) is list:

From 08d697600e09ca26a79540b78c13d6bbe1004924 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Mon, 4 Dec 2023 12:12:34 +0100
Subject: [PATCH 54/98] Added my email to setup.py

---
 setup.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 185eca844..99e40c947 100644
--- a/setup.py
+++ b/setup.py
@@ -52,8 +52,9 @@
         "miguel.andres-martinez@awi.de",
         "deniz.ural@awi.de",
         "jan.streffing@awi.de",
+        "nadine.wieters@awi.de",
         "sebastian.wahl@geomar.de",
-	      "kai.himstedt@dkrz.de",
+        "kai.himstedt@dkrz.de",
     ],
     python_requires=">=3.6, <=3.11",
     classifiers=[

From 7e1dd6b120914a55821197d1f84916dde5673b2f Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Mon, 4 Dec 2023 12:16:42 +0100
Subject: [PATCH 55/98] Added comments and docstrings.

---
 src/esm_runscripts/resubmit.py | 162 +++++++++++++++++++++++++++++++--
 1 file changed, 156 insertions(+), 6 deletions(-)

diff --git a/src/esm_runscripts/resubmit.py b/src/esm_runscripts/resubmit.py
index daa15047d..e2ecbd0d8 100644
--- a/src/esm_runscripts/resubmit.py
+++ b/src/esm_runscripts/resubmit.py
@@ -7,6 +7,18 @@
 
 
 def submit(config):
+    """
+    Submits a jobscript to the batch system by calling os.system
+
+    Arguments
+    ---------
+        config : dict
+
+    Returns
+    -------
+        config : dict
+    """
+
     if config["general"]["verbose"]:
         print("\n", 40 * "+ ")
     print("Submitting jobscript to batch system...")
@@ -22,15 +34,43 @@ def submit(config):
 
 
 def resubmit_batch_or_shell(config, batch_or_shell, cluster=None):
+    """
+    - Creates a submit_commant and sets it to config depending on kind of submission (batch or shell)
+    - Calls function submit to acually submitting the shell or batch command
+
+    Arguments
+    ---------
+        config : dict
+        batch_or_shell : Bool
+        cluster : (optional)
+
+    Returns
+    -------
+        config : dict
+    """
+
     config = config["general"]["batch"].write_simple_runscript(
         config, cluster, batch_or_shell
     )
+    # Checks, if not submitted with option -c in esm_runscript call (check run)
     if not check_if_check(config):
         config = submit(config)
     return config
 
 
 def resubmit_SimulationSetup(config, cluster=None):
+    """
+    Resubmitting a workflow phase/cluster that is of type SimulationSetup
+    - Initialize the cluster as a new SimulationSetup object
+
+    Arguments
+    ---------
+        config : dict
+        cluster : str (optional: name of cluster)
+    Returns
+    -------
+        config : dict
+    """
     monitor_file = logfiles.logfile_handle
     # Jobs that should be started directly from the compute job:
 
@@ -60,6 +100,7 @@ def resubmit_SimulationSetup(config, cluster=None):
             cluster_obj.config[f"{cluster}_update_{jobtype}_config_before_resubmit"]
         )
 
+    # Checks, if not submitted with option -c in esm_runscript call (check run)
     if not check_if_check(config):
 
         monitor_file.write(f"Calling {cluster} job:\n")
@@ -69,14 +110,29 @@ def resubmit_SimulationSetup(config, cluster=None):
 
 
 def get_submission_type(cluster, config):
-    # Figure out if next job is resubmitted to batch system,
-    # just executed in shell or invoked as new SimulationSetup
-    # object
+    """
+    Figure out if next job is
+        - resubmitted to batch system,
+        - just executed in shell or
+        - invoked as new SimulationSetup object
+
+    Arguments
+    ---------
+        cluster : str (name of cluster)
+        config : dict
+
+    Returns
+    -------
+        submission_type : str
+    """
 
     clusterconf = config["general"]["workflow"]["subjob_clusters"][cluster]
 
     if clusterconf.get("submit_to_batch_system", False):
         submission_type = "batch"
+    # This information should come from the config of the cluster/workflow phase
+    # This information is given in batch_or_shell attribute of workflow phase/cluster
+    # TODO: Make this a function of workflow manager???
     elif cluster in ["newrun", "prepcompute", "tidy", "inspect", "viz"]:
         submission_type = "SimulationSetup"
     else:
@@ -86,6 +142,17 @@ def get_submission_type(cluster, config):
 
 
 def end_of_experiment(config):
+    """
+    Checks if it is the end of the experiment.
+
+    Arguments
+    ---------
+        config
+
+    Returns
+    -------
+        True or False
+    """
     if config["general"]["next_date"] >= config["general"]["final_date"]:
         monitor_file = logfiles.logfile_handle
         monitor_file.write("Reached the end of the simulation, quitting...\n")
@@ -96,6 +163,17 @@ def end_of_experiment(config):
 
 
 def end_of_experiment_all_models(config):
+    """
+    Checks if end of experiment is reached and everything is done
+
+    Arguments
+    ---------
+        config : dict
+
+    Returns
+    -------
+        True or False
+    """
     index = 1
     expid = config["general"]["expid"]
     while "model" + str(index) in config["general"]["original_config"]:
@@ -131,6 +209,17 @@ def end_of_experiment_all_models(config):
 
 
 def check_if_check(config):
+    """
+    Will check if esm_runscripts has been called with option -c (check run only)
+
+    Arguments
+    ---------
+        config : dict
+
+    Returns
+    -------
+        True or False
+    """
     if config["general"]["check"]:
         print(
             "Actually not submitting anything, this job preparation was launched in 'check' mode (-c)."
@@ -142,7 +231,21 @@ def check_if_check(config):
 
 
 def maybe_resubmit(config):
-
+    """
+    If nextrun is started,
+    - calls funtion to increment date and run_number
+    - calls function to write new date file
+    If it recognizes that is was actually the last run
+    - returns if end of the experiment (if not iterative_coupling)
+
+    Arguments
+    ---------
+        config : dict
+
+    Returns
+    -------
+        config : dict
+    """
     jobtype = config["general"]["jobtype"]
 
     nextrun = resubmit_recursively(config, jobtype=jobtype)
@@ -154,6 +257,9 @@ def maybe_resubmit(config):
 
         if end_of_experiment(config):
             if config["general"].get("iterative_coupling", False):
+                # If not iterative coupling
+                # check if end of experiment for all models
+                # if not???
                 if end_of_experiment_all_models(config):
                     return config
             else:
@@ -161,6 +267,7 @@ def maybe_resubmit(config):
                 return config
 
         cluster = config["general"]["workflow"]["first_task_in_queue"]
+        # For what is nextrun here nedded?
         nextrun = resubmit_recursively(
             config, list_of_clusters=[cluster], nextrun_in=True
         )
@@ -169,8 +276,26 @@ def maybe_resubmit(config):
 
 
 def resubmit_recursively(config, jobtype=None, list_of_clusters=None, nextrun_in=False):
+    """
+    - Reads in a list of all clusters (next_submit) in a workflow of a given jobtype (if not passes as argument)
+    - Checks if cluster is going to be skipped
+    - Gets the submission_type of cluster and calls the corresponding resubmit function
+    - If cluster is skipped, calls this function again ???
+    - What is nextrun_in for? What if true? If within a run???
+    - When could cluster be first_task_in_queue and nextrun_in=true?
+
+    Arguments
+    ---------
+        config : dict
+        jobtype : (optional)
+        list_of_clusters: (optional)
+        nextrun_in: (optional)
+
+    Returns
+    -------
+        nextrun : Boolean
+    """
     nextrun = False
-
     if not list_of_clusters:
         list_of_clusters = config["general"]["workflow"]["subjob_clusters"][
             jobtype
@@ -180,7 +305,7 @@ def resubmit_recursively(config, jobtype=None, list_of_clusters=None, nextrun_in
         if (
             cluster == config["general"]["workflow"]["first_task_in_queue"]
             and not nextrun_in
-        ):
+        ):                          # if beginning of next run?
             nextrun = True
         else:
             if not workflow.skip_cluster(cluster, config):
@@ -199,6 +324,20 @@ def resubmit_recursively(config, jobtype=None, list_of_clusters=None, nextrun_in
 
 
 def _increment_date_and_run_number(config):
+    """
+    - Incrementing
+        - date by adding "delta_date" to "cuirrent_date"
+        - run_number by adding +1
+    - Updating config
+
+    Arguments
+    ---------
+        config : dict
+
+    Returns
+    -------
+        config : dict
+    """
     config["general"]["run_number"] += 1
     config["general"]["current_date"] += config["general"]["delta_date"]
 
@@ -216,6 +355,17 @@ def _increment_date_and_run_number(config):
 
 
 def _write_date_file(config):  # self, date_file=None):
+    """
+    Writes new date file for experiment.
+
+    Arguments
+    ---------
+        config : dict
+
+    Returns
+    -------
+        config : dict
+    """
     # monitor_file = config["general"]["logfile"]
     monitor_file = logfiles.logfile_handle
 

From d1f5a21fdc197aa43f8a041caee35df9ee93a1d5 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Thu, 7 Dec 2023 16:01:52 +0100
Subject: [PATCH 56/98] Some more code improvements.

---
 src/esm_runscripts/workflow.py | 332 ++++++++++++++++++++-------------
 src/esm_tests/resources        |   2 +-
 2 files changed, 203 insertions(+), 131 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index 66e831a0f..a10df867d 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -22,13 +22,26 @@ def __init__(self, workflow_yaml):
         none
         """
         # TODO: check if key is in workflow_yaml dict
-        self.phases = []
-        self.user_phases = []
-        self.clusters = {}
-        self.first_task_in_queue = workflow_yaml["first_task_in_queue"]
-        self.last_task_in_queue = workflow_yaml["last_task_in_queue"]
-        self.next_run_triggered_by = workflow_yaml["next_run_triggered_by"]
-        # TODO: Call here the phase object ???
+        self.phases = []                            # list for default phases (defined in defauls.yaml)
+        self.user_phases = []                       # list of user phases (collected by collect_all_user_phases)
+        self.clusters = {}                          # dictionary of clusters
+
+        error = False
+
+        if "first_task_in_queue" in workflow_yaml: self.first_task_in_queue = workflow_yaml["first_task_in_queue"]
+        else: error = True
+        if "last_task_in_queue" in workflow_yaml: self.last_task_in_queue = workflow_yaml["last_task_in_queue"]
+        else: error = True
+        if "next_run_triggered_by" in workflow_yaml: self.next_run_triggered_by =  workflow_yaml["next_run_triggered_by"]
+        else: error = True
+
+        if error:
+            err_msg = (
+                        f"Missing workflow keywords. "
+                        f"Make sure the following keywords are set in defaults.yaml: "
+                        f"``first_task_in_queue``, ``last_task_in_queue``, ``next_run_triggered_by``."
+            )
+            esm_parser.user_error("ERROR", err_msg)
 
     def get_workflow_phase_by_name(self, phase_name):
         """
@@ -47,7 +60,7 @@ def get_workflow_phase_by_name(self, phase_name):
             if phase["name"] == phase_name:
                 return phase
 
-    def get_phases_attribs_list(self, phase_type, attrib):
+    def get_phases_values_list(self, phase_type, keyword):
         """
         Returns a certain attribute for all phases as a list.
 
@@ -55,23 +68,23 @@ def get_phases_attribs_list(self, phase_type, attrib):
         ----------
             phase_type : str
                 ``default`` or ``user``
-            attrib : str
+            keyword : str
 
         Returns
         -------
-            phases_attribs : list
+            phases_values : list
         """
         if phase_type == 'user':
-            phases_attribs = [phase[attrib] for phase in self.user_phases]
+            phases_values = [phase[keyword] for phase in self.user_phases]
         else:
-            phases_attribs = [phase[attrib] for phase in self.phases]
+            phases_values = [phase[keyword] for phase in self.phases]
 
-        return phases_attribs
+        return phases_values
 
 
     def set_default_nproc(self, config):
         """
-        Calculating the number of mpi tasks for each component/model/script
+        Calculating the number of mpi tasks for default phases and each component/model/script
 
         Parameters
         ----------
@@ -82,11 +95,14 @@ def set_default_nproc(self, config):
             self : Workflow object
         """
 
+        # Get the sum of all mpi tasks
         tasks = calc_number_of_tasks(config)
 
+        # Write this number of tasks to phase, if
+        # phase will be submitted to batch system
         for ind, phase in enumerate(self.phases):
             if phase["submit_to_batch_system"]:
-                phase["nproc"] = tasks
+                set_value(phase, "nproc", tasks)
 
         return self
 
@@ -104,7 +120,7 @@ def set_workflow_attrib(self, attrib, value):
             None
         """
 
-        if type(getattr(self, attrib)).__name__ == "list":
+        if type(getattr(self, attrib)).__name__ == list:
             self.__dict__[attrib].append(value)
         else:
             self.__setattr__(attrib, value)
@@ -147,12 +163,12 @@ def collect_all_user_phases(self, config):
             if "workflow" in config[model]:
                 w_config = config[model]["workflow"]
                 if "phases" in w_config:
-                    # check if still tries to set workflow keywords
+                    # check if there are still workflow keywords set (except 'phases')
                     for key, value in w_config.items():
                         if not key == "phases":
                             err_msg = f"``{key}`` is not allowed to be set for a workflow."
                             esm_parser.user_error("ERROR", err_msg)
-                    for phase in w_config["phases"]:
+                    for phase_name in w_config["phases"]:
                         # each phase (of a model/setup) needs to have an unique name
                         # same phases of the same model/setup defined in different config files
                         # are overwritten by the usual config file hierarchy
@@ -161,9 +177,10 @@ def collect_all_user_phases(self, config):
                         # check if ``new_phase`` is already defined as a default phase
                         # look for the name of the current phase in the list of default phase names
                         # if found, raise exception
-                        if phase in self.get_phases_attribs_list("default", "name"):
+
+                        if phase_name in self.get_phases_values_list("default", "name"):
                             err_msg = (
-                                f"The user phase ``{phase}`` "
+                                f"The user phase ``{phase_name}`` "
                                 f"has the same name as a default workflow phase. "
                                 f"This is not allowed."
                             )
@@ -171,35 +188,39 @@ def collect_all_user_phases(self, config):
 
                         # check if the name of the new user phase (for a model/setup) does not already exist
                         # (for another model/setup).
-                        if phase in user_workflow_phases_names:
+                        if phase_name in user_workflow_phases_names:
                             err_msg = (
                                 f"Two workflow phases have the same name "
-                                f"``{phase}``."
+                                f"``{phase_name}``."
                             )
                             esm_parser.user_error("ERROR", err_msg)
 
-                        # if user phase (for each setup/model) has a new and unique name
+                        # if user phase (for each setup/model) has a non-default and unique name
                         else:
-                            phase_config = copy.deepcopy(w_config["phases"][phase])
+                            phase_config = copy.deepcopy(w_config["phases"][phase_name])
                             # add phase name
-                            phase_config["name"] = phase
-                            # Make sure that batch_or_shell is set to batch if submit_to_batch is true
-                            # Should not be set by user. TODO: Remove from documentation.
+                            phase_config["name"] = phase_name
+                            # make sure that batch_or_shell is set to batch if submit_to_batch is true
+                            # should not be set by user. TODO: Remove from documentation
                             if phase_config.get("submit_to_batch_system", False):
                                 phase_config["batch_or_shell"] = "batch"
-                                # Check if run_on_queue is given if submit_to_sbatch is true
+                                # check if run_on_queue is given if submit_to_sbatch is true
                                 if not phase_config.get("run_on_queue", False):
-                                    err_msg = f"No value for target queue given by ``run_on_queue`` for phase ``{phase}``."
+                                    err_msg = f"No value for target queue given by ``run_on_queue`` for phase ``{phase_name}``."
                                     esm_parser.user_error("ERROR", err_msg)
                             else:
                                 phase_config["batch_or_shell"] = "shell"
+
                             # create a new user phase object for ``phase``
                             new_phase = WorkflowPhase(phase_config)
+
                             # append it to the list of user phases of the workflow
                             user_workflow_phases.append(new_phase)
-                            user_workflow_phases_names.append(phase)
+                            user_workflow_phases_names.append(phase_name)
+
+                            # collect all user phases that are set to trigger the next run
                             if phase_config.get("trigger_next_run", False):
-                                user_workflow_next_run_triggered_by.append(phase)
+                                user_workflow_next_run_triggered_by.append(phase_name)
         # check if more than one user phase has set trigger_next_run to true
         if len(user_workflow_next_run_triggered_by) > 1:
             err_msg = (
@@ -209,9 +230,10 @@ def collect_all_user_phases(self, config):
             )
             esm_parser.user_error("ERROR", err_msg)
         elif user_workflow_next_run_triggered_by:
-            self.next_run_triggered_by = user_workflow_next_run_triggered_by[0]
+            self.set_workflow_attrib("next_run_triggered_by", user_workflow_next_run_triggered_by[0])
 
-        self.user_phases = user_workflow_phases
+        # add user phases to workflow
+        self.set_workflow_attrib("user_phases", user_workflow_phases)
 
         # check if there are unknown phases, if yes, will give error exception
         unknown_phases = self.check_unknown_phases()
@@ -227,7 +249,7 @@ def collect_all_user_phases(self, config):
         # if not, run_after will be set to last default phase
         for user_phase in self.user_phases:
             if not user_phase["run_before"] and not user_phase["run_after"]:
-                user_phase["run_after"] = self.phases[-1]["name"]
+                set_value(user_phase, "run_after", self.phases[-1]["name"])
                 err_msg = (
                     f"No value given for ``run_after`` or ``run_before`` "
                     f"of user phase ``{user_phase['name']}``. "
@@ -239,75 +261,87 @@ def collect_all_user_phases(self, config):
         return self
 
     def cluster_phases(self):
-        """Merge phases into clusters."""
+        """
+        Merge phases into clusters.
+        """
 
         clusters = {}
         # create an empty phases list for each cluster
-        for cluster in self.get_phases_attribs_list("default", "cluster") + self.get_phases_attribs_list("user", "cluster"):
-            clusters[cluster] = {"phases": []}
-        # append all phases that are within the same cluster
+        for cluster_name in self.get_phases_values_list("default", "cluster") + self.get_phases_values_list("user", "cluster"):
+            clusters[cluster_name] = {"phases": []}
+        # collect all phases that are within the same cluster
         for phase in self.phases + self.user_phases:
             clusters[phase["cluster"]]["phases"].append(phase["name"])
 
-        for cluster in clusters:
+        for cluster_name in clusters:
             nproc = nproc_sum = nproc_max = 0
             # if only one phase in cluster
-            if len(clusters[cluster]["phases"]) == 1:
-                phase_name = clusters[cluster]["phases"][0]
+            if len(clusters[cluster_name]["phases"]) == 1:
+                phase_name = clusters[cluster_name]["phases"][0]
                 phase = self.get_workflow_phase_by_name(phase_name)
-                clusters[cluster].update(phase)
+                clusters[cluster_name].update(phase)
             # if more than one phase are within the same cluster
             else:
                 # fill in default phase keys for each cluster to cluster dictionary
-                clusters[cluster].update(WorkflowPhase({}))
+                clusters[cluster_name].update(WorkflowPhase({}))
                 # create a list of all phases (dicts) that are within the same cluster
                 phases_list = []
-                for phase_name in clusters[cluster]["phases"]:
+                for phase_name in clusters[cluster_name]["phases"]:
                     phases_list.append(self.get_workflow_phase_by_name(phase_name))
 
                 # check for inconsistencies of phase keywords within a cluster
+                # collect all values for keywords of WorkflowPhase in a dictionary 'keywords'
                 keywords = {}
                 for key in WorkflowPhase({}):
                     keywords[key] = []
                     # append keyword of a phase only if not already in keywords[key]
                     [keywords[key].append(item) for item in [phase[key] for phase in phases_list] if item not in keywords[key]]
-                    # if there are no inconsistencies, all phases have the same values for keyword
+                    # if there are no inconsistencies, all phases have the same values for a keyword 'key'
                     if len(keywords[key]) == 1:
-                        clusters[cluster][key] = keywords[key][0]
-                    # if different phases have set different values for the same keyword/attrib
+                        clusters[cluster_name][key] = keywords[key][0]
+                    # if different phases have set different values for the same keyword
                     else:
-                        if type(clusters[cluster][key]) is list:
-                            clusters[cluster][key] = keywords[key]
+                        # if keyword is of type list, just add the list into the cluster
+                        if type(clusters[cluster_name][key]) is list:
+                            clusters[cluster_name][key] = keywords[key]
+                        # otherwise select a single value for keyword
                         else:
-                            if key not in ["name", "script", "scriptdir", "order_in_cluster", "nproc", "trigger_next_run"]:
+                            # TODO: Explain this exception handling more
+                            if key not in ["name", "script", "script_dir", "order_in_cluster", "nproc", "trigger_next_run"]:
                                 err_msg = (
                                     f"Mismatch for {key}")
                                 esm_parser.user_error("ERROR", err_msg)
                             elif key == "name":
-                                clusters[cluster]["name"] = cluster
+                                # set keyword name to the name of the cluster
+                                clusters[cluster_name]["name"] = cluster_name
                             elif key == "trigger_next_run":
                                 # set key of cluster to True if key for any (at least one) of the phases is set to True
-                                clusters[cluster][key] = any(keywords[key])
+                                clusters[cluster_name][key] = any(keywords[key])
+#                            elif key in ["script", "script_dir"]:
+#                                for ind, phase_name in enumerate(clusters[cluster_name]["phases"]):
+#                                    phase = self.get_workflow_phase_by_name(phase_name)
+#                                    phase_dict = {phase["name"]: {"script": phase["script"], "script_dir": phase["script_dir"]}}
+#                                    clusters[cluster_name]["phases"][ind] = phase_dict
                             else:
                                 # if key is set different for each phase in same cluster set to fill value (e.g. for script, scriptdir)
-                                clusters[cluster][key] = "check phase"
+                                clusters[cluster_name][key] = "check phase"
 
                 # calculate nproc if cluster is to be submitted to sbatch system
                 for phase in phases_list:
                     nproc_sum += phase["nproc"]
                     nproc_max = max(phase["nproc"], nproc_max)
 
-                    if clusters[cluster].get("submit_to_batch_system", False):
+                    if clusters[cluster_name].get("submit_to_batch_system", False):
                         if phase["order_in_cluster"] == "concurrent":
-                            if clusters[cluster]["order_in_cluster"] is None:
-                                clusters[cluster]["order_in_cluster"] = "concurrent"
+                            if clusters[cluster_name]["order_in_cluster"] is None:
+                                clusters[cluster_name]["order_in_cluster"] = "concurrent"
                             nproc = nproc_sum
                         else:
-                            clusters[cluster]["order_in_cluster"] = "sequential"
+                            clusters[cluster_name]["order_in_cluster"] = "sequential"
                             nproc = nproc_max
-                clusters[cluster]["nproc"] = nproc
+                clusters[cluster_name]["nproc"] = nproc
         # write clusters dictionary to workflow object attribute
-        self.clusters = clusters
+        self.set_workflow_attrib("clusters", clusters)
         return self
 
     def write_to_config(self, config):
@@ -358,9 +392,9 @@ def check_user_workflow_dependency(self):
             independent : bool (default: False)
         """
         independent = False
-        user_phases_names = self.get_phases_attribs_list('user', 'name')
-        run_after_list = self.get_phases_attribs_list('user', 'run_after')
-        run_before_list = self.get_phases_attribs_list('user', 'run_before')
+        user_phases_names = self.get_phases_values_list('user', 'name')
+        run_after_list = self.get_phases_values_list('user', 'run_after')
+        run_before_list = self.get_phases_values_list('user', 'run_before')
 
         # All user phases are independent from each other, if
         # none of the ``user_phases_names`` are found in the union of
@@ -387,10 +421,10 @@ def check_unknown_phases(self):
             unknown_phases : set
         """
         unknown_phases = []
-        phases_names = self.get_phases_attribs_list('default', 'name')          # list of names of all default phases
-        user_phases_names = self.get_phases_attribs_list('user', 'name')        # list of name of all user phases
-        run_after = self.get_phases_attribs_list('user', 'run_after')           # list of all run_after values for all user phases
-        run_before = self.get_phases_attribs_list('user', 'run_before')         # list of all run_before values for all user phases
+        phases_names = self.get_phases_values_list('default', 'name')          # list of names of all default phases
+        user_phases_names = self.get_phases_values_list('user', 'name')        # list of name of all user phases
+        run_after = self.get_phases_values_list('user', 'run_after')           # list of all run_after values for all user phases
+        run_before = self.get_phases_values_list('user', 'run_before')         # list of all run_before values for all user phases
         # Filter out all elements that are None
         # ``filter(None, anylist)`` will filter out all items of anylist,
         # for which ``if item`` is false (e.g. [], "", None, {}, '').
@@ -418,7 +452,7 @@ def order_phases_and_clusters(self):
             self : Workflow object
         """
 
-# Correct workflow attributes (``last_task_in_queue``, ``first_task_in_queue``, ``next_run_triggered``)
+# correct workflow attributes (``last_task_in_queue``, ``first_task_in_queue``, ``next_run_triggered``)
 
         # next_run_triggered_by is always the last phase
 
@@ -428,36 +462,36 @@ def order_phases_and_clusters(self):
         # get first default phase and correct run_after, called_from
         # correct last_task_in_queue of workflow
 
-        next_triggered = self.next_run_triggered_by
-        triggered_next_run_phase = self.get_workflow_phase_by_name(next_triggered)
-        if next_triggered not in self.get_phases_attribs_list("default", "name"):
+        old_next_triggered = self.next_run_triggered_by
+        triggered_next_run_phase = self.get_workflow_phase_by_name(old_next_triggered)
+        if old_next_triggered not in self.get_phases_values_list("default", "name"):
             first_task_name = self.first_task_in_queue
             first_phase = self.get_workflow_phase_by_name(first_task_name)
             old_last_task_name = self.last_task_in_queue
             old_last_phase = self.get_workflow_phase_by_name(old_last_task_name)
 
-            old_last_phase["next_submit"].remove(first_phase["name"])
-            old_last_phase["next_submit"].append(next_triggered)
-            old_last_phase["run_before"] = next_triggered
-            old_last_phase["trigger_next_run"] = False
-            if triggered_next_run_phase["cluster"] not in self.clusters[old_last_phase["cluster"]]["next_submit"]:
-                self.clusters[old_last_phase["cluster"]]["next_submit"].append(triggered_next_run_phase["cluster"])
-            self.clusters[old_last_phase["cluster"]]["run_before"] = triggered_next_run_phase["cluster"]
-            self.clusters[old_last_phase["cluster"]]["trigger_next_run"] = False
-
-            first_phase["run_after"] = next_triggered
-            first_phase["called_from"] = next_triggered
-            self.clusters[first_phase["cluster"]]["run_after"] = triggered_next_run_phase["cluster"]
-            self.clusters[first_phase["cluster"]]["called_from"] = triggered_next_run_phase["cluster"]
-
-            self.clusters[triggered_next_run_phase["cluster"]]["next_submit"].append(first_phase["cluster"])
+            remove_value(old_last_phase, "next_submit", first_phase["name"])
+            set_value(old_last_phase, "next_submit", old_next_triggered)
+            set_value(old_last_phase, "run_before", old_next_triggered)
+            set_value(old_last_phase, "trigger_next_run", False)
+
+            set_value(self.clusters[old_last_phase["cluster"]], "next_submit", triggered_next_run_phase["cluster"], if_not_in=True)
+            set_value(self.clusters[old_last_phase["cluster"]], "run_before", triggered_next_run_phase["cluster"])
+            set_value(self.clusters[old_last_phase["cluster"]], "trigger_next_run", False)
+
+            set_value(first_phase, "run_after", old_next_triggered)
+            set_value(first_phase, "called_from" ,old_next_triggered)
+            set_value(self.clusters[first_phase["cluster"]], "run_after", triggered_next_run_phase["cluster"])
+            set_value(self.clusters[first_phase["cluster"]], "called_from", triggered_next_run_phase["cluster"])
+
+            set_value(self.clusters[triggered_next_run_phase["cluster"]], "next_submit" , first_phase["cluster"])
             self.clusters[triggered_next_run_phase["cluster"]]["run_before"] = first_phase["cluster"]
             self.clusters[triggered_next_run_phase["cluster"]]["run_after"] = old_last_phase["cluster"]
 
-            self.last_task_in_queue = next_triggered
+            self.set_workflow_attrib("last_task_in_queue", old_next_triggered)
 
 
-# Intergrate new user phases by correcting next_submit, called_from, run_after, run_before
+# intergrate new user phases by correcting next_submit, called_from, run_after, run_before
 
         # Set "next_submit" and "called_from"
         # "next_submit" which phase/cluster will be called next (run_after of the next phase)
@@ -473,12 +507,12 @@ def order_phases_and_clusters(self):
         for phase2 in self.phases + self.user_phases:
             if phase2.get("run_after", None):
                 if phase2["name"] not in next_submits_phases[phase2["run_after"]]:
-                    next_submits_phases[phase2["run_after"]].append(phase2["name"])
-                phase2["called_from"] = phase2["run_after"]
+                    next_submits_phases[phase2["run_after"]].append(phase2["name"])     # use set_value ???
+                set_value(phase2, "called_from",phase2["run_after"])
             if self.clusters[phase2["cluster"]].get("run_after", None):
                 if phase2["cluster"] not in next_submits_clusters[self.clusters[phase2["cluster"]]["run_after"]]:
                     next_submits_clusters[self.clusters[phase2["cluster"]]["run_after"]].append(phase2["cluster"])
-                self.clusters[phase2["cluster"]]["called_from"] = self.clusters[phase2["cluster"]]["run_after"]
+                set_value(self.clusters[phase2["cluster"]], "called_from", self.clusters[phase2["cluster"]]["run_after"])
             else:
                 # if only run_before is set, e.g. to add a phase at the beginning of a run
                 if phase2.get("run_before", None):
@@ -492,15 +526,16 @@ def order_phases_and_clusters(self):
                         next_submits_phases[self.last_task_in_queue].append(phase2["name"])
                         next_submits_phases[self.last_task_in_queue].remove(self.first_task_in_queue)
                         next_submits_clusters[last_phase["cluster"]].remove(old_first_phase["cluster"])
-                        phase2["run_after"] = self.last_task_in_queue
-                        last_phase["run_before"] = phase2["name"]
-                        self.clusters[last_phase["cluster"]]["run_before"] = phase2["name"]
-                        self.clusters[old_first_phase["cluster"]]["run_after"] = phase2["name"]
-                        self.clusters[old_first_phase["cluster"]]["called_from"] = phase2["name"]
-                        self.clusters[phase2["cluster"]]["called_from"] = last_phase["cluster"]
-                        self.clusters[phase2["cluster"]]["run_after"] = last_phase["cluster"]
-                        last_phase["next_submit"].append(phase2["name"])
-                        self.first_task_in_queue = phase2["name"]
+                        set_value(phase2, "run_after", self.last_task_in_queue)
+                        set_value(last_phase, "run_before", phase2["name"])
+                        set_value(self.clusters[last_phase["cluster"]], "run_before", phase2["name"])
+                        set_value(self.clusters[old_first_phase["cluster"]], "run_after", phase2["name"])
+                        set_value(self.clusters[old_first_phase["cluster"]], "called_from", phase2["name"])
+                        set_value(self.clusters[phase2["cluster"]], "called_from",last_phase["cluster"])
+                        set_value(self.clusters[phase2["cluster"]], "run_after", last_phase["cluster"])
+                        set_value(last_phase, "next_submit", phase2["name"])
+
+                        self.set_workflow_attrib("first_task_in_queue", phase2["name"])
 
         for cluster in self.clusters:
             if next_submits_clusters[cluster]:
@@ -508,7 +543,7 @@ def order_phases_and_clusters(self):
 
         for phase3 in self.phases + self.user_phases:
             if next_submits_phases[phase3["name"]]:
-                phase3.set_attrib("next_submit", next_submits_phases[phase3["name"]])
+                phase3["next_submit"] = next_submits_phases[phase3["name"]]
 
         return self
 
@@ -550,24 +585,21 @@ def prepend_newrun_job(self):
             new_first_phase = WorkflowPhase(config_new_first_phase)
 
             # reset last_task attributes
-            last_phase["next_submit"].append("newrun")
-            self.clusters[last_phase["cluster"]]["next_submit"] = ["newrun"]
-            self.clusters[last_phase["cluster"]]["run_before"] = "newrun"
+            set_value(last_phase, "next_submit", "newrun")
+            set_value(self.clusters[last_phase["cluster"]], "next_submit", "newrun", reset=True)
+            set_value(self.clusters[last_phase["cluster"]], "run_before", "newrun")
             self.clusters[new_first_phase["cluster"]] = new_first_phase
-            self.clusters[new_first_phase["cluster"]]["phases"] = ["newrun"]
-            last_phase["next_submit"].remove(first_phase["cluster"])
-            # why does the next line not work???
-            # last_phase.set_attrib("next_submit", "newrun")
-            # last_phase.remove_attrib("next_submit", first_phase["cluster"])
+            set_value(self.clusters[new_first_phase["cluster"]], "phases", ["newrun"], new=True)
+            remove_value(last_phase, "next_submit", first_phase["cluster"])
 
             # reset first_task attributes
-            first_phase.set_attrib("called_from", "newrun")
-            first_phase.set_attrib("run_after", "newrun")
-            self.clusters[first_phase["cluster"]]["called_from"] = "newrun"
-            self.clusters[first_phase["cluster"]]["run_after"] = "newrun"
+            set_value(first_phase, "called_from", "newrun")
+            set_value(first_phase, "run_after", "newrun")
+            set_value(self.clusters[first_phase["cluster"]], "called_from", "newrun")
+            set_value(self.clusters[first_phase["cluster"]], "run_after", "newrun")
 
             # reset workflow attributes
-            self.first_task_in_queue = "newrun"
+            self.set_workflow_attrib("first_task_in_queue", "newrun")
 
             # Set new phase to beginning of default phase list
             self.phases.insert(0, new_first_phase)
@@ -668,17 +700,55 @@ def __init__(self, phase):
         if self.get("cluster", None) is None:
             self["cluster"] = self["name"]
 
-    def set_attrib(self, attrib, value):
-        if type(self[attrib]) == "list":
-            self[attrib].append(value)
-        else:
-            self[attrib] = value
+def set_value(phase, keyword, value, if_not_in=False, reset=False, new=False):
+    """
+    Set a value for a given keyword.
 
-    def remove_attrib(self, attrib, value):
-        if type(self[attrib]) == "list":
-            self[attrib].remove(value)
+    Parameters
+    ----------
+        phase : dict or phase object
+            Phase or cluster
+        keyword : str
+        value : str or list
+        if_not_in : boolean (optional)
+            False (default) - if value should always be appended.
+            True - if value should only be appended if not already in value list.
+        reset : boolean (optional)
+            False (default) - if only append to value list.
+            True - if value list should be reset with new value list.
+        new : boolean (optional)
+            False (default) - for keywords that are already in phase.
+            True - if a new keyword should be created in phase and set to value.
+    """
+    if not new:
+        if type(phase[keyword]) == list:
+            if if_not_in:
+                if value not in phase[keyword]:
+                    phase[keyword].append(value)
+            elif reset:
+                phase[keyword] = [value]
+            else:
+                phase[keyword].append(value)
         else:
-            self[attrib] = None
+            phase[keyword] = value
+    else:
+        phase[keyword] = value
+
+def remove_value(phase, keyword, value):
+    """
+    Remove value for keyword from phase.
+
+    Parameters
+    ----------
+        phase : dict or phase object
+            Phase or cluster
+        keyword : str
+        value : str
+    """
+    if type(phase[keyword]) == list:
+        phase[keyword].remove(value)
+    else:
+        phase[keyword] = None
 
 
 def assemble_workflow(config):
@@ -766,21 +836,23 @@ def init_default_workflow(config):
 
     return workflow
 
-def get_phase_attrib(workflow_phases, phase_name, attrib):
-    if not type(workflow_phases) is list:
-        workflow_phases = [workflow_phases]
-    for phase in workflow_phases:
-        if phase["name"] == phase_name:
-            value = phase[attrib]
-            return value
-
 
 def calc_number_of_tasks(config):
     """
     Calculates the total number of needed tasks
     in phase compute
     TODO: make this phase method??? Or recipe entry???
+
+    Parameters
+    ----------
+        config : dict
+
+    Returns
+    -------
+        tasks : int
+            Number of task for all models
     """
+
     tasks = 0
     for model in config["general"]["valid_model_names"]:
         if "nproc" in config[model]:
diff --git a/src/esm_tests/resources b/src/esm_tests/resources
index 95190bc36..3c9d2c97a 160000
--- a/src/esm_tests/resources
+++ b/src/esm_tests/resources
@@ -1 +1 @@
-Subproject commit 95190bc364f8bc80e7af342514223ea58b29feb1
+Subproject commit 3c9d2c97a849a27c8a57abd0e35ca1cef09783b0

From 21fec587e347a6b56aa7b96ad76eb94b3260abad Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Fri, 8 Dec 2023 17:20:00 +0100
Subject: [PATCH 57/98] Small changes in some comments.

---
 src/esm_runscripts/workflow.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index a10df867d..ec5c5bf8c 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -773,21 +773,21 @@ def assemble_workflow(config):
     # TODO: Put it into other method???
     workflow = workflow.set_default_nproc(config)
 
-    # 3. Read in phases from runscript and config files
+    # 4. Collect all user phases from runscript and config files
     workflow = workflow.collect_all_user_phases(config)
 
-    # 4. Cluster phases
+    # 5. Cluster phases
     workflow = workflow.cluster_phases()
 
-    # 4. Order user workflows into default workflow wrt. phase attributs.
+    # 6. Order user phases into default phases wrt. phase keywords
     workflow = workflow.order_phases_and_clusters()
 
-    # 5. create new first phase of type SimulationSetup, if first_task_in_queue is
+    # 7. create new first phase of type SimulationSetup, if first_task_in_queue is
     #    a user phase (type batch or shell)
     workflow = workflow.prepend_newrun_job()
 
-    # 6. write the workflow to config
-    # 7. Remove old worklow from config
+    # 8. write the workflow to config
+    # 9. Remove old worklow from config
     config = workflow.write_to_config(config)
 
     # Set "jobtype" for the first task???

From 67d08f83d057056b6913d63035e5b09d2341bd25 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Tue, 12 Dec 2023 15:45:23 +0100
Subject: [PATCH 58/98] Remove calc of nproc in workflow.py.

---
 src/esm_runscripts/workflow.py | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index ec5c5bf8c..46e74d7a9 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -1,6 +1,7 @@
 import copy
 import esm_parser
 
+import pygraphviz as pgv
 import pdb
 
 
@@ -765,29 +766,25 @@ def assemble_workflow(config):
     -------
         config : dict
     """
-    # 1. Generate default workflow object and
-    # 2. initialize default workflow phases from defaults.yaml
+    # - Generate default workflow object and
+    # - initialize default workflow phases from defaults.yaml
     workflow = init_default_workflow(config)
 
-    # 3. Calc mpi tasks for batch jobs of default phases
-    # TODO: Put it into other method???
-    workflow = workflow.set_default_nproc(config)
-
-    # 4. Collect all user phases from runscript and config files
+    # - Collect all user phases from runscript and config files
     workflow = workflow.collect_all_user_phases(config)
 
-    # 5. Cluster phases
+    # - Cluster phases
     workflow = workflow.cluster_phases()
 
-    # 6. Order user phases into default phases wrt. phase keywords
+    # - Order user phases into default phases wrt. phase keywords
     workflow = workflow.order_phases_and_clusters()
 
-    # 7. create new first phase of type SimulationSetup, if first_task_in_queue is
-    #    a user phase (type batch or shell)
+    # - create new first phase of type SimulationSetup, if first_task_in_queue is
+    #   a user phase (type batch or shell)
     workflow = workflow.prepend_newrun_job()
 
-    # 8. write the workflow to config
-    # 9. Remove old worklow from config
+    # - write the workflow to config
+    # - Remove old worklow from config
     config = workflow.write_to_config(config)
 
     # Set "jobtype" for the first task???
@@ -926,6 +923,7 @@ def display_workflow_sequence(config, display=True):
         esm_parser.user_note("Workflow sequence (cluster [phases])", f"{workflow_order}")
     else:
         workflow_order = workflow_order.replace("``", "")
+
     return workflow_order
 
 

From 582caa13908455e5ff3a31b31f48f1ad7cdd9274 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Tue, 12 Dec 2023 16:36:44 +0100
Subject: [PATCH 59/98] Remove keyword run_on_queue (not used) and give error
 by missing run_after/run_before.

---
 src/esm_runscripts/workflow.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index 46e74d7a9..8b7714a84 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -206,9 +206,9 @@ def collect_all_user_phases(self, config):
                             if phase_config.get("submit_to_batch_system", False):
                                 phase_config["batch_or_shell"] = "batch"
                                 # check if run_on_queue is given if submit_to_sbatch is true
-                                if not phase_config.get("run_on_queue", False):
-                                    err_msg = f"No value for target queue given by ``run_on_queue`` for phase ``{phase_name}``."
-                                    esm_parser.user_error("ERROR", err_msg)
+#                                if not phase_config.get("run_on_queue", False):
+#                                    err_msg = f"No value for target queue given by ``run_on_queue`` for phase ``{phase_name}``."
+#                                    esm_parser.user_error("ERROR", err_msg)
                             else:
                                 phase_config["batch_or_shell"] = "shell"
 
@@ -254,10 +254,9 @@ def collect_all_user_phases(self, config):
                 err_msg = (
                     f"No value given for ``run_after`` or ``run_before`` "
                     f"of user phase ``{user_phase['name']}``. "
-                    f"Set it to last default phase in workflow: "
-                    f"``{self.phases[-1]['name']}``."
+                    f"Please set either run_after or run_before."
                 )
-                esm_parser.user_note("NOTE", err_msg)
+                esm_parser.user_error("NOTE", err_msg)
 
         return self
 
@@ -669,7 +668,7 @@ def __init__(self, phase):
         self["run_after"] = None
         self["trigger_next_run"] = False               # needed
         self["submit_to_batch_system"] = False         # needed
-        self["run_on_queue"] = None
+#        self["run_on_queue"] = None
         self["cluster"] = None
         self["next_submit"] = []                       # needed
         self["called_from"] = None                     # needed

From 698a523596fff4209d1adc6ccec8fde898f7aacc Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Wed, 13 Dec 2023 16:30:09 +0100
Subject: [PATCH 60/98] Adapted log-file name and append mode.

---
 src/esm_runscripts/compute.py | 3 ++-
 src/esm_runscripts/helpers.py | 4 ++--
 src/esm_runscripts/prepexp.py | 3 ++-
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/esm_runscripts/compute.py b/src/esm_runscripts/compute.py
index 596db3f6b..d3f829720 100644
--- a/src/esm_runscripts/compute.py
+++ b/src/esm_runscripts/compute.py
@@ -281,12 +281,13 @@ def initialize_experiment_logfile(config):
     # Write trace-log file now that we know where to do that
     if "trace_sink" in dir(logger):
         experiment_dir = config["general"]["experiment_dir"]
+        jobtype = config["general"]["jobtype"]
         expid = config["general"]["expid"]
         it_coupled_model_name = config["general"]["iterative_coupled_model"]
         datestamp = config["general"]["run_datestamp"]
         logfile_path = (
             f"{experiment_dir}/log/"
-            f"{expid}_{it_coupled_model_name}esm_runscripts_{datestamp}.log"
+            f"{expid}_{it_coupled_model_name}esm_runscripts_{jobtype}_{datestamp}.log"
         )
 
         logger.trace_sink.def_path(logfile_path)
diff --git a/src/esm_runscripts/helpers.py b/src/esm_runscripts/helpers.py
index 6c59005e0..f143b3ae1 100644
--- a/src/esm_runscripts/helpers.py
+++ b/src/esm_runscripts/helpers.py
@@ -103,7 +103,7 @@ def write_to_log(config, message, message_sep=None):
     ----
     The user can control two things regarding the logfile format:
 
-    1) The datestamp formatting, whjich is taken from the config
+    1) The datestamp formatting, which is taken from the config
        section ``general.experiment_log_file_dateformat``.
     2) The message separators; taken from
        ``general.experiment_log_file_message_sep``. Note that if the
@@ -304,7 +304,7 @@ def def_path(self, path):
             Path of the logging file.
         """
         self.path = path
-        self.write_log(self.log_record, "w")
+        self.write_log(self.log_record, "a")
 
 
 ################################################################################
diff --git a/src/esm_runscripts/prepexp.py b/src/esm_runscripts/prepexp.py
index 8ac0359cd..46904788e 100644
--- a/src/esm_runscripts/prepexp.py
+++ b/src/esm_runscripts/prepexp.py
@@ -246,6 +246,7 @@ def initialize_experiment_logfile(config):
     """
 
     experiment_dir = config["general"]["experiment_dir"]
+    jobtype = config["general"]["jobtype"]
     expid = config["general"]["expid"]
     it_coupled_model = config["general"]["iterative_coupled_model"]
     datestamp = config["general"]["run_datestamp"]
@@ -272,7 +273,7 @@ def initialize_experiment_logfile(config):
     if "trace_sink" in dir(logger):
         logfile_path = (
             f"{experiment_dir}/log/"
-            f"{expid}_{it_coupled_model}esm_runscripts_{datestamp}.log"
+            f"{expid}_{it_coupled_model}esm_runscripts_{jobtype}_{datestamp}.log"
         )
 
         logger.trace_sink.def_path(logfile_path)

From bc6c0828f063ee2af0e691975af1203b63d57f20 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Wed, 13 Dec 2023 16:31:21 +0100
Subject: [PATCH 61/98] Removed run_on_queue from default phases.

---
 configs/esm_software/esm_runscripts/defaults.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configs/esm_software/esm_runscripts/defaults.yaml b/configs/esm_software/esm_runscripts/defaults.yaml
index d8234199c..0bb88502d 100644
--- a/configs/esm_software/esm_runscripts/defaults.yaml
+++ b/configs/esm_software/esm_runscripts/defaults.yaml
@@ -37,7 +37,7 @@ workflow:
             order_in_cluster: sequential
             run_after: prepcompute
             run_before: tidy
-            run_on_queue: ${computer.partitions.pp.name}
+            #run_on_queue: ${computer.partitions.pp.name}
             submit_to_batch_system: True
         tidy:
             batch_or_shell: SimulationSetup

From 05a03abc0543763bc5a38e3f8401aaf6d6493a70 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Wed, 13 Dec 2023 16:32:12 +0100
Subject: [PATCH 62/98] Removed run_on_queue.

---
 configs/setups/awicm3/awicm3.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configs/setups/awicm3/awicm3.yaml b/configs/setups/awicm3/awicm3.yaml
index 052cb67b4..17cd645f2 100644
--- a/configs/setups/awicm3/awicm3.yaml
+++ b/configs/setups/awicm3/awicm3.yaml
@@ -345,7 +345,7 @@ oifs:
                                         postprocessing:
                                                 batch_or_shell: batch
                                                 order_in_cluster: concurrent
-                                                run_on_queue: ${computer.partitions.pp.name}
+                                                #run_on_queue: ${computer.partitions.pp.name}
                                                 run_after: tidy
                                                 script_dir: ${general.esm_function_dir}/setups/awicm3
                                                 submit_to_batch_system: True

From 8f75e6fe629ebb0e8d7becf54f88b4e1dbfe69e4 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Wed, 13 Dec 2023 16:33:31 +0100
Subject: [PATCH 63/98] Added START and END statement to logfile of recipe
 steps.

---
 src/esm_plugin_manager/esm_plugin_manager.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/esm_plugin_manager/esm_plugin_manager.py b/src/esm_plugin_manager/esm_plugin_manager.py
index 3603115b3..698eb05e5 100644
--- a/src/esm_plugin_manager/esm_plugin_manager.py
+++ b/src/esm_plugin_manager/esm_plugin_manager.py
@@ -173,7 +173,7 @@ def work_through_recipe(recipe, plugins, config):
         if config["general"].get("verbose", False):
             # diagnostic message of which recipe step is being executed
             message = (
-                f"::: Executing the step:  {workitem}    "
+                f"::: START Executing the step:  {workitem}    "
                 f"(step [{index}/{len(recipes)}] of the job:  "
                 f'{recipe["job_type"]})'
             )
@@ -203,6 +203,18 @@ def work_through_recipe(recipe, plugins, config):
                 thismodule = importlib.util.module_from_spec(spec)
                 spec.loader.exec_module(thismodule)
                 config = getattr(thismodule, workitem)(config)
+        if config["general"].get("verbose", False):
+            # diagnostic message of which recipe step is being executed
+            message = (
+                f"::: END Executing the step:  {workitem}    "
+                f"(step [{index}/{len(recipes)}] of the job:  "
+                f'{recipe["job_type"]})'
+            )
+
+            print()
+            print("=" * len(message))
+            print(message)
+            print("=" * len(message))
     return config
 
 

From 4bfff6a471647f5b860a31f71e95d93ff602e9fd Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Wed, 13 Dec 2023 16:34:44 +0100
Subject: [PATCH 64/98] Added comments.

---
 src/esm_runscripts/resubmit.py    | 21 ++++++++++++++++++++-
 src/esm_runscripts/sim_objects.py |  6 +++++-
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/src/esm_runscripts/resubmit.py b/src/esm_runscripts/resubmit.py
index e2ecbd0d8..717a0416a 100644
--- a/src/esm_runscripts/resubmit.py
+++ b/src/esm_runscripts/resubmit.py
@@ -248,6 +248,19 @@ def maybe_resubmit(config):
     """
     jobtype = config["general"]["jobtype"]
 
+    # check if nextrun starts???
+    # this resubmits any following jobtypes/phases until nextrun is true
+    # here nextrun is always set to true (if resubmit_recursively is finished)
+
+    # cases: 1. it is the beginning of (next) run:
+    #           - resubmit_recursively returns true but does not do anything except for returning true
+    #           - check if end of simulation -> return
+    #           - returns if iterative coupling, why ???
+    #           - if not end of simulation and not iterative_coupling -> calls itself again with nextrun_in=True which leads to case 2.
+    #        2. it is NOT the beginning if (next) run:
+    #           it will start to loop over all remaining clusters to check if it can sumbit something (SimulationSetup, sbatch, shell) and do so,
+    #           until first start of next run is reached.
+    #        3. nextrun is fals if no entries in next_submit for that particular jobtype/cluster
     nextrun = resubmit_recursively(config, jobtype=jobtype)
 
     if nextrun:  # submit list contains stuff from next run
@@ -296,22 +309,28 @@ def resubmit_recursively(config, jobtype=None, list_of_clusters=None, nextrun_in
         nextrun : Boolean
     """
     nextrun = False
+    # get a list of clusters that follow the current jobtype
     if not list_of_clusters:
         list_of_clusters = config["general"]["workflow"]["subjob_clusters"][
             jobtype
         ].get("next_submit", [])
 
     for cluster in list_of_clusters:
+        # if beginning of next run
         if (
             cluster == config["general"]["workflow"]["first_task_in_queue"]
             and not nextrun_in
-        ):                          # if beginning of next run?
+        ):
             nextrun = True
+        # if not at the beginning of a run
         else:
+            # and cluster is not going to be skipped
             if not workflow.skip_cluster(cluster, config):
                 submission_type = get_submission_type(cluster, config)
                 if submission_type == "SimulationSetup":
+                    # create the SimulationSetup object for the this/next jobtype
                     resubmit_SimulationSetup(config, cluster)
+                    # or submits to batch or shell if not check run
                 elif submission_type in ["batch", "shell"]:
                     resubmit_batch_or_shell(config, submission_type, cluster)
             else:
diff --git a/src/esm_runscripts/sim_objects.py b/src/esm_runscripts/sim_objects.py
index a9353d41d..b1c08fda9 100644
--- a/src/esm_runscripts/sim_objects.py
+++ b/src/esm_runscripts/sim_objects.py
@@ -15,6 +15,7 @@
 
 import esm_parser
 
+#import pdb
 
 class SimulationSetup(object):
     def __init__(self, command_line_config=None, user_config=None):
@@ -51,7 +52,7 @@ def __call__(self, kill_after_submit=True):
             self.inspect()
             helpers.end_it_all(self.config)
 
-        # Run the preexp recipe
+        # Run the prepexp recipe
         self.config = prepexp.run_job(self.config)
 
         # self.pseudocall(kill_after_submit)
@@ -83,12 +84,15 @@ def __call__(self, kill_after_submit=True):
             ].replace("observe_", "")
             # that last line is necessary so that maybe_resubmit knows which
             # cluster to look up in the workflow
+            # because all cluster with batch_or_shell=sbatch will be called 
+            # esm_runscripts ... -t observe_<cluster> ...
 
         else:
             self.assembler()
 
         resubmit.maybe_resubmit(self.config)
 
+        # if this line is reached, the run is submitted and running or finished
         self.config = logfiles.finalize_logfiles(self.config, org_jobtype)
 
         if self.config["general"]["submitted"]:

From 18fd44a719ce407f8cf6689dd6ea8a033e7184f6 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Thu, 14 Dec 2023 08:35:16 +0100
Subject: [PATCH 65/98] Added comments.

---
 src/esm_runscripts/sim_objects.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/esm_runscripts/sim_objects.py b/src/esm_runscripts/sim_objects.py
index b1c08fda9..8bf974b3f 100644
--- a/src/esm_runscripts/sim_objects.py
+++ b/src/esm_runscripts/sim_objects.py
@@ -52,7 +52,7 @@ def __call__(self, kill_after_submit=True):
             self.inspect()
             helpers.end_it_all(self.config)
 
-        # Run the prepexp recipe
+        # Run the prepexp recipe always before every jobtype/cluster
         self.config = prepexp.run_job(self.config)
 
         # self.pseudocall(kill_after_submit)
@@ -60,6 +60,8 @@ def __call__(self, kill_after_submit=True):
         org_jobtype = str(self.config["general"]["jobtype"])
         self.config = logfiles.initialize_logfiles(self.config, org_jobtype)
 
+        # if not check run???
+        # set stdout and stderr to lofile
         if self.config["general"]["submitted"]:
             old_stdout = sys.stdout
             old_stderr = sys.stderr

From cdd7c2018a21c3767e605ee804d1b6f9f93ad9c3 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Thu, 14 Dec 2023 14:52:04 +0100
Subject: [PATCH 66/98] Change comment

---
 src/esm_runscripts/batch_system.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/esm_runscripts/batch_system.py b/src/esm_runscripts/batch_system.py
index 4f8461168..82cbccb53 100644
--- a/src/esm_runscripts/batch_system.py
+++ b/src/esm_runscripts/batch_system.py
@@ -518,7 +518,7 @@ def write_simple_runscript(config, cluster, batch_or_shell="batch"):
             #    dummy = 0
             else:  # "normal" case
                 dummy = 0
-# was macht das hier? wo/wie wird submits_abother_job definiert?
+            # check if this cluster has has something to submit (next_submit not empty)
             if submits_another_job(config, cluster):  # and batch_or_shell == "batch":
                 # -j ? is that used somewhere? I don't think so, replaced by workflow
                 #   " -j "+ config["general"]["jobtype"]

From 638a1eac3dc346b80d20dd2336611b923b0e66a9 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Thu, 14 Dec 2023 14:52:44 +0100
Subject: [PATCH 67/98] Adde jobid to logfiles.

---
 src/esm_runscripts/logfiles.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/esm_runscripts/logfiles.py b/src/esm_runscripts/logfiles.py
index 102c0d233..16a635491 100644
--- a/src/esm_runscripts/logfiles.py
+++ b/src/esm_runscripts/logfiles.py
@@ -62,10 +62,16 @@ def finalize_logfiles(config, org_jobtype):
 
 
 def set_logfile_name(config, jobtype=None):
+    jobid = str(config["general"].get("jobid", None))
 
     if not jobtype:
         jobtype = config["general"]["jobtype"]
 
+    if jobid:
+        jobid = f"_{jobid}"
+    else:
+        jobid = "_nojobid"
+
     filejobtype = jobtype
     # if "observe" in filejobtype:
     #    filejobtype = filejobtype.replace("observe_", "")
@@ -92,6 +98,7 @@ def set_logfile_name(config, jobtype=None):
         + filejobtype
         + "_"
         + config["general"]["run_datestamp"]
+        + jobid
         + ".log"
     )
 

From ff0724ca270197570219ee3050027e7d68f96cd5 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Fri, 15 Dec 2023 10:56:07 +0100
Subject: [PATCH 68/98] Comment out unused functions.

---
 src/esm_runscripts/workflow.py | 104 ++++++++++++++++-----------------
 1 file changed, 52 insertions(+), 52 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index 8b7714a84..7159995c3 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -83,29 +83,29 @@ def get_phases_values_list(self, phase_type, keyword):
         return phases_values
 
 
-    def set_default_nproc(self, config):
-        """
-        Calculating the number of mpi tasks for default phases and each component/model/script
-
-        Parameters
-        ----------
-            config : dict
-
-        Returns
-        -------
-            self : Workflow object
-        """
-
-        # Get the sum of all mpi tasks
-        tasks = calc_number_of_tasks(config)
-
-        # Write this number of tasks to phase, if
-        # phase will be submitted to batch system
-        for ind, phase in enumerate(self.phases):
-            if phase["submit_to_batch_system"]:
-                set_value(phase, "nproc", tasks)
-
-        return self
+#    def set_default_nproc(self, config):
+#        """
+#        Calculating the number of mpi tasks for default phases and each component/model/script
+#
+#        Parameters
+#        ----------
+#            config : dict
+#
+#        Returns
+#        -------
+#            self : Workflow object
+#        """
+#
+#        # Get the sum of all mpi tasks
+#        tasks = calc_number_of_tasks(config)
+#
+#        # Write this number of tasks to phase, if
+#        # phase will be submitted to batch system
+#        for ind, phase in enumerate(self.phases):
+#            if phase["submit_to_batch_system"]:
+#                set_value(phase, "nproc", tasks)
+#
+#        return self
 
     def set_workflow_attrib(self, attrib, value):
         """
@@ -833,35 +833,35 @@ def init_default_workflow(config):
     return workflow
 
 
-def calc_number_of_tasks(config):
-    """
-    Calculates the total number of needed tasks
-    in phase compute
-    TODO: make this phase method??? Or recipe entry???
-
-    Parameters
-    ----------
-        config : dict
-
-    Returns
-    -------
-        tasks : int
-            Number of task for all models
-    """
-
-    tasks = 0
-    for model in config["general"]["valid_model_names"]:
-        if "nproc" in config[model]:
-            tasks += config[model]["nproc"]
-        elif "nproca" in config[model] and "nprocb" in config[model]:
-            tasks += config[model]["nproca"] * config[model]["nprocb"]
-            if "nprocar" in config[model] and "nprocbr" in config[model]:
-                if (
-                    config[model]["nprocar"] != "remove_from_namelist"
-                    and config[model]["nprocbr"] != "remove_from_namelist"
-                ):
-                    tasks += config[model]["nprocar"] * config[model]["nprocbr"]
-    return tasks
+#def calc_number_of_tasks(config):
+#    """
+#    Calculates the total number of needed tasks
+#    in phase compute
+#    TODO: make this phase method??? Or recipe entry???
+#
+#    Parameters
+#    ----------
+#        config : dict
+#
+#    Returns
+#    -------
+#        tasks : int
+#            Number of task for all models
+#    """
+#
+#    tasks = 0
+#    for model in config["general"]["valid_model_names"]:
+#        if "nproc" in config[model]:
+#            tasks += config[model]["nproc"]
+#        elif "nproca" in config[model] and "nprocb" in config[model]:
+#            tasks += config[model]["nproca"] * config[model]["nprocb"]
+#            if "nprocar" in config[model] and "nprocbr" in config[model]:
+#                if (
+#                    config[model]["nprocar"] != "remove_from_namelist"
+#                    and config[model]["nprocbr"] != "remove_from_namelist"
+#                ):
+#                    tasks += config[model]["nprocar"] * config[model]["nprocbr"]
+#    return tasks
 
 
 def display_workflow(config):

From e285372c6d6bbd0659d57de8e8d57b5f571d03e2 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Mon, 15 Jan 2024 12:26:43 +0100
Subject: [PATCH 69/98] Added -s argument to read current_date; some code
 syntax optimisations.

---
 src/esm_runscripts/prepare.py | 72 ++++++++++++++++++++++++++++-------
 1 file changed, 58 insertions(+), 14 deletions(-)

diff --git a/src/esm_runscripts/prepare.py b/src/esm_runscripts/prepare.py
index 2b62bf4fa..59870d8c4 100644
--- a/src/esm_runscripts/prepare.py
+++ b/src/esm_runscripts/prepare.py
@@ -68,10 +68,53 @@ def _read_date_file(config):
         date = config["general"].get("initial_date", "18500101")
         run_number = 1
         write_file = True
+
+    date_c = config["general"].get("current_date", None)
+
+    if date_c is not None:
+        date_fdf = Date(date)
+        date_c = Date(str(config["general"]["current_date"]))
+        run_number_c = int(config["general"]["run_number"])
+        last_jobtype = config["general"].get("last_jobtype", "")
+        isresubmitted = last_jobtype == config["general"]["jobtype"]
+
+        if date_fdf != date_c:
+
+            msg = (
+                f"``Date`` and ``run_number`` are ``not`` taken from date file, "
+                f"but from command_line argument (provided by -s or --start_date). "
+                f"The given start_date ({date_c}) and run_number ({run_number_c}) "
+                f"are different from the values "
+                f"in the current date file of your experiment ({date}, {run_number}). "
+                f"Your experiment may now be in a non consecutive state. "
+                f"Please confirm if you want to continue:"
+            )
+            esm_parser.user_note("Detached experiment:", msg)
+            proceed = ""
+            if isresubmitted:
+                proceed = questionary.select(
+                    "Do you want to continue?",
+                    choices=[
+                        f"Yes, with date from command line argument: {str(config['general']['current_date'])}",
+                        f"Yes, with date from date file: {date}",
+                        "No, cancel."
+                    ]).ask()
+
+                if 'Yes, with date from command line argument' in proceed:
+                    date = str(date_c)
+                    run_number = run_number_c
+                elif 'Yes, with date from date file' in proceed:
+                    date = date
+                    run_number = run_number
+                else:
+                    esm_parser.user_note("The experiment will be cancelled:", f"You cancelled the experiment due to date discrepancies.")
+                    sys.exit(1)
+
     config["general"]["run_number"] = run_number
     config["general"]["current_date"] = date
     logging.info("current_date = %s", date)
     logging.info("run_number = %s", run_number)
+
     return config
 
 
@@ -274,7 +317,7 @@ def _initialize_calendar(config):
     if config["general"]["reset_calendar_to_last"]:
         config = find_last_prepared_run(config)
     config = set_most_dates(config)
-    if not "iterative_coupling" in config["general"]:
+    if "iterative_coupling" not in config["general"]:
         config["general"]["chunk_number"] = 1
 
         if config["general"]["run_number"] == 1:
@@ -346,7 +389,7 @@ def set_leapyear(config):
                 config["general"]["leapyear"] = config[model]["leapyear"]
                 break
 
-    if not "leapyear" in config["general"]:
+    if "leapyear" not in config["general"]:
         for model in config["general"]["valid_model_names"]:
             config[model]["leapyear"] = True
         config["general"]["leapyear"] = True
@@ -634,39 +677,39 @@ def set_parent_info(config):
     # Make sure "ini_parent_dir" and "ini_restart_dir" both work:
     for model in config["general"]["valid_model_names"]:
         # If only ini_restart_* variables are used in runcscript, set ini_parent_* to the same values
-        if not "ini_parent_dir" in config[model]:
+        if "ini_parent_dir" not in config[model]:
             if "ini_restart_dir" in config[model]:
                 config[model]["ini_parent_dir"] = config[model]["ini_restart_dir"]
-        if not "ini_parent_exp_id" in config[model]:
+        if "ini_parent_exp_id" not in config[model]:
             if "ini_restart_exp_id" in config[model]:
                 config[model]["ini_parent_exp_id"] = config[model]["ini_restart_exp_id"]
-        if not "ini_parent_date" in config[model]:
+        if "ini_parent_date" not in config[model]:
             if "ini_restart_date" in config[model]:
                 config[model]["ini_parent_date"] = config[model]["ini_restart_date"]
 
     # check if parent is defined in esm_tools style
     # (only given for setup)
     setup = config["general"]["setup_name"]
-    if not setup in config:
+    if setup not in config:
         setup = "general"
     if "ini_parent_exp_id" in config[setup]:
         for model in config["general"]["valid_model_names"]:
-            if not "ini_parent_exp_id" in config[model]:
+            if "ini_parent_exp_id" not in config[model]:
                 config[model]["ini_parent_exp_id"] = config[setup]["ini_parent_exp_id"]
     if "ini_parent_date" in config[setup]:
         for model in config["general"]["valid_model_names"]:
-            if not "ini_parent_date" in config[model]:
+            if "ini_parent_date" not in config[model]:
                 config[model]["ini_parent_date"] = config[setup]["ini_parent_date"]
     if "ini_parent_dir" in config[setup]:
         for model in config["general"]["valid_model_names"]:
-            if not "ini_parent_dir" in config[model]:
+            if "ini_parent_dir" not in config[model]:
                 config[model]["ini_parent_dir"] = (
                     config[setup]["ini_parent_dir"] + "/" + model
                 )
 
     # Get correct parent info
     for model in config["general"]["valid_model_names"]:
-        if config[model]["lresume"] == True and config["general"]["run_number"] == 1:
+        if config[model]["lresume"] is True and config["general"]["run_number"] == 1:
             config[model]["parent_expid"] = config[model]["ini_parent_exp_id"]
             if "parent_date" not in config[model]:
                 config[model]["parent_date"] = config[model]["ini_parent_date"]
@@ -726,6 +769,7 @@ def add_vcs_info(config):
         yaml.dump(vcs_versions, f)
     return config
 
+
 def check_vcs_info_against_last_run(config):
     """
     Ensures that the version control info for two runs is identical between the
@@ -777,7 +821,6 @@ def check_vcs_info_against_last_run(config):
             If you are **sure** that this is OK, you can set 'general.allow_vcs_differences' to True to avoid this check.
             """)
 
-
     return config
 
 
@@ -811,7 +854,7 @@ def initialize_batch_system(config):
 
 
 def initialize_coupler(config):
-    if config["general"]["standalone"] == False:
+    if config["general"]["standalone"] is False:
         from . import coupler
 
         base_dir = config["general"]["base_dir"]
@@ -882,6 +925,7 @@ def check_config_for_warnings_errors(config):
 
     return config
 
+
 def warn_error(config, trigger, note_function):
     """
     Checks the ``sections`` of the ``config`` for a given ``trigger`` (``"error"`` or
@@ -940,7 +984,7 @@ def warn_error(config, trigger, note_function):
         Method to report the note
     """
     # Sufixes for the warning special case
-    if trigger=="warning":
+    if trigger == "warning":
         sufix_name = f" WARNING"
     else:
         sufix_name = f""
@@ -967,7 +1011,7 @@ def warn_error(config, trigger, note_function):
                     # needs to halt, and the user has not defined the
                     # ``--ignore-config-warnings`` flag in the ``esm_runscripts`` call
                     if (
-                        trigger=="warning"
+                        trigger == "warning"
                         and config["general"].get("isinteractive")
                         and action_info.get("ask_user_to_continue", False)
                         and not config["general"].get("ignore_config_warnings", False)

From 3bbaaae7264e3a9f4a8974565d864bfecf396e48 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Mon, 15 Jan 2024 12:27:28 +0100
Subject: [PATCH 70/98] Test, to remove maybe_resumbit.

---
 src/esm_runscripts/sim_objects.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/esm_runscripts/sim_objects.py b/src/esm_runscripts/sim_objects.py
index 8bf974b3f..f96ae1326 100644
--- a/src/esm_runscripts/sim_objects.py
+++ b/src/esm_runscripts/sim_objects.py
@@ -15,7 +15,7 @@
 
 import esm_parser
 
-#import pdb
+import pdb
 
 class SimulationSetup(object):
     def __init__(self, command_line_config=None, user_config=None):
@@ -46,6 +46,7 @@ def __init__(self, command_line_config=None, user_config=None):
         # sys.exit(0)
 
     def __call__(self, kill_after_submit=True):
+        breakpoint()
         # Trigger inspect functionalities
         if self.config["general"]["jobtype"] == "inspect":
             # esm_parser.pprint_config(self.config)
@@ -86,13 +87,16 @@ def __call__(self, kill_after_submit=True):
             ].replace("observe_", "")
             # that last line is necessary so that maybe_resubmit knows which
             # cluster to look up in the workflow
-            # because all cluster with batch_or_shell=sbatch will be called 
+            # because all cluster with batch_or_shell=sbatch will be called
             # esm_runscripts ... -t observe_<cluster> ...
 
         else:
+            # write .run file for all workflow phases.
+
+            # Is this dunction call needed here?
             self.assembler()
 
-        resubmit.maybe_resubmit(self.config)
+        #resubmit.maybe_resubmit(self.config)
 
         # if this line is reached, the run is submitted and running or finished
         self.config = logfiles.finalize_logfiles(self.config, org_jobtype)

From 09ac11b8ae97a3b6d03073f8dac088cc22e73051 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Tue, 23 Jan 2024 15:27:21 +0100
Subject: [PATCH 71/98] Test to remove jobtype setting in workflow.py

---
 src/esm_runscripts/resubmit.py    |  1 +
 src/esm_runscripts/sim_objects.py |  1 -
 src/esm_runscripts/workflow.py    | 16 ++++++++--------
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/esm_runscripts/resubmit.py b/src/esm_runscripts/resubmit.py
index 717a0416a..5046372c1 100644
--- a/src/esm_runscripts/resubmit.py
+++ b/src/esm_runscripts/resubmit.py
@@ -5,6 +5,7 @@
 from . import chunky_parts
 from . import workflow
 
+import pdb
 
 def submit(config):
     """
diff --git a/src/esm_runscripts/sim_objects.py b/src/esm_runscripts/sim_objects.py
index f96ae1326..cc31591ef 100644
--- a/src/esm_runscripts/sim_objects.py
+++ b/src/esm_runscripts/sim_objects.py
@@ -46,7 +46,6 @@ def __init__(self, command_line_config=None, user_config=None):
         # sys.exit(0)
 
     def __call__(self, kill_after_submit=True):
-        breakpoint()
         # Trigger inspect functionalities
         if self.config["general"]["jobtype"] == "inspect":
             # esm_parser.pprint_config(self.config)
diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index 7159995c3..9cb31e190 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -1,7 +1,7 @@
 import copy
 import esm_parser
 
-import pygraphviz as pgv
+#import pygraphviz as pgv
 import pdb
 
 
@@ -789,13 +789,13 @@ def assemble_workflow(config):
     # Set "jobtype" for the first task???
     # NOTE: This is either first default phase or
     #       newrun??? Can't this not be set in prepend_newrun then?
-    if config["general"]["jobtype"] == "unknown":
-        config["general"]["command_line_config"]["jobtype"] = config["general"][
-            "workflow"
-        ]["first_task_in_queue"]
-        config["general"]["jobtype"] = config["general"]["workflow"][
-            "first_task_in_queue"
-        ]
+#    if config["general"]["jobtype"] == "unknown":
+#        config["general"]["command_line_config"]["jobtype"] = config["general"][
+#            "workflow"
+#        ]["first_task_in_queue"]
+#        config["general"]["jobtype"] = config["general"]["workflow"][
+#            "first_task_in_queue"
+#        ]
 
     return config
 

From ad156750b924f13b270181bf868aaa1c07a16b3c Mon Sep 17 00:00:00 2001
From: Miguel Andres-Martinez <miguel.andres-martinez@awi.de>
Date: Thu, 1 Feb 2024 15:19:10 +0100
Subject: [PATCH 72/98] comment and clean esm_software/esm_runscripts config
 files

---
 .../esm_software/esm_runscripts/defaults.yaml |  2 ++
 .../esm_runscripts/esm_plugins.yaml           | 33 ++++++++-----------
 .../esm_runscripts/esm_runscripts.yaml        | 30 +++++------------
 3 files changed, 24 insertions(+), 41 deletions(-)

diff --git a/configs/esm_software/esm_runscripts/defaults.yaml b/configs/esm_software/esm_runscripts/defaults.yaml
index 43b918aa7..594cc20ec 100644
--- a/configs/esm_software/esm_runscripts/defaults.yaml
+++ b/configs/esm_software/esm_runscripts/defaults.yaml
@@ -1,4 +1,6 @@
+# ESM-Runscripts defaults
 
+# Defaults to be added to each model or component
 per_model_defaults:
         file_movements:
                 default:
diff --git a/configs/esm_software/esm_runscripts/esm_plugins.yaml b/configs/esm_software/esm_runscripts/esm_plugins.yaml
index e80644882..785a43f7d 100644
--- a/configs/esm_software/esm_runscripts/esm_plugins.yaml
+++ b/configs/esm_software/esm_runscripts/esm_plugins.yaml
@@ -1,3 +1,14 @@
+# Mappings of functions/methods to parent ESM python libraries:
+# tells ESM-Tools in which library and file/sublibrary can find the functions
+# of the recipies
+# (prescribed in configs/esm_software/esm_runscripts/esm_runscripts.yaml).
+#
+# Core (not an external plugin)
+#       Library Sublibrary/file     Function/method
+#       esm_runscripts
+#               prepare
+#                       - "_read_date_file"
+
 core:
         esm_runscripts:
                 prepare:
@@ -24,7 +35,7 @@ core:
                         - "initialize_experiment_logfile"
                         - "copy_tools_to_thisrun"
                         - "_copy_preliminary_files_from_experiment_to_thisrun"
-                        
+
 
 
                 prepcompute:
@@ -53,9 +64,8 @@ core:
                         - "throw_away_some_infiles"
 
 
-                observe:          
+                observe:
                         - "init_monitor_file"
-                          #- "get_last_jobid"
                         - "wait_and_observe"
                         - "wake_up_call"
 
@@ -82,7 +92,6 @@ core:
                         - "database_entry"
 
                 batch_system:
-                        #- "calculate_requirements"
                         - "write_simple_runscript"
                         - "write_env"
                         - "find_openmp"
@@ -101,19 +110,3 @@ core:
                 workflow:
                         - "assemble_workflow"
 
-
-
-# To add your custom plugin, see oifs.yaml, section compute_recipe, and compare to the list above.
-# the custom plugin 'preprocess' listed in the compute_recipe in oifs.yaml needs to be installed beforehand with
-#
-# git clone https://github.com/esm-tools-plugins/preprocess
-# cd preprocess
-# if required activate python3 e.g.
-# module load anaconda3/bleeding_edge on
-# pip install --user
-#
-# now check if the plugin is available
-# esm_plugins should print
-# The following plugins are installed and available:
-#  - preprocess
-#
diff --git a/configs/esm_software/esm_runscripts/esm_runscripts.yaml b/configs/esm_software/esm_runscripts/esm_runscripts.yaml
index 7d4b022be..7973ba4f8 100644
--- a/configs/esm_software/esm_runscripts/esm_runscripts.yaml
+++ b/configs/esm_software/esm_runscripts/esm_runscripts.yaml
@@ -1,3 +1,8 @@
+# Default recipies
+# ESM-Tools uses config/esm_software/esm_runscripts/esm_plugins.yaml to understand
+# where to look for each of this function/methods (the steps within each recipy, e.g.
+# "_read_date_file").
+
 choose_job_type:
 
         #postprocess:
@@ -7,21 +12,17 @@ choose_job_type:
         prepare:
                 recipe:
                         - "_read_date_file"
-                        #- "setup_correct_chunk_config"
                         - "_update_run_in_chunk"
                         - "check_model_lresume"
-
-# kh 01.12.20 this must be done before resolve_some_choose_blocks, because potentially also choosable
-# entries can be overridden via usermods.yaml (e.g. computer.useMPI: intel18_bullxmpi)
+                        # This must be done before resolve_some_choose_blocks, because
+                        # potentially also choosable entries can be overridden via
+                        # usermods.yaml (e.g. computer.useMPI: intel18_bullxmpi)
                         - "apply_last_minute_changes"
                         - "find_openmp"
                         - "resolve_some_choose_blocks"
                         - "_initialize_calendar"
                         - "set_chunk_calendar"
                         - "resolve_some_choose_blocks"
-
-# kh 01.12.20 moved up a few positions
-#                       - "apply_last_minute_changes"
                         - "_add_all_folders"
                         - "set_prev_date"
                         - "set_parent_info"
@@ -48,15 +49,12 @@ choose_job_type:
                         - "copy_tools_to_thisrun"
                         - "_copy_preliminary_files_from_experiment_to_thisrun"
 
-
         observe:
                 recipe:
                         - "init_monitor_file"
-                          #- "get_last_jobid"
                         - "wait_and_observe"
                         - "wake_up_call"
 
-
         tidy:
                 recipe:
                         - "tidy_coupler"
@@ -66,14 +64,7 @@ choose_job_type:
                         - "throw_away_some_infiles"
                         - "copy_stuff_back_from_work"
                         - "copy_all_results_to_exp"
-                          #- "_update_chunk_date_file"
                         - "clean_run_dir"
-                          #- "start_post_job"
-                          #- "signal_tidy_completion"
-                          #- "_increment_date_and_run_number"
-                          #- "_write_date_file"
-                          #- "maybe_resubmit"
-
 
         prepcompute:
                 recipe:
@@ -82,21 +73,17 @@ choose_job_type:
                         - "create_new_files"
                         - "create_empty_folders"
                         - "prepare_coupler_files"
-                          #- "calculate_requirements"
-                          #- "add_batch_hostfile"
                         - "assemble"
                         - "log_used_files"
                         - "copy_files_to_thisrun"
                         - "modify_namelists"
                         - "modify_files"
                         - "copy_files_to_work"
-                          #- "write_simple_runscript"
                         - "report_missing_files"
                         #- "add_vcs_info"
                         #- "check_vcs_info_against_last_run"
                         - "_write_finalized_config"
                         - "database_entry"
-                          #- "submit"
 
         inspect:
                 recipe:
@@ -108,3 +95,4 @@ choose_job_type:
                         - "inspect_config"
                         - "inspect_folder"
                         - "inspect_file"
+

From cb6e666106c0b0327e19b3cbb8655d2531b37c22 Mon Sep 17 00:00:00 2001
From: Miguel Andres-Martinez <miguel.andres-martinez@awi.de>
Date: Thu, 1 Feb 2024 19:02:54 +0100
Subject: [PATCH 73/98] paetially refactoring of SimulationSetup.__init__ and
 some of the function of config_initialization.py

---
 src/esm_runscripts/cli.py                   |  2 +-
 src/esm_runscripts/config_initialization.py | 96 +++++++++++++++------
 src/esm_runscripts/prepare.py               |  2 +-
 src/esm_runscripts/sim_objects.py           | 12 ++-
 4 files changed, 84 insertions(+), 28 deletions(-)

diff --git a/src/esm_runscripts/cli.py b/src/esm_runscripts/cli.py
index 4498c92f6..081bba182 100644
--- a/src/esm_runscripts/cli.py
+++ b/src/esm_runscripts/cli.py
@@ -278,7 +278,7 @@ def main():
         logger.debug(f"starting (jobtype): {jobtype}")
         logger.debug(command_line_config)
 
-    Setup = SimulationSetup(command_line_config)
+    Setup = SimulationSetup(command_line_config=command_line_config)
     # if not Setup.config['general']['submitted']:
     if not Setup.config["general"]["submitted"] and not no_motd:
         check_all_esm_packages()
diff --git a/src/esm_runscripts/config_initialization.py b/src/esm_runscripts/config_initialization.py
index d037fbd1f..7d02b4580 100644
--- a/src/esm_runscripts/config_initialization.py
+++ b/src/esm_runscripts/config_initialization.py
@@ -8,10 +8,7 @@
 from . import chunky_parts
 
 
-def init_first_user_config(command_line_config, user_config):
-
-    if not user_config:
-        user_config = get_user_config_from_command_line(command_line_config)
+def init_iterative_coupling(command_line_config, user_config):
 
     # maybe switch to another runscript, if iterative coupling
     user_config["general"]["iterative_coupled_model"] = ""
@@ -84,13 +81,40 @@ def save_command_line_config(config, command_line_config):
 
 
 def get_user_config_from_command_line(command_line_config):
+    """
+    Reads the runscript provided in ``command_line_config`` and overwirtes the
+    information of the runscript with that of the command line (command line wins
+    over the runscript.
+
+    Input
+    -----
+    command_line_config : dict
+        Dictionary containing the information coming from the command line
+
+    Returns
+    -------
+    user_config : dict, DictWithProvenance
+        Dictionary containing the information from the command line on top of the
+        runscript's
+
+    Raises
+    ------
+    Syntaxerror : esm_parser.user_error
+        If there is a problem with the parsing of the runscript
+    """
+
+    # Default user_config
+    user_config = {
+        "general": {
+            "additional_files": [],
+        },
+    }
+
+    # Read the content of the runscrip
     try:
-        # use the full absolute path instead of CWD
-        user_config = esm_parser.initialize_from_yaml(
-            command_line_config["runscript_abspath"]
+        user_config.update(
+            esm_parser.initialize_from_yaml(command_line_config["runscript_abspath"])
         )
-        if "additional_files" not in user_config["general"]:
-            user_config["general"]["additional_files"] = []
     # If sys.exit is triggered through esm_parser.user_error (i.e. from
     # ``check_for_empty_components`` in ``yaml_to_dict.py``) catch the sys.exit.
     except SystemExit as sysexit:
@@ -101,23 +125,45 @@ def get_user_config_from_command_line(command_line_config):
             f"An error occurred while reading the config file "
             f"``{command_line_config['runscript_abspath']}`` from the command line.")
 
-    # NOTE(PG): I really really don't like this. But I also don't want to
-    # re-introduce black/white lists
-    #
-    # User config wins over command line:
-    # -----------------------------------
-    # Update all **except** for use_venv if it was supplied in the
-    # runscript:
-    deupdate_use_venv = False
-    if "use_venv" in user_config["general"]:
-        user_use_venv = user_config["general"]["use_venv"]
-        deupdate_use_venv = True
     user_config["general"].update(command_line_config)
-    if deupdate_use_venv:
-        user_config["general"]["use_venv"] = user_use_venv
-    user_config["general"]["isinteractive"] = command_line_config.get(
-        "last_jobtype", ""
-    )=="command_line"
+
+    return user_config
+
+
+def init_interactive_info(command_line_config, user_config):
+    """
+    Initialize key-values to evaluate at any point whether interactive functions are to
+    be run (e.g. questionaries, warnings, etc.). The following key-values are set within
+    ``user_config["general"]``:
+    - ``isinteractive``: ``True`` if this function is trigger by a command line
+            execution
+    - ``isresubmitted``: ``True`` if the ``last_jobtype`` is the same as the current
+            ``jobtype`` (after the user triggers ``esm_runscripts`` there is a first
+            step of preparing the experiment folder and then it resubmit it itself from
+            the experiment folder; most questionaries need to be run in this second step
+            ``isresubmitted`` because only then the updated information via the
+            questionaries plays a role in the simulation).
+
+    Input
+    -----
+    command_line_config : dict
+        Dictionary containing the information coming from the command line
+    user_config : dict
+        Dictionary containing the information from the command line on top of the
+        runscript's
+
+    Returns
+    -------
+    user_config : Dict
+        Same as the input ``user_config`` but with the interactive variables
+    """
+    last_jobtype = command_line_config.get("last_jobtype", "")
+    isinteractive = last_jobtype == "command_line"
+    isresubmitted = last_jobtype == user_config["general"]["jobtype"]
+
+    user_config["general"]["isinteractive"] = isinteractive
+    user_config["general"]["isresubmitted"] = isresubmitted
+
     return user_config
 
 
diff --git a/src/esm_runscripts/prepare.py b/src/esm_runscripts/prepare.py
index ad5e13aaf..c8007421c 100644
--- a/src/esm_runscripts/prepare.py
+++ b/src/esm_runscripts/prepare.py
@@ -869,7 +869,7 @@ def check_config_for_warnings_errors(config):
 
     # Find conditions to warn (avoid warning more than once)
     last_jobtype = config["general"].get("last_jobtype", "")
-    isresubmitted = last_jobtype == config["general"]["jobtype"]
+    isresubmitted = config["general"].get("isresubmitted", "")
     isinteractive = config["general"].get("isinteractive", "")
 
     # Only warn if it is an interactive session or while submitted
diff --git a/src/esm_runscripts/sim_objects.py b/src/esm_runscripts/sim_objects.py
index a9353d41d..fa2449708 100644
--- a/src/esm_runscripts/sim_objects.py
+++ b/src/esm_runscripts/sim_objects.py
@@ -24,7 +24,17 @@ def __init__(self, command_line_config=None, user_config=None):
                 "SimulationSetup needs to be initialized with either command_line_config or user_config."
             )
 
-        user_config = config_initialization.init_first_user_config(
+        # Initialize user_config using the command line arguments and the given runscript
+        if not user_config:
+            user_config = config_initialization.get_user_config_from_command_line(
+                command_line_config
+            )
+
+        # Initialize information about interactive sessions
+        user_config = config_initialization.init_interactive_info(command_line_config, user_config)
+
+        # Initialize iterative coupling information
+        user_config = config_initialization.init_iterative_coupling(
             command_line_config, user_config
         )
 

From b509de70fd145d5a0488f41053d7a3cf2bbbc411 Mon Sep 17 00:00:00 2001
From: Miguel Andres-Martinez <miguel.andres-martinez@awi.de>
Date: Thu, 1 Feb 2024 20:34:02 +0100
Subject: [PATCH 74/98] restructure SimulationSetup.__init__: 1) the functions
 there do not call other functions (i.e. by looking at
 SimulationSetup.__init__ it is now clear what are the steps 2) redefine each
 steps in config_initialization.py so that each step has a single and well
 defined used and 3) collect all defaults variables defined in the functions
 from config_initialization.py and included them into a new general section in
 the configs/esm_software/esm_runscripts/defaults.yaml

---
 .../esm_software/esm_runscripts/defaults.yaml |  7 ++++
 src/esm_runscripts/config_initialization.py   | 40 ++++++-------------
 src/esm_runscripts/sim_objects.py             | 23 +++++++++--
 3 files changed, 40 insertions(+), 30 deletions(-)

diff --git a/configs/esm_software/esm_runscripts/defaults.yaml b/configs/esm_software/esm_runscripts/defaults.yaml
index 594cc20ec..0a23ca5fa 100644
--- a/configs/esm_software/esm_runscripts/defaults.yaml
+++ b/configs/esm_software/esm_runscripts/defaults.yaml
@@ -1,5 +1,12 @@
 # ESM-Runscripts defaults
 
+# Defaults added to the general section
+general:
+    additional_files: []
+    iterative_coupled_model: ""
+    reset_calendar_to_last: False
+    verbose: False
+
 # Defaults to be added to each model or component
 per_model_defaults:
         file_movements:
diff --git a/src/esm_runscripts/config_initialization.py b/src/esm_runscripts/config_initialization.py
index 7d02b4580..23219c355 100644
--- a/src/esm_runscripts/config_initialization.py
+++ b/src/esm_runscripts/config_initialization.py
@@ -11,7 +11,6 @@
 def init_iterative_coupling(command_line_config, user_config):
 
     # maybe switch to another runscript, if iterative coupling
-    user_config["general"]["iterative_coupled_model"] = ""
     if user_config["general"].get("iterative_coupling", False):
         user_config = chunky_parts.setup_correct_chunk_config(user_config)
 
@@ -50,23 +49,19 @@ def init_iterative_coupling(command_line_config, user_config):
     return user_config
 
 
-def complete_config_from_user_config(user_config):
-    config = get_total_config_from_user_config(user_config)
+def complete_config_with_inspect(config):
 
-    if "verbose" not in config["general"]:
-        config["general"]["verbose"] = False
+    general = config["general"]
 
-    config["general"]["reset_calendar_to_last"] = False
+    if general.get("inspect"):
+        general["jobtype"] = "inspect"
 
-    if config["general"].get("inspect"):
-        config["general"]["jobtype"] = "inspect"
-
-        if config["general"].get("inspect") not in [
+        if general.get("inspect") not in [
             "workflow",
             "overview",
             "config",
         ]:
-            config["general"]["reset_calendar_to_last"] = True
+            general["reset_calendar_to_last"] = True
 
     return config
 
@@ -103,17 +98,10 @@ def get_user_config_from_command_line(command_line_config):
         If there is a problem with the parsing of the runscript
     """
 
-    # Default user_config
-    user_config = {
-        "general": {
-            "additional_files": [],
-        },
-    }
-
     # Read the content of the runscrip
     try:
-        user_config.update(
-            esm_parser.initialize_from_yaml(command_line_config["runscript_abspath"])
+        user_config = esm_parser.initialize_from_yaml(
+            command_line_config["runscript_abspath"]
         )
     # If sys.exit is triggered through esm_parser.user_error (i.e. from
     # ``check_for_empty_components`` in ``yaml_to_dict.py``) catch the sys.exit.
@@ -185,13 +173,16 @@ def get_total_config_from_user_config(user_config):
         user_config,
     )
 
-    config = add_esm_runscripts_defaults_to_config(config)
-
     config["computer"]["jobtype"] = config["general"]["jobtype"]
     config["general"]["experiment_dir"] = (
         config["general"]["base_dir"] + "/" + config["general"]["expid"]
     )
 
+    return config
+
+
+def check_account(config):
+
     # Check if the 'account' variable is needed and missing
     if config["computer"].get("accounting", False):
         if "account" not in config["general"]:
@@ -211,12 +202,7 @@ def add_esm_runscripts_defaults_to_config(config):
     path_to_file = esm_tools.get_config_filepath() + "/esm_software/esm_runscripts/defaults.yaml"
     default_config = esm_parser.yaml_file_to_dict(path_to_file)
     config["general"]["defaults.yaml"] = default_config
-    config = distribute_per_model_defaults(config)
-    return config
-
 
-def distribute_per_model_defaults(config):
-    default_config = config["general"]["defaults.yaml"]
     if "general" in default_config:
         config["general"] = esm_parser.new_deep_update(
             config["general"], default_config["general"]
diff --git a/src/esm_runscripts/sim_objects.py b/src/esm_runscripts/sim_objects.py
index fa2449708..cc5359f8b 100644
--- a/src/esm_runscripts/sim_objects.py
+++ b/src/esm_runscripts/sim_objects.py
@@ -38,17 +38,34 @@ def __init__(self, command_line_config=None, user_config=None):
             command_line_config, user_config
         )
 
-        self.config = config_initialization.complete_config_from_user_config(
+        # Load total config from all the configuration files involved in this simulation
+        self.config = config_initialization.get_total_config_from_user_config(
             user_config
         )
 
+        # Complete missing key-values with the defaults defined in
+        # ``configs/esm_software/esm_runscripts/defaults.yaml``
+        self.config = config_initialization.add_esm_runscripts_defaults_to_config(
+            self.config
+        )
+
+        # Check if the ``account`` is missing in ``general``
+        self.config = config_initialization.check_account(self.config)
+
+        # Complete information for inspect
+        self.config = config_initialization.complete_config_with_inspect(
+            self.config
+        )
+
+        # Save the ``command_line_config`` in ``general``
         self.config = config_initialization.save_command_line_config(
             self.config, command_line_config
         )
 
-        # self.config = workflow.assemble(self.config)
-
+        # Initialize the ``prev_run`` object
         self.config["prev_run"] = prev_run.PrevRunInfo(self.config)
+
+        # Run ``prepare`` recipe
         self.config = prepare.run_job(self.config)
 
         # esm_parser.pprint_config(self.config)

From dfff3ebba51c4f9f1cdb8741bc24428557f3920e Mon Sep 17 00:00:00 2001
From: Miguel Andres-Martinez <miguel.andres-martinez@awi.de>
Date: Thu, 1 Feb 2024 21:47:28 +0100
Subject: [PATCH 75/98] fix a bug in init_interactive_info that was afecting
 esm_master

---
 src/esm_runscripts/config_initialization.py | 26 +++++++++++----------
 src/esm_runscripts/sim_objects.py           |  2 +-
 2 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/src/esm_runscripts/config_initialization.py b/src/esm_runscripts/config_initialization.py
index 23219c355..985910b60 100644
--- a/src/esm_runscripts/config_initialization.py
+++ b/src/esm_runscripts/config_initialization.py
@@ -118,11 +118,11 @@ def get_user_config_from_command_line(command_line_config):
     return user_config
 
 
-def init_interactive_info(command_line_config, user_config):
+def init_interactive_info(config, command_line_config):
     """
     Initialize key-values to evaluate at any point whether interactive functions are to
     be run (e.g. questionaries, warnings, etc.). The following key-values are set within
-    ``user_config["general"]``:
+    ``config["general"]``:
     - ``isinteractive``: ``True`` if this function is trigger by a command line
             execution
     - ``isresubmitted``: ``True`` if the ``last_jobtype`` is the same as the current
@@ -136,23 +136,25 @@ def init_interactive_info(command_line_config, user_config):
     -----
     command_line_config : dict
         Dictionary containing the information coming from the command line
-    user_config : dict
-        Dictionary containing the information from the command line on top of the
-        runscript's
+    config : dict
+        Dictionary containing the simulation configuration
 
     Returns
     -------
-    user_config : Dict
-        Same as the input ``user_config`` but with the interactive variables
+    config : dict
+        Same as the input ``config`` but with the interactive variables
     """
-    last_jobtype = command_line_config.get("last_jobtype", "")
+    if command_line_config:
+        last_jobtype = command_line_config.get("last_jobtype", "")
+    else:
+        last_jobtype = ""
     isinteractive = last_jobtype == "command_line"
-    isresubmitted = last_jobtype == user_config["general"]["jobtype"]
+    isresubmitted = last_jobtype == config["general"]["jobtype"]
 
-    user_config["general"]["isinteractive"] = isinteractive
-    user_config["general"]["isresubmitted"] = isresubmitted
+    config["general"]["isinteractive"] = isinteractive
+    config["general"]["isresubmitted"] = isresubmitted
 
-    return user_config
+    return config
 
 
 def get_total_config_from_user_config(user_config):
diff --git a/src/esm_runscripts/sim_objects.py b/src/esm_runscripts/sim_objects.py
index cc5359f8b..9861223f3 100644
--- a/src/esm_runscripts/sim_objects.py
+++ b/src/esm_runscripts/sim_objects.py
@@ -31,7 +31,7 @@ def __init__(self, command_line_config=None, user_config=None):
             )
 
         # Initialize information about interactive sessions
-        user_config = config_initialization.init_interactive_info(command_line_config, user_config)
+        user_config = config_initialization.init_interactive_info(user_config, command_line_config)
 
         # Initialize iterative coupling information
         user_config = config_initialization.init_iterative_coupling(

From 49c43e38c620357802e0998af0e1afd073bb470b Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Fri, 2 Feb 2024 08:57:31 +0100
Subject: [PATCH 76/98] Added optional argument to _write_finalized_config.

---
 src/esm_runscripts/prepcompute.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/esm_runscripts/prepcompute.py b/src/esm_runscripts/prepcompute.py
index 0aca0d487..0197cc75f 100644
--- a/src/esm_runscripts/prepcompute.py
+++ b/src/esm_runscripts/prepcompute.py
@@ -252,7 +252,7 @@ def copy_files_to_work(config):
     return config
 
 
-def _write_finalized_config(config):
+def _write_finalized_config(config, config_file_path=None):
     """Writes <expid>_finished_config.yaml file
     Parameters
     ----------
@@ -319,10 +319,11 @@ class EsmConfigDumper(yaml.dumper.Dumper):
     thisrun_config_dir = config["general"]["thisrun_config_dir"]
     expid = config["general"]["expid"]
     it_coupled_model_name = config["general"]["iterative_coupled_model"]
-    config_file_path = (
-        f"{thisrun_config_dir}/"
-        f"{expid}_{it_coupled_model_name}finished_config.yaml"
-    )
+    if not config_file_path:
+        config_file_path = (
+            f"{thisrun_config_dir}/"
+            f"{expid}_{it_coupled_model_name}finished_config.yaml"
+        )
     with open(config_file_path, "w") as config_file:
         # Avoid saving ``prev_run`` information in the config file
         config_final = copy.deepcopy(config)  # PrevRunInfo

From 3456dd65f3693773eb0ae6dfa487cc181bc610a6 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Fri, 2 Feb 2024 10:54:43 +0100
Subject: [PATCH 77/98] Add docstring

---
 src/esm_runscripts/prepcompute.py | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/src/esm_runscripts/prepcompute.py b/src/esm_runscripts/prepcompute.py
index 0197cc75f..73f87792f 100644
--- a/src/esm_runscripts/prepcompute.py
+++ b/src/esm_runscripts/prepcompute.py
@@ -253,11 +253,24 @@ def copy_files_to_work(config):
 
 
 def _write_finalized_config(config, config_file_path=None):
-    """Writes <expid>_finished_config.yaml file
-    Parameters
-    ----------
-    config : esm-tools config object
     """
+    Writes <expid>_finished_config.yaml file
+
+    Input
+    -----
+    config : dict
+        esm-tools config object
+    config_file_path : string
+        Optional file path and name where the content of config is to be stored.
+        Default is None. If not given (default) the path will be set depending on
+        settings in config and the file name is <expid>_finished_config.yaml.
+
+    Returns
+    -------
+    config : dict
+
+    """
+
     # first define the representers for the non-built-in types, as recommended
     # here: https://pyyaml.org/wiki/PyYAMLDocumentation
     def date_representer(dumper, date):

From acfe195620692b1d9a495cdc6e15e3c9decf78da Mon Sep 17 00:00:00 2001
From: Miguel Andres-Martinez <miguel.andres-martinez@awi.de>
Date: Fri, 2 Feb 2024 15:47:13 +0100
Subject: [PATCH 78/98] add docstrings to all functions in
 config_initialization.py and to SimulationSetup in sim_objects.py

---
 src/esm_runscripts/config_initialization.py | 109 +++++++++++++++++++-
 src/esm_runscripts/sim_objects.py           |  67 +++++++++---
 2 files changed, 159 insertions(+), 17 deletions(-)

diff --git a/src/esm_runscripts/config_initialization.py b/src/esm_runscripts/config_initialization.py
index 985910b60..05e40c649 100644
--- a/src/esm_runscripts/config_initialization.py
+++ b/src/esm_runscripts/config_initialization.py
@@ -9,6 +9,23 @@
 
 
 def init_iterative_coupling(command_line_config, user_config):
+    """
+    Completes information for the interactive coupling (offline coupling) in the
+    ``user_config`` if this simulation is indeed a interactive coupling.
+
+    Input
+    -----
+    command_line_config : dict
+        Dictionary containing the information coming from the command line
+    user_config : dict, esm_parser.ConfigSetup
+        Dictionary containing the basic user information
+
+    Returns
+    -------
+    user_config : dict, esm_parser.ConfigSetup
+        Dictionary containing the basic user information and the additional processed
+        information needed for offline coupling simulations
+    """
 
     # maybe switch to another runscript, if iterative coupling
     if user_config["general"].get("iterative_coupling", False):
@@ -50,6 +67,20 @@ def init_iterative_coupling(command_line_config, user_config):
 
 
 def complete_config_with_inspect(config):
+    """
+    Completes information for ``inspect`` jobs.
+
+    Input
+    -----
+    config : dict, esm_parser.ConfigSetup
+        ConfigSetup object containing the information of the current simulation
+
+    Returns
+    -------
+    config : dict, esm_parser.ConfigSetup
+        ConfigSetup object containing the information of the current simulation and the
+        ``inspect`` information
+    """
 
     general = config["general"]
 
@@ -67,6 +98,22 @@ def complete_config_with_inspect(config):
 
 
 def save_command_line_config(config, command_line_config):
+    """
+    Store the config coming from the command line in the ``config``.
+
+    Input
+    -----
+    config : dict, esm_parser.ConfigSetup
+        ConfigSetup object containing the information of the current simulation
+    command_line_config : dict
+        Dictionary containing the information coming from the command line
+
+    Returns
+    -------
+    config : dict, esm_parser.ConfigSetup
+        ConfigSetup object containing the information of the current simulation and the
+        ``command_line_config`` stored in the ``general`` section
+    """
     if command_line_config:
         config["general"]["command_line_config"] = command_line_config
     else:
@@ -79,7 +126,7 @@ def get_user_config_from_command_line(command_line_config):
     """
     Reads the runscript provided in ``command_line_config`` and overwirtes the
     information of the runscript with that of the command line (command line wins
-    over the runscript.
+    over the runscript).
 
     Input
     -----
@@ -88,7 +135,7 @@ def get_user_config_from_command_line(command_line_config):
 
     Returns
     -------
-    user_config : dict, DictWithProvenance
+    user_config : dict, esm_parser.ConfigSetup
         Dictionary containing the information from the command line on top of the
         runscript's
 
@@ -136,12 +183,12 @@ def init_interactive_info(config, command_line_config):
     -----
     command_line_config : dict
         Dictionary containing the information coming from the command line
-    config : dict
+    config : dict, esm_parser.ConfigSetup
         Dictionary containing the simulation configuration
 
     Returns
     -------
-    config : dict
+    config : dict, esm_parser.ConfigSetup
         Same as the input ``config`` but with the interactive variables
     """
     if command_line_config:
@@ -158,7 +205,21 @@ def init_interactive_info(config, command_line_config):
 
 
 def get_total_config_from_user_config(user_config):
+    """
+    Finds the version of the setup in ``user_config`` instanciates the ``config`` with
+    ``esm_parser.ConfigSetup`` which appends all the information from the config files
+    required for this simulation and stores it in ``config``.
+
+    Input
+    -----
+    user_config : dict, esm_parser.ConfigSetup
+        Dictionary containing the basic user information
 
+    Returns
+    -------
+    config : dict, esm_parser.ConfigSetup
+        ConfigSetup object containing the information of the current simulation
+    """
     if "version" in user_config["general"]:
         version = str(user_config["general"]["version"])
     else:
@@ -184,6 +245,26 @@ def get_total_config_from_user_config(user_config):
 
 
 def check_account(config):
+    """
+    Checks whether the user has **not** defined a job scheduling account (e.g. slurm)
+    ``config["general"]["account"]`` while the machine requires it for running jobs, and
+    in that case reports an error.
+
+    Input
+    -----
+    config : dict, esm_parser.ConfigSetup
+        ConfigSetup object containing the information of the current simulation
+
+    Returns
+    -------
+    config : dict, esm_parser.ConfigSetup
+        ConfigSetup object containing the information of the current simulation
+
+    Raises
+    ------
+    Missing account info : esm_parser.user_error
+        If the system requires a job scheduler account but none was provided by the user
+    """
 
     # Check if the 'account' variable is needed and missing
     if config["computer"].get("accounting", False):
@@ -201,6 +282,25 @@ def check_account(config):
 
 
 def add_esm_runscripts_defaults_to_config(config):
+    """
+    Add the defaults defined in ``configs/esm_software/esm_runscripts/defaults.yaml`` to
+    the ``config``, if those key-values do not exist yet. The ``keys`` supported in that
+    file are:
+    - ``general``: to be assigned to the ``general`` section of the ``config``
+    - ``per_model_defaults``: to be added to each component/model section of the
+      ``config``
+
+    Input
+    -----
+    config : dict, esm_parser.ConfigSetup
+        ConfigSetup object containing the information of the current simulation
+
+    Returns
+    -------
+    config : dict, esm_parser.ConfigSetup
+        ConfigSetup object containing the information of the current simulation and the
+        defaults
+    """
     path_to_file = esm_tools.get_config_filepath() + "/esm_software/esm_runscripts/defaults.yaml"
     default_config = esm_parser.yaml_file_to_dict(path_to_file)
     config["general"]["defaults.yaml"] = default_config
@@ -222,4 +322,5 @@ def add_esm_runscripts_defaults_to_config(config):
             config[model] = esm_parser.new_deep_update(
                 config[model], per_model_defaults
             )
+
     return config
diff --git a/src/esm_runscripts/sim_objects.py b/src/esm_runscripts/sim_objects.py
index 9861223f3..41c8be5e3 100644
--- a/src/esm_runscripts/sim_objects.py
+++ b/src/esm_runscripts/sim_objects.py
@@ -17,55 +17,96 @@
 
 
 class SimulationSetup(object):
-    def __init__(self, command_line_config=None, user_config=None):
 
+    def __init__(self, command_line_config=None, user_config=None):
+        """
+        Initializes the ``SimulationSetup`` object, and prepares the ``self.config`` by
+        taking the information from the ``command_line_config`` and/or the
+        ``user_config`` and expanding it with the configuration files from `ESM-Tools`
+        (in `esm_tools/configs`), and then running the ``prepare`` recipe. In essence,
+        ``__init__`` takes care of loading and baking all the config information,
+        resolving the ``chooses``, ``add_``, etc. It is used by ``esm_runscripts`` and
+        ``esm_master``. Below, a more detailed description of the steps of the
+        ``__init__``:
+
+        1. Check that at least one input is given
+        2. Initialize user_config (command line arguments + content of the runscript)
+        3. Initialize information about interactive sessions
+        4. Initialize interactive coupling information (offline coupling)
+        5. Load total config from all the configuration files involved in this
+           simulation. Input: user_config -> returns: self.config
+        6. Add the defaults in ``configs/esm_software/esm_runscripts/defaults.yaml``
+           to missing key-values in self.config
+        7. Check if the ``account`` is missing in ``general``
+        8. Complete information for inspect
+        9. Store the ``command_line_config`` in ``general``
+        10. Initialize the ``prev_run`` object
+        11. Run ``prepare`` recipe (resolve the `ESM-Tools` syntax)
+
+        Input
+        -----
+        command_line_config : dict
+            Dictionary containing the information coming from the command line
+        user_config : dict, DictWithProvenance
+            Dictionary containing the basic user information. Is only an input in
+            ``esm_master``, not in ``esm_runscripts`` (i.e. ``esm_master`` does not need
+            to read a runscript)
+
+        Raises
+        ------
+        ValueError :
+            If neither ``command_line_config`` nor ``user_config`` are defined
+        """
+        # 1. Check that at least one input is given
         if not command_line_config and not user_config:
             raise ValueError(
-                "SimulationSetup needs to be initialized with either command_line_config or user_config."
+                "SimulationSetup needs to be initialized with either "
+                "command_line_config or user_config."
             )
 
-        # Initialize user_config using the command line arguments and the given runscript
+        # 2. Initialize user_config (command line arguments + content of the runscript)
         if not user_config:
             user_config = config_initialization.get_user_config_from_command_line(
                 command_line_config
             )
 
-        # Initialize information about interactive sessions
+        # 3. Initialize information about interactive sessions
         user_config = config_initialization.init_interactive_info(user_config, command_line_config)
 
-        # Initialize iterative coupling information
+        # 4. Initialize iterative coupling information (offline coupling)
         user_config = config_initialization.init_iterative_coupling(
             command_line_config, user_config
         )
 
-        # Load total config from all the configuration files involved in this simulation
+        # 5. Load total config from all the configuration files involved in this
+        # simulation
         self.config = config_initialization.get_total_config_from_user_config(
             user_config
         )
 
-        # Complete missing key-values with the defaults defined in
-        # ``configs/esm_software/esm_runscripts/defaults.yaml``
+        # 6. Add the defaults in ``configs/esm_software/esm_runscripts/defaults.yaml``
+        # to missing key-values in self.config
         self.config = config_initialization.add_esm_runscripts_defaults_to_config(
             self.config
         )
 
-        # Check if the ``account`` is missing in ``general``
+        # 7. Check if the ``account`` is missing in ``general``
         self.config = config_initialization.check_account(self.config)
 
-        # Complete information for inspect
+        # 8. Complete information for inspect
         self.config = config_initialization.complete_config_with_inspect(
             self.config
         )
 
-        # Save the ``command_line_config`` in ``general``
+        # 9. Store the ``command_line_config`` in ``general``
         self.config = config_initialization.save_command_line_config(
             self.config, command_line_config
         )
 
-        # Initialize the ``prev_run`` object
+        # 10. Initialize the ``prev_run`` object
         self.config["prev_run"] = prev_run.PrevRunInfo(self.config)
 
-        # Run ``prepare`` recipe
+        # 11. Run ``prepare`` recipe (resolve the `ESM-Tools` syntax)
         self.config = prepare.run_job(self.config)
 
         # esm_parser.pprint_config(self.config)

From ddf643ab90a291f25382ef3bd0c744685fc3dcd7 Mon Sep 17 00:00:00 2001
From: Miguel Andres-Martinez <miguel.andres-martinez@awi.de>
Date: Fri, 2 Feb 2024 15:52:10 +0100
Subject: [PATCH 79/98] black formating of config_initialization.py and remove
 strings concatenations with +

---
 src/esm_runscripts/config_initialization.py | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/src/esm_runscripts/config_initialization.py b/src/esm_runscripts/config_initialization.py
index 05e40c649..9dd2071a0 100644
--- a/src/esm_runscripts/config_initialization.py
+++ b/src/esm_runscripts/config_initialization.py
@@ -54,9 +54,9 @@ def init_iterative_coupling(command_line_config, user_config):
         # Set the ``iterative_coupled_model`` string, to add the model name to the
         # run_ folder, finished_config.yaml, etc., to avoid overwritting with the
         # files of other offline coupled models
-        user_config["general"]["iterative_coupled_model"] = (
-            f"{user_config['general']['setup_name']}_"
-        )
+        user_config["general"][
+            "iterative_coupled_model"
+        ] = f"{user_config['general']['setup_name']}_"
         # Extract information about the models run in the previous chunk
         chunky_parts.prev_chunk_info(user_config)
 
@@ -158,7 +158,8 @@ def get_user_config_from_command_line(command_line_config):
         esm_parser.user_error(
             "Syntax error",
             f"An error occurred while reading the config file "
-            f"``{command_line_config['runscript_abspath']}`` from the command line.")
+            f"``{command_line_config['runscript_abspath']}`` from the command line.",
+        )
 
     user_config["general"].update(command_line_config)
 
@@ -237,9 +238,9 @@ def get_total_config_from_user_config(user_config):
     )
 
     config["computer"]["jobtype"] = config["general"]["jobtype"]
-    config["general"]["experiment_dir"] = (
-        config["general"]["base_dir"] + "/" + config["general"]["expid"]
-    )
+    config["general"][
+        "experiment_dir"
+    ] = f"{config['general']['base_dir']}/{config['general']['expid']}"
 
     return config
 
@@ -301,7 +302,9 @@ def add_esm_runscripts_defaults_to_config(config):
         ConfigSetup object containing the information of the current simulation and the
         defaults
     """
-    path_to_file = esm_tools.get_config_filepath() + "/esm_software/esm_runscripts/defaults.yaml"
+    path_to_file = (
+        f"{esm_tools.get_config_filepath()}/esm_software/esm_runscripts/defaults.yaml"
+    )
     default_config = esm_parser.yaml_file_to_dict(path_to_file)
     config["general"]["defaults.yaml"] = default_config
 

From 4257e41499505852eeb053f96e8bc3c87ffa3b95 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Fri, 2 Feb 2024 16:28:17 +0100
Subject: [PATCH 80/98] Put the esm_runscripts call via subprocess in its own
 prepexp revipe.

---
 .../esm_runscripts/esm_plugins.yaml           |  1 +
 .../esm_runscripts/esm_runscripts.yaml        |  1 +
 src/esm_runscripts/prepexp.py                 | 66 +++++++++++++------
 3 files changed, 49 insertions(+), 19 deletions(-)

diff --git a/configs/esm_software/esm_runscripts/esm_plugins.yaml b/configs/esm_software/esm_runscripts/esm_plugins.yaml
index 785a43f7d..35af8d2ec 100644
--- a/configs/esm_software/esm_runscripts/esm_plugins.yaml
+++ b/configs/esm_software/esm_runscripts/esm_plugins.yaml
@@ -34,6 +34,7 @@ core:
                         - "_create_component_folders"
                         - "initialize_experiment_logfile"
                         - "copy_tools_to_thisrun"
+                        - "call_esm_runscripts_internally"
                         - "_copy_preliminary_files_from_experiment_to_thisrun"
 
 
diff --git a/configs/esm_software/esm_runscripts/esm_runscripts.yaml b/configs/esm_software/esm_runscripts/esm_runscripts.yaml
index 7973ba4f8..e9177fd77 100644
--- a/configs/esm_software/esm_runscripts/esm_runscripts.yaml
+++ b/configs/esm_software/esm_runscripts/esm_runscripts.yaml
@@ -47,6 +47,7 @@ choose_job_type:
                         - "_create_component_folders"
                         - "initialize_experiment_logfile"
                         - "copy_tools_to_thisrun"
+                        - "call_esm_runscripts_internally"
                         - "_copy_preliminary_files_from_experiment_to_thisrun"
 
         observe:
diff --git a/src/esm_runscripts/prepexp.py b/src/esm_runscripts/prepexp.py
index 8ac0359cd..b0fb9d80a 100644
--- a/src/esm_runscripts/prepexp.py
+++ b/src/esm_runscripts/prepexp.py
@@ -2,6 +2,7 @@
 import shutil
 import sys
 import pathlib
+import subprocess
 
 import questionary
 from colorama import Fore
@@ -13,6 +14,10 @@
 from .helpers import end_it_all, evaluate, write_to_log
 from loguru import logger
 
+from . import prepcompute
+
+import pdb
+
 
 def run_job(config):
     evaluate(config, "prepexp", "prepexp_recipe")
@@ -36,6 +41,7 @@ def copy_tools_to_thisrun(config):
     Copies the tools, namelists and runscripts to the experiment directory,
     making sure that they don't overwrite previously existing files unless
     the ``-U`` flag is used.
+
     Parameters
     ----------
     config : dict
@@ -101,20 +107,11 @@ def copy_tools_to_thisrun(config):
         # `killall esm_runscripts` might be required
         esm_parser.user_error(error_type, error_text)
 
-    # If ``fromdir`` and ``scriptsdir`` are the same, this is already a computing
-    # simulation which means we want to use the script in the experiment folder,
-    # so no copying is needed
-    if (fromdir == scriptsdir) and not gconfig["update"]:
-        if config["general"]["verbose"]:
-            print("Started from the experiment folder, continuing...")
-        return config
-    # Not computing but initialisation
-    else:
-        if not fromdir == scriptsdir:
-            if config["general"]["verbose"]:
-                print("Not started from experiment folder, restarting...")
-        else:
-            print("Tools were updated, restarting...")
+    # If ``fromdir`` and ``scriptsdir`` are the same (the same as ``isresubmitted=True``),
+    # this is already a computing simulation which means we want to use the script
+    # in the experiment folder, so no copying is needed.
+
+    if not gconfig["isresubmitted"]:
 
         # At this point, ``fromdir`` and ``scriptsdir`` are different. Update the
         # runscript if necessary
@@ -138,6 +135,34 @@ def copy_tools_to_thisrun(config):
         for tfile in gconfig["additional_files"]:
             update_runscript(fromdir, scriptsdir, tfile, gconfig, "additional file")
 
+    return config
+
+def call_esm_runscripts_internally(config):
+    """
+    Calls esm_runscripts in a subprocess call.
+
+    Parameters
+    ----------
+    config : dict
+        Dictionary containing the configuration information.
+
+    """
+
+    gconfig = config["general"]
+
+    # Return if called from the experiment
+    if gconfig["isresubmitted"] and not gconfig["update"]:
+        if config["general"]["verbose"]:
+            print("Started from the experiment folder, continuing...")
+        return config
+    # Not computing but initialisation
+    else:
+        if not gconfig["isresubmitted"]:
+            if config["general"]["verbose"]:
+                print("Not started from experiment folder, restarting...")
+        else:
+            print("Tools were updated, restarting...")
+        scriptsdir = os.path.realpath(gconfig["experiment_scripts_dir"])
         # remove the update option otherwise it will enter an infinite loop
         original_command = gconfig["original_command"]
         options_to_remove = [" -U ", " --update "]
@@ -159,26 +184,29 @@ def copy_tools_to_thisrun(config):
             new_command_list.append(command)
 
         new_command = " ".join(new_command_list)
-        restart_command = f"cd {scriptsdir}; esm_runscripts {new_command}"
+        restart_command = f"esm_runscripts {new_command}"
 
         # Add non-interaction flags
-        non_interaction_flags = ["--no-motd", f"--last-jobtype {config['general']['jobtype']}"]
+        non_interaction_flags = ["--no-motd", f"--last-jobtype {config['general']['jobtype']}", f"-t {config['general']['jobtype']}"]
         for ni_flag in non_interaction_flags:
             # prevent continuous addition of ``ni_flag``
             if ni_flag not in restart_command:
                 restart_command += f" {ni_flag} "
 
+        #prepcompute._write_finalized_config(config, '/albedo/work/user/nwieters/myrunscripts/config_after_prepexp.txt')
+
         if config["general"]["verbose"]:
             print(restart_command)
-        os.system(restart_command)
+
+        if os.path.exists(scriptsdir):
+            subprocess.check_call(restart_command.split(), cwd=scriptsdir)
 
         gconfig["profile"] = False
         end_it_all(config)
 
-
 def _create_folders(config, filetypes):
     """
-    Generates the experiment file tree. Foldres are created for every filetype
+    Generates the experiment file tree. Folders are created for every filetype
     except for "ignore".
     """
     for filetype in filetypes:

From 6f6633a30844669db2535043eb12e3c54bdf8ccd Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Fri, 2 Feb 2024 16:42:06 +0100
Subject: [PATCH 81/98] Small changes.

---
 src/esm_runscripts/prepexp.py | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/src/esm_runscripts/prepexp.py b/src/esm_runscripts/prepexp.py
index b0fb9d80a..12f8f7f25 100644
--- a/src/esm_runscripts/prepexp.py
+++ b/src/esm_runscripts/prepexp.py
@@ -112,9 +112,8 @@ def copy_tools_to_thisrun(config):
     # in the experiment folder, so no copying is needed.
 
     if not gconfig["isresubmitted"]:
-
-        # At this point, ``fromdir`` and ``scriptsdir`` are different. Update the
-        # runscript if necessary
+        # At this point, ``fromdir`` and ``scriptsdir`` are different (same as gconfig["isresubmitted"]=False).
+        # Update the runscript if necessary
         update_runscript(
             fromdir, scriptsdir, gconfig["scriptname"], gconfig, "runscript"
         )
@@ -157,12 +156,11 @@ def call_esm_runscripts_internally(config):
         return config
     # Not computing but initialisation
     else:
-        if not gconfig["isresubmitted"]:
-            if config["general"]["verbose"]:
-                print("Not started from experiment folder, restarting...")
-        else:
-            print("Tools were updated, restarting...")
+        if config["general"]["verbose"]:
+            print("Not started from experiment folder, restarting...")
+        
         scriptsdir = os.path.realpath(gconfig["experiment_scripts_dir"])
+
         # remove the update option otherwise it will enter an infinite loop
         original_command = gconfig["original_command"]
         options_to_remove = [" -U ", " --update "]
@@ -193,13 +191,12 @@ def call_esm_runscripts_internally(config):
             if ni_flag not in restart_command:
                 restart_command += f" {ni_flag} "
 
-        #prepcompute._write_finalized_config(config, '/albedo/work/user/nwieters/myrunscripts/config_after_prepexp.txt')
-
         if config["general"]["verbose"]:
             print(restart_command)
 
         if os.path.exists(scriptsdir):
             subprocess.check_call(restart_command.split(), cwd=scriptsdir)
+        # Todo: include exception if scriptsdir not found
 
         gconfig["profile"] = False
         end_it_all(config)

From bad0f350fab47caedf88a4943038d93f2118607f Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Mon, 12 Feb 2024 13:44:25 +0100
Subject: [PATCH 82/98] Added docstrings and refactoring
 _copy_preliminary_files_from_experiment_to_thisrun function in prepexp.py

---
 src/esm_runscripts/prepexp.py | 81 +++++++++++++++++++++++++++++------
 1 file changed, 68 insertions(+), 13 deletions(-)

diff --git a/src/esm_runscripts/prepexp.py b/src/esm_runscripts/prepexp.py
index 12f8f7f25..5f2ea904a 100644
--- a/src/esm_runscripts/prepexp.py
+++ b/src/esm_runscripts/prepexp.py
@@ -15,16 +15,31 @@
 from loguru import logger
 
 from . import prepcompute
+from . import filelists
 
 import pdb
 
 
 def run_job(config):
+    """
+    Run prepexp job.
+
+    Parameters
+    ----------
+    config : dict
+        Dictionary containing the configuration information.
+    """
     evaluate(config, "prepexp", "prepexp_recipe")
     return config
 
 
 def color_diff(diff):
+    """
+
+    Parameters
+    ----------
+    diff : 
+    """
     for line in diff:
         if line.startswith("+"):
             yield Fore.GREEN + line + Fore.RESET
@@ -205,6 +220,13 @@ def _create_folders(config, filetypes):
     """
     Generates the experiment file tree. Folders are created for every filetype
     except for "ignore".
+
+    Parameters
+    ----------
+    config : dict
+        Dictionary containing the configuration information.
+    filetypes: list
+
     """
     for filetype in filetypes:
         if not filetype == "ignore":
@@ -222,6 +244,11 @@ def _create_setup_folders(config):
 
     This also creates a small marker file at the top of
     the experiment so that the "root" can be found from inside.
+
+    Parameters
+    ----------
+    config : dict
+        Dictionary containing the configuration information.
     """
     _create_folders(config["general"], config["general"]["all_filetypes"])
     with open(
@@ -232,6 +259,13 @@ def _create_setup_folders(config):
 
 
 def _create_component_folders(config):
+    """
+    Parameters
+    ----------
+    config : dict
+        Dictionary containing the configuration information.
+    """
+
     for component in config["general"]["valid_model_names"]:
         _create_folders(config[component], config["general"]["all_model_filetypes"])
     return config
@@ -254,12 +288,12 @@ def initialize_experiment_logfile(config):
 
     Parameters
     ----------
-    dict :
+    config : dict
         The experiment configuration
 
     Return
     ------
-    dict :
+    config : dict
         As per convention for the plug-in system; this gives back the
         entire config.
 
@@ -312,6 +346,7 @@ def update_runscript(fromdir, scriptsdir, tfile, gconfig, file_type):
     ``esm_runscripts``. If that flag is not used and the source and target are different
     then raises a user-friendly error recommending to use the ``-U`` flag with the warning
     that the files will be overwritten.
+
     Parameters
     ----------
     cls : obj
@@ -327,6 +362,7 @@ def update_runscript(fromdir, scriptsdir, tfile, gconfig, file_type):
     file_type : str
         String specifying the nature of the file, only necessary for printing information
         and for the error description.
+
     Exceptions
     ----------
     UserError
@@ -399,27 +435,46 @@ def update_runscript(fromdir, scriptsdir, tfile, gconfig, file_type):
 
 
 def _copy_preliminary_files_from_experiment_to_thisrun(config):
-    # I don't like this one bit. DB
+    """
+    - Copies the setup *.date file from <experiment>/scripts/ folder
+      to <experiment>/run_xxxxxxxx-xxxxxxxx/scripts/ folder.
+    - Copies the runscript yaml file from current folder (<experiment>/scripts) 
+      to <experiment>/run_xxxxxxxx-xxxxxxxx/scripts/<runscript>
+    - Copies 'additional_files' (if any, e.g. fesom_output.yaml, that are called
+      via 'further_reading' in the runscript or other config file) from ...
+      to <experiment>/run_xxxxxxxx-xxxxxxxx/scripts/ folder.
+
+    Why here???
+
+    Parameters
+    ----------
+    config : dict
+        Dictionary containing the configuration information.
+    """
+
     filelist = [
         (
             "scripts",
             f"{config['general']['expid']}_{config['general']['setup_name']}.date",
             "copy",
+        ),
+        (
+            "scripts",
+            f"{config['general']['scriptname']}",
+            "copy",
         )
     ]
 
+    for additional_file in config["general"].get("additional_files",[]):
+        filelist.append(("scripts", additional_file, "copy"))
+
     for filetype, filename, copy_or_link in filelist:
-        source = config["general"]["experiment_" + filetype + "_dir"]
-        dest = config["general"]["thisrun_" + filetype + "_dir"]
-        if copy_or_link == "copy":
-            method = shutil.copy2
-        elif copy_or_link == "link":
-            method = os.symlink
+        source = config["general"].get("experiment_" + filetype + "_dir", "")
+        dest = config["general"].get("thisrun_" + filetype + "_dir", "")
+
+        method = filelists.get_method(copy_or_link)
+
         if os.path.isfile(source + "/" + filename):
             method(source + "/" + filename, dest + "/" + filename)
-    this_script = config["general"]["scriptname"]
-    shutil.copy2("./" + this_script, config["general"]["thisrun_scripts_dir"])
 
-    for additional_file in config["general"]["additional_files"]:
-        shutil.copy2(additional_file, config["general"]["thisrun_scripts_dir"])
     return config

From b799dba2e08a3cff28c37bf9ac71226fdd930f9b Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Tue, 13 Feb 2024 15:18:55 +0100
Subject: [PATCH 83/98] Rename step in prepexp recipe, extract function that
 runs esm_runscripts in subprocess.

---
 .../esm_runscripts/esm_plugins.yaml           |  2 +-
 .../esm_runscripts/esm_runscripts.yaml        |  2 +-
 src/esm_runscripts/prepexp.py                 | 77 +++++++++++++------
 3 files changed, 57 insertions(+), 24 deletions(-)

diff --git a/configs/esm_software/esm_runscripts/esm_plugins.yaml b/configs/esm_software/esm_runscripts/esm_plugins.yaml
index 35af8d2ec..4e74b1bd0 100644
--- a/configs/esm_software/esm_runscripts/esm_plugins.yaml
+++ b/configs/esm_software/esm_runscripts/esm_plugins.yaml
@@ -34,7 +34,7 @@ core:
                         - "_create_component_folders"
                         - "initialize_experiment_logfile"
                         - "copy_tools_to_thisrun"
-                        - "call_esm_runscripts_internally"
+                        - "call_esm_runscripts_from_prepexp"
                         - "_copy_preliminary_files_from_experiment_to_thisrun"
 
 
diff --git a/configs/esm_software/esm_runscripts/esm_runscripts.yaml b/configs/esm_software/esm_runscripts/esm_runscripts.yaml
index e9177fd77..2f3783f5f 100644
--- a/configs/esm_software/esm_runscripts/esm_runscripts.yaml
+++ b/configs/esm_software/esm_runscripts/esm_runscripts.yaml
@@ -47,7 +47,7 @@ choose_job_type:
                         - "_create_component_folders"
                         - "initialize_experiment_logfile"
                         - "copy_tools_to_thisrun"
-                        - "call_esm_runscripts_internally"
+                        - "call_esm_runscripts_from_prepexp"
                         - "_copy_preliminary_files_from_experiment_to_thisrun"
 
         observe:
diff --git a/src/esm_runscripts/prepexp.py b/src/esm_runscripts/prepexp.py
index 5f2ea904a..0c685c4dd 100644
--- a/src/esm_runscripts/prepexp.py
+++ b/src/esm_runscripts/prepexp.py
@@ -151,9 +151,57 @@ def copy_tools_to_thisrun(config):
 
     return config
 
-def call_esm_runscripts_internally(config):
+def _call_esm_runscripts_internally(config, command, exedir):
     """
-    Calls esm_runscripts in a subprocess call.
+    - Removes update flags from command input.
+    - Adds additional flags to command input.
+    - Addes esm_runscripts command if necessary.
+    - Calls esm_runscipts internally in a subprocess call.
+
+    Parameters
+    ----------
+    config : dict
+        Dictionary containing the configuration information.
+    command : str
+        Command or esm_runscripts arguments
+    exedir : str
+        Path from which the command is to be executed.
+
+    """
+
+    # Remove the update option otherwise it will enter an infinite loop.
+    options_to_remove = [" -U ", " --update "]
+    for option in options_to_remove:
+        command = command.replace(option, " ")
+
+    # Check if 'esm_runscripts' command is given in 'command' argument.
+    if not command.startswith("esm_runscripts"):
+        command = f"esm_runscripts {command}"
+
+    # Add non-interaction flags, current jobtype, and current task (phase) [-t] if not already in 'command'
+    non_interaction_flags = ["--no-motd", f"--last-jobtype {config['general']['jobtype']}", f"-t {config['general']['jobtype']}"]
+    for ni_flag in non_interaction_flags:
+        # prevent continuous addition of ``ni_flag``
+        if ni_flag not in command:
+            command += f" {ni_flag} "
+
+    # Check if the path exists, in which 'commend' should be executed
+    if os.path.exists(exedir):
+        subprocess.check_call(command.split(), cwd=exedir)
+    else:
+        error_type = "runtime error in function ``_call_esm_runscripts_internally``"
+        error_text = f"{exedir} does not exists. Aborting."
+        esm_parser.user_error(error_type, error_text)
+
+    if config["general"]["verbose"]:
+        print(command)
+
+    end_it_all(config)
+
+def call_esm_runscripts_from_prepexp(config):
+    """
+    Recipe step that creates a esm_runscripts command and submits this
+    to the functions that executes this command in a subprocess call.
 
     Parameters
     ----------
@@ -164,11 +212,12 @@ def call_esm_runscripts_internally(config):
 
     gconfig = config["general"]
 
-    # Return if called from the experiment
+    # Return if already called from the experiment folder
     if gconfig["isresubmitted"] and not gconfig["update"]:
         if config["general"]["verbose"]:
             print("Started from the experiment folder, continuing...")
         return config
+
     # Not computing but initialisation
     else:
         if config["general"]["verbose"]:
@@ -178,15 +227,13 @@ def call_esm_runscripts_internally(config):
 
         # remove the update option otherwise it will enter an infinite loop
         original_command = gconfig["original_command"]
-        options_to_remove = [" -U ", " --update "]
-        for option in options_to_remove:
-            original_command = original_command.replace(option, " ")
 
         # Before resubmitting the esm_runscripts, the path of the runscript
         # needs to be modified. Remove the absolute/relative path
         runscript_absdir, runscript = os.path.split(gconfig["runscript_abspath"])
         original_command_list = original_command.split()
         new_command_list = []
+
         for command in original_command_list:
             # current command will contain the full path, so replace it with
             # the YAML file only since we are going to execute it from the
@@ -197,24 +244,10 @@ def call_esm_runscripts_internally(config):
             new_command_list.append(command)
 
         new_command = " ".join(new_command_list)
-        restart_command = f"esm_runscripts {new_command}"
-
-        # Add non-interaction flags
-        non_interaction_flags = ["--no-motd", f"--last-jobtype {config['general']['jobtype']}", f"-t {config['general']['jobtype']}"]
-        for ni_flag in non_interaction_flags:
-            # prevent continuous addition of ``ni_flag``
-            if ni_flag not in restart_command:
-                restart_command += f" {ni_flag} "
 
-        if config["general"]["verbose"]:
-            print(restart_command)
+        _call_esm_runscripts_internally(config, new_command, scriptsdir)
 
-        if os.path.exists(scriptsdir):
-            subprocess.check_call(restart_command.split(), cwd=scriptsdir)
-        # Todo: include exception if scriptsdir not found
-
-        gconfig["profile"] = False
-        end_it_all(config)
+        return config
 
 def _create_folders(config, filetypes):
     """

From ec2f8a702089d037cc74852b39c9a10dc0636221 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Wed, 14 Feb 2024 16:19:37 +0100
Subject: [PATCH 84/98] Bugfix

---
 src/esm_runscripts/prepexp.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/esm_runscripts/prepexp.py b/src/esm_runscripts/prepexp.py
index 0c685c4dd..935dd0cec 100644
--- a/src/esm_runscripts/prepexp.py
+++ b/src/esm_runscripts/prepexp.py
@@ -212,8 +212,11 @@ def call_esm_runscripts_from_prepexp(config):
 
     gconfig = config["general"]
 
+    fromdir = os.path.realpath(gconfig["started_from"])
+    scriptsdir = os.path.realpath(gconfig["experiment_scripts_dir"])
+
     # Return if already called from the experiment folder
-    if gconfig["isresubmitted"] and not gconfig["update"]:
+    if (fromdir == scriptsdir) and not gconfig["update"]:
         if config["general"]["verbose"]:
             print("Started from the experiment folder, continuing...")
         return config

From 5ed14cb11ebfe40502e87dfd00c01933830c65aa Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Wed, 14 Feb 2024 16:50:47 +0100
Subject: [PATCH 85/98] Small changes.

---
 src/esm_runscripts/prepexp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/esm_runscripts/prepexp.py b/src/esm_runscripts/prepexp.py
index 935dd0cec..fb127fe80 100644
--- a/src/esm_runscripts/prepexp.py
+++ b/src/esm_runscripts/prepexp.py
@@ -196,6 +196,7 @@ def _call_esm_runscripts_internally(config, command, exedir):
     if config["general"]["verbose"]:
         print(command)
 
+    # Exit after resubmission of esm_runscripts
     end_it_all(config)
 
 def call_esm_runscripts_from_prepexp(config):
@@ -228,7 +229,6 @@ def call_esm_runscripts_from_prepexp(config):
         
         scriptsdir = os.path.realpath(gconfig["experiment_scripts_dir"])
 
-        # remove the update option otherwise it will enter an infinite loop
         original_command = gconfig["original_command"]
 
         # Before resubmitting the esm_runscripts, the path of the runscript

From 50d25ac407d5f070ddbd2edcdf5a1fe3ef5d03ac Mon Sep 17 00:00:00 2001
From: Miguel Andres-Martinez <miguel.andres-martinez@awi.de>
Date: Thu, 15 Feb 2024 13:36:45 +0100
Subject: [PATCH 86/98] isort src/esm_runscripts/prepexp.py

---
 src/esm_runscripts/prepexp.py | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/src/esm_runscripts/prepexp.py b/src/esm_runscripts/prepexp.py
index fb127fe80..e9f1df4ea 100644
--- a/src/esm_runscripts/prepexp.py
+++ b/src/esm_runscripts/prepexp.py
@@ -1,23 +1,19 @@
 import os
-import shutil
-import sys
 import pathlib
+import shutil
 import subprocess
+import sys
 
 import questionary
 from colorama import Fore
+from loguru import logger
 
-import esm_tools
 import esm_parser
+import esm_tools
 
+from . import filelists, prepcompute
 from .batch_system import batch_system
 from .helpers import end_it_all, evaluate, write_to_log
-from loguru import logger
-
-from . import prepcompute
-from . import filelists
-
-import pdb
 
 
 def run_job(config):
@@ -418,6 +414,7 @@ def update_runscript(fromdir, scriptsdir, tfile, gconfig, file_type):
     # If the target path exists compare the two scripts
     else:
         import difflib
+
         import esm_parser
 
         script_o = open(fromdir + "/" + tfile).readlines()

From 532792c9ccd6bf9bc34d35d7f8df36cc26c26631 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Mon, 19 Feb 2024 10:40:08 +0100
Subject: [PATCH 87/98] Add comment in prepexp recipe.

---
 configs/esm_software/esm_runscripts/esm_plugins.yaml | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/configs/esm_software/esm_runscripts/esm_plugins.yaml b/configs/esm_software/esm_runscripts/esm_plugins.yaml
index 4e74b1bd0..7c5ff537b 100644
--- a/configs/esm_software/esm_runscripts/esm_plugins.yaml
+++ b/configs/esm_software/esm_runscripts/esm_plugins.yaml
@@ -34,11 +34,15 @@ core:
                         - "_create_component_folders"
                         - "initialize_experiment_logfile"
                         - "copy_tools_to_thisrun"
+                        # The next step will call esm_runscripts again from the experiment folder,
+                        # if the current folder is not the experiment folder already.
+                        # If esm_runscripts will be excuted, the following step will be skipped, since
+                        # there is a sys.exit() after the esm_runscripts call.
                         - "call_esm_runscripts_from_prepexp"
+                        # The following step will be skipped, if not in experiment folder.
+                        # It will only be called if esm_runscripts is called from experiment folder.
                         - "_copy_preliminary_files_from_experiment_to_thisrun"
 
-
-
                 prepcompute:
                         - "compile_model"
                         - "_write_finalized_config"

From 7dced13d79c93474ff25b4e6af93cc8bb749bcb2 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Mon, 19 Feb 2024 11:36:02 +0100
Subject: [PATCH 88/98] Added review suggestions.

---
 src/esm_runscripts/prepexp.py | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/src/esm_runscripts/prepexp.py b/src/esm_runscripts/prepexp.py
index e9f1df4ea..bd6154f63 100644
--- a/src/esm_runscripts/prepexp.py
+++ b/src/esm_runscripts/prepexp.py
@@ -31,10 +31,14 @@ def run_job(config):
 
 def color_diff(diff):
     """
+    Adds color to text from a diff:
+    - Green for lines starting with ``+``
+    - Red for lines starting with ``-``
+    - Blue for lines starting with ``^``
 
     Parameters
     ----------
-    diff : 
+    diff : iterable object of strings to be colored
     """
     for line in diff:
         if line.startswith("+"):
@@ -198,13 +202,21 @@ def _call_esm_runscripts_internally(config, command, exedir):
 def call_esm_runscripts_from_prepexp(config):
     """
     Recipe step that creates a esm_runscripts command and submits this
-    to the functions that executes this command in a subprocess call.
+    to the function that modifies (if necessary) and executes this command
+    in a subprocess call, if the current folder is NOT the experiment folder.
+    The function will return and do nothing, if it is called already 
+    from the experiment folder.
+
 
     Parameters
     ----------
     config : dict
         Dictionary containing the configuration information.
 
+    Returns
+    -------
+    config : dict
+        Dictionary containing the configuration information.
     """
 
     gconfig = config["general"]
@@ -212,7 +224,7 @@ def call_esm_runscripts_from_prepexp(config):
     fromdir = os.path.realpath(gconfig["started_from"])
     scriptsdir = os.path.realpath(gconfig["experiment_scripts_dir"])
 
-    # Return if already called from the experiment folder
+    # Return if already called from the experiment folder without update flag
     if (fromdir == scriptsdir) and not gconfig["update"]:
         if config["general"]["verbose"]:
             print("Started from the experiment folder, continuing...")
@@ -477,8 +489,6 @@ def _copy_preliminary_files_from_experiment_to_thisrun(config):
       via 'further_reading' in the runscript or other config file) from ...
       to <experiment>/run_xxxxxxxx-xxxxxxxx/scripts/ folder.
 
-    Why here???
-
     Parameters
     ----------
     config : dict
@@ -502,8 +512,8 @@ def _copy_preliminary_files_from_experiment_to_thisrun(config):
         filelist.append(("scripts", additional_file, "copy"))
 
     for filetype, filename, copy_or_link in filelist:
-        source = config["general"].get("experiment_" + filetype + "_dir", "")
-        dest = config["general"].get("thisrun_" + filetype + "_dir", "")
+        source = config["general"].get(f"experiment_{filetype}_dir", "")
+        dest = config["general"].get(f"thisrun_{filetype}_dir", "")
 
         method = filelists.get_method(copy_or_link)
 

From 9910105a8f0dcf3c9761721226d9562a4a9aa401 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Mon, 19 Feb 2024 13:31:41 +0100
Subject: [PATCH 89/98] Changed condition for runscript update.

---
 src/esm_runscripts/prepexp.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/src/esm_runscripts/prepexp.py b/src/esm_runscripts/prepexp.py
index bd6154f63..6a1741e2e 100644
--- a/src/esm_runscripts/prepexp.py
+++ b/src/esm_runscripts/prepexp.py
@@ -122,13 +122,9 @@ def copy_tools_to_thisrun(config):
         # `killall esm_runscripts` might be required
         esm_parser.user_error(error_type, error_text)
 
-    # If ``fromdir`` and ``scriptsdir`` are the same (the same as ``isresubmitted=True``),
-    # this is already a computing simulation which means we want to use the script
-    # in the experiment folder, so no copying is needed.
-
-    if not gconfig["isresubmitted"]:
-        # At this point, ``fromdir`` and ``scriptsdir`` are different (same as gconfig["isresubmitted"]=False).
-        # Update the runscript if necessary
+    # If ``fromdir`` and ``scriptsdir`` are different, we are not in the experiment.
+    # In this case, update the runscript if necessary.
+    if not fromdir == scriptsdir:
         update_runscript(
             fromdir, scriptsdir, gconfig["scriptname"], gconfig, "runscript"
         )

From b86b0712185788b4733d40fe6db78fff74d3eb4d Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Mon, 19 Feb 2024 14:16:52 +0100
Subject: [PATCH 90/98] Changed string concatenations to f-string format.

---
 src/esm_runscripts/prepexp.py | 38 +++++++++++++++++------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/src/esm_runscripts/prepexp.py b/src/esm_runscripts/prepexp.py
index 6a1741e2e..94e94dca7 100644
--- a/src/esm_runscripts/prepexp.py
+++ b/src/esm_runscripts/prepexp.py
@@ -74,8 +74,8 @@ def copy_tools_to_thisrun(config):
     # Paths inside the experiment directory where esm_tools and namelists
     # are copied to. Those are not functional but a reference to what was
     # the original state when the experiment was firstly started
-    tools_dir = scriptsdir + "/esm_tools/configs"
-    namelists_dir = scriptsdir + "/esm_tools/namelists"
+    tools_dir = f"{scriptsdir}/esm_tools/configs"
+    namelists_dir = f"{scriptsdir}/esm_tools/namelists"
 
     if config["general"]["verbose"]:
         print("Started from :", fromdir)
@@ -271,10 +271,10 @@ def _create_folders(config, filetypes):
     for filetype in filetypes:
         if not filetype == "ignore":
             if not filetype == "work":
-                if not os.path.exists(config["experiment_" + filetype + "_dir"]):
-                    os.makedirs(config["experiment_" + filetype + "_dir"])
-            if not os.path.exists(config["thisrun_" + filetype + "_dir"]):
-                os.makedirs(config["thisrun_" + filetype + "_dir"])
+                if not os.path.exists(config[f"experiment_{filetype}_dir"]):
+                    os.makedirs(config[f"experiment_{filetype}_dir"])
+            if not os.path.exists(config[f"thisrun_{filetype}_dir"]):
+                os.makedirs(config[f"thisrun_{filetype}_dir"])
 
 
 def _create_setup_folders(config):
@@ -292,7 +292,7 @@ def _create_setup_folders(config):
     """
     _create_folders(config["general"], config["general"]["all_filetypes"])
     with open(
-        config["general"]["experiment_dir"] + "/.top_of_exp_tree", "w"
+        f"{config['general']['experiment_dir']}/.top_of_exp_tree", "w"
     ) as top_marker:
         top_marker.write(f"Top of experiment {config['general']['expid']}")
     return config
@@ -415,8 +415,8 @@ def update_runscript(fromdir, scriptsdir, tfile, gconfig, file_type):
 
     # If the target file in ``scriptsdir`` does not exist, then copy the file
     # to the target.
-    if not os.path.isfile(scriptsdir + "/" + tfile):
-        oldscript = fromdir + "/" + tfile
+    if not os.path.isfile(f"{scriptsdir}/{tfile}"):
+        oldscript = f"{fromdir}/{tfile}"
         print(oldscript)
         shutil.copy2(oldscript, scriptsdir)
     # If the target path exists compare the two scripts
@@ -425,16 +425,16 @@ def update_runscript(fromdir, scriptsdir, tfile, gconfig, file_type):
 
         import esm_parser
 
-        script_o = open(fromdir + "/" + tfile).readlines()
-        script_t = open(scriptsdir + "/" + tfile).readlines()
+        script_o = open(f"{fromdir}/{tfile}").readlines()
+        script_t = open(f"{scriptsdir}/{tfile}").readlines()
 
         diffobj = difflib.SequenceMatcher(a=script_t, b=script_o)
         # If the files are different
         if not diffobj.ratio() == 1:
             # Find differences
             differences = (
-                f"{fromdir + '/' + tfile} differs from "
-                + f"{scriptsdir + '/' + tfile}:\n"
+                f"{fromdir}/{tfile} differs from "
+                + f"{scriptsdir}/'{tfile}:\n"
             )
             for line in color_diff(difflib.unified_diff(script_t, script_o)):
                 differences += line
@@ -444,9 +444,9 @@ def update_runscript(fromdir, scriptsdir, tfile, gconfig, file_type):
             if gconfig["update"]:
                 esm_parser.user_note(
                     f"Original {file_type} different from target",
-                    differences + "\n" + f"{scriptsdir + '/' + tfile} will be updated!",
+                    f"{differences}\n{scriptsdir}/{tfile} will be updated!",
                 )
-                oldscript = fromdir + "/" + tfile
+                oldscript = f"{fromdir}/{tfile}"
                 print(oldscript)
                 shutil.copy2(oldscript, scriptsdir)
             # If the --update flag is not called, exit with an error showing the
@@ -466,10 +466,10 @@ def update_runscript(fromdir, scriptsdir, tfile, gconfig, file_type):
                     + "updated with the above changes?"
                 ).ask()
                 if update_choice:
-                    oldscript = fromdir + "/" + tfile
+                    oldscript = f"{fromdir}/{tfile}"
                     print(oldscript)
                     shutil.copy2(oldscript, scriptsdir)
-                    print(f"{scriptsdir + '/' + tfile} updated!")
+                    print(f"{scriptsdir}/{tfile} updated!")
                 else:
                     print("Submission stopped")
                     sys.exit(1)
@@ -513,7 +513,7 @@ def _copy_preliminary_files_from_experiment_to_thisrun(config):
 
         method = filelists.get_method(copy_or_link)
 
-        if os.path.isfile(source + "/" + filename):
-            method(source + "/" + filename, dest + "/" + filename)
+        if os.path.isfile(f"{source}/{filename}"):
+            method(f"{source}/{filename}", f"{dest}/{filename}")
 
     return config

From 3dc080b0da0444b3d62b2d5e0a8ae59ea297f826 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Mon, 19 Feb 2024 15:07:13 +0100
Subject: [PATCH 91/98] Applied flake8 recommendations.

---
 src/esm_runscripts/prepexp.py | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/src/esm_runscripts/prepexp.py b/src/esm_runscripts/prepexp.py
index 94e94dca7..6fe5a6625 100644
--- a/src/esm_runscripts/prepexp.py
+++ b/src/esm_runscripts/prepexp.py
@@ -11,8 +11,7 @@
 import esm_parser
 import esm_tools
 
-from . import filelists, prepcompute
-from .batch_system import batch_system
+from . import filelists
 from .helpers import end_it_all, evaluate, write_to_log
 
 
@@ -96,7 +95,7 @@ def copy_tools_to_thisrun(config):
         print("Copying standard yamls from: ", esm_tools.get_config_filepath())
         esm_tools.copy_config_folder(tools_dir)
     if not os.path.isdir(namelists_dir):
-        print("Copying standard namelists from: ",esm_tools.get_namelist_filepath())
+        print("Copying standard namelists from: ", esm_tools.get_namelist_filepath())
         esm_tools.copy_namelist_folder(namelists_dir)
 
     # check for recursive creation of the file tree. This prevents the risk of
@@ -147,6 +146,7 @@ def copy_tools_to_thisrun(config):
 
     return config
 
+
 def _call_esm_runscripts_internally(config, command, exedir):
     """
     - Removes update flags from command input.
@@ -174,8 +174,13 @@ def _call_esm_runscripts_internally(config, command, exedir):
     if not command.startswith("esm_runscripts"):
         command = f"esm_runscripts {command}"
 
-    # Add non-interaction flags, current jobtype, and current task (phase) [-t] if not already in 'command'
-    non_interaction_flags = ["--no-motd", f"--last-jobtype {config['general']['jobtype']}", f"-t {config['general']['jobtype']}"]
+    # Add non-interaction flags, current jobtype, and current task (phase) [-t]
+    # if not already in 'command'
+    non_interaction_flags = [
+                                "--no-motd",
+                                f"--last-jobtype {config['general']['jobtype']}",
+                                f"-t {config['general']['jobtype']}"
+                            ]
     for ni_flag in non_interaction_flags:
         # prevent continuous addition of ``ni_flag``
         if ni_flag not in command:
@@ -195,12 +200,13 @@ def _call_esm_runscripts_internally(config, command, exedir):
     # Exit after resubmission of esm_runscripts
     end_it_all(config)
 
+
 def call_esm_runscripts_from_prepexp(config):
     """
     Recipe step that creates a esm_runscripts command and submits this
     to the function that modifies (if necessary) and executes this command
     in a subprocess call, if the current folder is NOT the experiment folder.
-    The function will return and do nothing, if it is called already 
+    The function will return and do nothing, if it is called already
     from the experiment folder.
 
 
@@ -230,7 +236,7 @@ def call_esm_runscripts_from_prepexp(config):
     else:
         if config["general"]["verbose"]:
             print("Not started from experiment folder, restarting...")
-        
+
         scriptsdir = os.path.realpath(gconfig["experiment_scripts_dir"])
 
         original_command = gconfig["original_command"]
@@ -256,6 +262,7 @@ def call_esm_runscripts_from_prepexp(config):
 
         return config
 
+
 def _create_folders(config, filetypes):
     """
     Generates the experiment file tree. Folders are created for every filetype
@@ -479,7 +486,7 @@ def _copy_preliminary_files_from_experiment_to_thisrun(config):
     """
     - Copies the setup *.date file from <experiment>/scripts/ folder
       to <experiment>/run_xxxxxxxx-xxxxxxxx/scripts/ folder.
-    - Copies the runscript yaml file from current folder (<experiment>/scripts) 
+    - Copies the runscript yaml file from current folder (<experiment>/scripts)
       to <experiment>/run_xxxxxxxx-xxxxxxxx/scripts/<runscript>
     - Copies 'additional_files' (if any, e.g. fesom_output.yaml, that are called
       via 'further_reading' in the runscript or other config file) from ...
@@ -504,7 +511,7 @@ def _copy_preliminary_files_from_experiment_to_thisrun(config):
         )
     ]
 
-    for additional_file in config["general"].get("additional_files",[]):
+    for additional_file in config["general"].get("additional_files", []):
         filelist.append(("scripts", additional_file, "copy"))
 
     for filetype, filename, copy_or_link in filelist:

From 8208f29236bf552e92a7c3ab29e4605785d6349f Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Tue, 20 Feb 2024 09:36:35 +0100
Subject: [PATCH 92/98] Add flake8 recommondations.

---
 tests/test_esm_runscripts/test_workflow.py | 23 ++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/tests/test_esm_runscripts/test_workflow.py b/tests/test_esm_runscripts/test_workflow.py
index eb59efb9c..1e030dac1 100644
--- a/tests/test_esm_runscripts/test_workflow.py
+++ b/tests/test_esm_runscripts/test_workflow.py
@@ -6,6 +6,7 @@
 import pytest
 import esm_parser
 
+
 @pytest.fixture()
 def test_config():
     """Setup a test config dictionary."""
@@ -116,6 +117,7 @@ def test_config():
     }
     return config
 
+
 @pytest.fixture()
 def test_default_config_example():
     """Setup a test config dictionary."""
@@ -184,6 +186,7 @@ def test_default_config_example():
     }
     return config
 
+
 # Test scenarios
 # 0. Default workflow
 @pytest.mark.example
@@ -193,6 +196,7 @@ def test_example_0(test_default_config_example):
     assumption = "prepcompute ['prepcompute'] ->  compute ['compute'] ->  tidy ['tidy'] ->  prepcompute ['prepcompute']"
     assert order == assumption
 
+
 # 1. Add one single phase at the end of the default workflow (Example 1 in documentation)
 @pytest.mark.example
 def test_example_1(test_default_config_example):
@@ -208,6 +212,7 @@ def test_example_1(test_default_config_example):
     order = workflow.display_workflow_sequence(test_default_config_example, display=False)
     assert order == assumption
 
+
 # 2. Prepend new phase at the beginning of workflow
 @pytest.mark.example
 def test_example_2(test_default_config_example):
@@ -224,6 +229,7 @@ def test_example_2(test_default_config_example):
     order = workflow.display_workflow_sequence(test_default_config_example, display=False)
     assert order == assumption
 
+
 # 3. Append new phase at the beginning of workflow
 @pytest.mark.example
 def test_example_3(test_default_config_example):
@@ -240,6 +246,7 @@ def test_example_3(test_default_config_example):
     order = workflow.display_workflow_sequence(test_default_config_example, display=False)
     assert order == assumption
 
+
 # 4. Append two new phases in the same cluster
 @pytest.mark.example
 def test_example_4(test_default_config_example):
@@ -266,6 +273,7 @@ def test_example_4(test_default_config_example):
     order = workflow.display_workflow_sequence(test_default_config_example, display=False)
     assert order == assumption
 
+
 # 5. Append two new phases in the same cluster, one of them triggers the next run
 @pytest.mark.example
 def test_example_5(test_default_config_example):
@@ -294,8 +302,8 @@ def test_example_5(test_default_config_example):
     assert order == assumption
 
 # 6. Append two new phases in the same cluster at the beginning of run
-#@pytest.mark.example
-#def test_example_6(test_default_config_example):
+# @pytest.mark.example
+# def test_example_6(test_default_config_example):
 #    test_default_config_example["general"]["workflow"] = {
 #        'phases': {
 #            'my_new_last_phase': {
@@ -320,7 +328,6 @@ def test_example_5(test_default_config_example):
 #    assert order == assumption
 
 
-
 # Test exceptions
 # 1. If still a workflow keyword is set by user.
 @pytest.mark.exceptions
@@ -329,6 +336,7 @@ def test_exception_test_workflow_keyword(test_config):
     with pytest.raises(SystemExit):
         test_config = workflow.assemble_workflow(test_config)
 
+
 # 2. If an invalid phase keyword is set.
 @pytest.mark.exceptions
 def test_exception_invalid_phase_keyword(test_config):
@@ -336,6 +344,7 @@ def test_exception_invalid_phase_keyword(test_config):
     with pytest.raises(SystemExit):
         test_config = workflow.assemble_workflow(test_config)
 
+
 # 3. If an unknown phase is called for , e.g. in 'run_after'
 @pytest.mark.exceptions
 def test_exception_unknown_phase(test_config):
@@ -343,6 +352,7 @@ def test_exception_unknown_phase(test_config):
     with pytest.raises(SystemExit):
         test_config = workflow.assemble_workflow(test_config)
 
+
 # 4. If a user phase has the same name as a default phase.
 @pytest.mark.exceptions
 def test_if_user_phase_has_default_phase_name(test_config):
@@ -359,6 +369,7 @@ def test_if_user_phase_has_default_phase_name(test_config):
     with pytest.raises(SystemExit):
         test_config = workflow.assemble_workflow(test_config)
 
+
 # 5. If two user phases have the same name and are defined in different models/setups.
 @pytest.mark.exceptions
 def test_if_two_user_phase_have_the_same_name(test_config):
@@ -375,6 +386,7 @@ def test_if_two_user_phase_have_the_same_name(test_config):
     with pytest.raises(SystemExit):
         test_config = workflow.assemble_workflow(test_config)
 
+
 # 6. If no queue is given for a phase that should be run on sbatch system.
 @pytest.mark.exceptions
 def test_if_queue_is_missing(test_config):
@@ -382,7 +394,7 @@ def test_if_queue_is_missing(test_config):
         'batch_or_shell': 'batch',
         'order_in_cluster': 'concurrent',
         'cluster': 'test_cluster',
-        #'run_on_queue': 'compute',
+        # 'run_on_queue': 'compute',
         'nproc': 1,
         'run_after': 'tidy',
         'script_dir': '/work/ab0995/a270089/myrunscripts/',
@@ -391,6 +403,7 @@ def test_if_queue_is_missing(test_config):
     with pytest.raises(SystemExit):
         test_config = workflow.assemble_workflow(test_config)
 
+
 # 7. If more than one phase trigger_next_run.
 @pytest.mark.exceptions
 def test_if_trigger_next_run_unclear(test_config):
@@ -398,6 +411,7 @@ def test_if_trigger_next_run_unclear(test_config):
     with pytest.raises(SystemExit):
         test_config = workflow.assemble_workflow(test_config)
 
+
 # 8. If no default phases are defined in defaults.yaml.
 @pytest.mark.exceptions
 def test_if_no_default_phases(test_config):
@@ -405,6 +419,7 @@ def test_if_no_default_phases(test_config):
     with pytest.raises(SystemExit):
         test_config = workflow.assemble_workflow(test_config)
 
+
 # 9. If no default workflow is defined in defaults.yaml.
 @pytest.mark.exceptions
 def test_inf_no_default_workflow(test_config):

From e3f8e2b264da035827031ad1d92b47c941e9006c Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Tue, 20 Feb 2024 14:32:05 +0100
Subject: [PATCH 93/98] Added workflow test.

---
 tests/test_esm_runscripts/test_workflow.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tests/test_esm_runscripts/test_workflow.py b/tests/test_esm_runscripts/test_workflow.py
index 1e030dac1..e19c54f20 100644
--- a/tests/test_esm_runscripts/test_workflow.py
+++ b/tests/test_esm_runscripts/test_workflow.py
@@ -426,3 +426,11 @@ def test_inf_no_default_workflow(test_config):
     test_config['general']['defaults.yaml'].pop('workflow', None)
     with pytest.raises(SystemExit):
         test_config = workflow.assemble_workflow(test_config)
+
+
+def test_get_workflow_commands_for_run():
+    config = esm_parser.yaml_file_to_dict('config3.yaml')
+    config = workflow.get_workflow_commands_for_run(config)
+    # assert order == assumption
+    assert 1 == 2
+

From 6e3c43a9b5c66a799149c8b70e80230f5e169a1b Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Thu, 22 Feb 2024 15:07:51 +0100
Subject: [PATCH 94/98] Reactivate call of maybe_resubmit.

---
 src/esm_runscripts/sim_objects.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/esm_runscripts/sim_objects.py b/src/esm_runscripts/sim_objects.py
index 1bbc59222..9945869cf 100644
--- a/src/esm_runscripts/sim_objects.py
+++ b/src/esm_runscripts/sim_objects.py
@@ -163,7 +163,7 @@ def __call__(self, kill_after_submit=True):
             # Is this dunction call needed here?
             self.assembler()
 
-        #resubmit.maybe_resubmit(self.config)
+        resubmit.maybe_resubmit(self.config)
 
         # if this line is reached, the run is submitted and running or finished
         self.config = logfiles.finalize_logfiles(self.config, org_jobtype)

From 8386d6f2137e19173e545948d593769fe5ef4731 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Thu, 22 Feb 2024 15:32:05 +0100
Subject: [PATCH 95/98] Implement new function to write run file.

---
 src/esm_runscripts/batch_system.py | 167 +++++++++++++++++++++++++----
 1 file changed, 148 insertions(+), 19 deletions(-)

diff --git a/src/esm_runscripts/batch_system.py b/src/esm_runscripts/batch_system.py
index 82cbccb53..d963dfa27 100644
--- a/src/esm_runscripts/batch_system.py
+++ b/src/esm_runscripts/batch_system.py
@@ -1,17 +1,16 @@
+import copy
 import os
-import textwrap
-import sys
+import pdb
 import stat
-import copy
+import sys
+import textwrap
 
 import esm_environment
-
 from esm_parser import find_variable, user_error, user_note
-from . import helpers
-from . import dataprocess
-from . import prepare
-from .slurm import Slurm
+
+from . import dataprocess, helpers, prepare
 from .pbs import Pbs
+from .slurm import Slurm
 
 known_batch_systems = ["slurm", "pbs"]
 reserved_jobtypes = ["prepcompute", "compute", "prepare", "tidy", "inspect"]
@@ -263,7 +262,7 @@ def calculate_requirements(config, cluster=None):
 
             if (
                 not cluster
-                or not cluster in config["general"]["workflow"]["subjob_clusters"]
+                or cluster not in config["general"]["workflow"]["subjob_clusters"]
             ):
                 print(f"Unknown or unset cluster: {cluster}.")
                 sys.exit(-1)
@@ -324,7 +323,7 @@ def get_extra(config):
                         )
             elif isinstance(pre_run_commands, str):
                 extras.append(pre_run_commands)
-            elif pre_run_commands == None:
+            elif pre_run_commands is None:
                 continue
             else:
                 user_error(
@@ -371,6 +370,33 @@ def append_done_statement(config, subjob):
 
     @staticmethod
     def get_run_commands(config, subjob, batch_or_shell):  # here or in compute.py?
+        """
+        Creates the command of the specific phase to be put in the *.run file.
+        
+        This function is covering the following phase types:
+        - SimulationSetup: phases that are run as 'esm_runscripts' command
+        - batch: phases that are run via 'srun' command
+        - shell: phases that are run as shell scripts. The command is generated by
+                 a function in the 'dataprocess' module.
+
+        Special case: phase 'compute':
+        - This phase is of type 'batch'
+
+        Todo: How about other phases of type batch? in dataprocess???
+
+        Parameters
+        ----------
+            config: dict
+            subjob: str
+                Name of phase
+            batch_or_shell: str
+                Type of phase (SimulationSetup, batch, shell)
+
+        Returns
+        -------
+            commands: list
+                List of command and arguments of a phase depending of its type.
+        """
 
         commands = []
         if subjob.startswith("compute"):
@@ -385,7 +411,7 @@ def get_run_commands(config, subjob, batch_or_shell):  # here or in compute.py?
                     )
                     if config["general"].get("multi_srun"):
                         return self.bs.get_run_commands_multisrun(config, commands)
-            # for shell scrips
+            # for shell scripts
             else:
                 for model in config:
                     if model == "computer":
@@ -397,9 +423,48 @@ def get_run_commands(config, subjob, batch_or_shell):  # here or in compute.py?
                             + f" 2>&1{config['computer'].get('write_execution_log', '')} &"
                         )
         else:
-            subjob_tasks = dataprocess.subjob_tasks(config, subjob, batch_or_shell)
-            for task in subjob_tasks:
-                commands.append(task)
+            if batch_or_shell == "SimulationSetup":
+                # for phase type 'SimulationSetup' (e.g. prepcompute, tidy)
+                commands = []
+                commands.append("esm_runscripts")
+                # add runscript with absolute path
+                runscript = config["general"]["runscript_abspath"]
+                commands.append(runscript)
+                # add experiment id
+                commands.append(f"-e {config['general']['expid']}")
+                # add task
+                commands.append(f"-t {subjob}")
+                # add date
+                commands.append("-s " + config['general']['current_date'].format(
+                         form=9, givenph=False, givenpm=False, givenps=False
+                     ))
+                # add 
+                commands.append(f"-r {str(config['general']['run_number'])}")
+                # add verbose and no message_of_the day argument
+                commands.append("-v --no-motd")
+                # add last-jobtype argument
+                commands.append(f"--last-jobtype {subjob}")
+                # add --open-ran or use_venv argument
+                if "--open-run" in config["general"]["original_command"] or not config[
+                    "general"
+                ].get("use_venv"):
+                    commands.append(" --open-run")
+                elif "--contained-run" in config["general"][
+                    "original_command"
+                ] or config["general"].get("use_venv"):
+                    commands.append("--contained-run")
+                else:
+                    print("ERROR -- Not sure if you were in a contained or open run!")
+                    print(
+                        "ERROR -- See write_simple_runscript for the code causing this."
+                    )
+                    sys.exit(1)
+            else:
+                # for all other phase types (batch, shell) except phase 'compute'
+                subjob_tasks = dataprocess.subjob_tasks(config, subjob, batch_or_shell)
+                # Why was this necessary? And not set commands directly?
+                for task in subjob_tasks:
+                    commands.append(task)
 
         return commands
 
@@ -430,8 +495,73 @@ def get_submit_command(config, batch_or_shell, runfilename):
         return commands
 
     @staticmethod
-    def write_simple_runscript(config, cluster, batch_or_shell="batch"):
+    def write_run_batch_script(config, cluster, batch_or_shell="batch"):
+        
+        workflow = config["general"]["workflow"]["object"]
+        phases = workflow.phases
+
+        self = config["general"]["batch"]
+        runfilename = batch_system.get_run_filename(config, cluster)
+        if config["general"]["verbose"]:
+            print("jobtype: ", config["general"]["jobtype"])
+            print("writing run file for:", cluster)
+
+        with open(runfilename, "w") as runfile:
+            config = batch_system.calculate_requirements(config, "compute")
+            # TODO: remove it once it's not needed anymore (substituted by packjob)
+            if (
+                cluster in reserved_jobtypes
+                and config["computer"].get("taskset", False)
+            ):
+                config = config["general"]["batch"].write_het_par_wrappers(config)
+            # Prepare launcher
+            config = config["general"]["batch"].prepare_launcher(config, "compute")
+            # Initiate the header
+            header = batch_system.get_batch_header(config, "compute")
+            for line in header:
+                runfile.write(line + "\n")
+            runfile.write("\n")
+            # environment for each phase of a cluster
+            environment = batch_system.get_environment(config, "compute")
+            batch_system.write_env(config, environment, runfilename)
+            for line in environment:
+                runfile.write(line + "\n")
+
+            # extra entries for each phase
+            extra = batch_system.get_extra(config)
+            for line in extra:
+                runfile.write(line + "\n")
+
+            for phase in ["compute", "tidy", "prepcompute"]:
+                # Add actual commands
+                commands = batch_system.get_run_commands(
+                    config, phase, batch_or_shell
+                )
+                # commands = clusterconf.get("data_task_list", [])
+                runfile.write("\n")
+                runfile.write(self.append_start_statement(config, phase) + "\n")
+                runfile.write("\n")
+                runfile.write("cd " + config["general"]["thisrun_work_dir"] + "\n")
+
+#                if cluster in reserved_jobtypes:
+                config["general"]["batch"].add_pre_launcher_lines(
+                    config, cluster, runfile
+                )
+
+                phase = workflow.get_workflow_phase_by_name(phase)
+                command = phase["run_command"]
+                runfile.write(f"{command} --run-from-batch-script\n")
+                runfile.write(workflow.append_done_statement(config, phase) + "\n")
+
+            runfile.write("\n")
+            runfile.write("wait\n")
+
+        breakpoint()
+        return config
+
 
+    @staticmethod
+    def write_simple_runscript(config, cluster, batch_or_shell="batch"):
         # if no cluster is specified, work on the one we are in
         # if not cluster:
         #    cluster = config["general"]["jobtype"]
@@ -833,7 +963,7 @@ def calc_launcher_flags(config, model, cluster):
             cpus_per_proc = config[model].get("cpus_per_proc", omp_num_threads)
             # Check for CPUs and OpenMP threads
             if omp_num_threads > cpus_per_proc:
-                esm_parser.user_error(
+                user_error(
                     "OpenMP configuration",
                     (
                         "The number of OpenMP threads cannot be larger than the number"
@@ -845,7 +975,7 @@ def calc_launcher_flags(config, model, cluster):
         elif "nproca" in config[model] and "nprocb" in config[model]:
             # ``nproca``/``nprocb`` not compatible with ``omp_num_threads``
             if omp_num_threads > 1:
-                esm_parser.user_note(
+                user_note(
                     "nproc",
                     "``nproca``/``nprocb`` not compatible with ``omp_num_threads``",
                 )
@@ -854,7 +984,7 @@ def calc_launcher_flags(config, model, cluster):
             omp_num_threads = 1
         else:
 
-# kh 22.06.22 defensive (user_error/user_note could also be added here)
+            # kh 22.06.22 defensive (user_error/user_note could also be added here)
             nproc = 0
             cpus_per_proc = 0
 #           omp_num_threads = 0
@@ -885,7 +1015,6 @@ def calc_launcher_flags(config, model, cluster):
         return launcher_flags
 
 
-
 def submits_another_job(config, cluster):
     clusterconf = config["general"]["workflow"]["subjob_clusters"][cluster]
     if clusterconf.get("next_submit", []) == []:

From 1742320bee6511611c9e0c187f40b42922f598c5 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Thu, 22 Feb 2024 15:35:28 +0100
Subject: [PATCH 96/98] Start to refactor maybe_resubmit function.

---
 src/esm_runscripts/resubmit.py | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/esm_runscripts/resubmit.py b/src/esm_runscripts/resubmit.py
index 5046372c1..c123a8629 100644
--- a/src/esm_runscripts/resubmit.py
+++ b/src/esm_runscripts/resubmit.py
@@ -247,7 +247,22 @@ def maybe_resubmit(config):
     -------
         config : dict
     """
-    jobtype = config["general"]["jobtype"]
+    jobtype = config["general"]["jobtype"]              # current phase
+    workflow = config["general"]["workflow"]["object"]
+    first_phase_in_cluster = workflow.first_task_in_queue
+    if jobtype == first_phase_in_cluster:
+        config = config["general"]["batch"].write_run_batch_script(
+            config, 'sim_cluster', 'batch'
+        )
+        print("Create *.run file")
+    phases = workflow.phases
+
+    resubmit_batch_or_shell(config, "batch", "compute")
+
+    breakpoint()
+
+    # TODO: Check if run from *.run file
+    # TODO: Create *.run file
 
     # check if nextrun starts???
     # this resubmits any following jobtypes/phases until nextrun is true
@@ -262,6 +277,7 @@ def maybe_resubmit(config):
     #           it will start to loop over all remaining clusters to check if it can sumbit something (SimulationSetup, sbatch, shell) and do so,
     #           until first start of next run is reached.
     #        3. nextrun is fals if no entries in next_submit for that particular jobtype/cluster
+
     nextrun = resubmit_recursively(config, jobtype=jobtype)
 
     if nextrun:  # submit list contains stuff from next run

From 0705bcdd8e988218d4ea29042344e775b1539b71 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Thu, 22 Feb 2024 15:44:19 +0100
Subject: [PATCH 97/98] Add workflow methon to set the run command of each
 phase.

---
 src/esm_runscripts/workflow.py | 72 +++++++++++++++++++++++++++-------
 1 file changed, 58 insertions(+), 14 deletions(-)

diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py
index 9cb31e190..5c7d84cd5 100644
--- a/src/esm_runscripts/workflow.py
+++ b/src/esm_runscripts/workflow.py
@@ -1,6 +1,8 @@
 import copy
 import esm_parser
 
+from . import batch_system
+
 #import pygraphviz as pgv
 import pdb
 
@@ -22,10 +24,12 @@ def __init__(self, workflow_yaml):
         -------
         none
         """
+
         # TODO: check if key is in workflow_yaml dict
         self.phases = []                            # list for default phases (defined in defauls.yaml)
         self.user_phases = []                       # list of user phases (collected by collect_all_user_phases)
         self.clusters = {}                          # dictionary of clusters
+        
 
         error = False
 
@@ -35,6 +39,8 @@ def __init__(self, workflow_yaml):
         else: error = True
         if "next_run_triggered_by" in workflow_yaml: self.next_run_triggered_by =  workflow_yaml["next_run_triggered_by"]
         else: error = True
+        if "default_cluster" in workflow_yaml: self.default_cluster =  workflow_yaml["default_cluster"]
+        else: error = True
 
         if error:
             err_msg = (
@@ -348,15 +354,16 @@ def write_to_config(self, config):
         """
         Write to config.
         TODO: Rename ``subjobs`` to ``phases``. But this needs changes also in resubmit.py and other files???
+        TODO: Put workflow object into config.
         """
-        # 1. Delete unnecessary config workflow entries (e.g. in general)
+        # Delete unnecessary config workflow entries (e.g. in general)
         if "workflow" in config["general"]:
             del config["general"]["workflow"]
 
         config["general"]["workflow"] = {}
         config["general"]["workflow"].update(self.__dict__)
 
-        # 3. Write clusters
+        # Write clusters
         config["general"]["workflow"]["subjob_clusters"] = {}
         for cluster in self.clusters:
             config["general"]["workflow"]["subjob_clusters"][cluster] = {}
@@ -366,16 +373,20 @@ def write_to_config(self, config):
             for att in self.clusters[cluster]:
                 config["general"]["workflow"]["subjob_clusters"][cluster][att] = self.clusters[cluster][att]
 
-        # 2. Write subjobs/phases
+        # Write subjobs/phases
         config["general"]["workflow"]["subjobs"] = {}
         for phase in self.phases + self.user_phases:
-            temp_dict = phase
-            config["general"]["workflow"]["subjobs"][phase["name"]] = temp_dict
+            config["general"]["workflow"]["subjobs"][phase["name"]] = {}
+            for key, val in phase.items():
+                config["general"]["workflow"]["subjobs"][phase["name"]][key] = val
 
-        # delete phases and user_phases
+        # Delete phases and user_phases
         del config["general"]["workflow"]["phases"]
         del config["general"]["workflow"]["user_phases"]
 
+        # Write workflow object
+        config["general"]["workflow"]["object"] = self
+
         return config
 
     def check_user_workflow_dependency(self):
@@ -547,6 +558,36 @@ def order_phases_and_clusters(self):
 
         return self
 
+
+    def get_workflow_commands_for_run(self, config):
+        """
+        Gets the command for each workflow phase and writes in into config.
+    
+        Parameters
+        ----------
+            self: workflow object
+            config: dict
+    
+        Returns
+        -------
+            config: dict
+        """
+        phases = self.phases
+        phase_type = ""
+        run_command = ""
+        run_commands = []
+    
+        for phase in phases:
+            phase_type = phase.get("batch_or_shell", None)
+            phase_name = phase.get("name", "")
+            run_command = ' '.join(batch_system.get_run_commands(config, phase_name, phase_type))
+            phase["run_command"] = run_command
+            run_commands.append(run_command)
+
+        setattr(self, 'run_commands', run_commands)
+        return self
+
+
     def prepend_newrun_job(self):
         """
         - Creates a new cluster "newrun" if first_task_in_queue is not of
@@ -668,7 +709,7 @@ def __init__(self, phase):
         self["run_after"] = None
         self["trigger_next_run"] = False               # needed
         self["submit_to_batch_system"] = False         # needed
-#        self["run_on_queue"] = None
+        self["run_on_queue"] = None
         self["cluster"] = None
         self["next_submit"] = []                       # needed
         self["called_from"] = None                     # needed
@@ -679,6 +720,7 @@ def __init__(self, phase):
         self["skip_run_number"] = None
         self["call_function"] = None
         self["env_preparation"] = None
+        self["run_command"] = None
 
         # check if phase keywords are valid
         for key, value in phase.items():
@@ -782,6 +824,8 @@ def assemble_workflow(config):
     #   a user phase (type batch or shell)
     workflow = workflow.prepend_newrun_job()
 
+    workflow = workflow.get_workflow_commands_for_run(config)
+
     # - write the workflow to config
     # - Remove old worklow from config
     config = workflow.write_to_config(config)
@@ -789,13 +833,13 @@ def assemble_workflow(config):
     # Set "jobtype" for the first task???
     # NOTE: This is either first default phase or
     #       newrun??? Can't this not be set in prepend_newrun then?
-#    if config["general"]["jobtype"] == "unknown":
-#        config["general"]["command_line_config"]["jobtype"] = config["general"][
-#            "workflow"
-#        ]["first_task_in_queue"]
-#        config["general"]["jobtype"] = config["general"]["workflow"][
-#            "first_task_in_queue"
-#        ]
+    if config["general"]["jobtype"] == "unknown":
+        config["general"]["command_line_config"]["jobtype"] = config["general"][
+            "workflow"
+        ]["first_task_in_queue"]
+        config["general"]["jobtype"] = config["general"]["workflow"][
+            "first_task_in_queue"
+        ]
 
     return config
 

From 458f0a0572e071fb9a1bc276ae390a8563855388 Mon Sep 17 00:00:00 2001
From: Nadine Wieters <nadine.wieters@awi.de>
Date: Thu, 22 Feb 2024 15:46:36 +0100
Subject: [PATCH 98/98] Add default cluster.

---
 configs/esm_software/esm_runscripts/defaults.yaml | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/configs/esm_software/esm_runscripts/defaults.yaml b/configs/esm_software/esm_runscripts/defaults.yaml
index 1824320b9..474eebded 100644
--- a/configs/esm_software/esm_runscripts/defaults.yaml
+++ b/configs/esm_software/esm_runscripts/defaults.yaml
@@ -23,12 +23,13 @@ workflow:
     first_task_in_queue: prepcompute
     last_task_in_queue: tidy
     next_run_triggered_by: tidy
+    default_cluster: sim_cluster
 
     phases:
         prepcompute:
             batch_or_shell: SimulationSetup
             called_from: tidy
-            cluster: prepcompute
+            cluster: sim_cluster
             name: prepcompute
             next_submit:
                 - compute
@@ -39,7 +40,7 @@ workflow:
             submit_to_batch_system: False
         compute:
             called_from: prepcompute
-            cluster: compute
+            cluster: sim_cluster
             name: compute
             next_submit:
                 - tidy
@@ -52,7 +53,7 @@ workflow:
         tidy:
             batch_or_shell: SimulationSetup
             called_from: compute
-            cluster: tidy
+            cluster: sim_cluster
             name: tidy
             next_submit:
                 - prepcompute