From 80d3b6a890aa6822b3c7dbcb16f6307ae3b43037 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Mon, 4 Sep 2023 12:04:32 +0200 Subject: [PATCH 01/98] Added some temporary comments in workflow.py. --- src/esm_runscripts/workflow.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index b677ea507..af97f288c 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -336,7 +336,7 @@ def init_total_workflow(config): if not "compute" in config["general"]["workflow"]["subjobs"]: config["general"]["workflow"]["subjobs"].update(compute) if not "tidy" in config["general"]["workflow"]["subjobs"]: - config["general"]["workflow"]["subjobs"].update(tidy) + config["general"]["workflow"]["subjobs"].update(tidy) if not "last_task_in_queue" in config["general"]["workflow"]: config["general"]["workflow"]["last_task_in_queue"] = "tidy" if not "first_task_in_queue" in config["general"]["workflow"]: @@ -350,35 +350,47 @@ def init_total_workflow(config): def collect_all_workflow_information(config): + # For each component entry in config (can be a model or a new entry (e.g. 'flows') for model in config: if "workflow" in config[model]: w_config = config[model]["workflow"] gw_config = config["general"]["workflow"] + # looks for entry 'subjob_clusters' in config of each component if "subjob_clusters" in w_config: for cluster in w_config["subjob_clusters"]: + # if a cluster is also in the general config, this cluster will be merged together ... if cluster in gw_config["subjob_clusters"]: gw_config["subjob_clusters"][cluster] = merge_if_possible( w_config["subjob_clusters"][cluster], gw_config["subjob_clusters"][cluster], ) + # if cluster is not in general config, it will copied into it. 
else: gw_config["subjob_clusters"][cluster] = copy.deepcopy( w_config["subjob_clusters"][cluster], ) + # looks for entry 'subjobs' in config of each component if "subjobs" in w_config: + # copies component workflow config to new variable ref_config ref_config = copy.deepcopy(w_config) + # ??? for every subjob in ??? for subjob in list(copy.deepcopy(w_config["subjobs"])): # subjobs (other than clusters) should be model specific + # subjobs that are defined in subjobs of components workflow configs and not in a subjob_cluster are copied to general with suffix of componet entry. gw_config["subjobs"][subjob + "_" + model] = copy.deepcopy( w_config["subjobs"][subjob] ) + # if this copied subjobs is also n general workflow subjobs it will be deleted there if subjob in gw_config["subjobs"]: del gw_config["subjobs"][subjob] + # make sure that the run_after and run_before refer to that cluster + # for all subjobs now in general workflow for other_subjob in gw_config["subjobs"]: + # sets run_after and run_before to correct subjob??? 
if "run_after" in gw_config["subjobs"][other_subjob]: if ( gw_config["subjobs"][other_subjob]["run_after"] From b63e9a98854e69615f7dd14360022e0296272d9c Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Wed, 6 Sep 2023 11:35:20 +0200 Subject: [PATCH 02/98] Started to add docstrings to workflow.py --- src/esm_runscripts/workflow.py | 81 +++++++++++++++++++++++++++++++--- 1 file changed, 76 insertions(+), 5 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index af97f288c..56a46bb15 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -3,6 +3,13 @@ def skip_cluster(cluster, config): + """ + Arguments: + cluster + config + Returns: + True or False + """ gw_config = config["general"]["workflow"] clusterconf = gw_config["subjob_clusters"][cluster] @@ -37,7 +44,14 @@ def skip_cluster(cluster, config): def assemble_workflow(config): - # + """ + Assembles the workflow tasks from the runscript. + + Arguments: + config -- dictionary + Returns: + config + """ config = init_total_workflow(config) config = collect_all_workflow_information(config) config = complete_clusters(config) @@ -56,11 +70,25 @@ def assemble_workflow(config): def display_nicely(config): + """ + Pretty prints the workflow configuration assembled in config["general"]. 
+ + Arguments: + config -- dictionary + Returns: + config + """ esm_parser.pprint_config(config["general"]["workflow"]) return config def prepend_newrun_job(config): + """ + Arguments: + config -- dictionary + Returns: + config + """ gw_config = config["general"]["workflow"] first_cluster_name = gw_config["first_task_in_queue"] first_cluster = gw_config["subjob_clusters"][first_cluster_name] @@ -103,10 +131,14 @@ def prepend_newrun_job(config): return config - # - def order_clusters(config): + """ + Arguments: + config -- dictionary + Returns: + config + """ gw_config = config["general"]["workflow"] for subjob_cluster in gw_config["subjob_clusters"]: @@ -190,6 +222,12 @@ def order_clusters(config): def complete_clusters(config): + """ + Arguments: + config -- dictionary + Returns: + config + """ gw_config = config["general"]["workflow"] # First, complete the matching subjobs <-> clusters @@ -272,6 +310,16 @@ def complete_clusters(config): def merge_single_entry_if_possible(entry, sourceconf, targetconf): + """ + Merges a dictionary entry into a target dictionary that has he same key. + + Arguments: + entry -- dictionary key + sourceconf -- dictionary + targetconf -- dictionary + Returns: + targetconf + """ if entry in sourceconf: if entry in targetconf and not sourceconf[entry] == targetconf[entry]: print(f"Mismatch found in {entry} for cluster {targetconf}") @@ -281,7 +329,14 @@ def merge_single_entry_if_possible(entry, sourceconf, targetconf): def init_total_workflow(config): - # add compute, tidy etc information already here! + """ + Add compute, tidy etc information already here! + + Arguments: + config -- dictionary + Returns: + config + """ tasks = 0 for model in config["general"]["valid_model_names"]: @@ -349,8 +404,14 @@ def init_total_workflow(config): def collect_all_workflow_information(config): + """ + For each component entry in config (can be a model or a new entry (e.g. 
'flows') - # For each component entry in config (can be a model or a new entry (e.g. 'flows') + Arguments: + config -- dictionary + Returns: + config + """ for model in config: if "workflow" in config[model]: w_config = config[model]["workflow"] @@ -433,6 +494,16 @@ def collect_all_workflow_information(config): def merge_if_possible(source, target): + """ + Merges the entries of source dictionary into target dictionary, if not already in. + (Will not overwrite entries in target dictionary.) + + Arguments: + source -- dictionary + target -- dictionary + Returns: + target + """ for entry in source: if entry in target: if not source[entry] == target[entry]: From f9c06594e1a3b88754445d74f3cec6dd46714084 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Thu, 7 Sep 2023 13:50:33 +0200 Subject: [PATCH 03/98] Added a docstring to esm_plugin_mamager --- src/esm_plugin_manager/esm_plugin_manager.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/esm_plugin_manager/esm_plugin_manager.py b/src/esm_plugin_manager/esm_plugin_manager.py index 792d27502..8b52c6cd8 100644 --- a/src/esm_plugin_manager/esm_plugin_manager.py +++ b/src/esm_plugin_manager/esm_plugin_manager.py @@ -117,6 +117,17 @@ def check_plugin_availability(plugins): def work_through_recipe(recipe, plugins, config): + """ + Works through the esm_runscripts recipes and plugin recipes. + + Arguments: + recipe -- dictionary # What is in these two dictionaries? Where do the entries are comming from? 
+ plugins -- dictionary + config -- dictionary + + Returns: + config + """ if config.get("general", {}).get("debug_recipe", False): import pdb From 0c4a96290c9dac1c72b9ede8433c55b60ce3098a Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Thu, 7 Sep 2023 13:51:35 +0200 Subject: [PATCH 04/98] Some docstring changes in workflow.py --- src/esm_runscripts/workflow.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index 56a46bb15..84e9769d2 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -405,7 +405,11 @@ def init_total_workflow(config): def collect_all_workflow_information(config): """ - For each component entry in config (can be a model or a new entry (e.g. 'flows') + Collects all workflow information for each component entry in config + (can be a model/component or a new entry (e.g. 'flows') + + Checks if there are "workflow" entries in the user runscript and copies or merges them into + config["general"]["workflow"] Arguments: config -- dictionary @@ -414,13 +418,16 @@ def collect_all_workflow_information(config): """ for model in config: if "workflow" in config[model]: + # looks for "workflow" in each entry of config (can be model/component, general, etc.) w_config = config[model]["workflow"] + # looks for "workflow" in "general" section of config. gw_config = config["general"]["workflow"] - # looks for entry 'subjob_clusters' in config of each component + # looks for entry 'subjob_clusters' in config of each component that has a "workflow" if "subjob_clusters" in w_config: for cluster in w_config["subjob_clusters"]: - # if a cluster is also in the general config, this cluster will be merged together ... + # if a certain cluster is also in the general config, this cluster will be merged together ... + # what cluster could this be? 
if cluster in gw_config["subjob_clusters"]: gw_config["subjob_clusters"][cluster] = merge_if_possible( w_config["subjob_clusters"][cluster], From f0bd3856c5c5964e31b6033f10a299bcaacc9db3 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Fri, 6 Oct 2023 12:40:07 +0200 Subject: [PATCH 05/98] Added comments to esm_plugin_manager --- src/esm_plugin_manager/esm_plugin_manager.py | 28 +++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/src/esm_plugin_manager/esm_plugin_manager.py b/src/esm_plugin_manager/esm_plugin_manager.py index 8b52c6cd8..dbfcb4c59 100644 --- a/src/esm_plugin_manager/esm_plugin_manager.py +++ b/src/esm_plugin_manager/esm_plugin_manager.py @@ -18,11 +18,26 @@ def read_recipe(recipe, additional_dict, needs_parse=True): def read_plugin_information(plugins_bare, recipe, needs_parse=True): - # pluginfile = esm_plugins.yaml + """ + Reads in plugin information from the pluginfile = esm_plugins.yaml + + Arguments: + plugins_bare -- disctionary as it is read in by function 'read_recipe' + recipe -- dictionary of all workitems of an recipe + needs_parse -- True (default) or False + + Returns: + plugins - dictionary that has information for each workitem of recipe: + module: e.g. esm_runscripts + submodule: e.g. prepare (this is the Python file where the workitem function is defined. + type: e.g. 
core + """ if needs_parse: plugins_bare = yaml_file_to_dict(plugins_bare) extra_info = ["location", "git-url"] plugins = {} + # loop over all recipe entries + # tries to find workitem in 'plugins_bare' for workitem in recipe["recipe"]: found = False for module_type in ["core", "plugins"]: @@ -31,13 +46,22 @@ def read_plugin_information(plugins_bare, recipe, needs_parse=True): for submodule in plugins_bare[module_type][module]: if submodule in extra_info: continue + # functionlist is a list of workitems (Python function names) functionlist = plugins_bare[module_type][module][submodule] + # if the workitem of the recipe is found in this list + # the dictionary plugins will be filled with fields for + # - 'module' (e.g. esm_runscirpts) + # - 'submodule' (e.g. prepare, this is basically the name + # of the python file this function is defined in) + # - 'type' (core of plugin) if workitem in functionlist: plugins[workitem] = { "module": module, "submodule": submodule, "type": module_type, } + # add extra info ["location", "git-url"] if found in plugins_bare dict + # is there a use case for this? for extra in extra_info: if extra in plugins_bare[module_type][module]: plugins[workitem].update( @@ -47,6 +71,7 @@ def read_plugin_information(plugins_bare, recipe, needs_parse=True): ] } ) + # if workitem is found, all loops including loop over module_type can be aborted. found = True break if found: @@ -133,6 +158,7 @@ def work_through_recipe(recipe, plugins, config): pdb.set_trace() recipes = recipe["recipe"] + # Loop over the recipe for index, workitem in enumerate(recipes, start=1): if config["general"].get("verbose", False): # diagnostic message of which recipe step is being executed From 8c4b753c4abcdc37ed9199ee0955b3f63a85576b Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Fri, 6 Oct 2023 12:40:54 +0200 Subject: [PATCH 06/98] Start to refactor workflow. 
--- src/esm_runscripts/workflow.py | 637 +++++++++++++++++++++------------ 1 file changed, 402 insertions(+), 235 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index 84e9769d2..94828e44d 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -1,63 +1,186 @@ import sys, copy, os import esm_parser - -def skip_cluster(cluster, config): - """ - Arguments: - cluster - config - Returns: - True or False - """ - gw_config = config["general"]["workflow"] - clusterconf = gw_config["subjob_clusters"][cluster] - - """ - print(f"run_only {clusterconf.get('run_only', 'Error') }") - print(f"skip_chunk_number {clusterconf.get('skip_chunk_number', -999)}") - print(f"skip_run_number {clusterconf.get('skip_run_number', -999)}") - print(f"chunk_number {config['general'].get('chunk_number', -998)}") - print(f"run_number {config['general'].get('run_number', -998)}") - print(f"last_run_in_chunk {config['general']['last_run_in_chunk']}") - print(f"first_run_in_chunk {config['general']['first_run_in_chunk']}") - """ - - if clusterconf.get("run_only", "Error") == "last_run_in_chunk" and not config[ - "general" - ].get("last_run_in_chunk", False): - return True - if clusterconf.get("run_only", "Error") == "first_run_in_chunk" and not config[ - "general" - ].get("first_run_in_chunk", False): - return True - if clusterconf.get("skip_chunk_number", -999) == config["general"].get( - "chunk_number", -998 - ): - return True - if clusterconf.get("skip_run_number", -999) == config["general"].get( - "run_number", -998 - ): - return True - - return False - +#import pdb + +class Workflow: + """A workflow class.""" + default_phases = [] + user_phases = [] + always_run_with = [] + first_task_in_queue = "" + last_task_in_queue = "" + next_run_triggered_by = "" + + def __init__(self, phases, always_run_with=[]): + """ + Create a new workflow. 
+ + Arguments: + phases -- List of workflow phases + always_run_with -- List of phases that precedes each phase + """ + # TODO: NW call here the phase object ??? + self.phases = phases + self.always_run_with = always_run_with + + def num_phases_in_workflow(self): + """ + Return the number of phases in workflow. + """ + return len(self.phases) + + def write_to_config(self, config): + """ + Write to config. + """ + # NW: It is assumed here, that there are no workflows in config["general"] + # or that these are removed after collect_... + config["general"]["workflow"] = {} + config["general"]["workflow"].update(self.__dict__) + config["general"]["workflow"]["subjobs"] = {} + for phase in self.phases: + temp_dict = {phase.name: phase.__dict__} + config["general"]["workflow"]["subjobs"].update(temp_dict) + + return config + + def check_user_workflow_dependency(self): + """ + Check whether the user defined workflow phases are independent from eachother or not. + """ + independent = False + user_phases_names = [phase.name for phase in self.user_phases] + run_after_list = [phase.run_after for phase in self.user_phases] + run_before_list = [phase.run_before for phase in self.user_phases] + if not set(user_phases_names).intersection(set(run_after_list).union(set(run_before_list))): + independent = True + else: + independent = False + + return independent + + def check_unknown_phases(self): + """ + Check if any user phase addresses an unknown workflow phase. + """ + unknown_user_phase = True + phases_names = [phase.name for phase in self.phases] + user_phases_names = [phase.name for phase in self.user_phases] + # Filter out all falsy items (e.g. 
[], "", None) + run_after_list = list(filter(None, [phase.run_after for phase in self.user_phases])) + run_before_list = list(filter(None, [phase.run_before for phase in self.user_phases])) + + unknown_user_phases = set(run_after_list).union(set(run_before_list)).difference(set(user_phases_names).union(set(phases_names))) + return unknown_user_phases + + def skip_cluster(self, config): + """ + Checks if a phase/cluster can be skipped. + Needed keywords: run_only, skip_chunk_number + Arguments: + self + config + Returns: + True or False + """ + #gw_config = config["general"]["workflow"] + #clusterconf = gw_config["subjob_clusters"][cluster] + + #""" + #print(f"run_only {clusterconf.get('run_only', 'Error') }") + #print(f"skip_chunk_number {clusterconf.get('skip_chunk_number', -999)}") + #print(f"skip_run_number {clusterconf.get('skip_run_number', -999)}") + #print(f"chunk_number {config['general'].get('chunk_number', -998)}") + #print(f"run_number {config['general'].get('run_number', -998)}") + #print(f"last_run_in_chunk {config['general']['last_run_in_chunk']}") + #print(f"first_run_in_chunk {config['general']['first_run_in_chunk']}") + #""" + + #if clusterconf.get("run_only", "Error") == "last_run_in_chunk" and not config[ + # "general" + #].get("last_run_in_chunk", False): + # return True + #if clusterconf.get("run_only", "Error") == "first_run_in_chunk" and not config[ + # "general" + #].get("first_run_in_chunk", False): + # return True + #if clusterconf.get("skip_chunk_number", -999) == config["general"].get( + # "chunk_number", -998 + #): + # return True + #if clusterconf.get("skip_run_number", -999) == config["general"].get( + # "run_number", -998 + #): + # return True + + return False + +class WorkflowPhase: + """A workflow phase class.""" + name = None + nproc = 1 + run_before = None + run_after = None + submit_to_batch_system = True + run_on_queue = None + cluster = None + next_submit = [] + called_from = None + + def __init__(self, phase_name): + 
self.name = phase_name + +class UserWorkflowPhase(WorkflowPhase): + """A user workflow phase class.""" + batch_or_shell = "batch" + order_in_cluster = "concurrent" + script = None + script_dir = None + call_function = None + env_preparation = None + run_only = None + skip_chunk_number = None + + def __init__(self, phase_name): + self.name = phase_name def assemble_workflow(config): + from . import Workflow """ - Assembles the workflow tasks from the runscript. + Assembles the workflow tasks. + Is called from the plugin recipe prepcompute. Arguments: config -- dictionary Returns: config """ - config = init_total_workflow(config) - config = collect_all_workflow_information(config) - config = complete_clusters(config) - config = order_clusters(config) + + # 1. Generate default workflow object + #TODO: preset of default workflow phases should be set in some config file. + workflow = Workflow(["prepcompute","compute","tidy"],always_run_with=["prepare","prepexp"]) + # initialize the default workflow as Workflow object + # TODO: NW where are these default phases defined??? + # 2. Initialize default workflow phases + workflow = init_default_workflow(workflow, config) + # 3. Read in workflows from runscript and config files + workflow = collect_all_user_workflows(workflow, config) + + #config = collect_all_workflow_information(config) + +# Why do I need to do the following function call? + # 4. Order user workflows into default workflow wrt. workflow attributs. + workflow = order_clusters(workflow, config) + + workflow = complete_clusters(workflow, config) + breakpoint() config = prepend_newrun_job(config) + # 5. write the workflow to config + config = workflow.write_to_config(config) + # 6. Remove old worklow from config + # Set "jobtype" for the first task??? 
if config["general"]["jobtype"] == "unknown": config["general"]["command_line_config"]["jobtype"] = config["general"][ "workflow" @@ -82,16 +205,24 @@ def display_nicely(config): return config -def prepend_newrun_job(config): +def prepend_newrun_job(workflow, config): """ + Looks for subjob_cluster that are set by user workflow (not a 'SimulationSetup') + and do not follow a 'SimulationSetup' subjob_clusters. + E.g. if two user workflow are the last two subjob_clusters ??? + Any other example cases when this is the case? + Arguments: config -- dictionary Returns: - config + workflow """ gw_config = config["general"]["workflow"] first_cluster_name = gw_config["first_task_in_queue"] + print(first_cluster_name) + breakpoint() first_cluster = gw_config["subjob_clusters"][first_cluster_name] + esm_parser.pprint_config(first_cluster) if not first_cluster.get("batch_or_shell", "Error") == "SimulationSetup": @@ -131,98 +262,98 @@ def prepend_newrun_job(config): return config - -def order_clusters(config): +def set_phase_attrib(workflow_phases, phase_name, attrib, value): + for phase in workflow_phases: + if phase.name == phase_name: + if type(getattr(phase, attrib)).__name__ == "list": + phase.__dict__[attrib].append(value) + else: + phase.__setattr__(attrib, value) + +def get_phase_attrib(workflow_phases, phase_name, attrib): + for phase in workflow_phases: + if phase.name == phase_name: + value = getattr(phase, attrib) + return value + +def order_clusters(workflow, config): """ + Put the subjob_clusters in order ??? 
+ Arguments: config -- dictionary Returns: - config + workflow """ - gw_config = config["general"]["workflow"] - - for subjob_cluster in gw_config["subjob_clusters"]: - if not "next_submit" in gw_config["subjob_clusters"][subjob_cluster]: - gw_config["subjob_clusters"][subjob_cluster]["next_submit"] = [] - - for subjob_cluster in gw_config["subjob_clusters"]: - if not "run_after" in gw_config["subjob_clusters"][subjob_cluster]: - if not ("run_before" in gw_config["subjob_clusters"][subjob_cluster]): - - print(f"Don't know when to execute cluster {subjob_cluster}.") - print(gw_config) - sys.exit(-1) - - if "run_after" in gw_config["subjob_clusters"][subjob_cluster]: - if "run_before" in gw_config["subjob_clusters"][subjob_cluster]: - print( - f"Specifying both run_after and run_before for cluster {subjob_cluster} may lead to problems." - ) - print(f"Please choose.") - sys.exit(-1) - if ( - not gw_config["subjob_clusters"][subjob_cluster]["run_after"] - in gw_config["subjob_clusters"] - ): - print(f"Unknown cluster {gw_config['subjob_clusters'][subjob_cluster]['run_after']}.") - sys.exit(-1) - - calling_cluster = gw_config["subjob_clusters"][subjob_cluster]["run_after"] - - if ( - not subjob_cluster - in gw_config["subjob_clusters"][calling_cluster]["next_submit"] - ): - gw_config["subjob_clusters"][calling_cluster]["next_submit"].append( - subjob_cluster - ) - gw_config["subjob_clusters"][subjob_cluster][ - "called_from" - ] = calling_cluster - - if calling_cluster == gw_config["last_task_in_queue"]: - gw_config["last_task_in_queue"] = subjob_cluster - - if "run_before" in gw_config["subjob_clusters"][subjob_cluster]: - if ( - not gw_config["subjob_clusters"][subjob_cluster]["run_before"] - in gw_config["subjob_clusters"] - ): - print(f"Unknown cluster {gw_config['subjob_clusters'][subjob_cluster]['run_before']}.") - sys.exit(-1) - - called_cluster = gw_config["subjob_clusters"][subjob_cluster]["run_before"] - - if ( - not called_cluster - in 
gw_config["subjob_clusters"][subjob_cluster]["next_submit"] - ): - gw_config["subjob_clusters"][subjob_cluster]["next_submit"].append( - called_cluster - ) - gw_config["subjob_clusters"][called_cluster]["called_from"] = subjob_cluster - - if called_cluster == gw_config["first_task_in_queue"]: - gw_config["first_task_in_queue"] = subjob_cluster - - if "next_run_triggered_by" in gw_config: - gw_config["last_task_in_queue"] = gw_config["next_run_triggered_by"] - - first_cluster_name = gw_config["first_task_in_queue"] - first_cluster = gw_config["subjob_clusters"][first_cluster_name] - last_cluster_name = gw_config["last_task_in_queue"] - last_cluster = gw_config["subjob_clusters"][last_cluster_name] - - if not first_cluster_name in last_cluster.get("next_submit", ["Error"]): - last_cluster["next_submit"].append(first_cluster_name) - if not last_cluster_name in first_cluster.get("called_from", ["Error"]): - first_cluster["called_from"] = last_cluster_name - - return config - - -def complete_clusters(config): + independent = workflow.check_user_workflow_dependency() + unknown_phases = workflow.check_unknown_phases() + + if unknown_phases: + esm_parser.user_error("ERROR", "Undefined subjob/phase.") + + for user_phase in workflow.user_phases: +# TODO: Check if run_after or run_before is set for each user phase + if not user_phase.run_before and not user_phase.run_after: + esm_parser.user_error("ERROR", "Don't know when to start user_phase. Please set run_after or run_before") +# TODO: Check if not both run_after and run_before are set at the same time for each user phase + if user_phase.run_before and user_phase.run_after: + esm_parser.user_error("ERROR", "Both run_after and run_before are. Don't know when to start user_phase. 
Please only set run_after or run_before") + +# TODO: Correct for "last_task_in_queue" if necessary + # Collect all next_run_triggered_by entrie + next_triggered = [] + run_after = [] + for model in config: + if "workflow" in config[model]: + if "next_run_triggered_by" in config[model]["workflow"]: + next_triggered.append(config[model]["workflow"]["next_run_triggered_by"]) + next_triggered = list(filter((workflow.next_run_triggered_by).__ne__, next_triggered)) + if len(next_triggered) > 1: + esm_parser.user_error("ERROR", f"Mismatch found setting next_run_triggered_by for workflow.") + elif len(next_triggered) == 1: + workflow.next_run_triggered_by = next_triggered[0] + #else: let default + +# Fill up "next_submit" list + next_submits = {} + for phase in workflow.phases + workflow.user_phases: + next_submits[phase.name] = [] + for phase2 in workflow.phases + workflow.user_phases: + if not phase2.run_after == None: + next_submits[phase2.run_after].append(phase2.name) + phase2.called_from = phase2.run_after + for phase3 in workflow.phases + workflow.user_phases: + phase3.next_submit = next_submits[phase3.name] + + for phase4 in workflow.phases + workflow.user_phases: + calling_cluster = phase4.run_after +# + if calling_cluster == workflow.last_task_in_queue: + workflow.last_task_in_queue = phase4.name +# + called_cluster = phase4.run_before + set_phase_attrib(workflow.phases+workflow.user_phases, called_cluster, "called_from", phase4.name) + if called_cluster == workflow.first_task_in_queue: + workflow.first_task_in_queue = phase4.name +# + first_cluster_name = workflow.first_task_in_queue + last_cluster_name = workflow.last_task_in_queue +# + value = get_phase_attrib(workflow.phases+workflow.user_phases, last_cluster_name, "next_submit") + if not first_cluster_name in get_phase_attrib(workflow.phases+workflow.user_phases, last_cluster_name, "next_submit"): + set_phase_attrib(workflow.phases+workflow.user_phases, last_cluster_name, "next_submit", first_cluster_name) + 
if not last_cluster_name == get_phase_attrib(workflow.phases+workflow.user_phases, first_cluster_name, "called_from"): + set_phase_attrib(workflow.phases+workflow.user_phases, first_cluster_name, "called_from", last_cluster_name) +# + return workflow + + +def complete_clusters(workflow, config): + # all that are within a next_submit list are in a cluster if: + # run concurrently + # have the same cluster entry. """ + Rearanges the subjobs to their subjobs_clusters ??? Arguments: config -- dictionary Returns: @@ -230,16 +361,32 @@ def complete_clusters(config): """ gw_config = config["general"]["workflow"] + # sortiert alles in dict subjob_clusters + clusters = [] + for phase in workflow.phases + workflow.user_phases: + if phase.cluster == None: + phase.cluster = phase.name + clusters.append(phase.cluster) + + # Check if all subjobs of the same cluster have the same run_after + + + # TODO: calc nproc + # TODO: check for batch + # First, complete the matching subjobs <-> clusters for subjob in gw_config["subjobs"]: + # Erstellt ein leeres dict im dict subjob_clusters subjob_cluster = gw_config["subjobs"][subjob]["subjob_cluster"] if not subjob_cluster in gw_config["subjob_clusters"]: gw_config["subjob_clusters"][subjob_cluster] = {} + # Erstellt leere Liste fuer den jeweiligen subjob_cluster if not "subjobs" in gw_config["subjob_clusters"][subjob_cluster]: gw_config["subjob_clusters"][subjob_cluster]["subjobs"] = [] + # Haengt alle subjobs in diese Liste an. gw_config["subjob_clusters"][subjob_cluster]["subjobs"].append(subjob) # Then, complete the resource information per cluster @@ -306,38 +453,18 @@ def complete_clusters(config): nproc = nproc_max clusterconf["nproc"] = nproc + # wie wird hier config angepasst? + breakpoint() return config -def merge_single_entry_if_possible(entry, sourceconf, targetconf): - """ - Merges a dictionary entry into a target dictionary that has he same key. 
- - Arguments: - entry -- dictionary key - sourceconf -- dictionary - targetconf -- dictionary - Returns: - targetconf - """ - if entry in sourceconf: - if entry in targetconf and not sourceconf[entry] == targetconf[entry]: - print(f"Mismatch found in {entry} for cluster {targetconf}") - sys.exit(-1) - targetconf[entry] = sourceconf[entry] - return targetconf - -def init_total_workflow(config): +def calc_number_of_tasks(config): """ - Add compute, tidy etc information already here! - - Arguments: - config -- dictionary - Returns: - config + Calculates the total number of needed tasks + in phase compute + TODO: make this phase method??? Or recipe entry??? """ - tasks = 0 for model in config["general"]["valid_model_names"]: if "nproc" in config[model]: @@ -350,63 +477,96 @@ def init_total_workflow(config): and config[model]["nprocbr"] != "remove_from_namelist" ): tasks += config[model]["nprocar"] * config[model]["nprocbr"] + return tasks - prepcompute = { - "prepcompute": { - "nproc": 1, - "run_before": "compute", - } - } - - compute = { - "compute": { - "nproc": tasks, - "run_before": "tidy", - "submit_to_batch_system": config["general"].get( - "submit_to_batch_system", True - ), - "run_on_queue": config["computer"]["partitions"]["compute"]["name"], - } - } +def init_default_workflow(default_workflow, config): + """ + Add workflow for precompute, compute, and tidy phases + etc information already here! 
- # das ist nur vorübergehend - tidy = { - "tidy": { - "nproc": 1, - "run_after": "compute", - } - } + Arguments: + default_workflow -- workflow object + config -- dictionary + Returns: + default_workflow + """ - if not "workflow" in config["general"]: - config["general"]["workflow"] = {} - if not "subjob_clusters" in config["general"]["workflow"]: - config["general"]["workflow"]["subjob_clusters"] = {} - if not "subjobs" in config["general"]["workflow"]: - config["general"]["workflow"]["subjobs"] = prepcompute - config["general"]["workflow"]["subjobs"].update(compute) - config["general"]["workflow"]["subjobs"].update(tidy) - else: - if not "prepcompute" in config["general"]["workflow"]["subjobs"]: - config["general"]["workflow"]["subjobs"].update(prepcompute) - if not "compute" in config["general"]["workflow"]["subjobs"]: - config["general"]["workflow"]["subjobs"].update(compute) - if not "tidy" in config["general"]["workflow"]["subjobs"]: - config["general"]["workflow"]["subjobs"].update(tidy) - if not "last_task_in_queue" in config["general"]["workflow"]: - config["general"]["workflow"]["last_task_in_queue"] = "tidy" - if not "first_task_in_queue" in config["general"]["workflow"]: - config["general"]["workflow"]["first_task_in_queue"] = "prepcompute" - - if not "next_run_triggered_by" in config["general"]["workflow"]: - config["general"]["workflow"]["next_run_triggered_by"] = "tidy" + # TODO: make a method of class Workflow - return config + # For testing only, set in some yaml config + workflow_phases = default_workflow.phases + # Calculating the number of tasks for each component/model + # needed for phase compute + tasks = calc_number_of_tasks(config) + # Create default workflow phase objects: + default_workflow.phases = [] + for ind, phase in enumerate(workflow_phases): + default_workflow.phases.append(WorkflowPhase(phase)) + + for ind, phase in enumerate(default_workflow.phases): + if ind < default_workflow.num_phases_in_workflow() - 1: + phase.run_before = 
default_workflow.phases[ind+1].name + else: + phase.run_after = default_workflow.phases[ind-1].name + # TODO: this needs to be set somewhere else, or different. + phase.cluster = phase.name + if phase.name == "compute": + phase.nproc = tasks + phase.submit_to_batch_system = config["general"].get("submit_to_batch_system", True) + phase.run_on_queue = config["computer"]["partitions"]["compute"]["name"] + + default_workflow.first_task_in_queue = default_workflow.phases[0].name # prepcompute + default_workflow.last_task_in_queue = default_workflow.phases[-1].name # tidy + # next_run_triggered_by only used to set last_task_in_queue + # TODO: why not set last_task_in_queue directly? + default_workflow.next_run_triggered_by = default_workflow.phases[-1].name # tidy + + return default_workflow + +def collect_all_user_workflows(user_workflow,config): + """ + Collect all workflows set by config files. + """ + user_workflow_phases = [] + user_workflow_phases_names = [] + for model in config: + if "workflow" in config[model]: + w_config = config[model]["workflow"] + if "subjobs" in w_config: + # copies component workflow config to new variable ref_config + ref_config = copy.deepcopy(w_config) + for subjob in list(copy.deepcopy(w_config["subjobs"])): + # create a new phase object for subjob + # new_phase_name = subjob + "_" + model + # each subjob needs to have an unique name + new_phase_name = subjob + new_phase = UserWorkflowPhase(new_phase_name) + if not new_phase_name in user_workflow_phases_names: + user_workflow_phases_names.append(new_phase_name) + # set attributes of user_workflow phases + for key, value in w_config["subjobs"][subjob].items(): + new_phase.__setattr__(key, value) + user_workflow_phases.append(new_phase) + else: + esm_parser.user_error("ERROR", "Two subjobs of the same name.") + + user_workflow.user_phases = user_workflow_phases + return user_workflow + + + + + + + +################### Maybe outdated routines ###################### def 
collect_all_workflow_information(config): """ Collects all workflow information for each component entry in config (can be a model/component or a new entry (e.g. 'flows') + NOTE(NW): Should it be possible to set a workflow in the model section of the runscript? Why not? Checks if there are "workflow" entries in the user runscript and copies or merges them into config["general"]["workflow"] @@ -448,6 +608,7 @@ def collect_all_workflow_information(config): # subjobs (other than clusters) should be model specific # subjobs that are defined in subjobs of components workflow configs and not in a subjob_cluster are copied to general with suffix of componet entry. + # appends the model name to the subjob name and copy it to config["general"] gw_config["subjobs"][subjob + "_" + model] = copy.deepcopy( w_config["subjobs"][subjob] ) @@ -459,49 +620,55 @@ def collect_all_workflow_information(config): # for all subjobs now in general workflow for other_subjob in gw_config["subjobs"]: # sets run_after and run_before to correct subjob??? 
+ # if a subjob of general workflow has run_after attribute to a user subjob (that has been renamed to subjob_model) + # this run_after will be set to the new subjob name (subjob_model) if "run_after" in gw_config["subjobs"][other_subjob]: - if ( - gw_config["subjobs"][other_subjob]["run_after"] - == subjob - ): - gw_config["subjobs"][other_subjob][ - "run_after" - ] == subjob + "_" + model + if (gw_config["subjobs"][other_subjob]["run_after"] == subjob): + gw_config["subjobs"][other_subjob]["run_after"] == subjob + "_" + model if "run_before" in gw_config["subjobs"][other_subjob]: - if ( - gw_config["subjobs"][other_subjob]["run_before"] - == subjob - ): - gw_config["subjobs"][other_subjob][ - "run_before" - ] == subjob + "_" + model + if (gw_config["subjobs"][other_subjob]["run_before"] == subjob): + gw_config["subjobs"][other_subjob]["run_before"] == subjob + "_" + model # if not in another cluster, each subjob gets its own - if ( - not "subjob_cluster" - in gw_config["subjobs"][subjob + "_" + model] - ): - gw_config["subjobs"][subjob + "_" + model][ - "subjob_cluster" - ] = subjob # + "_" + model + if (not "subjob_cluster" in gw_config["subjobs"][subjob + "_" + model]): + gw_config["subjobs"][subjob + "_" + model]["subjob_cluster"] = subjob # + "_" + model + # checks if next_run:triggered_by is tidy or the one in user workflow, or empty? if "next_run_triggered_by" in w_config: - if not gw_config["next_run_triggered_by"] in [ - "tidy", - w_config["next_run_triggered_by"], - ]: + if not gw_config["next_run_triggered_by"] in ["tidy", w_config["next_run_triggered_by"], ]: print(f"Mismatch found setting next_run_triggered_by for workflow.") sys.exit(-1) else: - gw_config["next_run_triggered_by"] = w_config[ - "next_run_triggered_by" - ] + gw_config["next_run_triggered_by"] = w_config["next_run_triggered_by"] + # what if w_config["next_run_triggered_by"] is empty? 
return config +def merge_single_entry_if_possible(entry, sourceconf, targetconf): + """ + Merges a dictionary entry into a target dictionary that has he same key. + + Arguments: + entry -- dictionary key + sourceconf -- dictionary + targetconf -- dictionary + Returns: + targetconf + """ + if entry in sourceconf: + # Check if entry is already in targetconf AND different to sourceconf, then exit + if entry in targetconf and not sourceconf[entry] == targetconf[entry]: + print(f"Mismatch found in {entry} for cluster {targetconf}") + sys.exit(-1) + # Continues here if entry exists already in targetconf AND the same as sourceconf or + # not already in targetconf and set it to sourceconf + targetconf[entry] = sourceconf[entry] + return targetconf def merge_if_possible(source, target): """ + Does the same as above but for a whole dict + Merges the entries of source dictionary into target dictionary, if not already in. (Will not overwrite entries in target dictionary.) From e329e146580c72c0f07fed4483a06d4d46571cd6 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Fri, 6 Oct 2023 18:02:51 +0200 Subject: [PATCH 07/98] Started to refactor workflow.py --- src/esm_runscripts/workflow.py | 171 ++++++++------------- tests/test_esm_runscripts/test_workflow.py | 141 +++++++++++++++++ 2 files changed, 205 insertions(+), 107 deletions(-) create mode 100644 tests/test_esm_runscripts/test_workflow.py diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index 94828e44d..ce5e38e66 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -127,23 +127,26 @@ class WorkflowPhase: cluster = None next_submit = [] called_from = None + batch_or_shell = "SimulationSetup" + order_in_cluster = "sequential" + run_only = None + skip_chunk_number = None + skip_run_number = None def __init__(self, phase_name): self.name = phase_name class UserWorkflowPhase(WorkflowPhase): """A user workflow phase class.""" - batch_or_shell = "batch" - order_in_cluster 
= "concurrent" script = None script_dir = None call_function = None env_preparation = None - run_only = None - skip_chunk_number = None def __init__(self, phase_name): self.name = phase_name + batch_or_shell = "batch" + submit_to_batch_system = False def assemble_workflow(config): from . import Workflow @@ -173,11 +176,11 @@ def assemble_workflow(config): # 4. Order user workflows into default workflow wrt. workflow attributs. workflow = order_clusters(workflow, config) - workflow = complete_clusters(workflow, config) - breakpoint() - config = prepend_newrun_job(config) + subjob_clusters = complete_clusters(workflow, config) + subjob_clusters = prepend_newrun_job(config) # 5. write the workflow to config config = workflow.write_to_config(config) + breakpoint() # 6. Remove old worklow from config # Set "jobtype" for the first task??? @@ -191,6 +194,9 @@ def assemble_workflow(config): return config +def write_subjob_clusters_to_config(config, subjob_clusters): + config["general"]["subjob_clusters"] = subjob_clusters + return config def display_nicely(config): """ @@ -205,7 +211,7 @@ def display_nicely(config): return config -def prepend_newrun_job(workflow, config): +def prepend_newrun_job(workflow, config, subjob_clusters): """ Looks for subjob_cluster that are set by user workflow (not a 'SimulationSetup') and do not follow a 'SimulationSetup' subjob_clusters. 
@@ -216,18 +222,16 @@ def prepend_newrun_job(workflow, config): config -- dictionary Returns: workflow + subjob_clusters """ - gw_config = config["general"]["workflow"] - first_cluster_name = gw_config["first_task_in_queue"] - print(first_cluster_name) - breakpoint() - first_cluster = gw_config["subjob_clusters"][first_cluster_name] - esm_parser.pprint_config(first_cluster) + first_cluster_name = workflow.first_task_in_queue + first_cluster = subjob_clusters[first_cluster_name] + #esm_parser.pprint_config(first_cluster) if not first_cluster.get("batch_or_shell", "Error") == "SimulationSetup": - last_cluster_name = gw_config["last_task_in_queue"] - last_cluster = gw_config["subjob_clusters"][last_cluster_name] + last_cluster_name = workflow.last_task_in_queue + last_cluster = subjob_clusters[last_cluster_name] new_first_cluster_name = "newrun" new_first_cluster = { @@ -245,7 +249,7 @@ def prepend_newrun_job(workflow, config): first_cluster["called_from"] = "newrun" - gw_config["first_task_in_queue"] = "newrun" + workflow.first_task_in_queue = "newrun" new_subjob = { "newrun_general": { @@ -257,10 +261,12 @@ def prepend_newrun_job(workflow, config): } } - gw_config["subjob_clusters"].update(new_first_cluster) - gw_config["subjobs"].update(new_subjob) + subjob_clusters.update(new_first_cluster) - return config +# TODO: add new phase to workflow??? 
+ #gw_config["subjobs"].update(new_subjob) + + return [workflow, subjob_clusters] def set_phase_attrib(workflow_phases, phase_name, attrib, value): for phase in workflow_phases: @@ -335,6 +341,8 @@ def order_clusters(workflow, config): set_phase_attrib(workflow.phases+workflow.user_phases, called_cluster, "called_from", phase4.name) if called_cluster == workflow.first_task_in_queue: workflow.first_task_in_queue = phase4.name + if phase4.cluster == None: + phase4.cluster = phase4.name # first_cluster_name = workflow.first_task_in_queue last_cluster_name = workflow.last_task_in_queue @@ -355,109 +363,58 @@ def complete_clusters(workflow, config): """ Rearanges the subjobs to their subjobs_clusters ??? Arguments: + workflow -- obj config -- dictionary Returns: - config + subjob_clusters -- dictionary """ - gw_config = config["general"]["workflow"] + # sort into dict subjob_clusters + subjob_clusters = {} - # sortiert alles in dict subjob_clusters - clusters = [] for phase in workflow.phases + workflow.user_phases: - if phase.cluster == None: - phase.cluster = phase.name - clusters.append(phase.cluster) - - # Check if all subjobs of the same cluster have the same run_after - - - # TODO: calc nproc - # TODO: check for batch - - # First, complete the matching subjobs <-> clusters - - for subjob in gw_config["subjobs"]: # Erstellt ein leeres dict im dict subjob_clusters - subjob_cluster = gw_config["subjobs"][subjob]["subjob_cluster"] - if not subjob_cluster in gw_config["subjob_clusters"]: - gw_config["subjob_clusters"][subjob_cluster] = {} + if not phase.cluster in subjob_clusters: + subjob_clusters[phase.cluster] = {} - # Erstellt leere Liste fuer den jeweiligen subjob_cluster - if not "subjobs" in gw_config["subjob_clusters"][subjob_cluster]: - gw_config["subjob_clusters"][subjob_cluster]["subjobs"] = [] + # Create empty list for each subjob_cluster + if not "subjobs" in subjob_clusters[phase.cluster]: + subjob_clusters[phase.cluster]["subjobs"] = [] - # Haengt alle 
subjobs in diese Liste an. - gw_config["subjob_clusters"][subjob_cluster]["subjobs"].append(subjob) + # Append subjobs to list. + subjob_clusters[phase.cluster]["subjobs"].append(phase.name) # Then, complete the resource information per cluster # determine whether a cluster is to be submitted to a batch system - - for subjob_cluster in gw_config["subjob_clusters"]: + for subjob_cluster in subjob_clusters: nproc_sum = nproc_max = 0 - clusterconf = gw_config["subjob_clusters"][subjob_cluster] - for subjob in clusterconf["subjobs"]: - subjobconf = gw_config["subjobs"][subjob] - - clusterconf = merge_single_entry_if_possible( - "submit_to_batch_system", subjobconf, clusterconf - ) - clusterconf = merge_single_entry_if_possible( - "order_in_cluster", subjobconf, clusterconf - ) - - if subjobconf.get("submit_to_batch_system", False): - clusterconf["batch_or_shell"] = "batch" - elif subjobconf.get("script", False): - clusterconf["batch_or_shell"] = "shell" - - clusterconf = merge_single_entry_if_possible( - "run_on_queue", subjobconf, clusterconf - ) - clusterconf = merge_single_entry_if_possible( - "run_after", subjobconf, clusterconf - ) - clusterconf = merge_single_entry_if_possible( - "run_before", subjobconf, clusterconf - ) - clusterconf = merge_single_entry_if_possible( - "run_only", subjobconf, clusterconf - ) - clusterconf = merge_single_entry_if_possible( - "skip_run_number", subjobconf, clusterconf - ) - clusterconf = merge_single_entry_if_possible( - "skip_chunk_number", subjobconf, clusterconf - ) - - nproc_sum += subjobconf.get("nproc", 1) - nproc_max = max(subjobconf.get("nproc", 1), nproc_max) - - if not "submit_to_batch_system" in clusterconf: - clusterconf["submit_to_batch_system"] = False - else: - if not "run_on_queue" in clusterconf: - print( - f"Information on target queue is missing in cluster {clusterconf}." 
- ) - sys.exit(-1) - - if not clusterconf.get("batch_or_shell", False): - clusterconf["batch_or_shell"] = "SimulationSetup" - - if not "order_in_cluster" in clusterconf: - clusterconf["order_in_cluster"] = "sequential" + attributes = ["submit_to_batch_system", "order_in_cluster", "run_on_queue", "run_after", "run_before", "run_only", "skip_run_number", "skip_chunk_number", "batch_or_shell"] + for attrib in attributes: + temp_list = [] + for subjob in subjob_clusters[subjob_cluster]["subjobs"]: + if not get_phase_attrib(workflow.phases + workflow.user_phases, subjob, attrib) in temp_list: + subjob_clusters[subjob_cluster][attrib] = get_phase_attrib(workflow.phases + workflow.user_phases, subjob, attrib) + else: + print("Missmatch in attributes") + sys.exit(-1) + nproc_sum += get_phase_attrib(workflow.phases + workflow.user_phases, subjob, "nproc") + nproc_max = max(get_phase_attrib(workflow.phases + workflow.user_phases, subjob, "nproc"), nproc_max) - if clusterconf["order_in_cluster"] == "concurrent": +# if subjob_clusters[subjob_cluster].get("submit_to_batch_system", False): +# subjob_clusters[subjob_cluster]["batch_or_shell"] = "batch" +# elif subjob_clusters[subjob_cluster].get("script", False): +# subjob_clusters[subjob_cluster]["batch_or_shell"] = "shell" +# + if not "run_on_queue" in subjob_clusters[subjob_cluster]: + print(f"Information on target queue is missing in cluster {subjob_cluster}.") + sys.exit(-1) +# +# TODO: Check in nproc is calculated correctly + if subjob_clusters[subjob_cluster]["order_in_cluster"] == "concurrent": nproc = nproc_sum else: nproc = nproc_max - clusterconf["nproc"] = nproc - - # wie wird hier config angepasst? 
- breakpoint() - return config - - + subjob_clusters[subjob_cluster]["nproc"] = nproc + return subjob_clusters def calc_number_of_tasks(config): """ diff --git a/tests/test_esm_runscripts/test_workflow.py b/tests/test_esm_runscripts/test_workflow.py new file mode 100644 index 000000000..9ab44ac10 --- /dev/null +++ b/tests/test_esm_runscripts/test_workflow.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python + +""" Test for ``esm_runscripts.workflow``""" + +from esm_runscripts import workflow +import pytest + +@pytest.fixture() +def test_workflow_object(): + test_workflow = workflow.Workflow(["prepcompute","compute","tidy"],always_run_with=["prepare","prepexp"]) + return test_workflow + +@pytest.fixture() +def test_config(): + """Setup a test config dictionary.""" + config = { + 'computer': {'partitions': {'compute': {'name': 'test'}}}, + 'fesom': { + 'nproc': 128}, + 'rnfmap': { + 'nproc': 128}, + 'oasis3mct': { + 'nproc': 128}, + 'xios': { + 'nproc': 128}, + 'oifs': { + 'workflow': { +# 'next_run_triggered_by': 'tidy', + 'subjobs': { + 'my_new_subjob_oifs': { + 'batch_or_shell': 'batch', + 'nproc': 1, + 'order_in_cluster': 'concurrent', + 'cluster': 'test_cluster', + 'run_after': 'tidy', + 'run_on_queue': 'compute', + 'script': 'helloworld.sh', + 'script_dir': '/work/ab0995/a270089/myrunscripts/', + 'submit_to_batch_system': True}}}}, + 'general': { + 'valid_model_names': ['fesom', 'oifs', 'rnfmap', 'oasis3mct', 'xios'], + 'workflow': { + 'next_run_triggered_by': 'tidy', + 'subjobs': { + 'my_new_subjob_general': { + 'batch_or_shell': 'batch', + 'order_in_cluster': 'concurrent', + 'run_on_queue': 'compute', + 'nproc': 1, + 'run_after': 'tidy', + 'script_dir': '/work/ab0995/a270089/myrunscripts/', + 'script': 'hallowelt.sh', + 'submit_to_batch_system': True}}}}, + 'flow': { + 'workflow': { + 'next_run_triggered_by': 'tidy', + 'subjobs': { + 'my_new_subjob_flow': { + 'batch_or_shell': 'batch', + 'order_in_cluster': 'concurrent', + 'cluster': 'test_cluster', + 'run_on_queue': 
'compute', + 'nproc': 1, + 'run_after': 'tidy', + 'script_dir': '/work/ab0995/a270089/myrunscripts/', + 'script': 'hallowelt.sh', + 'submit_to_batch_system': True}}}}} + return config + +def test_check_user_workflow_dependency(test_workflow_object, test_config): + test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config) + test_workflow_object = workflow.collect_all_user_workflows(test_workflow_object, test_config) + independent = test_workflow_object.check_user_workflow_dependency() + assert independent + +def test_check_user_workflow_dependency_2(test_workflow_object, test_config): + test_config['flow']['workflow']['subjobs']['my_new_subjob_flow']['run_after'] = 'my_new_subjob_oifs' + test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config) + test_workflow_object = workflow.collect_all_user_workflows(test_workflow_object, test_config) + independent = test_workflow_object.check_user_workflow_dependency() + assert not independent + +def test_check_unknown_phases(test_workflow_object, test_config): + test_config['flow']['workflow']['subjobs']['my_new_subjob_flow']['run_after'] = 'my_new_subjob' + test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config) + test_workflow_object = workflow.collect_all_user_workflows(test_workflow_object, test_config) + unknown_phases = test_workflow_object.check_unknown_phases() + assert unknown_phases + +def test_assemble_workflow(): + pytest.fail("something wrong") + +def test_collect_all_user_workflow(test_config): + pytest.fail("something wrong") + +def test_calc_number_of_tasks(): + pytest.fail("something wrong") + +def test_order_clusters(test_workflow_object, test_config): + test_config['flow']['workflow']['subjobs']['my_new_subjob_flow']['run_after'] = 'my_new_subjob_general' +# test_config['flow']['workflow']['next_run_triggered_by'] = 'my_new_subjob_flow' +# test_config['oifs']['workflow']['next_run_triggered_by'] = 
'my_new_subjob_general' + #test_config['flow']['workflow']['subjobs']['my_new_subjob_flow']['run_before'] = 'my_new_subjob_oifs' + test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config) + test_workflow_object = workflow.collect_all_user_workflows(test_workflow_object, test_config) + test_workflow_object = workflow.order_clusters(test_workflow_object, test_config) + pytest.fail("something wrong") + +def test_complete_clusters(test_workflow_object, test_config): + test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config) + test_workflow_object = workflow.collect_all_user_workflows(test_workflow_object, test_config) + test_workflow_object = workflow.order_clusters(test_workflow_object, test_config) + subjob_clusters = workflow.complete_clusters(test_workflow_object, test_config) + pytest.fail("something wrong") + +def test_prepend_newrun_job(test_workflow_object, test_config): + test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config) + test_workflow_object = workflow.collect_all_user_workflows(test_workflow_object, test_config) + test_workflow_object = workflow.order_clusters(test_workflow_object, test_config) + subjob_clusters = workflow.complete_clusters(test_workflow_object, test_config) + [test_workflow_object, subjob_clusters] = workflow.prepend_newrun_job(test_workflow_object, test_config, subjob_clusters) + pytest.fail("something wrong") + +def test_write_to_config(test_workflow_object, test_config): + test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config) + test_workflow_object = workflow.collect_all_user_workflows(test_workflow_object, test_config) + test_workflow_object = workflow.order_clusters(test_workflow_object, test_config) + subjob_clusters = workflow.complete_clusters(test_workflow_object, test_config) + [test_workflow_object, subjob_clusters] = workflow.prepend_newrun_job(test_workflow_object, test_config, 
subjob_clusters) + config = test_workflow_object.write_to_config(test_config) + pytest.fail("something wrong") + +def test_write_subjob_clusters_to_config(test_workflow_object, test_config): + test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config) + test_workflow_object = workflow.collect_all_user_workflows(test_workflow_object, test_config) + test_workflow_object = workflow.order_clusters(test_workflow_object, test_config) + subjob_clusters = workflow.complete_clusters(test_workflow_object, test_config) + [test_workflow_object, subjob_clusters] = workflow.prepend_newrun_job(test_workflow_object, test_config, subjob_clusters) + test_config = workflow.write_subjob_clusters_to_config(test_config, subjob_clusters) + test_config = test_workflow_object.write_to_config(test_config) + pytest.fail("something wrong") From d7f45659982a2ab05ddbb0560063c7ecc68862a9 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Mon, 16 Oct 2023 15:15:21 +0200 Subject: [PATCH 08/98] Placed the default definition of workflow phases in esm_software.../defauls.yaml . 
--- .../esm_software/esm_runscripts/defaults.yaml | 26 ++++++++++++------- .../esm_runscripts/esm_plugins.yaml | 4 +-- src/esm_runscripts/workflow.py | 22 +++++++++++++--- 3 files changed, 37 insertions(+), 15 deletions(-) diff --git a/configs/esm_software/esm_runscripts/defaults.yaml b/configs/esm_software/esm_runscripts/defaults.yaml index 43b918aa7..50a0eb717 100644 --- a/configs/esm_software/esm_runscripts/defaults.yaml +++ b/configs/esm_software/esm_runscripts/defaults.yaml @@ -1,11 +1,17 @@ - per_model_defaults: - file_movements: - default: - all_directions: copy - bin: - init_to_exp: copy - exp_to_run: copy - run_to_work: copy - work_to_run: copy - + file_movements: + default: + all_directions: copy + bin: + init_to_exp: copy + exp_to_run: copy + run_to_work: copy + work_to_run: copy +default_workflow_phases: + phases: + - prepcompute + - compute + - tidy + always_run_with: + - prepare + - prepexp diff --git a/configs/esm_software/esm_runscripts/esm_plugins.yaml b/configs/esm_software/esm_runscripts/esm_plugins.yaml index 4c2d2ae94..4e47c7dca 100644 --- a/configs/esm_software/esm_runscripts/esm_plugins.yaml +++ b/configs/esm_software/esm_runscripts/esm_plugins.yaml @@ -23,7 +23,7 @@ core: - "initialize_experiment_logfile" - "copy_tools_to_thisrun" - "_copy_preliminary_files_from_experiment_to_thisrun" - + prepcompute: @@ -52,7 +52,7 @@ core: - "throw_away_some_infiles" - observe: + observe: - "init_monitor_file" #- "get_last_jobid" - "wait_and_observe" diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index ce5e38e66..6d3f64b0d 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -161,10 +161,26 @@ def assemble_workflow(config): """ # 1. Generate default workflow object - #TODO: preset of default workflow phases should be set in some config file. 
- workflow = Workflow(["prepcompute","compute","tidy"],always_run_with=["prepare","prepexp"]) # initialize the default workflow as Workflow object - # TODO: NW where are these default phases defined??? + # TODO: NW where are these default phases defined? For now I placed it in + # esm_tools/configs/esm_software/esm_runscripts/defaults.yaml + phases = [] + always_run_with = [] + if "defaults.yaml" in config["general"]: + if "default_workflow_phases" in config["general"]["defaults.yaml"]: + phases = config["general"]["defaults.yaml"]["default_workflow_phases"]["phases"] + if "always_run_with" in config["general"]["defaults.yaml"]["default_workflow_phases"]: + always_run_with = config["general"]["defaults.yaml"]["default_workflow_phases"]["always_run_with"] + + if phases and always_run_with: + workflow = Workflow(phases, always_run_with=always_run_with) + elif phases: + workflow = Workflow(phases) + else: + esm_parser.user_error("ERROR", "No default workflow phases defined.") + # Note: NW Should this work also if no default phases are set in such a config file, but + # instead all workflow phases are defined in different configs and/or runscripts? + # TODO: NW Where could a user define a different (default) phase list? Or should this be changed in defaults.yaml? # 2. Initialize default workflow phases workflow = init_default_workflow(workflow, config) # 3. 
Read in workflows from runscript and config files From a2fc1a4c6ef8de0b95ca68935cdc3c33c034d980 Mon Sep 17 00:00:00 2001 From: Nadine Wieters <31928930+nwieters@users.noreply.github.com> Date: Tue, 17 Oct 2023 16:09:48 +0200 Subject: [PATCH 09/98] Update src/esm_plugin_manager/esm_plugin_manager.py Co-authored-by: Miguel <63242832+mandresm@users.noreply.github.com> --- src/esm_plugin_manager/esm_plugin_manager.py | 26 ++++++++++++-------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/src/esm_plugin_manager/esm_plugin_manager.py b/src/esm_plugin_manager/esm_plugin_manager.py index dbfcb4c59..ec355ec6f 100644 --- a/src/esm_plugin_manager/esm_plugin_manager.py +++ b/src/esm_plugin_manager/esm_plugin_manager.py @@ -21,16 +21,22 @@ def read_plugin_information(plugins_bare, recipe, needs_parse=True): """ Reads in plugin information from the pluginfile = esm_plugins.yaml - Arguments: - plugins_bare -- disctionary as it is read in by function 'read_recipe' - recipe -- dictionary of all workitems of an recipe - needs_parse -- True (default) or False - - Returns: - plugins - dictionary that has information for each workitem of recipe: - module: e.g. esm_runscripts - submodule: e.g. prepare (this is the Python file where the workitem function is defined. - type: e.g. core + Parameters + ---------- + plugins_bare : dict + Dictionary as it is read in by function ``read_recipe`` + recipe : dict + Dictionary of all workitems of a recipe + needs_parse : bool + True (default) or False + + Returns + ------- + plugins : dict + Dictionary that has information for each workitem of the recipe: + - module: e.g. esm_runscripts + - submodule: e.g. prepare (this is the Python file where the workitem function is defined. + - type: e.g. 
core """ if needs_parse: plugins_bare = yaml_file_to_dict(plugins_bare) From fef151d4e821597883acd400c60a156a89064f33 Mon Sep 17 00:00:00 2001 From: Nadine Wieters <31928930+nwieters@users.noreply.github.com> Date: Tue, 17 Oct 2023 16:21:03 +0200 Subject: [PATCH 10/98] Update src/esm_plugin_manager/esm_plugin_manager.py Co-authored-by: Miguel <63242832+mandresm@users.noreply.github.com> --- src/esm_plugin_manager/esm_plugin_manager.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/esm_plugin_manager/esm_plugin_manager.py b/src/esm_plugin_manager/esm_plugin_manager.py index ec355ec6f..c465cb0ea 100644 --- a/src/esm_plugin_manager/esm_plugin_manager.py +++ b/src/esm_plugin_manager/esm_plugin_manager.py @@ -151,13 +151,15 @@ def work_through_recipe(recipe, plugins, config): """ Works through the esm_runscripts recipes and plugin recipes. - Arguments: - recipe -- dictionary # What is in these two dictionaries? Where do the entries are comming from? - plugins -- dictionary - config -- dictionary + Parameters + ---------- + recipe : dict # What is in these two dictionaries? Where do the entries are comming from? + plugins : dict + config : dict - Returns: - config + Returns + ------- + config : dict """ if config.get("general", {}).get("debug_recipe", False): import pdb From c733a0041c9cb9fc20e179eeb5c794a90772126c Mon Sep 17 00:00:00 2001 From: Nadine Wieters <31928930+nwieters@users.noreply.github.com> Date: Tue, 17 Oct 2023 16:21:31 +0200 Subject: [PATCH 11/98] Update src/esm_runscripts/workflow.py Co-authored-by: Miguel <63242832+mandresm@users.noreply.github.com> --- src/esm_runscripts/workflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index 6d3f64b0d..96970b480 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -34,7 +34,7 @@ def write_to_config(self, config): """ Write to config. 
""" - # NW: It is assumed here, that there are no workflows in config["general"] + # It is assumed here, that there are no workflows in config["general"] # or that these are removed after collect_... config["general"]["workflow"] = {} config["general"]["workflow"].update(self.__dict__) From 506c0a75c3b939d3cb2547e8a40ed507232eaeac Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Tue, 17 Oct 2023 16:40:56 +0200 Subject: [PATCH 12/98] Correcte syntax of docstrings. --- src/esm_runscripts/workflow.py | 129 ++++++++++++++++++++++----------- 1 file changed, 85 insertions(+), 44 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index 96970b480..460c52e3d 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -16,9 +16,16 @@ def __init__(self, phases, always_run_with=[]): """ Create a new workflow. - Arguments: - phases -- List of workflow phases - always_run_with -- List of phases that precedes each phase + Parameters + ---------- + phases : list + List of workflow phases + always_run_with : list + List of phases that precedes each phase + + Returns + ------- + none """ # TODO: NW call here the phase object ??? self.phases = phases @@ -78,10 +85,14 @@ def skip_cluster(self, config): """ Checks if a phase/cluster can be skipped. Needed keywords: run_only, skip_chunk_number - Arguments: + + Parameters + ---------- self - config - Returns: + config : dict + + Returns + ------- True or False """ #gw_config = config["general"]["workflow"] @@ -154,10 +165,13 @@ def assemble_workflow(config): Assembles the workflow tasks. Is called from the plugin recipe prepcompute. - Arguments: - config -- dictionary - Returns: - config + Parameters + ---------- + config : dict + + Returns + ------- + config : dict """ # 1. Generate default workflow object @@ -218,10 +232,13 @@ def display_nicely(config): """ Pretty prints the workflow configuration assembled in config["general"]. 
- Arguments: - config -- dictionary - Returns: - config + Parameters + ---------- + config : dict + + Returns + ------- + config : dict """ esm_parser.pprint_config(config["general"]["workflow"]) return config @@ -234,9 +251,12 @@ def prepend_newrun_job(workflow, config, subjob_clusters): E.g. if two user workflow are the last two subjob_clusters ??? Any other example cases when this is the case? - Arguments: - config -- dictionary - Returns: + Parameters + ---------- + config : dict + + Returns + ------- workflow subjob_clusters """ @@ -302,9 +322,12 @@ def order_clusters(workflow, config): """ Put the subjob_clusters in order ??? - Arguments: - config -- dictionary - Returns: + Parameters + ---------- + config : dict + + Returns + ------- workflow """ independent = workflow.check_user_workflow_dependency() @@ -378,11 +401,15 @@ def complete_clusters(workflow, config): # have the same cluster entry. """ Rearanges the subjobs to their subjobs_clusters ??? - Arguments: - workflow -- obj - config -- dictionary - Returns: - subjob_clusters -- dictionary + + Parameters + ---------- + workflow + config : dict + + Returns + ------- + subjob_clusters : dict """ # sort into dict subjob_clusters subjob_clusters = {} @@ -457,10 +484,14 @@ def init_default_workflow(default_workflow, config): Add workflow for precompute, compute, and tidy phases etc information already here! 
- Arguments: - default_workflow -- workflow object - config -- dictionary - Returns: + Parameters + ---------- + default_workflow + workflow object + config : dict + + Returns + ------- default_workflow """ @@ -544,10 +575,13 @@ def collect_all_workflow_information(config): Checks if there are "workflow" entries in the user runscript and copies or merges them into config["general"]["workflow"] - Arguments: - config -- dictionary - Returns: - config + Parameters + ---------- + config : dict + + Returns + ------- + config : dict """ for model in config: if "workflow" in config[model]: @@ -621,12 +655,16 @@ def merge_single_entry_if_possible(entry, sourceconf, targetconf): """ Merges a dictionary entry into a target dictionary that has he same key. - Arguments: - entry -- dictionary key - sourceconf -- dictionary - targetconf -- dictionary - Returns: - targetconf + Parameters + ---------- + entry : str + dictionary key + sourceconf : dict + targetconf : dict + + Returns + ------- + targetconf : dict """ if entry in sourceconf: # Check if entry is already in targetconf AND different to sourceconf, then exit @@ -645,11 +683,14 @@ def merge_if_possible(source, target): Merges the entries of source dictionary into target dictionary, if not already in. (Will not overwrite entries in target dictionary.) 
- Arguments: - source -- dictionary - target -- dictionary - Returns: - target + Parameters + ---------- + source : dict + target : dict + + Returns + ------- + target : dict """ for entry in source: if entry in target: From 03b6e24c61782a966495748801667fc139e8cffe Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Tue, 17 Oct 2023 16:57:17 +0200 Subject: [PATCH 13/98] Renamed default_workflow_phases entry in defaults.yaml --- .../esm_software/esm_runscripts/defaults.yaml | 2 +- src/esm_runscripts/workflow.py | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/configs/esm_software/esm_runscripts/defaults.yaml b/configs/esm_software/esm_runscripts/defaults.yaml index 50a0eb717..2aa0684dd 100644 --- a/configs/esm_software/esm_runscripts/defaults.yaml +++ b/configs/esm_software/esm_runscripts/defaults.yaml @@ -7,7 +7,7 @@ per_model_defaults: exp_to_run: copy run_to_work: copy work_to_run: copy -default_workflow_phases: +workflow: phases: - prepcompute - compute diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index 460c52e3d..0c8f215d6 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -27,7 +27,7 @@ def __init__(self, phases, always_run_with=[]): ------- none """ - # TODO: NW call here the phase object ??? + # TODO: Call here the phase object ??? self.phases = phases self.always_run_with = always_run_with @@ -176,15 +176,15 @@ def assemble_workflow(config): # 1. Generate default workflow object # initialize the default workflow as Workflow object - # TODO: NW where are these default phases defined? For now I placed it in + # TODO: Where are these default phases defined? 
For now I placed it in # esm_tools/configs/esm_software/esm_runscripts/defaults.yaml phases = [] always_run_with = [] if "defaults.yaml" in config["general"]: - if "default_workflow_phases" in config["general"]["defaults.yaml"]: - phases = config["general"]["defaults.yaml"]["default_workflow_phases"]["phases"] - if "always_run_with" in config["general"]["defaults.yaml"]["default_workflow_phases"]: - always_run_with = config["general"]["defaults.yaml"]["default_workflow_phases"]["always_run_with"] + if "workflow" in config["general"]["defaults.yaml"]: + phases = config["general"]["defaults.yaml"]["workflow"]["phases"] + if "always_run_with" in config["general"]["defaults.yaml"]["workflow"]: + always_run_with = config["general"]["defaults.yaml"]["workflow"]["always_run_with"] if phases and always_run_with: workflow = Workflow(phases, always_run_with=always_run_with) @@ -192,9 +192,9 @@ def assemble_workflow(config): workflow = Workflow(phases) else: esm_parser.user_error("ERROR", "No default workflow phases defined.") - # Note: NW Should this work also if no default phases are set in such a config file, but + # Note: Should this work also if no default phases are set in such a config file, but # instead all workflow phases are defined in different configs and/or runscripts? - # TODO: NW Where could a user define a different (default) phase list? Or should this be changed in defaults.yaml? + # TODO: Where could a user define a different (default) phase list? Or should this be changed in defaults.yaml? # 2. Initialize default workflow phases workflow = init_default_workflow(workflow, config) # 3. Read in workflows from runscript and config files @@ -570,7 +570,7 @@ def collect_all_workflow_information(config): """ Collects all workflow information for each component entry in config (can be a model/component or a new entry (e.g. 'flows') - NOTE(NW): Should it be possible to set a workflow in the model section of the runscript? Why not? 
+ NOTE: Should it be possible to set a workflow in the model section of the runscript? Why not? Checks if there are "workflow" entries in the user runscript and copies or merges them into config["general"]["workflow"] From b3b5fb187b60dbafd58e9cc1376cf19a2d1c1a9b Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Wed, 18 Oct 2023 09:22:28 +0200 Subject: [PATCH 14/98] Renamed workflow method, added property decorator and added test. --- src/esm_runscripts/workflow.py | 5 +++-- tests/test_esm_runscripts/test_workflow.py | 4 ++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index 0c8f215d6..fe838cf8b 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -31,7 +31,8 @@ def __init__(self, phases, always_run_with=[]): self.phases = phases self.always_run_with = always_run_with - def num_phases_in_workflow(self): + @property + def num_phases(self): """ Return the number of phases in workflow. 
""" @@ -509,7 +510,7 @@ def init_default_workflow(default_workflow, config): default_workflow.phases.append(WorkflowPhase(phase)) for ind, phase in enumerate(default_workflow.phases): - if ind < default_workflow.num_phases_in_workflow() - 1: + if ind < default_workflow.num_phases - 1: phase.run_before = default_workflow.phases[ind+1].name else: phase.run_after = default_workflow.phases[ind-1].name diff --git a/tests/test_esm_runscripts/test_workflow.py b/tests/test_esm_runscripts/test_workflow.py index 9ab44ac10..855228146 100644 --- a/tests/test_esm_runscripts/test_workflow.py +++ b/tests/test_esm_runscripts/test_workflow.py @@ -67,6 +67,10 @@ def test_config(): 'submit_to_batch_system': True}}}}} return config +def test_num_phases(test_workflow_object, test_config): + test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config) + assert test_workflow_object.num_phases == 3 + def test_check_user_workflow_dependency(test_workflow_object, test_config): test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config) test_workflow_object = workflow.collect_all_user_workflows(test_workflow_object, test_config) From 6260278940d46b08f9a1a64f7bb2c2cc665d681c Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Wed, 18 Oct 2023 10:10:46 +0200 Subject: [PATCH 15/98] Added a temporary workflow to awicm3. 
--- configs/setups/awicm3/awicm3.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/configs/setups/awicm3/awicm3.yaml b/configs/setups/awicm3/awicm3.yaml index 2ac1f8722..e95af50ab 100644 --- a/configs/setups/awicm3/awicm3.yaml +++ b/configs/setups/awicm3/awicm3.yaml @@ -296,6 +296,11 @@ oifs: tl_o3_data_dir: ${input_dir}/${version}/climate/ ICMGG_INIT_name: "_${fesom.resolution}" + workflow: + next_run_triggered_by: tidy + subjobs: + my_new_subjob: + batch_or_shell: shell # Postprocessing choose_general.postprocessing: From de0e598ccee1f4fa05ad8001bccffa0421a3eef5 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Fri, 20 Oct 2023 17:18:38 +0200 Subject: [PATCH 16/98] Added method to return a list of an attribute for all phases. --- src/esm_runscripts/workflow.py | 44 +++++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index fe838cf8b..c22a708a5 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -38,6 +38,27 @@ def num_phases(self): """ return len(self.phases) + def get_phases_attribs_list(self, phase_type, attrib): + """ + Return the names of all phases as list. + + Parameters + ---------- + self: class Workflow + phase_type: str (default or user) + attribute: str + + Returns + ------- + phases_attribs : list + """ + if phase_type == 'user': + phases_attribs = [getattr(phase, attrib) for phase in self.user_phases] + else: + phases_attribs = [getattr(phase, attrib) for phase in self.phases] + + return phases_attribs + def write_to_config(self, config): """ Write to config. @@ -55,15 +76,20 @@ def write_to_config(self, config): def check_user_workflow_dependency(self): """ - Check whether the user defined workflow phases are independent from eachother or not. + Check whether the user defined workflow phases are independent from each other or not. 
""" independent = False - user_phases_names = [phase.name for phase in self.user_phases] - run_after_list = [phase.run_after for phase in self.user_phases] - run_before_list = [phase.run_before for phase in self.user_phases] + user_phases_names = self.get_phases_attribs_list('user','name') + run_after_list = self.get_phases_attribs_list('user','run_after') + run_before_list = self.get_phases_attribs_list('user','run_before') + + # All user phases are independent from each other, if + # none of the ``user_phases_names`` are found in the union of ``run_before_list`` and ``run_after_list`` + # That means alls user phases can be run independent from each other. if not set(user_phases_names).intersection(set(run_after_list).union(set(run_before_list))): independent = True else: + # TODO: What todo in other case? independent = False return independent @@ -73,11 +99,13 @@ def check_unknown_phases(self): Check if any user phase addresses an unknown workflow phase. """ unknown_user_phase = True - phases_names = [phase.name for phase in self.phases] - user_phases_names = [phase.name for phase in self.user_phases] + phases_names = self.get_phases_attribs_list('default','name') + user_phases_names = self.get_phases_attribs_list('user','name') + run_after = self.get_phases_attribs_list('user','run_after') + run_before = self.get_phases_attribs_list('user','run_before') # Filter out all falsy items (e.g. 
[], "", None) - run_after_list = list(filter(None, [phase.run_after for phase in self.user_phases])) - run_before_list = list(filter(None, [phase.run_before for phase in self.user_phases])) + run_after_list = list(filter(None, run_after)) + run_before_list = list(filter(None, run_before)) unknown_user_phases = set(run_after_list).union(set(run_before_list)).difference(set(user_phases_names).union(set(phases_names))) return unknown_user_phases From 11fa96b88592cf19883037b4d90f2564bcfa1c85 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Mon, 23 Oct 2023 17:11:22 +0200 Subject: [PATCH 17/98] Convert functions into methods, moved class variables to be instance variables. --- src/esm_runscripts/workflow.py | 670 +++++++++++---------- tests/test_esm_runscripts/test_workflow.py | 52 +- 2 files changed, 369 insertions(+), 353 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index c22a708a5..3644d3503 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -5,12 +5,6 @@ class Workflow: """A workflow class.""" - default_phases = [] - user_phases = [] - always_run_with = [] - first_task_in_queue = "" - last_task_in_queue = "" - next_run_triggered_by = "" def __init__(self, phases, always_run_with=[]): """ @@ -19,14 +13,20 @@ def __init__(self, phases, always_run_with=[]): Parameters ---------- phases : list - List of workflow phases + List of workflow phases names always_run_with : list - List of phases that precedes each phase + List of phases that precedes each phase in phases Returns ------- none """ + self.default_phases = [] + self.user_phases = [] + self.always_run_with = [] + self.first_task_in_queue = "" + self.last_task_in_queue = "" + self.next_run_triggered_by = "" # TODO: Call here the phase object ??? 
self.phases = phases self.always_run_with = always_run_with @@ -59,6 +59,86 @@ def get_phases_attribs_list(self, phase_type, attrib): return phases_attribs + def init_default_workflow(self, config): + """ + Add workflow for precompute, compute, and tidy phases + etc information already here! + + Parameters + ---------- + self : Workflow object + config : dict + + Returns + ------- + self : Workflow object + """ + + workflow_phases = self.phases + + # Calculating the number of tasks for each component/model + # needed for phase compute + tasks = calc_number_of_tasks(config) + # Initiate/create default workflow phase objects + # and reset/append to Workflow.phases variable + self.phases = [] + for ind, phase in enumerate(workflow_phases): + self.phases.append(WorkflowPhase(phase)) + + for ind, phase in enumerate(self.phases): + if ind < self.num_phases - 1: + # Set run_before attrib of all phases (except last on) to the next phase name + phase.run_before = self.phases[ind+1].name + else: + # Set run_after attrib of last phase to previous phase name + phase.run_after = self.phases[ind-1].name + + # TODO: this needs to be set somewhere else, or different. + phase.cluster = phase.name + if phase.name == "compute": + phase.nproc = tasks + phase.submit_to_batch_system = config["general"].get("submit_to_batch_system", True) + phase.run_on_queue = config["computer"]["partitions"]["compute"]["name"] + + # Set default workflow values + self.first_task_in_queue = self.phases[0].name # prepcompute + self.last_task_in_queue = self.phases[-1].name # tidy + # next_run_triggered_by only used to set last_task_in_queue + # TODO: why not set last_task_in_queue directly? + self.next_run_triggered_by = self.phases[-1].name # tidy + + return self + + def collect_all_user_workflows(self,config): + """ + Collect all workflows set by config files. 
+ """ + user_workflow_phases = [] + user_workflow_phases_names = [] + for model in config: + if "workflow" in config[model]: + w_config = config[model]["workflow"] + if "subjobs" in w_config: + # copies component workflow config to new variable ref_config + ref_config = copy.deepcopy(w_config) + for subjob in list(copy.deepcopy(w_config["subjobs"])): + # create a new phase object for subjob + # new_phase_name = subjob + "_" + model + # each subjob needs to have an unique name + new_phase_name = subjob + new_phase = UserWorkflowPhase(new_phase_name) + if not new_phase_name in user_workflow_phases_names: + user_workflow_phases_names.append(new_phase_name) + # set attributes of user_workflow phases + for key, value in w_config["subjobs"][subjob].items(): + new_phase.__setattr__(key, value) + user_workflow_phases.append(new_phase) + else: + esm_parser.user_error("ERROR", "Two subjobs of the same name.") + + self.user_phases = user_workflow_phases + return self + def write_to_config(self, config): """ Write to config. @@ -77,6 +157,14 @@ def write_to_config(self, config): def check_user_workflow_dependency(self): """ Check whether the user defined workflow phases are independent from each other or not. + + Arguments + --------- + self : Workflow object + + Returns + ------- + independent : bool (default: False) """ independent = False user_phases_names = self.get_phases_attribs_list('user','name') @@ -103,13 +191,218 @@ def check_unknown_phases(self): user_phases_names = self.get_phases_attribs_list('user','name') run_after = self.get_phases_attribs_list('user','run_after') run_before = self.get_phases_attribs_list('user','run_before') - # Filter out all falsy items (e.g. [], "", None) + # Filter out all elements that are None + # ``filter(None, anylist)`` will filter out all items of anylist, for which ``if item`` is false (e.g. [], "", None, {}, ''). 
+ # See also https://docs.python.org/3/library/functions.html#filter run_after_list = list(filter(None, run_after)) run_before_list = list(filter(None, run_before)) unknown_user_phases = set(run_after_list).union(set(run_before_list)).difference(set(user_phases_names).union(set(phases_names))) return unknown_user_phases + def order_clusters(self, config): + """ + Put the subjob_clusters in order ??? + + Parameters + ---------- + config : dict + + Returns + ------- + self : Workflow object + """ + independent = self.check_user_workflow_dependency() + unknown_phases = self.check_unknown_phases() + + if unknown_phases: + esm_parser.user_error("ERROR", "Undefined subjob/phase.") + + for user_phase in self.user_phases: + # TODO: Check if run_after or run_before is set for each user phase + if not user_phase.run_before and not user_phase.run_after: + esm_parser.user_error("ERROR", "Don't know when to start user_phase. Please set run_after or run_before") + # TODO: Check if not both run_after and run_before are set at the same time for each user phase + if user_phase.run_before and user_phase.run_after: + esm_parser.user_error("ERROR", "Both run_after and run_before are. Don't know when to start user_phase. 
Please only set run_after or run_before") + + # TODO: Correct for "last_task_in_queue" if necessary + # Collect all next_run_triggered_by entrie + next_triggered = [] + run_after = [] + for model in config: + if "workflow" in config[model]: + if "next_run_triggered_by" in config[model]["workflow"]: + next_triggered.append(config[model]["workflow"]["next_run_triggered_by"]) + next_triggered = list(filter((self.next_run_triggered_by).__ne__, next_triggered)) + if len(next_triggered) > 1: + esm_parser.user_error("ERROR", f"Mismatch found setting next_run_triggered_by for workflow.") + elif len(next_triggered) == 1: + self.next_run_triggered_by = next_triggered[0] + #else: let default + + # Fill up "next_submit" list + next_submits = {} + for phase in self.phases + self.user_phases: + next_submits[phase.name] = [] + for phase2 in self.phases + self.user_phases: + if not phase2.run_after == None: + next_submits[phase2.run_after].append(phase2.name) + phase2.called_from = phase2.run_after + for phase3 in self.phases + self.user_phases: + phase3.next_submit = next_submits[phase3.name] + + for phase4 in self.phases + self.user_phases: + calling_cluster = phase4.run_after + # + if calling_cluster == self.last_task_in_queue: + self.last_task_in_queue = phase4.name + # + called_cluster = phase4.run_before + set_phase_attrib(self.phases+self.user_phases, called_cluster, "called_from", phase4.name) + if called_cluster == self.first_task_in_queue: + self.first_task_in_queue = phase4.name + if phase4.cluster == None: + phase4.cluster = phase4.name + # + first_cluster_name = self.first_task_in_queue + last_cluster_name = self.last_task_in_queue + # + value = get_phase_attrib(self.phases+self.user_phases, last_cluster_name, "next_submit") + if not first_cluster_name in get_phase_attrib(self.phases+self.user_phases, last_cluster_name, "next_submit"): + set_phase_attrib(self.phases+self.user_phases, last_cluster_name, "next_submit", first_cluster_name) + if not last_cluster_name == 
get_phase_attrib(self.phases+self.user_phases, first_cluster_name, "called_from"): + set_phase_attrib(self.phases+self.user_phases, first_cluster_name, "called_from", last_cluster_name) + # + return self + + def complete_clusters(config): + # all that are within a next_submit list are in a cluster if: + # run concurrently + # have the same cluster entry. + """ + Rearanges the subjobs to their subjobs_clusters ??? + + Parameters + ---------- + self : Workflow object + config : dict + + Returns + ------- + subjob_clusters : dict + """ + # sort into dict subjob_clusters + subjob_clusters = {} + + for phase in self.phases + self.user_phases: + # Erstellt ein leeres dict im dict subjob_clusters + if not phase.cluster in subjob_clusters: + subjob_clusters[phase.cluster] = {} + + # Create empty list for each subjob_cluster + if not "subjobs" in subjob_clusters[phase.cluster]: + subjob_clusters[phase.cluster]["subjobs"] = [] + + # Append subjobs to list. + subjob_clusters[phase.cluster]["subjobs"].append(phase.name) + + # Then, complete the resource information per cluster + # determine whether a cluster is to be submitted to a batch system + for subjob_cluster in subjob_clusters: + nproc_sum = nproc_max = 0 + attributes = ["submit_to_batch_system", "order_in_cluster", "run_on_queue", "run_after", "run_before", "run_only", "skip_run_number", "skip_chunk_number", "batch_or_shell"] + for attrib in attributes: + temp_list = [] + for subjob in subjob_clusters[subjob_cluster]["subjobs"]: + if not get_phase_attrib(self.phases + self.user_phases, subjob, attrib) in temp_list: + subjob_clusters[subjob_cluster][attrib] = get_phase_attrib(self.phases + self.user_phases, subjob, attrib) + else: + print("Missmatch in attributes") + sys.exit(-1) + nproc_sum += get_phase_attrib(self.phases + self.user_phases, subjob, "nproc") + nproc_max = max(get_phase_attrib(self.phases + self.user_phases, subjob, "nproc"), nproc_max) + + # if 
subjob_clusters[subjob_cluster].get("submit_to_batch_system", False): + # subjob_clusters[subjob_cluster]["batch_or_shell"] = "batch" + # elif subjob_clusters[subjob_cluster].get("script", False): + # subjob_clusters[subjob_cluster]["batch_or_shell"] = "shell" + # + if not "run_on_queue" in subjob_clusters[subjob_cluster]: + print(f"Information on target queue is missing in cluster {subjob_cluster}.") + sys.exit(-1) + # + # TODO: Check in nproc is calculated correctly + if subjob_clusters[subjob_cluster]["order_in_cluster"] == "concurrent": + nproc = nproc_sum + else: + nproc = nproc_max + subjob_clusters[subjob_cluster]["nproc"] = nproc + return subjob_clusters + + def prepend_newrun_job(config, subjob_clusters): + """ + Looks for subjob_cluster that are set by user workflow (not a 'SimulationSetup') + and do not follow a 'SimulationSetup' subjob_clusters. + E.g. if two user workflow are the last two subjob_clusters ??? + Any other example cases when this is the case? + + Parameters + ---------- + self : Workflow object + config : dict + subjob_clusters : dict + + Returns + ------- + self : Workflow object + subjob_clusters + """ + first_cluster_name = self.first_task_in_queue + first_cluster = subjob_clusters[first_cluster_name] + #esm_parser.pprint_config(first_cluster) + + if not first_cluster.get("batch_or_shell", "Error") == "SimulationSetup": + + last_cluster_name = self.last_task_in_queue + last_cluster = subjob_clusters[last_cluster_name] + + new_first_cluster_name = "newrun" + new_first_cluster = { + "newrun": { + "called_from": last_cluster_name, + "run_before": first_cluster_name, + "next_submit": [first_cluster_name], + "subjobs": ["newrun_general"], + "batch_or_shell": "SimulationSetup", + } + } + + last_cluster["next_submit"].append("newrun") + last_cluster["next_submit"].remove(first_cluster_name) + + first_cluster["called_from"] = "newrun" + + self.first_task_in_queue = "newrun" + + new_subjob = { + "newrun_general": { + "nproc": 1, + 
"called_from": last_cluster_name, + "run_before": first_cluster_name, + "next_submit": [first_cluster_name], + "subjob_cluster": "newrun", + } + } + + subjob_clusters.update(new_first_cluster) + +# TODO: add new phase to workflow??? + #gw_config["subjobs"].update(new_subjob) + + return [self, subjob_clusters] + + def skip_cluster(self, config): """ Checks if a phase/cluster can be skipped. @@ -158,35 +451,37 @@ def skip_cluster(self, config): class WorkflowPhase: """A workflow phase class.""" - name = None - nproc = 1 - run_before = None - run_after = None - submit_to_batch_system = True - run_on_queue = None - cluster = None - next_submit = [] - called_from = None - batch_or_shell = "SimulationSetup" - order_in_cluster = "sequential" - run_only = None - skip_chunk_number = None - skip_run_number = None def __init__(self, phase_name): + self.name = None + self.nproc = 1 + self.run_before = None + self.run_after = None + self.submit_to_batch_system = True + self.run_on_queue = None + self.cluster = None + self.next_submit = [] + self.called_from = None + self.batch_or_shell = "SimulationSetup" + self.order_in_cluster = "sequential" + self.run_only = None + self.skip_chunk_number = None + self.skip_run_number = None self.name = phase_name class UserWorkflowPhase(WorkflowPhase): """A user workflow phase class.""" - script = None - script_dir = None - call_function = None - env_preparation = None def __init__(self, phase_name): - self.name = phase_name - batch_or_shell = "batch" - submit_to_batch_system = False + + WorkflowPhase.__init__(self, phase_name) + + self.script = None + self.script_dir = None + self.call_function = None + self.env_preparation = None + self.batch_or_shell = "shell" + self.submit_to_batch_system = False def assemble_workflow(config): from . import Workflow @@ -225,18 +520,18 @@ def assemble_workflow(config): # instead all workflow phases are defined in different configs and/or runscripts? 
# TODO: Where could a user define a different (default) phase list? Or should this be changed in defaults.yaml? # 2. Initialize default workflow phases - workflow = init_default_workflow(workflow, config) + workflow = workflow.init_default_workflow(config) # 3. Read in workflows from runscript and config files - workflow = collect_all_user_workflows(workflow, config) + workflow = workflow.collect_all_user_workflows(config) #config = collect_all_workflow_information(config) # Why do I need to do the following function call? # 4. Order user workflows into default workflow wrt. workflow attributs. - workflow = order_clusters(workflow, config) + workflow = workflow.order_clusters(config) - subjob_clusters = complete_clusters(workflow, config) - subjob_clusters = prepend_newrun_job(config) + subjob_clusters = workflow.complete_clusters(config) + subjob_clusters = workflow.prepend_newrun_job(config) # 5. write the workflow to config config = workflow.write_to_config(config) breakpoint() @@ -257,82 +552,6 @@ def write_subjob_clusters_to_config(config, subjob_clusters): config["general"]["subjob_clusters"] = subjob_clusters return config -def display_nicely(config): - """ - Pretty prints the workflow configuration assembled in config["general"]. - - Parameters - ---------- - config : dict - - Returns - ------- - config : dict - """ - esm_parser.pprint_config(config["general"]["workflow"]) - return config - - -def prepend_newrun_job(workflow, config, subjob_clusters): - """ - Looks for subjob_cluster that are set by user workflow (not a 'SimulationSetup') - and do not follow a 'SimulationSetup' subjob_clusters. - E.g. if two user workflow are the last two subjob_clusters ??? - Any other example cases when this is the case? 
- - Parameters - ---------- - config : dict - - Returns - ------- - workflow - subjob_clusters - """ - first_cluster_name = workflow.first_task_in_queue - first_cluster = subjob_clusters[first_cluster_name] - #esm_parser.pprint_config(first_cluster) - - if not first_cluster.get("batch_or_shell", "Error") == "SimulationSetup": - - last_cluster_name = workflow.last_task_in_queue - last_cluster = subjob_clusters[last_cluster_name] - - new_first_cluster_name = "newrun" - new_first_cluster = { - "newrun": { - "called_from": last_cluster_name, - "run_before": first_cluster_name, - "next_submit": [first_cluster_name], - "subjobs": ["newrun_general"], - "batch_or_shell": "SimulationSetup", - } - } - - last_cluster["next_submit"].append("newrun") - last_cluster["next_submit"].remove(first_cluster_name) - - first_cluster["called_from"] = "newrun" - - workflow.first_task_in_queue = "newrun" - - new_subjob = { - "newrun_general": { - "nproc": 1, - "called_from": last_cluster_name, - "run_before": first_cluster_name, - "next_submit": [first_cluster_name], - "subjob_cluster": "newrun", - } - } - - subjob_clusters.update(new_first_cluster) - -# TODO: add new phase to workflow??? - #gw_config["subjobs"].update(new_subjob) - - return [workflow, subjob_clusters] - def set_phase_attrib(workflow_phases, phase_name, attrib, value): for phase in workflow_phases: if phase.name == phase_name: @@ -347,146 +566,6 @@ def get_phase_attrib(workflow_phases, phase_name, attrib): value = getattr(phase, attrib) return value -def order_clusters(workflow, config): - """ - Put the subjob_clusters in order ??? 
- - Parameters - ---------- - config : dict - - Returns - ------- - workflow - """ - independent = workflow.check_user_workflow_dependency() - unknown_phases = workflow.check_unknown_phases() - - if unknown_phases: - esm_parser.user_error("ERROR", "Undefined subjob/phase.") - - for user_phase in workflow.user_phases: -# TODO: Check if run_after or run_before is set for each user phase - if not user_phase.run_before and not user_phase.run_after: - esm_parser.user_error("ERROR", "Don't know when to start user_phase. Please set run_after or run_before") -# TODO: Check if not both run_after and run_before are set at the same time for each user phase - if user_phase.run_before and user_phase.run_after: - esm_parser.user_error("ERROR", "Both run_after and run_before are. Don't know when to start user_phase. Please only set run_after or run_before") - -# TODO: Correct for "last_task_in_queue" if necessary - # Collect all next_run_triggered_by entrie - next_triggered = [] - run_after = [] - for model in config: - if "workflow" in config[model]: - if "next_run_triggered_by" in config[model]["workflow"]: - next_triggered.append(config[model]["workflow"]["next_run_triggered_by"]) - next_triggered = list(filter((workflow.next_run_triggered_by).__ne__, next_triggered)) - if len(next_triggered) > 1: - esm_parser.user_error("ERROR", f"Mismatch found setting next_run_triggered_by for workflow.") - elif len(next_triggered) == 1: - workflow.next_run_triggered_by = next_triggered[0] - #else: let default - -# Fill up "next_submit" list - next_submits = {} - for phase in workflow.phases + workflow.user_phases: - next_submits[phase.name] = [] - for phase2 in workflow.phases + workflow.user_phases: - if not phase2.run_after == None: - next_submits[phase2.run_after].append(phase2.name) - phase2.called_from = phase2.run_after - for phase3 in workflow.phases + workflow.user_phases: - phase3.next_submit = next_submits[phase3.name] - - for phase4 in workflow.phases + workflow.user_phases: - 
calling_cluster = phase4.run_after -# - if calling_cluster == workflow.last_task_in_queue: - workflow.last_task_in_queue = phase4.name -# - called_cluster = phase4.run_before - set_phase_attrib(workflow.phases+workflow.user_phases, called_cluster, "called_from", phase4.name) - if called_cluster == workflow.first_task_in_queue: - workflow.first_task_in_queue = phase4.name - if phase4.cluster == None: - phase4.cluster = phase4.name -# - first_cluster_name = workflow.first_task_in_queue - last_cluster_name = workflow.last_task_in_queue -# - value = get_phase_attrib(workflow.phases+workflow.user_phases, last_cluster_name, "next_submit") - if not first_cluster_name in get_phase_attrib(workflow.phases+workflow.user_phases, last_cluster_name, "next_submit"): - set_phase_attrib(workflow.phases+workflow.user_phases, last_cluster_name, "next_submit", first_cluster_name) - if not last_cluster_name == get_phase_attrib(workflow.phases+workflow.user_phases, first_cluster_name, "called_from"): - set_phase_attrib(workflow.phases+workflow.user_phases, first_cluster_name, "called_from", last_cluster_name) -# - return workflow - - -def complete_clusters(workflow, config): - # all that are within a next_submit list are in a cluster if: - # run concurrently - # have the same cluster entry. - """ - Rearanges the subjobs to their subjobs_clusters ??? - - Parameters - ---------- - workflow - config : dict - - Returns - ------- - subjob_clusters : dict - """ - # sort into dict subjob_clusters - subjob_clusters = {} - - for phase in workflow.phases + workflow.user_phases: - # Erstellt ein leeres dict im dict subjob_clusters - if not phase.cluster in subjob_clusters: - subjob_clusters[phase.cluster] = {} - - # Create empty list for each subjob_cluster - if not "subjobs" in subjob_clusters[phase.cluster]: - subjob_clusters[phase.cluster]["subjobs"] = [] - - # Append subjobs to list. 
- subjob_clusters[phase.cluster]["subjobs"].append(phase.name) - - # Then, complete the resource information per cluster - # determine whether a cluster is to be submitted to a batch system - for subjob_cluster in subjob_clusters: - nproc_sum = nproc_max = 0 - attributes = ["submit_to_batch_system", "order_in_cluster", "run_on_queue", "run_after", "run_before", "run_only", "skip_run_number", "skip_chunk_number", "batch_or_shell"] - for attrib in attributes: - temp_list = [] - for subjob in subjob_clusters[subjob_cluster]["subjobs"]: - if not get_phase_attrib(workflow.phases + workflow.user_phases, subjob, attrib) in temp_list: - subjob_clusters[subjob_cluster][attrib] = get_phase_attrib(workflow.phases + workflow.user_phases, subjob, attrib) - else: - print("Missmatch in attributes") - sys.exit(-1) - nproc_sum += get_phase_attrib(workflow.phases + workflow.user_phases, subjob, "nproc") - nproc_max = max(get_phase_attrib(workflow.phases + workflow.user_phases, subjob, "nproc"), nproc_max) - -# if subjob_clusters[subjob_cluster].get("submit_to_batch_system", False): -# subjob_clusters[subjob_cluster]["batch_or_shell"] = "batch" -# elif subjob_clusters[subjob_cluster].get("script", False): -# subjob_clusters[subjob_cluster]["batch_or_shell"] = "shell" -# - if not "run_on_queue" in subjob_clusters[subjob_cluster]: - print(f"Information on target queue is missing in cluster {subjob_cluster}.") - sys.exit(-1) -# -# TODO: Check in nproc is calculated correctly - if subjob_clusters[subjob_cluster]["order_in_cluster"] == "concurrent": - nproc = nproc_sum - else: - nproc = nproc_max - subjob_clusters[subjob_cluster]["nproc"] = nproc - return subjob_clusters def calc_number_of_tasks(config): """ @@ -508,92 +587,29 @@ def calc_number_of_tasks(config): tasks += config[model]["nprocar"] * config[model]["nprocbr"] return tasks -def init_default_workflow(default_workflow, config): - """ - Add workflow for precompute, compute, and tidy phases - etc information already here! 
- Parameters - ---------- - default_workflow - workflow object - config : dict - Returns - ------- - default_workflow - """ - # TODO: make a method of class Workflow - - # For testing only, set in some yaml config - workflow_phases = default_workflow.phases - - # Calculating the number of tasks for each component/model - # needed for phase compute - tasks = calc_number_of_tasks(config) - # Create default workflow phase objects: - default_workflow.phases = [] - for ind, phase in enumerate(workflow_phases): - default_workflow.phases.append(WorkflowPhase(phase)) - - for ind, phase in enumerate(default_workflow.phases): - if ind < default_workflow.num_phases - 1: - phase.run_before = default_workflow.phases[ind+1].name - else: - phase.run_after = default_workflow.phases[ind-1].name - # TODO: this needs to be set somewhere else, or different. - phase.cluster = phase.name - if phase.name == "compute": - phase.nproc = tasks - phase.submit_to_batch_system = config["general"].get("submit_to_batch_system", True) - phase.run_on_queue = config["computer"]["partitions"]["compute"]["name"] - - default_workflow.first_task_in_queue = default_workflow.phases[0].name # prepcompute - default_workflow.last_task_in_queue = default_workflow.phases[-1].name # tidy - # next_run_triggered_by only used to set last_task_in_queue - # TODO: why not set last_task_in_queue directly? - default_workflow.next_run_triggered_by = default_workflow.phases[-1].name # tidy - - return default_workflow - -def collect_all_user_workflows(user_workflow,config): - """ - Collect all workflows set by config files. 
- """ - user_workflow_phases = [] - user_workflow_phases_names = [] - for model in config: - if "workflow" in config[model]: - w_config = config[model]["workflow"] - if "subjobs" in w_config: - # copies component workflow config to new variable ref_config - ref_config = copy.deepcopy(w_config) - for subjob in list(copy.deepcopy(w_config["subjobs"])): - # create a new phase object for subjob - # new_phase_name = subjob + "_" + model - # each subjob needs to have an unique name - new_phase_name = subjob - new_phase = UserWorkflowPhase(new_phase_name) - if not new_phase_name in user_workflow_phases_names: - user_workflow_phases_names.append(new_phase_name) - # set attributes of user_workflow phases - for key, value in w_config["subjobs"][subjob].items(): - new_phase.__setattr__(key, value) - user_workflow_phases.append(new_phase) - else: - esm_parser.user_error("ERROR", "Two subjobs of the same name.") - - user_workflow.user_phases = user_workflow_phases - return user_workflow +################### Maybe outdated routines ###################### +def display_nicely(config): + """ + Pretty prints the workflow configuration assembled in config["general"]. 
+ Parameters + ---------- + config : dict + Returns + ------- + config : dict + """ + esm_parser.pprint_config(config["general"]["workflow"]) + return config -################### Maybe outdated routines ###################### def collect_all_workflow_information(config): """ diff --git a/tests/test_esm_runscripts/test_workflow.py b/tests/test_esm_runscripts/test_workflow.py index 855228146..d13871db6 100644 --- a/tests/test_esm_runscripts/test_workflow.py +++ b/tests/test_esm_runscripts/test_workflow.py @@ -68,26 +68,26 @@ def test_config(): return config def test_num_phases(test_workflow_object, test_config): - test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config) + test_workflow_object = test_workflow_object.init_default_workflow(test_config) assert test_workflow_object.num_phases == 3 def test_check_user_workflow_dependency(test_workflow_object, test_config): - test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config) - test_workflow_object = workflow.collect_all_user_workflows(test_workflow_object, test_config) + test_workflow_object = test_workflow_object.init_default_workflow(test_config) + test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config) independent = test_workflow_object.check_user_workflow_dependency() assert independent def test_check_user_workflow_dependency_2(test_workflow_object, test_config): test_config['flow']['workflow']['subjobs']['my_new_subjob_flow']['run_after'] = 'my_new_subjob_oifs' - test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config) - test_workflow_object = workflow.collect_all_user_workflows(test_workflow_object, test_config) + test_workflow_object = test_workflow_object.init_default_workflow(test_config) + test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config) independent = test_workflow_object.check_user_workflow_dependency() assert not independent def 
test_check_unknown_phases(test_workflow_object, test_config): test_config['flow']['workflow']['subjobs']['my_new_subjob_flow']['run_after'] = 'my_new_subjob' - test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config) - test_workflow_object = workflow.collect_all_user_workflows(test_workflow_object, test_config) + test_workflow_object = test_workflow_object.init_default_workflow(test_config) + test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config) unknown_phases = test_workflow_object.check_unknown_phases() assert unknown_phases @@ -105,40 +105,40 @@ def test_order_clusters(test_workflow_object, test_config): # test_config['flow']['workflow']['next_run_triggered_by'] = 'my_new_subjob_flow' # test_config['oifs']['workflow']['next_run_triggered_by'] = 'my_new_subjob_general' #test_config['flow']['workflow']['subjobs']['my_new_subjob_flow']['run_before'] = 'my_new_subjob_oifs' - test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config) - test_workflow_object = workflow.collect_all_user_workflows(test_workflow_object, test_config) - test_workflow_object = workflow.order_clusters(test_workflow_object, test_config) + test_workflow_object = test_workflow_object.init_default_workflow(test_config) + test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config) + test_workflow_object = test_workflow_object.order_clusters(test_config) pytest.fail("something wrong") def test_complete_clusters(test_workflow_object, test_config): - test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config) - test_workflow_object = workflow.collect_all_user_workflows(test_workflow_object, test_config) - test_workflow_object = workflow.order_clusters(test_workflow_object, test_config) - subjob_clusters = workflow.complete_clusters(test_workflow_object, test_config) + test_workflow_object = test_workflow_object.init_default_workflow(test_config) + 
test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config) + test_workflow_object = test_workflow_object.order_clusters(test_config) + subjob_clusters = test_workflow_object.complete_clusters(test_config) pytest.fail("something wrong") def test_prepend_newrun_job(test_workflow_object, test_config): - test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config) - test_workflow_object = workflow.collect_all_user_workflows(test_workflow_object, test_config) - test_workflow_object = workflow.order_clusters(test_workflow_object, test_config) - subjob_clusters = workflow.complete_clusters(test_workflow_object, test_config) + test_workflow_object = test_workflow_object.init_default_workflow(test_config) + test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config) + test_workflow_object = test_workflow_object.order_clusters(test_config) + subjob_clusters = test_workflow_object.complete_clusters(test_config) [test_workflow_object, subjob_clusters] = workflow.prepend_newrun_job(test_workflow_object, test_config, subjob_clusters) pytest.fail("something wrong") def test_write_to_config(test_workflow_object, test_config): - test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config) - test_workflow_object = workflow.collect_all_user_workflows(test_workflow_object, test_config) - test_workflow_object = workflow.order_clusters(test_workflow_object, test_config) - subjob_clusters = workflow.complete_clusters(test_workflow_object, test_config) + test_workflow_object = test_workflow_object.init_default_workflow(test_config) + test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config) + test_workflow_object = test_workflow_object.order_clusters(test_config) + subjob_clusters = test_workflow_object.complete_clusters(test_config) [test_workflow_object, subjob_clusters] = workflow.prepend_newrun_job(test_workflow_object, test_config, subjob_clusters) config 
= test_workflow_object.write_to_config(test_config) pytest.fail("something wrong") def test_write_subjob_clusters_to_config(test_workflow_object, test_config): - test_workflow_object = workflow.init_default_workflow(test_workflow_object, test_config) - test_workflow_object = workflow.collect_all_user_workflows(test_workflow_object, test_config) - test_workflow_object = workflow.order_clusters(test_workflow_object, test_config) - subjob_clusters = workflow.complete_clusters(test_workflow_object, test_config) + test_workflow_object = test_workflow_object.init_default_workflow(test_config) + test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config) + test_workflow_object = test_workflow_object.order_clusters(test_config) + subjob_clusters = test_workflow_object.complete_clusters(test_config) [test_workflow_object, subjob_clusters] = workflow.prepend_newrun_job(test_workflow_object, test_config, subjob_clusters) test_config = workflow.write_subjob_clusters_to_config(test_config, subjob_clusters) test_config = test_workflow_object.write_to_config(test_config) From b8d1db041ad73d82121ec054b257e33381add3ca Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Fri, 27 Oct 2023 18:07:20 +0200 Subject: [PATCH 18/98] Further developments until order_cluster. 
--- src/esm_runscripts/workflow.py | 256 ++++++++++++++++++++++----------- 1 file changed, 176 insertions(+), 80 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index 3644d3503..d35b2e8c3 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -1,7 +1,7 @@ -import sys, copy, os +import sys, copy import esm_parser -#import pdb +import pdb class Workflow: """A workflow class.""" @@ -23,10 +23,9 @@ def __init__(self, phases, always_run_with=[]): """ self.default_phases = [] self.user_phases = [] - self.always_run_with = [] - self.first_task_in_queue = "" - self.last_task_in_queue = "" - self.next_run_triggered_by = "" + self.first_task_in_queue = [] + self.last_task_in_queue = [] + self.next_run_triggered_by = [] # TODO: Call here the phase object ??? self.phases = phases self.always_run_with = always_run_with @@ -46,7 +45,7 @@ def get_phases_attribs_list(self, phase_type, attrib): ---------- self: class Workflow phase_type: str (default or user) - attribute: str + attrib: str Returns ------- @@ -63,19 +62,19 @@ def init_default_workflow(self, config): """ Add workflow for precompute, compute, and tidy phases etc information already here! 
- + Parameters ---------- self : Workflow object config : dict - + Returns ------- self : Workflow object """ - + workflow_phases = self.phases - + # Calculating the number of tasks for each component/model # needed for phase compute tasks = calc_number_of_tasks(config) @@ -84,7 +83,7 @@ def init_default_workflow(self, config): self.phases = [] for ind, phase in enumerate(workflow_phases): self.phases.append(WorkflowPhase(phase)) - + for ind, phase in enumerate(self.phases): if ind < self.num_phases - 1: # Set run_before attrib of all phases (except last on) to the next phase name @@ -99,43 +98,92 @@ def init_default_workflow(self, config): phase.nproc = tasks phase.submit_to_batch_system = config["general"].get("submit_to_batch_system", True) phase.run_on_queue = config["computer"]["partitions"]["compute"]["name"] - + # Set default workflow values - self.first_task_in_queue = self.phases[0].name # prepcompute - self.last_task_in_queue = self.phases[-1].name # tidy + set_workflow_attrib(self, "first_task_in_queue", self.phases[0].name) # prepcompute + set_workflow_attrib(self, "last_task_in_queue", self.phases[-1].name) # tidy # next_run_triggered_by only used to set last_task_in_queue # TODO: why not set last_task_in_queue directly? - self.next_run_triggered_by = self.phases[-1].name # tidy - + set_workflow_attrib(self, "next_run_triggered_by", self.phases[-1].name) # tidy + return self + def check_if_keyword_is_valid(self, keyword): + """ + Checks if the key given for a user workflow is valie + + Parameters + ---------- + keyword : str + + Returns + ------- + true or false + """ + + if hasattr(self, keyword): + return True + else: + return False + def collect_all_user_workflows(self,config): """ Collect all workflows set by config files. 
+ + Parameters + ---------- + self : Workflow object + config : dict + + Returns + ------- + self : Workflow object """ + user_workflow_phases = [] user_workflow_phases_names = [] for model in config: if "workflow" in config[model]: w_config = config[model]["workflow"] - if "subjobs" in w_config: + #if "subjobs" in w_config: + if "phases" in w_config: # copies component workflow config to new variable ref_config ref_config = copy.deepcopy(w_config) - for subjob in list(copy.deepcopy(w_config["subjobs"])): - # create a new phase object for subjob - # new_phase_name = subjob + "_" + model - # each subjob needs to have an unique name - new_phase_name = subjob + # Set attributes of workflow + # This will be overwritten by all user defined workflows??? + # Collect them in a list??? + # check if valid workflow keywords + for key, value in w_config.items(): + if self.check_if_keyword_is_valid(key): + # set here only workflow attributes + if not key == "phases": + set_workflow_attrib(self, key, value) + else: + esm_parser.user_error("ERROR", f"``{key}`` is not a valid keyword of a workflow.") + #for subjob in list(copy.deepcopy(w_config["subjobs"])): + for phase in list(copy.deepcopy(w_config["phases"])): + new_phase_name = phase + # create a new user phase object for ``phase`` new_phase = UserWorkflowPhase(new_phase_name) + if phase in self.get_phases_attribs_list("default", "name"): + esm_parser.user_error("ERROR", f"The user phase ``{new_phase_name}`` has the same name as a default workflow phase. 
This is not allowed.") + # each subjob needs to have an unique name + # check if the name of the new user phase does not already exist if not new_phase_name in user_workflow_phases_names: + # and append it to the list of user phases of the workflow user_workflow_phases_names.append(new_phase_name) # set attributes of user_workflow phases - for key, value in w_config["subjobs"][subjob].items(): - new_phase.__setattr__(key, value) + # check if valid workflow phase keywords + for key, value in w_config["phases"][phase].items(): + if new_phase.check_if_keyword_is_valid(key): + set_phase_attrib([new_phase], new_phase_name, key, value) +# new_phase.__setattr__(key, value) + else: + esm_parser.user_error("ERROR", f"``{key}`` of workflow phase ``{new_phase_name}`` is not a valid keyword of a workflow phase.") user_workflow_phases.append(new_phase) else: - esm_parser.user_error("ERROR", "Two subjobs of the same name.") - + esm_parser.user_error("ERROR", f"Two workflow phases have the same name {new_phase_name}.") + self.user_phases = user_workflow_phases return self @@ -184,9 +232,17 @@ def check_user_workflow_dependency(self): def check_unknown_phases(self): """ - Check if any user phase addresses an unknown workflow phase. + Check if any user phase attributes points to any unknown workflow phase. + + Parameters + ---------- + self : Workflow object + + Returns + ------- + unknown_phases : set """ - unknown_user_phase = True + unknown_phases = [] phases_names = self.get_phases_attribs_list('default','name') user_phases_names = self.get_phases_attribs_list('user','name') run_after = self.get_phases_attribs_list('user','run_after') @@ -196,62 +252,77 @@ def check_unknown_phases(self): # See also https://docs.python.org/3/library/functions.html#filter run_after_list = list(filter(None, run_after)) run_before_list = list(filter(None, run_before)) + # Get all phases that are defined as run_after or run_before, but do not exist as user or default phase. 
+ # If unknown_phase is not empty, there is a user_phase that defines run_after or run_before for a not existing phase. + unknown_phases = set(run_after_list).union(set(run_before_list)).difference(set(user_phases_names).union(set(phases_names))) - unknown_user_phases = set(run_after_list).union(set(run_before_list)).difference(set(user_phases_names).union(set(phases_names))) - return unknown_user_phases + return unknown_phases def order_clusters(self, config): """ - Put the subjob_clusters in order ??? - + Put the subjob_clusters in order. + Parameters ---------- config : dict - + Returns ------- self : Workflow object """ + # Check if user phases are independent from each other + # TODO: What if not independent? independent = self.check_user_workflow_dependency() + # Check if there are unknown phases, if yes, will give error exception unknown_phases = self.check_unknown_phases() - if unknown_phases: - esm_parser.user_error("ERROR", "Undefined subjob/phase.") - + unknowns = ', '.join(unknown_phases) + esm_parser.user_error("ERROR", f"Unknown phase(s) ``{unknowns}`` defined as ``run_before`` or ``run_after``.") + for user_phase in self.user_phases: - # TODO: Check if run_after or run_before is set for each user phase + # Check if run_after or run_before is set for each user phase if not user_phase.run_before and not user_phase.run_after: - esm_parser.user_error("ERROR", "Don't know when to start user_phase. Please set run_after or run_before") - # TODO: Check if not both run_after and run_before are set at the same time for each user phase + esm_parser.user_error("ERROR", f"Don't know when to start user_phase. Please set run_after or run_before for user phase {user_phase.name}.") + # Check if not both run_after and run_before are set at the same time for each user phase if user_phase.run_before and user_phase.run_after: - esm_parser.user_error("ERROR", "Both run_after and run_before are. Don't know when to start user_phase. 
Please only set run_after or run_before") - - # TODO: Correct for "last_task_in_queue" if necessary - # Collect all next_run_triggered_by entrie - next_triggered = [] + esm_parser.user_error("ERROR", f"Both run_after and run_before are set. Don't know when to start {user_phase.name}. Please only set run_after or run_before.") + + # Correct for ``last_task_in_queue`` if necessary + # Collect all next_run_triggered_by entries + next_triggered = self.next_run_triggered_by run_after = [] - for model in config: - if "workflow" in config[model]: - if "next_run_triggered_by" in config[model]["workflow"]: - next_triggered.append(config[model]["workflow"]["next_run_triggered_by"]) - next_triggered = list(filter((self.next_run_triggered_by).__ne__, next_triggered)) - if len(next_triggered) > 1: - esm_parser.user_error("ERROR", f"Mismatch found setting next_run_triggered_by for workflow.") - elif len(next_triggered) == 1: - self.next_run_triggered_by = next_triggered[0] - #else: let default - - # Fill up "next_submit" list + #for model in config: + # if "workflow" in config[model]: + # if "next_run_triggered_by" in config[model]["workflow"]: + # next_triggered.append(config[model]["workflow"]["next_run_triggered_by"]) + + # How needs the next_triggered_by be set??? Which to choose if several workflows are defined? 
+ + #next_triggered = list(filter((self.next_run_triggered_by).__ne__, next_triggered)) + #if len(next_triggered) > 1: + # esm_parser.user_error("ERROR", "Mismatch found setting next_run_triggered_by for workflow.") + #elif len(next_triggered) == 1: + # self.next_run_triggered_by = next_triggered[0] + ##else: let default + + # Set "next_submit" and "called_from" + # Create a dict of all phases with empty lists next_submits = {} for phase in self.phases + self.user_phases: next_submits[phase.name] = [] + for phase2 in self.phases + self.user_phases: if not phase2.run_after == None: next_submits[phase2.run_after].append(phase2.name) phase2.called_from = phase2.run_after + for phase3 in self.phases + self.user_phases: phase3.next_submit = next_submits[phase3.name] - + + print(self.last_task_in_queue) + # ich bin hier + breakpoint() + for phase4 in self.phases + self.user_phases: calling_cluster = phase4.run_after # @@ -276,37 +347,37 @@ def order_clusters(self, config): # return self - def complete_clusters(config): + def complete_clusters(self, config): # all that are within a next_submit list are in a cluster if: # run concurrently # have the same cluster entry. """ Rearanges the subjobs to their subjobs_clusters ??? - + Parameters ---------- self : Workflow object config : dict - + Returns ------- subjob_clusters : dict """ # sort into dict subjob_clusters subjob_clusters = {} - + for phase in self.phases + self.user_phases: # Erstellt ein leeres dict im dict subjob_clusters if not phase.cluster in subjob_clusters: subjob_clusters[phase.cluster] = {} - + # Create empty list for each subjob_cluster if not "subjobs" in subjob_clusters[phase.cluster]: subjob_clusters[phase.cluster]["subjobs"] = [] - + # Append subjobs to list. 
subjob_clusters[phase.cluster]["subjobs"].append(phase.name) - + # Then, complete the resource information per cluster # determine whether a cluster is to be submitted to a batch system for subjob_cluster in subjob_clusters: @@ -322,7 +393,7 @@ def complete_clusters(config): sys.exit(-1) nproc_sum += get_phase_attrib(self.phases + self.user_phases, subjob, "nproc") nproc_max = max(get_phase_attrib(self.phases + self.user_phases, subjob, "nproc"), nproc_max) - + # if subjob_clusters[subjob_cluster].get("submit_to_batch_system", False): # subjob_clusters[subjob_cluster]["batch_or_shell"] = "batch" # elif subjob_clusters[subjob_cluster].get("script", False): @@ -340,19 +411,19 @@ def complete_clusters(config): subjob_clusters[subjob_cluster]["nproc"] = nproc return subjob_clusters - def prepend_newrun_job(config, subjob_clusters): + def prepend_newrun_job(self, config, subjob_clusters): """ Looks for subjob_cluster that are set by user workflow (not a 'SimulationSetup') and do not follow a 'SimulationSetup' subjob_clusters. E.g. if two user workflow are the last two subjob_clusters ??? Any other example cases when this is the case? 
- + Parameters ---------- self : Workflow object config : dict subjob_clusters : dict - + Returns ------- self : Workflow object @@ -361,12 +432,12 @@ def prepend_newrun_job(config, subjob_clusters): first_cluster_name = self.first_task_in_queue first_cluster = subjob_clusters[first_cluster_name] #esm_parser.pprint_config(first_cluster) - + if not first_cluster.get("batch_or_shell", "Error") == "SimulationSetup": - + last_cluster_name = self.last_task_in_queue last_cluster = subjob_clusters[last_cluster_name] - + new_first_cluster_name = "newrun" new_first_cluster = { "newrun": { @@ -377,14 +448,14 @@ def prepend_newrun_job(config, subjob_clusters): "batch_or_shell": "SimulationSetup", } } - + last_cluster["next_submit"].append("newrun") last_cluster["next_submit"].remove(first_cluster_name) - + first_cluster["called_from"] = "newrun" - + self.first_task_in_queue = "newrun" - + new_subjob = { "newrun_general": { "nproc": 1, @@ -394,7 +465,7 @@ def prepend_newrun_job(config, subjob_clusters): "subjob_cluster": "newrun", } } - + subjob_clusters.update(new_first_cluster) # TODO: add new phase to workflow??? @@ -483,6 +554,25 @@ def __init__(self, phase_name): self.batch_or_shell = "shell" self.submit_to_batch_system = False + def check_if_keyword_is_valid(self, keyword): + """ + Checks if the key given for a user workflow is valie + + Parameters + ---------- + keyword : str + + Returns + ------- + true or false + """ + + if hasattr(self, keyword): + return True + else: + return False + + def assemble_workflow(config): from . import Workflow """ @@ -530,11 +620,11 @@ def assemble_workflow(config): # 4. Order user workflows into default workflow wrt. workflow attributs. workflow = workflow.order_clusters(config) + breakpoint() subjob_clusters = workflow.complete_clusters(config) subjob_clusters = workflow.prepend_newrun_job(config) # 5. write the workflow to config config = workflow.write_to_config(config) - breakpoint() # 6. 
Remove old worklow from config # Set "jobtype" for the first task??? @@ -552,6 +642,12 @@ def write_subjob_clusters_to_config(config, subjob_clusters): config["general"]["subjob_clusters"] = subjob_clusters return config +def set_workflow_attrib(workflow, attrib, value): + if type(getattr(workflow, attrib)).__name__ == "list": + workflow.__dict__[attrib].append(value) + else: + workflow.__setattr__(attrib, value) + def set_phase_attrib(workflow_phases, phase_name, attrib, value): for phase in workflow_phases: if phase.name == phase_name: @@ -564,7 +660,7 @@ def get_phase_attrib(workflow_phases, phase_name, attrib): for phase in workflow_phases: if phase.name == phase_name: value = getattr(phase, attrib) - return value + return value def calc_number_of_tasks(config): @@ -688,7 +784,7 @@ def collect_all_workflow_information(config): # checks if next_run:triggered_by is tidy or the one in user workflow, or empty? if "next_run_triggered_by" in w_config: if not gw_config["next_run_triggered_by"] in ["tidy", w_config["next_run_triggered_by"], ]: - print(f"Mismatch found setting next_run_triggered_by for workflow.") + print("Mismatch found setting next_run_triggered_by for workflow.") sys.exit(-1) else: gw_config["next_run_triggered_by"] = w_config["next_run_triggered_by"] From 2cd0da6151319cf4a3198ba6107266a7a3be926a Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Fri, 3 Nov 2023 11:42:38 +0100 Subject: [PATCH 19/98] Further changes for workflow manager. --- src/esm_runscripts/batch_system.py | 4 +- src/esm_runscripts/workflow.py | 91 ++++++++++++++++++++++-------- 2 files changed, 70 insertions(+), 25 deletions(-) diff --git a/src/esm_runscripts/batch_system.py b/src/esm_runscripts/batch_system.py index a8faec98e..4f8461168 100644 --- a/src/esm_runscripts/batch_system.py +++ b/src/esm_runscripts/batch_system.py @@ -374,6 +374,7 @@ def get_run_commands(config, subjob, batch_or_shell): # here or in compute.py? 
commands = [] if subjob.startswith("compute"): + # for batch jobs if config["general"].get("submit_to_batch_system", True): batch_system = config["computer"] if "execution_command" in batch_system: @@ -384,6 +385,7 @@ def get_run_commands(config, subjob, batch_or_shell): # here or in compute.py? ) if config["general"].get("multi_srun"): return self.bs.get_run_commands_multisrun(config, commands) + # for shell scrips else: for model in config: if model == "computer": @@ -516,7 +518,7 @@ def write_simple_runscript(config, cluster, batch_or_shell="batch"): # dummy = 0 else: # "normal" case dummy = 0 - +# was macht das hier? wo/wie wird submits_abother_job definiert? if submits_another_job(config, cluster): # and batch_or_shell == "batch": # -j ? is that used somewhere? I don't think so, replaced by workflow # " -j "+ config["general"]["jobtype"] diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index d35b2e8c3..e20a84fc9 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -1,6 +1,8 @@ import sys, copy import esm_parser +from pprint import pprint + import pdb class Workflow: @@ -172,7 +174,7 @@ def collect_all_user_workflows(self,config): if not new_phase_name in user_workflow_phases_names: # and append it to the list of user phases of the workflow user_workflow_phases_names.append(new_phase_name) - # set attributes of user_workflow phases + # set attributes of user_workflow phases from config settings # check if valid workflow phase keywords for key, value in w_config["phases"][phase].items(): if new_phase.check_if_keyword_is_valid(key): @@ -191,15 +193,37 @@ def write_to_config(self, config): """ Write to config. """ + cluster_att = [] + for att in dir(self.phases[0]): + if(att[:2] != "__"): + cluster_att.append(att) + # 1. Delete unnecessary config workflow entries (e.g. 
in general) + if "workflow" in config["general"]: + del config["general"]["workflow"] + # It is assumed here, that there are no workflows in config["general"] # or that these are removed after collect_... config["general"]["workflow"] = {} config["general"]["workflow"].update(self.__dict__) + # 3. Write clusters + config["general"]["workflow"]["subjob_clusters"] = {} + for cluster in self.get_phases_attribs_list("default", "cluster") + self.get_phases_attribs_list("user", "cluster"): + config["general"]["workflow"]["subjob_clusters"][cluster] = {} + config["general"]["workflow"]["subjob_clusters"][cluster]["subjobs"] = [] + for phase in self.phases + self.user_phases: + if phase.cluster == cluster: + config["general"]["workflow"]["subjob_clusters"][cluster]["subjobs"].append(phase.name) + for att in cluster_att: + config["general"]["workflow"]["subjob_clusters"][cluster][att] = getattr(phase, att) + # 2. Write subjobs/phases config["general"]["workflow"]["subjobs"] = {} - for phase in self.phases: + for phase in self.phases+self.user_phases: temp_dict = {phase.name: phase.__dict__} config["general"]["workflow"]["subjobs"].update(temp_dict) + # Todo: delete phases and user_phases + del config["general"]["workflow"]["phases"] + del config["general"]["workflow"]["user_phases"] return config def check_user_workflow_dependency(self): @@ -306,6 +330,8 @@ def order_clusters(self, config): ##else: let default # Set "next_submit" and "called_from" + # "next_submit" which phase will be called next (run_after of the next phase) + # "called_from" name of previous phase, run_after of current phase # Create a dict of all phases with empty lists next_submits = {} for phase in self.phases + self.user_phases: @@ -319,32 +345,51 @@ def order_clusters(self, config): for phase3 in self.phases + self.user_phases: phase3.next_submit = next_submits[phase3.name] - print(self.last_task_in_queue) - # ich bin hier - breakpoint() +# for phase6 in self.phases + self.user_phases: +# 
print(phase6.name, phase6.run_after, phase6.called_from, phase6.next_submit) + + +# assign user phases to a cluster (tbd) + # - if all phases have the same run_after and run_before they can be in the cluster + # - in this cluster they will be run in parallel? + for phase4 in self.phases + self.user_phases: calling_cluster = phase4.run_after - # + + # set last_task_in_queue if calling_cluster == self.last_task_in_queue: - self.last_task_in_queue = phase4.name - # + self.last_task_in_queue.append(phase4.name) + called_cluster = phase4.run_before +# print(f"calling_cluster: {calling_cluster} ->", phase4.name, f" -> called_cluster: {called_cluster}") set_phase_attrib(self.phases+self.user_phases, called_cluster, "called_from", phase4.name) + + # set first_task_in_queue if called_cluster == self.first_task_in_queue: - self.first_task_in_queue = phase4.name + self.first_task_in_queue.append(phase4.name) + + # set empty cluster entries to phase name if phase4.cluster == None: phase4.cluster = phase4.name - # - first_cluster_name = self.first_task_in_queue - last_cluster_name = self.last_task_in_queue - # - value = get_phase_attrib(self.phases+self.user_phases, last_cluster_name, "next_submit") + +# todo: check if num list > 1, is this possible ??? 
+ first_cluster_name = self.first_task_in_queue[0] + last_cluster_name = self.last_task_in_queue[0] + + # if first_cluster_name is not next_submit of last_cluster_name if not first_cluster_name in get_phase_attrib(self.phases+self.user_phases, last_cluster_name, "next_submit"): set_phase_attrib(self.phases+self.user_phases, last_cluster_name, "next_submit", first_cluster_name) + # if last_cluster_name is not called_from of first_cluster_name if not last_cluster_name == get_phase_attrib(self.phases+self.user_phases, first_cluster_name, "called_from"): set_phase_attrib(self.phases+self.user_phases, first_cluster_name, "called_from", last_cluster_name) - # + +# for i in range(len(self.phases)): +# pprint(self.phases[i].__dict__) +# +# for i in range(len(self.user_phases)): +# pprint(self.user_phases[i].__dict__) + return self def complete_clusters(self, config): @@ -528,7 +573,7 @@ def __init__(self, phase_name): self.nproc = 1 self.run_before = None self.run_after = None - self.submit_to_batch_system = True + self.submit_to_batch_system = False self.run_on_queue = None self.cluster = None self.next_submit = [] @@ -554,6 +599,7 @@ def __init__(self, phase_name): self.batch_or_shell = "shell" self.submit_to_batch_system = False + def check_if_keyword_is_valid(self, keyword): """ Checks if the key given for a user workflow is valie @@ -613,16 +659,13 @@ def assemble_workflow(config): workflow = workflow.init_default_workflow(config) # 3. Read in workflows from runscript and config files workflow = workflow.collect_all_user_workflows(config) - #config = collect_all_workflow_information(config) - -# Why do I need to do the following function call? # 4. Order user workflows into default workflow wrt. workflow attributs. workflow = workflow.order_clusters(config) - breakpoint() - subjob_clusters = workflow.complete_clusters(config) - subjob_clusters = workflow.prepend_newrun_job(config) +# What is the next functions needed for? 
+# subjob_clusters = workflow.complete_clusters(config) +# subjob_clusters = workflow.prepend_newrun_job(config) # 5. write the workflow to config config = workflow.write_to_config(config) # 6. Remove old worklow from config @@ -631,10 +674,10 @@ def assemble_workflow(config): if config["general"]["jobtype"] == "unknown": config["general"]["command_line_config"]["jobtype"] = config["general"][ "workflow" - ]["first_task_in_queue"] + ]["first_task_in_queue"][0] config["general"]["jobtype"] = config["general"]["workflow"][ "first_task_in_queue" - ] + ][0] # todo: this needs to be a string, not a list return config From 273175f4db21ef0844d1bee8e5b5256ad67ce313 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Fri, 10 Nov 2023 12:45:30 +0100 Subject: [PATCH 20/98] Added prepend_newrun, skip_cluster, fixed next_submit entries. --- src/esm_runscripts/workflow.py | 550 +++++++++++++++++---------------- 1 file changed, 280 insertions(+), 270 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index e20a84fc9..f94030a33 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -25,9 +25,9 @@ def __init__(self, phases, always_run_with=[]): """ self.default_phases = [] self.user_phases = [] - self.first_task_in_queue = [] - self.last_task_in_queue = [] - self.next_run_triggered_by = [] + self.first_task_in_queue = None + self.last_task_in_queue = None + self.next_run_triggered_by = None # TODO: Call here the phase object ??? 
self.phases = phases self.always_run_with = always_run_with @@ -39,6 +39,23 @@ def num_phases(self): """ return len(self.phases) + def get_workflow_phase_by_name(self, phase_name): + """ + Returns phase of phase_name + + Arguments + --------- + self : class Workflow + phase_name : str (name of the phase to be returned + + Returns + ------- + phase : class phase or user_phase + """ + for phase in self.phases + self.user_phases: + if phase.name == phase_name: + return phase + def get_phases_attribs_list(self, phase_type, attrib): """ Return the names of all phases as list. @@ -90,14 +107,19 @@ def init_default_workflow(self, config): if ind < self.num_phases - 1: # Set run_before attrib of all phases (except last on) to the next phase name phase.run_before = self.phases[ind+1].name + phase.next_submit.append(self.phases[ind+1].name) + phase.run_after = self.phases[ind-1].name else: # Set run_after attrib of last phase to previous phase name + phase.run_before = self.phases[0].name + phase.next_submit.append(self.phases[0].name) phase.run_after = self.phases[ind-1].name # TODO: this needs to be set somewhere else, or different. phase.cluster = phase.name if phase.name == "compute": phase.nproc = tasks + phase.batch_or_shell = 'batch' phase.submit_to_batch_system = config["general"].get("submit_to_batch_system", True) phase.run_on_queue = config["computer"]["partitions"]["compute"]["name"] @@ -308,9 +330,8 @@ def order_clusters(self, config): if not user_phase.run_before and not user_phase.run_after: esm_parser.user_error("ERROR", f"Don't know when to start user_phase. Please set run_after or run_before for user phase {user_phase.name}.") # Check if not both run_after and run_before are set at the same time for each user phase - if user_phase.run_before and user_phase.run_after: - esm_parser.user_error("ERROR", f"Both run_after and run_before are set. Don't know when to start {user_phase.name}. 
Please only set run_after or run_before.") - +# if user_phase.run_before and user_phase.run_after: +# esm_parser.user_error("ERROR", f"Both run_after and run_before are set. Don't know when to start {user_phase.name}. Please only set run_after or run_before.") # Correct for ``last_task_in_queue`` if necessary # Collect all next_run_triggered_by entries next_triggered = self.next_run_triggered_by @@ -345,10 +366,6 @@ def order_clusters(self, config): for phase3 in self.phases + self.user_phases: phase3.next_submit = next_submits[phase3.name] -# for phase6 in self.phases + self.user_phases: -# print(phase6.name, phase6.run_after, phase6.called_from, phase6.next_submit) - - # assign user phases to a cluster (tbd) # - if all phases have the same run_after and run_before they can be in the cluster # - in this cluster they will be run in parallel? @@ -357,25 +374,28 @@ def order_clusters(self, config): for phase4 in self.phases + self.user_phases: calling_cluster = phase4.run_after - # set last_task_in_queue - if calling_cluster == self.last_task_in_queue: - self.last_task_in_queue.append(phase4.name) - - called_cluster = phase4.run_before -# print(f"calling_cluster: {calling_cluster} ->", phase4.name, f" -> called_cluster: {called_cluster}") - set_phase_attrib(self.phases+self.user_phases, called_cluster, "called_from", phase4.name) - - # set first_task_in_queue - if called_cluster == self.first_task_in_queue: - self.first_task_in_queue.append(phase4.name) +# brauch ich das hier noch??? 
+# # set last_task_in_queue +# if calling_cluster == self.last_task_in_queue: +# #self.last_task_in_queue.append(phase4.name) +# self.last_task_in_queue = phase4.name +# +# called_cluster = phase4.run_before +## print(f"calling_cluster: {calling_cluster} ->", phase4.name, f" -> called_cluster: {called_cluster}") +# set_phase_attrib(self.phases+self.user_phases, called_cluster, "called_from", phase4.name) +# +# # set first_task_in_queue +# if called_cluster == self.first_task_in_queue: +## self.first_task_in_queue.append(phase4.name) +# self.first_task_in_queue = phase4.name # set empty cluster entries to phase name if phase4.cluster == None: phase4.cluster = phase4.name # todo: check if num list > 1, is this possible ??? - first_cluster_name = self.first_task_in_queue[0] - last_cluster_name = self.last_task_in_queue[0] + first_cluster_name = self.first_task_in_queue + last_cluster_name = self.last_task_in_queue # if first_cluster_name is not next_submit of last_cluster_name if not first_cluster_name in get_phase_attrib(self.phases+self.user_phases, last_cluster_name, "next_submit"): @@ -384,12 +404,6 @@ def order_clusters(self, config): if not last_cluster_name == get_phase_attrib(self.phases+self.user_phases, first_cluster_name, "called_from"): set_phase_attrib(self.phases+self.user_phases, first_cluster_name, "called_from", last_cluster_name) -# for i in range(len(self.phases)): -# pprint(self.phases[i].__dict__) -# -# for i in range(len(self.user_phases)): -# pprint(self.user_phases[i].__dict__) - return self def complete_clusters(self, config): @@ -456,114 +470,103 @@ def complete_clusters(self, config): subjob_clusters[subjob_cluster]["nproc"] = nproc return subjob_clusters - def prepend_newrun_job(self, config, subjob_clusters): + def prepend_newrun_job(self, config): """ + - Creates a new cluster "newrun" if first_task_in_queue is not of + type 'SimulationSetup' + - Why is this needed? So that every first task is a SimulationSetup to init a config object??? 
+ Looks for subjob_cluster that are set by user workflow (not a 'SimulationSetup') - and do not follow a 'SimulationSetup' subjob_clusters. - E.g. if two user workflow are the last two subjob_clusters ??? - Any other example cases when this is the case? + and are not of type 'SimulationSetup'. Parameters ---------- self : Workflow object config : dict - subjob_clusters : dict Returns ------- self : Workflow object - subjob_clusters """ - first_cluster_name = self.first_task_in_queue - first_cluster = subjob_clusters[first_cluster_name] - #esm_parser.pprint_config(first_cluster) + first_task_name = self.first_task_in_queue + first_phase = self.get_workflow_phase_by_name(first_task_name) - if not first_cluster.get("batch_or_shell", "Error") == "SimulationSetup": + if not get_phase_attrib(first_phase, first_phase.name, "batch_or_shell") == "SimulationSetup": - last_cluster_name = self.last_task_in_queue - last_cluster = subjob_clusters[last_cluster_name] + last_task_name = self.last_task_in_queue + last_phase = self.get_workflow_phase_by_name(last_task_name) - new_first_cluster_name = "newrun" - new_first_cluster = { - "newrun": { - "called_from": last_cluster_name, - "run_before": first_cluster_name, - "next_submit": [first_cluster_name], - "subjobs": ["newrun_general"], - "batch_or_shell": "SimulationSetup", - } - } + new_first_phase_name = "newrun_general" + # Create new default phase object + new_first_phase = WorkflowPhase(new_first_phase_name) + set_phase_attrib(new_first_phase, new_first_phase_name, "called_from", last_task_name) + set_phase_attrib(new_first_phase, new_first_phase_name, "run_before", first_task_name) + set_phase_attrib(new_first_phase, new_first_phase_name, "next_submit", first_task_name) + set_phase_attrib(new_first_phase, new_first_phase_name, "cluster", "newrun") + set_phase_attrib(new_first_phase, new_first_phase_name, "batch_or_shell", "SimulationSetup") + set_phase_attrib(new_first_phase, new_first_phase_name, "nproc", 1) - 
last_cluster["next_submit"].append("newrun") - last_cluster["next_submit"].remove(first_cluster_name) + # reset last_task attributes + set_phase_attrib(last_phase, last_phase.name, "next_submit", new_first_phase_name) + last_phase.next_submit.remove(first_task_name) - first_cluster["called_from"] = "newrun" + # reset first_task attributes + first_phase.called_from = new_first_phase_name - self.first_task_in_queue = "newrun" + # reset workflow attributes + self.first_task_in_queue = new_first_phase_name - new_subjob = { - "newrun_general": { - "nproc": 1, - "called_from": last_cluster_name, - "run_before": first_cluster_name, - "next_submit": [first_cluster_name], - "subjob_cluster": "newrun", - } - } + # Set new phase to beginning of default phase list + self.phases.insert(0, new_first_phase) - subjob_clusters.update(new_first_cluster) + return self -# TODO: add new phase to workflow??? - #gw_config["subjobs"].update(new_subjob) - return [self, subjob_clusters] +def skip_cluster(cluster, config): + """ + Checks if a phase/cluster can be skipped. + Needed keywords: run_only, skip_chunk_number + Parameters + ---------- + self + config : dict - def skip_cluster(self, config): - """ - Checks if a phase/cluster can be skipped. 
- Needed keywords: run_only, skip_chunk_number + Returns + ------- + True or False + """ + gw_config = config["general"]["workflow"] + clusterconf = gw_config["subjob_clusters"][cluster] - Parameters - ---------- - self - config : dict + """ + print(f"run_only {clusterconf.get('run_only', 'Error') }") + print(f"skip_chunk_number {clusterconf.get('skip_chunk_number', -999)}") + print(f"skip_run_number {clusterconf.get('skip_run_number', -999)}") + print(f"chunk_number {config['general'].get('chunk_number', -998)}") + print(f"run_number {config['general'].get('run_number', -998)}") + print(f"last_run_in_chunk {config['general']['last_run_in_chunk']}") + print(f"first_run_in_chunk {config['general']['first_run_in_chunk']}") + """ - Returns - ------- - True or False - """ - #gw_config = config["general"]["workflow"] - #clusterconf = gw_config["subjob_clusters"][cluster] - - #""" - #print(f"run_only {clusterconf.get('run_only', 'Error') }") - #print(f"skip_chunk_number {clusterconf.get('skip_chunk_number', -999)}") - #print(f"skip_run_number {clusterconf.get('skip_run_number', -999)}") - #print(f"chunk_number {config['general'].get('chunk_number', -998)}") - #print(f"run_number {config['general'].get('run_number', -998)}") - #print(f"last_run_in_chunk {config['general']['last_run_in_chunk']}") - #print(f"first_run_in_chunk {config['general']['first_run_in_chunk']}") - #""" - - #if clusterconf.get("run_only", "Error") == "last_run_in_chunk" and not config[ - # "general" - #].get("last_run_in_chunk", False): - # return True - #if clusterconf.get("run_only", "Error") == "first_run_in_chunk" and not config[ - # "general" - #].get("first_run_in_chunk", False): - # return True - #if clusterconf.get("skip_chunk_number", -999) == config["general"].get( - # "chunk_number", -998 - #): - # return True - #if clusterconf.get("skip_run_number", -999) == config["general"].get( - # "run_number", -998 - #): - # return True - - return False + if clusterconf.get("run_only", "Error") == 
"last_run_in_chunk" and not config[ + "general" + ].get("last_run_in_chunk", False): + return True + if clusterconf.get("run_only", "Error") == "first_run_in_chunk" and not config[ + "general" + ].get("first_run_in_chunk", False): + return True + if clusterconf.get("skip_chunk_number", -999) == config["general"].get( + "chunk_number", -998 + ): + return True + if clusterconf.get("skip_run_number", -999) == config["general"].get( + "run_number", -998 + ): + return True + + return False class WorkflowPhase: """A workflow phase class.""" @@ -585,6 +588,7 @@ def __init__(self, phase_name): self.skip_run_number = None self.name = phase_name + class UserWorkflowPhase(WorkflowPhase): """A user workflow phase class.""" @@ -663,21 +667,23 @@ def assemble_workflow(config): # 4. Order user workflows into default workflow wrt. workflow attributs. workflow = workflow.order_clusters(config) -# What is the next functions needed for? -# subjob_clusters = workflow.complete_clusters(config) -# subjob_clusters = workflow.prepend_newrun_job(config) - # 5. write the workflow to config + # What is the next functions needed for? + # subjob_clusters = workflow.complete_clusters(config) + + # 5. create new first phase of type SimulationSetup, if first_task_in_queue is user phase (type batch or shell) + workflow = workflow.prepend_newrun_job(config) + # 6. write the workflow to config config = workflow.write_to_config(config) - # 6. Remove old worklow from config + # 7. Remove old worklow from config # Set "jobtype" for the first task??? 
if config["general"]["jobtype"] == "unknown": config["general"]["command_line_config"]["jobtype"] = config["general"][ "workflow" - ]["first_task_in_queue"][0] + ]["first_task_in_queue"] config["general"]["jobtype"] = config["general"]["workflow"][ "first_task_in_queue" - ][0] # todo: this needs to be a string, not a list + ] return config @@ -692,6 +698,8 @@ def set_workflow_attrib(workflow, attrib, value): workflow.__setattr__(attrib, value) def set_phase_attrib(workflow_phases, phase_name, attrib, value): + if not type(workflow_phases) is list: + workflow_phases = [workflow_phases] for phase in workflow_phases: if phase.name == phase_name: if type(getattr(phase, attrib)).__name__ == "list": @@ -700,6 +708,8 @@ def set_phase_attrib(workflow_phases, phase_name, attrib, value): phase.__setattr__(attrib, value) def get_phase_attrib(workflow_phases, phase_name, attrib): + if not type(workflow_phases) is list: + workflow_phases = [workflow_phases] for phase in workflow_phases: if phase.name == phase_name: value = getattr(phase, attrib) @@ -734,155 +744,155 @@ def calc_number_of_tasks(config): ################### Maybe outdated routines ###################### -def display_nicely(config): - """ - Pretty prints the workflow configuration assembled in config["general"]. - - Parameters - ---------- - config : dict - - Returns - ------- - config : dict - """ - esm_parser.pprint_config(config["general"]["workflow"]) - return config - - -def collect_all_workflow_information(config): - """ - Collects all workflow information for each component entry in config - (can be a model/component or a new entry (e.g. 'flows') - NOTE: Should it be possible to set a workflow in the model section of the runscript? Why not? 
- - Checks if there are "workflow" entries in the user runscript and copies or merges them into - config["general"]["workflow"] - - Parameters - ---------- - config : dict - - Returns - ------- - config : dict - """ - for model in config: - if "workflow" in config[model]: - # looks for "workflow" in each entry of config (can be model/component, general, etc.) - w_config = config[model]["workflow"] - # looks for "workflow" in "general" section of config. - gw_config = config["general"]["workflow"] - - # looks for entry 'subjob_clusters' in config of each component that has a "workflow" - if "subjob_clusters" in w_config: - for cluster in w_config["subjob_clusters"]: - # if a certain cluster is also in the general config, this cluster will be merged together ... - # what cluster could this be? - if cluster in gw_config["subjob_clusters"]: - gw_config["subjob_clusters"][cluster] = merge_if_possible( - w_config["subjob_clusters"][cluster], - gw_config["subjob_clusters"][cluster], - ) - # if cluster is not in general config, it will copied into it. - else: - gw_config["subjob_clusters"][cluster] = copy.deepcopy( - w_config["subjob_clusters"][cluster], - ) - - # looks for entry 'subjobs' in config of each component - if "subjobs" in w_config: - # copies component workflow config to new variable ref_config - ref_config = copy.deepcopy(w_config) - # ??? for every subjob in ??? - for subjob in list(copy.deepcopy(w_config["subjobs"])): - - # subjobs (other than clusters) should be model specific - # subjobs that are defined in subjobs of components workflow configs and not in a subjob_cluster are copied to general with suffix of componet entry. 
- # appends the model name to the subjob name and copy it to config["general"] - gw_config["subjobs"][subjob + "_" + model] = copy.deepcopy( - w_config["subjobs"][subjob] - ) - # if this copied subjobs is also n general workflow subjobs it will be deleted there - if subjob in gw_config["subjobs"]: - del gw_config["subjobs"][subjob] - - # make sure that the run_after and run_before refer to that cluster - # for all subjobs now in general workflow - for other_subjob in gw_config["subjobs"]: - # sets run_after and run_before to correct subjob??? - # if a subjob of general workflow has run_after attribute to a user subjob (that has been renamed to subjob_model) - # this run_after will be set to the new subjob name (subjob_model) - if "run_after" in gw_config["subjobs"][other_subjob]: - if (gw_config["subjobs"][other_subjob]["run_after"] == subjob): - gw_config["subjobs"][other_subjob]["run_after"] == subjob + "_" + model - if "run_before" in gw_config["subjobs"][other_subjob]: - if (gw_config["subjobs"][other_subjob]["run_before"] == subjob): - gw_config["subjobs"][other_subjob]["run_before"] == subjob + "_" + model - - # if not in another cluster, each subjob gets its own - if (not "subjob_cluster" in gw_config["subjobs"][subjob + "_" + model]): - gw_config["subjobs"][subjob + "_" + model]["subjob_cluster"] = subjob # + "_" + model - - # checks if next_run:triggered_by is tidy or the one in user workflow, or empty? - if "next_run_triggered_by" in w_config: - if not gw_config["next_run_triggered_by"] in ["tidy", w_config["next_run_triggered_by"], ]: - print("Mismatch found setting next_run_triggered_by for workflow.") - sys.exit(-1) - else: - gw_config["next_run_triggered_by"] = w_config["next_run_triggered_by"] - # what if w_config["next_run_triggered_by"] is empty? - - return config - -def merge_single_entry_if_possible(entry, sourceconf, targetconf): - """ - Merges a dictionary entry into a target dictionary that has he same key. 
- - Parameters - ---------- - entry : str - dictionary key - sourceconf : dict - targetconf : dict - - Returns - ------- - targetconf : dict - """ - if entry in sourceconf: - # Check if entry is already in targetconf AND different to sourceconf, then exit - if entry in targetconf and not sourceconf[entry] == targetconf[entry]: - print(f"Mismatch found in {entry} for cluster {targetconf}") - sys.exit(-1) - # Continues here if entry exists already in targetconf AND the same as sourceconf or - # not already in targetconf and set it to sourceconf - targetconf[entry] = sourceconf[entry] - return targetconf - -def merge_if_possible(source, target): - """ - Does the same as above but for a whole dict - - Merges the entries of source dictionary into target dictionary, if not already in. - (Will not overwrite entries in target dictionary.) - - Parameters - ---------- - source : dict - target : dict - - Returns - ------- - target : dict - """ - for entry in source: - if entry in target: - if not source[entry] == target[entry]: - print( - f"Mismatch while trying to merge subjob_clusters {source} into {target}" - ) - sys.exit(-1) - else: - target[entry] = source[entry] - return target +#def display_nicely(config): +# """ +# Pretty prints the workflow configuration assembled in config["general"]. +# +# Parameters +# ---------- +# config : dict +# +# Returns +# ------- +# config : dict +# """ +# esm_parser.pprint_config(config["general"]["workflow"]) +# return config +# +# +#def collect_all_workflow_information(config): +# """ +# Collects all workflow information for each component entry in config +# (can be a model/component or a new entry (e.g. 'flows') +# NOTE: Should it be possible to set a workflow in the model section of the runscript? Why not? 
+# +# Checks if there are "workflow" entries in the user runscript and copies or merges them into +# config["general"]["workflow"] +# +# Parameters +# ---------- +# config : dict +# +# Returns +# ------- +# config : dict +# """ +# for model in config: +# if "workflow" in config[model]: +# # looks for "workflow" in each entry of config (can be model/component, general, etc.) +# w_config = config[model]["workflow"] +# # looks for "workflow" in "general" section of config. +# gw_config = config["general"]["workflow"] +# +# # looks for entry 'subjob_clusters' in config of each component that has a "workflow" +# if "subjob_clusters" in w_config: +# for cluster in w_config["subjob_clusters"]: +# # if a certain cluster is also in the general config, this cluster will be merged together ... +# # what cluster could this be? +# if cluster in gw_config["subjob_clusters"]: +# gw_config["subjob_clusters"][cluster] = merge_if_possible( +# w_config["subjob_clusters"][cluster], +# gw_config["subjob_clusters"][cluster], +# ) +# # if cluster is not in general config, it will copied into it. +# else: +# gw_config["subjob_clusters"][cluster] = copy.deepcopy( +# w_config["subjob_clusters"][cluster], +# ) +# +# # looks for entry 'subjobs' in config of each component +# if "subjobs" in w_config: +# # copies component workflow config to new variable ref_config +# ref_config = copy.deepcopy(w_config) +# # ??? for every subjob in ??? +# for subjob in list(copy.deepcopy(w_config["subjobs"])): +# +# # subjobs (other than clusters) should be model specific +# # subjobs that are defined in subjobs of components workflow configs and not in a subjob_cluster are copied to general with suffix of componet entry. 
+# # appends the model name to the subjob name and copy it to config["general"] +# gw_config["subjobs"][subjob + "_" + model] = copy.deepcopy( +# w_config["subjobs"][subjob] +# ) +# # if this copied subjobs is also n general workflow subjobs it will be deleted there +# if subjob in gw_config["subjobs"]: +# del gw_config["subjobs"][subjob] +# +# # make sure that the run_after and run_before refer to that cluster +# # for all subjobs now in general workflow +# for other_subjob in gw_config["subjobs"]: +# # sets run_after and run_before to correct subjob??? +# # if a subjob of general workflow has run_after attribute to a user subjob (that has been renamed to subjob_model) +# # this run_after will be set to the new subjob name (subjob_model) +# if "run_after" in gw_config["subjobs"][other_subjob]: +# if (gw_config["subjobs"][other_subjob]["run_after"] == subjob): +# gw_config["subjobs"][other_subjob]["run_after"] == subjob + "_" + model +# if "run_before" in gw_config["subjobs"][other_subjob]: +# if (gw_config["subjobs"][other_subjob]["run_before"] == subjob): +# gw_config["subjobs"][other_subjob]["run_before"] == subjob + "_" + model +# +# # if not in another cluster, each subjob gets its own +# if (not "subjob_cluster" in gw_config["subjobs"][subjob + "_" + model]): +# gw_config["subjobs"][subjob + "_" + model]["subjob_cluster"] = subjob # + "_" + model +# +# # checks if next_run:triggered_by is tidy or the one in user workflow, or empty? +# if "next_run_triggered_by" in w_config: +# if not gw_config["next_run_triggered_by"] in ["tidy", w_config["next_run_triggered_by"], ]: +# print("Mismatch found setting next_run_triggered_by for workflow.") +# sys.exit(-1) +# else: +# gw_config["next_run_triggered_by"] = w_config["next_run_triggered_by"] +# # what if w_config["next_run_triggered_by"] is empty? 
+# +# return config +# +#def merge_single_entry_if_possible(entry, sourceconf, targetconf): +# """ +# Merges a dictionary entry into a target dictionary that has he same key. +# +# Parameters +# ---------- +# entry : str +# dictionary key +# sourceconf : dict +# targetconf : dict +# +# Returns +# ------- +# targetconf : dict +# """ +# if entry in sourceconf: +# # Check if entry is already in targetconf AND different to sourceconf, then exit +# if entry in targetconf and not sourceconf[entry] == targetconf[entry]: +# print(f"Mismatch found in {entry} for cluster {targetconf}") +# sys.exit(-1) +# # Continues here if entry exists already in targetconf AND the same as sourceconf or +# # not already in targetconf and set it to sourceconf +# targetconf[entry] = sourceconf[entry] +# return targetconf +# +#def merge_if_possible(source, target): +# """ +# Does the same as above but for a whole dict +# +# Merges the entries of source dictionary into target dictionary, if not already in. +# (Will not overwrite entries in target dictionary.) +# +# Parameters +# ---------- +# source : dict +# target : dict +# +# Returns +# ------- +# target : dict +# """ +# for entry in source: +# if entry in target: +# if not source[entry] == target[entry]: +# print( +# f"Mismatch while trying to merge subjob_clusters {source} into {target}" +# ) +# sys.exit(-1) +# else: +# target[entry] = source[entry] +# return target From 3c72864d2edea36d70f59d42deafdecd095775ea Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Mon, 13 Nov 2023 11:29:40 +0100 Subject: [PATCH 21/98] Reactivated function display_nicely. 
--- src/esm_runscripts/workflow.py | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index f94030a33..3b1af7b4a 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -736,28 +736,23 @@ def calc_number_of_tasks(config): tasks += config[model]["nprocar"] * config[model]["nprocbr"] return tasks +def display_nicely(config): + """ + Pretty prints the workflow configuration assembled in config["general"]. + Is called by e.g. ``esm_runscripts runscript.yaml -e -i workflow`` + Parameters + ---------- + config : dict - - - + Returns + ------- + config : dict + """ + esm_parser.pprint_config(config["general"]["workflow"]) + return config ################### Maybe outdated routines ###################### - -#def display_nicely(config): -# """ -# Pretty prints the workflow configuration assembled in config["general"]. -# -# Parameters -# ---------- -# config : dict -# -# Returns -# ------- -# config : dict -# """ -# esm_parser.pprint_config(config["general"]["workflow"]) -# return config # # #def collect_all_workflow_information(config): From 547edb09f7524083bec0cbaeca111e71c54733b3 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Tue, 14 Nov 2023 12:05:45 +0100 Subject: [PATCH 22/98] Added some flake8 style optimization. 
--- src/esm_runscripts/workflow.py | 247 +++++++++++++++++++++++---------- 1 file changed, 177 insertions(+), 70 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index 3b1af7b4a..086cf64a9 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -1,9 +1,11 @@ -import sys, copy +import sys +import copy import esm_parser -from pprint import pprint +# from pprint import pprint + +# import pdb -import pdb class Workflow: """A workflow class.""" @@ -105,7 +107,8 @@ def init_default_workflow(self, config): for ind, phase in enumerate(self.phases): if ind < self.num_phases - 1: - # Set run_before attrib of all phases (except last on) to the next phase name + # Set run_before attrib of all phases (except last on) + # to the next phase name phase.run_before = self.phases[ind+1].name phase.next_submit.append(self.phases[ind+1].name) phase.run_after = self.phases[ind-1].name @@ -120,15 +123,16 @@ def init_default_workflow(self, config): if phase.name == "compute": phase.nproc = tasks phase.batch_or_shell = 'batch' - phase.submit_to_batch_system = config["general"].get("submit_to_batch_system", True) + phase.submit_to_batch_system = config["general"].get( + "submit_to_batch_system", True) phase.run_on_queue = config["computer"]["partitions"]["compute"]["name"] # Set default workflow values - set_workflow_attrib(self, "first_task_in_queue", self.phases[0].name) # prepcompute - set_workflow_attrib(self, "last_task_in_queue", self.phases[-1].name) # tidy + set_workflow_attrib(self, "first_task_in_queue", self.phases[0].name) + set_workflow_attrib(self, "last_task_in_queue", self.phases[-1].name) # next_run_triggered_by only used to set last_task_in_queue # TODO: why not set last_task_in_queue directly? 
- set_workflow_attrib(self, "next_run_triggered_by", self.phases[-1].name) # tidy + set_workflow_attrib(self, "next_run_triggered_by", self.phases[-1].name) return self @@ -150,7 +154,7 @@ def check_if_keyword_is_valid(self, keyword): else: return False - def collect_all_user_workflows(self,config): + def collect_all_user_workflows(self, config): """ Collect all workflows set by config files. @@ -169,7 +173,8 @@ def collect_all_user_workflows(self,config): for model in config: if "workflow" in config[model]: w_config = config[model]["workflow"] - #if "subjobs" in w_config: + # if "subjobs" in w_config: + # breakpoint() if "phases" in w_config: # copies component workflow config to new variable ref_config ref_config = copy.deepcopy(w_config) @@ -183,30 +188,56 @@ def collect_all_user_workflows(self,config): if not key == "phases": set_workflow_attrib(self, key, value) else: - esm_parser.user_error("ERROR", f"``{key}`` is not a valid keyword of a workflow.") - #for subjob in list(copy.deepcopy(w_config["subjobs"])): + err_msg = f"``{key}`` is not a valid keyword of a workflow." + esm_parser.user_error("ERROR", err_msg) + # for subjob in list(copy.deepcopy(w_config["subjobs"])): for phase in list(copy.deepcopy(w_config["phases"])): new_phase_name = phase # create a new user phase object for ``phase`` new_phase = UserWorkflowPhase(new_phase_name) if phase in self.get_phases_attribs_list("default", "name"): - esm_parser.user_error("ERROR", f"The user phase ``{new_phase_name}`` has the same name as a default workflow phase. This is not allowed.") + err_msg = ( + f"The user phase ``{new_phase_name}`` " + f"has the same name as a default workflow phase. " + f"This is not allowed." 
+ ) + esm_parser.user_error("ERROR", err_msg) # each subjob needs to have an unique name # check if the name of the new user phase does not already exist - if not new_phase_name in user_workflow_phases_names: + if new_phase_name not in user_workflow_phases_names: # and append it to the list of user phases of the workflow user_workflow_phases_names.append(new_phase_name) - # set attributes of user_workflow phases from config settings + # set attributes of user_workflow phases from + # config settings # check if valid workflow phase keywords for key, value in w_config["phases"][phase].items(): if new_phase.check_if_keyword_is_valid(key): - set_phase_attrib([new_phase], new_phase_name, key, value) + set_phase_attrib( + [new_phase], new_phase_name, key, value + ) # new_phase.__setattr__(key, value) else: - esm_parser.user_error("ERROR", f"``{key}`` of workflow phase ``{new_phase_name}`` is not a valid keyword of a workflow phase.") + err_msg = ( + f"``{key}`` of workflow phase " + f"``{new_phase_name}`` is not a valid keyword " + f"of a workflow phase." + ) + esm_parser.user_error("ERROR", err_msg) + if new_phase.submit_to_batch_system and new_phase.batch_or_shell == "shell": + err_msg = ( + f"Inconsistence attributes for keywords " + f"``submit_to_batch_system`` and " + f"``batch_or_shell`` for phase " + f"``{new_phase.name}``." + ) + esm_parser.user_error("ERROR", err_msg) user_workflow_phases.append(new_phase) else: - esm_parser.user_error("ERROR", f"Two workflow phases have the same name {new_phase_name}.") + err_msg = ( + f"Two workflow phases have the same name " + f"{new_phase_name}." + ) + esm_parser.user_error("ERROR", err_msg) self.user_phases = user_workflow_phases return self @@ -217,7 +248,7 @@ def write_to_config(self, config): """ cluster_att = [] for att in dir(self.phases[0]): - if(att[:2] != "__"): + if (att[:2] != "__"): cluster_att.append(att) # 1. Delete unnecessary config workflow entries (e.g. 
in general) if "workflow" in config["general"]: @@ -250,7 +281,8 @@ def write_to_config(self, config): def check_user_workflow_dependency(self): """ - Check whether the user defined workflow phases are independent from each other or not. + Check whether the user defined workflow phases are independent + from each other or not. Arguments --------- @@ -261,12 +293,13 @@ def check_user_workflow_dependency(self): independent : bool (default: False) """ independent = False - user_phases_names = self.get_phases_attribs_list('user','name') - run_after_list = self.get_phases_attribs_list('user','run_after') - run_before_list = self.get_phases_attribs_list('user','run_before') + user_phases_names = self.get_phases_attribs_list('user', 'name') + run_after_list = self.get_phases_attribs_list('user', 'run_after') + run_before_list = self.get_phases_attribs_list('user', 'run_before') # All user phases are independent from each other, if - # none of the ``user_phases_names`` are found in the union of ``run_before_list`` and ``run_after_list`` + # none of the ``user_phases_names`` are found in the union of + # ``run_before_list`` and ``run_after_list`` # That means alls user phases can be run independent from each other. 
if not set(user_phases_names).intersection(set(run_after_list).union(set(run_before_list))): independent = True @@ -289,17 +322,20 @@ def check_unknown_phases(self): unknown_phases : set """ unknown_phases = [] - phases_names = self.get_phases_attribs_list('default','name') - user_phases_names = self.get_phases_attribs_list('user','name') - run_after = self.get_phases_attribs_list('user','run_after') - run_before = self.get_phases_attribs_list('user','run_before') + phases_names = self.get_phases_attribs_list('default', 'name') + user_phases_names = self.get_phases_attribs_list('user', 'name') + run_after = self.get_phases_attribs_list('user', 'run_after') + run_before = self.get_phases_attribs_list('user', 'run_before') # Filter out all elements that are None - # ``filter(None, anylist)`` will filter out all items of anylist, for which ``if item`` is false (e.g. [], "", None, {}, ''). + # ``filter(None, anylist)`` will filter out all items of anylist, + # for which ``if item`` is false (e.g. [], "", None, {}, ''). # See also https://docs.python.org/3/library/functions.html#filter run_after_list = list(filter(None, run_after)) run_before_list = list(filter(None, run_before)) - # Get all phases that are defined as run_after or run_before, but do not exist as user or default phase. - # If unknown_phase is not empty, there is a user_phase that defines run_after or run_before for a not existing phase. + # Get all phases that are defined as run_after or run_before, + # but do not exist as user or default phase. + # If unknown_phase is not empty, there is a user_phase that defines run_after + # or run_before for a not existing phase. 
unknown_phases = set(run_after_list).union(set(run_before_list)).difference(set(user_phases_names).union(set(phases_names))) return unknown_phases @@ -323,32 +359,47 @@ def order_clusters(self, config): unknown_phases = self.check_unknown_phases() if unknown_phases: unknowns = ', '.join(unknown_phases) - esm_parser.user_error("ERROR", f"Unknown phase(s) ``{unknowns}`` defined as ``run_before`` or ``run_after``.") + err_msg = ( + f"Unknown phase(s) ``{unknowns}`` defined as ``run_before`` " + f"or ``run_after``." + ) + esm_parser.user_error("ERROR", err_msg) for user_phase in self.user_phases: # Check if run_after or run_before is set for each user phase if not user_phase.run_before and not user_phase.run_after: esm_parser.user_error("ERROR", f"Don't know when to start user_phase. Please set run_after or run_before for user phase {user_phase.name}.") - # Check if not both run_after and run_before are set at the same time for each user phase + # Check if not both run_after and run_before are set at the same + # time for each user phase # if user_phase.run_before and user_phase.run_after: -# esm_parser.user_error("ERROR", f"Both run_after and run_before are set. Don't know when to start {user_phase.name}. Please only set run_after or run_before.") +# err_msg = ( +# f"Both run_after and run_before are set. Don't know when " +# f"to start {user_phase.name}. Please only set run_after " +# f"or run_before." +# ) +# esm_parser.user_error("ERROR", err_msg) # Correct for ``last_task_in_queue`` if necessary # Collect all next_run_triggered_by entries next_triggered = self.next_run_triggered_by - run_after = [] - #for model in config: - # if "workflow" in config[model]: - # if "next_run_triggered_by" in config[model]["workflow"]: - # next_triggered.append(config[model]["workflow"]["next_run_triggered_by"]) - - # How needs the next_triggered_by be set??? Which to choose if several workflows are defined? 
- - #next_triggered = list(filter((self.next_run_triggered_by).__ne__, next_triggered)) - #if len(next_triggered) > 1: - # esm_parser.user_error("ERROR", "Mismatch found setting next_run_triggered_by for workflow.") - #elif len(next_triggered) == 1: + # check if next_triggered is default or user phase + # if user phase + # get last default phase and correct next_submit + # get first default phase and correct run_after, called_from + # correct last_task_in_queue of workflow + if next_triggered not in self.get_phases_attribs_list("default", "name"): + self.phases[-1].next_submit.remove(self.phases[0].name) + self.phases[-1].next_submit.append(next_triggered) + self.phases[0].run_after = next_triggered + self.phases[0].called_from = next_triggered + self.last_task_in_queue = next_triggered + + # next_triggered = list(filter((self.next_run_triggered_by).__ne__, next_triggered)) + # if len(next_triggered) > 1: + # err_msg = "Mismatch found setting next_run_triggered_by for workflow." + # esm_parser.user_error("ERROR", err_msg) + # elif len(next_triggered) == 1: # self.next_run_triggered_by = next_triggered[0] - ##else: let default + # # else: let default # Set "next_submit" and "called_from" # "next_submit" which phase will be called next (run_after of the next phase) @@ -359,7 +410,7 @@ def order_clusters(self, config): next_submits[phase.name] = [] for phase2 in self.phases + self.user_phases: - if not phase2.run_after == None: + if phase2.run_after is not None: next_submits[phase2.run_after].append(phase2.name) phase2.called_from = phase2.run_after @@ -367,10 +418,10 @@ def order_clusters(self, config): phase3.next_submit = next_submits[phase3.name] # assign user phases to a cluster (tbd) - # - if all phases have the same run_after and run_before they can be in the cluster + # - if all phases have the same run_after and run_before they can be + # in the cluster # - in this cluster they will be run in parallel? 
- for phase4 in self.phases + self.user_phases: calling_cluster = phase4.run_after @@ -381,16 +432,16 @@ def order_clusters(self, config): # self.last_task_in_queue = phase4.name # # called_cluster = phase4.run_before -## print(f"calling_cluster: {calling_cluster} ->", phase4.name, f" -> called_cluster: {called_cluster}") +# # print(f"calling_cluster: {calling_cluster} ->", phase4.name, f" -> called_cluster: {called_cluster}") # set_phase_attrib(self.phases+self.user_phases, called_cluster, "called_from", phase4.name) # # # set first_task_in_queue # if called_cluster == self.first_task_in_queue: -## self.first_task_in_queue.append(phase4.name) +# # self.first_task_in_queue.append(phase4.name) # self.first_task_in_queue = phase4.name # set empty cluster entries to phase name - if phase4.cluster == None: + if phase4.cluster is None: phase4.cluster = phase4.name # todo: check if num list > 1, is this possible ??? @@ -398,7 +449,7 @@ def order_clusters(self, config): last_cluster_name = self.last_task_in_queue # if first_cluster_name is not next_submit of last_cluster_name - if not first_cluster_name in get_phase_attrib(self.phases+self.user_phases, last_cluster_name, "next_submit"): + if first_cluster_name not in get_phase_attrib(self.phases+self.user_phases, last_cluster_name, "next_submit"): set_phase_attrib(self.phases+self.user_phases, last_cluster_name, "next_submit", first_cluster_name) # if last_cluster_name is not called_from of first_cluster_name if not last_cluster_name == get_phase_attrib(self.phases+self.user_phases, first_cluster_name, "called_from"): @@ -427,11 +478,11 @@ def complete_clusters(self, config): for phase in self.phases + self.user_phases: # Erstellt ein leeres dict im dict subjob_clusters - if not phase.cluster in subjob_clusters: + if phase.cluster not in subjob_clusters: subjob_clusters[phase.cluster] = {} # Create empty list for each subjob_cluster - if not "subjobs" in subjob_clusters[phase.cluster]: + if "subjobs" not in 
subjob_clusters[phase.cluster]: subjob_clusters[phase.cluster]["subjobs"] = [] # Append subjobs to list. @@ -458,7 +509,7 @@ def complete_clusters(self, config): # elif subjob_clusters[subjob_cluster].get("script", False): # subjob_clusters[subjob_cluster]["batch_or_shell"] = "shell" # - if not "run_on_queue" in subjob_clusters[subjob_cluster]: + if "run_on_queue" not in subjob_clusters[subjob_cluster]: print(f"Information on target queue is missing in cluster {subjob_cluster}.") sys.exit(-1) # @@ -474,7 +525,8 @@ def prepend_newrun_job(self, config): """ - Creates a new cluster "newrun" if first_task_in_queue is not of type 'SimulationSetup' - - Why is this needed? So that every first task is a SimulationSetup to init a config object??? + - Why is this needed? So that every first task is a SimulationSetup to init + a config object??? Looks for subjob_cluster that are set by user workflow (not a 'SimulationSetup') and are not of type 'SimulationSetup'. @@ -568,6 +620,7 @@ def skip_cluster(cluster, config): return False + class WorkflowPhase: """A workflow phase class.""" @@ -603,7 +656,6 @@ def __init__(self, phase_name): self.batch_or_shell = "shell" self.submit_to_batch_system = False - def check_if_keyword_is_valid(self, keyword): """ Checks if the key given for a user workflow is valie @@ -656,21 +708,24 @@ def assemble_workflow(config): workflow = Workflow(phases) else: esm_parser.user_error("ERROR", "No default workflow phases defined.") - # Note: Should this work also if no default phases are set in such a config file, but - # instead all workflow phases are defined in different configs and/or runscripts? - # TODO: Where could a user define a different (default) phase list? Or should this be changed in defaults.yaml? + # Note: Should this work also if no default phases are set in such a config + # file, but instead all workflow phases are defined in different configs + # and/or runscripts? + # TODO: Where could a user define a different (default) phase list? 
+ # Or should this be changed in defaults.yaml? # 2. Initialize default workflow phases workflow = workflow.init_default_workflow(config) # 3. Read in workflows from runscript and config files workflow = workflow.collect_all_user_workflows(config) - #config = collect_all_workflow_information(config) + # config = collect_all_workflow_information(config) # 4. Order user workflows into default workflow wrt. workflow attributs. workflow = workflow.order_clusters(config) # What is the next functions needed for? # subjob_clusters = workflow.complete_clusters(config) - # 5. create new first phase of type SimulationSetup, if first_task_in_queue is user phase (type batch or shell) + # 5. create new first phase of type SimulationSetup, if first_task_in_queue is + # user phase (type batch or shell) workflow = workflow.prepend_newrun_job(config) # 6. write the workflow to config config = workflow.write_to_config(config) @@ -687,16 +742,19 @@ def assemble_workflow(config): return config + def write_subjob_clusters_to_config(config, subjob_clusters): config["general"]["subjob_clusters"] = subjob_clusters return config + def set_workflow_attrib(workflow, attrib, value): if type(getattr(workflow, attrib)).__name__ == "list": workflow.__dict__[attrib].append(value) else: workflow.__setattr__(attrib, value) + def set_phase_attrib(workflow_phases, phase_name, attrib, value): if not type(workflow_phases) is list: workflow_phases = [workflow_phases] @@ -707,6 +765,7 @@ def set_phase_attrib(workflow_phases, phase_name, attrib, value): else: phase.__setattr__(attrib, value) + def get_phase_attrib(workflow_phases, phase_name, attrib): if not type(workflow_phases) is list: workflow_phases = [workflow_phases] @@ -736,6 +795,52 @@ def calc_number_of_tasks(config): tasks += config[model]["nprocar"] * config[model]["nprocbr"] return tasks + +def display_workflow(config): + """ + Displays current workflow settings. 
+ + Parameters + ---------- + config : dict + + Returns + ------- + config : dict (needed???) + """ + + esm_parser.pprint_config(config["general"]["workflow"]) + + first_phase = config["general"]["workflow"]["first_task_in_queue"] + second_phase = config["general"]["workflow"]["subjobs"][first_phase]["next_submit"] + + workflow_order = f"{first_phase}" + + while first_phase not in second_phase and second_phase: + sec_phase_str = "" + for sec_phase in second_phase: + if config["general"]["workflow"]["subjobs"][sec_phase]["next_submit"]: + second_phase = config["general"]["workflow"]["subjobs"][sec_phase]["next_submit"] + if sec_phase_str == "": + sec_phase_str = f"{sec_phase_str} {sec_phase}" + else: + sec_phase_str = f"{sec_phase_str}, {sec_phase}" + workflow_order = f"{workflow_order} -> {sec_phase_str}" + else: + # second_phase.remove(first_phase) + sec_phase_str = "" + for sec_phase in second_phase: + second_phase = config["general"]["workflow"]["subjobs"][sec_phase]["next_submit"] + if sec_phase_str == "": + sec_phase_str = f"{sec_phase_str} {sec_phase}" + else: + sec_phase_str = f"{sec_phase_str}, {sec_phase}" + workflow_order = f"{workflow_order} -> {sec_phase_str}" + + esm_parser.user_note("Workflow sequence", f"{workflow_order}") + return config + + def display_nicely(config): """ Pretty prints the workflow configuration assembled in config["general"]. @@ -752,16 +857,18 @@ def display_nicely(config): esm_parser.pprint_config(config["general"]["workflow"]) return config -################### Maybe outdated routines ###################### +# ################## Maybe outdated routines ###################### # # -#def collect_all_workflow_information(config): +# def collect_all_workflow_information(config): # """ # Collects all workflow information for each component entry in config # (can be a model/component or a new entry (e.g. 'flows') -# NOTE: Should it be possible to set a workflow in the model section of the runscript? Why not? 
+# NOTE: Should it be possible to set a workflow in the model section of the +# runscript? Why not? # -# Checks if there are "workflow" entries in the user runscript and copies or merges them into +# Checks if there are "workflow" entries in the user runscript and copies or +# merges them into # config["general"]["workflow"] # # Parameters @@ -840,7 +947,7 @@ def display_nicely(config): # # return config # -#def merge_single_entry_if_possible(entry, sourceconf, targetconf): +# def merge_single_entry_if_possible(entry, sourceconf, targetconf): # """ # Merges a dictionary entry into a target dictionary that has he same key. # @@ -865,7 +972,7 @@ def display_nicely(config): # targetconf[entry] = sourceconf[entry] # return targetconf # -#def merge_if_possible(source, target): +# def merge_if_possible(source, target): # """ # Does the same as above but for a whole dict # From 465d8d342b12fedb87ee5ca42b6515c0e655b02f Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Thu, 16 Nov 2023 15:32:43 +0100 Subject: [PATCH 23/98] Adapted the output for inspect (-i) workflow. --- src/esm_runscripts/inspect.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/esm_runscripts/inspect.py b/src/esm_runscripts/inspect.py index e01ed04d0..d6fb6519e 100644 --- a/src/esm_runscripts/inspect.py +++ b/src/esm_runscripts/inspect.py @@ -17,8 +17,8 @@ def run_job(config): def inspect_workflow(config): if config["general"]["inspect"] == "workflow": - - config = workflow.display_nicely(config) + config = workflow.display_workflow(config) +# config = workflow.display_nicely(config) sys.exit(0) return config From 6aee3956dc16c9d211582245bec5e15b27680e15 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Thu, 16 Nov 2023 15:35:02 +0100 Subject: [PATCH 24/98] Fix multiple phases in one cluster, fix batch_or_shell to be set by esm_tools. 
--- src/esm_runscripts/workflow.py | 351 ++++++++++++++++----------------- 1 file changed, 173 insertions(+), 178 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index 086cf64a9..6341748af 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -4,13 +4,13 @@ # from pprint import pprint -# import pdb +import pdb class Workflow: """A workflow class.""" - def __init__(self, phases, always_run_with=[]): + def __init__(self, phases, always_run_with=[], phases_to_submit_to_batch_system=[]): """ Create a new workflow. @@ -25,14 +25,14 @@ def __init__(self, phases, always_run_with=[]): ------- none """ - self.default_phases = [] self.user_phases = [] - self.first_task_in_queue = None + self.first_task_in_queue = None # needed self.last_task_in_queue = None self.next_run_triggered_by = None # TODO: Call here the phase object ??? self.phases = phases self.always_run_with = always_run_with + self.phases_to_submit_to_batch_system = phases_to_submit_to_batch_system @property def num_phases(self): @@ -60,7 +60,7 @@ def get_workflow_phase_by_name(self, phase_name): def get_phases_attribs_list(self, phase_type, attrib): """ - Return the names of all phases as list. + Returns a certain attribute for all phases as a list. Parameters ---------- @@ -118,27 +118,44 @@ def init_default_workflow(self, config): phase.next_submit.append(self.phases[0].name) phase.run_after = self.phases[ind-1].name - # TODO: this needs to be set somewhere else, or different. 
phase.cluster = phase.name - if phase.name == "compute": - phase.nproc = tasks + if phase.name in self.phases_to_submit_to_batch_system: phase.batch_or_shell = 'batch' - phase.submit_to_batch_system = config["general"].get( - "submit_to_batch_system", True) + phase.submit_to_batch_system = True phase.run_on_queue = config["computer"]["partitions"]["compute"]["name"] + phase.nproc = tasks # Set default workflow values - set_workflow_attrib(self, "first_task_in_queue", self.phases[0].name) - set_workflow_attrib(self, "last_task_in_queue", self.phases[-1].name) - # next_run_triggered_by only used to set last_task_in_queue - # TODO: why not set last_task_in_queue directly? - set_workflow_attrib(self, "next_run_triggered_by", self.phases[-1].name) + self.set_workflow_attrib("first_task_in_queue", self.phases[0].name) + self.set_workflow_attrib("last_task_in_queue", self.phases[-1].name) + self.set_workflow_attrib("next_run_triggered_by", self.phases[-1].name) return self + def set_workflow_attrib(self, attrib, value): + """ + Sets a workflow attribute. + + Parameters + ---------- + attrib : str + value : + + Returns + ------- + None + """ + + if type(getattr(self, attrib)).__name__ == "list": + self.__dict__[attrib].append(value) + else: + self.__setattr__(attrib, value) + def check_if_keyword_is_valid(self, keyword): """ - Checks if the key given for a user workflow is valie + Checks if the key given for a user workflow is valid. + Only keywords are allowed, that are already set during + initialization. Parameters ---------- @@ -156,7 +173,7 @@ def check_if_keyword_is_valid(self, keyword): def collect_all_user_workflows(self, config): """ - Collect all workflows set by config files. + Collect all workflows defined in config files. 
Parameters ---------- @@ -174,10 +191,7 @@ def collect_all_user_workflows(self, config): if "workflow" in config[model]: w_config = config[model]["workflow"] # if "subjobs" in w_config: - # breakpoint() if "phases" in w_config: - # copies component workflow config to new variable ref_config - ref_config = copy.deepcopy(w_config) # Set attributes of workflow # This will be overwritten by all user defined workflows??? # Collect them in a list??? @@ -186,7 +200,7 @@ def collect_all_user_workflows(self, config): if self.check_if_keyword_is_valid(key): # set here only workflow attributes if not key == "phases": - set_workflow_attrib(self, key, value) + self.set_workflow_attrib(key, value) else: err_msg = f"``{key}`` is not a valid keyword of a workflow." esm_parser.user_error("ERROR", err_msg) @@ -195,6 +209,8 @@ def collect_all_user_workflows(self, config): new_phase_name = phase # create a new user phase object for ``phase`` new_phase = UserWorkflowPhase(new_phase_name) + # each subjob needs to have an unique name + # 1. check if ``new_phase`` is already defined as a default phase if phase in self.get_phases_attribs_list("default", "name"): err_msg = ( f"The user phase ``{new_phase_name}`` " @@ -202,20 +218,25 @@ def collect_all_user_workflows(self, config): f"This is not allowed." ) esm_parser.user_error("ERROR", err_msg) - # each subjob needs to have an unique name - # check if the name of the new user phase does not already exist - if new_phase_name not in user_workflow_phases_names: - # and append it to the list of user phases of the workflow + # 2. check if the name of the new user phase does not already exist + if new_phase_name in user_workflow_phases_names: + err_msg = ( + f"Two workflow phases have the same name " + f"{new_phase_name}." + ) + esm_parser.user_error("ERROR", err_msg) + # 3. 
if user phase has a new and unique name + else: + # append it to the list of user phases of the workflow user_workflow_phases_names.append(new_phase_name) # set attributes of user_workflow phases from # config settings - # check if valid workflow phase keywords + # check if valid phase keywords for key, value in w_config["phases"][phase].items(): if new_phase.check_if_keyword_is_valid(key): set_phase_attrib( - [new_phase], new_phase_name, key, value + new_phase, new_phase_name, key, value ) -# new_phase.__setattr__(key, value) else: err_msg = ( f"``{key}`` of workflow phase " @@ -223,21 +244,14 @@ def collect_all_user_workflows(self, config): f"of a workflow phase." ) esm_parser.user_error("ERROR", err_msg) - if new_phase.submit_to_batch_system and new_phase.batch_or_shell == "shell": - err_msg = ( - f"Inconsistence attributes for keywords " - f"``submit_to_batch_system`` and " - f"``batch_or_shell`` for phase " - f"``{new_phase.name}``." - ) - esm_parser.user_error("ERROR", err_msg) + # Make sure that batch_or_shell is set to batch if submit_to_batch is true + # TODO: remove/replace batch_or_shell by submit_to_batch_system? Is needed + # for setting it to SimulationSetup and in other functions (resubmit, etc.) + # Should not be set by user. TODO: Remove from documentation. + if new_phase.submit_to_batch_system: + new_phase.batch_or_shell == "batch" + user_workflow_phases.append(new_phase) - else: - err_msg = ( - f"Two workflow phases have the same name " - f"{new_phase_name}." - ) - esm_parser.user_error("ERROR", err_msg) self.user_phases = user_workflow_phases return self @@ -245,6 +259,7 @@ def collect_all_user_workflows(self, config): def write_to_config(self, config): """ Write to config. + TODO: Rename subjobs to phases. Needs changes also in resubmit.py and other files??? 
""" cluster_att = [] for att in dir(self.phases[0]): @@ -254,8 +269,6 @@ def write_to_config(self, config): if "workflow" in config["general"]: del config["general"]["workflow"] - # It is assumed here, that there are no workflows in config["general"] - # or that these are removed after collect_... config["general"]["workflow"] = {} config["general"]["workflow"].update(self.__dict__) # 3. Write clusters @@ -340,9 +353,9 @@ def check_unknown_phases(self): return unknown_phases - def order_clusters(self, config): + def order_phases(self): """ - Put the subjob_clusters in order. + Put the phases in order. Parameters ---------- @@ -352,10 +365,10 @@ def order_clusters(self, config): ------- self : Workflow object """ - # Check if user phases are independent from each other + # check if user phases are independent from each other # TODO: What if not independent? independent = self.check_user_workflow_dependency() - # Check if there are unknown phases, if yes, will give error exception + # check if there are unknown phases, if yes, will give error exception unknown_phases = self.check_unknown_phases() if unknown_phases: unknowns = ', '.join(unknown_phases) @@ -365,10 +378,19 @@ def order_clusters(self, config): ) esm_parser.user_error("ERROR", err_msg) + # check if run_after or run_before is set for each user phase + # if not, run_after will be set to last default phase for user_phase in self.user_phases: - # Check if run_after or run_before is set for each user phase if not user_phase.run_before and not user_phase.run_after: - esm_parser.user_error("ERROR", f"Don't know when to start user_phase. Please set run_after or run_before for user phase {user_phase.name}.") + user_phase.run_after = self.phases[-1].name + err_msg = ( + f"No value given for ``run_after`` or ``run_before`` " + f"of user phase ``{user_phase.name}``. " + f"Set it to last default phase in workflow: " + f"``{self.phases[-1].name}``." 
+ ) + esm_parser.user_note("NOTE", err_msg) + # Check if not both run_after and run_before are set at the same # time for each user phase # if user_phase.run_before and user_phase.run_after: @@ -378,6 +400,7 @@ def order_clusters(self, config): # f"or run_before." # ) # esm_parser.user_error("ERROR", err_msg) + # Correct for ``last_task_in_queue`` if necessary # Collect all next_run_triggered_by entries next_triggered = self.next_run_triggered_by @@ -393,13 +416,15 @@ def order_clusters(self, config): self.phases[0].called_from = next_triggered self.last_task_in_queue = next_triggered - # next_triggered = list(filter((self.next_run_triggered_by).__ne__, next_triggered)) + # what does this do? + #next_triggered = list(filter((self.next_run_triggered_by).__ne__, next_triggered)) # if len(next_triggered) > 1: # err_msg = "Mismatch found setting next_run_triggered_by for workflow." # esm_parser.user_error("ERROR", err_msg) # elif len(next_triggered) == 1: # self.next_run_triggered_by = next_triggered[0] # # else: let default + #breakpoint() # Set "next_submit" and "called_from" # "next_submit" which phase will be called next (run_after of the next phase) @@ -409,42 +434,23 @@ def order_clusters(self, config): for phase in self.phases + self.user_phases: next_submits[phase.name] = [] + for phase4 in self.phases + self.user_phases: + # if a cluster is not set for a phase set it to the phase name, + # so that every phase belongs to a cluster + # default cluster has the same name as the phase itself + if phase4.cluster is None: + phase4.cluster = phase4.name + for phase2 in self.phases + self.user_phases: if phase2.run_after is not None: - next_submits[phase2.run_after].append(phase2.name) + #next_submits[phase2.run_after].append(phase2.name) + if phase2.cluster not in next_submits[phase2.run_after]: + next_submits[phase2.run_after].append(phase2.cluster) phase2.called_from = phase2.run_after for phase3 in self.phases + self.user_phases: phase3.next_submit = 
next_submits[phase3.name] -# assign user phases to a cluster (tbd) - # - if all phases have the same run_after and run_before they can be - # in the cluster - # - in this cluster they will be run in parallel? - - for phase4 in self.phases + self.user_phases: - calling_cluster = phase4.run_after - -# brauch ich das hier noch??? -# # set last_task_in_queue -# if calling_cluster == self.last_task_in_queue: -# #self.last_task_in_queue.append(phase4.name) -# self.last_task_in_queue = phase4.name -# -# called_cluster = phase4.run_before -# # print(f"calling_cluster: {calling_cluster} ->", phase4.name, f" -> called_cluster: {called_cluster}") -# set_phase_attrib(self.phases+self.user_phases, called_cluster, "called_from", phase4.name) -# -# # set first_task_in_queue -# if called_cluster == self.first_task_in_queue: -# # self.first_task_in_queue.append(phase4.name) -# self.first_task_in_queue = phase4.name - - # set empty cluster entries to phase name - if phase4.cluster is None: - phase4.cluster = phase4.name - -# todo: check if num list > 1, is this possible ??? first_cluster_name = self.first_task_in_queue last_cluster_name = self.last_task_in_queue @@ -457,69 +463,69 @@ def order_clusters(self, config): return self - def complete_clusters(self, config): - # all that are within a next_submit list are in a cluster if: - # run concurrently - # have the same cluster entry. - """ - Rearanges the subjobs to their subjobs_clusters ??? - - Parameters - ---------- - self : Workflow object - config : dict - - Returns - ------- - subjob_clusters : dict - """ - # sort into dict subjob_clusters - subjob_clusters = {} - - for phase in self.phases + self.user_phases: - # Erstellt ein leeres dict im dict subjob_clusters - if phase.cluster not in subjob_clusters: - subjob_clusters[phase.cluster] = {} - - # Create empty list for each subjob_cluster - if "subjobs" not in subjob_clusters[phase.cluster]: - subjob_clusters[phase.cluster]["subjobs"] = [] - - # Append subjobs to list. 
- subjob_clusters[phase.cluster]["subjobs"].append(phase.name) - - # Then, complete the resource information per cluster - # determine whether a cluster is to be submitted to a batch system - for subjob_cluster in subjob_clusters: - nproc_sum = nproc_max = 0 - attributes = ["submit_to_batch_system", "order_in_cluster", "run_on_queue", "run_after", "run_before", "run_only", "skip_run_number", "skip_chunk_number", "batch_or_shell"] - for attrib in attributes: - temp_list = [] - for subjob in subjob_clusters[subjob_cluster]["subjobs"]: - if not get_phase_attrib(self.phases + self.user_phases, subjob, attrib) in temp_list: - subjob_clusters[subjob_cluster][attrib] = get_phase_attrib(self.phases + self.user_phases, subjob, attrib) - else: - print("Missmatch in attributes") - sys.exit(-1) - nproc_sum += get_phase_attrib(self.phases + self.user_phases, subjob, "nproc") - nproc_max = max(get_phase_attrib(self.phases + self.user_phases, subjob, "nproc"), nproc_max) - - # if subjob_clusters[subjob_cluster].get("submit_to_batch_system", False): - # subjob_clusters[subjob_cluster]["batch_or_shell"] = "batch" - # elif subjob_clusters[subjob_cluster].get("script", False): - # subjob_clusters[subjob_cluster]["batch_or_shell"] = "shell" - # - if "run_on_queue" not in subjob_clusters[subjob_cluster]: - print(f"Information on target queue is missing in cluster {subjob_cluster}.") - sys.exit(-1) - # - # TODO: Check in nproc is calculated correctly - if subjob_clusters[subjob_cluster]["order_in_cluster"] == "concurrent": - nproc = nproc_sum - else: - nproc = nproc_max - subjob_clusters[subjob_cluster]["nproc"] = nproc - return subjob_clusters +# def complete_clusters(self, config): +# # all that are within a next_submit list are in a cluster if: +# # run concurrently +# # have the same cluster entry. +# """ +# Rearanges the subjobs to their subjobs_clusters ??? 
+# +# Parameters +# ---------- +# self : Workflow object +# config : dict +# +# Returns +# ------- +# subjob_clusters : dict +# """ +# # sort into dict subjob_clusters +# subjob_clusters = {} +# +# for phase in self.phases + self.user_phases: +# # Erstellt ein leeres dict im dict subjob_clusters +# if phase.cluster not in subjob_clusters: +# subjob_clusters[phase.cluster] = {} +# +# # Create empty list for each subjob_cluster +# if "subjobs" not in subjob_clusters[phase.cluster]: +# subjob_clusters[phase.cluster]["subjobs"] = [] +# +# # Append subjobs to list. +# subjob_clusters[phase.cluster]["subjobs"].append(phase.name) +# +# # Then, complete the resource information per cluster +# # determine whether a cluster is to be submitted to a batch system +# for subjob_cluster in subjob_clusters: +# nproc_sum = nproc_max = 0 +# attributes = ["submit_to_batch_system", "order_in_cluster", "run_on_queue", "run_after", "run_before", "run_only", "skip_run_number", "skip_chunk_number", "batch_or_shell"] +# for attrib in attributes: +# temp_list = [] +# for subjob in subjob_clusters[subjob_cluster]["subjobs"]: +# if not get_phase_attrib(self.phases + self.user_phases, subjob, attrib) in temp_list: +# subjob_clusters[subjob_cluster][attrib] = get_phase_attrib(self.phases + self.user_phases, subjob, attrib) +# else: +# print("Missmatch in attributes") +# sys.exit(-1) +# nproc_sum += get_phase_attrib(self.phases + self.user_phases, subjob, "nproc") +# nproc_max = max(get_phase_attrib(self.phases + self.user_phases, subjob, "nproc"), nproc_max) +# +# # if subjob_clusters[subjob_cluster].get("submit_to_batch_system", False): +# # subjob_clusters[subjob_cluster]["batch_or_shell"] = "batch" +# # elif subjob_clusters[subjob_cluster].get("script", False): +# # subjob_clusters[subjob_cluster]["batch_or_shell"] = "shell" +# # +# if "run_on_queue" not in subjob_clusters[subjob_cluster]: +# print(f"Information on target queue is missing in cluster {subjob_cluster}.") +# sys.exit(-1) +# # 
+# # TODO: Check in nproc is calculated correctly +# if subjob_clusters[subjob_cluster]["order_in_cluster"] == "concurrent": +# nproc = nproc_sum +# else: +# nproc = nproc_max +# subjob_clusters[subjob_cluster]["nproc"] = nproc +# return subjob_clusters def prepend_newrun_job(self, config): """ @@ -578,6 +584,7 @@ def skip_cluster(cluster, config): """ Checks if a phase/cluster can be skipped. Needed keywords: run_only, skip_chunk_number + Is called from resubmit.py Parameters ---------- @@ -626,16 +633,16 @@ class WorkflowPhase: def __init__(self, phase_name): self.name = None - self.nproc = 1 + self.nproc = 1 # needed self.run_before = None self.run_after = None - self.submit_to_batch_system = False + self.submit_to_batch_system = False # needed self.run_on_queue = None self.cluster = None - self.next_submit = [] - self.called_from = None - self.batch_or_shell = "SimulationSetup" - self.order_in_cluster = "sequential" + self.next_submit = [] # needed + self.called_from = None # needed + self.batch_or_shell = "SimulationSetup" # needed + self.order_in_cluster = "sequential" # needed ??? self.run_only = None self.skip_chunk_number = None self.skip_run_number = None @@ -694,32 +701,30 @@ def assemble_workflow(config): # initialize the default workflow as Workflow object # TODO: Where are these default phases defined? 
For now I placed it in # esm_tools/configs/esm_software/esm_runscripts/defaults.yaml - phases = [] - always_run_with = [] if "defaults.yaml" in config["general"]: if "workflow" in config["general"]["defaults.yaml"]: - phases = config["general"]["defaults.yaml"]["workflow"]["phases"] - if "always_run_with" in config["general"]["defaults.yaml"]["workflow"]: - always_run_with = config["general"]["defaults.yaml"]["workflow"]["always_run_with"] - - if phases and always_run_with: - workflow = Workflow(phases, always_run_with=always_run_with) - elif phases: - workflow = Workflow(phases) + phases = config["general"]["defaults.yaml"]["workflow"].get("phases", []) + always_run_with = config["general"]["defaults.yaml"]["workflow"].get("always_run_with", []) + phases_to_submit_to_batch_system = config["general"]["defaults.yaml"]["workflow"].get("phases_to_submit_to_batch_system", []) + + if phases: + workflow = Workflow(phases, always_run_with=always_run_with, phases_to_submit_to_batch_system=phases_to_submit_to_batch_system) else: esm_parser.user_error("ERROR", "No default workflow phases defined.") # Note: Should this work also if no default phases are set in such a config # file, but instead all workflow phases are defined in different configs # and/or runscripts? - # TODO: Where could a user define a different (default) phase list? - # Or should this be changed in defaults.yaml? + # Where could a user define a different (default) phase list? + # Or should this be changed in defaults.yaml as it is now? + # 2. Initialize default workflow phases workflow = workflow.init_default_workflow(config) + # 3. Read in workflows from runscript and config files workflow = workflow.collect_all_user_workflows(config) - # config = collect_all_workflow_information(config) - # 4. Order user workflows into default workflow wrt. workflow attributs. - workflow = workflow.order_clusters(config) + + # 4. Order user workflows into default workflow wrt. workflow and phase attributs. 
+ workflow = workflow.order_phases() # What is the next functions needed for? # subjob_clusters = workflow.complete_clusters(config) @@ -727,9 +732,11 @@ def assemble_workflow(config): # 5. create new first phase of type SimulationSetup, if first_task_in_queue is # user phase (type batch or shell) workflow = workflow.prepend_newrun_job(config) + # 6. write the workflow to config - config = workflow.write_to_config(config) # 7. Remove old worklow from config + config = workflow.write_to_config(config) + # Set "jobtype" for the first task??? if config["general"]["jobtype"] == "unknown": @@ -743,18 +750,6 @@ def assemble_workflow(config): return config -def write_subjob_clusters_to_config(config, subjob_clusters): - config["general"]["subjob_clusters"] = subjob_clusters - return config - - -def set_workflow_attrib(workflow, attrib, value): - if type(getattr(workflow, attrib)).__name__ == "list": - workflow.__dict__[attrib].append(value) - else: - workflow.__setattr__(attrib, value) - - def set_phase_attrib(workflow_phases, phase_name, attrib, value): if not type(workflow_phases) is list: workflow_phases = [workflow_phases] @@ -830,7 +825,7 @@ def display_workflow(config): # second_phase.remove(first_phase) sec_phase_str = "" for sec_phase in second_phase: - second_phase = config["general"]["workflow"]["subjobs"][sec_phase]["next_submit"] + second_phase = config["general"]["workflow"]["subjob_clusters"][sec_phase]["next_submit"] if sec_phase_str == "": sec_phase_str = f"{sec_phase_str} {sec_phase}" else: From 138b3c2e6cf001f6b3ad9828d7b84e09ebd0ec9c Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Fri, 17 Nov 2023 13:10:32 +0100 Subject: [PATCH 25/98] Added some further comments and added cluster info in display_workflow for inspect argument. 
--- src/esm_runscripts/workflow.py | 41 +++++++++++++++++----------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index 6341748af..1730d631a 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -259,7 +259,7 @@ def collect_all_user_workflows(self, config): def write_to_config(self, config): """ Write to config. - TODO: Rename subjobs to phases. Nees changes also in resubmit.py and other files??? + TODO: Rename ``subjobs`` to ``phases``. But this needs changes also in resubmit.py and other files??? """ cluster_att = [] for att in dir(self.phases[0]): @@ -416,18 +416,8 @@ def order_phases(self): self.phases[0].called_from = next_triggered self.last_task_in_queue = next_triggered - # what does this do? - #next_triggered = list(filter((self.next_run_triggered_by).__ne__, next_triggered)) - # if len(next_triggered) > 1: - # err_msg = "Mismatch found setting next_run_triggered_by for workflow." 
- # esm_parser.user_error("ERROR", err_msg) - # elif len(next_triggered) == 1: - # self.next_run_triggered_by = next_triggered[0] - # # else: let default - #breakpoint() - # Set "next_submit" and "called_from" - # "next_submit" which phase will be called next (run_after of the next phase) + # "next_submit" which phase/cluster will be called next (run_after of the next phase) # "called_from" name of previous phase, run_after of current phase # Create a dict of all phases with empty lists next_submits = {} @@ -441,9 +431,9 @@ def order_phases(self): if phase4.cluster is None: phase4.cluster = phase4.name + # set next_submits to the cluster name rather then to the phase name for phase2 in self.phases + self.user_phases: if phase2.run_after is not None: - #next_submits[phase2.run_after].append(phase2.name) if phase2.cluster not in next_submits[phase2.run_after]: next_submits[phase2.run_after].append(phase2.cluster) phase2.called_from = phase2.run_after @@ -455,9 +445,11 @@ def order_phases(self): last_cluster_name = self.last_task_in_queue # if first_cluster_name is not next_submit of last_cluster_name + # set 'next_submit' of last phase/cluster to first phase/cluster in workflow if first_cluster_name not in get_phase_attrib(self.phases+self.user_phases, last_cluster_name, "next_submit"): set_phase_attrib(self.phases+self.user_phases, last_cluster_name, "next_submit", first_cluster_name) # if last_cluster_name is not called_from of first_cluster_name + # set 'called_from' of first phase/cluster to last phase/cluster if not last_cluster_name == get_phase_attrib(self.phases+self.user_phases, first_cluster_name, "called_from"): set_phase_attrib(self.phases+self.user_phases, first_cluster_name, "called_from", last_cluster_name) @@ -804,35 +796,42 @@ def display_workflow(config): config : dict (needed???) 
""" - esm_parser.pprint_config(config["general"]["workflow"]) + display_nicely(config) first_phase = config["general"]["workflow"]["first_task_in_queue"] + subjobs = config["general"]["workflow"]["subjob_clusters"][first_phase]["subjobs"] + # Note: next_submit points to the next cluster (not phase) second_phase = config["general"]["workflow"]["subjobs"][first_phase]["next_submit"] - workflow_order = f"{first_phase}" + workflow_order = f"``{first_phase}`` {subjobs}" + # While first_phase (first_task_in_queue) is not to be called by the next phase (next_submit). + # In other words: If not last phase/cluster is reached. while first_phase not in second_phase and second_phase: sec_phase_str = "" for sec_phase in second_phase: if config["general"]["workflow"]["subjobs"][sec_phase]["next_submit"]: second_phase = config["general"]["workflow"]["subjobs"][sec_phase]["next_submit"] + subjobs = config["general"]["workflow"]["subjob_clusters"][sec_phase]["subjobs"] if sec_phase_str == "": - sec_phase_str = f"{sec_phase_str} {sec_phase}" + sec_phase_str = f"{sec_phase_str} ``{sec_phase}`` {subjobs}" else: - sec_phase_str = f"{sec_phase_str}, {sec_phase}" + sec_phase_str = f"{sec_phase_str}, ``{sec_phase}`` {subjobs}" workflow_order = f"{workflow_order} -> {sec_phase_str}" + # For last phase that would start the next run else: - # second_phase.remove(first_phase) sec_phase_str = "" + # for all cluster in next_submit for sec_phase in second_phase: second_phase = config["general"]["workflow"]["subjob_clusters"][sec_phase]["next_submit"] + subjobs = config["general"]["workflow"]["subjob_clusters"][sec_phase]["subjobs"] if sec_phase_str == "": - sec_phase_str = f"{sec_phase_str} {sec_phase}" + sec_phase_str = f"{sec_phase_str} ``{sec_phase}`` {subjobs}" else: - sec_phase_str = f"{sec_phase_str}, {sec_phase}" + sec_phase_str = f"{sec_phase_str} and ``{sec_phase}`` {subjobs}" workflow_order = f"{workflow_order} -> {sec_phase_str}" - esm_parser.user_note("Workflow sequence", 
f"{workflow_order}") + esm_parser.user_note("Workflow sequence (cluster [phases])", f"{workflow_order}") return config From fcf5a410d406b43d01eb92e437ce53fae58774bd Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Fri, 17 Nov 2023 16:48:01 +0100 Subject: [PATCH 26/98] Add new workflow attribute in defaults.yaml. --- configs/esm_software/esm_runscripts/defaults.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/configs/esm_software/esm_runscripts/defaults.yaml b/configs/esm_software/esm_runscripts/defaults.yaml index 2aa0684dd..812e32ad0 100644 --- a/configs/esm_software/esm_runscripts/defaults.yaml +++ b/configs/esm_software/esm_runscripts/defaults.yaml @@ -15,3 +15,5 @@ workflow: always_run_with: - prepare - prepexp + phases_to_submit_to_batch_system: + - compute From 5a3bfb86595ba2cede94183c2f87815132f6c229 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Fri, 17 Nov 2023 16:49:14 +0100 Subject: [PATCH 27/98] Reactivated function complete_clusters. --- src/esm_runscripts/workflow.py | 104 ++++++++++++++++----------------- 1 file changed, 50 insertions(+), 54 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index 1730d631a..1efaeb86d 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -278,18 +278,22 @@ def write_to_config(self, config): config["general"]["workflow"]["subjob_clusters"][cluster]["subjobs"] = [] for phase in self.phases + self.user_phases: if phase.cluster == cluster: + # TODO: Are there more attributes to be merged from the different phases within a cluster??? + # nproc is calculated in complete_clusters -> can be placed here??? config["general"]["workflow"]["subjob_clusters"][cluster]["subjobs"].append(phase.name) for att in cluster_att: config["general"]["workflow"]["subjob_clusters"][cluster][att] = getattr(phase, att) + config["general"]["workflow"]["subjob_clusters"][cluster]["name"] = cluster # 2. 
Write subjobs/phases config["general"]["workflow"]["subjobs"] = {} for phase in self.phases+self.user_phases: temp_dict = {phase.name: phase.__dict__} config["general"]["workflow"]["subjobs"].update(temp_dict) - # Todo: delete phases and user_phases + # delete phases and user_phases del config["general"]["workflow"]["phases"] del config["general"]["workflow"]["user_phases"] + return config def check_user_workflow_dependency(self): @@ -455,69 +459,61 @@ def order_phases(self): return self -# def complete_clusters(self, config): -# # all that are within a next_submit list are in a cluster if: -# # run concurrently -# # have the same cluster entry. -# """ -# Rearanges the subjobs to their subjobs_clusters ??? -# -# Parameters -# ---------- -# self : Workflow object -# config : dict -# -# Returns -# ------- -# subjob_clusters : dict -# """ -# # sort into dict subjob_clusters -# subjob_clusters = {} -# -# for phase in self.phases + self.user_phases: -# # Erstellt ein leeres dict im dict subjob_clusters -# if phase.cluster not in subjob_clusters: -# subjob_clusters[phase.cluster] = {} -# -# # Create empty list for each subjob_cluster -# if "subjobs" not in subjob_clusters[phase.cluster]: -# subjob_clusters[phase.cluster]["subjobs"] = [] -# -# # Append subjobs to list. -# subjob_clusters[phase.cluster]["subjobs"].append(phase.name) -# -# # Then, complete the resource information per cluster -# # determine whether a cluster is to be submitted to a batch system -# for subjob_cluster in subjob_clusters: -# nproc_sum = nproc_max = 0 + def complete_clusters(self, config): + # all that are within a next_submit list are in a cluster if: + # run concurrently + # have the same cluster entry. + """ + Rearanges the subjobs to their subjobs_clusters ??? + + TODO: Can this be put into other functions/methods? 
+ + Parameters + ---------- + self : Workflow object + config : dict + + Returns + ------- + config : dict + """ + subjob_clusters = config["general"]["workflow"]["subjob_clusters"] + + # Then, complete the resource information per cluster + # determine whether a cluster is to be submitted to a batch system + for subjob_cluster in subjob_clusters: + nproc_sum = nproc_max = 0 + # Check if the following attributes are set for each cluster??? # attributes = ["submit_to_batch_system", "order_in_cluster", "run_on_queue", "run_after", "run_before", "run_only", "skip_run_number", "skip_chunk_number", "batch_or_shell"] # for attrib in attributes: # temp_list = [] -# for subjob in subjob_clusters[subjob_cluster]["subjobs"]: + for subjob in subjob_clusters[subjob_cluster]["subjobs"]: + # Check if the following attributes are set for each cluster??? # if not get_phase_attrib(self.phases + self.user_phases, subjob, attrib) in temp_list: # subjob_clusters[subjob_cluster][attrib] = get_phase_attrib(self.phases + self.user_phases, subjob, attrib) # else: # print("Missmatch in attributes") # sys.exit(-1) -# nproc_sum += get_phase_attrib(self.phases + self.user_phases, subjob, "nproc") -# nproc_max = max(get_phase_attrib(self.phases + self.user_phases, subjob, "nproc"), nproc_max) + nproc_sum += get_phase_attrib(self.phases + self.user_phases, subjob, "nproc") + nproc_max = max(get_phase_attrib(self.phases + self.user_phases, subjob, "nproc"), nproc_max) # -# # if subjob_clusters[subjob_cluster].get("submit_to_batch_system", False): + if subjob_clusters[subjob_cluster].get("submit_to_batch_system", False): # # subjob_clusters[subjob_cluster]["batch_or_shell"] = "batch" + +# Why setting batch_or_shell to shell if a script is given? Wouldn't now all phases be executed as shell and never as batch? 
# # elif subjob_clusters[subjob_cluster].get("script", False): # # subjob_clusters[subjob_cluster]["batch_or_shell"] = "shell" # # -# if "run_on_queue" not in subjob_clusters[subjob_cluster]: -# print(f"Information on target queue is missing in cluster {subjob_cluster}.") -# sys.exit(-1) -# # -# # TODO: Check in nproc is calculated correctly -# if subjob_clusters[subjob_cluster]["order_in_cluster"] == "concurrent": -# nproc = nproc_sum -# else: -# nproc = nproc_max -# subjob_clusters[subjob_cluster]["nproc"] = nproc -# return subjob_clusters + if "run_on_queue" not in subjob_clusters[subjob_cluster]: + err_msg = f"No value for target queue given by ``run_on_queue' for cluster {subjob_cluster}." + esm_parser.user_error("ERROR", err_msg) + + if subjob_clusters[subjob_cluster]["order_in_cluster"] == "concurrent": + nproc = nproc_sum + else: + nproc = nproc_max + subjob_clusters[subjob_cluster]["nproc"] = nproc + return config def prepend_newrun_job(self, config): """ @@ -718,9 +714,6 @@ def assemble_workflow(config): # 4. Order user workflows into default workflow wrt. workflow and phase attributs. workflow = workflow.order_phases() - # What is the next functions needed for? - # subjob_clusters = workflow.complete_clusters(config) - # 5. create new first phase of type SimulationSetup, if first_task_in_queue is # user phase (type batch or shell) workflow = workflow.prepend_newrun_job(config) @@ -729,6 +722,9 @@ def assemble_workflow(config): # 7. Remove old worklow from config config = workflow.write_to_config(config) + # 8. complete some information in a cluster + # e.g. if phases in cluster are submit to sbatch system + config = workflow.complete_clusters(config) # Set "jobtype" for the first task??? if config["general"]["jobtype"] == "unknown": From e6bada5540b473523a783715f6a93e5a9beb91fc Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Fri, 17 Nov 2023 16:50:44 +0100 Subject: [PATCH 28/98] Some changes for processing phases in awicm3. 
--- configs/setups/awicm3/awicm3.yaml | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/configs/setups/awicm3/awicm3.yaml b/configs/setups/awicm3/awicm3.yaml index 30f6284c4..5c5f29716 100644 --- a/configs/setups/awicm3/awicm3.yaml +++ b/configs/setups/awicm3/awicm3.yaml @@ -325,18 +325,22 @@ oifs: tl_o3_data_dir: ${input_dir}/${version}/climate/ ICMGG_INIT_name: "_${fesom.resolution}" - workflow: - next_run_triggered_by: tidy - subjobs: - my_new_subjob: - batch_or_shell: shell +# workflow: +# next_run_triggered_by: tidy +# phases: +# my_new_subjob: +# batch_or_shell: shell +# run_before: tidy +# run_after: compute +# script_dir: "/work/ab0995/a270089/myrunscripts/" +# script: "helloworld.sh" # Postprocessing choose_general.postprocessing: True: - workflow: + add_workflow: next_run_triggered_by: tidy - subjobs: + phases: postprocessing: batch_or_shell: batch order_in_cluster: concurrent From ccafa8b3441ff04612b045b9c5666924292fceb9 Mon Sep 17 00:00:00 2001 From: Nadine Wieters <31928930+nwieters@users.noreply.github.com> Date: Mon, 20 Nov 2023 10:18:23 +0100 Subject: [PATCH 29/98] Update src/esm_runscripts/inspect.py Co-authored-by: Miguel <63242832+mandresm@users.noreply.github.com> --- src/esm_runscripts/inspect.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/esm_runscripts/inspect.py b/src/esm_runscripts/inspect.py index d6fb6519e..5d6b89122 100644 --- a/src/esm_runscripts/inspect.py +++ b/src/esm_runscripts/inspect.py @@ -18,7 +18,6 @@ def run_job(config): def inspect_workflow(config): if config["general"]["inspect"] == "workflow": config = workflow.display_workflow(config) -# config = workflow.display_nicely(config) sys.exit(0) return config From e107d99fa29f0f0d2c5eda073a7ebe251a8acf8f Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Mon, 20 Nov 2023 16:29:17 +0100 Subject: [PATCH 30/98] Merged default and user phase class, made phase a subclass of dict. 
--- src/esm_runscripts/workflow.py | 240 +++++++++++++++------------------ 1 file changed, 105 insertions(+), 135 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index 1efaeb86d..e20d6ab49 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -55,7 +55,7 @@ def get_workflow_phase_by_name(self, phase_name): phase : class phase or user_phase """ for phase in self.phases + self.user_phases: - if phase.name == phase_name: + if phase["name"] == phase_name: return phase def get_phases_attribs_list(self, phase_type, attrib): @@ -64,7 +64,7 @@ def get_phases_attribs_list(self, phase_type, attrib): Parameters ---------- - self: class Workflow + self: Workflow object phase_type: str (default or user) attrib: str @@ -73,9 +73,9 @@ def get_phases_attribs_list(self, phase_type, attrib): phases_attribs : list """ if phase_type == 'user': - phases_attribs = [getattr(phase, attrib) for phase in self.user_phases] + phases_attribs = [phase[attrib] for phase in self.user_phases] else: - phases_attribs = [getattr(phase, attrib) for phase in self.phases] + phases_attribs = [phase[attrib] for phase in self.phases] return phases_attribs @@ -109,26 +109,26 @@ def init_default_workflow(self, config): if ind < self.num_phases - 1: # Set run_before attrib of all phases (except last on) # to the next phase name - phase.run_before = self.phases[ind+1].name - phase.next_submit.append(self.phases[ind+1].name) - phase.run_after = self.phases[ind-1].name + phase["run_before"] = self.phases[ind+1]["name"] + phase["next_submit"].append(self.phases[ind+1]["name"]) + phase["run_after"] = self.phases[ind-1]["name"] else: # Set run_after attrib of last phase to previous phase name - phase.run_before = self.phases[0].name - phase.next_submit.append(self.phases[0].name) - phase.run_after = self.phases[ind-1].name + phase["run_before"] = self.phases[0]["name"] + phase["next_submit"].append(self.phases[0]["name"]) + phase["run_after"] = 
self.phases[ind-1]["name"] - phase.cluster = phase.name - if phase.name in self.phases_to_submit_to_batch_system: - phase.batch_or_shell = 'batch' - phase.submit_to_batch_system = True - phase.run_on_queue = config["computer"]["partitions"]["compute"]["name"] - phase.nproc = tasks + phase["cluster"] = phase["name"] + if phase["name"] in self.phases_to_submit_to_batch_system: + phase["batch_or_shell"] = 'batch' + phase["submit_to_batch_system"] = True + phase["run_on_queue"] = config["computer"]["partitions"]["compute"]["name"] + phase["nproc"] = tasks # Set default workflow values - self.set_workflow_attrib("first_task_in_queue", self.phases[0].name) - self.set_workflow_attrib("last_task_in_queue", self.phases[-1].name) - self.set_workflow_attrib("next_run_triggered_by", self.phases[-1].name) + self.set_workflow_attrib("first_task_in_queue", self.phases[0]["name"]) + self.set_workflow_attrib("last_task_in_queue", self.phases[-1]["name"]) + self.set_workflow_attrib("next_run_triggered_by", self.phases[-1]["name"]) return self @@ -206,9 +206,9 @@ def collect_all_user_workflows(self, config): esm_parser.user_error("ERROR", err_msg) # for subjob in list(copy.deepcopy(w_config["subjobs"])): for phase in list(copy.deepcopy(w_config["phases"])): - new_phase_name = phase +# new_phase_name = phase # create a new user phase object for ``phase`` - new_phase = UserWorkflowPhase(new_phase_name) + new_phase = WorkflowPhase(phase) # each subjob needs to have an unique name # 1. check if ``new_phase`` is already defined as a default phase if phase in self.get_phases_attribs_list("default", "name"): @@ -219,7 +219,7 @@ def collect_all_user_workflows(self, config): ) esm_parser.user_error("ERROR", err_msg) # 2. check if the name of the new user phase does not already exist - if new_phase_name in user_workflow_phases_names: + if phase in user_workflow_phases_names: err_msg = ( f"Two workflow phases have the same name " f"{new_phase_name}." 
@@ -228,15 +228,13 @@ def collect_all_user_workflows(self, config): # 3. if user phase has a new and unique name else: # append it to the list of user phases of the workflow - user_workflow_phases_names.append(new_phase_name) + user_workflow_phases_names.append(phase) # set attributes of user_workflow phases from # config settings # check if valid phase keywords for key, value in w_config["phases"][phase].items(): - if new_phase.check_if_keyword_is_valid(key): - set_phase_attrib( - new_phase, new_phase_name, key, value - ) + if key in new_phase: + new_phase[key] = value else: err_msg = ( f"``{key}`` of workflow phase " @@ -247,10 +245,11 @@ def collect_all_user_workflows(self, config): # Make sure that batch_or_shell is set to batch if submit_to_batch is true # TODO: remove/replace batch_or_shell by submit_to_batch_system? Is needed # for setting it to SimulationSetup and in other functions (resubmit, etc.) - # Should not be set by user. TODO: Remove from dumentation. - if new_phase.submit_to_batch_system: - new_phase.batch_or_shell == "batch" - + # Should not be set by user. TODO: Remove from documentation. + if new_phase["submit_to_batch_system"]: + new_phase["batch_or_shell"] = "batch" + else: + new_phase["batch_or_shell"] = "shell" user_workflow_phases.append(new_phase) self.user_phases = user_workflow_phases @@ -261,10 +260,6 @@ def write_to_config(self, config): Write to config. TODO: Rename ``subjobs`` to ``phases``. But this needs changes also in resubmit.py and other files??? """ - cluster_att = [] - for att in dir(self.phases[0]): - if (att[:2] != "__"): - cluster_att.append(att) # 1. Delete unnecessary config workflow entries (e.g. 
in general) if "workflow" in config["general"]: del config["general"]["workflow"] @@ -277,18 +272,18 @@ def write_to_config(self, config): config["general"]["workflow"]["subjob_clusters"][cluster] = {} config["general"]["workflow"]["subjob_clusters"][cluster]["subjobs"] = [] for phase in self.phases + self.user_phases: - if phase.cluster == cluster: + if phase["cluster"] == cluster: # TODO: Are there more attributes to be merged from the different phases within a cluster??? # nproc is calculated in complete_clusters -> can be placed here??? - config["general"]["workflow"]["subjob_clusters"][cluster]["subjobs"].append(phase.name) - for att in cluster_att: - config["general"]["workflow"]["subjob_clusters"][cluster][att] = getattr(phase, att) + config["general"]["workflow"]["subjob_clusters"][cluster]["subjobs"].append(phase["name"]) + for att in phase: + config["general"]["workflow"]["subjob_clusters"][cluster][att] = phase[att] config["general"]["workflow"]["subjob_clusters"][cluster]["name"] = cluster # 2. Write subjobs/phases config["general"]["workflow"]["subjobs"] = {} - for phase in self.phases+self.user_phases: - temp_dict = {phase.name: phase.__dict__} - config["general"]["workflow"]["subjobs"].update(temp_dict) + for phase in self.phases + self.user_phases: + temp_dict = phase + config["general"]["workflow"]["subjobs"][phase["name"]] = temp_dict # delete phases and user_phases del config["general"]["workflow"]["phases"] @@ -385,22 +380,22 @@ def order_phases(self): # check if run_after or run_before is set for each user phase # if not, run_after will be set to last default phase for user_phase in self.user_phases: - if not user_phase.run_before and not user_phase.run_after: - user_phase.run_after = self.phases[-1].name + if not user_phase["run_before"] and not user_phase["run_after"]: + user_phase["run_after"] = self.phases[-1]["name"] err_msg = ( f"No value given for ``run_after`` or ``run_before`` " - f"of user phase ``{user_phase.name}``. 
" + f"of user phase ``{user_phase['name']}``. " f"Set it to last default phase in workflow: " - f"``{self.phases[-1].name}``." + f"``{self.phases[-1]['name']}``." ) esm_parser.user_note("NOTE", err_msg) # Check if not both run_after and run_before are set at the same # time for each user phase -# if user_phase.run_before and user_phase.run_after: +# if user_phase['run_before'] and user_phase['run_after']: # err_msg = ( # f"Both run_after and run_before are set. Don't know when " -# f"to start {user_phase.name}. Please only set run_after " +# f"to start {user_phase['name']}. Please only set run_after " # f"or run_before." # ) # esm_parser.user_error("ERROR", err_msg) @@ -414,10 +409,10 @@ def order_phases(self): # get first default phase and correct run_after, called_from # correct last_task_in_queue of workflow if next_triggered not in self.get_phases_attribs_list("default", "name"): - self.phases[-1].next_submit.remove(self.phases[0].name) - self.phases[-1].next_submit.append(next_triggered) - self.phases[0].run_after = next_triggered - self.phases[0].called_from = next_triggered + self.phases[-1]["next_submit"].remove(self.phases[0]["name"]) + self.phases[-1]["next_submit"].append(next_triggered) + self.phases[0]["run_after"] = next_triggered + self.phases[0]["called_from"] = next_triggered self.last_task_in_queue = next_triggered # Set "next_submit" and "called_from" @@ -426,36 +421,38 @@ def order_phases(self): # Create a dict of all phases with empty lists next_submits = {} for phase in self.phases + self.user_phases: - next_submits[phase.name] = [] + next_submits[phase["name"]] = [] for phase4 in self.phases + self.user_phases: # if a cluster is not set for a phase set it to the phase name, # so that every phase belongs to a cluster # default cluster has the same name as the phase itself - if phase4.cluster is None: - phase4.cluster = phase4.name + if phase4["cluster"] is None: + phase4["cluster"] = phase4["name"] # set next_submits to the cluster name rather 
then to the phase name for phase2 in self.phases + self.user_phases: - if phase2.run_after is not None: - if phase2.cluster not in next_submits[phase2.run_after]: - next_submits[phase2.run_after].append(phase2.cluster) - phase2.called_from = phase2.run_after + if phase2["run_after"] is not None: + if phase2["cluster"] not in next_submits[phase2["run_after"]]: + next_submits[phase2["run_after"]].append(phase2["cluster"]) + phase2["called_from"] = phase2["run_after"] for phase3 in self.phases + self.user_phases: - phase3.next_submit = next_submits[phase3.name] + phase3["next_submit"] = next_submits[phase3["name"]] first_cluster_name = self.first_task_in_queue + first_phase = self.get_workflow_phase_by_name(first_cluster_name) last_cluster_name = self.last_task_in_queue + last_phase = self.get_workflow_phase_by_name(last_cluster_name) # if first_cluster_name is not next_submit of last_cluster_name # set 'next_submit' of last phase/cluster to first phase/cluster in workflow - if first_cluster_name not in get_phase_attrib(self.phases+self.user_phases, last_cluster_name, "next_submit"): - set_phase_attrib(self.phases+self.user_phases, last_cluster_name, "next_submit", first_cluster_name) + if first_cluster_name not in last_phase["next_submit"]: + last_phase.set_attrib("next_submit", first_cluster_name) # if last_cluster_name is not called_from of first_cluster_name # set 'called_from' of first phase/cluster to last phase/cluster - if not last_cluster_name == get_phase_attrib(self.phases+self.user_phases, first_cluster_name, "called_from"): - set_phase_attrib(self.phases+self.user_phases, first_cluster_name, "called_from", last_cluster_name) + if not last_cluster_name == first_phase["called_from"]: + first_phase.set_attrib("called_from", last_cluster_name) return self @@ -537,7 +534,7 @@ def prepend_newrun_job(self, config): first_task_name = self.first_task_in_queue first_phase = self.get_workflow_phase_by_name(first_task_name) - if not get_phase_attrib(first_phase, 
first_phase.name, "batch_or_shell") == "SimulationSetup": + if not first_phase["batch_or_shell"] == "SimulationSetup": last_task_name = self.last_task_in_queue last_phase = self.get_workflow_phase_by_name(last_task_name) @@ -545,22 +542,24 @@ def prepend_newrun_job(self, config): new_first_phase_name = "newrun_general" # Create new default phase object new_first_phase = WorkflowPhase(new_first_phase_name) - set_phase_attrib(new_first_phase, new_first_phase_name, "called_from", last_task_name) - set_phase_attrib(new_first_phase, new_first_phase_name, "run_before", first_task_name) - set_phase_attrib(new_first_phase, new_first_phase_name, "next_submit", first_task_name) - set_phase_attrib(new_first_phase, new_first_phase_name, "cluster", "newrun") - set_phase_attrib(new_first_phase, new_first_phase_name, "batch_or_shell", "SimulationSetup") - set_phase_attrib(new_first_phase, new_first_phase_name, "nproc", 1) + new_first_phase.set_attrib("next_submit", first_phase["cluster"]) + new_first_phase.set_attrib("called_from", last_phase["cluster"]) + new_first_phase.set_attrib("run_before", first_phase["cluster"]) + new_first_phase.set_attrib("next_submit", first_phase["cluster"]) + new_first_phase.set_attrib("cluster", "newrun") + new_first_phase.set_attrib("batch_or_shell", "SimulationSetup") + new_first_phase.set_attrib("nproc", 1) # reset last_task attributes - set_phase_attrib(last_phase, last_phase.name, "next_submit", new_first_phase_name) - last_phase.next_submit.remove(first_task_name) + last_phase.set_attrib("next_submit", "newrun") + last_phase.remove_attrib("next_submit", first_phase["cluster"]) # reset first_task attributes - first_phase.called_from = new_first_phase_name + first_phase.set_attrib("called_from", "newrun") + first_phase.set_attrib("run_after", "newrun") # reset workflow attributes - self.first_task_in_queue = new_first_phase_name + self.first_task_in_queue = "newrun" # Set new phase to beginning of default phase list self.phases.insert(0, 
new_first_phase) @@ -616,58 +615,40 @@ def skip_cluster(cluster, config): return False -class WorkflowPhase: +class WorkflowPhase(dict): """A workflow phase class.""" def __init__(self, phase_name): - self.name = None - self.nproc = 1 # needed - self.run_before = None - self.run_after = None - self.submit_to_batch_system = False # needed - self.run_on_queue = None - self.cluster = None - self.next_submit = [] # needed - self.called_from = None # needed - self.batch_or_shell = "SimulationSetup" # needed - self.order_in_cluster = "sequential" # needed ??? - self.run_only = None - self.skip_chunk_number = None - self.skip_run_number = None - self.name = phase_name - - -class UserWorkflowPhase(WorkflowPhase): - """A user workflow phase class.""" - - def __init__(self, phase_name): - - WorkflowPhase.__init__(self, phase_name) - - self.script = None - self.script_dir = None - self.call_function = None - self.env_preparation = None - self.batch_or_shell = "shell" - self.submit_to_batch_system = False - - def check_if_keyword_is_valid(self, keyword): - """ - Checks if the key given for a user workflow is valie - - Parameters - ---------- - keyword : str - - Returns - ------- - true or false - """ + self["nproc"] = 1 # needed + self["run_before"] = None + self["run_after"] = None + self["submit_to_batch_system"] = False # needed + self["run_on_queue"] = None + self["cluster"] = None + self["next_submit"] = [] # needed + self["called_from"] = None # needed + self["batch_or_shell"] = "SimulationSetup" # needed + self["order_in_cluster"] = "sequential" # needed ??? 
+ self["run_only"] = None + self["skip_chunk_number"] = None + self["skip_run_number"] = None + self["name"] = phase_name + self["script"] = None + self["script_dir"] = None + self["call_function"] = None + self["env_preparation"] = None + + def set_attrib(self, attrib, value): + if type(self[attrib]) == "list": + self[attrib].append(value) + else: + self[attrib] = value - if hasattr(self, keyword): - return True + def remove_attrib(self, attrib, value): + if type(self[attrib]) == "list": + self[attrib].remove(value) else: - return False + self[attrib] = None def assemble_workflow(config): @@ -738,23 +719,12 @@ def assemble_workflow(config): return config -def set_phase_attrib(workflow_phases, phase_name, attrib, value): - if not type(workflow_phases) is list: - workflow_phases = [workflow_phases] - for phase in workflow_phases: - if phase.name == phase_name: - if type(getattr(phase, attrib)).__name__ == "list": - phase.__dict__[attrib].append(value) - else: - phase.__setattr__(attrib, value) - - def get_phase_attrib(workflow_phases, phase_name, attrib): if not type(workflow_phases) is list: workflow_phases = [workflow_phases] for phase in workflow_phases: - if phase.name == phase_name: - value = getattr(phase, attrib) + if phase["name"] == phase_name: + value = phase[attrib] return value From ba204e6a687f1a3af0359cbd82b27d26b3b82064 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Mon, 20 Nov 2023 16:30:29 +0100 Subject: [PATCH 31/98] Fixed some of the workflow tests. 
--- tests/test_esm_runscripts/test_workflow.py | 36 ++++++++++++---------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/tests/test_esm_runscripts/test_workflow.py b/tests/test_esm_runscripts/test_workflow.py index d13871db6..e39aadac2 100644 --- a/tests/test_esm_runscripts/test_workflow.py +++ b/tests/test_esm_runscripts/test_workflow.py @@ -26,7 +26,7 @@ def test_config(): 'oifs': { 'workflow': { # 'next_run_triggered_by': 'tidy', - 'subjobs': { + 'phases': { 'my_new_subjob_oifs': { 'batch_or_shell': 'batch', 'nproc': 1, @@ -41,7 +41,7 @@ def test_config(): 'valid_model_names': ['fesom', 'oifs', 'rnfmap', 'oasis3mct', 'xios'], 'workflow': { 'next_run_triggered_by': 'tidy', - 'subjobs': { + 'phases': { 'my_new_subjob_general': { 'batch_or_shell': 'batch', 'order_in_cluster': 'concurrent', @@ -54,7 +54,7 @@ def test_config(): 'flow': { 'workflow': { 'next_run_triggered_by': 'tidy', - 'subjobs': { + 'phases': { 'my_new_subjob_flow': { 'batch_or_shell': 'batch', 'order_in_cluster': 'concurrent', @@ -78,14 +78,14 @@ def test_check_user_workflow_dependency(test_workflow_object, test_config): assert independent def test_check_user_workflow_dependency_2(test_workflow_object, test_config): - test_config['flow']['workflow']['subjobs']['my_new_subjob_flow']['run_after'] = 'my_new_subjob_oifs' + test_config['flow']['workflow']['phases']['my_new_subjob_flow']['run_after'] = 'my_new_subjob_oifs' test_workflow_object = test_workflow_object.init_default_workflow(test_config) test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config) independent = test_workflow_object.check_user_workflow_dependency() assert not independent def test_check_unknown_phases(test_workflow_object, test_config): - test_config['flow']['workflow']['subjobs']['my_new_subjob_flow']['run_after'] = 'my_new_subjob' + test_config['flow']['workflow']['phases']['my_new_subjob_flow']['run_after'] = 'my_new_subjob' test_workflow_object = 
test_workflow_object.init_default_workflow(test_config) test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config) unknown_phases = test_workflow_object.check_unknown_phases() @@ -100,27 +100,27 @@ def test_collect_all_user_workflow(test_config): def test_calc_number_of_tasks(): pytest.fail("something wrong") -def test_order_clusters(test_workflow_object, test_config): - test_config['flow']['workflow']['subjobs']['my_new_subjob_flow']['run_after'] = 'my_new_subjob_general' +def test_order_phases(test_workflow_object, test_config): + test_config['flow']['workflow']['phases']['my_new_subjob_flow']['run_after'] = 'my_new_subjob_general' # test_config['flow']['workflow']['next_run_triggered_by'] = 'my_new_subjob_flow' # test_config['oifs']['workflow']['next_run_triggered_by'] = 'my_new_subjob_general' - #test_config['flow']['workflow']['subjobs']['my_new_subjob_flow']['run_before'] = 'my_new_subjob_oifs' + #test_config['flow']['workflow']['phases']['my_new_subjob_flow']['run_before'] = 'my_new_subjob_oifs' test_workflow_object = test_workflow_object.init_default_workflow(test_config) test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config) - test_workflow_object = test_workflow_object.order_clusters(test_config) + test_workflow_object = test_workflow_object.order_phases() pytest.fail("something wrong") def test_complete_clusters(test_workflow_object, test_config): test_workflow_object = test_workflow_object.init_default_workflow(test_config) test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config) - test_workflow_object = test_workflow_object.order_clusters(test_config) + test_workflow_object = test_workflow_object.order_phases() subjob_clusters = test_workflow_object.complete_clusters(test_config) pytest.fail("something wrong") def test_prepend_newrun_job(test_workflow_object, test_config): test_workflow_object = test_workflow_object.init_default_workflow(test_config) test_workflow_object = 
test_workflow_object.collect_all_user_workflows(test_config) - test_workflow_object = test_workflow_object.order_clusters(test_config) + test_workflow_object = test_workflow_object.order_phases() subjob_clusters = test_workflow_object.complete_clusters(test_config) [test_workflow_object, subjob_clusters] = workflow.prepend_newrun_job(test_workflow_object, test_config, subjob_clusters) pytest.fail("something wrong") @@ -128,7 +128,7 @@ def test_prepend_newrun_job(test_workflow_object, test_config): def test_write_to_config(test_workflow_object, test_config): test_workflow_object = test_workflow_object.init_default_workflow(test_config) test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config) - test_workflow_object = test_workflow_object.order_clusters(test_config) + test_workflow_object = test_workflow_object.order_phases() subjob_clusters = test_workflow_object.complete_clusters(test_config) [test_workflow_object, subjob_clusters] = workflow.prepend_newrun_job(test_workflow_object, test_config, subjob_clusters) config = test_workflow_object.write_to_config(test_config) @@ -137,9 +137,13 @@ def test_write_to_config(test_workflow_object, test_config): def test_write_subjob_clusters_to_config(test_workflow_object, test_config): test_workflow_object = test_workflow_object.init_default_workflow(test_config) test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config) - test_workflow_object = test_workflow_object.order_clusters(test_config) - subjob_clusters = test_workflow_object.complete_clusters(test_config) - [test_workflow_object, subjob_clusters] = workflow.prepend_newrun_job(test_workflow_object, test_config, subjob_clusters) - test_config = workflow.write_subjob_clusters_to_config(test_config, subjob_clusters) + test_workflow_object = test_workflow_object.order_phases() + test_workflow_object = test_workflow_object.prepend_newrun_job(test_config) test_config = test_workflow_object.write_to_config(test_config) + 
test_workflow_object = test_workflow_object.complete_clusters(test_config) + +def test_prepend_newrun_job(test_workflow_object, test_config): + test_workflow_object = test_workflow_object.init_default_workflow(test_config) + test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config) + test_workflow_object = test_workflow_object.prepend_newrun_job(test_config) pytest.fail("something wrong") From 6174fa3a558cc9554acfc3138218bbe98abf8127 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Mon, 20 Nov 2023 16:59:12 +0100 Subject: [PATCH 32/98] Added review suggestions. --- src/esm_runscripts/workflow.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index e20d6ab49..4140f1d45 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -64,9 +64,9 @@ def get_phases_attribs_list(self, phase_type, attrib): Parameters ---------- - self: Workflow object - phase_type: str (default or user) - attrib: str + phase_type : str + ``default`` or ``user`` + attrib : str Returns ------- @@ -86,7 +86,6 @@ def init_default_workflow(self, config): Parameters ---------- - self : Workflow object config : dict Returns @@ -96,8 +95,7 @@ def init_default_workflow(self, config): workflow_phases = self.phases - # Calculating the number of tasks for each component/model - # needed for phase compute + # Calculating the number of mpi tasks for each component/model/script tasks = calc_number_of_tasks(config) # Initiate/create default workflow phase objects # and reset/append to Workflow.phases variable From 93b49b5d771eab6c9f4b998a5fdb8c2f28060f09 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Wed, 22 Nov 2023 12:04:23 +0100 Subject: [PATCH 33/98] Changed initialization of workflow and phases. 
--- src/esm_runscripts/workflow.py | 136 ++++++++++++++------------------- 1 file changed, 57 insertions(+), 79 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index 4140f1d45..d19725b1c 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -10,29 +10,27 @@ class Workflow: """A workflow class.""" - def __init__(self, phases, always_run_with=[], phases_to_submit_to_batch_system=[]): + def __init__(self, workflow_yaml): """ Create a new workflow. Parameters ---------- - phases : list - List of workflow phases names - always_run_with : list - List of phases that precedes each phase in phases + workflow_yaml : dict + Dictionary from defaults.yaml to initialize workflow + for default phases. Returns ------- none """ + # TODO: check if key is in workflow_yaml dict + self.phases = [] self.user_phases = [] - self.first_task_in_queue = None # needed - self.last_task_in_queue = None - self.next_run_triggered_by = None + self.first_task_in_queue = workflow_yaml["first_task_in_queue"] + self.last_task_in_queue = workflow_yaml["last_task_in_queue"] + self.next_run_triggered_by = workflow_yaml["next_run_triggered_by"] # TODO: Call here the phase object ??? - self.phases = phases - self.always_run_with = always_run_with - self.phases_to_submit_to_batch_system = phases_to_submit_to_batch_system @property def num_phases(self): @@ -79,10 +77,10 @@ def get_phases_attribs_list(self, phase_type, attrib): return phases_attribs - def init_default_workflow(self, config): + def config_sbatch_phases(self, config): """ - Add workflow for precompute, compute, and tidy phases - etc information already here! 
+ Calculating the number of mpi tasks for each component/model/script + and set queue for default phases that run as batch jobs Parameters ---------- @@ -93,41 +91,16 @@ def init_default_workflow(self, config): self : Workflow object """ - workflow_phases = self.phases + #workflow_phases = self["phases"] - # Calculating the number of mpi tasks for each component/model/script tasks = calc_number_of_tasks(config) - # Initiate/create default workflow phase objects - # and reset/append to Workflow.phases variable - self.phases = [] - for ind, phase in enumerate(workflow_phases): - self.phases.append(WorkflowPhase(phase)) for ind, phase in enumerate(self.phases): - if ind < self.num_phases - 1: - # Set run_before attrib of all phases (except last on) - # to the next phase name - phase["run_before"] = self.phases[ind+1]["name"] - phase["next_submit"].append(self.phases[ind+1]["name"]) - phase["run_after"] = self.phases[ind-1]["name"] - else: - # Set run_after attrib of last phase to previous phase name - phase["run_before"] = self.phases[0]["name"] - phase["next_submit"].append(self.phases[0]["name"]) - phase["run_after"] = self.phases[ind-1]["name"] - - phase["cluster"] = phase["name"] - if phase["name"] in self.phases_to_submit_to_batch_system: + if phase["submit_to_batch_system"]: phase["batch_or_shell"] = 'batch' - phase["submit_to_batch_system"] = True phase["run_on_queue"] = config["computer"]["partitions"]["compute"]["name"] phase["nproc"] = tasks - # Set default workflow values - self.set_workflow_attrib("first_task_in_queue", self.phases[0]["name"]) - self.set_workflow_attrib("last_task_in_queue", self.phases[-1]["name"]) - self.set_workflow_attrib("next_run_triggered_by", self.phases[-1]["name"]) - return self def set_workflow_attrib(self, attrib, value): @@ -164,10 +137,7 @@ def check_if_keyword_is_valid(self, keyword): true or false """ - if hasattr(self, keyword): - return True - else: - return False + return hasattr(self, keyword) def 
collect_all_user_workflows(self, config): """ @@ -202,12 +172,11 @@ def collect_all_user_workflows(self, config): else: err_msg = f"``{key}`` is not a valid keyword of a workflow." esm_parser.user_error("ERROR", err_msg) - # for subjob in list(copy.deepcopy(w_config["subjobs"])): - for phase in list(copy.deepcopy(w_config["phases"])): -# new_phase_name = phase - # create a new user phase object for ``phase`` - new_phase = WorkflowPhase(phase) - # each subjob needs to have an unique name + for phase in w_config["phases"]: + # each phase (of a model/setup) needs to have an unique name + # same phases of the same model/setup defined in different config files + # are overwritten by the usual config file hierarchy + # user phases are not alowed to have the same name asdefault phases (e.g. compute) # 1. check if ``new_phase`` is already defined as a default phase if phase in self.get_phases_attribs_list("default", "name"): err_msg = ( @@ -216,7 +185,8 @@ def collect_all_user_workflows(self, config): f"This is not allowed." ) esm_parser.user_error("ERROR", err_msg) - # 2. check if the name of the new user phase does not already exist + # 2. check if the name of the new user phase (for a model/setup) does not already exist + # (for another model/setup). if phase in user_workflow_phases_names: err_msg = ( f"Two workflow phases have the same name " @@ -225,30 +195,22 @@ def collect_all_user_workflows(self, config): esm_parser.user_error("ERROR", err_msg) # 3. if user phase has a new and unique name else: - # append it to the list of user phases of the workflow - user_workflow_phases_names.append(phase) - # set attributes of user_workflow phases from - # config settings - # check if valid phase keywords - for key, value in w_config["phases"][phase].items(): - if key in new_phase: - new_phase[key] = value - else: - err_msg = ( - f"``{key}`` of workflow phase " - f"``{new_phase_name}`` is not a valid keyword " - f"of a workflow phase." 
- ) - esm_parser.user_error("ERROR", err_msg) + phase_config = copy.deepcopy(w_config["phases"][phase]) + # add phase name + phase_config["name"] = phase # Make sure that batch_or_shell is set to batch if submit_to_batch is true # TODO: remove/replace batch_or_shell by submit_to_batch_system? Is needed # for setting it to SimulationSetup and in other functions (resubmit, etc.) # Should not be set by user. TODO: Remove from documentation. - if new_phase["submit_to_batch_system"]: - new_phase["batch_or_shell"] = "batch" + if phase_config["submit_to_batch_system"]: + phase_config["batch_or_shell"] = "batch" else: - new_phase["batch_or_shell"] = "shell" + phase_config["batch_or_shell"] = "shell" + # create a new user phase object for ``phase`` + new_phase = WorkflowPhase(phase_config) + # append it to the list of user phases of the workflow user_workflow_phases.append(new_phase) + user_workflow_phases_names.append(phase) self.user_phases = user_workflow_phases return self @@ -356,14 +318,14 @@ def order_phases(self): Parameters ---------- - config : dict Returns ------- self : Workflow object """ # check if user phases are independent from each other - # TODO: What if not independent? + # TODO: What if not independent??? + # do not run in parallel in same cluster??? 
independent = self.check_user_workflow_dependency() # check if there are unknown phases, if yes, will give error exception unknown_phases = self.check_unknown_phases() @@ -616,7 +578,8 @@ def skip_cluster(cluster, config): class WorkflowPhase(dict): """A workflow phase class.""" - def __init__(self, phase_name): + def __init__(self, phase): + # default self["nproc"] = 1 # needed self["run_before"] = None self["run_after"] = None @@ -630,12 +593,24 @@ def __init__(self, phase_name): self["run_only"] = None self["skip_chunk_number"] = None self["skip_run_number"] = None - self["name"] = phase_name + self["name"] = None self["script"] = None self["script_dir"] = None self["call_function"] = None self["env_preparation"] = None + # check if phase keywords are valid + for key, value in phase.items(): + if key not in self: + err_msg = ( + f"``{key}`` of workflow phase " + f"``{new_phase_name}`` is not a valid keyword " + f"of a workflow phase." + ) + esm_parser.user_error("ERROR", err_msg) + + super().__init__(phase) + def set_attrib(self, attrib, value): if type(self[attrib]) == "list": self[attrib].append(value) @@ -670,12 +645,14 @@ def assemble_workflow(config): # esm_tools/configs/esm_software/esm_runscripts/defaults.yaml if "defaults.yaml" in config["general"]: if "workflow" in config["general"]["defaults.yaml"]: + workflow = config["general"]["defaults.yaml"]["workflow"] phases = config["general"]["defaults.yaml"]["workflow"].get("phases", []) - always_run_with = config["general"]["defaults.yaml"]["workflow"].get("always_run_with", []) - phases_to_submit_to_batch_system = config["general"]["defaults.yaml"]["workflow"].get("phases_to_submit_to_batch_system", []) + # 2. 
Initialize default workflow phases if phases: - workflow = Workflow(phases, always_run_with=always_run_with, phases_to_submit_to_batch_system=phases_to_submit_to_batch_system) + workflow = Workflow(workflow) + for phase in phases: + workflow.phases.append(WorkflowPhase(phases[phase])) else: esm_parser.user_error("ERROR", "No default workflow phases defined.") # Note: Should this work also if no default phases are set in such a config @@ -684,8 +661,9 @@ def assemble_workflow(config): # Where could a user define a different (default) phase list? # Or should this be changed in defaults.yaml as it is now? - # 2. Initialize default workflow phases - workflow = workflow.init_default_workflow(config) + # 3. Calc mpi tasks and set queue for batch jobs for default phases + # TODO: Put it into other method? + workflow = workflow.config_sbatch_phases(config) # 3. Read in workflows from runscript and config files workflow = workflow.collect_all_user_workflows(config) From afab16fc1fdc8c614323c093110cd8e227513e6b Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Wed, 22 Nov 2023 12:06:48 +0100 Subject: [PATCH 34/98] Changed initial config of default workflow phases. 
--- .../esm_software/esm_runscripts/defaults.yaml | 74 +++++++++++++++++-- 1 file changed, 66 insertions(+), 8 deletions(-) diff --git a/configs/esm_software/esm_runscripts/defaults.yaml b/configs/esm_software/esm_runscripts/defaults.yaml index 812e32ad0..31ffa1394 100644 --- a/configs/esm_software/esm_runscripts/defaults.yaml +++ b/configs/esm_software/esm_runscripts/defaults.yaml @@ -7,13 +7,71 @@ per_model_defaults: exp_to_run: copy run_to_work: copy work_to_run: copy + workflow: + user_phases: None + first_task_in_queue: prepcompute + last_task_in_queue: tidy + next_run_triggered_by: tidy + phases: - - prepcompute - - compute - - tidy - always_run_with: - - prepare - - prepexp - phases_to_submit_to_batch_system: - - compute + prepcompute: + batch_or_shell: SimulationSetup + call_function: None + called_from: tidy + cluster: prepcompute + env_preparation: None + name: prepcompute + next_submit: + - compute + nproc: 1 + order_in_cluster: sequential + run_after: tidy + run_before: compute + run_on_queue: None + run_only: None + script: None + script_dir: None + skip_chunk_number: None + skip_run_number: None + submit_to_batch_system: False + compute: + batch_or_shell: batch + call_function: None + called_from: prepcompute + cluster: compute + env_preparation: None + name: compute + next_submit: + - tidy + nproc: None + order_in_cluster: sequential + run_after: prepcompute + run_before: tidy + run_on_queue: None + run_only: None + script: None + script_dir: None + skip_chunk_number: None + skip_run_number: None + submit_to_batch_system: True + tidy: + batch_or_shell: SimulationSetup + call_function: None + called_from: compute + cluster: tidy + env_preparation: None + name: tidy + next_submit: + - prepcompute + nproc: 1 + order_in_cluster: sequential + run_after: compute + run_before: prepcompute + run_on_queue: None + run_only: None + script: None + script_dir: None + skip_chunk_number: None + skip_run_number: None + submit_to_batch_system: False From 
ad0e620becbfca33c94808f73dfe0fddbf261948 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Wed, 22 Nov 2023 12:40:35 +0100 Subject: [PATCH 35/98] Bugfix in error message. --- src/esm_runscripts/workflow.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index d19725b1c..411bb1ebf 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -180,7 +180,7 @@ def collect_all_user_workflows(self, config): # 1. check if ``new_phase`` is already defined as a default phase if phase in self.get_phases_attribs_list("default", "name"): err_msg = ( - f"The user phase ``{new_phase_name}`` " + f"The user phase ``{phase}`` " f"has the same name as a default workflow phase. " f"This is not allowed." ) @@ -190,7 +190,7 @@ def collect_all_user_workflows(self, config): if phase in user_workflow_phases_names: err_msg = ( f"Two workflow phases have the same name " - f"{new_phase_name}." + f"``{phase}``." ) esm_parser.user_error("ERROR", err_msg) # 3. if user phase has a new and unique name @@ -202,7 +202,7 @@ def collect_all_user_workflows(self, config): # TODO: remove/replace batch_or_shell by submit_to_batch_system? Is needed # for setting it to SimulationSetup and in other functions (resubmit, etc.) # Should not be set by user. TODO: Remove from documentation. - if phase_config["submit_to_batch_system"]: + if phase_config.get("submit_to_batch_system", False): phase_config["batch_or_shell"] = "batch" else: phase_config["batch_or_shell"] = "shell" @@ -327,6 +327,7 @@ def order_phases(self): # TODO: What if not independent??? # do not run in parallel in same cluster??? 
independent = self.check_user_workflow_dependency() + # check if there are unknown phases, if yes, will give error exception unknown_phases = self.check_unknown_phases() if unknown_phases: From 91f1af6bbace2823293fc0deb9503373a0e5c28f Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Wed, 22 Nov 2023 16:00:28 +0100 Subject: [PATCH 36/98] Make next_run_triggered_by a keyword of phase (trigger_next_run) not workflow. --- src/esm_runscripts/workflow.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index 411bb1ebf..342e8b5b7 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -91,8 +91,6 @@ def config_sbatch_phases(self, config): self : Workflow object """ - #workflow_phases = self["phases"] - tasks = calc_number_of_tasks(config) for ind, phase in enumerate(self.phases): @@ -155,6 +153,7 @@ def collect_all_user_workflows(self, config): user_workflow_phases = [] user_workflow_phases_names = [] + user_workflow_next_run_triggered_by = [] for model in config: if "workflow" in config[model]: w_config = config[model]["workflow"] @@ -211,6 +210,17 @@ def collect_all_user_workflows(self, config): # append it to the list of user phases of the workflow user_workflow_phases.append(new_phase) user_workflow_phases_names.append(phase) + if phase_config.get("trigger_next_run", False): + user_workflow_next_run_triggered_by.append(phase) + if len(user_workflow_next_run_triggered_by) > 1: + err_msg = ( + f"More than one phase is set to " + f"trigger the next run: ``{user_workflow_next_run_triggered_by}``. " + f"Only set ``trigger_next_run: True`` for one phase." 
+ ) + esm_parser.user_error("ERROR", err_msg) + else: + self.next_run_triggered_by = user_workflow_next_run_triggered_by[0] self.user_phases = user_workflow_phases return self @@ -362,7 +372,7 @@ def order_phases(self): # esm_parser.user_error("ERROR", err_msg) # Correct for ``last_task_in_queue`` if necessary - # Collect all next_run_triggered_by entries + # Collect all next_run_triggered_by entries??? next_triggered = self.next_run_triggered_by # check if next_triggered is default or user phase # if user phase @@ -580,10 +590,14 @@ class WorkflowPhase(dict): """A workflow phase class.""" def __init__(self, phase): - # default + # defaults + self["name"] = None + self["script"] = None + self["script_dir"] = None self["nproc"] = 1 # needed self["run_before"] = None self["run_after"] = None + self["trigger_next_run"] = False # needed self["submit_to_batch_system"] = False # needed self["run_on_queue"] = None self["cluster"] = None @@ -594,9 +608,6 @@ def __init__(self, phase): self["run_only"] = None self["skip_chunk_number"] = None self["skip_run_number"] = None - self["name"] = None - self["script"] = None - self["script_dir"] = None self["call_function"] = None self["env_preparation"] = None From aed23769f12252e68bedbb65c28bc1893d7a9531 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Wed, 22 Nov 2023 16:08:50 +0100 Subject: [PATCH 37/98] Bugfix if no phase trigger_next_run. --- src/esm_runscripts/workflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index 342e8b5b7..d5b022681 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -219,7 +219,7 @@ def collect_all_user_workflows(self, config): f"Only set ``trigger_next_run: True`` for one phase." 
) esm_parser.user_error("ERROR", err_msg) - else: + elif user_workflow_next_run_triggered_by: self.next_run_triggered_by = user_workflow_next_run_triggered_by[0] self.user_phases = user_workflow_phases From 40c9190d463ca0323dbc4fbd051a244c817908b2 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Thu, 23 Nov 2023 16:51:26 +0100 Subject: [PATCH 38/98] Resolved function complete_cluster into other functions, fix next_submit for clusters. --- src/esm_runscripts/workflow.py | 131 +++++++++++++++------------------ 1 file changed, 58 insertions(+), 73 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index d5b022681..e6336fe98 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -203,6 +203,10 @@ def collect_all_user_workflows(self, config): # Should not be set by user. TODO: Remove from documentation. if phase_config.get("submit_to_batch_system", False): phase_config["batch_or_shell"] = "batch" + if not phase_config.get("run_on_queue", False): + breakpoint() + err_msg = f"No value for target queue given by ``run_on_queue' for phase {phase}." 
+ esm_parser.user_error("ERROR", err_msg) else: phase_config["batch_or_shell"] = "shell" # create a new user phase object for ``phase`` @@ -249,6 +253,30 @@ def write_to_config(self, config): for att in phase: config["general"]["workflow"]["subjob_clusters"][cluster][att] = phase[att] config["general"]["workflow"]["subjob_clusters"][cluster]["name"] = cluster + # if a phase in a cluster triggers the next run, set next_submit in cluster conf + for cluster in self.get_phases_attribs_list("default", "cluster") + self.get_phases_attribs_list("user", "cluster"): + for phase in self.phases + self.user_phases: + if phase["cluster"] == cluster: + if phase["name"] in self.next_run_triggered_by: + if self.first_task_in_queue not in config["general"]["workflow"]["subjob_clusters"][cluster]["next_submit"]: + config["general"]["workflow"]["subjob_clusters"][cluster]["next_submit"].append(self.first_task_in_queue) + + for subjob_cluster in config["general"]["workflow"]["subjob_clusters"]: + nproc_sum = nproc_max = 0 + for subjob in config["general"]["workflow"]["subjob_clusters"][subjob_cluster]["subjobs"]: + nproc_sum += get_phase_attrib(self.phases + self.user_phases, subjob, "nproc") + nproc_max = max(get_phase_attrib(self.phases + self.user_phases, subjob, "nproc"), nproc_max) + if config["general"]["workflow"]["subjob_clusters"][subjob_cluster].get("submit_to_batch_system", False): +# Why setting batch_or_shell to shell if a script is given? Wouldn't now all phases be executed as shell and never as batch? +# # elif subjob_clusters[subjob_cluster].get("script", False): +# # subjob_clusters[subjob_cluster]["batch_or_shell"] = "shell" +# # + if config["general"]["workflow"]["subjob_clusters"][subjob_cluster]["order_in_cluster"] == "concurrent": + nproc = nproc_sum + else: + nproc = nproc_max + config["general"]["workflow"]["subjob_clusters"][subjob_cluster]["nproc"] = nproc + # 2. 
Write subjobs/phases config["general"]["workflow"]["subjobs"] = {} for phase in self.phases + self.user_phases: @@ -322,9 +350,9 @@ def check_unknown_phases(self): return unknown_phases - def order_phases(self): + def order_phases_and_clusters(self): """ - Put the phases in order. + Put the phases and clusters in order. Parameters ---------- @@ -333,6 +361,7 @@ def order_phases(self): ------- self : Workflow object """ + # check if user phases are independent from each other # TODO: What if not independent??? # do not run in parallel in same cluster??? @@ -371,9 +400,11 @@ def order_phases(self): # ) # esm_parser.user_error("ERROR", err_msg) - # Correct for ``last_task_in_queue`` if necessary - # Collect all next_run_triggered_by entries??? + # Correct workflow attributes (``last_task_in_queue``, `next_run_triggered``) + # if necessary + next_triggered = self.next_run_triggered_by + # check if next_triggered is default or user phase # if user phase # get last default phase and correct next_submit @@ -390,9 +421,9 @@ def order_phases(self): # "next_submit" which phase/cluster will be called next (run_after of the next phase) # "called_from" name of previous phase, run_after of current phase # Create a dict of all phases with empty lists - next_submits = {} - for phase in self.phases + self.user_phases: - next_submits[phase["name"]] = [] + + # Create a cluster dict: + clusters = {} for phase4 in self.phases + self.user_phases: # if a cluster is not set for a phase set it to the phase name, @@ -400,16 +431,27 @@ def order_phases(self): # default cluster has the same name as the phase itself if phase4["cluster"] is None: phase4["cluster"] = phase4["name"] + clusters[phase4["cluster"]] = {"name": phase4["cluster"]} + + + next_submits = {} + for phase in self.phases + self.user_phases: + next_submits[phase["name"]] = [] + next_submits[phase["cluster"]] = [] # set next_submits to the cluster name rather then to the phase name for phase2 in self.phases + self.user_phases: 
if phase2["run_after"] is not None: if phase2["cluster"] not in next_submits[phase2["run_after"]]: - next_submits[phase2["run_after"]].append(phase2["cluster"]) + if phase2["cluster"] not in next_submits[phase2["run_after"]]: + next_submits[phase2["run_after"]].append(phase2["cluster"]) + if phase2["cluster"] not in next_submits[get_phase_attrib(self.phases + self.user_phases, phase2["run_after"], "cluster")]: + next_submits[get_phase_attrib(self.phases + self.user_phases, phase2["run_after"], "cluster")].append(phase2["cluster"]) phase2["called_from"] = phase2["run_after"] for phase3 in self.phases + self.user_phases: - phase3["next_submit"] = next_submits[phase3["name"]] + phase3.set_attrib("next_submit", next_submits[phase3["name"]]) +# phase3["next_submit"] = next_submits[phase3["name"]] first_cluster_name = self.first_task_in_queue first_phase = self.get_workflow_phase_by_name(first_cluster_name) @@ -427,63 +469,8 @@ def order_phases(self): return self - def complete_clusters(self, config): - # all that are within a next_submit list are in a cluster if: - # run concurrently - # have the same cluster entry. - """ - Rearanges the subjobs to their subjobs_clusters ??? - - TODO: Can this be put into other functions/methods? - Parameters - ---------- - self : Workflow object - config : dict - - Returns - ------- - config : dict - """ - subjob_clusters = config["general"]["workflow"]["subjob_clusters"] - - # Then, complete the resource information per cluster - # determine whether a cluster is to be submitted to a batch system - for subjob_cluster in subjob_clusters: - nproc_sum = nproc_max = 0 - # Check if the following attributes are set for each cluster??? 
-# attributes = ["submit_to_batch_system", "order_in_cluster", "run_on_queue", "run_after", "run_before", "run_only", "skip_run_number", "skip_chunk_number", "batch_or_shell"] -# for attrib in attributes: -# temp_list = [] - for subjob in subjob_clusters[subjob_cluster]["subjobs"]: - # Check if the following attributes are set for each cluster??? -# if not get_phase_attrib(self.phases + self.user_phases, subjob, attrib) in temp_list: -# subjob_clusters[subjob_cluster][attrib] = get_phase_attrib(self.phases + self.user_phases, subjob, attrib) -# else: -# print("Missmatch in attributes") -# sys.exit(-1) - nproc_sum += get_phase_attrib(self.phases + self.user_phases, subjob, "nproc") - nproc_max = max(get_phase_attrib(self.phases + self.user_phases, subjob, "nproc"), nproc_max) -# - if subjob_clusters[subjob_cluster].get("submit_to_batch_system", False): -# # subjob_clusters[subjob_cluster]["batch_or_shell"] = "batch" - -# Why setting batch_or_shell to shell if a script is given? Wouldn't now all phases be executed as shell and never as batch? -# # elif subjob_clusters[subjob_cluster].get("script", False): -# # subjob_clusters[subjob_cluster]["batch_or_shell"] = "shell" -# # - if "run_on_queue" not in subjob_clusters[subjob_cluster]: - err_msg = f"No value for target queue given by ``run_on_queue' for cluster {subjob_cluster}." - esm_parser.user_error("ERROR", err_msg) - - if subjob_clusters[subjob_cluster]["order_in_cluster"] == "concurrent": - nproc = nproc_sum - else: - nproc = nproc_max - subjob_clusters[subjob_cluster]["nproc"] = nproc - return config - - def prepend_newrun_job(self, config): + def prepend_newrun_job(self): """ - Creates a new cluster "newrun" if first_task_in_queue is not of type 'SimulationSetup' @@ -496,7 +483,6 @@ def prepend_newrun_job(self, config): Parameters ---------- self : Workflow object - config : dict Returns ------- @@ -681,19 +667,16 @@ def assemble_workflow(config): workflow = workflow.collect_all_user_workflows(config) # 4. 
Order user workflows into default workflow wrt. workflow and phase attributs. - workflow = workflow.order_phases() + workflow = workflow.order_phases_and_clusters() # 5. create new first phase of type SimulationSetup, if first_task_in_queue is # user phase (type batch or shell) - workflow = workflow.prepend_newrun_job(config) + workflow = workflow.prepend_newrun_job() # 6. write the workflow to config # 7. Remove old worklow from config config = workflow.write_to_config(config) - # 8. complete some information in a cluster - # e.g. if phases in cluster are submit to sbatch system - config = workflow.complete_clusters(config) # Set "jobtype" for the first task??? if config["general"]["jobtype"] == "unknown": @@ -764,8 +747,10 @@ def display_workflow(config): while first_phase not in second_phase and second_phase: sec_phase_str = "" for sec_phase in second_phase: - if config["general"]["workflow"]["subjobs"][sec_phase]["next_submit"]: - second_phase = config["general"]["workflow"]["subjobs"][sec_phase]["next_submit"] + if config["general"]["workflow"]["subjob_clusters"][sec_phase]["next_submit"]: + second_phase = config["general"]["workflow"]["subjob_clusters"][sec_phase]["next_submit"] + subjobs = config["general"]["workflow"]["subjob_clusters"][sec_phase]["subjobs"] + else: subjobs = config["general"]["workflow"]["subjob_clusters"][sec_phase]["subjobs"] if sec_phase_str == "": sec_phase_str = f"{sec_phase_str} ``{sec_phase}`` {subjobs}" From 1a8d12120f11656d35248123aec060c8cdb9da0c Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Thu, 23 Nov 2023 17:25:47 +0100 Subject: [PATCH 39/98] Removed obsolete functions. 
--- src/esm_runscripts/workflow.py | 146 +-------------------------------- 1 file changed, 2 insertions(+), 144 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index e6336fe98..4ce247c81 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -722,7 +722,7 @@ def calc_number_of_tasks(config): def display_workflow(config): """ - Displays current workflow settings. + Displays workflow sequence. Parameters ---------- @@ -730,7 +730,7 @@ def display_workflow(config): Returns ------- - config : dict (needed???) + config : dict """ display_nicely(config) @@ -789,145 +789,3 @@ def display_nicely(config): """ esm_parser.pprint_config(config["general"]["workflow"]) return config - -# ################## Maybe outdated routines ###################### -# -# -# def collect_all_workflow_information(config): -# """ -# Collects all workflow information for each component entry in config -# (can be a model/component or a new entry (e.g. 'flows') -# NOTE: Should it be possible to set a workflow in the model section of the -# runscript? Why not? -# -# Checks if there are "workflow" entries in the user runscript and copies or -# merges them into -# config["general"]["workflow"] -# -# Parameters -# ---------- -# config : dict -# -# Returns -# ------- -# config : dict -# """ -# for model in config: -# if "workflow" in config[model]: -# # looks for "workflow" in each entry of config (can be model/component, general, etc.) -# w_config = config[model]["workflow"] -# # looks for "workflow" in "general" section of config. -# gw_config = config["general"]["workflow"] -# -# # looks for entry 'subjob_clusters' in config of each component that has a "workflow" -# if "subjob_clusters" in w_config: -# for cluster in w_config["subjob_clusters"]: -# # if a certain cluster is also in the general config, this cluster will be merged together ... -# # what cluster could this be? 
-# if cluster in gw_config["subjob_clusters"]: -# gw_config["subjob_clusters"][cluster] = merge_if_possible( -# w_config["subjob_clusters"][cluster], -# gw_config["subjob_clusters"][cluster], -# ) -# # if cluster is not in general config, it will copied into it. -# else: -# gw_config["subjob_clusters"][cluster] = copy.deepcopy( -# w_config["subjob_clusters"][cluster], -# ) -# -# # looks for entry 'subjobs' in config of each component -# if "subjobs" in w_config: -# # copies component workflow config to new variable ref_config -# ref_config = copy.deepcopy(w_config) -# # ??? for every subjob in ??? -# for subjob in list(copy.deepcopy(w_config["subjobs"])): -# -# # subjobs (other than clusters) should be model specific -# # subjobs that are defined in subjobs of components workflow configs and not in a subjob_cluster are copied to general with suffix of componet entry. -# # appends the model name to the subjob name and copy it to config["general"] -# gw_config["subjobs"][subjob + "_" + model] = copy.deepcopy( -# w_config["subjobs"][subjob] -# ) -# # if this copied subjobs is also n general workflow subjobs it will be deleted there -# if subjob in gw_config["subjobs"]: -# del gw_config["subjobs"][subjob] -# -# # make sure that the run_after and run_before refer to that cluster -# # for all subjobs now in general workflow -# for other_subjob in gw_config["subjobs"]: -# # sets run_after and run_before to correct subjob??? 
-# # if a subjob of general workflow has run_after attribute to a user subjob (that has been renamed to subjob_model) -# # this run_after will be set to the new subjob name (subjob_model) -# if "run_after" in gw_config["subjobs"][other_subjob]: -# if (gw_config["subjobs"][other_subjob]["run_after"] == subjob): -# gw_config["subjobs"][other_subjob]["run_after"] == subjob + "_" + model -# if "run_before" in gw_config["subjobs"][other_subjob]: -# if (gw_config["subjobs"][other_subjob]["run_before"] == subjob): -# gw_config["subjobs"][other_subjob]["run_before"] == subjob + "_" + model -# -# # if not in another cluster, each subjob gets its own -# if (not "subjob_cluster" in gw_config["subjobs"][subjob + "_" + model]): -# gw_config["subjobs"][subjob + "_" + model]["subjob_cluster"] = subjob # + "_" + model -# -# # checks if next_run:triggered_by is tidy or the one in user workflow, or empty? -# if "next_run_triggered_by" in w_config: -# if not gw_config["next_run_triggered_by"] in ["tidy", w_config["next_run_triggered_by"], ]: -# print("Mismatch found setting next_run_triggered_by for workflow.") -# sys.exit(-1) -# else: -# gw_config["next_run_triggered_by"] = w_config["next_run_triggered_by"] -# # what if w_config["next_run_triggered_by"] is empty? -# -# return config -# -# def merge_single_entry_if_possible(entry, sourceconf, targetconf): -# """ -# Merges a dictionary entry into a target dictionary that has he same key. 
-# -# Parameters -# ---------- -# entry : str -# dictionary key -# sourceconf : dict -# targetconf : dict -# -# Returns -# ------- -# targetconf : dict -# """ -# if entry in sourceconf: -# # Check if entry is already in targetconf AND different to sourceconf, then exit -# if entry in targetconf and not sourceconf[entry] == targetconf[entry]: -# print(f"Mismatch found in {entry} for cluster {targetconf}") -# sys.exit(-1) -# # Continues here if entry exists already in targetconf AND the same as sourceconf or -# # not already in targetconf and set it to sourceconf -# targetconf[entry] = sourceconf[entry] -# return targetconf -# -# def merge_if_possible(source, target): -# """ -# Does the same as above but for a whole dict -# -# Merges the entries of source dictionary into target dictionary, if not already in. -# (Will not overwrite entries in target dictionary.) -# -# Parameters -# ---------- -# source : dict -# target : dict -# -# Returns -# ------- -# target : dict -# """ -# for entry in source: -# if entry in target: -# if not source[entry] == target[entry]: -# print( -# f"Mismatch while trying to merge subjob_clusters {source} into {target}" -# ) -# sys.exit(-1) -# else: -# target[entry] = source[entry] -# return target From 916a3459128b047c8fc62c0c9ddd22a6802dc38f Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Fri, 24 Nov 2023 11:57:44 +0100 Subject: [PATCH 40/98] (Re)moved redundant code, renamed function for collecting phases. 
--- src/esm_runscripts/workflow.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index 4ce247c81..c72298e93 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -77,10 +77,9 @@ def get_phases_attribs_list(self, phase_type, attrib): return phases_attribs - def config_sbatch_phases(self, config): + def set_default_nproc(self, config): """ Calculating the number of mpi tasks for each component/model/script - and set queue for default phases that run as batch jobs Parameters ---------- @@ -95,8 +94,6 @@ def config_sbatch_phases(self, config): for ind, phase in enumerate(self.phases): if phase["submit_to_batch_system"]: - phase["batch_or_shell"] = 'batch' - phase["run_on_queue"] = config["computer"]["partitions"]["compute"]["name"] phase["nproc"] = tasks return self @@ -137,7 +134,7 @@ def check_if_keyword_is_valid(self, keyword): return hasattr(self, keyword) - def collect_all_user_workflows(self, config): + def collect_all_user_phases(self, config): """ Collect all workflows defined in config files. @@ -609,6 +606,9 @@ def __init__(self, phase): super().__init__(phase) + if self.get("submit_to_batch_system", False): + self["batch_or_shell"] = "batch" + def set_attrib(self, attrib, value): if type(self[attrib]) == "list": self[attrib].append(value) @@ -659,12 +659,12 @@ def assemble_workflow(config): # Where could a user define a different (default) phase list? # Or should this be changed in defaults.yaml as it is now? - # 3. Calc mpi tasks and set queue for batch jobs for default phases + # 3. Calc mpi tasks for batch jobs of default phases # TODO: Put it into other method? - workflow = workflow.config_sbatch_phases(config) + workflow = workflow.set_default_nproc(config) - # 3. Read in workflows from runscript and config files - workflow = workflow.collect_all_user_workflows(config) + # 3. 
Read in phases from runscript and config files + workflow = workflow.collect_all_user_phases(config) # 4. Order user workflows into default workflow wrt. workflow and phase attributs. workflow = workflow.order_phases_and_clusters() From c264f1ac7a1d67e67ed10d9b44aacf2713f4d1fb Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Fri, 24 Nov 2023 12:18:37 +0100 Subject: [PATCH 41/98] Removed the possibility to set workflow keywords by user. --- src/esm_runscripts/workflow.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index c72298e93..a5ff8c513 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -156,17 +156,9 @@ def collect_all_user_phases(self, config): w_config = config[model]["workflow"] # if "subjobs" in w_config: if "phases" in w_config: - # Set attributes of workflow - # This will be overwritten by all user defined workflows??? - # Collect them in a list??? - # check if valid workflow keywords for key, value in w_config.items(): - if self.check_if_keyword_is_valid(key): - # set here only workflow attributes - if not key == "phases": - self.set_workflow_attrib(key, value) - else: - err_msg = f"``{key}`` is not a valid keyword of a workflow." + if not key == "phases": + err_msg = f"``{key}`` is not allowed to be set for a workflow." esm_parser.user_error("ERROR", err_msg) for phase in w_config["phases"]: # each phase (of a model/setup) needs to have an unique name From cf9d0e344f00b4369ee43389cf29012ec8f11416 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Mon, 27 Nov 2023 17:26:24 +0100 Subject: [PATCH 42/98] Worked on workflow tests, and other minor changes to workflow. 
--- src/esm_runscripts/workflow.py | 26 +- tests/test_esm_runscripts/test_workflow.py | 331 ++++++++++++++++++--- 2 files changed, 301 insertions(+), 56 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index a5ff8c513..d924d4c75 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -2,8 +2,6 @@ import copy import esm_parser -# from pprint import pprint - import pdb @@ -193,8 +191,7 @@ def collect_all_user_phases(self, config): if phase_config.get("submit_to_batch_system", False): phase_config["batch_or_shell"] = "batch" if not phase_config.get("run_on_queue", False): - breakpoint() - err_msg = f"No value for target queue given by ``run_on_queue' for phase {phase}." + err_msg = f"No value for target queue given by ``run_on_queue`` for phase ``{phase}``." esm_parser.user_error("ERROR", err_msg) else: phase_config["batch_or_shell"] = "shell" @@ -237,7 +234,6 @@ def write_to_config(self, config): for phase in self.phases + self.user_phases: if phase["cluster"] == cluster: # TODO: Are there more attributes to be merged from the different phases within a cluster??? - # nproc is calculated in complete_clusters -> can be placed here??? config["general"]["workflow"]["subjob_clusters"][cluster]["subjobs"].append(phase["name"]) for att in phase: config["general"]["workflow"]["subjob_clusters"][cluster][att] = phase[att] @@ -591,7 +587,7 @@ def __init__(self, phase): if key not in self: err_msg = ( f"``{key}`` of workflow phase " - f"``{new_phase_name}`` is not a valid keyword " + f"``{phase['name']}`` is not a valid keyword " f"of a workflow phase." 
) esm_parser.user_error("ERROR", err_msg) @@ -637,8 +633,13 @@ def assemble_workflow(config): if "workflow" in config["general"]["defaults.yaml"]: workflow = config["general"]["defaults.yaml"]["workflow"] phases = config["general"]["defaults.yaml"]["workflow"].get("phases", []) + else: + esm_parser.user_error("ERROR", "No default workflow defined.") + else: + workflow = [] + phases = [] - # 2. Initialize default workflow phases + # 2. Initialize default workflow phases from defaults.yaml if phases: workflow = Workflow(workflow) for phase in phases: @@ -652,17 +653,17 @@ def assemble_workflow(config): # Or should this be changed in defaults.yaml as it is now? # 3. Calc mpi tasks for batch jobs of default phases - # TODO: Put it into other method? + # TODO: Put it into other method??? workflow = workflow.set_default_nproc(config) # 3. Read in phases from runscript and config files workflow = workflow.collect_all_user_phases(config) - # 4. Order user workflows into default workflow wrt. workflow and phase attributs. + # 4. Order user workflows into default workflow wrt. phase attributs. workflow = workflow.order_phases_and_clusters() # 5. create new first phase of type SimulationSetup, if first_task_in_queue is - # user phase (type batch or shell) + # a user phase (type batch or shell) workflow = workflow.prepend_newrun_job() # 6. write the workflow to config @@ -671,6 +672,8 @@ def assemble_workflow(config): # Set "jobtype" for the first task??? + # NOTE: This is either first default phase or + # newrun??? Can't this not be set in prepend_newrun then? 
if config["general"]["jobtype"] == "unknown": config["general"]["command_line_config"]["jobtype"] = config["general"][ "workflow" @@ -726,6 +729,9 @@ def display_workflow(config): """ display_nicely(config) + display_workflow_sequence(config) + +def display_workflow_sequence(config): first_phase = config["general"]["workflow"]["first_task_in_queue"] subjobs = config["general"]["workflow"]["subjob_clusters"][first_phase]["subjobs"] diff --git a/tests/test_esm_runscripts/test_workflow.py b/tests/test_esm_runscripts/test_workflow.py index e39aadac2..e2bc49cfb 100644 --- a/tests/test_esm_runscripts/test_workflow.py +++ b/tests/test_esm_runscripts/test_workflow.py @@ -4,10 +4,55 @@ from esm_runscripts import workflow import pytest +import esm_parser + +@pytest.fixture() +def test_default_phases_dict(): + phases_dict = { + 'compute': { + 'called_from': 'prepcompute', + 'cluster': 'compute', + 'name': 'compute', + 'next_submit': ['tidy'], + 'nproc': 'None', + 'order_in_cluster': 'sequential', + 'run_after': 'prepcompute', + 'run_before': 'tidy', + 'run_on_queue': 'compute', + 'submit_to_batch_system': True}, + 'prepcompute': { + 'batch_or_shell': 'SimulationSetup', + 'called_from': 'tidy', + 'cluster': 'prepcompute', + 'name': 'prepcompute', + 'next_submit': ['compute'], + 'nproc': 1, + 'order_in_cluster': 'sequential', + 'run_after': 'tidy', + 'run_before': 'compute', + 'submit_to_batch_system': False}, + 'tidy': { + 'batch_or_shell': 'SimulationSetup', + 'called_from': 'compute', + 'cluster': 'tidy', + 'name': 'tidy', + 'next_submit': ['prepcompute'], + 'nproc': 1, + 'order_in_cluster': 'sequential', + 'run_after': 'compute', + 'run_before': 'prepcompute', + 'submit_to_batch_system': False} + } + return phases_dict @pytest.fixture() def test_workflow_object(): - test_workflow = workflow.Workflow(["prepcompute","compute","tidy"],always_run_with=["prepare","prepexp"]) + workflow_dict = { + 'first_task_in_queue': 'prepcompute', + 'last_task_in_queue': 'tidy', + 
'next_run_triggered_by': 'tidy' + } + test_workflow = workflow.Workflow(workflow_dict) return test_workflow @pytest.fixture() @@ -40,7 +85,7 @@ def test_config(): 'general': { 'valid_model_names': ['fesom', 'oifs', 'rnfmap', 'oasis3mct', 'xios'], 'workflow': { - 'next_run_triggered_by': 'tidy', +# 'next_run_triggered_by': 'tidy', 'phases': { 'my_new_subjob_general': { 'batch_or_shell': 'batch', @@ -53,7 +98,7 @@ def test_config(): 'submit_to_batch_system': True}}}}, 'flow': { 'workflow': { - 'next_run_triggered_by': 'tidy', +# 'next_run_triggered_by': 'tidy', 'phases': { 'my_new_subjob_flow': { 'batch_or_shell': 'batch', @@ -64,30 +109,145 @@ def test_config(): 'run_after': 'tidy', 'script_dir': '/work/ab0995/a270089/myrunscripts/', 'script': 'hallowelt.sh', - 'submit_to_batch_system': True}}}}} + 'submit_to_batch_system': True} + } + } + } + } return config -def test_num_phases(test_workflow_object, test_config): - test_workflow_object = test_workflow_object.init_default_workflow(test_config) +@pytest.fixture() +def test_config_2(): + """Setup a test config dictionary.""" + config = { + 'computer': {'partitions': {'compute': {'name': 'test'}}}, + 'fesom': { + 'nproc': 128}, + 'rnfmap': { + 'nproc': 128}, + 'oasis3mct': { + 'nproc': 128}, + 'xios': { + 'nproc': 128}, + 'general': { + 'valid_model_names': ['fesom', 'oifs', 'rnfmap', 'oasis3mct', 'xios'], + 'jobtype': 'unknown', + 'command_line_config': { + 'jobtype': None + }, + "defaults.yaml": { + 'workflow': { + 'first_task_in_queue': 'prepcompute', + 'last_task_in_queue': 'tidy', + 'next_run_triggered_by': 'tidy', + 'phases': { + 'compute': { + 'called_from': 'prepcompute', + 'cluster': 'compute', + 'name': 'compute', + 'next_submit': ['tidy'], + 'nproc': 'None', + 'order_in_cluster': 'sequential', + 'run_after': 'prepcompute', + 'run_before': 'tidy', + 'run_on_queue': 'compute', + 'submit_to_batch_system': True}, + 'prepcompute': { + 'batch_or_shell': 'SimulationSetup', + 'called_from': 'tidy', + 'cluster': 
'prepcompute', + 'name': 'prepcompute', + 'next_submit': ['compute'], + 'nproc': 1, + 'order_in_cluster': 'sequential', + 'run_after': 'tidy', + 'run_before': 'compute', + 'submit_to_batch_system': False}, + 'tidy': { + 'batch_or_shell': 'SimulationSetup', + 'called_from': 'compute', + 'cluster': 'tidy', + 'name': 'tidy', + 'next_submit': ['prepcompute'], + 'nproc': 1, + 'order_in_cluster': 'sequential', + 'run_after': 'compute', + 'run_before': 'prepcompute', + 'submit_to_batch_system': False} + } + } + }, + 'workflow': { + 'my_new_subjob_general': { + 'batch_or_shell': 'batch', + 'order_in_cluster': 'concurrent', + 'run_on_queue': 'compute', + 'nproc': 1, + 'run_after': 'tidy', + 'script_dir': '/work/ab0995/a270089/myrunscripts/', + 'script': 'hallowelt.sh', + 'submit_to_batch_system': True} + } + }, + 'flow': { + 'workflow': { + 'phases': { + 'my_new_subjob_flow': { + 'batch_or_shell': 'batch', + 'order_in_cluster': 'concurrent', + 'cluster': 'test_cluster', + 'run_on_queue': 'compute', + 'nproc': 1, + 'run_after': 'tidy', + 'script_dir': '/work/ab0995/a270089/myrunscripts/', + 'script': 'hallowelt.sh', + 'submit_to_batch_system': True, + 'trigger_next_run': True} + } + } + }, + 'oifs': { + 'workflow': { + 'phases': { + 'my_new_subjob_oifs': { + 'batch_or_shell': 'batch', + 'nproc': 1, + 'order_in_cluster': 'concurrent', + 'cluster': 'test_cluster', + 'run_after': 'tidy', + 'run_on_queue': 'compute', + 'script': 'helloworld.sh', + 'script_dir': '/work/ab0995/a270089/myrunscripts/', + 'submit_to_batch_system': True} + } + } + }, + } + return config + +def test_num_phases(test_workflow_object, test_default_phases_dict, test_config): + for phase in test_default_phases_dict: + test_workflow_object.phases.append(workflow.WorkflowPhase(test_default_phases_dict[phase])) assert test_workflow_object.num_phases == 3 -def test_check_user_workflow_dependency(test_workflow_object, test_config): - test_workflow_object = test_workflow_object.init_default_workflow(test_config) 
- test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config) +def test_check_user_workflow_dependency(test_workflow_object, test_default_phases_dict, test_config): + for phase in test_default_phases_dict: + test_workflow_object.phases.append(workflow.WorkflowPhase(test_default_phases_dict[phase])) + test_workflow_object = test_workflow_object.collect_all_user_phases(test_config) independent = test_workflow_object.check_user_workflow_dependency() assert independent def test_check_user_workflow_dependency_2(test_workflow_object, test_config): test_config['flow']['workflow']['phases']['my_new_subjob_flow']['run_after'] = 'my_new_subjob_oifs' - test_workflow_object = test_workflow_object.init_default_workflow(test_config) - test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config) +# test_workflow_object = test_workflow_object.init_default_workflow(test_config) + test_workflow_object = test_workflow_object.collect_all_user_phases(test_config) independent = test_workflow_object.check_user_workflow_dependency() assert not independent def test_check_unknown_phases(test_workflow_object, test_config): test_config['flow']['workflow']['phases']['my_new_subjob_flow']['run_after'] = 'my_new_subjob' - test_workflow_object = test_workflow_object.init_default_workflow(test_config) - test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config) +# test_workflow_object = test_workflow_object.init_default_workflow(test_config) + test_workflow_object = test_workflow_object.collect_all_user_phases(test_config) unknown_phases = test_workflow_object.check_unknown_phases() assert unknown_phases @@ -100,50 +260,129 @@ def test_collect_all_user_workflow(test_config): def test_calc_number_of_tasks(): pytest.fail("something wrong") -def test_order_phases(test_workflow_object, test_config): +def test_order_phases_and_clusters(test_workflow_object, test_default_phases_dict, test_config): + for phase in 
test_default_phases_dict: + test_workflow_object.phases.append(workflow.WorkflowPhase(test_default_phases_dict[phase])) test_config['flow']['workflow']['phases']['my_new_subjob_flow']['run_after'] = 'my_new_subjob_general' # test_config['flow']['workflow']['next_run_triggered_by'] = 'my_new_subjob_flow' # test_config['oifs']['workflow']['next_run_triggered_by'] = 'my_new_subjob_general' #test_config['flow']['workflow']['phases']['my_new_subjob_flow']['run_before'] = 'my_new_subjob_oifs' - test_workflow_object = test_workflow_object.init_default_workflow(test_config) - test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config) - test_workflow_object = test_workflow_object.order_phases() + test_workflow_object = test_workflow_object.collect_all_user_phases(test_config) + test_workflow_object = test_workflow_object.order_phases_and_clusters() pytest.fail("something wrong") -def test_complete_clusters(test_workflow_object, test_config): - test_workflow_object = test_workflow_object.init_default_workflow(test_config) - test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config) - test_workflow_object = test_workflow_object.order_phases() - subjob_clusters = test_workflow_object.complete_clusters(test_config) +def test_complete_clusters(test_workflow_object, test_default_phases_dict, test_config): + for phase in test_default_phases_dict: + test_workflow_object.phases.append(workflow.WorkflowPhase(test_default_phases_dict[phase])) + test_workflow_object = test_workflow_object.collect_all_user_phases(test_config) + test_workflow_object = test_workflow_object.order_phases_and_clusters() pytest.fail("something wrong") -def test_prepend_newrun_job(test_workflow_object, test_config): - test_workflow_object = test_workflow_object.init_default_workflow(test_config) - test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config) - test_workflow_object = test_workflow_object.order_phases() - subjob_clusters = 
test_workflow_object.complete_clusters(test_config) - [test_workflow_object, subjob_clusters] = workflow.prepend_newrun_job(test_workflow_object, test_config, subjob_clusters) +def test_prepend_newrun_job(test_workflow_object, test_default_phases_dict, test_config): + for phase in test_default_phases_dict: + test_workflow_object.phases.append(workflow.WorkflowPhase(test_default_phases_dict[phase])) + test_workflow_object = test_workflow_object.collect_all_user_phases(test_config) + test_workflow_object = test_workflow_object.order_phases_and_clusters() + test_workflow_object = test_workflow_object.prepend_newrun_job() pytest.fail("something wrong") -def test_write_to_config(test_workflow_object, test_config): - test_workflow_object = test_workflow_object.init_default_workflow(test_config) - test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config) - test_workflow_object = test_workflow_object.order_phases() - subjob_clusters = test_workflow_object.complete_clusters(test_config) - [test_workflow_object, subjob_clusters] = workflow.prepend_newrun_job(test_workflow_object, test_config, subjob_clusters) +def test_write_to_config(test_workflow_object, test_default_phases_dict, test_config): + for phase in test_default_phases_dict: + test_workflow_object.phases.append(workflow.WorkflowPhase(test_default_phases_dict[phase])) + test_workflow_object = test_workflow_object.set_default_nproc(test_config) + test_workflow_object = test_workflow_object.collect_all_user_phases(test_config) + test_workflow_object = test_workflow_object.order_phases_and_clusters() + test_workflow_object = test_workflow_object.prepend_newrun_job() config = test_workflow_object.write_to_config(test_config) pytest.fail("something wrong") -def test_write_subjob_clusters_to_config(test_workflow_object, test_config): - test_workflow_object = test_workflow_object.init_default_workflow(test_config) - test_workflow_object = 
test_workflow_object.collect_all_user_workflows(test_config) - test_workflow_object = test_workflow_object.order_phases() - test_workflow_object = test_workflow_object.prepend_newrun_job(test_config) - test_config = test_workflow_object.write_to_config(test_config) - test_workflow_object = test_workflow_object.complete_clusters(test_config) - -def test_prepend_newrun_job(test_workflow_object, test_config): - test_workflow_object = test_workflow_object.init_default_workflow(test_config) - test_workflow_object = test_workflow_object.collect_all_user_workflows(test_config) - test_workflow_object = test_workflow_object.prepend_newrun_job(test_config) +# Test scenarios +# 1. Add one single phase at the end of the default workflow (Example 1 in documentation) +def test_example_1(test_config_2): + test_config_2 = workflow.assemble_workflow(test_config_2) + workflow.display_workflow_sequence(test_config_2) +# esm_parser.pprint_config(test_config_2) + pytest.fail("something wrong") + +# Test exceptions +# 1. If still a workflow keyword is set by user. +def test_exception_test_workflow_keyword(test_config_2): + test_config_2['flow']['workflow']['next_run_triggered_by'] = 'my_new_subjob_general' + with pytest.raises(SystemExit): + test_config_2 = workflow.assemble_workflow(test_config_2) + +# 2. If an invalid phase keyword is set. +def test_exception_invalid_phase_keyword(test_config_2): + test_config_2['flow']['workflow']['phases']['my_new_subjob_flow']['wrong_keyword'] = 'wrong_value' + with pytest.raises(SystemExit): + test_config_2 = workflow.assemble_workflow(test_config_2) + +# 3. If an unknown phase is called for , e.g. in 'run_after' +def test_exception_unknown_phase(test_config_2): + test_config_2['flow']['workflow']['phases']['my_new_subjob_flow']['run_after'] = 'trudy' + with pytest.raises(SystemExit): + test_config_2 = workflow.assemble_workflow(test_config_2) + +# 4. If a user phase has the same name as a default phase. 
+def test_if_user_phase_has_default_phase_name(test_config_2): + test_config_2['flow']['workflow']['phases']['compute'] = { + 'batch_or_shell': 'batch', + 'order_in_cluster': 'concurrent', + 'cluster': 'test_cluster', + 'run_on_queue': 'compute', + 'nproc': 1, + 'run_after': 'tidy', + 'script_dir': '/work/ab0995/a270089/myrunscripts/', + 'script': 'hallowelt.sh', + 'submit_to_batch_system': True} + with pytest.raises(SystemExit): + test_config_2 = workflow.assemble_workflow(test_config_2) + +# 5. If two user phases have the same name and are defined in different models/setups. +def test_if_two_user_phase_have_the_same_name(test_config_2): + test_config_2['oifs']['workflow']['phases']['my_new_subjob_flow'] = { + 'batch_or_shell': 'batch', + 'order_in_cluster': 'concurrent', + 'cluster': 'test_cluster', + 'run_on_queue': 'compute', + 'nproc': 1, + 'run_after': 'tidy', + 'script_dir': '/work/ab0995/a270089/myrunscripts/', + 'script': 'hallowelt.sh', + 'submit_to_batch_system': True} + with pytest.raises(SystemExit): + test_config_2 = workflow.assemble_workflow(test_config_2) + +# 6. If no queue is given for a phase that should be run on sbatch system. +def test_if_queue_is_missing(test_config_2): + test_config_2['oifs']['workflow']['phases']['my_new_subjob_oifs'] = { + 'batch_or_shell': 'batch', + 'order_in_cluster': 'concurrent', + 'cluster': 'test_cluster', + #'run_on_queue': 'compute', + 'nproc': 1, + 'run_after': 'tidy', + 'script_dir': '/work/ab0995/a270089/myrunscripts/', + 'script': 'hallowelt.sh', + 'submit_to_batch_system': True} + with pytest.raises(SystemExit): + test_config_2 = workflow.assemble_workflow(test_config_2) + +# 7. If more than one phase trigger_next_run. +def test_if_trigger_next_run_unclear(test_config_2): + test_config_2['oifs']['workflow']['phases']['my_new_subjob_oifs']['trigger_next_run'] = True + with pytest.raises(SystemExit): + test_config_2 = workflow.assemble_workflow(test_config_2) + +# 8. 
If no default phases are defined in defaults.yaml. +def test_if_no_default_phases(test_config_2): + test_config_2['general']['defaults.yaml']['workflow'].pop('phases', None) + with pytest.raises(SystemExit): + test_config_2 = workflow.assemble_workflow(test_config_2) + +# 9. If no default workflow is defined in defaults.yaml. +def test_inf_no_default_workflow(test_config_2): + test_config_2['general']['defaults.yaml'].pop('workflow', None) + with pytest.raises(SystemExit): + test_config_2 = workflow.assemble_workflow(test_config_2) From f86202ddb20724408eb1f67c203cef51810e85c1 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Tue, 28 Nov 2023 16:33:49 +0100 Subject: [PATCH 43/98] Fixed a bug in order_phases_and_clusters --- src/esm_runscripts/workflow.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index d924d4c75..75aab8bf4 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -339,6 +339,10 @@ def order_phases_and_clusters(self): """ Put the phases and clusters in order. + Tasks: + 1. Correct for ``triggered_next_run`` if set by user phase + - next_submit, run_after, called_from, run_before??? 
+ Parameters ---------- @@ -396,10 +400,16 @@ def order_phases_and_clusters(self): # get first default phase and correct run_after, called_from # correct last_task_in_queue of workflow if next_triggered not in self.get_phases_attribs_list("default", "name"): - self.phases[-1]["next_submit"].remove(self.phases[0]["name"]) - self.phases[-1]["next_submit"].append(next_triggered) - self.phases[0]["run_after"] = next_triggered - self.phases[0]["called_from"] = next_triggered + first_task_name = self.first_task_in_queue + first_phase = self.get_workflow_phase_by_name(first_task_name) + last_task_name = self.last_task_in_queue + last_phase = self.get_workflow_phase_by_name(last_task_name) + + last_phase["next_submit"].remove(first_phase["name"]) + last_phase["next_submit"].append(next_triggered) + last_phase["run_before"] = next_triggered + first_phase["run_after"] = next_triggered + first_phase["called_from"] = next_triggered self.last_task_in_queue = next_triggered # Set "next_submit" and "called_from" @@ -731,7 +741,7 @@ def display_workflow(config): display_nicely(config) display_workflow_sequence(config) -def display_workflow_sequence(config): +def display_workflow_sequence(config, display=True): first_phase = config["general"]["workflow"]["first_task_in_queue"] subjobs = config["general"]["workflow"]["subjob_clusters"][first_phase]["subjobs"] @@ -768,8 +778,11 @@ def display_workflow_sequence(config): sec_phase_str = f"{sec_phase_str} and ``{sec_phase}`` {subjobs}" workflow_order = f"{workflow_order} -> {sec_phase_str}" - esm_parser.user_note("Workflow sequence (cluster [phases])", f"{workflow_order}") - return config + if display: + esm_parser.user_note("Workflow sequence (cluster [phases])", f"{workflow_order}") + else: + workflow_order = workflow_order.replace("``", "") + return workflow_order def display_nicely(config): From 59590ed1dcd4444945b4ba67fb922c2dda8fc773 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Tue, 28 Nov 2023 16:34:50 +0100 Subject: 
[PATCH 44/98] Added workflow tests for test examples (in documentation). --- tests/test_esm_runscripts/test_workflow.py | 148 ++++++++++++++++++++- 1 file changed, 142 insertions(+), 6 deletions(-) diff --git a/tests/test_esm_runscripts/test_workflow.py b/tests/test_esm_runscripts/test_workflow.py index e2bc49cfb..742612325 100644 --- a/tests/test_esm_runscripts/test_workflow.py +++ b/tests/test_esm_runscripts/test_workflow.py @@ -173,7 +173,8 @@ def test_config_2(): 'order_in_cluster': 'sequential', 'run_after': 'compute', 'run_before': 'prepcompute', - 'submit_to_batch_system': False} + 'submit_to_batch_system': False, + 'trigger_next_run': True} } } }, @@ -225,6 +226,74 @@ def test_config_2(): } return config +@pytest.fixture() +def test_default_config_example(): + """Setup a test config dictionary.""" + config = { + 'computer': {'partitions': {'compute': {'name': 'test'}}}, + 'fesom': { + 'nproc': 128}, + 'oifs': { + 'nproc': 128}, + 'rnfmap': { + 'nproc': 128}, + 'oasis3mct': { + 'nproc': 128}, + 'xios': { + 'nproc': 128}, + 'general': { + 'valid_model_names': ['fesom', 'oifs', 'rnfmap', 'oasis3mct', 'xios'], + 'jobtype': 'unknown', + 'command_line_config': { + 'jobtype': None + }, + "defaults.yaml": { + 'workflow': { + 'first_task_in_queue': 'prepcompute', + 'last_task_in_queue': 'tidy', + 'next_run_triggered_by': 'tidy', + 'phases': { + 'compute': { + 'called_from': 'prepcompute', + 'cluster': 'compute', + 'name': 'compute', + 'next_submit': ['tidy'], + 'nproc': 'None', + 'order_in_cluster': 'sequential', + 'run_after': 'prepcompute', + 'run_before': 'tidy', + 'run_on_queue': 'compute', + 'submit_to_batch_system': True}, + 'prepcompute': { + 'batch_or_shell': 'SimulationSetup', + 'called_from': 'tidy', + 'cluster': 'prepcompute', + 'name': 'prepcompute', + 'next_submit': ['compute'], + 'nproc': 1, + 'order_in_cluster': 'sequential', + 'run_after': 'tidy', + 'run_before': 'compute', + 'submit_to_batch_system': False}, + 'tidy': { + 'batch_or_shell': 
'SimulationSetup', + 'called_from': 'compute', + 'cluster': 'tidy', + 'name': 'tidy', + 'next_submit': ['prepcompute'], + 'nproc': 1, + 'order_in_cluster': 'sequential', + 'run_after': 'compute', + 'run_before': 'prepcompute', + 'submit_to_batch_system': False, + 'trigger_next_run': True} + } + } + } + } + } + return config + def test_num_phases(test_workflow_object, test_default_phases_dict, test_config): for phase in test_default_phases_dict: test_workflow_object.phases.append(workflow.WorkflowPhase(test_default_phases_dict[phase])) @@ -297,13 +366,80 @@ def test_write_to_config(test_workflow_object, test_default_phases_dict, test_co pytest.fail("something wrong") # Test scenarios +# 0. Default workflow +def test_example_0(test_default_config_example): + test_default_config_example = workflow.assemble_workflow(test_default_config_example) + order = workflow.display_workflow_sequence(test_default_config_example, display=False) + assumption = "prepcompute ['prepcompute'] -> compute ['compute'] -> tidy ['tidy'] -> prepcompute ['prepcompute']" + assert order == assumption + # 1. Add one single phase at the end of the default workflow (Example 1 in documentation) -def test_example_1(test_config_2): - test_config_2 = workflow.assemble_workflow(test_config_2) - workflow.display_workflow_sequence(test_config_2) -# esm_parser.pprint_config(test_config_2) +def test_example_1(test_default_config_example): + test_default_config_example["general"]["workflow"] = { + 'phases': { + 'my_postprocessing': { + 'script': 'helloworld.sh', + 'script_dir': '/work/ab0995/a270089/myrunscripts/'} + } + } + assumption = "prepcompute ['prepcompute'] -> compute ['compute'] -> tidy ['tidy'] -> prepcompute ['prepcompute'] and my_postprocessing ['my_postprocessing']" + test_default_config_example = workflow.assemble_workflow(test_default_config_example) + order = workflow.display_workflow_sequence(test_default_config_example, display=False) + assert order == assumption + +# 2. 
Prepend new phase at the beginning of workflow +def test_example_2(test_default_config_example): + test_default_config_example["general"]["workflow"] = { + 'phases': { + 'my_preprocessing': { + 'run_before': 'prepcompute', + 'script': 'helloworld.sh', + 'script_dir': '/work/ab0995/a270089/myrunscripts/'} + } + } + assumption = "newrun ['newrun'] -> my_preprocessing ['my_preprocessing'] -> prepcompute ['prepcompute'] -> compute ['compute'] -> tidy ['tidy'] -> prepcompute ['prepcompute']" + test_default_config_example = workflow.assemble_workflow(test_default_config_example) + order = workflow.display_workflow_sequence(test_default_config_example, display=False) + assert order == assumption + +# 3. Append new phase at the beginning of workflow +def test_example_3(test_default_config_example): + test_default_config_example["general"]["workflow"] = { + 'phases': { + 'my_new_last_phase': { + 'script': 'helloworld.sh', + 'script_dir': '/work/ab0995/a270089/myrunscripts/', + 'trigger_next_run': True} + } + } + assumption = "prepcompute ['prepcompute'] -> compute ['compute'] -> tidy ['tidy'] -> my_new_last_phase ['my_new_last_phase'] -> prepcompute ['prepcompute']" + test_default_config_example = workflow.assemble_workflow(test_default_config_example) + order = workflow.display_workflow_sequence(test_default_config_example, display=False) + assert order == assumption + +# 4. 
Append two new phases in the same cluster +def test_example_4(test_default_config_example): + test_default_config_example["general"]["workflow"] = { + 'phases': { + 'my_new_last_phase': { + 'script': 'helloworld.sh', + 'script_dir': '/work/ab0995/a270089/myrunscripts/', + 'submit_to_batch_system': True, + 'run_on_queue': 'compute', + 'cluster': 'my_own_new_cluster'}, + 'my_second_new_phase': { + 'script': 'halloworld.sh', + 'script_dir': '/work/ab0995/a270089/myrunscripts/', + 'submit_to_batch_system': True, + 'run_on_queue': 'compute', + 'cluster': 'my_own_new_cluster'} + } + } + assumption = "prepcompute ['prepcompute'] -> compute ['compute'] -> tidy ['tidy'] -> prepcompute ['prepcompute'] and my_own_new_cluster ['my_new_last_phase', 'my_second_new_phase']" + test_default_config_example = workflow.assemble_workflow(test_default_config_example) + order = workflow.display_workflow_sequence(test_default_config_example, display=False) + assert order == assumption - pytest.fail("something wrong") # Test exceptions # 1. If still a workflow keyword is set by user. From ff7145bf2f2b2c356fdf9cb1d06693263ab97b29 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Wed, 29 Nov 2023 14:00:59 +0100 Subject: [PATCH 45/98] Fixed test example_2, add preprocessing phase with adding newrun. 
--- src/esm_runscripts/workflow.py | 38 +++++++++++++++------- tests/test_esm_runscripts/test_workflow.py | 3 +- 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index 75aab8bf4..1f1912d8a 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -443,7 +443,17 @@ def order_phases_and_clusters(self): if phase2["cluster"] not in next_submits[get_phase_attrib(self.phases + self.user_phases, phase2["run_after"], "cluster")]: next_submits[get_phase_attrib(self.phases + self.user_phases, phase2["run_after"], "cluster")].append(phase2["cluster"]) phase2["called_from"] = phase2["run_after"] - + else: + if phase2["run_before"] is not None: + if phase2["run_before"] == self.first_task_in_queue: + next_submits[phase2["name"]].append(self.first_task_in_queue) + next_submits[self.last_task_in_queue].append(phase2["cluster"]) + next_submits[self.last_task_in_queue].remove(self.first_task_in_queue) + phase2["run_after"] = self.last_task_in_queue + last_phase = self.get_workflow_phase_by_name(self.last_task_in_queue) + last_phase["run_before"] = phase2["name"] + last_phase["next_submit"].append(phase2["name"]) + self.first_task_in_queue = phase2["name"] for phase3 in self.phases + self.user_phases: phase3.set_attrib("next_submit", next_submits[phase3["name"]]) # phase3["next_submit"] = next_submits[phase3["name"]] @@ -493,18 +503,24 @@ def prepend_newrun_job(self): new_first_phase_name = "newrun_general" # Create new default phase object - new_first_phase = WorkflowPhase(new_first_phase_name) - new_first_phase.set_attrib("next_submit", first_phase["cluster"]) - new_first_phase.set_attrib("called_from", last_phase["cluster"]) - new_first_phase.set_attrib("run_before", first_phase["cluster"]) - new_first_phase.set_attrib("next_submit", first_phase["cluster"]) - new_first_phase.set_attrib("cluster", "newrun") - new_first_phase.set_attrib("batch_or_shell", "SimulationSetup") - 
new_first_phase.set_attrib("nproc", 1) + config_new_first_phase = { + "name": "newrun", + "next_submit": [first_phase["cluster"]], + "called_from": last_phase["cluster"], + "run_before": first_phase["cluster"], + "run_after": last_phase["cluster"], + "cluster": "newrun", + "batch_or_shell": "SimulationSetup", + "nproc": 1 + } + new_first_phase = WorkflowPhase(config_new_first_phase) # reset last_task attributes - last_phase.set_attrib("next_submit", "newrun") - last_phase.remove_attrib("next_submit", first_phase["cluster"]) + last_phase["next_submit"].append("newrun") + last_phase["next_submit"].remove(first_phase["cluster"]) + # why does the next line not work??? + #last_phase.set_attrib("next_submit", "newrun") + #last_phase.remove_attrib("next_submit", first_phase["cluster"]) # reset first_task attributes first_phase.set_attrib("called_from", "newrun") diff --git a/tests/test_esm_runscripts/test_workflow.py b/tests/test_esm_runscripts/test_workflow.py index 742612325..a3eee06c1 100644 --- a/tests/test_esm_runscripts/test_workflow.py +++ b/tests/test_esm_runscripts/test_workflow.py @@ -397,7 +397,7 @@ def test_example_2(test_default_config_example): 'script_dir': '/work/ab0995/a270089/myrunscripts/'} } } - assumption = "newrun ['newrun'] -> my_preprocessing ['my_preprocessing'] -> prepcompute ['prepcompute'] -> compute ['compute'] -> tidy ['tidy'] -> prepcompute ['prepcompute']" + assumption = "newrun ['newrun'] -> my_preprocessing ['my_preprocessing'] -> prepcompute ['prepcompute'] -> compute ['compute'] -> tidy ['tidy'] -> newrun ['newrun']" test_default_config_example = workflow.assemble_workflow(test_default_config_example) order = workflow.display_workflow_sequence(test_default_config_example, display=False) assert order == assumption @@ -440,6 +440,7 @@ def test_example_4(test_default_config_example): order = workflow.display_workflow_sequence(test_default_config_example, display=False) assert order == assumption +# 5. 
Append two new phases in the same cluster, one of them triggers the next run # Test exceptions # 1. If still a workflow keyword is set by user. From c784380d8ef7c83b8d10d9aa43b1b8929c06a53a Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Wed, 29 Nov 2023 14:54:08 +0100 Subject: [PATCH 46/98] Added test for workflow example 5. --- tests/test_esm_runscripts/test_workflow.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/test_esm_runscripts/test_workflow.py b/tests/test_esm_runscripts/test_workflow.py index a3eee06c1..883aac4b6 100644 --- a/tests/test_esm_runscripts/test_workflow.py +++ b/tests/test_esm_runscripts/test_workflow.py @@ -441,6 +441,28 @@ def test_example_4(test_default_config_example): assert order == assumption # 5. Append two new phases in the same cluster, one of them triggers the next run +def test_example_5(test_default_config_example): + test_default_config_example["general"]["workflow"] = { + 'phases': { + 'my_new_last_phase': { + 'script': 'helloworld.sh', + 'script_dir': '/work/ab0995/a270089/myrunscripts/', + 'submit_to_batch_system': True, + 'run_on_queue': 'compute', + 'cluster': 'my_own_new_cluster', + 'trigger_next_run': True}, + 'my_second_new_phase': { + 'script': 'halloworld.sh', + 'script_dir': '/work/ab0995/a270089/myrunscripts/', + 'submit_to_batch_system': True, + 'run_on_queue': 'compute', + 'cluster': 'my_own_new_cluster'} + } + } + assumption = "prepcompute ['prepcompute'] -> compute ['compute'] -> tidy ['tidy'] -> my_own_new_cluster ['my_new_last_phase', 'my_second_new_phase'] -> prepcompute ['prepcompute']" + test_default_config_example = workflow.assemble_workflow(test_default_config_example) + order = workflow.display_workflow_sequence(test_default_config_example, display=False) + assert order == assumption # Test exceptions # 1. If still a workflow keyword is set by user. 
From 6aa7684bd21b028700e494567f0f6a64e1e750b3 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Thu, 30 Nov 2023 16:59:35 +0100 Subject: [PATCH 47/98] Added function to cluster phases after collecting them. --- src/esm_runscripts/workflow.py | 175 ++++++++++++++------- tests/test_esm_runscripts/test_workflow.py | 43 ++++- 2 files changed, 163 insertions(+), 55 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index 1f1912d8a..5fa8b2f27 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -25,6 +25,7 @@ def __init__(self, workflow_yaml): # TODO: check if key is in workflow_yaml dict self.phases = [] self.user_phases = [] + self.clusters = {} self.first_task_in_queue = workflow_yaml["first_task_in_queue"] self.last_task_in_queue = workflow_yaml["last_task_in_queue"] self.next_run_triggered_by = workflow_yaml["next_run_triggered_by"] @@ -179,15 +180,14 @@ def collect_all_user_phases(self, config): f"``{phase}``." ) esm_parser.user_error("ERROR", err_msg) - # 3. if user phase has a new and unique name + # 3. if user phase (for each setup/model) has a new and unique name else: phase_config = copy.deepcopy(w_config["phases"][phase]) # add phase name phase_config["name"] = phase # Make sure that batch_or_shell is set to batch if submit_to_batch is true - # TODO: remove/replace batch_or_shell by submit_to_batch_system? Is needed - # for setting it to SimulationSetup and in other functions (resubmit, etc.) # Should not be set by user. TODO: Remove from documentation. 
+ # Check if run_on_queue is given if sbatch job if phase_config.get("submit_to_batch_system", False): phase_config["batch_or_shell"] = "batch" if not phase_config.get("run_on_queue", False): @@ -202,6 +202,7 @@ def collect_all_user_phases(self, config): user_workflow_phases_names.append(phase) if phase_config.get("trigger_next_run", False): user_workflow_next_run_triggered_by.append(phase) + # check if more than one user phase is set to trigger the next run if len(user_workflow_next_run_triggered_by) > 1: err_msg = ( f"More than one phase is set to " @@ -215,6 +216,65 @@ def collect_all_user_phases(self, config): self.user_phases = user_workflow_phases return self + def cluster_phases(self): + """Merge phases into clusters.""" + + clusters = {} + for cluster in self.get_phases_attribs_list("default", "cluster") + self.get_phases_attribs_list("user", "cluster"): + clusters[cluster] = {"phases": []} + for phase in self.phases + self.user_phases: + clusters[phase["cluster"]]["phases"].append(phase["name"]) + + for cluster in clusters: + nproc = nproc_sum = nproc_max = 0 + if len(clusters[cluster]["phases"]) == 1: + phase_name = clusters[cluster]["phases"][0] + phase = self.get_workflow_phase_by_name(phase_name) + clusters[cluster].update(phase) + else: + clusters[cluster].update(WorkflowPhase({})) + phases_list = [] + for phase_name in clusters[cluster]["phases"]: + phases_list.append(self.get_workflow_phase_by_name(phase_name)) + + # check for inconsistencies + attribs = {} + for attrib in WorkflowPhase({}): + attribs[attrib] = [] + [attribs[attrib].append(item) for item in [phase[attrib] for phase in phases_list] if item not in attribs[attrib]] + if len(attribs[attrib]) == 1: + clusters[cluster][attrib] = attribs[attrib][0] + else: + if type(clusters[cluster][attrib]) is list: + clusters[cluster][attrib] = attribs[attrib] + else: + if attrib not in ["name", "script", "scriptdir", "order_in_cluster", "nproc"]: + err_msg = ( + f"Mismatch for {attrib}") + 
esm_parser.user_error("ERROR", err_msg) + elif attrib == "name": + clusters[cluster]["name"] = cluster + else: + clusters[cluster][attrib] = "check phase" + + # calculate nproc if cluster is to be submitted to sbatch system + for phase in phases_list: + nproc_sum += phase["nproc"] + nproc_max = max(phase["nproc"], nproc_max) + + if clusters[cluster].get("submit_to_batch_system", False): + if phase["order_in_cluster"] == "concurrent": + if clusters[cluster]["order_in_cluster"] is None: + clusters[cluster]["order_in_cluster"] = "concurrent" + nproc = nproc_sum + else: + clusters[cluster]["order_in_cluster"] = "sequential" + nproc = nproc_max + clusters[cluster]["nproc"] = nproc + + self.clusters = clusters + return self + def write_to_config(self, config): """ Write to config. @@ -246,22 +306,6 @@ def write_to_config(self, config): if self.first_task_in_queue not in config["general"]["workflow"]["subjob_clusters"][cluster]["next_submit"]: config["general"]["workflow"]["subjob_clusters"][cluster]["next_submit"].append(self.first_task_in_queue) - for subjob_cluster in config["general"]["workflow"]["subjob_clusters"]: - nproc_sum = nproc_max = 0 - for subjob in config["general"]["workflow"]["subjob_clusters"][subjob_cluster]["subjobs"]: - nproc_sum += get_phase_attrib(self.phases + self.user_phases, subjob, "nproc") - nproc_max = max(get_phase_attrib(self.phases + self.user_phases, subjob, "nproc"), nproc_max) - if config["general"]["workflow"]["subjob_clusters"][subjob_cluster].get("submit_to_batch_system", False): -# Why setting batch_or_shell to shell if a script is given? Wouldn't now all phases be executed as shell and never as batch? 
-# # elif subjob_clusters[subjob_cluster].get("script", False): -# # subjob_clusters[subjob_cluster]["batch_or_shell"] = "shell" -# # - if config["general"]["workflow"]["subjob_clusters"][subjob_cluster]["order_in_cluster"] == "concurrent": - nproc = nproc_sum - else: - nproc = nproc_max - config["general"]["workflow"]["subjob_clusters"][subjob_cluster]["nproc"] = nproc - # 2. Write subjobs/phases config["general"]["workflow"]["subjobs"] = {} for phase in self.phases + self.user_phases: @@ -351,10 +395,13 @@ def order_phases_and_clusters(self): self : Workflow object """ + # check if user phases are independent from each other - # TODO: What if not independent??? - # do not run in parallel in same cluster??? - independent = self.check_user_workflow_dependency() + # independent = self.check_user_workflow_dependency() + +# 1. Check for exceptions +# - Unknown phase set by user +# - Missing keywords in user phases # check if there are unknown phases, if yes, will give error exception unknown_phases = self.check_unknown_phases() @@ -389,64 +436,72 @@ def order_phases_and_clusters(self): # ) # esm_parser.user_error("ERROR", err_msg) - # Correct workflow attributes (``last_task_in_queue``, `next_run_triggered``) - # if necessary +# 3. 
Correct workflow attributes (``last_task_in_queue``, ``first_task_in_queue``, ``next_run_triggered``) - next_triggered = self.next_run_triggered_by + # next_run_triggered_by is always the last phase # check if next_triggered is default or user phase # if user phase - # get last default phase and correct next_submit + # get last default phase and correct next_submit and run_before # get first default phase and correct run_after, called_from # correct last_task_in_queue of workflow + + next_triggered = self.next_run_triggered_by + triggered_next_run_phase = self.get_workflow_phase_by_name(next_triggered) if next_triggered not in self.get_phases_attribs_list("default", "name"): first_task_name = self.first_task_in_queue first_phase = self.get_workflow_phase_by_name(first_task_name) - last_task_name = self.last_task_in_queue - last_phase = self.get_workflow_phase_by_name(last_task_name) + old_last_task_name = self.last_task_in_queue + old_last_phase = self.get_workflow_phase_by_name(old_last_task_name) + + old_last_phase["next_submit"].remove(first_phase["name"]) + old_last_phase["next_submit"].append(next_triggered) + old_last_phase["run_before"] = next_triggered + old_last_phase["trigger_next_run"] = False + if triggered_next_run_phase["cluster"] not in self.clusters[old_last_phase["cluster"]]["next_submit"]: + self.clusters[old_last_phase["cluster"]]["next_submit"].append(triggered_next_run_phase["cluster"]) + self.clusters[old_last_phase["cluster"]]["run_before"] = triggered_next_run_phase["cluster"] + self.clusters[old_last_phase["cluster"]]["trigger_next_run"] = False - last_phase["next_submit"].remove(first_phase["name"]) - last_phase["next_submit"].append(next_triggered) - last_phase["run_before"] = next_triggered first_phase["run_after"] = next_triggered first_phase["called_from"] = next_triggered + self.clusters[first_phase["cluster"]]["run_after"] = triggered_next_run_phase["cluster"] + self.clusters[first_phase["cluster"]]["called_from"] = 
triggered_next_run_phase["cluster"] + + self.clusters[triggered_next_run_phase["cluster"]]["next_submit"].append(first_phase["cluster"]) + self.clusters[triggered_next_run_phase["cluster"]]["run_before"] = first_phase["cluster"] + self.clusters[triggered_next_run_phase["cluster"]]["run_after"] = old_last_phase["cluster"] + self.last_task_in_queue = next_triggered + +# 4. Intergrate new user phases by correcting next_submit, called_from, run_after, run_before + # Set "next_submit" and "called_from" # "next_submit" which phase/cluster will be called next (run_after of the next phase) # "called_from" name of previous phase, run_after of current phase - # Create a dict of all phases with empty lists - - # Create a cluster dict: - clusters = {} - - for phase4 in self.phases + self.user_phases: - # if a cluster is not set for a phase set it to the phase name, - # so that every phase belongs to a cluster - # default cluster has the same name as the phase itself - if phase4["cluster"] is None: - phase4["cluster"] = phase4["name"] - clusters[phase4["cluster"]] = {"name": phase4["cluster"]} - + # Create a dict of all phases with empty lists next_submits = {} for phase in self.phases + self.user_phases: next_submits[phase["name"]] = [] next_submits[phase["cluster"]] = [] - # set next_submits to the cluster name rather then to the phase name + for phase2 in self.phases + self.user_phases: - if phase2["run_after"] is not None: + if phase2.get("run_after", None): if phase2["cluster"] not in next_submits[phase2["run_after"]]: - if phase2["cluster"] not in next_submits[phase2["run_after"]]: - next_submits[phase2["run_after"]].append(phase2["cluster"]) - if phase2["cluster"] not in next_submits[get_phase_attrib(self.phases + self.user_phases, phase2["run_after"], "cluster")]: - next_submits[get_phase_attrib(self.phases + self.user_phases, phase2["run_after"], "cluster")].append(phase2["cluster"]) + next_submits[phase2["run_after"]].append(phase2["cluster"]) + if phase2["cluster"] 
not in next_submits[get_phase_attrib(self.phases + self.user_phases, phase2["run_after"], "cluster")]: + next_submits[get_phase_attrib(self.phases + self.user_phases, phase2["run_after"], "cluster")].append(phase2["cluster"]) phase2["called_from"] = phase2["run_after"] else: - if phase2["run_before"] is not None: + # if only run_before is set, e.g. to add a phase at the beginning of a run + if phase2.get("run_before", None): if phase2["run_before"] == self.first_task_in_queue: next_submits[phase2["name"]].append(self.first_task_in_queue) + if self.first_task_in_queue not in next_submits[phase2["cluster"]]: + next_submits[phase2["cluster"]].append(self.first_task_in_queue) next_submits[self.last_task_in_queue].append(phase2["cluster"]) next_submits[self.last_task_in_queue].remove(self.first_task_in_queue) phase2["run_after"] = self.last_task_in_queue @@ -454,10 +509,15 @@ def order_phases_and_clusters(self): last_phase["run_before"] = phase2["name"] last_phase["next_submit"].append(phase2["name"]) self.first_task_in_queue = phase2["name"] + + breakpoint() + for phase3 in self.phases + self.user_phases: phase3.set_attrib("next_submit", next_submits[phase3["name"]]) # phase3["next_submit"] = next_submits[phase3["name"]] +# 5. Correct first and last new phases of whole workflow + first_cluster_name = self.first_task_in_queue first_phase = self.get_workflow_phase_by_name(first_cluster_name) last_cluster_name = self.last_task_in_queue @@ -479,8 +539,6 @@ def prepend_newrun_job(self): """ - Creates a new cluster "newrun" if first_task_in_queue is not of type 'SimulationSetup' - - Why is this needed? So that every first task is a SimulationSetup to init - a config object??? Looks for subjob_cluster that are set by user workflow (not a 'SimulationSetup') and are not of type 'SimulationSetup'. 
@@ -601,7 +659,7 @@ def __init__(self, phase): self["next_submit"] = [] # needed self["called_from"] = None # needed self["batch_or_shell"] = "SimulationSetup" # needed - self["order_in_cluster"] = "sequential" # needed ??? + self["order_in_cluster"] = None # needed ??? self["run_only"] = None self["skip_chunk_number"] = None self["skip_run_number"] = None @@ -620,9 +678,14 @@ def __init__(self, phase): super().__init__(phase) + # make sure batch_or_shell is batch for sbatch jobs if self.get("submit_to_batch_system", False): self["batch_or_shell"] = "batch" + # set cluster to phase name, if not given + if self.get("cluster", None) is None: + self["cluster"] = self["name"] + def set_attrib(self, attrib, value): if type(self[attrib]) == "list": self[attrib].append(value) @@ -685,8 +748,12 @@ def assemble_workflow(config): # 3. Read in phases from runscript and config files workflow = workflow.collect_all_user_phases(config) + # 4. Cluster phases + workflow = workflow.cluster_phases() + # 4. Order user workflows into default workflow wrt. phase attributs. workflow = workflow.order_phases_and_clusters() + breakpoint() # 5. create new first phase of type SimulationSetup, if first_task_in_queue is # a user phase (type batch or shell) diff --git a/tests/test_esm_runscripts/test_workflow.py b/tests/test_esm_runscripts/test_workflow.py index 883aac4b6..6d9d49343 100644 --- a/tests/test_esm_runscripts/test_workflow.py +++ b/tests/test_esm_runscripts/test_workflow.py @@ -367,6 +367,7 @@ def test_write_to_config(test_workflow_object, test_default_phases_dict, test_co # Test scenarios # 0. Default workflow +@pytest.mark.example def test_example_0(test_default_config_example): test_default_config_example = workflow.assemble_workflow(test_default_config_example) order = workflow.display_workflow_sequence(test_default_config_example, display=False) @@ -374,6 +375,7 @@ def test_example_0(test_default_config_example): assert order == assumption # 1. 
Add one single phase at the end of the default workflow (Example 1 in documentation) +@pytest.mark.example def test_example_1(test_default_config_example): test_default_config_example["general"]["workflow"] = { 'phases': { @@ -388,6 +390,7 @@ def test_example_1(test_default_config_example): assert order == assumption # 2. Prepend new phase at the beginning of workflow +@pytest.mark.example def test_example_2(test_default_config_example): test_default_config_example["general"]["workflow"] = { 'phases': { @@ -400,9 +403,13 @@ def test_example_2(test_default_config_example): assumption = "newrun ['newrun'] -> my_preprocessing ['my_preprocessing'] -> prepcompute ['prepcompute'] -> compute ['compute'] -> tidy ['tidy'] -> newrun ['newrun']" test_default_config_example = workflow.assemble_workflow(test_default_config_example) order = workflow.display_workflow_sequence(test_default_config_example, display=False) - assert order == assumption + print(assumption) + print(order) + pytest.fail("something wrong") + #assert order == assumption # 3. Append new phase at the beginning of workflow +@pytest.mark.example def test_example_3(test_default_config_example): test_default_config_example["general"]["workflow"] = { 'phases': { @@ -418,6 +425,7 @@ def test_example_3(test_default_config_example): assert order == assumption # 4. 
Append two new phases in the same cluster +@pytest.mark.example def test_example_4(test_default_config_example): test_default_config_example["general"]["workflow"] = { 'phases': { @@ -426,12 +434,14 @@ def test_example_4(test_default_config_example): 'script_dir': '/work/ab0995/a270089/myrunscripts/', 'submit_to_batch_system': True, 'run_on_queue': 'compute', + 'order_in_cluster': 'concurrent', 'cluster': 'my_own_new_cluster'}, 'my_second_new_phase': { 'script': 'halloworld.sh', 'script_dir': '/work/ab0995/a270089/myrunscripts/', 'submit_to_batch_system': True, 'run_on_queue': 'compute', + 'order_in_cluster': 'concurrent', 'cluster': 'my_own_new_cluster'} } } @@ -441,6 +451,7 @@ def test_example_4(test_default_config_example): assert order == assumption # 5. Append two new phases in the same cluster, one of them triggers the next run +@pytest.mark.example def test_example_5(test_default_config_example): test_default_config_example["general"]["workflow"] = { 'phases': { @@ -449,6 +460,7 @@ def test_example_5(test_default_config_example): 'script_dir': '/work/ab0995/a270089/myrunscripts/', 'submit_to_batch_system': True, 'run_on_queue': 'compute', + 'order_in_cluster': 'concurrent', 'cluster': 'my_own_new_cluster', 'trigger_next_run': True}, 'my_second_new_phase': { @@ -456,6 +468,7 @@ def test_example_5(test_default_config_example): 'script_dir': '/work/ab0995/a270089/myrunscripts/', 'submit_to_batch_system': True, 'run_on_queue': 'compute', + 'order_in_cluster': 'concurrent', 'cluster': 'my_own_new_cluster'} } } @@ -464,6 +477,34 @@ def test_example_5(test_default_config_example): order = workflow.display_workflow_sequence(test_default_config_example, display=False) assert order == assumption +# 6. 
Append two new phases in the same cluster at the beginning of run +#@pytest.mark.example +#def test_example_6(test_default_config_example): +# test_default_config_example["general"]["workflow"] = { +# 'phases': { +# 'my_new_last_phase': { +# 'script': 'helloworld.sh', +# 'run_before': 'prepcompute', +# 'script_dir': '/work/ab0995/a270089/myrunscripts/', +# 'submit_to_batch_system': True, +# 'run_on_queue': 'compute', +# 'cluster': 'my_own_new_cluster'}, +# 'my_second_new_phase': { +# 'script': 'halloworld.sh', +# 'script_dir': '/work/ab0995/a270089/myrunscripts/', +# 'run_before': 'prepcompute', +# 'submit_to_batch_system': True, +# 'run_on_queue': 'compute', +# 'cluster': 'my_own_new_cluster'} +# } +# } +# assumption = "prepcompute ['prepcompute'] -> compute ['compute'] -> tidy ['tidy'] -> my_own_new_cluster ['my_new_last_phase', 'my_second_new_phase'] -> prepcompute ['prepcompute']" +# test_default_config_example = workflow.assemble_workflow(test_default_config_example) +# order = workflow.display_workflow_sequence(test_default_config_example, display=False) +# assert order == assumption + + + # Test exceptions # 1. If still a workflow keyword is set by user. def test_exception_test_workflow_keyword(test_config_2): From 58e3729dc1e405fa653b7cc06fa098248a2921f0 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Fri, 1 Dec 2023 13:36:25 +0100 Subject: [PATCH 48/98] Changes to pass all example tests. 
--- src/esm_runscripts/workflow.py | 130 +++++++++++---------- tests/test_esm_runscripts/test_workflow.py | 5 +- 2 files changed, 70 insertions(+), 65 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index 5fa8b2f27..0f34388df 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -214,6 +214,30 @@ def collect_all_user_phases(self, config): self.next_run_triggered_by = user_workflow_next_run_triggered_by[0] self.user_phases = user_workflow_phases + + # check if there are unknown phases, if yes, will give error exception + unknown_phases = self.check_unknown_phases() + if unknown_phases: + unknowns = ', '.join(unknown_phases) + err_msg = ( + f"Unknown phase(s) ``{unknowns}`` defined as ``run_before`` " + f"or ``run_after``." + ) + esm_parser.user_error("ERROR", err_msg) + + # check if run_after or run_before is set for each user phase + # if not, run_after will be set to last default phase + for user_phase in self.user_phases: + if not user_phase["run_before"] and not user_phase["run_after"]: + user_phase["run_after"] = self.phases[-1]["name"] + err_msg = ( + f"No value given for ``run_after`` or ``run_before`` " + f"of user phase ``{user_phase['name']}``. " + f"Set it to last default phase in workflow: " + f"``{self.phases[-1]['name']}``." 
+ ) + esm_parser.user_note("NOTE", err_msg) + return self def cluster_phases(self): @@ -248,12 +272,14 @@ def cluster_phases(self): if type(clusters[cluster][attrib]) is list: clusters[cluster][attrib] = attribs[attrib] else: - if attrib not in ["name", "script", "scriptdir", "order_in_cluster", "nproc"]: + if attrib not in ["name", "script", "scriptdir", "order_in_cluster", "nproc", "trigger_next_run"]: err_msg = ( f"Mismatch for {attrib}") esm_parser.user_error("ERROR", err_msg) elif attrib == "name": clusters[cluster]["name"] = cluster + elif attrib == "trigger_next_run": + clusters[cluster][attrib] = any(attribs[attrib]) else: clusters[cluster][attrib] = "check phase" @@ -286,25 +312,16 @@ def write_to_config(self, config): config["general"]["workflow"] = {} config["general"]["workflow"].update(self.__dict__) + # 3. Write clusters config["general"]["workflow"]["subjob_clusters"] = {} - for cluster in self.get_phases_attribs_list("default", "cluster") + self.get_phases_attribs_list("user", "cluster"): + for cluster in self.clusters: config["general"]["workflow"]["subjob_clusters"][cluster] = {} config["general"]["workflow"]["subjob_clusters"][cluster]["subjobs"] = [] - for phase in self.phases + self.user_phases: - if phase["cluster"] == cluster: - # TODO: Are there more attributes to be merged from the different phases within a cluster??? 
- config["general"]["workflow"]["subjob_clusters"][cluster]["subjobs"].append(phase["name"]) - for att in phase: - config["general"]["workflow"]["subjob_clusters"][cluster][att] = phase[att] - config["general"]["workflow"]["subjob_clusters"][cluster]["name"] = cluster - # if a phase in a cluster triggers the next run, set next_submit in cluster conf - for cluster in self.get_phases_attribs_list("default", "cluster") + self.get_phases_attribs_list("user", "cluster"): - for phase in self.phases + self.user_phases: - if phase["cluster"] == cluster: - if phase["name"] in self.next_run_triggered_by: - if self.first_task_in_queue not in config["general"]["workflow"]["subjob_clusters"][cluster]["next_submit"]: - config["general"]["workflow"]["subjob_clusters"][cluster]["next_submit"].append(self.first_task_in_queue) + for phase_name in self.clusters[cluster]["phases"]: + config["general"]["workflow"]["subjob_clusters"][cluster]["subjobs"].append(phase_name) + for att in self.clusters[cluster]: + config["general"]["workflow"]["subjob_clusters"][cluster][att] = self.clusters[cluster][att] # 2. Write subjobs/phases config["general"]["workflow"]["subjobs"] = {} @@ -399,33 +416,6 @@ def order_phases_and_clusters(self): # check if user phases are independent from each other # independent = self.check_user_workflow_dependency() -# 1. Check for exceptions -# - Unknown phase set by user -# - Missing keywords in user phases - - # check if there are unknown phases, if yes, will give error exception - unknown_phases = self.check_unknown_phases() - if unknown_phases: - unknowns = ', '.join(unknown_phases) - err_msg = ( - f"Unknown phase(s) ``{unknowns}`` defined as ``run_before`` " - f"or ``run_after``." 
- ) - esm_parser.user_error("ERROR", err_msg) - - # check if run_after or run_before is set for each user phase - # if not, run_after will be set to last default phase - for user_phase in self.user_phases: - if not user_phase["run_before"] and not user_phase["run_after"]: - user_phase["run_after"] = self.phases[-1]["name"] - err_msg = ( - f"No value given for ``run_after`` or ``run_before`` " - f"of user phase ``{user_phase['name']}``. " - f"Set it to last default phase in workflow: " - f"``{self.phases[-1]['name']}``." - ) - esm_parser.user_note("NOTE", err_msg) - # Check if not both run_after and run_before are set at the same # time for each user phase # if user_phase['run_before'] and user_phase['run_after']: @@ -482,39 +472,52 @@ def order_phases_and_clusters(self): # "called_from" name of previous phase, run_after of current phase # Create a dict of all phases with empty lists - next_submits = {} + next_submits_phases = {} + next_submits_clusters = {} for phase in self.phases + self.user_phases: - next_submits[phase["name"]] = [] - next_submits[phase["cluster"]] = [] - + next_submits_phases[phase["name"]] = [] + next_submits_clusters[phase["cluster"]] = [] +# for cluster in self.clusters: for phase2 in self.phases + self.user_phases: if phase2.get("run_after", None): - if phase2["cluster"] not in next_submits[phase2["run_after"]]: - next_submits[phase2["run_after"]].append(phase2["cluster"]) - if phase2["cluster"] not in next_submits[get_phase_attrib(self.phases + self.user_phases, phase2["run_after"], "cluster")]: - next_submits[get_phase_attrib(self.phases + self.user_phases, phase2["run_after"], "cluster")].append(phase2["cluster"]) + if phase2["name"] not in next_submits_phases[phase2["run_after"]]: + next_submits_phases[phase2["run_after"]].append(phase2["name"]) phase2["called_from"] = phase2["run_after"] + if self.clusters[phase2["cluster"]].get("run_after", None): + if phase2["cluster"] not in 
next_submits_clusters[self.clusters[phase2["cluster"]]["run_after"]]: + next_submits_clusters[self.clusters[phase2["cluster"]]["run_after"]].append(phase2["cluster"]) + self.clusters[phase2["cluster"]]["called_from"] = self.clusters[phase2["cluster"]]["run_after"] else: # if only run_before is set, e.g. to add a phase at the beginning of a run if phase2.get("run_before", None): if phase2["run_before"] == self.first_task_in_queue: - next_submits[phase2["name"]].append(self.first_task_in_queue) - if self.first_task_in_queue not in next_submits[phase2["cluster"]]: - next_submits[phase2["cluster"]].append(self.first_task_in_queue) - next_submits[self.last_task_in_queue].append(phase2["cluster"]) - next_submits[self.last_task_in_queue].remove(self.first_task_in_queue) - phase2["run_after"] = self.last_task_in_queue + old_first_phase = self.get_workflow_phase_by_name(self.first_task_in_queue) last_phase = self.get_workflow_phase_by_name(self.last_task_in_queue) + next_submits_phases[phase2["name"]].append(self.first_task_in_queue) + if self.first_task_in_queue not in next_submits_clusters[phase2["cluster"]]: + next_submits_clusters[phase2["cluster"]].append(self.first_task_in_queue) + next_submits_clusters[self.last_task_in_queue].append(phase2["cluster"]) + next_submits_phases[self.last_task_in_queue].append(phase2["name"]) + next_submits_phases[self.last_task_in_queue].remove(self.first_task_in_queue) + next_submits_clusters[last_phase["cluster"]].remove(old_first_phase["cluster"]) + phase2["run_after"] = self.last_task_in_queue last_phase["run_before"] = phase2["name"] + self.clusters[last_phase["cluster"]]["run_before"] = phase2["name"] + self.clusters[old_first_phase["cluster"]]["run_after"] = phase2["name"] + self.clusters[old_first_phase["cluster"]]["called_from"] = phase2["name"] + self.clusters[phase2["cluster"]]["called_from"] = last_phase["cluster"] + self.clusters[phase2["cluster"]]["run_after"] = last_phase["cluster"] 
last_phase["next_submit"].append(phase2["name"]) self.first_task_in_queue = phase2["name"] - breakpoint() + for cluster in self.clusters: + if next_submits_clusters[cluster]: + self.clusters[cluster]["next_submit"] = next_submits_clusters[cluster] for phase3 in self.phases + self.user_phases: - phase3.set_attrib("next_submit", next_submits[phase3["name"]]) -# phase3["next_submit"] = next_submits[phase3["name"]] + if next_submits_phases[phase3["name"]]: + phase3.set_attrib("next_submit", next_submits_phases[phase3["name"]]) # 5. Correct first and last new phases of whole workflow @@ -575,6 +578,10 @@ def prepend_newrun_job(self): # reset last_task attributes last_phase["next_submit"].append("newrun") + self.clusters[last_phase["cluster"]]["next_submit"] = ["newrun"] + self.clusters[last_phase["cluster"]]["run_before"] = "newrun" + self.clusters[new_first_phase["cluster"]] = new_first_phase + self.clusters[new_first_phase["cluster"]]["phases"] = ["newrun"] last_phase["next_submit"].remove(first_phase["cluster"]) # why does the next line not work??? #last_phase.set_attrib("next_submit", "newrun") @@ -583,6 +590,8 @@ def prepend_newrun_job(self): # reset first_task attributes first_phase.set_attrib("called_from", "newrun") first_phase.set_attrib("run_after", "newrun") + self.clusters[first_phase["cluster"]]["called_from"] = "newrun" + self.clusters[first_phase["cluster"]]["run_after"] = "newrun" # reset workflow attributes self.first_task_in_queue = "newrun" @@ -753,7 +762,6 @@ def assemble_workflow(config): # 4. Order user workflows into default workflow wrt. phase attributs. workflow = workflow.order_phases_and_clusters() - breakpoint() # 5. 
create new first phase of type SimulationSetup, if first_task_in_queue is # a user phase (type batch or shell) diff --git a/tests/test_esm_runscripts/test_workflow.py b/tests/test_esm_runscripts/test_workflow.py index 6d9d49343..93d3c84e3 100644 --- a/tests/test_esm_runscripts/test_workflow.py +++ b/tests/test_esm_runscripts/test_workflow.py @@ -403,10 +403,7 @@ def test_example_2(test_default_config_example): assumption = "newrun ['newrun'] -> my_preprocessing ['my_preprocessing'] -> prepcompute ['prepcompute'] -> compute ['compute'] -> tidy ['tidy'] -> newrun ['newrun']" test_default_config_example = workflow.assemble_workflow(test_default_config_example) order = workflow.display_workflow_sequence(test_default_config_example, display=False) - print(assumption) - print(order) - pytest.fail("something wrong") - #assert order == assumption + assert order == assumption # 3. Append new phase at the beginning of workflow @pytest.mark.example From ecf6bbb4046fc69c030e9afd19e5fc0be4129ee4 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Fri, 1 Dec 2023 15:50:12 +0100 Subject: [PATCH 49/98] Code optimizations --- src/esm_runscripts/workflow.py | 129 ++++++++++++++------------------- 1 file changed, 54 insertions(+), 75 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index 0f34388df..32ef95741 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -1,4 +1,3 @@ -import sys import copy import esm_parser @@ -153,8 +152,8 @@ def collect_all_user_phases(self, config): for model in config: if "workflow" in config[model]: w_config = config[model]["workflow"] - # if "subjobs" in w_config: if "phases" in w_config: + # check if still tries to set workflow keywords for key, value in w_config.items(): if not key == "phases": err_msg = f"``{key}`` is not allowed to be set for a workflow." 
@@ -163,8 +162,11 @@ def collect_all_user_phases(self, config): # each phase (of a model/setup) needs to have an unique name # same phases of the same model/setup defined in different config files # are overwritten by the usual config file hierarchy - # user phases are not alowed to have the same name asdefault phases (e.g. compute) - # 1. check if ``new_phase`` is already defined as a default phase + # user phases are not alowed to have the same name as default phases (e.g. compute) + + # check if ``new_phase`` is already defined as a default phase + # look for the name of the current phase in the list of default phase names + # if found, raise exception if phase in self.get_phases_attribs_list("default", "name"): err_msg = ( f"The user phase ``{phase}`` " @@ -172,24 +174,26 @@ def collect_all_user_phases(self, config): f"This is not allowed." ) esm_parser.user_error("ERROR", err_msg) - # 2. check if the name of the new user phase (for a model/setup) does not already exist - # (for another model/setup). + + # check if the name of the new user phase (for a model/setup) does not already exist + # (for another model/setup). if phase in user_workflow_phases_names: err_msg = ( f"Two workflow phases have the same name " f"``{phase}``." ) esm_parser.user_error("ERROR", err_msg) - # 3. if user phase (for each setup/model) has a new and unique name + + # if user phase (for each setup/model) has a new and unique name else: phase_config = copy.deepcopy(w_config["phases"][phase]) # add phase name phase_config["name"] = phase # Make sure that batch_or_shell is set to batch if submit_to_batch is true # Should not be set by user. TODO: Remove from documentation. 
- # Check if run_on_queue is given if sbatch job if phase_config.get("submit_to_batch_system", False): phase_config["batch_or_shell"] = "batch" + # Check if run_on_queue is given if submit_to_sbatch is true if not phase_config.get("run_on_queue", False): err_msg = f"No value for target queue given by ``run_on_queue`` for phase ``{phase}``." esm_parser.user_error("ERROR", err_msg) @@ -202,7 +206,7 @@ def collect_all_user_phases(self, config): user_workflow_phases_names.append(phase) if phase_config.get("trigger_next_run", False): user_workflow_next_run_triggered_by.append(phase) - # check if more than one user phase is set to trigger the next run + # check if more than one user phase has set trigger_next_run to true if len(user_workflow_next_run_triggered_by) > 1: err_msg = ( f"More than one phase is set to " @@ -244,44 +248,55 @@ def cluster_phases(self): """Merge phases into clusters.""" clusters = {} + # create an empty phases list for each cluster for cluster in self.get_phases_attribs_list("default", "cluster") + self.get_phases_attribs_list("user", "cluster"): clusters[cluster] = {"phases": []} + # append all phases that are within the same cluster for phase in self.phases + self.user_phases: clusters[phase["cluster"]]["phases"].append(phase["name"]) for cluster in clusters: nproc = nproc_sum = nproc_max = 0 + # if only one phase in cluster if len(clusters[cluster]["phases"]) == 1: phase_name = clusters[cluster]["phases"][0] phase = self.get_workflow_phase_by_name(phase_name) clusters[cluster].update(phase) + # if more than one phase are within the same cluster else: + # fill in default phase keys for each cluster to cluster dictionary clusters[cluster].update(WorkflowPhase({})) + # create a list of all phases (dicts) that are within the same cluster phases_list = [] for phase_name in clusters[cluster]["phases"]: phases_list.append(self.get_workflow_phase_by_name(phase_name)) - # check for inconsistencies - attribs = {} - for attrib in WorkflowPhase({}): - 
attribs[attrib] = [] - [attribs[attrib].append(item) for item in [phase[attrib] for phase in phases_list] if item not in attribs[attrib]] - if len(attribs[attrib]) == 1: - clusters[cluster][attrib] = attribs[attrib][0] + # check for inconsistencies of phase keywords within a cluster + keywords = {} + for key in WorkflowPhase({}): + keywords[key] = [] + # append keyword of a phase only if not already in keywords[key] + [keywords[key].append(item) for item in [phase[key] for phase in phases_list] if item not in keywords[key]] + # if there are no inconsistencies, all phases have the same values for keyword + if len(keywords[key]) == 1: + clusters[cluster][key] = keywords[key][0] + # if different phases have set different values for the same keyword/attrib else: - if type(clusters[cluster][attrib]) is list: - clusters[cluster][attrib] = attribs[attrib] + if type(clusters[cluster][key]) is list: + clusters[cluster][key] = keywords[key] else: - if attrib not in ["name", "script", "scriptdir", "order_in_cluster", "nproc", "trigger_next_run"]: + if key not in ["name", "script", "scriptdir", "order_in_cluster", "nproc", "trigger_next_run"]: err_msg = ( - f"Mismatch for {attrib}") + f"Mismatch for {key}") esm_parser.user_error("ERROR", err_msg) - elif attrib == "name": + elif key == "name": clusters[cluster]["name"] = cluster - elif attrib == "trigger_next_run": - clusters[cluster][attrib] = any(attribs[attrib]) + elif key == "trigger_next_run": + # set key of cluster to True if key for any (at least one) of the phases is set to True + clusters[cluster][key] = any(keywords[key]) else: - clusters[cluster][attrib] = "check phase" + # if key is set different for each phase in same cluster set to fill value (e.g. 
for script, scriptdir) + clusters[cluster][key] = "check phase" # calculate nproc if cluster is to be submitted to sbatch system for phase in phases_list: @@ -297,7 +312,7 @@ def cluster_phases(self): clusters[cluster]["order_in_cluster"] = "sequential" nproc = nproc_max clusters[cluster]["nproc"] = nproc - + # write clusters dictionary to workflow object attribute self.clusters = clusters return self @@ -367,7 +382,7 @@ def check_user_workflow_dependency(self): def check_unknown_phases(self): """ - Check if any user phase attributes points to any unknown workflow phase. + Check if any user phase keyword (run_afteer, run_before) points to an unknown workflow phase. Parameters ---------- @@ -378,10 +393,10 @@ def check_unknown_phases(self): unknown_phases : set """ unknown_phases = [] - phases_names = self.get_phases_attribs_list('default', 'name') - user_phases_names = self.get_phases_attribs_list('user', 'name') - run_after = self.get_phases_attribs_list('user', 'run_after') - run_before = self.get_phases_attribs_list('user', 'run_before') + phases_names = self.get_phases_attribs_list('default', 'name') # list of names of all default phases + user_phases_names = self.get_phases_attribs_list('user', 'name') # list of name of all user phases + run_after = self.get_phases_attribs_list('user', 'run_after') # list of all run_after values for all user phases + run_before = self.get_phases_attribs_list('user', 'run_before') # list of all run_before values for all user phases # Filter out all elements that are None # ``filter(None, anylist)`` will filter out all items of anylist, # for which ``if item`` is false (e.g. [], "", None, {}, ''). @@ -398,39 +413,22 @@ def check_unknown_phases(self): def order_phases_and_clusters(self): """ - Put the phases and clusters in order. - - Tasks: - 1. Correct for ``triggered_next_run`` if set by user phase - - next_submit, run_after, called_from, run_before??? + Put the phases and clusters in the right order. 
Parameters ---------- + self : Workflow object Returns ------- self : Workflow object """ - - # check if user phases are independent from each other - # independent = self.check_user_workflow_dependency() - - # Check if not both run_after and run_before are set at the same - # time for each user phase -# if user_phase['run_before'] and user_phase['run_after']: -# err_msg = ( -# f"Both run_after and run_before are set. Don't know when " -# f"to start {user_phase['name']}. Please only set run_after " -# f"or run_before." -# ) -# esm_parser.user_error("ERROR", err_msg) - -# 3. Correct workflow attributes (``last_task_in_queue``, ``first_task_in_queue``, ``next_run_triggered``) +# Correct workflow attributes (``last_task_in_queue``, ``first_task_in_queue``, ``next_run_triggered``) # next_run_triggered_by is always the last phase - # check if next_triggered is default or user phase + # check if next_triggered is set to a default or user phase # if user phase # get last default phase and correct next_submit and run_before # get first default phase and correct run_after, called_from @@ -465,20 +463,19 @@ def order_phases_and_clusters(self): self.last_task_in_queue = next_triggered -# 4. 
Intergrate new user phases by correcting next_submit, called_from, run_after, run_before +# Intergrate new user phases by correcting next_submit, called_from, run_after, run_before # Set "next_submit" and "called_from" # "next_submit" which phase/cluster will be called next (run_after of the next phase) # "called_from" name of previous phase, run_after of current phase - # Create a dict of all phases with empty lists + # Create a dict of all phases and for all clusters with empty lists next_submits_phases = {} next_submits_clusters = {} for phase in self.phases + self.user_phases: next_submits_phases[phase["name"]] = [] next_submits_clusters[phase["cluster"]] = [] -# for cluster in self.clusters: for phase2 in self.phases + self.user_phases: if phase2.get("run_after", None): if phase2["name"] not in next_submits_phases[phase2["run_after"]]: @@ -519,25 +516,8 @@ def order_phases_and_clusters(self): if next_submits_phases[phase3["name"]]: phase3.set_attrib("next_submit", next_submits_phases[phase3["name"]]) -# 5. 
Correct first and last new phases of whole workflow - - first_cluster_name = self.first_task_in_queue - first_phase = self.get_workflow_phase_by_name(first_cluster_name) - last_cluster_name = self.last_task_in_queue - last_phase = self.get_workflow_phase_by_name(last_cluster_name) - - # if first_cluster_name is not next_submit of last_cluster_name - # set 'next_submit' of last phase/cluster to first phase/cluster in workflow - if first_cluster_name not in last_phase["next_submit"]: - last_phase.set_attrib("next_submit", first_cluster_name) - # if last_cluster_name is not called_from of first_cluster_name - # set 'called_from' of first phase/cluster to last phase/cluster - if not last_cluster_name == first_phase["called_from"]: - first_phase.set_attrib("called_from", last_cluster_name) - return self - def prepend_newrun_job(self): """ - Creates a new cluster "newrun" if first_task_in_queue is not of @@ -562,7 +542,6 @@ def prepend_newrun_job(self): last_task_name = self.last_task_in_queue last_phase = self.get_workflow_phase_by_name(last_task_name) - new_first_phase_name = "newrun_general" # Create new default phase object config_new_first_phase = { "name": "newrun", @@ -584,8 +563,8 @@ def prepend_newrun_job(self): self.clusters[new_first_phase["cluster"]]["phases"] = ["newrun"] last_phase["next_submit"].remove(first_phase["cluster"]) # why does the next line not work??? - #last_phase.set_attrib("next_submit", "newrun") - #last_phase.remove_attrib("next_submit", first_phase["cluster"]) + # last_phase.set_attrib("next_submit", "newrun") + # last_phase.remove_attrib("next_submit", first_phase["cluster"]) # reset first_task attributes first_phase.set_attrib("called_from", "newrun") @@ -771,7 +750,6 @@ def assemble_workflow(config): # 7. Remove old worklow from config config = workflow.write_to_config(config) - # Set "jobtype" for the first task??? # NOTE: This is either first default phase or # newrun??? Can't this not be set in prepend_newrun then? 
@@ -832,6 +810,7 @@ def display_workflow(config): display_nicely(config) display_workflow_sequence(config) + def display_workflow_sequence(config, display=True): first_phase = config["general"]["workflow"]["first_task_in_queue"] From 97d5d28c5dccc75ed0bd66c9bd06ab29ebf793bc Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Mon, 4 Dec 2023 10:41:21 +0100 Subject: [PATCH 50/98] Removed unused function. --- src/esm_runscripts/workflow.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index 32ef95741..e52320c78 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -30,13 +30,6 @@ def __init__(self, workflow_yaml): self.next_run_triggered_by = workflow_yaml["next_run_triggered_by"] # TODO: Call here the phase object ??? - @property - def num_phases(self): - """ - Return the number of phases in workflow. - """ - return len(self.phases) - def get_workflow_phase_by_name(self, phase_name): """ Returns phase of phase_name From 04d8ff0813bb18628b8e5011efab0e5e6e84f303 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Mon, 4 Dec 2023 12:03:53 +0100 Subject: [PATCH 51/98] Finished workflow tests. 
--- tests/test_esm_runscripts/test_workflow.py | 244 +++------------------ 1 file changed, 36 insertions(+), 208 deletions(-) diff --git a/tests/test_esm_runscripts/test_workflow.py b/tests/test_esm_runscripts/test_workflow.py index 93d3c84e3..eb59efb9c 100644 --- a/tests/test_esm_runscripts/test_workflow.py +++ b/tests/test_esm_runscripts/test_workflow.py @@ -6,118 +6,8 @@ import pytest import esm_parser -@pytest.fixture() -def test_default_phases_dict(): - phases_dict = { - 'compute': { - 'called_from': 'prepcompute', - 'cluster': 'compute', - 'name': 'compute', - 'next_submit': ['tidy'], - 'nproc': 'None', - 'order_in_cluster': 'sequential', - 'run_after': 'prepcompute', - 'run_before': 'tidy', - 'run_on_queue': 'compute', - 'submit_to_batch_system': True}, - 'prepcompute': { - 'batch_or_shell': 'SimulationSetup', - 'called_from': 'tidy', - 'cluster': 'prepcompute', - 'name': 'prepcompute', - 'next_submit': ['compute'], - 'nproc': 1, - 'order_in_cluster': 'sequential', - 'run_after': 'tidy', - 'run_before': 'compute', - 'submit_to_batch_system': False}, - 'tidy': { - 'batch_or_shell': 'SimulationSetup', - 'called_from': 'compute', - 'cluster': 'tidy', - 'name': 'tidy', - 'next_submit': ['prepcompute'], - 'nproc': 1, - 'order_in_cluster': 'sequential', - 'run_after': 'compute', - 'run_before': 'prepcompute', - 'submit_to_batch_system': False} - } - return phases_dict - -@pytest.fixture() -def test_workflow_object(): - workflow_dict = { - 'first_task_in_queue': 'prepcompute', - 'last_task_in_queue': 'tidy', - 'next_run_triggered_by': 'tidy' - } - test_workflow = workflow.Workflow(workflow_dict) - return test_workflow - @pytest.fixture() def test_config(): - """Setup a test config dictionary.""" - config = { - 'computer': {'partitions': {'compute': {'name': 'test'}}}, - 'fesom': { - 'nproc': 128}, - 'rnfmap': { - 'nproc': 128}, - 'oasis3mct': { - 'nproc': 128}, - 'xios': { - 'nproc': 128}, - 'oifs': { - 'workflow': { -# 'next_run_triggered_by': 'tidy', - 'phases': 
{ - 'my_new_subjob_oifs': { - 'batch_or_shell': 'batch', - 'nproc': 1, - 'order_in_cluster': 'concurrent', - 'cluster': 'test_cluster', - 'run_after': 'tidy', - 'run_on_queue': 'compute', - 'script': 'helloworld.sh', - 'script_dir': '/work/ab0995/a270089/myrunscripts/', - 'submit_to_batch_system': True}}}}, - 'general': { - 'valid_model_names': ['fesom', 'oifs', 'rnfmap', 'oasis3mct', 'xios'], - 'workflow': { -# 'next_run_triggered_by': 'tidy', - 'phases': { - 'my_new_subjob_general': { - 'batch_or_shell': 'batch', - 'order_in_cluster': 'concurrent', - 'run_on_queue': 'compute', - 'nproc': 1, - 'run_after': 'tidy', - 'script_dir': '/work/ab0995/a270089/myrunscripts/', - 'script': 'hallowelt.sh', - 'submit_to_batch_system': True}}}}, - 'flow': { - 'workflow': { -# 'next_run_triggered_by': 'tidy', - 'phases': { - 'my_new_subjob_flow': { - 'batch_or_shell': 'batch', - 'order_in_cluster': 'concurrent', - 'cluster': 'test_cluster', - 'run_on_queue': 'compute', - 'nproc': 1, - 'run_after': 'tidy', - 'script_dir': '/work/ab0995/a270089/myrunscripts/', - 'script': 'hallowelt.sh', - 'submit_to_batch_system': True} - } - } - } - } - return config - -@pytest.fixture() -def test_config_2(): """Setup a test config dictionary.""" config = { 'computer': {'partitions': {'compute': {'name': 'test'}}}, @@ -294,77 +184,6 @@ def test_default_config_example(): } return config -def test_num_phases(test_workflow_object, test_default_phases_dict, test_config): - for phase in test_default_phases_dict: - test_workflow_object.phases.append(workflow.WorkflowPhase(test_default_phases_dict[phase])) - assert test_workflow_object.num_phases == 3 - -def test_check_user_workflow_dependency(test_workflow_object, test_default_phases_dict, test_config): - for phase in test_default_phases_dict: - test_workflow_object.phases.append(workflow.WorkflowPhase(test_default_phases_dict[phase])) - test_workflow_object = test_workflow_object.collect_all_user_phases(test_config) - independent = 
test_workflow_object.check_user_workflow_dependency() - assert independent - -def test_check_user_workflow_dependency_2(test_workflow_object, test_config): - test_config['flow']['workflow']['phases']['my_new_subjob_flow']['run_after'] = 'my_new_subjob_oifs' -# test_workflow_object = test_workflow_object.init_default_workflow(test_config) - test_workflow_object = test_workflow_object.collect_all_user_phases(test_config) - independent = test_workflow_object.check_user_workflow_dependency() - assert not independent - -def test_check_unknown_phases(test_workflow_object, test_config): - test_config['flow']['workflow']['phases']['my_new_subjob_flow']['run_after'] = 'my_new_subjob' -# test_workflow_object = test_workflow_object.init_default_workflow(test_config) - test_workflow_object = test_workflow_object.collect_all_user_phases(test_config) - unknown_phases = test_workflow_object.check_unknown_phases() - assert unknown_phases - -def test_assemble_workflow(): - pytest.fail("something wrong") - -def test_collect_all_user_workflow(test_config): - pytest.fail("something wrong") - -def test_calc_number_of_tasks(): - pytest.fail("something wrong") - -def test_order_phases_and_clusters(test_workflow_object, test_default_phases_dict, test_config): - for phase in test_default_phases_dict: - test_workflow_object.phases.append(workflow.WorkflowPhase(test_default_phases_dict[phase])) - test_config['flow']['workflow']['phases']['my_new_subjob_flow']['run_after'] = 'my_new_subjob_general' -# test_config['flow']['workflow']['next_run_triggered_by'] = 'my_new_subjob_flow' -# test_config['oifs']['workflow']['next_run_triggered_by'] = 'my_new_subjob_general' - #test_config['flow']['workflow']['phases']['my_new_subjob_flow']['run_before'] = 'my_new_subjob_oifs' - test_workflow_object = test_workflow_object.collect_all_user_phases(test_config) - test_workflow_object = test_workflow_object.order_phases_and_clusters() - pytest.fail("something wrong") - -def 
test_complete_clusters(test_workflow_object, test_default_phases_dict, test_config): - for phase in test_default_phases_dict: - test_workflow_object.phases.append(workflow.WorkflowPhase(test_default_phases_dict[phase])) - test_workflow_object = test_workflow_object.collect_all_user_phases(test_config) - test_workflow_object = test_workflow_object.order_phases_and_clusters() - pytest.fail("something wrong") - -def test_prepend_newrun_job(test_workflow_object, test_default_phases_dict, test_config): - for phase in test_default_phases_dict: - test_workflow_object.phases.append(workflow.WorkflowPhase(test_default_phases_dict[phase])) - test_workflow_object = test_workflow_object.collect_all_user_phases(test_config) - test_workflow_object = test_workflow_object.order_phases_and_clusters() - test_workflow_object = test_workflow_object.prepend_newrun_job() - pytest.fail("something wrong") - -def test_write_to_config(test_workflow_object, test_default_phases_dict, test_config): - for phase in test_default_phases_dict: - test_workflow_object.phases.append(workflow.WorkflowPhase(test_default_phases_dict[phase])) - test_workflow_object = test_workflow_object.set_default_nproc(test_config) - test_workflow_object = test_workflow_object.collect_all_user_phases(test_config) - test_workflow_object = test_workflow_object.order_phases_and_clusters() - test_workflow_object = test_workflow_object.prepend_newrun_job() - config = test_workflow_object.write_to_config(test_config) - pytest.fail("something wrong") - # Test scenarios # 0. Default workflow @pytest.mark.example @@ -504,26 +323,30 @@ def test_example_5(test_default_config_example): # Test exceptions # 1. If still a workflow keyword is set by user. 
-def test_exception_test_workflow_keyword(test_config_2): - test_config_2['flow']['workflow']['next_run_triggered_by'] = 'my_new_subjob_general' +@pytest.mark.exceptions +def test_exception_test_workflow_keyword(test_config): + test_config['flow']['workflow']['next_run_triggered_by'] = 'my_new_subjob_general' with pytest.raises(SystemExit): - test_config_2 = workflow.assemble_workflow(test_config_2) + test_config = workflow.assemble_workflow(test_config) # 2. If an invalid phase keyword is set. -def test_exception_invalid_phase_keyword(test_config_2): - test_config_2['flow']['workflow']['phases']['my_new_subjob_flow']['wrong_keyword'] = 'wrong_value' +@pytest.mark.exceptions +def test_exception_invalid_phase_keyword(test_config): + test_config['flow']['workflow']['phases']['my_new_subjob_flow']['wrong_keyword'] = 'wrong_value' with pytest.raises(SystemExit): - test_config_2 = workflow.assemble_workflow(test_config_2) + test_config = workflow.assemble_workflow(test_config) # 3. If an unknown phase is called for , e.g. in 'run_after' -def test_exception_unknown_phase(test_config_2): - test_config_2['flow']['workflow']['phases']['my_new_subjob_flow']['run_after'] = 'trudy' +@pytest.mark.exceptions +def test_exception_unknown_phase(test_config): + test_config['flow']['workflow']['phases']['my_new_subjob_flow']['run_after'] = 'trudy' with pytest.raises(SystemExit): - test_config_2 = workflow.assemble_workflow(test_config_2) + test_config = workflow.assemble_workflow(test_config) # 4. If a user phase has the same name as a default phase. 
-def test_if_user_phase_has_default_phase_name(test_config_2): - test_config_2['flow']['workflow']['phases']['compute'] = { +@pytest.mark.exceptions +def test_if_user_phase_has_default_phase_name(test_config): + test_config['flow']['workflow']['phases']['compute'] = { 'batch_or_shell': 'batch', 'order_in_cluster': 'concurrent', 'cluster': 'test_cluster', @@ -534,11 +357,12 @@ def test_if_user_phase_has_default_phase_name(test_config_2): 'script': 'hallowelt.sh', 'submit_to_batch_system': True} with pytest.raises(SystemExit): - test_config_2 = workflow.assemble_workflow(test_config_2) + test_config = workflow.assemble_workflow(test_config) # 5. If two user phases have the same name and are defined in different models/setups. -def test_if_two_user_phase_have_the_same_name(test_config_2): - test_config_2['oifs']['workflow']['phases']['my_new_subjob_flow'] = { +@pytest.mark.exceptions +def test_if_two_user_phase_have_the_same_name(test_config): + test_config['oifs']['workflow']['phases']['my_new_subjob_flow'] = { 'batch_or_shell': 'batch', 'order_in_cluster': 'concurrent', 'cluster': 'test_cluster', @@ -549,11 +373,12 @@ def test_if_two_user_phase_have_the_same_name(test_config_2): 'script': 'hallowelt.sh', 'submit_to_batch_system': True} with pytest.raises(SystemExit): - test_config_2 = workflow.assemble_workflow(test_config_2) + test_config = workflow.assemble_workflow(test_config) # 6. If no queue is given for a phase that should be run on sbatch system. 
-def test_if_queue_is_missing(test_config_2): - test_config_2['oifs']['workflow']['phases']['my_new_subjob_oifs'] = { +@pytest.mark.exceptions +def test_if_queue_is_missing(test_config): + test_config['oifs']['workflow']['phases']['my_new_subjob_oifs'] = { 'batch_or_shell': 'batch', 'order_in_cluster': 'concurrent', 'cluster': 'test_cluster', @@ -564,22 +389,25 @@ def test_if_queue_is_missing(test_config_2): 'script': 'hallowelt.sh', 'submit_to_batch_system': True} with pytest.raises(SystemExit): - test_config_2 = workflow.assemble_workflow(test_config_2) + test_config = workflow.assemble_workflow(test_config) # 7. If more than one phase trigger_next_run. -def test_if_trigger_next_run_unclear(test_config_2): - test_config_2['oifs']['workflow']['phases']['my_new_subjob_oifs']['trigger_next_run'] = True +@pytest.mark.exceptions +def test_if_trigger_next_run_unclear(test_config): + test_config['oifs']['workflow']['phases']['my_new_subjob_oifs']['trigger_next_run'] = True with pytest.raises(SystemExit): - test_config_2 = workflow.assemble_workflow(test_config_2) + test_config = workflow.assemble_workflow(test_config) # 8. If no default phases are defined in defaults.yaml. -def test_if_no_default_phases(test_config_2): - test_config_2['general']['defaults.yaml']['workflow'].pop('phases', None) +@pytest.mark.exceptions +def test_if_no_default_phases(test_config): + test_config['general']['defaults.yaml']['workflow'].pop('phases', None) with pytest.raises(SystemExit): - test_config_2 = workflow.assemble_workflow(test_config_2) + test_config = workflow.assemble_workflow(test_config) # 9. If no default workflow is defined in defaults.yaml. 
-def test_inf_no_default_workflow(test_config_2): - test_config_2['general']['defaults.yaml'].pop('workflow', None) +@pytest.mark.exceptions +def test_inf_no_default_workflow(test_config): + test_config['general']['defaults.yaml'].pop('workflow', None) with pytest.raises(SystemExit): - test_config_2 = workflow.assemble_workflow(test_config_2) + test_config = workflow.assemble_workflow(test_config) From 16187d923f01e952a8ddec0a03f8bb53be82cf06 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Mon, 4 Dec 2023 12:09:42 +0100 Subject: [PATCH 52/98] Changes in default workflow definition. --- .../esm_software/esm_runscripts/defaults.yaml | 26 +------------------ 1 file changed, 1 insertion(+), 25 deletions(-) diff --git a/configs/esm_software/esm_runscripts/defaults.yaml b/configs/esm_software/esm_runscripts/defaults.yaml index 31ffa1394..d8234199c 100644 --- a/configs/esm_software/esm_runscripts/defaults.yaml +++ b/configs/esm_software/esm_runscripts/defaults.yaml @@ -17,10 +17,8 @@ workflow: phases: prepcompute: batch_or_shell: SimulationSetup - call_function: None called_from: tidy cluster: prepcompute - env_preparation: None name: prepcompute next_submit: - compute @@ -28,19 +26,10 @@ workflow: order_in_cluster: sequential run_after: tidy run_before: compute - run_on_queue: None - run_only: None - script: None - script_dir: None - skip_chunk_number: None - skip_run_number: None submit_to_batch_system: False compute: - batch_or_shell: batch - call_function: None called_from: prepcompute cluster: compute - env_preparation: None name: compute next_submit: - tidy @@ -48,19 +37,12 @@ workflow: order_in_cluster: sequential run_after: prepcompute run_before: tidy - run_on_queue: None - run_only: None - script: None - script_dir: None - skip_chunk_number: None - skip_run_number: None + run_on_queue: ${computer.partitions.pp.name} submit_to_batch_system: True tidy: batch_or_shell: SimulationSetup - call_function: None called_from: compute cluster: tidy - env_preparation: 
None name: tidy next_submit: - prepcompute @@ -68,10 +50,4 @@ workflow: order_in_cluster: sequential run_after: compute run_before: prepcompute - run_on_queue: None - run_only: None - script: None - script_dir: None - skip_chunk_number: None - skip_run_number: None submit_to_batch_system: False From f26a115180e1be705383f9e7d817a27ebe1ae4f8 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Mon, 4 Dec 2023 12:11:28 +0100 Subject: [PATCH 53/98] Revive init_workflow funtion. --- src/esm_runscripts/workflow.py | 63 +++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 27 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index e52320c78..66e831a0f 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -68,6 +68,7 @@ def get_phases_attribs_list(self, phase_type, attrib): return phases_attribs + def set_default_nproc(self, config): """ Calculating the number of mpi tasks for each component/model/script @@ -694,33 +695,9 @@ def assemble_workflow(config): ------- config : dict """ - - # 1. Generate default workflow object - # initialize the default workflow as Workflow object - # TODO: Where are these default phases defined? For now I placed it in - # esm_tools/configs/esm_software/esm_runscripts/defaults.yaml - if "defaults.yaml" in config["general"]: - if "workflow" in config["general"]["defaults.yaml"]: - workflow = config["general"]["defaults.yaml"]["workflow"] - phases = config["general"]["defaults.yaml"]["workflow"].get("phases", []) - else: - esm_parser.user_error("ERROR", "No default workflow defined.") - else: - workflow = [] - phases = [] - - # 2. 
Initialize default workflow phases from defaults.yaml - if phases: - workflow = Workflow(workflow) - for phase in phases: - workflow.phases.append(WorkflowPhase(phases[phase])) - else: - esm_parser.user_error("ERROR", "No default workflow phases defined.") - # Note: Should this work also if no default phases are set in such a config - # file, but instead all workflow phases are defined in different configs - # and/or runscripts? - # Where could a user define a different (default) phase list? - # Or should this be changed in defaults.yaml as it is now? + # 1. Generate default workflow object and + # 2. initialize default workflow phases from defaults.yaml + workflow = init_default_workflow(config) # 3. Calc mpi tasks for batch jobs of default phases # TODO: Put it into other method??? @@ -756,6 +733,38 @@ def assemble_workflow(config): return config +def init_default_workflow(config): + """ + Initialize workflow and default phases from defauls.yaml + """ + # 1. Generate default workflow object + # initialize the default workflow as Workflow object + # TODO: Where are these default phases defined? For now I placed it in + # esm_tools/configs/esm_software/esm_runscripts/defaults.yaml + if "defaults.yaml" in config["general"]: + if "workflow" in config["general"]["defaults.yaml"]: + workflow = config["general"]["defaults.yaml"]["workflow"] + phases = config["general"]["defaults.yaml"]["workflow"].get("phases", []) + else: + esm_parser.user_error("ERROR", "No default workflow defined.") + else: + workflow = [] + phases = [] + + # 2. Initialize default workflow phases from defaults.yaml + if phases: + workflow = Workflow(workflow) + for phase in phases: + workflow.phases.append(WorkflowPhase(phases[phase])) + else: + esm_parser.user_error("ERROR", "No default workflow phases defined.") + # Note: Should this work also if no default phases are set in such a config + # file, but instead all workflow phases are defined in different configs + # and/or runscripts? 
+ # Where could a user define a different (default) phase list? + # Or should this be changed in defaults.yaml as it is now? + + return workflow def get_phase_attrib(workflow_phases, phase_name, attrib): if not type(workflow_phases) is list: From 08d697600e09ca26a79540b78c13d6bbe1004924 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Mon, 4 Dec 2023 12:12:34 +0100 Subject: [PATCH 54/98] Added my email to setup.py --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 185eca844..99e40c947 100644 --- a/setup.py +++ b/setup.py @@ -52,8 +52,9 @@ "miguel.andres-martinez@awi.de", "deniz.ural@awi.de", "jan.streffing@awi.de", + "nadine.wieters@awi.de", "sebastian.wahl@geomar.de", - "kai.himstedt@dkrz.de", + "kai.himstedt@dkrz.de", ], python_requires=">=3.6, <=3.11", classifiers=[ From 7e1dd6b120914a55821197d1f84916dde5673b2f Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Mon, 4 Dec 2023 12:16:42 +0100 Subject: [PATCH 55/98] Added comments and docstrings. 
--- src/esm_runscripts/resubmit.py | 162 +++++++++++++++++++++++++++++++-- 1 file changed, 156 insertions(+), 6 deletions(-) diff --git a/src/esm_runscripts/resubmit.py b/src/esm_runscripts/resubmit.py index daa15047d..e2ecbd0d8 100644 --- a/src/esm_runscripts/resubmit.py +++ b/src/esm_runscripts/resubmit.py @@ -7,6 +7,18 @@ def submit(config): + """ + Submits a jobscript to the batch system by calling os.system + + Arguments + --------- + config : dict + + Returns + ------- + config : dict + """ + if config["general"]["verbose"]: print("\n", 40 * "+ ") print("Submitting jobscript to batch system...") @@ -22,15 +34,43 @@ def submit(config): def resubmit_batch_or_shell(config, batch_or_shell, cluster=None): + """ + - Creates a submit_commant and sets it to config depending on kind of submission (batch or shell) + - Calls function submit to acually submitting the shell or batch command + + Arguments + --------- + config : dict + batch_or_shell : Bool + cluster : (optional) + + Returns + ------- + config : dict + """ + config = config["general"]["batch"].write_simple_runscript( config, cluster, batch_or_shell ) + # Checks, if not submitted with option -c in esm_runscript call (check run) if not check_if_check(config): config = submit(config) return config def resubmit_SimulationSetup(config, cluster=None): + """ + Resubmitting a workflow phase/cluster that is of type SimulationSetup + - Initialize the cluster as a new SimulationSetup object + + Arguments + --------- + config : dict + cluster : str (optional: name of cluster) + Returns + ------- + config : dict + """ monitor_file = logfiles.logfile_handle # Jobs that should be started directly from the compute job: @@ -60,6 +100,7 @@ def resubmit_SimulationSetup(config, cluster=None): cluster_obj.config[f"{cluster}_update_{jobtype}_config_before_resubmit"] ) + # Checks, if not submitted with option -c in esm_runscript call (check run) if not check_if_check(config): monitor_file.write(f"Calling {cluster} job:\n") @@ 
-69,14 +110,29 @@ def resubmit_SimulationSetup(config, cluster=None): def get_submission_type(cluster, config): - # Figure out if next job is resubmitted to batch system, - # just executed in shell or invoked as new SimulationSetup - # object + """ + Figure out if next job is + - resubmitted to batch system, + - just executed in shell or + - invoked as new SimulationSetup object + + Arguments + --------- + cluster : str (name of cluster) + config : dict + + Returns + ------- + submission_type : str + """ clusterconf = config["general"]["workflow"]["subjob_clusters"][cluster] if clusterconf.get("submit_to_batch_system", False): submission_type = "batch" + # This information should come from the config of the cluster/workflow phase + # This information is given in batch_or_shell attribute of workflow phase/cluster + # TODO: Make this a function of workflow manager??? elif cluster in ["newrun", "prepcompute", "tidy", "inspect", "viz"]: submission_type = "SimulationSetup" else: @@ -86,6 +142,17 @@ def get_submission_type(cluster, config): def end_of_experiment(config): + """ + Checks if it is the end of the experiment. 
+ + Arguments + --------- + config + + Returns + ------- + True or False + """ if config["general"]["next_date"] >= config["general"]["final_date"]: monitor_file = logfiles.logfile_handle monitor_file.write("Reached the end of the simulation, quitting...\n") @@ -96,6 +163,17 @@ def end_of_experiment(config): def end_of_experiment_all_models(config): + """ + Checks if end of experiment is reached and everything is done + + Arguments + --------- + config : dict + + Returns + ------- + True or False + """ index = 1 expid = config["general"]["expid"] while "model" + str(index) in config["general"]["original_config"]: @@ -131,6 +209,17 @@ def end_of_experiment_all_models(config): def check_if_check(config): + """ + Will check if esm_runscripts has been called with option -c (check run only) + + Arguments + --------- + config : dict + + Returns + ------- + True or False + """ if config["general"]["check"]: print( "Actually not submitting anything, this job preparation was launched in 'check' mode (-c)." @@ -142,7 +231,21 @@ def check_if_check(config): def maybe_resubmit(config): - + """ + If nextrun is started, + - calls funtion to increment date and run_number + - calls function to write new date file + If it recognizes that is was actually the last run + - returns if end of the experiment (if not iterative_coupling) + + Arguments + --------- + config : dict + + Returns + ------- + config : dict + """ jobtype = config["general"]["jobtype"] nextrun = resubmit_recursively(config, jobtype=jobtype) @@ -154,6 +257,9 @@ def maybe_resubmit(config): if end_of_experiment(config): if config["general"].get("iterative_coupling", False): + # If not iterative coupling + # check if end of experiment for all models + # if not??? if end_of_experiment_all_models(config): return config else: @@ -161,6 +267,7 @@ def maybe_resubmit(config): return config cluster = config["general"]["workflow"]["first_task_in_queue"] + # For what is nextrun here nedded? 
nextrun = resubmit_recursively( config, list_of_clusters=[cluster], nextrun_in=True ) @@ -169,8 +276,26 @@ def maybe_resubmit(config): def resubmit_recursively(config, jobtype=None, list_of_clusters=None, nextrun_in=False): + """ + - Reads in a list of all clusters (next_submit) in a workflow of a given jobtype (if not passes as argument) + - Checks if cluster is going to be skipped + - Gets the submission_type of cluster and calls the corresponding resubmit function + - If cluster is skipped, calls this function again ??? + - What is nextrun_in for? What if true? If within a run??? + - When could cluster be first_task_in_queue and nextrun_in=true? + + Arguments + --------- + config : dict + jobtype : (optional) + list_of_clusters: (optional) + nextrun_in: (optional) + + Returns + ------- + nextrun : Boolean + """ nextrun = False - if not list_of_clusters: list_of_clusters = config["general"]["workflow"]["subjob_clusters"][ jobtype @@ -180,7 +305,7 @@ def resubmit_recursively(config, jobtype=None, list_of_clusters=None, nextrun_in if ( cluster == config["general"]["workflow"]["first_task_in_queue"] and not nextrun_in - ): + ): # if beginning of next run? nextrun = True else: if not workflow.skip_cluster(cluster, config): @@ -199,6 +324,20 @@ def resubmit_recursively(config, jobtype=None, list_of_clusters=None, nextrun_in def _increment_date_and_run_number(config): + """ + - Incrementing + - date by adding "delta_date" to "cuirrent_date" + - run_number by adding +1 + - Updating config + + Arguments + --------- + config : dict + + Returns + ------- + config : dict + """ config["general"]["run_number"] += 1 config["general"]["current_date"] += config["general"]["delta_date"] @@ -216,6 +355,17 @@ def _increment_date_and_run_number(config): def _write_date_file(config): # self, date_file=None): + """ + Writes new date file for experiment. 
+ + Arguments + --------- + config : dict + + Returns + ------- + config : dict + """ # monitor_file = config["general"]["logfile"] monitor_file = logfiles.logfile_handle From d1f5a21fdc197aa43f8a041caee35df9ee93a1d5 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Thu, 7 Dec 2023 16:01:52 +0100 Subject: [PATCH 56/98] Some more code improvements. --- src/esm_runscripts/workflow.py | 332 ++++++++++++++++++++------------- src/esm_tests/resources | 2 +- 2 files changed, 203 insertions(+), 131 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index 66e831a0f..a10df867d 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -22,13 +22,26 @@ def __init__(self, workflow_yaml): none """ # TODO: check if key is in workflow_yaml dict - self.phases = [] - self.user_phases = [] - self.clusters = {} - self.first_task_in_queue = workflow_yaml["first_task_in_queue"] - self.last_task_in_queue = workflow_yaml["last_task_in_queue"] - self.next_run_triggered_by = workflow_yaml["next_run_triggered_by"] - # TODO: Call here the phase object ??? + self.phases = [] # list for default phases (defined in defauls.yaml) + self.user_phases = [] # list of user phases (collected by collect_all_user_phases) + self.clusters = {} # dictionary of clusters + + error = False + + if "first_task_in_queue" in workflow_yaml: self.first_task_in_queue = workflow_yaml["first_task_in_queue"] + else: error = True + if "last_task_in_queue" in workflow_yaml: self.last_task_in_queue = workflow_yaml["last_task_in_queue"] + else: error = True + if "next_run_triggered_by" in workflow_yaml: self.next_run_triggered_by = workflow_yaml["next_run_triggered_by"] + else: error = True + + if error: + err_msg = ( + f"Missing workflow keywords. " + f"Make sure the following keywords are set in defaults.yaml: " + f"``first_task_in_queue``, ``last_task_in_queue``, ``next_run_triggered_by``." 
+ ) + esm_parser.user_error("ERROR", err_msg) def get_workflow_phase_by_name(self, phase_name): """ @@ -47,7 +60,7 @@ def get_workflow_phase_by_name(self, phase_name): if phase["name"] == phase_name: return phase - def get_phases_attribs_list(self, phase_type, attrib): + def get_phases_values_list(self, phase_type, keyword): """ Returns a certain attribute for all phases as a list. @@ -55,23 +68,23 @@ def get_phases_attribs_list(self, phase_type, attrib): ---------- phase_type : str ``default`` or ``user`` - attrib : str + keyword : str Returns ------- - phases_attribs : list + phases_values : list """ if phase_type == 'user': - phases_attribs = [phase[attrib] for phase in self.user_phases] + phases_values = [phase[keyword] for phase in self.user_phases] else: - phases_attribs = [phase[attrib] for phase in self.phases] + phases_values = [phase[keyword] for phase in self.phases] - return phases_attribs + return phases_values def set_default_nproc(self, config): """ - Calculating the number of mpi tasks for each component/model/script + Calculating the number of mpi tasks for default phases and each component/model/script Parameters ---------- @@ -82,11 +95,14 @@ def set_default_nproc(self, config): self : Workflow object """ + # Get the sum of all mpi tasks tasks = calc_number_of_tasks(config) + # Write this number of tasks to phase, if + # phase will be submitted to batch system for ind, phase in enumerate(self.phases): if phase["submit_to_batch_system"]: - phase["nproc"] = tasks + set_value(phase, "nproc", tasks) return self @@ -104,7 +120,7 @@ def set_workflow_attrib(self, attrib, value): None """ - if type(getattr(self, attrib)).__name__ == "list": + if type(getattr(self, attrib)).__name__ == list: self.__dict__[attrib].append(value) else: self.__setattr__(attrib, value) @@ -147,12 +163,12 @@ def collect_all_user_phases(self, config): if "workflow" in config[model]: w_config = config[model]["workflow"] if "phases" in w_config: - # check if still tries to set 
workflow keywords + # check if there are still workflow keywords set (except 'phases') for key, value in w_config.items(): if not key == "phases": err_msg = f"``{key}`` is not allowed to be set for a workflow." esm_parser.user_error("ERROR", err_msg) - for phase in w_config["phases"]: + for phase_name in w_config["phases"]: # each phase (of a model/setup) needs to have an unique name # same phases of the same model/setup defined in different config files # are overwritten by the usual config file hierarchy @@ -161,9 +177,10 @@ def collect_all_user_phases(self, config): # check if ``new_phase`` is already defined as a default phase # look for the name of the current phase in the list of default phase names # if found, raise exception - if phase in self.get_phases_attribs_list("default", "name"): + + if phase_name in self.get_phases_values_list("default", "name"): err_msg = ( - f"The user phase ``{phase}`` " + f"The user phase ``{phase_name}`` " f"has the same name as a default workflow phase. " f"This is not allowed." ) @@ -171,35 +188,39 @@ def collect_all_user_phases(self, config): # check if the name of the new user phase (for a model/setup) does not already exist # (for another model/setup). - if phase in user_workflow_phases_names: + if phase_name in user_workflow_phases_names: err_msg = ( f"Two workflow phases have the same name " - f"``{phase}``." + f"``{phase_name}``." ) esm_parser.user_error("ERROR", err_msg) - # if user phase (for each setup/model) has a new and unique name + # if user phase (for each setup/model) has a non-default and unique name else: - phase_config = copy.deepcopy(w_config["phases"][phase]) + phase_config = copy.deepcopy(w_config["phases"][phase_name]) # add phase name - phase_config["name"] = phase - # Make sure that batch_or_shell is set to batch if submit_to_batch is true - # Should not be set by user. TODO: Remove from documentation. 
+ phase_config["name"] = phase_name + # make sure that batch_or_shell is set to batch if submit_to_batch is true + # should not be set by user. TODO: Remove from documentation if phase_config.get("submit_to_batch_system", False): phase_config["batch_or_shell"] = "batch" - # Check if run_on_queue is given if submit_to_sbatch is true + # check if run_on_queue is given if submit_to_sbatch is true if not phase_config.get("run_on_queue", False): - err_msg = f"No value for target queue given by ``run_on_queue`` for phase ``{phase}``." + err_msg = f"No value for target queue given by ``run_on_queue`` for phase ``{phase_name}``." esm_parser.user_error("ERROR", err_msg) else: phase_config["batch_or_shell"] = "shell" + # create a new user phase object for ``phase`` new_phase = WorkflowPhase(phase_config) + # append it to the list of user phases of the workflow user_workflow_phases.append(new_phase) - user_workflow_phases_names.append(phase) + user_workflow_phases_names.append(phase_name) + + # collect all user phases that are set to trigger the next run if phase_config.get("trigger_next_run", False): - user_workflow_next_run_triggered_by.append(phase) + user_workflow_next_run_triggered_by.append(phase_name) # check if more than one user phase has set trigger_next_run to true if len(user_workflow_next_run_triggered_by) > 1: err_msg = ( @@ -209,9 +230,10 @@ def collect_all_user_phases(self, config): ) esm_parser.user_error("ERROR", err_msg) elif user_workflow_next_run_triggered_by: - self.next_run_triggered_by = user_workflow_next_run_triggered_by[0] + self.set_workflow_attrib("next_run_triggered_by", user_workflow_next_run_triggered_by[0]) - self.user_phases = user_workflow_phases + # add user phases to workflow + self.set_workflow_attrib("user_phases", user_workflow_phases) # check if there are unknown phases, if yes, will give error exception unknown_phases = self.check_unknown_phases() @@ -227,7 +249,7 @@ def collect_all_user_phases(self, config): # if not, run_after will 
be set to last default phase for user_phase in self.user_phases: if not user_phase["run_before"] and not user_phase["run_after"]: - user_phase["run_after"] = self.phases[-1]["name"] + set_value(user_phase, "run_after", self.phases[-1]["name"]) err_msg = ( f"No value given for ``run_after`` or ``run_before`` " f"of user phase ``{user_phase['name']}``. " @@ -239,75 +261,87 @@ def collect_all_user_phases(self, config): return self def cluster_phases(self): - """Merge phases into clusters.""" + """ + Merge phases into clusters. + """ clusters = {} # create an empty phases list for each cluster - for cluster in self.get_phases_attribs_list("default", "cluster") + self.get_phases_attribs_list("user", "cluster"): - clusters[cluster] = {"phases": []} - # append all phases that are within the same cluster + for cluster_name in self.get_phases_values_list("default", "cluster") + self.get_phases_values_list("user", "cluster"): + clusters[cluster_name] = {"phases": []} + # collect all phases that are within the same cluster for phase in self.phases + self.user_phases: clusters[phase["cluster"]]["phases"].append(phase["name"]) - for cluster in clusters: + for cluster_name in clusters: nproc = nproc_sum = nproc_max = 0 # if only one phase in cluster - if len(clusters[cluster]["phases"]) == 1: - phase_name = clusters[cluster]["phases"][0] + if len(clusters[cluster_name]["phases"]) == 1: + phase_name = clusters[cluster_name]["phases"][0] phase = self.get_workflow_phase_by_name(phase_name) - clusters[cluster].update(phase) + clusters[cluster_name].update(phase) # if more than one phase are within the same cluster else: # fill in default phase keys for each cluster to cluster dictionary - clusters[cluster].update(WorkflowPhase({})) + clusters[cluster_name].update(WorkflowPhase({})) # create a list of all phases (dicts) that are within the same cluster phases_list = [] - for phase_name in clusters[cluster]["phases"]: + for phase_name in clusters[cluster_name]["phases"]: 
phases_list.append(self.get_workflow_phase_by_name(phase_name)) # check for inconsistencies of phase keywords within a cluster + # collect all values for keywords of WorkflowPhase in a dictionary 'keywords' keywords = {} for key in WorkflowPhase({}): keywords[key] = [] # append keyword of a phase only if not already in keywords[key] [keywords[key].append(item) for item in [phase[key] for phase in phases_list] if item not in keywords[key]] - # if there are no inconsistencies, all phases have the same values for keyword + # if there are no inconsistencies, all phases have the same values for a keyword 'key' if len(keywords[key]) == 1: - clusters[cluster][key] = keywords[key][0] - # if different phases have set different values for the same keyword/attrib + clusters[cluster_name][key] = keywords[key][0] + # if different phases have set different values for the same keyword else: - if type(clusters[cluster][key]) is list: - clusters[cluster][key] = keywords[key] + # if keyword is of type list, just add the list into the cluster + if type(clusters[cluster_name][key]) is list: + clusters[cluster_name][key] = keywords[key] + # otherwise select a single value for keyword else: - if key not in ["name", "script", "scriptdir", "order_in_cluster", "nproc", "trigger_next_run"]: + # TODO: Explain this exception handling more + if key not in ["name", "script", "script_dir", "order_in_cluster", "nproc", "trigger_next_run"]: err_msg = ( f"Mismatch for {key}") esm_parser.user_error("ERROR", err_msg) elif key == "name": - clusters[cluster]["name"] = cluster + # set keyword name to the name of the cluster + clusters[cluster_name]["name"] = cluster_name elif key == "trigger_next_run": # set key of cluster to True if key for any (at least one) of the phases is set to True - clusters[cluster][key] = any(keywords[key]) + clusters[cluster_name][key] = any(keywords[key]) +# elif key in ["script", "script_dir"]: +# for ind, phase_name in enumerate(clusters[cluster_name]["phases"]): +# phase 
= self.get_workflow_phase_by_name(phase_name) +# phase_dict = {phase["name"]: {"script": phase["script"], "script_dir": phase["script_dir"]}} +# clusters[cluster_name]["phases"][ind] = phase_dict else: # if key is set different for each phase in same cluster set to fill value (e.g. for script, scriptdir) - clusters[cluster][key] = "check phase" + clusters[cluster_name][key] = "check phase" # calculate nproc if cluster is to be submitted to sbatch system for phase in phases_list: nproc_sum += phase["nproc"] nproc_max = max(phase["nproc"], nproc_max) - if clusters[cluster].get("submit_to_batch_system", False): + if clusters[cluster_name].get("submit_to_batch_system", False): if phase["order_in_cluster"] == "concurrent": - if clusters[cluster]["order_in_cluster"] is None: - clusters[cluster]["order_in_cluster"] = "concurrent" + if clusters[cluster_name]["order_in_cluster"] is None: + clusters[cluster_name]["order_in_cluster"] = "concurrent" nproc = nproc_sum else: - clusters[cluster]["order_in_cluster"] = "sequential" + clusters[cluster_name]["order_in_cluster"] = "sequential" nproc = nproc_max - clusters[cluster]["nproc"] = nproc + clusters[cluster_name]["nproc"] = nproc # write clusters dictionary to workflow object attribute - self.clusters = clusters + self.set_workflow_attrib("clusters", clusters) return self def write_to_config(self, config): @@ -358,9 +392,9 @@ def check_user_workflow_dependency(self): independent : bool (default: False) """ independent = False - user_phases_names = self.get_phases_attribs_list('user', 'name') - run_after_list = self.get_phases_attribs_list('user', 'run_after') - run_before_list = self.get_phases_attribs_list('user', 'run_before') + user_phases_names = self.get_phases_values_list('user', 'name') + run_after_list = self.get_phases_values_list('user', 'run_after') + run_before_list = self.get_phases_values_list('user', 'run_before') # All user phases are independent from each other, if # none of the ``user_phases_names`` are 
found in the union of @@ -387,10 +421,10 @@ def check_unknown_phases(self): unknown_phases : set """ unknown_phases = [] - phases_names = self.get_phases_attribs_list('default', 'name') # list of names of all default phases - user_phases_names = self.get_phases_attribs_list('user', 'name') # list of name of all user phases - run_after = self.get_phases_attribs_list('user', 'run_after') # list of all run_after values for all user phases - run_before = self.get_phases_attribs_list('user', 'run_before') # list of all run_before values for all user phases + phases_names = self.get_phases_values_list('default', 'name') # list of names of all default phases + user_phases_names = self.get_phases_values_list('user', 'name') # list of name of all user phases + run_after = self.get_phases_values_list('user', 'run_after') # list of all run_after values for all user phases + run_before = self.get_phases_values_list('user', 'run_before') # list of all run_before values for all user phases # Filter out all elements that are None # ``filter(None, anylist)`` will filter out all items of anylist, # for which ``if item`` is false (e.g. [], "", None, {}, ''). 
@@ -418,7 +452,7 @@ def order_phases_and_clusters(self): self : Workflow object """ -# Correct workflow attributes (``last_task_in_queue``, ``first_task_in_queue``, ``next_run_triggered``) +# correct workflow attributes (``last_task_in_queue``, ``first_task_in_queue``, ``next_run_triggered``) # next_run_triggered_by is always the last phase @@ -428,36 +462,36 @@ def order_phases_and_clusters(self): # get first default phase and correct run_after, called_from # correct last_task_in_queue of workflow - next_triggered = self.next_run_triggered_by - triggered_next_run_phase = self.get_workflow_phase_by_name(next_triggered) - if next_triggered not in self.get_phases_attribs_list("default", "name"): + old_next_triggered = self.next_run_triggered_by + triggered_next_run_phase = self.get_workflow_phase_by_name(old_next_triggered) + if old_next_triggered not in self.get_phases_values_list("default", "name"): first_task_name = self.first_task_in_queue first_phase = self.get_workflow_phase_by_name(first_task_name) old_last_task_name = self.last_task_in_queue old_last_phase = self.get_workflow_phase_by_name(old_last_task_name) - old_last_phase["next_submit"].remove(first_phase["name"]) - old_last_phase["next_submit"].append(next_triggered) - old_last_phase["run_before"] = next_triggered - old_last_phase["trigger_next_run"] = False - if triggered_next_run_phase["cluster"] not in self.clusters[old_last_phase["cluster"]]["next_submit"]: - self.clusters[old_last_phase["cluster"]]["next_submit"].append(triggered_next_run_phase["cluster"]) - self.clusters[old_last_phase["cluster"]]["run_before"] = triggered_next_run_phase["cluster"] - self.clusters[old_last_phase["cluster"]]["trigger_next_run"] = False - - first_phase["run_after"] = next_triggered - first_phase["called_from"] = next_triggered - self.clusters[first_phase["cluster"]]["run_after"] = triggered_next_run_phase["cluster"] - self.clusters[first_phase["cluster"]]["called_from"] = triggered_next_run_phase["cluster"] - - 
self.clusters[triggered_next_run_phase["cluster"]]["next_submit"].append(first_phase["cluster"]) + remove_value(old_last_phase, "next_submit", first_phase["name"]) + set_value(old_last_phase, "next_submit", old_next_triggered) + set_value(old_last_phase, "run_before", old_next_triggered) + set_value(old_last_phase, "trigger_next_run", False) + + set_value(self.clusters[old_last_phase["cluster"]], "next_submit", triggered_next_run_phase["cluster"], if_not_in=True) + set_value(self.clusters[old_last_phase["cluster"]], "run_before", triggered_next_run_phase["cluster"]) + set_value(self.clusters[old_last_phase["cluster"]], "trigger_next_run", False) + + set_value(first_phase, "run_after", old_next_triggered) + set_value(first_phase, "called_from" ,old_next_triggered) + set_value(self.clusters[first_phase["cluster"]], "run_after", triggered_next_run_phase["cluster"]) + set_value(self.clusters[first_phase["cluster"]], "called_from", triggered_next_run_phase["cluster"]) + + set_value(self.clusters[triggered_next_run_phase["cluster"]], "next_submit" , first_phase["cluster"]) self.clusters[triggered_next_run_phase["cluster"]]["run_before"] = first_phase["cluster"] self.clusters[triggered_next_run_phase["cluster"]]["run_after"] = old_last_phase["cluster"] - self.last_task_in_queue = next_triggered + self.set_workflow_attrib("last_task_in_queue", old_next_triggered) -# Intergrate new user phases by correcting next_submit, called_from, run_after, run_before +# intergrate new user phases by correcting next_submit, called_from, run_after, run_before # Set "next_submit" and "called_from" # "next_submit" which phase/cluster will be called next (run_after of the next phase) @@ -473,12 +507,12 @@ def order_phases_and_clusters(self): for phase2 in self.phases + self.user_phases: if phase2.get("run_after", None): if phase2["name"] not in next_submits_phases[phase2["run_after"]]: - next_submits_phases[phase2["run_after"]].append(phase2["name"]) - phase2["called_from"] = 
phase2["run_after"] + next_submits_phases[phase2["run_after"]].append(phase2["name"]) # use set_value ??? + set_value(phase2, "called_from",phase2["run_after"]) if self.clusters[phase2["cluster"]].get("run_after", None): if phase2["cluster"] not in next_submits_clusters[self.clusters[phase2["cluster"]]["run_after"]]: next_submits_clusters[self.clusters[phase2["cluster"]]["run_after"]].append(phase2["cluster"]) - self.clusters[phase2["cluster"]]["called_from"] = self.clusters[phase2["cluster"]]["run_after"] + set_value(self.clusters[phase2["cluster"]], "called_from", self.clusters[phase2["cluster"]]["run_after"]) else: # if only run_before is set, e.g. to add a phase at the beginning of a run if phase2.get("run_before", None): @@ -492,15 +526,16 @@ def order_phases_and_clusters(self): next_submits_phases[self.last_task_in_queue].append(phase2["name"]) next_submits_phases[self.last_task_in_queue].remove(self.first_task_in_queue) next_submits_clusters[last_phase["cluster"]].remove(old_first_phase["cluster"]) - phase2["run_after"] = self.last_task_in_queue - last_phase["run_before"] = phase2["name"] - self.clusters[last_phase["cluster"]]["run_before"] = phase2["name"] - self.clusters[old_first_phase["cluster"]]["run_after"] = phase2["name"] - self.clusters[old_first_phase["cluster"]]["called_from"] = phase2["name"] - self.clusters[phase2["cluster"]]["called_from"] = last_phase["cluster"] - self.clusters[phase2["cluster"]]["run_after"] = last_phase["cluster"] - last_phase["next_submit"].append(phase2["name"]) - self.first_task_in_queue = phase2["name"] + set_value(phase2, "run_after", self.last_task_in_queue) + set_value(last_phase, "run_before", phase2["name"]) + set_value(self.clusters[last_phase["cluster"]], "run_before", phase2["name"]) + set_value(self.clusters[old_first_phase["cluster"]], "run_after", phase2["name"]) + set_value(self.clusters[old_first_phase["cluster"]], "called_from", phase2["name"]) + set_value(self.clusters[phase2["cluster"]], 
"called_from",last_phase["cluster"]) + set_value(self.clusters[phase2["cluster"]], "run_after", last_phase["cluster"]) + set_value(last_phase, "next_submit", phase2["name"]) + + self.set_workflow_attrib("first_task_in_queue", phase2["name"]) for cluster in self.clusters: if next_submits_clusters[cluster]: @@ -508,7 +543,7 @@ def order_phases_and_clusters(self): for phase3 in self.phases + self.user_phases: if next_submits_phases[phase3["name"]]: - phase3.set_attrib("next_submit", next_submits_phases[phase3["name"]]) + phase3["next_submit"] = next_submits_phases[phase3["name"]] return self @@ -550,24 +585,21 @@ def prepend_newrun_job(self): new_first_phase = WorkflowPhase(config_new_first_phase) # reset last_task attributes - last_phase["next_submit"].append("newrun") - self.clusters[last_phase["cluster"]]["next_submit"] = ["newrun"] - self.clusters[last_phase["cluster"]]["run_before"] = "newrun" + set_value(last_phase, "next_submit", "newrun") + set_value(self.clusters[last_phase["cluster"]], "next_submit", "newrun", reset=True) + set_value(self.clusters[last_phase["cluster"]], "run_before", "newrun") self.clusters[new_first_phase["cluster"]] = new_first_phase - self.clusters[new_first_phase["cluster"]]["phases"] = ["newrun"] - last_phase["next_submit"].remove(first_phase["cluster"]) - # why does the next line not work??? 
- # last_phase.set_attrib("next_submit", "newrun") - # last_phase.remove_attrib("next_submit", first_phase["cluster"]) + set_value(self.clusters[new_first_phase["cluster"]], "phases", ["newrun"], new=True) + remove_value(last_phase, "next_submit", first_phase["cluster"]) # reset first_task attributes - first_phase.set_attrib("called_from", "newrun") - first_phase.set_attrib("run_after", "newrun") - self.clusters[first_phase["cluster"]]["called_from"] = "newrun" - self.clusters[first_phase["cluster"]]["run_after"] = "newrun" + set_value(first_phase, "called_from", "newrun") + set_value(first_phase, "run_after", "newrun") + set_value(self.clusters[first_phase["cluster"]], "called_from", "newrun") + set_value(self.clusters[first_phase["cluster"]], "run_after", "newrun") # reset workflow attributes - self.first_task_in_queue = "newrun" + self.set_workflow_attrib("first_task_in_queue", "newrun") # Set new phase to beginning of default phase list self.phases.insert(0, new_first_phase) @@ -668,17 +700,55 @@ def __init__(self, phase): if self.get("cluster", None) is None: self["cluster"] = self["name"] - def set_attrib(self, attrib, value): - if type(self[attrib]) == "list": - self[attrib].append(value) - else: - self[attrib] = value +def set_value(phase, keyword, value, if_not_in=False, reset=False, new=False): + """ + Set a value for a given keyword. - def remove_attrib(self, attrib, value): - if type(self[attrib]) == "list": - self[attrib].remove(value) + Parameters + ---------- + phase : dict or phase object + Phase or cluster + keyword : str + value : str or list + if_not_in : boolean (optional) + False (default) - if value should always be appended. + True - if value should only be appended if not already in value list. + reset : boolean (optional) + False (default) - if only append to value list. + True - if value list should be reset with new value list. + new : boolean (optional) + False (default) - for keywords that are already in phase. 
+ True - if a new keyword should be created in phase and set to value. + """ + if not new: + if type(phase[keyword]) == list: + if if_not_in: + if value not in phase[keyword]: + phase[keyword].append(value) + elif reset: + phase[keyword] = [value] + else: + phase[keyword].append(value) else: - self[attrib] = None + phase[keyword] = value + else: + phase[keyword] = value + +def remove_value(phase, keyword, value): + """ + Remove value for keyword from phase. + + Parameters + ---------- + phase : dict or phase object + Phase or cluster + keyword : str + value : str + """ + if type(phase[keyword]) == list: + phase[keyword].remove(value) + else: + phase[keyword] = None def assemble_workflow(config): @@ -766,21 +836,23 @@ def init_default_workflow(config): return workflow -def get_phase_attrib(workflow_phases, phase_name, attrib): - if not type(workflow_phases) is list: - workflow_phases = [workflow_phases] - for phase in workflow_phases: - if phase["name"] == phase_name: - value = phase[attrib] - return value - def calc_number_of_tasks(config): """ Calculates the total number of needed tasks in phase compute TODO: make this phase method??? Or recipe entry??? + + Parameters + ---------- + config : dict + + Returns + ------- + tasks : int + Number of task for all models """ + tasks = 0 for model in config["general"]["valid_model_names"]: if "nproc" in config[model]: diff --git a/src/esm_tests/resources b/src/esm_tests/resources index 95190bc36..3c9d2c97a 160000 --- a/src/esm_tests/resources +++ b/src/esm_tests/resources @@ -1 +1 @@ -Subproject commit 95190bc364f8bc80e7af342514223ea58b29feb1 +Subproject commit 3c9d2c97a849a27c8a57abd0e35ca1cef09783b0 From 21fec587e347a6b56aa7b96ad76eb94b3260abad Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Fri, 8 Dec 2023 17:20:00 +0100 Subject: [PATCH 57/98] Small changes in some comments. 
--- src/esm_runscripts/workflow.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index a10df867d..ec5c5bf8c 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -773,21 +773,21 @@ def assemble_workflow(config): # TODO: Put it into other method??? workflow = workflow.set_default_nproc(config) - # 3. Read in phases from runscript and config files + # 4. Collect all user phases from runscript and config files workflow = workflow.collect_all_user_phases(config) - # 4. Cluster phases + # 5. Cluster phases workflow = workflow.cluster_phases() - # 4. Order user workflows into default workflow wrt. phase attributs. + # 6. Order user phases into default phases wrt. phase keywords workflow = workflow.order_phases_and_clusters() - # 5. create new first phase of type SimulationSetup, if first_task_in_queue is + # 7. create new first phase of type SimulationSetup, if first_task_in_queue is # a user phase (type batch or shell) workflow = workflow.prepend_newrun_job() - # 6. write the workflow to config - # 7. Remove old worklow from config + # 8. write the workflow to config + # 9. Remove old worklow from config config = workflow.write_to_config(config) # Set "jobtype" for the first task??? From 67d08f83d057056b6913d63035e5b09d2341bd25 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Tue, 12 Dec 2023 15:45:23 +0100 Subject: [PATCH 58/98] Remove calc of nproc in workflow.py. --- src/esm_runscripts/workflow.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index ec5c5bf8c..46e74d7a9 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -1,6 +1,7 @@ import copy import esm_parser +import pygraphviz as pgv import pdb @@ -765,29 +766,25 @@ def assemble_workflow(config): ------- config : dict """ - # 1. 
Generate default workflow object and - # 2. initialize default workflow phases from defaults.yaml + # - Generate default workflow object and + # - initialize default workflow phases from defaults.yaml workflow = init_default_workflow(config) - # 3. Calc mpi tasks for batch jobs of default phases - # TODO: Put it into other method??? - workflow = workflow.set_default_nproc(config) - - # 4. Collect all user phases from runscript and config files + # - Collect all user phases from runscript and config files workflow = workflow.collect_all_user_phases(config) - # 5. Cluster phases + # - Cluster phases workflow = workflow.cluster_phases() - # 6. Order user phases into default phases wrt. phase keywords + # - Order user phases into default phases wrt. phase keywords workflow = workflow.order_phases_and_clusters() - # 7. create new first phase of type SimulationSetup, if first_task_in_queue is - # a user phase (type batch or shell) + # - create new first phase of type SimulationSetup, if first_task_in_queue is + # a user phase (type batch or shell) workflow = workflow.prepend_newrun_job() - # 8. write the workflow to config - # 9. Remove old worklow from config + # - write the workflow to config + # - Remove old worklow from config config = workflow.write_to_config(config) # Set "jobtype" for the first task??? @@ -926,6 +923,7 @@ def display_workflow_sequence(config, display=True): esm_parser.user_note("Workflow sequence (cluster [phases])", f"{workflow_order}") else: workflow_order = workflow_order.replace("``", "") + return workflow_order From 582caa13908455e5ff3a31b31f48f1ad7cdd9274 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Tue, 12 Dec 2023 16:36:44 +0100 Subject: [PATCH 59/98] Remove keyword run_on_queue (not used) and give error by missing run_after/run_before. 
--- src/esm_runscripts/workflow.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index 46e74d7a9..8b7714a84 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -206,9 +206,9 @@ def collect_all_user_phases(self, config): if phase_config.get("submit_to_batch_system", False): phase_config["batch_or_shell"] = "batch" # check if run_on_queue is given if submit_to_sbatch is true - if not phase_config.get("run_on_queue", False): - err_msg = f"No value for target queue given by ``run_on_queue`` for phase ``{phase_name}``." - esm_parser.user_error("ERROR", err_msg) +# if not phase_config.get("run_on_queue", False): +# err_msg = f"No value for target queue given by ``run_on_queue`` for phase ``{phase_name}``." +# esm_parser.user_error("ERROR", err_msg) else: phase_config["batch_or_shell"] = "shell" @@ -254,10 +254,9 @@ def collect_all_user_phases(self, config): err_msg = ( f"No value given for ``run_after`` or ``run_before`` " f"of user phase ``{user_phase['name']}``. " - f"Set it to last default phase in workflow: " - f"``{self.phases[-1]['name']}``." + f"Please set either run_after or run_before." ) - esm_parser.user_note("NOTE", err_msg) + esm_parser.user_error("NOTE", err_msg) return self @@ -669,7 +668,7 @@ def __init__(self, phase): self["run_after"] = None self["trigger_next_run"] = False # needed self["submit_to_batch_system"] = False # needed - self["run_on_queue"] = None +# self["run_on_queue"] = None self["cluster"] = None self["next_submit"] = [] # needed self["called_from"] = None # needed From 698a523596fff4209d1adc6ccec8fde898f7aacc Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Wed, 13 Dec 2023 16:30:09 +0100 Subject: [PATCH 60/98] Adapted log-file name and append mode. 
--- src/esm_runscripts/compute.py | 3 ++- src/esm_runscripts/helpers.py | 4 ++-- src/esm_runscripts/prepexp.py | 3 ++- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/esm_runscripts/compute.py b/src/esm_runscripts/compute.py index 596db3f6b..d3f829720 100644 --- a/src/esm_runscripts/compute.py +++ b/src/esm_runscripts/compute.py @@ -281,12 +281,13 @@ def initialize_experiment_logfile(config): # Write trace-log file now that we know where to do that if "trace_sink" in dir(logger): experiment_dir = config["general"]["experiment_dir"] + jobtype = config["general"]["jobtype"] expid = config["general"]["expid"] it_coupled_model_name = config["general"]["iterative_coupled_model"] datestamp = config["general"]["run_datestamp"] logfile_path = ( f"{experiment_dir}/log/" - f"{expid}_{it_coupled_model_name}esm_runscripts_{datestamp}.log" + f"{expid}_{it_coupled_model_name}esm_runscripts_{jobtype}_{datestamp}.log" ) logger.trace_sink.def_path(logfile_path) diff --git a/src/esm_runscripts/helpers.py b/src/esm_runscripts/helpers.py index 6c59005e0..f143b3ae1 100644 --- a/src/esm_runscripts/helpers.py +++ b/src/esm_runscripts/helpers.py @@ -103,7 +103,7 @@ def write_to_log(config, message, message_sep=None): ---- The user can control two things regarding the logfile format: - 1) The datestamp formatting, whjich is taken from the config + 1) The datestamp formatting, which is taken from the config section ``general.experiment_log_file_dateformat``. 2) The message separators; taken from ``general.experiment_log_file_message_sep``. Note that if the @@ -304,7 +304,7 @@ def def_path(self, path): Path of the logging file. 
""" self.path = path - self.write_log(self.log_record, "w") + self.write_log(self.log_record, "a") ################################################################################ diff --git a/src/esm_runscripts/prepexp.py b/src/esm_runscripts/prepexp.py index 8ac0359cd..46904788e 100644 --- a/src/esm_runscripts/prepexp.py +++ b/src/esm_runscripts/prepexp.py @@ -246,6 +246,7 @@ def initialize_experiment_logfile(config): """ experiment_dir = config["general"]["experiment_dir"] + jobtype = config["general"]["jobtype"] expid = config["general"]["expid"] it_coupled_model = config["general"]["iterative_coupled_model"] datestamp = config["general"]["run_datestamp"] @@ -272,7 +273,7 @@ def initialize_experiment_logfile(config): if "trace_sink" in dir(logger): logfile_path = ( f"{experiment_dir}/log/" - f"{expid}_{it_coupled_model}esm_runscripts_{datestamp}.log" + f"{expid}_{it_coupled_model}esm_runscripts_{jobtype}_{datestamp}.log" ) logger.trace_sink.def_path(logfile_path) From bc6c0828f063ee2af0e691975af1203b63d57f20 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Wed, 13 Dec 2023 16:31:21 +0100 Subject: [PATCH 61/98] Removed run_on_queue from default phases. --- configs/esm_software/esm_runscripts/defaults.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/esm_software/esm_runscripts/defaults.yaml b/configs/esm_software/esm_runscripts/defaults.yaml index d8234199c..0bb88502d 100644 --- a/configs/esm_software/esm_runscripts/defaults.yaml +++ b/configs/esm_software/esm_runscripts/defaults.yaml @@ -37,7 +37,7 @@ workflow: order_in_cluster: sequential run_after: prepcompute run_before: tidy - run_on_queue: ${computer.partitions.pp.name} + #run_on_queue: ${computer.partitions.pp.name} submit_to_batch_system: True tidy: batch_or_shell: SimulationSetup From 05a03abc0543763bc5a38e3f8401aaf6d6493a70 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Wed, 13 Dec 2023 16:32:12 +0100 Subject: [PATCH 62/98] Removed run_on_queue. 
--- configs/setups/awicm3/awicm3.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/setups/awicm3/awicm3.yaml b/configs/setups/awicm3/awicm3.yaml index 052cb67b4..17cd645f2 100644 --- a/configs/setups/awicm3/awicm3.yaml +++ b/configs/setups/awicm3/awicm3.yaml @@ -345,7 +345,7 @@ oifs: postprocessing: batch_or_shell: batch order_in_cluster: concurrent - run_on_queue: ${computer.partitions.pp.name} + #run_on_queue: ${computer.partitions.pp.name} run_after: tidy script_dir: ${general.esm_function_dir}/setups/awicm3 submit_to_batch_system: True From 8f75e6fe629ebb0e8d7becf54f88b4e1dbfe69e4 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Wed, 13 Dec 2023 16:33:31 +0100 Subject: [PATCH 63/98] Added START and END statement to logfile of recipe steps. --- src/esm_plugin_manager/esm_plugin_manager.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/esm_plugin_manager/esm_plugin_manager.py b/src/esm_plugin_manager/esm_plugin_manager.py index 3603115b3..698eb05e5 100644 --- a/src/esm_plugin_manager/esm_plugin_manager.py +++ b/src/esm_plugin_manager/esm_plugin_manager.py @@ -173,7 +173,7 @@ def work_through_recipe(recipe, plugins, config): if config["general"].get("verbose", False): # diagnostic message of which recipe step is being executed message = ( - f"::: Executing the step: {workitem} " + f"::: START Executing the step: {workitem} " f"(step [{index}/{len(recipes)}] of the job: " f'{recipe["job_type"]})' ) @@ -203,6 +203,18 @@ def work_through_recipe(recipe, plugins, config): thismodule = importlib.util.module_from_spec(spec) spec.loader.exec_module(thismodule) config = getattr(thismodule, workitem)(config) + if config["general"].get("verbose", False): + # diagnostic message of which recipe step is being executed + message = ( + f"::: END Executing the step: {workitem} " + f"(step [{index}/{len(recipes)}] of the job: " + f'{recipe["job_type"]})' + ) + + print() + print("=" * len(message)) + 
print(message) + print("=" * len(message)) return config From 4bfff6a471647f5b860a31f71e95d93ff602e9fd Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Wed, 13 Dec 2023 16:34:44 +0100 Subject: [PATCH 64/98] Added comments. --- src/esm_runscripts/resubmit.py | 21 ++++++++++++++++++++- src/esm_runscripts/sim_objects.py | 6 +++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/src/esm_runscripts/resubmit.py b/src/esm_runscripts/resubmit.py index e2ecbd0d8..717a0416a 100644 --- a/src/esm_runscripts/resubmit.py +++ b/src/esm_runscripts/resubmit.py @@ -248,6 +248,19 @@ def maybe_resubmit(config): """ jobtype = config["general"]["jobtype"] + # check if nextrun starts??? + # this resubmits any following jobtypes/phases until nextrun is true + # here nextrun is always set to true (if resubmit_recursively is finished) + + # cases: 1. it is the beginning of (next) run: + # - resubmit_recursively returns true but does not do anything except for returning true + # - check if end of simulation -> return + # - returns if iterative coupling, why ??? + # - if not end of simulation and not iterative_coupling -> calls itself again with nextrun_in=True which leads to case 2. + # 2. it is NOT the beginning if (next) run: + # it will start to loop over all remaining clusters to check if it can sumbit something (SimulationSetup, sbatch, shell) and do so, + # until first start of next run is reached. + # 3. 
nextrun is fals if no entries in next_submit for that particular jobtype/cluster nextrun = resubmit_recursively(config, jobtype=jobtype) if nextrun: # submit list contains stuff from next run @@ -296,22 +309,28 @@ def resubmit_recursively(config, jobtype=None, list_of_clusters=None, nextrun_in nextrun : Boolean """ nextrun = False + # get a list of clusters that follow the current jobtype if not list_of_clusters: list_of_clusters = config["general"]["workflow"]["subjob_clusters"][ jobtype ].get("next_submit", []) for cluster in list_of_clusters: + # if beginning of next run if ( cluster == config["general"]["workflow"]["first_task_in_queue"] and not nextrun_in - ): # if beginning of next run? + ): nextrun = True + # if not at the beginning of a run else: + # and cluster is not going to be skipped if not workflow.skip_cluster(cluster, config): submission_type = get_submission_type(cluster, config) if submission_type == "SimulationSetup": + # create the SimulationSetup object for the this/next jobtype resubmit_SimulationSetup(config, cluster) + # or submits to batch or shell if not check run elif submission_type in ["batch", "shell"]: resubmit_batch_or_shell(config, submission_type, cluster) else: diff --git a/src/esm_runscripts/sim_objects.py b/src/esm_runscripts/sim_objects.py index a9353d41d..b1c08fda9 100644 --- a/src/esm_runscripts/sim_objects.py +++ b/src/esm_runscripts/sim_objects.py @@ -15,6 +15,7 @@ import esm_parser +#import pdb class SimulationSetup(object): def __init__(self, command_line_config=None, user_config=None): @@ -51,7 +52,7 @@ def __call__(self, kill_after_submit=True): self.inspect() helpers.end_it_all(self.config) - # Run the preexp recipe + # Run the prepexp recipe self.config = prepexp.run_job(self.config) # self.pseudocall(kill_after_submit) @@ -83,12 +84,15 @@ def __call__(self, kill_after_submit=True): ].replace("observe_", "") # that last line is necessary so that maybe_resubmit knows which # cluster to look up in the workflow + # 
because all cluster with batch_or_shell=sbatch will be called + # esm_runscripts ... -t observe_ ... else: self.assembler() resubmit.maybe_resubmit(self.config) + # if this line is reached, the run is submitted and running or finished self.config = logfiles.finalize_logfiles(self.config, org_jobtype) if self.config["general"]["submitted"]: From 18fd44a719ce407f8cf6689dd6ea8a033e7184f6 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Thu, 14 Dec 2023 08:35:16 +0100 Subject: [PATCH 65/98] Added comments. --- src/esm_runscripts/sim_objects.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/esm_runscripts/sim_objects.py b/src/esm_runscripts/sim_objects.py index b1c08fda9..8bf974b3f 100644 --- a/src/esm_runscripts/sim_objects.py +++ b/src/esm_runscripts/sim_objects.py @@ -52,7 +52,7 @@ def __call__(self, kill_after_submit=True): self.inspect() helpers.end_it_all(self.config) - # Run the prepexp recipe + # Run the prepexp recipe always before every jobtype/cluster self.config = prepexp.run_job(self.config) # self.pseudocall(kill_after_submit) @@ -60,6 +60,8 @@ def __call__(self, kill_after_submit=True): org_jobtype = str(self.config["general"]["jobtype"]) self.config = logfiles.initialize_logfiles(self.config, org_jobtype) + # if not check run??? 
+ # set stdout and stderr to lofile if self.config["general"]["submitted"]: old_stdout = sys.stdout old_stderr = sys.stderr From cdd7c2018a21c3767e605ee804d1b6f9f93ad9c3 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Thu, 14 Dec 2023 14:52:04 +0100 Subject: [PATCH 66/98] Change comment --- src/esm_runscripts/batch_system.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/esm_runscripts/batch_system.py b/src/esm_runscripts/batch_system.py index 4f8461168..82cbccb53 100644 --- a/src/esm_runscripts/batch_system.py +++ b/src/esm_runscripts/batch_system.py @@ -518,7 +518,7 @@ def write_simple_runscript(config, cluster, batch_or_shell="batch"): # dummy = 0 else: # "normal" case dummy = 0 -# was macht das hier? wo/wie wird submits_abother_job definiert? + # check if this cluster has has something to submit (next_submit not empty) if submits_another_job(config, cluster): # and batch_or_shell == "batch": # -j ? is that used somewhere? I don't think so, replaced by workflow # " -j "+ config["general"]["jobtype"] From 638a1eac3dc346b80d20dd2336611b923b0e66a9 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Thu, 14 Dec 2023 14:52:44 +0100 Subject: [PATCH 67/98] Added jobid to logfiles. 
--- src/esm_runscripts/logfiles.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/esm_runscripts/logfiles.py b/src/esm_runscripts/logfiles.py index 102c0d233..16a635491 100644 --- a/src/esm_runscripts/logfiles.py +++ b/src/esm_runscripts/logfiles.py @@ -62,10 +62,16 @@ def finalize_logfiles(config, org_jobtype): def set_logfile_name(config, jobtype=None): + jobid = str(config["general"].get("jobid", None)) if not jobtype: jobtype = config["general"]["jobtype"] + if jobid: + jobid = f"_{jobid}" + else: + jobid = "_nojobid" + filejobtype = jobtype # if "observe" in filejobtype: # filejobtype = filejobtype.replace("observe_", "") @@ -92,6 +98,7 @@ def set_logfile_name(config, jobtype=None): + filejobtype + "_" + config["general"]["run_datestamp"] + + jobid + ".log" ) From ff0724ca270197570219ee3050027e7d68f96cd5 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Fri, 15 Dec 2023 10:56:07 +0100 Subject: [PATCH 68/98] Comment out unused functions. --- src/esm_runscripts/workflow.py | 104 ++++++++++++++++----------------- 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index 8b7714a84..7159995c3 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -83,29 +83,29 @@ def get_phases_values_list(self, phase_type, keyword): return phases_values - def set_default_nproc(self, config): - """ - Calculating the number of mpi tasks for default phases and each component/model/script - - Parameters - ---------- - config : dict - - Returns - ------- - self : Workflow object - """ - - # Get the sum of all mpi tasks - tasks = calc_number_of_tasks(config) - - # Write this number of tasks to phase, if - # phase will be submitted to batch system - for ind, phase in enumerate(self.phases): - if phase["submit_to_batch_system"]: - set_value(phase, "nproc", tasks) - - return self +# def set_default_nproc(self, config): +# """ +# Calculating the number of mpi tasks for 
default phases and each component/model/script +# +# Parameters +# ---------- +# config : dict +# +# Returns +# ------- +# self : Workflow object +# """ +# +# # Get the sum of all mpi tasks +# tasks = calc_number_of_tasks(config) +# +# # Write this number of tasks to phase, if +# # phase will be submitted to batch system +# for ind, phase in enumerate(self.phases): +# if phase["submit_to_batch_system"]: +# set_value(phase, "nproc", tasks) +# +# return self def set_workflow_attrib(self, attrib, value): """ @@ -833,35 +833,35 @@ def init_default_workflow(config): return workflow -def calc_number_of_tasks(config): - """ - Calculates the total number of needed tasks - in phase compute - TODO: make this phase method??? Or recipe entry??? - - Parameters - ---------- - config : dict - - Returns - ------- - tasks : int - Number of task for all models - """ - - tasks = 0 - for model in config["general"]["valid_model_names"]: - if "nproc" in config[model]: - tasks += config[model]["nproc"] - elif "nproca" in config[model] and "nprocb" in config[model]: - tasks += config[model]["nproca"] * config[model]["nprocb"] - if "nprocar" in config[model] and "nprocbr" in config[model]: - if ( - config[model]["nprocar"] != "remove_from_namelist" - and config[model]["nprocbr"] != "remove_from_namelist" - ): - tasks += config[model]["nprocar"] * config[model]["nprocbr"] - return tasks +#def calc_number_of_tasks(config): +# """ +# Calculates the total number of needed tasks +# in phase compute +# TODO: make this phase method??? Or recipe entry??? 
+# +# Parameters +# ---------- +# config : dict +# +# Returns +# ------- +# tasks : int +# Number of task for all models +# """ +# +# tasks = 0 +# for model in config["general"]["valid_model_names"]: +# if "nproc" in config[model]: +# tasks += config[model]["nproc"] +# elif "nproca" in config[model] and "nprocb" in config[model]: +# tasks += config[model]["nproca"] * config[model]["nprocb"] +# if "nprocar" in config[model] and "nprocbr" in config[model]: +# if ( +# config[model]["nprocar"] != "remove_from_namelist" +# and config[model]["nprocbr"] != "remove_from_namelist" +# ): +# tasks += config[model]["nprocar"] * config[model]["nprocbr"] +# return tasks def display_workflow(config): From e285372c6d6bbd0659d57de8e8d57b5f571d03e2 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Mon, 15 Jan 2024 12:26:43 +0100 Subject: [PATCH 69/98] Added -s argument to read current_date; some code syntax optimisations. --- src/esm_runscripts/prepare.py | 72 ++++++++++++++++++++++++++++------- 1 file changed, 58 insertions(+), 14 deletions(-) diff --git a/src/esm_runscripts/prepare.py b/src/esm_runscripts/prepare.py index 2b62bf4fa..59870d8c4 100644 --- a/src/esm_runscripts/prepare.py +++ b/src/esm_runscripts/prepare.py @@ -68,10 +68,53 @@ def _read_date_file(config): date = config["general"].get("initial_date", "18500101") run_number = 1 write_file = True + + date_c = config["general"].get("current_date", None) + + if date_c is not None: + date_fdf = Date(date) + date_c = Date(str(config["general"]["current_date"])) + run_number_c = int(config["general"]["run_number"]) + last_jobtype = config["general"].get("last_jobtype", "") + isresubmitted = last_jobtype == config["general"]["jobtype"] + + if date_fdf != date_c: + + msg = ( + f"``Date`` and ``run_number`` are ``not`` taken from date file, " + f"but from command_line argument (provided by -s or --start_date). 
" + f"The given start_date ({date_c}) and run_number ({run_number_c}) " + f"are different from the values " + f"in the current date file of your experiment ({date}, {run_number}). " + f"Your experiment may now be in a non consecutive state. " + f"Please confirm if you want to continue:" + ) + esm_parser.user_note("Detached experiment:", msg) + proceed = "" + if isresubmitted: + proceed = questionary.select( + "Do you want to continue?", + choices=[ + f"Yes, with date from command line argument: {str(config['general']['current_date'])}", + f"Yes, with date from date file: {date}", + "No, cancel." + ]).ask() + + if 'Yes, with date from command line argument' in proceed: + date = str(date_c) + run_number = run_number_c + elif 'Yes, with date from date file' in proceed: + date = date + run_number = run_number + else: + esm_parser.user_note("The experiment will be cancelled:", f"You cancelled the experiment due to date discrepancies.") + sys.exit(1) + config["general"]["run_number"] = run_number config["general"]["current_date"] = date logging.info("current_date = %s", date) logging.info("run_number = %s", run_number) + return config @@ -274,7 +317,7 @@ def _initialize_calendar(config): if config["general"]["reset_calendar_to_last"]: config = find_last_prepared_run(config) config = set_most_dates(config) - if not "iterative_coupling" in config["general"]: + if "iterative_coupling" not in config["general"]: config["general"]["chunk_number"] = 1 if config["general"]["run_number"] == 1: @@ -346,7 +389,7 @@ def set_leapyear(config): config["general"]["leapyear"] = config[model]["leapyear"] break - if not "leapyear" in config["general"]: + if "leapyear" not in config["general"]: for model in config["general"]["valid_model_names"]: config[model]["leapyear"] = True config["general"]["leapyear"] = True @@ -634,39 +677,39 @@ def set_parent_info(config): # Make sure "ini_parent_dir" and "ini_restart_dir" both work: for model in config["general"]["valid_model_names"]: # If only 
ini_restart_* variables are used in runcscript, set ini_parent_* to the same values - if not "ini_parent_dir" in config[model]: + if "ini_parent_dir" not in config[model]: if "ini_restart_dir" in config[model]: config[model]["ini_parent_dir"] = config[model]["ini_restart_dir"] - if not "ini_parent_exp_id" in config[model]: + if "ini_parent_exp_id" not in config[model]: if "ini_restart_exp_id" in config[model]: config[model]["ini_parent_exp_id"] = config[model]["ini_restart_exp_id"] - if not "ini_parent_date" in config[model]: + if "ini_parent_date" not in config[model]: if "ini_restart_date" in config[model]: config[model]["ini_parent_date"] = config[model]["ini_restart_date"] # check if parent is defined in esm_tools style # (only given for setup) setup = config["general"]["setup_name"] - if not setup in config: + if setup not in config: setup = "general" if "ini_parent_exp_id" in config[setup]: for model in config["general"]["valid_model_names"]: - if not "ini_parent_exp_id" in config[model]: + if "ini_parent_exp_id" not in config[model]: config[model]["ini_parent_exp_id"] = config[setup]["ini_parent_exp_id"] if "ini_parent_date" in config[setup]: for model in config["general"]["valid_model_names"]: - if not "ini_parent_date" in config[model]: + if "ini_parent_date" not in config[model]: config[model]["ini_parent_date"] = config[setup]["ini_parent_date"] if "ini_parent_dir" in config[setup]: for model in config["general"]["valid_model_names"]: - if not "ini_parent_dir" in config[model]: + if "ini_parent_dir" not in config[model]: config[model]["ini_parent_dir"] = ( config[setup]["ini_parent_dir"] + "/" + model ) # Get correct parent info for model in config["general"]["valid_model_names"]: - if config[model]["lresume"] == True and config["general"]["run_number"] == 1: + if config[model]["lresume"] is True and config["general"]["run_number"] == 1: config[model]["parent_expid"] = config[model]["ini_parent_exp_id"] if "parent_date" not in config[model]: 
config[model]["parent_date"] = config[model]["ini_parent_date"] @@ -726,6 +769,7 @@ def add_vcs_info(config): yaml.dump(vcs_versions, f) return config + def check_vcs_info_against_last_run(config): """ Ensures that the version control info for two runs is identical between the @@ -777,7 +821,6 @@ def check_vcs_info_against_last_run(config): If you are **sure** that this is OK, you can set 'general.allow_vcs_differences' to True to avoid this check. """) - return config @@ -811,7 +854,7 @@ def initialize_batch_system(config): def initialize_coupler(config): - if config["general"]["standalone"] == False: + if config["general"]["standalone"] is False: from . import coupler base_dir = config["general"]["base_dir"] @@ -882,6 +925,7 @@ def check_config_for_warnings_errors(config): return config + def warn_error(config, trigger, note_function): """ Checks the ``sections`` of the ``config`` for a given ``trigger`` (``"error"`` or @@ -940,7 +984,7 @@ def warn_error(config, trigger, note_function): Method to report the note """ # Sufixes for the warning special case - if trigger=="warning": + if trigger == "warning": sufix_name = f" WARNING" else: sufix_name = f"" @@ -967,7 +1011,7 @@ def warn_error(config, trigger, note_function): # needs to halt, and the user has not defined the # ``--ignore-config-warnings`` flag in the ``esm_runscripts`` call if ( - trigger=="warning" + trigger == "warning" and config["general"].get("isinteractive") and action_info.get("ask_user_to_continue", False) and not config["general"].get("ignore_config_warnings", False) From 3bbaaae7264e3a9f4a8974565d864bfecf396e48 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Mon, 15 Jan 2024 12:27:28 +0100 Subject: [PATCH 70/98] Test, to remove maybe_resubmit. 
--- src/esm_runscripts/sim_objects.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/esm_runscripts/sim_objects.py b/src/esm_runscripts/sim_objects.py index 8bf974b3f..f96ae1326 100644 --- a/src/esm_runscripts/sim_objects.py +++ b/src/esm_runscripts/sim_objects.py @@ -15,7 +15,7 @@ import esm_parser -#import pdb +import pdb class SimulationSetup(object): def __init__(self, command_line_config=None, user_config=None): @@ -46,6 +46,7 @@ def __init__(self, command_line_config=None, user_config=None): # sys.exit(0) def __call__(self, kill_after_submit=True): + breakpoint() # Trigger inspect functionalities if self.config["general"]["jobtype"] == "inspect": # esm_parser.pprint_config(self.config) @@ -86,13 +87,16 @@ def __call__(self, kill_after_submit=True): ].replace("observe_", "") # that last line is necessary so that maybe_resubmit knows which # cluster to look up in the workflow - # because all cluster with batch_or_shell=sbatch will be called + # because all cluster with batch_or_shell=sbatch will be called # esm_runscripts ... -t observe_ ... else: + # write .run file for all workflow phases. + + # Is this dunction call needed here? 
self.assembler() - resubmit.maybe_resubmit(self.config) + #resubmit.maybe_resubmit(self.config) # if this line is reached, the run is submitted and running or finished self.config = logfiles.finalize_logfiles(self.config, org_jobtype) From 09ac11b8ae97a3b6d03073f8dac088cc22e73051 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Tue, 23 Jan 2024 15:27:21 +0100 Subject: [PATCH 71/98] Test to remove jobtype setting in workflow.py --- src/esm_runscripts/resubmit.py | 1 + src/esm_runscripts/sim_objects.py | 1 - src/esm_runscripts/workflow.py | 16 ++++++++-------- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/esm_runscripts/resubmit.py b/src/esm_runscripts/resubmit.py index 717a0416a..5046372c1 100644 --- a/src/esm_runscripts/resubmit.py +++ b/src/esm_runscripts/resubmit.py @@ -5,6 +5,7 @@ from . import chunky_parts from . import workflow +import pdb def submit(config): """ diff --git a/src/esm_runscripts/sim_objects.py b/src/esm_runscripts/sim_objects.py index f96ae1326..cc31591ef 100644 --- a/src/esm_runscripts/sim_objects.py +++ b/src/esm_runscripts/sim_objects.py @@ -46,7 +46,6 @@ def __init__(self, command_line_config=None, user_config=None): # sys.exit(0) def __call__(self, kill_after_submit=True): - breakpoint() # Trigger inspect functionalities if self.config["general"]["jobtype"] == "inspect": # esm_parser.pprint_config(self.config) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index 7159995c3..9cb31e190 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -1,7 +1,7 @@ import copy import esm_parser -import pygraphviz as pgv +#import pygraphviz as pgv import pdb @@ -789,13 +789,13 @@ def assemble_workflow(config): # Set "jobtype" for the first task??? # NOTE: This is either first default phase or # newrun??? Can't this not be set in prepend_newrun then? 
- if config["general"]["jobtype"] == "unknown": - config["general"]["command_line_config"]["jobtype"] = config["general"][ - "workflow" - ]["first_task_in_queue"] - config["general"]["jobtype"] = config["general"]["workflow"][ - "first_task_in_queue" - ] +# if config["general"]["jobtype"] == "unknown": +# config["general"]["command_line_config"]["jobtype"] = config["general"][ +# "workflow" +# ]["first_task_in_queue"] +# config["general"]["jobtype"] = config["general"]["workflow"][ +# "first_task_in_queue" +# ] return config From ad156750b924f13b270181bf868aaa1c07a16b3c Mon Sep 17 00:00:00 2001 From: Miguel Andres-Martinez Date: Thu, 1 Feb 2024 15:19:10 +0100 Subject: [PATCH 72/98] comment and clean esm_software/esm_runscripts config files --- .../esm_software/esm_runscripts/defaults.yaml | 2 ++ .../esm_runscripts/esm_plugins.yaml | 33 ++++++++----------- .../esm_runscripts/esm_runscripts.yaml | 30 +++++------------ 3 files changed, 24 insertions(+), 41 deletions(-) diff --git a/configs/esm_software/esm_runscripts/defaults.yaml b/configs/esm_software/esm_runscripts/defaults.yaml index 43b918aa7..594cc20ec 100644 --- a/configs/esm_software/esm_runscripts/defaults.yaml +++ b/configs/esm_software/esm_runscripts/defaults.yaml @@ -1,4 +1,6 @@ +# ESM-Runscripts defaults +# Defaults to be added to each model or component per_model_defaults: file_movements: default: diff --git a/configs/esm_software/esm_runscripts/esm_plugins.yaml b/configs/esm_software/esm_runscripts/esm_plugins.yaml index e80644882..785a43f7d 100644 --- a/configs/esm_software/esm_runscripts/esm_plugins.yaml +++ b/configs/esm_software/esm_runscripts/esm_plugins.yaml @@ -1,3 +1,14 @@ +# Mappings of functions/methods to parent ESM python libraries: +# tells ESM-Tools in which library and file/sublibrary can find the functions +# of the recipies +# (prescribed in configs/esm_software/esm_runscripts/esm_runscripts.yaml). 
+# +# Core (not an external plugin) +# Library Sublibrary/file Function/method +# esm_runscripts +# prepare +# - "_read_date_file" + core: esm_runscripts: prepare: @@ -24,7 +35,7 @@ core: - "initialize_experiment_logfile" - "copy_tools_to_thisrun" - "_copy_preliminary_files_from_experiment_to_thisrun" - + prepcompute: @@ -53,9 +64,8 @@ core: - "throw_away_some_infiles" - observe: + observe: - "init_monitor_file" - #- "get_last_jobid" - "wait_and_observe" - "wake_up_call" @@ -82,7 +92,6 @@ core: - "database_entry" batch_system: - #- "calculate_requirements" - "write_simple_runscript" - "write_env" - "find_openmp" @@ -101,19 +110,3 @@ core: workflow: - "assemble_workflow" - - -# To add your custom plugin, see oifs.yaml, section compute_recipe, and compare to the list above. -# the custom plugin 'preprocess' listed in the compute_recipe in oifs.yaml needs to be installed beforehand with -# -# git clone https://github.com/esm-tools-plugins/preprocess -# cd preprocess -# if required activate python3 e.g. -# module load anaconda3/bleeding_edge on -# pip install --user -# -# now check if the plugin is available -# esm_plugins should print -# The following plugins are installed and available: -# - preprocess -# diff --git a/configs/esm_software/esm_runscripts/esm_runscripts.yaml b/configs/esm_software/esm_runscripts/esm_runscripts.yaml index 7d4b022be..7973ba4f8 100644 --- a/configs/esm_software/esm_runscripts/esm_runscripts.yaml +++ b/configs/esm_software/esm_runscripts/esm_runscripts.yaml @@ -1,3 +1,8 @@ +# Default recipies +# ESM-Tools uses config/esm_software/esm_runscripts/esm_plugins.yaml to understand +# where to look for each of this function/methods (the steps within each recipy, e.g. +# "_read_date_file"). 
+ choose_job_type: #postprocess: @@ -7,21 +12,17 @@ choose_job_type: prepare: recipe: - "_read_date_file" - #- "setup_correct_chunk_config" - "_update_run_in_chunk" - "check_model_lresume" - -# kh 01.12.20 this must be done before resolve_some_choose_blocks, because potentially also choosable -# entries can be overridden via usermods.yaml (e.g. computer.useMPI: intel18_bullxmpi) + # This must be done before resolve_some_choose_blocks, because + # potentially also choosable entries can be overridden via + # usermods.yaml (e.g. computer.useMPI: intel18_bullxmpi) - "apply_last_minute_changes" - "find_openmp" - "resolve_some_choose_blocks" - "_initialize_calendar" - "set_chunk_calendar" - "resolve_some_choose_blocks" - -# kh 01.12.20 moved up a few positions -# - "apply_last_minute_changes" - "_add_all_folders" - "set_prev_date" - "set_parent_info" @@ -48,15 +49,12 @@ choose_job_type: - "copy_tools_to_thisrun" - "_copy_preliminary_files_from_experiment_to_thisrun" - observe: recipe: - "init_monitor_file" - #- "get_last_jobid" - "wait_and_observe" - "wake_up_call" - tidy: recipe: - "tidy_coupler" @@ -66,14 +64,7 @@ choose_job_type: - "throw_away_some_infiles" - "copy_stuff_back_from_work" - "copy_all_results_to_exp" - #- "_update_chunk_date_file" - "clean_run_dir" - #- "start_post_job" - #- "signal_tidy_completion" - #- "_increment_date_and_run_number" - #- "_write_date_file" - #- "maybe_resubmit" - prepcompute: recipe: @@ -82,21 +73,17 @@ choose_job_type: - "create_new_files" - "create_empty_folders" - "prepare_coupler_files" - #- "calculate_requirements" - #- "add_batch_hostfile" - "assemble" - "log_used_files" - "copy_files_to_thisrun" - "modify_namelists" - "modify_files" - "copy_files_to_work" - #- "write_simple_runscript" - "report_missing_files" #- "add_vcs_info" #- "check_vcs_info_against_last_run" - "_write_finalized_config" - "database_entry" - #- "submit" inspect: recipe: @@ -108,3 +95,4 @@ choose_job_type: - "inspect_config" - "inspect_folder" - 
"inspect_file" + From cb6e666106c0b0327e19b3cbb8655d2531b37c22 Mon Sep 17 00:00:00 2001 From: Miguel Andres-Martinez Date: Thu, 1 Feb 2024 19:02:54 +0100 Subject: [PATCH 73/98] partial refactoring of SimulationSetup.__init__ and some of the functions of config_initialization.py --- src/esm_runscripts/cli.py | 2 +- src/esm_runscripts/config_initialization.py | 96 +++++++++++++++------ src/esm_runscripts/prepare.py | 2 +- src/esm_runscripts/sim_objects.py | 12 ++- 4 files changed, 84 insertions(+), 28 deletions(-) diff --git a/src/esm_runscripts/cli.py b/src/esm_runscripts/cli.py index 4498c92f6..081bba182 100644 --- a/src/esm_runscripts/cli.py +++ b/src/esm_runscripts/cli.py @@ -278,7 +278,7 @@ def main(): logger.debug(f"starting (jobtype): {jobtype}") logger.debug(command_line_config) - Setup = SimulationSetup(command_line_config) + Setup = SimulationSetup(command_line_config=command_line_config) # if not Setup.config['general']['submitted']: if not Setup.config["general"]["submitted"] and not no_motd: check_all_esm_packages() diff --git a/src/esm_runscripts/config_initialization.py b/src/esm_runscripts/config_initialization.py index d037fbd1f..7d02b4580 100644 --- a/src/esm_runscripts/config_initialization.py +++ b/src/esm_runscripts/config_initialization.py @@ -8,10 +8,7 @@ from . import chunky_parts -def init_first_user_config(command_line_config, user_config): - - if not user_config: - user_config = get_user_config_from_command_line(command_line_config) +def init_iterative_coupling(command_line_config, user_config): # maybe switch to another runscript, if iterative coupling user_config["general"]["iterative_coupled_model"] = "" @@ -84,13 +81,40 @@ def save_command_line_config(config, command_line_config): def get_user_config_from_command_line(command_line_config): + """ + Reads the runscript provided in ``command_line_config`` and overwirtes the + information of the runscript with that of the command line (command line wins + over the runscript. 
+ + Input + ----- + command_line_config : dict + Dictionary containing the information coming from the command line + + Returns + ------- + user_config : dict, DictWithProvenance + Dictionary containing the information from the command line on top of the + runscript's + + Raises + ------ + Syntaxerror : esm_parser.user_error + If there is a problem with the parsing of the runscript + """ + + # Default user_config + user_config = { + "general": { + "additional_files": [], + }, + } + + # Read the content of the runscrip try: - # use the full absolute path instead of CWD - user_config = esm_parser.initialize_from_yaml( - command_line_config["runscript_abspath"] + user_config.update( + esm_parser.initialize_from_yaml(command_line_config["runscript_abspath"]) ) - if "additional_files" not in user_config["general"]: - user_config["general"]["additional_files"] = [] # If sys.exit is triggered through esm_parser.user_error (i.e. from # ``check_for_empty_components`` in ``yaml_to_dict.py``) catch the sys.exit. except SystemExit as sysexit: @@ -101,23 +125,45 @@ def get_user_config_from_command_line(command_line_config): f"An error occurred while reading the config file " f"``{command_line_config['runscript_abspath']}`` from the command line.") - # NOTE(PG): I really really don't like this. 
But I also don't want to - # re-introduce black/white lists - # - # User config wins over command line: - # ----------------------------------- - # Update all **except** for use_venv if it was supplied in the - # runscript: - deupdate_use_venv = False - if "use_venv" in user_config["general"]: - user_use_venv = user_config["general"]["use_venv"] - deupdate_use_venv = True user_config["general"].update(command_line_config) - if deupdate_use_venv: - user_config["general"]["use_venv"] = user_use_venv - user_config["general"]["isinteractive"] = command_line_config.get( - "last_jobtype", "" - )=="command_line" + + return user_config + + +def init_interactive_info(command_line_config, user_config): + """ + Initialize key-values to evaluate at any point whether interactive functions are to + be run (e.g. questionaries, warnings, etc.). The following key-values are set within + ``user_config["general"]``: + - ``isinteractive``: ``True`` if this function is trigger by a command line + execution + - ``isresubmitted``: ``True`` if the ``last_jobtype`` is the same as the current + ``jobtype`` (after the user triggers ``esm_runscripts`` there is a first + step of preparing the experiment folder and then it resubmit it itself from + the experiment folder; most questionaries need to be run in this second step + ``isresubmitted`` because only then the updated information via the + questionaries plays a role in the simulation). 
+ + Input + ----- + command_line_config : dict + Dictionary containing the information coming from the command line + user_config : dict + Dictionary containing the information from the command line on top of the + runscript's + + Returns + ------- + user_config : Dict + Same as the input ``user_config`` but with the interactive variables + """ + last_jobtype = command_line_config.get("last_jobtype", "") + isinteractive = last_jobtype == "command_line" + isresubmitted = last_jobtype == user_config["general"]["jobtype"] + + user_config["general"]["isinteractive"] = isinteractive + user_config["general"]["isresubmitted"] = isresubmitted + return user_config diff --git a/src/esm_runscripts/prepare.py b/src/esm_runscripts/prepare.py index ad5e13aaf..c8007421c 100644 --- a/src/esm_runscripts/prepare.py +++ b/src/esm_runscripts/prepare.py @@ -869,7 +869,7 @@ def check_config_for_warnings_errors(config): # Find conditions to warn (avoid warning more than once) last_jobtype = config["general"].get("last_jobtype", "") - isresubmitted = last_jobtype == config["general"]["jobtype"] + isresubmitted = config["general"].get("isresubmitted", "") isinteractive = config["general"].get("isinteractive", "") # Only warn if it is an interactive session or while submitted diff --git a/src/esm_runscripts/sim_objects.py b/src/esm_runscripts/sim_objects.py index a9353d41d..fa2449708 100644 --- a/src/esm_runscripts/sim_objects.py +++ b/src/esm_runscripts/sim_objects.py @@ -24,7 +24,17 @@ def __init__(self, command_line_config=None, user_config=None): "SimulationSetup needs to be initialized with either command_line_config or user_config." 
) - user_config = config_initialization.init_first_user_config( + # Initialize user_config using the command line arguments and the given runscript + if not user_config: + user_config = config_initialization.get_user_config_from_command_line( + command_line_config + ) + + # Initialize information about interactive sessions + user_config = config_initialization.init_interactive_info(command_line_config, user_config) + + # Initialize iterative coupling information + user_config = config_initialization.init_iterative_coupling( command_line_config, user_config ) From b509de70fd145d5a0488f41053d7a3cf2bbbc411 Mon Sep 17 00:00:00 2001 From: Miguel Andres-Martinez Date: Thu, 1 Feb 2024 20:34:02 +0100 Subject: [PATCH 74/98] restructure SimulationSetup.__init__: 1) the functions there do not call other functions (i.e. by looking at SimulationSetup.__init__ it is now clear what are the steps 2) redefine each steps in config_initialization.py so that each step has a single and well defined used and 3) collect all defaults variables defined in the functions from config_initialization.py and included them into a new general section in the configs/esm_software/esm_runscripts/defaults.yaml --- .../esm_software/esm_runscripts/defaults.yaml | 7 ++++ src/esm_runscripts/config_initialization.py | 40 ++++++------------- src/esm_runscripts/sim_objects.py | 23 +++++++++-- 3 files changed, 40 insertions(+), 30 deletions(-) diff --git a/configs/esm_software/esm_runscripts/defaults.yaml b/configs/esm_software/esm_runscripts/defaults.yaml index 594cc20ec..0a23ca5fa 100644 --- a/configs/esm_software/esm_runscripts/defaults.yaml +++ b/configs/esm_software/esm_runscripts/defaults.yaml @@ -1,5 +1,12 @@ # ESM-Runscripts defaults +# Defaults added to the general section +general: + additional_files: [] + iterative_coupled_model: "" + reset_calendar_to_last: False + verbose: False + # Defaults to be added to each model or component per_model_defaults: file_movements: diff --git 
a/src/esm_runscripts/config_initialization.py b/src/esm_runscripts/config_initialization.py index 7d02b4580..23219c355 100644 --- a/src/esm_runscripts/config_initialization.py +++ b/src/esm_runscripts/config_initialization.py @@ -11,7 +11,6 @@ def init_iterative_coupling(command_line_config, user_config): # maybe switch to another runscript, if iterative coupling - user_config["general"]["iterative_coupled_model"] = "" if user_config["general"].get("iterative_coupling", False): user_config = chunky_parts.setup_correct_chunk_config(user_config) @@ -50,23 +49,19 @@ def init_iterative_coupling(command_line_config, user_config): return user_config -def complete_config_from_user_config(user_config): - config = get_total_config_from_user_config(user_config) +def complete_config_with_inspect(config): - if "verbose" not in config["general"]: - config["general"]["verbose"] = False + general = config["general"] - config["general"]["reset_calendar_to_last"] = False + if general.get("inspect"): + general["jobtype"] = "inspect" - if config["general"].get("inspect"): - config["general"]["jobtype"] = "inspect" - - if config["general"].get("inspect") not in [ + if general.get("inspect") not in [ "workflow", "overview", "config", ]: - config["general"]["reset_calendar_to_last"] = True + general["reset_calendar_to_last"] = True return config @@ -103,17 +98,10 @@ def get_user_config_from_command_line(command_line_config): If there is a problem with the parsing of the runscript """ - # Default user_config - user_config = { - "general": { - "additional_files": [], - }, - } - # Read the content of the runscrip try: - user_config.update( - esm_parser.initialize_from_yaml(command_line_config["runscript_abspath"]) + user_config = esm_parser.initialize_from_yaml( + command_line_config["runscript_abspath"] ) # If sys.exit is triggered through esm_parser.user_error (i.e. from # ``check_for_empty_components`` in ``yaml_to_dict.py``) catch the sys.exit. 
@@ -185,13 +173,16 @@ def get_total_config_from_user_config(user_config): user_config, ) - config = add_esm_runscripts_defaults_to_config(config) - config["computer"]["jobtype"] = config["general"]["jobtype"] config["general"]["experiment_dir"] = ( config["general"]["base_dir"] + "/" + config["general"]["expid"] ) + return config + + +def check_account(config): + # Check if the 'account' variable is needed and missing if config["computer"].get("accounting", False): if "account" not in config["general"]: @@ -211,12 +202,7 @@ def add_esm_runscripts_defaults_to_config(config): path_to_file = esm_tools.get_config_filepath() + "/esm_software/esm_runscripts/defaults.yaml" default_config = esm_parser.yaml_file_to_dict(path_to_file) config["general"]["defaults.yaml"] = default_config - config = distribute_per_model_defaults(config) - return config - -def distribute_per_model_defaults(config): - default_config = config["general"]["defaults.yaml"] if "general" in default_config: config["general"] = esm_parser.new_deep_update( config["general"], default_config["general"] diff --git a/src/esm_runscripts/sim_objects.py b/src/esm_runscripts/sim_objects.py index fa2449708..cc5359f8b 100644 --- a/src/esm_runscripts/sim_objects.py +++ b/src/esm_runscripts/sim_objects.py @@ -38,17 +38,34 @@ def __init__(self, command_line_config=None, user_config=None): command_line_config, user_config ) - self.config = config_initialization.complete_config_from_user_config( + # Load total config from all the configuration files involved in this simulation + self.config = config_initialization.get_total_config_from_user_config( user_config ) + # Complete missing key-values with the defaults defined in + # ``configs/esm_software/esm_runscripts/defaults.yaml`` + self.config = config_initialization.add_esm_runscripts_defaults_to_config( + self.config + ) + + # Check if the ``account`` is missing in ``general`` + self.config = config_initialization.check_account(self.config) + + # Complete information 
for inspect + self.config = config_initialization.complete_config_with_inspect( + self.config + ) + + # Save the ``command_line_config`` in ``general`` self.config = config_initialization.save_command_line_config( self.config, command_line_config ) - # self.config = workflow.assemble(self.config) - + # Initialize the ``prev_run`` object self.config["prev_run"] = prev_run.PrevRunInfo(self.config) + + # Run ``prepare`` recipe self.config = prepare.run_job(self.config) # esm_parser.pprint_config(self.config) From dfff3ebba51c4f9f1cdb8741bc24428557f3920e Mon Sep 17 00:00:00 2001 From: Miguel Andres-Martinez Date: Thu, 1 Feb 2024 21:47:28 +0100 Subject: [PATCH 75/98] fix a bug in init_interactive_info that was afecting esm_master --- src/esm_runscripts/config_initialization.py | 26 +++++++++++---------- src/esm_runscripts/sim_objects.py | 2 +- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/src/esm_runscripts/config_initialization.py b/src/esm_runscripts/config_initialization.py index 23219c355..985910b60 100644 --- a/src/esm_runscripts/config_initialization.py +++ b/src/esm_runscripts/config_initialization.py @@ -118,11 +118,11 @@ def get_user_config_from_command_line(command_line_config): return user_config -def init_interactive_info(command_line_config, user_config): +def init_interactive_info(config, command_line_config): """ Initialize key-values to evaluate at any point whether interactive functions are to be run (e.g. questionaries, warnings, etc.). 
The following key-values are set within - ``user_config["general"]``: + ``config["general"]``: - ``isinteractive``: ``True`` if this function is trigger by a command line execution - ``isresubmitted``: ``True`` if the ``last_jobtype`` is the same as the current @@ -136,23 +136,25 @@ def init_interactive_info(command_line_config, user_config): ----- command_line_config : dict Dictionary containing the information coming from the command line - user_config : dict - Dictionary containing the information from the command line on top of the - runscript's + config : dict + Dictionary containing the simulation configuration Returns ------- - user_config : Dict - Same as the input ``user_config`` but with the interactive variables + config : dict + Same as the input ``config`` but with the interactive variables """ - last_jobtype = command_line_config.get("last_jobtype", "") + if command_line_config: + last_jobtype = command_line_config.get("last_jobtype", "") + else: + last_jobtype = "" isinteractive = last_jobtype == "command_line" - isresubmitted = last_jobtype == user_config["general"]["jobtype"] + isresubmitted = last_jobtype == config["general"]["jobtype"] - user_config["general"]["isinteractive"] = isinteractive - user_config["general"]["isresubmitted"] = isresubmitted + config["general"]["isinteractive"] = isinteractive + config["general"]["isresubmitted"] = isresubmitted - return user_config + return config def get_total_config_from_user_config(user_config): diff --git a/src/esm_runscripts/sim_objects.py b/src/esm_runscripts/sim_objects.py index cc5359f8b..9861223f3 100644 --- a/src/esm_runscripts/sim_objects.py +++ b/src/esm_runscripts/sim_objects.py @@ -31,7 +31,7 @@ def __init__(self, command_line_config=None, user_config=None): ) # Initialize information about interactive sessions - user_config = config_initialization.init_interactive_info(command_line_config, user_config) + user_config = config_initialization.init_interactive_info(user_config, 
command_line_config) # Initialize iterative coupling information user_config = config_initialization.init_iterative_coupling( From 49c43e38c620357802e0998af0e1afd073bb470b Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Fri, 2 Feb 2024 08:57:31 +0100 Subject: [PATCH 76/98] Added optional argument to _write_finalized_config. --- src/esm_runscripts/prepcompute.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/esm_runscripts/prepcompute.py b/src/esm_runscripts/prepcompute.py index 0aca0d487..0197cc75f 100644 --- a/src/esm_runscripts/prepcompute.py +++ b/src/esm_runscripts/prepcompute.py @@ -252,7 +252,7 @@ def copy_files_to_work(config): return config -def _write_finalized_config(config): +def _write_finalized_config(config, config_file_path=None): """Writes _finished_config.yaml file Parameters ---------- @@ -319,10 +319,11 @@ class EsmConfigDumper(yaml.dumper.Dumper): thisrun_config_dir = config["general"]["thisrun_config_dir"] expid = config["general"]["expid"] it_coupled_model_name = config["general"]["iterative_coupled_model"] - config_file_path = ( - f"{thisrun_config_dir}/" - f"{expid}_{it_coupled_model_name}finished_config.yaml" - ) + if not config_file_path: + config_file_path = ( + f"{thisrun_config_dir}/" + f"{expid}_{it_coupled_model_name}finished_config.yaml" + ) with open(config_file_path, "w") as config_file: # Avoid saving ``prev_run`` information in the config file config_final = copy.deepcopy(config) # PrevRunInfo From 3456dd65f3693773eb0ae6dfa487cc181bc610a6 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Fri, 2 Feb 2024 10:54:43 +0100 Subject: [PATCH 77/98] Add docstring --- src/esm_runscripts/prepcompute.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/src/esm_runscripts/prepcompute.py b/src/esm_runscripts/prepcompute.py index 0197cc75f..73f87792f 100644 --- a/src/esm_runscripts/prepcompute.py +++ b/src/esm_runscripts/prepcompute.py @@ -253,11 +253,24 @@ def 
copy_files_to_work(config): def _write_finalized_config(config, config_file_path=None): - """Writes _finished_config.yaml file - Parameters - ---------- - config : esm-tools config object """ + Writes _finished_config.yaml file + + Input + ----- + config : dict + esm-tools config object + config_file_path : string + Optional file path and name where the content of config is to be stored. + Default is None. If not given (default) the path will be set depending on + settings in config and the file name is _finished_config.yaml. + + Returns + ------- + config : dict + + """ + # first define the representers for the non-built-in types, as recommended # here: https://pyyaml.org/wiki/PyYAMLDocumentation def date_representer(dumper, date): From acfe195620692b1d9a495cdc6e15e3c9decf78da Mon Sep 17 00:00:00 2001 From: Miguel Andres-Martinez Date: Fri, 2 Feb 2024 15:47:13 +0100 Subject: [PATCH 78/98] add docstrings to all functions in config_initialization.py and to SimulationSetup in sim_objects.py --- src/esm_runscripts/config_initialization.py | 109 +++++++++++++++++++- src/esm_runscripts/sim_objects.py | 67 +++++++++--- 2 files changed, 159 insertions(+), 17 deletions(-) diff --git a/src/esm_runscripts/config_initialization.py b/src/esm_runscripts/config_initialization.py index 985910b60..05e40c649 100644 --- a/src/esm_runscripts/config_initialization.py +++ b/src/esm_runscripts/config_initialization.py @@ -9,6 +9,23 @@ def init_iterative_coupling(command_line_config, user_config): + """ + Completes information for the interactive coupling (offline coupling) in the + ``user_config`` if this simulation is indeed a interactive coupling. 
+ + Input + ----- + command_line_config : dict + Dictionary containing the information coming from the command line + user_config : dict, esm_parser.ConfigSetup + Dictionary containing the basic user information + + Returns + ------- + user_config : dict, esm_parser.ConfigSetup + Dictionary containing the basic user information and the additional processed + information needed for offline coupling simulations + """ # maybe switch to another runscript, if iterative coupling if user_config["general"].get("iterative_coupling", False): @@ -50,6 +67,20 @@ def init_iterative_coupling(command_line_config, user_config): def complete_config_with_inspect(config): + """ + Completes information for ``inspect`` jobs. + + Input + ----- + config : dict, esm_parser.ConfigSetup + ConfigSetup object containing the information of the current simulation + + Returns + ------- + config : dict, esm_parser.ConfigSetup + ConfigSetup object containing the information of the current simulation and the + ``inspect`` information + """ general = config["general"] @@ -67,6 +98,22 @@ def complete_config_with_inspect(config): def save_command_line_config(config, command_line_config): + """ + Store the config coming from the command line in the ``config``. 
+ + Input + ----- + config : dict, esm_parser.ConfigSetup + ConfigSetup object containing the information of the current simulation + command_line_config : dict + Dictionary containing the information coming from the command line + + Returns + ------- + config : dict, esm_parser.ConfigSetup + ConfigSetup object containing the information of the current simulation and the + ``command_line_config`` stored in the ``general`` section + """ if command_line_config: config["general"]["command_line_config"] = command_line_config else: @@ -79,7 +126,7 @@ def get_user_config_from_command_line(command_line_config): """ Reads the runscript provided in ``command_line_config`` and overwirtes the information of the runscript with that of the command line (command line wins - over the runscript. + over the runscript). Input ----- @@ -88,7 +135,7 @@ def get_user_config_from_command_line(command_line_config): Returns ------- - user_config : dict, DictWithProvenance + user_config : dict, esm_parser.ConfigSetup Dictionary containing the information from the command line on top of the runscript's @@ -136,12 +183,12 @@ def init_interactive_info(config, command_line_config): ----- command_line_config : dict Dictionary containing the information coming from the command line - config : dict + config : dict, esm_parser.ConfigSetup Dictionary containing the simulation configuration Returns ------- - config : dict + config : dict, esm_parser.ConfigSetup Same as the input ``config`` but with the interactive variables """ if command_line_config: @@ -158,7 +205,21 @@ def init_interactive_info(config, command_line_config): def get_total_config_from_user_config(user_config): + """ + Finds the version of the setup in ``user_config`` instanciates the ``config`` with + ``esm_parser.ConfigSetup`` which appends all the information from the config files + required for this simulation and stores it in ``config``. 
+ + Input + ----- + user_config : dict, esm_parser.ConfigSetup + Dictionary containing the basic user information + Returns + ------- + config : dict, esm_parser.ConfigSetup + ConfigSetup object containing the information of the current simulation + """ if "version" in user_config["general"]: version = str(user_config["general"]["version"]) else: @@ -184,6 +245,26 @@ def get_total_config_from_user_config(user_config): def check_account(config): + """ + Checks whether the user has **not** defined a job scheduling account (e.g. slurm) + ``config["general"]["account"]`` while the machine requires it for running jobs, and + in that case reports an error. + + Input + ----- + config : dict, esm_parser.ConfigSetup + ConfigSetup object containing the information of the current simulation + + Returns + ------- + config : dict, esm_parser.ConfigSetup + ConfigSetup object containing the information of the current simulation + + Raises + ------ + Missing account info : esm_parser.user_error + If the system requires a job scheduler account but none was provided by the user + """ # Check if the 'account' variable is needed and missing if config["computer"].get("accounting", False): @@ -201,6 +282,25 @@ def check_account(config): def add_esm_runscripts_defaults_to_config(config): + """ + Add the defaults defined in ``configs/esm_software/esm_runscripts/defaults.yaml`` to + the ``config``, if those key-values do not exist yet. 
The ``keys`` supported in that + file are: + - ``general``: to be assigned to the ``general`` section of the ``config`` + - ``per_model_defaults``: to be added to each component/model section of the + ``config`` + + Input + ----- + config : dict, esm_parser.ConfigSetup + ConfigSetup object containing the information of the current simulation + + Returns + ------- + config : dict, esm_parser.ConfigSetup + ConfigSetup object containing the information of the current simulation and the + defaults + """ path_to_file = esm_tools.get_config_filepath() + "/esm_software/esm_runscripts/defaults.yaml" default_config = esm_parser.yaml_file_to_dict(path_to_file) config["general"]["defaults.yaml"] = default_config @@ -222,4 +322,5 @@ def add_esm_runscripts_defaults_to_config(config): config[model] = esm_parser.new_deep_update( config[model], per_model_defaults ) + return config diff --git a/src/esm_runscripts/sim_objects.py b/src/esm_runscripts/sim_objects.py index 9861223f3..41c8be5e3 100644 --- a/src/esm_runscripts/sim_objects.py +++ b/src/esm_runscripts/sim_objects.py @@ -17,55 +17,96 @@ class SimulationSetup(object): - def __init__(self, command_line_config=None, user_config=None): + def __init__(self, command_line_config=None, user_config=None): + """ + Initializes the ``SimulationSetup`` object, and prepares the ``self.config`` by + taking the information from the ``command_line_config`` and/or the + ``user_config`` and expanding it with the configuration files from `ESM-Tools` + (in `esm_tools/configs`), and then running the ``prepare`` recipe. In essence, + ``__init__`` takes care of loading and baking all the config information, + resolving the ``chooses``, ``add_``, etc. It is used by ``esm_runscripts`` and + ``esm_master``. Below, a more detailed description of the steps of the + ``__init__``: + + 1. Check that at least one input is given + 2. Initialize user_config (command line arguments + content of the runscript) + 3. 
Initialize information about interactive sessions + 4. Initialize interactive coupling information (offline coupling) + 5. Load total config from all the configuration files involved in this + simulation. Input: user_config -> returns: self.config + 6. Add the defaults in ``configs/esm_software/esm_runscripts/defaults.yaml`` + to missing key-values in self.config + 7. Check if the ``account`` is missing in ``general`` + 8. Complete information for inspect + 9. Store the ``command_line_config`` in ``general`` + 10. Initialize the ``prev_run`` object + 11. Run ``prepare`` recipe (resolve the `ESM-Tools` syntax) + + Input + ----- + command_line_config : dict + Dictionary containing the information coming from the command line + user_config : dict, DictWithProvenance + Dictionary containing the basic user information. Is only an input in + ``esm_master``, not in ``esm_runscripts`` (i.e. ``esm_master`` does not need + to read a runscript) + + Raises + ------ + ValueError : + If neither ``command_line_config`` nor ``user_config`` are defined + """ + # 1. Check that at least one input is given if not command_line_config and not user_config: raise ValueError( - "SimulationSetup needs to be initialized with either command_line_config or user_config." + "SimulationSetup needs to be initialized with either " + "command_line_config or user_config." ) - # Initialize user_config using the command line arguments and the given runscript + # 2. Initialize user_config (command line arguments + content of the runscript) if not user_config: user_config = config_initialization.get_user_config_from_command_line( command_line_config ) - # Initialize information about interactive sessions + # 3. Initialize information about interactive sessions user_config = config_initialization.init_interactive_info(user_config, command_line_config) - # Initialize iterative coupling information + # 4. 
Initialize iterative coupling information (offline coupling) user_config = config_initialization.init_iterative_coupling( command_line_config, user_config ) - # Load total config from all the configuration files involved in this simulation + # 5. Load total config from all the configuration files involved in this + # simulation self.config = config_initialization.get_total_config_from_user_config( user_config ) - # Complete missing key-values with the defaults defined in - # ``configs/esm_software/esm_runscripts/defaults.yaml`` + # 6. Add the defaults in ``configs/esm_software/esm_runscripts/defaults.yaml`` + # to missing key-values in self.config self.config = config_initialization.add_esm_runscripts_defaults_to_config( self.config ) - # Check if the ``account`` is missing in ``general`` + # 7. Check if the ``account`` is missing in ``general`` self.config = config_initialization.check_account(self.config) - # Complete information for inspect + # 8. Complete information for inspect self.config = config_initialization.complete_config_with_inspect( self.config ) - # Save the ``command_line_config`` in ``general`` + # 9. Store the ``command_line_config`` in ``general`` self.config = config_initialization.save_command_line_config( self.config, command_line_config ) - # Initialize the ``prev_run`` object + # 10. Initialize the ``prev_run`` object self.config["prev_run"] = prev_run.PrevRunInfo(self.config) - # Run ``prepare`` recipe + # 11. 
Run ``prepare`` recipe (resolve the `ESM-Tools` syntax) self.config = prepare.run_job(self.config) # esm_parser.pprint_config(self.config) From ddf643ab90a291f25382ef3bd0c744685fc3dcd7 Mon Sep 17 00:00:00 2001 From: Miguel Andres-Martinez Date: Fri, 2 Feb 2024 15:52:10 +0100 Subject: [PATCH 79/98] black formating of config_initialization.py and remove strings concatenations with + --- src/esm_runscripts/config_initialization.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/esm_runscripts/config_initialization.py b/src/esm_runscripts/config_initialization.py index 05e40c649..9dd2071a0 100644 --- a/src/esm_runscripts/config_initialization.py +++ b/src/esm_runscripts/config_initialization.py @@ -54,9 +54,9 @@ def init_iterative_coupling(command_line_config, user_config): # Set the ``iterative_coupled_model`` string, to add the model name to the # run_ folder, finished_config.yaml, etc., to avoid overwritting with the # files of other offline coupled models - user_config["general"]["iterative_coupled_model"] = ( - f"{user_config['general']['setup_name']}_" - ) + user_config["general"][ + "iterative_coupled_model" + ] = f"{user_config['general']['setup_name']}_" # Extract information about the models run in the previous chunk chunky_parts.prev_chunk_info(user_config) @@ -158,7 +158,8 @@ def get_user_config_from_command_line(command_line_config): esm_parser.user_error( "Syntax error", f"An error occurred while reading the config file " - f"``{command_line_config['runscript_abspath']}`` from the command line.") + f"``{command_line_config['runscript_abspath']}`` from the command line.", + ) user_config["general"].update(command_line_config) @@ -237,9 +238,9 @@ def get_total_config_from_user_config(user_config): ) config["computer"]["jobtype"] = config["general"]["jobtype"] - config["general"]["experiment_dir"] = ( - config["general"]["base_dir"] + "/" + config["general"]["expid"] - ) + config["general"][ + "experiment_dir" + ] = 
f"{config['general']['base_dir']}/{config['general']['expid']}" return config @@ -301,7 +302,9 @@ def add_esm_runscripts_defaults_to_config(config): ConfigSetup object containing the information of the current simulation and the defaults """ - path_to_file = esm_tools.get_config_filepath() + "/esm_software/esm_runscripts/defaults.yaml" + path_to_file = ( + f"{esm_tools.get_config_filepath()}/esm_software/esm_runscripts/defaults.yaml" + ) default_config = esm_parser.yaml_file_to_dict(path_to_file) config["general"]["defaults.yaml"] = default_config From 4257e41499505852eeb053f96e8bc3c87ffa3b95 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Fri, 2 Feb 2024 16:28:17 +0100 Subject: [PATCH 80/98] Put the esm_runscripts call via subprocess in its own prepexp revipe. --- .../esm_runscripts/esm_plugins.yaml | 1 + .../esm_runscripts/esm_runscripts.yaml | 1 + src/esm_runscripts/prepexp.py | 66 +++++++++++++------ 3 files changed, 49 insertions(+), 19 deletions(-) diff --git a/configs/esm_software/esm_runscripts/esm_plugins.yaml b/configs/esm_software/esm_runscripts/esm_plugins.yaml index 785a43f7d..35af8d2ec 100644 --- a/configs/esm_software/esm_runscripts/esm_plugins.yaml +++ b/configs/esm_software/esm_runscripts/esm_plugins.yaml @@ -34,6 +34,7 @@ core: - "_create_component_folders" - "initialize_experiment_logfile" - "copy_tools_to_thisrun" + - "call_esm_runscripts_internally" - "_copy_preliminary_files_from_experiment_to_thisrun" diff --git a/configs/esm_software/esm_runscripts/esm_runscripts.yaml b/configs/esm_software/esm_runscripts/esm_runscripts.yaml index 7973ba4f8..e9177fd77 100644 --- a/configs/esm_software/esm_runscripts/esm_runscripts.yaml +++ b/configs/esm_software/esm_runscripts/esm_runscripts.yaml @@ -47,6 +47,7 @@ choose_job_type: - "_create_component_folders" - "initialize_experiment_logfile" - "copy_tools_to_thisrun" + - "call_esm_runscripts_internally" - "_copy_preliminary_files_from_experiment_to_thisrun" observe: diff --git 
a/src/esm_runscripts/prepexp.py b/src/esm_runscripts/prepexp.py index 8ac0359cd..b0fb9d80a 100644 --- a/src/esm_runscripts/prepexp.py +++ b/src/esm_runscripts/prepexp.py @@ -2,6 +2,7 @@ import shutil import sys import pathlib +import subprocess import questionary from colorama import Fore @@ -13,6 +14,10 @@ from .helpers import end_it_all, evaluate, write_to_log from loguru import logger +from . import prepcompute + +import pdb + def run_job(config): evaluate(config, "prepexp", "prepexp_recipe") @@ -36,6 +41,7 @@ def copy_tools_to_thisrun(config): Copies the tools, namelists and runscripts to the experiment directory, making sure that they don't overwrite previously existing files unless the ``-U`` flag is used. + Parameters ---------- config : dict @@ -101,20 +107,11 @@ def copy_tools_to_thisrun(config): # `killall esm_runscripts` might be required esm_parser.user_error(error_type, error_text) - # If ``fromdir`` and ``scriptsdir`` are the same, this is already a computing - # simulation which means we want to use the script in the experiment folder, - # so no copying is needed - if (fromdir == scriptsdir) and not gconfig["update"]: - if config["general"]["verbose"]: - print("Started from the experiment folder, continuing...") - return config - # Not computing but initialisation - else: - if not fromdir == scriptsdir: - if config["general"]["verbose"]: - print("Not started from experiment folder, restarting...") - else: - print("Tools were updated, restarting...") + # If ``fromdir`` and ``scriptsdir`` are the same (the same as ``isresubmitted=True``), + # this is already a computing simulation which means we want to use the script + # in the experiment folder, so no copying is needed. + + if not gconfig["isresubmitted"]: # At this point, ``fromdir`` and ``scriptsdir`` are different. 
Update the # runscript if necessary @@ -138,6 +135,34 @@ def copy_tools_to_thisrun(config): for tfile in gconfig["additional_files"]: update_runscript(fromdir, scriptsdir, tfile, gconfig, "additional file") + return config + +def call_esm_runscripts_internally(config): + """ + Calls esm_runscripts in a subprocess call. + + Parameters + ---------- + config : dict + Dictionary containing the configuration information. + + """ + + gconfig = config["general"] + + # Return if called from the experiment + if gconfig["isresubmitted"] and not gconfig["update"]: + if config["general"]["verbose"]: + print("Started from the experiment folder, continuing...") + return config + # Not computing but initialisation + else: + if not gconfig["isresubmitted"]: + if config["general"]["verbose"]: + print("Not started from experiment folder, restarting...") + else: + print("Tools were updated, restarting...") + scriptsdir = os.path.realpath(gconfig["experiment_scripts_dir"]) # remove the update option otherwise it will enter an infinite loop original_command = gconfig["original_command"] options_to_remove = [" -U ", " --update "] @@ -159,26 +184,29 @@ def copy_tools_to_thisrun(config): new_command_list.append(command) new_command = " ".join(new_command_list) - restart_command = f"cd {scriptsdir}; esm_runscripts {new_command}" + restart_command = f"esm_runscripts {new_command}" # Add non-interaction flags - non_interaction_flags = ["--no-motd", f"--last-jobtype {config['general']['jobtype']}"] + non_interaction_flags = ["--no-motd", f"--last-jobtype {config['general']['jobtype']}", f"-t {config['general']['jobtype']}"] for ni_flag in non_interaction_flags: # prevent continuous addition of ``ni_flag`` if ni_flag not in restart_command: restart_command += f" {ni_flag} " + #prepcompute._write_finalized_config(config, '/albedo/work/user/nwieters/myrunscripts/config_after_prepexp.txt') + if config["general"]["verbose"]: print(restart_command) - os.system(restart_command) + + if 
os.path.exists(scriptsdir): + subprocess.check_call(restart_command.split(), cwd=scriptsdir) gconfig["profile"] = False end_it_all(config) - def _create_folders(config, filetypes): """ - Generates the experiment file tree. Foldres are created for every filetype + Generates the experiment file tree. Folders are created for every filetype except for "ignore". """ for filetype in filetypes: From 6f6633a30844669db2535043eb12e3c54bdf8ccd Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Fri, 2 Feb 2024 16:42:06 +0100 Subject: [PATCH 81/98] Small changes. --- src/esm_runscripts/prepexp.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/src/esm_runscripts/prepexp.py b/src/esm_runscripts/prepexp.py index b0fb9d80a..12f8f7f25 100644 --- a/src/esm_runscripts/prepexp.py +++ b/src/esm_runscripts/prepexp.py @@ -112,9 +112,8 @@ def copy_tools_to_thisrun(config): # in the experiment folder, so no copying is needed. if not gconfig["isresubmitted"]: - - # At this point, ``fromdir`` and ``scriptsdir`` are different. Update the - # runscript if necessary + # At this point, ``fromdir`` and ``scriptsdir`` are different (same as gconfig["isresubmitted"]=False). 
+ # Update the runscript if necessary update_runscript( fromdir, scriptsdir, gconfig["scriptname"], gconfig, "runscript" ) @@ -157,12 +156,11 @@ def call_esm_runscripts_internally(config): return config # Not computing but initialisation else: - if not gconfig["isresubmitted"]: - if config["general"]["verbose"]: - print("Not started from experiment folder, restarting...") - else: - print("Tools were updated, restarting...") + if config["general"]["verbose"]: + print("Not started from experiment folder, restarting...") + scriptsdir = os.path.realpath(gconfig["experiment_scripts_dir"]) + # remove the update option otherwise it will enter an infinite loop original_command = gconfig["original_command"] options_to_remove = [" -U ", " --update "] @@ -193,13 +191,12 @@ def call_esm_runscripts_internally(config): if ni_flag not in restart_command: restart_command += f" {ni_flag} " - #prepcompute._write_finalized_config(config, '/albedo/work/user/nwieters/myrunscripts/config_after_prepexp.txt') - if config["general"]["verbose"]: print(restart_command) if os.path.exists(scriptsdir): subprocess.check_call(restart_command.split(), cwd=scriptsdir) + # Todo: include exception if scriptsdir not found gconfig["profile"] = False end_it_all(config) From bad0f350fab47caedf88a4943038d93f2118607f Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Mon, 12 Feb 2024 13:44:25 +0100 Subject: [PATCH 82/98] Added docstrings and refactoring _copy_preliminary_files_from_experiment_to_thisrun function in prepexp.py --- src/esm_runscripts/prepexp.py | 81 +++++++++++++++++++++++++++++------ 1 file changed, 68 insertions(+), 13 deletions(-) diff --git a/src/esm_runscripts/prepexp.py b/src/esm_runscripts/prepexp.py index 12f8f7f25..5f2ea904a 100644 --- a/src/esm_runscripts/prepexp.py +++ b/src/esm_runscripts/prepexp.py @@ -15,16 +15,31 @@ from loguru import logger from . import prepcompute +from . import filelists import pdb def run_job(config): + """ + Run prepexp job. 
+ + Parameters + ---------- + config : dict + Dictionary containing the configuration information. + """ evaluate(config, "prepexp", "prepexp_recipe") return config def color_diff(diff): + """ + + Parameters + ---------- + diff : + """ for line in diff: if line.startswith("+"): yield Fore.GREEN + line + Fore.RESET @@ -205,6 +220,13 @@ def _create_folders(config, filetypes): """ Generates the experiment file tree. Folders are created for every filetype except for "ignore". + + Parameters + ---------- + config : dict + Dictionary containing the configuration information. + filetypes: list + """ for filetype in filetypes: if not filetype == "ignore": @@ -222,6 +244,11 @@ def _create_setup_folders(config): This also creates a small marker file at the top of the experiment so that the "root" can be found from inside. + + Parameters + ---------- + config : dict + Dictionary containing the configuration information. """ _create_folders(config["general"], config["general"]["all_filetypes"]) with open( @@ -232,6 +259,13 @@ def _create_setup_folders(config): def _create_component_folders(config): + """ + Parameters + ---------- + config : dict + Dictionary containing the configuration information. + """ + for component in config["general"]["valid_model_names"]: _create_folders(config[component], config["general"]["all_model_filetypes"]) return config @@ -254,12 +288,12 @@ def initialize_experiment_logfile(config): Parameters ---------- - dict : + config : dict The experiment configuration Return ------ - dict : + config : dict As per convention for the plug-in system; this gives back the entire config. @@ -312,6 +346,7 @@ def update_runscript(fromdir, scriptsdir, tfile, gconfig, file_type): ``esm_runscripts``. If that flag is not used and the source and target are different then raises a user-friendly error recommending to use the ``-U`` flag with the warning that the files will be overwritten. 
+ Parameters ---------- cls : obj @@ -327,6 +362,7 @@ def update_runscript(fromdir, scriptsdir, tfile, gconfig, file_type): file_type : str String specifying the nature of the file, only necessary for printing information and for the error description. + Exceptions ---------- UserError @@ -399,27 +435,46 @@ def update_runscript(fromdir, scriptsdir, tfile, gconfig, file_type): def _copy_preliminary_files_from_experiment_to_thisrun(config): - # I don't like this one bit. DB + """ + - Copies the setup *.date file from /scripts/ folder + to /run_xxxxxxxx-xxxxxxxx/scripts/ folder. + - Copies the runscript yaml file from current folder (/scripts) + to /run_xxxxxxxx-xxxxxxxx/scripts/ + - Copies 'additional_files' (if any, e.g. fesom_output.yaml, that are called + via 'further_reading' in the runscript or other config file) from ... + to /run_xxxxxxxx-xxxxxxxx/scripts/ folder. + + Why here??? + + Parameters + ---------- + config : dict + Dictionary containing the configuration information. + """ + filelist = [ ( "scripts", f"{config['general']['expid']}_{config['general']['setup_name']}.date", "copy", + ), + ( + "scripts", + f"{config['general']['scriptname']}", + "copy", ) ] + for additional_file in config["general"].get("additional_files",[]): + filelist.append(("scripts", additional_file, "copy")) + for filetype, filename, copy_or_link in filelist: - source = config["general"]["experiment_" + filetype + "_dir"] - dest = config["general"]["thisrun_" + filetype + "_dir"] - if copy_or_link == "copy": - method = shutil.copy2 - elif copy_or_link == "link": - method = os.symlink + source = config["general"].get("experiment_" + filetype + "_dir", "") + dest = config["general"].get("thisrun_" + filetype + "_dir", "") + + method = filelists.get_method(copy_or_link) + if os.path.isfile(source + "/" + filename): method(source + "/" + filename, dest + "/" + filename) - this_script = config["general"]["scriptname"] - shutil.copy2("./" + this_script, 
config["general"]["thisrun_scripts_dir"]) - for additional_file in config["general"]["additional_files"]: - shutil.copy2(additional_file, config["general"]["thisrun_scripts_dir"]) return config From b799dba2e08a3cff28c37bf9ac71226fdd930f9b Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Tue, 13 Feb 2024 15:18:55 +0100 Subject: [PATCH 83/98] Rename step in prepexp recipe, extract function that runs esm_runscripts in subprocess. --- .../esm_runscripts/esm_plugins.yaml | 2 +- .../esm_runscripts/esm_runscripts.yaml | 2 +- src/esm_runscripts/prepexp.py | 77 +++++++++++++------ 3 files changed, 57 insertions(+), 24 deletions(-) diff --git a/configs/esm_software/esm_runscripts/esm_plugins.yaml b/configs/esm_software/esm_runscripts/esm_plugins.yaml index 35af8d2ec..4e74b1bd0 100644 --- a/configs/esm_software/esm_runscripts/esm_plugins.yaml +++ b/configs/esm_software/esm_runscripts/esm_plugins.yaml @@ -34,7 +34,7 @@ core: - "_create_component_folders" - "initialize_experiment_logfile" - "copy_tools_to_thisrun" - - "call_esm_runscripts_internally" + - "call_esm_runscripts_from_prepexp" - "_copy_preliminary_files_from_experiment_to_thisrun" diff --git a/configs/esm_software/esm_runscripts/esm_runscripts.yaml b/configs/esm_software/esm_runscripts/esm_runscripts.yaml index e9177fd77..2f3783f5f 100644 --- a/configs/esm_software/esm_runscripts/esm_runscripts.yaml +++ b/configs/esm_software/esm_runscripts/esm_runscripts.yaml @@ -47,7 +47,7 @@ choose_job_type: - "_create_component_folders" - "initialize_experiment_logfile" - "copy_tools_to_thisrun" - - "call_esm_runscripts_internally" + - "call_esm_runscripts_from_prepexp" - "_copy_preliminary_files_from_experiment_to_thisrun" observe: diff --git a/src/esm_runscripts/prepexp.py b/src/esm_runscripts/prepexp.py index 5f2ea904a..0c685c4dd 100644 --- a/src/esm_runscripts/prepexp.py +++ b/src/esm_runscripts/prepexp.py @@ -151,9 +151,57 @@ def copy_tools_to_thisrun(config): return config -def call_esm_runscripts_internally(config): 
+def _call_esm_runscripts_internally(config, command, exedir): """ - Calls esm_runscripts in a subprocess call. + - Removes update flags from command input. + - Adds additional flags to command input. + - Addes esm_runscripts command if necessary. + - Calls esm_runscipts internally in a subprocess call. + + Parameters + ---------- + config : dict + Dictionary containing the configuration information. + command : str + Command or esm_runscripts arguments + exedir : str + Path from which the command is to be executed. + + """ + + # Remove the update option otherwise it will enter an infinite loop. + options_to_remove = [" -U ", " --update "] + for option in options_to_remove: + command = command.replace(option, " ") + + # Check if 'esm_runscripts' command is given in 'command' argument. + if not command.startswith("esm_runscripts"): + command = f"esm_runscripts {command}" + + # Add non-interaction flags, current jobtype, and current task (phase) [-t] if not already in 'command' + non_interaction_flags = ["--no-motd", f"--last-jobtype {config['general']['jobtype']}", f"-t {config['general']['jobtype']}"] + for ni_flag in non_interaction_flags: + # prevent continuous addition of ``ni_flag`` + if ni_flag not in command: + command += f" {ni_flag} " + + # Check if the path exists, in which 'commend' should be executed + if os.path.exists(exedir): + subprocess.check_call(command.split(), cwd=exedir) + else: + error_type = "runtime error in function ``_call_esm_runscripts_internally``" + error_text = f"{exedir} does not exists. Aborting." + esm_parser.user_error(error_type, error_text) + + if config["general"]["verbose"]: + print(command) + + end_it_all(config) + +def call_esm_runscripts_from_prepexp(config): + """ + Recipe step that creates a esm_runscripts command and submits this + to the functions that executes this command in a subprocess call. 
Parameters ---------- @@ -164,11 +212,12 @@ def call_esm_runscripts_internally(config): gconfig = config["general"] - # Return if called from the experiment + # Return if already called from the experiment folder if gconfig["isresubmitted"] and not gconfig["update"]: if config["general"]["verbose"]: print("Started from the experiment folder, continuing...") return config + # Not computing but initialisation else: if config["general"]["verbose"]: @@ -178,15 +227,13 @@ def call_esm_runscripts_internally(config): # remove the update option otherwise it will enter an infinite loop original_command = gconfig["original_command"] - options_to_remove = [" -U ", " --update "] - for option in options_to_remove: - original_command = original_command.replace(option, " ") # Before resubmitting the esm_runscripts, the path of the runscript # needs to be modified. Remove the absolute/relative path runscript_absdir, runscript = os.path.split(gconfig["runscript_abspath"]) original_command_list = original_command.split() new_command_list = [] + for command in original_command_list: # current command will contain the full path, so replace it with # the YAML file only since we are going to execute it from the @@ -197,24 +244,10 @@ def call_esm_runscripts_internally(config): new_command_list.append(command) new_command = " ".join(new_command_list) - restart_command = f"esm_runscripts {new_command}" - - # Add non-interaction flags - non_interaction_flags = ["--no-motd", f"--last-jobtype {config['general']['jobtype']}", f"-t {config['general']['jobtype']}"] - for ni_flag in non_interaction_flags: - # prevent continuous addition of ``ni_flag`` - if ni_flag not in restart_command: - restart_command += f" {ni_flag} " - if config["general"]["verbose"]: - print(restart_command) + _call_esm_runscripts_internally(config, new_command, scriptsdir) - if os.path.exists(scriptsdir): - subprocess.check_call(restart_command.split(), cwd=scriptsdir) - # Todo: include exception if scriptsdir not found - 
- gconfig["profile"] = False - end_it_all(config) + return config def _create_folders(config, filetypes): """ From ec2f8a702089d037cc74852b39c9a10dc0636221 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Wed, 14 Feb 2024 16:19:37 +0100 Subject: [PATCH 84/98] Bugfix --- src/esm_runscripts/prepexp.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/esm_runscripts/prepexp.py b/src/esm_runscripts/prepexp.py index 0c685c4dd..935dd0cec 100644 --- a/src/esm_runscripts/prepexp.py +++ b/src/esm_runscripts/prepexp.py @@ -212,8 +212,11 @@ def call_esm_runscripts_from_prepexp(config): gconfig = config["general"] + fromdir = os.path.realpath(gconfig["started_from"]) + scriptsdir = os.path.realpath(gconfig["experiment_scripts_dir"]) + # Return if already called from the experiment folder - if gconfig["isresubmitted"] and not gconfig["update"]: + if (fromdir == scriptsdir) and not gconfig["update"]: if config["general"]["verbose"]: print("Started from the experiment folder, continuing...") return config From 5ed14cb11ebfe40502e87dfd00c01933830c65aa Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Wed, 14 Feb 2024 16:50:47 +0100 Subject: [PATCH 85/98] Small changes. 
--- src/esm_runscripts/prepexp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/esm_runscripts/prepexp.py b/src/esm_runscripts/prepexp.py index 935dd0cec..fb127fe80 100644 --- a/src/esm_runscripts/prepexp.py +++ b/src/esm_runscripts/prepexp.py @@ -196,6 +196,7 @@ def _call_esm_runscripts_internally(config, command, exedir): if config["general"]["verbose"]: print(command) + # Exit after resubmission of esm_runscripts end_it_all(config) def call_esm_runscripts_from_prepexp(config): @@ -228,7 +229,6 @@ def call_esm_runscripts_from_prepexp(config): scriptsdir = os.path.realpath(gconfig["experiment_scripts_dir"]) - # remove the update option otherwise it will enter an infinite loop original_command = gconfig["original_command"] # Before resubmitting the esm_runscripts, the path of the runscript From 50d25ac407d5f070ddbd2edcdf5a1fe3ef5d03ac Mon Sep 17 00:00:00 2001 From: Miguel Andres-Martinez Date: Thu, 15 Feb 2024 13:36:45 +0100 Subject: [PATCH 86/98] isort src/esm_runscripts/prepexp.py --- src/esm_runscripts/prepexp.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/esm_runscripts/prepexp.py b/src/esm_runscripts/prepexp.py index fb127fe80..e9f1df4ea 100644 --- a/src/esm_runscripts/prepexp.py +++ b/src/esm_runscripts/prepexp.py @@ -1,23 +1,19 @@ import os -import shutil -import sys import pathlib +import shutil import subprocess +import sys import questionary from colorama import Fore +from loguru import logger -import esm_tools import esm_parser +import esm_tools +from . import filelists, prepcompute from .batch_system import batch_system from .helpers import end_it_all, evaluate, write_to_log -from loguru import logger - -from . import prepcompute -from . 
import filelists - -import pdb def run_job(config): @@ -418,6 +414,7 @@ def update_runscript(fromdir, scriptsdir, tfile, gconfig, file_type): # If the target path exists compare the two scripts else: import difflib + import esm_parser script_o = open(fromdir + "/" + tfile).readlines() From 532792c9ccd6bf9bc34d35d7f8df36cc26c26631 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Mon, 19 Feb 2024 10:40:08 +0100 Subject: [PATCH 87/98] Add comment in prepexp recipe. --- configs/esm_software/esm_runscripts/esm_plugins.yaml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/configs/esm_software/esm_runscripts/esm_plugins.yaml b/configs/esm_software/esm_runscripts/esm_plugins.yaml index 4e74b1bd0..7c5ff537b 100644 --- a/configs/esm_software/esm_runscripts/esm_plugins.yaml +++ b/configs/esm_software/esm_runscripts/esm_plugins.yaml @@ -34,11 +34,15 @@ core: - "_create_component_folders" - "initialize_experiment_logfile" - "copy_tools_to_thisrun" + # The next step will call esm_runscripts again from the experiment folder, + # if the current folder is not the experiment folder already. + # If esm_runscripts will be excuted, the following step will be skipped, since + # there is a sys.exit() after the esm_runscripts call. - "call_esm_runscripts_from_prepexp" + # The following step will be skipped, if not in experiment folder. + # It will only be called if esm_runscripts is called from experiment folder. - "_copy_preliminary_files_from_experiment_to_thisrun" - - prepcompute: - "compile_model" - "_write_finalized_config" From 7dced13d79c93474ff25b4e6af93cc8bb749bcb2 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Mon, 19 Feb 2024 11:36:02 +0100 Subject: [PATCH 88/98] Added review suggestions. 
--- src/esm_runscripts/prepexp.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/src/esm_runscripts/prepexp.py b/src/esm_runscripts/prepexp.py index e9f1df4ea..bd6154f63 100644 --- a/src/esm_runscripts/prepexp.py +++ b/src/esm_runscripts/prepexp.py @@ -31,10 +31,14 @@ def run_job(config): def color_diff(diff): """ + Adds color to text from a diff: + - Green for lines starting with ``+`` + - Red for lines starting with ``-`` + - Blue for lines starting with ``^`` Parameters ---------- - diff : + diff : iterable object of strings to be colored """ for line in diff: if line.startswith("+"): @@ -198,13 +202,21 @@ def _call_esm_runscripts_internally(config, command, exedir): def call_esm_runscripts_from_prepexp(config): """ Recipe step that creates a esm_runscripts command and submits this - to the functions that executes this command in a subprocess call. + to the function that modifies (if necessary) and executes this command + in a subprocess call, if the current folder is NOT the experiment folder. + The function will return and do nothing, if it is called already + from the experiment folder. + Parameters ---------- config : dict Dictionary containing the configuration information. + Returns + ------- + config : dict + Dictionary containing the configuration information. """ gconfig = config["general"] @@ -212,7 +224,7 @@ def call_esm_runscripts_from_prepexp(config): fromdir = os.path.realpath(gconfig["started_from"]) scriptsdir = os.path.realpath(gconfig["experiment_scripts_dir"]) - # Return if already called from the experiment folder + # Return if already called from the experiment folder without update flag if (fromdir == scriptsdir) and not gconfig["update"]: if config["general"]["verbose"]: print("Started from the experiment folder, continuing...") @@ -477,8 +489,6 @@ def _copy_preliminary_files_from_experiment_to_thisrun(config): via 'further_reading' in the runscript or other config file) from ... 
to /run_xxxxxxxx-xxxxxxxx/scripts/ folder. - Why here??? - Parameters ---------- config : dict @@ -502,8 +512,8 @@ def _copy_preliminary_files_from_experiment_to_thisrun(config): filelist.append(("scripts", additional_file, "copy")) for filetype, filename, copy_or_link in filelist: - source = config["general"].get("experiment_" + filetype + "_dir", "") - dest = config["general"].get("thisrun_" + filetype + "_dir", "") + source = config["general"].get(f"experiment_{filetype}_dir", "") + dest = config["general"].get(f"thisrun_{filetype}_dir", "") method = filelists.get_method(copy_or_link) From 9910105a8f0dcf3c9761721226d9562a4a9aa401 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Mon, 19 Feb 2024 13:31:41 +0100 Subject: [PATCH 89/98] Changed condition for runscript update. --- src/esm_runscripts/prepexp.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/esm_runscripts/prepexp.py b/src/esm_runscripts/prepexp.py index bd6154f63..6a1741e2e 100644 --- a/src/esm_runscripts/prepexp.py +++ b/src/esm_runscripts/prepexp.py @@ -122,13 +122,9 @@ def copy_tools_to_thisrun(config): # `killall esm_runscripts` might be required esm_parser.user_error(error_type, error_text) - # If ``fromdir`` and ``scriptsdir`` are the same (the same as ``isresubmitted=True``), - # this is already a computing simulation which means we want to use the script - # in the experiment folder, so no copying is needed. - - if not gconfig["isresubmitted"]: - # At this point, ``fromdir`` and ``scriptsdir`` are different (same as gconfig["isresubmitted"]=False). - # Update the runscript if necessary + # If ``fromdir`` and ``scriptsdir`` are different, we are not in the experiment. + # In this case, update the runscript if necessary. 
+ if not fromdir == scriptsdir: update_runscript( fromdir, scriptsdir, gconfig["scriptname"], gconfig, "runscript" ) From b86b0712185788b4733d40fe6db78fff74d3eb4d Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Mon, 19 Feb 2024 14:16:52 +0100 Subject: [PATCH 90/98] Changed string concatenations to f-string format. --- src/esm_runscripts/prepexp.py | 38 +++++++++++++++++------------------ 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/src/esm_runscripts/prepexp.py b/src/esm_runscripts/prepexp.py index 6a1741e2e..94e94dca7 100644 --- a/src/esm_runscripts/prepexp.py +++ b/src/esm_runscripts/prepexp.py @@ -74,8 +74,8 @@ def copy_tools_to_thisrun(config): # Paths inside the experiment directory where esm_tools and namelists # are copied to. Those are not functional but a reference to what was # the original state when the experiment was firstly started - tools_dir = scriptsdir + "/esm_tools/configs" - namelists_dir = scriptsdir + "/esm_tools/namelists" + tools_dir = f"{scriptsdir}/esm_tools/configs" + namelists_dir = f"{scriptsdir}/esm_tools/namelists" if config["general"]["verbose"]: print("Started from :", fromdir) @@ -271,10 +271,10 @@ def _create_folders(config, filetypes): for filetype in filetypes: if not filetype == "ignore": if not filetype == "work": - if not os.path.exists(config["experiment_" + filetype + "_dir"]): - os.makedirs(config["experiment_" + filetype + "_dir"]) - if not os.path.exists(config["thisrun_" + filetype + "_dir"]): - os.makedirs(config["thisrun_" + filetype + "_dir"]) + if not os.path.exists(config[f"experiment_{filetype}_dir"]): + os.makedirs(config[f"experiment_{filetype}_dir"]) + if not os.path.exists(config[f"thisrun_{filetype}_dir"]): + os.makedirs(config[f"thisrun_{filetype}_dir"]) def _create_setup_folders(config): @@ -292,7 +292,7 @@ def _create_setup_folders(config): """ _create_folders(config["general"], config["general"]["all_filetypes"]) with open( - config["general"]["experiment_dir"] + "/.top_of_exp_tree", 
"w" + f"{config['general']['experiment_dir']}/.top_of_exp_tree", "w" ) as top_marker: top_marker.write(f"Top of experiment {config['general']['expid']}") return config @@ -415,8 +415,8 @@ def update_runscript(fromdir, scriptsdir, tfile, gconfig, file_type): # If the target file in ``scriptsdir`` does not exist, then copy the file # to the target. - if not os.path.isfile(scriptsdir + "/" + tfile): - oldscript = fromdir + "/" + tfile + if not os.path.isfile(f"{scriptsdir}/{tfile}"): + oldscript = f"{fromdir}/{tfile}" print(oldscript) shutil.copy2(oldscript, scriptsdir) # If the target path exists compare the two scripts @@ -425,16 +425,16 @@ def update_runscript(fromdir, scriptsdir, tfile, gconfig, file_type): import esm_parser - script_o = open(fromdir + "/" + tfile).readlines() - script_t = open(scriptsdir + "/" + tfile).readlines() + script_o = open(f"{fromdir}/{tfile}").readlines() + script_t = open(f"{scriptsdir}/{tfile}").readlines() diffobj = difflib.SequenceMatcher(a=script_t, b=script_o) # If the files are different if not diffobj.ratio() == 1: # Find differences differences = ( - f"{fromdir + '/' + tfile} differs from " - + f"{scriptsdir + '/' + tfile}:\n" + f"{fromdir}/{tfile} differs from " + + f"{scriptsdir}/'{tfile}:\n" ) for line in color_diff(difflib.unified_diff(script_t, script_o)): differences += line @@ -444,9 +444,9 @@ def update_runscript(fromdir, scriptsdir, tfile, gconfig, file_type): if gconfig["update"]: esm_parser.user_note( f"Original {file_type} different from target", - differences + "\n" + f"{scriptsdir + '/' + tfile} will be updated!", + f"{differences}\n{scriptsdir}/{tfile} will be updated!", ) - oldscript = fromdir + "/" + tfile + oldscript = f"{fromdir}/{tfile}" print(oldscript) shutil.copy2(oldscript, scriptsdir) # If the --update flag is not called, exit with an error showing the @@ -466,10 +466,10 @@ def update_runscript(fromdir, scriptsdir, tfile, gconfig, file_type): + "updated with the above changes?" 
).ask() if update_choice: - oldscript = fromdir + "/" + tfile + oldscript = f"{fromdir}/{tfile}" print(oldscript) shutil.copy2(oldscript, scriptsdir) - print(f"{scriptsdir + '/' + tfile} updated!") + print(f"{scriptsdir}/{tfile} updated!") else: print("Submission stopped") sys.exit(1) @@ -513,7 +513,7 @@ def _copy_preliminary_files_from_experiment_to_thisrun(config): method = filelists.get_method(copy_or_link) - if os.path.isfile(source + "/" + filename): - method(source + "/" + filename, dest + "/" + filename) + if os.path.isfile(f"{source}/{filename}"): + method(f"{source}/{filename}", f"{dest}/{filename}") return config From 3dc080b0da0444b3d62b2d5e0a8ae59ea297f826 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Mon, 19 Feb 2024 15:07:13 +0100 Subject: [PATCH 91/98] Applied flake8 recommendations. --- src/esm_runscripts/prepexp.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/src/esm_runscripts/prepexp.py b/src/esm_runscripts/prepexp.py index 94e94dca7..6fe5a6625 100644 --- a/src/esm_runscripts/prepexp.py +++ b/src/esm_runscripts/prepexp.py @@ -11,8 +11,7 @@ import esm_parser import esm_tools -from . import filelists, prepcompute -from .batch_system import batch_system +from . import filelists from .helpers import end_it_all, evaluate, write_to_log @@ -96,7 +95,7 @@ def copy_tools_to_thisrun(config): print("Copying standard yamls from: ", esm_tools.get_config_filepath()) esm_tools.copy_config_folder(tools_dir) if not os.path.isdir(namelists_dir): - print("Copying standard namelists from: ",esm_tools.get_namelist_filepath()) + print("Copying standard namelists from: ", esm_tools.get_namelist_filepath()) esm_tools.copy_namelist_folder(namelists_dir) # check for recursive creation of the file tree. This prevents the risk of @@ -147,6 +146,7 @@ def copy_tools_to_thisrun(config): return config + def _call_esm_runscripts_internally(config, command, exedir): """ - Removes update flags from command input. 
@@ -174,8 +174,13 @@ def _call_esm_runscripts_internally(config, command, exedir): if not command.startswith("esm_runscripts"): command = f"esm_runscripts {command}" - # Add non-interaction flags, current jobtype, and current task (phase) [-t] if not already in 'command' - non_interaction_flags = ["--no-motd", f"--last-jobtype {config['general']['jobtype']}", f"-t {config['general']['jobtype']}"] + # Add non-interaction flags, current jobtype, and current task (phase) [-t] + # if not already in 'command' + non_interaction_flags = [ + "--no-motd", + f"--last-jobtype {config['general']['jobtype']}", + f"-t {config['general']['jobtype']}" + ] for ni_flag in non_interaction_flags: # prevent continuous addition of ``ni_flag`` if ni_flag not in command: @@ -195,12 +200,13 @@ def _call_esm_runscripts_internally(config, command, exedir): # Exit after resubmission of esm_runscripts end_it_all(config) + def call_esm_runscripts_from_prepexp(config): """ Recipe step that creates a esm_runscripts command and submits this to the function that modifies (if necessary) and executes this command in a subprocess call, if the current folder is NOT the experiment folder. - The function will return and do nothing, if it is called already + The function will return and do nothing, if it is called already from the experiment folder. @@ -230,7 +236,7 @@ def call_esm_runscripts_from_prepexp(config): else: if config["general"]["verbose"]: print("Not started from experiment folder, restarting...") - + scriptsdir = os.path.realpath(gconfig["experiment_scripts_dir"]) original_command = gconfig["original_command"] @@ -256,6 +262,7 @@ def call_esm_runscripts_from_prepexp(config): return config + def _create_folders(config, filetypes): """ Generates the experiment file tree. Folders are created for every filetype @@ -479,7 +486,7 @@ def _copy_preliminary_files_from_experiment_to_thisrun(config): """ - Copies the setup *.date file from /scripts/ folder to /run_xxxxxxxx-xxxxxxxx/scripts/ folder. 
- - Copies the runscript yaml file from current folder (/scripts) + - Copies the runscript yaml file from current folder (/scripts) to /run_xxxxxxxx-xxxxxxxx/scripts/ - Copies 'additional_files' (if any, e.g. fesom_output.yaml, that are called via 'further_reading' in the runscript or other config file) from ... @@ -504,7 +511,7 @@ def _copy_preliminary_files_from_experiment_to_thisrun(config): ) ] - for additional_file in config["general"].get("additional_files",[]): + for additional_file in config["general"].get("additional_files", []): filelist.append(("scripts", additional_file, "copy")) for filetype, filename, copy_or_link in filelist: From 8208f29236bf552e92a7c3ab29e4605785d6349f Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Tue, 20 Feb 2024 09:36:35 +0100 Subject: [PATCH 92/98] Add flake8 recommondations. --- tests/test_esm_runscripts/test_workflow.py | 23 ++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/tests/test_esm_runscripts/test_workflow.py b/tests/test_esm_runscripts/test_workflow.py index eb59efb9c..1e030dac1 100644 --- a/tests/test_esm_runscripts/test_workflow.py +++ b/tests/test_esm_runscripts/test_workflow.py @@ -6,6 +6,7 @@ import pytest import esm_parser + @pytest.fixture() def test_config(): """Setup a test config dictionary.""" @@ -116,6 +117,7 @@ def test_config(): } return config + @pytest.fixture() def test_default_config_example(): """Setup a test config dictionary.""" @@ -184,6 +186,7 @@ def test_default_config_example(): } return config + # Test scenarios # 0. Default workflow @pytest.mark.example @@ -193,6 +196,7 @@ def test_example_0(test_default_config_example): assumption = "prepcompute ['prepcompute'] -> compute ['compute'] -> tidy ['tidy'] -> prepcompute ['prepcompute']" assert order == assumption + # 1. 
Add one single phase at the end of the default workflow (Example 1 in documentation) @pytest.mark.example def test_example_1(test_default_config_example): @@ -208,6 +212,7 @@ def test_example_1(test_default_config_example): order = workflow.display_workflow_sequence(test_default_config_example, display=False) assert order == assumption + # 2. Prepend new phase at the beginning of workflow @pytest.mark.example def test_example_2(test_default_config_example): @@ -224,6 +229,7 @@ def test_example_2(test_default_config_example): order = workflow.display_workflow_sequence(test_default_config_example, display=False) assert order == assumption + # 3. Append new phase at the beginning of workflow @pytest.mark.example def test_example_3(test_default_config_example): @@ -240,6 +246,7 @@ def test_example_3(test_default_config_example): order = workflow.display_workflow_sequence(test_default_config_example, display=False) assert order == assumption + # 4. Append two new phases in the same cluster @pytest.mark.example def test_example_4(test_default_config_example): @@ -266,6 +273,7 @@ def test_example_4(test_default_config_example): order = workflow.display_workflow_sequence(test_default_config_example, display=False) assert order == assumption + # 5. Append two new phases in the same cluster, one of them triggers the next run @pytest.mark.example def test_example_5(test_default_config_example): @@ -294,8 +302,8 @@ def test_example_5(test_default_config_example): assert order == assumption # 6. Append two new phases in the same cluster at the beginning of run -#@pytest.mark.example -#def test_example_6(test_default_config_example): +# @pytest.mark.example +# def test_example_6(test_default_config_example): # test_default_config_example["general"]["workflow"] = { # 'phases': { # 'my_new_last_phase': { @@ -320,7 +328,6 @@ def test_example_5(test_default_config_example): # assert order == assumption - # Test exceptions # 1. If still a workflow keyword is set by user. 
@pytest.mark.exceptions @@ -329,6 +336,7 @@ def test_exception_test_workflow_keyword(test_config): with pytest.raises(SystemExit): test_config = workflow.assemble_workflow(test_config) + # 2. If an invalid phase keyword is set. @pytest.mark.exceptions def test_exception_invalid_phase_keyword(test_config): @@ -336,6 +344,7 @@ def test_exception_invalid_phase_keyword(test_config): with pytest.raises(SystemExit): test_config = workflow.assemble_workflow(test_config) + # 3. If an unknown phase is called for , e.g. in 'run_after' @pytest.mark.exceptions def test_exception_unknown_phase(test_config): @@ -343,6 +352,7 @@ def test_exception_unknown_phase(test_config): with pytest.raises(SystemExit): test_config = workflow.assemble_workflow(test_config) + # 4. If a user phase has the same name as a default phase. @pytest.mark.exceptions def test_if_user_phase_has_default_phase_name(test_config): @@ -359,6 +369,7 @@ def test_if_user_phase_has_default_phase_name(test_config): with pytest.raises(SystemExit): test_config = workflow.assemble_workflow(test_config) + # 5. If two user phases have the same name and are defined in different models/setups. @pytest.mark.exceptions def test_if_two_user_phase_have_the_same_name(test_config): @@ -375,6 +386,7 @@ def test_if_two_user_phase_have_the_same_name(test_config): with pytest.raises(SystemExit): test_config = workflow.assemble_workflow(test_config) + # 6. If no queue is given for a phase that should be run on sbatch system. 
@pytest.mark.exceptions def test_if_queue_is_missing(test_config): @@ -382,7 +394,7 @@ def test_if_queue_is_missing(test_config): 'batch_or_shell': 'batch', 'order_in_cluster': 'concurrent', 'cluster': 'test_cluster', - #'run_on_queue': 'compute', + # 'run_on_queue': 'compute', 'nproc': 1, 'run_after': 'tidy', 'script_dir': '/work/ab0995/a270089/myrunscripts/', @@ -391,6 +403,7 @@ def test_if_queue_is_missing(test_config): with pytest.raises(SystemExit): test_config = workflow.assemble_workflow(test_config) + # 7. If more than one phase trigger_next_run. @pytest.mark.exceptions def test_if_trigger_next_run_unclear(test_config): @@ -398,6 +411,7 @@ def test_if_trigger_next_run_unclear(test_config): with pytest.raises(SystemExit): test_config = workflow.assemble_workflow(test_config) + # 8. If no default phases are defined in defaults.yaml. @pytest.mark.exceptions def test_if_no_default_phases(test_config): @@ -405,6 +419,7 @@ def test_if_no_default_phases(test_config): with pytest.raises(SystemExit): test_config = workflow.assemble_workflow(test_config) + # 9. If no default workflow is defined in defaults.yaml. @pytest.mark.exceptions def test_inf_no_default_workflow(test_config): From e3f8e2b264da035827031ad1d92b47c941e9006c Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Tue, 20 Feb 2024 14:32:05 +0100 Subject: [PATCH 93/98] Added workflow test. 
--- tests/test_esm_runscripts/test_workflow.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/test_esm_runscripts/test_workflow.py b/tests/test_esm_runscripts/test_workflow.py index 1e030dac1..e19c54f20 100644 --- a/tests/test_esm_runscripts/test_workflow.py +++ b/tests/test_esm_runscripts/test_workflow.py @@ -426,3 +426,11 @@ def test_inf_no_default_workflow(test_config): test_config['general']['defaults.yaml'].pop('workflow', None) with pytest.raises(SystemExit): test_config = workflow.assemble_workflow(test_config) + + +def test_get_workflow_commands_for_run(): + config = esm_parser.yaml_file_to_dict('config3.yaml') + config = workflow.get_workflow_commands_for_run(config) + # assert order == assumption + assert 1 == 2 + From 6e3c43a9b5c66a799149c8b70e80230f5e169a1b Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Thu, 22 Feb 2024 15:07:51 +0100 Subject: [PATCH 94/98] Reactivate call of maybe_resubmit. --- src/esm_runscripts/sim_objects.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/esm_runscripts/sim_objects.py b/src/esm_runscripts/sim_objects.py index 1bbc59222..9945869cf 100644 --- a/src/esm_runscripts/sim_objects.py +++ b/src/esm_runscripts/sim_objects.py @@ -163,7 +163,7 @@ def __call__(self, kill_after_submit=True): # Is this dunction call needed here? self.assembler() - #resubmit.maybe_resubmit(self.config) + resubmit.maybe_resubmit(self.config) # if this line is reached, the run is submitted and running or finished self.config = logfiles.finalize_logfiles(self.config, org_jobtype) From 8386d6f2137e19173e545948d593769fe5ef4731 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Thu, 22 Feb 2024 15:32:05 +0100 Subject: [PATCH 95/98] Implement new function to write run file. 
--- src/esm_runscripts/batch_system.py | 167 +++++++++++++++++++++++++---- 1 file changed, 148 insertions(+), 19 deletions(-) diff --git a/src/esm_runscripts/batch_system.py b/src/esm_runscripts/batch_system.py index 82cbccb53..d963dfa27 100644 --- a/src/esm_runscripts/batch_system.py +++ b/src/esm_runscripts/batch_system.py @@ -1,17 +1,16 @@ +import copy import os -import textwrap -import sys +import pdb import stat -import copy +import sys +import textwrap import esm_environment - from esm_parser import find_variable, user_error, user_note -from . import helpers -from . import dataprocess -from . import prepare -from .slurm import Slurm + +from . import dataprocess, helpers, prepare from .pbs import Pbs +from .slurm import Slurm known_batch_systems = ["slurm", "pbs"] reserved_jobtypes = ["prepcompute", "compute", "prepare", "tidy", "inspect"] @@ -263,7 +262,7 @@ def calculate_requirements(config, cluster=None): if ( not cluster - or not cluster in config["general"]["workflow"]["subjob_clusters"] + or cluster not in config["general"]["workflow"]["subjob_clusters"] ): print(f"Unknown or unset cluster: {cluster}.") sys.exit(-1) @@ -324,7 +323,7 @@ def get_extra(config): ) elif isinstance(pre_run_commands, str): extras.append(pre_run_commands) - elif pre_run_commands == None: + elif pre_run_commands is None: continue else: user_error( @@ -371,6 +370,33 @@ def append_done_statement(config, subjob): @staticmethod def get_run_commands(config, subjob, batch_or_shell): # here or in compute.py? + """ + Creates the command of the specific phase to be put in the *.run file. + + This function is covering the following phase types: + - SimulationSetup: phases that are run as 'esm_runscripts' command + - batch: phases that are run via 'srun' command + - shell: phases that are run as shell scripts. The command is generated by + a function in the 'dataprocess' module. 
+ + Special case: phase 'compute': + - This phase is of type 'batch' + + Todo: How about other phases of type batch? in dataprocess??? + + Parameters + ---------- + config: dict + subjob: str + Name of phase + batch_or_shell: str + Type of phase (SimulationSetup, batch, shell) + + Returns + ------- + commands: list + List of command and arguments of a phase depending of its type. + """ commands = [] if subjob.startswith("compute"): @@ -385,7 +411,7 @@ def get_run_commands(config, subjob, batch_or_shell): # here or in compute.py? ) if config["general"].get("multi_srun"): return self.bs.get_run_commands_multisrun(config, commands) - # for shell scrips + # for shell scripts else: for model in config: if model == "computer": @@ -397,9 +423,48 @@ def get_run_commands(config, subjob, batch_or_shell): # here or in compute.py? + f" 2>&1{config['computer'].get('write_execution_log', '')} &" ) else: - subjob_tasks = dataprocess.subjob_tasks(config, subjob, batch_or_shell) - for task in subjob_tasks: - commands.append(task) + if batch_or_shell == "SimulationSetup": + # for phase type 'SimulationSetup' (e.g. 
prepcompute, tidy) + commands = [] + commands.append("esm_runscripts") + # add runscript with absolute path + runscript = config["general"]["runscript_abspath"] + commands.append(runscript) + # add experiment id + commands.append(f"-e {config['general']['expid']}") + # add task + commands.append(f"-t {subjob}") + # add date + commands.append("-s " + config['general']['current_date'].format( + form=9, givenph=False, givenpm=False, givenps=False + )) + # add + commands.append(f"-r {str(config['general']['run_number'])}") + # add verbose and no message_of_the day argument + commands.append("-v --no-motd") + # add last-jobtype argument + commands.append(f"--last-jobtype {subjob}") + # add --open-ran or use_venv argument + if "--open-run" in config["general"]["original_command"] or not config[ + "general" + ].get("use_venv"): + commands.append(" --open-run") + elif "--contained-run" in config["general"][ + "original_command" + ] or config["general"].get("use_venv"): + commands.append("--contained-run") + else: + print("ERROR -- Not sure if you were in a contained or open run!") + print( + "ERROR -- See write_simple_runscript for the code causing this." + ) + sys.exit(1) + else: + # for all other phase types (batch, shell) except phase 'compute' + subjob_tasks = dataprocess.subjob_tasks(config, subjob, batch_or_shell) + # Why was this necessary? And not set commands directly? 
+ for task in subjob_tasks: + commands.append(task) return commands @@ -430,8 +495,73 @@ def get_submit_command(config, batch_or_shell, runfilename): return commands @staticmethod - def write_simple_runscript(config, cluster, batch_or_shell="batch"): + def write_run_batch_script(config, cluster, batch_or_shell="batch"): + + workflow = config["general"]["workflow"]["object"] + phases = workflow.phases + + self = config["general"]["batch"] + runfilename = batch_system.get_run_filename(config, cluster) + if config["general"]["verbose"]: + print("jobtype: ", config["general"]["jobtype"]) + print("writing run file for:", cluster) + + with open(runfilename, "w") as runfile: + config = batch_system.calculate_requirements(config, "compute") + # TODO: remove it once it's not needed anymore (substituted by packjob) + if ( + cluster in reserved_jobtypes + and config["computer"].get("taskset", False) + ): + config = config["general"]["batch"].write_het_par_wrappers(config) + # Prepare launcher + config = config["general"]["batch"].prepare_launcher(config, "compute") + # Initiate the header + header = batch_system.get_batch_header(config, "compute") + for line in header: + runfile.write(line + "\n") + runfile.write("\n") + # environment for each phase of a cluster + environment = batch_system.get_environment(config, "compute") + batch_system.write_env(config, environment, runfilename) + for line in environment: + runfile.write(line + "\n") + + # extra entries for each phase + extra = batch_system.get_extra(config) + for line in extra: + runfile.write(line + "\n") + + for phase in ["compute", "tidy", "prepcompute"]: + # Add actual commands + commands = batch_system.get_run_commands( + config, phase, batch_or_shell + ) + # commands = clusterconf.get("data_task_list", []) + runfile.write("\n") + runfile.write(self.append_start_statement(config, phase) + "\n") + runfile.write("\n") + runfile.write("cd " + config["general"]["thisrun_work_dir"] + "\n") + +# if cluster in 
reserved_jobtypes: + config["general"]["batch"].add_pre_launcher_lines( + config, cluster, runfile + ) + + phase = workflow.get_workflow_phase_by_name(phase) + command = phase["run_command"] + runfile.write(f"{command} --run-from-batch-script\n") + runfile.write(workflow.append_done_statement(config, phase) + "\n") + + runfile.write("\n") + runfile.write("wait\n") + + breakpoint() + return config + + @staticmethod + def write_simple_runscript(config, cluster, batch_or_shell="batch"): # if no cluster is specified, work on the one we are in # if not cluster: # cluster = config["general"]["jobtype"] @@ -833,7 +963,7 @@ def calc_launcher_flags(config, model, cluster): cpus_per_proc = config[model].get("cpus_per_proc", omp_num_threads) # Check for CPUs and OpenMP threads if omp_num_threads > cpus_per_proc: - esm_parser.user_error( + user_error( "OpenMP configuration", ( "The number of OpenMP threads cannot be larger than the number" @@ -845,7 +975,7 @@ def calc_launcher_flags(config, model, cluster): elif "nproca" in config[model] and "nprocb" in config[model]: # ``nproca``/``nprocb`` not compatible with ``omp_num_threads`` if omp_num_threads > 1: - esm_parser.user_note( + user_note( "nproc", "``nproca``/``nprocb`` not compatible with ``omp_num_threads``", ) @@ -854,7 +984,7 @@ def calc_launcher_flags(config, model, cluster): omp_num_threads = 1 else: -# kh 22.06.22 defensive (user_error/user_note could also be added here) + # kh 22.06.22 defensive (user_error/user_note could also be added here) nproc = 0 cpus_per_proc = 0 # omp_num_threads = 0 @@ -885,7 +1015,6 @@ def calc_launcher_flags(config, model, cluster): return launcher_flags - def submits_another_job(config, cluster): clusterconf = config["general"]["workflow"]["subjob_clusters"][cluster] if clusterconf.get("next_submit", []) == []: From 1742320bee6511611c9e0c187f40b42922f598c5 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Thu, 22 Feb 2024 15:35:28 +0100 Subject: [PATCH 96/98] Start to refactor 
maybe_resubmit function. --- src/esm_runscripts/resubmit.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/esm_runscripts/resubmit.py b/src/esm_runscripts/resubmit.py index 5046372c1..c123a8629 100644 --- a/src/esm_runscripts/resubmit.py +++ b/src/esm_runscripts/resubmit.py @@ -247,7 +247,22 @@ def maybe_resubmit(config): ------- config : dict """ - jobtype = config["general"]["jobtype"] + jobtype = config["general"]["jobtype"] # current phase + workflow = config["general"]["workflow"]["object"] + first_phase_in_cluster = workflow.first_task_in_queue + if jobtype == first_phase_in_cluster: + config = config["general"]["batch"].write_run_batch_script( + config, 'sim_cluster', 'batch' + ) + print("Create *.run file") + phases = workflow.phases + + resubmit_batch_or_shell(config, "batch", "compute") + + breakpoint() + + # TODO: Check if run from *.run file + # TODO: Create *.run file # check if nextrun starts??? # this resubmits any following jobtypes/phases until nextrun is true @@ -262,6 +277,7 @@ def maybe_resubmit(config): # it will start to loop over all remaining clusters to check if it can sumbit something (SimulationSetup, sbatch, shell) and do so, # until first start of next run is reached. # 3. nextrun is fals if no entries in next_submit for that particular jobtype/cluster + nextrun = resubmit_recursively(config, jobtype=jobtype) if nextrun: # submit list contains stuff from next run From 0705bcdd8e988218d4ea29042344e775b1539b71 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Thu, 22 Feb 2024 15:44:19 +0100 Subject: [PATCH 97/98] Add workflow methon to set the run command of each phase. 
--- src/esm_runscripts/workflow.py | 72 +++++++++++++++++++++++++++------- 1 file changed, 58 insertions(+), 14 deletions(-) diff --git a/src/esm_runscripts/workflow.py b/src/esm_runscripts/workflow.py index 9cb31e190..5c7d84cd5 100644 --- a/src/esm_runscripts/workflow.py +++ b/src/esm_runscripts/workflow.py @@ -1,6 +1,8 @@ import copy import esm_parser +from . import batch_system + #import pygraphviz as pgv import pdb @@ -22,10 +24,12 @@ def __init__(self, workflow_yaml): ------- none """ + # TODO: check if key is in workflow_yaml dict self.phases = [] # list for default phases (defined in defauls.yaml) self.user_phases = [] # list of user phases (collected by collect_all_user_phases) self.clusters = {} # dictionary of clusters + error = False @@ -35,6 +39,8 @@ def __init__(self, workflow_yaml): else: error = True if "next_run_triggered_by" in workflow_yaml: self.next_run_triggered_by = workflow_yaml["next_run_triggered_by"] else: error = True + if "default_cluster" in workflow_yaml: self.default_cluster = workflow_yaml["default_cluster"] + else: error = True if error: err_msg = ( @@ -348,15 +354,16 @@ def write_to_config(self, config): """ Write to config. TODO: Rename ``subjobs`` to ``phases``. But this needs changes also in resubmit.py and other files??? + TODO: Put workflow object into config. """ - # 1. Delete unnecessary config workflow entries (e.g. in general) + # Delete unnecessary config workflow entries (e.g. in general) if "workflow" in config["general"]: del config["general"]["workflow"] config["general"]["workflow"] = {} config["general"]["workflow"].update(self.__dict__) - # 3. Write clusters + # Write clusters config["general"]["workflow"]["subjob_clusters"] = {} for cluster in self.clusters: config["general"]["workflow"]["subjob_clusters"][cluster] = {} @@ -366,16 +373,20 @@ def write_to_config(self, config): for att in self.clusters[cluster]: config["general"]["workflow"]["subjob_clusters"][cluster][att] = self.clusters[cluster][att] - # 2. 
Write subjobs/phases + # Write subjobs/phases config["general"]["workflow"]["subjobs"] = {} for phase in self.phases + self.user_phases: - temp_dict = phase - config["general"]["workflow"]["subjobs"][phase["name"]] = temp_dict + config["general"]["workflow"]["subjobs"][phase["name"]] = {} + for key, val in phase.items(): + config["general"]["workflow"]["subjobs"][phase["name"]][key] = val - # delete phases and user_phases + # Delete phases and user_phases del config["general"]["workflow"]["phases"] del config["general"]["workflow"]["user_phases"] + # Write workflow object + config["general"]["workflow"]["object"] = self + return config def check_user_workflow_dependency(self): @@ -547,6 +558,36 @@ def order_phases_and_clusters(self): return self + + def get_workflow_commands_for_run(self, config): + """ + Gets the command for each workflow phase and writes in into config. + + Parameters + ---------- + self: workflow object + config: dict + + Returns + ------- + config: dict + """ + phases = self.phases + phase_type = "" + run_command = "" + run_commands = [] + + for phase in phases: + phase_type = phase.get("batch_or_shell", None) + phase_name = phase.get("name", "") + run_command = ' '.join(batch_system.get_run_commands(config, phase_name, phase_type)) + phase["run_command"] = run_command + run_commands.append(run_command) + + setattr(self, 'run_commands', run_commands) + return self + + def prepend_newrun_job(self): """ - Creates a new cluster "newrun" if first_task_in_queue is not of @@ -668,7 +709,7 @@ def __init__(self, phase): self["run_after"] = None self["trigger_next_run"] = False # needed self["submit_to_batch_system"] = False # needed -# self["run_on_queue"] = None + self["run_on_queue"] = None self["cluster"] = None self["next_submit"] = [] # needed self["called_from"] = None # needed @@ -679,6 +720,7 @@ def __init__(self, phase): self["skip_run_number"] = None self["call_function"] = None self["env_preparation"] = None + self["run_command"] = None # 
check if phase keywords are valid for key, value in phase.items(): @@ -782,6 +824,8 @@ def assemble_workflow(config): # a user phase (type batch or shell) workflow = workflow.prepend_newrun_job() + workflow = workflow.get_workflow_commands_for_run(config) + # - write the workflow to config # - Remove old worklow from config config = workflow.write_to_config(config) @@ -789,13 +833,13 @@ def assemble_workflow(config): # Set "jobtype" for the first task??? # NOTE: This is either first default phase or # newrun??? Can't this not be set in prepend_newrun then? -# if config["general"]["jobtype"] == "unknown": -# config["general"]["command_line_config"]["jobtype"] = config["general"][ -# "workflow" -# ]["first_task_in_queue"] -# config["general"]["jobtype"] = config["general"]["workflow"][ -# "first_task_in_queue" -# ] + if config["general"]["jobtype"] == "unknown": + config["general"]["command_line_config"]["jobtype"] = config["general"][ + "workflow" + ]["first_task_in_queue"] + config["general"]["jobtype"] = config["general"]["workflow"][ + "first_task_in_queue" + ] return config From 458f0a0572e071fb9a1bc276ae390a8563855388 Mon Sep 17 00:00:00 2001 From: Nadine Wieters Date: Thu, 22 Feb 2024 15:46:36 +0100 Subject: [PATCH 98/98] Add default cluster. 
--- configs/esm_software/esm_runscripts/defaults.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/configs/esm_software/esm_runscripts/defaults.yaml b/configs/esm_software/esm_runscripts/defaults.yaml index 1824320b9..474eebded 100644 --- a/configs/esm_software/esm_runscripts/defaults.yaml +++ b/configs/esm_software/esm_runscripts/defaults.yaml @@ -23,12 +23,13 @@ workflow: first_task_in_queue: prepcompute last_task_in_queue: tidy next_run_triggered_by: tidy + default_cluster: sim_cluster phases: prepcompute: batch_or_shell: SimulationSetup called_from: tidy - cluster: prepcompute + cluster: sim_cluster name: prepcompute next_submit: - compute @@ -39,7 +40,7 @@ workflow: submit_to_batch_system: False compute: called_from: prepcompute - cluster: compute + cluster: sim_cluster name: compute next_submit: - tidy @@ -52,7 +53,7 @@ workflow: tidy: batch_or_shell: SimulationSetup called_from: compute - cluster: tidy + cluster: sim_cluster name: tidy next_submit: - prepcompute