diff --git a/bluepyemodel/access_point/access_point.py b/bluepyemodel/access_point/access_point.py index be011065..554b816f 100644 --- a/bluepyemodel/access_point/access_point.py +++ b/bluepyemodel/access_point/access_point.py @@ -31,7 +31,6 @@ from bluepyemodel.emodel_pipeline.emodel_metadata import EModelMetadata from bluepyemodel.emodel_pipeline.emodel_settings import EModelPipelineSettings from bluepyemodel.tools.utils import get_checkpoint_path -from bluepyemodel.tools.utils import get_legacy_checkpoint_path from bluepyemodel.tools.utils import read_checkpoint # pylint: disable=no-member,unused-argument,assignment-from-no-return,no-value-for-parameter @@ -237,9 +236,7 @@ def optimisation_state(self, seed=None, continue_opt=False): # no file -> target not complete if not pathlib.Path(checkpoint_path).is_file(): - checkpoint_path = get_legacy_checkpoint_path(checkpoint_path) - if not pathlib.Path(checkpoint_path).is_file(): - return OptimisationState.EMPTY + return OptimisationState.EMPTY # there is a file & continue opt is False -> target considered complete if not continue_opt: @@ -326,8 +323,8 @@ def __str__(self): str_ += "OPTIMISATION STATUS\n" str_ += f" Number of checkpoints: {len(checkpoints)}\n" for c in checkpoints: - run, run_metadata = read_checkpoint(c) - str_ += f" Seed {run_metadata['seed']};" + run, seed = read_checkpoint(c) + str_ += f" Seed {seed};" str_ += f" Last generation: {run['logbook'].select('gen')[-1]};" str_ += f" Best fitness: {sum(run['halloffame'][0].fitness.values)}\n" str_ += "\n" diff --git a/bluepyemodel/emodel_pipeline/emodel.py b/bluepyemodel/emodel_pipeline/emodel.py index e050daf7..57c349d9 100644 --- a/bluepyemodel/emodel_pipeline/emodel.py +++ b/bluepyemodel/emodel_pipeline/emodel.py @@ -38,7 +38,72 @@ def format_dict_for_resource(d): return out -class EModel: +class EModelMixin: + """Contains functions used both in EModel and MEModel classes.""" + + def build_pdf_dependencies(self, seed): + """Find all the pdfs 
associated to an emodel""" + + pdfs = [] + + opt_pdf = search_pdfs.search_figure_emodel_optimisation(self.emodel_metadata, seed) + if opt_pdf: + pdfs.append(opt_pdf) + + traces_pdf = search_pdfs.search_figure_emodel_traces(self.emodel_metadata, seed) + if traces_pdf: + pdfs += [p for p in traces_pdf if p] + + scores_pdf = search_pdfs.search_figure_emodel_score(self.emodel_metadata, seed) + if scores_pdf: + pdfs += [p for p in scores_pdf if p] + + thumbnail_pdf = search_pdfs.search_figure_emodel_thumbnail(self.emodel_metadata, seed) + if thumbnail_pdf: + pdfs += [p for p in thumbnail_pdf if p] + + parameters_pdf = search_pdfs.search_figure_emodel_parameters(self.emodel_metadata) + if parameters_pdf: + pdfs += [p for p in parameters_pdf if p] + + parameters_evo_pdf = search_pdfs.search_figure_emodel_parameters_evolution( + self.emodel_metadata, seed + ) + if parameters_evo_pdf: + pdfs.append(parameters_evo_pdf) + + all_parameters_evo_pdf = search_pdfs.search_figure_emodel_parameters_evolution( + self.emodel_metadata, seed=None + ) + if all_parameters_evo_pdf: + pdfs.append(all_parameters_evo_pdf) + + currentscape_pdfs = search_pdfs.search_figure_emodel_currentscapes( + self.emodel_metadata, seed + ) + if currentscape_pdfs: + pdfs += [p for p in currentscape_pdfs if p] + + bAP_pdf = search_pdfs.search_figure_emodel_bAP(self.emodel_metadata, seed) + if bAP_pdf: + pdfs += [p for p in bAP_pdf if p] + + EPSP_pdf = search_pdfs.search_figure_emodel_EPSP(self.emodel_metadata, seed) + if EPSP_pdf: + pdfs += [p for p in EPSP_pdf if p] + + ISI_CV_pdf = search_pdfs.search_figure_emodel_ISI_CV(self.emodel_metadata, seed) + if ISI_CV_pdf: + pdfs += [p for p in ISI_CV_pdf if p] + + rheobase_pdf = search_pdfs.search_figure_emodel_rheobase(self.emodel_metadata, seed) + if rheobase_pdf: + pdfs += [p for p in rheobase_pdf if p] + + return pdfs + + +class EModel(EModelMixin): """Contains all the information related to an optimized e-model, such as its parameters or its e-feature values 
and scores. @@ -66,7 +131,7 @@ def __init__( scoreValidation (dict or Resource): scores obtained on the validation protocols. passedValidation (bool or None): did the model go through validation and if yes, did it pass it successfully (None: no validation, True: passed, False: didn't pass) - seed (str): seed used during optimisation for this emodel. + seed (int): seed used during optimisation for this emodel. emodel_metadata (EModelMetadata): metadata of the model (emodel name, etype, ttype, ...) """ @@ -109,69 +174,8 @@ def __init__( def copy_pdf_dependencies_to_new_path(self, seed, overwrite=False): """Copy pdf dependencies to new path using allen notation""" search_pdfs.copy_emodel_pdf_dependencies_to_new_path( - self.emodel_metadata, seed, overwrite=overwrite - ) - - def build_pdf_dependencies(self, seed): - """Find all the pdfs associated to an emodel""" - - pdfs = [] - - opt_pdf = search_pdfs.search_figure_emodel_optimisation(self.emodel_metadata, seed) - if opt_pdf: - pdfs.append(opt_pdf) - - traces_pdf = search_pdfs.search_figure_emodel_traces(self.emodel_metadata, seed) - if traces_pdf: - pdfs += [p for p in traces_pdf if p] - - scores_pdf = search_pdfs.search_figure_emodel_score(self.emodel_metadata, seed) - if scores_pdf: - pdfs += [p for p in scores_pdf if p] - - thumbnail_pdf = search_pdfs.search_figure_emodel_thumbnail(self.emodel_metadata, seed) - if thumbnail_pdf: - pdfs += [p for p in thumbnail_pdf if p] - - parameters_pdf = search_pdfs.search_figure_emodel_parameters(self.emodel_metadata) - if parameters_pdf: - pdfs += [p for p in parameters_pdf if p] - - parameters_evo_pdf = search_pdfs.search_figure_emodel_parameters_evolution( - self.emodel_metadata, seed - ) - if parameters_evo_pdf: - pdfs.append(parameters_evo_pdf) - - all_parameters_evo_pdf = search_pdfs.search_figure_emodel_parameters_evolution( - self.emodel_metadata, seed=None - ) - if all_parameters_evo_pdf: - pdfs.append(all_parameters_evo_pdf) - - currentscape_pdfs = 
search_pdfs.search_figure_emodel_currentscapes( - self.emodel_metadata, seed + self.emodel_metadata, self.emodel_metadata, False, True, seed, overwrite=overwrite ) - if currentscape_pdfs: - pdfs += [p for p in currentscape_pdfs if p] - - bAP_pdf = search_pdfs.search_figure_emodel_bAP(self.emodel_metadata, seed) - if bAP_pdf: - pdfs += [p for p in bAP_pdf if p] - - EPSP_pdf = search_pdfs.search_figure_emodel_EPSP(self.emodel_metadata, seed) - if EPSP_pdf: - pdfs += [p for p in EPSP_pdf if p] - - ISI_CV_pdf = search_pdfs.search_figure_emodel_ISI_CV(self.emodel_metadata, seed) - if ISI_CV_pdf: - pdfs += [p for p in ISI_CV_pdf if p] - - rheobase_pdf = search_pdfs.search_figure_emodel_rheobase(self.emodel_metadata, seed) - if rheobase_pdf: - pdfs += [p for p in rheobase_pdf if p] - - return pdfs def get_related_nexus_ids(self): return { diff --git a/bluepyemodel/emodel_pipeline/emodel_metadata.py b/bluepyemodel/emodel_pipeline/emodel_metadata.py index 5d46890f..88277cf8 100644 --- a/bluepyemodel/emodel_pipeline/emodel_metadata.py +++ b/bluepyemodel/emodel_pipeline/emodel_metadata.py @@ -198,7 +198,9 @@ def as_dict(self): """Metadata as dict.""" return vars(self) - def as_string(self, seed=None, use_allen_notation=True): + def as_string( + self, seed=None, use_allen_notation=True, replace_semicolons=True, replace_spaces=True + ): s = "" for k in [ @@ -222,7 +224,12 @@ def as_string(self, seed=None, use_allen_notation=True): s += f"seed={seed}__" # can have ':' in mtype. Replace this character. 
- s = s.replace(":", "_") + if replace_semicolons: + s = s.replace(":", "_") + + # also replace spaces if any + if replace_spaces: + s = s.replace(" ", "_") return s[:-2] diff --git a/bluepyemodel/emodel_pipeline/emodel_pipeline.py b/bluepyemodel/emodel_pipeline/emodel_pipeline.py index af3283a5..243bb4e5 100644 --- a/bluepyemodel/emodel_pipeline/emodel_pipeline.py +++ b/bluepyemodel/emodel_pipeline/emodel_pipeline.py @@ -30,7 +30,6 @@ from bluepyemodel.optimisation import store_best_model from bluepyemodel.tools.multiprocessing import get_mapper from bluepyemodel.tools.utils import get_checkpoint_path -from bluepyemodel.tools.utils import get_legacy_checkpoint_path from bluepyemodel.validation.validation import validate logger = logging.getLogger() @@ -201,10 +200,6 @@ def store_optimisation_results(self, seed=None): else: checkpoint_path = get_checkpoint_path(self.access_point.emodel_metadata, seed=1) checkpoint_list = glob.glob(checkpoint_path.replace("seed=1", "*")) - if not checkpoint_list: - checkpoint_list = glob.glob( - get_legacy_checkpoint_path(checkpoint_path).replace("seed=1", "*") - ) for chkp_path in checkpoint_list: file_name = pathlib.Path(chkp_path).stem @@ -280,6 +275,7 @@ def plot(self, only_validated=False, load_from_local=False): plotting.evolution_parameters_density( evaluator=cell_evaluator, checkpoint_paths=checkpoint_paths, + metadata=self.access_point.emodel_metadata, figures_dir=pathlib.Path("./figures") / self.access_point.emodel_metadata.emodel / "parameter_evolution", diff --git a/bluepyemodel/emodel_pipeline/memodel.py b/bluepyemodel/emodel_pipeline/memodel.py new file mode 100644 index 00000000..3af3ea05 --- /dev/null +++ b/bluepyemodel/emodel_pipeline/memodel.py @@ -0,0 +1,73 @@ +"""MEModel class""" + +""" +Copyright 2024, EPFL/Blue Brain Project + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +from bluepyemodel.emodel_pipeline.emodel import EModelMixin + + +class MEModel(EModelMixin): + """Combination of an EModel and a Morphology. Should contain ids of these resources, + as well as analysis plotting figure paths.""" + + def __init__( + self, + seed=None, + emodel_metadata=None, + emodel_id=None, + morphology_id=None, + validated=False, + status="initialized", + ): + """Init + + Args: + seed (int): seed used during optimisation for this emodel. + emodel_metadata (EModelMetadata): metadata of the model (emodel name, etype, ttype, ...) + emodel_id (str): nexus id of the e-model used in this me-model + morphology_id (str): nexus id of the morphology used in this me-model + validated (bool): whether the MEModel has been validated by user + status (str): whether the analysis has run or not. Can be "initialized" or "done".
+ """ + + self.emodel_metadata = emodel_metadata + self.seed = seed + + self.emodel_id = emodel_id + self.morphology_id = morphology_id + + self.validated = validated + self.status = status + + def get_related_nexus_ids(self): + uses = [] + if self.emodel_id: + uses.append({"id": self.emodel_id, "type": "EModel"}) + if self.morphology_id: + uses.append({"id": self.morphology_id, "type": "NeuronMorphology"}) + return {"uses": uses} + + def as_dict(self): + pdf_dependencies = self.build_pdf_dependencies(self.seed) + + return { + "nexus_images": pdf_dependencies, + "seed": self.seed, + "emodel_id": self.emodel_id, + "morphology_id": self.morphology_id, + "validated": self.validated, + "status": self.status, + } diff --git a/bluepyemodel/emodel_pipeline/plotting.py b/bluepyemodel/emodel_pipeline/plotting.py index a10b1090..67c5c930 100644 --- a/bluepyemodel/emodel_pipeline/plotting.py +++ b/bluepyemodel/emodel_pipeline/plotting.py @@ -46,7 +46,6 @@ from bluepyemodel.evaluation.utils import define_EPSP_protocol from bluepyemodel.model.morphology_utils import get_basal_and_apical_max_radial_distances from bluepyemodel.tools.utils import make_dir -from bluepyemodel.tools.utils import parse_checkpoint_path from bluepyemodel.tools.utils import read_checkpoint from bluepyemodel.tools.utils import select_rec_for_thumbnail @@ -216,7 +215,8 @@ def optimisation( def _create_figure_parameter_histograms( histograms, evaluator, - checkpoint_path, + metadata, + seed, max_n_gen, gen_per_bin, figures_dir, @@ -262,18 +262,15 @@ def _create_figure_parameter_histograms( fig.supylabel("Parameter value", size="xx-large") suptitle = "Parameter evolution\n" - metadata = parse_checkpoint_path(checkpoint_path) - if metadata.get("emodel", None) is not None: - suptitle += f"e-model = {metadata['emodel']}" - if metadata.get("iteration", None) is not None: - suptitle += f" ; iteration = {metadata['iteration']}" - if metadata.get("seed", None) is not None: - suptitle += f" ; seed = 
{metadata['seed']}" + if metadata.emodel is not None: + suptitle += f"e-model = {metadata.emodel}" + if metadata.iteration is not None: + suptitle += f" ; iteration = {metadata.iteration}" + if seed is not None: + suptitle += f" ; seed = {seed}" fig.suptitle(suptitle, size="xx-large") - p = Path(checkpoint_path) - - figure_name = p.stem + figure_name = metadata.as_string(seed=seed) figure_name += "__evo_parameter_density.pdf" plt.tight_layout() @@ -286,7 +283,7 @@ def _create_figure_parameter_histograms( def evolution_parameters_density( - evaluator, checkpoint_paths, figures_dir="./figures", write_fig=True + evaluator, checkpoint_paths, metadata, figures_dir="./figures", write_fig=True ): """Create plots of the evolution of the density of parameters in the population as the optimisation progresses. Create one plot per checkpoint plus one plot with all checkpoints. @@ -298,6 +295,7 @@ def evolution_parameters_density( Args: evaluator (CellEvaluator): evaluator used to evaluate the individuals. checkpoint_paths (list of str): list of paths to the checkpoints .pkl. + metadata (EModelMetadata): metadata of the emodel. figures_dir (str): path to the directory where the figures will be saved. write_fig (bool): whether to write the figures to disk. 
""" @@ -307,12 +305,12 @@ def evolution_parameters_density( max_n_gen = 0 genealogies = {} for checkpoint_path in checkpoint_paths: - run, _ = read_checkpoint(checkpoint_path) + run, seed = read_checkpoint(checkpoint_path) if run["generation"] < 4: continue max_n_gen = max(max_n_gen, run["generation"]) - genealogies[checkpoint_path] = run["history"].genealogy_history + genealogies[checkpoint_path] = (run["history"].genealogy_history, seed) gen_per_bin = 4 pop_size = len(run["population"]) @@ -321,7 +319,7 @@ def evolution_parameters_density( # Compute and plot the histograms for each checkpoint sum_histograms = {} - for checkpoint_path, genealogy in genealogies.items(): + for checkpoint_path, (genealogy, seed) in genealogies.items(): # Get the histograms for all parameters histograms = {} for param_index in range(len(genealogy[1])): @@ -349,7 +347,8 @@ def evolution_parameters_density( _ = _create_figure_parameter_histograms( histograms, evaluator, - checkpoint_path, + metadata, + seed, max_n_gen, gen_per_bin, figures_dir, @@ -360,11 +359,11 @@ def evolution_parameters_density( fig, axs = None, None if sum_histograms: sum_histograms = {idx: h / len(checkpoint_path) for idx, h in sum_histograms.items()} - dummy_path = checkpoint_paths[0].partition("__seed=")[0] + "__all_seeds.pkl" fig, axs = _create_figure_parameter_histograms( sum_histograms, evaluator, - dummy_path, + metadata, + "all_seeds", max_n_gen, gen_per_bin, figures_dir, diff --git a/bluepyemodel/optimisation/optimisation.py b/bluepyemodel/optimisation/optimisation.py index 0e59ffde..90071692 100644 --- a/bluepyemodel/optimisation/optimisation.py +++ b/bluepyemodel/optimisation/optimisation.py @@ -25,7 +25,6 @@ from bluepyemodel.emodel_pipeline.emodel import EModel from bluepyemodel.evaluation.evaluation import get_evaluator_from_access_point from bluepyemodel.tools.utils import get_checkpoint_path -from bluepyemodel.tools.utils import get_legacy_checkpoint_path from bluepyemodel.tools.utils import logger 
from bluepyemodel.tools.utils import read_checkpoint @@ -102,13 +101,6 @@ def run_optimisation( "Will continue optimisation from last generation in checkpoint" ) continue_opt = True - elif Path(get_legacy_checkpoint_path(checkpoint_path)).is_file(): - checkpoint_path = get_legacy_checkpoint_path(checkpoint_path) - continue_opt = True - logger.info( - "Found a legacy checkpoint path. Will use it instead " - "and continue optimisation from last generation." - ) else: logger.info("No checkpoint found. Will start optimisation from scratch.") continue_opt = False @@ -189,7 +181,7 @@ def store_best_model( checkpoint_path = get_checkpoint_path(access_point.emodel_metadata, seed=seed) - run, run_metadata = read_checkpoint(checkpoint_path) + run, seed_from_checkpoint = read_checkpoint(checkpoint_path) best_model = run["halloffame"][0] feature_names = [obj.name for obj in cell_evaluator.fitness_calculator.objectives] @@ -207,7 +199,7 @@ def store_best_model( scores = dict(zip(feature_names, best_model.fitness.values)) - emodel_seed = run_metadata.get("seed", None) if seed is None else seed + emodel_seed = seed_from_checkpoint if seed is None else seed emodel = EModel( fitness=sum(list(scores.values())), diff --git a/bluepyemodel/tasks/emodel_creation/optimisation.py b/bluepyemodel/tasks/emodel_creation/optimisation.py index 8dec951c..6e0b5462 100644 --- a/bluepyemodel/tasks/emodel_creation/optimisation.py +++ b/bluepyemodel/tasks/emodel_creation/optimisation.py @@ -37,7 +37,6 @@ from bluepyemodel.tasks.luigi_tools import WorkflowTaskRequiringMechanisms from bluepyemodel.tasks.luigi_tools import WorkflowWrapperTask from bluepyemodel.tools.mechanisms import compile_mechs_in_emodel_dir -from bluepyemodel.tools.utils import get_legacy_checkpoint_path # pylint: disable=W0235,W0621,W0404,W0611,W0703,E1128 logger = logging.getLogger(__name__) @@ -1180,11 +1179,6 @@ def run(self): """ """ checkpoint_path = get_checkpoint_path(self.access_point.emodel_metadata, seed=self.seed) - if ( 
- not Path(checkpoint_path).is_file() - and Path(get_legacy_checkpoint_path(checkpoint_path)).is_file() - ): - checkpoint_path = get_legacy_checkpoint_path(checkpoint_path) optimisation( optimiser=self.access_point.pipeline_settings.optimiser, diff --git a/bluepyemodel/tools/search_pdfs.py b/bluepyemodel/tools/search_pdfs.py index b6fd7b8b..eaf580a0 100644 --- a/bluepyemodel/tools/search_pdfs.py +++ b/bluepyemodel/tools/search_pdfs.py @@ -247,8 +247,9 @@ def search_figure_emodel_rheobase(emodel_metadata, seed, use_allen_notation=True return list(search_figure_paths(str(pathname))) + list(search_figure_paths(str(pathname_val))) -def figure_emodel_parameters(emodel_metadata, use_allen_notation=True): +def figure_emodel_parameters(emodel_metadata, seed=None, use_allen_notation=True): """Get path for the pdf representing the distribution of the parameters of an emodel""" + # pylint: disable=unused-argument metadata_str = emodel_metadata.as_string(use_allen_notation=use_allen_notation) fname = f"{metadata_str}__parameters_distribution.pdf" @@ -326,83 +327,49 @@ def copy_emodel_pdf_dependency_to_new_path(old_path, new_path, overwrite=False): shutil.copy(old_path, new_path) -def copy_emodel_pdf_dependencies_to_new_path(emodel_metadata, seed, overwrite=False): +def copy_emodel_pdf_dependencies_to_new_path( + old_metadata, new_metadata, old_allen_notation, new_allen_notation, seed, overwrite=False +): """Copy dependencies to new path using allen notation""" # pylint: disable=too-many-locals - # TODO: refactor this function - old_opt_path = figure_emodel_optimisation(emodel_metadata, seed, use_allen_notation=False) - new_opt_path = figure_emodel_optimisation(emodel_metadata, seed, use_allen_notation=True) - copy_emodel_pdf_dependency_to_new_path(old_opt_path, new_opt_path, overwrite=overwrite) - - old_traces_path, old_traces_path_val = figure_emodel_traces( - emodel_metadata, seed, use_allen_notation=False - ) - new_traces_path, new_traces_path_val = figure_emodel_traces( - 
emodel_metadata, seed, use_allen_notation=True - ) - copy_emodel_pdf_dependency_to_new_path(old_traces_path, new_traces_path, overwrite=overwrite) - copy_emodel_pdf_dependency_to_new_path( - old_traces_path_val, new_traces_path_val, overwrite=overwrite - ) - - old_score_path, old_score_path_val = figure_emodel_score( - emodel_metadata, seed, use_allen_notation=False - ) - new_score_path, new_score_path_val = figure_emodel_score( - emodel_metadata, seed, use_allen_notation=True - ) - copy_emodel_pdf_dependency_to_new_path(old_score_path, new_score_path, overwrite=overwrite) - copy_emodel_pdf_dependency_to_new_path( - old_score_path_val, new_score_path_val, overwrite=overwrite - ) - - old_params_path, old_params_path_val = figure_emodel_parameters( - emodel_metadata, use_allen_notation=False - ) - new_params_path, new_params_path_val = figure_emodel_parameters( - emodel_metadata, use_allen_notation=True - ) - copy_emodel_pdf_dependency_to_new_path(old_params_path, new_params_path, overwrite=overwrite) - copy_emodel_pdf_dependency_to_new_path( - old_params_path_val, new_params_path_val, overwrite=overwrite - ) - - old_thumbnail_path, old_thumbnail_path_val = figure_emodel_thumbnail( - emodel_metadata, seed, use_allen_notation=False - ) - new_thumbnail_path, new_thumbnail_path_val = figure_emodel_thumbnail( - emodel_metadata, seed, use_allen_notation=True - ) - copy_emodel_pdf_dependency_to_new_path( - old_thumbnail_path, new_thumbnail_path, overwrite=overwrite - ) - copy_emodel_pdf_dependency_to_new_path( - old_thumbnail_path_val, new_thumbnail_path_val, overwrite=overwrite - ) - - old_evo_path = figure_emodel_parameters_evolution( - emodel_metadata, seed, use_allen_notation=False - ) - new_evo_path = figure_emodel_parameters_evolution( - emodel_metadata, seed, use_allen_notation=True - ) - copy_emodel_pdf_dependency_to_new_path(old_evo_path, new_evo_path, overwrite=overwrite) + # do not have all and validated subfolders + single_folder_fcts = 
[figure_emodel_optimisation, figure_emodel_parameters_evolution] + # have all and validated subfolders + two_folders_fcts = [ + figure_emodel_traces, + figure_emodel_score, + figure_emodel_parameters, + figure_emodel_thumbnail, + ] + + for fct in single_folder_fcts: + old_path = fct(old_metadata, seed=seed, use_allen_notation=old_allen_notation) + new_path = fct(new_metadata, seed=seed, use_allen_notation=new_allen_notation) + copy_emodel_pdf_dependency_to_new_path(old_path, new_path, overwrite=overwrite) + + for fct in two_folders_fcts: + old_path, old_path_val = fct(old_metadata, seed=seed, use_allen_notation=old_allen_notation) + new_path, new_path_val = fct(new_metadata, seed=seed, use_allen_notation=new_allen_notation) + copy_emodel_pdf_dependency_to_new_path(old_path, new_path, overwrite=overwrite) + copy_emodel_pdf_dependency_to_new_path(old_path_val, new_path_val, overwrite=overwrite) + + # also check with seed = None for figure_emodel_parameters_evolution old_all_evo_path = figure_emodel_parameters_evolution( - emodel_metadata, seed=None, use_allen_notation=False + old_metadata, seed=None, use_allen_notation=old_allen_notation ) new_all_evo_path = figure_emodel_parameters_evolution( - emodel_metadata, seed=None, use_allen_notation=True + new_metadata, seed=None, use_allen_notation=new_allen_notation ) copy_emodel_pdf_dependency_to_new_path(old_all_evo_path, new_all_evo_path, overwrite=overwrite) # take into account that we have to search for currentscape plots # because we do not know a priori the protocols and locations old_currentscape_path = search_figure_emodel_currentscapes( - emodel_metadata, seed, use_allen_notation=False + old_metadata, seed, use_allen_notation=old_allen_notation ) new_currentscape_path, new_currentscape_path_val = figure_emodel_currentscapes( - emodel_metadata, seed, use_allen_notation=True + new_metadata, seed, use_allen_notation=new_allen_notation ) for old_path in old_currentscape_path: prot = 
str(Path(old_path).stem).rsplit("currentscape", maxsplit=1)[-1] diff --git a/bluepyemodel/tools/utils.py b/bluepyemodel/tools/utils.py index 133f7d6e..7a6e63a6 100644 --- a/bluepyemodel/tools/utils.py +++ b/bluepyemodel/tools/utils.py @@ -28,16 +28,51 @@ logger = logging.getLogger("__main__") -def get_checkpoint_path(metadata, seed=None): - """""" +def checkpoint_path_exists(checkpoint_path): + """Returns True if checkpoint path exists, False if not. + + Args: + checkpoint_path (str or Path): checkpoint path + """ + checkpoint_path = Path(checkpoint_path) + return ( + checkpoint_path.is_file() + or checkpoint_path.with_suffix(checkpoint_path.suffix + ".tmp").is_file() + ) + +def get_checkpoint_path(metadata, seed=None): + """Get checkpoint path. Use legacy format if any is found, else use latest format.""" base_path = f"./checkpoints/{metadata.emodel}/{metadata.iteration}/" - filename = metadata.as_string(seed=seed, use_allen_notation=False) + # legacy case 1 (2023.05.11 - 2023.10.19) + filename = metadata.as_string( + seed=seed, use_allen_notation=False, replace_semicolons=False, replace_spaces=False + ) full_path = f"{base_path}{filename}.pkl" - # Switch to Allen notation if no previous legacy checkpoint is found - if not Path(full_path).is_file(): - filename = metadata.as_string(seed=seed, use_allen_notation=True) + # legacy case 0 (before 2023.05.11) + if checkpoint_path_exists(get_legacy_checkpoint_path(full_path)): + full_path = get_legacy_checkpoint_path(full_path) + + # legacy case 2 (2023.10.19 - 2024.02.14) + if not checkpoint_path_exists(full_path): + filename = metadata.as_string( + seed=seed, use_allen_notation=True, replace_semicolons=False, replace_spaces=False + ) + full_path = f"{base_path}{filename}.pkl" + + # legacy case 3 (2024.02.14 - 2024.05.29) + if not checkpoint_path_exists(full_path): + filename = metadata.as_string( + seed=seed, use_allen_notation=True, replace_semicolons=True, replace_spaces=False + ) + full_path = 
f"{base_path}{filename}.pkl" + + # Up-to-date checkpoint path (after 2024.05.29) + if not checkpoint_path_exists(full_path): + filename = metadata.as_string( + seed=seed, use_allen_notation=True, replace_semicolons=True, replace_spaces=True + ) full_path = f"{base_path}{filename}.pkl" return full_path @@ -74,60 +109,18 @@ def yesno(question): return False -def parse_legacy_checkpoint_path(path): - """""" - - filename = Path(path).stem.split("__") - - if len(filename) == 4: - checkpoint_metadata = { - "emodel": filename[1], - "seed": filename[3], - "iteration": filename[2], - "ttype": None, - } - elif len(filename) == 3: - checkpoint_metadata = { - "emodel": filename[1], - "seed": filename[2], - "iteration": None, - "ttype": None, - } - else: - raise ValueError(f"Invalid checkpoint path: {path}") - - return checkpoint_metadata - - -def parse_checkpoint_path(path): - """""" - - if "emodel" not in path and "checkpoint" in path: - return parse_legacy_checkpoint_path(path) +def get_seed_from_checkpoint_path(path): + """Get seed from checkpoint path name. 
default seed is 0 if not found.""" if path.endswith(".tmp"): path = path.replace(".tmp", "") filename = Path(path).stem.split("__") - checkpoint_metadata = {} - - for field in [ - "emodel", - "etype", - "ttype", - "mtype", - "species", - "brain_region", - "seed", - "iteration", - ]: - search_str = f"{field}=" - checkpoint_metadata[field] = next( - (e.replace(search_str, "") for e in filename if search_str in e), None - ) + search_str = "seed=" + seed = next((e.replace(search_str, "") for e in filename if search_str in e), 0) - return checkpoint_metadata + return int(seed) def read_checkpoint(checkpoint_path): @@ -136,28 +129,22 @@ def read_checkpoint(checkpoint_path): p = Path(checkpoint_path) p_tmp = p.with_suffix(p.suffix + ".tmp") - # legacy case - if not p.is_file() and not p_tmp.is_file(): - legacy_checkpoint_path = get_legacy_checkpoint_path(checkpoint_path) - p = Path(legacy_checkpoint_path) - p_tmp = p.with_suffix(p.suffix + ".tmp") - try: with open(str(p), "rb") as checkpoint_file: run = pickle.load(checkpoint_file, encoding="latin1") - run_metadata = parse_checkpoint_path(str(p)) + seed = get_seed_from_checkpoint_path(str(p)) except EOFError: try: with open(str(p_tmp), "rb") as checkpoint_tmp_file: run = pickle.load(checkpoint_tmp_file, encoding="latin1") - run_metadata = parse_checkpoint_path(str(p_tmp)) + seed = get_seed_from_checkpoint_path(str(p_tmp)) except EOFError: logger.error( "Cannot store model. Checkpoint file %s does not exist or is corrupted.", checkpoint_path, ) - return run, run_metadata + return run, seed def format_protocol_name_to_list(protocol_name): diff --git a/bluepyemodel/validation/validation.py b/bluepyemodel/validation/validation.py index 4480026a..0fdcafbe 100644 --- a/bluepyemodel/validation/validation.py +++ b/bluepyemodel/validation/validation.py @@ -61,6 +61,28 @@ def define_validation_function(access_point): return validation_function +def compute_scores(model, validation_protocols): + """Compute the scores of an emodel. 
+ + Args: + model (EModel): emodel + validation_protocols (list): list of validation protocols + """ + model.features = model.evaluator.fitness_calculator.calculate_values(model.responses) + for key, value in model.features.items(): + if value is not None: + # turn features from arrays to float to be json serializable + model.features[key] = float(numpy.nanmean([v for v in value if v is not None])) + + scores = model.evaluator.fitness_calculator.calculate_scores(model.responses) + for feature_name in scores: + protocol_name = feature_name.split(".")[0] + if any(are_same_protocol(p, protocol_name) for p in validation_protocols): + model.scores_validation[feature_name] = scores[feature_name] + else: + model.scores[feature_name] = scores[feature_name] + + def validate(access_point, mapper, preselect_for_validation=False): """Compute the scores and traces for the optimisation and validation protocols and perform validation. @@ -88,7 +110,7 @@ def validate(access_point, mapper, preselect_for_validation=False): ) if not emodels: - logger.warning("In compute_scores, no emodels for %s", access_point.emodel_metadata.emodel) + logger.warning("In validate, no emodels for %s", access_point.emodel_metadata.emodel) return [] validation_function = define_validation_function(access_point) @@ -96,22 +118,7 @@ def validate(access_point, mapper, preselect_for_validation=False): logger.info("In validate, %s emodels found to validate.", len(emodels)) for model in emodels: - # turn features from arrays to float to be json serializable - model.features = model.evaluator.fitness_calculator.calculate_values(model.responses) - for key, value in model.features.items(): - if value is not None: - model.features[key] = float(numpy.nanmean([v for v in value if v is not None])) - - scores = model.evaluator.fitness_calculator.calculate_scores(model.responses) - for feature_name in scores: - protocol_name = feature_name.split(".")[0] - if any( - are_same_protocol(p, protocol_name) - for p in 
access_point.pipeline_settings.validation_protocols - ): - model.scores_validation[feature_name] = scores[feature_name] - else: - model.scores[feature_name] = scores[feature_name] + compute_scores(model, access_point.pipeline_settings.validation_protocols) # turn bool_ into bool to be json serializable model.passed_validation = bool( diff --git a/tests/conftest.py b/tests/conftest.py index e1250cb4..b697e9c4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -20,7 +20,6 @@ import pytest from bluepyemodel.access_point import get_access_point -from bluepyemodel.evaluation.evaluation import get_evaluator_from_access_point from tests.utils import DATA, cwd diff --git a/tests/unit_tests/test_emodelmetadata.py b/tests/unit_tests/test_emodelmetadata.py index 3520a5df..d342193a 100644 --- a/tests/unit_tests/test_emodelmetadata.py +++ b/tests/unit_tests/test_emodelmetadata.py @@ -247,18 +247,18 @@ def test_for_resource(metadata): def test_as_string(metadata): """Test as_string method.""" assert metadata.as_string(seed=42) == ( - "emodel=L5_TPC__etype=cAC__ttype=245_L5 PT CTX__mtype=L5_TPC_B__" + "emodel=L5_TPC__etype=cAC__ttype=245_L5_PT_CTX__mtype=L5_TPC_B__" "species=mouse__brain_region=SSCX__iteration=v0__seed=42" ) metadata.allen_notation = "SS" assert metadata.as_string() == ( - "emodel=L5_TPC__etype=cAC__ttype=245_L5 PT CTX__mtype=L5_TPC_B__" + "emodel=L5_TPC__etype=cAC__ttype=245_L5_PT_CTX__mtype=L5_TPC_B__" "species=mouse__brain_region=SS__iteration=v0" ) assert metadata.as_string(use_allen_notation=False) == ( - "emodel=L5_TPC__etype=cAC__ttype=245_L5 PT CTX__mtype=L5_TPC_B__" + "emodel=L5_TPC__etype=cAC__ttype=245_L5_PT_CTX__mtype=L5_TPC_B__" "species=mouse__brain_region=SSCX__iteration=v0" ) diff --git a/tests/unit_tests/test_optimisation.py b/tests/unit_tests/test_optimisation.py index 26c76e81..f6d534cf 100644 --- a/tests/unit_tests/test_optimisation.py +++ b/tests/unit_tests/test_optimisation.py @@ -14,43 +14,51 @@ limitations under the License. 
""" +import pytest + from bluepyemodel.tools.utils import get_checkpoint_path from bluepyemodel.tools.utils import get_legacy_checkpoint_path -from bluepyemodel.tools.utils import parse_checkpoint_path +from bluepyemodel.tools.utils import get_seed_from_checkpoint_path from bluepyemodel.emodel_pipeline.emodel_metadata import EModelMetadata -def test_get_checkpoint_path(): - metadata = EModelMetadata(emodel="L5PC", ttype="t type", iteration_tag="test") +def checkpoint_check(dir, fname, metadata, inner_dir): + f = dir / fname + f.touch() + assert str(get_checkpoint_path(metadata, seed=0)) == "/".join((".", inner_dir, fname)) + f.unlink() + +def test_get_checkpoint_path(workspace): + metadata = EModelMetadata( + emodel="L5PC", + mtype="L5TPC:A", + ttype="t type", + iteration_tag="test", + brain_region="somatosensory cortex", + allen_notation="SSCX", + ) path = get_checkpoint_path(metadata, seed=0) + fname = "emodel=L5PC__ttype=t_type__mtype=L5TPC_A__brain_region=SSCX__iteration=test__seed=0.pkl" assert ( - str(path) == "./checkpoints/L5PC/test/emodel=L5PC__ttype=t type__iteration=test__seed=0.pkl" + str(path) == f"./checkpoints/L5PC/test/{fname}" ) path = get_legacy_checkpoint_path(path) - assert str(path) == "./checkpoints/emodel=L5PC__ttype=t type__iteration=test__seed=0.pkl" + assert str(path) == f"./checkpoints/{fname}" + # test also legacy formats + inner_dir = "checkpoints/L5PC/test" + dir = workspace / inner_dir + dir.mkdir(parents=True) + fname = "emodel=L5PC__ttype=t type__mtype=L5TPC:A__brain_region=somatosensory cortex__iteration=test__seed=0.pkl" + checkpoint_check(dir, fname, metadata, inner_dir) + fname = "emodel=L5PC__ttype=t type__mtype=L5TPC:A__brain_region=SSCX__iteration=test__seed=0.pkl" + checkpoint_check(dir, fname, metadata, inner_dir) + fname = "emodel=L5PC__ttype=t type__mtype=L5TPC_A__brain_region=SSCX__iteration=test__seed=0.pkl" + checkpoint_check(dir, fname, metadata, inner_dir) -def test_parse_checkpoint_path(): - metadata = 
parse_checkpoint_path( - "./checkpoints/L5PC/test/emodel=L5PC__seed=0__iteration=test__ttype=t type.pkl" +def test_get_seed_from_checkpoint_path(): + seed = get_seed_from_checkpoint_path( + "./checkpoints/L5PC/test/emodel=L5PC__seed=0__iteration=test__ttype=t_type.pkl" ) - for k, v in { - "emodel": "L5PC", - "seed": "0", - "ttype": "t type", - "iteration": "test", - }.items(): - assert metadata[k] == v - - metadata = parse_checkpoint_path( - "./checkpoints/L5PC/test/checkpoint__L5PCpyr_ET1_dend__b6f7190__6.pkl" - ) - - for k, v in { - "emodel": "L5PCpyr_ET1_dend", - "seed": "6", - "ttype": None, - "iteration": "b6f7190", - }.items(): - assert metadata[k] == v + assert seed == 0