From 806fd4f7ab25c8102c6c06c8daa581308d401cd5 Mon Sep 17 00:00:00 2001 From: aax270 Date: Tue, 5 Nov 2024 12:06:19 +0000 Subject: [PATCH 1/6] Use https instead of ssh in `.gitmodules` Semi-reverts a397177 --- .gitmodules | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitmodules b/.gitmodules index 0b83c869..31171554 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,6 @@ [submodule "alphafold"] path = alphafold - url = git@github.com:KosinskiLab/alphafold.git + url = https://github.com/KosinskiLab/alphafold.git branch = main [submodule "alphapulldown/analysis_pipeline/af2plots"] path = alphapulldown/analysis_pipeline/af2plots @@ -8,9 +8,9 @@ branch = main [submodule "AlphaLink2"] path = AlphaLink2 - url = git@github.com:KosinskiLab/AlphaLink2.git + url = https://github.com/KosinskiLab/AlphaLink2.git branch = main [submodule "ColabFold"] path = ColabFold - url = git@github.com:sokrypton/ColabFold.git + url = https://github.com/sokrypton/ColabFold.git branch = main From 547f1bc6e1aaf72a5ae1be2eed6db8581160b28e Mon Sep 17 00:00:00 2001 From: Dima <33123184+DimaMolod@users.noreply.github.com> Date: Tue, 14 Jan 2025 15:36:35 +0100 Subject: [PATCH 2/6] Added links to the features db --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index d02f44ef..2ecca245 100644 --- a/README.md +++ b/README.md @@ -98,7 +98,9 @@ AlphaPulldown is a customized implementation of [AlphaFold-Multimer](https://git AlphaPulldown can be used in two ways: either by a two-step pipeline made of **python scripts**, or by a **Snakemake pipeline** as a whole. For details on using the Snakemake pipeline, please refer to the separate GitHub [**repository**](https://github.com/KosinskiLab/AlphaPulldownSnakemake). -To enable faster usage and avoid redundant feature recalculations, we have developed a public database containing precomputed features for all major model organisms, available for download. For more details, [click here](https://github.com/KosinskiLab/AlphaPulldown/blob/main/README.md#features-database). +To enable faster usage and avoid redundant feature recalculations, we have developed a [public database](https://alphapulldown.s3.embl.de/index.html) containing precomputed features for all major model organisms, available for download. You can check the full list and download individual features at https://alphapulldown.s3.embl.de/index.html or https://s3.embl.de/alphapulldown/index.html. + +For more details, [click here](https://github.com/KosinskiLab/AlphaPulldown/blob/main/README.md#features-database). ## Overview From a4b4135287881b201887cf9a2bdd8a4c74fbd17c Mon Sep 17 00:00:00 2001 From: Dima Molodenskiy Date: Thu, 16 Jan 2025 11:53:50 +0100 Subject: [PATCH 3/6] Use https:// for alphafold3 submodule --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index b6bbaa8f..954cdc1e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -16,5 +16,5 @@ branch = main [submodule "alphafold3"] path = alphafold3 - url = git@github.com:google-deepmind/alphafold3.git + url = https://github.com/google-deepmind/alphafold3.git branch = main From d4fd6c68ab17d378ad9dd9996e51ea4d7b8fd078 Mon Sep 17 00:00:00 2001 From: Dima Molodenskiy Date: Thu, 16 Jan 2025 11:54:35 +0100 Subject: [PATCH 4/6] Update alphafold3 --- alphafold3 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alphafold3 b/alphafold3 index aa724ca1..ea040346 160000 --- a/alphafold3 +++ b/alphafold3 @@ -1 +1 @@ -Subproject commit aa724ca1cbfc7084fa683d27418a0d86d6228cd4 +Subproject commit ea040346e10db1759170e723ef263316e64aa768 From d89b0f8b88e5c8adf98fecc1e79c3327c439ceeb Mon Sep 17 00:00:00 2001 From: Dima Molodenskiy Date: Fri, 17 Jan 2025 10:29:53 +0100 Subject: [PATCH 5/6] Check keys to be removed are not removed if --remove_keys_from_pickles=false --- test/test_post_prediction.py | 94 +++++++++++++++++++++--------------- 1 file changed, 55 insertions(+), 39 deletions(-) diff --git a/test/test_post_prediction.py b/test/test_post_prediction.py index e805e5a1..c6aaa1d9 100644 --- a/test/test_post_prediction.py +++ b/test/test_post_prediction.py @@ -12,11 +12,8 @@ class TestPostPrediction(parameterized.TestCase): def setUp(self) -> None: super().setUp() - # Get path of the alphapulldown module parent_dir = join(dirname(dirname(abspath(__file__)))) - # Join the path with the script name self.input_dir = join(parent_dir, "test/test_data/predictions") - # Set logging level to INFO logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') @parameterized.parameters( @@ -40,71 +37,90 @@ def setUp(self) -> None: def test_files(self, prediction_dir, compress_pickles, remove_pickles, remove_keys): temp_dir = tempfile.TemporaryDirectory() try: - logging.info(f"Running test for prediction_dir='{prediction_dir}', compress_pickles={compress_pickles}, remove_pickles={remove_pickles}, remove_keys={remove_keys}") + logging.info(f"Running test for prediction_dir='{prediction_dir}', " + f"compress_pickles={compress_pickles}, remove_pickles={remove_pickles}, remove_keys={remove_keys}") temp_dir_path = temp_dir.name - # Copy the files to the temporary directory shutil.copytree(join(self.input_dir, prediction_dir), join(temp_dir_path, prediction_dir)) + # Remove existing gz files - gz_files = [f for f in os.listdir(join(temp_dir_path, prediction_dir)) if f.endswith('.gz')] - for f in gz_files: - os.remove(join(temp_dir_path, prediction_dir, f)) - # Run the postprocessing function - post_prediction_process(join(temp_dir_path, prediction_dir), compress_pickles, remove_pickles, remove_keys) + gz_files_existing = [f for f in os.listdir(join(temp_dir_path, prediction_dir)) if f.endswith('.gz')] + for f_ in gz_files_existing: + os.remove(join(temp_dir_path, prediction_dir, f_)) - # Get the best model from ranking_debug.json + # Run the postprocessing + post_prediction_process(join(temp_dir_path, prediction_dir), + compress_pickles, + remove_pickles, + remove_keys) + + # Identify the best model with open(join(temp_dir_path, prediction_dir, 'ranking_debug.json')) as f: best_model = json.load(f)['order'][0] - - # Define the expected best result pickle path best_result_pickle = join(temp_dir_path, prediction_dir, f"result_{best_model}.pkl") - # Check if files are removed and/or compressed based on the parameters + # Gather .pkl and .gz files pickle_files = [f for f in os.listdir(join(temp_dir_path, prediction_dir)) if f.endswith('.pkl')] gz_files = [f for f in os.listdir(join(temp_dir_path, prediction_dir)) if f.endswith('.gz')] + # Check if specified keys exist or were removed if remove_keys: - # Ensure specified keys are removed from the pickle files - for pickle_file in pickle_files: - with open(join(temp_dir_path, prediction_dir, pickle_file), 'rb') as f: + for pf in pickle_files: + with open(join(temp_dir_path, prediction_dir, pf), 'rb') as f: data = pickle.load(f) for key in ['aligned_confidence_probs', 'distogram', 'masked_msa']: - self.assertNotIn(key, data, f"Key {key} was not removed from {pickle_file}") + self.assertNotIn(key, data, f"Key '{key}' was not removed from {pf}") + else: + # If we're not removing keys, verify they still exist in the pickle + for pf in pickle_files: + with open(join(temp_dir_path, prediction_dir, pf), 'rb') as f: + data = pickle.load(f) + for key in ['aligned_confidence_probs', 'distogram', 'masked_msa']: + self.assertIn(key, data, f"Key '{key}' was unexpectedly removed from {pf}") + # Now check file counts / compressions if not compress_pickles and not remove_pickles: - # All pickle files should be present, no gz files - logging.info("Checking condition: not compress_pickles and not remove_pickles") - self.assertEqual(len(pickle_files), 5, f"Expected 5 pickle files, found {len(pickle_files)}.") - self.assertEqual(len(gz_files), 0, f"Expected 0 gz files, found {len(gz_files)}.") + # Expect all .pkl files (5 in your scenario), no .gz + self.assertEqual(len(pickle_files), 5, + f"Expected 5 pickle files, found {len(pickle_files)}.") + self.assertEqual(len(gz_files), 0, + f"Expected 0 gz files, found {len(gz_files)}.") if compress_pickles and not remove_pickles: - # No pickle files should be present, each compressed separately - logging.info("Checking condition: compress_pickles and not remove_pickles") - self.assertEqual(len(pickle_files), 0, f"Expected 0 pickle files, found {len(pickle_files)}.") - self.assertEqual(len(gz_files), 5, f"Expected 5 gz files, found {len(gz_files)}.") + # Expect 0 .pkl files, all compressed (5) + self.assertEqual(len(pickle_files), 0, + f"Expected 0 pickle files, found {len(pickle_files)}.") + self.assertEqual(len(gz_files), 5, + f"Expected 5 gz files, found {len(gz_files)}.") + # Validate that gz files are readable for gz_file in gz_files: with gzip.open(join(temp_dir_path, prediction_dir, gz_file), 'rb') as f: - f.read(1) # Ensure it's a valid gzip file + f.read(1) if not compress_pickles and remove_pickles: - # Only the best result pickle should be present - logging.info("Checking condition: not compress_pickles and remove_pickles") - self.assertEqual(len(pickle_files), 1, f"Expected 1 pickle file, found {len(pickle_files)}.") - self.assertEqual(len(gz_files), 0, f"Expected 0 gz files, found {len(gz_files)}.") - self.assertTrue(os.path.exists(best_result_pickle), f"Best result pickle file does not exist: {best_result_pickle}") + # Only the best pickle remains + self.assertEqual(len(pickle_files), 1, + f"Expected 1 pickle file, found {len(pickle_files)}.") + self.assertEqual(len(gz_files), 0, + f"Expected 0 gz files, found {len(gz_files)}.") + self.assertTrue(os.path.exists(best_result_pickle), + f"Best result pickle file does not exist: {best_result_pickle}") if compress_pickles and remove_pickles: - # Only the best result pickle should be compressed, no pickle files present - logging.info("Checking condition: compress_pickles and remove_pickles") - self.assertEqual(len(pickle_files), 0, f"Expected 0 pickle files, found {len(pickle_files)}.") - self.assertEqual(len(gz_files), 1, f"Expected 1 gz file, found {len(gz_files)}.") - self.assertTrue(os.path.exists(best_result_pickle + ".gz"), f"Best result pickle file not compressed: {best_result_pickle}.gz") + # Only the best pickle is compressed + self.assertEqual(len(pickle_files), 0, + f"Expected 0 pickle files, found {len(pickle_files)}.") + self.assertEqual(len(gz_files), 1, + f"Expected 1 gz file, found {len(gz_files)}.") + self.assertTrue(os.path.exists(best_result_pickle + ".gz"), + f"Best result pickle file not compressed: {best_result_pickle}.gz") with gzip.open(join(temp_dir_path, prediction_dir, gz_files[0]), 'rb') as f: - f.read(1) # Ensure it's a valid gzip file + f.read(1) # Check it's valid gzip + except AssertionError as e: logging.error(f"AssertionError: {e}") all_files = os.listdir(join(temp_dir_path, prediction_dir)) relevant_files = [f for f in all_files if f.endswith('.gz') or f.endswith('.pkl')] logging.error(f".gz and .pkl files in {join(temp_dir_path, prediction_dir)}: {relevant_files}") - raise # Re-raise the exception to ensure the test is marked as failed + raise finally: temp_dir.cleanup() From 254794bccbc696a48508a93dd8482a7d1f5bc0f1 Mon Sep 17 00:00:00 2001 From: Dima Molodenskiy Date: Mon, 20 Jan 2025 14:43:21 +0100 Subject: [PATCH 6/6] Fix PR#475 --- alphapulldown/scripts/run_multimer_jobs.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/alphapulldown/scripts/run_multimer_jobs.py b/alphapulldown/scripts/run_multimer_jobs.py index a6544080..24371946 100644 --- a/alphapulldown/scripts/run_multimer_jobs.py +++ b/alphapulldown/scripts/run_multimer_jobs.py @@ -93,6 +93,7 @@ def main(argv): "--path_to_mmt": FLAGS.path_to_mmt, "--compress_result_pickles": FLAGS.compress_result_pickles, "--remove_result_pickles": FLAGS.remove_result_pickles, + "--remove_keys_from_pickles": FLAGS.remove_keys_from_pickles, "--use_ap_style": True, "--use_gpu_relax": FLAGS.use_gpu_relax, "--protein_delimiter": FLAGS.protein_delimiter, @@ -138,6 +139,7 @@ def main(argv): command = base_command.copy() for arg, value in command_args.items(): command.extend([str(arg), str(value)]) + logging.info(f"command: {command}") subprocess.run(" ".join(command), check=True, shell=True)