Commit

Merge branch 'main' into patch-1
DimaMolod authored Jan 20, 2025
2 parents 769cff5 + 254794b commit bea2d79
Showing 5 changed files with 65 additions and 46 deletions.
8 changes: 4 additions & 4 deletions .gitmodules
@@ -1,20 +1,20 @@
[submodule "alphafold"]
path = alphafold
-url = git@github.com:KosinskiLab/alphafold.git
+url = https://github.com/KosinskiLab/alphafold.git
branch = main
[submodule "alphapulldown/analysis_pipeline/af2plots"]
path = alphapulldown/analysis_pipeline/af2plots
url = https://gitlab.com/gchojnowski/af2plots.git
branch = main
[submodule "AlphaLink2"]
path = AlphaLink2
-url = git@github.com:KosinskiLab/AlphaLink2.git
+url = https://github.com/KosinskiLab/AlphaLink2.git
branch = main
[submodule "ColabFold"]
path = ColabFold
-url = git@github.com:sokrypton/ColabFold.git
+url = https://github.com/sokrypton/ColabFold.git
branch = main
[submodule "alphafold3"]
path = alphafold3
-url = git@github.com:google-deepmind/alphafold3.git
+url = https://github.com/google-deepmind/alphafold3.git
branch = main
4 changes: 3 additions & 1 deletion README.md
@@ -98,7 +98,9 @@ AlphaPulldown is a customized implementation of [AlphaFold-Multimer](https://git

AlphaPulldown can be used in two ways: either by a two-step pipeline made of **python scripts**, or by a **Snakemake pipeline** as a whole. For details on using the Snakemake pipeline, please refer to the separate GitHub [**repository**](https://github.com/KosinskiLab/AlphaPulldownSnakemake).
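In practice, the two-step pipeline amounts to two script invocations: one to build the sequence features, one to run the predictions. A minimal sketch follows; the flag names are illustrative assumptions drawn from the AlphaPulldown documentation, not part of this commit.

```python
# Hedged sketch of the two-step pipeline; flag names are assumptions --
# check the AlphaPulldown docs for the authoritative interface.
import subprocess

# Step 1: compute MSA/template features for every input sequence.
subprocess.run(
    [
        "create_individual_features.py",
        "--fasta_paths=baits.fasta,candidates.fasta",
        "--data_dir=/path/to/alphafold_databases",
        "--output_dir=features",
        "--max_template_date=2025-01-20",
    ],
    check=True,
)

# Step 2: predict structures for every bait-candidate pair.
subprocess.run(
    [
        "run_multimer_jobs.py",
        "--mode=pulldown",
        "--monomer_objects_dir=features",
        "--protein_lists=baits.txt,candidates.txt",
        "--output_path=predictions",
    ],
    check=True,
)
```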

-To enable faster usage and avoid redundant feature recalculations, we have developed a public database containing precomputed features for all major model organisms, available for download. For more details, [click here](https://github.com/KosinskiLab/AlphaPulldown/blob/main/README.md#features-database).
+To enable faster usage and avoid redundant feature recalculations, we have developed a [public database](https://alphapulldown.s3.embl.de/index.html) containing precomputed features for all major model organisms, available for download. You can check the full list and download individual features at https://alphapulldown.s3.embl.de/index.html or https://s3.embl.de/alphapulldown/index.html.

+For more details, [click here](https://github.com/KosinskiLab/AlphaPulldown/blob/main/README.md#features-database).
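Because the database is exposed as a plain S3 index, a single feature file can also be fetched programmatically. A minimal sketch, assuming a hypothetical object key; browse the index pages above for the real paths:

```python
# Hedged sketch: download one precomputed feature pickle from the public
# database. The object key is hypothetical; look it up in the index first.
import urllib.request

BASE = "https://alphapulldown.s3.embl.de"
key = "input_features/Homo_sapiens/P04637.pkl"  # hypothetical example key
urllib.request.urlretrieve(f"{BASE}/{key}", "P04637.pkl")
```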
## Overview

<picture>
2 changes: 1 addition & 1 deletion alphafold3
Submodule alphafold3 updated 47 files
+2 −2 README.md
+6 −2 docker/Dockerfile
+231 −77 docs/input.md
+51 −15 docs/installation.md
+9 −30 docs/known_issues.md
+32 −2 docs/output.md
+25 −12 docs/performance.md
+97 −63 run_alphafold.py
+3 −3 run_alphafold_data_test.py
+21 −9 run_alphafold_test.py
+38 −0 src/alphafold3/.github/workflows/ci.yaml
+437 −152 src/alphafold3/common/folding_input.py
+16 −8 src/alphafold3/constants/chemical_components.py
+6 −1 src/alphafold3/data/featurisation.py
+1 −1 src/alphafold3/data/parsers.py
+24 −11 src/alphafold3/data/pipeline.py
+3 −1 src/alphafold3/data/templates.py
+22 −0 src/alphafold3/data/tools/rdkit_utils.py
+7 −6 src/alphafold3/jax/gated_linear_unit/matmul_ext.py
+10 −40 src/alphafold3/model/atom_layout/atom_layout.py
+0 −62 src/alphafold3/model/components/base_model.py
+4 −4 src/alphafold3/model/confidence_types.py
+12 −8 src/alphafold3/model/confidences.py
+2 −4 src/alphafold3/model/data3.py
+117 −106 src/alphafold3/model/features.py
+57 −344 src/alphafold3/model/model.py
+1 −1 src/alphafold3/model/model_config.py
+1 −1 src/alphafold3/model/network/atom_cross_attention.py
+2 −2 src/alphafold3/model/network/confidence_head.py
+3 −3 src/alphafold3/model/network/diffusion_head.py
+0 −0 src/alphafold3/model/network/diffusion_transformer.py
+0 −0 src/alphafold3/model/network/distogram_head.py
+347 −0 src/alphafold3/model/network/evoformer.py
+0 −0 src/alphafold3/model/network/featurization.py
+1 −1 src/alphafold3/model/network/modules.py
+1 −1 src/alphafold3/model/network/template_modules.py
+4 −4 src/alphafold3/model/pipeline/pipeline.py
+13 −3 src/alphafold3/model/post_processing.py
+ src/alphafold3/test_data/alphafold_run_outputs/run_alphafold_test_output_bucket_1024.pkl
+ src/alphafold3/test_data/alphafold_run_outputs/run_alphafold_test_output_bucket_default.pkl
+4 −4 src/alphafold3/test_data/featurised_example.json
+ src/alphafold3/test_data/featurised_example.pkl
+562 −0 src/alphafold3/test_data/miniature_databases/pdb_mmcif/5y2e.cif
+3,293 −0 src/alphafold3/test_data/miniature_databases/pdb_mmcif/6s61.cif
+2,467 −0 src/alphafold3/test_data/miniature_databases/pdb_mmcif/6ydw.cif
+893 −0 src/alphafold3/test_data/miniature_databases/pdb_mmcif/7rye.cif
+2 −1 src/alphafold3/test_data/model_config.json
3 changes: 2 additions & 1 deletion alphapulldown/scripts/run_multimer_jobs.py
@@ -93,7 +93,7 @@ def main(argv):
"--path_to_mmt": FLAGS.path_to_mmt,
"--compress_result_pickles": FLAGS.compress_result_pickles,
"--remove_result_pickles": FLAGS.remove_result_pickles,
"--remove_keys_from_pickles" : FLAGS.remove_keys_from_pickles,
"--remove_keys_from_pickles": FLAGS.remove_keys_from_pickles,
"--use_ap_style": True,
"--use_gpu_relax": FLAGS.use_gpu_relax,
"--protein_delimiter": FLAGS.protein_delimiter,
@@ -139,6 +139,7 @@ def main(argv):
command = base_command.copy()
for arg, value in command_args.items():
command.extend([str(arg), str(value)])
logging.info(f"command: {command}")
subprocess.run(" ".join(command), check=True, shell=True)


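A side note on the line this hunk adds: the command is flattened with `" ".join(command)` and executed with `shell=True`, so any argument containing a space would be split by the shell. Below is a hedged sketch of a safer variant using `shlex.join` — a suggestion under that assumption, not what the commit implements.

```python
# Sketch: log the assembled command, then run it through the shell.
# shlex.join quotes each argument, so a path with a space survives
# shell=True; plain " ".join would split it. "echo" stands in for the
# real prediction script here.
import logging
import shlex
import subprocess

command = ["echo", "predicting", "/tmp/my run"]
logging.info("command: %s", shlex.join(command))
subprocess.run(shlex.join(command), check=True, shell=True)
```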
94 changes: 55 additions & 39 deletions test/test_post_prediction.py
@@ -12,11 +12,8 @@
class TestPostPrediction(parameterized.TestCase):
def setUp(self) -> None:
super().setUp()
-# Get path of the alphapulldown module
parent_dir = join(dirname(dirname(abspath(__file__))))
-# Join the path with the script name
self.input_dir = join(parent_dir, "test/test_data/predictions")
-# Set logging level to INFO
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

@parameterized.parameters(
@@ -40,71 +37,90 @@ def setUp(self) -> None:
def test_files(self, prediction_dir, compress_pickles, remove_pickles, remove_keys):
temp_dir = tempfile.TemporaryDirectory()
try:
logging.info(f"Running test for prediction_dir='{prediction_dir}', compress_pickles={compress_pickles}, remove_pickles={remove_pickles}, remove_keys={remove_keys}")
logging.info(f"Running test for prediction_dir='{prediction_dir}', "
f"compress_pickles={compress_pickles}, remove_pickles={remove_pickles}, remove_keys={remove_keys}")
temp_dir_path = temp_dir.name
# Copy the files to the temporary directory
shutil.copytree(join(self.input_dir, prediction_dir), join(temp_dir_path, prediction_dir))

# Remove existing gz files
-gz_files = [f for f in os.listdir(join(temp_dir_path, prediction_dir)) if f.endswith('.gz')]
-for f in gz_files:
-    os.remove(join(temp_dir_path, prediction_dir, f))
-# Run the postprocessing function
-post_prediction_process(join(temp_dir_path, prediction_dir), compress_pickles, remove_pickles, remove_keys)
+gz_files_existing = [f for f in os.listdir(join(temp_dir_path, prediction_dir)) if f.endswith('.gz')]
+for f_ in gz_files_existing:
+    os.remove(join(temp_dir_path, prediction_dir, f_))

-# Get the best model from ranking_debug.json
+# Run the postprocessing
+post_prediction_process(join(temp_dir_path, prediction_dir),
+                        compress_pickles,
+                        remove_pickles,
+                        remove_keys)

+# Identify the best model
with open(join(temp_dir_path, prediction_dir, 'ranking_debug.json')) as f:
best_model = json.load(f)['order'][0]

# Define the expected best result pickle path
best_result_pickle = join(temp_dir_path, prediction_dir, f"result_{best_model}.pkl")

-# Check if files are removed and/or compressed based on the parameters
+# Gather .pkl and .gz files
pickle_files = [f for f in os.listdir(join(temp_dir_path, prediction_dir)) if f.endswith('.pkl')]
gz_files = [f for f in os.listdir(join(temp_dir_path, prediction_dir)) if f.endswith('.gz')]

+# Check if specified keys exist or were removed
if remove_keys:
-# Ensure specified keys are removed from the pickle files
-for pickle_file in pickle_files:
-    with open(join(temp_dir_path, prediction_dir, pickle_file), 'rb') as f:
+for pf in pickle_files:
+    with open(join(temp_dir_path, prediction_dir, pf), 'rb') as f:
data = pickle.load(f)
for key in ['aligned_confidence_probs', 'distogram', 'masked_msa']:
self.assertNotIn(key, data, f"Key {key} was not removed from {pickle_file}")
self.assertNotIn(key, data, f"Key '{key}' was not removed from {pf}")
else:
# If we're not removing keys, verify they still exist in the pickle
for pf in pickle_files:
with open(join(temp_dir_path, prediction_dir, pf), 'rb') as f:
data = pickle.load(f)
for key in ['aligned_confidence_probs', 'distogram', 'masked_msa']:
self.assertIn(key, data, f"Key '{key}' was unexpectedly removed from {pf}")

+# Now check file counts / compressions
if not compress_pickles and not remove_pickles:
-# All pickle files should be present, no gz files
logging.info("Checking condition: not compress_pickles and not remove_pickles")
self.assertEqual(len(pickle_files), 5, f"Expected 5 pickle files, found {len(pickle_files)}.")
self.assertEqual(len(gz_files), 0, f"Expected 0 gz files, found {len(gz_files)}.")
# Expect all .pkl files (5 in your scenario), no .gz
self.assertEqual(len(pickle_files), 5,
f"Expected 5 pickle files, found {len(pickle_files)}.")
self.assertEqual(len(gz_files), 0,
f"Expected 0 gz files, found {len(gz_files)}.")

if compress_pickles and not remove_pickles:
-# No pickle files should be present, each compressed separately
logging.info("Checking condition: compress_pickles and not remove_pickles")
self.assertEqual(len(pickle_files), 0, f"Expected 0 pickle files, found {len(pickle_files)}.")
self.assertEqual(len(gz_files), 5, f"Expected 5 gz files, found {len(gz_files)}.")
# Expect 0 .pkl files, all compressed (5)
self.assertEqual(len(pickle_files), 0,
f"Expected 0 pickle files, found {len(pickle_files)}.")
self.assertEqual(len(gz_files), 5,
f"Expected 5 gz files, found {len(gz_files)}.")
# Validate that gz files are readable
for gz_file in gz_files:
with gzip.open(join(temp_dir_path, prediction_dir, gz_file), 'rb') as f:
-f.read(1)  # Ensure it's a valid gzip file
+f.read(1)

if not compress_pickles and remove_pickles:
-# Only the best result pickle should be present
logging.info("Checking condition: not compress_pickles and remove_pickles")
self.assertEqual(len(pickle_files), 1, f"Expected 1 pickle file, found {len(pickle_files)}.")
self.assertEqual(len(gz_files), 0, f"Expected 0 gz files, found {len(gz_files)}.")
self.assertTrue(os.path.exists(best_result_pickle), f"Best result pickle file does not exist: {best_result_pickle}")
# Only the best pickle remains
self.assertEqual(len(pickle_files), 1,
f"Expected 1 pickle file, found {len(pickle_files)}.")
self.assertEqual(len(gz_files), 0,
f"Expected 0 gz files, found {len(gz_files)}.")
self.assertTrue(os.path.exists(best_result_pickle),
f"Best result pickle file does not exist: {best_result_pickle}")

if compress_pickles and remove_pickles:
-# Only the best result pickle should be compressed, no pickle files present
logging.info("Checking condition: compress_pickles and remove_pickles")
self.assertEqual(len(pickle_files), 0, f"Expected 0 pickle files, found {len(pickle_files)}.")
self.assertEqual(len(gz_files), 1, f"Expected 1 gz file, found {len(gz_files)}.")
self.assertTrue(os.path.exists(best_result_pickle + ".gz"), f"Best result pickle file not compressed: {best_result_pickle}.gz")
# Only the best pickle is compressed
self.assertEqual(len(pickle_files), 0,
f"Expected 0 pickle files, found {len(pickle_files)}.")
self.assertEqual(len(gz_files), 1,
f"Expected 1 gz file, found {len(gz_files)}.")
self.assertTrue(os.path.exists(best_result_pickle + ".gz"),
f"Best result pickle file not compressed: {best_result_pickle}.gz")
with gzip.open(join(temp_dir_path, prediction_dir, gz_files[0]), 'rb') as f:
-f.read(1)  # Ensure it's a valid gzip file
+f.read(1)  # Check it's valid gzip

except AssertionError as e:
logging.error(f"AssertionError: {e}")
all_files = os.listdir(join(temp_dir_path, prediction_dir))
relevant_files = [f for f in all_files if f.endswith('.gz') or f.endswith('.pkl')]
logging.error(f".gz and .pkl files in {join(temp_dir_path, prediction_dir)}: {relevant_files}")
-raise  # Re-raise the exception to ensure the test is marked as failed
+raise
finally:
temp_dir.cleanup()
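For readers skimming the test above, the behaviour it pins down can be summarised in a single call. A hedged sketch — the import path is an assumption about where `post_prediction_process` lives in the package, and the call is positional, mirroring the test:

```python
# Hedged sketch of the post-processing contract exercised by the test.
# The import path is an assumption; adjust to your AlphaPulldown version.
from alphapulldown.utils.post_modelling import post_prediction_process

# (compress_pickles, remove_pickles) -> files left next to ranking_debug.json:
#   False, False -> every result_*.pkl, no .gz
#   True,  False -> every pickle gzipped individually, no bare .pkl
#   False, True  -> only the best-ranked result_*.pkl
#   True,  True  -> only the best-ranked pickle, gzipped
post_prediction_process(
    "predictions/my_complex",  # prediction directory
    True,   # compress_pickles
    True,   # remove_pickles
    True,   # remove_keys: drop aligned_confidence_probs, distogram, masked_msa
)
```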
