-
Notifications
You must be signed in to change notification settings - Fork 772
{bio}[foss/2021b] AlphaFold v2.3.0 w/ Python 3.9.6 + CUDA 11.4.1 #16874
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
boegel
merged 3 commits into
easybuilders:develop
from
ThomasHoffmann77:20221214122611_new_pr_AlphaFold230
Mar 1, 2023
Merged
Changes from 2 commits
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
72f9943
adding easyconfigs: AlphaFold-2.3.0-foss-2021b-CUDA-11.4.1.eb and pat…
ThomasHoffmann77 181af99
Merge branch 'develop' of https://github.com/easybuilders/easybuild-e…
boegel 8600fbb
delete run_alphafold_{features,resume}.py
ThomasHoffmann77 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
168 changes: 168 additions & 0 deletions
168
easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.3.0-foss-2021b-CUDA-11.4.1.eb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,168 @@ | ||
| easyblock = 'PythonBundle' | ||
|
|
||
| name = 'AlphaFold' | ||
| version = '2.3.0' | ||
| versionsuffix = '-CUDA-%(cudaver)s' | ||
|
|
||
| homepage = 'https://deepmind.com/research/case-studies/alphafold' | ||
| description = "AlphaFold can predict protein structures with atomic accuracy even where no similar structure is known" | ||
|
|
||
| toolchain = {'name': 'foss', 'version': '2021b'} | ||
|
|
||
| builddependencies = [ | ||
| # required for installing dm-tree | ||
| ('Bazel', '3.7.2'), | ||
| ('CMake', '3.22.1'), | ||
| ] | ||
|
|
||
| dependencies = [ | ||
| ('Python', '3.9.6'), | ||
| ('CUDA', '11.4.1', '', SYSTEM), | ||
| ('SciPy-bundle', '2021.10'), | ||
| ('PyYAML', '5.4.1'), | ||
| ('TensorFlow', '2.7.1', versionsuffix), | ||
| ('Biopython', '1.79'), | ||
| ('HH-suite', '3.3.0'), | ||
| ('HMMER', '3.3.2'), | ||
| ('Kalign', '3.3.2'), | ||
| ('jax', '0.3.23', versionsuffix), # also provides absl-py | ||
| ('UCX-CUDA', '1.11.2', versionsuffix), | ||
| ('cuDNN', '8.2.2.26', versionsuffix, SYSTEM), | ||
| ('NCCL', '2.10.3', versionsuffix), | ||
| ('OpenMM', '7.5.1', '-DeepMind-patch'), | ||
| ] | ||
|
|
||
| # commit to use for downloading stereo_chemical_props.txt (which is to be copied to alphafold/common), | ||
| # see docker/Dockerfile in the AlphaFold repository | ||
| local_scp_commit = '7102c6' | ||
|
|
||
| components = [ | ||
| (name, version, { | ||
| 'easyblock': 'PythonPackage', | ||
| 'source_urls': [ | ||
| 'https://github.com/deepmind/alphafold/archive/refs/tags/', | ||
| 'https://git.scicore.unibas.ch/schwede/openstructure/-/raw/%s/modules/mol/alg/src/' % local_scp_commit, | ||
| ], | ||
| 'sources': [ | ||
| { | ||
| 'download_filename': 'v%(version)s.tar.gz', | ||
| 'filename': SOURCE_TAR_GZ, | ||
| }, | ||
| { | ||
| 'download_filename': 'stereo_chemical_props.txt', | ||
| 'filename': 'stereo_chemical_props-%s.txt' % local_scp_commit, | ||
| 'extract_cmd': "cp %s .", | ||
| }, | ||
| ], | ||
| 'patches': [ | ||
| 'AlphaFold-2.0.0_fix-packages.patch', | ||
| 'AlphaFold-2.3.0_data-dep-paths.patch', | ||
| 'AlphaFold-2.0.0_n-cpu.patch', | ||
| 'AlphaFold-2.1.0_fix-scp-path.patch', | ||
| 'AlphaFold-2.0.1_setup_rm_tfcpu.patch', | ||
| ], | ||
| 'checksums': [ | ||
| '52055a0b4bf194ae0e1960e6391e501490f82274c975e01c1ff0e353a1cd59d9', # v2.3.0.tar.gz | ||
| '24510899eeb49167cffedec8fa45363a4d08279c0c637a403b452f7d0ac09451', # stereo_chemical_props-7102c6.txt | ||
| '826d2d1a5d6ac52c51a60ba210e1947d5631a1e2d76f8815305b5d23f74458db', # AlphaFold-2.0.0_fix-packages.patch | ||
| '5cff3fc7104e020ef546d23cb4fb1b8d6517562783f055cc55fc65fe2b0248d0', # AlphaFold-2.3.0_data-dep-paths.patch | ||
| 'dfda4dd5f9aba19fe2b6eb9a0ec583d12dcefdfee8ab8803fc57ad48d582db04', # AlphaFold-2.0.0_n-cpu.patch | ||
| '5363d403baf5ab73f4d3ddd72e19af9ff832de4b1d7ba25a5fbcc5846c1c890f', # AlphaFold-2.1.0_fix-scp-path.patch | ||
| '1a2e4e843bd9a4d15ee39e6c37cc63ba281311cc7a0a5610f0e43b52ef93faac', # AlphaFold-2.0.1_setup_rm_tfcpu.patch | ||
|
|
||
| ], | ||
| 'start_dir': 'alphafold-%(version)s', | ||
| 'use_pip': True, | ||
| }), | ||
| ] | ||
|
|
||
| use_pip = True | ||
|
|
||
| exts_list = [ | ||
| ('PDBFixer', '1.7', { | ||
| 'source_urls': ['https://github.com/openmm/pdbfixer/archive/refs/tags/'], | ||
| 'sources': [{'download_filename': 'v%(version)s.tar.gz', 'filename': '%(name)s-%(version)s.tar.gz'}], | ||
| 'checksums': ['a0bef3c52a7bbe69a6aea5333f51f3e7d158339be5829aed19b0344bd66d4eea'], | ||
| }), | ||
| ('toolz', '0.11.2', { | ||
| 'checksums': ['6b312d5e15138552f1bda8a4e66c30e236c831b612b2bf0005f8a1df10a4bc33'], | ||
| }), | ||
| ('chex', '0.1.5', { | ||
| 'checksums': ['686858320f8f220c82a6c7eeb54dcdcaa4f3d7f66690dacd13a24baa1ee8299e'], | ||
| }), | ||
| ('tabulate', '0.8.10', { | ||
| 'checksums': ['6c57f3f3dd7ac2782770155f3adb2db0b1a269637e42f27599925e64b114f519'], | ||
| }), | ||
| ('jmp', '0.0.2', { | ||
| 'preinstallopts': "touch requirements.txt && touch requirements-test.txt && ", | ||
| 'checksums': ['fdb5cec0d10aab4116c2770f24b2adf4f503fcfbb96ce8ef583e1879bdbf1b9b'], | ||
| }), | ||
| ('dm-haiku', '0.0.9', { | ||
| 'modulename': 'haiku', | ||
| 'source_urls': ['https://github.com/deepmind/dm-haiku/archive/refs/tags/'], | ||
| 'sources': [{'download_filename': 'v%(version)s.tar.gz', 'filename': '%(name)s-%(version)s.tar.gz'}], | ||
| 'checksums': ['d550f07f5891ede30ada5faafde98f549ed1b8ceadb7a601cca3d81db7d82414'], | ||
| }), | ||
| ('dm-tree', '0.1.7', { | ||
| 'modulename': 'tree', | ||
| 'checksums': ['30fec8aca5b92823c0e796a2f33b875b4dccd470b57e91e6c542405c5f77fd2a'], | ||
| }), | ||
| ('websocket-client', '1.3.3', { | ||
| 'modulename': 'websocket', | ||
| 'checksums': ['d58c5f284d6a9bf8379dab423259fe8f85b70d5fa5d2916d5791a84594b122b1'], | ||
| }), | ||
| ('docker', '5.0.3', { | ||
| 'checksums': ['d916a26b62970e7c2f554110ed6af04c7ccff8e9f81ad17d0d40c75637e227fb'], | ||
| }), | ||
| ('immutabledict', '2.2.1', { | ||
| 'checksums': ['1ddb0edf1bb6c70d0197eb90ce1fe2b2d58502334f5fdfde72d7c633d723ec3a'], | ||
| }), | ||
| ('contextlib2', '21.6.0', { | ||
| 'checksums': ['ab1e2bfe1d01d968e1b7e8d9023bc51ef3509bba217bb730cee3827e1ee82869'], | ||
| }), | ||
| ('ml_collections', '0.1.1', { | ||
| 'preinstallopts': "touch requirements.txt && touch requirements-test.txt && ", | ||
| 'checksums': ['3fefcc72ec433aa1e5d32307a3e474bbb67f405be814ea52a2166bfc9dbe68cc'], | ||
| }), | ||
| ] | ||
|
|
||
| postinstallcmds = [ | ||
| "mkdir -p %(installdir)s/bin", | ||
| # run_alphafold.py script is missing a shebang... | ||
| "echo '#!/usr/bin/env python' > %(installdir)s/bin/run_alphafold.py", | ||
| "cat %(builddir)s/alphafold-%(version)s/run_alphafold.py >> %(installdir)s/bin/run_alphafold.py", | ||
| "cp %(builddir)s/alphafold-%(version)s/run_alphafold_{features,resume}.py %(installdir)s/bin", | ||
| "chmod a+x %(installdir)s/bin/run_alphafold*.py", | ||
| "cd %(installdir)s/bin && ln -s run_alphafold.py alphafold", | ||
| "cp -a %(builddir)s/alphafold-%(version)s/scripts %(installdir)s/", | ||
| "cp %%(builddir)s/stereo_chemical_props-%s.txt %%(installdir)s/stereo_chemical_props.txt" % local_scp_commit, | ||
| # run tests for run_alphafold.py script; | ||
| # shouldn't do this in sanity check to avoid breaking use of --module-only | ||
| "PYTHONPATH=%(installdir)s/lib/python%(pyshortver)s/site-packages:$PYTHONPATH " | ||
| "python %(builddir)s/alphafold-%(version)s/run_alphafold_test.py", | ||
| ] | ||
|
|
||
| sanity_check_paths = { | ||
| 'files': ['bin/alphafold', 'bin/pdbfixer', 'bin/run_alphafold.py', 'stereo_chemical_props.txt'], | ||
| 'dirs': ['lib/python%(pyshortver)s/site-packages', 'scripts'], | ||
| } | ||
|
|
||
| sanity_check_commands = [ | ||
| "pdbfixer --help", | ||
| "python -m simtk.testInstallation", | ||
| "python -c 'import alphafold'", | ||
| "alphafold --help 2>&1 | grep 'Full AlphaFold protein structure prediction script'", | ||
| ] | ||
|
|
||
| sanity_pip_check = True | ||
|
|
||
| # these settings allow predictions on proteins that would typically be too long to fit into GPU memory; | ||
| # see https://github.com/deepmind/alphafold/blob/main/docker/run_docker.py | ||
| modextravars = { | ||
| 'TF_FORCE_UNIFIED_MEMORY': '1', | ||
| 'XLA_PYTHON_CLIENT_MEM_FRACTION': '3', | ||
| # 'ALPHAFOLD_DATA_DIR': '/path/to/AlphaFold_DBs', # please adapt | ||
| 'OPENMM_RELAX': 'CUDA' # unset or set to 'CPU' in order not to run the energy minimization on GPU; PR#189 | ||
| } | ||
|
|
||
| moduleclass = 'bio' | ||
136 changes: 136 additions & 0 deletions
136
easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.3.0_data-dep-paths.patch
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,136 @@ | ||
| pick up on $ALPHAFOLD_DATA_DIR to specify location of downloaded data | ||
| (see https://github.com/deepmind/alphafold/blob/main/docker/run_docker.py); | ||
| pick up on HH-suite, HMMER, Kalign dependencies provided via EasyBuild | ||
| author: Kenneth Hoste (HPC-UGent) | ||
| update 2.0.1 -> 2.1.0/2.1.2/2.3.0: Thomas Hoffmann (EMBL); | ||
| diff -ru alphafold-2.3.0/run_alphafold.py alphafold-2.3.0_data-dep-paths/run_alphafold.py | ||
| --- alphafold-2.3.0/run_alphafold.py 2022-12-11 20:36:44.000000000 +0100 | ||
| +++ alphafold-2.3.0_data-dep-paths/run_alphafold.py 2022-12-13 17:36:37.258678676 +0100 | ||
| @@ -40,6 +40,46 @@ | ||
| import numpy as np | ||
|
|
||
| # Internal import (7716). | ||
| +use_reduced_dbs = any("--db_preset=reduced_dbs" in s for s in sys.argv[1:]) | ||
| +use_monomer_preset = not any("--model_preset=multimer" in s for s in sys.argv[1:]) | ||
| + | ||
| +data_dir = os.getenv('ALPHAFOLD_DATA_DIR') | ||
| +use_gpu_relax = os.getenv('OPENMM_RELAX')=='CUDA' | ||
| + | ||
| +if data_dir: | ||
| + mgnify_database_path = os.path.join(data_dir, 'mgnify', 'mgy_clusters_2022_05.fa') | ||
| + uniref90_database_path = os.path.join(data_dir, 'uniref90', 'uniref90.fasta') | ||
| + template_mmcif_dir = os.path.join(data_dir, 'pdb_mmcif', 'mmcif_files') | ||
| + obsolete_pdbs_path = os.path.join(data_dir, 'pdb_mmcif', 'obsolete.dat') | ||
| + if use_monomer_preset: | ||
| + pdb_seqres_database_path = None | ||
| + uniprot_database_path = None | ||
| + pdb70_database_path = os.path.join(data_dir, 'pdb70', 'pdb70') | ||
| + else: | ||
| + pdb_seqres_database_path = os.path.join(data_dir, 'pdb_seqres', 'pdb_seqres.txt') | ||
| + uniprot_database_path = os.path.join(data_dir, 'uniprot', 'uniprot.fasta') | ||
| + pdb70_database_path = None | ||
| + if use_reduced_dbs: | ||
| + small_bfd_database_path = os.path.join(data_dir, 'small_bfd','bfd-first_non_consensus_sequences.fasta') | ||
| + uniref30_database_path = None | ||
| + bfd_database_path = None | ||
| + else: | ||
| + small_bfd_database_path = None | ||
| + bfd_database_path = os.path.join(data_dir, 'bfd', 'bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt') | ||
| + uniref30_database_path = os.path.join(data_dir, 'uniref30', 'UniRef30_2021_03') | ||
| +else: | ||
| + sys.stderr.write("$ALPHAFOLD_DATA_DIR is not defined!") | ||
| + uniref90_database_path = None | ||
| + mgnify_database_path = None | ||
| + bfd_database_path = None | ||
| + uniref30_database_path = None | ||
| + pdb70_database_path = None | ||
| + template_mmcif_dir = None | ||
| + obsolete_pdbs_path = None | ||
| + small_bfd_database_path = None | ||
| + uniprot_database_path = None | ||
| + pdb_seqres_database_path = None | ||
| + use_gpu_relax = None | ||
|
|
||
| logging.set_verbosity(logging.INFO) | ||
|
|
||
| @@ -50,7 +90,7 @@ | ||
| 'separated by commas. All FASTA paths must have a unique basename as the ' | ||
| 'basename is used to name the output directories for each prediction.') | ||
|
|
||
| -flags.DEFINE_string('data_dir', None, 'Path to directory of supporting data.') | ||
| +flags.DEFINE_string('data_dir', data_dir, 'Path to directory of supporting data.') | ||
| flags.DEFINE_string('output_dir', None, 'Path to a directory that will ' | ||
| 'store the results.') | ||
| flags.DEFINE_string('jackhmmer_binary_path', shutil.which('jackhmmer'), | ||
| @@ -65,27 +105,27 @@ | ||
| 'Path to the hmmbuild executable.') | ||
| flags.DEFINE_string('kalign_binary_path', shutil.which('kalign'), | ||
| 'Path to the Kalign executable.') | ||
| -flags.DEFINE_string('uniref90_database_path', None, 'Path to the Uniref90 ' | ||
| +flags.DEFINE_string('uniref90_database_path', uniref90_database_path, 'Path to the Uniref90 ' | ||
| 'database for use by JackHMMER.') | ||
| -flags.DEFINE_string('mgnify_database_path', None, 'Path to the MGnify ' | ||
| +flags.DEFINE_string('mgnify_database_path', mgnify_database_path, 'Path to the MGnify ' | ||
| 'database for use by JackHMMER.') | ||
| -flags.DEFINE_string('bfd_database_path', None, 'Path to the BFD ' | ||
| +flags.DEFINE_string('bfd_database_path', bfd_database_path, 'Path to the BFD ' | ||
| 'database for use by HHblits.') | ||
| -flags.DEFINE_string('small_bfd_database_path', None, 'Path to the small ' | ||
| +flags.DEFINE_string('small_bfd_database_path', small_bfd_database_path, 'Path to the small ' | ||
| 'version of BFD used with the "reduced_dbs" preset.') | ||
| -flags.DEFINE_string('uniref30_database_path', None, 'Path to the UniRef30 ' | ||
| +flags.DEFINE_string('uniref30_database_path', uniref30_database_path, 'Path to the UniRef30 ' | ||
| 'database for use by HHblits.') | ||
| -flags.DEFINE_string('uniprot_database_path', None, 'Path to the Uniprot ' | ||
| +flags.DEFINE_string('uniprot_database_path', uniprot_database_path, 'Path to the Uniprot ' | ||
| 'database for use by JackHMMer.') | ||
| -flags.DEFINE_string('pdb70_database_path', None, 'Path to the PDB70 ' | ||
| +flags.DEFINE_string('pdb70_database_path', pdb70_database_path, 'Path to the PDB70 ' | ||
| 'database for use by HHsearch.') | ||
| -flags.DEFINE_string('pdb_seqres_database_path', None, 'Path to the PDB ' | ||
| +flags.DEFINE_string('pdb_seqres_database_path', pdb_seqres_database_path, 'Path to the PDB ' | ||
| 'seqres database for use by hmmsearch.') | ||
| -flags.DEFINE_string('template_mmcif_dir', None, 'Path to a directory with ' | ||
| +flags.DEFINE_string('template_mmcif_dir', template_mmcif_dir, 'Path to a directory with ' | ||
| 'template mmCIF structures, each named <pdb_id>.cif') | ||
| flags.DEFINE_string('max_template_date', None, 'Maximum template release date ' | ||
| 'to consider. Important if folding historical test sets.') | ||
| -flags.DEFINE_string('obsolete_pdbs_path', None, 'Path to file containing a ' | ||
| +flags.DEFINE_string('obsolete_pdbs_path', obsolete_pdbs_path, 'Path to file containing a ' | ||
| 'mapping from obsolete PDB IDs to the PDB IDs of their ' | ||
| 'replacements.') | ||
| flags.DEFINE_enum('db_preset', 'full_dbs', | ||
| @@ -124,7 +164,7 @@ | ||
| 'result in predictions with distracting stereochemical ' | ||
| 'violations but might help in case you are having issues ' | ||
| 'with the relaxation stage.') | ||
| -flags.DEFINE_boolean('use_gpu_relax', None, 'Whether to relax on GPU. ' | ||
| +flags.DEFINE_boolean('use_gpu_relax', use_gpu_relax, 'Whether to relax on GPU. ' | ||
| 'Relax on GPU can be much faster than CPU, so it is ' | ||
| 'recommended to enable if possible. GPUs must be available' | ||
| ' if this setting is enabled.') | ||
| @@ -296,6 +336,10 @@ | ||
| 'sure it is installed on your system.') | ||
|
|
||
| use_small_bfd = FLAGS.db_preset == 'reduced_dbs' | ||
| + if use_small_bfd and data_dir: | ||
| + bfd_database_path = None | ||
| + uniref30_database_path = None | ||
| + | ||
| _check_flag('small_bfd_database_path', 'db_preset', | ||
| should_be_set=use_small_bfd) | ||
| _check_flag('bfd_database_path', 'db_preset', | ||
| @@ -420,13 +464,7 @@ | ||
| flags.mark_flags_as_required([ | ||
| 'fasta_paths', | ||
| 'output_dir', | ||
| - 'data_dir', | ||
| - 'uniref90_database_path', | ||
| - 'mgnify_database_path', | ||
| - 'template_mmcif_dir', | ||
| 'max_template_date', | ||
| - 'obsolete_pdbs_path', | ||
| - 'use_gpu_relax', | ||
| ]) | ||
|
|
||
| app.run(main) |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.