|
1 | | -""" |
2 | | - This script is the benchmark, it supports the phylo_grad gradients and pytorch gradients. |
3 | | -""" |
4 | | - |
 import sys
 
 import torch
 
 import input
 import phylo_grad
-import felsenstein
 import numpy as np
 import argparse
 
@@ -27,8 +22,6 @@
 backend = parser.add_argument_group('Backend')
 exclusive_group = backend.add_mutually_exclusive_group(required=True)
 exclusive_group.add_argument('--rust', action='store_true')
-exclusive_group.add_argument('--pytorch', action='store_true')
-exclusive_group.add_argument('--pytorch_gpu', action='store_true')
 
 fp_precision = parser.add_argument_group('fp precision')
 exclusive_group = fp_precision.add_mutually_exclusive_group(required=True)
@@ -45,42 +38,27 @@
 np_dtype = np.float32
 
 if args.fasta_amino is not None:
-    tree, L = input.read_newick_fasta(args.newick, args.fasta_amino)
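+    # read_fasta_numeric presumably returns a mapping of {record name: integer-encoded sequence}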
+    alignment = input.read_fasta_numeric(args.fasta_amino)
     # Counts amino acids
     counts = torch.zeros(21, dtype=torch.int64)
-    for _, _, seq in tree:
-        if seq is not None:
-            numeric = [input.amino_mapping[c] for c in seq]
-            for i in numeric:
-                counts[i] += 1
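+    # Tally amino-acid counts over every sequence; L ends up as the alignment length (assumes all sequences are equally long)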
+    for seq in alignment.values():
+        L = len(seq)
+        for i in seq:
+            counts[i] += 1
 
     initial_energies = torch.log(counts[:-1])
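+    # Start the energies at the log counts; the 21st symbol (presumably gap/unknown) is dropped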
-
-    if args.pytorch_gpu:
-        tree = [(par, dist, input.amino_to_embedding(seq).cuda() if seq else None) for par, dist, seq in tree]
-    else:
-        tree = [(par, dist, input.amino_to_embedding(seq)) for par, dist, seq in tree]
-
 
 
 
 #Init random parameters
 torch.manual_seed(0)
 
-if args.pytorch_gpu:
-    shared = torch.zeros(190, requires_grad=True, dtype=dtype, device="cuda")
-    energies = torch.tensor(initial_energies, requires_grad=True, dtype=dtype, device="cuda")
-else:
-    shared = torch.zeros(190, requires_grad=True, dtype=dtype)
-    energies = torch.tensor(initial_energies, requires_grad=True, dtype=dtype)
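+# 190 = 20*19/2, presumably one shared exchangeability parameter per unordered amino-acid pair
+# clone().requires_grad_() builds a fresh leaf tensor; torch.tensor(existing_tensor) triggers a copy-construct warning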
+shared = torch.zeros(190, requires_grad=True, dtype=dtype)
+energies = initial_energies.clone().to(dtype=dtype).requires_grad_(True)
 
 if args.rust:
-    leaf_log_p = torch.stack([seq for _,_, seq in tree if seq is not None]).transpose(1,0)
-    tree = np.array([(par, dist) for par, dist, _ in tree], dtype=np_dtype)
-    tree = phylo_grad.FelsensteinTree(tree, leaf_log_p.type(dtype).numpy(), 1e-4)
-
-else:
-    tree = felsenstein.FelsensteinTree(tree)
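+    # from_newick presumably moves newick parsing and leaf/alignment matching into the Rust backend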
+    leaf_log_p = input.read_fasta(args.fasta_amino)
+    tree = phylo_grad.FelsensteinTree.from_newick(args.newick, leaf_log_p, np_dtype, 1e-4, gpu=False)
 
 optimizer = torch.optim.Adam([shared, energies], lr=0.1)
 
@@ -133,7 +111,7 @@ def rate_matrix(shared, energies, L): |
 
 S, sqrt_pi = rate_matrix(shared, energies, L)
 
-np.savez(args.output, S=S.detach().cpu().numpy()[0], sqrt_pi=sqrt_pi.detach().cpu().numpy()[0], shared=shared.detach().cpu().numpy(), energies=energies.detach().cpu().numpy())
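+# .cpu() is no longer needed: with the --pytorch_gpu path removed, all tensors already live on the host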
+np.savez(args.output, S=S.detach().numpy()[0], sqrt_pi=sqrt_pi.detach().numpy()[0], shared=shared.detach().numpy(), energies=energies.detach().numpy())
 
 # Print peak memory usage
 import resource