Skip to content

Commit a2bcb9d

Browse files
committed
Merge branch 'release/5.1.0'
2 parents d0bb91b + 4aaf081 commit a2bcb9d

18 files changed

Lines changed: 327 additions & 677 deletions

.dockerignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,4 @@
99
/docs.tar.gz
1010
/setup.sh
1111
/prerelease.sh
12+
/blib

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ examples/cgp_gnos_pull.ini
1515
/c/c_tests/tests_log
1616
/bin/diff_bams
1717
/bin/reheadSQ
18+
/bin/mismatchQc
19+
/bin/mmFlagModifier
1820
/c/c_tests/01_bam_stats_output_tests
1921
/c/c_tests/02_bam_access_tests
2022
/c/c_tests/03_bam_stats_calcs_tests

CHANGES.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,14 @@
11
# CHANGES
22

3+
## 5.1.0
4+
5+
* Base image updated to Focal (Ubuntu 20.04).
6+
* Majority of biobambam2 replaced with samtools functions.
7+
* Reads undergo full collate when mapping from BAM/CRAM (bwa-mem2 prep).
8+
* Duplicate marking `samtools markdup --mode` options exposed to `bwa_mem.pl`.
9+
* Lanes mapped with earlier versions of PCAP-core cannot be merged without reprocessing to add "mate score tag" via `samtools fixmate`.
10+
* Scramble option for `bwa_mem.pl` deprecated, relevant option for fast CRAM random access exposed.
11+
312
## 5.0.5
413

514
* Add `noindex` commandline flag to `merge_or_mark.pl` for bammerge calls. Only permitted alongside `qnamesort`

Dockerfile

Lines changed: 36 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,42 @@
1-
FROM quay.io/wtsicgp/cgpbigwig:1.1.0 as builder
1+
FROM quay.io/wtsicgp/cgpbigwig:1.3.0 as builder
22

33
USER root
44

5-
ARG BBB2_URL="https://gitlab.com/german.tischler/biobambam2/uploads/178774a8ece96d2201fcd0b5249884c7/biobambam2-2.0.146-release-20191030105216-x86_64-linux-gnu.tar.xz"
5+
# ALL tool versions used by opt-build.sh
6+
# need to keep in sync with setup.sh
7+
8+
# newer gitlab versions do not work
9+
ARG BBB2_URL="https://github.com/gt1/biobambam2/releases/download/2.0.87-release-20180301132713/biobambam2-2.0.87-release-20180301132713-x86_64-etch-linux-gnu.tar.gz"
610
ARG BWAMEM2_URL="https://github.com/bwa-mem2/bwa-mem2/releases/download/v2.0pre2/bwa-mem2-2.0pre2_x64-linux.tar.bz2"
711
ARG STADEN="https://iweb.dl.sourceforge.net/project/staden/staden/2.0.0b11/staden-2.0.0b11-2016-linux-x86_64.tar.gz"
812
ARG VER_BIODBHTS="3.01"
913
ARG VER_BWA="v0.7.17"
10-
ARG VER_HTSLIB="1.9"
11-
ARG VER_SAMTOOLS="1.9"
14+
ARG VER_HTSLIB="1.10.2"
15+
ARG VER_SAMTOOLS="1.10"
1216

1317
RUN apt-get -yq update
14-
RUN apt-get install -yq --no-install-recommends\
15-
apt-transport-https\
16-
locales\
17-
curl\
18-
ca-certificates\
19-
libperlio-gzip-perl\
20-
make\
21-
bzip2\
22-
gcc\
23-
psmisc\
24-
time\
25-
zlib1g-dev\
26-
libbz2-dev\
27-
liblzma-dev\
28-
libcurl4-gnutls-dev\
29-
libncurses5-dev\
30-
nettle-dev\
31-
libp11-kit-dev\
32-
libtasn1-dev\
33-
libgnutls-dev\
34-
libgd-dev\
35-
libdb-dev
18+
RUN apt-get install -yq --no-install-recommends apt-transport-https
19+
RUN apt-get install -yq --no-install-recommends locales
20+
RUN apt-get install -yq --no-install-recommends curl
21+
RUN apt-get install -yq --no-install-recommends ca-certificates
22+
RUN apt-get install -yq --no-install-recommends libperlio-gzip-perl
23+
RUN apt-get install -yq --no-install-recommends make
24+
RUN apt-get install -yq --no-install-recommends bzip2
25+
RUN apt-get install -yq --no-install-recommends gcc
26+
RUN apt-get install -yq --no-install-recommends psmisc
27+
RUN apt-get install -yq --no-install-recommends time
28+
RUN apt-get install -yq --no-install-recommends zlib1g-dev
29+
RUN apt-get install -yq --no-install-recommends libbz2-dev
30+
RUN apt-get install -yq --no-install-recommends liblzma-dev
31+
RUN apt-get install -yq --no-install-recommends libcurl4-gnutls-dev
32+
RUN apt-get install -yq --no-install-recommends libncurses5-dev
33+
RUN apt-get install -yq --no-install-recommends nettle-dev
34+
RUN apt-get install -yq --no-install-recommends libp11-kit-dev
35+
RUN apt-get install -yq --no-install-recommends libtasn1-dev
36+
RUN apt-get install -yq --no-install-recommends libdb-dev
37+
RUN apt-get install -yq --no-install-recommends libgnutls28-dev
38+
RUN apt-get install -yq --no-install-recommends xz-utils
39+
RUN apt-get install -yq --no-install-recommends libexpat1-dev
3640

3741
RUN locale-gen en_US.UTF-8
3842
RUN update-locale LANG=en_US.UTF-8
@@ -54,11 +58,11 @@ RUN bash build/opt-build.sh $OPT
5458
COPY . .
5559
RUN bash build/opt-build-local.sh $OPT
5660

57-
FROM ubuntu:16.04
61+
FROM ubuntu:20.04
5862

5963
LABEL maintainer="cgphelp@sanger.ac.uk"\
6064
uk.ac.sanger.cgp="Cancer, Ageing and Somatic Mutation, Wellcome Sanger Institute" \
61-
version="5.0.5" \
65+
version="5.1.0" \
6266
description="pcap-core"
6367

6468
ENV OPT /opt/wtsi-cgp
@@ -67,6 +71,7 @@ ENV PATH $OPT/bin:$PATH
6771
ENV PERL5LIB $OPT/lib/perl5
6872
ENV LD_LIBRARY_PATH $OPT/lib:$OPT/scramble/lib
6973
ENV LC_ALL C
74+
ENV GPERF_FOR_BWA /usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4
7075

7176
RUN apt-get -yq update
7277
RUN apt-get install -yq --no-install-recommends \
@@ -82,8 +87,10 @@ zlib1g \
8287
liblzma5 \
8388
libncurses5 \
8489
p11-kit \
85-
libcurl3 \
90+
libcurl3-gnutls \
91+
libcurl4 \
8692
moreutils \
93+
google-perftools \
8794
unattended-upgrades && \
8895
unattended-upgrade -d -v && \
8996
apt-get remove -yq unattended-upgrades && \

README.md

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -29,18 +29,12 @@ Available programs are described in the [wiki][wiki].
2929

3030
## Docker, Singularity and Dockstore
3131

32-
There are docker and dockstore.org wrappers for this project at [dockstore-cgpmap][dockstore-cgpmap].
32+
There are dockstore.org CWL and wrappers for this project at [dockstore-cgpmap][dockstore-cgpmap].
3333

34-
The docker image is held on [quay.io][quay-io-cgpmap].
35-
36-
The CWL bindings of `dockstore-cgpmap` specifically target execution of the BWA mem mapping flow,
37-
however all tools are contained in the image and can be used if you construct the relevant docker
38-
commands.
34+
The docker image is held on [quay.io][quay-io-pcap-core].
3935

4036
The docker image is known to work correctly after import into a singularity image.
4137

42-
See the [dockstore-cgpmap][dockstore-cgpmap] documentation for more detail.
43-
4438
## Dependencies/Install
4539

4640
Please be aware that this expects basic C compilation libraries and tools to be available, most are listed in `INSTALL`.
@@ -69,13 +63,10 @@ Please see the respective licence for each before use.
6963
### Cutting the release
7064

7165
1. Update `lib/PCAP.pm` to the correct version.
72-
2. Ensure upgrade path for new version number is added to `lib/PCAP.pm`.
66+
2. Update `Dockerfile` to the correct version.
7367
3. Update `CHANGES.md` to show major items.
74-
4. Run `./prerelease.sh`
75-
5. Check all tests and coverage reports are acceptable.
76-
6. Commit the updated docs tree and updated module/version.
77-
7. Push commits.
78-
8. Use the GitHub tools to draft a release.
68+
4. Push commits and verify with Sanger internal CI.
69+
5. Use the GitHub tools to draft a release.
7970

8071
<!-- References -->
8172

@@ -87,7 +78,7 @@ Please see the respective licence for each before use.
8778
[cancerit_github]: https://github.com/cancerit
8879
[old_repo]: https://github.com/ICGC-TCGA-PanCancer/PCAP-core
8980
[dockstore-cgpmap]: https://github.com/cancerit/dockstore-cgpmap
90-
[quay-io-cgpmap]: https://quay.io/repository/wtsicgp/dockstore-cgpmap
81+
[quay-io-pcap-core]: https://quay.io/repository/wtsicgp/pcap-core
9182

9283
<!-- Travis -->
9384
[travis-base]: https://travis-ci.org/cancerit/PCAP-core

bin/bwa_mem.pl

Lines changed: 38 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -52,15 +52,14 @@
5252
my $options = setup();
5353

5454
my $threads = PCAP::Threaded->new($options->{'threads'});
55-
&PCAP::Threaded::disable_out_err if(exists $options->{'index'});
5655

5756
# register processes
58-
$threads->add_function('split', \&PCAP::Bwa::split_in);
59-
$threads->add_function('bwamem', \&PCAP::Bwa::bwa_mem, exists $options->{'index'} ? 1 : $options->{'map_threads'});
57+
$threads->add_function('split', \&PCAP::Bwa::split_in, split_threads($options));
58+
$threads->add_function('bwamem', \&PCAP::Bwa::bwa_mem, exists $options->{'index'} ? 1 : $options->{'map_threads'});
6059

6160
PCAP::Bwa::mem_setup($options) if(!exists $options->{'process'} || $options->{'process'} eq 'setup');
6261

63-
$threads->run($options->{'max_split'}, 'split', $options) if(!exists $options->{'process'} || $options->{'process'} eq 'split');
62+
$threads->run($options->{'max_split'}, 'split', $options) if(!exists $options->{'process'} || $options->{'process'} eq 'split');
6463

6564
if(!exists $options->{'process'} || $options->{'process'} eq 'bwamem') {
6665
$options->{'max_index'} = PCAP::Bwa::mem_mapmax($options);
@@ -78,6 +77,24 @@
7877
}
7978
}
8079

80+
# Work out how many threads each 'split' job may use.
#
# Argument: $options - hash ref; reads 'index' (existence only), 'threads'
#           and 'raw_files' (array ref of input file names).
# Side effect: stores the per-split thread count in $options->{threads_per_split}.
# Returns: $div, which the caller passes as the third argument to
#          $threads->add_function('split', ...).
#          NOTE(review): presumably a divisor applied to the thread pool size
#          by PCAP::Threaded - confirm against that module.
#
# Cases:
#   - 'index' set:           $div = 1, threads_per_split = all threads.
#   - first input bam/cram:  threads are shared evenly between the inputs
#                            (minimum 1 each), and $div is set to that share.
#   - anything else:         both values stay at 1.
sub split_threads {
81+
my $options = shift;
82+
my $div = 1;
83+
my $threads_per_split = 1;
84+
if(exists $options->{index}) {
85+
$div = 1;
86+
$threads_per_split = $options->{threads};
87+
}
88+
elsif($options->{raw_files}->[0] =~ m/(bam|cram)$/) { # only the first input name is inspected; pattern is a suffix match with no leading '.'
89+
my $inputs = scalar @{$options->{raw_files}};
90+
$threads_per_split = int ($options->{threads} / $inputs);
91+
$threads_per_split = 1 if($threads_per_split < 1); # more inputs than threads: still give each split one thread
92+
$div = $threads_per_split;
93+
}
94+
$options->{threads_per_split} = $threads_per_split; # stashed on $options so later steps can read it
95+
return $div; # handed straight to add_function() by the caller
96+
}
97+
8198
sub cleanup {
8299
my $options = shift;
83100
my $tmpdir = $options->{'tmp'};
@@ -91,6 +108,8 @@ sub setup {
91108
'mmqcfrac' => 0.05,
92109
'threads' => 1,
93110
'fragment' => 10,
111+
'dupmode' => 't',
112+
'seqslice' => 10000,
94113
'csi' => undef,
95114
);
96115

@@ -116,6 +135,8 @@ sub setup {
116135
'q|mmqc' => \$opts{'mmqc'},
117136
'qf|mmqcfrac:f' => \$opts{'mmqcfrac'},
118137
'bm2|bwamem2' => \$opts{'bwamem2'},
138+
'd|dupmode:s' => \$opts{'dupmode'},
139+
'ss|seqslice:i' => \$opts{'seqslice'}, # must be a reference: a plain value is parsed by Getopt::Long as another option spec and -seqslice would never reach %opts
119140
) or pod2usage(2);
120141

121142
pod2usage(-verbose => 1, -exitval => 0) if(defined $opts{'h'});
@@ -145,10 +166,14 @@ sub setup {
145166
die "ERROR: Please generate $opts{dict}, e.g.\n\t\$ samtools dict -a \$ASSEMBLY -s \$SPECIES $opts{reference} > $opts{dict}\n";
146167
}
147168

169+
if(defined $opts{'scramble'}) {
170+
die "ERROR: -scramble option is deprecated, please see -seqslice\n";
171+
}
172+
148173
delete $opts{'process'} unless(defined $opts{'process'});
149174
delete $opts{'index'} unless(defined $opts{'index'});
150175
delete $opts{'bwa'} unless(defined $opts{'bwa'});
151-
delete $opts{'scramble'} unless(defined $opts{'scramble'});
176+
delete $opts{'scramble'};
152177
delete $opts{'bwa_pl'} unless(defined $opts{'bwa_pl'});
153178
delete $opts{'mmqc'} unless(defined $opts{'mmqc'});
154179
delete $opts{'csi'} unless(defined $opts{'csi'});
@@ -220,11 +245,12 @@ =head1 SYNOPSIS
220245
Optional parameters:
221246
-bwamem2 -bm2 Use bwa-mem2 instead of bwa.
222247
-fragment -f Split input into fragments of X million repairs [10]
248+
- only applies to fastq[.gz] input
223249
-nomarkdup -n Don't mark duplicates [flag]
224250
-csi Use CSI index instead of BAI for BAM files [flag].
225251
-cram -c Output cram, see '-ss' [flag]
226-
-scramble -sc Single quoted string of parameters to pass to Scramble when '-c' used
227-
- '-I,-O' are used internally and should not be provided
252+
-seqslice -ss seqs_per_slice for CRAM compression [samtools default: 10000]
253+
-scramble -sc DEPRECATED
228254
-bwa -b Single quoted string of additional parameters to pass to BWA
229255
- '-t,-p,-R' are used internally and should not be provided.
230256
- '-v' is set to 1 unless '-bwa' is set.
@@ -234,12 +260,15 @@ =head1 SYNOPSIS
234260
-mmqc -q Mark reads as QCFAIL (0x200, 512) if mismatch rate exceeded [flag]
235261
- Please see 'bwa_mem.pl -m'
236262
-mmqcfrac -qf Mismatch fraction for -mmqc [0.05]
263+
-dupmode -d see "samtools markdup -m" [t]
237264
238265
Targeted processing:
239266
-process -p Only process this step then exit, optionally set -index
267+
setup - checks and configure workspace (-index N/A)
268+
split - split data by readgroup and chunk size (if applicable)
240269
bwamem - only applicable if input is bam
241270
mark - Run duplicate marking (-index N/A)
242-
stats - Generates the *.bas file for the final BAM.
271+
stats - Generates the *.bas file for the final BAM (-index N/A)
243272
244273
-index -i Optionally restrict '-p' to single job
245274
bwamem - 1..<lane_count>
@@ -249,6 +278,7 @@ =head1 SYNOPSIS
249278
https://github.com/gperftools/ (assuming number of cores not exceeded)
250279
If available specify the path to 'gperftools/lib/libtcmalloc_minimal.so'.
251280
- NOT APPLIED TO bwa-mem2
281+
Falls back to environment variable GPERF_FOR_BWA when not set, or nothing.
252282
253283
Other:
254284
-jobs -j For a parallel step report the number of jobs required

bin/merge_or_mark.pl

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@
3939
use PCAP::Bwa;
4040
use version;
4141

42-
const my $COORD_SORT_ORDER => 'coordinate';
4342
const my $QUERYNAME_SORT_ORDER => 'queryname';
4443
const my @VALID_PROCESS => qw(setup mark stats);
4544
const my %INDEX_FACTOR => ( 'setup' => 1,
@@ -72,7 +71,8 @@ sub setup {
7271
my %opts = (
7372
'threads' => 1,
7473
'csi' => undef,
75-
'sortorder' => $COORD_SORT_ORDER,
74+
'dupmode' => 't',
75+
'seqslice' => 10000,
7676
);
7777

7878
GetOptions( 'h|help' => \$opts{'h'},
@@ -84,11 +84,13 @@ sub setup {
8484
's|sample=s' => \$opts{'sample'},
8585
'n|nomarkdup' => \$opts{'nomarkdup'},
8686
'p|process=s' => \$opts{'process'},
87-
'q|querynamesort' => \$opts{'qnamesort'},
87+
'q|qnamesort' => \$opts{'qnamesort'},
8888
'i|noindex' => \$opts{'noindex'},
8989
'csi' => \$opts{'csi'},
9090
'c|cram' => \$opts{'cram'},
9191
'sc|scramble=s' => \$opts{'scramble'},
92+
'd|dupmode:s' => \$opts{'dupmode'},
93+
'ss|seqslice:i' => \$opts{'seqslice'}, # must be a reference: a plain value is parsed by Getopt::Long as another option spec and -seqslice would never reach %opts
9294
) or pod2usage(2);
9395

9496
pod2usage(-verbose => 1, -exitval => 0) if(defined $opts{'h'});
@@ -113,17 +115,20 @@ sub setup {
113115
die "ERROR: Please generate $opts{dict}, e.g.\n\t\$ samtools dict -a \$ASSEMBLY -s \$SPECIES $opts{reference} > $opts{dict}\n";
114116
}
115117

118+
if(defined $opts{'scramble'}) {
119+
die "ERROR: -scramble option is deprecated, please see -seqslice\n";
120+
}
121+
116122
delete $opts{'process'} unless(defined $opts{'process'});
117123
delete $opts{'index'} unless(defined $opts{'index'});
118-
delete $opts{'scramble'} unless(defined $opts{'scramble'});
124+
delete $opts{'scramble'};
119125
delete $opts{'csi'} unless(defined $opts{'csi'});
120126
if($opts{'qnamesort'} && !$opts{'nomarkdup'}){
121127
die "ERROR: -qnamesort can only be used in conjunction with -nomarkdups\n";
122128
}
123129
if($opts{'noindex'} && !$opts{'qnamesort'}){
124130
die "ERROR: -noindex can only be used in conjunction with -qnamesort\n";
125131
}
126-
$opts{'sortorder'} = $QUERYNAME_SORT_ORDER if($opts{'qnamesort'});
127132

128133
if($opts{'threads'} > 4) {
129134
warn "Setting 'threads' to 4 as higher values are of limited value\n";
@@ -172,18 +177,19 @@ =head1 SYNOPSIS
172177
-nomarkdup -n Don't mark duplicates [flag]
173178
-qnamesort -q Use queryname sorting flag in bammerge rather than coordinate. [flag].
174179
To be used in conjunction with -nomarkdup only
175-
-noindex -i Don't attempt to index the merged file. Only available in conjunction with
180+
-noindex -i Don't attempt to index the merged file. Only available in conjunction with
176181
-qnamesort.
177182
-csi Use CSI index instead of BAI for BAM files [flag].
178183
-cram -c Output cram, see '-ss' [flag]
179-
-scramble -sc Single quoted string of parameters to pass to Scramble when '-c' used
180-
- '-I,-O' are used internally and should not be provided
184+
-seqslice -ss seqs_per_slice for CRAM compression [samtools default: 10000]
185+
-scramble -sc DEPRECATED
186+
-dupmode -d see "samtools markdup -m" [t]
181187
182188
Targeted processing:
183-
-process -p Only process this step then exit, optionally set -index
184-
bwamem - only applicable if input is bam
185-
mark - Run duplicate marking (-index N/A)
186-
stats - Generates the *.bas file for the final BAM.
189+
-process -p Only process this step then exit
190+
setup - only applicable if input is bam
191+
mark - Run duplicate marking
192+
stats - Generates the *.bas file for the final BAM
187193
188194
Other:
189195
-help -h Brief help message.
@@ -261,6 +267,8 @@ =head2 OPTIONAL parameters
261267
262268
=item B<-scramble>
263269
270+
DEPRECATED - see -seqslice
271+
264272
Single quoted string of parameters to pass to Scramble when '-c' used. Please see the Scramble
265273
documentation for details.
266274

0 commit comments

Comments
 (0)