5252 my $options = setup();
5353
5454 my $threads = PCAP::Threaded-> new($options -> {' threads' });
55- &PCAP::Threaded::disable_out_err if (exists $options -> {' index' });
5655
5756 # register processes
58- $threads -> add_function(' split' , \&PCAP::Bwa::split_in);
59- $threads -> add_function(' bwamem' , \&PCAP::Bwa::bwa_mem, exists $options -> {' index' } ? 1 : $options -> {' map_threads' });
57+ $threads -> add_function(' split' , \&PCAP::Bwa::split_in, split_threads( $options ) );
58+ $threads -> add_function(' bwamem' , \&PCAP::Bwa::bwa_mem, exists $options -> {' index' } ? 1 : $options -> {' map_threads' });
6059
6160 PCAP::Bwa::mem_setup($options ) if (!exists $options -> {' process' } || $options -> {' process' } eq ' setup' );
6261
63- $threads -> run($options -> {' max_split' }, ' split' , $options ) if (!exists $options -> {' process' } || $options -> {' process' } eq ' split' );
62+ $threads -> run($options -> {' max_split' }, ' split' , $options ) if (!exists $options -> {' process' } || $options -> {' process' } eq ' split' );
6463
6564 if (!exists $options -> {' process' } || $options -> {' process' } eq ' bwamem' ) {
6665 $options -> {' max_index' } = PCAP::Bwa::mem_mapmax($options );
7877 }
7978}
8079
# Decide how many threads each 'split' job may use.
#
# Args:
#   $options - hashref of run options. Reads 'index' (existence only),
#              'threads' and 'raw_files' (arrayref of input file paths).
#
# Side effects:
#   Sets $options->{threads_per_split} so later steps can read it.
#
# Returns:
#   The divisor handed to $threads->add_function for the 'split' step:
#     * 1 when 'index' is set (threads_per_split becomes the full thread count);
#     * int(threads / number_of_inputs), minimum 1, when the first input
#       ends in bam/cram;
#     * 1 for any other input (threads_per_split stays 1).
#   NOTE(review): presumably the divisor is the number of cores one job
#   occupies - confirm against PCAP::Threaded::add_function.
sub split_threads {
  my $options = shift;

  my $div = 1;
  my $threads_per_split = 1;

  # Fetch the input list once and without dereferencing a missing key, so a
  # call with no 'raw_files' neither autovivifies it nor warns on undef.
  my $raw_files = $options->{raw_files};
  my $first_input;
  if (ref($raw_files) eq 'ARRAY' && @{$raw_files}) {
    $first_input = $raw_files->[0];
  }

  if (exists $options->{index}) {
    # $div stays 1; only the per-split thread count changes here
    $threads_per_split = $options->{threads};
  }
  elsif (defined $first_input && $first_input =~ m/(?:bam|cram)$/) {
    # only the first input's extension is inspected
    my $inputs = scalar @{$raw_files};
    $threads_per_split = int($options->{threads} / $inputs);
    $threads_per_split = 1 if ($threads_per_split < 1); # never drop below one thread
    $div = $threads_per_split;
  }

  $options->{threads_per_split} = $threads_per_split; # so can be used later
  return $div;
}
97+
8198sub cleanup {
8299 my $options = shift ;
83100 my $tmpdir = $options -> {' tmp' };
@@ -91,6 +108,8 @@ sub setup {
91108 ' mmqcfrac' => 0.05,
92109 ' threads' => 1,
93110 ' fragment' => 10,
111+ ' dupmode' => ' t' ,
112+ ' seqslice' => 10000,
94113 ' csi' => undef ,
95114 );
96115
@@ -116,6 +135,8 @@ sub setup {
116135 ' q|mmqc' => \$opts {' mmqc' },
117136 ' qf|mmqcfrac:f' => \$opts {' mmqcfrac' },
118137 ' bm2|bwamem2' => \$opts {' bwamem2' },
138+ ' d|dupmode:s' => \$opts {' dupmode' },
139+ ' ss|seqslice:i' => \$opts {' seqslice' },
119140 ) or pod2usage(2);
120141
121142 pod2usage(-verbose => 1, -exitval => 0) if (defined $opts {' h' });
@@ -145,10 +166,14 @@ sub setup {
145166 die " ERROR: Please generate $opts {dict}, e.g.\n\t\$ samtools dict -a \$ ASSEMBLY -s \$ SPECIES $opts {reference} > $opts {dict}\n " ;
146167 }
147168
169+ if (defined $opts {' scramble' }) {
170+ die " ERROR: -scramble option is deprecated, please see -seqslice\n " ;
171+ }
172+
148173 delete $opts {' process' } unless (defined $opts {' process' });
149174 delete $opts {' index' } unless (defined $opts {' index' });
150175 delete $opts {' bwa' } unless (defined $opts {' bwa' });
151- delete $opts {' scramble' } unless ( defined $opts { ' scramble ' }) ;
176+ delete $opts {' scramble' };
152177 delete $opts {' bwa_pl' } unless (defined $opts {' bwa_pl' });
153178 delete $opts {' mmqc' } unless (defined $opts {' mmqc' });
154179 delete $opts {' csi' } unless (defined $opts {' csi' });
@@ -220,11 +245,12 @@ =head1 SYNOPSIS
220245 Optional parameters:
221246 -bwamem2 -bm2 Use bwa-mem2 instead of bwa.
222247 -fragment -f Split input into fragments of X million read pairs [10]
248+ - only applies to fastq[.gz] input
223249 -nomarkdup -n Don't mark duplicates [flag]
224250 -csi Use CSI index instead of BAI for BAM files [flag].
225251 -cram -c Output cram, see '-sc' [flag]
226- -scramble -sc Single quoted string of parameters to pass to Scramble when '-c' used
227- - '-I,-O' are used internally and should not be provided
252+ -seqslice -ss seqs_per_slice for CRAM compression [samtools default: 10000]
253+ -scramble -sc DEPRECATED
228254 -bwa -b Single quoted string of additional parameters to pass to BWA
229255 - '-t,-p,-R' are used internally and should not be provided.
230256 - '-v' is set to 1 unless '-bwa' is set.
@@ -234,12 +260,15 @@ =head1 SYNOPSIS
234260 -mmqc -q Mark reads as QCFAIL (0x200, 512) if mismatch rate exceeded [flag]
235261 - Please see 'bwa_mem.pl -m'
236262 -mmqcfrac -qf Mismatch fraction for -mmqc [0.05]
263+ -dupmode -d see "samtools markdup -m" [t]
237264
238265 Targeted processing:
239266 -process -p Only process this step then exit, optionally set -index
267+ setup - checks and configures workspace (-index N/A)
268+ split - split data by readgroup and chunk size (if applicable)
240269 bwamem - only applicable if input is bam
241270 mark - Run duplicate marking (-index N/A)
242- stats - Generates the *.bas file for the final BAM.
271+ stats - Generates the *.bas file for the final BAM (-index N/A)
243272
244273 -index -i Optionally restrict '-p' to single job
245274 bwamem - 1..<lane_count>
@@ -249,6 +278,7 @@ =head1 SYNOPSIS
249278 https://github.com/gperftools/ (assuming number of cores not exceeded)
250279 If available specify the path to 'gperftools/lib/libtcmalloc_minimal.so'.
251280 - NOT APPLIED TO bwa-mem2
281+ When not set, falls back to the environment variable GPERF_FOR_BWA; if that is also unset, no library is used.
252282
253283 Other:
254284 -jobs -j For a parallel step report the number of jobs required
0 commit comments