Skip to content

Commit 6e1657f

Browse files
committed
Address feature request: trim reads exceeding a given length #191
1 parent 863d9c8 commit 6e1657f

File tree

5 files changed

+73
-1
lines changed

5 files changed

+73
-1
lines changed

bt2_search.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ static int ipause; // pause before maching?
9797
static uint32_t qUpto; // max # of queries to read
9898
static int gTrim5; // amount to trim from 5' end
9999
static int gTrim3; // amount to trim from 3' end
100+
static pair<int, int> trimReadsExceedingLen; // trim reads exceeding given length from either 3' or 5'-end
100101
static int offRate; // keep default offRate
101102
static bool solexaQuals; // quality strings are solexa quals, not phred, and subtract 64 (not 33)
102103
static bool phred64Quals; // quality chars are phred, but must subtract 64 (not 33)
@@ -288,6 +289,7 @@ static void resetOptions() {
288289
qUpto = 0xffffffff; // max # of queries to read
289290
gTrim5 = 0; // amount to trim from 5' end
290291
gTrim3 = 0; // amount to trim from 3' end
292+
trimReadsExceedingLen = pair<int, int>(5, 0); // default: don't do any trimming
291293
offRate = -1; // keep default offRate
292294
solexaQuals = false; // quality strings are solexa quals, not phred, and subtract 64 (not 33)
293295
phred64Quals = false; // quality chars are phred, but must subtract 64 (not 33)
@@ -643,6 +645,7 @@ static struct option long_options[] = {
643645
{(char*)"xeq", no_argument, 0, ARG_XEQ},
644646
{(char*)"thread-ceiling", required_argument, 0, ARG_THREAD_CEILING},
645647
{(char*)"thread-piddir", required_argument, 0, ARG_THREAD_PIDDIR},
648+
{(char*)"trim-reads-exceeding-len", required_argument, 0, ARG_TRIM_READS_EXCEEDING_LEN},
646649
{(char*)0, 0, 0, 0} // terminator
647650
};
648651

@@ -736,6 +739,7 @@ static void printUsage(ostream& out) {
736739
<< " -u/--upto <int> stop after first <int> reads/pairs (no limit)" << endl
737740
<< " -5/--trim5 <int> trim <int> bases from 5'/left end of reads (0)" << endl
738741
<< " -3/--trim3 <int> trim <int> bases from 3'/right end of reads (0)" << endl
742+
<< " --trim-reads-exceeding-len <3|5:int> trim <int> bases from either 3'/right or 5'/left end of reads (no trimming)" << endl
739743
<< " --phred33 qualities are Phred+33 (default)" << endl
740744
<< " --phred64 qualities are Phred+64" << endl
741745
<< " --int-quals qualities encoded as space-delimited integers" << endl
@@ -1101,6 +1105,17 @@ static void parseOption(int next_option, const char *arg) {
11011105
break;
11021106
case '3': gTrim3 = parseInt(0, "-3/--trim3 arg must be at least 0", arg); break;
11031107
case '5': gTrim5 = parseInt(0, "-5/--trim5 arg must be at least 0", arg); break;
1108+
case ARG_TRIM_READS_EXCEEDING_LEN:
1109+
trimReadsExceedingLen = parsePair<int>(arg, ':');
1110+
if (trimReadsExceedingLen.first != 3 && trimReadsExceedingLen.first != 5) {
1111+
cerr << "`--trim-reads-exceeding-len pos:n`: pos must be either 3 or 5" << endl;
1112+
throw 1;
1113+
}
1114+
if(trimReadsExceedingLen.second < 0) {
1115+
cerr << "`--trim-reads-exceeding-len pos:n`: n must be at least 0" << endl;
1116+
throw 1;
1117+
}
1118+
break;
11041119
case 'h': printUsage(cout); throw 0; break;
11051120
case ARG_USAGE: printUsage(cout); throw 0; break;
11061121
//
@@ -4717,6 +4732,7 @@ static void driver(
47174732
integerQuals, // true -> qualities are space-separated numbers
47184733
gTrim5, // amt to hard clip from 5' end
47194734
gTrim3, // amt to hard clip from 3' end
4735+
trimReadsExceedingLen, // trim reads exceeding given length from either 3' or 5'-end
47204736
fastaContLen, // length of sampled reads for FastaContinuous...
47214737
fastaContFreq, // frequency of sampled reads for FastaContinuous...
47224738
skipReads, // skip the first 'skip' patterns

opts.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,8 @@ enum {
156156
ARG_XEQ, // --xeq
157157
ARG_THREAD_CEILING, // --thread-ceiling
158158
ARG_THREAD_PIDDIR, // --thread-piddir
159-
ARG_INTERLEAVED_FASTQ // --interleaved
159+
ARG_INTERLEAVED_FASTQ, // --interleaved
160+
ARG_TRIM_READS_EXCEEDING_LEN// --trim-reads-exceeding-len
160161
};
161162

162163
#endif

pat.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,8 +172,11 @@ pair<bool, bool> PatternSourcePerThread::nextReadPair() {
172172
}
173173
// Finalize read/pair
174174
if(!buf_.read_b().patFw.empty()) {
175+
trim(buf_.read_a());
176+
trim(buf_.read_b());
175177
finalizePair(buf_.read_a(), buf_.read_b());
176178
} else {
179+
trim(buf_.read_a());
177180
finalize(buf_.read_a());
178181
}
179182
bool this_is_last = buf_.cur_buf_ == static_cast<unsigned int>(last_batch_size_-1);

pat.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ struct PatternParams {
6262
bool intQuals_,
6363
int trim5_,
6464
int trim3_,
65+
pair<int, int> trimReadsExceedingLen_,
6566
int sampleLen_,
6667
int sampleFreq_,
6768
size_t skip_,
@@ -76,6 +77,7 @@ struct PatternParams {
7677
intQuals(intQuals_),
7778
trim5(trim5_),
7879
trim3(trim3_),
80+
trimReadsExceedingLen(trimReadsExceedingLen_),
7981
sampleLen(sampleLen_),
8082
sampleFreq(sampleFreq_),
8183
skip(skip_),
@@ -91,6 +93,7 @@ struct PatternParams {
9193
bool intQuals; // true -> qualities are space-separated numbers
9294
int trim5; // amt to hard clip from 5' end
9395
int trim3; // amt to hard clip from 3' end
96+
pair<int, int> trimReadsExceedingLen;
9497
int sampleLen; // length of sampled reads for FastaContinuous...
9598
int sampleFreq; // frequency of sampled reads for FastaContinuous...
9699
size_t skip; // skip the first 'skip' patterns
@@ -988,6 +991,27 @@ class PatternSourcePerThread {
988991
return composer_.parse(ra, rb, buf_.rdid());
989992
}
990993

994+
void trim(Read& r) {
995+
if (pp_.trimReadsExceedingLen.second > 0) {
996+
switch (pp_.trimReadsExceedingLen.first) {
997+
case 3:
998+
if (r.patFw.length() > pp_.trimReadsExceedingLen.second) {
999+
r.trimmed5 = r.patFw.length() - pp_.trimReadsExceedingLen.second;
1000+
r.patFw.trimEnd(r.trimmed5);
1001+
r.qual.trimEnd(r.trimmed5);
1002+
}
1003+
break;
1004+
case 5:
1005+
if (r.patFw.length() > pp_.trimReadsExceedingLen.second) {
1006+
r.trimmed3 = r.patFw.length() - pp_.trimReadsExceedingLen.second;
1007+
r.patFw.trimBegin(r.trimmed3);
1008+
r.qual.trimBegin(r.trimmed3);
1009+
}
1010+
break;
1011+
}
1012+
}
1013+
}
1014+
9911015
PatternComposer& composer_; // pattern composer
9921016
PerThreadReadBuf buf_; // read data buffer
9931017
const PatternParams& pp_; // pattern-related parameters

scripts/test/simple_tests.pl

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,34 @@
158158
cline_reads => "CATCGATCAGTATCTG:ABCDEDGHIJKLMNOPQ", # qual too long
159159
should_abort => 1},
160160

161+
{ name => "trim-reads-exceeding-len: trim from 5'-end",
162+
ref => [ "AGCATCGATCAGTATCTGA" ],
163+
cline_reads => "CATCGATCAGTATCTG:IIIIIIIIIIIIIIII\n",
164+
args => "--trim-reads-exceeding-len 5:12",
165+
norc => 1,
166+
hits => [{ 6 => 1 }] },
167+
168+
{ name => "trim-reads-exceeding-len: trim from 3'-end",
169+
ref => [ "AGCATCGATCAGTATCTGA" ],
170+
cline_reads => "CATCGATCAGTATCTG:IIIIIIIIIIIIIIII\n",
171+
args => "--trim-reads-exceeding-len 3:12",
172+
norc => 1,
173+
hits => [{ 2 => 1 }] },
174+
175+
{ name => "trim-reads-exceeding-len: invalid position",
176+
ref => [ "AGCATCGATCAGTATCTGA" ],
177+
cline_reads => "CATCGATCAGTATCTG:IIIIIIIIIIIIIIII\n",
178+
args => "--trim-reads-exceeding-len 4:12",
179+
norc => 1,
180+
should_abort => 1 },
181+
182+
{ name => "trim-reads-exceeding-len: invalid count",
183+
ref => [ "AGCATCGATCAGTATCTGA" ],
184+
cline_reads => "CATCGATCAGTATCTG:IIIIIIIIIIIIIIII\n",
185+
args => "--trim-reads-exceeding-len 5:-12",
186+
norc => 1,
187+
should_abort => 1 },
188+
161189
# Part of sequence is trimmed
162190
{ name => "Cline 7",
163191
ref => [ "AGCATCGATCAGTATCTGA" ],

0 commit comments

Comments
 (0)