Skip to content

Commit b096aca

Browse files
committed
allow per-variant SV VAF adjustment via varfile
1 parent 6ee1f8e commit b096aca

2 files changed

Lines changed: 39 additions & 14 deletions

File tree

bin/addsv.py

Lines changed: 34 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -345,7 +345,7 @@ def makemut(args, bedline, alignopts):
345345
chrom = c[0]
346346
start = int(c[1])
347347
end = int(c[2])
348-
araw = c[3:len(c)] # INV, DEL, INS seqfile.fa TSDlength, DUP
348+
araw = c[3:] # INV, DEL, INS, DUP, TRN
349349

350350
# translocation specific
351351
trn_chrom = None
@@ -355,6 +355,10 @@ def makemut(args, bedline, alignopts):
355355
is_transloc = c[3] == 'TRN'
356356

357357
if is_transloc:
358+
araw = [c[3]]
359+
if len(c) > 7:
360+
araw += c[7:]
361+
358362
start -= 3000
359363
end += 3000
360364
if start < 0: start = 0
@@ -366,16 +370,18 @@ def makemut(args, bedline, alignopts):
366370

367371
actions = map(lambda x: x.strip(),' '.join(araw).split(','))
368372

369-
svfrac = float(args.svfrac) # default, can be overridden by cnv file
373+
svfrac = float(args.svfrac) # default, can be overridden by cnv file or per-variant
374+
375+
cn = 1.0
370376

371377
if cnv: # CNV file is present
372378
if chrom in cnv.contigs:
373379
for cnregion in cnv.fetch(chrom,start,end):
374380
cn = float(cnregion.strip().split()[3]) # expect chrom,start,end,CN
375381
sys.stdout.write("INFO\t" + now() + "\t" + mutid + "\t" + ' '.join(("copy number in sv region:",chrom,str(start),str(end),"=",str(cn))) + "\n")
376-
svfrac = 1.0/float(cn)
377-
assert svfrac <= 1.0
378-
sys.stdout.write("INFO\t" + now() + "\t" + mutid + "\tadjusted MAF: " + str(svfrac) + "\n")
382+
svfrac = svfrac/float(cn)
383+
assert svfrac <= 1.0, 'copy number from %s must be at least 1: %s' % (args.cnvfile, snregion.stri[()])
384+
sys.stdout.write("INFO\t" + now() + "\t" + mutid + "\tadjusted default MAF: " + str(svfrac) + "\n")
379385

380386
print "INFO\t" + now() + "\t" + mutid + "\tinterval:", c
381387
print "INFO\t" + now() + "\t" + mutid + "\tlength:", end-start
@@ -485,22 +491,28 @@ def makemut(args, bedline, alignopts):
485491
assert len(a) > 1 # insertion syntax: INS <file.fa> [optional TSDlen]
486492
insseqfile = a[1]
487493
if not (os.path.exists(insseqfile) or insseqfile == 'RND'): # not a file... is it a sequence? (support indel ins.)
488-
assert re.search('^[ATGCatgc]*$',insseqfile) # make sure it's a sequence
494+
assert re.search('^[ATGCatgc]*$',insseqfile), "cannot determine SV type: %s" % insseqfile # make sure it's a sequence
489495
insseq = insseqfile.upper()
490496
insseqfile = None
491497
if len(a) > 2: # field 5 for insertion is TSD Length
492498
tsdlen = int(a[2])
493499

494-
if len(a) > 3: # field 5 for insertion is motif, format = 'NNNN/NNNN where / is cut site
500+
if len(a) > 3: # field 6 for insertion is motif, format = 'NNNN^NNNN where ^ is cut site
495501
ins_motif = a[3]
496502
assert '^' in ins_motif, 'insertion motif specification requires cut site defined by ^'
497503

504+
if len(a) > 4: # field 7 is VAF
505+
svfrac = float(a[4])/cn
506+
498507
if action == 'DUP':
499508
if len(a) > 1:
500509
ndups = int(a[1])
501510
else:
502511
ndups = 1
503512

513+
if len(a) > 2: # VAF
514+
svfrac = float(a[2])/cn
515+
504516
if action == 'DEL':
505517
if len(a) > 1:
506518
dsize = float(a[1])
@@ -511,9 +523,19 @@ def makemut(args, bedline, alignopts):
511523
else:
512524
dsize = 1.0
513525

526+
if len(a) > 2: # VAF
527+
svfrac = float(a[2])/cn
528+
514529
if action == 'TRN':
515-
pass
530+
if len(a) > 1:
531+
svfrac = float(a[1])/cn
516532

533+
if action == 'INV':
534+
if len(a) > 1:
535+
svfrac = float(a[1])/cn
536+
537+
538+
print "INFO\t" + now() + "\t" + mutid + "\tfinal VAF accounting for copy number %f: %f" % (cn, svfrac)
517539

518540
logfile.write(">" + chrom + ":" + str(refstart) + "-" + str(refend) + " BEFORE\n" + str(mutseq) + "\n")
519541

@@ -579,6 +601,9 @@ def makemut(args, bedline, alignopts):
579601
print "INFO\t" + now() + "\t" + mutid + "\tset paired end mean distance: " + str(args.ismean)
580602
print "INFO\t" + now() + "\t" + mutid + "\tset paired end distance stddev: " + str(args.issd)
581603

604+
605+
606+
582607
# simulate reads
583608
(fq1, fq2) = runwgsim(maxcontig, mutseq.seq, svfrac, actions, exclude, pemean, pesd, args.tmpdir, mutid=mutid, seed=args.seed, trn_contig=trn_maxcontig)
584609

@@ -748,7 +773,7 @@ def main(args):
748773
if __name__ == '__main__':
749774
parser = argparse.ArgumentParser(description='adds SVs to reads, outputs modified reads as .bam along with mates')
750775
parser.add_argument('-v', '--varfile', dest='varFileName', required=True,
751-
help='whitespace-delimited target regions to try and add a SV: chrom,start,stop,action,seqfile (if insertion),TSDlength (if insertion)')
776+
help='whitespace-delimited target regions for SV spike-in, see manual for syntax')
752777
parser.add_argument('-f', '--bamfile', dest='bamFileName', required=True,
753778
help='sam/bam file from which to obtain reads')
754779
parser.add_argument('-r', '--reference', dest='refFasta', required=True,

test_data/test_sv.txt

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
22 34171055 34184700 INS ACTAACCTGCACAATGTGCACATGTACCCTAAAACTTAGAGTATAATAAAAAAAA 13 AA^TTTT
2-
22 33607508 33607508 TRN 22 34314981 34314981
3-
22 33871043 33884754 DEL 0.9
4-
22 34071043 34084754 INV
5-
22 34271043 34284754 DUP 3
1+
22 34171055 34184700 INS ACTAACCTGCACAATGTGCACATGTACCCTAAAACTTAGAGTATAATAAAAAAAA 13 AA^TTTT 0.8
2+
22 33607508 33607508 TRN 22 34314981 34314981 0.9
3+
22 33871043 33884754 DEL 0.9 0.75
4+
22 34071043 34084754 INV 0.9
5+
22 34271043 34284754 DUP 3 0.9

0 commit comments

Comments
 (0)