@@ -345,7 +345,7 @@ def makemut(args, bedline, alignopts):
345345 chrom = c [0 ]
346346 start = int (c [1 ])
347347 end = int (c [2 ])
348- araw = c [3 :len ( c ) ] # INV, DEL, INS seqfile.fa TSDlength, DUP
348+ araw = c [3 :] # INV, DEL, INS, DUP, TRN
349349
350350 # translocation specific
351351 trn_chrom = None
@@ -355,6 +355,10 @@ def makemut(args, bedline, alignopts):
355355 is_transloc = c [3 ] == 'TRN'
356356
357357 if is_transloc :
358+ araw = [c [3 ]]
359+ if len (c ) > 7 :
360+ araw += c [7 :]
361+
358362 start -= 3000
359363 end += 3000
360364 if start < 0 : start = 0
@@ -366,16 +370,18 @@ def makemut(args, bedline, alignopts):
366370
367371 actions = map (lambda x : x .strip (),' ' .join (araw ).split (',' ))
368372
369- svfrac = float (args .svfrac ) # default, can be overridden by cnv file
373+ svfrac = float (args .svfrac ) # default, can be overridden by cnv file or per-variant
374+
375+ cn = 1.0
370376
371377 if cnv : # CNV file is present
372378 if chrom in cnv .contigs :
373379 for cnregion in cnv .fetch (chrom ,start ,end ):
374380 cn = float (cnregion .strip ().split ()[3 ]) # expect chrom,start,end,CN
375381 sys .stdout .write ("INFO\t " + now () + "\t " + mutid + "\t " + ' ' .join (("copy number in sv region:" ,chrom ,str (start ),str (end ),"=" ,str (cn ))) + "\n " )
376- svfrac = 1.0 / float (cn )
377- assert svfrac <= 1.0
378- sys .stdout .write ("INFO\t " + now () + "\t " + mutid + "\t adjusted MAF: " + str (svfrac ) + "\n " )
382+ svfrac = svfrac / float (cn )
383+ assert svfrac <= 1.0 , 'copy number from %s must be at least 1: %s' % ( args . cnvfile , snregion . stri [()])
384+ sys .stdout .write ("INFO\t " + now () + "\t " + mutid + "\t adjusted default MAF: " + str (svfrac ) + "\n " )
379385
380386 print "INFO\t " + now () + "\t " + mutid + "\t interval:" , c
381387 print "INFO\t " + now () + "\t " + mutid + "\t length:" , end - start
@@ -485,22 +491,28 @@ def makemut(args, bedline, alignopts):
485491 assert len (a ) > 1 # insertion syntax: INS <file.fa> [optional TSDlen]
486492 insseqfile = a [1 ]
487493 if not (os .path .exists (insseqfile ) or insseqfile == 'RND' ): # not a file... is it a sequence? (support indel ins.)
488- assert re .search ('^[ATGCatgc]*$' ,insseqfile ) # make sure it's a sequence
494+ assert re .search ('^[ATGCatgc]*$' ,insseqfile ), "cannot determine SV type: %s" % insseqfile # make sure it's a sequence
489495 insseq = insseqfile .upper ()
490496 insseqfile = None
491497 if len (a ) > 2 : # field 5 for insertion is TSD Length
492498 tsdlen = int (a [2 ])
493499
494- if len (a ) > 3 : # field 5 for insertion is motif, format = 'NNNN/ NNNN where / is cut site
500+ if len (a ) > 3 : # field 6 for insertion is motif, format = 'NNNN^ NNNN where ^ is cut site
495501 ins_motif = a [3 ]
496502 assert '^' in ins_motif , 'insertion motif specification requires cut site defined by ^'
497503
504+ if len (a ) > 4 : # field 7 is VAF
505+ svfrac = float (a [4 ])/ cn
506+
498507 if action == 'DUP' :
499508 if len (a ) > 1 :
500509 ndups = int (a [1 ])
501510 else :
502511 ndups = 1
503512
513+ if len (a ) > 2 : # VAF
514+ svfrac = float (a [2 ])/ cn
515+
504516 if action == 'DEL' :
505517 if len (a ) > 1 :
506518 dsize = float (a [1 ])
@@ -511,9 +523,19 @@ def makemut(args, bedline, alignopts):
511523 else :
512524 dsize = 1.0
513525
526+ if len (a ) > 2 : # VAF
527+ svfrac = float (a [2 ])/ cn
528+
514529 if action == 'TRN' :
515- pass
530+ if len (a ) > 1 :
531+ svfrac = float (a [1 ])/ cn
516532
533+ if action == 'INV' :
534+ if len (a ) > 1 :
535+ svfrac = float (a [1 ])/ cn
536+
537+
538+ print "INFO\t " + now () + "\t " + mutid + "\t final VAF accounting for copy number %f: %f" % (cn , svfrac )
517539
518540 logfile .write (">" + chrom + ":" + str (refstart ) + "-" + str (refend ) + " BEFORE\n " + str (mutseq ) + "\n " )
519541
@@ -579,6 +601,9 @@ def makemut(args, bedline, alignopts):
579601 print "INFO\t " + now () + "\t " + mutid + "\t set paired end mean distance: " + str (args .ismean )
580602 print "INFO\t " + now () + "\t " + mutid + "\t set paired end distance stddev: " + str (args .issd )
581603
604+
605+
606+
582607 # simulate reads
583608 (fq1 , fq2 ) = runwgsim (maxcontig , mutseq .seq , svfrac , actions , exclude , pemean , pesd , args .tmpdir , mutid = mutid , seed = args .seed , trn_contig = trn_maxcontig )
584609
@@ -748,7 +773,7 @@ def main(args):
748773if __name__ == '__main__' :
749774 parser = argparse .ArgumentParser (description = 'adds SVs to reads, outputs modified reads as .bam along with mates' )
750775 parser .add_argument ('-v' , '--varfile' , dest = 'varFileName' , required = True ,
751- help = 'whitespace-delimited target regions to try and add a SV: chrom,start,stop,action,seqfile (if insertion),TSDlength (if insertion) ' )
776+ help = 'whitespace-delimited target regions for SV spike-in, see manual for syntax ' )
752777 parser .add_argument ('-f' , '--bamfile' , dest = 'bamFileName' , required = True ,
753778 help = 'sam/bam file from which to obtain reads' )
754779 parser .add_argument ('-r' , '--reference' , dest = 'refFasta' , required = True ,
0 commit comments