Skip to content

Commit 5f51484

Browse files
committed
Fixed objectify_tcs branch conflicts.
2 parents 05c38e4 + f246999 commit 5f51484

23 files changed

+2373
-695
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,5 @@
11
*.swp
22
*.pyc
3+
#Eric5 ignores:
4+
*.e4p
5+
.eric5project/*

4Pipe4.py

Lines changed: 78 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#!/usr/bin/python3
2-
# Copyright 2011-2013 Francisco Pina Martins <[email protected]>
2+
# Copyright 2011-2015 Francisco Pina Martins <[email protected]>
33
# This file is part of 4Pipe4.
44
# 4Pipe4 is free software: you can redistribute it and/or modify
55
# it under the terms of the GNU General Public License as published by
@@ -26,48 +26,44 @@
2626
import Reporter
2727
import SSRfinder as ssr
2828
import Metrics
29-
import CAF_to_TCS
29+
import SAM_to_BAM
30+
import BAM_to_TCS
3031
import argparse
32+
import sff_extractor
3133
from argparse import RawTextHelpFormatter
3234

3335

34-
##### ARGUMENT LIST ######
36+
# # # # # ARGUMENT LIST # # # # # #
3537
parser = argparse.ArgumentParser(description="",
3638
epilog="The idea here is that to resume an \
37-
analysis that was interrupted for example \
38-
after the assembling process you should \
39-
issue -s '4,5,6,7,8,9' or -s '456789'. Note \
40-
that some steps depend on the output of \
41-
previous steps, so using some combinations \
42-
can cause errors. The arguments can be given \
43-
in any order.",
39+
analysis that was interrupted for example after the assembling process you \
40+
should issue -s '4,5,6,7,8,9' or -s '456789'. Note that some steps depend on \
41+
the output of previous steps, so using some combinations can cause errors. \
42+
The arguments can be given in any order.",
4443
prog="4Pipe4",
4544
formatter_class=RawTextHelpFormatter)
4645
parser.add_argument("-i", dest="infile", nargs=1, required=True,
4746
help="Provide the full path to your target sff file\n",
4847
metavar="sff_file")
4948
parser.add_argument("-o", dest="outfile", nargs=1, required=True,
5049
help="Provide the full path to your results directory, \
51-
plus the name you want to give your results\n",
50+
plus the name you want to give your results\n",
5251
metavar="basefile")
5352
parser.add_argument("-c", dest="configFile", nargs=1,
5453
help="Provide the full path to your configuration file. \
55-
If none is provided, the program will look in the current \
56-
working directory and then in ~/.config/4Pipe4rc (in this\
57-
order) for one. If none is found the program will stop\n",
58-
metavar="configfile")
54+
If none is provided, the program will look in the current working directory \
55+
and then in ~/.config/4Pipe4rc (in this order) for one. If none is found the \
56+
program will stop\n", metavar="configfile")
5957
parser.add_argument("-s", dest="run_list", nargs="?",
6058
default="1 2 3 4 5 6 7 8 9", help="Specify the numbers \
61-
corresponding to the pipeline steps that will be run. The \
62-
string after -s must be given inside quotation marks, and \
63-
numbers can be joined together or separated by any \
64-
symbol. The numbers are the pipeline steps that should be \
65-
run. This is an optional argument and it's omission will \
66-
run all steps by default. The numbers, from 1 to 9 \
67-
represent the following steps:\n\t1 - SFF extraction\n\t2 \
68-
- SeqClean\n\t3 - Mira\n\t4 - DiscoveryTCS\n\t5 - \
69-
SNP grabber\n\t6 - ORF finder\n\t7 - Blast2go\n\t8 - SSR \
70-
finder\n\t9 - 7zip the report")
59+
corresponding to the pipeline steps that will be run. The string after -s \
60+
must be given inside quotation marks, and numbers can be joined together or \
61+
separated by any symbol. The numbers are the pipeline steps that should be \
62+
run. This is an optional argument and it's omission will run all steps by \
63+
default. The numbers, from 1 to 9 represent the following steps:\n\t1 - SFF \
64+
extraction\n\t2 - SeqClean\n\t3 - Mira\n\t4 - DiscoveryTCS\n\t5 - \
65+
SNP grabber\n\t6 - ORF finder\n\t7 - Blast2go\n\t8 - SSR finder\n\t9 - 7zip \
66+
the report")
7167
arg = parser.parse_args()
7268

7369

@@ -143,53 +139,44 @@ def RunProgram(cli, requires_output):
143139
time.sleep(5)
144140

145141

146-
def SffExtract(sff, clip):
147-
'''Function for using the sff_extract program. The function returns the
148-
'clip' value recommended by sff_extract. If run sequentially, the
149-
recommendations should be added.'''
150-
cli = [config.get('Program paths', 'sff_extract_path'), '-c',
151-
'--min_left_clip=' + str(clip),
152-
'--min_frequency=' + config.get('Variables', 'max_equality'), '-o',
153-
basefile, "-A", sff]
154-
print("\nRunning sff_extract using the following command:")
155-
print(' '.join(cli))
156-
sff_extract_stdout = RunProgram(cli, 1)
157-
if len(sff_extract_stdout) == 20:
158-
for lines in sff_extract_stdout:
159-
if "Probably" in str(lines):
160-
warning = str(lines)
161-
number = ''
162-
for letters in warning:
163-
if letters.isdigit():
164-
number = number + letters
165-
return number
166-
else:
167-
print("The found value seems acceptable. If this message is displayed \
168-
twice in a row you have found your min_left_clip.\n")
169-
return "OK"
170-
171-
172-
def MinClip(basefile):
173-
'''Function for calling the SffExtract function and adding 1 to the 'clip'
174-
value until the ideal value is found. Depends on SffExtract.'''
175-
OK = 0
176-
clip = 0
177-
while OK < 2:
178-
turner = SffExtract(sff, clip)
179-
if turner == "OK":
180-
OK = OK + 1
181-
clip = clip + 1
182-
if OK == 2:
183-
clip = clip - 2
142+
def SffExtraction(sff, basefile):
143+
'''Function for using the sff_extractor module. It will look for an "ideal"
144+
clipping value using multiple runs before outputting the final files.'''
145+
clip_found = 0
146+
147+
# Sff_extractor parameters:
148+
sff_config = {}
149+
sff_config["append"] = False
150+
sff_config["qual_fname"] = basefile + ".fasta.qual"
151+
sff_config["want_fastq"] = False
152+
sff_config["min_leftclip"] = 0
153+
sff_config["min_freq"] = int(config.get('Variables', 'max_equality'))
154+
sff_config["xml_info"] = None
155+
sff_config["want_fr"] = False
156+
sff_config["pelinker_fname"] = ""
157+
sff_config["mix_case"] = True
158+
sff_config["clip"] = True
159+
sff_config["xml_fname"] = basefile + ".xml"
160+
sff_config["basename"] = basefile
161+
sff_config["seq_fname"] = basefile + ".fasta"
162+
163+
while clip_found < 2:
164+
extra_clip = sff_extractor.extract_reads_from_sff(sff_config, [sff])
165+
sff_config["min_leftclip"] += extra_clip
166+
if extra_clip == 0:
167+
clip_found += 1
184168
else:
185-
OK = 0
186-
clip = clip + int(turner)
187-
print("Sff_extract finished with a min_left_clip=" + str(clip) + ".\n")
169+
clip_found = 0
170+
171+
print("Sff_extractor finished with a min_left_clip=" +
172+
str(sff_config["min_leftclip"]) + ".\n")
173+
174+
return
188175

189176

190177
def SeqClean(basefile):
191178
'''Function for using seqclean and clean2qual.'''
192-
#seqclean
179+
# seqclean
193180
cli = [config.get('Program paths', 'seqclean_path'),
194181
basefile + '.fasta', '-r', basefile + '.clean.rpt', '-l',
195182
config.get('Variables', 'min_len'), '-o',
@@ -199,7 +186,7 @@ def SeqClean(basefile):
199186
print("\nRunning Seqclean using the following command:")
200187
print(' '.join(cli))
201188
RunProgram(cli, 0)
202-
#cln2qual
189+
# cln2qual
203190
cli = [config.get('Program paths', 'cln2qual_path'),
204191
basefile + '.clean.rpt', basefile + '.fasta.qual']
205192
print("\nRunning cln2qual using the following command:")
@@ -222,22 +209,34 @@ def MiraRun(basefile):
222209
manifest.write("data = " + basename + ".clean.fasta\n")
223210
manifest.close()
224211

212+
# Run mira
225213
cli = [config.get('Program paths', 'mira_path'), basefile + ".manifest"]
226214

227215
print("\nRunning Mira using the following command:")
228216
print(' '.join(cli))
229217
RunProgram(cli, 0)
230218

219+
# Convert the MAF output to SAM output
220+
cli = [config.get('Program paths', 'mira_path') + "convert", "-f", "maf",
221+
"-t", "sam", basefile + '_assembly/' + miraproject + '_d_results/' +
222+
miraproject + '_out.maf', basefile + ".sam"]
223+
224+
print("\nConverting MAF to SAM using miraconvert:")
225+
print(' '.join(cli))
226+
RunProgram(cli, 0)
227+
231228

232229
def DiscoveryTCS(basefile):
233230
'''Discovers SNPs in the TCS output file of Mira. Use only if trying to
234231
find SNPs. Output in TCS format.'''
235232
os.chdir(os.path.split(basefile)[0])
236233
print("\nRunning SNP Discovery tool module...")
237-
CAF_to_TCS.RunModule(basefile + '_assembly/' + miraproject
238-
+ '_d_results/' + miraproject + '_out.caf')
239-
TCS.RunModule(basefile + '_assembly/' + miraproject + '_d_results/'
240-
+ miraproject + '_out.tcs', basefile + '_out.short.tcs',
234+
SAM_to_BAM.RunModule(basefile + '.sam',
235+
basefile + '.bam')
236+
BAM_to_TCS.RunModule(basefile + '.bam', basefile + '_assembly/' +
237+
miraproject + '_d_results/' + miraproject +
238+
'_out.padded.fasta')
239+
TCS.RunModule(basefile + '.tcs', basefile + '_out.short.tcs',
241240
int(config.get('Variables', 'minqual')),
242241
int(config.get('Variables', 'mincov')))
243242

@@ -264,10 +263,10 @@ def ORFliner(basefile):
264263
print("\nRunning EMBOSS 'getorf' using the following command:")
265264
print(' '.join(cli))
266265
RunProgram(cli, 0)
267-
#After this we go to ORFmaker.py:
266+
# After this we go to ORFmaker.py:
268267
print("\nRunning ORFmaker module...")
269268
ORFmaker.RunModule(basefile + '.allORFs.fasta')
270-
#Next we BLAST the resulting ORFs against the local 'nr' database:
269+
# Next we BLAST the resulting ORFs against the local 'nr' database:
271270
if config.get('Program paths', 'BLAST_path').endswith('blast2'):
272271
cli = [config.get('Program paths', 'BLAST_path'), '-p', 'blastx', '-d',
273272
config.get('Program paths', 'BLASTdb_path'), '-i',
@@ -283,7 +282,7 @@ def ORFliner(basefile):
283282
print("\nRunning NCBI 'blastx' using the following command:")
284283
print(' '.join(cli))
285284
RunProgram(cli, 0)
286-
#Then we write the metrics report:
285+
# Then we write the metrics report:
287286
print("\nRunning the metrics calculator module...")
288287
seqclean_log_path = "%s/seqcl_%s.fasta.log" % (os.path.split(basefile)[0],
289288
miraproject)
@@ -294,7 +293,7 @@ def ORFliner(basefile):
294293
+ miraproject + '_info_assembly.txt', basefile
295294
+ '.SNPs.fasta', basefile + '.BestORF.fasta',
296295
basefile + '.Metrics.html')
297-
#Finally we write down our report using the data gathered so far:
296+
# Finally we write down our report using the data gathered so far:
298297
print("\nRunning Reporter module...")
299298
Reporter.RunModule(basefile + '.BestORF.fasta', basefile + '.SNPs.fasta',
300299
basefile + '.ORFblast.html', basefile + '.Report.html',
@@ -321,7 +320,7 @@ def B2G(basefile):
321320
print("\nRunning NCBI 'blastx' using the following command:")
322321
print(' '.join(cli))
323322
RunProgram(cli, 0)
324-
#After 'blasting' we run b2g4pipe:
323+
# After 'blasting' we run b2g4pipe:
325324
if os.path.isfile(config.get('Program paths', 'Blast2go_path')):
326325
cli = ['java', '-jar', config.get('Program paths', 'Blast2go_path'),
327326
'-in', basefile + '.shortlistblast.xml', '-prop',
@@ -385,7 +384,7 @@ def TidyUP(basefile):
385384
print(basefile + '.Metrics.html does not exist')
386385
shutil.copy(config.get('Program paths', 'Templates_path') +
387386
'/Report.html', 'Report/Report.html')
388-
#7zip it
387+
# 7zip it
389388
cli = [config.get('Program paths', '7z_path'), 'a', '-y', '-bd', basefile
390389
+ '.report.7z', 'Report']
391390
print("\n7ziping the Report folder using the following command:")
@@ -397,7 +396,7 @@ def RunMe(arguments):
397396
'''Function to parse which parts of 4Pipe4 will run.'''
398397
for option, number in zip(list(arguments), range(len(arguments))):
399398
if option == "1":
400-
MinClip(basefile)
399+
SffExtraction(sff, basefile)
401400
if option == "2":
402401
SeqClean(basefile)
403402
if option == "3":

4Pipe4rc

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,6 @@
1010
#eg. "/home/user/software/sff_extract" instead of "~/software/sff_extract"
1111

1212
[Program paths]
13-
#Path to sff_extract:
14-
sff_extract_path =
1513
#Path to seqclean:
1614
seqclean_path =
1715
#Path to cln2qual:
@@ -54,7 +52,7 @@ min_ssr_qual = 70
5452
##Example to search for SNPs in EST data:
5553

5654
#mirajob = job = est,denovo,accurate
57-
#miracommon = parameters = COMMON_SETTINGS -AS:nop=5:ugpf=off -CO:mr=on:asir=on
55+
#miracommon = parameters = COMMON_SETTINGS -AS:nop=5:ugpf=off -CO:mr=on:asir=on -OUT:output_result_caf=off:output_result_maf=on
5856
#mira454 = 454_SETTINGS -AL:egp=off -CL:cpat=on
5957
#mirareadgroup = readgroup = Test data
6058
#miratech = technology = 454

0 commit comments

Comments
 (0)