1- """
2- This ExampleReadsApp demonstrates how to use best practices for KBase App
3- development using the SFA base package.
4- """
51import json
62import io
73import logging
84import os
95import subprocess
10- import uuid
116
127from collections import Counter
138from shutil import copyfile
149
15- import pandas as pd
1610import subprocess
1711
1812from Bio import SeqIO
1913
20- # This is the SFA base package which provides the Core app class.
2114from base import Core
2215
2316MODULE_DIR = "/kb/module"
2417TEMPLATES_DIR = os .path .join (MODULE_DIR , "lib/templates" )
2518
2619
class ExampleReadsApp(Core):
    """
    Example KBase App built on the SFA ``Core`` base class.

    It wraps the ReadsUtils client (download/upload of reads) and builds an
    HTML report from a Jinja template through the Core report helper.
    """

    def __init__(self, ctx, config, clients_class=None):
        """
        This is required to instantiate the Core App class with its defaults
        and allows you to pass in more clients as needed.
        """
        super().__init__(ctx, config, clients_class)
        # Here we adjust the instance attributes for our convenience.
        self.report = self.clients.KBaseReport
        self.ru = self.clients.ReadsUtils
        # self.shared_folder is defined in the Core App class.
        # TODO Add a self.wsid = a conversion of self.wsname

    # NOTE: the original template's entry point is kept below, disabled, as a
    # reference for how the helper methods of this class fit together.
    #
    # def do_analysis(self, params: dict):
    #     """
    #     This method is where the main computation will occur.
    #     """
    #     read_refs = params["reads_ref"]
    #     # Download the reads from KBase
    #     ret = self.download_reads(read_refs)
    #     # We use these downloaded reads and biopython to collect the first 10
    #     # reads and their phred quality scores to create a new fastq file to
    #     # upload to KBase.
    #     for file_ref, file_info in ret["files"].items():
    #         file_path = file_info["files"]["fwd"]
    #         basename = os.path.basename(file_path)
    #         with open(file_path) as reads:
    #             record_iter = SeqIO.parse(reads, "fastq")
    #             limit = 10
    #             head = []
    #             scores = []
    #             counts = Counter()
    #             for ix, record in enumerate(record_iter):
    #                 if ix >= limit:
    #                     break
    #                 head.append(record)
    #                 counts.update(str(record.seq))
    #                 scores.append(record.letter_annotations["phred_quality"])
    #             filename = f"{basename}.head.fastq"
    #             out_path = os.path.join(self.shared_folder, filename)
    #             with open(out_path, "w") as out_reads:
    #                 SeqIO.write(head, out_reads, "fastq")
    #
    #     # This method runs the process first and then returns the stdout and
    #     # stderr all at once, so take care if your process produces a large
    #     # amount of output.
    #     process = subprocess.Popen(
    #         ["/kb/module/scripts/random_logger.py"],
    #         stdout=subprocess.PIPE,
    #         stderr=subprocess.PIPE,
    #     )
    #
    #     stdout, stderr = self.get_streams(process)
    #     # We are logging everything because the script we are running does not
    #     # have a lot of output, but if what you run does then you might not
    #     # want to log *everything* to the user.
    #     logging.info(stdout)
    #     if stderr:
    #         logging.warning(stderr)
    #     output_value = stdout.split("\n")[0].split(" ")[-2]
    #     count_df = pd.DataFrame(sorted(counts.items()), columns=["base", "count"])
    #
    #     # Upload the first 10 reads back to kbase as an object
    #     upa = self.upload_reads(
    #         name=params["output_name"], reads_path=out_path, wsname=params["workspace_name"]
    #     )
    #
    #     # Pass new data to generate the report.
    #     params["count_df"] = count_df
    #     params["output_value"] = output_value
    #     params["scores"] = scores
    #     params["upa"] = upa  # Not currently used, but the ID of the uploaded reads
    #     # This is the method that generates the HTML report
    #     return self.generate_report(params)

    @staticmethod
    def get_streams(process):
        """
        Wait for the process to finish and return its decoded output.

        The entire output is loaded into memory, so take care with processes
        that produce a large amount of output.
        param: process - an object exposing communicate(), such as a
            subprocess.Popen started with byte-mode pipes
        returns: a (stdout, stderr) tuple of str, decoded as UTF-8 with
            undecodable bytes dropped; a stream that was not captured is
            returned as an empty string
        """
        stdout, stderr = process.communicate()
        # communicate() hands back None for any stream that was not opened
        # with PIPE; treat that as empty output instead of failing on .decode.
        return (
            (stdout or b"").decode("utf-8", "ignore"),
            (stderr or b"").decode("utf-8", "ignore"),
        )

    def upload_reads(self, name, reads_path, wsname):
        """
        Upload reads back to the KBase Workspace. This method only uses the
        minimal parameters necessary to provide a demonstration. There are many
        more parameters which reads can provide, for example, interleaved, etc.
        By default, non-interleaved objects and those uploaded without a
        reverse file are saved as KBaseFile.SingleEndLibrary. See:
        https://github.com/kbaseapps/ReadsUtils/blob/master/lib/ReadsUtils/ReadsUtilsImpl.py#L115-L119
        param: name - The name to give the uploaded reads object
        param: reads_path - A filepath to a fastq file to upload reads from
        param: wsname - The name of the workspace to upload to
        returns: whatever the ReadsUtils client's upload_reads call returns
        """
        ur_params = {
            "fwd_file": reads_path,
            "name": name,
            "sequencing_tech": "Illumina",
            "wsname": wsname,
            "single_genome": 0,
        }
        # It is often useful to log parameters as they are passed.
        logging.warning(">>>>>>>>>>>>>>>>>>>>%s ", ur_params)
        return self.ru.upload_reads(ur_params)

    def download_reads(self, reads_ref, interleaved=False):
        """
        Download one or more reads objects through the ReadsUtils client.

        param: reads_ref - A single reads reference/upa (str) or an iterable
            of them
        param: interleaved - Truthy (or the string "true") asks for
            interleaved output; the default False (or None) leaves the reads
            as stored, which is what this method always did before. A string
            such as "false" is passed through unchanged.
        returns: whatever the ReadsUtils client's download_reads call returns
        """
        # A bare string is one reference; anything else is taken as a
        # collection of references and must not be nested in another list.
        if isinstance(reads_ref, str):
            read_libraries = [reads_ref]
        else:
            read_libraries = list(reads_ref)
        # NOTE(review): the ReadsUtils spec describes this field as a ternary
        # of "true", "false" or null - confirm against the installed version.
        if isinstance(interleaved, str):
            interleaved_flag = interleaved
        else:
            interleaved_flag = "true" if interleaved else None
        dr_params = {
            "read_libraries": read_libraries,
            "interleaved": interleaved_flag,
        }
        # This uses the ReadsUtils client to download a specific workspace
        # object, saving it into the shared_folder and making it available to
        # the user.
        return self.ru.download_reads(dr_params)

    def generate_report(self, params: dict):
        """
        This method is where to define the variables to pass to the report.

        param: params - dict that must provide "count_df" (a DataFrame),
            "scores" (data accepted by pd.DataFrame), "upa", "output_value"
            and "workspace_name"
        returns: the result of Core.create_report_from_template
        raises: KeyError if one of the required keys is missing
        """
        # This path is required to properly use the template.
        reports_path = os.path.join(self.shared_folder, "reports")
        # Path to the Jinja template. The template can be adjusted to change
        # the report.
        template_path = os.path.join(TEMPLATES_DIR, "report.html")
        # A sample multiplication table to use as output
        # NOTE(review): the factors run 0..9 while the headers read one..ten.
        table = [[i * j for j in range(10)] for i in range(10)]
        headers = "one two three four five six seven eight nine ten".split(" ")
        # A count of the base calls in the reads
        count_df_html = params["count_df"].to_html()
        # Calculate a correlation table determined by the quality scores of
        # each base read. This requires pandas and matplotlib, and these are
        # listed in requirements.txt. You can see the resulting HTML file after
        # running kb-sdk test in ./test_local/workdir/tmp/reports/index.html
        styler = pd.DataFrame(params["scores"]).corr().style.background_gradient()
        # Styler.render() was deprecated in pandas 1.4 and removed in 2.0;
        # Styler.to_html() replaces it. Fall back for older installations.
        if hasattr(styler, "to_html"):
            scores_df_html = styler.to_html()
        else:
            scores_df_html = styler.render()
        # The keys in this dictionary will be available as variables in the
        # Jinja template. With the current configuration of the template
        # engine, HTML output is allowed.
        template_variables = dict(
            count_df_html=count_df_html,
            headers=headers,
            scores_df_html=scores_df_html,
            table=table,
            upa=params["upa"],
            output_value=params["output_value"],
        )
        # The KBaseReport configuration dictionary. The report name carries no
        # trailing whitespace so it stays a clean identifier.
        config = dict(
            report_name=f"ExampleReadsApp_{uuid.uuid4()}",
            reports_path=reports_path,
            template_variables=template_variables,
            workspace_name=params["workspace_name"],
        )
        return self.create_report_from_template(template_path, config)
183-
18420class BamConversion (Core ):
18521 def __init__ (self , ctx , config , app_config , clients_class = None ):
18622 """
18723 This is required to instantiate the Core App class with its defaults
18824 and allows you to pass in more clients as needed.
18925 """
19026 super ().__init__ (ctx , config , clients_class )
191- # Here we adjust the instance attributes for our convenience.
19227 self .dfu = self .clients .DataFileUtil
19328 self .report = self .clients .KBaseReport
19429 self .ru = self .clients .ReadsUtils
19530 self .app_config = app_config
196- # self.shared_folder is defined in the Core App class.
197- # TODO Add a self.wsid = a conversion of self.wsname
19831
19932 def do_analysis (self , params : dict ):
20033 """
20134 This method is where the main computation will occur.
20235 """
20336 print (f"{ json .dumps (params )= } " )
20437 bam_file = params ['bam_file' ]
205- staging_path = bam_file if os .path .isfile (bam_file ) else os . path . join ( "/staging/" , bam_file )
206- # Read and print first 1000 characters
207-
208-
38+ if os .path .isfile (bam_file ):
39+ staging_path = bam_file
40+ else :
41+ staging_path = os . path . join ( "/staging/" , bam_file )
20942
210- logging .warning (f"{ '@' * 30 } params: { params } " )
21143 logging .warning (f"cwd: { os .getcwd ()} " )
212- #bam_file_staging_path = self.dfu.download_staging_file({
213- # 'staging_file_subdir_path': bam_file
214- #}).get('copy_file_path')
215- #logging.warning(f'{"&"*20}{bam_file_staging_path=}')
216- #logging.warning(f"bam_file_staging_path: {bam_file_staging_path}")
21744 output_name = params ['output_name' ]
21845 wsname = params ['workspace_name' ]
21946 sequencing_tech = 'Illumina'
@@ -256,23 +83,9 @@ def bam_to_fastq(cls, bam_file, shared_folder=""): # add a dict parameter so tho
25683 if os .path .getsize ("filename_end1.fq" ) < 100 :
25784 raise ValueError ("Generated FASTQ file is unexpectedly small — check input BAM or bedtools error" )
25885
259- with open ("filename_end1.fq" , 'rb' ) as f :
260- content = f .read (1001 )
261- print ("First 1001 characters from the file:" )
262- decoded = "" .join ([c if ord (c )>= 32 else "?" for c in content .decode ("ascii" , "ignore" )])
263- print (f"{ decoded = } " )
264-
26586 output_path = os .path .join (shared_folder , 'output.fq' )
26687 copyfile ('filename_end1.fq' , output_path )
267- # Upload the fastq file we just made to a reads object in KBase
268- # upa = self.upload_reads(
269- # name=params["output_name"], reads_path=out_path, wsname=params["workspace_name"]
270- # )
271- #logging.warning(f">>>>>>>>>>>>>>>>>>>>{os.getcwd()}")
272- #fastq_path = '/kb/module/test/filename_end1.fq'
273- #fastq_file = open(fastq_path, 'r')
274- #print(fastq_file.read())
275-
88+
27689 return output_path
27790
27891
@@ -295,7 +108,6 @@ def upload_reads(self, name, reads_path, workspace_name, sequencing_tech, interl
295108 "interleaved" : interleaved
296109 #"single_genome": single_genome
297110 }
298- # It is often useful to log parameters as they are passed.
299111 logging .warning (f">>>>>>>>>>>>>>>>>>>>{ ur_params } " )
300112 return self .ru .upload_reads (ur_params )
301113
@@ -306,11 +118,8 @@ def __init__(self, ctx, config, clients_class=None):
306118 and allows you to pass in more clients as needed.
307119 """
308120 super ().__init__ (ctx , config , clients_class )
309- # Here we adjust the instance attributes for our convenience.
310121 self .report = self .clients .KBaseReport
311122 self .ru = self .clients .ReadsUtils
312- # self.shared_folder is defined in the Core App class.
313- # TODO Add a self.wsid = a conversion of self.wsname
314123
315124 def intersection (self , first_file , second_file ):
316125 file1 = first_file
0 commit comments