Skip to content

Commit dff972c

Browse files
author
Bridget Allen
committed
Feedback Updates
- Adheres to PEP 8 - "Imports are always put at the top of the file" - Line lengths are appropriate - Remove references to ExampleReadsApp - Remove commented-out lines
1 parent dc75939 commit dff972c

File tree

6 files changed

+22
-238
lines changed

6 files changed

+22
-238
lines changed

lib/kb_bedtools/kb_bedtoolsImpl.py

Lines changed: 2 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
11
# -*- coding: utf-8 -*-
22
#BEGIN_HEADER
3-
import json
43
import logging
54
import os
65
import subprocess
76

87
from installed_clients.DataFileUtilClient import DataFileUtil
98
from installed_clients.KBaseReportClient import KBaseReport
109
from installed_clients.ReadsUtilsClient import ReadsUtils
11-
from .utils import ExampleReadsApp, BamConversion, Intersection
1210
from base import Core
1311

12+
from kb_bedtools.utils import Intersection
13+
from kb_bedtools.utils import BamConversion
1414

1515
#END_HEADER
1616

@@ -51,7 +51,6 @@ def __init__(self, config):
5151

5252

5353
def run_kb_bedtools(self, ctx, params):
54-
import subprocess
5554
version = subprocess.check_output(["bedtools", "--version"])
5655
print("BEDTOOLS VERSION IN CONTAINER:", version.decode())
5756

@@ -75,21 +74,7 @@ def run_kb_bedtools(self, ctx, params):
7574
),
7675
)
7776
bam = BamConversion(ctx, config=config, app_config=self.config)
78-
#bam.bam_to_fastq(params['bam_file'], config['shared_folder'])
7977
output = bam.do_analysis(params)
80-
#fastq_path = bam.bam_to_fastq(params['bam_file']) #ExampleReadsApp.upload_reads(self, params['name'], params['reads_path'], params['wsname'])
81-
#era = ExampleReadsApp(ctx, config=config)
82-
#era.upload_reads(params["bam_file"], params["read_ref"], params["workspace_name"])
83-
84-
#out_path = os.path.join(self.shared_folder, 'filename_end1')
85-
#logging.warning(f">>>>>>>>>>>>>>>>>>>>{fastq_path}")
86-
# bam.upload_reads(params['output_name'], fastq_path, params['workspace_name'])
87-
88-
#ExampleReadsApp.upload_reads(self, params['name'], params['reads_path'], params['wsname']) #might not need this
89-
# Download Reads
90-
91-
#era = ExampleReadsApp(ctx, config=config)
92-
#output = era.do_analysis(params)
9378

9479
output = bam.do_analysis(params)
9580

@@ -98,7 +83,6 @@ def run_kb_bedtools(self, ctx, params):
9883
raise ValueError('Method run_kb_bedtools return value ' +
9984
'output is not type dict as required.')
10085
# return the results
101-
print("RETURNING:", output) # Must print this
10286
return [output]
10387
#END run_kb_bedtools
10488
def run_kb_bedtools_intersect(self, ctx, params):

lib/kb_bedtools/utils.py

Lines changed: 5 additions & 196 deletions
Original file line numberDiff line numberDiff line change
@@ -1,219 +1,46 @@
1-
"""
2-
This ExampleReadsApp demonstrates how to use best practices for KBase App
3-
development using the SFA base package.
4-
"""
51
import json
62
import io
73
import logging
84
import os
95
import subprocess
10-
import uuid
116

127
from collections import Counter
138
from shutil import copyfile
149

15-
import pandas as pd
1610
import subprocess
1711

1812
from Bio import SeqIO
1913

20-
# This is the SFA base package which provides the Core app class.
2114
from base import Core
2215

2316
MODULE_DIR = "/kb/module"
2417
TEMPLATES_DIR = os.path.join(MODULE_DIR, "lib/templates")
2518

2619

27-
class ExampleReadsApp(Core):
28-
def __init__(self, ctx, config, clients_class=None):
29-
"""
30-
This is required to instantiate the Core App class with its defaults
31-
and allows you to pass in more clients as needed.
32-
"""
33-
super().__init__(ctx, config, clients_class)
34-
# Here we adjust the instance attributes for our convenience.
35-
self.report = self.clients.KBaseReport
36-
self.ru = self.clients.ReadsUtils
37-
# self.shared_folder is defined in the Core App class.
38-
# TODO Add a self.wsid = a conversion of self.wsname
39-
40-
#def do_analysis(self, params: dict):
41-
# """
42-
# This method is where the main computation will occur.
43-
# """
44-
# read_refs = params["reads_ref"]
45-
# # Download the reads from KBase
46-
# ret = self.download_reads(read_refs)
47-
# # We use these downloaded reads and biopython to collect the first 10
48-
# # reads and their phred quality scores to create a new fastq file to
49-
# # upload to KBase.
50-
# for file_ref, file_info in ret["files"].items():
51-
# file_path = file_info["files"]["fwd"]
52-
# basename = os.path.basename(file_path)
53-
# with open(file_path) as reads:
54-
# record_iter = SeqIO.parse(reads, "fastq")
55-
# limit = 10
56-
# head = []
57-
# scores = []
58-
# counts = Counter()
59-
# for ix, record in enumerate(record_iter):
60-
# if ix >= limit:
61-
# break
62-
# head.append(record)
63-
# counts.update(str(record.seq))
64-
# scores.append(record.letter_annotations["phred_quality"])
65-
# filename = f"{basename}.head.fastq"
66-
# out_path = os.path.join(self.shared_folder, filename)
67-
# with open(out_path, "w") as out_reads:
68-
# SeqIO.write(head, out_reads, "fastq")
69-
#
70-
# # This method runs the process first and then returns the stdout and
71-
# # stderr all at once, so take care if your process produces a large
72-
# # amount of output.
73-
# process = subprocess.Popen(
74-
# ["/kb/module/scripts/random_logger.py"],
75-
# stdout=subprocess.PIPE,
76-
# stderr=subprocess.PIPE,
77-
# )
78-
#
79-
# stdout, stderr = self.get_streams(process)
80-
# # We are logging everything because the script we are running does not
81-
# # have a lot of output, but if what you run does then you might not
82-
# # want to log *everything* to the user.
83-
# logging.info(stdout)
84-
# if stderr:
85-
# logging.warning(stderr)
86-
# output_value = stdout.split("\n")[0].split(" ")[-2]
87-
# count_df = pd.DataFrame(sorted(counts.items()), columns=["base", "count"])
88-
#
89-
# # Upload the first 10 reads back to kbase as an object
90-
# upa = self.upload_reads(
91-
# name=params["output_name"], reads_path=out_path, wsname=params["workspace_name"]
92-
# )
93-
#
94-
# # Pass new data to generate the report.
95-
# params["count_df"] = count_df
96-
# params["output_value"] = output_value
97-
# params["scores"] = scores
98-
# params["upa"] = upa # Not currently used, but the ID of the uploaded reads
99-
# # This is the method that generates the HTML report
100-
# return self.generate_report(params)
101-
#
102-
@staticmethod
103-
def get_streams(process):
104-
"""
105-
Returns decoded stdout,stderr after loading the entire thing into memory
106-
"""
107-
stdout, stderr = process.communicate()
108-
return (stdout.decode("utf-8", "ignore"), stderr.decode("utf-8", "ignore"))
109-
110-
def upload_reads(self, name, reads_path, wsname):
111-
"""
112-
Upload reads back to the KBase Workspace. This method only uses the
113-
minimal parameters necessary to provide a demonstration. There are many
114-
more parameters which reads can provide, for example, interleaved, etc.
115-
By default, non-interleaved objects and those uploaded without a
116-
reverse file are saved as KBaseFile.SingleEndLibrary. See:
117-
https://githusb.com/kbaseapps/ReadsUtils/blob/master/lib/ReadsUtils/ReadsUtilsImpl.py#L115-L119
118-
param: filepath_to_reads - A filepath to a fastq fastq file to upload reads from
119-
param: wsname - The name of the workspace to upload to
120-
"""
121-
ur_params = {
122-
"fwd_file": reads_path,
123-
"name": name,
124-
"sequencing_tech": "Illumina",
125-
"wsname": wsname,
126-
"single_genome": 0,
127-
}
128-
# It is often useful to log parameters as they are passed.
129-
logging.warning(f">>>>>>>>>>>>>>>>>>>>{ur_params}")
130-
return self.ru.upload_reads(ur_params)
131-
132-
def download_reads(self, reads_ref, interleaved=False):
133-
"""
134-
Download a list of reads objects
135-
param: reads_ref - A list of reads references/upas
136-
"""
137-
dr_params = {"read_libraries": [reads_ref], "interleaved": None}
138-
# This uses the ReadsUtils client to download a specific workspace
139-
# object, saving it into the shared_folder and making it available to
140-
# the user.
141-
return self.ru.download_reads(dr_params)
142-
143-
def generate_report(self, params: dict):
144-
"""
145-
This method is where to define the variables to pass to the report.
146-
"""
147-
# This path is required to properly use the template.
148-
reports_path = os.path.join(self.shared_folder, "reports")
149-
# Path to the Jinja template. The template can be adjusted to change
150-
# the report.
151-
template_path = os.path.join(TEMPLATES_DIR, "report.html")
152-
# A sample multiplication table to use as output
153-
table = [[i * j for j in range(10)] for i in range(10)]
154-
headers = "one two three four five six seven eight nine ten".split(" ")
155-
# A count of the base calls in the reads
156-
count_df_html = params["count_df"].to_html()
157-
# Calculate a correlation table determined by the quality scores of
158-
# each base read. This requires pandas and matplotlib, and these are
159-
# listed in requirements.txt. You can see the resulting HTML file after
160-
# runing kb-sdk test in ./test_local/workdir/tmp/reports/index.html
161-
scores_df_html = (
162-
pd.DataFrame(params["scores"]).corr().style.background_gradient().render()
163-
)
164-
# The keys in this dictionary will be available as variables in the
165-
# Jinja template. With the current configuration of the template
166-
# engine, HTML output is allowed.
167-
template_variables = dict(
168-
count_df_html=count_df_html,
169-
headers=headers,
170-
scores_df_html=scores_df_html,
171-
table=table,
172-
upa=params["upa"],
173-
output_value=params["output_value"],
174-
)
175-
# The KBaseReport configuration dictionary
176-
config = dict(
177-
report_name=f"ExampleReadsApp_{str(uuid.uuid4())}",
178-
reports_path=reports_path,
179-
template_variables=template_variables,
180-
workspace_name=params["workspace_name"],
181-
)
182-
return self.create_report_from_template(template_path, config)
183-
18420
class BamConversion(Core):
18521
def __init__(self, ctx, config, app_config, clients_class=None):
18622
"""
18723
This is required to instantiate the Core App class with its defaults
18824
and allows you to pass in more clients as needed.
18925
"""
19026
super().__init__(ctx, config, clients_class)
191-
# Here we adjust the instance attributes for our convenience.
19227
self.dfu = self.clients.DataFileUtil
19328
self.report = self.clients.KBaseReport
19429
self.ru = self.clients.ReadsUtils
19530
self.app_config = app_config
196-
# self.shared_folder is defined in the Core App class.
197-
# TODO Add a self.wsid = a conversion of self.wsname
19831

19932
def do_analysis(self, params: dict):
20033
"""
20134
This method is where the main computation will occur.
20235
"""
20336
print(f"{json.dumps(params)=}")
20437
bam_file = params['bam_file']
205-
staging_path = bam_file if os.path.isfile(bam_file) else os.path.join("/staging/", bam_file)
206-
# Read and print first 1000 characters
207-
208-
38+
if os.path.isfile(bam_file):
39+
staging_path = bam_file
40+
else:
41+
staging_path = os.path.join("/staging/", bam_file)
20942

210-
logging.warning(f"{'@'*30} params: {params}")
21143
logging.warning(f"cwd: {os.getcwd()}")
212-
#bam_file_staging_path = self.dfu.download_staging_file({
213-
# 'staging_file_subdir_path': bam_file
214-
#}).get('copy_file_path')
215-
#logging.warning(f'{"&"*20}{bam_file_staging_path=}')
216-
#logging.warning(f"bam_file_staging_path: {bam_file_staging_path}")
21744
output_name = params['output_name']
21845
wsname = params['workspace_name']
21946
sequencing_tech = 'Illumina'
@@ -256,23 +83,9 @@ def bam_to_fastq(cls, bam_file, shared_folder=""): # add a dict parameter so tho
25683
if os.path.getsize("filename_end1.fq") < 100:
25784
raise ValueError("Generated FASTQ file is unexpectedly small — check input BAM or bedtools error")
25885

259-
with open("filename_end1.fq", 'rb') as f:
260-
content = f.read(1001)
261-
print("First 1001 characters from the file:")
262-
decoded = "".join([c if ord(c)>=32 else "?" for c in content.decode("ascii", "ignore")])
263-
print(f"{decoded=}")
264-
26586
output_path = os.path.join(shared_folder, 'output.fq')
26687
copyfile('filename_end1.fq', output_path)
267-
# Upload the fastq file we just made to a reads object in KBase
268-
# upa = self.upload_reads(
269-
# name=params["output_name"], reads_path=out_path, wsname=params["workspace_name"]
270-
# )
271-
#logging.warning(f">>>>>>>>>>>>>>>>>>>>{os.getcwd()}")
272-
#fastq_path = '/kb/module/test/filename_end1.fq'
273-
#fastq_file = open(fastq_path, 'r')
274-
#print(fastq_file.read())
275-
88+
27689
return output_path
27790

27891

@@ -295,7 +108,6 @@ def upload_reads(self, name, reads_path, workspace_name, sequencing_tech, interl
295108
"interleaved": interleaved
296109
#"single_genome": single_genome
297110
}
298-
# It is often useful to log parameters as they are passed.
299111
logging.warning(f">>>>>>>>>>>>>>>>>>>>{ur_params}")
300112
return self.ru.upload_reads(ur_params)
301113

@@ -306,11 +118,8 @@ def __init__(self, ctx, config, clients_class=None):
306118
and allows you to pass in more clients as needed.
307119
"""
308120
super().__init__(ctx, config, clients_class)
309-
# Here we adjust the instance attributes for our convenience.
310121
self.report = self.clients.KBaseReport
311122
self.ru = self.clients.ReadsUtils
312-
# self.shared_folder is defined in the Core App class.
313-
# TODO Add a self.wsid = a conversion of self.wsname
314123

315124
def intersection(self, first_file, second_file):
316125
file1 = first_file

requirements.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,3 @@ matplotlib==3.3.4
33
pandas==1.1.5
44
pytest==7.1.1
55
pytest-cov==3.0.0
6-
pysam>=0.19.0

test/aligned.bam

-391 Bytes
Binary file not shown.

test/kb_bedtools_server_test.py

Lines changed: 15 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,9 @@
66
import subprocess
77
import time
88
import unittest
9-
import unittest
109

1110
from configparser import ConfigParser
1211
from shutil import copyfile
13-
from unittest.mock import patch
1412

1513
from kb_bedtools.kb_bedtoolsImpl import kb_bedtools
1614
from kb_bedtools.kb_bedtoolsServer import MethodContext
@@ -77,41 +75,36 @@ def tearDownClass(cls):
7775
print("Test workspace was deleted")
7876

7977
def copy_bam_to_scratch(self):
80-
bam_src = os.path.join(os.path.dirname(__file__), "aligned.bam")
81-
bam_dst = os.path.join(self.scratch, "aligned.bam")
78+
bam_src = os.path.join(os.path.dirname(__file__), "minimal.bam")
79+
bam_dst = os.path.join(self.scratch, "minimal.bam")
8280

8381
shutil.copy(bam_src, bam_dst)
8482
print(f"Copied BAM file to scratch: {bam_dst}")
8583
return bam_dst
8684

87-
88-
# NOTE: According to Python unittest naming rules test method names should start from 'test'. # noqa
89-
# @unittest.skip("Skip test for debugging")
90-
# Now when run_kb_bedtools calls download_staging_file, it uses your mock
91-
@patch.object(DataFileUtil, "download_staging_file", side_effect=mock_download_staging_file)
92-
def test_your_method(self, mock_download):
93-
# Prepare test objects in workspace if needed using
94-
# self.getWsClient().save_objects({'workspace': self.getWsName(),
95-
# 'objects': []})
96-
#
97-
# Run your method by
98-
# ret = self.getImpl().your_method(self.getContext(), parameters...)
99-
#
100-
# Check returned data with
101-
# self.assertEqual(ret[...], ...) or other unittest methods
85+
def test_intersect(self):
86+
# in the test, use print() to put things in stdout
87+
first_file = 'GSE203496_xmoo1_line_pooled_assembly.gff'
88+
second_file = 'GSE240325_apo_rbfox_insitu_clustered.sorted.filtered_lite.gff'
89+
self.serviceImpl.run_kb_bedtools_intersect(
90+
self.ctx,
91+
{
92+
"workspace_name": self.wsName,
93+
"first_file" : first_file,
94+
"second_file" : second_file,
95+
"output_name": "intersectOutput",
96+
})
10297

10398
params = {
10499
"workspace_name": self.wsName,
105100
"reads_ref": "70257/2/1",
106101
"output_name": "ReadsOutputName",
107102
"interleaved": True,
108-
"bam_file": "aligned.bam",
103+
"bam_file": "minimal.bam",
109104
"fastq_path_name": os.path.join("/kb/module/work/tmp", "filename_end2.fq"),
110105
}
111106

112107
ret = self.serviceImpl.run_kb_bedtools(self.ctx, params)
113108

114-
print("REPORT:", ret)
115-
116109
self.assertIn("report_name", ret[0])
117110
self.assertIn("report_ref", ret[0])

0 commit comments

Comments
 (0)