Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions src/opencloning/dna_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,15 @@ def custom_file_parser(
out = list()

with file_streamer as handle:
if sequence_file_format == 'genbank':
# Filter out lines starting with "BASE COUNT" (ignore leading whitespace)
# TODO: Remove if biopython handles this correctly
filtered_lines = list()
for line in handle:
if not line.lstrip().startswith('BASE COUNT'):
filtered_lines.append(line)
handle = io.StringIO(''.join(filtered_lines))

try:
for parsed_seq in seqio_parse(handle, sequence_file_format):
circularize = circularize or (
Expand Down
4 changes: 4 additions & 0 deletions tests/test_dna_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@ def test_permissive_parser_other(self):
plasmid = custom_file_parser(f, 'genbank')[0]
self.assertEqual(plasmid.circular, True)

def test_permissive_parser_base_count_misplaced(self):
    """Parse a GenBank file whose ``BASE COUNT`` line precedes ``FEATURES``.

    The fixture ``base_count_misplaced.gb`` has a ``BASE COUNT`` line in the
    header, before the feature table. The test checks that the file is
    parsed without raising and that the single 136 bp record is returned.
    """
    with open(f'{test_files}/base_count_misplaced.gb', 'r') as f:
        sequences = custom_file_parser(f, 'genbank')

    # The fixture contains exactly one record (one LOCUS ... // section).
    self.assertEqual(len(sequences), 1)
    # LOCUS declares 136 bp and ORIGIN holds 60 + 60 + 16 bases; the bogus
    # BASE COUNT totals must not influence the parsed sequence.
    self.assertEqual(len(sequences[0]), 136)


class MinorFunctionsTest(unittest.TestCase):
def test_correct_name(self):
Expand Down
27 changes: 27 additions & 0 deletions tests/test_files/base_count_misplaced.gb
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
LOCUS name 136 bp DNA linear UNK 01-JAN-1980
DEFINITION description.
ACCESSION id
VERSION id
KEYWORDS .
SOURCE .
ORGANISM .
.
BASE COUNT 1284 a 1068 c 1078 g 1308 t
FEATURES Location/Qualifiers
protein_bind 1..34
/label="loxP"
protein_bind 35..68
/label="lox66"
protein_bind complement(69..102)
/label="lox66"
protein_bind 69..102
/label="lox71"
protein_bind complement(35..68)
/label="lox71"
protein_bind 103..136
/label="loxP_mutant"
ORIGIN
1 ataacttcgt atattttatt ttatacgaag ttatataact tcgtatattt tattttatac
61 gaacggtata ccgttcgtat attttatttt atacgaagtt attaccgttc gtatatttta
121 ttttatacga acggta
//