diff --git a/src/opencloning/dna_functions.py b/src/opencloning/dna_functions.py index f86937d..e415d58 100644 --- a/src/opencloning/dna_functions.py +++ b/src/opencloning/dna_functions.py @@ -274,6 +274,15 @@ def custom_file_parser( out = list() with file_streamer as handle: + if sequence_file_format == 'genbank': + # Filter out lines starting with "BASE COUNT" (ignore leading whitespace) + # TODO: Remove if biopython handles this correctly + filtered_lines = list() + for line in handle: + if not line.lstrip().startswith('BASE COUNT'): + filtered_lines.append(line) + handle = io.StringIO(''.join(filtered_lines)) + try: for parsed_seq in seqio_parse(handle, sequence_file_format): circularize = circularize or ( diff --git a/tests/test_dna_functions.py b/tests/test_dna_functions.py index d85105c..06283b5 100644 --- a/tests/test_dna_functions.py +++ b/tests/test_dna_functions.py @@ -52,6 +52,10 @@ def test_permissive_parser_other(self): plasmid = custom_file_parser(f, 'genbank')[0] self.assertEqual(plasmid.circular, True) + def test_permissive_parser_base_count_misplaced(self): + with open(f'{test_files}/base_count_misplaced.gb', 'r') as f: + custom_file_parser(f, 'genbank')[0] + class MinorFunctionsTest(unittest.TestCase): def test_correct_name(self): diff --git a/tests/test_files/base_count_misplaced.gb b/tests/test_files/base_count_misplaced.gb new file mode 100644 index 0000000..11c7809 --- /dev/null +++ b/tests/test_files/base_count_misplaced.gb @@ -0,0 +1,27 @@ +LOCUS name 136 bp DNA linear UNK 01-JAN-1980 +DEFINITION description. +ACCESSION id +VERSION id +KEYWORDS . +SOURCE . + ORGANISM . + . +BASE COUNT 1284 a 1068 c 1078 g 1308 t +FEATURES Location/Qualifiers + protein_bind 1..34 + /label="loxP" + protein_bind 35..68 + /label="lox66" + protein_bind complement(69..102) + /label="lox66" + protein_bind 69..102 + /label="lox71" + protein_bind complement(35..68) + /label="lox71" + protein_bind 103..136 + /label="loxP_mutant" +ORIGIN + 1 ataacttcgt atattttatt ttatacgaag ttatataact tcgtatattt tattttatac + 61 gaacggtata ccgttcgtat attttatttt atacgaagtt attaccgttc gtatatttta + 121 ttttatacga acggta +//