Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 6 additions & 7 deletions phylopypruner/msa.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,17 +59,16 @@ def add_sequence(self, seq_record=None, description="", sequence_data=""):
seq_record = sequence.Sequence()
seq_record.description = description
seq_record.sequence_data = sequence_data
seq_record.otu = re.split(r"\||@", seq_record.description)[0]
seq_record.otu = re.split(r"\||@|_", seq_record.description)[0]
try:
seq_record.identifier = re.split(
r"\||@", seq_record.description)[1]
except IndexError:
report.warning("no description found on split with | or @")
seq_record.identifier = re.search(r"[|@_]([^ ]*)", seq_record.description).group(1)
except AttributeError:
report.warning("no description found on split with | , _ or @")
seq_record.identifier = None
if description:
seq_record.otu = re.split(r"\||@", seq_record.description)[0]
seq_record.otu = re.split(r"\||@|_", seq_record.description)[0]
if sequence_data:
seq_record.identifier = re.split(r"\||@", seq_record.description)[1]
seq_record.identifier = re.search(r"[|@_]([^ ]*)", seq_record.description).group(1)

self.sequences.append(seq_record)
return seq_record
Expand Down
19 changes: 11 additions & 8 deletions phylopypruner/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,17 @@

def validate_input(msa, tree, tree_path):
    """Report an error unless the MSA and the tree hold the same names.

    msa: object providing iter_descriptions() and a filename attribute.
    tree: object providing iter_names().
    tree_path: path of the tree file; only used in the error message.

    The comparison is symmetric: a sequence description that is absent from
    the tree and a tree name that is absent from the MSA are both treated as
    a mismatch. On a mismatch the offending names are written to stderr and
    report.error() is called; otherwise nothing happens.
    """
    descriptions = set(msa.iter_descriptions())
    names = set(tree.iter_names())

    # Names present on one side only, in each direction.
    missing_in_names = descriptions - names
    missing_in_descriptions = names - descriptions

    if not (missing_in_names or missing_in_descriptions):
        return

    # Sets have no stable order; sort so the diagnostic is reproducible.
    print("Missing in tree names:",
          sorted(missing_in_names, key=str), file=sys.stderr)
    print("Missing in sequences:",
          sorted(missing_in_descriptions, key=str), file=sys.stderr)
    report.error("MSA names don't match tree \n {}\n {}".format(
        msa.filename, tree_path))


def run(settings, msa, tree):
Expand Down
3 changes: 2 additions & 1 deletion phylopypruner/sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ def __init__(self, description="", sequence_data=""):
self._sequence_data = str(sequence_data)
self._is_alignment = bool(self.is_alignment)
if description:
self._otu, self._identifier = re.split(r"\||@", description)
self._otu = re.split(r"\||@|_", description)[0]
self._identifier = re.search(r"[|@_]([^ ]*)", description).group(1)
else:
self._otu = ""
self._identifier = ""
Expand Down
6 changes: 3 additions & 3 deletions phylopypruner/tree_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def iter_sisters(self):

def otu(self):
    """Returns the OTU to which this node belongs.

    The OTU is the part of self.name that precedes the first "|", "@" or
    "_" delimiter. A name without any delimiter is returned unchanged.
    """
    # Only the leading field is needed, so stop after the first delimiter.
    return re.split(r"[|@_]", self.name, maxsplit=1)[0]

def is_root(self):
"Returns True if this node lacks a parent."
Expand Down Expand Up @@ -526,15 +526,15 @@ def iter_names(self):
def iter_otus(self):
    """Returns an iterator object that includes all OTUs within this node.

    One OTU is yielded per name produced by self.iter_names(); it is the
    part of the name that precedes the first "|", "@" or "_" delimiter.
    Duplicates are not removed.
    """
    for name in self.iter_names():
        # Only the leading field is needed, so stop after the first delimiter.
        yield re.split(r"[|@_]", name, maxsplit=1)[0]

def iter_identifiers(self):
    """
    Returns an iterator object that includes all identifiers in this node.

    For every name produced by self.iter_names(), the identifier is the run
    of non-space characters that follows the first "|", "@" or "_"
    delimiter.

    Raises AttributeError while iterating if a name contains none of the
    delimiters, because re.search() then returns None.
    """
    for name in self.iter_names():
        # NOTE(review): names without a delimiter fail here by design of the
        # current code; confirm whether callers would prefer None instead.
        yield re.search(r"[|@_]([^ ]*)", name).group(1)

def view(self):
Expand Down