From 944358294467d41080e9a26a01feeb02ae12bc43 Mon Sep 17 00:00:00 2001 From: Shun Sakuraba Date: Wed, 13 Aug 2025 16:03:27 +0900 Subject: [PATCH] Fixed templating only allowing gapless alignment Previously, the template mechanism only allowed gapless alignments. Gapped alignments were effectively prohibited because of a large negative gap penalty. This commit fixes the problem. --- src/boltz/data/feature/featurizerv2.py | 3 ++- src/boltz/data/parse/schema.py | 24 ++++++++++++++++-------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/src/boltz/data/feature/featurizerv2.py b/src/boltz/data/feature/featurizerv2.py index 2fcb30713..454b28daf 100644 --- a/src/boltz/data/feature/featurizerv2.py +++ b/src/boltz/data/feature/featurizerv2.py @@ -1800,6 +1800,7 @@ def process_template_features( for template in templates: offset = template.template_st - template.query_st + query_templated_segments = set(range(template.query_st, template.query_en)) # Get query and template tokens to map residues query_tokens = data.tokens @@ -1810,7 +1811,7 @@ def process_template_features( # Get the template tokens at the query residues chain_id = tmpl_chain_name_to_asym_id[template.template_chain] toks = template_tokens[template_tokens["asym_id"] == chain_id] - toks = [t for t in toks if t["res_idx"] - offset in q_indices] + toks = [t for t in toks if t["res_idx"] - offset in query_templated_segments] for t in toks: q_idx = q_indices[t["res_idx"] - offset] row_tokens.append( diff --git a/src/boltz/data/parse/schema.py b/src/boltz/data/parse/schema.py index ecff892c3..cd39f534a 100644 --- a/src/boltz/data/parse/schema.py +++ b/src/boltz/data/parse/schema.py @@ -523,17 +523,25 @@ def get_local_alignments(query: str, template: str) -> list[Alignment]: """ aligner = Align.PairwiseAligner(scoring="blastp") aligner.mode = "local" - aligner.open_gap_score = -1000 - aligner.extend_gap_score = -1000 + # Biopython blastp default gap score + aligner.open_gap_score = 
-12.0 + aligner.extend_gap_score = -1.0 alignments = [] - for result in aligner.align(query, template): - coordinates = result.coordinates + # Take first alignment as the "best" alignment + result = aligner.align(query, template)[0] + coordinates = result.coordinates + for ipos in range(0, coordinates.shape[1] - 1): + query_st, query_en = coordinates[0, ipos:ipos+2] + template_st, template_en = coordinates[1, ipos:ipos+2] + if query_st == query_en or template_st == template_en: + # insertion or deletion, skip the chunk + continue alignment = Alignment( - query_st=int(coordinates[0][0]), - query_en=int(coordinates[0][1]), - template_st=int(coordinates[1][0]), - template_en=int(coordinates[1][1]), + query_st=int(query_st), + query_en=int(query_en), + template_st=int(template_st), + template_en=int(template_en) ) alignments.append(alignment)