Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions goombay-rs/src/alignment/edit/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
pub mod needleman_wunsch;
pub mod smith_waterman;
pub mod wagner_fischer;
pub mod waterman_smith_beyer;
128 changes: 128 additions & 0 deletions goombay-rs/src/alignment/edit/waterman_smith_beyer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
use crate::align::global_base::{GlobalAlgorithm, GlobalAlignmentModel, Metric};
use crate::align::scoring::ExtendedGapScoring;
use crate::align::{AlignmentData, GlobalAlignmentMatrix, PointerValues, Scoring};

pub struct WatermanSmithBeyer<S: Scoring + Clone> {
pub scores: S,
}

impl Default for WatermanSmithBeyer<ExtendedGapScoring> {
fn default() -> Self {
let scores = ExtendedGapScoring {
identity: 2,
mismatch: 1,
gap: 3,
extended_gap: 1,
};
Self { scores }
}
}

/// Affine gap cost for a gap of length k:
/// cost = gap_open + (k - 1) * gap_extend
fn gap_cost(gap_open: i32, gap_extend: i32, k: usize) -> i32 {
gap_open + (k as i32 - 1) * gap_extend
}

impl GlobalAlignmentMatrix<ExtendedGapScoring> for WatermanSmithBeyer<ExtendedGapScoring> {
fn compute(query: &str, subject: &str) -> GlobalAlignmentModel {
let wsb_default = WatermanSmithBeyer::default();
wsb_default.calculate_matrix(query, subject)
}

fn set_scores(scores: &ExtendedGapScoring) -> Self {
Self {
scores: scores.clone(),
}
}

fn calculate_matrix(&self, query: &str, subject: &str) -> GlobalAlignmentModel {
// Use new_gotoh to get 3 pointer matrices:
// pointer_matrix[0] = direction pointers (Match/Up/Left)
// pointer_matrix[1] = i_step (optimal up gap length)
// pointer_matrix[2] = j_step (optimal left gap length)
let mut alignments = AlignmentData::new_gotoh(query, subject);
let query_len = alignments.query.len() + 1;
let subject_len = alignments.subject.len() + 1;

let gap_open = self.scores.gap as i32;
let gap_extend = self.scores.extended_gap as i32;

// Initialise first column (gaps in subject)
alignments.pointer_matrix[0][0][0] = PointerValues::Left as i32;
for i in 1..query_len {
alignments.score_matrix[0][i][0] = -gap_cost(gap_open, gap_extend, i);
alignments.pointer_matrix[0][i][0] = PointerValues::Up as i32;
alignments.pointer_matrix[1][i][0] = i as i32; // i_step
}

// Initialise first row (gaps in query)
for j in 1..subject_len {
alignments.score_matrix[0][0][j] = -gap_cost(gap_open, gap_extend, j);
alignments.pointer_matrix[0][0][j] = PointerValues::Left as i32;
alignments.pointer_matrix[2][0][j] = j as i32; // j_step
}

// Fill scoring matrix
for i in 1..query_len {
for j in 1..subject_len {
// Diagonal (match/mismatch)
let match_score = if alignments.query[i - 1] == alignments.subject[j - 1] {
alignments.score_matrix[0][i - 1][j - 1] + self.scores.identity as i32
} else {
alignments.score_matrix[0][i - 1][j - 1] - self.scores.mismatch as i32
};

// Up gap: consider all gap lengths k from 1 to i
let mut ugap_score = i32::MIN;
let mut u_step: usize = 1;
for k in 1..=i {
let candidate =
alignments.score_matrix[0][i - k][j] - gap_cost(gap_open, gap_extend, k);
if candidate > ugap_score {
ugap_score = candidate;
u_step = k;
}
}

// Left gap: consider all gap lengths k from 1 to j
let mut lgap_score = i32::MIN;
let mut l_step: usize = 1;
for k in 1..=j {
let candidate =
alignments.score_matrix[0][i][j - k] - gap_cost(gap_open, gap_extend, k);
if candidate > lgap_score {
lgap_score = candidate;
l_step = k;
}
}

let tmax = *[match_score, ugap_score, lgap_score].iter().max().unwrap();
alignments.score_matrix[0][i][j] = tmax;

// Set pointer values (cumulative, matching NW pattern)
if tmax == match_score {
alignments.pointer_matrix[0][i][j] += PointerValues::Match as i32;
}
if tmax == ugap_score {
alignments.pointer_matrix[0][i][j] += PointerValues::Up as i32;
alignments.pointer_matrix[1][i][j] = u_step as i32;
}
if tmax == lgap_score {
alignments.pointer_matrix[0][i][j] += PointerValues::Left as i32;
alignments.pointer_matrix[2][i][j] = l_step as i32;
}
}
}

GlobalAlignmentModel {
data: alignments,
aligner: GlobalAlgorithm::WatermanSmithBeyer,
metric: Metric::Similarity,
identity: self.scores.identity,
mismatch: self.scores.mismatch,
gap: self.scores.gap,
all_alignments: false,
}
}
}
110 changes: 107 additions & 3 deletions goombay-rs/src/alignment/global_base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use jedvek::Matrix2D;
pub enum GlobalAlgorithm {
NeedlemanWunsch,
WagnerFischer,
WatermanSmithBeyer,
}

// Handles matrices that store similarity score vs distance score
Expand Down Expand Up @@ -39,10 +40,10 @@ impl GlobalAlignmentModel {
}

fn select_aligner(&self) -> Box<dyn Iterator<Item = (String, String)> + '_> {
let i = self.data.query.len();
let j = self.data.subject.len();
match self.aligner {
GlobalAlgorithm::NeedlemanWunsch | GlobalAlgorithm::WagnerFischer => {
let i = self.data.query.len();
let j = self.data.subject.len();
let global_aligner = GlobalAligner {
query_chars: &self.data.query,
subject_chars: &self.data.subject,
Expand All @@ -53,9 +54,20 @@ impl GlobalAlignmentModel {
up_val: PointerValues::Up as i32,
left_val: PointerValues::Left as i32,
};
// Turns struct into dynamically dispatched iterator
Box::new(global_aligner)
}
GlobalAlgorithm::WatermanSmithBeyer => {
let wsb_aligner = WsbAligner {
query_chars: &self.data.query,
subject_chars: &self.data.subject,
pointer_matrix: &self.data.pointer_matrix[0],
i_step_matrix: &self.data.pointer_matrix[1],
j_step_matrix: &self.data.pointer_matrix[2],
stack: vec![(Vec::new(), Vec::new(), i, j)],
all_alignments: self.all_alignments,
};
Box::new(wsb_aligner)
}
}
}

Expand Down Expand Up @@ -226,3 +238,95 @@ impl<'a> Iterator for GlobalAligner<'a> {
None
}
}

/// Traceback aligner for WatermanSmithBeyer that handles variable-length gaps.
///
/// Unlike GlobalAligner which always steps by 1, WSB gaps can skip multiple
/// positions. The step sizes are stored in separate matrices.
pub struct WsbAligner<'a> {
pub query_chars: &'a [char],
pub subject_chars: &'a [char],
pub pointer_matrix: &'a Matrix2D<i32>,
pub i_step_matrix: &'a Matrix2D<i32>,
pub j_step_matrix: &'a Matrix2D<i32>,
pub stack: Vec<(Vec<char>, Vec<char>, usize, usize)>,
pub all_alignments: bool,
}

impl<'a> Iterator for WsbAligner<'a> {
type Item = (String, String);

fn next(&mut self) -> Option<Self::Item> {
let identity = PointerValues::Match as i32;
let up = PointerValues::Up as i32;
let left = PointerValues::Left as i32;

let identity_array = [
identity,
identity + up,
identity + left,
identity + up + left,
];
let up_array = [up, up + identity, up + left, up + identity + left];
let left_array = [left, left + identity, left + up, left + identity + up];

while let Some((qs_align, ss_align, i, j)) = self.stack.pop() {
if i == 0 && j == 0 {
let mut qs_align = qs_align;
let mut ss_align = ss_align;
qs_align.reverse();
ss_align.reverse();
let qs_aligned = qs_align.into_iter().collect::<String>();
let ss_aligned = ss_align.into_iter().collect::<String>();

if !self.all_alignments {
self.stack.clear();
}
return Some((qs_aligned, ss_aligned));
}

// Diagonal: match/mismatch (step by 1 in both dimensions)
if identity_array.contains(&self.pointer_matrix[i][j]) {
let mut new_qs_align = qs_align.clone();
new_qs_align.push(self.query_chars[i - 1]);
let mut new_ss_align = ss_align.clone();
new_ss_align.push(self.subject_chars[j - 1]);
self.stack.push((new_qs_align, new_ss_align, i - 1, j - 1));
if !self.all_alignments {
continue;
}
}

// Up gap: step by i_step positions (gap in subject)
if up_array.contains(&self.pointer_matrix[i][j]) {
let step = self.i_step_matrix[i][j] as usize;
let mut new_qs_align = qs_align.clone();
let mut new_ss_align = ss_align.clone();
for s in 0..step {
new_qs_align.push(self.query_chars[i - 1 - s]);
new_ss_align.push('-');
}
self.stack.push((new_qs_align, new_ss_align, i - step, j));
if !self.all_alignments {
continue;
}
}

// Left gap: step by j_step positions (gap in query)
if left_array.contains(&self.pointer_matrix[i][j]) {
let step = self.j_step_matrix[i][j] as usize;
let mut new_qs_align = qs_align.clone();
let mut new_ss_align = ss_align.clone();
for s in 0..step {
new_qs_align.push('-');
new_ss_align.push(self.subject_chars[j - 1 - s]);
}
self.stack.push((new_qs_align, new_ss_align, i, j - step));
if !self.all_alignments {
continue;
}
}
}
None
}
}
1 change: 1 addition & 0 deletions goombay-rs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,5 @@ pub mod align {
pub use edit::needleman_wunsch::NeedlemanWunsch;
pub use edit::smith_waterman::SmithWaterman;
pub use edit::wagner_fischer::WagnerFischer;
pub use edit::waterman_smith_beyer::WatermanSmithBeyer;
}
Loading