Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
ed87dcc
added fasta parsing
jonas-fuchs Jan 19, 2026
9177fbc
implemented fasta parsing and dimer check
jonas-fuchs Jan 23, 2026
aab3a4c
inluded endoverlap in log
jonas-fuchs Jan 26, 2026
7f3b28b
renamed function
jonas-fuchs Jan 26, 2026
714834a
Merge branch 'master' into scheme_compatibility
jonas-fuchs Jan 26, 2026
5c6de3c
optimized the primer dimer solve
jonas-fuchs Jan 27, 2026
ae2980a
added multi-processing for final dimer solve
jonas-fuchs Jan 27, 2026
e0f94f0
added multi-batch processing for kmer evaluations and amplicon evalau…
jonas-fuchs Jan 27, 2026
8b3a88f
fix when there are no dimers
jonas-fuchs Jan 27, 2026
77cb3a5
fixed bug with compatible primers and qPCR mode
jonas-fuchs Jan 28, 2026
e52f558
moved get_permutations and changed threshold to required
jonas-fuchs Jan 29, 2026
7b1d130
Update varvamp/scripts/primers.py
jonas-fuchs Jan 31, 2026
c6cb54f
Update varvamp/scripts/primers.py
jonas-fuchs Jan 31, 2026
336b8cb
Update varvamp/command.py
jonas-fuchs Jan 31, 2026
2ec7d45
Update varvamp/scripts/qpcr.py
jonas-fuchs Jan 31, 2026
e9679cd
Update varvamp/scripts/qpcr.py
jonas-fuchs Jan 31, 2026
0cb29a7
Update varvamp/scripts/qpcr.py
jonas-fuchs Jan 31, 2026
6c4c75d
moved deduplication to parse function
jonas-fuchs Jan 31, 2026
e5ebc68
changed combination logic
jonas-fuchs Jan 31, 2026
9b8bcec
update
jonas-fuchs Jan 31, 2026
2f3c47f
Merge remote-tracking branch 'origin/scheme_compatibility' into schem…
jonas-fuchs Jan 31, 2026
4765001
fixed outer loop
jonas-fuchs Jan 31, 2026
4fb8226
batch size calc for find primers
jonas-fuchs Jan 31, 2026
549d48b
finalized review from wm75
jonas-fuchs Jan 31, 2026
5fc0795
small stuff
jonas-fuchs Jan 31, 2026
1bad536
added multi-batch processing for deltaG and fixed bug for batch calcu…
jonas-fuchs Feb 3, 2026
2cf54fa
added n_to_test again
jonas-fuchs Feb 3, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 44 additions & 6 deletions varvamp/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,14 @@ def get_args(sysargs):
type=str,
default="varVAMP"
)
par.add_argument(
"--compatible-primers",
metavar="None",
type=str,
default=None,
help="FASTA primer file with which new primers should not form dimers. Sequences >40 nt are ignored. Can significantly increase runtime."
)

for par in (SINGLE_parser, TILED_parser):
par.add_argument(
"-t",
Expand Down Expand Up @@ -137,7 +145,6 @@ def get_args(sysargs):
QPCR_parser.add_argument(
"-t",
"--threshold",
required=True,
metavar="0.9",
type=float,
default=0.9,
Expand Down Expand Up @@ -187,6 +194,11 @@ def shared_workflow(args, log_file):

# read in alignment and preprocess
preprocessed_alignment = alignment.preprocess(args.input[0])
# read in external primer sequences with which new primers should not form dimers
if args.compatible_primers is not None:
compatible_primers = primers.parse_primer_fasta(args.compatible_primers)
else:
compatible_primers = None
# check alignment length and number of gaps and report if its significantly more/less than expected
logging.check_alignment_length(preprocessed_alignment, log_file)
logging.check_gaped_sequences(preprocessed_alignment, log_file)
Expand Down Expand Up @@ -289,7 +301,22 @@ def shared_workflow(args, log_file):
progress_text=f"{len(left_primer_candidates)} fw and {len(right_primer_candidates)} rv potential primers"
)

return alignment_cleaned, majority_consensus, ambiguous_consensus, primer_regions, left_primer_candidates, right_primer_candidates
# filter primers against non-dimer sequences if provided, can use multi-processing
if compatible_primers is not None:
left_primer_candidates = primers.filter_non_dimer_candidates(
left_primer_candidates, compatible_primers, args.threads
)
right_primer_candidates = primers.filter_non_dimer_candidates(
right_primer_candidates, compatible_primers, args.threads
)
logging.varvamp_progress(
log_file,
progress=0.65,
job="Filtering primers against provided primers.",
progress_text=f"{len(left_primer_candidates)} fw and {len(right_primer_candidates)} rv primers after filtering"
)

return alignment_cleaned, majority_consensus, ambiguous_consensus, primer_regions, left_primer_candidates, right_primer_candidates, compatible_primers


def single_and_tiled_shared_workflow(args, left_primer_candidates, right_primer_candidates, data_dir, log_file):
Expand All @@ -314,8 +341,7 @@ def single_and_tiled_shared_workflow(args, left_primer_candidates, right_primer_
)
if not amplicons:
logging.raise_error(
"no amplicons found. Increase the max amplicon length or \
number of ambiguous bases or lower threshold!\n",
"no amplicons found. Increase the max amplicon length or number of ambiguous bases or lower threshold!\n",
log_file,
exit=True
)
Expand Down Expand Up @@ -408,7 +434,7 @@ def tiled_workflow(args, amplicons, left_primer_candidates, right_primer_candida
return amplicon_scheme


def qpcr_workflow(args, data_dir, alignment_cleaned, ambiguous_consensus, majority_consensus, left_primer_candidates, right_primer_candidates, log_file):
def qpcr_workflow(args, data_dir, alignment_cleaned, ambiguous_consensus, majority_consensus, left_primer_candidates, right_primer_candidates, compatible_primers, log_file):
"""
part of the workflow specific for the tiled mode
"""
Expand Down Expand Up @@ -445,6 +471,17 @@ def qpcr_workflow(args, data_dir, alignment_cleaned, ambiguous_consensus, majori
progress_text=f"{len(qpcr_probes)} potential qPCR probes"
)

# filter primers against non-dimer sequences if provided
if compatible_primers is not None:
qpcr_probes = primers.filter_non_dimer_candidates(
qpcr_probes, compatible_primers, args.threads)
logging.varvamp_progress(
log_file,
progress=0.75,
job="Filtering probes against provided primer sequences.",
progress_text=f"{len(qpcr_probes)} potential qPCR probes after filtering"
)

# find unique amplicons with a low penalty and an internal probe
qpcr_scheme_candidates = qpcr.find_qcr_schemes(qpcr_probes, left_primer_candidates, right_primer_candidates, majority_consensus, ambiguous_consensus)
if not qpcr_scheme_candidates:
Expand Down Expand Up @@ -507,7 +544,7 @@ def main():
blast.check_BLAST_installation(log_file)

# mode unspecific part of the workflow
alignment_cleaned, majority_consensus, ambiguous_consensus, primer_regions, left_primer_candidates, right_primer_candidates = shared_workflow(args, log_file)
alignment_cleaned, majority_consensus, ambiguous_consensus, primer_regions, left_primer_candidates, right_primer_candidates, compatible_primers = shared_workflow(args, log_file)

# write files that are shared in all modes
reporting.write_regions_to_bed(primer_regions, args.name, data_dir)
Expand Down Expand Up @@ -590,6 +627,7 @@ def main():
majority_consensus,
left_primer_candidates,
right_primer_candidates,
compatible_primers,
log_file
)

Expand Down
4 changes: 2 additions & 2 deletions varvamp/scripts/default_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@
PRIMER_HAIRPIN = 47 # max melting temp for secondary structure
PRIMER_GC_END = (1, 3) # min/max GCs in the last 5 bases of the 3' end
PRIMER_MIN_3_WITHOUT_AMB = 3 # min len of 3' without ambiguous charaters
PRIMER_MAX_DIMER_TMP = 47 # max melting temp for dimers (homo- or heterodimers)
PRIMER_MAX_DIMER_TMP = 35 # max melting temp for dimers (homo- or heterodimers)
END_OVERLAP = 5 # maximum allowed nt overlap between primer ends

# QPCR parameters
# basic probe parameters
Expand All @@ -42,7 +43,6 @@
QPRIMER_DIFF = 2 # maximal temperature diff of qPCR primers
QPROBE_TEMP_DIFF = (5, 10) # min/max temp diff between probe and primers
QPROBE_DISTANCE = (4, 15) # min/max distance to the primer on the same strand
END_OVERLAP = 5 # maximum allowed nt overlap between the ends of probe and primer
QAMPLICON_LENGTH = (70, 200) # min/max length of the qPCR amplicon
QAMPLICON_GC = (40, 60) # GC min/max of the qPCR amplicon
QAMPLICON_DEL_CUTOFF = 4 # consider regions of the alignment for deltaG calculation if they have smaller deletions than cutoff
Expand Down
108 changes: 106 additions & 2 deletions varvamp/scripts/primers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,18 @@
primer creation and evaluation
"""

# BUILTIN
from itertools import chain
import re
import multiprocessing

# LIBS
from Bio.Seq import Seq
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
from Bio.Seq import Seq
from Bio.Seq import MutableSeq

from Bio import SeqIO
import primer3 as p3

# varVAMP
from varvamp.scripts import config
from varvamp.scripts import config, reporting


def calc_gc(seq):
Expand Down Expand Up @@ -59,6 +65,49 @@ def calc_dimer(seq1, seq2, structure=False):
)


def has_end_overlap(dimer_result):
"""
checks if two oligos overlap at their ends
Example:
xxxxxxxxtagc-------
--------atcgxxxxxxx
"""
if dimer_result.structure_found:
# clean structure
structure = [x[4:] for x in dimer_result.ascii_structure_lines]
# check if we have an overlap that is large enough
overlap = len(structure[1].replace(" ", ""))
if overlap <= config.END_OVERLAP:
return False
# not more than one conseq. internal mismatch
if ' ' in structure[1].lstrip(" "):
return False
# The alignment length of the ACII structure is equal to the first part of the structure
# and the maximum possible alignment length is the cumulative length of both primers (-> no overlap at all)
alignment_length = len(structure[0])
maximum_alignment_length = len(re.findall("[ATCG]", "".join(structure)))
# this means that for a perfect end overlap the alignment length is equal to:
# len(primer1) + len(primer2) - overlap.
if alignment_length >= maximum_alignment_length - overlap:
return True

return False


def is_dimer(seq1, seq2):
"""
check if two sequences dimerize above threshold or are overlapping at their ends
"""
dimer_result = calc_dimer(seq1, seq2, structure=True)

if dimer_result.tm > config.PRIMER_MAX_DIMER_TMP:
return True
if has_end_overlap(dimer_result):
return True

return False


def calc_max_polyx(seq):
"""
calculate maximum polyx of a seq
Expand Down Expand Up @@ -261,13 +310,14 @@ def filter_kmer_direction_independent(seq, primer_temps=config.PRIMER_TMP, gc_ra
filter kmer for temperature, gc content,
poly x, dinucleotide repeats and homodimerization
"""

return(
(primer_temps[0] <= calc_temp(seq) <= primer_temps[1])
and (gc_range[0] <= calc_gc(seq) <= gc_range[1])
and (calc_max_polyx(seq) <= config.PRIMER_MAX_POLYX)
and (calc_max_dinuc_repeats(seq) <= config.PRIMER_MAX_DINUC_REPEATS)
and (calc_base_penalty(seq, primer_temps, gc_range, primer_sizes) <= config.PRIMER_MAX_BASE_PENALTY)
and (calc_dimer(seq, seq).tm <= config.PRIMER_MAX_DIMER_TMP)
and not is_dimer(seq, seq)
)


Expand All @@ -291,6 +341,60 @@ def filter_kmer_direction_dependend(direction, kmer, ambiguous_consensus):
)


def parse_primer_fasta(fasta_path):
"""
Parse a primer FASTA file and return a list of sequences using BioPython.
"""

sequences = []

for record in SeqIO.parse(fasta_path, "fasta"):
seq = str(record.seq).lower()
# Only include primers up to 40 nucleotides
if len(seq) <= 40:
sequences.append(reporting.get_permutations(seq))

return list(chain.from_iterable(sequences))


def check_primer_against_externals(args):
"""
Worker function to check a single primer against all external sequences.
Returns the primer if it passes, None otherwise.
"""

primer, external_sequences = args

for seq in external_sequences:
if is_dimer(primer[0], seq):
return None

return primer


def filter_non_dimer_candidates(primer_candidates, external_sequences, n_threads):
"""
Filter out primer candidates that form dimers with external sequences.
Uses multiprocessing to speed up checks.
"""
# Deduplicate external sequences to reduce redundant checks
unique_sequences = []
seen = set()
for seq in external_sequences:
if seq not in seen:
unique_sequences.append(seq)
seen.add(seq)

with multiprocessing.Pool(processes=n_threads) as pool:
# Prepare arguments for each primer
args = [(primer, unique_sequences) for primer in primer_candidates]
# Process in parallel
results = pool.map(check_primer_against_externals, args)

# Filter out None results
return [primer for primer in results if primer is not None]


def find_primers(kmers, ambiguous_consensus, alignment):
"""
filter kmers direction specific and append penalties
Expand Down
23 changes: 1 addition & 22 deletions varvamp/scripts/qpcr.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,26 +139,6 @@ def hardfilter_amplicon(majority_consensus, left_primer, right_primer):
)


def check_end_overlap(dimer_result):
"""
checks if two oligos overlap at their ends (pretty rare)
Example:
xxxxxxxxtagc-------
--------atcgxxxxxxx
"""
if dimer_result.structure_found:
# clean structure
structure = [x[4:] for x in dimer_result.ascii_structure_lines]
# calc overlap and the cumulative len of the oligos
overlap = len(structure[1].replace(" ", ""))
nt_count = len(re.findall("[ATCG]", "".join(structure)))
# check for overlaps at the ends and the min overlap (allows for some amount of mismatches)
if overlap > config.END_OVERLAP and nt_count <= len(structure[0]) + overlap + 1 and " " not in structure[1].lstrip(" "):
return True

return False


def forms_dimer_or_overhangs(right_primer, left_primer, probe, ambiguous_consensus):
"""
checks if combinations of primers/probe form dimers or overhangs
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

needs better explanation that now its a check for all permutations

Expand All @@ -179,8 +159,7 @@ def forms_dimer_or_overhangs(right_primer, left_primer, probe, ambiguous_consens
for combination in [(probe_per, left_per), (probe_per, right_per)]:
for oligo1 in combination[0]:
for oligo2 in combination[1]:
dimer_result = primers.calc_dimer(oligo1, oligo2, structure=True)
if dimer_result.tm >= config.PRIMER_MAX_DIMER_TMP or check_end_overlap(dimer_result):
if primers.is_dimer(oligo1, oligo2):
forms_structure = True
break
# break all loops because we found an unwanted structure in one of the permutations
Expand Down
6 changes: 3 additions & 3 deletions varvamp/scripts/scheme.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def find_amplicons(all_primers, opt_len, max_len):
amplicon_length = right_primer[2] - left_primer[1]
if not opt_len <= amplicon_length <= max_len:
continue
if primers.calc_dimer(left_primer[0], right_primer[0]).tm > config.PRIMER_MAX_DIMER_TMP:
if primers.is_dimer(left_primer[0], right_primer[0]):
continue
# calculate length dependend amplicon costs as the cumulative primer
# penalty multiplied by the e^(fold length of the optimal length).
Expand Down Expand Up @@ -309,7 +309,7 @@ def test_scheme_for_dimers(amplicon_scheme):
current_seq = current_primer[2][0]
for tested in tested_primers:
tested_seq = tested[2][0]
if primers.calc_dimer(current_seq, tested_seq).tm <= config.PRIMER_MAX_DIMER_TMP:
if not primers.is_dimer(current_seq, tested_seq):
continue
primer_dimers.append((current_primer, tested))
# and remember all tested primers
Expand Down Expand Up @@ -357,7 +357,7 @@ def test_overlaps_for_dimers(overlapping_primers):
for second_overlap in overlapping_primers[1]:
# return the first match. primers are sorted by penalty.
# first pair that makes it has the lowest penalty
if primers.calc_dimer(first_overlap[2][0], second_overlap[2][0]).tm <= config.PRIMER_MAX_DIMER_TMP:
if not primers.is_dimer(first_overlap[2][0], second_overlap[2][0]):
return [first_overlap, second_overlap]


Expand Down