jonas-fuchs · jonas-fuchs · Feb 3, 2026 · Jan 19, 2026 · Jan 23, 2026 · Jan 26, 2026
diff --git a/varvamp/command.py b/varvamp/command.py
@@ -85,6 +85,14 @@ def get_args(sysargs):
             type=str,
             default="varVAMP"
         )
+        par.add_argument(
+            "--compatible-primers",
+            metavar="None",
+            type=str,
+            default=None,
+            help="FASTA primer file with which new primers should not form dimers. Sequences >40 nt are ignored. Can significantly increase runtime."
+        )
+
     for par in (SINGLE_parser, TILED_parser):
         par.add_argument(
             "-t",
@@ -137,7 +145,6 @@ def get_args(sysargs):
     QPCR_parser.add_argument(
         "-t",
         "--threshold",
-        required=True,
         metavar="0.9",
         type=float,
         default=0.9,
@@ -187,6 +194,11 @@ def shared_workflow(args, log_file):
 
     # read in alignment and preprocess
     preprocessed_alignment = alignment.preprocess(args.input[0])
+    # read in external primer sequences with which new primers should not form dimers
+    if args.compatible_primers is not None:
+        compatible_primers = primers.parse_primer_fasta(args.compatible_primers)
+    else:
+        compatible_primers = None
     # check alignment length and number of gaps and report if its significantly more/less than expected
     logging.check_alignment_length(preprocessed_alignment, log_file)
     logging.check_gaped_sequences(preprocessed_alignment, log_file)
@@ -289,7 +301,22 @@ def shared_workflow(args, log_file):
         progress_text=f"{len(left_primer_candidates)} fw and {len(right_primer_candidates)} rv potential primers"
     )
 
-    return alignment_cleaned, majority_consensus, ambiguous_consensus, primer_regions, left_primer_candidates, right_primer_candidates
+    # filter primers against non-dimer sequences if provided, can use multi-processing
+    if compatible_primers is not None:
+        left_primer_candidates = primers.filter_non_dimer_candidates(
+            left_primer_candidates, compatible_primers, args.threads
+        )
+        right_primer_candidates = primers.filter_non_dimer_candidates(
+            right_primer_candidates, compatible_primers, args.threads
+        )
+        logging.varvamp_progress(
+            log_file,
+            progress=0.65,
+            job="Filtering primers against provided primers.",
+            progress_text=f"{len(left_primer_candidates)} fw and {len(right_primer_candidates)} rv primers after filtering"
+        )
+
+    return alignment_cleaned, majority_consensus, ambiguous_consensus, primer_regions, left_primer_candidates, right_primer_candidates, compatible_primers
 
 
 def single_and_tiled_shared_workflow(args, left_primer_candidates, right_primer_candidates, data_dir, log_file):
@@ -314,8 +341,7 @@ def single_and_tiled_shared_workflow(args, left_primer_candidates, right_primer_
     )
     if not amplicons:
         logging.raise_error(
-            "no amplicons found. Increase the max amplicon length or \
-            number of ambiguous bases or lower threshold!\n",
+            "no amplicons found. Increase the max amplicon length or number of ambiguous bases or lower threshold!\n",
             log_file,
             exit=True
         )
@@ -408,7 +434,7 @@ def tiled_workflow(args, amplicons, left_primer_candidates, right_primer_candida
     return amplicon_scheme
 
 
-def qpcr_workflow(args, data_dir, alignment_cleaned, ambiguous_consensus, majority_consensus, left_primer_candidates, right_primer_candidates, log_file):
+def qpcr_workflow(args, data_dir, alignment_cleaned, ambiguous_consensus, majority_consensus, left_primer_candidates, right_primer_candidates, compatible_primers, log_file):
     """
     part of the workflow specific for the tiled mode
     """
@@ -445,6 +471,17 @@ def qpcr_workflow(args, data_dir, alignment_cleaned, ambiguous_consensus, majori
         progress_text=f"{len(qpcr_probes)} potential qPCR probes"
     )
 
+    # filter primers against non-dimer sequences if provided
+    if compatible_primers is not None:
+        qpcr_probes = primers.filter_non_dimer_candidates(
+            qpcr_probes, compatible_primers, args.threads)
+        logging.varvamp_progress(
+            log_file,
+            progress=0.75,
+            job="Filtering probes against provided primer sequences.",
+            progress_text=f"{len(qpcr_probes)} potential qPCR probes after filtering"
+        )
+
     # find unique amplicons with a low penalty and an internal probe
     qpcr_scheme_candidates = qpcr.find_qcr_schemes(qpcr_probes, left_primer_candidates, right_primer_candidates, majority_consensus, ambiguous_consensus)
     if not qpcr_scheme_candidates:
@@ -507,7 +544,7 @@ def main():
         blast.check_BLAST_installation(log_file)
 
     # mode unspecific part of the workflow
-    alignment_cleaned, majority_consensus, ambiguous_consensus, primer_regions, left_primer_candidates, right_primer_candidates = shared_workflow(args, log_file)
+    alignment_cleaned, majority_consensus, ambiguous_consensus, primer_regions, left_primer_candidates, right_primer_candidates, compatible_primers = shared_workflow(args, log_file)
 
     # write files that are shared in all modes
     reporting.write_regions_to_bed(primer_regions, args.name, data_dir)
@@ -590,6 +627,7 @@ def main():
             majority_consensus,
             left_primer_candidates,
             right_primer_candidates,
+            compatible_primers,
             log_file
         )
 

diff --git a/varvamp/scripts/default_config.py b/varvamp/scripts/default_config.py
@@ -30,7 +30,8 @@
 PRIMER_HAIRPIN = 47  # max melting temp for secondary structure
 PRIMER_GC_END = (1, 3)  # min/max GCs in the last 5 bases of the 3' end
 PRIMER_MIN_3_WITHOUT_AMB = 3  # min len of 3' without ambiguous charaters
-PRIMER_MAX_DIMER_TMP = 47  # max melting temp for dimers (homo- or heterodimers)
+PRIMER_MAX_DIMER_TMP = 35  # max melting temp for dimers (homo- or heterodimers)
+END_OVERLAP = 5  # maximum allowed nt overlap between primer ends
 
 # QPCR parameters
 # basic probe parameters
@@ -42,7 +43,6 @@
 QPRIMER_DIFF = 2  # maximal temperature diff of qPCR primers
 QPROBE_TEMP_DIFF = (5, 10)  # min/max temp diff between probe and primers
 QPROBE_DISTANCE = (4, 15)  # min/max distance to the primer on the same strand
-END_OVERLAP = 5  # maximum allowed nt overlap between the ends of probe and primer
 QAMPLICON_LENGTH = (70, 200)  # min/max length of the qPCR amplicon
 QAMPLICON_GC = (40, 60)  # GC min/max of the qPCR amplicon
 QAMPLICON_DEL_CUTOFF = 4  # consider regions of the alignment for deltaG calculation if they have smaller deletions than cutoff

diff --git a/varvamp/scripts/primers.py b/varvamp/scripts/primers.py
@@ -2,12 +2,18 @@
 primer creation and evaluation
 """
 
+# BUILTIN
+from itertools import chain
+import re
+import multiprocessing
+
 # LIBS
 from Bio.Seq import Seq
-from Bio.Seq import Seq
+from Bio.Seq import MutableSeq
-from Bio.Seq import Seq
+from Bio.Seq import MutableSeq
+from Bio import SeqIO
 import primer3 as p3
 
 # varVAMP
-from varvamp.scripts import config
+from varvamp.scripts import config, reporting
 
 
 def calc_gc(seq):
@@ -59,6 +65,49 @@ def calc_dimer(seq1, seq2, structure=False):
     )
 
 
+def has_end_overlap(dimer_result):
+    """
+    checks if two oligos overlap at their ends
+    Example:
+        xxxxxxxxtagc-------
+        --------atcgxxxxxxx
+    """
+    if dimer_result.structure_found:
+        # clean structure
+        structure = [x[4:] for x in dimer_result.ascii_structure_lines]
+        # check if we have an overlap that is large enough
+        overlap = len(structure[1].replace(" ", ""))
+        if overlap <= config.END_OVERLAP:
+            return False
+        # not more than one conseq. internal mismatch
+        if '  ' in structure[1].lstrip(" "):
+            return False
+        # The alignment length of the ACII structure is equal to the first part of the structure
+        # and the maximum possible alignment length is the cumulative length of both primers (-> no overlap at all)
+        alignment_length = len(structure[0])
+        maximum_alignment_length = len(re.findall("[ATCG]", "".join(structure)))
+        # this means that for a perfect end overlap the alignment length is equal to:
+        # len(primer1) + len(primer2) - overlap.
+        if alignment_length >= maximum_alignment_length - overlap:
+            return True
+
+    return False
+
+
+def is_dimer(seq1, seq2):
+    """
+    check if two sequences dimerize above threshold or are overlapping at their ends
+    """
+    dimer_result = calc_dimer(seq1, seq2, structure=True)
+
+    if dimer_result.tm > config.PRIMER_MAX_DIMER_TMP:
+       return True
+    if has_end_overlap(dimer_result):
+        return True
+
+    return False
+
+
 def calc_max_polyx(seq):
     """
     calculate maximum polyx of a seq
@@ -261,13 +310,14 @@ def filter_kmer_direction_independent(seq, primer_temps=config.PRIMER_TMP, gc_ra
     filter kmer for temperature, gc content,
     poly x, dinucleotide repeats and homodimerization
     """
+
     return(
         (primer_temps[0] <= calc_temp(seq) <= primer_temps[1])
         and (gc_range[0] <= calc_gc(seq) <= gc_range[1])
         and (calc_max_polyx(seq) <= config.PRIMER_MAX_POLYX)
         and (calc_max_dinuc_repeats(seq) <= config.PRIMER_MAX_DINUC_REPEATS)
         and (calc_base_penalty(seq, primer_temps, gc_range, primer_sizes) <= config.PRIMER_MAX_BASE_PENALTY)
-        and (calc_dimer(seq, seq).tm <= config.PRIMER_MAX_DIMER_TMP)
+        and not is_dimer(seq, seq)
     )
 
 
@@ -291,6 +341,60 @@ def filter_kmer_direction_dependend(direction, kmer, ambiguous_consensus):
     )
 
 
+def parse_primer_fasta(fasta_path):
+    """
+    Parse a primer FASTA file and return a list of sequences using BioPython.
+    """
+
+    sequences = []
+
+    for record in SeqIO.parse(fasta_path, "fasta"):
+        seq = str(record.seq).lower()
+        # Only include primers up to 40 nucleotides
+        if len(seq) <= 40:
+            sequences.append(reporting.get_permutations(seq))
+
+    return list(chain.from_iterable(sequences))
+
+
+def check_primer_against_externals(args):
+    """
+    Worker function to check a single primer against all external sequences.
+    Returns the primer if it passes, None otherwise.
+    """
+
+    primer, external_sequences = args
+
+    for seq in external_sequences:
+        if is_dimer(primer[0], seq):
+            return None
+
+    return primer
+
+
+def filter_non_dimer_candidates(primer_candidates, external_sequences, n_threads):
+    """
+    Filter out primer candidates that form dimers with external sequences.
+    Uses multiprocessing to speed up checks.
+    """
+    # Deduplicate external sequences to reduce redundant checks
+    unique_sequences = []
+    seen = set()
+    for seq in external_sequences:
+        if seq not in seen:
+            unique_sequences.append(seq)
+            seen.add(seq)
+
+    with multiprocessing.Pool(processes=n_threads) as pool:
+        # Prepare arguments for each primer
+        args = [(primer, unique_sequences) for primer in primer_candidates]
+        # Process in parallel
+        results = pool.map(check_primer_against_externals, args)
+
+    # Filter out None results
+    return [primer for primer in results if primer is not None]
+
+
 def find_primers(kmers, ambiguous_consensus, alignment):
     """
     filter kmers direction specific and append penalties

diff --git a/varvamp/scripts/qpcr.py b/varvamp/scripts/qpcr.py
@@ -139,26 +139,6 @@ def hardfilter_amplicon(majority_consensus, left_primer, right_primer):
     )
 
 
-def check_end_overlap(dimer_result):
-    """
-    checks if two oligos overlap at their ends (pretty rare)
-    Example:
-        xxxxxxxxtagc-------
-        --------atcgxxxxxxx
-    """
-    if dimer_result.structure_found:
-        # clean structure
-        structure = [x[4:] for x in dimer_result.ascii_structure_lines]
-        # calc overlap and the cumulative len of the oligos
-        overlap = len(structure[1].replace(" ", ""))
-        nt_count = len(re.findall("[ATCG]", "".join(structure)))
-        # check for overlaps at the ends and the min overlap (allows for some amount of mismatches)
-        if overlap > config.END_OVERLAP and nt_count <= len(structure[0]) + overlap + 1 and "  " not in structure[1].lstrip(" "):
-            return True
-
-    return False
-
-
 def forms_dimer_or_overhangs(right_primer, left_primer, probe, ambiguous_consensus):
     """
     checks if combinations of primers/probe form dimers or overhangs
@@ -179,8 +159,7 @@ def forms_dimer_or_overhangs(right_primer, left_primer, probe, ambiguous_consens
     for combination in [(probe_per, left_per), (probe_per, right_per)]:
         for oligo1 in combination[0]:
             for oligo2 in combination[1]:
-                dimer_result = primers.calc_dimer(oligo1, oligo2, structure=True)
-                if dimer_result.tm >= config.PRIMER_MAX_DIMER_TMP or check_end_overlap(dimer_result):
+                if primers.is_dimer(oligo1, oligo2):
                     forms_structure = True
                     break
             # break all loops because we found an unwanted structure in one of the permutations

diff --git a/varvamp/scripts/scheme.py b/varvamp/scripts/scheme.py
@@ -73,7 +73,7 @@ def find_amplicons(all_primers, opt_len, max_len):
             amplicon_length = right_primer[2] - left_primer[1]
             if not opt_len <= amplicon_length <= max_len:
                 continue
-            if primers.calc_dimer(left_primer[0], right_primer[0]).tm > config.PRIMER_MAX_DIMER_TMP:
+            if primers.is_dimer(left_primer[0], right_primer[0]):
                 continue
             # calculate length dependend amplicon costs as the cumulative primer
             # penalty multiplied by the e^(fold length of the optimal length).
@@ -309,7 +309,7 @@ def test_scheme_for_dimers(amplicon_scheme):
                 current_seq = current_primer[2][0]
                 for tested in tested_primers:
                     tested_seq = tested[2][0]
-                    if primers.calc_dimer(current_seq, tested_seq).tm <= config.PRIMER_MAX_DIMER_TMP:
+                    if not primers.is_dimer(current_seq, tested_seq):
                         continue
                     primer_dimers.append((current_primer, tested))
                 # and remember all tested primers
@@ -357,7 +357,7 @@ def test_overlaps_for_dimers(overlapping_primers):
         for second_overlap in overlapping_primers[1]:
             # return the first match. primers are sorted by penalty.
             # first pair that makes it has the lowest penalty
-            if primers.calc_dimer(first_overlap[2][0], second_overlap[2][0]).tm <= config.PRIMER_MAX_DIMER_TMP:
+            if not primers.is_dimer(first_overlap[2][0], second_overlap[2][0]):
                 return [first_overlap, second_overlap]