From 3a0b1b665401ee41cf3f8be77085363995a49ad0 Mon Sep 17 00:00:00 2001
From: Jonathan Manning <jonathan.manning@seqera.io>
Date: Tue, 25 Nov 2025 19:59:54 +0000
Subject: [PATCH 01/10] Add validation error for incompatible transcript_fasta
 and additional_fasta params
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When both --transcript_fasta and --additional_fasta are provided, the
pipeline cannot append spike-in sequences to the user-provided
transcriptome. This causes downstream quantification to fail with
confusing errors.

Now the pipeline fails fast with a clear error message explaining the
issue and suggesting solutions.

Closes #1450

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 docs/usage.md                                 |  6 ++++-
 nextflow_schema.json                          |  5 ++--
 .../utils_nfcore_rnaseq_pipeline/main.nf      | 23 +++++++++++++++++++
 3 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/docs/usage.md b/docs/usage.md
index ba215ebfe..c4a9cae2e 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -316,7 +316,7 @@ Notes:
 
 - If `--gff` is provided as input then this will be converted to a GTF file, or the latter will be used if both are provided.
 - If `--gene_bed` is not provided then it will be generated from the GTF file.
-- If `--additional_fasta` is provided then the features in this file (e.g. ERCC spike-ins) will be automatically concatenated onto both the reference FASTA file as well as the GTF annotation before building the appropriate indices.
+- If `--additional_fasta` is provided then the features in this file (e.g. ERCC spike-ins) will be automatically concatenated onto both the reference FASTA file as well as the GTF annotation before building the appropriate indices. Note: `--additional_fasta` cannot be used together with `--transcript_fasta` because the pipeline cannot append additional sequences to a user-provided transcriptome. If you need spike-ins, omit `--transcript_fasta` and let the pipeline generate the transcriptome automatically.
 - When using `--aligner star_rsem`, the pipeline will build separate STAR and RSEM indices. STAR performs alignment with RSEM-compatible parameters, then RSEM quantifies from the resulting BAM files using `--alignments` mode.
 - If the `--skip_alignment` option is used along with `--transcript_fasta`, the pipeline can technically run without providing the genomic FASTA (`--fasta`). However, this approach is **not recommended** with `--pseudo_aligner salmon`, as any dynamically generated Salmon index will lack decoys. To ensure optimal indexing with decoys, it is **highly recommended** to include the genomic FASTA (`--fasta`) with Salmon, unless a pre-existing decoy-aware Salmon index is supplied. For more details on the benefits of decoy-aware indexing, refer to the [Salmon documentation](https://salmon.readthedocs.io/en/latest/salmon.html#preparing-transcriptome-indices-mapping-based-mode).
 
@@ -350,6 +350,10 @@ In addition to the reference genome sequence and annotation, you can provide a r
 
 We recommend not providing a transcriptome FASTA file and instead allowing the pipeline to create it from the provided genome and annotation. Similar to aligner indexes, you can save the created transcriptome FASTA and BED files to a central location for future pipeline runs. This helps avoid redundant computation and having multiple copies on your system. Ensure that all genome, annotation, transcriptome, and index versions match to maintain consistency.
 
+:::warning
+If you are using `--additional_fasta` to add spike-in sequences (e.g. ERCC), you **must not** provide `--transcript_fasta`. The pipeline needs to generate the transcriptome itself so that it includes the spike-in sequences. Providing both parameters will cause the pipeline to exit with an error.
+:::
+
 #### Indices
 
 By default, indices are generated dynamically by the workflow for tools such as STAR and Salmon. Since indexing is an expensive process in time and resources you should ensure that it is only done once, by retaining the indices generated from each batch of reference files by specifying `--save_reference`.
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 7a3975fcb..1ff2b3303 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -111,7 +111,8 @@
                     "mimetype": "text/plain",
                     "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$",
                     "fa_icon": "far fa-file-code",
-                    "description": "Path to FASTA transcriptome file."
+                    "description": "Path to FASTA transcriptome file.",
+                    "help_text": "If not provided, the transcriptome will be generated from the genome FASTA and GTF files. Cannot be used together with `--additional_fasta` because the pipeline cannot append spike-in sequences to a user-provided transcriptome."
                 },
                 "additional_fasta": {
                     "type": "string",
@@ -121,7 +122,7 @@
                     "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$",
                     "fa_icon": "far fa-file-code",
                     "description": "FASTA file to concatenate to genome FASTA file e.g. containing spike-in sequences.",
-                    "help_text": "If provided, sequences in this file will be concatenated to the genome FASTA file. A GTF file will be automatically created using these sequences, and alignment indices will be created from the combined files. Use `--save_reference` to reuse these indices in future runs."
+                    "help_text": "If provided, sequences in this file will be concatenated to the genome FASTA file. A GTF file will be automatically created using these sequences, and alignment indices will be created from the combined files. Use `--save_reference` to reuse these indices in future runs. Cannot be used together with `--transcript_fasta` - the pipeline must generate the transcriptome itself to include the additional sequences."
                 },
                 "splicesites": {
                     "type": "string",
diff --git a/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf
index b799b8dfa..3f958785f 100644
--- a/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf
@@ -263,6 +263,9 @@ def validateInputParameters() {
     }
 
     if (params.transcript_fasta) {
+        if (params.additional_fasta) {
+            transcriptFastaAdditionalFastaError()
+        }
         transcriptsFastaWarn()
     }
 
@@ -496,6 +499,26 @@ def transcriptsFastaWarn() {
         "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
 }
 
+//
+// Print an error if using both '--transcript_fasta' and '--additional_fasta'
+//
+def transcriptFastaAdditionalFastaError() {
+    def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
+        "  Both '--transcript_fasta' and '--additional_fasta' have been provided.\n\n" +
+        "  The pipeline cannot append additional sequences (e.g. ERCC spike-ins) to a\n" +
+        "  user-provided transcriptome FASTA file. This would cause quantification to\n" +
+        "  fail because alignments to additional sequences would not be found in the\n" +
+        "  transcript FASTA.\n\n" +
+        "  Please either:\n" +
+        "    - Remove '--transcript_fasta' and let the pipeline generate the\n" +
+        "      transcriptome from the genome FASTA and GTF (recommended), or\n" +
+        "    - Remove '--additional_fasta' if you do not need spike-in sequences.\n\n" +
+        "  Please see:\n" +
+        "  https://github.com/nf-core/rnaseq/issues/1450\n" +
+        "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+    error(error_string)
+}
+
 //
 // Print a warning if --skip_alignment has been provided
 //

From 81a35ed636226373bbd2bbc5c6f641e097f6adb9 Mon Sep 17 00:00:00 2001
From: Jonathan Manning <jonathan.manning@seqera.io>
Date: Tue, 25 Nov 2025 20:01:12 +0000
Subject: [PATCH 02/10] Update CHANGELOG for PR #1632

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6a61e5e0a..53cbf6910 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,7 @@ Special thanks to the following for their contributions to the release:
 - [Elad Herzog](https://github.com/EladH1)
 - [Emily Miyoshi](https://github.com/emilymiyoshi)
 - [Pontus Höjer](https://github.com/pontushojer)
+- [Siddhartha Bagaria](https://github.com/siddharthab)
 
 ### Enhancements and fixes
 
@@ -27,6 +28,7 @@ Special thanks to the following for their contributions to the release:
 - [PR #1624](https://github.com/nf-core/rnaseq/pull/1624) - Document RSeQC inner_distance limitation for genomes with large chromosomes (>500 Mb), such as plant genomes
 - [PR #1625](https://github.com/nf-core/rnaseq/pull/1625) - Add documentation warning about Qualimap read counting bug ([#1273](https://github.com/nf-core/rnaseq/issues/1273))
 - [PR #1628](https://github.com/nf-core/rnaseq/pull/1628) - Template update for nf-core/tools v3.5.1
+- [PR #1632](https://github.com/nf-core/rnaseq/pull/1632) - Add validation error for incompatible `--transcript_fasta` and `--additional_fasta` params ([#1450](https://github.com/nf-core/rnaseq/issues/1450))
 
 ## [[3.21.0](https://github.com/nf-core/rnaseq/releases/tag/3.21.0)] - 2025-09-18
 

From 1f52818659ba8e2e05352e12d5a338ee9b31aee3 Mon Sep 17 00:00:00 2001
From: Jonathan Manning <jonathan.manning@seqera.io>
Date: Tue, 25 Nov 2025 20:11:42 +0000
Subject: [PATCH 03/10] Refine validation: only error when building
 pseudo-aligner index
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The validation was too strict - it blocked all combinations of
transcript_fasta + additional_fasta, but this is only problematic
when the pipeline needs to BUILD a pseudo-aligner index (Salmon/Kallisto).

If a pre-built index is provided that already contains the spike-ins,
the combination is valid. Updated validation logic to check for this
condition and updated docs/schema accordingly.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 docs/usage.md                                 |  4 ++--
 nextflow_schema.json                          |  4 ++--
 .../utils_nfcore_rnaseq_pipeline/main.nf      | 23 +++++++++++++++----
 3 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/docs/usage.md b/docs/usage.md
index c4a9cae2e..48aa09a3a 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -316,7 +316,7 @@ Notes:
 
 - If `--gff` is provided as input then this will be converted to a GTF file, or the latter will be used if both are provided.
 - If `--gene_bed` is not provided then it will be generated from the GTF file.
-- If `--additional_fasta` is provided then the features in this file (e.g. ERCC spike-ins) will be automatically concatenated onto both the reference FASTA file as well as the GTF annotation before building the appropriate indices. Note: `--additional_fasta` cannot be used together with `--transcript_fasta` because the pipeline cannot append additional sequences to a user-provided transcriptome. If you need spike-ins, omit `--transcript_fasta` and let the pipeline generate the transcriptome automatically.
+- If `--additional_fasta` is provided then the features in this file (e.g. ERCC spike-ins) will be automatically concatenated onto both the reference FASTA file as well as the GTF annotation before building the appropriate indices. Note: if you need the pipeline to build a pseudo-aligner index (Salmon/Kallisto), `--additional_fasta` cannot be used together with `--transcript_fasta` because the pipeline cannot append additional sequences to a user-provided transcriptome. Either omit `--transcript_fasta` and let the pipeline generate it, or provide a pre-built index that already contains the spike-ins.
 - When using `--aligner star_rsem`, the pipeline will build separate STAR and RSEM indices. STAR performs alignment with RSEM-compatible parameters, then RSEM quantifies from the resulting BAM files using `--alignments` mode.
 - If the `--skip_alignment` option is used along with `--transcript_fasta`, the pipeline can technically run without providing the genomic FASTA (`--fasta`). However, this approach is **not recommended** with `--pseudo_aligner salmon`, as any dynamically generated Salmon index will lack decoys. To ensure optimal indexing with decoys, it is **highly recommended** to include the genomic FASTA (`--fasta`) with Salmon, unless a pre-existing decoy-aware Salmon index is supplied. For more details on the benefits of decoy-aware indexing, refer to the [Salmon documentation](https://salmon.readthedocs.io/en/latest/salmon.html#preparing-transcriptome-indices-mapping-based-mode).
 
@@ -351,7 +351,7 @@ In addition to the reference genome sequence and annotation, you can provide a r
 We recommend not providing a transcriptome FASTA file and instead allowing the pipeline to create it from the provided genome and annotation. Similar to aligner indexes, you can save the created transcriptome FASTA and BED files to a central location for future pipeline runs. This helps avoid redundant computation and having multiple copies on your system. Ensure that all genome, annotation, transcriptome, and index versions match to maintain consistency.
 
 :::warning
-If you are using `--additional_fasta` to add spike-in sequences (e.g. ERCC), you **must not** provide `--transcript_fasta`. The pipeline needs to generate the transcriptome itself so that it includes the spike-in sequences. Providing both parameters will cause the pipeline to exit with an error.
+If you are using `--additional_fasta` to add spike-in sequences (e.g. ERCC) and need the pipeline to build a pseudo-aligner index (Salmon/Kallisto), you **must not** provide `--transcript_fasta`. The pipeline needs to generate the transcriptome itself so that it includes the spike-in sequences. This combination will cause the pipeline to exit with an error unless you also provide a pre-built index for the selected pseudo-aligner (`--salmon_index` for Salmon, `--kallisto_index` for Kallisto) that already contains the spike-in sequences.
 :::
 
 #### Indices
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 1ff2b3303..852b5bdd4 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -112,7 +112,7 @@
                     "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$",
                     "fa_icon": "far fa-file-code",
                     "description": "Path to FASTA transcriptome file.",
-                    "help_text": "If not provided, the transcriptome will be generated from the genome FASTA and GTF files. Cannot be used together with `--additional_fasta` because the pipeline cannot append spike-in sequences to a user-provided transcriptome."
+                    "help_text": "If not provided, the transcriptome will be generated from the genome FASTA and GTF files. Cannot be used together with `--additional_fasta` when building a pseudo-aligner index, because the pipeline cannot append spike-in sequences to a user-provided transcriptome. Either omit this parameter or provide a pre-built index that already contains the spike-in sequences."
                 },
                 "additional_fasta": {
                     "type": "string",
@@ -122,7 +122,7 @@
                     "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$",
                     "fa_icon": "far fa-file-code",
                     "description": "FASTA file to concatenate to genome FASTA file e.g. containing spike-in sequences.",
-                    "help_text": "If provided, sequences in this file will be concatenated to the genome FASTA file. A GTF file will be automatically created using these sequences, and alignment indices will be created from the combined files. Use `--save_reference` to reuse these indices in future runs. Cannot be used together with `--transcript_fasta` - the pipeline must generate the transcriptome itself to include the additional sequences."
+                    "help_text": "If provided, sequences in this file will be concatenated to the genome FASTA file. A GTF file will be automatically created using these sequences, and alignment indices will be created from the combined files. Use `--save_reference` to reuse these indices in future runs. Cannot be used together with `--transcript_fasta` when building a pseudo-aligner index - either omit `--transcript_fasta` or provide a pre-built index that already contains the spike-ins."
                 },
                 "splicesites": {
                     "type": "string",
diff --git a/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf
index 3f958785f..131189aeb 100644
--- a/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf
@@ -263,8 +263,19 @@ def validateInputParameters() {
     }
 
     if (params.transcript_fasta) {
+        // Only error if additional_fasta is provided AND we need to build a pseudo-aligner index
+        // (i.e., no pre-built salmon/kallisto index provided). If the user provides a pre-built
+        // index that already contains the spike-ins, the combination is valid.
         if (params.additional_fasta) {
-            transcriptFastaAdditionalFastaError()
+            def building_pseudo_index = (
+                !params.skip_pseudo_alignment &&
+                params.pseudo_aligner &&
+                !params.salmon_index &&
+                !params.kallisto_index
+            )
+            if (building_pseudo_index) {
+                transcriptFastaAdditionalFastaError()
+            }
         }
         transcriptsFastaWarn()
     }
@@ -500,18 +511,20 @@ def transcriptsFastaWarn() {
 }
 
 //
-// Print an error if using both '--transcript_fasta' and '--additional_fasta'
+// Print an error if using both '--transcript_fasta' and '--additional_fasta' without a pre-built index
 //
 def transcriptFastaAdditionalFastaError() {
     def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
-        "  Both '--transcript_fasta' and '--additional_fasta' have been provided.\n\n" +
+        "  Both '--transcript_fasta' and '--additional_fasta' have been provided,\n" +
+        "  but no pre-built pseudo-aligner index (--salmon_index/--kallisto_index).\n\n" +
         "  The pipeline cannot append additional sequences (e.g. ERCC spike-ins) to a\n" +
         "  user-provided transcriptome FASTA file. This would cause quantification to\n" +
-        "  fail because alignments to additional sequences would not be found in the\n" +
-        "  transcript FASTA.\n\n" +
+        "  fail because the built index would not contain the additional sequences.\n\n" +
         "  Please either:\n" +
         "    - Remove '--transcript_fasta' and let the pipeline generate the\n" +
         "      transcriptome from the genome FASTA and GTF (recommended), or\n" +
+        "    - Provide a pre-built index (--salmon_index/--kallisto_index) that\n" +
+        "      already contains the additional sequences, or\n" +
         "    - Remove '--additional_fasta' if you do not need spike-in sequences.\n\n" +
         "  Please see:\n" +
         "  https://github.com/nf-core/rnaseq/issues/1450\n" +

From 436097451eb82fe29d0a596055db7840964bdf92 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 25 Nov 2025 20:53:14 +0000
Subject: [PATCH 04/10] Initial plan


From d43f0da835d6a0337f6b1c9a9c6f0c5292aff839 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 25 Nov 2025 20:55:22 +0000
Subject: [PATCH 05/10] Fix validation: check only the relevant pseudo-aligner
 index

Co-authored-by: pinin4fjords <5775915+pinin4fjords@users.noreply.github.com>
---
 .../local/utils_nfcore_rnaseq_pipeline/main.nf | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf
index 131189aeb..51cf1d29b 100644
--- a/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf
@@ -267,13 +267,17 @@ def validateInputParameters() {
         // (i.e., no pre-built salmon/kallisto index provided). If the user provides a pre-built
         // index that already contains the spike-ins, the combination is valid.
         if (params.additional_fasta) {
-            def building_pseudo_index = (
-                !params.skip_pseudo_alignment &&
-                params.pseudo_aligner &&
-                !params.salmon_index &&
-                !params.kallisto_index
-            )
-            if (building_pseudo_index) {
+            def needs_to_build_index = false
+            if (!params.skip_pseudo_alignment && params.pseudo_aligner) {
+                // Check if the relevant index for the selected pseudo-aligner is missing
+                if (params.pseudo_aligner == 'salmon' && !params.salmon_index) {
+                    needs_to_build_index = true
+                }
+                if (params.pseudo_aligner == 'kallisto' && !params.kallisto_index) {
+                    needs_to_build_index = true
+                }
+            }
+            if (needs_to_build_index) {
                 transcriptFastaAdditionalFastaError()
             }
         }

From 81bda4658fe1bc451573f47ff5e21e772bb6e636 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 25 Nov 2025 20:56:10 +0000
Subject: [PATCH 06/10] Refactor: use else-if structure for pseudo-aligner
 check

Co-authored-by: pinin4fjords <5775915+pinin4fjords@users.noreply.github.com>
---
 subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf
index 51cf1d29b..a94cac101 100644
--- a/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf
@@ -272,8 +272,7 @@ def validateInputParameters() {
                 // Check if the relevant index for the selected pseudo-aligner is missing
                 if (params.pseudo_aligner == 'salmon' && !params.salmon_index) {
                     needs_to_build_index = true
-                }
-                if (params.pseudo_aligner == 'kallisto' && !params.kallisto_index) {
+                } else if (params.pseudo_aligner == 'kallisto' && !params.kallisto_index) {
                     needs_to_build_index = true
                 }
             }

From aee539355eb7fd0055439ede4c358a54076f221e Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 25 Nov 2025 21:15:06 +0000
Subject: [PATCH 07/10] Initial plan


From 4df6cee1b493bb24375f5361ffab411cbc93d198 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 25 Nov 2025 21:24:51 +0000
Subject: [PATCH 08/10] Fix kallisto test to disable additional_fasta for
 incompatible config

Co-authored-by: pinin4fjords <5775915+pinin4fjords@users.noreply.github.com>
---
 tests/kallisto.nf.test | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/kallisto.nf.test b/tests/kallisto.nf.test
index a67b2d314..9021d8335 100644
--- a/tests/kallisto.nf.test
+++ b/tests/kallisto.nf.test
@@ -12,6 +12,8 @@ nextflow_pipeline {
                 pseudo_aligner = 'kallisto'
                 skip_qc = true
                 skip_alignment = true
+                // Disable additional_fasta since we don't have a kallisto_index with spike-ins
+                additional_fasta = null
             }
         }
 
@@ -46,6 +48,8 @@ nextflow_pipeline {
                 pseudo_aligner = 'kallisto'
                 skip_qc = true
                 skip_alignment = true
+                // Disable additional_fasta since we don't have a kallisto_index with spike-ins
+                additional_fasta = null
             }
         }
 

From 7aa6d80c0970d2bcfaf819322cda461e91dd436a Mon Sep 17 00:00:00 2001
From: Jonathan Manning <jonathan.manning@seqera.io>
Date: Tue, 25 Nov 2025 22:03:05 +0000
Subject: [PATCH 09/10] Fix kallisto test by also disabling transcript_fasta
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The test profile's transcript_fasta already contains spike-in sequences
(GFP) from when it was generated. Setting only additional_fasta = null
prevents GFP from being added to the GTF, but Kallisto still builds its
index from the existing transcript_fasta (which has GFP). This causes a
mismatch where Kallisto outputs GFP counts but the GTF-based metadata
doesn't include GFP, causing SE_GENE_UNIFIED to fail.

The fix is to also set transcript_fasta = null so the pipeline
regenerates the transcriptome from the GTF (consistently without
spike-ins).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 tests/kallisto.nf.test | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/tests/kallisto.nf.test b/tests/kallisto.nf.test
index 9021d8335..8f974e1ab 100644
--- a/tests/kallisto.nf.test
+++ b/tests/kallisto.nf.test
@@ -12,8 +12,11 @@ nextflow_pipeline {
                 pseudo_aligner = 'kallisto'
                 skip_qc = true
                 skip_alignment = true
-                // Disable additional_fasta since we don't have a kallisto_index with spike-ins
+                // Disable spike-ins since we don't have a kallisto_index with spike-ins.
+                // Must also disable transcript_fasta because the test profile's transcriptome
+                // was generated with spike-ins - we need the pipeline to regenerate it.
                 additional_fasta = null
+                transcript_fasta = null
             }
         }
 
@@ -48,8 +51,11 @@ nextflow_pipeline {
                 pseudo_aligner = 'kallisto'
                 skip_qc = true
                 skip_alignment = true
-                // Disable additional_fasta since we don't have a kallisto_index with spike-ins
+                // Disable spike-ins since we don't have a kallisto_index with spike-ins.
+                // Must also disable transcript_fasta because the test profile's transcriptome
+                // was generated with spike-ins - we need the pipeline to regenerate it.
                 additional_fasta = null
+                transcript_fasta = null
             }
         }
 

From 8dc43b92a9abd1951ef3c8830514478097de0248 Mon Sep 17 00:00:00 2001
From: Jonathan Manning <jonathan.manning@seqera.io>
Date: Tue, 25 Nov 2025 22:35:29 +0000
Subject: [PATCH 10/10] Update kallisto test snapshots for new params
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Updated both real and stub test snapshots to reflect changes from
disabling additional_fasta and transcript_fasta:

Real test:
- Task count: 48 → 47
- Removed CUSTOM_CATADDITIONALFASTA and GUNZIP_ADDITIONAL_FASTA
- Added MAKE_TRANSCRIPTS_FASTA (pipeline now generates transcriptome)
- Removed custom/out/genome_gfp.* from output files
- Updated tx2gene.tsv hash (different without spike-ins)

Stub test:
- Task count: 22 → 21
- Same process changes as real test
- Removed custom/out/genome_transcriptome.* from output files
- Empty stable_path array

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 tests/kallisto.nf.test.snap | 38 +++++++++++--------------------------
 1 file changed, 11 insertions(+), 27 deletions(-)

diff --git a/tests/kallisto.nf.test.snap b/tests/kallisto.nf.test.snap
index 6fbdcc695..e214e61b7 100644
--- a/tests/kallisto.nf.test.snap
+++ b/tests/kallisto.nf.test.snap
@@ -1,7 +1,7 @@
 {
     "Params: --pseudo_aligner kallisto --skip_qc --skip_alignment": {
         "content": [
-            48,
+            47,
             {
                 "BBMAP_BBSPLIT": {
                     "bbmap": 39.18
@@ -9,9 +9,6 @@
                 "CAT_FASTQ": {
                     "cat": 9.5
                 },
-                "CUSTOM_CATADDITIONALFASTA": {
-                    "python": "3.12.2"
-                },
                 "CUSTOM_GETCHROMSIZES": {
                     "getchromsizes": 1.21
                 },
@@ -30,9 +27,6 @@
                 "GTF_FILTER": {
                     "python": "3.9.5"
                 },
-                "GUNZIP_ADDITIONAL_FASTA": {
-                    "gunzip": 1.13
-                },
                 "GUNZIP_GTF": {
                     "gunzip": 1.13
                 },
@@ -42,6 +36,10 @@
                 "KALLISTO_QUANT": {
                     "kallisto": "0.51.1"
                 },
+                "MAKE_TRANSCRIPTS_FASTA": {
+                    "rsem": "1.3.1",
+                    "star": "2.7.10a"
+                },
                 "SALMON_QUANT": {
                     "salmon": "1.10.3"
                 },
@@ -70,10 +68,6 @@
                 "bbsplit/RAP1_UNINDUCED_REP2.stats.txt",
                 "bbsplit/WT_REP1.stats.txt",
                 "bbsplit/WT_REP2.stats.txt",
-                "custom",
-                "custom/out",
-                "custom/out/genome_gfp.fasta",
-                "custom/out/genome_gfp.gtf",
                 "fastqc",
                 "fastqc/trim",
                 "fastqc/trim/RAP1_IAA_30M_REP1_trimmed_1_val_1_fastqc.html",
@@ -248,9 +242,7 @@
                 "trimgalore/WT_REP2_trimmed_2.fastq.gz_trimming_report.txt"
             ],
             [
-                "genome_gfp.fasta:md5,e23e302af63736a199985a169fdac055",
-                "genome_gfp.gtf:md5,c98b12c302f15731bfc36bcf297cfe28",
-                "tx2gene.tsv:md5,0e2418a69d2eba45097ebffc2f700bfe",
+                "tx2gene.tsv:md5,1be389a28cc26d94b19ea918959ac72e",
                 "cutadapt_filtered_reads_plot.txt:md5,6fa381627f7c1f664f3d4b2cb79cce90",
                 "cutadapt_trimmed_sequences_plot_3_Counts.txt:md5,13dfa866fd91dbb072689efe9aa83b1f",
                 "cutadapt_trimmed_sequences_plot_3_Obs_Exp.txt:md5,07145dd8dd3db654859b18eb0389046c",
@@ -277,7 +269,7 @@
     },
     "Params: --pseudo_aligner kallisto --skip_qc --skip_alignment - stub": {
         "content": [
-            22,
+            21,
             {
                 "BBMAP_BBSPLIT": {
                     "bbmap": 39.18
@@ -285,9 +277,6 @@
                 "CAT_FASTQ": {
                     "cat": 9.5
                 },
-                "CUSTOM_CATADDITIONALFASTA": {
-                    "python": null
-                },
                 "CUSTOM_GETCHROMSIZES": {
                     "getchromsizes": 1.21
                 },
@@ -300,15 +289,16 @@
                 "GTF_FILTER": {
                     "python": "3.9.5"
                 },
-                "GUNZIP_ADDITIONAL_FASTA": {
-                    "gunzip": 1.13
-                },
                 "GUNZIP_GTF": {
                     "gunzip": 1.13
                 },
                 "KALLISTO_INDEX": {
                     "kallisto": "0.51.1"
                 },
+                "MAKE_TRANSCRIPTS_FASTA": {
+                    "rsem": "1.3.1",
+                    "star": "2.7.10a"
+                },
                 "TRIMGALORE": {
                     "cutadapt": 4.9,
                     "pigz": 2.8,
@@ -319,10 +309,6 @@
                 }
             },
             [
-                "custom",
-                "custom/out",
-                "custom/out/genome_transcriptome.fasta",
-                "custom/out/genome_transcriptome.gtf",
                 "fastqc",
                 "fastqc/trim",
                 "fq_lint",
@@ -349,8 +335,6 @@
                 "trimgalore/WT_REP2_trimmed_2.fastq.gz_trimming_report.txt"
             ],
             [
-                "genome_transcriptome.fasta:md5,d41d8cd98f00b204e9800998ecf8427e",
-                "genome_transcriptome.gtf:md5,d41d8cd98f00b204e9800998ecf8427e"
             ]
         ],
         "meta": {