nf-core · pinin4fjords · Dec 2, 2024 · Nov 29, 2024 · Nov 29, 2024 · Nov 29, 2024
@@ -1,13 +1,17 @@
 import groovy.json.JsonSlurper
 
-include { BBMAP_BBSPLIT                   } from '../../../modules/nf-core/bbmap/bbsplit'
-include { CAT_FASTQ                       } from '../../../modules/nf-core/cat/fastq/main'
-include { SORTMERNA                       } from '../../../modules/nf-core/sortmerna/main'
-include { SORTMERNA as SORTMERNA_INDEX    } from '../../../modules/nf-core/sortmerna/main'
-
-include { FASTQ_SUBSAMPLE_FQ_SALMON        } from '../fastq_subsample_fq_salmon'
-include { FASTQ_FASTQC_UMITOOLS_TRIMGALORE } from '../fastq_fastqc_umitools_trimgalore'
-include { FASTQ_FASTQC_UMITOOLS_FASTP      } from '../fastq_fastqc_umitools_fastp'
+include { BBMAP_BBSPLIT                      } from '../../../modules/nf-core/bbmap/bbsplit'
+include { CAT_FASTQ                          } from '../../../modules/nf-core/cat/fastq/main'
+include { SORTMERNA                          } from '../../../modules/nf-core/sortmerna/main'
+include { SORTMERNA as SORTMERNA_INDEX       } from '../../../modules/nf-core/sortmerna/main'
+include { FQ_LINT                            } from '../../../modules/nf-core/fq/lint/main'
+include { FQ_LINT as FQ_LINT_AFTER_TRIMMING  } from '../../../modules/nf-core/fq/lint/main'
+include { FQ_LINT as FQ_LINT_AFTER_BBSPLIT   } from '../../../modules/nf-core/fq/lint/main'
+include { FQ_LINT as FQ_LINT_AFTER_SORTMERNA } from '../../../modules/nf-core/fq/lint/main'
+
+include { FASTQ_SUBSAMPLE_FQ_SALMON          } from '../fastq_subsample_fq_salmon'
+include { FASTQ_FASTQC_UMITOOLS_TRIMGALORE   } from '../fastq_fastqc_umitools_trimgalore'
+include { FASTQ_FASTQC_UMITOOLS_FASTP        } from '../fastq_fastqc_umitools_fastp'
 
 def pass_trimmed_reads = [:]
 
@@ -106,13 +110,27 @@ workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS {
     umi_discard_read     // integer: 0, 1 or 2
     stranded_threshold   // float: The fraction of stranded reads that must be assigned to a strandedness for confident assignment. Must be at least 0.5
     unstranded_threshold // float: The difference in fraction of stranded reads assigned to 'forward' and 'reverse' below which a sample is classified as 'unstranded'
+    skip_linting         // boolean: true/false
 
     main:
 
     ch_versions        = Channel.empty()
     ch_filtered_reads  = Channel.empty()
     ch_trim_read_count = Channel.empty()
     ch_multiqc_files   = Channel.empty()
+    ch_lint_log        = Channel.empty()
+
+    //
+    // MODULE: Lint FastQ files
+    //
+    if(!skip_linting) {
+        FQ_LINT (
+            ch_reads.map{ meta, fastqs -> [meta, fastqs.flatten()] }
+        )
+        ch_versions = ch_versions.mix(FQ_LINT.out.versions.first())
+        ch_lint_log = ch_lint_log.mix(FQ_LINT.out.lint)
+        ch_reads = ch_reads.join(FQ_LINT.out.lint.map{it[0]})
+    }
 
     ch_reads
         .branch {
@@ -212,6 +230,14 @@ workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS {
                 .map { [[:], it] }
         )
 
+    if((!skip_linting) && (!skip_trimming)) {
+        FQ_LINT_AFTER_TRIMMING (
+            ch_filtered_reads
+        )
+        ch_lint_log = ch_lint_log.mix(FQ_LINT_AFTER_TRIMMING.out.lint)
+        ch_filtered_reads = ch_filtered_reads.join(FQ_LINT_AFTER_TRIMMING.out.lint.map{it[0]})
+    }
+
     //
     // MODULE: Remove genome contaminant reads
     //
@@ -228,6 +254,14 @@ workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS {
             .set { ch_filtered_reads }
 
         ch_versions = ch_versions.mix(BBMAP_BBSPLIT.out.versions.first())
+
+        if(!skip_linting) {
+            FQ_LINT_AFTER_BBSPLIT (
+                ch_filtered_reads
+            )
+            ch_lint_log = ch_lint_log.mix(FQ_LINT_AFTER_BBSPLIT.out.lint)
+            ch_filtered_reads = ch_filtered_reads.join(FQ_LINT_AFTER_BBSPLIT.out.lint.map{it[0]})
+        }
     }
 
     //
@@ -260,6 +294,14 @@ workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS {
             .mix(SORTMERNA.out.log)
 
         ch_versions = ch_versions.mix(SORTMERNA.out.versions.first())
+
+        if(!skip_linting) {
+            FQ_LINT_AFTER_SORTMERNA (
+                ch_filtered_reads
+            )
+            ch_lint_log = ch_lint_log.mix(FQ_LINT_AFTER_SORTMERNA.out.lint)
+            ch_filtered_reads = ch_filtered_reads.join(FQ_LINT_AFTER_SORTMERNA.out.lint.map{it[0]})
+        }
     }
 
     // Branch FastQ channels if 'auto' specified to infer strandedness
@@ -312,6 +354,7 @@ workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS {
 
     emit:
 
+    lint_log        = ch_lint_log
     reads           = ch_strand_inferred_fastq
     trim_read_count = ch_trim_read_count
 

@@ -1,6 +1,5 @@
-# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
 name: "fastq_qc_trim_filter_setstrandedness"
-description: Basic FASTQ preprocessing for RNA-seq
+description: Performs linting, quality control, trimming, filtering, and strandedness determination on RNA-seq FASTQ files, preparing them for downstream analysis.
 keywords:
   - fastq
   - rnaseq
@@ -19,39 +18,84 @@ components:
   - fastq_fastqc_umitools_trimgalore
   - fastq_fastqc_umitools_fastp
 input:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test' ]
   - ch_reads:
-      type: file
-      description: |
-        Channel with input FastQ files of size 1 and 2 for single-end and
-        paired-end data, respectively.
+      description: Channel with input FastQ files
+      structure:
+        - meta:
+            type: map
+            description: Groovy Map containing sample information e.g. [ id:'test' ]
+        - reads:
+            type: file
+            description: FastQ files
+            pattern: "*.{fq,fastq}{,.gz}"
   - ch_fasta:
-      type: file
       description: Channel with genome sequence in fasta format
+      structure:
+        - meta:
+            type: map
+            description: Metadata for the fasta file
+        - fasta:
+            type: file
+            description: Genome fasta file
+            pattern: "*.{fa,fasta}"
   - ch_transcript_fasta:
-      type: file
       description: Channel with transcriptome sequence in fasta format
+      structure:
+        - meta:
+            type: map
+            description: Metadata for the transcript fasta file
+        - fasta:
+            type: file
+            description: Transcript fasta file
+            pattern: "*.{fa,fasta}"
   - ch_gtf:
-      type: file
       description: Channel with features in GTF format
+      structure:
+        - meta:
+            type: map
+            description: Metadata for the GTF file
+        - gtf:
+            type: file
+            description: GTF file
+            pattern: "*.gtf"
   - ch_salmon_index:
-      type: file
       description: Directory containing Salmon index
+      structure:
+        - meta:
+            type: map
+            description: Metadata for the Salmon index
+        - index:
+            type: directory
+            description: Salmon index directory
   - ch_sortmerna_index:
-      type: file
       description: Directory containing sortmerna index
+      structure:
+        - meta:
+            type: map
+            description: Metadata for the SortMeRNA index
+        - index:
+            type: directory
+            description: SortMeRNA index directory
   - ch_bbsplit_index:
-      type: file
       description: Path to directory or tar.gz archive for pre-built BBSplit index
+      structure:
+        - meta:
+            type: map
+            description: Metadata for the BBSplit index
+        - index:
+            type: file
+            description: BBSplit index directory or tar.gz archive
+            pattern: "{*,*.tar.gz}"
   - ch_rrna_fastas:
-      type: file
-      description: |
-        Channel containing one or more FASTA files containing rRNA sequences
-        for use with SortMeRNA
+      description: Channel containing one or more FASTA files containing rRNA sequences for use with SortMeRNA
+      structure:
+        - meta:
+            type: map
+            description: Metadata for the rRNA fasta files
+        - fasta:
+            type: file
+            description: rRNA fasta files
+            pattern: "*.{fa,fasta}"
   - skip_bbsplit:
       type: boolean
       description: Whether to skip BBSplit for removal of non-reference genome reads
@@ -63,9 +107,7 @@ input:
       description: Whether to skip trimming
   - skip_umi_extract:
       type: boolean
-      description: |
-        Skip the UMI extraction from the read in case the UMIs have been moved
-        to the headers in advance of the pipeline run
+      description: Skip the UMI extraction from the read in case the UMIs have been moved to the headers in advance of the pipeline run
   - make_salmon_index:
       type: boolean
       description: Whether to create salmon index before running salmon quant
@@ -74,14 +116,10 @@ input:
       description: Whether to create sortmerna index before running sortmerna
   - trimmer:
       type: string
-      description: |
-        Specifies the trimming tool to use - available options are 'trimgalore'
-        and 'fastp'
+      description: Specifies the trimming tool to use - available options are 'trimgalore' and 'fastp'
   - min_trimmed_reads:
       type: integer
-      description: |
-        Minimum number of trimmed reads below which samples are removed from
-        further processing
+      description: Minimum number of trimmed reads below which samples are removed from further processing
   - save_trimmed:
       type: boolean
       description: Save the trimmed FastQ files in the results directory?
@@ -93,39 +131,66 @@ input:
       description: Enable UMI-based read deduplication
   - umi_discard_read:
       type: integer
-      description: |
-        After UMI barcode extraction discard either R1 or R2 by setting this
-        parameter to 1 or 2, respectively
+      description: After UMI barcode extraction discard either R1 or R2 by setting this parameter to 1 or 2, respectively
   - stranded_threshold:
       type: float
       min: 0.5
-      description: |
-        The fraction of stranded reads that must be assigned to a strandedness
-        for confident assignment. Must be at least 0.5.
+      description: The fraction of stranded reads that must be assigned to a strandedness for confident assignment. Must be at least 0.5.
   - unstranded_threshold:
       type: float
-      description: |
-        The difference in fraction of stranded reads assigned to 'forward' and
-        'reverse' below which a sample is classified as 'unstranded'.
+      description: The difference in fraction of stranded reads assigned to 'forward' and 'reverse' below which a sample is classified as 'unstranded'.
+  - skip_linting:
+      type: boolean
+      description: Whether to skip linting of FastQ files
 
 output:
   - reads:
-      type: file
       description: Preprocessed fastq reads
-      pattern: "*.{fq,fastq}{,.gz}"
+      structure:
+        - meta:
+            type: map
+            description: Metadata for the preprocessed reads
+        - reads:
+            type: file
+            description: Preprocessed FastQ files
+            pattern: "*.{fq,fastq}{,.gz}"
   - multiqc_files:
-      type: file
-      description: MultiQC-compatible output files from tools used in prepreocessing
-      pattern: "*"
+      description: MultiQC-compatible output files from tools used in preprocessing
+      structure:
+        - meta:
+            type: map
+            description: Metadata for the MultiQC files
+        - mqc:
+            type: file
+            description: MultiQC-compatible files
+            pattern: "*"
   - trim_read_count:
-      type: integer
       description: Number of reads remaining after trimming for all input samples
+      structure:
+        - meta:
+            type: map
+            description: Metadata for the trim read count
+        - count:
+            type: integer
+            description: Number of reads after trimming
   - versions:
-      type: file
-      description: |
-        File containing software versions
-        Structure: [ path(versions.yml) ]
-      pattern: "versions.yml"
+      description: File containing software versions
+      structure:
+        - versions:
+            type: file
+            description: File containing software versions
+            pattern: "versions.yml"
+  - lint_log:
+      description: Log files from FastQ linting
+      structure:
+        - meta:
+            type: map
+            description: Metadata for the lint log
+        - log:
+            type: file
+            description: FastQ lint log file
+            pattern: "*.log"
+
 authors:
   - "@pinin4fjords"
 maintainers: