Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
import groovy.json.JsonSlurper

include { BBMAP_BBSPLIT } from '../../../modules/nf-core/bbmap/bbsplit'
include { CAT_FASTQ } from '../../../modules/nf-core/cat/fastq/main'
include { SORTMERNA } from '../../../modules/nf-core/sortmerna/main'
include { SORTMERNA as SORTMERNA_INDEX } from '../../../modules/nf-core/sortmerna/main'

include { FASTQ_SUBSAMPLE_FQ_SALMON } from '../fastq_subsample_fq_salmon'
include { FASTQ_FASTQC_UMITOOLS_TRIMGALORE } from '../fastq_fastqc_umitools_trimgalore'
include { FASTQ_FASTQC_UMITOOLS_FASTP } from '../fastq_fastqc_umitools_fastp'
include { BBMAP_BBSPLIT } from '../../../modules/nf-core/bbmap/bbsplit'
include { CAT_FASTQ } from '../../../modules/nf-core/cat/fastq/main'
include { SORTMERNA } from '../../../modules/nf-core/sortmerna/main'
include { SORTMERNA as SORTMERNA_INDEX } from '../../../modules/nf-core/sortmerna/main'
include { FQ_LINT } from '../../../modules/nf-core/fq/lint/main'
include { FQ_LINT as FQ_LINT_AFTER_TRIMMING } from '../../../modules/nf-core/fq/lint/main'
// Alias must match the name invoked in the workflow body, which calls
// FQ_LINT_AFTER_BBSPLIT (not FQ_LINT_AFTER_BBMAP) after BBMAP_BBSPLIT.
include { FQ_LINT as FQ_LINT_AFTER_BBSPLIT } from '../../../modules/nf-core/fq/lint/main'
include { FQ_LINT as FQ_LINT_AFTER_SORTMERNA } from '../../../modules/nf-core/fq/lint/main'

include { FASTQ_SUBSAMPLE_FQ_SALMON } from '../fastq_subsample_fq_salmon'
include { FASTQ_FASTQC_UMITOOLS_TRIMGALORE } from '../fastq_fastqc_umitools_trimgalore'
include { FASTQ_FASTQC_UMITOOLS_FASTP } from '../fastq_fastqc_umitools_fastp'

def pass_trimmed_reads = [:]

Expand Down Expand Up @@ -106,13 +110,27 @@ workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS {
umi_discard_read // integer: 0, 1 or 2
stranded_threshold // float: The fraction of stranded reads that must be assigned to a strandedness for confident assignment. Must be at least 0.5
unstranded_threshold // float: The difference in fraction of stranded reads assigned to 'forward' and 'reverse' below which a sample is classified as 'unstranded'
skip_linting // boolean: true/false

main:

ch_versions = Channel.empty()
ch_filtered_reads = Channel.empty()
ch_trim_read_count = Channel.empty()
ch_multiqc_files = Channel.empty()
ch_lint_log = Channel.empty()

//
// MODULE: Lint FastQ files
//
// Lint the raw input reads unless linting is disabled via skip_linting.
if(!skip_linting) {
FQ_LINT (
// Flatten the FastQ collection so the module receives a single flat list per sample.
ch_reads.map{ meta, fastqs -> [meta, fastqs.flatten()] }
)
// Only the first versions emission is mixed in.
ch_versions = ch_versions.mix(FQ_LINT.out.versions.first())
ch_lint_log = ch_lint_log.mix(FQ_LINT.out.lint)
// Join the reads against the first element of each lint emission
// (presumably the meta map -- confirm against the FQ_LINT module), so a
// sample's reads are only passed on once a matching lint emission exists.
ch_reads = ch_reads.join(FQ_LINT.out.lint.map{it[0]})
}

ch_reads
.branch {
Expand Down Expand Up @@ -212,6 +230,14 @@ workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS {
.map { [[:], it] }
)

// Re-lint the reads in ch_filtered_reads, but only when both linting and
// trimming are enabled (neither skip_linting nor skip_trimming is set).
if((!skip_linting) && (!skip_trimming)) {
FQ_LINT_AFTER_TRIMMING (
ch_filtered_reads
)
ch_lint_log = ch_lint_log.mix(FQ_LINT_AFTER_TRIMMING.out.lint)
// Join against the first element of each lint emission (presumably the
// meta map -- confirm against the FQ_LINT module) so reads only continue
// once a matching lint emission exists.
ch_filtered_reads = ch_filtered_reads.join(FQ_LINT_AFTER_TRIMMING.out.lint.map{it[0]})
}

//
// MODULE: Remove genome contaminant reads
//
Expand All @@ -228,6 +254,14 @@ workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS {
.set { ch_filtered_reads }

ch_versions = ch_versions.mix(BBMAP_BBSPLIT.out.versions.first())

// Re-lint ch_filtered_reads as reassigned just above in the BBMAP_BBSPLIT
// section, unless linting is disabled via skip_linting.
if(!skip_linting) {
FQ_LINT_AFTER_BBSPLIT (
ch_filtered_reads
)
ch_lint_log = ch_lint_log.mix(FQ_LINT_AFTER_BBSPLIT.out.lint)
// Join against the first element of each lint emission (presumably the
// meta map -- confirm against the FQ_LINT module) so reads only continue
// once a matching lint emission exists.
ch_filtered_reads = ch_filtered_reads.join(FQ_LINT_AFTER_BBSPLIT.out.lint.map{it[0]})
}
}

//
Expand Down Expand Up @@ -260,6 +294,14 @@ workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS {
.mix(SORTMERNA.out.log)

ch_versions = ch_versions.mix(SORTMERNA.out.versions.first())

// Re-lint ch_filtered_reads within the SORTMERNA section, unless linting is
// disabled via skip_linting.
if(!skip_linting) {
FQ_LINT_AFTER_SORTMERNA (
ch_filtered_reads
)
ch_lint_log = ch_lint_log.mix(FQ_LINT_AFTER_SORTMERNA.out.lint)
// Join against the first element of each lint emission (presumably the
// meta map -- confirm against the FQ_LINT module) so reads only continue
// once a matching lint emission exists.
ch_filtered_reads = ch_filtered_reads.join(FQ_LINT_AFTER_SORTMERNA.out.lint.map{it[0]})
}
}

// Branch FastQ channels if 'auto' specified to infer strandedness
Expand Down Expand Up @@ -312,6 +354,7 @@ workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS {

emit:

lint_log = ch_lint_log
reads = ch_strand_inferred_fastq
trim_read_count = ch_trim_read_count

Expand Down
165 changes: 115 additions & 50 deletions subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/meta.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
name: "fastq_qc_trim_filter_setstrandedness"
description: Basic FASTQ preprocessing for RNA-seq
description: Performs linting, quality control, trimming, filtering, and strandedness determination on RNA-seq FASTQ files, preparing them for downstream analysis.
keywords:
- fastq
- rnaseq
Expand All @@ -19,39 +18,84 @@ components:
- fastq_fastqc_umitools_trimgalore
- fastq_fastqc_umitools_fastp
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test' ]
- ch_reads:
type: file
description: |
Channel with input FastQ files of size 1 and 2 for single-end and
paired-end data, respectively.
description: Channel with input FastQ files
structure:
- meta:
type: map
description: Groovy Map containing sample information e.g. [ id:'test' ]
- reads:
type: file
description: FastQ files
pattern: "*.{fq,fastq}{,.gz}"
- ch_fasta:
type: file
description: Channel with genome sequence in fasta format
structure:
- meta:
type: map
description: Metadata for the fasta file
- fasta:
type: file
description: Genome fasta file
pattern: "*.{fa,fasta}"
- ch_transcript_fasta:
type: file
description: Channel with transcriptome sequence in fasta format
structure:
- meta:
type: map
description: Metadata for the transcript fasta file
- fasta:
type: file
description: Transcript fasta file
pattern: "*.{fa,fasta}"
- ch_gtf:
type: file
description: Channel with features in GTF format
structure:
- meta:
type: map
description: Metadata for the GTF file
- gtf:
type: file
description: GTF file
pattern: "*.gtf"
- ch_salmon_index:
type: file
description: Directory containing Salmon index
structure:
- meta:
type: map
description: Metadata for the Salmon index
- index:
type: directory
description: Salmon index directory
- ch_sortmerna_index:
type: file
description: Directory containing sortmerna index
structure:
- meta:
type: map
description: Metadata for the SortMeRNA index
- index:
type: directory
description: SortMeRNA index directory
- ch_bbsplit_index:
type: file
description: Path to directory or tar.gz archive for pre-built BBSplit index
structure:
- meta:
type: map
description: Metadata for the BBSplit index
- index:
type: file
description: BBSplit index directory or tar.gz archive
pattern: "{*,*.tar.gz}"
- ch_rrna_fastas:
type: file
description: |
Channel containing one or more FASTA files containing rRNA sequences
for use with SortMeRNA
description: Channel containing one or more FASTA files containing rRNA sequences for use with SortMeRNA
structure:
- meta:
type: map
description: Metadata for the rRNA fasta files
- fasta:
type: file
description: rRNA fasta files
pattern: "*.{fa,fasta}"
- skip_bbsplit:
type: boolean
description: Whether to skip BBSplit for removal of non-reference genome reads
Expand All @@ -63,9 +107,7 @@ input:
description: Whether to skip trimming
- skip_umi_extract:
type: boolean
description: |
Skip the UMI extraction from the read in case the UMIs have been moved
to the headers in advance of the pipeline run
description: Skip the UMI extraction from the read in case the UMIs have been moved to the headers in advance of the pipeline run
- make_salmon_index:
type: boolean
description: Whether to create salmon index before running salmon quant
Expand All @@ -74,14 +116,10 @@ input:
description: Whether to create sortmerna index before running sortmerna
- trimmer:
type: string
description: |
Specifies the trimming tool to use - available options are 'trimgalore'
and 'fastp'
description: Specifies the trimming tool to use - available options are 'trimgalore' and 'fastp'
- min_trimmed_reads:
type: integer
description: |
Minimum number of trimmed reads below which samples are removed from
further processing
description: Minimum number of trimmed reads below which samples are removed from further processing
- save_trimmed:
type: boolean
description: Save the trimmed FastQ files in the results directory?
Expand All @@ -93,39 +131,66 @@ input:
description: Enable UMI-based read deduplication
- umi_discard_read:
type: integer
description: |
After UMI barcode extraction discard either R1 or R2 by setting this
parameter to 1 or 2, respectively
description: After UMI barcode extraction discard either R1 or R2 by setting this parameter to 1 or 2, respectively
- stranded_threshold:
type: float
min: 0.5
description: |
The fraction of stranded reads that must be assigned to a strandedness
for confident assignment. Must be at least 0.5.
description: The fraction of stranded reads that must be assigned to a strandedness for confident assignment. Must be at least 0.5.
- unstranded_threshold:
type: float
description: |
The difference in fraction of stranded reads assigned to 'forward' and
'reverse' below which a sample is classified as 'unstranded'.
description: The difference in fraction of stranded reads assigned to 'forward' and 'reverse' below which a sample is classified as 'unstranded'.
- skip_linting:
type: boolean
description: Whether to skip linting of FastQ files

output:
- reads:
type: file
description: Preprocessed fastq reads
pattern: "*.{fq,fastq}{,.gz}"
structure:
- meta:
type: map
description: Metadata for the preprocessed reads
- reads:
type: file
description: Preprocessed FastQ files
pattern: "*.{fq,fastq}{,.gz}"
- multiqc_files:
type: file
description: MultiQC-compatible output files from tools used in prepreocessing
pattern: "*"
description: MultiQC-compatible output files from tools used in preprocessing
structure:
- meta:
type: map
description: Metadata for the MultiQC files
- mqc:
type: file
description: MultiQC-compatible files
pattern: "*"
- trim_read_count:
type: integer
description: Number of reads remaining after trimming for all input samples
structure:
- meta:
type: map
description: Metadata for the trim read count
- count:
type: integer
description: Number of reads after trimming
- versions:
type: file
description: |
File containing software versions
Structure: [ path(versions.yml) ]
pattern: "versions.yml"
description: File containing software versions
structure:
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- lint_log:
description: Log files from FastQ linting
structure:
- meta:
type: map
description: Metadata for the lint log
- log:
type: file
description: FastQ lint log file
pattern: "*.log"

authors:
- "@pinin4fjords"
maintainers:
Expand Down
Loading
Loading