Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- [PR #1640](https://github.com/nf-core/rnaseq/pull/1640) - Bump version after release 3.22.0
- [PR #1641](https://github.com/nf-core/rnaseq/pull/1641) - Add arm-based CI tests and fix arm-related issues
- [PR #1642](https://github.com/nf-core/rnaseq/pull/1642) - Add long format to rsem merge

## [[3.22.0](https://github.com/nf-core/rnaseq/releases/tag/3.22.0)] - 2025-11-26

Expand All @@ -20,7 +21,7 @@ Special thanks to the following for their contributions to the release:

- [Ahwan Pandey](https://github.com/ahwanpandey)
- [Cristina Tuñí i Domínguez](https://github.com/ctuni)
- [Elad Herzog](https://github.com/EladH1)
- [Elad Herz](https://github.com/EladH1)
- [Emily Miyoshi](https://github.com/emilymiyoshi)
- [Jonathan Manning](https://github.com/pinin4fjords)
- [Pontus Höjer](https://github.com/pontushojer)
Expand Down
2 changes: 2 additions & 0 deletions docs/output.md
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,8 @@ The STAR section of the MultiQC report shows a bar plot with alignment rates: go
- `rsem.merged.gene_tpm.tsv`: Matrix of gene-level TPM values across all samples.
- `rsem.merged.transcript_counts.tsv`: Matrix of isoform-level raw counts across all samples.
- `rsem.merged.transcript_tpm.tsv`: Matrix of isoform-level TPM values across all samples.
- `rsem.merged.genes_long.tsv`: Long-format table of gene-level results with one row per sample and gene, containing length, effective_length, expected_count, TPM, and FPKM across all samples.
- `rsem.merged.isoforms_long.tsv`: Long-format table of isoform-level results with one row per sample and transcript, containing length, effective_length, expected_count, TPM, FPKM, and IsoPct across all samples.
- `*.genes.results`: RSEM gene-level quantification results for each sample.
- `*.isoforms.results`: RSEM isoform-level quantification results for each sample.
- `*.STAR.genome.bam`: If `--save_align_intermeds` is specified the BAM file from STAR alignment containing read alignments to the reference genome will be placed in this directory. These files can be reused as `genome_bam` input in future pipeline runs.
Expand Down
18 changes: 18 additions & 0 deletions modules/local/rsem_merge_counts/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ process RSEM_MERGE_COUNTS {
path "rsem.merged.gene_tpm.tsv" , emit: tpm_gene
path "rsem.merged.transcript_counts.tsv", emit: counts_transcript
path "rsem.merged.transcript_tpm.tsv" , emit: tpm_transcript
path "rsem.merged.genes_long.tsv" , emit: genes_long
path "rsem.merged.isoforms_long.tsv" , emit: isoforms_long
path "versions.yml" , emit: versions

when:
Expand Down Expand Up @@ -47,6 +49,20 @@ process RSEM_MERGE_COUNTS {
paste transcript_ids.txt tmp/isoforms/*.counts.txt > rsem.merged.transcript_counts.tsv
paste transcript_ids.txt tmp/isoforms/*.tpm.txt > rsem.merged.transcript_tpm.tsv

# Create long format for genes (idx=1-4, concat columns 5-7)
echo -e "sample_name\tgene_id\ttranscript_id(s)\tlength\teffective_length\texpected_count\tTPM\tFPKM" > rsem.merged.genes_long.tsv
for fileid in `ls ./genes/*`; do
samplename=`basename \$fileid | sed s/\\.genes.results\$//g`
tail -n+2 \$fileid | awk -v sample=\$samplename 'BEGIN{OFS="\t"}{print sample,\$1,\$2,\$3,\$4,\$5,\$6,\$7}' >> rsem.merged.genes_long.tsv
done

# Create long format for isoforms (idx=1-4, concat columns 5-8)
echo -e "sample_name\ttranscript_id\tgene_id\tlength\teffective_length\texpected_count\tTPM\tFPKM\tIsoPct" > rsem.merged.isoforms_long.tsv
for fileid in `ls ./isoforms/*`; do
samplename=`basename \$fileid | sed s/\\.isoforms.results\$//g`
tail -n+2 \$fileid | awk -v sample=\$samplename 'BEGIN{OFS="\t"}{print sample,\$1,\$2,\$3,\$4,\$5,\$6,\$7,\$8}' >> rsem.merged.isoforms_long.tsv
done

cat <<-END_VERSIONS > versions.yml
"${task.process}":
sed: \$(echo \$(sed --version 2>&1) | sed 's/^.*GNU sed) //; s/ .*\$//')
Expand All @@ -59,6 +75,8 @@ process RSEM_MERGE_COUNTS {
touch rsem.merged.gene_tpm.tsv
touch rsem.merged.transcript_counts.tsv
touch rsem.merged.transcript_tpm.tsv
touch rsem.merged.genes_long.tsv
touch rsem.merged.isoforms_long.tsv

cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
36 changes: 30 additions & 6 deletions modules/local/rsem_merge_counts/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@
"rsem.merged.transcript_tpm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
],
"4": [
"rsem.merged.genes_long.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
],
"5": [
"rsem.merged.isoforms_long.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
],
"6": [
"versions.yml:md5,48ca3e12c91829af8019462b3f6aa29c"
],
"counts_gene": [
Expand All @@ -23,6 +29,12 @@
"counts_transcript": [
"rsem.merged.transcript_counts.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
],
"genes_long": [
"rsem.merged.genes_long.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
],
"isoforms_long": [
"rsem.merged.isoforms_long.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
],
"tpm_gene": [
"rsem.merged.gene_tpm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
],
Expand All @@ -35,10 +47,10 @@
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.2"
"nf-test": "0.9.3",
"nextflow": "25.04.6"
},
"timestamp": "2024-06-21T11:55:29.45389"
"timestamp": "2025-11-27T17:09:36.795867708"
},
"Should run without failures": {
"content": [
Expand All @@ -56,6 +68,12 @@
"rsem.merged.transcript_tpm.tsv:md5,abbaac45f9938716c58d604299ea284e"
],
"4": [
"rsem.merged.genes_long.tsv:md5,e26cd2b3b381432a011eed98f3ad4e6d"
],
"5": [
"rsem.merged.isoforms_long.tsv:md5,33b189595600493b917d786a542de8e9"
],
"6": [
"versions.yml:md5,48ca3e12c91829af8019462b3f6aa29c"
],
"counts_gene": [
Expand All @@ -64,6 +82,12 @@
"counts_transcript": [
"rsem.merged.transcript_counts.tsv:md5,e40bba0aafc5904361513b3513c217ad"
],
"genes_long": [
"rsem.merged.genes_long.tsv:md5,e26cd2b3b381432a011eed98f3ad4e6d"
],
"isoforms_long": [
"rsem.merged.isoforms_long.tsv:md5,33b189595600493b917d786a542de8e9"
],
"tpm_gene": [
"rsem.merged.gene_tpm.tsv:md5,39bad606eb012456bba1d995fe0feb5f"
],
Expand All @@ -76,9 +100,9 @@
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.1"
"nf-test": "0.9.3",
"nextflow": "25.04.6"
},
"timestamp": "2024-03-09T17:13:37.377348"
"timestamp": "2025-11-27T17:09:28.461621756"
}
}
125 changes: 120 additions & 5 deletions subworkflows/local/quantify_rsem/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -1,4 +1,119 @@
{
"homo_sapiens - sentieon - stub": {
"content": [
{
"0": [
[
{
"id": "test",
"strandedness": "forward"
},
"test.genes.results:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"1": [
[
{
"id": "test",
"strandedness": "forward"
},
"test.isoforms.results:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"2": [
[
{
"id": "test",
"strandedness": "forward"
},
"test.stat:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"3": [
[
{
"id": "test",
"strandedness": "forward"
},
"test.log:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"4": [
"rsem.merged.gene_counts.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
],
"5": [
"rsem.merged.gene_tpm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
],
"6": [
"rsem.merged.transcript_counts.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
],
"7": [
"rsem.merged.transcript_tpm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
],
"8": [
"versions.yml:md5,2aa5252eb2ffb409cf556a165d40f8a9",
"versions.yml:md5,773c15c4ecb7d486a4bdd8ef73e7ac5d"
],
"counts_gene": [
[
{
"id": "test",
"strandedness": "forward"
},
"test.genes.results:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"counts_transcript": [
[
{
"id": "test",
"strandedness": "forward"
},
"test.isoforms.results:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"logs": [
[
{
"id": "test",
"strandedness": "forward"
},
"test.log:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"merged_counts_gene": [
"rsem.merged.gene_counts.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
],
"merged_counts_transcript": [
"rsem.merged.transcript_counts.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
],
"merged_tpm_gene": [
"rsem.merged.gene_tpm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
],
"merged_tpm_transcript": [
"rsem.merged.transcript_tpm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
],
"stat": [
[
{
"id": "test",
"strandedness": "forward"
},
"test.stat:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"versions": [
"versions.yml:md5,2aa5252eb2ffb409cf556a165d40f8a9",
"versions.yml:md5,773c15c4ecb7d486a4bdd8ef73e7ac5d"
]
}
],
"meta": {
"nf-test": "0.9.3",
"nextflow": "25.04.6"
},
"timestamp": "2025-11-27T16:26:57.604104651"
},
"homo_sapiens - stub": {
"content": [
{
Expand Down Expand Up @@ -109,10 +224,10 @@
}
],
"meta": {
"nf-test": "0.9.2",
"nf-test": "0.9.3",
"nextflow": "25.04.6"
},
"timestamp": "2025-09-15T16:56:01.229068"
"timestamp": "2025-11-27T16:26:03.792661891"
},
"homo_sapiens": {
"content": [
Expand Down Expand Up @@ -165,9 +280,9 @@
]
],
"meta": {
"nf-test": "0.9.2",
"nextflow": "25.04.3"
"nf-test": "0.9.3",
"nextflow": "25.04.6"
},
"timestamp": "2025-09-16T08:19:00.078928064"
"timestamp": "2025-11-27T17:10:37.825495549"
}
}