Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions modules/nf-core/ragtag/patch/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Conda environment used by the RAGTAG_PATCH module (referenced via `conda` in main.nf)
channels:
- conda-forge
- bioconda
dependencies:
# Pinned to ragtag 2.1.0, the same version as the container images named in main.nf
- "bioconda::ragtag=2.1.0"
106 changes: 106 additions & 0 deletions modules/nf-core/ragtag/patch/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
// RAGTAG_PATCH: runs `ragtag.py patch` on a target assembly, using a query assembly,
// and publishes the renamed result files as separate output channels.
process RAGTAG_PATCH {
tag "${meta.id}"
label 'process_medium'

// Software comes either from the conda environment or from a ragtag 2.1.0 image;
// the singularity image is used unless pulling the docker container is requested.
conda "${moduleDir}/environment.yml"
container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
? 'https://depot.galaxyproject.org/singularity/ragtag:2.1.0--pyhb7b1952_0'
: 'biocontainers/ragtag:2.1.0--pyhb7b1952_0'}"

input:
// target and query are staged into separate sub-directories ('target/', 'query/')
// so that two inputs sharing the same file name do not collide in the work directory.
tuple val(meta), path(target, name: 'target/*')
tuple val(meta2), path(query, name: 'query/*')
// Optional files: forwarded to ragtag as `-e` (exclude) and `-j` (skip) when provided.
tuple val(meta3), path(exclude)
tuple val(meta4), path(skip)

output:
// All file outputs carry the meta map of the target assembly.
tuple val(meta), path("*.patch.fasta"), emit: patch_fasta
tuple val(meta), path("*.patch.agp"), emit: patch_agp
tuple val(meta), path("*.comps.fasta"), emit: patch_components_fasta
// Outputs marked `optional: true` may be absent without failing the task.
tuple val(meta), path("*.ragtag.patch.asm.*"), emit: assembly_alignments, optional: true
tuple val(meta), path("*.ctg.agp"), emit: target_splits_agp
tuple val(meta), path("*.ctg.fasta"), emit: target_splits_fasta
tuple val(meta), path("*.rename.agp"), emit: qry_rename_agp, optional: true
tuple val(meta), path("*.rename.fasta"), emit: qry_rename_fasta, optional: true
tuple val(meta), path("*.patch.err"), emit: stderr
path "versions.yml", emit: versions

when:
// Runs unless task.ext.when is set to a falsy value.
task.ext.when == null || task.ext.when

script:
// Output prefix and extra CLI arguments can be overridden through task.ext
def prefix = task.ext.prefix ?: "${meta.id}"
def args = task.ext.args ?: ""
// Optional inputs: only add the corresponding flag when a file was provided
def arg_exclude = exclude ? "-e ${exclude}" : ""
def arg_skip = skip ? "-j ${skip}" : ""
"""
# Expose both assemblies under fixed, uncompressed names (target.fa / query.fa).
# Gzipped inputs are decompressed; plain inputs are symlinked rather than copied
# so that large genomes are not duplicated in the work directory.
if [[ "${target}" == *.gz ]]
then
    zcat "${target}" > target.fa
else
    ln -s "${target}" target.fa
fi

if [[ "${query}" == *.gz ]]
then
    zcat "${query}" > query.fa
else
    ln -s "${query}" query.fa
fi
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I still don't really follow the logic here: why load them into a directory as defined in target if you then just use the list of input fastas directly? And again, I didn't fully understand why you need to rename the files - is trying to put in two files named genome.fa really going to be so common? I would still rather just decompress with gunzip, and give them directly to the command.

I would also not cp the reference file as for large reference genomes that's a lot of HDD space to use, maybe just symlink to get the correct name?

However this is not a blocker here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I still don't really follow the logic here: why load them into a directory as defined in target if you then just use the list of input fastas directly? And again, I didn't fully understand why you need to rename the files - is trying to put in two files named genome.fa really going to be so common? I would still rather just decompress with gunzip, and give them directly to the command.

I do not know how common it is that someone uses two files with the same name for both target and reference, but it doesn't hurt to support that I guess? Loading them into a different directory is purely to avoid having a file with the same name twice; not sure if there is something I am not getting about what you are asking?

I would also not cp the reference file as for large reference genomes that's a lot of HDD space to use, maybe just symlink to get the correct name?

I have changed to symlinking.



# Run ragtag patch; stdout and stderr are shown live and also saved to log files
ragtag.py patch target.fa query.fa \\
    -o "${prefix}" \\
    -t ${task.cpus} \\
    ${arg_exclude} \\
    ${arg_skip} \\
    ${args} \\
    2> >( tee ${prefix}.stderr.log >&2 ) \\
    | tee ${prefix}.stdout.log

# Move the results out of the output folder, replacing the generic 'ragtag' name with the prefix
mv ${prefix}/ragtag.patch.agp ${prefix}.patch.agp
mv ${prefix}/ragtag.patch.fasta ${prefix}.patch.fasta
mv ${prefix}/ragtag.patch.comps.fasta ${prefix}.comps.fasta
mv ${prefix}/ragtag.patch.ctg.agp ${prefix}.ctg.agp
mv ${prefix}/ragtag.patch.ctg.fasta ${prefix}.ctg.fasta

# The renamed-query files are optional outputs and are only moved when present
if [ -f ${prefix}/ragtag.patch.rename.agp ]; then
    mv ${prefix}/ragtag.patch.rename.agp ${prefix}.rename.agp
fi

if [ -f ${prefix}/ragtag.patch.rename.fasta ]; then
    mv ${prefix}/ragtag.patch.rename.fasta ${prefix}.rename.fasta
fi
mv ${prefix}/ragtag.patch.err ${prefix}.patch.err

# Move the assembly alignment files from the prefix folder and add the prefix.
# The new name must be '<prefix>.ragtag.patch.asm.*' so that it matches the declared
# output glob '*.ragtag.patch.asm.*'; an unmatched glob is skipped by the -e guard.
for alignment_file in ${prefix}/ragtag.patch.asm.*
do
    [ -e "\$alignment_file" ] || continue
    mv "\$alignment_file" "${prefix}.\$(basename "\$alignment_file")"
done

cat <<-END_VERSIONS > versions.yml
"${task.process}":
    ragtag: \$(echo \$(ragtag.py -v | sed 's/v//'))
END_VERSIONS
"""

stub:
// The stub only needs the output prefix; no ragtag arguments are evaluated here
def prefix = task.ext.prefix ?: "${meta.id}"
"""
# Create an empty placeholder for every declared output, including the optional ones
touch ${prefix}.patch.agp
touch ${prefix}.patch.fasta
touch ${prefix}.comps.fasta
touch ${prefix}.ctg.agp
touch ${prefix}.ctg.fasta
touch ${prefix}.rename.agp
touch ${prefix}.rename.fasta
touch ${prefix}.ragtag.patch.asm.1
touch ${prefix}.patch.err

# Same layout as the script block: versions are keyed by the process name
cat <<-END_VERSIONS > versions.yml
"${task.process}":
    ragtag: \$(echo \$(ragtag.py -v | sed 's/v//'))
END_VERSIONS
"""
}
156 changes: 156 additions & 0 deletions modules/nf-core/ragtag/patch/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
name: "ragtag_patch"
description: "Homology-based assembly patching: Make continuous joins and fill gaps
in 'target.fa' using sequences from 'query.fa'"

keywords:
- assembly
- consensus
- ragtag
- patch
tools:
- "ragtag":
description: "Fast reference-guided genome assembly scaffolding"
homepage: "https://github.com/malonge/RagTag/wiki"
documentation: "https://github.com/malonge/RagTag/wiki"
tool_dev_url: "https://github.com/malonge/RagTag"
doi: "10.1186/s13059-022-02823-7"
licence: ["MIT"]
identifier: biotools:ragtag
input:
- - meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test' ]
- target:
type: file
description: Target assembly
pattern: "*.{fasta,fasta.gz}"
- - meta2:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test' ]
- query:
type: file
description: Query assembly
pattern: "*.{fasta,fasta.gz}"
- - meta3:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test' ]
- exclude:
type: file
description: list of target sequences to ignore
pattern: "*.txt"
- - meta4:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test' ]
- skip:
type: file
description: list of query sequences to ignore
pattern: "*.txt"
output:
- patch_fasta:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test' ]
- "*.patch.fasta":
type: file
description: FASTA file containing the patched assembly
pattern: "*.patch.fasta"
- patch_agp:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test' ]
- "*.patch.agp":
type: file
description: AGP file defining how the patched assembly (*.patch.fasta) is built
pattern: "*.patch.agp"
- patch_components_fasta:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test' ]
- "*.comps.fasta":
type: file
description: The split target assembly and the renamed query assembly combined
into one FASTA file. This file contains all components listed in the patch AGP file (*.patch.agp)
pattern: "*.comps.fasta"
- assembly_alignments:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test' ]
- "*.ragtag.patch.asm.*":
type: file
description: Assembly alignment files
pattern: "*.ragtag.patch.asm.*"
- target_splits_agp:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test' ]
- "*.ctg.agp":
type: file
description: An AGP file defining how the target assembly was split at gaps
pattern: "*.ctg.agp"
- target_splits_fasta:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test' ]
- "*.ctg.fasta":
type: file
description: FASTA file containing the target assembly split at gaps
pattern: "*.ctg.fasta"
- qry_rename_agp:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test' ]
- "*.rename.agp":
type: file
description: An AGP file defining the new names for query sequences
pattern: "*.rename.agp"
- qry_rename_fasta:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test' ]
- "*.rename.fasta":
type: file
description: A FASTA file with the original query sequence, but with new names
pattern: "*.rename.fasta"
- stderr:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test' ]
- "*.patch.err":
type: file
description: Standard error logging for all external RagTag commands
pattern: "*.patch.err"
- versions:
- versions.yml:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@nschan"
maintainers:
- "@nschan"
89 changes: 89 additions & 0 deletions modules/nf-core/ragtag/patch/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
nextflow_process {

name "Test Process RAGTAG_PATCH"
script "../main.nf"
process "RAGTAG_PATCH"

tag "modules"
tag "modules_nfcore"
tag "ragtag"
tag "ragtag/patch"


test("A. thaliana Col-0 test data - ragtag - patch") {

// NOTE(review): despite the test name, the inputs below are the sarscov2 genome
// FASTA, used as both target and query; the exclude and skip inputs are left empty.
when {
process {
"""
input[0] = [
[ id:'test' ], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
]
input[1] = [
[], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
]
input[2] = [
[],
[]
]
input[3] = [
[],
[]
]
"""
}
}

// Only a subset of the outputs is snapshotted: the optional channels
// (assembly_alignments, qry_rename_agp, qry_rename_fasta) and stderr are left out.
then {
assertAll(
{ assert process.success },
{ assert snapshot(
process.out.patch_fasta,
process.out.patch_agp,
process.out.patch_components_fasta,
process.out.target_splits_agp,
process.out.target_splits_fasta,
process.out.versions
).match()
},
)
}

}
test("A. thaliana Col-0 test data - ragtag - patch - stub") {

// Runs the process with -stub, so only the placeholder files from the stub block are produced.
options "-stub"

// NOTE(review): despite the test name, the inputs below are the sarscov2 genome
// FASTA, used as both target and query; the exclude and skip inputs are left empty.
when {
process {
"""
input[0] = [
[ id:'test' ], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
]
input[1] = [
[], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
]
input[2] = [
[],
[]
]
input[3] = [
[],
[]
]
"""
}
}

// The stub run snapshots every output channel at once.
then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}
}

}
Loading
Loading