Skip to content

Commit 2296655

Browse files
nschanmashehu
authored and committed
ragtag/scaffold and ragtag/patch modules and tests (#8198)
* ragtag/scaffold and ragtag/patch modules and tests * more keywords in scaffold meta.yml * @jfy133 reviews * exclude some empty files from test snapshot * use modules_testdata_base_path in tests * more input channels * fix meta.ymls * disable error redirection in ragtag/patch * capture stdout * capture correctly * use symlinks instead of copying * logging for ragtag/scaffold * experimental: kill tail * experimental: kill tail * escape correctly --------- Co-authored-by: Matthias Hörtenhuber <[email protected]>
1 parent b354737 commit 2296655

File tree

10 files changed

+980
-0
lines changed

10 files changed

+980
-0
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
channels:
2+
- conda-forge
3+
- bioconda
4+
dependencies:
5+
- "bioconda::ragtag=2.1.0"
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
// RAGTAG_PATCH: run `ragtag.py patch` to make joins / fill gaps in a target
// assembly using sequences from a query assembly.
//
// Inputs (each a [meta, file] tuple):
//   target  - target assembly FASTA, plain or gzipped (staged under target/)
//   query   - query assembly FASTA, plain or gzipped (staged under query/)
//   exclude - optional list of target sequences to ignore (passed as -e); may be []
//   skip    - optional list of query sequences to ignore (passed as -j); may be []
//
// RagTag writes everything into the "${prefix}" directory; the script moves the
// results into the task work dir and renames them to "${prefix}.<suffix>" so
// that they match the output globs declared below.
process RAGTAG_PATCH {
    tag "${meta.id}"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
        ? 'https://depot.galaxyproject.org/singularity/ragtag:2.1.0--pyhb7b1952_0'
        : 'biocontainers/ragtag:2.1.0--pyhb7b1952_0'}"

    input:
    tuple val(meta), path(target, name: 'target/*')
    tuple val(meta2), path(query, name: 'query/*')
    tuple val(meta3), path(exclude)
    tuple val(meta4), path(skip)

    output:
    tuple val(meta), path("*.patch.fasta"),         emit: patch_fasta
    tuple val(meta), path("*.patch.agp"),           emit: patch_agp
    tuple val(meta), path("*.comps.fasta"),         emit: patch_components_fasta
    tuple val(meta), path("*.ragtag.patch.asm.*"),  emit: assembly_alignments, optional: true
    tuple val(meta), path("*.ctg.agp"),             emit: target_splits_agp
    tuple val(meta), path("*.ctg.fasta"),           emit: target_splits_fasta
    tuple val(meta), path("*.rename.agp"),          emit: qry_rename_agp, optional: true
    tuple val(meta), path("*.rename.fasta"),        emit: qry_rename_fasta, optional: true
    tuple val(meta), path("*.patch.err"),           emit: stderr
    path "versions.yml",                            emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def prefix = task.ext.prefix ?: "${meta.id}"
    def args = task.ext.args ?: ""
    // Optional inputs arrive as [] when unset, which is falsy in Groovy.
    def arg_exclude = exclude ? "-e ${exclude}" : ""
    def arg_skip = skip ? "-j ${skip}" : ""
    """
    # Provide both assemblies under fixed, uncompressed names:
    # decompress gzipped input, otherwise just symlink it.
    if [[ ${target} == *.gz ]]
    then
        zcat ${target} > target.fa
    else
        ln -s ${target} target.fa
    fi

    if [[ ${query} == *.gz ]]
    then
        zcat ${query} > query.fa
    else
        ln -s ${query} query.fa
    fi

    # Stream the RagTag error log to the task stderr while the tool runs.
    # tail -F keeps retrying until the file appears.
    tail -F ${prefix}/ragtag.patch.err >&2 &
    tailpid=\$!
    ragtag.py patch target.fa query.fa \\
        -o "${prefix}" \\
        -t ${task.cpus} \\
        ${arg_exclude} \\
        ${arg_skip} \\
        ${args} # \\
        # 2> >( tee ${prefix}.stderr.log >&2 ) \\
        # | tee ${prefix}.stdout.log

    kill -TERM "\$tailpid"

    # Move the results out of the RagTag output folder, adding the prefix.
    mv ${prefix}/ragtag.patch.agp ${prefix}.patch.agp
    mv ${prefix}/ragtag.patch.fasta ${prefix}.patch.fasta
    mv ${prefix}/ragtag.patch.comps.fasta ${prefix}.comps.fasta
    mv ${prefix}/ragtag.patch.ctg.agp ${prefix}.ctg.agp
    mv ${prefix}/ragtag.patch.ctg.fasta ${prefix}.ctg.fasta
    if [ -f ${prefix}/ragtag.patch.rename.agp ]; then
        mv ${prefix}/ragtag.patch.rename.agp ${prefix}.rename.agp
    fi

    if [ -f ${prefix}/ragtag.patch.rename.fasta ]; then
        mv ${prefix}/ragtag.patch.rename.fasta ${prefix}.rename.fasta
    fi
    mv ${prefix}/ragtag.patch.err ${prefix}.patch.err

    # Move the assembly alignment files from the prefix folder and add the prefix.
    # The separator must be a dot so the result matches the declared output
    # glob *.ragtag.patch.asm.* (and the file name created by the stub).
    # A plain glob is used instead of parsing ls output; the -e guard skips the
    # unexpanded pattern when no alignment files exist.
    for alignment_file in ${prefix}/ragtag.patch.asm.*
    do
        [ -e "\$alignment_file" ] || continue
        mv "\$alignment_file" "${prefix}.\$(basename "\$alignment_file")"
    done

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        ragtag: \$(echo \$(ragtag.py -v | sed 's/v//'))
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Underscore-prefixed locals mirror the script section but are unused in the stub.
    def _args = task.ext.args ?: ""
    def _arg_exclude = exclude ? "-e ${exclude}" : ""
    def _arg_skip = skip ? "-j ${skip}" : ""
    """
    touch ${prefix}.patch.agp
    touch ${prefix}.patch.fasta
    touch ${prefix}.comps.fasta
    touch ${prefix}.ctg.agp
    touch ${prefix}.ctg.fasta
    touch ${prefix}.rename.agp
    touch ${prefix}.rename.fasta
    touch ${prefix}.ragtag.patch.asm.1
    touch ${prefix}.patch.err

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        ragtag: \$(echo \$(ragtag.py -v | sed 's/v//'))
    END_VERSIONS
    """
}
Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
name: "ragtag_patch"
2+
description: "Homology-based assembly patching: Make continuous joins and fill gaps
3+
in 'target.fa' using sequences from 'query.fa'"
4+
5+
keywords:
6+
- assembly
7+
- consensus
8+
- ragtag
9+
- patch
10+
tools:
11+
- "ragtag":
12+
description: "Fast reference-guided genome assembly scaffolding"
13+
homepage: "https://github.com/malonge/RagTag/wiki"
14+
documentation: "https://github.com/malonge/RagTag/wiki"
15+
tool_dev_url: "https://github.com/malonge/RagTag"
16+
doi: "10.1186/s13059-022-02823-7"
17+
licence: ["MIT"]
18+
identifier: biotools:ragtag
19+
input:
20+
- - meta:
21+
type: map
22+
description: |
23+
Groovy Map containing sample information
24+
e.g. [ id:'test' ]
25+
- target:
26+
type: file
27+
description: Target assembly
28+
pattern: "*.{fasta,fasta.gz}"
29+
- - meta2:
30+
type: map
31+
description: |
32+
Groovy Map containing sample information
33+
e.g. [ id:'test' ]
34+
- query:
35+
type: file
36+
description: Query assembly
37+
pattern: "*.{fasta,fasta.gz}"
38+
- - meta3:
39+
type: map
40+
description: |
41+
Groovy Map containing sample information
42+
e.g. [ id:'test' ]
43+
- exclude:
44+
type: file
45+
description: list of target sequences to ignore
46+
pattern: "*.txt"
47+
- - meta4:
48+
type: map
49+
description: |
50+
Groovy Map containing sample information
51+
e.g. [ id:'test' ]
52+
- skip:
53+
type: file
54+
description: list of query sequences to ignore
55+
pattern: "*.txt"
56+
output:
57+
- patch_fasta:
58+
- meta:
59+
type: map
60+
description: |
61+
Groovy Map containing sample information
62+
e.g. [ id:'test' ]
63+
- "*.patch.fasta":
64+
type: file
65+
description: FASTA file containing the patched assembly
66+
pattern: "*.patch.fasta"
67+
- patch_agp:
68+
- meta:
69+
type: map
70+
description: |
71+
Groovy Map containing sample information
72+
e.g. [ id:'test' ]
73+
- "*.patch.agp":
74+
type: file
75+
description: AGP file defining how ragtag.patch.fasta is built
76+
pattern: "*.patch.agp"
77+
- patch_components_fasta:
78+
- meta:
79+
type: map
80+
description: |
81+
Groovy Map containing sample information
82+
e.g. [ id:'test' ]
83+
- "*.comps.fasta":
84+
type: file
85+
description: The split target assembly and the renamed query assembly combined
86+
into one FASTA file. This file contains all components in ragtag.patch.agp
87+
pattern: "*.comps.fasta"
88+
- assembly_alignments:
89+
- meta:
90+
type: map
91+
description: |
92+
Groovy Map containing sample information
93+
e.g. [ id:'test' ]
94+
- "*.ragtag.patch.asm.*":
95+
type: file
96+
description: Assembly alignment files
97+
pattern: "*.ragtag.patch.asm.*"
98+
- target_splits_agp:
99+
- meta:
100+
type: map
101+
description: |
102+
Groovy Map containing sample information
103+
e.g. [ id:'test' ]
104+
- "*.ctg.agp":
105+
type: file
106+
description: An AGP file defining how the target assembly was split at gaps
107+
pattern: "*.ctg.agp"
108+
- target_splits_fasta:
109+
- meta:
110+
type: map
111+
description: |
112+
Groovy Map containing sample information
113+
e.g. [ id:'test' ]
114+
- "*.ctg.fasta":
115+
type: file
116+
description: FASTA file containing the target assembly split at gaps
117+
pattern: "*.ctg.fasta"
118+
- qry_rename_agp:
119+
- meta:
120+
type: map
121+
description: |
122+
Groovy Map containing sample information
123+
e.g. [ id:'test' ]
124+
- "*.rename.agp":
125+
type: file
126+
description: An AGP file defining the new names for query sequences
127+
pattern: "*.rename.agp"
128+
- qry_rename_fasta:
129+
- meta:
130+
type: map
131+
description: |
132+
Groovy Map containing sample information
133+
e.g. [ id:'test' ]
134+
- "*.rename.fasta":
135+
type: file
136+
description: A FASTA file with the original query sequence, but with new names
137+
pattern: "*.rename.fasta"
138+
- stderr:
139+
- meta:
140+
type: map
141+
description: |
142+
Groovy Map containing sample information
143+
e.g. [ id:'test' ]
144+
- "*.patch.err":
145+
type: file
146+
description: Standard error logging for all external RagTag commands
147+
pattern: "*.patch.err"
148+
- versions:
149+
- versions.yml:
150+
type: file
151+
description: File containing software versions
152+
pattern: "versions.yml"
153+
authors:
154+
- "@nschan"
155+
maintainers:
156+
- "@nschan"
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
nextflow_process {
2+
3+
name "Test Process RAGTAG_PATCH"
4+
script "../main.nf"
5+
process "RAGTAG_PATCH"
6+
7+
tag "modules"
8+
tag "modules_nfcore"
9+
tag "ragtag"
10+
tag "ragtag/patch"
11+
12+
13+
test("A. thaliana Col-0 test data - ragtag - patch") {
14+
15+
when {
16+
process {
17+
"""
18+
input[0] = [
19+
[ id:'test' ], // meta map
20+
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
21+
]
22+
input[1] = [
23+
[], // meta map
24+
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
25+
]
26+
input[2] = [
27+
[],
28+
[]
29+
]
30+
input[3] = [
31+
[],
32+
[]
33+
]
34+
"""
35+
}
36+
}
37+
38+
then {
39+
assertAll(
40+
{ assert process.success },
41+
{ assert snapshot(
42+
process.out.patch_fasta,
43+
process.out.patch_agp,
44+
process.out.patch_components_fasta,
45+
process.out.target_splits_agp,
46+
process.out.target_splits_fasta,
47+
process.out.versions
48+
).match()
49+
},
50+
)
51+
}
52+
53+
}
54+
test("A. thaliana Col-0 test data - ragtag - patch - stub") {
55+
56+
options "-stub"
57+
58+
when {
59+
process {
60+
"""
61+
input[0] = [
62+
[ id:'test' ], // meta map
63+
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
64+
]
65+
input[1] = [
66+
[], // meta map
67+
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
68+
]
69+
input[2] = [
70+
[],
71+
[]
72+
]
73+
input[3] = [
74+
[],
75+
[]
76+
]
77+
"""
78+
}
79+
}
80+
81+
then {
82+
assertAll(
83+
{ assert process.success },
84+
{ assert snapshot(process.out).match() }
85+
)
86+
}
87+
}
88+
89+
}

0 commit comments

Comments
 (0)