Skip to content

Commit 0af81a2

Browse files
jfy133SPPearceprototaxites
authored
GTDBTK_CLASSIFYWF database improvement (#8220)
* Add new input channel for premade seqid2map file * Add extra tests for using custom seqid2map file * Apply suggestions from code review * Update modules/nf-core/kraken2/build/tests/main.nf.test Co-authored-by: Simon Pearce <[email protected]> * Make database supply to GTDBTK classify WF more robust * Fix metadata * Update modules/nf-core/gtdbtk/classifywf/meta.yml Co-authored-by: Jim Downie <[email protected]> --------- Co-authored-by: Simon Pearce <[email protected]> Co-authored-by: Jim Downie <[email protected]>
1 parent a81612b commit 0af81a2

File tree

2 files changed

+22
-16
lines changed

2 files changed

+22
-16
lines changed

modules/nf-core/gtdbtk/classifywf/main.nf

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,21 +5,21 @@ process GTDBTK_CLASSIFYWF {
55
container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/gtdbtk:2.4.0--pyhdfd78af_1' : 'biocontainers/gtdbtk:2.4.0--pyhdfd78af_1'}"
66

77
input:
8-
tuple val(meta) , path("bins/*")
9-
tuple val(db_name), path("database/*")
8+
tuple val(meta), path("bins/*")
9+
tuple val(db_name), path(db)
1010
val use_pplacer_scratch_dir
1111
path mash_db
1212

1313
output:
14-
tuple val(meta), path("gtdbtk.${prefix}.*.summary.tsv") , emit: summary
15-
tuple val(meta), path("gtdbtk.${prefix}.*.classify.tree.gz") , emit: tree , optional: true
16-
tuple val(meta), path("gtdbtk.${prefix}.*.markers_summary.tsv"), emit: markers , optional: true
17-
tuple val(meta), path("gtdbtk.${prefix}.*.msa.fasta.gz") , emit: msa , optional: true
18-
tuple val(meta), path("gtdbtk.${prefix}.*.user_msa.fasta.gz") , emit: user_msa, optional: true
19-
tuple val(meta), path("gtdbtk.${prefix}.*.filtered.tsv") , emit: filtered, optional: true
20-
tuple val(meta), path("gtdbtk.${prefix}.failed_genomes.tsv") , emit: failed , optional: true
21-
tuple val(meta), path("gtdbtk.${prefix}.log") , emit: log
22-
tuple val(meta), path("gtdbtk.${prefix}.warnings.log") , emit: warnings
14+
tuple val(meta), path("gtdbtk.${prefix}.*.summary.tsv"), emit: summary
15+
tuple val(meta), path("gtdbtk.${prefix}.*.classify.tree.gz"), emit: tree, optional: true
16+
tuple val(meta), path("gtdbtk.${prefix}.*.markers_summary.tsv"), emit: markers, optional: true
17+
tuple val(meta), path("gtdbtk.${prefix}.*.msa.fasta.gz"), emit: msa, optional: true
18+
tuple val(meta), path("gtdbtk.${prefix}.*.user_msa.fasta.gz"), emit: user_msa, optional: true
19+
tuple val(meta), path("gtdbtk.${prefix}.*.filtered.tsv"), emit: filtered, optional: true
20+
tuple val(meta), path("gtdbtk.${prefix}.failed_genomes.tsv"), emit: failed, optional: true
21+
tuple val(meta), path("gtdbtk.${prefix}.log"), emit: log
22+
tuple val(meta), path("gtdbtk.${prefix}.warnings.log"), emit: warnings
2323
path ("versions.yml"), emit: versions
2424

2525
when:
@@ -28,11 +28,12 @@ process GTDBTK_CLASSIFYWF {
2828
script:
2929
def args = task.ext.args ?: ''
3030
def pplacer_scratch = use_pplacer_scratch_dir ? "--scratch_dir pplacer_tmp" : ""
31-
def mash_mode = mash_db ? "--mash_db ${mash_db}" : "--skip_ani_screen"
31+
def mash_mode = mash_db ? "--mash_db ${mash_db}" : "--skip_ani_screen"
3232
prefix = task.ext.prefix ?: "${meta.id}"
3333

3434
"""
35-
export GTDBTK_DATA_PATH="\${PWD}/database"
35+
# Locate the database root: the directory that directly contains the 'metadata' subdirectory (symlinks are followed because the database is staged as a link)
export GTDBTK_DATA_PATH="\$(find -L ${db} -name 'metadata' -type d -exec dirname {} \\;)"
36+
3637
# Quote the interpolated option string so the comparison stays a valid three-argument test whether it is empty or contains spaces
if [ "${pplacer_scratch}" != "" ] ; then
3738
mkdir pplacer_tmp
3839
fi

modules/nf-core/gtdbtk/classifywf/meta.yml

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,18 @@ input:
2727
e.g. [ id:'test', single_end:false, assembler:'spades' ]
2828
- '"bins/*"':
2929
type: file
30-
description: bins
30+
description: A list of one or more bins in FASTA format for classification
31+
pattern: "*.{fasta,fna,fas,fa}{,.gz}"
3132
- - db_name:
3233
type: string
3334
description: The name of the GTDB database to use.
34-
- '"database/*"':
35+
- db:
3536
type: file
36-
description: GTDB database
37+
description: |
38+
Path to a directory containing a GTDB database, as uncompressed from the 'full package' gtdbtk_data.tar.gz file.
39+
You can give the 'release<version number>' directory here.
40+
The directory must contain the 'metadata' subdirectory.
41+
pattern: "release[0-9]+\/"
3742
- - use_pplacer_scratch_dir:
3843
type: boolean
3944
description: Set to true to reduce pplacer memory usage by writing to disk (slower)

0 commit comments

Comments
 (0)