Skip to content

Commit ecdc480

Browse files
sofstam and sateeshperi authored
Add kmcp profile (#3466)
* Add kmcp profile * Prettier * Tests for kmcp profile * Update modules/nf-core/kmcp/profile/meta.yml Co-authored-by: Sateesh_Peri <33637490+sateeshperi@users.noreply.github.com> * Add stub * Fix stub * Update modules/nf-core/kmcp/profile/main.nf Co-authored-by: Sateesh_Peri <33637490+sateeshperi@users.noreply.github.com> * Revert stub back * Apply review suggestion * Add stubs * Update stub test * Correct type in stub * Test for kmcp compute * Fix stub * Add stub * Revert and will add stubs in separate PRs --------- Co-authored-by: Sateesh_Peri <33637490+sateeshperi@users.noreply.github.com>
1 parent 4b7d486 commit ecdc480

6 files changed

Lines changed: 171 additions & 0 deletions

File tree

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// Runs `kmcp profile` on the search results of one sample and emits the
// resulting `<prefix>.profile` file together with a versions.yml.
//
// Inputs:
//   meta, search_results - sample meta map and the search result file(s) given to `kmcp profile`
//   taxdump              - path passed to `-X`
//   taxid                - path passed to `-T`
//   mode                 - value passed to `-m`
// Outputs:
//   profile  - tuple of meta and the file(s) matching "*.profile"
//   versions - versions.yml recording the kmcp version
process KMCP_PROFILE {
    tag "$meta.id"
    label 'process_medium'

    conda "bioconda::kmcp=0.9.1"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/kmcp:0.9.1--h9ee0642_0':
        'biocontainers/kmcp:0.9.1--h9ee0642_0' }"

    input:
    tuple val(meta), path(search_results)
    path taxdump
    path taxid
    val mode

    output:
    tuple val(meta), path("*.profile"), emit: profile
    path "versions.yml"               , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Extra command-line options and the output file prefix can be overridden via task.ext.
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    kmcp \\
        profile \\
        $args \\
        -X $taxdump \\
        -T $taxid \\
        -m $mode \\
        -j $task.cpus \\
        -o ${prefix}.profile \\
        $search_results

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        kmcp: \$(echo \$(kmcp version 2>&1) | sed -n 1p | sed 's/^.*kmcp v//')
    END_VERSIONS
    """

    stub:
    // `prefix` is declared with `def` so it stays local to this block, mirroring `script:`.
    // The stub only creates an empty output file, so no `args` are needed here.
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}.profile

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        kmcp: \$(echo \$(kmcp version 2>&1) | sed -n 1p | sed 's/^.*kmcp v//')
    END_VERSIONS
    """
}
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# Module metadata for kmcp/profile: describes the tool, the process inputs
# (in the order the process declares them) and its outputs.
name: "kmcp_profile"
description: Generate taxonomic profile from search results
keywords:
  - metagenomics
  - classify
  - taxonomic profiling
  - fastq
  - sequences
  - kmers
  - index
tools:
  - "kmcp":
      description: "Accurate metagenomic profiling of both prokaryotic and viral populations by pseudo-mapping"
      homepage: "https://github.com/shenwei356/kmcp"
      documentation: "https://bioinf.shenwei.me/kmcp/usage/#profile"
      tool_dev_url: "https://github.com/shenwei356/kmcp"
      doi: "10.1093/bioinformatics/btac845"
      # A real YAML list rather than the string "['MIT']".
      licence: ["MIT"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - search_results:
      type: file
      description: Gzipped file output from kmcp search module
      pattern: "*.gz"
  - taxdump:
      type: directory
      description: Directory of NCBI taxonomy dump files - names.dmp, nodes.dmp
      pattern: "*"
  - taxid:
      type: file
      description: Tabular two-column file(s) mapping reference IDs to TaxIds.
      pattern: "*.{csv,tsv}"
  - mode:
      type: integer
      # Literal block scalar so each mode stays on its own line.
      description: |
        Profiling mode.
        0-pathogen detection
        1-higher recall
        2-high recall
        3-default
        4-high precision
        5-higher precision
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - profile:
      type: file
      description: Tab-delimited format file with 17 columns.
      pattern: "*.profile"

authors:
  - "@sofstam"

tests/config/pytest_modules.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1989,6 +1989,10 @@ kmcp/index:
19891989
- modules/nf-core/kmcp/index/**
19901990
- tests/modules/nf-core/kmcp/index/**
19911991

1992+
kmcp/profile:
1993+
- modules/nf-core/kmcp/profile/**
1994+
- tests/modules/nf-core/kmcp/profile/**
1995+
19921996
kofamscan:
19931997
- modules/nf-core/kofamscan/**
19941998
- tests/modules/nf-core/kofamscan/**
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
#!/usr/bin/env nextflow
2+
3+
nextflow.enable.dsl = 2
4+
5+
include { UNTAR } from '../../../../../modules/nf-core/untar/main.nf'
6+
include { KMCP_COMPUTE } from '../../../../../modules/nf-core/kmcp/compute/main.nf'
7+
include { KMCP_INDEX } from '../../../../../modules/nf-core/kmcp/index/main.nf'
8+
include { KMCP_SEARCH } from '../../../../../modules/nf-core/kmcp/search/main.nf'
9+
include { KMCP_PROFILE } from '../../../../../modules/nf-core/kmcp/profile/main.nf'
10+
11+
// End-to-end test for KMCP_PROFILE: computes and indexes a reference,
// searches reads against that index, then profiles the search result using an
// unpacked taxonomy archive and a sequence-id-to-taxid map.
workflow test_kmcp_profile {

    // Reference sequence handed to KMCP_COMPUTE
    reference_input = [
        [ id:'test', single_end:false ], // meta map
        file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/kmcp/NC_045512.2.fasta', checkIfExists: true)
    ]

    // Reads handed to KMCP_SEARCH
    reads_input = [
        [ id:'test', single_end:true ], // meta map
        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
    ]

    // Taxonomy archive (unpacked by UNTAR), taxid map and profiling mode for KMCP_PROFILE
    taxdump_archive = [
        [],
        file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/kmcp/kmcp_profile.tar.gz', checkIfExists: true)
    ]
    taxid_map    = file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/kmcp/seqid2taxid.map', checkIfExists: true)
    profile_mode = 3

    UNTAR ( taxdump_archive )
    KMCP_COMPUTE ( reference_input )
    KMCP_INDEX ( KMCP_COMPUTE.out.outdir )

    // Only the second element of each emitted tuple is forwarded downstream.
    index_db    = KMCP_INDEX.out.kmcp.map { entry -> entry[1] }
    taxdump_dir = UNTAR.out.untar.map { entry -> entry[1] }

    KMCP_SEARCH ( index_db, reads_input )
    KMCP_PROFILE ( KMCP_SEARCH.out.result, taxdump_dir, taxid_map, profile_mode )
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
// Process-scope settings for the kmcp/profile module test.
process {

    // Publish into a sub-directory of params.outdir named after the first
    // '_'-separated token of the unqualified process name, lower-cased.
    publishDir = {
        def simpleName = task.process.tokenize(':')[-1]
        def toolName   = simpleName.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${toolName}"
    }

    // KMCP_INDEX output prefix: the sample id followed by an underscore.
    withName: 'KMCP_INDEX' {
        ext.prefix = { "${meta.id}_" }
    }

}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# pytest-workflow definition for the kmcp/profile module test.
- name: kmcp profile test_kmcp_profile
  command: nextflow run ./tests/modules/nf-core/kmcp/profile -entry test_kmcp_profile -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/kmcp/profile/nextflow.config
  tags:
    - kmcp
    - kmcp/profile
  files:
    # Output of the module under test. Existence only, no checksum.
    # NOTE(review): assumes KMCP_SEARCH passes the 'test' meta id through and
    # that outputs are published under output/kmcp - confirm against a local run.
    - path: output/kmcp/test.profile
    # Outputs of the UNTAR helper step.
    - path: output/untar/kmcp_profile/kmcp_profile.tar.gz
    - path: output/untar/kmcp_profile/names.dmp
    - path: output/untar/kmcp_profile/nodes.dmp
    - path: output/untar/versions.yml

0 commit comments

Comments
 (0)