Skip to content

Commit 7a6165f

Browse files
georgiakesfamosab
authored and committed
Add module picard/collectvariantcallingmetrics (#9502)
* Add module picard/collectvariantcallingmetrics * Run prettier * Fix format * Make one input tuple * Change test input structure * Add md5 checksum * Update snapshot * Update meta.yml * Support multithreading * Remove TODO Co-authored-by: Famke Bäuerle <[email protected]> --------- Co-authored-by: Famke Bäuerle <[email protected]>
1 parent 9d6f628 commit 7a6165f

File tree

5 files changed

+417
-0
lines changed

5 files changed

+417
-0
lines changed
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
3+
channels: # conda channels available for resolving the dependency listed below
4+
- conda-forge
5+
- bioconda
6+
dependencies:
7+
- "bioconda::picard=3.4.0" # same Picard release (3.4.0) as the container images named in the process definition
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
process PICARD_COLLECTVARIANTCALLINGMETRICS { // Runs Picard CollectVariantCallingMetrics on the VCF of each input tuple and emits the detail and summary metrics files
2+
tag "${meta.id}"
3+
label 'process_low'
4+
5+
conda "${moduleDir}/environment.yml"
6+
container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
7+
? 'https://depot.galaxyproject.org/singularity/picard:3.4.0--hdfd78af_0'
8+
: 'biocontainers/picard:3.4.0--hdfd78af_0'}"
9+
10+
input:
11+
tuple val(meta), path(vcf), path(index), path(intervals_file), path(fasta), path(dict), path(dbsnp), path(dbsnp_index) // index, dict and dbsnp_index are staged into the work dir but never named on the command line below
12+
13+
output:
14+
tuple val(meta), path("*.variant_calling_detail_metrics"), emit: detail_metrics
15+
tuple val(meta), path("*.variant_calling_summary_metrics"), emit: summary_metrics
16+
path "versions.yml", emit: versions
17+
18+
when:
19+
task.ext.when == null || task.ext.when
20+
21+
script:
22+
def args = task.ext.args ?: '' // extra command-line options supplied through task.ext.args
23+
def prefix = task.ext.prefix ?: "${meta.id}" // value passed to --OUTPUT; falls back to the sample id
24+
def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" // option is omitted when fasta is empty
25+
def intervals = intervals_file ? "--TARGET_INTERVALS ${intervals_file}" : "" // option is omitted when no intervals file is given
26+
27+
def avail_mem = 3072 // JVM heap in MB used when task.memory is not set
28+
if (!task.memory) {
29+
log.info('[Picard CollectVariantCallingMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.')
30+
}
31+
else {
32+
avail_mem = (task.memory.mega * 0.8).intValue() // cap the JVM heap at 80% of the task memory
33+
} // NOTE: the final picard argument line below must not end in a line continuation, otherwise the command would depend on the blank line that follows it
34+
"""
35+
picard \\
36+
-Xmx${avail_mem}M \\
37+
CollectVariantCallingMetrics \\
38+
${args} \\
39+
--THREAD_COUNT ${task.cpus} \\
40+
--INPUT ${vcf} \\
41+
--OUTPUT ${prefix} \\
42+
--DBSNP ${dbsnp} \\
43+
${reference} \\
44+
--TMP_DIR . \\
45+
${intervals}
46+
47+
cat <<-END_VERSIONS > versions.yml
48+
"${task.process}":
49+
picard: \$(echo \$(picard CollectVariantCallingMetrics --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:)
50+
END_VERSIONS
51+
"""
52+
53+
stub:
54+
def prefix = task.ext.prefix ?: "${meta.id}" // same basename rule as the script block
55+
"""
56+
touch ${prefix}.variant_calling_detail_metrics
57+
touch ${prefix}.variant_calling_summary_metrics
58+
59+
cat <<-END_VERSIONS > versions.yml
60+
"${task.process}":
61+
picard: \$(echo \$(picard CollectVariantCallingMetrics --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:)
62+
END_VERSIONS
63+
"""
64+
}
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
2+
name: "picard_collectvariantcallingmetrics"
3+
description: Collects per-sample and aggregate (spanning all samples) metrics from the provided VCF file
4+
keywords:
5+
- vcf
6+
- metrics
7+
- variant calling
8+
- statistics
9+
tools:
10+
- "picard":
11+
description: "A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS) data and formats such as SAM/BAM/CRAM and VCF."
12+
homepage: "https://broadinstitute.github.io/picard/"
13+
documentation: "https://broadinstitute.github.io/picard/"
14+
tool_dev_url: "https://github.com/broadinstitute/picard"
15+
licence: ["MIT"]
16+
identifier: biotools:picard_tools
17+
18+
input:
19+
- - meta:
20+
type: map
21+
description: |
22+
Groovy Map containing sample information
23+
e.g. `[ id:'sample1' ]`
24+
- vcf:
25+
type: file
26+
description: Input VCF file for analysis
27+
pattern: "*.{vcf,vcf.gz}"
28+
ontologies:
29+
- edam: "http://edamontology.org/format_3016" # VCF
30+
- index:
31+
type: file
32+
description: Index file for the input VCF file
33+
pattern: "*.{idx,tbi}"
34+
- intervals_file:
35+
type: file
36+
description: Optional file specifying target intervals (BED or Picard interval_list)
37+
pattern: "*.{bed,bed.gz,interval_list}"
38+
- fasta:
39+
type: file
40+
description: Reference sequence file
41+
pattern: "*.{fasta,fa,fasta.gz,fa.gz}"
42+
ontologies:
43+
- edam: "http://edamontology.org/format_1929" # FASTA
44+
- dict:
45+
type: file
46+
description: Reference sequence dictionary file
47+
pattern: "*.{dict}"
48+
- dbsnp:
49+
type: file
50+
description: Reference dbSNP file in dbSNP or VCF format
51+
pattern: "*.{vcf,vcf.gz}"
52+
ontologies:
53+
- edam: "http://edamontology.org/format_3016" # VCF
54+
- dbsnp_index:
55+
type: file
56+
description: Index file for the reference dbSNP file
57+
pattern: "*.{idx,tbi}"
58+
output:
59+
detail_metrics:
60+
- - meta:
61+
type: map
62+
description: |
63+
Groovy Map containing sample information
64+
e.g. `[ id:'sample1' ]`
65+
- "*.variant_calling_detail_metrics":
66+
type: file
67+
description: Detailed variant calling metrics file
68+
pattern: "*.variant_calling_detail_metrics"
69+
summary_metrics:
70+
- - meta:
71+
type: map
72+
description: |
73+
Groovy Map containing sample information
74+
e.g. `[ id:'sample1' ]`
75+
- "*.variant_calling_summary_metrics":
76+
type: file
77+
description: Summary variant calling metrics file
78+
pattern: "*.variant_calling_summary_metrics"
79+
versions:
80+
- versions.yml:
81+
type: file
82+
description: File containing software versions
83+
pattern: "versions.yml"
84+
85+
authors:
86+
- "@georgiakes"
87+
maintainers:
88+
- "@georgiakes"
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
nextflow_process { // nf-test suite for the PICARD_COLLECTVARIANTCALLINGMETRICS process
2+
3+
name "Test Process PICARD_COLLECTVARIANTCALLINGMETRICS"
4+
script "../main.nf" // process definition under test, given relative to this test file
5+
process "PICARD_COLLECTVARIANTCALLINGMETRICS"
6+
7+
tag "modules"
8+
tag "modules_nfcore"
9+
tag "picard"
10+
tag "picard/collectvariantcallingmetrics"
11+
12+
test("homo_sapiens - vcf_gz") { // real run without target intervals (the intervals_file slot is an empty list)
13+
14+
when {
15+
process {
16+
"""
17+
input[0] = [
18+
[ id:'test' ],
19+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true),
20+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true),
21+
[],
22+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true),
23+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true),
24+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true),
25+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists: true)
26+
]
27+
"""
28+
}
29+
}
30+
31+
then {
32+
assertAll(
33+
{ assert process.success },
34+
{ assert snapshot(
35+
path(process.out.detail_metrics[0][1]).readLines()[5..7].join('\n').md5(), // md5 over lines 5..7 (0-based) only; presumably the earlier header lines are not stable between runs - TODO confirm
36+
path(process.out.summary_metrics[0][1]).readLines()[5..7].join('\n').md5(), // same line window for the summary metrics file
37+
process.out.versions
38+
).match() }
39+
)
40+
}
41+
}
42+
43+
test("homo_sapiens - vcf_gz - stub") { // same inputs as the first test, run with the -stub option
44+
45+
options "-stub" // passes -stub to the Nextflow run
46+
47+
when {
48+
process {
49+
"""
50+
input[0] = [
51+
[ id:'test' ],
52+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true),
53+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true),
54+
[],
55+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true),
56+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true),
57+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true),
58+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists: true)
59+
]
60+
"""
61+
}
62+
}
63+
64+
then {
65+
assertAll(
66+
{ assert process.success },
67+
{ assert snapshot(process.out).match() } // snapshots every output channel as-is
68+
)
69+
}
70+
}
71+
test("homo_sapiens - vcf_gz - intervals"){ // real run with the chr21 genome.interval_list passed as intervals_file
72+
73+
when {
74+
process {
75+
"""
76+
input[0] = [
77+
[ id:'test' ],
78+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true),
79+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true),
80+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.interval_list',checkIfExists: true),
81+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true),
82+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true),
83+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true),
84+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists: true)
85+
]
86+
"""
87+
}
88+
}
89+
90+
then {
91+
assertAll(
92+
{ assert process.success },
93+
{ assert snapshot(
94+
path(process.out.detail_metrics[0][1]).readLines()[5..7].join('\n').md5(), // md5 over lines 5..7 (0-based) of the detail metrics file
95+
path(process.out.summary_metrics[0][1]).readLines()[5..7].join('\n').md5(), // md5 over lines 5..7 (0-based) of the summary metrics file
96+
process.out.versions
97+
).match() }
98+
)
99+
}
100+
}
101+
102+
test("homo_sapiens - vcf_gz - intervals - stub"){ // same inputs as the intervals test, run with the -stub option
103+
104+
options "-stub" // passes -stub to the Nextflow run
105+
106+
when {
107+
process {
108+
"""
109+
input[0] = [
110+
[ id:'test' ],
111+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true),
112+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true),
113+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.interval_list', checkIfExists: true ),
114+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true),
115+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true),
116+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true),
117+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists: true)
118+
]
119+
"""
120+
}
121+
}
122+
123+
then {
124+
assertAll(
125+
{ assert process.success },
126+
{ assert snapshot(process.out).match() } // snapshots every output channel as-is
127+
)
128+
}
129+
}
130+
}

0 commit comments

Comments
 (0)