diff --git a/modules/nf-core/clair3/environment.yml b/modules/nf-core/clair3/environment.yml
new file mode 100644
index 000000000000..e1a7b8468046
--- /dev/null
+++ b/modules/nf-core/clair3/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - "bioconda::clair3=1.0.10"
diff --git a/modules/nf-core/clair3/main.nf b/modules/nf-core/clair3/main.nf
new file mode 100644
index 000000000000..d893ae133de4
--- /dev/null
+++ b/modules/nf-core/clair3/main.nf
@@ -0,0 +1,58 @@
+process CLAIR3 {
+    tag "$meta.id"
+    label 'process_high'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/clair3:1.0.10--py39hd649744_1':
+        'biocontainers/clair3:1.0.10--py39hd649744_1' }"
+
+    input:
+    tuple val(meta), path(bam), path(bai), path(model), val(platform)
+    tuple val(meta2), path(reference)
+    tuple val(meta3), path(index)
+
+    output:
+    // Exact file names are used on purpose: a "*merge_output.vcf.gz" glob would
+    // also capture phased_merge_output.vcf.gz and turn the vcf/tbi channels into lists.
+    tuple val(meta), path("merge_output.vcf.gz"), emit: vcf
+    tuple val(meta), path("merge_output.vcf.gz.tbi"), emit: tbi
+    tuple val(meta), path("phased_merge_output.vcf.gz"), emit: phased_vcf, optional: true
+    tuple val(meta), path("phased_merge_output.vcf.gz.tbi"), emit: phased_tbi, optional: true
+    path "versions.yml", emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    """
+    run_clair3.sh \\
+        --bam_fn=$bam \\
+        --ref_fn=$reference \\
+        --threads=$task.cpus \\
+        --output=. \\
+        --platform=$platform \\
+        --model=$model \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        clair3: \$(run_clair3.sh --version |& sed '1!d ; s/Clair3 v//')
+    END_VERSIONS
+    """
+
+    stub:
+    // The stub creates the same un-prefixed file names that the script block leaves in --output=.
+    """
+    echo "" | gzip > phased_merge_output.vcf.gz
+    touch phased_merge_output.vcf.gz.tbi
+    echo "" | gzip > merge_output.vcf.gz
+    touch merge_output.vcf.gz.tbi
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        clair3: \$(run_clair3.sh --version |& sed '1!d ; s/Clair3 v//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/clair3/meta.yml b/modules/nf-core/clair3/meta.yml
new file mode 100644
index 000000000000..bc12fd627f06
--- /dev/null
+++ b/modules/nf-core/clair3/meta.yml
@@ -0,0 +1,116 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "clair3"
+description: Clair3 is a germline small variant caller for long-reads
+keywords:
+- germline
+- variant
+- Indel
+- SNV
+tools:
+- "clair3":
+    description: "Clair3 is a small variant caller for long-reads. Compared to PEPPER
+      (r0.4), Clair3 (v0.1) shows a better SNP F1-score with ≤30-fold of ONT data
+      (precisionFDA Truth Challenge V2), and a better Indel F1-score, while generally running
+      four times faster. Clair3 makes the best of both worlds of using pileup or full-alignment
+      as input for deep-learning based long-read small variant calling. Clair3 is
+      simple and modular for easy deployment and integration."
+    homepage: "https://github.com/HKU-BAL/Clair3"
+    documentation: "https://github.com/HKU-BAL/Clair3"
+    tool_dev_url: "https://github.com/HKU-BAL/Clair3"
+    doi: "10.1038/s43588-022-00387-x"
+    licence: ['BSD-3-clause']
+    identifier: biotools:clair3
+
+input:
+- - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1' ]`
+  - bam:
+      type: file
+      description: Sorted BAM/CRAM/SAM file
+      pattern: "*.{bam,cram,sam}"
+      ontologies:
+      - edam: "http://edamontology.org/format_2572"
+      - edam: "http://edamontology.org/format_2573"
+      - edam: "http://edamontology.org/format_3462"
+  - bai:
+      type: file
+      description: BAM index file
+      pattern: "*.bai"
+  - model:
+      type: directory
+      description: collection of files used in a trained Clair3 model
+  - platform:
+      type: string
+      description: val in ['hifi','ont', 'ilmn'] to indicate pacbio, ONT, or illumina
+        respectively
+- - meta2:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. `[ id:'genome' ]`
+  - reference:
+      type: file
+      description: reference fasta file
+      pattern: "*.fasta"
+- - meta3:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. `[ id:'genome' ]`
+  - index:
+      type: file
+      description: reference index file
+      pattern: "*.fai"
+output:
+- vcf:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1' ]`
+  - 'merge_output.vcf.gz':
+      type: file
+      description: bgzip-compressed VCF with the merged pileup and full-alignment calls
+      pattern: "merge_output.vcf.gz"
+- tbi:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1' ]`
+  - 'merge_output.vcf.gz.tbi':
+      type: file
+      description: tabix index of the merged VCF
+      pattern: "merge_output.vcf.gz.tbi"
+- phased_vcf:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1' ]`
+  - 'phased_merge_output.vcf.gz':
+      type: file
+      description: phased vcf
+      pattern: "phased_merge_output.vcf.gz"
+- phased_tbi:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1' ]`
+  - 'phased_merge_output.vcf.gz.tbi':
+      type: file
+      description: tabix index of the phased VCF
+      pattern: "phased_merge_output.vcf.gz.tbi"
+- versions:
+  - versions.yml:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+- "@robert-a-forsyth"
+maintainers:
+- "@robert-a-forsyth"
diff --git a/modules/nf-core/clair3/tests/main.nf.test b/modules/nf-core/clair3/tests/main.nf.test
new file mode 100644
index 000000000000..a69d417b65b7
--- /dev/null
+++ b/modules/nf-core/clair3/tests/main.nf.test
@@ -0,0 +1,102 @@
+nextflow_process {
+
+    name "Test Process CLAIR3"
+    script "../main.nf"
+    process "CLAIR3"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "clair3"
+    tag "untar"
+
+    setup {
+        run("UNTAR") {
+            script "../../../../modules/nf-core/untar/main.nf"
+            process {
+                """
+                input[0] = [
+                    [ id:'test'],
+                    file(params.modules_testdata_base_path + 'generic/models/clair3.hifi.tar.gz', checkIfExists: true)
+                ]
+                """
+            }
+        }
+    }
+    test("sarscov2 - bam") {
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+                ])
+                .join(UNTAR.out.untar).combine(Channel.of(['hifi']))
+                input[1] = [
+                    [ id:'test'],
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true),
+                ]
+                input[2] = [
+                    [ id: 'test'],
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true),
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.vcf.collect { file(it[1]).getName() },
+                    process.out.tbi.collect { file(it[1]).getName() },
+                    process.out.versions,
+                    process.out.phased_vcf.collect { file(it[1]).getName() },
+                    process.out.phased_tbi.collect { file(it[1]).getName() }).match()}
+            )
+        }
+
+    }
+
+    test("sarscov2 - bam - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+                ])
+                .join(UNTAR.out.untar).combine(Channel.of(['hifi']))
+                input[1] = [
+                    [ id:'test'],
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true),
+                ]
+                input[2] = [
+                    [ id: 'test'],
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true),
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.vcf,
+                    process.out.tbi,
+                    process.out.phased_vcf,
+                    process.out.phased_tbi,
+                    process.out.versions,
+                    ).match()}
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/clair3/tests/main.nf.test.snap b/modules/nf-core/clair3/tests/main.nf.test.snap
new file mode 100644
index 000000000000..fe9e76f80c0e
--- /dev/null
+++ b/modules/nf-core/clair3/tests/main.nf.test.snap
@@ -0,0 +1,70 @@
+{
+    "sarscov2 - bam - stub": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "merge_output.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "merge_output.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "phased_merge_output.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "phased_merge_output.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ]
+            ],
+            [
+                "versions.yml:md5,10928c13418eced076964d86249aeaf8"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.5"
+        },
+        "timestamp": "2025-03-17T09:56:09.775735"
+    },
+    "sarscov2 - bam": {
+        "content": [
+            [
+                "merge_output.vcf.gz"
+            ],
+            [
+                "merge_output.vcf.gz.tbi"
+            ],
+            [
+                "versions.yml:md5,10928c13418eced076964d86249aeaf8"
+            ],
+            [
+
+            ],
+            [
+
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.5"
+        },
+        "timestamp": "2025-03-14T17:16:02.057639"
+    }
+}
\ No newline at end of file