diff --git a/modules/nf-core/clusty/environment.yml b/modules/nf-core/clusty/environment.yml
new file mode 100644
index 000000000000..afbbde862837
--- /dev/null
+++ b/modules/nf-core/clusty/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - "bioconda::clusty=1.2.2"
diff --git a/modules/nf-core/clusty/main.nf b/modules/nf-core/clusty/main.nf
new file mode 100644
index 000000000000..8d819443e628
--- /dev/null
+++ b/modules/nf-core/clusty/main.nf
@@ -0,0 +1,53 @@
+
+process CLUSTY {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/clusty:1.2.2--h9ee0642_0':
+        'biocontainers/clusty:1.2.2--h9ee0642_0' }"
+
+    input:
+    tuple val(meta), path(distances)
+    tuple val(meta2), path(objects)
+
+    output:
+    tuple val(meta), path("*.tsv"), emit: assignments
+    tuple val("${task.process}"), val('clusty'), eval('echo $(clusty --version 2>&1)'), topic: versions, emit: versions_clusty
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // `objects` is optional: an empty value adds no --objects-file flag to the command
+    def objects_arg = objects ? "--objects-file $objects" : ""
+
+    // Fail early when the output file name would collide with the staged input file name
+    if ("${distances}" == "${prefix}.tsv") error "Input and output names are the same, set prefix in module configuration to disambiguate!"
+
+    """
+    clusty \\
+        $args \\
+        -t $task.cpus \\
+        ${objects_arg} \\
+        ${distances} \\
+        ${prefix}.tsv
+    """
+
+    stub:
+    // Same argument handling and name guard as the script block; only an empty output file is created
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def objects_arg = objects ? "--objects-file $objects" : ""
+
+    if ("${distances}" == "${prefix}.tsv") error "Input and output names are the same, set prefix in module configuration to disambiguate!"
+
+    """
+    echo $args
+    echo ${objects_arg}
+    touch ${prefix}.tsv
+    """
+}
diff --git a/modules/nf-core/clusty/meta.yml b/modules/nf-core/clusty/meta.yml
new file mode 100644
index 000000000000..dadd04d488ad
--- /dev/null
+++ b/modules/nf-core/clusty/meta.yml
@@ -0,0 +1,83 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "clusty"
+description: Clusty is a tool for large-scale clustering using sparse distance
+  matrices, suitable for datasets with millions of objects.
+keywords:
+  - cluster
+  - network
+  - contig
+  - scaffold
+  - alignment
+  - protein
+tools:
+  - "clusty":
+      description: "Clusty is a tool for large-scale data clustering."
+      homepage: "https://github.com/refresh-bio/clusty"
+      documentation: "https://github.com/refresh-bio/clusty"
+      tool_dev_url: "https://github.com/refresh-bio/clusty"
+      doi: "10.1038/s41592-025-02701-7"
+      licence: ["GPL v3-or-later"]
+      identifier: ""
+
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1' ]`
+    - distances:
+        type: file
+        description: pairwise distances file (e.g., TSV format)
+        pattern: "*.tsv"
+        ontologies:
+          - edam: "http://edamontology.org/format_3475" # TSV
+  - - meta2:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1' ]`
+    - objects:
+        type: file
+        description: Optional TSV file containing object identifiers
+        pattern: "*.tsv"
+        ontologies:
+          - edam: "http://edamontology.org/format_3475" # TSV
+
+output:
+  assignments:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "*.tsv":
+          type: file
+          description: TSV file with cluster assignments
+          pattern: "*.tsv"
+          ontologies:
+            - edam: "http://edamontology.org/format_3475" # TSV
+  versions_clusty:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - clusty:
+          type: string
+          description: The name of the tool
+      - "echo $(clusty --version 2>&1)":
+          type: string
+          description: The version of the tool
+topics:
+  versions:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - clusty:
+          type: string
+          description: The name of the tool
+      - "echo $(clusty --version 2>&1)":
+          type: string
+          description: The version of the tool
+authors:
+  - "@Joon-Klaps"
+maintainers:
+  - "@Joon-Klaps"
diff --git a/modules/nf-core/clusty/tests/main.nf.test b/modules/nf-core/clusty/tests/main.nf.test
new file mode 100644
index 000000000000..5301da3226e4
--- /dev/null
+++ b/modules/nf-core/clusty/tests/main.nf.test
@@ -0,0 +1,92 @@
+nextflow_process {
+    name "Test Process CLUSTY"
+    script "../main.nf"
+    process "CLUSTY"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "clusty"
+
+    test("generic - only distances") {
+
+        when {
+            params {
+                clusty_args = '--id-cols id1 id2 --distance-col ani --similarity --min ani 0.70'
+            }
+
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.modules_testdata_base_path + 'generic/tsv/ani.tsv', checkIfExists: true),
+                ]
+                input[1] = [[:], []]
+                """
+            }
+        }
+
+        then {
+            assert process.success
+            assertAll(
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+    test("generic - distances - objects") {
+
+        when {
+            params {
+                clusty_args = '--id-cols name1 name2 --distance-col ani --similarity --min ani 0.70'
+            }
+
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.modules_testdata_base_path + 'generic/tsv/ani.tsv', checkIfExists: true),
+                ]
+                input[1] = [
+                    [ id:'test' ],
+                    file(params.modules_testdata_base_path + 'generic/txt/ani_ids.txt', checkIfExists: true),
+                ]
+                """
+            }
+        }
+
+        then {
+            assert process.success
+            assertAll(
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+    test("generic - only distances - stub") {
+
+        options "-stub"
+
+        when {
+            params {
+                clusty_args = '--id-cols name1 name2 --distance-col ani --similarity --min ani 0.70'
+            }
+
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.modules_testdata_base_path + 'generic/tsv/ani.tsv', checkIfExists: true),
+                ]
+                input[1] = [[:], []]
+                """
+            }
+        }
+
+        then {
+            assert process.success
+            assertAll(
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/clusty/tests/main.nf.test.snap b/modules/nf-core/clusty/tests/main.nf.test.snap
new file mode 100644
index 000000000000..2a389ddcabff
--- /dev/null
+++ b/modules/nf-core/clusty/tests/main.nf.test.snap
@@ -0,0 +1,125 @@
+{
+    "generic - distances - objects": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.tsv:md5,cea71211069e4aeed9e021ebebc038e7"
+                    ]
+                ],
+                "1": [
+                    [
+                        "CLUSTY",
+                        "clusty",
+                        "1.2.2"
+                    ]
+                ],
+                "assignments": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.tsv:md5,cea71211069e4aeed9e021ebebc038e7"
+                    ]
+                ],
+                "versions_clusty": [
+                    [
+                        "CLUSTY",
+                        "clusty",
+                        "1.2.2"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.2"
+        },
+        "timestamp": "2025-12-09T12:35:44.503401"
+    },
+    "generic - only distances - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    [
+                        "CLUSTY",
+                        "clusty",
+                        "1.2.2"
+                    ]
+                ],
+                "assignments": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions_clusty": [
+                    [
+                        "CLUSTY",
+                        "clusty",
+                        "1.2.2"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.2"
+        },
+        "timestamp": "2025-12-09T12:36:00.849772"
+    },
+    "generic - only distances": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.tsv:md5,9c4d0da996fe7c4579b473f4148623ec"
+                    ]
+                ],
+                "1": [
+                    [
+                        "CLUSTY",
+                        "clusty",
+                        "1.2.2"
+                    ]
+                ],
+                "assignments": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.tsv:md5,9c4d0da996fe7c4579b473f4148623ec"
+                    ]
+                ],
+                "versions_clusty": [
+                    [
+                        "CLUSTY",
+                        "clusty",
+                        "1.2.2"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.2"
+        },
+        "timestamp": "2025-12-09T12:35:28.037382"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/clusty/tests/nextflow.config b/modules/nf-core/clusty/tests/nextflow.config
new file mode 100644
index 000000000000..2fa76f4f9668
--- /dev/null
+++ b/modules/nf-core/clusty/tests/nextflow.config
@@ -0,0 +1,6 @@
+process {
+    withName: CLUSTY {
+        // Each test sets params.clusty_args; pass it to the module as task.ext.args
+        ext.args = params.clusty_args
+    }
+}