nf-core · nschan · Apr 8, 2025 · Apr 3, 2025 · Apr 3, 2025 · Apr 4, 2025
diff --git a/modules/nf-core/ragtag/patch/environment.yml b/modules/nf-core/ragtag/patch/environment.yml
@@ -0,0 +1,5 @@
+# Conda environment used by the RAGTAG_PATCH process (referenced from main.nf).
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  # Same RagTag release (2.1.0) as the container images listed in main.nf
+  - "bioconda::ragtag=2.1.0"
diff --git a/modules/nf-core/ragtag/patch/main.nf b/modules/nf-core/ragtag/patch/main.nf
@@ -0,0 +1,106 @@
+// Patch a target assembly with sequences from a query assembly using
+// `ragtag.py patch`.
+//
+// Inputs:
+//   target  - assembly to patch, staged under target/; *.gz files are decompressed
+//   query   - assembly supplying the patch sequences, staged under query/;
+//             *.gz files are decompressed
+//   exclude - optional file handed to ragtag via -e (pass [] to omit)
+//   skip    - optional file handed to ragtag via -j (pass [] to omit)
+//
+// Only `meta` (from the target tuple) is attached to the outputs.
+process RAGTAG_PATCH {
+    tag "${meta.id}"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
+        ? 'https://depot.galaxyproject.org/singularity/ragtag:2.1.0--pyhb7b1952_0'
+        : 'biocontainers/ragtag:2.1.0--pyhb7b1952_0'}"
+
+    input:
+    tuple val(meta), path(target, name: 'target/*')
+    tuple val(meta2), path(query, name: 'query/*')
+    tuple val(meta3), path(exclude)
+    tuple val(meta4), path(skip)
+
+    output:
+    tuple val(meta), path("*.patch.fasta"),         emit: patch_fasta
+    tuple val(meta), path("*.patch.agp"),           emit: patch_agp
+    tuple val(meta), path("*.comps.fasta"),         emit: patch_components_fasta
+    tuple val(meta), path("*.ragtag.patch.asm.*"),  emit: assembly_alignments,      optional: true
+    tuple val(meta), path("*.ctg.agp"),             emit: target_splits_agp
+    tuple val(meta), path("*.ctg.fasta"),           emit: target_splits_fasta
+    tuple val(meta), path("*.rename.agp"),          emit: qry_rename_agp,           optional: true
+    tuple val(meta), path("*.rename.fasta"),        emit: qry_rename_fasta,         optional: true
+    tuple val(meta), path("*.patch.err"),           emit: stderr
+    path "versions.yml",                            emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def args = task.ext.args ?: ""
+    // The optional inputs only contribute a flag when a file was actually provided
+    def arg_exclude = exclude ? "-e ${exclude}" : ""
+    def arg_skip = skip ? "-j ${skip}" : ""
+    """
+    # Provide uncompressed copies of both assemblies under fixed names
+    if [[ ${target} == *.gz ]]
+    then
+        zcat ${target} > target.fa
+    else
+        cp ${target} target.fa
+    fi
+
+    if [[ ${query} == *.gz ]]
+    then
+        zcat ${query} > query.fa
+    else
+        cp ${query} query.fa
+    fi
+
+    # stderr and stdout are passed through and additionally copied to log files
+    ragtag.py patch target.fa query.fa \\
+        -o "${prefix}" \\
+        -t ${task.cpus} \\
+        ${arg_exclude} \\
+        ${arg_skip} \\
+        ${args} \\
+        2> >( tee ${prefix}.stderr.log >&2 ) \\
+        | tee ${prefix}.stdout.log
+
+    # RagTag writes into the ${prefix}/ folder; move the results next to the
+    # script and rename them to <prefix>.* so they match the output globs
+    mv ${prefix}/ragtag.patch.agp ${prefix}.patch.agp
+    mv ${prefix}/ragtag.patch.fasta ${prefix}.patch.fasta
+    mv ${prefix}/ragtag.patch.comps.fasta ${prefix}.comps.fasta
+    mv ${prefix}/ragtag.patch.ctg.agp ${prefix}.ctg.agp
+    mv ${prefix}/ragtag.patch.ctg.fasta ${prefix}.ctg.fasta
+
+    # The rename files are optional outputs, so only move them when present
+    if [ -f ${prefix}/ragtag.patch.rename.agp ]; then
+        mv ${prefix}/ragtag.patch.rename.agp ${prefix}.rename.agp
+    fi
+
+    if [ -f ${prefix}/ragtag.patch.rename.fasta ]; then
+        mv ${prefix}/ragtag.patch.rename.fasta ${prefix}.rename.fasta
+    fi
+    mv ${prefix}/ragtag.patch.err ${prefix}.patch.err
+
+    # Move the alignment files out of the prefix folder as
+    # <prefix>.ragtag.patch.asm.*; the '.' separator is required for them to
+    # match the assembly_alignments glob (*.ragtag.patch.asm.*)
+    for alignment_file in ${prefix}/ragtag.patch.asm.*
+    do
+        # An unmatched glob stays literal; skip it, these files are optional
+        [ -e "\$alignment_file" ] || continue
+        mv "\$alignment_file" "${prefix}.\$(basename "\$alignment_file")"
+    done
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        ragtag: \$(echo \$(ragtag.py -v | sed 's/v//'))
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.patch.agp
+    touch ${prefix}.patch.fasta
+    touch ${prefix}.comps.fasta
+    touch ${prefix}.ctg.agp
+    touch ${prefix}.ctg.fasta
+    touch ${prefix}.rename.agp
+    touch ${prefix}.rename.fasta
+    touch ${prefix}.ragtag.patch.asm.1
+    touch ${prefix}.patch.err
+
+    # Same structure as the versions.yml written by the script block
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        ragtag: \$(echo \$(ragtag.py -v | sed 's/v//'))
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/ragtag/patch/meta.yml b/modules/nf-core/ragtag/patch/meta.yml
@@ -0,0 +1,156 @@
+# Module metadata for the RAGTAG_PATCH process defined in main.nf.
+name: "ragtag_patch"
+description: "Homology-based assembly patching: Make continuous joins and fill gaps
+  in 'target.fa' using sequences from 'query.fa'"
+
+keywords:
+  - assembly
+  - consensus
+  - ragtag
+  - patch
+# External tool wrapped by this module
+tools:
+  - "ragtag":
+      description: "Fast reference-guided genome assembly scaffolding"
+      homepage: "https://github.com/malonge/RagTag/wiki"
+      documentation: "https://github.com/malonge/RagTag/wiki"
+      tool_dev_url: "https://github.com/malonge/RagTag"
+      doi: "10.1186/s13059-022-02823-7"
+      licence: ["MIT"]
+      identifier: biotools:ragtag
+# One entry per input tuple of the process, in the order declared in main.nf.
+# Only `meta` is attached to the outputs; meta2-meta4 just accompany their files.
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test' ]
+    - target:
+        type: file
+        description: Target assembly to be patched (FASTA, optionally gzipped)
+        pattern: "*.{fasta,fasta.gz}"
+  - - meta2:
+        type: map
+        description: |
+          Groovy Map containing information about the query assembly
+          e.g. [ id:'test' ]
+    - query:
+        type: file
+        description: Query assembly providing the sequences used to patch the target
+          (FASTA, optionally gzipped)
+        pattern: "*.{fasta,fasta.gz}"
+  # The two lists below are optional; pass an empty value ([]) to leave them out
+  - - meta3:
+        type: map
+        description: |
+          Groovy Map containing information about the exclude list
+          e.g. [ id:'test' ]
+    - exclude:
+        type: file
+        description: Optional list of target sequences to ignore, passed to
+          ragtag.py patch via -e
+        pattern: "*.txt"
+  - - meta4:
+        type: map
+        description: |
+          Groovy Map containing information about the skip list
+          e.g. [ id:'test' ]
+    - skip:
+        type: file
+        description: Optional list of query sequences to ignore, passed to
+          ragtag.py patch via -j
+        pattern: "*.txt"
+# Output channels; each key matches an `emit:` label of the process in main.nf.
+output:
+  - patch_fasta:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test' ]
+      - "*.patch.fasta":
+          type: file
+          description: FASTA file containing the patched assembly
+          pattern: "*.patch.fasta"
+  - patch_agp:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test' ]
+      - "*.patch.agp":
+          type: file
+          description: AGP file defining how ragtag.patch.fasta is built
+          pattern: "*.patch.agp"
+  - patch_components_fasta:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test' ]
+      - "*.comps.fasta":
+          type: file
+          description: The split target assembly and the renamed query assembly combined
+            into one FASTA file. This file contains all components in ragtag.patch.agp
+          pattern: "*.comps.fasta"
+  # Declared with `optional: true` in main.nf
+  - assembly_alignments:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test' ]
+      - "*.ragtag.patch.asm.*":
+          type: file
+          description: Assembly alignment files
+          pattern: "*.ragtag.patch.asm.*"
+  - target_splits_agp:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test' ]
+      - "*.ctg.agp":
+          type: file
+          description: An AGP file defining how the target assembly was split at gaps
+          pattern: "*.ctg.agp"
+  - target_splits_fasta:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test' ]
+      - "*.ctg.fasta":
+          type: file
+          description: FASTA file containing the target assembly split at gaps
+          pattern: "*.ctg.fasta"
+  # Declared with `optional: true` in main.nf
+  - qry_rename_agp:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test' ]
+      - "*.rename.agp":
+          type: file
+          description: An AGP file defining the new names for query sequences
+          pattern: "*.rename.agp"
+  # Declared with `optional: true` in main.nf
+  - qry_rename_fasta:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test' ]
+      - "*.rename.fasta":
+          type: file
+          description: A FASTA file with the original query sequence, but with new names
+          pattern: "*.rename.fasta"
+  - stderr:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test' ]
+      - "*.patch.err":
+          type: file
+          description: Standard error logging for all external RagTag commands
+          pattern: "*.patch.err"
+  # Emitted as a bare path, without a meta map
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+# GitHub handles of the module's authors and current maintainers
+authors:
+  - "@nschan"
+maintainers:
+  - "@nschan"
diff --git a/modules/nf-core/ragtag/patch/tests/main.nf.test b/modules/nf-core/ragtag/patch/tests/main.nf.test
@@ -0,0 +1,89 @@
+// nf-test suite for RAGTAG_PATCH: one real run and one stub run.
+nextflow_process {
+
+    name "Test Process RAGTAG_PATCH"
+    script "../main.nf"
+    process "RAGTAG_PATCH"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "ragtag"
+    tag "ragtag/patch"
+
+    // Patches the sarscov2 test genome with itself; the optional exclude and
+    // skip inputs are left empty.
+    // NOTE(review): the test name mentions A. thaliana although the inputs are
+    // sarscov2 files; the name is left unchanged here, presumably any stored
+    // snapshot is keyed by it - confirm before renaming.
+    test("A. thaliana Col-0 test data - ragtag - patch") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
+                ]
+                input[1] = [
+                    [], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
+                ]
+                input[2] = [
+                    [],
+                    []
+                ]
+                input[3] = [
+                    [],
+                    []
+                ]
+                """
+            }
+        }
+
+        then {
+            // Only a subset of the output channels is snapshotted here
+            assertAll(
+                { assert process.success },
+                {
+                    assert snapshot(
+                        process.out.patch_fasta,
+                        process.out.patch_agp,
+                        process.out.patch_components_fasta,
+                        process.out.target_splits_agp,
+                        process.out.target_splits_fasta,
+                        process.out.versions
+                    ).match()
+                }
+            )
+        }
+
+    }
+
+    // Same inputs as above, run with -stub; every output channel is snapshotted.
+    test("A. thaliana Col-0 test data - ragtag - patch - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
+                ]
+                input[1] = [
+                    [], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
+                ]
+                input[2] = [
+                    [],
+                    []
+                ]
+                input[3] = [
+                    [],
+                    []
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+}