Skip to content

Commit 2d6be5b

Browse files
prototaxites, Zachary Foster
authored and committed
Clean up hifiasm module + add bin file input (nf-core#7802)
* Tidy hifiasm model + update tests
* Update nf.test
* Fix linting
* Add fasta and PAF outputs to hifiasm; combine all bin files into a single output channel. Update documentation
* Improve descriptiveness of output channel names
1 parent 2ada133 commit 2d6be5b

5 files changed

Lines changed: 504 additions & 263 deletions

File tree

modules/nf-core/hifiasm/main.nf

Lines changed: 76 additions & 90 deletions
Original file line number | Diff line number | Diff line change
@@ -11,114 +11,100 @@ process HIFIASM {
1111
tuple val(meta) , path(long_reads) , path(ul_reads)
1212
tuple val(meta1), path(paternal_kmer_dump), path(maternal_kmer_dump)
1313
tuple val(meta2), path(hic_read1) , path(hic_read2)
14+
tuple val(meta3), path(bin_files)
1415

1516
output:
16-
tuple val(meta), path("*.r_utg.gfa") , emit: raw_unitigs
17-
tuple val(meta), path("*.ec.bin") , emit: corrected_reads
18-
tuple val(meta), path("*.ovlp.source.bin") , emit: source_overlaps
19-
tuple val(meta), path("*.ovlp.reverse.bin"), emit: reverse_overlaps
20-
tuple val(meta), path("*.bp.p_ctg.gfa") , emit: processed_contigs, optional: true
21-
tuple val(meta), path("*.p_utg.gfa") , emit: processed_unitigs, optional: true
22-
tuple val(meta), path("*.asm.p_ctg.gfa") , emit: primary_contigs , optional: true
23-
tuple val(meta), path("*.asm.a_ctg.gfa") , emit: alternate_contigs, optional: true
24-
tuple val(meta), path("*.hap1.p_ctg.gfa") , emit: paternal_contigs , optional: true
25-
tuple val(meta), path("*.hap2.p_ctg.gfa") , emit: maternal_contigs , optional: true
26-
tuple val(meta), path("*.log") , emit: log
27-
path "versions.yml" , emit: versions
17+
tuple val(meta), path("*.r_utg.gfa") , emit: raw_unitigs
18+
tuple val(meta), path("*.bin") , emit: bin_files , optional: true
19+
tuple val(meta), path("*.p_utg.gfa") , emit: processed_unitigs, optional: true
20+
tuple val(meta), path("${prefix}.{p_ctg,bp.p_ctg,hic.p_ctg}.gfa"), emit: primary_contigs , optional: true
21+
tuple val(meta), path("${prefix}.{a_ctg,hic.a_ctg}.gfa") , emit: alternate_contigs, optional: true
22+
tuple val(meta), path("${prefix}.*.hap1.p_ctg.gfa") , emit: hap1_contigs , optional: true
23+
tuple val(meta), path("${prefix}.*.hap2.p_ctg.gfa") , emit: hap2_contigs , optional: true
24+
tuple val(meta), path("*.ec.fa.gz") , emit: corrected_reads , optional: true
25+
tuple val(meta), path("*.ovlp.paf.gz") , emit: read_overlaps , optional: true
26+
tuple val(meta), path("${prefix}.stderr.log") , emit: log
27+
path "versions.yml" , emit: versions
2828

2929
when:
3030
task.ext.when == null || task.ext.when
3131

3232
script:
3333
def args = task.ext.args ?: ''
34-
def prefix = task.ext.prefix ?: "${meta.id}"
34+
prefix = task.ext.prefix ?: "${meta.id}"
3535

3636
def long_reads_sorted = long_reads instanceof List ? long_reads.sort{ it.name } : long_reads
3737
def ul_reads_sorted = ul_reads instanceof List ? ul_reads.sort{ it.name } : ul_reads
3838
def ultralong = ul_reads ? "--ul ${ul_reads_sorted}" : ""
3939

40-
if ((paternal_kmer_dump) && (maternal_kmer_dump) && (hic_read1) && (hic_read2)) {
41-
error "Hifiasm Trio-binning and Hi-C integrated should not be used at the same time"
42-
} else if ((paternal_kmer_dump) && !(maternal_kmer_dump)) {
43-
error "Hifiasm Trio-binning requires maternal data"
44-
} else if (!(paternal_kmer_dump) && (maternal_kmer_dump)) {
45-
error "Hifiasm Trio-binning requires paternal data"
46-
} else if ((paternal_kmer_dump) && (maternal_kmer_dump)) {
47-
"""
48-
hifiasm \\
49-
$args \\
50-
-o ${prefix}.asm \\
51-
-t $task.cpus \\
52-
-1 $paternal_kmer_dump \\
53-
-2 $maternal_kmer_dump \\
54-
$ultralong \\
55-
$long_reads_sorted \\
56-
2> >( tee ${prefix}.stderr.log >&2 )
40+
if([paternal_kmer_dump, maternal_kmer_dump].any() && [hic_read1, hic_read2].any()) {
41+
log.error("ERROR: hifiasm trio binning mode and Hi-C phasing can not be used at the same time.")
42+
}
5743

44+
def input_trio = ""
45+
if([paternal_kmer_dump, maternal_kmer_dump].any()) {
46+
if(![paternal_kmer_dump, maternal_kmer_dump].every()) {
47+
log.error("ERROR: Either the maternal or paternal kmer dump is missing!")
48+
} else {
49+
input_trio = "-1 ${paternal_kmer_dump} -2 ${maternal_kmer_dump}"
50+
}
51+
}
5852

59-
cat <<-END_VERSIONS > versions.yml
60-
"${task.process}":
61-
hifiasm: \$(hifiasm --version 2>&1)
62-
END_VERSIONS
63-
"""
64-
} else if ((hic_read1) && !(hic_read2)) {
65-
error "Hifiasm Hi-C integrated requires paired-end data (only R1 specified here)"
66-
} else if (!(hic_read1) && (hic_read2)) {
67-
error "Hifiasm Hi-C integrated requires paired-end data (only R2 specified here)"
68-
} else if ((hic_read1) && (hic_read2)) {
69-
"""
70-
hifiasm \\
71-
$args \\
72-
-o ${prefix}.asm \\
73-
-t $task.cpus \\
74-
--h1 $hic_read1 \\
75-
--h2 $hic_read2 \\
76-
$ultralong \\
77-
$long_reads \\
78-
2> >( tee ${prefix}.stderr.log >&2 )
53+
def input_hic = ""
54+
if([hic_read1, hic_read2].any()) {
55+
if(![hic_read1, hic_read2].every()) {
56+
log.error("ERROR: Either the forward or reverse Hi-C reads are missing!")
57+
} else {
58+
input_hic = "--h1 ${hic_read1} --h2 ${hic_read2}"
59+
}
60+
}
61+
"""
62+
hifiasm \\
63+
$args \\
64+
-t ${task.cpus} \\
65+
${input_trio} \\
66+
${input_hic} \\
67+
${ultralong} \\
68+
-o ${prefix} \\
69+
${long_reads_sorted} \\
70+
2> >( tee ${prefix}.stderr.log >&2 )
7971
72+
if [ -f ${prefix}.ec.fa ]; then
73+
gzip ${prefix}.ec.fa
74+
fi
8075
81-
cat <<-END_VERSIONS > versions.yml
82-
"${task.process}":
83-
hifiasm: \$(hifiasm --version 2>&1)
84-
END_VERSIONS
85-
"""
86-
} else { // Phasing with Hi-C data is not supported yet
87-
"""
88-
hifiasm \\
89-
$args \\
90-
-o ${prefix}.asm \\
91-
-t $task.cpus \\
92-
$ultralong \\
93-
$long_reads \\
94-
2> >( tee ${prefix}.stderr.log >&2 )
76+
if [ -f ${prefix}.ovlp.paf ]; then
77+
gzip ${prefix}.ovlp.paf
78+
fi
9579
96-
cat <<-END_VERSIONS > versions.yml
97-
"${task.process}":
98-
hifiasm: \$(hifiasm --version 2>&1)
99-
END_VERSIONS
100-
"""
101-
}
102-
stub:
103-
def args = task.ext.args ?: ''
104-
def prefix = task.ext.prefix ?: "${meta.id}"
105-
"""
106-
touch ${prefix}.asm.r_utg.gfa
107-
touch ${prefix}.asm.ec.bin
108-
touch ${prefix}.asm.ovlp.source.bin
109-
touch ${prefix}.asm.ovlp.reverse.bin
110-
touch ${prefix}.asm.bp.p_ctg.gfa
111-
touch ${prefix}.asm.p_utg.gfa
112-
touch ${prefix}.asm.p_ctg.gfa
113-
touch ${prefix}.asm.a_ctg.gfa
114-
touch ${prefix}.asm.hap1.p_ctg.gfa
115-
touch ${prefix}.asm.hap2.p_ctg.gfa
116-
touch ${prefix}.stderr.log
80+
cat <<-END_VERSIONS > versions.yml
81+
"${task.process}":
82+
hifiasm: \$(hifiasm --version 2>&1)
83+
END_VERSIONS
84+
"""
11785

118-
cat <<-END_VERSIONS > versions.yml
119-
"${task.process}":
120-
hifiasm: \$(hifiasm --version 2>&1)
121-
END_VERSIONS
122-
"""
86+
stub:
87+
prefix = task.ext.prefix ?: "${meta.id}"
88+
"""
89+
touch ${prefix}.r_utg.gfa
90+
touch ${prefix}.ec.bin
91+
touch ${prefix}.ovlp.source.bin
92+
touch ${prefix}.ovlp.reverse.bin
93+
touch ${prefix}.hic.tlb.bin
94+
touch ${prefix}.hic.lk.bin
95+
touch ${prefix}.bp.p_ctg.gfa
96+
touch ${prefix}.p_utg.gfa
97+
touch ${prefix}.p_ctg.gfa
98+
touch ${prefix}.a_ctg.gfa
99+
touch ${prefix}.hap1.p_ctg.gfa
100+
touch ${prefix}.hap2.p_ctg.gfa
101+
echo "" | gzip > ${prefix}.ec.fa.gz
102+
echo "" | gzip > ${prefix}.ovlp.paf.gz
103+
touch ${prefix}.stderr.log
123104
105+
cat <<-END_VERSIONS > versions.yml
106+
"${task.process}":
107+
hifiasm: \$(hifiasm --version 2>&1)
108+
END_VERSIONS
109+
"""
124110
}

0 commit comments

Comments
 (0)