forked from PelechanoLab/TIFseq2
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathalign.sh
More file actions
executable file
·67 lines (59 loc) · 3.2 KB
/
align.sh
File metadata and controls
executable file
·67 lines (59 loc) · 3.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/bin/bash
module load bioinfo-tools
module load star/2.5.3a
while getopts "hR:A:I:O:p:j:m:" opt; do
case $opt in
h)
echo "Usage: STAR_align.sh -R <reference genome dir> -A <annotation gtf> -I <input fastq dir> -O <output fastq dir> -p <thread number> -j <max intron> -m <max mate distance>"
exit 0
;;
R)genomedir=$OPTARG;;
A)annotation=$OPTARG;;
I)indir=$OPTARG;;
O)outdir=$OPTARG;;
p)thread=$OPTARG;;
j)intron=$OPTARG;;
m)mate=$OPTARG;;
esac
done
echo "Read parameters... DONE"
if [ -z "$thread" ]; then thread=1; fi
if [ -z "$intron" ]; then intron=0; fi
if [ -z "$mate" ]; then mate=0; fi
echo "Referenece: $genomedir"
echo "Annotation file: $annotation"
echo "Input directory: $indir"
echo "Output directory: $outdir"
echo "#thread for alignment: $thread"
echo "maximum intron length: $intron; maximum mate pair distance: $mate"
if [ ! -d "$indir" ];then
echo "Input directory does not exist!"
exit 0
fi
if [ ! -d "$outdir" ];then
echo "Output directory does not exist! Generate directory."
mkdir $outdir
fi
indir=${indir%/}
outdir=${outdir%/}
pyenv local anaconda2-2018.12
for cut5 in $indir/*5cut*.fastq.gz; do
cut3=${cut5/_5cut/_3cut}
echo "5'end file: $cut5"
echo "3'end file: $cut3"
file=${cut5##*/}
IFS=_
name=($file)
echo "${name[0]}"
unset IFS
# star --runThreadN $thread --runMode alignReads --genomeDir $genomedir --sjdbGTFfile $annotation --readFilesIn $cut5 $cut3 --alignIntronMax $intron --alignMatesGapMax $mate --alignEndsType Extend5pOfReads12 --readFilesCommand zcat --outSAMtype BAM SortedByCoordinate --outFileNamePrefix $outdir/${name[0]}.
star --runThreadN $thread --runMode alignReads --genomeDir $genomedir --sjdbGTFfile $annotation --readFilesIn $cut5 --alignIntronMax $intron --alignEndsType Extend5pOfRead1 --alignSJoverhangMin 10 --outFilterMismatchNmax 5 --readFilesCommand zcat --outSAMtype BAM Unsorted --outSAMattributes NH HI NM AS --outSAMattrRGline ID:${name[0]}_5end --outFileNamePrefix $outdir/${name[0]}_5end.
samtools sort -n -o $outdir/${name[0]}_5end.name.bam $outdir/${name[0]}_5end.Aligned.out.bam
star --runThreadN $thread --runMode alignReads --genomeDir $genomedir --sjdbGTFfile $annotation --readFilesIn $cut3 --alignIntronMax $intron --alignEndsType Extend5pOfRead1 --alignSJoverhangMin 10 --outFilterMismatchNmax 5 --readFilesCommand zcat --outSAMtype BAM Unsorted --outSAMattributes NH HI NM AS --outSAMattrRGline ID:${name[0]}_3end --outFileNamePrefix $outdir/${name[0]}_3end.
samtools sort -@ $thread -n -o $outdir/${name[0]}_3end.name.bam $outdir/${name[0]}_3end.Aligned.out.bam
samtools merge -@ $thread -f -n $outdir/${name[0]}.name.bam $outdir/${name[0]}_5end.name.bam $outdir/${name[0]}_3end.name.bam
rm $outdir/${name[0]}_*end.name.bam
python ~/TIFseq2/combine_ends.py $outdir/${name[0]}.name.bam
# umi_tools dedup -I $outdir/${name[0]}*_sorted.bam -S $outdir/${name[0]}_unique_UMI.bam --method cluster -L $outdir/${name[0]}_unique_UMI.log --paired --output-stats $outdir/${name[0]}_unique_UMI.stats
python ~/TIFseq2/dedup.py -I $outdir/${name[0]}*_sorted.bam -S $outdir/${name[0]}_unique_UMI.bam --method=cluster -L $outdir/${name[0]}_unique_UMI.log --paired --spliced-is-unique
done