diff --git a/workflows/SNP_pipe.sh b/workflows/SNP_pipe.sh
new file mode 100644
index 0000000000000000000000000000000000000000..d72565cf66a65ea2354ddf6ca916eb4e68dadc4d
--- /dev/null
+++ b/workflows/SNP_pipe.sh
@@ -0,0 +1,411 @@
+#!/bin/bash
+
+#. /etc/profile.d/modules.sh
+
+export PATH=$PATH:/vol/biotools/bin
+export MANPATH=$MANPATH:/vol/biotools/man
+export PATH=$PATH:/vol/software/bin
+
+
+# install important R package - the lummerland cluster does not have it ...
+#echo "install.packages(\"gplots\", repos=\"https://cran.rstudio.com\")" | R --vanilla --no-save
+#R CMD INSTALL /vol/agrensing/tools/R_packages/bitops_1.0-6.tar.gz
+#R CMD INSTALL /vol/agrensing/tools/R_packages/caTools_1.17.1.1.tar.gz
+#R CMD INSTALL /vol/agrensing/tools/R_packages/gtools_3.8.1.tar.gz
+#R CMD INSTALL /vol/agrensing/tools/R_packages/gdata_2.18.0.tar.gz
+#R CMD INSTALL /vol/agrensing/tools/R_packages/gplots_3.0.1.tar.gz
+
+# functions
+
+startle ()
+{
+    number_raw_reads=`wc -l $fw | awk '{print $1/4}'`
+    echo "## Start log ##"
+    echo "Number of raw reads: "$number_raw_reads
+}
+
+trimmo ()
+{
+    # Remove leading low-quality or N bases (below quality 3) (LEADING:3)
+    # Remove trailing low-quality or N bases (below quality 3) (TRAILING:3)
+    # Scan the read with a 4-base wide sliding window, cutting when the average quality per base drops below 15 (SLIDINGWINDOW:4:15)
+    # HEADCROP: cut the specified number of bases from the start of the read
+    # MINLEN was initially set to 50 bp, but some libraries have a raw read length of only 50 bp, so it is set to 30 bp.
+
+    if [ -e $rv ]; then
+        java $java_ram -jar /nfs/agrensing/tools/trimmomatic/Trimmomatic-0.39/trimmomatic-0.39.jar PE -threads $cpu -phred33 -summary $trimmo_out".trimmo.stats" \
+            $fw $rv $trimmo_out".PE.fw" $trimmo_out".SE.fw" $trimmo_out".PE.rv" $trimmo_out".SE.rv" \
+            ILLUMINACLIP:/nfs/agrensing/tools/trimmomatic/Trimmomatic-0.39/adapters/TruSeq-PE_all.fna:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 HEADCROP:10 MINLEN:30
+
+        # stats
+        echo "## Trimmo statistics ##"
+        printf "trim\t"$trimmo_out".PE.fw\t"; grep -c "^\+$" $trimmo_out".PE.fw"
+        # fastqc
+        /nfs/agrensing/tools/FastQC/FastQC_v0.11.7/fastqc -f fastq -t $cpu -o $output_dir"/02_trimmomatic/"$species"/"$prefix"/fastqc" $trimmo_out".PE."*
+    fi
+    if [ ! -e $rv ]; then
+        java $java_ram -jar /nfs/agrensing/tools/trimmomatic/Trimmomatic-0.39/trimmomatic-0.39.jar SE -threads $cpu -phred33 -summary $trimmo_out".trimmo.stats" \
+            $fw $trimmo_out".SE" \
+            ILLUMINACLIP:/nfs/agrensing/tools/trimmomatic/Trimmomatic-0.39/adapters/TruSeq-PE_all.fna:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 HEADCROP:10 MINLEN:30
+
+        # stats
+        echo "## Trimmo statistics ##"
+        # printf "trim\t"$trimmo_out".SE\t"; grep -c "^\+$" $trimmo_out".SE"
+        printf "trim\t"$trimmo_out".SE\t"; wc -l $trimmo_out".SE" | awk '{print $1/4}'
+        # fastqc
+        /nfs/agrensing/tools/FastQC/FastQC_v0.11.7/fastqc -f fastq -t $cpu -o $output_dir"/02_trimmomatic/"$species"/"$prefix"/fastqc" $trimmo_out".SE"
+    fi
+}
+
+polyA ()
+{
+    # min_len was initially set to 50 bp, but some libraries have a raw read length of only 50 bp, so it is set to 30 bp.
+    # If you specify any statistics option, no other output will be generated. To preprocess data, do not specify a statistics option.
+    # -stats_dupl -stats_all
+
+    inpA=$polyA_out".polyA.passed"
+    infA=$polyA_out".polyA.failed"
+
+    if [ -e $rv ]; then
+        /nfs/agrensing/tools/prinseq/prinseq-lite-0.20.4/./prinseq-lite.pl -verbose -no_qual_header -fastq $trimmo_out".PE.fw" -fastq2 $trimmo_out".PE.rv" \
+            -trim_tail_left 5 -trim_tail_right 5 -min_len 30 -out_good $inpA -out_bad $infA \
+            -out_format 3 -log $polyA_out"_polyA.log"
+        # stats
+        echo "## PolyA statistics ##"
+        printf "polyA\t"$inpA"_1.fastq\t"; wc -l $inpA"_1.fastq" | awk '{print $1/4}'
+        # fastqc
+        /nfs/agrensing/tools/FastQC/FastQC_v0.11.7/fastqc -f fastq -t $cpu -o $output_dir"/03_poly-A/"$species"/"$prefix"/fastqc" $inpA"_"[12]".fastq"
+    fi
+    if [ ! -e $rv ]; then
+        /nfs/agrensing/tools/prinseq/prinseq-lite-0.20.4/./prinseq-lite.pl -verbose -no_qual_header -fastq $trimmo_out".SE" \
+            -trim_tail_left 5 -trim_tail_right 5 -min_len 30 -out_good $inpA -out_bad $infA \
+            -out_format 3 -log $polyA_out"_polyA.log"
+
+        # stats
+        echo "## PolyA statistics ##"
+        printf "polyA\t"$inpA".fastq\t"; grep -c "^\+$" $inpA".fastq"
+        # fastqc
+        /nfs/agrensing/tools/FastQC/FastQC_v0.11.7/fastqc -f fastq -t $cpu -o $output_dir"/03_poly-A/"$species"/"$prefix"/fastqc" $inpA".fastq"
+    fi
+}
+
+duple ()
+{
+    # number of 100% identical duplicate reads; {29} because min_len is 30 bp
+    echo "Duplication:"
+    for IN in $polyA_out".polyA.passed"*; do printf $IN"\t"; num_dup=`awk '$1~/[NATCG]{29}/ {foo[$1]++} END {for(o in foo){print o"\t"foo[o]}}' $IN | wc -l`; \
+        printf $num_dup"\t"; printf %.5f\\n "$(( 100000 * num_dup / number_raw_reads ))e-5" ; done
+}
+
+mappi()
+{
+    if [ -e $rv ]; then
+        /homes/fhaas/src/gsnap -D $db_path_V3 -d $db_name_V3 -t $cpu -N 1 -n 7 -A sam --npaths=5 \
+            --read-group-platform=illumina --read-group-id=RGUNKNOWN --read-group-name=$id --read-group-library=LBUNKNOWN \
+            --failed-input=$mappi_out".unmapped" --split-output=$mappi_out".genome" \
+            $polyA_out".polyA.passed_1.fastq" $polyA_out".polyA.passed_2.fastq" 2> $mappi_out".mappi.err"
+
+        unmapped_reads=`grep -c "^\+$" $mappi_out".unmapped.1"`
+    fi
+    if [ ! -e $rv ]; then
+        /ceph/agrensing/tools/gmap-gsnap/gmap-2021-03-08/src/gsnap -D $db_path_V3 -d $db_name_V3 -t $cpu -N 1 -n 5 -A sam --npaths=5 \
+            --read-group-platform=illumina --read-group-id=RGUNKNOWN --read-group-name=$id --read-group-library=LBUNKNOWN \
+            --failed-input=$mappi_out".unmapped" --split-output=$mappi_out".genome" \
+            $polyA_out".polyA.passed.fastq" 2> $mappi_out".mappi.err"
+
+        unmapped_reads=`grep -c "^\+$" $mappi_out".unmapped"`
+    fi
+
+    # mapping statistics
+    echo "## Pp genome version 3 ##"
+    echo "Unmapped reads: "$unmapped_reads
+}
+
+mappi_gDNA()
+{
+    if [ -e $rv ]; then
+        /homes/fhaas/src/gsnap -D $db_path_V3 -d $db_name_V3 -t $cpu -n 7 -A sam --npaths=5 \
+            --read-group-platform=illumina --read-group-id=RGUNKNOWN --read-group-name=$id --read-group-library=LBUNKNOWN \
+            --failed-input=$mappi_out".unmapped" --split-output=$mappi_out".genome" \
+            $polyA_out".polyA.passed_1.fastq" $polyA_out".polyA.passed_2.fastq" 2> $mappi_out".mappi.err"
+
+        unmapped_reads=`grep -c "^\+$" $mappi_out".unmapped.1"`
+    fi
+
+    if [ ! -e $rv ]; then
+        /homes/fhaas/src/gsnap -D $db_path_V3 -d $db_name_V3 -t $cpu -n 5 -A sam --npaths=5 \
+            --read-group-platform=illumina --read-group-id=RGUNKNOWN --read-group-name=$id --read-group-library=LBUNKNOWN \
+            --failed-input=$mappi_out".unmapped" --split-output=$mappi_out".genome" \
+            $polyA_out".polyA.passed.fastq" 2> $mappi_out".mappi.err"
+
+        unmapped_reads=`grep -c "^\+$" $mappi_out".unmapped"`
+    fi
+
+    # mapping statistics
+    echo "## Pp genome version 3 ##"
+    echo "Unmapped reads: "$unmapped_reads
+}
+
+
+sample ()
+{
+    # if PE, use ".genome"*[tcqg]
+    for IN in $mappi_out".genome"*[tcqg]
+    do
+        printf $IN"\tsort.bam\n" >&2
+        /vol/agrensing/tools/samtools/samtools-1.12/samtools view -@ $cpu -bhS $IN | /vol/agrensing/tools/samtools/samtools-1.12/samtools sort -@ $cpu -m 2G - -o $IN".sort.bam"
+
+        printf $IN"\trmdup.bam\n" >&2
+        grep -vE "chloro|ribo|mito|ERCC" $IN | /vol/agrensing/tools/samtools/samtools-1.12/samtools sort -@ $cpu -m 2G -n -O BAM - | \
+            /vol/agrensing/tools/samtools/samtools-1.12/samtools fixmate -@ $cpu -m - - | /vol/agrensing/tools/samtools/samtools-1.12/samtools sort -@ $cpu -m 2G -O BAM - | \
+            /vol/agrensing/tools/samtools/samtools-1.12/samtools markdup -@ $cpu -s -r - \
+            $IN".filter.fixmate.sort.markdup_r.bam"
+
+        # samtools statistics
+        v_tmp=`awk '$1!~"@" {print $1}' $IN | sort | uniq | wc -l`
+        printf "Number of mapped reads: "$IN"\t"$v_tmp"\n"
+
+        # compress the sam text files
+        # gzip $IN
+        # rm sam file
+        rm $IN
+
+        # bam index
+        /vol/agrensing/tools/samtools/samtools-1.12/samtools index $IN*"_r.bam"
+        /vol/agrensing/tools/samtools/samtools-1.12/samtools index $IN".sort.bam"
+    done
+
+    ls -l $mappi_out*"bam" >&2
+
+    # merge all unique bam files
+    echo "merging all .uniq bam files" >&2
+    echo "merging all .uniq bam files"
+    /vol/agrensing/tools/samtools/samtools-1.12/samtools merge -@ $cpu $mappi_out".uniq.merged.bam" $mappi_out*"uniq"*".sort.bam"
+    /vol/agrensing/tools/samtools/samtools-1.12/samtools index $mappi_out".uniq.merged.bam"
+
+    /vol/agrensing/tools/samtools/samtools-1.12/samtools merge -@ $cpu $mappi_out".uniq.merged.rmdup.bam" $mappi_out*"uniq"*"_r.bam"
+    /vol/agrensing/tools/samtools/samtools-1.12/samtools index $mappi_out".uniq.merged.rmdup.bam"
+}
+
+counti()
+{
+    if [ -e $rv ]; then
+        # read count v3.3 and ERCC combined
+        /vol/agrensing/tools/Subread/subread-1.6.2-Linux-x86_64/bin/featureCounts -p -B -C -T $cpu -a /vol/agrensing/fhaas/GeneAtlas/Pp_v3.3_ercc_gt.gff \
+            -t exon -g Parent -o $counti_out"_v3.3_32458_ercc.exon.pB.txt" $mappi_out".uniq.merged.rmdup.bam" 2> $counti_out"_v3.3_32458_ercc.exon.pB.log"
+        /vol/agrensing/tools/Subread/subread-1.6.2-Linux-x86_64/bin/featureCounts -p -C -T $cpu -a /vol/agrensing/fhaas/GeneAtlas/Pp_v3.3_ercc_gt.gff \
+            -t exon -g Parent -o $counti_out"_v3.3_32458_ercc.exon.p.txt" $mappi_out".uniq.merged.rmdup.bam" 2> $counti_out"_v3.3_32458_ercc.exon.p.log"
+        /vol/agrensing/tools/Subread/subread-1.6.2-Linux-x86_64/bin/featureCounts -p -B -C -T $cpu -a /vol/agrensing/fhaas/GeneAtlas/Pp_v3.3_rm.double_clean.N_ercc_gt.gff \
+            -t exon -g Parent -o $counti_out"_v3.3_31315_ercc.exon.pB.txt" $mappi_out".uniq.merged.rmdup.bam" 2> $counti_out"_v3.3_31315_ercc.exon.pB.log"
+        /vol/agrensing/tools/Subread/subread-1.6.2-Linux-x86_64/bin/featureCounts -p -C -T $cpu -a /vol/agrensing/fhaas/GeneAtlas/Pp_v3.3_rm.double_clean.N_ercc_gt.gff \
+            -t exon -g Parent -o $counti_out"_v3.3_31315_ercc.exon.p.txt" $mappi_out".uniq.merged.rmdup.bam" 2> $counti_out"_v3.3_31315_ercc.exon.p.log"
+
+        # statistics
+        printf "\n### FeatureCount statistics v3.3 ###\n"
+        # 32458
+        echo "Fragment counts on gene models exon PE 32458:"
+        grep "Successfully assigned fragments" $counti_out"_v3.3_32458_ercc.exon.pB.log"
+        echo "Fragment counts on gene models exon PE+SE 32458:"
+        grep "Successfully assigned fragments" $counti_out"_v3.3_32458_ercc.exon.p.log"
+        # 31315
+        echo "Fragment counts on gene models exon PE 31315:"
+        grep "Successfully assigned fragments" $counti_out"_v3.3_31315_ercc.exon.pB.log"
+        echo "Fragment counts on gene models exon PE+SE 31315:"
+        grep "Successfully assigned fragments" $counti_out"_v3.3_31315_ercc.exon.p.log"
+    fi
+    if [ ! -e $rv ]; then
+        /vol/agrensing/tools/Subread/subread-1.6.2-Linux-x86_64/bin/featureCounts -T $cpu -a /vol/agrensing/fhaas/GeneAtlas/Pp_v3.3_ercc_gt.gff \
+            -t exon -g Parent -o $counti_out"_v3.3_32458_ercc.exon.SE.txt" $mappi_out".uniq.merged.rmdup.bam" 2> $counti_out"_v3.3_32458_ercc.exon.SE.log"
+        /vol/agrensing/tools/Subread/subread-1.6.2-Linux-x86_64/bin/featureCounts -T $cpu -a /vol/agrensing/fhaas/GeneAtlas/Pp_v3.3_rm.double_clean.N_ercc_gt.gff \
+            -t exon -g Parent -o $counti_out"_v3.3_31315_ercc.exon.SE.txt" $mappi_out".uniq.merged.rmdup.bam" 2> $counti_out"_v3.3_31315_ercc.exon.SE.log"
+
+        # statistics
+        printf "\n### FeatureCount statistics v3.3 ###"
+        # 32458
+        echo "Fragment counts on gene models exon SE 32458:"
+        grep "Successfully assigned " $counti_out"_v3.3_32458_ercc.exon.SE.log"
+        # 31315
+        echo "Fragment counts on gene models exon SE 31315:"
+        grep "Successfully assigned " $counti_out"_v3.3_31315_ercc.exon.SE.log"
+    fi
+}
+
+depthi()
+{
+    /vol/agrensing/tools/samtools/samtools-1.12/samtools depth -aa $mappi_out".uniq.merged.rmdup.bam" > $mappi_out".uniq.merged.rmdup.depth"
+}
+
+
+snpi()
+{
+    gatkrun='/vol/agrensing/tools/gatk/gatk-4.2.0.0/gatk --java-options -Xmx50G'
+    threading_option='--native-pair-hmm-threads 8'
+    # V3
+    # start the SNP detection
+    $gatkrun HaplotypeCaller $threading_option -R $reffna -I $mappi_out".uniq.merged.rmdup.bam" -ploidy 1 -O $snpi_out".HaplotypeCaller.vcf" 2> $snpi_out".HaplotypeCaller.err"
+
+    $gatkrun SelectVariants -R $reffna --variant $snpi_out".HaplotypeCaller.vcf" -select-type INDEL -O $snpi_out".SelectVariants.vcf" 2> $snpi_out".SelectVariants.err"
+
+    # HQ SNPs
+    python2.7 /vol/agrensing/fhaas/GeneAtlas/SNP/GetHighQualVcfs.py -i $snpi_out".HaplotypeCaller.vcf" -o $output_dir"/05_SNP_calling/"$species"/"$prefix"/" --ploidy 1 --percentile 90 --GQ 90 2> $snpi_out".GetHighQualVcfs.HQ.err"
+
+# mv $snpi_out".GetHighQualVcfs.HQ.vcf" $snpi_out".GetHighQualVcfs.V3.HQ.vcf"
+
+    $gatkrun IndexFeatureFile -I $snpi_out".GetHighQualVcfs.HQ.vcf" 2> $snpi_out".GetHighQualVcfs.HQ.idx.err"
+
+    $gatkrun BaseRecalibrator -R $reffna -I $mappi_out".uniq.merged.rmdup.bam" -known-sites $snpi_out".GetHighQualVcfs.HQ.vcf" -O $snpi_out".BaseRecalibrator.table" 2> $snpi_out".BaseRecalibrator.err"
+
+    $gatkrun ApplyBQSR -R $reffna -I $mappi_out".uniq.merged.rmdup.bam" --bqsr-recal-file $snpi_out".BaseRecalibrator.table" -O $snpi_out".BaseRecalibrator.ApplyBQSR.bam" \
+        2> $snpi_out".BaseRecalibrator.ApplyBQSR.err"
+
+    $gatkrun BaseRecalibrator -R $reffna -I $snpi_out".BaseRecalibrator.ApplyBQSR.bam" -known-sites $snpi_out".GetHighQualVcfs.HQ.vcf" -O $snpi_out".BaseRecalibrator.recal.table" \
+        2> $snpi_out".BaseRecalibrator.recal.err"
+
+    $gatkrun AnalyzeCovariates -before $snpi_out".BaseRecalibrator.table" -after $snpi_out".BaseRecalibrator.recal.table" -verbosity DEBUG -csv $snpi_out".AnalyzeCovariates.calibration-report.csv" \
+        -plots $snpi_out".AnalyzeCovariates.calibration-report.QC.pdf" 2> $snpi_out".AnalyzeCovariates.calibration-report.err"
+
+    $gatkrun PrintReads -R $reffna -I $mappi_out".uniq.merged.rmdup.bam" -O $snpi_out".PrintReads.bam" 2> $snpi_out".PrintReads.err"
+
+    $gatkrun PrintReads -R $reffna -I $snpi_out".BaseRecalibrator.ApplyBQSR.bam" -O $snpi_out".PrintReads.ApplyBQSR.bam" 2> $snpi_out".PrintReads.ApplyBQSR.err"
+
+    # HaplotypeCaller and VariantFiltration w/o ApplyBQSR BAM file
+    $gatkrun HaplotypeCaller $threading_option -R $reffna -I $snpi_out".PrintReads.bam" -ploidy 1 -O $snpi_out".PrintReads.HaplotypeCaller.vcf" 2> $snpi_out".PrintReads.HaplotypeCaller.err"
+
+    $gatkrun HaplotypeCaller $threading_option -R $reffna -I $snpi_out".PrintReads.ApplyBQSR.bam" -ploidy 1 -O $snpi_out".PrintReads.ApplyBQSR.HaplotypeCaller.vcf" \
+        2> $snpi_out".PrintReads.ApplyBQSR.HaplotypeCaller.err"
+
+    # filter flags ## NOTE: thresholds must be written with the correct numeric type (e.g. FS must be 60.0, not 60) ##
+    # --filter-expression "FS > 60" --filter-name "StrandBias" --filter-expression "QD < 2.0" --filter-name "LowQD" --filter-expression "MQ < 40.0"
+    # --filter-name "LowMQ" --filter-expression "QUAL > 30.0 && QUAL < 60.0" --filter-name "LowQual" --filter-expression "DP < 5" --filter-name "LowCoverage"
+    #
+    $gatkrun VariantFiltration -R $reffna --variant $snpi_out".PrintReads.HaplotypeCaller.vcf" \
+        --filter-expression "FS > 60.0" --filter-name "StrandBias" --filter-expression "QD < 2.0" --filter-name "LowQD" --filter-expression "MQ < 40.0" \
+        --filter-name "LowMQ" --filter-expression "QUAL > 30.0" --filter-name "LowQual" --filter-expression "DP < 5" --filter-name "LowCoverage" \
+        -O $snpi_out".VariantFiltration.vcf" 2> $snpi_out".VariantFiltration.err"
+
+    $gatkrun VariantFiltration -R $reffna --variant $snpi_out".PrintReads.ApplyBQSR.HaplotypeCaller.vcf" \
+        --filter-expression "FS > 60.0" --filter-name "StrandBias" --filter-expression "QD < 2.0" --filter-name "LowQD" --filter-expression "MQ < 40.0" \
+        --filter-name "LowMQ" --filter-expression "QUAL > 30.0" --filter-name "LowQual" --filter-expression "DP < 5" --filter-name "LowCoverage" \
+        -O $snpi_out".ApplyBQSR.VariantFiltration.vcf" 2> $snpi_out".ApplyBQSR.VariantFiltration.err"
+}
+
+corri()
+{
+    for IN in $mappi_out*.gz
+    do
+        gunzip $IN
+        file=`echo $IN | awk '{print substr($1, 1, length($1)-3)}'`
+        /vol/agrensing/tools/samtools/samtools-1.12/samtools view -@ $cpu -bhS $file | /vol/agrensing/tools/samtools/samtools-1.12/samtools sort -@ $cpu -m 2G - -o $file".sort.bam"
+        /vol/agrensing/tools/samtools/samtools-1.12/samtools index $file".sort.bam"
+        gzip $file
+    done
+    /vol/agrensing/tools/samtools/samtools-1.12/samtools merge -@ $cpu $mappi_out".uniq.merged.bam" $mappi_out*"uniq"*".sort.bam"
+    /vol/agrensing/tools/samtools/samtools-1.12/samtools index $mappi_out".uniq.merged.bam"
+
+    gatkrun='/ceph/agrensing/tools/gatk/gatk-4.2.0.0/gatk --java-options -Xmx50G'
+    threading_option='--native-pair-hmm-threads 8'
+    $gatkrun BaseRecalibrator -R $reffna -I $snpi_out".BaseRecalibrator.ApplyBQSR.bam" -known-sites $snpi_out".GetHighQualVcfs.HQ.vcf" -O $snpi_out".BaseRecalibrator.recal.table" \
+        2> $snpi_out".BaseRecalibrator.recal.err"
+
+    $gatkrun AnalyzeCovariates -before $snpi_out".BaseRecalibrator.table" -after $snpi_out".BaseRecalibrator.recal.table" -verbosity DEBUG -csv $snpi_out".AnalyzeCovariates.calibration-report.csv" \
+        -plots $snpi_out".AnalyzeCovariates.calibration-report.QC.pdf" 2> $snpi_out".AnalyzeCovariates.calibration-report.err"
+
+}
+# script input variables
+
+cpu=$1
+output_dir=$2
+prefix=$3
+species=$4
+infile_path=$5
+
+# set variables
+java_ram="-Xmx50G -Xms25G"
+id=$prefix
+
+fw=$infile_path"/fw."$prefix
+rv=$infile_path"/rv."$prefix
+
+db_path_V3="/vol/agrensing/fhaas/DB/Pp_core_chloro_mito_ribo_ercc_sNone/Pp_core_chloro_mito_ribo_ercc_sNone"
+db_name_V3="Pp_core_chloro_mito_ribo_ercc_sNone"
+
+reffna="/vol/agrensing/fhaas/DB/Pp_core_chloro_mito_ribo_ercc/Pp_core_chloro_mito_ribo_ercc.fna"
+
+
+# outdir variables
+# output=$output_dir"/"$prefix
+trimmo_out=$output_dir"/02_trimmomatic/"$species"/"$prefix"/"$prefix
+polyA_out=$output_dir"/03_poly-A/"$species"/"$prefix"/"$prefix
+mappi_out=$output_dir"/04_genome_mapping/"$species"/"$prefix"/"$prefix
+snpi_out=$output_dir"/05_SNP_calling/"$species"/"$prefix"/"$prefix
+counti_out=$output_dir"/06_counts/"$species"/"$prefix"/"$prefix
+
+# create result folders
+mkdir -p $output_dir"/02_trimmomatic/"$species"/"$prefix
+mkdir -p $output_dir"/03_poly-A/"$species"/"$prefix
+mkdir -p $output_dir"/04_genome_mapping/"$species"/"$prefix
+mkdir -p $output_dir"/05_SNP_calling/"$species"/"$prefix
+mkdir -p $output_dir"/06_counts/"$species"/"$prefix
+
+mkdir -p $output_dir"/02_trimmomatic/"$species"/"$prefix"/fastqc"
+mkdir -p $output_dir"/03_poly-A/"$species"/"$prefix"/fastqc"
+
+
+# print running machine and start date
+hostname
+date
+
+# print all variables
+printf $cpu"\n"$output_dir"\n"$prefix"\n"$species"\n"$fw"\n"
+
+if [ -e $rv ]; then
+    printf $rv"\n"
+fi
+
+printf "\'$java_ram\'""\n"$id"\n"$db_path_V3"\n"$db_name_V3"\n"
+
+echo ""
+
+echo "Phase 0 assemble data"
+startle
+
+date
+echo "Phase 1 run trimmo"
+trimmo
+
+date
+echo "Phase 2 run polyA"
+polyA
+duple
+date
+echo "Phase 3 run mapping"
+mappi
+#-# mappi_gDNA
+sample
+counti
+depthi
+date
+echo "Phase 4 run SNP calling"
+snpi
+#-# corri
+date
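Usage sketch: the script reads five positional arguments (threads, output directory, sample prefix, species, input directory) and expects the reads to be present as fw.<prefix> (plus rv.<prefix> for paired-end data) inside the input directory. The paths and sample name below are hypothetical placeholders, not values from the pipeline itself:

    bash workflows/SNP_pipe.sh 16 /vol/agrensing/results A1_protonema Pp /vol/agrensing/reads/A1_protonema

With these arguments the per-step outputs land under /vol/agrensing/results/02_trimmomatic/Pp/A1_protonema, .../03_poly-A/Pp/A1_protonema, and so on, following the *_out variables defined near the end of the script.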