Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
Physcomitrium patens light signaling 2022
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
MAdLand
Physcomitrium patens light signaling 2022
Commits
7803be64
Commit
7803be64
authored
1 year ago
by
Saskia Hiltemann
Browse files
Options
Downloads
Patches
Plain Diff
add Fabian's SNP analysis pipeline
parent
de37afc6
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
workflows/SNP_pipe.sh
+411
-0
411 additions, 0 deletions
workflows/SNP_pipe.sh
with
411 additions
and
0 deletions
workflows/SNP_pipe.sh
0 → 100644
+
411
−
0
View file @
7803be64
#!/bin/bash
#. /etc/profile.d/modules.sh
export
PATH
=
$PATH
:/vol/biotools/bin
export
MANPATH
=
$MANPATH
:/vol/biotools/man
export
PATH
=
$PATH
:/vol/software/bin
# install important R package - lummerland cluster does not have it ...
#echo "install.packages(\"gplots\", repos=\"https://cran.rstudio.com\")" | R --vanilla --no-save
#R CMD INSTALL /vol/agrensing/tools/R_packages/bitops_1.0-6.tar.gz
#R CMD INSTALL /vol/agrensing/tools/R_packages/caTools_1.17.1.1.tar.gz
#R CMD INSTALL /vol/agrensing/tools/R_packages/gtools_3.8.1.tar.gz
#R CMD INSTALL /vol/agrensing/tools/R_packages/gdata_2.18.0.tar.gz
#R CMD INSTALL /vol/agrensing/tools/R_packages/gplots_3.0.1.tar.gz
#functions
startle
()
{
number_row_reads
=
`
wc
-l
$fw
|
awk
'{print $1/4}'
`
echo
"## Start log ##"
echo
"Number of row reads: "
$number_row_reads
}
trimmo
()
{
# Remove leading low quality or N bases (below quality 3) (LEADING:3)
# Remove trailing low quality or N bases (below quality 3) (TRAILING:3)
# Scan the read with a 4-base wide sliding window, cutting when the average quality per base drops below 15 (SLIDINGWINDOW:4:15)
# HEADCROP: Cut the specified Number of bases from the start of the read
# MINLEN was first set to 50 bp. But there are libraries with read start length of 50 bp.
if
[
-e
$rv
]
;
then
java
$java_ram
-jar
/nfs/agrensing/tools/trimmomatic/Trimmomatic-0.39/trimmomatic-0.39.jar PE
-threads
$cpu
-phred33
-summary
$trimmo_out
".trimmo.stats"
\
$fw
$rv
$trimmo_out
".PE.fw"
$trimmo_out
".SE.fw"
$trimmo_out
".PE.rv"
$trimmo_out
".SE.rv"
\
ILLUMINACLIP:/nfs/agrensing/tools/trimmomatic/Trimmomatic-0.39/adapters/TruSeq-PE_all.fna:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 HEADCROP:10 MINLEN:30
# stats
echo
"## Trimmo statistics ##"
printf
"trim
\t
"
$trimmo_out
".PE.fw
\t
"
;
grep
-c
"^
\+
$"
$trimmo_out
".PE.fw"
# fastqc
/nfs/agrensing/tools/FastQC/FastQC_v0.11.7/fastqc
-f
fastq
-t
$cpu
-o
$output_dir
"/02_trimmomatic/"
$species
"/"
$prefix
"/fastqc"
$trimmo_out
".PE."
*
fi
if
[
!
-e
$rv
]
;
then
java
$java_ram
-jar
/nfs/agrensing/tools/trimmomatic/Trimmomatic-0.39/trimmomatic-0.39.jar SE
-threads
$cpu
-phred33
-summary
$trimmo_out
".trimmo.stats"
\
$fw
$trimmo_out
".SE"
\
ILLUMINACLIP:/nfs/agrensing/tools/trimmomatic/Trimmomatic-0.39/adapters/TruSeq-PE_all.fna:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 HEADCROP:10 MINLEN:30
# stats
echo
"## Trimmo statistics ##"
# printf "trim\t"$trimmo_out".SE\t"; grep -c "^\+$" $trimmo_out".SE"
printf
"trim
\t
"
$trimmo_out
".SE
\t
"
;
wc
-l
$trimmo_out
".SE"
|
awk
'{print $1/4}'
# fastqc
/nfs/agrensing/tools/FastQC/FastQC_v0.11.7/fastqc
-f
fastq
-t
$cpu
-o
$output_dir
"/02_trimmomatic/"
$species
"/"
$prefix
"/fastqc"
$trimmo_out
".SE"
fi
}
polyA
()
{
# min_len was first set to 50 bp. But there are libraries with read start length of 50 bp.
# If you specify any statistic option, no other ouput will be generated. To preprocess data, do not specify a statistics option.
# -stats_dupl -stats_all
inpA
=
$polyA_out
".polyA.passed"
infA
=
$polyA_out
".polyA.failed"
if
[
-e
$rv
]
;
then
/nfs/agrensing/tools/prinseq/prinseq-lite-0.20.4/./prinseq-lite.pl
-verbose
-no_qual_header
-fastq
$trimmo_out
".PE.fw"
-fastq2
$trimmo_out
".PE.rv"
\
-trim_tail_left
5
-trim_tail_right
5
-min_len
30
-out_good
$inpA
-out_bad
$infA
\
-out_format
3
-log
$polyA_out
"_polyA.log"
# stats
echo
"## PolyA statistics ##"
printf
"polyA
\t
"
$inpA
"_1.fastq
\t
"
;
wc
-l
$inpA
"_1.fastq"
|
awk
'{print $1/4}'
#fastqc
/nfs/agrensing/tools/FastQC/FastQC_v0.11.7/fastqc
-f
fastq
-t
$cpu
-o
$output_dir
"/03_poly-A/"
$species
"/"
$prefix
"/fastqc"
$inpA
"_"
[
12]
".fastq"
fi
if
[
!
-e
$rv
]
;
then
/nfs/agrensing/tools/prinseq/prinseq-lite-0.20.4/./prinseq-lite.pl
-verbose
-no_qual_header
-fastq
$trimmo_out
".SE"
\
-trim_tail_left
5
-trim_tail_right
5
-min_len
30
-out_good
$inpA
-out_bad
$infA
\
-out_format
3
-log
$polyA_out
"_polyA.log"
# stats
echo
"## PolyA statistics ##"
printf
"polyA
\t
"
$inpA
".fastq
\t
"
;
grep
-c
"^
\+
$"
$inpA
".fastq"
#fastqc
/nfs/agrensing/tools/FastQC/FastQC_v0.11.7/fastqc
-f
fastq
-t
$cpu
-o
$output_dir
"/03_poly-A/"
$species
"/"
$prefix
"/fastqc"
$inpA
".fastq"
fi
}
duple
()
{
# number of 100% identical duplicat reads. {29} because of min_len 30 bp.
echo
"Duplication:"
for
IN
in
$polyA_out
".polyA.passed"
*
;
do
printf
$IN
"
\t
"
;
num_dup
=
`
awk
'$1~/[NATCG]{29}/ {foo[$1]++} END {for(o in foo){print o"\t"foo[o]}}'
$IN
|
wc
-l
`
;
\
printf
$num_dup
"
\t
"
;
printf
%.5f
\\
n
"
$((
100000
*
num_dup
/
number_row_reads
))
e-5"
;
done
}
mappi
()
{
if
[
-e
$rv
]
;
then
/homes/fhaas/src/gsnap
-D
$db_path_V3
-d
$db_name_V3
-t
$cpu
-N
1
-n
7
-A
sam
--npaths
=
5
\
--read-group-platform
=
illumina
--read-group-id
=
RGUNKNOWN
--read-group-name
=
$id
--read-group-library
=
LBUNKNOWN
\
--failed-input
=
$mappi_out
".unmapped"
--split-output
=
$mappi_out
".genome"
\
$polyA_out
".polyA.passed_1.fastq"
$polyA_out
".polyA.passed_2.fastq"
2>
$mappi_out
".mappi.err"
unmapped_reads
=
`
grep
-c
"^
\+
$"
$mappi_out
".unmapped.1"
`
fi
if
[
!
-e
$rv
]
;
then
/ceph/agrensing/tools/gmap-gsnap/gmap-2021-03-08/src/gsnap
-D
$db_path_V3
-d
$db_name_V3
-t
$cpu
-N
1
-n
5
-A
sam
--npaths
=
5
\
--read-group-platform
=
illumina
--read-group-id
=
RGUNKNOWN
--read-group-name
=
$id
--read-group-library
=
LBUNKNOWN
\
--failed-input
=
$mappi_out
".unmapped"
--split-output
=
$mappi_out
".genome"
\
$polyA_out
".polyA.passed.fastq"
2>
$mappi_out
".mappi.err"
unmapped_reads
=
`
grep
-c
"^
\+
$"
$mappi_out
".unmapped"
`
fi
# mapping statistics
echo
"## Pp genome version 3 ##"
echo
"Unmapped reads: "
$unmapped_reads
}
mappi_gDNA
()
{
if
[
-e
$rv
]
;
then
/homes/fhaas/src/gsnap
-D
$db_path_V3
-d
$db_name_V3
-t
$cpu
-n
7
-A
sam
--npaths
=
5
\
--read-group-platform
=
illumina
--read-group-id
=
RGUNKNOWN
--read-group-name
=
$id
--read-group-library
=
LBUNKNOWN
\
--failed-input
=
$mappi_out
".unmapped"
--split-output
=
$mappi_out
".genome"
\
$polyA_out
".polyA.passed_1.fastq"
$polyA_out
".polyA.passed_2.fastq"
2>
$mappi_out
".mappi.err"
unmapped_reads
=
`
grep
-c
"^
\+
$"
$mappi_out
".unmapped.1"
`
fi
if
[
!
-e
$rv
]
;
then
/homes/fhaas/src/gsnap
-D
$db_path_V3
-d
$db_name_V3
-t
$cpu
-n
5
-A
sam
--npaths
=
5
\
--read-group-platform
=
illumina
--read-group-id
=
RGUNKNOWN
--read-group-name
=
$id
--read-group-library
=
LBUNKNOWN
\
--failed-input
=
$mappi_out
".unmapped"
--split-output
=
$mappi_out
".genome"
\
$polyA_out
".polyA.passed.fastq"
2>
$mappi_out
".mappi.err"
unmapped_reads
=
`
grep
-c
"^
\+
$"
$mappi_out
".unmapped"
`
fi
# mapping statistics
echo
"## Pp genome version 3 ##"
echo
"Unmapped reads: "
$unmapped_reads
}
sample
()
{
# if PE use ".genome"*[tcqg]
for
IN
in
$mappi_out
".genome"
*
[
tcqg]
do
printf
$IN
"
\t
sort.bam
\n
"
>
&2
/vol/agrensing/tools/samtools/samtools-1.12/samtools view -@
$cpu
-bhS
$IN
| /vol/agrensing/tools/samtools/samtools-1.12/samtools
sort
-@
$cpu
-m
2G -
-o
$IN
".sort.bam"
printf
$IN
"
\t
rmdup.bam
\n
"
>
&2
grep
-vE
"chloro|ribo|mito|ERCC"
$IN
| /vol/agrensing/tools/samtools/samtools-1.12/samtools
sort
-@
$cpu
-m
2G
-n
-O
BAM - |
\
/vol/agrensing/tools/samtools/samtools-1.12/samtools fixmate -@
$cpu
-m
- - | /vol/agrensing/tools/samtools/samtools-1.12/samtools
sort
-@
$cpu
-m
2G
-O
BAM - |
\
/vol/agrensing/tools/samtools/samtools-1.12/samtools markdup -@
$cpu
-s
-r
-
\
$IN
".filter.fixmate.sort.markdup_r.bam"
# samtools statistics
v_tmp
=
`
awk
'$1!~"@" {print $1}'
$IN
|
sort
|
uniq
|
wc
-l
`
printf
"Number of mapped reads: "
$IN
"
\t
"
$v_tmp
"
\n
"
# compress the sam text files
# gzip $IN
# rm sam file
rm
$IN
# bam index
/vol/agrensing/tools/samtools/samtools-1.12/samtools index
$IN
*
"_r.bam"
/vol/agrensing/tools/samtools/samtools-1.12/samtools index
$IN
".sort.bam"
done
ls
-l
$mappi_out
*
"bam"
>
&2
# merge all unique bam files
echo
"merging all .uniq bam files"
>
&2
echo
"merging all .uniq bam files"
/vol/agrensing/tools/samtools/samtools-1.12/samtools merge -@
$cpu
$mappi_out
".uniq.merged.bam"
$mappi_out
*
"uniq"
*
".sort.bam"
/vol/agrensing/tools/samtools/samtools-1.12/samtools index
$mappi_out
".uniq.merged.bam"
/vol/agrensing/tools/samtools/samtools-1.12/samtools merge -@
$cpu
$mappi_out
".uniq.merged.rmdup.bam"
$mappi_out
*
"uniq"
*
"_r.bam"
/vol/agrensing/tools/samtools/samtools-1.12/samtools index
$mappi_out
".uniq.merged.rmdup.bam"
}
counti
()
{
if
[
-e
$rv
]
;
then
# read count v3.3 and ERCC combined
/vol/agrensing/tools/Subread/subread-1.6.2-Linux-x86_64/bin/featureCounts
-p
-B
-C
-T
$cpu
-a
/vol/agrensing/fhaas/GeneAtlas/Pp_v3.3_ercc_gt.gff
\
-t
exon
-g
Parent
-o
$counti_out
"_v3.3_32458_ercc.exon.pB.txt"
$mappi_out
".uniq.merged.rmdup.bam"
2>
$counti_out
"_v3.3_32458_ercc.exon.pB.log"
/vol/agrensing/tools/Subread/subread-1.6.2-Linux-x86_64/bin/featureCounts
-p
-C
-T
$cpu
-a
/vol/agrensing/fhaas/GeneAtlas/Pp_v3.3_ercc_gt.gff
\
-t
exon
-g
Parent
-o
$counti_out
"_v3.3_32458_ercc.exon.p.txt"
$mappi_out
".uniq.merged.rmdup.bam"
2>
$counti_out
"_v3.3_32458_ercc.exon.p.log"
/vol/agrensing/tools/Subread/subread-1.6.2-Linux-x86_64/bin/featureCounts
-p
-B
-C
-T
$cpu
-a
/vol/agrensing/fhaas/GeneAtlas/Pp_v3.3_rm.double_clean.N_ercc_gt.gff
\
-t
exon
-g
Parent
-o
$counti_out
"_v3.3_31315_ercc.exon.pB.txt"
$mappi_out
".uniq.merged.rmdup.bam"
2>
$counti_out
"_v3.3_31315_ercc.exon.pB.log"
/vol/agrensing/tools/Subread/subread-1.6.2-Linux-x86_64/bin/featureCounts
-p
-C
-T
$cpu
-a
/vol/agrensing/fhaas/GeneAtlas/Pp_v3.3_rm.double_clean.N_ercc_gt.gff
\
-t
exon
-g
Parent
-o
$counti_out
"_v3.3_31315_ercc.exon.p.txt"
$mappi_out
".uniq.merged.rmdup.bam"
2>
$counti_out
"_v3.3_31315_ercc.exon.p.log"
# statistics
printf
"
\n
### FeatureCount statisitcs v3.3 ###
\n
"
# 32458
echo
"Fragment counts on gene models exon PE 32458:"
grep
"Successfully assigned fragments"
$counti_out
"_v3.3_32458_ercc.exon.pB.log"
echo
"Fragment counts on gene models exon PE+SE 32458:"
grep
"Successfully assigned fragments"
$counti_out
"_v3.3_32458_ercc.exon.p.log"
# 31315
echo
"Fragment counts on gene models exon PE 31315:"
grep
"Successfully assigned fragments"
$counti_out
"_v3.3_31315_ercc.exon.pB.log"
echo
"Fragment counts on gene models exon PE+SE 31315:"
grep
"Successfully assigned fragments"
$counti_out
"_v3.3_31315_ercc.exon.p.log"
fi
if
[
!
-e
$rv
]
;
then
/vol/agrensing/tools/Subread/subread-1.6.2-Linux-x86_64/bin/featureCounts
-T
$cpu
-a
/vol/agrensing/fhaas/GeneAtlas/Pp_v3.3_ercc_gt.gff
\
-t
exon
-g
Parent
-o
$counti_out
"_v3.3_32458_ercc.exon.SE.txt"
$mappi_out
".uniq.merged.rmdup.bam"
2>
$counti_out
"_v3.3_32458_ercc.exon.SE.log"
/vol/agrensing/tools/Subread/subread-1.6.2-Linux-x86_64/bin/featureCounts
-T
$cpu
-a
/vol/agrensing/fhaas/GeneAtlas/Pp_v3.3_rm.double_clean.N_ercc_gt.gff
\
-t
exon
-g
Parent
-o
$counti_out
"_v3.3_31315_ercc.exon.SE.txt"
$mappi_out
".uniq.merged.rmdup.bam"
2>
$counti_out
"_v3.3_31315_ercc.exon.SE.log"
# statistics
printf
"
\n
### FeatureCount statisitcs v3.3 ###"
# 32458
echo
"Fragment counts on gene models exon SE 32458:"
grep
"Successfully assigned "
$counti_out
"_v3.3_32458_ercc.exon.SE.log"
# 31315
echo
"Fragment counts on gene models exon SE 31315:"
grep
"Successfully assigned "
$counti_out
"_v3.3_31315_ercc.exon.SE.log"
fi
}
depthi
()
{
/vol/agrensing/tools/samtools/samtools-1.12/samtools depth
-aa
$mappi_out
".uniq.merged.rmdup.bam"
>
$mappi_out
".uniq.merged.rmdup.depth"
}
snpi
()
{
gatkrun
=
'/vol/agrensing/tools/gatk/gatk-4.2.0.0/gatk --java-options -Xmx50G'
threading_option
=
'--native-pair-hmm-threads 8'
# V3
# start the SNP detection
$gatkrun
HaplotypeCaller
$threading_option
-R
$reffna
-I
$mappi_out
".uniq.merged.rmdup.bam"
-ploidy
1
-O
$snpi_out
".HaplotypeCaller.vcf"
2>
$snpi_out
".HaplotypeCaller.err"
$gatkrun
SelectVariants
-R
$reffna
--variant
$snpi_out
".HaplotypeCaller.vcf"
-select-type
INDEL
-O
$snpi_out
".SelectVariants.vcf"
2>
$snpi_out
".SelectVariants.err"
# HQ SNPs
python2.7 /vol/agrensing/fhaas/GeneAtlas/SNP/GetHighQualVcfs.py
-i
$snpi_out
".HaplotypeCaller.vcf"
-o
$output_dir
"/05_SNP_calling/"
$species
"/"
$prefix
"/"
--ploidy
1
--percentile
90
--GQ
90 2>
$snpi_out
".GetHighQualVcfs.HQ.err"
# mv $snpi_out".GetHighQualVcfs.HQ.vcf" $snpi_out".GetHighQualVcfs.V3.HQ.vcf"
$gatkrun
IndexFeatureFile
-I
$snpi_out
".GetHighQualVcfs.HQ.vcf"
2>
$snpi_out
".GetHighQualVcfs.HQ.idx.err"
$gatkrun
BaseRecalibrator
-R
$reffna
-I
$mappi_out
".uniq.merged.rmdup.bam"
-known-sites
$snpi_out
".GetHighQualVcfs.HQ.vcf"
-O
$snpi_out
".BaseRecalibrator.table"
2>
$snpi_out
".BaseRecalibrator.err"
$gatkrun
ApplyBQSR
-R
$reffna
-I
$mappi_out
".uniq.merged.rmdup.bam"
--bqsr-recal-file
$snpi_out
".BaseRecalibrator.table"
-O
$snpi_out
".BaseRecalibrator.ApplyBQSR.bam"
\
2>
$snpi_out
".BaseRecalibrator.ApplyBQSR.err"
$gatkrun
BaseRecalibrator
-R
$reffna
-I
$snpi_out
".BaseRecalibrator.ApplyBQSR.bam"
-known-sites
$snpi_out
".GetHighQualVcfs.HQ.vcf"
-O
$snpi_out
".BaseRecalibrator.recal.table"
\
2>
$snpi_out
".BaseRecalibrator.recal.err"
$gatkrun
AnalyzeCovariates
-before
$snpi_out
".BaseRecalibrator.table"
-after
$snpi_out
".BaseRecalibrator.recal.table"
-verbosity
DEBUG
-csv
$snpi_out
".AnalyzeCovariates.calibration-report.csv"
\
-plots
$snpi_out
".AnalyzeCovariates.calibration-report.QC.pdf"
2>
$snpi_out
".AnalyzeCovariates.calibration-report.err"
$gatkrun
PrintReads
-R
$reffna
-I
$mappi_out
".uniq.merged.rmdup.bam"
-O
$snpi_out
".PrintReads.bam"
2>
$snpi_out
".PrintReads.err"
$gatkrun
PrintReads
-R
$reffna
-I
$snpi_out
".BaseRecalibrator.ApplyBQSR.bam"
-O
$snpi_out
".PrintReads.ApplyBQSR.bam"
2>
$snpi_out
".PrintReads.ApplyBQSR.err"
# HaplotypeCaller and VariantFiltration w/o ApplyBQSR BAM file
$gatkrun
HaplotypeCaller
$threading_option
-R
$reffna
-I
$snpi_out
".PrintReads.bam"
-ploidy
1
-O
$snpi_out
".PrintReads.HaplotypeCaller.vcf"
2>
$snpi_out
".PrintReads.HaplotypeCaller.err"
$gatkrun
HaplotypeCaller
$threading_option
-R
$reffna
-I
$snpi_out
".PrintReads.ApplyBQSR.bam"
-ploidy
1
-O
$snpi_out
".PrintReads.ApplyBQSR.HaplotypeCaller.vcf"
\
2>
$snpi_out
".PrintReads.ApplyBQSR.HaplotypeCaller.err"
# filter flags ## NOTE: it is important to look for float and integer numbers (e.g. FS must be 60.0 not 60) ##
# --filter-expression "FS > 60" --filter-name "StrandBias" --filter-expression "QD < 2.0" --filter-name "LowQD" --filter-expression "MQ < 40.0"
# --filter-name "LowMQ" --filter-expression "QUAL > 30.0 && QUAL < 60.0" --filter-name "LowQual" --filter-expression "DP < 5" --filter-name "LowCoverage"
#
$gatkrun
VariantFiltration
-R
$reffna
--variant
$snpi_out
".PrintReads.HaplotypeCaller.vcf"
\
--filter-expression
"FS > 60.0"
--filter-name
"StrandBias"
--filter-expression
"QD < 2.0"
--filter-name
"LowQD"
--filter-expression
"MQ < 40.0"
\
--filter-name
"LowMQ"
--filter-expression
"QUAL > 30.0"
--filter-name
"LowQual"
--filter-expression
"DP < 5"
--filter-name
"LowCoverage"
\
-O
$snpi_out
".VariantFiltration.vcf"
2>
$snpi_out
".VariantFiltration.err"
$gatkrun
VariantFiltration
-R
$reffna
--variant
$snpi_out
".PrintReads.ApplyBQSR.HaplotypeCaller.vcf"
\
--filter-expression
"FS > 60.0"
--filter-name
"StrandBias"
--filter-expression
"QD < 2.0"
--filter-name
"LowQD"
--filter-expression
"MQ < 40.0"
\
--filter-name
"LowMQ"
--filter-expression
"QUAL > 30.0"
--filter-name
"LowQual"
--filter-expression
"DP < 5"
--filter-name
"LowCoverage"
\
-O
$snpi_out
".ApplyBQSR.VariantFiltration.vcf"
2>
$snpi_out
".ApplyBQSR.VariantFiltration.err"
}
corri
()
{
for
IN
in
$mappi_out
*
.gz
do
gunzip
$IN
file
=
`
echo
$IN
|
awk
'{print substr($1, 1, length($1)-3)}'
`
/vol/agrensing/tools/samtools/samtools-1.12/samtools view -@
$cpu
-bhS
$file
| /vol/agrensing/tools/samtools/samtools-1.12/samtools
sort
-@
$cpu
-m
2G -
-o
$file
".sort.bam"
/vol/agrensing/tools/samtools/samtools-1.12/samtools index
$file
".sort.bam"
gzip
$file
done
/vol/agrensing/tools/samtools/samtools-1.12/samtools merge -@
$cpu
$mappi_out
".uniq.merged.bam"
$mappi_out
*
"uniq"
*
".sort.bam"
/vol/agrensing/tools/samtools/samtools-1.12/samtools index
$mappi_out
".uniq.merged.bam"
gatkrun
=
'/ceph/agrensing/tools/gatk/gatk-4.2.0.0/gatk --java-options -Xmx50G'
threading_option
=
'--native-pair-hmm-threads 8'
$gatkrun
BaseRecalibrator
-R
$reffna
-I
$snpi_out
".BaseRecalibrator.ApplyBQSR.bam"
-known-sites
$snpi_out
".GetHighQualVcfs.HQ.vcf"
-O
$snpi_out
".BaseRecalibrator.recal.table"
\
2>
$snpi_out
".BaseRecalibrator.recal.err"
$gatkrun
AnalyzeCovariates
-before
$snpi_out
".BaseRecalibrator.table"
-after
$snpi_out
".BaseRecalibrator.recal.table"
-verbosity
DEBUG
-csv
$snpi_out
".AnalyzeCovariates.calibration-report.csv"
\
-plots
$snpi_out
".AnalyzeCovariates.calibration-report.QC.pdf"
2>
$snpi_out
".AnalyzeCovariates.calibration-report.err"
}
# script input variables
cpu
=
$1
output_dir
=
$2
prefix
=
$3
species
=
$4
infile_path
=
$5
# set variables
java_ram
=
"-Xmx50G -Xms25G"
id
=
$prefix
fw
=
$infile_path
"/fw."
$prefix
rv
=
$infile_path
"/rv."
$prefix
db_path_V3
=
"/vol/agrensing/fhaas/DB/Pp_core_chloro_mito_ribo_ercc_sNone/Pp_core_chloro_mito_ribo_ercc_sNone"
db_name_V3
=
"Pp_core_chloro_mito_ribo_ercc_sNone"
reffna
=
"/vol/agrensing/fhaas/DB/Pp_core_chloro_mito_ribo_ercc/Pp_core_chloro_mito_ribo_ercc.fna"
# outdir variables
# output=$output_dir"/"$prefix
trimmo_out
=
$output_dir
"/02_trimmomatic/"
$species
"/"
$prefix
"/"
$prefix
polyA_out
=
$output_dir
"/03_poly-A/"
$species
"/"
$prefix
"/"
$prefix
mappi_out
=
$output_dir
"/04_genome_mapping/"
$species
"/"
$prefix
"/"
$prefix
snpi_out
=
$output_dir
"/05_SNP_calling/"
$species
"/"
$prefix
"/"
$prefix
counti_out
=
$output_dir
"/06_counts/"
$species
"/"
$prefix
"/"
$prefix
# create result folder
mkdir
-p
$output_dir
"/02_trimmomatic/"
$species
"/"
$prefix
mkdir
-p
$output_dir
"/03_poly-A/"
$species
"/"
$prefix
mkdir
-p
$output_dir
"/04_genome_mapping/"
$species
"/"
$prefix
mkdir
-p
$output_dir
"/05_SNP_calling/"
$species
"/"
$prefix
mkdir
-p
$output_dir
"/06_counts/"
$species
"/"
$prefix
mkdir
-p
$output_dir
"/02_trimmomatic/"
$species
"/"
$prefix
"/fastqc"
mkdir
-p
$output_dir
"/03_poly-A/"
$species
"/"
$prefix
"/fastqc"
# print running machine and start date
hostname
date
# print all variables
printf
$cpu
"
\n
"
$output_dir
"
\n
"
$prefix
"
\n
"
$species
"
\n
"
$fw
"
\n
"
if
[
-e
$rv
]
;
then
printf
$rv
"
\n
"
fi
printf
"
\'
$java_ram
\'
""
\n
"
$id
"
\n
"
$db_path_V3
"
\n
"
$db_name_V3
"
\n
"
echo
""
echo
"Phase 0 assamble data"
startle
date
echo
"Phase 1 run trimmo"
trimmo
date
echo
"Phase 2 run polyA"
polyA
duple
date
echo
"Phase 3 run mapping"
mappi
#-# mappi_gDNA
sample
counti
depthi
date
echo
"Phase 4 run SNP calling"
snpi
#-# corri
date
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment