From 3fe71fce12a370b6eeb27124cffc4efe471373be Mon Sep 17 00:00:00 2001
From: Dominik Brilhaus <dominik.brilhaus@hhu.de>
Date: Tue, 9 Aug 2022 15:57:57 +0200
Subject: [PATCH] cwl kallisto_index

---
Notes (not part of the commit message; text between the three-dash line
and the diffstat is ignored when the patch is applied):

What this patch adds
* workflows/kallisto_index.cwl: a CWL v1.2 CommandLineTool whose base
  command is bash; it passes the script file and three strings
  (arc_root, in_genome_ref, out_folder) as positional arguments 0-3.
* workflows/kallisto_index.sh: reads those three arguments, prints the
  kallisto version and citation, creates "$arc_root/$out_folder/" and
  builds the index file "kallisto_index" in it from
  "$arc_root/$in_genome_ref".
* runs/kallisto_index: the job file for cwltool and a README on how to
  run it.
* workflows/_dependencies: install notes for kallisto and an R script
  that installs and loads sleuth, tidyverse, jsonlite and openxlsx.

Changes relative to the first version of this patch
* install_r_packages.R: BiocManager is installed with an explicit repos=
  (same mirror the script already uses further down); without it,
  install.packages() stops under Rscript when no CRAN mirror is set.
* install_r_packages.R: removed the library() calls for tidyverse,
  jsonlite and openxlsx that ran before the install loop and made the
  script fail on machines where these packages were still missing.
* install_r_packages.R: character.only = TRUE instead of T.
* README.md: sed -i.bak instead of the BSD-only sed -i '', and a quoted
  parameter expansion instead of an unquoted echo for arc_root.
* Line counts of all files are unchanged, so hunk headers and diffstat
  are still valid.

NOTE(review): the abbreviated blob ids on the "index" lines of README.md
and install_r_packages.R were not recomputed for the edited content;
regenerate the patch with git format-patch if they are needed (for
example for a three-way merge).
NOTE(review): kallisto_index.sh writes below "$arc_root/$out_folder",
while the CWL output glob looks below $(runtime.outdir). Please confirm
that the output directory is really collected by cwltool.

 runs/kallisto_index/README.md                | 21 ++++++++++
 runs/kallisto_index/kallisto_index.yml       |  7 ++++
 workflows/_dependencies/install_kallisto.md  | 21 ++++++++++
 workflows/_dependencies/install_r_packages.R | 43 ++++++++++++++++++++
 workflows/kallisto_index.cwl                 | 33 +++++++++++++++
 workflows/kallisto_index.sh                  | 31 ++++++++++++++
 6 files changed, 156 insertions(+)
 create mode 100644 runs/kallisto_index/README.md
 create mode 100644 runs/kallisto_index/kallisto_index.yml
 create mode 100644 workflows/_dependencies/install_kallisto.md
 create mode 100644 workflows/_dependencies/install_r_packages.R
 create mode 100644 workflows/kallisto_index.cwl
 create mode 100755 workflows/kallisto_index.sh

diff --git a/runs/kallisto_index/README.md b/runs/kallisto_index/README.md
new file mode 100644
index 0000000..6143bf5
--- /dev/null
+++ b/runs/kallisto_index/README.md
@@ -0,0 +1,21 @@
+
+# cwltool README
+
+## First cd into the (runs) folder, with the .yml file
+
+cd /Users/dominikbrilhaus/03_DataPLANT_gitlab/samplearc_rnaseq/runs/kallisto_index
+
+## Let it flow
+
+```bash
+### store arc root (two levels up from here) as variable
+arc_root="${PWD%/*/*}"
+
+### replace arc root line in yml (specific to the machine from where this is run)
+### -i.bak is accepted by both BSD (macOS) and GNU (Linux) sed; the backup file is removed afterwards
+sed -i.bak "s|^arc_root:.*|arc_root: $arc_root|g" kallisto_index.yml && rm -f kallisto_index.yml.bak
+
+### run with cwltool
+cwltool ../../workflows/kallisto_index.cwl kallisto_index.yml
+
+```
diff --git a/runs/kallisto_index/kallisto_index.yml b/runs/kallisto_index/kallisto_index.yml
new file mode 100644
index 0000000..1190016
--- /dev/null
+++ b/runs/kallisto_index/kallisto_index.yml
@@ -0,0 +1,7 @@
+cores: 1
+sh_script:
+  class: File
+  path: ../../workflows/kallisto_index.sh
+in_genome_ref: studies/TalinumGenomeDraft/resources/Talinum.gm.CDS.nt.fa
+out_folder: runs/kallisto_index
+arc_root: /Users/dominikbrilhaus/03_DataPLANT_gitlab/samplearc_rnaseq
diff --git a/workflows/_dependencies/install_kallisto.md b/workflows/_dependencies/install_kallisto.md
new file mode 100644
index 0000000..af072ad
--- /dev/null
+++ b/workflows/_dependencies/install_kallisto.md
@@ -0,0 +1,21 @@
+
+# Kallisto
+
+## Manual
+
+http://pachterlab.github.io/kallisto/manual.html
+
+## MacOS Installation
+
+```bash
+ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
+brew install kallisto
+```
+
+```bash
+git clone https://github.com/pachterlab/kallisto.git
+cd kallisto
+mkdir build
+cd build
+cmake .. -DUSE_HDF5=ON
+```
diff --git a/workflows/_dependencies/install_r_packages.R b/workflows/_dependencies/install_r_packages.R
new file mode 100644
index 0000000..ce4e643
--- /dev/null
+++ b/workflows/_dependencies/install_r_packages.R
@@ -0,0 +1,43 @@
+#!/usr/bin/env Rscript
+
+########################
+# Installation of R dependencies
+########################
+
+### sleuth installation
+
+if (!requireNamespace("BiocManager", quietly = TRUE)){install.packages("BiocManager", repos = "https://cran.uni-muenster.de/")}
+if(!"BiocManager" %in% row.names(installed.packages())){BiocManager::install()}
+if(!"devtools" %in% row.names(installed.packages())){BiocManager::install("devtools")}
+
+# BiocManager::install("pachterlab/sleuth")
+# There's currently (early 2022) an issue with sleuth installation.
+# Using this workaround from https://github.com/pachterlab/sleuth/issues/259#issuecomment-1001076030
+
+if(!"sleuth" %in% row.names(installed.packages())){remotes::install_github("pachterlab/sleuth#260")}
+
+library(sleuth)
+
+### other packages
+
+# tidyverse, jsonlite and openxlsx are attached inside the loop below, after
+# the install check. Calling library() on them here would abort the script on
+# a machine where they are not installed yet.
+
+required_packages <-
+  c('tidyverse', ## data wrangling and plotting
+    'jsonlite', ## read/write json files
+    'openxlsx') ## read/write xlsx files
+
+for(package in required_packages)
+{
+  ## Check if package is installed. If not, install
+  if(!package %in% row.names(installed.packages())){install.packages(package,
+    repos = "https://cran.uni-muenster.de/")}
+  # ## Check if package is up to date. If not, update
+  # update.packages(package, repos = "https://cran.uni-muenster.de/")
+  ## Load package
+  library(package, character.only = TRUE)
+  print(paste("installed R package", package))
+}
+
diff --git a/workflows/kallisto_index.cwl b/workflows/kallisto_index.cwl
new file mode 100644
index 0000000..a6389de
--- /dev/null
+++ b/workflows/kallisto_index.cwl
@@ -0,0 +1,33 @@
+#!/usr/bin/env cwl-runner
+
+cwlVersion: v1.2
+class: CommandLineTool
+
+inputs:
+- id: sh_script
+  type: File
+  inputBinding:
+    position: 0
+- id: arc_root
+  type: string
+  inputBinding:
+    position: 1
+- id: in_genome_ref
+  type: string
+  inputBinding:
+    position: 2
+- id: out_folder
+  type: string
+  inputBinding:
+    position: 3
+
+outputs:
+- id: outdir
+  type:
+    type: array
+    items: Directory
+  outputBinding:
+    glob: $(runtime.outdir)/$(inputs.out_folder)
+
+baseCommand:
+- bash
\ No newline at end of file
diff --git a/workflows/kallisto_index.sh b/workflows/kallisto_index.sh
new file mode 100755
index 0000000..19e4ba7
--- /dev/null
+++ b/workflows/kallisto_index.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+
+### Build kallisto index
+
+################################################
+#### Read arguments from CLI
+################################################
+
+arc_root=$1
+in_genome_ref=$2
+out_folder=$3
+
+################################################
+#### Print version and citation to test kallisto
+################################################
+
+kallisto version
+kallisto cite
+
+################################################
+#### If it does not exist, create out dir
+################################################
+
+mkdir -p "$arc_root/$out_folder/"
+
+
+################################################
+#### Build kallisto index
+################################################
+
+kallisto index -i "$arc_root/$out_folder/kallisto_index" "$arc_root/$in_genome_ref"
-- 
GitLab