diff --git a/runs/kallisto_index/README.md b/runs/kallisto_index/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..6143bf54c88709806e6d5a0b94eaa018f38426dc
--- /dev/null
+++ b/runs/kallisto_index/README.md
@@ -0,0 +1,21 @@
+
+# cwltool README
+
+## First cd into the (runs) folder, with the .yml file
+
+cd /Users/dominikbrilhaus/03_DataPLANT_gitlab/samplearc_rnaseq/runs/kallisto_index
+
+## Let it flow
+
+```bash
+### store arc root (two levels up from here) as variable
+arc_root="${PWD%/*/*}"
+
+### replace arc root line in yml (specific to the machine from where this is run)
+### `-i.bak` (suffix attached to -i) is accepted by both BSD/macOS and GNU/Linux sed
+sed -i.bak "s|^arc_root:.*|arc_root: $arc_root|g" kallisto_index.yml && rm kallisto_index.yml.bak
+
+### run with cwltool
+cwltool ../../workflows/kallisto_index.cwl kallisto_index.yml
+
+```
diff --git a/runs/kallisto_index/kallisto_index.yml b/runs/kallisto_index/kallisto_index.yml
new file mode 100644
index 0000000000000000000000000000000000000000..11900169a67a0fa6ec84574a2d12a133112c4303
--- /dev/null
+++ b/runs/kallisto_index/kallisto_index.yml
@@ -0,0 +1,7 @@
+cores: 1
+sh_script:
+  class: File
+  path: ../../workflows/kallisto_index.sh
+in_genome_ref: studies/TalinumGenomeDraft/resources/Talinum.gm.CDS.nt.fa
+out_folder: runs/kallisto_index
+arc_root: /Users/dominikbrilhaus/03_DataPLANT_gitlab/samplearc_rnaseq
diff --git a/workflows/_dependencies/install_kallisto.md b/workflows/_dependencies/install_kallisto.md
new file mode 100644
index 0000000000000000000000000000000000000000..af072add7c4d6bda80e017f83be41d2914342962
--- /dev/null
+++ b/workflows/_dependencies/install_kallisto.md
@@ -0,0 +1,27 @@
+
+# Kallisto
+
+## Manual
+
+http://pachterlab.github.io/kallisto/manual.html
+
+## MacOS Installation
+
+### Option 1: Homebrew
+
+```bash
+/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
+brew install kallisto
+```
+
+### Option 2: Build from source (with HDF5 support)
+
+```bash
+git clone https://github.com/pachterlab/kallisto.git
+cd kallisto
+mkdir build
+cd build
+cmake .. -DUSE_HDF5=ON
+make
+make install
+```
diff --git a/workflows/_dependencies/install_r_packages.R b/workflows/_dependencies/install_r_packages.R
new file mode 100644
index 0000000000000000000000000000000000000000..ce4e6435f14284a6d89d7003aa1514b01e62e06a
--- /dev/null
+++ b/workflows/_dependencies/install_r_packages.R
@@ -0,0 +1,60 @@
+#!/usr/bin/env Rscript
+
+########################
+# Installation of R dependencies
+########################
+
+# A non-interactive Rscript session may have no CRAN mirror configured, so the
+# mirror is passed explicitly to every install.packages() call.
+cran_repo <- "https://cran.uni-muenster.de/"
+
+# Returns TRUE if the namespace of `pkg` can be loaded, FALSE otherwise.
+is_installed <- function(pkg) {
+  requireNamespace(pkg, quietly = TRUE)
+}
+
+### sleuth installation
+
+if (!is_installed("BiocManager")) {
+  install.packages("BiocManager", repos = cran_repo)
+}
+if (!is_installed("devtools")) {
+  BiocManager::install("devtools")
+}
+# remotes is called directly below, so do not rely on devtools pulling it in
+if (!is_installed("remotes")) {
+  install.packages("remotes", repos = cran_repo)
+}
+
+# BiocManager::install("pachterlab/sleuth")
+# There's currently (early 2022) an issue with sleuth installation.
+# Using this workaround from https://github.com/pachterlab/sleuth/issues/259#issuecomment-1001076030
+
+if (!is_installed("sleuth")) {
+  remotes::install_github("pachterlab/sleuth#260")
+}
+
+library(sleuth)
+
+### other packages
+
+# No library() calls before the loop: loading a package that is not yet
+# installed would abort the script before the loop gets to install it.
+required_packages <- c(
+  "tidyverse", ## data wrangling and plotting
+  "jsonlite",  ## read/write json files
+  "openxlsx"   ## read/write xlsx files
+)
+
+for (package in required_packages) {
+  ## Check if package is installed. If not, install
+  if (!is_installed(package)) {
+    install.packages(package, repos = cran_repo)
+  }
+  # ## Check if package is up to date. If not, update
+  # update.packages(package, repos = "https://cran.uni-muenster.de/")
+  ## Load package
+  library(package, character.only = TRUE)
+  print(paste("installed R package", package))
+}
+
diff --git a/workflows/kallisto_index.cwl b/workflows/kallisto_index.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..a6389de36bedc4d46d297b71c7dabd50ed07e200
--- /dev/null
+++ b/workflows/kallisto_index.cwl
@@ -0,0 +1,39 @@
+#!/usr/bin/env cwl-runner
+
+# Runs `bash <sh_script> <arc_root> <in_genome_ref> <out_folder>`
+# (argument order is fixed by the inputBinding positions below).
+
+cwlVersion: v1.2
+class: CommandLineTool
+
+inputs:
+- id: sh_script
+  type: File
+  inputBinding:
+    position: 0
+- id: arc_root
+  type: string
+  inputBinding:
+    position: 1
+- id: in_genome_ref
+  type: string
+  inputBinding:
+    position: 2
+- id: out_folder
+  type: string
+  inputBinding:
+    position: 3
+
+outputs:
+# NOTE(review): kallisto_index.sh writes to <arc_root>/<out_folder>; with an
+# absolute arc_root that is outside $(runtime.outdir), so this glob may match
+# nothing - confirm this is intended.
+- id: outdir
+  type:
+    type: array
+    items: Directory
+  outputBinding:
+    glob: $(runtime.outdir)/$(inputs.out_folder)
+
+baseCommand:
+- bash
diff --git a/workflows/kallisto_index.sh b/workflows/kallisto_index.sh
new file mode 100755
index 0000000000000000000000000000000000000000..19e4ba711d7a8306b32c1bb3640743ec2c728491
--- /dev/null
+++ b/workflows/kallisto_index.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+
+### Build kallisto index
+###
+### Usage: kallisto_index.sh <arc_root> <in_genome_ref> <out_folder>
+###   arc_root       base directory that the two paths below are relative to
+###   in_genome_ref  input file passed to `kallisto index`
+###   out_folder     folder that receives the index file `kallisto_index`
+
+################################################
+#### Read arguments from CLI
+################################################
+
+### Refuse to run with missing arguments: empty variables would make the
+### paths below resolve to the filesystem root.
+if [ "$#" -lt 3 ]; then
+    echo "Usage: $0 <arc_root> <in_genome_ref> <out_folder>" >&2
+    exit 1
+fi
+
+arc_root=$1
+in_genome_ref=$2
+out_folder=$3
+
+################################################
+#### Print version and citation to test kallisto
+################################################
+
+kallisto version
+kallisto cite
+
+################################################
+#### If it does not exist, create out dir
+################################################
+
+mkdir -p "$arc_root/$out_folder/"
+
+
+################################################
+#### Build kallisto index
+################################################
+
+kallisto index -i "$arc_root/$out_folder/kallisto_index" "$arc_root/$in_genome_ref"