From 66ed13b56e6f5a8e1a81748c666738aa85cf8cb6 Mon Sep 17 00:00:00 2001 From: Adrian Zimmer <z.adrian1995@gmail.com> Date: Sat, 13 Aug 2022 16:02:45 +0200 Subject: [PATCH] Dockerize kallisto_collect --- runs/kallisto_collect/README.md | 10 +----- runs/kallisto_collect/kallisto_collect.yml | 12 +++---- runs/kallisto_collect/run.cwl | 30 ++++++++++++++++ runs/kallisto_sleuth/run.cwl | 4 +-- workflows/kallisto_collect/kallisto_collect.R | 27 +++++++------- workflows/kallisto_collect/workflow.cwl | 36 ++++++++++--------- 6 files changed, 71 insertions(+), 48 deletions(-) create mode 100644 runs/kallisto_collect/run.cwl diff --git a/runs/kallisto_collect/README.md b/runs/kallisto_collect/README.md index 574080e..b43b9ca 100644 --- a/runs/kallisto_collect/README.md +++ b/runs/kallisto_collect/README.md @@ -8,14 +8,6 @@ cd /Users/dominikbrilhaus/gitlab_dataplant/samplearc_rnaseq/runs/kallisto_collec ## Let it flow ```bash -### store arc root (two levels up from here) as variable -arc_root=$(echo ${PWD%/*/*}) - -### replace arc root line in yml (specific to the machine from where this is run) -### not sure, if this works on linux... -sed -i '' "s|^arc_root:.*|arc_root: $arc_root|g" kallisto_collect.yml - ### run with cwltool -cwltool ../../workflows/kallisto_collect.cwl kallisto_collect.yml - +cwltool --enable-dev run.cwl kallisto_collect.yml ``` diff --git a/runs/kallisto_collect/kallisto_collect.yml b/runs/kallisto_collect/kallisto_collect.yml index 1b30051..fa4ee11 100644 --- a/runs/kallisto_collect/kallisto_collect.yml +++ b/runs/kallisto_collect/kallisto_collect.yml @@ -1,10 +1,10 @@ cores: 1 -r_script: +in_kallisto_results: + class: Directory + path: ../kallisto_sleuth/out +in_metadata_file: class: File - path: ../../workflows/kallisto_collect.R -in_kallisto_results: "runs/no_CWL_yet/kallisto_sleuth/run1/01_kallisto_results" -in_metadata_file: "runs/merged_isa_metadata/merged_isa.tsv" + path: ../merged_isa_metadata/merged_isa.tsv in_metadata_sample: "Sample.Name.2" in_metadata_factor: "Factor..Photosynthesis.mode." -out_folder: runs/kallisto_collect -arc_root: /Users/dominikbrilhaus/gitlab_dataplant/samplearc_rnaseq \ No newline at end of file +out_folder: out \ No newline at end of file diff --git a/runs/kallisto_collect/run.cwl b/runs/kallisto_collect/run.cwl new file mode 100644 index 0000000..3bd1e37 --- /dev/null +++ b/runs/kallisto_collect/run.cwl @@ -0,0 +1,30 @@ +#!/usr/bin/env cwl-runner +cwlVersion: v1.2.0-dev1 +class: Workflow +inputs: + in_kallisto_results: + type: Directory + in_metadata_file: + type: File + in_metadata_sample: + type: string + in_metadata_factor: + type: string + out_folder: + type: string +outputs: + out_dir: + type: + type: array + items: Directory + outputSource: kallisto_collect/outdir +steps: + kallisto_collect: + run: ../../workflows/kallisto_collect/workflow.cwl + in: + in_kallisto_results: in_kallisto_results + in_metadata_file: in_metadata_file + in_metadata_sample: in_metadata_sample + in_metadata_factor: in_metadata_factor + out_folder: out_folder + out: [outdir] diff --git a/runs/kallisto_sleuth/run.cwl b/runs/kallisto_sleuth/run.cwl index 9565052..8035414 100644 --- a/runs/kallisto_sleuth/run.cwl +++ b/runs/kallisto_sleuth/run.cwl @@ -11,9 +11,9 @@ outputs: type: type: array items: Directory - outputSource: kallisto_quant/outdir + outputSource: kallisto_sleuth/outdir steps: - kallisto_quant: + kallisto_sleuth: run: ../../workflows/kallisto_sleuth/workflow.cwl in: in_sleuth: in_sleuth diff --git a/workflows/kallisto_collect/kallisto_collect.R b/workflows/kallisto_collect/kallisto_collect.R index 205be63..15e176c 100644 --- a/workflows/kallisto_collect/kallisto_collect.R +++ b/workflows/kallisto_collect/kallisto_collect.R @@ -1,4 +1,4 @@ -ö#!/usr/bin/env Rscript +#!/usr/bin/env Rscript ################################################ #### CWL-independent tests @@ -25,31 +25,30 @@ library(jsonlite) args <- commandArgs(trailingOnly = T) -arc_root <- args[1] -in_kallisto_results <- args[2] -in_metadata_file <- args[3] -in_metadata_sample <- args[4] -in_metadata_factor <- args[5] -out_folder <- args[6] +in_kallisto_results <- args[1] +in_metadata_file <- args[2] +in_metadata_sample <- args[3] +in_metadata_factor <- args[4] +out_folder <- args[5] ################################################ #### If it does not exist, create out dir ################################################ -dir.create(paste(arc_root, out_folder, sep = "/"), recursive = T, showWarnings = F) +dir.create(out_folder, recursive = T, showWarnings = F) ################################################ #### Read ISA sample metadata ################################################ -samples <- read.table(file = paste(arc_root, in_metadata_file, sep = "/"), sep = "\t") +samples <- read.table(file = in_metadata_file, sep = "\t") ################################################ #### Read Kallisto results ################################################ -base_dir <- paste(arc_root, in_kallisto_results, sep = "/") +base_dir <- in_kallisto_results # A list of paths to the kallisto results indexed by the sample IDs is collated with kal_dirs <- dir(base_dir, full.names = T) ## Sleuth requires full paths @@ -67,7 +66,7 @@ s2c <- merge(s2c, path_df, by = "out_name") so <- sleuth_prep(s2c, full_model = ~condition, num_cores = 1) -save(so, file = paste(arc_root, out_folder, "kallisto_sleuthObject.RData", sep = "/")) +save(so, file = paste(out_folder, "kallisto_sleuthObject.RData", sep = "/")) ################################################ @@ -78,13 +77,13 @@ save(so, file = paste(arc_root, out_folder, "kallisto_sleuthObject.RData", sep = expression_data <- kallisto_table(so) ## write to file -write.csv(expression_data, paste(arc_root, out_folder, "/kallisto_df.csv", sep = "/"), row.names = F) +write.csv(expression_data, paste(out_folder, "/kallisto_df.csv", sep = "/"), row.names = F) ## as tpm matrix (gene x sample) tpm_table <- pivot_wider(expression_data, id_cols = target_id, names_from = sample, values_from = tpm) ## write to file -write.csv(tpm_table, paste(arc_root, out_folder, "/kallisto_tpmMatrix.csv", sep = "/"), row.names = F) +write.csv(tpm_table, paste(out_folder, "/kallisto_tpmMatrix.csv", sep = "/"), row.names = F) ################################################ #### Summarize mapping stats @@ -99,4 +98,4 @@ for (i in dir(kal_dirs, pattern = ".json", full.names = T)) mapping_stats <- rbind(mapping_stats, z) } -write.csv(mapping_stats, paste(arc_root, out_folder, "/kallisto_mappingStats.csv", sep = "/"), row.names = F) +write.csv(mapping_stats, paste(out_folder, "/kallisto_mappingStats.csv", sep = "/"), row.names = F) diff --git a/workflows/kallisto_collect/workflow.cwl b/workflows/kallisto_collect/workflow.cwl index 8e3064c..73f215d 100644 --- a/workflows/kallisto_collect/workflow.cwl +++ b/workflows/kallisto_collect/workflow.cwl @@ -1,37 +1,39 @@ #!/usr/bin/env cwl-runner -cwlVersion: v1.2 +cwlVersion: v1.2.0-dev1 class: CommandLineTool - +hints: + DockerRequirement: + dockerPull: zimmera95/rnaseq:latest +requirements: + - class: InitialWorkDirRequirement + listing: + - class: File + location: kallisto_collect.R +arguments: + - position: 0 + valueFrom: kallisto_collect.R inputs: -- id: r_script - type: File - inputBinding: - position: 0 -- id: arc_root - type: string - inputBinding: - position: 1 - id: in_kallisto_results - type: string + type: Directory inputBinding: - position: 2 + position: 1 - id: in_metadata_file - type: string + type: File inputBinding: - position: 3 + position: 2 - id: in_metadata_sample type: string inputBinding: - position: 4 + position: 3 - id: in_metadata_factor type: string inputBinding: - position: 5 + position: 4 - id: out_folder type: string inputBinding: - position: 6 + position: 5 outputs: - id: outdir -- GitLab