diff --git a/runs/kallisto_collect/README.md b/runs/kallisto_collect/README.md index 574080e009fc120d02514c5dfa15b6b6c602f00b..b43b9ca2bef787c0e02f178356439e1378bc7723 100644 --- a/runs/kallisto_collect/README.md +++ b/runs/kallisto_collect/README.md @@ -8,14 +8,6 @@ cd /Users/dominikbrilhaus/gitlab_dataplant/samplearc_rnaseq/runs/kallisto_collec ## Let it flow ```bash -### store arc root (two levels up from here) as variable -arc_root=$(echo ${PWD%/*/*}) - -### replace arc root line in yml (specific to the machine from where this is run) -### not sure, if this works on linux... -sed -i '' "s|^arc_root:.*|arc_root: $arc_root|g" kallisto_collect.yml - ### run with cwltool -cwltool ../../workflows/kallisto_collect.cwl kallisto_collect.yml - +cwltool --enable-dev run.cwl kallisto_collect.yml ``` diff --git a/runs/kallisto_collect/kallisto_collect.yml b/runs/kallisto_collect/kallisto_collect.yml index 1b300513c1481db8c9c0188d7dbd09f3acae4ad3..fa4ee11c61164f6e20956b206fe73a2ca1e9aaaf 100644 --- a/runs/kallisto_collect/kallisto_collect.yml +++ b/runs/kallisto_collect/kallisto_collect.yml @@ -1,10 +1,10 @@ cores: 1 -r_script: +in_kallisto_results: + class: Directory + path: ../kallisto_sleuth/out +in_metadata_file: class: File - path: ../../workflows/kallisto_collect.R -in_kallisto_results: "runs/no_CWL_yet/kallisto_sleuth/run1/01_kallisto_results" -in_metadata_file: "runs/merged_isa_metadata/merged_isa.tsv" + path: ../merged_isa_metadata/merged_isa.tsv in_metadata_sample: "Sample.Name.2" in_metadata_factor: "Factor..Photosynthesis.mode." -out_folder: runs/kallisto_collect -arc_root: /Users/dominikbrilhaus/gitlab_dataplant/samplearc_rnaseq \ No newline at end of file +out_folder: out \ No newline at end of file diff --git a/runs/kallisto_collect/run.cwl b/runs/kallisto_collect/run.cwl new file mode 100644 index 0000000000000000000000000000000000000000..3bd1e3722ae74e396574bd94ff033347d5711554 --- /dev/null +++ b/runs/kallisto_collect/run.cwl @@ -0,0 +1,30 @@ +#!/usr/bin/env cwl-runner +cwlVersion: v1.2.0-dev1 +class: Workflow +inputs: + in_kallisto_results: + type: Directory + in_metadata_file: + type: File + in_metadata_sample: + type: string + in_metadata_factor: + type: string + out_folder: + type: string +outputs: + out_dir: + type: + type: array + items: Directory + outputSource: kallisto_collect/outdir +steps: + kallisto_collect: + run: ../../workflows/kallisto_collect/workflow.cwl + in: + in_kallisto_results: in_kallisto_results + in_metadata_file: in_metadata_file + in_metadata_sample: in_metadata_sample + in_metadata_factor: in_metadata_factor + out_folder: out_folder + out: [outdir] diff --git a/runs/kallisto_sleuth/run.cwl b/runs/kallisto_sleuth/run.cwl index 9565052896a943443788e6d06556f8ed2631f05d..803541479d960daaf3984d6d9c9b0abe52f9845e 100644 --- a/runs/kallisto_sleuth/run.cwl +++ b/runs/kallisto_sleuth/run.cwl @@ -11,9 +11,9 @@ outputs: type: type: array items: Directory - outputSource: kallisto_quant/outdir + outputSource: kallisto_sleuth/outdir steps: - kallisto_quant: + kallisto_sleuth: run: ../../workflows/kallisto_sleuth/workflow.cwl in: in_sleuth: in_sleuth diff --git a/workflows/kallisto_collect/kallisto_collect.R b/workflows/kallisto_collect/kallisto_collect.R index 205be63e64cec3cd3f87497c6e6cd313551ee874..15e176cf8fcd9804a5778a7f2e8dad98eefdcfe9 100644 --- a/workflows/kallisto_collect/kallisto_collect.R +++ b/workflows/kallisto_collect/kallisto_collect.R @@ -1,4 +1,4 @@ -ö#!/usr/bin/env Rscript +#!/usr/bin/env Rscript ################################################ #### CWL-independent tests @@ -25,31 +25,30 @@ library(jsonlite) args <- commandArgs(trailingOnly = T) -arc_root <- args[1] -in_kallisto_results <- args[2] -in_metadata_file <- args[3] -in_metadata_sample <- args[4] -in_metadata_factor <- args[5] -out_folder <- args[6] +in_kallisto_results <- args[1] +in_metadata_file <- args[2] +in_metadata_sample <- args[3] +in_metadata_factor <- args[4] +out_folder <- args[5] ################################################ #### If it does not exist, create out dir ################################################ -dir.create(paste(arc_root, out_folder, sep = "/"), recursive = T, showWarnings = F) +dir.create(out_folder, recursive = T, showWarnings = F) ################################################ #### Read ISA sample metadata ################################################ -samples <- read.table(file = paste(arc_root, in_metadata_file, sep = "/"), sep = "\t") +samples <- read.table(file = in_metadata_file, sep = "\t") ################################################ #### Read Kallisto results ################################################ -base_dir <- paste(arc_root, in_kallisto_results, sep = "/") +base_dir <- in_kallisto_results # A list of paths to the kallisto results indexed by the sample IDs is collated with kal_dirs <- dir(base_dir, full.names = T) ## Sleuth requires full paths @@ -67,7 +66,7 @@ s2c <- merge(s2c, path_df, by = "out_name") so <- sleuth_prep(s2c, full_model = ~condition, num_cores = 1) -save(so, file = paste(arc_root, out_folder, "kallisto_sleuthObject.RData", sep = "/")) +save(so, file = paste(out_folder, "kallisto_sleuthObject.RData", sep = "/")) ################################################ @@ -78,13 +77,13 @@ save(so, file = paste(arc_root, out_folder, "kallisto_sleuthObject.RData", sep = expression_data <- kallisto_table(so) ## write to file -write.csv(expression_data, paste(arc_root, out_folder, "/kallisto_df.csv", sep = "/"), row.names = F) +write.csv(expression_data, paste(out_folder, "/kallisto_df.csv", sep = "/"), row.names = F) ## as tpm matrix (gene x sample) tpm_table <- pivot_wider(expression_data, id_cols = target_id, names_from = sample, values_from = tpm) ## write to file -write.csv(tpm_table, paste(arc_root, out_folder, "/kallisto_tpmMatrix.csv", sep = "/"), row.names = F) +write.csv(tpm_table, paste(out_folder, "/kallisto_tpmMatrix.csv", sep = "/"), row.names = F) ################################################ #### Summarize mapping stats @@ -99,4 +98,4 @@ for (i in dir(kal_dirs, pattern = ".json", full.names = T)) mapping_stats <- rbind(mapping_stats, z) } -write.csv(mapping_stats, paste(arc_root, out_folder, "/kallisto_mappingStats.csv", sep = "/"), row.names = F) +write.csv(mapping_stats, paste(out_folder, "/kallisto_mappingStats.csv", sep = "/"), row.names = F) diff --git a/workflows/kallisto_collect/workflow.cwl b/workflows/kallisto_collect/workflow.cwl index 8e3064c8dae6adb671d4daa9bded1894915f32e4..73f215d56aa8eea65018435aece4f365d456510a 100644 --- a/workflows/kallisto_collect/workflow.cwl +++ b/workflows/kallisto_collect/workflow.cwl @@ -1,37 +1,39 @@ #!/usr/bin/env cwl-runner -cwlVersion: v1.2 +cwlVersion: v1.2.0-dev1 class: CommandLineTool - +hints: + DockerRequirement: + dockerPull: zimmera95/rnaseq:latest +requirements: + - class: InitialWorkDirRequirement + listing: + - class: File + location: kallisto_collect.R +arguments: + - position: 0 + valueFrom: kallisto_collect.R inputs: -- id: r_script - type: File - inputBinding: - position: 0 -- id: arc_root - type: string - inputBinding: - position: 1 - id: in_kallisto_results - type: string + type: Directory inputBinding: - position: 2 + position: 1 - id: in_metadata_file - type: string + type: File inputBinding: - position: 3 + position: 2 - id: in_metadata_sample type: string inputBinding: - position: 4 + position: 3 - id: in_metadata_factor type: string inputBinding: - position: 5 + position: 4 - id: out_folder type: string inputBinding: - position: 6 + position: 5 outputs: - id: outdir