Skip to content
Snippets Groups Projects
Commit 9ad6fdd6 authored by Dominik Brilhaus's avatar Dominik Brilhaus
Browse files

try fix kallisto collect

parent b336ca0f
No related branches found
No related tags found
1 merge request!12Restructure cwl
...@@ -13,46 +13,59 @@ library("jsonlite") ...@@ -13,46 +13,59 @@ library("jsonlite")
args <- commandArgs(trailingOnly = T) args <- commandArgs(trailingOnly = T)
in_kallisto_results <- args[1] inKallistoResults <- args[1]
in_metadata_file <- args[2] inMetadataFile <- args[2]
in_metadata_sample <- args[3] inMetadataSample <- args[3]
in_metadata_factor <- args[4] inMetadataFactorList <- args[4]
out_folder <- args[5] outFolder <- args[5]
# inKallistoResults <- "../../runs/kallisto/kallisto_results"
# inMetadataFile <- "../../runs/isaSampleToRawDataSeq/rnaseq-samples.csv"
# inMetadataSample <- "Input [Source Name]"
# inMetadataFactorList <- list("Factor [Photosynthesis mode]")
# inMetadataDataCol <- "Output [Data]"
# outFolder <- "."
################################################ ################################################
#### If it does not exist, create out dir #### If it does not exist, create out dir
################################################ ################################################
dir.create(out_folder, recursive = T, showWarnings = F) dir.create(outFolder, recursive = T, showWarnings = F)
################################################ ################################################
#### Read ISA sample metadata #### Read ISA sample metadata
################################################ ################################################
samples <- read.table(file = in_metadata_file, sep = "\t", header = TRUE) samples <- read.csv(file = inMetadataFile, check.names = FALSE)
################################################ ################################################
#### Read Kallisto results #### Read Kallisto results
################################################ ################################################
base_dir <- in_kallisto_results base_dir <- inKallistoResults
# A list of paths to the kallisto results indexed by the sample IDs is collated with # A list of paths to the kallisto results indexed by the sample IDs is collated with
kal_dirs <- dir(base_dir, full.names = T) ## Sleuth requires full paths kal_dirs <- dir(base_dir, full.names = T) ## Sleuth requires full paths
s2c <- samples[order(samples[[in_metadata_sample]]), c(in_metadata_sample, in_metadata_factor, "Data.File.Name")] s2c <- samples[order(samples[[inMetadataSample]]), c(inMetadataSample, unlist(inMetadataFactorList), inMetadataDataCol)]
# For kallisto / sleuth: 's2c' (sample_to_covariates) must contain a column named 'sample' # For kallisto / sleuth: 's2c' (sample_to_covariates) must contain a column named 'sample'
colnames(s2c)[1:2] <- c("sample", "condition") colnames(s2c)[1] <- c("sample")
path_df <- data.frame(path = kal_dirs, sample = basename(kal_dirs))
s2c$out_name <- gsub(".fastq.+", "", s2c$Data.File.Name) s2c <- merge(s2c, path_df, by = "sample")
path_df <- data.frame(path = kal_dirs, out_name = gsub(".+/", "", kal_dirs))
s2c <- merge(s2c, path_df, by = "out_name")
# Build a sleuth object # Build a sleuth object
so <- sleuth_prep(s2c, full_model = ~condition, num_cores = 1)
save(so, file = paste(out_folder, "kallisto_sleuthObject.RData", sep = "/")) factors <- sapply(inMetadataFactorList, function(x) x[[1]])
## Annoying workaround to prevent formula error with special chars in column headers
colnames(s2c) <- make.names(colnames(s2c))
factors <- make.names(factors)
design_formula <- as.formula(paste("~", paste(rev(factors), collapse = " + ")))
so <- sleuth_prep(s2c, full_model = design_formula)
save(so, file = file.path(outFolder, "kallisto_sleuthObject.RData"))
################################################ ################################################
#### Extract expression tables #### Extract expression tables
...@@ -62,14 +75,13 @@ save(so, file = paste(out_folder, "kallisto_sleuthObject.RData", sep = "/")) ...@@ -62,14 +75,13 @@ save(so, file = paste(out_folder, "kallisto_sleuthObject.RData", sep = "/"))
expression_data <- kallisto_table(so) expression_data <- kallisto_table(so)
## write to file ## write to file
write.csv(expression_data, paste(out_folder, "/kallisto_df.csv", sep = "/"), row.names = F) write.csv(expression_data, paste(outFolder, "/kallisto_df.csv", sep = "/"), row.names = F)
## as tpm matrix (gene x sample) ## as tpm matrix (gene x sample)
# Replacing pivot_wider() with base R operations
tpm_table <- reshape(expression_data, idvar = "target_id", timevar = "sample", direction = "wide", v.names = "tpm") tpm_table <- reshape(expression_data, idvar = "target_id", timevar = "sample", direction = "wide", v.names = "tpm")
# Write to file # Write to file
write.csv(tpm_table, paste(out_folder, "/kallisto_tpmMatrix.csv", sep = "/"), row.names = F) write.csv(tpm_table, file.path(outFolder, "kallisto_tpmMatrix.csv"), row.names = F)
################################################ ################################################
#### Summarize mapping stats #### Summarize mapping stats
...@@ -82,4 +94,4 @@ for (i in dir(kal_dirs, pattern = ".json", full.names = T)) { ...@@ -82,4 +94,4 @@ for (i in dir(kal_dirs, pattern = ".json", full.names = T)) {
mapping_stats <- rbind(mapping_stats, z) mapping_stats <- rbind(mapping_stats, z)
} }
write.csv(mapping_stats, paste(out_folder, "/kallisto_mappingStats.csv", sep = "/"), row.names = F) write.csv(mapping_stats, file.path(outFolder, "kallisto_mappingStats.csv"), row.names = F)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment