Skip to content
Snippets Groups Projects
Commit 9ad6fdd6 authored by Dominik Brilhaus's avatar Dominik Brilhaus
Browse files

try fix kallisto collect

parent b336ca0f
No related branches found
No related tags found
1 merge request!12Restructure cwl
......@@ -13,46 +13,59 @@ library("jsonlite")
args <- commandArgs(trailingOnly = T)
in_kallisto_results <- args[1]
in_metadata_file <- args[2]
in_metadata_sample <- args[3]
in_metadata_factor <- args[4]
out_folder <- args[5]
inKallistoResults <- args[1]
inMetadataFile <- args[2]
inMetadataSample <- args[3]
inMetadataFactorList <- args[4]
outFolder <- args[5]
# inKallistoResults <- "../../runs/kallisto/kallisto_results"
# inMetadataFile <- "../../runs/isaSampleToRawDataSeq/rnaseq-samples.csv"
# inMetadataSample <- "Input [Source Name]"
# inMetadataFactorList <- list("Factor [Photosynthesis mode]")
# inMetadataDataCol <- "Output [Data]"
# outFolder <- "."
################################################
#### If it does not exist, create out dir
################################################
dir.create(out_folder, recursive = T, showWarnings = F)
dir.create(outFolder, recursive = T, showWarnings = F)
################################################
#### Read ISA sample metadata
################################################
samples <- read.table(file = in_metadata_file, sep = "\t", header = TRUE)
samples <- read.csv(file = inMetadataFile, check.names = FALSE)
################################################
#### Read Kallisto results
################################################
base_dir <- in_kallisto_results
base_dir <- inKallistoResults
# A list of paths to the kallisto results indexed by the sample IDs is collated with
kal_dirs <- dir(base_dir, full.names = T) ## Sleuth requires full paths
s2c <- samples[order(samples[[in_metadata_sample]]), c(in_metadata_sample, in_metadata_factor, "Data.File.Name")]
s2c <- samples[order(samples[[inMetadataSample]]), c(inMetadataSample, unlist(inMetadataFactorList), inMetadataDataCol)]
# For kallisto / sleuth: 's2c' (sample_to_covariates) must contain a column named 'sample'
colnames(s2c)[1:2] <- c("sample", "condition")
s2c$out_name <- gsub(".fastq.+", "", s2c$Data.File.Name)
path_df <- data.frame(path = kal_dirs, out_name = gsub(".+/", "", kal_dirs))
s2c <- merge(s2c, path_df, by = "out_name")
colnames(s2c)[1] <- c("sample")
path_df <- data.frame(path = kal_dirs, sample = basename(kal_dirs))
s2c <- merge(s2c, path_df, by = "sample")
# Build a sleuth object
so <- sleuth_prep(s2c, full_model = ~condition, num_cores = 1)
save(so, file = paste(out_folder, "kallisto_sleuthObject.RData", sep = "/"))
factors <- sapply(inMetadataFactorList, function(x) x[[1]])
## Annoying workaround to prevent formula error with special chars in column headers
colnames(s2c) <- make.names(colnames(s2c))
factors <- make.names(factors)
design_formula <- as.formula(paste("~", paste(rev(factors), collapse = " + ")))
so <- sleuth_prep(s2c, full_model = design_formula)
save(so, file = file.path(outFolder, "kallisto_sleuthObject.RData"))
################################################
#### Extract expression tables
......@@ -62,14 +75,13 @@ save(so, file = paste(out_folder, "kallisto_sleuthObject.RData", sep = "/"))
expression_data <- kallisto_table(so)
## write to file
write.csv(expression_data, paste(out_folder, "/kallisto_df.csv", sep = "/"), row.names = F)
write.csv(expression_data, paste(outFolder, "/kallisto_df.csv", sep = "/"), row.names = F)
## as tpm matrix (gene x sample)
# Replacing pivot_wider() with base R operations
tpm_table <- reshape(expression_data, idvar = "target_id", timevar = "sample", direction = "wide", v.names = "tpm")
# Write to file
write.csv(tpm_table, paste(out_folder, "/kallisto_tpmMatrix.csv", sep = "/"), row.names = F)
write.csv(tpm_table, file.path(outFolder, "kallisto_tpmMatrix.csv"), row.names = F)
################################################
#### Summarize mapping stats
......@@ -82,4 +94,4 @@ for (i in dir(kal_dirs, pattern = ".json", full.names = T)) {
mapping_stats <- rbind(mapping_stats, z)
}
write.csv(mapping_stats, paste(out_folder, "/kallisto_mappingStats.csv", sep = "/"), row.names = F)
write.csv(mapping_stats, file.path(outFolder, "kallisto_mappingStats.csv"), row.names = F)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment