# read the tab separated table at the path below into a data frame;
# header = TRUE means the first line of the file holds the column names
# (spell out TRUE: 'T' is an ordinary variable that can be overwritten)
dfr <- read.table(
  file = "runs/kallisto_combined/mothertableV1.txt",
  sep = "\t", header = TRUE
)

# let's add convenience columns with averages and fold changes
# R has a built in function 'rowMeans'
# all we need to do is pass what we want to take the means of
# grep() gives the positions of the column names that match the pattern,
# where '.' is a wildcard standing for any single character
# drop = FALSE keeps the selection a data frame even when only one column
# matches; without it we would get a plain vector and rowMeans() would fail
mock_tpm <- dfr[, grep("mock._tpm", colnames(dfr)), drop = FALSE]
stopifnot(ncol(mock_tpm) > 0) # no match would silently give NaN means
dfr$mean_mock <- rowMeans(mock_tpm)
head(dfr) # check your work
# and again for 'treatment'
treatment_tpm <- dfr[, grep("treatment._tpm", colnames(dfr)), drop = FALSE]
stopifnot(ncol(treatment_tpm) > 0)
dfr$mean_treatment <- rowMeans(treatment_tpm)
head(dfr)

# OK, what if we want to calculate something that isn't built into R?
# like log2 fold change?
# we can write our own function
# log2 fold change of 'numerator' relative to 'denominator'
# both arguments get a pseudocount of +1 so we never divide by 0
# works on single numbers as well as on whole vectors (element by element)
log2FC <- function(denominator, numerator) {
  shifted_top <- numerator + 1
  shifted_bottom <- denominator + 1
  log2(shifted_top / shifted_bottom)
}

# always sanity check your functions / code!
log2FC(40, 80) # test where the ~ answer is obvious
log2FC(dfr[1, "mean_mock"], dfr[1, "mean_treatment"]) # test on our data
# then we apply our function to our data
# log2FC only uses vectorized arithmetic (+, / and log2), so we can hand it
# two whole columns and get one value per row back, no looping required
dfr$log2FC <- log2FC(dfr[["mean_mock"]], dfr[["mean_treatment"]])
# for functions that are NOT vectorized there is 'apply'
# in a way 'apply' is a distant cousin of 'fill' in Excel, syntax:
# apply(data, 1, some_function) # fill down
# apply(data, 2, some_function) # fill across
# apply(data, c(1, 2), some_function) # fill down and across
# the row by row equivalent of the assignment above would be:
# apply(
#   dfr[, c("mean_mock", "mean_treatment")], 1,
#   function(x) log2FC(x[1], x[2])
# )
# it gives the same numbers, but is slower: apply() first converts the data
# frame to a matrix and then calls the function once for every row
# and look at the result
head(dfr)

# we can store the data in biologist readable format by using write.table
# (tab separated, without the row names and without quotes around text)
# note: always write TRUE / FALSE in full, 'T' and 'F' are ordinary variables
# that can be overwritten
write.table(dfr,
  file = "runs/kallisto_combined/mothertableV2.txt",
  row.names = FALSE, sep = "\t", quote = FALSE
)
# and we can store the data as an R object
# (load() on this file brings 'dfr' back under the same name)
save(dfr, file = "runs/kallisto_combined/mothertableV2.Rdata")

# time to tidy up the variables that have piled up so far
ls() # the 'Environment' tab shows the same list
# drop everything we no longer need, naming the objects as strings
rm(list = c(
  "new_order", "newfile", "sample_name", "first", "second", "files",
  "mock_tpm", "treatment_tpm", "locus"
))
ls()
