diff --git a/runs/isaSampleToRawDataSeq/README.md b/runs/isaSampleToRawDataSeq-run/README.md similarity index 76% rename from runs/isaSampleToRawDataSeq/README.md rename to runs/isaSampleToRawDataSeq-run/README.md index d8e1dcbbfe5360db3efbbe6c68c1233ef3ad7e1a..6c83a17dc1ab439d90ae08c2749a9047ac74ba3f 100644 --- a/runs/isaSampleToRawDataSeq/README.md +++ b/runs/isaSampleToRawDataSeq-run/README.md @@ -2,7 +2,7 @@ ```bash -cd runs/isaSampleToRawDataSeq +cd runs/isaSampleToRawDataSeq-run ``` ```bash diff --git a/runs/isaSampleToRawDataSeq/job.yml b/runs/isaSampleToRawDataSeq-run/job.yml similarity index 64% rename from runs/isaSampleToRawDataSeq/job.yml rename to runs/isaSampleToRawDataSeq-run/job.yml index 803a637a8ceb31f03237a2cda90c8752c638a140..375bb88317e61a873ad8b7cd9f2bf04f4ebfda4f 100644 --- a/runs/isaSampleToRawDataSeq/job.yml +++ b/runs/isaSampleToRawDataSeq-run/job.yml @@ -2,5 +2,5 @@ arcPath: class: Directory path: ../../ assayName: "Talinum_RNASeq_minimal" -outName: "rnaseq-samples" -startingNodeNum: 1 +outName: rnaseq-samples +startingNodeNum: 0 diff --git a/runs/isaSampleToRawDataSeq-run/rnaseq-samples.csv b/runs/isaSampleToRawDataSeq-run/rnaseq-samples.csv new file mode 100644 index 0000000000000000000000000000000000000000..d9aed97c919b39486aa0d1ccea4319a46a00dd5e --- /dev/null +++ b/runs/isaSampleToRawDataSeq-run/rnaseq-samples.csv @@ -0,0 +1,7 @@ +Input [Sample Name],Characteristic [organism],Term Source REF (OBI:0100026),Term Accession Number (OBI:0100026),Characteristic [organism part],Term Source REF (EFO:0000635),Term Accession Number (EFO:0000635),Characteristic [plant age],Term Source REF (DPBO:0000033),Term Accession Number (DPBO:0000033),Parameter [growth day length],Term Source REF (DPBO:0000041),Term Accession Number (DPBO:0000041),Parameter [light intensity exposure],Unit,Term Source REF (PECO:0007224),Term Accession Number (PECO:0007224),Parameter [humidity day],Unit ,Term Source REF (DPBO:0000005),Term Accession Number 
(DPBO:0000005),Parameter [temperature day],Unit ,Term Source REF (DPBO:0000007),Term Accession Number (DPBO:0000007),Parameter [temperature night],Unit ,Term Source REF (DPBO:0000008),Term Accession Number (DPBO:0000008),Factor [watering exposure],Term Source REF (PECO:0007383),Term Accession Number (PECO:0007383),Factor [Timepoint],Term Source REF (NCIT:C68568),Term Accession Number (NCIT:C68568),Factor [timepoint-ZT],Term Source REF (),Term Accession Number (),Factor [Photosynthesis mode],Term Source REF () ,Term Accession Number () ,Parameter [biosource amount],Unit ,Term Source REF (DPBO:0000013),Term Accession Number (DPBO:0000013),Parameter [extraction method],Term Source REF (DPBO:0000054),Term Accession Number (DPBO:0000054),Parameter [extraction buffer],Term Source REF (DPBO:0000050),Term Accession Number (DPBO:0000050),Parameter [extraction buffer volume],Unit ,Term Source REF (DPBO:0000051),Term Accession Number (DPBO:0000051),Parameter [RNA quality check],Term Source REF (DPBO:0000062),Term Accession Number (DPBO:0000062),Parameter [library strategy],Term Source REF (DPBO:0000035),Term Accession Number (DPBO:0000035),Parameter [library selection],Term Source REF (DPBO:0000036),Term Accession Number (DPBO:0000036),Parameter [library layout],Term Source REF (DPBO:0000015),Term Accession Number (DPBO:0000015),Parameter [library preparation kit],Term Source REF (GENEPIO:0000085),Term Accession Number (GENEPIO:0000085),Parameter [library preparation kit version],Term Source REF (GENEPIO:0000149),Term Accession Number (GENEPIO:0000149),Parameter [adapter sequence],Term Source REF (GENEPIO:0000083),Term Accession Number (GENEPIO:0000083),Parameter [next generation sequencing instrument model],Term Source REF (DPBO:0000040),Term Accession Number (DPBO:0000040),Parameter [base-calling software],Term Source REF (DPBO:0000017),Term Accession Number (DPBO:0000017),Parameter [base-calling software version],Term Source REF (DPBO:0000018),Term Accession Number 
(DPBO:0000018),Parameter [Raw data file format],Term Source REF (DPBO:0000021),Term Accession Number (DPBO:0000021),Output [Data] +CAM_01,Talinum fruticosum,NCBITaxon,http://purl.obolibrary.org/obo/NCBITaxon_110664,leaf,PO,https://www.ebi.ac.uk/ols4/ontologies/po/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FPO_0025034,28 days after germination,user-specific,,12 hr light / 12 hr dark,user-specific,,425,microeinstein per square meter per second,UO,https://bioregistry.io/UO:0000160,37,percent,UO,https://bioregistry.io/UO:0000187,30,degree celsius,UO,https://bioregistry.io/UO:0000027,22,degree celsius,UO,https://bioregistry.io/UO:0000027,12 days drought,user-specific,,MD,user-specific,,6,user-specific,,CAM,user-specific,,80,milligram,UO,https://bioregistry.io/UO:0000022,Roboklon EURx GeneMATRIX Universal RNA Purification version 2.3 September 2011,user-specific,,Roboklon commercial buffers,user-specific,,300,microliter,UO,https://bioregistry.io/UO:0000101,RIN 7.6 (Agilent Bioanalyzer 2100 expert_Plant RNA Nano),user-specific,,RNA-seq,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000003,cDNA method,user-specific,,single-end,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_0000086,Illumina TruSeq RNA Sample Prep Kit,,,version 2,,,CAGATC,,,Illumina HiSeq 2000,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000041,Illumina Cassava,user-specific,,v1.8.2,,,*.fastq.gz,,,DB_097_CAMMD_CAGATC_L001_R1_001.fastq.gz +CAM_02,Talinum fruticosum,NCBITaxon,http://purl.obolibrary.org/obo/NCBITaxon_110664,leaf,PO,https://www.ebi.ac.uk/ols4/ontologies/po/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FPO_0025034,28 days after germination,user-specific,,12 hr light / 12 hr dark,user-specific,,425,microeinstein per square meter per second,UO,https://bioregistry.io/UO:0000160,37,percent,UO,https://bioregistry.io/UO:0000187,30,degree celsius,UO,https://bioregistry.io/UO:0000027,22,degree celsius,UO,https://bioregistry.io/UO:0000027,12 days 
drought,user-specific,,MD,user-specific,,6,user-specific,,CAM,user-specific,,78,milligram,UO,https://bioregistry.io/UO:0000022,Roboklon EURx GeneMATRIX Universal RNA Purification version 2.3 September 2011,user-specific,,Roboklon commercial buffers,user-specific,,300,microliter,UO,https://bioregistry.io/UO:0000101,RIN 7.7 (Agilent Bioanalyzer 2100 expert_Plant RNA Nano),user-specific,,RNA-seq,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000003,cDNA method,user-specific,,single-end,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_0000086,Illumina TruSeq RNA Sample Prep Kit,,,version 2,,,CTTGTA,,,Illumina HiSeq 2000,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000041,Illumina Cassava,user-specific,,v1.8.2,,,*.fastq.gz,,,DB_099_CAMMD_CTTGTA_L001_R1_001.fastq.gz +CAM_03,Talinum fruticosum,NCBITaxon,http://purl.obolibrary.org/obo/NCBITaxon_110664,leaf,PO,https://www.ebi.ac.uk/ols4/ontologies/po/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FPO_0025034,28 days after germination,user-specific,,12 hr light / 12 hr dark,user-specific,,425,microeinstein per square meter per second,UO,https://bioregistry.io/UO:0000160,37,percent,UO,https://bioregistry.io/UO:0000187,30,degree celsius,UO,https://bioregistry.io/UO:0000027,22,degree celsius,UO,https://bioregistry.io/UO:0000027,12 days drought,user-specific,,MD,user-specific,,6,user-specific,,CAM,user-specific,,93,milligram,UO,https://bioregistry.io/UO:0000022,Roboklon EURx GeneMATRIX Universal RNA Purification version 2.3 September 2011,user-specific,,Roboklon commercial buffers,user-specific,,300,microliter,UO,https://bioregistry.io/UO:0000101,RIN 6.5 (Agilent Bioanalyzer 2100 expert_Plant RNA Nano),user-specific,,RNA-seq,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000003,cDNA method,user-specific,,single-end,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_0000086,Illumina TruSeq RNA Sample Prep Kit,,,version 2,,,AGTCAA,,,Illumina HiSeq 
2000,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000041,Illumina Cassava,user-specific,,v1.8.2,,,*.fastq.gz,,,DB_103_CAMMD_AGTCAA_L001_R1_001.fastq.gz +reC3_01,Talinum fruticosum,NCBITaxon,http://purl.obolibrary.org/obo/NCBITaxon_110664,leaf,PO,https://www.ebi.ac.uk/ols4/ontologies/po/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FPO_0025034,28 days after germination,user-specific,,12 hr light / 12 hr dark,user-specific,,425,microeinstein per square meter per second,UO,https://bioregistry.io/UO:0000160,37,percent,UO,https://bioregistry.io/UO:0000187,30,degree celsius,UO,https://bioregistry.io/UO:0000027,22,degree celsius,UO,https://bioregistry.io/UO:0000027,12 days drought + 2 days rewatered,user-specific,,MD,user-specific,,6,user-specific,,reC3,user-specific,,82,milligram,UO,https://bioregistry.io/UO:0000022,Roboklon EURx GeneMATRIX Universal RNA Purification version 2.3 September 2011,user-specific,,Roboklon commercial buffers,user-specific,,300,microliter,UO,https://bioregistry.io/UO:0000101,RIN 7.8 (Agilent Bioanalyzer 2100 expert_Plant RNA Nano),user-specific,,RNA-seq,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000003,cDNA method,user-specific,,single-end,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_0000086,Illumina TruSeq RNA Sample Prep Kit,,,version 2,,,GTCCGC,,,Illumina HiSeq 2000,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000041,Illumina Cassava,user-specific,,v1.8.2,,,*.fastq.gz,,,DB_161_reC3MD_GTCCGC_L001_R1_001.fastq.gz +reC3_02,Talinum fruticosum,NCBITaxon,http://purl.obolibrary.org/obo/NCBITaxon_110664,leaf,PO,https://www.ebi.ac.uk/ols4/ontologies/po/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FPO_0025034,28 days after germination,user-specific,,12 hr light / 12 hr dark,user-specific,,425,microeinstein per square meter per second,UO,https://bioregistry.io/UO:0000160,37,percent,UO,https://bioregistry.io/UO:0000187,30,degree celsius,UO,https://bioregistry.io/UO:0000027,22,degree 
celsius,UO,https://bioregistry.io/UO:0000027,12 days drought + 2 days rewatered,user-specific,,MD,user-specific,,6,user-specific,,reC3,user-specific,,96,milligram,UO,https://bioregistry.io/UO:0000022,Roboklon EURx GeneMATRIX Universal RNA Purification version 2.3 September 2011,user-specific,,Roboklon commercial buffers,user-specific,,300,microliter,UO,https://bioregistry.io/UO:0000101,RIN 7.6 (Agilent Bioanalyzer 2100 expert_Plant RNA Nano),user-specific,,RNA-seq,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000003,cDNA method,user-specific,,single-end,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_0000086,Illumina TruSeq RNA Sample Prep Kit,,,version 2,,,GTGAAA,,,Illumina HiSeq 2000,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000041,Illumina Cassava,user-specific,,v1.8.2,,,*.fastq.gz,,,DB_163_reC3MD_GTGAAA_L001_R1_001.fastq.gz +reC3_03,Talinum fruticosum,NCBITaxon,http://purl.obolibrary.org/obo/NCBITaxon_110664,leaf,PO,https://www.ebi.ac.uk/ols4/ontologies/po/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FPO_0025034,28 days after germination,user-specific,,12 hr light / 12 hr dark,user-specific,,425,microeinstein per square meter per second,UO,https://bioregistry.io/UO:0000160,37,percent,UO,https://bioregistry.io/UO:0000187,30,degree celsius,UO,https://bioregistry.io/UO:0000027,22,degree celsius,UO,https://bioregistry.io/UO:0000027,12 days drought + 2 days rewatered,user-specific,,MD,user-specific,,6,user-specific,,reC3,user-specific,,78,milligram,UO,https://bioregistry.io/UO:0000022,Roboklon EURx GeneMATRIX Universal RNA Purification version 2.3 September 2011,user-specific,,Roboklon commercial buffers,user-specific,,300,microliter,UO,https://bioregistry.io/UO:0000101,RIN 7.6 (Agilent Bioanalyzer 2100 expert_Plant RNA Nano),user-specific,,RNA-seq,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000003,cDNA method,user-specific,,single-end,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_0000086,Illumina TruSeq RNA Sample 
Prep Kit,,,version 2,,,GTGAAA,,,Illumina HiSeq 2000,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000041,Illumina Cassava,user-specific,,v1.8.2,,,*.fastq.gz,,,DB_165_re-C3MD_GTGAAA_L002_R1_001.fastq.gz \ No newline at end of file diff --git a/runs/isaSampleToRawDataSeq-run/rnaseq-samples.xlsx b/runs/isaSampleToRawDataSeq-run/rnaseq-samples.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..759961513f5d109aacc26dcc233d07e5c0e1325f Binary files /dev/null and b/runs/isaSampleToRawDataSeq-run/rnaseq-samples.xlsx differ diff --git a/workflows/deseq2/deseq2.R b/workflows/deseq2/deseq2.R index 565142189e96783199acbd5fa1df5573a2f21601..88ec8ee0f98fcd29d3bffd4ee3772dfacbc2f4d5 100644 --- a/workflows/deseq2/deseq2.R +++ b/workflows/deseq2/deseq2.R @@ -6,13 +6,14 @@ library("DESeq2") library("tximport") library("rhdf5") library("ggplot2") +library("readxl") ## In-and-out -# inKallistoResults <- "../../runs/kallisto/kallisto_results" -# inMetadataFile <- "../../runs/merged_isa_metadata/out/merged_isa.tsv" -# inMetadataSample <- "Source.Name" -# inMetadataFactorList <- list("Factor..Photosynthesis.mode.", "Factor..Biosource.amount.") +inKallistoResults <- "../../runs/kallisto/kallisto_results" +inMetadataFile <- "../../runs/isaSampleToRawDataSeq-run/rnaseq-samples.xlsx" +inMetadataSample <- "Input [Sample Name]" +inMetadataFactorList <- list("Factor [Photosynthesis mode]") ### Read arguments from CLI @@ -34,13 +35,18 @@ head(txi$counts) ## Read sample metadata -samples_metadata <- read.table(file = inMetadataFile, sep = "\t") +samples_metadata <- as.data.frame(read_xlsx(path = inMetadataFile)) samples <- samples_metadata[order(samples_metadata[[inMetadataSample]]), c(inMetadataSample, unlist(inMetadataFactorList))] + rownames(samples) <- samples[,inMetadataSample] factors <- sapply(inMetadataFactorList, function(x) x[[1]]) -design_formula <- as.formula(paste("~", paste(rev(factors), collapse = " + "))) +## Annoying workaround to prevent formula 
error with special chars in column headers +colnames(samples) <- make.names(colnames(samples)) +factors <- make.names(factors) + +design_formula <- as.formula(paste("~", paste(rev(factors), collapse = " + "))) ## DESeq diff --git a/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.cwl b/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.cwl index a00b3e8187346d19521718fed02c0fd50c10d0ce..e743f327893969785482e36b2ba868dde93de35a 100644 --- a/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.cwl +++ b/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.cwl @@ -2,7 +2,7 @@ cwlVersion: v1.2 class: CommandLineTool hints: DockerRequirement: - dockerPull: mcr.microsoft.com/dotnet/sdk:6.0 + dockerPull: mcr.microsoft.com/dotnet/sdk:8.0 requirements: - class: InitialWorkDirRequirement listing: @@ -25,12 +25,12 @@ inputs: type: string inputBinding: position: 2 - outName: - type: string - inputBinding: - position: 3 startingNodeNum: type: int + inputBinding: + position: 3 + outName: + type: string inputBinding: position: 4 @@ -39,5 +39,5 @@ outputs: type: File[] outputBinding: glob: - - "*.tsv" + - "*.csv" - "*.xlsx" diff --git a/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.fsx b/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.fsx index 20ba577698d0671ca2a8ea97b67b6c4faa968915..5ac29ad4f621be4d6914973b674c266ef4037851 100644 --- a/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.fsx +++ b/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.fsx @@ -4,22 +4,22 @@ #r "nuget: ARCtrl.NET" #r "nuget: ARCtrl.QueryModel" +#r "nuget: FsSpreadsheet.CsvIO" -open System.IO open ARCtrl.NET open ARCtrl open ARCtrl.QueryModel open ARCtrl.Helper -open FsSpreadsheet open FsSpreadsheet.Net +open FsSpreadsheet.CsvIO // input parameters -// let args : string array = fsi.CommandLineArgs |> Array.tail -// let arcPath = args.[0] -// let assayName = args.[1] -// let startingNodeNum = args.[2] |> int -// let outName = args.[3] +let args : string array = 
fsi.CommandLineArgs |> Array.tail +let arcPath = args.[0] +let assayName = args.[1] +let startingNodeNum = args.[2] |> int +let outName = args.[3] type ArcTables with @@ -31,16 +31,16 @@ type ArcTables with ) |> ArcTables -// test parameters -let source = __SOURCE_DIRECTORY__ -let arcPath = Path.Combine(source, "../../") -let assayName = "Talinum_RNASeq_minimal" -let startingNodeNum = 0 -let outName = "rnaseq-samples" +// // test parameters +// let source = __SOURCE_DIRECTORY__ +// let arcPath = Path.Combine(source, "../../") +// let assayName = "Talinum_RNASeq_minimal" +// let startingNodeNum = 0 +// let outName = "rnaseq-samples" // Load ARC -// Remove all tables with either an input or output column missing 🤣😀 +// Remove all tables with either an input or output column missing let clean (a : ARC) = a.ISA.Value.Assays |> Seq.iter (fun a -> a.Tables @@ -64,7 +64,7 @@ let clean (a : ARC) = ) a -// transform all data cells to freetext cells 😀🤣😀😀🤣😀😀🤣😀😀🤣😀😀🤣😀😀🤣😀😀🤣😀 +// transform all data cells to freetext cells let shittify (a : ARC) = a.ISA.Value.Assays |> Seq.iter (fun a -> a.Tables @@ -161,6 +161,8 @@ let wb = new FsSpreadsheet.FsWorkbook() wb.AddWorksheet ws -// Write to csv +// Write to xlsx +wb.ToXlsxFile ($"{outName}.xlsx") -wb.ToXlsxFile (outName + ".xlsx") \ No newline at end of file +// Write to csv +wb.ToCsvFile ($"{outName}.csv") \ No newline at end of file