From 52a5e1f6d44557a68bb9496ce512b99feac0a58a Mon Sep 17 00:00:00 2001
From: Dominik Brilhaus <brilhaus@nfdi4plants.org>
Date: Fri, 21 Mar 2025 10:59:12 +0100
Subject: [PATCH 1/3] CWL metadata notes

---
 .cwl/cwl-metadata-checklist.md        | 80 +++++++++++++++++++++++++++
 workflows/fastqc/fastqc.cwl           |  6 +-
 workflows/kallisto/kallisto-index.cwl |  2 +-
 workflows/kallisto/kallisto-quant.cwl |  2 +-
 4 files changed, 84 insertions(+), 6 deletions(-)
 create mode 100644 .cwl/cwl-metadata-checklist.md

diff --git a/.cwl/cwl-metadata-checklist.md b/.cwl/cwl-metadata-checklist.md
new file mode 100644
index 0000000..e7b070f
--- /dev/null
+++ b/.cwl/cwl-metadata-checklist.md
@@ -0,0 +1,80 @@
+
+# Checklist for good CWL documents
+
+Based on recommendations from:
+
+- https://www.commonwl.org/user_guide/topics/best-practices.html
+- https://www.commonwl.org/user_guide/topics/metadata-and-authorship.html
+
+- [Design](#design)
+  - [Keep it Simple: Single-step](#keep-it-simple-single-step)
+- [Dependencies](#dependencies)
+  - [Soft requirements = `hints`](#soft-requirements--hints)
+  - [Hard requirements = `requirements`](#hard-requirements--requirements)
+- [Metadata](#metadata)
+  - [Namespaces and schemas](#namespaces-and-schemas)
+
+## Design
+
+### Keep it Simple: Single-step
+
+- follow the [KISS principle](https://en.wikipedia.org/wiki/KISS_principle)
+- a `CommandLineTool` document should only execute one process
+- use `Workflow` documents to design more complex, multi-step pipelines
+- use `scatter` to execute the process on multiple inputs
+- do not hard-code input or output paths
+
+## Dependencies
+
+### Soft requirements = `hints`
+
+Specify software and resource requirements under `hints`
+
+- add `SoftwareRequirement` with a `packages` list to specify each software's name, version and reference
+  - `package: ` name of the software or package
+  - `specs: ` list of reference URLs, e.g. from bio.tools (https://identifiers.org/biotools/) or SciCrunch (https://identifiers.org/rrid/)
+  - `version: [ "0.11.9" ]`
+- add `DockerRequirement`
+  - reference a local `Dockerfile` or a published Docker image
+- add `ResourceRequirement` to specify the required compute resources
+
+### Hard requirements = `requirements`
+
+Use `requirements` primarily to specify hard requirements that are needed to run the current `CommandLineTool` or `Workflow` document
+
+## Metadata
+
+### Namespaces and schemas
+
+Declaring namespaces and schemas lets you reuse their prefixes (e.g. `s:`, `edam:`) elsewhere in the CWL document
+
+```yaml
+$namespaces:
+  s: https://schema.org/
+  edam: http://edamontology.org/
+
+$schemas:
+  - https://schema.org/version/latest/schemaorg-current-https.rdf
+  - http://edamontology.org/EDAM_1.18.owl
+```
+
+### Attribute authors and contributors
+
+```yaml
+s:author:
+  - class: s:Person
+    s:identifier: <author ORCID>
+    s:email: mailto:<author email>
+    s:name: <author name>
+
+s:contributor:
+  - class: s:Person
+    s:identifier: <contributor ORCID>
+    s:email: mailto:<contributor email>
+    s:name: <contributor name>
+
+s:citation: <DOI to software paper>
+s:codeRepository: <URL to software repo (e.g. github)>
+s:dateCreated: "2016-12-13"
+s:license: <URL to license, e.g. from https://spdx.org/licenses/> 
+```
diff --git a/workflows/fastqc/fastqc.cwl b/workflows/fastqc/fastqc.cwl
index 1954ad0..4f96fb6 100644
--- a/workflows/fastqc/fastqc.cwl
+++ b/workflows/fastqc/fastqc.cwl
@@ -19,10 +19,10 @@ hints:
     dockerPull: quay.io/biocontainers/fastqc:0.11.9--hdfd78af_1
   SoftwareRequirement:
     packages:
-      fastqc:
+      - package: fastqc
         specs: 
           - https://identifiers.org/biotools/fastqc
-          - - https://identifiers.org/rrid/RRID:SCR_014583
+          - https://identifiers.org/rrid/RRID:SCR_014583
         version: [ "0.11.9" ]
 
 baseCommand: "fastqc"
@@ -58,5 +58,3 @@ $schemas:
   - https://edamontology.org/EDAM_1.25.owl
 
 s:license: https://spdx.org/licenses/GPL-3.0-or-later
-
-
diff --git a/workflows/kallisto/kallisto-index.cwl b/workflows/kallisto/kallisto-index.cwl
index 10f879d..595142c 100644
--- a/workflows/kallisto/kallisto-index.cwl
+++ b/workflows/kallisto/kallisto-index.cwl
@@ -33,7 +33,7 @@ hints:
     dockerPull: quay.io/biocontainers/kallisto:0.51.1--ha4fb952_1
   SoftwareRequirement:
     packages:
-      kallisto:
+      - package: kallisto
         version: [ "0.51.1" ]
         specs:
           - https://identifiers.org/rrid/RRID:SCR_016582
diff --git a/workflows/kallisto/kallisto-quant.cwl b/workflows/kallisto/kallisto-quant.cwl
index 1a8e6ea..e250c04 100755
--- a/workflows/kallisto/kallisto-quant.cwl
+++ b/workflows/kallisto/kallisto-quant.cwl
@@ -45,7 +45,7 @@ hints:
     dockerPull: quay.io/biocontainers/kallisto:0.51.1--ha4fb952_1
   SoftwareRequirement:
     packages:
-      kallisto:
+      - package: kallisto
         version: [ "0.51.1" ]
         specs: [ https://identifiers.org/biotools/kallisto ]
 
-- 
GitLab


From be3024307de8401d3db5637b5caccae00cfd5552 Mon Sep 17 00:00:00 2001
From: Dominik Brilhaus <brilhaus@nfdi4plants.org>
Date: Fri, 21 Mar 2025 11:02:23 +0100
Subject: [PATCH 2/3] add author to all run.cwl

---
 runs/deseq2/run.cwl                | 13 +++++++++++++
 runs/fastqc/run.cwl                | 15 ++++++++++++++-
 runs/isaSampleToRawDataSeq/run.cwl | 15 ++++++++++++++-
 runs/kallisto/run.cwl              | 13 +++++++++++++
 runs/shiny/run.cwl                 | 13 +++++++++++++
 runs/sleuth/run.cwl                | 14 ++++++++++++++
 6 files changed, 81 insertions(+), 2 deletions(-)

diff --git a/runs/deseq2/run.cwl b/runs/deseq2/run.cwl
index 849cd98..e0fdbf4 100644
--- a/runs/deseq2/run.cwl
+++ b/runs/deseq2/run.cwl
@@ -22,3 +22,16 @@ outputs:
   output: 
     type: File[]
     outputSource: deseq2/output
+
+$namespaces:
+  s: https://schema.org/
+  edam: http://edamontology.org/
+
+$schemas:
+  - https://schema.org/version/latest/schemaorg-current-https.rdf
+  - http://edamontology.org/EDAM_1.18.owl
+
+s:author:
+  - class: s:Person
+    s:name: Dominik Brilhaus
+    s:identifier: https://orcid.org/0000-0001-9021-3197
\ No newline at end of file
diff --git a/runs/fastqc/run.cwl b/runs/fastqc/run.cwl
index b049a5f..fb4552b 100644
--- a/runs/fastqc/run.cwl
+++ b/runs/fastqc/run.cwl
@@ -21,4 +21,17 @@ steps:
 outputs:
   fastqc_outdir:
     type: Directory
-    outputSource: fastqc/fastqc_outdir
\ No newline at end of file
+    outputSource: fastqc/fastqc_outdir
+
+$namespaces:
+  s: https://schema.org/
+  edam: http://edamontology.org/
+
+$schemas:
+  - https://schema.org/version/latest/schemaorg-current-https.rdf
+  - http://edamontology.org/EDAM_1.18.owl
+
+s:author:
+  - class: s:Person
+    s:name: Dominik Brilhaus
+    s:identifier: https://orcid.org/0000-0001-9021-3197
\ No newline at end of file
diff --git a/runs/isaSampleToRawDataSeq/run.cwl b/runs/isaSampleToRawDataSeq/run.cwl
index aca29e7..803f531 100644
--- a/runs/isaSampleToRawDataSeq/run.cwl
+++ b/runs/isaSampleToRawDataSeq/run.cwl
@@ -25,4 +25,17 @@ outputs:
     outputSource: isaSampleToRawDataSeq/sampleseqCsv
   sampleseqXlsx:
     type: File
-    outputSource: isaSampleToRawDataSeq/sampleseqXlsx
\ No newline at end of file
+    outputSource: isaSampleToRawDataSeq/sampleseqXlsx
+
+$namespaces:
+  s: https://schema.org/
+  edam: http://edamontology.org/
+
+$schemas:
+  - https://schema.org/version/latest/schemaorg-current-https.rdf
+  - http://edamontology.org/EDAM_1.18.owl
+
+s:author:
+  - class: s:Person
+    s:name: Dominik Brilhaus
+    s:identifier: https://orcid.org/0000-0001-9021-3197
\ No newline at end of file
diff --git a/runs/kallisto/run.cwl b/runs/kallisto/run.cwl
index d702ed4..f06fae2 100644
--- a/runs/kallisto/run.cwl
+++ b/runs/kallisto/run.cwl
@@ -41,3 +41,16 @@ outputs:
   kallistoOutDir:
     type: Directory
     outputSource: kallisto/kallistoOutDir
+
+$namespaces:
+  s: https://schema.org/
+  edam: http://edamontology.org/
+
+$schemas:
+  - https://schema.org/version/latest/schemaorg-current-https.rdf
+  - http://edamontology.org/EDAM_1.18.owl
+
+s:author:
+  - class: s:Person
+    s:name: Dominik Brilhaus
+    s:identifier: https://orcid.org/0000-0001-9021-3197
\ No newline at end of file
diff --git a/runs/shiny/run.cwl b/runs/shiny/run.cwl
index 71435f9..532fa88 100644
--- a/runs/shiny/run.cwl
+++ b/runs/shiny/run.cwl
@@ -16,3 +16,16 @@ steps:
     out: []
 
 outputs: []
+
+$namespaces:
+  s: https://schema.org/
+  edam: http://edamontology.org/
+
+$schemas:
+  - https://schema.org/version/latest/schemaorg-current-https.rdf
+  - http://edamontology.org/EDAM_1.18.owl
+
+s:author:
+  - class: s:Person
+    s:name: Dominik Brilhaus
+    s:identifier: https://orcid.org/0000-0001-9021-3197
diff --git a/runs/sleuth/run.cwl b/runs/sleuth/run.cwl
index e00ce51..7884d8d 100644
--- a/runs/sleuth/run.cwl
+++ b/runs/sleuth/run.cwl
@@ -27,3 +27,17 @@ outputs:
  outdir:
     type: Directory[]
     outputSource: sleuth/outdir
+
+
+$namespaces:
+  s: https://schema.org/
+  edam: http://edamontology.org/
+
+$schemas:
+  - https://schema.org/version/latest/schemaorg-current-https.rdf
+  - http://edamontology.org/EDAM_1.18.owl
+
+s:author:
+  - class: s:Person
+    s:name: Dominik Brilhaus
+    s:identifier: https://orcid.org/0000-0001-9021-3197
\ No newline at end of file
-- 
GitLab


From ed5710494ee579783346a29c0ca506f8259c19ce Mon Sep 17 00:00:00 2001
From: Dominik Brilhaus <brilhaus@nfdi4plants.org>
Date: Fri, 21 Mar 2025 12:49:32 +0100
Subject: [PATCH 3/3] add citation and repo to kallisto

---
 workflows/kallisto/kallisto-index.cwl | 3 +++
 workflows/kallisto/kallisto-quant.cwl | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/workflows/kallisto/kallisto-index.cwl b/workflows/kallisto/kallisto-index.cwl
index 595142c..1a4b3c9 100644
--- a/workflows/kallisto/kallisto-index.cwl
+++ b/workflows/kallisto/kallisto-index.cwl
@@ -82,5 +82,8 @@ $namespaces:
   s: https://schema.org/
 $schemas:
   - https://edamontology.org/EDAM_1.25.owl
+  - https://schema.org/version/latest/schemaorg-current-https.rdf
 
 s:license: https://spdx.org/licenses/BSD-2-Clause
+s:citation: https://dx.doi.org/10.1038/nbt.3519
+s:codeRepository: https://github.com/pachterlab/kallisto
diff --git a/workflows/kallisto/kallisto-quant.cwl b/workflows/kallisto/kallisto-quant.cwl
index e250c04..93ff361 100755
--- a/workflows/kallisto/kallisto-quant.cwl
+++ b/workflows/kallisto/kallisto-quant.cwl
@@ -175,5 +175,8 @@ $namespaces:
   s: https://schema.org/
 $schemas:
   - https://edamontology.org/EDAM_1.25.owl
+  - https://schema.org/version/latest/schemaorg-current-https.rdf
 
 s:license: https://spdx.org/licenses/BSD-2-Clause
+s:citation: https://dx.doi.org/10.1038/nbt.3519
+s:codeRepository: https://github.com/pachterlab/kallisto
-- 
GitLab