diff --git a/.cwl/cwl-metadata-checklist.md b/.cwl/cwl-metadata-checklist.md new file mode 100644 index 0000000000000000000000000000000000000000..e7b070fda32343db9a484e0c39b84c3c0e484f58 --- /dev/null +++ b/.cwl/cwl-metadata-checklist.md @@ -0,0 +1,80 @@ + +# Checklist for good CWL documents + +based on recommendations from: + +- https://www.commonwl.org/user_guide/topics/best-practices.html +- https://www.commonwl.org/user_guide/topics/metadata-and-authorship.html + +- [Design](#design) + - [Keep it Simple: Single-step](#keep-it-simple-single-step) +- [Dependencies](#dependencies) + - [Soft requirements = `hints`](#soft-requirements--hints) + - [Hard requirements = `requirements`](#hard-requirements--requirements) +- [Metadata](#metadata) + - [Namespaces and schemas](#namespaces-and-schemas) + +## Design + +### Keep it Simple: Single-step + +- follow the [KISS principle](https://en.wikipedia.org/wiki/KISS_principle) +- a `CommandLineTool` document should only execute one process +- use `Workflow` documents to design more complex, multi-step pipelines +- use `scatter` to execute the process on multiple inputs +- do not hard-code input or output paths + +## Dependencies + +### Soft requirements = `hints` + +Specify software and resource requirements under `hints` + +- add `SoftwareRequirement` to specify software version and reference + - `package: ` name of the software or package + - `specs: ` reference URL from https://identifiers.org/biotools/ or SciCrunch https://identifiers.org/rrid/ + - `version: [ "0.11.9" ]` +- add `DockerRequirement` + - reference a local `Dockerfile` or a published Docker image +- add `ResourceRequirement` to specify the required compute resources + +### Hard requirements = `requirements` + +Use `requirements` primarily to specify hard requirements needed to run the current `CommandLineTool` or `Workflow` document + +## Metadata + +### Namespaces and schemas + +Adding namespaces and schemas allows you to reuse them elsewhere in a CWL document + +```yaml 
+$namespaces: + s: https://schema.org/ + edam: http://edamontology.org/ + +$schemas: + - https://schema.org/version/latest/schemaorg-current-https.rdf + - http://edamontology.org/EDAM_1.18.owl +``` + +### Attribute authors and contributors + +```yaml +s:author: + - class: s:Person + s:identifier: <author ORCID> + s:email: mailto:<author email> + s:name: <author name> + +s:contributor: + - class: s:Person + s:identifier: <contributor ORCID> + s:email: mailto:<contributor email> + s:name: <contributor name> + +s:citation: <DOI to software paper> +s:codeRepository: <URL to software repo (e.g. github)> +s:dateCreated: "2016-12-13" +s:license: <URL to license, e.g. from https://spdx.org/licenses/> +``` diff --git a/runs/deseq2/run.cwl b/runs/deseq2/run.cwl index 849cd989221e74fb6874efe6cc412166927a1847..e0fdbf42ee4d373eae50a4e3ccf8fc91ac7229ec 100644 --- a/runs/deseq2/run.cwl +++ b/runs/deseq2/run.cwl @@ -22,3 +22,16 @@ outputs: output: type: File[] outputSource: deseq2/output + +$namespaces: + s: https://schema.org/ + edam: http://edamontology.org/ + +$schemas: + - https://schema.org/version/latest/schemaorg-current-https.rdf + - http://edamontology.org/EDAM_1.18.owl + +s:author: + - class: s:Person + s:name: Dominik Brilhaus + s:identifier: https://orcid.org/0000-0001-9021-3197 \ No newline at end of file diff --git a/runs/fastqc/run.cwl b/runs/fastqc/run.cwl index b049a5f92ccca308657a8791af4c5d52f3871da3..fb4552b550fe0896cb4d170b5b3e17a6b286d5e4 100644 --- a/runs/fastqc/run.cwl +++ b/runs/fastqc/run.cwl @@ -21,4 +21,17 @@ steps: outputs: fastqc_outdir: type: Directory - outputSource: fastqc/fastqc_outdir \ No newline at end of file + outputSource: fastqc/fastqc_outdir + +$namespaces: + s: https://schema.org/ + edam: http://edamontology.org/ + +$schemas: + - https://schema.org/version/latest/schemaorg-current-https.rdf + - http://edamontology.org/EDAM_1.18.owl + +s:author: + - class: s:Person + s:name: Dominik Brilhaus + s:identifier: 
https://orcid.org/0000-0001-9021-3197 \ No newline at end of file diff --git a/runs/isaSampleToRawDataSeq/run.cwl b/runs/isaSampleToRawDataSeq/run.cwl index aca29e7543721a02e4a3a7eee0650b1167615a0f..803f5317df681038d9edd21271dd72e34ed82b32 100644 --- a/runs/isaSampleToRawDataSeq/run.cwl +++ b/runs/isaSampleToRawDataSeq/run.cwl @@ -25,4 +25,17 @@ outputs: outputSource: isaSampleToRawDataSeq/sampleseqCsv sampleseqXlsx: type: File - outputSource: isaSampleToRawDataSeq/sampleseqXlsx \ No newline at end of file + outputSource: isaSampleToRawDataSeq/sampleseqXlsx + +$namespaces: + s: https://schema.org/ + edam: http://edamontology.org/ + +$schemas: + - https://schema.org/version/latest/schemaorg-current-https.rdf + - http://edamontology.org/EDAM_1.18.owl + +s:author: + - class: s:Person + s:name: Dominik Brilhaus + s:identifier: https://orcid.org/0000-0001-9021-3197 \ No newline at end of file diff --git a/runs/kallisto/run.cwl b/runs/kallisto/run.cwl index d702ed4426bff5a1890b4f9f0ce0de6acb1743c8..f06fae261e1b544c9e69ca475b8100a15f9f0ec9 100644 --- a/runs/kallisto/run.cwl +++ b/runs/kallisto/run.cwl @@ -41,3 +41,16 @@ outputs: kallistoOutDir: type: Directory outputSource: kallisto/kallistoOutDir + +$namespaces: + s: https://schema.org/ + edam: http://edamontology.org/ + +$schemas: + - https://schema.org/version/latest/schemaorg-current-https.rdf + - http://edamontology.org/EDAM_1.18.owl + +s:author: + - class: s:Person + s:name: Dominik Brilhaus + s:identifier: https://orcid.org/0000-0001-9021-3197 \ No newline at end of file diff --git a/runs/shiny/run.cwl b/runs/shiny/run.cwl index 71435f9c300a86f8781e80fa0e7b6b927e8c9da0..532fa889f33f0683fd4dcc4be9bc140ba815571a 100644 --- a/runs/shiny/run.cwl +++ b/runs/shiny/run.cwl @@ -16,3 +16,16 @@ steps: out: [] outputs: [] + +$namespaces: + s: https://schema.org/ + edam: http://edamontology.org/ + +$schemas: + - https://schema.org/version/latest/schemaorg-current-https.rdf + - http://edamontology.org/EDAM_1.18.owl + +s:author: 
+ - class: s:Person + s:name: Dominik Brilhaus + s:identifier: https://orcid.org/0000-0001-9021-3197 diff --git a/runs/sleuth/run.cwl b/runs/sleuth/run.cwl index e00ce51a124f5266ae6f383ba5d21ba4383ffc57..7884d8dcbd318257bfaf4de6ef0a93b603875cbb 100644 --- a/runs/sleuth/run.cwl +++ b/runs/sleuth/run.cwl @@ -27,3 +27,17 @@ outputs: outdir: type: Directory[] outputSource: sleuth/outdir + + +$namespaces: + s: https://schema.org/ + edam: http://edamontology.org/ + +$schemas: + - https://schema.org/version/latest/schemaorg-current-https.rdf + - http://edamontology.org/EDAM_1.18.owl + +s:author: + - class: s:Person + s:name: Dominik Brilhaus + s:identifier: https://orcid.org/0000-0001-9021-3197 \ No newline at end of file diff --git a/workflows/fastqc/fastqc.cwl b/workflows/fastqc/fastqc.cwl index 1954ad092600ec32cd7074488ff8d1111fe142de..4f96fb6ee286f9c022671e00dbdaa04a686f98dc 100644 --- a/workflows/fastqc/fastqc.cwl +++ b/workflows/fastqc/fastqc.cwl @@ -19,10 +19,10 @@ hints: dockerPull: quay.io/biocontainers/fastqc:0.11.9--hdfd78af_1 SoftwareRequirement: packages: - fastqc: + - package: fastqc specs: - https://identifiers.org/biotools/fastqc - - - https://identifiers.org/rrid/RRID:SCR_014583 + - https://identifiers.org/rrid/RRID:SCR_014583 version: [ "0.11.9" ] baseCommand: "fastqc" @@ -58,5 +58,3 @@ $schemas: - https://edamontology.org/EDAM_1.25.owl s:license: https://spdx.org/licenses/GPL-3.0-or-later - - diff --git a/workflows/kallisto/kallisto-index.cwl b/workflows/kallisto/kallisto-index.cwl index 10f879d61317f9d5f3dfc52f7bef054b9bb3c250..1a4b3c96e3a9b20f201c687f148ab94513f8792c 100644 --- a/workflows/kallisto/kallisto-index.cwl +++ b/workflows/kallisto/kallisto-index.cwl @@ -33,7 +33,7 @@ hints: dockerPull: quay.io/biocontainers/kallisto:0.51.1--ha4fb952_1 SoftwareRequirement: packages: - kallisto: + - package: kallisto version: [ "0.51.1" ] specs: - https://identifiers.org/rrid/RRID:SCR_016582 @@ -82,5 +82,8 @@ $namespaces: s: https://schema.org/ $schemas: - 
https://edamontology.org/EDAM_1.25.owl + - https://schema.org/version/latest/schemaorg-current-https.rdf s:license: https://spdx.org/licenses/BSD-2-Clause +s:citation: https://dx.doi.org/10.1038/nbt.3519 +s:codeRepository: https://github.com/pachterlab/kallisto diff --git a/workflows/kallisto/kallisto-quant.cwl b/workflows/kallisto/kallisto-quant.cwl index 1a8e6eaa87bbcc8a921d40416092d3768ad5ced5..93ff361284fa411c945fcbb5ddf4c498770160f8 100755 --- a/workflows/kallisto/kallisto-quant.cwl +++ b/workflows/kallisto/kallisto-quant.cwl @@ -45,7 +45,7 @@ hints: dockerPull: quay.io/biocontainers/kallisto:0.51.1--ha4fb952_1 SoftwareRequirement: packages: - kallisto: + - package: kallisto version: [ "0.51.1" ] specs: [ https://identifiers.org/biotools/kallisto ] @@ -175,5 +175,8 @@ $namespaces: s: https://schema.org/ $schemas: - https://edamontology.org/EDAM_1.25.owl + - https://schema.org/version/latest/schemaorg-current-https.rdf s:license: https://spdx.org/licenses/BSD-2-Clause +s:citation: https://dx.doi.org/10.1038/nbt.3519 +s:codeRepository: https://github.com/pachterlab/kallisto