From d154be9e241dc380572ff42ba8c81d6544808efe Mon Sep 17 00:00:00 2001
From: Dominik Brilhaus <brilhaus@nfdi4plants.org>
Date: Tue, 18 Mar 2025 15:56:27 +0100
Subject: [PATCH] add cwl docs

---
 .cwl/README.md           | 86 ++++++++++++++++++++++++++++++++++++++++
 .cwl/cwl-notes.md        | 39 ------------------
 .cwl/cwl-plots/README.md |  4 --
 runs/README.md           |  3 ++
 workflows/README.md      |  3 ++
 5 files changed, 92 insertions(+), 43 deletions(-)
 create mode 100644 .cwl/README.md
 delete mode 100644 .cwl/cwl-notes.md
 delete mode 100644 .cwl/cwl-plots/README.md
 create mode 100644 runs/README.md
 create mode 100644 workflows/README.md

diff --git a/.cwl/README.md b/.cwl/README.md
new file mode 100644
index 0000000..c871e53
--- /dev/null
+++ b/.cwl/README.md
@@ -0,0 +1,86 @@
+# CWL
+
+**Data analysis** in this ARC is packaged and made reusable via [Common Workflow Language (CWL)](https://www.commonwl.org).
+For details, visit the [DataPLANT knowledgebase](https://nfdi4plants.github.io/nfdi4plants.knowledgebase/cwl/).
+
+Briefly, every data analysis step (`runs`) is described with a `run.cwl` document. The `run.cwl` points to (i.e. executes) one or multiple `workflows` (stored as `workflow.cwl`). The input parameters required for the `workflow.cwl` are documented in the accompanying `run.yml`. A `workflow.cwl` can be a single command line tool or a more complex workflow pipeline that references and combines other `*.cwl` documents.
+
+```bash
+...
+├── runs
+│   ├── fastqc
+│   │   ├── run.cwl
+│   │   └── run.yml
+│   ...
+├── studies
+│   ├── ...
+│
+└── workflows
+    ├── fastqc
+    │   ├── collectFilesInDir.cwl
+    │   ├── fastqc.cwl
+    │   └── workflow.cwl
+    ...
+```
+
+```mermaid
+%%{ init: {"flowchart": { "wrappingWidth": "10000" }}}%%
+
+flowchart TD
+
+workflowcwl --o runcwl
+
+subgraph r["runs/fastqc/"]
+    runcwl@{ shape: doc, label: "run.cwl" }
+    runyml@{ shape: doc, label: "run.yml" }
+end
+
+i[input: DB_097_CAMMD_CAGATC_L001_R1_001.fastq.gz] --o runyml
+r --> o[output: DB_097_CAMMD_CAGATC_L001_R1_001_fastqc.html]
+
+subgraph "workflows/fastqc"
+    workflowcwl@{ shape: doc, label: "workflow.cwl" }
+end
+```
+
+## Setup and dependencies
+
+Again, for details check the docs linked above.
+Executing cwl documents requires a cwl runner, e.g. [cwltool](https://github.com/common-workflow-language/cwltool).
+Software and package dependencies are ideally covered by Docker or Conda and described in the hints / requirements sections of cwl documents (e.g. `DockerRequirement` and / or `SoftwareRequirement`).
+
+Additional dependencies may exist for some workflows (e.g. a local installation of R or F# or packages therein) if the workflow is not yet packaged in a fully reusable way.
+
+## Default cwltool commands
+
+Here's a list of frequently used `cwltool` commands to validate or execute runs and workflows.
+
+### Validate document
+
+```bash
+cwltool --validate run.cwl
+```
+
+### Execute workflow in `./runs/*`
+
+```bash
+cwltool run.cwl run.yml
+```
+
+### Capture log and run in background
+
+```bash
+cwltool run.cwl run.yml > $(date +"%Y-%m-%d_%H-%M")-run.log 2>&1 &
+```
+
+### Capture log, run in parallel and in background
+
+```bash
+cwltool --parallel run.cwl run.yml > $(date +"%Y-%m-%d_%H-%M")-run.log 2>&1 &
+```
+
+### Print workflow graph to SVG file
+
+```bash
+cwltool --print-dot run.cwl | dot -Tsvg > run.svg
+```
diff --git a/.cwl/cwl-notes.md b/.cwl/cwl-notes.md
deleted file mode 100644
index 6eb9975..0000000
--- a/.cwl/cwl-notes.md
+++ /dev/null
@@ -1,39 +0,0 @@
-# CWL
-
-## Organisation
-
-### Runs
-
-Every run is described with a `run.cwl` workflow that points to (i.e. steps through) one or multiple workflows or tools.
-
-## Default cwltool commands
-
-### Validate document
-
-```bash
-cwltool --validate run.cwl
-```
-
-### Execute workflow in `./runs/*`
-
-```bash
-cwltool run.cwl run.yml
-```
-
-### capture log and run in bg
-
-```bash
-cwltool run.cwl run.yml > $(date +"%Y-%m-%d_%H-%M")-run.log 2>&1 &
-```
-
-### capture log, run in parallel and in bg
-
-```bash
-cwltool --parallel run.cwl run.yml > $(date +"%Y-%m-%d_%H-%M")-run.log 2>&1 &
-```
-
-### Print workflow to file
-
-```bash
-cwltool --print-dot run.cwl | dot -Tsvg > run.svg
-```
diff --git a/.cwl/cwl-plots/README.md b/.cwl/cwl-plots/README.md
deleted file mode 100644
index 5775769..0000000
--- a/.cwl/cwl-plots/README.md
+++ /dev/null
@@ -1,4 +0,0 @@
-
-```bash
-bash plot-cwls.sh "../../" "run.cwl" "runs-wfls.txt" "runs"
-```
\ No newline at end of file
diff --git a/runs/README.md b/runs/README.md
new file mode 100644
index 0000000..20bbc21
--- /dev/null
+++ b/runs/README.md
@@ -0,0 +1,3 @@
+# Runs
+
+See [.cwl/README.md](../.cwl/README.md) for more info.
diff --git a/workflows/README.md b/workflows/README.md
new file mode 100644
index 0000000..0f64034
--- /dev/null
+++ b/workflows/README.md
@@ -0,0 +1,3 @@
+# Workflows
+
+See [.cwl/README.md](../.cwl/README.md) for more info.
-- 
GitLab