From 750e9e06b12f98dd7b5bfa11321e178d1a04900e Mon Sep 17 00:00:00 2001
From: Dominik <dominik.brilhaus@hhu.de>
Date: Thu, 24 Mar 2022 22:19:59 +0100
Subject: [PATCH] update readme

---
 README.md         | 127 +++++++++-------------------------------------
 README_arc_wip.md |  77 ++++++++++++++++++++++++++++
 2 files changed, 100 insertions(+), 104 deletions(-)
 create mode 100644 README_arc_wip.md

diff --git a/README.md b/README.md
index 3fd845d..16d11e9 100644
--- a/README.md
+++ b/README.md
@@ -1,122 +1,41 @@
-# ARC mininmal Example RNASeq
+# ARC minimal Example RNASeq
 
 This is a minimal Example ARC packaging an mRNA-Seq dataset with metadata and computations.
 
 ## Data origin
 
-- Partly published under: <https://doi.org/10.1104/pp.15.01076>
 - see [./isa.investigation.xlsx](./isa.investigation.xlsx) for details.
+- Published under: <https://doi.org/10.1104/pp.15.01076>
 
-## Additional payload
-
-> The following folders are not part of the ARC  
-> for details, see: [ARC specs:Additional paylod](https://github.com/nfdi4plants/ARC-specification/blob/main/ARC%20specification.md#additional-payload)
-
-Directory | Purpose
----- | ----
-[_GEO_submission](./_GEO_submission) | Example metadata files as required for submission to GEO
-
-## Notes and ToDos
-
-### Experimental metadata in isa.assay.xlsx
-
-- split GEO SWATE templates into four sheets
-  - 1SPL01_plants
-  - 2EXT01_RNA
-  - 3ASY01_RNASeq
-  - 4COM01_RNASeq
-
-### Adding large raw data via git lfs
-
-1. Before adding the files to the ARC, track them via `git lfs`
-
-    ```bash
-    git lfs track "01_kallisto_index"    
-    ```
-
-1. Move / add the large data files to the respective folders
-
-1. Add them via `git add`
-
-    ```bash
-    git add runs/run1/01_kallisto_index    
-    ```
-
-1. Commit
-
-
-
-## Bumping to ARC v1.1 (23.03.2022)
-
-### by arcCommander / shell
-> depends on arcCommander v3 or higher
-
-```bash
-arc a list
-
-arc study unregister -s TalinumFacultativeCAM # unregister old study version
-arc study add -s TalinumFacultativeCAM # add fresh
-mv TalinumFacultativeCAM.study.xlsx studies/TalinumFacultativeCAM/old.study.xlsx # move old study metadata to new study
-
-arc study add -s TalinumGenomeDraft # add draft genome as study
-mv externals/Talinum.gm.CDS.nt.fa studies/TalinumGenomeDraft/resources # mv genome to resources
-rm -r externals
-rm inv.json
-
-arc assay register -s TalinumFacultativeCAM -a Talinum_RNASeq_minimal # re-register assay 
-arc update
-
-mv assays/Talinum_RNASeq_minimal/protocols/01_plant_material.md studies/TalinumFacultativeCAM/protocols # move plant growth from assay to study
-
-arc assay remove -s TalinumFacultativeCAM -a RNASeq_Kallisto_quant # add computational parts as assay 
-
-```
-
-> note: soft assay 
-
-### by hand
-- move plant growth sheet isa.assay to isa.study
-- remove computational RNASeq sheet from assays as it partly duplicates the (not yet CWLed) kallisto workflow
-
-### add placeholder sample descriptors
+## Setting up the ARC structure using arcCommander
 
 ```bash
-arc_root=$(pwd)
-cd studies/TalinumFacultativeCAM/resources/
-touch \
-DB_097 \
-DB_099 \
-DB_103 \
-DB_161 \
-DB_163 \
-DB_165
-cd $arc_root
-```
+# initialize the structure
+arc init 
 
-## Make workflows a bit more representative and reproducible (24.03.2022)
+# set up investigation 
+arc i create --identifier samplearc_rnaseq 
 
-## generate a common arc_root pointer
+# add study folders to place input material and data
+arc study add -s TalinumFacultativeCAM 
+arc study add -s TalinumGenomeDraft
 
-```bash
-echo "~/03DataPLANT_gitlab/samplearc_rnaseq/" > workflows/_arc_local_wd
+# add assay folder to place assay data
+arc assay add -s TalinumFacultativeCAM -a Talinum_RNASeq_minimal
 ```
 
+## Describe experiments using SWATE
 
-# replace 
-# runs/ by runs/'$run_name'/
-
-
-# PKG_PATH="http://cdn.NetBSD.org/pub/pkgsrc/packages/NetBSD/$(uname -p)/$(uname -r|cut -f '1 2' -d.)/All/"
-# export PKG_PATH
-# pkg_add pkgin
-
+File | Selected SWATE template
+---------|----------
+studies/TalinumFacultativeCAM/isa.study.xlsx | "Plant growth" v. 1.1.2
+assays/Talinum_RNASeq_minimal/isa.assay.xlsx | "RNA extraction" v. 1.1.6<br>"RNA-Seq Assay" v. 1.1.7
 
+## Additional payload
 
-# git clone https://github.com/pachterlab/kallisto.git
-# cd kallisto
-# mkdir build
-# cd build
-# cmake .. -DUSE_HDF5=ON
-
--- Build files have been written to: /Users/dominikbrilhaus/kallisto/build
+> The following folders are not part of the pure ARC  
+> for details, see: [ARC specs:Additional payload](https://github.com/nfdi4plants/ARC-specification/blob/main/ARC%20specification.md#additional-payload)
 
+Directory | Purpose
+---- | ----
+[_GEO_submission](./_GEO_submission) | Example metadata files as required for submission to GEO
\ No newline at end of file
diff --git a/README_arc_wip.md b/README_arc_wip.md
new file mode 100644
index 0000000..884dfc1
--- /dev/null
+++ b/README_arc_wip.md
@@ -0,0 +1,77 @@
+
+## Notes and ToDos
+
+### Experimental metadata in isa.assay.xlsx
+
+- split GEO SWATE templates into four sheets
+  - 1SPL01_plants
+  - 2EXT01_RNA
+  - 3ASY01_RNASeq
+  - 4COM01_RNASeq
+
+### Adding large raw data via git lfs
+
+1. Before adding the files to the ARC, track them via `git lfs`
+
+    ```bash
+    git lfs track "01_kallisto_index"    
+    ```
+
+1. Move / add the large data files to the respective folders
+
+1. Add them via `git add`
+
+    ```bash
+    git add runs/run1/01_kallisto_index    
+    ```
+
+1. Commit
+
+
+
+## Bumping to ARC v1.1 (23.03.2022)
+
+### by arcCommander / shell
+> depends on arcCommander v3 or higher
+
+```bash
+arc a list
+
+arc study unregister -s TalinumFacultativeCAM # unregister old study version
+arc study add -s TalinumFacultativeCAM # add fresh
+mv TalinumFacultativeCAM.study.xlsx studies/TalinumFacultativeCAM/old.study.xlsx # move old study metadata to new study
+
+arc study add -s TalinumGenomeDraft # add draft genome as study
+mv externals/Talinum.gm.CDS.nt.fa studies/TalinumGenomeDraft/resources # mv genome to resources
+rm -r externals
+rm inv.json
+
+arc assay register -s TalinumFacultativeCAM -a Talinum_RNASeq_minimal # re-register assay 
+arc update
+
+mv assays/Talinum_RNASeq_minimal/protocols/01_plant_material.md studies/TalinumFacultativeCAM/protocols # move plant growth from assay to study
+
+arc assay remove -s TalinumFacultativeCAM -a RNASeq_Kallisto_quant # remove computational assay (partly duplicates the kallisto workflow)
+
+```
+
+> note: soft assay 
+
+### by hand
+- move plant growth sheet from isa.assay to isa.study
+- remove computational RNASeq sheet from assays as it partly duplicates the (not yet CWLed) kallisto workflow
+
+### add placeholder sample descriptors
+
+```bash
+arc_root=$(pwd)
+cd studies/TalinumFacultativeCAM/resources/
+touch \
+DB_097 \
+DB_099 \
+DB_103 \
+DB_161 \
+DB_163 \
+DB_165
+cd $arc_root
+```
\ No newline at end of file
-- 
GitLab