From 750e9e06b12f98dd7b5bfa11321e178d1a04900e Mon Sep 17 00:00:00 2001 From: Dominik <dominik.brilhaus@hhu.de> Date: Thu, 24 Mar 2022 22:19:59 +0100 Subject: [PATCH] update readme --- README.md | 127 +++++++++------------------------------------- README_arc_wip.md | 77 ++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+), 104 deletions(-) create mode 100644 README_arc_wip.md diff --git a/README.md b/README.md index 3fd845d..16d11e9 100644 --- a/README.md +++ b/README.md @@ -1,122 +1,41 @@ -# ARC mininmal Example RNASeq +# ARC minimal Example RNASeq This is a minimal Example ARC packaging an mRNA-Seq dataset with metadata and computations. ## Data origin -- Partly published under: <https://doi.org/10.1104/pp.15.01076> - see [./isa.investigation.xlsx](./isa.investigation.xlsx) for details. +- Published under: <https://doi.org/10.1104/pp.15.01076> -## Additional payload - -> The following folders are not part of the ARC -> for details, see: [ARC specs:Additional paylod](https://github.com/nfdi4plants/ARC-specification/blob/main/ARC%20specification.md#additional-payload) - -Directory |Â Purpose ----- | ---- -[_GEO_submission](./_GEO_submission) | Example metadata files as required for submission to GEO - -## Notes and ToDos - -### Experimental metadata in isa.assay.xlsx - -- split GEO SWATE templates into four sheets - - 1SPL01_plants - - 2EXT01_RNA - - 3ASY01_RNASeq - - 4COM01_RNASeq - -### Adding large raw data via git lfs - -1. Before adding the files to the ARC, track them via `git lfs` - - ```bash - git lfs track "01_kallisto_index" - ``` - -1. Move / add the large data files to the respective folders - -1. Add them via `git add` - - ```bash - git add runs/run1/01_kallisto_index - ``` - -1. 
Commit - - - -## Bumping to ARC v1.1 (23.03.2022) - -### by arcCommander / shell -> depends on arcCommander v3 or higher - -```bash -arc a list - -arc study unregister -s TalinumFacultativeCAM # unregister old study version -arc study add -s TalinumFacultativeCAM # add fresh -mv TalinumFacultativeCAM.study.xlsx studies/TalinumFacultativeCAM/old.study.xlsx # move old study metadata to new study - -arc study add -s TalinumGenomeDraft # add draft genome as study -mv externals/Talinum.gm.CDS.nt.fa studies/TalinumGenomeDraft/resources # mv genome to resources -rm -r externals -rm inv.json - -arc assay register -s TalinumFacultativeCAM -a Talinum_RNASeq_minimal # re-register assay -arc update - -mv assays/Talinum_RNASeq_minimal/protocols/01_plant_material.md studies/TalinumFacultativeCAM/protocols # move plant growth from assay to study - -arc assay remove -s TalinumFacultativeCAM -a RNASeq_Kallisto_quant # add computational parts as assay - -``` - -> note: soft assay - -### by hand -- move plant growth sheet isa.assay to isa.study -- remove computational RNASeq sheet from assays as it partly duplicates the (not yet CWLed) kallisto workflow - -### add placeholder sample descriptors +## Setting up the arc structure using arcCommander ```bash -arc_root=$(pwd) -cd studies/TalinumFacultativeCAM/resources/ -touch \ -DB_097 \ -DB_099 \ -DB_103 \ -DB_161 \ -DB_163 \ -DB_165 -cd $arc_root -``` +# initialize the structure +arc init -## Make workflows a bit more representative and reproducible (24.03.2022) +# set up investigation +arc i create --identifier samplearc_rnaseq -## generate a common arc_root pointer +# add study folders to place input material and data +arc study add -s TalinumFacultativeCAM +arc study add -s TalinumGenomeDraft -```bash -echo "~/03DataPLANT_gitlab/samplearc_rnaseq/" > workflows/_arc_local_wd +# add assay folder to place assay data +arc assay add -s TalinumFacultativeCAM -a Talinum_RNASeq_minimal ``` +## Describe experiments using SWATE -# replace -# 
runs/ by runs/'$run_name'/ - - -# PKG_PATH="http://cdn.NetBSD.org/pub/pkgsrc/packages/NetBSD/$(uname -p)/$(uname -r|cut -f '1 2' -d.)/All/" -# export PKG_PATH -# pkg_add pkgin - +File | Selected SWATE template +---------|---------- +studies/TalinumFacultativeCAM/isa.study.xlsx | "Plant growth" v. 1.1.2 +assays/Talinum_RNASeq_minimal/isa.assay.xlsx | "RNA extraction" v. 1.1.6<br>"RNA-Seq Assay" v. 1.1.7 +## Additional payload -# git clone https://github.com/pachterlab/kallisto.git -# cd kallisto -# mkdir build -# cd build -# cmake .. -DUSE_HDF5=ON - --- Build files have been written to: /Users/dominikbrilhaus/kallisto/build +> The following folders are not part of the pure ARC +> for details, see: [ARC specs:Additional payload](https://github.com/nfdi4plants/ARC-specification/blob/main/ARC%20specification.md#additional-payload) +Directory | Purpose +---- | ---- +[_GEO_submission](./_GEO_submission) | Example metadata files as required for submission to GEO \ No newline at end of file diff --git a/README_arc_wip.md b/README_arc_wip.md new file mode 100644 index 0000000..884dfc1 --- /dev/null +++ b/README_arc_wip.md @@ -0,0 +1,77 @@ + +## Notes and ToDos + +### Experimental metadata in isa.assay.xlsx + +- split GEO SWATE templates into four sheets + - 1SPL01_plants + - 2EXT01_RNA + - 3ASY01_RNASeq + - 4COM01_RNASeq + +### Adding large raw data via git lfs + +1. Before adding the files to the ARC, track them via `git lfs` + + ```bash + git lfs track "01_kallisto_index" + ``` + +1. Move / add the large data files to the respective folders + +1. Add them via `git add` + + ```bash + git add runs/run1/01_kallisto_index + ``` + +1. 
Commit + + + +## Bumping to ARC v1.1 (23.03.2022) + +### by arcCommander / shell +> depends on arcCommander v3 or higher + +```bash +arc a list + +arc study unregister -s TalinumFacultativeCAM # unregister old study version +arc study add -s TalinumFacultativeCAM # add fresh +mv TalinumFacultativeCAM.study.xlsx studies/TalinumFacultativeCAM/old.study.xlsx # move old study metadata to new study + +arc study add -s TalinumGenomeDraft # add draft genome as study +mv externals/Talinum.gm.CDS.nt.fa studies/TalinumGenomeDraft/resources # mv genome to resources +rm -r externals +rm inv.json + +arc assay register -s TalinumFacultativeCAM -a Talinum_RNASeq_minimal # re-register assay +arc update + +mv assays/Talinum_RNASeq_minimal/protocols/01_plant_material.md studies/TalinumFacultativeCAM/protocols # move plant growth from assay to study + +arc assay remove -s TalinumFacultativeCAM -a RNASeq_Kallisto_quant # remove computational parts registered as assay + +``` + +> note: soft assay + +### by hand +- move plant growth sheet isa.assay to isa.study +- remove computational RNASeq sheet from assays as it partly duplicates the (not yet CWLed) kallisto workflow + +### add placeholder sample descriptors + +```bash +arc_root=$(pwd) +cd studies/TalinumFacultativeCAM/resources/ +touch \ +DB_097 \ +DB_099 \ +DB_103 \ +DB_161 \ +DB_163 \ +DB_165 +cd $arc_root +``` \ No newline at end of file -- GitLab