From 9952aa5f47cea2170299b1cfc9c88d49021dfc31 Mon Sep 17 00:00:00 2001
From: Dominik Brilhaus <brilhaus@nfdi4plants.org>
Date: Wed, 7 Aug 2024 13:42:24 +0200
Subject: [PATCH] add ebi trimmomatic workflows

---
 workflows/trimmomatic/README.md               |   5 +
 workflows/trimmomatic/trimmomatic-Dockerfile  |  20 ++
 workflows/trimmomatic/trimmomatic-docker.yml  |   4 +
 .../trimmomatic/trimmomatic-end_mode.yaml     |   3 +
 .../trimmomatic-illumina_clipping.yaml        |  41 +++
 .../trimmomatic/trimmomatic-max_info.yaml     |   7 +
 workflows/trimmomatic/trimmomatic-phred.yaml  |   3 +
 .../trimmomatic-sliding_window.yaml           |   7 +
 workflows/trimmomatic/trimmomatic.cwl         | 321 ++++++++++++++++++
 9 files changed, 411 insertions(+)
 create mode 100644 workflows/trimmomatic/README.md
 create mode 100644 workflows/trimmomatic/trimmomatic-Dockerfile
 create mode 100644 workflows/trimmomatic/trimmomatic-docker.yml
 create mode 100644 workflows/trimmomatic/trimmomatic-end_mode.yaml
 create mode 100755 workflows/trimmomatic/trimmomatic-illumina_clipping.yaml
 create mode 100755 workflows/trimmomatic/trimmomatic-max_info.yaml
 create mode 100644 workflows/trimmomatic/trimmomatic-phred.yaml
 create mode 100644 workflows/trimmomatic/trimmomatic-sliding_window.yaml
 create mode 100755 workflows/trimmomatic/trimmomatic.cwl

diff --git a/workflows/trimmomatic/README.md b/workflows/trimmomatic/README.md
new file mode 100644
index 0000000..87b73ce
--- /dev/null
+++ b/workflows/trimmomatic/README.md
@@ -0,0 +1,5 @@
+
+# Trimmomatic
+
+adapted from: https://github.com/EBI-Metagenomics/ebi-metagenomics-cwl/commit/7bb76f33bf40b5cd2604001cac46f967a209c47f
+
diff --git a/workflows/trimmomatic/trimmomatic-Dockerfile b/workflows/trimmomatic/trimmomatic-Dockerfile
new file mode 100644
index 0000000..9d08d88
--- /dev/null
+++ b/workflows/trimmomatic/trimmomatic-Dockerfile
@@ -0,0 +1,20 @@
+#################################################################
+# Dockerfile
+#
+# Software:         trimmomatic
+# Software Version: 0.32+dfsg-1
+# Description:      DukeGCB trimmomatic image
+# Website:          http://www.usadellab.org/cms/?page=trimmomatic
+# Provides:         trimmomatic
+# Base Image:       phusion/baseimage
+# Build Cmd:        docker build --rm -t dukegcb/trimmomatic .
+# Pull Cmd:         docker pull dukegcb/trimmomatic
+# Run Cmd:          docker run --rm -it dukegcb/trimmomatic
+#################################################################
+
+FROM phusion/baseimage
+LABEL maintainer="Dan Leehr <dan.leehr@duke.edu>"
+
+RUN apt-get update && apt-get install -y \
+  openjdk-7-jre-headless \
+  trimmomatic="0.32+dfsg-1" && rm -rf /var/lib/apt/lists/*
diff --git a/workflows/trimmomatic/trimmomatic-docker.yml b/workflows/trimmomatic/trimmomatic-docker.yml
new file mode 100644
index 0000000..8a8df26
--- /dev/null
+++ b/workflows/trimmomatic/trimmomatic-docker.yml
@@ -0,0 +1,4 @@
+class: DockerRequirement
+dockerPull: dukegcb/trimmomatic
+dockerFile:
+  $include: trimmomatic-Dockerfile
diff --git a/workflows/trimmomatic/trimmomatic-end_mode.yaml b/workflows/trimmomatic/trimmomatic-end_mode.yaml
new file mode 100644
index 0000000..2c53a6f
--- /dev/null
+++ b/workflows/trimmomatic/trimmomatic-end_mode.yaml
@@ -0,0 +1,3 @@
+name: end_mode
+type: enum
+symbols: [SE, PE]
diff --git a/workflows/trimmomatic/trimmomatic-illumina_clipping.yaml b/workflows/trimmomatic/trimmomatic-illumina_clipping.yaml
new file mode 100755
index 0000000..8a18874
--- /dev/null
+++ b/workflows/trimmomatic/trimmomatic-illumina_clipping.yaml
@@ -0,0 +1,41 @@
+type: record
+name: illuminaClipping
+fields:
+  - name: adapters
+    type: File
+    doc: |
+      FASTA file containing adapters, PCR sequences, etc. It is used to search
+      for and remove these sequences in the input FASTQ file(s)
+  - name: seedMismatches
+    type: int
+    doc: |
+      specifies the maximum mismatch count which will still allow a full match
+      to be performed
+  - name: palindromeClipThreshold
+    type: int
+    doc: |
+      specifies how accurate the match between the two 'adapter ligated' reads
+      must be for PE palindrome read alignment.
+  - name: simpleClipThreshold
+    type: int
+    doc: |
+      specifies how accurate the match between any adapter etc. sequence must
+      be against a read
+  - name: minAdapterLength
+    type: int?
+    doc: |
+      In addition to the alignment score, palindrome mode can verify that a
+      minimum length of adapter has been detected. If unspecified, this
+      defaults to 8 bases, for historical reasons. However, since palindrome
+      mode has a very low false positive rate, this can be safely reduced, even
+      down to 1, to allow shorter adapter fragments to be removed.
+  - name: keepBothReads
+    type: boolean
+    doc: |
+      After read-through has been detected by palindrome mode, and the adapter
+      sequence removed, the reverse read contains the same sequence information
+      as the forward read, albeit in reverse complement. For this reason, the
+      default behaviour is to entirely drop the reverse read. By specifying
+      "true" for this parameter, the reverse read will also be retained, which
+      may be useful e.g. if the downstream tools cannot handle a combination of
+      paired and unpaired reads.
diff --git a/workflows/trimmomatic/trimmomatic-max_info.yaml b/workflows/trimmomatic/trimmomatic-max_info.yaml
new file mode 100755
index 0000000..086b295
--- /dev/null
+++ b/workflows/trimmomatic/trimmomatic-max_info.yaml
@@ -0,0 +1,7 @@
+type: record
+name: maxinfo
+fields:
+  - name: targetLength
+    type: int
+  - name: strictness
+    type: float
diff --git a/workflows/trimmomatic/trimmomatic-phred.yaml b/workflows/trimmomatic/trimmomatic-phred.yaml
new file mode 100644
index 0000000..6015346
--- /dev/null
+++ b/workflows/trimmomatic/trimmomatic-phred.yaml
@@ -0,0 +1,3 @@
+name: phred
+type: enum
+symbols: ["64", "33"]
diff --git a/workflows/trimmomatic/trimmomatic-sliding_window.yaml b/workflows/trimmomatic/trimmomatic-sliding_window.yaml
new file mode 100644
index 0000000..66d3b8c
--- /dev/null
+++ b/workflows/trimmomatic/trimmomatic-sliding_window.yaml
@@ -0,0 +1,7 @@
+name: slidingWindow
+type: record
+fields:
+  - name: windowSize
+    type: int
+  - name: requiredQuality
+    type: int
diff --git a/workflows/trimmomatic/trimmomatic.cwl b/workflows/trimmomatic/trimmomatic.cwl
new file mode 100755
index 0000000..2aaf27e
--- /dev/null
+++ b/workflows/trimmomatic/trimmomatic.cwl
@@ -0,0 +1,321 @@
+#!/usr/bin/env cwl-runner
+
+cwlVersion: v1.0
+class: CommandLineTool
+
+hints:
+  SoftwareRequirement:
+    packages:
+      trimmomatic:
+        version: ["0.32", "0.35", "0.36"]
+        specs: ["https://identifiers.org/rrid/RRID:SCR_011848"]
+
+requirements:
+  InlineJavascriptRequirement: {}
+  ShellCommandRequirement: {}
+  ResourceRequirement:
+    coresMin: 8
+    ramMin: 10240
+  SchemaDefRequirement:
+    types:
+      - $import: trimmomatic-end_mode.yaml
+      - $import: trimmomatic-sliding_window.yaml
+      - $import: trimmomatic-phred.yaml
+      - $import: trimmomatic-illumina_clipping.yaml
+      - $import: trimmomatic-max_info.yaml
+
+# hints:
+#  - $import: trimmomatic-docker.yml
+
+inputs:
+  phred:
+    type: trimmomatic-phred.yaml#phred?
+    inputBinding:
+      prefix: -phred
+      separate: false
+      position: 4
+    doc: |
+      "33" or "64" sets the base quality encoding; if omitted, Trimmomatic >= 0.32 auto-detects it.
+
+  tophred64:
+    type: boolean?
+    doc: This (re)encodes the quality part of the FASTQ file to base 64.
+    inputBinding:
+      prefix: TOPHRED64
+      separate: false
+      position: 12
+
+  headcrop:
+    type: int?
+    inputBinding:
+      position: 13
+      prefix: 'HEADCROP:'
+      separate: false
+    doc: |
+      Removes the specified number of bases, regardless of quality, from the
+      beginning of the read.
+      The number specified is the number of bases to remove from the start of
+      the read.
+
+  tophred33:
+    type: boolean?
+    doc: This (re)encodes the quality part of the FASTQ file to base 33.
+    inputBinding:
+      prefix: TOPHRED33
+      separate: false
+      position: 12
+
+  minlen:
+    type: int?
+    inputBinding:
+      prefix: "MINLEN:"
+      separate: false
+      position: 100
+    doc: |
+      This module removes reads that fall below the specified minimal length.
+      If required, it should normally be after all other processing steps.
+      Reads removed by this step will be counted and included in the "dropped
+      reads" count presented in the trimmomatic summary.
+
+  java_opts:
+    type: string?
+    inputBinding:
+      shellQuote: false
+      position: 1
+    doc: |
+      JVM arguments should be a quoted, space separated list
+      (e.g. "-Xms128m -Xmx512m")
+
+  leading:
+    type: int?
+    inputBinding:
+      prefix: "LEADING:"
+      separate: false
+      position: 14
+    doc: |
+      Remove low quality bases from the beginning. As long as a base has a
+      value below this threshold the base is removed and the next base will be
+      investigated.
+
+  slidingwindow:
+    type: trimmomatic-sliding_window.yaml#slidingWindow?
+    inputBinding:
+      position: 15
+      valueFrom: |
+        ${
+          if (!self) {
+            return self;
+          }
+          var size = self.windowSize;
+          return "SLIDINGWINDOW:" + size + ":" + self.requiredQuality;
+        }
+    doc: |
+      Perform a sliding window trimming, cutting once the average quality
+      within the window falls below a threshold. By considering multiple
+      bases, a single poor quality base will not cause the removal of high
+      quality data later in the read.
+      <windowSize> specifies the number of bases to average across
+      <requiredQuality> specifies the average quality required
+
+  illuminaClip:
+    type: trimmomatic-illumina_clipping.yaml#illuminaClipping?
+    inputBinding:
+      # minAdapterLength is optional: fall back to the documented default of 8
+      # rather than emitting the literal text "null" inside the ILLUMINACLIP step.
+      valueFrom: |
+        ${
+          if (!self) { return self; }
+          var minLen = self.minAdapterLength == null ? 8 : self.minAdapterLength;
+          return "ILLUMINACLIP:" + self.adapters.path + ":" + self.seedMismatches
+            + ":" + self.palindromeClipThreshold + ":" + self.simpleClipThreshold
+            + ":" + minLen + ":" + self.keepBothReads;
+        }
+      position: 11
+    doc: Cut adapter and other illumina-specific sequences from the read.
+
+  crop:
+    type: int?
+    inputBinding:
+      prefix: "CROP:"
+      separate: false
+      position: 13
+    doc: |
+      Removes bases regardless of quality from the end of the read, so that the
+      read has maximally the specified length after this step has been
+      performed. Steps performed after CROP might of course further shorten the
+      read. The value is the number of bases to keep, from the start of the read.
+
+  reads2:
+    type: File?
+    format: edam:format_1930  # fastq
+    doc: FASTQ file of R2 reads in Paired End mode
+    inputBinding:
+      position: 6
+
+  reads1:
+    type: File
+    format: edam:format_1930  # fastq
+    doc: FASTQ file of reads (R1 reads in Paired End mode)
+    inputBinding:
+      position: 5
+
+  avgqual:
+    type: int?
+    inputBinding:
+      prefix: "AVGQUAL:"
+      separate: false
+      position: 101
+    doc: |
+      Drop the read if the average quality is below the specified level
+
+  trailing:
+    type: int?
+    inputBinding:
+      prefix: "TRAILING:"
+      separate: false
+      position: 14
+    doc: |
+      Remove low quality bases from the end. As long as a base has a value
+      below this threshold the base is removed and the next base (which as
+      trimmomatic is starting from the 3' prime end would be base preceding
+      the just removed base) will be investigated. This approach can be used
+      removing the special Illumina "low quality segment" regions (which are
+      marked with quality score of 2), but we recommend Sliding Window or
+      MaxInfo instead
+
+  maxinfo:
+    type: trimmomatic-max_info.yaml#maxinfo?
+    inputBinding:
+      position: 15
+      valueFrom: |
+        ${
+          if (!self) {
+            return self;
+          }
+          return "MAXINFO:" + self.targetLength + ":" + self.strictness;
+        }
+    doc: |
+      Performs an adaptive quality trim, balancing the benefits of retaining
+      longer reads against the costs of retaining bases with errors.
+      <targetLength>: This specifies the read length which is likely to allow
+      the location of the read within the target sequence to be determined.
+      <strictness>: This value, which should be set between 0 and 1, specifies
+      the balance between preserving as much read length as possible vs.
+      removal of incorrect bases. A low value of this parameter (<0.2) favours
+      longer reads, while a high value (>0.8) favours read correctness.
+
+  end_mode:
+    type: trimmomatic-end_mode.yaml#end_mode
+    doc: |
+      Single End (SE) or Paired End (PE) mode
+    inputBinding:
+      position: 3
+
+outputs:
+  reads1_trimmed:
+    type: File
+    outputBinding:
+      glob: $(inputs.reads1.nameroot).trimmed.fastq
+    format: edam:format_1930  # fastq
+
+  output_log:
+    label: Trimmomatic log
+    type: File
+    outputBinding:
+      glob: trim.log
+    doc: |
+      log of all read trimmings, indicating the following details:
+        the read name
+        the surviving sequence length
+        the location of the first surviving base, aka. the amount trimmed from the start
+        the location of the last surviving base in the original read
+        the amount trimmed from the end
+
+  reads1_trimmed_unpaired:
+    type: File?
+    format: edam:format_1930  # fastq
+    outputBinding:  # glob must equal the file name passed at argument position 8
+      glob: $(inputs.reads1.nameroot).trimmed.unpaired.fastq
+
+  reads2_trimmed_paired:
+    type: File?
+    format: edam:format_1930  # fastq
+    outputBinding:
+      glob: |
+        ${
+          if (!inputs.reads2) {
+            return null;
+          }
+          return inputs.reads2.nameroot + '.trimmed.fastq';
+        }
+
+  reads2_trimmed_unpaired:
+    type: File?
+    format: edam:format_1930  # fastq
+    outputBinding:  # glob must equal the file name passed at argument position 10
+      glob: |
+        ${ if (inputs.reads2 ) {
+             return inputs.reads2.nameroot + '.trimmed.unpaired.fastq';
+           } else {
+             return null;
+           }
+         }
+
+baseCommand: [ java ]
+
+# The Trimmomatic main class is an argument at position 2 instead of a part of
+# baseCommand: baseCommand is always emitted first, so keeping the class there
+# put java_opts (position 1) after it, handing the JVM flags to Trimmomatic
+# rather than to java.
+arguments:
+- valueFrom: org.usadellab.trimmomatic.Trimmomatic
+  position: 2
+- valueFrom: trim.log
+  prefix: -trimlog
+  position: 4
+- valueFrom: $(runtime.cores)
+  position: 4
+  prefix: -threads
+- valueFrom: $(inputs.reads1.nameroot).trimmed.fastq
+  position: 7
+# Positions 8-10 name the additional output files of Paired End mode. Each
+# expression evaluates to null, which omits the argument, unless end_mode is
+# PE and reads2 is supplied.
+- valueFrom: |
+    ${
+      var paired = inputs.end_mode == "PE" && inputs.reads2;
+      return paired ? inputs.reads1.nameroot + '.trimmed.unpaired.fastq' : null;
+    }
+  position: 8
+- valueFrom: |
+    ${
+      var paired = inputs.end_mode == "PE" && inputs.reads2;
+      return paired ? inputs.reads2.nameroot + '.trimmed.fastq' : null;
+    }
+  position: 9
+- valueFrom: |
+    ${
+      var paired = inputs.end_mode == "PE" && inputs.reads2;
+      return paired ? inputs.reads2.nameroot + '.trimmed.unpaired.fastq' : null;
+    }
+  position: 10
+
+doc: |
+  Trimmomatic is a fast, multithreaded command line tool that can be used to trim and crop
+  Illumina (FASTQ) data as well as to remove adapters. These adapters can pose a real problem
+  depending on the library preparation and downstream application.
+  There are two major modes of the program: Paired end mode and Single end mode. The
+  paired end mode will maintain correspondence of read pairs and also use the additional
+  information contained in paired reads to better find adapter or PCR primer fragments
+  introduced by the library preparation process.
+  Trimmomatic works with FASTQ files (using phred + 33 or phred + 64 quality scores,
+  depending on the Illumina pipeline used).
+
+$namespaces:
+  s: http://schema.org/
+  edam: http://edamontology.org/
+$schemas:
+  - http://edamontology.org/EDAM_1.16.owl
+
+s:license: 'https://www.apache.org/licenses/LICENSE-2.0'
+s:copyrightHolder: 'EMBL - European Bioinformatics Institute'
-- 
GitLab