aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/pilot.sh147
1 file changed, 147 insertions, 0 deletions
diff --git a/src/pilot.sh b/src/pilot.sh
new file mode 100644
index 0000000..1256739
--- /dev/null
+++ b/src/pilot.sh
@@ -0,0 +1,147 @@
+#!/bin/sh -eu
+
usage() {
    # Print the synopsis line and terminate successfully.
    printf '%s\n' "Usage: ${PROGRAM} [-r|--restart] [-h|--help] [-c|--container <singularity container>] [--scratch <scratch dir>] [--outdir <output dir>] [--batch_system <batch system>] (-f <input file>) (-p <pipeline>) <Workflow> <input dir>"
    exit 0
}
+
error() {
    # Print all arguments as one diagnostic line on stderr and abort.
    # "$*" joins the arguments with spaces; the original "Error: $@"
    # mixed "$@" into a quoted string (SC2145), and `echo` mangles
    # arguments such as "-n" or backslash sequences -- printf does not.
    printf 'Error: %s\n' "$*" >&2
    exit 1
}
+
set_container() {
    # Validate the container image path in $1 and make it reachable under
    # the canonical names the pipeline looks up in the cache directory.
    [ -f "${1}" ] || error "Container ${1} does not exist."
    # Default the cache to the directory containing the container.
    # NB: this needs the full ${1%/*} expansion -- the original
    # "$1%/*" appended the literal text "%/*" to the path.
    CWL_SINGULARITY_CACHE="${CWL_SINGULARITY_CACHE:-${1%/*}}"
    # Unless the image is already called vlbi-cwl.sif, symlink it into the
    # cache under both names toil-cwl-runner may request.
    # (`!=` instead of `! ... ==`: `==` inside `[ ]` is not POSIX sh.)
    if [ "${1##*/}" != "vlbi-cwl.sif" ]; then
        ln -sf "${1}" "${CWL_SINGULARITY_CACHE}/vlbi-cwl.sif"
        ln -sf "${1}" "${CWL_SINGULARITY_CACHE}/vlbi-cwl_latest.sif"
    fi
}
+
# Parse the command line with util-linux getopt (needed for long options).
# Fail with a message on unparsable options instead of relying on set -e
# to abort silently.
opts=$(getopt -o rhf:c:p: --long restart,help,container:,scratch:,outdir:,batch_system: \
    -n 'pilot' -- "$@") || error "Failed to parse command-line arguments."

eval set -- "$opts"

# Defaults.  Every option variable is initialised so that the checks
# further down ([ -n "${INPUT_FILE}" ] etc.) report a friendly error
# instead of tripping `set -u` with "unbound variable" when an option
# was not supplied.
RESTART=""
SCRATCH=""
INPUT_FILE=""
PIPELINE=""
OUTDIR=""
BATCH_SYSTEM="slurm"
PROGRAM="${0##*/}"

while true; do
    case "$1" in
        -c | --container) set_container "${2}"; shift 2 ;;
        -f ) INPUT_FILE="${2}"; shift 2 ;;
        -h | --help ) usage ;;
        -p ) PIPELINE="${2}"; shift 2 ;;
        -r | --restart) RESTART="--restart"; shift ;;
        --scratch) SCRATCH="${2}"; shift 2 ;;
        --outdir) OUTDIR="${2}"; shift 2 ;;
        --batch_system) BATCH_SYSTEM="${2}"; shift 2 ;;
        # getopt always emits `--` before the positionals: drop it and
        # leave the remaining arguments in $1, $2, ...
        * ) shift; break ;;
    esac
done
+
+
+# TODO: clean this up
+WORKFLOW="${PIPELINE}/workflows/${1}.cwl"
+WORKFLOW_NAME=$(basename ${WORKFLOW%.cwl})
+[ -f "${WORKFLOW}" ] || error "$(realpath ${WORKFLOW}) is invalid."
+INPUT_DIR="${2}"
+[ -d "${INPUT_DIR}" ] || error "Input directory does not exist."
+
+[ -n "${INPUT_FILE}" ] || error "Missing input file."
+[ -f "${INPUT_FILE}" ] || error "Invalid input file."
+[ ! -z "${CWL_SINGULARITY_CACHE}" ] || error "\$CWL_SINGULARITY_CACHE is not set or no container has been specified."
+
+PIPELINE_LOG="${HOME}/${WORKFLOW_NAME}.log"
+
+TMP_OUTDIR="${OUTDIR:-$INPUT_DIR}/toil/tmp/tmp/"
+JOB_LOG_DIR="${OUTDIR:-$INPUT_DIR}/toil/logs/"
+BATCH_LOG_DIR="${TOIL_BATCH_LOGS_DIR:-${OUTDIR}/toil/logs}"
+WORK_DIR="${OUTDIR:-$INPUT_DIR}/toil/work/"
+
+OUTPUT_DIR="${OUTDIR:-$INPUT_DIR}/${WORKFLOW_NAME}_results"
+JOBSTORE_DIR="${OUTDIR:-$INPUT_DIR}/toil/${WORKFLOW_NAME}_job/"
+STATS_DIR="${OUTDIR:-$OUTPUT_DIR}/stats"
+
+mkdir -p "$JOB_LOG_DIR"
+mkdir -p "$BATCH_LOG_DIR"
+mkdir -p "$WORK_DIR"
+mkdir -p "$OUTPUT_DIR"
+mkdir -p "$STATS_DIR"
+mkdir -p "$TMP_OUTDIR"
+
+TMPDIR_PREFIX=""
+if [ ! -z "${SCRATCH}" ]; then
+ TMPDIR_PREFIX="--tmpdir-prefix ${SCRATCH}/tmp_${WORKFLOW_NAME}/"
+ mkdir -p "${SCRATCH}/tmp_${WORKFLOW_NAME}"
+fi
+
+# Print information relevant for the run
+cat << EOF
+The following will be used in the run:
+
+Pipeline ┃ ${WORKFLOW}
+Input file ┃ ${INPUT_FILE}
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━
+Output directory ┃ ${OUTPUT_DIR}
+Log directory ┃ ${JOB_LOG_DIR}
+Jobstore directory ┃ ${JOBSTORE_DIR}
+Intermediate output directory ┃ ${TMP_OUTDIR}
+Pipeline statistics directory ┃ ${STATS_DIR}
+EOF
+
+TOIL_COMMAND="toil-cwl-runner ${RESTART} ${TMPDIR_PREFIX} \
+ --singularity \
+ --clean never \
+ --retryCount 0 \
+ --disableCaching \
+ --logFile ${PIPELINE_LOG} \
+ --writeLogs ${JOB_LOG_DIR} \
+ --stats \
+ --clusterStats ${STATS_DIR} \
+ --batchSystem ${BATCH_SYSTEM} \
+ --batchLogsDir ${BATCH_LOG_DIR} \
+ --tmp-outdir-prefix ${TMP_OUTDIR} \
+ --workDir ${WORK_DIR} \
+ --outdir ${OUTPUT_DIR} \
+ --jobStore ${JOBSTORE_DIR} \
+ --bypass-file-store \
+ ${WORKFLOW} \
+ ${INPUT_FILE}"
+
+export TOIL_SLURM_ARGS="${TOIL_SLURM_ARGS:-"-p cosma5 -A durham -t 72:00:00"}"
+# Note the meaning of these SLURM options:
+# -N # number of nodes
+# -c # number of cores; available memory is tied to this if not specified separately
+# -p # partition (queue);
+# -A # project
+# -t # runtime in d-hh:mm:ss format
+
+export APPTAINERENV_PREPEND_PATH=${APPTAINERENV_PREPEND_PATH:-"$PIPELINE/scripts"}
+export APPTAINERENV_PYTHONPATH=${APPTAINERENV_PYTHONPATH:-"$PIPELINE/scripts:\$PYTHONPATH"}
+export APPTAINER_BIND=${APPTAINER_BIND:-"$HOME,$INPUT_DIR,$OUTPUT_DIR"}
+
# Log the command that is about to run, environment overrides included, so
# a failed run can be reproduced by hand.  printf instead of `echo -e`:
# -e is not portable under /bin/sh, and the original's nested quoting
# closed and reopened the string around each variable.
printf 'env APPTAINERENV_PREPEND_PATH=%s APPTAINERENV_PYTHONPATH=%s APPTAINER_BIND=%s TOIL_SLURM_ARGS=%s %s\n' \
    "$APPTAINERENV_PREPEND_PATH" \
    "$APPTAINERENV_PYTHONPATH" \
    "$APPTAINER_BIND" \
    "$TOIL_SLURM_ARGS" \
    "${TOIL_COMMAND}"

# Run the pipeline.  ${TOIL_COMMAND} is expanded unquoted on purpose so
# the shell word-splits the logged string back into a command line.  The
# `&& ... || ...` dance captures the exit status without tripping `set -e`.
env APPTAINERENV_PREPEND_PATH="$APPTAINERENV_PREPEND_PATH" \
    APPTAINERENV_PYTHONPATH="$APPTAINERENV_PYTHONPATH" \
    APPTAINER_BIND="$APPTAINER_BIND" \
    TOIL_SLURM_ARGS="$TOIL_SLURM_ARGS" \
    ${TOIL_COMMAND} > "${OUTPUT_DIR}/${WORKFLOW_NAME}.out" && STATUS=${?} || STATUS=${?}

# Collect run statistics from the jobstore; best effort -- never let a
# stats failure mask the pipeline's own exit status.
toil stats --raw "${JOBSTORE_DIR}" > "${STATS_DIR}/${WORKFLOW_NAME}.stats.json" || true
toil stats --pretty "${JOBSTORE_DIR}" > "${STATS_DIR}/${WORKFLOW_NAME}.stats.txt" || true

printf '\nThe pipeline was run using\n\n%s\n\n' "${TOIL_COMMAND}"
if [ "${STATUS}" -eq 0 ]; then
    printf '\nPipeline finished successfully.\n\n'
else
    printf '\nPipeline failed with exit status %s.\n\n' "${STATUS}"
fi
exit "${STATUS}"