about | summary | refs | log | tree | commit | diff
path: root/pilot.sh
diff options
context:
space:
mode:
author: Matthijs van der Wild <matthijs.van-der-wild@durham.ac.uk> 2025-07-09 15:31:41 -0400
committer: Matthijs van der Wild <matthijs.van-der-wild@durham.ac.uk> 2025-07-09 15:31:41 -0400
commit: 7b3473095fa3efcaa83d78474772e16cf66ca896 (patch)
tree: 99dc8c3d4a0a61228f6bdfa37b9bbbfe7fd5264e /pilot.sh
parent: 65ed21e4850196a2e1b27254122e1e2df816e262 (diff)
Store script in src directory (HEAD, master)
Diffstat (limited to 'pilot.sh')
-rw-r--r-- pilot.sh 147
1 files changed, 0 insertions, 147 deletions
diff --git a/pilot.sh b/pilot.sh
deleted file mode 100644
index 1256739..0000000
--- a/pilot.sh
+++ /dev/null
@@ -1,147 +0,0 @@
-#!/bin/sh
-#
-# pilot.sh - command-line wrapper that assembles and runs a toil-cwl-runner
-# invocation for a CWL workflow (see usage() for the accepted options).
-#
-# Strict mode is switched on with `set` rather than with arguments on the
-# `#!` line: interpreter options given there only take effect when the file
-# is executed directly and are lost when it is started as `sh pilot.sh`.
-set -eu
-
-# Print the command-line synopsis on stdout and leave with status 0.
-# Globals: PROGRAM (read) - name shown at the start of the synopsis.
-usage() {
-  synopsis="[-r|--restart] [-h|--help] [-c|--container <singularity container>] [--scratch <scratch dir>] [--outdir <output dir>] [--batch_system <batch system>] (-f <input file>) (-p <pipeline>) <Workflow> <input dir>"
-  echo "Usage: ${PROGRAM} ${synopsis}"
-  exit 0
-}
-
-# Report a fatal problem and abort the script.
-# Arguments: $@ - words of the message; they are joined with the first
-#                 character of IFS (a space by default).
-# Outputs:   "Error: <message>" on stderr.
-# Exits:     always, with status 1.
-error() {
-  # "$*" produces a single string, whereas "$@" embedded in a larger string
-  # expands to several words. printf emits the text verbatim; echo may
-  # interpret backslashes in the message depending on the shell.
-  printf 'Error: %s\n' "$*" >&2
-  exit 1
-}
-
-# Validate the container image passed with -c/--container and make it
-# reachable under the fixed names vlbi-cwl.sif and vlbi-cwl_latest.sif.
-# Arguments: $1 - path to a container image file.
-# Globals:   CWL_SINGULARITY_CACHE - kept when already set and non-empty,
-#            otherwise set to the directory that contains $1; exported.
-# Exits:     through error() when $1 is not a regular file.
-set_container() {
-  [ -f "${1}" ] || error "Container ${1} does not exist."
-  # Absolute directory of the image: a relative symlink target would be
-  # resolved against the directory holding the link, not the current one.
-  # dirname also copes with a bare file name that contains no slash.
-  container_dir=$(CDPATH= cd -- "$(dirname -- "${1}")" && pwd)
-  container_name="${1##*/}"
-  # The previous default, "$1%/*", expanded to the path followed by a
-  # literal "%/*" instead of the image's directory.
-  CWL_SINGULARITY_CACHE="${CWL_SINGULARITY_CACHE:-${container_dir}}"
-  # NOTE(review): exported so that the runner started later inherits a value
-  # derived here; before, such a value stayed local to this script - confirm
-  # this is the intended behaviour.
-  export CWL_SINGULARITY_CACHE
-  # "!=" instead of "! ... ==": "==" is not a POSIX test operator.
-  if [ "${container_name}" != "vlbi-cwl.sif" ]; then
-    ln -sf "${container_dir}/${container_name}" "$CWL_SINGULARITY_CACHE/vlbi-cwl.sif"
-    ln -sf "${container_dir}/${container_name}" "$CWL_SINGULARITY_CACHE/vlbi-cwl_latest.sif"
-  fi
-}
-
-# Defaults for everything the option loop may override.
-PROGRAM="${0##*/}"
-BATCH_SYSTEM="slurm"
-RESTART=""
-SCRATCH=""
-
-# Let getopt(1) rewrite the command line into a canonical form, then install
-# the result as the new positional parameters.
-opts=$(getopt --options rhf:c:p: \
-  --longoptions restart,help,container:,scratch:,outdir:,batch_system: \
-  --name 'pilot' -- "$@")
-
-eval set -- "$opts"
-
-# Consume options one at a time. Options taking a value use two words.
-while :; do
-  case "$1" in
-    -r | --restart)
-      RESTART="--restart"
-      shift
-      ;;
-    -h | --help)
-      usage
-      ;;
-    -c | --container)
-      set_container "${2}"
-      shift 2
-      ;;
-    -f)
-      INPUT_FILE="${2}"
-      shift 2
-      ;;
-    -p)
-      PIPELINE="${2}"
-      shift 2
-      ;;
-    --scratch)
-      SCRATCH="${2}"
-      shift 2
-      ;;
-    --outdir)
-      OUTDIR="${2}"
-      shift 2
-      ;;
-    --batch_system)
-      BATCH_SYSTEM="${2}"
-      shift 2
-      ;;
-    *)
-      # Any other word ends option processing and is dropped; in practice
-      # this is the "--" separator that getopt places before the operands.
-      shift
-      break
-      ;;
-  esac
-done
-
-
-# Resolve and validate the operands (<Workflow> <input dir>) and the required
-# options. Presence is checked first so that `set -u` cannot abort with a bare
-# "parameter not set" before a readable message has been printed.
-[ "$#" -ge 2 ] || error "Missing workflow name or input directory."
-[ -n "${PIPELINE:-}" ] || error "Missing pipeline."
-
-# TODO: clean this up
-WORKFLOW="${PIPELINE}/workflows/${1}.cwl"
-WORKFLOW_NAME=$(basename -- "${WORKFLOW%.cwl}")
-if [ ! -f "${WORKFLOW}" ]; then
-  # realpath can fail for a path that does not exist; report the raw path
-  # in that case instead of an empty string.
-  workflow_path=$(realpath "${WORKFLOW}" 2>/dev/null) || workflow_path="${WORKFLOW}"
-  error "${workflow_path} is invalid."
-fi
-INPUT_DIR="${2}"
-[ -d "${INPUT_DIR}" ] || error "Input directory does not exist."
-
-# Keeps a value supplied with -f (or already present in the environment) and
-# otherwise defines the variable as empty so the checks below can run.
-INPUT_FILE="${INPUT_FILE:-}"
-[ -n "${INPUT_FILE}" ] || error "Missing input file."
-[ -f "${INPUT_FILE}" ] || error "Invalid input file."
-[ -n "${CWL_SINGULARITY_CACHE:-}" ] || error "\$CWL_SINGULARITY_CACHE is not set or no container has been specified."
-
-# Derive the paths used by the run and create the directories.
-# The pipeline log is written to the user's home directory.
-PIPELINE_LOG="${HOME}/${WORKFLOW_NAME}.log"
-
-# Root for the toil working tree and the results: the --outdir value when
-# one was given, otherwise the input directory.
-BASE_DIR="${OUTDIR:-$INPUT_DIR}"
-
-TMP_OUTDIR="${BASE_DIR}/toil/tmp/tmp/"
-JOB_LOG_DIR="${BASE_DIR}/toil/logs/"
-# TOIL_BATCH_LOGS_DIR from the environment takes precedence. The fallback
-# used to be ${OUTDIR}/toil/logs, which aborted under `set -u` (or pointed at
-# /toil/logs) whenever --outdir was omitted; it now follows BASE_DIR like the
-# other directories.
-BATCH_LOG_DIR="${TOIL_BATCH_LOGS_DIR:-${BASE_DIR}/toil/logs}"
-WORK_DIR="${BASE_DIR}/toil/work/"
-
-OUTPUT_DIR="${BASE_DIR}/${WORKFLOW_NAME}_results"
-JOBSTORE_DIR="${BASE_DIR}/toil/${WORKFLOW_NAME}_job/"
-# NOTE(review): unlike its siblings this falls back to OUTPUT_DIR rather than
-# the input directory; kept as found - confirm the asymmetry is intended.
-STATS_DIR="${OUTDIR:-$OUTPUT_DIR}/stats"
-
-# JOBSTORE_DIR is deliberately absent from this list, as in the original
-# (NOTE(review): presumably the runner creates it itself - confirm).
-mkdir -p -- "$JOB_LOG_DIR" "$BATCH_LOG_DIR" "$WORK_DIR" "$OUTPUT_DIR" "$STATS_DIR" "$TMP_OUTDIR"
-
-# Optional scratch area: only when --scratch was given is a --tmpdir-prefix
-# argument prepared and its directory created.
-TMPDIR_PREFIX=""
-if [ -n "${SCRATCH}" ]; then
-  TMPDIR_PREFIX="--tmpdir-prefix ${SCRATCH}/tmp_${WORKFLOW_NAME}/"
-  mkdir -p -- "${SCRATCH}/tmp_${WORKFLOW_NAME}"
-fi
-
-# Print information relevant for the run: a two-column table on stdout that
-# lists the workflow file, the input file and the directories derived above.
-# The here-document delimiter is unquoted, so the ${...} references are
-# expanded by the shell when the table is printed.
-cat << EOF
-The following will be used in the run:
-
-Pipeline ┃ ${WORKFLOW}
-Input file ┃ ${INPUT_FILE}
-━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━
-Output directory ┃ ${OUTPUT_DIR}
-Log directory ┃ ${JOB_LOG_DIR}
-Jobstore directory ┃ ${JOBSTORE_DIR}
-Intermediate output directory ┃ ${TMP_OUTDIR}
-Pipeline statistics directory ┃ ${STATS_DIR}
-EOF
-
-# Assemble the complete toil-cwl-runner invocation as one flat string.
-# The string is later expanded unquoted to run the command, so the shell
-# splits it on whitespace: an empty RESTART or TMPDIR_PREFIX simply
-# disappears, but any path containing whitespace would be broken into
-# several arguments. The same string is also echoed before and after the
-# run. The trailing backslashes are line continuations inside the double
-# quotes, so the value contains no newlines.
-TOIL_COMMAND="toil-cwl-runner ${RESTART} ${TMPDIR_PREFIX} \
- --singularity \
- --clean never \
- --retryCount 0 \
- --disableCaching \
- --logFile ${PIPELINE_LOG} \
- --writeLogs ${JOB_LOG_DIR} \
- --stats \
- --clusterStats ${STATS_DIR} \
- --batchSystem ${BATCH_SYSTEM} \
- --batchLogsDir ${BATCH_LOG_DIR} \
- --tmp-outdir-prefix ${TMP_OUTDIR} \
- --workDir ${WORK_DIR} \
- --outdir ${OUTPUT_DIR} \
- --jobStore ${JOBSTORE_DIR} \
- --bypass-file-store \
- ${WORKFLOW} \
- ${INPUT_FILE}"
-
-# Environment for the run. For each variable a non-empty value already
-# present in the caller's environment wins over the default given here.
-# NOTE(review): the default partition, project and time limit below look
-# site-specific - confirm before using the script elsewhere.
-export TOIL_SLURM_ARGS="${TOIL_SLURM_ARGS:-"-p cosma5 -A durham -t 72:00:00"}"
-# Note the meaning of these SLURM options:
-# -N # number of nodes
-# -c # number of cores; available memory is tied to this if not specified separately
-# -p # partition (queue);
-# -A # project
-# -t # runtime in d-hh:mm:ss format
-
-# The whole right-hand side is quoted, as for TOIL_SLURM_ARGS: some /bin/sh
-# implementations field-split an unquoted expansion in `export NAME=$value`,
-# which would truncate a value containing whitespace.
-export APPTAINERENV_PREPEND_PATH="${APPTAINERENV_PREPEND_PATH:-$PIPELINE/scripts}"
-# \$PYTHONPATH keeps a literal "$PYTHONPATH" in the default value
-# (NOTE(review): presumably meant to be expanded inside the container - confirm).
-export APPTAINERENV_PYTHONPATH="${APPTAINERENV_PYTHONPATH:-$PIPELINE/scripts:\$PYTHONPATH}"
-export APPTAINER_BIND="${APPTAINER_BIND:-$HOME,$INPUT_DIR,$OUTPUT_DIR}"
-
-# Show the command line that is about to be executed. printf replaces
-# `echo -e`, which is not portable under /bin/sh (some shells print the
-# "-e" itself). The values are wrapped in literal double quotes; previously
-# the inner quotes merely closed the string, so values containing spaces
-# were printed unquoted.
-printf 'env APPTAINERENV_PREPEND_PATH="%s" APPTAINERENV_PYTHONPATH="%s" APPTAINER_BIND="%s" TOIL_SLURM_ARGS="%s" %s\n' \
-  "$APPTAINERENV_PREPEND_PATH" "$APPTAINERENV_PYTHONPATH" "$APPTAINER_BIND" \
-  "$TOIL_SLURM_ARGS" "${TOIL_COMMAND}"
-
-# Run the workflow with its stdout captured in the results directory. The
-# exit status is recorded instead of letting `set -e` stop the script, so
-# that the statistics and the final report are still produced on failure.
-STATUS=0
-# TOIL_COMMAND is intentionally left unquoted: it is a flat string that has
-# to be split into the individual arguments here.
-env APPTAINERENV_PREPEND_PATH="$APPTAINERENV_PREPEND_PATH" \
-  APPTAINERENV_PYTHONPATH="$APPTAINERENV_PYTHONPATH" \
-  APPTAINER_BIND="$APPTAINER_BIND" \
-  TOIL_SLURM_ARGS="$TOIL_SLURM_ARGS" \
-  ${TOIL_COMMAND} > "${OUTPUT_DIR}/${WORKFLOW_NAME}.out" || STATUS=${?}
-
-# Best effort: a failure to collect statistics must not change the result
-# of the run, hence the deliberate `|| true`.
-toil stats --raw "${JOBSTORE_DIR}" > "${STATS_DIR}/${WORKFLOW_NAME}.stats.json" || true
-toil stats --pretty "${JOBSTORE_DIR}" > "${STATS_DIR}/${WORKFLOW_NAME}.stats.txt" || true
-
-# Repeat the command that was run, state the outcome and exit with the
-# status recorded for the runner. printf replaces `echo -e`, which is not
-# portable under /bin/sh, and prints the command verbatim without
-# interpreting backslashes it may contain.
-printf '\nThe pipeline was run using\n\n%s\n\n' "${TOIL_COMMAND}"
-if [ "${STATUS}" -eq 0 ]; then
-  printf '\nPipeline finished successfully.\n\n'
-else
-  printf '\nPipeline failed with exit status %s.\n\n' "${STATUS}"
-fi
-exit "${STATUS}"