Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 24 additions & 2 deletions CADD.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ where:
-q print basic information about snakemake run
-p print full information about the snakemake run
-d do not remove temporary directory for debug puroposes
-t specify location for temporary directory [default: /tmp/]
-c number of cores that snakemake is allowed to use [default: 1]
-M maximum memory that snakemake is allowed to use [default: available memory]
"

unset OPTARG
Expand All @@ -30,7 +32,19 @@ SIGNULARITYARGS=""
VERBOSE="-q"
CORES="1"
RM_TMP_DIR=true
while getopts ':ho:g:v:c:amr:qpd' option; do
# set memory to available memory by default (can be lower than system memory in docker containers):

#######################################
# Print the cgroup memory limit in whole GiB, or 0 when no usable limit
# exists. 0 means "determine in snakemake".
# Arguments (all optional, mainly useful for testing):
#   $1 - cgroup v1 limit file [default: /sys/fs/cgroup/memory/memory.limit_in_bytes]
#   $2 - cgroup v2 limit file [default: /sys/fs/cgroup/memory.max]
#   $3 - meminfo file         [default: /proc/meminfo]
# Outputs: integer number of GiB on stdout
#######################################
detect_memory_gb() {
  local v1_file=${1:-/sys/fs/cgroup/memory/memory.limit_in_bytes}
  local v2_file=${2:-/sys/fs/cgroup/memory.max}
  local meminfo=${3:-/proc/meminfo}
  local limit_file limit_bytes key value
  local total_kb=""

  if [[ -f "$v1_file" ]]; then
    limit_file=$v1_file
  elif [[ -f "$v2_file" ]]; then
    limit_file=$v2_file
  else
    # if files are not available, determine in snakemake
    echo 0
    return 0
  fi

  limit_bytes=$(cat "$limit_file")

  # cgroup v2 reports the literal string "max" when no limit is set; anything
  # that is not a plain number cannot be used in arithmetic.
  if [[ ! "$limit_bytes" =~ ^[0-9]+$ ]]; then
    echo 0
    return 0
  fi

  # cgroup v1 reports a huge sentinel value when no limit is set. A limit
  # larger than the physical memory is not a real limit, so treat it as unset.
  if [[ -r "$meminfo" ]]; then
    while read -r key value _; do
      if [[ "$key" == "MemTotal:" ]]; then
        total_kb=$value
        break
      fi
    done < "$meminfo"
  fi
  if [[ "$total_kb" =~ ^[0-9]+$ ]] && (( limit_bytes / 1024 > total_kb )); then
    echo 0
    return 0
  fi

  echo $(( limit_bytes / 1024 / 1024 / 1024 ))
}

MEMORY=$(detect_memory_gb)

while getopts ':ho:g:v:c:M:amr:qpdt:' option; do
case "$option" in
h) echo "$usage"
exit
Expand All @@ -43,6 +57,8 @@ while getopts ':ho:g:v:c:amr:qpd' option; do
;;
c) CORES=$OPTARG
;;
M) MAXMEMORY=$OPTARG
;;
a) ANNOTATION=true
;;
m) MAMBAONLY=true
Expand All @@ -55,6 +71,8 @@ while getopts ':ho:g:v:c:amr:qpd' option; do
;;
d) RM_TMP_DIR=false
;;
t) TMP_PREFIX=$OPTARG
;;
\?) printf "illegal option: -%s\n" "$OPTARG" >&2
echo "$usage" >&2
exit 1
Expand All @@ -80,6 +98,7 @@ then
exit 1
fi

## NOTE: the location of the temp folder can be specified with -t (stored in TMP_PREFIX)
### Configuring all the paths

FILENAME=$(basename $INFILE)
Expand Down Expand Up @@ -123,7 +142,7 @@ fi

# Setup temporary folder that is removed reliably on exit and is outside of
# the CADD-scripts directory.
TMP_FOLDER=$(mktemp -d)
# Use the -t prefix when given; otherwise fall back to $TMPDIR or /tmp so that
# "mktemp -p" never receives a missing/empty argument (which makes it fail).
TMP_FOLDER=$(mktemp -d -p "${TMP_PREFIX:-${TMPDIR:-/tmp}}")
if [ "$RM_TMP_DIR" = 'true' ]
then
trap "rm -rf $TMP_FOLDER" ERR EXIT
Expand All @@ -145,9 +164,12 @@ fi

echo "Running snakemake pipeline:"

#######################################
# Print the snakemake command line for this run.
# Globals (read): TMP_OUTFILE, SIGNULARITYARGS, CADD, CORES, CONFIG, MEMORY,
#                 MAXMEMORY, VERBOSE
# Notes:
#   - resources is added to divide total load wrt number of cores & gpus
#   - mem_gb is the value given with -M (stored in MAXMEMORY by the option
#     parser) when present, otherwise the detected MEMORY; without this the
#     -M option would have no effect.
# Outputs: the command as a single line on stdout
#######################################
build_snakemake_command() {
  local mem_gb=${MAXMEMORY:-$MEMORY}
  printf '%s' "snakemake $TMP_OUTFILE \
--resources cpu_load=100 --resources gpu_load=100 \
--sdm conda $SIGNULARITYARGS --conda-prefix $CADD/envs/conda \
--cores $CORES --configfile $CONFIG \
--config mem_gb=$mem_gb \
--snakefile $CADD/Snakefile $VERBOSE"
}

command=$(build_snakemake_command)

echo -e $command
Expand Down
80 changes: 59 additions & 21 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,29 +1,67 @@
FROM condaforge/mambaforge:latest
LABEL io.github.snakemake.containerized="true"
LABEL io.github.snakemake.conda_env_hash="cb2c51dd0ad3df620c4914840c5ef6f5570a5ffd8cfd54cec57d2ffef0a76b08"
######################
# aws output handler #
######################

# Step 1: Retrieve conda environments
# includes:
# - the cmg modules
# - dependencies

RUN mkdir -p /conda-envs/a4fcaaffb623ea8aef412c66280bd623
COPY envs/environment_minimal.yml /conda-envs/a4fcaaffb623ea8aef412c66280bd623/environment.yaml
FROM ubuntu:24.04

RUN mkdir -p /conda-envs/ef25c8d726aebbe9e0ee64fee6c3caa9
COPY envs/esm.yml /conda-envs/ef25c8d726aebbe9e0ee64fee6c3caa9/environment.yaml
## needed apt packages
ARG BUILD_PACKAGES="wget git ssh bzip2 curl axel"
# needed conda packages (only packages not in the requirements of cmg-package)
ARG CONDA_PACKAGES="python==3.12.3 snakemake==8.16.0"
ENV MAMBA_ROOT_PREFIX=/opt/conda/
# key=value form: the space-separated "ENV key value" form is legacy syntax
ENV PATH=/opt/micromamba/bin:/opt/conda/bin:$PATH
# ADD credentials on build
ARG SSH_PRIVATE_KEY
## ENV SETTINGS during runtime
ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
ENV PATH=/opt/conda/bin:/opt/CADD-scripts/:$PATH
ENV DEBIAN_FRONTEND=noninteractive
ENV CADD=/opt/CADD-scripts
# run subsequent RUN instructions in a bash login shell
SHELL ["/bin/bash", "-l", "-c"]

RUN mkdir -p /conda-envs/7f88b844a05ae487b7bb6530b5e6a90c
COPY envs/mmsplice.yml /conda-envs/7f88b844a05ae487b7bb6530b5e6a90c/environment.yaml
# install base packages
RUN echo "Acquire::http::Pipeline-Depth 0;" > /etc/apt/apt.conf.d/99fixbadproxy && \
echo "Acquire::http::No-Cache true;" >> /etc/apt/apt.conf.d/99fixbadproxy && \
echo "Acquire::BrokenProxy true;" >> /etc/apt/apt.conf.d/99fixbadproxy && \
apt-get -y update && \
apt-get -y upgrade && \
apt-get install -y $BUILD_PACKAGES && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

RUN mkdir -p /conda-envs/dfc51ced08aaeb4cbd3dcd509dec0fc5
COPY envs/regulatorySequence.yml /conda-envs/dfc51ced08aaeb4cbd3dcd509dec0fc5/environment.yaml
# Install conda/miniforge3
RUN curl -L -O "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" && \
/bin/bash Miniforge3-$(uname)-$(uname -m).sh -b -p /opt/conda && \
rm Miniforge3-$(uname)-$(uname -m).sh && \
mamba install -y -c conda-forge -c bioconda $CONDA_PACKAGES && \
conda clean --tarballs --index-cache --packages --yes && \
conda config --set channel_priority strict && \
echo ". /opt/conda/etc/profile.d/conda.sh && conda activate base" >> /etc/skel/.bashrc && \
echo ". /opt/conda/etc/profile.d/conda.sh && conda activate base" >> ~/.bashrc

RUN mkdir -p /conda-envs/89fe1049cc18768b984c476c399b7989
COPY envs/vep.yml /conda-envs/89fe1049cc18768b984c476c399b7989/environment.yaml
# install cadd & run test file to generate all envs
RUN cd /opt && \
git clone --branch Fix/max_memory https://github.com/geertvandeweyer/CADD-scripts.git
#cd CADD-scripts && \
#snakemake test/input.vcf \
# --software-deployment-method conda \
## --conda-create-envs-only \
# --conda-prefix envs/conda \
# --configfile config/config_GRCh38_v1.7.yml \
# --snakefile Snakefile -c 1

# Step 2: Generate conda environments
#COPY Install_Annotations.sh /opt/CADD-scripts/Install_Annotations.sh
RUN chmod a+x /opt/CADD-scripts/Install_Annotations.sh

RUN mamba env create --prefix /conda-envs/a4fcaaffb623ea8aef412c66280bd623 --file /conda-envs/a4fcaaffb623ea8aef412c66280bd623/environment.yaml && \
mamba env create --prefix /conda-envs/ef25c8d726aebbe9e0ee64fee6c3caa9 --file /conda-envs/ef25c8d726aebbe9e0ee64fee6c3caa9/environment.yaml && \
mamba env create --prefix /conda-envs/7f88b844a05ae487b7bb6530b5e6a90c --file /conda-envs/7f88b844a05ae487b7bb6530b5e6a90c/environment.yaml && \
mamba env create --prefix /conda-envs/dfc51ced08aaeb4cbd3dcd509dec0fc5 --file /conda-envs/dfc51ced08aaeb4cbd3dcd509dec0fc5/environment.yaml && \
mamba env create --prefix /conda-envs/89fe1049cc18768b984c476c399b7989 --file /conda-envs/89fe1049cc18768b984c476c399b7989/environment.yaml && \
mamba clean --all -y
## some follow up instructions are needed:
RUN echo "WARNING: CADD-scripts installed. To use the container, the following commands are needed: "
RUN echo "# download the annotations sources"
# Printed hint: the script path must match the clone location /opt/CADD-scripts
# exactly (paths are case-sensitive inside the container).
RUN echo "docker run -v /mnt/CADD_data:/opt/CADD-scripts/data my-cadd-scripts:my_version /opt/CADD-scripts/Install_Annotations.sh /opt/CADD-scripts/data GRCh38"
RUN echo "# run the script on the test data to prepare all conda envs"
RUN echo "docker run --name prep-container -w /opt/CADD-scripts -v /mnt/CADD_data/annotations:/opt/CADD-scripts/data/annotations -v /mnt/CADD_data/prescored:/opt/CADD-scripts/data/prescored my-cadd-scripts:my_version bash -c 'snakemake test/input.tsv.gz --resources load=100 --sdm conda --conda-prefix /opt/CADD-scripts/envs/conda --configfile /opt/CADD-scripts/config/config_GRCh38_v1.7_noanno.yml --snakefile /opt/CADD-scripts/Snakefile -c 1 ; rm -Rf /opt/CADD-scripts/test/input_splits /opt/CADD-scripts/test/input.chunk* /opt/CADD-scripts/test/input.*.log /opt/conda/pkgs/*' "
RUN echo "# commit the changes to the image"
RUN echo "docker commit prep-container my-cadd-scripts:my_version"
63 changes: 63 additions & 0 deletions Install_Annotations.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/usr/bin/env bash
#
# Download the CADD v1.7 annotations and prescored (noanno) files for one
# genome build into a target folder, checking every download with md5sum.
#
# Usage: Install_Annotations.sh <target_folder> <build_version>
#   target_folder  directory that receives annotations/ and prescored/
#   build_version  GRCh37 or GRCh38
# Requires: axel, md5sum, tar

set -euo pipefail

# need two arguments:
# 1. target
# 2. build
if [ "$#" -ne 2 ]; then
  echo "Usage: $0 <target_folder> <build_version>" >&2
  exit 1
fi
TARGET=$1
BUILD=$2

# LOCATIONS:
DOWNLOAD_LOCATION=https://krishna.gs.washington.edu/download/CADD

# supported builds: GRCh37, GRCh38
if [ "$BUILD" != "GRCh37" ] && [ "$BUILD" != "GRCh38" ]; then
  echo "Usage: $0 <target_folder> <build_version>" >&2
  echo "Supported builds: GRCh37, GRCh38" >&2
  exit 1
fi

# Resolve the target to an absolute path. The script changes directory below,
# so a relative target would otherwise place prescored/ inside annotations/.
mkdir -p -- "$TARGET"
TARGET=$(cd -- "$TARGET" && pwd)

## ANNOTATIONS
echo "1. ANNOTATIONS"
mkdir -p -- "$TARGET/annotations/"
cd -- "$TARGET/annotations/"
ARCHIVE="${BUILD}_v1.7.tar.gz"
URL="$DOWNLOAD_LOCATION/v1.7/$BUILD/$ARCHIVE"
echo " - download"
axel -a "$URL"
axel -a "$URL.md5"
echo " - md5sum"
md5sum -c "$ARCHIVE.md5"
echo " - untar"
tar -xzvf "$ARCHIVE"
# the archive and its checksum are only removed after a successful extraction
rm -- "$ARCHIVE"
rm -- "$ARCHIVE.md5"

## PRESCORED
echo "2. PRESCORED"
mkdir -p -- "$TARGET/prescored/${BUILD}_v1.7/noanno/"
cd -- "$TARGET/prescored/${BUILD}_v1.7/noanno/"
echo " - download"
# each prescored file comes with an md5, a tabix index and the index's md5
for PRESCORED_FILE in whole_genome_SNVs.tsv.gz gnomad.genomes.r4.0.indel.tsv.gz; do
  URL="$DOWNLOAD_LOCATION/v1.7/$BUILD/$PRESCORED_FILE"
  for SUFFIX in "" .md5 .tbi .tbi.md5; do
    axel -a "$URL$SUFFIX"
  done
done
echo " - md5sum"
md5sum -c -- *.md5
rm -- *.md5




Loading