diff --git a/CADD.sh b/CADD.sh
index 5401eed..05e5fd4 100755
--- a/CADD.sh
+++ b/CADD.sh
@@ -14,7 +14,9 @@ where:
     -q  print basic information about snakemake run
     -p  print full information about the snakemake run
     -d  do not remove temporary directory for debug purposes
+    -t  specify location for temporary directory [default: /tmp/]
     -c  number of cores that snakemake is allowed to use [default: 1]
+    -M  maximum memory that snakemake is allowed to use [default: available memory]
 "

 unset OPTARG
@@ -30,7 +32,19 @@
 SIGNULARITYARGS=""
 VERBOSE="-q"
 CORES="1"
 RM_TMP_DIR=true
-while getopts ':ho:g:v:c:amr:qpd' option; do
+# set memory to available memory by default (can be lower than system memory in docker containers):
+if [[ -f "/sys/fs/cgroup/memory/memory.limit_in_bytes" ]]; then
+    # cgroup v1
+    MEMORY=$(cat /sys/fs/cgroup/memory/memory.limit_in_bytes)
+    MEMORY=$((MEMORY / 1024 / 1024 / 1024))
+elif [[ -f "/sys/fs/cgroup/memory.max" ]]; then
+    # cgroup v2; memory.max reads "max" when no limit is set
+    MEMORY=$(cat /sys/fs/cgroup/memory.max)
+    if [[ "$MEMORY" == "max" ]]; then MEMORY="0"; else MEMORY=$((MEMORY / 1024 / 1024 / 1024)); fi
+else
+    # if the cgroup files are not available, determine memory in snakemake
+    MEMORY="0"
+fi
+
+while getopts ':ho:g:v:c:M:amr:qpdt:' option; do
 case "$option" in
     h) echo "$usage"
        exit
@@ -43,6 +57,8 @@ while getopts ':ho:g:v:c:amr:qpd' option; do
        ;;
     c) CORES=$OPTARG
        ;;
+    M) MEMORY=$OPTARG
+       ;;
     a) ANNOTATION=true
        ;;
     m) MAMBAONLY=true
@@ -55,6 +71,8 @@ while getopts ':ho:g:v:c:amr:qpd' option; do
        ;;
     d) RM_TMP_DIR=false
        ;;
+    t) TMP_PREFIX=$OPTARG
+       ;;
     \?) printf "illegal option: -%s\n" "$OPTARG" >&2
        echo "$usage" >&2
        exit 1
@@ -80,6 +98,7 @@ then
     exit 1
 fi

+## default temporary directory prefix if -t was not specified
+TMP_PREFIX="${TMP_PREFIX:-/tmp/}"

 ### Configuring all the paths

 FILENAME=$(basename $INFILE)
@@ -123,7 +142,7 @@ fi

 # Setup temporary folder that is removed reliably on exit and is outside of
 # the CADD-scripts directory.
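The cgroup-based memory detection added to CADD.sh above is easy to get wrong (cgroup v1 vs v2 paths, and v2 reporting the literal string "max" when unlimited). A minimal, self-contained Python sketch of the same logic — illustrative only, not part of the patch, and the function name is made up:

    import os

    def detect_memory_gb() -> int:
        """Return the container memory limit in GB, or 0 for 'decide later'."""
        for path in ("/sys/fs/cgroup/memory/memory.limit_in_bytes",  # cgroup v1
                     "/sys/fs/cgroup/memory.max"):                   # cgroup v2
            if os.path.isfile(path):
                raw = open(path).read().strip()
                # cgroup v2 writes "max" when no limit is configured
                return int(raw) // 1024**3 if raw.isdigit() else 0
        return 0  # no cgroup info; the Snakefile falls back to psutil

A value of 0 is passed through as --config mem_gb=0 and tells the Snakefile to measure memory itself with psutil.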
-TMP_FOLDER=$(mktemp -d)
+TMP_FOLDER=$(mktemp -d -p "$TMP_PREFIX")
 if [ "$RM_TMP_DIR" = 'true' ]
 then
     trap "rm -rf $TMP_FOLDER" ERR EXIT
@@ -145,9 +164,12 @@ fi

 echo "Running snakemake pipeline:"

+# resources are passed so rules can divide the total load across cores & gpus
 command="snakemake $TMP_OUTFILE \
+    --resources cpu_load=100 --resources gpu_load=100 \
     --sdm conda $SIGNULARITYARGS --conda-prefix $CADD/envs/conda \
     --cores $CORES --configfile $CONFIG \
+    --config mem_gb=$MEMORY \
     --snakefile $CADD/Snakefile $VERBOSE"

 echo -e $command
diff --git a/Dockerfile b/Dockerfile
index e40171f..ce7e16a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,29 +1,67 @@
-FROM condaforge/mambaforge:latest
-LABEL io.github.snakemake.containerized="true"
-LABEL io.github.snakemake.conda_env_hash="cb2c51dd0ad3df620c4914840c5ef6f5570a5ffd8cfd54cec57d2ffef0a76b08"
+##########################
+# CADD-scripts container #
+##########################

-# Step 1: Retrieve conda environments
+# includes:
+# - the CADD scripts
+# - dependencies

-RUN mkdir -p /conda-envs/a4fcaaffb623ea8aef412c66280bd623
-COPY envs/environment_minimal.yml /conda-envs/a4fcaaffb623ea8aef412c66280bd623/environment.yaml
+FROM ubuntu:24.04

-RUN mkdir -p /conda-envs/ef25c8d726aebbe9e0ee64fee6c3caa9
-COPY envs/esm.yml /conda-envs/ef25c8d726aebbe9e0ee64fee6c3caa9/environment.yaml
+## needed apt packages
+ARG BUILD_PACKAGES="wget git ssh bzip2 curl axel"
+# needed conda packages (only packages not pulled in by the CADD conda environments)
+ARG CONDA_PACKAGES="python==3.12.3 snakemake==8.16.0"
+ENV MAMBA_ROOT_PREFIX=/opt/conda/
+## ENV SETTINGS during runtime
+ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
+ENV PATH=/opt/conda/bin:/opt/CADD-scripts/:$PATH
+ENV DEBIAN_FRONTEND noninteractive
+ENV CADD=/opt/CADD-scripts
+SHELL ["/bin/bash", "-l", "-c"]

-RUN mkdir -p /conda-envs/7f88b844a05ae487b7bb6530b5e6a90c
-COPY envs/mmsplice.yml /conda-envs/7f88b844a05ae487b7bb6530b5e6a90c/environment.yaml
+# install base packages
+RUN echo "Acquire::http::Pipeline-Depth 0;" > /etc/apt/apt.conf.d/99fixbadproxy && \
+    echo "Acquire::http::No-Cache true;" >> /etc/apt/apt.conf.d/99fixbadproxy && \
+    echo "Acquire::BrokenProxy true;" >> /etc/apt/apt.conf.d/99fixbadproxy && \
+    apt-get -y update && \
+    apt-get -y upgrade && \
+    apt-get install -y $BUILD_PACKAGES && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*

-RUN mkdir -p /conda-envs/dfc51ced08aaeb4cbd3dcd509dec0fc5
-COPY envs/regulatorySequence.yml /conda-envs/dfc51ced08aaeb4cbd3dcd509dec0fc5/environment.yaml
+# Install conda/miniforge3
+RUN curl -L -O "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" && \
+    /bin/bash Miniforge3-$(uname)-$(uname -m).sh -b -p /opt/conda && \
+    rm Miniforge3-$(uname)-$(uname -m).sh && \
+    mamba install -y -c conda-forge -c bioconda $CONDA_PACKAGES && \
+    conda clean --tarballs --index-cache --packages --yes && \
+    conda config --set channel_priority strict && \
+    echo ". /opt/conda/etc/profile.d/conda.sh && conda activate base" >> /etc/skel/.bashrc && \
+    echo ". /opt/conda/etc/profile.d/conda.sh && conda activate base" >> ~/.bashrc

-RUN mkdir -p /conda-envs/89fe1049cc18768b984c476c399b7989
-COPY envs/vep.yml /conda-envs/89fe1049cc18768b984c476c399b7989/environment.yaml
+# install cadd & run test file to generate all envs
+RUN cd /opt && \
+    git clone --branch Fix/max_memory https://github.com/geertvandeweyer/CADD-scripts.git
+    #cd CADD-scripts && \
+    #snakemake test/input.vcf \
+    #    --software-deployment-method conda \
+    ##   --conda-create-envs-only \
+    #    --conda-prefix envs/conda \
+    #    --configfile config/config_GRCh38_v1.7.yml \
+    #    --snakefile Snakefile -c 1

-# Step 2: Generate conda environments
+#COPY Install_Annotations.sh /opt/CADD-scripts/Install_Annotations.sh
+RUN chmod a+x /opt/CADD-scripts/Install_Annotations.sh

-RUN mamba env create --prefix /conda-envs/a4fcaaffb623ea8aef412c66280bd623 --file /conda-envs/a4fcaaffb623ea8aef412c66280bd623/environment.yaml && \
-    mamba env create --prefix /conda-envs/ef25c8d726aebbe9e0ee64fee6c3caa9 --file /conda-envs/ef25c8d726aebbe9e0ee64fee6c3caa9/environment.yaml && \
-    mamba env create --prefix /conda-envs/7f88b844a05ae487b7bb6530b5e6a90c --file /conda-envs/7f88b844a05ae487b7bb6530b5e6a90c/environment.yaml && \
-    mamba env create --prefix /conda-envs/dfc51ced08aaeb4cbd3dcd509dec0fc5 --file /conda-envs/dfc51ced08aaeb4cbd3dcd509dec0fc5/environment.yaml && \
-    mamba env create --prefix /conda-envs/89fe1049cc18768b984c476c399b7989 --file /conda-envs/89fe1049cc18768b984c476c399b7989/environment.yaml && \
-    mamba clean --all -y
+## some follow-up instructions are needed:
+RUN echo "WARNING: CADD-scripts installed. To use the container, the following commands are needed: "
+RUN echo "# download the annotation sources"
+RUN echo "docker run -v /mnt/CADD_data:/opt/CADD-scripts/data my-cadd-scripts:my_version /opt/CADD-scripts/Install_Annotations.sh /opt/CADD-scripts/data GRCh38"
+RUN echo "# run the script on the test data to prepare all conda envs"
+RUN echo "docker run --name prep-container -w /opt/CADD-scripts -v /mnt/CADD_data/annotations:/opt/CADD-scripts/data/annotations -v /mnt/CADD_data/prescored:/opt/CADD-scripts/data/prescored my-cadd-scripts:my_version bash -c 'snakemake test/input.tsv.gz --resources cpu_load=100 --resources gpu_load=100 --sdm conda --conda-prefix /opt/CADD-scripts/envs/conda --configfile /opt/CADD-scripts/config/config_GRCh38_v1.7_noanno.yml --snakefile /opt/CADD-scripts/Snakefile -c 1 ; rm -Rf /opt/CADD-scripts/test/input_splits /opt/CADD-scripts/test/input.chunk* /opt/CADD-scripts/test/input.*.log /opt/conda/pkgs/*' "
+RUN echo "# commit the changes to the image"
+RUN echo "docker commit prep-container my-cadd-scripts:my_version"
\ No newline at end of file
diff --git a/Install_Annotations.sh b/Install_Annotations.sh
new file mode 100644
index 0000000..01dee7f
--- /dev/null
+++ b/Install_Annotations.sh
@@ -0,0 +1,63 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+# need two arguments:
+# 1. target
+# 2. build
+if [ "$#" -ne 2 ]; then
+    echo "Usage: $0 <target> <build>"
+    exit 1
+fi
+TARGET=$1
+BUILD=$2
+
+# LOCATIONS:
+DOWNLOAD_LOCATION=https://krishna.gs.washington.edu/download/CADD
+
+# supported builds: GRCh37, GRCh38
+if [ "$BUILD" != "GRCh37" ] && [ "$BUILD" != "GRCh38" ]; then
+    echo "Usage: $0 <target> <build>"
+    echo "Supported builds: GRCh37, GRCh38"
+    exit 1
+fi
+
+## ANNOTATIONS
+echo "1. ANNOTATIONS"
+mkdir -p $TARGET/annotations/
+cd $TARGET/annotations/
+URL="$DOWNLOAD_LOCATION/v1.7/$BUILD/${BUILD}_v1.7.tar.gz"
+echo " - download"
+axel -a "$URL"
+axel -a "$URL.md5"
+echo " - md5sum"
+md5sum -c ${BUILD}_v1.7.tar.gz.md5
+echo " - untar"
+tar -xzvf ${BUILD}_v1.7.tar.gz
+rm ${BUILD}_v1.7.tar.gz
+rm ${BUILD}_v1.7.tar.gz.md5
+
+## PRESCORED
+echo "2. PRESCORED"
+mkdir -p $TARGET/prescored/${BUILD}_v1.7/noanno/
+cd $TARGET/prescored/${BUILD}_v1.7/noanno/
+URL="$DOWNLOAD_LOCATION/v1.7/$BUILD/whole_genome_SNVs.tsv.gz"
+echo " - download"
+axel -a "$URL"
+axel -a "$URL.md5"
+axel -a "$URL.tbi"
+axel -a "$URL.tbi.md5"
+URL="$DOWNLOAD_LOCATION/v1.7/$BUILD/gnomad.genomes.r4.0.indel.tsv.gz"
+axel -a "$URL"
+axel -a "$URL.md5"
+axel -a "$URL.tbi"
+axel -a "$URL.tbi.md5"
+echo " - md5sum"
+md5sum -c *.md5
+rm *.md5
diff --git a/Snakefile b/Snakefile
index d53587e..b22bc65 100644
--- a/Snakefile
+++ b/Snakefile
@@ -6,6 +6,8 @@ Note that we are declaring many files temporary here that should be located in a

 # container with conda environments
 containerized: "docker://visze/cadd-scripts-v1_7:0.1.0"

+# psutil and subprocess are used below to detect system and GPU memory
+import psutil
+import subprocess

 # Min version of snakemake
 from snakemake.utils import min_version
@@ -20,24 +22,97 @@ validate(config, schema="schemas/config_schema.yaml")

 # CADD environment variable
 import os

+####################################
+## RULE-SPECIFIC THREADING LIMITS ##
+####################################
+if int(config.get("mem_gb", 0)) == 0:
+    system_memory = psutil.virtual_memory().total / (1024 ** 3)
+else:
+    system_memory = int(config["mem_gb"])
+try:
+    lines = subprocess.check_output(
+        "nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits 2>/dev/null",
+        shell=True,
+    ).decode('utf-8').splitlines()
+    # sum over gpu(s); nvidia-smi reports MiB
+    gpu_memory = int(sum([int(x) for x in lines]) / 1024)
+    # nr of gpus
+    gpu_count = len(lines)
+except Exception:
+    gpu_memory = 0
+    gpu_count = 0
+
+###############
+## GPU TASKS ##
+###############
+# esm takes ~16 GB of system memory per thread & 14 GB of GPU RAM (if available).
+# if a gpu is available, we can use it for esm: one thread per gpu.
+if gpu_count > 0:
+    # one esm thread per gpu
+    config['esm_gpu_slots'] = int(0.9 * gpu_memory / 14)
+else:
+    config['esm_gpu_slots'] = 0
+config['esm_cpu_slots'] = int(max(1, 0.9 * system_memory / 16))
+config['esm_cpu_load'] = int(100 / config['esm_cpu_slots'])
+config['esm_gpu_load'] = 100 if config['esm_gpu_slots'] < 1 else int(100 / config['esm_gpu_slots'])
+config['esm_cpu_threads'] = max(1, int(workflow.cores / config['esm_cpu_slots']))
+
+###############
+## CPU TASKS ##
+###############
+# then assign the other resources; the max(1, ...) guards avoid divisions by zero
+# on machines with very little (or very much) memory
+config['vep_cpu_load'] = max(1, int(100 / max(1, int(0.9 * system_memory / 4))))  # up to 4GB of RAM each
+config['vep_cpu_threads'] = max(1, int(workflow.cores / (100 / config['vep_cpu_load'])))
+config['regseq_cpu_load'] = max(1, int(100 / max(1, int(0.9 * system_memory / 2))))  # up to 2GB of RAM each
+config['mms_cpu_load'] = max(1, int(100 / max(1, int(0.9 * system_memory / 16))))  # up to 16GB of RAM each
+config['mms_cpu_threads'] = max(1, int(workflow.cores / (100 / config['mms_cpu_load'])))
+config['anno_cpu_load'] = 1  # disk IO intensive
+config['impute_cpu_load'] = 1
+config['prescore_cpu_load'] = 10  # disk IO intensive
+config['score_cpu_load'] = 1
+
+print("Threading Overview")
+print("##################")
+print("Assigned cores: {}".format(workflow.cores))
+print("Available system memory: {}GB".format(int(system_memory)))
+if gpu_memory > 0:
+    print("Total GPU memory: {}GB".format(gpu_memory))
+else:
+    print("No GPU found")
+
+# print threading limits
+print("Task Parallelization:")
+print("  PreScore : {}x".format(min(workflow.cores, int(100 / config['prescore_cpu_load']))))
+print("  VEP      : {}x with {} threads each".format(min(workflow.cores, int(100 / config['vep_cpu_load'])), config['vep_cpu_threads']))
+print("  ESM : CPU : {}x with {} threads each (memory constraints)".format(min(workflow.cores, config['esm_cpu_slots']), config['esm_cpu_threads']))
+print("  ESM : GPU : {}x (gpu memory constraints)".format(config['esm_gpu_slots']))
+print("  RegSeq   : {}x".format(min(workflow.cores, int(100 / config['regseq_cpu_load']))))
+print("  MMsplice : {}x with {} threads each (memory constraints)".format(min(workflow.cores, int(100 / config['mms_cpu_load'])), config['mms_cpu_threads']))
+print("  Annotate : {}x".format(min(workflow.cores, int(100 / config['anno_cpu_load']))))
+print("  Impute   : {}x".format(min(workflow.cores, int(100 / config['impute_cpu_load']))))
+
+# flush output before starting workflow
+print("Starting workflow", flush=True)
+
+
+## allowed scattering
+scattergather:
+    split=workflow.cores,

 envvars:
     "CADD",

-# wildcard_constraints:
-# file=".*(?> {log}
+        cat {input.vcf} \
         | python {params.cadd}/src/scripts/VCF2vepVCF.py \
+        | (grep -v '^#' || true) \
+        | sed 's/^chr//' \
         | sort -k1,1 -k2,2n -k4,4 -k5,5 \
-        | uniq > {output} 2> {log}
+        | uniq > {wildcards.file}_splits/full.vcf 2> {log}
+
+        # split
+        LC=$(wc -l {wildcards.file}_splits/full.vcf | cut -f1 -d' ')
+        ## if zero, stop the workflow here (as success)
+        if [[ "$LC" -eq 0 ]]; then
+            echo "No variants to process. Exiting." >> {log}
+            # put the expected (empty) chunk files in place for the downstream steps
+            for i in $(seq 1 {params.threads}); do
+                touch {wildcards.file}_splits/chunk_$i-of-{params.threads}.prepared.vcf
+            done
+            exit 0
+        fi
+
+        LC=$(((LC / {params.threads}) + 1))
+
+        split -l $LC --numeric-suffixes=1 --additional-suffix="-of-{params.threads}.prepared.vcf" {wildcards.file}_splits/full.vcf {wildcards.file}_splits/chunk_ 2>> {log}
+
+        rm -f {wildcards.file}_splits/full.vcf
+
+        # strip padding zeros from the file names
+        for f in {wildcards.file}_splits/chunk_*.prepared.vcf
+        do
+            target=$(echo "$f" | sed -E 's/(chunk_)0*([1-9][0-9]*)(-of-{params.threads}\\.prepared\\.vcf)/\\1\\2\\3/')
+            if [ "$f" != "$target" ]; then
+                mv -n "$f" "$target"
+            fi
+        done
         """
@@ -70,30 +181,38 @@ checkpoint prescore:
     conda:
         "envs/environment_minimal.yml"
     input:
-        vcf="{file}.prepared.vcf",
+        vcf="{file}_splits/chunk_{chunk}.prepared.vcf",
         prescored="%s/%s" % (os.environ["CADD"], config["PrescoredFolder"]),
     output:
-        novel=temp("{file}.novel.vcf"),
-        prescored=temp("{file}.pre.tsv"),
+        novel="{file}_splits/chunk_{chunk}.novel.vcf",
+        prescored="{file}_splits/chunk_{chunk}.pre.tsv",
     log:
-        "{file}.prescore.log",
+        "{file}.chunk_{chunk}.prescore.log",
     params:
         cadd=os.environ["CADD"],
+    resources:
+        # < 1GB of memory per task
+        cpu_load=int(config['prescore_cpu_load']),
     shell:
         """
         # Prescoring
        echo '## Prescored variant file' > {output.prescored} 2> {log};
+        # are prescored files available?
        PRESCORED_FILES=`find -L {input.prescored} -maxdepth 1 -type f -name \\*.tsv.gz | wc -l`
        cp {input.vcf} {input.vcf}.new
        if [ ${{PRESCORED_FILES}} -gt 0 ];
        then
+            # loop over all prescored files: snv, indel, ...
            for PRESCORED in $(ls {input.prescored}/*.tsv.gz)
            do
+                # extract_scored writes found variants to --found_out and not-found ones to stdout
                cat {input.vcf}.new \
                | python {params.cadd}/src/scripts/extract_scored.py --header \
                    -p $PRESCORED --found_out={output.prescored}.tmp \
                > {input.vcf}.tmp 2>> {log};
+                # append the prescored variants to the outfile
                cat {output.prescored}.tmp >> {output.prescored}
+                # put the not-found variants back into the input
                mv {input.vcf}.tmp {input.vcf}.new &> {log};
            done;
            rm {output.prescored}.tmp &>> {log}
@@ -106,16 +225,22 @@ rule annotation_vep:
     conda:
         "envs/vep.yml"
     input:
-        vcf="{file}.novel.vcf",
+        vcf="{file}_splits/chunk_{chunk}.novel.vcf",
         veppath="%s/%s" % (os.environ["CADD"], config["VEPpath"]),
     output:
-        temp("{file}.vep.vcf.gz"),
+        "{file}_splits/chunk_{chunk}.vep.vcf.gz",
     log:
-        "{file}.annotation_vep.log",
+        "{file}.chunk_{chunk}.annotation_vep.log",
     params:
         cadd=os.environ["CADD"],
         genome_build=config["GenomeBuild"],
         ensembl_db=config["EnsemblDB"],
+        threads=config['vep_cpu_threads'],
+    resources:
+        # up to 4GB of memory per task
+        cpu_load=int(config['vep_cpu_load']),
+    threads: config['vep_cpu_threads']
     shell:
         """
         cat {input.vcf} \
@@ -124,6 +249,7 @@ rule annotation_vep:
             --db_version={params.ensembl_db} --assembly {params.genome_build} \
             --format vcf --regulatory --sift b --polyphen b --per_gene --ccds --domains \
             --numbers --canonical --total_length --vcf --force_overwrite --output_file STDOUT \
+            --fork {params.threads} \
         | bgzip -c > {output} 2> {log}
         """


@@ -132,7 +258,8 @@ rule annotate_esm:
     conda:
         "envs/esm.yml"
     input:
-        vcf="{file}.vep.vcf.gz",
+        vcf="{file}_splits/chunk_{chunk}.vep.vcf.gz",
         models=expand(
             "{path}/{model}.pt",
             path=config["ESMpath"],
@@ -140,15 +267,22 @@
         ),
         transcripts="%s/pep.%s.fa" % (config["ESMpath"], config["EnsemblDB"]),
     output:
-        missens=temp("{file}.esm_missens.vcf.gz"),
frameshift=temp("{file}.esm_frameshift.vcf.gz"), - final=temp("{file}.esm.vcf.gz"), + missens="{file}_splits/chunk_{chunk}.esm_missens.vcf.gz", + frameshift="{file}_splits/chunk_{chunk}.esm_frameshift.vcf.gz", + final="{file}_splits/chunk_{chunk}.esm.vcf.gz", log: - "{file}.annotate_esm.log", + "{file}.chunk_{chunk}.annotate_esm.log", + resources: + cpu_load=int(config['esm_cpu_load']), + gpu_load=int(config['esm_gpu_load']), + threads: + config['esm_cpu_threads'], params: cadd=os.environ["CADD"], models=["--model %s " % model for model in config["ESMmodels"]], batch_size=config["ESMbatchsize"], + #header=config["Header"], + shell: """ model_directory=`dirname {input.models[0]}`; @@ -171,6 +305,8 @@ rule annotate_esm: --transcripts {input.transcripts} \ --model-directory $model_directory {params.models} \ --output {output.final} --batch-size {params.batch_size} &>> {log} + + #rm -f {wildcards.file}.esm_in.vcf.gz """ @@ -178,17 +314,20 @@ rule annotate_regseq: conda: "envs/regulatorySequence.yml" input: - vcf="{file}.esm.vcf.gz", + vcf="{file}_splits/chunk_{chunk}.esm.vcf.gz", reference="%s/%s" % (config["REGSEQpath"], "reference.fa"), genome="%s/%s" % (config["REGSEQpath"], "reference.fa.genome"), model="%s/%s" % (config["REGSEQpath"], "Hyperopt400InclNegatives.json"), weights="%s/%s" % (config["REGSEQpath"], "Hyperopt400InclNegatives.h5"), output: - temp("{file}.regseq.vcf.gz"), + "{file}_splits/chunk_{chunk}.regseq.vcf.gz", log: - "{file}.annotate_regseq.log", + "{file}.chunk_{chunk}.annotate_regseq.log", params: cadd=os.environ["CADD"], + resources: + # roughly 4GB of memory + cpu_load=int(config['regseq_cpu_load']), shell: """ python {params.cadd}/src/scripts/lib/tools/regulatorySequence/predictVariants.py \ @@ -205,24 +344,45 @@ rule annotate_mmsplice: conda: "envs/mmsplice.yml" input: - vcf="{file}.regseq.vcf.gz", + vcf="{file}_splits/chunk_{chunk}.regseq.vcf.gz", transcripts="%s/homo_sapiens.110.gtf" % config.get("MMSPLICEpath", ""), reference="%s/reference.fa" % config.get("REFERENCEpath", ""), output: - mmsplice=temp("{file}.mmsplice.vcf.gz"), - idx=temp("{file}.regseq.vcf.gz.tbi"), + mmsplice="{file}_splits/chunk_{chunk}.mmsplice.vcf.gz", + # needed ? + idx="{file}_splits/chunk_{chunk}.regseq.vcf.gz.tbi", log: - "{file}.annotate_mmsplice.log", + "{file}.chunk_{chunk}.annotate_mmsplice.log", params: cadd=os.environ["CADD"], + mms_threads=config['mms_cpu_threads'] # Assigning the number of threads for mmsplice + resources: + cpu_load=int(config['mms_cpu_load']), + threads: + config['mms_cpu_threads'], shell: """ - tabix -p vcf {input.vcf} &> {log}; - KERAS_BACKEND=tensorflow python {params.cadd}/src/scripts/lib/tools/MMSplice.py -i {input.vcf} \ - -g {input.transcripts} \ - -f {input.reference} | \ - grep -v '^Variant(CHROM=' | \ - bgzip -c > {output.mmsplice} 2>> {log} + # mmsplice crashes on empty vcf files: evaluate nr of variants left + LC=$(bgzip -dc {input.vcf} | grep -c -v '#' || true) + bgzip -dc {input.vcf} > /dev/null + echo "nr of lines: " + if [[ "$LC" -eq 0 ]]; then + echo "Empty VCF file, skipping mmsplice annotation." 
>> {log} + tabix -p vcf {input.vcf} &>> {log}; + cp {input.vcf} {output.mmsplice} + else + # set parallelism for tensorflow : + export OMP_NUM_THREADS={params.mms_threads} + export TF_NUM_INTRAOP_THREADS={params.mms_threads} + export TF_NUM_INTEROP_THREADS={params.mms_threads} + # annotate + tabix -p vcf {input.vcf} &> {log}; + KERAS_BACKEND=tensorflow python {params.cadd}/src/scripts/lib/tools/MMSplice.py -i {input.vcf} \ + -g {input.transcripts} \ + -f {input.reference} | \ + grep -v '^Variant(CHROM=' | \ + bgzip -c > {output.mmsplice} 2>> {log} + fi """ @@ -230,15 +390,17 @@ rule annotation: conda: "envs/environment_minimal.yml" input: - vcf=lambda wc: "{file}.%s.vcf.gz" + vcf=lambda wc: "{file}_splits/chunk_{chunk}.%s.vcf.gz" % ("mmsplice" if config["GenomeBuild"] == "GRCh38" else "regseq"), reference_cfg="%s/%s" % (os.environ["CADD"], config["ReferenceConfig"]), output: - temp("{file}.anno.tsv.gz"), + "{file}_splits/chunk_{chunk}.anno.tsv.gz", log: - "{file}.annotation.log", + "{file}.chunk_{chunk}.annotation.log", params: cadd=os.environ["CADD"], + resources: + cpu_load=int(config['anno_cpu_load']), shell: """ zcat {input.vcf} \ @@ -252,14 +414,16 @@ rule imputation: conda: "envs/environment_minimal.yml" input: - tsv="{file}.anno.tsv.gz", + tsv="{file}_splits/chunk_{chunk}.anno.tsv.gz", impute_cfg="%s/%s" % (os.environ["CADD"], config["ImputeConfig"]), output: - temp("{file}.csv.gz"), + "{file}_splits/chunk_{chunk}.csv.gz", log: - "{file}.imputation.log", + "{file}.chunk_{chunk}.imputation.log", params: cadd=os.environ["CADD"], + resources: + cpu_load=int(config['impute_cpu_load']), shell: """ zcat {input.tsv} \ @@ -272,18 +436,20 @@ rule score: conda: "envs/environment_minimal.yml" input: - impute="{file}.csv.gz", - anno="{file}.anno.tsv.gz", + impute="{file}_splits/chunk_{chunk}.csv.gz", + anno="{file}_splits/chunk_{chunk}.anno.tsv.gz", conversion_table="%s/%s" % (os.environ["CADD"], config["ConversionTable"]), model_file="%s/%s" % (os.environ["CADD"], config["Model"]), output: - temp("{file}.novel.tsv"), + "{file}_splits/chunk_{chunk}.novel.tsv", log: - "{file}.score.log", + "{file}.chunk_{chunk}.score.log", params: cadd=os.environ["CADD"], use_anno=config["Annotation"], columns=config["Columns"], + resources: + cpu_load=config['score_cpu_load'], shell: """ python {params.cadd}/src/scripts/predictSKmodel.py \ @@ -300,21 +466,39 @@ rule score: """ -def aggregate_input(wildcards): - with checkpoints.prescore.get(file=wildcards.file).output["novel"].open() as f: - output = ["{file}.pre.tsv"] - for line in f: - if line.strip() != "": - output.append("{file}.novel.tsv") - break - return output +# def aggregate_input(wildcards): +# # Find all chunk files for the given wildcard +# chunk_files = glob.glob(f"{wildcards.file}_splits/{wildcards.file}.chunk_*.novel.vcf") +# pre_files = glob.glob(f"{wildcards.file}_splits/{wildcards.file}.chunk_*.pre.tsv") +# +# # Combine the novel and prescore chunk files if not empty +# output = [f for f in chunk_files + pre_files if os.path.getsize(f) > 0] +# if not output: +# # no output : make empty file +# open(f"{wildcards.file}.empty", "w").close() +# output = [f"{wildcards.file}.empty"] +# +# return output + + + +#def aggregate_input(wildcards): +# with checkpoints.prescore.get(file=wildcards.file).output["novel"].open() as f: +# output = ["{file}.pre.tsv"] +# for line in f: +# if line.strip() != "": +# output.append("{file}.novel.tsv") +# break +# return output rule join: conda: "envs/environment_minimal.yml" input: - aggregate_input, + 
+        #aggregate_input,
+        pre=gather.split("{{file}}_splits/chunk_{scatteritem}.pre.tsv"),
+        scored=gather.split("{{file}}_splits/chunk_{scatteritem}.novel.tsv"),
     output:
         "{file,.+(? {output} 2>> {log};
diff --git a/envs/esm.yml b/envs/esm.yml
index a364f54..ad6ea64 100644
--- a/envs/esm.yml
+++ b/envs/esm.yml
@@ -6,7 +6,7 @@ channels:
   - conda-forge
   - bioconda
 dependencies:
-  - numpy
+  - numpy<2
   - python
   - click
   - biopython
diff --git a/schemas/config_schema.yaml b/schemas/config_schema.yaml
index 3f77190..b89bd27 100644
--- a/schemas/config_schema.yaml
+++ b/schemas/config_schema.yaml
@@ -77,6 +77,10 @@ properties:
   Columns:
     type: string
     pattern: "^[0-9]+(-[0-9]+)?(,[0-9]+(-[0-9]+)?)*$"
+  mem_gb:
+    type: integer
+    minimum: 0
+    description: Available memory in GB, used to scale maximum thread counts
 additionalProperties: false
diff --git a/src/scripts/lib/tools/esmScore/esmScore_frameshift_av.py b/src/scripts/lib/tools/esmScore/esmScore_frameshift_av.py
index f6ba8bf..b46a6e2 100644
--- a/src/scripts/lib/tools/esmScore/esmScore_frameshift_av.py
+++ b/src/scripts/lib/tools/esmScore/esmScore_frameshift_av.py
@@ -368,6 +368,12 @@ def cli(
             data_alt.append((transcript_id[i], aa_seq_alt[i][-window:]))

     ref_alt_scores = []
+    # preload the model(s) once; reloading them in the loops below caused heavy disk I/O
+    preloaded_models = list()
+    for k in range(0, len(modelsToUse), 1):
+        model, alphabet = pretrained.load_model_and_alphabet(modelsToUse[k])
+        preloaded_models.append([model, alphabet])
     # load esm model(s)
     for o in range(0, len([data_ref, data_alt]), 1):
         data = [data_ref, data_alt][o]
@@ -375,7 +381,8 @@
         if len(data) >= 1:
             for k in range(0, len(modelsToUse), 1):
                 torch.cuda.empty_cache()
-                model, alphabet = pretrained.load_model_and_alphabet(modelsToUse[k])
+                # fetch the preloaded model instead of reloading it from disk
+                model, alphabet = preloaded_models[k]
                 model.eval()  # disables dropout for deterministic results
                 batch_converter = alphabet.get_batch_converter()
@@ -386,6 +393,7 @@
                 # apply esm model to sequence; tokenProbs holds the probabilities of all aa at each position, based on the sequence in "data"
                 seq_scores = []
                 for t in range(0, len(data), batch_size):
+                    # process the sequences in batches of batch_size
                     if t + batch_size > len(data):
                         batch_data = data[t:]
                     else:
@@ -423,9 +431,9 @@
                 else:
                     # calc mean of all possible aa at this position
                     aa_scores = []
-                    for k in range(4, 24, 1):
+                    for l in range(4, 24, 1):
                         aa_scores.append(
-                            token_probs[i, y + 1, k]
+                            token_probs[i, y + 1, l]
                         )  # for all aa (except selenocysteine)
                     aa_scores.append(
                         token_probs[i, y + 1, 26]
diff --git a/src/scripts/lib/tools/esmScore/esmScore_inFrame_av.py b/src/scripts/lib/tools/esmScore/esmScore_inFrame_av.py
index a8ca043..8a1b0af 100644
--- a/src/scripts/lib/tools/esmScore/esmScore_inFrame_av.py
+++ b/src/scripts/lib/tools/esmScore/esmScore_inFrame_av.py
@@ -464,6 +464,11 @@ def cli(input_file, transcript_file, model_directory, modelsToUse, output_file,
             data_alt.append((transcript_id[i], aa_seq_alt[i][-window:]))

     ref_alt_scores = []
+    # preload the model(s) once, as above; reloading per loop iteration caused heavy disk I/O
+    preloaded_models = list()
+    for k in range(0, len(modelsToUse), 1):
+        model, alphabet = pretrained.load_model_and_alphabet(modelsToUse[k])
+        preloaded_models.append([model, alphabet])
     # load esm model(s)
     for o in range(0, len([data_ref, data_alt]), 1):
         data = [data_ref, data_alt][o]
@@ -471,7 +476,8 @@ def cli(input_file, transcript_file, model_directory, modelsToUse, output_file,
         if len(data) >= 1:
             for k in range(0, len(modelsToUse), 1):
                 torch.cuda.empty_cache()
-                model, alphabet = pretrained.load_model_and_alphabet(modelsToUse[k])
+                # fetch the preloaded model instead of reloading it from disk
+                model, alphabet = preloaded_models[k]
                 model.eval()  # disables dropout for deterministic results
                 batch_converter = alphabet.get_batch_converter()
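Taken together, the two esmScore changes replace a per-iteration pretrained.load_model_and_alphabet() call with a single preload pass. A condensed sketch of the before/after behaviour (names are illustrative; load_model stands in for the ESM loader):

    def load_model(name):
        # stand-in for pretrained.load_model_and_alphabet(name)
        print(f"loading {name} from disk")
        return object(), object()

    models_to_use = ["model_a", "model_b"]

    # after the patch: one disk read per model ...
    preloaded = [load_model(m) for m in models_to_use]

    # ... instead of one per (allele, model) combination, as before
    for allele in ("ref", "alt"):
        for k, _ in enumerate(models_to_use):
            model, alphabet = preloaded[k]  # cheap list lookup, no disk I/O
            # ... scoring of `allele` with `model` happens here ...

With two alleles and several models per run, this removes all but the first disk read of each multi-gigabyte checkpoint.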