From c0a966318da036028088798ff58608327f55e110 Mon Sep 17 00:00:00 2001 From: anushka255 Date: Wed, 24 Apr 2024 12:19:10 -0400 Subject: [PATCH 1/5] added custom snakemake profile --- snakemake/profiles/config.yaml | 26 ++++++++++++ snakemake/profiles/slurm-status.py | 63 ++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+) create mode 100644 snakemake/profiles/config.yaml create mode 100644 snakemake/profiles/slurm-status.py diff --git a/snakemake/profiles/config.yaml b/snakemake/profiles/config.yaml new file mode 100644 index 00000000..7239fbd8 --- /dev/null +++ b/snakemake/profiles/config.yaml @@ -0,0 +1,26 @@ +--- +# basic configuration +use-conda: true +conda-frontend: conda +printshellcmds: true + +# cluster specific settings +cluster: + makedir -p logs/{rules} && + sbatch + --cpus-per-task={threads} + --mem={resources.mem}M + --time={resources.time} --output=slurm_out/%x-%A + --job-name={rule} --parsable + --partition={resources.partition} +cluster-status: "slurm-status.py" +cluster-cancel: scancel +cluster-cancel-nargs: 50 +latency-wait: 120 # wait 2 minutes for missing files before raising exception +# important for NFS +jobs: 250 # maximum jobs to run at once +max-jobs-per-second: 1 +max-status-checks-per-second: 10 +local-cores: 4 # maximum local jobs to run +default-resources: + partition=main,hoppertest,skinniderlab \ No newline at end of file diff --git a/snakemake/profiles/slurm-status.py b/snakemake/profiles/slurm-status.py new file mode 100644 index 00000000..831d4149 --- /dev/null +++ b/snakemake/profiles/slurm-status.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 +import re +import subprocess as sp +import shlex +import sys +import time +import logging +logger = logging.getLogger("__name__") + +STATUS_ATTEMPTS = 20 + +jobid = sys.argv[1] + +for i in range(STATUS_ATTEMPTS): + try: + sacct_res = sp.check_output(shlex.split("sacct -P -b -j {} -n".format(jobid))) + res = {x.split("|")[0]: x.split("|")[1] for x in sacct_res.decode().strip().split("\n")} + break + except sp.CalledProcessError as e: + logger.error("sacct process error") + logger.error(e) + except IndexError as e: + pass + # Try getting job with scontrol instead in case sacct is misconfigured + try: + sctrl_res = sp.check_output(shlex.split("scontrol -o show job {}".format(jobid))) + m = re.search("JobState=(\w+)", sctrl_res.decode()) + res = {jobid: m.group(1)} + break + except sp.CalledProcessError as e: + logger.error("scontrol process error") + logger.error(e) + if i >= STATUS_ATTEMPTS - 1: + print("failed") + exit(0) + else: + time.sleep(1) + +status = res[jobid] + +if (status == "BOOT_FAIL"): + print("failed") +elif (status == "OUT_OF_MEMORY"): + print("failed") +elif (status.startswith("CANCELLED")): + print("failed") +elif (status == "COMPLETED"): + print("success") +elif (status == "DEADLINE"): + print("failed") +elif (status == "FAILED"): + print("failed") +elif (status == "NODE_FAIL"): + print("failed") +elif (status == "PREEMPTED"): + print("failed") +elif (status == "TIMEOUT"): + print("failed") +# Unclear whether SUSPENDED should be treated as running or failed +elif (status == "SUSPENDED"): + print("failed") +else: + print("running") \ No newline at end of file From 75e1c3ea4a6d03f10f316712a412695dbd0c8919 Mon Sep 17 00:00:00 2001 From: Anushka Acharya Date: Wed, 24 Apr 2024 12:56:44 -0400 Subject: [PATCH 2/5] minor tweaks --- snakemake/Snakefile | 2 +- snakemake/Snakefile_model_eval | 4 ++-- snakemake/config.json | 12 ++++++------ snakemake/profiles/config.yaml | 7 +++---- snakemake/profiles/slurm-status.py | 0 src/clm/plot/train_discriminator.py | 2 +- 6 files changed, 13 insertions(+), 14 deletions(-) mode change 100644 => 100755 snakemake/profiles/slurm-status.py diff --git a/snakemake/Snakefile b/snakemake/Snakefile index e452a249..466105d0 100644 --- a/snakemake/Snakefile +++ b/snakemake/Snakefile @@ -195,7 +195,7 @@ rule sample_molecules_RNN: output: output_file=f"{OUTPUT_DIR}/{{enum_factor}}/prior/samples/{{dataset}}_{{repr}}_{{fold}}_{{train_seed}}_{{sample_seed}}_samples.csv" resources: - mem_mb=1000, + mem_mb=2000, runtime=15+MODEL_PARAMS["sample_mols"]//10000, slurm_extra="--gres=gpu:1" shell: diff --git a/snakemake/Snakefile_model_eval b/snakemake/Snakefile_model_eval index 97393fdd..10b4a1c8 100644 --- a/snakemake/Snakefile_model_eval +++ b/snakemake/Snakefile_model_eval @@ -25,8 +25,8 @@ rule all: input: calculate_outcomes_file=expand(f"{OUTPUT_DIR}/model_evaluation/{{enum_factor}}/{{dataset}}_{{repr}}_{{fold}}_calculate_outcomes.csv", fold=range(FOLDS),repr=REPRESENTATIONS,dataset=DATASET,enum_factor=ENUM_FACTORS), - nn_tc_file=expand(f"{OUTPUT_DIR}/model_evaluation/{{enum_factor}}/{{dataset}}_{{repr}}_{{fold}}_write_nn_tc.csv", - repr=REPRESENTATIONS,fold=range(FOLDS),dataset=DATASET,enum_factor=ENUM_FACTORS), + # nn_tc_file=expand(f"{OUTPUT_DIR}/model_evaluation/{{enum_factor}}/{{dataset}}_{{repr}}_{{fold}}_write_nn_tc.csv", + # repr=REPRESENTATIONS,fold=range(FOLDS),dataset=DATASET,enum_factor=ENUM_FACTORS), train_discriminator=expand(f"{OUTPUT_DIR}/model_evaluation/{{enum_factor}}/{{dataset}}_{{repr}}_{{fold}}_train_discriminator_.csv", repr=REPRESENTATIONS,fold=range(FOLDS),dataset=DATASET,enum_factor=ENUM_FACTORS), freq_distribution=expand(f"{OUTPUT_DIR}/model_evaluation/{{enum_factor}}/{{dataset}}_{{repr}}_{{fold}}_freq_distribution.csv", diff --git a/snakemake/config.json b/snakemake/config.json index aa9f806c..7f13d609 100644 --- a/snakemake/config.json +++ b/snakemake/config.json @@ -1,13 +1,13 @@ { - "output_dir": "data", - "dataset": "/path/to/.txt", - "pubchem_tsv_file": "/path/to/PubChem.tsv", + "output_dir": "/Genomics/argo/users/aa9078/PED_1", + "dataset": "/Genomics/argo/users/aa9078/data/prior/raw/PED.csv", + "pubchem_tsv_file": "/Genomics/singhlab/vineetb/CLM/snakemake/data/PubChem_with_fps.tsv", "representations": ["SMILES"], "folds": 10, - "train_seeds": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + "train_seeds": [0], "sample_seeds": [0], - "enum_factors": [0, 10, 30, 50, 100], + "enum_factors": [0, 1], "max_input_smiles": 0, "model_params": { @@ -30,5 +30,5 @@ "min_tc": 0, "top_k": 30, "err_ppm": 10, - "random_seed": null + "random_seed": 12345 } diff --git a/snakemake/profiles/config.yaml b/snakemake/profiles/config.yaml index 7239fbd8..c85509a7 100644 --- a/snakemake/profiles/config.yaml +++ b/snakemake/profiles/config.yaml @@ -6,11 +6,10 @@ printshellcmds: true # cluster specific settings cluster: - makedir -p logs/{rules} && sbatch --cpus-per-task={threads} - --mem={resources.mem}M - --time={resources.time} --output=slurm_out/%x-%A + --mem={resources.mem_mb} + --time={resources.runtime} --output=slurm_out/%x-%A --job-name={rule} --parsable --partition={resources.partition} cluster-status: "slurm-status.py" @@ -23,4 +22,4 @@ max-jobs-per-second: 1 max-status-checks-per-second: 10 local-cores: 4 # maximum local jobs to run default-resources: - partition=main,hoppertest,skinniderlab \ No newline at end of file + partition=main,hoppertest,skinniderlab diff --git a/snakemake/profiles/slurm-status.py b/snakemake/profiles/slurm-status.py old mode 100644 new mode 100755 diff --git a/src/clm/plot/train_discriminator.py b/src/clm/plot/train_discriminator.py index 32729308..2f99a954 100644 --- a/src/clm/plot/train_discriminator.py +++ b/src/clm/plot/train_discriminator.py @@ -86,7 +86,7 @@ def plot(outcome_dir, output_dir): # Make output directory if it doesn't exist yet os.makedirs(output_dir, exist_ok=True) - outcome_files = glob.glob(f"{outcome_dir}/*train_discriminator.csv") + outcome_files = glob.glob(f"{outcome_dir}/*train_discriminator_.csv") outcome = pd.concat( [pd.read_csv(outcome_file, delimiter=",") for outcome_file in outcome_files] ) From bdfc36dec3a1ec724df8598ce2b57ea87dc9c4c1 Mon Sep 17 00:00:00 2001 From: anushka255 Date: Wed, 24 Apr 2024 13:15:36 -0400 Subject: [PATCH 3/5] pre-commit checks --- .flake8 | 2 +- snakemake/profiles/config.yaml | 10 ++++----- snakemake/profiles/slurm-status.py | 34 ++++++++++++++++++------------ 3 files changed, 26 insertions(+), 20 deletions(-) diff --git a/.flake8 b/.flake8 index 6ad40a51..e84c7792 100644 --- a/.flake8 +++ b/.flake8 @@ -1,5 +1,5 @@ [flake8] -extend-ignore = E203,E501 +extend-ignore = E203,E501,W605 max-complexity = 99 max-line-length = 88 exclude = .git,__pycache__,build,dist diff --git a/snakemake/profiles/config.yaml b/snakemake/profiles/config.yaml index 7239fbd8..c7a60e51 100644 --- a/snakemake/profiles/config.yaml +++ b/snakemake/profiles/config.yaml @@ -7,10 +7,10 @@ printshellcmds: true # cluster specific settings cluster: makedir -p logs/{rules} && - sbatch - --cpus-per-task={threads} - --mem={resources.mem}M - --time={resources.time} --output=slurm_out/%x-%A + sbatch + --cpus-per-task={threads} + --mem={resources.mem}M + --time={resources.time} --output=slurm_out/%x-%A --job-name={rule} --parsable --partition={resources.partition} cluster-status: "slurm-status.py" @@ -23,4 +23,4 @@ max-jobs-per-second: 1 max-status-checks-per-second: 10 local-cores: 4 # maximum local jobs to run default-resources: - partition=main,hoppertest,skinniderlab \ No newline at end of file + partition=main,hoppertest,skinniderlab diff --git a/snakemake/profiles/slurm-status.py b/snakemake/profiles/slurm-status.py index 831d4149..8eb0cbc8 100644 --- a/snakemake/profiles/slurm-status.py +++ b/snakemake/profiles/slurm-status.py @@ -5,6 +5,7 @@ import sys import time import logging + logger = logging.getLogger("__name__") STATUS_ATTEMPTS = 20 @@ -14,16 +15,21 @@ for i in range(STATUS_ATTEMPTS): try: sacct_res = sp.check_output(shlex.split("sacct -P -b -j {} -n".format(jobid))) - res = {x.split("|")[0]: x.split("|")[1] for x in sacct_res.decode().strip().split("\n")} + res = { + x.split("|")[0]: x.split("|")[1] + for x in sacct_res.decode().strip().split("\n") + } break except sp.CalledProcessError as e: logger.error("sacct process error") logger.error(e) - except IndexError as e: + except IndexError: pass # Try getting job with scontrol instead in case sacct is misconfigured try: - sctrl_res = sp.check_output(shlex.split("scontrol -o show job {}".format(jobid))) + sctrl_res = sp.check_output( + shlex.split("scontrol -o show job {}".format(jobid)) + ) m = re.search("JobState=(\w+)", sctrl_res.decode()) res = {jobid: m.group(1)} break @@ -38,26 +44,26 @@ status = res[jobid] -if (status == "BOOT_FAIL"): +if status == "BOOT_FAIL": print("failed") -elif (status == "OUT_OF_MEMORY"): +elif status == "OUT_OF_MEMORY": print("failed") -elif (status.startswith("CANCELLED")): +elif status.startswith("CANCELLED"): print("failed") -elif (status == "COMPLETED"): +elif status == "COMPLETED": print("success") -elif (status == "DEADLINE"): +elif status == "DEADLINE": print("failed") -elif (status == "FAILED"): +elif status == "FAILED": print("failed") -elif (status == "NODE_FAIL"): +elif status == "NODE_FAIL": print("failed") -elif (status == "PREEMPTED"): +elif status == "PREEMPTED": print("failed") -elif (status == "TIMEOUT"): +elif status == "TIMEOUT": print("failed") # Unclear whether SUSPENDED should be treated as running or failed -elif (status == "SUSPENDED"): +elif status == "SUSPENDED": print("failed") else: - print("running") \ No newline at end of file + print("running") From 51b7bba289def970f7aba2724a5b987ea640b5ed Mon Sep 17 00:00:00 2001 From: Anushka Acharya Date: Tue, 30 Apr 2024 09:48:52 -0400 Subject: [PATCH 4/5] minor change --- snakemake/Snakefile | 2 +- snakemake/profiles/config.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/snakemake/Snakefile b/snakemake/Snakefile index 466105d0..2c46a3dc 100644 --- a/snakemake/Snakefile +++ b/snakemake/Snakefile @@ -195,7 +195,7 @@ rule sample_molecules_RNN: output: output_file=f"{OUTPUT_DIR}/{{enum_factor}}/prior/samples/{{dataset}}_{{repr}}_{{fold}}_{{train_seed}}_{{sample_seed}}_samples.csv" resources: - mem_mb=2000, + mem_mb=16000, runtime=15+MODEL_PARAMS["sample_mols"]//10000, slurm_extra="--gres=gpu:1" shell: diff --git a/snakemake/profiles/config.yaml b/snakemake/profiles/config.yaml index c85509a7..7efb55c1 100644 --- a/snakemake/profiles/config.yaml +++ b/snakemake/profiles/config.yaml @@ -8,7 +8,7 @@ printshellcmds: true cluster: sbatch --cpus-per-task={threads} - --mem={resources.mem_mb} + --mem={resources.mem_mb}M --time={resources.runtime} --output=slurm_out/%x-%A --job-name={rule} --parsable --partition={resources.partition} From 0cecbd73e182b96cf5cd98f235b10ff1743a1f44 Mon Sep 17 00:00:00 2001 From: anushka255 Date: Tue, 30 Apr 2024 11:21:37 -0400 Subject: [PATCH 5/5] minor tweaks --- snakemake/Snakefile | 2 +- snakemake/Snakefile_model_eval | 4 ++-- snakemake/config.json | 12 ++++++------ snakemake/profiles/config.yaml | 8 ++++---- src/clm/plot/train_discriminator.py | 2 +- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/snakemake/Snakefile b/snakemake/Snakefile index 2c46a3dc..e452a249 100644 --- a/snakemake/Snakefile +++ b/snakemake/Snakefile @@ -195,7 +195,7 @@ rule sample_molecules_RNN: output: output_file=f"{OUTPUT_DIR}/{{enum_factor}}/prior/samples/{{dataset}}_{{repr}}_{{fold}}_{{train_seed}}_{{sample_seed}}_samples.csv" resources: - mem_mb=16000, + mem_mb=1000, runtime=15+MODEL_PARAMS["sample_mols"]//10000, slurm_extra="--gres=gpu:1" shell: diff --git a/snakemake/Snakefile_model_eval b/snakemake/Snakefile_model_eval index 10b4a1c8..6a3ea0ac 100644 --- a/snakemake/Snakefile_model_eval +++ b/snakemake/Snakefile_model_eval @@ -25,8 +25,8 @@ rule all: input: calculate_outcomes_file=expand(f"{OUTPUT_DIR}/model_evaluation/{{enum_factor}}/{{dataset}}_{{repr}}_{{fold}}_calculate_outcomes.csv", fold=range(FOLDS),repr=REPRESENTATIONS,dataset=DATASET,enum_factor=ENUM_FACTORS), - # nn_tc_file=expand(f"{OUTPUT_DIR}/model_evaluation/{{enum_factor}}/{{dataset}}_{{repr}}_{{fold}}_write_nn_tc.csv", - # repr=REPRESENTATIONS,fold=range(FOLDS),dataset=DATASET,enum_factor=ENUM_FACTORS), + nn_tc_file=expand(f"{OUTPUT_DIR}/model_evaluation/{{enum_factor}}/{{dataset}}_{{repr}}_{{fold}}_write_nn_tc.csv", + repr=REPRESENTATIONS,fold=range(FOLDS),dataset=DATASET,enum_factor=ENUM_FACTORS), train_discriminator=expand(f"{OUTPUT_DIR}/model_evaluation/{{enum_factor}}/{{dataset}}_{{repr}}_{{fold}}_train_discriminator_.csv", repr=REPRESENTATIONS,fold=range(FOLDS),dataset=DATASET,enum_factor=ENUM_FACTORS), freq_distribution=expand(f"{OUTPUT_DIR}/model_evaluation/{{enum_factor}}/{{dataset}}_{{repr}}_{{fold}}_freq_distribution.csv", diff --git a/snakemake/config.json b/snakemake/config.json index 7f13d609..aa9f806c 100644 --- a/snakemake/config.json +++ b/snakemake/config.json @@ -1,13 +1,13 @@ { - "output_dir": "/Genomics/argo/users/aa9078/PED_1", - "dataset": "/Genomics/argo/users/aa9078/data/prior/raw/PED.csv", - "pubchem_tsv_file": "/Genomics/singhlab/vineetb/CLM/snakemake/data/PubChem_with_fps.tsv", + "output_dir": "data", + "dataset": "/path/to/.txt", + "pubchem_tsv_file": "/path/to/PubChem.tsv", "representations": ["SMILES"], "folds": 10, - "train_seeds": [0], + "train_seeds": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], "sample_seeds": [0], - "enum_factors": [0, 1], + "enum_factors": [0, 10, 30, 50, 100], "max_input_smiles": 0, "model_params": { @@ -30,5 +30,5 @@ "min_tc": 0, "top_k": 30, "err_ppm": 10, - "random_seed": 12345 + "random_seed": null } diff --git a/snakemake/profiles/config.yaml b/snakemake/profiles/config.yaml index 7efb55c1..f7b911b7 100644 --- a/snakemake/profiles/config.yaml +++ b/snakemake/profiles/config.yaml @@ -6,10 +6,10 @@ printshellcmds: true # cluster specific settings cluster: - sbatch - --cpus-per-task={threads} - --mem={resources.mem_mb}M - --time={resources.runtime} --output=slurm_out/%x-%A + sbatch + --cpus-per-task={threads} + --mem={resources.mem_mb}M + --time={resources.runtime} --output=slurm_out/%x-%A --job-name={rule} --parsable --partition={resources.partition} cluster-status: "slurm-status.py" diff --git a/src/clm/plot/train_discriminator.py b/src/clm/plot/train_discriminator.py index 2f99a954..32729308 100644 --- a/src/clm/plot/train_discriminator.py +++ b/src/clm/plot/train_discriminator.py @@ -86,7 +86,7 @@ def plot(outcome_dir, output_dir): # Make output directory if it doesn't exist yet os.makedirs(output_dir, exist_ok=True) - outcome_files = glob.glob(f"{outcome_dir}/*train_discriminator_.csv") + outcome_files = glob.glob(f"{outcome_dir}/*train_discriminator.csv") outcome = pd.concat( [pd.read_csv(outcome_file, delimiter=",") for outcome_file in outcome_files] )