fmihpc · alhom · Feb 3, 2026 · Feb 2, 2026 · Feb 2, 2026 · Feb 2, 2026
diff --git a/.github/workflows/generate_reference_plots.yml b/.github/workflows/generate_reference_plots.yml
@@ -51,20 +51,11 @@ jobs:
               echo $SACCT_LOG 
               exit 1
             fi
-
+            .github/workflows/make_sbatch_job.sh run_testpackage_generate_verf_set $DATAPATH
             rm $DATAPATH/.lockfile
         - name: Comparing plotted data
           run: |
             export TMPDIR=$RUNNER_TEMP
             module load Python/3.10.4-GCCcore-11.3.0
             . CI_env/bin/activate
-            sbatch -W -o "testpackage_compare.txt" ./testpackage/run_compare.sh verf_set > jobid1.txt
-            cat testpackage_compare.txt
-
-            export JOBID=$(grep -Po '\d+' jobid1.txt)
-            export SACCT_LOG=$(sacct -j $JOBID -o job,state,node | grep FAILED) 
-            if [[ $SACCT_LOG ]]; then
-              echo "Some job failed on a node, try to take a look at the slurm log."
-              echo "$SACCT_LOG" 
-              exit 1
-            fi
+            .github/workflows/make_sbatch_job.sh run_compare verf_set 
diff --git a/.github/workflows/make_sbatch_job.sh b/.github/workflows/make_sbatch_job.sh
@@ -0,0 +1,53 @@
+#!/usr/bin/env bash
+# Submit ./testpackage/<name>.sh as a Slurm array job, wait for it to finish,
+# print its log and exit non-zero if the job or any of its array tasks failed.
+#
+# Usage: make_sbatch_job.sh <name> [script args...]
+#   <name>      file name (without .sh) of the script in ./testpackage; also used
+#               as the log file name and in the jobid_<name> file name.
+#   ARRAY_SIZE  optional environment variable, number of array tasks (default 14).
+
+if [[ -z $1 ]]; then
+  echo "::error::Usage: $0 <name> [script args...]"
+  exit 1
+fi
+NAME=$1
+shift
+ARRAY_SIZE=${ARRAY_SIZE:-14}
+
+# Forward every remaining argument ("$@"), not just the first one: some callers
+#   pass two (e.g. old_python plus a module name).
+# The exit code is kept rather than discarded so that a job failure which sacct
+#   does not list as FAILED still fails this script at the end.
+sbatch -W --array=1-"$ARRAY_SIZE" -o "$NAME" "./testpackage/$NAME.sh" "$@" > "jobid_$NAME"
+SBATCH_STATUS=$?
+
+# Read the log with srun first (the file may not have been updated on the front
+#   end yet); if srun fails (e.g. communication failure) cat it on the front end.
+LOG=$(srun cat "$NAME" || cat "$NAME")
+if [[ -z $LOG ]]; then
+  echo "::warning::Log file could not be read: srun failed and cat returned an empty logfile. sbatch exit code $SBATCH_STATUS"
+  exit 1
+fi
+echo "$LOG"
+
+# It is possible that sbatch returns 0 if only some of the array tasks failed,
+#   or that a node never ran the task and failed silently; check sacct for both.
+# Note that JOBID is also saved by the run scripts, HOWEVER if a node silently
+#   fails it may not get passed to the github output.
+JOBID=$(srun grep -Po '\d+' "jobid_$NAME" || grep -Po '\d+' "jobid_$NAME")
+JOBID_STATUS=$?
+if [[ $JOBID_STATUS -ne 0 || -z $JOBID ]]; then
+  echo "::error::Jobid could not be found, exiting. Exit code $JOBID_STATUS"
+  exit 1
+fi
+SACCT_LOG=$(sacct -j "$JOBID" -o job,state,node | grep FAILED)
+if [[ -n $SACCT_LOG ]]; then
+  echo "::error::Some job failed on a node, try to take a look at the slurm log."
+  echo "$SACCT_LOG"
+  exit 1
+fi
+if [[ $SBATCH_STATUS -ne 0 ]]; then
+  echo "::error::sbatch exited with code $SBATCH_STATUS, try to take a look at the slurm log."
+  exit 1
+fi
diff --git a/.github/workflows/test_compare_image_oldest_python.yml b/.github/workflows/test_compare_image_oldest_python.yml
@@ -75,15 +75,9 @@ jobs:
               echo -e ".lockfile found in $verf_loc/$verfset, not running test, as the verification set generation is likely still ongoing\n Check ongoing actions and/or re-run verification set generation."
               exit 1
             fi 
-            sbatch -W -o testpackage_run.txt --job-name gen_plots ./testpackage/run_testpackage_workflow.sh old_python ${{ steps.pyversion.outputs.PYTHON }} > jobid.txt
-            cat testpackage_run.txt
-            export JOBID=$(grep -Po '\d+' jobid.txt)
-            export SACCT_LOG=$(sacct -j $JOBID -o job,state,node | grep FAILED) 
-            if [[ $SACCT_LOG ]]; then
-              echo "Some job failed on a node, try to take a look at the slurm log."
-              echo "$SACCT_LOG"
-              exit 1
-            fi
+
+            .github/workflows/make_sbatch_job.sh run_testpackage_workflow old_python ${{ steps.pyversion.outputs.PYTHON }} 
+
 
         - name: Comparing plotted data
           run: |
@@ -92,15 +86,7 @@ jobs:
             module load ${{ steps.pyversion.outputs.PYTHON }}
             module list
             . CI_env/bin/activate
-            sbatch -W -o "testpackage_compare.txt" ./testpackage/run_compare.sh old_python ${{ steps.pyversion.outputs.PYTHON }} > jobid1.txt
-            cat testpackage_compare.txt
-            export JOBID=$(grep -Po '\d+' jobid1.txt)
-            export SACCT_LOG=$(sacct -j $JOBID -o job,state,node | grep FAILED) 
-            if [[ $SACCT_LOG ]]; then
-              echo "Some job failed on a node, try to take a look at the slurm log."
-              echo "$SACCT_LOG" 
-              exit 1
-            fi
+            .github/workflows/make_sbatch_job.sh run_compare old_python ${{ steps.pyversion.outputs.PYTHON }} 
 
         - name: scancel dangling job upon cancellation
           if: cancelled()

diff --git a/.github/workflows/test_compare_images.yml b/.github/workflows/test_compare_images.yml
@@ -57,7 +57,6 @@ jobs:
               cat diff_log.txt
             fi
             echo "DIFFRESULT=$DIFFRESULT" >> $GITHUB_OUTPUT
-            echo "Running $DIFFRESULT"
             MAX_ARRAY=14
             TOTAL_TESTS=$(ls ./testpackage/testpackage_definitions/ | wc -l) 
             if [[ $DIFFRESULT && $DIFFRESULT != "pass" ]]; then
@@ -67,32 +66,18 @@ jobs:
             else
               ARRAY_SIZE=$MAX_ARRAY
             fi
-            sbatch -W -o testpackage_run.txt --array=1-$ARRAY_SIZE --job-name gen_plots ./testpackage/run_testpackage_workflow.sh $DIFFRESULT > jobid.txt
-            cat testpackage_run.txt
-            export JOBID=$(grep -Po '\d+' jobid.txt)
-            export SACCT_LOG=$(sacct -j $JOBID -o job,state,node | grep FAILED) 
-            if [[ $SACCT_LOG ]]; then
-              echo "Some job failed on a node, try to take a look at the slurm log."
-              echo "$SACCT_LOG" 
-              exit 1
-            fi
+            export ARRAY_SIZE=$ARRAY_SIZE
+            echo "Running $DIFFRESULT, ARRAY_SIZE $ARRAY_SIZE"
+            .github/workflows/make_sbatch_job.sh run_testpackage_workflow $DIFFRESULT 
+
 
         - name: Comparing plotted data
           run: |
             export TMPDIR=$RUNNER_TEMP
             module load Python/3.10.4-GCCcore-11.3.0
             . CI_env/bin/activate
-            sbatch -W -o "testpackage_compare.txt" ./testpackage/run_compare.sh ${{ steps.run_cl.outputs.DIFFRESULT }} > jobid1.txt
-            cat testpackage_compare.txt
-            export JOBID=$(grep -Po '\d+' jobid1.txt)
-            export SACCT_LOG=$(sacct -j $JOBID -o job,state,node | grep FAILED) 
-            if [[ $SACCT_LOG ]]; then
-              echo "Some job failed on a node, try to take a look at the slurm log."
-              echo "$SACCT_LOG" 
-              exit 1
-            fi
+            .github/workflows/make_sbatch_job.sh run_compare ${{ steps.run_cl.outputs.DIFFRESULT }}
 
-
         - name: scancel dangling job upon cancellation
           if: cancelled()
           run: |

diff --git a/.github/workflows/test_compare_images_full.yml b/.github/workflows/test_compare_images_full.yml
@@ -49,30 +49,18 @@ jobs:
               echo -e ".lockfile found in $verf_loc/$verfset, not running test, as the verification set generation is likely still ongoing\n Check ongoing actions and/or re-run verification set generation."
               exit 1
             fi 
-            sbatch -W -o testpackage_run.txt --job-name gen_plots ./testpackage/run_testpackage_workflow.sh > jobid.txt
-            cat testpackage_run.txt
-            export JOBID=$(grep -Po '\d+' jobid.txt)
-            export SACCT_LOG=$(sacct -j $JOBID -o job,state,node | grep FAILED) 
-            if [[ $SACCT_LOG ]]; then
-              echo "Some job failed on a node, try to take a look at the slurm log."
-              echo "$SACCT_LOG"
-              exit 1
-            fi
+
+            .github/workflows/make_sbatch_job.sh run_testpackage_workflow
+
 
         - name: Comparing plotted data
           run: |
             export TMPDIR=$RUNNER_TEMP
             module load Python/3.10.4-GCCcore-11.3.0
             . CI_env/bin/activate
-            sbatch -W -o "testpackage_compare.txt" ./testpackage/run_compare.sh > jobid1.txt
-            cat testpackage_compare.txt
-            export JOBID=$(grep -Po '\d+' jobid1.txt)
-            export SACCT_LOG=$(sacct -j $JOBID -o job,state,node | grep FAILED) 
-            if [[ $SACCT_LOG ]]; then
-              echo "Some job failed on a node, try to take a look at the slurm log."
-              echo "$SACCT_LOG" 
-              exit 1
-            fi
+
+            .github/workflows/make_sbatch_job.sh run_compare 
+
 
 
         - name: scancel dangling job upon cancellation

diff --git a/testpackage/run_compare.sh b/testpackage/run_compare.sh
@@ -56,7 +56,7 @@ if [[ $@ == 'verf_set' ]]; then
 elif [[ $1 == 'old_python' ]]; then
   check=false
 #Do selective compare if other arguments
-elif [ $@ ]; then
+elif [[ $@ ]]; then
   check=true
 fi