From d71479d5d254bfdec290af6e0ff7a62ddaa1af17 Mon Sep 17 00:00:00 2001 From: Benjamin Kirk Date: Sat, 28 Feb 2026 16:35:34 -0700 Subject: [PATCH 01/10] support jobhist-db as an optional installation requiring hpc-usage-queries --- pyproject.toml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 747b2dd..2f93702 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,4 +36,10 @@ issues = "https://github.com/NCAR/qhist/issues" [project.scripts] qhist = "qhist.qhist:main" +[project.optional-dependencies] +jobhist-db = [ + # hpc-usage-queries: job history database integration + "hpc-usage-queries @ git+https://github.com/benkirk/hpc-usage-queries.git@jobhist_refactor", +] + [tool.setuptools_scm] From e36de66f1745553697231cac67ed1f111730e3dd Mon Sep 17 00:00:00 2001 From: Benjamin Kirk Date: Sat, 28 Feb 2026 16:50:00 -0700 Subject: [PATCH 02/10] properly configure jobhist/postgres connection --- .env.example | 37 +++++++++++++++++++++++++++++++++++++ .gitignore | 4 ++++ pyproject.toml | 1 + 3 files changed, 42 insertions(+) create mode 100644 .env.example diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..acbb1ac --- /dev/null +++ b/.env.example @@ -0,0 +1,37 @@ +# job_history database configuration +# Copy this file to .env and fill in the values appropriate for your environment. +# .env is gitignored — never commit credentials. + +# ------------------------------------------------------------------- +# Backend selection: "sqlite" (default) or "postgres" +# ------------------------------------------------------------------- +JH_DB_BACKEND=sqlite + +# ------------------------------------------------------------------- +# SQLite settings (used when JH_DB_BACKEND=sqlite) +# ------------------------------------------------------------------- + +# Directory containing per-machine database files ({machine}.db). +# Default: ./data relative to the project root. +#JOB_HISTORY_DATA_DIR=./data + +# Per-machine path overrides (take precedence over JOB_HISTORY_DATA_DIR): +#QHIST_DERECHO_DB=/path/to/derecho.db +#QHIST_CASPER_DB=/path/to/casper.db + +# ------------------------------------------------------------------- +# PostgreSQL settings (used when JH_DB_BACKEND=postgres) +# ------------------------------------------------------------------- + +#JH_PG_HOST=localhost +#JH_PG_PORT=5432 +#JH_PG_USER=postgres +#JH_PG_PASSWORD=example + +# Require SSL/TLS for the PostgreSQL connection (true/false): +#JH_PG_REQUIRE_SSL=false + +# Per-machine database name overrides. +# Default pattern: {machine}_jobs (e.g. derecho_jobs, casper_jobs) +#JH_PG_DERECHO_DB=derecho_jobs +#JH_PG_CASPER_DB=casper_jobs diff --git a/.gitignore b/.gitignore index ca578ec..9e9c18e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@ +# never commit credentials +.env + +# ...or build cruft *.pyc __pycache__ dist/ diff --git a/pyproject.toml b/pyproject.toml index 2f93702..771bf43 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,7 @@ qhist = "qhist.qhist:main" jobhist-db = [ # hpc-usage-queries: job history database integration "hpc-usage-queries @ git+https://github.com/benkirk/hpc-usage-queries.git@jobhist_refactor", + "psycopg2-binary", ] [tool.setuptools_scm] From f9d5cdd824b8fe2b017601f5294ea47109c58115 Mon Sep 17 00:00:00 2001 From: Benjamin Kirk Date: Sat, 28 Feb 2026 17:19:15 -0700 Subject: [PATCH 03/10] safe import of job_history --- src/qhist/qhist.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/qhist/qhist.py b/src/qhist/qhist.py index 549c2ac..4f7a18b 100644 --- a/src/qhist/qhist.py +++ b/src/qhist/qhist.py @@ -15,6 +15,13 @@ from pbsparse import get_pbs_records from glob import glob +try: + from job_history.database import db_available + from job_history.qhist_plugin import db_get_records +except ImportError: + db_available = lambda x: False + db_get_records = None + # Use default signal behavior on system rather than throwing IOError signal.signal(signal.SIGPIPE, signal.SIG_DFL) From 0540f873d204e46e3324f6cadb20b9837271bdd6 Mon Sep 17 00:00:00 2001 From: Benjamin Kirk Date: Sat, 28 Feb 2026 17:42:38 -0700 Subject: [PATCH 04/10] Updated src/qhist/qhist.py to optionally interface with the jobhist database. Refactors the main loop into an emit_jobs() function to handle the output dispatching, and conditionally check for the presence of the DB using the machine attribute from the configuration. Key aspects of the change: 1. Scope/Binding Issues Avoided: Because averages and num_jobs were previously only initialized dynamically under the if args.average: block, they would cause an UnboundLocalError or NameError in emit_jobs() if not properly scoped. I've ensured these are initialized before emit_jobs is defined so the nonlocal directive bindings are stable regardless of the CLI arguments. 2. Simplified Output Dispatching: The repetitive job output logic is now cleanly housed inside emit_jobs(jobs_iter). 3. Database Integration with Scan Fallback: If config.machine is set and db_available(machine) evaluates to True, it fetches records from the DB using db_get_records. Otherwise, it logs a warning (if machine is present) and predictably falls back to scanning the PBS logs. --- src/qhist/qhist.py | 109 +++++++++++++++++++++++++-------------------- 1 file changed, 61 insertions(+), 48 deletions(-) diff --git a/src/qhist/qhist.py b/src/qhist/qhist.py index 4f7a18b..dd06d26 100644 --- a/src/qhist/qhist.py +++ b/src/qhist/qhist.py @@ -727,61 +727,74 @@ def main(): print(' "timestamp":{},'.format(int(datetime.datetime.today().timestamp()))) print(' "Jobs":{') + # Ensure averages and num_jobs exist for nonlocal binding + if "averages" not in locals(): + averages = None + if "num_jobs" not in locals(): + num_jobs = 0 - while keep_going(bounds, log_date, args.reverse): - data_date = datetime.datetime.strftime(log_date, config.pbs_date_format) - data_file = os.path.join(config.pbs_log_path, data_date) - jobs = get_pbs_records(data_file, CustomRecord, True, args.events, - id_filter, host_filter, data_filters, time_filters, - args.reverse, time_divisor) + is_first_json_job = True - if args.list: - for job in jobs: + def emit_jobs(jobs_iter): + nonlocal num_jobs, is_first_json_job + + for job in jobs_iter: + if args.list: list_output(job, fields, labels, list_format, nodes = args.nodes) - elif args.csv: - for job in jobs: + elif args.csv: csv_output(job, fields) - elif args.json: - first_job = True - - for job in jobs: - if not first_job: + elif args.json: + if not is_first_json_job: print(",") - print(textwrap.indent(json_output(job)[2:-2], " "), end = "") - first_job = False - elif args.nodes: - if args.average: - for job in jobs: - if '[]' not in job.id: - for category in averages: - for field in averages[category]: - averages[category][field] += getattr(job, category)[field] - - num_jobs += 1 - - print("{}\n {}".format(tabular_output(vars(job), table_format), ",".join(job.get_nodes()))) + is_first_json_job = False + elif args.nodes: + if averages and '[]' not in job.id: + for category in averages: + for field in averages[category]: + averages[category][field] += getattr(job, category)[field] + num_jobs += 1 + print("{}\n {}".format(tabular_output(vars(job), table_format), ",".join(job.get_nodes()))) else: - for job in jobs: - print("{}\n {}".format(tabular_output(vars(job), table_format), ",".join(job.get_nodes()))) - else: - if args.average: - for job in jobs: - if '[]' not in job.id: - for category in averages: - for field in averages[category]: - averages[category][field] += getattr(job, category)[field] - - num_jobs += 1 - print(tabular_output(vars(job), table_format)) + if averages and '[]' not in job.id: + for category in averages: + for field in averages[category]: + averages[category][field] += getattr(job, category)[field] + num_jobs += 1 + print(tabular_output(vars(job), table_format)) + + machine = getattr(config, "machine", None) + + if machine and db_available(machine): + emit_jobs( + db_get_records( + machine, + bounds[0], + bounds[1], + time_divisor=time_divisor, + id_filter=id_filter, + host_filter=host_filter, + data_filters=data_filters, + time_filter=time_filters, + reverse=args.reverse, + ) + ) + else: + if machine: + print(f"Warning: DB not available for {machine!r}; falling back to log scanning", file=sys.stderr) + + while keep_going(bounds, log_date, args.reverse): + data_date = datetime.datetime.strftime(log_date, config.pbs_date_format) + data_file = os.path.join(config.pbs_log_path, data_date) + jobs = get_pbs_records(data_file, CustomRecord, True, args.events, + id_filter, host_filter, data_filters, time_filters, + args.reverse, time_divisor) + emit_jobs(jobs) + + if args.reverse: + log_date -= ONE_DAY else: - for job in jobs: - print(tabular_output(vars(job), table_format)) - - if args.reverse: - log_date -= ONE_DAY - else: - log_date += ONE_DAY + log_date += ONE_DAY if args.json: print("\n }\n}") @@ -801,5 +814,5 @@ def main(): print(config.generate_header(format_type, units = units)) print(tabular_output(averages, averages_format)) - except UnboundLocalError: + except (NameError, UnboundLocalError): print("Note: statistics output is only currently supported for tabular mode", file = sys.stderr) From bb17e7e42e5dbd46583c76d167b85ffdd0daaf18 Mon Sep 17 00:00:00 2001 From: Benjamin Kirk Date: Mon, 2 Mar 2026 12:25:36 -0700 Subject: [PATCH 05/10] optionally take machine name from QHIST_MACHINE --- src/qhist/qhist.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/qhist/qhist.py b/src/qhist/qhist.py index dd06d26..ec2e75f 100644 --- a/src/qhist/qhist.py +++ b/src/qhist/qhist.py @@ -15,6 +15,7 @@ from pbsparse import get_pbs_records from glob import glob +# import job_history database & plugin API, if available try: from job_history.database import db_available from job_history.qhist_plugin import db_get_records @@ -535,6 +536,10 @@ def main(): if not CustomRecord: exit("Error: given custom record class not found in code extensions ({})".format(config.record_class)) + # Ensure 'averages' and 'num_jobs' exist for nonlocal binding (must exist even if not used) + averages = None + num_jobs = 0 + # Long-form help if args.format == "help": print(format_help) @@ -727,15 +732,9 @@ def main(): print(' "timestamp":{},'.format(int(datetime.datetime.today().timestamp()))) print(' "Jobs":{') - # Ensure averages and num_jobs exist for nonlocal binding - if "averages" not in locals(): - averages = None - if "num_jobs" not in locals(): - num_jobs = 0 - is_first_json_job = True - def emit_jobs(jobs_iter): + def emit_formatted_jobs(jobs_iter): nonlocal num_jobs, is_first_json_job for job in jobs_iter: @@ -763,10 +762,13 @@ def emit_jobs(jobs_iter): num_jobs += 1 print(tabular_output(vars(job), table_format)) - machine = getattr(config, "machine", None) + # what machine to query. + # optional QHIST_MACHINE with fallback to "machine" from config file + machine = os.environ.get("QHIST_MACHINE", getattr(config, "machine", None)) + print(machine, db_available(machine)) if machine and db_available(machine): - emit_jobs( + emit_formatted_jobs( db_get_records( machine, bounds[0], @@ -789,7 +791,7 @@ def emit_jobs(jobs_iter): jobs = get_pbs_records(data_file, CustomRecord, True, args.events, id_filter, host_filter, data_filters, time_filters, args.reverse, time_divisor) - emit_jobs(jobs) + emit_formatted_jobs(jobs) if args.reverse: log_date -= ONE_DAY From 4358434050ef2655731e84e20f37bb653bc5f5d2 Mon Sep 17 00:00:00 2001 From: Benjamin Kirk Date: Mon, 2 Mar 2026 12:27:06 -0700 Subject: [PATCH 06/10] pull hpc-usage-queries[postgres] from main branch now after merge --- pyproject.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 771bf43..b9bf32d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,8 +39,7 @@ qhist = "qhist.qhist:main" [project.optional-dependencies] jobhist-db = [ # hpc-usage-queries: job history database integration - "hpc-usage-queries @ git+https://github.com/benkirk/hpc-usage-queries.git@jobhist_refactor", - "psycopg2-binary", + "hpc-usage-queries[postgres] @ git+https://github.com/benkirk/hpc-usage-queries.git", ] [tool.setuptools_scm] From d14c1a5d9826cdd6a4ac0713b939a788b1a87759 Mon Sep 17 00:00:00 2001 From: Brian Vanderwende Date: Wed, 18 Mar 2026 11:02:38 -0600 Subject: [PATCH 07/10] Makefile updates and improved filter error message --- .gitignore | 3 +++ Makefile | 15 +++++++++++---- src/qhist/qhist.py | 14 +++++++++++++- 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 9e9c18e..64d756c 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,6 @@ build/ *.sh actions-runner* pbs-parser-* + +# Test make installs +test-install diff --git a/Makefile b/Makefile index d8ad6d8..fd04907 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ PREFIX ?= /usr/local -VERSION := 1.1 +VERSION := 1.2 install: lib/pbsparse/Makefile mkdir -p $(PREFIX)/bin $(PREFIX)/lib/qhist @@ -9,14 +9,21 @@ install: lib/pbsparse/Makefile cp -r share $(PREFIX)/share chmod +x $(PREFIX)/bin/qhist +test-install: + @echo "Installing package into test-install directory..." + PREFIX=$(CURDIR)/test-install $(MAKE) install + PREFIX=$(CURDIR)/test-install $(MAKE) ncar-extensions + $(PREFIX)/bin/qhist: @echo "You must run 'make install' before you can install any extensions" @exit 1 -ncar-extensions: $(PREFIX)/bin/qhist - git clone https://github.com/NCAR/pbs-parser-ncar.git +ncar-extensions: $(PREFIX)/bin/qhist pbs-parser-ncar cp pbs-parser-ncar/ncar.py $(PREFIX)/lib/qhist/qhist/extensions/ +pbs-parser-ncar: + git clone https://github.com/NCAR/pbs-parser-ncar.git + lib/pbsparse/Makefile: git submodule init git submodule update @@ -44,4 +51,4 @@ man: --output share/man/man1/qhist.1 clean: - rm -rf dist build + rm -rf dist build test-install diff --git a/src/qhist/qhist.py b/src/qhist/qhist.py index ec2e75f..12dad33 100644 --- a/src/qhist/qhist.py +++ b/src/qhist/qhist.py @@ -8,7 +8,8 @@ * Memory-friendly sorting """ -import sys, os, argparse, datetime, signal, string, _string, json, operator, re, importlib, textwrap +import sys, os, argparse, datetime, signal, string, _string, json, operator +import re, importlib, textwrap, difflib from collections import OrderedDict from json.decoder import JSONDecodeError @@ -626,6 +627,8 @@ def main(): data_filters.append((False, operator.gt, "waittime", float(args.wait) / 60)) if args.filter: + available_filters = [k for k in config.format_map if k not in ("end", "start", "nodelist")] + for fexpr in args.filter.split(";"): for op in ops: if op in fexpr: @@ -636,6 +639,15 @@ def main(): negation = False field, match = [e.strip() for e in fexpr.split(op)] + if field not in available_filters: + print(f"Error: {field} is not a valid filter (see 'qhist -F help' for all)", file = sys.stderr) + possible_filters = difflib.get_close_matches(field, available_filters, 3, 0.6) + + if possible_filters: + print("\nDid you mean: " + ", ".join(possible_filters) + "?", file = sys.stderr) + + sys.exit(1) + data_filters.append((negation, ops[op], config.translate_field(field), match)) break From 82931e39da383eeccba9a5308e5f5fc55f7b41ff Mon Sep 17 00:00:00 2001 From: Brian Vanderwende Date: Wed, 18 Mar 2026 11:31:49 -0600 Subject: [PATCH 08/10] Fix for issue #26 --- src/qhist/cfg/default.json | 3 ++- src/qhist/qhist.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/qhist/cfg/default.json b/src/qhist/cfg/default.json index 645ddfb..8ba3ca1 100644 --- a/src/qhist/cfg/default.json +++ b/src/qhist/cfg/default.json @@ -113,5 +113,6 @@ "default" : "{short_id:12.12} {user:10.10} {queue:8.8} {numnodes:>5d} {numcpus:>6d} {numgpus:>5d} {end:%d-%H%M} {memory:>8.2f} {avgcpu:>6.2f} {elapsed:>6.2f}", "wide_status" : "{short_id:12.12} {user:15.15} {queue:10.10} {numnodes:>5d} {numcpus:>6d} {numgpus:>5d} {end:%m-%dT%H:%M} {reqmem:>10.2f} {memory:>10.2f} {avgcpu:>7.2f} {status:>4.4} {elapsed:>7.2f} {name}", "default_status": "{short_id:12.12} {user:10.10} {queue:8.8} {numnodes:>5d} {numcpus:>6d} {numgpus:>5d} {end:%d-%H%M} {memory:>8.2f} {avgcpu:>6.2f} {status:>4.4} {elapsed:>6.2f}" - } + }, + "pbs_log_error" : "log directory not found ({})" } diff --git a/src/qhist/qhist.py b/src/qhist/qhist.py index 12dad33..64ad3c4 100644 --- a/src/qhist/qhist.py +++ b/src/qhist/qhist.py @@ -179,7 +179,7 @@ def load_config(self, file_path): try: self.pbs_log_start = sorted(f for f in os.listdir(self.pbs_log_path) if os.path.isfile(os.path.join(self.pbs_log_path, f)))[0] except FileNotFoundError: - exit("Error: log directory nof found ({})".format(self.pbs_log_path)) + exit("Error: " + self.pbs_log_error.format(self.pbs_log_path)) except AttributeError: pass From a04c31f4758a7f06fbe761ea860849e533c864b8 Mon Sep 17 00:00:00 2001 From: Brian Vanderwende Date: Wed, 18 Mar 2026 12:09:10 -0600 Subject: [PATCH 09/10] Update pbsparse --- Makefile | 8 ++++++-- lib/pbsparse | 2 +- src/qhist/qhist.py | 1 - 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index fd04907..5c586d2 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,9 @@ PREFIX ?= /usr/local VERSION := 1.2 -install: lib/pbsparse/Makefile +.PHONY: update-pbsparse + +install: update-pbsparse mkdir -p $(PREFIX)/bin $(PREFIX)/lib/qhist sed 's|/src|/lib/qhist|' bin/qhist > $(PREFIX)/bin/qhist cp -r src/qhist $(PREFIX)/lib/qhist @@ -24,9 +26,11 @@ ncar-extensions: $(PREFIX)/bin/qhist pbs-parser-ncar pbs-parser-ncar: git clone https://github.com/NCAR/pbs-parser-ncar.git +update-pbsparse: lib/pbsparse/Makefile + git submodule update + lib/pbsparse/Makefile: git submodule init - git submodule update build: python3 -m build diff --git a/lib/pbsparse b/lib/pbsparse index 2d734eb..690d1da 160000 --- a/lib/pbsparse +++ b/lib/pbsparse @@ -1 +1 @@ -Subproject commit 2d734ebd57b55adb4c8f8e4f8c435a6cb90db46c +Subproject commit 690d1da58029bd75138f7e39018b1352abf2545a diff --git a/src/qhist/qhist.py b/src/qhist/qhist.py index 64ad3c4..1a90133 100644 --- a/src/qhist/qhist.py +++ b/src/qhist/qhist.py @@ -778,7 +778,6 @@ def emit_formatted_jobs(jobs_iter): # optional QHIST_MACHINE with fallback to "machine" from config file machine = os.environ.get("QHIST_MACHINE", getattr(config, "machine", None)) - print(machine, db_available(machine)) if machine and db_available(machine): emit_formatted_jobs( db_get_records( From 3e139cfa37d4b1549068f775480aafa0b8056719 Mon Sep 17 00:00:00 2001 From: Brian Vanderwende Date: Wed, 18 Mar 2026 18:41:22 -0600 Subject: [PATCH 10/10] Ensure waittime field is shown in help and filters --- src/qhist/qhist.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/qhist/qhist.py b/src/qhist/qhist.py index 1a90133..ee50cf7 100644 --- a/src/qhist/qhist.py +++ b/src/qhist/qhist.py @@ -537,6 +537,9 @@ def main(): if not CustomRecord: exit("Error: given custom record class not found in code extensions ({})".format(config.record_class)) + # These fields are computed by pbsparse and do not come directly from the PBS records + derived_fields = ["waittime"] + # Ensure 'averages' and 'num_jobs' exist for nonlocal binding (must exist even if not used) averages = None num_jobs = 0 @@ -545,7 +548,7 @@ def main(): if args.format == "help": print(format_help) - for key in ["id", "short_id"] + sorted(config.format_map): + for key in ["id", "short_id"] + sorted(derived_fields + list(config.format_map)): print(" {}".format(key)) print() @@ -553,7 +556,7 @@ def main(): elif args.filter == "help": print(filter_help) - for key in sorted(k for k in config.format_map if k not in ("end", "start", "nodelist")): + for key in sorted(k for k in (derived_fields + list(config.format_map)) if k not in ("end", "start", "nodelist")): print(" {}".format(key)) print() @@ -627,7 +630,7 @@ def main(): data_filters.append((False, operator.gt, "waittime", float(args.wait) / 60)) if args.filter: - available_filters = [k for k in config.format_map if k not in ("end", "start", "nodelist")] + available_filters = [k for k in (derived_fields + list(config.format_map)) if k not in ("end", "start", "nodelist")] for fexpr in args.filter.split(";"): for op in ops: