Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
a331112
Add WIP notebooks for postfit PDF grids
kdlong Jan 12, 2026
6c66e46
Standalone script for postfit pdf grids
kdlong Jan 19, 2026
5d6afcb
Add quadratic symmetrization
kdlong Jan 21, 2026
24e5b30
Make script for PDF plotting
kdlong Jan 22, 2026
24d0736
Merge branch 'postfitPdfGrids' into main_temp
kdlong Jan 26, 2026
3f99eb2
Merge branch 'main' of github.com:WMass/WRemnants into postfitPdfGrids
kdlong Jan 29, 2026
0e899ad
Fixes for gen-level fit
kdlong Feb 2, 2026
6382a17
Fix to be able to plot pseudodata fit
kdlong Feb 3, 2026
1bda212
Add a WIP script to make a rabbit input for postfit plotting
kdlong Feb 19, 2026
def1cc5
Add the file I forgot
kdlong Feb 19, 2026
84009e6
Update notebook to plot postfit from rabbit
kdlong Feb 20, 2026
2a13f56
Add metadata to pdf tensor
kdlong Feb 23, 2026
5d5d5d2
Support for multiple PDFs, asymmetric uncertainties in rabbit postfit
kdlong Feb 25, 2026
df399aa
Committing everything and probably making a mess...Committing everything
kdlong Mar 5, 2026
a865639
Merge branch 'main' of github.com:WMass/WRemnants into postfitPdfGrids
kdlong Mar 11, 2026
25045e1
Reformat with updated singularity
kdlong Mar 11, 2026
cf70c15
Reorganize PDF scripts, fix merge issues
kdlong Mar 12, 2026
acd9e19
Implement average symmetrization for PDF grids, bug fixes
kdlong Mar 13, 2026
690a462
Fix scripts
kdlong Mar 16, 2026
ba9ee0d
Update PDF plotting script to allow rabbit plots as well
kdlong Mar 16, 2026
d7ea14b
Refactor PDF grids to allow a rabbit-independent interface
kdlong Mar 20, 2026
1e4b8dd
Merge remote-tracking branch 'upstream/main' into postfitPdfGrids
kdlong Mar 24, 2026
cc09007
Submodule synchronizing...
kdlong Mar 24, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
502 changes: 502 additions & 0 deletions notebooks/NNLOjet.ipynb

Large diffs are not rendered by default.

1,322 changes: 1,322 additions & 0 deletions notebooks/PdfDistributionPlots.ipynb

Large diffs are not rendered by default.

523 changes: 523 additions & 0 deletions notebooks/PostfitPDFGrids.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion scripts/ci/run_with_singularity.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
export APPTAINER_BIND="/scratch,/cvmfs,/etc/pki/tls/certs,/etc/grid-security/certificates"
export APPTAINER_BIND="/ceph,/scratch,/cvmfs,/run,/etc/pki/tls/certs,/etc/grid-security/certificates"
if [[ -d $WREM_BASE ]]; then
export APPTAINER_BIND="${APPTAINER_BIND},${WREM_BASE}/.."
fi
Expand Down
32 changes: 17 additions & 15 deletions scripts/histmakers/w_z_gen_dists.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ def build_graph(df, dataset):
axis_ptV_thag = theoryAgnostic_axes[0]
axis_yV_thag = theoryAgnostic_axes[1]

if args.useUnfoldingBinning and "Z" in dataset.name:
if (args.useUnfoldingBinning or args.fiducial) and (isW or isZ):
unfolding_axes, unfolding_cols, unfolding_selections = (
binning.get_unfolding_dilepton_axes(
["ptVGen", "absYVGen"],
Expand Down Expand Up @@ -415,25 +415,13 @@ def build_graph(df, dataset):
nominal_axes += [axis_helicitygen]
nominal_cols += ["helicity_idxs", "helicity_moments"]

mode = f'{"z" if isZ else "w"}_{analysis_label}'
if args.fiducial is not None:
if isZ and args.fiducial == "singlelep":
mode += "_wlike"

df = unfolding_tools.select_fiducial_space(
df,
mode=mode,
fiducial=args.fiducial,
unfolding=True,
selections=unfolding_selections,
)

if args.singleLeptonHists and (isW or isZ):
if args.singleLeptonHists or args.fiducial:
gen_levels = ["prefsr", "postfsr"]
df = unfolding_tools.define_gen_level(
df, dataset.name, gen_levels, mode="w_mass" if isW else "z_wlike"
)

if args.singleLeptonHists and (isW or isZ):
for level in gen_levels:
lep_axes = [axis_absetal_gen, axis_ptl_gen, axis_mt_gen, axis_chargel_gen]
lep_cols = [
Expand All @@ -459,6 +447,20 @@ def build_graph(df, dataset):
)
)

mode = f'{"z" if isZ else "w"}_{analysis_label}'
if args.fiducial is not None:
if isZ and args.fiducial == "singlelep":
mode += "_wlike"

df = unfolding_tools.select_fiducial_space(
df,
mode=mode,
fiducial=args.fiducial,
unfolding=True,
selections=unfolding_selections,
gen_level="prefsr",
)

if not args.skipEWHists and (isW or isZ) and "Zmumu_powheg-weak" in dataset.name:
if isZ:
massBins = binning.make_bw_binning(
Expand Down
155 changes: 155 additions & 0 deletions scripts/pdf/make_pdf_tensor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
import argparse

import hist
import numpy as np

from rabbit import inputdata, tensorwriter
from wremnants.utilities import theory_utils
from wums import logging, output_tools

# Command-line interface of the script; the parsed options are read from
# ``args`` by the rest of the file.
parser = argparse.ArgumentParser()
parser.add_argument("-o", "--output", default="./", help="output directory")
parser.add_argument("--outname", default="test_tensor", help="output file name")
parser.add_argument(
    "--postfix",
    default=None,
    type=str,
    help="Postfix to append on output file name",
)
parser.add_argument(
    "--sparse",
    default=False,
    action="store_true",
    help="Make sparse tensor",
)
parser.add_argument(
    "--rabbit-input",
    type=str,
    required=True,
    help="Rabbit input file for the reference fit",
)
parser.add_argument(
    "--proc",
    type=str,
    choices=["Zmumu", "Wmunu"],
    required=True,
    help="Process name to use for the PDF fit (should match the signal)",
)
parser.add_argument(
    "--noColorLogger", action="store_true", help="Disable colored logging output."
)
# NOTE(review): --fit-label is accepted but not read anywhere in this script.
parser.add_argument(
    "-l", "--fit-label", type=str, default="cmsmw", help="Label in the output PDF grids"
)
# type=int is needed here: argparse checks the converted value against
# ``choices``, so without it "-v 2" arrives as the string "2" and is rejected
# as an invalid choice. Only the integer default could ever be used.
parser.add_argument(
    "-v",
    "--verbose",
    type=int,
    choices=[0, 1, 2, 3, 4],
    default=3,
    help="Set verbosity level.",
)
args = parser.parse_args()

logger = logging.setup_logger(__file__, args.verbose, args.noColorLogger)

# Reference fit input: provides the systematic names, the process list and the
# metadata of the fit this tensor has to be consistent with.
indata = inputdata.FitInputData(args.rabbit_input)

# Build tensor
writer = tensorwriter.TensorWriter(
    sparse=args.sparse,
)

metadata = indata.metadata
fit_args = metadata["meta_info"]["args"]

# Only the first entry of the "pdfs" list stored in the input metadata is used.
pdf_input = metadata["meta_info_input"]["args"]["pdfs"][0]
pdf_scale = fit_args["scalePdf"]

# NOTE(review): the dataset name is hard-coded regardless of --proc; confirm
# that pdf_info_map does not depend on it for the quantities used below.
pdfInfo = theory_utils.pdf_info_map("Zmumu_2016PostVFP", pdf_input)
pdf_name = pdfInfo["lha_name"]

# A stored scalePdf of -1 is replaced by the value returned by
# theory_utils.pdf_inflation_factor for this PDF set and the fit's "noi" setting.
if pdf_scale == -1:
    pdf_scale = theory_utils.pdf_inflation_factor(
        theory_utils.pdfMap[pdf_input], fit_args["noi"]
    )
    logger.info(f"Using default inflation factor: {pdf_scale}")

# Optional per-set factor from the PDF info, applied on top of the above.
pdf_scale *= pdfInfo["scale"] if "scale" in pdfInfo else 1.0
logger.info(f"Scaling PDF uncertainties by {pdf_scale}")

symHessian = pdfInfo["combine"] == "symHessian"
symmetrize = fit_args["symmetrizePdfUnc"]
# Reported through the logger (not print) so it honours --verbose and the
# colour settings like every other message of the script.
logger.info(f"PDF symmetrization procedure: {symmetrize}")

if not symHessian:
    logger.info(f"Applying {symmetrize} symmetrization procedure")

# Systematic names containing any of these substrings are left out of the
# PDF label list even though they may match "pdf".
_EXCLUDED_PDF_TAGS = ("mcrange", "mbrange", "pdfAlphaS")


def _is_pdf_variation(raw_name):
    """Return True if the decoded name contains "pdf" and no excluded tag."""
    decoded = raw_name.decode()
    return "pdf" in decoded and not any(tag in decoded for tag in _EXCLUDED_PDF_TAGS)


# Selected systematic names from the reference fit, in their original order.
labels = np.array(
    [s for s in indata.systs if _is_pdf_variation(s)],
    dtype=str,
)

if symmetrize == "quadratic":
    # Entries at even positions get the "Down" suffix and entries at odd
    # positions the "Up" suffix, replacing the SymAvg/SymDiff markers.
    for start, shift in enumerate(("Down", "Up")):
        labels[start::2] = [
            name.replace("SymAvg", shift).replace("SymDiff", shift)
            for name in labels[start::2]
        ]
elif symmetrize == "average":
    # Each label is expanded into a consecutive (Down, Up) pair.
    expanded = []
    for base in labels:
        expanded.extend((f"{base}Down", f"{base}Up"))
    labels = np.array(expanded, dtype=str)

# The requested process does not depend on the channel, so validate it once
# up front instead of on every loop iteration (and before any LHAPDF
# evaluation or writer call has been made).
if args.proc.encode("utf-8") not in indata.procs:
    raise ValueError(f"Process {args.proc} not found in input data")

# 201 log-spaced bin edges in x, i.e. 200 bins.
x_range = np.logspace(-4, -0.01, 201)

# Consistency with the incorrect treatment of the central value in setupRabbit
category_labels = labels if symHessian else ["central", *labels]

# One tensor channel per parton flavour / valence combination.
for chan in ["u", "ubar", "d", "dbar", "s", "sbar", "g", "uv", "dv"]:
    # The PDF is evaluated at one x value per bin (the lower bin edges).
    # NOTE(review): 80.360 looks like the scale Q ~ mW in GeV; confirm against
    # theory_utils.pdf_data_from_lhapdf.
    pdf_data = theory_utils.pdf_data_from_lhapdf(pdf_name, chan, 80.360, x_range[:-1])
    # assumes pdf_data is (n_variations, n_x), hence the transpose -- TODO confirm
    pdf_hist = hist.Hist(
        hist.axis.Variable(x_range, name="x"),
        hist.axis.StrCategory(category_labels, name="pdfVar", flow=False),
        data=pdf_data.T,
    )

    writer.add_channel(pdf_hist.axes[:-1], chan)

    # The first pdfVar entry serves both as the prediction and as the data.
    writer.add_process(pdf_hist[..., 0], args.proc, chan, signal=False)
    writer.add_data(pdf_hist[..., 0], chan)

    if symHessian:
        # This is wrong in setupRabbit (the central val is treated as a variation) so it should also be wrong here...
        for syst in pdf_hist.axes["pdfVar"]:
            writer.add_systematic(
                pdf_hist[..., syst],
                syst,
                args.proc,
                chan,
                kfactor=pdf_scale,
            )
    else:
        # Skip the leading "central" entry; the rest is consumed as
        # consecutive (Down, Up) pairs.
        systs = list(pdf_hist.axes["pdfVar"])[1:]
        for systDown, systUp in zip(systs[::2], systs[1::2]):
            writer.add_systematic(
                [pdf_hist[..., systUp], pdf_hist[..., systDown]],
                systUp.replace("Up", ""),
                args.proc,
                chan,
                symmetrize=symmetrize,
                kfactor=pdf_scale,
            )

# An empty --output falls back to the current directory.
directory = "./" if args.output == "" else args.output

# The optional postfix is appended to the base name with an underscore.
filename = f"{args.outname}_{args.postfix}" if args.postfix else args.outname

meta_data = {"meta_info": output_tools.make_meta_info_dict()}
writer.write(outfolder=directory, outfilename=filename, meta_data_dict=meta_data)
104 changes: 104 additions & 0 deletions scripts/pdf/make_postfit_pdf_grids.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import numpy as np

# Add the alias back manually to make mc2hlib work
if not hasattr(np, "int"):
np.int = int
import argparse

import h5py

from wremnants.postprocessing.postfit_pdf_helper import (
RabbitPostfitPdfHelper,
SimplePostfitPdfHelper,
)
from wremnants.utilities import theory_utils
from wums import logging

# Command-line interface of the script; the parsed options are read from
# ``args`` by the rest of the file.
parser = argparse.ArgumentParser()
parser.add_argument(
    "-f",
    "--fitresult",
    type=str,
    required=True,
    help="Path to the fit result file (rabbit HDF5 or simple covariance HDF5).",
)
parser.add_argument(
    "-o",
    "--outfolder",
    type=str,
    required=True,
    help="Output path for the postfit PDF grids (created if it doesn't already exist.",
)
parser.add_argument(
    "-p",
    "--pdf-name",
    type=str,
    required=False,
    choices=["auto", *theory_utils.pdfMap.keys()],
    default="auto",
    help="Name of the PDF set to use. If 'auto', will use the PDF from the fit result metadata.",
)
# type=int is needed here: argparse checks the converted value against
# ``choices``, so without it "-v 2" arrives as the string "2" and is rejected
# as an invalid choice. Only the integer default could ever be used.
parser.add_argument(
    "-v",
    "--verbose",
    type=int,
    choices=[0, 1, 2, 3, 4],
    default=3,
    help="Set verbosity level.",
)
parser.add_argument(
    "-l", "--fit-label", type=str, default="cmsmw", help="Label in the output PDF grids"
)
parser.add_argument(
    "-i", "--lhaid", type=str, required=True, help="LHAPDF ID to give the new set"
)
parser.add_argument(
    "--noColorLogger", action="store_true", help="Disable colored logging output."
)
parser.add_argument(
    "--pseudoData",
    type=str,
    default=None,
    help="Pseudo-data label to use (rabbit format only).",
)
args = parser.parse_args()

logger = logging.setup_logger(__file__, args.verbose, args.noColorLogger)


def is_simple_format(path):
    """Tell whether the HDF5 file at *path* uses the simple covariance layout.

    The file is opened read-only and counts as "simple" when it contains a
    ``covariance`` entry.
    """
    with h5py.File(path, "r") as h5file:
        has_covariance = "covariance" in h5file
    return has_covariance


# Pick the helper class matching the on-disk layout of the fit result.
simple_input = is_simple_format(args.fitresult)
if simple_input:
    logger.info("Detected simple covariance HDF5 format.")
    pdf_helper = SimplePostfitPdfHelper(args.fitresult)
else:
    logger.info("Detected rabbit HDF5 format.")
    pdf_helper = RabbitPostfitPdfHelper(args.fitresult, pseudoData=args.pseudoData)

# Reconcile the PDF name given on the command line with the one the helper
# found in the input. Only the rabbit path may fall back to the command-line
# name when the helper reports none.
pdf_requested = args.pdf_name != "auto"
if not simple_input and pdf_helper.pdf_name is None:
    if not pdf_requested:
        raise ValueError(
            "PDF name must be specified if not present in fit result metadata."
        )
    logger.warning(
        "Input metadata does not contain PDF information. Using specified PDF name."
    )
    pdf_helper.pdf_name = args.pdf_name
elif pdf_requested and args.pdf_name != pdf_helper.pdf_name:
    raise ValueError(
        f"Specified PDF name {args.pdf_name} does not match input PDF {pdf_helper.pdf_name}."
    )

# TODO: Need to scale back at the end to get 95% CL for consistency?

# The helper returns three values, unpacked here in the order it returns them;
# they are then handed to write_grids together with the output options.
postfit_results = pdf_helper.compute_postfit_matrix()
postfit_matrix, new_central, central_pdf_path = postfit_results

pdf_helper.write_grids(
    central_pdf_path,
    args.outfolder,
    args.fit_label,
    args.lhaid,
    postfit_matrix,
    new_central,
)
26 changes: 26 additions & 0 deletions scripts/pdf/read_postfit_pdf_covariance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import argparse

import h5py
import numpy as np

# Small inspection script: dump the contents of a postfit PDF covariance file.
parser = argparse.ArgumentParser(
    description="Read and display postfit PDF covariance, pulls, and labels from an HDF5 file."
)
parser.add_argument(
    "-f",
    "--input",
    type=str,
    required=True,
    help="Input HDF5 file written by write_postfit_pdf_covariance.py.",
)
args = parser.parse_args()

# Load the three datasets fully into memory while the file is open.
with h5py.File(args.input, "r") as h5file:
    cov, pulls, raw_labels = (
        h5file[key][:] for key in ("covariance", "pulls", "labels")
    )
labels = raw_labels.astype(str)

print("labels:", labels)
print("pulls:", pulls)
print("covariance shape:", cov.shape)
print("covariance diagonal:", np.diag(cov))
Loading
Loading