Skip to content

Commit 6c10562

Browse files
authored
Merge pull request #638 from NNPDF/multiclosurev2
adding analysis tools for multiple closures/proxy fits
2 parents 9cb0eb6 + 2eb6648 commit 6c10562

File tree

10 files changed

+2279
-41
lines changed

10 files changed

+2279
-41
lines changed

validphys2/src/validphys/closuretest/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,7 @@
55
"""
66
from validphys.closuretest.closure_plots import *
77
from validphys.closuretest.closure_results import *
8+
from validphys.closuretest.multiclosure import *
9+
from validphys.closuretest.multiclosure_output import *
10+
from validphys.closuretest.multiclosure_pdf import *
11+
from validphys.closuretest.multiclosure_pdf_output import *

validphys2/src/validphys/closuretest/closure_checks.py

Lines changed: 86 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
"""
22
closuretest/checks.py
33
4-
Module containing checks specfic to the closure tests
4+
Module containing checks specific to the closure tests.
5+
56
"""
7+
from collections import defaultdict
8+
69
from reportengine.checks import make_argcheck, CheckError
710

811

@@ -51,3 +54,85 @@ def check_fits_areclosures(fits):
5154
for fit in fits:
5255
if not fit.as_input()["closuretest"]["fakedata"]:
5356
raise CheckError(f"Specified fit: {fit}, is not a closure test")
57+
58+
59+
@make_argcheck
60+
def check_t0pdfset_matches_law(t0pdfset, fit):
61+
t0_from_fit = fit.as_input()["closuretest"]["fakepdf"]
62+
if not str(t0pdfset) == t0_from_fit:
63+
raise CheckError(
64+
f"Underlying pdf: {t0_from_fit}, does not match t0pdfset: {t0pdfset}"
65+
)
66+
67+
68+
@make_argcheck
69+
def check_at_least_10_fits(fits):
70+
if len(fits) < 10:
71+
raise CheckError(
72+
"Multiclosure actions testing finite sampling effects require at least 10 fits"
73+
)
74+
75+
76+
@make_argcheck
77+
def check_multifit_replicas(fits_pdf, _internal_max_reps, _internal_min_reps):
78+
"""Checks that all the fit pdfs have the same number of replicas N_rep. Then
79+
check that N_rep is greater than the smallest number of replicas used in
80+
actions which subsample the replicas of each fit.
81+
82+
This check also has the secondary
83+
effect of filling in the namespace key _internal_max_reps
84+
which can be used to override the number of replicas used at the level of
85+
the runcard, but by default get filled in as the number of replicas in each
86+
fit.
87+
88+
"""
89+
# we take off 1 here because we don't want to include replica 0
90+
n_reps = {len(pdf) - 1 for pdf in fits_pdf}
91+
if len(n_reps) != 1:
92+
raise CheckError(
93+
"all fits for multiclosure actions should have same number of replicas"
94+
)
95+
n_reps = n_reps.pop()
96+
if _internal_max_reps is None:
97+
_internal_max_reps = n_reps
98+
elif _internal_max_reps > n_reps:
99+
raise CheckError(
100+
f"Specified _internal_max_reps to be {_internal_max_reps} "
101+
f"however each fit only has {n_reps} replicas"
102+
)
103+
104+
if _internal_max_reps < _internal_min_reps:
105+
raise CheckError(
106+
f"maximum replicas per fit, {_internal_max_reps}, is less than minimum replicas "
107+
f", {_internal_min_reps}. If you have set _internal_max_reps and"
108+
"_internal_min_reps then ensure that they take sensible values."
109+
)
110+
return {
111+
"_internal_max_reps": _internal_max_reps,
112+
"_internal_min_reps": _internal_min_reps,
113+
}
114+
115+
116+
@make_argcheck
117+
def check_fits_different_filterseed(fits):
118+
"""Input fits should have different filter seeds if they are being
119+
used for multiple closure test studies, because in high-level hand-waving
120+
terms the different level 1 shifts represents different
121+
'runs of the universe'!
122+
123+
"""
124+
seed_fits_dict = defaultdict(list)
125+
126+
for fit in fits:
127+
pdf_name = fit.as_input()["pdf"]["id"]
128+
seed = fit.as_input()["closuretest"]["filterseed"]
129+
seed_fits_dict[seed].append(pdf_name)
130+
131+
bad_fits = [fits for _, fits in seed_fits_dict.items() if len(fits) > 1]
132+
133+
if bad_fits:
134+
raise CheckError(
135+
"Multiclosure actions require that fits have different level 1 "
136+
"noise and therefore different filter seeds. The following groups "
137+
f"of fits have the same seed: {bad_fits}."
138+
)

validphys2/src/validphys/closuretest/closure_plots.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ def plot_biases(biases_table):
2424
ax.legend()
2525
return fig
2626

27+
2728
@figure
2829
def plot_delta_chi2(delta_chi2_bootstrap, fits):
2930
"""Plots distributions of delta chi2 for each fit in `fits`.
@@ -41,6 +42,7 @@ def plot_delta_chi2(delta_chi2_bootstrap, fits):
4142
ax.set_title(r"Total $\Delta_{\chi^{2}}$ for each fit")
4243
return fig
4344

45+
4446
def errorbar_figure_from_table(df):
4547
"""Given a table with even columns as central values as odd columns as errors
4648
plot an errorbar plot"""
@@ -49,9 +51,11 @@ def errorbar_figure_from_table(df):
4951
df.values[:, 1::2].T,
5052
df.index.values,
5153
df.columns.unique(0),
52-
xlim=0)
54+
xlim=0,
55+
)
5356
return fig, ax
5457

58+
5559
@figure
5660
def plot_fits_bootstrap_variance(fits_bootstrap_variance_table):
5761
"""Plot variance as error bars, with mean and central value calculated
@@ -61,10 +65,9 @@ def plot_fits_bootstrap_variance(fits_bootstrap_variance_table):
6165
ax.set_title("Variance by experiment for closure fits")
6266
return fig
6367

68+
6469
@figure
65-
def plot_fits_bootstrap_bias(
66-
fits_bootstrap_bias_table
67-
):
70+
def plot_fits_bootstrap_bias(fits_bootstrap_bias_table):
6871
"""Plot the bias for each experiment for all `fits` as a point with an error bar,
6972
where the error bar is given by bootstrapping the bias across replicas
7073

validphys2/src/validphys/closuretest/closure_results.py

Lines changed: 14 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
import numpy as np
99
import pandas as pd
1010

11+
from reportengine import collect
12+
from reportengine.table import table
13+
1114
from validphys.calcutils import calc_chi2, bootstrap_values
1215
from validphys.checks import check_pdf_is_montecarlo
1316
from validphys.closuretest.closure_checks import (
@@ -17,19 +20,16 @@
1720
check_fits_same_filterseed,
1821
check_fits_underlying_law_match,
1922
)
20-
from reportengine import collect
21-
from reportengine.table import table
2223

2324

2425
BiasData = namedtuple("BiasData", ("bias", "ndata"))
2526

2627
underlying_results = collect("results", ("fitunderlyinglaw",))
2728

29+
2830
@check_fit_isclosure
2931
@check_use_fitcommondata
30-
def bias_dataset(
31-
results, underlying_results, fit, use_fitcommondata, sqrt_covariance_matrix
32-
):
32+
def bias_dataset(results, underlying_results, fit, use_fitcommondata):
3333
"""Calculate the bias for a given dataset and fit. The bias is defined as
3434
chi2 between the prediction from the underlying PDF (which was used to
3535
generate the closure pseudodata), also known as level zero closure data, and
@@ -39,11 +39,11 @@ def bias_dataset(
3939
is used to generate the multiplicative contributions to the covariance
4040
matrix
4141
"""
42-
_, th_ct = results
42+
dt_ct, th_ct = results
4343
# does collect need to collect a list even with one element?
4444
(_, th_ul), = underlying_results
4545
central_diff = th_ct.central_value - th_ul.central_value
46-
bias_out = calc_chi2(sqrt_covariance_matrix, central_diff) # unnormalised
46+
bias_out = calc_chi2(dt_ct.sqrtcovmat, central_diff) # unnormalised
4747
return BiasData(bias_out, len(th_ct))
4848

4949

@@ -53,20 +53,12 @@ def bias_dataset(
5353
@check_fit_isclosure
5454
@check_use_fitcommondata
5555
def bias_experiment(
56-
experiment_results,
57-
underlying_experiment_results,
58-
fit,
59-
use_fitcommondata,
60-
experiment_sqrt_covariance_matrix,
56+
experiment_results, underlying_experiment_results, fit, use_fitcommondata
6157
):
6258
"""Like `bias_dataset` but for a whole experiment.
6359
"""
6460
return bias_dataset(
65-
experiment_results,
66-
underlying_experiment_results,
67-
fit,
68-
use_fitcommondata,
69-
experiment_sqrt_covariance_matrix,
61+
experiment_results, underlying_experiment_results, fit, use_fitcommondata
7062
)
7163

7264

@@ -177,7 +169,7 @@ def fits_bootstrap_bias_table(
177169

178170
@check_fit_isclosure
179171
@check_use_fitcommondata
180-
def variance_dataset(results, fit, use_fitcommondata, sqrt_covariance_matrix):
172+
def variance_dataset(results, fit, use_fitcommondata):
181173
"""calculate the variance for a given dataset, which is the spread of
182174
replicas measured in the space of the covariance matrix. Given by:
183175
@@ -188,25 +180,18 @@ def variance_dataset(results, fit, use_fitcommondata, sqrt_covariance_matrix):
188180
be made fully independent of the closure data. This is useful when checking
189181
the variance of data which was not included in the fit.
190182
191-
# TODO: here we require that use_fitcommondata is true, for the generic use
192-
# case. we require another action which uses explicitly a t0pdf of the
193-
# underlying law.
194183
"""
195-
_, th = results
184+
dt, th = results
196185
diff = th.central_value[:, np.newaxis] - th._rawdata
197-
var_unnorm = calc_chi2(sqrt_covariance_matrix, diff).mean()
186+
var_unnorm = calc_chi2(dt.sqrtcovmat, diff).mean()
198187
return VarianceData(var_unnorm, len(th))
199188

200189

201190
@check_fit_isclosure
202191
@check_use_fitcommondata
203-
def variance_experiment(
204-
experiment_results, fit, use_fitcommondata, experiment_sqrt_covariance_matrix
205-
):
192+
def variance_experiment(experiment_results, fit, use_fitcommondata):
206193
"""Like variance_dataset but for a whole experiment"""
207-
return variance_dataset(
208-
experiment_results, fit, use_fitcommondata, experiment_sqrt_covariance_matrix
209-
)
194+
return variance_dataset(experiment_results, fit, use_fitcommondata)
210195

211196

212197
def bootstrap_variance_experiment(experiment_results, bootstrap_samples=500):

0 commit comments

Comments
 (0)