Commit c0529c4

Correct pivot definition
1 parent 6e1a3fc commit c0529c4

File tree:
  econml/bootstrap.py
  econml/inference.py
  econml/tests/test_bootstrap.py

3 files changed: +288, -19 lines changed


econml/bootstrap.py

Lines changed: 281 additions & 14 deletions
@@ -6,6 +6,253 @@
 from joblib import Parallel, delayed
 from sklearn.base import clone
 from scipy.stats import norm
+from collections import OrderedDict
+import pandas as pd
+
+
+class BootstrapInferenceResults:
+    """
+    Results class for bootstrap inference.
+
+    Parameters
+    ----------
+    pred_dist : array-like, shape (b, m, d_y, d_t) or (b, m, d_y)
+        the raw predictions of the metric from the b bootstrap replications.
+        Note that when Y or T is a vector rather than a 2-dimensional array,
+        the corresponding singleton dimensions should be collapsed
+    kind : 'percentile' or 'pivot'
+        Whether to use percentile or pivot-based intervals
+    d_t: int
+        Number of treatments
+    d_y: int
+        Number of outputs
+    inf_type: string
+        The type of inference result.
+        It could be either 'effect', 'coefficient' or 'intercept'.
+    fname_transformer: None or predefined function
+        The transform function to get the corresponding feature names from the featurizer
+    """
+
+    def __init__(self, pred_dist, kind, d_y, d_t, inf_type, fname_transformer):
+        self.pred_dist = pred_dist
+        self.kind = kind
+        self.d_t = d_t
+        self.d_y = d_y
+        self.inf_type = inf_type
+        self.fname_transformer = fname_transformer
+
+    @property
+    def point_estimate(self):
+        """
+        Get the point estimate of each treatment on each outcome for each sample X[i].
+
+        Returns
+        -------
+        prediction : array-like, shape (m, d_y, d_t) or (m, d_y)
+            The point estimate of each treatment on each outcome for each sample X[i].
+            Note that when Y or T is a vector rather than a 2-dimensional array,
+            the corresponding singleton dimensions in the output will be collapsed
+            (e.g. if both are vectors, then the output of this method will also be a vector)
+        """
+        return np.mean(self.pred_dist, axis=0)
+
+    @property
+    def stderr(self):
+        """
+        Get the standard error of the metric of each treatment on each outcome for each sample X[i].
+
+        Returns
+        -------
+        stderr : array-like, shape (m, d_y, d_t) or (m, d_y)
+            The standard error of the metric of each treatment on each outcome for each sample X[i].
+            Note that when Y or T is a vector rather than a 2-dimensional array,
+            the corresponding singleton dimensions in the output will be collapsed
+            (e.g. if both are vectors, then the output of this method will also be a vector)
+        """
+        return np.std(self.pred_dist, axis=0)
+
+    @property
+    def var(self):
+        """
+        Get the variance of the metric of each treatment on each outcome for each sample X[i].
+
+        Returns
+        -------
+        var : array-like, shape (m, d_y, d_t) or (m, d_y)
+            The variance of the metric of each treatment on each outcome for each sample X[i].
+            Note that when Y or T is a vector rather than a 2-dimensional array,
+            the corresponding singleton dimensions in the output will be collapsed
+            (e.g. if both are vectors, then the output of this method will also be a vector)
+        """
+        return self.stderr**2
+
+    def conf_int(self, alpha=0.1):
+        """
+        Get the confidence interval of the metric of each treatment on each outcome for each sample X[i].
+
+        Parameters
+        ----------
+        alpha: optional float in [0, 1] (default=0.1)
+            The overall level of confidence of the reported interval.
+            The alpha/2, 1-alpha/2 confidence interval is reported.
+
+        Returns
+        -------
+        lower, upper: tuple of arrays, shape (m, d_y, d_t) or (m, d_y)
+            The lower and the upper bounds of the confidence interval for each quantity.
+            Note that when Y or T is a vector rather than a 2-dimensional array,
+            the corresponding singleton dimensions in the output will be collapsed
+            (e.g. if both are vectors, then the output of this method will also be a vector)
+        """
+        lower = 100 * alpha / 2
+        upper = 100 * (1 - alpha / 2)
+        if self.kind == 'percentile':
+            return np.percentile(self.pred_dist, lower, axis=0), np.percentile(self.pred_dist, upper, axis=0)
+        elif self.kind == 'pivot':
+            est = self.point_estimate
+            return (2 * est - np.percentile(self.pred_dist, upper, axis=0),
+                    2 * est - np.percentile(self.pred_dist, lower, axis=0))
+        else:
+            raise ValueError("Unrecognized bootstrap kind; valid kinds are 'percentile' and 'pivot'")
+
+    def pvalue(self, value=0):
+        """
+        Get the p value of each treatment on each outcome for each sample X[i].
+
+        Parameters
+        ----------
+        value: optional float (default=0)
+            The mean value of the metric you'd like to test under the null hypothesis.
+
+        Returns
+        -------
+        pvalue : array-like, shape (m, d_y, d_t) or (m, d_y)
+            The p value of each treatment on each outcome for each sample X[i].
+            Note that when Y or T is a vector rather than a 2-dimensional array,
+            the corresponding singleton dimensions in the output will be collapsed
+            (e.g. if both are vectors, then the output of this method will also be a vector)
+        """
+        if self.kind == 'percentile':
+            dist = self.pred_dist
+        elif self.kind == 'pivot':
+            est = np.mean(self.pred_dist, axis=0)
+            dist = 2 * est - self.pred_dist
+        else:
+            raise ValueError("Unrecognized bootstrap kind; valid kinds are 'percentile' and 'pivot'")
+        return np.minimum((dist < value).sum(axis=0), (dist > value).sum(axis=0)) / dist.shape[0]
+
+    def zstat(self, value=0):
+        """
+        Get the z statistic of the metric of each treatment on each outcome for each sample X[i].
+
+        Parameters
+        ----------
+        value: optional float (default=0)
+            The mean value of the metric you'd like to test under the null hypothesis.
+
+        Returns
+        -------
+        zstat : array-like, shape (m, d_y, d_t) or (m, d_y)
+            The z statistic of the metric of each treatment on each outcome for each sample X[i].
+            Note that when Y or T is a vector rather than a 2-dimensional array,
+            the corresponding singleton dimensions in the output will be collapsed
+            (e.g. if both are vectors, then the output of this method will also be a vector)
+        """
+        return (self.point_estimate - value) / self.stderr
+
+    def summary_frame(self, alpha=0.1, value=0, decimals=3, feat_name=None):
+        """
+        Output the dataframe for all the inferences above.
+
+        Parameters
+        ----------
+        alpha: optional float in [0, 1] (default=0.1)
+            The overall level of confidence of the reported interval.
+            The alpha/2, 1-alpha/2 confidence interval is reported.
+        value: optional float (default=0)
+            The mean value of the metric you'd like to test under the null hypothesis.
+        decimals: optional int (default=3)
+            Number of decimal places to round each column to.
+        feat_name: optional list of strings or None (default is None)
+            The names of the input features
+
+        Returns
+        -------
+        output: pandas DataFrame
+            The output dataframe includes point estimate, standard error, z score, p value and confidence intervals
+            of the estimated metric of each treatment on each outcome for each sample X[i]
+        """
+        ci_mean = self.conf_int(alpha=alpha)
+        to_include = OrderedDict()
+        to_include['point_estimate'] = self._array_to_frame(self.d_t, self.d_y, self.point_estimate)
+        to_include['stderr'] = self._array_to_frame(self.d_t, self.d_y, self.stderr)
+        to_include['zstat'] = self._array_to_frame(self.d_t, self.d_y, self.zstat(value))
+        to_include['pvalue'] = self._array_to_frame(self.d_t, self.d_y, self.pvalue(value))
+        to_include['ci_lower'] = self._array_to_frame(self.d_t, self.d_y, ci_mean[0])
+        to_include['ci_upper'] = self._array_to_frame(self.d_t, self.d_y, ci_mean[1])
+        res = pd.concat(to_include, axis=1, keys=to_include.keys()).round(decimals)
+        if self.d_t == 1:
+            res.columns = res.columns.droplevel(1)
+        if self.d_y == 1:
+            res.index = res.index.droplevel(1)
+        if self.inf_type == 'coefficient':
+            if feat_name is not None and self.fname_transformer:
+                ind = self.fname_transformer(feat_name)
+            else:
+                ct = res.shape[0] // self.d_y
+                ind = ['X' + str(i) for i in range(ct)]
+
+            if self.d_y > 1:
+                res.index = res.index.set_levels(ind, level=0)
+            else:
+                res.index = ind
+        elif self.inf_type == 'intercept':
+            if self.d_y > 1:
+                res.index = res.index.set_levels(['intercept'], level=0)
+            else:
+                res.index = ['intercept']
+        return res
+
+    def population_summary(self, alpha=0.1, value=0, decimals=3, tol=0.001):
+        """
+        Output the population summary results object.
+
+        Parameters
+        ----------
+        alpha: optional float in [0, 1] (default=0.1)
+            The overall level of confidence of the reported interval.
+            The alpha/2, 1-alpha/2 confidence interval is reported.
+        value: optional float (default=0)
+            The mean value of the metric you'd like to test under the null hypothesis.
+        decimals: optional int (default=3)
+            Number of decimal places to round each column to.
+        tol: optional float (default=0.001)
+            The stopping criterion. The iterations will stop when the outcome is less than ``tol``
+
+        Returns
+        -------
+        PopulationSummaryResults: object
+            The population summary results instance contains the different summary analysis of point estimate
+            for sample X on each treatment and outcome.
+        """
+        if self.inf_type == 'effect':
+            return PopulationSummaryResults(pred=self.point_estimate, pred_stderr=self.stderr,
+                                            d_t=self.d_t, d_y=self.d_y,
+                                            alpha=alpha, value=value, decimals=decimals, tol=tol)
+        else:
+            raise AttributeError(self.inf_type + " inference doesn't support the population_summary function!")
+
+    def _array_to_frame(self, d_t, d_y, arr):
+        if np.isscalar(arr):
+            arr = np.array([arr])
+        if self.inf_type == 'coefficient':
+            arr = np.moveaxis(arr, -1, 0)
+        arr = arr.reshape((-1, d_y, d_t))
+        df = pd.concat([pd.DataFrame(x) for x in arr], keys=np.arange(arr.shape[0]))
+        df.index = df.index.set_levels(['Y' + str(i) for i in range(d_y)], level=1)
+        df.columns = ['T' + str(i) for i in range(d_t)]
+        return df
 
 
 class BootstrapEstimator:
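
A minimal sketch (illustrative, not part of this commit) of how the class added above can be exercised; normally BootstrapEstimator constructs it from the replicate predictions rather than by hand:

    import numpy as np
    from econml.bootstrap import BootstrapInferenceResults

    # synthetic replicates: b=500 bootstrap draws of an effect for m=3 samples, d_y=1
    rng = np.random.default_rng(0)
    pred_dist = rng.normal(loc=[[0.4], [0.5], [0.6]], scale=0.05, size=(500, 3, 1))

    res = BootstrapInferenceResults(pred_dist=pred_dist, kind='pivot', d_y=1, d_t=1,
                                    inf_type='effect', fname_transformer=None)
    res.point_estimate        # shape (3, 1): mean over the bootstrap axis
    res.conf_int(alpha=0.1)   # pivot interval: 2 * estimate - empirical percentiles
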
@@ -46,9 +293,10 @@ class BootstrapEstimator:
         that should be preferred (meaning this wrapper will compute the mean of it).
         This option only affects behavior if `compute_means` is set to ``True``.
 
-    bootstrap_type: 'percentile' or 'standard', default 'percentile'
+    bootstrap_type: 'percentile', 'pivot', or 'normal', default 'percentile'
         Bootstrap method used to compute results. 'percentile' will result in using the empirical CDF of
-        the replicated copmutations of the statistics. 'standard' will instead compute a pivot interval
+        the replicated computations of the statistics. 'pivot' will also use the replicates but create a pivot
+        interval that also relies on the estimate over the entire dataset. 'normal' will instead compute an interval
         assuming the replicates are normally distributed.
     """
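
For reference, the three bootstrap_type options correspond to the following interval constructions over an array of replicates (an illustration distilled from this file, not code from the commit):

    import numpy as np
    from scipy.stats import norm

    def bootstrap_intervals(arr, lower=5, upper=95):
        # percentiles are given on the 0-100 scale, as at the wrapper's call sites
        est, std = np.mean(arr, axis=0), np.std(arr, axis=0)
        return {
            'percentile': (np.percentile(arr, lower, axis=0),
                           np.percentile(arr, upper, axis=0)),
            'pivot': (2 * est - np.percentile(arr, upper, axis=0),
                      2 * est - np.percentile(arr, lower, axis=0)),
            'normal': (est - norm.ppf(upper / 100) * std,
                       est - norm.ppf(lower / 100) * std),
        }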

@@ -59,7 +307,7 @@ def __init__(self, wrapped, n_bootstrap_samples=1000, n_jobs=None, compute_means
         self._n_jobs = n_jobs
         self._compute_means = compute_means
         self._prefer_wrapped = prefer_wrapped
-        self._boostrap_type = bootstrap_type
+        self._bootstrap_type = bootstrap_type
         self._wrapped = wrapped
 
     # TODO: Add a __dir__ implementation?
@@ -155,12 +403,23 @@ def call_with_bounds(can_call, lower, upper):
                def percentile_bootstrap(arr, _):
                    return np.percentile(arr, lower, axis=0), np.percentile(arr, upper, axis=0)
 
-               def pivot_bootstrap(arr, est):
+               def pivot_bootstrap(arr, _):
+                   # TODO: do we want the central estimate to be the average of all bootstrap estimates,
+                   # or the original estimate over the entire non-bootstrapped population?
+                   est = np.mean(arr, axis=0)
+                   return 2 * est - np.percentile(arr, upper, axis=0), 2 * est - np.percentile(arr, lower, axis=0)
+
+               def normal_bootstrap(arr, _):
+                   est = np.mean(arr, axis=0)
                    std = np.std(arr, axis=0)
                    return est - norm.ppf(upper / 100) * std, est - norm.ppf(lower / 100) * std
-               # TODO: studentized bootstrap? would be more accurate in most cases but can we avoid
-               # second level bootstrap which would be prohibitive computationally
-               fn = {'percentile': percentile_bootstrap, 'standard': pivot_bootstrap}[self._boostrap_type]
+
+               # TODO: studentized bootstrap? this would be more accurate in most cases but can we avoid
+               # a second-level bootstrap, which would be prohibitive computationally?
+
+               fn = {'percentile': percentile_bootstrap,
+                     'normal': normal_bootstrap,
+                     'pivot': pivot_bootstrap}[self._bootstrap_type]
                return proxy(can_call, prefix, fn)
 
            can_call = callable(getattr(self._instances[0], prefix))
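
A usage sketch under assumptions: the `predict_interval` method name and its lower/upper percentile arguments are inferred from the proxy and call_with_bounds machinery above, not confirmed by this diff:

    import numpy as np
    from sklearn.linear_model import LinearRegression
    from econml.bootstrap import BootstrapEstimator

    x = np.random.normal(size=(1000, 1))
    y = (x * 0.5 + np.random.normal(size=(1000, 1))).ravel()

    est = BootstrapEstimator(LinearRegression(), n_bootstrap_samples=100,
                             bootstrap_type='pivot')
    est.fit(x, y)
    lower, upper = est.predict_interval(x, lower=5, upper=95)  # assumed signature
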
@@ -178,8 +437,7 @@ def call(lower=5, upper=95):
        def get_inference():
            # can't import from econml.inference at top level without creating mutual dependencies
            from .inference import InferenceResults
-           # TODO: consider treating percentile bootstrap differently since we can work directly with
-           # the empirical distribution
+
            prefix = name[: - len("_inference")]
            if prefix in ['const_marginal_effect', 'effect']:
                inf_type = 'effect'
@@ -190,17 +448,26 @@
            else:
                raise AttributeError("Unsupported inference: " + name)
 
-           def get_inference():
+           d_t = self._wrapped._d_t[0] if self._wrapped._d_t else 1
+           d_t = 1 if prefix == 'effect' else d_t
+           d_y = self._wrapped._d_y[0] if self._wrapped._d_y else 1
+
+           def get_inference_nonparametric(kind):
+               return proxy(callable(getattr(self._instances[0], prefix)), prefix,
+                            lambda arr, _: BootstrapInferenceResults(pred_dist=arr, kind=kind,
+                                                                     d_t=d_t, d_y=d_y, inf_type=inf_type,
+                                                                     fname_transformer=None))
+
+           def get_inference_parametric():
                pred = getattr(self._wrapped, prefix)
                stderr = getattr(self, prefix + '_std')
-               d_t = self._wrapped._d_t[0] if self._wrapped._d_t else 1
-               d_t = 1 if prefix == 'effect' else d_t
-               d_y = self._wrapped._d_y[0] if self._wrapped._d_y else 1
                return InferenceResults(d_t=d_t, d_y=d_y, pred=pred,
                                        pred_stderr=stderr, inf_type=inf_type,
                                        pred_dist=None, fname_transformer=None)
 
-           return get_inference
+           return {'normal': get_inference_parametric,
+                   'percentile': lambda: get_inference_nonparametric('percentile'),
+                   'pivot': lambda: get_inference_nonparametric('pivot')}[self._bootstrap_type]
 
        caught = None
        m = None
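
The dict-of-factories dispatch near the end of this hunk, shown in isolation (a toy sketch, not econml code): the two nonparametric kinds are wrapped in lambdas so the factory is only invoked, with its kind bound, after the bootstrap_type key resolves.

    def parametric():
        return 'InferenceResults'  # built from point estimate and stderr

    def nonparametric(kind):
        return f'BootstrapInferenceResults({kind})'  # keeps all b replicates

    factory = {'normal': parametric,
               'percentile': lambda: nonparametric('percentile'),
               'pivot': lambda: nonparametric('pivot')}['pivot']
    print(factory())  # BootstrapInferenceResults(pivot)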

econml/inference.py

Lines changed: 6 additions & 4 deletions

@@ -45,10 +45,12 @@ class BootstrapInference(Inference):
     n_jobs: int, optional (default -1)
         The maximum number of concurrently running jobs, as in joblib.Parallel.
 
-    bootstrap_type: 'percentile' or 'standard', default 'percentile'
-        Bootstrap method used to compute results. 'percentile' will result in using the empiracal CDF of
-        the replicated copmutations of the statistics. 'standard' will instead compute a pivot interval
-        assuming the replicates are normally distributed.
+    bootstrap_type: 'percentile', 'pivot', or 'normal', default 'percentile'
+        Bootstrap method used to compute results.
+        'percentile' will result in using the empirical CDF of the replicated computations of the statistics.
+        'pivot' will also use the replicates but create a pivot interval that also relies on the estimate
+        over the entire dataset.
+        'normal' will instead compute an interval assuming the replicates are normally distributed.
     """
 
     def __init__(self, n_bootstrap_samples=100, n_jobs=-1, bootstrap_type='percentile'):
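
Constructing the descriptor is grounded in the signature shown above; passing it to an estimator's fit call (the commented line, with a hypothetical estimator and data) is the intended use:

    from econml.inference import BootstrapInference

    inf = BootstrapInference(n_bootstrap_samples=100, n_jobs=-1,
                             bootstrap_type='pivot')
    # est.fit(Y, T, X=X, inference=inf)  # hypothetical estimator and data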

econml/tests/test_bootstrap.py

Lines changed: 1 addition & 1 deletion

@@ -18,7 +18,7 @@ class TestBootstrap(unittest.TestCase):
     def test_with_sklearn(self):
         """Test that we can bootstrap sklearn estimators."""
         for n_jobs in [None, -1]:  # test parallelism
-            for kind in ['percentile', 'standard']:  # test both percentile and pivot intervals
+            for kind in ['percentile', 'pivot', 'normal']:  # test percentile, pivot, and normal intervals
                 x = np.random.normal(size=(1000, 1))
                 y = x * 0.5 + np.random.normal(size=(1000, 1))
                 y = y.flatten()
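
A quick sanity check of the corrected pivot definition (illustrative, not part of the test suite): on a symmetric replicate distribution, the pivot and percentile intervals should agree up to sampling noise.

    import numpy as np

    rng = np.random.default_rng(0)
    arr = rng.normal(loc=1.0, scale=0.1, size=10000)  # symmetric replicates
    est = arr.mean()
    lo, hi = np.percentile(arr, 5), np.percentile(arr, 95)
    # the pivot interval reflects the percentiles around the estimate;
    # symmetry makes the reflection land back on (lo, hi)
    assert np.allclose((lo, hi), (2 * est - hi, 2 * est - lo), atol=0.01)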
