import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.formula.api as smf
from scipy import stats
%matplotlib inline

np.random.seed(418)             # global reproducibility (STYLE.md contract)
SEED, B = 42, 10_000            # per-example seed + bootstrap replicates (match scripts/ch4)
plt.rcParams.update({
    'figure.figsize': (7.2, 4.2), 'figure.dpi': 110,
    'axes.grid': True, 'grid.alpha': 0.25,
    'axes.spines.top': False, 'axes.spines.right': False,
    'font.size': 11,
})
BLUE, RED, GREEN, ORANGE = '#4C72B0', '#C44E52', '#55A868', '#DD8452'
PURPLE, TEAL, GRAY = '#8172B3', '#20B2AA', '#7F8C8D'

# Course data bucket: every Chapter 4 dataset lives here (no repo needed).
BASE = ('https://pqyjaywwccbnqpwgeiuv.supabase.co/storage/v1/object/public/'
        'STAT%20418%20Images/Data/Chapter4/')
def url(name):
    return BASE + name

print('environment ready · numpy', np.__version__, '· pandas', pd.__version__,
      '·  B =', B, ' seed =', SEED)

environment ready · numpy 1.26.4 · pandas 2.3.3 ·  B = 10000  seed = 42


morley = pd.read_csv(url('morley.csv'))
speed = morley['Speed'].to_numpy().astype(float)
n = len(speed)
xbar, s = speed.mean(), speed.std(ddof=1)
print(f"Michelson morley: n = {n}   mean = {xbar:.2f}  ->  {299000 + xbar:.1f} km/s"
      f"   sd = {s:.2f}")
print(f"(true speed of light c = 299792.458 km/s  ->  {299792.458 - 299000:.3f} in these units)\n")

print("Which statistics have a closed-form standard error?")
print(f"  {'statistic':<18}{'value':>10}   closed-form SE?")
print(f"  {'mean':<18}{xbar:>10.2f}   YES   s/sqrt(n)             = {s/np.sqrt(n):.3f}")
print(f"  {'std deviation':<18}{s:>10.2f}   approx s/sqrt(2(n-1))   = {s/np.sqrt(2*(n-1)):.3f}")
print(f"  {'median':<18}{np.median(speed):>10.2f}   NO    needs density f(median) -- unknown")
print(f"  {'10% trimmed mean':<18}{stats.trim_mean(speed, 0.1):>10.2f}   NO    no elementary formula")
print("\n  The mean is the EASY case. For the median/trimmed-mean the asymptotic SE")
print("  formula contains the unknown population density -- exactly where we will let")
print("  the bootstrap (Section 4.3) estimate the sampling distribution by resampling.")

Michelson morley: n = 100   mean = 852.40  ->  299852.4 km/s   sd = 79.01
(true speed of light c = 299792.458 km/s  ->  792.458 in these units)

Which statistics have a closed-form standard error?
  statistic              value   closed-form SE?
  mean                  852.40   YES   s/sqrt(n)             = 7.901
  std deviation          79.01   approx s/sqrt(2(n-1))   = 5.615
  median                850.00   NO    needs density f(median) -- unknown
  10% trimmed mean      852.25   NO    no elementary formula

  The mean is the EASY case. For the median/trimmed-mean the asymptotic SE
  formula contains the unknown population density -- exactly where we will let
  the bootstrap (Section 4.3) estimate the sampling distribution by resampling.


# DKW 95% band half-width for n = 100 (needs ONLY the sample).
alpha = 0.05
eps = np.sqrt(np.log(2.0 / alpha) / (2.0 * n))
print(f"DKW 95% band half-width  eps_100 = sqrt(ln(2/0.05)/200) = {eps:.5f}")
print("Glivenko-Cantelli guarantees uniform convergence; DKW puts a finite-n number on it:")
print(f"  the true CDF lies within +/-{eps:.3f} of the ECDF EVERYWHERE, simultaneously, w.p. 0.95.")

DKW 95% band half-width  eps_100 = sqrt(ln(2/0.05)/200) = 0.13581
Glivenko-Cantelli guarantees uniform convergence; DKW puts a finite-n number on it:
  the true CDF lies within +/-0.136 of the ECDF EVERYWHERE, simultaneously, w.p. 0.95.


# Figure: morley ECDF with its 95% DKW band and a Normal reference fit.
x = np.sort(speed)
ecdf = np.arange(1, n + 1) / n
grid = np.linspace(x.min() - 40, x.max() + 40, 400)
xs = np.concatenate([[grid[0]], x]); ys = np.concatenate([[0], ecdf])

fig, ax = plt.subplots(figsize=(7.4, 4.6))
ax.step(xs, ys, where='post', color='#1f2937', lw=2, label=r'ECDF $\hat F_n$ (n=100)')
lo = np.clip(ys - eps, 0, 1); hi = np.clip(ys + eps, 0, 1)
ax.fill_between(xs, lo, hi, step='post', color=GREEN, alpha=0.30,
                label=fr'95% DKW band ($\varepsilon_{{100}}={eps:.3f}$)')
ax.plot(grid, stats.norm.cdf(grid, xbar, s), color=RED, ls='--', lw=1.6,
        label=fr'Normal fit $N({xbar:.0f},\,{s:.0f}^2)$')
ax.set_xlabel('Speed of light (km/s - 299000)'); ax.set_ylabel('cumulative probability')
ax.set_title('Michelson 1879: ECDF with a 95% DKW confidence band')
ax.legend(loc='lower right', fontsize=9); plt.tight_layout(); plt.show()


# Plug-in can be BIASED: the plug-in variance T = (1/n) sum (x-xbar)^2 has
# E[T] - sigma^2 = -sigma^2/n exactly.  The bootstrap recovers that known bias.
var_plugin = speed.var(ddof=0)              # plug-in (1/n) divisor
analytic_bias = -var_plugin / n             # = -sigma^2/n, known in closed form

rng = np.random.default_rng(SEED)
vstar = np.empty(B)
for b in range(B):
    vstar[b] = rng.choice(speed, size=n, replace=True).var(ddof=0)
bias_boot = vstar.mean() - var_plugin
se_boot = vstar.std(ddof=1)

print(f"plug-in variance  (1/n) sum (x-xbar)^2 = {var_plugin:.2f}")
print(f"  analytic bias   -sigma^2/n           = {analytic_bias:.2f}   (known exactly)")
print(f"  bootstrap bias  (B={B:,}, seed={SEED}) = {bias_boot:.2f}   (recovers it)")
print(f"  bootstrap SE                          = {se_boot:.1f}")
print(f"  bias ratio R = |bias| / SE            = {abs(analytic_bias)/se_boot:.3f}"
      "   (<0.25 -> negligible; do NOT bias-correct)")

plug-in variance  (1/n) sum (x-xbar)^2 = 6180.24
  analytic bias   -sigma^2/n           = -61.80   (known exactly)
  bootstrap bias  (B=10,000, seed=42) = -72.67   (recovers it)
  bootstrap SE                          = 916.9
  bias ratio R = |bias| / SE            = 0.067   (<0.25 -> negligible; do NOT bias-correct)


tit = pd.read_csv(url('titanic.csv'))
tit['Fare'] = pd.to_numeric(tit['Fare'], errors='coerce')
fdf = tit.dropna(subset=['Fare'])
fdf = fdf[fdf['Fare'] > 0].copy()           # drop 15 zero/complimentary fares -> n = 876
fares = fdf['Fare'].to_numpy()
nF = len(fares); sF = fares.std(ddof=1)
print(f"Titanic Fare>0: n = {nF}   mean = {fares.mean():.2f}   median = {np.median(fares):.2f}"
      f"   skew = {pd.Series(fares).skew():.2f}")
print(f"  five-number: min={fares.min():.2f}  Q1={np.percentile(fares,25):.2f}"
      f"  med={np.median(fares):.2f}  Q3={np.percentile(fares,75):.2f}  max={fares.max():.2f}\n")

def bootstrap_dist(xx, stat, B, seed):
    rng = np.random.default_rng(seed); m = len(xx); out = np.empty(B)
    for b in range(B):
        out[b] = stat(rng.choice(xx, size=m, replace=True))
    return out

boot_mean = bootstrap_dist(fares, np.mean, B, SEED)
boot_med  = bootstrap_dist(fares, np.median, B, SEED)
se_mean, se_med = boot_mean.std(ddof=1), boot_med.std(ddof=1)
print(f"bootstrap (B={B:,}, seed={SEED}):")
print(f"  SE(mean)   = {se_mean:.4f}   (analytic s/sqrt(n) = {sF/np.sqrt(nF):.4f}  -- sanity check)")
print(f"  SE(median) = {se_med:.4f}   (NO simple closed form -- bootstrap-only)")
print(f"  ratio SE(mean)/SE(median) = {se_mean/se_med:.2f}x"
      f"  -> the median is ~{se_mean/se_med:.1f}x more precise under heavy right skew")

Titanic Fare>0: n = 876   mean = 32.76   median = 14.50   skew = 4.77
  five-number: min=4.01  Q1=7.92  med=14.50  Q3=31.27  max=512.33

bootstrap (B=10,000, seed=42):
  SE(mean)   = 1.6701   (analytic s/sqrt(n) = 1.6872  -- sanity check)
  SE(median) = 0.6973   (NO simple closed form -- bootstrap-only)
  ratio SE(mean)/SE(median) = 2.40x  -> the median is ~2.4x more precise under heavy right skew


# Figure: the two bootstrap sampling distributions, side by side.
fig, axes = plt.subplots(1, 2, figsize=(11, 3.8))
for ax, dist, est, col, name in [(axes[0], boot_mean, fares.mean(), BLUE, 'mean'),
                                  (axes[1], boot_med, np.median(fares), ORANGE, 'median')]:
    ax.hist(dist, bins=45, color=col, alpha=0.7, edgecolor='white', density=True)
    ax.axvline(est, color=RED, lw=2, label=f'$\\hat\\theta$ = {est:.2f}')
    sd = dist.std(ddof=1)
    ax.axvspan(est - sd, est + sd, color=col, alpha=0.12)
    ax.set_title(f'bootstrap {name}  (SE = {sd:.3f})')
    ax.set_xlabel(f'fare {name} ($)'); ax.legend(fontsize=9)
axes[0].set_ylabel('density')
fig.suptitle('Titanic fares: the median is estimated far more precisely than the mean')
plt.tight_layout(); plt.show()


adv = pd.read_csv(url('Advertising.csv'))
TV = adv['TV'].to_numpy(); radio = adv['Radio'].to_numpy()
news = adv['Newspaper'].to_numpy(); sales = adv['Sales'].to_numpy()
nA = len(sales)

def ols(Xm, y):
    beta, _, _, _ = np.linalg.lstsq(Xm, y, rcond=None)
    resid = y - Xm @ beta
    p = Xm.shape[1]
    s2 = resid @ resid / (len(y) - p)
    se = np.sqrt(np.diag(s2 * np.linalg.inv(Xm.T @ Xm)))
    return beta, se, resid

X = np.column_stack([np.ones(nA), TV])
beta, se, resid = ols(X, sales)
fitted = X @ beta
het = np.corrcoef(np.abs(resid), TV)[0, 1]
print(f"Sales ~ TV:  b0 = {beta[0]:.4f}   b1 = {beta[1]:.5f}   classical SE(b1) = {se[1]:.6f}")
print(f"  corr(|residual|, TV) = {het:.2f}  (>0 -> variance grows with TV: HETEROSKEDASTIC)\n")

rng = np.random.default_rng(SEED)
rc = resid - resid.mean()
br = np.empty(B); bp = np.empty(B); bw = np.empty(B)
for b in range(B):
    br[b] = np.linalg.lstsq(X, fitted + rng.choice(rc, nA, replace=True), rcond=None)[0][1]
    idx = rng.integers(0, nA, nA)
    bp[b] = np.linalg.lstsq(X[idx], sales[idx], rcond=None)[0][1]
    v = rng.choice([-1.0, 1.0], nA)
    bw[b] = np.linalg.lstsq(X, fitted + resid * v, rcond=None)[0][1]
print(f"bootstrap SE(b1) (B={B:,}):  residual = {br.std(ddof=1):.5f}"
      f"   pairs = {bp.std(ddof=1):.5f}   wild = {bw.std(ddof=1):.5f}")
print(f"  residual ~ classical ({se[1]:.5f}) -- both ASSUME homoskedasticity and UNDERSTATE;")
print(f"  pairs ~ wild inflate the SE by ~{100*(bp.std(ddof=1)/br.std(ddof=1)-1):.0f}% (the honest value).")

Sales ~ TV:  b0 = 7.0326   b1 = 0.04754   classical SE(b1) = 0.002691
  corr(|residual|, TV) = 0.48  (>0 -> variance grows with TV: HETEROSKEDASTIC)

bootstrap SE(b1) (B=10,000):  residual = 0.00267   pairs = 0.00284   wild = 0.00284
  residual ~ classical (0.00269) -- both ASSUME homoskedasticity and UNDERSTATE;
  pairs ~ wild inflate the SE by ~6% (the honest value).


# Figure: the heteroskedastic fan (left) and the three bootstrap slope SEs (right).
fig, axes = plt.subplots(1, 2, figsize=(11, 4.0))
sc = axes[0].scatter(TV, sales, c=np.abs(resid), cmap='viridis', s=22, edgecolor='white', lw=0.3)
order = np.argsort(TV)
axes[0].plot(TV[order], fitted[order], color=RED, lw=2, label='OLS fit')
fig.colorbar(sc, ax=axes[0], label='|residual|')
axes[0].set_xlabel('TV budget ($000s)'); axes[0].set_ylabel('Sales (000s units)')
axes[0].set_title(f'Variance fans out with TV  (corr(|resid|,TV) = {het:.2f})'); axes[0].legend()

for dist, col, name in [(br, BLUE, 'residual'), (bp, GREEN, 'pairs'), (bw, ORANGE, 'wild')]:
    axes[1].hist(dist, bins=50, density=True, alpha=0.5, color=col,
                 label=f'{name}  (SE={dist.std(ddof=1):.5f})')
axes[1].axvline(beta[1], color=RED, lw=1.6, ls='--', label=f'$\\hat b_1$={beta[1]:.4f}')
axes[1].set_xlabel(r'bootstrap slope $b_1^*$'); axes[1].set_title('Slope SE: residual understates; pairs/wild inflate')
axes[1].legend(fontsize=8); plt.tight_layout(); plt.show()


ls = pd.read_csv(url('law_school.csv'))
lsat = ls['LSAT'].to_numpy(); gpa = ls['GPA'].to_numpy()
nL = len(lsat)
r_hat = float(np.corrcoef(lsat, gpa)[0, 1])

rng = np.random.default_rng(SEED)
r_star = np.empty(B)
for b in range(B):
    idx = rng.integers(0, nL, size=nL)
    r_star[b] = np.corrcoef(lsat[idx], gpa[idx])[0, 1]
se_r = r_star.std(ddof=1)
bias_r = r_star.mean() - r_hat
print(f"law-school correlation: n = {nL}   r_hat = {r_hat:.4f}   bootstrap SE = {se_r:.4f}")
print(f"  1. histogram skew(r*) = {stats.skew(r_star):+.3f}   (left-skewed -> percentile over normal)")
print(f"  2. excess kurtosis     = {stats.kurtosis(r_star):+.3f}   (curves the Q-Q -> a BCa flag)")
se_grid = [500, 1000, 2000, 5000, 10000]
print("  3. SE vs B : " + ", ".join(f"B={g}:{r_star[:g].std(ddof=1):.4f}" for g in se_grid)
      + "  (flattens by ~2,000)")
print(f"  4. bias ratio |bias|/SE = {abs(bias_r)/se_r:.3f}   (negligible -> the issue is SKEW, not bias)")

law-school correlation: n = 15   r_hat = 0.7764   bootstrap SE = 0.1354
  1. histogram skew(r*) = -0.881   (left-skewed -> percentile over normal)
  2. excess kurtosis     = +0.916   (curves the Q-Q -> a BCa flag)
  3. SE vs B : B=500:0.1296, B=1000:0.1305, B=2000:0.1346, B=5000:0.1359, B=10000:0.1354  (flattens by ~2,000)
  4. bias ratio |bias|/SE = 0.043   (negligible -> the issue is SKEW, not bias)


# Figure: the four-panel diagnostic suite.
fig, ax = plt.subplots(1, 4, figsize=(16, 3.7))
bmean = r_star.mean()
ax[0].hist(r_star, bins=50, density=True, color=BLUE, alpha=0.65, edgecolor='white')
ax[0].axvline(r_hat, color=RED, lw=2, label=fr'$\hat r={r_hat:.3f}$')
ax[0].axvline(bmean, color=ORANGE, lw=2, ls='--', label=f'boot mean {bmean:.3f}')
ax[0].set_title(f'Histogram (skew {stats.skew(r_star):+.2f})'); ax[0].set_xlabel(r'$\hat r^*$'); ax[0].legend(fontsize=8)

stats.probplot(r_star, dist='norm', plot=ax[1])
ax[1].set_title('Normal Q-Q (curved = skew)')
ax[1].get_lines()[0].set(color=BLUE, markersize=2); ax[1].get_lines()[1].set_color(RED)

bgrid = np.arange(100, B + 1, 100)
ax[2].plot(bgrid, [r_star[:g].std(ddof=1) for g in bgrid], color=TEAL)
ax[2].axhline(se_r, color=GRAY, ls='--', lw=1)
ax[2].set_title('SE vs B (flattens)'); ax[2].set_xlabel('B'); ax[2].set_ylabel('running SE')

ax[3].bar(['|bias|', 'SE'], [abs(bias_r), se_r], color=[RED, BLUE])
ax[3].set_title(f'|bias|/SE = {abs(bias_r)/se_r:.3f}')
fig.suptitle('Bootstrap diagnostics: law-school correlation (n = 15, B = 10,000)')
plt.tight_layout(rect=[0, 0, 1, 0.94]); plt.show()


mu, sig = speed.mean(), speed.std(ddof=1)
# Parametric: draw X* ~ N(mu_hat, sig_hat).  Nonparametric: resample the data.
rng = np.random.default_rng(SEED)
par = rng.normal(mu, sig, size=(B, n))
rng = np.random.default_rng(SEED)
npr = speed[rng.integers(0, n, size=(B, n))]

par_sd, npr_sd = par.std(1, ddof=1), npr.std(1, ddof=1)
par_q90, npr_q90 = np.percentile(par, 90, axis=1), np.percentile(npr, 90, axis=1)

print(f"morley Normal fit:  mu_hat = {mu:.2f}   sigma_hat = {sig:.2f}   (n = {n})\n")
print("SE of the standard deviation:")
print(f"  parametric (Normal)    = {par_sd.std(ddof=1):.4f}")
print(f"  nonparametric          = {npr_sd.std(ddof=1):.4f}")
print(f"  closed form s/sqrt(2(n-1)) = {sig/np.sqrt(2*(n-1)):.4f}  <- parametric matches it;"
      " nonparametric is inflated (heavier real tails)\n")
print("SE of the 90th percentile (a quantile):")
print(f"  parametric    = {par_q90.std(ddof=1):.4f}   ({len(np.unique(par_q90)):,} distinct values: SMOOTH)")
print(f"  nonparametric = {npr_q90.std(ddof=1):.4f}   ({len(np.unique(npr_q90))} distinct values: JAGGED)")
print("  the ECDF can only land on observed order statistics, so the nonparametric")
print("  quantile bootstrap is discrete -- the parametric one interpolates smoothly.")

morley Normal fit:  mu_hat = 852.40   sigma_hat = 79.01   (n = 100)

SE of the standard deviation:
  parametric (Normal)    = 5.6078
  nonparametric          = 5.8900
  closed form s/sqrt(2(n-1)) = 5.6150  <- parametric matches it; nonparametric is inflated (heavier real tails)

SE of the 90th percentile (a quantile):
  parametric    = 13.2183   (10,000 distinct values: SMOOTH)
  nonparametric = 11.0794   (32 distinct values: JAGGED)
  the ECDF can only land on observed order statistics, so the nonparametric
  quantile bootstrap is discrete -- the parametric one interpolates smoothly.


# Figure: parametric (smooth) vs nonparametric (jagged) bootstrap of the 90th percentile.
fig, ax = plt.subplots(figsize=(7.6, 4.4))
ax.hist(npr_q90, bins=40, density=True, color=ORANGE, alpha=0.6, edgecolor='white',
        label=f'nonparametric ({len(np.unique(npr_q90))} distinct values -- jagged)')
ax.hist(par_q90, bins=60, density=True, histtype='step', color=BLUE, lw=2.2,
        label='parametric N(mu,sigma) -- smooth')
ax.axvline(np.percentile(speed, 90), color=RED, lw=2, ls='--',
           label=f'$\\hat q_{{0.9}}$ = {np.percentile(speed,90):.1f}')
ax.set_xlabel('90th percentile of speed (km/s - 299000)'); ax.set_ylabel('density')
ax.set_title('Parametric vs nonparametric bootstrap of a quantile (morley)')
ax.legend(fontsize=9); plt.tight_layout(); plt.show()


# Jackknife on the law-school correlation (the BCa input).
jack = np.array([np.corrcoef(np.delete(lsat, i), np.delete(gpa, i))[0, 1] for i in range(nL)])
jbar = jack.mean()
jack_se = np.sqrt((nL - 1) / nL * np.sum((jack - jbar) ** 2))
jack_bias = (nL - 1) * (jbar - r_hat)
accel = np.sum((jbar - jack) ** 3) / (6.0 * np.sum((jbar - jack) ** 2) ** 1.5)
pseudo = nL * r_hat - (nL - 1) * jack       # pseudo-values; their mean is bias-corrected
print(f"law-school correlation  r_hat = {r_hat:.4f}")
print(f"  jackknife SE   = {jack_se:.4f}   (bootstrap SE was {se_r:.4f}  -- close)")
print(f"  jackknife bias = {jack_bias:+.4f}   bias-corrected r = {pseudo.mean():.4f}")
print(f"  acceleration a = {accel:+.4f}   <- this is the BCa 'a' parameter used in Section 4.7")

law-school correlation  r_hat = 0.7764
  jackknife SE   = 0.1425   (bootstrap SE was 0.1354  -- close)
  jackknife bias = -0.0065   bias-corrected r = 0.7828
  acceleration a = -0.0757   <- this is the BCa 'a' parameter used in Section 4.7


# WHERE THE JACKKNIFE FAILS: a non-smooth statistic (the median).
# morley has many ties at 850, so every leave-one-out median is identical -> SE collapses to 0.
jmed = np.array([np.median(np.delete(speed, i)) for i in range(n)])
jmed_se = np.sqrt((n - 1) / n * np.sum((jmed - jmed.mean()) ** 2))
boot_med_speed = bootstrap_dist(speed, np.median, B, SEED)
print(f"median(morley) = {np.median(speed):.1f}")
print(f"  leave-one-out medians: {len(np.unique(jmed))} distinct value(s)  ->  jackknife SE = {jmed_se:.3f}")
print(f"  bootstrap SE(median)  = {boot_med_speed.std(ddof=1):.3f}   (the truthful number)")
print("  The jackknife perturbs the data too gently to move a non-smooth statistic --")
print("  for medians/quantiles use the bootstrap (or a delete-d jackknife).")

median(morley) = 850.0
  leave-one-out medians: 1 distinct value(s)  ->  jackknife SE = 0.000
  bootstrap SE(median)  = 8.073   (the truthful number)
  The jackknife perturbs the data too gently to move a non-smooth statistic --
  for medians/quantiles use the bootstrap (or a delete-d jackknife).


# Figure: the 15 leave-one-out correlations and their pseudo-values.
fig, ax = plt.subplots(figsize=(8.2, 4.0))
xi = np.arange(1, nL + 1)
ax.scatter(xi, jack, color=BLUE, s=45, zorder=3, label=r'leave-one-out $\hat r_{(i)}$')
ax.axhline(r_hat, color=RED, lw=1.8, label=fr'$\hat r$ = {r_hat:.3f}')
ax.axhline(jbar, color=ORANGE, lw=1.4, ls='--', label=fr'$\bar r_{{(\cdot)}}$ = {jbar:.3f}')
imax = int(np.argmax(np.abs(jack - jbar)))
ax.annotate(f'school #{imax+1}\nmost influential', (xi[imax], jack[imax]),
            textcoords='offset points', xytext=(12, -4), fontsize=9, color=PURPLE,
            arrowprops=dict(arrowstyle='->', color=PURPLE))
ax.set_xlabel('school left out (i)'); ax.set_ylabel(r'$\hat r_{(i)}$')
ax.set_title('Delete-1 jackknife: each point omits one law school')
ax.legend(fontsize=9, loc='lower right'); plt.tight_layout(); plt.show()


fsurv = fdf.loc[fdf['Survived'] == 1, 'Fare'].to_numpy()
fdied = fdf.loc[fdf['Survived'] == 0, 'Fare'].to_numpy()
obs_diff = fsurv.mean() - fdied.mean()
pool = np.concatenate([fsurv, fdied]); n1, N = len(fsurv), len(pool)

rng = np.random.default_rng(SEED)
perm_diffs = np.empty(B)
for b in range(B):
    pp = rng.permutation(N)
    perm_diffs[b] = pool[pp[:n1]].mean() - pool[pp[n1:]].mean()
p_perm = (np.sum(np.abs(perm_diffs) >= abs(obs_diff)) + 1) / (B + 1)
p_welch = stats.ttest_ind(fsurv, fdied, equal_var=False).pvalue
print(f"survivors: n = {n1}, mean fare = {fsurv.mean():.2f}   "
      f"non-survivors: n = {N-n1}, mean fare = {fdied.mean():.2f}")
print(f"observed gap = {obs_diff:.2f}   permutation null sd = {perm_diffs.std(ddof=1):.3f}")
print(f"permutation p = {p_perm:.5f}  (resolution floor 1/(B+1) = {1/(B+1):.5f})")
print(f"classical Welch t-test p = {p_welch:.2e}   -> survivors paid decisively more")

survivors: n = 341, mean fare = 48.54   non-survivors: n = 535, mean fare = 22.70
observed gap = 25.84   permutation null sd = 3.494
permutation p = 0.00010  (resolution floor 1/(B+1) = 0.00010)
classical Welch t-test p = 6.54e-11   -> survivors paid decisively more


# Figure: the permutation null distribution with the observed gap far in the tail.
fig, ax = plt.subplots(figsize=(7.6, 4.2))
ax.hist(perm_diffs, bins=55, density=True, color=BLUE, alpha=0.6, edgecolor='white',
        label='permutation null (labels shuffled)')
ax.axvline(obs_diff, color=RED, lw=2.4)
ax.annotate(f'observed gap\n= ${obs_diff:.2f}\n p = {p_perm:.4f}', xy=(obs_diff, 0.01),
            xytext=(obs_diff - 11, 0.06), color=RED, fontsize=10, ha='center', fontweight='bold',
            arrowprops=dict(arrowstyle='->', color=RED))
ax.set_xlim(-obs_diff - 4, obs_diff + 4)
ax.set_xlabel('mean-fare difference (survived - died)'); ax.set_ylabel('density')
ax.set_title('Permutation test: survivors vs non-survivors (Titanic fares)')
ax.legend(loc='upper left', fontsize=9); plt.tight_layout(); plt.show()


MU0 = 792.458
t_obs = (xbar - MU0) / (s / np.sqrt(n))
centered = speed - xbar + MU0                 # null enforcement: shift mean to mu0
rng = np.random.default_rng(SEED)
t_boot = np.empty(B)
for b in range(B):
    bs = rng.choice(centered, size=n, replace=True)
    t_boot[b] = (bs.mean() - MU0) / (bs.std(ddof=1) / np.sqrt(n))
n_exceed = int(np.sum(np.abs(t_boot) >= abs(t_obs)))
p_boot = (n_exceed + 1) / (B + 1)
p_classical = stats.ttest_1samp(speed, MU0).pvalue
print(f"xbar = {xbar:.2f} -> {299000+xbar:.1f} km/s   mu0 = {MU0}   systematic bias = {xbar-MU0:+.2f} km/s")
print(f"T_obs = ({xbar:.2f} - {MU0}) / ({s:.2f}/sqrt({n})) = {t_obs:.3f}")
print(f"bootstrap null T*: mean = {t_boot.mean():+.3f}, sd = {t_boot.std(ddof=1):.3f}  (~ N(0,1), pivotal)")
print(f"  # |T*| >= |T_obs|={abs(t_obs):.2f}:  {n_exceed}")
print(f"  bootstrap p = ({n_exceed}+1)/({B}+1) = {p_boot:.5f}   (floor 1/(B+1) = {1/(B+1):.5f})")
print(f"  classical one-sample t-test p = {p_classical:.3e}")
print("  Reject decisively: Michelson's apparatus overshoots true c by ~60 km/s. The")
print("  bootstrap p cannot resolve below the 1/(B+1) floor; the t-test tail resolves to ~1e-11.")

xbar = 852.40 -> 299852.4 km/s   mu0 = 792.458   systematic bias = +59.94 km/s
T_obs = (852.40 - 792.458) / (79.01/sqrt(100)) = 7.587
bootstrap null T*: mean = +0.031, sd = 1.009  (~ N(0,1), pivotal)
  # |T*| >= |T_obs|=7.59:  0
  bootstrap p = (0+1)/(10000+1) = 0.00010   (floor 1/(B+1) = 0.00010)
  classical one-sample t-test p = 1.824e-11
  Reject decisively: Michelson's apparatus overshoots true c by ~60 km/s. The
  bootstrap p cannot resolve below the 1/(B+1) floor; the t-test tail resolves to ~1e-11.


# Figure: the bootstrap null distribution of T* with T_obs off the chart to the right.
fig, ax = plt.subplots(figsize=(8.0, 4.4))
ax.hist(t_boot, bins=55, density=True, color=TEAL, alpha=0.6, edgecolor='white',
        label=r'bootstrap null $T^*$ (B = 10,000)')
gg = np.linspace(-4.2, 4.2, 300)
ax.plot(gg, stats.norm.pdf(gg), color='#1f2937', lw=1.6, ls='--', label=r'$N(0,1)$')
ax.axvline(t_obs, color=RED, lw=2.4)
ax.annotate(fr'$T_{{obs}}={t_obs:.2f}$' + f'\np = {p_boot:.4f}', xy=(t_obs, 0.015),
            xytext=(t_obs - 2.2, 0.27), color=RED, fontsize=11, ha='center', fontweight='bold',
            arrowprops=dict(arrowstyle='->', color=RED, lw=1.6))
ax.set_xlim(-4.5, t_obs + 0.9)
ax.set_xlabel(r'studentized $T = (\bar x - \mu_0)/(s/\sqrt{n})$'); ax.set_ylabel('density')
ax.set_title('One-sample bootstrap test: Michelson 1879 vs true c')
ax.legend(loc='upper center', fontsize=9); plt.tight_layout(); plt.show()


Xf = np.column_stack([np.ones(nA), TV, radio, news])
bf, sef, _ = ols(Xf, sales)
names = ['const', 'TV', 'Radio', 'Newspaper']
print('Full model  Sales ~ TV + Radio + Newspaper:')
for nm, bb, ss in zip(names, bf, sef):
    t = bb / ss
    print(f'  {nm:10} coef={bb:+.5f}  SE={ss:.5f}  t={t:+.2f}  p={2*stats.t.sf(abs(t), nA-4):.4f}')

j = 3
t_obs_news = bf[j] / sef[j]
Xr = np.column_stack([np.ones(nA), TV, radio])     # restricted model under H0
br_, _, residr = ols(Xr, sales)
fitr = Xr @ br_
rng = np.random.default_rng(SEED)
cnt = 0
for b in range(B):
    ystar = fitr + residr * rng.choice([-1.0, 1.0], nA)
    bb, ssb, _ = ols(Xf, ystar)
    if abs(bb[j] / ssb[j]) >= abs(t_obs_news):
        cnt += 1
p_wild = (cnt + 1) / (B + 1)
print(f"\ntest H0: beta_Newspaper = 0  ->  t_obs = {t_obs_news:+.2f}")
print(f"  classical p = {2*stats.t.sf(abs(t_obs_news), nA-4):.3f}   wild-bootstrap p = {p_wild:.3f}")
print("  Both agree: Newspaper adds nothing. The wild bootstrap gives VALID inference under")
print("  the documented heteroskedasticity without assuming constant variance.")

Full model  Sales ~ TV + Radio + Newspaper:
  const      coef=+2.93889  SE=0.31191  t=+9.42  p=0.0000
  TV         coef=+0.04576  SE=0.00139  t=+32.81  p=0.0000
  Radio      coef=+0.18853  SE=0.00861  t=+21.89  p=0.0000
  Newspaper  coef=-0.00104  SE=0.00587  t=-0.18  p=0.8599

test H0: beta_Newspaper = 0  ->  t_obs = -0.18
  classical p = 0.860   wild-bootstrap p = 0.877
  Both agree: Newspaper adds nothing. The wild bootstrap gives VALID inference under
  the documented heteroskedasticity without assuming constant variance.


Q = np.quantile(r_star, [0.025, 0.975])
ci_perc = (Q[0], Q[1])
ci_basic = (2 * r_hat - Q[1], 2 * r_hat - Q[0])
z = stats.norm.ppf(0.975)
ci_norm = (r_hat - z * se_r, r_hat + z * se_r)

# BCa (beyond-core): z0 from share below r_hat, a = jackknife acceleration (Section 4.5).
z0 = stats.norm.ppf(np.mean(r_star < r_hat))
def bca_endpoint(pp):
    zp = stats.norm.ppf(pp)
    adj = z0 + (z0 + zp) / (1 - accel * (z0 + zp))
    return float(np.quantile(r_star, stats.norm.cdf(adj)))
ci_bca = (bca_endpoint(0.025), bca_endpoint(0.975))

print(f"law-school correlation  r_hat = {r_hat:.4f}  (bounded on [-1, 1])")
def flag(hi):
    return '   <-- UPPER > 1: IMPOSSIBLE' if hi > 1 else ''
print(f"  Percentile  : [{ci_perc[0]:.3f}, {ci_perc[1]:.3f}]   respects the boundary")
print(f"  Basic       : [{ci_basic[0]:.3f}, {ci_basic[1]:.3f}]{flag(ci_basic[1])}")
print(f"  Normal      : [{ci_norm[0]:.3f}, {ci_norm[1]:.3f}]{flag(ci_norm[1])}")
print(f"  BCa (beyond): [{ci_bca[0]:.3f}, {ci_bca[1]:.3f}]   z0={z0:+.3f}  a={accel:+.3f}")
print("  At n=15 the left skew makes Basic & Normal push past rho=1 (impossible). Percentile")
print("  respects the bound but is only first-order accurate; BCa is the second-order fix.")

law-school correlation  r_hat = 0.7764  (bounded on [-1, 1])
  Percentile  : [0.456, 0.962]   respects the boundary
  Basic       : [0.591, 1.097]   <-- UPPER > 1: IMPOSSIBLE
  Normal      : [0.511, 1.042]   <-- UPPER > 1: IMPOSSIBLE
  BCa (beyond): [0.317, 0.943]   z0=-0.105  a=-0.076
  At n=15 the left skew makes Basic & Normal push past rho=1 (impossible). Percentile
  respects the bound but is only first-order accurate; BCa is the second-order fix.


# Figure: the bootstrap distribution + the four CIs; impossible overshoots highlighted.
fig = plt.figure(figsize=(8.8, 5.6))
gs = fig.add_gridspec(2, 1, height_ratios=[3, 2], hspace=0.12)
a0, a1 = fig.add_subplot(gs[0]), fig.add_subplot(gs[1])
a0.hist(r_star, bins=60, density=True, color=ORANGE, alpha=0.55, edgecolor='white',
        label=r'bootstrap $\hat r^*$ (B=10,000)')
a0.axvline(r_hat, color=RED, lw=2, label=fr'$\hat r$ = {r_hat:.3f}')
a0.axvline(1.0, color=GRAY, lw=1.6, ls='--', label=r'boundary $\rho=1$')
a0.axvspan(1.0, 1.16, color=RED, alpha=0.08)
a0.set_ylabel('density'); a0.set_title('Bootstrap CIs for a bounded parameter: law-school correlation (n=15)')
a0.legend(loc='upper left', fontsize=9); plt.setp(a0.get_xticklabels(), visible=False)

methods = [('Percentile', ci_perc, BLUE), ('Basic', ci_basic, PURPLE),
           ('Normal', ci_norm, TEAL), ('BCa', ci_bca, GREEN)]
yt, yl = [], []
for i, (nm, (lo, hi), col) in enumerate(methods):
    y = len(methods) - i; yt.append(y); yl.append(nm)
    a1.plot([lo, hi], [y, y], color=col, lw=5, solid_capstyle='round')
    a1.plot([r_hat], [y], '|', color=RED, ms=13, mew=2, zorder=5)
    if hi > 1.0:
        a1.plot([1.0, hi], [y, y], color=RED, lw=5, solid_capstyle='round')
        a1.text(hi + 0.012, y, f'{hi:.2f} > 1', ha='left', va='center', fontsize=8, color=RED)
a1.axvline(1.0, color=GRAY, lw=1.6, ls='--')
a1.set_yticks(yt); a1.set_yticklabels(yl); a1.set_ylim(0.4, len(methods) + 0.6)
a1.set_xlim(*a0.get_xlim()); a1.set_xlabel(r'correlation $\rho$'); a1.grid(alpha=0.25, axis='x')
plt.tight_layout(); plt.show()

/tmp/ipykernel_120918/1224531615.py:26: UserWarning: This figure includes Axes that are not compatible with tight_layout, so results might be incorrect.
  plt.tight_layout(); plt.show()


xbarF, se_hatF = fares.mean(), sF / np.sqrt(nF)
rng = np.random.default_rng(SEED)
bsF = fares[rng.integers(0, nF, size=(B, nF))]
meansF = bsF.mean(1); sesF = bsF.std(1, ddof=1) / np.sqrt(nF)
tstar = (meansF - xbarF) / sesF
qlo, qhi = np.percentile(tstar, [2.5, 97.5])
ci_stud = (xbarF - qhi * se_hatF, xbarF - qlo * se_hatF)

QF = np.percentile(meansF, [2.5, 97.5])
ci_percF = (QF[0], QF[1]); ci_basicF = (2 * xbarF - QF[1], 2 * xbarF - QF[0])
ci_normF = (xbarF - z * meansF.std(ddof=1), xbarF + z * meansF.std(ddof=1))
print(f"Titanic mean fare  xbar = {xbarF:.3f}   SE_hat = s/sqrt(n) = {se_hatF:.4f}")
print(f"  bootstrap-t quantiles: t*_.025 = {qlo:.3f},  t*_.975 = {qhi:.3f}   (vs +/-1.96: ASYMMETRIC)")
print(f"  Percentile  : ({ci_percF[0]:.3f}, {ci_percF[1]:.3f})")
print(f"  Basic       : ({ci_basicF[0]:.3f}, {ci_basicF[1]:.3f})")
print(f"  Normal      : ({ci_normF[0]:.3f}, {ci_normF[1]:.3f})")
print(f"  Studentized : ({ci_stud[0]:.3f}, {ci_stud[1]:.3f})   <- second-order accurate, shifts up for skew")

Titanic mean fare  xbar = 32.756   SE_hat = s/sqrt(n) = 1.6872
  bootstrap-t quantiles: t*_.025 = -2.196,  t*_.975 = 1.750   (vs +/-1.96: ASYMMETRIC)
  Percentile  : (29.636, 36.152)
  Basic       : (29.359, 35.875)
  Normal      : (29.482, 36.029)
  Studentized : (29.803, 36.460)   <- second-order accurate, shifts up for skew


from sklearn.model_selection import KFold
from sklearn.linear_model import LinearRegression

def loocv_mse(formula):
    fit = smf.ols(formula, data=adv).fit()
    h = fit.get_influence().hat_matrix_diag           # leverages h_ii
    e = fit.resid.to_numpy()
    return float(np.mean((e / (1 - h)) ** 2))         # closed-form LOOCV, no refitting

def kfold_mse(cols, k, seed=SEED):
    Xm = adv[cols].to_numpy(); y = adv['Sales'].to_numpy()
    kf = KFold(n_splits=k, shuffle=True, random_state=seed); errs = []
    for tr, te in kf.split(Xm):
        m = LinearRegression().fit(Xm[tr], y[tr])
        errs.append(np.mean((y[te] - m.predict(Xm[te])) ** 2))
    return float(np.mean(errs))

models = [('Sales ~ TV', ['TV']),
          ('Sales ~ TV + Radio', ['TV', 'Radio']),
          ('Sales ~ TV + Radio + Newspaper', ['TV', 'Radio', 'Newspaper'])]
print(f"{'model':<34}{'LOOCV':>9}{'5-fold':>9}{'10-fold':>9}")
cv_rows = []
for f, cols in models:
    loo, k5, k10 = loocv_mse(f), kfold_mse(cols, 5), kfold_mse(cols, 10)
    cv_rows.append((f, loo, k5, k10))
    print(f"{f:<34}{loo:>9.3f}{k5:>9.3f}{k10:>9.3f}")
print("\n  Adding Radio CRASHES test MSE (10.74 -> 2.91); adding Newspaper makes it slightly")
print("  WORSE (2.91 -> 2.95) -- out-of-sample, Newspaper is not just insignificant, it hurts.")

model                                 LOOCV   5-fold  10-fold
Sales ~ TV                           10.741   10.735   10.666
Sales ~ TV + Radio                    2.911    2.921    2.916
Sales ~ TV + Radio + Newspaper        2.947    2.965    2.955

  Adding Radio CRASHES test MSE (10.74 -> 2.91); adding Newspaper makes it slightly
  WORSE (2.91 -> 2.95) -- out-of-sample, Newspaper is not just insignificant, it hurts.


# Figure: cross-validated test MSE by model (LOOCV vs 5-fold vs 10-fold).
labels = ['TV', 'TV+Radio', 'TV+Radio+News']
loo = [r[1] for r in cv_rows]; k5 = [r[2] for r in cv_rows]; k10 = [r[3] for r in cv_rows]
xpos = np.arange(len(labels)); w = 0.26
fig, ax = plt.subplots(figsize=(8.0, 4.2))
for off, vals, col, nm in [(-w, loo, BLUE, 'LOOCV'), (0, k5, GREEN, '5-fold'), (w, k10, ORANGE, '10-fold')]:
    bars = ax.bar(xpos + off, vals, width=w, color=col, edgecolor='white', label=nm)
    for bb, v in zip(bars, vals):
        ax.text(bb.get_x() + w / 2, v + 0.12, f'{v:.2f}', ha='center', fontsize=8)
ax.set_xticks(xpos); ax.set_xticklabels(labels)
ax.set_ylabel('cross-validated test MSE'); ax.set_title('Model selection by cross-validation (Advertising)')
ax.legend(); plt.tight_layout(); plt.show()


# Solution 1.
iqr = lambda a: np.percentile(a, 75) - np.percentile(a, 25)
boot_iqr = bootstrap_dist(fares, iqr, B, SEED)
print(f"IQR(fares) = {iqr(fares):.2f}   bootstrap SE = {boot_iqr.std(ddof=1):.4f}")
print(f"  vs SE(mean) = {se_mean:.3f}, SE(median) = {se_med:.3f}")
print("  The IQR is a quantile-based (robust) statistic, so like the median it has no")
print("  simple closed-form SE -- the bootstrap supplies one directly.")

IQR(fares) = 23.35   bootstrap SE = 1.7182
  vs SE(mean) = 1.670, SE(median) = 0.697
  The IQR is a quantile-based (robust) statistic, so like the median it has no
  simple closed-form SE -- the bootstrap supplies one directly.


# Solution 2.
print(f"SE(mean): parametric = {par.mean(1).std(ddof=1):.4f}   "
      f"nonparametric = {npr.mean(1).std(ddof=1):.4f}   closed s/sqrt(n) = {sig/np.sqrt(n):.4f}")
print("  The mean is a LINEAR functional: by the CLT its sampling distribution depends on")
print("  F only through its variance, which both bootstraps estimate equally well. The SD")
print("  and quantiles depend on higher-order shape, where the Normal model adds (or imposes)")
print("  information -- so there the two bootstraps part ways.")

SE(mean): parametric = 7.8493   nonparametric = 7.8476   closed s/sqrt(n) = 7.9011
  The mean is a LINEAR functional: by the CLT its sampling distribution depends on
  F only through its variance, which both bootstraps estimate equally well. The SD
  and quantiles depend on higher-order shape, where the Normal model adds (or imposes)
  information -- so there the two bootstraps part ways.


# Solution 3.
pc = pd.to_numeric(fdf['Pclass'], errors='coerce')
f1 = fdf.loc[pc == 1, 'Fare'].to_numpy(); f3 = fdf.loc[pc == 3, 'Fare'].to_numpy()
obs = f1.mean() - f3.mean()
poolx = np.concatenate([f1, f3]); m1, M = len(f1), len(f1) + len(f3)
rng = np.random.default_rng(SEED)
dd = np.empty(B)
for b in range(B):
    pp = rng.permutation(M); dd[b] = poolx[pp[:m1]].mean() - poolx[pp[m1:]].mean()
p = (np.sum(np.abs(dd) >= abs(obs)) + 1) / (B + 1)
print(f"1st class mean fare = {f1.mean():.2f} (n={m1}),  3rd class = {f3.mean():.2f} (n={M-m1})")
print(f"observed gap = {obs:.2f}   permutation p = {p:.5f}  (floor {1/(B+1):.5f}) -> reject decisively")

1st class mean fare = 86.15 (n=211),  3rd class = 13.79 (n=487)
observed gap = 72.36   permutation p = 0.00010  (floor 0.00010) -> reject decisively


# Solution 4.
for nm, (lo, hi) in [('Percentile', ci_perc), ('Basic', ci_basic),
                     ('Normal', ci_norm), ('BCa', ci_bca)]:
    legal = 'legal' if hi <= 1.0 else 'ILLEGAL (upper > 1)'
    print(f"  {nm:11}: [{lo:.3f}, {hi:.3f}]  width = {hi-lo:.3f}   {legal}")
print("  Basic & Normal buy their width by spilling past rho=1; percentile and BCa keep the")
print("  interval inside [-1,1], with BCa correcting the skew/bias the percentile ignores.")

  Percentile : [0.456, 0.962]  width = 0.506   legal
  Basic      : [0.591, 1.097]  width = 0.506   ILLEGAL (upper > 1)
  Normal     : [0.511, 1.042]  width = 0.531   ILLEGAL (upper > 1)
  BCa        : [0.317, 0.943]  width = 0.625   legal
  Basic & Normal buy their width by spilling past rho=1; percentile and BCa keep the
  interval inside [-1,1], with BCa correcting the skew/bias the percentile ignores.

§	Topic	Real-data worked example	Status
4.1	The sampling-distribution problem	Michelson speed-of-light: which SEs have a formula?	core
4.2	Empirical CDF & plug-in principle	morley ECDF + DKW band; plug-in variance bias	core
4.3	Nonparametric bootstrap	Titanic fare SE(mean) vs SE(median); Advertising regression bootstrap; diagnostics	core
4.4	Parametric bootstrap	morley Normal model: efficiency vs the nonparametric bootstrap	core
4.5	Jackknife methods	leave-one-out on the law-school correlation; where it fails	Optional / self-study
4.6	Bootstrap hypothesis testing	Titanic-fare permutation test; morley one-sample test; Newspaper wild-bootstrap test	core
4.7	Bootstrap confidence intervals	law-school correlation (percentile/basic/normal); Titanic-fare studentized CI	core (BCa = beyond-core)
4.8	Cross-validation	LOOCV + K-fold model selection on Advertising	Optional / self-study
4.9	Summary & connections	formula table → Chapter 5 (Bayesian inference)	core

Quantity	Formula	This chapter's example
Empirical CDF	$\hat F_n(x)=\frac1n\sum\mathbf 1\{X_i\le x\}$	morley ECDF
DKW band	$\varepsilon_n=\sqrt{\ln(2/\alpha)/(2n)}$	$\varepsilon_{100}=0.136$
Plug-in bias (variance)	$-\sigma^2/n$	morley $-62$, bootstrap $-73$
Bootstrap SE	$\operatorname{sd}(\hat\theta^_1,\dots,\hat\theta^_B)$	Titanic SE(median) $=0.70$
Regression bootstrap	residual / pairs / wild	Advertising slope, pairs $\approx$ wild
Parametric bootstrap	simulate $X^*\sim F_{\hat\theta}$	morley SD: $5.61$ vs closed $5.62$
Jackknife SE / accel.	$\sqrt{\tfrac{n-1}{n}\sum(\hat\theta_{(i)}-\bar\theta)^2}$	corr SE $0.143$; $a=-0.076$
Permutation $p$	$(#{	D^*	\ge	D	}+1)/(B+1)$	survivors paid more, $p$ at floor
One-sample bootstrap test	center at $\mu_0$, studentize	Michelson $T_{\rm obs}=7.6$
Percentile / studentized CI	quantiles of $\hat\theta^$ / $T^$	corr (bounded); fare (skew)
LOOCV (linear)	$\frac1n\sum(e_i/(1-h_{ii}))^2$	Advertising model choice

Chapter 4 — Resampling Methods (the Bootstrap)¶

Learning outcomes¶

Section map¶

Section 4.1 — The Sampling-Distribution Problem¶

Section 4.2 — The Empirical CDF and the Plug-in Principle¶

Section 4.3 — The Nonparametric Bootstrap¶

Bootstrap standard errors — Titanic fares (mean vs median)¶

Regression bootstrap — Advertising (residual / pairs / wild)¶

Bootstrap diagnostics — the law-school correlation¶

Section 4.4 — The Parametric Bootstrap¶

Section 4.5 — Jackknife Methods¶

Section 4.6 — Bootstrap & Permutation Hypothesis Testing¶

Permutation test — did survivors pay higher fares?¶

One-sample bootstrap test — Michelson vs the true speed of light¶

Bootstrap test for a regression coefficient — Newspaper (wild bootstrap)¶

Section 4.7 — Bootstrap Confidence Intervals¶

A bounded parameter exposes the bad intervals — law-school correlation¶

A studentized (bootstrap-$t$) interval — Titanic mean fare¶

Section 4.8 — Cross-Validation¶

Exercises¶

Section 4.9 — Summary & connections forward¶