R Code for Figure 3-5
## Code to reproduce Figure 3-5.
## Author: Christopher Paciorek (UC Berkeley)
## R version: 4.3.2
library(evd) # Version 2.3-6.1
## Location and scale parameter estimates from GEV fit to GHCN daily
## precipitation data for Berkeley, California, but qualitative
## results are similar for parameters from GEV fits for other US locations.
location <- 4.5 # in centimeters
scale <- 1.5 # in centimeters
## Generate grid of shape parameter values for plotting.
shape_grid <- c(seq(-0.25, 0, length = 30),
                seq(0, 0.2, length = 20))
bounded_shape_grid <- shape_grid[shape_grid < 0]
## Calculate return values as quantiles of GEV distribution.
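## (The level with annual exceedance probability p is the 1-p quantile of the GEV;
## e.g., p = 1e-4 corresponds to the 0.9999 quantile, i.e., the 10,000-year return value.)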
rv_0.0001 <- sapply(shape_grid, function(shape) qgev(1-1/1e4, location, scale, shape))
rv_0.00001 <- sapply(shape_grid, function(shape) qgev(1-1/1e5, location, scale, shape))
rv_0.000001 <- sapply(shape_grid, function(shape) qgev(1-1/1e6, location, scale, shape))
## Calculate upper bounds for negative shape parameter values.
ub <- sapply(bounded_shape_grid, function(shape) location-scale/shape)
png('plot-shape-rv.png', height = 500, width = 600)
plot(shape_grid, rv_0.000001, type = 'l', col = 'red',
     xlab = 'shape parameter', ylab = 'AEP level or upper bound (cm)', ylim = c(0, 120))
lines(shape_grid, rv_0.00001, col = 'blue')
lines(shape_grid, rv_0.0001, col = 'green')
lines(bounded_shape_grid, ub, col = 'black')
legend('topleft', legend = c('p = 0.0001', 'p = 0.00001', 'p = 0.000001', 'upper bound'),
       col = c('green', 'blue', 'red', 'black'), lty = rep(1, 4), bty = 'n')
text(x = 0.1, y = 0, "no upper bound")
text(x = -0.1, y = 0, "upper bound present")
dev.off()
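A quick numerical check, not part of the original script, of the behavior the figure illustrates: for a negative shape the GEV quantiles approach the finite upper bound location - scale/shape as the annual exceedance probability shrinks, while for a positive shape they grow without bound (the shape values of -0.1 and 0.1 below are illustrative).
location - scale/(-0.1)                  # upper bound for shape = -0.1: 19.5 cm
qgev(1 - 1e-6, location, scale, -0.1)    # below the bound
qgev(1 - 1e-9, location, scale, -0.1)    # closer to the bound
qgev(1 - 1e-9, location, scale, 0.1)     # positive shape: no upper bound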
R Code for Sample Size Calculation (Figure 5-3)
## R version: 4.3.2
library(evd) # Version 2.3-6.1
library(pracma) # Version 2.4.4
calc_sample_size <- function(location = 0, scale = 1, shape = 0.1,
                             return_period = 100, se_proportion = 0.125,
                             upper_bound = FALSE, verbose = FALSE) {
    ## Calculates the sample size needed to limit the standard error of the estimated
    ## return value (or upper bound) to `se_proportion` of the estimate, using
    ## the expected information matrix for the GEV distribution and the delta method
    ## to approximate the variance of the estimate as a function of the estimated
    ## parameters.
    ## If `se_proportion` is 0.125, that corresponds to a confidence interval whose
    ## length is half (50% = 0.125*4) the magnitude of the return value (or upper bound).
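    ## (Reasoning: an approximate 95% confidence interval is the estimate plus or minus
    ## roughly 2 standard errors, so its length is about 4*SE = 4*0.125 = 0.5 times the estimate.)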
    ## Arguments:
    ##   location: location parameter of the GEV model.
    ##   scale: scale parameter of the GEV model.
    ##   shape: shape parameter of the GEV model.
    ##   return_period: desired return period (years); not used if upper_bound is TRUE.
    ##   se_proportion: desired magnitude of the standard error of the return value (or upper
    ##     bound) as a proportion of the return value (or upper bound) estimate.
    ##   upper_bound: if TRUE, determine the sample size for the upper bound rather than the return value.
    ##   verbose: if TRUE, print the return value (or upper bound) implied by the given parameters.
    ## Output: estimate of the sample size (in years for GEV block-maxima estimation and
    ##   number of exceedances for threshold exceedance-based estimation).
    ## Authors: Daniel Cooley (Colorado State University) and Christopher Paciorek (UC Berkeley).
    if (shape >= 1/2) {
        stop("Method not supported if variance is infinite (shape >= 1/2)")
    }
    if (upper_bound && shape >= 0) {
        stop("Cannot have an upper bound if shape >= 0")
    }
    ## Calculate the expected information matrix.
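    ## The (j, k) entry of the expected information is the integral over y of
    ## -d^2 log f(y; theta) / (d theta_j d theta_k) times the GEV density f(y; theta);
    ## this integral is approximated numerically below on a grid of y values.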
    ## Set up grid of observation values based on quantiles.
    y_low <- qgev(1e-5, loc = location, scale = scale, shape = shape)
    y_high <- qgev(1 - 1e-5, loc = location, scale = scale, shape = shape)
    y_len <- 2000
    y <- seq(y_low, y_high, length.out = y_len)
    ## Evaluate density.
    dens_values <- dgev(y, loc = location, scale = scale, shape = shape)
    dens_array <- array(dens_values, dim = c(y_len, 3, 3))
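    ## (The density values are replicated across the 3x3 parameter dimensions so they
    ## can be multiplied elementwise against the array of Hessians computed below.)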
    ## Use numerical differentiation to obtain Hessian.
    param_vec <- c(location, scale, shape)
    hess_array <- array(dim = c(y_len, 3, 3))
    loglik <- function(par, y) { # Wrapper providing the parameter vector as the first argument.
        return(dgev(y, loc = par[1], scale = par[2], shape = par[3], log = TRUE))
    }
    for (i in seq(1, y_len)) {
        hess_array[i, , ] <- hessian(loglik, param_vec, y = y[i])
    }
    ## Alternative to avoid the loop:
    ## tmp <- sapply(y, function(val) hessian(loglik, param_vec, y = val))
    ## hess_array <- array(t(tmp), c(y_len, 3, 3))
    ## Use trapezoidal rule to estimate integral over the density.
    integrand_array <- -hess_array * dens_array
    diff_array <- array(diff(y), dim = c((y_len - 1), 3, 3))
    trapezoids <- (integrand_array[-1, , ] + integrand_array[-y_len, , ]) / 2 * diff_array
    exp_info_mtx <- apply(trapezoids, c(2, 3), sum)
    cov_mtx <- solve(exp_info_mtx)  # Asymptotic var-cov matrix of parameter estimates.
    ## Use the information matrix to estimate the sample size.
    if (upper_bound) {
        ub_fun <- function(par) {
            return(par[1] - par[2]/par[3])
        }
        ub <- ub_fun(c(location, scale, shape))
        se <- se_proportion * ub
        if (verbose) print(paste("upper bound is", round(ub, 2)))
        grad_vec <- grad(ub_fun, param_vec)
    } else {
        prob <- 1 - 1/return_period
        qTile <- qgev(prob, loc = location, scale = scale, shape = shape)
        se <- se_proportion * qTile
        if (verbose) print(paste(prob, "quantile is", round(qTile, 2)))
        qTileFn <- function(par, p) {
            return(qgev(p, loc = par[1], scale = par[2], shape = par[3]))
        }
        grad_vec <- grad(qTileFn, param_vec, p = prob)
    }
    ## Calculate the variance of the return value (or upper bound) based on the delta method.
    delta_var <- t(grad_vec) %*% cov_mtx %*% grad_vec
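    ## cov_mtx is the inverse of the expected information for a single observation,
    ## so delta_var approximates the variance of the estimate based on one observation;
    ## with a sample of size n the variance is approximately delta_var / n. Setting
    ## delta_var / n = se^2 and solving for n gives the required sample size.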
    ## Invert to determine sample size.
    n <- ceiling(drop(delta_var) / se^2)  # drop() yields a scalar rather than a 1x1 matrix.
    return(n)
}
## Example usage.
## Sample size for the depth corresponding to an annual exceedance probability of
## p = 0.0001 (a 10,000-year return period).
location <- 4.5
scale <- 1.5
shape <- 0
calc_sample_size(location, scale, shape, return_period = 10000, se_proportion = 0.125)
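## Optionally, verbose = TRUE also prints the targeted quantile (here the 0.9999
## quantile of the specified GEV); this call is an added illustration, not part of
## the original example.
calc_sample_size(location, scale, shape, return_period = 10000,
                 se_proportion = 0.125, verbose = TRUE)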
## Sample size for estimating the upper bound.
location <- 4.5
scale <- 1.5
shape <- -0.1
calc_sample_size(location, scale, shape, upper_bound = TRUE, se_proportion = 0.125)
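As a rough cross-check of the analytic sample size (not part of the original code), one can simulate that many years of annual maxima, refit the GEV by maximum likelihood, and compare the empirical spread of the re-estimated return value to the target; the settings below (100-year return period, shape of 0.1, 200 simulated datasets) are illustrative choices.
set.seed(1)
n_check <- calc_sample_size(4.5, 1.5, 0.1, return_period = 100, se_proportion = 0.125)
true_rv <- qgev(1 - 1/100, loc = 4.5, scale = 1.5, shape = 0.1)
rv_hat <- replicate(200, {
    x <- rgev(n_check, loc = 4.5, scale = 1.5, shape = 0.1)  # simulate n_check annual maxima
    fit <- fgev(x, std.err = FALSE)                          # maximum-likelihood GEV fit (evd)
    par_hat <- fit$estimate
    qgev(1 - 1/100, loc = par_hat["loc"], scale = par_hat["scale"], shape = par_hat["shape"])
})
## The relative spread should be roughly se_proportion (0.125), up to Monte Carlo
## error and the asymptotic approximations used in calc_sample_size().
sd(rv_hat) / true_rv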