## basic procedure
Let $X_1,...,X_n \overset{iid}{\sim}N(0, 1)$ and let $\pi(\mu) = 1/10$ for $\mu \in (-\infty, \infty)$.
```R
set.seed(99109)
library(ggplot2)
m = 10000
mu_grid = seq(-2, 2, length.out=m)
n = 3
x = rnorm(n, 0, 1)
prior = function(mu){
return(1/10) # flat prior
}
likelihood = function(mu){
l = 1
for(i in 1:n){
l = l * dnorm(x[i], mu, 1)
}
return(l)
}
numerator = function(mu){
prior(mu) * likelihood(mu)
}
Z = integrate(numerator, -Inf, Inf)$val
posterior = function(mu){
p = numerator(mu) / Z
return(p)
}
# Show that the posterior is proper (sums to 1)
integrate(posterior, -Inf, Inf)
# Combine to dataframe
df = data.frame(
mu=mu_grid,
prior=prior(mu_grid),
likelihood=likelihood(mu_grid),
posterior=posterior(mu_grid)
)
# Plot
ggplot(data=df) +
aes(x=mu) +
geom_line(aes(y=prior, color='prior')) +
geom_line(aes(y=likelihood*600, color='likelihood')) +
geom_line(aes(y=posterior, color='posterior')) +
theme_minimal()
```
Though not analytically required, use logs to avoid [[numerical underflow]] when calculating the likelihood.
```R
log_likelihood = function(mu){
log_l = 0
for(i in 1:n){
log_l = log_l + dnorm(x[i], mu, 1, log=TRUE)
}
return(log_l)
}
likelihood = function(mu){
exp(log_likelihood(mu))
}
```
To get a point estimate for $\mu$ extract the index and value of the maximizer for the posterior.
```R
# Get index of maximizer
which.max(posterior(mu_grid))
# Get maximum value
mu_grid[which.max(posterior(mu_grid))]
```
To get a [[credible interval]] from a known distribution, use the `qnorm` function or its equivalent. For an unknown posterior distribution, first normalize the posterior values for the chosen grid spacing, then calculate the cumulative distribution function and find the relevant quantiles.
```R
sig_level = 0.05
unnormalized_posterior_values = posterior(mu_grid)
delta_mu <- mu_grid[2] - mu_grid[1] # Assuming uniform grid spacing
normalized_posterior_values <- unnormalized_posterior_values / (sum(unnormalized_posterior_values) * delta_mu)
cumulative_posterior <- cumsum(normalized_posterior_values * delta_mu)
lower_bound_index <- which(cumulative_posterior >= sig_level/2)[1]
upper_bound_index <- which(cumulative_posterior >= 1 - sig_level/2)[1]
lower_credible_interval <- mu_grid[lower_bound_index]
upper_credible_interval <- mu_grid[upper_bound_index]
cat(sprintf("95%% Credible Interval (Manual): [%.4f, %.4f]",
lower_credible_interval, upper_credible_interval))
ggplot() +
aes(x = mu_grid) +
geom_line(aes(y=normalized_posterior_values), color = "blue") +
geom_vline(xintercept = lower_credible_interval, color = "red", linetype = "dashed", linewidth = 0.8) +
geom_vline(xintercept = upper_credible_interval, color = "red", linetype = "dashed", linewidth = 0.8) +
labs(
x = "Parameter (mu)",
y = "Posterior Density",
title = "Posterior Distribution with 95% Credible Interval"
) +
theme_minimal()
```