- Give you a flavour of Bayesian thinking.
- Go through some simple examples (including code) of how to do this in R.
- But first, a question…
SFG, 13 November 2015
\(P(A|B) = \frac{P(A \cap B)}{P(B)}\)
\(P(B|A) = \frac{P(A \cap B)}{P(A)}\)
\(\Rightarrow P(A \cap B) = P(A|B)P(B) = P(B|A)P(A)\)
\(\Rightarrow P(A|B) = \frac{P(B|A)P(A)}{P(B)}\)
## Simulate N flips of a coin with true P(heads) = p and plot how two
## observers' Beta priors update to their posteriors via conjugacy.
##
## Args:
##   p:                true probability of heads for the simulated coin.
##   N:                number of flips to simulate.
##   M_alpha, M_beta:  shape parameters of Matt's Beta prior (red).
##   C_alpha, C_beta:  shape parameters of Chris's Beta prior (blue).
##
## Called for its plotting side effect; returns nothing useful.
coin_sim <- function(p, N, M_alpha, M_beta, C_alpha, C_beta) {
  ## Simulate coin outcomes in advance. "Success" = heads.
  outcomes <- sample(1:0, N, prob = c(p, 1 - p), replace = TRUE)
  success <- cumsum(outcomes)
  heads <- success[N]
  tails <- N - heads
  ## Matt prior (dashed)
  curve(dbeta(x, M_alpha, M_beta), col = "red", lwd = 2, lty = 2,
        xlim = c(0, 1), ylim = c(0, 20),
        xlab = "P(Heads)", ylab = "Density")
  ## Chris prior (dashed)
  curve(dbeta(x, C_alpha, C_beta), col = "dodgerblue", lwd = 2, lty = 2,
        add = TRUE)
  ## True probability of coin
  lines(x = c(p, p), y = c(0, 20), lty = 2, lwd = 1, col = "grey60")
  ## Posteriors (solid): Beta-binomial conjugacy gives
  ## Beta(prior_alpha + heads, prior_beta + tails).
  curve(dbeta(x, M_alpha + heads, M_beta + tails),
        add = TRUE, col = "red", lwd = 2)
  curve(dbeta(x, C_alpha + heads, C_beta + tails),
        add = TRUE, col = "dodgerblue", lwd = 2)
  ## Legend
  legend("topleft", lty = 1, col = c("red", "dodgerblue"),
         legend = c(paste0("Matt ~ Beta(", M_alpha, ", ", M_beta, ")"),
                    paste0("Chris ~ Beta(", C_alpha, ", ", C_beta, ")")))
  ## Fix: use the full argument name `labels` -- the original `label =`
  ## relied on partial argument matching.
  text(0.75, 17, labels = paste(N, "flips:", heads, "heads,", tails, "tails"))
  invisible(NULL)
}
## Demonstration: as N grows, both posteriors concentrate around the true
## p = 0.25, eventually overwhelming Matt's informative Beta(20, 20) prior
## (centred on 0.5) as well as Chris's flat Beta(1, 1) prior.
coin_sim(p = 0.25, N = 1, M_alpha = 20, M_beta = 20, C_alpha = 1, C_beta = 1 )
coin_sim(p = 0.25, N = 10, M_alpha = 20, M_beta = 20, C_alpha = 1, C_beta = 1 )
coin_sim(p = 0.25, N = 50, M_alpha = 20, M_beta = 20, C_alpha = 1, C_beta = 1 )
coin_sim(p = 0.25, N = 100, M_alpha = 20, M_beta = 20, C_alpha = 1, C_beta = 1 )
coin_sim(p = 0.25, N = 200, M_alpha = 20, M_beta = 20, C_alpha = 1, C_beta = 1 )
\[ p(\theta|X) = \frac{p(X|\theta)p(\theta)}{p(X)} \]
(cont.)
\[ p(\theta|X) \propto p(X|\theta)p(\theta) \]
## Load packages
## Fix: library() errors immediately on a missing package, whereas
## require() just returns FALSE; also dropped rm(list = ls()), which
## wipes the workspace as a side effect and nothing below needs.
library(rjags)
library(dplyr)    ## For manipulating data frames
library(tidyr)    ## For tidying data frames
library(ggplot2)  ## For plotting

## Create some fake data: y = 2 + 4x + N(0, 1) noise, so the "true"
## parameters the model should recover are alpha = 2 and beta = 4.
set.seed(123)
x <- runif(50)
y <- 2 + 4 * x + rnorm(50, mean = 0, sd = 1)
my_data <- data.frame(y, x)
## Quick sanity check: scatterplot of the simulated data (should look
## roughly linear with a positive slope).
plot(my_data$x, my_data$y)
## JAGS model definition: a simple Bayesian linear regression.
## Fix: dropped the redundant paste() wrapper around a single string.
## Note that JAGS parameterizes dnorm by precision (tau = 1/sigma^2),
## so dnorm(0, 0.001) is a vague prior with variance 1000.
model_string <- "model {
  for (i in 1:N){
    y[i] ~ dnorm (mu.y[i], tau.y) ## Our likelihood function (i.e. OLS)
    mu.y[i] <- alpha + beta*x[i]
  }
  ## Noninformative priors for all parameters (e.g. mean zero, large variance)
  alpha ~ dnorm( 0, 0.001 )
  beta ~ dnorm( 0, 0.001 )
  tau.y ~ dgamma(0.001, 0.001)
}"
writeLines(model_string, con = "jags_example.txt")
## Run the JAGS model
## Bundle the data, supply deterministic initial values for the three
## parameters, and compile the model graph (1000 adaptation iterations).
N <- length(x)
my_list <- list(N = N, y = my_data$y, x = my_data$x)
jags_inits <- function() {
  list(alpha = 0, beta = 0, tau.y = 1)
}
## Parameters to monitor when sampling.
params <- c("alpha", "beta", "tau.y")
jags_mod <- jags.model(
  file = "jags_example.txt",
  data = my_list,
  inits = jags_inits,
  n.chains = 1,
  n.adapt = 1000
)
## Compiling model graph ## Resolving undeclared variables ## Allocating nodes ## Graph information: ## Observed stochastic nodes: 50 ## Unobserved stochastic nodes: 3 ## Total graph size: 210 ## ## Initializing model
## Discard the first 1000 iterations, then draw 15000 posterior samples
## for the monitored parameters as a coda mcmc.list.
update(jags_mod, n.iter = 1000) ## burn in
jags_samples <- coda.samples(jags_mod, variable.names = params, n.iter = 15000)
## Some model diagnostics
## Geweke diagnostic: z-scores comparing the mean of the first 10% of the
## chain against the last 50%; |z| > ~2 would suggest non-convergence.
geweke.diag(jags_samples)
## [[1]] ## ## Fraction in 1st window = 0.1 ## Fraction in 2nd window = 0.5 ## ## alpha beta tau.y ## -1.7784 1.7243 -0.9083
## Heidelberger-Welch diagnostic: per-parameter stationarity test plus a
## halfwidth test that the mean is estimated precisely enough.
heidel.diag(as.mcmc(jags_samples))
## ## Stationarity start p-value ## test iteration ## alpha passed 1501 0.0859 ## beta passed 1501 0.1311 ## tau.y passed 1 0.7417 ## ## Halfwidth Mean Halfwidth ## test ## alpha passed 1.86 0.01271 ## beta passed 4.39 0.02117 ## tau.y passed 1.13 0.00383
## Raftery-Lewis diagnostic: estimates the chain length needed to
## estimate the 2.5% quantile to +/- 0.005 with 95% probability.
raftery.diag(as.mcmc(jags_samples))
## ## Quantile (q) = 0.025 ## Accuracy (r) = +/- 0.005 ## Probability (s) = 0.95 ## ## Burn-in Total Lower bound Dependence ## (M) (N) (Nmin) factor (I) ## alpha 10 11942 3746 3.19 ## beta 10 11104 3746 2.96 ## tau.y 2 3844 3746 1.03
## Posterior summary: means, SDs, and quantiles for alpha, beta, and
## tau.y (compare with the true values alpha = 2, beta = 4, tau = 1).
summary(jags_samples)
## ## Iterations = 1001:16000 ## Thinning interval = 1 ## Number of chains = 1 ## Sample size per chain = 15000 ## ## 1. Empirical mean and standard deviation for each variable, ## plus standard error of the mean: ## ## Mean SD Naive SE Time-series SE ## alpha 1.852 0.2808 0.002293 0.006218 ## beta 4.392 0.4670 0.003813 0.010279 ## tau.y 1.131 0.2297 0.001875 0.001954 ## ## 2. Quantiles for each variable: ## ## 2.5% 25% 50% 75% 97.5% ## alpha 1.3020 1.6645 1.851 2.038 2.415 ## beta 3.4653 4.0853 4.398 4.703 5.303 ## tau.y 0.7278 0.9698 1.116 1.276 1.633
## Tidy the MCMC draws into long format and plot each parameter's
## marginal posterior density.
## Fixes: tbl_df() is deprecated (replaced by as_tibble()) and gather()
## is superseded by pivot_longer().
reg_df <- as_tibble(as.data.frame(jags_samples[[1]]))
reg_df <- reg_df %>%
  pivot_longer(everything(), names_to = "coef", values_to = "value")
ggplot(reg_df, aes(x = value)) +
  geom_density() +
  facet_wrap(~ coef, scales = "free")