R
/Stan
/JAGS
A quick data generating mechanism. Roughly speaking, the \(95^{th}\) percentile of beta
will be about 2.9, and will remain about 1 for alpha
.
# Set-up the data generating mechainsm
set.seed(666)
N <- 500
x <- runif(N, max=10)
alpha <- 1
beta <- 2
y <- alpha + beta * x + rnorm(N, sd = .6 * x)
p <- 0.95
# The dataset to be used for estiamtion
data_set <- list(y = y, x = x, p = p)
data_frame <- as.data.frame(data_set)[-3]
quantreg
package in R
This fits pretty much instantaneously, and gets answers that look about right (alpha of about 1, beta of about 2.9).
library(quantreg)
## Loading required package: SparseM
##
## Attaching package: 'SparseM'
##
## The following object is masked from 'package:base':
##
## backsolve
qreg_fit <- rq(y ~ x, data = data_frame, tau = p)
# (setting ci = TRUE seems to choke it for some reason)
# See results
summary(qreg_fit)
## Warning in rq.fit.br(x, y, tau = tau, ci = TRUE, ...): Solution may be
## nonunique
##
## Call: rq(formula = y ~ x, tau = p, data = data_frame)
##
## tau: [1] 0.95
##
## Coefficients:
## coefficients lower bd upper bd
## (Intercept) 1.06169 0.99663 1.91120
## x 2.87800 2.71122 2.98148
bayesQR
package in R
This fits pretty much instantaneously, and seems to get sensible answers too.
library(bayesQR)
sink(file="/dev/null") # Supress calls to cat(), for now
bqr_fit <- bayesQR(y ~ x, data = data_frame, quantile = p, ndraw = 10000)
## Current iteration :
## [1] 500
## Current iteration :
## [1] 1000
## Current iteration :
## [1] 1500
## Current iteration :
## [1] 2000
## Current iteration :
## [1] 2500
## Current iteration :
## [1] 3000
## Current iteration :
## [1] 3500
## Current iteration :
## [1] 4000
## Current iteration :
## [1] 4500
## Current iteration :
## [1] 5000
## Current iteration :
## [1] 5500
## Current iteration :
## [1] 6000
## Current iteration :
## [1] 6500
## Current iteration :
## [1] 7000
## Current iteration :
## [1] 7500
## Current iteration :
## [1] 8000
## Current iteration :
## [1] 8500
## Current iteration :
## [1] 9000
## Current iteration :
## [1] 9500
## Current iteration :
## [1] 10000
sink()
# See results
(bqr_summary <- summary(bqr_fit))
##
## Type of dependent variable: continuous
## Lasso variable selection: no
## Estimated quantile: 0.95
## Lower credible bound: 0.025
## Upper credible bound: 0.975
## Number of burnin draws: 0
## Number of retained draws: 10000
##
##
## Summary of the estimated beta:
##
## Bayes Estimate lower upper
## (Intercept) 1.06 0.968 1.18
## x 2.88 2.848 2.93
##
##
## Summary of the estimated sigma:
##
## Bayes Estimate lower upper
## sigma 0.134 0.124 0.143
JAGS
# Adapted from http://stats.stackexchange.com/q/17672
library(rjags)
jags_code <- "
model{
for(i in 1:length(y)){
mu[i] <- alpha + beta * x[i]
w[i] ~ dexp(tau)
me[i] <- (1 - 2 * p) / (p * (1 - p)) * w[i] + mu[i]
pe[i] <- (p * (1 - p) * tau) / (2 * w[i])
y[i] ~ dnorm(me[i], pe[i])
}
# Regression Priors
alpha ~ dnorm(0, 1E-6)
beta ~ dnorm(0, 1E-6)
lsigma ~ dunif(-5, 15)
sigma <- exp(lsigma / 2)
tau <- pow(sigma, -2)
}
"
# Init the model
n_iter <- 10000
jags_model <- jags.model(file = textConnection(jags_code), data = data_set,
n.chains = 4, n.adapt = n_iter / 2)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph Size: 4521
##
## Initializing model
# Run some MCMC iterations
params <- c("alpha", "beta", "sigma")
jags_samples <- coda.samples(jags_model, params, n.iter = n_iter)
# Results
t(apply(
data.frame(do.call(rbind, jags_samples)), 2, function(x)
c(mean = mean(x), quantile(x, c(0.005, 0.25, 0.5, 0.75, 0.95)))
))
## mean 0.5% 25% 50% 75% 95%
## alpha 1.0824410 0.8732152 1.0099762 1.066288 1.1363847 1.2799307
## beta 2.8882857 2.7991278 2.8642738 2.884969 2.9104886 2.9541419
## sigma 0.5502045 0.5194068 0.5417685 0.550057 0.5583782 0.5708294
Stan
library(rstan)
options(mc.cores = parallel::detectCores())
scode <- "
data {
int<lower=0> N;
real<lower=0, upper=1> p;
vector[N] x;
vector[N] y;
}
parameters {
real alpha;
real beta;
real<lower=0> sigma;
vector<lower=0>[N] w;
}
transformed parameters {
real<lower=0> tau;
vector[N] mu;
vector[N] me;
vector[N] pe;
vector[N] pe2;
tau <- pow(sigma, -2);
mu <- alpha + beta * x;
me <- (1 - 2 * p) / (p * (1 - p)) * w + mu;
pe <- (p * (1 - p) * tau) ./ (2 * w);
for(n in 1:N)
pe2[n] <- inv_sqrt(pe[n]);
}
model {
# Priors
alpha ~ normal(0, 2);
beta ~ normal(0, 2);
sigma ~ cauchy(0, 2.5);
# Data Augmentation
w ~ exponential(tau);
# The model
y ~ normal(me, pe2);
}
"
init_fun <- function(){
list(alpha = 1 + rnorm(1, 0, 0.2), beta = 2.88 + rnorm(1, 0, 0.2),
sigma = runif(1, 0, 5), w = rexp(N, runif(N, 0, 4)))
}
params <- c("alpha", "beta", "sigma")
stan_fit <- stan(model_code = scode, data = within(data_set, N <- N),
iter = 5000, verbose = FALSE,
pars = params)
## Warning: There were 262 divergent transitions after warmup. Increasing
## adapt_delta may help.
## Warning: Examine the pairs() plot to diagnose sampling problems
print(stan_fit)
## Inference for Stan model: 94300128964927dd05c21f71824b95ab.
## 4 chains, each with iter=5000; warmup=2500; thin=1;
## post-warmup draws per chain=2500, total post-warmup draws=10000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5%
## alpha 1.08 0.00 0.10 0.93 1.01 1.07 1.14 1.33
## beta 2.89 0.00 0.04 2.82 2.86 2.88 2.91 2.96
## sigma 0.55 0.00 0.01 0.53 0.54 0.55 0.56 0.58
## lp__ -1241.69 0.43 19.58 -1282.16 -1254.30 -1241.03 -1228.55 -1204.52
## n_eff Rhat
## alpha 1771 1
## beta 1217 1
## sigma 5626 1
## lp__ 2089 1
##
## Samples were drawn using NUTS(diag_e) at Thu Oct 15 15:13:46 2015.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
sessionInfo()
## R version 3.2.2 (2015-08-14)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 15.04
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=C
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] rstan_2.8.0 ggplot2_1.0.1 Rcpp_0.12.1 rjags_3-15 coda_0.17-1
## [6] bayesQR_2.2 quantreg_5.19 SparseM_1.7 knitr_1.11
##
## loaded via a namespace (and not attached):
## [1] magrittr_1.5 MASS_7.3-44 munsell_0.4.2
## [4] colorspace_1.2-6 lattice_0.20-33 stringr_1.0.0
## [7] plyr_1.8.3 tools_3.2.2 parallel_3.2.2
## [10] grid_3.2.2 gtable_0.1.2 htmltools_0.2.6
## [13] MatrixModels_0.4-1 yaml_2.1.13 digest_0.6.8
## [16] Matrix_1.2-2 gridExtra_2.0.0 reshape2_1.4.1
## [19] formatR_1.2.1 codetools_0.2-14 inline_0.3.14
## [22] evaluate_0.8 rmarkdown_0.8 stringi_0.5-5
## [25] scales_0.3.0 stats4_3.2.2 proto_0.3-10