Data

A quick data generating mechanism. Roughly speaking, the \(95^{th}\) percentile of beta will be about 2.9, and will remain about 1 for alpha.

# Set-up the data generating mechainsm
set.seed(666)
N     <- 500
x     <- runif(N, max=10)
alpha <- 1
beta  <- 2
y     <- alpha + beta * x + rnorm(N, sd = .6 * x)
p     <- 0.95

# The dataset to be used for estiamtion
data_set   <- list(y = y, x = x, p = p)
data_frame <- as.data.frame(data_set)[-3]

Quantile Regression with the quantreg package in R

This fits pretty much instantaneously, and gets answers that look about right (alpha of about 1, beta of about 2.9).

library(quantreg)
## Loading required package: SparseM
## 
## Attaching package: 'SparseM'
## 
## The following object is masked from 'package:base':
## 
##     backsolve
qreg_fit <- rq(y ~ x, data = data_frame, tau = p)
# (setting ci = TRUE seems to choke it for some reason)
 
# See results
summary(qreg_fit)
## Warning in rq.fit.br(x, y, tau = tau, ci = TRUE, ...): Solution may be
## nonunique
## 
## Call: rq(formula = y ~ x, tau = p, data = data_frame)
## 
## tau: [1] 0.95
## 
## Coefficients:
##             coefficients lower bd upper bd
## (Intercept) 1.06169      0.99663  1.91120 
## x           2.87800      2.71122  2.98148

Quantile Regression with the bayesQR package in R

This fits pretty much instantaneously, and seems to get sensible answers too.

library(bayesQR)
sink(file="/dev/null") # Supress calls to cat(), for now
bqr_fit <- bayesQR(y ~ x, data = data_frame, quantile = p, ndraw = 10000)
## Current iteration :
## [1] 500
## Current iteration :
## [1] 1000
## Current iteration :
## [1] 1500
## Current iteration :
## [1] 2000
## Current iteration :
## [1] 2500
## Current iteration :
## [1] 3000
## Current iteration :
## [1] 3500
## Current iteration :
## [1] 4000
## Current iteration :
## [1] 4500
## Current iteration :
## [1] 5000
## Current iteration :
## [1] 5500
## Current iteration :
## [1] 6000
## Current iteration :
## [1] 6500
## Current iteration :
## [1] 7000
## Current iteration :
## [1] 7500
## Current iteration :
## [1] 8000
## Current iteration :
## [1] 8500
## Current iteration :
## [1] 9000
## Current iteration :
## [1] 9500
## Current iteration :
## [1] 10000
sink()

# See results
(bqr_summary <- summary(bqr_fit))
## 
## Type of dependent variable: continuous
## Lasso variable selection: no
## Estimated quantile:  0.95 
## Lower credible bound:  0.025 
## Upper credible bound:  0.975 
## Number of burnin draws:  0 
## Number of retained draws:  10000 
## 
## 
## Summary of the estimated beta:
## 
##             Bayes Estimate lower upper
## (Intercept)           1.06 0.968  1.18
## x                     2.88 2.848  2.93
## 
## 
## Summary of the estimated sigma:
## 
##       Bayes Estimate lower upper
## sigma          0.134 0.124 0.143

Quantile Regression in JAGS

# Adapted from http://stats.stackexchange.com/q/17672
library(rjags)

jags_code <- "
  model{
    for(i in 1:length(y)){
      mu[i] <- alpha + beta * x[i]
      w[i]  ~ dexp(tau)
      me[i] <- (1 - 2 * p) / (p * (1 - p)) * w[i] + mu[i]
      pe[i] <- (p * (1 - p) * tau) / (2 * w[i])
      y[i]  ~ dnorm(me[i], pe[i])
    }
    
    # Regression Priors
    alpha ~ dnorm(0, 1E-6)
    beta  ~ dnorm(0, 1E-6)
    
    lsigma ~ dunif(-5, 15)
    sigma  <- exp(lsigma / 2)
    tau    <- pow(sigma, -2)
  }
"

# Init the model
n_iter <- 10000
jags_model <- jags.model(file = textConnection(jags_code), data = data_set, 
                       n.chains = 4, n.adapt = n_iter / 2)
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
##    Graph Size: 4521
## 
## Initializing model

# Run some MCMC iterations
params <- c("alpha", "beta", "sigma")
jags_samples <- coda.samples(jags_model, params, n.iter = n_iter)

# Results
t(apply(
  data.frame(do.call(rbind, jags_samples)), 2, function(x)
    c(mean = mean(x), quantile(x, c(0.005, 0.25, 0.5, 0.75, 0.95)))
))
##            mean      0.5%       25%      50%       75%       95%
## alpha 1.0824410 0.8732152 1.0099762 1.066288 1.1363847 1.2799307
## beta  2.8882857 2.7991278 2.8642738 2.884969 2.9104886 2.9541419
## sigma 0.5502045 0.5194068 0.5417685 0.550057 0.5583782 0.5708294

Quantile Regression in Stan

library(rstan)
options(mc.cores = parallel::detectCores())

scode <- "
data {
  int<lower=0> N;
  real<lower=0, upper=1> p;
  vector[N] x;
  vector[N] y;
}
parameters {
  real alpha;
  real beta;
  real<lower=0> sigma;
  vector<lower=0>[N] w;
}
transformed parameters {
  real<lower=0> tau;
  vector[N] mu;
  vector[N] me;
  vector[N] pe;
  vector[N] pe2;

  tau <- pow(sigma, -2);
  mu  <- alpha + beta * x;
  me  <- (1 - 2 * p) / (p * (1 - p)) * w + mu;
  pe  <- (p * (1 - p) * tau) ./ (2 * w);
  
  for(n in 1:N)
    pe2[n] <- inv_sqrt(pe[n]);
}
model {
  # Priors
  alpha ~ normal(0, 2);
  beta  ~ normal(0, 2);
  sigma ~ cauchy(0, 2.5);

  # Data Augmentation
  w     ~ exponential(tau);
  
  # The model
  y     ~ normal(me, pe2);
}
"

init_fun <- function(){
  list(alpha = 1 + rnorm(1, 0, 0.2), beta = 2.88 + rnorm(1, 0, 0.2), 
       sigma = runif(1, 0, 5), w = rexp(N, runif(N, 0, 4)))
}

params <- c("alpha", "beta", "sigma")
stan_fit <- stan(model_code = scode, data = within(data_set, N <- N), 
                 iter = 5000, verbose = FALSE,
                 pars = params)
## Warning: There were 262 divergent transitions after warmup. Increasing
## adapt_delta may help.
## Warning: Examine the pairs() plot to diagnose sampling problems

print(stan_fit)
## Inference for Stan model: 94300128964927dd05c21f71824b95ab.
## 4 chains, each with iter=5000; warmup=2500; thin=1; 
## post-warmup draws per chain=2500, total post-warmup draws=10000.
## 
##           mean se_mean    sd     2.5%      25%      50%      75%    97.5%
## alpha     1.08    0.00  0.10     0.93     1.01     1.07     1.14     1.33
## beta      2.89    0.00  0.04     2.82     2.86     2.88     2.91     2.96
## sigma     0.55    0.00  0.01     0.53     0.54     0.55     0.56     0.58
## lp__  -1241.69    0.43 19.58 -1282.16 -1254.30 -1241.03 -1228.55 -1204.52
##       n_eff Rhat
## alpha  1771    1
## beta   1217    1
## sigma  5626    1
## lp__   2089    1
## 
## Samples were drawn using NUTS(diag_e) at Thu Oct 15 15:13:46 2015.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

Session Info

sessionInfo()
## R version 3.2.2 (2015-08-14)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 15.04
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=C              
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] rstan_2.8.0   ggplot2_1.0.1 Rcpp_0.12.1   rjags_3-15    coda_0.17-1  
## [6] bayesQR_2.2   quantreg_5.19 SparseM_1.7   knitr_1.11   
## 
## loaded via a namespace (and not attached):
##  [1] magrittr_1.5       MASS_7.3-44        munsell_0.4.2     
##  [4] colorspace_1.2-6   lattice_0.20-33    stringr_1.0.0     
##  [7] plyr_1.8.3         tools_3.2.2        parallel_3.2.2    
## [10] grid_3.2.2         gtable_0.1.2       htmltools_0.2.6   
## [13] MatrixModels_0.4-1 yaml_2.1.13        digest_0.6.8      
## [16] Matrix_1.2-2       gridExtra_2.0.0    reshape2_1.4.1    
## [19] formatR_1.2.1      codetools_0.2-14   inline_0.3.14     
## [22] evaluate_0.8       rmarkdown_0.8      stringi_0.5-5     
## [25] scales_0.3.0       stats4_3.2.2       proto_0.3-10