This is the web material of the work On zero-inflated permutation testing and some related problems by Livio Finos and Fortunato Pesarin.
seeds
dataThe codes and the data are available here
load data
data(seeds)
seeds0=seeds
seeds0[is.na(seeds)]=1
# the two tests
(res1 = flip(x+y~grp,data=seeds0,perms=10000,tail=1))
##
## Call:
## flip(Y = x + y ~ grp, data = seeds0, tail = 1, perms = 10000)
## 9999 permutations.
## Test Stat tail p-value sig.
## x t 2.235 > 0.0166 *
## y t 2.774 > 0.0053 **
plot(res1)
# and combined
npc(res1,comb.funct = "Fisher")
## Call:
## npc(permTP = res1, comb.funct = "Fisher")
## permutations.
## comb.funct nVar Stat p-value sig.
## V1 Fisher 2 9.338 0.0080 **
# the two tests
(res2 = flip(x+y~grp,data=seeds[!is.na(seeds$x),],perms=10000,tail=1))
##
## Call:
## flip(Y = x + y ~ grp, data = seeds[!is.na(seeds$x), ], tail = 1, perms = 10000)
## 9999 permutations.
## Test Stat tail p-value sig.
## x t 1.320 > 0.0985
## y t 2.061 > 0.0240 *
# and combined
npc(res2,comb.funct = "Fisher")
## Call:
## npc(permTP = res2, comb.funct = "Fisher")
## permutations.
## comb.funct nVar Stat p-value sig.
## V1 Fisher 2 6.047 0.0417 *
# the two tests for the problem X|X>0
res3_Quant = flip(x+y~grp,data=seeds,statTest = "NA",perms=10000,tail=1,flipReturn = list(permSpace=TRUE,permT=TRUE))
plot(res3_Quant)
# the test for the problem X>0
seeds$Zeros=is.na(seeds$x)*1
res3_zero = flip(Zeros~grp,data=seeds,perms=res3_Quant@permSpace,tail=-1)
##
res3=cFlip(res3_zero,res3_Quant)
res3
## Call:
## flip(Y = Zeros ~ grp, data = seeds, tail = -1, perms = res3_Quant@permSpace)
## 9999 permutations.
## Test tail NValidPerms Stat p-value sig.
## Zeros t < NA -1.798 0.0771
## x uni-NA > 10000 60.714 0.0953
## y uni-NA > 10000 248.374 0.0269 *
# and combined
npc(res3,comb.funct = "Fisher")
## Call:
## npc(permTP = res3, comb.funct = "Fisher")
## permutations.
## comb.funct nVar Stat p-value sig.
## V1 Fisher 3 8.529 0.0175 *
# the 2+2 tests for the problem X|X>0
res4_plus = res3
(res4_Quant_minus = flip(x+y~grp,data=seeds,statTest = "NA",perms=res3_Quant@permSpace,tail=-1))
## Call:
## flip(Y = x + y ~ grp, data = seeds, tail = -1, perms = res3_Quant@permSpace, statTest = "NA")
## 9999 permutations.
## Test NValidPerms Stat tail p-value sig.
## x uni-NA 10000 60.71 < 0.8987
## y uni-NA 10000 248.37 < 0.9740
# the test for the problem X>0
(res4_zero_minus = flip(Zeros~grp,data=seeds,perms=res3@permSpace,tail=+1))
##
## Call:
## flip(Y = Zeros ~ grp, data = seeds, tail = +1, perms = res3@permSpace)
## 9999 permutations.
## Test Stat tail p-value sig.
## Zeros t -1.798 > 0.9849
res4=cFlip(res4_plus,res4_zero_minus,res4_Quant_minus)
res4
## Call:
## flip(Y = Zeros ~ grp, data = seeds, tail = -1, perms = res3_Quant@permSpace)
## 9999 permutations.
## Test tail NValidPerms Stat p-value sig.
## Zeros t < NA -1.798 0.0771
## x uni-NA > 10000 60.714 0.0953
## y uni-NA > 10000 248.374 0.0269 *
## 4 t > NA -1.798 0.9849
## 5 uni-NA < 10000 60.714 0.8987
## 6 uni-NA < 10000 248.374 0.9740
# and combined
npc(res4,comb.funct = "Fisher",subsets = list(Zeros=c(1,4),x=c(2,5),y=c(3,6),overall=1:6))
## 1 / 42 / 43 / 44 / 4
## Call:
## npc(permTP = res4, comb.funct = "Fisher", subsets = list(Zeros = c(1, 4), x = c(2, 5), y = c(3, 6), overall = 1:6))
## permutations.
## comb.funct nVar Stat p-value sig.
## Zeros Fisher 2 5.125 0.0771
## x Fisher 2 4.701 0.0953
## y Fisher 2 7.231 0.0269 *
## overall Fisher 6 17.058 0.0175 *
The codes and the data are available here
The data comes from: https://www.jstatsoft.org/index.php/jss/article/downloadSuppFile/v027i08/DebTrivedi.rda.zip
A Rdata
file is saved in this folder for your convenience.
Now, load the data and define the dataset to be used, for our convenience.
load("DebTrivedi.rda")
dati=DebTrivedi[,c("ofp","hosp","health","numchron","gender","school","privins")]
summary(dati)
## ofp hosp health numchron
## Min. : 0.000 Min. :0.000 poor : 554 Min. :0.000
## 1st Qu.: 1.000 1st Qu.:0.000 average :3509 1st Qu.:1.000
## Median : 4.000 Median :0.000 excellent: 343 Median :1.000
## Mean : 5.774 Mean :0.296 Mean :1.542
## 3rd Qu.: 8.000 3rd Qu.:0.000 3rd Qu.:2.000
## Max. :89.000 Max. :8.000 Max. :8.000
## gender school privins
## female:2628 Min. : 0.00 no : 985
## male :1778 1st Qu.: 8.00 yes:3421
## Median :11.00
## Mean :10.29
## 3rd Qu.:12.00
## Max. :18.00
formula <- ofp ~ hosp + health + numchron + gender + school + privins
modelHurdle <- pscl::hurdle(formula = formula,
dist = "negbin",
data = DebTrivedi)
(out=summary(modelHurdle))
##
## Call:
## pscl::hurdle(formula = formula, data = DebTrivedi, dist = "negbin")
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## -1.1718 -0.7080 -0.2737 0.3196 18.0092
##
## Count model coefficients (truncated negbin with log link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.197699 0.058973 20.309 < 2e-16 ***
## hosp 0.211898 0.021396 9.904 < 2e-16 ***
## healthpoor 0.315958 0.048056 6.575 4.87e-11 ***
## healthexcellent -0.331861 0.066093 -5.021 5.14e-07 ***
## numchron 0.126421 0.012452 10.152 < 2e-16 ***
## gendermale -0.068317 0.032416 -2.108 0.0351 *
## school 0.020693 0.004535 4.563 5.04e-06 ***
## privinsyes 0.100172 0.042619 2.350 0.0188 *
## Log(theta) 0.333255 0.042754 7.795 6.46e-15 ***
## Zero hurdle model coefficients (binomial with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.043147 0.139852 0.309 0.757688
## hosp 0.312449 0.091437 3.417 0.000633 ***
## healthpoor -0.008716 0.161024 -0.054 0.956833
## healthexcellent -0.289570 0.142682 -2.029 0.042409 *
## numchron 0.535213 0.045378 11.794 < 2e-16 ***
## gendermale -0.415658 0.087608 -4.745 2.09e-06 ***
## school 0.058541 0.011989 4.883 1.05e-06 ***
## privinsyes 0.747120 0.100880 7.406 1.30e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Theta: count = 1.3955
## Number of iterations in BFGS optimization: 16
## Log-likelihood: -1.209e+04 on 17 Df
Before we proceed, let’s compute the Maximum cardinality for each test:
sapply(2:ncol(dati),compute_orbit_cardinality)
## [1] Inf Inf Inf Inf Inf Inf
The number is very high, certainly enough for a permutation test.
# Depending on the number of permutations 'perms', the computation time can be very high, set EVAL=TRUE (here below) to run the analysis
EVAL=FALSE
perms=1000
dati$health=relevel(dati$health,"average")
dataX <- flip:::.getXY(ofp ~ hosp + health + numchron + gender + school + privins,
data=dati,statTest="permutation",rotationTest = FALSE)
nomefile=paste0("results_permutation_MC",perms,".Rdata")
if(EVAL){
# the two response variables
id_Ynot0=which(dataX$Y!=0)
# Yno0=dataX$Y[dataX$Y!=0,]
Y01=(dataX$Y!=0)*1
# table(Y01)
# Yno0_orig=Yno0
Y01_orig=Y01
lista_tested_var=list(2,3,4,5,6,7,8)
set.seed(1)
res_permT=llply(lista_tested_var,compute_test_stats_pvalues)
save(res_permT,file=nomefile)
} else
load(file=nomefile)
## some make up of the results:
pvals=sapply(res_permT,t2p,obs.only = 1,tail=0)
pvals=c(pvals[2,],pvals[1,])
stats=unlist(sapply(res_permT,function(X)X[1,]))
stats=c(stats[2,],stats[1,])
out=new("flip.object")
out@res= data.frame(Test="z",Stat=stats,tail="><", p.value=pvals)
colnames(out@res)[4]="p-value"
rownames(out@res)=paste(rep(c("X|X>0 (Quantitative)","X>0 (Zeros)"),each=7),colnames(dataX$X)[-1],sep=":")
summary(out)
## Call:
## `<undef>`()
## permutations.
## Test Stat tail p-value sig.
## X|X>0 (Quantitative):hosp z 12.53530 >< 0.0010 ***
## X|X>0 (Quantitative):health.poor. z 5.59255 >< 0.0020 **
## X|X>0 (Quantitative):health.excellent. z -2.74832 >< 0.0110 *
## X|X>0 (Quantitative):numchron z 9.35176 >< 0.0010 ***
## X|X>0 (Quantitative):gender.male. z -2.55200 >< 0.0230 *
## X|X>0 (Quantitative):school z 4.08174 >< 0.0020 **
## X|X>0 (Quantitative):privins.yes. z 1.86305 >< 0.0849
## X>0 (Zeros):hosp z 2.80361 >< 0.0010 ***
## X>0 (Zeros):health.poor. z -0.05789 >< 0.9820
## X>0 (Zeros):health.excellent. z -2.57913 >< 0.0220 *
## X>0 (Zeros):numchron z 10.53065 >< 0.0010 ***
## X>0 (Zeros):gender.male. z -4.70975 >< 0.0010 ***
## X>0 (Zeros):school z 4.49383 >< 0.0010 ***
## X>0 (Zeros):privins.yes. z 6.15691 >< 0.0010 ***
An example for gender(male)
. Change the number 5 in res_permT[[5]]
- in the code below - to see other distributions of the test statistic
out=new("flip.object",permT=res_permT[[5]][,2:1])
colnames(out@permT)=c("X|X>0 (Quantitative)","X>0 (Zeros)")
out@res=data.frame(T=matrix(res_permT[[5]][1,]))
par(mfrow=c(2,2))
plot(out)
hist(out[2],main=colnames(out@permT)[2])
hist(out[1],main=colnames(out@permT)[1])