This is the web material of the work On zero-inflated permutation testing and some related problems by Livio Finos and Fortunato Pesarin.

Analysis of `seeds` data

The codes and the data are available here

load data

data(seeds)

The manufacturer’s point of view

seeds0=seeds
seeds0[is.na(seeds)]=1

# the two tests
(res1 = flip(x+y~grp,data=seeds0,perms=10000,tail=1))

##

##  Call:
##  flip(Y = x + y ~ grp, data = seeds0, tail = 1, perms = 10000) 
## 9999 permutations.
##   Test  Stat tail p-value sig.
## x    t 2.235    >  0.0166    *
## y    t 2.774    >  0.0053   **

plot(res1)

# and combined
npc(res1,comb.funct = "Fisher")

##  Call:
##  npc(permTP = res1, comb.funct = "Fisher") 
##  permutations.
##    comb.funct nVar  Stat p-value sig.
## V1     Fisher    2 9.338  0.0080   **

The user’s point of view

# the two tests
(res2 = flip(x+y~grp,data=seeds[!is.na(seeds$x),],perms=10000,tail=1))

##

##  Call:
##  flip(Y = x + y ~ grp, data = seeds[!is.na(seeds$x), ], tail = 1,      perms = 10000) 
## 9999 permutations.
##   Test  Stat tail p-value sig.
## x    t 1.320    >  0.0985     
## y    t 2.061    >  0.0240    *

# and combined
npc(res2,comb.funct = "Fisher")

##  Call:
##  npc(permTP = res2, comb.funct = "Fisher") 
##  permutations.
##    comb.funct nVar  Stat p-value sig.
## V1     Fisher    2 6.047  0.0417    *

The point of view of a scientist (I)

# the two tests for the problem X|X>0
res3_Quant = flip(x+y~grp,data=seeds,statTest = "NA",perms=10000,tail=1,flipReturn = list(permSpace=TRUE,permT=TRUE))

plot(res3_Quant)

# the test for the problem X>0
seeds$Zeros=is.na(seeds$x)*1
res3_zero = flip(Zeros~grp,data=seeds,perms=res3_Quant@permSpace,tail=-1)

##

res3=cFlip(res3_zero,res3_Quant)
res3

##  Call:
##  flip(Y = Zeros ~ grp, data = seeds, tail = -1, perms = res3_Quant@permSpace) 
## 9999 permutations.
##         Test tail NValidPerms    Stat p-value sig.
## Zeros      t    <          NA  -1.798  0.0771     
## x     uni-NA    >       10000  60.714  0.0953     
## y     uni-NA    >       10000 248.374  0.0269    *

# and combined
npc(res3,comb.funct = "Fisher")

##  Call:
##  npc(permTP = res3, comb.funct = "Fisher") 
##  permutations.
##    comb.funct nVar  Stat p-value sig.
## V1     Fisher    3 8.529  0.0175    *

The point of view of a scientist (II)

# the 2+2 tests for the problem X|X>0
res4_plus = res3

(res4_Quant_minus = flip(x+y~grp,data=seeds,statTest = "NA",perms=res3_Quant@permSpace,tail=-1))

##  Call:
##  flip(Y = x + y ~ grp, data = seeds, tail = -1, perms = res3_Quant@permSpace,      statTest = "NA") 
## 9999 permutations.
##     Test NValidPerms   Stat tail p-value sig.
## x uni-NA       10000  60.71    <  0.8987     
## y uni-NA       10000 248.37    <  0.9740

# the test for the problem X>0
(res4_zero_minus = flip(Zeros~grp,data=seeds,perms=res3@permSpace,tail=+1))

##

##  Call:
##  flip(Y = Zeros ~ grp, data = seeds, tail = +1, perms = res3@permSpace) 
## 9999 permutations.
##       Test   Stat tail p-value sig.
## Zeros    t -1.798    >  0.9849

res4=cFlip(res4_plus,res4_zero_minus,res4_Quant_minus)
res4

##  Call:
##  flip(Y = Zeros ~ grp, data = seeds, tail = -1, perms = res3_Quant@permSpace) 
## 9999 permutations.
##         Test tail NValidPerms    Stat p-value sig.
## Zeros      t    <          NA  -1.798  0.0771     
## x     uni-NA    >       10000  60.714  0.0953     
## y     uni-NA    >       10000 248.374  0.0269    *
## 4          t    >          NA  -1.798  0.9849     
## 5     uni-NA    <       10000  60.714  0.8987     
## 6     uni-NA    <       10000 248.374  0.9740

# and combined
npc(res4,comb.funct = "Fisher",subsets = list(Zeros=c(1,4),x=c(2,5),y=c(3,6),overall=1:6))

## 1 / 42 / 43 / 44 / 4

##  Call:
##  npc(permTP = res4, comb.funct = "Fisher", subsets = list(Zeros = c(1,      4), x = c(2, 5), y = c(3, 6), overall = 1:6)) 
##  permutations.
##         comb.funct nVar   Stat p-value sig.
## Zeros       Fisher    2  5.125  0.0771     
## x           Fisher    2  4.701  0.0953     
## y           Fisher    2  7.231  0.0269    *
## overall     Fisher    6 17.058  0.0175    *

Analysis of Medical Care data

The codes and the data are available here

Load data

The data comes from: https://www.jstatsoft.org/index.php/jss/article/downloadSuppFile/v027i08/DebTrivedi.rda.zip

A Rdata file is saved in this folder for your convenience.

Now, load the data and define the dataset to be used, for our convenience.

load("DebTrivedi.rda")

dati=DebTrivedi[,c("ofp","hosp","health","numchron","gender","school","privins")]
summary(dati)

##       ofp              hosp             health        numchron    
##  Min.   : 0.000   Min.   :0.000   poor     : 554   Min.   :0.000  
##  1st Qu.: 1.000   1st Qu.:0.000   average  :3509   1st Qu.:1.000  
##  Median : 4.000   Median :0.000   excellent: 343   Median :1.000  
##  Mean   : 5.774   Mean   :0.296                    Mean   :1.542  
##  3rd Qu.: 8.000   3rd Qu.:0.000                    3rd Qu.:2.000  
##  Max.   :89.000   Max.   :8.000                    Max.   :8.000  
##     gender         school      privins   
##  female:2628   Min.   : 0.00   no : 985  
##  male  :1778   1st Qu.: 8.00   yes:3421  
##                Median :11.00             
##                Mean   :10.29             
##                3rd Qu.:12.00             
##                Max.   :18.00

Fit parametric Hurdle model

formula <- ofp ~ hosp + health + numchron + gender + school + privins
modelHurdle <- pscl::hurdle(formula = formula,
                            dist    = "negbin",
                            data    = DebTrivedi)
(out=summary(modelHurdle))

## 
## Call:
## pscl::hurdle(formula = formula, data = DebTrivedi, dist = "negbin")
## 
## Pearson residuals:
##     Min      1Q  Median      3Q     Max 
## -1.1718 -0.7080 -0.2737  0.3196 18.0092 
## 
## Count model coefficients (truncated negbin with log link):
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      1.197699   0.058973  20.309  < 2e-16 ***
## hosp             0.211898   0.021396   9.904  < 2e-16 ***
## healthpoor       0.315958   0.048056   6.575 4.87e-11 ***
## healthexcellent -0.331861   0.066093  -5.021 5.14e-07 ***
## numchron         0.126421   0.012452  10.152  < 2e-16 ***
## gendermale      -0.068317   0.032416  -2.108   0.0351 *  
## school           0.020693   0.004535   4.563 5.04e-06 ***
## privinsyes       0.100172   0.042619   2.350   0.0188 *  
## Log(theta)       0.333255   0.042754   7.795 6.46e-15 ***
## Zero hurdle model coefficients (binomial with logit link):
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      0.043147   0.139852   0.309 0.757688    
## hosp             0.312449   0.091437   3.417 0.000633 ***
## healthpoor      -0.008716   0.161024  -0.054 0.956833    
## healthexcellent -0.289570   0.142682  -2.029 0.042409 *  
## numchron         0.535213   0.045378  11.794  < 2e-16 ***
## gendermale      -0.415658   0.087608  -4.745 2.09e-06 ***
## school           0.058541   0.011989   4.883 1.05e-06 ***
## privinsyes       0.747120   0.100880   7.406 1.30e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Theta: count = 1.3955
## Number of iterations in BFGS optimization: 16 
## Log-likelihood: -1.209e+04 on 17 Df

Permutation based analysis

Before we proceed, let’s compute the Maximum cardinality for each test:

sapply(2:ncol(dati),compute_orbit_cardinality)

## [1] Inf Inf Inf Inf Inf Inf

The number is very high, certainly enough for a permutation test.

# Depending on the number of permutations 'perms', the computation time can be very high, set EVAL=TRUE (here below) to run the analysis
EVAL=FALSE
perms=1000

dati$health=relevel(dati$health,"average")
dataX <- flip:::.getXY(ofp ~ hosp + health + numchron + gender + school + privins,
                         data=dati,statTest="permutation",rotationTest = FALSE)
nomefile=paste0("results_permutation_MC",perms,".Rdata")
if(EVAL){
  
  # the two response variables
  id_Ynot0=which(dataX$Y!=0)
  # Yno0=dataX$Y[dataX$Y!=0,]
  Y01=(dataX$Y!=0)*1
  # table(Y01)
  # Yno0_orig=Yno0
  Y01_orig=Y01
  
  lista_tested_var=list(2,3,4,5,6,7,8)
  set.seed(1)
  res_permT=llply(lista_tested_var,compute_test_stats_pvalues)
  save(res_permT,file=nomefile)
} else
load(file=nomefile)

## some make up of the results:
pvals=sapply(res_permT,t2p,obs.only = 1,tail=0)
pvals=c(pvals[2,],pvals[1,])

stats=unlist(sapply(res_permT,function(X)X[1,]))
stats=c(stats[2,],stats[1,])

out=new("flip.object")
out@res= data.frame(Test="z",Stat=stats,tail="><", p.value=pvals)
colnames(out@res)[4]="p-value"
rownames(out@res)=paste(rep(c("X|X>0 (Quantitative)","X>0 (Zeros)"),each=7),colnames(dataX$X)[-1],sep=":")
summary(out)

##  Call:
##  `<undef>`() 
##  permutations.
##                                        Test     Stat tail p-value sig.
## X|X>0 (Quantitative):hosp                 z 12.53530   ><  0.0010  ***
## X|X>0 (Quantitative):health.poor.         z  5.59255   ><  0.0020   **
## X|X>0 (Quantitative):health.excellent.    z -2.74832   ><  0.0110    *
## X|X>0 (Quantitative):numchron             z  9.35176   ><  0.0010  ***
## X|X>0 (Quantitative):gender.male.         z -2.55200   ><  0.0230    *
## X|X>0 (Quantitative):school               z  4.08174   ><  0.0020   **
## X|X>0 (Quantitative):privins.yes.         z  1.86305   ><  0.0849     
## X>0 (Zeros):hosp                          z  2.80361   ><  0.0010  ***
## X>0 (Zeros):health.poor.                  z -0.05789   ><  0.9820     
## X>0 (Zeros):health.excellent.             z -2.57913   ><  0.0220    *
## X>0 (Zeros):numchron                      z 10.53065   ><  0.0010  ***
## X>0 (Zeros):gender.male.                  z -4.70975   ><  0.0010  ***
## X>0 (Zeros):school                        z  4.49383   ><  0.0010  ***
## X>0 (Zeros):privins.yes.                  z  6.15691   ><  0.0010  ***

Plots

An example for gender(male). Change the number 5 in res_permT[[5]] - in the code below - to see other distributions of the test statistic

out=new("flip.object",permT=res_permT[[5]][,2:1])
colnames(out@permT)=c("X|X>0 (Quantitative)","X>0 (Zeros)")
out@res=data.frame(T=matrix(res_permT[[5]][1,]))

par(mfrow=c(2,2))
plot(out)
hist(out[2],main=colnames(out@permT)[2])
hist(out[1],main=colnames(out@permT)[1])

On zero-inflated permutation testing and some related problems: web material

Livio Finos and Fortunato Pesarin

28 July 2018

Analysis of `seeds` data

The manufacturer’s point of view

The user’s point of view

The point of view of a scientist (I)

The point of view of a scientist (II)

Analysis of Medical Care data

Load data

Fit parametric Hurdle model

Permutation based analysis

Plots

On zero-inflated permutation testing and some related problems: web material

Livio Finos and Fortunato Pesarin

28 July 2018

Analysis of seeds data

The manufacturer’s point of view

The user’s point of view

The point of view of a scientist (I)

The point of view of a scientist (II)

Analysis of Medical Care data

Load data

Fit parametric Hurdle model

Permutation based analysis

Plots

Analysis of `seeds` data