Kody z rozdziału 4. Niezbędnik statystyka ,,Przewodnika po programie R’’ wydanie 4.
Aby zainstalować i włączyć pakiet Przewodnik
wykonaj poniższe dwie liniki.
devtools::install_github("pbiecek/PrzewodnikPakiet")
library("Przewodnik")
(RNGkind("Super", "Box"))
## [1] "Mersenne-Twister" "Inversion"
(RNGkind("Super", "Ahr"))
## [1] "Super-Duper" "Box-Muller"
.Random.seed[1:6]
## [1] 102 -1448306788 981750765 NA NA NA
save.seed <- .Random.seed
rnorm(9)
## [1] -0.7360222 -1.1023027 0.3359402 0.4308841 0.3730457 1.0314422
## [7] 0.1541606 -0.1940883 -2.4327144
.Random.seed <- save.seed
rnorm(9)
## [1] -0.7360222 -1.1023027 0.3359402 0.4308841 0.3730457 1.0314422
## [7] 0.1541606 -0.1940883 -2.4327144
set.seed(1313)
runif(10)
## [1] 0.27578588 0.06637362 0.82379757 0.52979504 0.91424061 0.51159113
## [7] 0.94690240 0.56341309 0.61462777 0.54047574
set.seed(1313)
runif(10)
## [1] 0.27578588 0.06637362 0.82379757 0.52979504 0.91424061 0.51159113
## [7] 0.94690240 0.56341309 0.61462777 0.54047574
runif(5)
## [1] 0.7338347 0.1627882 0.1364178 0.4895255 0.6499082
punif(0.5)
## [1] 0.5
dunif(0.5)
## [1] 1
qunif(0.1)
## [1] 0.1
x <- seq(-4,4,by=0.01)
plot(x, dnorm(x), type="l", lwd=3, cex.axis=1.5, cex.lab=1.5)
par(usr=c(-4,4,-0.04,1.04))
lines(x, pnorm(x), lty=2, lwd=3, cex.axis=1.5, cex.lab=1.5)
axis(side=4, cex.axis=1.5, cex.lab=1.5)
mtext(side=4, "pnorm()", line=2.5, cex.axis=1.5, cex=1.5)
pnorm(-3) + (1 - pnorm(3))
## [1] 0.002699796
dnorm(-1:1, mean=0, sd=1)
## [1] 0.2419707 0.3989423 0.2419707
qnorm(c(0.001, 0.025, 0.05, 0.5, 0.95, 0.975, 0.999))
## [1] -3.090232 -1.959964 -1.644854 0.000000 1.644854 1.959964 3.090232
rnorm(10, mean = 2, sd = 1)
## [1] 1.36302924 1.63030216 -0.02528017 3.19469911 0.91553561
## [6] 2.07589592 1.68723109 2.52672495 1.45562343 3.24331153
rnorm(10, mean = 1:10, sd=1:10)
## [1] 1.0029785 0.2818232 2.5886568 5.0915514 1.5614802 9.7741237
## [7] 12.2775071 -7.7713310 13.8131033 5.7068155
rPoisProcess <- function (T=1, lambda=1) {
N <- rpois(1, T*lambda)
sort(runif(N,0,T))
}
rPoisProcess(T=10, lambda=0.2)
## [1] 3.692069
rRandomField <- function (T1=1, T2=1, lambda=1) {
N = rpois(1, T1*T2*lambda)
data.frame(x=runif(N,0,T1), y=runif(N,0,T2))
}
rRandomField(T1=10, T2=10, lambda=0.02)
## [1] x y
## <0 rows> (or 0-length row.names)
set.seed(1313)
n <- 100; d <- 100; N <- 10000
Sigma <- cov(matrix(rnorm(n*d),n,d))
X <- matrix(rnorm(n*d),n,d)
system.time(replicate(N, {
Q <- chol(Sigma)
X %*% Q
} ))
## user system elapsed
## 10.317 0.919 11.734
system.time(replicate(N, {
tmp <- svd(Sigma)
X %*% (tmp$u %*% diag(sqrt(tmp$d)) %*% t(tmp$v))
}))
## user system elapsed
## 82.807 5.601 89.890
system.time(replicate(N, {
tmp <- eigen(Sigma, symmetric=T)
X %*% (tmp$vectors %*% diag(sqrt(abs(tmp$values))) %*% t(tmp$vectors))
}))
## user system elapsed
## 50.521 2.410 53.160
library(copula)
(norm.cop <- normalCopula(0.5))
## Normal copula, dim. d = 2
## Dimension: 2
## Parameters:
## rho.1 = 0.5
str(norm.cop)
## Formal class 'normalCopula' [package "copula"] with 8 slots
## ..@ dispstr : chr "ex"
## ..@ getRho :function (obj)
## ..@ dimension : int 2
## ..@ parameters : num 0.5
## ..@ param.names : chr "rho.1"
## ..@ param.lowbnd: num -1
## ..@ param.upbnd : num 1
## ..@ fullname : chr "<deprecated slot>"
x <- rCopula(1000, norm.cop)
head(x)
## [,1] [,2]
## [1,] 0.5499854 0.69734644
## [2,] 0.8834138 0.50708612
## [3,] 0.8137926 0.03605611
## [4,] 0.7966665 0.66943528
## [5,] 0.7677999 0.61536497
## [6,] 0.6369891 0.99347898
N <- 40
clayton.cop <- claytonCopula(1, dim = 2)
x <- rCopula(N, clayton.cop)
y <- cbind(qexp(x[,1],rate=2), qexp(x[,2],rate=2))
head(y)
## [,1] [,2]
## [1,] 0.6404904 0.3348808
## [2,] 0.1270878 0.4246461
## [3,] 0.3249810 0.8117413
## [4,] 0.2593331 0.4624904
## [5,] 0.6669418 0.3084084
## [6,] 0.3301213 0.1718270
N <- 40
pv <- matrix(0, 2, 1000)
clayton.cop <- claytonCopula(1, dim = 2)
rownames(pv) <- c("Pearson", "Spearman")
for (i in 1:1000) {
x <- rCopula(N, clayton.cop)
y <- cbind(qexp(x[,1],2), qexp(x[,2],2))
pv[1, i] <- cor.test(y[,1], y[,2], method="pearson")$p.value
pv[2, i] <- cor.test(y[,1], y[,2], method="spearman")$p.value
}
rowMeans(pv < 0.05)
## Pearson Spearman
## 0.471 0.851
library(MASS)
wek <- rlnorm(100)
fitdistr(wek, "normal")
## mean sd
## 1.3491925 1.4312316
## (0.1431232) (0.1012034)
fitdistr(wek, "gamma", list(shape=3, rate=3))
## shape rate
## 1.4143092 1.0482635
## (0.1812709) (0.1606885)
library(dprep)
data(hepatitis)
dim(hepatitis)
## [1] 155 20
sum(is.na(hepatitis))
## [1] 167
sum(apply(is.na(hepatitis),1,max))
## [1] 75
indeksyKompletnych = complete.cases(hepatitis)
dim(hepatitis[indeksyKompletnych,])
## [1] 80 20
summary(hepatitis[,19])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 46.00 61.00 61.85 76.25 100.00 67
summary(na.omit(hepatitis[,19]))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 46.00 61.00 61.85 76.25 100.00
library(Hmisc)
wektor <- c(1,5,NA,2,8,8,3,NA,10)
impute(wektor, 2.5)
## 1 2 3 4 5 6 7 8 9
## 1.0 5.0 2.5* 2.0 8.0 8.0 3.0 2.5* 10.0
impute(wektor, mean)
## 1 2 3 4 5 6
## 1.000000 5.000000 5.285714* 2.000000 8.000000 8.000000
## 7 8 9
## 3.000000 5.285714* 10.000000
impute(wektor, "random")
## 1 2 3 4 5 6 7 8 9
## 1 5 1* 2 8 8 3 10* 10
imputacjemedian <- e1071::impute(hepatitis, "median")
imputacjemean <- ce.mimp(hepatitis, "mean", atr=1:20)
##
## Summary of imputations using substitution of mean (mode for nominal features):
## Row Column Class Imput.value
## [1,] 1 19 2 66.571429
## [2,] 2 19 2 66.571429
## [3,] 3 19 2 66.571429
## [4,] 4 4 2 1.540984
## [5,] 5 16 2 101.313725
## [6,] 5 19 2 66.571429
## [7,] 7 15 1 2.543333
## [8,] 7 16 1 122.375000
## [9,] 7 17 1 99.833333
## [10,] 7 18 1 3.151852
## [11,] 7 19 1 43.500000
## [12,] 8 16 2 101.313725
## [13,] 8 17 2 82.438017
## [14,] 8 18 2 3.977679
## [15,] 8 19 2 66.571429
## [16,] 9 16 2 101.313725
## [17,] 9 19 2 66.571429
## [18,] 10 16 2 101.313725
## [19,] 10 19 2 66.571429
## [20,] 15 15 2 1.146218
## [21,] 15 16 2 101.313725
## [22,] 15 18 2 3.977679
## [23,] 15 19 2 66.571429
## [24,] 17 19 2 66.571429
## [25,] 27 19 2 66.571429
## [26,] 32 9 1 1.888889
## [27,] 32 10 1 1.518519
## [28,] 32 16 1 122.375000
## [29,] 32 18 1 3.151852
## [30,] 32 19 1 43.500000
## [31,] 36 19 1 43.500000
## [32,] 38 19 2 66.571429
## [33,] 42 9 2 1.813559
## [34,] 42 10 2 1.598291
## [35,] 42 11 2 1.848739
## [36,] 42 12 2 1.756303
## [37,] 42 13 2 1.949580
## [38,] 42 14 2 1.941176
## [39,] 42 19 2 66.571429
## [40,] 45 15 2 1.146218
## [41,] 45 16 2 101.313725
## [42,] 45 18 2 3.977679
## [43,] 45 19 2 66.571429
## [44,] 46 19 2 66.571429
## [45,] 47 19 2 66.571429
## [46,] 51 19 2 66.571429
## [47,] 52 19 2 66.571429
## [48,] 56 18 2 3.977679
## [49,] 56 19 2 66.571429
## [50,] 57 6 2 1.426230
## [51,] 57 7 2 1.688525
## [52,] 57 8 2 1.819672
## [53,] 57 9 2 1.813559
## [54,] 57 10 2 1.598291
## [55,] 57 11 2 1.848739
## [56,] 57 12 2 1.756303
## [57,] 57 13 2 1.949580
## [58,] 57 14 2 1.941176
## [59,] 57 15 2 1.146218
## [60,] 57 16 2 101.313725
## [61,] 57 17 2 82.438017
## [62,] 57 18 2 3.977679
## [63,] 57 19 2 66.571429
## [64,] 60 18 2 3.977679
## [65,] 60 19 2 66.571429
## [66,] 66 16 2 101.313725
## [67,] 66 19 2 66.571429
## [68,] 67 19 2 66.571429
## [69,] 68 16 1 122.375000
## [70,] 70 19 2 66.571429
## [71,] 71 19 2 66.571429
## [72,] 72 18 1 3.151852
## [73,] 72 19 1 43.500000
## [74,] 73 9 2 1.813559
## [75,] 73 10 2 1.598291
## [76,] 73 11 2 1.848739
## [77,] 73 12 2 1.756303
## [78,] 73 13 2 1.949580
## [79,] 73 14 2 1.941176
## [80,] 73 19 2 66.571429
## [81,] 74 16 2 101.313725
## [82,] 75 19 2 66.571429
## [83,] 77 19 1 43.500000
## [84,] 80 19 2 66.571429
## [85,] 81 16 2 101.313725
## [86,] 81 19 2 66.571429
## [87,] 84 11 2 1.848739
## [88,] 84 12 2 1.756303
## [89,] 84 13 2 1.949580
## [90,] 84 14 2 1.941176
## [91,] 84 19 2 66.571429
## [92,] 87 18 1 3.151852
## [93,] 88 19 1 43.500000
## [94,] 89 19 1 43.500000
## [95,] 92 16 1 122.375000
## [96,] 92 19 1 43.500000
## [97,] 93 9 2 1.813559
## [98,] 93 10 2 1.598291
## [99,] 93 16 2 101.313725
## [100,] 93 19 2 66.571429
## [101,] 94 16 2 101.313725
## [102,] 94 19 2 66.571429
## [103,] 98 19 2 66.571429
## [104,] 100 15 2 1.146218
## [105,] 100 16 2 101.313725
## [106,] 100 18 2 3.977679
## [107,] 100 19 2 66.571429
## [108,] 102 16 2 101.313725
## [109,] 102 18 2 3.977679
## [110,] 102 19 2 66.571429
## [111,] 106 16 2 101.313725
## [112,] 106 19 2 66.571429
## [113,] 107 9 1 1.888889
## [114,] 107 10 1 1.518519
## [115,] 107 19 1 43.500000
## [116,] 108 16 2 101.313725
## [117,] 108 18 2 3.977679
## [118,] 108 19 2 66.571429
## [119,] 111 19 2 66.571429
## [120,] 113 19 2 66.571429
## [121,] 114 19 2 66.571429
## [122,] 115 19 2 66.571429
## [123,] 116 18 2 3.977679
## [124,] 116 19 2 66.571429
## [125,] 117 16 2 101.313725
## [126,] 117 19 2 66.571429
## [127,] 119 9 1 1.888889
## [128,] 119 10 1 1.518519
## [129,] 119 15 1 2.543333
## [130,] 119 16 1 122.375000
## [131,] 119 17 1 99.833333
## [132,] 119 18 1 3.151852
## [133,] 119 19 1 43.500000
## [134,] 120 19 2 66.571429
## [135,] 121 19 1 43.500000
## [136,] 123 18 2 3.977679
## [137,] 123 19 2 66.571429
## [138,] 124 16 2 101.313725
## [139,] 124 19 2 66.571429
## [140,] 127 9 2 1.813559
## [141,] 127 10 2 1.598291
## [142,] 127 16 2 101.313725
## [143,] 127 19 2 66.571429
## [144,] 132 16 1 122.375000
## [145,] 132 19 1 43.500000
## [146,] 133 19 2 66.571429
## [147,] 137 16 2 101.313725
## [148,] 137 19 2 66.571429
## [149,] 141 19 2 66.571429
## [150,] 142 9 1 1.888889
## [151,] 142 10 1 1.518519
## [152,] 143 16 2 101.313725
## [153,] 145 16 1 122.375000
## [154,] 145 19 1 43.500000
## [155,] 147 19 1 43.500000
## [156,] 148 9 1 1.888889
## [157,] 148 10 1 1.518519
## [158,] 148 11 1 1.612903
## [159,] 148 12 1 1.290323
## [160,] 148 13 1 1.548387
## [161,] 148 14 1 1.645161
## [162,] 149 10 2 1.598291
## [163,] 149 19 2 66.571429
## [164,] 150 19 2 66.571429
## [165,] 151 16 1 122.375000
## [166,] 152 19 2 66.571429
## [167,] 153 19 2 66.571429
##
## Total number of imputations per class:
##
## 1 2
## 45 122
##
## Total number of imputations: 167
imputacjeknn <- ec.knnimp(hepatitis, k=10)
library(rms)
replikacje <- aregImpute(~V18+V12+V6, n.impute=100, data=hepatitis, pr=FALSE)
modelRepl <- fit.mult.impute(V18~V6+V12, ols, replikacje, data=hepatitis)
##
## Variance Inflation Factors Due to Imputation:
##
## Intercept V6 V12
## 1.12 1.08 1.15
##
## Rate of Missing Information:
##
## Intercept V6 V12
## 0.11 0.08 0.13
##
## d.f. for t-distribution for Tests of Single Coefficients:
##
## Intercept V6 V12
## 8927.47 17226.98 5657.04
##
## The following fit components were averaged over the 100 model fits:
##
## fitted.values stats linear.predictors
summary.lm(modelRepl)
##
## Call:
## fit.mult.impute(formula = V18 ~ V6 + V12, fitter = ols, xtrans = replikacje,
## data = hepatitis)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.7375 -0.2562 0.0438 0.3624 2.2438
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## Intercept 2.9095 0.1974 14.737 < 2e-16 ***
## V6 0.3046 0.1118 2.726 0.00717 **
## V12 0.3061 0.1133 2.703 0.00766 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6147 on 152 degrees of freedom
## Multiple R-squared: 0.1342, Adjusted R-squared: 0.1228
## F-statistic: 11.78 on 2 and 152 DF, p-value: 1.747e-05
modelBezRepl <- lm(V18~V6+V12, data=hepatitis)
summary(modelBezRepl)
##
## Call:
## lm(formula = V18 ~ V6 + V12, data = hepatitis)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.72163 -0.27160 -0.00778 0.37837 2.27147
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.8870 0.2047 14.104 < 2e-16 ***
## V6 0.3069 0.1167 2.630 0.00956 **
## V12 0.3139 0.1173 2.677 0.00838 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6003 on 132 degrees of freedom
## (20 observations deleted due to missingness)
## Multiple R-squared: 0.1469, Adjusted R-squared: 0.1339
## F-statistic: 11.36 on 2 and 132 DF, p-value: 2.8e-05
library(Przewodnik)
cov(daneSoc[,c(1,6,7)])
## wiek cisnienie.skurczowe cisnienie.rozkurczowe
## wiek 191.742176 -6.016662 -8.941249
## cisnienie.skurczowe -6.016662 246.903989 82.809186
## cisnienie.rozkurczowe -8.941249 82.809186 60.324616
cov(scale(daneSoc[,c(1,6,7)]))
## wiek cisnienie.skurczowe
## wiek 1.00000000 -0.02765239
## cisnienie.skurczowe -0.02765239 1.00000000
## cisnienie.rozkurczowe -0.08313656 0.67852707
## cisnienie.rozkurczowe
## wiek -0.08313656
## cisnienie.skurczowe 0.67852707
## cisnienie.rozkurczowe 1.00000000
wektor.sd <- apply(daneSoc[,c(1,6,7)], 2, sd)
head( sweep(daneSoc[,c(1,6,7)], 2, FUN= "/", wektor.sd) )
## wiek cisnienie.skurczowe cisnienie.rozkurczowe
## 1 5.055210 9.100641 10.686385
## 2 4.766341 7.827824 10.300130
## 3 5.127427 10.628021 10.300130
## 4 4.116385 9.546127 11.201391
## 5 3.249778 8.273310 10.686385
## 6 3.466430 8.782437 9.656372
head( apply(daneSoc[,c(1,6,7)], 2, function(x) x/sd(x)) )
## wiek cisnienie.skurczowe cisnienie.rozkurczowe
## [1,] 5.055210 9.100641 10.686385
## [2,] 4.766341 7.827824 10.300130
## [3,] 5.127427 10.628021 10.300130
## [4,] 4.116385 9.546127 11.201391
## [5,] 3.249778 8.273310 10.686385
## [6,] 3.466430 8.782437 9.656372
library("Przewodnik")
Lbox <- boxcox(daneO[,"VEGF"]~daneO[,"Rozmiar.guza"])
Lbox$x[which.max(Lbox$y)]
## [1] -0.02020202
Lbox <- boxcox(daneO[,"Wiek"]~daneO[,"Rozmiar.guza"])
Lbox$x[which.max(Lbox$y)]
## [1] 2
zmOryginalna <- c(0.1,0.8,0.5,0.2,0.9,0.7)
(zmZmieniona <- cut(zmOryginalna, c(0,0.33,0.66,1), c("niski", "sredni", "wysoki")))
## [1] niski wysoki sredni niski wysoki wysoki
## Levels: niski sredni wysoki
levels(zmZmieniona) <- c("niewysoki", "niewysoki", "wysoki")
zmZmieniona
## [1] niewysoki wysoki niewysoki niewysoki wysoki wysoki
## Levels: niewysoki wysoki
tDaneO <- transform(daneO, logVEGF = log(VEGF),
fNowo4wor = factor(Nowotwor), cutWiek = cut(Wiek,4))
head(tDaneO, 3)
## Wiek Rozmiar.guza Wezly.chlonne Nowotwor Receptory.estrogenowe
## 1 29 1 0 2 (-)
## 2 29 1 0 2 (++)
## 3 30 1 1 2 (-)
## Receptory.progesteronowe Niepowodzenia Okres.bez.wznowy VEGF logVEGF
## 1 (++) brak 22 914 6.817831
## 2 (++) brak 53 1118 7.019297
## 3 (+) brak 38 630 6.445720
## fNowo4wor cutWiek
## 1 2 (29,36]
## 2 2 (29,36]
## 3 2 (29,36]
library("Przewodnik")
summary(mieszkania)
## cena pokoi powierzchnia dzielnica
## Min. : 83280 Min. :1.00 Min. :17.00 Biskupin :65
## 1st Qu.:143304 1st Qu.:2.00 1st Qu.:31.15 Krzyki :79
## Median :174935 Median :3.00 Median :43.70 Srodmiescie:56
## Mean :175934 Mean :2.55 Mean :46.20
## 3rd Qu.:208741 3rd Qu.:3.00 3rd Qu.:61.40
## Max. :295762 Max. :4.00 Max. :87.70
## typ.budynku
## kamienica :61
## niski blok:63
## wiezowiec :76
##
##
##
(a1 <- anova(lm(cena~dzielnica, data = mieszkania)))
## Analysis of Variance Table
##
## Response: cena
## Df Sum Sq Mean Sq F value Pr(>F)
## dzielnica 2 1.7995e+10 8997691613 5.0456 0.007294 **
## Residuals 197 3.5130e+11 1783263361
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(a2 <- anova(lm(cena~typ.budynku, data = mieszkania)))
## Analysis of Variance Table
##
## Response: cena
## Df Sum Sq Mean Sq F value Pr(>F)
## typ.budynku 2 2.2770e+10 1.1385e+10 6.4725 0.001895 **
## Residuals 197 3.4653e+11 1.7590e+09
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
a1[1,5]
## [1] 0.007294371
a2[1,5]
## [1] 0.001894708
summary(model1 <- aov(cena~dzielnica, data = mieszkania))
## Df Sum Sq Mean Sq F value Pr(>F)
## dzielnica 2 1.800e+10 8.998e+09 5.046 0.00729 **
## Residuals 197 3.513e+11 1.783e+09
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(model1)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = cena ~ dzielnica, data = mieszkania)
##
## $dzielnica
## diff lwr upr p adj
## Krzyki-Biskupin -21321.019 -38021.10 -4620.9333 0.0081457
## Srodmiescie-Biskupin -18350.541 -36532.88 -168.2053 0.0473579
## Srodmiescie-Krzyki 2970.478 -14450.28 20391.2340 0.9145465
plot(TukeyHSD(model1))
plot(cena ~ dzielnica, data = mieszkania)
library("agricolae")
df <- df.residual(model1)
MSerror <- deviance(model1)/df
HSD.test(mieszkania$cena, mieszkania$dzielnica, df, MSerror)
t(contr.treatment(5))
## 1 2 3 4 5
## 2 0 1 0 0 0
## 3 0 0 1 0 0
## 4 0 0 0 1 0
## 5 0 0 0 0 1
t(contr.helmert(5))
## 1 2 3 4 5
## [1,] -1 1 0 0 0
## [2,] -1 -1 2 0 0
## [3,] -1 -1 -1 3 0
## [4,] -1 -1 -1 -1 4
kontr <- contr.helmert(3)
model <- lm(cena~dzielnica, data=mieszkania,
contrasts = list(dzielnica=kontr))
summary(model)
##
## Call:
## lm(formula = cena ~ dzielnica, data = mieszkania, contrasts = list(dzielnica = kontr))
##
## Residuals:
## Min 1Q Median 3Q Max
## -84893 -31892 -880 29611 106268
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 176270 3016 58.450 < 2e-16 ***
## dzielnica1 -10660 3536 -3.015 0.00291 **
## dzielnica2 -2563 2220 -1.155 0.24958
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 42230 on 197 degrees of freedom
## Multiple R-squared: 0.04873, Adjusted R-squared: 0.03907
## F-statistic: 5.046 on 2 and 197 DF, p-value: 0.007294
kontr <- cbind(c(2, -1, -1), c(0, 1, -1))
model <- lm(cena~dzielnica, data=mieszkania,
contrasts = list(dzielnica=kontr))
summary(model)$coef
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 176270.141 3015.732 58.4501978 2.070588e-126
## dzielnica1 6611.927 2135.391 3.0963539 2.244773e-03
## dzielnica2 -1485.239 3688.392 -0.4026793 6.876208e-01
(a3 <- anova(lm(cena~dzielnica+typ.budynku, data = mieszkania)))
## Analysis of Variance Table
##
## Response: cena
## Df Sum Sq Mean Sq F value Pr(>F)
## dzielnica 2 1.7995e+10 8.9977e+09 5.3397 0.005524 **
## typ.budynku 2 2.2719e+10 1.1359e+10 6.7413 0.001476 **
## Residuals 195 3.2858e+11 1.6850e+09
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
a3[1:2,5]
## [1] 0.005524071 0.001475884
plot.design(mieszkania[,c("dzielnica", "typ.budynku", "cena")])
interaction.plot(mieszkania$dzielnica, mieszkania$typ.budynku, mieszkania$cena, lwd=3)
a4 <- anova(lm(cena~dzielnica*typ.budynku, data = mieszkania))
a4 <- anova(lm(cena~(dzielnica+typ.budynku)^2, data = mieszkania))
(a4 <- anova(lm(cena~dzielnica+typ.budynku+dzielnica:typ.budynku, data = mieszkania)))
## Analysis of Variance Table
##
## Response: cena
## Df Sum Sq Mean Sq F value Pr(>F)
## dzielnica 2 1.7995e+10 8.9977e+09 5.2461 0.006054 **
## typ.budynku 2 2.2719e+10 1.1359e+10 6.6231 0.001656 **
## dzielnica:typ.budynku 4 9.9528e+08 2.4882e+08 0.1451 0.964995
## Residuals 191 3.2759e+11 1.7151e+09
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(lm(cena~dzielnica+typ.budynku, data = mieszkania))[1:2,5]
## [1] 0.005524071 0.001475884
anova(lm(cena~dzielnica, data = mieszkania))[1,5]
## [1] 0.007294371
model <- manova(cbind(cena,powierzchnia)~dzielnica+typ.budynku, data=mieszkania)
summary(model, test="Hotelling-Lawley")
## Df Hotelling-Lawley approx F num Df den Df Pr(>F)
## dzielnica 2 0.86337 41.658 4 386 < 2.2e-16 ***
## typ.budynku 2 0.32249 15.560 4 386 8.198e-12 ***
## Residuals 195
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
modelPP <- lm(cena~powierzchnia+pokoi, data = mieszkania)
modelPP$coefficients
## (Intercept) powierzchnia pokoi
## 82407.0883 2070.8966 -840.1008
modelPP$coeff
## (Intercept) powierzchnia pokoi
## 82407.0883 2070.8966 -840.1008
summary(modelPP)
##
## Call:
## lm(formula = cena ~ powierzchnia + pokoi, data = mieszkania)
##
## Residuals:
## Min 1Q Median 3Q Max
## -39705 -9386 -863 9454 35097
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 82407.1 2569.9 32.066 <2e-16 ***
## powierzchnia 2070.9 149.2 13.883 <2e-16 ***
## pokoi -840.1 2765.1 -0.304 0.762
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14110 on 197 degrees of freedom
## Multiple R-squared: 0.8937, Adjusted R-squared: 0.8926
## F-statistic: 828.3 on 2 and 197 DF, p-value: < 2.2e-16
podsumowanieModeluPP <- summary(modelPP)
podsumowanieModeluPP$coef[2:3,4]
## powierzchnia pokoi
## 5.241555e-31 7.615836e-01
podsumowanieModeluPP$coef[,1]
## (Intercept) powierzchnia pokoi
## 82407.0883 2070.8966 -840.1008
c(podsumowanieModeluPP$r.squared, podsumowanieModeluPP$adj.r.squared)
## [1] 0.8937227 0.8926437
summary(lm(cena~powierzchnia+pokoi+dzielnica, data = mieszkania))
##
## Call:
## lm(formula = cena ~ powierzchnia + pokoi + dzielnica, data = mieszkania)
##
## Residuals:
## Min 1Q Median 3Q Max
## -30501 -8480 -144 7346 35730
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 94222.02 2320.36 40.607 < 2e-16 ***
## powierzchnia 2022.99 116.31 17.393 < 2e-16 ***
## pokoi 34.36 2157.52 0.016 0.987
## dzielnicaKrzyki -20934.86 1842.79 -11.360 < 2e-16 ***
## dzielnicaSrodmiescie -12722.60 2008.03 -6.336 1.6e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11000 on 195 degrees of freedom
## Multiple R-squared: 0.9362, Adjusted R-squared: 0.9348
## F-statistic: 714.8 on 4 and 195 DF, p-value: < 2.2e-16
modelDP <- lm(cena~powierzchnia+dzielnica, data = mieszkania)
ndane <- data.frame(powierzchnia=c(48,68), dzielnica=c("Biskupin", "Krzyki"))
predict(modelDP, newdata = ndane)
## 1 2
## 191415.9 210976.9
vif(lm(cena~powierzchnia+pokoi, data=mieszkania))
## powierzchnia pokoi
## 8.961022 8.961022
vif(lm(rnorm(200)~powierzchnia+pokoi+cena, data=mieszkania))
## powierzchnia pokoi cena
## 17.727848 8.965221 9.409346
library("Przewodnik")
modelNV <- glm(Niepowodzenia~Nowotwor+log(VEGF), data=daneO, family="binomial")
summary(modelNV)
##
## Call:
## glm(formula = Niepowodzenia ~ Nowotwor + log(VEGF), family = "binomial",
## data = daneO)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.3475 -0.4597 -0.2263 -0.1132 2.3991
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -17.3914 4.3757 -3.975 7.05e-05 ***
## Nowotwor 2.2586 0.7657 2.950 0.00318 **
## log(VEGF) 1.3293 0.4250 3.128 0.00176 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 73.052 on 85 degrees of freedom
## Residual deviance: 47.152 on 83 degrees of freedom
## (11 observations deleted due to missingness)
## AIC: 53.152
##
## Number of Fisher Scoring iterations: 6
modelN <- glm(Niepowodzenia~Nowotwor, daneO, family="binomial")
ndaneO <- data.frame(Nowotwor=c(1, 2, 3))
predict(modelN, ndaneO, type = "response")
## 1 2 3
## 0.01125905 0.07249761 0.34918513
cdplot(daneO$VEGF, daneO$Niepowodzenia, bw=3000)
cdplot(mieszkania$cena, mieszkania$typ.budynku)
library(pscl)
modelN <- glm(Niepowodzenia~Nowotwor, daneO, family="binomial")
pR2(modelN)
## llh llhNull G2 McFadden r2ML r2CU
## -31.0351909 -38.2140122 14.3576426 0.1878584 0.1537575 0.2611330
x <- runif(100)*3
y <- x^2.5-5+rnorm(100,0,3)
model <- nls(y ~ x^a - b, start = list(a = 2, b = 2))
summary(model)
##
## Formula: y ~ x^a - b
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## a 2.57256 0.05526 46.55 <2e-16 ***
## b 5.67171 0.35765 15.86 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.867 on 98 degrees of freedom
##
## Number of iterations to convergence: 4
## Achieved convergence tolerance: 1.33e-06
mojModel <- function(a,b,x) sin(a+b*x)
x <- runif(100)*6
y <- mojModel(pi/2,2,x)+rnorm(100,0,1)
model <- nls(y~mojModel(a,b,x), start=list(a=2,b=2), algorithm="plinear")
summary(model)
##
## Formula: y ~ mojModel(a, b, x)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## a 1.48396 0.31354 4.733 7.53e-06 ***
## b 2.03427 0.09304 21.864 < 2e-16 ***
## .lin 0.97579 0.16210 6.020 3.11e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.134 on 97 degrees of freedom
##
## Number of iterations to convergence: 4
## Achieved convergence tolerance: 2.721e-06
library(goftest)
x <- rnorm(100, 10, 1.4)
cvm.test(x)
##
## Cramer-von Mises test of goodness-of-fit
## Null hypothesis: uniform distribution
##
## data: x
## omega2 = 33.333, p-value < 2.2e-16
(pwynik <- cvm.test(x)$p.value)
## [1] 0
library(car)
y <- rnorm(100)
qqnorm(y)
qqline(y, col = "red")
qqPlot(y)
przedzialy <- seq(0,1,0.2)
(x <- table(cut(runif(100), przedzialy)))
##
## (0,0.2] (0.2,0.4] (0.4,0.6] (0.6,0.8] (0.8,1]
## 19 23 15 23 20
chisq.test(x)
##
## Chi-squared test for given probabilities
##
## data: x
## X-squared = 2.2, df = 4, p-value = 0.699
(x <- table(cut(rbeta(100,3,2), przedzialy)))
##
## (0,0.2] (0.2,0.4] (0.4,0.6] (0.6,0.8] (0.8,1]
## 4 18 28 31 19
chisq.test(x)
##
## Chi-squared test for given probabilities
##
## data: x
## X-squared = 22.3, df = 4, p-value = 0.0001747
x <- runif(100)
ks.test(x, "punif")
##
## One-sample Kolmogorov-Smirnov test
##
## data: x
## D = 0.10216, p-value = 0.2475
## alternative hypothesis: two-sided
ks.test(x, "pnorm", 1, 1)
##
## One-sample Kolmogorov-Smirnov test
##
## data: x
## D = 0.50278, p-value < 2.2e-16
## alternative hypothesis: two-sided
x <- runif(100); y <- rnorm(100)
ks.test(x, y)
##
## Two-sample Kolmogorov-Smirnov test
##
## data: x and y
## D = 0.46, p-value = 1.292e-09
## alternative hypothesis: two-sided
x <- rexp(100)
y <- rnorm(500)
qqplot(x, y)
x <- rnorm(50)
y <- x + 0.3 + rnorm(50,0,0.01)
t.test(y, mu=0)
##
## One Sample t-test
##
## data: y
## t = 2.4908, df = 49, p-value = 0.01618
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.07394866 0.69158208
## sample estimates:
## mean of x
## 0.3827654
t.test(x, y)$p.value
## [1] 0.1717698
t.test(x, y, paired=TRUE)
##
## Paired t-test
##
## data: x and y
## t = -199.33, df = 49, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.3023476 -0.2963122
## sample estimates:
## mean of the differences
## -0.2993299
t.test(x, y, paired=T)$p.value
## [1] 5.921943e-73
wilcox.test(y, mu=0)$p.value
## [1] 0.02389165
wilcox.test(x, y)$p.value
## [1] 0.1337684
x <- rnorm(20,2,1)
y <- rnorm(20,2,2)
z <- rnorm(20,2,3)
var.test(x,y)
##
## F test to compare two variances
##
## data: x and y
## F = 0.23729, num df = 19, denom df = 19, p-value = 0.002921
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.0939210 0.5994935
## sample estimates:
## ratio of variances
## 0.2372868
ansari.test(x,y)$p.value
## [1] 0.0008723042
bartlett.test(list(x,y,z))$p.value
## [1] 1.746499e-05
mood.test(x,y)$p.value
## [1] 0.001481204
grupy <- gl(3,20); wart <- c(x,y,z)
fligner.test(wart~grupy, data.frame(wart, grupy))$p.value
## [1] 8.487498e-06
library(car)
leveneTest(wart,grupy)[1,3]
## [1] 2.91234e-06
(wtestu <- prop.test(594, 1234, p=0.5))
##
## 1-sample proportions test with continuity correction
##
## data: 594 out of 1234, null probability 0.5
## X-squared = 1.641, df = 1, p-value = 0.2002
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
## 0.4531816 0.5096586
## sample estimates:
## p
## 0.4813614
wtestu$conf.int
## [1] 0.4531816 0.5096586
## attr(,"conf.level")
## [1] 0.95
x <- c(11,120,1300,14000,150000)
n <- c(100,1000,10000,100000,1000000)
x/n
## [1] 0.11 0.12 0.13 0.14 0.15
pairwise.prop.test(x, n, p.adjust.method = "bonferroni")
##
## Pairwise comparisons using Pairwise comparison of proportions
##
## data: x out of n
##
## 1 2 3 4
## 2 1.000 - - -
## 3 1.000 1.000 - -
## 4 1.000 0.769 0.061 -
## 5 1.000 0.090 2.7e-07 2.7e-16
##
## P value adjustment method: bonferroni
set.seed(227)
x <- rlnorm(50)
y <- x + rlnorm(50)
cor.test(x, y, method="pearson")
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 11.924, df = 48, p-value = 5.876e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.7721256 0.9212688
## sample estimates:
## cor
## 0.8646445
cor.test(x, y, method="spearman")
##
## Spearman's rank correlation rho
##
## data: x and y
## S = 5348, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.7431933
get.z <- function(rho) log((1+rho)/(1-rho))/2
rho0.test <- function(rho, n, rho0=0, alternative="two.sided") {
p.raw <- pnorm((get.z(rho) - get.z(rho0))/sqrt(1/(n-3)))
if (alternative=="two.sided") return(1 - 2*abs(0.5-p.raw))
if (alternative=="less") return(p.raw)
if (alternative=="greater") return(1-p.raw)
}
rho.rho.test <- function(rho1,n1, rho2,n2, alternative="two.sided"){
p.raw <- pnorm((get.z(rho1) - get.z(rho2))/sqrt(1/(n1-3)+1/(n2-3)))
if (alternative=="two.sided") return(1 - 2*abs(0.5 - p.raw))
if (alternative=="less") return(p.raw)
if (alternative=="greater") return(1-p.raw)
}
tabela <- table(daneSoc$plec, daneSoc$praca)
tabela
##
## nie pracuje uczen lub pracuje
## kobieta 7 48
## mezczyzna 45 104
chiwynik <- chisq.test(tabela)
chiwynik
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: tabela
## X-squared = 5.5711, df = 1, p-value = 0.01826
chiwynik$expected
##
## nie pracuje uczen lub pracuje
## kobieta 14.01961 40.98039
## mezczyzna 37.98039 111.01961
fisher.test(tabela)
##
## Fisher's Exact Test for Count Data
##
## data: tabela
## p-value = 0.0112
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.1199709 0.8308932
## sample estimates:
## odds ratio
## 0.3386347
mcnemar.test(tabela)
##
## McNemar's Chi-squared test with continuity correction
##
## data: tabela
## McNemar's chi-squared = 0.043011, df = 1, p-value = 0.8357
library(irr)
ocena1 <- factor(2 + trunc(runif(100)*4))
ocena2 <- factor(2 + trunc(runif(100)*4))
table(ocena1, ocena2)
## ocena2
## ocena1 2 3 4 5
## 2 11 4 10 7
## 3 8 5 4 9
## 4 5 5 6 8
## 5 2 7 5 4
kappa2(cbind(ocena1, ocena2))
## Cohen's Kappa for 2 Raters (Weights: unweighted)
##
## Subjects = 100
## Raters = 2
## Kappa = 0.0157
##
## z = 0.276
## p-value = 0.783
table(ocena1, ocena2)
## ocena2
## ocena1 2 3 4 5
## 2 11 4 10 7
## 3 8 5 4 9
## 4 5 5 6 8
## 5 2 7 5 4
kappa2(cbind(ocena1, ocena2), weight = "unweighted")
## Cohen's Kappa for 2 Raters (Weights: unweighted)
##
## Subjects = 100
## Raters = 2
## Kappa = 0.0157
##
## z = 0.276
## p-value = 0.783
kappa2(cbind(ocena1, ocena2), weight = "squared")
## Cohen's Kappa for 2 Raters (Weights: squared)
##
## Subjects = 100
## Raters = 2
## Kappa = 0.0815
##
## z = 0.839
## p-value = 0.401
pwart <- numeric(6)
for (i in 1:6)
pwart[i] <- t.test(rnorm(20),rnorm(20,1))$p.value
p.adjust(pwart, method=c("BH"))
## [1] 0.0063051452 0.0306423393 0.0148567296 0.0438430956 0.0004657299
## [6] 0.0252220850
N <- 10^5
getStat <- function(n=30, p=0.2) {
x <- rbinom(n,1,p)
y <- rbinom(n,1,p)
tab <- table(x,y)
chisq.test(tab)$statistic
}
st.testowa <- replicate(N, getStat())
plot(ecdf(st.testowa), xlim=c(0,5))
lines(sort(st.testowa), pchisq(sort(st.testowa),1), col="red", lwd=3)
x <- kidney[kidney$recipient.age < 40, "MDRD30"]
y <- kidney[kidney$recipient.age >= 40, "MDRD30"]
n <- length(x); m <- length(y); B <- 999
(theta <- ks.test(x,y)$statistic)
## D
## 0.219922
thetastar <- replicate(B, {
zstar <- sample(c(x,y))
xstar <- zstar[1:n]
ystar <- zstar[(1:m)+n]
ks.test(xstar, ystar)$statistic
})
(sum(thetastar >= theta) + 1) / (B+1)
## [1] 0.001
library(boot)
srednia <- function(x,w) sum(x*w)
wynikBoot <- boot(daneO$VEGF, srednia, R=999, stype="w")
quantile(wynikBoot$t, c(0.05/2, 1 - 0.05/2))
## 2.5% 97.5%
## 1958.080 3389.398
plot(wynikBoot)
mean(daneO$VEGF)
## [1] 2626.608
boot.ci(wynikBoot, conf = 0.95)
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 999 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = wynikBoot, conf = 0.95)
##
## Intervals :
## Level Normal Basic
## 95% (1922, 3324 ) (1861, 3300 )
##
## Level Percentile BCa
## 95% (1953, 3392 ) (2039, 3491 )
## Calculations and Intervals on Original Scale
VEGF.stat <- function(data) {
mean(data)
}
VEGF.sim <- function(data, mle) {
rnorm(length(data), mean = mle[1], sd = mle[2])
}
VEGF.mle <- c(mean(daneO$VEGF), sd(daneO$VEGF))
bootParametryczny <- boot(daneO$VEGF, statistic = VEGF.stat, R = 999, sim = "parametric", ran.gen = VEGF.sim, mle = VEGF.mle)
plot(bootParametryczny)
boot.ci(bootParametryczny, type = "perc", conf = 0.95)
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 999 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = bootParametryczny, conf = 0.95, type = "perc")
##
## Intervals :
## Level Percentile
## 95% (1937, 3327 )
## Calculations and Intervals on Original Scale
statystykaT <- function(x,i) (mean(x[i]) - mean(x))/sd(x[i])
boostrapowyTestT <- function(wektor, mu0=0) {
# wyznaczamy 999 replikacji i liczymy wartosci statystyki
wartosciStatystykiT <- boot(wektor,statystykaT,R=999,stype="i")
# wyznaczamy dystrybuante dla rozkladu statystyki T
dystrybuantaStatystykiT <- ecdf(wartosciStatystykiT$t)
T <- mean(wektor - mu0)/sd(wektor)
# wyznaczamy kwantyl odpowiadajacy wartosci statystyki T w rozkladzie statystyki testowej dla replikacji bootstrpowych
kwantyl <- dystrybuantaStatystykiT(T)
# wyznaczamy p-wartosc dla testu dwustronnego
1 - 2*abs(kwantyl-0.5)
}
boostrapowyTestT(daneO$VEGF, 3000)
## [1] 0.4304304
boostrapowyTestT(daneO$VEGF, 4000)
## [1] 0.02602603
library("Przewodnik")
czasy <- with(daneO, Surv(Okres.bez.wznowy,Niepowodzenia=="wznowa"))
czasy[1:10]
## [1] 22+ 53+ 38+ 26+ 19+ 36 33+ 38+ 38 37+
library(survival)
krzywaKM <- survfit(czasy~1)
summary(krzywaKM)
## Call: survfit(formula = czasy ~ 1)
##
## 1 observation deleted due to missingness
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 10 96 1 0.990 0.0104 0.969 1.000
## 16 95 1 0.979 0.0146 0.951 1.000
## 19 93 1 0.969 0.0178 0.934 1.000
## 21 90 1 0.958 0.0206 0.918 0.999
## 29 80 1 0.946 0.0236 0.901 0.993
## 30 77 1 0.934 0.0263 0.884 0.987
## 36 60 1 0.918 0.0301 0.861 0.979
## 38 51 1 0.900 0.0345 0.835 0.970
## 40 40 1 0.878 0.0403 0.802 0.960
## 41 39 1 0.855 0.0451 0.771 0.948
## 43 30 1 0.827 0.0518 0.731 0.935
## 48 15 1 0.771 0.0719 0.643 0.926
## 50 9 1 0.686 0.1030 0.511 0.921
plot(krzywaKM)
plot(survfit(czasy~daneO$Nowotwor))
survdiff(czasy~daneO$Nowotwor)
## Call:
## survdiff(formula = czasy ~ daneO$Nowotwor)
##
## n=86, 11 observations deleted due to missingness.
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## daneO$Nowotwor=1 7 0 0.913 0.913 0.991
## daneO$Nowotwor=2 53 4 8.532 2.408 7.091
## daneO$Nowotwor=3 26 9 3.554 8.344 11.552
##
## Chisq= 11.7 on 2 degrees of freedom, p= 0.00285
fit <- coxph(czasy~Nowotwor+Wiek, data=daneO)
(fit2 <- cox.zph(fit))
## rho chisq p
## Nowotwor 0.2266 0.692 0.406
## Wiek 0.0862 0.102 0.750
## GLOBAL NA 0.821 0.663
summary(fit)
## Call:
## coxph(formula = czasy ~ Nowotwor + Wiek, data = daneO)
##
## n= 86, number of events= 13
## (11 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## Nowotwor 1.75372 5.77607 0.59566 2.944 0.00324 **
## Wiek -0.05632 0.94524 0.04340 -1.297 0.19447
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## Nowotwor 5.7761 0.1731 1.7973 18.563
## Wiek 0.9452 1.0579 0.8682 1.029
##
## Concordance= 0.747 (se = 0.091 )
## Rsquare= 0.132 (max possible= 0.683 )
## Likelihood ratio test= 12.17 on 2 df, p=0.002282
## Wald test = 10 on 2 df, p=0.006722
## Score (logrank) test = 11.62 on 2 df, p=0.003004
library("partykit")
library("ipred")
library("TH.data")
data("GBSG2", package = "ipred")
GBSG2ct <- ctree(Surv(time, cens) ~ .,data = GBSG2)
plot(GBSG2ct)
x <- 1:10
y <- 5:15
setdiff(union(x,y), intersect(x,y))
## [1] 1 2 3 4 11 12 13 14 15
setequal(x, setdiff(x, setdiff(y,x)))
## [1] TRUE
is.element(3, x)
## [1] TRUE
1-exp(0.1^15)
## [1] -1.110223e-15
expm1(0.1^15)
## [1] 1e-15
log(1+0.1^20)
## [1] 0
log1p(0.1^20)
## [1] 1e-20
library(orthopolynom)
(p1 <- polynomial(c(2,0,1)))
## 2 + x^2
(p2 <- polynomial(c(2,2,1,1)))
## 2 + 2*x + x^2 + x^3
p1 + p2
## 4 + 2*x + 2*x^2 + x^3
p1 * p2
## 4 + 4*x + 4*x^2 + 4*x^3 + x^4 + x^5
integral(p1,c(0,1))
## [1] 2.333333
deriv(p2)
## 2 + 2*x + 3*x^2
poly.calc(c(-1,1))
## -1 + x^2
poly.calc(c(0,2,4), c(3,2,3))
## 3 - x + 0.25*x^2
solve(p2)
## [1] -1+0.000000i 0-1.414214i 0+1.414214i
LCM(p1,p2)
## 2 + 2*x + x^2 + x^3
GCD(p1,p2)
## 2 + x^2
library(orthopolynom)
Nmax <- 5
(wielomiany <- slegendre.polynomials(Nmax, normalized=TRUE))
## [[1]]
## 1
##
## [[2]]
## -1.732051 + 3.464102*x
##
## [[3]]
## 2.236068 - 13.41641*x + 13.41641*x^2
##
## [[4]]
## -2.645751 + 31.74902*x - 79.37254*x^2 + 52.91503*x^3
##
## [[5]]
## 3 - 60*x + 270*x^2 - 420*x^3 + 210*x^4
##
## [[6]]
## -3.316625 + 99.49874*x - 696.4912*x^2 + 1857.31*x^3 - 2089.474*x^4 +
## 835.7894*x^5
wielomian4 <- as.function(wielomiany[[4]])
wielomian4(c(0, 0.5, 1))
## [1] -2.645751e+00 1.776357e-15 2.645751e+00
curve(wielomian4, 0, 1, lwd=3, lty=4, ylab="")
for (i in 1:Nmax) {
wielomian <- as.function(wielomiany[[i]])
curve(wielomian, 0, 1, add=TRUE, lwd=3, lty=i)
}
f <- function(x, wyznacznik) {(x - 7)^wyznacznik - x}
optimize(f, interval=c(-1,10), wyznacznik = 2)
## $minimum
## [1] 7.5
##
## $objective
## [1] -7.25
uniroot(f, interval=c(-1,10), wyznacznik = 2)
## $root
## [1] 4.807418
##
## $f.root
## [1] -4.69523e-06
##
## $iter
## [1] 8
##
## $init.it
## [1] NA
##
## $estim.prec
## [1] 6.103516e-05
D(expression(3*(x-2)^2-15), "x")
## 3 * (2 * (x - 2))
(wyraz <- deriv(~(x-2)^2+15,"x"))
## expression({
## .expr1 <- x - 2
## .value <- .expr1^2 + 15
## .grad <- array(0, c(length(.value), 1L), list(NULL, c("x")))
## .grad[, "x"] <- 2 * .expr1
## attr(.value, "gradient") <- .grad
## .value
## })
x <- 1:3
eval(wyraz)
## [1] 16 15 16
## attr(,"gradient")
## x
## [1,] -2
## [2,] 0
## [3,] 2
integrate(dnorm, 0, Inf)
## 0.5 with absolute error < 4.7e-05
integrate(function(x) sin(x)^2, 0, 600)
## 300.0221 with absolute error < 0.0087
# integrate(function(x) sin(x)^2, 0, Inf)