######One-way ANOVA ###### ###### ###### ###### load tidyr library and import protein data library(tidyr) proteinData <- read.csv("protein-expression.csv") head(proteinData) dim(proteinData) summary(proteinData) ###### Show the distributions library(beeswarm) library(RColorBrewer) boxplot(proteinData,xlab="Cell Type",ylab="Protein Expression",main="Protein Expression") beeswarm(proteinData, add=TRUE,pch=16,col=brewer.pal(5,"Set1"),method="swarm") ###### Use a histogram par(mfrow=c(2,3)) cols <- brewer.pal(5,"Set1") hist(proteinData$A,xlab="A",col=cols[1],main="") hist(proteinData$B,xlab="B",col=cols[2],main="") hist(proteinData$C,xlab="C",col=cols[3],main="") hist(proteinData$D,xlab="D",col=cols[4],main="") hist(proteinData$E,xlab="E",col=cols[5],main="") ###### Log-transform proteinData.ln <- log(proteinData) head(proteinData.ln) ###### check histograms again par(mfrow=c(2,3)) hist(proteinData.ln$A,xlab="A",col=cols[1],main="") hist(proteinData.ln$B,xlab="B",col=cols[2],main="") hist(proteinData.ln$C,xlab="C",col=cols[3],main="") hist(proteinData.ln$D,xlab="D",col=cols[4],main="") hist(proteinData.ln$E,xlab="E",col=cols[5],main="") ###### fit the model and check anovaData <- gather(proteinData.ln) head(anovaData) mod <- aov(value~ key, data=anovaData) mod par(mfrow=c(2,2)) plot(mod) summary(aov(mod)) ###### barlett test for variances bt <- bartlett.test(value~key,data=anovaData) bt ###### post-hoc tests post.tests<- TukeyHSD(mod) post.tests ###### Kruskal-Wallis test ###### ###### Input headache data by-hand ###### headache <- matrix(c(62,74,86,74,91,37, 69,43,100,94,100,98, 50,-120,100,-288,4,-76), ncol=3) colnames(headache) <- c("Relaxation / response feedback","Relaxation alone","Untreated") par(mfrow=c(1,1)) boxplot(headache) ###### tidy the data library(tidyr) headache <- data.frame(headache) headache <- gather(headache) ###### do the kruskal-wallis test and check the output kt <- kruskal.test(value~key,data=headache) kt names(kt) kt$statistic kt$p.value ###### ###### Friedman’s test ###### ###### input the example data by-hand and visualise ###### rubinPeters <- data.frame(Person = 1:4, Before=c(22.2,17,14.1,17), hrs48 = c(5.4,6.3,8.5,10.7), mnths6 = c(10.6,6.2,9.3,12.3)) boxplot(rubinPeters[,2:4]) beeswarm(rubinPeters[,2:4],add=TRUE) ###### do the friedman's test fr <- friedman.test(as.matrix(rubinPeters[,-1])) fr ###### ###### Median Test ###### ###### ###### ###### RVAideMemoire required for median test library(RVAideMemoire) ###### input data by-hand mmse <- data.frame(Group1=c(19,7,17,28,21,6,21,19,27,8,25), Group2 = c(16,22,30,24,22,23,22,28,29,29,0), Group3 = c(4,9,30,29,25,22,25,26,27,18,10) ) ###### tidy the data mmse <- gather(mmse) ###### do the median test mood.medtest(value~key,data=mmse,exact=FALSE) mood.medtest(value~key,data=mmse) ###### Jonckheere-Terpstra Test ###### ###### ###### ###### clinfun is required for J-T test library(clinfun) grade <- data.frame(Grade1 = c(1.99,3.01,4.17,7.13,9.82,9.91,NA,NA), Grade2 = c(4.40,9.82,10.23,11.99,11.99,13.17,13.20,NA), Grade3 =c(6.94,8.04,9.82,15.75,18.30,25.01,26.40,28.17)) ###### tidy the data grade <- gather(grade) grade$key <- as.numeric(gsub("Grade","",grade$key)) ###### do the J-T test jonckheere.test(grade$value,grade$key) ###### Linear Regression ###### ###### ###### ###### example data is in the csv file data <- read.csv("lactoferrin.csv") data ###### scatter plot plot(data$conc, data$growth, pch=16, xlab="Concentration", ylab="Growth rate") ###### fit a linear model model <- lm(growth ~ conc, data=data) model ###### overlay the model on the scatter plot plot(data, pch=16, xlab="Concentration", ylab="Growth rate") abline(model) ###### fitted <- fitted(model) fitted ###### residuals <- data$growth - fitted residuals ###### coefficients(model) residuals(model) names(model) model$coefficients model$residuals ###### mean(data$conc) mean(data$growth) model$coefficients[1] + model$coefficients[2] * mean(data$conc) ###### Calculating standard errors in the regression parameters SSE <- sum(residuals^2) ###### mean_growth <- mean(data$growth) mean_growth SSY <- sum((data$growth - mean_growth)^2) SSY SSR <- sum((fitted - mean_growth)^2) SSR SSE <- SSY - SSR SSE ###### error_variance <- SSE / (length(data$growth) - 2) error_variance F_ratio <- SSR / error_variance F_ratio ###### Summarizing the linear model summary(model) ###### summary <- summary(model) names(summary) summary$fstatistic coefficients(summary) summary$coefficients summary$coefficients[1,2] ###### summary$sigma sqrt(error_variance) ###### Confidence intervals for the model parameters confint(model, level = 0.95) ###### Measuring the degree of fit r_squared <- SSR / SSY r_squared ###### cor.test(data$growth, data$conc) cor(data$growth, data$conc) ^ 2 ###### Checking the model assumptions par(mfrow=c(2,2)) plot(model) ###### Using the model for prediction concentrations <- data.frame(conc = c(2.5, 6.5, 12.0)) predicted_growth <- predict(model, concentrations) predicted_growth ###### Beyond Simple Linear Regression ###### ###### data <- read.csv("decay.csv") x <- data$x y <- data$y plot(x, y, pch=16, xlim=c(0, 33), ylim=c(0, 155)) model <- lm(y ~ x) abline(model) ###### Polynomial regression plot(model, which=c(1)) quadratic <- lm(y ~ x + I(x^2)) summary(quadratic) ###### linear <- lm(y ~ x) anova(quadratic, linear) ###### plot(x, y, pch=16, xlim=c(0, 39), ylim=c(0, 155)) xv <- seq(0, 40, 0.1) yv <- predict(quadratic, data.frame(x = xv)) lines(xv, yv) ###### Linearizing the model by transformation exponential <- lm(log(y) ~ x) summary(exponential)