Overview

library(ggplot2)
library(reshape)
library(gplots)
library(edgeR)
library(CHBUtils)
project_summary = "/Volumes/Clotho/Users/rory/cache/bcbio.rnaseq/resources/project/project-summary.csv"
counts_file = "/Volumes/Clotho/Users/rory/cache/bcbio.rnaseq/resources/project/combined.counts"
cbPalette <- c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", 
    "#D55E00", "#CC79A7")
summarydata = read.table(project_summary, header = TRUE, sep = ",")
rownames(summarydata) = summarydata$Name
summarydata = summarydata[order(rownames(summarydata)), ]
counts = read.table(counts_file, header = TRUE, row.names = "id")
counts = counts[, order(colnames(counts))]

Quality control metrics

Mapped reads

ggplot(summarydata, aes(x = Name, y = Mapped)) + geom_bar(stat = "identity") + 
    theme(axis.text.x = element_text(angle = 90)) + ylab("mapped reads") + xlab("")

Genomic mapping rate

ggplot(summarydata, aes(x = Name, y = Mapping.Rate)) + geom_bar(stat = "identity") + 
    theme(axis.text.x = element_text(angle = 90)) + ylab("mapping rate") + xlab("")

Number of genes detected

ggplot(summarydata, aes(x = Name, y = Genes.Detected)) + geom_bar(stat = "identity") + 
    theme(axis.text.x = element_text(angle = 90)) + ylab("genes detected") + 
    xlab("")

Exonic mapping rate

ggplot(summarydata, aes(x = Name, y = Exonic.Rate)) + geom_bar(stat = "identity") + 
    theme(axis.text.x = element_text(angle = 90)) + ylab("exonic mapping rate") + 
    xlab("")

rRNA mapping rate

ggplot(summarydata, aes(x = Name, y = rRNA.rate)) + geom_bar(stat = "identity") + 
    theme(axis.text.x = element_text(angle = 90)) + ylab("rRNA rate") + xlab("")

Estimated fragment length of paired-end reads

ggplot(summarydata, aes(x = Name, y = Fragment.Length.Mean)) + geom_bar(stat = "identity") + 
    theme(axis.text.x = element_text(angle = 90)) + ylab("fragment length") + 
    xlab("")

Boxplot of raw counts per gene

melted = melt(counts)
colnames(melted) = c("sample", "count")
melted$sample = factor(melted$sample)
melted$sample = reorder(melted$sample, colnames(counts))
melted$count = log(melted$count)
ggplot(melted, aes(x = sample, y = count)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 90, 
    hjust = 1)) + xlab("")

Correlation heatmap of raw counts

p = ggheatmap(cor(counts))
ggheatmap.show(p)

Boxplot of TMM-normalized counts per gene

Trimmed mean of M-values (TMM) normalization is described here

Robinson, M. D., & Oshlack, A. (2010). A scaling normalization method for differential expression analysis of RNA-seq data. Genome Biology, 11(3). doi:10.1186/gb-2010-11-3-r25

y = DGEList(counts = counts)
y = calcNormFactors(y)
normalized_counts = cpm(y, normalized.lib.sizes = TRUE)
melted = melt(normalized_counts)
colnames(melted) = c("gene", "sample", "count")
melted$sample = factor(melted$sample)
melted$sample = reorder(melted$sample, colnames(counts))
melted$count = log(melted$count)
ggplot(melted, aes(x = sample, y = count)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 90, 
    hjust = 1)) + xlab("")

Correlation heatmap of TMM-normalized counts

p = ggheatmap(cor(normalized_counts))
ggheatmap.show(p)

Correlation (Spearman) heatmap of TMM-normalized counts

p = ggheatmap(cor(normalized_counts, method = "spearman"))
ggheatmap.show(p)

MDS plot of TMM-normalized counts

mds(normalized_counts, k = length(colnames(normalized_counts)) - 1)

Heatmap of top 30 most expressed genes

select = order(rowMeans(counts), decreasing = TRUE)[1:30]
p = ggheatmap(as.matrix(counts[select, ]))
ggheatmap.show(p)

library(DESeq2)
library(vsn)
design = ~panel
condition = "panel"
dataset = "hsapiens_gene_ensembl"
filter = "ensembl_gene_id"
symbol = "hgnc_symbol"

Differential expression

dds = DESeqDataSetFromMatrix(countData = counts, colData = summarydata, design = design)
dds = DESeq(dds)
contrasts = combn(levels(summarydata[, condition]), 2, simplify = FALSE)

Effect of variance stabilization

par(mfrow = c(1, 3))
notAllZero <- (rowSums(counts(dds)) > 0)
rld <- rlog(dds)
vsd <- varianceStabilizingTransformation(dds)
rlogMat <- assay(rld)
vstMat <- assay(vsd)

meanSdPlot(log2(counts(dds, normalized = TRUE)[notAllZero, ] + 1), ylim = c(0, 
    2.5))
meanSdPlot(assay(rld[notAllZero, ]), ylim = c(0, 2.5))
meanSdPlot(assay(vsd[notAllZero, ]), ylim = c(0, 2.5))

Dispersion estimates

plotDispEsts(dds)

handle_deseq2 = function(dds, summarydata, column) {
    all_combs = combn(levels(summarydata[, column]), 2, simplify = FALSE)
    all_results = list()
    contrast_strings = list()
    for (comb in all_combs) {
        contrast_string = paste(comb, collapse = " vs ")
        contrast = c(column, comb)
        res = results(dds, contrast = contrast)
        res = res[order(res$padj), ]
        all_results = c(all_results, res)
        contrast_strings = c(contrast_strings, contrast_string)
    }
    names(all_results) = contrast_strings
    return(all_results)
}

annotate_df = function(df, dataset, filter, symbol) { require(biomaRt) ensembl = useMart('ensembl', dataset = dataset) annot.df = getBM(attributes=c(filter, symbol), filters=c(filter), values=rownames(df), mart=ensembl) m = merge(df, annot.df, by.x="row.names", by.y=filter) colnames(m)[1] = "id" return(m) }

MA-plots

all_results = handle_deseq2(dds, summarydata, condition)
for (i in seq(length(all_results))) {
    plotMA(all_results[[i]])
    title(paste("MA plot for contrast", names(all_results)[i]))
}

Differentially expressed genes

for (i in seq(length(all_results))) {
    cat(paste("Lowest adjusted p-value hits for", names(all_results)[i]))
    # out_df = annotate_df(data.frame(all_results[[i]]), dataset, filter,
    # symbol)
    out_df = all_results[[i]]
    knitr::kable(head(out_df))
    write.table(out_df, file = paste(names(all_results)[i], ".tsv", sep = ""), 
        quote = FALSE, sep = "\t", row.names = TRUE, col.names = TRUE)
    cat("\n")
}

Lowest adjusted p-value hits for HBRR vs UHRR

	baseMean	log2FoldChange	lfcSE	stat
AAK1	1633.3	2.629	0.0525	50.02
ABAT	2156.1	3.732	0.0699	53.42
ABLIM1	4101.2	3.199	0.0406	78.78
ACSL6	1178.5	6.862	0.1514	45.33
ADAM11	920.7	4.782	0.1034	46.23
ADAM22	1576.4	4.372	0.0727	60.17