Overview

library(ggplot2)
library(reshape)
library(gplots)
library(edgeR)
library(CHBUtils)
project_summary = "/Volumes/Clotho/Users/rory/cache/bcbio.rnaseq/resources/project/project-summary.csv"
counts_file = "/Volumes/Clotho/Users/rory/cache/bcbio.rnaseq/resources/project/combined.counts"
cbPalette <- c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", 
    "#D55E00", "#CC79A7")
summarydata = read.table(project_summary, header = TRUE, sep = ",")
rownames(summarydata) = summarydata$Name
summarydata = summarydata[order(rownames(summarydata)), ]
counts = read.table(counts_file, header = TRUE, row.names = "id")
counts = counts[, order(colnames(counts))]

Quality control metrics

Mapped reads

ggplot(summarydata, aes(x = Name, y = Mapped)) + geom_bar(stat = "identity") + 
    theme(axis.text.x = element_text(angle = 90)) + ylab("mapped reads") + xlab("")

Genomic mapping rate

ggplot(summarydata, aes(x = Name, y = Mapping.Rate)) + geom_bar(stat = "identity") + 
    theme(axis.text.x = element_text(angle = 90)) + ylab("mapping rate") + xlab("")

Number of genes detected

ggplot(summarydata, aes(x = Name, y = Genes.Detected)) + geom_bar(stat = "identity") + 
    theme(axis.text.x = element_text(angle = 90)) + ylab("genes detected") + 
    xlab("")

Exonic mapping rate

ggplot(summarydata, aes(x = Name, y = Exonic.Rate)) + geom_bar(stat = "identity") + 
    theme(axis.text.x = element_text(angle = 90)) + ylab("exonic mapping rate") + 
    xlab("")

rRNA mapping rate

ggplot(summarydata, aes(x = Name, y = rRNA.rate)) + geom_bar(stat = "identity") + 
    theme(axis.text.x = element_text(angle = 90)) + ylab("rRNA rate") + xlab("")

Estimated fragment length of paired-end reads

ggplot(summarydata, aes(x = Name, y = Fragment.Length.Mean)) + geom_bar(stat = "identity") + 
    theme(axis.text.x = element_text(angle = 90)) + ylab("fragment length") + 
    xlab("")

Boxplot of raw counts per gene

melted = melt(counts)
colnames(melted) = c("sample", "count")
melted$sample = factor(melted$sample)
melted$sample = reorder(melted$sample, colnames(counts))
melted$count = log(melted$count)
ggplot(melted, aes(x = sample, y = count)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 90, 
    hjust = 1)) + xlab("")

Correlation heatmap of raw counts

p = ggheatmap(cor(counts))
ggheatmap.show(p)

Boxplot of TMM-normalized counts per gene

Trimmed mean of M-values (TMM) normalization is described here

Robinson, M. D., & Oshlack, A. (2010). A scaling normalization method for differential expression analysis of RNA-seq data. Genome Biology, 11(3). doi:10.1186/gb-2010-11-3-r25

y = DGEList(counts = counts)
y = calcNormFactors(y)
normalized_counts = cpm(y, normalized.lib.sizes = TRUE)
melted = melt(normalized_counts)
colnames(melted) = c("gene", "sample", "count")
melted$sample = factor(melted$sample)
melted$sample = reorder(melted$sample, colnames(counts))
melted$count = log(melted$count)
ggplot(melted, aes(x = sample, y = count)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 90, 
    hjust = 1)) + xlab("")

Correlation heatmap of TMM-normalized counts

p = ggheatmap(cor(normalized_counts))
ggheatmap.show(p)

Correlation (Spearman) heatmap of TMM-normalized counts

p = ggheatmap(cor(normalized_counts, method = "spearman"))
ggheatmap.show(p)

MDS plot of TMM-normalized counts

mds(normalized_counts, k = length(colnames(normalized_counts)) - 1)

Heatmap of top 30 most expressed genes

select = order(rowMeans(counts), decreasing = TRUE)[1:30]
p = ggheatmap(as.matrix(counts[select, ]))
ggheatmap.show(p)
library(DESeq2)
library(vsn)
design = ~panel
condition = "panel"
dataset = "hsapiens_gene_ensembl"
filter = "ensembl_gene_id"
symbol = "hgnc_symbol"

Differential expression

dds = DESeqDataSetFromMatrix(countData = counts, colData = summarydata, design = design)
dds = DESeq(dds)
contrasts = combn(levels(summarydata[, condition]), 2, simplify = FALSE)

Effect of variance stabilization

par(mfrow = c(1, 3))
notAllZero <- (rowSums(counts(dds)) > 0)
rld <- rlog(dds)
vsd <- varianceStabilizingTransformation(dds)
rlogMat <- assay(rld)
vstMat <- assay(vsd)

meanSdPlot(log2(counts(dds, normalized = TRUE)[notAllZero, ] + 1), ylim = c(0, 
    2.5))
meanSdPlot(assay(rld[notAllZero, ]), ylim = c(0, 2.5))
meanSdPlot(assay(vsd[notAllZero, ]), ylim = c(0, 2.5))

Dispersion estimates

plotDispEsts(dds)
handle_deseq2 = function(dds, summarydata, column) {
    all_combs = combn(levels(summarydata[, column]), 2, simplify = FALSE)
    all_results = list()
    contrast_strings = list()
    for (comb in all_combs) {
        contrast_string = paste(comb, collapse = " vs ")
        contrast = c(column, comb)
        res = results(dds, contrast = contrast)
        res = res[order(res$padj), ]
        all_results = c(all_results, res)
        contrast_strings = c(contrast_strings, contrast_string)
    }
    names(all_results) = contrast_strings
    return(all_results)
}

annotate_df = function(df, dataset, filter, symbol) { require(biomaRt) ensembl = useMart('ensembl', dataset = dataset) annot.df = getBM(attributes=c(filter, symbol), filters=c(filter), values=rownames(df), mart=ensembl) m = merge(df, annot.df, by.x="row.names", by.y=filter) colnames(m)[1] = "id" return(m) }

MA-plots

all_results = handle_deseq2(dds, summarydata, condition)
for (i in seq(length(all_results))) {
    plotMA(all_results[[i]])
    title(paste("MA plot for contrast", names(all_results)[i]))
}

Differentially expressed genes

for (i in seq(length(all_results))) {
    cat(paste("Lowest adjusted p-value hits for", names(all_results)[i]))
    # out_df = annotate_df(data.frame(all_results[[i]]), dataset, filter,
    # symbol)
    out_df = all_results[[i]]
    knitr::kable(head(out_df))
    write.table(out_df, file = paste(names(all_results)[i], ".tsv", sep = ""), 
        quote = FALSE, sep = "\t", row.names = TRUE, col.names = TRUE)
    cat("\n")
}

Lowest adjusted p-value hits for HBRR vs UHRR

baseMean log2FoldChange lfcSE stat pvalue padj
AAK1 1633.3 2.629 0.0525 50.02 0 0
ABAT 2156.1 3.732 0.0699 53.42 0 0
ABLIM1 4101.2 3.199 0.0406 78.78 0 0
ACSL6 1178.5 6.862 0.1514 45.33 0 0
ADAM11 920.7 4.782 0.1034 46.23 0 0
ADAM22 1576.4 4.372 0.0727 60.17 0 0