Below you will find information on how to download emails from Gmail and how to use R to process them.

Find more details here: http://smarterpoland.pl/index.php/2016/10/ggmail-ggplot2-data-from-gmail-forecast/

1. Get emails from gmail

Use Google Takeout (https://takeout.google.com) to download the data from your Gmail account as a single mbox file.

It may be large (15GB in my case).

2. Filter out headers

Since I am not going to analyse the content, I will remove everything except the Subject, Date, From and To headers.

The fastest way to do this is to use the grep command in the shell:

# Keep only the Date/Subject/From/To header lines of the mailbox.
grep -e "^Date:" -e "^Subject:" -e "^From:" -e "^To:" GGmail.mbox > GGheaders.txt

3. Read the data

Reading and cleaning. Here I am using only timestamps.

# Load the header lines produced by the grep step and keep only the
# "Date:" headers; the other headers are not used in this analysis.
lines <- readLines("GGheaders.txt")
lines <- grep("^Date:", lines, value = TRUE)
head(lines)
## [1] "Date: Fri, 30 Sep 2016 16:13:43 +0000"
## [2] "Date: Fri, 30 Sep 2016 16:13:43 +0000"
## [3] "Date: Fri, 30 Sep 2016 16:13:43 +0000"
## [4] "Date: Fri, 30 Sep 2016 17:53:49 +0200"
## [5] "Date: Fri, 30 Sep 2016 10:37:36 -0500"
## [6] "Date: Mon, 26 Sep 2016 22:12:24 +0200"
# Strip the "Date:" label and the optional day-of-week ("Fri,"), leaving
# e.g. "30 Sep 2016 16:13:43 +0000". A pattern is used rather than a fixed
# character offset because the day-of-week part is optional in mail headers;
# with a fixed offset, a header such as "Date: 30 Sep 2016 ..." would lose
# the day and part of the month and could no longer be parsed.
dates <- sub("^Date:\\s*(?:[A-Za-z]{3},)?\\s*", "", lines, perl = TRUE)
head(dates)
## [1] "30 Sep 2016 16:13:43 +0000" "30 Sep 2016 16:13:43 +0000"
## [3] "30 Sep 2016 16:13:43 +0000" "30 Sep 2016 17:53:49 +0200"
## [5] "30 Sep 2016 10:37:36 -0500" "26 Sep 2016 22:12:24 +0200"
library("lubridate")
library("forecast")
library("ggplot2")
library("forcats")

# Parse the day-month-year timestamps; as the printed values show, the
# per-message time zone offsets are all converted to UTC.
daty_clean <- dmy_hms(dates)
head(daty_clean)
## [1] "2016-09-30 16:13:43 UTC" "2016-09-30 16:13:43 UTC"
## [3] "2016-09-30 16:13:43 UTC" "2016-09-30 15:53:49 UTC"
## [5] "2016-09-30 15:37:36 UTC" "2016-09-26 20:12:24 UTC"
# Daily aggregates: number of messages per calendar day.
tt <- table(format(daty_clean, "%Y-%m-%d"))
head(tt)
## 
## 2001-12-31 2005-08-18 2005-09-18 2005-09-19 2007-03-27 2007-03-28 
##          1          1          1          1          1          1
# Quick-and-dirty trimming: keep only entries 45 to 3350 of the daily table.
# These positions are hard-coded for this particular mailbox (the first
# entries are isolated stray dates, see the output above) and need to be
# adjusted for any other data set.
tt <- tt[seq(45L, 3350L)]

4. ggplot it

# Time series object with one observation per day. `frequency` is the number
# of observations per unit of time, i.e. days per year here, so that the
# time axis of the plot is expressed in calendar years.
days_per_year <- 365
tts <- ts(tt, frequency = days_per_year, start = c(2007, 52))

# Daily counts over the whole period with a tight loess smoother on top.
autoplot(tts) +
  geom_smooth(method = "loess", span = .03, se = FALSE) +
  xlab("") + ylab("Number of emails per day") +
  ggtitle("Mailbox statistics")

# Some boxplots: distribution of the daily counts per day of the week,
# restricted to the most recent year (last 365 daily values).
# The calendar dates are recovered from the names of the series below, so the
# `start` of this shortened series does not influence the plot.
tts <- ts(c(tail(tts, days_per_year)), frequency = days_per_year,
          start = c(2007, 52))
dftts <- data.frame(mails = tts, date = ymd(names(tts)))
dftts$dow <- wday(dftts$date, label = TRUE, abbr = FALSE)
# coef = 100 stretches the whiskers far enough that no point is drawn as an
# outlier; the violin is overlaid to show the shape of each distribution.
ggplot(dftts, aes(factor(dow), mails, fill = dow)) + geom_boxplot(coef = 100) +
  geom_violin(alpha = 0.5, scale = "width") +
  theme(legend.position = "none") + xlab("") + ylab("Number of mails per day") +
  ggtitle("Monday's flood - mailbox statistics year 2016\n") +
  scale_y_continuous(limits = c(0, 260), expand = c(0, 0.5)) + coord_flip()