Data preparation

library(archivist)
library(knitr)
library(ggplot2)

# Register archivist print hooks for ggplot and knitr_kable objects, so that
# everything printed below is stored in the "arepo" archivist repository
# (GitHub coordinates: user pbiecek, repo Eseje, subdirectory arepo).
addHooksToPrint(
  class = c("ggplot", "knitr_kable"),
  repoDir = "arepo",
  repo = "Eseje",
  user = "pbiecek",
  subdir = "arepo"
)

library(scales)
library(dplyr)
library(tidyr)
library(gridExtra)
library(rworldmap)
library(ggthemes)
library(latticeExtra)
library(lattice)

# Quartile summary in the shape expected by stat_summary(fun.data = ...):
# the 25%, 50% and 75% quantiles of `x`, named ymin, y and ymax.
q3 <- function(x) {
  setNames(
    quantile(x, probs = c(0.25, 0.5, 0.75)),
    c("ymin", "y", "ymax")
  )
}

# Global knitr chunk options: no output prefix, 6 x 6 figures, raw ('asis')
# results, warnings and messages hidden, caching off.
opts_chunk$set(
  comment = NA,
  fig.width = 6,
  fig.height = 6,
  results = 'asis',
  warning = FALSE,
  message = FALSE,
  cache = FALSE
)

Data

This document uses two datasets from the SmarterPoland package: `countries` and `maturaExam`.

# SmarterPoland is attached here; the datasets used below come from it.
library(SmarterPoland)
# Print the first rows of `countries` as a kable table.
countries %>%
  head() %>%
  kable() %>%
  print()

Load: archivist::aread('pbiecek/Eseje/arepo/a0f2357b814a76a46f2a42e831ea5296')

country birth.rate death.rate population continent
Afghanistan 34.1 7.7 30552 Asia
Albania 12.9 9.4 3173 Europe
Algeria 24.3 5.7 39208 Africa
Andorra 8.9 8.4 79 Europe
Angola 44.1 13.9 21472 Africa
Antigua and Barbuda 16.5 6.8 90 Americas
# Print the first rows of `maturaExam` as a kable table.
maturaExam %>%
  head() %>%
  kable() %>%
  print()

Load: archivist::aread('pbiecek/Eseje/arepo/9797e887eb6f35519246f51946388462')

id_ucznia punkty przedmiot rok
4 14 matematyka 2011
4 31 j. polski 2011
5 19 matematyka 2010
5 35 j. polski 2010
7 16 matematyka 2010
7 43 j. polski 2010

Layers

# Countries with the lowest / highest birth rate within each continent; these
# rows feed the text-label layers of the plots below.
countriesMin <- countries %>%
  group_by(continent) %>%
  filter(birth.rate == min(birth.rate, na.rm = TRUE))
countriesMax <- countries %>%
  group_by(continent) %>%
  filter(birth.rate == max(birth.rate, na.rm = TRUE))

# Stripped-down theme for the single-layer panels: white vertical grid lines,
# zero-size axis ticks and zero-size axis text.
theme_ggplain <- theme_bw() +
  theme(
    panel.grid.major.x = element_line(color = "white"),
    axis.ticks = element_line(size = 0),
    axis.text = element_text(size = 0)
  )

# First example: every layer combined in one plot.
# Continents are ordered by their median birth rate.
countries$continent <- reorder(
  countries$continent,
  countries$birth.rate,
  median,
  na.rm = TRUE
)

ggplot(countries, aes(x = continent, y = birth.rate, label = country)) +
  geom_violin(aes(fill = continent), scale = "width", color = "white", alpha = 0.4) +
  stat_summary(fun.data = "q3", geom = "crossbar", colour = "red", width = 0.5) +
  geom_jitter(
    aes(size = (population)^0.9),
    position = position_jitter(width = .45, height = 0),
    shape = 15
  ) +
  geom_rug(sides = "l") +
  geom_text(data = countriesMin, vjust = 2, color = "blue3") +
  geom_text(data = countriesMax, vjust = -1, color = "blue3") +
  theme_bw() +
  xlab("") +
  theme(
    legend.position = "none",
    panel.grid.major.x = element_line(color = "white")
  )

Load: archivist::aread('pbiecek/Eseje/arepo/65e430c4180e97a704249a56be4a7b88')

# Text-label layer on its own. The jitter layer is drawn white with size 0,
# presumably only so the axes match the combined plot.
ggplot(countries, aes(x = continent, y = birth.rate, label = country)) +
  geom_jitter(color = "white", size = 0) +
  geom_text(data = countriesMin, vjust = 2, color = "blue3") +
  geom_text(data = countriesMax, vjust = -1, color = "blue3") +
  theme_bw() +
  xlab("") +
  ylab("") +
  theme_ggplain

Load: archivist::aread('pbiecek/Eseje/arepo/a80ac9dca323ecdffb23d998a3b1b642')

# Violin layer on its own; the min/max labels are still added, but in white.
ggplot(countries, aes(x = continent, y = birth.rate, label = country)) +
  geom_violin(aes(fill = continent), scale = "width", color = "white", alpha = 0.4) +
  geom_text(data = countriesMin, vjust = 2, color = "white") +
  geom_text(data = countriesMax, vjust = -1, color = "white") +
  theme_ggplain +
  xlab("") +
  ylab("") +
  theme(legend.position = "none")

Load: archivist::aread('pbiecek/Eseje/arepo/1901aec3d47335e22cc3b9db8299b6b4')

# Quartile crossbar layer on its own (q3 supplies ymin / y / ymax); the
# min/max labels are added in white.
ggplot(countries, aes(x = continent, y = birth.rate, label = country)) +
  stat_summary(fun.data = "q3", geom = "crossbar", colour = "red", width = 0.5) +
  geom_text(data = countriesMin, vjust = 2, color = "white") +
  geom_text(data = countriesMax, vjust = -1, color = "white") +
  theme_ggplain +
  xlab("") +
  ylab("")

Load: archivist::aread('pbiecek/Eseje/arepo/a308c4470a3150d85ccb2ce589ceec5a')

# Jittered square points on their own, sized by population^0.9; the min/max
# labels are added in white.
ggplot(countries, aes(x = continent, y = birth.rate, label = country)) +
  geom_jitter(
    aes(size = (population)^0.9),
    position = position_jitter(width = .45, height = 0),
    shape = 15
  ) +
  geom_text(data = countriesMin, vjust = 2, color = "white") +
  geom_text(data = countriesMax, vjust = -1, color = "white") +
  theme_ggplain +
  xlab("") +
  ylab("") +
  theme(legend.position = "none")

Load: archivist::aread('pbiecek/Eseje/arepo/234925c6528cbde29479b53b00998e14')

# Left-hand rug layer on its own; the min/max labels are added in white.
ggplot(countries, aes(x = continent, y = birth.rate, label = country)) +
  geom_rug(sides = "l") +
  geom_text(data = countriesMin, vjust = 2, color = "white") +
  geom_text(data = countriesMax, vjust = -1, color = "white") +
  theme_ggplain +
  xlab("") +
  ylab("")

Load: archivist::aread('pbiecek/Eseje/arepo/1408bc3d77c6a45b380772382e0d706b')

# Second example: every layer combined in one plot.
# Layers: 2D density contours, the points, the line y = x, and row 132 of
# `countries` highlighted in red. The view is a fixed-ratio window of
# 0-50 by 0-18 without expansion.
ggplot(countries, aes(x = birth.rate, y = death.rate)) +
  geom_density_2d(h = c(10, 10), color = "grey") +
  geom_point() +
  geom_abline(intercept = 0, slope = 1) +
  geom_point(data = countries[132, ], color = "red", size = 4) +
  theme_bw() +
  xlim(0, 50) +
  ylim(-10, 20) +
  coord_fixed(xlim = c(0, 50), ylim = c(0, 18), expand = FALSE)

Load: archivist::aread('pbiecek/Eseje/arepo/04f7a71dc8abb9f318cf1f57510c1045')

# Point layer on its own, in the same fixed-ratio window as the combined plot.
# Only one coord_fixed() is added: a plot keeps a single coordinate system, so
# an earlier bare coord_fixed() would just be replaced by this one.
ggplot(countries, aes(x = birth.rate, y = death.rate)) +
  geom_point() +
  theme_ggplain +
  xlab("") +
  ylab("") +
  xlim(0, 50) +
  ylim(-10, 20) +
  coord_fixed(xlim = c(0, 50), ylim = c(0, 18), expand = FALSE)

Load: archivist::aread('pbiecek/Eseje/arepo/14cc8c8bbf738872bb41f3eb0fecccf9')

# Density-contour layer on its own; the points are drawn in white.
# Only one coord_fixed() is added: a plot keeps a single coordinate system, so
# an earlier bare coord_fixed() would just be replaced by this one.
ggplot(countries, aes(x = birth.rate, y = death.rate)) +
  geom_point(color = "white") +
  geom_density_2d(h = c(10, 10), color = "grey") +
  theme_ggplain +
  xlab("") +
  ylab("") +
  xlim(0, 50) +
  ylim(-10, 20) +
  coord_fixed(xlim = c(0, 50), ylim = c(0, 18), expand = FALSE)

Load: archivist::aread('pbiecek/Eseje/arepo/fa748fcbe1f91f8c8f759b5dfe2d850e')

# The y = x reference line on its own; the points are drawn in white.
# Only one coord_fixed() is added: a plot keeps a single coordinate system, so
# an earlier bare coord_fixed() would just be replaced by this one.
ggplot(countries, aes(x = birth.rate, y = death.rate)) +
  geom_point(color = "white") +
  geom_abline(intercept = 0, slope = 1) +
  theme_ggplain +
  xlab("") +
  ylab("") +
  xlim(0, 50) +
  ylim(-10, 20) +
  coord_fixed(xlim = c(0, 50), ylim = c(0, 18), expand = FALSE)

Load: archivist::aread('pbiecek/Eseje/arepo/addf8a5ee7806d38e9f5d748112c72dc')

# The highlighted point (row 132 of `countries`) on its own; all other points
# are drawn in white.
# Only one coord_fixed() is added: a plot keeps a single coordinate system, so
# an earlier bare coord_fixed() would just be replaced by this one.
ggplot(countries, aes(x = birth.rate, y = death.rate)) +
  geom_point(color = "white") +
  geom_point(data = countries[132, ], color = "red", size = 4) +
  theme_ggplain +
  xlab("") +
  ylab("") +
  xlim(0, 50) +
  ylim(-10, 20) +
  coord_fixed(xlim = c(0, 50), ylim = c(0, 18), expand = FALSE)

Load: archivist::aread('pbiecek/Eseje/arepo/fcfe441650e757c01e3810f79cf81ade')

Mappings

# Position mappings only: birth rate on x, death rate on y, fixed aspect ratio.
ggplot(countries, aes(x = birth.rate, y = death.rate)) +
  geom_point() +
  coord_fixed() +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/0981221235d267faeb2c93af3e79f5b5')

# Continent is additionally mapped to both colour and shape.
ggplot(
  countries,
  aes(x = birth.rate, y = death.rate, color = continent, shape = continent)
) +
  geom_point() +
  coord_fixed() +
  theme_bw() +
  theme(legend.position = "top")

Load: archivist::aread('pbiecek/Eseje/arepo/945db9ac11b2fedb2e38bdea4afd6ec7')

# Bin population into five ordered classes. The labels suggest `population`
# is stored in thousands (the 10^3 break is labelled 1M) - TODO confirm.
countries$populationCat <- cut(
  countries$population,
  breaks = c(1, 10^3, 10^4, 10^5, 10^6, 10^7),
  labels = c("< 1M", "< 10M", "< 100 M", "< 1 B", "> 1 B"),
  ordered_result = TRUE
)

# From here on `countries` holds only rows without any missing value.
countries <- na.omit(countries)

# Continent on colour and shape, population class on point size.
ggplot(
  countries,
  aes(
    x = birth.rate,
    y = death.rate,
    color = continent,
    shape = continent,
    size = populationCat
  )
) +
  geom_point() +
  coord_fixed() +
  theme_bw() +
  theme(legend.position = "top")

Load: archivist::aread('pbiecek/Eseje/arepo/51958f58bb2b73bb21ba6b93ce0915a5')

# Population class on both colour (sequential palette) and size (manual sizes),
# continent on shape.
ggplot(
  countries,
  aes(
    x = birth.rate,
    y = death.rate,
    color = populationCat,
    shape = continent,
    size = populationCat
  )
) +
  geom_point() +
  coord_fixed() +
  scale_color_brewer(type = "seq") +
  scale_size_manual(values = c(1, 2, 3, 6, 9)) +
  theme_bw() +
  theme(legend.position = "top")

Load: archivist::aread('pbiecek/Eseje/arepo/f19d24cdc43a97da594a820a907825e6')

Forms / Geoms

Points

# Dot plot: birth rate binned along y (bin width 0.7), dots stacked around the
# centre of each continent.
ggplot(countries, aes(x = continent, y = birth.rate)) +
  geom_dotplot(
    binaxis = "y",
    stackdir = "center",
    binwidth = 0.7
  ) +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/102ca5c2a033d2a620b198037f414730')

# Scatter plot of death rate against birth rate.
ggplot(
  countries,
  aes(x = birth.rate, y = death.rate)
) +
  geom_point() +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/60b2e6167f99823e40410ea6e326c050')

# Jittered points: birth rate per continent with horizontal jitter width 0.2.
ggplot(
  countries,
  aes(x = continent, y = birth.rate)
) +
  geom_jitter(position = position_jitter(width = .2)) +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/f07244def960f31b6a0e8f0f436da30e')

# Different mappings: continent drawn as a letter.
# The letters are given as a NAMED vector so each continent gets its own
# letter regardless of factor level order. `continent` was reordered by median
# birth rate earlier, so an unnamed vector would hand the letters out in that
# order instead.
# NOTE(review): assumes the continent levels are exactly Africa, Americas,
# Asia, Europe and Oceania - confirm against levels(countries$continent).
ggplot() +
  geom_point(
    data = countries,
    aes(x = birth.rate, y = death.rate, shape = continent),
    size = 4
  ) +
  theme_bw() +
  scale_shape_manual(
    values = c(Africa = "F", Americas = "A", Asia = "S", Europe = "E", Oceania = "O")
  ) +
  theme(legend.position = c(0.9, 0.17))

Load: archivist::aread('pbiecek/Eseje/arepo/6e68d5cfd6a95dbae7b16928c3853192')

# Continent drawn as a coloured letter.
# The letters are given as a NAMED vector so each continent gets its own
# letter regardless of factor level order. `continent` was reordered by median
# birth rate earlier, so an unnamed vector would hand the letters out in that
# order instead.
# NOTE(review): assumes the continent levels are exactly Africa, Americas,
# Asia, Europe and Oceania - confirm against levels(countries$continent).
ggplot() +
  geom_point(
    data = countries,
    aes(x = birth.rate, y = death.rate, shape = continent, color = continent),
    size = 4
  ) +
  theme_bw() +
  scale_shape_manual(
    values = c(Africa = "F", Americas = "A", Asia = "S", Europe = "E", Oceania = "O")
  ) +
  theme(legend.position = c(0.9, 0.17))

Load: archivist::aread('pbiecek/Eseje/arepo/7d333a3db2b3f08cef09219b30c80211')

# Continent on colour only (qualitative Brewer palette 6), solid circles.
ggplot() +
  geom_point(
    data = countries,
    aes(x = birth.rate, y = death.rate, color = continent),
    size = 4,
    shape = 19
  ) +
  theme_bw() +
  scale_color_brewer(type = "qual", palette = 6) +
  theme(legend.position = c(0.9, 0.17))

Load: archivist::aread('pbiecek/Eseje/arepo/9b48f1a49df811e37c1743f4c2dc4df6')

# Population on point size, using a square-root size scale limited to
# 0 - 1,500,000 with comma-formatted labels; the legend is hidden.
# `labels` is spelled out in full rather than relying on partial argument
# matching of `label`.
ggplot() +
  geom_point(
    data = countries,
    aes(x = birth.rate, y = death.rate, size = population)
  ) +
  scale_size_continuous(trans = "sqrt", labels = comma, limits = c(0, 1500000)) +
  theme_bw() +
  theme(legend.position = "none")

Load: archivist::aread('pbiecek/Eseje/arepo/e513dc5faaf48db0768354a176da6e36')

Area

# Overlapping, semi-transparent density curves of birth rate, one per continent.
ggplot(
  countries,
  aes(x = birth.rate, fill = continent)
) +
  geom_density(alpha = 0.5) +
  scale_fill_brewer(type = "qual", palette = 6) +
  theme_bw() +
  theme(legend.position = c(0.85, 0.85))

Load: archivist::aread('pbiecek/Eseje/arepo/c639ffbef468fd2a3a3ab3ac0f56df9c')

# The same densities with position = "fill" and no outline.
ggplot(
  countries,
  aes(x = birth.rate, fill = continent)
) +
  geom_density(position = "fill", color = NA) +
  scale_fill_brewer(type = "qual", palette = 6) +
  theme_bw() +
  theme(legend.position = "none")

Load: archivist::aread('pbiecek/Eseje/arepo/8f300e294bdd9fd808807b8dde09694f')

# Violin plot of birth rate per continent; outline and fill share one palette.
ggplot(
  countries,
  aes(x = continent, y = birth.rate, color = continent, fill = continent)
) +
  geom_violin(scale = "width") +
  scale_color_brewer(type = "qual", palette = 6) +
  scale_fill_brewer(type = "qual", palette = 6) +
  theme_bw() +
  theme(legend.position = "none")

Load: archivist::aread('pbiecek/Eseje/arepo/de3c64be8144262c48231862260b83a1')

# Ribbon: population-weighted mean birth and death rate per continent.
ndf <- countries %>%
  group_by(continent) %>%
  summarise(
    birth.rate = weighted.mean(birth.rate, population, na.rm = TRUE),
    death.rate = weighted.mean(death.rate, population, na.rm = TRUE)
  )

# Two ribbons rising from 0: green up to the birth rate, red up to the death
# rate. group = 1 joins the continents into a single ribbon.
ggplot() +
  geom_ribbon(
    data = ndf,
    aes(x = continent, ymax = birth.rate, y = birth.rate, ymin = 0, group = 1),
    fill = "green3"
  ) +
  geom_ribbon(
    data = ndf,
    aes(x = continent, ymax = death.rate, y = death.rate, ymin = 0, group = 1),
    fill = "red3"
  ) +
  theme_bw() +
  xlab("") +
  ylab("birth.rate / death.rate")

Load: archivist::aread('pbiecek/Eseje/arepo/469783a6c14c12d467d29527557c4ab0')

Rectangles

# Per-continent summary: population-weighted mean rates and total population.
continents <- countries %>%
  group_by(continent) %>%
  summarise(
    birth.rate = weighted.mean(birth.rate, w = population, na.rm = TRUE),
    death.rate = weighted.mean(death.rate, w = population, na.rm = TRUE),
    population = sum(population)
  )

# Histogram of birth rate (bin width 2.5), bars filled by continent.
ggplot(countries, aes(x = birth.rate, fill = continent)) +
  geom_histogram(binwidth = 2.5) +
  scale_fill_brewer(type = "qual", palette = 6) +
  theme_bw() +
  theme(legend.position = "none")

Load: archivist::aread('pbiecek/Eseje/arepo/fef09c46a24ce7b68e270dda952ad7d9')

# Rectangle: a grey band covering x from 12.38 to 27.85 (y from 0 to 18) drawn
# behind the scatter plot.
ggplot(
  countries,
  aes(x = birth.rate, y = death.rate)
) +
  geom_rect(
    xmin = 12.38,
    xmax = 27.85,
    ymin = 0,
    ymax = 18,
    alpha = 0.3,
    fill = "grey90"
  ) +
  geom_point() +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/0ecc7c473e06f2f4d0df262fa73123e7')

# Bar chart of the per-continent birth rate; bar heights come straight from
# the data (stat = "identity").
ggplot(
  continents,
  aes(x = continent, y = birth.rate)
) +
  geom_bar(stat = "identity") +
  scale_fill_brewer(type = "qual", palette = 6) +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/952d223733fda0cb06941f867d6f18a5')

# `cum` is the total population of all preceding continents, i.e. the left
# edge of each continent's rectangle.
continents2 <- continents %>%
  mutate(cum = cumsum(population) - population)

# Variable-width bars: width is the continent's population, height its birth
# rate; the continent name is printed above each bar.
ggplot() +
  geom_rect(
    data = continents2,
    aes(
      xmin = cum,
      ymax = 0,
      xmax = cum + population,
      ymin = birth.rate,
      fill = continent
    )
  ) +
  geom_text(
    data = continents2,
    aes(label = continent, x = cum + population / 2, y = birth.rate),
    vjust = -0.1
  ) +
  scale_fill_brewer(type = "qual", palette = 6) +
  theme_bw() +
  theme(legend.position = "none") +
  xlab("population") +
  ylab("birth.rate")

Load: archivist::aread('pbiecek/Eseje/arepo/5b54bb0d817fea77b73d21c576188585')

# Paired rectangles per continent: a green bar from 0 up to the birth rate and,
# next to it, a red bar hanging down from the birth rate by the death rate, so
# its lower edge sits at birth.rate - death.rate. The rug on the left marks
# those lower edges; x-axis text and ticks are drawn in white.
ggplot() +
  geom_rect(
    data = continents,
    aes(
      xmin = as.numeric(factor(continent)) - 0.1,
      ymin = 0,
      xmax = as.numeric(factor(continent)) + 0.2,
      ymax = birth.rate
    ),
    fill = "green3"
  ) +
  geom_rect(
    data = continents,
    aes(
      xmin = as.numeric(factor(continent)) + 0.21,
      ymin = birth.rate - death.rate,
      xmax = as.numeric(factor(continent)) + 0.51,
      ymax = birth.rate
    ),
    fill = "red3"
  ) +
  geom_text(
    data = continents,
    aes(
      x = as.numeric(factor(continent)) + 0.21,
      y = birth.rate + 1,
      label = continent
    )
  ) +
  geom_hline(yintercept = 0) +
  ylab("birth.rate - death.rate") +
  xlab("") +
  geom_rug(
    data = continents,
    aes(
      x = as.numeric(factor(continent)) + 0.21,
      y = birth.rate - death.rate
    ),
    sides = "l"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(color = "white"),
    axis.ticks.x = element_line(color = "white")
  )

Load: archivist::aread('pbiecek/Eseje/arepo/29b577d819ee503a44a2bca49052679e')

# Number of countries per continent.
# The y-axis label is Polish for "Number of countries"; its mis-encoded
# character sequence has been restored to the intended letter.
ggplot(countries, aes(x = continent, fill = continent)) +
  geom_bar() +
  scale_fill_brewer(type = "qual", palette = 6) +
  theme_bw() +
  theme(legend.position = "none") +
  xlab("") +
  ylab("Liczba krajów")

Load: archivist::aread('pbiecek/Eseje/arepo/0e44cb019efb514cdf0757f2ce4fe16a')

Lines

# Slope chart: mean birth and death rate per continent, joined by a line.
# The data is first reshaped to long form (one row per country and rate).
countries3 <- countries %>%
  gather(rate, values, birth.rate, death.rate) %>%
  group_by(continent, rate) %>%
  summarise(values = mean(values, na.rm = TRUE))
ggplot(
  countries3,
  aes(x = rate, y = values, group = continent, color = continent)
) +
  geom_line(size = 2) +
  geom_point(size = 4) +
  theme_bw() +
  theme(legend.position = "none") +
  scale_color_brewer(type = "qual", palette = 6)

Load: archivist::aread('pbiecek/Eseje/arepo/fce98c75501dc46518c75719edb97f72')

# Line through all countries, death rate against birth rate.
ggplot(
  countries,
  aes(x = birth.rate, y = death.rate)
) +
  geom_line() +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/fde7662e1e4ed146b1bd0d9cfd834409')

# Smoothed trend only: the points are fully transparent and only the thick
# black smoother (without confidence band) is visible.
ggplot(
  countries,
  aes(x = birth.rate, y = death.rate)
) +
  geom_point(color = "white", alpha = 0) +
  geom_smooth(se = FALSE, size = 3, color = "black") +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/bf0ce991c06aa5f5d5413d09ded636cf')

# Three smoothers over the same points: a degree-1 and a degree-2 polynomial
# fitted with lm, and the default smoother with span 0.5.
ggplot(countries, aes(x = birth.rate, y = death.rate)) +
  geom_point(color = "black", alpha = 0.3) +
  geom_smooth(
    se = FALSE, size = 2, color = "red4",
    method = "lm", formula = y ~ poly(x, 1)
  ) +
  geom_smooth(
    se = FALSE, size = 2, color = "red3",
    method = "lm", formula = y ~ poly(x, 2)
  ) +
  geom_smooth(se = FALSE, size = 2, color = "red1", span = 0.5) +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/a6af3a7e58959a8f7829ba5c933be5ec')

# Arrows: one per European country, pointing from its birth rate to its death
# rate. Countries are ordered by birth rate; note that reorder() turns
# `country` into a factor.
library(grid)
countries$country <- reorder(countries$country, countries$birth.rate, mean)
ggplot() +
  geom_segment(
    data = countries[countries$continent == "Europe", ],
    aes(x = country, xend = country, y = birth.rate, yend = death.rate),
    arrow = arrow(length = unit(0.1, "cm"))
  ) +
  theme_bw() +
  coord_flip() +
  theme(legend.position = "none") +
  ylab("<---- more births                more deaths ---->") +
  xlab("")

Load: archivist::aread('pbiecek/Eseje/arepo/34dc9d16a42e505ed84567c93d9c1f52')

# The same arrows coloured by direction: green when births are not exceeded by
# deaths (FALSE), red when deaths exceed births (TRUE).
ggplot() +
  geom_segment(
    data = countries[countries$continent == "Europe", ],
    aes(
      x = country,
      xend = country,
      y = birth.rate,
      yend = death.rate,
      color = death.rate > birth.rate
    ),
    arrow = arrow(length = unit(0.1, "cm"))
  ) +
  theme_bw() +
  coord_flip() +
  theme(legend.position = "top") +
  ylab("<---- more births                more deaths ---->") +
  xlab("") +
  scale_color_manual(
    values = c("green3", "red3"),
    labels = c("More births than deaths", "More deaths than births"),
    name = ""
  )

Load: archivist::aread('pbiecek/Eseje/arepo/25b50323a884e8d0a8b68ae7805b9274')

# The same arrows with line width mapped to population and closed arrow heads.
ggplot() +
  geom_segment(
    data = countries[countries$continent == "Europe", ],
    aes(
      x = country,
      xend = country,
      y = birth.rate,
      yend = death.rate,
      size = population
    ),
    arrow = arrow(length = unit(0.1, "cm"), type = "closed")
  ) +
  theme_bw() +
  coord_flip() +
  theme(legend.position = "none") +
  ylab("<---- more births                more deaths ---->") +
  xlab("")

Load: archivist::aread('pbiecek/Eseje/arepo/d707f0ac20235b636e005528b7fa8ab9')

# The same arrows with the direction shown by line type.
# The mapped condition is `death.rate > birth.rate`, matching the coloured
# version of this plot, so that the manual scale lines up with its labels:
# FALSE -> line type 1 -> "More births than deaths",
# TRUE  -> line type 2 -> "More deaths than births".
# (Mapping `birth.rate > death.rate` with these labels inverts the legend.)
ggplot() +
  geom_segment(
    data = countries[countries$continent == "Europe", ],
    aes(
      x = country,
      xend = country,
      y = birth.rate,
      yend = death.rate,
      linetype = death.rate > birth.rate
    ),
    arrow = arrow(length = unit(0.1, "cm"), type = "closed")
  ) +
  theme_bw() +
  coord_flip() +
  theme(legend.position = "top") +
  ylab("<---- more births                more deaths ---->") +
  xlab("") +
  scale_linetype_manual(
    values = c(1, 2),
    labels = c("More births than deaths", "More deaths than births"),
    name = ""
  )

Load: archivist::aread('pbiecek/Eseje/arepo/e08e99bfacafda97efa0f5e68ac50d14')

# Error bars: per continent, the min / max / population-weighted mean of the
# birth rate (b*) and of the death rate (d*), plus the total population.
conts <- countries %>%
  group_by(continent) %>%
  summarise(
    bmin = min(birth.rate, na.rm = TRUE),
    bmax = max(birth.rate, na.rm = TRUE),
    bmea = weighted.mean(birth.rate, w = population, na.rm = TRUE),
    dmin = min(death.rate, na.rm = TRUE),
    dmax = max(death.rate, na.rm = TRUE),
    dmea = weighted.mean(death.rate, w = population, na.rm = TRUE),
    population = sum(population, na.rm = TRUE)
  )

# One point per continent at the weighted means, with a vertical error bar
# spanning the death-rate range. The horizontal bar is left disabled.
ggplot(
  conts,
  aes(
    x = bmea, y = dmea,
    ymin = dmin, ymax = dmax,
    xmin = bmin, xmax = bmax,
    color = continent
  )
) +
  geom_point() +
  geom_errorbar(width = 0.5) +
#  geom_errorbarh(width=0.5) +
  theme_bw() +
  xlab("birth.rate") +
  ylab("death.rate") +
  theme(legend.position = "none")

Load: archivist::aread('pbiecek/Eseje/arepo/e9a66f108daaaa9d9a28d2b41106d74d')

Other geoms

# stat_binhex: hexagonal binning with 9 bins, counts shown from white to black.
# geomBinHex.pdf
ggplot(
  countries,
  aes(x = birth.rate, y = death.rate)
) +
  stat_binhex(bins = 9) +
  scale_fill_gradient(low = "white", high = "black") +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/a8b02f8a94acb5b2fd20831b24cc960a')

# geom_rug: marginal tick marks for birth rate and death rate.
# geomRug.pdf
ggplot(
  countries,
  aes(x = birth.rate, y = death.rate)
) +
  geom_rug() +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/f759406dd633fa28d69c9b0a531f982f')

# Redefine `continents` as the plain (unweighted) mean rates per continent.
continents <- countries %>%
  group_by(continent) %>%
  summarise(
    birth.rate = mean(birth.rate, na.rm = TRUE),
    death.rate = mean(death.rate, na.rm = TRUE)
  )

# geom_text: continent names placed at their mean birth / death rate.
# geomText.pdf
ggplot(
  continents,
  aes(x = birth.rate, y = death.rate, label = continent)
) +
  geom_text(alpha = 1) +
  theme_bw() +
  xlim(8, 35)

Load: archivist::aread('pbiecek/Eseje/arepo/fd260291aa2740a2cd4ec3a092c5bbb4')

Stats

# Box plot of birth rate per continent, with the whisker coefficient set to 3.
ggplot(
  countries,
  aes(x = continent, y = birth.rate)
) +
  geom_boxplot(fill = "grey", coef = 3) +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/42027f48cae40462c6ae8be7b41b3b87')

# Crossbar from the mean_cl_boot summary, drawn over grey jittered points.
ggplot(
  countries,
  aes(x = continent, y = birth.rate)
) +
  geom_jitter(position = position_jitter(width = 0.25), color = "grey") +
  stat_summary(fun.data = "mean_cl_boot", geom = "crossbar", width = 0.3) +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/e54840a0ed4a4ea7c712c3e21304402e')

# Arrows spanning the quartiles of birth rate per continent, over grey
# jittered points.
# NOTE(review): %a% is not a typo for %>% - it looks like archivist's
# archiving pipe operator; confirm against the archivist documentation.
library(tidyr)
library(grid)

countries %a%
  group_by(continent) %a%
  summarise(
    q1 = quantile(birth.rate, 0.25, na.rm = TRUE),
    q2 = quantile(birth.rate, 0.5, na.rm = TRUE),
    q3 = quantile(birth.rate, 0.75, na.rm = TRUE)
  ) %a%
  gather(key, value, -continent) %a%
  ggplot(aes(x = continent, y = value, group = continent)) +
  geom_jitter(
    data = countries,
    aes(x = continent, y = birth.rate),
    position = position_jitter(width = 0.25),
    color = "grey"
  ) +
  geom_path(arrow = arrow(ends = "both"), size = 2) +
  theme_bw() +
  xlab("") +
  ylab("Quantiles")

Load: archivist::aread('pbiecek/Eseje/arepo/0c691820a9af16a8d72380defbfbf6e2')

# Quartiles of birth rate per continent: `cq` is wide (columns q1, q2, q3),
# `cq13` is the same data in long form (columns key, value).
cq <- countries %>%
  group_by(continent) %>%
  summarise(
    q1 = quantile(birth.rate, 0.25, na.rm = TRUE),
    q2 = quantile(birth.rate, 0.5, na.rm = TRUE),
    q3 = quantile(birth.rate, 0.75, na.rm = TRUE)
  )
cq13 <- cq %>%
  gather(key, value, -continent)

# statQ1.pdf
# Double-headed arrow through the three quartiles, with a large dot on the
# median (q2), over grey jittered points.
ggplot(cq13, aes(x = continent, y = value, group = continent)) +
  geom_jitter(
    data = countries,
    aes(x = continent, y = birth.rate),
    position = position_jitter(width = 0.25),
    color = "grey"
  ) +
  geom_path(arrow = arrow(ends = "both"), size = 2) +
  geom_point(
    data = cq13[cq13$key == "q2", ],
    aes(x = continent, y = value),
    size = 5
  ) +
  theme_bw() +
  xlab("") +
  ylab("Kwartyle i mediana dla wsp. urodzin")

Load: archivist::aread('pbiecek/Eseje/arepo/9ca8e296895486e2f8642d4283bf124a')

# statQ2.pdf
# Box built directly from the precomputed quartiles (stat = "identity"); the
# whiskers are set to q1 / q3, i.e. the same values as the box edges.
ggplot(cq, aes(x = continent, y = q2)) +
  geom_boxplot(
    aes(ymin = q1, lower = q1, middle = q2, upper = q3, ymax = q3),
    stat = "identity"
  ) +
  geom_jitter(
    data = countries,
    aes(x = continent, y = birth.rate),
    position = position_jitter(width = 0.25),
    color = "grey"
  ) +
  theme_bw() +
  xlab("") +
  ylab("Kwartyle i mediana dla wsp. urodzin")

Load: archivist::aread('pbiecek/Eseje/arepo/ccdc8e4319ba8b3806b35f2fbd4b7484')

# statQ3.pdf
# Error bar from q1 to q3 with a large "*" at the median, over grey jittered
# points.
ggplot(cq, aes(x = continent, y = q2)) +
  geom_jitter(
    data = countries,
    aes(x = continent, y = birth.rate),
    position = position_jitter(width = 0.25),
    color = "grey"
  ) +
  geom_errorbar(
    aes(ymin = q1, ymax = q3),
    stat = "identity",
    width = 0.3,
    size = 2
  ) +
  geom_text(label = "*", size = 25) +
  theme_bw() +
  xlab("") +
  ylab("Kwartyle i mediana dla wsp. urodzin")

Load: archivist::aread('pbiecek/Eseje/arepo/0c7fd7840c77916d6e6547e8ed3b6fcf')

# 2D density shown as grey contour lines (bandwidth 10 in both directions).
ggplot(countries, aes(x = birth.rate, y = death.rate)) +
  coord_fixed() +
  xlim(0, 50) +
  ylim(0, 16) +
  geom_density2d(h = c(10, 10), color = "grey") +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/cd45b1a233ea2740dde01cae8a5f35c3')

# The same 2D density drawn as semi-transparent polygons.
ggplot(countries, aes(x = birth.rate, y = death.rate)) +
  coord_fixed() +
  xlim(0, 50) +
  ylim(0, 16) +
  stat_density2d(h = c(10, 10), geom = "polygon", alpha = 0.25) +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/a9f6db1b070694fa741761aa6d3fd9cc')

# The same 2D density drawn as tiles filled by density, white to black.
ggplot(countries, aes(x = birth.rate, y = death.rate)) +
  coord_fixed() +
  xlim(0, 50) +
  ylim(0, 16) +
  stat_density2d(
    h = c(10, 10),
    geom = "tile",
    aes(fill = ..density..),
    contour = FALSE
  ) +
  scale_fill_gradient(low = "white", high = "black") +
  theme_minimal() +
  theme(legend.position = "top")

Load: archivist::aread('pbiecek/Eseje/arepo/4771ed94154c6d4efb97ef09ba29011b')

# The same 2D density on a 50 x 15 grid of points whose size encodes density.
ggplot(countries, aes(x = birth.rate, y = death.rate)) +
  coord_fixed() +
  xlim(0, 50) +
  ylim(0, 16) +
  stat_density2d(
    h = c(10, 10),
    n = c(50, 15),
    geom = "point",
    aes(size = ..density..),
    contour = FALSE
  ) +
  scale_size_continuous(range = c(0, 4)) +
  theme_minimal() +
  theme(legend.position = "top")

Load: archivist::aread('pbiecek/Eseje/arepo/6b202c36bc0d499bcc49db1d2ce6dc74')

Position modifiers

# One row per student and year, with one column of points per subject.
maturaExam2 <- spread(maturaExam, przedmiot, punkty)
# Cross-tabulate the quartile group of the maths score against the quartile
# group of the Polish score (rank divided by the number of rows, cut at
# 25 / 50 / 75 %).
tab <- table(
  matematyka = cut(
    rank(maturaExam2$matematyka) / nrow(maturaExam2),
    c(0, 0.25, 0.5, 0.75, 1),
    labels = c("matematyka <25%", "matematyka 25%-50%", "matematyka 50%-75%", "matematyka >75%")
  ),
  polski = cut(
    rank(maturaExam2[, "j. polski"]) / nrow(maturaExam2),
    c(0, 0.25, 0.5, 0.75, 1),
    labels = c("polski <25%", "polski 25%-50%", "polski 50%-75%", "polski >75%")
  )
)

# Long form of the table; the column names are picked up by position below.
df <- data.frame(tab)
dfn <- colnames(df)
# Bars rescaled to 100% (position = "fill") with a percent y axis.
# `labels` is spelled out in full rather than relying on partial argument
# matching of `label`.
ggplot(df, aes_string(x = dfn[1], fill = dfn[2], y = dfn[3])) +
  geom_bar(stat = "identity", position = "fill") +
  theme_bw() +
  scale_y_continuous(labels = percent) +
  scale_fill_brewer(type = "div") +
  ylab("") +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90, vjust = 0.5)
  ) +
  ggtitle("position_fill") +
  xlab("")

Load: archivist::aread('pbiecek/Eseje/arepo/7265ee0ec5a30a1d47093794741db5f8')

# The same bars stacked on top of each other (position = "stack").
ggplot(df, aes_string(x = dfn[1], fill = dfn[2], y = dfn[3])) +
  geom_bar(stat = "identity", position = "stack") +
  theme_bw() +
  scale_fill_brewer(type = "div") +
  ylab("") +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90, vjust = 0.5)
  ) +
  ggtitle("position_stack") +
  xlab("")

Load: archivist::aread('pbiecek/Eseje/arepo/b341b19e2c47ef337b88d947b7c7d724')

# The same bars placed side by side (position = "dodge").
ggplot(df, aes_string(x = dfn[1], fill = dfn[2], y = dfn[3])) +
  geom_bar(stat = "identity", position = "dodge") +
  theme_bw() +
  scale_fill_brewer(type = "div") +
  ylab("") +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90, vjust = 0.5)
  ) +
  ggtitle("position_dodge") +
  xlab("")

Load: archivist::aread('pbiecek/Eseje/arepo/ba1ff89720bd278479164a3bf6a83f25')

# Plain points with no position adjustment.
ggplot(
  countries,
  aes(x = continent, y = birth.rate)
) +
  geom_point() +
  theme_bw() + #theme(axis.text.x  = element_text(angle=90, vjust=0.5)) +
  ggtitle("position_identity") +
  xlab("")

Load: archivist::aread('pbiecek/Eseje/arepo/037cfa6d861539aecbe690ef6aa66b9a')

# Dot plot stacking: dots binned along y and stacked around the centre.
ggplot(
  countries,
  aes(x = continent, y = birth.rate)
) +
  geom_dotplot(binaxis = "y", stackdir = "center", binwidth = 0.7) +
  theme_bw() + #theme(axis.text.x  = element_text(angle=90, vjust=0.5)) +
  ggtitle("position_dotplot") +
  xlab("")

Load: archivist::aread('pbiecek/Eseje/arepo/f46528c2f61ba5c5d6ae87bb8a93dd87')

# Jittered points; 0.35 is passed as the first argument of position_jitter().
ggplot(
  countries,
  aes(x = continent, y = birth.rate)
) +
  geom_jitter(position = position_jitter(0.35)) +
  theme_bw() + #theme(axis.text.x  = element_text(angle=90, vjust=0.5)) +
  ggtitle("position_jitter") +
  xlab("")

Load: archivist::aread('pbiecek/Eseje/arepo/f869b0b8602948e8ce7420491e82372e')

Coordinate system

# Base scatter plot with a smoother (no confidence band), reused below with
# different coordinate systems.
# `se = FALSE` is spelled out: `F` is an ordinary variable that can be
# reassigned, `FALSE` is a reserved word.
ddplot <- ggplot(countries, aes(x = birth.rate, y = death.rate)) +
  geom_point(size = 2) +
  geom_smooth(size = 2, se = FALSE) +
  theme_bw() +
  theme(legend.position = "none", text = element_text(size = 20))

ddplot

Load: archivist::aread('pbiecek/Eseje/arepo/e058baf8d00f2ab1521c610d30c496c3')

# Base plot with a fixed-aspect-ratio coordinate system.
ddplot + coord_fixed()

Load: archivist::aread('pbiecek/Eseje/arepo/8ca813219ef90fabb90f30657ad53ec8')

# Base plot in polar coordinates.
ddplot + coord_polar()

Load: archivist::aread('pbiecek/Eseje/arepo/763f716bbbcb4caab7029a6490df9601')

# Base plot with transformed coordinates: log2 on x, square root on y.
ddplot + coord_trans(x = "log2", y = "sqrt")

Load: archivist::aread('pbiecek/Eseje/arepo/cfff2fd59b9db7d99c36738531fc705d')

# Base plot with the x and y axes swapped.
ddplot + coord_flip()

Load: archivist::aread('pbiecek/Eseje/arepo/1bda47d7738afb4c2828e4ec26b1a9a4')

# Maps
map.world <- map_data(map = "world")

# `country` is a factor at this point (it went through reorder() earlier), and
# assigning a string that is not an existing level to a factor stores NA with
# a warning. Convert to character first so the renames below take effect.
countries$country <- as.character(countries$country)
# Rename three countries to the region names used by the world map.
# NOTE(review): the rows are addressed by position and `countries` was
# filtered with na.omit() earlier - confirm that 177 / 135 / 76 still point at
# the intended countries.
countries[177, "country"] <- "UK"
countries[135, "country"] <- "Russia"
countries[76, "country"] <- "Iran"

# Attach the country data to the map polygons and restore the drawing order.
map2 <- merge(map.world, countries, by.x = "region", by.y = "country")
map3 <- map2[order(map2$order), ]

# Choropleth of birth rate, cropped to longitude -25..50 and latitude 10..70.
# NOTE(review): theme_bw() and theme_map() are added after the earlier
# theme(...) tweaks and may override them (legend position, text size) -
# confirm the intended order.
gg <- ggplot()
gg <- gg + theme(legend.position = "none")
gg <- gg +
  geom_map(
    data = map3,
    map = map3,
    color = "grey",
    aes(map_id = region, x = long, y = lat, fill = birth.rate)
  )
gg <- gg +
  scale_fill_gradient(low = "yellow", high = "blue4", guide = "colourbar") +
  theme_bw() +
  ylim(10, 70) +
  xlim(-25, 50) +
  theme(text = element_text(size = 20)) +
  theme_map()

# Mollweide projection.
gg + coord_map("mollweide")

Load: archivist::aread('pbiecek/Eseje/arepo/7251aeb35171023ae01c87b501077e95')

# Orthographic projection with orientation c(60, 20, -10).
gg +
  coord_map("ortho", orientation = c(60, 20, -10))

Load: archivist::aread('pbiecek/Eseje/arepo/0ad6c1ad0b9cb34d99664e2b8c42a509')

# Default coord_map() projection.
gg + coord_map()

Load: archivist::aread('pbiecek/Eseje/arepo/3459c89f94d267bbc041e7b1b784cadc')

Scales

# Scale_fill_brewer
# One bar per level of an 8-level factor, so every panel shows eight colours
# of one Brewer palette.
df <- data.frame(a = factor(1:8))

# pl1 / pl2 / pl3 hold the qualitative, diverging and sequential variants of
# palettes 1 to 8.
pl1 <- list()
pl2 <- list()
pl3 <- list()
for (i in 1:8) {
  # Bare bar chart (no axes, lines, backgrounds or margins) for palette i.
  pl1[[i]] <- ggplot(df, aes(x = a, fill = a)) +
    geom_bar() +
    scale_fill_brewer(type = "qual", palette = i) +
    theme(
      axis.title.x = element_text(size = 0, color = "black"),
      axis.title.y = element_text(size = 0, color = "black"),
      axis.text.y = element_text(size = 0, color = "black"),
      axis.text.x = element_text(size = 0, color = "black"),
      line = element_blank(),
      rect = element_blank(), #text = element_blank(),
      plot.margin = grid::unit(c(0, 0, 0, 0), "lines"),
      complete = TRUE,
      legend.position = "none"
    ) +
    ggtitle(paste("type = qual, palette =", i, ""))
  # The same chart with the fill scale swapped for the other palette types.
  pl2[[i]] <- pl1[[i]] +
    scale_fill_brewer(type = "div", palette = i) +
    ggtitle(paste("type = div, palette =", i, ""))
  pl3[[i]] <- pl1[[i]] +
    scale_fill_brewer(type = "seq", palette = i) +
    ggtitle(paste("type = seq, palette =", i, ""))
}

# pl: all 24 plots, grouped by type. pl4: the same plots interleaved as
# (qual, div, seq) for each palette number.
pl <- c(pl1, pl2, pl3)
pl4 <- do.call(
  c,
  lapply(1:8, function(i) list(pl1[[i]], pl2[[i]], pl3[[i]]))
)

# The grid layout is appended to each list so it can be handed to
# grid.arrange() through do.call().
pl$ncol <- 1
pl$nrow <- 24
pl4$ncol <- 3
pl4$nrow <- 8

pl1$ncol <- 1
pl1$nrow <- 8
pl2$ncol <- 1
pl2$nrow <- 8
pl3$ncol <- 1
pl3$nrow <- 8

do.call(grid.arrange, pl1)

do.call(grid.arrange, pl2)

do.call(grid.arrange, pl3)

do.call(grid.arrange, pl)

do.call(grid.arrange, pl4)

Position scales (scale_x_* / scale_y_*)

library(ggthemes)

# Base scatter plot reused for every position-scale variant.
plD <- ggplot(na.omit(countries), aes(x = birth.rate, y = death.rate)) +
  geom_point(size = 2, color = "black") +
  theme_bw()

pl <- list()

pl[[1]] <- plD + ggtitle("default")
pl[[2]] <- plD
#pl[[2]] <- plD + scale_x_tufte() + scale_y_tufte() + ggtitle("_tufte")
pl[[3]] <- plD + scale_x_sqrt() + scale_y_sqrt() + ggtitle("_sqrt")
pl[[4]] <- plD + scale_x_reverse() + scale_y_reverse() + ggtitle("_reverse")
pl[[5]] <- plD + scale_x_log10() + scale_y_log10() + ggtitle("_log10")
pl[[6]] <- plD +
  scale_x_continuous(trans = "log2") +
  scale_y_continuous(trans = "log2") +
  ggtitle("_log2")


# Print every plot for its side effect only. A bare sapply(pl, print) also
# auto-prints its simplified return value (a list matrix) into the document;
# invisible(lapply(...)) suppresses that.
invisible(lapply(pl, print))

Load: archivist::aread('pbiecek/Eseje/arepo/324e09c32def2bbb06b239825ceae3ed') Load: archivist::aread('pbiecek/Eseje/arepo/8f826e9a988132430bc4f76924e5c643') Load: archivist::aread('pbiecek/Eseje/arepo/263d1b9bdeee040866510b3d87f402d5') Load: archivist::aread('pbiecek/Eseje/arepo/035a9fe3d931d4f5ceb7a4cc8e75a84b') Load: archivist::aread('pbiecek/Eseje/arepo/71292f5042abb3e8ddca1e54ab5364e8') Load: archivist::aread('pbiecek/Eseje/arepo/5047974779798a6fe454e6fa7bcaecc0') [,1] [,2] [,3] [,4] [,5] [,6]
data List,1 List,1 List,1 List,1 List,1 List,1 layout ? ? ? ? ? ?
plot List,9 List,9 List,9 List,9 List,9 List,9

# Arrange all plots held in `pl` on one page.
do.call(grid.arrange, pl)

Shape scales (scale_shape_*)

# Base scatter plot with continent on shape and colour, reused for every
# shape-scale variant.
plD <- ggplot(
  na.omit(countries),
  aes(x = birth.rate, y = death.rate, shape = continent, color = continent)
) +
  geom_point(size = 5) +
  theme_bw()

pl <- list()

pl[[1]] <- plD + ggtitle("default")
pl[[2]] <- plD + scale_shape_stata() + ggtitle("_stata")
pl[[3]] <- plD + scale_shape_calc() + ggtitle("_calc")
pl[[4]] <- plD + scale_shape_tableau() + ggtitle("_tableau")
pl[[5]] <- plD + scale_shape_cleveland() + ggtitle("_cleveland")
pl[[6]] <- plD + scale_shape_manual(values = LETTERS) + ggtitle("_manual")


# Print every plot for its side effect only. A bare sapply(pl, print) also
# auto-prints its simplified return value (a list matrix) into the document;
# invisible(lapply(...)) suppresses that.
invisible(lapply(pl, print))

Load: archivist::aread('pbiecek/Eseje/arepo/e6d15bdcf5c07a04e9655602313e42eb') Load: archivist::aread('pbiecek/Eseje/arepo/4ed89f38ff82ab58f7dd70c3ec168341') Load: archivist::aread('pbiecek/Eseje/arepo/012ec429e733e86c49ddc01a888eb817') Load: archivist::aread('pbiecek/Eseje/arepo/4d250d143127e9ecfadecf6afce9e7bb') Load: archivist::aread('pbiecek/Eseje/arepo/180ac55fedfa76701a9038254eb1fb72') Load: archivist::aread('pbiecek/Eseje/arepo/3a9ec693a686373807b720dea3c12cc8') [,1] [,2] [,3] [,4] [,5] [,6]
data List,1 List,1 List,1 List,1 List,1 List,1 layout ? ? ? ? ? ?
plot List,9 List,9 List,9 List,9 List,9 List,9

# Arrange all plots held in `pl` on one page.
do.call(grid.arrange, pl)

Panels / facets

# Facet base plot on square-root axes. The grey layer uses `countries` without
# its fifth column (continent, going by the table printed at the top), so it
# has no facet variable and all countries appear in every panel. The red points
# and the ellipse use the full data and are split by facet.
plD <- ggplot(na.omit(countries), aes(x = birth.rate, y = death.rate)) +
  stat_ellipse(color = "red4") +
  geom_point(data = countries[, -5], size = 1, color = "grey") +
  geom_point(size = 2, color = "red") +
  theme_bw() +
  theme(legend.position = "none") +
  theme(text = element_text(size = 20)) +
  scale_x_sqrt() +
  scale_y_sqrt()

# One panel per continent.
plD + facet_wrap(~continent)

Load: archivist::aread('pbiecek/Eseje/arepo/aa863dc2ccb3597b78e71ffee5efc7e2')

# The same data in a single panel: continent on colour, one ellipse per
# continent, square-root axes.
ggplot(
  na.omit(countries),
  aes(x = birth.rate, y = death.rate, color = continent)
) +
  stat_ellipse() +
  geom_point(size = 2) +
  theme_bw() +
  theme(legend.position = "none") +
  theme(text = element_text(size = 20)) +
  scale_x_sqrt() +
  scale_y_sqrt()

Load: archivist::aread('pbiecek/Eseje/arepo/f957e2fe575c04ebc78162b3983cf728')

Themes

# Base scatter plot (square-root x axis) reused for every theme variant.
plD <- ggplot(na.omit(countries), aes(x = birth.rate, y = death.rate)) +
  geom_point(size = 2, color = "black") +
  scale_x_sqrt()

# One plot per theme, stored at a fixed index. Indices whose theme is
# commented out stay NULL.
pl <- list()
pl[[1]] <- plD +   theme_bw() +  theme(legend.position="none") +
  theme(text=element_text(size=20)) + ggtitle("theme_bw")
#pl[[2]] <- plD # +   theme_calc() +  theme(legend.position="none") +
#  theme(text=element_text(size=20)) + ggtitle("theme_calc")
pl[[3]] <- plD +   theme_classic() +  theme(legend.position="none") +
  theme(text=element_text(size=20)) + ggtitle("theme_classic")
#pl[[4]] <- plD +   theme_economist() +  theme(legend.position="none") +
#  theme(text=element_text(size=20, margin = 0)) + ggtitle("theme_economist")
#pl[[5]] <- plD +   theme_economist_white() +  theme(legend.position="none") +
#  theme(text=element_text(size=20)) + ggtitle("theme_economist_white")
pl[[6]] <- plD +   theme_excel() +  theme(legend.position="none") +
  theme(text=element_text(size=20)) + ggtitle("theme_excel")
pl[[7]] <- plD +   theme_few() +  theme(legend.position="none") +
  theme(text=element_text(size=20)) + ggtitle("theme_few")
#pl[[8]] <- plD +   theme_fivethirtyeight() +  theme(legend.position="none") +
#  theme(text=element_text(size=20)) + ggtitle("theme_fivethirtyeight")
#pl[[9]] <- plD +   theme_foundation() +  theme(legend.position="none") +
#  theme(text=element_text(size=20)) + ggtitle("theme_foundation")
pl[[10]] <- plD +   theme_gray() +  theme(legend.position="none") +
  theme(text=element_text(size=20)) + ggtitle("theme_gray")
pl[[11]] <- plD +   theme_map() +  theme(legend.position="none") +
  theme(text=element_text(size=20)) + ggtitle("theme_map")
pl[[12]] <- plD +   theme_pander() +  theme(legend.position="none") +
  theme(text=element_text(size=20)) + ggtitle("theme_pander")
pl[[13]] <- plD +   theme_solarized() +  theme(legend.position="none") +
  theme(text=element_text(size=20)) + ggtitle("theme_solarized")
#pl[[14]] <- plD +   theme_stata() +  theme(legend.position="none") +
#  theme(text=element_text(size=20)) + ggtitle("theme_stata")
#pl[[15]] <- plD +   theme_wsj() +  theme(legend.position="none") +
#  theme(text=element_text(size=20)) + ggtitle("theme_wsj")
pl[[16]] <- plD +   theme_tufte() +  theme(legend.position="none") +
  theme(text=element_text(size=20)) + ggtitle("theme_tufte")
#pl[[17]] <- plD +   theme_solarized_2() +  theme(legend.position="none") +
#  theme(text=element_text(size=20)) + ggtitle("theme_solarized_2")

# Print only the slots that actually hold a plot: printing the NULL
# placeholders writes a literal "NULL" into the rendered document. The result
# is kept in `tmp` so nothing is auto-printed.
tmp <- lapply(Filter(Negate(is.null), pl), print)

Load: archivist::aread('pbiecek/Eseje/arepo/a9e6b4784b66e6d2254d019a306925a6') NULL Load: archivist::aread('pbiecek/Eseje/arepo/bf1dcc606e96e6a52d6eb7c9ee37309c') NULL NULL Load: archivist::aread('pbiecek/Eseje/arepo/50ddd850026e1cecfc103d4ac8c2a952') Load: archivist::aread('pbiecek/Eseje/arepo/79ed2420a66c64ca3f01011e8ffcea57') NULL NULL Load: archivist::aread('pbiecek/Eseje/arepo/826355d3f3867d0eb1829cf83968926d') Load: archivist::aread('pbiecek/Eseje/arepo/f3aeb26f4309f34a68c1fb6b389df372') Load: archivist::aread('pbiecek/Eseje/arepo/aad0a18178d7e8bdb1f6353b106f3c24') Load: archivist::aread('pbiecek/Eseje/arepo/774e7d06bcaa49d197dd49fba4c461b9') NULL NULL Load: archivist::aread('pbiecek/Eseje/arepo/72471f0af6d37a1af0895314daa900a2')

#do.call(grid.arrange, pl[c(1,10,11,  8,17,14,  4,5, 6)])