This script generates the “need” layer for the artisanal opportunities goal. The “access” layer, which is not updated due to a lack of a data source, is located here: globalprep/res_mora_ao/v2013/data/r_mora_s4_2013a.csv.
None
Downloaded: 8/17/2017
Description:
GDP adjusted per capita by PPP (ppppcgdp) http://data.worldbank.org/indicator/NY.GDP.PCAP.PP.KD Reported at country scale.
GDP per capita based on purchasing power parity (PPP). PPP GDP is gross domestic product converted to international dollars using purchasing power parity rates. An international dollar has the same purchasing power over GDP as the U.S. dollar has in the United States. GDP at purchaser’s prices is the sum of gross value added by all resident producers in the economy plus any product taxes and minus any subsidies not included in the value of the products. It is calculated without making deductions for depreciation of fabricated assets or for depletion and degradation of natural resources. Data are in constant international dollars based on the 2011 ICP round.
Time range: 1990-2016
knitr::opts_chunk$set(fig.width = 6, fig.height = 4, fig.path = 'figs/', message = FALSE, warning = FALSE)
#setting up provenance
# devtools::install_github('oharac/provRmd')
# library(provRmd)
# prov_setup()
library(ohicore) # devtools::install_github('ohi-science/ohicore@dev')
library(dplyr)
library(tidyr)
library(WDI) # install.packages('WDI')
library(stringr)
library(readr)
# comment out when knitting:
# setwd("globalprep/ao/v2017")
# directory paths
source('../../../src/R/common.R')
# check website to see what years are available
yr_start = 1990
yr_end = 2016
## get description of variables (NOTE: these descriptions appear out of date, they aren't in sync with the definitions of the World Bank):
indicators <- data.frame(WDI_data[[1]])
#indicators[grep("NY.GDP.PCAP.PP.CD", indicators$indicator), ] # current dollars (influenced by inflation, not being used)
indicators[grep("NY.GDP.PCAP.PP.KD", indicators$indicator), ] # constant dollars
# download the data
gdppcppp = WDI(country = "all",
indicator = "NY.GDP.PCAP.PP.KD",
start = yr_start, end=yr_end)
write.csv(gdppcppp, 'raw/raw_gdppcppp.csv', row.names=FALSE)
When a country has some values, a within country regression model is used to predict the missing values.
d <- read.csv('raw/raw_gdppcppp.csv') %>%
select(country, value=NY.GDP.PCAP.PP.KD, year) %>%
filter(year >= 2005) %>%
spread(year, value) %>% ## fill in potentially missing values with NA
data.frame() %>%
gather(year, value, starts_with("X")) %>%
mutate(year = gsub("X", "", year)) %>%
mutate(year = as.numeric(year))
## for the first case, if there is only one value use this value for all years
## this is not ideal, but likely better than other forms of gapfilling
d <- d %>%
group_by(country) %>%
mutate(value_num = sum(!is.na(value))) %>%
filter(value_num > 0) %>% # filter out the countries with no data between 2005 and 2015
mutate(value_num_gf = ifelse(value_num==1, mean(value, na.rm=TRUE), NA)) %>%
ungroup() %>%
mutate(value = ifelse(is.na(value), value_num_gf, value)) %>%
select(country, year, value, value_num)
d_gf <- d %>%
group_by(country) %>%
do({
mod <- lm(value ~ year, data=.)
value_pred <- predict(mod, newdata=.[c('year')])
data.frame(., value_pred)
})
d_gf <- d_gf %>%
ungroup() %>%
mutate(gapfilled = ifelse(is.na(value), 1, 0)) %>%
mutate(gapfilled = ifelse(value_num == 1, 1, gapfilled)) %>%
mutate(value = ifelse(is.na(value), value_pred, value)) %>%
mutate(method = ifelse(gapfilled==1, paste("lm based on N years data:", value_num, sep=" "), NA)) %>%
mutate(method = ifelse(value_num == 1, "gapfilled using one year of data", method))
This is performed by taking the natural log of each value and then dividing by the 95th quantile of values across all years (from 2005 to 2016).
d_stand <- d_gf %>%
mutate(quantile_95 = quantile(value, probs=0.95)) %>%
mutate(value_stand = value/quantile_95) %>%
mutate(value_stand = ifelse(value_stand > 1, 1, value_stand))
d_stand <- d_stand %>%
select(country, year, value, score=value_stand, gapfilled, method)
plotData <- d_stand %>%
mutate(need = 1-score) %>%
dplyr::select(country, year, need)
# library(googleVis)
#
# Motion = gvisMotionChart(plotData,
# idvar="country", timevar="year")
#
# plot(Motion)
#
# print(Motion, file = file.path('ao_need_95q.html'))
### Function to convert to OHI region ID
d_stand_rgn <- name_2_rgn(df_in = d_stand,
fld_name='country',
flds_unique=c('year'))
##
## These data were removed for not having any match in the lookup tables:
##
## Arab World
## 1
## Caribbean small states
## 1
## Central Europe and the Baltics
## 1
## Early-demographic dividend
## 1
## East Asia & Pacific
## 1
## East Asia & Pacific (excluding high income)
## 1
## East Asia & Pacific (IDA & IBRD countries)
## 1
## Euro area
## 1
## European Union
## 1
## Europe & Central Asia
## 1
## Europe & Central Asia (excluding high income)
## 1
## Europe & Central Asia (IDA & IBRD countries)
## 1
## Fragile and conflict affected situations
## 1
## Heavily indebted poor countries (HIPC)
## 1
## High income
## 1
## IBRD only
## 1
## IDA blend
## 1
## IDA & IBRD total
## 1
## IDA only
## 1
## IDA total
## 1
## Late-demographic dividend
## 1
## Latin America & Caribbean
## 1
## Latin America & Caribbean (excluding high income)
## 1
## Latin America & the Caribbean (IDA & IBRD countries)
## 1
## Least developed countries: UN classification
## 1
## Lower middle income
## 1
## Low income
## 1
## Low & middle income
## 1
## Middle East & North Africa
## 1
## Middle East & North Africa (excluding high income)
## 1
## Middle East & North Africa (IDA & IBRD countries)
## 1
## Middle income
## 1
## North America
## 1
## OECD members
## 1
## Other small states
## 1
## Pacific island small states
## 1
## Post-demographic dividend
## 1
## Pre-demographic dividend
## 1
## Small states
## 1
## South Asia
## 1
## South Asia (IDA & IBRD)
## 1
## Sub-Saharan Africa
## 1
## Sub-Saharan Africa (excluding high income)
## 1
## Sub-Saharan Africa (IDA & IBRD countries)
## 1
## Upper middle income
## 1
##
## These data were removed for not being of the proper rgn_type (eez,ohi_region) or mismatching region names in the lookup tables:
## tmp_type
## tmp_name disputed landlocked largescale
## Afghanistan 0 12 0
## Armenia 0 12 0
## Austria 0 12 0
## Belarus 0 12 0
## Bhutan 0 12 0
## Bolivia 0 12 0
## Botswana 0 12 0
## Burkina Faso 0 12 0
## Burundi 0 12 0
## Central African Republic 0 12 0
## Chad 0 12 0
## Czech Republic 0 12 0
## Ethiopia 0 12 0
## Hungary 0 12 0
## Kazakhstan 0 12 0
## Kosovo 0 12 0
## Kyrgyz Republic 0 12 0
## Lao PDR 0 12 0
## Lesotho 0 12 0
## Luxembourg 0 12 0
## Macedonia, FYR 0 12 0
## Malawi 0 12 0
## Mali 0 12 0
## Moldova 0 12 0
## Mongolia 0 12 0
## Nepal 0 12 0
## Niger 0 12 0
## Paraguay 0 12 0
## Rwanda 0 12 0
## Serbia 0 12 0
## Slovak Republic 0 12 0
## South Sudan 0 12 0
## Swaziland 0 12 0
## Switzerland 0 12 0
## Tajikistan 0 12 0
## Turkmenistan 0 12 0
## Uganda 0 12 0
## Uzbekistan 0 12 0
## West Bank and Gaza 12 0 0
## World 0 0 12
## Zambia 0 12 0
## Zimbabwe 0 12 0
## # A tibble: 3 x 1
## country
## <fctr>
## 1 China
## 2 Hong Kong SAR, China
## 3 Macao SAR, China
### Combine the duplicate regions (we report these at lower resolution)
### In this case, we take the average score weighted by population
population_weights <- read.csv('../../../../ohiprep/src/LookupTables/Pop_weight_ChinaSAR_USVIslPRico.csv')
d_stand_rgn <- d_stand_rgn %>%
left_join(population_weights, by="country") %>%
mutate(population = ifelse(is.na(population), 1, population)) %>%
group_by(rgn_id, year) %>%
summarize(score = weighted.mean(score, population),
value = weighted.mean(value, population),
gapfilled = weighted.mean(gapfilled, population),
method = method[1]) %>%
ungroup()
d_stand_rgn <- d_stand_rgn %>%
filter(rgn_id <= 250)
summary(d_stand_rgn)
## rgn_id year score value
## Min. : 6.0 Min. :2005 Min. :0.01098 Min. : 548.5
## 1st Qu.: 65.0 1st Qu.:2008 1st Qu.:0.08586 1st Qu.: 4289.9
## Median :123.5 Median :2010 Median :0.24354 Median : 12182.5
## Mean :122.5 Mean :2010 Mean :0.34946 Mean : 18759.0
## 3rd Qu.:184.0 3rd Qu.:2013 3rd Qu.:0.56717 3rd Qu.: 28338.3
## Max. :250.0 Max. :2016 Max. :1.00000 Max. :129349.9
## gapfilled method
## Min. :0.00000 Length:1800
## 1st Qu.:0.00000 Class :character
## Median :0.00000 Mode :character
## Mean :0.03611
## 3rd Qu.:0.00000
## Max. :1.00000
## save the cleaned gdppcppp for other goals
gdppcppp_data <- d_stand_rgn %>%
select(rgn_id, year, value)
write_csv(gdppcppp_data, "intermediate/gdppcppp_ohi.csv")
In this case, we gapfill regions with no data using UN geopolitical means.
d_stand_gf <- data.frame(year=min(d_stand_rgn$year):max(d_stand_rgn$year)) %>%
merge(georegions, by=NULL)
d_stand_gf <- d_stand_gf %>%
left_join(d_stand_rgn, by = c("rgn_id", "year")) %>%
group_by(r2, year) %>%
mutate(r2_value = mean(score, na.rm=TRUE)) %>%
ungroup() %>%
group_by(r1, year) %>%
mutate(r1_value = mean(score, na.rm=TRUE)) %>%
ungroup() %>%
group_by(r0, year) %>%
mutate(r0_value = mean(score, na.rm=TRUE)) %>%
ungroup()
d_stand_gf <- d_stand_gf %>%
mutate(gapfilled = ifelse(is.na(score) & !is.na(r2_value), "1", gapfilled)) %>%
mutate(method = ifelse(is.na(score) & !is.na(r2_value), "UN_geopolitical region avg, r2", method)) %>%
mutate(score = ifelse(is.na(score), r2_value, score)) %>%
mutate(gapfilled = ifelse(is.na(score) & !is.na(r1_value), "1", gapfilled)) %>%
mutate(method = ifelse(is.na(score) & !is.na(r1_value), "UN_geopolitical region avg, r1", method)) %>%
mutate(score = ifelse(is.na(score), r1_value, score)) %>%
mutate(gapfilled = ifelse(is.na(score) & !is.na(r0_value), "1", gapfilled)) %>%
mutate(method = ifelse(is.na(score) & !is.na(r0_value), "UN_geopolitical region avg, r0", method)) %>%
mutate(score = ifelse(is.na(score), r0_value, score))
final <- d_stand_gf %>%
select(rgn_id, year, value=score)
write.csv(final, "output/wb_gdppcppp_rescaled.csv", row.names=FALSE)
final_gf <- d_stand_gf %>%
select(rgn_id, year, gapfilled, method)
write.csv(final_gf, "output/wb_gdppcppp_rescaled_gf.csv", row.names=FALSE)