ohi logo
OHI Science | Citation policy

Summary

This script generates the “need” layer for the artisanal opportunities goal. The “access” layer, which is not updated due to a lack of a data source, is located here: globalprep/res_mora_ao/v2013/data/r_mora_s4_2013a.csv.

Updates from previous assessment

None


Data Source

Downloaded: 8/17/2017

Description:
GDP adjusted per capita by PPP (ppppcgdp) http://data.worldbank.org/indicator/NY.GDP.PCAP.PP.KD Reported at country scale.

GDP per capita based on purchasing power parity (PPP). PPP GDP is gross domestic product converted to international dollars using purchasing power parity rates. An international dollar has the same purchasing power over GDP as the U.S. dollar has in the United States. GDP at purchaser’s prices is the sum of gross value added by all resident producers in the economy plus any product taxes and minus any subsidies not included in the value of the products. It is calculated without making deductions for depreciation of fabricated assets or for depletion and degradation of natural resources. Data are in constant international dollars based on the 2011 ICP round.

Time range: 1990-2016


Methods

Setup

knitr::opts_chunk$set(fig.width = 6, fig.height = 4, fig.path = 'figs/', message = FALSE, warning = FALSE)

#setting up provenance
# devtools::install_github('oharac/provRmd')
# library(provRmd)
# prov_setup()

library(ohicore) # devtools::install_github('ohi-science/ohicore@dev')
library(dplyr)
library(tidyr)
library(WDI) # install.packages('WDI')
library(stringr)
library(readr)

# comment out when knitting:
# setwd("globalprep/ao/v2017")

# directory paths
source('../../../src/R/common.R')

Download and save data

# check website to see what years are available
yr_start = 1990
yr_end   = 2016


## get description of variables (NOTE: these descriptions appear out of date, they aren't in sync with the definitions of the World Bank):
indicators <-  data.frame(WDI_data[[1]])
#indicators[grep("NY.GDP.PCAP.PP.CD", indicators$indicator), ]  # current dollars (influenced by inflation, not being used)
indicators[grep("NY.GDP.PCAP.PP.KD", indicators$indicator), ]  # constant dollars 

# download the data
gdppcppp = WDI(country = "all",
               indicator = "NY.GDP.PCAP.PP.KD", 
               start = yr_start, end=yr_end)

write.csv(gdppcppp, 'raw/raw_gdppcppp.csv', row.names=FALSE)

Within country gapfilling

When a country has some values, a within country regression model is used to predict the missing values.

d <- read.csv('raw/raw_gdppcppp.csv') %>%
  select(country, value=NY.GDP.PCAP.PP.KD, year) %>%
  filter(year >= 2005) %>%
  spread(year, value) %>%  ## fill in potentially missing values with NA
  data.frame() %>%
  gather(year, value, starts_with("X")) %>%
  mutate(year = gsub("X", "", year)) %>%
  mutate(year = as.numeric(year))

## for the first case, if there is only one value use this value for all years
## this is not ideal, but likely better than other forms of gapfilling

d <- d %>%
  group_by(country) %>%
  mutate(value_num = sum(!is.na(value))) %>%
  filter(value_num > 0) %>%    # filter out the countries with no data between 2005 and 2015   
  mutate(value_num_gf = ifelse(value_num==1, mean(value, na.rm=TRUE), NA)) %>%
  ungroup() %>%
  mutate(value = ifelse(is.na(value), value_num_gf, value)) %>%
  select(country, year, value, value_num)

d_gf <- d %>%
  group_by(country) %>%
  do({
    mod <- lm(value ~ year, data=.)
    value_pred <- predict(mod, newdata=.[c('year')])
    data.frame(., value_pred)
  })

d_gf <- d_gf %>%
  ungroup() %>%
  mutate(gapfilled = ifelse(is.na(value), 1, 0)) %>%
  mutate(gapfilled = ifelse(value_num == 1, 1, gapfilled)) %>%
  mutate(value = ifelse(is.na(value), value_pred, value)) %>%
  mutate(method = ifelse(gapfilled==1, paste("lm based on N years data:", value_num, sep=" "), NA)) %>%
  mutate(method = ifelse(value_num == 1, "gapfilled using one year of data", method))

Calculate standardized values

This is performed by taking the natural log of each value and then dividing by the 95th quantile of values across all years (from 2005 to 2016).

d_stand <- d_gf %>%
  mutate(quantile_95 = quantile(value, probs=0.95)) %>%
  mutate(value_stand = value/quantile_95) %>%
  mutate(value_stand = ifelse(value_stand > 1, 1, value_stand))

d_stand <- d_stand %>%
  select(country, year, value, score=value_stand, gapfilled, method)

plotData <- d_stand %>%
  mutate(need = 1-score) %>%
  dplyr::select(country, year, need)

# library(googleVis)
#
# Motion = gvisMotionChart(plotData,
# idvar="country", timevar="year")
# 
#  plot(Motion)
# 
#  print(Motion, file = file.path('ao_need_95q.html'))

Convert country names to ohi regions

### Function to convert to OHI region ID
d_stand_rgn <- name_2_rgn(df_in = d_stand, 
                       fld_name='country', 
                       flds_unique=c('year'))
## 
## These data were removed for not having any match in the lookup tables:
## 
##                                           Arab World 
##                                                    1 
##                               Caribbean small states 
##                                                    1 
##                       Central Europe and the Baltics 
##                                                    1 
##                           Early-demographic dividend 
##                                                    1 
##                                  East Asia & Pacific 
##                                                    1 
##          East Asia & Pacific (excluding high income) 
##                                                    1 
##           East Asia & Pacific (IDA & IBRD countries) 
##                                                    1 
##                                            Euro area 
##                                                    1 
##                                       European Union 
##                                                    1 
##                                Europe & Central Asia 
##                                                    1 
##        Europe & Central Asia (excluding high income) 
##                                                    1 
##         Europe & Central Asia (IDA & IBRD countries) 
##                                                    1 
##             Fragile and conflict affected situations 
##                                                    1 
##               Heavily indebted poor countries (HIPC) 
##                                                    1 
##                                          High income 
##                                                    1 
##                                            IBRD only 
##                                                    1 
##                                            IDA blend 
##                                                    1 
##                                     IDA & IBRD total 
##                                                    1 
##                                             IDA only 
##                                                    1 
##                                            IDA total 
##                                                    1 
##                            Late-demographic dividend 
##                                                    1 
##                            Latin America & Caribbean 
##                                                    1 
##    Latin America & Caribbean (excluding high income) 
##                                                    1 
## Latin America & the Caribbean (IDA & IBRD countries) 
##                                                    1 
##         Least developed countries: UN classification 
##                                                    1 
##                                  Lower middle income 
##                                                    1 
##                                           Low income 
##                                                    1 
##                                  Low & middle income 
##                                                    1 
##                           Middle East & North Africa 
##                                                    1 
##   Middle East & North Africa (excluding high income) 
##                                                    1 
##    Middle East & North Africa (IDA & IBRD countries) 
##                                                    1 
##                                        Middle income 
##                                                    1 
##                                        North America 
##                                                    1 
##                                         OECD members 
##                                                    1 
##                                   Other small states 
##                                                    1 
##                          Pacific island small states 
##                                                    1 
##                            Post-demographic dividend 
##                                                    1 
##                             Pre-demographic dividend 
##                                                    1 
##                                         Small states 
##                                                    1 
##                                           South Asia 
##                                                    1 
##                              South Asia (IDA & IBRD) 
##                                                    1 
##                                   Sub-Saharan Africa 
##                                                    1 
##           Sub-Saharan Africa (excluding high income) 
##                                                    1 
##            Sub-Saharan Africa (IDA & IBRD countries) 
##                                                    1 
##                                  Upper middle income 
##                                                    1 
## 
## These data were removed for not being of the proper rgn_type (eez,ohi_region) or mismatching region names in the lookup tables:
##                           tmp_type
## tmp_name                   disputed landlocked largescale
##   Afghanistan                     0         12          0
##   Armenia                         0         12          0
##   Austria                         0         12          0
##   Belarus                         0         12          0
##   Bhutan                          0         12          0
##   Bolivia                         0         12          0
##   Botswana                        0         12          0
##   Burkina Faso                    0         12          0
##   Burundi                         0         12          0
##   Central African Republic        0         12          0
##   Chad                            0         12          0
##   Czech Republic                  0         12          0
##   Ethiopia                        0         12          0
##   Hungary                         0         12          0
##   Kazakhstan                      0         12          0
##   Kosovo                          0         12          0
##   Kyrgyz Republic                 0         12          0
##   Lao PDR                         0         12          0
##   Lesotho                         0         12          0
##   Luxembourg                      0         12          0
##   Macedonia, FYR                  0         12          0
##   Malawi                          0         12          0
##   Mali                            0         12          0
##   Moldova                         0         12          0
##   Mongolia                        0         12          0
##   Nepal                           0         12          0
##   Niger                           0         12          0
##   Paraguay                        0         12          0
##   Rwanda                          0         12          0
##   Serbia                          0         12          0
##   Slovak Republic                 0         12          0
##   South Sudan                     0         12          0
##   Swaziland                       0         12          0
##   Switzerland                     0         12          0
##   Tajikistan                      0         12          0
##   Turkmenistan                    0         12          0
##   Uganda                          0         12          0
##   Uzbekistan                      0         12          0
##   West Bank and Gaza             12          0          0
##   World                           0          0         12
##   Zambia                          0         12          0
##   Zimbabwe                        0         12          0
## # A tibble: 3 x 1
##                country
##                 <fctr>
## 1                China
## 2 Hong Kong SAR, China
## 3     Macao SAR, China
### Combine the duplicate regions (we report these at lower resolution)
### In this case, we take the average score weighted by population
population_weights <- read.csv('../../../../ohiprep/src/LookupTables/Pop_weight_ChinaSAR_USVIslPRico.csv')

d_stand_rgn <- d_stand_rgn %>%
  left_join(population_weights, by="country") %>%
  mutate(population = ifelse(is.na(population), 1, population)) %>%
  group_by(rgn_id, year) %>%
  summarize(score = weighted.mean(score, population),
            value = weighted.mean(value, population),
            gapfilled = weighted.mean(gapfilled, population),
            method = method[1]) %>%
  ungroup() 

d_stand_rgn <- d_stand_rgn %>%
  filter(rgn_id <= 250)

summary(d_stand_rgn)
##      rgn_id           year          score             value         
##  Min.   :  6.0   Min.   :2005   Min.   :0.01098   Min.   :   548.5  
##  1st Qu.: 65.0   1st Qu.:2008   1st Qu.:0.08586   1st Qu.:  4289.9  
##  Median :123.5   Median :2010   Median :0.24354   Median : 12182.5  
##  Mean   :122.5   Mean   :2010   Mean   :0.34946   Mean   : 18759.0  
##  3rd Qu.:184.0   3rd Qu.:2013   3rd Qu.:0.56717   3rd Qu.: 28338.3  
##  Max.   :250.0   Max.   :2016   Max.   :1.00000   Max.   :129349.9  
##    gapfilled          method         
##  Min.   :0.00000   Length:1800       
##  1st Qu.:0.00000   Class :character  
##  Median :0.00000   Mode  :character  
##  Mean   :0.03611                     
##  3rd Qu.:0.00000                     
##  Max.   :1.00000
## save the cleaned gdppcppp for other goals
gdppcppp_data <- d_stand_rgn %>%
  select(rgn_id, year, value)

write_csv(gdppcppp_data, "intermediate/gdppcppp_ohi.csv")

Gapfilling: part 2

In this case, we gapfill regions with no data using UN geopolitical means.

d_stand_gf <- data.frame(year=min(d_stand_rgn$year):max(d_stand_rgn$year)) %>% 
  merge(georegions, by=NULL) 

d_stand_gf <- d_stand_gf %>%  
  left_join(d_stand_rgn, by = c("rgn_id", "year")) %>%
  group_by(r2, year) %>%
  mutate(r2_value = mean(score, na.rm=TRUE)) %>%
  ungroup() %>%
  group_by(r1, year) %>%
  mutate(r1_value = mean(score, na.rm=TRUE)) %>%
  ungroup() %>%
  group_by(r0, year) %>%
  mutate(r0_value = mean(score, na.rm=TRUE)) %>%
  ungroup()

d_stand_gf <- d_stand_gf %>%
  mutate(gapfilled = ifelse(is.na(score) & !is.na(r2_value), "1", gapfilled)) %>%
  mutate(method = ifelse(is.na(score) & !is.na(r2_value), "UN_geopolitical region avg, r2", method)) %>%
  mutate(score = ifelse(is.na(score), r2_value, score)) %>%
  mutate(gapfilled = ifelse(is.na(score) & !is.na(r1_value), "1", gapfilled)) %>%
  mutate(method = ifelse(is.na(score) & !is.na(r1_value), "UN_geopolitical region avg, r1", method)) %>%
  mutate(score = ifelse(is.na(score), r1_value, score)) %>%
  mutate(gapfilled = ifelse(is.na(score) & !is.na(r0_value), "1", gapfilled)) %>%
  mutate(method = ifelse(is.na(score) & !is.na(r0_value), "UN_geopolitical region avg, r0", method)) %>%
  mutate(score = ifelse(is.na(score), r0_value, score))

Save the data

final <- d_stand_gf %>%
  select(rgn_id, year, value=score)

write.csv(final, "output/wb_gdppcppp_rescaled.csv", row.names=FALSE)

final_gf <- d_stand_gf %>%
  select(rgn_id, year, gapfilled, method)

write.csv(final_gf, "output/wb_gdppcppp_rescaled_gf.csv", row.names=FALSE)