ohi logo
OHI Science | Citation policy

[REFERENCE RMD FILE: https://cdn.rawgit.com/OHI-Science/ohiprep/master/globalprep/np/v2016/WGI_dataprep.html]

1 Summary

This script downloads WGI data and prepares it for a pressures (1 - WGI) and resilience data layer.

2 Updates from previous assessment

None


3 Data Source

Reference: http://info.worldbank.org/governance/wgi/index.aspx#home

Downloaded: Sep 6 2017

Description:
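The Worldwide Governance Indicators (WGI) project reports aggregate and individual governance indicators for 215 economies over the period 1996–2015, for six dimensions of governance: Voice and Accountability; Political Stability and Absence of Violence/Terrorism; Government Effectiveness; Regulatory Quality; Rule of Law; and Control of Corruption.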

Time range: 1996-2015


library(ohicore) # devtools::install_github('ohi-science/ohicore@dev')
library(tools)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(WDI) # install.packages('WDI')
## Loading required package: RJSONIO
library(stringr)

# Working directory for interactive runs (keep commented out when knitting):
# setwd('globalprep/prs_res_wgi/v2017')

# First and last year of WGI data to request. Confirm the available range at
# http://info.worldbank.org/governance/wgi/index.aspx#home
yr_start <- 1996
yr_end <- 2015

4 Obtain the WGI data

Download each of the 6 WGI indicators:

## access data ----

## get description of variables:
## WDI_data is not defined in this script; presumably it is the metadata
## object shipped with the WDI package (TODO confirm). Its first element is
## turned into a table with one row per indicator.
indicators <- data.frame(WDI_data[[1]])

## Voice and Accountability.
## Match the indicator code exactly: grep("VA.EST", ...) is an unanchored
## regular expression in which "." matches any character, so it can return
## rows for other codes that merely contain a similar string.
indicators[which(indicators$indicator == "VA.EST"), ]
##      indicator                               name
## 6623    VA.EST Voice and Accountability: Estimate
##                                                                                                                                                                                                                       description
## 6623 Voice and Accountability captures perceptions of the extent to which a country's citizens are able to participate in selecting their government, as well as freedom of expression, freedom of association, and a free media.
##                       sourceDatabase
## 6623 Worldwide Governance Indicators
##                                   sourceOrganization
## 6623 Worldwide Governance Indicators, The World Bank
## Political Stability and Absence of Violence/Terrorism
## (exact match on the code instead of an unanchored regex, where "." is a wildcard)
indicators[which(indicators$indicator == "PV.EST"), ]
##      indicator
## 4569    PV.EST
##                                                                 name
## 4569 Political Stability and Absence of Violence/Terrorism: Estimate
##                                                                                                                                                                                                                                            description
## 4569 Political Stability and Absence of Violence/Terrorism captures perceptions of the likelihood that the government will be destabilized or overthrown by unconstitutional or violent means, including politically-motivated violence and terrorism.
##                       sourceDatabase
## 4569 Worldwide Governance Indicators
##                                   sourceOrganization
## 4569 Worldwide Governance Indicators, The World Bank
## Government Effectiveness
## (exact match on the code instead of an unanchored regex, where "." is a wildcard)
indicators[which(indicators$indicator == "GE.EST"), ]
##      indicator                               name
## 2515    GE.EST Government Effectiveness: Estimate
##                                                                                                                                                                                                                                                                                                    description
## 2515 Government Effectiveness captures perceptions of the quality of public services, the quality of the civil service and the degree of its independence from political pressures, the quality of policy formulation and implementation, and the credibility of the government's commitment to such policies.
##                       sourceDatabase
## 2515 Worldwide Governance Indicators
##                                   sourceOrganization
## 2515 Worldwide Governance Indicators, The World Bank
## Regulatory Quality
## (exact match on the code instead of an unanchored regex, where "." is a wildcard)
indicators[which(indicators$indicator == "RQ.EST"), ]
##      indicator                         name
## 4625    RQ.EST Regulatory Quality: Estimate
##                                                                                                                                                                                 description
## 4625 Regulatory Quality captures perceptions of the ability of the government to formulate and implement sound policies and regulations that permit and promote private sector development.
##                       sourceDatabase
## 4625 Worldwide Governance Indicators
##                                   sourceOrganization
## 4625 Worldwide Governance Indicators, The World Bank
## Rule of Law
## (exact match on the code instead of an unanchored regex, where "." is a wildcard)
indicators[which(indicators$indicator == "RL.EST"), ]
##      indicator                  name
## 4621    RL.EST Rule of Law: Estimate
##                                                                                                                                                                                                                                                                    description
## 4621 Rule of Law captures perceptions of the extent to which agents have confidence in and abide by the rules of society, and in particular the quality of contract enforcement, property rights, the police, and the courts, as well as the likelihood of crime and violence.
##                       sourceDatabase
## 4621 Worldwide Governance Indicators
##                                   sourceOrganization
## 4621 Worldwide Governance Indicators, The World Bank
## Control of Corruption
## (exact match on the code instead of an unanchored regex, where "." is a wildcard)
indicators[which(indicators$indicator == "CC.EST"), ]
##     indicator                            name
## 689    CC.EST Control of Corruption: Estimate
##                                                                                                                                                                                                                                                                                                                                                                                          description
## 689 Control of Corruption captures perceptions of the extent to which public power is exercised for private gain, including both petty and grand forms of corruption, as well as ""capture"" of the state by elites and private interests. \nEstimate gives the country's score on the aggregate indicator, in units of a standard normal distribution, i.e. ranging from approximately -2.5 to 2.5.
##                      sourceDatabase
## 689 Worldwide Governance Indicators
##                                  sourceOrganization
## 689 Worldwide Governance Indicators, The World Bank
# identify the six indicators
# WDIsearch('violence')# general search

# Download one WGI indicator for all countries over yr_start..yr_end.
#
# The indicator codes are passed directly rather than being looked up with
# WDIsearch(<name>)['indicator']: that lookup only yields a usable code when
# exactly one indicator name matches the search pattern, and it depends on the
# type of object WDIsearch() returns. The codes used here are the ones whose
# descriptions are printed from the `indicators` table earlier in this script.
#
# code: WGI indicator code, e.g. "VA.EST"
# returns: whatever WDI() returns for that request; the estimate column is
#          named after the indicator code (the combine step selects VA.EST etc.)
download_wgi <- function(code) {
  WDI(country = "all", indicator = code, start = yr_start, end = yr_end)
}

key_voice <- download_wgi("VA.EST")  # Voice and Accountability: Estimate

key_polst <- download_wgi("PV.EST")  # Political Stability and Absence of Violence/Terrorism: Estimate

key_gvtef <- download_wgi("GE.EST")  # Government Effectiveness: Estimate

key_regqt <- download_wgi("RQ.EST")  # Regulatory Quality: Estimate

key_rolaw <- download_wgi("RL.EST")  # Rule of Law: Estimate

key_corrp <- download_wgi("CC.EST")  # Control of Corruption: Estimate

Combine the indicators.

# Build one wide table with a column per indicator, keyed by country and year.
# key_voice supplies the rows; each of the other five tables is joined on
# after dropping its iso2c column, so only its estimate column is added.
d <- key_voice %>%
  select(country, year, VA.EST) %>%
  left_join(select(key_polst, -iso2c), by = c('country', 'year')) %>%
  left_join(select(key_gvtef, -iso2c), by = c('country', 'year')) %>%
  left_join(select(key_regqt, -iso2c), by = c('country', 'year')) %>%
  left_join(select(key_rolaw, -iso2c), by = c('country', 'year')) %>%
  left_join(select(key_corrp, -iso2c), by = c('country', 'year'))

# quick inspection of the combined table: first rows, summary, column classes
head(d)
summary(d)
sapply(d, class)
##    country year    VA.EST   PV.EST   GE.EST    RQ.EST      RL.EST   CC.EST
## 1 Anguilla 2015        NA 1.249272 1.007546 0.9129404 -0.03924862 1.218489
## 2 Anguilla 2014        NA 1.164255 0.952229 0.8850665  0.01886257 1.225222
## 3 Anguilla 2013 1.0047673 1.533980 1.539397 1.3272622  1.36789608 1.260798
## 4 Anguilla 2012 1.0256717 1.476920 1.531356 1.3278290  1.36444592 1.268373
## 5 Anguilla 2011 1.0090517 1.550928 1.504689 1.3720528  1.36059737 1.279277
## 6 Anguilla 2010 0.9882845 1.373762 1.518033 1.3916917  1.38212872 1.308072
##    country               year          VA.EST             PV.EST       
##  Length:3638        Min.   :1996   Min.   :-2.31340   Min.   :-3.3149  
##  Class :character   1st Qu.:2003   1st Qu.:-0.82307   1st Qu.:-0.6665  
##  Mode  :character   Median :2007   Median : 0.03926   Median : 0.1204  
##                     Mean   :2007   Mean   : 0.00000   Mean   : 0.0000  
##                     3rd Qu.:2011   3rd Qu.: 0.93212   3rd Qu.: 0.8551  
##                     Max.   :2015   Max.   : 1.80099   Max.   : 1.9431  
##                                    NA's   :110        NA's   :159      
##      GE.EST            RQ.EST            RL.EST            CC.EST       
##  Min.   :-2.4459   Min.   :-2.6450   Min.   :-2.6065   Min.   :-1.8687  
##  1st Qu.:-0.7399   1st Qu.:-0.7071   1st Qu.:-0.7897   1st Qu.:-0.7653  
##  Median :-0.1527   Median :-0.1165   Median :-0.1294   Median :-0.2381  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.7483   3rd Qu.: 0.8022   3rd Qu.: 0.8264   3rd Qu.: 0.7329  
##  Max.   : 2.4370   Max.   : 2.2605   Max.   : 2.1003   Max.   : 2.4700  
##  NA's   :167       NA's   :167       NA's   :95        NA's   :153
##     country        year      VA.EST      PV.EST      GE.EST      RQ.EST 
## "character"   "numeric"   "numeric"   "numeric"   "numeric"   "numeric" 
##      RL.EST      CC.EST 
##   "numeric"   "numeric"
# archived record of raw data: write.csv(d, file.path('raw', 'worldbank_wgi_from_wdi_api.csv'), row.names=F)

5 Gapfill missing data (part 1)

The first round of gapfilling replaces missing values with the average of the available years of data within each country/indicator. This occurs when a country has data, but not for all years.

Countries that still have fewer than 4 of the 6 indicators after this step are cut (they are gapfilled later).

# Read the archived copy of the raw download (wide: one column per indicator).
d <- read.csv('raw/worldbank_wgi_from_wdi_api.csv')

# Reshape to long format: one row per country / year / indicator.
d <- gather(d, "indicator", "value", VA.EST:CC.EST)


## each country has 17 years of data
d_gap_fill  <- d %>%
  group_by(country, year) %>%
  mutate(NA_count_c_y = sum(is.na(value))) %>%  # count of NA indicators per country/year before gapfilling (max is 6, one per indicator)
  ungroup() %>%
  group_by(country, indicator) %>%              # gap-fill with the mean of values across years within the same country/indicator
  mutate(ind_mean_c_i = mean(value, na.rm = TRUE)) %>%  # NaN when the country has no value at all for this indicator
  ungroup() %>%
  mutate(value = ifelse(is.na(value), ind_mean_c_i, value)) %>%  # a NaN fill still counts as missing below (is.na(NaN) is TRUE)
  group_by(country, year) %>%
  mutate(NA_count_post_gf1 = sum(is.na(value))) %>%  # count of NA indicators remaining after this gap-fill
  ungroup()                                     # do not leave the result grouped for later steps


## cut regions with <4 indicators to calculate score:
## NA_count_post_gf1 is the number of the 6 indicators still missing after
## gap-filling, so "fewer than 4 indicators" means more than 6 - 4 = 2 missing.
## The earlier cutoff (> 3 missing) kept countries that had only 3 indicators,
## which contradicts the "at least 4 of the 6 indicators" rule.
## NOTE(review): this changes which countries are scored directly; re-check the
## NA check after the territory gap-fill when re-running.
n_indicators   <- 6
min_indicators <- 4

countries_no_data <- d_gap_fill %>%
  filter(NA_count_post_gf1 > n_indicators - min_indicators)

countries_no_data <- unique(countries_no_data$country)
countries_no_data
## [1] New Caledonia
## 215 Levels: Afghanistan Albania Algeria American Samoa Andorra ... Zimbabwe
# Remove the countries listed in countries_no_data (too few indicators to
# calculate a score). Deleting them here means they are gap-filled later on.
d_gap_fill <- filter(d_gap_fill, !(country %in% countries_no_data))

6 Calculate overall WGI score for each country

This involves:

* taking the average of the 6 indicators (assuming there are at least 4 of the 6 indicators)
* rescaling the data from 0 to 1

# One row per country/year: the mean of the (gap-filled) indicator values,
# plus the NA counts before and after gap-filling. The counts are constant
# within a country/year, so mean() simply carries the value through.
d_calcs <- d_gap_fill %>%
  group_by(country, year) %>%
  summarize(score_wgi_scale = mean(value, na.rm = TRUE),
            NA_start = mean(NA_count_c_y),
            NA_post_gf_1 = mean(NA_count_post_gf1)) %>%
  ungroup()

# WGI estimates range from approximately -2.5 to 2.5; this range is used to
# rescale the averaged score to 0-1. The result is not clamped.
wgi_range <- c(-2.5, 2.5)

d_calcs <- d_calcs %>%
  mutate(score = (score_wgi_scale - wgi_range[1]) / (wgi_range[2] - wgi_range[1]))

head(d_calcs)
summary(d_calcs)
## # A tibble: 6 x 6
##       country  year score_wgi_scale NA_start NA_post_gf_1      score
##        <fctr> <int>           <dbl>    <dbl>        <dbl>      <dbl>
## 1 Afghanistan  1996       -2.069751        0            0 0.08604986
## 2 Afghanistan  1998       -2.095261        0            0 0.08094787
## 3 Afghanistan  2000       -2.122886        0            0 0.07542271
## 4 Afghanistan  2002       -1.747802        0            0 0.15043956
## 5 Afghanistan  2003       -1.570560        0            0 0.18588806
## 6 Afghanistan  2004       -1.510622        0            0 0.19787561
##            country          year      score_wgi_scale        NA_start     
##  Afghanistan   :  17   Min.   :1996   Min.   :-2.490863   Min.   :0.0000  
##  Albania       :  17   1st Qu.:2003   1st Qu.:-0.676018   1st Qu.:0.0000  
##  Algeria       :  17   Median :2007   Median :-0.112269   Median :0.0000  
##  American Samoa:  17   Mean   :2007   Mean   : 0.008306   Mean   :0.2389  
##  Andorra       :  17   3rd Qu.:2011   3rd Qu.: 0.740349   3rd Qu.:0.0000  
##  Angola        :  17   Max.   :2015   Max.   : 1.985402   Max.   :6.0000  
##  (Other)       :3536                                                      
##   NA_post_gf_1         score         
##  Min.   :0.00000   Min.   :0.001827  
##  1st Qu.:0.00000   1st Qu.:0.364797  
##  Median :0.00000   Median :0.477546  
##  Mean   :0.02804   Mean   :0.501661  
##  3rd Qu.:0.00000   3rd Qu.:0.648070  
##  Max.   :3.00000   Max.   :0.897080  
## 
# document gapfilling
# gap_fill is the number of indicators filled for a country/year: the NA count
# before gap-filling minus the NA count that remains afterwards (values left
# in NA_post_gf_1 were not gapfilled). Rows without a score get 0.
d_calcs <- d_calcs %>%
  mutate(gap_fill = NA_start - NA_post_gf_1) %>%
  mutate(gap_fill = ifelse(is.na(score), 0, gap_fill)) %>%
  select(-NA_start, -NA_post_gf_1)

# rows in which at least one indicator was gap-filled
d_calcs[d_calcs$gap_fill > 0, ]
## # A tibble: 192 x 5
##           country  year score_wgi_scale     score gap_fill
##            <fctr> <int>           <dbl>     <dbl>    <dbl>
##  1 American Samoa  1996       0.7170524 0.6434105        6
##  2 American Samoa  1998       0.7170524 0.6434105        6
##  3 American Samoa  2000       0.7170524 0.6434105        6
##  4 American Samoa  2002       0.7170524 0.6434105        6
##  5 American Samoa  2003       0.7170524 0.6434105        6
##  6 American Samoa  2014       0.8731227 0.6746245        1
##  7 American Samoa  2015       0.8914434 0.6782887        1
##  8       Anguilla  1996       1.2247660 0.7449532        6
##  9       Anguilla  1998       1.2247660 0.7449532        6
## 10       Anguilla  2000       1.2247660 0.7449532        6
## # ... with 182 more rows
# sanity check: expect zero rows, New Caledonia was listed in countries_no_data and removed
d_calcs[d_calcs$country == "New Caledonia", ]  # no data, was deleted earlier
## # A tibble: 0 x 5
## # ... with 5 variables: country <fctr>, year <int>, score_wgi_scale <dbl>,
## #   score <dbl>, gap_fill <dbl>
# sanity check: a country with data for only some years; gap_fill should vary by year
d_calcs[d_calcs$country == "Niue", ] # should have gap-fill values between 0-6
## # A tibble: 17 x 5
##    country  year score_wgi_scale     score gap_fill
##     <fctr> <int>           <dbl>     <dbl>    <dbl>
##  1    Niue  1996      -0.3520579 0.4295884        6
##  2    Niue  1998      -0.3520579 0.4295884        6
##  3    Niue  2000      -0.3520579 0.4295884        6
##  4    Niue  2002      -0.3520579 0.4295884        6
##  5    Niue  2003      -0.3520579 0.4295884        6
##  6    Niue  2004      -0.3520579 0.4295884        6
##  7    Niue  2005      -0.3520579 0.4295884        6
##  8    Niue  2006      -0.3520579 0.4295884        6
##  9    Niue  2007      -0.3520579 0.4295884        6
## 10    Niue  2008      -0.3520579 0.4295884        6
## 11    Niue  2009      -0.3812502 0.4237500        0
## 12    Niue  2010      -0.3347712 0.4330458        0
## 13    Niue  2011      -0.3390155 0.4321969        1
## 14    Niue  2012      -0.3533298 0.4293340        1
## 15    Niue  2013      -0.3519228 0.4296154        1
## 16    Niue  2014      -0.3520579 0.4295884        6
## 17    Niue  2015      -0.3520579 0.4295884        6
## Save the country-level WGI scores (for OHI+ use). These are the scores
## before country names are converted to OHI regions and before the territory
## gap-filling; the within-country gap-fill has already been applied.
## `score` is written out under the name `score_ohi_scale`.
d_calcs %>%
  select(country, year, score_wgi_scale, score_ohi_scale = score) %>%
  write.csv(file.path('intermediate/wgi_combined_scores_by_country.csv'),
            row.names = FALSE)

7 Convert country names to ohi regions

## We report these regions at a greater spatial resolution:

## Aruba is part of the Netherlands Antilles, but it is reported separately
## Lookup of aggregate WGI entry (country) -> the regions it is split into.
country_split_1 <- data.frame(country = "Netherlands Antilles", region = c('Bonaire', 'Curacao', 'Saba', 'Sint Maarten', 'Sint Eustatius'))
country_split_2 <- data.frame(country = "Jersey, Channel Islands", region = c('Jersey', 'Guernsey'))
country_split <- rbind(country_split_1, country_split_2)

## Give every component region a copy of its aggregate's rows (all years),
## then let the region name take the place of the aggregate's name.
## The join key is stated explicitly instead of relying on dplyr's guess.
country_split_data <- country_split %>%
  left_join(d_calcs, by = "country") %>%
  select(-country) %>%
  rename(country = region)
## Joining, by = "country"
## Warning: Column `country` joining factors with different levels, coercing
## to character vector
# Replace the two aggregate entries with the per-region rows built in
# country_split_data, and store country names as plain character strings.
d_calcs <- filter(d_calcs,
                  !(country %in% c("Netherlands Antilles", "Jersey, Channel Islands")))
d_calcs <- rbind(d_calcs, country_split_data)
d_calcs <- mutate(d_calcs, country = as.character(country))

# Rename the World Bank entry matching "Korea, Dem." to "North Korea".
d_calcs$country[grepl("Korea, Dem.", d_calcs$country)] <- "North Korea"



### Function to convert to OHI region ID
# name_2_rgn is not defined in this script (presumably provided by ohicore,
# TODO confirm). It matches the `country` names to OHI regions and reports the
# names it removed and any duplicated regions.
d_calcs_rgn <- name_2_rgn(
  df_in = d_calcs,
  fld_name = 'country',
  flds_unique = c('year')
)
## 
## These data were removed for not being of the proper rgn_type (eez,ohi_region) or mismatching region names in the lookup tables:
##                           tmp_type
## tmp_name                   disputed landlocked
##   Afghanistan                     0         17
##   Andorra                         0         17
##   Armenia                         0         17
##   Austria                         0         17
##   Belarus                         0         17
##   Bhutan                          0         17
##   Bolivia                         0         17
##   Botswana                        0         17
##   Burkina Faso                    0         17
##   Burundi                         0         17
##   Central African Republic        0         17
##   Chad                            0         17
##   Czech Republic                  0         17
##   Ethiopia                        0         17
##   Hungary                         0         17
##   Kazakhstan                      0         17
##   Kosovo                          0         17
##   Kyrgyz Republic                 0         17
##   Lao PDR                         0         17
##   Lesotho                         0         17
##   Liechtenstein                   0         17
##   Luxembourg                      0         17
##   Macedonia, FYR                  0         17
##   Malawi                          0         17
##   Mali                            0         17
##   Moldova                         0         17
##   Mongolia                        0         17
##   Nepal                           0         17
##   Niger                           0         17
##   Paraguay                        0         17
##   Rwanda                          0         17
##   San Marino                      0         17
##   Serbia                          0         17
##   Slovak Republic                 0         17
##   South Sudan                     0         17
##   Swaziland                       0         17
##   Switzerland                     0         17
##   Tajikistan                      0         17
##   Turkmenistan                    0         17
##   Uganda                          0         17
##   Uzbekistan                      0         17
##   West Bank and Gaza             17          0
##   Zambia                          0         17
##   Zimbabwe                        0         17
## 
## DUPLICATES found. Consider using collapse2rgn to collapse duplicates (function in progress).
## # A tibble: 5 x 1
##                 country
##                   <chr>
## 1                 China
## 2  Hong Kong SAR, China
## 3      Macao SAR, China
## 4           Puerto Rico
## 5 Virgin Islands (U.S.)
### Combine the duplicate regions (we report these at lower resolution)
### In this case, we take the weighted average
# Population weights for the entries that map to the same OHI region.
# stringsAsFactors = FALSE keeps `country` as character so it joins to the
# character `country` column of d_calcs_rgn without a coercion warning.
population_weights <- data.frame(country = c("Virgin Islands (U.S.)", "Puerto Rico",
                                             "China", "Hong Kong SAR, China", "Macao SAR, China"),
                                 population = c(106405, 3725789,
                                         1339724852, 7071576, 636200),
                                 stringsAsFactors = FALSE)

d_calcs_rgn <- d_calcs_rgn %>%
  left_join(population_weights, by = "country") %>%
  mutate(population = ifelse(is.na(population), 1, population)) %>%  # entries without a listed weight get weight 1
  group_by(rgn_id, year) %>%
  summarize(score = weighted.mean(score, population),
            gapfill_within_rgn = weighted.mean(gap_fill, population)) %>%
  ungroup() %>%
  filter(rgn_id <= 250)  # NOTE(review): presumably restricts to standard OHI region ids; confirm
## Warning: Column `country` joining character vector and factor, coercing
## into character vector
# inspect the region-level table: one row per rgn_id/year with score and gapfill_within_rgn
summary(d_calcs_rgn)
##      rgn_id           year          score          gapfill_within_rgn
##  Min.   :  6.0   Min.   :1996   Min.   :0.001827   Min.   :0.0000    
##  1st Qu.: 65.0   1st Qu.:2003   1st Qu.:0.389852   1st Qu.:0.0000    
##  Median :126.0   Median :2007   Median :0.513044   Median :0.0000    
##  Mean   :124.8   Mean   :2007   Mean   :0.520883   Mean   :0.2982    
##  3rd Qu.:186.0   3rd Qu.:2011   3rd Qu.:0.663986   3rd Qu.:0.0000    
##  Max.   :250.0   Max.   :2015   Max.   :0.897080   Max.   :6.0000

8 Gapfill missing data (part 2)

This gapfilling occurs when we assign a territorial region the value of its parent country.

## data that describes territories of countries
## (rgn_master is not defined in this script; presumably an ohicore lookup table, TODO confirm)
territory <- rgn_master %>%
  select(rgn_id = rgn_id_2013,
         sov_id) %>%
  group_by(rgn_id) %>%                  # remove duplicated countries from this rgn_id list
  summarize(sov_id = mean(sov_id, na.rm = TRUE)) %>% # duplicates always have the same sov_id (r2 value)
  filter(rgn_id <= 250, rgn_id != 213)


## expand to include all years of data
## by = NULL makes merge() return the Cartesian product: every region x every year
territory <- data.frame(year = yr_start:yr_end) %>%
  merge(territory, by = NULL)


# assign territories the values of their country
# For each sovereign/year, average the score and the gap-fill count over all
# regions of that sovereign that have data. Regions WITHOUT their own data
# receive these averages; regions WITH their own data keep their own
# gapfill_within_rgn (previously it was overwritten by the group average, so
# the gap-fill record no longer matched the region's own score).
d_sovs <- d_calcs_rgn %>%
  full_join(territory, by = c('rgn_id', 'year')) %>%
  group_by(sov_id, year) %>%
  mutate(score_gf_territory = mean(score, na.rm = TRUE),
         gapfill_gf_territory = mean(gapfill_within_rgn, na.rm = TRUE)) %>%
  ungroup() %>%
  # drop sovereign/years with no data at all (e.g. years in yr_start:yr_end
  # that WGI did not report); the group mean is NaN there
  filter(!is.na(gapfill_gf_territory)) %>%
  mutate(gapfill_within_rgn = ifelse(is.na(gapfill_within_rgn),
                                     gapfill_gf_territory,
                                     gapfill_within_rgn)) %>%
  select(-gapfill_gf_territory)

# first rows: each region/year with its own score and the sovereign-level score_gf_territory
head(d_sovs)
## # A tibble: 6 x 6
##   rgn_id  year     score gapfill_within_rgn sov_id score_gf_territory
##    <int> <int>     <dbl>              <dbl>  <dbl>              <dbl>
## 1      6  1996 0.5140133                  3      6          0.5140133
## 2      6  1998 0.5039277                  0      6          0.5039277
## 3      6  2000 0.5131196                  0      6          0.5131196
## 4      6  2002 0.4764124                  0      6          0.4764124
## 5      6  2003 0.4521804                  0      6          0.4521804
## 6      6  2004 0.4817330                  0      6          0.4817330
# NA's in `score` here are the regions without their own data (filled from score_gf_territory next)
summary(d_sovs)
##      rgn_id            year          score        gapfill_within_rgn
##  Min.   :  1.00   Min.   :1996   Min.   :0.0018   Min.   :0.0000    
##  1st Qu.: 58.75   1st Qu.:2003   1st Qu.:0.3899   1st Qu.:0.0000    
##  Median :116.50   Median :2007   Median :0.5130   Median :0.0000    
##  Mean   :117.64   Mean   :2007   Mean   :0.5209   Mean   :0.4015    
##  3rd Qu.:176.25   3rd Qu.:2011   3rd Qu.:0.6640   3rd Qu.:0.0000    
##  Max.   :250.00   Max.   :2015   Max.   :0.8971   Max.   :6.0000    
##                                  NA's   :833                        
##      sov_id       score_gf_territory
##  Min.   :  6.00   Min.   :0.001827  
##  1st Qu.: 73.75   1st Qu.:0.423768  
##  Median :163.00   Median :0.580945  
##  Mean   :133.72   Mean   :0.560252  
##  3rd Qu.:180.00   3rd Qu.:0.695679  
##  Max.   :247.00   Max.   :0.897080  
## 
# format these data
# gapfill_territory is "territory" where a region had no score of its own and
# takes the sovereign-level score, and the literal string "NA" otherwise.
d_gf2 <- d_sovs %>%
  mutate(gapfill_territory = ifelse(is.na(score) & !is.na(score_gf_territory), "territory", "NA")) %>%
  mutate(score = ifelse(is.na(score), score_gf_territory, score)) %>%
  select(rgn_id, year, score, gapfill_within_rgn, gapfill_territory)

# make sure that all regions have a score
# list of eez region ids (<= 250, excluding 213), one row per region
regions <- rgn_master %>%
  filter(rgn_typ == "eez") %>%
  filter(rgn_id_2013 <= 250) %>%
  filter(rgn_id_2013 != 213) %>%
  select(rgn_id = rgn_id_2013) %>%
  unique() %>%
  arrange(rgn_id)

# start from the full region list so a region without data shows up as NA
# rather than silently missing; the join key is stated explicitly
d_gf2 <- regions %>%
  left_join(d_gf2, by = "rgn_id")
## Joining, by = "rgn_id"
## check for NA values within "score" variable
## (summary() prints an "NA's" count under a column only when it contains NAs)
## if so, need to gapfill using UN geopolitical regions
summary(d_gf2)
##      rgn_id            year          score          gapfill_within_rgn
##  Min.   :  1.00   Min.   :1996   Min.   :0.001827   Min.   :0.0000    
##  1st Qu.: 58.75   1st Qu.:2003   1st Qu.:0.423459   1st Qu.:0.0000    
##  Median :116.50   Median :2007   Median :0.578439   Median :0.0000    
##  Mean   :117.64   Mean   :2007   Mean   :0.560252   Mean   :0.4015    
##  3rd Qu.:176.25   3rd Qu.:2011   3rd Qu.:0.694920   3rd Qu.:0.0000    
##  Max.   :250.00   Max.   :2015   Max.   :0.897080   Max.   :6.0000    
##  gapfill_territory 
##  Length:3740       
##  Class :character  
##  Mode  :character  
##                    
##                    
## 

9 Uninhabited regions

Previously, uninhabited regions (estimated population < 100 people) received an NA for their score. We decided it would be better to give uninhabited regions the scores of their administrative countries, so the code below is commented out.

# uninhab <- read.csv('../../../src/LookupTables/rgn_uninhabited_islands.csv') %>%
#   filter(is.na(est_population) | est_population < 100)
# 
# d_gf2 <- d_gf2 %>%
#   mutate(score = ifelse(rgn_id %in% uninhab$rgn_id, NA, score))

10 Check data

Comparing this year’s values against last year’s. These should be the same unless there have been updates to WGI source data or a change to methods. For this year, there was a small change that affected a few territorial regions. In the past, we used the sovereign country value, but in this case, we averaged the sovereign country and the available territorial values.

Also look at top/bottom 10 regions to make sure these seem reasonable.

# This assessment's scores for the 2014 data year.
new2014 <- d_gf2 %>%
  filter(year == 2014) %>%
  select(rgn_id, score)

# Last assessment's resilience scores (old_score), with this assessment's 2014
# scores (score) joined on. full_join keeps regions present in only one of the
# two; the join key is stated explicitly.
old2014 <- read.csv('../v2016/output/wgi_res_2016.csv') %>%
  select(rgn_id, old_score = resilience_score) %>%
  full_join(new2014, by = "rgn_id")
## Joining, by = "rgn_id"
## Old scores against new scores; the points should fall on the red 1:1 line
## (intercept 0, slope 1).
plot(old_score ~ score, data = old2014)
abline(a = 0, b = 1, col = "red")

## Top/Bottom 10 scorers:

# get region names
# (this replaces the earlier id-only `regions` table with one that also
# carries rgn_name)
regions <- rgn_master %>%
  filter(rgn_typ == "eez") %>%
  filter(rgn_id_2013 <= 250) %>%
  filter(rgn_id_2013 != 213) %>%
  select(rgn_id = rgn_id_2013, rgn_name = rgn_nam_2013) %>%
  unique() %>%
  arrange(rgn_id)


# 2015 scores sorted from lowest to highest, with region names attached;
# the join key is stated explicitly
tmp <- d_gf2 %>%
  filter(year == 2015) %>%
  arrange(score) %>%
  select(rgn_id, score) %>%
  left_join(regions, by = "rgn_id")
## Joining, by = "rgn_id"
# ten lowest-scoring regions (tmp is sorted by ascending score);
# head() does not pad with NA rows if there are fewer than 10 rows
head(tmp, 10)
##    rgn_id      score                         rgn_name
## 1      44 0.07284121                          Somalia
## 2      77 0.13275811                            Syria
## 3      67 0.13726052                            Libya
## 4      21 0.16035603                      North Korea
## 5      49 0.17821968                            Sudan
## 6      47 0.18152779                            Yemen
## 7      45 0.18735098                          Eritrea
## 8     199 0.19150840 Democratic Republic of the Congo
## 9     192 0.20620900                             Iraq
## 10    139 0.21579751                        Venezuela
# ten highest-scoring regions (tmp is sorted by ascending score);
# tail() avoids hard-coding rows 211:220, which assumes exactly 220 regions
tail(tmp, 10)
##     rgn_id     score      rgn_name
## 211    208 0.8207867     Singapore
## 212    218 0.8282229        Canada
## 213    177 0.8312352   Netherlands
## 214    175 0.8438872       Denmark
## 215    222 0.8490212        Sweden
## 216    174 0.8535604       Finland
## 217    105 0.8543166 Bouvet Island
## 218    144 0.8543166     Jan Mayen
## 219    223 0.8543166        Norway
## 220    162 0.8738509   New Zealand
# distribution of the 2015 region scores
hist(tmp$score)

11 Save the data

Gapfilling and score data are saved for each scenario year.

  # Resilience layer: the 0-1 WGI score, written as resilience_score.
  tmp_data_res <- select(d_gf2, rgn_id, year, resilience_score = score)
  write.csv(tmp_data_res, "output/wgi_res.csv", row.names = FALSE)

  # Pressure layer: the complement of the score (1 - WGI), written as pressure_score.
  tmp_data_prs <- d_gf2 %>%
    transmute(rgn_id, year, pressure_score = 1 - score)
  write.csv(tmp_data_prs, "output/wgi_prs.csv", row.names = FALSE)

  # Gap-filling record: within-region and territory gap-fill flags per region/year.
  tmp_gf <- select(d_gf2, rgn_id, year, gapfill_within_rgn, gapfill_territory)
  write.csv(tmp_gf, "output/wgi_gf.csv", row.names = FALSE)