1 Summary
2 Updates from previous assessment
3 Data Source
4 Obtain the WGI data
5 Gapfill missing data (part 1)
6 Calculate overall WGI score for each country
7 Convert country names to ohi regions
8 Gapfill missing data (part 2)
9 Uninhabited regions
10 Check data
11 Save the data

[REFERENCE RMD FILE: https://cdn.rawgit.com/OHI-Science/ohiprep/master/globalprep/np/v2016/WGI_dataprep.html]

1 Summary

This script downloads WGI data and prepares it for a pressures (1 - WGI) and resilience data layer.

2 Updates from previous assessment

I made a slight change to the methods that changed the score of a few territories. Previously, territories without scores received the same score as their administrative country. Now territories receive the average value of their administrative country and the other territorial countries (of the same administrative country) that have scores (the WGI data includes scores for many territorial countries).

For example, in the past, Tokelau (territory of New Zealand) would have received New Zealand’s score. But now, it is the average of New Zealand and the territories: Cook Islands and Niue.

3 Data Source

Reference: http://info.worldbank.org/governance/wgi/index.aspx#home

Downloaded: Sep 1 2016

Description:
The Worldwide Governance Indicators (WGI) project reports aggregate and individual governance indicators for 215 economies over the period 1996–2014, for six dimensions of governance:

Voice and Accountability
Political Stability and Absence of Violence
Government Effectiveness
Regulatory Quality
Rule of Law
Control of Corruption

Time range: 1996-2014

library(ohicore) # devtools::install_github('ohi-science/ohicore@dev')
library(tools)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyr)
devtools::install_github("hadley/lazyeval", build_vignettes = FALSE)

## Skipping install for github remote, the SHA1 (c155c3d5) has not changed since last install.
##   Use `force = TRUE` to force installation

devtools::install_github("rstudio/ggvis", build_vignettes = FALSE)

## Skipping install for github remote, the SHA1 (d9cbbf5d) has not changed since last install.
##   Use `force = TRUE` to force installation

library(testthat) # install.packages('testthat')

## 
## Attaching package: 'testthat'

## The following object is masked from 'package:dplyr':
## 
##     matches

library(WDI) # install.packages('WDI')

## Loading required package: RJSONIO

library(stringr)

# comment out when knitting:
# setwd('globalprep/prs_res_wgi/v2016')

# Year span requested from the WGI data. Check which years are available at:
# http://info.worldbank.org/governance/wgi/index.aspx#home
yr_start <- 1996
yr_end   <- 2014

4 Obtain the WGI data

Download each of the 6 WGI indicators:

## access data ----

## get description of variables:
indicators <-  data.frame(WDI_data[[1]])
indicators[grep("VA.EST", indicators$indicator), ]

##      indicator                               name
## 6623    VA.EST Voice and Accountability: Estimate
##                                                                                                                                                                                                                       description
## 6623 Voice and Accountability captures perceptions of the extent to which a country's citizens are able to participate in selecting their government, as well as freedom of expression, freedom of association, and a free media.
##                       sourceDatabase
## 6623 Worldwide Governance Indicators
##                                   sourceOrganization
## 6623 Worldwide Governance Indicators, The World Bank

indicators[grep("PV.EST", indicators$indicator), ]

##      indicator
## 4569    PV.EST
##                                                                 name
## 4569 Political Stability and Absence of Violence/Terrorism: Estimate
##                                                                                                                                                                                                                                            description
## 4569 Political Stability and Absence of Violence/Terrorism captures perceptions of the likelihood that the government will be destabilized or overthrown by unconstitutional or violent means, including politically-motivated violence and terrorism.
##                       sourceDatabase
## 4569 Worldwide Governance Indicators
##                                   sourceOrganization
## 4569 Worldwide Governance Indicators, The World Bank

indicators[grep("GE.EST", indicators$indicator), ]

##      indicator                               name
## 2515    GE.EST Government Effectiveness: Estimate
##                                                                                                                                                                                                                                                                                                    description
## 2515 Government Effectiveness captures perceptions of the quality of public services, the quality of the civil service and the degree of its independence from political pressures, the quality of policy formulation and implementation, and the credibility of the government's commitment to such policies.
##                       sourceDatabase
## 2515 Worldwide Governance Indicators
##                                   sourceOrganization
## 2515 Worldwide Governance Indicators, The World Bank

indicators[grep("RQ.EST", indicators$indicator), ]

##      indicator                         name
## 4625    RQ.EST Regulatory Quality: Estimate
##                                                                                                                                                                                 description
## 4625 Regulatory Quality captures perceptions of the ability of the government to formulate and implement sound policies and regulations that permit and promote private sector development.
##                       sourceDatabase
## 4625 Worldwide Governance Indicators
##                                   sourceOrganization
## 4625 Worldwide Governance Indicators, The World Bank

indicators[grep("RL.EST", indicators$indicator), ]

##      indicator                  name
## 4621    RL.EST Rule of Law: Estimate
##                                                                                                                                                                                                                                                                    description
## 4621 Rule of Law captures perceptions of the extent to which agents have confidence in and abide by the rules of society, and in particular the quality of contract enforcement, property rights, the police, and the courts, as well as the likelihood of crime and violence.
##                       sourceDatabase
## 4621 Worldwide Governance Indicators
##                                   sourceOrganization
## 4621 Worldwide Governance Indicators, The World Bank

indicators[grep("CC.EST", indicators$indicator), ]

##     indicator                            name
## 689    CC.EST Control of Corruption: Estimate
##                                                                                                                                                                                                                                                                                                                                                                                          description
## 689 Control of Corruption captures perceptions of the extent to which public power is exercised for private gain, including both petty and grand forms of corruption, as well as ""capture"" of the state by elites and private interests. \nEstimate gives the country's score on the aggregate indicator, in units of a standard normal distribution, i.e. ranging from approximately -2.5 to 2.5.
##                      sourceDatabase
## 689 Worldwide Governance Indicators
##                                  sourceOrganization
## 689 Worldwide Governance Indicators, The World Bank

# Download the six WGI "Estimate" series for all countries, yr_start..yr_end.
#
# Each series is requested by its World Bank indicator code (VA.EST, PV.EST,
# GE.EST, RQ.EST, RL.EST, CC.EST -- the codes that name the value columns used
# when the tables are combined and reshaped later in this script) instead of
# being looked up with WDIsearch() on the indicator name.  The name lookup is a
# pattern search that only yields a usable code while it returns exactly one
# hit, and subsetting its result with ['indicator'] hands WDI() a *named*
# vector; passing the plain code avoids both dependencies.
# WDIsearch('violence')  # general search, handy when exploring other series

# Fetch one indicator (by code) for every country over the study period.
# Returns the data frame produced by WDI(): iso2c, country, year and one
# column named after `code`.
fetch_wgi_estimate <- function(code) {
  WDI(country = 'all', indicator = code, start = yr_start, end = yr_end)
}

key_voice <- fetch_wgi_estimate('VA.EST')  # Voice and Accountability: Estimate
key_polst <- fetch_wgi_estimate('PV.EST')  # Political Stability and Absence of Violence/Terrorism: Estimate
key_gvtef <- fetch_wgi_estimate('GE.EST')  # Government Effectiveness: Estimate
key_regqt <- fetch_wgi_estimate('RQ.EST')  # Regulatory Quality: Estimate
key_rolaw <- fetch_wgi_estimate('RL.EST')  # Rule of Law: Estimate
key_corrp <- fetch_wgi_estimate('CC.EST')  # Control of Corruption: Estimate

Combine the indicators.

# Build one wide table keyed by country and year: start from the Voice and
# Accountability table and left-join the other five indicator tables onto it.
# iso2c is dropped from each joined table so only its estimate column is added.
d <- Reduce(
  function(combined, wgi_tbl) {
    left_join(combined, select(wgi_tbl, -iso2c), by = c('country', 'year'))
  },
  list(key_polst, key_gvtef, key_regqt, key_rolaw, key_corrp),
  select(key_voice, country, year, VA.EST)
)

# quick look at the combined table
head(d)
summary(d)
sapply(d, class)

##    country year    VA.EST    PV.EST    GE.EST    RQ.EST     RL.EST
## 1 Anguilla 2014        NA 1.2657502 0.9215814 0.8546115 0.01965328
## 2 Anguilla 2013 1.0442151 1.5939672 1.5268645 1.3093387 1.40268481
## 3 Anguilla 2012 1.0595049 1.5323299 1.5167528 1.3093333 1.39836526
## 4 Anguilla 2011 1.0474353 1.6078939 1.4966693 1.3521394 1.39194965
## 5 Anguilla 2010 1.0239582 1.4362270 1.5125704 1.3681173 1.42292643
## 6 Anguilla 2009 0.9956121 0.9201398 1.5215497 1.3819522 1.43941677
##     CC.EST
## 1 1.251688
## 2 1.285754
## 3 1.294390
## 4 1.307218
## 5 1.333021
## 6 1.345137

##    country               year          VA.EST             PV.EST        
##  Length:3440        Min.   :1996   Min.   :-2.28428   Min.   :-3.32390  
##  Class :character   1st Qu.:2003   1st Qu.:-0.84346   1st Qu.:-0.68844  
##  Mode  :character   Median :2006   Median : 0.01163   Median : 0.09325  
##                     Mean   :2006   Mean   :-0.00710   Mean   :-0.02161  
##                     3rd Qu.:2010   3rd Qu.: 0.91413   3rd Qu.: 0.83354  
##                     Max.   :2014   Max.   : 1.82637   Max.   : 1.93844  
##                                    NA's   :123        NA's   :170       
##      GE.EST            RQ.EST             RL.EST        
##  Min.   :-2.4797   Min.   :-2.67544   Min.   :-2.66887  
##  1st Qu.:-0.7509   1st Qu.:-0.71112   1st Qu.:-0.79960  
##  Median :-0.1668   Median :-0.11552   Median :-0.15051  
##  Mean   :-0.0060   Mean   :-0.00644   Mean   :-0.00934  
##  3rd Qu.: 0.7414   3rd Qu.: 0.79852   3rd Qu.: 0.81489  
##  Max.   : 2.4297   Max.   : 2.24735   Max.   : 2.12056  
##  NA's   :182       NA's   :182        NA's   :117       
##      CC.EST        
##  Min.   :-2.05746  
##  1st Qu.:-0.77325  
##  Median :-0.24912  
##  Mean   :-0.00509  
##  3rd Qu.: 0.70831  
##  Max.   : 2.58562  
##  NA's   :176

##     country        year      VA.EST      PV.EST      GE.EST      RQ.EST 
## "character"   "numeric"   "numeric"   "numeric"   "numeric"   "numeric" 
##      RL.EST      CC.EST 
##   "numeric"   "numeric"

# archived record of raw data: write.csv(d, file.path('raw', 'worldbank_wgi_from_wdi_api.csv'), row.names=F)

5 Gapfill missing data (part 1)

The first gapfilling step fills a missing value with the average of that country's values for the same indicator across the years that do have data. This applies when a country has data, but not for all years.

Countries without 3 or more indicators are cut.

# Load the archived API download and reshape it to long format:
# one row per country / year / indicator.
d <- read.csv('raw/worldbank_wgi_from_wdi_api.csv')
d <- gather(d, "indicator", "value", VA.EST:CC.EST)

d_gap_fill <- d %>%
  # how many of the indicators are missing for each country-year, before gapfilling
  group_by(country, year) %>%
  mutate(NA_count_c_y = sum(is.na(value))) %>%
  # mean of each country's indicator across years (NAs ignored);
  # regrouping here replaces the country-year grouping above
  group_by(country, indicator) %>%
  mutate(ind_mean_c_i = mean(value, na.rm = TRUE)) %>%
  ungroup() %>%
  # fill missing values with that country/indicator mean
  mutate(value = ifelse(is.na(value), ind_mean_c_i, value)) %>%
  # how many indicators are still missing for each country-year after the fill
  group_by(country, year) %>%
  mutate(NA_count_post_gf1 = sum(is.na(value)))


## countries with a country-year that still lacks more than 3 indicators
## after the first gapfill
lacks_data <- d_gap_fill$NA_count_post_gf1 > 3
countries_no_data <- unique(d_gap_fill$country[lacks_data])
countries_no_data

## [1] New Caledonia
## 215 Levels: Afghanistan Albania Algeria American Samoa Andorra ... Zimbabwe

# Drop every country listed in countries_no_data (countries that still lack
# more than 3 indicators for some year after the first gapfill).
# In this case, the countries with minimal data (< 3 indicators ever calculated) have sovereign countries.
# Removing them here means they will be gap-filled later on from their sovereign country.
d_gap_fill <- d_gap_fill %>%
  filter(!(country %in% countries_no_data))

6 Calculate overall WGI score for each country

This involves:

* taking the average of the 6 indicators (assuming at least 3 of the 6 indicators are available)
* rescaling the data from 0 to 1

# Average the (gapfilled) indicator values for each country-year and carry
# along the NA counts from before and after the first gapfill.
d_calcs <- d_gap_fill %>%
  group_by(country, year) %>%
  summarize(score_wgi_scale = mean(value, na.rm = TRUE),
            NA_start        = mean(NA_count_c_y),
            NA_post_gf_1    = mean(NA_count_post_gf1)) %>%
  ungroup() %>%
  # no score when more than 3 of the indicators are still missing
  mutate(score_wgi_scale = ifelse(NA_post_gf_1 > 3, NA, score_wgi_scale))

# Linearly rescale using fixed bounds of -2.5 and 2.5, so that -2.5 maps to 0
# and 2.5 maps to 1.
wgi_range <- c(-2.5, 2.5)

d_calcs <- mutate(d_calcs,
                  score = (score_wgi_scale - wgi_range[1]) / (wgi_range[2] - wgi_range[1]))

head(d_calcs)
summary(d_calcs)

## # A tibble: 6 × 6
##       country  year score_wgi_scale NA_start NA_post_gf_1      score
##        <fctr> <int>           <dbl>    <dbl>        <dbl>      <dbl>
## 1 Afghanistan  1996       -2.070547        0            0 0.08589069
## 2 Afghanistan  1998       -2.096064        0            0 0.08078728
## 3 Afghanistan  2000       -2.123715        0            0 0.07525693
## 4 Afghanistan  2002       -1.748490        0            0 0.15030208
## 5 Afghanistan  2003       -1.571232        0            0 0.18575368
## 6 Afghanistan  2004       -1.510618        0            0 0.19787637

##            country          year      score_wgi_scale        NA_start     
##  Afghanistan   :  16   Min.   :1996   Min.   :-2.491035   Min.   :0.0000  
##  Albania       :  16   1st Qu.:2003   1st Qu.:-0.675191   1st Qu.:0.0000  
##  Algeria       :  16   Median :2006   Median :-0.111597   Median :0.0000  
##  American Samoa:  16   Mean   :2006   Mean   : 0.008279   Mean   :0.2512  
##  Andorra       :  16   3rd Qu.:2010   3rd Qu.: 0.740940   3rd Qu.:0.0000  
##  Angola        :  16   Max.   :2014   Max.   : 1.985394   Max.   :6.0000  
##  (Other)       :3328                                                      
##   NA_post_gf_1         score         
##  Min.   :0.00000   Min.   :0.001793  
##  1st Qu.:0.00000   1st Qu.:0.364962  
##  Median :0.00000   Median :0.477681  
##  Mean   :0.02804   Mean   :0.501656  
##  3rd Qu.:0.00000   3rd Qu.:0.648188  
##  Max.   :3.00000   Max.   :0.897079  
##

# Document gapfilling: gap_fill is the number of indicators that the first
# gapfill supplied for a country-year (NAs before minus NAs after).
# It is set to 0 where no score was calculated.
d_calcs <- d_calcs %>%
  mutate(gap_fill = ifelse(is.na(score), 0, NA_start - NA_post_gf_1)) %>%
  select(-NA_start, -NA_post_gf_1)

# country-years where at least one indicator was gapfilled
d_calcs[d_calcs$gap_fill > 0, ]

## # A tibble: 183 × 5
##           country  year score_wgi_scale     score gap_fill
##            <fctr> <int>           <dbl>     <dbl>    <dbl>
## 1  American Samoa  1996       0.7001840 0.6400368        6
## 2  American Samoa  1998       0.7001840 0.6400368        6
## 3  American Samoa  2000       0.7001840 0.6400368        6
## 4  American Samoa  2002       0.7001840 0.6400368        6
## 5  American Samoa  2003       0.7001840 0.6400368        6
## 6  American Samoa  2014       0.8753184 0.6750637        1
## 7        Anguilla  1996       1.2532663 0.7506533        6
## 8        Anguilla  1998       1.2532663 0.7506533        6
## 9        Anguilla  2000       1.2532663 0.7506533        6
## 10       Anguilla  2002       1.2532663 0.7506533        6
## # ... with 173 more rows

d_calcs[d_calcs$country == "New Caledonia", ]  # no data, was deleted earlier

## # A tibble: 0 × 5
## # ... with 5 variables: country <fctr>, year <int>, score_wgi_scale <dbl>,
## #   score <dbl>, gap_fill <dbl>

d_calcs[d_calcs$country == "Niue", ] # should have gap-fill values between 0-6

## # A tibble: 16 × 5
##    country  year score_wgi_scale     score gap_fill
##     <fctr> <int>           <dbl>     <dbl>    <dbl>
## 1     Niue  1996      -0.3543879 0.4291224        6
## 2     Niue  1998      -0.3543879 0.4291224        6
## 3     Niue  2000      -0.3543879 0.4291224        6
## 4     Niue  2002      -0.3543879 0.4291224        6
## 5     Niue  2003      -0.3543879 0.4291224        6
## 6     Niue  2004      -0.3543879 0.4291224        6
## 7     Niue  2005      -0.3543879 0.4291224        6
## 8     Niue  2006      -0.3543879 0.4291224        6
## 9     Niue  2007      -0.3543879 0.4291224        6
## 10    Niue  2008      -0.3543879 0.4291224        6
## 11    Niue  2009      -0.3762533 0.4247493        0
## 12    Niue  2010      -0.3341911 0.4331618        0
## 13    Niue  2011      -0.3527193 0.4294561        1
## 14    Niue  2012      -0.3543879 0.4291224        6
## 15    Niue  2013      -0.3543879 0.4291224        6
## 16    Niue  2014      -0.3543879 0.4291224        6

## Save the country-level WGI scores (before conversion to OHI regions) as an
## intermediate file (for OHI+ use); `score` is exported as score_ohi_scale.
wgi_by_country <- d_calcs %>%
  select(country, year, score_wgi_scale, score_ohi_scale = score)

write.csv(wgi_by_country,
          file.path('intermediate/wgi_combined_scores_by_country.csv'),
          row.names = FALSE)

7 Convert country names to ohi regions

## Some WGI "countries" are reported by OHI at a finer spatial resolution.
## Each constituent region listed below receives a copy of the aggregate's
## rows (all years).

## Aruba is part of the Netherlands Antilles, but it is reported separately
country_split_1 <- data.frame(country = "Netherlands Antilles", region = c('Bonaire', 'Curacao', 'Saba', 'Sint Maarten', 'Sint Eustatius'))
country_split_2 <- data.frame(country = "Jersey, Channel Islands", region = c('Jersey', 'Guernsey'))
country_split <- rbind(country_split_1, country_split_2)

# attach the aggregate's scores to every constituent region, then let the
# region name take the place of the aggregate name in the `country` column
country_split_data <- left_join(country_split, d_calcs)
country_split_data <- select(country_split_data, -country)
country_split_data <- rename(country_split_data, country = region)

## Joining, by = "country"

## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factors with different levels, coercing to character vector

# Replace the two aggregate entries with the per-region rows built above.
d_calcs <- rbind(
  filter(d_calcs, !(country %in% c("Netherlands Antilles", "Jersey, Channel Islands"))),
  country_split_data
)

### Convert country names to OHI region IDs (ohicore helper);
### rows are identified by country name within each year
d_calcs_rgn <- name_2_rgn(df_in       = d_calcs,
                          fld_name    = 'country',
                          flds_unique = c('year'))

## 
## These data were removed for not being of the proper rgn_type (eez,ohi_region) or mismatching region names in the lookup tables:
##                           tmp_type
## tmp_name                   disputed landlocked
##   Afghanistan                     0         16
##   Andorra                         0         16
##   Armenia                         0         16
##   Austria                         0         16
##   Belarus                         0         16
##   Bhutan                          0         16
##   Bolivia                         0         16
##   Botswana                        0         16
##   Burkina Faso                    0         16
##   Burundi                         0         16
##   Central African Republic        0         16
##   Chad                            0         16
##   Czech Republic                  0         16
##   Ethiopia                        0         16
##   Hungary                         0         16
##   Kazakhstan                      0         16
##   Kosovo                          0         16
##   Kyrgyz Republic                 0         16
##   Lao PDR                         0         16
##   Lesotho                         0         16
##   Liechtenstein                   0         16
##   Luxembourg                      0         16
##   Macedonia, FYR                  0         16
##   Malawi                          0         16
##   Mali                            0         16
##   Moldova                         0         16
##   Mongolia                        0         16
##   Nepal                           0         16
##   Niger                           0         16
##   Paraguay                        0         16
##   Rwanda                          0         16
##   San Marino                      0         16
##   Serbia                          0         16
##   Slovak Republic                 0         16
##   South Sudan                     0         16
##   Swaziland                       0         16
##   Switzerland                     0         16
##   Tajikistan                      0         16
##   Turkmenistan                    0         16
##   Uganda                          0         16
##   Uzbekistan                      0         16
##   West Bank and Gaza             16          0
##   Zambia                          0         16
##   Zimbabwe                        0         16

## 
## DUPLICATES found. Consider using collapse2rgn to collapse duplicates (function in progress).

## # A tibble: 5 × 1
##                 country
##                  <fctr>
## 1                 China
## 2  Hong Kong SAR, China
## 3      Macao SAR, China
## 4           Puerto Rico
## 5 Virgin Islands (U.S.)

### Several countries map to the same OHI region (reported at lower resolution).
### Collapse them to one row per region and year with a population-weighted
### average; countries missing from the weights table get a weight of 1.
population_weights <- read.csv('../../../../ohiprep/src/LookupTables/Pop_weight_ChinaSAR_USVIslPRico.csv')

d_calcs_rgn <- d_calcs_rgn %>%
  filter(rgn_id <= 250) %>%                       # row filter on the grouping key
  left_join(population_weights, by = "country") %>%
  mutate(population = ifelse(is.na(population), 1, population)) %>%
  group_by(rgn_id, year) %>%
  summarize(score              = weighted.mean(score, population),
            gapfill_within_rgn = weighted.mean(gap_fill, population)) %>%
  ungroup()

## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factors with different levels, coercing to character vector

summary(d_calcs_rgn)

##      rgn_id           year          score          gapfill_within_rgn
##  Min.   :  6.0   Min.   :1996   Min.   :0.001793   Min.   :0.0000    
##  1st Qu.: 65.0   1st Qu.:2003   1st Qu.:0.390168   1st Qu.:0.0000    
##  Median :126.0   Median :2006   Median :0.513722   Median :0.0000    
##  Mean   :124.8   Mean   :2006   Mean   :0.521058   Mean   :0.3073    
##  3rd Qu.:186.0   3rd Qu.:2010   3rd Qu.:0.663718   3rd Qu.:0.0000    
##  Max.   :250.0   Max.   :2014   Max.   :0.897079   Max.   :6.0000

8 Gapfill missing data (part 2)

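This gapfilling step assigns a territorial region without a score the average score of the regions that share its sovereign country (the sovereign country itself plus any of its territories that have scores).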

## Lookup of each region's sovereign country: one row per rgn_id.
## rgn_master can list a region more than once, so sov_id is collapsed with a
## mean (duplicates always have the same sov_id (r2 value)).
territory <- rgn_master %>%
  select(rgn_id = rgn_id_2013, sov_id) %>%
  filter(rgn_id <= 250, rgn_id != 213) %>%
  group_by(rgn_id) %>%
  summarize(sov_id = mean(sov_id, na.rm = TRUE))

## expand to include all years of data (cross join: every year x every region)
territory <- merge(data.frame(year = yr_start:yr_end), territory, by = NULL)


# Territory gapfill: within each sovereign-country group and year, compute the
# mean score of the regions that have one (score_gf_territory); it is used
# further on for regions without a score of their own.
#
# gapfill_within_rgn is filled the same way, but ONLY where it is missing.
# A region that has its own score keeps its own within-region gapfill value
# instead of having it overwritten by the group mean, which would misreport
# how many of its own indicators were gapfilled.
#
# Rows whose sovereign group has no data at all end up with NaN (mean of an
# empty set) and are removed by the filter, exactly as before.
d_sovs <- d_calcs_rgn %>%
  full_join(territory, by = c('rgn_id', 'year')) %>%
  group_by(sov_id, year) %>%
  mutate(score_gf_territory = mean(score, na.rm = TRUE),
         gapfill_within_rgn = ifelse(is.na(gapfill_within_rgn),
                                     mean(gapfill_within_rgn, na.rm = TRUE),
                                     gapfill_within_rgn)) %>%
  filter(!is.na(gapfill_within_rgn)) %>%
  ungroup()

head(d_sovs)

## # A tibble: 6 × 6
##   rgn_id  year     score gapfill_within_rgn sov_id score_gf_territory
##    <int> <int>     <dbl>              <dbl>  <dbl>              <dbl>
## 1      6  1996 0.5147575                  3      6          0.5147575
## 2      6  1998 0.5040379                  0      6          0.5040379
## 3      6  2000 0.5132357                  0      6          0.5132357
## 4      6  2002 0.4765204                  0      6          0.4765204
## 5      6  2003 0.4522701                  0      6          0.4522701
## 6      6  2004 0.4817330                  0      6          0.4817330

summary(d_sovs)

##      rgn_id            year          score        gapfill_within_rgn
##  Min.   :  1.00   Min.   :1996   Min.   :0.0018   Min.   :0.000     
##  1st Qu.: 58.75   1st Qu.:2003   1st Qu.:0.3902   1st Qu.:0.000     
##  Median :116.50   Median :2006   Median :0.5137   Median :0.000     
##  Mean   :117.64   Mean   :2006   Mean   :0.5211   Mean   :0.416     
##  3rd Qu.:176.25   3rd Qu.:2010   3rd Qu.:0.6637   3rd Qu.:0.000     
##  Max.   :250.00   Max.   :2014   Max.   :0.8971   Max.   :6.000     
##                                  NA's   :784                        
##      sov_id       score_gf_territory
##  Min.   :  6.00   Min.   :0.001793  
##  1st Qu.: 73.75   1st Qu.:0.422824  
##  Median :163.00   Median :0.583356  
##  Mean   :133.72   Mean   :0.560418  
##  3rd Qu.:180.00   3rd Qu.:0.695723  
##  Max.   :247.00   Max.   :0.897079  
##

# Apply the territory gapfill: flag rows whose score is missing but whose
# sovereign-group mean is available, then take that mean as the score.
# (The flag is computed first, while `score` still holds the original value.)
d_gf2 <- d_sovs %>%
  mutate(gapfill_territory = ifelse(is.na(score) & !is.na(score_gf_territory), "territory", "NA"),
         score = ifelse(is.na(score), score_gf_territory, score)) %>%
  select(rgn_id, year, score, gapfill_within_rgn, gapfill_territory)

# make sure that all regions have a score: start from the full list of eez
# regions so any region missing from d_gf2 shows up with NA values
regions <- rgn_master %>%
  filter(rgn_typ == "eez", rgn_id_2013 <= 250, rgn_id_2013 != 213) %>%
  select(rgn_id = rgn_id_2013) %>%
  unique() %>%
  arrange(rgn_id)

d_gf2 <- left_join(regions, d_gf2)

## Joining, by = "rgn_id"

## check for NA values within "score" variable
## if so, need to gapfill using UN geopolitical regions
summary(d_gf2)

##      rgn_id            year          score          gapfill_within_rgn
##  Min.   :  1.00   Min.   :1996   Min.   :0.001793   Min.   :0.000     
##  1st Qu.: 58.75   1st Qu.:2003   1st Qu.:0.422824   1st Qu.:0.000     
##  Median :116.50   Median :2006   Median :0.579226   Median :0.000     
##  Mean   :117.64   Mean   :2006   Mean   :0.560418   Mean   :0.416     
##  3rd Qu.:176.25   3rd Qu.:2010   3rd Qu.:0.694868   3rd Qu.:0.000     
##  Max.   :250.00   Max.   :2014   Max.   :0.897079   Max.   :6.000     
##  gapfill_territory 
##  Length:3520       
##  Class :character  
##  Mode  :character  
##                    
##                    
##

9 Uninhabited regions

Previously, uninhabited regions (established population < 100 people) received an NA for their score. We decided it would be better to give uninhabited regions the scores of their administrative countries, so the code that assigned NA is commented out below.

# uninhab <- read.csv('../../../src/LookupTables/rgn_uninhabited_islands.csv') %>%
#   filter(is.na(est_population) | est_population < 100)
# 
# d_gf2 <- d_gf2 %>%
#   mutate(score = ifelse(rgn_id %in% uninhab$rgn_id, NA, score))

10 Check data

Comparing this year’s values against last year’s. These should be the same unless there have been updates to the WGI source data or a change to methods. For this year, there was a small change that affected a few territorial regions. In the past, we used the sovereign country value, but in this case, we averaged the sovereign country and the available territorial values. For example, see the checks on Tokelau and Johnston Atoll below.

Also look at top/bottom 10 regions to make sure these seem reasonable.

# this assessment's 2013 scores
new2013 <- select(filter(d_gf2, year == 2013), rgn_id, score)

# last assessment's scores (renamed old_score), joined to the new ones by region
old2013 <- read.csv('../v2015/data/rgn_wb_wgi_2015a.csv')
old2013 <- select(old2013, rgn_id, old_score = score)
old2013 <- full_join(old2013, new2013)

## Joining, by = "rgn_id"

## should be a 1:1 relationship
plot(old_score ~ score, data=old2013)
abline(0,1, col="red")

## check on outliers
# Tokelau (rgn 156) went from 0.86 to 0.61...why?
# Now the score is the average of the country and the territorial regions (rather than just the sovereign country): which I think makes sense
filter(new2013, rgn_id %in% c(153, 154, 162))

##   rgn_id     score
## 1    153 0.5363623
## 2    154 0.4291224
## 3    162 0.8615412

(0.536+0.429+0.862)/3

## [1] 0.609

# Region 159, Johnston Atoll went from 74 to 66
filter(new2013, rgn_id %in% c(13, 116, 151, 163)) # these are used to obtain the values for the below regions

##   rgn_id     score
## 1     13 0.6392937
## 2    116 0.6118242
## 3    151 0.6450472
## 4    163 0.7427717

filter(new2013, rgn_id %in% c(12, 149, 150, 158, 159))

##   rgn_id     score
## 1     12 0.6597342
## 2    149 0.6597342
## 3    150 0.6597342
## 4    158 0.6597342
## 5    159 0.6597342

## Top/Bottom 10 scorers:

# region id -> region name lookup for the eez regions
regions <- rgn_master %>%
  filter(rgn_typ == "eez", rgn_id_2013 <= 250, rgn_id_2013 != 213) %>%
  select(rgn_id = rgn_id_2013, rgn_name = rgn_nam_2013) %>%
  unique() %>%
  arrange(rgn_id)

# 2014 scores sorted from lowest to highest, with region names attached
tmp <- d_gf2 %>%
  filter(year == 2014) %>%
  select(rgn_id, score) %>%
  arrange(score) %>%
  left_join(regions)

## Joining, by = "rgn_id"

tmp[1:10, ]

##    rgn_id      score                         rgn_name
## 1      44 0.05694874                          Somalia
## 2      77 0.14799651                            Syria
## 3      67 0.15209258                            Libya
## 4      21 0.16797498                      North Korea
## 5      49 0.17737404                            Sudan
## 6     199 0.19203930 Democratic Republic of the Congo
## 7      45 0.20475236                          Eritrea
## 8      47 0.20550008                            Yemen
## 9     192 0.20793057                             Iraq
## 10    104 0.21624447                Equatorial Guinea

# last ten rows of tmp, without hard-coding the number of regions
tail(tmp, 10)

##     rgn_id     score                   rgn_name
## 211     94 0.8238142 Heard and McDonald Islands
## 212    218 0.8302989                     Canada
## 213    177 0.8403218                Netherlands
## 214    175 0.8456596                    Denmark
## 215    222 0.8476865                     Sweden
## 216    105 0.8520749              Bouvet Island
## 217    144 0.8520749                  Jan Mayen
## 218    223 0.8520749                     Norway
## 219    174 0.8690655                    Finland
## 220    162 0.8736622                New Zealand

hist(tmp$score)

11 Save the data

Gapfilling and score data are saved for each scenario year.

# Write the output layers for the five most recent data years.
# Files are named by data year + 2.
for (data_year in (yr_end - 4):yr_end) { # data_year=yr_end

  save_year <- data_year + 2
  year_rows <- d_gf2[d_gf2$year == data_year, ]

  # resilience layer: the score as is
  write.csv(select(year_rows, rgn_id, resilience_score = score),
            sprintf("output/wgi_res_%s.csv", save_year),
            row.names = FALSE)

  # pressure layer: 1 - score
  write.csv(transmute(year_rows, rgn_id, pressure_score = 1 - score),
            sprintf("output/wgi_prs_%s.csv", save_year),
            row.names = FALSE)

  # gapfilling record that accompanies both layers
  write.csv(select(year_rows, rgn_id, gapfill_within_rgn, gapfill_territory),
            sprintf("output/wgi_gf_%s.csv", save_year),
            row.names = FALSE)

}

OHI: World Governance Indicator

Compiled on Wed Sep 7 10:38:57 2016 by frazier