[REFERENCE RMD FILE: https://cdn.rawgit.com/OHI-Science/ohiprep/master/globalprep/np/v2016/WGI_dataprep.html]
This script downloads WGI data and prepares it for a pressures (1 - WGI) and resilience data layer.
I made a slight change to the methods that changed the score of a few territories. Previously, territories without scores received the same score as their administrative country. Now territories receive the average value of their administrative country and the other territorial countries (of the same administrative country) that have scores (the WGI data includes scores for many territorial countries).
For example, in the past, Tokelau (territory of New Zealand) would have received New Zealand’s score. But now, it is the average of New Zealand and the territories: Cook Islands and Niue.
Reference: http://info.worldbank.org/governance/wgi/index.aspx#home
Downloaded: Sep 1 2016
Description:
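The Worldwide Governance Indicators (WGI) project reports aggregate and individual governance indicators for 215 economies over the period 1996–2014, for six dimensions of governance: Voice and Accountability; Political Stability and Absence of Violence/Terrorism; Government Effectiveness; Regulatory Quality; Rule of Law; and Control of Corruption.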
Time range: 1996-2014
library(ohicore) # devtools::install_github('ohi-science/ohicore@dev')
library(tools)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
devtools::install_github("hadley/lazyeval", build_vignettes = FALSE)
## Skipping install for github remote, the SHA1 (c155c3d5) has not changed since last install.
## Use `force = TRUE` to force installation
devtools::install_github("rstudio/ggvis", build_vignettes = FALSE)
## Skipping install for github remote, the SHA1 (d9cbbf5d) has not changed since last install.
## Use `force = TRUE` to force installation
library(testthat) # install.packages('testthat')
##
## Attaching package: 'testthat'
## The following object is masked from 'package:dplyr':
##
## matches
library(WDI) # install.packages('WDI')
## Loading required package: RJSONIO
library(stringr)
# Working directory for interactive runs; comment out when knitting:
# setwd('globalprep/prs_res_wgi/v2016')

# First and last year of WGI data to request.
# Check the website to see what years are available:
# http://info.worldbank.org/governance/wgi/index.aspx#home
yr_start <- 1996
yr_end <- 2014
Download each of the 6 WGI indicators:
## access data ----
## get description of variables:
## the first element of WDI_data is used as the catalogue of series
## (columns printed below: indicator, name, description, source)
indicators <- data.frame(WDI_data[[1]])
# fixed = TRUE matches the series code literally; without it the "." in
# "VA.EST" is a regex wildcard that matches any character
indicators[grep("VA.EST", indicators$indicator, fixed = TRUE), ]
## indicator name
## 6623 VA.EST Voice and Accountability: Estimate
## description
## 6623 Voice and Accountability captures perceptions of the extent to which a country's citizens are able to participate in selecting their government, as well as freedom of expression, freedom of association, and a free media.
## sourceDatabase
## 6623 Worldwide Governance Indicators
## sourceOrganization
## 6623 Worldwide Governance Indicators, The World Bank
# Political Stability series; fixed = TRUE so "." is matched literally, not as a regex wildcard
indicators[grep("PV.EST", indicators$indicator, fixed = TRUE), ]
## indicator
## 4569 PV.EST
## name
## 4569 Political Stability and Absence of Violence/Terrorism: Estimate
## description
## 4569 Political Stability and Absence of Violence/Terrorism captures perceptions of the likelihood that the government will be destabilized or overthrown by unconstitutional or violent means, including politically-motivated violence and terrorism.
## sourceDatabase
## 4569 Worldwide Governance Indicators
## sourceOrganization
## 4569 Worldwide Governance Indicators, The World Bank
# Government Effectiveness series; fixed = TRUE so "." is matched literally, not as a regex wildcard
indicators[grep("GE.EST", indicators$indicator, fixed = TRUE), ]
## indicator name
## 2515 GE.EST Government Effectiveness: Estimate
## description
## 2515 Government Effectiveness captures perceptions of the quality of public services, the quality of the civil service and the degree of its independence from political pressures, the quality of policy formulation and implementation, and the credibility of the government's commitment to such policies.
## sourceDatabase
## 2515 Worldwide Governance Indicators
## sourceOrganization
## 2515 Worldwide Governance Indicators, The World Bank
# Regulatory Quality series; fixed = TRUE so "." is matched literally, not as a regex wildcard
indicators[grep("RQ.EST", indicators$indicator, fixed = TRUE), ]
## indicator name
## 4625 RQ.EST Regulatory Quality: Estimate
## description
## 4625 Regulatory Quality captures perceptions of the ability of the government to formulate and implement sound policies and regulations that permit and promote private sector development.
## sourceDatabase
## 4625 Worldwide Governance Indicators
## sourceOrganization
## 4625 Worldwide Governance Indicators, The World Bank
# Rule of Law series; fixed = TRUE so "." is matched literally, not as a regex wildcard
indicators[grep("RL.EST", indicators$indicator, fixed = TRUE), ]
## indicator name
## 4621 RL.EST Rule of Law: Estimate
## description
## 4621 Rule of Law captures perceptions of the extent to which agents have confidence in and abide by the rules of society, and in particular the quality of contract enforcement, property rights, the police, and the courts, as well as the likelihood of crime and violence.
## sourceDatabase
## 4621 Worldwide Governance Indicators
## sourceOrganization
## 4621 Worldwide Governance Indicators, The World Bank
# Control of Corruption series; fixed = TRUE so "." is matched literally, not as a regex wildcard
indicators[grep("CC.EST", indicators$indicator, fixed = TRUE), ]
## indicator name
## 689 CC.EST Control of Corruption: Estimate
## description
## 689 Control of Corruption captures perceptions of the extent to which public power is exercised for private gain, including both petty and grand forms of corruption, as well as ""capture"" of the state by elites and private interests. \nEstimate gives the country's score on the aggregate indicator, in units of a standard normal distribution, i.e. ranging from approximately -2.5 to 2.5.
## sourceDatabase
## 689 Worldwide Governance Indicators
## sourceOrganization
## 689 Worldwide Governance Indicators, The World Bank
# Identify and download the six WGI "Estimate" series.
# WDIsearch('violence')  # general search, useful for finding series names

# Helper: look up the indicator whose name matches `series_name`, then request
# that series for all countries over yr_start..yr_end.
fetch_wgi_series <- function(series_name) {
  series_id <- WDIsearch(series_name, field = 'name')['indicator']
  WDI(indicator = series_id, country = 'all', start = yr_start, end = yr_end)
}

key_voice <- fetch_wgi_series('Voice and Accountability: Estimate')
key_polst <- fetch_wgi_series('Political Stability and Absence of Violence/Terrorism: Estimate')
key_gvtef <- fetch_wgi_series('Government Effectiveness: Estimate')
key_regqt <- fetch_wgi_series('Regulatory Quality: Estimate')
key_rolaw <- fetch_wgi_series('Rule of Law: Estimate')
key_corrp <- fetch_wgi_series('Control of Corruption: Estimate')
Combine the indicators.
# Build one wide table keyed by country and year: start from the Voice and
# Accountability estimate and left-join each remaining series in turn,
# dropping its iso2c column first.
d <- Reduce(
  function(combined, series) {
    left_join(combined, select(series, -iso2c), by = c('country', 'year'))
  },
  list(key_polst, key_gvtef, key_regqt, key_rolaw, key_corrp),
  select(key_voice, country, year, VA.EST)
)

# quick look at the combined data
head(d)
summary(d)
sapply(d, class)
## country year VA.EST PV.EST GE.EST RQ.EST RL.EST
## 1 Anguilla 2014 NA 1.2657502 0.9215814 0.8546115 0.01965328
## 2 Anguilla 2013 1.0442151 1.5939672 1.5268645 1.3093387 1.40268481
## 3 Anguilla 2012 1.0595049 1.5323299 1.5167528 1.3093333 1.39836526
## 4 Anguilla 2011 1.0474353 1.6078939 1.4966693 1.3521394 1.39194965
## 5 Anguilla 2010 1.0239582 1.4362270 1.5125704 1.3681173 1.42292643
## 6 Anguilla 2009 0.9956121 0.9201398 1.5215497 1.3819522 1.43941677
## CC.EST
## 1 1.251688
## 2 1.285754
## 3 1.294390
## 4 1.307218
## 5 1.333021
## 6 1.345137
## country year VA.EST PV.EST
## Length:3440 Min. :1996 Min. :-2.28428 Min. :-3.32390
## Class :character 1st Qu.:2003 1st Qu.:-0.84346 1st Qu.:-0.68844
## Mode :character Median :2006 Median : 0.01163 Median : 0.09325
## Mean :2006 Mean :-0.00710 Mean :-0.02161
## 3rd Qu.:2010 3rd Qu.: 0.91413 3rd Qu.: 0.83354
## Max. :2014 Max. : 1.82637 Max. : 1.93844
## NA's :123 NA's :170
## GE.EST RQ.EST RL.EST
## Min. :-2.4797 Min. :-2.67544 Min. :-2.66887
## 1st Qu.:-0.7509 1st Qu.:-0.71112 1st Qu.:-0.79960
## Median :-0.1668 Median :-0.11552 Median :-0.15051
## Mean :-0.0060 Mean :-0.00644 Mean :-0.00934
## 3rd Qu.: 0.7414 3rd Qu.: 0.79852 3rd Qu.: 0.81489
## Max. : 2.4297 Max. : 2.24735 Max. : 2.12056
## NA's :182 NA's :182 NA's :117
## CC.EST
## Min. :-2.05746
## 1st Qu.:-0.77325
## Median :-0.24912
## Mean :-0.00509
## 3rd Qu.: 0.70831
## Max. : 2.58562
## NA's :176
## country year VA.EST PV.EST GE.EST RQ.EST
## "character" "numeric" "numeric" "numeric" "numeric" "numeric"
## RL.EST CC.EST
## "numeric" "numeric"
# archived record of raw data: write.csv(d, file.path('raw', 'worldbank_wgi_from_wdi_api.csv'), row.names=F)
The first gapfilling step fills a missing value with the average of the available years of data for the same country and indicator. This applies when a country has data for an indicator, but not for all years.
Countries that still have fewer than 3 of the 6 indicators after this step are cut.
# Read the archived record of the raw WDI download.
d <- read.csv('raw/worldbank_wgi_from_wdi_api.csv')

# Reshape to long format: one row per country / year / indicator.
d <- gather(d, "indicator", "value", VA.EST:CC.EST)

# First gap-fill: replace a missing value with the mean of the same
# country/indicator across all years that have data.
d_gap_fill <- d %>%
  group_by(country, year) %>%
  mutate(NA_count_c_y = sum(is.na(value))) %>%      # count of NA values before gap-filling
  ungroup() %>%
  group_by(country, indicator) %>%                  # mean across years within the same country/indicator
  mutate(ind_mean_c_i = mean(value, na.rm = TRUE)) %>%
  ungroup() %>%
  # when a country/indicator has no data in any year the mean is NaN, which
  # is.na() still reports as missing, so it is counted as an NA below
  mutate(value = ifelse(is.na(value), ind_mean_c_i, value)) %>%
  group_by(country, year) %>%
  mutate(NA_count_post_gf1 = sum(is.na(value))) %>% # count NA values after last gap-fill
  ungroup()                                         # do not leave the result grouped
## Countries with no data: more than 3 indicators are still NA after the gap-fill
countries_no_data <- unique(d_gap_fill$country[d_gap_fill$NA_count_post_gf1 > 3])
countries_no_data
## [1] New Caledonia
## 215 Levels: Afghanistan Albania Algeria American Samoa Andorra ... Zimbabwe
# The countries with minimal data (< 3 indicators ever calculated) have
# sovereign countries in this case, so they are deleted here and will be
# gap-filled later on.
d_gap_fill <- filter(d_gap_fill, !(country %in% countries_no_data))
Calculating the overall score involves:
* taking the average of the 6 indicators (assuming at least 3 of the 6 indicators are available)
* rescaling the data from 0 to 1
# WGI estimates range from approximately -2.5 to 2.5; this range is used to
# rescale the averaged score to 0-1.
wgi_range <- c(-2.5, 2.5)

# One row per country/year:
#   score_wgi_scale - mean of the available indicators (NA when more than 3 are missing)
#   NA_start        - number of missing indicators before gap-filling
#   NA_post_gf_1    - number of missing indicators after the first gap-fill
#   score           - score_wgi_scale rescaled with wgi_range
d_calcs <- d_gap_fill %>%
  group_by(country, year) %>%
  summarize(score_wgi_scale = mean(value, na.rm = TRUE),
            NA_start = mean(NA_count_c_y),
            NA_post_gf_1 = mean(NA_count_post_gf1)) %>%
  ungroup() %>%
  mutate(score_wgi_scale = ifelse(NA_post_gf_1 > 3, NA, score_wgi_scale),
         score = (score_wgi_scale - wgi_range[1]) / (wgi_range[2] - wgi_range[1]))

# wgi_range is only approximate, so flag any score that lands outside 0-1
if (any(d_calcs$score < 0 | d_calcs$score > 1, na.rm = TRUE)) {
  warning("Some rescaled WGI scores fall outside [0, 1]; check wgi_range.",
          call. = FALSE)
}

head(d_calcs)
summary(d_calcs)
## # A tibble: 6 × 6
## country year score_wgi_scale NA_start NA_post_gf_1 score
## <fctr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 Afghanistan 1996 -2.070547 0 0 0.08589069
## 2 Afghanistan 1998 -2.096064 0 0 0.08078728
## 3 Afghanistan 2000 -2.123715 0 0 0.07525693
## 4 Afghanistan 2002 -1.748490 0 0 0.15030208
## 5 Afghanistan 2003 -1.571232 0 0 0.18575368
## 6 Afghanistan 2004 -1.510618 0 0 0.19787637
## country year score_wgi_scale NA_start
## Afghanistan : 16 Min. :1996 Min. :-2.491035 Min. :0.0000
## Albania : 16 1st Qu.:2003 1st Qu.:-0.675191 1st Qu.:0.0000
## Algeria : 16 Median :2006 Median :-0.111597 Median :0.0000
## American Samoa: 16 Mean :2006 Mean : 0.008279 Mean :0.2512
## Andorra : 16 3rd Qu.:2010 3rd Qu.: 0.740940 3rd Qu.:0.0000
## Angola : 16 Max. :2014 Max. : 1.985394 Max. :6.0000
## (Other) :3328
## NA_post_gf_1 score
## Min. :0.00000 Min. :0.001793
## 1st Qu.:0.00000 1st Qu.:0.364962
## Median :0.00000 Median :0.477681
## Mean :0.02804 Mean :0.501656
## 3rd Qu.:0.00000 3rd Qu.:0.648188
## Max. :3.00000 Max. :0.897079
##
# Document gapfilling: gap_fill is the number of indicators filled in for a
# country/year (NA count before the gap-fill minus NA count after it), and is
# set to 0 where there is no score.
d_calcs <- d_calcs %>%
  mutate(gap_fill = ifelse(is.na(score), 0, NA_start - NA_post_gf_1)) %>%
  select(-NA_start, -NA_post_gf_1)

# country/years where at least one indicator was gap-filled
filter(d_calcs, gap_fill > 0)
## # A tibble: 183 × 5
## country year score_wgi_scale score gap_fill
## <fctr> <int> <dbl> <dbl> <dbl>
## 1 American Samoa 1996 0.7001840 0.6400368 6
## 2 American Samoa 1998 0.7001840 0.6400368 6
## 3 American Samoa 2000 0.7001840 0.6400368 6
## 4 American Samoa 2002 0.7001840 0.6400368 6
## 5 American Samoa 2003 0.7001840 0.6400368 6
## 6 American Samoa 2014 0.8753184 0.6750637 1
## 7 Anguilla 1996 1.2532663 0.7506533 6
## 8 Anguilla 1998 1.2532663 0.7506533 6
## 9 Anguilla 2000 1.2532663 0.7506533 6
## 10 Anguilla 2002 1.2532663 0.7506533 6
## # ... with 173 more rows
d_calcs[d_calcs$country == "New Caledonia", ] # no data, was deleted earlier
## # A tibble: 0 × 5
## # ... with 5 variables: country <fctr>, year <int>, score_wgi_scale <dbl>,
## # score <dbl>, gap_fill <dbl>
d_calcs[d_calcs$country == "Niue", ] # should have gap-fill values between 0-6
## # A tibble: 16 × 5
## country year score_wgi_scale score gap_fill
## <fctr> <int> <dbl> <dbl> <dbl>
## 1 Niue 1996 -0.3543879 0.4291224 6
## 2 Niue 1998 -0.3543879 0.4291224 6
## 3 Niue 2000 -0.3543879 0.4291224 6
## 4 Niue 2002 -0.3543879 0.4291224 6
## 5 Niue 2003 -0.3543879 0.4291224 6
## 6 Niue 2004 -0.3543879 0.4291224 6
## 7 Niue 2005 -0.3543879 0.4291224 6
## 8 Niue 2006 -0.3543879 0.4291224 6
## 9 Niue 2007 -0.3543879 0.4291224 6
## 10 Niue 2008 -0.3543879 0.4291224 6
## 11 Niue 2009 -0.3762533 0.4247493 0
## 12 Niue 2010 -0.3341911 0.4331618 0
## 13 Niue 2011 -0.3527193 0.4294561 1
## 14 Niue 2012 -0.3543879 0.4291224 6
## 15 Niue 2013 -0.3543879 0.4291224 6
## 16 Niue 2014 -0.3543879 0.4291224 6
## Save intermediate file of wgi scores pre-gapfilling (for OHI+ use).
## The score column is written out under the name score_ohi_scale.
d_calcs %>%
  select(country, year, score_wgi_scale, score_ohi_scale = score) %>%
  write.csv(file.path('intermediate/wgi_combined_scores_by_country.csv'),
            row.names = FALSE)
## We report these regions at a greater spatial resolution, so each listed
## region gets a copy of the rows of the WGI country it is paired with.
## Aruba is part of the Netherlands Antilles, but it is reported separately
country_split_1 <- data.frame(
  country = "Netherlands Antilles",
  region = c('Bonaire', 'Curacao', 'Saba', 'Sint Maarten', 'Sint Eustatius')
)
country_split_2 <- data.frame(
  country = "Jersey, Channel Islands",
  region = c('Jersey', 'Guernsey')
)
country_split <- rbind(country_split_1, country_split_2)

# attach the parent country's rows to each region, then let the region name
# take the place of the country name
country_split_data <- left_join(country_split, d_calcs) %>%
  select(-country) %>%
  rename(country = region)
## Joining, by = "country"
## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factors with different levels, coercing to character vector
# Swap the two aggregate countries for the rows of their individual regions.
d_calcs <- rbind(
  filter(d_calcs, !(country %in% c("Netherlands Antilles", "Jersey, Channel Islands"))),
  country_split_data
)
### Convert country names to OHI region IDs with name_2_rgn().
### NOTE(review): flds_unique presumably lists the fields that, together with
### the region, identify a unique record -- confirm against the ohicore docs.
d_calcs_rgn <- name_2_rgn(
  df_in = d_calcs,
  fld_name = 'country',
  flds_unique = 'year'
)
##
## These data were removed for not being of the proper rgn_type (eez,ohi_region) or mismatching region names in the lookup tables:
## tmp_type
## tmp_name disputed landlocked
## Afghanistan 0 16
## Andorra 0 16
## Armenia 0 16
## Austria 0 16
## Belarus 0 16
## Bhutan 0 16
## Bolivia 0 16
## Botswana 0 16
## Burkina Faso 0 16
## Burundi 0 16
## Central African Republic 0 16
## Chad 0 16
## Czech Republic 0 16
## Ethiopia 0 16
## Hungary 0 16
## Kazakhstan 0 16
## Kosovo 0 16
## Kyrgyz Republic 0 16
## Lao PDR 0 16
## Lesotho 0 16
## Liechtenstein 0 16
## Luxembourg 0 16
## Macedonia, FYR 0 16
## Malawi 0 16
## Mali 0 16
## Moldova 0 16
## Mongolia 0 16
## Nepal 0 16
## Niger 0 16
## Paraguay 0 16
## Rwanda 0 16
## San Marino 0 16
## Serbia 0 16
## Slovak Republic 0 16
## South Sudan 0 16
## Swaziland 0 16
## Switzerland 0 16
## Tajikistan 0 16
## Turkmenistan 0 16
## Uganda 0 16
## Uzbekistan 0 16
## West Bank and Gaza 16 0
## Zambia 0 16
## Zimbabwe 0 16
##
## DUPLICATES found. Consider using collapse2rgn to collapse duplicates (function in progress).
## # A tibble: 5 × 1
## country
## <fctr>
## 1 China
## 2 Hong Kong SAR, China
## 3 Macao SAR, China
## 4 Puerto Rico
## 5 Virgin Islands (U.S.)
### Collapse the duplicate regions (we report these at lower resolution) into
### one row per region and year, using a population-weighted average.
population_weights <- read.csv('../../../../ohiprep/src/LookupTables/Pop_weight_ChinaSAR_USVIslPRico.csv')

d_calcs_rgn <- left_join(d_calcs_rgn, population_weights, by = "country") %>%
  # countries with no entry in the lookup get a weight of 1
  mutate(population = ifelse(is.na(population), 1, population)) %>%
  filter(rgn_id <= 250) %>%
  group_by(rgn_id, year) %>%
  summarize(score = weighted.mean(score, population),
            gapfill_within_rgn = weighted.mean(gap_fill, population)) %>%
  ungroup()
## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factors with different levels, coercing to character vector
summary(d_calcs_rgn)
## rgn_id year score gapfill_within_rgn
## Min. : 6.0 Min. :1996 Min. :0.001793 Min. :0.0000
## 1st Qu.: 65.0 1st Qu.:2003 1st Qu.:0.390168 1st Qu.:0.0000
## Median :126.0 Median :2006 Median :0.513722 Median :0.0000
## Mean :124.8 Mean :2006 Mean :0.521058 Mean :0.3073
## 3rd Qu.:186.0 3rd Qu.:2010 3rd Qu.:0.663718 3rd Qu.:0.0000
## Max. :250.0 Max. :2014 Max. :0.897079 Max. :6.0000
This gapfilling step assigns a territorial region that has no score the average score of its administrative (sovereign) country and that country's other territories that do have scores.
## data that describes territories of countries:
## one row per region with the id of its sovereign country (sov_id)
territory <- rgn_master %>%
  select(rgn_id = rgn_id_2013,
         sov_id) %>%
  group_by(rgn_id) %>%                                  # remove duplicated countries from this rgn_id list
  summarize(sov_id = mean(sov_id, na.rm = TRUE)) %>%    # duplicates always have the same sov_id (r2 value)
  filter(rgn_id <= 250, rgn_id != 213)

## expand to include all years of data
## (merge with by = NULL pairs every year with every region)
territory <- data.frame(year = yr_start:yr_end) %>%
  merge(territory, by = NULL)

# assign territories the values of their country:
# score_gf_territory is the mean score of all scored regions that share a
# sovereign country in a given year
# NOTE(review): gapfill_within_rgn is replaced by the group mean for every
# region in the group, including regions that have their own value -- confirm
# this is intended.
d_sovs <- d_calcs_rgn %>%
  full_join(territory, by = c('rgn_id', 'year')) %>%
  group_by(sov_id, year) %>%
  mutate(score_gf_territory = mean(score, na.rm = TRUE),
         gapfill_within_rgn = mean(gapfill_within_rgn, na.rm = TRUE)) %>%
  # groups with no data at all have a NaN mean, which is.na() treats as
  # missing, so those sovereign/year combinations are dropped here
  filter(!is.na(gapfill_within_rgn)) %>%
  ungroup()

head(d_sovs)
## # A tibble: 6 × 6
## rgn_id year score gapfill_within_rgn sov_id score_gf_territory
## <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 6 1996 0.5147575 3 6 0.5147575
## 2 6 1998 0.5040379 0 6 0.5040379
## 3 6 2000 0.5132357 0 6 0.5132357
## 4 6 2002 0.4765204 0 6 0.4765204
## 5 6 2003 0.4522701 0 6 0.4522701
## 6 6 2004 0.4817330 0 6 0.4817330
summary(d_sovs)
## rgn_id year score gapfill_within_rgn
## Min. : 1.00 Min. :1996 Min. :0.0018 Min. :0.000
## 1st Qu.: 58.75 1st Qu.:2003 1st Qu.:0.3902 1st Qu.:0.000
## Median :116.50 Median :2006 Median :0.5137 Median :0.000
## Mean :117.64 Mean :2006 Mean :0.5211 Mean :0.416
## 3rd Qu.:176.25 3rd Qu.:2010 3rd Qu.:0.6637 3rd Qu.:0.000
## Max. :250.00 Max. :2014 Max. :0.8971 Max. :6.000
## NA's :784
## sov_id score_gf_territory
## Min. : 6.00 Min. :0.001793
## 1st Qu.: 73.75 1st Qu.:0.422824
## Median :163.00 Median :0.583356
## Mean :133.72 Mean :0.560418
## 3rd Qu.:180.00 3rd Qu.:0.695723
## Max. :247.00 Max. :0.897079
##
# Format these data: flag the rows whose score comes from the sovereign-group
# mean, then fill those scores in. gapfill_territory must be computed before
# score is overwritten.
d_gf2 <- d_sovs %>%
  mutate(gapfill_territory = ifelse(is.na(score) & !is.na(score_gf_territory), "territory", "NA"),
         score = ifelse(is.na(score), score_gf_territory, score)) %>%
  select(rgn_id, year, score, gapfill_within_rgn, gapfill_territory)

# make sure that all regions have a score:
# list the eez regions (ids up to 250, excluding 213) and join the scores to them
regions <- rgn_master %>%
  filter(rgn_typ == "eez", rgn_id_2013 <= 250, rgn_id_2013 != 213) %>%
  select(rgn_id = rgn_id_2013) %>%
  unique() %>%
  arrange(rgn_id)

d_gf2 <- left_join(regions, d_gf2)
## Joining, by = "rgn_id"
## check for NA values within "score" variable
## if so, need to gapfill using UN geopolitical regions
summary(d_gf2)
## rgn_id year score gapfill_within_rgn
## Min. : 1.00 Min. :1996 Min. :0.001793 Min. :0.000
## 1st Qu.: 58.75 1st Qu.:2003 1st Qu.:0.422824 1st Qu.:0.000
## Median :116.50 Median :2006 Median :0.579226 Median :0.000
## Mean :117.64 Mean :2006 Mean :0.560418 Mean :0.416
## 3rd Qu.:176.25 3rd Qu.:2010 3rd Qu.:0.694868 3rd Qu.:0.000
## Max. :250.00 Max. :2014 Max. :0.897079 Max. :6.000
## gapfill_territory
## Length:3520
## Class :character
## Mode :character
##
##
##
This step would give uninhabited regions (estimated population < 100 people) an NA for their score. We decided it would be better to give uninhabited regions the scores of their administrative countries, so the code below is commented out.
# uninhab <- read.csv('../../../src/LookupTables/rgn_uninhabited_islands.csv') %>%
# filter(is.na(est_population) | est_population < 100)
#
# d_gf2 <- d_gf2 %>%
# mutate(score = ifelse(rgn_id %in% uninhab$rgn_id, NA, score))
Compare this year's values against last year's. These should be the same unless there have been updates to the WGI source data or a change to methods. This year, a small change affected a few territorial regions: in the past we used the sovereign country's value, but now we average the sovereign country and the available territorial values. For example, Tokelau's score is now the average of New Zealand, the Cook Islands, and Niue rather than New Zealand's score alone.
Also look at top/bottom 10 regions to make sure these seem reasonable.
# This year's scores for data year 2013
new2013 <- filter(d_gf2, year == 2013) %>%
  select(rgn_id, score)

# Last year's scores (as old_score), joined to this year's so both sit side by side
old2013 <- '../v2015/data/rgn_wb_wgi_2015a.csv' %>%
  read.csv() %>%
  select(rgn_id, old_score = score) %>%
  full_join(new2013)
## Joining, by = "rgn_id"
## should be a 1:1 relationship: the red line is old_score = score
plot(old_score ~ score, data = old2013)
abline(a = 0, b = 1, col = "red")
## check on outliers
# Tokelau (rgn 156) went from 0.86 to 0.61...why?
# Now the score is the average of the country and the territorial regions (rather than just the sovereign country): which I think makes sense
filter(new2013, rgn_id %in% c(153, 154, 162))
## rgn_id score
## 1 153 0.5363623
## 2 154 0.4291224
## 3 162 0.8615412
(0.536+0.429+0.862)/3
## [1] 0.609
# Region 159, Johnston Atoll went from 74 to 66
filter(new2013, rgn_id %in% c(13, 116, 151, 163)) # these are used to obtain the values for the below regions
## rgn_id score
## 1 13 0.6392937
## 2 116 0.6118242
## 3 151 0.6450472
## 4 163 0.7427717
filter(new2013, rgn_id %in% c(12, 149, 150, 158, 159))
## rgn_id score
## 1 12 0.6597342
## 2 149 0.6597342
## 3 150 0.6597342
## 4 158 0.6597342
## 5 159 0.6597342
## Top/Bottom 10 scorers:

# get region names (eez regions with ids up to 250, excluding 213)
regions <- rgn_master %>%
  filter(rgn_typ == "eez", rgn_id_2013 <= 250, rgn_id_2013 != 213) %>%
  select(rgn_id = rgn_id_2013, rgn_name = rgn_nam_2013) %>%
  unique() %>%
  arrange(rgn_id)

# 2014 scores sorted from lowest to highest, with region names attached
tmp <- d_gf2 %>%
  filter(year == 2014) %>%
  select(rgn_id, score) %>%
  arrange(score) %>%
  left_join(regions)
## Joining, by = "rgn_id"
tmp[1:10, ]
## rgn_id score rgn_name
## 1 44 0.05694874 Somalia
## 2 77 0.14799651 Syria
## 3 67 0.15209258 Libya
## 4 21 0.16797498 North Korea
## 5 49 0.17737404 Sudan
## 6 199 0.19203930 Democratic Republic of the Congo
## 7 45 0.20475236 Eritrea
## 8 47 0.20550008 Yemen
## 9 192 0.20793057 Iraq
## 10 104 0.21624447 Equatorial Guinea
tmp[211:220, ]
## rgn_id score rgn_name
## 211 94 0.8238142 Heard and McDonald Islands
## 212 218 0.8302989 Canada
## 213 177 0.8403218 Netherlands
## 214 175 0.8456596 Denmark
## 215 222 0.8476865 Sweden
## 216 105 0.8520749 Bouvet Island
## 217 144 0.8520749 Jan Mayen
## 218 223 0.8520749 Norway
## 219 174 0.8690655 Finland
## 220 162 0.8736622 New Zealand
hist(tmp$score)
Gapfilling and score data are saved for each scenario year.
# Write the resilience, pressure and gapfilling layers for the five most
# recent data years. Files are named by scenario year, which is the data
# year + 2 (e.g. 2014 data -> *_2016.csv).
for (data_year in (yr_end - 4):yr_end) { # data_year=yr_end

  save_year <- data_year + 2

  # filter() drops rows whose year is NA; base `[` indexing on a logical that
  # contains NA would instead add all-NA rows to every output file
  tmp <- filter(d_gf2, year == data_year)

  # every region in d_gf2 is expected to have a row for each saved year
  missing_rgns <- setdiff(unique(d_gf2$rgn_id), tmp$rgn_id)
  if (length(missing_rgns) > 0) {
    warning("No WGI score for data year ", data_year, " in rgn_id: ",
            paste(missing_rgns, collapse = ", "), call. = FALSE)
  }

  # resilience layer: the score as is
  tmp_data_res <- tmp %>%
    select(rgn_id, resilience_score = score)
  write.csv(tmp_data_res, sprintf("output/wgi_res_%s.csv", save_year), row.names = FALSE)

  # pressure layer: 1 - score
  tmp_data_prs <- tmp %>%
    mutate(score = 1 - score) %>%
    select(rgn_id, pressure_score = score)
  write.csv(tmp_data_prs, sprintf("output/wgi_prs_%s.csv", save_year), row.names = FALSE)

  # gapfilling record for the same regions
  tmp_gf <- tmp %>%
    select(rgn_id, gapfill_within_rgn, gapfill_territory)
  write.csv(tmp_gf, sprintf("output/wgi_gf_%s.csv", save_year), row.names = FALSE)
}