1 Summary
2 Updates from previous assessment
3 Data Source
4 Methods
5 Social Progress Index data
6 Assign countries to OHI regions
7 Compare to WGI
8 Second round of gapfilling
9 Uninhabited regions
10 Save final data

1 Summary

This script gapfills the social progress index data and formats it for the OHI global assessment.

2 Updates from previous assessment

This is the first year of data.

3 Data Source

Reference:
http://www.socialprogressimperative.org/

Stern, S., A. Wares and T. Hellman. 2016. Social Progress Index: 2016 Methodological Report.

Downloaded: 9/28/2016

Description: Social Progress Index scores and components for countries.

Time range: 2016

4 Methods

# load libraries, set directories
library(ohicore)  #devtools::install_github('ohi-science/ohicore@dev')
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(stringr)
library(tidyr)

## comment out when knitting
#setwd("globalprep/prs_res_spi/v2016")


### Load FAO-specific user-defined functions
source('../../../src/R/common.R') # directory locations

5 Social Progress Index data

Organize data and gapfill missing countries that have incomplete data. This is the first gapfilling step. The second step will involve gapfilling countries that have no data. This first level of gapfilling probably has very little error.

cats <- read.csv(file.path(dir_M, 'git-annex/globalprep/_raw_data/SocialProgressIndex/d2016/spi_categories.csv')) %>%
  mutate(category = gsub(" ", "", category))
  
spi <- read.csv(file.path(dir_M, 'git-annex/globalprep/_raw_data/SocialProgressIndex/d2016/2016-Results_spi.csv'), check.names=FALSE, stringsAsFactors=FALSE) 

names(spi) <- gsub(" ", "", names(spi))

spi <- spi %>%
  dplyr::select(-CountryCode) %>%
  gather('category', 'score', -Country) %>%
  left_join(cats, by = "category")

5.1 Gapfill bhn component

[NOTE: There seems like there must be a better way to do this.]

bhn_tmp <- spi %>%
  filter(subcategory == "bhn") %>%
  spread(category, score)

## gapfill Watershed and Sanitation data  
mod <- lm(WaterandSanitation ~ Shelter, data=bhn_tmp, na.action=na.exclude)
summary(mod)

## 
## Call:
## lm(formula = WaterandSanitation ~ Shelter, data = bhn_tmp, na.action = na.exclude)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -46.039  -4.556   0.191   5.181  25.924 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -8.37068    2.75362   -3.04  0.00278 ** 
## Shelter      1.21274    0.03914   30.98  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.712 on 154 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.8617, Adjusted R-squared:  0.8608 
## F-statistic: 959.9 on 1 and 154 DF,  p-value: < 2.2e-16

bhn_tmp$WaterandSanitation_pred <-  predict(mod, newdata = select(bhn_tmp, Shelter))

bhn_tmp <- bhn_tmp %>%
  mutate(gapfill = ifelse(is.na(WaterandSanitation) & !is.na(WaterandSanitation_pred), "WaterandSanitation_gf", NA)) %>%
  mutate(WaterandSanitation = ifelse(is.na(WaterandSanitation), WaterandSanitation_pred, WaterandSanitation))

## gapfill Shelter data
mod <- lm(Shelter ~ WaterandSanitation, data=bhn_tmp, na.action=na.exclude)
summary(mod)

## 
## Call:
## lm(formula = Shelter ~ WaterandSanitation, data = bhn_tmp, na.action = na.exclude)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -22.075  -3.879   0.208   3.869  34.703 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        15.27905    1.78092   8.579 9.29e-15 ***
## WaterandSanitation  0.71058    0.02286  31.083  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.41 on 155 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.8618, Adjusted R-squared:  0.8609 
## F-statistic: 966.2 on 1 and 155 DF,  p-value: < 2.2e-16

bhn_tmp$Shelter_pred <-  predict(mod, newdata = select(bhn_tmp, WaterandSanitation))

bhn_tmp <- bhn_tmp %>%
  mutate(gapfill = ifelse(is.na(Shelter) & !is.na(Shelter_pred), paste(bhn_tmp$gapfill, "Shelter_gf"), gapfill)) %>%
  mutate(Shelter = ifelse(is.na(Shelter), Shelter_pred, Shelter))

## gapfill Nutrition and Basic Medical Care
mod <- lm(NutritionandBasicMedicalCare ~ WaterandSanitation + Shelter, data=bhn_tmp, na.action=na.exclude)
summary(mod)

## 
## Call:
## lm(formula = NutritionandBasicMedicalCare ~ WaterandSanitation + 
##     Shelter, data = bhn_tmp, na.action = na.exclude)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -24.2332  -2.1777   0.0963   2.6544  14.0977 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        45.56510    1.71758  26.529  < 2e-16 ***
## WaterandSanitation  0.25936    0.04744   5.467 1.92e-07 ***
## Shelter             0.33963    0.06315   5.379 2.89e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.547 on 147 degrees of freedom
##   (10 observations deleted due to missingness)
## Multiple R-squared:  0.8488, Adjusted R-squared:  0.8467 
## F-statistic: 412.5 on 2 and 147 DF,  p-value: < 2.2e-16

bhn_tmp$NutritionandBasicMedicalCare_pred <-  predict(mod, newdata = select(bhn_tmp, WaterandSanitation, Shelter))

bhn_tmp <- bhn_tmp %>%
  mutate(gapfill = ifelse(is.na(NutritionandBasicMedicalCare) & !is.na(NutritionandBasicMedicalCare_pred), paste(bhn_tmp$gapfill, "NutritionandBasicMedicalCare_gf"), gapfill)) %>%
  mutate(NutritionandBasicMedicalCare = ifelse(is.na(NutritionandBasicMedicalCare), NutritionandBasicMedicalCare_pred, NutritionandBasicMedicalCare))

## gapfill personal safety
mod <- lm(PersonalSafety ~ WaterandSanitation + Shelter + NutritionandBasicMedicalCare, data=bhn_tmp, na.action=na.exclude)
summary(mod)

## 
## Call:
## lm(formula = PersonalSafety ~ WaterandSanitation + Shelter + 
##     NutritionandBasicMedicalCare, data = bhn_tmp, na.action = na.exclude)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -37.849  -4.886   1.154   8.305  18.816 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  38.56168    8.75640   4.404 2.01e-05 ***
## WaterandSanitation           -0.01827    0.10923  -0.167  0.86742    
## Shelter                       0.45499    0.14430   3.153  0.00195 ** 
## NutritionandBasicMedicalCare  0.01631    0.17749   0.092  0.92690    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.86 on 150 degrees of freedom
##   (6 observations deleted due to missingness)
## Multiple R-squared:  0.3695, Adjusted R-squared:  0.3569 
## F-statistic:  29.3 on 3 and 150 DF,  p-value: 5.705e-15

bhn_tmp$PersonalSafety_pred <-  predict(mod, newdata = select(bhn_tmp, WaterandSanitation, Shelter, NutritionandBasicMedicalCare))

bhn_tmp <- bhn_tmp %>%
  mutate(gapfill = ifelse(is.na(PersonalSafety) & !is.na(PersonalSafety_pred), paste(bhn_tmp$gapfill, "PersonalSafety_gf"), gapfill)) %>%
  mutate(PersonalSafety = ifelse(is.na(PersonalSafety), PersonalSafety_pred, PersonalSafety))

### 
bhn_gf <- bhn_tmp %>%
  mutate(gapfill = gsub("NA ", "", gapfill)) %>%
  select(-ends_with("pred")) %>%
  gather('category', 'score', -Country, -subcategory, -gapfill)

5.2 Gapfill fw component

fw_tmp <- spi %>%
  filter(subcategory == "fw") %>%
  spread(category, score)

## gapfill EnvironmentalQuality  
mod <- lm(EnvironmentalQuality ~ AccesstoInformationandCommunications + HealthandWellness, data=fw_tmp, na.action=na.exclude)
summary(mod)

## 
## Call:
## lm(formula = EnvironmentalQuality ~ AccesstoInformationandCommunications + 
##     HealthandWellness, data = fw_tmp, na.action = na.exclude)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -30.500  -4.999   0.470   6.951  31.978 
## 
## Coefficients:
##                                       Estimate Std. Error t value Pr(>|t|)
## (Intercept)                          -17.84789    5.54672  -3.218  0.00157
## AccesstoInformationandCommunications   0.59338    0.05085  11.670  < 2e-16
## HealthandWellness                      0.64771    0.08926   7.257 1.75e-11
##                                         
## (Intercept)                          ** 
## AccesstoInformationandCommunications ***
## HealthandWellness                    ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.667 on 156 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.6338, Adjusted R-squared:  0.6291 
## F-statistic:   135 on 2 and 156 DF,  p-value: < 2.2e-16

fw_tmp$EnvironmentalQuality_pred <-  predict(mod, newdata = select(fw_tmp, AccesstoInformationandCommunications, HealthandWellness))

fw_tmp <- fw_tmp %>%
  mutate(gapfill = ifelse(is.na(EnvironmentalQuality) & !is.na(EnvironmentalQuality_pred), "EnvironmentalQuality_gf", NA)) %>%
  mutate(EnvironmentalQuality = ifelse(is.na(EnvironmentalQuality), EnvironmentalQuality_pred, EnvironmentalQuality))

## gapfill Access to Basic Knowledge  
mod <- lm(AccesstoBasicKnowledge ~ AccesstoInformationandCommunications + HealthandWellness + EnvironmentalQuality, 
          data=fw_tmp, na.action=na.exclude)
summary(mod)

## 
## Call:
## lm(formula = AccesstoBasicKnowledge ~ AccesstoInformationandCommunications + 
##     HealthandWellness + EnvironmentalQuality, data = fw_tmp, 
##     na.action = na.exclude)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -33.710  -4.850  -0.109   6.597  37.626 
## 
## Coefficients:
##                                      Estimate Std. Error t value Pr(>|t|)
## (Intercept)                          39.07838    7.17608   5.446 2.08e-07
## AccesstoInformationandCommunications  0.61979    0.08644   7.170 3.25e-11
## HealthandWellness                    -0.26308    0.13059  -2.015  0.04574
## EnvironmentalQuality                  0.27256    0.09922   2.747  0.00676
##                                         
## (Intercept)                          ***
## AccesstoInformationandCommunications ***
## HealthandWellness                    *  
## EnvironmentalQuality                 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.87 on 149 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.5333, Adjusted R-squared:  0.5239 
## F-statistic: 56.75 on 3 and 149 DF,  p-value: < 2.2e-16

fw_tmp$AccesstoBasicKnowledge_pred <-  predict(mod, newdata = select(fw_tmp, AccesstoInformationandCommunications, HealthandWellness, EnvironmentalQuality))

fw_tmp <- fw_tmp %>%
  mutate(gapfill = ifelse(is.na(AccesstoBasicKnowledge) & !is.na(AccesstoBasicKnowledge_pred), paste(gapfill, "AccesstoBasicKnowledge_gf", sep=", "), gapfill)) %>%
  mutate(AccesstoBasicKnowledge = ifelse(is.na(AccesstoBasicKnowledge), AccesstoBasicKnowledge_pred, AccesstoBasicKnowledge))

### 
fw_gf <- fw_tmp %>%
  mutate(gapfill = gsub("NA, ", "", gapfill)) %>%
  select(-ends_with("pred")) %>%
  gather('category', 'score', -Country, -subcategory, -gapfill)

5.3 Gapfill op component

op_tmp <- spi %>%
  filter(subcategory == "op") %>%
  spread(category, score)

## gapfill AccesstoAdvancedEducation  
mod <- lm(AccesstoAdvancedEducation ~ PersonalRights + PersonalFreedomandChoice, data=op_tmp, na.action=na.exclude)
summary(mod)

## 
## Call:
## lm(formula = AccesstoAdvancedEducation ~ PersonalRights + PersonalFreedomandChoice, 
##     data = op_tmp, na.action = na.exclude)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -45.359  -9.485  -0.018   9.400  46.780 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              -29.47251    5.06333  -5.821 3.36e-08 ***
## PersonalRights             0.09203    0.05550   1.658   0.0993 .  
## PersonalFreedomandChoice   1.05223    0.09809  10.727  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15 on 152 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.5837, Adjusted R-squared:  0.5782 
## F-statistic: 106.6 on 2 and 152 DF,  p-value: < 2.2e-16

op_tmp$AccesstoAdvancedEducation_pred <-  predict(mod, newdata = select(op_tmp, PersonalRights, PersonalFreedomandChoice))

op_tmp <- op_tmp %>%
  mutate(gapfill = ifelse(is.na(AccesstoAdvancedEducation) & !is.na(AccesstoAdvancedEducation_pred), "AccesstoAdvancedEducation_gf", NA)) %>%
  mutate(AccesstoAdvancedEducation = ifelse(is.na(AccesstoAdvancedEducation), AccesstoAdvancedEducation_pred, AccesstoAdvancedEducation))

## gapfill PersonalFreedomandChoice  
mod <- lm(PersonalFreedomandChoice ~ AccesstoAdvancedEducation + PersonalRights, data=op_tmp, na.action=na.exclude)
summary(mod)

## 
## Call:
## lm(formula = PersonalFreedomandChoice ~ AccesstoAdvancedEducation + 
##     PersonalRights, data = op_tmp, na.action = na.exclude)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -22.518  -6.478   0.992   6.448  26.550 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               37.20031    1.74812  21.280  < 2e-16 ***
## AccesstoAdvancedEducation  0.41037    0.03793  10.818  < 2e-16 ***
## PersonalRights             0.15036    0.03221   4.668 6.56e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.306 on 154 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.6294, Adjusted R-squared:  0.6246 
## F-statistic: 130.8 on 2 and 154 DF,  p-value: < 2.2e-16

op_tmp$PersonalFreedomandChoice_pred <-  predict(mod, newdata = select(op_tmp, AccesstoAdvancedEducation, PersonalRights))

op_tmp <- op_tmp %>%
  mutate(gapfill = ifelse(is.na(PersonalFreedomandChoice) & !is.na(PersonalFreedomandChoice_pred), paste(gapfill, "PersonalFreedomandChoice_gf", sep=", "), gapfill)) %>%
  mutate(PersonalFreedomandChoice = ifelse(is.na(PersonalFreedomandChoice), PersonalFreedomandChoice_pred, PersonalFreedomandChoice))

## gapfill ToleranceandInclusion 
mod <- lm(ToleranceandInclusion ~ AccesstoAdvancedEducation + PersonalFreedomandChoice + PersonalRights, data=op_tmp, na.action=na.exclude)
summary(mod)

## 
## Call:
## lm(formula = ToleranceandInclusion ~ AccesstoAdvancedEducation + 
##     PersonalFreedomandChoice + PersonalRights, data = op_tmp, 
##     na.action = na.exclude)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -27.0043  -6.4644   0.8742   6.8250  21.1301 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                8.58874    3.61295   2.377   0.0187 *  
## AccesstoAdvancedEducation  0.01790    0.05219   0.343   0.7321    
## PersonalFreedomandChoice   0.47803    0.08364   5.715 5.84e-08 ***
## PersonalRights             0.22884    0.03643   6.282 3.53e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.587 on 148 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.6308, Adjusted R-squared:  0.6233 
## F-statistic: 84.28 on 3 and 148 DF,  p-value: < 2.2e-16

op_tmp$ToleranceandInclusion_pred <-  predict(mod, newdata = select(op_tmp, AccesstoAdvancedEducation, PersonalFreedomandChoice, PersonalRights))

op_tmp <- op_tmp %>%
  mutate(gapfill = ifelse(is.na(ToleranceandInclusion) & !is.na(ToleranceandInclusion_pred), paste(gapfill, "ToleranceandInclusion_gf", sep=", "), gapfill)) %>%
  mutate(ToleranceandInclusion = ifelse(is.na(ToleranceandInclusion), ToleranceandInclusion_pred, ToleranceandInclusion))

### 
op_gf <- op_tmp %>%
  mutate(gapfill = gsub("NA, ", "", gapfill)) %>%
  select(-ends_with("pred")) %>%
  gather('category', 'score', -Country, -subcategory, -gapfill)

5.4 Combine data

data_subcategory <- rbind(bhn_gf, op_gf, fw_gf) %>%
  group_by(Country, subcategory) %>%
  summarize(score = mean(score),
            gapfill = paste(unique(gapfill), collapse=", ")) %>%
  mutate(gapfill = gsub("NA, ", "", gapfill)) %>%
  mutate(gapfill = gsub("NA", NA, gapfill)) %>%
  mutate(score = ifelse(score > 100, 100, score),
         score = ifelse(score < 0, 0, score)) %>%
  rename(score_gf = score) %>%
  ungroup()


data_subcategory$subcategory <- mapvalues(data_subcategory$subcategory, 
                              from=c("bhn", "fw", "op"), 
                              to=c("bhn_score", "fw_score", "op_score"))

spi_subcategory <- spi %>%
  filter(subcategory %in% c("bhn_score", "fw_score", "op_score")) %>%
left_join(data_subcategory, by=c("Country", "subcategory")) %>%
  mutate(score = ifelse(is.na(score), score_gf, score)) %>%
  select(Country, score, subcategory, gapfill)

## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factors with different levels, coercing to character vector

## Average subcategories to get spi score
spi_gf <- spi_subcategory %>%
  group_by(Country) %>%
  summarize(score_gf = mean(score),
            gapfill = paste(unique(gapfill), collapse = ", ")) %>%
  mutate(gapfill = gsub(", NA", "", gapfill)) %>%
    mutate(gapfill = gsub("NA, ", "", gapfill)) %>%
  mutate(gapfill = gsub("NA", NA, gapfill)) %>%
  mutate(subcategory = "spi")

spi_only <- spi %>%
  filter(subcategory == "spi") %>%
  left_join(spi_gf, by = c("Country", "subcategory")) %>%
  mutate(score = ifelse(is.na(score), score_gf, score)) %>%
  select(Country, score, gapfill)

## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## character vector and factor, coercing into character vector

write.csv(spi_only, "int/Country_spi.csv", row.names=FALSE)

6 Assign countries to OHI regions

spi_only <- spi_only %>%
  mutate(Country = ifelse(Country=="C\xf4te d'Ivoire", "Cote d'Ivoire", Country)) 

spi_rgn <- name_2_rgn(df_in = spi_only, 
                       fld_name='Country')

## 
## These data were removed for not being of the proper rgn_type (eez,ohi_region) or mismatching region names in the lookup tables:
##                           tmp_type
## tmp_name                   landlocked
##   Afghanistan                       1
##   Armenia                           1
##   Austria                           1
##   Belarus                           1
##   Bhutan                            1
##   Bolivia                           1
##   Botswana                          1
##   Burkina Faso                      1
##   Burundi                           1
##   Central African Republic          1
##   Chad                              1
##   Czech Republic                    1
##   Ethiopia                          1
##   Hungary                           1
##   Kazakhstan                        1
##   Kyrgyzstan                        1
##   Laos                              1
##   Lesotho                           1
##   Luxembourg                        1
##   Macedonia                         1
##   Malawi                            1
##   Mali                              1
##   Moldova                           1
##   Mongolia                          1
##   Nepal                             1
##   Niger                             1
##   Paraguay                          1
##   Rwanda                            1
##   Serbia                            1
##   Slovakia                          1
##   Swaziland                         1
##   Switzerland                       1
##   Tajikistan                        1
##   Turkmenistan                      1
##   Uganda                            1
##   Uzbekistan                        1
##   Zambia                            1
##   Zimbabwe                          1

#122 ohi regions with data

7 Compare to WGI

wgi <- read.csv('../../../../ohi-global/eez2016/layers/wgi_all.csv') 

wgi_spi <- wgi %>%
  left_join(spi_rgn)

## Joining, by = "rgn_id"

plot(wgi_spi$resilience_score*100, wgi_spi$score, ylab="Social Progress Index, score", xlab="Worldwide Governance Score")
abline(0,1, col="red")

mod <- lm(score ~ resilience_score, data=wgi_spi)
summary(mod)

## 
## Call:
## lm(formula = score ~ resilience_score, data = wgi_spi)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.0523  -2.8331  -0.2531   3.9722  15.5516 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        30.949      1.602   19.31   <2e-16 ***
## resilience_score   71.005      3.011   23.58   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.958 on 119 degrees of freedom
##   (99 observations deleted due to missingness)
## Multiple R-squared:  0.8238, Adjusted R-squared:  0.8223 
## F-statistic: 556.2 on 1 and 119 DF,  p-value: < 2.2e-16

8 Second round of gapfilling

In this case, UN geopolitical regions and WGI scores are used to estimate regions with no data.

rgns_gf_un <- georegions %>%
  left_join(spi_rgn) %>%
  left_join(wgi)

## Joining, by = "rgn_id"
## Joining, by = "rgn_id"

## Compare models to select a gapfilling method
mod1 <- lm(score ~ r2, data = rgns_gf_un)
mod2 <- lm(score ~ r2 + resilience_score, data = rgns_gf_un)

mod3 <- lm(score ~ r1, data = rgns_gf_un)
mod4 <- lm(score ~ r1 + resilience_score, data = rgns_gf_un, na.action="na.exclude")

mod5 <- lm(score ~ resilience_score, data = rgns_gf_un)

mod6 <- lm(score ~ r1 + poly(resilience_score, 2), data = rgns_gf_un, na.action = "na.exclude")
mod7 <- lm(score ~ r1 + poly(resilience_score, 3), data = rgns_gf_un, na.action = "na.exclude")

plot(predict(mod7), rgns_gf_un$score)
abline(0,1)

plot(predict(mod4), rgns_gf_un$score)
abline(0,1)

plot(0:100, predict(mod7, newdata = data.frame(r1=1, resilience_score=seq(0, 1, by=0.01))), type="l")
points(rgns_gf_un$resilience_score*100, rgns_gf_un$score)

plot(0:100, predict(mod6, newdata = data.frame(r1=1, resilience_score=seq(0, 1, by=0.01))), type="l")
points(rgns_gf_un$resilience_score*100, rgns_gf_un$score)

AIC(mod1, mod2, mod3, mod4, mod5, mod6, mod7)

##      df      AIC
## mod1  3 961.7328
## mod2  4 779.6451
## mod3  3 978.0707
## mod4  4 737.0105
## mod5  3 779.2852
## mod6  5 738.9765
## mod7  6 736.0741

## Estimate missing data and gapfill
mod_gf <- lm(score ~ r1 + poly(resilience_score, 3), data = rgns_gf_un, na.action = na.exclude)
rgns_gf_un$score_pred <- predict(mod_gf, newdata = rgns_gf_un[, c("r1", "resilience_score")])

rgns_gf <- rgns_gf_un %>%
  mutate(gapfill = ifelse(is.na(score) & !is.na(score_pred), "UN georgn & WGI", gapfill)) %>%
  mutate(score = ifelse(is.na(score), score_pred, score)) %>%
  select(rgn_id, score, gapfill) %>%
  mutate(score = ifelse(score > 100, 100, score),
         score = ifelse(score < 0, 0, score))

9 Uninhabited regions

These regions will receive an NA for their score (when established population is < 100 people).

uninhab <- read.csv('../../../src/LookupTables/rgn_uninhabited_islands.csv') %>%
  filter(is.na(est_population) | est_population < 100)

rgns_gf_uninhab <- rgns_gf %>%
  mutate(score = ifelse(rgn_id %in% uninhab$rgn_id, NA, score)) %>%
  mutate(gapfill = ifelse(rgn_id %in% uninhab$rgn_id, NA, gapfill))

10 Save final data

gf_data <- rgns_gf_uninhab %>%
  select(rgn_id, method = gapfill) %>%
  mutate(gapfill = ifelse(is.na(method), 0, 1))
write.csv(gf_data, "output/spi_gf.csv", row.names=FALSE)


res_data <- rgns_gf_uninhab %>%
  select(rgn_id, resilience_score=score) %>%
  mutate(resilience_score = resilience_score/100)
write.csv(res_data, "output/spi_res.csv", row.names=FALSE)

prs_data <- rgns_gf_uninhab %>%
  select(rgn_id, pressure_score=score) %>%
  mutate(pressure_score = 1 - (pressure_score/100))
write.csv(prs_data, "output/spi_prs.csv", row.names=FALSE)

OHI: Social Progress Index

Compiled on Thu Sep 29 12:35:44 2016 by frazier