[REFERENCE RMD FILE: https://cdn.rawgit.com/OHI-Science/ohiprep/master/globalprep/mar/v2017/mar_dataprep.html]

1 Summary

This analysis converts FAO mariculture data into the data used to calculate the OHI global mariculture status score. It also calculates the genetic escapees pressure data layer for mariculture.

2 Updates from previous assessment

New year of FAO mariculture yield data, but no changes to sustainability or genetic escapee data or general methods.

A few small corrections to make sure the FAO_species fields matched in the species_list and Trujillo data.

3 Data Source

http://www.fao.org/fishery/statistics/software/fishstatj/en#downlApp Release date: July 2017 FAO Global Aquaculture Production Quantity 1950_2015

Downloaded: 8/10/2017

Description: Quantity (tonnes) of mariculture for each country, species, year.

Time range: 1950-2015

4 Methods

### Load FAO-specific user-defined functions
source('mar_fxs.R') # functions specific to mariculture dealing with compound countries
source('../../../src/R/fao_fxn.R') # function for cleaning FAO files
source('../../../src/R/common.R') # directory locations

5 FAO Mariculture data

Clean mariculture data: Filter freshwater mariculture, make long format, and clean FAO codes.

# Read the raw FAO aquaculture production table. check.names = FALSE keeps the
# column headers exactly as they appear in the file so they can be renamed
# by hand below.
mar <- read.csv(
  file.path(dir_M, 'git-annex/globalprep/_raw_data/FAO_mariculture/d2017/FAO_GlobalAquacultureProduction_Quantity_1950_2015.csv'),
  check.names = FALSE,
  stringsAsFactors = FALSE
)
head(mar)

# Replace the verbose FAO headers with short names used in the rest of the script.
mar <- rename(
  mar,
  country = `Country (Country)`,
  FAO_name = `Species (ASFIS species)`,
  fao = `Aquaculture area (FAO major fishing area)`,
  environment = `Environment (Environment)`
)
## Brackishwater    Freshwater        Marine 
##           491          1722          1041
# Keep brackish and marine production only; freshwater records are dropped.
mar <- filter(mar, environment %in% c("Brackishwater", "Marine"))

## long format and clean FAO codes:
# The year columns are selected by name instead of by a hard-coded position
# range (previously 5:70), so a new year of FAO data is picked up without
# editing this line.
# Assumes the year columns are headed by bare four-digit years (the file is
# read with check.names = FALSE) -- TODO confirm against the raw file.
mar <- mar %>%
  gather(key = "year", value = "value", matches("^[0-9]{4}$")) %>%
  # NOTE(review): the pipeline previously ended in a dangling %>% with the
  # cleaning step missing. fao_clean_data() is presumably the cleaning
  # function provided by fao_fxn.R (sourced near the top of this file) --
  # confirm the function name before running.
  fao_clean_data()

Update species names and exclude non-food species. I simplified the species_list. I cut the “species” name columns because it wasn’t clear what they were trying to accomplish and they created potential for error.

# Species lookup table, reduced to the FAO name, the exclusion flag, the
# alias and the taxon code.
mar_sp <- select(
  read.csv('raw/species_list.csv', stringsAsFactors = FALSE),
  FAO_name, exclude, alias, Taxon_code
)

# FAO names present in the production data but absent from the lookup table.
# An empty result means every name matched.
new.spp <- setdiff(mar$FAO_name, mar_sp$FAO_name)
new.spp
# if length(new.spp) > 0, hand check whether to keep (exclude seaweeds and
# species harvested for ornamental/medicinal), check if synonyms match
# Trujillo names

# Remove species not relevant to the mariculture goal (i.e., non-food species):
# attach the lookup columns, then keep only rows flagged exclude == 0.
mar <- mar %>%
  left_join(mar_sp, by = "FAO_name") %>%
  filter(exclude == 0)

# Global name change: use the alias where one is given, otherwise the FAO name.
mar <- mutate(mar, species = ifelse(!is.na(alias), alias, FAO_name))

# sum duplicates after name change (this also gets rid of the NA values)
# The pipeline is closed with ungroup() so the result carries no leftover
# grouping into later steps (it previously ended in a dangling %>%).
mar <- mar %>%
  filter(!is.na(value)) %>%
  group_by(country, fao, environment, species, year, Taxon_code) %>%
  summarize(value = sum(value)) %>%
  ungroup()

# eliminate time-series with all 0s
# total_value is the production summed over every row of a country/species
# pair; pairs whose total is zero are removed, then the helper column is dropped.
mar <- mar %>%
  group_by(country, species) %>%
  mutate(total_value = sum(value)) %>%
  filter(total_value > 0) %>%
  select(-total_value) %>%
  ungroup()

Convert country names to OHI regions

# Divide mariculture from countries that we report as separate regions (assume equal production in all regions)
# Netherlands Antilles: Conch restoration among Aruba, Bonaire, Curacao
# Channel Islands: Jersey and Guernsey
# Bonaire/S.Eustatius/Saba
# Yugoslavia SFR: no longer a country after 1992

# Normalise the spelling of Reunion, then split compound countries.
# The accented name is matched byte-wise ("R", one or two arbitrary bytes,
# "union") so the replacement works whether the accented character arrives
# as a single latin1 byte or as a two-byte UTF-8 sequence; comparing against
# the literal "R\xe9union" only matched the latin1 form.
mar <- mar %>%
  mutate(country = ifelse(grepl("^R.{1,2}union$", country, useBytes = TRUE),
                          "Reunion", country)) %>%
  mar_split()  # function in mar_fxs.R

# Convert country names to OHI regions.
mar_rgn <- name_2_rgn(df_in = mar,
                      flds_unique = c('species', 'fao', 'environment', 'Taxon_code', 'year'))
## These data were removed for not having any match in the lookup tables:
## Yugoslavia SFR 
##              1 
## These data were removed for not being of the proper rgn_type (eez,ohi_region) or mismatching region names in the lookup tables:
##                          tmp_type
## tmp_name                  disputed
##   Palestine, Occupied Tr.        9
## DUPLICATES found. Consider using collapse2rgn to collapse duplicates (function in progress).
## # A tibble: 15 x 1
##                     country
##                       <chr>
##  1                    Aruba
##  2                  Bonaire
##  3                    China
##  4     China, Hong Kong SAR
##  5                  Curacao
##  6               Guadeloupe
##  7                     Guam
##  8               Martinique
##  9               Montenegro
## 10     Northern Mariana Is.
## 11       Russian Federation
## 12    Serbia and Montenegro
## 13 Tanzania, United Rep. of
## 14       Un. Sov. Soc. Rep.
## 15                 Zanzibar
### sum values of regions with multiple subregions
# Closed with ungroup() (the pipeline previously ended in a dangling %>%).
mar_rgn <- mar_rgn %>%
  group_by(fao, environment, species, year, Taxon_code, rgn_id) %>%
  summarize(value = sum(value)) %>%
  ungroup()

# Spot check: print the 2013 records for region 130, ordered by species.
data.frame(filter(mar_rgn, rgn_id == 130, year == 2013) %>%
  arrange(species))
##                        fao   environment                species year
## 1 Pacific, Eastern Central        Marine Natantian decapods nei 2013
## 2 Pacific, Eastern Central Brackishwater  Pacific cupped oyster 2013
## 3 Pacific, Eastern Central Brackishwater   Spotted rose snapper 2013
## 4 Pacific, Eastern Central Brackishwater        Whiteleg shrimp 2013
##   Taxon_code rgn_id value
## 1      CRUST    130     0
## 2         BI    130    20
## 3          F    130   180
## 4         SH    130  2890

For some regions, a specific species can be altered so that it matches more general Trujillo sustainability data. In this case, I don’t want the name changes to be global because some regions may have more specific species data.

(Will explore this in the future, but will not implement this year).

# ## based on looking at the list, make a few name changes to match the regions Trujillo data
# # Chile name modification
# mar_rgn$species[mar_rgn$rgn_id==224 & mar_rgn$species == "Red abalone"] <- "Abalones nei"
# mar_rgn$species[mar_rgn$rgn_id==224 & mar_rgn$species == "Japanese abalone"] <- "Abalones nei"
# # China
# mar_rgn$species[mar_rgn$rgn_id==209 & mar_rgn$species == "Areolate grouper"] <- "Groupers nei"
# mar_rgn$species[mar_rgn$rgn_id==209 & mar_rgn$species == "Greasy grouper"] <- "Groupers nei"
# mar_rgn$species[mar_rgn$rgn_id==209 & mar_rgn$species == "Hong Kong grouper"] <- "Groupers nei"
# mar_rgn$species[mar_rgn$rgn_id==209 & mar_rgn$species == "Orange-spotted grouper"] <- "Groupers nei"
# # Honduras
# mar_rgn$species[mar_rgn$rgn_id==133 & mar_rgn$species == "Whiteleg shrimp"] <- "Penaeus shrimps nei"
# # Italy
# mar_rgn$species[mar_rgn$rgn_id==84 & mar_rgn$species == "Pacific cupped oyster"] <- "Cupped oysters nei"
# # New Zealand
# mar_rgn$species[mar_rgn$rgn_id==162 & mar_rgn$species == "Rainbow abalone"] <- "Abalones nei"
# # Pakistan
# mar_rgn$species[mar_rgn$rgn_id==53 & mar_rgn$species == "Penaeus shrimps nei"] <- "Marine crustaceans nei"
# # Philippines
# mar_rgn$species[mar_rgn$rgn_id==15 & mar_rgn$species == "Whiteleg shrimp"] <- "Penaeus shrimps nei"
# # Portugal
# mar_rgn$species[mar_rgn$rgn_id==183 & mar_rgn$species == "Golden carpet shell"] <- "Marine molluscs nei"
# mar_rgn$species[mar_rgn$rgn_id==183 & mar_rgn$species == "Peppery furrow"] <- "Marine molluscs nei"
# mar_rgn$species[mar_rgn$rgn_id==183 & mar_rgn$species == "Solen razor clams nei"] <- "Razor clams nei"
# mar_rgn$species[mar_rgn$rgn_id==183 & mar_rgn$species == "Atlantic bluefin tuna"] <- "Marine fishes nei"
# mar_rgn$species[mar_rgn$rgn_id==183 & mar_rgn$species == "Meagre"] <- "Marine fishes nei"
# mar_rgn$species[mar_rgn$rgn_id==183 & mar_rgn$species == "Seabasses nei"] <- "Marine fishes nei"
# mar_rgn$species[mar_rgn$rgn_id==183 & mar_rgn$species == "Soles nei"] <- "Marine fishes nei"
# mar_rgn$species[mar_rgn$rgn_id==183 & mar_rgn$species == "White seabream"] <- "Marine fishes nei"
# # Spain
# mar_rgn$species[mar_rgn$rgn_id==182 & mar_rgn$species == "Atlantic bluefin tuna"] <- "Tuna-like fishes nei"
# # Turkey
# mar_rgn$species[mar_rgn$rgn_id==76 & mar_rgn$species == "Atlantic bluefin tuna"] <- "Tuna-like fishes nei"
# mar_rgn$species[mar_rgn$rgn_id==76 & mar_rgn$species == "European seabass"] <- "Seabasses nei"
# ### sum values of regions with multiple subregions
# mar_rgn <- mar_rgn %>%
#   group_by(fao, environment, species, year, Taxon_code, rgn_id) %>%
#   summarize(value = sum(value)) %>%
#   ungroup()

5.0.1 Gapfilling

Fill in missing years after first harvest with 0 values

# Spreading to one column per year and gathering back creates an explicit
# NA row for every stock/year combination that was absent from the long table.
mar_rgn_spread <- spread(mar_rgn, year, value)
## [1] 1301   71
# Gather every column except the five stock identifiers. This replaces the
# hard-coded position range 6:71, which had to be edited by hand whenever a
# year of data was added.
mar_rgn_gf <- gather(mar_rgn_spread, "year", "value",
                     -fao, -environment, -species, -Taxon_code, -rgn_id) %>%
  arrange(rgn_id, species, year, Taxon_code, fao, environment)

## NA values are converted to zero.  I checked to make sure there 
## weren't instances in which it made more sense to carry the previous
## year's data forward as a method of gapfilling. This didn't seem to be the case
mar_rgn_gf <- mar_rgn_gf %>%
  mutate(year = as.numeric(as.character(year))) %>%
  # value_w_0 treats missing years as zero so a running total can be taken
  mutate(value_w_0 = ifelse(is.na(value), 0, value)) %>%
  group_by(fao, environment, species, Taxon_code, rgn_id) %>%
  # rows were arranged by year above, so cum_value is the running total over time
  mutate(cum_value = cumsum(value_w_0)) %>%
  ungroup() %>%
  # drops the years before a stock's first non-zero harvest
  filter(cum_value > 0) %>%
  # record which of the remaining values were NA and are filled with zero
  mutate(gap_0_fill = ifelse(is.na(value), "NA_to_zero", "0")) %>%
  mutate(value = ifelse(is.na(value), 0, value)) %>%
  select(-cum_value, -value_w_0)
##          0 NA_to_zero 
##      25842       3344
## 3344 of these out of 25842+3344 cases

Remove time series with less than four non-zero datapoints (assume these are not established mariculture programs).

# Keep a stock (region/species/fao/environment) only when more than three of
# its yearly values are above zero, then return the columns in a fixed order.
mar_rgn_gf <- mar_rgn_gf %>%
  group_by(rgn_id, species, fao, environment) %>%
  filter(length(value[value > 0]) > 3) %>%
  ungroup() %>%
  select(rgn_id, species, fao, environment, year, value, Taxon_code, gap_0_fill)

Add a unique identifier per cultivated stock that describes species, fao region, and environment.

# add a unique identifier per cultivated stock
# One row per cultivated stock (region/species/fao/environment), numbered in
# order of first appearance. seq_len(n()) is used instead of 1:n() so an
# empty table yields an empty code column rather than c(1, 0).
identifier <- mar_rgn_gf %>%
  select(rgn_id, species, fao, environment) %>%
  distinct() %>%
  mutate(species_code = seq_len(n()))

# Attach the stock code to every yearly record; the join keys are spelled
# out so the join does not depend on whichever column names happen to be shared.
mar_rgn_gf <- left_join(mar_rgn_gf, identifier,
                        by = c("rgn_id", "species", "fao", "environment"))
## Joining, by = c("rgn_id", "species", "fao", "environment")
maric <- mar_rgn_gf

Save file to estimate total mariculture yield per country.

# Write the gapfilled production table, without row names.
write.csv(x = maric, file = 'output/MAR_FP_data.csv', row.names = FALSE)

6 Trujillo sustainability scores

These data describe the sustainability and genetic escapes for country/species combinations (and, in a couple of cases, environment and fao region combinations). In cases where these data were not available for a specific country/species, we averaged the data across taxonomic groups to gapfill the missing data.

Convert country names to OHI region names.

# Trujillo sustainability data:
# Trujillo sustainability and genetic-escapee scores.
sus <- read.csv('raw/Truj_label_sust.csv', stringsAsFactors = FALSE, na.strings = NA)

## these need to be re-added (get cut when associated with region ids)
sus_no_rgn <- filter(sus, is.na(country))

# convert country names to OHI region names:
# NOTE(review): this call was truncated (its remaining arguments were lost
# and console output was fused into it). flds_unique is set here to the
# non-country key columns used when the Trujillo files are compared at the
# end of this script -- confirm against the original call.
sus_rgn <- name_2_rgn(df_in = sus,
                      flds_unique = c("species_fao", "fao", "environment"))
## These data were removed for not having any match in the lookup tables:
## < table of extent 0 >
## These data were removed for not having any match in the lookup tables:
## < table of extent 0 >
# Re-attach the rows that have no country (the pipeline previously ended in a
# dangling %>%).
sus_rgn <- bind_rows(sus_rgn, sus_no_rgn)

# check the fao spp list in the Trujillo sustainability file matches FAO mariculture species
# Prints the Trujillo species that are absent from the mariculture data:
# species that no longer have a mariculture industry, or that are not included
# because they are freshwater or non-food.
unique(sus_rgn$species_fao[!(sus_rgn$species_fao %in% maric$species)])
##  [1] "River Plata mussel"    "Gracilaria seaweeds"  
##  [3] "Laver (Nori)"          "Atlantic wolffish"    
##  [5] "Spotted wolffish"      "Bagrid catfish"       
##  [7] "Freshwater fishes nei" "Tilapias nei"         
##  [9] "Torpedo catfishes nei" "Brill"                
## [11] "Razor clams nei"       "Brown seaweeds"       
## [13] "Brown seaweeds (Pac)"  "Silver carp"          
## [15] "Sturgeons nei"         "Blackchin tilapia"    
## [17] "Giant river prawn"     "Nile tilapia"         
## [19] "Aquatic plants nei"    "Tuna-like fishes nei" 
## [21] "Algae & marine plants" "All others"           
## [23] "Bivalve & gastropod"   "Blue tilapia"         
## [25] "Cephalopod"            "Crabs and lobsters"   
## [27] "Eucheuma seaweeds nei" "Fish"                 
## [29] "Importo totale"        "Inverts"              
## [31] "Japanese kelp"         "Molluscs"             
## [33] "Mozambique tilapia"    "Non-crustacean invert"
## [35] "Nori nei"              "Shrimp & prawn"       
## [37] "Spiny eucheuma"        "Trouts nei"           
## [39] "Tunicate"              "Urchin"               
## [41] "Wakame"                "Warty gracilaria"
# Mariculture species with no Trujillo data, printed in sorted order.
sort(unique(maric$species[!(maric$species %in% sus_rgn$species_fao)]))
##   [1] "Akiami paste shrimp"            "Amberjacks nei"                
##   [3] "Anadara clams nei"              "Aquatic invertebrates nei"     
##   [5] "Areolate grouper"               "Atlantic ditch shrimp"         
##   [7] "Australian mussel"              "Banded carpet shell"           
##   [9] "Bastard halibut"                "Bastard halibuts nei"          
##  [11] "Bear paw clam"                  "Blackhead seabream"            
##  [13] "Blacklip pearl oyster"          "Blackspot(=red) seabream"      
##  [15] "blood cockle"                   "Blue crab"                     
##  [17] "Blue swimming crab"             "Brine shrimp"                  
##  [19] "Brown tiger prawn"              "Butter clam"                   
##  [21] "Caramote prawn"                 "Chars nei"                     
##  [23] "Chilean flat oyster"            "Chilean mussel"                
##  [25] "Cholga mussel"                  "Choro mussel"                  
##  [27] "Cobia"                          "Cockles nei"                   
##  [29] "Common dentex"                  "Common pandora"                
##  [31] "Common prawn"                   "Cortez oyster"                 
##  [33] "Crevalle jack"                  "Crimson seabream"              
##  [35] "Croakers, drums nei"            "Crocus giant clam"             
##  [37] "Donax clams"                    "Dotted gizzard shad"           
##  [39] "Drums nei"                      "Eastern king prawn"            
##  [41] "Eastern school shrimp"          "Elongate giant clam"           
##  [43] "European flounder"              "European whitefish"            
##  [45] "Filefishes, leatherjackets nei" "Filefishes nei"                
##  [47] "Finfishes nei"                  "Flathead lobster"              
##  [49] "Fleshy prawn"                   "Florida pompano"               
##  [51] "Fluted giant clam"              "Fourfinger threadfin"          
##  [53] "Gastropods nei"                 "Gazami crab"                   
##  [55] "Giant clam"                     "Giant clams nei"               
##  [57] "Globose clam"                   "Golden carpet shell"           
##  [59] "Golden trevally"                "Goldlined seabream"            
##  [61] "Goldsilk seabream"              "Greasy grouper"                
##  [63] "Great Atlantic scallop"         "Greater amberjack"             
##  [65] "Green crab"                     "Green tiger prawn"             
##  [67] "Groundfishes nei"               "Gudgeons, sleepers nei"        
##  [69] "Hong Kong grouper"              "Hooded oyster"                 
##  [71] "Horned turban"                  "Horse mussels nei"             
##  [73] "Humpback grouper"               "Indian backwater oyster"       
##  [75] "Indo-Pacific swamp crab"        "Inflated ark"                  
##  [77] "Jacks, crevalles nei"           "Japanese abalone"              
##  [79] "Japanese amberjack"             "Japanese eel"                  
##  [81] "Japanese hard clam"             "Japanese jack mackerel"        
##  [83] "Japanese meagre"                "Japanese seabass"              
##  [85] "Japanese sea cucumber"          "Japanese spiny lobster"        
##  [87] "Jellyfishes nei"                "Korean rockfish"               
##  [89] "Largemouth black bass"          "Large yellow croaker"          
##  [91] "Lebranche mullet"               "Lefteye flounders nei"         
##  [93] "Mackerels nei"                  "Malabar grouper"               
##  [95] "Mangrove cupped oyster"         "Marbled spinefoot"             
##  [97] "Marine crabs nei"               "Meagre"                        
##  [99] "Mud spiny lobster"              "Northern quahog(=Hard clam)"   
## [101] "Northern white shrimp"          "Octopuses, etc. nei"           
## [103] "Okhotsk atka mackerel"          "Olympia oyster"                
## [105] "Orange mud crab"                "Pacific bluefin tuna"          
## [107] "Pacific calico scallop"         "Pacific geoduck"               
## [109] "Pacific horse clam"             "Pacific lion's paw"            
## [111] "Pacific littleneck clam"        "Palaemonid shrimps nei"        
## [113] "Papuan black snapper"           "Pargo breams nei"              
## [115] "Pearl oyster shells nei"        "Penguin wing oyster"           
## [117] "Pen shells nei"                 "Peppery furrow"                
## [119] "Periwinkles nei"                "Pod razor shell"               
## [121] "Pollack"                        "Porgies, seabreams nei"        
## [123] "Portunus swimcrabs nei"         "Puffers nei"                   
## [125] "Queen conch"                    "Queen scallop"                 
## [127] "Rainbow abalone"                "Red abalone"                   
## [129] "Red porgy"                      "Redtail prawn"                 
## [131] "Righteye flounders nei"         "Russell's snapper"             
## [133] "Salmonids nei"                  "Salmonoids nei"                
## [135] "Sand gaper"                     "Scats"                         
## [137] "Sciaenas nei"                   "Scorpionfishes nei"            
## [139] "Sea cucumbers nei"              "Sea snails"                    
## [141] "Sea squirts nei"                "Senegalese sole"               
## [143] "Sharpsnout seabream"            "Shi drum"                      
## [145] "Sixfinger threadfin"            "Slipper cupped oyster"         
## [147] "Smooth giant clam"              "Snappers, jobfishes nei"       
## [149] "Snooks(=Robalos) nei"           "Snubnose pompano"              
## [151] "Sobaity seabream"               "[Solea spp]"                   
## [153] "Solen razor clams nei"          "Soles nei"                     
## [155] "South American rock mussel"     "Southern Australia scallop"    
## [157] "Southern white shrimp"          "Speckled shrimp"               
## [159] "Spiny lobsters nei"             "Spotted rose snapper"          
## [161] "Spotted seabass"                "Squaretail mullet"             
## [163] "Stony sea urchin"               "Streaked spinefoot"            
## [165] "Striped bass, hybrid"           "Stromboid conchs nei"          
## [167] "Sydney cupped oyster"           "Thinlip grey mullet"           
## [169] "Tropical spiny lobsters nei"    "Trumpet emperor"               
## [171] "Variegated scallop"             "Venus clams nei"               
## [173] "Warty venus"                    "Whitemouth croaker"            
## [175] "White seabream"                 "White-spotted spinefoot"       
## [177] "White trevally"                 "Yellowfin seabream"

7 FAO mariculture and sustainability scores

Match the sustainability score to the FAO mariculture data.

The following joins the sustainability scores to regions/species that have Trujillo data.

# NOTE(review): mar_sus, Sust_c_sp_env and Sust_c_sp_fao were used below
# without ever being created. The two joins that build them are restored
# here, mirroring the country/species/fao join in the genetic escapes
# section of this script -- confirm against the original analysis.

# join taxa specific to country/species/environment
c_sp_env <- sus_rgn %>%
  filter(match_type == 'c_sp_env') %>%
  select(rgn_id, species = species_fao, environment, Sust_c_sp_env = Maric_sustainability)

mar_sus <- maric %>%
  left_join(c_sp_env, by = c("species", "environment", "rgn_id"))

# join taxa specific to country/species/fao region
c_sp_fao <- sus_rgn %>%
  filter(match_type == 'c_sp_fao') %>%
  select(rgn_id, species = species_fao, fao, Sust_c_sp_fao = Maric_sustainability)

mar_sus <- mar_sus %>%
  left_join(c_sp_fao, by = c("species", "fao", "rgn_id"))

# join taxa specific to country/species
c_sp <- sus_rgn %>%
  filter(match_type == 'c_sp') %>%
  select(rgn_id, species = species_fao, Sust_c_sp = Maric_sustainability)

mar_sus <- mar_sus %>%
  left_join(c_sp, by = c("species", "rgn_id"))
### merge these into a single sustainability score
# Precedence: environment-specific score, then fao-region-specific score,
# then the plain country/species score.
mar_sus <- mar_sus %>%
  mutate(Sust = ifelse(!is.na(Sust_c_sp_env), Sust_c_sp_env, Sust_c_sp_fao)) %>%
  mutate(Sust = ifelse(is.na(Sust), Sust_c_sp, Sust)) %>%
  select(-Sust_c_sp_env, -Sust_c_sp_fao, -Sust_c_sp)

This joins the sustainability data that are gapfilled either at the species level (average of specific species/genera across regions) or at a coarser taxonomic level, and documents which data are gapfilled and how.

## Gapfilled at the species/genera level:
# Non-"actuals" rows matched at the species level, reduced to the species
# name, the gapfill label and the score.
gf_sp_sus <- sus_rgn %>%
  filter(gapfill != "actuals", match_type == "species") %>%
  select(species = species_fao, gapfill, Sust_gf_sp = Maric_sustainability)

## check that there are no duplicated species_fao
gf_sp_sus[duplicated(gf_sp_sus$species), ]
## [1] species    gapfill    Sust_gf_sp
## <0 rows> (or 0-length row.names)
# Match gapfilling values by species
mar_sus_gf <- left_join(mar_sus, gf_sp_sus, by = 'species')

# Gapfilled at the coarse taxon level:
gf_taxon_sus <- sus_rgn %>%
  filter(gapfill != "actuals", match_type == "taxon") %>%
  select(Taxon_code = taxon, Sust_gf_taxon = Maric_sustainability)

# Match gapfilling values by taxon code
mar_sus_gf <- left_join(mar_sus_gf, gf_taxon_sus, by = c('Taxon_code'))

## genus_average    sp_average 
##          2269         16518
# Obtain a sustainability score for each record, and a book-keeping column of
# whether it's actual or gap-filled.
# The assignments inside mutate() run in order, so each step sees the result
# of the one before it:
#   1. records that already have a score are labelled "none";
#   2. missing scores take the species-level gapfill value;
#   3. records still missing a score are labelled "taxon_average";
#   4. and take the taxon-level gapfill value.
mar_sus_final <- mar_sus_gf %>%
  mutate(
    gapfill = ifelse(!is.na(Sust), "none", gapfill),
    Sust = ifelse(is.na(Sust), Sust_gf_sp, Sust),
    gapfill = ifelse(is.na(Sust), "taxon_average", gapfill),
    Sust = ifelse(is.na(Sust), Sust_gf_taxon, Sust),
    taxa_code = paste(species, species_code, sep = "_")
  ) %>%
  select(rgn_id, species, species_code, taxa_code, year,
         gapfill_sus = gapfill, gapfill_fao = gap_0_fill,
         tonnes = value, Sust)

## save data layers
# Harvest (tonnes) per region, stock and year.
mar_harvest_tonnes <- mar_sus_final %>%
  select(rgn_id, taxa_code, year, tonnes)
## [1] 0
write.csv(mar_harvest_tonnes, 'output/mar_harvest_tonnes.csv', row.names = FALSE)

# Gapfilling record for the harvest layer: the tonnes column carries the
# zero-fill label instead of the value.
mar_harvest_tonnes_gf <- mar_sus_final %>%
  select(rgn_id, taxa_code, year, tonnes = gapfill_fao)
# Fixed: this file was previously written from mar_harvest_tonnes, so the
# gapfilling record was never saved.
write.csv(mar_harvest_tonnes_gf, 'output/mar_harvest_tonnes_gf.csv', row.names = FALSE)

# Sustainability coefficient per region and stock. Every row is given
# year = 2012, so the per-year rows of a stock become exact duplicates.
# NOTE(review): the pipeline previously ended in a dangling %>%; distinct()
# is used here to collapse those duplicates -- confirm this matches the
# original final step.
mar_sustainability_score <- mar_sus_final %>%
  mutate(year = 2012) %>%
  select(rgn_id, year, taxa_code, sust_coeff = Sust) %>%
  distinct()
## [1] 0
write.csv(mar_sustainability_score, 'output/mar_sustainability.csv', row.names = FALSE)

# Gapfilling record for the sustainability layer: sust_coeff carries the
# gapfill label. Collapsed with distinct() for the same reason as above.
mar_sustainability_score_gf <- mar_sus_final %>%
  select(rgn_id, taxa_code, sust_coeff = gapfill_sus) %>%
  distinct()
# Fixed: this file was previously written from mar_sustainability_score.
write.csv(mar_sustainability_score_gf, 'output/mar_sustainability_gf.csv', row.names = FALSE)

8 Genetic escapes data

These data are used as a pressure layer to describe the risk of genetic escapees due to mariculture.

First merge with the species data (no gapfilling) for each country/species/fao region combination.

# can eliminate the environment category because these have the same scores
# Environment-specific matches are relabelled as country/species matches and
# the escapee score is averaged within each remaining key combination.
# ungroup() closes the pipeline (it previously ended in a dangling %>%);
# without it the select() calls below would keep the grouping columns too.
esc <- sus_rgn %>%
  filter(!is.na(Genetic.escapees)) %>%
  mutate(match_type = ifelse(match_type == "c_sp_env", "c_sp", match_type)) %>%
  group_by(rgn_id, species = species_fao, fao, match_type, taxon, gapfill) %>%
  summarize(Genetic.escapees = mean(Genetic.escapees)) %>%
  ungroup()

# join taxa specific to country/species/fao
c_sp_fao <- esc %>%
  filter(match_type == 'c_sp_fao') %>%
  select(rgn_id, species, fao, Esc_c_sp_fao = Genetic.escapees)

mar_esc <- maric %>%
  left_join(c_sp_fao, by = c("species", "fao", "rgn_id"))

# join taxa specific to country/species
c_sp <- esc %>%
  filter(match_type == 'c_sp') %>%
  select(rgn_id, species, Esc_c_sp = Genetic.escapees)

mar_esc <- mar_esc %>%
  left_join(c_sp, by = c("species", "rgn_id"))

### merge these into a single escapee score
# The fao-region-specific score takes precedence over the country/species score.
mar_esc <- mar_esc %>%
  mutate(Escapees = ifelse(!is.na(Esc_c_sp_fao), Esc_c_sp_fao, Esc_c_sp)) %>%
  select(-Esc_c_sp_fao, -Esc_c_sp)

Join the genetic escapee data that are gapfilled either at the species level (average of specific species/genera across regions) or at a coarser taxonomic level, and document which data are gapfilled and how.

## Gapfilled at the species/genera level:
# Non-"actuals" escapee rows matched at the species level.
gf_species_esc <- esc %>%
  filter(gapfill != "actuals", match_type == "species") %>%
  select(species, gapfill, Esc_gf_sp = Genetic.escapees)

## check that there are no duplicated species
gf_species_esc[duplicated(gf_species_esc$species), ]
## # A tibble: 0 x 3
## # ... with 3 variables: species <chr>, gapfill <chr>, Esc_gf_sp <dbl>
# Match gapfilling values by species
mar_esc_gf <- left_join(mar_esc, gf_species_esc, by = 'species')

# Gapfilled at the coarse taxon level:
# NOTE(review): this reuses the name gf_taxon_sus from the sustainability
# section although it now holds escapee values.
gf_taxon_sus <- esc %>%
  filter(gapfill != "actuals", match_type == "taxon") %>%
  select(Taxon_code = taxon, Esc_gf_taxon = Genetic.escapees)

# Match gapfilling values by taxon code
mar_esc_gf <- left_join(mar_esc_gf, gf_taxon_sus, by = c('Taxon_code'))

##          2269
# Obtain an escapee score for each record, and a book-keeping column of
# whether it's actual or gap-filled.
# The assignments inside mutate() run in order: records with a score are
# labelled "none"; missing scores take the species-level value; records
# still missing are labelled "taxon_average" and take the taxon-level value.
tonnes_esc <- mar_esc_gf %>%
  mutate(
    gapfill = ifelse(!is.na(Escapees), "none", gapfill),
    Escapees = ifelse(is.na(Escapees), Esc_gf_sp, Escapees),
    gapfill = ifelse(is.na(Escapees), "taxon_average", gapfill),
    Escapees = ifelse(is.na(Escapees), Esc_gf_taxon, Escapees)
  ) %>%
  select(rgn_id, species, species_code, year,
         gapfill_escapees = gapfill, tonnes = value, Escapees)

Final formatting of the escapee data. This is used as a pressure layer.

# for each region/year: average the genetic escape probability for each taxa based on tonnes mariculture
# ungroup() is added so the result carries no leftover grouping.
genEscapes <- tonnes_esc %>%
  group_by(rgn_id, year) %>%
  summarize(genEscapes = weighted.mean(Escapees, tonnes, na.rm = TRUE)) %>%
  ungroup()

# obtain corresponding gapfilling information for each region (average of gapfilled data, weighted by tonnes of mariculture).
# NOTE(review): records with gapfill_escapees == "none" are coded 1 and all
# others 0, so the weighted mean below is the tonnes-weighted share of
# records that were NOT gapfilled -- confirm this is the intended direction.
genEscapes_gf <- tonnes_esc %>%
  mutate(gapfill = ifelse(gapfill_escapees == "none", 1, 0)) %>%
  group_by(rgn_id, year) %>%
  summarize(genEscapes = weighted.mean(gapfill, tonnes, na.rm = TRUE)) %>%
  ungroup() %>%
  filter(year == 2015) %>%
  select(rgn_id, pressures.score = genEscapes) %>%
  # weighted.mean() yields NaN when a region's weights sum to zero; store NA
  # instead. is.nan() replaces the comparison against the string "NaN".
  mutate(pressures.score = ifelse(is.nan(pressures.score), NA, pressures.score))
write.csv(genEscapes_gf, 'output/GenEsc_gf.csv', row.names = FALSE)

# create the escapee data layers:
# Rename the score column to pressure_score and write one row per region/year.
data <- select(genEscapes, rgn_id, year, pressure_score = genEscapes)
write.csv(x = data, file = 'output/GenEsc.csv', row.names = FALSE)

# Compare this year's escapee pressure with the previous assessment.
# Previous scores, with the score column renamed so both can sit side by side.
old <- select(read.csv("../v2016/output/GenEsc_v2016.csv"),
              rgn_id, prs_score_old = pressure_score)

# Current scores, restricted to 2014.
new <- filter(read.csv("../v2017/output/GenEsc.csv"), year == 2014)

# Prints old and new scores matched on region.
full_join(old, new, by = "rgn_id")

# Compare the Trujillo input tables of the two assessments, matching rows on
# country, species, fao region and environment.
tmp_old <- read.csv("../v2016/raw/Truj_label_sust.csv")
tmp_new <- read.csv("../v2017/raw/Truj_label_sust.csv")

test <- full_join(tmp_new, tmp_old,
                  by = c("country", "species_fao", "fao", "environment"))
