'''
Created on 19.02.2013
:author: Sebastian Illing
:contact: sebastian.illing@met.fu-berlin.de
Copyright (C) 2014 Sebastian Illing
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>
'''
from cdo import *
cdo = Cdo()
from time import time
from metricAbstract import MetricAbstract
from taylorplot import TaylorPlotMurCSS
class MsssError(Exception): pass
[docs]class Msss(MetricAbstract):
'''
Class to calculate the MSSS. Call method "analyze" to get results
:param output: Path where the results are saved
:param output_plots: Path where the plots are saved
:param decadals: a list with decadal experiments, i.e. [1960,1960,...,1995]
:param variable: CMOR-Name of variable like 'tas' (Near Surface Temperature)
:param project1: CMOR-Name of the project like CMIP5 or BASELINE1
:param product1: CMOR-Parameter
:param institute1: CMOR-Name of the institute like MPI-M
:param model1: CMOR-Name of the used model like MPI-ESM-LR
:param experiment1: Experiment name like decs4e or historical
:param project2: CMOR-Name of the project like CMIP5 or BASELINE1
:param product2: CMOR-Parameter
:param institute2: CMOR-Name of the institute like MPI-M
:param model2: CMOR-Name of the used model like MPI-ESM-LR
:param experiment2: Experiment name like decs4e or historical
:param ensemblemembers: Comma separated string of esemble members like 'r1i1p1,r2i1p1,...'
:param leadtimes: Leadtimes to analyze, ie. 1,2-9
:param observation: Observation or Reanalysis Experiment, HadCrut or ERA-Int
:param maskMissingValues: Boolean
:param result_grid: Griddescription of resultgrid like r72x36
:param cache: Path for cachedir during analysis
:param baseDir: Path of Class to find default files
:param timeFreq: Timefrequency of the files, ie 'mon' or 'day'
:param bootstrap:
:param bootstrap2:
:param obsRemapped:
:param level: For 3D-Files --> Select a single level
:param lonlatbox: If you want to select a specific lonlat-Box
:param fieldmean: Boolean - If you want to calculate global mean
:param colormap: Matplotlib Colormap used for ploting
:param observation_type: NOT USED ANYMORE!!!
:param reanpath: NOT USED ANYMORE!!!
'''
#Here the output filenames and folder structure get defined
outputFolderStruct = 'TIME_variable_project1_product1_institute1_model1_experiment1_project2_product2_institute2_model2_experiment2_obsExp_LEVEL_SELLONLAT_YEARRANGE'.split('_')
insideFolderNameFlag = 'project_product_model_experiment'.split('_')
fileNameFlag = 'LEADTIMES_variable_project_product_model_experiment_YEARRANGE'.split('_')
fileNameFlagVs = 'LEADTIMES_variable_project1_product1_model1_experiment1_vs_project2_product2_model2_experiment2_YEARRANGE'.split('_')
def __init__( self,
output='/tmp/msss/output/',
output_plots='/tmp/msss/output/',
decadals='1960,1965,1970,1975,1980,1985,1990,1995,2000',
variable='tas',
#CMOR parameter for input1
project1='baseline1',
product1='output',
institute1='mpi-m',
model1 = 'mpi-esm-lr',
experiment1='decs4e',
ensemblemembers1='*',
#CMOR parameter for input2
project2='baseline0',
product2='output1',
institute2='mpi-m',
model2='mpi-esm-lr',
experiment2='decadal',
ensemblemembers2='*',
leadtimes='1,2-9',
observation = 'HadCrut',
observation_ensemble = '*',
maskMissingValues = True,
result_grid=None,
cache='/tmp/msss/cache/',
baseDir = '..',
timeFreq = 'mon',
#for bootstrapping
bootstrap=None,
bootstrap2=None,
obsRemapped = None,
leadtimes_mode = 'yearly',
level=None,
lonlatbox=None,
fieldmean=False, #not finally implemented
colormap='RedBlu', #not used
observation_type = 'REANALYSIS', #not used ???
reanpath='', #not used ???
months = None,
):
'''
Constructor. Set parameters and calls parent constructor (metricAbstract)
:param output: Path where the results are saved
:param output_plots: Path where the plots are saved
:param decadals: a list with decadal experiments, i.e. [1960,1960,...,1995]
:param variable: CMOR-Name of variable like 'tas' (Near Surface Temperature)
:param project1: CMOR-Name of the project like CMIP5 or BASELINE1
:param product1: CMOR-Parameter
:param institute1: CMOR-Name of the institute like MPI-M
:param model1: CMOR-Name of the used model like MPI-ESM-LR
:param experiment1: Experiment name like decs4e or historical
:param project2: CMOR-Name of the project like CMIP5 or BASELINE1
:param product2: CMOR-Parameter
:param institute2:CMOR-Name of the institute like MPI-M
:param model2: CMOR-Name of the used model like MPI-ESM-LR
:param experiment2: Experiment name like decs4e or historical
:param ensemblemembers: Comma separated string of esemble members like 'r1i1p1,r2i1p1,...'
:param leadtimes: Leadtimes to analyze, ie. 1,2-9
:param observation: Observation or Reanalysis Experiment, HadCrut or ERA-Int
:param maskMissingValues: Boolean
:param result_grid: Griddescription of resultgrid like r72x36
:param cache: Path for cachedir during analysis
:param baseDir: Path of Class to find default files
:param timeFreq: Timefrequency of the files, ie 'mon' or 'day'
:param bootstrap:
:param bootstrap2:
:param obsRemapped:
:param level: For 3D-Files --> Select a single level
:param lonlatbox: If you want to select a specific lonlat-Box
:param fieldmean: Boolean - If you want to calculate global mean
:param colormap: Matplotlib Colormap used for ploting
:param observation_type: NOT USED ANYMORE!!!
:param reanpath: NOT USED ANYMORE!!!
'''
#Add kwargs to attributes
listOfAttributes = ['decadals','variable',
'project1','product1','institute1','model1','experiment2',
'project2','product2','institute2','model2','experiment1',
'leadtimes','maskMissingValues','level','lonlatbox','fieldmean',
'bootstrap','bootstrap2','obsRemapped', 'observation_ensemble',
'observation_type','colormap','timeFreq']
for attr in listOfAttributes:
setattr(self, attr, locals()[attr])
if ensemblemembers1 != None and ensemblemembers1 != '*':
self.ensemblemembers1 = ensemblemembers1.split(',')
else:
self.ensemblemembers1 = '*'
if ensemblemembers2 != None and ensemblemembers2 != '*':
self.ensemblemembers2 = ensemblemembers2.split(',')
else:
self.ensemblemembers2 = '*'
super(Msss,self).__init__(tmpDir = self.checkPath(cache), output=output, output_plots=output_plots,
baseDir=baseDir, result_grid=result_grid, observation=observation,
leadtimes_mode=leadtimes_mode, decadals=decadals, months=months )
def prepareInput(self):
self.findFiles.observation = self.observation #WORKAROUND
self.observationDict,self.input1Remapped,self.observationRemapped,self.input2Remapped = [{} for dummy in range(4)]
if self.bootstrap is None:
print 'Searching Files'
countYears = len(self.decadals)
poolArgs = self.getPoolArgs(countYears, self.findFiles, list(self.decadals), self.project2, self.model2, self.variable,
'mon', self.product2, self.ensemblemembers2, self.institute2, self.experiment2,
self.maxLeadtime, self.minLeadtime, 'getFiles')
self.input2Dict = self.listToYeardict(self.multiProcess(poolArgs))
#print self.input2Dict
poolArgs = self.getPoolArgs(countYears, self.findFiles, list(self.decadals), self.project1,self.model1,self.variable,
'mon',self.product1,self.ensemblemembers1,self.institute1,self.experiment1,
self.maxLeadtime, self.minLeadtime,'getFiles')
self.input1Dict = self.listToYeardict(self.multiProcess(poolArgs))
for year in self.decadals:
self.observationDict[year] = self.findFiles.getReanalysis(year, self.observation_type, self.obsExp, self.variable, maxLeadtime=self.maxLeadtime, observation_ensemble=self.observation_ensemble, minLeadtime=self.minLeadtime)
#print self.observationDict
print 'Remapping Files'
for year in self.decadals:
self.input1Remapped[year] = self._remapFiles(self.input1Dict[year], flag=self.product1)
self.input2Remapped[year] = self._remapFiles(self.input2Dict[year], flag=self.product2)
self.observationRemapped[year] = self.remapFile(self.observationDict[year])
self.ensList = self.input1Dict
self.histList = self.input2Dict
else:
for year in self.decadals:
self.input2Remapped[year] = self.bootstrap2[year]
self.input1Remapped[year] = self.bootstrap[year]
if self.obsRemapped == None:
self.observationDict[year] = self.findFiles.getReanalysis(year, self.observation_type, self.obsExp, self.variable, maxLeadtime=self.maxLeadtime, observation_ensemble=self.observation_ensemble, minLeadtime=self.minLeadtime)
self.observationRemapped[year] = self.remapFile(self.observationDict[year])
else:
self.observationRemapped[year] = self.obsRemapped[year]
[docs] def analyze(self):
'''
Main method to calculate the MSSS. Following steps are performed:
1. Searching for file using solr_search
2. Remapping Files to coarser grid
3. Calculating Ensemble Mean
4. Calculating cross-validated mean
5. Calculating anomalies for both models
6. Analyzing different timeranges
'''
#check if prepare output was called or variables are set
try:
self.input1Remapped
self.input2Remapped
self.observationRemapped
except AttributeError:
raise MsssError, 'Please run "prepareInput() first or provide input data'
#select lon/lat box
if self.lonlatbox is not None:
print 'Selecting lon-lat-box %s' %(self.lonlatbox)
for year in self.decadals:
self.input1Remapped[year] = self.sellonlatbox(self.input1Remapped[year])
self.input2Remapped[year] = self.sellonlatbox(self.input2Remapped[year])
self.observationRemapped[year] = self._sellonlatbox(self.observationRemapped[year])
if self.fieldmean:
print 'Calculating field mean'
for year in self.decadals:
self.input1Remapped[year] = self.fieldMean(self.input1Remapped[year])
self.input2Remapped[year] = self.fieldMean(self.input2Remapped[year])
self.observationRemapped[year] = self._fieldMean(self.observationRemapped[year])
if self.bootstrap is None:
print 'Calculating ensemble mean'
input1Ensemblemean = self.getEnsembleMean(self.input1Remapped)
input2Ensemblemean = self.getEnsembleMean(self.input2Remapped)
else:
input1Ensemblemean = self.input1Remapped
input2Ensemblemean = self.input2Remapped
print 'Calculating crossvalidated mean'
input1CrossvalMean = self.getCrossValMean(input1Ensemblemean, 'model')
input2CrossvalMean = self.getCrossValMean(input2Ensemblemean, 'hist')
if self.obsRemapped == None or self.bootstrap == True:
obsCrossValMean = self.getCrossValMean(self.observationRemapped, 'obs')
self.observationRemappedAnom = self.getAnomalies(self.observationRemapped, obsCrossValMean)
self.obsRemapped = self.observationRemappedAnom
print 'Calculating Anomalies'
input1Anomalies = self.getAnomalies(input1Ensemblemean, input1CrossvalMean)
input2Anomalies = self.getAnomalies(input2Ensemblemean, input2CrossvalMean)
self.constantField = self.createConstantFile(self.gridFile)
self.misvalMask = self.calcMissingValueMask(self.observationRemappedAnom)
rangeCount = len(self.getYearRange())
firstYearList = list()
lastYearList = list()
for yr in self.getYearRange():
firstYearList.append(yr[0])
lastYearList.append(yr[1])
#escape attributes
self.escapeAttributes()
poolArgs = self.getPoolArgs(rangeCount, self, input1Anomalies,self.observationRemappedAnom,input2Anomalies,
firstYearList,lastYearList,'analyzeYearRange')
resultList = self.multiProcess(poolArgs)
if self.fieldmean:
#construct a taylor plot
taylor = TaylorPlotMurCSS(negativeCorr=True)
taylor.constructPlot(resultList, self.outputPlots)
else:
#plot fields
if self.bootstrap is None:
fileList = list()
for rList in resultList:
fileList = fileList + rList
filesToPlot = len(fileList)
#print fileList
min = np.array([-1]*filesToPlot)
max = np.array([1]*filesToPlot)
std_ind = [i for i,s in enumerate(fileList) if 'std_ratio' in s]
min[std_ind] = 0.5
max[std_ind] = 2
# corr_ind = [i for i,s in enumerate(fileList) if 'correlation' in s]
# min[corr_ind] = -0.6
# max[corr_ind] = 0.6
poolArgs = self.getPoolArgs(filesToPlot,self,fileList,list(min),list(max),'_plotField')
#resultList = self.multiProcess(poolArgs)
for i,f in enumerate(fileList):
if not 'biasslope' in f:
if not 'std_ratio' in f:
self._plotField(f, min[i], max[i])
[docs] def analyzeYearRange(self, input1Anomalies, observationRemapped, input2Anomalies, startYear, endYear):
'''
Calculate the MSSS for given timerange. The following steps are perfomed in this method:
1. temporal smoothing
2. calculation of correlation, bias, and msss
3. calculation of model1 vs. model2
4. plotting of the 9 fields
:todo: maybe this could be done in multiprocessing --> so different timeranges at once
:param input1Anomalies: dict with anomalies of model1
:param observationRemapped: dict with anomalies of observations
:param input2Anomalies: dict with anomalies of model2
:param startYear:
:param endYear:
'''
print 'Analyzing year %s to %s' %(startYear, endYear)
base = '%s-%s'%(startYear, endYear)
#get file names
flag1 = self.constructName(self.insideFolderNameFlag,exp='1', extra='input1')
flag2 = self.constructName(self.insideFolderNameFlag,exp='2', extra='input2')
fnFlag1 = self.constructName(self.fileNameFlag, exp='1', startYear=str(startYear), endYear=str(endYear))
fnFlag2 = self.constructName(self.fileNameFlag, exp='2', startYear=str(startYear), endYear=str(endYear))
fnFlagVs = self.constructName(self.fileNameFlagVs, exp='', startYear=str(startYear), endYear=str(endYear))
#make outputfolders
if self.bootstrap is None:
model1Output = self.makeFolder(self.outputDir+base+'/'+flag1+'/msss')
model2Output = self.makeFolder(self.outputDir+base+'/'+flag2+'/msss')
vsOutput = self.makeFolder(self.outputDir+base+'/%s_vs_%s/msss'%(flag1,flag2))
#make plot folders
model1Plot = self.makeFolder(self.outputPlots+base+'/'+flag1+'/msss')
model2Plot = self.makeFolder(self.outputPlots+base+'/'+flag2+'/msss')
vsPlot = self.makeFolder(self.outputPlots+base+'/%s_vs_%s/msss'%(flag1,flag2))
else:
model1Output = self.outputDir
model2Output = self.outputDir
vsOutput = self.outputDir
yearCount = len(self.decadals)
poolArgs = self.getPoolArgs(yearCount,self,self.yeardictToList(input1Anomalies),startYear,endYear,'_tempSmoothing')
ensListSelYearTimmean = self.listToYeardict(self.multiProcess(poolArgs))
poolArgs = self.getPoolArgs(yearCount,self,self.yeardictToList(observationRemapped),startYear,endYear,'_tempSmoothing')
obsSelYearTimmean = self.listToYeardict(self.multiProcess(poolArgs))
poolArgs = self.getPoolArgs(yearCount,self,self.yeardictToList(input2Anomalies),startYear,endYear,'_tempSmoothing')
histSelYearTimmean = self.listToYeardict(self.multiProcess(poolArgs))
startYear = str(startYear)
endYear = str(endYear)
correlation = self.getCorrelationAndVariancesAndMSSS(ensListSelYearTimmean,obsSelYearTimmean, fnFlag1, model1Output)
correlationHist = self.getCorrelationAndVariancesAndMSSS(histSelYearTimmean,obsSelYearTimmean, fnFlag2, model2Output)
msssInitVsUninit = self.getInitVsUninit(correlation, correlationHist, fnFlagVs, vsOutput)
rmss = self.getRMSS(correlation['msss'], fnFlag1, model1Output)
rmssHist = self.getRMSS(correlationHist['msss'], fnFlag2, model2Output)
rmssInitvsUninit = self.getRMSS(msssInitVsUninit, fnFlagVs, vsOutput)
return self.applyMissingValueMask(startYear, endYear, [model1Output,model2Output,vsOutput])
[docs] def getInitVsUninit(self, initResults, uninitResults, flag, outputDir):
'''
Calculates comparing values between two models/modelversions i.e. initialized vs. uninitialized.
Following variables are computed:
- correlation: corr1 - corr2
- conditional bias: abs(bias1) - abs(bias2)
- msss: (msss1-msss2)/(1-msss2)
:param initResults: dict with "corr", "bias", and "msss" of first model
:param uninitResults: dict with "corr", "bias", and "msss" of second model
:param tempRange: start- and endyear --> used for filenames
:return: msss filename
'''
constantField = self.createConstantFile(self.gridFile, flag=flag) #Create Constant Field for MSSS
corr = cdo.sub(input=' '.join([initResults['corr'], uninitResults['corr']]),
output = outputDir+flag+'_correlation.nc')
bias = cdo.sub(input='-abs ' + uninitResults['bias'] + ' -abs ' + initResults['bias'],
output=outputDir+flag+'_conditional_bias.nc')
#MSSS vs Plot
msss = cdo.div(input='-sub ' + initResults['msss'] + ' ' + uninitResults['msss'] +
' -sub ' + constantField + ' ' + uninitResults['msss'],
output=outputDir+flag+'_msss.nc')
#conditional bias slope
slopeBiasInit = cdo.abs(input=initResults['biasSlopeMinusOne'],
output=self.tmpDir+self.extractFilename(initResults['biasSlopeMinusOne'])+'_abs')
slopeBiasUninit = cdo.abs(input=uninitResults['biasSlopeMinusOne'],
output=self.tmpDir+self.extractFilename(uninitResults['biasSlopeMinusOne'])+'_abs')
slopeBias = cdo.sub(input=' '.join([slopeBiasInit, slopeBiasUninit]),
output=outputDir+flag+'_biasslope-1.nc')
#conditional bias skill score
biasSKillScore = cdo.sub(input=' '.join([self.constantField,
cdo.div(input=' '.join([slopeBiasInit, slopeBiasUninit]),
output=self.tmpDir+flag+'_biasskillDIV.nc')]),
output=outputDir+flag+'_biasSkillScore.nc')
return msss
[docs] def getCorrelationAndVariancesAndMSSS(self, hindcast, observation, flag, outputDir):
'''
Calculates the following quantities:
- correlation bewtween observation and hindcast
- temporal variance of hindcast
- temporal variance of observation
- conditional bias
- msss
- biasSlope so/sh * c
- std ratio so/sh
:param hindcast: dict of hindcasts
:param observation: dict of observations
:param flag: additional string for filenames
:return: dict with "msss", "bias", "corr", "biasSlope, "stdRatio", "biasSlopeMinusOne"
'''
hindcastStr = ''
observationStr = ''
for year in self.decadals:
hindcastStr += ' ' + hindcast[year]
observationStr += ' ' + observation[year]
mergeHindcast = cdo.mergetime(input=hindcastStr, output=hindcast[self.decadals[0]]+flag+self.getRandomStr()+'_merged.nc')
mergeObservations = cdo.mergetime(input=observationStr, output=observation[self.decadals[0]]+flag+self.getRandomStr()+'_merged.nc')
#print mergeHindcast
#remove seasonal cycle
#mergeHindcast = self.removeSeasonalCycle(mergeHindcast)
#mergeObservations = self.removeSeasonalCycle(mergeObservations)
#print mergeHindcast
#detrend data
#mergeHindcast = self.detrendTimeSeries(mergeHindcast,keepMean=False)
#mergeObservations = self.detrendTimeSeries(mergeObservations,keepMean=False)
varianceHindcast = cdo.timstd(input=mergeHindcast, output=hindcast[self.decadals[0]]+flag+'_variance')
varianceObservation = cdo.timstd(input=mergeObservations, output=observation[self.decadals[0]]+flag+'_variance')
corr = cdo.timcor(input=' '.join([mergeHindcast, mergeObservations]), output=outputDir+flag+'_correlation.nc')
#standard deviation ratio
stdRatio = cdo.div(input=' '.join([varianceObservation, varianceHindcast]),
output=outputDir+flag+'_std_ratio.nc')
#slope of conditional bias
biasSlope = cdo.mul(input=' '.join([stdRatio, corr]),
output=outputDir+flag+'_biasslope.nc')
#slope -1
biasSlopeMinusOne = cdo.sub(input=' '.join([biasSlope, self.constantField]),
output=outputDir+flag+'_biasslope-1.nc')
bias = cdo.sub(input=corr+' '+ '-div ' + varianceHindcast + ' ' + varianceObservation,
output=outputDir+flag+'_conditional_bias.nc')
# bias = cdo.sub(input=corr+' '+ cdo.div(input=varianceHindcast + ' ' + varianceObservation, output=varianceHindcast+'div'),
# output=outputDir+flag+'_conditional_bias.nc')
msss = cdo.sub(input='-sqr ' + corr + ' -sqr ' + bias,
output=outputDir+flag+'_msss.nc')
# msss = cdo.sub(input=cdo.sqr(input=corr,output=corr+'sqr') + ' ' + cdo.sqr(input=bias,output=bias+'sqr'),
# output=outputDir+flag+'_msss.nc')
return dict(msss= msss, bias= bias, corr= corr, biasSlope=biasSlope, stdRatio=stdRatio, biasSlopeMinusOne=biasSlopeMinusOne)
[docs] def getMSE(self, anomalies, obsAnom):
'''
Method calculates the Mean Squared Error.
:note: method ist not used at the moment
:param anomalies: dictionary with anomalies
:param obsAnom: dict with observation anomalies
:return: file with mse field
'''
MSEList = list()
for year in self.decadals:
subTmp = cdo.sub(input=' '.join([anomalies[year], obsAnom[year]]))
MSEList.append(cdo.sqr(input=subTmp))
return cdo.ensmean(input=' '.join(MSEList), output=self.tmpDir+'mse.nc')
[docs] def getRMSS(self, MSSS, flag, outputDir):
'''
Calculates the RMSSS using the MSSS
:param MSSS: netcdf file
:param flag: name flag
:param outputDir: output path
:return: file with RMSSS field
'''
tmp = cdo.sqrt(input="-sub " + self.constantField + " " + MSSS,
output=self.tmpDir+flag+'rmss_sqrt.nc')
return cdo.sub(input=' '.join([self.constantField,tmp]), output=outputDir+flag+'_rmss.nc')