'''
Created on 11.03.2013
:author: Sebastian Illing
:contact: sebastian.illing@met.fu-berlin.de
Copyright (C) 2014 Sebastian Illing
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>
'''
import abc
import os
from cdo import *
#from integration.metrics.filehandler import FileHandler
cdo = Cdo()
import time
import numpy as N
import sys
import murcss_config
if murcss_config.file_system == 'miklip':
from findFiles import FindFiles
from findFilesSeason import FindFilesSeason
else:
from findFilesCustom import FindFilesCustom as FindFiles
FindFilesSeason = FindFiles
from filehandler import FileHandler
from plotter import Plotter
from nondeamonpool import MyPool
from tool_abstract import ToolAbstract#, unwrap_self_f
[docs]class MetricAbstract(ToolAbstract):
'''
Abstract class for calculating any metric of decadal runs with CDO
Every reusable function which uses CDO commands should go in here.
'''
def __init__(self,
tmpDir = './tmp',
output = '/',
output_plots='/',
baseDir='/',
result_grid=None,
observation='HadCrut',
leadtimes_mode='yearly',
decadals='1960,1965,1970,1975,1980,1985,1990,1995,2000',
months=None
):
'''
Constructor. Sets variables and creates Folders
:param tmpDir:
:param output:
:param output_plots: self.outputPlots
:param baseDir: Path to search for source filesself.outputPlots
:param result_grid: grid to remap to
:param observation: observation name or file used for analysis
'''
setattr(self, 'obsExp', observation) #TODO: Wrong named kwarg
self.observation = ''
if(self.obsExp == 'HadCrut' and self.variable == 'tas'):
self.observation = baseDir + '/../src/obs2/tas_obs_HadCrut3.nc'
elif os.path.isfile(self.obsExp):
self.observation = self.obsExp
self.obsExp = self.extractFilename(self.obsExp)
self.obsExp = self.obsExp.split('.')[0]
#Initialize MaxLeadYear
self.getYearRange()
outputFolder = self.constructName(self.outputFolderStruct)
super(MetricAbstract, self).__init__(output_folder = self.checkPath(output)+outputFolder,
output_plots = self.checkPath(output_plots)+outputFolder)
self.outputDir = self.checkPath(self.checkPath(output)+outputFolder)
self.outputPlots = self.checkPath(self.checkPath(output_plots)+outputFolder)
self.baseDir = baseDir
self.tmpDir = self.checkPath(tmpDir)
if not os.path.isdir(self.checkPath(tmpDir)):
os.makedirs(self.checkPath(tmpDir))
if result_grid is None:
self.gridFile = self.baseDir + '/../src/grids/griddes_HadCrut3_5x5.txt'
else:
self.gridFile = result_grid
if leadtimes_mode == 'monthly':
if months is not None:
month_list = months.split(',')
else:
month_list = ['']
init_list = list()
for month in month_list:
for year in decadals:
init_list.append(int(str(year)+month))
self.decadals = init_list
else:
self.decadals=decadals
#set special options for "decadal" or "seasonal"
self.leadtimes_mode = leadtimes_mode
if self.leadtimes_mode == 'yearly':
self.analysisOptions = {'cdoSelect' : 'selyear',
'cdomean' : 'yearmean'}
else:
self.analysisOptions = {'cdoSelect' : 'selmon',
'cdomean' : 'monmean'}
#findFiles instance
if leadtimes_mode == 'yearly':
self.findFiles = FindFiles(tmpDir=self.tmpDir, level=self.level)
elif leadtimes_mode == 'monthly':
self.findFiles = FindFilesSeason(tmpDir=self.tmpDir, level=self.level)
@abc.abstractmethod
[docs] def analyze(self):
'''
Abstract Method to calculate a metric
Must be implemented in child classes
'''
pass
[docs] def _remapFiles(self, ensList, flag=''):
'''
Multiprocessing for remapping files
remap observations and model files
:param ensList: list of files
:return: list of remapped files
'''
ensCount = len(ensList)
poolArgs = self.getPoolArgs(ensCount,self,ensList,flag,'remapFile')
return self.multiProcess(poolArgs)
[docs] def remapFile(self, *arg, **kwargs):
'''
Remaps a single file width CDO's remapcon
:param inputfile
'''
iFile= arg
iFile=iFile[0]
try:
flag = arg[1]
if flag == 'output*':
flag = ''
except:
flag = ''
flag += self.getRandomStr()
fName = iFile.split('/')[-1]
return cdo.remapcon(self.gridFile, input=iFile, output=self.tmpDir+fName+flag+'.remapped')
[docs] def getEnsembleMean(self, fileList):
'''
:todo: Multiprocessing
Should do ensmean in multiprocessing!!! At the moment only single approach
:param fileList: dict of filelists
:return: dict of ensemble means
'''
ensMeanList = self.yeardictToList(fileList)
count = len(ensMeanList)
poolArgs = self.getPoolArgs(count,self,ensMeanList,'_getEnsembleMean')
return self.listToYeardict(self.multiProcess(poolArgs))
[docs] def _getEnsembleMean(self, fileList, flag='ENSMEAN'):
'''
Calculates ensemble Mean
:param fileList: list of files
:return: ensemble mean file
'''
if len(fileList) == 1:
return fileList[0]
fileStr = ' '.join(fileList)
return cdo.ensmean(input=fileStr, output=self.tmpDir+self.extractFilename(fileList[0])+'_'+flag)
[docs] def tempSmoothing(self, ensList, startyear, endyear):
'''
Multiprocessing approach for temporal smoothing
Starts a process for every file in ensList
:param ensList: List of files
:param startyear: int
:param endyear: int
:return: list of temporal smoothed files
'''
if(isinstance(ensList,list)):
ensCount = len(ensList)
else:
ensCount = 1
poolArgs = self.getPoolArgs(ensCount,self,ensList,startyear,endyear,'_tempSmoothing')
return self.multiProcess(poolArgs)
[docs] def _tempSmoothing(self, fileName, startyear, endyear):
'''
Temporal smoothing. I.e. timmean over year 2-9
:param fileName: filepath
:param startyear: first year to select (int)
:param endyear: last year to select (int)
:return: temp smoothed file
'''
# pid= os.getpid()
# filestartyear = int(cdo.showyear(input=fileName)[0].split(' ')[0])
# selyearstr = ''
# fn = fileName.split('/')[-1]
# fileNameOut = self.tmpDir+fn
# for i in range(startyear, endyear+1):
# selyearstr = ','.join([selyearstr, str(filestartyear+i-1)])
# selyear = cdo.selyear(selyearstr, input=fileName, output=fileNameOut+'_selYear_'+str(startyear)+'-'+str(endyear)+str(pid))
# return cdo.timmean(input=selyear, output=selyear+'Timmean')
#perform yearmean or monthmean
#try:
pid= os.getpid()
fn = fileName.split('/')[-1]
fileNameOut = self.tmpDir+fn
#self.analysisOptions['cdomean'] = 'seasmean'
#method = getattr(cdo, self.analysisOptions['cdomean'])
#meanFile = method(input = fileName, output=self.tmpDir+fn+str(pid)+self.analysisOptions['cdomean'])
# #test monthly analysis
# self.analysisOptions['cdomean'] = 'monmean'
# method = getattr(cdo, self.analysisOptions['cdomean'])
# meanFile = method(input = fileName, output=self.tmpDir+fn+str(pid)+self.analysisOptions['cdomean'])
# selstr = ','.join(map(str,range((startyear-1)*12+1,endyear*12+1)))
# seldate = cdo.seltimestep(selstr,input=meanFile, output=fileNameOut+'_selYear_'+str(startyear)+'-'+str(endyear)+str(pid)+self.getRandomStr())
# tmp = cdo.ymonmean(input=seldate, output=seldate+'Timmean')
# return tmp
#
#TEST SEASONANALYSIS
# self.analysisOptions['cdomean'] = 'seasmean'
# method = getattr(cdo, self.analysisOptions['cdomean'])
# meanFile = method(input = fileName, output=self.tmpDir+fn+str(pid)+self.analysisOptions['cdomean'])
# meanFile = cdo.selseas('JJA',input=meanFile,output=meanFile+'JJA')
# selstr = ','.join(map(str,range(startyear,endyear+1)))
# seldate = cdo.seltimestep(selstr,input=meanFile, output=fileNameOut+'_selYear_'+str(startyear)+'-'+str(endyear)+str(pid)+self.getRandomStr())
# tmp = cdo.timmean(input=seldate, output=seldate+'Timmean')
# return tmp
method = getattr(cdo, self.analysisOptions['cdomean'])
meanFile = method(input = fileName, output=self.tmpDir+fn+str(pid)+self.analysisOptions['cdomean'])
selstr = ','.join(map(str,range(startyear,endyear+1)))
seldate = cdo.seltimestep(selstr,input=meanFile, output=fileNameOut+'_selYear_'+str(startyear)+'-'+str(endyear)+str(pid)+self.getRandomStr())
tmp = cdo.timmean(input=seldate, output=seldate+'Timmean')
return tmp
[docs] def removeSeasonalCycle(self, fn):
'''
Subtracts the seasonal cycle of monthly data
'''
print fn
seasonalCycle = cdo.ymonavg(input=fn, output=fn+'_meanCycle')
print seasonalCycle
tmp = cdo.ymonsub(input=' '.join([fn,seasonalCycle]),output=fn+'_noseason')
print tmp
return tmp
[docs] def getAnomalies(self, ensMeanList, crossMean):
'''
Subtracts cross-validated mean from ensembles
:param ensMEanList: dict with filenames
:param crossMean: dict width cross-val means
:return: dict width cross-validated anomalies
'''
anomalies = dict()
for year in self.decadals:
anomalies[year] = cdo.sub(input=' '.join([ensMeanList[year], crossMean[year]]), output=ensMeanList[year]+'_ANOMALIE')
#
#anomalies[year] = cdo.sub(input=' '.join([ensMeanList[year], cdo.timmean(input=crossMean[year],output=crossMean[year]+'timmean')]), output=ensMeanList[year]+'_ANOMALIE')
return anomalies
def _getAnomalies(self, ensMeanList, crossMean):
if type(ensMeanList) == list:
tmpList = list()
for i,item in enumerate(ensMeanList):
tmpList.append(self._getAnomalies(item, crossMean))
return tmpList
else:
return cdo.sub(input=' '.join([ensMeanList, crossMean]), output=ensMeanList+'_ANOMALIE')
[docs] def getCrossValMean(self, ensMeanList, flag):
'''
Calculates cross-validated means (averages through forecast times, excluding the forecast in question)
:param ensMeanList: Dict of filnames
:param flag: String value added to new filenames
:return: dict with cross-validated means
'''
poolMeanList = list()
yearList = list()
flagList = list()
for year in self.decadals:
yearList.append(year)
meanList = list()
flagList.append(str(year)+'_'+flag+self.getRandomStr())
for key,val in ensMeanList.iteritems():
if( key != year):
meanList.append(val)
poolMeanList.append(meanList)
count = len(poolMeanList)
poolArgs = self.getPoolArgs(count,self,poolMeanList,flagList,'_getEnsembleMean')
return self.listToYeardict(self.multiProcess(poolArgs))
[docs] def getVariance(self, list, flag):
'''
Calculate "variance"
:deprecated: Not used in goddard metrics --> should not be used
:param list: filelist
:param flag: string for filenames
:return: kind of standard deviation of anomalies
'''
squareList = dict()
squareStr = ''
for year in self.decadals:
squareList[year] = cdo.sqr(input=list[year], output=list[year]+'_square')
squareStr += ' '+squareList[year]
squareSum = cdo.ensmean(input=squareStr, output=self.tmpDir+flag+'_goddardVar')
return cdo.sqrt(input=squareSum, output=self.tmpDir+flag+'_goddardStd')
[docs] def createConstantFile(self, gridFile, flag=''):
'''
Creates a constant field with value 1
:param gridFile: grid description file (txt)
:return: filename
'''
if self.fieldmean:
return cdo.const('1,r1x1', output=self.tmpDir+flag+'constantField.nc', options = '-f nc')
else:
const = cdo.const('1,'+gridFile, output=self.tmpDir+flag+'constantField.nc', options = '-f nc')
if self.lonlatbox is not None:
return self._sellonlatbox(const)
else:
return const
[docs] def getEnsembleStd(self, ensList):
'''
Calculates the mean ensemble STD for a given dictionary
Keys in dict have to be the starting years
:param ensList: dict with ensembles
:return: file with mean ensemble std
'''
tmpList = list()
for year in self.decadals:
tmpList.append(ensList[year])
ensCount = len(tmpList)
poolArgs = self.getPoolArgs(ensCount,self,tmpList,'_getEnsembleVar')
result = self.listToYeardict(self.multiProcess(poolArgs))
return cdo.sqrt(input=cdo.ensmean(input=' '.join(result.values()), output=result.values()[0]+'ensmean_afgst'), output=result.values()[0]+'AVGSTD')
[docs] def _getEnsembleVar(self, fileList):
'''
Calculates the ensemble Variance of a given file list (ensembles)
:note: CDO norally divides by n here this is changed to n-1
:param fileList: List of files
:return: file with ensemble variance
'''
fileStr = ' '.join(fileList)
norm = len(fileList)/(len(fileList)-1.)
ensvar = cdo.ensvar(input=fileStr, output=fileList[0]+'_ENSVAR'+self.getRandomStr())
#print len(fileList)
return cdo.mulc(norm, input=ensvar, output=ensvar+'_BETTER')
[docs] def printValues(self, variable, msg):
'''
Method for debugging. Prints our values of a list, dict or file.
:note: Printing is only activated for TESTDATA
:todo: Maybe extend for numpy arrays
:param variable: variable to print
:param msg: name of variable
'''
if self.obsExp != 'TESTDATA':
return
typeName = type(variable).__name__
if typeName == 'str':
self.printFileValue(variable, msg)
elif typeName == 'list':
for item in variable:
self.printValues(item, msg)
elif typeName == 'dict':
for year in self.decadals:
self.printValues(variable[year], msg+'_'+str(year))
else:
print 'Cant recognize filetype.'
[docs] def printFileValue(self,filename, msg):
'''
Method for debugging. Prints a specific value of a given netCDF File.
:param filename:
:param msg: additional message to print out. Like the name of the variable
'''
variable = FileHandler.openNetCDFFile(filename, mode='var')
print np.shape(variable)
if len(np.shape(variable)) == 2:
print 'Value of %s is: %s' % (msg,variable[16,37])
else:
print 'Value of %s is: %s' % (msg,variable[0,16,37])
[docs] def calcMissingValueMask(self, observations):
'''
Calculates a missing value mask using the observations.
At the moment all gridpoints are masked at missing value where at least one value is missing
:TODO: find a less strict solution. Maybe 10% available?
DONE!
:param observations: dict with all observations
:return netcdf file with missing values mask 1/0
'''
# for year in self.decadals:
#
# tmp_array = FileHandler.openNetCDFFile(observations[year], 'var')
# tmp_array = np.expand_dims(tmp_array, axis=2)
# try:
# master = np.concatenate((master,tmp_array),axis=2)
# except NameError:
# master = tmp_array
# missing_value_mask = np.zeros((master.shape[0],master.shape[1]))
# #This loop is probably not the best idea
# #TODO: Change to numpy function
# for x in xrange(master.shape[0]):
# for y in xrange(master.shape[1]):
# missing_count=0
# for i in xrange(len(self.decadals)):
#
# if master[x,y,i] > 0.9e20:
# missing_count+=1
# if missing_count > round(0.1*len(self.decadals)):
# missing_value_mask[x,y] = 1e20
# else:
# missing_value_mask[x,y] = 1
# #print missing_count
# misval_new = FileHandler.saveToNetCDF(missing_value_mask, observations.values()[0], 'missing_value_new')
for year in self.decadals:
tmp_array = FileHandler.openNetCDFFile(observations[year], 'var')
#tmp_array = np.expand_dims(tmp_array, axis=2)
tmp_array_1d = tmp_array.reshape((1,tmp_array.size))
try:
master = np.concatenate((master,tmp_array_1d))
except NameError:
orig_shape = tmp_array.shape
master = tmp_array_1d
#sum along axis
sum_array = np.sum(master,axis=0)
#missing_value_mask = np.zeros((1,tmp_array.size))
missing_value_mask = np.where(sum_array<0.1*(len(self.decadals)*1e20),sum_array,1e20)
missing_value_mask = np.where(sum_array>=0.1*(len(self.decadals)*1e20),missing_value_mask,1)
#print missing_count
missing_value_mask = np.reshape(missing_value_mask, orig_shape)
misval_new = FileHandler.saveToNetCDF(missing_value_mask, observations.values()[0], 'missing_value_new2')
misval_new = cdo.timmean(input=misval_new, output=misval_new+'timmean')
# tmpList = list()
# for year in self.decadals:
# #Mark every gridpoint as missing, if it is missing once
# tmpList.append(cdo.timavg(input=observations[year], output=self.tmpDir+'misvalmask_tmp'+str(year)))
# #Mark only as missing if it is missing the whole year
# print observations[year]
# #tmpList.append(cdo.yearmean(input=observations[year], output=self.tmpDir+'misvalmask_tmp'+str(year)))
# misval_tmp = cdo.mergetime(input=' '.join(tmpList), output=self.tmpDir+'misvalmask_tmp_all')
# misval = cdo.timavg(input=misval_tmp, output=self.tmpDir+'misvalmask_all_years')
# misval_old = cdo.setrtoc('-1000,1000,1', input=misval, output=self.tmpDir+'missingValueMask.nc')
return misval_new
[docs] def sellonlatbox(self, fileList):
'''
Method to select a lon-lat-box using CDO
:param fileList: list with files of single file
:return: new filelist
'''
if type(fileList) == list:
#Mult Processor
count = len(fileList)
poolArgs = self.getPoolArgs(count,self,fileList,'_sellonlatbox')
resultList = self.multiProcess(poolArgs)
return resultList
else:
return self._sellonlatbox(fileList)
[docs] def _sellonlatbox(self, file):
'''
Single process for selecting lon-lat-box with cdo
:param: file
:return: new file
'''
return cdo.sellonlatbox(self.lonlatbox, input=file, output=self.tmpDir+self.extractFilename(file)+'_sellonlatbox')
[docs] def fieldMean(self, fileList):
'''
Multiprocess Method to calc fieldmean
:param fileList:
:return: fieldmean list
'''
#Mult Processor
count = len(fileList)
poolArgs = self.getPoolArgs(count,self,fileList,'_fieldMean')
resultList = self.multiProcess(poolArgs)
return resultList
[docs] def _fieldMean(self, fn):
'''
Single process calculating fieldmean using CDO
:param fn:
:return: fieldmean fn
'''
return cdo.fldmean(input=fn, output=self.tmpDir+self.extractFilename(fn)+'_fldmean')
[docs] def _plotField(self, fileName, vmin, vmax):
'''
@deprecated: use the static Plotter class instead
Plot any field variable
:param fileName: filepath
:param vmin: min value for colorbar
:param vmax: max value for colorbar
'''
Plotter.plotField(fileName, vmin, vmax, colormap='', output_folder=self.outputPlots, lonlatbox=self.lonlatbox)
Plotter.saveFig(self.outputPlots, fileName.split(self.outputDir)[-1])
[docs] def applyMissingValueMask(self, start, end, outputDirList):
'''
Multiply results with a missing value mask
:todo: At the moment all files in the outputdir are multiplied. This should be changed
:param start: startyear
:param end: endyear
:return: list of changed files
'''
fileList = list()
for outputDir in outputDirList:
for files in os.listdir(outputDir):
if(os.path.isfile(outputDir+files) and (outputDir+files).split('_')[-1] != 'masked'):
file_parts = str(files).split('_')
if(file_parts[0] == start and file_parts[1] == end):
if self.maskMissingValues:
fileList.append(cdo.mul(input = ' '.join([outputDir+files, self.misvalMask]),
output = outputDir+files+'_masked'))
else:
fileList.append(outputDir+files)
return fileList
[docs] def detrendTimeSeries(self, fn, keepMean=True):
'''
Subtracts trend of a timeseries.
If keepMean is True the mean is kept
'''
print fn
#split file in mothly files
tmp = cdo.copy(input=fn,output=fn+'copy',options='-f nc')
tmp = cdo.splitmon(input=tmp,output=fn+'monthly',options='-f nc')
#print tmp
months = ['01','02','03','04','05','06','07','08','09','10','11','12']
detrended_list = list()
for mon in months:
fn_mon = tmp+mon+'.nc'
trend_a = fn_mon + 'trend_a'
trend_b = fn_mon + 'trend_b'
cdo.trend(input=fn_mon, output=' '.join([trend_a,trend_b]))
if keepMean:
trend_a = cdo.mulc('0.00',input=trend_a,output=trend_a+'_keepMean')
detrended_tmp = cdo.subtrend(input=' '.join([fn_mon,trend_a,trend_b]), output=fn_mon+'_detrended')
detrended_list.append(detrended_tmp)
# print fn
# print detrended
detrended = cdo.mergetime(' '.join(detrended_list), output=fn+'_detrended')
print detrended
return detrended