Source code for metrics.metricAbstract

'''
Created on 11.03.2013

:author: Sebastian Illing
:contact: sebastian.illing@met.fu-berlin.de

Copyright (C) 2014  Sebastian Illing
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>
'''

import abc
import os
from cdo import *
#from integration.metrics.filehandler import FileHandler
cdo = Cdo()
import time

import numpy as N
import sys

import murcss_config
if murcss_config.file_system == 'miklip':
    from findFiles import FindFiles
    from findFilesSeason import FindFilesSeason
else:
    from findFilesCustom import FindFilesCustom as FindFiles
    FindFilesSeason = FindFiles

from filehandler import FileHandler
from plotter import Plotter
from nondeamonpool import MyPool
from tool_abstract import ToolAbstract#, unwrap_self_f

[docs]class MetricAbstract(ToolAbstract):
    '''
    Abstract class for calculating any metric of decadal runs with CDO
    Every reusable function which uses CDO commands should go in here.
    '''
    
    def __init__(self, 
                 tmpDir = './tmp', 
                 output = '/', 
                 output_plots='/', 
                 baseDir='/', 
                 result_grid=None,
                 observation='HadCrut',
                 leadtimes_mode='yearly',
                 decadals='1960,1965,1970,1975,1980,1985,1990,1995,2000',
                 months=None
                ):
        '''
        Constructor. Sets variables and creates Folders
         
        :param tmpDir: 
        :param output: 
        :param output_plots: self.outputPlots
        :param baseDir: Path to search for source filesself.outputPlots
        :param result_grid: grid to remap to
        :param observation: observation name or file used for analysis
        '''    
        setattr(self, 'obsExp', observation) #TODO: Wrong named kwarg
        self.observation = ''    
        if(self.obsExp == 'HadCrut' and self.variable == 'tas'):  
            self.observation = baseDir + '/../src/obs2/tas_obs_HadCrut3.nc'
        elif os.path.isfile(self.obsExp):
            self.observation = self.obsExp
            self.obsExp = self.extractFilename(self.obsExp)
            self.obsExp = self.obsExp.split('.')[0]
        
        
        #Initialize MaxLeadYear
        self.getYearRange()        
        outputFolder = self.constructName(self.outputFolderStruct)
        super(MetricAbstract, self).__init__(output_folder = self.checkPath(output)+outputFolder, 
                                             output_plots = self.checkPath(output_plots)+outputFolder)
        self.outputDir = self.checkPath(self.checkPath(output)+outputFolder)
        self.outputPlots = self.checkPath(self.checkPath(output_plots)+outputFolder)
        self.baseDir = baseDir
        self.tmpDir = self.checkPath(tmpDir)
        if not os.path.isdir(self.checkPath(tmpDir)):
            os.makedirs(self.checkPath(tmpDir))
        
        if result_grid is None: 
            self.gridFile = self.baseDir + '/../src/grids/griddes_HadCrut3_5x5.txt'
        else:
            self.gridFile = result_grid         
        
        
        if leadtimes_mode == 'monthly':
            if months is not None:
                month_list = months.split(',')
            else:
                month_list = ['']
            init_list = list()
            for month in month_list:
                for year in decadals:
                    init_list.append(int(str(year)+month))
            self.decadals = init_list
        else:
            self.decadals=decadals
        #set special options for "decadal" or "seasonal"
        self.leadtimes_mode = leadtimes_mode
        if self.leadtimes_mode == 'yearly':
            self.analysisOptions = {'cdoSelect' : 'selyear',
                                    'cdomean' : 'yearmean'}
        else:
            self.analysisOptions = {'cdoSelect' : 'selmon',
                                    'cdomean' : 'monmean'}
                
        #findFiles instance
        if leadtimes_mode == 'yearly':
            self.findFiles = FindFiles(tmpDir=self.tmpDir, level=self.level)
        elif leadtimes_mode == 'monthly':
            self.findFiles = FindFilesSeason(tmpDir=self.tmpDir, level=self.level)
        
    @abc.abstractmethod    
[docs]    def analyze(self):
        '''
        Abstract Method to calculate a metric
        Must be implemented in child classes
        '''
        pass
        
[docs]    def _remapFiles(self, ensList, flag=''):
        '''
        Multiprocessing for remapping files
        remap observations and model files
        
        :param ensList: list of files
        :return: list of remapped files
        '''
        
        ensCount = len(ensList)
        poolArgs = self.getPoolArgs(ensCount,self,ensList,flag,'remapFile')
        return self.multiProcess(poolArgs)

[docs]    def remapFile(self, *arg, **kwargs): 
        '''
        Remaps a single file width CDO's remapcon
        
        :param inputfile 
        '''
        iFile= arg  
        iFile=iFile[0]
        
        try:
            flag = arg[1]
            if flag == 'output*':
                flag = ''
        except:
            flag = ''
        flag += self.getRandomStr()
        fName = iFile.split('/')[-1]
        return cdo.remapcon(self.gridFile, input=iFile, output=self.tmpDir+fName+flag+'.remapped')
    
[docs]    def getEnsembleMean(self, fileList):
        '''
        :todo: Multiprocessing
        Should do ensmean in multiprocessing!!! At the moment only single approach
        
        :param fileList: dict of filelists
        :return: dict of ensemble means
        '''
        ensMeanList = self.yeardictToList(fileList)
            
        count = len(ensMeanList)
        poolArgs = self.getPoolArgs(count,self,ensMeanList,'_getEnsembleMean')
        return self.listToYeardict(self.multiProcess(poolArgs))

    
[docs]    def _getEnsembleMean(self, fileList, flag='ENSMEAN'):
        '''
        Calculates ensemble Mean
        
        :param fileList: list of files
        :return: ensemble mean file 
        '''
        if len(fileList) == 1:
            return fileList[0]
        fileStr = ' '.join(fileList)
        return cdo.ensmean(input=fileStr, output=self.tmpDir+self.extractFilename(fileList[0])+'_'+flag) 
    
[docs]    def tempSmoothing(self, ensList, startyear, endyear):
        '''
        Multiprocessing approach for temporal smoothing
        Starts a process for every file in ensList
        
        :param ensList: List of files
        :param startyear: int
        :param endyear: int
        :return: list of temporal smoothed files
        '''
        if(isinstance(ensList,list)):
            ensCount = len(ensList)
        else:
            ensCount = 1
        poolArgs = self.getPoolArgs(ensCount,self,ensList,startyear,endyear,'_tempSmoothing')
        return  self.multiProcess(poolArgs)
    
[docs]    def _tempSmoothing(self, fileName, startyear, endyear):
        '''
        Temporal smoothing. I.e. timmean over year 2-9
        
        :param fileName: filepath
        :param startyear: first year to select (int)
        :param endyear: last year to select (int)
        :return: temp smoothed file
        '''  
#        pid= os.getpid()
#        filestartyear = int(cdo.showyear(input=fileName)[0].split(' ')[0])
#        selyearstr = ''
#        fn = fileName.split('/')[-1]
#        fileNameOut = self.tmpDir+fn
#        for i in range(startyear, endyear+1):
#            selyearstr = ','.join([selyearstr, str(filestartyear+i-1)])
#        selyear = cdo.selyear(selyearstr, input=fileName, output=fileNameOut+'_selYear_'+str(startyear)+'-'+str(endyear)+str(pid))
#        return cdo.timmean(input=selyear, output=selyear+'Timmean')
        
        #perform yearmean or monthmean
        #try:
        
        
        pid= os.getpid()
        fn = fileName.split('/')[-1]
        fileNameOut = self.tmpDir+fn
        
        
        #self.analysisOptions['cdomean'] = 'seasmean'
        #method = getattr(cdo, self.analysisOptions['cdomean'])
        #meanFile =  method(input = fileName, output=self.tmpDir+fn+str(pid)+self.analysisOptions['cdomean'])
        
#        #test monthly analysis
#        self.analysisOptions['cdomean'] = 'monmean'
#        method = getattr(cdo, self.analysisOptions['cdomean'])
#        meanFile =  method(input = fileName, output=self.tmpDir+fn+str(pid)+self.analysisOptions['cdomean'])        
#        selstr = ','.join(map(str,range((startyear-1)*12+1,endyear*12+1)))
#        seldate = cdo.seltimestep(selstr,input=meanFile, output=fileNameOut+'_selYear_'+str(startyear)+'-'+str(endyear)+str(pid)+self.getRandomStr())
#        tmp = cdo.ymonmean(input=seldate, output=seldate+'Timmean')
#        return tmp
#        
        #TEST SEASONANALYSIS
#        self.analysisOptions['cdomean'] = 'seasmean'
#        method = getattr(cdo, self.analysisOptions['cdomean'])
#        meanFile =  method(input = fileName, output=self.tmpDir+fn+str(pid)+self.analysisOptions['cdomean'])
#        meanFile = cdo.selseas('JJA',input=meanFile,output=meanFile+'JJA')
#        selstr = ','.join(map(str,range(startyear,endyear+1)))
#        seldate = cdo.seltimestep(selstr,input=meanFile, output=fileNameOut+'_selYear_'+str(startyear)+'-'+str(endyear)+str(pid)+self.getRandomStr())
#        tmp = cdo.timmean(input=seldate, output=seldate+'Timmean')
#        return tmp
    
    
        method = getattr(cdo, self.analysisOptions['cdomean'])
        meanFile =  method(input = fileName, output=self.tmpDir+fn+str(pid)+self.analysisOptions['cdomean'])  
        selstr = ','.join(map(str,range(startyear,endyear+1)))
        seldate = cdo.seltimestep(selstr,input=meanFile, output=fileNameOut+'_selYear_'+str(startyear)+'-'+str(endyear)+str(pid)+self.getRandomStr())
        tmp = cdo.timmean(input=seldate, output=seldate+'Timmean')
        return tmp
        
[docs]    def removeSeasonalCycle(self, fn):
        '''
        Subtracts the seasonal cycle of monthly data
        '''
        print fn
        seasonalCycle = cdo.ymonavg(input=fn, output=fn+'_meanCycle')
        print seasonalCycle
        tmp = cdo.ymonsub(input=' '.join([fn,seasonalCycle]),output=fn+'_noseason')
        print tmp
        return tmp
    
    
    
[docs]    def getAnomalies(self, ensMeanList, crossMean):
        '''
        Subtracts cross-validated mean from ensembles
        
        :param ensMEanList: dict with filenames
        :param crossMean: dict width cross-val means
        :return: dict width cross-validated anomalies
        '''
        anomalies = dict()
        for year in self.decadals:
            anomalies[year] = cdo.sub(input=' '.join([ensMeanList[year], crossMean[year]]), output=ensMeanList[year]+'_ANOMALIE')
            #
            #anomalies[year] = cdo.sub(input=' '.join([ensMeanList[year], cdo.timmean(input=crossMean[year],output=crossMean[year]+'timmean')]), output=ensMeanList[year]+'_ANOMALIE')        
        return anomalies 
    
    def _getAnomalies(self, ensMeanList, crossMean):
        
        if type(ensMeanList) == list:
            tmpList = list()
            for i,item in enumerate(ensMeanList):
                tmpList.append(self._getAnomalies(item, crossMean))
            return tmpList
        else:
            return cdo.sub(input=' '.join([ensMeanList, crossMean]), output=ensMeanList+'_ANOMALIE')
    
[docs]    def getCrossValMean(self, ensMeanList, flag):
        '''
        Calculates cross-validated means (averages through forecast times, excluding the forecast in question)
        
        :param ensMeanList: Dict of filnames
        :param flag: String value added to new filenames
        :return: dict with cross-validated means
        '''
        poolMeanList = list()
        
        yearList = list()
        flagList = list()
        for year in self.decadals:
            yearList.append(year)
            meanList = list()
            flagList.append(str(year)+'_'+flag+self.getRandomStr())
            for key,val in ensMeanList.iteritems():
                if( key != year):
                    meanList.append(val)
        
            poolMeanList.append(meanList)
        
        count = len(poolMeanList)
        poolArgs = self.getPoolArgs(count,self,poolMeanList,flagList,'_getEnsembleMean')
        return self.listToYeardict(self.multiProcess(poolArgs))
            
    
[docs]    def getVariance(self, list, flag):
        '''
        Calculate "variance"
        
        :deprecated: Not used in goddard metrics --> should not be used 
        :param list: filelist
        :param flag: string for filenames
        :return: kind of standard deviation of anomalies
        '''
        squareList = dict()
        squareStr = ''
        for year in self.decadals:
            squareList[year] = cdo.sqr(input=list[year], output=list[year]+'_square')
            squareStr += ' '+squareList[year]
            
        squareSum = cdo.ensmean(input=squareStr, output=self.tmpDir+flag+'_goddardVar')
        return cdo.sqrt(input=squareSum, output=self.tmpDir+flag+'_goddardStd')
    
[docs]    def createConstantFile(self, gridFile, flag=''):
        '''
        Creates a constant field with value 1
        
        :param gridFile: grid description file (txt)
        :return: filename
        '''
        if self.fieldmean:
            return cdo.const('1,r1x1', output=self.tmpDir+flag+'constantField.nc', options = '-f nc')
        else:
            const = cdo.const('1,'+gridFile, output=self.tmpDir+flag+'constantField.nc', options = '-f nc')
            if self.lonlatbox is not None:
                return self._sellonlatbox(const)
            else:
                return const
   
[docs]    def getEnsembleStd(self, ensList):
        '''
        Calculates the mean ensemble STD for a given dictionary
        Keys in dict have to be the starting years 
        
        :param ensList: dict with ensembles
        :return: file with mean ensemble std
        '''
        tmpList = list()
        for year in self.decadals:
            tmpList.append(ensList[year])
        ensCount = len(tmpList)   
        poolArgs = self.getPoolArgs(ensCount,self,tmpList,'_getEnsembleVar')
        result = self.listToYeardict(self.multiProcess(poolArgs))
        return cdo.sqrt(input=cdo.ensmean(input=' '.join(result.values()), output=result.values()[0]+'ensmean_afgst'), output=result.values()[0]+'AVGSTD')
            
[docs]    def _getEnsembleVar(self, fileList):
        '''
        Calculates the ensemble Variance of a given file list (ensembles)
        
        :note: CDO norally divides by n here this is changed to n-1
        :param fileList: List of files
        :return: file with ensemble variance
        '''
        fileStr = ' '.join(fileList)
        norm = len(fileList)/(len(fileList)-1.)
        ensvar = cdo.ensvar(input=fileStr, output=fileList[0]+'_ENSVAR'+self.getRandomStr())
        #print len(fileList)
        
        return cdo.mulc(norm, input=ensvar, output=ensvar+'_BETTER')
    
    
[docs]    def printValues(self, variable, msg):
        '''
        Method for debugging. Prints our values of a list, dict or file.
        
        :note: Printing is only activated for TESTDATA
        
        :todo: Maybe extend for numpy arrays
        
        :param variable: variable to print
        :param msg: name of variable
        '''
        if self.obsExp != 'TESTDATA':
            return
        
        typeName = type(variable).__name__
        if typeName == 'str':
            self.printFileValue(variable, msg)
        elif typeName == 'list':
            for item in variable:
                self.printValues(item, msg)
        elif typeName == 'dict':
            for year in self.decadals:
                self.printValues(variable[year], msg+'_'+str(year))
        else:
            print 'Cant recognize filetype.'         
            
[docs]    def printFileValue(self,filename, msg):
        '''
        Method for debugging. Prints a specific value of a given netCDF File.
        
        :param filename:
        :param msg: additional message to print out. Like the name of the variable
        '''
        variable = FileHandler.openNetCDFFile(filename, mode='var')
        print np.shape(variable)
        if len(np.shape(variable)) == 2:
            print 'Value of %s is: %s' % (msg,variable[16,37])
        else:
            print 'Value of %s is: %s' % (msg,variable[0,16,37])
        
    
[docs]    def calcMissingValueMask(self, observations):
        '''
        Calculates a missing value mask using the observations. 
        At the moment all gridpoints are masked at missing value where at least one value is missing
        
        :TODO: find a less strict solution. Maybe 10% available?
        DONE!
        
        :param observations: dict with all observations
        :return netcdf file with missing values mask 1/0
        '''

#        for year in self.decadals:
#            
#            tmp_array = FileHandler.openNetCDFFile(observations[year], 'var')
#            tmp_array = np.expand_dims(tmp_array, axis=2)
#            try:
#                master = np.concatenate((master,tmp_array),axis=2)
#            except NameError:
#                master = tmp_array  
#        missing_value_mask = np.zeros((master.shape[0],master.shape[1]))
#        #This loop is probably not the best idea
#        #TODO: Change to numpy function 
#        for x in xrange(master.shape[0]):
#            for y in xrange(master.shape[1]):
#                missing_count=0
#                for i in xrange(len(self.decadals)):
#                    
#                    if master[x,y,i] > 0.9e20:
#                        missing_count+=1
#                if missing_count > round(0.1*len(self.decadals)):
#                    missing_value_mask[x,y] = 1e20
#                else:
#                    missing_value_mask[x,y] = 1
#                #print missing_count
#        misval_new = FileHandler.saveToNetCDF(missing_value_mask, observations.values()[0], 'missing_value_new')


        for year in self.decadals:
            
            tmp_array = FileHandler.openNetCDFFile(observations[year], 'var')
            #tmp_array = np.expand_dims(tmp_array, axis=2)
            tmp_array_1d = tmp_array.reshape((1,tmp_array.size))
            try:
                master = np.concatenate((master,tmp_array_1d))
            except NameError:
                orig_shape = tmp_array.shape
                master = tmp_array_1d  
        #sum along axis
        sum_array = np.sum(master,axis=0)
        #missing_value_mask = np.zeros((1,tmp_array.size))
        missing_value_mask = np.where(sum_array<0.1*(len(self.decadals)*1e20),sum_array,1e20)
        missing_value_mask = np.where(sum_array>=0.1*(len(self.decadals)*1e20),missing_value_mask,1)
                #print missing_count
        missing_value_mask = np.reshape(missing_value_mask, orig_shape)
        misval_new = FileHandler.saveToNetCDF(missing_value_mask, observations.values()[0], 'missing_value_new2')
        misval_new = cdo.timmean(input=misval_new, output=misval_new+'timmean')
#        tmpList = list()
#        for year in self.decadals:    
#            #Mark every gridpoint as missing, if it is missing once
#            tmpList.append(cdo.timavg(input=observations[year], output=self.tmpDir+'misvalmask_tmp'+str(year)))  
#            #Mark only as missing if it is missing the whole year
#            print observations[year]
#            #tmpList.append(cdo.yearmean(input=observations[year], output=self.tmpDir+'misvalmask_tmp'+str(year))) 
#        misval_tmp = cdo.mergetime(input=' '.join(tmpList), output=self.tmpDir+'misvalmask_tmp_all') 
#        misval = cdo.timavg(input=misval_tmp, output=self.tmpDir+'misvalmask_all_years')
#        misval_old = cdo.setrtoc('-1000,1000,1', input=misval, output=self.tmpDir+'missingValueMask.nc')                    

        return misval_new        
                
    
[docs]    def sellonlatbox(self, fileList):
        '''
        Method to select a lon-lat-box using CDO
        
        :param fileList: list with files of single file 
        :return: new filelist 
        '''
        if type(fileList) == list:        
            #Mult Processor            
            count = len(fileList)
            poolArgs = self.getPoolArgs(count,self,fileList,'_sellonlatbox')
            resultList = self.multiProcess(poolArgs)     
            return resultList 
        else:
            return self._sellonlatbox(fileList)
    
[docs]    def _sellonlatbox(self, file):
        '''
        Single process for selecting lon-lat-box with cdo
        
        :param: file
        :return: new file
        '''
        return cdo.sellonlatbox(self.lonlatbox, input=file, output=self.tmpDir+self.extractFilename(file)+'_sellonlatbox')
    
[docs]    def fieldMean(self, fileList):
        '''
        Multiprocess Method to calc fieldmean
        
        :param fileList:
        :return: fieldmean list
        '''
        #Mult Processor            
        count = len(fileList)
        poolArgs = self.getPoolArgs(count,self,fileList,'_fieldMean')
        resultList = self.multiProcess(poolArgs) 
        return resultList
    
[docs]    def _fieldMean(self, fn):
        '''
        Single process calculating fieldmean using CDO
        
        :param fn:
        :return: fieldmean fn
        '''
        return cdo.fldmean(input=fn, output=self.tmpDir+self.extractFilename(fn)+'_fldmean')

[docs]    def _plotField(self, fileName, vmin, vmax):
        '''
        @deprecated: use the static Plotter class instead
        Plot any field variable
        
        :param fileName: filepath
        :param vmin: min value for colorbar
        :param vmax: max value for colorbar
        ''' 
        Plotter.plotField(fileName, vmin, vmax, colormap='', output_folder=self.outputPlots, lonlatbox=self.lonlatbox)
        Plotter.saveFig(self.outputPlots, fileName.split(self.outputDir)[-1])
       
[docs]    def applyMissingValueMask(self, start, end, outputDirList):
        '''
        Multiply results with a missing value mask
        
        :todo: At the moment all files in the outputdir are multiplied. This should be changed
        :param start: startyear
        :param end: endyear
        :return: list of changed files
        '''
        fileList = list()
        for outputDir in outputDirList:
            for files in os.listdir(outputDir):
                if(os.path.isfile(outputDir+files) and (outputDir+files).split('_')[-1] != 'masked'):
                    file_parts = str(files).split('_')
                    if(file_parts[0] == start and file_parts[1] == end):
                        if self.maskMissingValues:
                            fileList.append(cdo.mul(input = ' '.join([outputDir+files, self.misvalMask]),
                                                    output = outputDir+files+'_masked'))
                        else:
                            fileList.append(outputDir+files) 
        return fileList 
    
    
[docs]    def detrendTimeSeries(self, fn, keepMean=True):
        '''
        Subtracts trend of a timeseries. 
        If keepMean is True the mean is kept 
        '''
        print fn
        #split file in mothly files
        tmp = cdo.copy(input=fn,output=fn+'copy',options='-f nc')
        tmp = cdo.splitmon(input=tmp,output=fn+'monthly',options='-f nc')
        #print tmp
        
        months = ['01','02','03','04','05','06','07','08','09','10','11','12']
        
        detrended_list = list()
        for mon in months:
            fn_mon = tmp+mon+'.nc'
            trend_a = fn_mon + 'trend_a'
            trend_b = fn_mon + 'trend_b'
            cdo.trend(input=fn_mon, output=' '.join([trend_a,trend_b]))
            
            if keepMean:
                trend_a = cdo.mulc('0.00',input=trend_a,output=trend_a+'_keepMean')
                
            detrended_tmp = cdo.subtrend(input=' '.join([fn_mon,trend_a,trend_b]), output=fn_mon+'_detrended')
            detrended_list.append(detrended_tmp)
    #        print fn
    #        print detrended
        detrended = cdo.mergetime(' '.join(detrended_list), output=fn+'_detrended')    
        print detrended
        return detrended
Navigation

Source code for metrics.metricAbstract

Quick search

Navigation