'''
Created on 16.08.2013
:author: Sebastian Illing
:contact: sebastian.illing@met.fu-berlin.de
Copyright (C) 2014 Sebastian Illing
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>
'''
from msss import Msss
from significance2 import Significance
from plotter import Plotter
from cdo import *
# Module-level CDO wrapper instance; the code below calls cdo.ensmean on it.
cdo = Cdo()
import os
from random import choice
import shutil
class MsssBootstrap(Msss):
    '''
    Msss subclass that adds the bootstrap-specific steps: building the pool of
    ensemble means, drawing bootstrap samples from it and checking/plotting
    the significance of the results.
    '''

    def _bootstrapEnsmean(self, year, ensList, project, product, model, experiment):
        '''
        Remap every file of one project and write their ensemble mean.

        :param year: year the files belong to (only used to name the output)
        :param ensList: iterable of input file names
        :param project: project name used in the output path
        :param product: product name used in the output path
        :param model: model name used in the output path
        :param experiment: experiment prefix used in the output path
        :return: whatever cdo.ensmean returns for the written output file
        '''
        remapped = [self.remapFile(ensfile) for ensfile in ensList]
        # A random component in both folder and file name keeps the outputs of
        # different calls apart.
        tmpOut = (self.tmpDir + project + '_' + product + '_' + model + experiment
                  + '/' + str(year) + self.getRandomStr() + '/')
        self.makeFolder(tmpOut)
        outFile = (tmpOut + str(year) + experiment + project + '_' + model + '_'
                   + product + self.getRandomStr() + '.nc')
        return cdo.ensmean(input=' '.join(remapped), output=outFile)

    def _prepareBootstrap(self, year):
        '''
        Find the files of both projects for one year, remap them and calculate
        the ensemble mean of each project.

        If the instance already carries ``ensList`` / ``histList`` the files are
        taken from there, otherwise they are searched with ``self.findFiles``.

        :param year: year to process
        :return: tuple (ensmeanProject1, ensmeanProject2)
        '''
        if hasattr(self, 'ensList'):
            ensListProject1 = self.ensList[year]
        else:
            ensListProject1 = self.findFiles.getFiles(
                year, self.project1, self.model1, self.variable,
                ensemblemembers=self.ensemblemembers1, product=self.product1,
                institute=self.institute1, exp_prefix=self.experiment1,
                maxLeadtime=self.maxLeadtime, minLeadtime=self.minLeadtime)
        ensmeanProject1 = self._bootstrapEnsmean(
            year, ensListProject1, self.project1, self.product1,
            self.model1, self.experiment1)

        if hasattr(self, 'histList'):
            ensListProject2 = self.histList[year]
        else:
            ensListProject2 = self.findFiles.getFiles(
                year, self.project2, self.model2, self.variable,
                ensemblemembers=self.ensemblemembers2, product=self.product2,
                institute=self.institute2, exp_prefix=self.experiment2,
                maxLeadtime=self.maxLeadtime, minLeadtime=self.minLeadtime)
        ensmeanProject2 = self._bootstrapEnsmean(
            year, ensListProject2, self.project2, self.product2,
            self.model2, self.experiment2)

        return (ensmeanProject1, ensmeanProject2,)

    def prepareBootstrap(self):
        '''
        Multiprocess approach.
        Uses _prepareBootstrap to prepare the bootstrap files for input1 and
        input2, one task per entry of ``self.decadals``. The resulting pools
        are also stored on ``self.bootstrapPoolProject1`` and
        ``self.bootstrapPoolProject2``.

        :return: tuple (ensMeanProject1Dict, ensMeanProject2Dict), both keyed by year
        '''
        procs = len(self.decadals)
        # The method name travels with the arguments; presumably multiProcess
        # dispatches on it (defined outside this file).
        poolArgs = zip([self] * procs, self.decadals, ['_prepareBootstrap'] * procs)
        result = self.multiProcess(poolArgs)

        ensMeanProject1Dict = dict()
        ensMeanProject2Dict = dict()
        # Results are matched to years by position.
        for year, ensmeans in zip(self.decadals, result):
            ensMeanProject1Dict[year] = ensmeans[0]
            ensMeanProject2Dict[year] = ensmeans[1]

        if self.lonlatbox is not None:
            # One-element tuple so the formatting also works if lonlatbox is
            # itself a tuple.
            print('Selecting lon-lat-box %s' % (self.lonlatbox,))
            for year in self.decadals:
                ensMeanProject1Dict[year] = self.sellonlatbox(ensMeanProject1Dict[year])
                ensMeanProject2Dict[year] = self.sellonlatbox(ensMeanProject2Dict[year])

        self.bootstrapPoolProject1 = ensMeanProject1Dict
        self.bootstrapPoolProject2 = ensMeanProject2Dict
        return (ensMeanProject1Dict, ensMeanProject2Dict,)

    def bootstrapGoddard(self, outputFolder='./tmp/'):
        '''
        Actual method for selecting the files of one bootstrap run.

        The bootstrap pool is first copied into ``outputFolder`` (which also
        becomes ``self.tmpDir``; the pool attributes are re-pointed at the
        copies). Then, for every year that has not been filled yet, a year is
        drawn at random from ``self.decadals`` and its pool files and
        observation are assigned. The up to five following years are taken
        from the years following the drawn one, so data is selected in blocks
        of consecutive years (because of a trend in the data).

        :param outputFolder: temp folder, created if it does not exist
        :return: list [bootstrapResultM1, bootstrapResultM2, bootstrapObservations],
                 each a dict keyed by year
        '''
        if not os.path.isdir(outputFolder):
            os.makedirs(outputFolder)
        self.tmpDir = outputFolder

        newPoolList1 = dict()
        newPoolList2 = dict()
        for year in self.decadals:
            f = self.bootstrapPoolProject1[year]
            new_f = self.tmpDir + self.extractFilename(f)
            shutil.copyfile(f, new_f)
            newPoolList1[year] = new_f

            f = self.bootstrapPoolProject2[year]
            new_f = self.tmpDir + self.extractFilename(f)
            shutil.copyfile(f, new_f)
            newPoolList2[year] = new_f
        self.bootstrapPoolProject1 = newPoolList1
        self.bootstrapPoolProject2 = newPoolList2

        bootstrapResultM1 = dict()
        bootstrapResultM2 = dict()
        bootstrapObservations = dict()
        lastYear = max(self.decadals)
        for year in self.decadals:
            if year in bootstrapResultM1:
                # Already filled as a follow-up year of an earlier draw.
                continue
            yearToSelect = choice(self.decadals)
            bootstrapResultM1[year] = self.bootstrapPoolProject1[yearToSelect]
            bootstrapResultM2[year] = self.bootstrapPoolProject2[yearToSelect]
            bootstrapObservations[year] = self.obsRemapped[yearToSelect]
            for i in range(1, 6):
                # NOTE(review): the source year uses a strict '<' while the
                # target year uses '<=', so the last year is never used as a
                # follow-up source - confirm this is intended.
                if yearToSelect + i < lastYear and year + i <= lastYear:
                    if yearToSelect + i in self.decadals:
                        bootstrapResultM1[year + i] = self.bootstrapPoolProject1[yearToSelect + i]
                        bootstrapResultM2[year + i] = self.bootstrapPoolProject2[yearToSelect + i]
                        bootstrapObservations[year + i] = self.obsRemapped[yearToSelect + i]

        return [bootstrapResultM1, bootstrapResultM2, bootstrapObservations]

    def _calcSignificance(self, bootstrap_folders, output_folder, plot_folder, file_to_check):
        '''
        Calculate the significance for the input field (file_to_check).
        The field is also plotted with significance crosses.

        :param bootstrap_folders: folders of the bootstrapped data; the bare
                                  file name of file_to_check is appended to
                                  each of them
        :param output_folder: path; the part of file_to_check after it is used
                              as the name of the saved figure
        :param plot_folder: path the figure is saved to
        :param file_to_check: file name of the result field
        '''
        fn = file_to_check
        basename = self.extractFilename(fn)
        b_array_list = [folder + basename for folder in bootstrap_folders]

        significance = Significance(self.tmpDir, self.outputPlots)
        (sig_lon, sig_lat) = significance.checkSignificance(b_array_list, fn)

        m = Plotter.plotField(fn, -1, 1, colormap='RedBlu', lonlatbox=self.lonlatbox)
        Plotter.addCrosses(m, sig_lon, sig_lat)
        Plotter.saveFig(plot_folder, fn.split(output_folder)[-1])

    def calcSignificance(self, bootstrap_folders, output_folder, plot_folder):
        '''
        Multiprocessing of "_calcSignificance". Creates one task for every
        result file found below output_folder; if ``self.maskMissingValues``
        is set, only files whose name contains 'masked' are used.

        :param bootstrap_folders: folders of the bootstrapped data
        :param output_folder: path searched for result files
        :param plot_folder: path the figures are saved to
        '''
        files_to_check = self.findFiles.getAllFilesInSubfolders(output_folder)
        if self.maskMissingValues:
            files_to_check = [fn for fn in files_to_check if 'masked' in fn]
        procs = len(files_to_check)
        poolArgs = zip([self] * procs, [bootstrap_folders] * procs, [output_folder] * procs,
                       [plot_folder] * procs, files_to_check, ['_calcSignificance'] * procs)
        self.multiProcess(poolArgs)
def main(config_dict, baseDir):
    '''
    Main function for the whole bootstrap process
    1. Normal MSSS calculation
    2. Prepare bootstrap data
    3. Calculate bootstraps
    4. Calculate significance and plot files

    Note: the keys 'baseDir' (if present), 'bootstrap_number' and
    'significance' are removed from the passed config_dict.

    :param config_dict: dictionary with all params needed for the Msss
                        calculation; must contain 'bootstrap_number',
                        'significance' and 'cache'
    :param baseDir: dir of class
    :return: the MsssBootstrap instance of the normal (non-bootstrap) run
    '''
    # baseDir is passed explicitly below, so drop a possible duplicate.
    config_dict.pop('baseDir', None)
    bootstrap_number = config_dict.pop('bootstrap_number')
    config_dict.pop('significance')

    msss = MsssBootstrap(baseDir=baseDir, **config_dict)
    try:
        msss.prepareInput()
        msss.analyze()
        remappedObservations = msss.obsRemapped

        print('Prepare Bootstrap data')
        msss.prepareBootstrap()

        bootstrap = None
        bootstrap_folders = list()
        for i in range(1, bootstrap_number + 1):
            print('###############################################')
            print('Bootstrap number ' + str(i))
            print('###############################################')
            msss.obsRemapped = remappedObservations

            # Every bootstrap run gets its own output, plot and cache folder
            # below the configured cache.
            outputFolder = '/'.join([config_dict['cache'], 'bootstrap', 'msss', 'number' + str(i)]) + '/'
            cacheFolder = outputFolder + 'cache/'
            bootstrap_config_dict = config_dict.copy()
            bootstrap_config_dict['output'] = outputFolder
            bootstrap_config_dict['output_plots'] = outputFolder + 'plots/'
            bootstrap_config_dict['cache'] = cacheFolder
            bootstrap_config_dict['bootstrap'] = True
            bootstrap_config_dict['obsRemapped'] = remappedObservations

            bootstrap = MsssBootstrap(baseDir=baseDir, **bootstrap_config_dict)
            (bootstrap.input1Remapped,
             bootstrap.input2Remapped,
             bootstrap.observationRemapped) = msss.bootstrapGoddard(cacheFolder)
            bootstrap.analyze()
            bootstrap_folders.append(bootstrap.outputDir)

        if bootstrap is None:
            # bootstrap_number < 1: there are no bootstrap results to test
            # against (previously this ended in a NameError).
            print('No bootstrap runs requested - skipping significance calculation')
        else:
            print('###############################################')
            print('Calculating Significance')
            print('###############################################')
            # Runs on the last bootstrap instance, as before.
            bootstrap.calcSignificance(bootstrap_folders, msss.outputDir, msss.outputPlots)
        print("Plots produced in %s" % msss.outputPlots)
        return msss
    finally:
        # bootstrapGoddard re-points tmpDir at the per-run cache folder;
        # restore the configured cache directory.
        msss.tmpDir = config_dict['cache']