'''
Created on 17.08.2013
:author: Sebastian Illing
:contact: sebastian.illing@met.fu-berlin.de
Copyright (C) 2014 Sebastian Illing
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>
'''
import numpy as np
import os
import murcss_config
if murcss_config.file_system == 'miklip':
    from findFiles import FindFiles
else:
    from findFilesCustom import FindFilesCustom as FindFiles
from filehandler import FileHandler
class WrongArgument(Exception):
    pass


class Significance(object):
    '''
    Class for significance calculation
    '''
    def __init__(self, tmp_dir, output_dir):
        self.find_files = FindFiles(tmpDir=tmp_dir)
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)
        self.output_dir = output_dir
    def __getQuantile(self, mVar, q1, q2, precision=1.0):
        """
        Returns the q1'th and q2'th percentiles of the distribution given in
        the argument 'mVar'. Uses the 'precision' parameter to control the
        number of histogram bins and thereby the noise level.

        :param mVar: numpy array
        :param q1,q2: lower and upper quantile levels
        :return: list with the q2'th and q1'th percentile estimates
        """
        data = np.reshape(mVar, np.size(mVar))
        # np.histogram needs an integer bin count; use the left bin edges so
        # the boolean mask (one entry per bin) lines up with the edge array
        k, bins = np.histogram(data, bins=int(precision * np.sqrt(len(data))))
        norm_cumul = 1.0 * k.cumsum() / len(data)
        left_edges = bins[:-1]
        return [left_edges[norm_cumul > q2][0], left_edges[norm_cumul > q1][0]]
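
    # Illustrative sketch (not part of the original module): the quantile
    # lookup above builds an empirical CDF from a histogram and returns the
    # first bin edge whose cumulative fraction exceeds the requested level.
    # For a standard-normal sample the 5%/95% levels come out near -1.6/+1.6:
    #
    #   sample = np.random.randn(10000)
    #   counts, edges = np.histogram(sample, bins=int(np.sqrt(sample.size)))
    #   cdf = 1.0 * counts.cumsum() / sample.size
    #   upper = edges[:-1][cdf > 0.95][0]   # roughly +1.6
    #   lower = edges[:-1][cdf > 0.05][0]   # roughly -1.6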
    def __getLonLat(self, ifile):
        '''
        Get vectors with lon and lat values from a netCDF file using cdo.griddes.
        Was introduced because we were working with a damaged grid.
        The lon/lat values are located at the centre of a grid box.

        :param ifile: netCDF filename
        :return: (lon, lat) vectors
        '''
        def searchGriddes(grid, needle):
            tmp = [x for x in grid if x.find(needle) != -1]
            return float(tmp[0].split(' ')[-1])

        from cdo import Cdo
        cdo = Cdo()
        grid = cdo.griddes(input=ifile)
        xinc = searchGriddes(grid, 'xinc')
        xsize = searchGriddes(grid, 'xsize')
        yinc = searchGriddes(grid, 'yinc')
        xfirst = searchGriddes(grid, 'xfirst')
        yfirst = searchGriddes(grid, 'yfirst')
        ysize = searchGriddes(grid, 'ysize')
        # a variant shifted by half a grid box (xfirst + xinc/2, ...) was
        # computed here originally but immediately overwritten
        lon = np.arange(xfirst, xsize * xinc + xfirst, xinc, dtype=float)
        lat = np.arange(yfirst, ysize * yinc + yfirst, yinc, dtype=float)
        return (lon, lat)
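
    # Illustration (not part of the original module): 'cdo griddes' returns
    # the grid description as a list of text lines; for a regular lon/lat
    # grid it looks something like
    #
    #   gridtype  = lonlat
    #   xsize     = 144
    #   ysize     = 72
    #   xfirst    = 1.25
    #   xinc      = 2.5
    #   yfirst    = -88.75
    #   yinc      = 2.5
    #
    # (the values above are made up for a 2.5-degree grid). searchGriddes()
    # takes the token after the last space of the matching line, and np.arange
    # rebuilds the coordinate vectors from xfirst/yfirst in steps of xinc/yinc.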
    def checkSignificance(self, input, result_file, q1=0.05, q2=0.95, check_value=0):
        '''
        Checks whether a value is statistically significantly different from
        zero, using the bootstrapped files.

        :param input: list of files or a folder containing the bootstraps
        :param result_file: file to check
        :return: (sig_lon, sig_lat) lists with significant points
        '''
        if isinstance(input, list):
            bootstrap_files = input
        elif os.path.isdir(input):
            bootstrap_files = self.find_files.getAllFilesInFolder(input)
        else:
            raise WrongArgument('Input has to be a list of files or a folder')

        # load all files into arrays
        bootstrap_arrays = list()
        for b_file in bootstrap_files:
            tmp_var = FileHandler.openNetCDFFile(b_file)
            bootstrap_arrays.append(tmp_var['variable'])

        result = FileHandler.openNetCDFFile(result_file, mode='var')
        (imax, jmax) = np.shape(result)
        lon, lat = self.__getLonLat(result_file)
        sig_lon, sig_lat, sig_x, sig_y = list(), list(), list(), list()
        # loop over all grid points
        for i in range(0, imax):
            for j in range(0, jmax):
                test_sample = np.zeros(len(bootstrap_files))
                k = 0
                for item in bootstrap_arrays:
                    if item[i, j] < 1e10 and item[i, j] > -1e10:  # don't test missing values
                        test_sample[k] = item[i, j]  # construct test sample
                        k += 1
                test_mean = np.mean(test_sample)
                quant = self.__getQuantile(test_sample, q1, q2)
                # mark significant values
                if (quant[0] > check_value and quant[1] > check_value) or (quant[0] < check_value and quant[1] < check_value):
                    if test_mean > -1e19 and test_mean < 1e19:
                        # no missing value in result
                        if result[i, j] < 1e10 and result[i, j] > -1e10:
                            sig_lon.append(lon[j])
                            sig_lat.append(lat[i])
                            sig_x.append(j)
                            sig_y.append(i)
        self.save_significance_mask(sig_y, sig_x, result_file)
        return (sig_lon, sig_lat)
    def save_significance_mask(self, sign_lon, sign_lat, result_file):
        '''
        Saves the significance mask to a netCDF file.

        :param sign_lon,sign_lat: row and column indices of the significant
            points (as passed from checkSignificance)
        :param result_file: result file used to derive the shape and the name
            of the output file
        '''
        result = FileHandler.openNetCDFFile(result_file, mode='var')
        res_shape = result.shape
        sign_array = np.zeros(res_shape)
        for i in range(0, len(sign_lon)):
            sign_array[sign_lon[i], sign_lat[i]] = 1
        return FileHandler.saveToNetCDF(sign_array, result_file, '_significance_mask')
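

# Usage sketch (not part of the original module). The paths below are
# hypothetical placeholders; FindFiles and FileHandler must be importable as
# configured in murcss_config, and the bootstrap folder is expected to contain
# the bootstrapped skill-score netCDF files.
if __name__ == '__main__':
    significance = Significance('/tmp/murcss_tmp', '/tmp/murcss_output')
    sig_lon, sig_lat = significance.checkSignificance(
        '/tmp/murcss_bootstraps',              # folder with bootstrap files (hypothetical)
        '/tmp/murcss_output/skill_score.nc',   # result file to test (hypothetical)
        q1=0.05, q2=0.95, check_value=0)
    print('Found %d significant grid points' % len(sig_lon))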