Source code for ILAMB.Variable

from .constants import mid_months,bnd_months
from .Regions import Regions
import matplotlib.colors as colors
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from pylab import get_cmap
from cf_units import Unit
from . import ilamblib as il
from . import Post as post
import numpy as np
import matplotlib.pyplot as plt

def _shiftLon(lon):
    return (lon<=180)*lon + (lon>180)*(lon-360) + (lon<-180)*360

def _shiftFirstColumnToDateline(lon,lon_bnds=None,data=None,area=None):
    shift = lon.argmin()
    lon = np.roll(lon,-shift)
    if lon_bnds is not None:
        lon_bnds = np.roll(lon_bnds,-shift,axis=0)
        if lon_bnds[ 0,0] > lon_bnds[ 0,1]: lon_bnds[ 0,0] = max(lon_bnds[ 0,0]-360,-180)
        if lon_bnds[-1,1] < lon_bnds[-1,0]: lon_bnds[-1,1] = min(lon_bnds[-1,1]+360,+180)
    if data is not None: data = np.roll(data,-shift,axis=-1)
    if area is not None: area = np.roll(area,-shift,axis=-1)
    return lon,lon_bnds,data,area

def _createBnds(x):
    x      = np.asarray(x)
    x_bnds = np.zeros((x.size,2))
    x_bnds[+1:,0] = 0.5*(x[:-1]+x[+1:])
    x_bnds[:-1,1] = 0.5*(x[:-1]+x[+1:])
    if x.size == 1:
        x_bnds[ ...] = x
    else:
        x_bnds[ 0,0] = x[ 0] - 0.5*(x[ 1]-x[ 0])
        x_bnds[-1,1] = x[-1] + 0.5*(x[-1]-x[-2])
    return x_bnds
        
[docs]class Variable: r"""A class for managing variables and their analysis. There are two ways to create a Variable object. Because python does not support multiple constructors, we will use keyword arguments so that the users intent may be captured. The first way to specify a Variable is by loading a netCDF4 file. You can achieve this by specifying the 'filename' and 'variable_name' keywords. The second way is to use the remaining keyword arguments to specify data arrays directly. If you use the second way, you must specify the keywords 'data' and 'unit'. The rest are truly optional and depend on the nature of your data. Parameters ---------- filename : str, optional Name of the netCDF4 file from which to extract a variable variable_name : str, optional Name of the variable to extract from the netCDF4 file data : numpy.ndarray, optional The array which contains the data which constitutes the variable unit : str, optional The unit of the input data name : str, optional The name of the variable, will be how it is saved in the netCDF4 file time : numpy.ndarray, optional a 1D array of times in days since 1850-01-01 00:00:00 time_bnds : numpy.ndarray, optional a 2D array of time bounds in days since 1850-01-01 00:00:00 lat : numpy.ndarray, optional a 1D array of latitudes of cell centroids lon : numpy.ndarray, optional a 1D array of longitudes of cell centroids area : numpy.ndarray, optional a 2D array of the cell areas ndata : int, optional number of data sites this data represents alternate_vars : list of str, optional a list of alternate acceptable variable names depth_bnds : numpy.ndarray, optional a 2D array representing the boundaries of the cells in the vertical dimension Examples -------- You can initiate a Variable by specifying the data directly. >>> lat = np.linspace(- 90, 90, 91) >>> lon = np.linspace(-180,180,181) >>> data = np.random.rand(91,181) >>> v = Variable(name="some_variable",unit="some_unit",lat=lat,lon=lon,data=data) Or you can initiate a variable by extracting a specific field from a netCDF4 file. >>> v = Variable(filename="some_netcdf_file.nc",variable_name="name_of_var_to_extract") """ def __init__(self,**keywords): r"""Constructor for the variable class by specifying the data arrays. """ # See if the user specified a netCDF4 file and variable filename = keywords.get("filename" ,None) groupname = keywords.get("groupname" ,None) variable_name = keywords.get("variable_name",None) alternate_vars = keywords.get("alternate_vars",[]) calendar = "noleap" if filename is None: # if not pull data from other arguments data = keywords.get("data" ,None) data_bnds = keywords.get("data_bnds" ,None) unit = keywords.get("unit" ,None) name = keywords.get("name" ,"unnamed") time = keywords.get("time" ,None) time_bnds = keywords.get("time_bnds" ,None) lat = keywords.get("lat" ,None) lat_bnds = keywords.get("lat_bnds" ,None) lon = keywords.get("lon" ,None) lon_bnds = keywords.get("lon_bnds" ,None) depth = keywords.get("depth" ,None) depth_bnds = keywords.get("depth_bnds" ,None) ndata = keywords.get("ndata" ,None) attr = None assert data is not None assert unit is not None cbounds = None self.filename = "" else: self.filename = filename assert variable_name is not None t0 = keywords.get("t0",None) tf = keywords.get("tf",None) convert_calendar = keywords.get("convert_calendar",True) out = il.FromNetCDF4(filename,variable_name,alternate_vars,t0,tf,group=groupname,convert_calendar=convert_calendar) data,data_bnds,unit,name,time,time_bnds,lat,lat_bnds,lon,lon_bnds,depth,depth_bnds,cbounds,ndata,calendar,attr = out # Add handling for some units which cf_units does not support unit = unit.replace("psu","1e-3") if not np.ma.isMaskedArray(data): data = np.ma.masked_array(data) self.data = data self.data_bnds = data_bnds self.ndata = ndata self.unit = unit self.name = name self.cbounds = cbounds self.calendar = calendar self.attr = attr # Handle time data self.time = time # time data self.time_bnds = time_bnds # bounds on time self.temporal = False # flag for temporal data self.dt = 0. # mean temporal spacing self.monthly = False # flag for monthly means if time is not None: self.temporal = True if self.time_bnds is None: self.time_bnds = _createBnds(self.time) self.dt = (self.time_bnds[:,1]-self.time_bnds[:,0]).mean() if np.allclose(self.dt,30,atol=3): self.monthly = True assert (2*self.time.size) == (self.time_bnds.size) # Handle space or multimember data self.spatial = False self.lat = lat self.lon = lon self.lat_bnds = lat_bnds self.lon_bnds = lon_bnds self.area = keywords.get("area",None) # If the last dimensions are lat and lon, this is spatial data if lat is not None and lon is not None and data.ndim >= 2: if (data.shape[-2] == lat.size and data.shape[-1] == lon.size): self.spatial = True # Exception for PEcAn if self.spatial is True: if data.shape[-2:] == (1,1): self.spatial = False self.ndata = 1 self.data = self.data.reshape(self.data.shape[:-2]+(1,)) if self.spatial is False: # Shift possible values on [0,360] to [-180,180] if self.lon is not None: self.lon = _shiftLon(self.lon ) if self.lon_bnds is not None: self.lon_bnds = _shiftLon(self.lon_bnds) else: # Exception for CABLE, flips if monotonically decreasing if np.all(np.diff(self.lat)<0): self.lat = self.lat [::-1 ] self.data = self.data[...,::-1,: ] if self.lat_bnds is not None: self.lat_bnds = self.lat_bnds[::-1,::-1] if self.area is not None: self.area = self.area [::-1,:] # Shift possible values on [0,360] to [-180,180] if self.lon is not None: self.lon = _shiftLon(self.lon ) if self.lon_bnds is not None: self.lon_bnds = _shiftLon(self.lon_bnds) # Shift first column of data to the international dateline out = _shiftFirstColumnToDateline(self.lon, lon_bnds = self.lon_bnds, data = self.data, area = self.area) self.lon,self.lon_bnds,self.data,self.area = out # Finally create bounds if they are not there if self.lat_bnds is None: self.lat_bnds = _createBnds(self.lat) if self.lon_bnds is None: self.lon_bnds = _createBnds(self.lon) # Fix potential problems with rolling the axes of the lon_bnds self.lat_bnds = self.lat_bnds.clip(- 90,+ 90) self.lon_bnds = self.lon_bnds.clip(-180,+180) # Make sure that the value lies within the bounds assert np.all((self.lat>=self.lat_bnds[:,0])*(self.lat<=self.lat_bnds[:,1])) assert np.all((self.lon>=self.lon_bnds[:,0])*(self.lon<=self.lon_bnds[:,1])) if self.area is None: self.area = il.CellAreas(None,None, lat_bnds=self.lat_bnds, lon_bnds=self.lon_bnds) # Is the data layered self.layered = False self.depth = depth self.depth_bnds = depth_bnds if (data.ndim > (self.temporal + 2*self.spatial + (self.ndata is not None))) and depth is not None: self.layered = True if depth_bnds is None: self.depth_bnds = _createBnds(self.depth) def __str__(self): if self.data is None: return "Uninitialized Variable" if self.ndata is None: ndata = "N/A" else: ndata = str(self.ndata) if not self.temporal: time = "" else: time = " (%d)" % self.time.size if not self.spatial: space = "" else: space = " (%d,%d)" % (self.lat.size,self.lon.size) if not self.layered: layer = "" else: layer = " (%d)" % (self.depth.size) s = "Variable: %s\n" % self.name s += "-"*(len(self.name)+10) + "\n" s += "{0:>20}: ".format("unit") + self.unit + "\n" s += "{0:>20}: ".format("isTemporal") + str(self.temporal) + time + "\n" s += "{0:>20}: ".format("isSpatial") + str(self.spatial) + space + "\n" s += "{0:>20}: ".format("isLayered") + str(self.layered) + layer + "\n" s += "{0:>20}: ".format("nDatasites") + ndata + "\n" s += "{0:>20}: ".format("dataShape") + "%s\n" % (self.data.shape,) np.seterr(over='ignore',under='ignore') s += "{0:>20}: ".format("dataMax") + "%e\n" % self.data.max() s += "{0:>20}: ".format("dataMin") + "%e\n" % self.data.min() s += "{0:>20}: ".format("dataMean") + "%e\n" % self.data.mean() np.seterr(over='warn',under='warn') if self.cbounds is not None: s += "{0:>20}: ".format("climatology") + "%d thru %d\n" % (self.cbounds[0],self.cbounds[1]) return s
[docs] def nbytes(self): r"""Estimate the memory usage of a variable in bytes. """ nbytes = 0. for key in self.__dict__.keys(): try: nbytes += self.__dict__[key].nbytes except: pass return nbytes
[docs] def integrateInTime(self,**keywords): r"""Integrates the variable over a given time period. Uses nodal integration to integrate to approximate .. math:: \int_{t_0}^{t_f} v(t,\dots)\ dt The arguments of the integrand reflect that while it must be at least defined in time, the remaining arguments are flexible. If :math:`t_0` or :math:`t_f` are not specified, the variable will be integrated over the extent of its time domain. If the mean function value over time is desired, this routine will approximate .. math:: \frac{1}{t_f-t_0} \int_{t_0}^{t_f} v(t,\dots)\ dt again by nodal integration. The amount of time which we divide by is the non-masked amount of time. This means that if a function has some values masked or marked as invalid, we do not penalize the average value by including this as a time at which data is expected. Parameters ---------- t0 : float, optional initial time in days since 1/1/1850 tf : float, optional final time in days since 1/1/1850 mean : boolean, optional enable to divide the integrand to get the mean function value Returns ------- integral : ILAMB.Variable.Variable a Variable instance with the integrated value along with the appropriate name and unit change """ if not self.temporal: raise il.NotTemporalVariable() t0 = keywords.get("t0",self.time_bnds[:,0].min()) tf = keywords.get("tf",self.time_bnds[:,1].max()) mean = keywords.get("mean",False) # find which time bounds are included even partially in the interval [t0,tf] time_bnds = np.copy(self.time_bnds) ind = np.where((t0<time_bnds[:,1])*(tf>time_bnds[:,0]))[0] time_bnds[(t0>time_bnds[:,0])*(t0<time_bnds[:,1]),0] = t0 time_bnds[(tf>time_bnds[:,0])*(tf<time_bnds[:,1]),1] = tf time_bnds = time_bnds[ind,:] dt = (time_bnds[:,1]-time_bnds[:,0]) # now expand this dt to the other dimensions of the data array (i.e. space or datasites) for i in range(self.data.ndim-1): dt = np.expand_dims(dt,axis=-1) # approximate the integral by nodal integration (rectangle rule) np.seterr(over='ignore',under='ignore') integral = (self.data[ind]*dt).sum(axis=0) integral_bnd = None if self.data_bnds is not None: integral_bnd = np.ma.asarray([((((self.data_bnds[...,0])[ind])*dt).sum(axis=0)).T, ((((self.data_bnds[...,1])[ind])*dt).sum(axis=0)).T]).T np.seterr(over='raise',under='raise') # the integrated array should be masked where *all* data in time was previously masked mask = False if self.data.ndim > 1 and self.data.mask.size > 1: mask = np.apply_along_axis(np.all,0,self.data.mask[ind]) integral = np.ma.masked_array(integral,mask=mask,copy=False) # handle units unit = Unit(self.unit) name = self.name + "_integrated_over_time" if mean: # divide thru by the non-masked amount of time, the units # can remain as input because we integrate over time and # then divide by the time interval in the same units name += "_and_divided_by_time_period" if self.data.mask.size > 1: dt = (dt*(self.data.mask[ind]==0)).sum(axis=0) else: dt = dt.sum(axis=0) np.seterr(over='ignore',under='ignore') integral = integral / dt if integral_bnd is not None: integral_bnd[...,0] = integral_bnd[...,0] / dt integral_bnd[...,1] = integral_bnd[...,1] / dt np.seterr(over='raise' ,under='raise' ) else: # if not a mean, we need to potentially handle unit conversions unit0 = Unit("d")*unit unit = Unit(unit0.format().split()[-1]) if not isinstance(integral.mask, np.ndarray): if integral.mask == True: integral=np.ma.masked_array(data=integral.data, mask=np.ones(integral.shape,dtype='bool')) else: integral=np.ma.masked_array(data=integral.data, mask=np.zeros(integral.shape,dtype='bool')) unit0.convert(integral,unit,inplace=True) if integral_bnd is not None: unit0.convert(integral_bnd,unit,inplace=True) return Variable(data = integral, data_bnds = integral_bnd, unit = "%s" % unit, name = name, lat = self.lat, lat_bnds = self.lat_bnds, lon = self.lon, lon_bnds = self.lon_bnds, depth = self.depth, depth_bnds = self.depth_bnds, area = self.area, ndata = self.ndata)
[docs] def integrateInDepth(self,**keywords): r"""Integrates the variable over a given layer limits. Uses nodal integration to integrate to approximate .. math:: \int_{z_0}^{z_f} v(z,\dots)\ dz The arguments of the integrand reflect that while it must be at least defined in depth, the remaining arguments are flexible. If :math:`z_0` or :math:`z_f` are not specified, the variable will be integrated over the extent of its depth domain. If the mean function value over depth is desired, this routine will approximate .. math:: \frac{1}{z_f-z_0} \int_{z_0}^{z_f} v(z,\dots)\ dz again by nodal integration. The amount of depth which we divide by is the non-masked amount of depth. This means that if a function has some values masked or marked as invalid, we do not penalize the average value by including this as a depth at which data is expected. Parameters ---------- z0 : float, optional initial depth in m zf : float, optional final depth in m mean : boolean, optional enable to divide the integrand to get the mean function value Returns ------- integral : ILAMB.Variable.Variable a Variable instance with the integrated value along with the appropriate name and unit change """ if not self.layered: raise il.NotLayeredVariable() z0 = keywords.get("z0",self.depth_bnds[:,0].min()) zf = keywords.get("zf",self.depth_bnds[:,1].max()) mean = keywords.get("mean",False) # find which time bounds are included even partially in the interval [z0,zf] depth_bnds = np.copy(self.depth_bnds) ind = np.where((z0<depth_bnds[:,1])*(zf>depth_bnds[:,0]))[0] depth_bnds[(z0>depth_bnds[:,0])*(z0<depth_bnds[:,1]),0] = z0 depth_bnds[(zf>depth_bnds[:,0])*(zf<depth_bnds[:,1]),1] = zf depth_bnds = depth_bnds[ind,:] dz = (depth_bnds[:,1]-depth_bnds[:,0]) args = [] axis = 0 if self.temporal: axis += 1 dz = np.expand_dims(dz,axis=0) args.append(range(self.time.size)) if self.layered: args.append(ind) if self.ndata: dz = np.expand_dims(dz,axis=-1) args.append(range(self.ndata)) if self.spatial: dz = np.expand_dims(dz,axis=-1) dz = np.expand_dims(dz,axis=-1) args.append(range(self.lat.size)) args.append(range(self.lon.size)) ind = np.ix_(*args) # approximate the integral by nodal integration (rectangle rule) shp = self.data[ind].shape np.seterr(over='ignore',under='ignore') integral = (self.data[ind]*dz).sum(axis=axis) np.seterr(over='raise',under='raise') # the integrated array should be masked where *all* data in depth was previously masked mask = False if self.data.ndim > 1 and self.data.mask.size > 1: mask = np.apply_along_axis(np.all,axis,self.data.mask[ind]) integral = np.ma.masked_array(integral,mask=mask,copy=False) # handle units unit = Unit(self.unit) name = self.name + "_integrated_over_depth" if mean: # divide thru by the non-masked amount of time, the units # can remain as input because we integrate over time and # then divide by the time interval in the same units name += "_and_divided_by_depth" if self.data.mask.size > 1: dz = (dz*(self.data.mask[ind]==0)).sum(axis=axis) else: dz = dz.sum(axis=axis) np.seterr(over='ignore',under='ignore') integral = integral / dz np.seterr(over='raise' ,under='raise' ) else: # if not a mean, we need to potentially handle unit conversions unit0 = Unit("m")*unit unit = Unit(unit0.format().split()[-1]) unit0.convert(integral,unit,inplace=True) return Variable(data = integral, unit = "%s" % unit, name = name, time = self.time, time_bnds = self.time_bnds, lat = self.lat, lat_bnds = self.lat_bnds, lon = self.lon, lon_bnds = self.lon_bnds, area = self.area, ndata = self.ndata)
[docs] def integrateInSpace(self,region=None,mean=False,weight=None,intabs=False): r"""Integrates the variable over a given region. Uses nodal integration to integrate to approximate .. math:: \int_{\Omega} v(\mathbf{x},\dots)\ d\Omega The arguments of the integrand reflect that while it must be at least defined in space, the remaining arguments are flexible. The variable :math:`\Omega` represents the desired region over which we will integrate. If no region is specified, the variable will be integrated over the extent of its spatial domain. If the mean function value over time is desired, this routine will approximate .. math:: \frac{1}{A(\Omega)} \int_{\Omega} v(\mathbf{x},\dots)\ d\Omega again by nodal integration. The spatial area which we divide by :math:`A(\Omega)` is the non-masked area of the given region, also given by .. math:: A(\Omega) = \int_{\Omega}\ d\Omega This means that if a function has some values masked or marked as invalid, we do not penalize the average value by including this as a point at which data is expected. We also support the inclusion of an optional weighting function :math:`w(\mathbf{x})` which is a function of space only. In this case, we approximate the following integral .. math:: \int_{\Omega} v(\mathbf{x},\dots)w(\mathbf{x})\ d\Omega and if a mean value is desired, .. math:: \frac{1}{\int_{\Omega} w(\mathbf{x})\ d\Omega} \int_{\Omega} v(\mathbf{x},\dots)w(\mathbf{x})\ d\Omega Parameters ---------- region : str, optional name of the region overwhich you wish to integrate mean : bool, optional enable to divide the integrand to get the mean function value weight : numpy.ndarray, optional a data array of the same shape as this variable's areas representing an additional weight in the integrand intabs : bool, optional enable to integrate the absolute value Returns ------- integral : ILAMB.Variable.Variable a Variable instace with the integrated value along with the appropriate name and unit change. """ def _integrate(var,areas): op = lambda x : x if intabs: op = np.abs assert var.shape[-2:] == areas.shape np.seterr(over='ignore',under='ignore') vbar = (op(var)*areas).sum(axis=-1).sum(axis=-1) np.seterr(over='raise',under='raise') return vbar if not self.spatial: raise il.NotSpatialVariable() # determine the measure mask = self.data.mask while mask.ndim > 2: mask = np.all(mask,axis=0) measure = np.ma.masked_array(self.area,mask=mask,copy=True) if weight is not None: measure *= weight # if we want to integrate over a region, we need add to the # measure's mask r = Regions() if region is not None: measure.mask += r.getMask(region,self) # approximate the integral integral = _integrate(self.data,measure) if mean: np.seterr(under='ignore',invalid='ignore') integral = integral / measure.sum() np.seterr(under='raise',invalid='warn') # handle the name and unit name = self.name + "_integrated_over_space" if region is not None: name = name.replace("space",region) unit = Unit(self.unit) if mean: # we have already divided thru by the non-masked area in # units of m^2, which are the same units of the integrand. name += "_and_divided_by_area" else: # if not a mean, we need to potentially handle unit conversions unit *= Unit("m2") return Variable(data = np.ma.masked_array(integral), unit = "%s" % unit, time = self.time, time_bnds = self.time_bnds, depth = self.depth, depth_bnds = self.depth_bnds, name = name)
[docs] def siteStats(self,region=None,weight=None,intabs=False): """Computes the mean and standard deviation of the variable over all data sites. Parameters ---------- region : str, optional name of the region overwhich you wish to include stats. Returns ------- mean : ILAMB.Variable.Variable a Variable instace with the mean values """ if self.ndata is None: raise il.NotDatasiteVariable() op = lambda x : x if intabs: op = np.abs rem_mask = np.copy(self.data.mask) rname = "" r = Regions() if region is not None: self.data.mask += r.getMask(region,self) rname = "_over_%s" % region np.seterr(over='ignore',under='ignore') mean = np.ma.average(op(self.data),axis=-1,weights=weight) np.seterr(over='raise',under='raise') self.data.mask = rem_mask return Variable(data = mean, unit = self.unit, time = self.time, time_bnds = self.time_bnds, depth = self.depth, depth_bnds = self.depth_bnds, name = "mean_%s%s" % (self.name,rname))
[docs] def annualCycle(self): """Computes mean annual cycle information (climatology) for the variable. For each site/cell/depth in the variable, compute the mean annual cycle. Returns ------- mean : ILAMB.Variable.Variable The annual cycle mean values """ if not self.temporal: raise il.NotTemporalVariable() assert self.monthly assert self.time.size > 11 begin = np.argmin(self.time[:11]%365) end = begin+int(self.time[begin:].size/12.)*12 shp = (-1,12) + self.data.shape[1:] v = self.data[begin:end,...].reshape(shp) np.seterr(over='ignore',under='ignore') mean = v.mean(axis=0) np.seterr(over='raise',under='raise') return Variable(data = mean, unit = self.unit, name = "annual_cycle_mean_of_%s" % self.name, time = mid_months, time_bnds = np.asarray([bnd_months[:-1],bnd_months[1:]]).T, lat = self.lat, lat_bnds = self.lat_bnds, lon = self.lon, lon_bnds = self.lon_bnds, area = self.area, depth = self.depth, depth_bnds = self.depth_bnds, ndata = self.ndata)
[docs] def timeOfExtrema(self,etype="max"): """Returns the time of the specified extrema. Parameters ---------- etype : str, optional The type of extrema to compute, either 'max' or 'min' Returns ------- extrema : ILAMB.Variable.Variable The times of the extrema computed """ if not self.temporal: raise il.NotTemporalVariable() fcn = {"max":np.argmax,"min":np.argmin} assert etype in fcn.keys() tid = np.apply_along_axis(fcn[etype],0,self.data) mask = False if self.data.ndim > 1 and self.data.mask.ndim > 0: mask = np.apply_along_axis(np.all,0,self.data.mask) # mask cells where all data is masked data = np.ma.masked_array(self.time[tid],mask=mask) return Variable(data = data, unit = "d", name = "time_of_%s_%s" % (etype,self.name), lat = self.lat, lat_bnds = self.lat_bnds, lon = self.lon, lon_bnds = self.lon_bnds, area = self.area, depth = self.depth, depth_bnds = self.depth_bnds, ndata = self.ndata)
[docs] def extractDatasites(self,lat,lon): """Extracts a variable at sites defined by a set of latitude and longitude. Parameters ---------- lat : numpy.ndarray an array with the latitude values, must be same size as the longitude values lon : numpy.ndarray an array with the longitude values, must be same size as the latitude values Returns ------- extracted : ILAMB.Variable.Variable The extracted variables """ assert lat.size == lon.size if not self.spatial: raise il.NotSpatialVariable() ilat = np.apply_along_axis(np.argmin,1,np.abs(lat[:,np.newaxis]-self.lat)) ilon = np.apply_along_axis(np.argmin,1,np.abs(lon[:,np.newaxis]-self.lon)) ndata = lat.size if self.data.ndim == 2: data = self.data[ ilat,ilon] else: data = self.data[...,ilat,ilon] return Variable(data = data, unit = self.unit, name = self.name, lat = lat, lon = lon, ndata = ndata, depth = self.depth, depth_bnds = self.depth_bnds, time = self.time, time_bnds = self.time_bnds)
[docs] def spatialDifference(self,var): """Computes the point-wise difference of two spatially defined variables. If the variable is spatial or site data and is defined on the same grid, this routine will simply compute the difference in the data arrays. If the variables are spatial but defined on separate grids, the routine will interpolate both variables to a composed grid via nearest-neighbor interpolation and then return the difference. Parameters ---------- var : ILAMB.Variable.Variable The variable we wish to compare against this variable Returns ------- diff : ILAMB.Variable.Variable A new variable object representing the difference """ def _make_bnds(x): bnds = np.zeros(x.size+1) bnds[1:-1] = 0.5*(x[1:]+x[:-1]) bnds[0] = max(x[0] -0.5*(x[ 1]-x[ 0]),-180) bnds[-1] = min(x[-1]+0.5*(x[-1]-x[-2]),+180) return bnds assert Unit(var.unit) == Unit(self.unit) assert self.temporal == False assert self.ndata == var.ndata assert self.layered == False # Perform a check on the spatial grid. If it is the exact same # grid, there is no need to interpolate. same_grid = False try: same_grid = np.allclose(self.lat,var.lat)*np.allclose(self.lon,var.lon) except: pass if same_grid: error = np.ma.masked_array(var.data-self.data,mask=self.data.mask+var.data.mask) diff = Variable(data = error, unit = var.unit, lat = var.lat, lat_bnds = var.lat_bnds, lon = var.lon, lon_bnds = var.lon_bnds, ndata = var.ndata, name = "%s_minus_%s" % (var.name,self.name)) else: if not self.spatial: raise il.NotSpatialVariable() lat_bnd1 = _make_bnds(self.lat) lon_bnd1 = _make_bnds(self.lon) lat_bnd2 = _make_bnds( var.lat) lon_bnd2 = _make_bnds( var.lon) lat_bnd,lon_bnd,lat,lon,error = il.TrueError(lat_bnd1,lon_bnd1,self.lat,self.lon,self.data, lat_bnd2,lon_bnd2, var.lat, var.lon, var.data) diff = Variable(data = error, unit = var.unit, lat = lat, lat_bnd = lat_bnd, lon = lon, lon_bnd = lon_bnd, name = "%s_minus_%s" % (var.name,self.name)) return diff
[docs] def convert(self,unit,density=998.2,molar_mass=12.0107): """Convert the variable to a given unit. We use the UDUNITS library via the cf_units python interface to convert the variable's unit. Additional support is provided for unit conversions in which substance information is required. For example, in quantities such as precipitation it is common to have data in the form of a mass rate per unit area [kg s-1 m-2] yet desire it in a linear rate [m s-1]. This can be accomplished if the density of the substance is known. We assume here that water is the substance, but this can be changed by specifying the density when calling the function. Parameters ---------- unit : str the desired converted unit density : float, optional the mass density in [kg m-3] to use when converting linear rates to area density rates mol_mass : float, optional the molar mass in [kg mol-1], default value assumes substance to be converted is carbon Returns ------- self : ILAMB.Variable.Variable this object with its unit converted """ if unit is None: return self src_unit = Unit(self.unit) tar_unit = Unit( unit) mask = self.data.mask mass_density = Unit("kg m-3") molar_density = Unit("g mol-1") if ((src_unit/tar_unit)/mass_density).is_dimensionless(): with np.errstate(all='ignore'): self.data /= density src_unit /= mass_density elif ((tar_unit/src_unit)/mass_density).is_dimensionless(): with np.errstate(all='ignore'): self.data *= density src_unit *= mass_density if ((src_unit/tar_unit)/molar_density).is_dimensionless(): with np.errstate(all='ignore'): self.data /= molar_mass src_unit /= molar_density elif ((tar_unit/src_unit)/molar_density).is_dimensionless(): with np.errstate(all='ignore'): self.data *= molar_mass src_unit *= molar_density try: with np.errstate(all='ignore'): src_unit.convert(self.data,tar_unit,inplace=True) if self.data_bnds is not None: src_unit.convert(self.data_bnds.data,tar_unit,inplace=True) self.unit = unit except: raise il.UnitConversionError() return self
[docs] def toNetCDF4(self,dataset,attributes=None,group=None): """Adds the variable to the specified netCDF4 dataset. Parameters ---------- dataset : netCDF4.Dataset a dataset into which you wish to save this variable attributes : dict of scalars, optional a dictionary of additional scalars to encode as ncattrs group : str, optional the name of the netCDF4 group to to which we add this variable """ def _checkTime(t,dset): """A local function for ensuring the time dimension is saved in the dataset.""" time_name = "time" while True: if time_name in dset.dimensions.keys(): if (t.shape == dset.variables[time_name][...].shape and np.allclose(t,dset.variables[time_name][...],atol=0.5*self.dt)): return time_name else: time_name += "_" else: dset.createDimension(time_name) T = dset.createVariable(time_name,"double",(time_name)) T.setncattr("units","days since 1850-01-01 00:00:00") T.setncattr("calendar","noleap") T.setncattr("axis","T") T.setncattr("long_name","time") T.setncattr("standard_name","time") T[...] = t if self.time_bnds is not None: bnd_name = time_name.replace("time","time_bnds") T.setncattr("bounds",bnd_name) if "nb" not in dset.dimensions.keys(): D = dset.createDimension("nb",size=2) if bnd_name not in dset.variables.keys(): B = dset.createVariable(bnd_name,"double",(time_name,"nb")) B.setncattr("units","days since 1850-01-01 00:00:00") B[...] = self.time_bnds return time_name def _checkLat(lat,dset): """A local function for ensuring the lat dimension is saved in the dataset.""" lat_name = "lat" while True: if lat_name in dset.dimensions.keys(): if (lat.shape == dset.variables[lat_name][...].shape and np.allclose(lat,dset.variables[lat_name][...])): return lat_name else: lat_name += "_" else: dset.createDimension(lat_name,size=lat.size) Y = dset.createVariable(lat_name,"double",(lat_name)) Y.setncattr("units","degrees_north") Y.setncattr("axis","Y") Y.setncattr("long_name","latitude") Y.setncattr("standard_name","latitude") Y[...] = lat if self.lat_bnds is not None: bnd_name = lat_name.replace("lat","lat_bnds") Y.setncattr("bounds",bnd_name) if "nb" not in dset.dimensions.keys(): D = dset.createDimension("nb",size=2) if bnd_name not in dset.variables.keys(): B = dset.createVariable(bnd_name,"double",(lat_name,"nb")) B.setncattr("units","degrees_north") B[...] = self.lat_bnds return lat_name def _checkLon(lon,dset): """A local function for ensuring the lon dimension is saved in the dataset.""" lon_name = "lon" while True: if lon_name in dset.dimensions.keys(): if (lon.shape == dset.variables[lon_name][...].shape and np.allclose(lon,dset.variables[lon_name][...])): return lon_name else: lon_name += "_" else: dset.createDimension(lon_name,size=lon.size) X = dset.createVariable(lon_name,"double",(lon_name)) X.setncattr("units","degrees_east") X.setncattr("axis","X") X.setncattr("long_name","longitude") X.setncattr("standard_name","longitude") X[...] = lon if self.lon_bnds is not None: bnd_name = lon_name.replace("lon","lon_bnds") X.setncattr("bounds",bnd_name) if "nb" not in dset.dimensions.keys(): D = dset.createDimension("nb",size=2) if bnd_name not in dset.variables.keys(): B = dset.createVariable(bnd_name,"double",(lon_name,"nb")) B.setncattr("units","degrees_east") B[...] = self.lon_bnds return lon_name def _checkData(ndata,dset): """A local function for ensuring the data dimension is saved in the dataset.""" data_name = "data" while True: if data_name in dset.dimensions.keys(): if (ndata == len(dset.dimensions[data_name])): return data_name else: data_name += "_" else: dset.createDimension(data_name,size=ndata) return data_name def _checkLayer(layer,dataset): """A local function for ensuring the layer dimension is saved in the dataset.""" layer_name = "layer" while True: if layer_name in dataset.dimensions.keys(): if (layer.shape == dataset.variables[layer_name][...].shape and np.allclose(layer,dataset.variables[layer_name][...])): return layer_name else: layer_name += "_" else: dataset.createDimension(layer_name,size=layer.size) Z = dataset.createVariable(layer_name,"double",(layer_name)) Z.setncattr("units","m") Z.setncattr("axis","Z") Z.setncattr("long_name","depth") Z.setncattr("standard_name","depth") Z[...] = layer if self.depth_bnds is not None: bnd_name = layer_name.replace("layer","layer_bnds") Z.setncattr("bounds",bnd_name) if "nb" not in dataset.dimensions.keys(): D = dataset.createDimension("nb",size=2) if bnd_name not in dataset.variables.keys(): B = dataset.createVariable(bnd_name,"double",(layer_name,"nb")) B.setncattr("units","m") B[...] = self.depth_bnds return layer_name # if not group is desired, just write to the dataset... if group is None: dset = dataset else: # if a group is desired, check to see it exists and write into group if group not in dataset.groups: dset = dataset.createGroup(group) else: dset = dataset.groups[group] dim = [] if self.temporal: dim.append(_checkTime (self.time ,dset)) if self.layered: dim.append(_checkLayer(self.depth,dset)) if self.ndata is not None: dim.append(_checkData (self.ndata,dset)) _checkLat(self.lat,dset) _checkLon(self.lon,dset) else: if self.lat is not None: dim.append(_checkLat (self.lat ,dset)) if self.lon is not None: dim.append(_checkLon (self.lon ,dset)) grp = dset if self.data.size == 1: # we are writing out a scalar so we add it into a special # subgroup for scalars if "scalars" not in dset.groups: grp = dset.createGroup("scalars") else: grp = dset.groups["scalars"] V = grp.createVariable(self.name,"double",dim,zlib=True) V.units = self.unit if type(self.data) is np.ma.core.MaskedConstant: V[...] = np.nan else: V[...] = self.data else: # not a scalar, compute the min/max and middle 98 # percentiles try: per = np.percentile(self.data.compressed(),[0,1,99,100]) except: per = [0,0,0,0] V = grp.createVariable(self.name,"double",dim,zlib=True) V.units = self.unit V.min = per[0] V.dn99 = per[1] V.up99 = per[2] V.max = per[3] V[...] = self.data if self.data_bnds is not None: bnd_name = "%s_bnds" % (self.name) V.bounds = bnd_name VB = grp.createVariable(bnd_name,"double",dim+['nb'],zlib=True) try: per = np.percentile(self.data_bnds.compressed(),[0,1,99,100]) except: per = [0,0,0,0] VB.units = self.unit VB.min = per[0] VB.dn99 = per[1] VB.up99 = per[2] VB.max = per[3] VB[...] = self.data_bnds # optionally write out more attributes if attributes: for key in attributes.keys(): V.setncattr(key,attributes[key])
[docs] def plot(self,ax,**keywords): """Plots the variable on the given matplotlib axis. The behavior of this routine depends on the type of variable specified. If the data is purely temporal, then the plot will be a scatter plot versus time of the data. If it is purely spatial, then the plot will be a global plot of the data. The routine supports multiple keywords although some may not apply to the type of plot being generated. Parameters ---------- ax : matplotlib.axes._subplots.AxesSubplot The matplotlib axes object onto which you wish to plot the variable lw : float, optional The line width to use when plotting alpha : float, optional The degree of transparency when plotting, alpha \in [0,1] color : str or RGB tuple, optional The color to plot with in line plots label : str, optional The label to appear in the legend of line plots vmin : float, optional The minimum plotted value vmax : float, optional The maximum plotted value region : str, optional The region on which to display a spatial variable cmap : str, optional The name of the colormap to be used in plotting the spatial variable ticks : array of floats, optional Defines the locations of xtick ticklabels : array of strings, optional Defines the labels of the xticks """ data = self.data if self.data_bnds is None else self.data_bnds pad = 0.05*(data.max()-data.min()) lw = keywords.get("lw" ,1.0) alpha = keywords.get("alpha" ,1.0) color = keywords.get("color" ,"k") label = keywords.get("label" ,None) vmin = keywords.get("vmin" ,data.min()-pad) vmax = keywords.get("vmax" ,data.max()+pad) region = keywords.get("region","global") cmap = keywords.get("cmap" ,"jet") land = keywords.get("land" ,0.875) water = keywords.get("water" ,0.750) pad = keywords.get("pad" ,5.0) cbar = keywords.get("cbar" ,False) rem_mask = None r = Regions() if self.temporal and not self.spatial: ticks = keywords.get("ticks",None) ticklabels = keywords.get("ticklabels",None) t = self.time/365.+1850 ax.plot(t,self.data,'-', color = color, lw = lw, alpha = alpha, label = label) if self.data_bnds is not None: ax.fill_between(t,self.data_bnds[:,0],self.data_bnds[:,1], lw = 0, color = color, alpha = 0.25*alpha) if ticks is not None: ax.set_xticks(ticks) if ticklabels is not None: ax.set_xticklabels(ticklabels) ax.grid(True) if(abs(vmax-vmin) > 1e-14): ax.set_ylim(vmin,vmax) if (vmax-vmin) > 1e-12: ax.set_ylim(vmin,vmax) elif not self.temporal: # mask out areas outside our region rem_mask = np.copy(self.data.mask) self.data.mask += r.getMask(region,self) # determine the plotting extents percent_pad = 0.2 if self.ndata is None: lat_empty = np.where(self.data.mask.all(axis=-1)==False)[0] lon_empty = np.where(self.data.mask.all(axis=-2)==False)[0] extents = [self.lon_bnds[lon_empty[ 0],0], self.lon_bnds[lon_empty[-1],1], self.lat_bnds[lat_empty[ 0],0], self.lat_bnds[lat_empty[-1],1]] dx = percent_pad*(extents[1]-extents[0]) dy = percent_pad*(extents[3]-extents[2]) extents[0] = max(extents[0]-dx,-180); extents[1] = min(extents[1]+dx,+180) extents[2] = max(extents[2]-dy,- 90); extents[3] = min(extents[3]+dy,+ 90) lon_mid = 0.5*(extents[0]+extents[1]) # ...but the data might cross the dateline, but not be global if(lon_empty[ 0]== 0 and lon_empty[-1]==(self.lon.size-1) and np.diff(lon_empty).max() > 0.5*self.lon.size): wrap_lon = self.lon[lon_empty] wrap_lon += (wrap_lon<0)*360 extents[0] = wrap_lon.min() extents[1] = wrap_lon.max() dx = percent_pad*(extents[1]-extents[0]) extents[0] -= dx; extents[1] += dx # find the middle centroid by mean angle lons = self.lon[np.where(self.data.mask.all(axis=-2)==False)[0]] lons = lons/360*2*np.pi lon_mid = np.arctan2(np.sin(lons).mean(),np.cos(lons).mean())/2/np.pi*360 else: extents = [self.lon.min(),self.lon.max(), self.lat.min(),self.lat.max()] dx = percent_pad*(extents[1]-extents[0]) dy = percent_pad*(extents[3]-extents[2]) extents[0] = max(extents[0]-dx,-180); extents[1] = min(extents[1]+dx,+180) extents[2] = max(extents[2]-dy,- 90); extents[3] = min(extents[3]+dy,+ 90) lon_mid = 0.5*(extents[0]+extents[1]) # choose a projection based on the non-masked data proj = ccrs.PlateCarree(central_longitude=lon_mid) aspect_ratio = (extents[3]-extents[2])/(extents[1]-extents[0]) if (extents[1]-extents[0]) > 320: if np.allclose(extents[2],-90) and extents[3] <= 0: proj = ccrs.Orthographic(central_latitude=-90,central_longitude=0) aspect_ratio = 1. elif extents[3]>75 and extents[2] >= 0: proj = ccrs.Orthographic(central_latitude=+90,central_longitude=0) aspect_ratio = 1. elif (extents[3]-extents[2]) > 140: proj = ccrs.Robinson(central_longitude=0) extents = [-180,180,-90,90] aspect_ratio = 0.5 lon_mid = 0. # make the plot w = 7.5; h = w*aspect_ratio fig,ax = plt.subplots(figsize=(w,h), subplot_kw={'projection':proj}) if self.ndata is None: lat = np.hstack([self.lat_bnds[:,0],self.lat_bnds[-1,-1]]) lon = np.hstack([self.lon_bnds[:,0],self.lon_bnds[-1,-1]]) p = ax.pcolormesh(lon,lat,self.data,cmap=cmap,vmin=vmin,vmax=vmax,transform=ccrs.PlateCarree()) else: norm = colors.Normalize(vmin,vmax) cmap = get_cmap(cmap) clrs = cmap(norm(self.data)) p = ax.scatter(self.lon,self.lat,s=35,color=clrs,cmap=cmap,linewidths=0, transform=ccrs.PlateCarree()) ax.add_feature(cfeature.NaturalEarthFeature('physical','land','110m', edgecolor='face', facecolor='0.875'),zorder=-1) ax.add_feature(cfeature.NaturalEarthFeature('physical','ocean','110m', edgecolor='face', facecolor='0.750'),zorder=-1) ax.set_extent(extents,ccrs.PlateCarree()) if cbar: fig.colorbar(p,orientation='horizontal',pad=0.05,label=label) if rem_mask is not None: self.data.mask = rem_mask return ax
[docs] def interpolate(self,time=None,lat=None,lon=None,lat_bnds=None,lon_bnds=None,itype='nearestneighbor'): """Use nearest-neighbor interpolation to interpolate time and/or space at given values. Parameters ---------- time : numpy.ndarray, optional Array of times at which to interpolate the variable lat : numpy.ndarray, optional Array of latitudes at which to interpolate the variable lon : numpy.ndarray, optional Array of longitudes at which to interpolate the variable Returns ------- var : ILAMB.Variable.Variable The interpolated variable """ if time is None and lat is None and lon is None: return self output_time = self.time if (time is None) else time output_tbnd = self.time_bnds if (time is None) else None output_lat = self.lat if (lat is None) else lat output_lon = self.lon if (lon is None) else lon output_area = self.area if (lat is None and lon is None) else None data = self.data data_bnds = None if self.spatial and (lat is not None or lon is not None): if lat is None: lat = self.lat if lon is None: lon = self.lon if itype == 'nearestneighbor': rows = (np.abs(lat[:,np.newaxis]-self.lat)).argmin(axis=1) cols = (np.abs(lon[:,np.newaxis]-self.lon)).argmin(axis=1) args = [] if self.temporal: args.append(range(self.time.size)) if self.layered: args.append(range(self.depth.size)) args.append(rows) args.append(cols) ind = np.ix_(*args) mask = data.mask if data.mask.size > 1: mask = data.mask[ind] data = data.data[ind] data = np.ma.masked_array(data,mask=mask) if self.data_bnds is not None: args.append([0,1]) ind = np.ix_(*args) data_bnds = self.data_bnds[ind] np.seterr(under='ignore',over='ignore') frac = self.area / il.CellAreas(self.lat,self.lon,self.lat_bnds,self.lon_bnds).clip(1e-12) np.seterr(under='raise',over='raise') frac = frac.clip(0,1) frac = frac[np.ix_(rows,cols)] output_area = frac * il.CellAreas(lat,lon,lat_bnds,lon_bnds) elif itype == 'bilinear': from scipy.interpolate import RectBivariateSpline if self.data.ndim == 3: halo = il.LandLinInterMissingValues(self.data) data = np.ma.zeros((self.data.shape[:-2]+(lat.size,lon.size))) for i in range(self.data.shape[0]): dint = RectBivariateSpline(self.lat,self.lon, halo[i,...], kx=1,ky=1) mint = RectBivariateSpline(self.lat,self.lon,self.data[i,...].mask,kx=1,ky=1) data[i,...] = np.ma.masked_array(dint(lat,lon,grid=True), mint(lat,lon,grid=True)>0.5) frac = self.area / il.CellAreas(self.lat,self.lon).clip(1e-12) frac = frac.clip(0,1) frac = RectBivariateSpline(self.lat,self.lon,frac,kx=1,ky=1) output_area = frac(lat,lon,grid=True) * il.CellAreas(lat,lon) else: raise ValueError("Uknown interpolation type: %s" % itype) if self.temporal and time is not None: times = np.apply_along_axis(np.argmin,1,np.abs(time[:,np.newaxis]-self.time)) mask = data.mask if mask.size > 1: mask = data.mask[times,...] data = data.data[times,...] data = np.ma.masked_array(data,mask=mask) output_tbnd = self.time_bnds[times] return Variable(data = data, data_bnds = data_bnds, unit = self.unit, name = self.name, ndata = self.ndata, lat = output_lat, lon = output_lon, area = output_area, time = output_time, time_bnds = output_tbnd)
[docs] def phaseShift(self,var,method="max_of_annual_cycle"): """Computes the phase shift between a variable and this variable. Finds the phase shift as the time between extrema of the annual cycles of the variables. Note that if this var and/or the given variable are not already annual cycles, they will be computed but not returned. Parameters ---------- var : ILAMB.Variable.Variable The variable with which we will measure phase shift method : str, optional The name of the method used to compute the phase shift """ assert method in ["max_of_annual_cycle","min_of_annual_cycle"] assert self.temporal == var.temporal v1 = self; v2 = var if not self.temporal: # If the data is not temporal, then the user may have # already found the extrema. If the units of the input # variable are days, then set the extrema to this data. if not (self.unit == "d" and var.unit == "d"): raise il.NotTemporalVariable e1 = v1 e2 = v2 else: # While temporal, the user may have passed in the mean # annual cycle as the variable. So if the leading # dimension is 12 we assume the variables are already the # annual cycles. If not, we compute the cycles and then # compute the extrema. if self.time.size != 12: v1 = self.annualCycle() if var.time.size != 12: v2 = var .annualCycle() e1 = v1.timeOfExtrema(etype=method[:3]) e2 = v2.timeOfExtrema(etype=method[:3]) if e1.spatial: shift = e1.spatialDifference(e2) else: data = e2.data - e1.data mask = e1.data.mask + e2.data.mask shift = Variable(data=data,unit=e1.unit,ndata=e1.ndata,lat=e1.lat,lon=e1.lon) shift.name = "phase_shift_of_%s" % e1.name shift.data += (shift.data < -0.5*365.)*365. shift.data -= (shift.data > +0.5*365.)*365. return shift
[docs] def correlation(self,var,ctype,region=None): """Computes the correlation between two variables. Parameters ---------- var : ILAMB.Variable.Variable The variable with which we will compute a correlation ctype : str The correlation type, one of {"spatial","temporal","spatiotemporal"} region : str, optional The region over which to perform a spatial correlation Notes ----- Need to better think about what correlation means when data are masked. The sums ignore the data but then the number of items *n* is not constant and should be reduced for masked values. """ def _correlation(x,y,axes=None): if axes is None: axes = range(x.ndim) if type(axes) == int: axes = (int(axes),) axes = tuple(axes) n = 1 for ax in axes: n *= x.shape[ax] xbar = x.sum(axis=axes)/n # because np.mean() doesn't take axes which are tuples ybar = y.sum(axis=axes)/n xy = (x*y).sum(axis=axes) x2 = (x*x).sum(axis=axes) y2 = (y*y).sum(axis=axes) try: np.seterr(under='ignore',invalid='ignore') r = (xy-n*xbar*ybar)/(np.sqrt(x2-n*xbar*xbar)*np.sqrt(y2-n*ybar*ybar)) np.seterr(under='raise',invalid='warn') except: r = np.nan return r # checks on data consistency assert region is None assert self.data.shape == var.data.shape assert ctype in ["spatial","temporal","spatiotemporal"] # determine arguments for functions axes = None out_time = None out_lat = None out_lon = None out_area = None out_ndata = None if ctype == "temporal": axes = 0 if self.spatial: out_lat = self.lat out_lon = self.lon out_area = self.area elif self.ndata: out_ndata = self.ndata elif ctype == "spatial": if self.spatial: axes = range(self.data.ndim)[-2:] if self.ndata: axes = self.data.ndim-1 if self.temporal: out_time = self.time out_time_bnds = None if out_time is not None: out_time_bnds = self.time_bnds r = _correlation(self.data,var.data,axes=axes) return Variable(data=r,unit="1", name="%s_correlation_of_%s" % (ctype,self.name), time=out_time,time_bnds=out_time_bnds,ndata=out_ndata, lat=out_lat,lon=out_lon,area=out_area)
[docs] def bias(self,var): """Computes the bias between a given variable and this variable. Parameters ---------- var : ILAMB.Variable.Variable The variable with which we will measure bias Returns ------- bias : ILAMB.Variable.Variable the bias """ # If not a temporal variable, then we assume that the user is # passing in mean data and return the difference. lat,lon,area = self.lat,self.lon,self.area if not self.temporal: assert self.temporal == var.temporal bias = self.spatialDifference(var) bias.name = "bias_of_%s" % self.name return bias if self.spatial: # If the data is spatial, then we interpolate it on a # common grid and take the difference. same_grid = False try: same_grid = np.allclose(self.lat,var.lat)*np.allclose(self.lon,var.lon) except: pass if not same_grid: lat,lon = il.ComposeSpatialGrids(self,var) area = None self_int = self.interpolate(lat=lat,lon=lon) var_int = var .interpolate(lat=lat,lon=lon) data = var_int.data-self_int.data mask = var_int.data.mask+self_int.data.mask else: data = var.data -self.data mask = var.data.mask+self.data.mask elif (self.ndata or self.time.size == self.data.size): # If the data are at sites, then take the difference data = var.data.data-self.data.data mask = var.data.mask+self.data.mask else: raise il.NotSpatialVariable("Cannot take bias of scalars") # Finally we return the temporal mean of the difference bias = Variable(data=np.ma.masked_array(data,mask=mask), name="bias_of_%s" % self.name,time=self.time,time_bnds=self.time_bnds, unit=self.unit,ndata=self.ndata, lat=lat,lon=lon,area=area, depth_bnds = self.depth_bnds).integrateInTime(mean=True) bias.name = bias.name.replace("_integrated_over_time_and_divided_by_time_period","") return bias
[docs] def rmse(self,var): """Computes the RMSE between a given variable and this variable. Parameters ---------- var : ILAMB.Variable.Variable The variable with which we will measure RMSE Returns ------- RMSE : ILAMB.Variable.Variable the RMSE """ # If not a temporal variable, then we assume that the user is # passing in mean data and return the difference. lat,lon,area = self.lat,self.lon,self.area if not self.temporal: assert self.temporal == var.temporal rmse = self.spatialDifference(var) rmse.name = "rmse_of_%s" % self.name return rmse if self.spatial: # If the data is spatial, then we interpolate it on a # common grid and take the difference. same_grid = False try: same_grid = np.allclose(self.lat,var.lat)*np.allclose(self.lon,var.lon) except: pass if not same_grid: lat,lon = il.ComposeSpatialGrids(self,var) area = None self_int = self.interpolate(lat=lat,lon=lon) var_int = var .interpolate(lat=lat,lon=lon) data = var_int.data-self_int.data mask = var_int.data.mask+self_int.data.mask else: data = var.data -self.data mask = var.data.mask+self.data.mask elif (self.ndata or self.time.size == self.data.size): # If the data are at sites, then take the difference data = var.data.data-self.data.data mask = var.data.mask+self.data.mask else: raise il.NotSpatialVariable("Cannot take rmse of scalars") # Finally we return the temporal mean of the difference squared np.seterr(over='ignore',under='ignore') data *= data np.seterr(over='raise',under='raise') rmse = Variable(data=np.ma.masked_array(data,mask=mask), name="rmse_of_%s" % self.name,time=self.time,time_bnds=self.time_bnds, unit=self.unit,ndata=self.ndata, lat=lat,lon=lon,area=area, depth_bnds = self.depth_bnds).integrateInTime(mean=True) rmse.name = rmse.name.replace("_integrated_over_time_and_divided_by_time_period","") rmse.data = np.sqrt(rmse.data) return rmse
[docs] def rms(self): """Computes the RMS of this variable. Returns ------- RMS : ILAMB.Variable.Variable the RMS """ if not self.temporal: raise il.NotTemporalVariable() unit = self.unit np.seterr(over='ignore',under='ignore') data = self.data**2 np.seterr(over='raise',under='raise') rms = Variable(data = data, unit = "1", # will change later name = "tmp", # will change later ndata = self.ndata, lat = self.lat, lon = self.lon, area = self.area, time = self.time).integrateInTime(mean=True) np.seterr(over='ignore',under='ignore') rms.data = np.sqrt(rms.data) np.seterr(over='raise',under='raise') rms.unit = unit rms.name = "rms_of_%s" % self.name return rms
[docs] def variability(self,mean=None): """Computes the variability of the variable. """ if not self.temporal: raise il.NotTemporalVariable if mean is None: mean = self.integrateInTime(mean=True) var = Variable(unit = self.unit, data = self.data - mean.data[np.newaxis,...], time = self.time, time_bnds = self.time_bnds, lat = self.lat, lat_bnds = self.lat_bnds, lon = self.lon, lon_bnds = self.lon_bnds, area = self.area, ndata = self.ndata).rms() var.name = "var_of_%s" % self.name return var
[docs] def interannualVariability(self): """Computes the interannual variability. The internannual variability in this case is defined as the standard deviation of the data in the temporal dimension. Returns ------- iav : ILAMB.Variable.Variable the interannual variability variable """ if not self.temporal: raise il.NotTemporalVariable np.seterr(over='ignore',under='ignore') data = self.data.std(axis=0) np.seterr(over='raise',under='raise') return Variable(data=data, name="iav_of_%s" % self.name, unit=self.unit,ndata=self.ndata, lat=self.lat,lon=self.lon,area=self.area, depth_bnds = self.depth_bnds)
[docs] def spatialDistribution(self,var,region="global"): r"""Evaluates how well the input variable is spatially distributed relative to this variable. This routine returns the normalized standard deviation and correlation (needed for a Taylor plot) as well as a score given as .. math:: \frac{4(1+R)}{((\sigma+\frac{1}{\sigma})^2 (1+R_0))} where :math:`R` is the correlation, :math:`R_0=1` is the reference correlation, and :math:`\sigma` is the normalized standard deviation. Parameters ---------- var : ILAMB.Variable.Variable the comparison variable region : str, optional the name of the region over which to check the spatial distribution Returns ------- std : ILAMB.Variable.Variable the normalized standard deviation of the input variable R : ILAMB.Variable.Variable the correlation of the input variable score : ILAMB.Variable.Variable the spatial distribution score """ assert self.temporal == var.temporal == False r = Regions() # First compute the observational spatial/site standard deviation rem_mask0 = np.copy(self.data.mask) self.data.mask += r.getMask(region,self) np.seterr(over='ignore',under='ignore') std0 = self.data.std() np.seterr(over='raise',under='raise') # Next compute the model spatial/site standard deviation rem_mask = np.copy(var.data.mask) var.data.mask += r.getMask(region,var) np.seterr(over='ignore',under='ignore') std = var.data.std() np.seterr(over='raise',under='raise') # Interpolate to new grid for correlation if self.spatial: lat,lon = il.ComposeSpatialGrids(self,var) self_int = self.interpolate(lat=lat,lon=lon) var_int = var .interpolate(lat=lat,lon=lon) else: self_int = self var_int = var R = self_int.correlation(var_int,ctype="spatial") # add regions if type(R.data) is np.ma.core.MaskedConstant: R.data = 0. # Restore masks self.data.mask = rem_mask0 var.data.mask = rem_mask # Put together scores, we clip the standard deviation of both # variables at the same small amount, meant to avoid division # by zero errors. try: R0 = 1.0 std0 = std0.clip(1e-12) std = std .clip(1e-12) std = std/std0 score = 4.0*(1.0+R.data)/((std+1.0/std)**2 *(1.0+R0)) except: std = np.asarray([0.0]) score = np.asarray([0.0]) std = Variable(data=std ,name="normalized_spatial_std_of_%s_over_%s" % (self.name,region),unit="1") score = Variable(data=score,name="spatial_distribution_score_of_%s_over_%s" % (self.name,region),unit="1") return std,R,score
[docs] def coarsenInTime(self,intervals,window=0.): """Compute the mean function value in each of the input intervals. Parameters ---------- intervals : array of shape (n,2) An array of n intervals where the first entry is the beginning and the second entry is the end of the interval window : float, optional Extend each interval before and after by this amount of time Returns ------- coarse : ILAMB.Variable.Variable The coarsened variable """ if not self.temporal: raise il.NotTemporalVariable assert intervals.ndim == 2 n = intervals.shape[0] shp = (n,)+self.data.shape[1:] time = np.zeros(n) data = np.ma.zeros(shp) for i in range(n): t0 = intervals[i,0]-window tf = intervals[i,1]+window time[i] = 0.5*(t0+tf) mean = self.integrateInTime(mean=True,t0=t0,tf=tf).convert(self.unit) data[i,...] = mean.data return Variable(name = "coarsened_%s" % self.name, unit = self.unit, time = time, time_bnds = intervals, data = data, ndata = self.ndata, lat = self.lat, lon = self.lon, area = self.area, depth_bnds = self.depth_bnds)
[docs] def accumulateInTime(self): r"""For each time interval, accumulate variable from the beginning. For each time interval :math:`i` in the variable, defined by :math:`[t_0^i,t_f^i]`, compute .. math:: \int_{t_0^0}^{t_f^i} v(t,\dots)\ dt This routine is useful, for example, if the variable is a mass rate defined over time and we wish to know the mass accumulation as a function of time. Returns ------- sum : ILAMB.Variable.Variable The cumulative sum of this variable """ if not self.temporal: raise il.NotTemporalVariable n = self.time.size shp = (n+1,) + self.data.shape[1:] time = np.zeros(n+1) data = np.ma.zeros(shp) data_bnds = None if self.data_bnds is None else np.ma.zeros(shp+(2,)) time[0] = self.time_bnds[0,0] for i in range(n): t0 = self.time_bnds[i,0] tf = self.time_bnds[i,1] isum = self.integrateInTime(t0=t0,tf=tf) time[i+1] = tf data[i+1,...] = data[i,...] + isum.data if data_bnds is not None: data_bnds[i+1,...] = data_bnds[i,...] + isum.data_bnds return Variable(name = "cumulative_sum_%s" % self.name, unit = isum.unit, time = time, data = data, data_bnds = data_bnds, lat = self.lat, lon = self.lon, area = self.area)
[docs] def trim(self,lat=None,lon=None,t=None,d=None): """Trim away a variable in space/time in place. Parameters ---------- lat,lon,t,d : tuple or list a 2-tuple containing the lower and upper limits beyond which we trim """ def _whichInterval(val,bnds): if val < bnds.min(): val = bnds.min() if val > bnds.max(): val = bnds.max() ind = np.where((val>=bnds[:,0])*(val<=bnds[:,1]))[0] assert ind.size <= 2 ind = ind[0] return ind if lat is not None: assert len(lat) == 2 if not self.spatial: raise il.NotSpatialVariable i = _whichInterval(lat[0],self.lat_bnds) j = _whichInterval(lat[1],self.lat_bnds)+1 self.lat = self.lat [i:j] self.lat_bnds = self.lat_bnds[i:j] self.data = self.data[...,i:j,:] if self.data_bnds is not None: self.data_bnds = self.data_bnds[...,i:j,:,:] self.area = self.area[ i:j,:] if lon is not None: assert len(lon) == 2 if not self.spatial: raise il.NotSpatialVariable i = _whichInterval(lon[0],self.lon_bnds) j = _whichInterval(lon[1],self.lon_bnds)+1 self.lon = self.lon [i:j] self.lon_bnds = self.lon_bnds[i:j] self.data = self.data[...,i:j] if self.data_bnds is not None: self.data_bnds = self.data_bnds[...,i:j,:] self.area = self.area[ :,i:j] if t is not None: assert len(t) == 2 if not self.temporal: raise il.NotTemporalVariable self = il.ClipTime(self,t[0],t[1]) if d is not None: assert len(d) == 2 if self.depth_bnds is None: raise ValueError keep = (self.depth_bnds[:,1] >= d[0])*(self.depth_bnds[:,0] <= d[1]) ind = np.where(keep)[0] self.depth_bnds = self.depth_bnds[ind,:] self.depth = self.depth [ind ] self.data = self.data[...,ind,:,:] return self