from .constants import mid_months,bnd_months
from .Regions import Regions
import matplotlib.colors as colors
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from pylab import get_cmap
from cf_units import Unit
from . import ilamblib as il
from . import Post as post
import numpy as np
import matplotlib.pyplot as plt
def _shiftLon(lon):
"""Shift longitudes from the [0,360] convention to [-180,180]."""
return (lon<=180)*lon + (lon>180)*(lon-360) + (lon<-180)*360
def _shiftFirstColumnToDateline(lon,lon_bnds=None,data=None,area=None):
"""Roll longitude-dependent arrays so that the first column lies at the international dateline."""
shift = lon.argmin()
lon = np.roll(lon,-shift)
if lon_bnds is not None:
lon_bnds = np.roll(lon_bnds,-shift,axis=0)
if lon_bnds[ 0,0] > lon_bnds[ 0,1]: lon_bnds[ 0,0] = max(lon_bnds[ 0,0]-360,-180)
if lon_bnds[-1,1] < lon_bnds[-1,0]: lon_bnds[-1,1] = min(lon_bnds[-1,1]+360,+180)
if data is not None: data = np.roll(data,-shift,axis=-1)
if area is not None: area = np.roll(area,-shift,axis=-1)
return lon,lon_bnds,data,area
def _createBnds(x):
"""Create cell bounds from an array of cell centroids by averaging adjacent values."""
x = np.asarray(x)
x_bnds = np.zeros((x.size,2))
x_bnds[+1:,0] = 0.5*(x[:-1]+x[+1:])
x_bnds[:-1,1] = 0.5*(x[:-1]+x[+1:])
if x.size == 1:
x_bnds[ ...] = x
else:
x_bnds[ 0,0] = x[ 0] - 0.5*(x[ 1]-x[ 0])
x_bnds[-1,1] = x[-1] + 0.5*(x[-1]-x[-2])
return x_bnds
class Variable:
r"""A class for managing variables and their analysis.
There are two ways to create a Variable object. Because Python
does not support multiple constructors, we use keyword
arguments so that the user's intent may be captured. The first way
to specify a Variable is by loading a netCDF4 file. You can
achieve this by specifying the 'filename' and 'variable_name'
keywords. The second way is to use the remaining keyword arguments
to specify data arrays directly. If you use the second way, you
must specify the keywords 'data' and 'unit'. The rest are truly
optional and depend on the nature of your data.
Parameters
----------
filename : str, optional
Name of the netCDF4 file from which to extract a variable
variable_name : str, optional
Name of the variable to extract from the netCDF4 file
data : numpy.ndarray, optional
The array which contains the data which constitutes the
variable
unit : str, optional
The unit of the input data
name : str, optional
The name of the variable, will be how it is saved in the netCDF4
file
time : numpy.ndarray, optional
a 1D array of times in days since 1850-01-01 00:00:00
time_bnds : numpy.ndarray, optional
a 2D array of time bounds in days since 1850-01-01 00:00:00
lat : numpy.ndarray, optional
a 1D array of latitudes of cell centroids
lon : numpy.ndarray, optional
a 1D array of longitudes of cell centroids
area : numpy.ndarray, optional
a 2D array of the cell areas
ndata : int, optional
number of data sites this data represents
alternate_vars : list of str, optional
a list of alternate acceptable variable names
depth_bnds : numpy.ndarray, optional
a 2D array representing the boundaries of the cells in the vertical dimension
Examples
--------
You can create a Variable by specifying the data directly.
>>> lat = np.linspace(- 90, 90, 91)
>>> lon = np.linspace(-180,180,181)
>>> data = np.random.rand(91,181)
>>> v = Variable(name="some_variable",unit="some_unit",lat=lat,lon=lon,data=data)
Or you can create a Variable by extracting a specific field from a netCDF4 file.
>>> v = Variable(filename="some_netcdf_file.nc",variable_name="name_of_var_to_extract")
"""
def __init__(self,**keywords):
r"""Constructor for the variable class by specifying the data arrays.
"""
# See if the user specified a netCDF4 file and variable
filename = keywords.get("filename" ,None)
groupname = keywords.get("groupname" ,None)
variable_name = keywords.get("variable_name",None)
alternate_vars = keywords.get("alternate_vars",[])
calendar = "noleap"
if filename is None: # if not, pull data from the other arguments
data = keywords.get("data" ,None)
data_bnds = keywords.get("data_bnds" ,None)
unit = keywords.get("unit" ,None)
name = keywords.get("name" ,"unnamed")
time = keywords.get("time" ,None)
time_bnds = keywords.get("time_bnds" ,None)
lat = keywords.get("lat" ,None)
lat_bnds = keywords.get("lat_bnds" ,None)
lon = keywords.get("lon" ,None)
lon_bnds = keywords.get("lon_bnds" ,None)
depth = keywords.get("depth" ,None)
depth_bnds = keywords.get("depth_bnds" ,None)
ndata = keywords.get("ndata" ,None)
attr = None
assert data is not None
assert unit is not None
cbounds = None
self.filename = ""
else:
self.filename = filename
assert variable_name is not None
t0 = keywords.get("t0",None)
tf = keywords.get("tf",None)
convert_calendar = keywords.get("convert_calendar",True)
out = il.FromNetCDF4(filename,variable_name,alternate_vars,t0,tf,group=groupname,convert_calendar=convert_calendar)
data,data_bnds,unit,name,time,time_bnds,lat,lat_bnds,lon,lon_bnds,depth,depth_bnds,cbounds,ndata,calendar,attr = out
# Add handling for some units which cf_units does not support
unit = unit.replace("psu","1e-3")
if not np.ma.isMaskedArray(data): data = np.ma.masked_array(data)
self.data = data
self.data_bnds = data_bnds
self.ndata = ndata
self.unit = unit
self.name = name
self.cbounds = cbounds
self.calendar = calendar
self.attr = attr
# Handle time data
self.time = time # time data
self.time_bnds = time_bnds # bounds on time
self.temporal = False # flag for temporal data
self.dt = 0. # mean temporal spacing
self.monthly = False # flag for monthly means
if time is not None:
self.temporal = True
if self.time_bnds is None: self.time_bnds = _createBnds(self.time)
self.dt = (self.time_bnds[:,1]-self.time_bnds[:,0]).mean()
if np.allclose(self.dt,30,atol=3): self.monthly = True
assert (2*self.time.size) == (self.time_bnds.size)
# Handle space or multimember data
self.spatial = False
self.lat = lat
self.lon = lon
self.lat_bnds = lat_bnds
self.lon_bnds = lon_bnds
self.area = keywords.get("area",None)
# If the last dimensions are lat and lon, this is spatial data
if lat is not None and lon is not None and data.ndim >= 2:
if (data.shape[-2] == lat.size and data.shape[-1] == lon.size): self.spatial = True
# Exception for PEcAn
if self.spatial is True:
if data.shape[-2:] == (1,1):
self.spatial = False
self.ndata = 1
self.data = self.data.reshape(self.data.shape[:-2]+(1,))
if self.spatial is False:
# Shift possible values on [0,360] to [-180,180]
if self.lon is not None: self.lon = _shiftLon(self.lon )
if self.lon_bnds is not None: self.lon_bnds = _shiftLon(self.lon_bnds)
else:
# Exception for CABLE, flips if monotonically decreasing
if np.all(np.diff(self.lat)<0):
self.lat = self.lat [::-1 ]
self.data = self.data[...,::-1,: ]
if self.lat_bnds is not None: self.lat_bnds = self.lat_bnds[::-1,::-1]
if self.area is not None: self.area = self.area [::-1,:]
# Shift possible values on [0,360] to [-180,180]
if self.lon is not None: self.lon = _shiftLon(self.lon )
if self.lon_bnds is not None: self.lon_bnds = _shiftLon(self.lon_bnds)
# Shift first column of data to the international dateline
out = _shiftFirstColumnToDateline(self.lon,
lon_bnds = self.lon_bnds,
data = self.data,
area = self.area)
self.lon,self.lon_bnds,self.data,self.area = out
# Finally create bounds if they are not there
if self.lat_bnds is None: self.lat_bnds = _createBnds(self.lat)
if self.lon_bnds is None: self.lon_bnds = _createBnds(self.lon)
# Fix potential problems with rolling the axes of the lon_bnds
self.lat_bnds = self.lat_bnds.clip(- 90,+ 90)
self.lon_bnds = self.lon_bnds.clip(-180,+180)
# Make sure that the value lies within the bounds
assert np.all((self.lat>=self.lat_bnds[:,0])*(self.lat<=self.lat_bnds[:,1]))
assert np.all((self.lon>=self.lon_bnds[:,0])*(self.lon<=self.lon_bnds[:,1]))
if self.area is None: self.area = il.CellAreas(None,None,
lat_bnds=self.lat_bnds,
lon_bnds=self.lon_bnds)
# Is the data layered
self.layered = False
self.depth = depth
self.depth_bnds = depth_bnds
if (data.ndim > (self.temporal + 2*self.spatial + (self.ndata is not None))) and depth is not None:
self.layered = True
if depth_bnds is None: self.depth_bnds = _createBnds(self.depth)
def __str__(self):
if self.data is None: return "Uninitialized Variable"
if self.ndata is None:
ndata = "N/A"
else:
ndata = str(self.ndata)
if not self.temporal:
time = ""
else:
time = " (%d)" % self.time.size
if not self.spatial:
space = ""
else:
space = " (%d,%d)" % (self.lat.size,self.lon.size)
if not self.layered:
layer = ""
else:
layer = " (%d)" % (self.depth.size)
s = "Variable: %s\n" % self.name
s += "-"*(len(self.name)+10) + "\n"
s += "{0:>20}: ".format("unit") + self.unit + "\n"
s += "{0:>20}: ".format("isTemporal") + str(self.temporal) + time + "\n"
s += "{0:>20}: ".format("isSpatial") + str(self.spatial) + space + "\n"
s += "{0:>20}: ".format("isLayered") + str(self.layered) + layer + "\n"
s += "{0:>20}: ".format("nDatasites") + ndata + "\n"
s += "{0:>20}: ".format("dataShape") + "%s\n" % (self.data.shape,)
np.seterr(over='ignore',under='ignore')
s += "{0:>20}: ".format("dataMax") + "%e\n" % self.data.max()
s += "{0:>20}: ".format("dataMin") + "%e\n" % self.data.min()
s += "{0:>20}: ".format("dataMean") + "%e\n" % self.data.mean()
np.seterr(over='warn',under='warn')
if self.cbounds is not None:
s += "{0:>20}: ".format("climatology") + "%d thru %d\n" % (self.cbounds[0],self.cbounds[1])
return s
def nbytes(self):
r"""Estimate the memory usage of a variable in bytes.
"""
nbytes = 0.
for key in self.__dict__.keys():
try:
nbytes += self.__dict__[key].nbytes
except AttributeError: # not every attribute carries an nbytes member
pass
return nbytes
def integrateInTime(self,**keywords):
r"""Integrates the variable over a given time period.
Uses nodal integration to approximate
.. math:: \int_{t_0}^{t_f} v(t,\dots)\ dt
The arguments of the integrand reflect that while it must be
at least defined in time, the remaining arguments are
flexible. If :math:`t_0` or :math:`t_f` are not specified, the
variable will be integrated over the extent of its time
domain. If the mean function value over time is desired, this
routine will approximate
.. math:: \frac{1}{t_f-t_0} \int_{t_0}^{t_f} v(t,\dots)\ dt
again by nodal integration. The amount of time which we divide
by is the non-masked amount of time. This means that if a
function has some values masked or marked as invalid, we do
not penalize the average value by including this as a time at
which data is expected.
Parameters
----------
t0 : float, optional
initial time in days since 1/1/1850
tf : float, optional
final time in days since 1/1/1850
mean : boolean, optional
enable to divide the integrand to get the mean function value
Returns
-------
integral : ILAMB.Variable.Variable
a Variable instance with the integrated value along with the
appropriate name and unit change
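Examples
--------
A minimal sketch, assuming `v` is a temporal Variable:
>>> total = v.integrateInTime()          # integral over the whole time span
>>> tmean = v.integrateInTime(mean=True) # time mean, unit unchanged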
"""
if not self.temporal: raise il.NotTemporalVariable()
t0 = keywords.get("t0",self.time_bnds[:,0].min())
tf = keywords.get("tf",self.time_bnds[:,1].max())
mean = keywords.get("mean",False)
# find which time bounds are included even partially in the interval [t0,tf]
time_bnds = np.copy(self.time_bnds)
ind = np.where((t0<time_bnds[:,1])*(tf>time_bnds[:,0]))[0]
time_bnds[(t0>time_bnds[:,0])*(t0<time_bnds[:,1]),0] = t0
time_bnds[(tf>time_bnds[:,0])*(tf<time_bnds[:,1]),1] = tf
time_bnds = time_bnds[ind,:]
dt = (time_bnds[:,1]-time_bnds[:,0])
# now expand this dt to the other dimensions of the data array (i.e. space or datasites)
for i in range(self.data.ndim-1): dt = np.expand_dims(dt,axis=-1)
# approximate the integral by nodal integration (rectangle rule)
np.seterr(over='ignore',under='ignore')
integral = (self.data[ind]*dt).sum(axis=0)
integral_bnd = None
if self.data_bnds is not None:
integral_bnd = np.ma.asarray([((((self.data_bnds[...,0])[ind])*dt).sum(axis=0)).T,
((((self.data_bnds[...,1])[ind])*dt).sum(axis=0)).T]).T
np.seterr(over='raise',under='raise')
# the integrated array should be masked where *all* data in time was previously masked
mask = False
if self.data.ndim > 1 and self.data.mask.size > 1:
mask = np.apply_along_axis(np.all,0,self.data.mask[ind])
integral = np.ma.masked_array(integral,mask=mask,copy=False)
# handle units
unit = Unit(self.unit)
name = self.name + "_integrated_over_time"
if mean:
# divide through by the non-masked amount of time; the units
# can remain as input because we integrate over time and
# then divide by the time interval in the same units
name += "_and_divided_by_time_period"
if self.data.mask.size > 1:
dt = (dt*(self.data.mask[ind]==0)).sum(axis=0)
else:
dt = dt.sum(axis=0)
np.seterr(over='ignore',under='ignore')
integral = integral / dt
if integral_bnd is not None:
integral_bnd[...,0] = integral_bnd[...,0] / dt
integral_bnd[...,1] = integral_bnd[...,1] / dt
np.seterr(over='raise' ,under='raise' )
else:
# if not a mean, we need to potentially handle unit conversions
unit0 = Unit("d")*unit # integrating over time multiplies the unit by days
unit = Unit(unit0.format().split()[-1]) # keep only the trailing base unit, dropping the scale factor
if not isinstance(integral.mask, np.ndarray):
if integral.mask == True:
integral=np.ma.masked_array(data=integral.data, mask=np.ones(integral.shape,dtype='bool'))
else:
integral=np.ma.masked_array(data=integral.data, mask=np.zeros(integral.shape,dtype='bool'))
unit0.convert(integral,unit,inplace=True)
if integral_bnd is not None:
unit0.convert(integral_bnd,unit,inplace=True)
return Variable(data = integral,
data_bnds = integral_bnd,
unit = "%s" % unit,
name = name,
lat = self.lat,
lat_bnds = self.lat_bnds,
lon = self.lon,
lon_bnds = self.lon_bnds,
depth = self.depth,
depth_bnds = self.depth_bnds,
area = self.area,
ndata = self.ndata)
def integrateInDepth(self,**keywords):
r"""Integrates the variable over a given layer limits.
Uses nodal integration to approximate
.. math:: \int_{z_0}^{z_f} v(z,\dots)\ dz
The arguments of the integrand reflect that while it must be
at least defined in depth, the remaining arguments are
flexible. If :math:`z_0` or :math:`z_f` are not specified, the
variable will be integrated over the extent of its depth
domain. If the mean function value over depth is desired, this
routine will approximate
.. math:: \frac{1}{z_f-z_0} \int_{z_0}^{z_f} v(z,\dots)\ dz
again by nodal integration. The amount of depth which we
divide by is the non-masked amount of depth. This means that
if a function has some values masked or marked as invalid, we
do not penalize the average value by including this as a depth
at which data is expected.
Parameters
----------
z0 : float, optional
initial depth in m
zf : float, optional
final depth in m
mean : boolean, optional
enable to divide the integrand to get the mean function value
Returns
-------
integral : ILAMB.Variable.Variable
a Variable instance with the integrated value along with the
appropriate name and unit change
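Examples
--------
A minimal sketch, assuming `v` is a layered Variable with depths in m:
>>> top1m = v.integrateInDepth(z0=0.,zf=1.) # integral over the top meter
>>> davg  = v.integrateInDepth(mean=True)   # depth-weighted mean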
"""
if not self.layered: raise il.NotLayeredVariable()
z0 = keywords.get("z0",self.depth_bnds[:,0].min())
zf = keywords.get("zf",self.depth_bnds[:,1].max())
mean = keywords.get("mean",False)
# find which depth bounds are included even partially in the interval [z0,zf]
depth_bnds = np.copy(self.depth_bnds)
ind = np.where((z0<depth_bnds[:,1])*(zf>depth_bnds[:,0]))[0]
depth_bnds[(z0>depth_bnds[:,0])*(z0<depth_bnds[:,1]),0] = z0
depth_bnds[(zf>depth_bnds[:,0])*(zf<depth_bnds[:,1]),1] = zf
depth_bnds = depth_bnds[ind,:]
dz = (depth_bnds[:,1]-depth_bnds[:,0])
args = []
axis = 0
if self.temporal:
axis += 1
dz = np.expand_dims(dz,axis=0)
args.append(range(self.time.size))
if self.layered: args.append(ind)
if self.ndata:
dz = np.expand_dims(dz,axis=-1)
args.append(range(self.ndata))
if self.spatial:
dz = np.expand_dims(dz,axis=-1)
dz = np.expand_dims(dz,axis=-1)
args.append(range(self.lat.size))
args.append(range(self.lon.size))
ind = np.ix_(*args)
# approximate the integral by nodal integration (rectangle rule)
shp = self.data[ind].shape
np.seterr(over='ignore',under='ignore')
integral = (self.data[ind]*dz).sum(axis=axis)
np.seterr(over='raise',under='raise')
# the integrated array should be masked where *all* data in depth was previously masked
mask = False
if self.data.ndim > 1 and self.data.mask.size > 1:
mask = np.apply_along_axis(np.all,axis,self.data.mask[ind])
integral = np.ma.masked_array(integral,mask=mask,copy=False)
# handle units
unit = Unit(self.unit)
name = self.name + "_integrated_over_depth"
if mean:
# divide through by the non-masked amount of depth; the units
# can remain as input because we integrate over depth and
# then divide by the depth interval in the same units
name += "_and_divided_by_depth"
if self.data.mask.size > 1:
dz = (dz*(self.data.mask[ind]==0)).sum(axis=axis)
else:
dz = dz.sum(axis=axis)
np.seterr(over='ignore',under='ignore')
integral = integral / dz
np.seterr(over='raise' ,under='raise' )
else:
# if not a mean, we need to potentially handle unit conversions
unit0 = Unit("m")*unit
unit = Unit(unit0.format().split()[-1])
unit0.convert(integral,unit,inplace=True)
return Variable(data = integral,
unit = "%s" % unit,
name = name,
time = self.time,
time_bnds = self.time_bnds,
lat = self.lat,
lat_bnds = self.lat_bnds,
lon = self.lon,
lon_bnds = self.lon_bnds,
area = self.area,
ndata = self.ndata)
def integrateInSpace(self,region=None,mean=False,weight=None,intabs=False):
r"""Integrates the variable over a given region.
Uses nodal integration to approximate
.. math:: \int_{\Omega} v(\mathbf{x},\dots)\ d\Omega
The arguments of the integrand reflect that while it must be
at least defined in space, the remaining arguments are
flexible. The variable :math:`\Omega` represents the desired
region over which we will integrate. If no region is
specified, the variable will be integrated over the extent of
its spatial domain. If the mean function value over the region is
desired, this routine will approximate
.. math:: \frac{1}{A(\Omega)} \int_{\Omega} v(\mathbf{x},\dots)\ d\Omega
again by nodal integration. The spatial area which we divide
by :math:`A(\Omega)` is the non-masked area of the given
region, also given by
.. math:: A(\Omega) = \int_{\Omega}\ d\Omega
This means that if a function has some values masked or marked
as invalid, we do not penalize the average value by including
this as a point at which data is expected.
We also support the inclusion of an optional weighting
function :math:`w(\mathbf{x})` which is a function of space
only. In this case, we approximate the following integral
.. math:: \int_{\Omega} v(\mathbf{x},\dots)w(\mathbf{x})\ d\Omega
and if a mean value is desired,
.. math:: \frac{1}{\int_{\Omega} w(\mathbf{x})\ d\Omega} \int_{\Omega} v(\mathbf{x},\dots)w(\mathbf{x})\ d\Omega
Parameters
----------
region : str, optional
name of the region over which you wish to integrate
mean : bool, optional
enable to divide the integrand to get the mean function value
weight : numpy.ndarray, optional
a data array of the same shape as this variable's areas
representing an additional weight in the integrand
intabs : bool, optional
enable to integrate the absolute value
Returns
-------
integral : ILAMB.Variable.Variable
a Variable instance with the integrated value along with the
appropriate name and unit change.
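Examples
--------
A minimal sketch, assuming `v` is a spatial Variable and that the
"global" region is registered with ILAMB.Regions:
>>> total = v.integrateInSpace()                          # unit gains a factor of m2
>>> gmean = v.integrateInSpace(region="global",mean=True) # area-weighted mean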
"""
def _integrate(var,areas):
op = lambda x : x
if intabs: op = np.abs
assert var.shape[-2:] == areas.shape
np.seterr(over='ignore',under='ignore')
vbar = (op(var)*areas).sum(axis=-1).sum(axis=-1)
np.seterr(over='raise',under='raise')
return vbar
if not self.spatial: raise il.NotSpatialVariable()
# determine the measure
mask = self.data.mask
while mask.ndim > 2: mask = np.all(mask,axis=0)
measure = np.ma.masked_array(self.area,mask=mask,copy=True)
if weight is not None: measure *= weight
# if we want to integrate over a region, we need to add to the
# measure's mask
r = Regions()
if region is not None: measure.mask += r.getMask(region,self)
# approximate the integral
integral = _integrate(self.data,measure)
if mean:
np.seterr(under='ignore',invalid='ignore')
integral = integral / measure.sum()
np.seterr(under='raise',invalid='warn')
# handle the name and unit
name = self.name + "_integrated_over_space"
if region is not None: name = name.replace("space",region)
unit = Unit(self.unit)
if mean:
# we have already divided through by the non-masked area in
# units of m^2, so the unit of the integrand is unchanged
name += "_and_divided_by_area"
else:
# if not a mean, we need to potentially handle unit conversions
unit *= Unit("m2")
return Variable(data = np.ma.masked_array(integral),
unit = "%s" % unit,
time = self.time,
time_bnds = self.time_bnds,
depth = self.depth,
depth_bnds = self.depth_bnds,
name = name)
def siteStats(self,region=None,weight=None,intabs=False):
"""Computes the mean and standard deviation of the variable over all data sites.
Parameters
----------
region : str, optional
name of the region over which you wish to include stats.
weight : numpy.ndarray, optional
an optional array of weights used in the average
intabs : bool, optional
enable to average the absolute value
Returns
-------
mean : ILAMB.Variable.Variable
a Variable instance with the mean values
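Examples
--------
A minimal sketch, assuming `v` holds site data (ndata is set):
>>> site_mean = v.siteStats() # mean across all data sites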
"""
if self.ndata is None: raise il.NotDatasiteVariable()
op = lambda x : x
if intabs: op = np.abs
rem_mask = np.copy(self.data.mask)
rname = ""
r = Regions()
if region is not None:
self.data.mask += r.getMask(region,self)
rname = "_over_%s" % region
np.seterr(over='ignore',under='ignore')
mean = np.ma.average(op(self.data),axis=-1,weights=weight)
np.seterr(over='raise',under='raise')
self.data.mask = rem_mask
return Variable(data = mean,
unit = self.unit,
time = self.time,
time_bnds = self.time_bnds,
depth = self.depth,
depth_bnds = self.depth_bnds,
name = "mean_%s%s" % (self.name,rname))
def annualCycle(self):
"""Computes mean annual cycle information (climatology) for the variable.
For each site/cell/depth in the variable, compute the mean annual cycle.
Returns
-------
mean : ILAMB.Variable.Variable
The annual cycle mean values
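Examples
--------
A minimal sketch, assuming `v` holds monthly data:
>>> cycle = v.annualCycle()
>>> cycle.time.size
12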
"""
if not self.temporal: raise il.NotTemporalVariable()
assert self.monthly
assert self.time.size > 11
begin = np.argmin(self.time[:11]%365) # index of the first January
end = begin+int(self.time[begin:].size/12.)*12 # stop after the last complete year
shp = (-1,12) + self.data.shape[1:]
v = self.data[begin:end,...].reshape(shp)
np.seterr(over='ignore',under='ignore')
mean = v.mean(axis=0)
np.seterr(over='raise',under='raise')
return Variable(data = mean,
unit = self.unit,
name = "annual_cycle_mean_of_%s" % self.name,
time = mid_months,
time_bnds = np.asarray([bnd_months[:-1],bnd_months[1:]]).T,
lat = self.lat,
lat_bnds = self.lat_bnds,
lon = self.lon,
lon_bnds = self.lon_bnds,
area = self.area,
depth = self.depth,
depth_bnds = self.depth_bnds,
ndata = self.ndata)
def timeOfExtrema(self,etype="max"):
"""Returns the time of the specified extrema.
Parameters
----------
etype : str, optional
The type of extrema to compute, either 'max' or 'min'
Returns
-------
extrema : ILAMB.Variable.Variable
The times of the extrema computed
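Examples
--------
A minimal sketch, assuming `v` is a temporal Variable:
>>> tmax = v.timeOfExtrema(etype="max") # days at which the maximum occurs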
"""
if not self.temporal: raise il.NotTemporalVariable()
fcn = {"max":np.argmax,"min":np.argmin}
assert etype in fcn.keys()
tid = np.apply_along_axis(fcn[etype],0,self.data)
mask = False
if self.data.ndim > 1 and self.data.mask.ndim > 0: mask = np.apply_along_axis(np.all,0,self.data.mask) # mask cells where all data is masked
data = np.ma.masked_array(self.time[tid],mask=mask)
return Variable(data = data,
unit = "d",
name = "time_of_%s_%s" % (etype,self.name),
lat = self.lat,
lat_bnds = self.lat_bnds,
lon = self.lon,
lon_bnds = self.lon_bnds,
area = self.area,
depth = self.depth,
depth_bnds = self.depth_bnds,
ndata = self.ndata)
def spatialDifference(self,var):
"""Computes the point-wise difference of two spatially defined variables.
If the variable is spatial or site data and is defined on the
same grid, this routine will simply compute the difference in
the data arrays. If the variables are spatial but defined on
separate grids, the routine will interpolate both variables to
a composed grid via nearest-neighbor interpolation and then
return the difference.
Parameters
----------
var : ILAMB.Variable.Variable
The variable we wish to compare against this variable
Returns
-------
diff : ILAMB.Variable.Variable
A new variable object representing the difference
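Examples
--------
A minimal sketch, assuming `obs` and `mod` are spatial Variables
with the same unit:
>>> diff = obs.spatialDifference(mod) # mod minus obs, interpolated if needed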
"""
def _make_bnds(x):
bnds = np.zeros(x.size+1)
bnds[1:-1] = 0.5*(x[1:]+x[:-1])
bnds[0] = max(x[0] -0.5*(x[ 1]-x[ 0]),-180)
bnds[-1] = min(x[-1]+0.5*(x[-1]-x[-2]),+180)
return bnds
assert Unit(var.unit) == Unit(self.unit)
assert self.temporal == False
assert self.ndata == var.ndata
assert self.layered == False
# Perform a check on the spatial grid. If it is the exact same
# grid, there is no need to interpolate.
same_grid = False
try:
same_grid = np.allclose(self.lat,var.lat)*np.allclose(self.lon,var.lon)
except:
pass
if same_grid:
error = np.ma.masked_array(var.data-self.data,mask=self.data.mask+var.data.mask)
diff = Variable(data = error,
unit = var.unit,
lat = var.lat,
lat_bnds = var.lat_bnds,
lon = var.lon,
lon_bnds = var.lon_bnds,
ndata = var.ndata,
name = "%s_minus_%s" % (var.name,self.name))
else:
if not self.spatial: raise il.NotSpatialVariable()
lat_bnd1 = _make_bnds(self.lat)
lon_bnd1 = _make_bnds(self.lon)
lat_bnd2 = _make_bnds( var.lat)
lon_bnd2 = _make_bnds( var.lon)
lat_bnd,lon_bnd,lat,lon,error = il.TrueError(lat_bnd1,lon_bnd1,self.lat,self.lon,self.data,
lat_bnd2,lon_bnd2, var.lat, var.lon, var.data)
# bounds of the composed grid are rebuilt from the centroids
# inside the constructor
diff = Variable(data = error,
unit = var.unit,
lat = lat,
lon = lon,
name = "%s_minus_%s" % (var.name,self.name))
return diff
def convert(self,unit,density=998.2,molar_mass=12.0107):
"""Convert the variable to a given unit.
We use the UDUNITS library via the cf_units python interface to
convert the variable's unit. Additional support is provided
for unit conversions in which substance information is
required. For example, in quantities such as precipitation it
is common to have data in the form of a mass rate per unit
area [kg s-1 m-2] yet desire it in a linear rate [m s-1]. This
can be accomplished if the density of the substance is
known. We assume here that water is the substance, but this
can be changed by specifying the density when calling the
function.
Parameters
----------
unit : str
the desired converted unit
density : float, optional
the mass density in [kg m-3] to use when converting linear
rates to area density rates
molar_mass : float, optional
the molar mass in [kg mol-1], default value assumes
substance to be converted is carbon
Returns
-------
self : ILAMB.Variable.Variable
this object with its unit converted
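Examples
--------
A minimal sketch, assuming `pr` is precipitation in "kg m-2 s-1":
>>> pr = pr.convert("mm d-1") # uses the default water density of 998.2 kg m-3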
"""
if unit is None: return self
src_unit = Unit(self.unit)
tar_unit = Unit( unit)
mask = self.data.mask
mass_density = Unit("kg m-3")
molar_density = Unit("g mol-1")
if ((src_unit/tar_unit)/mass_density).is_dimensionless():
with np.errstate(all='ignore'):
self.data /= density
src_unit /= mass_density
elif ((tar_unit/src_unit)/mass_density).is_dimensionless():
with np.errstate(all='ignore'):
self.data *= density
src_unit *= mass_density
if ((src_unit/tar_unit)/molar_density).is_dimensionless():
with np.errstate(all='ignore'):
self.data /= molar_mass
src_unit /= molar_density
elif ((tar_unit/src_unit)/molar_density).is_dimensionless():
with np.errstate(all='ignore'):
self.data *= molar_mass
src_unit *= molar_density
try:
with np.errstate(all='ignore'):
src_unit.convert(self.data,tar_unit,inplace=True)
if self.data_bnds is not None:
src_unit.convert(self.data_bnds.data,tar_unit,inplace=True)
self.unit = unit
except:
raise il.UnitConversionError()
return self
def toNetCDF4(self,dataset,attributes=None,group=None):
"""Adds the variable to the specified netCDF4 dataset.
Parameters
----------
dataset : netCDF4.Dataset
a dataset into which you wish to save this variable
attributes : dict of scalars, optional
a dictionary of additional scalars to encode as ncattrs
group : str, optional
the name of the netCDF4 group to which we add this variable
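Examples
--------
A minimal sketch, assuming write access to a (hypothetical) file "out.nc":
>>> from netCDF4 import Dataset
>>> with Dataset("out.nc",mode="w") as dset:
...     v.toNetCDF4(dset)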
"""
def _checkTime(t,dset):
"""A local function for ensuring the time dimension is saved in the dataset."""
time_name = "time"
while True:
if time_name in dset.dimensions.keys():
if (t.shape == dset.variables[time_name][...].shape and
np.allclose(t,dset.variables[time_name][...],atol=0.5*self.dt)):
return time_name
else:
time_name += "_"
else:
dset.createDimension(time_name)
T = dset.createVariable(time_name,"double",(time_name))
T.setncattr("units","days since 1850-01-01 00:00:00")
T.setncattr("calendar","noleap")
T.setncattr("axis","T")
T.setncattr("long_name","time")
T.setncattr("standard_name","time")
T[...] = t
if self.time_bnds is not None:
bnd_name = time_name.replace("time","time_bnds")
T.setncattr("bounds",bnd_name)
if "nb" not in dset.dimensions.keys():
D = dset.createDimension("nb",size=2)
if bnd_name not in dset.variables.keys():
B = dset.createVariable(bnd_name,"double",(time_name,"nb"))
B.setncattr("units","days since 1850-01-01 00:00:00")
B[...] = self.time_bnds
return time_name
def _checkLat(lat,dset):
"""A local function for ensuring the lat dimension is saved in the dataset."""
lat_name = "lat"
while True:
if lat_name in dset.dimensions.keys():
if (lat.shape == dset.variables[lat_name][...].shape and
np.allclose(lat,dset.variables[lat_name][...])):
return lat_name
else:
lat_name += "_"
else:
dset.createDimension(lat_name,size=lat.size)
Y = dset.createVariable(lat_name,"double",(lat_name))
Y.setncattr("units","degrees_north")
Y.setncattr("axis","Y")
Y.setncattr("long_name","latitude")
Y.setncattr("standard_name","latitude")
Y[...] = lat
if self.lat_bnds is not None:
bnd_name = lat_name.replace("lat","lat_bnds")
Y.setncattr("bounds",bnd_name)
if "nb" not in dset.dimensions.keys():
D = dset.createDimension("nb",size=2)
if bnd_name not in dset.variables.keys():
B = dset.createVariable(bnd_name,"double",(lat_name,"nb"))
B.setncattr("units","degrees_north")
B[...] = self.lat_bnds
return lat_name
def _checkLon(lon,dset):
"""A local function for ensuring the lon dimension is saved in the dataset."""
lon_name = "lon"
while True:
if lon_name in dset.dimensions.keys():
if (lon.shape == dset.variables[lon_name][...].shape and
np.allclose(lon,dset.variables[lon_name][...])):
return lon_name
else:
lon_name += "_"
else:
dset.createDimension(lon_name,size=lon.size)
X = dset.createVariable(lon_name,"double",(lon_name))
X.setncattr("units","degrees_east")
X.setncattr("axis","X")
X.setncattr("long_name","longitude")
X.setncattr("standard_name","longitude")
X[...] = lon
if self.lon_bnds is not None:
bnd_name = lon_name.replace("lon","lon_bnds")
X.setncattr("bounds",bnd_name)
if "nb" not in dset.dimensions.keys():
D = dset.createDimension("nb",size=2)
if bnd_name not in dset.variables.keys():
B = dset.createVariable(bnd_name,"double",(lon_name,"nb"))
B.setncattr("units","degrees_east")
B[...] = self.lon_bnds
return lon_name
def _checkData(ndata,dset):
"""A local function for ensuring the data dimension is saved in the dataset."""
data_name = "data"
while True:
if data_name in dset.dimensions.keys():
if (ndata == len(dset.dimensions[data_name])):
return data_name
else:
data_name += "_"
else:
dset.createDimension(data_name,size=ndata)
return data_name
def _checkLayer(layer,dataset):
"""A local function for ensuring the layer dimension is saved in the dataset."""
layer_name = "layer"
while True:
if layer_name in dataset.dimensions.keys():
if (layer.shape == dataset.variables[layer_name][...].shape and
np.allclose(layer,dataset.variables[layer_name][...])):
return layer_name
else:
layer_name += "_"
else:
dataset.createDimension(layer_name,size=layer.size)
Z = dataset.createVariable(layer_name,"double",(layer_name))
Z.setncattr("units","m")
Z.setncattr("axis","Z")
Z.setncattr("long_name","depth")
Z.setncattr("standard_name","depth")
Z[...] = layer
if self.depth_bnds is not None:
bnd_name = layer_name.replace("layer","layer_bnds")
Z.setncattr("bounds",bnd_name)
if "nb" not in dataset.dimensions.keys():
D = dataset.createDimension("nb",size=2)
if bnd_name not in dataset.variables.keys():
B = dataset.createVariable(bnd_name,"double",(layer_name,"nb"))
B.setncattr("units","m")
B[...] = self.depth_bnds
return layer_name
# if no group is desired, just write to the dataset...
if group is None:
dset = dataset
else:
# if a group is desired, check to see it exists and write into group
if group not in dataset.groups:
dset = dataset.createGroup(group)
else:
dset = dataset.groups[group]
dim = []
if self.temporal: dim.append(_checkTime (self.time ,dset))
if self.layered: dim.append(_checkLayer(self.depth,dset))
if self.ndata is not None:
dim.append(_checkData (self.ndata,dset))
_checkLat(self.lat,dset)
_checkLon(self.lon,dset)
else:
if self.lat is not None: dim.append(_checkLat (self.lat ,dset))
if self.lon is not None: dim.append(_checkLon (self.lon ,dset))
grp = dset
if self.data.size == 1:
# we are writing out a scalar so we add it into a special
# subgroup for scalars
if "scalars" not in dset.groups:
grp = dset.createGroup("scalars")
else:
grp = dset.groups["scalars"]
V = grp.createVariable(self.name,"double",dim,zlib=True)
V.units = self.unit
if type(self.data) is np.ma.core.MaskedConstant:
V[...] = np.nan
else:
V[...] = self.data
else:
# not a scalar, compute the min/max and middle 98
# percentiles
try:
per = np.percentile(self.data.compressed(),[0,1,99,100])
except:
per = [0,0,0,0]
V = grp.createVariable(self.name,"double",dim,zlib=True)
V.units = self.unit
V.min = per[0]
V.dn99 = per[1]
V.up99 = per[2]
V.max = per[3]
V[...] = self.data
if self.data_bnds is not None:
bnd_name = "%s_bnds" % (self.name)
V.bounds = bnd_name
VB = grp.createVariable(bnd_name,"double",dim+['nb'],zlib=True)
try:
per = np.percentile(self.data_bnds.compressed(),[0,1,99,100])
except:
per = [0,0,0,0]
VB.units = self.unit
VB.min = per[0]
VB.dn99 = per[1]
VB.up99 = per[2]
VB.max = per[3]
VB[...] = self.data_bnds
# optionally write out more attributes
if attributes:
for key in attributes.keys():
V.setncattr(key,attributes[key])
def plot(self,ax,**keywords):
"""Plots the variable on the given matplotlib axis.
The behavior of this routine depends on the type of variable
specified. If the data is purely temporal, then the plot will
be a scatter plot versus time of the data. If it is purely
spatial, then the plot will be a global plot of the data. The
routine supports multiple keywords although some may not apply
to the type of plot being generated.
Parameters
----------
ax : matplotlib.axes._subplots.AxesSubplot
The matplotlib axes object onto which you wish to plot the variable
lw : float, optional
The line width to use when plotting
alpha : float, optional
The degree of transparency when plotting, alpha in [0,1]
color : str or RGB tuple, optional
The color to plot with in line plots
label : str, optional
The label to appear in the legend of line plots
vmin : float, optional
The minimum plotted value
vmax : float, optional
The maximum plotted value
region : str, optional
The region on which to display a spatial variable
cmap : str, optional
The name of the colormap to be used in plotting the spatial variable
ticks : array of floats, optional
Defines the locations of the xticks
ticklabels : array of strings, optional
Defines the labels of the xticks
land : float, optional
the grayscale of the land face color, defaults to 0.875
water : float, optional
the grayscale of the ocean face color, defaults to 0.750
cbar : bool, optional
enable to add a horizontal colorbar
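Examples
--------
A minimal sketch, assuming `v` is a purely temporal Variable:
>>> fig,ax = plt.subplots()
>>> v.plot(ax,color="b",label="my data")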
"""
data = self.data if self.data_bnds is None else self.data_bnds
pad = 0.05*(data.max()-data.min())
lw = keywords.get("lw" ,1.0)
alpha = keywords.get("alpha" ,1.0)
color = keywords.get("color" ,"k")
label = keywords.get("label" ,None)
vmin = keywords.get("vmin" ,data.min()-pad)
vmax = keywords.get("vmax" ,data.max()+pad)
region = keywords.get("region","global")
cmap = keywords.get("cmap" ,"jet")
land = keywords.get("land" ,0.875)
water = keywords.get("water" ,0.750)
pad = keywords.get("pad" ,5.0)
cbar = keywords.get("cbar" ,False)
rem_mask = None
r = Regions()
if self.temporal and not self.spatial:
ticks = keywords.get("ticks",None)
ticklabels = keywords.get("ticklabels",None)
t = self.time/365.+1850
ax.plot(t,self.data,'-',
color = color,
lw = lw,
alpha = alpha,
label = label)
if self.data_bnds is not None:
ax.fill_between(t,self.data_bnds[:,0],self.data_bnds[:,1],
lw = 0,
color = color,
alpha = 0.25*alpha)
if ticks is not None: ax.set_xticks(ticks)
if ticklabels is not None: ax.set_xticklabels(ticklabels)
ax.grid(True)
if abs(vmax-vmin) > 1e-12: ax.set_ylim(vmin,vmax)
elif not self.temporal:
# mask out areas outside our region
rem_mask = np.copy(self.data.mask)
self.data.mask += r.getMask(region,self)
# determine the plotting extents
percent_pad = 0.2
if self.ndata is None:
lat_empty = np.where(self.data.mask.all(axis=-1)==False)[0] # rows with at least one unmasked value
lon_empty = np.where(self.data.mask.all(axis=-2)==False)[0] # columns with at least one unmasked value
extents = [self.lon_bnds[lon_empty[ 0],0],
self.lon_bnds[lon_empty[-1],1],
self.lat_bnds[lat_empty[ 0],0],
self.lat_bnds[lat_empty[-1],1]]
dx = percent_pad*(extents[1]-extents[0])
dy = percent_pad*(extents[3]-extents[2])
extents[0] = max(extents[0]-dx,-180); extents[1] = min(extents[1]+dx,+180)
extents[2] = max(extents[2]-dy,- 90); extents[3] = min(extents[3]+dy,+ 90)
lon_mid = 0.5*(extents[0]+extents[1])
# ...but the data might cross the dateline without being global
if(lon_empty[ 0]== 0 and
lon_empty[-1]==(self.lon.size-1) and
np.diff(lon_empty).max() > 0.5*self.lon.size):
wrap_lon = self.lon[lon_empty]
wrap_lon += (wrap_lon<0)*360
extents[0] = wrap_lon.min()
extents[1] = wrap_lon.max()
dx = percent_pad*(extents[1]-extents[0])
extents[0] -= dx; extents[1] += dx
# find the middle centroid by mean angle
lons = self.lon[np.where(self.data.mask.all(axis=-2)==False)[0]]
lons = lons/360*2*np.pi
lon_mid = np.arctan2(np.sin(lons).mean(),np.cos(lons).mean())/2/np.pi*360
else:
extents = [self.lon.min(),self.lon.max(),
self.lat.min(),self.lat.max()]
dx = percent_pad*(extents[1]-extents[0])
dy = percent_pad*(extents[3]-extents[2])
extents[0] = max(extents[0]-dx,-180); extents[1] = min(extents[1]+dx,+180)
extents[2] = max(extents[2]-dy,- 90); extents[3] = min(extents[3]+dy,+ 90)
lon_mid = 0.5*(extents[0]+extents[1])
# choose a projection based on the non-masked data
proj = ccrs.PlateCarree(central_longitude=lon_mid)
aspect_ratio = (extents[3]-extents[2])/(extents[1]-extents[0])
if (extents[1]-extents[0]) > 320:
if np.allclose(extents[2],-90) and extents[3] <= 0:
proj = ccrs.Orthographic(central_latitude=-90,central_longitude=0)
aspect_ratio = 1.
elif extents[3]>75 and extents[2] >= 0:
proj = ccrs.Orthographic(central_latitude=+90,central_longitude=0)
aspect_ratio = 1.
elif (extents[3]-extents[2]) > 140:
proj = ccrs.Robinson(central_longitude=0)
extents = [-180,180,-90,90]
aspect_ratio = 0.5
lon_mid = 0.
# make the plot
w = 7.5; h = w*aspect_ratio
fig,ax = plt.subplots(figsize=(w,h),
subplot_kw={'projection':proj})
if self.ndata is None:
lat = np.hstack([self.lat_bnds[:,0],self.lat_bnds[-1,-1]])
lon = np.hstack([self.lon_bnds[:,0],self.lon_bnds[-1,-1]])
p = ax.pcolormesh(lon,lat,self.data,cmap=cmap,vmin=vmin,vmax=vmax,transform=ccrs.PlateCarree())
else:
norm = colors.Normalize(vmin,vmax)
cmap = get_cmap(cmap)
clrs = cmap(norm(self.data))
p = ax.scatter(self.lon,self.lat,s=35,color=clrs,cmap=cmap,linewidths=0,
transform=ccrs.PlateCarree())
# use the 'land'/'water' keywords for the map face colors
ax.add_feature(cfeature.NaturalEarthFeature('physical','land','110m',
edgecolor='face',
facecolor=str(land)),zorder=-1)
ax.add_feature(cfeature.NaturalEarthFeature('physical','ocean','110m',
edgecolor='face',
facecolor=str(water)),zorder=-1)
ax.set_extent(extents,ccrs.PlateCarree())
if cbar: fig.colorbar(p,orientation='horizontal',pad=0.05,label=label)
if rem_mask is not None: self.data.mask = rem_mask
return ax
def interpolate(self,time=None,lat=None,lon=None,lat_bnds=None,lon_bnds=None,itype='nearestneighbor'):
"""Use nearest-neighbor interpolation to interpolate time and/or space at given values.
Parameters
----------
time : numpy.ndarray, optional
Array of times at which to interpolate the variable
lat : numpy.ndarray, optional
Array of latitudes at which to interpolate the variable
lon : numpy.ndarray, optional
Array of longitudes at which to interpolate the variable
itype : str, optional
The interpolation type, either 'nearestneighbor' or 'bilinear'
Returns
-------
var : ILAMB.Variable.Variable
The interpolated variable
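Examples
--------
A minimal sketch, interpolating to a (hypothetical) 1-degree grid:
>>> lat = np.linspace(- 89.5, 89.5,180)
>>> lon = np.linspace(-179.5,179.5,360)
>>> w = v.interpolate(lat=lat,lon=lon) # nearest-neighbor by default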
"""
if time is None and lat is None and lon is None: return self
output_time = self.time if (time is None) else time
output_tbnd = self.time_bnds if (time is None) else None
output_lat = self.lat if (lat is None) else lat
output_lon = self.lon if (lon is None) else lon
output_area = self.area if (lat is None and lon is None) else None
data = self.data
data_bnds = None
if self.spatial and (lat is not None or lon is not None):
if lat is None: lat = self.lat
if lon is None: lon = self.lon
if itype == 'nearestneighbor':
rows = (np.abs(lat[:,np.newaxis]-self.lat)).argmin(axis=1)
cols = (np.abs(lon[:,np.newaxis]-self.lon)).argmin(axis=1)
args = []
if self.temporal: args.append(range(self.time.size))
if self.layered: args.append(range(self.depth.size))
args.append(rows)
args.append(cols)
ind = np.ix_(*args)
mask = data.mask
if data.mask.size > 1: mask = data.mask[ind]
data = data.data[ind]
data = np.ma.masked_array(data,mask=mask)
if self.data_bnds is not None:
args.append([0,1])
ind = np.ix_(*args)
data_bnds = self.data_bnds[ind]
np.seterr(under='ignore',over='ignore')
frac = self.area / il.CellAreas(self.lat,self.lon,self.lat_bnds,self.lon_bnds).clip(1e-12)
np.seterr(under='raise',over='raise')
frac = frac.clip(0,1)
frac = frac[np.ix_(rows,cols)]
output_area = frac * il.CellAreas(lat,lon,lat_bnds,lon_bnds)
elif itype == 'bilinear':
from scipy.interpolate import RectBivariateSpline
if self.data.ndim == 3: # bilinear interpolation is only implemented for (time,lat,lon) data
halo = il.LandLinInterMissingValues(self.data)
data = np.ma.zeros((self.data.shape[:-2]+(lat.size,lon.size)))
for i in range(self.data.shape[0]):
dint = RectBivariateSpline(self.lat,self.lon, halo[i,...], kx=1,ky=1)
mint = RectBivariateSpline(self.lat,self.lon,self.data[i,...].mask,kx=1,ky=1)
data[i,...] = np.ma.masked_array(dint(lat,lon,grid=True),
mint(lat,lon,grid=True)>0.5)
frac = self.area / il.CellAreas(self.lat,self.lon).clip(1e-12)
frac = frac.clip(0,1)
frac = RectBivariateSpline(self.lat,self.lon,frac,kx=1,ky=1)
output_area = frac(lat,lon,grid=True) * il.CellAreas(lat,lon)
else:
raise ValueError("Unknown interpolation type: %s" % itype)
if self.temporal and time is not None:
times = np.apply_along_axis(np.argmin,1,np.abs(time[:,np.newaxis]-self.time))
mask = data.mask
if mask.size > 1: mask = data.mask[times,...]
data = data.data[times,...]
data = np.ma.masked_array(data,mask=mask)
output_tbnd = self.time_bnds[times]
return Variable(data = data,
data_bnds = data_bnds,
unit = self.unit, name = self.name, ndata = self.ndata,
lat = output_lat,
lon = output_lon,
area = output_area,
time = output_time,
time_bnds = output_tbnd)
def phaseShift(self,var,method="max_of_annual_cycle"):
"""Computes the phase shift between a variable and this variable.
Finds the phase shift as the time between extrema of the
annual cycles of the variables. Note that if this var and/or
the given variable are not already annual cycles, they will be
computed but not returned.
Parameters
----------
var : ILAMB.Variable.Variable
The variable with which we will measure phase shift
method : str, optional
The name of the method used to compute the phase shift
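Examples
--------
A minimal sketch, assuming `obs` and `mod` are monthly Variables:
>>> shift = obs.phaseShift(mod) # days between annual-cycle maxima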
"""
assert method in ["max_of_annual_cycle","min_of_annual_cycle"]
assert self.temporal == var.temporal
v1 = self; v2 = var
if not self.temporal:
# If the data is not temporal, then the user may have
# already found the extrema. If the units of the input
# variable are days, then set the extrema to this data.
if not (self.unit == "d" and var.unit == "d"): raise il.NotTemporalVariable
e1 = v1
e2 = v2
else:
# While temporal, the user may have passed in the mean
# annual cycle as the variable. So if the leading
# dimension is 12 we assume the variables are already the
# annual cycles. If not, we compute the cycles and then
# compute the extrema.
if self.time.size != 12: v1 = self.annualCycle()
if var.time.size != 12: v2 = var .annualCycle()
e1 = v1.timeOfExtrema(etype=method[:3])
e2 = v2.timeOfExtrema(etype=method[:3])
if e1.spatial:
shift = e1.spatialDifference(e2)
else:
data = e2.data - e1.data
mask = e1.data.mask + e2.data.mask
shift = Variable(data=data,unit=e1.unit,ndata=e1.ndata,lat=e1.lat,lon=e1.lon)
shift.name = "phase_shift_of_%s" % e1.name
shift.data += (shift.data < -0.5*365.)*365.
shift.data -= (shift.data > +0.5*365.)*365.
return shift
def correlation(self,var,ctype,region=None):
"""Computes the correlation between two variables.
Parameters
----------
var : ILAMB.Variable.Variable
The variable with which we will compute a correlation
ctype : str
The correlation type, one of {"spatial","temporal","spatiotemporal"}
region : str, optional
The region over which to perform a spatial correlation
Notes
-----
We need to think more carefully about what correlation means
when data are masked. The sums ignore the masked data, but then
the number of items *n* is not constant and should be reduced
for masked values.
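Examples
--------
A minimal sketch, assuming `obs` and `mod` have identical shapes:
>>> r = obs.correlation(mod,ctype="temporal") # correlation along the time axis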
"""
def _correlation(x,y,axes=None):
if axes is None: axes = range(x.ndim)
if type(axes) == int: axes = (int(axes),)
axes = tuple(axes)
n = 1
for ax in axes: n *= x.shape[ax]
xbar = x.sum(axis=axes)/n # manual mean; older versions of np.mean() did not accept a tuple of axes
ybar = y.sum(axis=axes)/n
xy = (x*y).sum(axis=axes)
x2 = (x*x).sum(axis=axes)
y2 = (y*y).sum(axis=axes)
try:
np.seterr(under='ignore',invalid='ignore')
r = (xy-n*xbar*ybar)/(np.sqrt(x2-n*xbar*xbar)*np.sqrt(y2-n*ybar*ybar))
np.seterr(under='raise',invalid='warn')
except:
r = np.nan
return r
# checks on data consistency
assert region is None
assert self.data.shape == var.data.shape
assert ctype in ["spatial","temporal","spatiotemporal"]
# determine arguments for functions
axes = None
out_time = None
out_lat = None
out_lon = None
out_area = None
out_ndata = None
if ctype == "temporal":
axes = 0
if self.spatial:
out_lat = self.lat
out_lon = self.lon
out_area = self.area
elif self.ndata:
out_ndata = self.ndata
elif ctype == "spatial":
if self.spatial: axes = range(self.data.ndim)[-2:]
if self.ndata: axes = self.data.ndim-1
if self.temporal: out_time = self.time
out_time_bnds = None
if out_time is not None: out_time_bnds = self.time_bnds
r = _correlation(self.data,var.data,axes=axes)
return Variable(data=r,unit="1",
name="%s_correlation_of_%s" % (ctype,self.name),
time=out_time,time_bnds=out_time_bnds,ndata=out_ndata,
lat=out_lat,lon=out_lon,area=out_area)
def bias(self,var):
"""Computes the bias between a given variable and this variable.
Parameters
----------
var : ILAMB.Variable.Variable
The variable with which we will measure bias
Returns
-------
bias : ILAMB.Variable.Variable
the bias
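Examples
--------
A minimal sketch, assuming `obs` and `mod` are compatible Variables:
>>> b = obs.bias(mod) # time mean of (mod - obs)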
"""
# If not a temporal variable, then we assume that the user is
# passing in mean data and return the difference.
lat,lon,area = self.lat,self.lon,self.area
if not self.temporal:
assert self.temporal == var.temporal
bias = self.spatialDifference(var)
bias.name = "bias_of_%s" % self.name
return bias
if self.spatial:
# If the data is spatial, then we interpolate it on a
# common grid and take the difference.
same_grid = False
try:
same_grid = np.allclose(self.lat,var.lat)*np.allclose(self.lon,var.lon)
except:
pass
if not same_grid:
lat,lon = il.ComposeSpatialGrids(self,var)
area = None
self_int = self.interpolate(lat=lat,lon=lon)
var_int = var .interpolate(lat=lat,lon=lon)
data = var_int.data-self_int.data
mask = var_int.data.mask+self_int.data.mask
else:
data = var.data -self.data
mask = var.data.mask+self.data.mask
elif (self.ndata or self.time.size == self.data.size):
# If the data are at sites, then take the difference
data = var.data.data-self.data.data
mask = var.data.mask+self.data.mask
else:
raise il.NotSpatialVariable("Cannot take bias of scalars")
# Finally we return the temporal mean of the difference
bias = Variable(data=np.ma.masked_array(data,mask=mask),
name="bias_of_%s" % self.name,time=self.time,time_bnds=self.time_bnds,
unit=self.unit,ndata=self.ndata,
lat=lat,lon=lon,area=area,
depth_bnds = self.depth_bnds).integrateInTime(mean=True)
bias.name = bias.name.replace("_integrated_over_time_and_divided_by_time_period","")
return bias
def rmse(self,var):
"""Computes the RMSE between a given variable and this variable.
Parameters
----------
var : ILAMB.Variable.Variable
The variable with which we will measure RMSE
Returns
-------
RMSE : ILAMB.Variable.Variable
the RMSE
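Examples
--------
A minimal sketch, assuming `obs` and `mod` are compatible Variables:
>>> e = obs.rmse(mod)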
"""
# If not a temporal variable, then we assume that the user is
# passing in mean data and return the difference.
lat,lon,area = self.lat,self.lon,self.area
if not self.temporal:
assert self.temporal == var.temporal
rmse = self.spatialDifference(var)
rmse.name = "rmse_of_%s" % self.name
return rmse
if self.spatial:
# If the data is spatial, then we interpolate it on a
# common grid and take the difference.
same_grid = False
try:
same_grid = np.allclose(self.lat,var.lat)*np.allclose(self.lon,var.lon)
except:
pass
if not same_grid:
lat,lon = il.ComposeSpatialGrids(self,var)
area = None
self_int = self.interpolate(lat=lat,lon=lon)
var_int = var .interpolate(lat=lat,lon=lon)
data = var_int.data-self_int.data
mask = var_int.data.mask+self_int.data.mask
else:
data = var.data -self.data
mask = var.data.mask+self.data.mask
elif (self.ndata or self.time.size == self.data.size):
# If the data are at sites, then take the difference
data = var.data.data-self.data.data
mask = var.data.mask+self.data.mask
else:
raise il.NotSpatialVariable("Cannot take rmse of scalars")
# Finally we return the temporal mean of the difference squared
np.seterr(over='ignore',under='ignore')
data *= data
np.seterr(over='raise',under='raise')
rmse = Variable(data=np.ma.masked_array(data,mask=mask),
name="rmse_of_%s" % self.name,time=self.time,time_bnds=self.time_bnds,
unit=self.unit,ndata=self.ndata,
lat=lat,lon=lon,area=area,
depth_bnds = self.depth_bnds).integrateInTime(mean=True)
rmse.name = rmse.name.replace("_integrated_over_time_and_divided_by_time_period","")
rmse.data = np.sqrt(rmse.data)
return rmse
def rms(self):
"""Computes the RMS of this variable.
Returns
-------
RMS : ILAMB.Variable.Variable
the RMS
"""
if not self.temporal: raise il.NotTemporalVariable()
unit = self.unit
np.seterr(over='ignore',under='ignore')
data = self.data**2
np.seterr(over='raise',under='raise')
rms = Variable(data = data,
unit = "1", # will change later
name = "tmp", # will change later
ndata = self.ndata,
lat = self.lat,
lon = self.lon,
area = self.area,
time = self.time).integrateInTime(mean=True)
np.seterr(over='ignore',under='ignore')
rms.data = np.sqrt(rms.data)
np.seterr(over='raise',under='raise')
rms.unit = unit
rms.name = "rms_of_%s" % self.name
return rms
def variability(self,mean=None):
"""Computes the variability of the variable.
"""
if not self.temporal: raise il.NotTemporalVariable
if mean is None: mean = self.integrateInTime(mean=True)
var = Variable(unit = self.unit,
data = self.data - mean.data[np.newaxis,...],
time = self.time, time_bnds = self.time_bnds,
lat = self.lat, lat_bnds = self.lat_bnds,
lon = self.lon, lon_bnds = self.lon_bnds,
area = self.area,
ndata = self.ndata).rms()
var.name = "var_of_%s" % self.name
return var
def interannualVariability(self):
"""Computes the interannual variability.
The interannual variability in this case is defined as the
standard deviation of the data in the temporal dimension.
Returns
-------
iav : ILAMB.Variable.Variable
the interannual variability variable
"""
if not self.temporal: raise il.NotTemporalVariable
np.seterr(over='ignore',under='ignore')
data = self.data.std(axis=0)
np.seterr(over='raise',under='raise')
return Variable(data=data,
name="iav_of_%s" % self.name,
unit=self.unit,ndata=self.ndata,
lat=self.lat,lon=self.lon,area=self.area,
depth_bnds = self.depth_bnds)
def spatialDistribution(self,var,region="global"):
r"""Evaluates how well the input variable is spatially distributed relative to this variable.
This routine returns the normalized standard deviation and
correlation (needed for a Taylor plot) as well as a score
given as
.. math:: \frac{4(1+R)}{(\sigma+\frac{1}{\sigma})^2(1+R_0)}
where :math:`R` is the correlation, :math:`R_0=1` is the
reference correlation, and :math:`\sigma` is the normalized
standard deviation.
Parameters
----------
var : ILAMB.Variable.Variable
the comparison variable
region : str, optional
the name of the region over which to check the spatial distribution
Returns
-------
std : ILAMB.Variable.Variable
the normalized standard deviation of the input variable
R : ILAMB.Variable.Variable
the correlation of the input variable
score : ILAMB.Variable.Variable
the spatial distribution score
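Examples
--------
A minimal sketch, assuming `obs` and `mod` are spatial Variables:
>>> std,R,score = obs.spatialDistribution(mod,region="global")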
"""
assert self.temporal == var.temporal == False
r = Regions()
# First compute the observational spatial/site standard deviation
rem_mask0 = np.copy(self.data.mask)
self.data.mask += r.getMask(region,self)
np.seterr(over='ignore',under='ignore')
std0 = self.data.std()
np.seterr(over='raise',under='raise')
# Next compute the model spatial/site standard deviation
rem_mask = np.copy(var.data.mask)
var.data.mask += r.getMask(region,var)
np.seterr(over='ignore',under='ignore')
std = var.data.std()
np.seterr(over='raise',under='raise')
# Interpolate to new grid for correlation
if self.spatial:
lat,lon = il.ComposeSpatialGrids(self,var)
self_int = self.interpolate(lat=lat,lon=lon)
var_int = var .interpolate(lat=lat,lon=lon)
else:
self_int = self
var_int = var
R = self_int.correlation(var_int,ctype="spatial") # add regions
if type(R.data) is np.ma.core.MaskedConstant: R.data = 0.
# Restore masks
self.data.mask = rem_mask0
var.data.mask = rem_mask
# Put together scores, we clip the standard deviation of both
# variables at the same small amount, meant to avoid division
# by zero errors.
try:
R0 = 1.0
std0 = std0.clip(1e-12)
std = std .clip(1e-12)
std = std/std0
score = 4.0*(1.0+R.data)/((std+1.0/std)**2 *(1.0+R0))
except:
std = np.asarray([0.0])
score = np.asarray([0.0])
std = Variable(data=std ,name="normalized_spatial_std_of_%s_over_%s" % (self.name,region),unit="1")
score = Variable(data=score,name="spatial_distribution_score_of_%s_over_%s" % (self.name,region),unit="1")
return std,R,score
def coarsenInTime(self,intervals,window=0.):
"""Compute the mean function value in each of the input intervals.
Parameters
----------
intervals : array of shape (n,2)
An array of n intervals where the first entry is the
beginning and the second entry is the end of the interval
window : float, optional
Extend each interval before and after by this amount of time
Returns
-------
coarse : ILAMB.Variable.Variable
The coarsened variable
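Examples
--------
A minimal sketch, coarsening to (noleap) annual means over 10 years:
>>> intervals = np.asarray([[365.*y,365.*(y+1)] for y in range(10)])
>>> annual = v.coarsenInTime(intervals)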
"""
if not self.temporal: raise il.NotTemporalVariable
assert intervals.ndim == 2
n = intervals.shape[0]
shp = (n,)+self.data.shape[1:]
time = np.zeros(n)
data = np.ma.zeros(shp)
for i in range(n):
t0 = intervals[i,0]-window
tf = intervals[i,1]+window
time[i] = 0.5*(t0+tf)
mean = self.integrateInTime(mean=True,t0=t0,tf=tf).convert(self.unit)
data[i,...] = mean.data
return Variable(name = "coarsened_%s" % self.name,
unit = self.unit,
time = time,
time_bnds = intervals,
data = data,
ndata = self.ndata,
lat = self.lat,
lon = self.lon,
area = self.area,
depth_bnds = self.depth_bnds)
def accumulateInTime(self):
r"""For each time interval, accumulate variable from the beginning.
For each time interval :math:`i` in the variable, defined by
:math:`[t_0^i,t_f^i]`, compute
.. math:: \int_{t_0^0}^{t_f^i} v(t,\dots)\ dt
This routine is useful, for example, if the variable is a mass
rate defined over time and we wish to know the mass
accumulation as a function of time.
Returns
-------
sum : ILAMB.Variable.Variable
The cumulative sum of this variable
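Examples
--------
A minimal sketch, assuming `flux` carries a rate unit such as "kg s-1":
>>> mass = flux.accumulateInTime() # the unit becomes "kg"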
"""
if not self.temporal: raise il.NotTemporalVariable
n = self.time.size
shp = (n+1,) + self.data.shape[1:]
time = np.zeros(n+1)
data = np.ma.zeros(shp)
data_bnds = None if self.data_bnds is None else np.ma.zeros(shp+(2,))
time[0] = self.time_bnds[0,0]
for i in range(n):
t0 = self.time_bnds[i,0]
tf = self.time_bnds[i,1]
isum = self.integrateInTime(t0=t0,tf=tf)
time[i+1] = tf
data[i+1,...] = data[i,...] + isum.data
if data_bnds is not None:
data_bnds[i+1,...] = data_bnds[i,...] + isum.data_bnds
return Variable(name = "cumulative_sum_%s" % self.name,
unit = isum.unit,
time = time,
data = data,
data_bnds = data_bnds,
lat = self.lat,
lon = self.lon,
area = self.area)
def trim(self,lat=None,lon=None,t=None,d=None):
"""Trim away a variable in space/time in place.
Parameters
----------
lat,lon,t,d : tuple or list
a 2-tuple containing the lower and upper limits beyond which we trim
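Examples
--------
A minimal sketch, trimming to a (hypothetical) tropical window:
>>> v = v.trim(lat=(-30.,30.),lon=(-60.,60.))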
"""
def _whichInterval(val,bnds):
if val < bnds.min(): val = bnds.min()
if val > bnds.max(): val = bnds.max()
ind = np.where((val>=bnds[:,0])*(val<=bnds[:,1]))[0]
assert ind.size <= 2
ind = ind[0]
return ind
if lat is not None:
assert len(lat) == 2
if not self.spatial: raise il.NotSpatialVariable
i = _whichInterval(lat[0],self.lat_bnds)
j = _whichInterval(lat[1],self.lat_bnds)+1
self.lat = self.lat [i:j]
self.lat_bnds = self.lat_bnds[i:j]
self.data = self.data[...,i:j,:]
if self.data_bnds is not None:
self.data_bnds = self.data_bnds[...,i:j,:,:]
self.area = self.area[ i:j,:]
if lon is not None:
assert len(lon) == 2
if not self.spatial: raise il.NotSpatialVariable
i = _whichInterval(lon[0],self.lon_bnds)
j = _whichInterval(lon[1],self.lon_bnds)+1
self.lon = self.lon [i:j]
self.lon_bnds = self.lon_bnds[i:j]
self.data = self.data[...,i:j]
if self.data_bnds is not None:
self.data_bnds = self.data_bnds[...,i:j,:]
self.area = self.area[ :,i:j]
if t is not None:
assert len(t) == 2
if not self.temporal: raise il.NotTemporalVariable
self = il.ClipTime(self,t[0],t[1])
if d is not None:
assert len(d) == 2
if self.depth_bnds is None: raise ValueError
keep = (self.depth_bnds[:,1] >= d[0])*(self.depth_bnds[:,0] <= d[1])
ind = np.where(keep)[0]
self.depth_bnds = self.depth_bnds[ind,:]
self.depth = self.depth [ind ]
self.data = self.data[...,ind,:,:]
return self