# Source code for cf.data.data

from numpy import argsort       as numpy_argsort
from numpy import array         as numpy_array
from numpy import ceil          as numpy_ceil
from numpy import cos           as numpy_cos
from numpy import count_nonzero as numpy_count_nonzero
from numpy import dtype         as numpy_dtype
from numpy import e             as numpy_e
from numpy import empty         as numpy_empty
from numpy import exp           as numpy_exp
from numpy import floor         as numpy_floor
from numpy import ndarray       as numpy_ndarray
from numpy import ndim          as numpy_ndim
from numpy import log           as numpy_log
from numpy import log10         as numpy_log10
from numpy import log2          as numpy_log2
from numpy import ones          as numpy_ones
from numpy import reshape       as numpy_reshape
from numpy import result_type   as numpy_result_type
from numpy import rint          as numpy_rint
from numpy import round         as numpy_round
from numpy import seterr        as numpy_seterr
from numpy import shape         as numpy_shape
from numpy import sin           as numpy_sin
from numpy import size          as numpy_size
from numpy import tan           as numpy_tan
from numpy import tile          as numpy_tile
from numpy import trunc         as numpy_trunc
from numpy import unique        as numpy_unique
from numpy import unravel_index as numpy_unravel_index
from numpy import where         as numpy_where
from numpy import vectorize     as numpy_vectorize
from numpy import zeros         as numpy_zeros

from numpy.ma import array          as numpy_ma_array
from numpy.ma import count          as numpy_ma_count
from numpy.ma import empty          as numpy_ma_empty
from numpy.ma import is_masked      as numpy_ma_is_masked
from numpy.ma import isMA           as numpy_ma_isMA
from numpy.ma import masked         as numpy_ma_masked
from numpy.ma import masked_all     as numpy_ma_masked_all
from numpy.ma import masked_invalid as numpy_ma_masked_invalid
from numpy.ma import masked_where   as numpy_ma_masked_where
from numpy.ma import MaskedArray    as numpy_ma_MaskedArray
from numpy.ma import nomask         as numpy_ma_nomask
from numpy.ma import ones           as numpy_ma_ones
from numpy.ma import where          as numpy_ma_where
from numpy.ma import zeros          as numpy_ma_zeros

import operator

from functools import partial
from operator  import mul as operator_mul
from math      import ceil as math_ceil
from sys       import byteorder, getrefcount
from itertools import izip
from itertools import product as itertools_product

from ..netcdf.filearray import NetCDFFileArray
from ..um.filearray     import UMFileArray

from ..cfdatetime import st2dt, dt2rt, rt2dt
from ..units      import Units
from ..constants  import masked as cf_masked
from ..functions  import (CHUNKSIZE, FM_THRESHOLD, RTOL, ATOL, FREE_MEMORY,
                          SET_FREE_MEMORY,
                          parse_indices, _numpy_allclose, abspath,
                          pathjoin, hash_array, get_subspace, set_subspace,
                          broadcast_array)
from ..functions  import inspect as cf_inspect
from ..functions  import _section

from .filearray          import FileArray, CreateArray
from .partition          import Partition
from .partitionmatrix    import PartitionMatrix
from .collapse_functions import *

# --------------------------------------------------------------------
# _seterr = How floating-point errors in the results of arithmetic
#           operations are handled. These defaults are those of 
#           numpy 1.10.1.
# --------------------------------------------------------------------
_seterr = {'divide' : 'warn',
           'invalid': 'warn',
           'over'   : 'warn',
           'under'  : 'ignore',
           }
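
# An illustrative sketch (not part of the original module): numpy's
# seterr returns the previous settings, so these defaults can be
# applied temporarily around a block of arithmetic and then restored,
# which is the pattern used by _binary_operation below:
#
#     old_settings = numpy_seterr(**_seterr)
#     try:
#         pass  # arithmetic on numpy arrays goes here
#     finally:
#         numpy_seterr(**old_settings)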

# --------------------------------------------------------------------
# _seterr_raise_to_ignore = As _seterr but with any values of 'raise'
#                           changed to 'ignore'.
# --------------------------------------------------------------------
_seterr_raise_to_ignore = _seterr.copy()
for key, value in _seterr.iteritems():
    if value == 'raise':
        _seterr_raise_to_ignore[key] = 'ignore'

# --------------------------------------------------------------------
# _mask_fpe[0] = Whether or not to automatically convert
#                FloatingPointError exceptions to masked values in
#                arithmetic.
# --------------------------------------------------------------------
_mask_fpe = [False]
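
# An illustrative sketch (not part of the original module) of how the
# _mask_fpe switch is honoured when a trapped FloatingPointError is
# caught (see _binary_operation below): the calculation is redone with
# the errors ignored and any invalid results become missing data:
#
#     if _mask_fpe[0]:
#         numpy_seterr(**_seterr_raise_to_ignore)
#         result = x / y                                 # redo quietly
#         result = numpy_ma_masked_invalid(result, copy=False)
#         numpy_seterr(**_seterr)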

# A scalar (0-d) object array, copied whenever a new one-partition
# partition matrix is needed (see Data.__init__)
_xxx = numpy_empty((), dtype=object)

_empty_set = set()

_units_None    = Units()
_units_1       = Units('1')
_units_radians = Units('radians')

_dtype_object = numpy_dtype(object)
_dtype_float  = numpy_dtype(float)

_cached_axes = {0: []}
def _initialise_axes(ndim):
    '''

Return a list of `ndim` unique axis names.

The list is cached, so repeated calls with the same `ndim` return the
same list object.

:Parameters:

    ndim : int
        The number of axes.

:Returns:

    out : list
        The axis names, e.g. ``['dim0', 'dim1']``.

:Examples:

>>> _initialise_axes(0)
[]
>>> _initialise_axes(1)
['dim0']
>>> _initialise_axes(3)
['dim0', 'dim1', 'dim2']
>>> _initialise_axes(3) is _initialise_axes(3)
True

'''
    axes = _cached_axes.get(ndim, None)
    if axes is None:
        axes = ['dim%d' % i for i in xrange(ndim)]
        _cached_axes[ndim] = axes
    #--- End: if    
    return axes
#--- End: def
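
# Note (an illustrative sketch, not part of the original module): the
# cached lists returned by _initialise_axes are shared between callers,
# so they must never be modified in place:
#
#     >>> axes = _initialise_axes(2)
#     >>> axes is _initialise_axes(2)
#     True
#     >>> axes = axes + ['dim2']    # extend a copy, never in place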


# ====================================================================
#
# Data object
#
# ====================================================================

class Data(object):
    '''

An N-dimensional data array with units and masked values.

* Contains an N-dimensional, indexable and broadcastable array with
  many similarities to a `numpy` array.

* Contains the units of the array elements.

* Supports masked arrays, regardless of whether or not it was
  initialised with a masked array.

* Stores and operates on data arrays which are larger than the
  available memory.

**Indexing**

A data array is indexable in a similar way to a numpy array:

>>> d.shape
(12, 19, 73, 96)
>>> d[...].shape
(12, 19, 73, 96)
>>> d[slice(0, 9), 10:0:-2, :, :].shape
(9, 5, 73, 96)

There are three extensions to the numpy indexing functionality:

* Size 1 dimensions are never removed by indexing. An integer index i
  takes the i-th element but does not reduce the rank of the output
  array by one:

  >>> d.shape
  (12, 19, 73, 96)
  >>> d[0, ...].shape
  (1, 19, 73, 96)
  >>> d[:, 3, slice(10, 0, -2), 95].shape
  (12, 1, 5, 1)

  Size 1 dimensions may be removed with the `squeeze` method.

* The indices for each axis work independently. When more than one
  dimension's slice is a 1-d boolean sequence or 1-d sequence of
  integers, then these indices work independently along each dimension
  (similar to the way vector subscripts work in Fortran), rather than
  by their elements:

  >>> d.shape
  (12, 19, 73, 96)
  >>> d[0, :, [0, 1], [0, 13, 27]].shape
  (1, 19, 2, 3)

* Boolean indices may be any object which exposes the numpy array
  interface.

  >>> d.shape
  (12, 19, 73, 96)
  >>> d[..., d[0, 0, 0] > d[0, 0, 0].min()]

**Cyclic axes**

**Miscellaneous**

A `Data` object is picklable.

A `Data` object is hashable, but note that, since it is mutable, its
hash value is only valid whilst the data array is not changed in
place.

'''

    def __init__(self, data=None, units=None, fill_value=None,
                 hardmask=True, chunk=True, loadd=None, dt=False):
        '''

**Initialization**

:Parameters:

    data : array-like, optional
        The data for the array.

    units : str or Units, optional
        The units of the data. By default the array elements are
        dimensionless.

    fill_value : optional
        The fill value of the data. By default, or if None, the numpy
        fill value appropriate to the array's data type will be used.

    hardmask : bool, optional
        If False then the mask is soft. By default the mask is hard.

    chunk : bool, optional
        If False then the data array will be stored in a single
        partition. By default the data array will be partitioned if it
        is larger than the chunk size, as returned by the
        `cf.CHUNKSIZE` function.

    dt : bool, optional
        If True then strings (such as ``'1990-12-1 12:00'``) given by
        the *data* argument are interpreted as date-times. By default
        they are not interpreted as date-times.

    loadd : dict, optional
        Initialise the data array from a dictionary serialization of a
        `cf.Data` object. All other arguments are ignored.

:Examples:

>>> d = cf.Data(5)
>>> d = cf.Data([1,2,3], units='K')
>>> import numpy
>>> d = cf.Data(numpy.arange(10).reshape(2,5),
...             units=cf.Units('m/s'), fill_value=-999)
>>> d = cf.Data(tuple('fly'))

'''
        empty_list = []

        # The _flip attribute is an unordered subset of the data
        # array's axis names. It is a subset of the axes given by the
        # _axes attribute. It is used to determine whether or not to
        # reverse an axis in each partition's sub-array during the
        # creation of the partition's data array. DO NOT CHANGE IN
        # PLACE.
        self._flip = empty_list

        # The _all_axes attribute must be None or a tuple
        self._all_axes = None

        self._hardmask = hardmask
        self._isdt     = False

        if loadd is not None:
            self.loadd(loadd)
            return
        #--- End: if

        # The _cyclic attribute contains the axes of the data array
        # which are cyclic (and therefore allow cyclic slicing). It is
        # a subset of the axes given by the _axes attribute. DO NOT
        # CHANGE IN PLACE.
        self._cyclic = _empty_set

        units = Units(units)
        self._Units = units

        self._fill_value = fill_value

        if data is None:
            self._dtype = None
            return

        if not isinstance(data, FileArray):
            check_free_memory = True

            if isinstance(data, self.__class__):
                self.loadd(data.dumpd())
                return

            if not isinstance(data, numpy_ndarray):
                data = numpy_array(data)

            if (data.dtype.kind == 'O' and not dt and
                hasattr(data.item((0,)*data.ndim), 'timetuple')):
                # We've been given one or more date-time objects
                dt = True
        else:
            check_free_memory = False
        #--- End: if

        dtype = data.dtype

        if dt or units.isreftime:
            self._isdt = dtype.kind in 'SO'
            if self._isdt:
                if not units:
                    units = Units('days since 1970-1-1', units._calendar)
                    self._Units = units
            elif not units:
                raise ValueError(
                    "Can't initialise a numeric reference-time array with %r" %
                    units)

            if not units.isreftime:
                raise ValueError(
                    "Can't initialise a date-time array with %r" % units)

            if dtype.kind == 'S':
                # Convert date-time strings to date-time objects
                data = st2dt(data, units)
                dtype = data.dtype
#                dtype = _dtype_float
        #--- End: if

        shape = data.shape
        ndim  = data.ndim

        axes = _initialise_axes(ndim)

        matrix = _xxx.copy()

        matrix[()] = Partition(location = [(0, n) for n in shape],
                               shape    = list(shape),
                               axes     = axes,
                               flip     = empty_list,
                               Units    = units,
                               subarray = data,
                               part     = empty_list)

        self.partitions = PartitionMatrix(matrix, empty_list)

        self._dtype = dtype
        self._ndim  = ndim
        self._shape = shape
        self._size  = data.size

        # The _axes attribute is the ordered list of the data array's
        # axis names. Each axis name is an arbitrary, unique
        # string. DO NOT CHANGE IN PLACE.
        self._axes = axes

        if chunk:
            self.chunk()

        if check_free_memory and FREE_MEMORY() < FM_THRESHOLD():
            self.to_disk()
    #--- End: def

    def __array__(self, *dtype):
        '''

Returns a numpy array copy of the data array.

If the data array is stored as date-time objects then a numpy array of
numeric reference times will be returned.

:Returns:

    out : numpy.ndarray
        The numpy array copy of the data array.

:Examples:

>>> (d.array == numpy.array(d)).all()
True

'''
        if not dtype:
            return self.array
        else:
            return self.array.astype(dtype[0])  #, copy=False) Ought to work!
    #--- End: def
    def __contains__(self, value):
        '''

Membership test operator ``in``

x.__contains__(y) <==> y in x

Returns True if the value is contained anywhere in the data array. The
value may be a `cf.Data` object.

:Examples:

>>> d = cf.Data([[0.0, 1, 2], [3, 4, 5]], 'm')
>>> 4 in d
True
>>> cf.Data(3) in d
True
>>> cf.Data([2.5], units='2 m') in d
True
>>> [[2]] in d
True
>>> numpy.array([[[2]]]) in d
True
>>> cf.Data(2, 'seconds') in d
False

'''
        if isinstance(value, self.__class__):
            self_units  = self.Units
            value_units = value.Units
            if value_units.equivalent(self_units):
                if not value_units.equals(self_units):
                    value = value.copy()
                    value.Units = self_units
            elif value_units:
                return False

            value = value.unsafe_array
        #--- End: if

        pda_args = self.pda_args(revert_to_file=True)  #, readonly=True)

        for partition in self.partitions.matrix.flat:
            array = partition.dataarray(**pda_args)
            partition.close()

            if value in array:
                return True
        #--- End: for

        return False
    #--- End: def

    @property
    def subspace(self):
        '''

Return a new object which will get or set a subspace of the array.

``e=d.subspace[indices]`` is equivalent to ``e=d[indices]``.

``d.subspace[indices]=x`` is equivalent to ``d[indices]=x``.

'''
        return SubspaceData(self)
    #--- End: def

    def __data__(self):
        '''

Returns a new reference to self.

'''
        return self
    #--- End: def
    def __deepcopy__(self, memo):
        '''

Used if copy.deepcopy is called.

'''
        return self.copy()
    #--- End: def

#    def __getstate__(self):
#        '''
#
#Called when pickling.
#
#:Parameters:
#
#    None
#
#:Returns:
#
#    out : dict
#        A dictionary of the instance's attributes
#
#:Examples:
#
#'''
#        return dict([(attr, getattr(self, attr))
#                     for attr in self.__slots__ if hasattr(self, attr)])
#    #--- End: def

#    def __setstate__(self, odict):
#        '''
#
#Called when unpickling.
#
#:Parameters:
#
#    odict : dict
#        The output from the instance's `__getstate__` method.
#
#:Returns:
#
#    None
#
#'''
#        for attr, value in odict.iteritems():
#            setattr(self, attr, value)
#    #--- End: def
    def __hash__(self):
        '''

The built-in function `hash`

Generating the hash temporarily realizes the entire array in memory,
which may not be possible for large arrays.

The hash value is dependent on the data type and shape of the data
array. If the array is a masked array then the hash value is
independent of the fill value and of data array values underlying any
masked elements.

The hash value may be different if regenerated after the data array
has been changed in place.

The hash value is not guaranteed to be portable across versions of
Python, numpy and cf.

:Returns:

    out : int
        The hash value.

:Examples:

>>> print d.array
[[0 1 2 3]]
>>> hash(d)
-8125230271916303273
>>> d[0, 1] = numpy.ma.masked
>>> print d.array
[[0 -- 2 3]]
>>> hash(d)
791917586613573563
>>> d.hardmask = False
>>> d[0, 1] = 999
>>> d[0, 1] = numpy.ma.masked
>>> hash(d)
791917586613573563
>>> d.squeeze()
>>> print d.array
[0 -- 2 3]
>>> hash(d)
-7007538450787927902
>>> d.dtype = float
>>> print d.array
[0.0 -- 2.0 3.0]
>>> hash(d)
-4816859207969696442

'''
        return hash_array(self.unsafe_array)
    #--- End: def

    def __float__(self):
        if self.size != 1:
            raise TypeError(
                "only length-1 arrays can be converted to Python scalars")

        return float(self.datum())
    #--- End: def

    def __round__(self, *n):
        if self.size != 1:
            raise TypeError(
                "only length-1 arrays can be converted to Python scalars")

        return round(self.datum(), *n)
    #--- End: def

    def __int__(self):
        if self.size != 1:
            raise TypeError(
                "only length-1 arrays can be converted to Python scalars")

        return int(self.datum())
    #--- End: def
    def __iter__(self):
        '''

Efficient iteration.

x.__iter__() <==> iter(x)

:Examples:

>>> d = cf.Data([1, 2, 3], 'metres')
>>> for e in d:
...     print repr(e)
...
1
2
3

>>> d = cf.Data([[1, 2], [4, 5]], 'metres')
>>> for e in d:
...     print repr(e)
...
<CF Data: [1, 2] metres>
<CF Data: [4, 5] metres>

>>> d = cf.Data(34, 'metres')
>>> for e in d:
...     print repr(e)
...
TypeError: Iteration over 0-d Data

'''
        ndim = self._ndim

        if not ndim:
            raise TypeError("Iteration over 0-d %s" %
                            self.__class__.__name__)

        elif ndim == 1:
            if self.fits_in_memory(self.dtype.itemsize):
                i = iter(self.unsafe_array)
                while 1:
                    yield i.next()
            else:
                for n in xrange(self._size):
                    yield self[n].unsafe_array[0]

        else:
            # ndim > 1
            for n in xrange(self._shape[0]):
                yield self[n, ...].squeeze(0, i=True)
    #--- End: def
    def __len__(self):
        '''

The built-in function `len`

x.__len__() <==> len(x)

:Examples:

>>> len(cf.Data([1, 2, 3]))
3
>>> len(cf.Data([[1, 2, 3]]))
1
>>> len(cf.Data([[1, 2, 3], [4, 5, 6]]))
2
>>> len(cf.Data(1))
TypeError: len() of scalar Data

'''
        shape = self._shape
        if shape:
            return shape[0]

        raise TypeError("len() of scalar %s" % self.__class__.__name__)
    #--- End: def
    def __nonzero__(self):
        '''

Truth value testing and the built-in operation `bool`

x.__nonzero__() <==> x != 0

:Examples:

>>> bool(cf.Data(1))
True
>>> bool(cf.Data([[False]]))
False
>>> bool(cf.Data([1, 2]))
ValueError: The truth value of Data with more than one element is ambiguous. Use d.any() or d.all()

'''
        if self._size == 1:
            return bool(self.unsafe_array)

        raise ValueError(
            "The truth value of Data with more than one element is ambiguous. Use d.any() or d.all()")
    #--- End: def
    def __repr__(self):
        '''

The built-in function `repr`

x.__repr__() <==> repr(x)

'''
        return '<CF %s: %s>' % (self.__class__.__name__, str(self))
    #--- End: def
    def __str__(self):
        '''

The built-in function `str`

x.__str__() <==> str(x)

'''
        self_units = self.Units
        isreftime  = self_units.isreftime

        if not self_units or self_units.equals(_units_1):
            units = None
        elif not isreftime:
            units = self_units.units
        else:
            units = getattr(self_units, 'calendar', '')

        size = self._size
        ndim = self._ndim
        open_brackets  = '[' * ndim
        close_brackets = ']' * ndim

        first = self.datum(0)

        if size == 1:
            if isreftime and not self._isdt:
                # Convert reference time to date-time
                first = rt2dt(first, self_units).item()

            out = '%s%s%s' % (open_brackets, first, close_brackets)
        else:
            last = self.datum(-1)
            if isreftime and not self._isdt:
                # Convert reference times to date-times
                first, last = rt2dt((first, last), self_units)

            if size >= 3:
                out = '%s%s, ..., %s%s' % (open_brackets, first,
                                           last, close_brackets)
            else:
                out = '%s%s, %s%s' % (open_brackets, first,
                                      last, close_brackets)
        #--- End: if

        if units:
            out += ' %s' % units

        return out
    #--- End: def
    def __getitem__(self, indices):
        '''

Implement indexing

x.__getitem__(indices) <==> x[indices]

'''
        size = self._size

        if size == 1 or indices is Ellipsis:
            return self.copy()

        d = self

        axes        = d._axes
        flip        = d._flip
        shape       = d._shape
        cyclic_axes = d._cyclic

        indices, roll, flip_axes = _parse_indices(d, indices)

        if roll:
            for axis, shift in roll.iteritems():
                if axes[axis] not in cyclic_axes:
                    raise IndexError(
                        "Can't take a cyclic slice of a non-cyclic axis (axis %d)" %
                        axis)

                d = d.roll(axis, shift)
        #--- End: if

        new_shape = tuple(map(_size_of_index, indices, shape))
        new_size  = long(reduce(operator_mul, new_shape, 1))

        new = Data.__new__(Data)

        new._shape      = new_shape
        new._size       = new_size
        new._ndim       = d._ndim
        new._fill_value = d._fill_value
        new._hardmask   = d._hardmask
        new._isdt       = d._isdt
        new._all_axes   = d._all_axes
        new._cyclic     = cyclic_axes
        new._axes       = axes
        new._flip       = flip
        new._dtype      = d.dtype
        new.Units       = d.Units

        partitions = d.partitions

        new_partitions = PartitionMatrix(
            _overlapping_partitions(partitions, indices, axes, flip),
            partitions.axes)

        if new_size != size:
            new_partitions.set_location_map(axes)

        new.partitions = new_partitions

        # Flip axes
        if flip_axes:
            new.flip(flip_axes, i=True)

        # Remove size 1 axes from the partition matrix
        new_partitions.squeeze(i=True)

        if cyclic_axes:
            # Mark cyclic axes which have reduced size as non-cyclic
            x = [axis for axis, n0, n1 in izip(axes, shape, new_shape)
                 if axis in cyclic_axes and n1 != n0]
            if x:
                new._cyclic = cyclic_axes.difference(x)
        #--- End: if

        return new
    #--- End: def
    def __setitem__(self, indices, value):
        '''

Implement indexed assignment

x.__setitem__(indices, y) <==> x[indices]=y

Assignment to data array elements defined by indices.

Elements of a data array may be changed by assigning values to a
subspace. See `__getitem__` for details on how to define a subspace of
the data array.

**Missing data**

The treatment of missing data elements during assignment to a subspace
depends on the value of the `hardmask` attribute. If it is True then
masked elements will not be unmasked, otherwise masked elements may be
set to any value.

In either case, unmasked elements may be set to any value (including
missing data). Unmasked elements may be set to missing data by
assignment to the `cf.masked` constant or by assignment to a value
which contains masked elements.

.. seealso:: `cf.masked`, `hardmask`, `where`

:Examples:

'''
        def _mirror_slice(index, size):
            '''

Return a slice object which creates the reverse of the input slice.

The input slice must have a step of 1.

:Parameters:

    index : slice
        A slice object with a step of 1.

    size : int

:Returns:

    out : slice

:Examples:

>>> s = slice(2, 6)
>>> t = _mirror_slice(s, 8)
>>> s, t
slice(2, 6), slice(5, 1, -1)
>>> range(8)[s]
[2, 3, 4, 5]
>>> range(8)[t]
[5, 4, 3, 2]
>>> range(7, -1, -1)[s]
[5, 4, 3, 2]
>>> range(7, -1, -1)[t]
[2, 3, 4, 5]

'''
            start, stop, step = index.indices(size)
            size -= 1
            start = size - start
            stop  = size - stop
            if stop < 0:
                stop = None

            return slice(start, stop, -1)
        #--- End: def

        pda_args = self.pda_args()

        # ------------------------------------------------------------
        # Parse the indices
        # ------------------------------------------------------------
        indices, roll, flip_axes = _parse_indices(self, indices)

        if roll:
            for iaxis, shift in roll.iteritems():
                self.roll(iaxis, shift, i=True)

        scalar_value = False
        if value is cf_masked:
            scalar_value = True
        else:
            copied = False
            if not isinstance(value, Data):
                # Convert the value to a Data object
                value = type(self)(value, self.Units)
            else:
                if value.Units.equivalent(self.Units):
                    if not value.Units.equals(self.Units):
                        value = value.copy()
                        value.Units = self.Units
                        copied = True
                elif not value.Units:
                    value = value.override_units(self.Units)
                    copied = True
                else:
                    raise ValueError(
                        "Can't assign values with units %r to data with units %r" %
                        (value.Units, self.Units))
            #--- End: if

            value2dt = self._isdt and not value._isdt

            if value._size == 1:
                scalar_value = True

                if value2dt:
                    if not copied:
                        value = value.copy()
                    value.asdatetime()
                #--- End: if

                value = value.datum(0)
        #--- End: if

        if scalar_value:
            # --------------------------------------------------------
            # The value is logically scalar
            # --------------------------------------------------------
            for partition in self.partitions.matrix.flat:
                p_indices, shape = partition.overlaps(indices)
                if p_indices is None:
                    # This partition does not overlap the indices
                    continue

                array = partition.dataarray(**pda_args)

                if value is cf_masked and not partition.masked:
                    # The assignment is masking elements, so turn a
                    # non-masked sub-array into a masked one.
                    array = array.view(numpy_ma_MaskedArray)
                    partition.subarray = array

                set_subspace(array, p_indices, value)

                partition.close()
            #--- End: for

            if roll:
                for iaxis, shift in roll.iteritems():
                    self.roll(iaxis, -shift, i=True)

            return
        #--- End: if

        # ------------------------------------------------------------
        # Still here? Then the value is not logically scalar.
        # ------------------------------------------------------------
        data0_shape = self._shape
        value_shape = value._shape

        shape0 = map(_size_of_index, indices, data0_shape)

        self_ndim  = self._ndim
        value_ndim = value._ndim

        align_offset = self_ndim - value_ndim
        if align_offset >= 0:
            # self has more dimensions than other
            shape0   = shape0[align_offset:]
            shape1   = value_shape
            ellipsis = None

            flip_axes = [i - align_offset for i in flip_axes
                         if i >= align_offset]
        else:
            # value has more dimensions than self
            v_align_offset = -align_offset
            if value_shape[:v_align_offset] != [1] * v_align_offset:
                # Can only allow value to have more dimensions than
                # self if the extra dimensions all have size 1.
                raise ValueError("Can't broadcast shape %r across shape %r" %
                                 (value_shape, data0_shape))

            shape1 = value_shape[v_align_offset:]
            ellipsis = Ellipsis
            align_offset = 0
        #--- End: if

        # Find out which of the dimensions of value are to be
        # broadcast, and those which are not. Note that, as in numpy,
        # it is not allowed for a dimension in value to be larger than
        # a size 1 dimension in self
        base_value_indices       = []
        non_broadcast_dimensions = []

        for i, (a, b) in enumerate(izip(shape0, shape1)):
            if b == 1:
                base_value_indices.append(slice(None))
            elif a == b and b != 1:
                base_value_indices.append(None)
                non_broadcast_dimensions.append(i)
            else:
                raise ValueError("Can't broadcast shape %r across shape %r" %
                                 (shape1, shape0))
        #--- End: for

        previous_location = ((-1,),) * self_ndim
        start = [0] * value_ndim

#        save = pda_args['save']
        keep_in_memory = pda_args['keep_in_memory']

        value.to_memory()

        for partition in self.partitions.matrix.flat:
            p_indices, shape = partition.overlaps(indices)

            if p_indices is None:
                # This partition does not overlap the indices
                continue

            # --------------------------------------------------------
            # Find which elements of value apply to this partition
            # --------------------------------------------------------
            value_indices = base_value_indices[:]

            for i in non_broadcast_dimensions:
                j = i + align_offset
                location           = partition.location[j][0]
                reference_location = previous_location[j][0]

                if location > reference_location:
                    stop = start[i] + shape[j]
                    value_indices[i] = slice(start[i], stop)
                    start[i] = stop
                elif location == reference_location:
                    value_indices[i] = previous_slice[i]
                elif location < reference_location:
                    stop = shape[j]
                    value_indices[i] = slice(0, stop)
                    start[i] = stop
            #--- End: for

            previous_location = partition.location
            previous_slice    = value_indices[:]

            for i in flip_axes:
                value_indices[i] = _mirror_slice(value_indices[i], shape1[i])

            if ellipsis:
                value_indices.insert(0, ellipsis)

            # --------------------------------------------------------
            #
            # --------------------------------------------------------
            if value2dt:
                v = value[tuple(value_indices)].dtarray
            else:
                v = value[tuple(value_indices)].varray

            if keep_in_memory:  #not save:
                v = v.copy()

            # Update the partition's data
            array = partition.dataarray(**pda_args)

            if not partition.masked and numpy_ma_isMA(v):
                # The sub-array is not masked and the assignment is
                # masking elements, so turn the non-masked sub-array
                # into a masked one.
                array = array.view(numpy_ma_MaskedArray)
                partition.subarray = array
            #--- End: if

            set_subspace(array, p_indices, v)

            partition.close()
        #--- End: for

        if roll:
            # Unroll
            for iaxis, shift in roll.iteritems():
                self.roll(iaxis, -shift, i=True)
    #--- End: def
    def dumpd(self):
        '''

Return a dictionary serialization of the data array.

.. seealso:: `loadd`

:Returns:

    out : dict
        The serialization.

:Examples:

>>> d = cf.Data([[1, 2, 3]], 'm')
>>> d.dumpd()
{'Partitions': [{'location': [(0, 1), (0, 3)],
                 'subarray': array([[1, 2, 3]])}],
 'Units': <CF Units: m>,
 '_axes': ['dim0', 'dim1'],
 '_pmshape': (),
 'dtype': dtype('int64'),
 'shape': (1, 3)}

>>> d.flip(1)
>>> d.transpose()
>>> d.Units *= 1000
>>> d.dumpd()
{'Partitions': [{'Units': <CF Units: m>,
                 'axes': ['dim0', 'dim1'],
                 'location': [(0, 3), (0, 1)],
                 'subarray': array([[1, 2, 3]])}],
 'Units': <CF Units: 1000 m>,
 '_axes': ['dim1', 'dim0'],
 '_flip': ['dim1'],
 '_pmshape': (),
 'dtype': dtype('int64'),
 'shape': (3, 1)}

>>> d.dumpd()
{'Partitions': [{'Units': <CF Units: m>,
                 'location': [(0, 1), (0, 3)],
                 'subarray': array([[1, 2, 3]])}],
 'Units': <CF Units: 1000 m>,
 '_axes': ['dim0', 'dim1'],
 '_flip': ['dim1'],
 '_pmshape': (),
 'dtype': dtype('int64'),
 'shape': (1, 3)}

>>> e = cf.Data(loadd=d.dumpd())
>>> e.equals(d)
True

'''
        axes  = self._axes
        units = self.Units
        dtype = self.dtype

        cfa_data = {
            'dtype'   : dtype,
            'Units'   : units,
            'shape'   : self._shape,
            '_axes'   : axes[:],
            '_pmshape': self._pmshape,
            }

        pmaxes = self._pmaxes
        if pmaxes:
            cfa_data['_pmaxes'] = pmaxes[:]

        flip = self._flip
        if flip:
            cfa_data['_flip'] = flip[:]

        fill_value = self._fill_value
        if fill_value is not None:
            cfa_data['fill_value'] = fill_value

        cyclic = self._cyclic
        if cyclic:
            cfa_data['_cyclic'] = cyclic.copy()

        partitions = []
        for index, partition in self.partitions.ndenumerate():
            attrs = {}

            p_subarray = partition.subarray
            p_dtype    = p_subarray.dtype

            # Location in partition matrix
            if index:
                attrs['index'] = index

            # Sub-array location
            attrs['location'] = partition.location[:]

            # Sub-array part
            p_part = partition.part
            if p_part:
                attrs['part'] = p_part[:]

            # Sub-array axes
            p_axes = partition.axes
            if p_axes != axes:
                attrs['axes'] = p_axes[:]

            # Sub-array units
            p_Units = partition.Units
            if p_Units != units:
                attrs['Units'] = p_Units

            # Sub-array flipped axes
            p_flip = partition.flip
            if p_flip:
                attrs['flip'] = p_flip[:]

            # --------------------------------------------------------
            # File format specific stuff
            # --------------------------------------------------------
            if isinstance(p_subarray, NetCDFFileArray):
                # ----------------------------------------------------
                # NetCDF File Array
                # ----------------------------------------------------
                attrs['format'] = 'netCDF'

                subarray = {}
                for attr in ('file', 'shape'):
                    subarray[attr] = getattr(p_subarray, attr)

                for attr in ('ncvar', 'varid'):
                    value = getattr(p_subarray, attr, None)
                    if value is not None:
                        subarray[attr] = value
                #--- End: for

                if p_dtype != dtype:
                    subarray['dtype'] = p_dtype

                attrs['subarray'] = subarray

            elif isinstance(p_subarray, UMFileArray):
                # ----------------------------------------------------
                # UM File Array
                # ----------------------------------------------------
                attrs['format'] = 'UM'

                subarray = {}
                for attr in ('file', 'shape', 'header_offset',
                             'data_offset', 'disk_length'):
                    subarray[attr] = getattr(p_subarray, attr)
                #--- End: for

                if p_dtype != dtype:
                    subarray['dtype'] = p_dtype

                attrs['subarray'] = subarray
            else:
                attrs['subarray'] = p_subarray
            #--- End: if

            partitions.append(attrs)
        #--- End: for

        cfa_data['Partitions'] = partitions

        return cfa_data
    #--- End: def
    def loadd(self, d):
        '''

Reset the data array in place from a dictionary serialization.

.. seealso:: `dumpd`

:Parameters:

    d : dict
        A dictionary serialization of a `cf.Data` object, as returned
        by its `dumpd` method.

:Returns:

    None

:Examples:

>>> d = cf.Data([[1, 2, 3]], 'm')
>>> e = cf.Data([6, 7, 8, 9], 's')
>>> e.loadd(d.dumpd())
>>> e.equals(d)
True
>>> e is d
False

>>> e = cf.Data(loadd=d.dumpd())
>>> e.equals(d)
True

'''
        axes  = list(d.get('_axes', ()))
        shape = tuple(d.get('shape', ()))

        units = d.get('Units', None)
        if units is None:
            units = Units()

        dtype = d['dtype']

        self._dtype      = dtype
        self.Units       = units
        self._axes       = axes
        self._flip       = list(d.get('_flip', ()))
        self._fill_value = d.get('fill_value', None)

        self._shape = shape
        self._ndim  = len(shape)
        self._size  = long(reduce(operator_mul, shape, 1))

        cyclic = d.get('_cyclic', None)
        if cyclic:
            self._cyclic = cyclic.copy()
        else:
            self._cyclic = _empty_set

        filename = d.get('file', None)
        if filename is not None:
            filename = abspath(filename)

        base = d.get('base', None)
        if base is not None:
            base = abspath(base)

        # ------------------------------------------------------------
        # Initialise an empty partition array
        # ------------------------------------------------------------
        partition_matrix = PartitionMatrix(
            numpy_empty(d.get('_pmshape', ()), dtype=object),
            list(d.get('_pmaxes', ()))
            )
        pmndim = partition_matrix.ndim

        # ------------------------------------------------------------
        # Fill the partition array with partitions
        # ------------------------------------------------------------
        for attrs in d['Partitions']:

            # Find the position of this partition in the partition
            # matrix
            if 'index' in attrs:
                index = attrs['index']
                if len(index) == 1:
                    index = index[0]
                else:
                    index = tuple(index)
            else:
                index = (0,) * pmndim

            location = attrs.get('location', None)
            if location is not None:
                location = location[:]
            else:
                # Default location
                location = [[0, i] for i in shape]

            partition = Partition(
                location = location,
                axes     = attrs.get('axes', axes)[:],
                flip     = attrs.get('flip', [])[:],
                Units    = attrs.get('Units', units),
                part     = attrs.get('part', [])[:],
                )

            fmt = attrs.get('format', None)
            if fmt is None:
                # ----------------------------------------------------
                # Subarray is effectively a numpy array in memory
                # ----------------------------------------------------
                partition.subarray = attrs['subarray']
            else:
                # ----------------------------------------------------
                # Subarray is in a file on disk
                # ----------------------------------------------------
                if fmt not in ('netCDF', 'UM'):
                    raise TypeError(
                        "Don't know how to load sub-array from file format %r" %
                        fmt)

                # Set the 'subarray' attribute
                kwargs = attrs['subarray'].copy()

                kwargs['shape'] = tuple(kwargs['shape'])
                kwargs['ndim']  = len(kwargs['shape'])
                kwargs['size']  = long(reduce(operator_mul,
                                              kwargs['shape'], 1))

                kwargs.setdefault('dtype', dtype)

                if 'file' in kwargs:
                    f = kwargs['file']
                    if f == '':
                        kwargs['file'] = filename
                    else:
                        if base is not None:
                            f = pathjoin(base, f)
                        kwargs['file'] = abspath(f)
                else:
                    kwargs['file'] = filename

                if fmt == 'netCDF':
                    partition.subarray = NetCDFFileArray(**kwargs)
                elif fmt == 'UM':
                    partition.subarray = UMFileArray(**kwargs)
            #--- End: if

            # Put the partition into the partition array
            partition_matrix[index] = partition
        #--- End: for

        # Save the partition array
        self.partitions = partition_matrix

        self.chunk()
    #--- End: def
    def ceil(self, i=False):
        '''

Return the ceiling of the data array.

.. versionadded:: 1.0

.. seealso:: `floor`, `rint`, `trunc`

:Parameters:

    i : bool, optional
        If True then update the data array in place. By default a new
        data array is created.

:Returns:

    out : cf.Data

:Examples:

>>> print d.array
[-1.9 -1.5 -1.1 -1.   0.   1.   1.1  1.5  1.9]
>>> print d.ceil().array
[-1. -1. -1. -1.  0.  1.  2.  2.  2.]

'''
        return self.func(numpy_ceil, out=True, i=i)
    #--- End: def
    def chunk(self, chunksize=None):
        '''

Partition the data array.

:Parameters:

    chunksize : int, optional
        The chunk size in bytes. By default the chunk size returned by
        the `cf.CHUNKSIZE` function is used.

:Returns:

    None

:Examples:

>>> d.chunk()
>>> d.chunk(100000)

'''
        if not chunksize:
            # Set the default chunk size
            chunksize = CHUNKSIZE()

        # Define the factor which, when multiplied by the size of a
        # partition, determines how many chunks are in the partition.
        factor = (self.dtype.itemsize + 1.0)/chunksize

        if self._size*factor <= 1:
            # Don't do any partitioning if the data array is already
            # smaller than the chunk size
            return

        # Initialise the dictionary relating each axis to new
        # partition boundaries for that axis
        axes = self._axes
        d = {}

        # ------------------------------------------------------------
        # Find any new partition boundaries for each axis
        # ------------------------------------------------------------
        for partition in self.partitions.matrix.flat:

            n_chunks = int(partition.size*factor + 0.5)
            if n_chunks <= 1:
                continue

            extra_boundaries = []
            chunk_dims       = []

            for size, location, axis in izip(partition.shape,
                                             partition.location,
                                             axes):
                if size == 1 or n_chunks <= 1:
                    continue

                if size <= n_chunks:
                    d[axis] = range(location[0]+1, location[1])
                    n_chunks = int(math_ceil(n_chunks/float(size)))
                else:
                    step = int(size/n_chunks)
                    d[axis] = range(location[0]+step, location[1], step)
                    break
                #--- End: if
            #--- End: for
        #--- End: for

        # ------------------------------------------------------------
        # Create any new partition boundaries for each axis
        # ------------------------------------------------------------
        for axis in axes[::-1]:  #, extra_bounds in d.iteritems():
            extra_bounds = d.get(axis, None)
            if not extra_bounds:
                continue

            if axis not in self.partitions.axes:
                # Create a new partition axis
                self.partitions.expand_dims(axis, i=True)

            # Create the new partitions
            self.add_partitions(sorted(extra_bounds), axis)
        #--- End: for
    #--- End: def

    def _combined_units(self, data1, method, inplace):
        '''

:Parameters:

    data1 : cf.Data

    method : str
        The binary arithmetic or comparison method name (such as
        ``'__imul__'`` or ``'__ge__'``).

    inplace : bool
        Whether the operation is in place, in which case data0 may be
        modified directly rather than copied.

:Returns:

    out : (cf.Data, cf.Data, cf.Units)
        The two data arrays with consistent units and the units of the
        result.

:Examples:

>>> d._combined_units(e, '__sub__')
>>> d._combined_units(e, '__imul__')
>>> d._combined_units(e, '__irdiv__')
>>> d._combined_units(e, '__lt__')
>>> d._combined_units(e, '__rlshift__')
>>> d._combined_units(e, '__iand__')

'''
        method_type = method[-5:-2]

        data0 = self

        units0 = data0.Units
        units1 = data1.Units

        if not units0 and not units1:
            return data0, data1, units0

        if (units0.isreftime and units1.isreftime and
            not units0.equivalent(units1)):
            if units0._calendar and not units1._calendar:
                data1 = data1.override_calendar(units0._calendar)
                data1.Units = units0
            elif units1._calendar and not units0._calendar:
                if not inplace:
                    inplace = True
                    data0 = data0.copy()
                data0.override_calendar(units1._calendar, i=True)
                data0.Units = units1
        #--- End: if

        if method_type in ('_eq', '_ne', '_lt', '_le', '_gt', '_ge'):
            #---------------------------------------------------------
            # Operator is one of ==, !=, >=, >, <=, <
            #---------------------------------------------------------
            if units0.equivalent(units1):
                # Units are equivalent
                if not units0.equals(units1):
                    data1 = data1.copy()
                    data1.Units = units0
                return data0, data1, _units_None
            elif not units1 or not units0:
                # At least one of the units is undefined
                return data0, data1, _units_None
            else:
                raise ValueError("Can't compare %r to %r" %
                                 (units0, units1))
        #--- End: if

        # Still here?
        if method_type in ('and', '_or', 'ior', 'ror', 'xor', 'ift'):
            #---------------------------------------------------------
            # Operation is one of &, |, ^, >>, <<
            #---------------------------------------------------------
            if units0.equivalent(units1):
                # Units are equivalent
                if not units0.equals(units1):
                    data1 = data1.copy()
                    data1.Units = units0
                return data0, data1, units0
            elif not units1:
                # units1 is undefined
                return data0, data1, units0
            elif not units0:
                # units0 is undefined
                return data0, data1, units1
            else:
                # Both units are defined and not equivalent
                raise ValueError(
                    "Can't operate with %s on data with %r to %r" %
                    (method, units0, units1))
        #--- End: if

        # Still here?
        if units0.isreftime:
            #---------------------------------------------------------
            # units0 is reference time
            #---------------------------------------------------------
            if method_type == 'sub':
                if units1.isreftime:
                    if units0.equivalent(units1):
                        # Equivalent reference_times: the output units
                        # are time
                        if not units0.equals(units1):
                            data1 = data1.copy()
                            data1.Units = units0
                        return data0, data1, Units(_ut_unit=units0._ut_unit)
                    else:
                        # Non-equivalent reference_times: raise an
                        # exception
                        getattr(units0, method)(units1)
                elif units1.istime:
                    # reference_time minus time: the output units are
                    # reference_time
                    time0 = Units(_ut_unit=units0._ut_unit)
                    if not units1.equals(time0):
                        data1 = data1.copy()
                        data1.Units = time0
                    return data0, data1, units0
                elif not units1:
                    # reference_time minus no_units: the output units
                    # are reference_time
                    return data0, data1, units0
                else:
                    # reference_time minus something not yet accounted
                    # for: raise an exception
                    getattr(units0, method)(units1)

            elif method_type == 'add':
                if units1.istime:
                    # reference_time plus time: the output units are
                    # reference_time
                    time0 = Units(_ut_unit=units0._ut_unit)
                    if not units1.equals(time0):
                        data1 = data1.copy()
                        data1.Units = time0
                    return data0, data1, units0
                elif not units1:
                    # reference_time plus no_units: the output units
                    # are reference_time
                    return data0, data1, units0
                else:
                    # reference_time plus something not yet accounted
                    # for: raise an exception
                    getattr(units0, method)(units1)
            else:
                # Raise an exception
                getattr(units0, method)(units1)

        elif units1.isreftime:
            #---------------------------------------------------------
            # units1 is reference time
            #---------------------------------------------------------
            if method_type == 'add':
                if units0.istime:
                    # Time plus reference_time: the output units are
                    # reference_time
                    time1 = Units(_ut_unit=units1._ut_unit)
                    if not units0.equals(time1):
                        if not inplace:
                            data0 = data0.copy()
                        data0.Units = time1
                    return data0, data1, units1
                elif not units0:
                    # No_units plus reference_time: the output units
                    # are reference_time
                    return data0, data1, units1
                else:
                    # Raise an exception
                    getattr(units0, method)(units1)
        #--- End: if

        # Still here?
        if method_type in ('mul', 'div'):
            #---------------------------------------------------------
            # Method is one of *, /, //
            #---------------------------------------------------------
            if not units1:
                # units1 is undefined
                return data0, data1, getattr(units0, method)(_units_1)
            elif not units0:
                # units0 is undefined
                return data0, data1, getattr(_units_1, method)(units1)
            else:
                # Both units are defined (note: if the units are
                # noncombinable then this will raise an exception)
                return data0, data1, getattr(units0, method)(units1)
        #--- End: if

        # Still here?
        if method_type in ('sub', 'add', 'mod'):
            #---------------------------------------------------------
            # Operator is one of +, -, %
            #---------------------------------------------------------
            if units0.equivalent(units1):
                # Units are equivalent
                if not units0.equals(units1):
                    data1 = data1.copy()
                    data1.Units = units0
                return data0, data1, units0
            elif not units1:
                # units1 is undefined
                return data0, data1, units0
            elif not units0:
                # units0 is undefined
                return data0, data1, units1
            else:
                # Both units are defined and not equivalent (note: if
                # the units are noncombinable then this will raise an
                # exception)
                return data0, data1, getattr(units0, method)(units1)
        #--- End: if

        # Still here?
        if method_type == 'pow':
            if method == '__rpow__':
                #-----------------------------------------------------
                # Operator is __rpow__
                #-----------------------------------------------------
                if not units1:
                    # units1 is undefined
                    if not units0:
                        # units0 is undefined
                        return data0, data1, _units_None
                    elif units0.isdimensionless:
                        # units0 is dimensionless
                        if not units0.equals(_units_1):
                            if not inplace:
                                data0 = data0.copy()
                            data0.Units = _units_1
                        return data0, data1, _units_None
                elif units1.isdimensionless:
                    # units1 is dimensionless
                    if not units1.equals(_units_1):
                        data1 = data1.copy()
                        data1.Units = _units_1

                    if not units0:
                        # units0 is undefined
                        return data0, data1, _units_1
                    elif units0.isdimensionless:
                        # units0 is dimensionless
                        if not units0.equals(_units_1):
                            if not inplace:
                                data0 = data0.copy()
                            data0.Units = _units_1
                        return data0, data1, _units_1
                else:
                    # units1 is defined and is not dimensionless
                    if data0._size > 1:
                        raise ValueError(
                            "Can't raise units to the power of an array with more than one element")

                    if not units0:
                        return data0, data1, units1 ** data0.datum(0)
                    elif units0.isdimensionless:
                        # units0 is dimensionless
                        if not units0.equals(_units_1):
                            if not inplace:
                                data0 = data0.copy()
                            data0.Units = _units_1
                        return data0, data1, units1 ** data0.datum(0)
                #--- End: if

                # This will deliberately raise an exception
                units1 ** units0
            else:
                #-----------------------------------------------------
                # Operator is __pow__
                #-----------------------------------------------------
                if not units0:
                    # units0 is undefined
                    if not units1:
                        # units1 is undefined
                        return data0, data1, _units_None
                    elif units1.isdimensionless:
                        # units1 is dimensionless
                        if not units1.equals(_units_1):
                            data1 = data1.copy()
                            data1.Units = _units_1
                        return data0, data1, _units_None
                elif units0.isdimensionless:
                    # units0 is dimensionless
                    if not units0.equals(_units_1):
                        if not inplace:
                            data0 = data0.copy()
                        data0.Units = _units_1

                    if not units1:
                        # units1 is undefined
                        return data0, data1, _units_1
                    elif units1.isdimensionless:
                        # units1 is dimensionless
                        if not units1.equals(_units_1):
                            data1 = data1.copy()
                            data1.Units = _units_1
                        return data0, data1, _units_1
                else:
                    # units0 is defined and is not dimensionless
                    if data1._size > 1:
                        raise ValueError(
                            "Can't raise units to the power of an array with more than one element")

                    if not units1:
                        return data0, data1, units0 ** data1.datum(0)
                    elif units1.isdimensionless:
                        # units1 is dimensionless
                        if not units1.equals(_units_1):
                            data1 = data1.copy()
                            data1.Units = _units_1
                        return data0, data1, units0 ** data1.datum(0)
                #--- End: if

                # This will deliberately raise an exception
                units0 ** units1
            #--- End: if
        #--- End: if

        # Still here?
        raise ValueError("Can't operate with %s on data with %r to %r" %
                         (method, units0, units1))
    #--- End: def

    def _binary_operation(self, other, method):
        '''

Implement binary arithmetic and comparison operations with the numpy
broadcasting rules.

It is called by the binary arithmetic and comparison methods, such as
`__sub__`, `__imul__`, `__rdiv__`, `__lt__`, etc.

.. seealso:: `_unary_operation`

:Parameters:

    other : object
        The object on the right hand side of the operator.

    method : str
        The binary arithmetic or comparison method name (such as
        ``'__imul__'`` or ``'__ge__'``).

:Returns:

    new : Data
        A new Data object, or if the operation was in place, the same
        Data object.

:Examples:

>>> d = cf.Data([0, 1, 2, 3])
>>> e = cf.Data([1, 1, 3, 4])

>>> f = d._binary_operation(e, '__add__')
>>> print f.array
[1 2 5 7]

>>> e = d._binary_operation(e, '__lt__')
>>> print e.array
[ True False  True  True]

>>> d._binary_operation(2, '__imul__')
>>> print d.array
[0 2 4 6]

'''
        inplace     = (method[2] == 'i')
        method_type = method[-5:-2]

        # ------------------------------------------------------------
        # Ensure that other is an independent Data object
        # ------------------------------------------------------------
        if getattr(other, '_NotImplemented_RHS_Data_op', False):
            # Give the object on the right hand side of the operator a
            # chance to handle the operation itself
            return NotImplemented
        elif not isinstance(other, self.__class__):
            other = type(self).asdata(other)
            if other._isdt and self.Units.isreftime:
                # Make sure that an array of date-time objects has the
                # right calendar.
                other.override_units(self.Units, i=True)
        #--- End: if

        data0 = self.copy()

        data0, other, new_Units = data0._combined_units(other, method, True)

        # ------------------------------------------------------------
        # Bring other into memory, if appropriate.
        # ------------------------------------------------------------
        other.to_memory()

        # ------------------------------------------------------------
        # Find which dimensions need to be broadcast in one or other
        # of the arrays.
        #
        # Method:
        #
        #   For each common dimension, the 'broadcast_indices' list
        #   will have a value of None if there is no broadcasting
        #   required (i.e. the two arrays have the same size along
        #   that dimension) or a value of slice(None) if broadcasting
        #   is required (i.e. the two arrays have different sizes
        #   along that dimension and one of the sizes is 1).
        #
        #   Example:
        #
        #     If c.shape is (7,1,6,1,5) and d.shape is (6,4,1) then
        #     broadcast_indices will be
        #     [None, slice(None), slice(None)].
        #
        #     The indices to d which correspond to a partition of c,
        #     are the relevant subset of partition.indices updated
        #     with the non None elements of the broadcast_indices
        #     list.
        #
        #     In this example, if a partition of c were to have a
        #     partition.indices value of (slice(0,3), slice(0,1),
        #     slice(2,4), slice(0,1), slice(0,5)), then the relevant
        #     subset of these is partition.indices[2:] and the
        #     corresponding indices to d are (slice(2,4), slice(None),
        #     slice(None))
        # ------------------------------------------------------------
        data0_shape = data0._shape
        data1_shape = other._shape

        if data0_shape == data1_shape:
            # self and other have the same shapes
            broadcasting = False

            align_offset = 0
            ellipsis     = None

            new_shape = data0_shape
            new_ndim  = data0._ndim
            new_axes  = data0._axes
            new_size  = data0._size
        else:
            # self and other have different shapes
            broadcasting = True

            data0_ndim = data0._ndim
            data1_ndim = other._ndim

            align_offset = data0_ndim - data1_ndim
            if align_offset >= 0:
                # self has at least as many axes as other
                shape0 = data0_shape[align_offset:]
                shape1 = data1_shape

                new_shape = data0_shape[:align_offset]
                new_ndim  = data0_ndim
                new_axes  = data0._axes
            else:
                # other has more axes than self
                align_offset = -align_offset
                shape0 = data0_shape
                shape1 = data1_shape[align_offset:]

                new_shape = data1_shape[:align_offset]
                new_ndim  = data1_ndim

                if not data0_ndim:
                    new_axes = other._axes
                else:
                    new_axes = []
                    existing_axes = self._all_axis_names()
                    for n in new_shape:
                        axis = data0._new_axis_identifier(existing_axes)
                        existing_axes.append(axis)
                        new_axes.append(axis)
                    #--- End: for
                    new_axes += data0._axes
                #--- End: if

                align_offset = 0
            #--- End: if

            broadcast_indices = []
            for a, b in izip(shape0, shape1):
                if a == b:
                    new_shape += (a,)
                    broadcast_indices.append(None)
                    continue

                # Still here?
                if a > 1 and b == 1:
                    new_shape += (a,)
                elif b > 1 and a == 1:
                    new_shape += (b,)
                else:
                    raise ValueError(
                        "Can't broadcast shape %s against shape %s" %
                        (data1_shape, data0_shape))

                broadcast_indices.append(slice(None))
            #--- End: for

            new_size = long(reduce(operator_mul, new_shape, 1))

            dummy_location = [None] * new_ndim
        #--- End: if

        new_flip = []

#        if broadcasting:
#            max_size = 0
#            for partition in data0.partitions.matrix.flat:
#                indices0 = partition.indices
#                indices1 = tuple([
#                    (index if not broadcast_index else broadcast_index)
#                    for index, broadcast_index in izip(
#                        indices0[align_offset:], broadcast_indices)])
#                indices1 = (Ellipsis,) + indices
#
#                shape0 = partition.shape
#                shape1 = [index.stop - index.start
#                          for index in parse_indices(other, indices1)]
#
#                broadcast_size = 1
#                for n0, n1 in izip_longest(shape0[::-1], shape1[::-1],
#                                           fillvalue=1):
#                    if n0 > 1:
#                        broadcast_size *= n0
#                    else:
#                        broadcast_size *= n1
#                #--- End: for
#
#                if broadcast_size > max_size:
#                    max_size = broadcast_size
#            #--- End: for
#
#            chunksize = CHUNKSIZE()
#            ffff = max_size*(new_dtype.itemsize + 1)
#            if ffff > chunksize:
#                data0.chunk(chunksize*(chunksize/ffff))
#        #--- End: if

        # ------------------------------------------------------------
        # Create a Data object which just contains the metadata for
        # the result. If we're doing a binary arithmetic operation
        # then result will get filled with data and returned. If we're
        # doing an augmented arithmetic assignment then we'll update
        # self with this new metadata.
        # ------------------------------------------------------------
#        if new_shape != data0_shape:
#            set_location_map = True
#            new_size = self._size
#            dummy_location = [None] * new_ndim
#        else:
#            set_location_map = False
#            new_size = long(reduce(mul, new_shape, 1))

#        if not set_location_map:
#            new_size = long(reduce(mul, new_shape, 1))
#        else:
#            new_size = self._size

        result = data0.copy()
        result._shape = new_shape
        result._ndim  = new_ndim
        result._size  = new_size
        result._axes  = new_axes
#        result._flip  = new_flip

        # Is the result an array of date-time objects?
        new_isdt = data0._isdt and new_Units.isreftime

        # ------------------------------------------------------------
        # Set the data type of the result
        # ------------------------------------------------------------
        if method_type in ('_eq', '_ne', '_lt', '_le', '_gt', '_ge'):
            new_dtype = numpy_dtype(bool)
        elif not new_isdt:
            if 'true' in method:
                new_dtype = numpy_dtype(float)
            elif not inplace:
                new_dtype = numpy_result_type(data0.dtype, other.dtype)
            else:
                new_dtype = data0.dtype
        else:
            new_dtype = _dtype_object

        # ------------------------------------------------------------
        # Set flags to control whether or not the data of result and
        # self should be kept in memory
        # ------------------------------------------------------------
        keep_result_in_memory = result.fits_in_memory(new_dtype.itemsize)
        keep_self_in_memory   = data0.fits_in_memory(data0.dtype.itemsize)

        if not inplace:
            # When doing a binary arithmetic operation we need to
            # decide whether or not to keep self's data in memory
            revert_to_file = True
#            save_self = not data0.fits_in_memory(data0.dtype.itemsize)
#            keep_self_in_memory = data0.fits_in_memory(data0.dtype.itemsize)
        else:
            # When doing an augmented arithmetic assignment we don't
            # need to keep self's original data in memory
            revert_to_file = False
#            save_self = False
#            keep_self_in_memory = True
        #--- End: if

#        dimensions = self._axes
#        direction  = self.direction
#        units      = self.Units

        pda_args = data0.pda_args(keep_in_memory=keep_self_in_memory,
                                  revert_to_file=revert_to_file)

        if data0._isdt:
            # If self is a date-time array then make sure that it gets
            # converted to a reference time array prior to the
            # operation
            pda_args['func']   = dt2rt
            pda_args['update'] = False
        #--- End: if

        original_numpy_seterr = numpy_seterr(**_seterr)

        # Think about dtype, here.

        for partition_r, partition_s in izip(result.partitions.matrix.flat,
                                             data0.partitions.matrix.flat):
            indices = partition_s.indices

            array0 = partition_s.dataarray(**pda_args)

            if broadcasting:
                indices = tuple([
                    (index if not broadcast_index else broadcast_index)
                    for index, broadcast_index in izip(
                        indices[align_offset:], broadcast_indices)])
                indices = (Ellipsis,) + indices
            #--- End: if

#            if other._isdt:
#                # If other is a date-time array then make sure that it
#                # gets converted to a reference time array prior to
#                # the operation
#                array1 = other[indices].rtarray
#            else:
            array1 = other[indices].array  #unsafe_array

            # UNRESOLVED ISSUE: array1 could be much larger than the
            # chunk size.
            if not inplace:
                partition = partition_r
                partition.update_inplace_from(partition_s)
            else:
                partition = partition_s

            # --------------------------------------------------------
            # Do the binary operation on this partition's data
            # --------------------------------------------------------
            try:
                array0 = getattr(array0, method)(array1)
            except FloatingPointError as error:
                # Floating point errors have been trapped
                if _mask_fpe[0]:
                    # Redo the calculation ignoring the errors and
                    # then set invalid numbers to missing data
                    numpy_seterr(**_seterr_raise_to_ignore)
                    array0 = getattr(array0, method)(array1)
                    array0 = numpy_ma_masked_invalid(array0, copy=False)
                    numpy_seterr(**_seterr)
                else:
                    # Raise the floating point error exception
                    raise FloatingPointError(error)
            except TypeError as error:
                if inplace:
                    raise TypeError(
                        "Incompatible result data type ({0!r}) for in-place {1!r} arithmetic".format(
                            numpy_result_type(array0.dtype, array1.dtype).name,
                            array0.dtype.name))
                else:
                    raise TypeError(error)
            #--- End: try

            if new_isdt:
                # Convert result array to a date-time object array
                array0 = rt2dt(array0, data0.Units)

            if array0 is NotImplemented:
                array0 = numpy_zeros(partition.shape, dtype=bool)

            if not inplace:
                p_datatype = array0.dtype
                if new_dtype != p_datatype:
                    new_dtype = numpy_result_type(p_datatype, new_dtype)

            partition.subarray = array0
            partition.Units    = new_Units
            partition.axes     = new_axes
            partition.flip     = new_flip

            if broadcasting:
                partition.location = dummy_location
                partition.shape    = list(array0.shape)
            #--- End: if

            partition._original      = False
            partition._write_to_disk = False
            partition.close(keep_in_memory=keep_result_in_memory)

            if not inplace:
                partition_s.close()
        #--- End: for

        # Reset numpy.seterr
        numpy_seterr(**original_numpy_seterr)

        if not inplace:
            result._isdt = new_isdt
            result.Units = new_Units
            result.dtype = new_dtype
            result._flip = new_flip

            if broadcasting:
                result.partitions.set_location_map(result._axes)

            return result
        else:
            # Update the metadata for the new master array in place
            data0._isdt  = new_isdt
            data0._shape = new_shape
            data0._ndim  = new_ndim
            data0._size  = new_size
            data0._axes  = new_axes
            data0._flip  = new_flip
            data0.Units  = new_Units
            data0.dtype  = new_dtype

            if broadcasting:
                data0.partitions.set_location_map(new_axes)

            self.__dict__ = data0.__dict__

            return self
    #--- End: def

    def _query_set(self, values, exact=True):
        '''

Return a boolean data array which is True where the data equals any of
the given values.

'''
        if not exact:
            raise ValueError("Can't, as yet, regex on non string")

        i = iter(values)
        v = i.next()

        out = (self == v)
        for v in i:
            out |= (self == v)

        return out
    #--- End: def

    def _query_contain(self, value):
        '''

Return a boolean data array which is True where the data equals the
given value.

'''
        return self == value
    #--- End: def

    def _query_wi(self, value0, value1):
        '''

Return a boolean data array which is True where the data lies within
the closed interval [value0, value1].

'''
        return (self >= value0) & (self <= value1)
    #--- End: def
    def _query_wo(self, value0, value1):
        '''

Return a boolean data array which is True where the data lies outside
of the closed interval [value0, value1].

'''
        return (self < value0) | (self > value1)
    #--- End: def

    @classmethod
    def concatenate(cls, data, axis=0, _preserve=True):
        '''

Join a sequence of data arrays together.

:Parameters:

    data : sequence of cf.Data
        The data arrays to be concatenated. Concatenation is carried
        out in the order given. Each data array must have equivalent
        units and the same shape, except in the concatenation axis.
        Note that scalar arrays are treated as if they were one
        dimensional.

    axis : int, optional
        The axis along which the arrays will be joined. The default is
        0. Note that scalar arrays are treated as if they were one
        dimensional.

    _preserve : bool, optional
        If False then the time taken to do the concatenation is
        reduced at the expense of changing the input data arrays given
        by the *data* parameter in place and **these in place changes
        will render the input data arrays unusable**. Therefore, only
        set to False if it is 100% certain that the input data arrays
        will not be accessed again. By default the input data arrays
        are preserved.

:Returns:

    out : cf.Data
        The concatenated data array.

:Examples:

>>> d = cf.Data([[1, 2], [3, 4]], 'km')
>>> e = cf.Data([[5.0, 6.0]], 'metre')
>>> f = cf.Data.concatenate((d, e))
>>> print f.array
[[ 1.     2.   ]
 [ 3.     4.   ]
 [ 0.005  0.006]]
>>> f.equals(cf.Data.concatenate((d, e), axis=-2))
True

>>> e = cf.Data([[5.0], [6.0]], 'metre')
>>> f = cf.Data.concatenate((d, e), axis=1)
>>> print f.array
[[ 1.     2.     0.005]
 [ 3.     4.     0.006]]

>>> d = cf.Data(1, 'km')
>>> e = cf.Data(50.0, 'metre')
>>> f = cf.Data.concatenate((d, e))
>>> print f.array
[ 1.    0.05]

>>> e = cf.Data([50.0, 75.0], 'metre')
>>> f = cf.Data.concatenate((d, e))
>>> print f.array
0.05 0.075] ''' data = tuple(data) if len(data) < 2: raise ValueError( "Can't concatenate: Must provide two or more data arrays") data0 = data[0] data = data[1:] if _preserve: data0 = data0.copy() else: # If data0 appears more than once in the input data arrays # then we need to copy it for d in data: if d is data0: data0 = data0.copy() break #--- End: if # Turn a scalar array into a 1-d array ndim = data0._ndim if not ndim: data0.expand_dims(i=True) ndim = 1 # ------------------------------------------------------------ # Check that the axis, shapes and units of all of the input # data arrays are consistent # ------------------------------------------------------------ if axis < 0: axis += ndim if not 0 <= axis < ndim: raise ValueError( "Can't concatenate: Invalid axis (expected %d <= axis < %d)" % (-ndim, ndim)) shape0 = data0._shape units0 = data0.Units axis_p1 = axis + 1 for data1 in data: shape1 = data1._shape if (shape0[axis_p1:] != shape1[axis_p1:] or shape0[:axis] != shape1[:axis]): raise ValueError( "Can't concatenate: All the input array axes except for the concatenation axis must have the same size") if not units0.equivalent(data1.Units): raise ValueError( "Can't concatenate: All the input arrays must have equivalent units") #--- End: for for i, data1 in enumerate(data): if _preserve: data1 = data1.copy() else: # If data1 appears more than once in the input data # arrays then we need to copy it for d in data[i+1:]: if d is data1: data1 = data1.copy() break #--- End: if # Turn a scalar array into a 1-d array if not data1._ndim: data1.expand_dims(i=True) shape1 = data1._shape # ------------------------------------------------------------ # 1. Make sure that the internal names of the axes match # ------------------------------------------------------------ axis_map = {} if data1._pmsize < data0._pmsize: for axis1, axis0 in izip(data1._axes, data0._axes): axis_map[axis1] = axis0 data1._change_axis_names(axis_map) else: for axis1, axis0 in izip(data1._axes, data0._axes): axis_map[axis0] = axis1 data0._change_axis_names(axis_map) #--- End: if # ------------------------------------------------------------ # Find the internal name of the concatenation axis # ------------------------------------------------------------ Paxis = data0._axes[axis] # ------------------------------------------------------------ # 2. Make sure that the aggregating axis is an axis of the # partition matrix of both arrays and that the partition # matrix axes are the same in both arrays (although, for # now, they may have different orders) # # Note: # # a) This may involve adding new partition matrix axes to # either or both of data0 and data1. # # b) If the aggregating axis needs to be added it is inserted # as the outer (slowest varying) axis to reduce the # likelihood of having to (expensively) transpose the # partition matrix. # ------------------------------------------------------------ for f, g in izip((data0, data1), (data1, data0)): g_pmaxes = g.partitions.axes if Paxis in g_pmaxes: g_pmaxes = g_pmaxes[:] g_pmaxes.remove(Paxis) f_partitions = f.partitions f_pmaxes = f_partitions.axes for pmaxis in g_pmaxes[::-1] + [Paxis]: if pmaxis not in f_pmaxes: f_partitions.expand_dims(pmaxis, i=True) # if Paxis not in f_partitions.axes: # f_partitions.expand_dims(Paxis, i=True) #--- End: for # ------------------------------------------------------------ # 3. 
Make sure that the aggregating axis is the outermost (slowest # varying) axis of the partition matrix of data0 # ------------------------------------------------------------ ipmaxis = data0.partitions.axes.index(Paxis) if ipmaxis: data0.partitions.swapaxes(ipmaxis, 0, i=True) # ------------------------------------------------------------ # 4. Make sure that the partition matrix axes of data1 are in # the same order as those in data0 # ------------------------------------------------------------ pmaxes1 = data1.partitions.axes ipmaxes = [pmaxes1.index(pmaxis) for pmaxis in data0.partitions.axes] data1.partitions.transpose(ipmaxes, i=True) # -------------------------------------------------------- # 5. Create new partition boundaries in the partition # matrices of data0 and data1 so that their partition # arrays may be considered as different slices of a # common, larger hyperrectangular partition array. # # Note: # # * There is no need to add any boundaries across the # concatenation axis. # -------------------------------------------------------- boundaries0 = data0.partition_boundaries() boundaries1 = data1.partition_boundaries() for dim in data0.partitions.axes[1:]: # Still here? Then see if there are any partition matrix # boundaries to be created for this partition dimension bounds0 = boundaries0[dim] bounds1 = boundaries1[dim] symmetric_diff = set(bounds0).symmetric_difference(bounds1) if not symmetric_diff: # The partition boundaries for this partition # dimension are already the same in data0 and data1 continue # Still here? Then there are some partition boundaries to # be created for this partition dimension in data0 and/or # data1. for f, g, bf, bg in ((data0, data1, bounds0, bounds1), (data1, data0, bounds1, bounds0)): extra_bounds = [i for i in bg if i in symmetric_diff] f.add_partitions(extra_bounds, dim) #--- End: for #--- End: for # ------------------------------------------------------------ # 6. Concatenate data0 and data1 partition matrices # ------------------------------------------------------------ if data0._flip != data1._flip: data0._move_flip_to_partitions() data1._move_flip_to_partitions() matrix0 = data0.partitions.matrix matrix1 = data1.partitions.matrix new_pmshape = list(matrix0.shape) new_pmshape[0] += matrix1.shape[0] # Initialise an empty partition matrix with the new shape new_matrix = numpy_empty(new_pmshape, dtype=object) # Insert the data0 partition matrix new_matrix[:matrix0.shape[0]] = matrix0 # Insert the data1 partition matrix new_matrix[matrix0.shape[0]:] = matrix1 data0.partitions.matrix = new_matrix # Update the location map of the partition matrix of data0 data0.partitions.set_location_map((Paxis,), (axis,)) # ------------------------------------------------------------ # 7. Update the size, shape and dtype of data0 # ------------------------------------------------------------ data0._size += long(data1._size) shape0 = list(shape0) shape0[axis] += shape1[axis] data0._shape = tuple(shape0) dtype0 = data0.dtype dtype1 = data1.dtype if dtype0 != dtype1: data0.dtype = numpy_result_type(dtype0, dtype1) #--- End: for # ------------------------------------------------------------ # Done # ------------------------------------------------------------ return data0 #--- End: def def concatenate2(self, data, axis=0, i=False, _preserve=True): ''' Join a sequence of data arrays together. :Parameters: data : sequence of cf.Data The data arrays to be concatenated. Concatenation is carried out in the order given. 
Each data array must have equivalent units and the same shape, except in the concatenation axis. Note that scalar arrays are treated as if they were one dimensional. axis : int, optional The axis along which the arrays will be joined. The default is 0. Note that scalar arrays are treated as if they were one dimensional. i : bool, optional If True then update the data array in place. By default a new data array is created. _preserve : bool, optional If False then the time taken to do the concatenation is reduced at the expense of changing the input data arrays given by the *data* parameter in place and **these in place changes will render the input data arrays unusable**. Therefore, only set to False if it is 100% certain that the input data arrays will not be accessed again. By default the input data arrays are preserved. :Returns: out : cf.Data The concatenated data array. :Examples: >>> d = cf.Data([[1, 2], [3, 4]], 'km') >>> e = cf.Data([[5.0, 6.0]], 'metre') >>> f = d.concatenate2([e]) >>> print f.array [[ 1. 2. ] [ 3. 4. ] [ 0.005 0.006]] >>> f.equals(d.concatenate2([e], axis=-2)) True >>> e = cf.Data([[5.0], [6.0]], 'metre') >>> f = d.concatenate2([e], axis=1) >>> print f.array [[ 1. 2. 0.005] [ 3. 4. 0.006]] >>> d = cf.Data(1, 'km') >>> e = cf.Data(50.0, 'metre') >>> f = d.concatenate2([e]) >>> print f.array [ 1. 0.05] >>> e = cf.Data([50.0, 75.0], 'metre') >>> f = d.concatenate2([e]) >>> print f.array [ 1. 0.05 0.075] ''' data = tuple(data) if len(data) < 1: raise ValueError( "Can't concatenate: Must provide at least one data array") data0 = self if _preserve: data0 = data0.copy() else: # If data0 appears more than once in the input data arrays # then we need to copy it for d in data: if d is data0: data0 = data0.copy() break #--- End: if # Turn a scalar array into a 1-d array ndim = data0._ndim if not ndim: data0.expand_dims(i=True) ndim = 1 # ------------------------------------------------------------ # Check that the axis, shapes and units of all of the input # data arrays are consistent # ------------------------------------------------------------ if axis < 0: axis += ndim if not 0 <= axis < ndim: raise ValueError( "Can't concatenate: Invalid axis (expected %d <= axis < %d)" % (-ndim, ndim)) shape0 = data0._shape units0 = data0.Units axis_p1 = axis + 1 for data1 in data: shape1 = data1._shape if (shape0[axis_p1:] != shape1[axis_p1:] or shape0[:axis] != shape1[:axis]): raise ValueError( "Can't concatenate: All the input array axes except for the concatenation axis must have the same size") if not units0.equivalent(data1.Units): raise ValueError( "Can't concatenate: All the input arrays must have equivalent units") #--- End: for # Note: the loop index is named j so as not to shadow the i # parameter for j, data1 in enumerate(data): if _preserve: data1 = data1.copy() else: # If data1 appears more than once in the input data # arrays then we need to copy it for d in data[j+1:]: if d is data1: data1 = data1.copy() break #--- End: if # Turn a scalar array into a 1-d array if not data1._ndim: data1.expand_dims(i=True) shape1 = data1._shape # ------------------------------------------------------------ # 1. 
Make sure that the internal names of the axes match # ------------------------------------------------------------ axis_map = {} if data1._pmsize < data0._pmsize: for axis1, axis0 in izip(data1._axes, data0._axes): axis_map[axis1] = axis0 data1._change_axis_names(axis_map) else: for axis1, axis0 in izip(data1._axes, data0._axes): axis_map[axis0] = axis1 data0._change_axis_names(axis_map) #--- End: if # ------------------------------------------------------------ # Find the internal name of the concatenation axis # ------------------------------------------------------------ Paxis = data0._axes[axis] # ------------------------------------------------------------ # 2. Make sure that the aggregating axis is an axis of the # partition matrix of both arrays and that the partition # matrix axes are the same in both arrays (although, for # now, they may have different orders) # # Note: # # a) This may involve adding new partition matrix axes to # either or both of data0 and data1. # # b) If the aggregating axis needs to be added it is inserted # as the outer (slowest varying) axis to reduce the # likelihood of having to (expensively) transpose the # partition matrix. # ------------------------------------------------------------ for f, g in izip((data0, data1), (data1, data0)): g_pmaxes = g.partitions.axes if Paxis in g_pmaxes: g_pmaxes = g_pmaxes[:] g_pmaxes.remove(Paxis) f_partitions = f.partitions f_pmaxes = f_partitions.axes for pmaxis in g_pmaxes[::-1] + [Paxis]: if pmaxis not in f_pmaxes: f_partitions.expand_dims(pmaxis, i=True) # if Paxis not in f_partitions.axes: # f_partitions.expand_dims(Paxis, i=True) #--- End: for # ------------------------------------------------------------ # 3. Make sure that the aggregating axis is the outermost (slowest # varying) axis of the partition matrix of data0 # ------------------------------------------------------------ ipmaxis = data0.partitions.axes.index(Paxis) if ipmaxis: data0.partitions.swapaxes(ipmaxis, 0, i=True) # ------------------------------------------------------------ # 4. Make sure that the partition matrix axes of data1 are in # the same order as those in data0 # ------------------------------------------------------------ pmaxes1 = data1.partitions.axes ipmaxes = [pmaxes1.index(pmaxis) for pmaxis in data0.partitions.axes] data1.partitions.transpose(ipmaxes, i=True) # ------------------------------------------------------------ # 5. Create new partition boundaries in the partition # matrices of data0 and data1 so that their partition # arrays may be considered as different slices of a common, # larger hyperrectangular partition array. # # Note: # # * There is no need to add any boundaries across the # concatenation axis. # ------------------------------------------------------------ boundaries0 = data0.partition_boundaries() boundaries1 = data1.partition_boundaries() for dim in data0.partitions.axes[1:]: # Still here? Then see if there are any partition matrix # boundaries to be created for this partition dimension bounds0 = boundaries0[dim] bounds1 = boundaries1[dim] symmetric_diff = set(bounds0).symmetric_difference(bounds1) if not symmetric_diff: # The partition boundaries for this partition # dimension are already the same in data0 and data1 continue # Still here? Then there are some partition boundaries to # be created for this partition dimension in data0 and/or # data1. 
for f, g, bf, bg in ((data0, data1, bounds0, bounds1), (data1, data0, bounds1, bounds0)): extra_bounds = [i for i in bg if i in symmetric_diff] f.add_partitions(extra_bounds, dim) #--- End: for #--- End: for # ------------------------------------------------------------ # 6. Concatenate data0 and data1 partition matrices # ------------------------------------------------------------ if data0._flip != data1._flip: data0._move_flip_to_partitions() data1._move_flip_to_partitions() matrix0 = data0.partitions.matrix matrix1 = data1.partitions.matrix new_pmshape = list(matrix0.shape) new_pmshape[0] += matrix1.shape[0] # Initialise an empty partition matrix with the new shape new_matrix = numpy_empty(new_pmshape, dtype=object) # Insert the data0 partition matrix new_matrix[:matrix0.shape[0]] = matrix0 # Insert the data1 partition matrix new_matrix[matrix0.shape[0]:] = matrix1 data0.partitions.matrix = new_matrix # Update the location map of the partition matrix of data0 data0.partitions.set_location_map((Paxis,), (axis,)) # ------------------------------------------------------------ # 7. Update the size, shape and dtype of data0 # ------------------------------------------------------------ data0._size += long(data1._size) shape0 = list(shape0) shape0[axis] += shape1[axis] data0._shape = tuple(shape0) dtype0 = data0.dtype dtype1 = data1.dtype if dtype0 != dtype1: data0.dtype = numpy_result_type(dtype0, dtype1) #--- End: for # ------------------------------------------------------------ # Done # ------------------------------------------------------------ return data0 #--- End: def def _move_flip_to_partitions(self): ''' This does not change the master array. ''' flip = self._flip if not flip: return for partition in self.partitions.matrix.flat: p_axes = partition.axes p_flip = partition.flip[:] for axis in flip: if axis in p_flip: p_flip.remove(axis) elif axis in p_axes: p_flip.append(axis) #--- End: for partition.flip = p_flip #--- End: for self._flip = [] #--- End: def def _parse_axes(self, axes, method): ''' :Parameters: axes : (sequence of) int The axes of the data array. May be one of, or a sequence of any combination of zero or more of: * The integer position of a dimension in the data array (negative indices allowed). method : str :Returns: out : list :Examples: ''' ndim = self._ndim if isinstance(axes, (int, long)): axes = (axes,) axes2 = [] for axis in axes: if 0 <= axis < ndim: axes2.append(axis) elif -ndim <= axis < 0: axes2.append(axis + ndim) else: raise ValueError( "Can't %s: Invalid axis: %r" % (method, axis)) #--- End: for # Check for duplicate axes n = len(axes2) if n > 1 and n > len(set(axes2)): raise ValueError("Can't %s: Duplicate axis (%s)" % (method, axes2)) return axes2 #--- End: def def _unary_operation(self, operation): ''' Implement unary arithmetic operations. It is called by the unary arithmetic methods, such as __abs__(). .. seealso:: `_binary_operation` :Parameters: operation : str The unary arithmetic method name (such as "__invert__"). :Returns: new : Data A new Data array. :Examples: >>> d = cf.Data([[1, 2, -3, -4, -5]]) >>> e = d._unary_operation('__abs__') >>> print e.array [[1 2 3 4 5]] >>> e = d.__abs__() >>> print e.array [[1 2 3 4 5]] >>> e = abs(d) >>> print e.array [[1 2 3 4 5]] ''' self.to_memory() new = self.copy() pda_args = new.pda_args() for partition in new.partitions.matrix.flat: array = partition.dataarray(**pda_args) partition.subarray = getattr(operator, operation)(array) partition.close() #--- End: for return new #--- End: def
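# ------------------------------------------------------------
# Illustrative doctest-style sketch of how the private condition
# helpers above compose (assuming a numeric cf.Data instance;
# the expected outputs follow directly from the element-wise
# definitions of _query_wi, _query_wo and _unary_operation):
#
# >>> d = cf.Data([1, 2, 3, 4, 5])
# >>> print d._query_wi(2, 4).array    # (d >= 2) & (d <= 4)
# [False  True  True  True False]
# >>> print d._query_wo(2, 4).array    # (d < 2) | (d > 4)
# [ True False False False  True]
# >>> print d._unary_operation('__neg__').array
# [-1 -2 -3 -4 -5]
# ------------------------------------------------------------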
[docs] def __add__(self, other): ''' The binary arithmetic operation ``+`` x.__add__(y) <==> x+y ''' return self._binary_operation(other, '__add__')
#--- End: def
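# Illustrative sketch of the operator protocol: __add__ simply
# delegates to _binary_operation, which (assuming the usual cf
# treatment of equivalent units) converts the right-hand operand
# to the left-hand operand's units before operating:
#
# >>> d = cf.Data([1.0, 2.0], 'km')
# >>> e = cf.Data([500.0, 1500.0], 'm')
# >>> print (d + e).array
# [ 1.5  3.5]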
[docs] def __iadd__(self, other): ''' The augmented arithmetic assignment ``+=`` x.__iadd__(y) <==> x+=y ''' return self._binary_operation(other, '__iadd__')
#--- End: def
[docs] def __radd__(self, other): ''' The binary arithmetic operation ``+`` with reflected operands x.__radd__(y) <==> y+x ''' return self._binary_operation(other, '__radd__')
#--- End: def
[docs] def __sub__(self, other): ''' The binary arithmetic operation ``-`` x.__sub__(y) <==> x-y ''' return self._binary_operation(other, '__sub__')
#--- End: def
[docs] def __isub__(self, other): ''' The augmented arithmetic assignment ``-=`` x.__isub__(y) <==> x-=y ''' return self._binary_operation(other, '__isub__')
#--- End: def
[docs] def __rsub__(self, other): ''' The binary arithmetic operation ``-`` with reflected operands x.__rsub__(y) <==> y-x ''' return self._binary_operation(other, '__rsub__')
#--- End: def
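# Illustrative sketch of a reflected operation: when the
# left-hand operand cannot handle a cf.Data right-hand operand,
# Python falls back to the reflected method, which routes through
# _binary_operation in the same way (assuming numeric data):
#
# >>> d = cf.Data([1.0, 2.0])
# >>> print (10 - d).array    # int.__sub__ fails, __rsub__ used
# [ 9.  8.]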
[docs] def __mul__(self, other): ''' The binary arithmetic operation ``*`` x.__mul__(y) <==> x*y ''' return self._binary_operation(other, '__mul__')
#--- End: def
[docs] def __imul__(self, other): ''' The augmented arithmetic assignment ``*=`` x.__imul__(y) <==> x*=y ''' return self._binary_operation(other, '__imul__')
#--- End: def
[docs] def __rmul__(self, other): ''' The binary arithmetic operation ``*`` with reflected operands x.__rmul__(y) <==> y*x ''' return self._binary_operation(other, '__rmul__')
#--- End: def
[docs] def __div__(self, other): ''' The binary arithmetic operation ``/`` x.__div__(y) <==> x/y ''' return self._binary_operation(other, '__div__')
#--- End: def
[docs] def __idiv__(self, other): ''' The augmented arithmetic assignment ``/=`` x.__idiv__(y) <==> x/=y ''' return self._binary_operation(other, '__idiv__')
#--- End: def
[docs] def __rdiv__(self, other): ''' The binary arithmetic operation ``/`` with reflected operands x.__rdiv__(y) <==> y/x ''' return self._binary_operation(other, '__rdiv__')
#--- End: def
[docs] def __floordiv__(self, other): ''' The binary arithmetic operation ``//`` x.__floordiv__(y) <==> x//y ''' return self._binary_operation(other, '__floordiv__')
#--- End: def
[docs] def __ifloordiv__(self, other): ''' The augmented arithmetic assignment ``//=`` x.__ifloordiv__(y) <==> x//=y ''' return self._binary_operation(other, '__ifloordiv__')
#--- End: def
[docs] def __rfloordiv__(self, other): ''' The binary arithmetic operation ``//`` with reflected operands x.__rfloordiv__(y) <==> y//x ''' return self._binary_operation(other, '__rfloordiv__')
#--- End: def
[docs] def __truediv__(self, other): ''' The binary arithmetic operation ``/`` (true division) x.__truediv__(y) <==> x/y ''' return self._binary_operation(other, '__truediv__')
#--- End: def
[docs] def __itruediv__(self, other): ''' The augmented arithmetic assignment ``/=`` (true division) x.__itruediv__(y) <==> x/=y ''' return self._binary_operation(other, '__itruediv__')
#--- End: def
[docs] def __rtruediv__(self, other): ''' The binary arithmetic operation ``/`` (true division) with reflected operands x.__rtruediv__(y) <==> y/x ''' return self._binary_operation(other, '__rtruediv__')
#--- End: def
[docs] def __pow__(self, other, modulo=None): ''' The binary arithmetic operations ``**`` and ``pow`` x.__pow__(y) <==> x**y ''' if modulo is not None: raise NotImplementedError("3-argument power not supported for '%s'" % self.__class__.__name__) return self._binary_operation(other, '__pow__')
#--- End: def
[docs] def __ipow__(self, other, modulo=None): ''' The augmented arithmetic assignment ``**=`` x.__ipow__(y) <==> x**=y ''' if modulo is not None: raise NotImplementedError("3-argument power not supported for '%s'" % self.__class__.__name__) return self._binary_operation(other, '__ipow__')
#--- End: def
[docs] def __rpow__(self, other, modulo=None): ''' The binary arithmetic operations ``**`` and ``pow`` with reflected operands x.__rpow__(y) <==> y**x ''' if modulo is not None: raise NotImplementedError("3-argument power not supported for %r" % self.__class__.__name__) return self._binary_operation(other, '__rpow__')
#--- End: def
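# Illustrative sketch: the optional third (modulo) argument of
# the built-in pow is rejected explicitly by all three power
# methods, since modular exponentiation is not defined for data
# arrays:
#
# >>> pow(cf.Data([2]), 3, 5)    # raises NotImplementedError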
[docs] def __mod__(self, other): ''' The binary arithmetic operation ``%`` x.__mod__(y) <==> x % y ''' return self._binary_operation(other, '__mod__')
#--- End: def
[docs] def __imod__(self, other): ''' The augmented arithmetic assignment ``%=`` x.__imod__(y) <==> x %= y ''' return self._binary_operation(other, '__imod__')
#--- End: def
[docs] def __rmod__(self, other): ''' The binary arithmetic operation ``%`` with reflected operands x.__rmod__(y) <==> y % x ''' return self._binary_operation(other, '__rmod__')
#--- End: def
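# Illustrative sketch of the modulo operators (assuming integer
# data, for which _binary_operation follows numpy semantics):
#
# >>> d = cf.Data([5, 6, 7])
# >>> print (d % 3).array
# [2 0 1]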
[docs] def __eq__(self, other): ''' The rich comparison operator ``==`` x.__eq__(y) <==> x==y ''' return self._binary_operation(other, '__eq__')
#--- End: def
[docs] def __ne__(self, other): ''' The rich comparison operator ``!=`` x.__ne__(y) <==> x!=y ''' return self._binary_operation(other, '__ne__')
#--- End: def
[docs] def __ge__(self, other): ''' The rich comparison operator ``>=`` x.__ge__(y) <==> x>=y ''' return self._binary_operation(other, '__ge__')
#--- End: def
[docs] def __gt__(self, other): ''' The rich comparison operator ``>`` x.__gt__(y) <==> x>y ''' return self._binary_operation(other, '__gt__')
#--- End: def
[docs] def __le__(self, other): ''' The rich comparison operator ``<=`` x.__le__(y) <==> x<=y ''' return self._binary_operation(other, '__le__')
#--- End: def
[docs] def __lt__(self, other): ''' The rich comparison operator ``<`` x.__lt__(y) <==> x<y ''' return self._binary_operation(other, '__lt__')
#--- End: def
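# Illustrative sketch: the rich comparisons also route through
# _binary_operation and so return boolean cf.Data objects, which
# may be combined with the bitwise operators defined below
# (expected output assumes numeric data):
#
# >>> d = cf.Data([1, 2, 3])
# >>> e = d >= 2
# >>> print e.array
# [False  True  True]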
[docs] def __and__(self, other): ''' The binary bitwise operation ``&`` x.__and__(y) <==> x&y ''' return self._binary_operation(other, '__and__')
#--- End: def
[docs] def __iand__(self, other): ''' The augmented bitwise assignment ``&=`` x.__iand__(y) <==> x&=y ''' return self._binary_operation(other, '__iand__')
#--- End: def
[docs] def __rand__(self, other): ''' The binary bitwise operation ``&`` with reflected operands x.__rand__(y) <==> y&x ''' return self._binary_operation(other, '__rand__')
#--- End: def
[docs] def __or__(self, other): ''' The binary bitwise operation ``|`` x.__or__(y) <==> x|y ''' return self._binary_operation(other, '__or__')
#--- End: def
[docs] def __ior__(self, other): ''' The augmented bitwise assignment ``|=`` x.__ior__(y) <==> x|=y ''' return self._binary_operation(other, '__ior__')
#--- End: def
[docs] def __ror__(self, other): ''' The binary bitwise operation ``|`` with reflected operands x.__ror__(y) <==> y|x ''' return self._binary_operation(other, '__ror__')
#--- End: def
[docs] def __xor__(self, other): ''' The binary bitwise operation ``^`` x.__xor__(y) <==> x^y ''' return self._binary_operation(other, '__xor__')
#--- End: def
[docs] def __ixor__(self, other): ''' The augmented bitwise assignment ``^=`` x.__ixor__(y) <==> x^=y ''' return self._binary_operation(other, '__ixor__')
#--- End: def
[docs] def __rxor__(self, other): ''' The binary bitwise operation ``^`` with reflected operands x.__rxor__(y) <==> y^x ''' return self._binary_operation(other, '__rxor__')
#--- End: def
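# Illustrative sketch of the bitwise operators on boolean data
# (as returned by the rich comparisons above):
#
# >>> a = cf.Data([True, True, False])
# >>> b = cf.Data([True, False, False])
# >>> print (a & b).array
# [ True False False]
# >>> print (a | b).array
# [ True  True False]
# >>> print (a ^ b).array
# [False  True False]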
[docs] def __lshift__(self, y): ''' The binary bitwise operation ``<<`` x.__lshift__(y) <==> x<<y ''' return self._binary_operation(y, '__lshift__')
#--- End: def
[docs] def __ilshift__(self, y): ''' The augmented bitwise assignment ``<<=`` x.__ilshift__(y) <==> x<<=y ''' return self._binary_operation(y, '__ilshift__')
#--- End: def
[docs] def __rlshift__(self, y): ''' The binary bitwise operation ``<<`` with reflected operands x.__rlshift__(y) <==> y<<x ''' return self._binary_operation(y, '__rlshift__')
#--- End: def
[docs] def __rshift__(self, y): ''' The binary bitwise operation ``>>`` x.__rshift__(y) <==> x>>y ''' return self._binary_operation(y, '__rshift__')
#--- End: def
[docs] def __irshift__(self, y): ''' The augmented bitwise assignment ``>>=`` x.__irshift__(y) <==> x>>=y ''' return self._binary_operation(y, '__irshift__')
#--- End: def
[docs] def __rrshift__(self, y): ''' The binary bitwise operation ``>>`` with reflected operands x.__rrshift__(y) <==> y>>x ''' return self._binary_operation(y, '__rrshift__')
#--- End: def
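# Illustrative sketch of the shift operators (assuming integer
# data):
#
# >>> d = cf.Data([1, 2, 4])
# >>> print (d << 1).array
# [2 4 8]
# >>> print (d >> 1).array
# [0 1 2]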
[docs] def __abs__(self): ''' The unary arithmetic operation ``abs`` x.__abs__() <==> abs(x) ''' return self._unary_operation('__abs__')
#--- End: def
[docs] def __neg__(self): ''' The unary arithmetic operation ``-`` x.__neg__() <==> -x ''' return self._unary_operation('__neg__')
#--- End: def
[docs] def __invert__(self): ''' The unary bitwise operation ``~`` x.__invert__() <==> ~x ''' return self._unary_operation('__invert__')
#--- End: def
[docs] def __pos__(self): ''' The unary arithmetic operation ``+`` x.__pos__() <==> +x ''' return self._unary_operation('__pos__')
#--- End: def def _all_axis_names(self): ''' Return a list of all the dimension names in use by the data array. Note that the output list includes dimensions of individual partitions which are not dimensions of the master data array. :Returns: out : list of str The axis names. :Examples: >>> d._axes ['dim1', 'dim0'] >>> d.partitions.info('_dimensions') [['dim0', 'dim0'], ['dim1', 'dim0', 'dim2']] >>> d._all_axis_names() ['dim2', 'dim0', 'dim1'] ''' all_axes = self._all_axes if not all_axes: return self._axes[:] else: return list(all_axes) #--- End: def def _change_axis_names(self, axis_map): ''' Change the axis names. The axis names are arbitrary, so mapping them to another arbitrary collection does not change the data array values, units, nor axis order. :Examples: ''' # Find any axis names which are not mapped. If there are any, # then update axis_map. all_axes = self._all_axes if all_axes: d = set(all_axes).difference(axis_map) if d: axis_map = axis_map.copy() existing_axes = all_axes[:] for axis in d: if axis in axis_map.itervalues(): axis_map[axis] = self._new_axis_identifier(existing_axes) existing_axes.append(axis) else: axis_map[axis] = axis #--- End: for #--- End: if if all([axis0==axis1 for axis0, axis1 in axis_map.iteritems()]): # Return without doing anything if the mapping is null return # Axes self._axes = [axis_map[axis] for axis in self._axes] # All axes if all_axes: self._all_axes = tuple([axis_map[axis] for axis in all_axes]) # Flipped axes flip = self._flip if flip: self._flip = [axis_map[axis] for axis in flip] # Partitions in the partition matrix self.partitions.change_axis_names(axis_map) #--- End: def def _collapse(self, func, fpartial, ffinalise, axes=None, squeeze=False, weights=None, mtol=1, units=None, i=False, **kwargs): ''' Collapse the data array in place. :Parameters: func : function fpartial : function ffinalise : function axes : (sequence of) int, optional The axes to be collapsed. By default flattened input is used. Each axis is identified by its integer position. No axes are collapsed if *axes* is an empty sequence. squeeze : bool, optional If False then the axes which are collapsed are left in the result as axes with size 1. In this case the result will broadcast correctly against the original array. By default collapsed axes are removed. weights : *optional* kwargs : *optional* i : bool, optional If True then update the data array in place. By default a new data array is created. :Returns: out : cf.Data ''' if i: d = self else: d = self.copy() ndim = d._ndim self_axes = d._axes self_shape = d._shape original_self_axes = self_axes[:] if axes is None: # Collapse all axes axes = range(ndim) n_collapse_axes = ndim n_non_collapse_axes = 0 Nmax = d._size elif not axes and axes != 0: # Collapse no axes return d else: # Collapse some (maybe all) axes axes = d._parse_axes(axes, '_collapse') n_collapse_axes = len(axes) n_non_collapse_axes = ndim - n_collapse_axes Nmax = 1 for i in axes: Nmax *= self_shape[i] #--- End: if #------------------------------------------------------------- # Parse the weights. # # * Change the keys from dimension names to the integer # positions of the dimensions. # # * Make sure all non-null weights are Data objects. 
# ------------------------------------------------------------ if weights is not None: if not isinstance(weights, dict): # If the shape of the weights is not the same as the # shape of the data array then the weights are assumed # to span the collapse axes in the order in which they # are given if numpy_shape(weights) == self_shape: weights = {tuple(self_axes): weights} else: weights = {tuple([self_axes[i] for i in axes]): weights} else: weights = weights.copy() weights_axes = set() for key, value in weights.items(): del weights[key] key = d._parse_axes(key, '_collapse') if weights_axes.intersection(key): raise ValueError("Duplicate weights axis") weights_axes.update(key) weights[tuple([self_axes[i] for i in key])] = value #--- End: for if not weights_axes.intersection(axes): # Ignore all of the weights if none of them span # any collapse axes weights = {} #--- End: if for key, weight in weights.items(): if weight is None or numpy_size(weight) == 1: # Ignore undefined weights and size 1 weights del weights[key] continue weight_ndim = numpy_ndim(weight) if weight_ndim != len(key): raise ValueError( "Can't collapse: Incorrect number of weights axes (%d != %d)" % (weight.ndim, len(key))) if weight_ndim > ndim: raise ValueError( "Can't collapse: Incorrect number of weights axes (%d > %d)" % (weight.ndim, ndim)) for n, axis in izip(numpy_shape(weight), key): if n != self_shape[self_axes.index(axis)]: raise ValueError( "Can't collapse: Incorrect weights shape %r" % numpy_shape(weight)) #--- End: for # Convert weight to a data object, if necessary. weight = type(self).asdata(weight) if weight.dtype.char == 'S': # Ignore string-valued weights del weights[key] continue weights[key] = weight #--- End: for #--- End: if if axes != range(n_non_collapse_axes, ndim): transpose_iaxes = [i for i in range(ndim) if i not in axes] + axes d.transpose(transpose_iaxes, i=True) #--- End: if if weights: # Optimize when weights span only non-partitioned axes # (do this before permuting the order of the weight # axes to be consistent with the order of the data # axes) weights = d._collapse_optimize_weights(weights) # Permute the order of the weight axes to be # consistent with the order of the data axes self_axes = d._axes for key, w in weights.items(): key1 = tuple([axis for axis in self_axes if axis in key]) if key1 != key: w = w.transpose([key.index(axis) for axis in key1]) del weights[key] ikey = tuple([self_axes.index(axis) for axis in key1]) weights[ikey] = w #--- End: for # Add the weights to kwargs kwargs['weights'] = weights #--- End: if #------------------------------------------------------------- # Initialise the output data array #------------------------------------------------------------- new = d[(Ellipsis,) + (0,)*n_collapse_axes] # new._auxiliary_mask = None # pending ..... 
for partition in new.partitions.matrix.flat: del partition.subarray d.to_memory() # save = not new.fits_in_memory(new.dtype.itemsize) keep_in_memory = new.fits_in_memory(new.dtype.itemsize) datatype = d.dtype if units is None: new_units = new.Units else: new_units = units p_axes = new._axes[:n_non_collapse_axes] p_units = new_units c_slice = (slice(None),) * n_collapse_axes for partition in new.partitions.matrix.flat: partition.axes = p_axes partition.flip = [] partition.part = [] partition.Units = p_units if squeeze: partition.location = partition.location[:n_non_collapse_axes] partition.shape = partition.shape[:n_non_collapse_axes] indices = partition.indices[:n_non_collapse_axes] + c_slice partition.subarray = d._collapse_subspace( func, fpartial, ffinalise, indices, n_non_collapse_axes, n_collapse_axes, Nmax, mtol, **kwargs) p_datatype = partition.subarray.dtype if datatype != p_datatype: datatype = numpy_result_type(p_datatype, datatype) partition.close(keep_in_memory=keep_in_memory) #--- End: for new._all_axes = None new._flip = [] new.dtype = datatype new.Units = new_units if squeeze: new._axes = p_axes new._ndim = ndim - n_collapse_axes new._shape = new._shape[:new._ndim] else: new_axes = new._axes if new_axes != original_self_axes: iaxes = [new_axes.index(axis) for axis in original_self_axes] new.transpose(iaxes, i=True) # ------------------------------------------------------------ # Update d in place # ------------------------------------------------------------ d.__dict__ = new.__dict__ # ------------------------------------------------------------ # Return # ------------------------------------------------------------ return d #--- End: def def _collapse_subspace(self, func, fpartial, ffinalise, indices, n_non_collapse_axes, n_collapse_axes, Nmax, mtol, weights=None, **kwargs): ''' Collapse a subspace of a data array. If set, *weights* and *kwargs* are passed to the function call. If there is a *weights* keyword argument then this should either evaluate to False or be a dictionary of weights for at least one of the data dimensions. :Parameters: func : function fpartial : function ffinalise : function indices: tuple The indices of the master array which would create the subspace. n_non_collapse_axes : int The number of data array axes which are not being collapsed. It is assumed that they are in the slowest moving positions. n_collapse_axes : int The number of data array axes which are being collapsed. It is assumed that they are in the fastest moving positions. weights : dict, optional kwargs : *optional* :Returns: out : numpy array :Examples: ''' ndim = self._ndim master_shape = self.shape data = self[indices] # if data._pmndim and data.fits_in_memory(data.dtype.itemsize): # data.varray # True iff at least two, but not all, axes are to be # collapsed. reshape = 1 < n_collapse_axes < ndim out = None if n_collapse_axes == ndim: # All axes are to be collapsed kwargs.pop('axis', None) else: # At least one axis, but not all axes, are to be # collapsed. It is assumed that the collapse axes are in # the last (fastest varying) positions (-1, -2, ...). We # set kwargs['axis']=-1 (actually we use the +ve integer # equivalent of -1) if there is more than one collapse # axis because, in this case (i.e. reshape is True), we # will reshape everything. 
kwargs['axis'] = ndim - n_collapse_axes masked = False sub_samples = 0 pda_args = data.pda_args(revert_to_file=True) #, readonly=True) for i, partition in enumerate(data.partitions.matrix.flat): array = partition.dataarray(**pda_args) p_masked = partition.masked if p_masked: masked = True if array.mask.all(): # The array is all missing data partition.close() continue # Still here? Then there are some non-missing sub-array # elements. if weights is not None: w = self._collapse_create_weights(array, partition.indices, indices, master_shape, weights, n_non_collapse_axes, n_collapse_axes) wmin = w.min() if wmin < 0: raise ValueError("Can't collapse with negative weights") if wmin == 0: # Mask the array where the weights are zero array = numpy_ma_masked_where(w==0, array, copy=True) if array.mask.all(): # The array is all missing data partition.close() continue #--- End: if kwargs['weights'] = w #--- End: if partition.close() if reshape: # At least two, but not all, axes are to be collapsed # => we need to reshape the array and the weights. shape = array.shape ndim = array.ndim new_shape = shape[:n_non_collapse_axes] new_shape += (reduce(operator_mul, shape[n_non_collapse_axes:]),) array = numpy_reshape(array.copy(), new_shape) if weights is not None: w = kwargs['weights'] if w.ndim < ndim: # The weights span only collapse axes (as # opposed to spanning all axes) new_shape = (w.size,) kwargs['weights'] = numpy_reshape(w, new_shape) #--- End: if p_out = func(array, masked=p_masked, **kwargs) if out is None: if data.partitions.size == i + 1: # This is the only contributing partition so we are done out = p_out break out = fpartial(p_out) else: out = fpartial(out, p_out) sub_samples += 1 #--- End: for if out is not None: # Finalise N, out = ffinalise(out, sub_samples) out = self._collapse_mask(out, masked, N, Nmax, mtol) else: # no data - return all masked out = numpy_ma_masked_all(data.shape[:n_non_collapse_axes], data.dtype) return out #--- End: def @staticmethod def _collapse_mask(array, masked, N, Nmax, mtol): ''' :Parameters: array : numpy array masked : bool N : numpy array-like Nmax : int mtol : numpy array-like :Returns: out : numpy array ''' if masked and mtol < 1: x = N < (1-mtol)*Nmax if x.any(): # array = numpy_ma_where(x, numpy_ma_masked, array) array = numpy_ma_masked_where(x, array, copy=False) #--- End: if return array #--- End: def @staticmethod def _collapse_create_weights(array, indices, master_indices, master_shape, master_weights, n_non_collapse_axes, n_collapse_axes): ''' :Parameters: array : numpy array indices : tuple master_indices : tuple master_shape : tuple master_weights : dict n_non_collapse_axes : int The number of array axes which are not being collapsed. It is assumed that they are in the slowest moving positions. n_collapse_axes : int The number of array axes which are being collapsed. It is assumed that they are in the fastest moving positions. 
:Returns: out : numpy array or None :Examples: ''' array_shape = array.shape array_ndim = array.ndim weights_indices = [] for master_index, index, size in izip(master_indices, indices, master_shape): start , stop , step = master_index.indices(size) size1, mod = divmod(stop-start-1, step) start1, stop1, step1 = index.indices(size1+1) size2, mod = divmod(stop1-start1, step1) if mod != 0: size2 += 1 start += start1 * step step *= step1 stop = start + (size2-1)*step + 1 weights_indices.append(slice(start, stop, step)) #--- End: for base_shape = (1,) * array_ndim masked = False zero_weights = False weights = [] for key, weight in master_weights.iteritems(): shape = list(base_shape) index = [] for i in key: shape[i] = array_shape[i] index.append(weights_indices[i]) #--- End: for weight = weight[tuple(index)].unsafe_array zero_weights = zero_weights or (weight.min() <= 0) masked = masked or numpy_ma_isMA(weight) if weight.ndim != array_ndim: # Make sure that the weight has the same number of # dimensions as the array weight = weight.reshape(shape) weights.append(weight) #--- End: for weights_out = weights[0] if len(weights) > 1: # There are two or more weights, so create their product # (can't do this in-place because of broadcasting) for w in weights[1:]: weights_out = weights_out * w weights_out_shape = weights_out.shape if (not masked and weights_out_shape[:n_non_collapse_axes] == base_shape[:n_non_collapse_axes]): # The input weights are not masked and only span collapse axes weights_out = weights_out.reshape(weights_out_shape[n_non_collapse_axes:]) if weights_out_shape[n_non_collapse_axes:] != array_shape[n_non_collapse_axes:]: # The input weights span some, but not all, of the # collapse axes, so broadcast the weights over all # collapse axes weights_out = broadcast_array(weights_out, array_shape[n_non_collapse_axes:]) else: if weights_out_shape != array_shape: # Either a) The input weights span at least one # non-collapse axis, so broadcast the weights over all # axes or b) The weights contain masked values weights_out = broadcast_array(weights_out, array_shape) if masked and numpy_ma_isMA(array): if not (array.mask | weights_out.mask == array.mask).all(): raise ValueError("The weights mask must be a subset of the data mask") return weights_out #--- End: def def _collapse_optimize_weights(self, weights): ''' Optimise when weights span only non-partitioned axes. weights : dict ''' non_partitioned_axes = set(self._axes).difference(self._pmaxes) x = [] new_key = () for key in weights: if non_partitioned_axes.issuperset(key): x.append(key) new_key += key #--- End: for if len(x) > 1: reshaped_weights = [] for key in x: w = weights.pop(key) w = w.array shape = [(w.shape[key.index(axis)] if axis in key else 1) for axis in new_key] w = w.reshape(shape) reshaped_weights.append(w) #--- End: for # Create their product new_weight = reshaped_weights[0] for w in reshaped_weights[1:]: new_weight = new_weight * w weights[new_key] = type(self)(new_weight) #--- End: if return weights #--- End: def def _new_axis_identifier(self, existing_axes=None): ''' Return an axis name not being used by the data array. The returned axis name will also not be referenced by partitions of the partition matrix. :Parameters: existing_axes : sequence of str, optional :Returns: out : str The new axis name. 
:Examples: >>> d._all_axis_names() ['dim1', 'dim0'] >>> d._new_axis_identifier() 'dim2' >>> d._all_axis_names() ['dim1', 'dim0', 'dim3'] >>> d._new_axis_identifier() 'dim4' >>> d._all_axis_names() ['dim5', 'dim6', 'dim7'] >>> d._new_axis_identifier() 'dim3' ''' if existing_axes is None: existing_axes = self._all_axis_names() n = len(existing_axes) axis = 'dim%d' % n while axis in existing_axes: n += 1 axis = 'dim%d' % n #--- End: while return axis #--- End: def # ---------------------------------------------------------------- # Attribute # ---------------------------------------------------------------- @property def Units(self): ''' The `cf.Units` object containing the units of the data array. Deleting this attribute is equivalent to setting it to an undefined units object, so this attribute is guaranteed to always exist. :Examples: >>> d.Units = cf.Units('m') >>> d.Units <CF Units: m> >>> del d.Units >>> d.Units <CF Units: > ''' return self._Units #--- End: def @Units.setter def Units(self, value): dtype = self.dtype if dtype is not None: if dtype.kind == 'i': char = dtype.char if char == 'i': old_units = getattr(self, '_Units', None) if old_units is not None and not old_units.equals(value): self.dtype = 'float32' elif char == 'l': old_units = getattr(self, '_Units', None) if old_units is not None and not old_units.equals(value): self.dtype = float #-- End: if self._Units = value #--- End: def @Units.deleter def Units(self): self._Units = _units_None # ---------------------------------------------------------------- # Attribute (read only) # ---------------------------------------------------------------- @property def data(self): ''' The data array object as an object identity. :Examples: >>> d.data is d True ''' return self #--- End: def # ---------------------------------------------------------------- # Attribute (read only) # ---------------------------------------------------------------- @property def Data(self): ''' The data array object as an object identity. :Examples: >>> d.Data is d True ''' return self #--- End: def # ---------------------------------------------------------------- # Attribute # ---------------------------------------------------------------- @property def dtype(self): ''' The `numpy` data type of the data array. By default this is the data type with the smallest size and smallest scalar kind to which all sub-arrays of the master data array may be safely cast without loss of information. For example, if the sub-arrays have data types 'int64' and 'float32' then the master data array's data type will be 'float64'; or if the sub-arrays have data types 'int64' and 'int32' then the master data array's data type will be 'int64'. Setting the data type to a `numpy.dtype` object, or any object convertible to a `numpy.dtype` object, will cause the master data array elements to be recast to the specified type at the time that they are next accessed, and not before. This does not immediately change the master data array elements, so, for example, reinstating the original data type prior to data access results in no loss of information. Deleting the data type forces the default behaviour. Note that if the data type of any sub-arrays has changed after `dtype` has been set (which could occur if the data array is accessed) then the reinstated default data type may be different to the data type prior to `dtype` being set. 
:Examples: >>> d = cf.Data([0.5, 1.5, 2.5]) >>> d.dtype dtype('float64') >>> type(d.dtype) <type 'numpy.dtype'> >>> d = cf.Data([0.5, 1.5, 2.5]) >>> import numpy >>> d.dtype = numpy.dtype(int) >>> print d.array [0 1 2] >>> d.dtype = bool >>> print d.array [False True True] >>> d.dtype = 'float64' >>> print d.array [ 0. 1. 1.] >>> d = cf.Data([0.5, 1.5, 2.5]) >>> d.dtype = int >>> d.dtype = bool >>> d.dtype = float >>> print d.array [ 0.5 1.5 2.5] ''' datatype = self._dtype if datatype is None: flat = self.partitions.matrix.flat datatype = flat.next().subarray.dtype for partition in flat: datatype = numpy_result_type(datatype, partition.subarray) self._dtype = datatype #--- End: if return datatype #--- End: def @dtype.setter def dtype(self, value): self._dtype = numpy_dtype(value) #--- End: def @dtype.deleter def dtype(self): self._dtype = None #--- End: def # ---------------------------------------------------------------- # Attribute # ---------------------------------------------------------------- @property def fill_value(self): ''' The data array missing data value. If set to None then the default numpy fill value appropriate to the data array's data type will be used. Deleting this attribute is equivalent to setting it to None, so this attribute is guaranteed to always exist. :Examples: >>> d.fill_value = 9999.0 >>> d.fill_value 9999.0 >>> del d.fill_value >>> print d.fill_value None ''' return self._fill_value #--- End: def @fill_value.setter def fill_value(self, value): self._fill_value = value @fill_value.deleter def fill_value(self): self._fill_value = None # ---------------------------------------------------------------- # Attribute # ---------------------------------------------------------------- @property def hardmask(self): ''' Whether the mask is hard (True) or soft (False). When the mask is hard, masked entries of the data array can not be unmasked by assignment, but unmasked entries may still be masked. When the mask is soft, masked entries of the data array may be unmasked by assignment and unmasked entries may be masked. By default, the mask is hard. :Examples: >>> d.hardmask = False >>> d.hardmask False ''' return self._hardmask @hardmask.setter def hardmask(self, value): self._hardmask = value @hardmask.deleter def hardmask(self): raise AttributeError("Won't delete %s attribute 'hardmask'" % self.__class__.__name__) #--- End: def # ---------------------------------------------------------------- # Attribute (read only) # ---------------------------------------------------------------- @property def ismasked(self): ''' True if the data array has any masked values. :Examples: >>> d = cf.Data([[1, 2, 3], [4, 5, 6]]) >>> print d.ismasked False >>> d[0, ...] = cf.masked >>> d.ismasked True ''' pda_args = self.pda_args(revert_to_file=True) #, readonly=True) for partition in self.partitions.matrix.flat: array = partition.dataarray(**pda_args) if partition.masked: partition.close() return True partition.close() #--- End: for return False #--- End: def # ---------------------------------------------------------------- # Attribute (read only) # ---------------------------------------------------------------- @property def ispartitioned(self): ''' True if the data array is partitioned. 
:Examples: >>> d._pmsize 1 >>> d.ispartitioned False >>> d._pmsize 2 >>> d.ispartitioned True ''' return self._pmsize > 1 #--- End: def # ---------------------------------------------------------------- # Attribute (read only) # ---------------------------------------------------------------- @property def isscalar(self): ''' True if the data array is a 0-d scalar array. :Examples: >>> d.ndim 0 >>> d.isscalar True >>> d.ndim >= 1 True >>> d.isscalar False ''' return not self._ndim #--- End: def # @property # def max(self): # ''' # #The maximum of the array. # #``d.max`` is equivalent to ``d.max()``. # #.. seealso `max`, `min` # #:Examples: # #>>> d = cf.Data([[4, 5, 6], [1, 2, 3]], 'metre') #>>> d.max #<CF Data: 6 metre> #>> d.max.datum() #6 # #''' # return self.max(keepdims=False) # pda_args = self.pda_args(revert_to_file=True, # readonly=True) # # flat = self.partitions.matrix.flat # # partition = flat.next() # array = partition.dataarray(**pda_args) # m = numpy_amax(array) # partition.close() # # for partition in flat: # array = partition.dataarray(**pda_args) # m = max(m, numpy_amax(array)) # partition.close() # #--- End: for # # if m is numpy_ma_masked: # m = numpy_ma_array(0, mask=True, dtype=array.dtype) # # return type(self)(m, self.Units) #--- End: def # ---------------------------------------------------------------- # Attribute (read only) # ---------------------------------------------------------------- @property def nbytes(self): ''' Total number of bytes consumed by the elements of the array. Does not include bytes consumed by the array mask. :Examples: >>> d = cf.Data([[1, 1.5, 2]]) >>> d.dtype dtype('float64') >>> d.size, d.dtype.itemsize (3, 8) >>> d.nbytes 24 >>> d[0] = cf.masked >>> print d.array [[-- 1.5 2.0]] >>> d.nbytes 24 ''' return self._size * self.dtype.itemsize #--- End: def # ---------------------------------------------------------------- # Attribute (read only) # ---------------------------------------------------------------- @property def ndim(self): ''' Number of dimensions in the data array. :Examples: >>> d.shape (73, 96) >>> d.ndim 2 >>> d.shape () >>> d.ndim 0 ''' return self._ndim #--- End: def # ---------------------------------------------------------------- # Attribute (read only) # ---------------------------------------------------------------- @property def _pmaxes(self): ''' ''' return self.partitions.axes #--- End: def # ---------------------------------------------------------------- # Attribute (read only) # ---------------------------------------------------------------- @property def _pmndim(self): ''' Number of dimensions in the partition matrix. :Examples: >>> d._pmshape (4, 7) >>> d._pmndim 2 >>> d._pmshape () >>> d._pmndim 0 ''' return self.partitions.ndim #--- End: def # ---------------------------------------------------------------- # Attribute (read only) # ---------------------------------------------------------------- @property def _pmsize(self): ''' Number of partitions in the partition matrix. :Examples: >>> d._pmshape (4, 7) >>> d._pmsize 28 >>> d._pmndim 0 >>> d._pmsize 1 ''' return self.partitions.size #--- End: def # ---------------------------------------------------------------- # Attribute (read only) # ---------------------------------------------------------------- @property def _pmshape(self): ''' Tuple of the partition matrix's dimension sizes. 
:Examples: >>> d._pmshape (4, 7) >>> d._pmndim 0 >>> d._pmshape () ''' return self.partitions.shape #--- End: def # ---------------------------------------------------------------- # Attribute (read only) # ---------------------------------------------------------------- @property def shape(self): ''' Tuple of the data array's dimension sizes. :Examples: >>> d.shape (73, 96) >>> d.ndim 0 >>> d.shape () ''' return self._shape #--- End: def # ---------------------------------------------------------------- # Attribute (read only) # ---------------------------------------------------------------- @property def size(self): ''' Number of elements in the data array. :Examples: >>> d.shape (73, 96) >>> d.size 7008 >>> d.shape (1, 1, 1) >>> d.size 1 >>> d.ndim 0 >>> d.shape () >>> d.size 1 ''' return self._size #--- End: def # ---------------------------------------------------------------- # Attribute (read only) # ---------------------------------------------------------------- @property def array(self): ''' A numpy array copy of the data array. If the data array is stored as date-time objects then a numpy array of numeric reference times will be returned. A numpy array of date-time objects may be returned by the `dtarray` attribute. .. seealso:: `dtarray`, `dtvarray`, `varray` :Examples: >>> a = d.array >>> isinstance(a, numpy.ndarray) True ''' pda_args = self.pda_args(revert_to_file=True) #, update=False) out_data_type = self.dtype units = self.Units # isdt = [] # print '_dtarray _isdt=', getattr(self, '_dtarray', False), self._isdt _dtarray = getattr(self, '_dtarray', False) if _dtarray: del self._dtarray # if not self._isdt: out_data_type = _dtype_object # pda_args['func'] = rt2dt # # Turn off data type checking and partition updating # pda_args['dtype'] = None # pda_args['update'] = False elif self._isdt: out_data_type = numpy_dtype(float) # pda_args['func'] = dt2rt # # Turn off data type checking and partition updating # pda_args['dtype'] = None # pda_args['update'] = False #--- End: if partitions = self.partitions # Still here? array_out = numpy_empty(self._shape, dtype=out_data_type) masked = False if not self.ndim: # -------------------------------------------------------- # array_out is a scalar array so index it with Ellipsis # (as opposed to the empty tuple which would be returned # from partition.indices). This prevents bad behaviour # when p_array is a numpy array of objects (e.g. date-time # objects). # -------------------------------------------------------- partition = partitions.matrix[()] p_array = partition.dataarray(**pda_args) # copy object? # isdt.append(partition.isdt) if _dtarray: if not partition.isdt: # Convert the partition subarray to an array # of date-time objects p_array = rt2dt(p_array, units) elif partition.isdt: # Convert the partition subarray to an array of # reference time floats p_array = dt2rt(p_array, None, units) if not masked and partition.masked: array_out = array_out.view(numpy_ma_MaskedArray) array_out.set_fill_value(self._fill_value) masked = True array_out[...] = p_array partition.close() else: # -------------------------------------------------------- # array_out is not a scalar array, so it can safely be # indexed with partition.indices in all cases. 
# -------------------------------------------------------- for partition in partitions.matrix.flat: p_array = partition.dataarray(**pda_args) # isdt.append(partition.isdt) if _dtarray: if not partition.isdt: # Convert the partition subarray to an array # of date-time objects p_array = rt2dt(p_array, units) elif partition.isdt: # print p_array # Convert the partition subarray to an array of # reference time floats p_array = dt2rt(p_array, None, units) # print p_array # copy object? if not masked and partition.masked: array_out = array_out.view(numpy_ma_MaskedArray) array_out.set_fill_value(self._fill_value) masked = True array_out[partition.indices] = p_array partition.close() #--- End: for if masked and self._hardmask: # Harden the mask of the output array array_out.harden_mask() # if isdt and all(isdt): # self.dtype = _dtype_object # self._isdt = True return array_out #--- End: def # ---------------------------------------------------------------- # Attribute (read only) # ---------------------------------------------------------------- @property def dtarray(self): ''' An independent numpy array of date-time objects. Only applicable to data arrays with reference time units. If the calendar has not been set then the CF default calendar will be used and the units will be updated accordingly. The data type of the data array is unchanged. .. seealso:: `array`, `asdatetime`, `asreftime`, `dtvarray`, `varray` :Examples: ''' if not self.Units.isreftime: raise ValueError("Can't create date-time array from units %r" % self.Units) self._dtarray = True return self.array #--- End: def # ---------------------------------------------------------------- # Attribute (read only) # ---------------------------------------------------------------- @property def dtvarray(self): ''' A numpy array view of the data array converted to date-time objects. Only applicable for reference time units. If the calendar has not been set then the CF default calendar will be used and the units will be updated accordingly. .. seealso:: `array`, `asdatetime`, `asreftime`, `dtarray`, `varray` ''' if not self.Units.isreftime: raise ValueError("Can't create date-time array with units %r" % self.Units) self._dtarray = True return self.varray #--- End: def # ---------------------------------------------------------------- # Attribute (read only) # ---------------------------------------------------------------- @property def unsafe_array(self): ''' A numpy array of the data array. It is unsafe because it might (or might not) be a view of the master data array - you just don't know. So changing the returned array in place might have dire consequences. It is useful because if you are 100% certain that you're not going to change the returned array in place then this method may be much faster than the `array` method. Why not just use the `varray` method? Well, you might not want to destroy the partition matrix. The data type of the array is as returned by the `dtype` attribute. :Examples: >>> a = d.unsafe_array >>> isinstance(a, numpy.ndarray) True ''' # partitions = self.partitions # # if partitions.size == 1: # # If there is only one partition we can speed things up # pda_args = self.pda_args(revert_to_file=True) # partition = partitions.matrix.item() # array_out = partition.dataarray(readonly=True, **pda_args) # partition.close() # return array_out # #--- End: if # # # Still here? 
return self.array #--- End: def # ---------------------------------------------------------------- # Attribute (read only) # ---------------------------------------------------------------- @property def varray(self): ''' A numpy array view the data array. Note that making changes to elements of the returned view changes the underlying data. If the data array is stored as date-time objects then a numpy array view of numeric reference times will be returned. In this case, a numpy array view of date-time objects may be created with the `dtvarray` attribute. .. seealso:: `array`, `dtarray`, `dtvarray` :Examples: >>> a = d.varray >>> type(a) <type 'numpy.ndarray'> >>> a array([0, 1, 2, 3, 4]) >>> a[0] = 999 >>> d.varray array([999, 1, 2, 3, 4]) ''' pda_args = self.pda_args(keep_in_memory=True) data_type = self.dtype if getattr(self, '_dtarray', False): isdt = True del self._dtarray if not self._isdt: data_type = _dtype_object pda_args['func'] = rt2dt # Turn off data type checking and partition updating pda_args['dtype'] = None elif self._isdt: isdt = False data_type = numpy_dtype(float) pda_args['func'] = dt2rt # Turn off data type checking and partition updating pda_args['dtype'] = None else: isdt = False #--- End: if if self.partitions.size == 1: # If there is only one partition, then we can return a # view of the partition's data array without having to # create an empty array and then filling it up partition # by partition. partition = self.partitions.matrix.item() array = partition.dataarray(**pda_args) # Note that there is no need to close the partition here. self._dtype = data_type self._isdt = isdt # Flip to []? return array #--- End: if # Still here? # pda_args['dtype'] = None ## WHY????? for speed!!! shape = self._shape array = numpy_empty(shape, dtype=data_type) masked = False for partition in self.partitions.matrix.flat: data = partition.dataarray(readonly=True, **pda_args) if not masked and partition.masked: array = array.view(numpy_ma_MaskedArray) array.set_fill_value(self._fill_value) masked = True array[partition.indices] = data # Note that there is no need to close the partition here #--- End: for if masked: if self._hardmask: # Harden the mask of the output array array.harden_mask() #--- End: if matrix = _xxx.copy() #numpy_empty((), dtype=object) matrix[()] = Partition(subarray = array, location = [(0, n) for n in shape], axes = self._axes, flip = [], shape = list(shape), Units = self.Units, part = [] ) self.partitions = PartitionMatrix(matrix, []) self._dtype = data_type self._isdt = isdt self._flip = [] return array #--- End: def @property def mask(self): ''' The boolean missing data mask of the data array. The boolean mask has True where the data array has missing data and False otherwise. :Examples: >>> d.shape (12, 73, 96) >>> m = d.mask >>> m <CF Data: > >>> m.dtype dtype('bool') >>> m.shape (12, 73, 96]) ''' self.to_memory() mask = self.copy() mask_units = _units_None pda_args = mask.pda_args( keep_in_memory=self.fits_in_memory(numpy_dtype(bool).itemsize)) for partition in mask.partitions.matrix.flat: array = partition.dataarray(**pda_args) if partition.masked: # data is a masked array partition.subarray = array.mask.copy() else: # data is not a masked array partition.subarray = numpy_zeros(array.shape, dtype=bool) partition.Units = mask_units partition.close() #--- End: for mask.Units = mask_units mask.dtype = bool mask._hardmask = False return mask #--- End: def @staticmethod
    def mask_fpe(*arg):
        '''

Masking of floating-point errors in the results of arithmetic
operations.

If masking is allowed then only floating-point errors which would
otherwise be raised as `FloatingPointError` exceptions are
masked. Whether `FloatingPointError` exceptions may be raised is
determined by `cf.Data.seterr`.

If called without an argument then the current behaviour is returned.

Note that if the raising of `FloatingPointError` exceptions has been
suppressed then invalid values in the results of arithmetic operations
may be subsequently converted to masked values with the `mask_invalid`
method.

.. seealso:: `cf.Data.seterr`, `mask_invalid`

:Parameters:

    arg : bool, optional
        The new behaviour. True means that `FloatingPointError`
        exceptions are suppressed and replaced with masked
        values. False means that `FloatingPointError` exceptions are
        raised. The default is not to change the current behaviour.

:Returns:

    out : bool
        The behaviour prior to the change, or the current behaviour if
        no new value was specified.

:Examples:

>>> d = cf.Data([0., 1])
>>> e = cf.Data([1., 2])

>>> old = cf.Data.mask_fpe(False)
>>> old = cf.Data.seterr('raise')
>>> e/d
FloatingPointError: divide by zero encountered in divide
>>> e**123456
FloatingPointError: overflow encountered in power

>>> old = cf.Data.mask_fpe(True)
>>> old = cf.Data.seterr('raise')
>>> e/d
<CF Data: [--, 2.0] >
>>> e**123456
<CF Data: [1.0, --] >

>>> old = cf.Data.mask_fpe(True)
>>> old = cf.Data.seterr('ignore')
>>> e/d
<CF Data: [inf, 2.0] >
>>> e**123456
<CF Data: [1.0, inf] >

'''
        old = _mask_fpe[0]

        if arg:
            _mask_fpe[0] = bool(arg[0])

        return old
    #--- End: def

    @staticmethod
    def seterr(all=None, divide=None, over=None, under=None, invalid=None):
        '''

Set how floating-point errors in the results of arithmetic operations
are handled.

The options for handling floating-point errors are:

============  ========================================================
Treatment     Action
============  ========================================================
``'ignore'``  Take no action. Allows invalid values to occur in the
              result data array.

``'warn'``    Print a `RuntimeWarning` (via the Python `warnings`
              module). Allows invalid values to occur in the result
              data array.

``'raise'``   Raise a `FloatingPointError` exception.
============  ========================================================

The different types of floating-point errors are:

=================  =================================  =================
Error              Description                        Default treatment
=================  =================================  =================
Division by zero   Infinite result obtained from      ``'warn'``
                   finite numbers.

Overflow           Result too large to be expressed.  ``'warn'``

Invalid operation  Result is not an expressible       ``'warn'``
                   number, typically indicates that
                   a NaN was produced.

Underflow          Result so close to zero that some  ``'ignore'``
                   precision was lost.
=================  =================================  =================

Note that operations on integer scalar types (such as int16) are
handled like floating point, and are affected by these settings.

If called without any arguments then the current behaviour is
returned.

.. seealso:: `cf.Data.mask_fpe`, `mask_invalid`

:Parameters:

    all : str, optional
        Set the treatment for all types of floating-point errors at
        once. The default is not to change the current behaviour.

    divide : str, optional
        Set the treatment for division by zero. The default is not to
        change the current behaviour.

    over : str, optional
        Set the treatment for floating-point overflow. The default is
        not to change the current behaviour.

    under : str, optional
        Set the treatment for floating-point underflow. The default is
        not to change the current behaviour.

    invalid : str, optional
        Set the treatment for invalid floating-point operation. The
        default is not to change the current behaviour.

:Returns:

    out : dict
        The behaviour prior to the change, or the current behaviour if
        no new values are specified.

:Examples:

Set treatment for all types of floating-point errors to ``'raise'``
and then reset to the previous behaviours:

>>> cf.Data.seterr()
{'divide': 'warn', 'invalid': 'warn', 'over': 'warn', 'under': 'ignore'}
>>> old = cf.Data.seterr('raise')
>>> cf.Data.seterr(**old)
{'divide': 'raise', 'invalid': 'raise', 'over': 'raise', 'under': 'raise'}
>>> cf.Data.seterr()
{'divide': 'warn', 'invalid': 'warn', 'over': 'warn', 'under': 'ignore'}

Set the treatment of division by zero to ``'ignore'`` and overflow to
``'warn'`` without changing the treatment of underflow and invalid
operation:

>>> cf.Data.seterr(divide='ignore', over='warn')
{'divide': 'warn', 'invalid': 'warn', 'over': 'warn', 'under': 'ignore'}
>>> cf.Data.seterr()
{'divide': 'ignore', 'invalid': 'warn', 'over': 'warn', 'under': 'ignore'}

Some examples with data arrays:

>>> d = cf.Data([0., 1])
>>> e = cf.Data([1., 2])

>>> old = cf.Data.seterr('ignore')
>>> e/d
<CF Data: [inf, 2.0] >
>>> e**12345
<CF Data: [1.0, inf] >

>>> cf.Data.seterr(divide='warn')
{'divide': 'ignore', 'invalid': 'ignore', 'over': 'ignore', 'under': 'ignore'}
>>> e/d
RuntimeWarning: divide by zero encountered in divide
<CF Data: [inf, 2.0] >
>>> e**12345
<CF Data: [1.0, inf] >

>>> old = cf.Data.mask_fpe(False)
>>> cf.Data.seterr(over='raise')
{'divide': 'warn', 'invalid': 'ignore', 'over': 'ignore', 'under': 'ignore'}
>>> e/d
RuntimeWarning: divide by zero encountered in divide
<CF Data: [inf, 2.0] >
>>> e**12345
FloatingPointError: overflow encountered in power

>>> cf.Data.mask_fpe(True)
False
>>> cf.Data.seterr(divide='ignore')
{'divide': 'warn', 'invalid': 'ignore', 'over': 'raise', 'under': 'ignore'}
>>> e/d
<CF Data: [inf, 2.0] >
>>> e**12345
<CF Data: [1.0, --] >

'''
        old = _seterr.copy()

        if all:
            _seterr.update({'divide' : all,
                            'invalid': all,
                            'under'  : all,
                            'over'   : all})
            if all == 'raise':
                _seterr_raise_to_ignore.update({'divide' : 'ignore',
                                                'invalid': 'ignore',
                                                'under'  : 'ignore',
                                                'over'   : 'ignore'})
        else:
            if divide:
                _seterr['divide'] = divide
                if divide == 'raise':
                    _seterr_raise_to_ignore['divide'] = 'ignore'

            if over:
                _seterr['over'] = over
                if over == 'raise':
                    _seterr_raise_to_ignore['over'] = 'ignore'

            if under:
                _seterr['under'] = under
                if under == 'raise':
                    _seterr_raise_to_ignore['under'] = 'ignore'

            if invalid:
                _seterr['invalid'] = invalid
                if invalid == 'raise':
                    _seterr_raise_to_ignore['invalid'] = 'ignore'
        #--- End: if

        return old
#--- End: def
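
    # An illustrative sketch (not part of the class) of how seterr and
    # mask_fpe interact, based on the behaviours documented above:
    #
    #   >>> d = cf.Data([0., 1])
    #   >>> e = cf.Data([1., 2])
    #   >>> old = cf.Data.seterr('raise')   # division by zero would raise ...
    #   >>> old = cf.Data.mask_fpe(True)    # ... but masking intercepts it
    #   >>> e/d
    #   <CF Data: [--, 2.0] >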
    def add_partitions(self, extra_boundaries, pdim):
        '''

Add partition boundaries.

:Parameters:

    extra_boundaries : list of int
        The boundaries of the new partitions.

    pdim : str
        The name of the axis to have the new partitions.

:Returns:

    None

:Examples:

>>> d.add_partitions([30, 60, 90], 'dim0')

'''
        self.partitions.add_partitions(self._axes, self._flip,
                                       extra_boundaries, pdim)
#--- End: def
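
    # A hedged sketch of add_partitions (the axis size, boundary values and
    # the 'dim0' axis identifier below are illustrative assumptions):
    #
    #   >>> d.shape                       # one axis of size 120, say
    #   (120,)
    #   >>> d._pmshape                    # a single partition to start with
    #   ()
    #   >>> d.add_partitions([40, 80], 'dim0')
    #   >>> d._pmshape                    # now three partitions along 'dim0'
    #   (3,)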
    def all(self):
        '''

Test whether all data array elements evaluate to True.

Performs a logical ``and`` over the data array and returns the
result. Masked values are considered as True during computation.

.. seealso:: `allclose`, `any`, `isclose`

:Returns:

    out : bool
        Whether or not all data array elements evaluate to True.

:Examples:

>>> d = cf.Data([[1, 3, 2]])
>>> print d.array
[[1 3 2]]
>>> d.all()
True
>>> d[0, 2] = cf.masked
>>> print d.array
[[1 3 --]]
>>> d.all()
True
>>> d[0, 0] = 0
>>> print d.array
[[0 3 --]]
>>> d.all()
False
>>> d[...] = cf.masked
>>> print d.array
[[-- -- --]]
>>> d.all()
True

'''
        pda_args = self.pda_args(revert_to_file=True)

        for partition in self.partitions.matrix.flat:
            array = partition.dataarray(**pda_args)
            a = array.all()
            if not a and a is not numpy_ma_masked:
                partition.close()
                return False

            partition.close()
        #--- End: for

        return True
#--- End: def
    def allclose(self, y, rtol=None, atol=None):
        '''

Returns True if two broadcastable arrays have equal values, False
otherwise.

For numeric data arrays ``d.allclose(y, rtol, atol)`` is equivalent to
``(abs(d - y) <= atol + rtol*abs(y)).all()``, otherwise it is
equivalent to ``(d == y).all()``.

.. seealso:: `all`, `any`, `isclose`

:Parameters:

    y : data_like

    atol : float, optional
        The absolute tolerance for all numerical comparisons. By
        default the value returned by the `ATOL` function is used.

    rtol : float, optional
        The relative tolerance for all numerical comparisons. By
        default the value returned by the `RTOL` function is used.

:Returns:

    out : bool

:Examples:

>>> d = cf.Data([1000, 2500], 'metre')
>>> e = cf.Data([1, 2.5], 'km')
>>> d.allclose(e)
True

>>> d = cf.Data(['ab', 'cdef'])
>>> d.allclose([[['ab', 'cdef']]])
True

>>> d = cf.Data([[1000, 2500], [1000, 2500]], 'metre')
>>> e = cf.Data([1, 2.5], 'km')
>>> d.allclose(e)
True

>>> d = cf.Data([1, 1, 1], 's')
>>> d.allclose(1)
True

'''
        return self.isclose(y, rtol=rtol, atol=atol).all()
#--- End: def
    def any(self):
        '''

Test whether any data array elements evaluate to True.

Performs a logical ``or`` over the data array and returns the
result. Masked values are considered as False during computation.

.. seealso:: `all`, `allclose`, `isclose`

:Examples:

>>> d = cf.Data([[0, 0, 0]])
>>> d.any()
False
>>> d[0, 0] = cf.masked
>>> print d.array
[[-- 0 0]]
>>> d.any()
False
>>> d[0, 1] = 3
>>> print d.array
[[-- 3 0]]
>>> d.any()
True
>>> d[...] = cf.masked
>>> print d.array
[[-- -- --]]
>>> d.any()
False

'''
        pda_args = self.pda_args(revert_to_file=True)

        for partition in self.partitions.matrix.flat:
            array = partition.dataarray(**pda_args)
            if array.any():
                partition.close()
                return True

            partition.close()
        #--- End: for

        return False
#--- End: def
    def max(self, axes=None, squeeze=False, mtol=1, i=False):
        '''

Collapse axes with their maximum.

Missing data array elements are omitted from the calculation.

:Parameters:

    axes : (sequence of) int, optional

    squeeze : bool, optional

    i : bool, optional
        If True then update the data array in place. By default a new
        data array is created.

:Returns:

    out : cf.Data
        The collapsed array.

.. seealso:: `min`, `mean`, `mid_range`, `sum`, `sd`, `var`

:Examples:

'''
        return self._collapse(max_f, max_fpartial, max_ffinalise, axes=axes,
                              squeeze=squeeze, mtol=mtol, i=i)
#--- End: def
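
    # A hedged example of the max collapse (the :Examples: section above is
    # empty; values follow the pattern of the mean examples further down):
    #
    #   >>> d = cf.Data([[1, 2, 4], [1, 4, 9]], 'm')
    #   >>> d.max()                        # collapse all axes
    #   <CF Data: 9 m>
    #   >>> print d.max(axes=1).array
    #   [4 9]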
    def min(self, axes=None, squeeze=False, mtol=1, i=False):
        '''

Collapse axes with their minimum.

Missing data array elements are omitted from the calculation.

:Parameters:

    axes : (sequence of) int, optional

    squeeze : bool, optional

    i : bool, optional
        If True then update the data array in place. By default a new
        data array is created.

:Returns:

    out : cf.Data
        The collapsed array.

.. seealso:: `max`, `mean`, `mid_range`, `sum`, `sd`, `var`

:Examples:

'''
        return self._collapse(min_f, min_fpartial, min_ffinalise, axes=axes,
                              squeeze=squeeze, mtol=mtol, i=i)
#--- End: def
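
    # Correspondingly, a hedged example for the min collapse (values
    # illustrative):
    #
    #   >>> d.min()
    #   <CF Data: 1 m>
    #   >>> print d.min(axes=0).array
    #   [1 2 4]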
    def mean(self, axes=None, squeeze=False, mtol=1, weights=None, i=False):
        r'''

Collapse axes with their weighted mean.

The weighted mean, :math:`\mu`, for array elements :math:`x_i` and
corresponding weights elements :math:`w_i` is

.. math:: \mu=\frac{\sum w_i x_i}{\sum w_i}

Missing data array elements and their corresponding weights are
omitted from the calculation.

:Parameters:

    axes : (sequence of) int, optional
        The axes to be collapsed. By default flattened input is
        used. Each axis is identified by its integer position. No axes
        are collapsed if *axes* is an empty sequence.

    squeeze : bool, optional
        If True then collapsed axes are removed. By default the axes
        which are collapsed are left in the result as axes with size
        1, meaning that the result is guaranteed to broadcast
        correctly against the original array.

    weights : data-like or dict, optional
        Weights associated with values of the array. By default all
        non-missing elements of the array are assumed to have a weight
        equal to one. If *weights* is a data-like object then it must
        have either the same shape as the array or, if that is not the
        case, the same shape as the axes being collapsed. If *weights*
        is a dictionary then each key is axes of the array (an int or
        tuple of ints) with a corresponding data-like value of weights
        for those axes. In this case, the implied weights array is the
        outer product of the dictionary's values.

          Example: If ``weights={1: w, (2, 0): x}`` then ``w`` must
          contain 1-dimensional weights for axis 1 and ``x`` must
          contain 2-dimensional weights for axes 2 and 0. This is
          equivalent, for example, to ``weights={(1, 2, 0): y}``,
          where ``y`` is the outer product of ``w`` and ``x``. If
          ``axes=[1, 2, 0]`` then ``weights={(1, 2, 0): y}`` is
          equivalent to ``weights=y``. If ``axes=None`` and the array
          is 3-dimensional then ``weights={(1, 2, 0): y}`` is
          equivalent to ``weights=y.transpose([2, 0, 1])``.

    mtol : number, optional
        For each element in the output data array, the fraction of
        contributing input array elements which is allowed to contain
        missing data. Where this fraction exceeds *mtol*, missing data
        is returned. The default is 1, meaning a missing datum in the
        output array only occurs when its contributing input array
        elements are all missing data. A value of 0 means that a
        missing datum in the output array occurs whenever any of its
        contributing input array elements are missing data. Any
        intermediate value is permitted.

    i : bool, optional
        If True then update the data array in place. By default a new
        data array is created.

:Returns:

    out : cf.Data
        The collapsed array.

.. seealso:: `max`, `min`, `mid_range`, `range`, `sum`, `sd`, `var`

:Examples:

>>> d = cf.Data([[1, 2, 4], [1, 4, 9]], 'm')
>>> print d.array
[[1 2 4]
 [1 4 9]]

>>> d.mean()
<CF Data: 3.5 m>
>>> d.mean(squeeze=True)
<CF Data: [[3.5]] m>
>>> d.mean(axes=[0, 1])
<CF Data: 3.5 m>
>>> d.mean(axes=[1, 0])
<CF Data: 3.5 m>
>>> print d.mean(axes=0).array
[ 1.   3.   6.5]
>>> print d.mean(axes=1).array
[ 2.33333333  4.66666667]
>>> d.mean(axes=1, squeeze=True)
[[ 2.33333333]
 [ 4.66666667]]

>>> y = cf.Data([1, 3])
>>> x = cf.Data([1, 2, 1])
>>> w = cf.expand_dims(y, 1) * x
>>> print w.array
[[1 2 1]
 [3 6 3]]

>>> d.mean(weights=w)
<CF Data: 3.9375 m>
>>> d.mean(weights={(0, 1): w})
<CF Data: 3.9375 m>
>>> d.mean(axes=[0, 1], weights={(0, 1): w})
<CF Data: 3.9375 m>
>>> d.mean(axes=[1, 0], weights={(0, 1): w})
<CF Data: 3.9375 m>
>>> d.mean(axes=(0, 1), weights={1: x, 0: y})
<CF Data: 3.9375 m>

>>> d.mean(axes=1, weights=w)
<CF Data: [2.25, 4.5] m>
>>> d.mean(axes=1, weights=x)
<CF Data: [2.25, 4.5] m>
>>> d.mean(axes=1, weights={1: x})
<CF Data: [2.25, 4.5] m>
>>> d.mean(axes=1, weights={(0, 1): w})
<CF Data: [2.25, 4.5] m>
>>> d.mean(axes=1, weights={0: y, (1,): x})
<CF Data: [2.25, 4.5] m>

>>> d.mean(axes=1)
<CF Data: [2.33333333333, 4.66666666667] m>
>>> d.mean(axes=1, weights={0: y})
<CF Data: [2.33333333333, 4.66666666667] m>

>>> e = cf.Data(numpy.arange(24).reshape(3, 2, 4))
>>> print e.array
[[[ 0  1  2  3]
  [ 4  5  6  7]]
 [[ 8  9 10 11]
  [12 13 14 15]]
 [[16 17 18 19]
  [20 21 22 23]]]

>>> e.mean(axes=[0, 2])
<CF Data: [9.5, 13.5] >
>>> f = e.mean(axes=[0, 2], squeeze=True)
>>> f
<CF Data: [[[9.5, 13.5]]] >
>>> f.shape
(1, 2, 1)
>>> print e.mean(axes=[0, 1]).array
[ 10.  11.  12.  13.]
>>> print e.mean(axes=[0, 1], weights={(1, 0): w}).array
[ 11.  12.  13.  14.]

>>> e[0, 0] = cf.masked
>>> e[-1, -1] = cf.masked
>>> e[..., 2] = cf.masked
>>> print e.array
[[[-- -- -- --]
  [4 5 -- 7]]
 [[8 9 -- 11]
  [12 13 -- 15]]
 [[16 17 -- 19]
  [-- -- -- --]]]

>>> e.mean()
<CF Data: 11.3333333333 >
>>> print e.mean(axes=[0, 1]).array
[10.0 11.0 -- 13.0]
>>> print e.mean(axes=[0, 1], weights={(1, 0): w}).array
[9.666666666666666 10.666666666666666 -- 12.666666666666666]

'''
        return self._collapse(mean_f, mean_fpartial, mean_ffinalise,
                              axes=axes, squeeze=squeeze, weights=weights,
                              mtol=mtol, i=i)
#--- End: def
    def sample_size(self, axes=None, squeeze=False, mtol=1, i=False):
        r'''

:Parameters:

    i : bool, optional
        If True then update the data array in place. By default a new
        data array is created.

'''
        return self._collapse(sample_size_f, sample_size_fpartial,
                              sample_size_ffinalise, axes=axes,
                              squeeze=squeeze, weights=None, mtol=mtol,
                              units=Units('1'), i=i)
    #--- End: def

    @property
    def binary_mask(self):
        '''

A binary (0 and 1) mask of the data array.

The binary mask's data array comprises dimensionless 32-bit integers
and has 0 where the data array has missing data and 1 otherwise.

.. seealso:: `mask`

:Returns:

    out : Data
        The binary mask.

:Examples:

>>> print d.mask.array
[[ True False  True False]]
>>> b = d.binary_mask.array
>>> print b
[[0 1 0 1]]

'''
        self.to_memory()

        binary_mask = self.copy()

        pda_args = binary_mask.pda_args(
            keep_in_memory=self.fits_in_memory(numpy_dtype('int32').itemsize),
            dtype=None)

        for partition in binary_mask.partitions.matrix.flat:
            array = partition.dataarray(**pda_args)

            if partition.masked:
                # data is masked
                partition.subarray = numpy_array(~array.mask, dtype='int32')
            else:
                # data is not masked
                partition.subarray = numpy_ones(array.shape, dtype='int32')

            partition.Units = _units_1

            partition.close()
        #--- End: for

        binary_mask.Units = _units_1

        return binary_mask
    #--- End: def
    def clip(self, a_min, a_max, units=None, i=False):
        '''

Clip (limit) the values in the data array in place.

Given an interval, values outside the interval are clipped to the
interval edges. For example, if an interval of [0, 1] is specified
then values smaller than 0 become 0 and values larger than 1 become 1.

:Parameters:

    a_min : scalar

    a_max : scalar

    units : str or Units

    i : bool, optional
        If True then update the data array in place. By default a new
        data array is created.

:Returns:

    out : cf.Data

:Examples:

'''
        if i:
            d = self
        else:
            d = self.copy()

        if units is not None:
            # Convert the limits to the same units as the data array
            units = Units(units)
            self_units = d.Units
            if self_units != units:
                a_min = Units.conform(a_min, units, self_units)
                a_max = Units.conform(a_max, units, self_units)
        #--- End: if

        pda_args = d.pda_args()

        for partition in d.partitions.matrix.flat:
            array = partition.dataarray(**pda_args)
            array.clip(a_min, a_max, out=array)
            partition.close()
        #--- End: for

        return d
    #--- End: def

    @classmethod
    def asdata(cls, d, copy=False):
        '''

Convert the input to a `cf.Data` object.

:Parameters:

    d : data-like
        Input data in any form that can be converted to a cf.Data
        object. This includes `cf.Data` and `cf.Field` objects, numpy
        arrays and any object which may be converted to a numpy array.

:Returns:

    out : cf.Data
        cf.Data interpretation of *d*. No copy is performed on the
        input if it is already a cf.Data object and *copy* is False.

:Examples:

>>> d = cf.Data([1, 2])
>>> cf.Data.asdata(d) is d
True
>>> d.asdata(d) is d
True

>>> cf.Data.asdata([1, 2])
<CF Data: [1, 2]>

>>> cf.Data.asdata(numpy.array([1, 2]))
<CF Data: [1, 2]>

'''
        data = getattr(d, '__data__', None)
        if data is None:
            return cls(d)

        data = data()
        if copy:
            return data.copy()
        else:
            return data
    #--- End: def
    def close(self):
        '''

Close all files referenced by the data array.

Note that a closed file will be automatically reopened if its contents
are subsequently required.

:Returns:

    None

:Examples:

>>> d.close()

'''
        for partition in self.partitions.matrix.flat:
            partition.file_close()
#--- End: def
    def copy(self):
        '''

Return a deep copy.

``d.copy()`` is equivalent to ``copy.deepcopy(d)``.

:Returns:

    out :
        The deep copy.

:Examples:

>>> e = d.copy()

'''
        new = Data.__new__(Data)
        new.__dict__ = self.__dict__.copy()
        new.partitions = self.partitions.copy()

        return new
#--- End: def
    def cos(self, i=False):
        '''

Take the trigonometric cosine of the data array in place.

Units are accounted for in the calculation. If the units are not
equivalent to radians (such as Kelvin) then they are treated as if
they were radians. For example, the cosine of 90 degrees_east is 0.0,
as is the cosine of 1.57079632 kg m-2.

The Units are changed to '1' (nondimensional).

:Parameters:

    i : bool, optional
        If True then update the data array in place. By default a new
        data array is created.

:Returns:

    out : cf.Data

:Examples:

>>> d.Units
<CF Units: degrees_east>
>>> print d.array
[[-90 0 90 --]]
>>> d.cos()
>>> d.Units
<CF Units: 1>
>>> print d.array
[[0.0 1.0 0.0 --]]

>>> d.Units
<CF Units: m s-1>
>>> print d.array
[[1 2 3 --]]
>>> d.cos()
>>> d.Units
<CF Units: 1>
>>> print d.array
[[0.540302305868 -0.416146836547 -0.9899924966 --]]

'''
        if i:
            d = self
        else:
            d = self.copy()

        if d.Units.equivalent(_units_radians):
            d.Units = _units_radians

        return d.func(numpy_cos, units=_units_1, i=True)
    #--- End: def

    def count(self):
        '''

Count the non-masked elements of the array.

:Returns:

    out : int

:Examples:

'''
        pda_args = self.pda_args(revert_to_file=True)

        n = 0
        for partition in self.partitions.matrix.flat:
            array = partition.dataarray(**pda_args)
            n += numpy_ma_count(array)
            partition.close()
        #--- End: for

        return n
    #--- End: def

    def count_masked(self):
        '''

'''
        return self._size - self.count()
    #--- End: def

    def cyclic(self, axes=None, iscyclic=True):
        '''

:Parameters:

:Returns:

    out : set

:Examples:

'''
        cyclic_axes = self._cyclic
        data_axes   = self._axes

        old = set([data_axes.index(axis) for axis in cyclic_axes])

        if axes is None:
            return old

        axes = [data_axes[i] for i in self._parse_axes(axes, 'cyclic')]

        if iscyclic:
            self._cyclic = cyclic_axes.union(axes)
        else:
            self._cyclic = cyclic_axes.difference(axes)

        return old
    #--- End: def
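
    # A hedged sketch of cyclic, which reports and updates the set of cyclic
    # axis positions (the return values follow from the code above):
    #
    #   >>> d.cyclic()                     # no cyclic axes yet
    #   set([])
    #   >>> d.cyclic(0)                    # make axis 0 cyclic; the old set
    #   set([])                            # is returned
    #   >>> d.cyclic()
    #   set([0])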
    def asreftime(self, i=False):
        '''

Change the internal representation of data array elements from
datetime-like objects to numeric reference times.

If the calendar has not been set then the default CF calendar will be
used and the units and the `calendar` attribute will be updated
accordingly.

If the internal representations are already numeric reference times
then no change occurs.

.. seealso:: `asdatetime`

:Parameters:

    i : bool, optional
        If True then update the data array in place. By default a new
        data array is created.

:Returns:

    out : cf.Data

:Examples:

>>> d.asreftime()

'''
        if i:
            d = self
        else:
            d = self.copy()

        units = d.Units

        if not d._isdt:
            if units.isreftime:
                return d
            else:
                raise ValueError(
                    "Can't convert %r data to numeric reference times" %
                    units)
        #--- End: if

        pda_args = d.pda_args(func=dt2rt, dtype=None)

        for partition in d.partitions.matrix.flat:
            array = partition.dataarray(**pda_args)
            p_units = partition.Units
            partition.Units = Units(p_units.units, p_units._utime.calendar)
            partition.close()
        #--- End: for

        d.Units = Units(units.units, units._utime.calendar)

        d._dtype = array.dtype
        d._isdt  = False

        return d
#--- End: def
    def asdatetime(self, i=False):
        '''

Change the internal representation of data array elements from numeric
reference times to datetime-like objects.

If the calendar has not been set then the default CF calendar will be
used and the units and the `calendar` attribute will be updated
accordingly.

If the internal representations are already datetime-like objects then
no change occurs.

.. seealso:: `asreftime`

:Parameters:

    i : bool, optional
        If True then update the data array in place. By default a new
        data array is created.

:Returns:

    out : cf.Data

:Examples:

>>> d.asdatetime()

'''
        if i:
            d = self
        else:
            d = self.copy()

        units = self.Units

        if d._isdt:
            return d
        elif not units.isreftime:
            raise ValueError("Can't convert %r data to date-time objects" %
                             units)

        pda_args = d.pda_args(func=rt2dt, dtype=None)

        for partition in d.partitions.matrix.flat:
            array = partition.dataarray(**pda_args)
            p_units = partition.Units
            partition.Units = Units(p_units.units, p_units._utime.calendar)
            partition.close()
        #--- End: for

        d.Units = Units(units.units, units._utime.calendar)

        d._dtype = array.dtype
        d._isdt  = True

        return d
#--- End: def def _YMDhms(self, attr): ''' ''' def func(array, units_in, dummy0, dummy1): ''' The returned array is always independent. :Parameters: array : numpy array units_in : cf.Units dummy0 : Ignored. dummy1 : Ignored. :Returns: out : numpy array ''' if not self._isdt: array = rt2dt(array, units_in) return _array_getattr(array, attr) #--- End: def if not self.Units.isreftime: raise ValueError( "Can't get %ss from data with %r" % (attr, self.Units)) new = self.copy() new.Units = _units_None pda_args = new.pda_args(func=func, dtype=None) for partition in new.partitions.matrix.flat: array = partition.dataarray(**pda_args) new_dtype = array.dtype partition.close() #--- End: for new._dtype = new_dtype new._isdt = False return new #--- End: def # ---------------------------------------------------------------- # Attribute (read only) # ---------------------------------------------------------------- @property def year(self): ''' The year of each data array element. Only applicable for reference time units. .. seealso:: `~cf.Data.month`, `~cf.Data.day`, `~cf.Data.hour`, `~cf.Data.minute`, `~cf.Data.second` :Examples: >>> d = cf.Data([[1.93, 5.17]], 'days since 2000-12-29') >>> d <CF Data: [[2000-12-30 22:19:12, 2001-01-03 04:04:48]] > >>> d.year <CF Data: [[2000, 2001]] > ''' return self._YMDhms('year') #--- End: def # ---------------------------------------------------------------- # Attribute (read only) # ---------------------------------------------------------------- @property def month(self): ''' The month of each data array element. Only applicable for reference time units. .. seealso:: `~cf.Data.year`, `~cf.Data.day`, `~cf.Data.hour`, `~cf.Data.minute`, `~cf.Data.second` :Examples: >>> d = cf.Data([[1.93, 5.17]], 'days since 2000-12-29') >>> d <CF Data: [[2000-12-30 22:19:12, 2001-01-03 04:04:48]] > >>> d.month <CF Data: [[12, 1]] > ''' return self._YMDhms('month') #--- End: def # ---------------------------------------------------------------- # Attribute (read only) # ---------------------------------------------------------------- @property def day(self): ''' The day of each data array element. Only applicable for reference time units. .. seealso:: `~cf.Data.year`, `~cf.Data.month`, `~cf.Data.hour`, `~cf.Data.minute`, `~cf.Data.second` :Examples: >>> d = cf.Data([[1.93, 5.17]], 'days since 2000-12-29') >>> d <CF Data: [[2000-12-30 22:19:12, 2001-01-03 04:04:48]] > >>> d.day <CF Data: [[30, 3]] > ''' return self._YMDhms('day') #--- End: def # ---------------------------------------------------------------- # Attribute (read only) # ---------------------------------------------------------------- @property def hour(self): ''' The hour of each data array element. Only applicable for reference time units. .. seealso:: `~cf.Data.year`, `~cf.Data.month`, `~cf.Data.day`, `~cf.Data.minute`, `~cf.Data.second` :Examples: >>> d = cf.Data([[1.93, 5.17]], 'days since 2000-12-29') >>> d <CF Data: [[2000-12-30 22:19:12, 2001-01-03 04:04:48]] > >>> d.hour <CF Data: [[22, 4]] > ''' return self._YMDhms('hour') #--- End: def # ---------------------------------------------------------------- # Attribute (read only) # ---------------------------------------------------------------- @property def minute(self): ''' The minute of each data array element. Only applicable for reference time units. .. 
seealso:: `~cf.Data.year`, `~cf.Data.month`, `~cf.Data.day`, `~cf.Data.hour`, `~cf.Data.second` :Examples: >>> d = cf.Data([[1.93, 5.17]], 'days since 2000-12-29') >>> d <CF Data: [[2000-12-30 22:19:12, 2001-01-03 04:04:48]] > >>> d.minute <CF Data: [[19, 4]] > ''' return self._YMDhms('minute') #--- End: def # ---------------------------------------------------------------- # Attribute (read only) # ---------------------------------------------------------------- @property def second(self): ''' The second of each data array element. Only applicable for reference time units. .. seealso:: `~cf.Data.year`, `~cf.Data.month`, `~cf.Data.day`, `~cf.Data.hour`, `~cf.Data.minute` >>> d = cf.Data([[1.93, 5.17]], 'days since 2000-12-29') >>> d <CF Data: [[2000-12-30 22:19:12, 2001-01-03 04:04:48]] > >>> d.second <CF Data: [[12, 48]] > ''' return self._YMDhms('second') #--- End: def
[docs] def unique(self): ''' The unique elements of the array. Returns a new object with the sorted unique elements in a one dimensional array. :Examples: >>> d = cf.Data([[4, 2, 1], [1, 2, 3]], 'metre') >>> d.unique() <CF Data: [1, 2, 3, 4] metre> >>> d[1, -1] = cf.masked >>> d.unique() <CF Data: [1, 2, 4] metre> ''' pda_args = self.pda_args(revert_to_file=True) #, readonly=True) u = [] for partition in self.partitions.matrix.flat: array = partition.dataarray(**pda_args) array = numpy_unique(array) if partition.masked: # Note that compressing a masked array may result in # an array with zero size array = array.compressed() size = array.size if size > 1: u.extend(array) elif size == 1: u.append(array.item()) partition.close() #--- End: for u = numpy_unique(numpy_array(u, dtype=self.dtype)) return type(self)(u, units=self.Units, dt=self._isdt)
#--- End: def
    def dump(self, display=True, prefix=None):
        '''

Return a string containing a full description of the instance.

:Parameters:

    display : bool, optional
        If False then return the description as a string. By default
        the description is printed, i.e. ``d.dump()`` is equivalent to
        ``print d.dump(display=False)``.

    prefix : str, optional
        Set the common prefix of component names. By default the
        instance's class name is used.

:Returns:

    out : None or str
        A string containing the description.

:Examples:

'''
        if prefix is None:
            prefix = self.__class__.__name__

        string = ['%s.shape = %s' % (prefix, self._shape)]

        string.append('%s.first_datum = %s' % (prefix, self.datum(0)))
        if self._size > 1:
            string.append('%s.last_datum  = %s' % (prefix, self.datum(-1)))

        for attr in ('fill_value', 'Units'):
            string.append('%s.%s = %r' % (prefix, attr, getattr(self, attr)))
        #--- End: for

        string = '\n'.join(string)

        if display:
            print string
        else:
            return string
    #--- End: def
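
    # A hedged sketch of dump output (values illustrative; the line format
    # follows the code above):
    #
    #   >>> d = cf.Data([[1, 2, 3]], 'm')
    #   >>> d.dump()
    #   Data.shape = (1, 3)
    #   Data.first_datum = 1
    #   Data.last_datum  = 3
    #   Data.fill_value = None
    #   Data.Units = <CF Units: m>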
    def ndindex(self):
        '''

Return an iterator over the N-dimensional indices of the data array.

At each iteration a tuple of indices is returned, the last dimension
being iterated over first.

:Returns:

    out : itertools.product
        An iterator over tuples of indices of the data array.

:Examples:

>>> d.shape
(2, 1, 3)
>>> for i in d.ndindex():
...     print i
...
(0, 0, 0)
(0, 0, 1)
(0, 0, 2)
(1, 0, 0)
(1, 0, 1)
(1, 0, 2)

>>> d.shape
()
>>> for i in d.ndindex():
...     print i
...
()

'''
        return itertools_product(*[xrange(0, r) for r in self._shape])
#--- End: def
[docs] def equals(self, other, rtol=None, atol=None, ignore_fill_value=False, traceback=False): ''' True if two data arrays are logically equal, False otherwise. :Parameters: other : The object to compare for equality. atol : float, optional The absolute tolerance for all numerical comparisons, By default the value returned by the `ATOL` function is used. rtol : float, optional The relative tolerance for all numerical comparisons, By default the value returned by the `RTOL` function is used. ignore_fill_value : bool, optional If True then data arrays with different fill values are considered equal. By default they are considered unequal. traceback : bool, optional If True then print a traceback highlighting where the two instances differ. :Returns: out : bool Whether or not the two instances are equal. :Examples: >>> d.equals(d) True >>> d.equals(d + 1) False ''' # Check each instance's id if self is other: return True # Check that each instance is the same type if self.__class__ != other.__class__: if traceback: print("%s: Different type: %s" % (self.__class__.__name__, other.__class__.__name__)) return False # Check that each instance has the same shape if self._shape != other._shape: if traceback: print("%s: Different shape: %s, %s" % (self.__class__.__name__, self._shape, other._shape)) return False #--- End: if # Check that each instance has the same units if self.Units != other.Units: if traceback: print("%s: Different Units (%r, %r)" % (self.__class__.__name__, self.Units, other.Units)) return False #--- End: if # Check that each instance has the same fill value if not ignore_fill_value and self._fill_value != other._fill_value: if traceback: print("%s: Different fill values (%s, %s)" % (self.__class__.__name__, self._fill_value, other._fill_value)) return False #--- End: if # ------------------------------------------------------------ # Check that each instance has equal array values # ------------------------------------------------------------ # Set default tolerances if rtol is None: rtol = RTOL() if atol is None: atol = ATOL() pda_args = self.pda_args(revert_to_file=True) #, readonly=True) other.to_memory() for partition in self.partitions.matrix.flat: array0 = partition.dataarray(**pda_args) array1 = other[partition.indices].varray partition.close() if not _numpy_allclose(array0, array1, rtol=rtol, atol=atol): if traceback: print("%s: Different data array values" % self.__class__.__name__) return False #--- End: for # ------------------------------------------------------------ # Still here? Then the two instances are equal. # ------------------------------------------------------------ return True
#--- End: def def exp(self, i=False): ''' Take the exponential of the data array. :Parameters: i : bool, optional If True then update the data array in place. By default a new data array is created. :Returns: out : cf.Data :Examples: ''' units = self.Units if units and not units.isdimensionless: raise ValueError( "Can't take exponential of dimensional quantities: {0!r}".format(units)) if i: d = self else: d = self.copy() if d.Units: d.Units = _units_1 return d.func(numpy_exp, i=True) #--- End: def
[docs] def expand_dims(self, position=0, i=False): ''' Expand the shape of the data array in place. Insert a new size 1 axis, corresponding to a given position in the data array shape. .. seealso:: `flip`, `squeeze`, `swapaxes`, `transpose` :Parameters: position : int, optional Specify the position that the new axis will have in the data array axes. By default the new axis has position 0, the slowest varying position. i : bool, optional If True then update the data array in place. By default a new data array is created. :Returns: out : cf.Data :Examples: ''' # Parse position ndim = self._ndim if -ndim-1 <= position < 0: position += ndim + 1 elif not 0 <= position <= ndim: raise ValueError( "Can't expand_dims: Invalid position (%d)" % position) #--- End: for if i: d = self else: d = self.copy() # Expand _axes axis = d._new_axis_identifier() data_axes = d._axes[:] data_axes.insert(position, axis) d._axes = data_axes # Increment ndim and expand shape d._ndim += 1 shape = list(d._shape) shape.insert(position, 1) d._shape = tuple(shape) # Expand the location and shape of each partition location = (0, 1) for partition in d.partitions.matrix.flat: partition.location = partition.location[:] partition.shape = partition.shape[:] partition.location.insert(position, location) partition.shape.insert(position, 1) #--- End: for if d._all_axes: d._all_axes += (axis,) return d
#--- End: def
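
    # A hedged example of expand_dims (the shapes follow directly from the
    # definition above):
    #
    #   >>> d.shape
    #   (73, 96)
    #   >>> d.expand_dims(0).shape
    #   (1, 73, 96)
    #   >>> d.expand_dims(-1).shape
    #   (73, 96, 1)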
[docs] def flat(self, ignore_masked=True): ''' Return a flat iterator over elements of the data array. :Parameters: ignore_masked : bool, optional If False then masked and unmasked elements will be returned. By default only unmasked elements are returned :Returns: out : generator An iterator over elements of the data array. :Examples: >>> print d.array [[1 -- 3]] >>> for x in d.flat(): ... print x ... 1 3 >>> for x in d.flat(False): ... print x ... 1 -- 3 ''' self.to_memory() mask = self.mask if ignore_masked: for index in self.ndindex(): if not mask[index]: yield self[index].unsafe_array.item() else: for index in self.ndindex(): if not mask[index]: yield self[index].unsafe_array.item() else: yield cf_masked
#--- End: def
[docs] def floor(self, i=False): ''' Return the floor of the data array. .. versionadded:: 1.0 .. seealso:: `ceil`, `rint`, `trunc` :Parameters: i : bool, optional If True then update the data array in place. By default a new data array is created. :Returns: out : cf.Data :Examples: >>> print d.array [-1.9 -1.5 -1.1 -1. 0. 1. 1.1 1.5 1.9] >>> print d.floor().array [-2. -2. -2. -1. 0. 1. 1. 1. 1.] ''' return self.func(numpy_floor, out=True, i=i)
    #--- End: def
[docs] def outerproduct(self, e, i=False): ''' Compute the outer product with another data array. The axes of result will be the combined axes of the two input arrays: >>> d.outerproduct(e).ndim == d.ndim + e.ndim True >>> d.outerproduct(e).shape == d.shape + e.shape True :Parameters: e : data-like The data array with which to form the outer product. i : bool, optional If True then update the data array in place. By default a new data array is created. :Returns: out : cf.Data :Examples: >>> d = cf.Data([1, 2, 3], 'metre') >>> o = d.outerproduct([4, 5, 6, 7]) >>> o <CF Data: [[4, ..., 21]] m> >>> print o.array [[ 4 5 6 7] [ 8 10 12 14] [12 15 18 21]] >>> e = cf.Data([[4, 5, 6, 7], [6, 7, 8, 9]], 's-1') >>> o = d.outerproduct(e) >>> o <CF Data: [[[4, ..., 27]]] m.s-1> >>> print d.shape, e.shape, o.shape (3,) (2, 4) (3, 2, 4) >>> print o.array [[[ 4 5 6 7] [ 6 7 8 9]] [[ 8 10 12 14] [12 14 16 18]] [[12 15 18 21] [18 21 24 27]]] ''' e_ndim = numpy_ndim(e) if e_ndim: d = self.copy() for j in range(e_ndim): d.expand_dims(-1, i=True) else: d = self d = d*e if i: # Update self in place self.__dict__ = d.__dict__ return d
#--- End: def
[docs] def override_units(self, units, i=False): ''' Override the data array units. Not to be confused with setting the `Units` attribute to units which are equivalent to the original units. This is different because in this case the new units need not be equivalent to the original ones and the data array elements will not be changed to reflect the new units. :Parameters: units : str or Units The new units for the data array. i : bool, optional If True then update the data array in place. By default a new data array is created. :Returns: out : cf.Data :Examples: >>> d = cf.Data(1012.0, 'hPa') >>> d.override_units('km') >>> d.Units <CF Units: km> >>> d.datum(0) 1012.0 >>> d.override_units(cf.Units('watts')) >>> d.Units <CF Units: watts> >>> d.datum(0) 1012.0 ''' if i: d = self else: d = self.copy() units = Units(units) pda_args = d.pda_args() for partition in d.partitions.matrix.flat: p_units = partition.Units if not p_units or p_units == units: # No need to create the data array if the sub-array # units are the same as the master data array units or # the partition units are not set partition.Units = units continue #--- End: if partition.dataarray(**pda_args) partition.Units = units partition.close() #--- End: for d.Units = units return d
#--- End: def
    def override_calendar(self, calendar, i=False):
        '''

Override the data array calendar.

Not to be confused with setting `d.Units.calendar`. This is different
because in this case the new calendar need not be equivalent to the
original one and the data array elements will not be changed to
reflect the new calendar.

:Parameters:

    calendar : str
        The new calendar.

    i : bool, optional
        If True then update the data array in place. By default a new
        data array is created.

:Returns:

    out : cf.Data

:Examples:

'''
        if not self.Units.isreftime:
            raise ValueError(
                "Can't override the calendar of non-reference-time units: {0!r}".format(
                    self.Units))

        if i:
            d = self
        else:
            d = self.copy()

        for partition in d.partitions.matrix.flat:
            partition.Units = Units(partition.Units._units, calendar)
            partition.close()
        #--- End: for

        d.Units = Units(d.Units._units, calendar)

        return d
#--- End: def
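
    # A hedged example of override_calendar (repr format illustrative): the
    # numeric values are unchanged, only the calendar metadata is replaced.
    #
    #   >>> d = cf.Data([1.5], 'days since 2000-1-1')
    #   >>> e = d.override_calendar('noleap')
    #   >>> e.Units
    #   <CF Units: days since 2000-1-1 calendar=noleap>
    #   >>> e.datum(0)
    #   1.5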
    def to_disk(self):
        '''

Store the data array on disk in place.

There is no change to partitions whose sub-arrays are already on disk.

:Returns:

    None

:Examples:

>>> d.to_disk()

'''
        pda_args = self.pda_args(keep_in_memory=False)

        for partition in self.partitions.matrix.flat:
            if partition.in_memory:
                partition.dataarray(**pda_args)
                partition.close()
#--- End: def
[docs] def to_memory(self, regardless=False): ''' Store each partition's data in memory in place if the master array is smaller than the chunk size. There is no change to partitions with data that are already in memory. :Parameters: regardless : bool, optional If True then store all partitions' data in memory regardless of the size of the master array. By default only store all partitions' data in memory if the master array is smaller than the chunk size. :Returns: None :Examples: >>> d.to_memory() >>> d.to_memory(regardless=True) ''' if self.fits_in_memory(self.dtype.itemsize) or regardless: pda_args = self.pda_args(keep_in_memory=True) for partition in self.partitions.matrix.flat: if partition.on_disk: partition.dataarray(**pda_args) partition.close()
#--- End: def # ---------------------------------------------------------------- # Attribute (read only) # ---------------------------------------------------------------- @property def in_memory(self): ''' :Returns: :Examples: >>> d.in_memory ''' for partition in self.partitions.matrix.flat: if not partition.in_memory: return False #--- End: for return True #--- End: def def pda_args(self, keep_in_memory=None, **kwargs): ''' Return a dictionary of arguments for the `cf.Partition.dataarray` method. The values are inferred from the state of the Data object and any keyword arguments. :Parameters: :Returns: out : dict :Examples: ''' dtype = self.dtype pda_args = {'axes' : self._axes, 'flip' : self._flip, 'units' : self.Units, 'hardmask': self._hardmask, 'dtype' : dtype} if keep_in_memory is None: keep_in_memory = self.fits_in_memory(dtype.itemsize) pda_args['keep_in_memory'] = keep_in_memory if kwargs: pda_args.update(kwargs) return pda_args #--- End: def
[docs] def partition_boundaries(self): ''' Return the partition boundaries for each partition matrix dimension. :Returns: out : dict :Examples: ''' return self.partitions.partition_boundaries(self._axes)
#--- End: def
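
    # A hedged sketch of partition_boundaries (the exact dictionary layout
    # is an assumption): for a one-dimensional array partitioned at element
    # 40 along the internal axis 'dim0' one might see
    #
    #   >>> d.partition_boundaries()
    #   {'dim0': [0, 40, 120]}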
    def datum(self, *index):
        '''

Return an element of the data array as a standard Python scalar.

The first and last elements are always returned with ``d.datum(0)``
and ``d.datum(-1)`` respectively, even if the data array is a scalar
array or has two or more dimensions.

The returned object is of the same type as is stored internally.

.. seealso:: `array`, `dtarray`

:Parameters:

    index : *optional*
        Specify which element to return. When no positional arguments
        are provided, the method only works for data arrays with one
        element (but any number of dimensions), and the single element
        is returned. If positional arguments are given then they must
        be one of the following:

        * An integer. This argument is interpreted as a flat index
          into the array, specifying which element to copy and return.

          Example: If the data array shape is ``(2, 3, 6)`` then:
            * ``d.datum(0)`` is equivalent to ``d.datum(0, 0, 0)``.
            * ``d.datum(-1)`` is equivalent to ``d.datum(1, 2, 5)``.
            * ``d.datum(16)`` is equivalent to ``d.datum(0, 2, 4)``.

          If *index* is ``0`` or ``-1`` then the first or last data
          array element respectively will be returned, even if the
          data array is a scalar array.

        * Two or more integers. These arguments are interpreted as a
          multidimensional index to the array. There must be the same
          number of integers as data array dimensions.

        * A tuple of integers. This argument is interpreted as a
          multidimensional index to the array. There must be the same
          number of integers as data array dimensions.

          Example: ``d.datum((0, 2, 4))`` is equivalent to
          ``d.datum(0, 2, 4)``; and ``d.datum(())`` is equivalent to
          ``d.datum()``.

:Returns:

    out :
        A copy of the specified element of the array as a suitable
        Python scalar.

:Examples:

>>> d = cf.Data(2)
>>> d.datum()
2
>>> 2 == d.datum(0) == d.datum(-1) == d.datum(())
True

>>> d = cf.Data([[2]])
>>> 2 == d.datum() == d.datum(0) == d.datum(-1)
True
>>> 2 == d.datum(0, 0) == d.datum((-1, -1)) == d.datum(-1, 0)
True

>>> d = cf.Data([[4, 5, 6], [1, 2, 3]], 'metre')
>>> d[0, 1] = cf.masked
>>> print d
[[4 -- 6]
 [1 2 3]]
>>> d.datum(0)
4
>>> d.datum(-1)
3
>>> d.datum(1)
masked
>>> d.datum(4)
2
>>> d.datum(-2)
2
>>> d.datum(0, 0)
4
>>> d.datum(-2, -1)
6
>>> d.datum(1, 2)
3
>>> d.datum((0, 2))
6

'''
        if index:
            n_index = len(index)
            if n_index == 1:
                index = index[0]
                if index == 0:
                    # This also works for scalar arrays
                    index = (slice(0, 1),) * self._ndim
                elif index == -1:
                    # This also works for scalar arrays
                    index = (slice(-1, None),) * self._ndim
                elif isinstance(index, (int, long)):
                    if index < 0:
                        index += self._size
                    index = numpy_unravel_index(index, self._shape)
                elif len(index) != self._ndim:
                    raise ValueError(
                        "Incorrect number of indices for %s array" %
                        self.__class__.__name__)
            elif n_index != self._ndim:
                raise ValueError(
                    "Incorrect number of indices for %s array" %
                    self.__class__.__name__)
            #--- End: if

            if self._isdt:
                array = self[index].dtarray
            else:
                array = self[index].array

        elif self._size == 1:
            if self._isdt:
                array = self.dtarray
            else:
                array = self.array

        else:
            raise ValueError(
                "Can only convert a %s array of size 1 to a Python scalar" %
                self.__class__.__name__)

        if not numpy_ma_isMA(array):
            return array.item()

        mask = array.mask
        if mask is numpy_ma_nomask or not mask.item():
            return array.item()

        return cf_masked
#--- End: def
    def mask_invalid(self, i=False):
        '''

Mask the array where invalid values occur (NaN or inf).

Note that:

* Invalid values in the results of arithmetic operations may only
  occur if the raising of `FloatingPointError` exceptions has been
  suppressed by `cf.Data.seterr`.

* If the raising of `FloatingPointError` exceptions has been allowed
  then invalid values in the results of arithmetic operations may be
  automatically converted to masked values, depending on the setting
  of `cf.Data.mask_fpe`. In this case, such automatic conversion might
  be faster than calling `mask_invalid`.

.. seealso:: `cf.Data.mask_fpe`, `cf.Data.seterr`

:Parameters:

    i : bool, optional
        If True then update the data array in place. By default a new
        data array is created.

:Returns:

    out : cf.Data

:Examples:

>>> d = cf.Data([0., 1])
>>> e = cf.Data([1., 2])

>>> old = cf.Data.seterr('ignore')
>>> f = e/d
>>> f
<CF Data: [inf, 2.0] >
>>> f.mask_invalid()
<CF Data: [--, 2.0] >

>>> f = e**12345
>>> f
<CF Data: [1.0, inf] >
>>> f.mask_invalid()
<CF Data: [1.0, --] >

>>> old = cf.Data.seterr('raise')
>>> old = cf.Data.mask_fpe(True)
>>> e/d
<CF Data: [--, 2.0] >
>>> e**12345
<CF Data: [1.0, --] >

'''
        if i:
            d = self
        else:
            d = self.copy()

        pda_args = d.pda_args()

        for partition in d.partitions.matrix.flat:
            array = partition.dataarray(**pda_args)

            array = numpy_ma_masked_invalid(array, copy=False)
            array.shrink_mask()
            if array.mask is numpy_ma_nomask:
                array = array.data

            partition.subarray = array

            partition.close()
        #--- End: for

        return d
#--- End: def
[docs] def mid_range(self, axes=None, squeeze=True, mtol=1, i=False): ''' Collapse axes with the unweighted average of their maximum and minimum values. Missing data array elements are omitted from the calculation. .. seealso:: `max`, `min`, `mean`, `range`, `sum`, `sd`, `var` :Parameters: axes : (sequence of) int, optional squeeze : bool, optional i : bool, optional If True then update the data array in place. By default a new data array is created. :Returns: out : cf.Data The collapsed array. :Examples: ''' return self._collapse(mid_range_f, mid_range_fpartial, mid_range_ffinalise, axes=axes, squeeze=squeeze, mtol=mtol, i=i)
#--- End: def
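
    # A hedged example of mid_range, the unweighted average of the collapsed
    # maximum and minimum (values illustrative):
    #
    #   >>> d = cf.Data([[1, 2, 4], [1, 4, 9]], 'm')
    #   >>> d.mid_range()
    #   <CF Data: 5.0 m>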
[docs] def flip(self, axes=None, i=False): ''' Flip (reverse the direction of) axes of the data array in place. .. seealso:: `expand_dims`, `squeeze`, `swapaxes`, `transpose` :Parameters: axes : (sequence of) int Select the axes. By default all axes are flipped. Each axis is identified by its integer position. No axes are flipped if *axes* is an empty sequence. i : bool, optional If True then update the data array in place. By default a new data array is created. :Returns: out : cf.Data :Examples: >>> d.flip() >>> d.flip(1) >>> d.flip([0, 1]) >>> d.flip([]) >>> e = d[::-1, :, ::-1] >>> d.flip((2, 0)).equals(e) True ''' if i: d = self else: d = self.copy() if axes is not None and not axes and axes != 0: # Null flip return d if axes is None: iaxes = range(d._ndim) else: iaxes = d._parse_axes(axes, 'flip') reverse = d._flip[:] data_axes = d._axes partitions = d.partitions pm_axes = partitions.axes flip_partition_matrix = False if pm_axes: indices = [slice(None)] * partitions.ndim for i in iaxes: axis = data_axes[i] if axis in reverse: reverse.remove(axis) else: reverse.append(axis) if axis in pm_axes: indices[pm_axes.index(axis)] = slice(None, None, -1) flip_partition_matrix = True #--- End: for d._flip = reverse if flip_partition_matrix: partitions = partitions[tuple(indices)] partitions.set_location_map(data_axes) d.partitions = partitions #--- End: if return d
#--- End: def def inspect(self): ''' Inspect the object for debugging. .. seealso:: `cf.inspect` :Returns: None ''' print cf_inspect(self) #--- End: def
    def isclose(self, y, rtol=None, atol=None):
        '''

Return a boolean data array showing where two broadcastable arrays
have equal values within a tolerance.

For numeric data arrays, ``d.isclose(y, rtol, atol)`` is equivalent to
``abs(d - y) <= atol + rtol*abs(y)``, otherwise it is equivalent to
``d == y``.

:Parameters:

    y : data_like

    atol : float, optional
        The absolute tolerance for all numerical comparisons. By
        default the value returned by the `ATOL` function is used.

    rtol : float, optional
        The relative tolerance for all numerical comparisons. By
        default the value returned by the `RTOL` function is used.

:Returns:

    out : cf.Data
        The boolean data array.

:Examples:

>>> d = cf.Data([1000, 2500], 'metre')
>>> e = cf.Data([1, 2.5], 'km')
>>> print d.isclose(e).array
[ True  True]

>>> d = cf.Data(['ab', 'cdef'])
>>> print d.isclose([[['ab', 'cdef']]]).array
[[[ True  True]]]

>>> d = cf.Data([[1000, 2500], [1000, 2500]], 'metre')
>>> e = cf.Data([1, 2.5], 'km')
>>> print d.isclose(e).array
[[ True  True]
 [ True  True]]

>>> d = cf.Data([1, 1, 1], 's')
>>> print d.isclose(1).array
[ True  True  True]

'''
        if atol is None:
            atol = ATOL()
        if rtol is None:
            rtol = RTOL()

        try:
            return abs(self - y) <= atol + rtol*abs(y)
        except (TypeError, NotImplementedError, IndexError):
            return self == y
#--- End: def
[docs] def rint(self, i=False): ''' Round elements of the data array to the nearest integer. .. versionadded:: 1.0 .. seealso:: `ceil`, `floor`, `trunc` :Parameters: i : bool, optional If True then update the data array in place. By default a new data array is created. :Returns: out : cf.Data :Examples: >>> print d.array [-1.9 -1.5 -1.1 -1. 0. 1. 1.1 1.5 1.9] >>> print d.rint().array [-2. -2. -1. -1. 0. 1. 1. 2. 2.] ''' return self.func(numpy_rint, out=True, i=i)
    #--- End: def

    def round(self, decimals=0, i=False):
        '''

Evenly round elements of the data array to the given number of
decimals.

.. versionadded:: 1.1.4

.. seealso:: `ceil`, `floor`, `rint`, `trunc`

:Parameters:

    decimals : int, optional
        Number of decimal places to round to (default: 0). If decimals
        is negative, it specifies the number of positions to the left
        of the decimal point.

    i : bool, optional
        If True then update the data array in place. By default a new
        data array is created.

:Returns:

    out : cf.Data

:Examples:

>>> print d.array
[-1.81, -1.41, -1.01, -0.91, 0.09, 1.09, 1.19, 1.59, 1.99]
>>> print d.round().array
[-2., -1., -1., -1., 0., 1., 1., 2., 2.]
>>> print d.round(1).array
[-1.8, -1.4, -1. , -0.9, 0.1, 1.1, 1.2, 1.6, 2. ]
>>> print d.round(-1).array
[-0., -0., -0., -0., 0., 0., 0., 0., 0.]

'''
        return self.func(numpy_round, out=True, i=i, decimals=decimals)
    #--- End: def
    def swapaxes(self, axis0, axis1, i=False):
        '''

Interchange two axes of an array.

.. seealso:: `expand_dims`, `flip`, `squeeze`, `transpose`

:Parameters:

    axis0, axis1 : ints
        Select the axes to swap. Each axis is identified by its
        original integer position.

    i : bool, optional
        If True then update the data array in place. By default a new
        data array is created.

:Returns:

    out : cf.Data

:Examples:

>>> d = cf.Data([[[1, 2, 3], [4, 5, 6]]])
>>> d.shape
(1, 2, 3)
>>> d.swapaxes(1, 0).shape
(2, 1, 3)
>>> d.swapaxes(2, 1).shape
(1, 3, 2)
>>> d.swapaxes(0, -1).shape
(3, 2, 1)
>>> d.swapaxes(1, 1).shape
(1, 2, 3)

'''
        if i:
            d = self
        else:
            d = self.copy()

        axis0 = d._parse_axes((axis0,), 'swapaxes')[0]
        axis1 = d._parse_axes((axis1,), 'swapaxes')[0]

        if axis0 != axis1:
            iaxes = range(d._ndim)
            iaxes[axis1], iaxes[axis0] = axis0, axis1
            d.transpose(iaxes, i=True)
        #--- End: if

        return d
    #--- End: def

    def save_to_disk(self, itemsize=None):
        '''

Deprecated. Use ``not cf.Data.fits_in_memory`` instead.

'''
        raise NotImplementedError(
            "cf.Data.save_to_disk is dead. Use not cf.Data.fits_in_memory instead.")

#    '''
#
#Return True if the master array is large enough to be saved to disk.
#
#:Parameters:
#
#    itemsize : int, optional
#        The number of bytes per word of the master data array. By
#        default it is taken from the array's data type.
#
#:Returns:
#
#    out : bool
#
#:Examples:
#
#>>> print d.save_to_disk()
#True
#
#>>> print d.save_to_disk(8)
#False
#
#'''
#        if not itemsize:
#            try:
#                itemsize = self.dtype.itemsize
#            except AttributeError:
#                raise ValueError(
#                    "save_to_disk: Must set itemsize if there is no dtype")
#        #--- End: if
#
#        # ------------------------------------------------------------
#        # Note that self._size*(itemsize+1) is the array size in bytes
#        # including space for a full boolean mask
#        # ------------------------------------------------------------
#        return self._size*(itemsize+1) > FREE_MEMORY() - FM_THRESHOLD()
#    #--- End: def

    def fits_in_memory(self, itemsize):
        '''

Return True if the master array is small enough to be retained in
memory.

:Parameters:

    itemsize : int
        The number of bytes per word of the master data array.

:Returns:

    out : bool

:Examples:

>>> print d.fits_in_memory(8)
False

'''
        # ------------------------------------------------------------
        # Note that self._size*(itemsize+1) is the array size in bytes
        # including space for a full boolean mask
        # ------------------------------------------------------------
        return self._size*(itemsize+1) <= FREE_MEMORY() - FM_THRESHOLD()
    #--- End: def
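
    # ----------------------------------------------------------------
    # Illustrative sketch (not part of the original source): how the
    # fits_in_memory test is typically driven from an array's dtype.
    # The object `d` and the printed result are hypothetical; one
    # byte per element is added to allow for a full boolean mask, so
    # the check is size*(itemsize+1) against the free-memory margin.
    # ----------------------------------------------------------------
    # >>> import numpy
    # >>> d = cf.Data(numpy.ones((1000, 1000)))
    # >>> d.fits_in_memory(d.dtype.itemsize)
    # True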

    def where(self, condition, x=None, y=None, i=False):
        '''

Set data array elements depending on a condition.

Elements are set differently depending on where the condition is True
or False. Two assignment values are given: where the condition is
True the data array is set from one of them, and where the condition
is False it is set from the other.

Each assignment value may either contain a single datum, or be an
array-like object which is broadcastable to the shape of the data
array.

**Missing data**

The treatment of missing data elements depends on the value of the
`hardmask` attribute. If it is True then masked elements will not be
unmasked, otherwise masked elements may be set to any value.

In either case, unmasked elements may be set to any value (including
missing data). Unmasked elements may be set to missing data by
assignment to the `cf.masked` constant or by assignment to a value
which contains masked elements.

.. seealso:: `cf.masked`, `hardmask`, `__setitem__`

:Parameters:

    condition :
        Define the condition which determines how to set the data
        array. The condition is any object which is broadcastable to
        the data array shape. The data array is set where the object,
        broadcast to the data array shape, is True.

    x, y :
        Specify the assignment values. Where the condition is True,
        set the data array from *x*, and where the condition is
        False, set the data array from *y*. Arguments *x* and *y* are
        each one of:

          * ``None``. The appropriate elements of the data array are
            unchanged.
        ..

          * Any object which is broadcastable to the data array's
            shape. The appropriate elements of the data array are set
            to the corresponding values from the object broadcast to
            the data array shape.

    i : bool, optional
        If True then update the data array in place. By default a new
        data array is created.

:Returns:

    out : cf.Data

:Examples:

'''
        def _slice_to_partition(data, indices):
            '''

    Return a numpy array for the part of the `cf.Data` object which
    spans the given indices.

    :Parameters:

        data : cf.Data

        indices : tuple

    :Returns:

        out : numpy array

    '''
            indices2 = [(slice(0, 1) if n == 1 else i)
                        for n, i in zip(data.shape[::-1], indices[::-1])]

            return data[tuple(indices2)[::-1]].unsafe_array
        #--- End: def

        def _is_broadcastable(data0, data1, do_not_broadcast, is_scalar):
            '''

    Check that *data1* is broadcastable to *data0* and return *data1*,
    as a python scalar if possible.

    This function updates the following lists: do_not_broadcast,
    is_scalar.

    :Parameters:

        data0 : cf.Data

        data1 : cf.Data

        do_not_broadcast : list

        is_scalar : list

    :Returns:

        out : cf.Data or scalar

    '''
            shape0 = data0._shape
            shape1 = data1._shape
            size1  = data1._size

            if shape1 == shape0:
                do_not_broadcast.append(True)
                is_scalar.append(False)

            elif size1 == 1:
                do_not_broadcast.append(False)
                is_scalar.append(True)
                # Replace data1 with its scalar value
                data1 = data1.datum(0)

            elif data1._ndim <= data0._ndim and size1 < data0._size:
                do_not_broadcast.append(False)
                is_scalar.append(False)
                for n, m in zip(shape1[::-1], shape0[::-1]):
                    if n != m and n != 1:
                        raise ValueError(
                            "Can't assign by where: Can't broadcast data with shape %s to shape %s" %
                            (shape1, shape0))
            else:
                raise ValueError(
                    "Can't assign by where: Can't broadcast data with shape %s to shape %s" %
                    (shape1, shape0))
            #--- End: if

            return data1
        #--- End: def

        if i:
            d = self
        else:
            d = self.copy()

        if x is None and y is None:
            return d

        do_not_broadcast = []
        is_scalar        = []

        # ------------------------------------------------------------
        # Make sure that the condition is a cf.Data object
        # ------------------------------------------------------------
        if not isinstance(condition, d.__class__):
            condition = type(d)(condition)

        # Check that the condition is broadcastable
        condition = _is_broadcastable(d, condition,
                                      do_not_broadcast, is_scalar)

        # ------------------------------------------------------------
        # Parse x and y so that each is one of a) None, b) a scalar or
        # c) a data array with the same shape as the master array
        # ------------------------------------------------------------
        xy = []
        for value in (x, y):
            if value is None or value is cf_masked:
                do_not_broadcast.append(False)
                is_scalar.append(True)
            else:
                # Make sure that the value is a cf.Data object and has
                # compatible units
                if not isinstance(value, d.__class__):
                    value = type(d)(value)
                else:
                    if value.Units.equivalent(d.Units):
                        if not value.Units.equals(d.Units):
                            value = value.copy()
                            value.Units = d.Units
                    elif value.Units:
                        raise ValueError(
                            "where: Units %r are not equivalent to %r" %
                            (value.Units, d.Units))
                #--- End: if

                # Check that the value is broadcastable
                value = _is_broadcastable(d, value,
                                          do_not_broadcast, is_scalar)
            #--- End: if

            xy.append(value)
        #--- End: for

        x, y = xy
        c_is_scalar, x_is_scalar, y_is_scalar = is_scalar

        # ------------------------------------------------------------
        # Try some shortcuts if the condition is a scalar
        # ------------------------------------------------------------
        if c_is_scalar:
            if condition:
                if x is not None:
                    d[...] = x
                return d
            else:
                if y is not None:
                    d[...] = y
                return d
        #--- End: if

        # Still here?
        broadcast = not any(do_not_broadcast)
        hardmask  = d._hardmask

        pda_args = d.pda_args()  #readonly=True)

        for partition in d.partitions.matrix.flat:
            array = partition.dataarray(**pda_args)

            # --------------------------------------------------------
            # Find the master array indices for this partition
            # --------------------------------------------------------
            shape   = array.shape
            indices = partition.indices

            # --------------------------------------------------------
            # Find the condition for this partition
            # --------------------------------------------------------
            if c_is_scalar:
                c = condition
            else:
                c = _slice_to_partition(condition, indices)

            # --------------------------------------------------------
            # Find the value to use where the condition is True for
            # this partition
            # --------------------------------------------------------
            if x_is_scalar:
                if x is None:
                    # Use d
                    T = array
                    T_masked = partition.masked
                else:
                    T = x
                    T_masked = x is cf_masked
            else:
                T = _slice_to_partition(x, indices)
                T_masked = numpy_ma_isMA(T) and numpy_ma_is_masked(T)
            #--- End: if

            # --------------------------------------------------------
            # Find the value to use where the condition is False for
            # this partition
            # --------------------------------------------------------
            if y_is_scalar:
                if y is None:
                    # Use d
                    F = array
                    F_masked = partition.masked
                else:
                    F = y
                    F_masked = y is cf_masked
            else:
                F = _slice_to_partition(y, indices)
                F_masked = numpy_ma_isMA(F) and numpy_ma_is_masked(F)
            #--- End: if

            # --------------------------------------------------------
            # Make sure that at least one of the arrays is the same
            # shape as the partition
            # --------------------------------------------------------
            if broadcast:
                if x is cf_masked and y is cf_masked:
                    c = _broadcast(c, shape)
                else:
                    max_sizes = max((numpy_size(c),
                                     numpy_size(T),
                                     numpy_size(F)))
                    if numpy_size(c) == max_sizes:
                        c = _broadcast(c, shape)
                    elif numpy_size(T) == max_sizes:
                        T = _broadcast(T, shape)
                    else:
                        F = _broadcast(F, shape)
            #--- End: if

            # --------------------------------------------------------
            # Create a numpy array which takes values from T where c
            # is True and from F where c is False
            # --------------------------------------------------------
            if T_masked or F_masked:
                # T and/or F have missing data
                new = numpy_ma_where(c, T, F)
                if partition.masked:
                    if hardmask:
                        # The original partition has missing data and
                        # a hardmask, so apply the original
                        # partition's mask to the new array.
                        new.mask |= array.mask
                    elif not numpy_ma_is_masked(new):
                        # The original partition has missing data and
                        # a softmask and the new array doesn't have
                        # missing data, so turn the new array into an
                        # unmasked array.
                        new = new.data[...]
                elif not numpy_ma_is_masked(new):
                    # The original partition doesn't have missing data
                    # and neither does the new array
                    new = new.data[...]
            else:
                # Neither T nor F have missing data
                new = numpy_where(c, T, F)
                if partition.masked and hardmask:
                    # The original partition has missing data and a
                    # hardmask, so apply the original partition's mask
                    # to the new array.
                    new = numpy_ma_masked_where(array.mask, new, copy=False)
            #--- End: if

            # --------------------------------------------------------
            # Replace the partition's subarray with the new numpy
            # array
            # --------------------------------------------------------
            partition.subarray = new

            partition.close()
        #--- End: for

        return d
    #--- End: def
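
    # ----------------------------------------------------------------
    # Illustrative sketch (not part of the original source): typical
    # uses of where(). The data and printed values are assumptions
    # that follow from the documented semantics above.
    # ----------------------------------------------------------------
    # >>> d = cf.Data([[1, 2, 3], [4, 5, 6]])
    # >>> # Where elements exceed 3, set -1; elsewhere (y=None) keep d
    # >>> print d.where(d > 3, -1).array
    # [[ 1  2  3]
    #  [-1 -1 -1]]
    # >>> # Assigning cf.masked where the condition holds masks elements
    # >>> print d.where(d == 2, cf.masked).array
    # [[1 -- 3]
    #  [4 5 6]]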

    def sin(self, i=False):
        '''

Take the trigonometric sine of the data array element-wise.

Units are accounted for in the calculation. If the units are not
equivalent to radians (such as Kelvin) then they are treated as if
they were radians. For example, the sine of 90 degrees_east is 1.0,
as is the sine of 1.57079632 radians.

The Units are changed to ``'1'`` (nondimensional).

.. seealso:: `cos`, `tan`

:Parameters:

    i : bool, optional
        If True then update the data array in place. By default a new
        data array is created.

:Returns:

    out : cf.Data

:Examples:

>>> d.Units
<CF Units: degrees_north>
>>> print d.array
[[-90 0 90 --]]
>>> d = d.sin()
>>> d.Units
<CF Units: 1>
>>> print d.array
[[-1.0 0.0 1.0 --]]

>>> d.Units
<CF Units: m s-1>
>>> print d.array
[[1 2 3 --]]
>>> d = d.sin()
>>> d.Units
<CF Units: 1>
>>> print d.array
[[0.841470984808 0.909297426826 0.14112000806 --]]

'''
        if i:
            d = self
        else:
            d = self.copy()

        if d.Units.equivalent(_units_radians):
            d.Units = _units_radians

        return d.func(numpy_sin, units=_units_1, i=True)
    #--- End: def

    def log(self, base=None, i=False):
        '''

Take the logarithm of the data array element-wise.

:Parameters:

    base : number, optional
        The base of the logarithm. By default a natural logarithm is
        taken.

    i : bool, optional
        If True then update the data array in place. By default a new
        data array is created.

:Returns:

    out : cf.Data

'''
        if i:
            d = self
        else:
            d = self.copy()

        if base is None:
            d.func(numpy_log, units=d.Units.log(numpy_e), i=True)
        elif base == 10:
            d.func(numpy_log10, units=d.Units.log(10), i=True)
        elif base == 2:
            d.func(numpy_log2, units=d.Units.log(2), i=True)
        else:
            # Logarithm to an arbitrary base: ln(x)/ln(base)
            d.func(numpy_log, units=d.Units.log(base), i=True)
            d /= numpy_log(base)

        return d
    #--- End: def
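
    # ----------------------------------------------------------------
    # Illustrative sketch (not part of the original source): the base
    # handling in log(). The input data are hypothetical; the printed
    # values follow from standard logarithm identities.
    # ----------------------------------------------------------------
    # >>> d = cf.Data([1.0, 10.0, 100.0])
    # >>> print d.log(10).array        # base 10, via numpy_log10
    # [ 0.  1.  2.]
    # >>> print d.log().array          # natural log, via numpy_log
    # [ 0.          2.30258509  4.60517019]
    # >>> print d.log(5).array         # arbitrary base: ln(x)/ln(5)
    # [ 0.          1.43067656  2.86135312]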

    def squeeze(self, axes=None, i=False):
        '''

Remove size 1 axes from the data array.

By default all size 1 axes are removed, but particular axes may be
selected with the keyword arguments.

.. seealso:: `expand_dims`, `flip`, `swapaxes`, `transpose`

:Parameters:

    axes : (sequence of) int or str, optional
        Select the axes. By default all size 1 axes are removed. The
        *axes* argument may be one, or a sequence, of:

          * An internal axis identifier. Selects this axis.
        ..

          * An integer. Selects the axis corresponding to the given
            position in the list of axes of the data array.

        No axes are removed if *axes* is an empty sequence.

    i : bool, optional
        If True then update the data array in place. By default a new
        data array is created.

:Returns:

    out : cf.Data
        The squeezed data array.

:Examples:

>>> v.shape
(1,)
>>> v = v.squeeze()
>>> v.shape
()

>>> v.shape
(1, 2, 1, 3, 1, 4, 1, 5, 1, 6, 1)
>>> v = v.squeeze((0,))
>>> v.shape
(2, 1, 3, 1, 4, 1, 5, 1, 6, 1)
>>> v = v.squeeze(1)
>>> v.shape
(2, 3, 1, 4, 1, 5, 1, 6, 1)
>>> v = v.squeeze([2, 4])
>>> v.shape
(2, 3, 4, 5, 1, 6, 1)
>>> v = v.squeeze([])
>>> v.shape
(2, 3, 4, 5, 1, 6, 1)
>>> v = v.squeeze()
>>> v.shape
(2, 3, 4, 5, 6)

'''
        if i:
            d = self
        else:
            d = self.copy()

        ndim = d._ndim
        if not ndim:
            if axes or axes == 0:
                raise ValueError(
                    "Can't squeeze: Can't remove an axis from a scalar %s" %
                    d.__class__.__name__)
            return d
        #--- End: if

        shape = list(d._shape)

        if axes is None:
            axes = [i for i, n in enumerate(shape) if n == 1]
        else:
            axes = d._parse_axes(axes, 'squeeze')

            # Check the squeeze axes
            for i in axes:
                if shape[i] > 1:
                    raise ValueError(
                        "Can't squeeze %s: Can't remove axis of size > 1" %
                        d.__class__.__name__)
        #--- End: if

        if not axes:
            return d

        # Still here? Then the data array is not scalar and at least
        # one size 1 axis needs squeezing.
        data_axes = d._axes[:]
        flip      = d._flip[:]

        if not d._all_axes:
            d._all_axes = tuple(data_axes)

        i_axis = []
        for axis in [data_axes[i] for i in axes]:
            if axis in flip:
                flip.remove(axis)

            i = data_axes.index(axis)
            shape.pop(i)
            data_axes.pop(i)

            i_axis.append((i, axis))
        #--- End: for

        for partition in d.partitions.matrix.flat:
            p_location = partition.location[:]
            p_shape    = partition.shape[:]
            p_flip     = partition.flip[:]

            for i, axis in i_axis:
                p_location.pop(i)
                p_shape.pop(i)
                if axis in p_flip:
                    p_flip.remove(axis)
            #--- End: for

            partition.location = p_location
            partition.shape    = p_shape
            partition.flip     = p_flip
        #--- End: for

        d._ndim  = len(shape)
        d._shape = tuple(shape)

        d._axes = data_axes
        d._flip = flip

        # Remove size 1 partition dimensions
        d.partitions.squeeze(i=True)

        return d
    #--- End: def

    def tan(self, i=False):
        '''

Take the trigonometric tangent of the data array element-wise.

Units are accounted for in the calculation. If the units are not
equivalent to radians (such as Kelvin) then they are treated as if
they were radians. For example, the tangent of 45 degrees_east is
1.0, as is the tangent of 0.78539816 radians.

The Units are changed to ``'1'`` (nondimensional).

.. seealso:: `cos`, `sin`

:Parameters:

    i : bool, optional
        If True then update the data array in place. By default a new
        data array is created.

:Returns:

    out : cf.Data

:Examples:

>>> d.Units
<CF Units: degrees_north>
>>> print d.array
[[-45 0 45 --]]
>>> d = d.tan()
>>> d.Units
<CF Units: 1>
>>> print d.array
[[-1.0 0.0 1.0 --]]

>>> d.Units
<CF Units: m s-1>
>>> print d.array
[[1 2 3 --]]
>>> d = d.tan()
>>> d.Units
<CF Units: 1>
>>> print d.array
[[1.55740772465 -2.18503986326 -0.142546543074 --]]

'''
        if i:
            d = self
        else:
            d = self.copy()

        if d.Units.equivalent(_units_radians):
            d.Units = _units_radians

        return d.func(numpy_tan, units=_units_1, i=True)
    #--- End: def

    def tolist(self):
        '''

Return the array as a (possibly nested) list.

Return a copy of the array data as a (nested) Python list. Data items
are converted to the nearest compatible Python type.

:Returns:

    out : list
        The possibly nested list of array elements.

:Examples:

>>> d = cf.Data([1, 2])
>>> d.tolist()
[1, 2]

>>> d = cf.Data([[1, 2], [3, 4]])
>>> list(d)
[array([1, 2]), array([3, 4])]  # DCH CHECK
>>> d.tolist()
[[1, 2], [3, 4]]
>>> d.equals(cf.Data(d.tolist()))
True

'''
        return self.array.tolist()
    #--- End: def

    def transpose(self, axes=None, i=False):
        '''

Permute the axes of the data array.

.. seealso:: `expand_dims`, `flip`, `squeeze`, `swapaxes`

:Parameters:

    axes : (sequence of) int
        The new axis order of the data array. By default the order is
        reversed. Each axis of the new order is identified by its
        original integer position.

    i : bool, optional
        If True then update the data array in place. By default a new
        data array is created.

:Returns:

    out : cf.Data

:Examples:

>>> d.shape
(19, 73, 96)
>>> d = d.transpose()
>>> d.shape
(96, 73, 19)
>>> d = d.transpose([1, 0, 2])
>>> d.shape
(73, 96, 19)
>>> d = d.transpose((-1, 0, 1))
>>> d.shape
(19, 73, 96)

'''
        if i:
            d = self
        else:
            d = self.copy()

        ndim = d._ndim

        # Parse the axes. By default, reverse the order of the axes.
        if axes is None:
            if ndim <= 1:
                return d
            iaxes = range(ndim-1, -1, -1)
        else:
            iaxes = d._parse_axes(axes, 'transpose')

            # Return unchanged if axes are in the same order as the data
            if iaxes == range(ndim):
                return d

            if len(iaxes) != ndim:
                raise ValueError(
                    "Can't transpose: Axes don't match array: %s" % (iaxes,))
        #--- End: if

        # Permute the axes
        data_axes = d._axes
        d._axes = [data_axes[i] for i in iaxes]

        # Permute the shape
        shape = d._shape
        d._shape = tuple([shape[i] for i in iaxes])

        # Permute the locations map
        for partition in d.partitions.matrix.flat:
            location = partition.location
            shape    = partition.shape

            partition.location = [location[i] for i in iaxes]
            partition.shape    = [shape[i]    for i in iaxes]
        #--- End: for

        return d
    #--- End: def

    def trunc(self, i=False):
        '''

Return the truncated values of the data array.

The truncated value of the number, ``x``, is the nearest integer
which is closer to zero than ``x`` is. In short, the fractional part
of the signed number ``x`` is discarded.

.. versionadded:: 1.0

.. seealso:: `ceil`, `floor`, `rint`

:Parameters:

    i : bool, optional
        If True then update the data array in place. By default a new
        data array is created.

:Returns:

    out : cf.Data

:Examples:

>>> print d.array
[-1.9 -1.5 -1.1 -1.   0.   1.   1.1  1.5  1.9]
>>> print d.trunc().array
[-1. -1. -1. -1.  0.  1.  1.  1.  1.]

'''
        return self.func(numpy_trunc, out=True, i=i)
    #--- End: def

    @classmethod
    def empty(cls, shape, dtype=None, units=None):
        '''

Create a new data array without initializing the elements.

.. seealso:: `full`, `ones`, `zeros`

:Examples 1:

>>> d = cf.Data.empty((96, 73))

:Parameters:

    shape : int or tuple of ints
        The shape of the new array.

    dtype : numpy.dtype or any object convertible to numpy.dtype
        The data type of the new array. By default the data type is
        numpy.float64.

    units : str or Units
        The units for the new data array.

:Returns:

    out : cf.Data

:Examples 2:

'''
        return cls.full(shape, None, dtype=dtype, units=units)
    #--- End: def

    @classmethod
    def full(cls, shape, fill_value, dtype=None, units=None):
        '''

Create a new data array filled with the given value.

.. seealso:: `empty`, `ones`, `zeros`

'''
        array = CreateArray(shape=tuple(shape),
                            size=long(reduce(operator_mul, shape, 1)),
                            ndim=len(shape),
                            dtype=numpy_dtype(dtype),
                            fill_value=fill_value)

        return cls(array, units=units)
    #--- End: def

    @classmethod
    def ones(cls, shape, dtype=None, units=None):
        '''

Create a new data array filled with ones.

.. seealso:: `empty`, `full`, `zeros`

'''
        return cls.full(shape, 1, dtype=dtype, units=units)
    #--- End: def

    @classmethod
    def zeros(cls, shape, dtype=None, units=None):
        '''

Create a new data array filled with zeros.

.. seealso:: `empty`, `full`, `ones`

'''
        return cls.full(shape, 0, dtype=dtype, units=units)
    #--- End: def
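
    # ----------------------------------------------------------------
    # Illustrative sketch (not part of the original source): the
    # factory classmethods all delegate to full(). The shapes, dtype
    # and units shown are hypothetical.
    # ----------------------------------------------------------------
    # >>> d = cf.Data.zeros((2, 3), dtype='float32', units='K')
    # >>> d.shape
    # (2, 3)
    # >>> e = cf.Data.full((2, 3), 273.15, units='K')
    # >>> f = cf.Data.empty((2, 3))  # uninitialized, via full(shape, None)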

    def func(self, f, units=None, out=False, i=False, **kwargs):
        '''

Apply an element-wise array operation to the data array.

:Parameters:

    f : function
        The function to be applied.

    units : cf.Units, optional

    out : bool, optional

    i : bool, optional
        If True then update the data array in place. By default a new
        data array is created.

:Returns:

    out : cf.Data

:Examples:

>>> print d.Units
<CF Units: radians>
>>> print d.array
[[ 0.          1.57079633]
 [ 3.14159265  4.71238898]]
>>> import numpy
>>> d.func(numpy.cos, i=True)
>>> print d.array
[[ 1.0  0.0]
 [-1.0  0.0]]

'''
        if i:
            d = self
        else:
            d = self.copy()

        pda_args = d.pda_args()

        datatype = d.dtype

        for partition in d.partitions.matrix.flat:
            array = partition.dataarray(**pda_args)

            if out:
                f(array, out=array, **kwargs)
            else:
                array = f(array, **kwargs)

            p_datatype = array.dtype
            if datatype != p_datatype:
                datatype = numpy_result_type(p_datatype, datatype)

            partition.subarray = array

            if units is not None:
                partition.Units = units

            partition.close()
        #--- End: for

        d.dtype = datatype

        if units is not None:
            d.Units = units

        return d
    #--- End: def

    def range(self, axes=None, squeeze=True, mtol=1, i=False):
        '''

Collapse axes with the absolute difference between their maximum and
minimum values.

Missing data array elements are omitted from the calculation.

.. seealso:: `max`, `min`, `mean`, `mid_range`, `sample_size`, `sd`,
             `sum`, `sum_of_weights`, `sum_of_weights2`, `var`

:Parameters:

    i : bool, optional
        If True then update the data array in place. By default a new
        data array is created.

:Returns:

    out : cf.Data
        The collapsed array.

:Examples:

'''
        return self._collapse(range_f, range_fpartial, range_ffinalise,
                              axes=axes, squeeze=squeeze, weights=None,
                              mtol=mtol, i=i)
    #--- End: def
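
    # ----------------------------------------------------------------
    # Illustrative sketch (not part of the original source): a range
    # collapse over all axes. The data and printed value are
    # assumptions based on range = abs(max - min).
    # ----------------------------------------------------------------
    # >>> d = cf.Data([[1, 9], [4, 6]])
    # >>> print d.range().array   # squeeze=True by default -> scalar
    # 8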

    def roll(self, axis, shift, i=False):
        '''

Roll array elements along a given axis, in the manner of `numpy.roll`.

:Parameters:

    axis : int
        The axis along which elements are shifted.

    shift : int
        The number of places by which elements are shifted.

    i : bool, optional
        If True then update the data array in place. By default a new
        data array is created.

:Returns:

    out : cf.Data

'''
        if not shift:
            # Null roll
            if i:
                return self
            else:
                return self.copy()

        iaxes = self._parse_axes(axis, 'roll')
        if len(iaxes) != 1:
            raise ValueError("Can't roll: Must select exactly one axis")

        axis = iaxes[0]

        n = self._shape[axis]

        shift %= n
        if not shift:
            # Null roll
            if i:
                return self
            else:
                return self.copy()

        shift = n - shift

        indices0 = [slice(None)] * self._ndim
        indices0[axis] = slice(None, shift)

        indices1 = indices0[:]
        indices1[axis] = slice(shift, None)

        indices0 = tuple(indices0)
        indices1 = tuple(indices1)

        d = type(self).concatenate((self[indices1], self[indices0]),
                                   axis=axis)

        if i:
            self.__dict__ = d.__dict__
            return self
        else:
            return d
    #--- End: def
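
    # ----------------------------------------------------------------
    # Illustrative sketch (not part of the original source): roll()
    # follows numpy.roll semantics, implemented above by concatenating
    # the two halves of the axis in swapped order. The input data are
    # hypothetical.
    # ----------------------------------------------------------------
    # >>> d = cf.Data([0, 1, 2, 3, 4])
    # >>> print d.roll(0, 2).array   # same as numpy.roll(a, 2)
    # [3 4 0 1 2]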

    def sum(self, axes=None, squeeze=False, mtol=1, i=False):
        '''

Collapse axes with their sum.

Missing data array elements are omitted from the calculation.

.. seealso:: `max`, `min`, `mean`, `mid_range`, `range`,
             `sample_size`, `sd`, `sum_of_weights`, `sum_of_weights2`,
             `var`

:Parameters:

    axes : (sequence of) int, optional

    squeeze : bool, optional

    i : bool, optional
        If True then update the data array in place. By default a new
        data array is created.

:Returns:

    out : cf.Data
        The collapsed array.

:Examples:

'''
        return self._collapse(sum_f, sum_fpartial, sum_ffinalise,
                              axes=axes, squeeze=squeeze, weights=None,
                              mtol=mtol, i=i)
    #--- End: def
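
    # ----------------------------------------------------------------
    # Illustrative sketch (not part of the original source): a sum
    # collapse over one axis. By default squeeze=False, so the
    # collapsed axis is retained with size 1. Data and printed values
    # are assumptions.
    # ----------------------------------------------------------------
    # >>> d = cf.Data([[1, 2, 3], [4, 5, 6]])
    # >>> print d.sum(axes=1).array
    # [[ 6]
    #  [15]]
    # >>> print d.sum(axes=1, squeeze=True).array
    # [ 6 15]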

    def sum_of_weights(self, axes=None, squeeze=False, mtol=1,
                       weights=None, i=False):
        '''

Collapse axes with the sum of their weights.

Missing data array elements are omitted from the calculation.

.. seealso:: `max`, `mean`, `mid_range`, `min`, `range`,
             `sample_size`, `sd`, `sum`, `sum_of_weights2`, `var`

:Parameters:

    axes : (sequence of) int, optional

    squeeze : bool, optional

    i : bool, optional
        If True then update the data array in place. By default a new
        data array is created.

:Returns:

    out : cf.Data
        The collapsed array.

:Examples:

'''
        return self._collapse(sw_f, sw_fpartial, sw_ffinalise,
                              axes=axes, squeeze=squeeze,
                              weights=weights, mtol=mtol, i=i)
    #--- End: def

    def sum_of_weights2(self, axes=None, squeeze=False, mtol=1,
                        weights=None, i=False):
        '''

Collapse axes with the sum of the squares of their weights.

Missing data array elements are omitted from the calculation.

.. seealso:: `max`, `mean`, `mid_range`, `min`, `range`,
             `sample_size`, `sd`, `sum`, `sum_of_weights`, `var`

:Parameters:

    axes : (sequence of) int, optional

    squeeze : bool, optional

    i : bool, optional
        If True then update the data array in place. By default a new
        data array is created.

:Returns:

    out : cf.Data
        The collapsed array.

:Examples:

'''
        return self._collapse(sw2_f, sw2_fpartial, sw2_ffinalise,
                              axes=axes, squeeze=squeeze,
                              weights=weights, mtol=mtol, i=i)
    #--- End: def
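
    # ----------------------------------------------------------------
    # Illustrative sketch (not part of the original source):
    # sum_of_weights and sum_of_weights2 collapse with the sum of the
    # weights and the sum of their squares respectively. The data and
    # printed values are assumptions based on those descriptions.
    # ----------------------------------------------------------------
    # >>> d = cf.Data([1, 2, 3, 4])
    # >>> print d.sum_of_weights(weights=[2, 3, 5, 6]).array
    # 16
    # >>> print d.sum_of_weights2(weights=[2, 3, 5, 6]).array
    # 74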

    def sd(self, axes=None, squeeze=False, mtol=1, weights=None,
           ddof=1, a=None, i=False):
        r'''

Collapse axes by calculating their standard deviation.

The standard deviation may be adjusted for the number of degrees of
freedom and may be calculated with weighted values.

Missing data array elements and those with zero weight are omitted
from the calculation.

The unweighted standard deviation, :math:`s`, of :math:`N` values
:math:`x_i` with mean :math:`m` and with :math:`N-ddof` degrees of
freedom (:math:`ddof\ge0`) is

.. math:: s=\sqrt{\frac{1}{N-ddof} \sum_{i=1}^{N} (x_i - m)^2}

The weighted standard deviation, :math:`\tilde{s}_N`, of :math:`N`
values :math:`x_i` with corresponding weights :math:`w_i`, weighted
mean :math:`\tilde{m}` and with :math:`N` degrees of freedom is

.. math:: \tilde{s}_N=\sqrt{\frac{1}{\sum_{i=1}^{N} w_i}
                      \sum_{i=1}^{N} w_i(x_i - \tilde{m})^2}

The weighted standard deviation, :math:`\tilde{s}`, of :math:`N`
values :math:`x_i` with corresponding weights :math:`w_i` and with
:math:`N-ddof` degrees of freedom (:math:`ddof>0`) is

.. math:: \tilde{s} = \sqrt{\frac{f \sum_{i=1}^{N} w_i}{f
                      \sum_{i=1}^{N} w_i - ddof}} \tilde{s}_N

where :math:`f` is the smallest positive number whose product with
each weight is an integer. :math:`f \sum_{i=1}^{N} w_i` is the size
of a new sample created by each :math:`x_i` having :math:`fw_i`
repeats. In practice, :math:`f` may not exist or may be difficult to
calculate, so :math:`f` is either set to a predetermined value or an
approximate value is calculated. The approximation is the smallest
positive number whose products with the smallest and largest weights
and the sum of the weights are all integers, where a positive number
is considered to be an integer if its decimal part is sufficiently
small (no greater than :math:`10^{-8}` plus :math:`10^{-5}` times its
integer part). This approximation will never overestimate :math:`f`,
so :math:`\tilde{s}` will never be underestimated when the
approximation is used. If the weights are all integers which are
collectively coprime then setting :math:`f=1` will guarantee that
:math:`\tilde{s}` is exact.

:Parameters:

    axes : (sequence of) int, optional
        The axes to be collapsed. By default flattened input is
        used. Each axis is identified by its integer position. No
        axes are collapsed if *axes* is an empty sequence.

    squeeze : bool, optional
        If True then collapsed axes are removed. By default the axes
        which are collapsed are left in the result as axes with size
        1. When the collapsed axes are retained, the result is
        guaranteed to broadcast correctly against the original array.

        **Example:** Suppose that an array, ``d``, has shape
        (2, 3, 4) and ``e = d.sd(axis=1)``. Then ``e`` has shape
        (2, 1, 4) and, for example, ``d/e`` is allowed. If ``e =
        d.sd(axis=1, squeeze=True)`` then ``e`` will have shape
        (2, 4) and ``d/e`` is an illegal operation.

    weights : data-like or dict, optional
        Weights associated with values of the array. By default all
        non-missing elements of the array are assumed to have equal
        weights of 1. If *weights* is a data-like object then it must
        have either the same shape as the array or, if that is not
        the case, the same shape as the axes being collapsed. If
        *weights* is a dictionary then each key is axes of the array
        (an int or tuple of ints) with a corresponding data-like
        value of weights for those axes. In this case, the implied
        weights array is the outer product of the dictionary's
        values, and it may be used in conjunction with any value of
        *axes*, because the axes to which the weights apply are given
        explicitly.

        **Example:** Suppose that the original array being collapsed
        has shape (2, 3, 4) and *weights* is set to a data-like
        object, ``w``. If ``axes=None`` then ``w`` must have shape
        (2, 3, 4). If ``axes=(0, 1, 2)`` then ``w`` must have shape
        (2, 3, 4). If ``axes=(2, 0, 1)`` then ``w`` must either have
        shape (2, 3, 4) or else (4, 2, 3). If ``axes=1`` then ``w``
        must either have shape (2, 3, 4) or else (3,). If
        ``axes=(2, 0)`` then ``w`` must either have shape (2, 3, 4)
        or else (4, 2). Suppose *weights* is a dictionary. If
        ``weights={1: x}`` then ``x`` must have shape (3,). If
        ``weights={1: x, (2, 0): y}`` then ``x`` must have shape (3,)
        and ``y`` must have shape (4, 2). The last example is
        equivalent to ``weights={(1, 2, 0): x.outerproduct(y)}`` (see
        `outerproduct` for details).

    mtol : number, optional
        For each element in the output data array, the fraction of
        contributing input array elements which is allowed to contain
        missing data. Where this fraction exceeds *mtol*, missing
        data is returned. The default is 1, meaning a missing datum
        in the output array only occurs when its contributing input
        array elements are all missing data. A value of 0 means that
        a missing datum in the output array occurs whenever any of
        its contributing input array elements are missing data. Any
        intermediate value is permitted.

    ddof : number, optional
        The delta degrees of freedom. The number of degrees of
        freedom used in the calculation is (N-*ddof*) where N
        represents the number of elements. By default *ddof* is 1,
        meaning the standard deviation of the population is estimated
        according to the usual formula with (N-1) in the denominator
        to avoid the bias caused by the use of the sample mean
        (Bessel's correction).

    a : data-like, optional
        Specify the value of :math:`f` in the calculation of a
        weighted standard deviation when *ddof* is greater than
        0. *a* must be broadcastable to the collapsed data array with
        the collapsed axes removed. By default an approximate value
        is calculated. See the notes above for details.

    i : bool, optional
        If True then update the data array in place. By default a new
        data array is created.

:Returns:

    out : cf.Data
        The collapsed array.

:Examples:

Some, not wholly comprehensive, examples:

>>> d = cf.Data([1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4])
>>> e = cf.Data([1, 2, 3, 4])
>>> d.sd(squeeze=False)
<CF Data: [1.06262254195] >
>>> d.sd()
<CF Data: 1.06262254195 >
>>> e.sd(weights=[2, 3, 5, 6])
<CF Data: 1.09991882817 >
>>> e.sd(weights=[2, 3, 5, 6], a=1)
<CF Data: 1.06262254195 >
>>> d.sd(ddof=0)
<CF Data: 1.02887985207 >
>>> e.sd(ddof=0, weights=[2, 3, 5, 6])
<CF Data: 1.02887985207 >

'''
        if a is not None:
            if weights is None:
                a = None
            elif a <= 0:
                raise ValueError("Parameter 'a' must be positive")
        #--- End: if

        return self._collapse(sd_f, sd_fpartial, sd_ffinalise,
                              axes=axes, squeeze=squeeze,
                              weights=weights, mtol=mtol, ddof=ddof,
                              f=a, i=i)
    #--- End: def

    def var(self, axes=None, squeeze=False, weights=None, mtol=1,
            ddof=1, a=None, i=False):
        r'''

Collapse axes with their weighted variance.

The units of the returned array are the square of the units of the
array.

.. seealso:: `max`, `min`, `mean`, `mid_range`, `range`, `sum`, `sd`

:Parameters:

    axes : (sequence of) int, optional

    squeeze : bool, optional

    weights :

    i : bool, optional
        If True then update the data array in place. By default a new
        data array is created.

:Returns:

    out : cf.Data
        The collapsed array.

:Examples:

'''
        if a is not None:
            if weights is None:
                a = None
            elif a <= 0:
                raise ValueError("Parameter 'a' must be positive")
        #--- End: if

        units = self.Units
        if units:
            units = units ** 2

        return self._collapse(var_f, var_fpartial, var_ffinalise,
                              axes=axes, squeeze=squeeze,
                              weights=weights, mtol=mtol, units=units,
                              ddof=ddof, f=a, i=i)
    #--- End: def

    def section(self, axes, stop=None):
        '''

Return a dictionary of Data objects which are the m-dimensional
sections of this n-dimensional Data object, where m <= n. The keys of
the dictionary are the indices of the sections in the original Data
object. The m dimensions that are not sliced are marked with None as
a placeholder, making it possible to reconstruct the original Data
object. The corresponding values are the resulting sections of type
cf.Data.

:Parameters:

    axes : *optional*
        This should be a tuple or a list of the m indices of the m
        axes that define the sections of the Data object. If *axes*
        is None (the default) all axes are selected.

    stop : int, optional
        Stop after this number of sections and return. If *stop* is
        None all sections are taken.

:Returns:

    out : dict
        The dictionary of m-dimensional sections of the Data object.

:Examples:

Section a Data object into 2D slices:

>>> d.section((0, 1))

'''
        return _section(self, axes, data=True, stop=stop)
    #--- End: def
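
    # ----------------------------------------------------------------
    # Illustrative sketch (not part of the original source): what the
    # keys of a section dictionary might look like. The shape, the
    # axes chosen and the exact key format are assumptions based on
    # the description above (None marks an unsliced dimension).
    # ----------------------------------------------------------------
    # >>> d.shape
    # (2, 3, 4)
    # >>> sections = d.section((1, 2))   # keep axes 1 and 2 whole
    # >>> sorted(sections.keys())
    # [(0, None, None), (1, None, None)]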

#--- End: class


# ====================================================================
#
# SubspaceData object
#
# ====================================================================

class SubspaceData(object):

    __slots__ = ('data',)

    def __init__(self, data):
        '''

Set the contained data object.

'''
        self.data = data
    #--- End: def

    def __getitem__(self, indices):
        '''

Implement indexing

x.__getitem__(indices) <==> x[indices]

'''
        return self.data[indices]
    #--- End: def

    def __setitem__(self, indices, value):
        '''

Implement indexed assignment

x.__setitem__(indices, value) <==> x[indices]=value

'''
        self.data[indices] = value
    #--- End: def

#--- End: class


def _size_of_index(index, size=None):
    '''

Return the number of elements resulting from applying an index to a
sequence.

:Parameters:

    index : slice or list of ints
        The index being applied to the sequence.

    size : int, optional
        The number of elements in the sequence being indexed. Only
        required if *index* is a slice object.

:Returns:

    out : int
        The length of the sequence resulting from applying the index.

:Examples:

>>> _size_of_index(slice(None, None, -2), 10)
5
>>> _size_of_index([1, 4, 9])
3

'''
    if isinstance(index, slice):
        # Index is a slice object
        start, stop, step = index.indices(size)
        div, mod = divmod(stop-start, step)
        if mod != 0:
            div += 1
        return div
    else:
        # Index is a list of integers
        return len(index)
#--- End: def


def _parse_indices(data, indices):
    '''

:Parameters:

    data :

    indices : sequence of indices

:Returns:

    parsed_indices, roll, flip_axes

:Examples:

'''
    parsed_indices, roll = parse_indices(data, indices, True)

    flip_axes = []

    for i, index in enumerate(parsed_indices):

        if isinstance(index, slice):
            size = data.shape[i]
            if index.step < 0:
                # If the slice step is negative, then transform the
                # original slice to a new slice with a positive step
                # such that the result of the new slice is the
                # reverse of the result of the original slice.
                #
                # For example, if the original slice is slice(6,0,-2)
                # then the new slice will be slice(2,7,2):
                #
                # >>> a = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
                # >>> a[slice(6, 0, -2)]
                # [6, 4, 2]
                # >>> a[slice(2, 7, 2)]
                # [2, 4, 6]
                # >>> a[slice(6, 0, -2)] == list(reversed(a[slice(2, 7, 2)]))
                # True
                start, stop, step = index.indices(size)
                step *= -1
                div, mod = divmod(start-stop-1, step)
                div_step = div*step
                start -= div_step
                stop = start + div_step + 1

                index = slice(start, stop, step)
                flip_axes.append(i)
            #--- End: if

            # If step is greater than one, make sure that index.stop
            # isn't bigger than it needs to be.
            if index.step > 1:
                start, stop, step = index.indices(size)
                div, mod = divmod(stop-start-1, step)
                stop = start + div*step + 1
                index = slice(start, stop, step)
            #--- End: if

            parsed_indices[i] = index

        else:
            # --------------------------------------------------------
            # Check that an integer list is strictly monotonic and if
            # it's descending then flip it so that it's ascending
            # --------------------------------------------------------
            step = index[1] - index[0]

            if step > 0:
                for j in xrange(len(index)-1):
                    if index[j] >= index[j+1]:
                        raise ValueError(
                            "Bad slice (not strictly monotonic): %s" % index)

            elif step < 0:
                for j in xrange(len(index)-1):
                    if index[j] <= index[j+1]:
                        raise ValueError(
                            "Bad slice (not strictly monotonic): %s" % index)

                # Reverse the list so that it's strictly monotonically
                # increasing and make a note that this dimension will
                # need flipping later
                index.reverse()
                flip_axes.append(i)

            else:
                # Step is 0
                raise ValueError(
                    "Bad slice (not strictly monotonic): %s" % index)
        #--- End: if
    #--- End: for

    return parsed_indices, roll, flip_axes
#--- End: def


def _overlapping_partitions(partitions, indices, axes, master_flip):
    '''

Return the nested list of (modified) partitions which overlap the
given indices of the master array.

:Parameters:

    partitions : cf.PartitionMatrix

    indices : tuple

    axes : sequence of str

    master_flip : list

:Returns:

    out : numpy array
        A numpy array of cf.Partition objects.

:Examples:

>>> type(f.Data)
<class 'cf.data.Data'>
>>> d._axes
['dim1', 'dim2', 'dim0']
>>> axis_to_position = {'dim0': 2, 'dim1': 0, 'dim2': 1}
>>> indices = (slice(None), slice(5, 1, -2), [1, 3, 4, 8])
>>> x = _overlapping_partitions(d.partitions, indices, d._axes, master_flip)

'''
    axis_to_position = {}
    for i, axis in enumerate(axes):
        axis_to_position[axis] = i

    if partitions.size == 1:
        partition = partitions.matrix.item()

        # Find out if this partition overlaps the original slice
        p_indices, shape = partition.overlaps(indices)

        if p_indices is None:
            # This partition is not in the slice (out of bounds --
            # raise an error?)
            return

        # Still here? Create a new partition
        partition = partition.copy()
        partition.new_part(p_indices, axis_to_position, master_flip)
        partition.shape = shape

        new_partition_matrix = numpy_empty(partitions.shape, dtype=object)
        new_partition_matrix[...] = partition

        return new_partition_matrix
    #--- End: if

    # Still here? Then there are 2 or more partitions.
    partitions_list        = []
    partitions_list_append = partitions_list.append

    flat_pm_indices        = []
    flat_pm_indices_append = flat_pm_indices.append

    partitions_flat = partitions.matrix.flat

    i = partitions_flat.index
    for partition in partitions_flat:
        # Find out if this partition overlaps the original slice
        p_indices, shape = partition.overlaps(indices)

        if p_indices is None:
            # This partition is not in the slice
            i = partitions_flat.index
            continue

        # Still here? Then this partition overlaps the slice, so
        # create a new partition.
        partition = partition.copy()
        partition.new_part(p_indices, axis_to_position, master_flip)
        partition.shape = shape

        partitions_list_append(partition)

        flat_pm_indices_append(i)

        i = partitions_flat.index
    #--- End: for

    new_shape = [len(set(s))
                 for s in numpy_unravel_index(flat_pm_indices,
                                              partitions.shape)]

    new_partition_matrix = numpy_empty((len(flat_pm_indices),), dtype=object)
    new_partition_matrix[...] = partitions_list
    new_partition_matrix.resize(new_shape)

    return new_partition_matrix
#--- End: def


# --------------------------------------------------------------------
#
# --------------------------------------------------------------------
def _getattr(x, attr):
    return getattr(x, attr)

_array_getattr = numpy_vectorize(_getattr)


def _broadcast(a, shape):
    '''

Broadcast an array to a given shape.

It is assumed that ``len(array.shape) <= len(shape)`` and that the
array is broadcastable to the shape by the normal numpy broadcasting
rules, but neither of these things is checked.

For example, ``d[...] = _broadcast(e, d.shape)`` gives the same
result as ``d[...] = e``.

:Parameters:

    a : numpy array-like

    shape : tuple

:Returns:

    out : numpy array

'''
    # Replace with numpy.broadcast_to (numpy >= 1.10)?
    a_shape = numpy_shape(a)
    if a_shape == shape:
        return a

    tile = [(m if n == 1 else 1)
            for n, m in zip(a_shape[::-1], shape[::-1])]
    tile = shape[0:len(shape)-len(a_shape)] + tuple(tile[::-1])

    return numpy_tile(a, tile)
#--- End: def
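
# --------------------------------------------------------------------
# Illustrative sketch (not part of the original source): how the tile
# computed by _broadcast() reproduces numpy broadcasting. For a shape
# (3,) input and target shape (2, 3), the tile works out to (2, 1),
# so numpy_tile repeats the row twice.
# --------------------------------------------------------------------
# >>> import numpy
# >>> a = numpy.array([1, 2, 3])     # shape (3,)
# >>> print _broadcast(a, (2, 3))    # tiled to shape (2, 3)
# [[1 2 3]
#  [1 2 3]]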