Python numpy module: integer() example source code
The following code examples, extracted from open-source Python projects, illustrate how to use numpy.integer().
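np.integer is the abstract base class of all NumPy integer scalar types (np.int8 through np.uint64). Two patterns recur throughout the snippets below: isinstance() checks on scalars and np.issubdtype() checks on array dtypes. A minimal sketch of both follows (every example on this page assumes numpy is imported as np, or as _np where a snippet shows that alias):

import numpy as np

# Scalar check: np.integer matches every concrete NumPy integer type ...
assert isinstance(np.int32(5), np.integer)
assert isinstance(np.uint64(5), np.integer)
# ... but not plain Python ints, which is why many snippets test (int, np.integer).
assert not isinstance(5, np.integer)

# Dtype check: np.issubdtype is the analogous test for array dtypes.
assert np.issubdtype(np.arange(3).dtype, np.integer)
assert not np.issubdtype(np.ones(3).dtype, np.integer)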
def pack_samples(self, samples, dtype=None):
"""Pack samples into one integer per sample
Store one sample in a single integer instead of a list of
integers with length `len(self.nsoutdims)`. Example:
>>> p = pauli_mpp(nr_sites=2, local_dim=2)
>>> p.outdims
(6, 6)
>>> p.pack_samples(np.array([[0, 1], [1, 0], [1, 2], [5, 5]]))
array([ 1, 6, 8, 35])
"""
assert samples.ndim == 2
assert samples.shape[1] == len(self.nsoutdims)
samples = np.ravel_multi_index(samples.T, self.nsoutdims)
    # `dtype` may be None or a bool-like sentinel; only cast when it is a real NumPy integer type
    if dtype not in (True, False, None) and issubclass(dtype, np.integer):
info = np.iinfo(dtype)
assert samples.min() >= info.min
assert samples.max() <= info.max
samples = samples.astype(dtype)
return samples
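pack_samples delegates the packing itself to np.ravel_multi_index, which maps each multi-index to its flat offset in row-major order. A self-contained sketch reproducing the docstring example, with the outcome dimensions (6, 6) taken from p.outdims above:

import numpy as np

nsoutdims = (6, 6)  # per-site outcome dimensions, as in the docstring
samples = np.array([[0, 1], [1, 0], [1, 2], [5, 5]])
packed = np.ravel_multi_index(samples.T, nsoutdims)
print(packed)  # [ 1  6  8 35], matching the docstring of pack_samples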
def __init__(self, config, model_dir, ob_shape_list):
self.model_dir = model_dir
self.cnn_format = config.cnn_format
self.memory_size = config.memory_size
self.actions = np.empty(self.memory_size, dtype = np.uint8)
self.rewards = np.empty(self.memory_size, dtype = np.integer)
# print(self.memory_size, config.screen_height, config.screen_width)
# self.screens = np.empty((self.memory_size, config.screen_height, config.screen_width), dtype = np.float16)
self.screens = np.empty([self.memory_size] + ob_shape_list, dtype = np.float16)
self.terminals = np.empty(self.memory_size, dtype = np.bool)
self.history_length = config.history_length
# self.dims = (config.screen_height, config.screen_width)
self.dims = tuple(ob_shape_list)
self.batch_size = config.batch_size
self.count = 0
self.current = 0
# pre-allocate prestates and poststates for minibatch
self.prestates = np.empty((self.batch_size, self.history_length) + self.dims, dtype = np.float16)
self.poststates = np.empty((self.batch_size, self.history_length) + self.dims, dtype = np.float16)
# self.prestates = np.empty((self.batch_size, self.history_length, self.dims), dtype = np.float16)
# self.poststates = np.empty((self.batch_size, self.history_length, self.dims), dtype = np.float16)
def test_auto_dtype_largeint(self):
# Regression test for numpy/numpy#5635 whereby large integers could
# cause OverflowErrors.
# Test the automatic definition of the output dtype
#
# 2**66 = 73786976294838206464 => should convert to float
# 2**34 = 17179869184 => should convert to int64
# 2**10 = 1024 => should convert to int (int32 on 32-bit systems,
# int64 on 64-bit systems)
data = TextIO('73786976294838206464 17179869184 1024')
test = np.ndfromtxt(data, dtype=None)
assert_equal(test.dtype.names, ['f0', 'f1', 'f2'])
assert_(test.dtype['f0'] == np.float)
assert_(test.dtype['f1'] == np.int64)
assert_(test.dtype['f2'] == np.integer)
assert_allclose(test['f0'], 73786976294838206464.)
assert_equal(test['f1'], 17179869184)
assert_equal(test['f2'], 1024)
def test_with_incorrect_minlength(self):
x = np.array([], dtype=int)
assert_raises_regex(TypeError, "an integer is required",
lambda: np.bincount(x, minlength="foobar"))
assert_raises_regex(ValueError, "must be positive",
lambda: np.bincount(x, minlength=-1))
assert_raises_regex(ValueError, "must be positive",
lambda: np.bincount(x, minlength=0))
x = np.arange(5)
assert_raises_regex(TypeError, "an integer is required",
lambda: np.bincount(x, minlength="foobar"))
assert_raises_regex(ValueError, "minlength must be positive",
lambda: np.bincount(x, minlength=-1))
assert_raises_regex(ValueError, "minlength must be positive",
lambda: np.bincount(x, minlength=0))
def test_allclose(self):
# Tests allclose on arrays
a = np.random.rand(10)
b = a + np.random.rand(10) * 1e-8
self.assertTrue(allclose(a, b))
# Test allclose w/ infs
a[0] = np.inf
self.assertTrue(not allclose(a, b))
b[0] = np.inf
self.assertTrue(allclose(a, b))
# Test allclose w/ masked
a = masked_array(a)
a[-1] = masked
self.assertTrue(allclose(a, b, masked_equal=True))
self.assertTrue(not allclose(a, b, masked_equal=False))
# Test comparison w/ scalar
a *= 1e-8
a[0] = 0
self.assertTrue(allclose(a, 0, masked_equal=True))
# Test that the function works for MIN_INT integer typed arrays
a = masked_array([np.iinfo(np.int_).min], dtype=np.int_)
self.assertTrue(allclose(a, a))
def _parse_fields(point):
output = []
for k, v in point['fields'].items():
k = escape(k, key_escape)
# noinspection PyUnresolvedReferences
if isinstance(v, bool):
output.append('{k}={v}'.format(k=k, v=str(v).upper()))
elif isinstance(v, (int, np.integer)):
output.append('{k}={v}i'.format(k=k, v=v))
elif isinstance(v, str):
output.append('{k}="{v}"'.format(k=k, v=v.translate(str_escape)))
elif v is None or np.isnan(v):
continue
else:
# Floats and other numerical formats go here.
# TODO: Add unit test
output.append('{k}={v}'.format(k=k, v=v))
return ','.join(output)
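_parse_fields builds InfluxDB line-protocol field strings, where a trailing `i` marks an integer field; testing against the pair (int, np.integer) makes NumPy integer scalars serialize exactly like Python ints. A small illustration of that branch alone (the key name 'count' is hypothetical):

import numpy as np

v = np.int64(42)
assert isinstance(v, (int, np.integer))       # takes the integer branch above
print('{k}={v}i'.format(k='count', v=v))      # prints: count=42i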
def get_subvolume(self, bounds):
if bounds.start is None or bounds.stop is None:
image_subvol = self.image_data
label_subvol = self.label_data
else:
image_subvol = self.image_data[
bounds.start[0]:bounds.stop[0],
bounds.start[1]:bounds.stop[1],
bounds.start[2]:bounds.stop[2]]
label_subvol = None
if np.issubdtype(image_subvol.dtype, np.integer):
raise ValueError('Sparse volume access does not support image data coercion.')
seed = bounds.seed
if seed is None:
seed = np.array(image_subvol.shape, dtype=np.int64) // 2
return Subvolume(image_subvol, label_subvol, seed, bounds.label_id)
def __init__(self, X_train, y_train, X_test, y_test, categorical=True):
self._x_train = X_train
self._x_test = X_test
# are the targets to be made one hot vectors
if categorical:
self._y_train = np_utils.to_categorical(y_train)
self._y_test = np_utils.to_categorical(y_test)
self._output_size = self._y_train.shape[1]
# handle sparse output classification
elif issubclass(y_train.dtype.type, np.integer):
self._y_train = y_train
self._y_test = y_test
self._output_size = self._y_train.max() + 1 # assume 0 based indexes
# not classification, just copy them
else:
self._y_train = y_train
self._y_test = y_test
self._output_size = self._y_train.shape[1]
def _changeArrayDType(img, dtype, **kwargs):
if dtype == 'noUint':
return toNoUintArray(img)
if issubclass(np.dtype(dtype).type, np.integer):
return toUIntArray(img, dtype, **kwargs)
return img.astype(dtype)
# def bitDepth(path, img=None):
# '''
# there are no python filetypes between 8bit and 16 bit
# so, to find out whether an image is 12 or 14 bit resolved
# we need to check actual file size and image shape
# '''
# if img is None:
# img = imread(img)
# size = os.path.getsize(path)*8
# print (size, img.size,8888888,img.shape, size/img.size)
# kh
# return size/img.size
def __init__(self, config, model_dir):
self.model_dir = model_dir
self.cnn_format = config.cnn_format
self.memory_size = config.memory_size
self.actions = np.empty(self.memory_size, dtype = np.uint8)
self.rewards = np.empty(self.memory_size, dtype = np.integer)
self.screens = np.empty((self.memory_size, config.screen_height, config.screen_width), dtype = np.float16)
self.terminals = np.empty(self.memory_size, dtype = np.bool)
self.history_length = config.history_length
self.dims = (config.screen_height, config.screen_width)
self.batch_size = config.batch_size
self.count = 0
self.current = 0
# pre-allocate prestates and poststates for minibatch
self.prestates = np.empty((self.batch_size, self.history_length) + self.dims, dtype = np.float16)
self.poststates = np.empty((self.batch_size, self.history_length) + self.dims, dtype = np.float16)
def _get_block(self, values, key_remainder=None):
item_block = None
for i, v in enumerate(values):
# Lists in the aggregate key index in tandem;
# so, index into those lists (the first list is `values`)
v_key_remainder = key_remainder
if isinstance(values, tuple) or isinstance(values, list):
if key_remainder is not None:
broadcasted_key_remainder = ()
for k in key_remainder:
if hasattr(k, '__len__') and len(k)==np.size(k):
broadcasted_key_remainder += (k[i],)
else:
broadcasted_key_remainder += (k,)
v_key_remainder = broadcasted_key_remainder
# Make a single read at an integer index of axis 0
elem = self._get_element(v, v_key_remainder)
if item_block is None:
item_block = np.zeros((len(values),)+elem.shape,
self.dtype)
item_block[i] = elem
return item_block
def fcn(self, data_in):
"""
    If a list is returned, element [0] is the original data and element [1] the affected data.
"""
inst_nrb_merge = _MergeNRBs(nrb_left=self.nrb_left,
nrb_right=self.nrb_right,
pix=self.parameters['pix_switchpt'],
left_side_scale=self.parameters['scale_left'])
if self.fullRange:
pix = _np.arange(self.wn.size, dtype=_np.integer)
else:
list_rng_pix = _find_nearest(self.wn, self.rng)[1]
pix = _np.arange(list_rng_pix[0],list_rng_pix[1]+1,
dtype=_np.integer)
nrb_merged = inst_nrb_merge.calculate()
kkd = _np.zeros(data_in.shape)
# Note: kk_widget.fcn return imag part
kkd[..., pix] = self.kk_widget.fcn([nrb_merged[pix], data_in[..., pix]])
return [_np.vstack((self.nrb_left, self.nrb_right, nrb_merged)),
kkd]
def __init__(self, path, size ,historySize, dims , batchSize):
self.size = size
self.dims = dims
# preallocate memory
self.actions = np.empty(self.size, dtype=np.uint8)
self.rewards = np.empty(self.size, dtype=np.integer)
self.screens = np.empty((self.size, self.dims[0], self.dims[1] ), dtype=np.uint8)
self.terminals = np.empty(self.size, dtype=np.bool)
self.history_length = historySize
self.batch_size = batchSize
self.buffer = np.zeros((self.batch_size, self.history_length) + self.dims, dtype=np.uint8)
self.count = 0
self.current = 0
# pre-allocate prestates and poststates for minibatch
self.prestates = np.empty([self.batch_size, self.history_length] + self.dims, dtype=np.uint8)
self.poststates = np.empty([self.batch_size, self.history_length] + self.dims, dtype=np.uint8)
def _prepare_mask(mask, label, erode=True):
fgmask = mask.copy()
if np.issubdtype(fgmask.dtype, np.integer):
if isinstance(label, string_types):
label = FSL_FAST_LABELS[label]
fgmask[fgmask != label] = 0
fgmask[fgmask == label] = 1
else:
fgmask[fgmask > .95] = 1.
fgmask[fgmask < 1.] = 0
if erode:
# Create a structural element to be used in an opening operation.
struc = nd.generate_binary_structure(3, 2)
# Perform an opening operation on the background data.
fgmask = nd.binary_opening(fgmask, structure=struc).astype(np.uint8)
return fgmask
def check_random_state(seed):
"""Turn seed into a np.random.RandomState instance.
If seed is None, return the RandomState singleton used by np.random.
If seed is an int, return a new RandomState instance seeded with seed.
If seed is already a RandomState instance, return it.
Otherwise raise ValueError.
"""
if seed is None or seed is np.random:
return np.random.mtrand._rand
if isinstance(seed, (int, np.integer)):
return np.random.RandomState(seed)
if isinstance(seed, np.random.RandomState):
return seed
raise ValueError('%r cannot be used to seed a numpy.random.RandomState'
' instance' % seed)
def check_random_state(seed):
"""Turn seed into a np.random.RandomState instance
If seed is None, return the RandomState singleton used by np.random.
If seed is an int, return a new RandomState instance seeded with seed.
If seed is already a RandomState instance, return it.
Otherwise raise ValueError.
"""
if seed is None or seed is np.random:
return np.random.mtrand._rand
if isinstance(seed, (numbers.Integral, np.integer)):
return np.random.RandomState(seed)
if isinstance(seed, np.random.RandomState):
return seed
raise ValueError('%r cannot be used to seed a numpy.random.RandomState'
' instance' % seed)
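A usage sketch for check_random_state, assuming the definition above (plus `import numbers`). Recent NumPy versions register their integer scalars with numbers.Integral, so the (numbers.Integral, np.integer) pair is deliberately belt-and-braces:

import numpy as np

rng = check_random_state(0)              # an int seed yields a fresh RandomState
assert isinstance(rng, np.random.RandomState)
assert check_random_state(rng) is rng    # an existing RandomState passes through
glob = check_random_state(None)          # None yields the global np.random state
print(rng.randint(10))                   # deterministic given the seed above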
def _can_reindex(self, indexer):
"""
*this is an internal non-public method*
Check if we are allowing reindexing with this particular indexer
Parameters
----------
indexer : an integer indexer
Raises
------
ValueError if its a duplicate axis
"""
# trying to reindex on an axis with duplicates
if not self.is_unique and len(indexer):
raise ValueError("cannot reindex from a duplicate axis")
def unique1d(values):
"""
Hash table-based unique
"""
if np.issubdtype(values.dtype, np.floating):
table = _hash.Float64HashTable(len(values))
uniques = np.array(table.unique(_ensure_float64(values)),
dtype=np.float64)
elif np.issubdtype(values.dtype, np.datetime64):
table = _hash.Int64HashTable(len(values))
uniques = table.unique(_ensure_int64(values))
uniques = uniques.view('M8[ns]')
elif np.issubdtype(values.dtype, np.timedelta64):
table = _hash.Int64HashTable(len(values))
uniques = table.unique(_ensure_int64(values))
uniques = uniques.view('m8[ns]')
elif np.issubdtype(values.dtype, np.integer):
table = _hash.Int64HashTable(len(values))
uniques = table.unique(_ensure_int64(values))
else:
table = _hash.PyObjectHashTable(len(values))
uniques = table.unique(_ensure_object(values))
return uniques
def shift(self, periods, axis=0, mgr=None):
""" shift the block by periods """
N = len(self.values.T)
indexer = np.zeros(N, dtype=int)
if periods > 0:
indexer[periods:] = np.arange(N - periods)
else:
indexer[:periods] = np.arange(-periods, N)
new_values = self.values.to_dense().take(indexer)
# convert integer to float if necessary. need to do a lot more than
# that, handle boolean etc also
new_values, fill_value = com._maybe_upcast(new_values)
if periods > 0:
new_values[:periods] = fill_value
else:
new_values[periods:] = fill_value
return [self.make_block_same_class(new_values,
placement=self.mgr_locs)]
def to_sparse(self, fill_value=None, kind='block'):
"""
Convert to SparseDataFrame
Parameters
----------
fill_value : float, default NaN
kind : {'block', 'integer'}
Returns
-------
y : SparseDataFrame
"""
from pandas.core.sparse import SparseDataFrame
return SparseDataFrame(self._series, index=self.index,
columns=self.columns, default_kind=kind,
default_fill_value=fill_value)
def __getitem__(self, key):
# shortcut if we are an actual column
is_mi_columns = isinstance(self.columns, MultiIndex)
try:
if key in self.columns and not is_mi_columns:
return self._getitem_column(key)
except:
pass
# see if we can slice the rows
indexer = convert_to_index_sliceable(self, key)
if indexer is not None:
return self._getitem_slice(indexer)
if isinstance(key, (Series, np.ndarray, Index, list)):
# either boolean or fancy integer index
return self._getitem_array(key)
elif isinstance(key, DataFrame):
return self._getitem_frame(key)
elif is_mi_columns:
return self._getitem_multilevel(key)
else:
return self._getitem_column(key)
def test_grouper_multilevel_freq(self):
# GH 7885
# with level and freq specified in a pd.Grouper
from datetime import date, timedelta
d0 = date.today() - timedelta(days=14)
dates = date_range(d0, date.today())
date_index = pd.MultiIndex.from_product(
[dates, dates], names=['foo', 'bar'])
df = pd.DataFrame(np.random.randint(0, 100, 225), index=date_index)
# Check string level
expected = df.reset_index().groupby([pd.Grouper(
key='foo', freq='W'), pd.Grouper(key='bar', freq='W')]).sum()
# reset index changes columns dtype to object
expected.columns = pd.Index([0], dtype='int64')
result = df.groupby([pd.Grouper(level='foo', freq='W'), pd.Grouper(
level='bar', freq='W')]).sum()
assert_frame_equal(result, expected)
# Check integer level
result = df.groupby([pd.Grouper(level=0, freq='W'), pd.Grouper(
level=1, freq='W')]).sum()
assert_frame_equal(result, expected)
def test_floats(self):
arr = np.array([1., 2., 3., np.float64(4), np.float32(5)], dtype='O')
result = lib.infer_dtype(arr)
self.assertEqual(result, 'floating')
arr = np.array([1, 2, 3, np.float64(4), np.float32(5), 'foo'],
dtype='O')
result = lib.infer_dtype(arr)
self.assertEqual(result, 'mixed-integer')
arr = np.array([1, 2, 3, 4, 5], dtype='f4')
result = lib.infer_dtype(arr)
self.assertEqual(result, 'floating')
arr = np.array([1, 2, 3, 4, 5], dtype='f8')
result = lib.infer_dtype(arr)
self.assertEqual(result, 'floating')
def test_fancy_setitem_int_labels(self):
# integer index defers to label-based indexing
df = DataFrame(np.random.randn(10, 5), index=np.arange(0, 20, 2))
tmp = df.copy()
exp = df.copy()
tmp.ix[[0, 2, 4]] = 5
exp.values[:3] = 5
assert_frame_equal(tmp, exp)
tmp = df.copy()
exp = df.copy()
tmp.ix[6] = 5
exp.values[3] = 5
assert_frame_equal(tmp, exp)
tmp = df.copy()
exp = df.copy()
tmp.ix[:, 2] = 5
# tmp correctly sets the dtype
# so match the exp way
exp[2] = 5
assert_frame_equal(tmp, exp)
def test_default_type_conversion(self):
df = sql.read_sql_table("types_test_data", self.conn)
self.assertTrue(issubclass(df.FloatCol.dtype.type, np.floating),
"FloatCol loaded with incorrect type")
self.assertTrue(issubclass(df.IntCol.dtype.type, np.integer),
"IntCol loaded with incorrect type")
self.assertTrue(issubclass(df.BoolCol.dtype.type, np.bool_),
"BoolCol loaded with incorrect type")
# Int column with NA values stays as float
self.assertTrue(issubclass(df.IntColWithNull.dtype.type, np.floating),
"IntColWithNull loaded with incorrect type")
# Bool column with NA values becomes object
self.assertTrue(issubclass(df.BoolColWithNull.dtype.type, np.object),
"BoolColWithNull loaded with incorrect type")
def test_default_type_conversion(self):
df = sql.read_sql_table("types_test_data", self.conn)
self.assertTrue(issubclass(df.FloatCol.dtype.type, np.floating),
"FloatCol loaded with incorrect type")
self.assertTrue(issubclass(df.IntCol.dtype.type, np.integer),
"IntCol loaded with incorrect type")
# sqlite has no boolean type, so integer type is returned
self.assertTrue(issubclass(df.BoolCol.dtype.type, np.integer),
"BoolCol loaded with incorrect type")
# Int column with NA values stays as float
self.assertTrue(issubclass(df.IntColWithNull.dtype.type, np.floating),
"IntColWithNull loaded with incorrect type")
# Non-native Bool column with NA values stays as float
self.assertTrue(issubclass(df.BoolColWithNull.dtype.type, np.floating),
"BoolColWithNull loaded with incorrect type")
def test_default_type_conversion(self):
df = sql.read_sql_table("types_test_data", self.conn)
self.assertTrue(issubclass(df.FloatCol.dtype.type, np.floating),
"FloatCol loaded with incorrect type")
self.assertTrue(issubclass(df.IntCol.dtype.type, np.integer),
"IntCol loaded with incorrect type")
# MySQL has no real BOOL type (it's an alias for TINYINT)
self.assertTrue(issubclass(df.BoolCol.dtype.type, np.integer),
"BoolCol loaded with incorrect type")
# Int column with NA values stays as float
self.assertTrue(issubclass(df.IntColWithNull.dtype.type, np.floating),
"IntColWithNull loaded with incorrect type")
# Bool column with NA = int column with NA values => becomes float
self.assertTrue(issubclass(df.BoolColWithNull.dtype.type, np.floating),
"BoolColWithNull loaded with incorrect type")
def __init__(self, f, colspecs, delimiter, comment):
self.f = f
self.buffer = None
self.delimiter = '\r\n' + delimiter if delimiter else '\n\r\t '
self.comment = comment
if colspecs == 'infer':
self.colspecs = self.detect_colspecs()
else:
self.colspecs = colspecs
if not isinstance(self.colspecs, (tuple, list)):
raise TypeError("column specifications must be a list or tuple, "
"input was a %r" % type(colspecs).__name__)
for colspec in self.colspecs:
if not (isinstance(colspec, (tuple, list)) and
len(colspec) == 2 and
isinstance(colspec[0], (int, np.integer, type(None))) and
isinstance(colspec[1], (int, np.integer, type(None)))):
raise TypeError('Each column specification must be '
'2 element tuple or list of integers')
def set_atom_categorical(self, block, items, info=None, values=None):
# currently only supports a 1-D categorical
# in a 1-D block
values = block.values
codes = values.codes
self.kind = 'integer'
self.dtype = codes.dtype.name
if values.ndim > 1:
raise NotImplementedError("only support 1-d categoricals")
if len(items) > 1:
raise NotImplementedError("only support single block categoricals")
# write the codes; must be in a block shape
self.ordered = values.ordered
self.typ = self.get_atom_data(block, kind=codes.dtype.name)
self.set_data(_block_shape(codes))
# write the categories
self.meta = 'category'
self.set_metadata(block.values.categories)
# update the info
self.update_info(info)
def _handle_date_column(col, format=None):
if isinstance(format, dict):
return to_datetime(col, errors='ignore', **format)
else:
if format in ['D', 's', 'ms', 'us', 'ns']:
return to_datetime(col, errors='coerce', unit=format, utc=True)
elif (issubclass(col.dtype.type, np.floating) or
issubclass(col.dtype.type, np.integer)):
# parse dates as timestamp
format = 's' if format is None else format
return to_datetime(col, errors='coerce', unit=format, utc=True)
elif com.is_datetime64tz_dtype(col):
# coerce to UTC timezone
# GH11216
return (to_datetime(col, errors='coerce')
.astype('datetime64[ns, UTC]'))
else:
return to_datetime(col, errors='coerce', format=format, utc=True)
def _get_dtype(self, sqltype):
from sqlalchemy.types import (Integer, Float, Boolean, DateTime,
Date, TIMESTAMP)
if isinstance(sqltype, Float):
return float
elif isinstance(sqltype, Integer):
# TODO: Refine integer size.
return np.dtype('int64')
elif isinstance(sqltype, TIMESTAMP):
# we have a timezone capable type
if not sqltype.timezone:
return datetime
return DatetimeTZDtype
elif isinstance(sqltype, DateTime):
# Caution: np.datetime64 is also a subclass of np.number.
return datetime
elif isinstance(sqltype, Date):
return date
elif isinstance(sqltype, Boolean):
return bool
return object
def _sql_type_name(self, col):
dtype = self.dtype or {}
if col.name in dtype:
return dtype[col.name]
col_type = self._get_notnull_col_dtype(col)
if col_type == 'timedelta64':
warnings.warn("the 'timedelta' type is not supported, and will be "
"written as integer values (ns frequency) to the "
"database.", UserWarning, stacklevel=8)
col_type = "integer"
elif col_type == "datetime64":
col_type = "datetime"
elif col_type == "empty":
col_type = "string"
elif col_type == "complex":
raise ValueError('Complex datatypes not supported')
if col_type not in _SQL_TYPES:
col_type = "string"
return _SQL_TYPES[col_type][self.pd_sql.flavor]
def __getitem__(self, key):
"""
"""
try:
return self._get_val_at(self.index.get_loc(key))
except KeyError:
if isinstance(key, (int, np.integer)):
return self._get_val_at(key)
raise Exception('Requested index not in this series!')
except TypeError:
# Could not hash item, must be array-like?
pass
# is there a case where this would NOT be an ndarray?
# need to find an example, I took out the case for now
key = _values_from_object(key)
dataSlice = self.values[key]
new_index = Index(self.index.view(ndarray)[key])
return self._constructor(dataSlice, index=new_index).__finalize__(self)
def check_random_state(seed):
"""Turn seed into a np.random.RandomState instance
Parameters
----------
seed : None | int | instance of RandomState
If seed is None, return the RandomState singleton used by np.random.
If seed is an int, return a new RandomState instance seeded with seed.
If seed is already a RandomState instance, return it.
Otherwise raise ValueError.
"""
if seed is None or seed is np.random:
return np.random.mtrand._rand
if isinstance(seed, (numbers.Integral, np.integer)):
return np.random.RandomState(seed)
if isinstance(seed, np.random.RandomState):
return seed
raise ValueError('%r cannot be used to seed a numpy.random.RandomState'
' instance' % seed)
def guessCfgType( value ):
# For guessing the data type (bool, integer, float, or string only) from ConfigParser
if value.lower() == 'true':
return True
if value.lower() == 'false':
return False
try:
value = np.int( value )
return value
except:
pass
try:
value = np.float32( value )
return value
except:
pass
return value
def check_window_length(window_length):
"""
Ensure the window length provided to a transform is valid.
"""
if window_length is None:
raise InvalidWindowLength("window_length must be provided")
if not isinstance(window_length, Integral):
raise InvalidWindowLength(
"window_length must be an integer-like number")
if window_length == 0:
raise InvalidWindowLength("window_length must be non-zero")
if window_length < 0:
raise InvalidWindowLength("window_length must be positive")
def _extract_field_names(self, event):
# extract field names from sids (price, volume etc), make sure
# every sid has the same fields.
sid_keys = []
for sid in itervalues(event.data):
keys = set([name for name, value in sid.items()
if isinstance(value,
(int,
float,
numpy.integer,
numpy.float,
numpy.long))
])
sid_keys.append(keys)
# with CUSTOM data events, there may be different fields
# per sid. So the allowable keys are the union of all events.
union = set.union(*sid_keys)
unwanted_fields = {
'portfolio',
'sid',
'dt',
'type',
'source_id',
'_initial_len',
}
return union - unwanted_fields
def RATWriteArray(rat, array, field, start=0):
"""
Pure Python implementation of writing a chunk of the RAT
from a numpy array. Type of array is coerced to one of the types
(int, double, string) supported. Called from RasterAttributeTable.WriteArray
"""
if array is None:
raise ValueError("Expected array of dim 1")
    # if not already an ndarray, convert it so that lists etc. are handled
if not isinstance(array, numpy.ndarray):
array = numpy.array(array)
if array.ndim != 1:
raise ValueError("Expected array of dim 1")
if (start + array.size) > rat.GetRowCount():
raise ValueError("Array too big to fit into RAT from start position")
if numpy.issubdtype(array.dtype, numpy.integer):
# is some type of integer - coerce to standard int
# TODO: must check this is fine on all platforms
# confusingly numpy.int 64 bit even if native type 32 bit
array = array.astype(numpy.int32)
elif numpy.issubdtype(array.dtype, numpy.floating):
# is some type of floating point - coerce to double
array = array.astype(numpy.double)
elif numpy.issubdtype(array.dtype, numpy.character):
# cast away any kind of Unicode etc
array = array.astype(numpy.character)
else:
raise ValueError("Array not of a supported type (integer, double or string)")
return RATValuesIONumPyWrite(rat, field, start, array)
def default(self, obj):
if isinstance(obj, np.integer):
return int(obj)
elif isinstance(obj, np.ndarray):
return obj.tolist()
elif isinstance(obj, np.floating):
return float(obj)
else:
return super(MyEncoder, self).default(obj)
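A usage sketch, assuming the default() method above belongs to a json.JSONEncoder subclass named MyEncoder: passing it via cls lets json.dumps serialize NumPy scalars and arrays that would otherwise raise TypeError.

import json
import numpy as np

payload = {'n': np.int64(3), 'x': np.float32(0.5), 'a': np.arange(3)}
print(json.dumps(payload, cls=MyEncoder))
# {"n": 3, "x": 0.5, "a": [0, 1, 2]}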
def writeHDF5Meta(self, root, name, data, **dsOpts):
if isinstance(data, np.ndarray):
dsOpts['maxshape'] = (None,) + data.shape[1:]
root.create_dataset(name, data=data, **dsOpts)
elif isinstance(data, list) or isinstance(data, tuple):
gr = root.create_group(name)
if isinstance(data, list):
gr.attrs['_metaType_'] = 'list'
else:
gr.attrs['_metaType_'] = 'tuple'
#n = int(np.log10(len(data))) + 1
for i in range(len(data)):
self.writeHDF5Meta(gr, str(i), data[i], **dsOpts)
elif isinstance(data, dict):
gr = root.create_group(name)
gr.attrs['_metaType_'] = 'dict'
for k, v in data.items():
self.writeHDF5Meta(gr, k, v, **dsOpts)
elif isinstance(data, int) or isinstance(data, float) or isinstance(data, np.integer) or isinstance(data, np.floating):
root.attrs[name] = data
else:
try: ## strings, bools, None are stored as repr() strings
root.attrs[name] = repr(data)
except:
print("Can not store meta data of type '%s' in HDF5. (key is '%s')" % (str(type(data)), str(name)))
raise
def repeat(self, nr_sites):
"""Construct a longer MP-POVM by repetition
The resulting POVM will have length `nr_sites`. If `nr_sites`
is not an integer multiple of `len(self)`, `self` must
factorize (have leg dimension one) at the position where it
will be cut. For example, consider the tensor product MP-POVM
of Pauli X and Pauli Y. Calling `repeat(nr_sites=5)` will
construct the tensor product POVM XYXYX:
>>> import mpnum as mp
>>> import mpnum.povm as mpp
>>> x, y = (mpp.MPPovm.from_local_povm(lp(3), 1) for lp in
... (mpp.x_povm, mpp.y_povm))
>>> xy = mp.chain([x, y])
>>> xyxyx = mp.chain([x, y, x, y, x])
>>> mp.norm(xyxyx - xy.repeat(5)) <= 1e-10
True
"""
n_repeat, n_last = nr_sites // len(self), nr_sites % len(self)
if n_last > 0:
assert self.ranks[n_last - 1] == 1, \
"Partial repetition requires factorizing MP-POVM"
return mp.chain([self] * n_repeat
+ ([MPPovm(self.lt[:n_last])] if n_last > 0 else []))
def est_pmf(self, samples, normalize=True, eps=1e-10):
"""Estimate probability mass function from samples
:param np.ndarray samples: `(n_samples, len(self.nsoutdims))`
array of samples
:param bool normalize: True: Return normalized probability
estimates (default). False: Return integer outcome counts.
:returns: Estimated probabilities as ndarray `est_pmf` with
shape `self.nsoutdims`
    With `normalize=True`, `n_samples * est_pmf[i1, ..., ik]` gives the number of
    occurrences of outcome `(i1, ..., ik)` in `samples`.
"""
n_samples = samples.shape[0]
n_out = np.prod(self.nsoutdims)
if samples.ndim > 1:
samples = self.pack_samples(samples)
counts = np.bincount(samples, minlength=n_out)
assert counts.shape == (n_out,)
counts = counts.reshape(self.nsoutdims)
assert counts.sum() == n_samples
if normalize:
return counts / n_samples
else:
return counts
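The counting core of est_pmf is np.bincount over packed outcomes, reshaped back onto the outcome grid. A self-contained sketch with hypothetical data for two binary outcome dimensions:

import numpy as np

nsoutdims = (2, 2)
samples = np.array([[0, 0], [0, 1], [0, 1], [1, 1]])
packed = np.ravel_multi_index(samples.T, nsoutdims)     # [0 1 1 3]
counts = np.bincount(packed, minlength=np.prod(nsoutdims)).reshape(nsoutdims)
print(counts)                     # [[1 2]
                                  #  [0 1]]
print(counts / samples.shape[0])  # normalized pmf estimate, as with normalize=True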