Python numpy module: object_() example source code
The code examples below, extracted from open-source Python projects, illustrate how to use numpy.object_().
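As a quick orientation, here is a minimal self-contained sketch (not from any of the projects below): np.object_ is the scalar type behind dtype=object arrays, which store references to arbitrary Python objects, and numpy.object_(x) simply returns x unchanged (or None when called with no argument).

import numpy as np

a = np.empty(3, dtype=np.object_)    # an array of Python object references
a[0] = {'key': 'value'}              # any Python object can be stored
a[1] = [1, 2, 3]
a[2] = 'text'
assert a.dtype == np.object_

assert np.object_() is None          # no argument: returns None
assert type(np.object_(3)) is int    # a value is passed through unchanged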
def categorize_columns(self, df):
"""Categorize columns of dataframe by data type
:param df: input (pandas) data frame
"""
# check presence and data type of requested columns
# sort columns into numerical, timestamp and category based
for c in self.columns:
for col in c:
if col not in df.columns:
raise KeyError('column "{0:s}" not in dataframe "{1:s}"'.format(col, self.read_key))
dt = self.get_data_type(df, col)
if col not in self.var_dtype:
self.var_dtype[col] = dt.type
if (self.var_dtype[col] is np.string_) or (self.var_dtype[col] is np.object_):
self.var_dtype[col] = str
if not any(dt in types for types in (STRING_SUBSTR, NUMERIC_SUBSTR, TIME_SUBSTR)):
raise TypeError('cannot process column "{0:s}" of data type "{1:s}"'.format(col, str(dt)))
is_number = isinstance(dt.type(), np.number)
is_timestamp = isinstance(dt.type(), np.datetime64)
colset = self.num_cols if is_number else self.dt_cols if is_timestamp else self.str_cols
if col not in colset:
colset.append(col)
self.log().debug('Data type of column "%s" is "%s"', col, self.var_dtype[col])
def test_object_array_refcount_self_assign(self, level=rlevel):
# Ticket #711
class VictimObject(object):
deleted = False
def __del__(self):
self.deleted = True
d = VictimObject()
arr = np.zeros(5, dtype=np.object_)
arr[:] = d
del d
arr[:] = arr # refcount of 'd' might hit zero here
assert_(not arr[0].deleted)
arr[:] = arr # trying to induce a segfault by doing it again...
assert_(not arr[0].deleted)
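The ticket above is about reference counting; as a standalone illustration of the underlying behaviour (the exact counts are a CPython implementation detail), assigning into an object array stores one reference per slot:

import sys
import numpy as np

obj = object()
arr = np.zeros(4, dtype=np.object_)
before = sys.getrefcount(obj)
arr[:] = obj                          # every slot now references obj
print(sys.getrefcount(obj) - before)  # 4: one new reference per slot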
def batch_loader(self, rnd_gen=np.random, shuffle=True):
"""load_mbs yields a new minibatch at each iteration"""
batchsize = self.batchsize
inds = np.arange(self.n_samples)
if shuffle:
rnd_gen.shuffle(inds)
n_mbs = int(np.ceil(self.n_samples / batchsize))  # builtin int; np.int was removed in NumPy 1.24
x = np.zeros(self.X_shape, np.float32)
y = np.zeros(self.y_shape, np.float32)
ids = np.empty((batchsize,), np.object_)
for m in range(n_mbs):
start = m * batchsize
end = (m + 1) * batchsize
if end > self.n_samples:
end = self.n_samples
mb_slice = slice(start, end)
x[:end - start, :] = self.x[inds[mb_slice], :]
y[:end - start, :] = self.y[inds[mb_slice], :]
ids[:end - start] = self.ids[inds[mb_slice]]
yield dict(X=x, y=y, ID=ids)
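The np.empty((batchsize,), np.object_) buffer above is the part relevant to this page: unlike the float32 arrays for X and y, it can carry arbitrary sample identifiers. A toy sketch of that idiom (the identifiers are made up):

import numpy as np

batchsize = 4
ids = np.empty((batchsize,), dtype=np.object_)
ids[:] = ['sample_a', 'sample_b', ('run2', 7), None]   # mixed Python objects
print(ids)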
def pad_1d(values, limit=None, mask=None, dtype=None):
if dtype is None:
dtype = values.dtype
_method = None
if com.is_float_dtype(values):
_method = getattr(algos, 'pad_inplace_%s' % dtype.name, None)
elif dtype in com._DATELIKE_DTYPES or com.is_datetime64_dtype(values):
_method = _pad_1d_datetime
elif com.is_integer_dtype(values):
values = com._ensure_float64(values)
_method = algos.pad_inplace_float64
elif values.dtype == np.object_:
_method = algos.pad_inplace_object
if _method is None:
raise ValueError('Invalid dtype for pad_1d [%s]' % dtype.name)
if mask is None:
mask = com.isnull(values)
mask = mask.view(np.uint8)
_method(values, mask, limit=limit)
return values
def backfill_1d(values, limit=None, mask=None, dtype=None):
if dtype is None:
dtype = values.dtype
_method = None
if com.is_float_dtype(values):
_method = getattr(algos, 'backfill_inplace_%s' % dtype.name, None)
elif dtype in com._DATELIKE_DTYPES or com.is_datetime64_dtype(values):
_method = _backfill_1d_datetime
elif com.is_integer_dtype(values):
values = com._ensure_float64(values)
_method = algos.backfill_inplace_float64
elif values.dtype == np.object_:
_method = algos.backfill_inplace_object
if _method is None:
raise ValueError('Invalid dtype for backfill_1d [%s]' % dtype.name)
if mask is None:
mask = com.isnull(values)
mask = mask.view(np.uint8)
_method(values, mask, limit=limit)
return values
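Both pad_1d and backfill_1d dispatch to Cython kernels inside pandas (algos.pad_inplace_object and friends are internal helpers, not public API). As a rough dependency-free sketch of what forward-filling an object array means, assuming the mask marks the missing entries:

import numpy as np

def pad_object_1d(values, mask):
    """Forward-fill an object array in place where mask is True."""
    last = None
    for i in range(len(values)):
        if mask[i]:
            values[i] = last         # propagate the last valid value
        else:
            last = values[i]
    return values

vals = np.array(['a', None, None, 'b', None], dtype=np.object_)
mask = np.array([v is None for v in vals])
print(pad_object_1d(vals, mask))     # ['a' 'a' 'a' 'b' 'b']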
def is_bool_indexer(key):
if isinstance(key, (ABCSeries, np.ndarray)):
if key.dtype == np.object_:
key = np.asarray(_values_from_object(key))
if not lib.is_bool_array(key):
if isnull(key).any():
raise ValueError('cannot index with vector containing '
'NA / NaN values')
return False
return True
elif key.dtype == np.bool_:
return True
elif isinstance(key, list):
try:
arr = np.asarray(key)
return arr.dtype == np.bool_ and len(arr) == len(key)
except TypeError: # pragma: no cover
return False
return False
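lib.is_bool_array above is a pandas Cython helper; outside pandas internals, the object-dtype check it performs can be approximated with plain Python, as in this sketch:

import numpy as np

key = np.array([True, False, True], dtype=np.object_)
is_bool = all(isinstance(x, (bool, np.bool_)) for x in key)
print(is_bool)                           # True
print(np.arange(3)[key.astype(bool)])    # [0 2]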
def test_fromValue(self):
nans = Series(np.NaN, index=self.ts.index)
self.assertEqual(nans.dtype, np.float_)
self.assertEqual(len(nans), len(self.ts))
strings = Series('foo', index=self.ts.index)
self.assertEqual(strings.dtype, np.object_)
self.assertEqual(len(strings), len(self.ts))
d = datetime.now()
dates = Series(d, index=self.ts.index)
self.assertEqual(dates.dtype, 'M8[ns]')
self.assertEqual(len(dates), len(self.ts))
# GH12336
# Test construction of categorical series from value
categorical = Series(0, index=self.ts.index, dtype="category")
expected = Series(0, index=self.ts.index).astype("category")
self.assertEqual(categorical.dtype, 'category')
self.assertEqual(len(categorical), len(self.ts))
tm.assert_series_equal(categorical, expected)
def test_astype_datetimes(self):
import pandas.tslib as tslib
s = Series(tslib.iNaT, dtype='M8[ns]', index=lrange(5))
s = s.astype('O')
self.assertEqual(s.dtype, np.object_)
s = Series([datetime(2001, 1, 2, 0, 0)])
s = s.astype('O')
self.assertEqual(s.dtype, np.object_)
s = Series([datetime(2001, 1, 2, 0, 0) for i in range(3)])
s[1] = np.nan
self.assertEqual(s.dtype, 'M8[ns]')
s = s.astype('O')
self.assertEqual(s.dtype, np.object_)
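The astype('O') calls above rely on regular pandas/numpy API: 'O' is the dtype character code for np.object_, so the conversion boxes each element as a Python object. A minimal check:

import numpy as np
import pandas as pd

s = pd.Series(pd.to_datetime(['2001-01-02', '2001-01-03']))
print(s.dtype)                       # datetime64[ns]
s_obj = s.astype('O')
print(s_obj.dtype == np.object_)     # True: elements are now Timestamp objects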
def test_convert_objects_leave_decimal_alone(self):
from decimal import Decimal
s = Series(lrange(5))
labels = np.array(['a', 'b', 'c', 'd', 'e'], dtype='O')
def convert_fast(x):
return Decimal(str(x.mean()))
def convert_force_pure(x):
# base will be length 0
assert (len(x.base) > 0)
return Decimal(str(x.mean()))
grouped = s.groupby(labels)
result = grouped.agg(convert_fast)
self.assertEqual(result.dtype, np.object_)
tm.assertIsInstance(result[0], Decimal)
result = grouped.agg(convert_force_pure)
self.assertEqual(result.dtype, np.object_)
tm.assertIsInstance(result[0], Decimal)
def test_set_value_resize(self):
res = self.frame.set_value('foobar', 'B', 0)
self.assertIs(res, self.frame)
self.assertEqual(res.index[-1], 'foobar')
self.assertEqual(res.get_value('foobar', 'B'), 0)
self.frame.loc['foobar', 'qux'] = 0
self.assertEqual(self.frame.get_value('foobar', 'qux'), 0)
res = self.frame.copy()
res3 = res.set_value('foobar', 'baz', 'sam')
self.assertEqual(res3['baz'].dtype, np.object_)
res = self.frame.copy()
res3 = res.set_value('foobar', 'baz', True)
self.assertEqual(res3['baz'].dtype, np.object_)
res = self.frame.copy()
res3 = res.set_value('foobar', 'baz', 5)
self.assertTrue(com.is_float_dtype(res3['baz']))
self.assertTrue(isnull(res3['baz'].drop(['foobar'])).all())
self.assertRaises(ValueError, res3.set_value, 'foobar', 'baz', 'sam')
def test_stat_operators_attempt_obj_array(self):
data = {
'a': [-0.00049987540199591344, -0.0016467257772919831,
0.00067695870775883013],
'b': [-0, -0, 0.0],
'c': [0.00031111847529610595, 0.0014902627951905339,
-0.00094099200035979691]
}
df1 = DataFrame(data, index=['foo', 'bar', 'baz'],
dtype='O')
methods = ['sum', 'mean', 'prod', 'var', 'std', 'skew', 'min', 'max']
# GH #676
df2 = DataFrame({0: [np.nan, 2], 1: [np.nan, 3],
2: [np.nan, 4]}, dtype=object)
for df in [df1, df2]:
for meth in methods:
self.assertEqual(df.values.dtype, np.object_)
result = getattr(df, meth)(1)
expected = getattr(df.astype('f8'), meth)(1)
if not tm._incompat_bottleneck_version(meth):
assert_series_equal(result, expected)
def test_constructor_dict_cast(self):
# cast float tests
test_data = {
'A': {'1': 1, '2': 2},
'B': {'1': '1', '2': '2', '3': '3'},
}
frame = DataFrame(test_data, dtype=float)
self.assertEqual(len(frame), 3)
self.assertEqual(frame['B'].dtype, np.float64)
self.assertEqual(frame['A'].dtype, np.float64)
frame = DataFrame(test_data)
self.assertEqual(len(frame), 3)
self.assertEqual(frame['B'].dtype, np.object_)
self.assertEqual(frame['A'].dtype, np.float64)
# can't cast to float
test_data = {
'A': dict(zip(range(20), tm.makeStringIndex(20))),
'B': dict(zip(range(15), randn(15)))
}
frame = DataFrame(test_data, dtype=float)
self.assertEqual(len(frame), 20)
self.assertEqual(frame['A'].dtype, np.object_)
self.assertEqual(frame['B'].dtype, np.float64)
def test_transpose(self):
frame = self.frame
dft = frame.T
for idx, series in compat.iteritems(dft):
for col, value in compat.iteritems(series):
if np.isnan(value):
self.assertTrue(np.isnan(frame[col][idx]))
else:
self.assertEqual(value, frame[col][idx])
# mixed type
index, data = tm.getMixedTypeDict()
mixed = DataFrame(data, index=index)
mixed_T = mixed.T
for col, s in compat.iteritems(mixed_T):
self.assertEqual(s.dtype, np.object_)
def test_nan_handling(self):
# Nans are represented as -1 in labels
s = Series(Categorical(["a", "b", np.nan, "a"]))
self.assert_numpy_array_equal(s.cat.categories, np.array(["a", "b"]))
self.assert_numpy_array_equal(s.values.codes, np.array([0, 1, -1, 0]))
# If categories have nan included, the label should point to that
# instead
with tm.assert_produces_warning(FutureWarning):
s2 = Series(Categorical(
["a", "b", np.nan, "a"], categories=["a", "b", np.nan]))
self.assert_numpy_array_equal(s2.cat.categories, np.array(
["a", "b", np.nan], dtype=np.object_))
self.assert_numpy_array_equal(s2.values.codes, np.array([0, 1, 2, 0]))
# Changing categories should also make the replaced category np.nan
s3 = Series(Categorical(["a", "b", "c", "a"]))
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
s3.cat.categories = ["a", "b", np.nan]
self.assert_numpy_array_equal(s3.cat.categories, np.array(
["a", "b", np.nan], dtype=np.object_))
self.assert_numpy_array_equal(s3.values.codes, np.array([0, 1, 2, 0]))
def reset_minmax(self):
try:
data = self.get_values(sample=True)
color_value = self.color_func(data) if self.color_func is not None else data
if color_value.dtype.type == np.object_:
color_value = color_value[is_number_value(color_value)]
# this is probably broken if we have complex numbers stored as objects but I don't foresee
# this case happening anytime soon.
color_value = color_value.astype(float)
# ignore nan, -inf, inf (setting them to 0 or to very large numbers is not an option)
color_value = color_value[np.isfinite(color_value)]
self.vmin = float(np.min(color_value))
self.vmax = float(np.max(color_value))
self.bgcolor_possible = True
# ValueError for empty arrays, TypeError for object/string arrays
except (TypeError, ValueError):
self.vmin = None
self.vmax = None
self.bgcolor_possible = False
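The object-dtype branch above leans on a project helper (is_number_value); its essence can be sketched with plain numpy: keep only the numeric, finite elements of an object array before taking min and max.

import numpy as np
from numbers import Number

data = np.array([1, 'x', 2.5, None, float('inf'), -3], dtype=np.object_)
numeric = np.array([v for v in data if isinstance(v, Number)], dtype=float)
finite = numeric[np.isfinite(numeric)]
print(finite.min(), finite.max())    # -3.0 2.5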
def normalize_attr_strings(a: np.ndarray) -> np.ndarray:
"""
Take an np.ndarray of all kinds of string-like elements, and return an array of ascii (np.string_) objects
"""
if np.issubdtype(a.dtype, np.object_):
if np.all([type(x) is str for x in a]) or np.all([type(x) is np.str_ for x in a]) or np.all([type(x) is np.unicode_ for x in a]):
return np.array([x.encode('ascii', 'xmlcharrefreplace') for x in a])
elif np.all([type(x) is np.string_ for x in a]) or np.all([type(x) is np.bytes_ for x in a]):
return a.astype("string_")
else:
print(type(a[0]))
raise ValueError("Arbitrary numpy object arrays not supported (all elements must be string objects).")
elif np.issubdtype(a.dtype, np.string_) or np.issubdtype(a.dtype, np.object_):
return a
elif np.issubdtype(a.dtype, np.str_) or np.issubdtype(a.dtype, np.unicode_):
return np.array([x.encode('ascii', 'xmlcharrefreplace') for x in a])
else:
raise ValueError("String values must be object, ascii or unicode.")
def numpy_to_transform(arr):
from tf import transformations
shape, rest = arr.shape[:-2], arr.shape[-2:]
assert rest == (4,4)
if len(shape) == 0:
trans = transformations.translation_from_matrix(arr)
quat = transformations.quaternion_from_matrix(arr)
return Transform(
translation=Vector3(*trans),
rotation=Quaternion(*quat)
)
else:
res = np.empty(shape, dtype=np.object_)
for idx in np.ndindex(shape):
res[idx] = Transform(
translation=Vector3(*transformations.translation_from_matrix(arr[idx])),
rotation=Quaternion(*transformations.quaternion_from_matrix(arr[idx]))
)
return res
def numpy_to_pose(arr):
from tf import transformations
shape, rest = arr.shape[:-2], arr.shape[-2:]
assert rest == (4,4)
if len(shape) == 0:
trans = transformations.translation_from_matrix(arr)
quat = transformations.quaternion_from_matrix(arr)
return Pose(
position=Vector3(*trans),
orientation=Quaternion(*quat)
)
else:
res = np.empty(shape, dtype=np.object_)
for idx in np.ndindex(shape):
res[idx] = Pose(
position=Vector3(*transformations.translation_from_matrix(arr[idx])),
orientation=Quaternion(*transformations.quaternion_from_matrix(arr[idx]))
)
return res
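The elementwise-fill idiom shared by both converters (Transform, Pose, Vector3 and Quaternion come from ROS geometry_msgs) generalizes beyond ROS: np.empty(shape, dtype=np.object_) plus np.ndindex lets a batch of arbitrary objects mirror the leading dimensions of a numeric array. A ROS-free sketch:

import numpy as np

arrs = np.random.rand(2, 3, 4, 4)        # a (2, 3) batch of 4x4 matrices
shape = arrs.shape[:-2]
res = np.empty(shape, dtype=np.object_)  # object array mirroring the batch
for idx in np.ndindex(shape):
    res[idx] = {'trace': float(np.trace(arrs[idx]))}   # any object per cell
print(res.shape, type(res[0, 0]))        # (2, 3) <class 'dict'>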
def initialize(self):
"""Initialize HistogramFillerBase"""
# check basic attribute settings
assert isinstance(self.read_key, str) and len(self.read_key), 'read_key has not been set correctly'
if self.store_key is not None:
assert isinstance(self.store_key, str) and len(self.store_key), 'store_key has not been set to string'
# default histogram creation is at execute(). Storage at finalize is useful for
# looping over datasets.
if self.store_at_finalize:
self.log().debug('Storing (and possibly post-processing) at finalize, not execute')
# check that columns are set correctly.
for i, c in enumerate(self.columns):
if isinstance(c, str):
self.columns[i] = [c]
if not isinstance(self.columns[i], list):
raise TypeError('columns "{}" needs to be a string or list of strings'.format(self.columns[i]))
# check for supported data types
for k in self.var_dtype.keys():
try:
self.var_dtype[k] = np.dtype(self.var_dtype[k]).type
if self.var_dtype[k] is np.string_ or self.var_dtype[k] is np.object_:
self.var_dtype[k] = str
except BaseException:
raise RuntimeError('unknown assigned datatype to variable "{}"'.format(k))
return StatusCode.Success
def initialize(self):
"""Initialize FixPandasDataFrame"""
self.check_arg_types(read_key=str, store_key=str)
self.check_arg_types(recurse=True, allow_none=True, original_columns=str)
self.check_arg_vals('read_key')
if not isinstance(self.cleanup_string_columns, list) and not isinstance(self.cleanup_string_columns, bool):
raise AssertionError('cleanup_string_columns should be a list of column names or boolean.')
if self.read_key == self.store_key:
self.inplace = True
self.log().debug('store_key equals read_key; inplace has been set to "True"')
if self.inplace:
self.store_key = self.read_key
self.log().debug('store_key has been set to read_key "%s"', self.store_key)
if not self.store_key:
self.store_key = self.read_key + '_fix'
self.log().debug('store_key has been set to "%s"', self.store_key)
# check data types
for k in self.var_dtype.keys():
if k not in self.contaminated_columns:
self.contaminated_columns.append(k)
try:
# convert to consistent types
dt = np.dtype(self.var_dtype[k]).type
if dt is np.str_ or dt is np.object_:
dt = str
self.var_dtype[k] = dt
except BaseException:
raise TypeError('unknown assigned datatype to variable "%s"' % k)
return StatusCode.Success
def test_unpickle_dtype_with_object(self,level=rlevel):
# Implemented in r2840
dt = np.dtype([('x', int), ('y', np.object_), ('z', 'O')])
f = BytesIO()
pickle.dump(dt, f)
f.seek(0)
dt_ = pickle.load(f)
f.close()
assert_equal(dt, dt_)
def test_mem_array_creation_invalid_specification(self,level=rlevel):
# Ticket #196
dt = np.dtype([('x', int), ('y', np.object_)])
# Wrong way
self.assertRaises(ValueError, np.array, [1, 'object'], dt)
# Correct way
np.array([(1, 'object')], dt)
def test_for_object_scalar_creation(self, level=rlevel):
# Ticket #816
a = np.object_()
b = np.object_(3)
b2 = np.object_(3.0)
c = np.object_([4, 5])
d = np.object_([None, {}, []])
assert_(a is None)
assert_(type(b) is int)
assert_(type(b2) is float)
assert_(type(c) is np.ndarray)
assert_(c.dtype == object)
assert_(d.dtype == object)
def test_object_array_to_fixed_string(self):
# Ticket #1235.
a = np.array(['abcdefgh', 'ijklmnop'], dtype=np.object_)
b = np.array(a, dtype=(np.str_, 8))
assert_equal(a, b)
c = np.array(a, dtype=(np.str_, 5))
assert_equal(c, np.array(['abcde', 'ijklm']))
d = np.array(a, dtype=(np.str_, 12))
assert_equal(a, d)
e = np.empty((2, ), dtype=(np.str_, 8))
e[:] = a[:]
assert_equal(a, e)
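When the fixed width is not known up front, it can be derived from the data before the cast; a small sketch, assuming every element is a string:

import numpy as np

a = np.array(['abcdefgh', 'ijklmnop', 'xy'], dtype=np.object_)
width = max(len(s) for s in a)               # widest element sets the dtype
fixed = np.array(a, dtype=(np.str_, width))
print(fixed.dtype)                           # <U8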
def test_split(self):
A = self.A.split(asbytes('3'))
tgt = asbytes_nested([
[[' abc '], ['']],
[['12', '45'], ['MixedCase']],
[['12', ' \t ', '45 \x00 '], ['UPPER']]])
assert_(issubclass(A.dtype.type, np.object_))
assert_equal(A.tolist(), tgt)
def test_splitlines(self):
A = np.char.array(['abc\nfds\nwer']).splitlines()
assert_(issubclass(A.dtype.type, np.object_))
assert_(A.shape == (1,))
assert_(len(A[0]) == 3)
def test_converters_cornercases(self):
# Test the conversion to datetime.
converter = {
'date': lambda s: strptime(s, '%Y-%m-%d %H:%M:%SZ')}
data = TextIO('2009-02-03 12:00:00Z, 72214.0')
test = np.ndfromtxt(data, delimiter=',', dtype=None,
names=['date', 'stid'], converters=converter)
control = np.array((datetime(2009, 2, 3), 72214.),
dtype=[('date', np.object_), ('stid', float)])
assert_equal(test, control)
def test_dtype_error(self):
for f in self.nanfuncs:
for dtype in [np.bool_, np.int_, np.object_]:
assert_raises(TypeError, f, _ndat, axis=1, dtype=dtype)
def test_out_dtype_error(self):
for f in self.nanfuncs:
for dtype in [np.bool_, np.int_, np.object_]:
out = np.empty(_ndat.shape[0], dtype=dtype)
assert_raises(TypeError, f, _ndat, axis=1, out=out)
def split_df_col2cols(dataframe, split_col, split_char, new_colnames, delete_old = False):
"""Split a dataframe column into multiple columns.

dataframe    : pandas dataframe to be processed
split_col    : name of the column to be split
split_char   : character to split the column on
new_colnames : list of names for the new columns
delete_old   : if True, remove the original column from the dataframe
"""
import pandas as pd
import numpy as np
# save the split column as a separate object
new_cols = dataframe[split_col].astype(np.object_).str.split(split_char).apply(pd.Series, 1)
# if all values were NaN, no split occurred and only one column exists
if len(new_cols.columns) < len(new_colnames):
# create the missing cols, fill with NaN
for i in range(len(new_cols.columns), len(new_colnames)):
new_cols[new_colnames[i]] = np.nan
# rename the cols
new_cols.columns = new_colnames
# remove the original column from the df
if delete_old is True:
del dataframe[split_col]
# merge with df
new_df = dataframe.join(new_cols)
return new_df
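A usage sketch for split_df_col2cols with a toy frame (the column names and separator are illustrative):

import pandas as pd

df = pd.DataFrame({'sample': ['A_1', 'B_2', 'C_3']})
out = split_df_col2cols(df, split_col='sample', split_char='_',
                        new_colnames=['group', 'replicate'], delete_old=True)
print(out)
#   group replicate
# 0     A         1
# 1     B         2
# 2     C         3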