Python numpy module: object() code examples
The following code examples, extracted from open-source Python projects, illustrate how to use numpy.object().
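As a quick orientation before the project examples (this sketch is not taken from any of them): an object-dtype array stores references to arbitrary Python objects instead of packed numeric values. Note that the np.object alias used throughout these examples is deprecated in NumPy 1.20+ (and later removed) in favor of the builtin object.

import numpy as np

# An object-dtype array holds arbitrary Python references per element.
a = np.empty(3, dtype=object)
a[0] = {"key": "value"}  # a dict
a[1] = [1, 2, 3]         # a list
a[2] = "text"            # a string
print(a.dtype)           # object

# Mixed types are preserved, where a numeric dtype would coerce or fail.
b = np.array([3, None, "test"], dtype=object)
print([type(x).__name__ for x in b])  # ['int', 'NoneType', 'str']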
def write_data_frame(fn, df):
''' Write the pandas dataframe object to an HDF5 file. Each column is written as a single 1D dataset at the top
level of the HDF5 file, using the native pandas datatype'''
# Always write a fresh file -- the 'w' argument to h5py.File is supposed to truncate an existing file, but it doesn't appear to work correctly
if os.path.exists(fn):
os.remove(fn)
f = h5py.File(fn, "w")
# To preserve column order, write columns to an attribute
column_names = np.array(list(df.columns))
f.attrs.create("column_names", column_names)
for col in df.columns:
write_data_column(f, df[col])
f.close()
def append_data_frame(fn, df):
''' Write the pandas dataframe object to an HDF5 file. Each column is written as a single 1D dataset at the top
level of the HDF5 file, using the native pandas datatype'''
if not os.path.exists(fn):
write_data_frame(fn, df)
return
f = h5py.File(fn, "a")
column_names = f.attrs.get("column_names")
for col_name in column_names:
ds = f[col_name]
col = df[col_name]
append_data_column(ds, col)
f.close()
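A hypothetical usage sketch for the two helpers above; write_data_column and append_data_column are defined elsewhere in the originating project, and the file name here is made up.

import pandas as pd

df = pd.DataFrame({"barcode": ["AAA", "CCC"], "count": [10, 20]})
write_data_frame("metrics.h5", df)   # fresh file; column order kept in attrs
append_data_frame("metrics.h5", df)  # extends each 1D column dataset in place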
def compile(self, root_block_like):
"""Compiles a block, and sets it to the root.
Args:
root_block_like: A block or an object that can be converted to a block by
[`td.convert_to_block`](#td.convert_to_block). Must have at least one
output or metric tensor. The output type may not contain any
Sequence or PyObject types.
Returns:
`self`
Raises:
RuntimeError: If `init_loom()` has already been called.
TypeError: If `root_block_like` cannot be converted to a block.
TypeError: If `root_block_like` fails to compile.
TypeError: If `root_block_like` has no output or metric tensors.
TypeError: If `root_block_like` has an invalid output type.
"""
if self.is_loom_initialized:
raise RuntimeError('Loom has already been initialized.')
return self._setup(root_block_like, interactive_mode=False)
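This method appears to come from TensorFlow Fold's td.Compiler. A hedged usage sketch, assuming the surrounding Compiler class and a trivial root block built from td.Scalar and td.Function (the block itself is illustrative, not from the source):

import tensorflow_fold as td

# Build a tiny block whose output is a tensor, then set it as the root.
root = td.Scalar() >> td.Function(lambda x: x * 2)
compiler = td.Compiler()
compiler.compile(root)   # must be called before init_loom()
compiler.init_loom()     # after this, calling compile() raises RuntimeError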
def test_wrap(self):
class with_wrap(object):
def __array__(self):
return np.zeros(1)
def __array_wrap__(self, arr, context):
r = with_wrap()
r.arr = arr
r.context = context
return r
a = with_wrap()
x = ncu.minimum(a, a)
assert_equal(x.arr, np.zeros(1))
func, args, i = x.context
self.assertTrue(func is ncu.minimum)
self.assertEqual(len(args), 2)
assert_equal(args[0], a)
assert_equal(args[1], a)
self.assertEqual(i, 0)
def test_dot_override(self):
# 2016-01-29: NUMPY_UFUNC_DISABLED
return
class A(object):
def __numpy_ufunc__(self, ufunc, method, pos, inputs, **kwargs):
return "A"
class B(object):
def __numpy_ufunc__(self, ufunc, method, pos, inputs, **kwargs):
return NotImplemented
a = A()
b = B()
c = np.array([[1]])
assert_equal(np.dot(a, b), "A")
assert_equal(c.dot(a), "A")
assert_raises(TypeError, np.dot, b, c)
assert_raises(TypeError, c.dot, b)
def test_ufunc_override_normalize_signature(self):
# 2016-01-29: NUMPY_UFUNC_DISABLED
return
# gh-5674
class SomeClass(object):
def __numpy_ufunc__(self, ufunc, method, i, inputs, **kw):
return kw
a = SomeClass()
kw = np.add(a, [1])
assert_('sig' not in kw and 'signature' not in kw)
kw = np.add(a, [1], sig='ii->i')
assert_('sig' not in kw and 'signature' in kw)
assert_equal(kw['signature'], 'ii->i')
kw = np.add(a, [1], signature='ii->i')
assert_('sig' not in kw and 'signature' in kw)
assert_equal(kw['signature'], 'ii->i')
def test_object_logical(self):
a = np.array([3, None, True, False, "test", ""], dtype=object)
assert_equal(np.logical_or(a, None),
np.array([x or None for x in a], dtype=object))
assert_equal(np.logical_or(a, True),
np.array([x or True for x in a], dtype=object))
assert_equal(np.logical_or(a, 12),
np.array([x or 12 for x in a], dtype=object))
assert_equal(np.logical_or(a, "blah"),
np.array([x or "blah" for x in a], dtype=object))
assert_equal(np.logical_and(a, None),
np.array([x and None for x in a], dtype=object))
assert_equal(np.logical_and(a, True),
np.array([x and True for x in a], dtype=object))
assert_equal(np.logical_and(a, 12),
np.array([x and 12 for x in a], dtype=object))
assert_equal(np.logical_and(a, "blah"),
np.array([x and "blah" for x in a], dtype=object))
assert_equal(np.logical_not(a),
np.array([not x for x in a], dtype=object))
assert_equal(np.logical_or.reduce(a), 3)
assert_equal(np.logical_and.reduce(a), None)
def test_dtype_with_object(self):
# Test using an explicit dtype with an object
data = """ 1; 2001-01-01
2; 2002-01-31 """
ndtype = [('idx', int), ('code', np.object)]
func = lambda s: strptime(s.strip(), "%Y-%m-%d")
converters = {1: func}
test = np.genfromtxt(TextIO(data), delimiter=";", dtype=ndtype,
converters=converters)
control = np.array(
[(1, datetime(2001, 1, 1)), (2, datetime(2002, 1, 31))],
dtype=ndtype)
assert_equal(test, control)
ndtype = [('nest', [('idx', int), ('code', np.object)])]
try:
test = np.genfromtxt(TextIO(data), delimiter=";",
dtype=ndtype, converters=converters)
except NotImplementedError:
pass
else:
errmsg = "Nested dtype involving objects should be supported."
raise AssertionError(errmsg)
def test_gft_using_filename(self):
# Test that we can load data from a filename as well as a file
# object
tgt = np.arange(6).reshape((2, 3))
if sys.version_info[0] >= 3:
# python 3k is known to fail for '\r'
linesep = ('\n', '\r\n')
else:
linesep = ('\n', '\r\n', '\r')
for sep in linesep:
data = '0 1 2' + sep + '3 4 5'
with temppath() as name:
with open(name, 'w') as f:
f.write(data)
res = np.genfromtxt(name)
assert_array_equal(res, tgt)
def test_generic_rank3(self):
"""Test rank 3 array for all dtypes."""
def foo(t):
a = np.empty((4, 2, 3), t)
a.fill(1)
b = a.copy()
c = a.copy()
c.fill(0)
self._test_equal(a, b)
self._test_not_equal(c, b)
# Test numeric types and object
for t in '?bhilqpBHILQPfdgFDG':
foo(t)
# Test strings
for t in ['S1', 'U1']:
foo(t)
def test_TakeTransposeInnerOuter(self):
# Test of take, transpose, inner, outer products
x = arange(24)
y = np.arange(24)
x[5:6] = masked
x = x.reshape(2, 3, 4)
y = y.reshape(2, 3, 4)
assert_equal(np.transpose(y, (2, 0, 1)), transpose(x, (2, 0, 1)))
assert_equal(np.take(y, (2, 0, 1), 1), take(x, (2, 0, 1), 1))
assert_equal(np.inner(filled(x, 0), filled(y, 0)),
inner(x, y))
assert_equal(np.outer(filled(x, 0), filled(y, 0)),
outer(x, y))
y = array(['abc', 1, 'def', 2, 3], object)
y[2] = masked
t = take(y, [0, 3, 4])
assert_(t[0] == 'abc')
assert_(t[1] == 2)
assert_(t[2] == 3)
def _parase_fq_factor(code, start, end):
symbol = _code_to_symbol(code)
request = Request(ct.HIST_FQ_FACTOR_URL%(ct.P_TYPE['http'],
ct.DOMAINS['vsf'], symbol))
text = urlopen(request, timeout=10).read()
text = text[1:len(text)-1]
text = text.decode('utf-8') if ct.PY3 else text
text = text.replace('{_', '{"')
text = text.replace('total', '"total"')
text = text.replace('data', '"data"')
text = text.replace(':"', '":"')
text = text.replace('",_', '","')
text = text.replace('_', '-')
text = json.loads(text)
df = pd.DataFrame({'date':list(text['data'].keys()), 'factor':list(text['data'].values())})
df['date'] = df['date'].map(_fun_except) # for null case
if df['date'].dtypes == np.object:
df['date'] = df['date'].astype(np.datetime64)
df = df.drop_duplicates('date')
df['factor'] = df['factor'].astype(float)
return df
def least_square_lagged_regression(u_array):
"""
    u_array: array of shape (q, T+1, p)
"""
q,T,p = u_array.shape
T -= 1
# t0, t1 term is t1 regressed on t0
lagged_coef_mat = np.zeros([T,T],dtype = np.object)
for t0 in range(T):
for t1 in range(t0,T):
tmp_coef = np.zeros([p,p])
for i in range(p):
# least square regression u_t+h[i] u_t
tmp_y = u_array[:,t1+1,i]
tmp_x = u_array[:,t0,:]
# (X'X)^{-1} X' Y
tmp_coef[i,:] = np.linalg.inv(tmp_x.T.dot(tmp_x)).dot(tmp_x.T.dot(tmp_y))
lagged_coef_mat[t0,t1] = tmp_coef
return lagged_coef_mat
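A hypothetical smoke test for the estimator above on synthetic Gaussian data; it assumes q > p so that X'X is invertible, and only checks shapes.

import numpy as np

rng = np.random.default_rng(0)
u = rng.normal(size=(50, 6, 3))            # q=50, T+1=6, p=3
coefs = least_square_lagged_regression(u)
print(coefs.shape)        # (5, 5) object array; upper triangle filled
print(coefs[0, 2].shape)  # each filled entry is a (p, p) = (3, 3) matrix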
def redraw(self):
column_index1 = self.combo_box1.GetSelection()
if column_index1 != wx.NOT_FOUND and column_index1 != 0:
# subtract one to remove the neutral selection index
column_index1 -= 1
df = self.df_list_ctrl.get_filtered_df()
if len(df) > 0:
self.axes.clear()
column = df.iloc[:, column_index1]
is_string_col = column.dtype == np.object and isinstance(column.values[0], str)
if is_string_col:
value_counts = column.value_counts().sort_index()
value_counts.plot(kind='bar', ax=self.axes)
else:
self.axes.hist(column.values, bins=100)
self.canvas.draw()
def Leaflet_finder(block, traj, cutoff, len_atom, len_chunks, block_id=None):
id_0 = block_id[0]
id_1 = block_id[1]
block[:,:] = cdist(np.load(traj, mmap_mode='r')[id_0*len_chunks:(id_0+1)*len_chunks], np.load(traj, mmap_mode='r')[id_1*len_chunks:(id_1+1)*len_chunks]) <= cutoff
    adj_list = np.where(block[:,:])
adj_list = np.vstack(adj_list)
adj_list[0] = adj_list[0]+id_0*len_chunks
adj_list[1] = adj_list[1]+id_1*len_chunks
if adj_list.shape[1] == 0:
adj_list=np.zeros((2,1))
graph = nx.Graph()
edges = [(adj_list[0,k],adj_list[1,k]) for k in range(0,adj_list.shape[1])]
graph.add_edges_from(edges)
l = np.array({i: item for i, item in enumerate(sorted(nx.connected_components(graph)))}, dtype=np.object).reshape(1,1)
return l
def get_samples(desired_data):
all_samples = []
for data in desired_data:
temperatures = np.atleast_1d(data['conditions']['T'])
num_configs = np.array(data['solver'].get('sublattice_configurations'), dtype=np.object).shape[0]
site_fractions = data['solver'].get('sublattice_occupancies', [[1]] * num_configs)
site_fraction_product = [reduce(operator.mul, list(itertools.chain(*[np.atleast_1d(f) for f in fracs])), 1)
for fracs in site_fractions]
# TODO: Subtle sorting bug here, if the interactions aren't already in sorted order...
interaction_product = []
for fracs in site_fractions:
interaction_product.append(float(reduce(operator.mul,
[f[0] - f[1] for f in fracs if isinstance(f, list) and len(f) == 2],
1)))
if len(interaction_product) == 0:
interaction_product = [0]
comp_features = zip(site_fraction_product, interaction_product)
all_samples.extend(list(itertools.product(temperatures, comp_features)))
return all_samples
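To make the expected dataset layout concrete, here is a hypothetical input traced by hand through the function above; the keys mirror exactly what the code reads.

desired_data = [{
    "conditions": {"T": [300, 400]},
    "solver": {
        "sublattice_configurations": [["CU", "MG"]],
        "sublattice_occupancies": [[0.5, 0.5]],
    },
}]
samples = get_samples(desired_data)
# Scalar occupancies contribute no interaction term (empty product -> 1.0):
# [(300, (0.25, 1.0)), (400, (0.25, 1.0))]
print(samples)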
def _shift_reference_state(desired_data, feature_transform, fixed_model):
"""
Shift data to a new common reference state.
"""
total_response = []
for dataset in desired_data:
values = np.asarray(dataset['values'], dtype=np.object)
if dataset['solver'].get('sublattice_occupancies', None) is not None:
value_idx = 0
for occupancy, config in zip(dataset['solver']['sublattice_occupancies'],
dataset['solver']['sublattice_configurations']):
if dataset['output'].endswith('_FORM'):
pass
elif dataset['output'].endswith('_MIX'):
values[..., value_idx] += feature_transform(fixed_model.models['ref'])
pass
else:
raise ValueError('Unknown property to shift: {}'.format(dataset['output']))
value_idx += 1
total_response.append(values.flatten())
return total_response
def get_his_std( data_pixel, rois, max_cts=None):
'''
    YG. Dec 16, 2016
    Calculate the photon-count histogram for multiple q rings from the given photon counts.
    Parameters:
        data_pixel: multi-dimensional array of photon counts
        rois: labeled ROI array defining the q rings
        max_cts: histogram bin maximum; bins will be [0, 1, 2, ..., max_cts]
    Return:
        bins
        his
        std
        kmean
'''
if max_cts is None:
max_cts = np.max( data_pixel ) + 1
qind, pixelist = roi.extract_label_indices( rois )
noqs = len( np.unique(qind) )
his= np.zeros( [noqs], dtype=np.object)
std= np.zeros_like( his, dtype=np.object)
kmean = np.zeros_like( his, dtype=np.object)
for qi in range(noqs):
pixelist_qi = np.where( qind == qi+1)[0]
#print(qi, max_cts)
bins, his[qi], std[qi], kmean[qi] = get_his_std_qi( data_pixel[:,pixelist_qi] , max_cts)
return bins, his, std, kmean
def get_his_std_from_pds( spec_pds, his_shapes=None):
'''Y.G.Dec 22, 2016
get spec_his, spec_std from a pandas.dataframe file
Parameters:
spec_pds: pandas.dataframe, contains columns as 'count',
spec_his (as 'his_level_0_q_0'), spec_std (as 'std_level_0_q_0')
    his_shapes: the shape of the returned spec_his; if None, shapes = (2, (len(spec_pds.keys())-1)/4)
Return:
spec_his: array, shape as his_shapes
spec_std, array, shape as his_shapes
'''
spkeys = list( spec_pds.keys() )
if his_shapes is None:
M,N = 2, int( (len(spkeys)-1)/4 )
#print(M,N)
spec_his = np.zeros( [M,N], dtype=np.object)
spec_std = np.zeros( [M,N], dtype=np.object)
for i in range(M):
for j in range(N):
spec_his[i,j] = np.array( spec_pds[ spkeys[1+ i*N + j] ][ ~np.isnan( spec_pds[ spkeys[1+ i*N + j] ] )] )
spec_std[i,j] = np.array( spec_pds[ spkeys[1+ 2*N + i*N + j]][ ~np.isnan( spec_pds[ spkeys[1+ 2*N + i*N + j]] )] )
return spec_his, spec_std
def coords_edges(self, edges):
'''
Returns a list of coordinates head and tail points for all edge in edges
'''
res = np.empty((len(edges)), dtype=object)
for r, e in zip(range(len(edges)), edges):
if e[0] is None:
e[0] = 0
res[r] = self.coords_edge(e)
if len(res[r][0]) != 2:
            print('there is an error with the edges')
import pdb
pdb.set_trace()
# v = np.vectorize(self.coords_edge, otypes=[np.object])
# res = v(edges)
return res
def DFS(self, start, fs=None):
'''
Returns the DFS tree for the graph starting from start
'''
to_be_processed = np.array([start], dtype=np.int)
known = np.array([], dtype=np.int)
tree = np.array([], dtype=object)
if fs is None:
fs = self.FSs
while len(to_be_processed) > 0:
# pop
current_node = to_be_processed[0]
to_be_processed = np.delete(to_be_processed, 0)
for node in fs[current_node]:
if node not in known:
known = np.append(known, node)
tree = np.append(tree, None)
tree[-1] = (current_node, node)
# push
to_be_processed = np.insert(to_be_processed, 0, node)
return tree
def prim(self):
'''
    Returns Prim's minimum spanning tree
'''
big_f = set([])
costs = np.empty((self.n), dtype=object)
costs[:] = np.max(self.costs) + 1
big_e = np.empty((self.n), dtype=object)
big_q = set(range(self.n))
tree_edges = np.array([], dtype=object)
while len(big_q) > 0:
v = np.argmin(costs)
big_q.remove(v)
costs[v] = np.Infinity
big_f.add(v)
if big_e[v] is not None:
tree_edges = np.append(tree_edges, None)
tree_edges[-1] = (big_e[v], v)
for i, w in zip(range(len(self.FSs[v])), self.FSs[v]):
if w in big_q and self.FS_costs[v][i] < costs[w]:
costs[w] = self.FS_costs[v][i]
big_e[w] = v
return tree_edges
def connect_graphs(self, sets_orig, edges_orig):
'''
Returns the edges needed to connect unconnected graphs (sets of nodes)
given a set of sets of nodes, select the master_graph (the biggest) one
and search the shortest edges to connect the other sets of nodes
'''
master_graph = max(sets_orig, key=len)
sets = sets_orig.copy()
edges = np.array([], dtype=object)
sets.remove(master_graph)
master_tree = cKDTree(self.nodes[list(master_graph)])
for s in sets:
x = np.array(list(s))
nearests = np.array([master_tree.query(self.nodes[v]) for v in x])
tails = nearests[
nearests[:, 0].argsort()][:, 1][:self.max_neighbours]
heads = x[nearests[:, 0].argsort()][:self.max_neighbours]
for head, tail in zip(heads, tails):
edges = np.append(edges, None)
edges[-1] = (head, tail)
edges = np.append(edges, None)
edges[-1] = (tail, head)
return edges
def write_csv(df, filename):
""" Write a pandas dataframe to CSV in a standard way """
# Verify that the data do not contain commas
for col in df.select_dtypes([np.object]):
if df[col].str.contains(',').any():
raise ValueError("Failed write to %s: Column %s contains commas" % (filename, col))
df.to_csv(filename, header=True, index=False, sep=',')
def append_data_column(ds, column):
# Extend the dataset to fit the new data
new_count = column.shape[0]
existing_count = ds.shape[0]
ds.resize((existing_count + new_count,))
levels = get_levels(ds)
if levels is not None:
# update levels if we have new unique values
if type(column.values) == p.Categorical:
added_levels = set(column.values.categories) - set(levels)
elif len(column) == 0:
# Workaround for bug in pandas - get a crash in .unique() for an empty series
added_levels = set([])
else:
added_levels = set(column.unique()) - set(levels)
new_levels = list(levels)
new_levels.extend(added_levels)
# Check if the new categorical column has more levels
# than the current bit width supports.
# If so, rewrite the existing column data w/ more bits
if len(new_levels) > np.iinfo(ds.dtype).max:
new_dtype = pick_cat_dtype(len(new_levels))
ds = widen_cat_column(ds, new_dtype)
new_levels = np.array(new_levels, dtype=np.object)
new_data = make_index_array(new_levels, column.values, ds.dtype)
clear_levels(ds)
create_levels(ds, new_levels)
else:
new_data = column
# Append new data
ds[existing_count:(existing_count + new_count)] = new_data
def analyze_pd_dataframe(dataframe, target_attributes):
"""Analyze pandas.Dataframe and convert it into internal representation.
Parameters
----------
dataframe : pd.Dataframe
input data, can contain float, int, object
target_attributes : int, str or list
Index the target attribute. If this is
* an int, use this as an index (only works with positive indices)
* a str, use this to compare with the column values
* a list (which must either consist of all ints or strs), of which
all elements that matched are assumed to be targets.
Returns
-------
np.ndarray
Data. All columns are converted to type float. Categorical data is
encoded by positive integers.
dict
Attribute types. Contains the following keys:
        * `type`: `categorical` or `numerical`
* `name`: column name of the dataframe
* `is_target`: whether this column was designated as a target column
"""
dataframe = _normalize_pd_column_names(dataframe)
attribute_types = _get_pd_attribute_types(dataframe, target_attributes)
dataframe = _replace_objects_by_integers(dataframe, attribute_types)
return dataframe.values, attribute_types
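A hypothetical call sketch for the function above; the underscore-prefixed helpers it delegates to live elsewhere in the same module and are assumed to be available.

import pandas as pd

df = pd.DataFrame({
    "color": ["red", "blue", "red"],  # object column, encoded as integers
    "size": [1.0, 2.5, 3.0],          # numerical column, kept as float
    "label": [0, 1, 0],
})
X, attribute_types = analyze_pd_dataframe(df, target_attributes="label")
print(X.shape)          # (3, 3); all columns converted to float
print(attribute_types)  # per-column type / name / is_target metadata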
def _compute_optimal(self):
not_visited = {
(y, x)
for x in range(self.width) for y in range(self.height)
}
queue = collections.deque()
queue.append(tuple(j[0] for j in np.where(self.grid == G)))
policy = np.empty(self.grid.shape, dtype=np.object)
print("INITIAL POLICY")
print(policy)
while len(queue) > 0:
current = queue.pop()
if current in not_visited:
not_visited.remove(current)
possible_actions = self.possible_next_actions(
self._index(current), True
)
for action in possible_actions:
self._state = self._index(current)
next_state, _, _, _ = self.step(action)
next_state_pos = self._pos(next_state)
if next_state_pos not in not_visited:
continue
not_visited.remove(next_state_pos)
if not self.is_terminal(next_state) and \
self.grid[next_state_pos] != W:
policy[next_state_pos] = self.invert_action(action)
queue.appendleft(self._pos(next_state))
print("FINAL POLICY")
print(policy)
return policy
def test_run(self):
"""Only test hash runs at all."""
for t in [np.int, np.float, np.complex, np.int32, np.str, np.object,
np.unicode]:
dt = np.dtype(t)
hash(dt)
def test_shape_sequence(self):
# Any sequence of integers should work as shape, but the result
# should be a tuple (immutable) of base type integers.
a = np.array([1, 2, 3], dtype=np.int16)
l = [1, 2, 3]
# Array gets converted
dt = np.dtype([('a', 'f4', a)])
assert_(isinstance(dt['a'].shape, tuple))
assert_(isinstance(dt['a'].shape[0], int))
# List gets converted
dt = np.dtype([('a', 'f4', l)])
assert_(isinstance(dt['a'].shape, tuple))
#
class IntLike(object):
def __index__(self):
return 3
def __int__(self):
# (a PyNumber_Check fails without __int__)
return 3
dt = np.dtype([('a', 'f4', IntLike())])
assert_(isinstance(dt['a'].shape, tuple))
assert_(isinstance(dt['a'].shape[0], int))
dt = np.dtype([('a', 'f4', (IntLike(),))])
assert_(isinstance(dt['a'].shape, tuple))
assert_(isinstance(dt['a'].shape[0], int))
def test_empty_string_to_object(self):
# Pull request #4722
np.array(["", ""]).astype(object)
def test_object_nans(self):
# Multiple checks to give this a chance to
# fail if cmp is used instead of rich compare.
# Failure cannot be guaranteed.
for i in range(1):
x = np.array(float('nan'), np.object)
y = 1.0
z = np.array(float('nan'), np.object)
assert_(np.maximum(x, y) == 1.0)
assert_(np.maximum(z, y) == 1.0)
def test_object_array(self):
arg1 = np.arange(5, dtype=np.object)
arg2 = arg1 + 1
assert_equal(np.maximum(arg1, arg2), arg2)
def test_object_array(self):
arg1 = np.arange(5, dtype=np.object)
arg2 = arg1 + 1
assert_equal(np.minimum(arg1, arg2), arg1)
def test_sign_dtype_object(self):
# In reference to github issue #6229
foo = np.array([-.1, 0, .1])
a = np.sign(foo.astype(np.object))
b = np.sign(foo)
assert_array_equal(a, b)
def test_sign_dtype_nan_object(self):
# In reference to github issue #6229
def test_nan():
foo = np.array([np.nan])
a = np.sign(foo.astype(np.object))
assert_raises(TypeError, test_nan)
def test_old_wrap(self):
class with_wrap(object):
def __array__(self):
return np.zeros(1)
def __array_wrap__(self, arr):
r = with_wrap()
r.arr = arr
return r
a = with_wrap()
x = ncu.minimum(a, a)
assert_equal(x.arr, np.zeros(1))
def test_failing_wrap(self):
class A(object):
def __array__(self):
return np.zeros(1)
def __array_wrap__(self, arr, context):
raise RuntimeError
a = A()
self.assertRaises(RuntimeError, ncu.maximum, a, a)
def test_default_prepare(self):
class with_wrap(object):
__array_priority__ = 10
def __array__(self):
return np.zeros(1)
def __array_wrap__(self, arr, context):
return arr
a = with_wrap()
x = ncu.minimum(a, a)
assert_equal(x, np.zeros(1))
assert_equal(type(x), np.ndarray)
def test_failing_prepare(self):
class A(object):
def __array__(self):
return np.zeros(1)
def __array_prepare__(self, arr, context=None):
raise RuntimeError
a = A()
self.assertRaises(RuntimeError, ncu.maximum, a, a)
def test_array_with_context(self):
class A(object):
def __array__(self, dtype=None, context=None):
func, args, i = context
self.func = func
self.args = args
self.i = i
return np.zeros(1)
class B(object):
def __array__(self, dtype=None):
return np.zeros(1, dtype)
class C(object):
def __array__(self):
return np.zeros(1)
a = A()
ncu.maximum(np.zeros(1), a)
self.assertTrue(a.func is ncu.maximum)
assert_equal(a.args[0], 0)
self.assertTrue(a.args[1] is a)
self.assertTrue(a.i == 1)
assert_equal(ncu.maximum(a, B()), 0)
assert_equal(ncu.maximum(a, C()), 0)
def test_ufunc_override_disabled(self):
# 2016-01-29: NUMPY_UFUNC_DISABLED
# This test should be removed when __numpy_ufunc__ is re-enabled.
class MyArray(object):
def __numpy_ufunc__(self, *args, **kwargs):
self._numpy_ufunc_called = True
my_array = MyArray()
real_array = np.ones(10)
assert_raises(TypeError, lambda: real_array + my_array)
assert_raises(TypeError, np.add, real_array, my_array)
assert not hasattr(my_array, "_numpy_ufunc_called")