Python numpy module: bytes_() example source code
We extracted the following code examples from open-source Python projects to illustrate how to use numpy.bytes_().
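As a quick orientation (a minimal sketch, not taken from any of the projects below): numpy.bytes_ is NumPy's scalar type for fixed-width byte strings (the 'S' dtypes) and a subclass of Python's built-in bytes, which is why the .decode() calls in the examples below work.

import numpy as np

# np.bytes_ serves both as a scalar constructor and as an 'S' dtype specifier.
s = np.bytes_(b'abc')
print(isinstance(s, bytes))   # True: np.bytes_ subclasses bytes
print(s.decode('utf-8'))      # 'abc'

a = np.array([b'abc', b'a'], dtype=np.bytes_)
print(a.dtype)                # dtype('S3'): width of the longest element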
def test_iter_buffering_string():
    # Safe casting disallows shrinking strings
    a = np.array(['abc', 'a', 'abcd'], dtype=np.bytes_)
    assert_equal(a.dtype, np.dtype('S4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                  op_dtypes='S2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='S6')
    assert_equal(i[0], asbytes('abc'))
    assert_equal(i[0].dtype, np.dtype('S6'))

    a = np.array(['abc', 'a', 'abcd'], dtype=np.unicode)
    assert_equal(a.dtype, np.dtype('U4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                  op_dtypes='U2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='U6')
    assert_equal(i[0], sixu('abc'))
    assert_equal(i[0].dtype, np.dtype('U6'))
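The safe-casting rule this test exercises can be checked directly with np.can_cast; the following is an illustrative sketch, not part of the test suite:

import numpy as np

print(np.can_cast('S4', 'S2', casting='safe'))  # False: shrinking could truncate
print(np.can_cast('S4', 'S6', casting='safe'))  # True: widening is always safe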
def read_atlas_annot(atlas_dir, hemi_list=None):
    """Returns atlas annotations."""
    if hemi_list is None:
        hemi_list = ['lh', 'rh']

    annot = dict()
    for hemi in hemi_list:
        annot[hemi] = dict()
        annot_path = pjoin(atlas_dir, 'label', '{}.aparc.annot'.format(hemi))
        annot[hemi]['labels'], annot[hemi]['ctab'], annot[hemi]['names'] = \
            nib.freesurfer.io.read_annot(annot_path, orig_ids=True)
        # ensure names are plain strings
        if isinstance(annot[hemi]['names'][0], np.bytes_):
            annot[hemi]['names'] = [bytestr.decode('UTF-8')
                                    for bytestr in annot[hemi]['names']]

    return annot
def hdf_attr_to_dict(attr):
    """
    Convert HDF attributes to a plain dict.
    """
    try:
        output_dict = dict(attr)
    except Exception:
        output_dict = {}
        for count in attr:
            try:
                output_dict[count] = attr[count]
            except Exception:
                print('Fail: {}'.format(count))

    # Strings in HDF files are read back as numpy bytes_ scalars.
    # We want the in-memory instance to hold Python strings,
    # so do a simple conversion here.
    for k in output_dict:
        if isinstance(output_dict[k], _np.bytes_):
            output_dict[k] = output_dict[k].decode('UTF-8')

    return output_dict
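The decode step above works because np.bytes_ is a subclass of bytes; a minimal standalone check (with a hypothetical attribute value, not tied to any HDF file):

import numpy as _np

attr_value = _np.bytes_(b'sample attribute')
print(isinstance(attr_value, _np.bytes_))  # True
print(attr_value.decode('UTF-8'))          # 'sample attribute'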
def _extract_edge_value(tree, edge):
    ft_idx = edge.calc_record.feature_idx
    split_type = edge.calc_record.split_type
    val = edge.value_encoded
    pivot = edge.calc_record.pivot
    if split_type is CalcRecord.NUM:
        if val == SplitRecord.GREATER:
            return ">{0:.2f}".format(pivot)
        else:
            return "<={0:.2f}".format(pivot)
    elif tree.X_encoders is not None:
        value = tree.X_encoders[ft_idx].single_inv_transform(val)
        if isinstance(value, np.bytes_):
            return value.decode('UTF-8')
        else:
            return value
    else:
        return val
def normalize_attr_strings(a: np.ndarray) -> np.ndarray:
    """
    Take an np.ndarray of all kinds of string-like elements, and return an array of ascii (np.string_) objects
    """
    if np.issubdtype(a.dtype, np.object_):
        if np.all([type(x) is str for x in a]) or np.all([type(x) is np.str_ for x in a]) or np.all([type(x) is np.unicode_ for x in a]):
            return np.array([x.encode('ascii', 'xmlcharrefreplace') for x in a])
        elif np.all([type(x) is np.string_ for x in a]) or np.all([type(x) is np.bytes_ for x in a]):
            return a.astype("string_")
        else:
            print(type(a[0]))
            raise ValueError("Arbitrary numpy object arrays not supported (all elements must be string objects).")
    elif np.issubdtype(a.dtype, np.string_) or np.issubdtype(a.dtype, np.object_):
        return a
    elif np.issubdtype(a.dtype, np.str_) or np.issubdtype(a.dtype, np.unicode_):
        return np.array([x.encode('ascii', 'xmlcharrefreplace') for x in a])
    else:
        raise ValueError("String values must be object, ascii or unicode.")
def h5py_dataset_iterator(self, g, prefix=''):
    for key in g.keys():
        item = g[key]
        path = '{}/{}'.format(prefix, key)
        keys = [i for i in item.keys()]
        if isinstance(item[keys[0]], h5py.Dataset):  # test for dataset
            data = {'path': path}
            for k in keys:
                if not isinstance(item[k], h5py.Group):
                    dataset = np.array(item[k].value)
                    if type(dataset) is np.ndarray:
                        if dataset.size != 0:
                            if type(dataset[0]) is np.bytes_:
                                dataset = [a.decode('ascii') for a in dataset]
                    data.update({k: dataset})
            yield data
        else:  # test for group (go down)
            yield from self.h5py_dataset_iterator(item, path)
def get_data(self, path, prefix=''):
    item = self.store[path]
    path = '{}/{}'.format(prefix, path)
    keys = [i for i in item.keys()]
    data = {'path': path}
    for k in keys:
        if not isinstance(item[k], h5py.Group):
            dataset = np.array(item[k].value)
            if type(dataset) is np.ndarray:
                if dataset.size != 0:
                    if type(dataset[0]) is np.bytes_:
                        dataset = [a.decode('ascii') for a in dataset]
            data.update({k: dataset})
    return data
def _getconv(dtype):
    """ Find the correct dtype converter. Adapted from matplotlib """

    def floatconv(x):
        x = x.lower()  # normalize '0X' prefixes before the hex check
        if b'0x' in x:
            return float.fromhex(asstr(x))
        return float(x)

    typ = dtype.type
    if issubclass(typ, np.bool_):
        return lambda x: bool(int(x))
    if issubclass(typ, np.uint64):
        return np.uint64
    if issubclass(typ, np.int64):
        return np.int64
    if issubclass(typ, np.integer):
        return lambda x: int(float(x))
    elif issubclass(typ, np.longdouble):
        return np.longdouble
    elif issubclass(typ, np.floating):
        return floatconv
    elif issubclass(typ, np.complex):
        return lambda x: complex(asstr(x))
    elif issubclass(typ, np.bytes_):
        return bytes
    else:
        return str
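A usage sketch for _getconv (assuming np is numpy and asstr comes from numpy.compat, matching the environment the snippet above was written for):

conv = _getconv(np.dtype(np.bytes_))    # 'S' dtypes map to the bytes constructor
print(conv(b'abc'))                     # b'abc'

conv = _getconv(np.dtype(np.float64))   # floating dtypes map to floatconv
print(conv(b'0x1.8p1'))                 # 3.0: hex floats are handled via fromhex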
def decode_qtypes(cls, value):
    """
    Decode all the QCollection items to normal python types
    """
    if isinstance(value, numpy.bytes_):
        return value.decode("utf-8")
    elif isinstance(value, list):
        return value
    else:
        return value.item()
def encode_ascii(s):
    if isinstance(s, str):
        return s.encode('ascii')
    elif isinstance(s, numpy.ndarray) and \
            issubclass(s.dtype.type, numpy.str_):
        ns = numpy.char.encode(s, 'ascii').view(type(s))
        if ns.dtype.itemsize != s.dtype.itemsize / 4:
            ns = ns.astype((numpy.bytes_, s.dtype.itemsize / 4))
        return ns
    return s
def decode_ascii(s):
    if isinstance(s, bytes):
        return s.decode('ascii')
    elif (isinstance(s, numpy.ndarray) and
          issubclass(s.dtype.type, numpy.bytes_)):
        # np.char.encode/decode annoyingly don't preserve the type of the
        # array, hence the view() call.
        # They also don't necessarily preserve widths of the strings,
        # hence the astype().
        ns = numpy.char.decode(s, 'ascii').view(type(s))
        if ns.dtype.itemsize / 4 != s.dtype.itemsize:
            ns = ns.astype((numpy.str_, s.dtype.itemsize))
        return ns
    return s
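A round-trip sketch using the two helpers above (assuming numpy is imported under that name, as in the snippets):

arr = numpy.array(['ab', 'cd'])   # dtype '<U2' (itemsize 8: 4 bytes per character)
enc = encode_ascii(arr)           # dtype 'S2' (itemsize 2)
dec = decode_ascii(enc)           # back to '<U2'
print(enc.dtype, dec.dtype)       # |S2 <U2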
def _ensure_decoded(s):
    """ if we have bytes, decode them to unicode """
    if isinstance(s, (np.bytes_, bytes)):
        s = s.decode(pd.get_option('display.encoding'))
    return s
def test_isscalar_numpy_array_scalars(self):
    self.assertTrue(lib.isscalar(np.int64(1)))
    self.assertTrue(lib.isscalar(np.float64(1.)))
    self.assertTrue(lib.isscalar(np.int32(1)))
    self.assertTrue(lib.isscalar(np.object_('foobar')))
    self.assertTrue(lib.isscalar(np.str_('foobar')))
    self.assertTrue(lib.isscalar(np.unicode_(u('foobar'))))
    self.assertTrue(lib.isscalar(np.bytes_(b'foobar')))
    self.assertTrue(lib.isscalar(np.datetime64('2014-01-01')))
    self.assertTrue(lib.isscalar(np.timedelta64(1, 'h')))
def _ensure_decoded(s):
    """ if we have bytes, decode them to unicode """
    if isinstance(s, np.bytes_):
        s = s.decode('UTF-8')
    return s
def export_text(decision_tree, feature_names=None):
    """Export a decision tree in WEKA-like string format.

    Parameters
    ----------
    decision_tree : decision tree classifier
    feature_names : list of strings, optional (default=None)
        Names of each of the features.

    Returns
    -------
    ret : string
    """
    max_depth = 500

    def build_string(node, indent, depth):
        ret = ''
        if node is None or depth > max_depth:
            return ''
        if node.is_feature:
            ret += '\n'
            template = '| ' * indent
            if feature_names is None:
                template += str(node.details.feature_idx)
            else:
                template += feature_names[node.details.feature_idx]
            template += ' {}'
            for child in node.children:
                edge_value = _extract_edge_value(decision_tree, child[1])
                ret += template.format(edge_value)
                ret += build_string(child[0], indent + 1, depth + 1)
        else:
            value = decision_tree.y_encoder.single_inv_transform(node.value)
            if isinstance(value, np.bytes_):
                value = value.decode('UTF-8')
            ret += ': {} {} \n'.format(value, _extract_class_count(node))
        return ret

    return build_string(decision_tree.root, 0, 0)
def test_pickle_py2_scalar_latin1_hack(self):
    # Check that the scalar unpickling hack in Py3 that supports
    # encoding='latin1' works correctly.

    # Python 2 output for pickle.dumps(...)
    datas = [
        # (original, python2_pickle, koi8r_validity)
        (np.unicode_('\u6bd2'),
         asbytes("cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n"
                 "(S'U1'\np2\nI0\nI1\ntp3\nRp4\n(I3\nS'<'\np5\nNNNI4\nI4\nI0\n"
                 "tp6\nbS'\\xd2k\\x00\\x00'\np7\ntp8\nRp9\n."),
         'invalid'),
        (np.float64(9e123),
         asbytes("cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n(S'f8'\n"
                 "p2\nI0\nI1\ntp3\nRp4\n(I3\nS'<'\np5\nNNNI-1\nI-1\nI0\ntp6\n"
                 "bS'O\\x81\\xb7Z\\xaa:\\xabY'\np7\ntp8\nRp9\n."),
         'invalid'),
        (np.bytes_(asbytes('\x9c')),  # different 8-bit code point in KOI8-R vs latin1
         asbytes("cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n(S'S1'\np2\n"
                 "I0\nI1\ntp3\nRp4\n(I3\nS'|'\np5\nNNNI1\nI1\nI0\ntp6\nbS'\\x9c'\np7\n"
                 "tp8\nRp9\n."),
         'different'),
    ]

    if sys.version_info[0] >= 3:
        for original, data, koi8r_validity in datas:
            result = pickle.loads(data, encoding='latin1')
            assert_equal(result, original)

            # Decoding under a non-latin1 encoding (e.g. KOI8-R) can
            # produce bad results, but should not segfault.
            if koi8r_validity == 'different':
                # The Unicode code points happen to lie within latin1,
                # but are different in KOI8-R, resulting in silently
                # bogus results.
                result = pickle.loads(data, encoding='koi8-r')
                assert_(result != original)
            elif koi8r_validity == 'invalid':
                # The Unicode code points are outside latin1, so decoding
                # raises an encoding exception.
                assert_raises(ValueError, pickle.loads, data, encoding='koi8-r')
            else:
                raise ValueError(koi8r_validity)
def datasetselected(self):
    """ Action: one or more DataSets were selected from the list """
    self.currentdset = self.ui.dataGroupSelect.currentText() + '/' + \
        self.ui.dataSetList.currentItem().text()
    # print('Current Selection : {}'.format(self.currentdset))

    self.allselect = ['/' + str(self.ui.dataGroupSelect.currentText() +
                      '/' + i.text()) for i in self.ui.dataSetList.selectedItems()]

    if len(self.allselect) == 0:
        self.allselect = None
        self.ui.currentDatasetText.setText('')
        attrs = {}
        self.ui.dataSetAttribs.setRowCount(0)
        self.ui.dataSetMemo.setText('')
    else:
        if len(self.allselect) == 1:
            self.ui.currentDatasetText.setText(self.currentdset)
        else:
            self.ui.currentDatasetText.setText(self.currentdset + ' ( + ' +
                                               str(len(self.allselect) - 1) + ' others)')

        self.ui.dataSetAttribs.setSortingEnabled(False)
        self.ui.dataSetAttribs.setRowCount(0)
        self.ui.dataSetAttribs.setColumnCount(2)

        attrs = _h5utils.retrieve_dataset_attribute_dict(self.path + self.filename,
                                                         self.currentdset)
        for count, key in enumerate(attrs.keys()):
            self.ui.dataSetAttribs.insertRow(self.ui.dataSetAttribs.rowCount())
            self.ui.dataSetAttribs.setItem(count, 0, _QTableWidgetItem(str(key)))
            temp = attrs[key]
            # HDF strings come back as numpy bytes_; decode them for display
            if isinstance(temp, _np.bytes_):
                self.ui.dataSetAttribs.setItem(count, 1, _QTableWidgetItem(temp.decode()))
            else:
                self.ui.dataSetAttribs.setItem(count, 1, _QTableWidgetItem(str(temp)))

        self.ui.dataSetAttribs.setSortingEnabled(True)
        self.ui.dataSetAttribs.sortItems(0)

        try:
            self.ui.dataSetMemo.setText(attrs['Memo'].decode())
        except Exception:  # no Memo attribute, or one that is not bytes
            pass