Python numpy 模块,unicode_() 实例源码
我们从Python开源项目中,提取了以下44个代码示例,用于说明如何使用numpy.unicode_()。
def test_from_unicode_array(self):
    """Convert a unicode ndarray to a chararray and check it round-trips.

    Covers the default call, the call with ``kw_unicode_true`` and the call
    with ``kw_unicode_false`` (both keyword dicts are defined outside this
    block); the last one must raise ``UnicodeEncodeError``.
    """
    source = np.array([['abc', sixu('Sigma \u03a3')],
                       ['long ', '0123456789']])
    assert_equal(source.dtype.type, np.unicode_)

    # The conversion must preserve contents, dtype and shape both without
    # extra keywords and with kw_unicode_true.
    for extra_kwargs in ({}, kw_unicode_true):
        converted = np.char.array(source, **extra_kwargs)
        assert_array_equal(converted, source)
        assert_equal(converted.dtype, source.dtype)
        assert_equal(converted.shape, source.shape)

    self.assertRaises(
        UnicodeEncodeError,
        lambda: np.char.array(source, **kw_unicode_false))
def test_join(self):
    """Check ``np.char.join`` with the separators ',' and '#' on ``self.A``."""
    is_py3 = sys.version_info[0] >= 3
    # NOTE: list(b'123') == [49, 50, 51]
    #       so that b','.join(b'123') results to an error on Py3;
    #       on Py3 the fixture is therefore decoded before joining.
    operand = self.A.decode('ascii') if is_py3 else self.A
    separators = [',', '#']

    joined = np.char.join(separators, operand)
    expected_type = np.unicode_ if is_py3 else np.string_
    assert_(issubclass(joined.dtype.type, expected_type))

    expected = np.array([[' ,a,b,c, ', ''],
                         ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'],
                         ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']])
    assert_array_equal(np.char.join(separators, operand), expected)
def test_rstrip(self):
    """Check ``rstrip`` on the ``self.A`` and ``self.B`` fixtures.

    Results from ``self.A`` must be ``np.string_`` typed and results from
    ``self.B`` ``np.unicode_`` typed; both fixtures are created outside
    this block.
    """
    # Default rstrip() on self.A.
    stripped = self.A.rstrip()
    assert_(issubclass(stripped.dtype.type, np.string_))
    expected = asbytes_nested([[' abc', ''],
                               ['12345', 'MixedCase'],
                               ['123 \t 345', 'UPPER']])
    assert_array_equal(stripped, expected)

    # rstrip() with an explicit set of characters to remove.
    expected = asbytes_nested([[' abc ', ''],
                               ['1234', 'MixedCase'],
                               ['123 \t 345 \x00', 'UPP']])
    chars = asbytes_nested(['5', 'ER'])
    assert_array_equal(self.A.rstrip(chars), expected)

    # Default rstrip() on self.B.
    expected = [[sixu(' \u03a3'), ''],
                ['12345', 'MixedCase'],
                ['123 \t 345', 'UPPER']]
    unicode_stripped = self.B.rstrip()
    assert_(issubclass(unicode_stripped.dtype.type, np.unicode_))
    assert_array_equal(unicode_stripped, expected)
def test_strip(self):
    """Check ``strip`` on the ``self.A`` and ``self.B`` fixtures.

    Results from ``self.A`` must be ``np.string_`` typed and results from
    ``self.B`` ``np.unicode_`` typed; both fixtures are created outside
    this block.
    """
    # Default strip() on self.A.
    expected = asbytes_nested([['abc', ''],
                               ['12345', 'MixedCase'],
                               ['123 \t 345', 'UPPER']])
    stripped = self.A.strip()
    assert_(issubclass(stripped.dtype.type, np.string_))
    assert_array_equal(stripped, expected)

    # strip() with an explicit set of characters to remove.
    expected = asbytes_nested([[' abc ', ''],
                               ['234', 'ixedCas'],
                               ['23 \t 345 \x00', 'UPP']])
    chars = asbytes_nested(['15', 'EReM'])
    assert_array_equal(self.A.strip(chars), expected)

    # Default strip() on self.B.
    expected = [[sixu('\u03a3'), ''],
                ['12345', 'MixedCase'],
                ['123 \t 345', 'UPPER']]
    unicode_stripped = self.B.strip()
    assert_(issubclass(unicode_stripped.dtype.type, np.unicode_))
    assert_array_equal(unicode_stripped, expected)
def test_from_unicode_array(self):
    """Convert a unicode ndarray to a chararray and check it round-trips.

    Covers the default call, the call with ``kw_unicode_true`` and the call
    with ``kw_unicode_false`` (both keyword dicts are defined outside this
    block); the last one must raise ``UnicodeEncodeError``.
    """
    source = np.array([['abc', sixu('Sigma \u03a3')],
                       ['long ', '0123456789']])
    assert_equal(source.dtype.type, np.unicode_)

    # The conversion must preserve contents, dtype and shape both without
    # extra keywords and with kw_unicode_true.
    for extra_kwargs in ({}, kw_unicode_true):
        converted = np.char.array(source, **extra_kwargs)
        assert_array_equal(converted, source)
        assert_equal(converted.dtype, source.dtype)
        assert_equal(converted.shape, source.shape)

    self.assertRaises(
        UnicodeEncodeError,
        lambda: np.char.array(source, **kw_unicode_false))
def test_join(self):
    """Check ``np.char.join`` with the separators ',' and '#' on ``self.A``."""
    is_py3 = sys.version_info[0] >= 3
    # NOTE: list(b'123') == [49, 50, 51]
    #       so that b','.join(b'123') results to an error on Py3;
    #       on Py3 the fixture is therefore decoded before joining.
    operand = self.A.decode('ascii') if is_py3 else self.A
    separators = [',', '#']

    joined = np.char.join(separators, operand)
    expected_type = np.unicode_ if is_py3 else np.string_
    assert_(issubclass(joined.dtype.type, expected_type))

    expected = np.array([[' ,a,b,c, ', ''],
                         ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'],
                         ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']])
    assert_array_equal(np.char.join(separators, operand), expected)
def test_rstrip(self):
    """Check ``rstrip`` on the ``self.A`` and ``self.B`` fixtures.

    Results from ``self.A`` must be ``np.string_`` typed and results from
    ``self.B`` ``np.unicode_`` typed; both fixtures are created outside
    this block.
    """
    # Default rstrip() on self.A.
    stripped = self.A.rstrip()
    assert_(issubclass(stripped.dtype.type, np.string_))
    expected = asbytes_nested([[' abc', ''],
                               ['12345', 'MixedCase'],
                               ['123 \t 345', 'UPPER']])
    assert_array_equal(stripped, expected)

    # rstrip() with an explicit set of characters to remove.
    expected = asbytes_nested([[' abc ', ''],
                               ['1234', 'MixedCase'],
                               ['123 \t 345 \x00', 'UPP']])
    chars = asbytes_nested(['5', 'ER'])
    assert_array_equal(self.A.rstrip(chars), expected)

    # Default rstrip() on self.B.
    expected = [[sixu(' \u03a3'), ''],
                ['12345', 'MixedCase'],
                ['123 \t 345', 'UPPER']]
    unicode_stripped = self.B.rstrip()
    assert_(issubclass(unicode_stripped.dtype.type, np.unicode_))
    assert_array_equal(unicode_stripped, expected)
def test_strip(self):
    """Check ``strip`` on the ``self.A`` and ``self.B`` fixtures.

    Results from ``self.A`` must be ``np.string_`` typed and results from
    ``self.B`` ``np.unicode_`` typed; both fixtures are created outside
    this block.
    """
    # Default strip() on self.A.
    expected = asbytes_nested([['abc', ''],
                               ['12345', 'MixedCase'],
                               ['123 \t 345', 'UPPER']])
    stripped = self.A.strip()
    assert_(issubclass(stripped.dtype.type, np.string_))
    assert_array_equal(stripped, expected)

    # strip() with an explicit set of characters to remove.
    expected = asbytes_nested([[' abc ', ''],
                               ['234', 'ixedCas'],
                               ['23 \t 345 \x00', 'UPP']])
    chars = asbytes_nested(['15', 'EReM'])
    assert_array_equal(self.A.strip(chars), expected)

    # Default strip() on self.B.
    expected = [[sixu('\u03a3'), ''],
                ['12345', 'MixedCase'],
                ['123 \t 345', 'UPPER']]
    unicode_stripped = self.B.strip()
    assert_(issubclass(unicode_stripped.dtype.type, np.unicode_))
    assert_array_equal(unicode_stripped, expected)
def test_select_dtypes_str_raises(self):
    """``select_dtypes`` must raise TypeError for every string dtype spec.

    Each spec is tried once as ``include`` and once as ``exclude``.
    """
    frame = DataFrame({'a': list('abc'),
                       'g': list(u('abc')),
                       'b': list(range(1, 4)),
                       'c': np.arange(3, 6).astype('u1'),
                       'd': np.arange(4.0, 7.0, dtype='float64'),
                       'e': [True, False, True],
                       'f': pd.date_range('now', periods=3).values})
    string_dtypes = {str, 'str', np.string_, 'S1',
                     'unicode', np.unicode_, 'U1'}
    # The ``unicode`` builtin only exists on Python 2; skip it elsewhere.
    try:
        string_dtypes.add(unicode)
    except NameError:
        pass

    expected_message = 'string dtypes are not allowed'
    for dt in string_dtypes:
        for keyword in ('include', 'exclude'):
            with tm.assertRaisesRegexp(TypeError, expected_message):
                frame.select_dtypes(**{keyword: [dt]})
def test_from_unicode_array(self):
    """Convert a unicode ndarray to a chararray and check it round-trips.

    Covers the default call, the call with ``kw_unicode_true`` and the call
    with ``kw_unicode_false`` (both keyword dicts are defined outside this
    block); the last one must raise ``UnicodeEncodeError``.
    """
    source = np.array([['abc', sixu('Sigma \u03a3')],
                       ['long ', '0123456789']])
    assert_equal(source.dtype.type, np.unicode_)

    # The conversion must preserve contents, dtype and shape both without
    # extra keywords and with kw_unicode_true.
    for extra_kwargs in ({}, kw_unicode_true):
        converted = np.char.array(source, **extra_kwargs)
        assert_array_equal(converted, source)
        assert_equal(converted.dtype, source.dtype)
        assert_equal(converted.shape, source.shape)

    self.assertRaises(
        UnicodeEncodeError,
        lambda: np.char.array(source, **kw_unicode_false))
def test_join(self):
    """Check ``np.char.join`` with the separators ',' and '#' on ``self.A``."""
    is_py3 = sys.version_info[0] >= 3
    # NOTE: list(b'123') == [49, 50, 51]
    #       so that b','.join(b'123') results to an error on Py3;
    #       on Py3 the fixture is therefore decoded before joining.
    operand = self.A.decode('ascii') if is_py3 else self.A
    separators = [',', '#']

    joined = np.char.join(separators, operand)
    expected_type = np.unicode_ if is_py3 else np.string_
    assert_(issubclass(joined.dtype.type, expected_type))

    expected = np.array([[' ,a,b,c, ', ''],
                         ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'],
                         ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']])
    assert_array_equal(np.char.join(separators, operand), expected)
def test_lstrip(self):
    """Check ``lstrip`` on the ``self.A`` and ``self.B`` fixtures.

    Results from ``self.A`` must be ``np.string_`` typed and results from
    ``self.B`` ``np.unicode_`` typed; both fixtures are created outside
    this block.
    """
    # Default lstrip() on self.A.
    expected = asbytes_nested([['abc ', ''],
                               ['12345', 'MixedCase'],
                               ['123 \t 345 \0 ', 'UPPER']])
    stripped = self.A.lstrip()
    assert_(issubclass(stripped.dtype.type, np.string_))
    assert_array_equal(stripped, expected)

    # lstrip() with an explicit set of characters to remove.
    expected = asbytes_nested([[' abc', ''],
                               ['2345', 'ixedCase'],
                               ['23 \t 345 \x00', 'UPPER']])
    chars = asbytes_nested(['1', 'M'])
    assert_array_equal(self.A.lstrip(chars), expected)

    # Default lstrip() on self.B.
    expected = [[sixu('\u03a3 '), ''],
                ['12345', 'MixedCase'],
                ['123 \t 345 \0 ', 'UPPER']]
    unicode_stripped = self.B.lstrip()
    assert_(issubclass(unicode_stripped.dtype.type, np.unicode_))
    assert_array_equal(unicode_stripped, expected)
def test_rstrip(self):
    """Check ``rstrip`` on the ``self.A`` and ``self.B`` fixtures.

    Results from ``self.A`` must be ``np.string_`` typed and results from
    ``self.B`` ``np.unicode_`` typed; both fixtures are created outside
    this block.
    """
    # Default rstrip() on self.A.
    stripped = self.A.rstrip()
    assert_(issubclass(stripped.dtype.type, np.string_))
    expected = asbytes_nested([[' abc', ''],
                               ['12345', 'MixedCase'],
                               ['123 \t 345', 'UPPER']])
    assert_array_equal(stripped, expected)

    # rstrip() with an explicit set of characters to remove.
    expected = asbytes_nested([[' abc ', ''],
                               ['1234', 'MixedCase'],
                               ['123 \t 345 \x00', 'UPP']])
    chars = asbytes_nested(['5', 'ER'])
    assert_array_equal(self.A.rstrip(chars), expected)

    # Default rstrip() on self.B.
    expected = [[sixu(' \u03a3'), ''],
                ['12345', 'MixedCase'],
                ['123 \t 345', 'UPPER']]
    unicode_stripped = self.B.rstrip()
    assert_(issubclass(unicode_stripped.dtype.type, np.unicode_))
    assert_array_equal(unicode_stripped, expected)
def test_strip(self):
    """Check ``strip`` on the ``self.A`` and ``self.B`` fixtures.

    Results from ``self.A`` must be ``np.string_`` typed and results from
    ``self.B`` ``np.unicode_`` typed; both fixtures are created outside
    this block.
    """
    # Default strip() on self.A.
    expected = asbytes_nested([['abc', ''],
                               ['12345', 'MixedCase'],
                               ['123 \t 345', 'UPPER']])
    stripped = self.A.strip()
    assert_(issubclass(stripped.dtype.type, np.string_))
    assert_array_equal(stripped, expected)

    # strip() with an explicit set of characters to remove.
    expected = asbytes_nested([[' abc ', ''],
                               ['234', 'ixedCas'],
                               ['23 \t 345 \x00', 'UPP']])
    chars = asbytes_nested(['15', 'EReM'])
    assert_array_equal(self.A.strip(chars), expected)

    # Default strip() on self.B.
    expected = [[sixu('\u03a3'), ''],
                ['12345', 'MixedCase'],
                ['123 \t 345', 'UPPER']]
    unicode_stripped = self.B.strip()
    assert_(issubclass(unicode_stripped.dtype.type, np.unicode_))
    assert_array_equal(unicode_stripped, expected)
def test_from_unicode_array(self):
    """Convert a unicode ndarray to a chararray and check it round-trips.

    Covers the default call, the call with ``kw_unicode_true`` and the call
    with ``kw_unicode_false`` (both keyword dicts are defined outside this
    block); the last one must raise ``UnicodeEncodeError``.
    """
    source = np.array([['abc', sixu('Sigma \u03a3')],
                       ['long ', '0123456789']])
    assert_equal(source.dtype.type, np.unicode_)

    # The conversion must preserve contents, dtype and shape both without
    # extra keywords and with kw_unicode_true.
    for extra_kwargs in ({}, kw_unicode_true):
        converted = np.char.array(source, **extra_kwargs)
        assert_array_equal(converted, source)
        assert_equal(converted.dtype, source.dtype)
        assert_equal(converted.shape, source.shape)

    self.assertRaises(
        UnicodeEncodeError,
        lambda: np.char.array(source, **kw_unicode_false))
def test_join(self):
    """Check ``np.char.join`` with the separators ',' and '#' on ``self.A``."""
    is_py3 = sys.version_info[0] >= 3
    # NOTE: list(b'123') == [49, 50, 51]
    #       so that b','.join(b'123') results to an error on Py3;
    #       on Py3 the fixture is therefore decoded before joining.
    operand = self.A.decode('ascii') if is_py3 else self.A
    separators = [',', '#']

    joined = np.char.join(separators, operand)
    expected_type = np.unicode_ if is_py3 else np.string_
    assert_(issubclass(joined.dtype.type, expected_type))

    expected = np.array([[' ,a,b,c, ', ''],
                         ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'],
                         ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']])
    assert_array_equal(np.char.join(separators, operand), expected)
def test_rstrip(self):
    """Check ``rstrip`` on the ``self.A`` and ``self.B`` fixtures.

    Results from ``self.A`` must be ``np.string_`` typed and results from
    ``self.B`` ``np.unicode_`` typed; both fixtures are created outside
    this block.
    """
    # Default rstrip() on self.A.
    stripped = self.A.rstrip()
    assert_(issubclass(stripped.dtype.type, np.string_))
    expected = asbytes_nested([[' abc', ''],
                               ['12345', 'MixedCase'],
                               ['123 \t 345', 'UPPER']])
    assert_array_equal(stripped, expected)

    # rstrip() with an explicit set of characters to remove.
    expected = asbytes_nested([[' abc ', ''],
                               ['1234', 'MixedCase'],
                               ['123 \t 345 \x00', 'UPP']])
    chars = asbytes_nested(['5', 'ER'])
    assert_array_equal(self.A.rstrip(chars), expected)

    # Default rstrip() on self.B.
    expected = [[sixu(' \u03a3'), ''],
                ['12345', 'MixedCase'],
                ['123 \t 345', 'UPPER']]
    unicode_stripped = self.B.rstrip()
    assert_(issubclass(unicode_stripped.dtype.type, np.unicode_))
    assert_array_equal(unicode_stripped, expected)
def test_strip(self):
    """Check ``strip`` on the ``self.A`` and ``self.B`` fixtures.

    Results from ``self.A`` must be ``np.string_`` typed and results from
    ``self.B`` ``np.unicode_`` typed; both fixtures are created outside
    this block.
    """
    # Default strip() on self.A.
    expected = asbytes_nested([['abc', ''],
                               ['12345', 'MixedCase'],
                               ['123 \t 345', 'UPPER']])
    stripped = self.A.strip()
    assert_(issubclass(stripped.dtype.type, np.string_))
    assert_array_equal(stripped, expected)

    # strip() with an explicit set of characters to remove.
    expected = asbytes_nested([[' abc ', ''],
                               ['234', 'ixedCas'],
                               ['23 \t 345 \x00', 'UPP']])
    chars = asbytes_nested(['15', 'EReM'])
    assert_array_equal(self.A.strip(chars), expected)

    # Default strip() on self.B.
    expected = [[sixu('\u03a3'), ''],
                ['12345', 'MixedCase'],
                ['123 \t 345', 'UPPER']]
    unicode_stripped = self.B.strip()
    assert_(issubclass(unicode_stripped.dtype.type, np.unicode_))
    assert_array_equal(unicode_stripped, expected)
def test_from_unicode_array(self):
    """Convert a unicode ndarray to a chararray and check it round-trips.

    Covers the default call, the call with ``kw_unicode_true`` and the call
    with ``kw_unicode_false`` (both keyword dicts are defined outside this
    block); the last one must raise ``UnicodeEncodeError``.
    """
    source = np.array([['abc', sixu('Sigma \u03a3')],
                       ['long ', '0123456789']])
    assert_equal(source.dtype.type, np.unicode_)

    # The conversion must preserve contents, dtype and shape both without
    # extra keywords and with kw_unicode_true.
    for extra_kwargs in ({}, kw_unicode_true):
        converted = np.char.array(source, **extra_kwargs)
        assert_array_equal(converted, source)
        assert_equal(converted.dtype, source.dtype)
        assert_equal(converted.shape, source.shape)

    self.assertRaises(
        UnicodeEncodeError,
        lambda: np.char.array(source, **kw_unicode_false))
def test_join(self):
    """Check ``np.char.join`` with the separators ',' and '#' on ``self.A``."""
    is_py3 = sys.version_info[0] >= 3
    # NOTE: list(b'123') == [49, 50, 51]
    #       so that b','.join(b'123') results to an error on Py3;
    #       on Py3 the fixture is therefore decoded before joining.
    operand = self.A.decode('ascii') if is_py3 else self.A
    separators = [',', '#']

    joined = np.char.join(separators, operand)
    expected_type = np.unicode_ if is_py3 else np.string_
    assert_(issubclass(joined.dtype.type, expected_type))

    expected = np.array([[' ,a,b,c, ', ''],
                         ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'],
                         ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']])
    assert_array_equal(np.char.join(separators, operand), expected)
def test_rstrip(self):
    """Check ``rstrip`` on the ``self.A`` and ``self.B`` fixtures.

    Results from ``self.A`` must be ``np.string_`` typed and results from
    ``self.B`` ``np.unicode_`` typed; both fixtures are created outside
    this block.
    """
    # Default rstrip() on self.A.
    stripped = self.A.rstrip()
    assert_(issubclass(stripped.dtype.type, np.string_))
    expected = asbytes_nested([[' abc', ''],
                               ['12345', 'MixedCase'],
                               ['123 \t 345', 'UPPER']])
    assert_array_equal(stripped, expected)

    # rstrip() with an explicit set of characters to remove.
    expected = asbytes_nested([[' abc ', ''],
                               ['1234', 'MixedCase'],
                               ['123 \t 345 \x00', 'UPP']])
    chars = asbytes_nested(['5', 'ER'])
    assert_array_equal(self.A.rstrip(chars), expected)

    # Default rstrip() on self.B.
    expected = [[sixu(' \u03a3'), ''],
                ['12345', 'MixedCase'],
                ['123 \t 345', 'UPPER']]
    unicode_stripped = self.B.rstrip()
    assert_(issubclass(unicode_stripped.dtype.type, np.unicode_))
    assert_array_equal(unicode_stripped, expected)
def test_strip(self):
    """Check ``strip`` on the ``self.A`` and ``self.B`` fixtures.

    Results from ``self.A`` must be ``np.string_`` typed and results from
    ``self.B`` ``np.unicode_`` typed; both fixtures are created outside
    this block.
    """
    # Default strip() on self.A.
    expected = asbytes_nested([['abc', ''],
                               ['12345', 'MixedCase'],
                               ['123 \t 345', 'UPPER']])
    stripped = self.A.strip()
    assert_(issubclass(stripped.dtype.type, np.string_))
    assert_array_equal(stripped, expected)

    # strip() with an explicit set of characters to remove.
    expected = asbytes_nested([[' abc ', ''],
                               ['234', 'ixedCas'],
                               ['23 \t 345 \x00', 'UPP']])
    chars = asbytes_nested(['15', 'EReM'])
    assert_array_equal(self.A.strip(chars), expected)

    # Default strip() on self.B.
    expected = [[sixu('\u03a3'), ''],
                ['12345', 'MixedCase'],
                ['123 \t 345', 'UPPER']]
    unicode_stripped = self.B.strip()
    assert_(issubclass(unicode_stripped.dtype.type, np.unicode_))
    assert_array_equal(unicode_stripped, expected)
def test_from_unicode_array(self):
    """Convert a unicode ndarray to a chararray and check it round-trips.

    Covers the default call, the call with ``kw_unicode_true`` and the call
    with ``kw_unicode_false`` (both keyword dicts are defined outside this
    block); the last one must raise ``UnicodeEncodeError``.
    """
    source = np.array([['abc', sixu('Sigma \u03a3')],
                       ['long ', '0123456789']])
    assert_equal(source.dtype.type, np.unicode_)

    # The conversion must preserve contents, dtype and shape both without
    # extra keywords and with kw_unicode_true.
    for extra_kwargs in ({}, kw_unicode_true):
        converted = np.char.array(source, **extra_kwargs)
        assert_array_equal(converted, source)
        assert_equal(converted.dtype, source.dtype)
        assert_equal(converted.shape, source.shape)

    self.assertRaises(
        UnicodeEncodeError,
        lambda: np.char.array(source, **kw_unicode_false))
def test_join(self):
    """Check ``np.char.join`` with the separators ',' and '#' on ``self.A``."""
    is_py3 = sys.version_info[0] >= 3
    # NOTE: list(b'123') == [49, 50, 51]
    #       so that b','.join(b'123') results to an error on Py3;
    #       on Py3 the fixture is therefore decoded before joining.
    operand = self.A.decode('ascii') if is_py3 else self.A
    separators = [',', '#']

    joined = np.char.join(separators, operand)
    expected_type = np.unicode_ if is_py3 else np.string_
    assert_(issubclass(joined.dtype.type, expected_type))

    expected = np.array([[' ,a,b,c, ', ''],
                         ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'],
                         ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']])
    assert_array_equal(np.char.join(separators, operand), expected)
def test_rstrip(self):
    """Check ``rstrip`` on the ``self.A`` and ``self.B`` fixtures.

    Results from ``self.A`` must be ``np.string_`` typed and results from
    ``self.B`` ``np.unicode_`` typed; both fixtures are created outside
    this block.
    """
    # Default rstrip() on self.A.
    stripped = self.A.rstrip()
    assert_(issubclass(stripped.dtype.type, np.string_))
    expected = asbytes_nested([[' abc', ''],
                               ['12345', 'MixedCase'],
                               ['123 \t 345', 'UPPER']])
    assert_array_equal(stripped, expected)

    # rstrip() with an explicit set of characters to remove.
    expected = asbytes_nested([[' abc ', ''],
                               ['1234', 'MixedCase'],
                               ['123 \t 345 \x00', 'UPP']])
    chars = asbytes_nested(['5', 'ER'])
    assert_array_equal(self.A.rstrip(chars), expected)

    # Default rstrip() on self.B.
    expected = [[sixu(' \u03a3'), ''],
                ['12345', 'MixedCase'],
                ['123 \t 345', 'UPPER']]
    unicode_stripped = self.B.rstrip()
    assert_(issubclass(unicode_stripped.dtype.type, np.unicode_))
    assert_array_equal(unicode_stripped, expected)
def test_strip(self):
    """Check ``strip`` on the ``self.A`` and ``self.B`` fixtures.

    Results from ``self.A`` must be ``np.string_`` typed and results from
    ``self.B`` ``np.unicode_`` typed; both fixtures are created outside
    this block.
    """
    # Default strip() on self.A.
    expected = asbytes_nested([['abc', ''],
                               ['12345', 'MixedCase'],
                               ['123 \t 345', 'UPPER']])
    stripped = self.A.strip()
    assert_(issubclass(stripped.dtype.type, np.string_))
    assert_array_equal(stripped, expected)

    # strip() with an explicit set of characters to remove.
    expected = asbytes_nested([[' abc ', ''],
                               ['234', 'ixedCas'],
                               ['23 \t 345 \x00', 'UPP']])
    chars = asbytes_nested(['15', 'EReM'])
    assert_array_equal(self.A.strip(chars), expected)

    # Default strip() on self.B.
    expected = [[sixu('\u03a3'), ''],
                ['12345', 'MixedCase'],
                ['123 \t 345', 'UPPER']]
    unicode_stripped = self.B.strip()
    assert_(issubclass(unicode_stripped.dtype.type, np.unicode_))
    assert_array_equal(unicode_stripped, expected)
def normalize_attr_strings(a: np.ndarray) -> np.ndarray:
    """
    Take an np.ndarray of all kinds of string-like elements, and return an
    array of ascii (np.bytes_) objects.

    Text elements are encoded to ASCII; characters outside ASCII are written
    as XML character references (e.g. ``&#931;``). Arrays that already hold
    byte strings are returned unchanged (the same object, not a copy).

    Args:
        a: Array whose dtype is object, bytes or unicode. Elements are
            visited by iterating ``a``, so text input is expected to be 1-D.

    Returns:
        An array with a bytes ("S") dtype.

    Raises:
        ValueError: If an object array contains anything other than all-text
            or all-bytes elements, or if the dtype is not string-like.
    """
    def _encode(values) -> np.ndarray:
        # Passing dtype explicitly keeps an empty input a bytes array
        # instead of numpy's default float64.
        return np.array(
            [v.encode('ascii', 'xmlcharrefreplace') for v in values],
            dtype=np.bytes_)

    # np.str_ / np.bytes_ are used instead of the np.unicode_ / np.string_
    # aliases because those aliases were removed in NumPy 2.0.
    if np.issubdtype(a.dtype, np.object_):
        # isinstance also accepts np.str_ / np.bytes_ scalars (subclasses of
        # str / bytes) and arrays that mix them with the builtin types.
        if all(isinstance(x, str) for x in a):
            return _encode(a)
        if all(isinstance(x, bytes) for x in a):
            return a.astype(np.bytes_)
        raise ValueError("Arbitrary numpy object arrays not supported (all elements must be string objects).")
    if np.issubdtype(a.dtype, np.bytes_):
        return a
    if np.issubdtype(a.dtype, np.str_):
        return _encode(a)
    raise ValueError("String values must be object, ascii or unicode.")
def materialize_attr_values(a: np.ndarray) -> np.ndarray:
    """
    Convert stored attribute values back to unicode strings.

    Byte strings are decoded as ASCII (bytes outside the 7-bit range are
    dropped) and XML/HTML character references are unescaped. Unicode input
    is copied to a ``np.str_`` array. Any other dtype is returned unchanged.

    Args:
        a: An array, or a single scalar value.

    Returns:
        An array of the converted values, or a single element when ``a`` was
        a scalar.
    """
    scalar = np.isscalar(a)
    if scalar:
        # Wrap scalars so the same array code path handles them.
        a = np.array([a])

    # np.bytes_ / np.str_ are used instead of the np.string_ / np.unicode_
    # aliases because those aliases were removed in NumPy 2.0.
    if np.issubdtype(a.dtype, np.bytes_):
        # First ensure that what we load is valid ascii (i.e. ignore anything outside 7-bit range)
        decoded = [x.decode('ascii', 'ignore') for x in a]
        # Then unescape XML entities and convert to unicode
        result = np.array([html.unescape(s) for s in decoded], dtype=np.str_)
    elif np.issubdtype(a.dtype, np.str_):
        result = a.astype(np.str_)
    else:
        result = a

    return result[0] if scalar else result
def test_from_unicode_array(self):
    """Convert a unicode ndarray to a chararray and check it round-trips.

    Covers the default call, the call with ``kw_unicode_true`` and the call
    with ``kw_unicode_false`` (both keyword dicts are defined outside this
    block); the last one must raise ``UnicodeEncodeError``.
    """
    source = np.array([['abc', sixu('Sigma \u03a3')],
                       ['long ', '0123456789']])
    assert_equal(source.dtype.type, np.unicode_)

    # The conversion must preserve contents, dtype and shape both without
    # extra keywords and with kw_unicode_true.
    for extra_kwargs in ({}, kw_unicode_true):
        converted = np.char.array(source, **extra_kwargs)
        assert_array_equal(converted, source)
        assert_equal(converted.dtype, source.dtype)
        assert_equal(converted.shape, source.shape)

    self.assertRaises(
        UnicodeEncodeError,
        lambda: np.char.array(source, **kw_unicode_false))
def test_join(self):
    """Check ``np.char.join`` with the separators ',' and '#' on ``self.A``."""
    is_py3 = sys.version_info[0] >= 3
    # NOTE: list(b'123') == [49, 50, 51]
    #       so that b','.join(b'123') results to an error on Py3;
    #       on Py3 the fixture is therefore decoded before joining.
    operand = self.A.decode('ascii') if is_py3 else self.A
    separators = [',', '#']

    joined = np.char.join(separators, operand)
    expected_type = np.unicode_ if is_py3 else np.string_
    assert_(issubclass(joined.dtype.type, expected_type))

    expected = np.array([[' ,a,b,c, ', ''],
                         ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'],
                         ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']])
    assert_array_equal(np.char.join(separators, operand), expected)
def test_rstrip(self):
    """Check ``rstrip`` on the ``self.A`` and ``self.B`` fixtures.

    Results from ``self.A`` must be ``np.string_`` typed and results from
    ``self.B`` ``np.unicode_`` typed; both fixtures are created outside
    this block.
    """
    # Default rstrip() on self.A.
    stripped = self.A.rstrip()
    assert_(issubclass(stripped.dtype.type, np.string_))
    expected = asbytes_nested([[' abc', ''],
                               ['12345', 'MixedCase'],
                               ['123 \t 345', 'UPPER']])
    assert_array_equal(stripped, expected)

    # rstrip() with an explicit set of characters to remove.
    expected = asbytes_nested([[' abc ', ''],
                               ['1234', 'MixedCase'],
                               ['123 \t 345 \x00', 'UPP']])
    chars = asbytes_nested(['5', 'ER'])
    assert_array_equal(self.A.rstrip(chars), expected)

    # Default rstrip() on self.B.
    expected = [[sixu(' \u03a3'), ''],
                ['12345', 'MixedCase'],
                ['123 \t 345', 'UPPER']]
    unicode_stripped = self.B.rstrip()
    assert_(issubclass(unicode_stripped.dtype.type, np.unicode_))
    assert_array_equal(unicode_stripped, expected)
def test_strip(self):
    """Check ``strip`` on the ``self.A`` and ``self.B`` fixtures.

    Results from ``self.A`` must be ``np.string_`` typed and results from
    ``self.B`` ``np.unicode_`` typed; both fixtures are created outside
    this block.
    """
    # Default strip() on self.A.
    expected = asbytes_nested([['abc', ''],
                               ['12345', 'MixedCase'],
                               ['123 \t 345', 'UPPER']])
    stripped = self.A.strip()
    assert_(issubclass(stripped.dtype.type, np.string_))
    assert_array_equal(stripped, expected)

    # strip() with an explicit set of characters to remove.
    expected = asbytes_nested([[' abc ', ''],
                               ['234', 'ixedCas'],
                               ['23 \t 345 \x00', 'UPP']])
    chars = asbytes_nested(['15', 'EReM'])
    assert_array_equal(self.A.strip(chars), expected)

    # Default strip() on self.B.
    expected = [[sixu('\u03a3'), ''],
                ['12345', 'MixedCase'],
                ['123 \t 345', 'UPPER']]
    unicode_stripped = self.B.strip()
    assert_(issubclass(unicode_stripped.dtype.type, np.unicode_))
    assert_array_equal(unicode_stripped, expected)
def test_unicode_string_comparison(self, level=rlevel):
    """Compare an explicitly unicode-typed array with a default-typed one."""
    # Ticket #190
    explicit_unicode = np.array('hello', np.unicode_)
    default_typed = np.array('world')
    # The result is deliberately discarded: nothing is asserted here, the
    # comparison only has to run without raising.
    explicit_unicode == default_typed
def test_pickle_py2_bytes_encoding(self):
    """Load Python 2 pickles of numpy objects with ``encoding='bytes'``."""
    # Check that arrays and scalars pickled on Py2 are
    # unpickleable on Py3 using encoding='bytes'
    cases = [
        # (expected object, pickle bytes produced on Py2)
        (np.unicode_('\u6f2c'),
         asbytes("cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n"
                 "(S'U1'\np2\nI0\nI1\ntp3\nRp4\n(I3\nS'<'\np5\nNNNI4\nI4\n"
                 "I0\ntp6\nbS',o\\x00\\x00'\np7\ntp8\nRp9\n.")),
        (np.array([9e123], dtype=np.float64),
         asbytes("cnumpy.core.multiarray\n_reconstruct\np0\n(cnumpy\nndarray\n"
                 "p1\n(I0\ntp2\nS'b'\np3\ntp4\nRp5\n(I1\n(I1\ntp6\ncnumpy\ndtype\n"
                 "p7\n(S'f8'\np8\nI0\nI1\ntp9\nRp10\n(I3\nS'<'\np11\nNNNI-1\nI-1\n"
                 "I0\ntp12\nbI00\nS'O\\x81\\xb7Z\\xaa:\\xabY'\np13\ntp14\nb.")),
        (np.array([(9e123,)], dtype=[('name', float)]),
         asbytes("cnumpy.core.multiarray\n_reconstruct\np0\n(cnumpy\nndarray\np1\n"
                 "(I0\ntp2\nS'b'\np3\ntp4\nRp5\n(I1\n(I1\ntp6\ncnumpy\ndtype\np7\n"
                 "(S'V8'\np8\nI0\nI1\ntp9\nRp10\n(I3\nS'|'\np11\nN(S'name'\np12\ntp13\n"
                 "(dp14\ng12\n(g7\n(S'f8'\np15\nI0\nI1\ntp16\nRp17\n(I3\nS'<'\np18\nNNNI-1\n"
                 "I-1\nI0\ntp19\nbI0\ntp20\nsI8\nI1\nI0\ntp21\n"
                 "bI00\nS'O\\x81\\xb7Z\\xaa:\\xabY'\np22\ntp23\nb.")),
    ]

    if sys.version_info[:2] < (3, 4):
        # encoding='bytes' was added in Py3.4
        return

    for expected, payload in cases:
        # The payloads are the fixed literals above, not external input.
        loaded = pickle.loads(payload, encoding='bytes')
        assert_equal(loaded, expected)

        if not isinstance(loaded, np.ndarray):
            continue
        # Field names of structured arrays must come back as str.
        for field_name in loaded.dtype.names or ():
            assert_(isinstance(field_name, str))
def test_unicode_upconvert(self):
    """Adding a chararray holding a non-ASCII character yields a unicode dtype."""
    ascii_side = np.char.array(['abc'])
    sigma_side = np.char.array([sixu('\u03a3')])
    combined = ascii_side + sigma_side
    assert_(issubclass(combined.dtype.type, np.unicode_))
def setUp(self):
    """Run the base fixture setup, then replace ``self.B`` with a unicode chararray."""
    TestComparisons.setUp(self)
    rows = [['efg', '123 '],
            ['051', 'tuv']]
    self.B = np.array(rows, np.unicode_).view(np.chararray)
def setUp(self):
    """Run the base fixture setup, then replace ``self.A`` with a unicode chararray."""
    TestComparisons.setUp(self)
    rows = [['abc', '123'],
            ['789', 'xyz']]
    self.A = np.array(rows, np.unicode_).view(np.chararray)
def test_capitalize(self):
    """Check ``capitalize`` on the ``self.A`` and ``self.B`` fixtures.

    Results from ``self.A`` must be ``np.string_`` typed and results from
    ``self.B`` ``np.unicode_`` typed.
    """
    expected = asbytes_nested([[' abc ', ''],
                               ['12345', 'Mixedcase'],
                               ['123 \t 345 \0 ', 'Upper']])
    capitalized = self.A.capitalize()
    assert_(issubclass(capitalized.dtype.type, np.string_))
    assert_array_equal(capitalized, expected)

    expected = [[sixu(' \u03c3 '), ''],
                ['12345', 'Mixedcase'],
                ['123 \t 345 \0 ', 'Upper']]
    unicode_capitalized = self.B.capitalize()
    assert_(issubclass(unicode_capitalized.dtype.type, np.unicode_))
    assert_array_equal(unicode_capitalized, expected)
def test_lower(self):
    """Check ``lower`` on the ``self.A`` and ``self.B`` fixtures.

    Results from ``self.A`` must be ``np.string_`` typed and results from
    ``self.B`` ``np.unicode_`` typed.
    """
    expected = asbytes_nested([[' abc ', ''],
                               ['12345', 'mixedcase'],
                               ['123 \t 345 \0 ', 'upper']])
    lowered = self.A.lower()
    assert_(issubclass(lowered.dtype.type, np.string_))
    assert_array_equal(lowered, expected)

    expected = [[sixu(' \u03c3 '), sixu('')],
                [sixu('12345'), sixu('mixedcase')],
                [sixu('123 \t 345 \0 '), sixu('upper')]]
    unicode_lowered = self.B.lower()
    assert_(issubclass(unicode_lowered.dtype.type, np.unicode_))
    assert_array_equal(unicode_lowered, expected)
def test_swapcase(self):
    """Check ``swapcase`` on the ``self.A`` and ``self.B`` fixtures.

    Results from ``self.A`` must be ``np.string_`` typed and results from
    ``self.B`` ``np.unicode_`` typed.
    """
    expected = asbytes_nested([[' ABC ', ''],
                               ['12345', 'mIXEDcASE'],
                               ['123 \t 345 \0 ', 'upper']])
    swapped = self.A.swapcase()
    assert_(issubclass(swapped.dtype.type, np.string_))
    assert_array_equal(swapped, expected)

    expected = [[sixu(' \u03c3 '), sixu('')],
                [sixu('12345'), sixu('mIXEDcASE')],
                [sixu('123 \t 345 \0 '), sixu('upper')]]
    unicode_swapped = self.B.swapcase()
    assert_(issubclass(unicode_swapped.dtype.type, np.unicode_))
    assert_array_equal(unicode_swapped, expected)
def test_title(self):
    """Check ``title`` on the ``self.A`` and ``self.B`` fixtures.

    Results from ``self.A`` must be ``np.string_`` typed and results from
    ``self.B`` ``np.unicode_`` typed.
    """
    expected = asbytes_nested([[' Abc ', ''],
                               ['12345', 'Mixedcase'],
                               ['123 \t 345 \0 ', 'Upper']])
    titled = self.A.title()
    assert_(issubclass(titled.dtype.type, np.string_))
    assert_array_equal(titled, expected)

    expected = [[sixu(' \u03a3 '), sixu('')],
                [sixu('12345'), sixu('Mixedcase')],
                [sixu('123 \t 345 \0 '), sixu('Upper')]]
    unicode_titled = self.B.title()
    assert_(issubclass(unicode_titled.dtype.type, np.unicode_))
    assert_array_equal(unicode_titled, expected)
def _can_convert_to_string(value):
    """Return True if ``value`` is a builtin or numpy string scalar.

    Works on both Python 2 and Python 3: ``basestring`` only exists on
    Python 2, so ``str`` is used where it is missing. ``np.str_`` and
    ``np.bytes_`` replace the ``np.unicode_`` / ``np.string_`` aliases, which
    were removed in NumPy 2.0.
    """
    try:
        string_types = (basestring,)  # Python 2: covers str and unicode
    except NameError:
        string_types = (str,)  # Python 3
    return isinstance(value, string_types + (np.str_, np.bytes_))
def toString(value):
    """
    Convert a value to a string, if possible.

    Builtin strings (and numpy scalars that subclass them) are returned
    unchanged. A ``np.bytes_`` scalar that is not already a native string
    (the Python 3 case) is decoded as UTF-8; the original code gave no
    encoding, so UTF-8 is an assumption to confirm against callers.

    Raises:
        TypeError: If ``value`` is not a string-like object.
    """
    try:
        string_types = (basestring,)  # Python 2: covers str and unicode
    except NameError:
        string_types = (str,)  # Python 3
    if isinstance(value, string_types):
        return value
    if isinstance(value, np.bytes_):
        # str() on a bytes scalar would give "b'...'" on Python 3.
        return value.decode('utf-8')
    raise TypeError("Could not convert %s to string type" % type(value))
def test_writeread(tmpdir):
    """Write a record array with ``lbl.write`` and read it back with ``lbl.read``.

    The ``name``, ``start`` and ``stop`` fields read back must match what
    was written.
    """
    path = os.path.join(tmpdir.dirname, 'temp.lbl')

    # Ten (start, stop) rows built from the integers 0..19.
    intervals = np.arange(0, 20).reshape(-1, 2)
    # One label per row: the characters with code points 65..74.
    names = [chr(code) for code in np.arange(10) + 65]
    longest = max(len(name) for name in names)
    record_dtype = [('name', np.unicode_, longest),
                    ('start', float), ('stop', float)]
    records = [(name, start, stop)
               for name, (start, stop) in zip(names, intervals)]
    written = np.array(records, dtype=record_dtype)

    lbl.write(path, written)
    loaded = lbl.read(path)

    for before, after in zip(written['name'], loaded['name']):
        assert before == after, 'label named do not match'
    for field, message in (('start', 'starts do not match'),
                           ('stop', 'stops do not match')):
        assert np.all(np.isclose(written[field], loaded[field])), message