Python numpy module: fromiter() example source code
The following 50 code examples, extracted from open-source Python projects, illustrate how to use numpy.fromiter().
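As a quick orientation before the project examples, here is a minimal sketch of the basic call: numpy.fromiter() builds an array from any iterable, requires an explicit dtype, and can pre-allocate the result when count is given (the values below are made up for illustration).
import numpy as np

# Consume a generator lazily; dtype is mandatory, count pre-allocates the output.
squares = np.fromiter((i * i for i in range(10)), dtype=np.int64, count=10)
print(squares)  # [ 0  1  4  9 16 25 36 49 64 81]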
def make2d(array, cols=None, dtype=None):
'''
Make a 2D array from an array of arrays. The `cols' and `dtype'
arguments can be omitted if the array is not empty.
'''
if (cols is None or dtype is None) and not len(array):
raise RuntimeError("cols and dtype must be specified for empty "
"array")
if cols is None:
cols = len(array[0])
if dtype is None:
dtype = array[0].dtype
return _np.fromiter(array, [('_', dtype, (cols,))],
count=len(array))['_']
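A usage sketch for make2d above: the structured dtype with a single subarray field of shape (cols,) lets fromiter consume one row per item, and the trailing ['_'] indexing strips the field again. The sample rows below are made up, and recent NumPy versions are assumed to accept this subarray-field pattern.
import numpy as _np

rows = [_np.array([1.0, 2.0]), _np.array([3.0, 4.0]), _np.array([5.0, 6.0])]
table = make2d(rows)
print(table.shape)  # (3, 2)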
def _load_saved_predictions(self, existing_predictions, matrix_store):
index = matrix_store.matrix.index
score_lookup = {}
for prediction in existing_predictions:
score_lookup[(
prediction.entity_id,
prediction.as_of_date.date().isoformat()
)] = prediction.score
if 'as_of_date' in index.names:
score_iterator = (
score_lookup[(
entity_id,
datetime.strptime(dt, self.expected_matrix_ts_format).date().isoformat()
)]
for (entity_id, dt) in index
)
else:
as_of_date = matrix_store.metadata['end_time'].date().isoformat()
score_iterator = (score_lookup[(row, as_of_date)] for row in index)
return numpy.fromiter(score_iterator, float)
def txt2vec(self, text, vec_type=list):
"""Converts a string to a vector (list of ints).
First runs a sentence tokenizer, then a word tokenizer.
``vec_type`` is the type of the returned vector if the input is a string.
"""
if vec_type == np.ndarray:
res = np.fromiter(
(self[token] for token in self.tokenize(str(text))),
int
)
elif vec_type == list or vec_type == tuple or vec_type == set:
res = vec_type((self[token] for token in self.tokenize(str(text))))
else:
raise RuntimeError('Type {} not supported by dict'.format(vec_type))
assert type(res) == vec_type
return res
def weighted_avg_and_std(values, weights=None):
'''
Return the weighted average and standard deviation.
`values` - np.ndarray of values to average.
`weights` - Optional np.ndarray of weights. Otherwise all values are assumed
equally weighted.
Note the helpful np.fromiter() function for building arrays.
'''
if not isinstance(values, np.ndarray):
raise TypeError("Values must be an np.array")
if len(values) == 0:
raise ValueError("Can't calculate with no values")
if weights is not None:
if not isinstance(weights, np.ndarray):
raise TypeError("Weights must be None or an np.array")
if len(values) != len(weights):
raise ValueError("Length of values and weights differ")
average = np.average(values, weights=weights)
variance = np.average((values-average)**2, weights=weights) # Fast and numerically precise
return (average, math.sqrt(variance))
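A short usage sketch of weighted_avg_and_std, building both inputs with np.fromiter as the docstring suggests; the numbers are invented for illustration.
import numpy as np

values = np.fromiter((float(i) for i in range(1, 6)), dtype=float, count=5)
weights = np.fromiter((6.0 - i for i in range(1, 6)), dtype=float, count=5)
avg, std = weighted_avg_and_std(values, weights)
print(avg, std)  # weighted mean ~2.33, weighted std ~1.25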
def draw_links(self,n=1,log_sampling=False):
""" Draw multiple random links. """
urls = []
domain_array = np.array([dmn for dmn in self.domain_links])
domain_count = np.array([len(self.domain_links[domain_array[k]]) for k in range(domain_array.shape[0])])
p = np.array([float(c) for c in domain_count])
count_total = p.sum()
if log_sampling: # log-sampling [log(x+1)] to bias lower count domains
p = np.fromiter((np.log1p(x) for x in p), dtype=p.dtype)
if count_total > 0:
p = p/p.sum()
cnts = npr.multinomial(n, pvals=p)
if n > 1:
for k in range(cnts.shape[0]):
domain = domain_array[k]
cnt = min(cnts[k],domain_count[k])
for url in random.sample(self.domain_links[domain],cnt):
urls.append(url)
else:
k = int(np.nonzero(cnts)[0])
domain = domain_array[k]
url = random.sample(self.domain_links[domain],1)[0]
urls.append(url)
return urls
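A standalone look at the log1p re-weighting used above: it compresses large link counts so that domains with few links keep a non-negligible sampling probability. The counts are made up; approximate probabilities are shown as comments.
import numpy as np

counts = np.array([1.0, 10.0, 100.0])
p_raw = counts / counts.sum()
p_log = np.fromiter((np.log1p(x) for x in counts), dtype=counts.dtype)
p_log = p_log / p_log.sum()
print(p_raw)  # [0.009 0.09  0.901] (approx.)
print(p_log)  # [0.09  0.31  0.60 ] (approx.)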
def __init__(self):
super().__init__()
stack = self._stack
# classes
self.classes_ = [0, 1, 2]
self.average_classes = [0, 2]
# data
self.data_dir = 'data/twitter/semeval_2016_submit'
with temp_chdir(self.data_dir):
self.train_objs = JSONDecoder(stack.enter_context(open('train.json')))
self.train_docs = FieldExtractor(self.train_objs, 'text')
self.train_labels = np.fromiter(FieldExtractor(self.train_objs, 'label'), 'int32')
distant_srs = [stack.enter_context(open('../emote/class_{}.txt'.format(i), encoding='utf-8')) for i in [0, 2]]
self.distant_docs = BalancedSlice(distant_srs)
self.distant_labels = BalancedSlice((RepeatSr(0), RepeatSr(2)))
unsup_sr = stack.enter_context(open('../unsup/all.txt', encoding='utf-8'))
self.unsup_docs = BalancedSlice([unsup_sr])
self.val_objs = JSONDecoder(stack.enter_context(open('val.json')))
self.val_docs = FieldExtractor(self.val_objs, 'text')
self.val_labels = FieldExtractor(self.val_objs, 'label')
self.test_objs = JSONDecoder(stack.enter_context(open('test.json')))
self.test_docs = FieldExtractor(self.test_objs, 'text')
self.test_labels = FieldExtractor(self.test_objs, 'label')
def __init__(self):
super().__init__()
stack = self._stack
# classes
self.classes_ = [0, 1, 2]
self.average_classes = [0, 2]
# data
self.data_dir = 'data/imdb'
with temp_chdir(self.data_dir):
self.train_objs = JSONDecoder(stack.enter_context(open('train.json')))
self.train_docs = FieldExtractor(self.train_objs, 'text')
self.train_labels = np.fromiter(FieldExtractor(self.train_objs, 'label'), 'int32')
unsup_sr = stack.enter_context(open('unsup.json'))
self.unsup_docs = BalancedSlice([FieldExtractor(unsup_sr, 'text')])
self.val_objs = JSONDecoder(stack.enter_context(open('val.json')))
self.val_docs = FieldExtractor(self.val_objs, 'text')
self.val_labels = FieldExtractor(self.val_objs, 'label')
self.test_objs = JSONDecoder(stack.enter_context(open('test.json')))
self.test_docs = FieldExtractor(self.test_objs, 'text')
self.test_labels = FieldExtractor(self.test_objs, 'label')
def __init__(self):
super().__init__()
stack = self._stack
# classes
self.classes_ = [1, 2, 3, 4, 5]
self.average_classes = [1, 2, 3, 4, 5]
# data
self.data_dir = 'data/yelp'
with temp_chdir(self.data_dir):
self.train_objs = JSONDecoder(stack.enter_context(open('train.json')))
self.train_docs = FieldExtractor(self.train_objs, 'text')
self.train_labels = np.fromiter(FieldExtractor(self.train_objs, 'stars'), 'int32')
self.val_objs = JSONDecoder(stack.enter_context(open('val.json')))
self.val_docs = FieldExtractor(self.val_objs, 'text')
self.val_labels = FieldExtractor(self.val_objs, 'stars')
self.test_objs = JSONDecoder(stack.enter_context(open('test.json')))
self.test_docs = FieldExtractor(self.test_objs, 'text')
self.test_labels = FieldExtractor(self.test_objs, 'stars')
def test():
# create a bunch of random data for X-axis
# uniformly generate 2-D vectors in [-50, 50]
X = 100*np.random.random([NUM_SAMPLES, 2]) - 50
# create a bunch of random data for Y-axis
# let's say y = 5x1 - 2x2 + 3 + noise
# true beta is then: [3, 5, -2]
Y = np.fromiter((5*x1 - 2*x2 + 3 for x1, x2 in X), float, count=NUM_SAMPLES)
Y += np.random.standard_normal(NUM_SAMPLES)
# fit
lr = LinearRegression()
lr.fit(X,Y)
print "beta estimated: %s" % lr.beta
r2 = lr.score(X,Y)
print "R-square is: %s" % r2
# predict
x = (100, 100)
h = lr.predict(np.array([x]))
y = 5*x[0] - 2*x[1] + 3
print "Extrapolated prediction: %.2f\nActual: %.2f" % (h, y)
def read_vectors(fin, dtype='float64', delim=' '):
"""Return a list with tuples (word, word_vector)."""
reader = csv.reader(fin, delimiter=delim, quoting=csv.QUOTE_NONE)
word_vectors = []
ncol = None
for row in reader:
if ncol is None:
if len(row) == 2:
ncol = int(row[1])
continue
else:
ncol = len(row) - 1
word = row[0]  # csv.reader already yields str under Python 3
word_vector = np.fromiter(
[float(v) for v in row[1: ncol + 1]],
dtype=dtype, count=ncol)
word_vectors.append((word, word_vector))
return word_vectors
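The inner np.fromiter(..., count=ncol) call above is a common pattern for parsing a fixed-width numeric row; a standalone sketch with a made-up embedding line:
import numpy as np

row = "king 0.12 -0.07 0.33".split(" ")
ncol = len(row) - 1
vec = np.fromiter((float(v) for v in row[1:ncol + 1]), dtype='float64', count=ncol)
print(row[0], vec)  # king [ 0.12 -0.07  0.33]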
def lsb_encode(data, image):
bytes_io = BytesIO()
dump(data, file=bytes_io)
data_bytes = bytes_io.getvalue()
data_bytes_array = np.fromiter(data_bytes, dtype=np.uint8)
data_bits_list = np.unpackbits(data_bytes_array).tolist()
data_bits_list += [0] * (image.size[0] * image.size[1] - len(data_bits_list))
watermark = Image.frombytes(data=bytes(data_bits_list), size=image.size, mode='L')
red, green, blue = image.split()
watermarked_red = ImageMath.eval("convert(a&0xFE|b&0x1,'L')", a=red, b=watermark)
watermarked_image = Image.merge("RGB", (watermarked_red, green, blue))
return watermarked_image
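The ImageMath expression "a&0xFE|b&0x1" above clears the least-significant bit of the red channel and substitutes one watermark bit per pixel. The same bit manipulation on plain NumPy arrays, as a sketch with made-up values (not the PIL code path used by the function):
import numpy as np

red = np.array([200, 201, 202], dtype=np.uint8)  # original channel values
bits = np.array([1, 0, 1], dtype=np.uint8)       # watermark bits (0 or 1)
watermarked = (red & 0xFE) | (bits & 0x1)
print(watermarked)  # [201 200 203]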
def decons_obs_group_ids(comp_ids, obs_ids, shape, labels, xnull):
"""
reconstruct labels from observed group ids
Parameters
----------
xnull: boolean,
if nulls are excluded; i.e. -1 labels are passed through
"""
from pandas.hashtable import unique_label_indices
if not xnull:
lift = np.fromiter(((a == -1).any() for a in labels), dtype='i8')
shape = np.asarray(shape, dtype='i8') + lift
if not _int64_overflow_possible(shape):
# obs ids are deconstructable! take the fast route!
out = decons_group_index(obs_ids, shape)
return out if xnull or not lift.any() \
else [x - y for x, y in zip(out, lift)]
i = unique_label_indices(comp_ids)
i8copy = lambda a: a.astype('i8', subok=False, copy=True)
return [i8copy(lab[i]) for lab in labels]
def cartesian_product(X):
'''
Numpy version of itertools.product or pandas.compat.product.
Sometimes faster (for large inputs)...
Examples
--------
>>> cartesian_product([list('ABC'), [1, 2]])
[array(['A', 'A', 'B', 'B', 'C', 'C'], dtype='|S1'),
array([1, 2, 1, 2, 1, 2])]
'''
lenX = np.fromiter((len(x) for x in X), dtype=int)
cumprodX = np.cumproduct(lenX)
a = np.roll(cumprodX, 1)
a[0] = 1
b = cumprodX[-1] / cumprodX
return [np.tile(np.repeat(np.asarray(com._values_from_object(x)), b[i]),
np.product(a[i]))
for i, x in enumerate(X)]
def r(self):
"""
Pearson correlation of the fitted Variogram
:return:
"""
# get the experimental and theoretical variogram and calculate means
experimental, model = self.__model_deviations()
mx = np.nanmean(experimental)
my = np.nanmean(model)
# calculate the individual Pearson correlation terms
term1 = np.nansum(np.fromiter(map(lambda x, y: (x-mx) * (y-my), experimental, model), float))
t2x = np.nansum(np.fromiter(map(lambda x: (x-mx)**2, experimental), float))
t2y = np.nansum(np.fromiter(map(lambda y: (y-my)**2, model), float))
return term1 / (np.sqrt(t2x * t2y))
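The three fromiter/map reductions above spell out the standard Pearson formula term by term. An equivalent fully vectorized version (a sketch, not part of the project's API) would be:
import numpy as np

def pearson_r(experimental, model):
    # Same quantity as r() above, written with array arithmetic.
    x = np.asarray(experimental, dtype=float)
    y = np.asarray(model, dtype=float)
    mx, my = np.nanmean(x), np.nanmean(y)
    num = np.nansum((x - mx) * (y - my))
    den = np.sqrt(np.nansum((x - mx) ** 2) * np.nansum((y - my) ** 2))
    return num / den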
def trajectory_lengths(self, stride=1, skip=0):
r""" Returns the length of each trajectory.
Parameters
----------
stride : int
return value is the number of frames of the trajectories when
running through them with a step size of `stride`.
skip : int
skip parameter
Returns
-------
array(dtype=int) : containing length of each trajectory
"""
n = self.number_of_trajectories()
if isinstance(stride, np.ndarray):
return np.fromiter((self.trajectory_length(itraj, stride)
for itraj in range(n)),
dtype=int, count=n)
else:
return np.fromiter(((l - skip - 1) // stride + 1 for l in self._lengths),
dtype=int, count=n)
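The expression (l - skip - 1) // stride + 1 used above is the number of frames visited when stepping through a trajectory of length l, starting at skip, with step stride. A quick sanity check of the formula with made-up values:
# Frames visited for l=10, skip=2, stride=3 are 2, 5, 8 -> 3 frames.
l, skip, stride = 10, 2, 3
assert len(range(skip, l, stride)) == (l - skip - 1) // stride + 1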
def test_ttv_array_like_data_source(self):
dummy_data_source = DummyDataSource()
subject_info_dir = os.path.join('test', 'dummy_data', 'metadata')
ttv = yaml_to_dict(os.path.join(subject_info_dir, 'dummy_ttv.yaml'))
array_ds = TTVArrayLikeDataSource(dummy_data_source, ttv)
self.assertEqual(len(array_ds), 3)
all_values = np.fromiter((x for x in array_ds[:]), dtype='int16')
self.assertTrue(
np.all(
np.in1d(
all_values,
np.array([1, 2, 3])
)
)
)
def set_languages(self, langs=None):
logger.debug("restricting languages to: %s", langs)
# Unpack the full original model. This is needed in case the language set
# has been previously trimmed, and the new set is not a subset of the current
# set.
nb_ptc, nb_pc, nb_classes = self.__full_model
if langs is None:
self.nb_classes = nb_classes
self.nb_ptc = nb_ptc
self.nb_pc = nb_pc
else:
# We were passed a restricted set of languages. Trim the arrays accordingly
# to speed up processing.
for lang in langs:
if lang not in nb_classes:
raise ValueError("Unknown language code %s" % lang)
subset_mask = np.fromiter((l in langs for l in nb_classes), dtype=bool)
self.nb_classes = [ c for c in nb_classes if c in langs ]
self.nb_ptc = nb_ptc[:,subset_mask]
self.nb_pc = nb_pc[subset_mask]
def write_stats_to_file(filename, counts, mincount):
os.makedirs(os.path.dirname(filename), exist_ok=True)
with open(filename + ".txt", 'w', encoding="utf-8") as f:
if args.perc == 0:
percentile = 0
else:
percentile = numpy.percentile(numpy.fromiter(counts.values(), numpy.int32), args.perc)
threshold = max(percentile, mincount)
for k, v in counts.items():
if v >= threshold:
entry = k.split(SEPARATOR)
entry.append(str(v))
f.write('\t'.join(entry) + '\n')
if args.pickle:
with open(filename + ".pickle", 'wb') as f:
pickle.dump(counts, f)
def _eval_all(emb_simset):
inp_emb = {}
for wordvec in emb_simset.iterrows():
word, vec = wordvec[1][0], wordvec[1][1:].tolist()
vec = np.fromiter(map(float, vec[1:]), dtype = np.float32)
norm = np.linalg.norm(vec)
inp_emb[word] = vec/norm if (norm != 0) else [vec]
score_dict = {}
score_dict['score'] = 0
for root,dirs,files in os.walk('/home/jared/vecshare/Test_Input'):
files = [testfile for testfile in files if testfile[0]!='.']
for testfile in files:
f_path = '/home/jared/vecshare/Test_Input/'+testfile
score_dict[testfile[:-4].strip().lower().replace(" ", "_").replace("-", "_")] = _eval_sim(f_path, inp_emb)
if testfile != 'mc-30.csv':
score_dict['score'] += _eval_sim(f_path, inp_emb)/(len(files)-1)
return score_dict
def cdf_dlf(x, A, m1, a1, m2, a2, start=-26):
'''
Cumulative Schechter function. Second LF is set to be 2*A of first LF.
@param x: magnitude
@param A: Scale factor
@param m1: Knee of distribution 1
@param a1: Faint-end turnover of first lf
@param m2: Knee of distribution 2
@param a2: Faint-end turnover of second lf
@param start: Brightest magnitude
@return Probability that galaxy has a magnitude greater than x
'''
def integrate(in_x):
return quad(dlf, start,in_x,args=(A,m1,a1,m2,a2))[0]
if np.isscalar(x):
x = np.array([x])
return np.fromiter(map(integrate, x), float, count=len(x))
def inv_cdf_dlf(p, A, m1, a1, m2, a2, start=-26, end=-15):
'''
Inverse Cumulative Schechter function. Second LF is set to be 2*A of first LF.
@param p: probability
@param A: Scale factor
@param m1: Knee of distribution 1
@param a1: Faint-end turnover of first lf
@param m2: Knee of distribution 2
@param a2: Faint-end turnover of second lf
@param start: Brightest magnitude
@param end: Faintest possible magnitude
@return Magnitude associated with cdf probability p
'''
def get_root(p):
return root(lambda x: cdf_dlf(x,A,m1,a1,m2,a2,start)-p, (start + end)/2).x[0]
if np.isscalar(p):
return get_root(p)
else:
return np.fromiter(map(get_root, p), float, count=len(p))
def _read_symbol(self):
dividends = []
rawsymbol = self.f.read(16)
if rawsymbol == b'':
raise EOFError
symbol = unpack('16s', rawsymbol)[0].replace(b'\x00', b'')
rawdate = self.f.read(4)
dt = np.dtype([('time', np.int32),
('split', np.float32),
('purchase', np.float32),
('purchase_price', np.float32),
('dividend', np.float32)])
while (rawdate) != b"\xff" * 4:
dividend = np.frombuffer(rawdate + self.f.read(16), dtype=dt)
dividends.append(dividend)
rawdate = self.f.read(4)
if rawdate == b'':
break
return (symbol, np.fromiter(dividends, dtype=dt))
def points_random_3d(count, range_x=(-10.0, 10.0), range_y=(-10.0, 10.0), range_z=(-10.0, 10.0), seed=None):
"""
Generates random positions
:param count: Number of points
:param range_x: min-max range for x axis
:param range_y: min-max range for y axis
:param range_z: min-max range for z axis
:param seed: The random seed to be used
"""
random.seed(seed)
def gen():
for i in range(count):
yield random.uniform(*range_x)
yield random.uniform(*range_y)
yield random.uniform(*range_z)
data = numpy.fromiter(gen(), count=count * 3, dtype=numpy.float32)
pos = VBO(data)
vao = VAO("geometry:points_random_3d", mode=GL.GL_POINTS)
vao.add_array_buffer(GL.GL_FLOAT, pos)
vao.map_buffer(pos, "in_position", 3)
vao.build()
return vao
def parseNpf(self, buffer, imageWidth, imageHeight):
# Read the header
sectionLengths = self._readUgarHeader(buffer)
# Read the palette data (section number 1)
paletteData = np.frombuffer(buffer.read(roundToPower(sectionLengths[0])), dtype=np.uint16)
# Read the image data (section number 2)
imageData = np.frombuffer(buffer.read(sectionLengths[1]), dtype=np.uint8)
# NPF image data uses 1 byte per 2 pixels, so we need to split that byte into two
imageData = np.stack((np.bitwise_and(imageData, 0x0f), np.bitwise_and(imageData >> 4, 0x0f)), axis=-1).flatten()
# Unpack palette colors
palette = unpackColors(paletteData, useAlpha=False)
# Convert each pixel from a palette index to full color
pixels = np.fromiter((palette[i] if i > 0 else 0 for i in imageData), dtype=">u4")
# Clip the image data and create a Pillow image from it
return Image.fromarray(self._clipImageData(pixels, (imageWidth, imageHeight)), mode="RGBA")
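The nibble-splitting step above turns each packed NPF byte into two 4-bit palette indices, low nibble first. A standalone sketch of just that step, with made-up bytes:
import numpy as np

packed = np.array([0x21, 0x43], dtype=np.uint8)
low = np.bitwise_and(packed, 0x0F)        # [1, 3]
high = np.bitwise_and(packed >> 4, 0x0F)  # [2, 4]
indices = np.stack((low, high), axis=-1).flatten()
print(indices)  # [1 2 3 4]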
# Write the image as an npf to buffer
def test_staged_predict():
# Test whether staged decision function eventually gives
# the same prediction.
X, y = datasets.make_friedman1(n_samples=1200,
random_state=1, noise=1.0)
X_train, y_train = X[:200], y[:200]
X_test = X[200:]
clf = GradientBoostingRegressor()
# test raise ValueError if not fitted
assert_raises(ValueError, lambda X: np.fromiter(
clf.staged_predict(X), dtype=np.float64), X_test)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
# test if prediction for last stage equals ``predict``
for y in clf.staged_predict(X_test):
assert_equal(y.shape, y_pred.shape)
assert_array_equal(y_pred, y)
def _filter_dates(dates, freq, kwargs):
"""
This function filters dates to indicate end of periods for ordinals.
"""
indicator = DATETIME_DICT[freq]
if isinstance(indicator, str):
# no special behavior
indicators = np.fromiter(
[date.__getattribute__(indicator) for date in dates],
dtype=np.int32)
return np.argwhere(indicators[1:] - indicators[:-1] > 0)
else:
# apply a function
indicators = np.fromiter(
[indicator(date, kwargs) for date in dates], dtype=np.int32)
return np.argwhere(indicators[1:] - indicators[:-1] > 0)
def write_tables():
import tables
dtype = np.dtype("S7,f4,f4,f4,f4,i4")
t0 = time()
sarray = np.fromiter(((str(i), float(i), float(2*i), None, float(4*i), i)
for i in range(N)), dtype, count=N)
t1 = time() - t0
print("Created sarray with %d rows in %.3fs" % (N, t1))
t0 = time()
h5f = tables.openFile("market.h5", "w")
table = h5f.createTable(h5f.root, "market", dtype)
table.append(sarray)
h5f.close()
t1 = time() - t0
print "[PyTables] Stored %d rows in %.3fs" % (N, t1)
def write_tables2():
import tables
dtype = np.dtype("S7,f4,f4,f4,f4,i4")
# t0 = time()
# sarray = np.fromiter(((str(i), float(i), float(2*i), None, float(4*i), i)
# for i in xrange(N)), dtype, count=N)
# t1 = time() - t0
# print "Created sarray with %d rows in %.3fs" % (N, t1)
t0 = time()
h5f = tables.openFile("market.h5", "w")
table = h5f.createTable(h5f.root, "market", dtype)
count = 10000
for j in range(count, N, count):
sarray = np.fromiter(((str(i), float(i), float(2*i), None, float(4*i), i)
for i in range(j)), dtype)
table.append(sarray)
h5f.close()
t1 = time() - t0
print "[PyTables] Stored %d rows in %.3fs" % (N, t1)
def write_umi_info(pickles, filename):
"""" Write an H5 with (bc, chain, read_count) tuples """
filters = tables.Filters(complevel = cr_constants.H5_COMPRESSION_LEVEL)
with tables.open_file(filename, 'w', filters=filters) as h5:
umi_info = vdj_umi_info.create_arrays(h5)
bc_to_int = {}
chain_to_int = {}
for pickle in pickles:
bc_chain_umi_counts = cPickle.load(open(pickle))
for bc, chain_umis in bc_chain_umi_counts.iteritems():
for chain, umi_counts in chain_umis.iteritems():
n_umis = len(umi_counts)
if chain != cr_constants.MULTI_REFS_PREFIX and n_umis > 0:
if bc not in bc_to_int:
bc_to_int[bc] = len(bc_to_int)
if chain not in chain_to_int:
chain_to_int[chain] = len(chain_to_int)
umi_info['barcode_idx'].append(np.full(n_umis, bc_to_int[bc],
dtype=vdj_umi_info.get_dtype('barcode_idx')))
umi_info['chain_idx'].append(np.full(n_umis, chain_to_int[chain],
dtype=vdj_umi_info.get_dtype('chain_idx')))
umi_info['reads'].append(np.fromiter(umi_counts.itervalues(),
vdj_umi_info.get_dtype('reads'), count=n_umis))
vdj_umi_info.set_ref_column(h5, 'barcodes', np.array(sorted(bc_to_int.keys(), key=bc_to_int.get)))
vdj_umi_info.set_ref_column(h5, 'chains', np.array(sorted(chain_to_int.keys(), key=chain_to_int.get)))
def where_close(pos, separation, intensity=None):
""" Returns indices of features that are closer than separation from other
features. When intensity is given, the one with the lowest intensity is
returned; otherwise the most top-left one is returned (to avoid randomness).
To be implemented in trackpy v0.4"""
if len(pos) == 0:
return []
separation = validate_tuple(separation, pos.shape[1])
if any([s == 0 for s in separation]):
return []
# Rescale positions, so that pairs are identified below a distance
# of 1.
pos_rescaled = pos / separation
duplicates = cKDTree(pos_rescaled, 30).query_pairs(1 - 1e-7)
if len(duplicates) == 0:
return []
index_0 = np.fromiter((x[0] for x in duplicates), dtype=int)
index_1 = np.fromiter((x[1] for x in duplicates), dtype=int)
if intensity is None:
to_drop = np.where(np.sum(pos_rescaled[index_0], 1) >
np.sum(pos_rescaled[index_1], 1),
index_1, index_0)
else:
intensity_0 = intensity[index_0]
intensity_1 = intensity[index_1]
to_drop = np.where(intensity_0 > intensity_1, index_1, index_0)
edge_cases = intensity_0 == intensity_1
if np.any(edge_cases):
index_0 = index_0[edge_cases]
index_1 = index_1[edge_cases]
to_drop[edge_cases] = np.where(np.sum(pos_rescaled[index_0], 1) >
np.sum(pos_rescaled[index_1], 1),
index_1, index_0)
return np.unique(to_drop)
def make_program(shape):
""" Returns numpy array containing the eval instructions for eval """
return numpy.fromiter(_make_program_pieces(shape), pyopencl.cltypes.float)
def __array__(self, dtype=None):
"""NumPy array protocol; returns iterator values as an ndarray."""
if self._value is None:
# Call fromiter if we can; it is faster and avoids the extra
# copy, but doesn't support object types and requires a dtype.
if dtype is None or dtype.hasobject:
self._value = np.array(list(self._iterator), dtype)
else:
self._value = np.fromiter(self._iterator, dtype)
return self._value
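The branch above works around a real fromiter limitation: older NumPy releases reject object dtypes, so the code falls back to np.array(list(...)). A minimal illustration of that fallback (behavior depends on the NumPy version; 1.23+ also accepts dtype=object in fromiter):
import numpy as np

ints = np.fromiter(iter([1, 2, 3]), dtype=np.int64)   # always fine
items = ["a", [1, 2]]
try:
    objs = np.fromiter(iter(items), dtype=object)     # rejected on older NumPy
except (TypeError, ValueError):
    objs = np.array(items, dtype=object)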
def test_mem_on_invalid_dtype(self):
"Ticket #583"
self.assertRaises(ValueError, np.fromiter, [['12', ''], ['13', '']], str)
def test_mem_fromiter_invalid_dtype_string(self, level=rlevel):
x = [1, 2, 3]
self.assertRaises(ValueError,
np.fromiter, [xi for xi in x], dtype='S')
def test_fromiter_bytes(self):
# Ticket #1058
a = np.fromiter(list(range(10)), dtype='b')
b = np.fromiter(list(range(10)), dtype='B')
assert_(np.alltrue(a == np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])))
assert_(np.alltrue(b == np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])))
def test_fromiter_comparison(self, level=rlevel):
a = np.fromiter(list(range(10)), dtype='b')
b = np.fromiter(list(range(10)), dtype='B')
assert_(np.alltrue(a == np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])))
assert_(np.alltrue(b == np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])))
def test_duplicate_field_names_assign(self):
ra = np.fromiter(((i*3, i*2) for i in range(10)), dtype='i8,f8')
ra.dtype.names = ('f1', 'f2')
repr(ra) # should not cause a segmentation fault
assert_raises(ValueError, setattr, ra.dtype, 'names', ('f1', 'f1'))
def test_lengths(self):
expected = np.array(list(self.makegen()))
a = np.fromiter(self.makegen(), int)
a20 = np.fromiter(self.makegen(), int, 20)
self.assertTrue(len(a) == len(expected))
self.assertTrue(len(a20) == 20)
self.assertRaises(ValueError, np.fromiter,
self.makegen(), int, len(expected) + 10)
def test_values(self):
expected = np.array(list(self.makegen()))
a = np.fromiter(self.makegen(), int)
a20 = np.fromiter(self.makegen(), int, 20)
self.assertTrue(np.alltrue(a == expected, axis=0))
self.assertTrue(np.alltrue(a20 == expected[:20], axis=0))
def test_2592(self):
# Test iteration exceptions are correctly raised.
count, eindex = 10, 5
self.assertRaises(NIterError, np.fromiter,
self.load_data(count, eindex), dtype=int, count=count)
def test_2592_edge(self):
# Test iter. exceptions, edge case (exception at end of iterator).
count = 10
eindex = count-1
self.assertRaises(NIterError, np.fromiter,
self.load_data(count, eindex), dtype=int, count=count)
def from_pydata(cls, faces):
loop_total = np.empty(len(faces), dtype=np.uint32)
loop_start = np.zeros(len(faces), dtype=np.uint32)
loop_total[:] = tuple(map(len, faces))
loop_start[1:] = loop_total[:-1].cumsum()
vertex_indices = np.fromiter(chain.from_iterable(faces),
dtype=np.uint32,)
#count=loop_start.sum())
return cls(loop_start, loop_total, vertex_indices)
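A quick sketch of the loop bookkeeping above for three made-up faces (one triangle and two quads), with the expected results as comments:
import numpy as np
from itertools import chain

faces = [(0, 1, 2), (2, 1, 3, 4), (4, 3, 5, 6)]
loop_total = np.fromiter(map(len, faces), dtype=np.uint32, count=len(faces))    # [3 4 4]
loop_start = np.concatenate(([0], loop_total[:-1].cumsum())).astype(np.uint32)  # [0 3 7]
vertex_indices = np.fromiter(chain.from_iterable(faces), dtype=np.uint32)       # [0 1 2 2 1 3 4 4 3 5 6]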
def _calculate(self, period):
data = list(self.loadTradesForPeriod(period))
if len(data) == 0:
raise InsufficientDataError()
values = np.fromiter(map(attrgetter('price'), data), float, len(data))
weights = np.fromiter(map(attrgetter('volume'), data), float, len(data))
mean, std = weighted_avg_and_std(values, weights)
return (mean,)
def rle(array, low_mem=False):
"""Calculate a run length encoding (rle), of an input vector.
:param array: 1D input array.
:param low_mem: use a lower memory implementation
returns: structured array with fields `start`, `length`, and `value`.
"""
if len(array.shape) != 1:
raise TypeError("Input array must be one dimensional.")
dtype = [('length', int), ('start', int), ('value', array.dtype)]
if not low_mem:
pos = np.where(np.diff(array) != 0)[0]
pos = np.concatenate(([0], pos+1, [len(array)]))
return np.fromiter(
((end - start, start, array[start]) for (end, start) in zip(pos[1:], pos[:-1])),
dtype, count=len(pos) - 1,
)
else:
def _gen():
start = 0
for key, group in itertools.groupby(array):
length = sum(1 for x in group)
yield length, start, key
start += length
return np.fromiter(_gen(), dtype=dtype)
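A usage sketch of rle on a tiny made-up vector, with the expected fields shown as comments (both paths should agree):
import numpy as np

arr = np.array([1, 1, 1, 2, 2, 3])
encoded = rle(arr)
print(encoded['length'])  # [3 2 1]
print(encoded['start'])   # [0 3 5]
print(encoded['value'])   # [1 2 3]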
def encode_text(text, char2id=CHAR2ID):
"""
encode text to array of integers with CHAR2ID
"""
return np.fromiter((char2id.get(ch, 0) for ch in text), int)
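A minimal call sketch for encode_text; the mapping below is made up, and unknown characters fall back to 0:
char2id = {'a': 1, 'b': 2, 'c': 3}
print(encode_text("abz", char2id))  # [1 2 0]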
def entropy(self,p):
return -np.fromiter((self.xlgx(x) for x in p.flatten()),dtype=p.dtype).sum()
def draw_domain(self,log_sampling=False):
""" Draw a single, random domain. """
domain = None
domain_array = np.array([dmn for dmn in self.domain_links])
domain_count = np.array([len(self.domain_links[domain_array[k]]) for k in range(domain_array.shape[0])])
p = np.array([float(c) for c in domain_count])
count_total = p.sum()
if log_sampling: # log-sampling [log(x+1)] to bias lower count domains
p = np.fromiter((np.log1p(x) for x in p), dtype=p.dtype)
if count_total > 0:
p = p/p.sum()
cnts = npr.multinomial(1, pvals=p)
k = int(np.nonzero(cnts)[0])
domain = domain_array[k]
return domain