The following 50 code examples, extracted from open-source Python projects, illustrate how to use six.moves.xrange().
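Before the project examples, here is a minimal stand-alone sketch (not taken from any of the projects below) of the pattern they all share: importing xrange from six.moves so the same loop code runs on Python 2, where it resolves to the built-in xrange, and on Python 3, where it resolves to range.

from six.moves import xrange

def squares(n):
    # xrange is lazy on both Python versions: the built-in xrange on
    # Python 2 and range on Python 3, so no intermediate list is built.
    return [i * i for i in xrange(n)]

print(squares(5))  # -> [0, 1, 4, 9, 16]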
def update_expectations(self):
    """
    Since we're doing lazy updates on lambda, at any given moment
    the current state of lambda may not be accurate. This function
    updates all of the elements of lambda and Elogbeta
    so that if (for example) we want to print out the
    topics we've learned we'll get the correct behavior.
    """
    for w in xrange(self.m_W):
        self.m_lambda[:, w] *= np.exp(self.m_r[-1] -
                                      self.m_r[self.m_timestamp[w]])
    self.m_Elogbeta = sp.psi(self.m_eta + self.m_lambda) - \
        sp.psi(self.m_W * self.m_eta + self.m_lambda_sum[:, np.newaxis])

    self.m_timestamp[:] = self.m_updatect
    self.m_status_up_to_date = True
def _lost_point_level2(modules, modules_count):
    lost_point = 0

    modules_range = xrange(modules_count - 1)

    for row in modules_range:
        this_row = modules[row]
        next_row = modules[row + 1]
        for col in modules_range:
            count = 0
            if this_row[col]:
                count += 1
            if next_row[col]:
                count += 1
            if this_row[col + 1]:
                count += 1
            if next_row[col + 1]:
                count += 1
            if count == 0 or count == 4:
                lost_point += 3

    return lost_point
def write(self, buffer):
    if self.mode == MODE_NUMBER:
        for i in xrange(0, len(self.data), 3):
            chars = self.data[i:i + 3]
            bit_length = NUMBER_LENGTH[len(chars)]
            buffer.put(int(chars), bit_length)
    elif self.mode == MODE_ALPHA_NUM:
        for i in xrange(0, len(self.data), 2):
            chars = self.data[i:i + 2]
            if len(chars) > 1:
                buffer.put(
                    ALPHA_NUM.find(chars[0]) * 45 +
                    ALPHA_NUM.find(chars[1]), 11)
            else:
                buffer.put(ALPHA_NUM.find(chars), 6)
    else:
        if six.PY3:
            # Iterating a bytestring in Python 3 returns an integer,
            # no need to ord().
            data = self.data
        else:
            data = [ord(c) for c in self.data]
        for c in data:
            buffer.put(c, 8)
def lda_e_step(doc_word_ids, doc_word_counts, alpha, beta, max_iter=100):
    gamma = np.ones(len(alpha))
    expElogtheta = np.exp(dirichlet_expectation(gamma))
    betad = beta[:, doc_word_ids]
    phinorm = np.dot(expElogtheta, betad) + 1e-100
    counts = np.array(doc_word_counts)
    for _ in xrange(max_iter):
        lastgamma = gamma
        gamma = alpha + expElogtheta * np.dot(counts / phinorm, betad.T)
        Elogtheta = dirichlet_expectation(gamma)
        expElogtheta = np.exp(Elogtheta)
        phinorm = np.dot(expElogtheta, betad) + 1e-100
        meanchange = np.mean(abs(gamma - lastgamma))
        if (meanchange < meanchangethresh):
            break

    likelihood = np.sum(counts * np.log(phinorm))
    likelihood += np.sum((alpha - gamma) * Elogtheta)
    likelihood += np.sum(sp.gammaln(gamma) - sp.gammaln(alpha))
    likelihood += sp.gammaln(np.sum(alpha)) - sp.gammaln(np.sum(gamma))

    return (likelihood, gamma)
def hdp_to_lda(self):
    """
    Compute the LDA almost equivalent HDP.
    """
    # alpha
    sticks = self.m_var_sticks[0] / (self.m_var_sticks[0] + self.m_var_sticks[1])
    alpha = np.zeros(self.m_T)
    left = 1.0
    for i in xrange(0, self.m_T - 1):
        alpha[i] = sticks[i] * left
        left = left - alpha[i]
    alpha[self.m_T - 1] = left
    alpha = alpha * self.m_alpha

    # beta
    beta = (self.m_lambda + self.m_eta) / (self.m_W * self.m_eta +
                                           self.m_lambda_sum[:, np.newaxis])

    return (alpha, beta)
def init_sims(self, replace=False):
    """
    Precompute L2-normalized vectors.

    If `replace` is set, forget the original vectors and only keep the normalized
    ones = saves lots of memory!

    Note that you **cannot continue training** after doing a replace. The model becomes
    effectively read-only = you can call `most_similar`, `similarity` etc., but not `train`.
    """
    if getattr(self, 'syn0norm', None) is None or replace:
        logger.info("precomputing L2-norms of word weight vectors")
        if replace:
            for i in xrange(self.syn0.shape[0]):
                self.syn0[i, :] /= sqrt((self.syn0[i, :] ** 2).sum(-1))
            self.syn0norm = self.syn0
            if hasattr(self, 'syn1'):
                del self.syn1
        else:
            self.syn0norm = (self.syn0 / sqrt((self.syn0 ** 2).sum(-1))[..., newaxis]).astype(REAL)
def iter_chunks(self, chunksize=None):
    """
    Iteratively yield the index as chunks of documents, each of size <= chunksize.

    The chunk is returned in its raw form (matrix or sparse matrix slice).
    The size of the chunk may be smaller than requested; it is up to the caller
    to check the result for real length, using `chunk.shape[0]`.
    """
    self.close_shard()

    if chunksize is None:
        # if not explicitly specified, use the chunksize from the constructor
        chunksize = self.chunksize

    for shard in self.shards:
        query = shard.get_index().index
        for chunk_start in xrange(0, query.shape[0], chunksize):
            # scipy.sparse doesn't allow slicing beyond real size of the matrix
            # (unlike numpy). so, clip the end of the chunk explicitly to make
            # scipy.sparse happy
            chunk_end = min(query.shape[0], chunk_start + chunksize)
            chunk = query[chunk_start: chunk_end]  # create a view
            yield chunk
def _mul(s, A, B):
    if len(A) == 1 and len(A[0]) == 1:
        A = A[0][0]
        return s.element_class(s, map(lambda y: map(lambda x: A * x, y), B))
    elif len(B) == 1 and len(B[0]) == 1:
        B = B[0][0]
        return s.element_class(s, map(lambda y: map(lambda x: x * B, y), A))
    deg_total_1 = max([len(X) + len(Y) - 1 for X, Y in itertools.product(A, B)])
    deg_total_2 = len(list(itertools.product(A, B)))
    ret = [[0] * deg_total_1 for _ in xrange(deg_total_2)]
    deg1 = 0
    for X in A:
        deg2 = 0
        for Y in B:
            for x, y in enumerate(X):
                for u, v in enumerate(Y):
                    ret[deg1 + deg2][x + u] += y * v
            deg2 += 1
        deg1 += 1
    return s.element_class(s, ret)
def bss_eval_global(mixed_wav, src1_wav, src2_wav, pred_src1_wav, pred_src2_wav):
    len_cropped = pred_src1_wav.shape[-1]
    src1_wav = src1_wav[:len_cropped]
    src2_wav = src2_wav[:len_cropped]
    mixed_wav = mixed_wav[:len_cropped]
    gnsdr, gsir, gsar = np.zeros(2), np.zeros(2), np.zeros(2)
    total_len = 0
    # for i in range(2):
    sdr, sir, sar, _ = bss_eval_sources(np.array([src1_wav, src2_wav]),
                                        np.array([pred_src1_wav, pred_src2_wav]), True)
    sdr_mixed, _, _, _ = bss_eval_sources(np.array([src1_wav, src2_wav]),
                                          np.array([mixed_wav, mixed_wav]), True)
    nsdr = sdr - sdr_mixed
    gnsdr += len_cropped * nsdr
    gsir += len_cropped * sir
    gsar += len_cropped * sar
    total_len += len_cropped
    gnsdr = gnsdr / total_len
    gsir = gsir / total_len
    gsar = gsar / total_len
    return gnsdr, gsir, gsar
def _enqueue_thread_fn(self, sess, coord, start_epoch, data_set):
    training_X, training_y, validation_X, validation_y = \
        data_set.training_X, data_set.training_y, data_set.validation_X, data_set.validation_y
    for epoch in moves.xrange(start_epoch, self.num_epochs + 1):
        np.random.seed(epoch + self.seed_delta)
        for batch_data in self.training_iterator(training_X, training_y):
            if coord.should_stop():
                return
            sess.run(self.enqueue_op,
                     feed_dict={self.batch_X: batch_data[0],
                                self.batch_y: self._adjust_ground_truth(batch_data[1])})
        for batch_data in self.validation_iterator(validation_X, validation_y):
            if coord.should_stop():
                return
            sess.run(self.enqueue_op,
                     feed_dict={self.batch_X: batch_data[0],
                                self.batch_y: self._adjust_ground_truth(batch_data[1])})
def df_to_padded_padded_to_df_runner(t_col):
    n_seqs = 5
    max_seq_length = 10
    ids = xrange(n_seqs)
    cols_to_expand = ['event', 'int_column', 'double_column']
    np.random.seed(1)

    df = generate_random_df(n_seqs, max_seq_length)
    df = df.reset_index(drop=True)

    # Column names to transform to tensor
    dtypes = df[cols_to_expand].dtypes.values
    padded = df_to_padded(df, cols_to_expand, 'id', t_col)

    df_new = padded_to_df(padded, cols_to_expand, dtypes, ids, 'id', t_col)
    # Pandas is awful. Index changes when slicing
    df = df[['id', t_col] + cols_to_expand].reset_index(drop=True)
    pd.util.testing.assert_frame_equal(df, df_new)
def padded_events_to_not_censored(events, discrete_time):
    seq_lengths = get_padded_seq_lengths(events)
    n_seqs = events.shape[0]
    is_not_censored = np.copy(events)

    for i in xrange(n_seqs):
        if seq_lengths[i] > 0:
            is_not_censored[i][:seq_lengths[i]] = get_is_not_censored(
                events[i][:seq_lengths[i]], discrete_time)
    return is_not_censored

# MISC / Data munging
# def df_to_padded_memcost(df, id_col='id', t_col='t'):
#     """
#     Calculates memory cost of padded using the alternative routes.
#     # number of arrays = features+tte+u = n_features+2
#     # To list? Pad betweeen?
#     # To array ->(pad after)
#     """
#     print('Not yet implemented')
#     return None
def get_is_not_censored(is_event, discrete_time=True):
    """
    Calculates non-censoring indicator `u` for one vector.

    :param array is_event: logical or numeric array indicating event.
    :param Boolean discrete_time: if `True`, last observation is conditionally censored.
    """
    n = len(is_event)
    is_not_censored = np.copy(is_event)

    if discrete_time:
        # Last obs is conditionally censored
        event_seen = is_event[-1]
        for i in reversed(xrange(n)):
            if is_event[i] and not event_seen:
                event_seen = is_event[i]
            is_not_censored[i] = event_seen
    else:
        # Last obs is always censored
        event_seen = False
        for i in reversed(xrange(n)):
            is_not_censored[i] = event_seen
            if is_event[i] and not event_seen:
                event_seen = is_event[i]
    return is_not_censored
def compactify(self):
    """
    Assign new word ids to all words.

    This is done to make the ids more compact, e.g. after some tokens have
    been removed via :func:`filter_tokens` and there are gaps in the id series.
    Calling this method will remove the gaps.
    """
    logger.debug("rebuilding dictionary, shrinking gaps")

    # build mapping from old id -> new id
    idmap = dict(izip(itervalues(self.token2id), xrange(len(self.token2id))))

    # reassign mappings to new ids
    self.token2id = dict((token, idmap[tokenid]) for token, tokenid in iteritems(self.token2id))
    self.id2token = {}
    self.dfs = dict((idmap[tokenid], freq) for tokenid, freq in iteritems(self.dfs))
def test_fit(epochs=10, seed=42):
    rng = np.random.RandomState(seed)
    for nb_outputs in [1, 2]:
        # Generate dummy data
        X_tr = rng.normal(size=(N, input_shape[0], input_shape[1]))
        Y_tr = [rng.normal(size=(N, 1)) for _ in xrange(nb_outputs)]

        # Build & compile the model
        model = build_model(nb_outputs)
        loss = [gen_gp_loss(gp) for gp in model.output_gp_layers]
        model.compile(optimizer=optimizer, loss=loss)

        # Train the model
        model.fit(X_tr, Y_tr, epochs=epochs, batch_size=batch_size, verbose=2)
def test_finetune(gp_n_iter=10, seed=42):
    rng = np.random.RandomState(seed)
    for nb_outputs in [1, 2]:
        # Generate dummy data
        X_tr = rng.normal(size=(N, input_shape[0], input_shape[1]))
        Y_tr = [rng.normal(size=(N, 1)) for _ in xrange(nb_outputs)]

        # Build & compile the model
        model = build_model(nb_outputs)
        loss = [gen_gp_loss(gp) for gp in model.output_gp_layers]
        model.compile(optimizer=optimizer, loss=loss)

        # Finetune the model
        model.finetune(X_tr, Y_tr, batch_size=batch_size, gp_n_iter=gp_n_iter, verbose=0)
def test_predict(seed=42):
    rng = np.random.RandomState(seed)
    for nb_outputs in [1, 2]:
        # Generate dummy data
        X_tr = rng.normal(size=(N, input_shape[0], input_shape[1]))
        Y_tr = [rng.normal(size=(N, 1)) for _ in xrange(nb_outputs)]
        X_ts = rng.normal(size=(N, input_shape[0], input_shape[1]))
        Y_ts = [rng.normal(size=(N, 1)) for _ in xrange(nb_outputs)]

        # Build & compile the model
        model = build_model(nb_outputs)
        loss = [gen_gp_loss(gp) for gp in model.output_gp_layers]
        model.compile(optimizer=optimizer, loss=loss)

        # Predict
        Y_pr = model.predict(X_ts, X_tr, Y_tr, batch_size=batch_size, verbose=0)

        assert type(Y_pr) is list
        assert len(Y_pr) == len(Y_ts)
        assert np.all([(yp.shape == yt.shape) for yp, yt in zip(Y_pr, Y_ts)])
def __call__(self, inputs, is_training=True, scope=None):
    """
    Args:
        inputs: with shape -> (batch_size, time_step/width, units/channels)
    """
    with tf.variable_scope(scope or type(self).__name__):
        in_channels = inputs.shape[-1].value
        conv_lst = []
        for idk in xrange(1, self.K + 1):
            with tf.variable_scope('inner_conv_%d' % idk):
                conv_k = self.activation(__conv1d_alone_time__(inputs, idk, in_channels, in_channels))
                conv_lst.append(conv_k)
        stacked_conv = tf.stack(conv_lst, axis=-1)
        # shape -> (batch_size, time_step/width, units/channels, K/height)

        #re_shape = tf.shape(stacked_conv)[:2] + [1, in_channels * self.K]
        re_shape = [tf.shape(stacked_conv)[0], tf.shape(stacked_conv)[1], 1, in_channels * self.K]
        stacked_conv = tf.reshape(stacked_conv, shape=re_shape)
        # shape -> (batch_size, time_step/width, 1, units*K/channels)

        ### max pool along time
        ksize = [1, 2, 1, 1]
        strid = [1, 1, 1, 1]
        pooled_conv = tf.squeeze(tf.nn.max_pool(stacked_conv, ksize, strid, 'SAME'), axis=2)
        # shape -> (batch_size, time_step/width, units*K/channels)
        return pooled_conv
def __call__(self, inputs, is_training=True, scope=None):
    """
    Args:
        inputs: with shape -> (batch_size, time_step/width, units/channels)
    """
    with tf.variable_scope(scope or type(self).__name__):
        in_channels = inputs.shape[-1].value
        conv_lst = []
        for idk in xrange(1, self.K + 1):
            with tf.variable_scope('inner_conv_%d' % idk):
                conv_k = self.activation(__conv1d_alone_time__(inputs, idk, in_channels, in_channels))
                norm_k = tf.contrib.layers.batch_norm(conv_k, is_training=is_training,
                                                      updates_collections=None)
                conv_lst.append(norm_k)
        stacked_conv = tf.stack(conv_lst, axis=-1)
        # shape -> (batch_size, time_step/width, units/channels, K/height)

        re_shape = [tf.shape(stacked_conv)[0], tf.shape(stacked_conv)[1], 1, in_channels * self.K]
        stacked_conv = tf.reshape(stacked_conv, shape=re_shape)
        # shape -> (batch_size, time_step/width, 1, units*K/channels)

        ### max pool along time
        ksize = [1, 2, 1, 1]
        strid = [1, 1, 1, 1]
        pooled_conv = tf.squeeze(tf.nn.max_pool(stacked_conv, ksize, strid, 'SAME'), axis=2)
        # shape -> (batch_size, time_step/width, units*K/channels)
        return pooled_conv
def eval_in_batches(self, data, session):
    """ Get all predictions for a dataset by running it in small batches. """
    size = data.shape[0]
    eval_batch_size = self.config.EVAL_BATCH_SIZE
    if size < eval_batch_size:
        raise ValueError("batch size for evals larger than dataset: %d" % size)
    predictions = numpy.ndarray(shape=(size, self.config.NUM_LABELS), dtype=numpy.float32)
    for begin in xrange(0, size, eval_batch_size):
        end = begin + eval_batch_size
        if end <= size:
            predictions[begin:end, :] = session.run(
                self.eval_prediction,
                feed_dict=self.create_feed_dict(data[begin:end, ...]))
        else:
            batch_predictions = session.run(
                self.eval_prediction,
                feed_dict=self.create_feed_dict(data[-eval_batch_size:, ...]))
            predictions[begin:, :] = batch_predictions[begin - size:, :]
    return predictions
def featurize_in_batches(self, session, sample_size):
    """ Get fully connected layer1 features by running it in small batches. """
    batch_size = self.config.EVAL_BATCH_SIZE
    if sample_size < batch_size:
        raise ValueError("batch size for evals larger than dataset: %d" % sample_size)
    features = numpy.ndarray(shape=(sample_size, 512), dtype=numpy.float32)
    labels = numpy.ndarray(shape=(sample_size,), dtype=numpy.int64)
    for begin in xrange(0, sample_size, batch_size):
        end = begin + batch_size
        if end <= sample_size:
            features[begin:end, :] = session.run(
                self.featurize,
                feed_dict=self.create_feed_dict(self.train_data[begin:end, ...]))
            labels[begin:end] = self.train_labels[begin:end]
        else:
            features[end - batch_size:end, :] = session.run(
                self.featurize,
                feed_dict=self.create_feed_dict(self.train_data[end - batch_size:end, ...]))
            labels[end - batch_size:end] = self.train_labels[end - batch_size:end]
    return features, labels
def _from_word2vec_binary(fname):
    with _open(fname, 'rb') as fin:
        words = []
        header = _decode(fin.readline())
        vocab_size, layer1_size = list(map(int, header.split()))  # throws for invalid file format
        vectors = np.zeros((vocab_size, layer1_size), dtype=float32)
        binary_len = np.dtype(float32).itemsize * layer1_size
        for line_no in xrange(vocab_size):
            # mixed text and binary: read text first, then binary
            word = []
            while True:
                ch = fin.read(1)
                if ch == b' ':
                    break
                if ch != b'\n':  # ignore newlines in front of words (some binary files have newline, some don't)
                    word.append(ch)
            word = _decode(b''.join(word))
            index = line_no
            words.append(word)
            vectors[index, :] = np.fromstring(fin.read(binary_len), dtype=float32)
        return words, vectors
def test_attentive_decoder():
    np.random.seed(0)
    enc_shape = (2, 3, 5)
    dec_shape = (2, 4, 7)
    prod = enc_shape[0] * enc_shape[1] * enc_shape[2]
    enc_data = np.arange(0, prod, dtype=np.float32).reshape(enc_shape) / prod
    prod = dec_shape[0] * dec_shape[1] * dec_shape[2]
    dec_data = np.arange(0, prod, dtype=np.float32).reshape(dec_shape) / prod
    skip_mask = np.ones((enc_data.shape[0], enc_data.shape[2]), dtype=np.float32)
    skip_mask[:, :1] = 0
    skip_mask[0, :2] = 0

    encoder = QRNNEncoder(enc_shape[1], 4, kernel_size=4, pooling="fo", zoneout=False, zoneout_ratio=0.5)
    decoder = QRNNGlobalAttentiveDecoder(dec_shape[1], 4, kernel_size=4, zoneout=False, zoneout_ratio=0.5)

    H = encoder(enc_data, skip_mask)
    ht = encoder.get_last_hidden_state()
    Y = decoder(dec_data, ht, H, skip_mask)

    decoder.reset_state()
    for t in xrange(dec_shape[2]):
        y = decoder.forward_one_step(dec_data[:, :, :t + 1], ht, H, skip_mask)
        assert np.sum((y.data - Y.data[:, :, :t + 1]) ** 2) == 0
        print("t = {} OK".format(t))
def create_beam_items(self, beam):
    beam_items = []
    for b in xrange(len(beam)):
        config = beam[b]
        prevScore = config.score
        dense_feats = self.template.feat_template(config.nodes, config.stack, config.b0)
        pr_scores = self.clf.predict_proba(dense_feats)[0]
        pr_scores = np.log(pr_scores)
        predictions = zip(pr_scores, self.clf.classes_)
        valid_trans = self.get_valid_transitions(config)
        for score, (action, label) in predictions:
            if self.transitions[action] in valid_trans:
                next_transition = valid_trans[self.transitions[action]]
                heapq.heappush(
                    beam_items, (prevScore + score, b, next_transition, label))
                if len(beam_items) > self.beamwidth:
                    heapq.heappop(beam_items)
    return beam_items
def _compute(self, signal):
    ltsds = np.zeros(self._windownum)
    prev = 0
    pair = None
    result = []
    for l in xrange(self._windownum):
        ltsd = self._ltsd(signal, l)
        ltsds[l] = ltsd
        x = self._is_signal(signal, ltsd, l)
        if x:  # signal
            if prev == 0:  # start signal segment
                pair = [l]
            prev = 1
        else:  # noise
            if prev == 1:  # end signal segment
                pair.append(l - 1)
                result.append(pair)
                pair = None
            prev = 0
    return result, ltsds
def test_generator_param(self):
    result = bind_params("%s", ((i for i in xrange(3)),), Encoder())
    self.assertEqual(result, "[0, 1, 2]")
def test_multiple_query_plans(self):
    hosts = [0, 1, 2, 3]
    policy = RoundRobinPolicy()
    policy.populate(None, hosts)
    for i in xrange(20):
        qplan = list(policy.make_query_plan())
        self.assertEqual(sorted(qplan), hosts)
def push(self, data):
    chunk_size = self.out_buffer_size
    for i in xrange(0, len(data), chunk_size):
        self._write_queue.put(data[i:i + chunk_size])
def execute(self, concurrency, fail_fast):
    self._fail_fast = fail_fast
    self._results_queue = []
    self._current = 0
    self._exec_count = 0
    with self._condition:
        for n in xrange(concurrency):
            if not self._execute_next():
                break
    return self._results()
def prepare_batch(self):
    if not self.split_bool:
        self.feed_single_batch(self.fetch_one_batch())
    else:
        many_records = [self.fetch_one_record()
                        for _ in xrange(self.batch_size * self.split_nums)]
        for batch in self.split_strategy(many_records):
            self.feed_single_batch(self.pre_process_batch(batch))
def prepare_validation(self):
    if not self.split_bool:
        while self._record_index <= (self._total_samples - self.batch_size):
            self.feed_single_batch(self.fetch_one_batch())
        remain_batch = []
        while self._record_index != 0:
            remain_batch.append(self.fetch_one_record())
        self.feed_single_batch(self.pre_process_batch(remain_batch))
    else:
        many_records = [self.fetch_one_record() for _ in xrange(self._total_samples)]
        for batch in self.split_strategy(many_records):
            self.feed_single_batch(self.pre_process_batch(batch))
def fetch_one_batch(self):
    records = [self.fetch_one_record() for _ in xrange(self.batch_size)]
    try:
        pre_processed = self.pre_process_batch(records)
    except Exception as e:
        print('[E] pre_processed failed')
        pre_processed = []
    return pre_processed
def _lost_point_level3(modules, modules_count):
    modules_range_short = xrange(modules_count - 6)

    lost_point = 0
    for row in xrange(modules_count):
        this_row = modules[row]
        for col in modules_range_short:
            if (this_row[col]
                    and not this_row[col + 1]
                    and this_row[col + 2]
                    and this_row[col + 3]
                    and this_row[col + 4]
                    and not this_row[col + 5]
                    and this_row[col + 6]):
                lost_point += 40

    for col in xrange(modules_count):
        for row in modules_range_short:
            if (modules[row][col]
                    and not modules[row + 1][col]
                    and modules[row + 2][col]
                    and modules[row + 3][col]
                    and modules[row + 4][col]
                    and not modules[row + 5][col]
                    and modules[row + 6][col]):
                lost_point += 40

    return lost_point
def _lost_point_level4(modules, modules_count):
    modules_range = xrange(modules_count)

    dark_count = 0
    for row in modules_range:
        this_row = modules[row]
        for col in modules_range:
            if this_row[col]:
                dark_count += 1

    ratio = abs(100 * dark_count / modules_count / modules_count - 50) / 5
    return ratio * 10
def main():
    line_type = {'names': ('id', 'url',
                           'lt_x', 'lt_y',  # left top
                           'rb_x', 'rb_y',  # right bottom
                           'pose', 'score', 'curation'),
                 'formats': ('S16', 'S256',
                             np.float, np.float,
                             np.float, np.float,
                             np.float, np.float, np.int)}

    rt_dir_info = sys.argv[1]
    dir_urls = sys.argv[2]
    if not os.path.exists(dir_urls):
        os.mkdir(dir_urls)

    list_info_files = os.listdir(rt_dir_info)
    for info_file_name in list_info_files[:]:
        individual_name, _ = os.path.splitext(info_file_name)
        info_file_path = os.path.join(rt_dir_info, info_file_name)
        list_info = np.loadtxt(info_file_path, dtype=line_type, comments=None)
        total_num = list_info.size
        print(individual_name, total_num)

        save_path = os.path.join(dir_urls, info_file_name)
        with open(save_path, 'w') as fp:
            for proc_num in xrange(total_num):
                str_url = list_info[proc_num]['url']
                fp.write('{}\n'.format(str_url))