We extracted the following 50 code examples from open-source Python projects to illustrate how to use theano.tensor.arange().
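Most of the examples below use the same idiom: T.arange(n) paired with an integer vector to select one element per row, the symbolic analogue of NumPy's matrix[np.arange(n), idx]. Before the project code, here is a minimal, self-contained sketch of that pattern (the variable names are illustrative and not taken from any of the projects below):

import numpy as np
import theano
import theano.tensor as T

# Symbolic matrix of per-class probabilities and a vector of target indices.
probs = T.matrix('probs')
targets = T.ivector('targets')

# Pick probs[i, targets[i]] for every row i, just like
# probs_val[np.arange(n), targets_val] in NumPy.
picked = probs[T.arange(probs.shape[0]), targets]

f = theano.function([probs, targets], picked)
probs_val = np.array([[0.1, 0.9],
                      [0.8, 0.2]], dtype=theano.config.floatX)
print(f(probs_val, np.array([1, 0], dtype='int32')))  # -> [0.9, 0.8]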
def GMM_sample(mus, sigmas, mix_weights):
    """
    First, sample according to the prior mixing probabilities
    to choose the component density.
    Second, draw sample from that density.

    Inspired by implementation in `cle`
    """
    chosen_component = \
        T.argmax(
            srng.multinomial(pvals=mix_weights),
            axis=1)
    selected_mus = mus[T.arange(mus.shape[0]), :, chosen_component]
    selected_sigmas = sigmas[T.arange(sigmas.shape[0]), :, chosen_component]
    sample = srng.normal(size=selected_mus.shape,
                         avg=0., std=1.)
    sample *= selected_sigmas
    sample += selected_mus
    return sample, selected_mus, selected_sigmas, chosen_component

def negative_log_likelihood(self, y):
    """Return the mean of the negative log-likelihood of the prediction
    of this model under a given target distribution.

    .. math::

        \frac{1}{|\mathcal{D}|} \mathcal{L} (\theta=\{W,b\}, \mathcal{D}) =
        \frac{1}{|\mathcal{D}|} \sum_{i=0}^{|\mathcal{D}|}
            \log(P(Y=y^{(i)}|x^{(i)}, W,b)) \\
        \ell (\theta=\{W,b\}, \mathcal{D})

    :type y: theano.tensor.TensorType
    :param y: corresponds to a vector that gives for each example the
              correct label.

    Note: we use the mean instead of the sum so that the learning rate
    is less dependent on the batch size.
    """
    if self.is_binary:
        return -T.mean(T.log(self.p_y_given_x))
    return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])

def reset(self):
    # Set original ordering
    self.ordering.set_value(np.arange(self._input_size, dtype=theano.config.floatX))

    # Reset RandomStreams
    self._rng.seed(self._random_seed)

    # Initial layer connectivity
    self.layers_connectivity[0].set_value((self.ordering + 1).eval())
    for i in range(1, len(self.layers_connectivity) - 1):
        self.layers_connectivity[i].set_value(np.zeros((self._hidden_sizes[i - 1]), dtype=theano.config.floatX))
    self.layers_connectivity[-1].set_value(self.ordering.get_value())

    # Reset MRG_RandomStreams (GPU)
    self._mrng.rstate = self._initial_mrng_rstate
    for state, value in zip(self._mrng.state_updates, self._initial_mrng_state_updates):
        state[0].set_value(value)

    self.sample_connectivity()

def negativeLogLikelihoodWeighted(self, y, weightPerClass):
    # Weighting the cost of the different classes in the cost function,
    # in order to counter class imbalance.
    e1 = np.finfo(np.float32).tiny
    addTinyProbMatrix = T.lt(self.p_y_given_x_train, 4 * e1) * e1

    weights = weightPerClass.dimshuffle('x', 0, 'x', 'x', 'x')
    log_p_y_given_x_train = T.log(self.p_y_given_x_train + addTinyProbMatrix)
    weighted_log_probs = log_p_y_given_x_train * weights

    wShape = weighted_log_probs.shape

    # Re-arrange
    idx0 = T.arange(wShape[0]).dimshuffle(0, 'x', 'x', 'x')
    idx2 = T.arange(wShape[2]).dimshuffle('x', 0, 'x', 'x')
    idx3 = T.arange(wShape[3]).dimshuffle('x', 'x', 0, 'x')
    idx4 = T.arange(wShape[4]).dimshuffle('x', 'x', 'x', 0)

    return -T.mean(weighted_log_probs[idx0, y, idx2, idx3, idx4])

def sample(self, n_samples):
    '''Inspired by jbornschein's implementation.'''

    z0 = T.zeros((n_samples, self.dim,)).astype(floatX) + T.shape_padleft(self.b)
    rs = self.trng.uniform((self.dim, n_samples), dtype=floatX)

    def _step_sample(i, W_i, r_i, z):
        p_i = T.nnet.sigmoid(z[:, i]) * 0.9999 + 0.000005
        x_i = (r_i <= p_i).astype(floatX)
        z = z + T.outer(x_i, W_i)
        return z, x_i

    seqs = [T.arange(self.dim), self.W, rs]
    outputs_info = [z0, None]
    non_seqs = []

    (zs, x), updates = scan(_step_sample, seqs, outputs_info, non_seqs,
                            self.dim)

    return x.T, updates

def ctc_path_probs(predict, Y, alpha=1e-4):
    smoothed_predict = (1 - alpha) * predict[:, Y] + alpha * np.float32(1.) / Y.shape[0]
    L = T.log(smoothed_predict)
    zeros = T.zeros_like(L[0])
    log_first = zeros

    f_skip_idxs = ctc_create_skip_idxs(Y)
    b_skip_idxs = ctc_create_skip_idxs(Y[::-1])  # there should be a shortcut to calculating this

    def step(log_f_curr, log_b_curr, f_active, log_f_prev, b_active, log_b_prev):
        f_active_next, log_f_next = ctc_update_log_p(f_skip_idxs, zeros, f_active, log_f_curr, log_f_prev)
        b_active_next, log_b_next = ctc_update_log_p(b_skip_idxs, zeros, b_active, log_b_curr, log_b_prev)
        return f_active_next, log_f_next, b_active_next, log_b_next

    [f_active, log_f_probs, b_active, log_b_probs], _ = theano.scan(
        step, sequences=[L, L[::-1, ::-1]],
        outputs_info=[np.int32(1), log_first, np.int32(1), log_first])

    idxs = T.arange(L.shape[1]).dimshuffle('x', 0)
    mask = (idxs < f_active.dimshuffle(0, 'x')) & (idxs < b_active.dimshuffle(0, 'x'))[::-1, ::-1]
    log_probs = log_f_probs + log_b_probs[::-1, ::-1] - L
    return log_probs, mask

def batch_gather(reference, indices):
    '''Batchwise gathering of row indices.

    The numpy equivalent is reference[np.arange(batch_size), indices].

    # Arguments
        reference: tensor with ndim >= 2 of shape
            (batch_size, dim1, dim2, ..., dimN)
        indices: 1d integer tensor of shape (batch_size) satisfying
            0 <= i < dim2 for each element i.

    # Returns
        A tensor with shape (batch_size, dim2, ..., dimN) equal to
        reference[1:batch_size, indices]
    '''
    batch_size = K.shape(reference)[0]
    indices = tf.pack([tf.range(batch_size), indices], axis=1)
    return tf.gather_nd(reference, indices)

def batch_gather(reference, indices):
    '''Batchwise gathering of row indices.

    The numpy equivalent is reference[np.arange(batch_size), indices].

    # Arguments
        reference: tensor with ndim >= 2 of shape
            (batch_size, dim1, dim2, ..., dimN)
        indices: 1d integer tensor of shape (batch_size) satisfying
            0 <= i < dim2 for each element i.

    # Returns
        A tensor with shape (batch_size, dim2, ..., dimN) equal to
        reference[1:batch_size, indices]
    '''
    batch_size = K.shape(reference)[0]
    return reference[T.arange(batch_size), indices]

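To make the semantics shared by the two batch_gather backends above concrete, here is a small NumPy illustration of the gather they both describe (plain NumPy, not part of either project; the values are illustrative):

import numpy as np

# reference: (batch_size, dim) scores; indices: one column index per row.
reference = np.array([[0.1, 0.9, 0.0],
                      [0.3, 0.3, 0.4]])
indices = np.array([1, 2])

# The operation described in the docstrings:
# reference[np.arange(batch_size), indices]
print(reference[np.arange(reference.shape[0]), indices])  # -> [0.9, 0.4]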
def max_oracle(scores, y_truth):
    n_classes = scores.shape[1]
    t_range = T.arange(y_truth.shape[0])

    # classification loss for any combination
    losses = 1. - T.extra_ops.to_one_hot(y_truth, n_classes)

    # get max score for each sample
    y_star = T.argmax(scores + losses, axis=1)

    # compute classification loss for batch
    delta = losses[t_range, y_star].sum()

    return y_star, delta

def sequence_log_likelihood(y, y_hat, y_mask, y_hat_mask, blank_symbol):
    """
    Based on code from Shawn Tan.
    Credits to Kyle Kastner as well.
    """
    y_hat_mask_len = tensor.sum(y_hat_mask, axis=0, dtype='int32')
    y_mask_len = tensor.sum(y_mask, axis=0, dtype='int32')

    log_probabs = _log_path_probabs(
        y, T.log(y_hat), y_mask, y_hat_mask, blank_symbol)
    batch_size = log_probabs.shape[1]

    log_labels_probab = _log_add(
        log_probabs[y_hat_mask_len - 1,
                    tensor.arange(batch_size),
                    y_mask_len - 1],
        log_probabs[y_hat_mask_len - 1,
                    tensor.arange(batch_size),
                    y_mask_len - 2])
    return log_labels_probab

def _labeling_batch_to_class_batch(y, y_labeling, num_classes,
                                   y_hat_mask=None):
    # FIXME: y_hat_mask is currently not used
    batch_size = y.shape[1]
    N = y_labeling.shape[0]
    n_labels = y.shape[0]

    # sum over all repeated labels
    # from (T, B, L) to (T, C, B)
    out = T.zeros((num_classes, batch_size, N))
    y_labeling = y_labeling.dimshuffle((2, 1, 0))  # L, B, T
    y_ = y

    def scan_step(index, prev_res, y_labeling, y_):
        res_t = T.inc_subtensor(prev_res[y_[index, T.arange(batch_size)],
                                         T.arange(batch_size)],
                                y_labeling[index, T.arange(batch_size)])
        return res_t

    result, updates = theano.scan(scan_step,
                                  sequences=[T.arange(n_labels)],
                                  non_sequences=[y_labeling, y_],
                                  outputs_info=[out])

    # result will be (C, B, T) so we make it (T, B, C)
    return result[-1].dimshuffle(2, 1, 0)

def get_minibatches_idx(n, minibatch_size, shuffle=False):
    """
    Used to shuffle the dataset at each iteration.
    """
    idx_list = numpy.arange(n, dtype="int32")

    if shuffle:
        numpy.random.shuffle(idx_list)

    minibatches = []
    minibatch_start = 0
    for i in range(n // minibatch_size):
        minibatches.append(idx_list[minibatch_start:
                                    minibatch_start + minibatch_size])
        minibatch_start += minibatch_size

    if minibatch_start != n:
        # Make a minibatch out of what is left
        minibatches.append(idx_list[minibatch_start:])

    return zip(range(len(minibatches)), minibatches)

def __init__(self, name, x, y, n_in, n_out):
    self.x = x
    self.name = name

    # weight matrix W (n_in, n_out)
    self.W = theano.shared(
        value=np.zeros((n_in, n_out), dtype=theano.config.floatX),
        name='W', borrow=True)

    # bias vector b (n_out, )
    self.b = theano.shared(
        value=np.zeros((n_out,), dtype=theano.config.floatX),
        name='b', borrow=True)

    # p(y|x, w, b)
    self.p_y_given_x = T.nnet.softmax(T.dot(x, self.W) + self.b)
    self.y_pred = T.argmax(self.p_y_given_x, axis=1)

    self.negative_log_likelihood = -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])
    self.errors = T.mean(T.neq(self.y_pred, y))

    # params
    self.params = [self.W, self.b]

def nll_of_x_given_o(self, input, ordering):
    """ Returns the theano graph that computes $-ln p(\bx|o)$.

    Parameters
    ----------
    input: 1D vector
        One image with shape (nb_channels * images_height * images_width).

    ordering: 1D vector of int
        List of pixel indices representing the input ordering.
    """
    D = int(np.prod(self.image_shape))
    mask_o_d = T.zeros((D, D), dtype=theano.config.floatX)
    mask_o_d = T.set_subtensor(mask_o_d[T.arange(D), ordering], 1.)

    mask_o_lt_d = T.cumsum(mask_o_d, axis=0)
    mask_o_lt_d = T.set_subtensor(mask_o_lt_d[1:], mask_o_lt_d[:-1])
    mask_o_lt_d = T.set_subtensor(mask_o_lt_d[0, :], 0.)

    input = T.tile(input[None, :], (D, 1))
    nll = -T.sum(self.lnp_x_o_d_given_x_o_lt_d(input, mask_o_d, mask_o_lt_d))
    return nll

def get_output_for(self, inputs, **kwargs):
    input = inputs[0]
    input_word = T.flatten(inputs[1])
    word_dropout = inputs[2]

    # Apply word embedding
    sentence_rep = self.SemMem.get_output_for([input, word_dropout])

    # Apply GRU Layer
    gru_outs = self.GRU.get_output_for([sentence_rep])

    # Extract candidate fact from GRU's output by the input_word variable,
    # resolving the input with the additional word.
    # e.g. John went to the hallway nil nil nil -> [GRU1, ..., GRU8] -> GRU5
    candidate_facts = T.reshape(
        gru_outs[T.arange(gru_outs.shape[0], dtype='int32'), input_word - 1],
        (-1, input.shape[1], self.hid_state_size))
    return candidate_facts

def get_output_for(self, input_, **kwargs):
    return input_[
        T.arange(input_.shape[0]).dimshuffle(0, 'x', 'x'),
        T.arange(input_.shape[1]).dimshuffle('x', 0, 'x'),
        T.sort(T.argsort(input_, axis=-1)[:, :, -self.k:], axis=-1),
    ]

def get_y_prob(self, h, y):
    """
    :param h: 1D: n_words, 2D: Batch, 3D: n_y
    :param y: 1D: n_words, 2D: Batch
    :return: gradient of cross entropy: 1D: Batch
    """
    batch_index = T.arange(h.shape[1])
    z_score0 = self.BOS + h[0]  # 1D: batch, 2D: n_y
    y_score0 = z_score0[batch_index, y[0]]  # 1D: batch

    [_, y_scores, z_scores], _ = theano.scan(fn=self._forward_step,
                                             sequences=[h[1:], y[1:]],
                                             outputs_info=[y[0], y_score0, z_score0],
                                             non_sequences=[self.W_t, batch_index])

    y_score = y_scores[-1]
    z_score = logsumexp(z_scores[-1], axis=1).flatten()

    return y_score - z_score

def __init__(self, x, y, n_x, n_y):
    # initialize with 0 the weights as a matrix of shape (n_in, n_out)
    self.w = theano.shared(
        value=numpy.zeros((n_x, n_y), dtype=theano.config.floatX),
        name='w',
        borrow=True
    )
    # initialize the biases b as a vector of n_out 0s
    self.b = theano.shared(
        value=numpy.zeros((n_y,), dtype=theano.config.floatX),
        name='b',
        borrow=True
    )
    self.params = [self.w, self.b]

    # save x, y
    self.x = x
    self.y = y

    # calculate
    p_y_given_x = T.nnet.softmax(T.dot(self.x, self.w) + self.b)
    # probability is maximal
    y_pred = T.argmax(p_y_given_x, axis=1)
    # error
    self.error = T.mean(T.neq(y_pred, self.y))
    # cost
    self.cost = -T.mean(T.log(p_y_given_x)[T.arange(self.y.shape[0]), self.y])

def get_minibatches_idx(n, minibatch_size, shuffle=False):
    """
    Used to shuffle the dataset at each iteration.
    """
    idx_list = numpy.arange(n, dtype="int32")

    if shuffle:
        numpy.random.shuffle(idx_list)

    minibatches = []
    minibatch_start = 0
    for i in range(n // minibatch_size):
        minibatches.append(idx_list[minibatch_start:minibatch_start + minibatch_size])
        minibatch_start += minibatch_size

    if minibatch_start != n:
        # Make a minibatch out of what is left
        minibatches.append(idx_list[minibatch_start:])

    return zip(range(len(minibatches)), minibatches)

def sample_gmm(mu, sigma, weight, theano_rng):
    k = weight.shape[-1]
    dim = mu.shape[-1] / k

    shape_result = weight.shape
    shape_result = tensor.set_subtensor(shape_result[-1], dim)
    ndim_result = weight.ndim

    mu = mu.reshape((-1, dim, k))
    sigma = sigma.reshape((-1, dim, k))
    weight = weight.reshape((-1, k))

    sample_weight = theano_rng.multinomial(pvals=weight, dtype=weight.dtype)
    idx = predict(sample_weight, axis=-1)

    mu = mu[tensor.arange(mu.shape[0]), :, idx]
    sigma = sigma[tensor.arange(sigma.shape[0]), :, idx]

    epsilon = theano_rng.normal(
        size=mu.shape, avg=0., std=1., dtype=mu.dtype)

    result = mu + sigma * epsilon

    return result.reshape(shape_result, ndim=ndim_result)

def __init__(self, seq_len, n_feature):
    import theano.tensor as T
    self.Input = lasagne.layers.InputLayer(shape=(None, seq_len, n_feature))
    self.buildNetwork()
    self.output = lasagne.layers.get_output(self.network)
    self.params = lasagne.layers.get_all_params(self.network, trainable=True)
    self.output_fn = theano.function([self.Input.input_var], self.output)

    fx = T.fvector().astype("float64")
    choices = T.ivector()
    px = self.output[T.arange(self.output.shape[0]), choices]
    log_px = T.log(px)
    cost = -fx.dot(log_px)
    updates = lasagne.updates.adagrad(cost, self.params, 0.0008)
    Input = lasagne.layers.InputLayer(shape=(None, seq_len, n_feature))
    self.train_fn = theano.function([self.Input.input_var, choices, fx],
                                    [cost, px, log_px], updates=updates)

def update_critic(self, random_sample):
    # random_sample = np.random.choice(np.arange(len(self.rewards)-1), self.batch_size)
    states_batch = np.zeros((self.batch_size, self.lookback_size, self.n_feature), dtype="float32")
    states_next_batch = np.zeros((self.batch_size, self.lookback_size, self.n_feature), dtype="float32")
    # print random_sample
    for i in range(self.batch_size):
        random_id = random_sample[i]
        states_batch[i, :, :] = np.array(self.states[random_id:random_id + self.lookback_size]).astype("float32")
        states_next_batch[i, :, :] = np.array(self.states[random_id + 1:(random_id + self.lookback_size + 1)]).astype("float32")

    reward_batch = np.array([self.rewards[i] for i in random_sample]).astype("float32")

    # using the target model to predict
    target_value = self.target_model.predict(states_next_batch).flatten() * self.gamma + reward_batch
    self.critic_model.train(states_batch, target_value.reshape(self.batch_size, 1))

def get_output_for(self, input, timesteps=None, *args, **kwargs):
    """Only forward outputs at certain/last sequence positions

    Parameters
    -------
    input : tensor
        Input layer with shape: [samples, sequence positions, features]
    timesteps : array of integers or None
        None: take output at last sequence position.
        Array of integers: take outputs at the sequence positions specified
        in the array; values serve as indices and must not exceed the
        sequence length; the length of the array must be the number of samples.
    """
    if timesteps is not None:
        return input[T.arange(start=0, stop=self.input_shape[0]), timesteps, :]
    else:
        return input[:, -1, :]

def _grab_probs(class_probs, target, use_fast_ver=False):
    if class_probs.ndim == 3:
        class_probs = class_probs.reshape((-1, class_probs.shape[-1]))

    shape0 = class_probs.shape[0]
    shape1 = class_probs.shape[1]

    p = None
    if target.ndim == 2 and use_fast_ver:
        target = target.flatten()
        cp = class_probs.reshape((target.shape[0], -1))
        p = TT.diag(cp.T[target])
    else:
        if target.ndim > 1:
            target = target.flatten()
        assert target.ndim == 1, 'make sure target is a vector of ints'
        assert 'int' in target.dtype

        pos = TT.arange(shape0) * shape1
        new_targ = target + pos
        p = class_probs.reshape((shape0 * shape1, 1))[new_targ].reshape((shape0,))
    return p

def softmax_loss(p_true, output_before_softmax):
    output_before_softmax -= T.max(output_before_softmax, axis=1, keepdims=True)
    if p_true.ndim == 2:
        return T.mean(T.log(T.sum(T.exp(output_before_softmax), axis=1)) -
                      T.sum(p_true * output_before_softmax, axis=1))
    else:
        return T.mean(T.log(T.sum(T.exp(output_before_softmax), axis=1)) -
                      output_before_softmax[T.arange(p_true.shape[0]), p_true])

def cost(self, probs, y, y_mask):
    y_flat = y.flatten()
    y_flat_idx = tensor.arange(y_flat.shape[0]) * self.vocab_size + y_flat

    cost = -tensor.log(probs.flatten()[y_flat_idx])
    cost = cost.reshape([y.shape[0], y.shape[1]])
    cost = (cost * y_mask).sum(0)
    cost = cost.mean()

    return cost

def f_log_probs(self, probs, x, x_mask, y, y_mask,
                src_selector, trg_selector, cg=None):
    y_flat = y.flatten()
    y_flat_idx = tensor.arange(y_flat.shape[0]) * self.vocab_size + y_flat

    cost = -tensor.log(probs.flatten()[y_flat_idx])
    cost = cost.reshape([y.shape[0], y.shape[1]])
    cost = (cost * y_mask).sum(0)

    func_inps = [x, x_mask, y, y_mask, src_selector, trg_selector]
    return theano.function(
        inputs=func_inps,
        outputs=cost, on_unused_input='warn')

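The two cost functions above pick out per-target probabilities by flattening the probability matrix and converting (row, label) pairs into flat indices with arange(n) * vocab_size + y_flat. A small NumPy check of that index arithmetic (illustrative values, not from the original project):

import numpy as np

vocab_size = 5
probs = np.random.rand(3, vocab_size)        # 3 (timestep, batch) entries, 5-word vocabulary
probs /= probs.sum(axis=1, keepdims=True)

y_flat = np.array([2, 0, 4])                 # target word id per entry
y_flat_idx = np.arange(y_flat.shape[0]) * vocab_size + y_flat

# Indexing the flattened matrix picks probs[i, y_flat[i]] for every i,
# which is the same as fancy indexing with arange on the rows.
assert np.allclose(probs.flatten()[y_flat_idx],
                   probs[np.arange(3), y_flat])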
def T_one_hot(inp_tensor, n_classes):
    """
    :todo:
        - Implement other methods from here:
        - Compare them speed-wise for different sizes
        - Implement N_one_hot for Numpy version, with speed tests.

    Theano one-hot (1-of-k) from an input tensor of indices.
    If the indices are of the shape (a0, a1, ..., an) the output
    shape would be (a0, a1, ..., an, n_classes).

    :params:
        - inp_tensor: any theano tensor with dtype int* as indices and all
            of them between [0, n_classes-1].
        - n_classes: number of classes which determines the output size.

    :usage:
        >>> idx = T.itensor3()
        >>> idx_val = numpy.array([[[0,1,2,3],[4,5,6,7]]], dtype='int32')
        >>> one_hot = T_one_hot(idx, 8)
        >>> out = one_hot.eval({idx: idx_val})
        >>> print out
        array([[[[ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
                 [ 0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.],
                 [ 0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.],
                 [ 0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.]],
                [[ 0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.],
                 [ 0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.],
                 [ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.],
                 [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.]]]])
        >>> print idx_val.shape, out.shape
        (1, 2, 4) (1, 2, 4, 8)
    """
    flattened = inp_tensor.flatten()
    z = T.zeros((flattened.shape[0], n_classes), dtype=theano.config.floatX)
    one_hot = T.set_subtensor(z[T.arange(flattened.shape[0]), flattened], 1)
    out_shape = [inp_tensor.shape[i] for i in xrange(inp_tensor.ndim)] + [n_classes]
    one_hot = one_hot.reshape(out_shape)
    return one_hot

def connect(self, inputs, weights, labels):
    """
    - inputs: flattened log scores from the softmax layer.
    """
    y_flat = labels.flatten()
    x_flat_idx = tensor.arange(y_flat.shape[0])
    cross_ent = -inputs[x_flat_idx, y_flat].reshape([labels.shape[0], labels.shape[1]])
    if weights is not None:
        cross_ent = cross_ent * weights
    # Summed over timesteps. Averaged across samples in the batch.
    return cross_ent.sum(axis=0).mean()

def build_objective(model, deterministic=False, epsilon=1e-12):
    predictions = nn.layers.get_output(model.l_out, deterministic=deterministic)
    targets = T.cast(T.flatten(nn.layers.get_output(model.l_target)), 'int32')
    p = predictions[T.arange(predictions.shape[0]), targets]
    p = T.clip(p, epsilon, 1.)
    loss = T.mean(T.log(p))
    return -loss

def heaviside(x, size):
    return T.arange(0, size).dimshuffle('x', 0) - T.repeat(x, size, axis=1) >= 0.

def get_output_for(self, input, **kwargs):
    mu = input[0]
    sigma = input[1]

    x_range = T.arange(0, self.max_support).dimshuffle('x', 0)
    mu = T.repeat(mu, self.max_support, axis=1)
    sigma = T.repeat(sigma, self.max_support, axis=1)
    x = (x_range - mu) / (sigma * T.sqrt(2.) + 1e-16)
    cdf = (T.erf(x) + 1.) / 2.

    return cdf

def build_objective(model, deterministic=False, epsilon=1e-12):
    agg_mil_loss = nn.layers.get_output(model.l_agg_mil_loss, deterministic=deterministic)
    targets = T.cast(T.flatten(nn.layers.get_output(model.l_target)), 'int32')
    loss = agg_mil_loss[T.arange(agg_mil_loss.shape[0]), targets]
    return T.mean(loss)

def build_objective(model, deterministic=False, epsilon=1e-12):
    predictions = nn.layers.get_output(model.l_out)
    targets = T.cast(T.flatten(nn.layers.get_output(model.l_target)), 'int32')
    p = predictions[T.arange(predictions.shape[0]), targets]
    p = T.clip(p, epsilon, 1.)
    loss = T.mean(T.log(p))
    return -loss