The following 50 code examples, extracted from open-source Python projects, illustrate how to use theano.tensor.ones_like().
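Before the project examples, here is a minimal self-contained sketch (not taken from any of the projects below) of what T.ones_like() does: it returns a symbolic tensor filled with ones that has the same shape, and by default the same dtype, as its argument.

import numpy as np
import theano
import theano.tensor as T

# Symbolic input matrix; ones_like builds a tensor of ones with the
# same symbolic shape and dtype as `x`.
x = T.matrix('x')
ones = T.ones_like(x)

f = theano.function([x], ones)
print(f(np.zeros((2, 3), dtype=theano.config.floatX)))
# prints a 2x3 array of ones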
def crossentropy(y_pred, y_true, void_labels, one_hot=False):
    # Clip predictions
    y_pred = T.clip(y_pred, _EPSILON, 1.0 - _EPSILON)

    if one_hot:
        y_true = T.argmax(y_true, axis=1)

    # Create mask
    mask = T.ones_like(y_true, dtype=_FLOATX)
    for el in void_labels:
        mask = T.set_subtensor(mask[T.eq(y_true, el).nonzero()], 0.)

    # Modify y_true temporarily
    y_true_tmp = y_true * mask
    y_true_tmp = y_true_tmp.astype('int32')

    # Compute cross-entropy
    loss = T.nnet.categorical_crossentropy(y_pred, y_true_tmp)

    # Compute masked mean loss
    loss *= mask
    loss = T.sum(loss) / T.sum(mask)

    return loss
def sym_logdensity(self, x):
    """ x is a matrix of column datapoints (VxB) V = n_visible, B = batch size """
    def density_given_previous_a_and_x(x, w, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma, activations_factor, p_prev, a_prev, x_prev):
        a = a_prev + T.dot(T.shape_padright(x_prev, 1), T.shape_padleft(w, 1))
        h = self.nonlinearity(a * activations_factor)  # BxH

        Alpha = T.nnet.softmax(T.dot(h, V_alpha) + T.shape_padleft(b_alpha))  # BxC
        Mu = T.dot(h, V_mu) + T.shape_padleft(b_mu)  # BxC
        Sigma = T.exp((T.dot(h, V_sigma) + T.shape_padleft(b_sigma)))  # BxC
        p = p_prev + log_sum_exp(-constantX(0.5) * T.sqr((Mu - T.shape_padright(x, 1)) / Sigma)
                                 - T.log(Sigma) - constantX(0.5 * np.log(2 * np.pi)) + T.log(Alpha))
        return (p, a, x)

    # First element is different (it is predicted from the bias only)
    a0 = T.zeros_like(T.dot(x.T, self.W))  # BxH
    p0 = T.zeros_like(x[0])
    x0 = T.ones_like(x[0])
    ([ps, _as, _xs], updates) = theano.scan(density_given_previous_a_and_x,
                                            sequences=[x, self.W, self.V_alpha, self.b_alpha, self.V_mu,
                                                       self.b_mu, self.V_sigma, self.b_sigma, self.activation_rescaling],
                                            outputs_info=[p0, a0, x0])
    return (ps[-1], updates)
def sym_logdensity(self, x):
    """ x is a matrix of column datapoints (VxB) V = n_visible, B = batch size """
    def density_given_previous_a_and_x(x, w, v, b, activations_factor, p_prev, a_prev, x_prev):
        a = a_prev + T.dot(T.shape_padright(x_prev, 1), T.shape_padleft(w, 1))
        h = self.nonlinearity(a * activations_factor)  # BxH
        t = T.dot(h, v) + b
        p_xi_is_one = T.nnet.sigmoid(t) * constantX(0.9999) + constantX(0.0001 * 0.5)  # Make logistic regression more robust by having the sigmoid saturate at 0.00005 and 0.99995
        p = p_prev + x * T.log(p_xi_is_one) + (1 - x) * T.log(1 - p_xi_is_one)
        return (p, a, x)

    # First element is different (it is predicted from the bias only)
    a0 = T.zeros_like(T.dot(x.T, self.W))  # BxH
    p0 = T.zeros_like(x[0])
    x0 = T.ones_like(x[0])
    ([ps, _, _], updates) = theano.scan(density_given_previous_a_and_x,
                                        sequences=[x, self.W, self.V, self.b, self.activation_rescaling],
                                        outputs_info=[p0, a0, x0])
    return (ps[-1], updates)
def sym_logdensity(self, x):
    """ x is a matrix of column datapoints (VxB) V = n_visible, B = batch size """
    def density_given_previous_a_and_x(x, w, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma, activations_factor, p_prev, a_prev, x_prev):
        a = a_prev + T.dot(T.shape_padright(x_prev, 1), T.shape_padleft(w, 1))
        h = self.nonlinearity(a * activations_factor)  # BxH

        Alpha = T.nnet.softmax(T.dot(h, V_alpha) + T.shape_padleft(b_alpha))  # BxC
        Mu = T.dot(h, V_mu) + T.shape_padleft(b_mu)  # BxC
        Sigma = T.exp((T.dot(h, V_sigma) + T.shape_padleft(b_sigma)))  # BxC
        p = p_prev + log_sum_exp(T.log(Alpha) - T.log(2 * Sigma) - T.abs_(Mu - T.shape_padright(x, 1)) / Sigma)
        return (p, a, x)

    # First element is different (it is predicted from the bias only)
    a0 = T.zeros_like(T.dot(x.T, self.W))  # BxH
    p0 = T.zeros_like(x[0])
    x0 = T.ones_like(x[0])
    ([ps, _as, _xs], updates) = theano.scan(density_given_previous_a_and_x,
                                            sequences=[x, self.W, self.V_alpha, self.b_alpha, self.V_mu,
                                                       self.b_mu, self.V_sigma, self.b_sigma, self.activation_rescaling],
                                            outputs_info=[p0, a0, x0])
    return (ps[-1], updates)
def apply(self, inputs, states, cells, mask=None):
    def slice_last(x, no):
        return x[:, no * self.dim: (no + 1) * self.dim]

    activation = tensor.dot(states, self.W_state) + inputs
    in_gate = self.gate_activation.apply(
        slice_last(activation, 0))
    pre = slice_last(activation, 1)
    forget_gate = self.gate_activation.apply(
        pre + self.bias * tensor.ones_like(pre))
    next_cells = (
        forget_gate * cells +
        in_gate * self.activation.apply(slice_last(activation, 2)))
    out_gate = self.gate_activation.apply(
        slice_last(activation, 3))
    next_states = out_gate * self.activation.apply(next_cells)

    if mask:
        next_states = (mask[:, None] * next_states +
                       (1 - mask[:, None]) * states)
        next_cells = (mask[:, None] * next_cells +
                      (1 - mask[:, None]) * cells)

    return next_states, next_cells
def _make_actiondist_ops(self, obsfeat_B_Df):
    # Computes action distribution mean (of a Gaussian) using MLP
    with nn.variable_scope('hidden'):
        net = nn.FeedforwardNet(obsfeat_B_Df, (self.obsfeat_space.dim,), self.cfg.hidden_spec)
    with nn.variable_scope('out'):
        mean_layer = nn.AffineLayer(net.output, net.output_shape, (self.action_space.dim,),
                                    initializer=np.zeros((net.output_shape[0], self.action_space.dim)))
        assert mean_layer.output_shape == (self.action_space.dim,)
    means_B_Da = mean_layer.output

    # Action distribution log standard deviations are parameters themselves
    logstdevs_1_Da = nn.get_variable('logstdevs_1_Da', np.full((1, self.action_space.dim), self.cfg.init_logstdev), broadcastable=(True, False))
    stdevs_1_Da = self.cfg.min_stdev + tensor.exp(logstdevs_1_Da)  # minimum stdev seems to make density / kl computations more stable
    stdevs_B_Da = tensor.ones_like(means_B_Da) * stdevs_1_Da  # "broadcast" to (B,Da)

    actiondist_B_Pa = tensor.concatenate([means_B_Da, stdevs_B_Da], axis=1)
    return actiondist_B_Pa
def __call__(self, input):
    b_size = input.shape[0]
    input_data = input[:(b_size / 2)]
    initial = input[(b_size / 2):]
    input_data = input_data.dimshuffle(2, 0, 1)
    initial = initial.dimshuffle(2, 0, 1)

    me = self.dropout(T.ones_like(input_data[0]))
    mh = self.dropout(T.ones_like(self.encoder(input_data[0])))

    def step(e, h, me, mh):
        ig = sigmoid(self.encode_igate(me * e) + self.recode_igate(mh * h))
        fg = sigmoid(self.encode_fgate(me * e) + self.recode_fgate(mh * h))
        return self.activation(fg * self.recoder(mh * h) + ig * self.encoder(me * e))

    h = theano.scan(step, sequences=[input_data, initial],
                    non_sequences=[me, mh], outputs_info=None)[0]
    return h.dimshuffle(1, 2, 0)
def __call__(self, input):
    b_size = input.shape[0]
    input_data = input[:(b_size / 2)]
    initial = input[(b_size / 2):]
    input_data = input_data.dimshuffle(2, 0, 1)
    initial = initial.dimshuffle(2, 0, 1)

    me = self.dropout1(T.ones_like(input_data[0]))
    mh = self.dropout2(T.ones_like(self.encoder(input_data[0])))

    def step(e, h, me, mh):
        ig = sigmoid(self.encode_igate(me * e) + self.recode_igate(mh * h))
        fg = sigmoid(self.encode_fgate(me * e) + self.recode_fgate(mh * h))
        return self.activation(fg * self.recoder(mh * h) + ig * self.encoder(me * e))

    h = theano.scan(step, sequences=[input_data, initial],
                    non_sequences=[me, mh], outputs_info=None)[0]
    return h.dimshuffle(1, 2, 0)
def forward_prop_step(x_t, sentence_t, s_t1_prev, s_t2_prev):
    filtered_words = T.tanh(F.dot(sentence_t) + d)
    pooled_words = filtered_words.max(axis=1)
    x_e = T.concatenate([x_t, pooled_words])

    # GRU Layer 1.
    z_t1 = T.nnet.hard_sigmoid(U_1[0].dot(x_e) + W[0].dot(s_t1_prev) + b[0])
    r_t1 = T.nnet.hard_sigmoid(U_1[1].dot(x_e) + W[1].dot(s_t1_prev) + b[1])
    c_t1 = T.tanh(U_1[2].dot(x_e) + W[2].dot(s_t1_prev * r_t1) + b[2])
    s_t1 = (T.ones_like(z_t1) - z_t1) * c_t1 + z_t1 * s_t1_prev

    # GRU Layer 2.
    z_t2 = T.nnet.hard_sigmoid(U_2[0].dot(s_t1) + W[3].dot(s_t2_prev) + b[3])
    r_t2 = T.nnet.hard_sigmoid(U_2[1].dot(s_t1) + W[4].dot(s_t2_prev) + b[4])
    c_t2 = T.tanh(U_2[2].dot(s_t1) + W[5].dot(s_t2_prev * r_t2) + b[5])
    s_t2 = (T.ones_like(z_t2) - z_t2) * c_t2 + z_t2 * s_t2_prev

    # Final output calculation.
    o_t = T.nnet.sigmoid(V.dot(s_t2) + c)

    return [o_t, s_t1, s_t2]
def normalize_batch_in_training(x, gamma, beta, reduction_axes, epsilon=1e-3):
    """Computes mean and std for batch then apply batch_normalization on batch.
    """
    # TODO remove this if statement when Theano without
    # T.nnet.bn.batch_normalization_train is deprecated
    if not hasattr(T.nnet.bn, 'batch_normalization_train'):
        return _old_normalize_batch_in_training(x, gamma, beta, reduction_axes, epsilon)

    if gamma is None:
        if beta is None:
            gamma = ones_like(x)
        else:
            gamma = ones_like(beta)
    if beta is None:
        if gamma is None:
            beta = zeros_like(x)
        beta = zeros_like(gamma)

    normed, mean, stdinv = T.nnet.bn.batch_normalization_train(
        x, gamma, beta, reduction_axes, epsilon)

    return normed, mean, T.inv(stdinv ** 2)
def batch_normalization(x, mean, var, beta, gamma, epsilon=1e-3):
    """Apply batch normalization on x given mean, var, beta and gamma.
    """
    # TODO remove this if statement when Theano without
    # T.nnet.bn.batch_normalization_test is deprecated
    if not hasattr(T.nnet.bn, 'batch_normalization_test'):
        return _old_batch_normalization(x, mean, var, beta, gamma, epsilon)

    if gamma is None:
        gamma = ones_like(var)
    if beta is None:
        beta = zeros_like(mean)

    if mean.ndim == 1:
        # based on TensorFlow's default: normalize along rightmost dimension
        reduction_axes = list(range(x.ndim - 1))
    else:
        reduction_axes = [i for i in range(x.ndim) if mean.broadcastable[i]]

    return T.nnet.bn.batch_normalization_test(
        x, gamma, beta, mean, var, reduction_axes, epsilon)


# TODO remove this function when Theano without
# T.nnet.bn.batch_normalization_train is deprecated
def test_gpujoin_gpualloc():
    a = T.fmatrix('a')
    a_val = numpy.asarray(numpy.random.rand(4, 5), dtype='float32')
    b = T.fmatrix('b')
    b_val = numpy.asarray(numpy.random.rand(3, 5), dtype='float32')

    f = theano.function([a, b], T.join(0, T.zeros_like(a), T.ones_like(b)) + 4,
                        mode=mode_without_gpu)
    f_gpu = theano.function([a, b], T.join(0, T.zeros_like(a), T.ones_like(b)),
                            mode=mode_with_gpu)
    f_gpu2 = theano.function([a, b], T.join(0, T.zeros_like(a), T.ones_like(b)) + 4,
                             mode=mode_with_gpu)

    assert sum([node.op == T.alloc for node in f.maker.fgraph.toposort()]) == 2
    assert sum([node.op == T.join for node in f.maker.fgraph.toposort()]) == 1
    assert sum([isinstance(node.op, GpuAlloc) for node in f_gpu.maker.fgraph.toposort()]) == 2
    assert sum([node.op == gpu_join for node in f_gpu.maker.fgraph.toposort()]) == 1
    assert sum([isinstance(node.op, GpuAlloc) for node in f_gpu2.maker.fgraph.toposort()]) == 2
    assert sum([node.op == gpu_join for node in f_gpu2.maker.fgraph.toposort()]) == 1
    assert numpy.allclose(f(a_val, b_val), f_gpu2(a_val, b_val))
def test_int32_dtype(self):
    # Reported on the theano-user mailing-list:
    # https://groups.google.com/d/msg/theano-users/MT9ui8LtTsY/rwatwEF9zWAJ
    size = 9
    intX = 'int32'

    C = tensor.matrix('C', dtype=intX)
    I = tensor.matrix('I', dtype=intX)

    fI = I.flatten()
    data = tensor.ones_like(fI)
    indptr = tensor.arange(data.shape[0] + 1, dtype='int32')

    m1 = sparse.CSR(data, fI, indptr, (8, size))
    m2 = sparse.dot(m1, C)
    y = m2.reshape(shape=(2, 4, 9), ndim=3)

    f = theano.function(inputs=[I, C], outputs=y)

    i = numpy.asarray([[4, 3, 7, 7], [2, 8, 4, 5]], dtype=intX)
    a = numpy.asarray(numpy.random.randint(0, 100, (size, size)),
                      dtype=intX)
    f(i, a)
def test_structured_add_s_v(self):
    sp_types = {'csc': sp.csc_matrix,
                'csr': sp.csr_matrix}

    for format in ['csr', 'csc']:
        for dtype in ['float32', 'float64']:
            x = theano.sparse.SparseType(format, dtype=dtype)()
            y = tensor.vector(dtype=dtype)
            f = theano.function([x, y], structured_add_s_v(x, y))

            spmat = sp_types[format](random_lil((4, 3), dtype, 3))
            spones = spmat.copy()
            spones.data = numpy.ones_like(spones.data)
            mat = numpy.asarray(numpy.random.rand(3), dtype=dtype)

            out = f(spmat, mat)

            utt.assert_allclose(as_ndarray(spones.multiply(spmat + mat)),
                                out.toarray())
def sp_ones_like(x):
    """
    Construct a sparse matrix of ones with the same sparsity pattern.

    Parameters
    ----------
    x
        Sparse matrix to take the sparsity pattern.

    Returns
    -------
    A sparse matrix
        The same as `x` with data changed for ones.

    """
    # TODO: don't restrict to CSM formats
    data, indices, indptr, shape = csm_properties(x)
    return CSM(format=x.format)(tensor.ones_like(data), indices, indptr, shape)
def setUp(self):
    self.k = T.iscalar("k")
    self.A = T.vector("A")
    result, _ = theano.scan(
        fn=lambda prior_result, A: prior_result * A,
        outputs_info=T.ones_like(self.A),
        non_sequences=self.A,
        n_steps=self.k)
    result_check, _ = theano.scan_checkpoints(
        fn=lambda prior_result, A: prior_result * A,
        outputs_info=T.ones_like(self.A),
        non_sequences=self.A,
        n_steps=self.k,
        save_every_N=100)
    self.result = result[-1]
    self.result_check = result_check[-1]
    self.grad_A = T.grad(self.result.sum(), self.A)
    self.grad_A_check = T.grad(self.result_check.sum(), self.A)
def test_gpualloc_output_to_gpu():
    a_val = numpy.asarray(numpy.random.rand(4, 5), dtype='float32')
    a = tcn.shared_constructor(a_val)

    b = T.fscalar()
    f = theano.function([b], T.ones_like(a) + b, mode=mode_without_gpu)
    f_gpu = theano.function([b], B.gpu_from_host(T.ones_like(a)) + b,
                            mode=mode_with_gpu)

    f(2)
    f_gpu(2)

    assert sum([node.op == T.alloc for node in f.maker.fgraph.toposort()]) == 1
    assert sum([node.op == B.gpu_alloc
                for node in f_gpu.maker.fgraph.toposort()]) == 1

    assert numpy.allclose(numpy.ones(a.get_value(borrow=True).shape) + 9,
                          f_gpu(9))
    assert numpy.allclose(f(5), f_gpu(5))
def make_node(self, acts, labels, input_lengths):
    # Unless specified, assume all sequences have full sequence length, i.e. acts_.shape[0]
    if input_lengths == None:
        input_lengths = T.cast(acts.shape[0], dtype="int32") * T.ones_like(acts[0, :, 0], dtype=np.int32)

    # acts.shape = [seqLen, batchN, outputUnit]
    if acts.dtype != "float32":
        raise Exception("acts must be float32 instead of %s" % acts.dtype)
    # labels.shape = [batchN, labelLen]
    if labels.dtype != "int32":
        raise Exception("labels must be int32 instead of %s" % labels.dtype)
    # input_lengths.shape = [batchN]
    if input_lengths.dtype != "int32":
        raise Exception("input_lengths must be int32 instead of %s" % input_lengths.dtype)

    applyNode = theano.Apply(self, inputs=[acts, input_lengths, labels], outputs=[self.costs, self.gradients])

    # Return only the cost. Gradient will be returned by grad()
    self.default_output = 0
    return applyNode
def _meshgrid(height, width):
    # This should be equivalent to:
    #  x_t, y_t = np.meshgrid(np.linspace(-1, 1, width),
    #                         np.linspace(-1, 1, height))
    #  ones = np.ones(np.prod(x_t.shape))
    #  grid = np.vstack([x_t.flatten(), y_t.flatten(), ones])
    x_t = T.dot(T.ones((height, 1)),
                _linspace(-1.0, 1.0, width).dimshuffle('x', 0))
    y_t = T.dot(_linspace(-1.0, 1.0, height).dimshuffle(0, 'x'),
                T.ones((1, width)))

    x_t_flat = x_t.reshape((1, -1))
    y_t_flat = y_t.reshape((1, -1))
    ones = T.ones_like(x_t_flat)
    grid = T.concatenate([x_t_flat, y_t_flat, ones], axis=0)
    return grid
def get_padded_shuffled_mask(self, train, X, pad=0):
    mask = self.get_input_mask(train)
    if mask is None:
        mask = T.ones_like(X.sum(axis=-1))  # is there a better way to do this without a sum?

    # mask is (nb_samples, time)
    mask = T.shape_padright(mask)  # (nb_samples, time, 1)
    mask = T.addbroadcast(mask, -1)  # the new dimension (the '1') is made broadcastable
    # see http://deeplearning.net/software/theano/library/tensor/basic.html#broadcasting-in-theano-vs-numpy
    mask = mask.dimshuffle(1, 0, 2)  # (time, nb_samples, 1)

    if pad > 0:
        # left-pad in time with 0
        padding = alloc_zeros_matrix(pad, mask.shape[1], 1)
        mask = T.concatenate([padding, mask], axis=0)
    return mask.astype('int8')
def get_output_for(self, upscaled, **kwargs):
    a, b = self.scale_factor
    # get output for pooling and pre-pooling layer
    inp, out = \
        lasagne.layers.get_output([self.pool2d_layer_in, self.pool2d_layer])
    # upscale the input feature map by scale_factor
    if b > 1:
        upscaled = T.extra_ops.repeat(upscaled, b, 3)
    if a > 1:
        upscaled = T.extra_ops.repeat(upscaled, a, 2)
    # get the shapes for pre-pooling layer and upscaled layer
    sh_pool2d_in = T.shape(inp)
    sh_upscaled = T.shape(upscaled)
    # in case the shape is different left-bottom-pad with zero
    tmp = T.zeros(sh_pool2d_in)

    indx = (slice(None),
            slice(None),
            slice(0, sh_upscaled[2]),
            slice(0, sh_upscaled[3]))
    upscaled = T.set_subtensor(tmp[indx], upscaled)
    # get max pool indices
    indices_pool = T.grad(None, wrt=inp,
                          known_grads={out: T.ones_like(out)})
    # mask values using indices_pool
    f = indices_pool * upscaled
    return f
def accuracy(y_pred, y_true, void_labels, one_hot=False):
    assert (y_pred.ndim == 2) or (y_pred.ndim == 1)

    # y_pred to indices
    if y_pred.ndim == 2:
        y_pred = T.argmax(y_pred, axis=1)

    if one_hot:
        y_true = T.argmax(y_true, axis=1)

    # Compute accuracy
    acc = T.eq(y_pred, y_true).astype(_FLOATX)

    # Create mask
    mask = T.ones_like(y_true, dtype=_FLOATX)
    for el in void_labels:
        indices = T.eq(y_true, el).nonzero()
        if any(indices):
            mask = T.set_subtensor(mask[indices], 0.)

    # Apply mask
    acc *= mask
    acc = T.sum(acc) / T.sum(mask)

    return acc
def get_output_for(self, input, **kwargs):
    return T.ones_like(input) * self.constant
def ones_like(x):
    return T.ones_like(x)
def avg_pool(input_layer, **kwargs):
    # hack to work around https://github.com/Theano/Theano/issues/3776
    norm = nn.layers.ExpressionLayer(input_layer, lambda X: T.ones_like(X))
    norm = nn.layers.Pool2DLayer(norm, mode='average_inc_pad', **kwargs)

    l = nn.layers.Pool2DLayer(input_layer, mode='average_inc_pad', **kwargs)
    l = nn.layers.ElemwiseMergeLayer([l, norm], T.true_div)
    return l
def loss_func(self, y_true, y_predict):
    active_notes = T.shape_padright(y_true[:, :, :, 0])
    mask = T.concatenate([T.ones_like(active_notes), active_notes,
                          T.repeat(T.ones_like(active_notes), self.output_size - 2, -1)], axis=-1)
    loglikelihoods = mask * T.log(2 * y_predict * y_true - y_predict - y_true + 1 + self.epsilon)
    return T.neg(T.sum(loglikelihoods))
def mirror_activations(input, input_fixed):
    out_fixed = T.nnet.relu(input_fixed)
    mask = T.grad(cost=None, wrt=input_fixed,
                  known_grads={out_fixed: T.ones_like(out_fixed)})
    out = input * mask

    return out, out_fixed
def mirror_activations(input, input_fixed, pool_size):
    out_fixed = my_pool_2d(input_fixed, ds=pool_size, ignore_border=True)
    mask = T.grad(cost=None, wrt=input_fixed,
                  known_grads={out_fixed: T.ones_like(out_fixed)})
    masked_input = input * mask
    out = Cfg.floatX(pool_size[0] * pool_size[1]) * \
        pool_2d(masked_input, mode='average_exc_pad', ds=pool_size,
                ignore_border=True)

    return out, out_fixed
def generative_sampling(self, seed, emb_data, sample_length):
    fruit = theano.shared(value=seed)

    def step(h_tm, y_tm):
        x_z = T.dot(emb_data[y_tm], self.W_z) + self.b_z
        x_r = T.dot(emb_data[y_tm], self.W_r) + self.b_r
        x_h = T.dot(emb_data[y_tm], self.W) + self.b_h
        z_t = self.inner_activation(x_z + T.dot(h_tm, self.U_z))
        r_t = self.inner_activation(x_r + T.dot(h_tm, self.U_r))
        hh_t = self.activation(x_h + T.dot(r_t * h_tm, self.U))
        h_t = (T.ones_like(z_t) - z_t) * hh_t + z_t * h_tm

        y_t = T.nnet.softmax(T.dot(h_t, self.V) + self.b_y)
        y = T.argmax(y_t, axis=1)

        return h_t, y[0]

    [_, samples], _ = theano.scan(fn=step,
                                  outputs_info=[self.h0, fruit],
                                  n_steps=sample_length)

    get_samples = theano.function(inputs=[],
                                  outputs=samples)

    return get_samples()
def set_output(self):
    self._output = tensor.ones_like(self._prev_layer.output) - self._prev_layer.output
def ones_like(self, x):
    '''Instantiates an all-ones tensor of the same shape as another tensor.
    '''
    return T.ones_like(x)
def ones_like(x, dtype=None, name=None):
    return T.ones_like(x, dtype=dtype)
def oneslike(self, t, dtype):
    return T.ones_like(t, dtype)
def _create_components(self, deterministic=False):
    # load network input
    X = self.inputs[0]
    x = X.flatten(2)

    # load networks
    l_p_mu, l_q_mu, l_q_sample, _, _, _ = self.network

    # load network output
    z, q_mu = lasagne.layers.get_output([l_q_sample, l_q_mu], deterministic=deterministic)
    p_mu = lasagne.layers.get_output(l_p_mu, z, deterministic=deterministic)

    # entropy term
    log_qz_given_x = log_bernoulli(dg(z), q_mu).sum(axis=1)

    # expected p(x,z) term
    z_prior = T.ones_like(z) * np.float32(0.5)
    log_pz = log_bernoulli(z, z_prior).sum(axis=1)
    log_px_given_z = log_bernoulli(x, p_mu).sum(axis=1)
    log_pxz = log_pz + log_px_given_z

    # save them for later
    self.log_pxz = log_pxz
    self.log_qz_given_x = log_qz_given_x

    return log_pxz.flatten(), log_qz_given_x.flatten()
def _forward_step(self, x_t, s_t):
    """Input vector/matrix x(t) and state matrix s(t)."""

    # Gradient clipping
    E, a, U, W, b, V, c = [
        th.gradient.grad_clip(p, -5.0, 5.0) for p in self.params.values()]

    # Initialize state to return
    s_next = T.zeros_like(s_t)

    # Vocab-to-state encoding layer
    inout = T.tanh(T.dot(x_t, E) + a)

    # Loop over GRU layers
    for layer in range(self.hyper.layers):
        # 3 matrices per layer
        L = layer * 3
        # Get previous state for this layer
        s_prev = s_t[layer]
        # Update gate
        z = T.nnet.hard_sigmoid(T.dot(inout, U[L]) + T.dot(s_prev, W[L]) + b[L])
        # Reset gate
        r = T.nnet.hard_sigmoid(T.dot(inout, U[L+1]) + T.dot(s_prev, W[L+1]) + b[L+1])
        # Candidate state
        h = T.tanh(T.dot(inout, U[L+2]) + T.dot(r * s_prev, W[L+2]) + b[L+2])
        # New state
        s_new = (T.ones_like(z) - z) * h + z * s_prev
        s_next = T.set_subtensor(s_next[layer], s_new)
        # Update for next layer or final output (might add dropout here later)
        inout = s_new

    # Final output
    o_t = T.dot(inout, V) + c
    return o_t, s_next

# Regularization cost
def ones_like(x):
    return T.ones_like(x, dtype=theano.config.floatX)
def get_padded_shuffled_mask(self, train, X, pad=0):
    mask = self.get_input_mask(train)
    if mask is None:
        mask = T.ones_like(X.sum(axis=-1))  # is there a better way to do this without a sum?

    # mask is (nb_samples, time)
    mask = T.shape_padright(mask)  # (nb_samples, time, 1)
    mask = T.addbroadcast(mask, -1)  # (time, nb_samples, 1) matrix.
    mask = mask.dimshuffle(1, 0, 2)  # (time, nb_samples, 1)

    if pad > 0:
        # left-pad in time with 0
        padding = alloc_zeros_matrix(pad, mask.shape[1], 1)
        mask = T.concatenate([padding, mask], axis=0)
    return mask.astype('int8')
def get_output_mask(self, train=None):
    X = self.get_input(train)
    if not self.mask_zero:
        return None
    else:
        return T.ones_like(X) * (1 - T.eq(X, 0))
def ones_like(x, name=None):
    return T.ones_like(x)
def apply(self, x):
    if self.mode is 'normal' or x.ndim == 2:
        shape = x.shape
        dropout_mask = self.rng_theano.binomial(x.shape, p=self.p, dtype='float32') / self.p
    elif x.ndim == 4 or x.ndim == 5:
        # spatial dropout, meaning drop a whole feature map
        shape = (x.shape[x.ndim-3],)
        pattern = ('x',) * (x.ndim-3) + (0,) + ('x', 'x',)
        dropout_feature_mask = self.rng_theano.binomial(shape, p=self.p, dtype='float32') / self.p
        dropout_mask = T.ones_like(x) * dropout_feature_mask.dimshuffle(*pattern) / self.p
    return x
def best_path_decoding(self, probs, probs_mask=None):
    # probs is T x B x C+1
    T = probs.shape[0]
    B = probs.shape[1]
    C = probs.shape[2] - 1

    maxprob = probs.argmax(axis=2)
    is_double = tensor.eq(maxprob[:-1], maxprob[1:])
    maxprob = tensor.switch(tensor.concatenate([tensor.zeros((1, B)), is_double]),
                            C * tensor.ones_like(maxprob), maxprob)
    # maxprob = theano.printing.Print('maxprob')(maxprob.T).T

    # returns two values :
    # label : (T x) T x B
    # label_length : (T x) B
    def recursion(maxp, p_mask, label_length, label):
        nonzero = p_mask * tensor.neq(maxp, C)
        nonzero_id = nonzero.nonzero()[0]

        new_label = tensor.set_subtensor(label[label_length[nonzero_id], nonzero_id],
                                         maxp[nonzero_id])
        new_label_length = tensor.switch(nonzero, label_length + numpy.int32(1), label_length)

        return new_label_length, new_label

    [label_length, label], _ = scan(fn=recursion,
                                    sequences=[maxprob, probs_mask],
                                    outputs_info=[tensor.zeros((B,), dtype='int32'), -tensor.ones((T, B))])

    return label[-1], label_length[-1]
def build(self):
    state_pre = T.zeros((self.x.shape[-1], self.n_hidden), dtype=theano.config.floatX)

    def _recurrence(x_t, m, h_tm1):
        x_e = self.E[x_t, :]
        concated = T.concatenate([x_e, h_tm1], axis=1)

        # Update gate
        z_t = self.f(T.dot(concated, self.Wz) + self.bz)
        # Reset gate
        r_t = self.f(T.dot(concated, self.Wr) + self.br)
        # Cell update
        c_t = T.tanh(T.dot(x_e, self.Wxc) + T.dot(r_t * h_tm1, self.Whc) + self.bc)
        # Hidden state
        h_t = (T.ones_like(z_t) - z_t) * c_t + z_t * h_tm1
        # masking
        h_t = h_t * m[:, None]

        return h_t

    h, _ = theano.scan(fn=_recurrence,
                       sequences=[self.x, self.mask],
                       outputs_info=state_pre,
                       truncate_gradient=self.bptt)

    # Dropout
    if self.p > 0:
        drop_mask = self.rng.binomial(n=1, p=1 - self.p, size=h.shape, dtype=theano.config.floatX)
        self.activation = T.switch(self.is_train, h * drop_mask, h * (1 - self.p))
    else:
        self.activation = T.switch(self.is_train, h, h)
def build(self):
    state_pre = T.zeros((self.x.shape[-1], self.n_hidden), dtype=theano.config.floatX)
    state_below = T.dot(self.E[self.x, :], self.W) + self.b
    state_belowx = T.dot(self.E[self.x, :], self.Wx) + self.bx

    def split(x, n, dim):
        if x.ndim == 3:
            return x[:, :, n * dim: (n + 1) * dim]
        return x[:, n * dim:(n + 1) * dim]

    def _recurrence(x_t, xx_t, m, h_tm1):
        preact = x_t + T.dot(h_tm1, self.U)

        # Reset gate
        r_t = self.f(split(preact, 0, self.n_hidden))
        # Update gate
        z_t = self.f(split(preact, 1, self.n_hidden))
        # Cell update
        c_t = T.tanh(T.dot(h_tm1, self.Ux) * r_t + xx_t)
        # Hidden state
        h_t = (T.ones_like(z_t) - z_t) * c_t + z_t * h_tm1
        # masking
        h_t = h_t * m[:, None]

        return h_t

    h, _ = theano.scan(fn=_recurrence,
                       sequences=[state_below, state_belowx, self.mask],
                       outputs_info=state_pre,
                       truncate_gradient=self.bptt)

    # Dropout
    if self.p > 0:
        drop_mask = self.rng.binomial(n=1, p=1 - self.p, size=h.shape, dtype=theano.config.floatX)
        self.activation = T.switch(self.is_train, h * drop_mask, h * (1 - self.p))
    else:
        self.activation = T.switch(self.is_train, h, h)
def build(self):
    state_pre = T.zeros((self.n_batch, self.n_hidden), dtype=theano.config.floatX)

    def _recurrence(x_t, m, h_tm1):
        x_e = self.E[x_t, :]
        concated = T.concatenate([x_e, h_tm1], axis=-1)

        # Update gate
        z_t = T.nnet.sigmoid(T.dot(concated, self.Wz) + self.bz)
        # Reset gate
        r_t = T.nnet.sigmoid(T.dot(concated, self.Wr) + self.br)
        # Cell update
        c_t = T.tanh(T.dot(x_e, self.Wxc) + T.dot(r_t * h_tm1, self.Whc) + self.bc)
        # Hidden state
        h_t = (T.ones_like(z_t) - z_t) * c_t + z_t * h_tm1
        # masking
        h_t = h_t * m[:, None]  # + (1. - m)[:, None] * h_t

        return h_t

    h, _ = theano.scan(fn=_recurrence,
                       sequences=[self.x, self.mask],
                       outputs_info=[dict(initial=T.zeros((self.n_batch, self.n_hidden))), ])

    # Dropout
    if self.p > 0:
        drop_mask = self.rng.binomial(n=1, p=1 - self.p, size=h.shape, dtype=theano.config.floatX)
        self.activation = T.switch(T.eq(self.is_train, 1), h * drop_mask, h * (1 - self.p))
    else:
        self.activation = T.switch(T.eq(self.is_train, 1), h, h)
def test_scan(self):
    """
    Test the compute_test_value mechanism Scan.
    """
    orig_compute_test_value = theano.config.compute_test_value
    try:
        theano.config.compute_test_value = 'raise'
        # theano.config.compute_test_value = 'warn'

        k = T.iscalar("k")
        A = T.vector("A")
        k.tag.test_value = 3
        A.tag.test_value = numpy.random.rand(5).astype(config.floatX)

        def fx(prior_result, A):
            return prior_result * A

        # Symbolic description of the result
        result, updates = theano.scan(fn=fx,
                                      outputs_info=T.ones_like(A),
                                      non_sequences=A,
                                      n_steps=k)

        # We only care about A**k, but scan has provided us with A**1 through A**k.
        # Discard the values that we don't care about. Scan is smart enough to
        # notice this and not waste memory saving them.
        final_result = result[-1]
        assert hasattr(final_result.tag, 'test_value')
    finally:
        theano.config.compute_test_value = orig_compute_test_value
def test_scan_err1(self):
    # This test should fail when building fx for the first time
    orig_compute_test_value = theano.config.compute_test_value
    try:
        theano.config.compute_test_value = 'raise'

        k = T.iscalar("k")
        A = T.matrix("A")
        k.tag.test_value = 3
        A.tag.test_value = numpy.random.rand(5, 3).astype(config.floatX)

        def fx(prior_result, A):
            return T.dot(prior_result, A)

        # Since we have to inspect the traceback,
        # we cannot simply use self.assertRaises()
        try:
            theano.scan(
                fn=fx,
                outputs_info=T.ones_like(A),
                non_sequences=A,
                n_steps=k)
            assert False
        except ValueError:
            # Get traceback
            tb = sys.exc_info()[2]
            # Get frame info 4 layers up
            frame_info = traceback.extract_tb(tb)[-5]
            # We should be in the "fx" function defined above
            expected = 'test_compute_test_value.py'
            assert os.path.split(frame_info[0])[1] == expected, frame_info
            assert frame_info[2] == 'fx'
    finally:
        theano.config.compute_test_value = orig_compute_test_value