We extracted the following 50 code examples from open-source Python projects to illustrate how to use theano.tensor.shape_padleft().
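Before the project examples, here is a minimal standalone sketch (not taken from any of the projects below) of what theano.tensor.shape_padleft itself does: it prepends broadcastable axes of length 1 to a tensor's shape, which is what makes the broadcasting tricks in the examples work.

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')                 # e.g. shape (3, 4)
y = T.shape_padleft(x)            # shape (1, 3, 4); the new leading axis is broadcastable
z = T.shape_padleft(x, n_ones=2)  # shape (1, 1, 3, 4)

f = theano.function([x], [y.shape, z.shape])
print(f(np.zeros((3, 4), dtype=theano.config.floatX)))
# [array([1, 3, 4]), array([1, 1, 3, 4])]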
def sample(self, n_samples):
    '''Inspired by jbornschein's implementation.'''
    z0 = T.zeros((n_samples, self.dim,)).astype(floatX) + T.shape_padleft(self.b)
    rs = self.trng.uniform((self.dim, n_samples), dtype=floatX)

    def _step_sample(i, W_i, r_i, z):
        p_i = T.nnet.sigmoid(z[:, i]) * 0.9999 + 0.000005
        x_i = (r_i <= p_i).astype(floatX)
        z = z + T.outer(x_i, W_i)
        return z, x_i

    seqs = [T.arange(self.dim), self.W, rs]
    outputs_info = [z0, None]
    non_seqs = []

    (zs, x), updates = scan(_step_sample, seqs, outputs_info, non_seqs, self.dim)

    return x.T, updates
def sym_logdensity(self, x):
    """ x is a matrix of column datapoints (VxB) V = n_visible, B = batch size """
    def density_given_previous_a_and_x(x, w, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma, activations_factor, p_prev, a_prev, x_prev):
        a = a_prev + T.dot(T.shape_padright(x_prev, 1), T.shape_padleft(w, 1))
        h = self.nonlinearity(a * activations_factor)  # BxH
        Alpha = T.nnet.softmax(T.dot(h, V_alpha) + T.shape_padleft(b_alpha))  # BxC
        Mu = T.dot(h, V_mu) + T.shape_padleft(b_mu)  # BxC
        Sigma = T.exp((T.dot(h, V_sigma) + T.shape_padleft(b_sigma)))  # BxC
        p = p_prev + log_sum_exp(-constantX(0.5) * T.sqr((Mu - T.shape_padright(x, 1)) / Sigma) - T.log(Sigma) - constantX(0.5 * np.log(2 * np.pi)) + T.log(Alpha))
        return (p, a, x)
    # First element is different (it is predicted from the bias only)
    a0 = T.zeros_like(T.dot(x.T, self.W))  # BxH
    p0 = T.zeros_like(x[0])
    x0 = T.ones_like(x[0])
    ([ps, _as, _xs], updates) = theano.scan(density_given_previous_a_and_x,
                                            sequences=[x, self.W, self.V_alpha, self.b_alpha, self.V_mu, self.b_mu, self.V_sigma, self.b_sigma, self.activation_rescaling],
                                            outputs_info=[p0, a0, x0])
    return (ps[-1], updates)
def sym_logdensity(self, x):
    """ x is a matrix of column datapoints (VxB) V = n_visible, B = batch size """
    def density_given_previous_a_and_x(x, w, v, b, activations_factor, p_prev, a_prev, x_prev):
        a = a_prev + T.dot(T.shape_padright(x_prev, 1), T.shape_padleft(w, 1))
        h = self.nonlinearity(a * activations_factor)  # BxH
        t = T.dot(h, v) + b
        p_xi_is_one = T.nnet.sigmoid(t) * constantX(0.9999) + constantX(0.0001 * 0.5)  # Make logistic regression more robust by having the sigmoid saturate at 0.00005 and 0.99995
        p = p_prev + x * T.log(p_xi_is_one) + (1 - x) * T.log(1 - p_xi_is_one)
        return (p, a, x)
    # First element is different (it is predicted from the bias only)
    a0 = T.zeros_like(T.dot(x.T, self.W))  # BxH
    p0 = T.zeros_like(x[0])
    x0 = T.ones_like(x[0])
    ([ps, _, _], updates) = theano.scan(density_given_previous_a_and_x,
                                        sequences=[x, self.W, self.V, self.b, self.activation_rescaling],
                                        outputs_info=[p0, a0, x0])
    return (ps[-1], updates)
def sym_logdensity(self, x):
    """ x is a matrix of column datapoints (VxB) V = n_visible, B = batch size """
    def density_given_previous_a_and_x(x, w, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma, activations_factor, p_prev, a_prev, x_prev):
        a = a_prev + T.dot(T.shape_padright(x_prev, 1), T.shape_padleft(w, 1))
        h = self.nonlinearity(a * activations_factor)  # BxH
        Alpha = T.nnet.softmax(T.dot(h, V_alpha) + T.shape_padleft(b_alpha))  # BxC
        Mu = T.dot(h, V_mu) + T.shape_padleft(b_mu)  # BxC
        Sigma = T.exp((T.dot(h, V_sigma) + T.shape_padleft(b_sigma)))  # BxC
        p = p_prev + log_sum_exp(T.log(Alpha) - T.log(2 * Sigma) - T.abs_(Mu - T.shape_padright(x, 1)) / Sigma)
        return (p, a, x)
    # First element is different (it is predicted from the bias only)
    a0 = T.zeros_like(T.dot(x.T, self.W))  # BxH
    p0 = T.zeros_like(x[0])
    x0 = T.ones_like(x[0])
    ([ps, _as, _xs], updates) = theano.scan(density_given_previous_a_and_x,
                                            sequences=[x, self.W, self.V_alpha, self.b_alpha, self.V_mu, self.b_mu, self.V_sigma, self.b_sigma, self.activation_rescaling],
                                            outputs_info=[p0, a0, x0])
    return (ps[-1], updates)
def get_loss(self, raw_feature_strengths, raw_feature_vects, extra_info=False):
    raw_losses = self._loss_fun(raw_feature_strengths)
    raw_sum = T.sum(raw_losses)

    n_parallel, n_timestep = raw_feature_strengths.shape

    falloff_arr = np.array(self._falloff_rate, np.float32) ** T.cast(T.arange(n_timestep), 'float32')
    falloff_mat = T.shape_padright(falloff_arr) / T.shape_padleft(falloff_arr)
    falloff_scaling = T.switch(T.ge(falloff_mat, 1), 0, falloff_mat) / self._falloff_rate
    # falloff_scaling is of shape (n_timestep, n_timestep) with 0 along diagonal, and jump to 1 falling off along dimension 1
    # now we want to multiply through on both dimensions
    first_multiply = T.dot(raw_feature_strengths, falloff_scaling)  # shape (n_parallel, n_timestep)
    second_multiply = raw_feature_strengths * first_multiply
    unscaled_falloff_penalty = T.sum(second_multiply)

    full_loss = self._penalty_base * raw_sum + self._penalty_shock * unscaled_falloff_penalty

    if extra_info:
        return full_loss, {"raw_loss_sum": raw_sum}
    else:
        return full_loss
def sample(self, n_samples, p=None):
    if p is None:
        p = self.get_prob(*self.get_params())
    return T.shape_padleft(p), theano.OrderedUpdates()
def get_w_tilde(log_factor):
    '''Gets normalized weights.'''
    log_factor = log_factor - T.log(log_factor.shape[0]).astype(floatX)
    w_norm = log_sum_exp(log_factor, axis=0)
    log_w = log_factor - T.shape_padleft(w_norm)
    w_tilde = T.exp(log_w)
    return w_tilde
def fprop(self, all_states):
    if self.ntimes:
        stateshape0 = all_states.shape[0]
        shape0 = TT.switch(TT.gt(self.n, 0), self.n, all_states.shape[0])

        single_frame = TT.shape_padleft(all_states[stateshape0 - 1])
        mask = TT.alloc(numpy.float32(1), shape0, *[1 for k in xrange(all_states.ndim - 1)])
        rval = single_frame * mask
        self.out = rval
        return rval

    single_frame = all_states[all_states.shape[0] - 1]
    self.out = single_frame
    return single_frame
def fprop(self, all_states):
    shape0 = all_states.shape[0]
    single_frame = all_states.min(0)
    if self.ntimes:
        single_frame = TT.shape_padleft(all_states.min(0))
        mask = TT.alloc(numpy.float32(1), shape0, *[1 for k in xrange(all_states.ndim - 1)])
        rval = single_frame * mask
        self.out = rval
        return rval
    self.out = single_frame
    return single_frame
def fprop(self, all_states):
    shape0 = all_states.shape[0]
    single_frame = all_states.max(0)
    if self.ntimes:
        single_frame = TT.shape_padleft(all_states.max(0))
        mask = TT.alloc(numpy.float32(1), shape0, *[1 for k in xrange(all_states.ndim - 1)])
        rval = single_frame * mask
        self.out = rval
        return rval
    self.out = single_frame
    return single_frame
def fprop(self, x):
    # This is black magic based on broadcasting,
    # that's why variable names don't make any sense.
    a = TT.shape_padleft(x)
    padding = [1] * x.ndim
    b = TT.alloc(numpy.float32(1), self.n_times, *padding)
    self.out = a * b
    return self.out
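The "black magic" comment above refers to ordinary broadcasting: shape_padleft gives x a broadcastable leading axis, and multiplying by a tensor of ones whose leading length is n_times repeats x along that new axis (the same trick appears in repeat_x further down). A small standalone sketch of the idea, with illustrative names:

import numpy as np
import theano
import theano.tensor as T

def repeat_leading(x, n_times):
    # Add a broadcastable leading axis: shape becomes (1,) + x.shape.
    a = T.shape_padleft(x)
    # Ones of shape (n_times, 1, ..., 1); the size-1 dims are broadcastable,
    # so the product repeats `a` n_times along the first axis.
    b = T.alloc(np.float32(1), n_times, *([1] * x.ndim))
    return a * b

x = T.matrix('x')
f = theano.function([x], repeat_leading(x, 3))
print(f(np.arange(6, dtype=theano.config.floatX).reshape(2, 3)).shape)  # (3, 2, 3)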
def set_output(self):
    self._output = tensor.sum(tensor.shape_padright(self._prev_layer.output) * tensor.shape_padleft(self.W.val),
                              axis=-2)
    if self._bias:
        self._output += tensor.shape_padleft(self.b.val)
def sym_logdensity(self, x):
    """ x is a matrix of column datapoints (VxB) V = n_visible, B = batch size """
    def density_given_previous_a_and_x(x, w, v, c, p_prev, a_prev, x_prev, bias_prev):
        a = a_prev + T.dot(T.shape_padright(x_prev, 1), T.shape_padleft(w, 1))
        bias = bias_prev + constantX(np.log(2)) - T.log(1 + T.exp(w))
        h = self.nonlinearity(a + bias + self.b)  # BxH
        t = T.dot(h, v) + c
        p_xi_is_one = T.nnet.sigmoid(t) * constantX(0.9999) + constantX(0.0001 * 0.5)  # Make logistic regression more robust by having the sigmoid saturate at 0.00005 and 0.99995
        p = p_prev + x * T.log(p_xi_is_one) + (1 - x) * T.log(1 - p_xi_is_one)
        return (p, a, x, bias)
    a0 = T.zeros_like(T.dot(x.T, self.W))  # BxH
    p0 = T.zeros_like(x[0])
    x0 = T.zeros_like(x[0])
    bias0 = T.zeros_like(self.b)
    ([ps, _, _, _], updates) = theano.scan(density_given_previous_a_and_x,
                                           sequences=[x, self.W, self.V, self.c],
                                           outputs_info=[p0, a0, x0, bias0])
    return (ps[-1], updates)

# def sym_neg_loglikelihood_gradient(self, x):
#     loglikelihood, updates = self.sym_logdensity(x)
#     mean_loglikelihood = -loglikelihood.mean()
#     # Gradients
#     gradients = {}
#     for param in self.parameters_to_optimise:
#         gradients[param] = T.grad(mean_loglikelihood, self.__getattribute__(param))
#     return (mean_loglikelihood, gradients, updates)
def sym_logdensity(self, x):
    """ x is a matrix of column datapoints (VxB) V = n_visible, B = batch size """
    def density_given_previous_a_and_x(x, w, wb, v, c, p_prev, a_prev, bias_prev):
        h = self.nonlinearity(a_prev + bias_prev)  # BxH
        t = T.dot(h, v) + c
        p_xi_is_one = T.nnet.sigmoid(t) * constantX(0.9999) + constantX(0.0001 * 0.5)  # Make logistic regression more robust by having the sigmoid saturate at 0.00005 and 0.99995
        p = p_prev + x * T.log(p_xi_is_one) + (1 - x) * T.log(1 - p_xi_is_one)
        a = a_prev + T.dot(T.shape_padright(x, 1), T.shape_padleft(w - wb, 1))
        bias = bias_prev + T.log(1 + T.exp(wb)) - T.log(1 + T.exp(w))
        return (p, a, bias)
    a0 = T.zeros_like(T.dot(x.T, self.W))  # BxH
    p0 = T.zeros_like(x[0])
    bias0 = self.b
    ([ps, _, _], updates) = theano.scan(density_given_previous_a_and_x,
                                        sequences=[x, self.W, self.WB, self.V, self.c],
                                        outputs_info=[p0, a0, bias0])
    return (ps[-1], updates)

# def sym_neg_loglikelihood_gradient(self, x):
#     loglikelihood, updates = self.sym_logdensity(x)
#     mean_loglikelihood = -loglikelihood.mean()
#     # Gradients
#     gradients = {}
#     for param in self.parameters_to_optimise:
#         gradients[param] = T.grad(mean_loglikelihood, self.__getattribute__(param))
#     return (mean_loglikelihood, gradients, updates)
def sym_masked_neg_loglikelihood_gradient(self, x, mask):
    """ x is a matrix of column datapoints (DxB) D = n_visible, Bfloat = batch size """
    logdensity, z_alpha, z_mu, z_sigma, Alpha, Mu, Sigma, h = self.sym_mask_logdensity_estimator_intermediate(x, mask)

    # nnz = output_mask.sum(0)
    # sparsity_multiplier = T.shape_padright(T.shape_padleft((B+1e-6)/(nnz+1e-6)))

    # wPhi = T.maximum(Phi + T.log(Alpha), constantX(-100.0))  # BxDxC
    # lp_current = log_sum_exp(wPhi, axis=2) * output_mask  # BxD
    # lp_current_sum = (lp_current.sum(1) * D / (D-d)).sum()  # 1

    loglikelihood = logdensity.mean(dtype=floatX)
    loss = -loglikelihood

    dp_dz_alpha = T.grad(loss, z_alpha)  # BxDxC
    gb_alpha = dp_dz_alpha.sum(0)  # DxC
    gV_alpha = T.tensordot(h.T, dp_dz_alpha, [[1], [0]]).dimshuffle((1, 0, 2))  # DxHxC

    dp_dz_mu = T.grad(loss, z_mu)  # BxDxC
    dp_dz_mu = dp_dz_mu * Sigma  # Heuristic
    gb_mu = dp_dz_mu.sum(0)  # DxC
    gV_mu = T.tensordot(h.T, dp_dz_mu, [[1], [0]]).dimshuffle((1, 0, 2))  # DxHxC

    dp_dz_sigma = T.grad(loss, z_sigma)  # BxDxC
    gb_sigma = dp_dz_sigma.sum(0)  # DxC
    gV_sigma = T.tensordot(h.T, dp_dz_sigma, [[1], [0]]).dimshuffle((1, 0, 2))  # DxHxC

    if self.n_layers > 1:
        gWs, gbs, gW1, gWflags, gb1 = T.grad(loss, [self.Ws, self.bs, self.W1, self.Wflags, self.b1])
        gradients = {"V_alpha": gV_alpha, "b_alpha": gb_alpha, "V_mu": gV_mu, "b_mu": gb_mu,
                     "V_sigma": gV_sigma, "b_sigma": gb_sigma, "Ws": gWs, "bs": gbs,
                     "W1": gW1, "b1": gb1, "Wflags": gWflags}
    else:
        gW1, gWflags, gb1 = T.grad(loss, [self.W1, self.Wflags, self.b1])
        gradients = {"V_alpha": gV_alpha, "b_alpha": gb_alpha, "V_mu": gV_mu, "b_mu": gb_mu,
                     "V_sigma": gV_sigma, "b_sigma": gb_sigma, "W1": gW1, "b1": gb1, "Wflags": gWflags}
    # Gradients
    return (loss, gradients)
def one_hot(t, r=None):
    """Compute one hot encoding.

    given a tensor t of dimension d with integer values from range(r), return a
    new tensor of dimension d + 1 with values 0/1, where the last dimension
    gives a one-hot representation of the values in t.

    if r is not given, r is set to max(t) + 1
    """
    if r is None:
        r = tensor.max(t) + 1

    ranges = tensor.shape_padleft(tensor.arange(r), t.ndim)
    return tensor.eq(ranges, tensor.shape_padright(t, 1))
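A hypothetical usage check for the one_hot helper above (the input values are illustrative): broadcasting shape_padleft(arange(r)) against shape_padright(t, 1) compares every class index with every element of t at once.

import numpy as np
import theano
import theano.tensor as T

t = T.ivector('t')
f = theano.function([t], one_hot(t, r=4))  # one_hot as defined above
print(f(np.array([0, 2, 3], dtype='int32')))
# [[1 0 0 0]
#  [0 0 1 0]
#  [0 0 0 1]]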
def test_vector(self):
    y_idx = [3]

    def f(a):
        return crossentropy_softmax_1hot(T.shape_padleft(a), y_idx)[0]

    utt.verify_grad(f, [numpy.random.rand(4)])
def test_vectors(self):
    y_idx = [3]

    def f(a, b):
        return crossentropy_softmax_1hot(T.shape_padleft(a) + b, y_idx)[0]

    utt.verify_grad(f, [numpy.random.rand(4), numpy.random.rand(4)])
def test_scan_output_padding(self):
    """
    Scan outputs are usually lists, whose entries correspond to the
    intermediate result. When n_steps=1, some extra machinery is
    required in order to mimic this interface. Scan thus calls
    tensor.shape_padleft on the inner function outputs.

    However, this is not the proper behavior for shared variables,
    they should not be padded in any way.

    This unit test addresses the bug fix of changeset ba7157e95cb1.
    """
    a = theano.tensor.vector()
    init_a = theano.tensor.vector()
    b = theano.shared(numpy.random.rand(5, 4))

    def inner_func(a):
        return a + 1, OrderedDict([(b, 2 * b)])

    out, updates = theano.scan(
        inner_func,
        outputs_info=[OrderedDict([('initial', init_a)])],
        n_steps=1)
    out = out[-1]
    assert out.type.ndim == a.type.ndim
    assert updates[b].type.ndim == b.type.ndim

    out, updates = theano.scan(inner_func, outputs_info=[init_a], n_steps=1)
    assert out.type.ndim == a.type.ndim + 1
    assert updates[b].type.ndim == b.type.ndim
def __init__(self, incoming, factor, **kwargs):
    super(UnPoolLayer, self).__init__(incoming, **kwargs)
    assert len(factor) == 2
    assert len(self.input_shape) == 4
    self.factor = factor
    window = np.zeros(self.factor, dtype=np.float32)
    window[0, 0] = 1
    image_shape = self.input_shape[1:]
    self.mask = theano.shared(np.tile(window.reshape((1,) + self.factor), image_shape))
    self.mask = T.shape_padleft(self.mask, n_ones=1)
def get_output_for(self, input, **kwargs):
    data, mask_max = input
    # return Textra.repeat(Textra.repeat(data, self.factor[0], axis=2), self.factor[1], axis=3) * mask_max
    window = np.zeros(self.factor, dtype=np.float32)
    window[0, 0] = 1
    mask_unpool = np.tile(window.reshape((1,) + self.factor), self.input_shapes[0][1:])
    mask_unpool = T.shape_padleft(mask_unpool, n_ones=1)

    rs = np.random.RandomState(1234)
    rng = theano.tensor.shared_randomstreams.RandomStreams(rs.randint(999999))
    mask_binomial = rng.binomial(n=1, p=self.noise, size=self.input_shapes[1][1:])
    mask_binomial = T.shape_padleft(T.cast(mask_binomial, dtype='float32'), n_ones=1)

    mask = mask_binomial * mask_unpool + (1 - mask_binomial) * mask_max
    return Textra.repeat(Textra.repeat(data, self.factor[0], axis=2), self.factor[1], axis=3) * mask
def _get_output_for(self, input):
    assert input.ndim == 3  # only for 3D
    mask = T.zeros_like(input)  # size (None, w, h)
    tmp = T.concatenate([T.shape_padright(input[:, ::2, ::2]),
                         T.shape_padright(input[:, ::2, 1::2]),
                         T.shape_padright(input[:, 1::2, ::2]),
                         T.shape_padright(input[:, 1::2, 1::2])], axis=-1)
    index = tmp.argmax(axis=-1)  # size (None, w/2, h/2)
    i_r = 2 * (np.tile(np.arange(self.i_s[0] / 2), (self.i_s[1] / 2, 1))).T
    i_r = index / 2 + T.shape_padleft(i_r)
    i_c = 2 * (np.tile(np.arange(self.i_s[1] / 2), (self.i_s[0] / 2, 1)))
    i_c = index % 2 + T.shape_padleft(i_c)
    i_b = T.tile(T.arange(self.batch_size * self.n_channels), (self.i_s[0] / 2 * self.i_s[1] / 2, 1)).T
    mask = T.set_subtensor(mask[i_b.flatten(), i_r.flatten(), i_c.flatten()], 1)
    return mask
def repeat_x(x, n_times):
    # This is black magic based on broadcasting,
    # that's why variable names don't make any sense.
    a = T.shape_padleft(x)
    padding = [1] * x.ndim
    b = T.alloc(numpy.float32(1), n_times, *padding)
    out = a * b
    return out
def gaussian_chol(mean, logvar, chol, sample=None):
    if sample is not None:
        raise Exception('Not implemented')
    diag = gaussian_diag(mean, logvar)
    mask = T.shape_padleft(T.triu(T.ones_like(chol[0]), 1))
    sample = diag.sample + T.batched_dot(diag.sample, chol * mask)
    return RandomVariable(sample, diag.logp, diag.entr, mean=mean, logvar=logvar)
def get_output(self, train=False):
    [X_w, X_t] = self.get_input(train)
    t_w = self.W_t[X_w[:, :, 0]]  # doc_l, n_tags*n_samples, n_dim
    w_w = self.W_w[X_w[:, :, 1]]
    dot_tw = T.sum(w_w * t_w, axis=2)
    inter_1 = T.tensordot(w_w, self.S, axes=[[2], [2]])
    inter_2 = T.tensordot(t_w, self.P, axes=[[2], [2]])  # doc_l, n_tags*n_samples, 2,5
    inter = T.sum(inter_1 * inter_2, axis=3)
    sim_tw = T.tensordot(inter + T.shape_padleft(self.B, 2), self.U, axes=[[2], [0]])
    sim_tw = T.reshape(sim_tw, (X_w.shape[0], X_w.shape[1]))
    dot_sum_w = T.sum(dot_tw * T.nnet.sigmoid(sim_tw), axis=0) / (X_w.shape[0])
    dot_w = theano.tensor.reshape(dot_sum_w, (X_w.shape[1], 1))
    return self.activation(dot_w)
    '''
    t_t = self.W_t[X_t[:,:, 0]]  # doc_l, n_tags*n_samples, n_dim
    w_t = self.W_t[X_t[:,:, 1]]
    dot_tt = T.sum(w_t * t_t, axis=2)
    #dot_sum = T.sum(dot_tw, axis = 0)#/(X.shape[0])
    #dot_sum_t = T.sum(dot_tt , axis = 0)#/(X_t.shape[0])
    inter_t_1 = T.tensordot(t_t, self.P, axes = [[2],[2]])
    inter_t_2 = T.tensordot(w_t, self.P, axes = [[2],[2]])  # doc_l, n_tags*n_samples, 2,5
    inter_t = T.sum(inter_t_1 * inter_t_2, axis = 3)
    sim_tt = T.tensordot(inter_t, self.U_t, axes=[[2],[0]])
    sim_tt = T.reshape(sim_tt, (X_t.shape[0], X_t.shape[1]))
    dot_sum_t = T.sum(dot_tt * sim_tt, axis = 0)/(X_t.shape[0])
    dot_twc_t = dot_sum_t#*dot_sum#_p
    dot_t = theano.tensor.reshape(dot_twc_t, (X_t.shape[1], 1))
    return 0.5 * self.activation(dot_w) + 0.5 * self.activation(dot_t)
    '''
def conv2d_same(input, filters, input_shape=(None, None, None, None), filter_shape=(None, None, None, None), padding=None):
    assert input.ndim == 4 and filters.ndim == 4
    assert (4 == len(input_shape)) and (4 == len(filter_shape))
    assert (1 == filter_shape[2] % 2) and (1 == filter_shape[3] % 2)
    if (tuple(input_shape[2:4]) == (1, 1) and tuple(filter_shape[2:4]) == (1, 1)) or (
            tuple(filter_shape[2:4]) == (1, 1) and theano.config.device == "cpu"):
        return tensor4dot(input, filters)
    else:
        new_row_begin = filters.shape[2] / 2
        new_row_end = input.shape[2] + filters.shape[2] / 2
        new_col_begin = filters.shape[3] / 2
        new_col_end = input.shape[3] + filters.shape[3] / 2
        if padding is not None:
            assert 1 == padding.ndim
            padded_input = TT.ones((
                input.shape[0], input.shape[1],
                input.shape[2] + filters.shape[2] - 1,
                input.shape[3] + filters.shape[3] - 1)).astype(theano.config.floatX)
            padded_input = TT.set_subtensor(
                padded_input[:, :, new_row_begin:new_row_end, new_col_begin:new_col_end],
                numpy_floatX(0))
            padding = TT.shape_padleft(TT.shape_padright(padding, 2), 1)
            padded_input = padding * padded_input
        else:
            padded_input = TT.zeros((
                input.shape[0], input.shape[1],
                input.shape[2] + filters.shape[2] - 1,
                input.shape[3] + filters.shape[3] - 1)).astype(theano.config.floatX)
        padded_input = TT.inc_subtensor(
            padded_input[:, :, new_row_begin:new_row_end, new_col_begin:new_col_end],
            input)
        new_input_shape = [None, None, None, None]
        if input_shape[0] is not None:
            new_input_shape[0] = input_shape[0]
        if input_shape[1] is not None:
            new_input_shape[1] = input_shape[1]
        if input_shape[2] is not None and filter_shape[2] is not None:
            new_input_shape[2] = input_shape[2] + filter_shape[2] - 1
        if input_shape[3] is not None and filter_shape[3] is not None:
            new_input_shape[3] = input_shape[3] + filter_shape[3] - 1
        ret = TT.nnet.conv2d(input=padded_input, filters=filters, border_mode='valid',
                             input_shape=tuple(new_input_shape), filter_shape=filter_shape)
        return ret
def theano_one_hot(t, n_classes=None):
    if n_classes is None:
        n_classes = tensor.max(t) + 1
    ranges = tensor.shape_padleft(tensor.arange(n_classes), t.ndim)
    return tensor.eq(ranges, tensor.shape_padright(t, 1))