The following 50 code examples, extracted from open-source Python projects, illustrate how to use theano.tensor.zeros_like().
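As a quick orientation before the project excerpts (this minimal sketch is not taken from any of them; the variable names x, z, and f are illustrative), T.zeros_like(x) returns a symbolic tensor with the same shape and dtype as x, filled with zeros:

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')                        # symbolic 2-D input
z = T.zeros_like(x)                      # zeros with the same shape and dtype as x
f = theano.function([x], [z, z + 1.0])   # compile the zeros and a tensor derived from it

zeros, ones = f(np.ones((2, 3), dtype=theano.config.floatX))
print(zeros.shape)   # (2, 3) -- matches the input's shape
print(ones)          # every entry is 1.0
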
def __call__(self, input_):
    m = input_.mean()
    v = input_.std()

    new_m = T.switch(T.eq(self.m, 0.),
                     m,
                     (np.float32(1.) - self.rate) * self.m + self.rate * m)
    new_var = T.switch(T.eq(self.var, 0.),
                       v,
                       (np.float32(1.) - self.rate) * self.var + self.rate * v)

    updates = [(self.m, new_m), (self.var, new_var)]

    input_centered = (
        (input_ - new_m) / T.maximum(1., T.sqrt(new_var)))

    input_ = T.zeros_like(input_) + input_

    outs = OrderedDict(
        x=input_,
        x_centered=input_centered,
        m=new_m,
        var=new_var
    )

    return outs, updates

def ctc_path_probs(predict, Y, alpha=1e-4):
    smoothed_predict = (1 - alpha) * predict[:, Y] + alpha * np.float32(1.) / Y.shape[0]
    L = T.log(smoothed_predict)
    zeros = T.zeros_like(L[0])
    log_first = zeros

    f_skip_idxs = ctc_create_skip_idxs(Y)
    b_skip_idxs = ctc_create_skip_idxs(Y[::-1])  # there should be a shortcut to calculating this

    def step(log_f_curr, log_b_curr, f_active, log_f_prev, b_active, log_b_prev):
        f_active_next, log_f_next = ctc_update_log_p(f_skip_idxs, zeros, f_active, log_f_curr, log_f_prev)
        b_active_next, log_b_next = ctc_update_log_p(b_skip_idxs, zeros, b_active, log_b_curr, log_b_prev)
        return f_active_next, log_f_next, b_active_next, log_b_next

    [f_active, log_f_probs, b_active, log_b_probs], _ = theano.scan(
        step, sequences=[L, L[::-1, ::-1]],
        outputs_info=[np.int32(1), log_first, np.int32(1), log_first])

    idxs = T.arange(L.shape[1]).dimshuffle('x', 0)
    mask = (idxs < f_active.dimshuffle(0, 'x')) & (idxs < b_active.dimshuffle(0, 'x'))[::-1, ::-1]
    log_probs = log_f_probs + log_b_probs[::-1, ::-1] - L
    return log_probs, mask

def forward(self, inputtensor):
    #print('resnet.forward.shape: {}'.format(inputtensor[0].ndim))
    o1 = self.conv1.forward(inputtensor)
    o2 = self.bn1.forward(o1)
    o3 = self.relu1.forward(o2)
    o4 = self.conv2.forward(o3)
    o5 = self.bn2.forward(o4)

    if self.increaseDim:
        subx = T.signal.pool.pool_2d(inputtensor[0], (2, 2), ignore_border=True)
        #print('resnet.forward.subx.ndim: {}'.format(subx.ndim))
        retx = T.zeros_like(subx)
        #print('resnet.forward.retx.ndim: {}'.format(retx.ndim))
        sumx = T.concatenate([subx, retx], axis=1)
        #print('resnet.forward.sumx.ndim: {}'.format(sumx.ndim))
        out = self.relu2.forward([o5[0] + sumx, ])
        #print('resnet.forward.out.ndim: {}'.format(out[0].ndim))
    else:
        out = self.relu2.forward([o5[0] + inputtensor[0], ])

    return out

def dc_forward(nnet, layer, Z_cvx, y):

    # forward through piecewise maxima (ReLU, Maxpool), which keep convexity
    for next_layer in nnet.next_layers(layer):
        if next_layer in nnet.trainable_layers:
            break
        Z_cvx = next_layer.get_output_for(Z_cvx, deterministic=True)
        start_layer = next_layer

    # initialize concave part
    Z_ccv = T.zeros_like(Z_cvx)

    # compute DC forward decomposition until hinge loss
    for next_layer in nnet.next_layers(start_layer):
        Z_ccv, Z_cvx = next_layer.forward_prop(Z_ccv, Z_cvx)

    err_ccv, err_cvx = nnet.svm_layer.forward_prop(Z_ccv, Z_cvx, y)

    return err_ccv, err_cvx

def symbolic_dc_grad(nnet, layer, Z, y):
    """
    Symbolic computation of the gradient with a DC decomposition
    Z should be the output of the dense layer
    """

    svm = nnet.svm_layer

    Z_cvx = Z
    Z_ccv = T.zeros_like(Z)

    # feature fed to the SVM
    for next_layer in nnet.next_layers(layer):
        if not next_layer.issvm:
            Z_ccv, Z_cvx = next_layer.forward_prop(Z_ccv, Z_cvx)

    err_ccv, err_cvx = svm.forward_prop(Z_ccv, Z_cvx, y)

    return err_ccv, err_cvx

def _add_blanks(y, blank_symbol, y_mask=None):
    """Add blanks to a matrix and updates mask

    Input shape: output_seq_len x num_batch
    Output shape: 2*output_seq_len+1 x num_batch
    """
    # for y
    y_extended = y.T.dimshuffle(0, 1, 'x')
    blanks = tensor.zeros_like(y_extended) + blank_symbol
    concat = tensor.concatenate([y_extended, blanks], axis=2)
    res = concat.reshape((concat.shape[0],
                          concat.shape[1] * concat.shape[2])).T
    begining_blanks = tensor.zeros((1, res.shape[1])) + blank_symbol
    blanked_y = tensor.concatenate([begining_blanks, res], axis=0)
    # for y_mask
    if y_mask is not None:
        y_mask_extended = y_mask.T.dimshuffle(0, 1, 'x')
        concat = tensor.concatenate([y_mask_extended,
                                     y_mask_extended], axis=2)
        res = concat.reshape((concat.shape[0],
                              concat.shape[1] * concat.shape[2])).T
        begining_blanks = tensor.ones((1, res.shape[1]), dtype=floatX)
        blanked_y_mask = tensor.concatenate([begining_blanks, res], axis=0)
    else:
        blanked_y_mask = None
    return blanked_y.astype('int32'), blanked_y_mask

def sym_logdensity(self, x): """ x is a matrix of column datapoints (VxB) V = n_visible, B = batch size """ def density_given_previous_a_and_x(x, w, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma, activations_factor, p_prev, a_prev, x_prev): a = a_prev + T.dot(T.shape_padright(x_prev, 1), T.shape_padleft(w, 1)) h = self.nonlinearity(a * activations_factor) # BxH Alpha = T.nnet.softmax(T.dot(h, V_alpha) + T.shape_padleft(b_alpha)) # BxC Mu = T.dot(h, V_mu) + T.shape_padleft(b_mu) # BxC Sigma = T.exp((T.dot(h, V_sigma) + T.shape_padleft(b_sigma))) # BxC p = p_prev + log_sum_exp(-constantX(0.5) * T.sqr((Mu - T.shape_padright(x, 1)) / Sigma) - T.log(Sigma) - constantX(0.5 * np.log(2 * np.pi)) + T.log(Alpha)) return (p, a, x) # First element is different (it is predicted from the bias only) a0 = T.zeros_like(T.dot(x.T, self.W)) # BxH p0 = T.zeros_like(x[0]) x0 = T.ones_like(x[0]) ([ps, _as, _xs], updates) = theano.scan(density_given_previous_a_and_x, sequences=[x, self.W, self.V_alpha, self.b_alpha, self.V_mu, self.b_mu, self.V_sigma, self.b_sigma, self.activation_rescaling], outputs_info=[p0, a0, x0]) return (ps[-1], updates)
def sym_logdensity(self, x): """ x is a matrix of column datapoints (VxB) V = n_visible, B = batch size """ def density_given_previous_a_and_x(x, w, v, b, activations_factor, p_prev, a_prev, x_prev): a = a_prev + T.dot(T.shape_padright(x_prev, 1), T.shape_padleft(w, 1)) h = self.nonlinearity(a * activations_factor) # BxH t = T.dot(h, v) + b p_xi_is_one = T.nnet.sigmoid(t) * constantX(0.9999) + constantX(0.0001 * 0.5) # Make logistic regression more robust by having the sigmoid saturate at 0.00005 and 0.99995 p = p_prev + x * T.log(p_xi_is_one) + (1 - x) * T.log(1 - p_xi_is_one) return (p, a, x) # First element is different (it is predicted from the bias only) a0 = T.zeros_like(T.dot(x.T, self.W)) # BxH p0 = T.zeros_like(x[0]) x0 = T.ones_like(x[0]) ([ps, _, _], updates) = theano.scan(density_given_previous_a_and_x, sequences=[x, self.W, self.V, self.b, self.activation_rescaling], outputs_info=[p0, a0, x0]) return (ps[-1], updates)
def add_loss(self, value, weight=1, name='loss'):
    print 'Adding loss:', (self.name, weight, name)
    if value.ndim > 1:
        raise ValueError('value must be 0 or 1D (not %dD)' % value.ndim)
    if name not in self.is_agg_loss:
        self.is_agg_loss[name] = False
    assert not self.is_agg_loss[name]
    if (name not in self.loss) and (weight == 1):
        # special case where we can just set the loss to value directly
        # maintains tensor equality (==) when possible
        self.loss[name] = value
    else:
        if weight == 0:
            value = T.zeros_like(value, dtype=theano.config.floatX)
            self.loss[name] = value
        else:
            if weight != 1:
                value *= weight
            if name in self.loss:
                self.loss[name] += value
            else:
                self.loss[name] = value

def cost(self, y):
    def logadd(outputs):
        def log_sum_exp(X):
            x = X.max()  # log-sum-exp operations are most stable if computed as follows.
            return x + T.log(T.sum(T.exp(X - x), axis=0))

        initial = T.zeros_like(self.W[0])
        if outputs.shape[0] == 1:
            smax = initial.max()
            return smax + T.log(T.sum(T.exp(initial - smax)))
        else:
            score, _ = theano.scan(fn=lambda obs, prior, chain_potentials:
                                   log_sum_exp(prior.dimshuffle(0, 1, 'x') + obs.dimshuffle('x', 'x', 0) + chain_potentials),
                                   outputs_info=[initial],
                                   sequences=[outputs],
                                   non_sequences=self.W)
            smax = score[-1].max()
            return smax + T.log(T.sum(T.exp(score[-1] - smax)))

    def y_score(y, outputs):
        sum1 = T.sum(outputs[T.arange(y.shape[0]), y])
        sum2 = T.sum(self.W[y[:-2], y[1:-1], y[2:]])
        return sum1 + sum2

    return -(y_score(y, self.inputs) - logadd(self.inputs))  # / y.shape[0]

def build_mf_reset_function(self):
    if not hasattr(self, 'mf_reset_fn'):
        # Compile functions
        logger.debug("Building mean field reset function")

        mf_reset_update = []

        if self.add_latent_gaussian_per_utterance:
            mf_reset_update.append((self.latent_gaussian_utterance_variable_approx_posterior_mean_mfbias,
                                    T.zeros_like(self.latent_gaussian_utterance_variable_approx_posterior_mean_mfbias)))
            mf_reset_update.append((self.latent_gaussian_utterance_variable_approx_posterior_var_mfbias,
                                    T.zeros_like(self.latent_gaussian_utterance_variable_approx_posterior_var_mfbias)))

        if self.add_latent_piecewise_per_utterance:
            mf_reset_update.append((self.latent_piecewise_utterance_variable_approx_posterior_alpha_mfbias,
                                    T.zeros_like(self.latent_piecewise_utterance_variable_approx_posterior_alpha_mfbias)))

        self.mf_reset_fn = theano.function(inputs=[],
                                           outputs=[],
                                           updates=mf_reset_update,
                                           on_unused_input='warn',
                                           name="mf_reset_fn")
    return self.mf_reset_fn

# Batch saliency evaluation function.

def new_episode(self, mem):
    g, g_updates = theano.scan(fn=self.new_attention_step,
                               sequences=self.inp_c,
                               non_sequences=[mem, self.q_q],
                               outputs_info=T.zeros_like(self.inp_c[0][0]))

    if (self.normalize_attention):
        g = nn_utils.softmax(g)

    self.attentions.append(g)

    e, e_updates = theano.scan(fn=self.new_episode_step,
                               sequences=[self.inp_c, g],
                               outputs_info=T.zeros_like(self.inp_c[0]))

    return e[-1]

def new_episode(self, mem):
    g, g_updates = theano.scan(fn=self.new_attention_step,
                               sequences=self.inp_c,
                               non_sequences=[mem, self.q_q],
                               outputs_info=T.zeros_like(self.inp_c[0][0]))

    if (self.normalize_attention):
        g = nn_utils.softmax(g)

    e, e_updates = theano.scan(fn=self.new_episode_step,
                               sequences=[self.inp_c, g],
                               outputs_info=T.zeros_like(self.inp_c[0]))

    e_list = []
    for index in range(self.batch_size):
        e_list.append(e[self.fact_count_var[index] - 1, :, index])
    return T.stack(e_list).dimshuffle((1, 0))

def forward(self, x, seq):
    """
    :param x: (length, dim)
    :param seq: (length - 1, 3)
    :return:
    """
    # (length, dim) -> (2 * length - 1, dim)
    vector = T.concatenate([x, T.zeros_like(x)[:-1, :]], axis=0)
    # vector = theano.printing.Print()(vector)
    # scan length-1 times
    hs, _ = theano.scan(fn=self.encode,
                        sequences=seq,
                        outputs_info=[vector, shared_scalar(0)],
                        name="compose_phrase")
    comp_vec_init = hs[0][-1][-1]
    comp_rec_init = T.sum(hs[1])
    if self.normalize:
        hidden = x[0] / x[0].norm(2)
    else:
        hidden = x[0]
    comp_vec = ifelse(x.shape[0] > 1, comp_vec_init, hidden)
    comp_rec = ifelse(x.shape[0] > 1, comp_rec_init, shared_zero_scalar())
    return comp_vec, comp_rec

def normalize_batch_in_training(x, gamma, beta,
                                reduction_axes, epsilon=1e-3):
    """Computes mean and std for batch then apply batch_normalization on batch.
    """
    # TODO remove this if statement when Theano without
    # T.nnet.bn.batch_normalization_train is deprecated
    if not hasattr(T.nnet.bn, 'batch_normalization_train'):
        return _old_normalize_batch_in_training(x, gamma, beta, reduction_axes, epsilon)

    if gamma is None:
        if beta is None:
            gamma = ones_like(x)
        else:
            gamma = ones_like(beta)
    if beta is None:
        if gamma is None:
            beta = zeros_like(x)
        else:
            beta = zeros_like(gamma)

    normed, mean, stdinv = T.nnet.bn.batch_normalization_train(
        x, gamma, beta, reduction_axes, epsilon)

    return normed, mean, T.inv(stdinv ** 2)

def batch_normalization(x, mean, var, beta, gamma, epsilon=1e-3):
    """Apply batch normalization on x given mean, var, beta and gamma.
    """
    # TODO remove this if statement when Theano without
    # T.nnet.bn.batch_normalization_test is deprecated
    if not hasattr(T.nnet.bn, 'batch_normalization_test'):
        return _old_batch_normalization(x, mean, var, beta, gamma, epsilon)

    if gamma is None:
        gamma = ones_like(var)
    if beta is None:
        beta = zeros_like(mean)

    if mean.ndim == 1:
        # based on TensorFlow's default: normalize along rightmost dimension
        reduction_axes = list(range(x.ndim - 1))
    else:
        reduction_axes = [i for i in range(x.ndim) if mean.broadcastable[i]]

    return T.nnet.bn.batch_normalization_test(
        x, gamma, beta, mean, var, reduction_axes, epsilon)


# TODO remove this function when Theano without
# T.nnet.bn.batch_normalization_train is deprecated

def test_gpujoin_gpualloc():
    a = T.fmatrix('a')
    a_val = numpy.asarray(numpy.random.rand(4, 5), dtype='float32')
    b = T.fmatrix('b')
    b_val = numpy.asarray(numpy.random.rand(3, 5), dtype='float32')

    f = theano.function([a, b], T.join(0, T.zeros_like(a), T.ones_like(b)) + 4,
                        mode=mode_without_gpu)
    f_gpu = theano.function([a, b], T.join(0, T.zeros_like(a), T.ones_like(b)),
                            mode=mode_with_gpu)
    f_gpu2 = theano.function([a, b], T.join(0, T.zeros_like(a), T.ones_like(b)) + 4,
                             mode=mode_with_gpu)

    assert sum([node.op == T.alloc for node in f.maker.fgraph.toposort()]) == 2
    assert sum([node.op == T.join for node in f.maker.fgraph.toposort()]) == 1
    assert sum([isinstance(node.op, GpuAlloc)
                for node in f_gpu.maker.fgraph.toposort()]) == 2
    assert sum([node.op == gpu_join
                for node in f_gpu.maker.fgraph.toposort()]) == 1
    assert sum([isinstance(node.op, GpuAlloc)
                for node in f_gpu2.maker.fgraph.toposort()]) == 2
    assert sum([node.op == gpu_join
                for node in f_gpu2.maker.fgraph.toposort()]) == 1
    assert numpy.allclose(f(a_val, b_val), f_gpu2(a_val, b_val))

def sp_zeros_like(x):
    """
    Construct a sparse matrix of zeros.

    Parameters
    ----------
    x
        Sparse matrix to take the shape.

    Returns
    -------
    A sparse matrix
        The same as `x` with zero entries for all element.

    """
    # TODO: don't restrict to CSM formats
    _, _, indptr, shape = csm_properties(x)
    return CSM(format=x.format)(data=numpy.array([], dtype=x.type.dtype),
                                indices=numpy.array([], dtype='int32'),
                                indptr=tensor.zeros_like(indptr),
                                shape=shape)

def grad(self, inputs, g):
    # g[1:] is all integers, so their Jacobian in this op
    # is 0. We thus don't need to worry about what their values
    # are.

    # if g[0] is disconnected, then this op doesn't contribute
    # any gradient anywhere. but we know that at least one of
    # g[1:] is connected, or this grad method wouldn't have been
    # called, so we should report zeros
    (csm,) = inputs
    if isinstance(g[0].type, DisconnectedType):
        return [csm.zeros_like()]

    data, indices, indptr, shape = csm_properties(csm)
    return [CSM(csm.format)(g[0], indices, indptr, shape)]

# don't make this a function or it breaks some optimizations below

def grad(self, inputs, outputs_gradients):
    gz = outputs_gradients[0]

    if gz.dtype in complex_dtypes:
        raise NotImplementedError("grad not implemented for complex types")
    if inputs[0].dtype in complex_dtypes:
        raise NotImplementedError("grad not implemented for complex types")

    if gz.dtype in discrete_dtypes:
        if inputs[0].dtype in discrete_dtypes:
            return [inputs[0].zeros_like(dtype=theano.config.floatX)]
        else:
            return [inputs[0].zeros_like()]
    else:
        if inputs[0].dtype in discrete_dtypes:
            return [gz]
        else:
            return [Cast(inputs[0].dtype)(gz)]

def test_local_add_specialize():
    # test of non-zero dimension
    a = tensor.vector()
    s = tensor.add(tensor.zeros_like(a))
    assert local_add_specialize.transform(s.owner)

    # test of 0-d
    a = tensor.scalar()
    s = tensor.add(tensor.zeros_like(a))
    assert local_add_specialize.transform(s.owner)

    # Test when the 0 input is forcing upcasting
    a = tensor.constant(0, dtype='int64')
    b = tensor.constant(1, dtype='int32')
    s = a + b
    transformed = local_add_specialize.transform(s.owner)
    assert transformed
    assert transformed[0].type == s.type

def test_bad_shape(self):
    a = matrix('a')
    shapes = ivector('shapes')
    rng = numpy.random.RandomState(seed=utt.fetch_seed())
    a_val = rng.uniform(size=(3, 4)).astype(config.floatX)

    # Test reshape to 1 dim
    r = a.reshape(shapes, ndim=1)
    z = zeros_like(r)
    f = self.function([a, shapes], r)
    self.assertRaises(ValueError, f, a_val, [13])

    # Test reshape to 2 dim
    r = a.reshape(shapes, ndim=2)
    z = zeros_like(r)
    f = self.function([a, shapes], r)
    self.assertRaises(ValueError, f, a_val, [-1, 5])
    self.assertRaises(ValueError, f, a_val, [7, -1])
    self.assertRaises(ValueError, f, a_val, [7, 5])
    self.assertRaises(ValueError, f, a_val, [-1, -1])

def test_alloc_inputs2(self):
    raise SkipTest("This tests depends on an optimization for "
                   "scan that has not been implemented yet.")
    W1 = tensor.matrix()
    W2 = tensor.matrix()
    h0 = tensor.vector()

    def lambda_fn(W1, h, W2):
        return W1 * tensor.dot(h, W2)

    o, _ = theano.scan(lambda_fn,
                       sequences=tensor.zeros_like(W1),
                       outputs_info=h0,
                       non_sequences=[tensor.zeros_like(W2)],
                       n_steps=5)
    f = theano.function([h0, W1, W2], o, mode=mode_with_opt)
    scan_node = [x for x in f.maker.fgraph.toposort()
                 if isinstance(x.op, theano.scan_module.scan_op.Scan)][0]

    assert len([x for x in scan_node.op.fn.maker.fgraph.toposort()
                if isinstance(x.op, theano.tensor.Elemwise)]) == 0

def test_alloc_inputs3(self):
    _W1 = tensor.matrix()
    _W2 = tensor.matrix()
    _h0 = tensor.vector()

    W1 = tensor.specify_shape(_W1, (3, 3))
    W2 = tensor.specify_shape(_W2, (3, 3))
    h0 = tensor.specify_shape(_h0, (3,))

    def lambda_fn(W1, h, W2):
        return W1 * tensor.dot(h, W2)

    o, _ = theano.scan(lambda_fn,
                       sequences=tensor.zeros_like(W1),
                       outputs_info=h0,
                       non_sequences=[tensor.zeros_like(W2)],
                       n_steps=5)
    f = theano.function([_h0, _W1, _W2], o, mode=mode_with_opt)
    scan_node = [x for x in f.maker.fgraph.toposort()
                 if isinstance(x.op, theano.scan_module.scan_op.Scan)][0]

    assert len(scan_node.op.inputs) == 1

def network(input_sequences):
    batch_size = input_sequences.shape[0]
    length = input_sequences.shape[1]
    inp = input_sequences[:, None, None, :]
    dilations = np.asarray([[1, 2, 4, 8, 16] * N_BLOCKS]).tolist()[0]
    conv1 = lib.ops.conv1d("causal-conv", inp, 2, 1, n_filters, 1, bias=False, batchnorm=False, pad=(0, 1))[:, :, :, :length]
    prev_conv = conv1
    #prev_skip = []
    prev_skip = T.zeros_like(conv1)
    i = 0
    for value in dilations:
        i += 1
        x, y = lib.ops.WaveNetConv1d("Block-%d" % i, prev_conv, 2, n_filters, n_filters, bias=False, batchnorm=False, dilation=value)
        prev_conv = x
        prev_skip += y

    out = T.nnet.relu(prev_skip)
    out2 = T.nnet.relu(lib.ops.conv1d("Output.1", out, 1, 1, n_filters, n_filters, bias=False, batchnorm=False))
    output = lib.ops.conv1d("Output.2", out2, 1, 1, 34, n_filters, bias=False, batchnorm=False)

    result = output[:, :, 0, -1]
    result2 = T.nnet.relu(lib.ops.Dense('Op.1', 34, 512, result, weightnorm=False))
    result3 = lib.ops.Dense('Op.2', 512, 34, result2, weightnorm=False)

    return output[:, :, 0, -1].reshape((batch_size, 17, 2))

def AdaMax(w, objective, alpha=.01, beta1=.1, beta2=.001, verbose=False):
    if verbose:
        print 'AdaMax', 'alpha:', alpha, 'beta1:', beta1, 'beta2:', beta2
    g = T.grad(objective, w, disconnected_inputs='warn')

    new = OrderedDict()

    for i in range(len(w)):
        #gi = T.switch(T.isnan(gi),T.zeros_like(gi),gi) #remove NaN's
        mom1 = sharedf(w[i].get_value() * 0.)
        _max = sharedf(w[i].get_value() * 0.)
        new[mom1] = (1.0 - beta1) * mom1 + beta1 * g[i]
        new[_max] = T.maximum((1.0 - beta2) * _max, abs(g[i]) + 1e-8)
        new[w[i]] = w[i] - alpha * new[mom1] / new[_max]

    return new

# Find indices of str_to_find in str_to_search

def new_episode(self, mem):
    g, g_updates = theano.scan(fn=self.new_attention_step,
                               sequences=self.inp_c,
                               non_sequences=[mem, self.q_q],
                               outputs_info=T.zeros_like(self.inp_c[0][0]))

    #g = nn_utils.softmax(g)
    g = g / g.sum()

    self.attentions.append(g)

    e = T.dot(g, self.inp_c)

    return e

    '''g, g_updates = theano.scan(fn=self.new_attention_step,
        sequences=self.inp_c,
        non_sequences=[mem, self.q_q],
        outputs_info=T.zeros_like(self.inp_c[0][0]))

    if (self.normalize_attention):
        g = nn_utils.softmax(g)

    self.attentions.append(g)

    e, e_updates = theano.scan(fn=self.new_episode_step,
        sequences=[self.inp_c, g],
        outputs_info=T.zeros_like(self.inp_c[0]))

    return e[-1]'''

def random_rotate(w, h, angle, scale, *all_inputs):
    if type(angle) == float:
        angle = (-angle, angle)
    if type(scale) == float:
        scale = (1 - scale, 1 + scale)
    cx = (np.random.rand(len(all_inputs[0])).astype(floatX)) * w
    cy = (np.random.rand(len(all_inputs[0])).astype(floatX)) * h
    actions = (np.random.rand(len(all_inputs[0]), 4, 1, 1)).astype(floatX)
    actions2 = np.zeros_like(actions)
    actions2[:, 0] = (actions[:, 0] * (angle[1] - angle[0]) + angle[0]).astype(floatX)
    actions2[:, 1] = (actions[:, 1] * (scale[1] - scale[0]) + scale[0]).astype(floatX)
    actions2[:, 2, 0, 0] = cx
    actions2[:, 3, 0, 0] = cy
    all_outputs = []
    for inputs in all_inputs:
        outputs = np.zeros(inputs.shape, dtype=floatX)
        for i in range(len(inputs)):
            mat = cv2.getRotationMatrix2D((cx[i], cy[i]), actions2[i, 0, 0, 0], actions2[i, 1, 0, 0])
            tmp = cv2.warpAffine(inputs[i].transpose(1, 2, 0), mat, inputs[i].shape[1:]).transpose(2, 0, 1)
            #tmp=np.pad(inputs[i:i+1],((0,0),(0,0),(n,n),(n,n)),mode='constant',constant_values=0)
            #tmp=np.roll(tmp,actions2[i,0,0,0],2)
            #tmp=np.roll(tmp,actions2[i,1,0,0],3)
            outputs[i] = tmp
        all_outputs += [outputs]
    return all_outputs + [actions2.reshape(len(inputs), 4)]

def AdaMax(w, objective, alpha=.01, beta1=.1, beta2=.001):
    print 'AdaMax', 'alpha:', alpha, 'beta1:', beta1, 'beta2:', beta2
    g = T.grad(objective.sum(), w, disconnected_inputs='warn')

    new = OrderedDict()

    for i in range(len(w)):
        #gi = T.switch(T.isnan(gi),T.zeros_like(gi),gi) #remove NaN's
        mom1 = G.sharedf(w[i].get_value() * 0.)
        _max = G.sharedf(w[i].get_value() * 0.)
        new[mom1] = (1 - beta1) * mom1 + beta1 * g[i]
        new[_max] = T.maximum((1 - beta2) * _max, abs(g[i]) + 1e-8)
        new[w[i]] = w[i] + alpha * new[mom1] / new[_max]

    return new

# AdaMax that averages over multiple minibatches

def _pad_blanks(queryseq, blank_symbol, queryseq_mask=None):
    """
    Pad queryseq and corresponding queryseq_mask with blank symbol
    :param queryseq       (L, B)
    :param queryseq_mask  (L, B)
    :param blank_symbol   scalar
    :return queryseq_padded, queryseq_mask_padded, both with shape (2L+1, B)
    """
    # for queryseq
    queryseq_extended = queryseq.dimshuffle(1, 0, 'x')                            # (L, B) -> (B, L, 1)
    blanks = tensor.zeros_like(queryseq_extended) + blank_symbol                  # (B, L, 1)
    concat = tensor.concatenate([queryseq_extended, blanks], axis=2)              # concat.shape = (B, L, 2)
    res = concat.reshape((concat.shape[0], concat.shape[1] * concat.shape[2])).T  # res.shape = (2L, B), the reshape will cause the last 2 dimensions interlace
    begining_blanks = tensor.zeros((1, res.shape[1])) + blank_symbol              # (1, B)
    queryseq_padded = tensor.concatenate([begining_blanks, res], axis=0)          # (1+2L, B)
    # for queryseq_mask
    if queryseq_mask is not None:
        queryseq_mask_extended = queryseq_mask.dimshuffle(1, 0, 'x')                            # (L, B) -> (B, L, 1)
        concat = tensor.concatenate([queryseq_mask_extended, queryseq_mask_extended], axis=2)   # concat.shape = (B, L, 2)
        res = concat.reshape((concat.shape[0], concat.shape[1] * concat.shape[2])).T
        begining_blanks = tensor.ones((1, res.shape[1]), dtype=floatX)
        queryseq_mask_padded = tensor.concatenate([begining_blanks, res], axis=0)
    else:
        queryseq_mask_padded = None
    return queryseq_padded, queryseq_mask_padded

def get_output_for(self, input, **kwargs):
    def max_fn(f, mask, prev_score, prev_back, W_sim):
        next_score = prev_score.dimshuffle(0, 1, 'x') + f.dimshuffle(0, 'x', 1) + W_sim.dimshuffle('x', 0, 1)
        next_back = T.argmax(next_score, axis = 1)
        next_score = T.max(next_score, axis = 1)
        mask = mask.dimshuffle(0, 'x')
        next_score = next_score * mask + prev_score * (1.0 - mask)
        next_back = next_back * mask + prev_back * (1.0 - mask)
        next_back = T.cast(next_back, 'int32')
        return [next_score, next_back]

    def produce_fn(back, mask, prev_py):
        # back: inst * class, prev_py: inst, mask: inst
        next_py = back[T.arange(prev_py.shape[0]), prev_py]
        next_py = mask * next_py + (1.0 - mask) * prev_py
        next_py = T.cast(next_py, 'int32')
        return next_py

    f = T.dot(input, self.W)

    init_score, init_back = f[:, 0, :], T.zeros_like(f[:, 0, :], dtype = 'int32')
    if CRF_INIT:
        init_score = init_score + self.W_init[0].dimshuffle('x', 0)
    ([scores, backs], _) = theano.scan(fn = max_fn, \
        sequences = [f.dimshuffle(1, 0, 2)[1: ], self.mask_input.dimshuffle(1, 0)[1: ]], \
        outputs_info = [init_score, init_back], non_sequences = [self.W_sim], strict = True)

    init_py = T.argmax(scores[-1], axis = 1)
    init_py = T.cast(init_py, 'int32')
    # init_py: inst, backs: time * inst * class
    pys, _ = theano.scan(fn = produce_fn, \
        sequences = [backs, self.mask_input.dimshuffle(1, 0)[1:]],
        outputs_info = [init_py], go_backwards = True)
    # pys: (rev_time - 1) * inst
    pys = pys.dimshuffle(1, 0)[:, :: -1]
    # pys : inst * (time - 1)
    return T.concatenate([pys, init_py.dimshuffle(0, 'x')], axis = 1)

def get_output_for(self, inputs, **kwargs):
    coefs = inputs[-1]
    output = TT.zeros_like(inputs[0])
    for i, input_arr in enumerate(inputs[:-1]):
        output += input_arr * coefs[:, i].reshape((-1, 1))
    return output

def init_infer(self, q):
    return [T.zeros_like(q)]

def __call__(self, input_, *xs):
    '''
    Maybe unclear: input_ is the variable to be scaled, xs are the
    actual inputs.
    '''
    updates = theano.OrderedUpdates()

    if len(xs) != len(self.dims_in):
        raise ValueError('Number of (external) inputs for baseline must'
                         ' match parameters')

    ws = []
    for i in xrange(len(xs)):
        # Maybe not the most pythonic way...
        ws.append(self.__dict__['w%d' % i])

    ids = T.sum([x.dot(W) for x, W in zip(xs, ws)], axis=0).T
    ids_c = T.zeros_like(ids) + ids
    input_scaled = input_ / ids_c
    input_ = T.zeros_like(input_) + input_

    outs = OrderedDict(
        x_c=input_,
        x_scaled=input_scaled,
        ids=ids,
        ids_c=ids_c
    )

    return outs, updates

def zeros_like(x):
    return T.zeros_like(x)

def fprop(self, var):
    rval = TT.zeros_like(var)
    if self.n > 0:
        rval = TT.set_subtensor(rval[self.n:], var[:-self.n])
    elif self.n < 0:
        rval = TT.set_subtensor(rval[:self.n], var[-self.n:])
    self.out = rval
    return rval

def get_f1_acc(outputs, y_labels):
    outputs_i = outputs + 0.5
    outputs_i = outputs_i.astype('int32')
    y_ilab = y_labels.astype('int32')
    gd_num = T.sum(y_ilab, axis=0)
    pr_num = T.sum(outputs_i, axis=0)
    # pr_rtm = T.eq(outputs_i, y_ilab)
    # pr_rt = T.sum(pr_rtm, axis=0)
    sum_ones = y_ilab + outputs_i
    pr_rtm = sum_ones / 2
    # pr_rtm = T.eq(outputs_i, y_ilab)
    pr_rt = T.sum(pr_rtm, axis=0)

    # prevent nan to destroy the f1
    pr_rt = pr_rt.astype('float32')
    gd_num = gd_num.astype('float32')
    pr_num = pr_num.astype('float32')

    acc = pr_rt / outputs.shape[0]

    zero_scale = T.zeros_like(T.min(pr_rt))
    if T.eq(zero_scale, T.min(gd_num)):
        gd_num += 1
    if T.eq(zero_scale, T.min(pr_num)):
        pr_num += 1
    if T.eq(zero_scale, T.min(pr_rt)):
        pr_rt += 0.01
    recall = pr_rt / gd_num
    precision = pr_rt / pr_num
    f1 = 2 * recall * precision / (recall + precision)
    # return T.min(pr_rt)
    return acc, f1