我们从Python开源项目中,提取了以下44个代码示例,用于说明如何使用theano.tensor.gt()。
def log_zero_inflated_poisson(x, pi, log_lambda, eps = 0.0):
    """
    Compute the log pmf of a zero-inflated Poisson distribution at values x.

    pi is the zero-inflation probability (clipped to [eps, 1 - eps]) and
    log_lambda is the log of the Poisson rate. The count term for x > 0 is
    delegated to log_poisson.

    NOTE(review): the previous docstring mentioned a Stirling approximation
    (log x! = x log x - x); if it is used, it lives in log_poisson, which is
    not visible here.
    """
    zero_prob = T.clip(pi, eps, 1.0 - eps)
    rate = T.exp(log_lambda)
    # Lower-bound the rate by eps; the upper bound is the rate itself.
    rate = T.clip(rate, eps, rate)
    # x == 0 can come from the inflation component or from the Poisson itself.
    log_p_zero = T.log(zero_prob + (1 - zero_prob) * T.exp(-rate))
    log_p_positive = T.log(1 - zero_prob) + log_poisson(x, log_lambda, eps)
    is_zero = T.eq(x, 0)
    is_positive = T.gt(x, 0)
    return is_zero * log_p_zero + is_positive * log_p_positive
def get_output(self, input_, label):
    """
    This function overrides the parents' one.
    Thresholds the model output at 0.5 and returns the mean agreement with
    the real label (i.e. a binary accuracy, not a loss).

    Parameters
    ----------
    input_: TensorVariable
        model output; the previous docstring describes it as a 2D matrix of
        (batch size, prediction).
    label: TensorVariable
        an array of (batch size, answer) or (batchsize,) if label is a list
        of class labels. Should be integer-valued.

    Returns
    -------
    TensorVariable
        a symbolic tensor variable which is scalar.
    """
    # TODO: Not tested
    predicted = T.gt(input_, 0.5)
    matches = T.eq(predicted, label)
    return T.mean(matches)
def skip_connect(self, input, layer_index):
    """
    Combine the most recent noisy activation with `input` and record the result.

    The top entry of self.noisy_z is consumed (popped), the reconstruction is
    stored in self.tmp, appended to self.reconstructions, and returned.

    Raises ValueError when self.noisy_z is an empty list.
    """
    if [] == self.noisy_z:
        raise ValueError('Error: noisy_z is an empty list, noisy_fprop must be run before skip_connect')

    layer_params = self.As[layer_index]
    MU = self.compute_mu(input, layer_params)
    V = self.compute_v(input, layer_params)
    noisy_top = self.noisy_z[-1]
    reconstruction = (noisy_top - MU) * V + MU

    # A non-trainable batch normalisation of the reconstruction (skipped for
    # batch size 1) used to be applied here; it is currently disabled.

    # Kept to calculate the reconstruction error later
    self.tmp = reconstruction
    self.reconstructions.append(reconstruction)
    # Drop the activation that was just consumed.
    self.noisy_z = self.noisy_z[0:-1]
    return reconstruction
def forward(self, x):
    """
    Run padding, convolution, pooling, dropout and activation on one sequence.

    :param x: (length, dim)
    :return: (hidden_dim, )
    """
    pad = self.padding_size
    if pad > 0:
        # (padding_size + length + padding_size, dim)
        x = temporal_padding_2d(x, (pad, pad))

    # If the kernel is longer than the (padded) sentence, pad at the end of
    # the sentence so that one full window fits.
    shortfall = self.kernel_size - x.shape[0]
    end_padded = temporal_padding_2d(x, (0, shortfall))
    x = ifelse(T.gt(shortfall, 0), end_padded, x)

    convolved = self.forward_conv(x)
    pooled = get_pooling(convolved, self.pooling)
    dropped = dropout_from_layer(pooled, self.dropout)
    return self.act.activate(dropped + self.b)
def test_pdbbreakpoint_op():
    """ Test that PdbBreakpoint ops don't block gpu optimization"""
    inp = tensor.fmatrix()

    # Build a graph made of a conditional breakpoint followed by some
    # computation on the monitored value.
    breakpoint_op = PdbBreakpoint(name='TestBreakpoint')
    monitored = breakpoint_op(tensor.gt(inp.sum(), 0), inp)
    squared = monitored ** 2
    fn = theano.function([inp], squared, mode=mode_with_gpu)

    # In the compiled function the computation after the breakpoint must have
    # been moved to the gpu: a gpu elemwise followed by the transfer to host.
    nodes = fn.maker.fgraph.toposort()
    elemwise_node = nodes[-2]
    transfer_node = nodes[-1]
    assert isinstance(elemwise_node.op, GpuElemwise)
    assert transfer_node.op == host_from_gpu
def setUp(self):
    """Build the sample graph and the conditional breakpoint shared by the tests."""
    super(TestPdbBreakpoint, self).setUp()

    # Sample computation that involves tensors with different numbers
    # of dimensions
    self.input1 = T.fmatrix()
    self.input2 = T.fscalar()
    left = self.input1 - self.input2
    right = (self.input1 - self.input2).transpose()
    self.output = T.dot(left, right)

    # Declare the conditional breakpoint
    self.breakpointOp = PdbBreakpoint("Sum of output too high")
    self.condition = T.gt(self.output.sum(), 1000)
    monitored = self.breakpointOp(self.condition, self.input1,
                                  self.input2, self.output)
    (self.monitored_input1,
     self.monitored_input2,
     self.monitored_output) = monitored
def test_inequality_with_self(self):
    """Comparing or min/max-ing a variable with itself must be simplified away."""
    x = T.scalar('x', dtype=config.floatX)
    mode = theano.compile.get_default_mode().including('local_useless_elemwise_comparison')

    def compile_with_self(op):
        return theano.function([x], op(x, x), mode=mode)

    # Strict comparisons fold to 0, non-strict ones fold to 1.
    constant_cases = ((T.lt, 0), (T.le, 1), (T.gt, 0), (T.ge, 1))
    for op, expected in constant_cases:
        self.assert_eqs_const(compile_with_self(op), expected)

    # min(x, x) and max(x, x) reduce to x itself.
    for op in (T.minimum, T.maximum):
        self.assert_identity(compile_with_self(op))
def test_pdbbreakpoint_op():
    """ Test that PdbBreakpoint ops don't block gpu optimization"""
    inp = tensor.fmatrix()

    # Build a graph made of a conditional breakpoint followed by some
    # computation on the monitored value.
    breakpoint_op = PdbBreakpoint(name='TestBreakpoint')
    monitored = breakpoint_op(tensor.gt(inp.sum(), 0), inp)
    squared = monitored ** 2
    fn = theano.function([inp], squared, mode=mode_with_gpu)

    # In the compiled function the computation after the breakpoint must have
    # been moved to the gpu: a gpu elemwise followed by the transfer to host.
    nodes = fn.maker.fgraph.toposort()
    elemwise_node = nodes[-2]
    transfer_node = nodes[-1]
    assert isinstance(elemwise_node.op, cuda.GpuElemwise)
    assert transfer_node.op == cuda.host_from_gpu
def greater(x, y):
    """Return the symbolic elementwise comparison x > y."""
    comparison = T.gt(x, y)
    return comparison
def ternarize_weights(W,W0,deterministic=False,srng=None):
    """
    Quantize weights to the three levels {-W0, 0, +W0}.

    Changed copy of the code from TernaryConnect by Zhouhan Lin, Matthieu Courbariaux,
    https://github.com/hantek/BinaryConnect/tree/ternary

    :param W: Weights
    :param W0: W0=0.5
    :param deterministic: deterministic rounding
    :param srng: random number generator; a fixed-seed MRG stream is created when omitted
    :return: quantized weights
    """
    if srng is None:
        seed_source = np.random.RandomState(666)
        srng = theano.sandbox.rng_mrg.MRG_RandomStreams(seed_source.randint(999999))

    if not deterministic:
        # Stochastic ternarization: keep the sign of W and switch the weight
        # on with probability |W / W0| (clipped to [0, 1]).
        sign = T.gt(W, 0) * 2 - 1
        on_prob = T.clip(T.abs_(W / (W0)), 0, 1)
        draws = srng.binomial(n=1, p=on_prob, size=T.shape(W))
        return W0 * sign * T.cast(draws, theano.config.floatX)

    # Deterministic ternarization: the two thresholds at -W0/2 and +W0/2 give
    # 0, 1 or 2 passes, shifted to -1, 0 or +1.
    above_lower = T.gt(W, -W0/2.)
    above_upper = T.gt(W, W0/2.)
    level = above_lower * 1 + above_upper * 1 - 1
    return level * W0
def gt(self, l, r):
    """Return the symbolic elementwise comparison l > r."""
    comparison = T.gt(l, r)
    return comparison
def log_zero_inflated_negative_binomial(x, pi, p, log_r, eps = 0.0):
    """
    Compute the log pmf of a zero-inflated negative binomial at values x.

    pi is the zero-inflation probability and p the success probability (both
    clipped to [eps, 1 - eps]); log_r is the log of the negative binomial's r
    parameter. The count term for x > 0 is delegated to log_negative_binomial.
    """
    zero_prob = T.clip(pi, eps, 1.0 - eps)
    success_prob = T.clip(p, eps, 1.0 - eps)
    r = T.exp(log_r)
    # Lower-bound r by eps; the upper bound is r itself.
    r = T.clip(r, eps, r)
    # x == 0 can come from the inflation component or from the distribution itself.
    log_p_zero = T.log(zero_prob + (1 - zero_prob) * T.pow(1 - success_prob, r))
    log_p_positive = T.log(1 - zero_prob) + log_negative_binomial(x, success_prob, log_r, eps)
    is_zero = T.eq(x, 0)
    is_positive = T.gt(x, 0)
    return is_zero * log_p_zero + is_positive * log_p_positive
def get_train_function(self):
    """
    Build and compile the Theano training step.

    The compiled function takes (feat_mat, f_mask_mat, f_target, i_mask_mat,
    i_target), returns [objective, weight] and applies one gradient step with
    a fixed step size of 0.1 to the shared weight matrix.
    """
    # specify the computational graph
    # NOTE(review): this symbolic scalar is never used below; the graph reads
    # self.num_param_vecs instead.
    num_param_vecs = T.scalar('num_param_vecs')
    # weight = theano.shared(np.random.randn(len(self.feature_map), self.num_param_vecs), name='weight')
    # Weights start at zero: one row per feature, one column per parameter vector.
    weight = theano.shared(np.zeros((len(self.feature_map), self.num_param_vecs)), name='weight')
    feat_mat = sparse.csr_matrix(name='feat_mat')
    pred = T.nnet.sigmoid( sparse.dot(feat_mat, weight) )
    # one-vs-rest: with several parameter vectors normalise each row to sum
    # to 1, otherwise expand the single probability to [p, 1 - p].
    o_pred = ifelse(T.gt(self.num_param_vecs, 1), pred / pred.sum(axis=1).reshape((pred.shape[0], 1)), T.concatenate( [pred, 1-pred], axis=1 ) )
    f_target = T.matrix('f_target')
    f_mask_mat = sparse.csr_matrix(name='f_mask_mat')
    # Aggregate predictions with the f mask, then renormalise each row.
    f_sum_pred = sparse.dot( f_mask_mat, o_pred )
    f_pred = f_sum_pred / f_sum_pred.sum(axis=1).reshape((f_sum_pred.shape[0], 1))
    i_target = T.matrix('i_target')
    # NOTE(review): the variable is i_mask_mat but its Theano name is
    # 'l_mask_mat' -- confirm whether that is intentional.
    i_mask_mat = sparse.csr_matrix(name='l_mask_mat')
    i_pred = sparse.dot( i_mask_mat, pred )
    # objective = self.param.feature_lambda * T.nnet.categorical_crossentropy(f_pred, f_target).sum() + T.nnet.binary_crossentropy(i_pred, i_target).sum() + self.param.l2_lambda * (weight ** 2).sum() / 2
    # The categorical term is multiplied by 0.0, so only the binary
    # cross-entropy and the L2 penalty contribute to the value.
    objective = 0.0 * T.nnet.categorical_crossentropy(f_pred, f_target).sum() + T.nnet.binary_crossentropy(i_pred, i_target).sum() + self.param.l2_lambda * (weight ** 2).sum() / 2
    grad_weight = T.grad(objective, weight)
    # print 'Compiling function ...'
    # compile the function
    train = theano.function(inputs = [feat_mat, f_mask_mat, f_target, i_mask_mat, i_target], outputs = [objective, weight], updates=[(weight, weight - 0.1*grad_weight)] )
    return train
def preprocess_tweet(s):
    """
    Normalise special tokens in a tweet and make sure it ends with ' </s>'.

    '@user' becomes '<at>', every space becomes ' </s> ', trailing spaces are
    stripped, and ' </s>' is appended unless the string already ends with it.
    An empty input yields ' </s>' (it used to raise IndexError).
    """
    # NOTE(review): as written, three of the replacements below map a string
    # to itself ('<heart>', '<number>' and the final ' ' -> ' '). They look
    # like HTML entities / repeated whitespace lost in extraction -- confirm
    # against the upstream source before removing them.
    s = (s.replace('@user', '<at>')
          .replace('<heart>', '<heart>')
          .replace('<number>', '<number>')
          .replace(' ', ' </s> ')
          .replace(' ', ' '))

    # Make sure we end with </s> token. rstrip is safe on an empty string,
    # unlike indexing s[-1] in a loop.
    s = s.rstrip(' ')
    if not s.endswith(' </s>'):
        s = s + ' </s>'
    return s
def compute_liu_pca(pca_components, twitter_dialogue_embeddings, pca):
    """
    Project each of the three embedding slots with an already-fitted `pca`.

    Returns an array of shape (num_examples, 3, pca_components); only
    pca.transform is called, the PCA is not refitted here.
    """
    num_examples = twitter_dialogue_embeddings.shape[0]
    projected = np.zeros((num_examples, 3, pca_components))
    for slot in (0, 1, 2):
        slot_embeddings = twitter_dialogue_embeddings[:, slot]
        projected[:, slot] = pca.transform(slot_embeddings)
    return projected

# Computes PCA decomposition for the context, gt responses, and model responses separately
def compute_separate_pca(pca_components, twitter_dialogue_embeddings):
    """
    Fit a PCA on each of the three embedding slots independently and project it.

    The same PCA object is refitted for every slot via fit_transform. Returns
    an array of shape (num_examples, 3, pca_components).
    """
    reducer = PCA(n_components = pca_components)
    num_examples = twitter_dialogue_embeddings.shape[0]
    projected = np.zeros((num_examples, 3, pca_components))
    for slot in (0, 1, 2):
        slot_embeddings = twitter_dialogue_embeddings[:, slot]
        projected[:, slot] = reducer.fit_transform(slot_embeddings)
    return projected

# Computes PCA decomposition for the context, gt responses, and model responses together
# NOTE: this computes the PCA on the training embeddings, and then applies them to the
# test embeddings (it does not compute PCA on the testing embeddings)
def hinge_loss_len(self, length, alpha=1, min_loss=1605.157):
    """
    Hinge penalty on the mean squared difference between self.nuis and length.

    Returns alpha * (min_loss - len_loss) where len_loss is below min_loss and
    zero otherwise, as a symbolic expression.

    :param length: target values compared against self.nuis
    :param alpha: scale applied to the active branch of the hinge
    :param min_loss: hinge threshold (previously hard-coded in the body)
    """
    len_loss = T.mean((self.nuis - length)**2)
    # The comparison is symbolic, so the branch must be chosen inside the
    # graph. A Python `if` on the T.gt(...) result is always truthy and would
    # return the first branch unconditionally (including negative values).
    return T.switch(T.gt(min_loss, len_loss),
                    alpha * (min_loss - len_loss),
                    0 * len_loss)
def __call__(self, input):
    """
    Normalise `input` with its batch statistics over self.axes.

    When the batch holds a single data point the statistics are replaced by
    mean 0 and std 1, so the input passes through un-normalised.
    """
    batch_mean = input.mean(self.axes, keepdims=True)
    batch_std = input.std(self.axes, keepdims=True) + self.epsilon

    # Don't batch-normalise a single data point
    has_batch = T.gt(input.shape[0], 1)
    mean = ifelse(has_batch, batch_mean,
                  T.zeros(batch_mean.shape, dtype=batch_mean.dtype))
    std = ifelse(has_batch, batch_std,
                 T.ones(batch_std.shape, dtype=batch_std.dtype))

    # NOTE(review): beta is added to the scale factor (gamma / std) rather
    # than to the normalised output -- confirm this is intended.
    scale = T.addbroadcast((self.gamma / std) + self.beta, *self.axes)
    return (input - mean) * scale
def forward_batch(self, x, mask):
    """
    Run padding, convolution, masked pooling, dropout and activation on a batch.

    :param x: (batch, length, dim)
    :param mask: (batch, length, )
    :return: activation of the pooled convolution output plus bias
        (the previous docstring gave (batch, length, hidden_dim) -- TODO
        confirm the shape after pooling)
    :raises RuntimeError: if self.padding_size is negative
    """
    pad = self.padding_size
    # Validate first so that no graph is built for an invalid configuration.
    # `not pad >= 0` also rejects values that are neither > 0 nor == 0.
    if not pad >= 0:
        raise RuntimeError("Padding Size must be >= 0, got %r" % (pad,))

    # conv_after_length = length - kernel + 2 * padding_size + 1
    new_x = x
    if pad > 0:
        # (batch, padding_size + length + padding_size, dim)
        new_x = temporal_padding_3d(x, (pad, pad))

    # (batch, conv_after_length)
    mask = temporal_padding_mask(mask, kernel_size=self.kernel_size,
                                 padding_size=pad)

    # NOTE: unlike the single-sequence path, inputs shorter than the kernel
    # are not end-padded here; that handling was disabled.

    # (batch, conv_after_length, hidden_dim)
    conv_result = self.forward_conv_batch(new_x)
    pooling_result = get_pooling_batch(conv_result, mask, self.pooling)
    dropout_out = dropout_from_layer(pooling_result, self.dropout)
    return self.act.activate(dropout_out + self.b)
def _get_updates_for(self, param, grad):
    """
    Yield the (shared variable, new value) update pairs for one parameter.

    The step size grows by self.step_increase when the gradient keeps its
    sign, shrinks by self.step_decrease when it flips, and is clamped to
    [self.min_step, self.max_step]. Only the sign of the gradient is used for
    the parameter update.
    """
    prev_grad = shared_like(param, 'grad')
    prev_step = shared_like(param, 'step', self.learning_rate.eval())

    # The product's sign tells whether the gradient flipped since last step.
    agreement = grad * prev_grad
    flipped = TT.lt(agreement, 0)
    unchanged = TT.eq(agreement, 0)
    same_sign = TT.gt(agreement, 0)

    factor = unchanged + same_sign * self.step_increase + flipped * self.step_decrease
    raw_step = prev_step * factor
    step = TT.minimum(self.max_step, TT.maximum(self.min_step, raw_step))

    # Zero the gradient where the sign flipped, so no move is made there.
    grad = grad - flipped * grad

    yield param, param - TT.sgn(grad) * step
    yield prev_grad, grad
    yield prev_step, step
def clip_gradients(gradients, clip):
    """
    If clip > 0, clip the gradients to be within [-clip, clip]

    Args:
        gradients: the gradients to be clipped
        clip: the value defining the clipping interval; either a plain number
            or a symbolic scalar
    Returns:
        the clipped gradients (the input list itself when clip is a plain
        number that is not positive)
    """
    import numbers

    if isinstance(clip, numbers.Real):
        # Plain number: decide in Python. Testing the symbolic T.gt(...)
        # result with `if` is always truthy, which made clipping run even for
        # clip <= 0.
        if clip > 0:
            return [T.clip(g, -clip, clip) for g in gradients]
        return gradients

    # Symbolic clip: the decision has to be part of the graph.
    enabled = T.gt(clip, 0)
    return [T.switch(enabled, T.clip(g, -clip, clip), g) for g in gradients]
def sylu(gain=10, spread=0.1):
    """
    Build a piecewise-linear activation saturating at +/- gain.

    The returned callable is linear (slope gain * spread) for inputs strictly
    between -1/spread and 1/spread, and constant at gain / -gain outside.
    """
    def activation(x):
        upper = switch(T.ge(x, (1 / spread)), gain, 0)
        in_linear_region = T.and_(T.gt((1 / spread), x), T.gt(x, -(1 / spread)))
        middle = switch(in_linear_region, gain * spread * x, 0)
        lower = switch(T.le(x, -(1 / spread)), -gain, 0)
        return upper + middle + lower

    return activation

# Exponential Linear Unit
def test_elemwise(self):
    """An elemwise op applied to switches sharing a condition keeps one Switch."""
    def check_single_switch(inputs, expr):
        g = optimize(FunctionGraph(inputs, [expr]))
        assert str(g).count('Switch') == 1

    # float Ops
    mats = theano.tensor.matrices('cabxy')
    c, a, b, x, y = mats
    s1 = T.switch(c, a, b)
    s2 = T.switch(c, x, y)
    float_ops = (T.add, T.sub, T.mul, T.true_div, T.int_div, T.floor_div,
                 T.minimum, T.maximum, T.gt, T.lt, T.ge, T.le, T.eq, T.neq,
                 T.pow)
    for op in float_ops:
        check_single_switch(mats, op(s1, s2))

    # integer Ops
    mats = theano.tensor.imatrices('cabxy')
    c, a, b, x, y = mats
    s1 = T.switch(c, a, b)
    s2 = T.switch(c, x, y)
    integer_ops = (T.and_, T.or_, T.xor,
                   T.bitwise_and, T.bitwise_or, T.bitwise_xor)
    for op in integer_ops:
        check_single_switch(mats, op(s1, s2))

    # add/mul with more than two inputs (reuses the integer c, s1 and s2)
    u, v = theano.tensor.matrices('uv')
    s3 = T.switch(c, u, v)
    for op in (T.add, T.mul):
        check_single_switch(mats + [u, v], op(s1, s2, s3))
def predict_regress(model_path):
    """
    Load a saved model, score the test set and write the predictions to disk.

    The network architecture is chosen from the prefix of the model file name
    (text before the first '_'); parameters are read as one JSON array per
    line. Accuracy is printed to stdout and one tab-separated line per example
    is written to '../../data/prediction'.
    """
    #redefine model
    target_var = T.fmatrix('y')
    target_labels = T.switch(T.gt(target_var, 0), 1, 0)
    file_name = model_path.split('/')[-1]
    dnn_strategy = file_name.split('_')[0]
    network = get_model_by_strategy(dnn_strategy)

    #load params
    with open(model_path, 'r') as f:
        params = [np.array(json.loads(line)) for line in f.readlines()]
    set_all_param_values(network, params)

    predict_prediction = get_output(network, deterministic=True)
    predict_labels = T.switch(T.gt(predict_prediction, 0), 1, 0)
    predict_acc = binary_accuracy(predict_labels, target_labels, threshold=0).mean()
    input_layer = get_all_layers(network)[0]
    predict = theano.function([input_layer.input_var, target_var],
                              [predict_prediction, predict_acc])

    X, y, labels, values, _, _, _, _, _, _ = load_dataset('../../data/test')
    predict_prediction, predict_acc = predict(X, y)
    sys.stdout.write(" predict accuracy:\t\t\t{} %\n".format(predict_acc * 100))

    #output predict result
    with open('../../data/prediction', 'w') as f:
        for ix in xrange(len(labels)):
            fields = (str(labels[ix]), str(values[ix]), str(predict_prediction[ix][0]))
            f.write('\t'.join(fields) + '\n')
    sys.stdout.flush()
def gt(a, b):
    """Return the symbolic elementwise comparison a > b."""
    comparison = T.gt(a, b)
    return comparison
def gt(a, b):
    """Symbolic elementwise a > b."""
    is_greater = T.gt(a, b)
    return is_greater
def __init__(self, input, n_in, n_out, is_binary=False, threshold=0.4, rng=None):
    """
    Initialize the parameters of the logistic regression.

    :type input: theano.tensor.TensorType
    :param input: symbolic variable that describes the input of the
                  architecture (one minibatch)

    :type n_in: int
    :param n_in: number of input units, the dimension of the space in
                 which the datapoints lie

    :type n_out: int
    :param n_out: number of output units, the dimension of the space in
                  which the labels lie (number of classes)

    :param is_binary: treat the layer as a binary classifier; forced to True
                      when n_out == 1
    :param threshold: decision threshold on the class probability in the
                      binary case
    :param rng: random number generator forwarded to the parent layer
    """
    self.activation = T.nnet.sigmoid
    self.threshold = threshold

    super(LogisticRegressionLayer, self).__init__(
        input, n_in, n_out, self.activation, rng)
    self.reset_layer()

    # A single output unit always means binary classification.
    self.is_binary = True if n_out == 1 else is_binary

    # The number of classes seen
    self.n_classes_seen = np.zeros(n_out)
    # The number of wrong classifications made for class i
    self.n_wrong_classif_made = np.zeros(n_out)
    self.reset_conf_mat()

    # Class-membership probabilities in symbolic form
    self.p_y_given_x = self.get_class_memberships(self.input)

    if self.is_binary:
        # Assign class 1 when the probability exceeds the threshold, class 0
        # otherwise, i.e. check p(y=1|x) > threshold.
        flat_probs = T.flatten(self.p_y_given_x)
        self.y_decision = T.gt(flat_probs, self.threshold)
    else:
        # Predict the class whose probability is maximal, in symbolic form.
        self.y_decision = T.argmax(self.p_y_given_x, axis=1)

    self.params = [self.W, self.b]
def encode(self, inputs, inputs_mask, chars, chars_mask):
    """
    Encode a batch of source sentences, mixing word and character embeddings.

    Negative entries of `inputs` denote unknown words: entry -k-1 selects row
    k of the character-encoder output, while non-negative entries index the
    word embeddings.

    Returns (h_0, c_0, attended): the projected initial decoder states and the
    concatenated forward/backward hidden state sequences.
    """
    # First run a bidirectional LSTM encoder over the unknown word
    # character sequences.
    embedded_chars = self.src_char_embeddings(chars)
    fwd_char_h_seq, fwd_char_c_seq = self.fwd_char_encoder(
            embedded_chars, chars_mask)
    # The backward pass also sees the forward hidden states.
    back_char_h_seq, back_char_c_seq = self.back_char_encoder(
            T.concatenate([embedded_chars, fwd_char_h_seq], axis=-1),
            chars_mask)
    # Concatenate the final states of the forward and backward character
    # encoders. These form a matrix of size:
    #   n_chars x src_embedding_dims
    # NOTE: the batch size here is n_chars, which is the total number of
    #       unknown words in all the sentences in the inputs matrix.
    # Create an empty matrix if there are no unknown words
    # (e.g. pure word-level encoder)
    char_vectors = theano.ifelse.ifelse(T.gt(chars.shape[0], 0),
            T.concatenate([fwd_char_h_seq[-1], back_char_h_seq[0]], axis=-1),
            T.zeros([0, self.config['src_embedding_dims']],
                    dtype=theano.config.floatX))

    # Compute separate masks for known words (with input symbol >= 0)
    # and unknown words (with input symbol < 0).
    known_mask = inputs_mask * T.ge(inputs, 0)
    unknown_mask = inputs_mask * T.lt(inputs, 0)
    # Split the inputs matrix into two, one indexing unknown words (from
    # the char_vectors matrix) and the other known words (from the source
    # word embeddings). Masked-out positions become index 0 in both.
    unknown_indexes = (-inputs-1) * unknown_mask
    known_indexes = inputs * known_mask

    # Compute the final embedding sequence by mixing the known word
    # vectors with the character encoder output of the unknown words.
    # If there is no character encoder, just use the known word vectors.
    embedded_unknown = char_vectors[unknown_indexes]
    embedded_known = self.src_embeddings(known_indexes)
    embedded_inputs = theano.ifelse.ifelse(T.gt(chars.shape[0], 0),
            (unknown_mask.dimshuffle(0,1,'x').astype(
                theano.config.floatX) * embedded_unknown) + \
            (known_mask.dimshuffle(0,1,'x').astype(
                theano.config.floatX) * embedded_known),
            known_mask.dimshuffle(0,1,'x').astype(
                theano.config.floatX) * embedded_known)

    # Forward encoding pass
    fwd_h_seq, fwd_c_seq = self.fwd_encoder(embedded_inputs, inputs_mask)
    # Backward encoding pass, using hidden states from forward encoder
    back_h_seq, back_c_seq = self.back_encoder(
            T.concatenate([embedded_inputs, fwd_h_seq], axis=-1),
            inputs_mask)
    # Initial states for decoder
    h_0 = T.tanh(self.proj_h0(back_h_seq[0]))
    c_0 = T.tanh(self.proj_c0(back_c_seq[0]))
    # Attention on concatenated forward/backward sequences
    attended = T.concatenate([fwd_h_seq, back_h_seq], axis=-1)
    return h_0, c_0, attended
def __init__(self, rng, inputVar, cfgParams, copyLayer=None, layerNum=None):
    """
    Dropout layer of a MLP: units are fully-connected and connections are
    dropped randomly during training.

    The output multiplies the input by a random keep-mask while the shared
    flag `flag_on` is positive, and by the keep probability otherwise.

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize mask

    :type inputVar: theano.tensor.matrix
    :param inputVar: a symbolic tensor of shape (n_examples, n_in)

    :type cfgParams: DropoutLayerParams
    """
    import theano
    import theano.tensor as T
    from theano.ifelse import ifelse

    super(DropoutLayer, self).__init__(rng)

    self.inputVar = inputVar
    self.cfgParams = cfgParams
    self.layerNum = layerNum

    drop_probability = cfgParams.p
    assert 0. < drop_probability < 1.

    # see https://github.com/uoguelph-mlrg/theano_alexnet/blob/master/alex_net.py
    self.prob_drop = drop_probability
    self.prob_keep = 1.0 - drop_probability
    on_value = numpy.cast[theano.config.floatX](1.0)
    self.flag_on = theano.shared(on_value, name='flag_on')

    # MRG streams are the faster rng on GPU (instead of shared_randomstreams)
    from theano.sandbox.rng_mrg import MRG_RandomStreams
    mask_rng = MRG_RandomStreams(rng.randint(999999))
    self.mask = mask_rng.binomial(n=1, p=self.prob_keep,
                                  size=self.cfgParams.inputDim,
                                  dtype=theano.config.floatX)

    masked = self.mask * self.inputVar
    scaled = self.prob_keep * self.inputVar
    self.output = ifelse(T.gt(self.flag_on, 0), masked, scaled)
    self.output.name = 'output_layer_{}'.format(self.layerNum)
    # for compatibility
    self.output_pre_act = self.output

    # no params and weights
    self.params = []
    self.weights = []
def CTC_B(self,A):
    """
    Collapse a CTC label path in place: merge repeated labels, drop blanks.

    Runs of identical consecutive labels are reduced to one label, then every
    blank (label 0) is removed. Labels separated by a blank stay distinct, so
    [1, 0, 1] becomes [1, 1].

    :param A: mutable list of integer labels; it is modified in place
    :return: the same list object A, now holding the collapsed sequence
             (an empty list is returned unchanged instead of raising)
    """
    from itertools import groupby

    blank_num = 0
    # groupby yields one key per run of equal neighbours. Slice assignment
    # keeps the in-place contract without a quadratic series of `del A[i]`.
    A[:] = [label for label, _ in groupby(A) if label != blank_num]
    return A
def CTC_LOSS(self):
    """
    Build the per-example CTC-style loss over self.output and self.Y.

    Scans each_loss over paired rows of self.output and self.Y, compiles the
    result into self.ctc_loss (a function of self.X and self.Y) and returns
    the symbolic vector of losses.
    """
    outpts = self.output
    inpts = self.Y

    def each_loss(outpt, inpt):
        # Label 26 is treated as the blank symbol.
        blank = 26
        y_nblank = T.neq(inpt, blank)
        # Dot product of the 0/1 indicator with itself: number of non-blank
        # entries in the target row.
        n = T.dot(y_nblank, y_nblank)
        # Number of leading entries of the target row that are used.
        # assumes the row already interleaves blanks with labels -- TODO confirm
        N = 2 * n + 1
        labels = inpt[:N]
        labels2 = T.concatenate((labels, [blank, blank]))
        # Skipping two positions ahead is allowed only across a blank that
        # separates two different labels.
        sec_diag = T.neq(labels2[:-2], labels2[2:]) * T.eq(labels2[1:-1], blank)
        recurrence_relation = \
            T.eye(N) + \
            T.eye(N, k=1) + \
            T.eye(N, k=2) * sec_diag.dimshuffle((0, 'x'))
        pred_y = outpt[:, labels]
        # Forward recursion over the rows of pred_y. When the weighted step is
        # exactly zero, the unweighted propagated value is kept instead.
        fwd_pbblts, _ = theano.scan(
            lambda curr, accum: T.switch(T.eq(curr*T.dot(accum, recurrence_relation), 0.0), T.dot(accum, recurrence_relation) , curr*T.dot(accum, recurrence_relation)),
            sequences=[pred_y],
            outputs_info=[T.eye(N)[0]]
        )
        #return fwd_pbblts
        #liklihood = fwd_pbblts[0, 0]
        # Sum of the last two states at the final step.
        liklihood = fwd_pbblts[-1, -1] + fwd_pbblts[-1, -2]
        #liklihood = T.switch(T.lt(liklihood, 1e-35), 1e-35, liklihood)
        #loss = -T.log(T.cast(liklihood, "float32"))
        #loss = 10 * (liklihood - 1) * (liklihood - 100)
        # Quadratic surrogate when the value is <= 1, negative log otherwise.
        # NOTE(review): this is not the usual -log(likelihood) CTC loss --
        # confirm the piecewise form is intended.
        loss = (T.le(liklihood, 1.0)*(10*(liklihood-1)*(liklihood-100)))+(T.gt(liklihood, 1.0)*(-T.log(T.cast(liklihood, "float32"))))
        return loss
        #return pred_y

    ctc_losss, _ = theano.scan(each_loss,
                               sequences=[outpts, inpts],
                               )
    self.ctc_loss = theano.function([self.X, self.Y], ctc_losss)
    return ctc_losss
def GRU_step(self, x_t, m_t, bnmask_t, *args):
    """
    One step of a GRU with per-position normalization statistics.

    :param x_t: input at the current step
    :param m_t: mask at the current step; used to reset the hidden state
    :param bnmask_t: mask forwarded to NormalizationOperator
    :param args: previous hidden state followed by the position counter n_t
    :return: list of the new hidden state, the new counter (int8), the gates,
             the candidate state and the normalization means/variances
    """
    args = iter(args)
    h_tm1 = next(args)
    n_t = next(args)

    # Position counter: with resets enabled it is n_t + 1 where m_t > 0.5 and
    # 0 elsewhere; otherwise it simply advances.
    if self.reset_utterance_encoder_at_end_of_utterance:
        new_n_t = T.gt(m_t, 0.5)*(n_t + 1) # n_t + T.gt(m_t, 0.5)
    else:
        new_n_t = n_t + 1

    new_n_t = T.cast(new_n_t, 'int8')

    # Clamp the counter to [0, normop_max_enc_seq - 1] so it can index the
    # stored statistics; a 2-D counter contributes only its first row.
    if n_t.ndim == 2:
        n_t_truncated = T.maximum(0, T.minimum(n_t[0,:], self.normop_max_enc_seq - 1))
    else:
        n_t_truncated = T.maximum(0, T.minimum(n_t, self.normop_max_enc_seq - 1))

    if m_t.ndim >= 1:
        m_t = m_t.dimshuffle(0, 'x')

    # If 'reset_utterance_encoder_at_end_of_utterance' flag is on,
    # then reset the hidden state if this is an end-of-utterance token
    # as given by m_t
    if self.reset_utterance_encoder_at_end_of_utterance:
        hr_tm1 = m_t * h_tm1
    else:
        hr_tm1 = h_tm1

    # Compute reset gate
    r_t_normop_x_inp, r_t_normop_x_mean, r_t_normop_x_var = NormalizationOperator(self.normop_type, T.dot(x_t, self.W_in_r), self.normop_r_x_gamma, bnmask_t, self.normop_r_x_mean[n_t_truncated, :], self.normop_r_x_var[n_t_truncated, :])
    r_t_normop_h_inp, r_t_normop_h_mean, r_t_normop_h_var = NormalizationOperator(self.normop_type, T.dot(hr_tm1, self.W_hh_r), self.normop_r_h_gamma, bnmask_t, self.normop_r_h_mean[n_t_truncated, :], self.normop_r_h_var[n_t_truncated, :])
    r_t = T.nnet.sigmoid(r_t_normop_x_inp + r_t_normop_h_inp + self.b_r)

    # Compute update gate
    z_t_normop_x_inp, z_t_normop_x_mean, z_t_normop_x_var = NormalizationOperator(self.normop_type, T.dot(x_t, self.W_in_z), self.normop_z_x_gamma, bnmask_t, self.normop_z_x_mean[n_t_truncated, :], self.normop_z_x_var[n_t_truncated, :])
    z_t_normop_h_inp, z_t_normop_h_mean, z_t_normop_h_var = NormalizationOperator(self.normop_type, T.dot(hr_tm1, self.W_hh_z), self.normop_z_h_gamma, bnmask_t, self.normop_z_h_mean[n_t_truncated, :], self.normop_z_h_var[n_t_truncated, :])
    z_t = T.nnet.sigmoid(z_t_normop_x_inp + z_t_normop_h_inp + self.b_z)

    # Compute h_tilde
    h_tilde_normop_x_inp, h_tilde_normop_x_mean, h_tilde_normop_x_var = NormalizationOperator(self.normop_type, T.dot(x_t, self.W_in), self.normop_in_x_gamma, bnmask_t, self.normop_in_x_mean[n_t_truncated, :], self.normop_in_x_var[n_t_truncated, :])
    h_tilde_normop_h_inp, h_tilde_normop_h_mean, h_tilde_normop_h_var = NormalizationOperator(self.normop_type, T.dot(r_t * hr_tm1, self.W_hh), self.normop_in_h_gamma, bnmask_t, self.normop_in_h_mean[n_t_truncated, :], self.normop_in_h_var[n_t_truncated, :])
    h_tilde = self.sent_rec_activation(h_tilde_normop_x_inp + h_tilde_normop_h_inp + self.b_hh)

    # Compute h: interpolate between the (possibly reset) previous state and
    # the candidate state using the update gate.
    h_t = (np.float32(1.0) - z_t) * hr_tm1 + z_t * h_tilde

    # return states, gates and batch norm parameters
    return [h_t, T.cast(new_n_t, 'int8'), r_t, z_t, h_tilde, r_t_normop_x_mean, r_t_normop_x_var, r_t_normop_h_mean, r_t_normop_h_var, z_t_normop_x_mean, z_t_normop_x_var, z_t_normop_h_mean, z_t_normop_h_var, h_tilde_normop_x_mean, h_tilde_normop_x_var, h_tilde_normop_h_mean, h_tilde_normop_h_var]
def GRU_step(self, x_t, m_t, bnmask_t, *args):
    """
    One step of a GRU with per-position normalization statistics.

    :param x_t: input at the current step
    :param m_t: mask at the current step; used to reset the hidden state
    :param bnmask_t: mask forwarded to NormalizationOperator
    :param args: previous hidden state followed by the position counter n_t
    :return: list of the new hidden state, the new counter (int8), the gates,
             the candidate state and the normalization means/variances
    """
    args = iter(args)
    h_tm1 = next(args)
    n_t = next(args)

    # Position counter: with resets enabled it advances only where m_t > 0;
    # otherwise it advances on every step.
    if self.reset_utterance_encoder_at_end_of_utterance:
        new_n_t = n_t + T.gt(m_t, 0.0)
    else:
        new_n_t = n_t + 1

    # Clamp the counter to [0, normop_max_enc_seq - 1] so it can index the
    # stored statistics; a 2-D counter contributes only its first row.
    if n_t.ndim == 2:
        n_t_truncated = T.maximum(0, T.minimum(n_t[0,:], self.normop_max_enc_seq - 1))
    else:
        n_t_truncated = T.maximum(0, T.minimum(n_t, self.normop_max_enc_seq - 1))

    if m_t.ndim >= 1:
        m_t = m_t.dimshuffle(0, 'x')

    # If 'reset_utterance_encoder_at_end_of_utterance' flag is on,
    # then reset the hidden state if this is an end-of-utterance token
    # as given by m_t
    if self.reset_utterance_encoder_at_end_of_utterance:
        hr_tm1 = m_t * h_tm1
    else:
        hr_tm1 = h_tm1

    # Compute reset gate
    r_t_normop_x_inp, r_t_normop_x_mean, r_t_normop_x_var = NormalizationOperator(self.normop_type, T.dot(x_t, self.W_in_r), self.normop_r_x_gamma, bnmask_t, self.normop_r_x_mean[n_t_truncated, :], self.normop_r_x_var[n_t_truncated, :])
    r_t_normop_h_inp, r_t_normop_h_mean, r_t_normop_h_var = NormalizationOperator(self.normop_type, T.dot(hr_tm1, self.W_hh_r), self.normop_r_h_gamma, bnmask_t, self.normop_r_h_mean[n_t_truncated, :], self.normop_r_h_var[n_t_truncated, :])
    r_t = T.nnet.sigmoid(r_t_normop_x_inp + r_t_normop_h_inp + self.b_r)

    # Compute update gate
    z_t_normop_x_inp, z_t_normop_x_mean, z_t_normop_x_var = NormalizationOperator(self.normop_type, T.dot(x_t, self.W_in_z), self.normop_z_x_gamma, bnmask_t, self.normop_z_x_mean[n_t_truncated, :], self.normop_z_x_var[n_t_truncated, :])
    z_t_normop_h_inp, z_t_normop_h_mean, z_t_normop_h_var = NormalizationOperator(self.normop_type, T.dot(hr_tm1, self.W_hh_z), self.normop_z_h_gamma, bnmask_t, self.normop_z_h_mean[n_t_truncated, :], self.normop_z_h_var[n_t_truncated, :])
    z_t = T.nnet.sigmoid(z_t_normop_x_inp + z_t_normop_h_inp + self.b_z)

    # Compute h_tilde
    h_tilde_normop_x_inp, h_tilde_normop_x_mean, h_tilde_normop_x_var = NormalizationOperator(self.normop_type, T.dot(x_t, self.W_in), self.normop_in_x_gamma, bnmask_t, self.normop_in_x_mean[n_t_truncated, :], self.normop_in_x_var[n_t_truncated, :])
    h_tilde_normop_h_inp, h_tilde_normop_h_mean, h_tilde_normop_h_var = NormalizationOperator(self.normop_type, T.dot(r_t * hr_tm1, self.W_hh), self.normop_in_h_gamma, bnmask_t, self.normop_in_h_mean[n_t_truncated, :], self.normop_in_h_var[n_t_truncated, :])
    h_tilde = self.sent_rec_activation(h_tilde_normop_x_inp + h_tilde_normop_h_inp + self.b_hh)

    # Compute h: interpolate between the (possibly reset) previous state and
    # the candidate state using the update gate.
    h_t = (np.float32(1.0) - z_t) * hr_tm1 + z_t * h_tilde

    # return states, gates and batch norm parameters
    return [h_t, T.cast(new_n_t, 'int8'), r_t, z_t, h_tilde, r_t_normop_x_mean, r_t_normop_x_var, r_t_normop_h_mean, r_t_normop_h_var, z_t_normop_x_mean, z_t_normop_x_var, z_t_normop_h_mean, z_t_normop_h_var, h_tilde_normop_x_mean, h_tilde_normop_x_var, h_tilde_normop_h_mean, h_tilde_normop_h_var]
def __init__(self, layers, crop_size=None, mirror_pr = 0.0, flip_pr=0.0, json_param=None):
    """
    Crop layer with optional random mirroring, flipping and offset.

    While get_train() is on, each example is mirrored / flipped with the
    given probabilities and cropped at a random offset; otherwise no
    mirroring or flipping is applied and the crop is centred.

    :param layers: list of preceding layers; the last one provides the input
    :param crop_size: (size_x, size_y) of the crop; overridden by json_param["crop"]
    :param mirror_pr: probability of mirroring along the y index; overridden
                      by json_param["mirror"]
    :param flip_pr: probability of flipping along the x index; overridden by
                    json_param["flip"]
    :param json_param: optional dict of overrides (defaults to no overrides)
    """
    super().__init__(layer_index=len(layers))

    # Use a fresh dict per call instead of a shared mutable default argument.
    if json_param is None:
        json_param = {}

    self.input = layers[-1].output
    self.input_shape = layers[-1].output_shape

    self.crop_size = json_param.get("crop", crop_size)
    self.mirror_pr = json_param.get("mirror", mirror_pr)
    self.flip_pr = json_param.get("flip", flip_pr)
    self.output_shape = (self.input_shape[0], self.input_shape[1], self.crop_size[0], self.crop_size[1])
    self.output = []

    # Per-example "do nothing" selector used outside of training.
    zero = tensor.zeros((self.input_shape[0],), dtype=numpy.int8)
    index_b = tensor.arange(self.input_shape[0])
    index_c = tensor.arange(self.input_shape[1])
    index_x = tensor.arange(self.crop_size[0])[None,:]
    index_y = tensor.arange(self.crop_size[1])[None,:]

    #randomly mirror (y-axis) data
    if self.mirror_pr > 0.0:
        mirror = tensor.gt(get_rng().uniform(size=(self.input_shape[0],)), 1.0 - self.mirror_pr)
        mirror = tensor.switch(get_train(), mirror, zero)
        # Reverse the y indices of the selected examples.
        index_y = tensor.switch(mirror[:,None], -index_y + self.crop_size[1] - 1, index_y)

    #randomly flip (x-axis) data
    if self.flip_pr > 0.0:
        flip = tensor.gt(get_rng().uniform(size=(self.input_shape[0],)), 1.0 - self.flip_pr)
        flip = tensor.switch(get_train(), flip, zero)
        # Reverse the x indices of the selected examples.
        index_x = tensor.switch(flip[:,None], -index_x + self.crop_size[0] - 1, index_x)

    #randomly offset crop
    dx = self.input_shape[2] - self.crop_size[0]
    dy = self.input_shape[3] - self.crop_size[1]
    if self.crop_size[0] != self.input_shape[2] or self.crop_size[1] != self.input_shape[3]:
        # Fixed centre offsets are used whenever get_train() is off.
        center_x = theano.shared(numpy.full(shape=(self.input_shape[0],), fill_value=dx // 2, dtype=numpy.int32), borrow=False)
        center_y = theano.shared(numpy.full(shape=(self.input_shape[0],), fill_value=dy // 2, dtype=numpy.int32), borrow=False)
        offset_x = get_rng().random_integers(size=(self.input_shape[0],), low=0, high=dx)
        offset_y = get_rng().random_integers(size=(self.input_shape[0],), low=0, high=dy)
        index_x += tensor.switch(get_train(), offset_x, center_x)[:,None]
        index_y += tensor.switch(get_train(), offset_y, center_y)[:,None]

    #perform advanced indexing
    self.output = self.input[index_b[:,None,None,None], index_c[None,:,None,None], index_x[:,None,:,None], index_y[:,None,None,:]]

    logging.verbose("Adding", self)
def ctc_path_probability(scorematrix, queryseq, blank):
    """
    Compute path probability based on CTC algorithm, only forward pass is used.
    Batch not supported, for batch version, refer to the CTC class above
    Speed much slower than the numba & cython version (51.5min vs ~3.9min on word_correction_CTC experiment)

    :param scorematrix: indexed below as scorematrix[symbol, t], with the time
                        length taken from shape[1], i.e. (C+1, T)
                        NOTE(review): the previous docstring said (T, C+1),
                        which contradicts the indexing -- confirm with callers.
    :param queryseq: label sequence of length L (shape[0] is used as L); the
                     previous docstring gave (L, 1) -- TODO confirm
    :param blank: scalar, blank symbol
    :return: (NLL, alphas), NLL > 0 (smaller is better, = -log(p(l|x)); alphas is the forward variable)
    """

    def update_s(s, alphas, scorematrix, queryseq, blank, t):
        # Update alphas[s, t] for one state s of the blank-padded sequence.
        # Even s is a blank state, odd s is label number (s - 1) // 2.
        l = (s - 1) // 2
        alphas = ifelse(tensor.eq(s % 2, 0),
                        # Blank state: stay, or come from the previous state
                        # (state 0 has no predecessor).
                        ifelse(tensor.eq(s, 0),
                               tensor.set_subtensor(alphas[s, t], alphas[s, t - 1] * scorematrix[blank, t]),
                               tensor.set_subtensor(alphas[s, t], (alphas[s, t - 1] + alphas[s - 1, t - 1]) * scorematrix[blank, t]),
                               name='for_blank_symbol'),
                        # Label state: skipping the blank two states back is
                        # allowed only if the previous label differs.
                        ifelse(tensor.or_(tensor.eq(s, 1), tensor.eq(queryseq[l], queryseq[l - 1])),
                               tensor.set_subtensor(alphas[s, t], (alphas[s, t - 1] + alphas[s - 1, t - 1]) * scorematrix[
                                   queryseq[l], t]),
                               tensor.set_subtensor(alphas[s, t], (alphas[s, t - 1] + alphas[s - 1, t - 1] + alphas[s - 2, t - 1]) *
                                                    scorematrix[queryseq[l], t]),
                               name='for_same_label_twice'))
        return alphas

    def update_t(t, LLForward, alphas, scorematrix, queryseq, blank, T, L2):
        # Only the states in [start, end) are updated at time t.
        start = tensor.max([0, L2 - 2 * (T - t)])
        end = tensor.min([2 * t + 2, L2])
        s = tensor.arange(start, end)
        results, _ = theano.scan(fn=update_s, sequences=[s], non_sequences=[scorematrix, queryseq, blank, t],
                                 outputs_info=[alphas], name='scan_along_s')
        alphas = results[-1]
        # Rescale the column to sum to one and accumulate the log of the
        # scale factor; the factor is floored at 1e-15.
        c = tensor.sum(alphas[start:end, t])
        c = tensor.max([1e-15, c])
        alphas = tensor.set_subtensor(alphas[start:end, t], alphas[start:end, t] / c)
        LLForward += tensor.log(c)
        return LLForward, alphas

    L = queryseq.shape[0]  # Length of label sequence
    L2 = 2 * L + 1  # Length of label sequence padded with blanks
    T = scorematrix.shape[1]  # time length
    alphas = tensor.zeros((L2, T))
    # Initialize alphas and forward pass: at t = 0 only the leading blank and
    # the first label are possible.
    alphas = tensor.set_subtensor(alphas[[0, 1], 0], scorematrix[[blank, queryseq[0]], 0])
    c = tensor.sum(alphas[:, 0])
    alphas = tensor.set_subtensor(alphas[:, 0], alphas[:, 0] / c)
    LLForward = tensor.log(c)
    t = tensor.arange(1, T)
    results, _ = theano.scan(fn=update_t, sequences=[t], non_sequences=[scorematrix, queryseq, blank, T, L2],
                             outputs_info=[LLForward, alphas], name='scan_along_t')
    # With a single time step the scan is empty, so fall back to the
    # initial values.
    NLL, alphas = ifelse(tensor.gt(T, 1), (-results[0][-1], results[1][-1]), (-LLForward, alphas))
    return NLL, alphas