Python theano.tensor 模块，gt() 实例源码

我们从Python开源项目中，提取了以下44个代码示例，用于说明如何使用theano.tensor.gt()。

项目：deep-learning-for-genomics 作者：chgroenbech | 项目源码 | 文件源码

def log_zero_inflated_poisson(x, pi, log_lambda, eps = 0.0):
    """
    Build the symbolic log-probability of a zero-inflated Poisson at `x`.

    The expression mixes a point mass at zero (weight `pi`) with a Poisson
    whose rate is `exp(log_lambda)`:

        x == 0:  log(pi + (1 - pi) * exp(-lambda))
        x >  0:  log(1 - pi) + log_poisson(x, log_lambda, eps)

    Elements with x < 0 contribute 0 because neither indicator fires.

    Parameters
    ----------
    x : values at which the log-probability is evaluated.
    pi : zero-inflation probability; clipped to [eps, 1 - eps].
    log_lambda : logarithm of the Poisson rate.
    eps : clipping margin, also forwarded to `log_poisson`.

    NOTE(review): the previous docstring mentioned the approximation
    log x! = x log x - x; presumably that lives in `log_poisson` -- confirm.
    """

    mix = T.clip(pi, eps, 1.0 - eps)

    # The upper clip bound is the rate itself, so only the lower bound acts.
    rate = T.exp(log_lambda)
    rate = T.clip(rate, eps, rate)

    log_p_zero = T.log(mix + (1 - mix) * T.exp(-rate))
    log_p_positive = T.log(1 - mix) + log_poisson(x, log_lambda, eps)

    # Indicator masks pick the applicable branch element-wise.
    return T.eq(x, 0) * log_p_zero + T.gt(x, 0) * log_p_positive

项目：deep-learning-for-genomics 作者：chgroenbech | 项目源码 | 文件源码

def log_zero_inflated_poisson(x, pi, log_lambda, eps = 0.0):
    """
    Symbolic log-probability of a zero-inflated Poisson evaluated at `x`.

    `pi` is the probability of the extra zero component (clipped to
    [eps, 1 - eps]) and `exp(log_lambda)` is the Poisson rate (clipped from
    below at eps). Where x == 0 the result is
    log(pi + (1 - pi) * exp(-lambda)); where x > 0 it is
    log(1 - pi) + log_poisson(x, log_lambda, eps). Elements with x < 0 get 0.

    NOTE(review): an earlier docstring referred to the approximation
    log x! = x log x - x; presumably implemented by `log_poisson` -- confirm.
    """

    inflation = T.clip(pi, eps, 1.0 - eps)

    poisson_rate = T.exp(log_lambda)
    poisson_rate = T.clip(poisson_rate, eps, poisson_rate)

    at_zero = T.eq(x, 0)
    above_zero = T.gt(x, 0)

    zero_branch = T.log(inflation + (1 - inflation) * T.exp(-poisson_rate))
    count_branch = T.log(1 - inflation) + log_poisson(x, log_lambda, eps)

    y = at_zero * zero_branch + above_zero * count_branch
    return y

项目：lemontree 作者：khshim | 项目源码 | 文件源码

def get_output(self, input_, label):
        """
        Build the symbolic accuracy of thresholded predictions against labels.

        According to the original notes this overrides the parent's method.

        Parameters
        ----------
        input_: TensorVariable
            model predictions; described originally as a 2D
            (batch size, prediction) matrix.
        label: TensorVariable
            targets, described originally as (batch size, answer) or
            (batch size,); integer labels are recommended.

        Returns
        -------
        TensorVariable
            mean over all elements of ``(input_ > 0.5) == label``.
        """
        # TODO: Not tested
        thresholded = T.gt(input_, 0.5)
        matches = T.eq(thresholded, label)
        return T.mean(matches)

项目：deep-motion-analysis 作者：Brimborough | 项目源码 | 文件源码

def skip_connect(self, input, layer_index):
        """
        Combine `input` with the most recent noisy activation and pop it.

        Computes ``(noisy_z[-1] - MU) * V + MU`` where MU and V come from
        `compute_mu` / `compute_v` applied to `input` and
        ``self.As[layer_index]``. The result is stored in `self.tmp`, appended
        to `self.reconstructions`, and the consumed entry is removed from
        `self.noisy_z`.

        Raises
        ------
        ValueError
            if `self.noisy_z` is an empty list.
        """
        if self.noisy_z == []:
            raise ValueError('Error: noisy_z is an empty list, noisy_fprop must be run before skip_connect')

        layer_params = self.As[layer_index]
        MU = self.compute_mu(input, layer_params)
        V = self.compute_v(input, layer_params)

        latest_noisy = self.noisy_z[-1]
        reconstruction = (latest_noisy - MU) * V + MU

        # A non-trainable batch normalisation of `reconstruction` (skipped for
        # a batch size of 1) used to follow here; it is currently disabled.
        self.tmp = reconstruction

        # Kept so the reconstruction error can be calculated later.
        self.reconstructions.append(reconstruction)
        self.noisy_z = self.noisy_z[:-1]

        return reconstruction

项目：dl4nlp_in_theano 作者：luyaojie | 项目源码 | 文件源码

def forward(self, x):
        """
        Convolve, pool, apply dropout and activate a single sequence.

        :param x: (length, dim)
        :return: (hidden_dim, )
        """
        pad = self.padding_size
        if pad > 0:
            # Becomes (padding_size + length + padding_size, dim).
            x = temporal_padding_2d(x, (pad, pad))

        # When the kernel is longer than the sequence, pad the end of the
        # sequence so the convolution has enough rows to work on.
        shortfall = self.kernel_size - x.shape[0]
        padded_to_kernel = temporal_padding_2d(x, (0, shortfall))
        x = ifelse(T.gt(shortfall, 0), padded_to_kernel, x)

        features = self.forward_conv(x)
        pooled = get_pooling(features, self.pooling)
        dropped = dropout_from_layer(pooled, self.dropout)
        return self.act.activate(dropped + self.b)

项目：Theano-Deep-learning 作者：GeekLiB | 项目源码 | 文件源码

def test_pdbbreakpoint_op():
    """ Test that PdbBreakpoint ops don't block gpu optimization"""
    inp = tensor.fmatrix()

    # Graph under test: a conditional breakpoint on `inp`, followed by an
    # element-wise computation on the monitored value.
    monitor = PdbBreakpoint(name='TestBreakpoint')
    monitored = monitor(tensor.gt(inp.sum(), 0), inp)
    squared = monitored ** 2

    f = theano.function([inp], squared, mode=mode_with_gpu)

    # The computation after the breakpoint must have been moved to the gpu,
    # with the transfer back to the host as the final node.
    nodes = f.maker.fgraph.toposort()
    second_to_last, last = nodes[-2], nodes[-1]
    assert isinstance(second_to_last.op, GpuElemwise)
    assert last.op == host_from_gpu

项目：Theano-Deep-learning 作者：GeekLiB | 项目源码 | 文件源码

def setUp(self):
        """Create the inputs, the output graph and a conditional breakpoint."""
        super(TestPdbBreakpoint, self).setUp()

        # Inputs with different numbers of dimensions (matrix and scalar).
        self.input1 = T.fmatrix()
        self.input2 = T.fscalar()
        centered = self.input1 - self.input2
        centered_t = (self.input1 - self.input2).transpose()
        self.output = T.dot(centered, centered_t)

        # Breakpoint that fires when the summed output exceeds 1000.
        self.breakpointOp = PdbBreakpoint("Sum of output too high")
        self.condition = T.gt(self.output.sum(), 1000)
        monitored = self.breakpointOp(self.condition,
                                      self.input1,
                                      self.input2, self.output)
        (self.monitored_input1,
         self.monitored_input2,
         self.monitored_output) = monitored

项目：Theano-Deep-learning 作者：GeekLiB | 项目源码 | 文件源码

def test_inequality_with_self(self):
        """Comparing or min/max-ing a variable with itself must simplify."""
        x = T.scalar('x', dtype=config.floatX)
        mode = theano.compile.get_default_mode().including('local_useless_elemwise_comparison')

        # Strict comparisons of x with itself fold to 0, non-strict ones to 1.
        constant_cases = ((T.lt, 0), (T.le, 1), (T.gt, 0), (T.ge, 1))
        for compare, expected in constant_cases:
            f = theano.function([x], compare(x, x), mode=mode)
            self.assert_eqs_const(f, expected)

        # min(x, x) and max(x, x) reduce to x itself.
        for pick in (T.minimum, T.maximum):
            f = theano.function([x], pick(x, x), mode=mode)
            self.assert_identity(f)

项目：Theano-Deep-learning 作者：GeekLiB | 项目源码 | 文件源码

def test_pdbbreakpoint_op():
    """ Test that PdbBreakpoint ops don't block gpu optimization"""
    matrix_in = tensor.fmatrix()

    # A breakpoint conditioned on the sum of the input, then some
    # computation on the value that passed through the breakpoint.
    fires_when = tensor.gt(matrix_in.sum(), 0)
    passed_through = PdbBreakpoint(name='TestBreakpoint')(fires_when, matrix_in)
    result = passed_through ** 2

    f = theano.function([matrix_in], result, mode=mode_with_gpu)

    # In the compiled graph the element-wise work must run on the gpu and
    # the last node must bring the result back to the host.
    ordered = f.maker.fgraph.toposort()
    gpu_node, transfer_node = ordered[-2], ordered[-1]
    assert isinstance(gpu_node.op, cuda.GpuElemwise)
    assert transfer_node.op == cuda.host_from_gpu

项目：keras 作者：GeekLiB | 项目源码 | 文件源码

def greater(x, y):
    """Return the symbolic comparison ``x > y`` by delegating to ``T.gt``."""
    return T.gt(x, y)

项目：QuantizedRNN 作者：ottj | 项目源码 | 文件源码

def ternarize_weights(W,W0,deterministic=False,srng=None):
    """
    Quantize weights to the three levels {-W0, 0, +W0}.

    Changed copy of the code from TernaryConnect by Zhouhan Lin, Matthieu Courbariaux,
    https://github.com/hantek/BinaryConnect/tree/ternary

    :param W: Weights
    :param W0: quantization level (originally documented as W0=0.5)
    :param deterministic: if true, round by thresholding at -W0/2 and +W0/2;
        otherwise keep the sign of W with probability clip(|W / W0|, 0, 1)
        and emit 0 otherwise
    :param srng: random number generator; when None an MRG_RandomStreams
        seeded from a fixed numpy RandomState(666) is created
    :return: quantized weights
    """
    if srng is None:
        seed_source = np.random.RandomState(666)
        srng = theano.sandbox.rng_mrg.MRG_RandomStreams(seed_source.randint(999999))

    if deterministic:
        # Each passed threshold adds one, giving a level in {-1, 0, 1}.
        above_lower = T.gt(W, -W0/2.)
        above_upper = T.gt(W, W0/2.)
        level = above_lower * 1 + above_upper * 1 - 1
        return level * W0

    sign = T.gt(W, 0) * 2 - 1
    keep_prob = T.clip(T.abs_(W / (W0)), 0, 1)
    keep = T.cast(srng.binomial(n=1, p=keep_prob, size=T.shape(W)), theano.config.floatX)
    return W0 * sign * keep

项目：deep-learning-keras-projects 作者：jasmeetsb | 项目源码 | 文件源码

def greater(x, y):
    """Build the symbolic ``x > y`` comparison via ``T.gt``."""
    return T.gt(x, y)

项目：NNBuilder 作者：aeloyq | 项目源码 | 文件源码

def gt(self, l, r):
            """Return the symbolic comparison ``l > r`` by delegating to ``T.gt``."""
            return T.gt(l, r)

项目：deep-learning-for-genomics 作者：chgroenbech | 项目源码 | 文件源码

def log_zero_inflated_negative_binomial(x, pi, p, log_r, eps = 0.0):
    """
    Build the symbolic log-probability of a zero-inflated negative binomial.

    `pi` and `p` are clipped to [eps, 1 - eps]; `exp(log_r)` is clipped from
    below at eps. Where x == 0 the result is
    log(pi + (1 - pi) * (1 - p) ** r); where x > 0 it is
    log(1 - pi) + log_negative_binomial(x, p, log_r, eps), using the clipped
    `p`. Elements with x < 0 get 0 because neither indicator fires.
    """

    mix = T.clip(pi, eps, 1.0 - eps)
    clipped_p = T.clip(p, eps, 1.0 - eps)

    # The upper clip bound is r itself, so only the lower bound acts.
    r = T.exp(log_r)
    r = T.clip(r, eps, r)

    log_p_zero = T.log(mix + (1 - mix) * T.pow(1 - clipped_p, r))
    log_p_positive = T.log(1 - mix) + log_negative_binomial(x, clipped_p, log_r, eps)

    return T.eq(x, 0) * log_p_zero + T.gt(x, 0) * log_p_positive

项目：deep-learning-for-genomics 作者：chgroenbech | 项目源码 | 文件源码

def log_zero_inflated_negative_binomial(x, pi, p, log_r, eps = 0.0):
    """
    Symbolic log-probability of a zero-inflated negative binomial at `x`.

    Parameters
    ----------
    x : values at which the log-probability is evaluated.
    pi : probability of the extra zero component; clipped to [eps, 1 - eps].
    p : negative-binomial probability parameter; clipped to [eps, 1 - eps]
        before it is used here and passed on to `log_negative_binomial`.
    log_r : logarithm of r; `exp(log_r)` is clipped from below at eps.
    eps : clipping margin, also forwarded to `log_negative_binomial`.
    """

    inflation = T.clip(pi, eps, 1.0 - eps)
    prob = T.clip(p, eps, 1.0 - eps)

    dispersion = T.exp(log_r)
    dispersion = T.clip(dispersion, eps, dispersion)

    at_zero = T.eq(x, 0)
    above_zero = T.gt(x, 0)

    zero_branch = T.log(inflation + (1 - inflation) * T.pow(1 - prob, dispersion))
    count_branch = T.log(1 - inflation) + log_negative_binomial(x, prob, log_r, eps)

    y = at_zero * zero_branch + above_zero * count_branch
    return y

项目：gelearn 作者：lookatmoon | 项目源码 | 文件源码

def get_train_function(self):
        """
        Build and compile one gradient-descent training step.

        The compiled function takes
        [feat_mat, f_mask_mat, f_target, i_mask_mat, i_target], returns
        [objective, weight] and updates the shared `weight` with a fixed step
        of 0.1 along the negative gradient.

        NOTE(review): the categorical (feature) term is multiplied by 0.0, so
        only the binary cross-entropy and the L2 penalty drive training.
        NOTE(review): `i_mask_mat` is created with the name 'l_mask_mat' --
        confirm whether that is intended.
        """
        # Symbolic scalar kept from the original; it is not used below.
        num_param_vecs = T.scalar('num_param_vecs')

        n_features = len(self.feature_map)
        weight = theano.shared(np.zeros((n_features, self.num_param_vecs)), name='weight')
        feat_mat = sparse.csr_matrix(name='feat_mat')
        pred = T.nnet.sigmoid(sparse.dot(feat_mat, weight))  # one-vs-rest

        # Several parameter vectors: normalise each row to sum to one.
        # Otherwise: stack the prediction with its complement.
        row_normalised = pred / pred.sum(axis=1).reshape((pred.shape[0], 1))
        with_complement = T.concatenate([pred, 1-pred], axis=1)
        o_pred = ifelse(T.gt(self.num_param_vecs, 1), row_normalised, with_complement)

        f_target = T.matrix('f_target')
        f_mask_mat = sparse.csr_matrix(name='f_mask_mat')
        f_sum_pred = sparse.dot(f_mask_mat, o_pred)
        f_pred = f_sum_pred / f_sum_pred.sum(axis=1).reshape((f_sum_pred.shape[0], 1))

        i_target = T.matrix('i_target')
        i_mask_mat = sparse.csr_matrix(name='l_mask_mat')
        i_pred = sparse.dot(i_mask_mat, pred)

        feature_term = 0.0 * T.nnet.categorical_crossentropy(f_pred, f_target).sum()
        instance_term = T.nnet.binary_crossentropy(i_pred, i_target).sum()
        l2_term = self.param.l2_lambda * (weight ** 2).sum() / 2
        objective = feature_term + instance_term + l2_term

        grad_weight = T.grad(objective, weight)

        return theano.function(
            inputs=[feat_mat, f_mask_mat, f_target, i_mask_mat, i_target],
            outputs=[objective, weight],
            updates=[(weight, weight - 0.1*grad_weight)])

项目：keras-customized 作者：ambrite | 项目源码 | 文件源码

def greater(x, y):
    """Return ``T.gt(x, y)``, the symbolic ``x > y`` comparison."""
    return T.gt(x, y)

项目：ADEM 作者：mike-n-7 | 项目源码 | 文件源码

def preprocess_tweet(s):
    """
    Normalise special tokens in a tweet and make it end with ' </s>'.

    Replaces '@user' with '<at>', the HTML-escaped heart/number markers with
    '<heart>' / '<number>', turns a double space into ' </s> ' (followed by
    one pass collapsing any remaining double space), strips trailing spaces
    and appends ' </s>' when the text does not already end with it.

    An empty or all-space input yields ' </s>' instead of raising
    IndexError, which is what indexing ``s[-1]`` on an empty string did.

    Parameters
    ----------
    s : str
        raw tweet text.

    Returns
    -------
    str
        the normalised text, always ending with ' </s>'.
    """
    s = (s.replace('@user', '<at>')
          .replace('&lt;heart&gt;', '<heart>')
          .replace('&lt;number&gt;', '<number>')
          .replace('  ', ' </s> ')
          .replace('  ', ' '))

    # Make sure we end with the </s> token; rstrip is safe on empty strings.
    s = s.rstrip(' ')
    if not s.endswith(' </s>'):
        s += ' </s>'

    return s

项目：ADEM 作者：mike-n-7 | 项目源码 | 文件源码

def compute_liu_pca(pca_components, twitter_dialogue_embeddings, pca):
    """
    Project each of the three embedding slots with an already prepared `pca`.

    For every slot i in 0..2, ``pca.transform`` is applied to
    ``twitter_dialogue_embeddings[:, i]`` and written into slot i of a new
    zero-initialised array of shape (n, 3, pca_components), where n is the
    first dimension of the input.
    """
    n_dialogues = twitter_dialogue_embeddings.shape[0]
    projected = np.zeros((n_dialogues, 3, pca_components))
    for slot in (0, 1, 2):
        slot_embeddings = twitter_dialogue_embeddings[:, slot]
        projected[:, slot] = pca.transform(slot_embeddings)
    return projected


# Computes PCA decomposition for the context, gt responses, and model responses separately

项目：ADEM 作者：mike-n-7 | 项目源码 | 文件源码

def compute_separate_pca(pca_components, twitter_dialogue_embeddings):
    """
    Fit and apply a PCA to each of the three embedding slots in turn.

    One `PCA(n_components=pca_components)` object is created and
    `fit_transform` is called on it once per slot, so every slot is reduced
    with its own fit. Returns an array of shape (n, 3, pca_components), where
    n is the first dimension of the input.
    """
    n_dialogues = twitter_dialogue_embeddings.shape[0]
    projected = np.zeros((n_dialogues, 3, pca_components))
    reducer = PCA(n_components = pca_components)
    for slot in (0, 1, 2):
        slot_embeddings = twitter_dialogue_embeddings[:, slot]
        projected[:, slot] = reducer.fit_transform(slot_embeddings)
    return projected

# Computes PCA decomposition for the context, gt responses, and model responses together
# NOTE: this computes the PCA on the training embeddings, and then applies them to the
# test embeddings (it does not compute PCA on the testing embeddings)

项目：ADEM 作者：mike-n-7 | 项目源码 | 文件源码

def hinge_loss_len(self, length, alpha=1):
        """
        Hinge penalty that is active while the length loss is below a floor.

        ``len_loss`` is the mean squared difference between `self.nuis` and
        `length`. The result is ``alpha * (min_loss - len_loss)`` where
        ``min_loss > len_loss`` and zero elsewhere.

        The selection is done with ``T.switch`` because the result of
        ``T.gt`` is a symbolic variable: used in a Python ``if`` it is always
        truthy, so the previous code always took the first branch and could
        return a negative "hinge" value.

        Parameters
        ----------
        length : value(s) compared against `self.nuis`.
        alpha : scale applied to the active part of the hinge.

        Returns
        -------
        Symbolic scalar with the hinge value.
        """
        min_loss = 1605.157
        len_loss = T.mean((self.nuis - length)**2)
        return T.switch(T.gt(min_loss, len_loss),
                        alpha * (min_loss - len_loss),
                        0 * len_loss)

项目：deep-motion-analysis 作者：Brimborough | 项目源码 | 文件源码

def __call__(self, input):
        """
        Normalise `input` with statistics taken over `self.axes`.

        When the first dimension of `input` is 1 the mean is replaced by
        zeros and the standard deviation by ones, so a single data point is
        not batch-normalised.

        NOTE(review): the result is (input - mean) * (gamma / std + beta),
        i.e. beta is part of the multiplicative factor rather than added
        afterwards -- confirm this is intended.
        """
        batch_mean = input.mean(self.axes, keepdims=True)
        batch_std = input.std(self.axes, keepdims=True) + self.epsilon

        # Don't batch-normalise a single data point.
        more_than_one = T.gt(input.shape[0], 1)
        mean = ifelse(more_than_one, batch_mean,
                      T.zeros(batch_mean.shape, dtype=batch_mean.dtype))
        std = ifelse(more_than_one, batch_std,
                     T.ones(batch_std.shape, dtype=batch_std.dtype))

        factor = T.addbroadcast((self.gamma / std) + self.beta, *self.axes)
        return (input - mean) * factor

项目：dl4nlp_in_theano 作者：luyaojie | 项目源码 | 文件源码

def forward_batch(self, x, mask):
        """
        Convolve, pool, apply dropout and activate a batch of sequences.

        :param x: (batch, length, dim)
        :param mask: (batch, length, )
        :return: documented originally as (batch, length, hidden_dim);
            NOTE(review): the value is produced after pooling -- confirm shape.
        :raises RuntimeError: if `self.padding_size` is neither > 0 nor == 0.
        """
        # conv_after_length = length - kernel + 2 * padding_size + 1
        pad = self.padding_size
        if pad > 0:
            # Pad the length axis on both sides.
            padded = temporal_padding_3d(x, (pad, pad))
            # (batch, conv_after_length)
            mask = temporal_padding_mask(mask, kernel_size=self.kernel_size, padding_size=pad)
        elif pad == 0:
            padded = x
            # (batch, conv_after_length)
            mask = temporal_padding_mask(mask, kernel_size=self.kernel_size, padding_size=0)
        else:
            raise RuntimeError("Dilation Rate >= 0")

        # An ifelse-based safeguard for sequences shorter than the kernel
        # (padding x at the end and using an all-ones mask) was sketched here
        # in the original but is disabled.

        # (batch, conv_after_length, hidden_dim)
        features = self.forward_conv_batch(padded)
        pooled = get_pooling_batch(features, mask, self.pooling)
        dropped = dropout_from_layer(pooled, self.dropout)
        return self.act.activate(dropped + self.b)

项目：keras 作者：NVIDIA | 项目源码 | 文件源码

def greater(x, y):
    """Symbolic ``x > y``; a direct call to ``T.gt``."""
    return T.gt(x, y)

项目：seq2graph 作者：masterkeywikz | 项目源码 | 文件源码

def _get_updates_for(self, param, grad):
        """
        Yield sign-based update pairs for `param` with a per-element step.

        The step of the previous update is multiplied by `step_increase`
        where the current and previous gradients have the same sign, by
        `step_decrease` where the sign flipped, and left unchanged where
        their product is zero; it is then limited to [min_step, max_step].
        Gradient entries whose sign flipped are zeroed before use.

        Yields (variable, new value) pairs for the parameter, the stored
        gradient and the stored step, in that order.
        """
        prev_grad = shared_like(param, 'grad')
        prev_step = shared_like(param, 'step', self.learning_rate.eval())

        agreement = grad * prev_grad
        flipped = TT.lt(agreement, 0)
        scale = (TT.eq(agreement, 0) +
                 TT.gt(agreement, 0) * self.step_increase +
                 flipped * self.step_decrease)
        unclipped = prev_step * scale
        step = TT.minimum(self.max_step, TT.maximum(self.min_step, unclipped))

        # Drop gradient entries whose sign changed since the last update.
        kept_grad = grad - flipped * grad

        yield param, param - TT.sgn(kept_grad) * step
        yield prev_grad, kept_grad
        yield prev_step, step

项目：neural_topic_models 作者：dallascard | 项目源码 | 文件源码

def clip_gradients(gradients, clip):
    """
    If clip > 0, clip the gradients to be within [-clip, clip]

    The result of ``T.gt`` is a symbolic variable and is always truthy in a
    Python ``if``, so the decision is made differently here: a plain number
    is compared in Python, anything else (e.g. a symbolic `clip`) is handled
    inside the graph with ``T.switch``.

    Args:
        gradients: the gradients to be clipped
        clip: the value defining the clipping interval

    Returns:
        the clipped gradients; the input list itself when `clip` is a plain
        number that is not greater than 0
    """
    import numbers

    if isinstance(clip, numbers.Real):
        if clip > 0:
            return [T.clip(g, -clip, clip) for g in gradients]
        return gradients

    # Symbolic threshold: choose between clipped and raw gradient at run time.
    should_clip = T.gt(clip, 0)
    return [T.switch(should_clip, T.clip(g, -clip, clip), g) for g in gradients]

项目：nature_methods_multicut_pipeline 作者：ilastik | 项目源码 | 文件源码

def sylu(gain=10, spread=0.1):
    """
    Return an activation that is linear near zero and saturates at +/- gain.

    For |x| < 1 / spread the value is gain * spread * x; for
    x >= 1 / spread it is gain, and for x <= -1 / spread it is -gain.
    """
    def activation(x):
        bound = 1 / spread
        upper = switch(T.ge(x, bound), gain, 0)
        inside = T.and_(T.gt(bound, x), T.gt(x, -bound))
        linear = switch(inside, gain * spread * x, 0)
        lower = switch(T.le(x, -bound), -gain, 0)
        return upper + linear + lower

    return activation


# Exponential Linear Unit

项目：nature_methods_multicut_pipeline 作者：ilastik | 项目源码 | 文件源码

def sylu(gain=10, spread=0.1):
    """
    Build a saturating-linear activation function.

    The returned callable maps x to gain * spread * x strictly inside
    (-1 / spread, 1 / spread), to gain at or above 1 / spread and to -gain
    at or below -1 / spread.
    """
    def apply(x):
        limit = 1 / spread
        saturated_high = switch(T.ge(x, limit), gain, 0)
        in_linear_zone = T.and_(T.gt(limit, x), T.gt(x, -limit))
        linear_part = switch(in_linear_zone, gain * spread * x, 0)
        saturated_low = switch(T.le(x, -limit), -gain, 0)
        return saturated_high + linear_part + saturated_low

    return apply


# Exponential Linear Unit

项目：keras_superpixel_pooling 作者：parag2489 | 项目源码 | 文件源码

def greater(x, y):
    """Return the symbolic result of ``T.gt(x, y)`` (``x > y``)."""
    return T.gt(x, y)

项目：Theano-Deep-learning 作者：GeekLiB | 项目源码 | 文件源码

def test_elemwise(self):
        """An elemwise op over switches sharing a condition keeps one Switch."""
        def assert_single_switch(inputs, expr):
            g = optimize(FunctionGraph(inputs, [expr]))
            assert str(g).count('Switch') == 1

        # float Ops
        mats = theano.tensor.matrices('cabxy')
        c, a, b, x, y = mats
        s1 = T.switch(c, a, b)
        s2 = T.switch(c, x, y)
        float_ops = (T.add, T.sub, T.mul, T.true_div, T.int_div, T.floor_div,
                     T.minimum, T.maximum, T.gt, T.lt, T.ge, T.le, T.eq, T.neq,
                     T.pow)
        for op in float_ops:
            assert_single_switch(mats, op(s1, s2))

        # integer Ops
        mats = theano.tensor.imatrices('cabxy')
        c, a, b, x, y = mats
        s1 = T.switch(c, a, b)
        s2 = T.switch(c, x, y)
        integer_ops = (T.and_, T.or_, T.xor,
                       T.bitwise_and, T.bitwise_or, T.bitwise_xor)
        for op in integer_ops:
            assert_single_switch(mats, op(s1, s2))

        # add/mul with more than two inputs
        u, v = theano.tensor.matrices('uv')
        s3 = T.switch(c, u, v)
        for op in (T.add, T.mul):
            assert_single_switch(mats + [u, v], op(s1, s2, s3))

项目：StockPredictor 作者：wallsbreaker | 项目源码 | 文件源码

def predict_regress(model_path):
    """
    Rebuild a saved model, evaluate it on the test set and dump predictions.

    The strategy name is the part of the file name before the first '_'.
    Parameters are read from `model_path`, one JSON array per line. The
    function prints the accuracy of the sign of the prediction against the
    sign of the target and writes one "label<TAB>value<TAB>prediction" line
    per sample to '../../data/prediction'.
    """
    # Redefine the model.
    target_var = T.fmatrix('y')
    target_labels = T.switch(T.gt(target_var, 0), 1, 0)
    file_name = model_path.split('/')[-1]
    dnn_strategy = file_name.split('_')[0]
    network = get_model_by_strategy(dnn_strategy)

    # Load the parameters: one JSON-encoded array per line.
    with open(model_path, 'r') as f:
        params = [np.array(json.loads(line)) for line in f.readlines()]
    set_all_param_values(network, params)

    predict_prediction = get_output(network, deterministic=True)
    predict_labels = T.switch(T.gt(predict_prediction, 0), 1, 0)
    predict_acc = binary_accuracy(predict_labels, target_labels, threshold=0).mean()

    input_layer = get_all_layers(network)[0]
    predict = theano.function([input_layer.input_var, target_var],
                              [predict_prediction, predict_acc])

    X, y, labels, values, _, _, _, _, _, _ = load_dataset('../../data/test')
    predict_prediction, predict_acc = predict(X, y)

    sys.stdout.write("  predict accuracy:\t\t\t{} %\n".format(predict_acc * 100))

    # Output the prediction result.
    with open('../../data/prediction', 'w') as f:
        for ix in xrange(len(labels)):
            fields = (str(labels[ix]), str(values[ix]), str(predict_prediction[ix][0]))
            f.write('\t'.join(fields) + '\n')
    sys.stdout.flush()

项目：Hat 作者：qiuqiangkong | 项目源码 | 文件源码

def gt(a, b):
    """Return the symbolic comparison ``a > b`` by delegating to ``T.gt``."""
    return T.gt(a, b)

项目：InnerOuterRNN 作者：Chemoinformatics | 项目源码 | 文件源码

def greater(x, y):
    """Build ``x > y`` symbolically by calling ``T.gt``."""
    return T.gt(x, y)

项目：odin_old 作者：trungnt13 | 项目源码 | 文件源码

def gt(a, b):
    """a > b, built by delegating to ``T.gt``."""
    return T.gt(a, b)

项目：structured-output-ae 作者：sbelharbi | 项目源码 | 文件源码

def __init__(self, input, n_in, n_out, is_binary=False, threshold=0.4,
                 rng=None):
        """
        Initialize the parameters of the logistic regression.

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
        architecture (one minibatch)
        :type n_in: int
        :param n_in: number of input units, the dimension of the space in which
        the datapoints lie
        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
        which the labels lie (number of classes)
        :param is_binary: treat the layer as a binary classifier; forced to
        True when n_out == 1
        :param threshold: decision threshold applied to the flattened
        class-membership output in the binary case
        :param rng: passed through to the parent constructor
        """
        self.activation = T.nnet.sigmoid
        self.threshold = threshold
        super(LogisticRegressionLayer, self).__init__(
            input, n_in, n_out, self.activation, rng)

        self.reset_layer()

        # A single output unit always means binary classification.
        self.is_binary = True if n_out == 1 else is_binary

        # Per-class counters: samples seen and wrong classifications made.
        self.n_classes_seen = np.zeros(n_out)
        self.n_wrong_classif_made = np.zeros(n_out)

        self.reset_conf_mat()

        # Class-membership probabilities in symbolic form.
        self.p_y_given_x = self.get_class_memberships(self.input)

        if self.is_binary:
            # Class 1 when the probability exceeds the threshold, else 0.
            self.y_decision = T.gt(T.flatten(self.p_y_given_x), self.threshold)
        else:
            # Prediction is the class with maximal probability.
            self.y_decision = T.argmax(self.p_y_given_x, axis=1)

        self.params = [self.W, self.b]

项目：hnmt 作者：robertostling | 项目源码 | 文件源码

def encode(self, inputs, inputs_mask, chars, chars_mask):
        """
        Encode a batch of sentences, mixing word and character embeddings.

        Known words (input symbol >= 0) are looked up in the source word
        embeddings; unknown words (input symbol < 0) index into vectors
        produced by a character-level encoder run over `chars`.

        Returns the decoder's initial states `h_0`, `c_0` and the attended
        sequence (forward and backward hidden states concatenated).
        """
        floatX = theano.config.floatX
        has_chars = T.gt(chars.shape[0], 0)

        # Character-level encoder over the unknown words; the backward
        # encoder also sees the forward hidden states.
        embedded_chars = self.src_char_embeddings(chars)
        fwd_char_h_seq, fwd_char_c_seq = self.fwd_char_encoder(
                embedded_chars, chars_mask)
        back_char_h_seq, back_char_c_seq = self.back_char_encoder(
                T.concatenate([embedded_chars, fwd_char_h_seq], axis=-1),
                chars_mask)

        # Final forward and backward character states, one row per unknown
        # word (n_chars x src_embedding_dims per the original notes), or an
        # empty matrix when there are no unknown words
        # (e.g. pure word-level encoder).
        final_char_states = T.concatenate(
                [fwd_char_h_seq[-1], back_char_h_seq[0]], axis=-1)
        no_char_states = T.zeros(
                [0, self.config['src_embedding_dims']], dtype=floatX)
        char_vectors = theano.ifelse.ifelse(
                has_chars, final_char_states, no_char_states)

        # Separate masks and indexes for known and unknown words.
        known_mask = inputs_mask * T.ge(inputs, 0)
        unknown_mask = inputs_mask * T.lt(inputs, 0)
        unknown_indexes = (-inputs-1) * unknown_mask
        known_indexes = inputs * known_mask

        # Mix known word vectors with the character encoder output; without
        # any characters only the known word vectors are used.
        embedded_unknown = char_vectors[unknown_indexes]
        embedded_known = self.src_embeddings(known_indexes)
        unknown_part = unknown_mask.dimshuffle(0,1,'x').astype(floatX) * embedded_unknown
        known_part = known_mask.dimshuffle(0,1,'x').astype(floatX) * embedded_known
        embedded_inputs = theano.ifelse.ifelse(
                has_chars, unknown_part + known_part, known_part)

        # Forward pass, then backward pass fed with the forward states.
        fwd_h_seq, fwd_c_seq = self.fwd_encoder(embedded_inputs, inputs_mask)
        back_h_seq, back_c_seq = self.back_encoder(
                T.concatenate([embedded_inputs, fwd_h_seq], axis=-1),
                inputs_mask)

        # Initial states for decoder
        h_0 = T.tanh(self.proj_h0(back_h_seq[0]))
        c_0 = T.tanh(self.proj_c0(back_c_seq[0]))

        # Attention on concatenated forward/backward sequences
        attended = T.concatenate([fwd_h_seq, back_h_seq], axis=-1)
        return h_0, c_0, attended

项目：deep-prior-pp 作者：moberweger | 项目源码 | 文件源码

def __init__(self, rng, inputVar, cfgParams, copyLayer=None, layerNum=None):
        """
        Dropout layer of a MLP: units are fully-connected and connections are
        dropped randomly during training.

        While the shared `flag_on` is > 0 the output is the input multiplied
        by a random binary mask; otherwise it is the input scaled by the
        keep probability.

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize mask

        :type inputVar: theano.tensor.matrix
        :param inputVar: a symbolic tensor of shape (n_examples, n_in)

        :type cfgParams: DropoutLayerParams
        :param copyLayer: accepted but not used in this constructor
        :param layerNum: stored and used to name the output variable
        """
        import theano
        import theano.tensor as T
        from theano.ifelse import ifelse
        # faster rng on GPU than shared_randomstreams.RandomStreams
        from theano.sandbox.rng_mrg import MRG_RandomStreams

        super(DropoutLayer, self).__init__(rng)

        self.inputVar = inputVar
        self.cfgParams = cfgParams
        self.layerNum = layerNum

        assert 0. < cfgParams.p < 1.

        # see https://github.com/uoguelph-mlrg/theano_alexnet/blob/master/alex_net.py
        self.prob_drop = cfgParams.p
        self.prob_keep = 1.0 - cfgParams.p
        self.flag_on = theano.shared(numpy.cast[theano.config.floatX](1.0), name='flag_on')

        stream = MRG_RandomStreams(rng.randint(999999))
        self.mask = stream.binomial(n=1, p=self.prob_keep,
                                    size=self.cfgParams.inputDim,
                                    dtype=theano.config.floatX)

        masked = self.mask * self.inputVar
        scaled = self.prob_keep * self.inputVar
        self.output = ifelse(T.gt(self.flag_on, 0), masked, scaled)
        self.output.name = 'output_layer_{}'.format(self.layerNum)
        self.output_pre_act = self.output  # for compatibility

        # no params and weights
        self.params = []
        self.weights = []

项目：Theano-NN_Starter 作者：nightinwhite | 项目源码 | 文件源码

def CTC_B(self,A):
        """
        Collapse a label sequence in place: merge repeats, then drop blanks.

        Runs of equal consecutive labels are reduced to one label and every
        blank (label 0) is removed. `A` is modified in place and also
        returned. An empty list is returned unchanged; the previous
        index-based loop raised IndexError on it.

        :param A: list of labels
        :return: the same list object, collapsed
        """
        from itertools import groupby

        blank_num = 0
        # groupby yields one key per run of equal consecutive labels.
        A[:] = [label for label, _ in groupby(A) if label != blank_num]
        return A

    # def CTC_LOSS(self):
    #     T_ctc = self.output_shape[1]#?????
    #     L = self.Y.shape[1]*2+1#???????
    #
    #     def each_loss(index,T_ctc,L):
    #         o = self.output[index]
    #         y = self.Y[index]
    #         blank_num = 0
    #         def extend_y(i,y):
    #             return T.switch(T.eq(i%2, 0), blank_num, y[(i-1)//2])
    #         y,_ = theano.scan(extend_y,sequences=[T.arange(L)],non_sequences = [y])
    #         #y???2*y.len+1?blank_num??
    #         temp_vector = T.zeros(self.output_shape[1]*2+1)
    #         alpha0 = T.concatenate([[o[0][y[0]]], [o[0][y[1]]], T.zeros_like(temp_vector[:L-2])],axis = 0)
    #         #alpha0???????
    #         def to_T(t,alpha_pre,o,y,T_ctc,L):#???????
    #             alpha_e = 1 + 2*t
    #             alpha_b = L - 2*T_ctc+2*t
    #             def set_alpha_value(i,alpha_t,alpha_pre,t,o,y):#?????????
    #                 iff = T.cast(0,dtype = "float32")
    #                 ift = (alpha_pre[i] + T.gt(i, 0) * alpha_pre[i - 1] + (T.gt(i, 1) * T.eq(i % 2, 1)) * alpha_pre[i - 2]) * o[t][y[i]]
    #                 ans = theano.ifelse.ifelse(T.eq(alpha_t[i],1),ift,iff)
    #                 return ans
    #
    #             temp_vector = T.zeros(self.output_shape[1]*2+1)
    #             alpha_v = T.ones_like(temp_vector[:(T.switch(T.gt(alpha_e, L - 1), L - 1, alpha_e) - T.switch(T.gt(alpha_b, 0), alpha_b, 0))+1])
    #             alpha_t = theano.ifelse.ifelse(T.gt(alpha_b, 0), T.concatenate([T.zeros_like(temp_vector[:alpha_b]), alpha_v]), alpha_v)
    #             alpha_t = theano.ifelse.ifelse(T.ge(alpha_e, L - 1), alpha_t, T.concatenate([alpha_t,T.zeros_like(temp_vector[:L-1-alpha_e])]))
    #             alpha_t = theano.scan(set_alpha_value,
    #                                   sequences=[T.arange(alpha_t.shape[0])],
    #                                   non_sequences=[alpha_t,alpha_pre,t,o,y])
    #             return alpha_t
    #         alphas,_ = theano.scan(to_T,sequences=[T.arange(1,T_ctc)],
    #                                outputs_info = [alpha0],
    #                                non_sequences = [o,y,T_ctc,L])
    #         loss = alphas[-1][-1]+alphas[-1][-2]
    #         loss = T.switch(T.le(loss, 1e-40), 1e-40, loss)
    #         loss = -T.log(loss)
    #         return loss
    #
    #     CTC_LOSSs,_ = theano.scan(each_loss,
    #                               sequences=[T.arange(self.output_shape[0])],
    #                               non_sequences = [T_ctc,L])
    #     self.ctc_loss = theano.function([self.X,self.Y],CTC_LOSSs)
    #     return CTC_LOSSs

项目：Theano-NN_Starter 作者：nightinwhite | 项目源码 | 文件源码

def CTC_LOSS(self):
        """
        Build the per-sample CTC-style loss graph and compile it.

        Scans `each_loss` over pairs taken from `self.output` and `self.Y`.
        As a side effect, a compiled function of [self.X, self.Y] returning
        the losses is stored in `self.ctc_loss`.

        Returns the symbolic vector of per-sample losses.
        """
        outpts = self.output
        inpts = self.Y
        def each_loss(outpt, inpt):
            """Loss for one sample from its outputs and its label row."""
            # NOTE(review): the original comment here was garbled; presumably
            # `inpt` is the label sequence already interleaved with blanks and
            # padded with the blank symbol -- confirm.
            blank = 26
            y_nblank = T.neq(inpt, blank)
            n = T.dot(y_nblank, y_nblank)  # number of non-blank entries
            N = 2 * n + 1  # presumably the length of the blank-interleaved sequence -- confirm
            labels = inpt[:N]
            labels2 = T.concatenate((labels, [blank, blank]))
            # Set where the label two positions ahead differs and the label in
            # between is the blank.
            sec_diag = T.neq(labels2[:-2], labels2[2:]) * T.eq(labels2[1:-1], blank)
            # Transition matrix: identity, first superdiagonal, and the second
            # superdiagonal masked row-wise by sec_diag.
            recurrence_relation = \
                T.eye(N) + \
                T.eye(N, k=1) + \
                T.eye(N, k=2) * sec_diag.dimshuffle((0, 'x'))

            # Columns of the output selected by the labels.
            pred_y = outpt[:, labels]

            # Forward recursion. Entries whose product with the current output
            # is exactly 0 keep the unscaled propagated value instead.
            fwd_pbblts, _ = theano.scan(
                lambda curr, accum: T.switch(T.eq(curr*T.dot(accum, recurrence_relation), 0.0),
                                             T.dot(accum, recurrence_relation)
                                             , curr*T.dot(accum, recurrence_relation)),
                sequences=[pred_y],
                outputs_info=[T.eye(N)[0]]
            )
            #return fwd_pbblts
            #liklihood = fwd_pbblts[0, 0]
            # Sum of the last two entries of the final step.
            liklihood = fwd_pbblts[-1, -1] + fwd_pbblts[-1, -2]
            #liklihood = T.switch(T.lt(liklihood, 1e-35), 1e-35, liklihood)
            #loss = -T.log(T.cast(liklihood, "float32"))
            #loss = 10 * (liklihood - 1) * (liklihood - 100)
            # Piecewise loss: a quadratic for values <= 1.0, -log above 1.0.
            # NOTE(review): unusual for a likelihood -- confirm the intent.
            loss = (T.le(liklihood, 1.0)*(10*(liklihood-1)*(liklihood-100)))+(T.gt(liklihood, 1.0)*(-T.log(T.cast(liklihood, "float32"))))
            return loss
            #return pred_y

        ctc_losss, _ = theano.scan(each_loss,
                                   sequences=[outpts, inpts],
                                   )
        # Keep a compiled version for direct evaluation.
        self.ctc_loss = theano.function([self.X, self.Y], ctc_losss)

        return ctc_losss

项目：hred-latent-piecewise 作者：julianser | 项目源码 | 文件源码

def GRU_step(self, x_t, m_t, bnmask_t, *args):
        """
        Advance the normalized GRU encoder by one time step.

        :param x_t: input at this step (projected through W_in_r, W_in_z, W_in).
        :param m_t: mask at this step; when the reset flag is on it multiplies
            the previous hidden state and gates the step counter.
        :param bnmask_t: mask forwarded unchanged to NormalizationOperator.
        :param args: recurrent values; the first two are the previous hidden
            state and the step counter.
        :return: list of 17 symbolic values: h_t, the int8 step counter, r_t,
            z_t, h_tilde, then the (mean, var) pairs returned by
            NormalizationOperator for the reset gate (x, h), the update gate
            (x, h) and the candidate state (x, h).
        """
        recurrent = iter(args)
        h_tm1 = next(recurrent)
        n_t = next(recurrent)

        reset_on_eou = self.reset_utterance_encoder_at_end_of_utterance

        # Step counter: with the reset flag on it drops back to zero wherever
        # the mask is not above 0.5, otherwise it simply counts up.
        if reset_on_eou:
            step_count = T.gt(m_t, 0.5) * (n_t + 1)
        else:
            step_count = n_t + 1
        step_count = T.cast(step_count, 'int8')

        # Clamp the counter into [0, normop_max_enc_seq - 1] so it can index
        # the stored normalization statistics.
        counter = n_t[0, :] if n_t.ndim == 2 else n_t
        stat_idx = T.maximum(0, T.minimum(counter, self.normop_max_enc_seq - 1))

        if m_t.ndim >= 1:
            m_t = m_t.dimshuffle(0, 'x')

        # With the reset flag on, the mask multiplies the previous hidden state.
        hr_tm1 = m_t * h_tm1 if reset_on_eou else h_tm1

        def normalize(pre_activation, gamma, mean_table, var_table):
            # Apply the normalization operator using the statistics rows
            # selected by the clamped step counter.
            return NormalizationOperator(
                self.normop_type, pre_activation, gamma, bnmask_t,
                mean_table[stat_idx, :], var_table[stat_idx, :])

        # Reset gate
        r_t_normop_x_inp, r_t_normop_x_mean, r_t_normop_x_var = normalize(
            T.dot(x_t, self.W_in_r), self.normop_r_x_gamma,
            self.normop_r_x_mean, self.normop_r_x_var)
        r_t_normop_h_inp, r_t_normop_h_mean, r_t_normop_h_var = normalize(
            T.dot(hr_tm1, self.W_hh_r), self.normop_r_h_gamma,
            self.normop_r_h_mean, self.normop_r_h_var)
        r_t = T.nnet.sigmoid(r_t_normop_x_inp + r_t_normop_h_inp + self.b_r)

        # Update gate
        z_t_normop_x_inp, z_t_normop_x_mean, z_t_normop_x_var = normalize(
            T.dot(x_t, self.W_in_z), self.normop_z_x_gamma,
            self.normop_z_x_mean, self.normop_z_x_var)
        z_t_normop_h_inp, z_t_normop_h_mean, z_t_normop_h_var = normalize(
            T.dot(hr_tm1, self.W_hh_z), self.normop_z_h_gamma,
            self.normop_z_h_mean, self.normop_z_h_var)
        z_t = T.nnet.sigmoid(z_t_normop_x_inp + z_t_normop_h_inp + self.b_z)

        # Candidate state
        h_tilde_normop_x_inp, h_tilde_normop_x_mean, h_tilde_normop_x_var = normalize(
            T.dot(x_t, self.W_in), self.normop_in_x_gamma,
            self.normop_in_x_mean, self.normop_in_x_var)
        h_tilde_normop_h_inp, h_tilde_normop_h_mean, h_tilde_normop_h_var = normalize(
            T.dot(r_t * hr_tm1, self.W_hh), self.normop_in_h_gamma,
            self.normop_in_h_mean, self.normop_in_h_var)
        h_tilde = self.sent_rec_activation(h_tilde_normop_x_inp + h_tilde_normop_h_inp + self.b_hh)

        # New hidden state: blend of the (possibly reset) previous state and the candidate.
        h_t = (np.float32(1.0) - z_t) * hr_tm1 + z_t * h_tilde

        # States first, then gates, then the normalization statistics.
        return [
            h_t, T.cast(step_count, 'int8'), r_t, z_t, h_tilde,
            r_t_normop_x_mean, r_t_normop_x_var,
            r_t_normop_h_mean, r_t_normop_h_var,
            z_t_normop_x_mean, z_t_normop_x_var,
            z_t_normop_h_mean, z_t_normop_h_var,
            h_tilde_normop_x_mean, h_tilde_normop_x_var,
            h_tilde_normop_h_mean, h_tilde_normop_h_var,
        ]

项目：ADEM 作者：mike-n-7 | 项目源码 | 文件源码

def GRU_step(self, x_t, m_t, bnmask_t, *args):
        """
        Advance the normalized GRU encoder by one time step.

        :param x_t: input at this step (projected through W_in_r, W_in_z, W_in).
        :param m_t: mask at this step; when the reset flag is on it multiplies
            the previous hidden state and decides whether the counter advances.
        :param bnmask_t: mask forwarded unchanged to NormalizationOperator.
        :param args: recurrent values; the first two are the previous hidden
            state and the step counter.
        :return: list of 17 symbolic values: h_t, the step counter cast to
            int8, r_t, z_t, h_tilde, then the (mean, var) pairs returned by
            NormalizationOperator for the reset gate (x, h), the update gate
            (x, h) and the candidate state (x, h).
        """
        recurrent = iter(args)
        h_tm1 = next(recurrent)
        n_t = next(recurrent)

        reset_on_eou = self.reset_utterance_encoder_at_end_of_utterance

        # Step counter: with the reset flag on it only advances where the
        # mask is positive, otherwise it advances on every step.
        if reset_on_eou:
            step_count = n_t + T.gt(m_t, 0.0)
        else:
            step_count = n_t + 1

        # Clamp the counter into [0, normop_max_enc_seq - 1] so it can index
        # the stored normalization statistics.
        counter = n_t[0, :] if n_t.ndim == 2 else n_t
        stat_idx = T.maximum(0, T.minimum(counter, self.normop_max_enc_seq - 1))

        if m_t.ndim >= 1:
            m_t = m_t.dimshuffle(0, 'x')

        # With the reset flag on, the mask multiplies the previous hidden state.
        hr_tm1 = m_t * h_tm1 if reset_on_eou else h_tm1

        def normalize(pre_activation, gamma, mean_table, var_table):
            # Apply the normalization operator using the statistics rows
            # selected by the clamped step counter.
            return NormalizationOperator(
                self.normop_type, pre_activation, gamma, bnmask_t,
                mean_table[stat_idx, :], var_table[stat_idx, :])

        # Reset gate
        r_t_normop_x_inp, r_t_normop_x_mean, r_t_normop_x_var = normalize(
            T.dot(x_t, self.W_in_r), self.normop_r_x_gamma,
            self.normop_r_x_mean, self.normop_r_x_var)
        r_t_normop_h_inp, r_t_normop_h_mean, r_t_normop_h_var = normalize(
            T.dot(hr_tm1, self.W_hh_r), self.normop_r_h_gamma,
            self.normop_r_h_mean, self.normop_r_h_var)
        r_t = T.nnet.sigmoid(r_t_normop_x_inp + r_t_normop_h_inp + self.b_r)

        # Update gate
        z_t_normop_x_inp, z_t_normop_x_mean, z_t_normop_x_var = normalize(
            T.dot(x_t, self.W_in_z), self.normop_z_x_gamma,
            self.normop_z_x_mean, self.normop_z_x_var)
        z_t_normop_h_inp, z_t_normop_h_mean, z_t_normop_h_var = normalize(
            T.dot(hr_tm1, self.W_hh_z), self.normop_z_h_gamma,
            self.normop_z_h_mean, self.normop_z_h_var)
        z_t = T.nnet.sigmoid(z_t_normop_x_inp + z_t_normop_h_inp + self.b_z)

        # Candidate state
        h_tilde_normop_x_inp, h_tilde_normop_x_mean, h_tilde_normop_x_var = normalize(
            T.dot(x_t, self.W_in), self.normop_in_x_gamma,
            self.normop_in_x_mean, self.normop_in_x_var)
        h_tilde_normop_h_inp, h_tilde_normop_h_mean, h_tilde_normop_h_var = normalize(
            T.dot(r_t * hr_tm1, self.W_hh), self.normop_in_h_gamma,
            self.normop_in_h_mean, self.normop_in_h_var)
        h_tilde = self.sent_rec_activation(h_tilde_normop_x_inp + h_tilde_normop_h_inp + self.b_hh)

        # New hidden state: blend of the (possibly reset) previous state and the candidate.
        h_t = (np.float32(1.0) - z_t) * hr_tm1 + z_t * h_tilde

        # States first, then gates, then the normalization statistics.
        return [
            h_t, T.cast(step_count, 'int8'), r_t, z_t, h_tilde,
            r_t_normop_x_mean, r_t_normop_x_var,
            r_t_normop_h_mean, r_t_normop_h_var,
            z_t_normop_x_mean, z_t_normop_x_var,
            z_t_normop_h_mean, z_t_normop_h_var,
            h_tilde_normop_x_mean, h_tilde_normop_x_var,
            h_tilde_normop_h_mean, h_tilde_normop_h_var,
        ]

项目：learning-class-invariant-features 作者：sbelharbi | 项目源码 | 文件源码

def __init__(self, input, n_in, n_out, is_binary=False, threshold=0.4,
                 rng=None):
        """
        Set up a sigmoid logistic-regression output layer.

        :type input: theano.tensor.TensorType
        :param input: symbolic variable describing the input of the
        architecture (one minibatch)
        :type n_in: int
        :param n_in: number of input units, the dimension of the space in which
        the datapoints lie
        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
        which the labels lie (number of classes)
        :param is_binary: treat the layer as a binary classifier; forced to
        True when n_out == 1
        :param threshold: in the binary case, a probability strictly above
        this value is decided as class 1
        :param rng: forwarded unchanged to the parent constructor
        """
        self.activation = T.nnet.sigmoid
        self.threshold = threshold
        super(LogisticRegressionLayer, self).__init__(
            input, n_in, n_out, self.activation, rng)

        self.reset_layer()

        # A single output unit always means a binary decision.
        self.is_binary = True if n_out == 1 else is_binary

        # Per-class counters: how often each class was seen, and how often
        # a wrong classification was made for it.
        self.n_classes_seen = np.zeros(n_out)
        self.n_wrong_classif_made = np.zeros(n_out)

        self.reset_conf_mat()

        # Symbolic class-membership probabilities.
        self.p_y_given_x = self.get_class_memberships(self.input)

        if self.is_binary:
            # Class 1 when p(y=1|x) > threshold, otherwise class 0.
            self.y_decision = T.gt(T.flatten(self.p_y_given_x), self.threshold)
        else:
            # Pick the class whose probability is maximal.
            self.y_decision = T.argmax(self.p_y_given_x, axis=1)

        self.params = [self.W, self.b]

项目：denet 作者：lachlants | 项目源码 | 文件源码

def __init__(self, layers, crop_size=None, mirror_pr = 0.0, flip_pr=0.0, json_param={}):
        """
        Append a crop / mirror / flip layer on top of the last entry of ``layers``.

        Values found in ``json_param`` under "crop", "mirror" and "flip" take
        precedence over the ``crop_size``, ``mirror_pr`` and ``flip_pr``
        arguments. The output is gathered from the input with per-sample
        index arrays; mirroring, flipping and random crop offsets are only
        selected when ``get_train()`` is true, otherwise a centred crop
        without reversal is used.
        """
        super().__init__(layer_index=len(layers))

        previous = layers[-1]
        self.input = previous.output
        self.input_shape = previous.output_shape

        self.crop_size = json_param.get("crop", crop_size)
        self.mirror_pr = json_param.get("mirror", mirror_pr)
        self.flip_pr = json_param.get("flip", flip_pr)

        batch_size, n_channels = self.input_shape[0], self.input_shape[1]
        crop_x, crop_y = self.crop_size[0], self.crop_size[1]
        self.output_shape = (batch_size, n_channels, crop_x, crop_y)
        self.output = []

        never = tensor.zeros((batch_size,), dtype=numpy.int8)
        index_b = tensor.arange(batch_size)
        index_c = tensor.arange(n_channels)
        index_x = tensor.arange(crop_x)[None,:]
        index_y = tensor.arange(crop_y)[None,:]

        def maybe_reverse(index, probability, extent):
            # Per sample, reverse the index order with the given probability,
            # but only while training.
            chosen = tensor.gt(get_rng().uniform(size=(batch_size,)), 1.0 - probability)
            chosen = tensor.switch(get_train(), chosen, never)
            return tensor.switch(chosen[:,None], (extent - 1) - index, index)

        # Random mirror (y-axis); must draw from the RNG before the flip does.
        if self.mirror_pr > 0.0:
            index_y = maybe_reverse(index_y, self.mirror_pr, crop_y)

        # Random flip (x-axis).
        if self.flip_pr > 0.0:
            index_x = maybe_reverse(index_x, self.flip_pr, crop_x)

        # Crop offset: random while training, centred otherwise.
        dx = self.input_shape[2] - crop_x
        dy = self.input_shape[3] - crop_y
        if dx != 0 or dy != 0:
            center_x = theano.shared(numpy.full(shape=(batch_size,), fill_value=dx // 2, dtype=numpy.int32), borrow=False)
            center_y = theano.shared(numpy.full(shape=(batch_size,), fill_value=dy // 2, dtype=numpy.int32), borrow=False)
            offset_x = get_rng().random_integers(size=(batch_size,), low=0, high=dx)
            offset_y = get_rng().random_integers(size=(batch_size,), low=0, high=dy)
            index_x += tensor.switch(get_train(), offset_x, center_x)[:,None]
            index_y += tensor.switch(get_train(), offset_y, center_y)[:,None]

        # Gather the cropped output with broadcast advanced indexing over
        # (batch, channel, x, y).
        self.output = self.input[
            index_b[:,None,None,None],
            index_c[None,:,None,None],
            index_x[:,None,:,None],
            index_y[:,None,None,:],
        ]

        logging.verbose("Adding", self)

        # logging.verbose("Adding", self, "layer - input:", self.input_shape, "crop size:", self.crop_size, "mirror pr:", self.mirror_pr, 
        #                 "flip pr:", self.flip_pr, "test position:", (dx//2, dy//2))

项目：Precise-CTC 作者：Michlong | 项目源码 | 文件源码

def ctc_path_probability(scorematrix, queryseq, blank):
    """
    Compute path probability based on CTC algorithm, only forward pass is used.
    Batch not supported, for batch version, refer to the CTC class above
    Speed much slower than the numba & cython version (51.5min vs ~3.9min on word_correction_CTC experiment)
    :param scorematrix: indexed as [symbol, time]; the time length is read from shape[1], i.e. (C+1, T)
    :param queryseq:    label sequence; its length L is read from shape[0] and its entries index the
                        first axis of scorematrix. NOTE(review): previously documented as (L, 1) -- the
                        indexing suggests a 1-D vector of length L; confirm against callers.
    :param blank:       scalar, blank symbol
    :return: (NLL, alphas), NLL > 0 (smaller is better, = -log(p(l|x)); alphas is the forward variable)
    """

    def update_s(s, alphas, scorematrix, queryseq, blank, t):
        # Fill alphas[s, t] from column t - 1. Even s are blank positions, odd s
        # hold label number l of the query sequence.
        l = (s - 1) // 2
        alphas = ifelse(tensor.eq(s % 2, 0),
                        # Blank position: position 0 has no predecessor position.
                        ifelse(tensor.eq(s, 0),
                               tensor.set_subtensor(alphas[s, t], alphas[s, t - 1] * scorematrix[blank, t]),
                               tensor.set_subtensor(alphas[s, t],
                                                    (alphas[s, t - 1] + alphas[s - 1, t - 1]) * scorematrix[blank, t]),
                               name='for_blank_symbol'),
                        # Label position: the jump from s - 2 is only allowed when
                        # s > 1 and the label differs from the previous label.
                        ifelse(tensor.or_(tensor.eq(s, 1), tensor.eq(queryseq[l], queryseq[l - 1])),
                               tensor.set_subtensor(alphas[s, t],
                                                    (alphas[s, t - 1] + alphas[s - 1, t - 1]) * scorematrix[
                                                        queryseq[l], t]),
                               tensor.set_subtensor(alphas[s, t],
                                                    (alphas[s, t - 1] + alphas[s - 1, t - 1] + alphas[s - 2, t - 1]) *
                                                    scorematrix[queryseq[l], t]),
                               name='for_same_label_twice'))
        return alphas

    def update_t(t, LLForward, alphas, scorematrix, queryseq, blank, T, L2):
        # Only positions in [start, end) are updated at time t.
        start = tensor.max([0, L2 - 2 * (T - t)])
        end = tensor.min([2 * t + 2, L2])
        s = tensor.arange(start, end)
        results, _ = theano.scan(fn=update_s, sequences=[s], non_sequences=[scorematrix, queryseq, blank, t],
                                 outputs_info=[alphas], name='scan_along_s')
        alphas = results[-1]
        # Rescale the updated column to sum to one and accumulate the log of the
        # scale factor; the factor is floored at 1e-15 before dividing.
        c = tensor.sum(alphas[start:end, t])
        c = tensor.max([1e-15, c])
        alphas = tensor.set_subtensor(alphas[start:end, t], alphas[start:end, t] / c)
        LLForward += tensor.log(c)
        return LLForward, alphas

    L = queryseq.shape[0]                                                 # Length of label sequence
    L2 = 2 * L + 1                                                        # Length of label sequence padded with blanks
    T = scorematrix.shape[1]                                              # time length
    alphas = tensor.zeros((L2, T))
    # Initialize alphas and forward pass
    # At time 0 only the leading blank (position 0) and the first label (position 1) are set.
    alphas = tensor.set_subtensor(alphas[[0, 1], 0], scorematrix[[blank, queryseq[0]], 0])
    # NOTE(review): unlike update_t, this first scale factor is not floored at 1e-15.
    c = tensor.sum(alphas[:, 0])
    alphas = tensor.set_subtensor(alphas[:, 0], alphas[:, 0] / c)
    LLForward = tensor.log(c)
    t = tensor.arange(1, T)
    results, _ = theano.scan(fn=update_t, sequences=[t], non_sequences=[scorematrix, queryseq, blank, T, L2],
                             outputs_info=[LLForward, alphas], name='scan_along_t')
    # With a single time step the scan yields nothing, so fall back to the initial values.
    NLL, alphas = ifelse(tensor.gt(T, 1), (-results[0][-1], results[1][-1]), (-LLForward, alphas))
    return NLL, alphas