Python theano.tensor 模块,eye() 实例源码


项目:dnc-theano    作者:khaotik    | 项目源码 | 文件源码
def op_ortho_loss(s_x_, axes_=(-2, -1), ndim_=None):
    '''
    Orthogonal matrix loss, used to regularize a parameter towards
    orthogonality.

    Computes the elementwise squared difference between ``X^T X`` and the
    identity matrix. The result is NOT reduced; callers take the mean/sum.

    Args:
        s_x_: (batch of) matrices
        axes_: tuple of two integers, specify which axes form the matrix,
            defaults to the last two axes
        ndim_: specify args to be (ndim_ x ndim_) matrices; when None the
            size is read from the symbolic shape of `s_x_` along `axes_[0]`

    Returns:
        symbolic tensor ``(X^T X - I) ** 2``
    '''
    if ndim_ is None:
        ndim = T.shape(s_x_)[axes_[0]]
    else:
        ndim = ndim_

    # The transpose pattern must enumerate the tensor's axes (s_x_.ndim),
    # not the matrix size; this matches op_unitary_loss.
    tpat = list(range(s_x_.ndim))
    bpat = ['x'] * s_x_.ndim
    tpat[axes_[0]], tpat[axes_[1]] = tpat[axes_[1]], tpat[axes_[0]]
    bpat[axes_[0]] = 0
    bpat[axes_[1]] = 1
    # NOTE(review): the `T.dot(s_x_.transpose(` prefix was truncated in the
    # listing and has been restored -- confirm against upstream.
    s_y = T.dot(s_x_.transpose(*tpat), s_x_)
    return T.sqr(s_y - T.eye(ndim).dimshuffle(*bpat))
项目:dnc-theano    作者:khaotik    | 项目源码 | 文件源码
def op_covmat(s_x_, l1_normize_=True, eps_=1e-7):
    '''
    Return the covariance matrix of a batch of data points.

    Args:
        s_x_:
            batch of row vectors (must be a 2-D tensor)
        l1_normize_:
            Defaults to True.
            Divide the covariance matrix by the number of data points.
        eps_:
            Adds a small identity matrix I*eps_ to the result; this is
            applied after the normalization.

    Returns:
        symbolic square matrix whose side is ``s_x_.shape[1]``
    '''
    assert s_x_.ndim == 2
    # Centered data (the input minus its per-column mean).
    s_centered = s_x_ - T.mean(s_x_, axis=0, keepdims=True)
    s_shp = T.shape(s_x_)
    # NOTE(review): the `T.dot(<centered>.T` prefix was truncated in the
    # listing and has been restored -- confirm against upstream.
    s_covmat = T.dot(s_centered.T, s_centered)
    if l1_normize_:
        s_covmat /= s_shp[0]
    return s_covmat + T.eye(s_shp[1]) * eps_
项目:recnet    作者:joergfranke    | 项目源码 | 文件源码
def _ctc_normal(self, predict,labels):
        """Return the negative log of the CTC label probability.

        `predict` is indexed as ``predict[:, labels]`` and scanned over its
        first axis; `labels` is a 1-D label sequence. The blank symbol is
        read from ``self.tpo["CTC_blank"]``.
        """
        n = labels.shape[0]

        # Pad with two blanks so the shifted comparisons below have length n.
        labels2 = T.concatenate((labels, [self.tpo["CTC_blank"], self.tpo["CTC_blank"]]))
        # A skip of two positions is allowed only across a blank that
        # separates two different labels.
        sec_diag = T.neq(labels2[:-2], labels2[2:]) * \
                   T.eq(labels2[1:-1], self.tpo["CTC_blank"])

        recurrence_relation = \
            T.eye(n) + \
            T.eye(n, k=1) + \
            T.eye(n, k=2) * sec_diag.dimshuffle((0, 'x'))

        pred_y = predict[:, labels]

        # NOTE(review): the `T.dot(accum` prefix and the remaining scan
        # arguments were truncated in the listing; restored from the usual
        # form of this recurrence -- confirm against upstream.
        probabilities, _ = theano.scan(
            lambda curr, accum: curr * T.dot(accum, recurrence_relation),
            sequences=[pred_y],
            outputs_info=[T.eye(n)[0]]
        )

        # Sum the last two entries of the final scan step.
        labels_probab = T.sum(probabilities[-1, -2:])
        return -T.log(labels_probab)
项目:opt-mmd    作者:dougalsutherland    | 项目源码 | 文件源码
def get_output_for(self, input, init=False, **kwargs):
        """Append pairwise-distance features computed across the batch.

        Inputs with more than two dimensions are flattened to 2-D. When
        `init` is true, data-dependent updates for ``self.log_weight_scale``
        and ``self.b`` are stored in ``self.init_updates``.

        Returns the flattened input concatenated with the new features
        along axis 1.
        """
        # Collapse trailing dimensions into one feature vector per sample.
        if input.ndim > 2:
            input = input.flatten(2)

        projected = T.tensordot(input, self.W, [[1], [0]])
        lhs = projected.dimshuffle(0, 1, 2, 'x')
        rhs = projected.dimshuffle('x', 1, 2, 0)
        # Large constant where a sample is paired with itself.
        self_penalty = 1e6 * T.eye(input.shape[0]).dimshuffle(0, 'x', 1)
        distances = T.sum(abs(lhs - rhs), axis=2) + self_penalty

        if init:
            scale = 0.5 * T.mean(T.min(distances, axis=2), axis=0)
            distances = distances / scale.dimshuffle('x', 0, 'x')
            new_log_scale = self.log_weight_scale - T.log(scale).dimshuffle(0, 'x')
            self.init_updates = [(self.log_weight_scale, new_log_scale)]

        features = T.sum(T.exp(-distances), axis=2)

        if init:
            batch_mean = T.mean(features, axis=0)
            features = features - batch_mean.dimshuffle('x', 0)
            self.init_updates.append((self.b, -batch_mean))
        else:
            features = features + self.b.dimshuffle('x', 0)

        return T.concatenate([input, features], axis=1)
项目:opt-mmd    作者:dougalsutherland    | 项目源码 | 文件源码
def linear_mmd2_and_hotelling(X, Y, biased=True, reg=0):
    """Return the linear-kernel MMD^2 estimate and a Hotelling-style statistic.

    X and Y are indexed as 2-D (rows are samples) and are subtracted
    elementwise, so they must have matching shapes. `reg` scales an identity
    matrix added to the sample covariance before the Cholesky factorization.

    Raises:
        ValueError: if `biased` is false (only the biased estimate exists).

    Returns:
        (mmd2, lambda_) as symbolic scalars.
    """
    if not biased:
        raise ValueError("linear_mmd2_and_hotelling only works for biased est")

    n = X.shape[0]
    p = X.shape[1]
    Z = X - Y
    Z_bar = Z.mean(axis=0)
    # NOTE(review): the three `.dot(...)` expressions below were truncated in
    # the listing and have been restored -- confirm against upstream.
    mmd2 = Z_bar.dot(Z_bar)

    Z_cent = Z - Z_bar
    S = Z_cent.T.dot(Z_cent) / (n - 1)
    # z' inv(S) z = z' inv(L L') z = z' inv(L)' inv(L) z = ||inv(L) z||^2
    L = slinalg.cholesky(S + reg * T.eye(p))
    Linv_Z_bar = slinalg.solve_lower_triangular(L, Z_bar)
    lambda_ = n * Linv_Z_bar.dot(Linv_Z_bar)
    # happens on the CPU!
    return mmd2, lambda_
项目:DeepMonster    作者:olimastro    | 项目源码 | 文件源码
def get_output_for(self, input, init=False, **kwargs):
        """Append pairwise-distance features computed across the batch.

        Inputs with more than two dimensions are flattened to 2-D. When
        `init` is true, data-dependent updates for ``self.log_weight_scale``
        and ``self.b`` are stored in ``self.init_updates``.

        Returns the flattened input concatenated with the new features
        along axis 1.
        """
        # Collapse trailing dimensions into one feature vector per sample.
        if input.ndim > 2:
            input = input.flatten(2)

        projected = T.tensordot(input, self.W, [[1], [0]])
        lhs = projected.dimshuffle(0, 1, 2, 'x')
        rhs = projected.dimshuffle('x', 1, 2, 0)
        # Large constant where a sample is paired with itself.
        self_penalty = 1e6 * T.eye(input.shape[0]).dimshuffle(0, 'x', 1)
        distances = T.sum(abs(lhs - rhs), axis=2) + self_penalty

        if init:
            scale = 0.5 * T.mean(T.min(distances, axis=2), axis=0)
            distances = distances / scale.dimshuffle('x', 0, 'x')
            new_log_scale = self.log_weight_scale - T.log(scale).dimshuffle(0, 'x')
            self.init_updates = [(self.log_weight_scale, new_log_scale)]

        features = T.sum(T.exp(-distances), axis=2)

        if init:
            batch_mean = T.mean(features, axis=0)
            features = features - batch_mean.dimshuffle('x', 0)
            self.init_updates.append((self.b, -batch_mean))
        else:
            features = features + self.b.dimshuffle('x', 0)

        return T.concatenate([input, features], axis=1)
项目:WGAN_mnist    作者:rajeswar18    | 项目源码 | 文件源码
def get_output_for(self, input, init=False, **kwargs):
        """Append pairwise-distance features computed across the batch.

        Inputs with more than two dimensions are flattened to 2-D. When
        `init` is true, data-dependent updates for ``self.log_weight_scale``
        and ``self.b`` are stored in ``self.init_updates``.

        Returns the flattened input concatenated with the new features
        along axis 1.
        """
        # Collapse trailing dimensions into one feature vector per sample.
        if input.ndim > 2:
            input = input.flatten(2)

        projected = T.tensordot(input, self.W, [[1], [0]])
        lhs = projected.dimshuffle(0, 1, 2, 'x')
        rhs = projected.dimshuffle('x', 1, 2, 0)
        # Large constant where a sample is paired with itself.
        self_penalty = 1e6 * T.eye(input.shape[0]).dimshuffle(0, 'x', 1)
        distances = T.sum(abs(lhs - rhs), axis=2) + self_penalty

        if init:
            scale = 0.5 * T.mean(T.min(distances, axis=2), axis=0)
            distances = distances / scale.dimshuffle('x', 0, 'x')
            new_log_scale = self.log_weight_scale - T.log(scale).dimshuffle(0, 'x')
            self.init_updates = [(self.log_weight_scale, new_log_scale)]

        features = T.sum(T.exp(-distances), axis=2)

        if init:
            batch_mean = T.mean(features, axis=0)
            features = features - batch_mean.dimshuffle('x', 0)
            self.init_updates.append((self.b, -batch_mean))
        else:
            features = features + self.b.dimshuffle('x', 0)

        return T.concatenate([input, features], axis=1)
项目:lstmprovisor-python    作者:Impro-Visor    | 项目源码 | 文件源码
def note_to_encoding(self, chosen_note, relative_position, low_bound, high_bound):
        """Encode a batch of chosen notes as concatenated one-hot segments.

        `chosen_note` must be 1-D. Values below 2 select one of two fixed
        rows of width ``self.ENCODING_WIDTH`` (value 0 -> ``[1, 0, 0...]``,
        otherwise ``[0, 1, 0...]``). Other values are encoded from
        ``chosen_note - 2`` as: a fixed ``[0, 0, 1]`` prefix, one-hot mod 4,
        one-hot mod 3, and a one-hot octave index offset by `low_bound` and
        ``self.octave_start``.

        `relative_position` and `high_bound` are accepted but not used here.
        """
        assert chosen_note.ndim == 1
        n_batch = chosen_note.shape[0]

        dont_play_version = T.switch( T.shape_padright(T.eq(chosen_note, 0)),
                                        T.tile(np.array([[1,0] + [0]*(self.ENCODING_WIDTH-2)], dtype=np.float32), (n_batch, 1)),
                                        T.tile(np.array([[0,1] + [0]*(self.ENCODING_WIDTH-2)], dtype=np.float32), (n_batch, 1)))

        rcp = T.tile(np.array([0,0,1],dtype=np.float32), (n_batch, 1))
        circle_1 = T.eye(4)[(chosen_note-2)%4]
        circle_2 = T.eye(3)[(chosen_note-2)%3]
        octave = T.eye(self.num_octaves)[(chosen_note-2+low_bound-self.octave_start)//12]

        play_version = T.concatenate([rcp, circle_1, circle_2, octave], 1)

        # NOTE(review): the comparison `T.lt(chosen_note` was truncated in the
        # listing and has been restored -- confirm against upstream.
        encoded_form = T.switch( T.shape_padright(T.lt(chosen_note, 2)), dont_play_version, play_version )
        return encoded_form
项目:triple-gan    作者:zhenxuan00    | 项目源码 | 文件源码
def get_output_for(self, input, init=False, **kwargs):
        """Append pairwise-distance features computed across the batch.

        Inputs with more than two dimensions are flattened to 2-D. When
        `init` is true, data-dependent updates for ``self.log_weight_scale``
        and ``self.b`` are stored in ``self.init_updates``.

        Returns the flattened input concatenated with the new features
        along axis 1.
        """
        # Collapse trailing dimensions into one feature vector per sample.
        if input.ndim > 2:
            input = input.flatten(2)

        projected = T.tensordot(input, self.W, [[1], [0]])
        lhs = projected.dimshuffle(0, 1, 2, 'x')
        rhs = projected.dimshuffle('x', 1, 2, 0)
        # Large constant where a sample is paired with itself.
        self_penalty = 1e6 * T.eye(input.shape[0]).dimshuffle(0, 'x', 1)
        distances = T.sum(abs(lhs - rhs), axis=2) + self_penalty

        if init:
            scale = 0.5 * T.mean(T.min(distances, axis=2), axis=0)
            distances = distances / scale.dimshuffle('x', 0, 'x')
            new_log_scale = self.log_weight_scale - T.log(scale).dimshuffle(0, 'x')
            self.init_updates = [(self.log_weight_scale, new_log_scale)]

        features = T.sum(T.exp(-distances), axis=2)

        if init:
            batch_mean = T.mean(features, axis=0)
            features = features - batch_mean.dimshuffle('x', 0)
            self.init_updates.append((self.b, -batch_mean))
        else:
            features = features + self.b.dimshuffle('x', 0)

        return T.concatenate([input, features], axis=1)
项目:saliency-salgan-2017    作者:imatge-upc    | 项目源码 | 文件源码
def get_output_for(self, input, init=False, **kwargs):
        """Append pairwise-distance features computed across the batch.

        Inputs with more than two dimensions are flattened to 2-D. When
        `init` is true, data-dependent updates for ``self.log_weight_scale``
        and ``self.b`` are stored in ``self.init_updates``.

        Returns the flattened input concatenated with the new features
        along axis 1.
        """
        # Collapse trailing dimensions into one feature vector per sample.
        if input.ndim > 2:
            input = input.flatten(2)

        projected = T.tensordot(input, self.W, [[1], [0]])
        lhs = projected.dimshuffle(0, 1, 2, 'x')
        rhs = projected.dimshuffle('x', 1, 2, 0)
        # Large constant where a sample is paired with itself.
        self_penalty = 1e6 * T.eye(input.shape[0]).dimshuffle(0, 'x', 1)
        distances = T.sum(abs(lhs - rhs), axis=2) + self_penalty

        if init:
            scale = 0.5 * T.mean(T.min(distances, axis=2), axis=0)
            distances = distances / scale.dimshuffle('x', 0, 'x')
            new_log_scale = self.log_weight_scale - T.log(scale).dimshuffle(0, 'x')
            self.init_updates = [(self.log_weight_scale, new_log_scale)]

        features = T.sum(T.exp(-distances), axis=2)

        if init:
            batch_mean = T.mean(features, axis=0)
            features = features - batch_mean.dimshuffle('x', 0)
            self.init_updates.append((self.b, -batch_mean))
        else:
            features = features + self.b.dimshuffle('x', 0)

        return T.concatenate([input, features], axis=1)
项目:Precise-CTC    作者:Michlong    | 项目源码 | 文件源码
def _recurrence_relation(queryseq_padded, queryseq_mask_padded=None, blank_symbol=None):
        """
        Generate structured matrix r2 & r3 for dynamic programming recurrence
        :param queryseq_padded: (2L+1, B)
        :param queryseq_mask_padded: (2L+1, B)
        :param blank_symbol: = C
        :return: r2 (2L+1, 2L+1), r3 (2L+1, 2L+1, B)
        """
        L2 = queryseq_padded.shape[0]                                                           # = 2L+1
        blanks = tensor.zeros((2, queryseq_padded.shape[1])) + blank_symbol                     # (2, B)
        ybb = tensor.concatenate((queryseq_padded, blanks), axis=0).T                           # (2L+3, B) -> (B, 2L+3)
        # A two-step skip is allowed only across a blank separating two different symbols.
        sec_diag = tensor.neq(ybb[:, :-2], ybb[:, 2:]) * tensor.eq(ybb[:, 1:-1], blank_symbol)  # (B, 2L+1)
        if queryseq_mask_padded is not None:
            sec_diag *= queryseq_mask_padded.T
        r2 = tensor.eye(L2, k=1)                                                                # first superdiagonal (2L+1, 2L+1)
        r3 = tensor.eye(L2, k=2).dimshuffle(0, 1, 'x') * sec_diag.dimshuffle(1, 'x', 0)         # (2L+1, 2L+1, B)
        return r2, r3
项目:SEVDL_MGP    作者:AMLab-Amsterdam    | 项目源码 | 文件源码
def f(self, x, sampling=True, **kwargs):
        """Compute the layer output for `x`.

        Both `x` and the inducing inputs (``self.params[3]``) are divided by
        ``sqrt(self.dim_in)``. With `sampling` true, the inducing inputs and
        outputs are multiplied by noise whose scales come from
        ``self._get_stds()``, and the result is drawn with ``sample_gauss``;
        otherwise the mean ``mu_x`` is returned.
        """
        x /= np.cast[theano.config.floatX](np.sqrt(self.dim_in))
        indx, indy = self.params[3], self.params[4]
        indx /= np.cast[theano.config.floatX](np.sqrt(self.dim_in))
        if sampling:
            stdx, stdy = self._get_stds()
            noisex, noisey = sample_mult_noise(stdx, indx.shape), sample_mult_noise(stdy, indy.shape)
            indy *= noisey; indx *= noisex
        Rr, Rc = T.exp(self.params[1]), T.exp(self.params[2])
        U = T.sqr(Rr)
        # NOTE(review): every `T.dot(<first operand>` prefix below was
        # truncated in the listing; the operands were restored from the
        # variable names (sigma11: inducing/inducing, sigma22: x/x,
        # sigma12: inducing/x) -- confirm against upstream.
        sigma11 = T.dot(indx * U.dimshuffle('x', 0), indx.T) + eps_ind * T.eye(self.n_inducing)
        sigma22 = T.dot(x * U.dimshuffle('x', 0), x.T)
        sigma12 = T.dot(indx * U.dimshuffle('x', 0), x.T)
        mu_ind = T.dot(indx, self.params[0])
        inv_sigma11 = Tn.matrix_inverse(sigma11)
        mu_x = T.dot(x, self.params[0]) + T.dot(sigma12.T, inv_sigma11).dot(indy - mu_ind)
        if not sampling:
            return mu_x
        sigma_x = Tn.extract_diag(sigma22 - T.dot(sigma12.T, inv_sigma11).dot(sigma12))

        std = T.outer(T.sqrt(sigma_x), Rc)
        out_sample = sample_gauss(mu_x, std)
        return out_sample
项目:SEVDL_MGP    作者:AMLab-Amsterdam    | 项目源码 | 文件源码
def f(self, x, sampling=True, **kwargs):
        """Compute the layer output for `x`.

        Both `x` and the inducing inputs (``self.params[3]``) are divided by
        ``sqrt(self.dim_in)``. With `sampling` true, the inducing inputs and
        outputs are multiplied by noise whose scales are ``exp`` of the last
        two entries of ``self.params``, and the result is drawn with
        ``sample_gauss``; otherwise the mean ``mu_x`` is returned.
        """
        x /= np.cast[theano.config.floatX](np.sqrt(self.dim_in))
        indx, indy = self.params[3], self.params[4]
        indx /= np.cast[theano.config.floatX](np.sqrt(self.dim_in))
        if sampling:
            noisex = sample_mult_noise(T.exp(self.params[-2]), indx.shape)
            noisey = sample_mult_noise(T.exp(self.params[-1]), indy.shape)
            indy *= noisey; indx *= noisex
        Rr, Rc = T.exp(self.params[1]), T.exp(self.params[2])
        U = T.sqr(Rr)
        # NOTE(review): every `T.dot(<first operand>` prefix below was
        # truncated in the listing; the operands were restored from the
        # variable names (sigma11: inducing/inducing, sigma22: x/x,
        # sigma12: inducing/x) -- confirm against upstream.
        sigma11 = T.dot(indx * U.dimshuffle('x', 0), indx.T) + eps_ind * T.eye(self.n_inducing)
        sigma22 = T.dot(x * U.dimshuffle('x', 0), x.T)
        sigma12 = T.dot(indx * U.dimshuffle('x', 0), x.T)
        mu_ind = T.dot(indx, self.params[0])
        inv_sigma11 = Tn.matrix_inverse(sigma11)
        mu_x = T.dot(x, self.params[0]) + T.dot(sigma12.T, inv_sigma11).dot(indy - mu_ind)
        if not sampling:
            return mu_x
        sigma_x = Tn.extract_diag(sigma22 - T.dot(sigma12.T, inv_sigma11).dot(sigma12))
        std = T.outer(T.sqrt(sigma_x), Rc)
        out_sample = sample_gauss(mu_x, std)
        return out_sample
项目:deligan    作者:val-iisc    | 项目源码 | 文件源码
def get_output_for(self, input, init=False, **kwargs):
        """Append pairwise-distance features computed across the batch.

        Inputs with more than two dimensions are flattened to 2-D. When
        `init` is true, data-dependent updates for ``self.log_weight_scale``
        and ``self.b`` are stored in ``self.init_updates``.

        Returns the flattened input concatenated with the new features
        along axis 1.
        """
        # Collapse trailing dimensions into one feature vector per sample.
        if input.ndim > 2:
            input = input.flatten(2)

        projected = T.tensordot(input, self.W, [[1], [0]])
        lhs = projected.dimshuffle(0, 1, 2, 'x')
        rhs = projected.dimshuffle('x', 1, 2, 0)
        # Large constant where a sample is paired with itself.
        self_penalty = 1e6 * T.eye(input.shape[0]).dimshuffle(0, 'x', 1)
        distances = T.sum(abs(lhs - rhs), axis=2) + self_penalty

        if init:
            scale = 0.5 * T.mean(T.min(distances, axis=2), axis=0)
            distances = distances / scale.dimshuffle('x', 0, 'x')
            new_log_scale = self.log_weight_scale - T.log(scale).dimshuffle(0, 'x')
            self.init_updates = [(self.log_weight_scale, new_log_scale)]

        features = T.sum(T.exp(-distances), axis=2)

        if init:
            batch_mean = T.mean(features, axis=0)
            features = features - batch_mean.dimshuffle('x', 0)
            self.init_updates.append((self.b, -batch_mean))
        else:
            features = features + self.b.dimshuffle('x', 0)

        return T.concatenate([input, features], axis=1)

# Input Mixture of Gaussian Layer
项目:deligan    作者:val-iisc    | 项目源码 | 文件源码
def get_output_for(self, input, init=False, **kwargs):
        """Append pairwise-distance features computed across the batch.

        Inputs with more than two dimensions are flattened to 2-D. When
        `init` is true, data-dependent updates for ``self.log_weight_scale``
        and ``self.b`` are stored in ``self.init_updates``.

        Returns the flattened input concatenated with the new features
        along axis 1.
        """
        # Collapse trailing dimensions into one feature vector per sample.
        if input.ndim > 2:
            input = input.flatten(2)

        projected = T.tensordot(input, self.W, [[1], [0]])
        lhs = projected.dimshuffle(0, 1, 2, 'x')
        rhs = projected.dimshuffle('x', 1, 2, 0)
        # Large constant where a sample is paired with itself.
        self_penalty = 1e6 * T.eye(input.shape[0]).dimshuffle(0, 'x', 1)
        distances = T.sum(abs(lhs - rhs), axis=2) + self_penalty

        if init:
            scale = 0.5 * T.mean(T.min(distances, axis=2), axis=0)
            distances = distances / scale.dimshuffle('x', 0, 'x')
            new_log_scale = self.log_weight_scale - T.log(scale).dimshuffle(0, 'x')
            self.init_updates = [(self.log_weight_scale, new_log_scale)]

        features = T.sum(T.exp(-distances), axis=2)

        if init:
            batch_mean = T.mean(features, axis=0)
            features = features - batch_mean.dimshuffle('x', 0)
            self.init_updates.append((self.b, -batch_mean))
        else:
            features = features + self.b.dimshuffle('x', 0)

        return T.concatenate([input, features], axis=1)

# Input Mixture of Gaussian Layer
项目:dnc-theano    作者:khaotik    | 项目源码 | 文件源码
def op_unitary_loss(s_re_, s_im_, axes_=None, size_=None):
    '''
    Unitary matrix loss on the real/imaginary parts of a complex matrix,
    used to regularize a parameter towards unitarity.

    Writing U = re + i*im, the real part of U^H U is
    ``re^T re + im^T im`` and the imaginary part is
    ``re^T im - (re^T im)^T``; the loss is the mean squared deviation of
    these from the identity and zero respectively.

    Args:
        s_re_: real part, square matrix
        s_im_: imag part, square matrix
        size_: specify args to be (size_ x size_) matrices; when None the
            size is read from the symbolic shape of `s_re_` along `axes_[0]`
        axes_: tuple of two integers, specify which axes form the matrix,
            defaults to the last two axes

    Returns:
        symbolic scalar loss
    '''
    if axes_ is None:
        axes_ = (-2, -1)

    if size_ is None:
        size = T.shape(s_re_)[axes_[0]]
    else:
        size = size_

    assert s_re_.ndim == s_im_.ndim

    tpat = list(range(s_re_.ndim))
    bpat = ['x'] * s_re_.ndim
    tpat[axes_[0]], tpat[axes_[1]] = tpat[axes_[1]], tpat[axes_[0]]
    bpat[axes_[0]] = 0
    bpat[axes_[1]] = 1
    # NOTE(review): the `T.dot(<operand>.transpose(` prefixes were truncated
    # in the listing; the operands were restored from the algebra described
    # in the docstring -- confirm against upstream.
    s_y_re_ = T.dot(s_re_.transpose(*tpat), s_re_) + T.dot(s_im_.transpose(*tpat), s_im_)
    s_tmp = T.dot(s_re_.transpose(*tpat), s_im_)
    s_y_im_ = s_tmp - s_tmp.transpose(*tpat)
    return T.mean(T.sqr(s_y_re_ - T.eye(size).dimshuffle(*bpat)) + T.sqr(s_y_im_))
项目:Neural-Photo-Editor    作者:ajbrock    | 项目源码 | 文件源码
def get_output_for(self, input, init=False, **kwargs):
        """Append pairwise-distance features computed across the batch.

        Inputs with more than two dimensions are flattened to 2-D. When
        `init` is true, data-dependent updates for ``self.log_weight_scale``
        and ``self.b`` are stored in ``self.init_updates``.

        Returns the flattened input concatenated with the new features
        along axis 1.
        """
        # Collapse trailing dimensions into one feature vector per sample.
        if input.ndim > 2:
            input = input.flatten(2)

        projected = T.tensordot(input, self.W, [[1], [0]])
        lhs = projected.dimshuffle(0, 1, 2, 'x')
        rhs = projected.dimshuffle('x', 1, 2, 0)
        # Large constant where a sample is paired with itself.
        self_penalty = 1e6 * T.eye(input.shape[0]).dimshuffle(0, 'x', 1)
        distances = T.sum(abs(lhs - rhs), axis=2) + self_penalty

        if init:
            scale = 0.5 * T.mean(T.min(distances, axis=2), axis=0)
            distances = distances / scale.dimshuffle('x', 0, 'x')
            new_log_scale = self.log_weight_scale - T.log(scale).dimshuffle(0, 'x')
            self.init_updates = [(self.log_weight_scale, new_log_scale)]

        features = T.sum(T.exp(-distances), axis=2)

        if init:
            batch_mean = T.mean(features, axis=0)
            features = features - batch_mean.dimshuffle('x', 0)
            self.init_updates.append((self.b, -batch_mean))
        else:
            features = features + self.b.dimshuffle('x', 0)

        return T.concatenate([input, features], axis=1)

# Convenience function to define an inception-style block
项目:lowrank-highwaynetwork    作者:Avmb    | 项目源码 | 文件源码
def _cost_func(self, y):
        """Return the mean squared hinge loss of `y` against ``self.k``.

        ``self.k`` indexes rows of an identity matrix of width ``y.shape[1]``
        and the resulting one-hot rows are mapped to -1/+1 targets.
        (A clipped cross-entropy cost was used here previously and is
        disabled.)
        """
        signed_targets = 2.0 * T.eye(y.shape[1])[self.k] - 1.0
        margins = T.maximum(0.0, 1.0 - y * signed_targets)
        return T.mean(T.sqr(margins))
项目:cortex    作者:rdevon    | 项目源码 | 文件源码
def generate_latent_pair(self):
        """Return a ``(1, dim)`` zero row and a ``(dim, dim)`` identity, both cast to floatX."""
        identity = T.eye(self.dim).astype(floatX)
        # Index with None to add a leading axis to the zero vector.
        zero_row = T.zeros((self.dim,)).astype(floatX)[None, :]
        return zero_row, identity
项目:dense_tensor    作者:bstriner    | 项目源码 | 文件源码
def eye(n, m):
    """Backend wrapper: return ``T.eye`` called with `n` rows and `m` columns."""
    identity = T.eye(n=n, m=m)
    return identity
项目:KGP-ASR    作者:KGPML    | 项目源码 | 文件源码
def _recurrence_relation(y, y_mask, blank_symbol):
    """
    Construct a permutation matrix and tensor for computing CTC transitions.

    Parameters
    ----------
    y : matrix (L, B)
        the target label sequences
    y_mask : matrix (L, B)
        indicates which values of y to use
    blank_symbol: integer
        indicates the symbol that signifies a blank label.

    Returns
    -------
    matrix (L, L)
    tensor3 (L, L, B)
    """
    n_y = y.shape[0]
    blanks = tensor.zeros((2, y.shape[1])) + blank_symbol
    ybb = tensor.concatenate((y, blanks), axis=0).T
    # NOTE(review): the last factor was truncated in the listing; `y_mask.T`
    # is restored so the (B, L) shapes line up -- confirm against upstream.
    sec_diag = (tensor.neq(ybb[:, :-2], ybb[:, 2:]) *
                tensor.eq(ybb[:, 1:-1], blank_symbol) *
                y_mask.T)

    # r1: LxL
    # r2: LxL
    # r3: LxLxB
    # Slices of one larger identity give the k=1 and k=2 superdiagonals.
    eye2 = tensor.eye(n_y + 2)
    r2 = eye2[2:, 1:-1]  # tensor.eye(n_y, k=1)
    r3 = (eye2[2:, :-2].dimshuffle(0, 1, 'x') *
          sec_diag.dimshuffle(1, 'x', 0))

    return r2, r3
项目:rngru    作者:rneilson    | 项目源码 | 文件源码
def dotcol(x, dim, mat_b):
项目:pyGPGO    作者:hawk31    | 项目源码 | 文件源码
def fit(self, X, y):
        """
        Fits a Student-t regressor using MCMC.

        Parameters
        ----------
        X: np.ndarray, shape=(nsamples, nfeatures)
            Training instances to fit the GP.
        y: np.ndarray, shape=(nsamples,)
            Corresponding continuous target values to `X`.

        """
        self.X = X
        self.n = self.X.shape[0]
        self.y = y
        self.model = pm.Model()

        with self.model as model:
            l = pm.Uniform('l', 0, 10)

            log_s2_f = pm.Uniform('log_s2_f', lower=-7, upper=5)
            s2_f = pm.Deterministic('sigmaf', tt.exp(log_s2_f))

            log_s2_n = pm.Uniform('log_s2_n', lower=-7, upper=5)
            s2_n = pm.Deterministic('sigman', tt.exp(log_s2_n))

            f_cov = s2_f * covariance_equivalence[type(self.covfunc).__name__](1, l)
            Sigma = f_cov(self.X) + tt.eye(self.n) * s2_n ** 2
            # NOTE(review): the second positional argument was truncated in
            # the listing; `self.nu` (degrees of freedom) is restored --
            # confirm against upstream.
            y_obs = pm.MvStudentT('y_obs', self.nu, mu=np.zeros(self.n), Sigma=Sigma, observed=self.y)
        with self.model as model:
            # Sample once: with the user-supplied step method if there is
            # one, otherwise with the configured initialisation.
            if self.step is not None:
                self.trace = pm.sample(self.niter, step=self.step())[self.burnin:]
            else:
                self.trace = pm.sample(self.niter, init=self.init)[self.burnin:]
项目:pyGPGO    作者:hawk31    | 项目源码 | 文件源码
def fit(self, X, y):
        """
        Fits a Gaussian Process regressor using MCMC.

        Parameters
        ----------
        X: np.ndarray, shape=(nsamples, nfeatures)
            Training instances to fit the GP.
        y: np.ndarray, shape=(nsamples,)
            Corresponding continuous target values to `X`.

        """
        self.X = X
        self.n = self.X.shape[0]
        self.y = y
        self.model = pm.Model()

        with self.model as model:
            l = pm.Uniform('l', 0, 10)

            log_s2_f = pm.Uniform('log_s2_f', lower=-7, upper=5)
            s2_f = pm.Deterministic('sigmaf', tt.exp(log_s2_f))

            log_s2_n = pm.Uniform('log_s2_n', lower=-7, upper=5)
            s2_n = pm.Deterministic('sigman', tt.exp(log_s2_n))

            f_cov = s2_f * covariance_equivalence[type(self.covfunc).__name__](1, l)
            Sigma = f_cov(self.X) + tt.eye(self.n) * s2_n ** 2
            y_obs = pm.MvNormal('y_obs', mu=np.zeros(self.n), cov=Sigma, observed=self.y)
        with self.model as model:
            # Sample once: with the user-supplied step method if there is
            # one, otherwise with the configured initialisation.
            if self.step is not None:
                self.trace = pm.sample(self.niter, step=self.step())[self.burnin:]
            else:
                self.trace = pm.sample(self.niter, init=self.init)[self.burnin:]
项目:ADEM    作者:mike-n-7    | 项目源码 | 文件源码
def l1_regularization_F_I(self):
        """Return the 1-norm of ``self.F`` minus an ``emb_dim``-sized identity."""
        deviation = self.F - T.eye(self.emb_dim)
        return deviation.norm(1)
项目:ADEM    作者:mike-n-7    | 项目源码 | 文件源码
def l2_regularization_F(self):
        """Return the 2-norm of ``self.F`` minus an ``emb_dim``-sized identity."""
        deviation = self.F - np.eye(self.emb_dim)
        return deviation.norm(2)
项目:Dynamic-memory-networks-in-Theano    作者:YerevaNN    | 项目源码 | 文件源码
def new_attention_step(self, ct, prev_g, mem, q_q, c_vecs):
        """Compute the attention gate for one candidate fact `ct`.

        Builds a feature vector from `ct`, the memory `mem`, the question
        `q_q` and `c_vecs` (raw values, elementwise products, differences
        and bilinear terms through ``self.W_b``), then applies a two-layer
        network (tanh, sigmoid) and returns row 0 of the sigmoid output.
        `prev_g` is accepted but not used in the computation.
        """
        # NOTE(review): the leading `T.dot(T.ones((1` and
        # `T.dot(T.dot(ct.T` tokens of the three bilinear terms, and the
        # `T.dot(self.W_1` / `T.dot(self.W_2` prefixes, were truncated in
        # the listing and have been restored -- confirm against upstream.
        ones_row = T.ones((1, 4), dtype=floatX)
        diag_mask = T.eye(n=4, m=4, dtype=floatX)
        ct_W_b = T.dot(ct.T, self.W_b)
        cWq = T.dot(ones_row, T.dot(ct_W_b, q_q) * diag_mask)
        cWm = T.dot(ones_row, T.dot(ct_W_b, mem) * diag_mask)
        cWc_vecs = T.dot(ones_row, T.dot(ct_W_b, c_vecs) * diag_mask)

        z = T.concatenate([ct, mem, q_q, c_vecs, ct * q_q, ct * mem, ct * c_vecs,
                           ct - q_q, ct - mem, ct - c_vecs,
                           cWq, cWm, cWc_vecs], axis=0)

        l_1 = T.dot(self.W_1, z) + self.b_1.dimshuffle(0, 'x')
        l_1 = T.tanh(l_1)
        l_2 = T.dot(self.W_2, l_1) + self.b_2.dimshuffle(0, 'x')
        G = T.nnet.sigmoid(l_2)[0]
        return G
项目:DMTN    作者:rgsachin    | 项目源码 | 文件源码
def new_attention_step(self, ct, prev_g, mem, q_q, c_vecs):
        """Compute the attention gate for one candidate fact `ct`.

        Builds a feature vector from `ct`, the memory `mem`, the question
        `q_q` and `c_vecs` (raw values, elementwise products, differences
        and bilinear terms through ``self.W_b``), then applies a two-layer
        network (tanh, sigmoid) and returns row 0 of the sigmoid output.
        `prev_g` is accepted but not used in the computation.
        """
        # NOTE(review): the leading `T.dot(T.ones((1` and
        # `T.dot(T.dot(ct.T` tokens of the three bilinear terms, and the
        # `T.dot(self.W_1` / `T.dot(self.W_2` prefixes, were truncated in
        # the listing and have been restored -- confirm against upstream.
        ones_row = T.ones((1, 4), dtype=floatX)
        diag_mask = T.eye(n=4, m=4, dtype=floatX)
        ct_W_b = T.dot(ct.T, self.W_b)
        cWq = T.dot(ones_row, T.dot(ct_W_b, q_q) * diag_mask)
        cWm = T.dot(ones_row, T.dot(ct_W_b, mem) * diag_mask)
        cWc_vecs = T.dot(ones_row, T.dot(ct_W_b, c_vecs) * diag_mask)

        z = T.concatenate([ct, mem, q_q, c_vecs, ct * q_q, ct * mem, ct * c_vecs,
                           ct - q_q, ct - mem, ct - c_vecs,
                           cWq, cWm, cWc_vecs], axis=0)

        l_1 = T.dot(self.W_1, z) + self.b_1.dimshuffle(0, 'x')
        l_1 = T.tanh(l_1)
        l_2 = T.dot(self.W_2, l_1) + self.b_2.dimshuffle(0, 'x')
        G = T.nnet.sigmoid(l_2)[0]
        return G
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_gpueye():
    """Yield checks that `T.eye` compiles to `GpuEye` and matches `numpy.eye`."""
    def check(dtype, N, M_=None):
        # Theano does not accept None as a tensor.
        # So we must use a real value.
        M = M_
        # Currently DebugMode does not support None as inputs even if this is
        # allowed.
        if M is None:
            M = N
        N_symb = T.iscalar()
        M_symb = T.iscalar()
        k_symb = numpy.asarray(0)
        out = T.eye(N_symb, M_symb, k_symb, dtype=dtype)
        # NOTE(review): the output and mode arguments were truncated in the
        # listing; `out` and `mode_with_gpu` are assumed -- confirm against
        # upstream.
        f = theano.function([N_symb, M_symb],
                            out,
                            mode=mode_with_gpu)
        result = numpy.asarray(f(N, M))
        assert numpy.allclose(result, numpy.eye(N, M_, dtype=dtype))
        assert result.dtype == numpy.dtype(dtype)
        assert any([isinstance(node.op, GpuEye)
                    for node in f.maker.fgraph.toposort()])

    for dtype in ['float32', 'int32', 'float16']:
        yield check, dtype, 3
        # M != N, k = 0
        yield check, dtype, 3, 5
        yield check, dtype, 5, 3
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_gpueye():
    """Yield checks that `T.eye` matches `numpy.eye` and, for k == 0, compiles to `B.GpuEye`."""
    def check(dtype, N, M_=None, K=0):
        # Theano does not accept None as a tensor.
        # So we must use a real value.
        M = M_
        # Currently DebugMode does not support None as inputs even if this is
        # allowed.
        if M is None:
            M = N
        N_symb = T.iscalar()
        M_symb = T.iscalar()
        k_symb = numpy.asarray(K)
        out = T.eye(N_symb, M_symb, k_symb, dtype=dtype)
        # NOTE(review): the output and mode arguments were truncated in the
        # listing; `out` and `mode_with_gpu` are assumed -- confirm against
        # upstream.
        f = theano.function([N_symb, M_symb],
                            out,
                            mode=mode_with_gpu)
        result = numpy.asarray(f(N, M))
        utt.assert_allclose(result, numpy.eye(N, M_, K, dtype=dtype))
        assert result.dtype == numpy.dtype(dtype)
        # The GPU op is only asserted for the main diagonal.
        if K == 0:
            assert any([isinstance(node.op, B.GpuEye)
                        for node in f.maker.fgraph.toposort()])

    for dtype in ['float32']:
        yield check, dtype, 3
        # M != N, k = 0
        yield check, dtype, 3, 5
        yield check, dtype, 5, 3
        # M != N, k = 1
        yield check, dtype, 5, 3, 1
项目:DMN_MCTest    作者:linqian0908    | 项目源码 | 文件源码
def new_attention_step(self, ct, prev_g, mem, q_q, c_vecs):
        """Compute the attention gate for one candidate fact `ct`.

        Builds a feature vector from `ct`, the memory `mem`, the question
        `q_q` and `c_vecs` (raw values, elementwise products, differences
        and bilinear terms through ``self.W_b``), then applies a two-layer
        network (tanh, sigmoid) and returns row 0 of the sigmoid output.
        `prev_g` is accepted but not used in the computation.
        """
        # NOTE(review): the leading `T.dot(T.ones((1` and
        # `T.dot(T.dot(ct.T` tokens of the three bilinear terms, and the
        # `T.dot(self.W_1` / `T.dot(self.W_2` prefixes, were truncated in
        # the listing and have been restored -- confirm against upstream.
        ones_row = T.ones((1, 4), dtype=floatX)
        diag_mask = T.eye(n=4, m=4, dtype=floatX)
        ct_W_b = T.dot(ct.T, self.W_b)
        cWq = T.dot(ones_row, T.dot(ct_W_b, q_q) * diag_mask)
        cWm = T.dot(ones_row, T.dot(ct_W_b, mem) * diag_mask)
        cWc_vecs = T.dot(ones_row, T.dot(ct_W_b, c_vecs) * diag_mask)

        z = T.concatenate([ct, mem, q_q, c_vecs, ct * q_q, ct * mem, ct * c_vecs,
                           ct - q_q, ct - mem, ct - c_vecs,
                           cWq, cWm, cWc_vecs], axis=0)

        l_1 = T.dot(self.W_1, z) + self.b_1.dimshuffle(0, 'x')
        l_1 = T.tanh(l_1)
        l_2 = T.dot(self.W_2, l_1) + self.b_2.dimshuffle(0, 'x')
        G = T.nnet.sigmoid(l_2)[0]
        return G
项目:DMN_MCTest    作者:linqian0908    | 项目源码 | 文件源码
def new_attention_step(self, ct, prev_g, mem, q_q, c_vecs):
        """Compute the attention gate for one candidate fact `ct`.

        Builds a feature vector from `ct`, the memory `mem`, the question
        `q_q` and `c_vecs` (raw values, elementwise products, differences
        and bilinear terms through ``self.W_b``), then applies a two-layer
        network (tanh, sigmoid) and returns row 0 of the sigmoid output.
        `prev_g` is accepted but not used in the computation.
        """
        # NOTE(review): the leading `T.dot(T.ones((1` and
        # `T.dot(T.dot(ct.T` tokens of the three bilinear terms, and the
        # `T.dot(self.W_1` / `T.dot(self.W_2` prefixes, were truncated in
        # the listing and have been restored -- confirm against upstream.
        ones_row = T.ones((1, 4), dtype=floatX)
        diag_mask = T.eye(n=4, m=4, dtype=floatX)
        ct_W_b = T.dot(ct.T, self.W_b)
        cWq = T.dot(ones_row, T.dot(ct_W_b, q_q) * diag_mask)
        cWm = T.dot(ones_row, T.dot(ct_W_b, mem) * diag_mask)
        cWc_vecs = T.dot(ones_row, T.dot(ct_W_b, c_vecs) * diag_mask)

        z = T.concatenate([ct, mem, q_q, c_vecs, ct * q_q, ct * mem, ct * c_vecs,
                           ct - q_q, ct - mem, ct - c_vecs,
                           cWq, cWm, cWc_vecs], axis=0)

        l_1 = T.dot(self.W_1, z) + self.b_1.dimshuffle(0, 'x')
        l_1 = T.tanh(l_1)
        l_2 = T.dot(self.W_2, l_1) + self.b_2.dimshuffle(0, 'x')
        G = T.nnet.sigmoid(l_2)[0]
        return G
项目:DMN_MCTest    作者:linqian0908    | 项目源码 | 文件源码
def new_attention_step(self, ct, prev_g, mem, q_q, c_vecs):
        """Compute the attention gate for one candidate fact `ct`.

        Builds a feature vector from `ct`, the memory `mem`, the question
        `q_q` and `c_vecs` (raw values, elementwise products, differences
        and bilinear terms through ``self.W_b``), then applies a two-layer
        network (tanh, sigmoid) and returns row 0 of the sigmoid output.
        `prev_g` is accepted but not used in the computation.
        """
        # NOTE(review): the leading `T.dot(T.ones((1` and
        # `T.dot(T.dot(ct.T` tokens of the three bilinear terms, and the
        # `T.dot(self.W_1` / `T.dot(self.W_2` prefixes, were truncated in
        # the listing and have been restored -- confirm against upstream.
        ones_row = T.ones((1, 4), dtype=floatX)
        diag_mask = T.eye(n=4, m=4, dtype=floatX)
        ct_W_b = T.dot(ct.T, self.W_b)
        cWq = T.dot(ones_row, T.dot(ct_W_b, q_q) * diag_mask)
        cWm = T.dot(ones_row, T.dot(ct_W_b, mem) * diag_mask)
        cWc_vecs = T.dot(ones_row, T.dot(ct_W_b, c_vecs) * diag_mask)

        z = T.concatenate([ct, mem, q_q, c_vecs, ct * q_q, ct * mem, ct * c_vecs,
                           ct - q_q, ct - mem, ct - c_vecs,
                           cWq, cWm, cWc_vecs], axis=0)

        l_1 = T.dot(self.W_1, z) + self.b_1.dimshuffle(0, 'x')
        l_1 = T.tanh(l_1)
        l_2 = T.dot(self.W_2, l_1) + self.b_2.dimshuffle(0, 'x')
        G = T.nnet.sigmoid(l_2)[0]
        return G
项目:iaf    作者:openai    | 项目源码 | 文件源码
def to_one_hot(x, n_y):
    """Return one-hot rows for the indices `x` by indexing an `n_y`-sized identity.

    Raises:
        TypeError: if `n_y` is not a plain Python int. The check is an
            explicit raise rather than an assert so it still runs under
            ``python -O``.
    """
    # TODO: Replace this with built-in Theano function in extra_ops
    # bool is an int subclass; reject it to keep the original strictness.
    if not isinstance(n_y, int) or isinstance(n_y, bool):
        raise TypeError("n_y must be an int, got %s" % type(n_y).__name__)
    return T.eye(n_y)[x]
项目:dcnn    作者:jcatw    | 项目源码 | 文件源码
def _symbolic_arrows(self, A):
        """Computes the number of unclosed triangles involving any two nodes.

        (1 - A) A^2 + A (D + D^T - A^2 - 1)

        NOTE(review): the formula above ends in ``- 1`` while the code below
        subtracts 2 -- confirm which is intended.
        """
        # Compute and broadcast degree.
        num_nodes = A.shape[0]
        D = T.tile(T.sum(A, axis=1), (num_nodes, 1))

        # NOTE(review): both `T.dot(A` prefixes and the closing parenthesis
        # were truncated in the listing and have been restored (A^2 in the
        # formula) -- confirm against upstream.
        A_squared = T.dot(A, A)
        return (
            (T.eye(num_nodes) - A) * A_squared +
            A * (D + D.T - A_squared - 2)
        )
项目:LasagneNLP    作者:XuezheMax    | 项目源码 | 文件源码
def parser_loss(energies, heads, types, masks):
    """
    compute minus log likelihood of parser as parser loss.
    :param energies: Theano 4D tensor
        energies of each edge. the shape is [batch_size, n_steps, n_steps, num_labels],
        where the dummy root is at index 0.
    :param heads: Theano 2D tensor
        heads in the shape [batch_size, n_steps].
    :param types: Theano 2D tensor
        types in the shape [batch_size, n_steps].
    :param masks: Theano 2D tensor
        masks in the shape [batch_size, n_steps]. May be None, in which case
        every sentence is treated as spanning all n_steps.
    :return: Theano 1D tensor
        an expression for minus log likelihood loss.
    """
    input_shape = energies.shape
    batch_size = input_shape[0]
    length = input_shape[1]
    # get the exp of energies, and add along the label axis.
    # the shape is [batch_size, n, n].
    E = T.exp(energies).sum(axis=3)

    # zero out the elements out the length of each sentence.
    if masks is not None:
        masks_shuffled = masks.dimshuffle(0, 1, 'x')
        E = E * masks_shuffled
        masks_shuffled = masks.dimshuffle(0, 'x', 1)
        E = E * masks_shuffled

    # compute the D tensor.
    # the shape is [batch_size, n, n]
    D = E.sum(axis=1)
    D = T.zeros_like(E) + D.dimshuffle(0, 1, 'x')
    # zeros out all elements except diagonal.
    D = D * T.eye(length, length, 0).dimshuffle('x', 0, 1)

    # compute lengths
    if masks is not None:
        lengths = T.cast(masks, dtype='int32').sum(axis=1)
    else:
        # Previously this path failed on T.cast(None); without a mask each
        # sentence uses the full length.
        lengths = T.alloc(T.cast(length, 'int32'), batch_size)
    # compute laplacian matrix
    L = D - E

    # compute partition Z(x)
    # The leading row/column (the root, index 0) is dropped before the
    # log-determinant.
    partitions, _ = theano.scan(fn=lambda laps, length: nlinalg.logabsdet(laps[1:length, 1:length]), outputs_info=None,
                                sequences=[L, lengths])

    # compute targets energy
    # first create indice matrix
    indices = T.zeros_like(heads) + T.arange(length).dimshuffle('x', 0)
    # compute loss matrix shape = [n_steps, batch_size]
    target_energy = energies[T.arange(batch_size), heads.T, indices.T, types.T]
    # shuffle loss to [batch_size, n_steps]
    target_energy = target_energy.dimshuffle(1, 0)
    # remove the first element [batch, n_steps -1]
    target_energy = target_energy[:, 1:]
    # sum over n_step shape = [batch_size]
    target_energy = target_energy.sum(axis=1)

    return partitions - target_energy#, E, D, L, partitions, target_energy
项目:recnet    作者:joergfranke    | 项目源码 | 文件源码
def _ctc_log(self, predict,labels):
        """Return the negative CTC log-probability, computed in log space.

        `predict` is indexed as ``predict[:, labels]`` and scanned over its
        first axis; `labels` is a 1-D label sequence. The blank symbol is
        read from ``self.tpo["CTC_blank"]``.
        """

        def safe_log(x):
            # Clamp from below so log(0) cannot produce -inf.
            return T.log(T.maximum(x, 1e-20).astype(theano.config.floatX))

        def safe_exp(x):
            return T.exp(T.minimum(x, 1e20).astype(theano.config.floatX))

        def logadd_simple(x, y):
            return x + safe_log(1 + safe_exp(y - x))

        def logadd(x, y, *zs):
            total = logadd_simple(x, y)
            for z in zs:
                total = logadd_simple(total, z)
            return total

        def logmul(x, y):
            return x + y

        n = labels.shape[0]

        _1000 = T.eye(n)[0]
        prev_mask = 1 - _1000
        prevprev_mask = T.neq(labels[:-2], labels[2:]) * \
                        T.eq(labels[1:-1], self.tpo["CTC_blank"])
        prevprev_mask = T.concatenate(([0, 0], prevprev_mask))
        prev_mask = safe_log(prev_mask)
        prevprev_mask = safe_log(prevprev_mask)
        prev = T.arange(-1, n-1)
        prevprev = T.arange(-2, n-2)
        log_pred_y = T.log(predict[:, labels])

        # NOTE(review): the `logadd(accum,` line inside `step` and the
        # arguments of `theano.scan` were truncated in the listing; restored
        # from the bracket structure and the usual form of this recurrence --
        # confirm against upstream.
        def step(curr, accum):
            return logmul(curr,
                          logadd(accum,
                                 logmul(prev_mask, accum[prev]),
                                 logmul(prevprev_mask, accum[prevprev])))

        log_probs, _ = theano.scan(
            step,
            sequences=[log_pred_y],
            outputs_info=[safe_log(_1000)]
        )

        # TO DO: Add -2 if n > 1 and blank at end
        log_labels_probab = log_probs[-1, -1] #T.sum(log_probs[-1, -2:]) to do
        return -log_labels_probab
项目:recnet    作者:joergfranke    | 项目源码 | 文件源码
def _mb_normal_ctc(self, network_output,   labels, mask):
        # Mini-batch CTC loss in the probability domain: returns
        # mean(-log(labels_probab)).
        # `mask` is not referenced anywhere in the visible body.
        # NOTE(review): this block looks truncated in three places (the
        # `sec_diag` expression, the `dotproduct` line, and the `theano.scan`
        # call). Restore from the upstream source before use.

        # `labels` is split along axis 1 into two halves: label ids first,
        # then a mask that is cast to floatX.
        # NOTE(review): `/` is true division on Python 3 -- confirm whether
        # integer division is intended for this slice bound.
        n_y = labels.shape[1] / 2
        y = labels[:,:n_y]
        y = y.dimshuffle(1,0)
        y_mask = labels[:,n_y:].astype(theano.config.floatX)

        # y_row = labels.dimshuffle(1,0)
        # n_y = y_row.shape[0] / 2
        # y = y_row[:n_y,:]
        # y_mask = y_row[n_y:,:].astype(theano.config.floatX)

        # Swap the last two axes of the network output.
        y_hat = network_output.dimshuffle(0, 2, 1)

        # Gather, per batch column, the outputs at the label ids.
        # `pred_y` is not used in the visible text; presumably it was the scan
        # sequence -- TODO confirm.
        pred_y = y_hat[:, y.astype('int32'), T.arange(self.tpo["batch_size"])]

        # Labels followed by self.blanks along axis 0, then transposed.
        ybb = T.concatenate((y, self.blanks), axis=0).T
        # NOTE(review): the expression below ends with a dangling `*`; its
        # last factor is missing from this copy.
        sec_diag = (T.neq(ybb[:, :-2], ybb[:, 2:]) *
                    T.eq(ybb[:, 1:-1], self.tpo["CTC_blank"]) *

        # r1: LxL
        # r2: LxL
        # r3: LxLxB
        r2 = T.eye(n_y, k=1)
        r3 = (T.eye(n_y, k=2).dimshuffle(0, 1, 'x') *
              sec_diag.dimshuffle(1, 'x', 0))

        def step(p_curr, p_prev):
            # instead of dot product, we * first
            # and then sum over one dimension.
            # objective:, LxLxB)
            # solution: Lx1xB * LxLxB --> LxLxB --> (sumover)xLxB
            # NOTE(review): the middle term below is garbled ("+, r2)");
            # presumably a dot product of p_prev with r2 -- TODO confirm.
            dotproduct = (p_prev +, r2) +
                          (p_prev.dimshuffle(1, 'x', 0) * r3).sum(axis=0).T)
            return p_curr.T * dotproduct * y_mask  # B x L

        # Initial state: the first row of the identity (1 at index 0) repeated
        # for every batch row.
        # NOTE(review): only `outputs_info` survived; the step function and
        # sequences arguments are missing from this copy.
        probabilities, _ = theano.scan(
            outputs_info=[T.eye(n_y)[0] * T.ones([self.tpo["batch_size"], n_y])])

        # Sum of the last two positions of the final step; with no `axis`
        # this also sums over the batch axis, so the result is a scalar.
        labels_probab = T.sum(probabilities[-1,:, -2:])
        return T.mean(-T.log(labels_probab))
项目:recnet    作者:joergfranke    | 项目源码 | 文件源码
def _mb_log_ctc(self, network_output,   labels, mask):
        # Log-domain counterpart of the mini-batch CTC loss: returns
        # mean(-labels_probab) where labels_probab is built from log values.
        # `mask` is not referenced anywhere in the visible body.
        # NOTE(review): this block looks truncated in three places (the
        # `sec_diag` expression, the `step` return, and the `theano.scan`
        # call). Restore from the upstream source before use.

        #y_row = labels.dimshuffle(1,0)
        # `labels` is split along axis 1 into two halves: label ids first,
        # then a mask that is cast to floatX.
        # NOTE(review): `/` is true division on Python 3 -- confirm whether
        # integer division is intended for this slice bound.
        n_y = labels.shape[1] / 2
        y = labels[:,:n_y]
        y = y.dimshuffle(1,0)
        y_mask = labels[:,n_y:].astype(theano.config.floatX)

        # Swap the last two axes of the network output.
        y_hat = network_output.dimshuffle(0, 2, 1)

        # Gather, per batch column, the outputs at the label ids.
        # `pred_y` is not used in the visible text; presumably it was the scan
        # sequence -- TODO confirm.
        pred_y = y_hat[:, y.astype('int32'), T.arange(self.tpo["batch_size"])]

        # Labels followed by self.blanks along axis 0, then transposed.
        ybb = T.concatenate((y, self.blanks), axis=0).T
        # NOTE(review): the expression below ends with a dangling `*`; its
        # last factor is missing from this copy.
        sec_diag = (T.neq(ybb[:, :-2], ybb[:, 2:]) *
                    T.eq(ybb[:, 1:-1], self.tpo["CTC_blank"]) *

        # Identity shifted by one and by two diagonals; the second gets a
        # trailing broadcast axis and is gated by sec_diag.
        r2 = T.eye(n_y, k=1)
        r3 = (T.eye(n_y, k=2).dimshuffle(0, 1, 'x') *
              sec_diag.dimshuffle(1, 'x', 0))

        def step(log_p_curr, log_p_prev):
            # Combine three terms with the class's log-domain helpers
            # (log_dot_matrix, log_dot_T, log_add are defined elsewhere).
            p1 = log_p_prev
            p2 = self.log_dot_matrix(p1, r2)
            p3 = self.log_dot_T(p1, r3)
            p123 = self.log_add(p3, self.log_add(p1, p2))

            # NOTE(review): the return expression is cut off after the
            # second `+`; its last term is missing from this copy.
            return (log_p_curr.T +
                    p123 +

        # Initial state: self._epslog applied to the first row of the identity
        # repeated for every batch row.
        # NOTE(review): only `outputs_info` survived; the step function and
        # sequences arguments are missing from this copy.
        log_probabs, _ = theano.scan(
            outputs_info=[self._epslog(T.eye(n_y)[0] *
                                      T.ones([self.tpo["batch_size"], n_y]))])

        # NOTE(review): this sums log values directly over the last two
        # positions and the batch axis -- confirm that a plain sum (rather
        # than a log-add) is intended here.
        labels_probab = T.sum(log_probabs[-1,:, -2:])
        return T.mean(-labels_probab)
项目:Theano-NN_Starter    作者:nightinwhite    | 项目源码 | 文件源码
def get_output(self):
        """Return the activation of the biased convolution of ``self.input``.

        When ``self.dropout_rate`` is non-zero, ``self.input`` is first
        multiplied by a binomial keep-mask drawn from a freshly seeded
        ``RandomStreams`` and divided by the keep probability; the scaled
        value is stored back on ``self.input``.
        """
        if self.dropout_rate != 0:
            stream = RandomStreams(seed=np.random.randint(10e6))
            keep_prob = 1. - self.dropout_rate
            keep_mask = stream.binomial(
                self.input.shape, p=keep_prob, dtype=self.input.dtype)
            self.input *= keep_mask
            self.input /= keep_prob

        # conv2d result is laid out as
        # (batch size, output channels, output rows, output columns);
        # the bias is broadcast over every axis except the channel axis.
        feature_maps = conv2d(self.input, self.Cnn_W)
        feature_maps = feature_maps + self.Cnn_B.dimshuffle('x', 0, 'x', 'x')

        # NOTE: an experimental recurrent mixing of neighbouring columns of
        # the convolution output (driven by self.Rnn_W_s / self.Rnn_W_b) was
        # sketched here in commented-out form and is disabled; the plain
        # activation of the convolution is returned.
        return self.activition(feature_maps)
项目:Theano-NN_Starter    作者:nightinwhite    | 项目源码 | 文件源码
def CTC_LOSS(self):
        # Builds a per-sample CTC-style loss over (self.output, self.Y) with an
        # outer theano.scan, compiles it into self.ctc_loss, and returns the
        # symbolic per-sample losses.
        # NOTE(review): several lines in this copy are garbled (the `n =`
        # assignment, the inner scan call, the `loss` expression and the outer
        # scan call). The original non-ASCII comments were also lost to an
        # encoding problem ("????"). Restore from the upstream source.
        outpts = self.output
        inpts = self.Y
        def each_loss(outpt, inpt):
            # y ????blank???ans
            # The blank symbol id is hard-coded to 26 here.
            blank = 26
            # 1 where the target entry is not the blank symbol.
            y_nblank = T.neq(inpt, blank)
            # NOTE(review): right-hand side is garbled; presumably a count of
            # the non-blank entries -- TODO confirm.
            n =, y_nblank)  # ???????
            N = 2 * n + 1  # ??????????????????
            # Keep only the first N entries of the target row.
            labels = inpt[:N]
            labels2 = T.concatenate((labels, [blank, blank]))
            # 1 where a label differs from the one two positions later and
            # the label in between is the blank.
            sec_diag = T.neq(labels2[:-2], labels2[2:]) * T.eq(labels2[1:-1], blank)
            # Identity + first super-diagonal + gated second super-diagonal.
            recurrence_relation = \
                T.eye(N) + \
                T.eye(N, k=1) + \
                T.eye(N, k=2) * sec_diag.dimshuffle((0, 'x'))

            # Columns of the network output selected by the label ids.
            # `pred_y` is not used in the visible text; presumably it was the
            # scan sequence -- TODO confirm.
            pred_y = outpt[:, labels]

            # NOTE(review): the scan call below is garbled (operands of the
            # products are missing and the call is never closed).
            fwd_pbblts, _ = theano.scan(
                lambda curr, accum: T.switch(T.eq(curr*, recurrence_relation), 0.0),
                                   , recurrence_relation)
                                             , curr*, recurrence_relation)),
            #return fwd_pbblts
            #liklihood = fwd_pbblts[0, 0]
            # Sum of the last two entries of the final forward step.
            liklihood = fwd_pbblts[-1, -1] + fwd_pbblts[-1, -2]
            #liklihood = T.switch(, 1e-35), 1e-35, liklihood)
            #loss = -T.log(T.cast(liklihood, "float32"))
            #loss = 10 * (liklihood - 1) * (liklihood - 100)
            # Piecewise loss: a quadratic term gated by liklihood <= 1.0 plus a
            # -log term whose gate is garbled in this copy.
            # NOTE(review): the second gate "(, 1.0)" lost its function name
            # and first operand -- TODO confirm against upstream.
            loss = (T.le(liklihood, 1.0)*(10*(liklihood-1)*(liklihood-100)))+(, 1.0)*(-T.log(T.cast(liklihood, "float32"))))
            return loss
            #return pred_y

        # NOTE(review): this scan call is never closed in this copy; any
        # further arguments are missing.
        ctc_losss, _ = theano.scan(each_loss,
                                   sequences=[outpts, inpts],
        # Compile a callable taking (self.X, self.Y) and keep it on the object.
        self.ctc_loss = theano.function([self.X, self.Y], ctc_losss)

        return ctc_losss
项目:ADEM    作者:mike-n-7    | 项目源码 | 文件源码
def __init__(self, input, feat, emb_dim, batch_size, init_mean, init_range, mean_length, feat_dim=0):
        # Sets up shared parameters (M, N, F, f, q, b) and the symbolic
        # prediction / output expressions built from `input` and `feat`.
        # NOTE(review): the right-hand sides of emb_response_fair, pred1,
        # pred2, pred3 and nuis are garbled in this copy (text before the
        # comma is missing, presumably a dot-product call) -- restore from the
        # upstream source before use.
        # Matrices for predicting score
        mean_length = float(mean_length)
        self.emb_dim = emb_dim
        # M, N and F all start as (emb_dim x emb_dim) identity matrices.
        self.M = theano.shared(np.eye(emb_dim).astype(theano.config.floatX), borrow=True)
        self.N = theano.shared(np.eye(emb_dim).astype(theano.config.floatX), borrow=True)
        # Fair projection matrix 
        self.F = theano.shared(np.eye(emb_dim).astype(theano.config.floatX), borrow=True)
        # Parameters for auxiliary features
        # (zero vector of length feat_dim; empty when feat_dim is left at 0)
        self.f = theano.shared(np.zeros((feat_dim,)).astype(theano.config.floatX), borrow=True)

        # Set embeddings by slicing tensor
        # (indices 0, 1, 2 along axis 1 of `input`)
        self.emb_context = input[:,0,:]
        self.emb_true_response = input[:,1,:]
        self.emb_response = input[:,2,:]
        self.feat = feat
        self.x = input

        # Projects embeddings into 'fair' space
        self.emb_response_fair =, self.F)

        # Compute score predictions
        self.pred1 = T.sum(self.emb_context *, self.M), axis=1)
        self.pred2 = T.sum(self.emb_true_response *, self.N), axis=1)
        self.pred3 =, self.f)
        self.pred = self.pred1 + self.pred2 + self.pred3
        #self.pred = T.sum(, self.f), axis=1) + 0*T.sum(self.feat) # If only using response
        self.output = 2.5 + 5 * (self.pred - init_mean) / init_range # to re-scale dot product values to [0,5] range

        # Feed-forward neural net for predicting length
        #n_hid = emb_dim
        #self.q = theano.shared(np.zeros((emb_dim, n_hid)).astype(theano.config.floatX), borrow=True)
        #self.q2 = theano.shared(np.zeros((n_hid,)).astype(theano.config.floatX), borrow=True)
        #self.b = theano.shared(np.zeros((n_hid,)))
        #self.b2 = theano.shared(1. * mean_length)

        #l1 =, self.q) + self.b
        #l1 = T.nnet.relu(l1)
        #l2 =, self.q2) + self.b2 * np.ones((batch_size,))
        #self.nuis = l2

        # Using Lasagne
        #net = lasagne.layers.InputLayer((None, emb_dim), self.emb_response_fair.T)
        #net = lasagne.layers.DenseLayer(net, num_units=n_hid)
        #net = lasagne.layers.DenseLayer(net, num_units=1, b=lasagne.init.Constant(mean_length),\
        #        nonlinearity=lasagne.nonlinearities.linear)
        #self.nuis = lasagne.layers.get_output(net) = net

        # Compute length with linear regression
        # q starts at zero and b at mean_length; b is broadcast to a vector of
        # length batch_size.
        self.q = theano.shared(np.zeros((emb_dim,)).astype(theano.config.floatX), borrow=True)
        self.b = theano.shared(1. * mean_length)
        self.nuis =, self.q) + self.b * np.ones((batch_size,))
项目:crnn_tf    作者:liuhu-bigeye    | 项目源码 | 文件源码
def em_ctc_cost(e_pred, pred, pred_len, token, blank):
    """
    CTC cost computed from an "estimate" and a "maximize" prediction.

    Original summary: "ctc_cost of only one sentence" (the documented shapes
    nevertheless carry a batch axis ``nb``).

    The token sequence is interleaved with ``blank``, the predictions are
    gathered at those symbols, and ``compute_one_step`` is scanned over the
    time steps to produce an ``alpha`` series (from ``e_pred``) and a ``beta``
    series (from ``e_pred * log(pred)``).  The cost is the negated safe ratio
    of the beta and alpha values read at the last two label positions.

    ``compute_one_step``, ``log_safe``, ``divide_safe``, ``intX`` and
    ``floatX`` are defined elsewhere in the module.

    :param e_pred: (T, nb, voca_size + 1)
    :param pred: (T, nb, voca_size + 1)                 (4,1,3)
    :param pred_len: (nb,)  pred_len of prediction      (1)
    :param token: (nb, U)   -1 for NIL                  (1,2)
    :param blank: (1)
    :return: ctc_cost
    """
    nb, U = token.shape[0], token.shape[1]
    # Number of real (non -1) tokens per batch row.
    token_len = T.sum(T.neq(token, -1), axis=-1)

    # token_with_blank: put a blank before every token and one at the end
    token = token[:, :, None]   # (nb, U, 1)
    token_with_blank = T.concatenate((T.ones_like(token, dtype='int32')*blank, token), axis=2).reshape((nb, 2*U))
    token_with_blank = T.concatenate((token_with_blank, T.ones((nb, 1), dtype='int32')*blank), axis=1)  # (nb, 2*U+1)
    length = token_with_blank.shape[1]

    # only use these predictions
    pred = pred[:, T.arange(nb, dtype='int32')[:, None], token_with_blank]  # (T, nb, 2U+1)
    e_pred = e_pred[:, T.arange(nb, dtype='int32')[:, None], token_with_blank]  # (T, nb, 2U+1)

    # recurrence relation: identity + first super-diagonal, plus a second
    # super-diagonal gated per batch row by sec_diag (non-blank symbol that
    # differs from the symbol two positions earlier)
    sec_diag = T.concatenate((T.zeros((nb, 2), dtype=intX), T.neq(token_with_blank[:, :-2], token_with_blank[:, 2:])), axis=1) * T.neq(token_with_blank, blank) # (nb, 2U+1)
    recurrence_relation = T.tile((T.eye(length) + T.eye(length, k=1)), (nb, 1, 1)) + T.tile(T.eye(length, k=2), (nb, 1, 1))*sec_diag[:, None, :]    # (nb, 2U+1, 2U+1)
    recurrence_relation = recurrence_relation.astype(floatX)

    # alpha for estimate: only the first two positions are non-zero at t=0
    alpha = T.zeros_like(token_with_blank, dtype=floatX)
    alpha = T.set_subtensor(alpha[:, :2], e_pred[0, :, :2])  # (nb, 2U+1)   p
    # beta for maximize
    beta = T.zeros_like(token_with_blank, dtype=floatX)
    beta = T.set_subtensor(beta[:, :2], e_pred[0, :, :2]*log_safe(pred[0, :, :2]))  # (nb, 2U+1) e_p * log(p)

    # dynamic programming over the remaining time steps
    # (T, nb, 2U+1)
    (probability_alpha, probability_beta), _ = theano.scan(compute_one_step, sequences=[e_pred[1:], pred[1:]], outputs_info=[alpha, beta], non_sequences=[recurrence_relation])

    # estimate prob: the scan starts at t=1, hence the pred_len - 2 index
    labels_e_2 = probability_alpha[pred_len - 2, T.arange(nb, dtype='int32'), 2 * token_len - 1]
    labels_e_1 = probability_alpha[pred_len - 2, T.arange(nb, dtype='int32'), 2 * token_len]
    labels_e_prob = labels_e_2 + labels_e_1

    # maximize prob
    labels_m_2 = probability_beta[pred_len - 2, T.arange(nb, dtype='int32'), 2 * token_len - 1]
    labels_m_1 = probability_beta[pred_len - 2, T.arange(nb, dtype='int32'), 2 * token_len]
    labels_m_prob = labels_m_2 + labels_m_1

    cost = -divide_safe(labels_m_prob, labels_e_prob)
    return cost
项目:crnn_tf    作者:liuhu-bigeye    | 项目源码 | 文件源码
def ctc_cost(pred, pred_len, token, blank):
    """
    ctc_cost of multi sentences

    The token sequence is interleaved with ``blank``, the predictions are
    gathered at those symbols, and a forward recursion is scanned over the
    time steps.  The cost is ``-log`` of the summed forward values at the
    last two label positions, with a small ``eps`` added before the log.

    ``intX`` and ``floatX`` are defined elsewhere in the module.

    :param pred: (T, nb, voca_size+1)                    (4,1,3)
    :param pred_len: (nb,)    pred_len of prediction        (1)
    :param token: (nb, U)    -1 for NIL                    (1,2)
    :param blank: (1)
    :return: ctc_cost
    """

    # Small constant added before the log so a zero probability stays finite.
    eps = theano.shared(np.float32(1e-35))
    nb, U = token.shape[0], token.shape[1]
    # Number of real (non -1) tokens per batch row.
    token_len = T.sum(T.neq(token, -1), axis=-1)

    # token_with_blank: put a blank before every token and one at the end
    token = token[:, :, None]    # (nb, U, 1)
    token_with_blank = T.concatenate((T.ones_like(token, dtype=intX)*blank, token), axis=2).reshape((nb, 2*U))
    token_with_blank = T.concatenate((token_with_blank, T.ones((nb, 1), dtype=intX)*blank), axis=1)    # (nb, 2*U+1)
    length = token_with_blank.shape[1]

    # only use these predictions
    pred = pred[:, T.tile(T.arange(nb), (length, 1)).T, token_with_blank]    # (T, nb, 2U+1)

    # recurrence relation: identity + first super-diagonal, plus a second
    # super-diagonal gated per batch row by sec_diag (non-blank symbol that
    # differs from the symbol two positions earlier)
    sec_diag = T.concatenate((T.zeros((nb, 2), dtype=intX), T.neq(token_with_blank[:, :-2], token_with_blank[:, 2:])), axis=1) * T.neq(token_with_blank, blank)    # (nb, 2U+1)
    recurrence_relation = T.tile((T.eye(length) + T.eye(length, k=1)), (nb, 1, 1)) + T.tile(T.eye(length, k=2), (nb, 1, 1))*sec_diag[:, None, :]    # (nb, 2U+1, 2U+1)
    recurrence_relation = recurrence_relation.astype(floatX)

    # alpha: only the first two positions are non-zero at t=0
    alpha = T.zeros_like(token_with_blank, dtype=floatX)
    alpha = T.set_subtensor(alpha[:, :2], pred[0, :, :2])  # (nb, 2U+1)

    # dynamic programming over the remaining time steps
    # (T, nb, 2U+1)
    probability, _ = theano.scan(lambda curr, accum: T.batched_dot(accum, recurrence_relation) * curr, sequences=[pred[1:]], outputs_info=[alpha])

    # The scan starts at t=1, hence the pred_len-2 index for the last step.
    labels_2 = probability[pred_len-2, T.arange(nb), 2*token_len-1]
    labels_1 = probability[pred_len-2, T.arange(nb), 2*token_len]
    labels_prob = labels_2 + labels_1

    cost = -T.log(labels_prob+eps)
    return cost
项目:Precise-CTC    作者:Michlong    | 项目源码 | 文件源码
def path_probability(self, queryseq_padded, scorematrix, queryseq_mask_padded=None, scorematrix_mask=None, blank_symbol=None):
        # NOTE(review): the lines up to ":param blank_symbol:" read like a
        # docstring whose triple-quote delimiters were lost in this copy; the
        # same holds for the note at the top of `step`. The `dotproduct` line
        # is also garbled and the scan call names no step function. Restore
        # from the upstream source before use.
        Compute p(l|x) using only the forward variable
        :param queryseq_padded: (2L+1, B)
        :param scorematrix: (T, C+1, B)
        :param queryseq_mask_padded: (2L+1, B)
        :param scorematrix_mask: (T, B)
        :param blank_symbol: = C by default
        # Defaults: blank is the last class index; the mask is all ones.
        if blank_symbol is None:
            blank_symbol = scorematrix.shape[1] - 1
        if queryseq_mask_padded is None:
                    queryseq_mask_padded = tensor.ones_like(queryseq_padded, dtype=floatX)

        pred_y = self._class_batch_to_labeling_batch(queryseq_padded, scorematrix, scorematrix_mask)  # (T, 2L+1, B), reshaped scorematrix

        r2, r3 = self._recurrence_relation(queryseq_padded, queryseq_mask_padded, blank_symbol)       # r2 (2L+1, 2L+1), r3 (2L+1, 2L+1, B)

        def step(p_curr, p_prev, LLForward, countdown, r2, r3, queryseq_mask_padded):
            [DV, 1-14-2016]: A very weird problem encountered when integrating this CTC implementation into Keras. Before this revision
                             there were no input parameters (r2, r3, queryseq_mask_padded) specified, they just referred to the outer scope ones.
                             However, this will cause the CTC integrated within Keras producing inaccurate loss value, meanwhile when compiled
                             as a separate function, the returned ctc loss value is accurate anyway. But if with these 3 parameters added as
                             input, the problem vanished. This took me two days to find this remedy. I suspect this'd be the bug of theano.
            :param p_curr:     (2L+1, B), one column of scorematrix
            :param p_prev:     (B, 2L+1)
            :param LLForward:  (B, 1)
            :param countdown:  scalar
            :param r2:
            :param r3:
            :param queryseq_mask_padded:
            # NOTE(review): the middle term below is garbled ("+, r2)");
            # presumably a dot product of p_prev with r2, matching the
            # alpha(t-1, u-1) remark -- TODO confirm.
            dotproduct = (p_prev +, r2) +                                           #, r2) = alpha(t-1, u-1)
                          (p_prev.dimshuffle(1, 'x', 0) * r3).sum(axis=0).T)                          # = alpha(t-1, u-2) conditionally
            p_curr = p_curr.T * dotproduct
            # The outer function replaces a None mask with ones before the
            # scan, so this guard looks always true when reached from there.
            if queryseq_mask_padded is not None:
                p_curr *= queryseq_mask_padded.T                                                      # (B, 2L+1) * (B, 2L+1) * (B, 2L+1) = (B, 2L+1)
            # Zero out the first `start` label positions, where
            # start = max(0, (2L+1) - 2 * countdown).
            # `queryseq_padded` is read from the enclosing scope here, unlike
            # r2, r3 and the mask, which are passed in as arguments.
            start = tensor.max([0, queryseq_padded.shape[0] - 2 * countdown])
            mask = tensor.concatenate([tensor.zeros([queryseq_padded.shape[1], start]),
                                       tensor.ones([queryseq_padded.shape[1], queryseq_padded.shape[0] - start])], axis=1)
            p_curr *= mask
            # Normalise each batch row to sum to 1 and accumulate the log of
            # the normaliser in LLForward.
            c_batch = p_curr.sum(axis=1, keepdims=True)                                               # (B, 1)
            p_curr /= c_batch
            LLForward += tensor.log(c_batch)
            countdown -= 1
            return p_curr, LLForward, countdown                                                       # (B, 2L+1), (B, 1), scalar

        # Initial state: a 1 at label position 0 for every batch row, a zero
        # (B, 1) log-likelihood, and a countdown starting at the number of
        # time steps in scorematrix.
        # NOTE(review): no step function is passed to scan in this copy;
        # presumably `step` was the first argument -- TODO confirm.
        results, _ = theano.scan(
                sequences=[pred_y],                                                                   # scan only work on the first dimension
                outputs_info=[tensor.eye(queryseq_padded.shape[0])[0] * tensor.ones(queryseq_padded.T.shape),
                              tensor.unbroadcast(tensor.zeros([queryseq_padded.shape[1], 1]), 1), scorematrix.shape[0]],
                non_sequences=[r2, r3, queryseq_mask_padded])
        return results