Python theano.tensor 模块,shape_padright() 实例源码


项目:pl-cnn    作者:oval-group    | 项目源码 | 文件源码
def compile_eval_function(nnet):
    """Compile a Theano function that reports top-1 and top-5 accuracy.

    The output of ``nnet.dense3_conv_layer`` is evaluated in deterministic
    mode, soft-maxed row by row and then averaged over all rows before the
    ranking is compared with the labels.

    Returns a ``theano.function`` mapping ``(X, y)`` to
    ``[top1_acc, top5_acc]``.
    """
    images = T.tensor4()
    labels = T.ivector()

    # prediction of the fully convolutional network
    scores = lasagne.layers.get_output(nnet.dense3_conv_layer,
                                       deterministic=True, inputs=images)

    # bring axis 1 to the front, flatten the remaining axes into one, then
    # transpose so that the former axis 1 becomes the column axis
    scores = scores.transpose((1, 0, 2, 3)).flatten(2)
    scores = T.nnet.softmax(scores.transpose((1, 0)))

    # spatial averaging: collapse all rows into a single score vector
    mean_scores = T.mean(scores, axis=0)

    # argsort is ascending, so the best classes sit at the end
    ranking = T.argsort(mean_scores)
    hit_top1 = T.eq(ranking[-1], labels)
    hit_top5 = T.any(T.eq(ranking[-5:], T.shape_padright(labels)), axis=1)
    top1_acc = T.mean(hit_top1, dtype='floatX')
    top5_acc = T.mean(hit_top5, dtype='floatX')

    return theano.function([images, labels], [top1_acc, top5_acc])
项目:NADE    作者:MarcCote    | 项目源码 | 文件源码
def sym_logdensity(self, x):
    """Build the symbolic log-density with mixture-of-Gaussians conditionals.

    x is a matrix of column datapoints (VxB) V = n_visible, B = batch size

    Returns ``(log_density, updates)``: the last value of the accumulated
    log-probability produced by the scan, and the scan's update dictionary.
    """
    # NOTE(review): the ``T.dot(...)`` operands in this function were missing
    # from the source text; they were restored from the shape comments
    # (BxH / BxC) and from the names that were otherwise unused (x_prev, h).
    # Confirm against the upstream NADE sources.
    def density_given_previous_a_and_x(x, w, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma, activations_factor, p_prev, a_prev, x_prev):
        # (Bx1) . (1xH): add the previous dimension's contribution to the
        # running pre-activation
        a = a_prev + T.dot(T.shape_padright(x_prev, 1), T.shape_padleft(w, 1))
        h = self.nonlinearity(a * activations_factor)  # BxH

        Alpha = T.nnet.softmax(T.dot(h, V_alpha) + T.shape_padleft(b_alpha))  # BxC
        Mu = T.dot(h, V_mu) + T.shape_padleft(b_mu)  # BxC
        Sigma = T.exp((T.dot(h, V_sigma) + T.shape_padleft(b_sigma)))  # BxC
        # log of the Gaussian mixture density of x, summed over components
        # in log-space
        p = p_prev + log_sum_exp(-constantX(0.5) * T.sqr((Mu - T.shape_padright(x, 1)) / Sigma) - T.log(Sigma) - constantX(0.5 * np.log(2 * np.pi)) + T.log(Alpha))
        return (p, a, x)
    # First element is different (it is predicted from the bias only)
    a0 = T.zeros_like(T.dot(x.T, self.W))  # BxH
    p0 = T.zeros_like(x[0])
    x0 = T.ones_like(x[0])
    ([ps, _as, _xs], updates) = theano.scan(density_given_previous_a_and_x,
                                            sequences=[x, self.W, self.V_alpha, self.b_alpha, self.V_mu, self.b_mu, self.V_sigma, self.b_sigma, self.activation_rescaling],
                                            outputs_info=[p0, a0, x0])
    return (ps[-1], updates)
项目:NADE    作者:MarcCote    | 项目源码 | 文件源码
def sym_logdensity(self, x):
    """Build the symbolic log-density with mixture-of-Laplace conditionals.

    x is a matrix of column datapoints (VxB) V = n_visible, B = batch size

    Returns ``(log_density, updates)``: the last value of the accumulated
    log-probability produced by the scan, and the scan's update dictionary.
    """
    # NOTE(review): the ``T.dot(...)`` operands in this function were missing
    # from the source text; they were restored from the shape comments
    # (BxH / BxC) and from the names that were otherwise unused (x_prev, h).
    # Confirm against the upstream NADE sources.
    def density_given_previous_a_and_x(x, w, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma, activations_factor, p_prev, a_prev, x_prev):
        # (Bx1) . (1xH): add the previous dimension's contribution to the
        # running pre-activation
        a = a_prev + T.dot(T.shape_padright(x_prev, 1), T.shape_padleft(w, 1))
        h = self.nonlinearity(a * activations_factor)  # BxH

        Alpha = T.nnet.softmax(T.dot(h, V_alpha) + T.shape_padleft(b_alpha))  # BxC
        Mu = T.dot(h, V_mu) + T.shape_padleft(b_mu)  # BxC
        Sigma = T.exp((T.dot(h, V_sigma) + T.shape_padleft(b_sigma)))  # BxC
        # log of the Laplace mixture density of x: log(alpha) - log(2*sigma)
        # - |mu - x| / sigma, summed over components in log-space
        p = p_prev + log_sum_exp(T.log(Alpha) - T.log(2 * Sigma) - T.abs_(Mu - T.shape_padright(x, 1)) / Sigma)
        return (p, a, x)
    # First element is different (it is predicted from the bias only)
    a0 = T.zeros_like(T.dot(x.T, self.W))  # BxH
    p0 = T.zeros_like(x[0])
    x0 = T.ones_like(x[0])
    ([ps, _as, _xs], updates) = theano.scan(density_given_previous_a_and_x,
                                            sequences=[x, self.W, self.V_alpha, self.b_alpha, self.V_mu, self.b_mu, self.V_sigma, self.b_sigma, self.activation_rescaling],
                                            outputs_info=[p0, a0, x0])
    return (ps[-1], updates)
项目:lstmprovisor-python    作者:Impro-Visor    | 项目源码 | 文件源码
def note_to_encoding(self, chosen_note, relative_position, low_bound, high_bound):
    """Encode a batch of chosen note indices as float32 rows.

    ``chosen_note`` must be a 1-D tensor. Values 0 and 1 select one of two
    fixed rows (a 1 in column 0 or column 1 respectively); other values are
    encoded from ``chosen_note - 2`` as a concatenation of a fixed
    ``[0, 0, 1]`` prefix, a one-hot of size 4 (mod 4), a one-hot of size 3
    (mod 3) and a one-hot octave index.

    ``relative_position`` and ``high_bound`` are not read here.
    """
    assert chosen_note.ndim == 1
    n_batch = chosen_note.shape[0]

    # rows used when no pitch is played: column 0 for value 0, column 1 otherwise
    dont_play_version = T.switch( T.shape_padright(T.eq(chosen_note, 0)),
                                    T.tile(np.array([[1,0] + [0]*(self.ENCODING_WIDTH-2)], dtype=np.float32), (n_batch, 1)),
                                    T.tile(np.array([[0,1] + [0]*(self.ENCODING_WIDTH-2)], dtype=np.float32), (n_batch, 1)))

    rcp = T.tile(np.array([0,0,1],dtype=np.float32), (n_batch, 1))
    circle_1 = T.eye(4)[(chosen_note-2)%4]
    circle_2 = T.eye(3)[(chosen_note-2)%3]
    octave = T.eye(self.num_octaves)[(chosen_note-2+low_bound-self.octave_start)//12]

    play_version = T.concatenate([rcp, circle_1, circle_2, octave], 1)

    # NOTE(review): the condition was missing from the source text (only
    # ``, 2)`` survived). ``chosen_note < 2`` is assumed because pitches are
    # offset by 2 above -- confirm against upstream.
    encoded_form = T.switch( T.shape_padright(T.lt(chosen_note, 2)), dont_play_version, play_version )
    return encoded_form
项目:lstmprovisor-python    作者:Impro-Visor    | 项目源码 | 文件源码
def get_loss(self, raw_feature_strengths, raw_feature_vects, extra_info=False):
    """Compute the penalty on feature strengths.

    The loss is ``_penalty_base`` times the summed ``_loss_fun`` values plus
    ``_penalty_shock`` times a pairwise term that couples strengths at
    different timesteps through a geometric falloff matrix.

    ``raw_feature_vects`` is not read here.

    Returns ``full_loss``, or ``(full_loss, {"raw_loss_sum": raw_sum})``
    when ``extra_info`` is true.
    """
    raw_losses = self._loss_fun(raw_feature_strengths)
    raw_sum = T.sum(raw_losses)

    n_parallel, n_timestep = raw_feature_strengths.shape

    falloff_arr = np.array(self._falloff_rate, np.float32) ** T.cast(T.arange(n_timestep), 'float32')
    # falloff_mat[i, j] = rate ** (i - j)
    falloff_mat = T.shape_padright(falloff_arr) / T.shape_padleft(falloff_arr)
    # NOTE(review): the switch condition was missing from the source text
    # (only ``,1)`` survived). ``falloff_mat >= 1`` is assumed since it zeroes
    # the diagonal as the comment below describes -- confirm against upstream.
    falloff_scaling = T.switch(T.ge(falloff_mat, 1), 0, falloff_mat)/self._falloff_rate
    # falloff_scaling is of shape (n_timestep, n_timestep) with 0 along diagonal, and jump to 1 falling off along dimension 1
    # now we want to multiply through on both dimensions
    # NOTE(review): the dot operands were restored from the shape comment.
    first_multiply = T.dot(raw_feature_strengths, falloff_scaling) # shape (n_parallel, n_timestep)
    second_multiply = raw_feature_strengths * first_multiply
    unscaled_falloff_penalty = T.sum(second_multiply)

    full_loss = self._penalty_base * raw_sum + self._penalty_shock * unscaled_falloff_penalty

    if extra_info:
        return full_loss, {"raw_loss_sum":raw_sum}
    # previously nested under the ``if`` after its return, hence unreachable
    return full_loss
项目:deep-coref    作者:clarkkev    | 项目源码 | 文件源码
def get_padded_shuffled_mask(self, train, X, pad=0):
    """Return the input mask as an int8 tensor laid out (time, nb_samples, 1).

    When the layer has no input mask an all-ones mask shaped like ``X``
    without its last axis is used. ``pad`` zero time steps are prepended
    when ``pad > 0``.
    """
    mask = self.get_input_mask(train)
    if mask is None:
        # the sum over the last axis only serves to obtain the right shape
        mask = T.ones_like(X.sum(axis=-1))

    # (nb_samples, time) -> (nb_samples, time, 1); the appended axis is
    # explicitly marked broadcastable
    expanded = T.addbroadcast(T.shape_padright(mask), -1)
    # swap the first two axes: (time, nb_samples, 1)
    time_major = expanded.dimshuffle(1, 0, 2)

    if pad > 0:
        # left-pad in time with zeros
        zeros = alloc_zeros_matrix(pad, time_major.shape[1], 1)
        time_major = T.concatenate([zeros, time_major], axis=0)
    return time_major.astype('int8')
项目:online_action    作者:zhenyangli    | 项目源码 | 文件源码
def fprop(self):
        # Selects the scan inputs and step function depending on whether a
        # mask is present, then runs the scan and stores its results on self.

        # The dimension of self.mask is (Timestep, Minibatch).
        # We need to pad it to (Timestep, Minibatch, FeatureDim)
        # and keep the last one added dimensions broadcastable. TT.shape_padright
        # function is thus a good choice

        # NOTE(review): an ``else:`` appears to be missing before the second
        # pair of assignments below; as written they overwrite the unmasked
        # choice inside the ``mask is None`` branch -- confirm against upstream.
        if self.mask is None:
            scan_input = [self.input]
            scan_fn = self.step_fprop
            scan_input = [self.input, TT.shape_padright(self.mask, 1)]
            scan_fn = self.step_masked_fprop

        non_seqs = self.param
        # NOTE(review): the call below is cut off in this copy (its remaining
        # arguments and closing parenthesis are not visible).
        [self.output, self.cell_output], self.output_update = quick_unroll_scan(fn=scan_fn,
        #[self.output, self.cell_output], self.output_update = theano.scan(fn=scan_fn,
项目:epfl-semester-project-biaxialnn    作者:onanypoint    | 项目源码 | 文件源码
def loss_func(self, y_true, y_predict):
    """Return the negated sum of masked log-likelihoods.

    The mask along the last axis is ``[1, a, a, ...]`` (``output_size``
    entries in total) where ``a`` is channel 0 of ``y_true``, so every
    channel except the first one only counts where channel 0 is set.
    """
    active_notes = T.shape_padright(y_true[:, :, :, 0])
    always_on = T.ones_like(active_notes)
    gated = T.repeat(T.ones_like(active_notes), self.output_size - 2, -1)
    mask = T.concatenate([always_on, active_notes, gated], axis=-1)

    # 2*p*t - p - t + 1 equals p where t == 1 and 1 - p where t == 0
    likelihood = 2 * y_predict * y_true - y_predict - y_true + 1 + self.epsilon
    loglikelihoods = mask * T.log(likelihood)
    return T.neg(T.sum(loglikelihoods))
项目:NADE    作者:MarcCote    | 项目源码 | 文件源码
def sym_mask_logdensity_estimator_intermediate(self, x, mask):
    """Build the masked log-density estimate and its intermediate tensors.

    ``x`` and ``mask`` are transposed on entry, so they are expected with
    datapoints in columns (DxB).

    Returns the tuple
    ``(logdensity, z_alpha, z_mu, z_sigma, Alpha, Mu, Sigma, h)``.
    """
    non_linearity_name = self.parameters["nonlinearity"].get_name()
    assert(non_linearity_name == "sigmoid" or non_linearity_name == "RLU")
    x = x.T  # BxD
    mask = mask.T  # BxD
    output_mask = constantX(1) - mask  # BxD
    D = constantX(self.n_visible)
    d = mask.sum(1)  # d is the 1-based index of the dimension whose value to infer (not the size of the context)
    masked_input = x * mask  # BxD
    # NOTE(review): the ``T.dot(...)`` operands and ``self.bs`` were missing
    # from the source text; they were restored from the shape comments and
    # the otherwise unused ``masked_input`` -- confirm against upstream.
    h = self.nonlinearity(T.dot(masked_input, self.W1) + T.dot(mask, self.Wflags) + self.b1)  # BxH
    for l in xrange(self.n_layers - 1):
        h = self.nonlinearity(T.dot(h, self.Ws[l]) + self.bs[l])  # BxH
    z_alpha = T.tensordot(h, self.V_alpha, [[1], [1]]) + T.shape_padleft(self.b_alpha)
    z_mu = T.tensordot(h, self.V_mu, [[1], [1]]) + T.shape_padleft(self.b_mu)
    z_sigma = T.tensordot(h, self.V_sigma, [[1], [1]]) + T.shape_padleft(self.b_sigma)
    temp = T.exp(z_alpha)  # + 1e-6
    # temp += T.shape_padright(temp.sum(2)/1e-3)
    # normalise over the component axis
    Alpha = temp / T.shape_padright(temp.sum(2))  # BxDxC
    Mu = z_mu  # BxDxC
    Sigma = T.exp(z_sigma)  # + 1e-6 #BxDxC

    # Alpha = Alpha * T.shape_padright(output_mask) + T.shape_padright(mask)
    # Mu = Mu * T.shape_padright(output_mask)
    # Sigma = Sigma * T.shape_padright(output_mask) + T.shape_padright(mask)
    # Phi = -constantX(0.5) * T.sqr((Mu - T.shape_padright(x*output_mask)) / Sigma) - T.log(Sigma) - constantX(0.5 * np.log(2*np.pi)) #BxDxC

    # per-component Gaussian log-density
    Phi = -constantX(0.5) * T.sqr((Mu - T.shape_padright(x)) / Sigma) - T.log(Sigma) - constantX(0.5 * np.log(2 * np.pi))  # BxDxC
    # only the unobserved dimensions contribute; rescaled by D / (D - d)
    logdensity = (log_sum_exp(Phi + T.log(Alpha), axis=2) * output_mask).sum(1) * D / (D - d)
    return (logdensity, z_alpha, z_mu, z_sigma, Alpha, Mu, Sigma, h)
项目:NADE    作者:MarcCote    | 项目源码 | 文件源码
def sym_masked_neg_loglikelihood_gradient(self, x, mask):
    """Build the masked negative log-likelihood and its parameter gradients.

    x is a matrix of column datapoints (DxB) D = n_visible, B = batch size

    Returns ``(loss, gradients)`` where ``gradients`` maps parameter names
    to symbolic gradients. The ``"Ws"`` and ``"bs"`` entries are only
    present when ``self.n_layers > 1``.
    """
    logdensity, z_alpha, z_mu, z_sigma, Alpha, Mu, Sigma, h = self.sym_mask_logdensity_estimator_intermediate(x, mask)

#        nnz = output_mask.sum(0)
#        sparsity_multiplier = T.shape_padright(T.shape_padleft((B+1e-6)/(nnz+1e-6)))

#        wPhi = T.maximum(Phi + T.log(Alpha), constantX(-100.0)) #BxDxC
#        lp_current = log_sum_exp(wPhi, axis = 2) * output_mask #BxD
#        lp_current_sum = (lp_current.sum(1) * D / (D-d)).sum() #1

    loglikelihood = logdensity.mean(dtype=floatX)
    loss = -loglikelihood

    dp_dz_alpha = T.grad(loss, z_alpha)  # BxDxC
    gb_alpha = dp_dz_alpha.sum(0)  # DxC
    gV_alpha = T.tensordot(h.T, dp_dz_alpha, [[1], [0]]).dimshuffle((1, 0, 2))  # DxHxC

    dp_dz_mu = T.grad(loss, z_mu)  # BxDxC
    dp_dz_mu = dp_dz_mu * Sigma  # Heuristic
    gb_mu = dp_dz_mu.sum(0)  # DxC
    gV_mu = T.tensordot(h.T, dp_dz_mu, [[1], [0]]).dimshuffle((1, 0, 2))  # DxHxC

    dp_dz_sigma = T.grad(loss, z_sigma)  # BxDxC
    gb_sigma = dp_dz_sigma.sum(0)  # DxC
    gV_sigma = T.tensordot(h.T, dp_dz_sigma, [[1], [0]]).dimshuffle((1, 0, 2))  # DxHxC

    if self.n_layers > 1:
        # NOTE(review): ``self.bs`` was missing from the source text and was
        # restored from the ``gbs`` / "bs" names on this line and the next.
        gWs, gbs, gW1, gWflags, gb1 = T.grad(loss, [self.Ws, self.bs, self.W1, self.Wflags, self.b1])
        gradients = {"V_alpha":gV_alpha, "b_alpha":gb_alpha, "V_mu":gV_mu, "b_mu":gb_mu, "V_sigma":gV_sigma, "b_sigma":gb_sigma, "Ws":gWs, "bs":gbs, "W1":gW1, "b1":gb1, "Wflags":gWflags}
    else:
        # single hidden layer: there are no Ws / bs to differentiate
        gW1, gWflags, gb1 = T.grad(loss, [self.W1, self.Wflags, self.b1])
        gradients = {"V_alpha":gV_alpha, "b_alpha":gb_alpha, "V_mu":gV_mu, "b_mu":gb_mu, "V_sigma":gV_sigma, "b_sigma":gb_sigma, "W1":gW1, "b1":gb1, "Wflags":gWflags}
    # Gradients
    return (loss, gradients)
项目:NADE    作者:MarcCote    | 项目源码 | 文件源码
def log_sum_exp(x, axis=1):
    """Numerically stable ``log(sum(exp(x)))`` along ``axis``.

    The maximum along ``axis`` is subtracted before exponentiating and added
    back afterwards. The reduced axis is removed from the result.
    """
    max_x = T.max(x, axis)
    # keepdims leaves the reduced axis in place as a broadcastable dimension,
    # so the subtraction lines up for any ``axis`` and not only the last one
    # (which is what padding on the right assumed).
    shifted = x - T.max(x, axis, keepdims=True)
    return max_x + T.log(T.sum(T.exp(shifted), axis))
项目:keras-recommendation    作者:sonyisme    | 项目源码 | 文件源码
def get_padded_shuffled_mask(self, train, X, pad=0):
    """Return the input mask as an int8 tensor laid out (time, nb_samples, 1).

    When the layer has no input mask an all-ones mask shaped like ``X``
    without its last axis is used. ``pad`` zero time steps are prepended
    when ``pad > 0``.
    """
    mask = self.get_input_mask(train)
    if mask is None:
        # the sum over the last axis only serves to obtain the right shape
        mask = T.ones_like(X.sum(axis=-1))

    # (nb_samples, time) -> (nb_samples, time, 1); the appended axis is
    # explicitly marked broadcastable (the layout is still sample-major here)
    expanded = T.addbroadcast(T.shape_padright(mask), -1)
    # swap the first two axes: (time, nb_samples, 1)
    time_major = expanded.dimshuffle(1, 0, 2)

    if pad > 0:
        # left-pad in time with zeros
        zeros = alloc_zeros_matrix(pad, time_major.shape[1], 1)
        time_major = T.concatenate([zeros, time_major], axis=0)
    return time_major.astype('int8')
项目    作者:fizerkhan    | 项目源码 | 文件源码
def _simple_norm(x, eps=1e-5):
    """Standardise ``x`` along its last axis.

    Subtracts the last-axis mean and divides by ``eps`` plus the last-axis
    standard deviation.
    """
    centre = tensor.shape_padright(x.mean(-1))
    spread = tensor.shape_padright(x.std(-1))
    return (x - centre) / (eps + spread)
项目    作者:fizerkhan    | 项目源码 | 文件源码
def one_hot(t, r=None):
    """Compute one hot encoding.

    given a tensor t of dimension d with integer values from range(r), return a
    new tensor of dimension d + 1 with values 0/1, where the last dimension
    gives a one-hot representation of the values in t.
    if r is not given, r is set to max(t) + 1
    """
    if r is None:
        r = tensor.max(t) + 1

    # arange(r) sits on a new last axis and is compared against ``t`` padded
    # with a trailing broadcastable axis
    ranges = tensor.shape_padleft(tensor.arange(r), t.ndim)
    return tensor.eq(ranges, tensor.shape_padright(t, 1))
项目    作者:fizerkhan    | 项目源码 | 文件源码
def cost_gmm(y, mu, sig, weight):
    """Gaussian mixture model negative log-likelihood.

    Computes the cost.

    ``y`` is flattened to two dimensions keeping its last axis; ``mu`` and
    ``sig`` are reshaped to ``(-1, y.shape[-1], k)`` and ``weight`` to
    ``(-1, k)`` where ``k = weight.shape[-1]``.

    Returns the negative log-likelihood reshaped to ``y.shape[:-1]``.
    """
    n_dim = y.ndim
    shape_y = y.shape

    k = weight.shape[-1]

    y = y.reshape((-1, shape_y[-1]))
    # trailing broadcastable axis so ``y`` lines up with the k components
    y = tensor.shape_padright(y)

    mu = mu.reshape((-1, shape_y[-1], k))
    sig = sig.reshape((-1, shape_y[-1], k))
    weight = weight.reshape((-1, k))

    diff = tensor.sqr(y - mu)

    # per-component log-density, summed over the feature axis
    inner = -0.5 * tensor.sum(
        diff / sig**2 +
        2 * tensor.log(sig) + tensor.log(2 * numpy.pi), axis=-2)

    nll = -logsumexp(tensor.log(weight) + inner, axis=-1)

    return nll.reshape(shape_y[:-1], ndim=n_dim - 1)
项目:scribe    作者:sotelo    | 项目源码 | 文件源码
def one_hot(t, r=None):
    """Compute one hot encoding.

    given a tensor t of dimension d with integer values from range(r), return a
    new tensor of dimension d + 1 with values 0/1, where the last dimension
    gives a one-hot representation of the values in t.

    if r is not given, r is set to max(t) + 1
    """
    if r is None:
        r = tensor.max(t) + 1

    # arange(r) sits on a new last axis and is compared against ``t`` padded
    # with a trailing broadcastable axis
    ranges = tensor.shape_padleft(tensor.arange(r), t.ndim)
    return tensor.eq(ranges, tensor.shape_padright(t, 1))
项目:scribe    作者:sotelo    | 项目源码 | 文件源码
def bivariate_gmm(y, mu, sigma, corr, coeff, binary, epsilon=1e-5):
    """Bivariate gaussian mixture model negative log-likelihood.

    The last axis of ``y`` is read as ``(pen, x, y)``. ``mu`` and ``sigma``
    are indexed as ``[:, 0, :]`` for x and ``[:, 1, :]`` for y.

    Returns the negative log-likelihood reshaped to ``y.shape[:-1]``; it
    combines the mixture term with a Bernoulli log-likelihood of the pen
    value under ``binary``.
    """
    n_dim = y.ndim
    shape_y = y.shape
    y = y.reshape((-1, shape_y[-1]))
    # trailing broadcastable axis so the targets line up with the components
    y = tensor.shape_padright(y)

    data_pen = y[:, 0, :]
    data_x = y[:, 1, :]
    data_y = y[:, 2, :]

    sigma_x = sigma[:, 0, :]
    sigma_y = sigma[:, 1, :]

    std_e_x = (data_x - mu[:, 0, :]) / sigma_x
    std_e_y = (data_y - mu[:, 1, :]) / sigma_y

    # keep ``binary`` away from 0 before taking logs
    binary = (binary + epsilon) * (1. - 2. * epsilon)

    c_b = tensor.sum(
        tensor.xlogx.xlogy0(data_pen, binary) +
        tensor.xlogx.xlogy0(1. - data_pen, 1. - binary), axis=1)

    # 1 - corr**2, offset by epsilon so the division and log stay finite
    buff = 1. - corr**2 + epsilon

    z = std_e_x**2 + std_e_y**2 - 2. * corr * std_e_x * std_e_y

    cost = - z / (2. * buff) - 0.5 * tensor.log(buff) - \
        tensor.log(sigma_x) - tensor.log(sigma_y) - tensor.log(2. * numpy.pi)

    nll = -logsumexp(tensor.log(coeff) + cost, axis=1) - c_b

    return nll.reshape(shape_y[:-1], ndim=n_dim - 1)
项目:scribe    作者:sotelo    | 项目源码 | 文件源码
def emit(self, readouts):
    """Sample from the distribution.

    Parameters
    ----------
    readouts: readouts from the rnn + attention

    Returns
    -------
    The concatenation ``[penup, s_x, s_y]`` along axis 1.
    """
    mu, sigma, corr, coeff, penup = self.components(readouts)

    # NOTE(review): the argument of ``predict`` was missing from the source
    # text. Drawing a component sample from ``coeff`` is assumed here --
    # confirm against upstream.
    idx = predict(
        self.theano_rng.multinomial(pvals=coeff, dtype=coeff.dtype),
        axis=1)

    # pick the parameters of the selected component for every row
    mu = mu[tensor.arange(mu.shape[0]), :, idx]
    sigma = sigma[tensor.arange(sigma.shape[0]), :, idx]
    corr = corr[tensor.arange(corr.shape[0]), idx]

    mu_x = mu[:, 0]
    mu_y = mu[:, 1]
    sigma_x = sigma[:, 0]
    sigma_y = sigma[:, 1]

    z = self.theano_rng.normal(
        size=mu.shape, avg=0., std=1., dtype=mu.dtype)

    # Bernoulli draw for the pen state
    un = self.theano_rng.uniform(size=penup.shape)
    penup = tensor.cast(un < penup, floatX)

    # correlated pair built from two independent standard normals
    s_x = tensor.shape_padright(mu_x + sigma_x * z[:, 0])
    s_y = mu_y + sigma_y * ((z[:, 0] * corr) + (
        z[:, 1] * tensor.sqrt(1. - corr**2)))
    s_y = tensor.shape_padright(s_y)
    s = tensor.concatenate([penup, s_x, s_y], axis=1)

    return s
项目:lstmprovisor-python    作者:Impro-Visor    | 项目源码 | 文件源码
def decode_to_probs(self, activations, relative_position, low_bound, high_bound):
    """Turn raw activations into a probability vector over the note choices.

    The last axis of ``activations`` is split into four softmax groups
    (columns 0-2, 3-6, 7-9 and 10 onward). The second and third groups are
    tiled to 12 entries and multiplied, spread over the octave group, and
    scaled by the third entry of the first group. The result is reshaped to
    the leading shape of ``activations`` with a last axis of length
    ``2 + high_bound - low_bound``.
    """
    assert (low_bound%12==0) and (high_bound-low_bound == self.num_octaves*12), "Circle of thirds must evenly divide into octaves"
    flat = T.reshape(activations, (-1, self.RAW_ENCODING_WIDTH))

    rsp = T.nnet.softmax(flat[:, :3])
    circle_4 = T.nnet.softmax(flat[:, 3:7])
    circle_3 = T.nnet.softmax(flat[:, 7:10])
    octave_choice = T.nnet.softmax(flat[:, 10:])

    # 4 x 3 tiling gives one value per pitch class within an octave
    octave_notes = T.tile(circle_4, (1, 3)) * T.tile(circle_3, (1, 4))
    # outer product over (octave, pitch class), flattened to one axis
    per_octave = T.shape_padright(octave_choice) * T.shape_padaxis(octave_notes, 1)
    full_notes = T.reshape(per_octave, (-1, 12 * self.num_octaves))

    play_weight = T.shape_padright(rsp[:, 2])
    full_probs = T.concatenate([rsp[:, :2], play_weight * full_notes], 1)

    newshape = T.concatenate([activations.shape[:-1], [2 + high_bound - low_bound]], 0)
    return T.reshape(full_probs, newshape, ndim=activations.ndim)
项目:lstmprovisor-python    作者:Impro-Visor    | 项目源码 | 文件源码
def queue_transform(feature_strengths, feature_vects, return_strengths=False):
    """
    Process features according to a "fragmented queue", where each timestep
    gets a size-1 window onto a feature queue. Effectively,
        feature_strengths gives how much to push onto queue
        feature_vects gives what to push on
        pop weights are tied to feature_strengths
        output is a size-1 peek (without popping)

    Parameters:
        - feature_strengths: float32 tensor of shape (batch, push_timestep) in [0,1]
        - feature_vects: float32 tensor of shape (batch, push_timestep, feature_dim)
        - return_strengths: if true, also return the peek strengths

    Returns:
        - peek_vects: float32 tensor of shape (batch, timestep, feature_dim)
          (preceded by peek_strengths, of shape (batch, timestep,
          push_timestep), when return_strengths is true)
    """
    n_batch, n_time, n_feature = feature_vects.shape

    cum_sum_str = T.extra_ops.cumsum(feature_strengths, 1)

    # We will be working in (batch, timestep, push_timestep)
    # For each timestep, if we subtract out the sum of pushes before that timestep
    # and then cap to 0-1 we get the cumsums for just the features active in that
    # timestep
    timestep_adjustments = T.shape_padright(cum_sum_str - feature_strengths)
    push_time_cumsum = T.shape_padaxis(cum_sum_str, 1)
    relative_cumsum = push_time_cumsum - timestep_adjustments
    capped_cumsum = T.minimum(T.maximum(relative_cumsum, 0), 1)

    # Now we can recover the peek strengths by taking a diff
    shifted = T.concatenate([T.zeros((n_batch, n_time, 1)), capped_cumsum[:,:,:-1]],2)
    peek_strengths = capped_cumsum-shifted
    # Peek strengths is now (batch, timestep, push_timestep)

    result = T.batched_dot(peek_strengths, feature_vects)

    if return_strengths:
        return peek_strengths, result
    # previously nested under the ``if`` after its return, hence unreachable
    return result
项目:lstmprovisor-python    作者:Impro-Visor    | 项目源码 | 文件源码
def extract_sample_scan_results(self, spec, outputs):
    """
    Extract outputs from the scan results.

    Parameters:
        spec: object providing ``start_pos``, used as the first position
        outputs: The outputs from the scan associated with this stack

    Returns:
        positions, raw_output, sampled_output
    """
    # scan outputs have their first two axes swapped here; the start position
    # is prepended and the last scanned position dropped
    positions = T.concatenate([T.shape_padright(spec.start_pos), outputs[0].transpose((1,0))[:,:-1]], 1)
    sampled_output = outputs[2].transpose((1,0,2))
    raw_output = outputs[-1].transpose((1,0,2))

    return positions, raw_output, sampled_output
项目:mmdcgm-ssl    作者:thu-ml    | 项目源码 | 文件源码
def _get_output_for(self, input):
    """Return a 0/1 mask marking the arg-max position of every 2x2 window.

    ``input`` must be 3-D. The result has the shape of ``input`` with a 1 at
    the position of the maximum inside each non-overlapping 2x2 window and 0
    elsewhere.

    Integer sizes are assumed for ``self.i_s``, ``self.batch_size`` and
    ``self.n_channels`` -- TODO confirm.
    """
    assert input.ndim == 3 # only for 3D
    mask = T.zeros_like(input) # size (None, w, h)
    # stack the four members of each 2x2 window on a new last axis
    tmp = T.concatenate([T.shape_padright(input[:, ::2, ::2]),
        T.shape_padright(input[:, ::2, 1::2]), T.shape_padright(input[:, 1::2, ::2]),
        T.shape_padright(input[:, 1::2, 1::2])], axis=-1)
    index =  tmp.argmax(axis=-1) # size (None, w/2, h/2)
    # ``//`` keeps every quantity below an integer on Python 2 and 3 alike;
    # plain ``/`` is only integer division on Python 2 and the results are
    # used as tile counts and indices.
    i_r = 2*(np.tile(np.arange(self.i_s[0]//2), (self.i_s[1]//2,1))).T
    i_r = index//2 + T.shape_padleft(i_r)
    i_c = 2*(np.tile(np.arange(self.i_s[1]//2), (self.i_s[0]//2,1)))
    i_c = index%2 + T.shape_padleft(i_c)
    i_b = T.tile(T.arange(self.batch_size*self.n_channels),(self.i_s[0]//2*self.i_s[1]//2,1)).T
    mask = T.set_subtensor(mask[i_b.flatten(), i_r.flatten(), i_c.flatten()],1)
    return mask
项目:Relation-Network    作者:subercui    | 项目源码 | 文件源码
def apply(self, source, source_mask=None, source_x=None, attention=None):
    """Compute an attention-weighted sum of ``source`` over its first axis.

    :param source: the input tensor you want put attention on; shape (length, batch, 'embedding_len or feature_len')
    :param source_mask: mask (length, batch)
    :param source_x: this is the (Ua * h_j)
    :param attention: this is the si-1 in the original paper, dynamic
    :return: 2d (batch, 'embedding_len or feature_len')
    """
    # attention is 2
    if source.ndim != 3 or attention.ndim != 2:
        raise NotImplementedError

    # NOTE(review): the dot operands on the next two lines were missing from
    # the source text and were restored from the parameter descriptions above
    # -- confirm against upstream.
    align_matrix = T.tanh(source_x + T.dot(attention, self.Wa)[None, :, :])
    align = T.dot(align_matrix, self.v)
    # subtract the per-column maximum before exponentiating for stability
    align = T.exp(align - align.max(axis=0, keepdims=True))
    # my note: align is the attention scores, like [0.1, 0.2, 0.4, 0.3]
    # ``is not None`` rather than truthiness: calling bool() on a symbolic
    # tensor is not a reliable presence test.
    if source_mask is not None:
        align = align * source_mask
        # add 1 where a column is fully masked so the division stays finite
        normalization = align.sum(axis=0) + T.all(1 - source_mask, axis=0)
    else:
        normalization = align.sum(axis=0)
    align = align / normalization
    self.output = (T.shape_padright(align) * source).sum(axis=0)

    return self.output
项目:Relation-Network    作者:subercui    | 项目源码 | 文件源码
def apply(self, source, tag):
    """Compute an attention-weighted sum of ``source`` over its first axis.

    The scoring vector is selected with ``self.v[tag]``. The normalised
    weights are stored in ``self.align`` and the weighted sum in
    ``self.output``, which is also returned.

    Raises NotImplementedError when ``source`` is not 3-D.
    """
    if source.ndim != 3:
        raise NotImplementedError

    # NOTE(review): the dot operands and the bias term were missing from the
    # source text; ``T.dot(source, self.Ws) + self.bs`` and
    # ``T.dot(align_matrix, ...)`` are assumed -- confirm against upstream.
    source_x = T.dot(source, self.Ws) + self.bs
    align_matrix = T.tanh(source_x)
    align = T.dot(align_matrix, self.v[tag])
    # subtract the per-column maximum before exponentiating for stability
    align = T.exp(align - align.max(axis=0, keepdims=True))
    normalization = align.sum(axis=0)
    # shape is (length, batch)
    self.align = align / normalization
    self.output = (T.shape_padright(self.align) * source).sum(axis=0)
    return self.output
项目:deep-coref    作者:clarkkev    | 项目源码 | 文件源码
def get_output(self, train=False):
    """Zero every last-axis vector of the input that equals ``mask_value`` throughout.

    A vector is kept when at least one of its entries differs from
    ``self.mask_value``.
    """
    X = self.get_input(train)
    differs = 1. - T.eq(X, self.mask_value)
    keep = T.any(differs, axis=-1)
    return X * T.shape_padright(keep)
项目:online_action    作者:zhenyangli    | 项目源码 | 文件源码
def conv2d_same(input, filters, input_shape=(None, None, None, None), filter_shape=(None, None, None, None),
    # NOTE(review): the signature is cut off in this copy -- the closing
    # ``):`` and the ``padding`` parameter read below are not visible.
    #
    # Convolution that keeps the spatial size: the input is embedded in a
    # buffer enlarged by (filter size - 1) along axes 2 and 3 and then
    # convolved with border_mode='valid'.
    assert input.ndim == 4 and filters.ndim == 4
    assert (4 == len(input_shape)) and (4 == len(filter_shape))
    # only odd filter sizes are accepted
    assert (1 == filter_shape[2] % 2) and (1 == filter_shape[3] % 2)
    # 1x1 shortcut: handled by tensor4dot instead of a convolution
    if (tuple(input_shape[2:4]) == (1, 1) and tuple(filter_shape[2:4]) == (1, 1)) or (
                    tuple(filter_shape[2:4]) == (1, 1) and theano.config.device == "cpu"):
        return tensor4dot(input, filters)
        # NOTE(review): an ``else:`` appears to be missing here; the lines
        # below are indented as the body of that missing branch.
        new_row_begin = filters.shape[2] / 2
        new_row_end = input.shape[2] + filters.shape[2] / 2
        new_col_begin = filters.shape[3] / 2
        new_col_end = input.shape[3] + filters.shape[3] / 2
        if padding is not None:
            assert 1 == padding.ndim
            padded_input = TT.ones((
                input.shape[0], input.shape[1], input.shape[2] + filters.shape[2] - 1,
                input.shape[3] + filters.shape[3] - 1)).astype(theano.config.floatX)
            # NOTE(review): the call below is cut off in this copy (the value
            # being assigned and the closing parenthesis are missing).
            padded_input = TT.set_subtensor(padded_input[:, :, new_row_begin:new_row_end, new_col_begin:new_col_end],
            # reshape the 1-D padding to broadcast along axis 1
            padding = TT.shape_padleft(TT.shape_padright(padding, 2), 1)
            padded_input = padding * padded_input
            # NOTE(review): an ``else:`` appears to be missing before the
            # zero-filled buffer below.
            padded_input = TT.zeros((
                input.shape[0], input.shape[1], input.shape[2] + filters.shape[2] - 1,
                input.shape[3] + filters.shape[3] - 1)).astype(theano.config.floatX)
        # add the input into the central region of the buffer
        padded_input = TT.inc_subtensor(padded_input[:, :, new_row_begin:new_row_end, new_col_begin:new_col_end], input)
        # static shape hint for the padded input, filled in where known
        new_input_shape = [None, None, None, None]
        if input_shape[0] is not None:
            new_input_shape[0] = input_shape[0]
        if input_shape[1] is not None:
            new_input_shape[1] = input_shape[1]
        if input_shape[2] is not None and filter_shape[2] is not None:
            new_input_shape[2] = input_shape[2] + filter_shape[2] - 1
        if input_shape[3] is not None and filter_shape[3] is not None:
            new_input_shape[3] = input_shape[3] + filter_shape[3] - 1
        ret = TT.nnet.conv2d(input=padded_input, filters=filters, border_mode='valid',
                             input_shape=tuple(new_input_shape), filter_shape=filter_shape)

        return ret
项目:online_action    作者:zhenyangli    | 项目源码 | 文件源码
def quick_aggregate_pooling(input, pooling_func, mask=None):
    """Pool a 5-D tensor over its first axis.

    ``pooling_func`` selects the reduction: ``"max"`` or ``"mean"``.
    ``mask``, when given, must be 2-D; for ``"mean"`` it is normalised over
    axis 0 and used as per-step weights.

    Returns None for ``"L2"``, for unknown names, and for ``"max"`` with a
    mask.
    """
    assert input.ndim == 5
    assert mask.ndim == 2 if mask is not None else True
    if pooling_func == "max":
        if mask is None:
            return input.max(axis=0)
        # NOTE(review): no masked max is visible in this copy; control falls
        # through to the final ``return None``.
    elif pooling_func == "mean":
        if mask is None:
            return TT.cast(input.mean(axis=0), theano.config.floatX)
        # weighted mean; previously nested under the ``if`` after its return,
        # hence unreachable
        return (input * TT.shape_padright(mask / mask.sum(axis=0), 3)).sum(axis=0)
    elif pooling_func == "L2":
        # TODO Add Lp Pooling proposed by Yann LeCun
        return None
    return None
项目:crikey    作者:kastnerkyle    | 项目源码 | 文件源码
def theano_one_hot(t, n_classes=None):
    """Return a 0/1 tensor with one extra trailing axis that one-hot encodes ``t``.

    When ``n_classes`` is None it is taken to be ``max(t) + 1``.
    """
    n_classes = tensor.max(t) + 1 if n_classes is None else n_classes
    # class ids on a new last axis, compared against ``t`` padded on the right
    class_ids = tensor.shape_padleft(tensor.arange(n_classes), t.ndim)
    labels = tensor.shape_padright(t, 1)
    return tensor.eq(class_ids, labels)
项目:crikey    作者:kastnerkyle    | 项目源码 | 文件源码
def theano_one_hot(t, n_classes=None):
    """Return a 0/1 tensor with one extra trailing axis that one-hot encodes ``t``.

    When ``n_classes`` is None it is taken to be ``max(t) + 1``.
    """
    n_classes = tensor.max(t) + 1 if n_classes is None else n_classes
    # class ids on a new last axis, compared against ``t`` padded on the right
    class_ids = tensor.shape_padleft(tensor.arange(n_classes), t.ndim)
    labels = tensor.shape_padright(t, 1)
    return tensor.eq(class_ids, labels)
项目:crikey    作者:kastnerkyle    | 项目源码 | 文件源码
def theano_one_hot(t, n_classes=None):
    """Return a 0/1 tensor with one extra trailing axis that one-hot encodes ``t``.

    When ``n_classes`` is None it is taken to be ``max(t) + 1``.
    """
    n_classes = tensor.max(t) + 1 if n_classes is None else n_classes
    # class ids on a new last axis, compared against ``t`` padded on the right
    class_ids = tensor.shape_padleft(tensor.arange(n_classes), t.ndim)
    labels = tensor.shape_padright(t, 1)
    return tensor.eq(class_ids, labels)
项目:crikey    作者:kastnerkyle    | 项目源码 | 文件源码
def theano_one_hot(t, n_classes=None):
    """Return a 0/1 tensor with one extra trailing axis that one-hot encodes ``t``.

    When ``n_classes`` is None it is taken to be ``max(t) + 1``.
    """
    n_classes = tensor.max(t) + 1 if n_classes is None else n_classes
    # class ids on a new last axis, compared against ``t`` padded on the right
    class_ids = tensor.shape_padleft(tensor.arange(n_classes), t.ndim)
    labels = tensor.shape_padright(t, 1)
    return tensor.eq(class_ids, labels)
项目:crikey    作者:kastnerkyle    | 项目源码 | 文件源码
def theano_one_hot(t, n_classes=None):
    """Return a 0/1 tensor with one extra trailing axis that one-hot encodes ``t``.

    When ``n_classes`` is None it is taken to be ``max(t) + 1``.
    """
    n_classes = tensor.max(t) + 1 if n_classes is None else n_classes
    # class ids on a new last axis, compared against ``t`` padded on the right
    class_ids = tensor.shape_padleft(tensor.arange(n_classes), t.ndim)
    labels = tensor.shape_padright(t, 1)
    return tensor.eq(class_ids, labels)
项目:crikey    作者:kastnerkyle    | 项目源码 | 文件源码
def theano_one_hot(t, n_classes=None):
    """Return a 0/1 tensor with one extra trailing axis that one-hot encodes ``t``.

    When ``n_classes`` is None it is taken to be ``max(t) + 1``.
    """
    n_classes = tensor.max(t) + 1 if n_classes is None else n_classes
    # class ids on a new last axis, compared against ``t`` padded on the right
    class_ids = tensor.shape_padleft(tensor.arange(n_classes), t.ndim)
    labels = tensor.shape_padright(t, 1)
    return tensor.eq(class_ids, labels)
项目:crikey    作者:kastnerkyle    | 项目源码 | 文件源码
def theano_one_hot(t, n_classes=None):
    """Return a 0/1 tensor with one extra trailing axis that one-hot encodes ``t``.

    When ``n_classes`` is None it is taken to be ``max(t) + 1``.
    """
    n_classes = tensor.max(t) + 1 if n_classes is None else n_classes
    # class ids on a new last axis, compared against ``t`` padded on the right
    class_ids = tensor.shape_padleft(tensor.arange(n_classes), t.ndim)
    labels = tensor.shape_padright(t, 1)
    return tensor.eq(class_ids, labels)
项目:crikey    作者:kastnerkyle    | 项目源码 | 文件源码
def theano_one_hot(t, n_classes=None):
    """Return a 0/1 tensor with one extra trailing axis that one-hot encodes ``t``.

    When ``n_classes`` is None it is taken to be ``max(t) + 1``.
    """
    n_classes = tensor.max(t) + 1 if n_classes is None else n_classes
    # class ids on a new last axis, compared against ``t`` padded on the right
    class_ids = tensor.shape_padleft(tensor.arange(n_classes), t.ndim)
    labels = tensor.shape_padright(t, 1)
    return tensor.eq(class_ids, labels)
项目:crikey    作者:kastnerkyle    | 项目源码 | 文件源码
def theano_one_hot(t, n_classes=None):
    """Return a 0/1 tensor with one extra trailing axis that one-hot encodes ``t``.

    When ``n_classes`` is None it is taken to be ``max(t) + 1``.
    """
    n_classes = tensor.max(t) + 1 if n_classes is None else n_classes
    # class ids on a new last axis, compared against ``t`` padded on the right
    class_ids = tensor.shape_padleft(tensor.arange(n_classes), t.ndim)
    labels = tensor.shape_padright(t, 1)
    return tensor.eq(class_ids, labels)
项目:crikey    作者:kastnerkyle    | 项目源码 | 文件源码
def theano_one_hot(t, n_classes=None):
    """Return a 0/1 tensor with one extra trailing axis that one-hot encodes ``t``.

    When ``n_classes`` is None it is taken to be ``max(t) + 1``.
    """
    n_classes = tensor.max(t) + 1 if n_classes is None else n_classes
    # class ids on a new last axis, compared against ``t`` padded on the right
    class_ids = tensor.shape_padleft(tensor.arange(n_classes), t.ndim)
    labels = tensor.shape_padright(t, 1)
    return tensor.eq(class_ids, labels)
项目:crikey    作者:kastnerkyle    | 项目源码 | 文件源码
def theano_one_hot(t, n_classes=None):
    """Return a 0/1 tensor with one extra trailing axis that one-hot encodes ``t``.

    When ``n_classes`` is None it is taken to be ``max(t) + 1``.
    """
    n_classes = tensor.max(t) + 1 if n_classes is None else n_classes
    # class ids on a new last axis, compared against ``t`` padded on the right
    class_ids = tensor.shape_padleft(tensor.arange(n_classes), t.ndim)
    labels = tensor.shape_padright(t, 1)
    return tensor.eq(class_ids, labels)
项目:RecommendationSystem    作者:TURuibo    | 项目源码 | 文件源码
def get_padded_shuffled_mask(self, train, X, pad=0):
    """Return the input mask as an int8 tensor laid out (time, nb_samples, 1).

    When the layer has no input mask an all-ones mask shaped like ``X``
    without its last axis is used. ``pad`` zero time steps are prepended
    when ``pad > 0``.
    """
    mask = self.get_input_mask(train)
    if mask is None:
        # the sum over the last axis only serves to obtain the right shape
        mask = T.ones_like(X.sum(axis=-1))

    # (nb_samples, time) -> (nb_samples, time, 1); the appended axis is
    # explicitly marked broadcastable (the layout is still sample-major here)
    expanded = T.addbroadcast(T.shape_padright(mask), -1)
    # swap the first two axes: (time, nb_samples, 1)
    time_major = expanded.dimshuffle(1, 0, 2)

    if pad > 0:
        # left-pad in time with zeros
        zeros = alloc_zeros_matrix(pad, time_major.shape[1], 1)
        time_major = T.concatenate([zeros, time_major], axis=0)
    return time_major.astype('int8')
项目:pl-cnn    作者:oval-group    | 项目源码 | 文件源码
def my_pool_2d(input, ds, ignore_border=None, st=None, padding=(0, 0),
    # NOTE(review): the signature is cut off in this copy -- the closing
    # ``):`` and the ``mode`` parameter read below are not visible. The
    # docstring delimiters around the description below are missing as well.
    This function is a patch to the maxpool op of Theano:
    contrarily to current implementation of maxpool, the gradient is backpropagated
    to only one input of a given patch if several inputs have the same value. This is
    consistent with the CuDNN implementation (and therefore the op is replaced by the
    CuDNN version when possible).

    if input.ndim < 2:
        raise NotImplementedError('pool_2d requires a dimension >= 2')

    if not ignore_border is None:
        # check that ignore_border is True if provided
        assert ignore_border
    # ignore_border is forced to True regardless of the argument
    ignore_border = True

    # 4-D input can be pooled directly
    if input.ndim == 4:
        op = MyPool(ds, ignore_border, st=st, padding=padding, mode=mode)
        output = op(input)
        return output

    # extract image dimensions
    img_shape = input.shape[-2:]

    # count the number of "leading" dimensions, store as dmatrix
    # NOTE(review): the right-hand side below is cut off in this copy;
    # presumably a product over input.shape[:-2] -- confirm against upstream.
    batch_size =[:-2])
    batch_size = tensor.shape_padright(batch_size, 1)

    # store as 4D tensor with shape: (batch_size,1,height,width)
    new_shape = tensor.cast(tensor.join(0, batch_size,
                                        img_shape), 'int64')
    input_4D = tensor.reshape(input, new_shape, ndim=4)

    # downsample mini-batch of images
    op = MyPool(ds, ignore_border, st=st, padding=padding, mode=mode)
    output = op(input_4D)

    # restore to original shape
    outshp = tensor.join(0, input.shape[:-2], output.shape[-2:])
    return tensor.reshape(output, outshp, ndim=input.ndim)
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def pad_dims(input, leftdims, rightdims):
    """Reshapes the input to a (leftdims + rightdims) tensor

    This helper function is used to convert pooling inputs with arbitrary
    non-pooling dimensions to the correct number of dimensions for the
    GPU pooling ops.

    This reduces or expands the number of dimensions of the input to
    exactly `leftdims`, by adding extra dimensions on the left or by
    combining some existing dimensions on the left of the input.

    Use `unpad_dims` to reshape back to the original dimensions.

    Examples
    --------
    Given input of shape (3, 5, 7), ``pad_dims(input, 2, 2)``
    adds a singleton dimension and reshapes to (1, 3, 5, 7).
    Given that output from pad_dims, ``unpad_dims(output, input, 2, 2)``
    reshapes back to (3, 5, 7).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 2)``
    does not reshape and returns output with shape (3, 5, 7, 9).

    Given input of shape (3, 5, 7, 9, 11), ``pad_dims(input, 2, 2)``
    combines the first two dimensions and reshapes to (15, 7, 9, 11).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 3)``
    adds a singleton dimension and reshapes to (1, 3, 5, 7, 9).
    """
    assert input.ndim >= rightdims

    if input.ndim == (leftdims + rightdims):
        return input

    # extract image dimensions
    img_shape = input.shape[-rightdims:]

    # NOTE(review): the tails of both ``tensor.join`` calls, the ``else:``
    # and the ``tensor.prod(...)`` call were missing from the source text and
    # were restored from the docstring examples -- confirm against upstream.
    non_pool_ndim = input.ndim - rightdims
    if non_pool_ndim < leftdims:
        # too few dimensions, pad on the left
        dummy_dims = tensor.as_tensor([1] * (leftdims - non_pool_ndim))
        new_shape = tensor.join(0, dummy_dims,
                                input.shape[:non_pool_ndim],
                                img_shape)
    else:
        # too many dimensions, combine the leading dimensions
        batched_ndim = non_pool_ndim - leftdims + 1
        batch_size = tensor.prod(input.shape[:batched_ndim])
        # convert to a vector for tensor.join
        batch_size = tensor.shape_padright(batch_size, 1)
        new_shape = tensor.join(0, batch_size,
                                input.shape[batched_ndim:non_pool_ndim],
                                img_shape)

    # store in the required shape
    new_shape = tensor.cast(new_shape, 'int64')
    input_ND = GpuReshape(leftdims + rightdims)(input, new_shape)
    return input_ND
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def pad_dims(input, leftdims, rightdims):
    """Reshapes the input to a (leftdims + rightdims) tensor

    This helper function is used to convert pooling inputs with arbitrary
    non-pooling dimensions to the correct number of dimensions for the
    GPU pooling ops.

    This reduces or expands the number of dimensions of the input to
    exactly `leftdims`, by adding extra dimensions on the left or by
    combining some existing dimensions on the left of the input.

    Use `unpad_dims` to reshape back to the original dimensions.

    Examples
    --------
    Given input of shape (3, 5, 7), ``pad_dims(input, 2, 2)``
    adds a singleton dimension and reshapes to (1, 3, 5, 7).
    Given that output from pad_dims, ``unpad_dims(output, input, 2, 2)``
    reshapes back to (3, 5, 7).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 2)``
    does not reshape and returns output with shape (3, 5, 7, 9).

    Given input of shape (3, 5, 7, 9, 11), ``pad_dims(input, 2, 2)``
    combines the first two dimensions and reshapes to (15, 7, 9, 11).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 3)``
    adds a singleton dimension and reshapes to (1, 3, 5, 7, 9).
    """
    # The examples above were corrected to match the code: singleton
    # dimensions are joined on the left, and combined dimensions are
    # multiplied (3 * 5 = 15), not added.
    assert input.ndim >= rightdims

    if input.ndim == (leftdims + rightdims):
        return input

    # extract image dimensions
    img_shape = input.shape[-rightdims:]

    # NOTE(review): the tails of both ``tensor.join`` calls, the ``else:``
    # and the ``tensor.prod(...)`` call were missing from the source text and
    # were restored from the surrounding comments -- confirm against upstream.
    non_pool_ndim = input.ndim - rightdims
    if non_pool_ndim < leftdims:
        # too few dimensions, pad on the left
        dummy_dims = tensor.as_tensor([1] * (leftdims - non_pool_ndim))
        new_shape = tensor.join(0, dummy_dims,
                                input.shape[:non_pool_ndim],
                                img_shape)
    else:
        # too many dimensions, combine the leading dimensions
        batched_ndim = non_pool_ndim - leftdims + 1
        batch_size = tensor.prod(input.shape[:batched_ndim])
        # convert to a vector for tensor.join
        batch_size = tensor.shape_padright(batch_size, 1)
        new_shape = tensor.join(0, batch_size,
                                input.shape[batched_ndim:non_pool_ndim],
                                img_shape)

    # store in the required shape
    new_shape = tensor.cast(new_shape, 'int64')
    input_ND = GpuReshape(leftdims + rightdims)(input, new_shape)
    return input_ND