We extracted the following 42 code examples from open-source Python projects to illustrate how to use theano.tensor.shape_padright().
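Before the project examples, here is a minimal standalone sketch (not taken from any of the projects below) of what T.shape_padright does: it appends broadcastable dimensions of length 1 on the right of a tensor's shape, with an optional count argument.

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')            # shape (rows, cols)
y = T.shape_padright(x)      # shape (rows, cols, 1); the new axis is broadcastable
z = T.shape_padright(x, 2)   # shape (rows, cols, 1, 1)

f = theano.function([x], [y.shape, z.shape])
print(f(np.zeros((3, 4), dtype=theano.config.floatX)))
# [array([3, 4, 1]), array([3, 4, 1, 1])]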
def compile_eval_function(nnet):
    X = T.tensor4()
    y = T.ivector()

    # get prediction by fully convolutional network
    prediction = lasagne.layers.get_output(nnet.dense3_conv_layer,
                                           deterministic=True, inputs=X)

    # get output scores on first dim
    # before flattening on 2dim and then get scores on second dim
    prediction = prediction.transpose((1, 0, 2, 3))\
        .flatten(2).transpose((1, 0))
    prediction = T.nnet.softmax(prediction)

    # spatial averaging
    prediction = T.mean(prediction, axis=0)

    # compute top1 and top5 accuracies
    sorted_pred = T.argsort(prediction)
    top1_acc = T.mean(T.eq(sorted_pred[-1], y), dtype='floatX')
    top5_acc = T.mean(T.any(T.eq(sorted_pred[-5:], T.shape_padright(y)),
                            axis=1), dtype='floatX')

    return theano.function([X, y], [top1_acc, top5_acc])
def sym_logdensity(self, x): """ x is a matrix of column datapoints (VxB) V = n_visible, B = batch size """ def density_given_previous_a_and_x(x, w, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma, activations_factor, p_prev, a_prev, x_prev): a = a_prev + T.dot(T.shape_padright(x_prev, 1), T.shape_padleft(w, 1)) h = self.nonlinearity(a * activations_factor) # BxH Alpha = T.nnet.softmax(T.dot(h, V_alpha) + T.shape_padleft(b_alpha)) # BxC Mu = T.dot(h, V_mu) + T.shape_padleft(b_mu) # BxC Sigma = T.exp((T.dot(h, V_sigma) + T.shape_padleft(b_sigma))) # BxC p = p_prev + log_sum_exp(-constantX(0.5) * T.sqr((Mu - T.shape_padright(x, 1)) / Sigma) - T.log(Sigma) - constantX(0.5 * np.log(2 * np.pi)) + T.log(Alpha)) return (p, a, x) # First element is different (it is predicted from the bias only) a0 = T.zeros_like(T.dot(x.T, self.W)) # BxH p0 = T.zeros_like(x[0]) x0 = T.ones_like(x[0]) ([ps, _as, _xs], updates) = theano.scan(density_given_previous_a_and_x, sequences=[x, self.W, self.V_alpha, self.b_alpha, self.V_mu, self.b_mu, self.V_sigma, self.b_sigma, self.activation_rescaling], outputs_info=[p0, a0, x0]) return (ps[-1], updates)
def sym_logdensity(self, x): """ x is a matrix of column datapoints (VxB) V = n_visible, B = batch size """ def density_given_previous_a_and_x(x, w, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma, activations_factor, p_prev, a_prev, x_prev): a = a_prev + T.dot(T.shape_padright(x_prev, 1), T.shape_padleft(w, 1)) h = self.nonlinearity(a * activations_factor) # BxH Alpha = T.nnet.softmax(T.dot(h, V_alpha) + T.shape_padleft(b_alpha)) # BxC Mu = T.dot(h, V_mu) + T.shape_padleft(b_mu) # BxC Sigma = T.exp((T.dot(h, V_sigma) + T.shape_padleft(b_sigma))) # BxC p = p_prev + log_sum_exp(T.log(Alpha) - T.log(2 * Sigma) - T.abs_(Mu - T.shape_padright(x, 1)) / Sigma) return (p, a, x) # First element is different (it is predicted from the bias only) a0 = T.zeros_like(T.dot(x.T, self.W)) # BxH p0 = T.zeros_like(x[0]) x0 = T.ones_like(x[0]) ([ps, _as, _xs], updates) = theano.scan(density_given_previous_a_and_x, sequences=[x, self.W, self.V_alpha, self.b_alpha, self.V_mu, self.b_mu, self.V_sigma, self.b_sigma, self.activation_rescaling], outputs_info=[p0, a0, x0]) return (ps[-1], updates)
def note_to_encoding(self, chosen_note, relative_position, low_bound, high_bound):
    assert chosen_note.ndim == 1
    n_batch = chosen_note.shape[0]

    dont_play_version = T.switch(
        T.shape_padright(T.eq(chosen_note, 0)),
        T.tile(np.array([[1, 0] + [0]*(self.ENCODING_WIDTH-2)], dtype=np.float32), (n_batch, 1)),
        T.tile(np.array([[0, 1] + [0]*(self.ENCODING_WIDTH-2)], dtype=np.float32), (n_batch, 1)))

    rcp = T.tile(np.array([0, 0, 1], dtype=np.float32), (n_batch, 1))
    circle_1 = T.eye(4)[(chosen_note-2) % 4]
    circle_2 = T.eye(3)[(chosen_note-2) % 3]
    octave = T.eye(self.num_octaves)[(chosen_note-2+low_bound-self.octave_start)//12]

    play_version = T.concatenate([rcp, circle_1, circle_2, octave], 1)

    encoded_form = T.switch(
        T.shape_padright(T.lt(chosen_note, 2)),
        dont_play_version,
        play_version
    )
    return encoded_form
def get_loss(self, raw_feature_strengths, raw_feature_vects, extra_info=False):
    raw_losses = self._loss_fun(raw_feature_strengths)
    raw_sum = T.sum(raw_losses)

    n_parallel, n_timestep = raw_feature_strengths.shape
    falloff_arr = np.array(self._falloff_rate, np.float32) ** T.cast(T.arange(n_timestep), 'float32')
    falloff_mat = T.shape_padright(falloff_arr) / T.shape_padleft(falloff_arr)
    falloff_scaling = T.switch(T.ge(falloff_mat, 1), 0, falloff_mat) / self._falloff_rate
    # falloff_scaling is of shape (n_timestep, n_timestep) with 0 along diagonal, and
    # jump to 1 falling off along dimension 1
    # now we want to multiply through on both dimensions
    first_multiply = T.dot(raw_feature_strengths, falloff_scaling)  # shape (n_parallel, n_timestep)
    second_multiply = raw_feature_strengths * first_multiply
    unscaled_falloff_penalty = T.sum(second_multiply)

    full_loss = self._penalty_base * raw_sum + self._penalty_shock * unscaled_falloff_penalty

    if extra_info:
        return full_loss, {"raw_loss_sum": raw_sum}
    else:
        return full_loss
def get_padded_shuffled_mask(self, train, X, pad=0):
    mask = self.get_input_mask(train)
    if mask is None:
        mask = T.ones_like(X.sum(axis=-1))  # is there a better way to do this without a sum?

    # mask is (nb_samples, time)
    mask = T.shape_padright(mask)  # (nb_samples, time, 1)
    mask = T.addbroadcast(mask, -1)  # the new dimension (the '1') is made broadcastable
    # see http://deeplearning.net/software/theano/library/tensor/basic.html#broadcasting-in-theano-vs-numpy
    mask = mask.dimshuffle(1, 0, 2)  # (time, nb_samples, 1)

    if pad > 0:
        # left-pad in time with 0
        padding = alloc_zeros_matrix(pad, mask.shape[1], 1)
        mask = T.concatenate([padding, mask], axis=0)
    return mask.astype('int8')
def fprop(self):
    # The dimension of self.mask is (Timestep, Minibatch).
    # We need to pad it to (Timestep, Minibatch, FeatureDim)
    # and keep the last added dimension broadcastable. TT.shape_padright
    # is thus a good choice.
    if self.mask is None:
        scan_input = [self.input]
        scan_fn = self.step_fprop
    else:
        scan_input = [self.input, TT.shape_padright(self.mask, 1)]
        scan_fn = self.step_masked_fprop

    non_seqs = self.param
    [self.output, self.cell_output], self.output_update = quick_unroll_scan(fn=scan_fn,
    # [self.output, self.cell_output], self.output_update = theano.scan(fn=scan_fn,
                                                                        outputs_info=[self.init_hidden_state, self.init_cell_state],
                                                                        sequences=scan_input,
                                                                        non_sequences=non_seqs,
                                                                        n_steps=self.n_steps
                                                                        )
def loss_func(self, y_true, y_predict):
    active_notes = T.shape_padright(y_true[:, :, :, 0])
    mask = T.concatenate([T.ones_like(active_notes), active_notes,
                          T.repeat(T.ones_like(active_notes), self.output_size-2, -1)], axis=-1)
    loglikelihoods = mask * T.log(2*y_predict*y_true - y_predict - y_true + 1 + self.epsilon)
    return T.neg(T.sum(loglikelihoods))
def sym_mask_logdensity_estimator_intermediate(self, x, mask):
    non_linearity_name = self.parameters["nonlinearity"].get_name()
    assert(non_linearity_name == "sigmoid" or non_linearity_name == "RLU")
    x = x.T  # BxD
    mask = mask.T  # BxD
    output_mask = constantX(1) - mask  # BxD
    D = constantX(self.n_visible)
    d = mask.sum(1)  # d is the 1-based index of the dimension whose value to infer (not the size of the context)
    masked_input = x * mask  # BxD
    h = self.nonlinearity(T.dot(masked_input, self.W1) + T.dot(mask, self.Wflags) + self.b1)  # BxH
    for l in xrange(self.n_layers - 1):
        h = self.nonlinearity(T.dot(h, self.Ws[l]) + self.bs[l])  # BxH
    z_alpha = T.tensordot(h, self.V_alpha, [[1], [1]]) + T.shape_padleft(self.b_alpha)
    z_mu = T.tensordot(h, self.V_mu, [[1], [1]]) + T.shape_padleft(self.b_mu)
    z_sigma = T.tensordot(h, self.V_sigma, [[1], [1]]) + T.shape_padleft(self.b_sigma)
    temp = T.exp(z_alpha)  # + 1e-6
    # temp += T.shape_padright(temp.sum(2)/1e-3)
    Alpha = temp / T.shape_padright(temp.sum(2))  # BxDxC
    Mu = z_mu  # BxDxC
    Sigma = T.exp(z_sigma)  # + 1e-6  # BxDxC

    # Alpha = Alpha * T.shape_padright(output_mask) + T.shape_padright(mask)
    # Mu = Mu * T.shape_padright(output_mask)
    # Sigma = Sigma * T.shape_padright(output_mask) + T.shape_padright(mask)
    # Phi = -constantX(0.5) * T.sqr((Mu - T.shape_padright(x*output_mask)) / Sigma) - T.log(Sigma) - constantX(0.5 * np.log(2*np.pi))  # BxDxC

    Phi = -constantX(0.5) * T.sqr((Mu - T.shape_padright(x)) / Sigma) - T.log(Sigma) - constantX(0.5 * np.log(2 * np.pi))  # BxDxC
    logdensity = (log_sum_exp(Phi + T.log(Alpha), axis=2) * output_mask).sum(1) * D / (D - d)
    return (logdensity, z_alpha, z_mu, z_sigma, Alpha, Mu, Sigma, h)
def sym_masked_neg_loglikelihood_gradient(self, x, mask):
    """ x is a matrix of column datapoints (DxB) D = n_visible, Bfloat = batch size """
    logdensity, z_alpha, z_mu, z_sigma, Alpha, Mu, Sigma, h = self.sym_mask_logdensity_estimator_intermediate(x, mask)

    # nnz = output_mask.sum(0)
    # sparsity_multiplier = T.shape_padright(T.shape_padleft((B+1e-6)/(nnz+1e-6)))

    # wPhi = T.maximum(Phi + T.log(Alpha), constantX(-100.0))  # BxDxC
    # lp_current = log_sum_exp(wPhi, axis=2) * output_mask  # BxD
    # lp_current_sum = (lp_current.sum(1) * D / (D-d)).sum()  # 1

    loglikelihood = logdensity.mean(dtype=floatX)
    loss = -loglikelihood

    dp_dz_alpha = T.grad(loss, z_alpha)  # BxDxC
    gb_alpha = dp_dz_alpha.sum(0)  # DxC
    gV_alpha = T.tensordot(h.T, dp_dz_alpha, [[1], [0]]).dimshuffle((1, 0, 2))  # DxHxC

    dp_dz_mu = T.grad(loss, z_mu)  # BxDxC
    dp_dz_mu = dp_dz_mu * Sigma  # Heuristic
    gb_mu = dp_dz_mu.sum(0)  # DxC
    gV_mu = T.tensordot(h.T, dp_dz_mu, [[1], [0]]).dimshuffle((1, 0, 2))  # DxHxC

    dp_dz_sigma = T.grad(loss, z_sigma)  # BxDxC
    gb_sigma = dp_dz_sigma.sum(0)  # DxC
    gV_sigma = T.tensordot(h.T, dp_dz_sigma, [[1], [0]]).dimshuffle((1, 0, 2))  # DxHxC

    if self.n_layers > 1:
        gWs, gbs, gW1, gWflags, gb1 = T.grad(loss, [self.Ws, self.bs, self.W1, self.Wflags, self.b1])
        gradients = {"V_alpha": gV_alpha, "b_alpha": gb_alpha, "V_mu": gV_mu, "b_mu": gb_mu,
                     "V_sigma": gV_sigma, "b_sigma": gb_sigma, "Ws": gWs, "bs": gbs,
                     "W1": gW1, "b1": gb1, "Wflags": gWflags}
    else:
        gW1, gWflags, gb1 = T.grad(loss, [self.W1, self.Wflags, self.b1])
        gradients = {"V_alpha": gV_alpha, "b_alpha": gb_alpha, "V_mu": gV_mu, "b_mu": gb_mu,
                     "V_sigma": gV_sigma, "b_sigma": gb_sigma, "W1": gW1, "b1": gb1, "Wflags": gWflags}
    # Gradients
    return (loss, gradients)
def log_sum_exp(x, axis=1):
    max_x = T.max(x, axis)
    return max_x + T.log(T.sum(T.exp(x - T.shape_padright(max_x, 1)), axis))
def get_padded_shuffled_mask(self, train, X, pad=0):
    mask = self.get_input_mask(train)
    if mask is None:
        mask = T.ones_like(X.sum(axis=-1))  # is there a better way to do this without a sum?

    # mask is (nb_samples, time)
    mask = T.shape_padright(mask)  # (nb_samples, time, 1)
    mask = T.addbroadcast(mask, -1)  # (time, nb_samples, 1) matrix.
    mask = mask.dimshuffle(1, 0, 2)  # (time, nb_samples, 1)

    if pad > 0:
        # left-pad in time with 0
        padding = alloc_zeros_matrix(pad, mask.shape[1], 1)
        mask = T.concatenate([padding, mask], axis=0)
    return mask.astype('int8')
def _simple_norm(x, eps=1e-5):
    output = (x - tensor.shape_padright(x.mean(-1))) / \
        (eps + tensor.shape_padright(x.std(-1)))

    return output
def one_hot(t, r=None):
    """Compute one hot encoding.

    given a tensor t of dimension d with integer values from range(r), return a
    new tensor of dimension d + 1 with values 0/1, where the last dimension
    gives a one-hot representation of the values in t.

    if r is not given, r is set to max(t) + 1
    """
    if r is None:
        r = tensor.max(t) + 1

    ranges = tensor.shape_padleft(tensor.arange(r), t.ndim)
    return tensor.eq(ranges, tensor.shape_padright(t, 1))
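The example above relies on the padded shapes broadcasting against each other. A minimal standalone check of that trick (a sketch, assuming `tensor` is the usual `theano.tensor` alias and r=5):

import numpy as np
import theano
import theano.tensor as tensor

t = tensor.ivector('t')
# shape_padleft(arange(5), t.ndim) has shape (1, 5); shape_padright(t, 1) has
# shape (len(t), 1), so tensor.eq broadcasts them into a (len(t), 5) one-hot matrix.
ranges = tensor.shape_padleft(tensor.arange(5), t.ndim)
onehot = tensor.eq(ranges, tensor.shape_padright(t, 1))

f = theano.function([t], onehot)
print(f(np.array([0, 2, 4], dtype='int32')))
# [[1 0 0 0 0]
#  [0 0 1 0 0]
#  [0 0 0 0 1]]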
def cost_gmm(y, mu, sig, weight):
    """Gaussian mixture model negative log-likelihood.

    Computes the cost.
    """
    n_dim = y.ndim
    shape_y = y.shape
    k = weight.shape[-1]

    y = y.reshape((-1, shape_y[-1]))
    y = tensor.shape_padright(y)

    mu = mu.reshape((-1, shape_y[-1], k))
    sig = sig.reshape((-1, shape_y[-1], k))
    weight = weight.reshape((-1, k))

    diff = tensor.sqr(y - mu)

    inner = -0.5 * tensor.sum(
        diff / sig**2 +
        2 * tensor.log(sig) + tensor.log(2 * numpy.pi), axis=-2)

    nll = -logsumexp(tensor.log(weight) + inner, axis=-1)

    return nll.reshape(shape_y[:-1], ndim=n_dim - 1)
def bivariate_gmm(y, mu, sigma, corr, coeff, binary, epsilon=1e-5):
    """Bivariate gaussian mixture model negative log-likelihood.

    Parameters
    """
    n_dim = y.ndim
    shape_y = y.shape
    y = y.reshape((-1, shape_y[-1]))
    y = tensor.shape_padright(y)

    data_pen = y[:, 0, :]
    data_x = y[:, 1, :]
    data_y = y[:, 2, :]

    sigma_x = sigma[:, 0, :]
    sigma_y = sigma[:, 1, :]

    std_e_x = (data_x - mu[:, 0, :]) / sigma_x
    std_e_y = (data_y - mu[:, 1, :]) / sigma_y

    binary = (binary + epsilon) * (1. - 2. * epsilon)

    c_b = tensor.sum(
        tensor.xlogx.xlogy0(data_pen, binary) +
        tensor.xlogx.xlogy0(1. - data_pen, 1. - binary), axis=1)

    buff = 1. - corr**2 + epsilon

    z = std_e_x**2 + std_e_y**2 - 2. * corr * std_e_x * std_e_y

    cost = - z / (2. * buff) - 0.5 * tensor.log(buff) - \
        tensor.log(sigma_x) - tensor.log(sigma_y) - tensor.log(2. * numpy.pi)

    nll = -logsumexp(tensor.log(coeff) + cost, axis=1) - c_b

    return nll.reshape(shape_y[:-1], ndim=n_dim - 1)
def emit(self, readouts):
    """Sample from the distribution.

    Parameters:
        readouts: readouts from the rnn + attention
    """
    mu, sigma, corr, coeff, penup = self.components(readouts)

    idx = predict(
        self.theano_rng.multinomial(
            pvals=coeff,
            dtype=coeff.dtype
        ), axis=1)

    mu = mu[tensor.arange(mu.shape[0]), :, idx]
    sigma = sigma[tensor.arange(sigma.shape[0]), :, idx]
    corr = corr[tensor.arange(corr.shape[0]), idx]

    mu_x = mu[:, 0]
    mu_y = mu[:, 1]
    sigma_x = sigma[:, 0]
    sigma_y = sigma[:, 1]

    z = self.theano_rng.normal(
        size=mu.shape, avg=0., std=1., dtype=mu.dtype)

    un = self.theano_rng.uniform(size=penup.shape)
    penup = tensor.cast(un < penup, floatX)

    s_x = tensor.shape_padright(mu_x + sigma_x * z[:, 0])
    s_y = mu_y + sigma_y * (
        (z[:, 0] * corr) + (z[:, 1] * tensor.sqrt(1. - corr**2)))
    s_y = tensor.shape_padright(s_y)
    s = tensor.concatenate([penup, s_x, s_y], axis=1)

    return s
def decode_to_probs(self, activations, relative_position, low_bound, high_bound):
    assert (low_bound % 12 == 0) and (high_bound - low_bound == self.num_octaves * 12), "Circle of thirds must evenly divide into octaves"
    squashed = T.reshape(activations, (-1, self.RAW_ENCODING_WIDTH))

    rsp = T.nnet.softmax(squashed[:, :3])
    c1 = T.nnet.softmax(squashed[:, 3:7])
    c2 = T.nnet.softmax(squashed[:, 7:10])
    octave_choice = T.nnet.softmax(squashed[:, 10:])
    octave_notes = T.tile(c1, (1, 3)) * T.tile(c2, (1, 4))
    full_notes = T.reshape(T.shape_padright(octave_choice) * T.shape_padaxis(octave_notes, 1), (-1, 12*self.num_octaves))
    full_probs = T.concatenate([rsp[:, :2], T.shape_padright(rsp[:, 2]) * full_notes], 1)

    newshape = T.concatenate([activations.shape[:-1], [2 + high_bound - low_bound]], 0)

    fixed = T.reshape(full_probs, newshape, ndim=activations.ndim)
    return fixed
def queue_transform(feature_strengths, feature_vects, return_strengths=False):
    """
    Process features according to a "fragmented queue", where each timestep
    gets a size-1 window onto a feature queue. Effectively,
        feature_strengths gives how much to push onto queue
        feature_vects gives what to push on
        pop weights are tied to feature_strengths
        output is a size-1 peek (without popping)

    Parameters:
        - feature_strengths: float32 tensor of shape (batch, push_timestep) in [0,1]
        - feature_vects: float32 tensor of shape (batch, push_timestep, feature_dim)

    Returns:
        - peek_vects: float32 tensor of shape (batch, timestep, feature_dim)
    """
    n_batch, n_time, n_feature = feature_vects.shape

    cum_sum_str = T.extra_ops.cumsum(feature_strengths, 1)

    # We will be working in (batch, timestep, push_timestep)
    # For each timestep, if we subtract out the sum of pushes before that timestep
    # and then cap to 0-1 we get the cumsums for just the features active in that
    # timestep
    timestep_adjustments = T.shape_padright(cum_sum_str - feature_strengths)
    push_time_cumsum = T.shape_padaxis(cum_sum_str, 1)
    relative_cumsum = push_time_cumsum - timestep_adjustments
    capped_cumsum = T.minimum(T.maximum(relative_cumsum, 0), 1)

    # Now we can recover the peek strengths by taking a diff
    shifted = T.concatenate([T.zeros((n_batch, n_time, 1)), capped_cumsum[:, :, :-1]], 2)
    peek_strengths = capped_cumsum - shifted
    # Peek strengths is now (batch, timestep, push_timestep)

    result = T.batched_dot(peek_strengths, feature_vects)

    if return_strengths:
        return peek_strengths, result
    else:
        return result
def extract_sample_scan_results(self, spec, outputs):
    """
    Extract outputs from the scan results.

    Parameters:
        outputs: The outputs from the scan associated with this stack

    Returns:
        positions, raw_output, sampled_output
    """
    positions = T.concatenate([T.shape_padright(spec.start_pos), outputs[0].transpose((1, 0))[:, :-1]], 1)

    sampled_output = outputs[2].transpose((1, 0, 2))
    raw_output = outputs[-1].transpose((1, 0, 2))

    return positions, raw_output, sampled_output
def _get_output_for(self, input):
    assert input.ndim == 3  # only for 3D

    mask = T.zeros_like(input)  # size (None, w, h)
    tmp = T.concatenate([T.shape_padright(input[:, ::2, ::2]),
                         T.shape_padright(input[:, ::2, 1::2]),
                         T.shape_padright(input[:, 1::2, ::2]),
                         T.shape_padright(input[:, 1::2, 1::2])], axis=-1)
    index = tmp.argmax(axis=-1)  # size (None, w/2, h/2)
    i_r = 2*(np.tile(np.arange(self.i_s[0]/2), (self.i_s[1]/2, 1))).T
    i_r = index/2 + T.shape_padleft(i_r)
    i_c = 2*(np.tile(np.arange(self.i_s[1]/2), (self.i_s[0]/2, 1)))
    i_c = index % 2 + T.shape_padleft(i_c)
    i_b = T.tile(T.arange(self.batch_size*self.n_channels), (self.i_s[0]/2*self.i_s[1]/2, 1)).T
    mask = T.set_subtensor(mask[i_b.flatten(), i_r.flatten(), i_c.flatten()], 1)
    return mask
def apply(self, source, source_mask=None, source_x=None, attention=None):
    """
    :param source: the input tensor you want to put attention on; shape (length, batch, 'embedding_len or feature_len')
    :param source_mask: mask (length, batch)
    :param source_x: this is the (Ua * h_j)
    :param attention: this is the si-1 in the original paper, dynamic
    :return: 2d (batch, 'embedding_len or feature_len')
    """
    # attention is 2d
    if source.ndim != 3 or attention.ndim != 2:
        raise NotImplementedError

    align_matrix = T.tanh(source_x + T.dot(attention, self.Wa)[None, :, :])
    align = theano.dot(align_matrix, self.v)
    align = T.exp(align - align.max(axis=0, keepdims=True))
    # my note: align is the attention scores, like [0.1, 0.2, 0.4, 0.3]
    if source_mask:
        align = align * source_mask
        normalization = align.sum(axis=0) + T.all(1 - source_mask, axis=0)
    else:
        normalization = align.sum(axis=0)
    align = align / normalization
    self.output = (T.shape_padright(align) * source).sum(axis=0)

    return self.output
def apply(self, source, tag):
    if source.ndim != 3:
        raise NotImplementedError

    source_x = T.dot(source, self.Ws) + self.bs
    align_matrix = T.tanh(source_x)
    align = T.dot(align_matrix, self.v[tag])
    align = T.exp(align - align.max(axis=0, keepdims=True))
    normalization = align.sum(axis=0)
    # shape is (length, batch)
    self.align = align / normalization
    self.output = (T.shape_padright(self.align) * source).sum(axis=0)
    return self.output
def get_output(self, train=False):
    X = self.get_input(train)
    return X * T.shape_padright(T.any((1. - T.eq(X, self.mask_value)), axis=-1))
def conv2d_same(input, filters, input_shape=(None, None, None, None), filter_shape=(None, None, None, None),
                padding=None):
    assert input.ndim == 4 and filters.ndim == 4
    assert (4 == len(input_shape)) and (4 == len(filter_shape))
    assert (1 == filter_shape[2] % 2) and (1 == filter_shape[3] % 2)
    if (tuple(input_shape[2:4]) == (1, 1) and tuple(filter_shape[2:4]) == (1, 1)) or (
            tuple(filter_shape[2:4]) == (1, 1) and theano.config.device == "cpu"):
        return tensor4dot(input, filters)
    else:
        new_row_begin = filters.shape[2] / 2
        new_row_end = input.shape[2] + filters.shape[2] / 2
        new_col_begin = filters.shape[3] / 2
        new_col_end = input.shape[3] + filters.shape[3] / 2
        if padding is not None:
            assert 1 == padding.ndim
            padded_input = TT.ones((
                input.shape[0],
                input.shape[1],
                input.shape[2] + filters.shape[2] - 1,
                input.shape[3] + filters.shape[3] - 1)).astype(theano.config.floatX)
            padded_input = TT.set_subtensor(padded_input[:, :, new_row_begin:new_row_end, new_col_begin:new_col_end],
                                            numpy_floatX(0))
            padding = TT.shape_padleft(TT.shape_padright(padding, 2), 1)
            padded_input = padding * padded_input
        else:
            padded_input = TT.zeros((
                input.shape[0],
                input.shape[1],
                input.shape[2] + filters.shape[2] - 1,
                input.shape[3] + filters.shape[3] - 1)).astype(theano.config.floatX)
        padded_input = TT.inc_subtensor(padded_input[:, :, new_row_begin:new_row_end, new_col_begin:new_col_end],
                                        input)
        new_input_shape = [None, None, None, None]
        if input_shape[0] is not None:
            new_input_shape[0] = input_shape[0]
        if input_shape[1] is not None:
            new_input_shape[1] = input_shape[1]
        if input_shape[2] is not None and filter_shape[2] is not None:
            new_input_shape[2] = input_shape[2] + filter_shape[2] - 1
        if input_shape[3] is not None and filter_shape[3] is not None:
            new_input_shape[3] = input_shape[3] + filter_shape[3] - 1
        ret = TT.nnet.conv2d(input=padded_input, filters=filters, border_mode='valid',
                             input_shape=tuple(new_input_shape), filter_shape=filter_shape)
        return ret
def quick_aggregate_pooling(input, pooling_func, mask=None):
    assert input.ndim == 5
    assert mask.ndim == 2 if mask is not None else True
    if pooling_func == "max":
        if mask is None:
            return input.max(axis=0)
    elif pooling_func == "mean":
        if mask is None:
            return TT.cast(input.mean(axis=0), theano.config.floatX)
        else:
            return (input * TT.shape_padright(mask / mask.sum(axis=0), 3)).sum(axis=0)
    elif pooling_func == "L2":
        # TODO Add Lp Pooling proposed by Yann LeCun
        return None
    return None
def theano_one_hot(t, n_classes=None):
    if n_classes is None:
        n_classes = tensor.max(t) + 1
    ranges = tensor.shape_padleft(tensor.arange(n_classes), t.ndim)
    return tensor.eq(ranges, tensor.shape_padright(t, 1))
def my_pool_2d(input, ds, ignore_border=None, st=None, padding=(0, 0), mode='max'):
    """
    This function is a patch to the maxpool op of Theano:
    contrary to the current implementation of maxpool, the gradient is backpropagated
    to only one input of a given patch if several inputs have the same value. This is
    consistent with the CuDNN implementation (and therefore the op is replaced by the
    CuDNN version when possible).
    """
    if input.ndim < 2:
        raise NotImplementedError('pool_2d requires a dimension >= 2')

    if not ignore_border is None:
        # check that ignore_border is True if provided
        assert ignore_border
    ignore_border = True

    if input.ndim == 4:
        op = MyPool(ds, ignore_border, st=st, padding=padding, mode=mode)
        output = op(input)
        return output

    # extract image dimensions
    img_shape = input.shape[-2:]

    # count the number of "leading" dimensions, store as dmatrix
    batch_size = tensor.prod(input.shape[:-2])
    batch_size = tensor.shape_padright(batch_size, 1)

    # store as 4D tensor with shape: (batch_size,1,height,width)
    new_shape = tensor.cast(tensor.join(0, batch_size,
                                        tensor.as_tensor([1]),
                                        img_shape), 'int64')
    input_4D = tensor.reshape(input, new_shape, ndim=4)

    # downsample mini-batch of images
    op = MyPool(ds, ignore_border, st=st, padding=padding, mode=mode)
    output = op(input_4D)

    # restore to original shape
    outshp = tensor.join(0, input.shape[:-2], output.shape[-2:])
    return tensor.reshape(output, outshp, ndim=input.ndim)
def pad_dims(input, leftdims, rightdims):
    """Reshapes the input to a (leftdims + rightdims) tensor

    This helper function is used to convert pooling inputs with arbitrary
    non-pooling dimensions to the correct number of dimensions for the
    GPU pooling ops.

    This reduces or expands the number of dimensions of the input to
    exactly `leftdims`, by adding extra dimensions on the left or by
    combining some existing dimensions on the left of the input.

    Use `unpad_dims` to reshape back to the original dimensions.

    Examples
    --------
    Given input of shape (3, 5, 7), ``pad_dims(input, 2, 2)``
    adds a singleton dimension and reshapes to (1, 3, 5, 7).
    Given that output from pad_dims, ``unpad_dims(output, input, 2, 2)``
    reshapes back to (3, 5, 7).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 2)``
    does not reshape and returns output with shape (3, 5, 7, 9).

    Given input of shape (3, 5, 7, 9, 11), ``pad_dims(input, 2, 2)``
    combines the first two dimensions and reshapes to (15, 7, 9, 11).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 3)``
    adds a singleton dimension and reshapes to (1, 3, 5, 7, 9).
    """
    assert input.ndim >= rightdims

    if input.ndim == (leftdims + rightdims):
        return input

    # extract image dimensions
    img_shape = input.shape[-rightdims:]

    non_pool_ndim = input.ndim - rightdims
    if non_pool_ndim < leftdims:
        # too few dimensions, pad on the left
        dummy_dims = tensor.as_tensor([1] * (leftdims - non_pool_ndim))
        new_shape = tensor.join(0, dummy_dims,
                                input.shape[:non_pool_ndim],
                                img_shape)
    else:
        # too many dimensions, combine the leading dimensions
        batched_ndim = non_pool_ndim - leftdims + 1
        batch_size = tensor.prod(input.shape[:batched_ndim])
        # convert to a vector for tensor.join
        batch_size = tensor.shape_padright(batch_size, 1)
        new_shape = tensor.join(0, batch_size,
                                input.shape[batched_ndim:non_pool_ndim],
                                img_shape)

    # store in the required shape
    new_shape = tensor.cast(new_shape, 'int64')
    input_ND = GpuReshape(leftdims + rightdims)(input, new_shape)
    return input_ND
def pad_dims(input, leftdims, rightdims):
    """Reshapes the input to a (leftdims + rightdims) tensor

    This helper function is used to convert pooling inputs with arbitrary
    non-pooling dimensions to the correct number of dimensions for the
    GPU pooling ops.

    This reduces or expands the number of dimensions of the input to
    exactly `leftdims`, by adding extra dimensions on the left or by
    combining some existing dimensions on the left of the input.

    Use `unpad_dims` to reshape back to the original dimensions.

    Examples
    --------
    Given input of shape (3, 5, 7), ``pad_dims(input, 2, 2)``
    adds a singleton dimension and reshapes to (1, 3, 5, 7).
    Given that output from pad_dims, ``unpad_dims(output, input, 2, 2)``
    reshapes back to (3, 5, 7).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 2)``
    does not reshape and returns output with shape (3, 5, 7, 9).

    Given input of shape (3, 5, 7, 9, 11), ``pad_dims(input, 2, 2)``
    combines the first two dimensions and reshapes to (15, 7, 9, 11).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 3)``
    adds a singleton dimension and reshapes to (1, 3, 5, 7, 9).
    """
    assert input.ndim >= rightdims

    if input.ndim == (leftdims + rightdims):
        return input

    # extract image dimensions
    img_shape = input.shape[-rightdims:]

    non_pool_ndim = input.ndim - rightdims
    if non_pool_ndim < leftdims:
        # too few dimensions, pad on the left
        dummy_dims = tensor.as_tensor([1] * (leftdims - non_pool_ndim))
        new_shape = tensor.join(0, dummy_dims,
                                input.shape[:non_pool_ndim],
                                img_shape)
    else:
        # too many dimensions, combine the leading dimensions
        batched_ndim = non_pool_ndim - leftdims + 1
        batch_size = tensor.prod(input.shape[:batched_ndim])
        # convert to a vector for tensor.join
        batch_size = tensor.shape_padright(batch_size, 1)
        new_shape = tensor.join(0, batch_size,
                                input.shape[batched_ndim:non_pool_ndim],
                                img_shape)

    # store in the required shape
    new_shape = tensor.cast(new_shape, 'int64')
    input_ND = GpuReshape(leftdims + rightdims)(input, new_shape)
    return input_ND