We extracted the following 50 code examples from open source Python projects to illustrate how to use theano.tensor.sum().
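Before the project excerpts, here is a minimal, self-contained sketch of what theano.tensor.sum() does symbolically; the variable names are illustrative and not taken from any of the projects below.

import numpy as np
import theano
import theano.tensor as T

# Build a symbolic graph that sums a matrix along rows and over all elements.
x = T.matrix('x')
row_sums = T.sum(x, axis=1)   # vector of per-row sums
total = T.sum(x)              # scalar sum over every element
f = theano.function([x], [row_sums, total])

print(f(np.arange(6, dtype=theano.config.floatX).reshape(2, 3)))
# expected output along the lines of: [array([ 3., 12.]), array(15.0)]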
def svgd_kernel(self, h=-1):
    sq_dist = pdist(self.theta)
    pairwise_dists = squareform(sq_dist)**2
    if h < 0:  # if h < 0, using median trick
        h = np.median(pairwise_dists)
        h = np.sqrt(0.5 * h / np.log(self.theta.shape[0] + 1))

    # compute the rbf kernel
    Kxy = np.exp(-pairwise_dists / h**2 / 2)

    dxkxy = -np.matmul(Kxy, self.theta)
    sumkxy = np.sum(Kxy, axis=1)
    for i in range(self.theta.shape[1]):
        dxkxy[:, i] = dxkxy[:, i] + np.multiply(self.theta[:, i], sumkxy)
    dxkxy = dxkxy / (h**2)
    return (Kxy, dxkxy)

def l2normalize(layer, train_scale=True):
    W_param = layer.W
    s = W_param.get_value().shape
    if len(s) == 4:
        axes_to_sum = (1, 2, 3)
        dimshuffle_args = [0, 'x', 'x', 'x']
        k = s[0]
    else:
        axes_to_sum = 0
        dimshuffle_args = ['x', 0]
        k = s[1]
    layer.W_scale = layer.add_param(lasagne.init.Constant(1.), (k,),
                                    name="W_scale", trainable=train_scale,
                                    regularizable=False)
    layer.W = W_param * (layer.W_scale / T.sqrt(1e-6 + T.sum(T.square(W_param), axis=axes_to_sum))).dimshuffle(*dimshuffle_args)
    return layer

# fully connected layer with weight normalization
def nll_loss_sharedparams(self, mus, sigmas, corxy, pis, y_true):
    mus_ex = mus[np.newaxis, :, :]
    X = y_true[:, np.newaxis, :]
    diff = X - mus_ex
    diffprod = T.prod(diff, axis=-1)
    corxy2 = corxy ** 2
    diff2 = diff ** 2
    sigmas2 = sigmas ** 2
    sigmainvs = 1.0 / sigmas
    sigmainvprods = sigmainvs[:, 0] * sigmainvs[:, 1]
    diffsigma = diff2 / sigmas2
    diffsigmanorm = T.sum(diffsigma, axis=-1)
    z = diffsigmanorm - 2 * corxy * diffprod * sigmainvprods
    oneminuscorxy2inv = 1.0 / (1.0 - corxy2)
    expterm = -0.5 * z * oneminuscorxy2inv
    new_exponent = T.log(0.5 / np.pi) + T.log(sigmainvprods) + T.log(np.sqrt(oneminuscorxy2inv)) + expterm + T.log(pis)
    max_exponent = T.max(new_exponent, axis=1, keepdims=True)
    mod_exponent = new_exponent - max_exponent
    gauss_mix = T.sum(T.exp(mod_exponent), axis=1)
    log_gauss = max_exponent + T.log(gauss_mix)
    loss = -T.mean(log_gauss)
    return loss

def rbf_kernel(X0):
    XY = T.dot(X0, X0.transpose())
    x2 = T.reshape(T.sum(T.square(X0), axis=1), (X0.shape[0], 1))
    X2e = T.repeat(x2, X0.shape[0], axis=1)
    H = T.sub(T.add(X2e, X2e.transpose()), 2 * XY)

    V = H.flatten()

    # median distance
    h = T.switch(T.eq((V.shape[0] % 2), 0),
                 # if even vector
                 T.mean(T.sort(V)[((V.shape[0] // 2) - 1):((V.shape[0] // 2) + 1)]),
                 # if odd vector
                 T.sort(V)[V.shape[0] // 2])

    h = T.sqrt(0.5 * h / T.log(X0.shape[0].astype('float32') + 1.0)) / 2.

    Kxy = T.exp(-H / h ** 2 / 2.0)
    neighbors = T.argsort(H, axis=1)[:, 1]

    return Kxy, neighbors, h

def __call__(self, X, w_temp, m_temp):
    # input dimensions
    # X:      (nb_samples, input_dim)
    # w_temp: (nb_samples, memory_dim)
    # m_temp: (nb_samples, memory_dim, memory_width) ::tensor_memory

    key   = dot(X, self.W_key, self.b_key)                            # (nb_samples, memory_width)
    shift = self.softmax(dot(X, self.W_shift, self.b_shift))          # (nb_samples, shift_width)
    beta  = self.softplus(dot(X, self.W_beta, self.b_beta))[:, None]  # (nb_samples, x)
    gamma = self.softplus(dot(X, self.W_gama, self.b_gama)) + 1.      # (nb_samples,)
    gamma = gamma[:, None]                                            # (nb_samples, x)
    g     = self.sigmoid(dot(X, self.W_g, self.b_g))[:, None]         # (nb_samples, x)

    signal = [key, shift, beta, gamma, g]

    w_c = self.softmax(beta * cosine_sim2d(key, m_temp))  # (nb_samples, memory_dim) //content-based addressing
    w_g = g * w_c + (1 - g) * w_temp                      # (nb_samples, memory_dim) //history interpolation
    w_s = shift_convolve2d(w_g, shift, self.shift_conv)   # (nb_samples, memory_dim) //convolutional shift
    w_p = w_s ** gamma                                    # (nb_samples, memory_dim) //sharpening
    w_t = w_p / T.sum(w_p, axis=1)[:, None]               # (nb_samples, memory_dim)
    return w_t

def dot(inp, matrix, bias=None):
    """
    Decide the right type of dot product depending on the input arguments
    """
    if 'int' in inp.dtype and inp.ndim == 2:
        return matrix[inp.flatten()]
    elif 'int' in inp.dtype:
        return matrix[inp]
    elif 'float' in inp.dtype and inp.ndim == 3:
        shape0 = inp.shape[0]
        shape1 = inp.shape[1]
        shape2 = inp.shape[2]
        if bias:
            return (T.dot(inp.reshape((shape0 * shape1, shape2)), matrix) + bias).reshape((shape0, shape1, matrix.shape[1]))
        else:
            return T.dot(inp.reshape((shape0 * shape1, shape2)), matrix).reshape((shape0, shape1, matrix.shape[1]))
    else:
        if bias:
            return T.dot(inp, matrix) + bias
        else:
            return T.dot(inp, matrix)

# Numerically stable log(sum(exp(A))). Can also be used in softmax function.
def dot_2d(k, M, b=None, g=None):
    # k: (nb_samples, memory_width)
    # M: (nb_samples, memory_dim, memory_width)

    # norms of keys and memories
    # k_norm = T.sqrt(T.sum(T.sqr(k), 1)) + 1e-5   # (nb_samples,)
    # M_norm = T.sqrt(T.sum(T.sqr(M), 2)) + 1e-5   # (nb_samples, memory_dim,)

    k = k[:, None, :]   # (nb_samples, 1, memory_width)
    value = k * M
    if b is not None:
        b = b[:, None, :]
        value *= b      # (nb_samples, memory_dim,)
    if g is not None:
        g = g[None, None, :]
        value *= g
    sim = T.sum(value, axis=2)
    return sim

def crossentropy(y_pred, y_true, void_labels, one_hot=False):
    # Clip predictions
    y_pred = T.clip(y_pred, _EPSILON, 1.0 - _EPSILON)

    if one_hot:
        y_true = T.argmax(y_true, axis=1)

    # Create mask
    mask = T.ones_like(y_true, dtype=_FLOATX)
    for el in void_labels:
        mask = T.set_subtensor(mask[T.eq(y_true, el).nonzero()], 0.)

    # Modify y_true temporarily
    y_true_tmp = y_true * mask
    y_true_tmp = y_true_tmp.astype('int32')

    # Compute cross-entropy
    loss = T.nnet.categorical_crossentropy(y_pred, y_true_tmp)

    # Compute masked mean loss
    loss *= mask
    loss = T.sum(loss) / T.sum(mask)
    return loss

def dice_loss(y_pred, y_true, void_class, class_for_dice=1):
    '''
    Dice loss -- works for only binary classes.
    y_pred is a softmax output
    y_true is one hot
    '''
    smooth = 1
    y_true_f = T.flatten(y_true[:, class_for_dice, :, :])
    y_true_f = T.cast(y_true_f, 'int32')
    y_pred_f = T.flatten(y_pred[:, class_for_dice, :, :])
    # remove void classes from dice
    if len(void_class):
        for i in range(len(void_class)):
            # get idx of non void classes and remove void classes
            # from y_true and y_pred
            idxs = T.neq(y_true_f, void_class[i]).nonzero()
            y_pred_f = y_pred_f[idxs]
            y_true_f = y_true_f[idxs]

    intersection = T.sum(y_true_f * y_pred_f)
    return -(2. * intersection + smooth) / (T.sum(y_true_f) + T.sum(y_pred_f) + smooth)

def build_objective(model, deterministic=False, epsilon=1e-12):
    predictions = nn.layers.get_output(model.l_out, deterministic=deterministic)
    targets = T.cast(nn.layers.get_output(model.l_target), 'int32')
    enable_targets = nn.layers.get_output(model.l_enable_target)
    predictions = T.clip(predictions, epsilon, 1. - epsilon)

    sum_of_objectives = 0
    unit_ptr = 0
    for obj_idx, obj_name in enumerate(order_objectives):
        n_classes = len(property_bin_borders[obj_name])
        v_obj = objective(obj_idx, (unit_ptr, unit_ptr + n_classes), predictions, targets)
        # take the mean of the objectives where it matters (enabled targets)
        obj_scalar = T.sum(enable_targets[:, obj_idx] * v_obj) / (0.00001 + T.sum(enable_targets[:, obj_idx]))
        if deterministic:
            d_objectives_deterministic[obj_name] = obj_scalar
        else:
            d_objectives[obj_name] = obj_scalar
        sum_of_objectives += T.mean(v_obj)
        unit_ptr = unit_ptr + n_classes

    return sum_of_objectives

def build_objective(model, deterministic=False, epsilon=1e-12):
    predictions = nn.layers.get_output(model.l_out, deterministic=deterministic)
    targets = T.cast(nn.layers.get_output(model.l_target), 'int32')
    enable_targets = nn.layers.get_output(model.l_enable_target)
    predictions = T.clip(predictions, epsilon, 1. - epsilon)

    sum_of_objectives = 0
    unit_ptr = 0
    for obj_idx, obj_name in enumerate(order_objectives):
        n_classes = len(property_bin_borders[obj_name])
        v_obj = objective(obj_idx, (unit_ptr, unit_ptr + n_classes), predictions, targets)
        # take the mean of the objectives where it matters (enabled targets)
        obj_scalar = T.sum(enable_targets[:, obj_idx] * v_obj) / (0.00001 + T.sum(enable_targets[:, obj_idx]))
        if deterministic:
            d_objectives_deterministic[obj_name] = obj_scalar
        else:
            d_objectives[obj_name] = obj_scalar
        sum_of_objectives += obj_scalar
        unit_ptr = unit_ptr + n_classes

    return sum_of_objectives

def op_cosine_c(s_xr_, s_xi_, s_yr_, s_yi_, axis_=-1, keepdims_=True, eps_=1e-7):
    '''
    cosine between two complex vectors, uses standard complex inner product

    Args:
        s_xr_: real part of x
        s_xi_: imag part of x
        s_yr_: real part of y
        s_yi_: imag part of y
        eps_: small number to prevent divide by zero
    '''
    s_nrm = s_xr_ * s_yr_ + s_xi_ * s_yi_
    s_nx = T.sum(T.sqr(s_xr_) + T.sqr(s_xi_), axis=axis_, keepdims=keepdims_)
    s_ny = T.sum(T.sqr(s_yr_) + T.sqr(s_yi_), axis=axis_, keepdims=keepdims_)
    return T.sum(s_nrm, axis=axis_, keepdims=keepdims_) / T.sqrt(s_nx * s_ny + eps_)

def sample(self, x, K):
    if x.ndim == 1:
        x = x.reshape(1, x.shape[0])
    hn = self.encode(x)
    W = self.params[0]
    ww = T.dot(W.T, W)
    samples = []
    for _ in range(K):
        s = hn * (1. - hn)
        jj = ww * s.dimshuffle(0, 'x', 1) * s.dimshuffle(0, 1, 'x')
        alpha = self.srng.normal(size=hn.shape, avg=0., std=self.sigma,
                                 dtype=theano.config.floatX)
        delta = (alpha.dimshuffle(0, 1, 'x') * jj).sum(1)
        zn = self.decode(hn + delta)
        hn = self.encode(zn)
        # zn2 = self.decode(hn)
        samples.append(zn.eval())
    return samples

def get_cost(aes, l, eye=True):
    """Get the sum of all the reconstruction costs of the AEs.

    Input:
        aes: list. List of all the aes.
        l: shared variable or a list of shared variables for the importance
           weights.
    """
    costs = []
    for ae, i in zip(aes, range(len(aes))):
        if isinstance(ae, ConvolutionalAutoencoder):
            costs.append(l[i] * ae.get_train_cost()[0])
        else:
            costs.append(l[i] * ae.get_train_cost(face=eye)[0])
    cost = None
    if costs not in [[], None]:
        cost = reduce(lambda x, y: x + y, costs)

    return cost

def get_eval_fn(model, in3D=False, use_dice=False):
    """Compile the evaluation function of the model."""
    if use_dice:
        insec = T.sum(model.trg * model.output, axis=1)
        tmp = 1 - 2.0 * insec / (T.sum(model.trg, axis=1) + T.sum(model.output, axis=1))
        error = T.mean(tmp)
    else:
        error = T.mean(T.mean(T.power(model.output - model.trg, 2), axis=1))
    if in3D:
        x = T.tensor4('x')
    else:
        x = T.fmatrix("x")
    y = T.fmatrix("y")

    theano_arg_vl = [x, y]
    output_fn_vl = [error, model.output]

    eval_fn = theano.function(
        theano_arg_vl, output_fn_vl,
        givens={model.x: x,
                model.trg: y})

    return eval_fn

def __init__(self, input_size, output_size, hidden_sizes, activation=T.nnet.sigmoid):
    self.hidden_layers = []
    self.params = []
    self.input = T.matrix('x')
    self.target = T.matrix('y')

    for i, layer_size in enumerate(hidden_sizes):
        if i == 0:
            layer_input_size = input_size
            layer_input = self.input
        else:
            layer_input_size = hidden_sizes[i - 1]
            layer_input = self.hidden_layers[-1].output
        layer = Layer(layer_input, layer_input_size, layer_size, activation=activation)
        self.hidden_layers.append(layer)
        self.params.extend(layer.params)

    self.output_layer = Layer(self.hidden_layers[-1].output, hidden_sizes[-1], output_size)
    self.params.extend(self.output_layer.params)

    self.output = self.output_layer.output
    self.cost = T.sum((self.output - self.target)**2)

def get_output_for(self, input, **kwargs):
    """
    Given 2d input find the probability of each input in each of num_units
    Diagonal Gaussians using the formula from
    http://mathworld.wolfram.com/BivariateNormalDistribution.html
    """
    # make sure sigma is positive and nonzero: softplus(x) maps to (0, +inf)
    sigmas = T.nnet.softplus(self.sigmas)
    sigmainvs = 1.0 / sigmas
    sigmainvprods = sigmainvs[:, 0] * sigmainvs[:, 1]
    sigmas2 = sigmas ** 2
    mus = self.mus[np.newaxis, :, :]
    X = input[:, np.newaxis, :]
    diff = (X - mus) ** 2
    diffsigma = diff / sigmas2
    diffsigmanorm = T.sum(diffsigma, axis=-1)
    expterm = T.exp(-0.5 * diffsigmanorm)
    probs = (0.5 / np.pi) * sigmainvprods * expterm
    return probs

def errors(self, y):
    """Return a float representing the number of errors in the minibatch
    over the total number of examples of the minibatch; zero-one loss over
    the size of the minibatch.

    :type y: theano.tensor.TensorType
    :param y: corresponds to a vector that gives for each example the
              correct label
    """
    # check if y has same dimension of y_pred
    if y.ndim != self.y_pred.ndim:
        raise TypeError(
            'y should have the same shape as self.y_pred',
            ('y', y.type, 'y_pred', self.y_pred.type)
        )
    # check if y is of the correct datatype
    if y.dtype.startswith('int'):
        # the T.neq operator returns a vector of 0s and 1s, where 1
        # represents a mistake in prediction
        return T.mean(T.neq(self.y_pred, y))
    else:
        return T.sum((y - self.y_pred) ** 2)

def _generate_train_model_function(self, scores):
    u = T.lvector('u')
    i = T.lvector('i')
    j = T.lvector('j')

    self.W = theano.shared(numpy.zeros((self._dim)).astype('float32'), name='W')
    self.S = theano.shared(scores, name='S')

    x_ui = T.dot(self.W, self.S[u, i, :].T)
    x_uj = T.dot(self.W, self.S[u, j, :].T)
    x_uij = x_ui - x_uj

    obj = T.sum(
        T.log(T.nnet.sigmoid(x_uij)).sum() -
        self._lambda_w * 0.5 * (self.W ** 2).sum()
    )
    cost = -obj

    g_cost_W = T.grad(cost=cost, wrt=self.W)

    updates = [(self.W, self.W - self._learning_rate * g_cost_W)]

    self.train_model = theano.function(inputs=[u, i, j], outputs=cost, updates=updates)

def update_opt(self, f, target, inputs, reg_coeff):
    self.target = target
    self.reg_coeff = reg_coeff
    params = target.get_params(trainable=True)

    constraint_grads = theano.grad(
        f, wrt=params, disconnected_inputs='warn')
    xs = tuple([ext.new_tensor_like("%s x" % p.name, p) for p in params])

    def Hx_plain():
        Hx_plain_splits = TT.grad(
            TT.sum([TT.sum(g * x)
                    for g, x in zip(constraint_grads, xs)]),
            wrt=params,
            disconnected_inputs='warn'
        )
        return TT.concatenate([TT.flatten(s) for s in Hx_plain_splits])

    self.opt_fun = ext.lazydict(
        f_Hx_plain=lambda: ext.compile_function(
            inputs=inputs + xs,
            outputs=Hx_plain(),
            log_name="f_Hx_plain",
        ),
    )

def kl_sym(self, old_dist_info_vars, new_dist_info_vars):
    old_means = old_dist_info_vars["mean"]
    old_log_stds = old_dist_info_vars["log_std"]
    new_means = new_dist_info_vars["mean"]
    new_log_stds = new_dist_info_vars["log_std"]
    """
    Compute the KL divergence of two multivariate Gaussian distributions with
    diagonal covariance matrices
    """
    old_std = TT.exp(old_log_stds)
    new_std = TT.exp(new_log_stds)
    # means: (N*A)
    # std: (N*A)
    # formula:
    # { (\mu_1 - \mu_2)^2 + \sigma_1^2 - \sigma_2^2 } / (2\sigma_2^2) +
    # ln(\sigma_2/\sigma_1)
    numerator = TT.square(old_means - new_means) + \
        TT.square(old_std) - TT.square(new_std)
    denominator = 2 * TT.square(new_std) + 1e-8
    return TT.sum(
        numerator / denominator + new_log_stds - old_log_stds, axis=-1)

def kl(self, old_dist_info, new_dist_info):
    old_means = old_dist_info["mean"]
    old_log_stds = old_dist_info["log_std"]
    new_means = new_dist_info["mean"]
    new_log_stds = new_dist_info["log_std"]
    """
    Compute the KL divergence of two multivariate Gaussian distributions with
    diagonal covariance matrices
    """
    old_std = np.exp(old_log_stds)
    new_std = np.exp(new_log_stds)
    # means: (N*A)
    # std: (N*A)
    # formula:
    # { (\mu_1 - \mu_2)^2 + \sigma_1^2 - \sigma_2^2 } / (2\sigma_2^2) +
    # ln(\sigma_2/\sigma_1)
    numerator = np.square(old_means - new_means) + \
        np.square(old_std) - np.square(new_std)
    denominator = 2 * np.square(new_std) + 1e-8
    return np.sum(
        numerator / denominator + new_log_stds - old_log_stds, axis=-1)

def discrim(X):
    current_input = dropout(X, 0.3)
    ### encoder ###
    cv1 = relu(dnn_conv(current_input, aew1, subsample=(1, 1), border_mode=(1, 1)))
    cv2 = relu(batchnorm(dnn_conv(cv1, aew2, subsample=(4, 4), border_mode=(2, 2)), g=aeg2, b=aeb2))
    cv3 = relu(batchnorm(dnn_conv(cv2, aew3, subsample=(1, 1), border_mode=(1, 1)), g=aeg3, b=aeb3))
    cv4 = relu(batchnorm(dnn_conv(cv3, aew4, subsample=(4, 4), border_mode=(2, 2)), g=aeg4, b=aeb4))
    cv5 = relu(batchnorm(dnn_conv(cv4, aew5, subsample=(1, 1), border_mode=(1, 1)), g=aeg5, b=aeb5))
    cv6 = relu(batchnorm(dnn_conv(cv5, aew6, subsample=(4, 4), border_mode=(0, 0)), g=aeg6, b=aeb6))

    ### decoder ###
    dv6 = relu(batchnorm(deconv(cv6, aew6, subsample=(4, 4), border_mode=(0, 0)), g=aeg6t, b=aeb6t))
    dv5 = relu(batchnorm(deconv(dv6, aew5, subsample=(1, 1), border_mode=(1, 1)), g=aeg5t, b=aeb5t))
    dv4 = relu(batchnorm(deconv(dv5, aew4, subsample=(4, 4), border_mode=(2, 2)), g=aeg4t, b=aeb4t))
    dv3 = relu(batchnorm(deconv(dv4, aew3, subsample=(1, 1), border_mode=(1, 1)), g=aeg3t, b=aeb3t))
    dv2 = relu(batchnorm(deconv(dv3, aew2, subsample=(4, 4), border_mode=(2, 2)), g=aeg2t, b=aeb2t))
    dv1 = tanh(deconv(dv2, aew1, subsample=(1, 1), border_mode=(1, 1)))
    rX = dv1

    mse = T.sqrt(T.sum(T.abs_(T.flatten(X - rX, 2)), axis=1)) + T.sqrt(T.sum(T.flatten((X - rX)**2, 2), axis=1))
    return T.flatten(cv6, 2), rX, mse

def discrim(X):
    current_input = dropout(X, 0.3)
    ### encoder ###
    cv1 = relu(dnn_conv(current_input, aew1, subsample=(1, 1), border_mode=(1, 1)))
    cv2 = relu(batchnorm(dnn_conv(cv1, aew2, subsample=(4, 4), border_mode=(2, 2)), g=aeg2, b=aeb2))
    cv3 = relu(batchnorm(dnn_conv(cv2, aew3, subsample=(1, 1), border_mode=(1, 1)), g=aeg3, b=aeb3))
    cv4 = relu(batchnorm(dnn_conv(cv3, aew4, subsample=(4, 4), border_mode=(2, 2)), g=aeg4, b=aeb4))
    cv5 = relu(batchnorm(dnn_conv(cv4, aew5, subsample=(1, 1), border_mode=(1, 1)), g=aeg5, b=aeb5))
    cv6 = relu(batchnorm(dnn_conv(cv5, aew6, subsample=(4, 4), border_mode=(0, 0)), g=aeg6, b=aeb6))

    ### decoder ###
    dv6 = relu(batchnorm(deconv(cv6, aew6, subsample=(4, 4), border_mode=(0, 0)), g=aeg6t, b=aeb6t))
    dv5 = relu(batchnorm(deconv(dv6, aew5, subsample=(1, 1), border_mode=(1, 1)), g=aeg5t, b=aeb5t))
    dv4 = relu(batchnorm(deconv(dv5, aew4, subsample=(4, 4), border_mode=(2, 2)), g=aeg4t, b=aeb4t))
    dv3 = relu(batchnorm(deconv(dv4, aew3, subsample=(1, 1), border_mode=(1, 1)), g=aeg3t, b=aeb3t))
    dv2 = relu(batchnorm(deconv(dv3, aew2, subsample=(4, 4), border_mode=(2, 2)), g=aeg2t, b=aeb2t))
    dv1 = tanh(deconv(dv2, aew1, subsample=(1, 1), border_mode=(1, 1)))
    rX = dv1

    mse = T.sqrt(T.sum(T.abs_(T.flatten(X - rX, 2)), axis=1)) + T.sqrt(T.sum(T.flatten((X - rX)**2, 2), axis=1))  # L1 and L2 loss
    return T.flatten(cv6, 2), rX, mse

def svgd_gradient(X0):
    hidden, _, mse = discrim(X0)
    grad = -1.0 * T.grad(mse.sum(), X0)

    kxy, neighbors, h = rbf_kernel(hidden)  # TODO

    coff = T.exp(-T.sum((hidden[neighbors] - hidden)**2, axis=1) / h**2 / 2.0)
    v = coff.dimshuffle(0, 'x') * (-hidden[neighbors] + hidden) / h**2

    X1 = X0[neighbors]
    hidden1, _, _ = discrim(X1)
    dxkxy = T.Lop(hidden1, X1, v)

    #svgd_grad = (T.dot(kxy, T.flatten(grad, 2)).reshape(dxkxy.shape) + dxkxy) / T.sum(kxy, axis=1).dimshuffle(0, 'x', 'x', 'x')
    svgd_grad = grad + dxkxy / 2.
    return grad, svgd_grad, dxkxy

def get_recon_loss(self, idxs, sent_output):
    len_sent, len_doc_batch, n_d = sent_output.shape
    recon_layer = self.recon_layer
    padding_id = self.padding_id
    dropout = self.dropout

    # (len(sent)*len(doc)*batch)*n_e
    input_flat = idxs.ravel()
    true_recon = self.embedding_layer.recon_forward(input_flat)
    sent_output = apply_dropout(sent_output, dropout)
    pred_recon = recon_layer.forward(sent_output.reshape((len_sent * len_doc_batch, n_d)))

    # (len(sent)*len(doc)*batch)
    mask = T.cast(T.neq(input_flat, padding_id), theano.config.floatX)
    n = T.sum(mask)
    loss = T.sum((true_recon - pred_recon) ** 2, axis=1) * mask
    loss = T.sum(loss) / n
    return loss

def sample_weights(sizeX, sizeY, sparsity, scale, rng):
    """
    Initialization that fixes the largest singular value.
    """
    sizeX = int(sizeX)
    sizeY = int(sizeY)

    sparsity = numpy.minimum(sizeY, sparsity)
    values = numpy.zeros((sizeX, sizeY), dtype=theano.config.floatX)
    for dx in xrange(sizeX):
        perm = rng.permutation(sizeY)
        new_vals = rng.uniform(low=-scale, high=scale, size=(sparsity,))
        vals_norm = numpy.sqrt((new_vals**2).sum())
        new_vals = scale * new_vals / vals_norm
        values[dx, perm[:sparsity]] = new_vals
    _, v, _ = numpy.linalg.svd(values)
    values = scale * values / v[0]
    return values.astype(theano.config.floatX)

def categorical_crossentropy(logit, y, mask, length_var, need_softmax=False):
    logit_shp = logit.shape
    # (n_samples, n_timesteps_f, n_labels)
    # softmax, predict label prob
    # (n_samples * n_timesteps_f, n_labels)
    if need_softmax:
        probs = T.nnet.softmax(logit.reshape([logit_shp[0] * logit_shp[1], logit_shp[2]]))
    else:
        probs = logit.reshape([logit_shp[0] * logit_shp[1], logit_shp[2]])
    # (n_samples * n_timesteps_f)
    y_flat = y.flatten()
    # clip to avoid nan loss
    probs = T.clip(probs, _EPSILON, 1.0 - _EPSILON)
    loss = lasagne.objectives.categorical_crossentropy(probs, y_flat)
    # (n_samples, n_timesteps_f)
    loss = loss.reshape((logit_shp[0], logit_shp[1]))
    loss = loss * mask
    loss = T.sum(loss, axis=1) / length_var
    probs = probs.reshape(logit_shp)
    return loss, probs

def binary_crossentropy(logit, y, mask, length_var):
    logit_shp = logit.shape
    # logit_shp[2] == 1, i.e. n_labels is 1
    # (n_samples, n_timesteps_f, n_labels)
    # sigmoid, predict label prob
    # (n_samples * n_timesteps_f, n_labels)
    probs = T.nnet.sigmoid(logit.flatten())
    # (n_samples * n_timesteps_f)
    y_flat = y.flatten()
    loss = lasagne.objectives.binary_crossentropy(probs, y_flat)
    # (n_samples, n_timesteps_f)
    loss = loss.reshape((logit_shp[0], logit_shp[1]))
    loss = loss * mask
    loss = T.sum(loss, axis=1) / length_var
    # (n_samples, n_timesteps_f)
    probs = probs.reshape([logit_shp[0], logit_shp[1]])
    return loss, probs

def log_sum_exp(x, axis=1):
    m = T.max(x, axis=axis)
    return m + T.log(T.sum(T.exp(x - m.dimshuffle(0, 'x')), axis=axis))

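A quick check of log_sum_exp above, assuming a 2-D input and axis=1 (which is what the dimshuffle(0, 'x') pattern requires); the numbers are made up to show that the stabilized version stays finite where the naive computation overflows or underflows.

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
stable_lse = theano.function([x], log_sum_exp(x, axis=1))

a = np.array([[1000., 1001.],
              [-1000., -999.]], dtype=theano.config.floatX)
print(stable_lse(a))                  # finite: roughly [1001.31, -998.69]
print(np.log(np.exp(a).sum(axis=1)))  # naive version yields inf / -inf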
def softmax_loss(p_true, output_before_softmax):
    output_before_softmax -= T.max(output_before_softmax, axis=1, keepdims=True)
    if p_true.ndim == 2:
        return T.mean(T.log(T.sum(T.exp(output_before_softmax), axis=1)) - T.sum(p_true * output_before_softmax, axis=1))
    else:
        return T.mean(T.log(T.sum(T.exp(output_before_softmax), axis=1)) - output_before_softmax[T.arange(p_true.shape[0]), p_true])

def op_l2norm(s_x_, eps_=1e-6):
    return T.sqrt(eps_ + T.sum(T.sqr(s_x_)))

def op_cosine(s_u_, s_v_, flatten_=True, eps_=1e-6):
    if flatten_:
        s_u = s_u_.flatten()
        s_v = s_v_.flatten()
        return T.dot(s_u, s_v) / T.sqrt(eps_ + T.sum(T.sqr(s_u)) * T.sum(T.sqr(s_v)))
    else:
        s_u = s_u_
        s_v = s_v_
        # per-row cosine similarity along the last axis
        return T.sum(s_u * s_v, axis=-1) / T.sqrt(eps_ + T.sum(T.sqr(s_u), axis=-1) * T.sum(T.sqr(s_v), axis=-1))

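A small sanity check for op_cosine with made-up vectors: the cosine of a vector with itself should be close to 1, up to the eps_ smoothing term.

import numpy as np
import theano
import theano.tensor as T

u = T.vector('u')
cos_fn = theano.function([u], op_cosine(u, u))
print(cos_fn(np.array([3., 4.], dtype=theano.config.floatX)))  # ~1.0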
def compute_loss(output, num_samples, num_entries=6, gamma=500.0):
    """Compute the loss of a dataset, given the output of the DSSM.

    Args:
        output (:class:`lasagne.layers.Layer`): the output of the DSSM
        num_samples (int): the number of samples in the dataset
        num_entries (int): the number of compared papers in the DSSM structure
        gamma (float): the coefficient applied in the softmax of the similarities

    Returns:
        theano.tensor.TensorType: the loss of the dataset
    """
    assert (num_entries > 2)
    assert (num_samples > 0)

    # Post-NN operations to compute the loss
    # First, we extract the first output of each bundle
    mask = np.zeros(num_entries * num_samples)
    mask[::num_entries] = 1
    unmask = np.ones(num_entries * num_samples) - mask
    cited = T.extra_ops.compress(mask, output, axis=0)
    odocs = T.extra_ops.compress(unmask, output, axis=0)

    # We duplicate each row 'x' num_entries-1 times
    cited = T.extra_ops.repeat(cited, num_entries - 1, axis=0)
    # Then we compute element-wise product of x with each y, for each bundle
    sims = T.sum(cited * odocs, axis=1)
    # We reshape the similarities
    sims = T.reshape(sims, (num_samples, num_entries - 1))
    sims = gamma * sims
    # We take the softmax of each row
    probs = T.nnet.softmax(sims)
    # We compute the loss as the sum of element on the first column
    loss_mask = np.zeros(num_entries - 1)
    loss_mask[0] = 1
    loss = T.extra_ops.compress(loss_mask, probs, axis=1)
    return -T.log(T.prod(loss))

def softmax(x):
    e_x = T.exp(x - x.max(axis=0, keepdims=True))
    out = e_x / e_x.sum(axis=0, keepdims=True)
    return out

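Note that this softmax normalizes along axis=0, i.e. each column (not each row) sums to one. A brief sketch with made-up numbers:

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
col_softmax = theano.function([x], softmax(x))
out = col_softmax(np.array([[1., 2.], [3., 4.]], dtype=theano.config.floatX))
print(out.sum(axis=0))  # each column sums to 1: [ 1.  1.]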
def get_norm(x):
    x = np.array(x)
    return np.sum(x * x)

def l2_reg(params):
    return T.sum([T.sum(x ** 2) for x in params])

def l1_reg(params):
    return T.sum([T.sum(abs(x)) for x in params])

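A minimal sketch, with hypothetical shared parameters W and b, of how l2_reg and l1_reg above would typically be attached to a training cost as weight penalties:

import numpy as np
import theano
import theano.tensor as T

W = theano.shared(np.random.randn(3, 4).astype(theano.config.floatX), name='W')
b = theano.shared(np.zeros(4, dtype=theano.config.floatX), name='b')

# scalar penalty that would be added to the task loss before taking gradients
penalty = 1e-4 * l2_reg([W, b]) + 1e-5 * l1_reg([W, b])
print(penalty.eval())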
def __init__(self, incoming, b=lasagne.init.Constant(0.), g=lasagne.init.Constant(1.),
             W=lasagne.init.Normal(0.05), train_g=False, init_stdv=1., nonlinearity=relu, **kwargs):
    super(WeightNormLayer, self).__init__(incoming, **kwargs)
    self.nonlinearity = nonlinearity
    self.init_stdv = init_stdv
    k = self.input_shape[1]
    if b is not None:
        self.b = self.add_param(b, (k,), name="b", regularizable=False)
    if g is not None:
        self.g = self.add_param(g, (k,), name="g", regularizable=False, trainable=train_g)
    if len(self.input_shape) == 4:
        self.axes_to_sum = (0, 2, 3)
        self.dimshuffle_args = ['x', 0, 'x', 'x']
    else:
        self.axes_to_sum = 0
        self.dimshuffle_args = ['x', 0]

    # scale weights in layer below
    incoming.W_param = incoming.W
    #incoming.W_param.set_value(W.sample(incoming.W_param.get_value().shape))
    if incoming.W_param.ndim == 4:
        if isinstance(incoming, Deconv2DLayer):
            W_axes_to_sum = (0, 2, 3)
            W_dimshuffle_args = ['x', 0, 'x', 'x']
        else:
            W_axes_to_sum = (1, 2, 3)
            W_dimshuffle_args = [0, 'x', 'x', 'x']
    else:
        W_axes_to_sum = 0
        W_dimshuffle_args = ['x', 0]
    if g is not None:
        incoming.W = incoming.W_param * (self.g / T.sqrt(1e-6 + T.sum(T.square(incoming.W_param), axis=W_axes_to_sum))).dimshuffle(*W_dimshuffle_args)
    else:
        incoming.W = incoming.W_param / T.sqrt(1e-6 + T.sum(T.square(incoming.W_param), axis=W_axes_to_sum, keepdims=True))

def __init__(self, incoming, num_kernels, dim_per_kernel=5, theta=lasagne.init.Normal(0.05),
             log_weight_scale=lasagne.init.Constant(0.), b=lasagne.init.Constant(-1.), **kwargs):
    super(MinibatchLayer, self).__init__(incoming, **kwargs)
    self.num_kernels = num_kernels
    num_inputs = int(np.prod(self.input_shape[1:]))
    self.theta = self.add_param(theta, (num_inputs, num_kernels, dim_per_kernel), name="theta")
    self.log_weight_scale = self.add_param(log_weight_scale, (num_kernels, dim_per_kernel), name="log_weight_scale")
    self.W = self.theta * (T.exp(self.log_weight_scale) / T.sqrt(T.sum(T.square(self.theta), axis=0))).dimshuffle('x', 0, 1)
    self.b = self.add_param(b, (num_kernels,), name="b")

def get_output_for(self, input, init=False, **kwargs):
    if input.ndim > 2:
        # if the input has more than two dimensions, flatten it into a
        # batch of feature vectors.
        input = input.flatten(2)

    activation = T.tensordot(input, self.W, [[1], [0]])
    abs_dif = (T.sum(abs(activation.dimshuffle(0, 1, 2, 'x') - activation.dimshuffle('x', 1, 2, 0)), axis=2)
               + 1e6 * T.eye(input.shape[0]).dimshuffle(0, 'x', 1))

    if init:
        mean_min_abs_dif = 0.5 * T.mean(T.min(abs_dif, axis=2), axis=0)
        abs_dif /= mean_min_abs_dif.dimshuffle('x', 0, 'x')
        self.init_updates = [(self.log_weight_scale, self.log_weight_scale - T.log(mean_min_abs_dif).dimshuffle(0, 'x'))]

    f = T.sum(T.exp(-abs_dif), axis=2)

    if init:
        mf = T.mean(f, axis=0)
        f -= mf.dimshuffle('x', 0)
        self.init_updates.append((self.b, -mf))
    else:
        f += self.b.dimshuffle('x', 0)

    return T.concatenate([input, f], axis=1)

# Input Mixture of Gaussian Layer
def _read(w_read, memory):
    # w_read : (nb_sample, memory_dim)
    # memory : (nb_sample, memory_dim, memory_width)
    # return dot(w_read, memory)
    return T.sum(w_read[:, :, None] * memory, axis=1)

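Finally, a short sketch (arbitrary shapes, made-up data) showing the attention-weighted memory read performed by _read above: the result is one weighted sum over memory rows per sample.

import numpy as np
import theano
import theano.tensor as T

w = T.matrix('w_read')    # (nb_sample, memory_dim)
M = T.tensor3('memory')   # (nb_sample, memory_dim, memory_width)
read_fn = theano.function([w, M], _read(w, M))

out = read_fn(np.full((2, 4), 0.25, dtype=theano.config.floatX),
              np.random.randn(2, 4, 5).astype(theano.config.floatX))
print(out.shape)  # (2, 5): one read vector of width 5 per sample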