The following 50 code examples, extracted from open-source Python projects, illustrate how to use theano.tensor.log().
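Before the extracted examples, here is a minimal, self-contained sketch of the pattern they all share (not taken from any of the projects below; the variable names are illustrative): theano.tensor.log() builds a symbolic, elementwise natural-logarithm node, and theano.function compiles the resulting graph into a callable.

import numpy as np
import theano
import theano.tensor as T

# Symbolic input vector; T.log is applied elementwise when the compiled graph runs.
x = T.dvector('x')
f = theano.function([x], T.log(x))

print(f(np.array([1.0, np.e, 10.0])))  # -> approximately [0., 1., 2.3026]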
def svgd_kernel(self, h=-1):
    sq_dist = pdist(self.theta)
    pairwise_dists = squareform(sq_dist)**2
    if h < 0:  # if h < 0, using median trick
        h = np.median(pairwise_dists)
        h = np.sqrt(0.5 * h / np.log(self.theta.shape[0] + 1))

    # compute the rbf kernel
    Kxy = np.exp(-pairwise_dists / h**2 / 2)

    dxkxy = -np.matmul(Kxy, self.theta)
    sumkxy = np.sum(Kxy, axis=1)
    for i in range(self.theta.shape[1]):
        dxkxy[:, i] = dxkxy[:, i] + np.multiply(self.theta[:, i], sumkxy)
    dxkxy = dxkxy / (h**2)
    return (Kxy, dxkxy)

def rbf_kernel(X0):
    XY = T.dot(X0, X0.transpose())
    x2 = T.reshape(T.sum(T.square(X0), axis=1), (X0.shape[0], 1))
    X2e = T.repeat(x2, X0.shape[0], axis=1)
    H = T.sub(T.add(X2e, X2e.transpose()), 2 * XY)

    V = H.flatten()
    # median distance
    h = T.switch(T.eq((V.shape[0] % 2), 0),
                 # if even vector
                 T.mean(T.sort(V)[((V.shape[0] // 2) - 1):((V.shape[0] // 2) + 1)]),
                 # if odd vector
                 T.sort(V)[V.shape[0] // 2])

    h = T.sqrt(0.5 * h / T.log(X0.shape[0].astype('float32') + 1.0)) / 2.

    Kxy = T.exp(-H / h ** 2 / 2.0)

    neighbors = T.argsort(H, axis=1)[:, 1]

    return Kxy, neighbors, h

def rbf_kernel(X):
    XY = T.dot(X, X.T)
    x2 = T.sum(X**2, axis=1).dimshuffle(0, 'x')
    X2e = T.repeat(x2, X.shape[0], axis=1)
    H = X2e + X2e.T - 2. * XY

    V = H.flatten()
    # median distance
    h = T.switch(T.eq((V.shape[0] % 2), 0),
                 # if even vector
                 T.mean(T.sort(V)[((V.shape[0] // 2) - 1):((V.shape[0] // 2) + 1)]),
                 # if odd vector
                 T.sort(V)[V.shape[0] // 2])

    h = T.sqrt(.5 * h / T.log(H.shape[0].astype('float32') + 1.))

    # compute the rbf kernel
    kxy = T.exp(-H / (h ** 2) / 2.0)

    dxkxy = -T.dot(kxy, X)
    sumkxy = T.sum(kxy, axis=1).dimshuffle(0, 'x')
    dxkxy = T.add(dxkxy, T.mul(X, sumkxy)) / (h ** 2)

    return kxy, dxkxy

def gaussian_nll(x, mus, sigmas):
    """
    NLL for a multivariate Normal with diagonal covariance matrix.
    See: wikipedia.org/wiki/Multivariate_normal_distribution#Likelihood_function
    where \Sigma = diag(s_1^2, ..., s_n^2).

    x, mus, sigmas should all have the same shape.
    sigmas (s_1, ..., s_n) should be strictly positive.
    The output has the same shape as the inputs, minus the last dimension.
    """
    nll = lib.floatX(numpy.log(2. * numpy.pi))
    nll += 2. * T.log(sigmas)
    nll += ((x - mus) / sigmas) ** 2.
    nll = nll.sum(axis=-1)
    nll *= lib.floatX(0.5)
    return nll

def step(self, mode):
    if mode == "train" and self.mode == "test":
        raise Exception("Cannot train during test mode")

    if mode == "train":
        theano_fn = self.train_fn
        batch_gen = self.train_batch_gen
    elif mode == "test":
        theano_fn = self.test_fn
        batch_gen = self.test_batch_gen
    else:
        raise Exception("Invalid mode")

    data = next(batch_gen)
    ret = theano_fn(*data)
    return {"prediction": np.array(ret[0]),
            "answers": data[-1],
            "current_loss": ret[1],
            "log": ""}

def evaluation(self, X_test, y_test):
    # normalization
    X_test = self.normalization(X_test)

    # average over the output
    pred_y_test = np.zeros([self.M, len(y_test)])
    prob = np.zeros([self.M, len(y_test)])

    '''
        Since we have M particles, we use a Bayesian view to calculate rmse and log-likelihood
    '''
    for i in range(self.M):
        w1, b1, w2, b2, loggamma, loglambda = self.unpack_weights(self.theta[i, :])
        pred_y_test[i, :] = self.nn_predict(X_test, w1, b1, w2, b2) * self.std_y_train + self.mean_y_train
        prob[i, :] = np.sqrt(np.exp(loggamma)) / np.sqrt(2 * np.pi) * np.exp(-1 * (np.power(pred_y_test[i, :] - y_test, 2) / 2) * np.exp(loggamma))
    pred = np.mean(pred_y_test, axis=0)

    # evaluation
    svgd_rmse = np.sqrt(np.mean((pred - y_test)**2))
    svgd_ll = np.mean(np.log(np.mean(prob, axis=0)))

    return (svgd_rmse, svgd_ll)

def parse_arguments(parser):
    parser.add_argument('seq_file', type=str, metavar='<visit_file>', help='The path to the Pickled file containing visit information of patients')
    parser.add_argument('label_file', type=str, metavar='<label_file>', help='The path to the Pickled file containing label information of patients')
    parser.add_argument('tree_file', type=str, metavar='<tree_file>', help='The path to the Pickled files containing the ancestor information of the input medical codes. Only use the prefix and exclude ".level#.pk".')
    parser.add_argument('out_file', metavar='<out_file>', help='The path to the output models. The models will be saved after every epoch')
    parser.add_argument('--embed_file', type=str, default='', help='The path to the Pickled file containing the representation vectors of medical codes. If you are not using medical code representations, do not use this option')
    parser.add_argument('--embed_size', type=int, default=128, help='The dimension size of the visit embedding. If you are providing your own medical code vectors, this value will be automatically decided. (default value: 128)')
    parser.add_argument('--rnn_size', type=int, default=128, help='The dimension size of the hidden layer of the GRU (default value: 128)')
    parser.add_argument('--attention_size', type=int, default=128, help='The dimension size of hidden layer of the MLP that generates the attention weights (default value: 128)')
    parser.add_argument('--batch_size', type=int, default=100, help='The size of a single mini-batch (default value: 100)')
    parser.add_argument('--n_epochs', type=int, default=100, help='The number of training epochs (default value: 100)')
    parser.add_argument('--L2', type=float, default=0.001, help='L2 regularization coefficient for all weights except RNN (default value: 0.001)')
    parser.add_argument('--dropout_rate', type=float, default=0.5, help='Dropout rate used for the hidden layer of RNN (default value: 0.5)')
    parser.add_argument('--log_eps', type=float, default=1e-8, help='A small value to prevent log(0) (default value: 1e-8)')
    parser.add_argument('--verbose', action='store_true', help='Print output after every 100 mini-batches (default false)')
    args = parser.parse_args()

    return args

def iou_loss(p, t):
    # print "pass"
    tp, tt = p.reshape((p.shape[0], 2, 2)), t.reshape((t.shape[0], 2, 2))
    overlaps_t0 = T.maximum(tp[:, 0, :], tt[:, 0, :])
    overlaps_t1 = T.minimum(tp[:, 1, :], tt[:, 1, :])
    intersection = overlaps_t1 - overlaps_t0
    bool_overlap = T.min(intersection, axis=1) > 0
    intersection = intersection[:, 0] * intersection[:, 1]
    intersection = T.maximum(intersection, np.float32(0.))
    dims_p = tp[:, 1, :] - tp[:, 0, :]
    areas_p = dims_p[:, 0] * dims_p[:, 1]
    dims_t = tt[:, 1, :] - tt[:, 0, :]
    areas_t = dims_t[:, 0] * dims_t[:, 1]
    union = areas_p + areas_t - intersection
    loss = 1. - T.minimum(
        T.exp(T.log(T.abs_(intersection)) - T.log(T.abs_(union) + np.float32(1e-5))),
        np.float32(1.)
    )
    # return loss
    return T.mean(loss)

def iou_loss_val(p, t):
    tp, tt = p.reshape((p.shape[0], 2, 2)), t.reshape((t.shape[0], 2, 2))
    overlaps = np.zeros_like(tp, dtype=np.float32)
    overlaps[:, 0, :] = np.maximum(tp[:, 0, :], tt[:, 0, :])
    overlaps[:, 1, :] = np.minimum(tp[:, 1, :], tt[:, 1, :])
    intersection = overlaps[:, 1, :] - overlaps[:, 0, :]
    bool_overlap = np.min(intersection, axis=1) > 0
    intersection = intersection[:, 0] * intersection[:, 1]
    intersection = np.maximum(intersection, 0.)
    # print "bool", bool_overlap
    # print "Int", intersection
    dims_p = tp[:, 1, :] - tp[:, 0, :]
    areas_p = dims_p[:, 0] * dims_p[:, 1]
    dims_t = tt[:, 1, :] - tt[:, 0, :]
    areas_t = dims_t[:, 0] * dims_t[:, 1]
    union = areas_p + areas_t - intersection
    # print "un", union
    loss = 1. - np.minimum(
        np.exp(np.log(np.abs(intersection)) - np.log(np.abs(union) + 1e-5)),
        1.
    )
    # print loss
    return np.mean(loss)

def negativeLogLikelihoodWeighted(self, y, weightPerClass):
    # Weighting the cost of the different classes in the cost-function, in order to counter class imbalance.
    e1 = np.finfo(np.float32).tiny
    addTinyProbMatrix = T.lt(self.p_y_given_x_train, 4 * e1) * e1

    weights = weightPerClass.dimshuffle('x', 0, 'x', 'x', 'x')
    log_p_y_given_x_train = T.log(self.p_y_given_x_train + addTinyProbMatrix)
    weighted_log_probs = log_p_y_given_x_train * weights

    wShape = weighted_log_probs.shape

    # Re-arrange
    idx0 = T.arange(wShape[0]).dimshuffle(0, 'x', 'x', 'x')
    idx2 = T.arange(wShape[2]).dimshuffle('x', 0, 'x', 'x')
    idx3 = T.arange(wShape[3]).dimshuffle('x', 'x', 0, 'x')
    idx4 = T.arange(wShape[4]).dimshuffle('x', 'x', 'x', 0)

    return -T.mean(weighted_log_probs[idx0, y, idx2, idx3, idx4])

def log_marginal(self, y, h, py, q):
    '''Computes the approximate log marginal.

    Uses \log \sum p / q - \log N

    Args:
        y: T.tensor, target values.
        h: T.tensor, latent samples.
        py: T.tensor, conditional density p(y | h)
        q: approximate posterior q(h | y)

    Returns:
        approximate log marginal.

    '''
    log_py_h = -self.conditional.neg_log_prob(y, py)
    log_ph = -self.prior.neg_log_prob(h)
    log_qh = -self.posterior.neg_log_prob(h, q)
    assert log_py_h.ndim == log_ph.ndim == log_qh.ndim

    log_p = log_py_h + log_ph - log_qh
    log_p_max = T.max(log_p, axis=0, keepdims=True)
    w = T.exp(log_p - log_p_max)

    return (T.log(w.mean(axis=0, keepdims=True)) + log_p_max).mean()

def step_free_energy(self, x, beta, *params):
    '''Step free energy function.

    Args:
        x (T.tensor): data sample.
        beta (float): beta value for annealing.
        *params: theano shared variables.

    Returns:
        T.tensor: free energy.

    '''
    W, v_params, h_params = self.split_params(*params)

    vis_term = beta * self.v_dist.get_energy_bias(x, *v_params)
    x = self.v_dist.scale_for_energy_model(x, *v_params)
    hid_act = beta * (T.dot(x, W) + self.h_dist.get_center(*h_params))
    fe = -vis_term - T.log(1. + T.exp(hid_act)).sum(axis=1)
    return fe

def step_free_energy_h(self, h, beta, *params):
    '''Step free energy function for hidden states.

    Args:
        h (T.tensor): hidden sample.
        beta (float): beta value for annealing.
        *params: theano shared variables.

    Returns:
        T.tensor: free energy.

    '''
    W, v_params, h_params = self.split_params(*params)

    hid_term = beta * self.h_dist.get_energy_bias(h, *h_params)
    h = self.h_dist.scale_for_energy_model(h, *h_params)
    vis_act = beta * (T.dot(h, W.T) + self.v_dist.get_center(*v_params))
    fe = -hid_term - T.log(1. + T.exp(vis_act)).sum(axis=1)
    return fe

def _generate_train_model_function(self, scores):
    u = T.lvector('u')
    i = T.lvector('i')
    j = T.lvector('j')

    self.W = theano.shared(numpy.zeros((self._dim)).astype('float32'), name='W')
    self.S = theano.shared(scores, name='S')

    x_ui = T.dot(self.W, self.S[u, i, :].T)
    x_uj = T.dot(self.W, self.S[u, j, :].T)
    x_uij = x_ui - x_uj

    obj = T.sum(
        T.log(T.nnet.sigmoid(x_uij)).sum() -
        self._lambda_w * 0.5 * (self.W ** 2).sum()
    )
    cost = -obj

    g_cost_W = T.grad(cost=cost, wrt=self.W)

    updates = [(self.W, self.W - self._learning_rate * g_cost_W)]

    self.train_model = theano.function(inputs=[u, i, j], outputs=cost, updates=updates)

def ctc_update_log_p(skip_idxs, zeros, active, log_p_curr, log_p_prev):
    active_skip_idxs = skip_idxs[(skip_idxs < active).nonzero()]
    active_next = T.cast(T.minimum(
        T.maximum(
            active + 1,
            T.max(T.concatenate([active_skip_idxs, [-1]])) + 2 + 1
        ), log_p_curr.shape[0]), 'int32')

    common_factor = T.max(log_p_prev[:active])
    p_prev = T.exp(log_p_prev[:active] - common_factor)
    _p_prev = zeros[:active_next]
    # copy over
    _p_prev = T.set_subtensor(_p_prev[:active], p_prev)
    # previous transitions
    _p_prev = T.inc_subtensor(_p_prev[1:], _p_prev[:-1])
    # skip transitions
    _p_prev = T.inc_subtensor(_p_prev[active_skip_idxs + 2], p_prev[active_skip_idxs])
    updated_log_p_prev = T.log(_p_prev) + common_factor

    log_p_next = T.set_subtensor(
        zeros[:active_next],
        log_p_curr[:active_next] + updated_log_p_prev
    )
    return active_next, log_p_next

def ctc_path_probs(predict, Y, alpha=1e-4):
    smoothed_predict = (1 - alpha) * predict[:, Y] + alpha * np.float32(1.) / Y.shape[0]
    L = T.log(smoothed_predict)
    zeros = T.zeros_like(L[0])
    log_first = zeros

    f_skip_idxs = ctc_create_skip_idxs(Y)
    b_skip_idxs = ctc_create_skip_idxs(Y[::-1])  # there should be a shortcut to calculating this

    def step(log_f_curr, log_b_curr, f_active, log_f_prev, b_active, log_b_prev):
        f_active_next, log_f_next = ctc_update_log_p(f_skip_idxs, zeros, f_active, log_f_curr, log_f_prev)
        b_active_next, log_b_next = ctc_update_log_p(b_skip_idxs, zeros, b_active, log_b_curr, log_b_prev)
        return f_active_next, log_f_next, b_active_next, log_b_next

    [f_active, log_f_probs, b_active, log_b_probs], _ = theano.scan(
        step, sequences=[L, L[::-1, ::-1]],
        outputs_info=[np.int32(1), log_first, np.int32(1), log_first])

    idxs = T.arange(L.shape[1]).dimshuffle('x', 0)
    mask = (idxs < f_active.dimshuffle(0, 'x')) & (idxs < b_active.dimshuffle(0, 'x'))[::-1, ::-1]
    log_probs = log_f_probs + log_b_probs[::-1, ::-1] - L
    return log_probs, mask

def theano_logsumexp(x, axis=None):
    """
    Compute log(sum(exp(x), axis=axis)) in a numerically stable fashion.

    Parameters
    ----------
    x : tensor_like
        A Theano tensor (any dimension will do).
    axis : int or symbolic integer scalar, or None
        Axis over which to perform the summation. `None`, the
        default, performs over all axes.

    Returns
    -------
    result : ndarray or scalar
        The result of the log(sum(exp(...))) operation.
    """
    xmax = x.max(axis=axis, keepdims=True)
    xmax_ = x.max(axis=axis)
    return xmax_ + T.log(T.exp(x - xmax).sum(axis=axis))

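A brief usage sketch (illustrative only, not from the original project, and assuming the theano_logsumexp defined above is in scope) of the numerical-stability claim in the docstring: the naive T.log(T.exp(x).sum()) overflows for large inputs, while the max-shifted version stays finite.

import numpy as np
import theano
import theano.tensor as T

x = T.dvector('x')
f_stable = theano.function([x], theano_logsumexp(x, axis=0))
f_naive = theano.function([x], T.log(T.exp(x).sum(axis=0)))

v = np.array([1000.0, 1001.0])
print(f_stable(v))  # ~1001.3133, finite
print(f_naive(v))   # inf, because exp(1000) overflows in float64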
def padMatrixWithTime(seqs, labels, times, options):
    lengths = np.array([len(seq) for seq in seqs]) - 1
    n_samples = len(seqs)
    maxlen = np.max(lengths)
    inputDimSize = options['inputDimSize']
    numClass = options['numClass']

    x = np.zeros((maxlen, n_samples, inputDimSize)).astype(config.floatX)
    y = np.zeros((maxlen, n_samples, numClass)).astype(config.floatX)
    t = np.zeros((maxlen, n_samples)).astype(config.floatX)
    mask = np.zeros((maxlen, n_samples)).astype(config.floatX)
    for idx, (seq, time, label) in enumerate(zip(seqs, times, labels)):
        for xvec, subseq in zip(x[:, idx, :], seq[:-1]):
            xvec[subseq] = 1.
        for yvec, subseq in zip(y[:, idx, :], label[1:]):
            yvec[subseq] = 1.
        mask[:lengths[idx], idx] = 1.
        t[:lengths[idx], idx] = time[:-1]

    lengths = np.array(lengths, dtype=config.floatX)
    if options['useLogTime']:
        t = np.log(t + options['logEps'])

    return x, y, t, mask, lengths

def parse_arguments(parser):
    parser.add_argument('seq_file', type=str, metavar='<visit_file>', help='The path to the Pickled file containing visit information of patients')
    parser.add_argument('n_input_codes', type=int, metavar='<n_input_codes>', help='The number of unique input medical codes')
    parser.add_argument('label_file', type=str, metavar='<label_file>', help='The path to the Pickled file containing label information of patients')
    parser.add_argument('n_output_codes', type=int, metavar='<n_output_codes>', help='The number of unique label medical codes')
    parser.add_argument('out_file', metavar='out_file', help='The path to the output models. The models will be saved after every epoch')
    parser.add_argument('--time_file', type=str, default='', help='The path to the Pickled file containing durations between visits of patients. If you are not using duration information, do not use this option')
    parser.add_argument('--predict_time', type=int, default=0, choices=[0,1], help='Use this option if you want the GRU to also predict the time duration until the next visit (0 for false, 1 for true) (default value: 0)')
    parser.add_argument('--tradeoff', type=float, default=1.0, help='Tradeoff variable for balancing the two loss functions: code prediction function and duration prediction function (default value: 1.0)')
    parser.add_argument('--use_log_time', type=int, default=1, choices=[0,1], help='Use logarithm of time duration to dampen the impact of the outliers (0 for false, 1 for true) (default value: 1)')
    parser.add_argument('--embed_file', type=str, default='', help='The path to the Pickled file containing the representation vectors of medical codes. If you are not using medical code representations, do not use this option')
    parser.add_argument('--embed_size', type=int, default=200, help='The size of the visit embedding before passing it to the GRU layers. If you are not providing your own medical code vectors, you must specify this value (default value: 200)')
    parser.add_argument('--embed_finetune', type=int, default=1, choices=[0,1], help='If you are using randomly initialized code representations, always use this option. If you are using an external medical code representations, and you want to fine-tune them as you train the GRU, use this option as well. (0 for false, 1 for true) (default value: 1)')
    parser.add_argument('--hidden_dim_size', type=str, default='[200,200]', help='The size of the hidden layers of the GRU. This is a string argument. For example, [500,400] means you are using a two-layer GRU where the lower layer uses a 500-dimensional hidden layer, and the upper layer uses a 400-dimensional hidden layer. (default value: [200,200])')
    parser.add_argument('--batch_size', type=int, default=100, help='The size of a single mini-batch (default value: 100)')
    parser.add_argument('--n_epochs', type=int, default=10, help='The number of training epochs (default value: 10)')
    parser.add_argument('--L2_softmax', type=float, default=0.001, help='L2 regularization for the softmax function (default value: 0.001)')
    parser.add_argument('--L2_time', type=float, default=0.001, help='L2 regularization for the linear regression (default value: 0.001)')
    parser.add_argument('--dropout_rate', type=float, default=0.5, help='Dropout rate between GRU hidden layers, and between the final hidden layer and the softmax layer (default value: 0.5)')
    parser.add_argument('--log_eps', type=float, default=1e-8, help='A small value to prevent log(0) (default value: 1e-8)')
    parser.add_argument('--verbose', action='store_true', help='Print output after every 10 mini-batches (default false)')
    args = parser.parse_args()

    return args

def theano_logsumexp(x, axis=None):
    """
    Compute log(sum(exp(x), axis=axis)) in a numerically stable fashion.

    Parameters
    ----------
    x : tensor_like
        A Theano tensor (any dimension will do).
    axis : int or symbolic integer scalar, or None
        Axis over which to perform the summation. `None`, the
        default, performs over all axes.

    Returns
    -------
    result : ndarray or scalar
        The result of the log(sum(exp(...))) operation.
    """
    xmax = T.max(x, axis=axis, keepdims=True)
    xmax_ = T.max(x, axis=axis)
    return xmax_ + T.log(T.exp(x - xmax).sum(axis=axis))

def xent(self, inputs, inputs_mask, chars, chars_mask,
         outputs, outputs_mask, attention):
    pred_outputs, pred_attention = self(
        inputs, inputs_mask, chars, chars_mask, outputs, outputs_mask)
    outputs_xent = batch_sequence_crossentropy(
        pred_outputs, outputs[1:], outputs_mask[1:])
    # Note that pred_attention will contain zero elements for masked-out
    # character positions. To avoid trouble with log(), we add 1 at the
    # masked-out positions (these terms are removed by the multiplication
    # with the mask anyway).
    batch_size = attention.shape[1].astype(theano.config.floatX)
    attention_mask = (inputs_mask.dimshuffle('x', 1, 0) *
                      outputs_mask[1:].dimshuffle(0, 1, 'x')
                      ).astype(theano.config.floatX)
    epsilon = 1e-6
    attention_xent = (
        -attention[1:] *
        T.log(epsilon + pred_attention + (1 - attention_mask)) *
        attention_mask).sum() / batch_size
    return outputs_xent, attention_xent

def multi_label_ACE(outputs, y_labels):
    data_shape = outputs.shape
    loss_buff = 0

    # num=T.iscalar(data_shape[0])  # theano int to get value from tensor
    # for i in range(int(num)):
    #     for j in range(12):
    #         y_exp=outputs[i,j]
    #         y_tru=y_labels[i,0,0,j]
    #         if y_tru==0:
    #             loss_ij=math.log(1-outputs[i,j])
    #             loss_buff-=loss_ij
    #         if y_tru>0:
    #             loss_ij=math.log(outputs[i,j])
    #             loss_buff-=loss_ij

    # wts=[0.24331649, 0.18382575, 0.23082499, 0.44545567, 0.52901483, 0.58482504, \
    #      0.57321465, 0.43411294, 0.15502839, 0.36377019, 0.19050646, 0.16083916]
    # for i in [3,4,5,6,7,9]:
    for i in range(12):
        target = y_labels[:, i]
        output = outputs[:, i]
        loss_au = T.sum(-(target * T.log((output + 0.05) / 1.05) +
                          (1.0 - target) * T.log((1.05 - output) / 1.05)))
        loss_buff += loss_au

    return loss_buff / (12 * BATCH_SIZE)

def sequence_log_likelihood(y, y_hat, y_mask, y_hat_mask, blank_symbol):
    """
    Based on code from Shawn Tan.
    Credits to Kyle Kastner as well.
    """
    y_hat_mask_len = tensor.sum(y_hat_mask, axis=0, dtype='int32')
    y_mask_len = tensor.sum(y_mask, axis=0, dtype='int32')
    log_probabs = _log_path_probabs(
        y, T.log(y_hat), y_mask, y_hat_mask, blank_symbol)
    batch_size = log_probabs.shape[1]
    log_labels_probab = _log_add(
        log_probabs[y_hat_mask_len - 1, tensor.arange(batch_size), y_mask_len - 1],
        log_probabs[y_hat_mask_len - 1, tensor.arange(batch_size), y_mask_len - 2])
    return log_labels_probab

def log_sum_exp(x, axis=1):
    m = T.max(x, axis=axis)
    return m + T.log(T.sum(T.exp(x - m.dimshuffle(0, 'x')), axis=axis))

def softmax_loss(p_true, output_before_softmax):
    output_before_softmax -= T.max(output_before_softmax, axis=1, keepdims=True)
    if p_true.ndim == 2:
        return T.mean(T.log(T.sum(T.exp(output_before_softmax), axis=1)) -
                      T.sum(p_true * output_before_softmax, axis=1))
    else:
        return T.mean(T.log(T.sum(T.exp(output_before_softmax), axis=1)) -
                      output_before_softmax[T.arange(p_true.shape[0]), p_true])

def build_model(model_):
    global fn_predict, fn_record
    global g_ozer, g_mdl

    g_ozer = dict(simple=VanillaSGD, adam=AdamSGD)[OZER]()
    g_ozer.lr = LEARN_RATE

    s_x = T.tensor4('x')
    s_y = T.ivector('y')
    s_pdpo = T.scalar()
    s_out = model_(s_x, s_pdpo)

    s_y_onehot = T.extra_ops.to_one_hot(s_y, len(g_dataset.label_map))
    s_loss = T.mean(-s_y_onehot * T.log(s_out + 1e-3))
    s_accr = T.mean(T.switch(
        T.eq(T.argmax(s_out, axis=1), T.argmax(s_y_onehot, axis=1)), 1, 0))

    no_dropout = [(s_pdpo, T.constant(0., dtype=th.config.floatX))]
    fn_predict = th.function(
        [s_x, s_y],
        {'pred': s_out, 'accr': s_accr, 'loss': s_loss},
        givens=no_dropout, profile=PROFILE)
    rec_fetches = {
        'x': s_x, 'y': s_y,
        'pred': s_out}
    rec_fetches.update(g_mdl.params_di)
    fn_record = th.function(
        [s_x, s_y], rec_fetches, givens=no_dropout, profile=PROFILE)
    g_ozer.compile(
        [s_x, s_y],
        s_loss,
        g_mdl.params_di.values(),
        fetches_={'pred': s_out, 'loss': s_loss, 'accr': s_accr},
        givens_=[(s_pdpo, T.constant(TRAIN_PDPO, dtype=th.config.floatX))],
        profile_=PROFILE)

def compute_loss(output, num_samples, num_entries=6, gamma=500.0):
    """Compute the loss of a dataset, given the output of the DSSM.

    Args:
        output (:class:`lasagne.layers.Layer`): the output of the DSSM
        num_samples (int): the number of samples in the dataset
        num_entries (int): the number of compared papers in the DSSM structure
        gamma (float): the coefficient applied in the softmax of the similarities

    Returns:
        theano.tensor.TensorType: the loss of the dataset
    """
    assert (num_entries > 2)
    assert (num_samples > 0)

    # Post-NN operations to compute the loss
    # First, we extract the first output of each bundle
    mask = np.zeros(num_entries * num_samples)
    mask[::num_entries] = 1
    unmask = np.ones(num_entries * num_samples) - mask
    cited = T.extra_ops.compress(mask, output, axis=0)
    odocs = T.extra_ops.compress(unmask, output, axis=0)

    # We duplicate each row 'x' num_entries-1 times
    cited = T.extra_ops.repeat(cited, num_entries - 1, axis=0)
    # Then we compute element-wise product of x with each y, for each bundle
    sims = T.sum(cited * odocs, axis=1)

    # We reshape the similarities
    sims = T.reshape(sims, (num_samples, num_entries - 1))
    sims = gamma * sims
    # We take the softmax of each row
    probs = T.nnet.softmax(sims)
    # We compute the loss as the sum of element on the first column
    loss_mask = np.zeros(num_entries - 1)
    loss_mask[0] = 1
    loss = T.extra_ops.compress(loss_mask, probs, axis=1)

    return -T.log(T.prod(loss))

def cost(self, probs, y, y_mask):
    y_flat = y.flatten()
    y_flat_idx = tensor.arange(y_flat.shape[0]) * self.vocab_size + y_flat
    cost = -tensor.log(probs.flatten()[y_flat_idx])
    cost = cost.reshape([y.shape[0], y.shape[1]])
    cost = (cost * y_mask).sum(0)
    cost = cost.mean()
    return cost

def f_log_probs(self, probs, x, x_mask, y, y_mask,
                src_selector, trg_selector, cg=None):
    y_flat = y.flatten()
    y_flat_idx = tensor.arange(y_flat.shape[0]) * self.vocab_size + y_flat
    cost = -tensor.log(probs.flatten()[y_flat_idx])
    cost = cost.reshape([y.shape[0], y.shape[1]])
    cost = (cost * y_mask).sum(0)
    func_inps = [x, x_mask, y, y_mask, src_selector, trg_selector]
    return theano.function(
        inputs=func_inps,
        outputs=cost, on_unused_input='warn')

def GMM_nll(x, mus, sigmas, mix_weights):
    """
    D is dimension of each observation (e.g. frame_size) for each component
    (multivariate Normal with diagonal covariance matrix).
    See `gaussian_nll`.

    x : (batch_size, D)
    mus : (batch_size, D, num_gaussians)
    sigmas : (batch_size, D, num_gaussians)
    mix_weights : (batch_size, num_gaussians)
    """
    x = x.dimshuffle(0, 1, 'x')

    # Similar to `gaussian_nll`
    ll_component_wise = lib.floatX(numpy.log(2. * numpy.pi))
    ll_component_wise += 2. * T.log(sigmas)
    ll_component_wise += ((x - mus) / sigmas) ** 2.
    ll_component_wise = ll_component_wise.sum(axis=1)  # on FRAME_SIZE
    ll_component_wise *= lib.floatX(-0.5)  # LL not NLL

    # Now ready to take care of weights of each component
    # Simply applying exp could potentially cause inf/NaN.
    # Look up LogSumExp trick, Softmax in theano, or this:
    # hips.seas.harvard.edu/blog/2013/01/09/computing-log-sum-exp/
    weighted_ll = ll_component_wise + T.log(mix_weights)
    ll_max = T.max(weighted_ll, axis=1, keepdims=True)
    nll = T.log(T.sum(T.exp(weighted_ll - ll_max), axis=1, keepdims=True))
    nll += ll_max
    nll = -nll.sum(axis=1)
    return nll

def multilabel_loss(preds, labels):
    eps = 1e-4
    preds = T.clip(preds, eps, 1 - eps)
    return -(labels * T.log(preds) + (1 - labels) * T.log(1 - preds)).mean(axis=1).mean(axis=0)

def multilabel_loss_with_mask(preds, labels, mask):
    eps = 1e-4
    preds = T.clip(preds, eps, 1 - eps)
    return -(mask * (labels * T.log(preds) + (1 - labels) * T.log(1 - preds))).mean(axis=1).mean(axis=0)

def init_weights(self, a0, b0):
    w1 = 1.0 / np.sqrt(self.d + 1) * np.random.randn(self.d, self.n_hidden)
    b1 = np.zeros((self.n_hidden,))
    w2 = 1.0 / np.sqrt(self.n_hidden + 1) * np.random.randn(self.n_hidden)
    b2 = 0.
    loggamma = np.log(np.random.gamma(a0, b0))
    loglambda = np.log(np.random.gamma(a0, b0))
    return (w1, b1, w2, b2, loggamma, loglambda)

def unpack_weights(self, z):
    w = z
    w1 = np.reshape(w[:self.d * self.n_hidden], [self.d, self.n_hidden])
    b1 = w[self.d * self.n_hidden:(self.d + 1) * self.n_hidden]
    w = w[(self.d + 1) * self.n_hidden:]
    w2, b2 = w[:self.n_hidden], w[-3]

    # the last two parameters are log variance
    loggamma, loglambda = w[-2], w[-1]

    return (w1, b1, w2, b2, loggamma, loglambda)

def centered_softplus(x):
    return T.nnet.softplus(x) - np.cast[th.config.floatX](np.log(2.))

def get_output_for(self, input, init=False, **kwargs):
    if input.ndim > 2:
        # if the input has more than two dimensions, flatten it into a
        # batch of feature vectors.
        input = input.flatten(2)

    activation = T.tensordot(input, self.W, [[1], [0]])
    abs_dif = (T.sum(abs(activation.dimshuffle(0, 1, 2, 'x') -
                         activation.dimshuffle('x', 1, 2, 0)), axis=2)
               + 1e6 * T.eye(input.shape[0]).dimshuffle(0, 'x', 1))

    if init:
        mean_min_abs_dif = 0.5 * T.mean(T.min(abs_dif, axis=2), axis=0)
        abs_dif /= mean_min_abs_dif.dimshuffle('x', 0, 'x')
        self.init_updates = [(self.log_weight_scale,
                              self.log_weight_scale - T.log(mean_min_abs_dif).dimshuffle(0, 'x'))]

    f = T.sum(T.exp(-abs_dif), axis=2)

    if init:
        mf = T.mean(f, axis=0)
        f -= mf.dimshuffle('x', 0)
        self.init_updates.append((self.b, -mf))
    else:
        f += self.b.dimshuffle('x', 0)

    return T.concatenate([input, f], axis=1)

# Input Mixture of Gaussian Layer

def connect(self, inputs):
    energy = tensor.dot(inputs, self.W) + self.b
    energy = energy.reshape([energy.shape[0] * energy.shape[1], energy.shape[2]])
    log_scores = tensor.log(tensor.nnet.softmax(energy))
    predictions = tensor.argmax(log_scores, axis=-1)
    return (log_scores, predictions)

def connect(self, inputs, weights, labels):
    """
    - inputs: flattened log scores from the softmax layer.
    """
    y_flat = labels.flatten()
    x_flat_idx = tensor.arange(y_flat.shape[0])
    cross_ent = -inputs[x_flat_idx, y_flat].reshape([labels.shape[0], labels.shape[1]])
    if weights is not None:
        cross_ent = cross_ent * weights
    # Summed over timesteps. Averaged across samples in the batch.
    return cross_ent.sum(axis=0).mean()

def kl_divergence(p, p_hat):
    return p_hat - p + p * T.log(p / p_hat)

def mean_squared_logarithmic_error(y_true, y_pred):
    return T.sqr(T.log(T.clip(y_pred, epsilon, np.inf) + 1.) -
                 T.log(T.clip(y_true, epsilon, np.inf) + 1.)).mean(axis=-1)

def poisson_loss(y_true, y_pred):
    return T.mean(y_pred - y_true * T.log(y_pred + epsilon), axis=-1)


####################################################
# Variational Auto-encoder

def gaussian_kl_divergence(mean, ln_var):
    """Computes the KL-divergence of Gaussian variables from the standard one.

    Given two variable ``mean`` representing :math:`\\mu` and ``ln_var``
    representing :math:`\\log(\\sigma^2)`, this function returns a variable
    representing the KL-divergence between the given multi-dimensional
    Gaussian :math:`N(\\mu, S)` and the standard Gaussian :math:`N(0, I)`

    .. math::

        D_{\\mathbf{KL}}(N(\\mu, S) \\| N(0, I)),

    where :math:`S` is a diagonal matrix such that :math:`S_{ii} = \\sigma_i^2`
    and :math:`I` is an identity matrix.

    Args:
        mean (~chainer.Variable): A variable representing mean of given
            gaussian distribution, :math:`\\mu`.
        ln_var (~chainer.Variable): A variable representing logarithm of
            variance of given gaussian distribution, :math:`\\log(\\sigma^2)`.

    Returns:
        ~chainer.Variable: A variable representing KL-divergence between
            given gaussian distribution and the standard gaussian.

    """
    var = T.exp(ln_var)
    return 0.5 * T.sum(mean * mean + var - ln_var - 1, 1)

# aliases

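A quick sanity-check sketch (illustrative only, not from the source project, assuming the gaussian_kl_divergence above is in scope): for a batch of standard-normal posteriors (zero mean, zero log-variance) the KL term is exactly zero, one value per sample.

import numpy as np
import theano
import theano.tensor as T

mean = T.dmatrix('mean')
ln_var = T.dmatrix('ln_var')
kl = theano.function([mean, ln_var], gaussian_kl_divergence(mean, ln_var))

# KL(N(0, I) || N(0, I)) vanishes: 0.5 * sum(0 + 1 - 0 - 1) = 0 per row.
print(kl(np.zeros((2, 3)), np.zeros((2, 3))))  # -> [0., 0.]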
def entropy(y_pred):
    # Clip predictions to avoid numerical instability
    y_pred = T.clip(y_pred, _EPSILON, 1.0 - _EPSILON)
    ent = -T.sum(y_pred * T.log(y_pred), axis=1)
    return ent.mean()

def __init__(self, input1, input2, log=False, **kwargs):
    super(MultLayer, self).__init__([input1, input2], **kwargs)