The following code examples, collected from open-source Python projects, illustrate how to use theano.tensor.diag().
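As a quick orientation before the project snippets (a minimal sketch, not taken from any of the projects below): theano.tensor.diag() mirrors numpy.diag(), building a diagonal matrix from a vector and extracting the main diagonal from a matrix.

import numpy as np
import theano
import theano.tensor as T

v = T.vector('v')
m = T.matrix('m')
to_matrix = theano.function([v], T.diag(v))   # vector -> diagonal matrix
to_vector = theano.function([m], T.diag(m))   # matrix -> main diagonal

print(to_matrix(np.asarray([1., 2., 3.], dtype=theano.config.floatX)))
print(to_vector(np.asarray(np.eye(3) * 5, dtype=theano.config.floatX)))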
def compute_output_shape(self, input_shape):
    if len(input_shape) != 3:
        raise RuntimeError("Expects 3 inputs: L, mu, a")
    for i, shape in enumerate(input_shape):
        if len(shape) != 2:
            raise RuntimeError("Input {} has {} dimensions but should have 2".format(i, len(shape)))
    assert self.mode in ('full', 'diag')
    if self.mode == 'full':
        expected_elements = (self.nb_actions * self.nb_actions + self.nb_actions) // 2
    elif self.mode == 'diag':
        expected_elements = self.nb_actions
    else:
        expected_elements = None
    assert expected_elements is not None
    if input_shape[0][1] != expected_elements:
        raise RuntimeError("Input 0 (L) should have {} elements but has {}".format(
            expected_elements, input_shape[0][1]))
    if input_shape[1][1] != self.nb_actions:
        raise RuntimeError(
            "Input 1 (mu) should have {} elements but has {}".format(self.nb_actions, input_shape[1][1]))
    if input_shape[2][1] != self.nb_actions:
        raise RuntimeError(
            "Input 2 (action) should have {} elements but has {}".format(self.nb_actions, input_shape[2][1]))
    return input_shape[0][0], 1
def _grab_probs(class_probs, target, use_fast_ver=False):
    if class_probs.ndim == 3:
        class_probs = class_probs.reshape((-1, class_probs.shape[-1]))

    shape0 = class_probs.shape[0]
    shape1 = class_probs.shape[1]
    p = None

    if target.ndim == 2 and use_fast_ver:
        target = target.flatten()
        cp = class_probs.reshape((target.shape[0], -1))
        p = TT.diag(cp.T[target])
    else:
        if target.ndim > 1:
            target = target.flatten()
        assert target.ndim == 1, 'make sure target is a vector of ints'
        assert 'int' in target.dtype
        pos = TT.arange(shape0) * shape1
        new_targ = target + pos
        p = class_probs.reshape((shape0 * shape1, 1))[new_targ].reshape((shape0,))
    return p
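Both branches of _grab_probs select class_probs[i, target[i]] for every row i: the fast branch materialises an N x N matrix and keeps only its diagonal, while the generic branch indexes a flattened copy. A small NumPy sketch with hypothetical numbers, just to show that the two selections agree:

import numpy as np

# Hypothetical data: 2 rows of class probabilities, 3 classes each.
class_probs = np.array([[0.1, 0.7, 0.2],
                        [0.3, 0.3, 0.4]])
target = np.array([1, 2])                      # correct class per row

# diag-based selection, as in the fast branch
p_diag = np.diag(class_probs.T[target])        # -> [0.7, 0.4]

# flat-index selection, as in the generic branch
n, c = class_probs.shape
p_flat = class_probs.reshape(-1)[target + np.arange(n) * c]  # -> [0.7, 0.4]

assert np.allclose(p_diag, p_flat)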
def __init__(self, nb_actions, mode='full', **kwargs):
    if mode not in ('full', 'diag'):
        raise RuntimeError('Unknown mode "{}" in NAFLayer.'.format(mode))
    self.nb_actions = nb_actions
    self.mode = mode
    super(NAFLayer, self).__init__(**kwargs)
def _lin_loss(self, predictions, n_targets):
    neg = predictions[:, n_targets:].sum(axis=-1)
    pos = T.diag(predictions)
    return neg - pos
def _BPR_loss(self, predictions, n_targets):
    diff = (predictions - T.diag(predictions).dimshuffle([0, 'x']))[:, n_targets:]
    return -(T.log(T.nnet.sigmoid(-diff))).mean(axis=-1)
def _BPRelu_loss(self, predictions, n_targets):
    diff = (predictions - T.diag(predictions).dimshuffle([0, 'x']))[:, n_targets:]
    return lasagne.nonlinearities.leaky_rectify(diff + 0.5).mean(axis=-1)
def _TOP1_loss(self, predictions, n_targets):
    diff = (predictions - T.diag(predictions).dimshuffle([0, 'x']))[:, n_targets:]
    reg = T.sqr(predictions[:, n_targets:])
    return (T.nnet.sigmoid(diff) + T.nnet.sigmoid(reg)).mean(axis=-1)
def _BPRI_loss(self, predictions, targets):
    if self.last_layer_tanh:
        predictions = T.tanh(predictions)
    diff = (predictions - T.diag(predictions).dimshuffle([0, 'x']))[:, targets.shape[0]:]
    return (T.log(T.nnet.sigmoid(diff))).mean(axis=-1)
def _TOP1_loss(self, predictions, targets):
    if self.last_layer_tanh:
        predictions = T.tanh(predictions)
    diff = (predictions - T.diag(predictions).dimshuffle([0, 'x']))[:, targets.shape[0]:]
    reg = T.sqr(predictions[:, targets.shape[0]:])
    return (T.nnet.sigmoid(diff) + T.nnet.sigmoid(reg)).mean(axis=-1)
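The ranking losses above share one idiom: T.diag(predictions) picks each row's positive-item score off the diagonal, and .dimshuffle([0, 'x']) reshapes that vector into a (batch, 1) column so it broadcasts against the remaining (negative) columns. A minimal sketch of just that broadcasting step, using synthetic scores and illustrative variable names that are not taken from the projects above:

import numpy as np
import theano
import theano.tensor as T

preds = T.matrix('preds')
# Column of positive scores: shape (batch, 1), broadcastable along axis 1.
pos = T.diag(preds).dimshuffle(0, 'x')
diff = preds - pos                      # per row: each score minus that row's positive score

f = theano.function([preds], diff)
scores = np.asarray([[2.0, 0.5, 1.0],
                     [0.2, 1.5, 0.3],
                     [0.1, 0.4, 0.9]], dtype=theano.config.floatX)
print(f(scores))                        # diagonal entries become 0, off-diagonal entries are negative-minus-positive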
def cross_entropy(self, yhat):
    return T.cast(T.mean(-T.log(T.diag(yhat) + 1e-24)), theano.config.floatX)
def bpr(self, yhat):
    return T.cast(T.mean(-T.log(T.nnet.sigmoid(T.diag(yhat) - yhat.T))), theano.config.floatX)
def top1(self, yhat):
    yhatT = yhat.T
    return T.cast(T.mean(
        T.mean(T.nnet.sigmoid(-T.diag(yhat) + yhatT) + T.nnet.sigmoid(yhatT ** 2), axis=0)
        - T.nnet.sigmoid(T.diag(yhat) ** 2) / self.batch_size), theano.config.floatX)
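In the three session-based losses above, yhat is a square score matrix whose diagonal holds the scores of the actual target items, with the other entries of each row acting as sampled negatives. A hedged NumPy re-statement of bpr() on synthetic scores, only to make the diag/transpose trick concrete (not part of the original project):

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

yhat = np.array([[2.0, 0.5, 1.0],
                 [0.2, 1.5, 0.3],
                 [0.1, 0.4, 0.9]])

# (diag(yhat) - yhat.T)[i, j] == yhat[j, j] - yhat[j, i]:
# example j's positive score minus the score it assigns to example i's item.
bpr_loss = np.mean(-np.log(sigmoid(np.diag(yhat) - yhat.T)))
print(bpr_loss)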
def GrabProbs(classProbs, target, gRange=None):
    if classProbs.ndim > 2:
        classProbs = classProbs.reshape((classProbs.shape[0] * classProbs.shape[1], classProbs.shape[2]))
    else:
        classProbs = classProbs
    if target.ndim > 1:
        tflat = target.flatten()
    else:
        tflat = target
    return T.diag(classProbs.T[tflat])
def grad(self, inputs, gradients):
    """
    Cholesky decomposition reverse-mode gradient update.

    Symbolic expression for reverse-mode Cholesky gradient taken from [0]_

    References
    ----------
    .. [0] I. Murray, "Differentiation of the Cholesky decomposition",
       http://arxiv.org/abs/1602.07527
    """
    x = inputs[0]
    dz = gradients[0]
    chol_x = self(x)

    # deal with upper triangular by converting to lower triangular
    if not self.lower:
        chol_x = chol_x.T
        dz = dz.T

    def tril_and_halve_diagonal(mtx):
        """Extracts lower triangle of square matrix and halves diagonal."""
        return tensor.tril(mtx) - tensor.diag(tensor.diagonal(mtx) / 2.)

    def conjugate_solve_triangular(outer, inner):
        """Computes L^{-T} P L^{-1} for lower-triangular L."""
        return solve_upper_triangular(
            outer.T, solve_upper_triangular(outer.T, inner.T).T)

    s = conjugate_solve_triangular(
        chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))

    if self.lower:
        return [tensor.tril(s + s.T) - tensor.diag(tensor.diagonal(s))]
    else:
        return [tensor.triu(s + s.T) - tensor.diag(tensor.diagonal(s))]
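A gradient expression like the one above can be sanity-checked against finite differences with Theano's gradient checker. A minimal sketch, assuming a small synthetic positive-definite input (the matrix here is made up for illustration):

import numpy as np
import theano
from theano.tensor.slinalg import cholesky

rng = np.random.RandomState(0)
m = rng.rand(4, 4)
pd = m.dot(m.T) + 4 * np.eye(4)      # symmetric positive-definite test matrix

# Compare the symbolic Cholesky gradient against a finite-difference estimate.
theano.gradient.verify_grad(cholesky, [pd], rng=rng)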
def perform(self, node, inputs, outputs):
    (a, b, gw) = inputs
    w, v = scipy.linalg.eigh(a, b, lower=self.lower)
    gA = v.dot(numpy.diag(gw).dot(v.T))
    gB = - v.dot(numpy.diag(gw * w).dot(v.T))

    # See EighGrad comments for an explanation of these lines
    out1 = self.tri0(gA) + self.tri1(gA).T
    out2 = self.tri0(gB) + self.tri1(gB).T
    outputs[0][0] = numpy.asarray(out1, dtype=node.outputs[0].dtype)
    outputs[1][0] = numpy.asarray(out2, dtype=node.outputs[1].dtype)