The following 28 code examples, extracted from Python open-source projects, illustrate how to use theano.tensor.Rop().
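As background for the examples below: theano.tensor.Rop(f, wrt, eval_points) builds the R-operator, i.e. the Jacobian of f with respect to wrt right-multiplied by eval_points, without ever materializing the Jacobian (its counterpart tensor.Lop left-multiplies). A minimal standalone sketch, with a toy expression and illustrative variable names that are not taken from any of the projects below:

import numpy
import theano
import theano.tensor as T

# Jacobian-vector product J(x) . v via the R-operator.
x = T.vector('x')
v = T.vector('v')
y = T.tanh(T.dot(x, x) * x)          # toy differentiable expression

Jv = T.Rop(y, x, v)                  # symbolic Jacobian of y wrt x, times v
jv_fn = theano.function([x, v], Jv)

xv = numpy.ones(3, dtype=theano.config.floatX)
vv = numpy.arange(3, dtype=theano.config.floatX)
print(jv_fn(xv, vv))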
def gauss_newton_product(cost, p, v, s):
    # this computes the product Gv = J'HJv (G is the Gauss-Newton matrix)
    if not isinstance(s, (list, tuple)):
        s = [s]
    sum_Gv = None
    for si in s:
        Jv = T.Rop(si, p, v)
        HJv = T.grad(T.sum(T.grad(cost, si, disconnected_inputs='ignore') * Jv), si,
                     consider_constant=[Jv], disconnected_inputs='ignore')
        Gv = T.grad(T.sum(HJv * si), p,
                    consider_constant=[HJv, Jv], disconnected_inputs='ignore')
        Gv = list(map(T.as_tensor_variable, Gv))  # for CudaNdarray
        if sum_Gv is None:
            sum_Gv = Gv
        else:
            sum_Gv = [a + b for a, b in zip(Gv, sum_Gv)]
    return sum_Gv
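The three grad/Rop steps above compute Jv, then HJv, then J'HJv. A usage sketch for this helper, assuming it is in scope together with the usual imports (numpy, theano, and theano.tensor as T); the linear model, shapes, and names (W, x, t, v) are illustrative rather than taken from the original project:

import numpy
import theano
import theano.tensor as T

# Toy linear model with squared error.
x = T.matrix('x')
t = T.matrix('t')
v = T.matrix('v')                     # direction, same shape as W
W = theano.shared(numpy.zeros((5, 3), dtype=theano.config.floatX), name='W')

s = T.dot(x, W)                       # output activation, plays the role of `s`
cost = ((s - t) ** 2).mean()

Gv = gauss_newton_product(cost, [W], [v], s)   # list with one entry: G v for W
gv_fn = theano.function([x, t, v], Gv)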
def _get_updates_for(self, param, grad):
    D_tm1 = shared_like(param, 'D_ewma')
    Hv = TT.Rop(grad, param, self.rng.normal(param.shape))
    D_t = self.ewma * D_tm1 + (1 - self.ewma) * Hv * Hv
    den = TT.sqrt(D_t) + self.epsilon
    yield D_tm1, D_t
    yield param, param - grad * self.learning_rate / den
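In this update rule, TT.Rop(grad, param, r) with a random Gaussian vector r is a Hessian-vector product, whose elementwise square is accumulated into a diagonal preconditioner. The R-operator applied to the gradient agrees with the more common gradient-of-inner-product formulation; a small standalone check with a toy loss and illustrative names (not part of the original code):

import numpy
import theano
import theano.tensor as T

# Two equivalent Hessian-vector products for a scalar loss:
# the R-operator applied to the gradient, and the gradient of <g, v>.
w = T.vector('w')
v = T.vector('v')
loss = T.sum(w ** 2) + T.sum(T.tanh(w))      # toy scalar objective

g = T.grad(loss, w)
Hv_rop = T.Rop(g, w, v)
Hv_lop = T.grad(T.sum(g * v), w, consider_constant=[v])

check = theano.function([w, v], [Hv_rop, Hv_lop])
a, b = check(numpy.ones(3, dtype=theano.config.floatX),
             numpy.arange(3, dtype=theano.config.floatX))
assert numpy.allclose(a, b)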
def check_nondiff_rop(self, y):
    """ If your op is not differentiable (so you can't define Rop),
    test that an error is raised."""
    raised = False
    try:
        tensor.Rop(y, self.x, self.v)
    except ValueError:
        raised = True
    if not raised:
        self.fail((
            'Op did not raise an error even though the function'
            ' is not differentiable'))
def test_invalid_input(self):
    success = False
    try:
        tensor.Rop(0., [tensor.matrix()], [tensor.vector()])
        success = True
    except ValueError:
        pass
    assert not success
def test_Rop_dot_bug_18Oct2013_Jeremiah(self):
    # This test refers to a bug reported by Jeremiah Lowin on 18th Oct
    # 2013. The bug occurs when a dot operation has only one
    # differentiable path (i.e. there is no gradient wrt one of the
    # inputs).
    x = tensor.arange(20.0).reshape([1, 20])
    v = theano.shared(numpy.ones([20]))
    d = tensor.dot(x, v).sum()
    tensor.Rop(tensor.grad(d, v), v, v)
def __call__(self, v, cost, parameters, damp):
    # compute Gauss-Newton Matrix right-multiplied by `v`
    Jv = T.Rop(self._s, parameters, v)
    HJv = T.grad(T.sum(T.grad(cost, self._s) * Jv), self._s,
                 consider_constant=[Jv])
    JHJv = T.grad(T.sum(HJv * self._s), parameters,
                  consider_constant=[HJv, Jv])

    # apply Tikhonov damping
    JHJv = [JHJvi + damp * vi for JHJvi, vi in zip(JHJv, v)]
    return JHJv
def gauss_newton_product(cost, p, v, s):
    # this computes the product Gv = J'HJv (G is the Gauss-Newton matrix)
    Jv = T.Rop(s, p, v)
    HJv = T.grad(T.sum(T.grad(cost, s) * Jv), s,
                 consider_constant=[Jv], disconnected_inputs='ignore')
    Gv = T.grad(T.sum(HJv * s), p,
                consider_constant=[HJv, Jv], disconnected_inputs='ignore')
    Gv = map(T.as_tensor_variable, Gv)  # for CudaNdarray
    return Gv
def __init__(self, _p, inputs, s, costs, h=None, ha=None):
    '''Constructs and compiles the necessary Theano functions.

    p : list of Theano shared variables
        Parameters of the model to be optimized.
    inputs : list of Theano variables
        Symbolic variables that are inputs to your graph (they should also
        include your model 'output'). Your training examples must fit these.
    s : Theano variable
        Symbolic variable with respect to which the Hessian of the objective
        is positive-definite, implicitly defining the Gauss-Newton matrix.
        Typically, it is the activation of the output layer.
    costs : list of Theano variables
        Monitoring costs, the first of which will be the optimized objective.
    h : Theano variable or None
        Structural damping is applied to this variable (typically the hidden
        units of an RNN).
    ha : Theano variable or None
        Symbolic variable that implicitly defines the Gauss-Newton matrix for
        the structural damping term (typically the activation of the hidden
        layer). If None, it will be set to `h`.'''

    self.p = _p
    self.shapes = [i.get_value().shape for i in _p]
    self.sizes = list(map(numpy.prod, self.shapes))
    self.positions = numpy.cumsum([0] + self.sizes)[:-1]

    g = T.grad(costs[0], _p)
    g = list(map(T.as_tensor_variable, g))  # for CudaNdarray
    self.f_gc = compile_function(inputs, g + costs)  # during gradient computation
    self.f_cost = compile_function(inputs, costs)  # for quick cost evaluation

    symbolic_types = T.scalar, T.vector, T.matrix, T.tensor3, T.tensor4

    v = [symbolic_types[len(i)]() for i in self.shapes]
    Gv = gauss_newton_product(costs[0], _p, v, s)

    coefficient = T.scalar()  # this is lambda*mu

    if h is not None:  # structural damping with cross-entropy
        h_constant = symbolic_types[h.ndim]()  # T.Rop does not support `consider_constant` yet, so use `givens`
        structural_damping = coefficient * (
            -h_constant * T.log(h + 1e-10) -
            (1 - h_constant) * T.log((1 - h) + 1e-10)).sum() / h.shape[0]
        if ha is None:
            ha = h
        Gv_damping = gauss_newton_product(structural_damping, _p, v, ha)
        Gv = [a + b for a, b in zip(Gv, Gv_damping)]
        givens = {h_constant: h}
    else:
        givens = {}

    self.function_Gv = compile_function(inputs + v + [coefficient], Gv,
                                        givens=givens)
def get_grads(self, state_below, target, mask=None, reg=None,
              scale=None, sum_over_time=True, use_noise=True,
              additional_inputs=None):
    """
    This function implements both the forward and backwards pass of this
    layer. The reason we do this in a single function is that, for the
    factorized softmax layer, it is hard to rely on grad and get an
    optimized graph. For uniformity I've implemented this method for this
    layer as well (though one doesn't need to use it).

    :param state_below: theano variable representing the input to the
        softmax layer
    :param target: theano variable representing the target for this layer
    :return: cost, dC_dstate_below, param_grads, new_properties
        dC_dstate_below is a computational graph representing the gradient
            of the cost wrt to state_below
        param_grads is a list containing the gradients wrt to the different
            parameters of the layer
        new_properties is a dictionary containing additional properties of
            the model; properties are theano expressions that are evaluated
            and reported by the model
    """
    cost = self.get_cost(state_below,
                         target,
                         mask=mask,
                         reg=reg,
                         scale=scale,
                         sum_over_time=sum_over_time,
                         use_noise=use_noise,
                         additional_inputs=additional_inputs)
    grads = TT.grad(cost, self.params)
    if self.additional_gradients:
        for new_grads, to_replace, properties in self.additional_gradients:
            gparams, params = new_grads
            prop_expr = [x[1] for x in properties]
            replace = [(x[0], TT.grad(cost, x[1])) for x in to_replace]
            rval = theano.clone(gparams + prop_expr, replace=replace)
            gparams = rval[:len(gparams)]
            prop_expr = rval[len(gparams):]
            self.properties += [(x[0], y) for x, y in zip(properties, prop_expr)]
            for gp, p in zip(gparams, params):
                grads[self.params.index(p)] += gp

    self.cost = cost
    self.grads = grads

    def Gvs_fn(*args):
        w = (1 - self.model_output) * self.model_output * state_below.shape[1]
        Gvs = TT.Lop(self.model_output, self.params,
                     TT.Rop(self.model_output, self.params, args) / w)
        return Gvs

    self.Gvs = Gvs_fn
    return cost, grads
def check_mat_rop_lop(self, y, out_shape):
    """Test the Rop/Lop when the input is a matrix and the output is a vector.

    :param y: the output variable of the op applied to self.mx
    :param out_shape: Used to generate a random tensor corresponding to the
        evaluation point of the Rop (i.e. the tensor with which you multiply
        the Jacobian). It should be a tuple of ints.

    If the Op has more than 1 input, one of them must be mx, while the others
    must be shared variables / constants. We will test only against the input
    self.mx, so you must call check_mat_rop_lop/check_rop_lop for the other
    inputs.

    We expect all inputs/outputs to have dtype floatX.

    If you want to test an Op with an output matrix, add a sum after the Op
    you want to test.
    """
    vx = numpy.asarray(self.rng.uniform(size=self.mat_in_shape),
                       theano.config.floatX)
    vv = numpy.asarray(self.rng.uniform(size=self.mat_in_shape),
                       theano.config.floatX)
    yv = tensor.Rop(y, self.mx, self.mv)
    rop_f = function([self.mx, self.mv], yv, on_unused_input='ignore')
    sy, _ = theano.scan(lambda i, y, x, v: (tensor.grad(y[i], x) * v).sum(),
                        sequences=tensor.arange(y.shape[0]),
                        non_sequences=[y, self.mx, self.mv])
    scan_f = function([self.mx, self.mv], sy, on_unused_input='ignore')

    v1 = rop_f(vx, vv)
    v2 = scan_f(vx, vv)
    assert numpy.allclose(v1, v2), ('ROP mismatch: %s %s' % (v1, v2))

    self.check_nondiff_rop(theano.clone(y,
                                        replace={self.mx: break_op(self.mx)}))

    vv = numpy.asarray(self.rng.uniform(size=out_shape),
                       theano.config.floatX)
    yv = tensor.Lop(y, self.mx, self.v)
    lop_f = function([self.mx, self.v], yv)

    sy = tensor.grad((self.v * y).sum(), self.mx)
    scan_f = function([self.mx, self.v], sy)

    v1 = lop_f(vx, vv)
    v2 = scan_f(vx, vv)
    assert numpy.allclose(v1, v2), ('LOP mismatch: %s %s' % (v1, v2))
def check_rop_lop(self, y, out_shape):
    """
    As check_mat_rop_lop, except the input is self.x which is a vector.
    The output is still a vector.
    """
    # TEST ROP
    vx = numpy.asarray(self.rng.uniform(size=self.in_shape),
                       theano.config.floatX)
    vv = numpy.asarray(self.rng.uniform(size=self.in_shape),
                       theano.config.floatX)

    yv = tensor.Rop(y, self.x, self.v)
    rop_f = function([self.x, self.v], yv, on_unused_input='ignore')
    J, _ = theano.scan(lambda i, y, x: tensor.grad(y[i], x),
                       sequences=tensor.arange(y.shape[0]),
                       non_sequences=[y, self.x])
    sy = tensor.dot(J, self.v)
    scan_f = function([self.x, self.v], sy, on_unused_input='ignore')

    v1 = rop_f(vx, vv)
    v2 = scan_f(vx, vv)
    assert numpy.allclose(v1, v2), ('ROP mismatch: %s %s' % (v1, v2))

    known_fail = False
    try:
        self.check_nondiff_rop(theano.clone(y,
                                            replace={self.x: break_op(self.x)}))
    except AssertionError:
        known_fail = True

    # TEST LOP
    vx = numpy.asarray(self.rng.uniform(size=self.in_shape),
                       theano.config.floatX)
    vv = numpy.asarray(self.rng.uniform(size=out_shape),
                       theano.config.floatX)

    yv = tensor.Lop(y, self.x, self.v)
    lop_f = function([self.x, self.v], yv, on_unused_input='ignore')
    J, _ = theano.scan(lambda i, y, x: tensor.grad(y[i], x),
                       sequences=tensor.arange(y.shape[0]),
                       non_sequences=[y, self.x])
    sy = tensor.dot(self.v, J)
    scan_f = function([self.x, self.v], sy)

    v1 = lop_f(vx, vv)
    v2 = scan_f(vx, vv)
    assert numpy.allclose(v1, v2), ('LOP mismatch: %s %s' % (v1, v2))

    if known_fail:
        raise SkipTest('Rop does not handle non-differentiable inputs '
                       'correctly. Bug exposed by fixing Add.grad method.')
def test_conv(self):
    for conv_op in [conv.conv2d, conv2d]:
        for border_mode in ['valid', 'full']:
            image_shape = (2, 2, 4, 5)
            filter_shape = (2, 2, 2, 3)
            image_dim = len(image_shape)
            filter_dim = len(filter_shape)
            input = tensor.TensorType(
                theano.config.floatX,
                [False] * image_dim)(name='input')
            filters = tensor.TensorType(
                theano.config.floatX,
                [False] * filter_dim)(name='filter')
            ev_input = tensor.TensorType(
                theano.config.floatX,
                [False] * image_dim)(name='ev_input')
            ev_filters = tensor.TensorType(
                theano.config.floatX,
                [False] * filter_dim)(name='ev_filters')

            def sym_conv2d(input, filters):
                return conv_op(input, filters, border_mode=border_mode)

            output = sym_conv2d(input, filters).flatten()
            yv = tensor.Rop(output, [input, filters],
                            [ev_input, ev_filters])
            mode = None
            if theano.config.mode == "FAST_COMPILE":
                mode = "FAST_RUN"
            rop_f = function([input, filters, ev_input, ev_filters],
                             yv, on_unused_input='ignore', mode=mode)
            sy, _ = theano.scan(
                lambda i, y, x1, x2, v1, v2:
                    (tensor.grad(y[i], x1) * v1).sum() +
                    (tensor.grad(y[i], x2) * v2).sum(),
                sequences=tensor.arange(output.shape[0]),
                non_sequences=[output, input, filters, ev_input, ev_filters],
                mode=mode)
            scan_f = function([input, filters, ev_input, ev_filters], sy,
                              on_unused_input='ignore', mode=mode)

            dtype = theano.config.floatX
            image_data = numpy.random.random(image_shape).astype(dtype)
            filter_data = numpy.random.random(filter_shape).astype(dtype)
            ev_image_data = numpy.random.random(image_shape).astype(dtype)
            ev_filter_data = numpy.random.random(filter_shape).astype(dtype)

            v1 = rop_f(image_data, filter_data, ev_image_data,
                       ev_filter_data)
            v2 = scan_f(image_data, filter_data, ev_image_data,
                        ev_filter_data)
            assert numpy.allclose(v1, v2), ("Rop mismatch: %s %s" % (v1, v2))
def test_rop_mitmot(self):
    # this test is a copy paste from the script given by Justin Bayer to
    # reproduce this bug

    # We have 2 parameter groups with the following shapes.
    W1shape = (1, 3)
    W2shape = (3, 3)
    n_pars = 1 * 3 + 3 * 3

    # Allocate big parameter array.
    pars = theano.shared(numpy.empty(n_pars))

    # Assign slices.
    W1 = pars[:3].reshape(W1shape)
    W2 = pars[3:].reshape(W2shape)

    # Define recurrent model. We are using a model where each input is a
    # tensor of shape (T, B, D) where T is the number of timesteps, B is
    # the number of sequences iterated over in parallel and D is the
    # dimensionality of each item at a timestep.
    inpt = tensor.tensor3('inpt')
    target = tensor.tensor3('target')

    # Make these flat in order to be able to use dot products instead of
    # tensordot, which is slower.
    inpt_flat = inpt.reshape((inpt.shape[0] * inpt.shape[1],
                              inpt.shape[2]))
    hidden_flat = tensor.dot(inpt_flat, W1)
    hidden = hidden_flat.reshape((inpt.shape[0], inpt.shape[1], 3))

    transfer = tensor.nnet.sigmoid

    hidden_rec, _ = theano.scan(
        lambda x, h_tm1: transfer(tensor.dot(h_tm1, W2) + x),
        sequences=hidden,
        outputs_info=[tensor.zeros_like(hidden[0])])

    hidden_rec_flat = hidden_rec.reshape(
        (hidden_rec.shape[0] * hidden_rec.shape[1],
         hidden_rec.shape[2]))

    cost = ((hidden_rec - target) ** 2).mean()
    d_cost_wrt_pars = tensor.grad(cost, pars)

    p = tensor.dvector()
    Hp = tensor.Rop(d_cost_wrt_pars, pars, p)
def test_rop_lop():
    mx = tensor.matrix('mx')
    mv = tensor.matrix('mv')
    v = tensor.vector('v')
    y = matrix_inverse(mx).sum(axis=0)

    yv = tensor.Rop(y, mx, mv)
    rop_f = function([mx, mv], yv)

    sy, _ = theano.scan(lambda i, y, x, v: (tensor.grad(y[i], x) * v).sum(),
                        sequences=tensor.arange(y.shape[0]),
                        non_sequences=[y, mx, mv])
    scan_f = function([mx, mv], sy)

    rng = numpy.random.RandomState(utt.fetch_seed())
    vx = numpy.asarray(rng.randn(4, 4), theano.config.floatX)
    vv = numpy.asarray(rng.randn(4, 4), theano.config.floatX)

    v1 = rop_f(vx, vv)
    v2 = scan_f(vx, vv)
    assert _allclose(v1, v2), ('ROP mismatch: %s %s' % (v1, v2))

    raised = False
    try:
        tensor.Rop(
            theano.clone(y, replace={mx: break_op(mx)}),
            mx,
            mv)
    except ValueError:
        raised = True
    if not raised:
        raise Exception((
            'Op did not raise an error even though the function'
            ' is not differentiable'))

    vv = numpy.asarray(rng.uniform(size=(4,)), theano.config.floatX)
    yv = tensor.Lop(y, mx, v)
    lop_f = function([mx, v], yv)

    sy = tensor.grad((v * y).sum(), mx)
    scan_f = function([mx, v], sy)

    v1 = lop_f(vx, vv)
    v2 = scan_f(vx, vv)
    assert _allclose(v1, v2), ('LOP mismatch: %s %s' % (v1, v2))
def __init__(self, p, inputs, s, costs, h=None, ha=None):
    '''Constructs and compiles the necessary Theano functions.

    p : list of Theano shared variables
        Parameters of the model to be optimized.
    inputs : list of Theano variables
        Symbolic variables that are inputs to your graph (they should also
        include your model 'output'). Your training examples must fit these.
    s : Theano variable
        Symbolic variable with respect to which the Hessian of the objective
        is positive-definite, implicitly defining the Gauss-Newton matrix.
        Typically, it is the activation of the output layer.
    costs : list of Theano variables
        Monitoring costs, the first of which will be the optimized objective.
    h : Theano variable or None
        Structural damping is applied to this variable (typically the hidden
        units of an RNN).
    ha : Theano variable or None
        Symbolic variable that implicitly defines the Gauss-Newton matrix for
        the structural damping term (typically the activation of the hidden
        layer). If None, it will be set to `h`.'''

    self.p = p
    self.shapes = [i.get_value().shape for i in p]
    self.sizes = map(numpy.prod, self.shapes)
    self.positions = numpy.cumsum([0] + self.sizes)[:-1]

    g = T.grad(costs[0], p)
    g = map(T.as_tensor_variable, g)  # for CudaNdarray
    self.f_gc = theano.function(inputs, g + costs,
                                on_unused_input='ignore')  # during gradient computation
    self.f_cost = theano.function(inputs, costs,
                                  on_unused_input='ignore')  # for quick cost evaluation

    symbolic_types = T.scalar, T.vector, T.matrix, T.tensor3, T.tensor4

    v = [symbolic_types[len(i)]() for i in self.shapes]
    Gv = gauss_newton_product(costs[0], p, v, s)

    coefficient = T.scalar()  # this is lambda*mu

    if h is not None:  # structural damping with cross-entropy
        h_constant = symbolic_types[h.ndim]()  # T.Rop does not support `consider_constant` yet, so use `givens`
        structural_damping = coefficient * (
            -h_constant * T.log(h + 1e-10) -
            (1 - h_constant) * T.log((1 - h) + 1e-10)).sum() / h.shape[0]
        if ha is None:
            ha = h
        Gv_damping = gauss_newton_product(structural_damping, p, v, ha)
        Gv = [a + b for a, b in zip(Gv, Gv_damping)]
        givens = {h_constant: h}
    else:
        givens = {}

    self.function_Gv = theano.function(inputs + v + [coefficient], Gv,
                                       givens=givens,
                                       on_unused_input='ignore')