Python theano.tensor module: constant() example source code

We extracted the following 50 code examples from open-source Python projects to illustrate how to use theano.tensor.constant().
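Before the project snippets, here is a minimal standalone sketch (our own illustration, not taken from any of the projects below) of what theano.tensor.constant() returns:

# Minimal sketch (illustration only): constant() wraps a Python or NumPy
# value as a graph constant whose dtype and ndim are fixed at creation.
import numpy
import theano.tensor as T

c = T.constant(3)                                    # 0-d integer constant
assert c.ndim == 0

m = T.constant(numpy.ones((2, 3)), dtype='float32')  # 2-d float32 constant
assert m.ndim == 2 and m.dtype == 'float32'

print((c * 2).eval())  # prints 6; a graph built only from constants needs no inputs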

Project: geomdn    Author: afshinrahimi    | project source | file source
def get_output_for(self, input, deterministic=False, **kwargs):
        if not isinstance(input, (S.SparseVariable, S.SparseConstant,
                                  S.sharedvar.SparseTensorSharedVariable)):
            raise ValueError("Input for this layer must be sparse")

        if deterministic or self.p == 0:
            return input
        else:
            # Using Theano constant to prevent upcasting
            one = T.constant(1, name='one')
            retain_prob = one - self.p

            if self.rescale:
                input = S.mul(input, one/retain_prob)

            input_shape = self.input_shape
            if any(s is None for s in input_shape):
                input_shape = input.shape

            return input * self._srng.binomial(input_shape, p=retain_prob,
                                               dtype=input.dtype)
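The recurring "prevent upcasting" comment in these snippets refers to keeping the arithmetic inside the Theano graph: computed eagerly, 1 - p is a NumPy float64 scalar whenever p is stored as float64, and multiplying a float32 tensor by a float64 scalar upcasts the whole expression. A small dtype check (our illustration, not project code):

import numpy as np
import theano.tensor as T

x = T.vector(dtype='float32')
p = np.float64(0.5)  # e.g. a dropout rate stored as a NumPy float64 scalar

# Eager NumPy arithmetic yields a float64 scalar, which upcasts the graph:
assert (x * (1 - p)).dtype == 'float64'

# A small-integer Theano constant (int8 under the default cast policy)
# does not force an upcast:
one = T.constant(1, name='one')
assert (x - one).dtype == 'float32'  # float32 op int8 -> float32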
Project: cortex    Author: rdevon    | project source | file source
def l2_decay(self, gamma, layers=None):
        '''L2 decay cost.

        Args:
            gamma (float): l2 decay rate.
            layers (Optional[list]): layer numbers to do l2 decay on.

        Returns:
            T.tensor: L2 cost.

        '''
        if layers is None:
            layers = range(self.n_layers)

        cost = T.constant(0.).astype(floatX)
        for l in layers:
            W = self.__dict__['W%d' % l]
            cost += gamma * (W ** 2).sum()

        return cost
Project: cortex    Author: rdevon    | project source | file source
def l2_decay(self, rate):
        rec_l2_cost = T.constant(0.).astype(floatX)
        gen_l2_cost = T.constant(0.).astype(floatX)

        for l in xrange(self.n_layers):
            rec_l2_cost += self.posteriors[l].l2_decay(rate)
            gen_l2_cost += self.conditionals[l].l2_decay(rate)

        rval = OrderedDict(
            rec_l2_cost=rec_l2_cost,
            gen_l2_cost=gen_l2_cost,
            cost=rec_l2_cost + gen_l2_cost
        )

        return rval

    # --------------------------------------------------------------------------
Project: sesame-paste-noodle    Author: aissehust    | project source | file source
def forward(self, inputtensor):
        inputimage = inputtensor[0]
        #print('conv2d.forward.type: {}'.format(inputimage.ndim))
        if self.dc == 0.0:
            pass
        else:
            if 0 < self.dc <= 1:
                _srng = RandomStreams(np.random.randint(1, 2147462579))
                one = T.constant(1)
                retain_prob = one - self.dc
                mask_shape = self.w.shape
                mask = _srng.binomial(mask_shape, p=retain_prob,
                                           dtype=self.w.dtype)
                self.w = self.w * mask
            else:
                raise IndexError

        l3conv = T.nnet.conv2d(inputimage,
                               self.w,
                               border_mode=self.border,
                               subsample=self.subsample)
        if self.need_bias:
            return ((l3conv+self.b.dimshuffle('x', 0, 'x', 'x')), )
        else:
            return (l3conv, )
Project: sesame-paste-noodle    Author: aissehust    | project source | file source
def forward(self, inputtensor):
        inputimage = inputtensor[0]

        if self.dc == 0.0:
            pass
        else:
            if 0 < self.dc <= 1:
                _srng = RandomStreams(np.random.randint(1, 2147462579))
                one = T.constant(1)
                retain_prob = one - self.dc
                mask_shape = self.w.shape
                mask = _srng.binomial(mask_shape, p=retain_prob,
                                           dtype=self.w.dtype)
                self.w = self.w * mask
            else:
                raise IndexError

        if self.need_bias:
            return ((T.dot(inputimage, self.w)+self.b), )
        else:
            return (T.dot(inputimage, self.w),)
Project: NCRF-AE    Author: cosmozhang    | project source | file source
def RmsProp(cost, params, learning_rate=1.0, rho=0.9, epsilon=1e-6):
    updates = OrderedDict()
    grads = T.grad(cost, params)
    # Using theano constant to prevent upcasting of float32
    one = T.constant(1)

    for param, grad in zip(params, grads):
        value = param.get_value(borrow=True)
        accu = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                             broadcastable=param.broadcastable)
        accu_new = rho * accu + (one - rho) * grad ** 2
        updates[accu] = accu_new
        updates[param] = param - (learning_rate * grad /
                                  T.sqrt(accu_new + epsilon))

    return updates
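A usage sketch for the RmsProp helper above; the small softmax classifier is our own illustration, not NCRF-AE code:

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
y = T.ivector('y')
W = theano.shared(np.zeros((5, 3), dtype=theano.config.floatX), name='W')
b = theano.shared(np.zeros(3, dtype=theano.config.floatX), name='b')

# negative log-likelihood of a softmax classifier
p_y = T.nnet.softmax(T.dot(x, W) + b)
cost = -T.mean(T.log(p_y)[T.arange(y.shape[0]), y])

updates = RmsProp(cost, [W, b], learning_rate=0.01)
train_fn = theano.function([x, y], cost, updates=updates)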
Project: NCRF-AE    Author: cosmozhang    | project source | file source
def EGD(cost, params, learning_rate=0.33, constraint=1.0):

    updates = OrderedDict()

    grads = T.grad(cost, params)
    U = T.constant(constraint)

    # first half of params
    rw_pos = T.exp(-learning_rate * U * grads[0])
    rb_pos = T.exp(-learning_rate * U * grads[1])

    # second half
    rw_neg = 1/rw_pos
    rb_neg = 1/rb_pos

    rs = [rw_pos, rb_pos, rw_neg, rb_neg]

    partition = (T.sum(params[0]*rs[0]) + T.sum(params[1]*rs[1]) +
                 T.sum(params[2]*rs[2]) + T.sum(params[3]*rs[3]))

    for param, r in zip(params, rs):
        updates[param] = U*param*r/partition

    return updates
Project: luna16    Author: gzuidhof    | project source | file source
def get_output_for(self, input, deterministic=False, **kwargs):
        """
        Parameters
        ----------
        input : tensor
            output from the previous layer
        deterministic : bool
            If true, dropout and scaling are disabled; see notes
        """
        if deterministic or self.p == 0:
            return input
        else:
            # Using theano constant to prevent upcasting
            one = T.constant(1)

            retain_prob = one - self.p
            if self.rescale:
                input /= retain_prob

            mask = _srng.binomial(input.shape[:2], p=retain_prob,
                                      dtype=theano.config.floatX)
            axes = [0, 1] + (['x'] * (input.ndim - 2))
            mask = mask.dimshuffle(*axes)

            return input * mask
Project: kaggle_dsb    Author: syagev    | project source | file source
def get_output_for(self, input, deterministic=False, **kwargs):
        """
        Parameters
        ----------
        input : tensor
            output from the previous layer
        deterministic : bool
            If true, dropout and scaling are disabled; see notes
        """
        if deterministic or self.p == 0:
            return input
        else:
            # Using theano constant to prevent upcasting
            one = T.constant(1)

            retain_prob = one - self.p
            if self.rescale:
                input /= retain_prob

            mask = _srng.binomial(input.shape[:2], p=retain_prob,
                                      dtype=theano.config.floatX)
            axes = [0, 1] + (['x'] * (input.ndim - 2))
            mask = mask.dimshuffle(*axes)

            return input * mask
Project: dl4nlp_in_theano    Author: luyaojie    | project source | file source
def temporal_padding_mask(mask, kernel_size, padding_size):
    """Pad the middle dimension of a 2D matrix
    with "padding" zeros left and right.

    Apologies for the inane API, but Theano makes this
    really hard.
    Code from https://github.com/fchollet/keras/blob/master/keras/backend/theano_backend.py
    x: (batch, length)
    """
    mask_shape = mask.shape
    mask_sum = T.sum(mask, axis=1)
    output_length = mask_sum - kernel_size + 2 * padding_size + 1
    max_output_length = mask_shape[1] - kernel_size + 2 * padding_size + 1
    real_output_length = T.maximum(output_length, 1)
    range_base = T.arange(max_output_length)
    range_matrix = T.outer(T.ones((mask_shape[0],)), range_base)
    mask = (range_matrix < real_output_length[:, None]) * T.constant(1.0)
    return mask
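A quick sanity check of temporal_padding_mask (our example, not project code): with row lengths [4, 2], kernel_size=2 and padding_size=0, the valid output lengths are 4-2+1=3 and 2-2+1=1:

import numpy as np
import theano
import theano.tensor as T

m = T.matrix('mask')
f = theano.function([m], temporal_padding_mask(m, kernel_size=2, padding_size=0))

mask = np.array([[1, 1, 1, 1],
                 [1, 1, 0, 0]], dtype=theano.config.floatX)
print(f(mask))  # row 0 keeps 3 leading ones, row 1 keeps 1:
                # [[1. 1. 1.]
                #  [1. 0. 0.]]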
Project: Theano-Deep-learning    Author: GeekLiB    | project source | file source
def print_graph_linker(print_prog=True):
    if 1:
        imap = {None: '-'}

        def blah(i, node, thunk):
            imap[node] = str(i)
            if print_prog:  # and node.op.__class__ is T.DimShuffle:
                if False and node.op == T.DimShuffle((), ['x', 'x'],
                                                     inplace=True):
                    print(node.op == T.DimShuffle((), ['x', 'x'],
                                                  inplace=True), end=' ')
                    print(node.inputs[0], type(node.inputs[0]), end=' ')
                    print(node.inputs[0].equals(T.constant(2)), end=' ')
                outputs = node.outputs
                inputs = theano.gof.graph.inputs(outputs)
                print('node ', i, node, end=' ')
                print(':'.join([imap[inp.owner] for inp in node.inputs]))
                #print theano.sandbox.pprint.pp.process_graph(inputs, outputs)
        return theano.sandbox.wraplinker.WrapLinkerMany(
                [theano.gof.OpWiseCLinker()],
                [theano.sandbox.wraplinker.run_all
                    ,blah
                    #,theano.sandbox.wraplinker.numpy_notall_isfinite
                    ])
    else:
        return theano.gof.OpWiseCLinker()
Project: Theano-Deep-learning    Author: GeekLiB    | project source | file source
def test_csm_unsorted(self):
        """
        Test support for gradients of unsorted inputs.
        """
        sp_types = {'csc': sp.csc_matrix,
                    'csr': sp.csr_matrix}

        for format in ['csr', 'csc', ]:
            for dtype in ['float32', 'float64']:
                x = tensor.tensor(dtype=dtype, broadcastable=(False,))
                y = tensor.ivector()
                z = tensor.ivector()
                s = tensor.ivector()
                # Sparse advanced indexing produces unsorted sparse matrices
                a = sparse_random_inputs(format, (4, 3), out_dtype=dtype,
                                         unsorted_indices=True)[1][0]
                # Make sure it's unsorted
                assert not a.has_sorted_indices

                def my_op(x):
                    y = tensor.constant(a.indices)
                    z = tensor.constant(a.indptr)
                    s = tensor.constant(a.shape)
                    return tensor.sum(
                        dense_from_sparse(CSM(format)(x, y, z, s) * a))
                verify_grad_sparse(my_op, [a.data])
Project: Theano-Deep-learning    Author: GeekLiB    | project source | file source
def test_constant_folding():
    """ Test that constant folding get registered at fast_compile

    An error removed that registration during the registration.
    """
    x = tensor.dvector()
    mode = theano.compile.get_mode("FAST_COMPILE").excluding("fusion")
    f = theano.function([x], [x * 2, x + x], mode=mode)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 2

    # Test that we do not crash when constant folding elemwise scalar
    # as they should not generate c code.

    x = tensor.constant(3)
    assert x.ndim == 0
    mode = theano.compile.get_mode("FAST_COMPILE").excluding("fusion")
    f = theano.function([], [x * 2, x + x], mode=mode)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 2
    assert all([isinstance(n.op, DeepCopyOp) for n in topo])
Project: Theano-Deep-learning    Author: GeekLiB    | project source | file source
def test_local_add_specialize():
    # test of non-zero dimension
    a = tensor.vector()
    s = tensor.add(tensor.zeros_like(a))
    assert local_add_specialize.transform(s.owner)

    # test of 0-d
    a = tensor.scalar()
    s = tensor.add(tensor.zeros_like(a))
    assert local_add_specialize.transform(s.owner)

    # Test when the 0 input is forcing upcasting
    a = tensor.constant(0, dtype='int64')
    b = tensor.constant(1, dtype='int32')
    s = a + b
    transformed = local_add_specialize.transform(s.owner)
    assert transformed
    assert transformed[0].type == s.type
Project: Theano-Deep-learning    Author: GeekLiB    | project source | file source
def test_lt(self):
        for dtype in self.dtypes:
            l = numpy.asarray([0., -1., 1.], dtype=dtype)
            r = numpy.asarray([0., 1., -1.], dtype=dtype)
            for x, y, err in [
                (self.shared(l.astype(dtype)), self.shared(r.astype(dtype)), False),
                (l, self.shared(r.astype(dtype)), True),
                (tensor.constant(l), self.shared(r.astype(dtype)), False),
                (self.shared(l.astype(dtype)), r, False),
                (self.shared(l.astype(dtype)), tensor.constant(r), False),
            ]:
                try:
                    fn = self.inplace_func([], x < y)
                    v = fn()
                    self.assertTrue(numpy.all(v == (l < r)), (v, (l < r)))
                except TypeError:
                    assert err
Project: Theano-Deep-learning    Author: GeekLiB    | project source | file source
def test_le(self):
        for dtype in self.dtypes:
            l = numpy.asarray([0., -1., 1.], dtype=dtype)
            r = numpy.asarray([0., 1., -1.], dtype=dtype)
            for x, y, err in [
                (self.shared(l.astype(dtype)),
                 self.shared(r.astype(dtype)), False),
                (l, self.shared(r.astype(dtype)), True),
                (tensor.constant(l), self.shared(r.astype(dtype)), False),
                (self.shared(l.astype(dtype)), r, False),
                (self.shared(l.astype(dtype)), tensor.constant(r), False),
            ]:
                try:
                    fn = self.inplace_func([], x <= y)
                    v = fn()
                    self.assertTrue(numpy.all(v == (l <= r)), (v, (l <= r)))
                except TypeError:
                    assert err
Project: Theano-Deep-learning    Author: GeekLiB    | project source | file source
def test_eq(self):
        for dtype in self.dtypes:
            l = numpy.asarray([0., -1., 1.], dtype=dtype)
            r = numpy.asarray([0., 1., -1.], dtype=dtype)
            for x, y, err in [
                (self.shared(l.astype(dtype)),
                 self.shared(r.astype(dtype)), False),
                (l, self.shared(r.astype(dtype)), True),
                (tensor.constant(l), self.shared(r.astype(dtype)), False),
                (self.shared(l.astype(dtype)), r, False),
                (self.shared(l.astype(dtype)), tensor.constant(r), False),
            ]:
                try:
                    fn = self.inplace_func([], eq(x, y))
                    v = fn()
                    self.assertTrue(numpy.all(v == (l == r)), (v, (l == r)))
                except TypeError:
                    assert err
Project: Theano-Deep-learning    Author: GeekLiB    | project source | file source
def test_neq(self):
        for dtype in self.dtypes:
            l = numpy.asarray([0., -1., 1.], dtype=dtype)
            r = numpy.asarray([0., 1., -1.], dtype=dtype)
            for x, y, err in [
                (self.shared(l.astype(dtype)),
                 self.shared(r.astype(dtype)), False),
                (l, self.shared(r.astype(dtype)), True),
                (tensor.constant(l), self.shared(r.astype(dtype)), False),
                (self.shared(l.astype(dtype)), r, False),
                (self.shared(l.astype(dtype)), tensor.constant(r), False),
            ]:
                try:
                    fn = self.inplace_func([], neq(x, y))
                    v = fn()
                    self.assertTrue(numpy.all(v == (l != r)), (v, (l != r)))
                except TypeError:
                    assert err
Project: Theano-Deep-learning    Author: GeekLiB    | project source | file source
def test1(self):
        s = scal.constant(56)
        t = as_tensor_variable(s)
        self.assertTrue(t.owner.op is tensor_from_scalar)
        self.assertTrue(t.type.broadcastable == (), t.type.broadcastable)
        self.assertTrue(t.type.ndim == 0, t.type.ndim)
        self.assertTrue(t.type.dtype == s.type.dtype)

        v = eval_outputs([t])

        self.assertTrue(v == 56, v)
        self.assertTrue(isinstance(v, numpy.ndarray))
        self.assertTrue(v.shape == (), v.shape)

        g = grad(t, s)
        self.assertTrue(eval_outputs([g]) == 0.)
Project: Theano-Deep-learning    Author: GeekLiB    | project source | file source
def test2(self):
        s = scal.constant(56.)
        t = as_tensor_variable(s)
        self.assertTrue(t.owner.op is tensor_from_scalar)
        self.assertTrue(t.type.broadcastable == (), t.type.broadcastable)
        self.assertTrue(t.type.ndim == 0, t.type.ndim)
        self.assertTrue(t.type.dtype == s.type.dtype)

        v = eval_outputs([t])

        self.assertTrue(v == 56., v)
        self.assertTrue(isinstance(v, numpy.ndarray))
        self.assertTrue(v.shape == (), v.shape)

        g = grad(t, s)
        self.assertTrue(eval_outputs([g]) == 1.)
Project: Theano-Deep-learning    Author: GeekLiB    | project source | file source
def test0(self):
        tt = constant(56)  # scal.constant(56)
        ss = scalar_from_tensor(tt)
        self.assertTrue(ss.owner.op is scalar_from_tensor)
        self.assertTrue(ss.type.dtype == tt.type.dtype)

        v = eval_outputs([ss])

        self.assertTrue(v == 56, v)
        if config.cast_policy == 'custom':
            self.assertTrue(isinstance(v, numpy.int16))
        elif config.cast_policy in ('numpy', 'numpy+floatX'):
            self.assertTrue(isinstance(
                v, getattr(numpy, str(numpy.asarray(56).dtype))))
        else:
            raise NotImplementedError(config.cast_policy)
        self.assertTrue(v.shape == (), v.shape)
        tt = lscalar()
        ss = scalar_from_tensor(tt)
        g = ss.owner.op.grad([tt], [ss])
        fff = function([tt], ss)
        v = fff(numpy.asarray(5))
        self.assertTrue(v == 5, v)
        self.assertTrue(isinstance(v, numpy.int64))
        self.assertTrue(v.shape == (), v.shape)
Project: Theano-Deep-learning    Author: GeekLiB    | project source | file source
def _test_autocast_numpy():
    """Called from `test_autocast`."""
    assert config.cast_policy == 'numpy'
    # Go through some typical scalar values.

    def ok(z):
        assert tensor.constant(z).dtype == numpy.asarray(z).dtype
    for x in ([2 ** i for i in xrange(63)] +
              [0, L(0), L(1), L(2 ** 63 - 1)] +
              [0., 1., 1.1, 1.5]):
        n_x = numpy.asarray(x)
        # Make sure the data type is the same as the one found by numpy.
        ok(x)
        ok(-x)
        ok(x - 1)
        ok(-x + 1)
        ok(n_x)
Project: Theano-Deep-learning    Author: GeekLiB    | project source | file source
def infer_shape(self, node, i_shapes):
        r, shp = node.inputs[0:2]

        # if shp is a constant array of len 0, then it means 'automatic shape'
        unknown_shape = len(getattr(shp, 'data', [0, 1, 2])) == 0

        # if ndim_added == 0 and shape != () then shape
        if self.ndim_added == 0 and not unknown_shape:
            sample_shp = shp
        else:
            # if shape == () then it will depend on args
            # if ndim_added != 0 and shape != () then it will depend on args
            # Use the default infer_shape implementation.
            raise tensor.ShapeError()

        return [None, [sample_shp[i] for i in xrange(node.outputs[1].ndim)]]
Project: Theano-Deep-learning    Author: GeekLiB    | project source | file source
def make_node(self, x, index):
        assert isinstance(x.type, TypedListType)
        if not isinstance(index, Variable):
            if isinstance(index, slice):
                index = Constant(SliceType(), index)
                return Apply(self, [x, index], [x.type()])
            else:
                index = T.constant(index, ndim=0, dtype='int64')
                return Apply(self, [x, index], [x.ttype()])
        if isinstance(index.type, SliceType):
            return Apply(self, [x, index], [x.type()])
        elif isinstance(index, T.TensorVariable) and index.ndim == 0:
            assert index.dtype == 'int64'
            return Apply(self, [x, index], [x.ttype()])
        else:
            raise TypeError('Expected scalar or slice as index.')
Project: Theano-Deep-learning    Author: GeekLiB    | project source | file source
def test_constant(self):
        orig_compute_test_value = theano.config.compute_test_value
        try:
            theano.config.compute_test_value = 'raise'

            x = T.constant(numpy.random.rand(2, 3), dtype=config.floatX)
            y = theano.shared(numpy.random.rand(3, 6).astype(config.floatX),
                              'y')

            # should work
            z = T.dot(x, y)
            assert hasattr(z.tag, 'test_value')
            f = theano.function([], z)
            assert _allclose(f(), z.tag.test_value)

            # this test should fail
            x = T.constant(numpy.random.rand(2, 4), dtype=config.floatX)
            self.assertRaises(ValueError, T.dot, x, y)
        finally:
            theano.config.compute_test_value = orig_compute_test_value
Project: Theano-Deep-learning    Author: GeekLiB    | project source | file source
def test_gpualloc():
    '''
    This test tries to catch the scenario where, due to infer_shape,
    the input of the alloc changes from a tensor scalar to the constant
    1. In this case the originally constructed broadcastable pattern will
    have a False for that dimension, but the new broadcastable pattern
    inserted by gpualloc will have a True, since it knows the dimension
    is 1 and therefore broadcastable.
    '''

    x = theano.shared(numpy.ones(3, dtype='float32'), 'x')
    m = (x).dimshuffle(['x', 0])
    v = tensor.alloc(1., *m.shape)
    f = theano.function([],
                        v + x,
                        mode=mode_with_gpu.excluding(
                            "local_elemwise_alloc"))
    l = f.maker.fgraph.toposort()
    assert numpy.any([isinstance(y.op, cuda.GpuAlloc) for y in l])
Project: a3c    Author: hercky    | project source | file source
def rmsprop_updates(grads, params, learning_rate=1.0, rho=0.9, epsilon=1e-6):
    """
    """
    updates = OrderedDict()

    # Using theano constant to prevent upcasting of float32
    one = T.constant(1)

    for param, grad in zip(params, grads):
        value = param.get_value(borrow=True)
        accu = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                             broadcastable=param.broadcastable)
        accu_new = rho * accu + (one - rho) * grad ** 2
        updates[accu] = accu_new
        try:
            updates[param] = lasagne.updates.norm_constraint(
                param - (learning_rate * grad / T.sqrt(accu_new + epsilon)),
                MAX_NORM)
        except:
            updates[param] = param - (learning_rate * grad /
                                 T.sqrt(accu_new + epsilon))

    return updates
Project: a3c    Author: hercky    | project source | file source
def rmsprop_updates(grads, params, learning_rate=1.0, rho=0.9, epsilon=1e-6):
    updates = OrderedDict()
    # Using theano constant to prevent upcasting of float32
    one = T.constant(1)
    c = 0
    for param, grad in zip(params, grads):
        print c
        value = param.get_value(borrow=True)
        accu = theano.shared(numpy.zeros(value.shape, dtype=value.dtype),
                             broadcastable=param.broadcastable)
        accu_new = rho * accu + (one - rho) * grad ** 2
        updates[accu] = accu_new
        mid_up = param - (learning_rate * grad / (T.sqrt(accu_new + epsilon)))
        try:
            updates[param] = lasagne.updates.norm_constraint(mid_up, 40, 0)
        except:
            updates[param] = mid_up
        c += 1
    return updates
Project: a3c    Author: hercky    | project source | file source
def rmsprop_updates(grads, params, learning_rate=1.0, rho=0.9, epsilon=1e-6):
    """
    """
    updates = OrderedDict()

    # Using theano constant to prevent upcasting of float32
    one = T.constant(1)

    for param, grad in zip(params, grads):
        value = param.get_value(borrow=True)
        accu = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                             broadcastable=param.broadcastable)
        accu_new = rho * accu + (one - rho) * grad ** 2
        updates[accu] = accu_new
        try:
            updates[param] = lasagne.updates.norm_constraint(
                param - (learning_rate * grad / T.sqrt(accu_new + epsilon)),
                MAX_NORM)
        except:
            updates[param] = param - (learning_rate * grad /
                                 T.sqrt(accu_new + epsilon))

    return updates
Project: a3c    Author: hercky    | project source | file source
def rmsprop_updates(grads, params, learning_rate=1.0, rho=0.9, epsilon=1e-6):
    """
    """
    updates = OrderedDict()

    # Using theano constant to prevent upcasting of float32
    one = T.constant(1)

    for param, grad in zip(params, grads):
        value = param.get_value(borrow=True)
        accu = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                             broadcastable=param.broadcastable)
        accu_new = rho * accu + (one - rho) * grad ** 2
        updates[accu] = accu_new
        try:
            updates[param] = lasagne.updates.norm_constraint(
                param - (learning_rate * grad / T.sqrt(accu_new + epsilon)),
                MAX_NORM)
        except:
            updates[param] = param - (learning_rate * grad /
                                 T.sqrt(accu_new + epsilon))

    return updates
Project: deep-hashtagprediction    Author: jderiu    | project source | file source
    def __init__(self, rng, W=None, m=1.0, n_samples=50, shape=None, batch_size=1000):
        if W is None:
            W = numpy.asarray(rng.uniform(
                low=-numpy.sqrt(6. / (shape[0] + shape[1])),
                high=numpy.sqrt(6. / (shape[0] + shape[1])),
                size=(shape[0], shape[1])), dtype=theano.config.floatX)

        self.W = theano.shared(value=W, name='Hashtag_emb', borrow=True)
        self.batch_size = batch_size
        self.n_ht = W.shape[0]
        self.m = m
        self.n_samples = n_samples
        self.csrng = CURAND_RandomStreams(123)
        mask = self.csrng.uniform(size=(self.n_samples, 1), low=0.0, high=1.0,
                                  dtype=theano.config.floatX)
        self.rfun = theano.function([], mask.argsort(axis=0))

        self.alpha = T.constant(1.0 / numpy.arange(start=1, stop=self.n_ht + 1, step=1))

        self.weights = [self.W]
        self.biases = []
Project: IQA_BIECON_release    Author: jongyookim    | project source | file source
def get_updates_rmsprop(self, cost, params, rho=0.9, eps=1e-8):
        lr = self.lr
        print(' - RMSprop: lr = %.2e' % (lr.get_value(borrow=True)))
        one = T.constant(1.)

        grads = T.grad(cost=cost, wrt=params)

        updates = []
        for p, g in zip(params, grads):
            value = p.get_value(borrow=True)
            accu = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                                 broadcastable=p.broadcastable)
            accu_new = rho * accu + (one - rho) * g ** 2
            gradient_scaling = T.sqrt(accu_new + eps)
            g = g / gradient_scaling

            updates.append((accu, accu_new))
            updates.append((p, p - lr * g))

        return updates
Project: crayimage    Author: yandexdataschool    | project source | file source
def careful_rmsprop(loss_or_grads, params, learning_rate=1.0, rho=0.9, epsilon=1e-6, grad_clipping=1.0e-2):
  """
  RMSProp with gradient clipping.
  :param grad_clipping: maximal norm of the gradient; if the norm of the actual gradient exceeds this value, the gradient is rescaled.
  :return: updates
  """
  grads = get_or_compute_grads(loss_or_grads, params)
  updates = OrderedDict()
  grads = total_norm_constraint(grads, max_norm=grad_clipping, epsilon=epsilon)

  # Using theano constant to prevent upcasting of float32
  one = T.constant(1)

  for param, grad in zip(params, grads):
    value = param.get_value(borrow=True)
    accu = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                         broadcastable=param.broadcastable)
    accu_new = rho * accu + (one - rho) * grad ** 2
    updates[accu] = accu_new
    updates[param] = param - (learning_rate * grad /
                              T.sqrt(accu_new + epsilon))

  return updates
Project: Synkhronos    Author: astooke    | project source | file source
def adadelta(loss, params, learning_rate=1.0, rho=0.95, epsilon=1e-6):
    grad_shared_flat, flat_grad, unflat_grads = flat_unflat_grads(loss, params)
    grad_updates = [(grad_shared_flat, flat_grad)]
    one = T.constant(1)
    param_updates = list()
    for p, g in zip(params, unflat_grads):
        value = p.get_value(borrow=True)
        accu = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                             broadcastable=p.broadcastable)
        delta_accu = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                                   broadcastable=p.broadcastable)
        accu_new = rho * accu + (one - rho) * g ** 2
        update = g * T.sqrt(delta_accu + epsilon) / T.sqrt(accu_new + epsilon)
        delta_accu_new = rho * delta_accu + (one - rho) * update ** 2
        param_updates += [(accu, accu_new)]
        param_updates += [(p, p - learning_rate * update)]
        param_updates += [(delta_accu, delta_accu_new)]
    return grad_updates, param_updates, grad_shared_flat
Project: Synkhronos    Author: astooke    | project source | file source
def adam(loss, params, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
    grad_shared_flat, flat_grad, unflat_grads = flat_unflat_grads(loss, params)
    grad_updates = [(grad_shared_flat, flat_grad)]
    t_prev = theano.shared(np.array(0, dtype=theano.config.floatX))
    one = T.constant(1)
    t = t_prev + one
    a_t = learning_rate * T.sqrt(one - beta2 ** t) / (one - beta1 ** t)
    param_updates = list()
    for p, g in zip(params, unflat_grads):
        value = p.get_value(borrow=True)
        m_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                               broadcastable=p.broadcastable)
        v_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                               broadcastable=p.broadcastable)
        m_t = beta1 * m_prev + (one - beta1) * g
        v_t = beta2 * v_prev + (one - beta2) * g ** 2
        step = a_t * m_t / (T.sqrt(v_t) + epsilon)
        param_updates += [(m_prev, m_t), (v_prev, v_t), (p, p - step)]
    # update the shared step counter once, outside the per-parameter loop,
    # so the updates list contains no duplicate entries for t_prev
    param_updates += [(t_prev, t)]
    return grad_updates, param_updates, grad_shared_flat
Project: Synkhronos    Author: astooke    | project source | file source
def adamax(loss, params, learning_rate=0.002, beta1=0.9, beta2=0.999, epsilon=1e-8):
    grad_shared_flat, flat_grad, unflat_grads = flat_unflat_grads(loss, params)
    grad_updates = [(grad_shared_flat, flat_grad)]
    t_prev = theano.shared(np.array(0, dtype=theano.config.floatX))
    one = T.constant(1)
    t = t_prev + one
    a_t = learning_rate / (one - beta1 ** t)
    param_updates = list()
    for p, g in zip(params, unflat_grads):
        value = p.get_value(borrow=True)
        m_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                               broadcastable=p.broadcastable)
        u_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                               broadcastable=p.broadcastable)
        m_t = beta1 * m_prev + (one - beta1) * g
        u_t = T.maximum(beta2 * u_prev, abs(g))
        step = a_t * m_t / (u_t + epsilon)
        param_updates += [(m_prev, m_t), (u_prev, u_t), (p, p - step)]
    # update the shared step counter once, outside the per-parameter loop,
    # so the updates list contains no duplicate entries for t_prev
    param_updates += [(t_prev, t)]
    return grad_updates, param_updates, grad_shared_flat
Project: fxnn    Author: khaotik    | project source | file source
def build_model(model_):
    global fn_predict, fn_record
    global g_ozer, g_mdl

    g_ozer = dict(simple=VanillaSGD, adam=AdamSGD)[OZER]()
    g_ozer.lr = LEARN_RATE

    s_x = T.tensor4('x')
    s_y = T.ivector('y')
    s_pdpo = T.scalar()
    s_out = model_(s_x, s_pdpo)

    s_y_onehot = T.extra_ops.to_one_hot(s_y, len(g_dataset.label_map))
    s_loss = T.mean(-s_y_onehot*T.log(s_out + 1e-3))
    s_accr = T.mean(T.switch(
        T.eq(T.argmax(s_out, axis=1), T.argmax(s_y_onehot, axis=1)), 1, 0))

    no_dropout = [(s_pdpo, T.constant(0., dtype=th.config.floatX))]
    fn_predict = th.function(
        [s_x, s_y],
        {'pred':s_out, 'accr':s_accr, 'loss':s_loss},
        givens=no_dropout, profile=PROFILE)
    rec_fetches = {
        'x': s_x, 'y': s_y,
        'pred': s_out}
    rec_fetches.update(g_mdl.params_di)
    fn_record = th.function(
        [s_x, s_y], rec_fetches, givens=no_dropout, profile=PROFILE)
    g_ozer.compile(
        [s_x, s_y],
        s_loss,
        g_mdl.params_di.values(),
        fetches_={'pred': s_out, 'loss': s_loss, 'accr': s_accr},
        givens_=[(s_pdpo, T.constant(TRAIN_PDPO, dtype=th.config.floatX))],
        profile_=PROFILE)
Project: structured-output-ae    Author: sbelharbi    | project source | file source
def get_updates(self, learning_rate, params, grads, lr_scalers):
        """Compute the parameters' updates.

        """
        t_prev = theano.shared(floatX(0.))
        updates = OrderedDict()

        # Using theano constant to prevent upcasting of float32
        one = T.constant(1)

        t = t_prev + 1
        a_t = learning_rate*T.sqrt(one-self.beta2**t)/(one-self.beta1**t)

        for param, g_t in zip(params, grads):
            value = param.get_value(borrow=True)
            m_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                                   broadcastable=param.broadcastable)
            v_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                                   broadcastable=param.broadcastable)

            m_t = self.beta1*m_prev + (one-self.beta1)*g_t
            v_t = self.beta2*v_prev + (one-self.beta2)*g_t**2
            step = a_t*m_t/(T.sqrt(v_t) + self.epsilon)

            updates[m_prev] = m_t
            updates[v_prev] = v_t
            new_param = param - step
            if self.max_colm_norm and param.name in ["W", "w"]:
                new_param_final = norm_constraint(tensor_var=new_param,
                                                  max_norm=self.max_norm)
            else:
                new_param_final = new_param
            updates[param] = new_param_final

        updates[t_prev] = t

        return updates
Project: structured-output-ae    Author: sbelharbi    | project source | file source
def get_updates(self, learning_rate, params, grads, lr_scalers):
        """Compute the parameters' updates.

        """
        t_prev = theano.shared(floatX(0.))
        updates = OrderedDict()

        # Using theano constant to prevent upcasting of float32
        one = T.constant(1)

        t = t_prev + 1
        a_t = learning_rate/(one-self.beta1**t)

        for param, g_t in zip(params, grads):
            value = param.get_value(borrow=True)
            m_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                                   broadcastable=param.broadcastable)
            u_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                                   broadcastable=param.broadcastable)

            m_t = self.beta1*m_prev + (one-self.beta1)*g_t
            u_t = T.maximum(self.beta2*u_prev, abs(g_t))
            step = a_t*m_t/(u_t + self.epsilon)

            updates[m_prev] = m_t
            updates[u_prev] = u_t
            new_param = param - step
            if self.max_colm_norm and param.name in ["W", "w"]:
                new_param_final = norm_constraint(tensor_var=new_param,
                                                  max_norm=self.max_norm)
            else:
                new_param_final = new_param
            updates[param] = new_param_final

        updates[t_prev] = t

        return updates
Project: structured-output-ae    Author: sbelharbi    | project source | file source
def dropout_from_layer(rng, layer_output, p):
    """
    p: float. The probability of dropping a unit.
    """
    srng = theano.tensor.shared_randomstreams.RandomStreams(
        rng.randint(99999))
    one = T.constant(1)
    retain_prob = one - p
    mask = srng.binomial(n=1, p=retain_prob, size=layer_output.shape,
                         dtype=layer_output.dtype)
    output = layer_output * mask

    return output
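A usage sketch for dropout_from_layer (our example, not project code):

import numpy as np
import theano
import theano.tensor as T

rng = np.random.RandomState(1234)
h = T.matrix('h')
h_dropped = dropout_from_layer(rng, h, p=0.5)

f = theano.function([h], h_dropped)
print(f(np.ones((2, 4), dtype=theano.config.floatX)))
# roughly half of the entries are zeroed, independently on each call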
Project: structured-output-ae    Author: sbelharbi    | project source | file source
def __init__(self, rng, input, dropout_rate, rescale):
        """
        rescale: Boolean. Can be only used when applying dropout.
        """
        if rescale:
            one = T.constant(1)
            retain_prob = one - dropout_rate
            input /= retain_prob

        super(DropoutIdentityHiddenLayer, self).__init__(rng=rng, input=input)
        if dropout_rate > 0.:
            self.output = dropout_from_layer(rng, self.output, p=dropout_rate)
Project: structured-output-ae    Author: sbelharbi    | project source | file source
def __init__(self, rng, input, n_in, n_out, dropout_rate, rescale,
                 W=None, b=None, b_v=0., activation=None):
        """
        rescale: Boolean. Can be only used when applying dropout.
        """
        if rescale:
            one = T.constant(1)
            retain_prob = one - dropout_rate
            input /= retain_prob

        super(DropoutHiddenLayer, self).__init__(
            input=input, n_in=n_in, n_out=n_out, W=W, b=b,
            activation=activation, rng=rng)
        if dropout_rate > 0.:
            self.output = dropout_from_layer(rng, self.output, p=dropout_rate)
Project: cortex    Author: rdevon    | project source | file source
def step_infer(self, *params):
        model = self.model

        params       = list(params)
        rs           = params[:model.n_layers]
        qs           = params[model.n_layers:2*model.n_layers]
        y            = params[2*model.n_layers]
        params       = params[1+2*model.n_layers:]
        prior_params = model.get_prior_params(*params)

        hs     = []
        new_qs = []

        for l, (q, r) in enumerate(zip(qs, rs)):
            h = (r <= q[None, :, :]).astype(floatX)
            hs.append(h)

        ys   = [y[None, :, :]] + hs[:-1]
        p_ys = [model.p_y_given_h(h, l, *params) for l, h in enumerate(hs)]

        log_ph = -model.prior.step_neg_log_prob(hs[-1], *prior_params)
        log_py_h = T.constant(0.).astype(floatX)
        log_qh = T.constant(0.).astype(floatX)
        for l in xrange(model.n_layers):
            log_py_h += -model.conditionals[l].neg_log_prob(ys[l], p_ys[l])
            log_qh += -model.posteriors[l].neg_log_prob(hs[l], qs[l][None, :, :])

        log_p   = log_py_h + log_ph - log_qh
        w_tilde = get_w_tilde(log_p)
        cost = -log_p.mean()

        for q, h in zip(qs, hs):
            q_ = (w_tilde[:, :, None] * h).sum(axis=0)
            new_qs.append(self.inference_rate * q_ + (1 - self.inference_rate) * q)

        return tuple(new_qs) + (cost,)
Project: cortex    Author: rdevon    | project source | file source
def params_infer(self):
        return [T.constant(self.momentum).astype(floatX)]
Project: cortex    Author: rdevon    | project source | file source
def entropy(self):
        return T.constant(0.).astype(floatX)
Project: cortex    Author: rdevon    | project source | file source
def get_L2_weight_cost(self, gamma, layers=None):
        if layers is None:
            layers = range(self.n_layers)

        cost = T.constant(0.).astype(floatX)
        for l in layers:
            W = self.__dict__['W%d' % l]
            cost += gamma * (W ** 2).sum()

        return cost
Project: NMT    Author: tuzhaopeng    | project source | file source
    def __init__(self, ntimes=False, n=TT.constant(0)):
        """
        :type ntimes: bool
        :param ntimes: If the last state needs to be repeated `n` times

        :type n: int, theano constant, None
        :param n: how many times the last state is repeated
        """
        self.ntimes = ntimes
        self.n = n
        super(LastState, self).__init__(0, 0, None)
Project: NMT    Author: tuzhaopeng    | project source | file source
def const(value):
    return TT.constant(numpy.asarray(value, dtype=theano.config.floatX))
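A one-line usage note for the const helper above (our example, not NMT code): it returns a 0-d constant already cast to floatX, so it composes with float32 graphs without upcasting.

import theano

half = const(0.5)
assert half.ndim == 0 and half.dtype == theano.config.floatX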
Project: NMT    Author: tuzhaopeng    | project source | file source
    def __init__(self, ntimes=False, n=TT.constant(0)):
        """
        :type ntimes: bool
        :param ntimes: If the last state needs to be repeated `n` times

        :type n: int, theano constant, None
        :param n: how many times the last state is repeated
        """
        self.ntimes = ntimes
        self.n = n
        super(LastState, self).__init__(0, 0, None)
Project: NMT    Author: tuzhaopeng    | project source | file source
def const(value):
    return TT.constant(numpy.asarray(value, dtype=theano.config.floatX))