Python theano.tensor 模块,shape() 实例源码


def load_params(self, f_, filter_=None):
        """Read a pickled parameter dict and check it against ``self._vars_di``.

        Args:
            f_: readable binary file object passed to ``pickle.load``.
            filter_: optional regex string. When given, only keys that fully
                match it are checked; ``None`` checks every key.

        Raises:
            ValueError: a checked entry's shape differs from the shape of the
                variable stored under the same key in ``self._vars_di``.
            KeyError: a checked key is absent from ``self._vars_di``.
        """
        # SECURITY: pickle.load can execute arbitrary code; only feed it
        # files from a trusted source.
        di = pickle.load(f_)
        # Compile once up front; ``None`` means "accept every key".  The
        # previous layout called re.compile(None) on the unfiltered path.
        pat = None if filter_ is None else re.compile(filter_)
        for k, v in di.items():
            if pat is not None and not pat.fullmatch(k):
                continue
            p = self._vars_di[k].get_value(borrow=True)
            if p.shape != v.shape:
                raise ValueError('Shape mismatch, need %s, got %s'%(v.shape, p.shape), p.shape)
        # NOTE(review): no statement visible here copies the loaded values
        # into the variables (e.g. via set_value) -- confirm whether that
        # step was lost or happens elsewhere.
def get_output_for(self, input, **kwargs):
        """Upscale ``input`` along axes 2, 3 and 4 by ``self.scale_factor``.

        ``self.mode == 'repeat'`` repeats entries along each axis whose factor
        exceeds 1.  ``self.mode == 'dilate'`` writes ``input`` into a zero
        tensor of the layer's output shape at strides given by the factors.
        Any other mode, or all factors <= 1, returns ``input`` unchanged.
        """
        rep2, rep3, rep4 = self.scale_factor
        mode = self.mode

        if mode == 'repeat':
            result = input
            # Last axis first, matching the original order of operations.
            for count, axis in ((rep4, 4), (rep3, 3), (rep2, 2)):
                if count > 1:
                    result = T.extra_ops.repeat(result, count, axis)
            return result

        if mode == 'dilate' and any(r > 1 for r in (rep4, rep3, rep2)):
            target_shape = self.get_output_shape_for(input.shape)
            canvas = T.zeros(shape=target_shape, dtype=input.dtype)
            return T.set_subtensor(
                canvas[:, :, ::rep2, ::rep3, ::rep4], input)

        return input
def op_ortho_loss(s_x_, axes_=(-2, -1), ndim_=None):
    '''
    orthogonal matrix loss
    used to regularize parameter to unitary

    Args:
        s_x_: (batch of) matrices
        axes_: tuple of two integers, specify which axes to be for matrix,
            defaults to last two axes
        ndim_: specify args to be (ndim_ x ndim_) matrices; when None the
            size is read symbolically from axis ``axes_[0]`` of ``s_x_``

    Returns:
        element-wise squared difference between the product of the
        transposed input with the input and a broadcast identity matrix
    '''
    if ndim_ is None:
        ax = axes_[0]
        ndim = T.shape(s_x_)[ax]
    else:
        ndim = ndim_

    # The permutation must span the tensor's rank, not the matrix size
    # (which may be symbolic); this matches how bpat is sized below.
    tpat = list(range(s_x_.ndim))
    bpat = ['x'] * s_x_.ndim
    tpat[axes_[0]], tpat[axes_[1]] = tpat[axes_[1]], tpat[axes_[0]]
    bpat[axes_[0]] = 0
    bpat[axes_[1]] = 1
    # NOTE(review): the left operand was lost in extraction and is
    # reconstructed as the transposed input -- confirm against upstream.
    s_y = T.dot(s_x_.transpose(*tpat), s_x_)
    return T.sqr(s_y - T.eye(ndim).dimshuffle(*bpat))
def op_covmat(s_x_, l1_normize_=True, eps_=1e-7):
    '''
    Return covariance matrix given a batch of data points

    Args:
        s_x_: batch of row vectors (must be 2-dimensional)
        l1_normize_: Defaults to True.
            Make covariance matrix L1 normalized wrt number of data points.
        eps_: Adds a small identity matrix I*eps_ to result, this is applied
            after L1 normalization
    '''
    assert s_x_.ndim == 2
    # Despite its name, s_mean holds the mean-centred data.
    s_mean = s_x_ - T.mean(s_x_, axis=0, keepdims=True)
    s_shp = T.shape(s_x_)
    # NOTE(review): the left operand was lost in extraction; reconstructed
    # as the transposed centred data so the result is
    # (s_shp[1] x s_shp[1]), matching the identity added below -- confirm.
    s_covmat = T.dot(s_mean.T, s_mean)
    if l1_normize_:
        s_covmat /= s_shp[0]
    return s_covmat + T.eye(s_shp[1]) * eps_
def unflatten_into_tensors(flatparams_P, output_shapes, name=None):
    '''
    Unflattens a vector produced by flatcat into a list of tensors of the specified shapes.

    Args:
        flatparams_P: flat vector supporting slicing and ``.reshape``.
        output_shapes: iterable of concrete shape tuples, consumed in order.
        name: unused; kept for interface compatibility.

    Returns:
        list with one reshaped slice of ``flatparams_P`` per entry of
        ``output_shapes``.
    '''
    outputs = []
    curr_pos = 0
    for shape in output_shapes:
        # int() keeps the slice bound integral even for the scalar shape (),
        # where np.prod returns the float 1.0.
        size = int(np.prod(shape))
        flatval = flatparams_P[curr_pos:curr_pos+size]
        outputs.append(flatval.reshape(shape))
        curr_pos += size
    # assert curr_pos == flatparams_P.get_shape().num_elements()
    return outputs

def adam(cost, params, lr, beta1=0.9, beta2=0.999, eps=1e-8):
    """Build the list of Adam update pairs for ``params`` minimising ``cost``.

    For each parameter two zero-initialised shared accumulators (first and
    second moment) are created.  The returned list holds, per parameter, the
    pairs for the first moment, the second moment and the parameter itself,
    followed by one final pair advancing the shared step counter.
    """
    float_type = theano.config.floatX
    grads = tensor.grad(cost, params)
    assert len(params) == len(grads)

    # Shared step counter; the bias corrections use the incremented value.
    step_prev = theano.shared(np.array(0., dtype=float_type))
    step = step_prev + 1
    bias1 = (1 - beta1**step)
    bias2 = (1 - beta2**step)
    step_size = lr * tensor.sqrt(bias2) / bias1

    def _zero_state(param):
        # Accumulator with the parameter's shape and broadcast pattern.
        return theano.shared(
            value=np.zeros(param.get_value().shape, dtype=float_type),
            broadcastable=param.broadcastable)

    updates = []
    for param, grad in zip(params, grads):
        first = _zero_state(param)
        second = _zero_state(param)
        first_new = beta1 * first + (1 - beta1) * grad
        second_new = beta2 * second + (1 - beta2) * tensor.square(grad)
        param_new = param - step_size * first_new/(tensor.sqrt(second_new) + eps)
        updates.extend([
            (first, first_new),
            (second, second_new),
            (param, param_new),
        ])
    updates.append((step_prev, step))
    return updates
def __init__(self, input_var=None, num_styles=None, shape=(None, 3, 256, 256), net_type=1, **kwargs):
        net_type: 0 (fast neural style- fns) or 1 (conditional instance norm- cin)
        assert net_type in [0, 1]
        self.net_type = net_type = {}

        if len(shape) == 2:
            shape=(None, 3, shape[0], shape[1])
        elif len(shape) == 3:
            shape=(None, shape[0], shape[1], shape[2])
        self.shape = shape

        self.num_styles = num_styles['loss_net'] = {}
        self.load_loss_net_weights()['transform_net'] = {}
def setup_transform_net(self, input_var=None):
        """Assemble the transform network and store it on the instance.

        The stack is: an input layer of shape ``self.shape``, three
        ``style_conv_block`` stages, five ``residual_block`` stages, two
        ``nn_upsample`` stages, and a final 3-filter ``style_conv_block``
        whose nonlinearity depends on ``self.net_type`` (0: ``tanh`` followed
        by a x150 scaling layer; 1: ``sigmoid``).
        """
        transform_net = InputLayer(shape=self.shape, input_var=input_var)
        transform_net = style_conv_block(transform_net, self.num_styles, 32, 9, 1)
        transform_net = style_conv_block(transform_net, self.num_styles, 64, 3, 2)
        transform_net = style_conv_block(transform_net, self.num_styles, 128, 3, 2)
        for _ in range(5):
            transform_net = residual_block(transform_net, self.num_styles)
        transform_net = nn_upsample(transform_net, self.num_styles)
        transform_net = nn_upsample(transform_net, self.num_styles)

        if self.net_type == 0:
            transform_net = style_conv_block(transform_net, self.num_styles, 3, 9, 1, tanh)
            transform_net = ExpressionLayer(transform_net, lambda X: 150.*X, output_shape=None)
        elif self.net_type == 1:
            transform_net = style_conv_block(transform_net, self.num_styles, 3, 9, 1, sigmoid)

        # Store the finished stack for both net types.  Previously the store
        # was fused onto the sigmoid line, so the sigmoid block was never
        # assigned and net_type 0 stored nothing.
        # NOTE(review): the container attribute name ``network`` is
        # reconstructed (the text was lost in extraction) -- confirm.
        self.network['transform_net'] = transform_net
def sp_ones_like(x):
    """
    Construct a sparse matrix of ones with the same sparsity pattern.

    Parameters
    ----------
    x
        Sparse matrix to take the sparsity pattern.

    Returns
    -------
    A sparse matrix
        The same as `x` with data changed for ones.

    """
    # TODO: don't restrict to CSM formats
    data, indices, indptr, shape = csm_properties(x)
    # Reuse x's index structure; only the stored values are replaced.
    return CSM(format=x.format)(tensor.ones_like(data), indices, indptr, shape)
def sp_zeros_like(x):
    Construct a sparse matrix of zeros.

        Sparse matrix to take the shape.

    A sparse matrix
        The same as `x` with zero entries for all element.


    # TODO: don't restrict to CSM formats
    _, _, indptr, shape = csm_properties(x)
    return CSM(format=x.format)(data=numpy.array([], dtype=x.type.dtype),
                                indices=numpy.array([], dtype='int32'),
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def perform(self, node, inputs, outputs):
        """Build a scipy sparse matrix from its raw CSC/CSR components.

        Args:
            node: unused.
            inputs: ``(data, indices, indptr, shape)`` arrays.
            outputs: one-element sequence holding the output storage cell;
                the matrix is written to ``outputs[0][0]``.

        Raises:
            ValueError: ``shape`` does not have length 2, or ``data`` and
                ``indices`` differ in shape.
        """
        # for efficiency, if remap does nothing, then do not apply it
        (data, indices, indptr, shape) = inputs
        (out,) = outputs

        if len(shape) != 2:
            raise ValueError('Shape should be an array of length 2')
        if data.shape != indices.shape:
            errmsg = ('Data (shape ' + repr(data.shape) +
                      ') must have the same number of elements ' +
                      'as indices (shape ' + repr(indices.shape) +
                      ')')
            raise ValueError(errmsg)
        # indices/indptr are copied so the result does not alias the inputs;
        # data is shared (copy=False).
        if self.format == 'csc':
            out[0] = scipy.sparse.csc_matrix((data, indices.copy(),
                                              indptr.copy()),
                                             numpy.asarray(shape), copy=False)
        else:
            assert self.format == 'csr'
            out[0] = scipy.sparse.csr_matrix((data, indices.copy(),
                                              indptr.copy()), shape.copy(),
                                             copy=False)
def perform(self, node, inputs, outputs):
        """Gather gradient values at the stored positions of a sparse matrix.

        ``inputs`` holds the raw components (data, indices, indptr, shape) of
        a matrix ``x`` followed by those of a gradient ``g`` in the same
        compressed layout.  For every stored entry of ``x`` the value of
        ``g`` at the same position (0 where ``g`` stores nothing) is written
        to a data vector shaped like ``x_data``; the vector is placed in
        ``outputs[0][0]`` with dtype ``node.outputs[0].dtype``.
        """
        (x_data, x_indices, x_indptr, x_shape,
         g_data, g_indices, g_indptr, g_shape) = inputs
        (g_out,) = outputs
        # The compressed axis has len(indptr) - 1 slices; the scratch row
        # must span the *other* axis.  (The else branch had been lost, which
        # overwrote or left sp_dim undefined.)
        if len(x_indptr) - 1 == x_shape[0]:
            sp_dim = x_shape[1]
        else:
            sp_dim = x_shape[0]

        g_row = numpy.zeros(sp_dim, dtype=g_data.dtype)
        gout_data = numpy.zeros(x_data.shape, dtype=node.outputs[0].dtype)

        for i in range(len(x_indptr) - 1):
            # Scatter slice i of g into the dense scratch row ...
            for j_ptr in range(g_indptr[i], g_indptr[i + 1]):
                g_row[g_indices[j_ptr]] += g_data[j_ptr]

            # ... read it back at x's stored positions ...
            for j_ptr in range(x_indptr[i], x_indptr[i + 1]):
                gout_data[j_ptr] = g_row[x_indices[j_ptr]]

            # ... and clear only the touched entries for the next slice.
            for j_ptr in range(g_indptr[i], g_indptr[i + 1]):
                g_row[g_indices[j_ptr]] = 0

        g_out[0] = gout_data
def perform(self, node, inputs, outputs):
        """Scale every stored entry of a CSC matrix by ``s[row]``.

        Args:
            node: unused.
            inputs: ``(x, s)`` with ``x`` a CSC sparse matrix of shape (M, N)
                and ``s`` a dense vector of shape (M,).
            outputs: one-element sequence holding the output storage cell;
                a new CSC matrix is written to ``outputs[0][0]``.
        """
        (x, s) = inputs
        (z,) = outputs
        M, N = x.shape
        assert x.format == 'csc'
        assert s.shape == (M,)

        indices = x.indices
        indptr = x.indptr

        # Work on a copy so the input matrix's data is left untouched.
        y_data = x.data.copy()

        for j in range(0, N):
            for i_idx in range(indptr[j], indptr[j + 1]):
                # indices[i_idx] is the row of this stored entry.
                y_data[i_idx] *= s[indices[i_idx]]

        z[0] = scipy.sparse.csc_matrix((y_data, indices, indptr), (M, N))
def grad(self, inputs, gout):
        (gz,) = gout
        is_continuous = [(inputs[i].dtype in tensor.continuous_dtypes)
                         for i in range(len(inputs))]

        if _is_sparse_variable(gz):
            gz = dense_from_sparse(gz)

        split = tensor.Split(len(inputs))(gz, 0,
                                               for x in inputs]))
        if not isinstance(split, list):
            split = [split]

        derivative = [SparseFromDense(self.format)(s) for s in split]

        def choose(continuous, derivative):
            if continuous:
                return derivative
                return None
        return [choose(c, d) for c, d in zip(is_continuous, derivative)]
def structured_monoid(tensor_op):
    """Decorator factory applying ``tensor_op`` to a sparse matrix's data.

    The produced wrapper treats its first argument as a sparse matrix
    (format "csr" or "csc") and every further argument as a scalar that is
    forwarded to ``tensor_op`` after the matrix's data vector.  The result is
    a sparse matrix of the same format built from the transformed data and
    the original indices, index pointers and shape.

    The decorated function itself is never called; it is replaced by the
    wrapper, which is renamed to ``str(tensor_op.scalar_op)``.
    """
    def decorator(f):
        def wrapper(*args):
            sparse_arg, extra_args = args[0], args[1:]
            x = as_sparse_variable(sparse_arg)
            assert x.format in ["csr", "csc"]

            scalars = [scalar.as_scalar(extra) for extra in extra_args]

            values, idx, idx_ptr, dims = csm_properties(x)
            new_values = tensor_op(values, *scalars)

            build = CSM(x.format)
            return build(new_values, idx, idx_ptr, dims)

        wrapper.__name__ = str(tensor_op.scalar_op)
        return wrapper
    return decorator
def perform(self, node, inputs, outputs):
        """Compute per-entry gradient data for a column-compressed matrix.

        ``inputs`` is ``(a_indices, a_indptr, b, g_ab)``.  For the stored
        entry at row ``i`` of column ``j`` the result is the dot product of
        ``g_ab[i]`` with ``b[j]``.  The data vector (shaped like
        ``a_indices``, dtype of ``g_ab``) is written to ``outputs[0][0]``.
        """
        (a_indices, a_indptr, b, g_ab) = inputs
        (out,) = outputs
        g_a_data = numpy.zeros(a_indices.shape, dtype=g_ab.dtype)
        for j in range(len(a_indptr) - 1):
            ind0 = a_indptr[j]
            ind1 = a_indptr[j + 1]
            for i_idx in range(ind0, ind1):
                i = a_indices[i_idx]
                # Depending on the type of g_ab and b (sparse or dense),
                # the following dot product can result in a scalar or
                # a (1, 1) sparse matrix.
                dot_val = numpy.dot(g_ab[i], b[j].T)
                if isinstance(dot_val, scipy.sparse.spmatrix):
                    dot_val = dot_val[0, 0]
                g_a_data[i_idx] = dot_val
        out[0] = g_a_data
def perform(self, node, inputs, outputs):
        """Compute per-entry gradient data for a row-compressed matrix.

        ``inputs`` is ``(a_indices, a_indptr, b, g_ab)``.  For the stored
        entry at column ``j`` of row ``i`` the result is the dot product of
        ``g_ab[i]`` with ``b[j]``.  The data vector (shaped like
        ``a_indices``, dtype of ``g_ab``) is written to ``outputs[0][0]``.
        """
        (a_indices, a_indptr, b, g_ab) = inputs
        (out,) = outputs
        g_a_data = numpy.zeros(a_indices.shape, dtype=g_ab.dtype)
        for i in range(len(a_indptr) - 1):  # loop over rows
            ind0 = a_indptr[i]
            ind1 = a_indptr[i + 1]
            # loop over values in that row (columns)
            for j_idx in range(ind0, ind1):
                j = a_indices[j_idx]
                # grad is dot product of i-th row of gradient with j-th row of b
                # Depending on the type of g_ab and b (sparse or dense),
                # the following dot product can result in a scalar or
                # a (1, 1) sparse matrix.
                dot_val = numpy.dot(g_ab[i], b[j].T)
                if isinstance(dot_val, scipy.sparse.spmatrix):
                    dot_val = dot_val[0, 0]
                g_a_data[j_idx] = dot_val
        out[0] = g_a_data
def test_only_nonseq_inputs(self):
        # Compile the Theano function
        n_steps = 2
        inp = tensor.matrix()
        broadcasted_inp, _ = theano.scan(lambda x: x,
        out = broadcasted_inp.sum()
        gr = tensor.grad(out, inp)
        fun = theano.function([inp], [broadcasted_inp, gr])

        # Execute the Theano function and compare outputs to the expected outputs
        inputs = numpy.array([[1, 2], [3, 4]], dtype=theano.config.floatX)
        expected_out1 = numpy.repeat(inputs[None], n_steps, axis=0)
        expected_out2 = numpy.ones(inputs.shape, dtype="int8") * n_steps

        out1, out2 = fun(inputs)
        utt.assert_allclose(out1, expected_out1)
        utt.assert_allclose(out2, expected_out2)

    # simple rnn, one input, one state, weights for each; input/state
    # are vectors, weights are scalars
def test_draw_as_input_to_scan(self):
        trng = theano.tensor.shared_randomstreams.RandomStreams(123)

        x = theano.tensor.matrix('x')
        y = trng.binomial(size=x.shape, p=x)
        z, updates = theano.scan(lambda a: a, non_sequences=y, n_steps=2)

        f = theano.function([x],
                            [y, z],

        rng = numpy.random.RandomState(utt.fetch_seed())
        nx = rng.uniform(size=(10, 10))
        ny1, nz1 = f(nx)
        ny2, nz2 = f(nx)

        utt.assert_allclose([ny1, ny1], nz1)
        utt.assert_allclose([ny2, ny2], nz2)
        assert not numpy.allclose(ny1, ny2)
def test_infershape_seq_shorter_nsteps(self):
        raise SkipTest("This is a generic problem with "
                       "infershape that has to be discussed "
                       "and figured out")
        x = tensor.vector('x')
        [o1, o2], _ = theano.scan(lambda x, y: (x + 1, y + x),
                         outputs_info=[None, x[0]],

        f = theano.function([x],
                            [o1.shape[0], o2.shape[0]],

        vx = numpy.ones((10,), dtype=theano.config.floatX)
        out1, out2 = f(vx)
        assert out1 == 10
        assert out2 == 10
        lssc = [x for x in f.maker.fgraph.toposort()
                if isinstance(x.op, theano.scan_module.scan_op.Scan)]
        assert len(lssc) == 0
def test_infershape_nsteps_smaller_seq_length(self):
        x = tensor.vector('x')
        [o1, o2], _ = theano.scan(lambda x, y: (x + 1, y + x),
                         outputs_info=[None, x[0]],

        f = theano.function([x],
                            [o1.shape[0], o2.shape[0]],

        vx = numpy.ones((30,), dtype=theano.config.floatX)
        o1, o2 = f(vx)
        assert o1 == 20
        assert o2 == 20
        lssc = [x for x in f.maker.fgraph.toposort()
                if isinstance(x.op, theano.scan_module.scan_op.Scan)]
        assert len(lssc) == 0
def model_baseline(s_x_, s_pdpo_):
    '''very simple logistic regression model'''
    global g_mdl, g_dataset
    s_bsize = T.shape(s_x_)[0]
    idim, odim = reduce(int.__mul__, g_dataset.imsize), len(g_dataset.label_map)
    return T.nnet.softmax(
            'm', T.reshape(s_x_, (s_bsize,idim)),
            idim, odim), s_pdpo_))
项目:fxnn    作者:khaotik    | 项目源码 | 文件源码
def lyr_sconv_gen(
    name_, s_x_,
    idim_, odim_,
    quick & dirty implementation of fxnn convolution layer
    global g_mdl
    dilation = kwargs_.get('dilation_')
    if dilation is None:
        dilation = 1
    init_scale = kwargs_.get('init_scale_')
    bias = kwargs_.get('bias_')
    op_conv = partial(
        filter_dilation = (dilation, dilation))
    ir = 0.5/sqrt(idim_*5+odim_)
    s_dims = T.shape(s_x_)
    s_x = T.reshape(s_x_, (s_dims[0]*idim_, 1, s_dims[2], s_dims[3]))
    s_x1 = T.reshape(op_conv(
        s_x, g_sconv_ker,
        filter_shape=(2, 1, 1, 3), **kwargs_),
        (s_dims[0]*idim_*2, 1, s_dims[2], s_dims[3]))
    s_x2 = T.reshape(op_conv(
        s_x1, g_sconv_ker.transpose(0,1,3,2),
        filter_shape=(2, 1, 3, 1),
    ), (s_dims[0], idim_*4, s_dims[2], s_dims[3]))
    s_y = T.join(1, s_x2, s_x_)
    return g_mdl.lyr_conv(
        name_, s_y, idim_*5, odim_, fsize_=1, init_scale_=ir, **kwargs_);
项目:fxnn    作者:khaotik    | 项目源码 | 文件源码
def op_noise_uniform(self, s_x_, s_scale_):
        """Return ``s_x_`` plus uniform noise drawn from ``self.rng``.

        The noise has the symbolic shape of ``s_x_``, bounds
        ``[-s_scale_, s_scale_]`` and dtype ``th.config.floatX``.
        """
        s_noise = self.rng.uniform(
            low=-s_scale_,
            high=s_scale_,
            size=T.shape(s_x_),
            dtype=th.config.floatX)
        return s_x_ + s_noise
def get_output_shape_for(self, input_shapes):
        """Return the second input's shape after auto-cropping.

        ``input_shapes`` is first passed through ``autocrop_array_shapes``
        with ``self.cropping``; the entry at index 1 of the result is the
        output shape.
        """
        cropped_shapes = autocrop_array_shapes(input_shapes, self.cropping)
        return cropped_shapes[1]
def get_output_for(self, upscaled, **kwargs):
        a, b = self.scale_factor
        # get output for pooling and pre-pooling layer
        inp, out =\
        # upscale the input feature map by scale_factor
        if b > 1:
            upscaled = T.extra_ops.repeat(upscaled, b, 3)
        if a > 1:
            upscaled = T.extra_ops.repeat(upscaled, a, 2)
        # get the shapes for pre-pooling layer and upscaled layer
        sh_pool2d_in = T.shape(inp)
        sh_upscaled = T.shape(upscaled)
        # in case the shape is different left-bottom-pad with zero
        tmp = T.zeros(sh_pool2d_in)

        indx = (slice(None),
                slice(0, sh_upscaled[2]),
                slice(0, sh_upscaled[3]))
        upscaled = T.set_subtensor(tmp[indx], upscaled)
        # get max pool indices
        indices_pool = T.grad(None, wrt=inp,
                known_grads={out: T.ones_like(out)})
        # mask values using indices_pool
        f = indices_pool * upscaled
        return f
def get_output_for(self, input, deterministic=False, **kwargs):
        """
        Parameters
        ----------
        input : tensor
            output from the previous layer
        deterministic : bool
            If true noise is disabled, see notes

        Returns ``input`` unchanged when ``deterministic`` is true or
        ``self.sigma`` is 0; otherwise ``softmax4D`` of the input plus
        normal noise from ``self._srng``.
        """
        if deterministic or self.sigma == 0:
            return input
        # NOTE(review): the tail of this call was lost in extraction;
        # ``std=self.sigma`` is reconstructed from the sigma check above --
        # confirm against upstream.
        return softmax4D(input + self._srng.normal(input.shape,
                                                   std=self.sigma))
def get_output_for(self, input, deterministic=False, **kwargs):
        """
        Parameters
        ----------
        input : tensor
            output from the previous layer
        deterministic : bool
            If true noise is disabled, see notes

        Returns ``input`` unchanged when ``deterministic`` is true or
        ``self.sigma`` is 0; otherwise the input plus normal noise of
        standard deviation ``self.sigma``, clipped to [0.0, 1.0].
        """
        if deterministic or self.sigma == 0:
            return input
        # Noisy path (the ``else`` had been lost, leaving this unreachable).
        noisy = input + self._srng.normal(input.shape,
                                          std=self.sigma)
        return T.clip(noisy, 0.0, 1.0)
def get_output_for(self, input, **kwargs):
        """Pass ``input`` through ``T.Unbroadcast`` over all of its axes.

        The list of axis indices is printed as a debugging aid.
        """
        # NOTE(review): len() of a symbolic shape may not be supported;
        # ``input.ndim`` would be the static alternative -- confirm.
        all_dims = range(len(T.shape(input)))
        # print() with a single argument prints the same text on Python 2
        # and 3; the bare print statement was a SyntaxError on Python 3.
        print(all_dims)
        return T.Unbroadcast(input, *all_dims)
def op_matmul(s_x_, s_y_, axes_=(-2, -1)):
    limited implementation of np.matmul, does not support broadcasting

        s_x_: (batch of) matrix(matrices)
        s_y_: (batch of) matrix(matrices)
        axes_: tuple of int, the axes for the matrix
    assert s_x_.ndim == s_y_.ndim
    ndim = s_x_.ndim
    assert -ndim <= axes_[0] < ndim
    assert -ndim <= axes_[1] < ndim
    assert ndim >= 2
    axes = axes_[0]%ndim, axes_[1]%ndim
    if ndim == 2:
        if axes == (0,1):
            return, s_y_)
            return, s_x_)
    s_shp = T.shape(s_x_)
    s_size = reduce(T.mul, [s_shp[i] for i in range(s_x_.ndim) if i not in axes])
    s_szu = s_shp[axes[0]]
    s_szv = s_shp[axes[1]]
    s_szw = T.shape(s_y_)[axes[1]]
    transpp = list(range(ndim))
    transpp[axes[0]], transpp[ndim-2] = transpp[ndim-2], transpp[axes[0]]
    transpp[axes[1]], transpp[ndim-1] = transpp[ndim-1], transpp[axes[1]]
    s_shp2 = [s_shp[a] for a in transpp]
    s_shp2[axes[1]] = s_szw
    s_x = s_x_.transpose(*transpp).reshape((s_size, s_szu, s_szv))
    s_y = s_y_.transpose(*transpp).reshape((s_size, s_szv, s_szw))
    return T.batched_dot(s_x, s_y).reshape(s_shp2).transpose(transpp)
def op_unitary_loss(s_re_, s_im_, axes_=None, size_=None):
    unitary matrix loss of real/imag part,
    used to regularize parameter to unitary

        s_re_: real part, square matrix
        s_im_: imag part, square matrix
        size_: specify args to be (size_ x size_) matrices
        axes_: tuple of two integers, specify which axes to be for matrix,
            defaults to last two axes
    if axes_ is None:
        axes_ = (-2, -1)

    if size_ is None:
        ax = axes_[0]
        size = T.shape(s_re_)[ax]
        size = size_

    assert s_re_.ndim == s_im_.ndim

    tpat = list(range(s_re_.ndim))
    bpat = ['x'] * s_re_.ndim
    tpat[axes_[0]], tpat[axes_[1]] = tpat[axes_[1]], tpat[axes_[0]]
    bpat[axes_[0]] = 0
    bpat[axes_[1]] = 1
    s_y_re_ =*tpat), s_re_) +*tpat), s_im_)
    s_tmp =*tpat), s_im_)
    s_y_im_ = s_tmp - s_tmp.transpose(*tpat)
    return T.mean(T.sqr(s_y_re_ - T.eye(size).dimshuffle(*bpat)) + T.sqr(s_y_im_))
def load_params(self, f_, format_=Default, filter_=None):
        This loads parameters into current group

        An dict containing string->shared_variables will be loaded.

            f_: readable file or filename string
            format_: string, file format.
                Default is to interpret from file name
                supported formats: "pkl"
            filter_: string or None, regex pattern to filter

            This will update the current group with newly loaded key-value pairs,
            rather than clear-then-load.
        if isinstance(f_, str):
            f_ = open(f_, 'rb')
        di = pickle.load(f_)
        if filter_ is None:
            pat = re.compile(filter_)
            self._current_group_di.update({k:v for k,v in di.items() if pat.fullmatch(k)})
            for k,v in di.items():
                if not pat.fullmatch(k): continue
                p = self._current_group_di[k].get_value(borrow=True)
                if p.shape != v.shape:
                    raise ValueError('Shape mismatch, need %s, got %s'%(v.shape, p.shape), p.shape)
def op_noise_uniform(self, s_x_, s_scale_):
        """Return ``s_x_`` plus uniform noise drawn from ``self.rng``.

        The noise has the symbolic shape of ``s_x_``, bounds
        ``[-s_scale_, s_scale_]`` and dtype ``th.config.floatX``.
        """
        s_noise = self.rng.uniform(
            low=-s_scale_,
            high=s_scale_,
            size=T.shape(s_x_),
            dtype=th.config.floatX)
        return s_x_ + s_noise
def forward(self, inputtensor):
        x = inputtensor[0]

        #             self.inputFeatureDim,
        #             int(self.dataSize[0]*self.subsample[0]),
        #             int(self.dataSize[1]*self.subsample[1]))
        #              self.inputFeatureDim,
        #              *self.filterSize)
        #print("{}, {}".format(input_shape, filter_shape))

        if self.isAfterFullConn:
            x = T.reshape(x, (T.shape(x)[0], self.outputFeature, 1, 1))

        # All input/output are refering to convolutional operator
        # So when using it, think in oppersite way.
        y = T.nnet.abstract_conv.conv2d_grad_wrt_inputs(x, self.w,
                                                                     #None, None is also ok for inputFeatureDim,
        return (y,)
def binarize_weights(W,H,srng=None,deterministic=False):
    """
    Copied from BinaryNet by Matthieu Courbariaux,
    :param W: weights to binarize
    :param H: magnitude of the binarized values; the result holds H or -H
    :param srng: random stream used for stochastic rounding; a fixed-seed
        MRG stream is created when None
    :param deterministic: round to nearest instead of sampling
    :return: quantized weights
    """
    if srng is None:
        rng = np.random.RandomState(666)
        srng = theano.sandbox.rng_mrg.MRG_RandomStreams(rng.randint(999999))

    # [-1,1] -> [0,1]
    Wb=T.clip(((W / H)+1.)/2.,0,1)

    # Deterministic BinaryConnect (round to nearest)
    if deterministic:
        # print("det")
        Wb = T.cast(GradPreserveRoundTensor(Wb), theano.config.floatX)

    # Stochastic BinaryConnect
    else:
        # print("stoch")
        # Wb is used as the per-element probability of drawing 1.
        Wb = T.cast(srng.binomial(n=1, p=Wb, size=T.shape(Wb)), theano.config.floatX)

    # 0 or 1 -> -H or H
    Wb = T.cast(T.switch(Wb, H, -H), theano.config.floatX)

    return Wb
def ternarize_weights(W,W0,deterministic=False,srng=None):
    Changed copy of the code from TernaryConnect by Zhouhan Lin, Matthieu Courbariaux,
    :param W: Weights
    :param W0: W0=0.5
    :param deterministic: deterministic rounding
    :param srng: random number generator
    :return: quantized weights
    #print 'Current W0: ',W0
    if srng is None:
        rng = np.random.RandomState(666)
        srng = theano.sandbox.rng_mrg.MRG_RandomStreams(rng.randint(999999))
    if deterministic:
        #print 'Deterministic Ternarization!'

        larger_than_neg_0_5 =, -W0/2.)
        larger_than_pos_0_5 =, W0/2.)
        W_val = larger_than_neg_0_5 * 1 + larger_than_pos_0_5 * 1 - 1
        Wb = W_val * W0

        #print 'Stochastic Ternarization!'
        w_sign =, 0) * 2 - 1
        p = T.clip(T.abs_(W / (W0)), 0, 1)
        Wb = W0 * w_sign * T.cast(srng.binomial(n=1, p=p, size=T.shape(W)), theano.config.floatX)

项目:QuantizedRNN    作者:ottj    | 项目源码 | 文件源码
def quantize_weights(W,srng=None,bitlimit=None,deterministic=False):
    """
    Exponential quantization
    :param W: Weights
    :param srng: random number generator
    :param bitlimit: limit values to be in power of 2 range, e.g. for values in 2^-22 to 2^9 set it to [-22, 9]
    :param deterministic: deterministic rounding
    :return: quantized weights
    """
    # NOTE(review): this overrides whatever the caller passed, so the
    # unclipped branch below is currently never taken.
    bitlimit=[-22, 9] #hardcoded for experiments
    if srng is None:
        rng = np.random.RandomState(666)
        srng = theano.sandbox.rng_mrg.MRG_RandomStreams(rng.randint(999999))

    # index_low: floor(log2(|W|)), the exponent of the power of two just
    # below |W|, optionally clipped to the allowed exponent range.
    if bitlimit:
        index_low = T.clip(
            T.switch(W > 0., T.floor(T.log2(W)), T.floor(T.log2(-W))),
            bitlimit[0], bitlimit[1])
    else:
        index_low = T.switch(
            W > 0., T.floor(T.log2(W)), T.floor(T.log2(-W)))
    sign = T.switch(W > 0., 1., -1.)
    p_up = sign * W / 2 ** (index_low) - 1  # percentage of upper index.
    if deterministic:
        index_deterministic = index_low + T.switch(p_up > 0.5, 1, 0)
        quantized_W = sign * 2 ** index_deterministic
    else:
        # Round up to the next exponent with probability p_up.
        index_random = index_low + srng.binomial(
            n=1, p=p_up, size=T.shape(W), dtype=theano.config.floatX)
        quantized_W = sign * 2 ** index_random
    return quantized_W
def compute_estimator(self, log_p_all, log_q_all):
        """Build the likelihood estimate and the gradient of its surrogate.

        Args:
            log_p_all: log p values; per the inline comments the working
                arrays are (batch_size, n_samples).
            log_q_all: log q values, combined element-wise with log_p_all.

        Returns:
            (likelihood, gradients): ``likelihood`` is the sum of the
            per-row bound ``L`` divided by ``self.batch_size``;
            ``gradients`` is the result of ``tt.grad`` on the surrogate
            ``estimator``.

        Equation numbers in the comments refer to the paper this code
        follows (not visible from here).
        """
        n_samples = tt.shape(log_p_all)[1]

        # See equation 14, for definition of I see equation 2
        f_x_h = log_p_all - log_q_all  # f_x_h: (batch_size, n_samples)
        sum_p_over_q = logsumexp(f_x_h, axis=1)  # sum_p_over_q: (batch_size, )
        L = sum_p_over_q - tt.log(n_samples) # L: (batch_size, )

        # Equation 10
        sum_min_i = logsubexp(sum_p_over_q.dimshuffle(0, 'x'), f_x_h)
        # NOTE(review): n_samples is symbolic (tt.shape), yet np.log is
        # applied to it here -- confirm this evaluates as intended.
        sum_min_i_normalized = sum_min_i - np.log(n_samples - 1).astype(theano.config.floatX)

        L_h_given_h = L.dimshuffle(0, 'x') - sum_min_i_normalized  # equation (10)

        # Get gradient of log Q and scale
        part_1 = L_h_given_h * log_q_all  # equation 11, part 1

        # Per-sample weights relative to the row-wise log-sum-exp.
        weights = f_x_h - sum_p_over_q.dimshuffle(0, 'x')
        exp_weights = tt.exp(weights)

        part_2 = exp_weights * f_x_h

        estimator = (part_1 + part_2).sum() / self.batch_size

        # exp_weights and L_h_given_h are held constant during
        # differentiation.
        # NOTE(review): no ``wrt`` argument is visible in this call; it
        # appears to have been lost in extraction -- restore it from
        # upstream before use.
        gradients = tt.grad(estimator,
                            consider_constant=[exp_weights, L_h_given_h])

        likelihood = L.sum() / self.batch_size

        return likelihood, gradients
def train(self, learning_rate, epoch):
        """Run one pass of training updates over all training batches.

        Batches come from ``self.get_batch_order(self.N_train)``; each is
        sampled with ``self.sample_train`` and fed to ``self.update_train``
        together with ``learning_rate`` and ``epoch``.  The per-batch results
        are accumulated and their count is asserted to equal the number of
        batches.  Nothing is returned.
        """
        order = self.get_batch_order(self.N_train)
        likelihoods = np.array([])

        for batch in order:
            drawn = self.sample_train(batch)
            likelihoods = np.append(
                likelihoods, self.update_train(learning_rate, epoch, *drawn))

        assert likelihoods.shape[0] == len(order)

项目:VIMCO    作者:y0ast    | 项目源码 | 文件源码
def valid(self):
        """Evaluate the likelihood on every validation batch.

        Batches come from ``self.get_batch_order(self.N_valid)``; each is
        sampled with ``self.sample_valid`` and scored with
        ``self.likelihood_valid``.  The per-batch results are accumulated and
        their count is asserted to equal the number of batches.  Nothing is
        returned.
        """
        order = self.get_batch_order(self.N_valid)
        likelihoods = np.array([])

        for batch in order:
            drawn = self.sample_valid(batch)
            likelihoods = np.append(likelihoods, self.likelihood_valid(*drawn))

        assert likelihoods.shape[0] == len(order)

项目:VIMCO    作者:y0ast    | 项目源码 | 文件源码
def test(self):
        batch_likelihood_list = np.array([])
        batch_order = self.get_batch_order(self.N_test)

        for i, batch in enumerate(batch_order):
            samples = self.sample_test(batch)
            batch_L = self.likelihood_test(*samples)

            batch_likelihood_list = np.append(batch_likelihood_list, batch_L)

        assert(batch_likelihood_list.shape[0] == len(batch_order))

        return np.mean(batch_likelihood_list)
def compute_samples(self, srng, Z_below, layer):
        """Draw binary samples for the layer above ``Z_below``.

        The activation probability is a sigmoid of an affine map using
        ``self.params['W_enc_<layer>']`` and ``self.params['b_enc_<layer>']``.
        A uniform draw from ``srng`` is compared against it and the result is
        cast to ``theano.config.floatX``.
        """
        # NOTE(review): the ``tt.dot(Z_below`` prefix was lost in extraction
        # and is reconstructed from the argument list -- confirm.
        q_z_above_given_z_below = sigmoid(
            tt.dot(Z_below, self.params['W_enc_' + str(layer)])
            + self.params['b_enc_' + str(layer)])

        # A unit fires where its probability exceeds the uniform draw.
        U = srng.uniform(q_z_above_given_z_below.shape)
        Z = tt.cast(q_z_above_given_z_below > U, dtype=theano.config.floatX)

        return Z
def binarization(W,H,binary=True,deterministic=False,stochastic=False,srng=None):
    """Binarize ``W`` to the values ``H`` / ``-H``.

    ``W`` is returned untouched when ``binary`` is false, or when both
    ``deterministic`` and ``stochastic`` are set.  Otherwise ``W/H`` is
    mapped to [0, 1] with ``hard_sigmoid`` and then either sampled
    (``stochastic``; requires ``srng``) or rounded to nearest, and finally
    mapped to ``H`` / ``-H`` in ``theano.config.floatX``.
    """
    # (deterministic == True) <-> test-time <-> inference-time
    if not binary or (deterministic and stochastic):
        # print("not binary")
        # Early return replaces the lost ``else``: previously the binarizing
        # code below also ran on this path.
        return W

    # [-1,1] -> [0,1]
    Wb = hard_sigmoid(W/H)

    # Stochastic BinaryConnect
    if stochastic:
        # print("stoch")
        Wb = T.cast(srng.binomial(n=1, p=Wb, size=T.shape(Wb)), theano.config.floatX)

    # Deterministic BinaryConnect (round to nearest)
    else:
        # print("det")
        Wb = T.round(Wb)

    # 0 or 1 -> -1 or 1
    Wb = T.cast(T.switch(Wb,H,-H), theano.config.floatX)

    return Wb

# This class extends the Lasagne DenseLayer to support BinaryConnect
def zeros(shape, dtype=np.float32):
    """Return a NumPy array of zeros with the given shape (float32 by default)."""
    return np.zeros(shape, dtype=dtype)

项目:anirban-imitation    作者:Santara    | 项目源码 | 文件源码
def gaussian_kl(means1_N_D, stdevs1_N_D, means2_N_D, stdevs2_N_D):
    KL divergences between Gaussians with diagonal covariances
    Covariances matrices are specified with square roots of the diagonal (standard deviations)
    D = tensor.shape(means1_N_D)[1]
    return (
        .5 * (tensor.sqr(stdevs1_N_D/stdevs2_N_D).sum(axis=1) +
              tensor.sqr((means2_N_D-means1_N_D)/stdevs2_N_D).sum(axis=1) +
              2.*(tensor.log(stdevs2_N_D).sum(axis=1) - tensor.log(stdevs1_N_D).sum(axis=1)) - D
项目:anirban-imitation    作者:Santara    | 项目源码 | 文件源码
    '''Log density of a Gaussian distribution with diagonal covariance (specified as standard deviations).'''
    D = tensor.shape(means_N_D)[1]
    lognormconsts_B = -.5*(D*np.log(2.*np.pi) + 2.*tensor.log(stdevs_N_D).sum(axis=1)) # log normalization constants
    logprobs_B = -.5*tensor.sqr((x_N_D - means_N_D)/stdevs_N_D).sum(axis=1) + lognormconsts_B
    return logprobs_B
def function(inputs, outputs, **kwargs):
    """Compile a Theano function whose 0-d array results become scalars.

    Args:
        inputs, outputs, **kwargs: forwarded unchanged to ``theano.function``.

    Returns:
        A callable taking the same positional arguments as the compiled
        function.  A list/tuple result comes back as a list, a single result
        as a single value; every ``numpy.ndarray`` of shape ``()`` is
        converted to a Python scalar.
    """
    # Cache compiled function
    f = theano.function(inputs, outputs, **kwargs)

    def _unwrap(value):
        # ndarray.item() replaces np.asscalar, which newer NumPy removed.
        if isinstance(value, np.ndarray) and value.shape == ():
            return value.item()
        return value

    def wrapper(*args):
        # Execute
        out = f(*args)
        # Find output elements with shape == () and convert them to scalars
        if isinstance(out, (list, tuple)):
            return [_unwrap(item) for item in out]
        return _unwrap(out)

    # Previously the wrapper was built but never returned, so callers
    # received None.
    return wrapper
def setup_loss_net(self):
        Create a network of convolution layers based on the VGG16 architecture from the paper:
        "Very Deep Convolutional Networks for Large-Scale Image Recognition"

        Original source:
        License: see

        Based on code in the Lasagne Recipes repository:
        loss_net =['loss_net']
        loss_net['input'] = InputLayer(shape=self.shape)
        loss_net['conv1_1'] = ConvLayer(loss_net['input'], 64, 3, pad=1, flip_filters=False)
        loss_net['conv1_2'] = ConvLayer(loss_net['conv1_1'], 64, 3, pad=1, flip_filters=False)
        loss_net['pool1'] = PoolLayer(loss_net['conv1_2'], 2)
        loss_net['conv2_1'] = ConvLayer(loss_net['pool1'], 128, 3, pad=1, flip_filters=False)
        loss_net['conv2_2'] = ConvLayer(loss_net['conv2_1'], 128, 3, pad=1, flip_filters=False)
        loss_net['pool2'] = PoolLayer(loss_net['conv2_2'], 2)
        loss_net['conv3_1'] = ConvLayer(loss_net['pool2'], 256, 3, pad=1, flip_filters=False)
        loss_net['conv3_2'] = ConvLayer(loss_net['conv3_1'], 256, 3, pad=1, flip_filters=False)
        loss_net['conv3_3'] = ConvLayer(loss_net['conv3_2'], 256, 3, pad=1, flip_filters=False)
        loss_net['pool3'] = PoolLayer(loss_net['conv3_3'], 2)
        loss_net['conv4_1'] = ConvLayer(loss_net['pool3'], 512, 3, pad=1, flip_filters=False)
        loss_net['conv4_2'] = ConvLayer(loss_net['conv4_1'], 512, 3, pad=1, flip_filters=False)
        loss_net['conv4_3'] = ConvLayer(loss_net['conv4_2'], 512, 3, pad=1, flip_filters=False)
        loss_net['pool4'] = PoolLayer(loss_net['conv4_3'], 2)
        loss_net['conv5_1'] = ConvLayer(loss_net['pool4'], 512, 3, pad=1, flip_filters=False)
        loss_net['conv5_2'] = ConvLayer(loss_net['conv5_1'], 512, 3, pad=1, flip_filters=False)
        loss_net['conv5_3'] = ConvLayer(loss_net['conv5_2'], 512, 3, pad=1, flip_filters=False)
def batched_gram5d(self, fmap):
        # (layer, batch, featuremaps, height*width)

        # (layer*batch, featuremaps, height*width)
        fmap2=fmap.reshape((-1, fmap.shape[-2], fmap.shape[-1]))

        # The term can't be taken outside as a T.mean in style_loss(), since the width and height of the image might vary
        return T.batched_dot(fmap2, fmap2.dimshuffle(0,2,1)).reshape(fmap.shape)/[-2:])
def style_loss5d(self, out_layer, target_style_layer):
        """Mean squared Gram-matrix difference between output and target.

        Both inputs go through ``self.batched_gram``.  The target's Gram
        tensor is tiled along axis 1 by ``T.shape(out_layer)[0]``, the
        squared difference is summed over axes (2, 3), then averaged over
        axis 1.
        """
        # Each input is a 5D tensor: (style loss layer, batch, feature map, height, width)
        gram_out = self.batched_gram(out_layer)
        gram_target = self.batched_gram(target_style_layer)
        tiled_target = T.tile(gram_target, (1, T.shape(out_layer)[0], 1, 1))
        summed_sq_diff = T.sum(T.sqr(gram_out - tiled_target), axis=(2,3))
        return T.mean(summed_sq_diff, axis=1)