# One LSTM step: from the input and the previous (cell, hidden) state, build
# the next (cell, hidden) state.
def lyr_lstm(
        self, name_,
        s_x_, s_cell_, s_hid_,
        idim_, hdim_,
        # NOTE(review): the rest of the parameter list and the closing `):`
        # are not present in this listing. The body reads `axis_`,
        # `lyr_linear_`, `op_gate_` and `op_act_`, so they are presumably
        # keyword parameters declared here -- confirm names and defaults
        # against the original source.
        # Concatenate the input with the previous hidden state along `axis_`.
        s_inp = T.join(axis_, s_x_, s_hid_)
        # Fall back to this object's own linear layer when none is supplied.
        if lyr_linear_ is None:
            lyr_linear_ = self.lyr_linear
        # A single layer call (named `<name_>_rec`, idim_+hdim_ -> hdim_*4)
        # produces all pre-activations; split them into the three gates
        # (hdim_*3) and the candidate input (hdim_).
        s_gates_lin, s_inp_lin = T.split(
            lyr_linear_(name_+'_rec', s_inp, idim_+hdim_, hdim_*4),
            [hdim_*3,hdim_], 2, axis=axis_)
        # Apply the gate op once, then cut the result into three hdim_-wide
        # pieces bound to s_igate, s_fgate and s_ogate.
        s_igate, s_fgate, s_ogate = T.split(op_gate_(s_gates_lin), [hdim_]*3, 3, axis=axis_)
        # New cell: gated candidate plus gated previous cell.
        s_cell_tp1 = s_igate*op_act_(s_inp_lin) + s_fgate*s_cell_
        # New hidden state: activated cell scaled by s_ogate.
        s_hid_tp1 = op_act_(s_cell_tp1)*s_ogate
        # Returned order is (cell, hidden).
        return s_cell_tp1, s_hid_tp1
项目:dnc-theano    作者:khaotik    | 项目源码 | 文件源码
# One GRU-style step: gate the input, mix it with the previous state and
# return the next state.
def lyr_gru_flat(
        self, name_,
        s_x_, s_state_,
        idim_, hdim_,
        # NOTE(review): the rest of the parameter list, the closing `):` and
        # the docstring's triple quotes are not present in this listing. The
        # body reads `axis_`, `lyr_linear_`, `op_gate_`, `op_act_` and
        # `params_group_`, so they are presumably parameters declared here --
        # confirm names and defaults against the original source. The three
        # prose lines below are the remains of the docstring.
        GRU layer, flat version

        In order to use, you need to provide state variable

        # Fall back to this object's own linear layer when none is supplied.
        if lyr_linear_ is None:
            lyr_linear_ = self.lyr_linear
        # NOTE(review): unlike the `_gate` call below, no tensor is passed
        # between the layer name and the dimensions here. Given the
        # idim_+hdim_ input size, the intended input is presumably the join
        # of s_x_ and s_state_ -- confirm.
        s_igate = lyr_linear_(name_+'_igate', idim_+hdim_, idim_, params_group_=params_group_)
        # Scale the input elementwise by the input gate, then concatenate it
        # with the previous state along `axis_`.
        s_inp_gated = T.join(axis_, s_x_ * op_gate_(s_igate), s_state_)
        # One layer call (idim_+hdim_ -> hdim_*2), split into the update-gate
        # pre-activation and the candidate-state pre-activation.
        s_gate_lin, s_state_tp1_lin = T.split(lyr_linear_(name_+'_gate', s_inp_gated, idim_+hdim_, hdim_*2), [hdim_,hdim_], 2, axis_)
        s_gate = op_gate_(s_gate_lin)
        # Blend: keep `s_gate` of the old state, take `1 - s_gate` of the
        # activated candidate.
        return s_state_*s_gate + op_act_(s_state_tp1_lin)*(1.-s_gate)
项目:DBQA-KBQA    作者:Lucien-qiang    | 项目源码 | 文件源码
def _k_max_pooling(input, kmax):
  """Keep the `kmax` largest entries along axis 2 of a 4-D tensor.

  The selected entries keep the relative order they had in `input`.
  The result has shape
  ``(input.shape[0], input.shape[1], kmax, input.shape[3])``.
  """
  # Move axis 2 to the end and collapse the other three axes into rows, so
  # that every row holds the values competing for one pooled slot.
  swapped = input.dimshuffle(0, 2, 1, 3)
  collapsed = swapped.flatten(ndim=3).dimshuffle(1, 0, 2).flatten(ndim=2)
  rows = collapsed.dimshuffle(1, 0)

  # Column indices of the kmax largest values per row; sorting the indices
  # restores their original left-to-right order.
  order = T.argsort(rows, axis=1)
  col_idx = T.sort(order[:, -kmax:], axis=1).flatten()
  # Matching row index for each selected column index.
  row_idx = T.repeat(T.arange(order.shape[0]), kmax)
  picked = rows[row_idx, col_idx]

  # Un-collapse the rows, then put the pooled axis back in position 2.
  out_shape = T.join(0, T.as_tensor([input.shape[0], input.shape[1], input.shape[3], kmax]))
  return picked.reshape(out_shape, ndim=4).dimshuffle(0, 1, 3, 2)
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def local_gpua_alloc2(node):
    # NOTE(review): the prose lines below are the remains of a docstring
    # whose triple-quote delimiters are not present in this listing.
    Join(axis, {Alloc or HostFromGPU}, ...) -> Join(axis, GpuAlloc, Alloc, ...)

    Moves an alloc that is an input to join to the gpu.

    # NOTE(review): the `try:` clause that this `except` belongs to, and the
    # statement forming the `except` body, are also missing here. Presumably
    # the function probes for a default GPU context and returns without a
    # replacement when none is defined -- confirm against the original.
    except ContextNotDefined:
        # If there is no default context then we do not perform the move here.
    # Fire only for an Alloc node whose every client is a join (never the
    # graph output) and whose joined tensors are all produced by either
    # host_from_gpu or alloc.
    if (isinstance(node.op, tensor.Alloc) and
        all(c != 'output' and
            c.op == tensor.join and
            all(i.owner and
                i.owner.op in [host_from_gpu, tensor.alloc]
                for i in c.inputs[1:])
            for c, idx in node.outputs[0].clients)):
        # Replace the node with a gpu_alloc built from the same inputs, with
        # the result wrapped in host_from_gpu.
        return [host_from_gpu(gpu_alloc(None)(*node.inputs))]
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_no_cycle(self):
        """Load the pickled graph from gh-1549, whose optimization used to cycle.

        Skipped when CUDA is unavailable or the interpreter is older than
        Python 2.5.
        """
        # Optimizing this graph resulted in a cycle, see gh-1549
        # This test depends on cuda
        import theano.sandbox.cuda as cuda
        if not cuda.cuda_available:
            raise SkipTest("cuda not available")
        if sys.version_info[:2] < (2, 5):
            raise SkipTest("Test skipped due to a too old python")

        # This pickle file has undergone manual surgery due to changes
        # in scan and may or may not run correctly.  It does pass
        # the test below.
        pkl_filename = os.path.join(os.path.dirname(theano.__file__),
                                    'tensor', 'tests', 'shape_opt_cycle.pkl')
        # Due to incompatibilities between python 2 and 3 in the format
        # of pickled numpy ndarray, we have to force an encoding
        from theano.misc.pkl_utils import CompatUnpickler
        with open(pkl_filename, "rb") as pkl_file:
            if PY3:
                u = CompatUnpickler(pkl_file, encoding="latin1")
            else:
                # Without this branch the latin1 unpickler was overwritten on
                # Python 3 and `u` was never bound on Python 2.
                u = CompatUnpickler(pkl_file)
            fn_args = u.load()
        # NOTE(review): `fn_args` is not used after loading in this listing;
        # the original test presumably compiles a function from it -- confirm.
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_local_join_make_vector():
    """Check the optimized graph of a join over MakeVector outputs.

    The join is written with five inputs (axis plus four tensors); after
    optimization the graph must hold exactly one Join with four inputs, keep
    the floatX dtype and still produce the concatenated values.
    """
    a, b, c, d, e = tensor.scalars('abcde')
    v = tensor.vector('v')
    make_vec = MakeVector(config.floatX)
    joined = tensor.join(0, make_vec(a), v, make_vec(b, c), make_vec(d, e))
    f = function([a, b, c, d, e, v], joined, mode=mode_opt)

    result = f(1, 2, 3, 4, 6, [7, 8])
    assert numpy.all(result == [1, 7, 8, 2, 3, 4, 6])

    join_nodes = [node for node in f.maker.fgraph.toposort()
                  if isinstance(node.op, Join)]
    assert len(join_nodes) == 1
    assert all(len(node.inputs) == 4 for node in join_nodes)
    assert f.maker.fgraph.outputs[0].dtype == config.floatX

    assert check_stack_trace(f, ops_to_check='all')
项目:fxnn    作者:khaotik    | 项目源码 | 文件源码
# Builds a convolution layer that first expands every input channel with two
# fixed-kernel convolutions and then applies a size-1 convolution through
# g_mdl.lyr_conv.
def lyr_sconv_gen(
    name_, s_x_,
    idim_, odim_,
    # NOTE(review): the rest of the parameter list (presumably ending in
    # `**kwargs_`), the closing `):` and the docstring's triple quotes are
    # not present in this listing. The prose line below is the remains of
    # the docstring.
    quick & dirty implementation of fxnn convolution layer
    global g_mdl
    # Dilation defaults to 1 when 'dilation_' is absent or None.
    dilation = kwargs_.get('dilation_')
    if dilation is None:
        dilation = 1
    # NOTE(review): init_scale and bias are read but not used in the visible
    # lines.
    init_scale = kwargs_.get('init_scale_')
    bias = kwargs_.get('bias_')
    # NOTE(review): the first line(s) of arguments to `partial(` (the wrapped
    # convolution callable and possibly more keywords) are missing here.
    op_conv = partial(
        filter_dilation = (dilation, dilation))
    # Initialisation scale handed to the final lyr_conv call.
    ir = 0.5/sqrt(idim_*5+odim_)
    s_dims = T.shape(s_x_)
    # Treat each of the idim_ channels as its own single-channel image
    # (assumes s_x_ is 4-D with idim_ entries on axis 1 -- TODO confirm).
    s_x = T.reshape(s_x_, (s_dims[0]*idim_, 1, s_dims[2], s_dims[3]))
    # First pass: a (2, 1, 1, 3) filter bank, i.e. two outputs per image.
    # The reshape keeps s_dims[2] and s_dims[3], so the convolution is
    # presumably configured to preserve the spatial size -- confirm.
    s_x1 = T.reshape(op_conv(
        s_x, g_sconv_ker,
        filter_shape=(2, 1, 1, 3), **kwargs_),
        (s_dims[0]*idim_*2, 1, s_dims[2], s_dims[3]))
    # Second pass: the same kernel with its last two axes swapped,
    # (2, 1, 3, 1); results are regrouped into idim_*4 channels.
    s_x2 = T.reshape(op_conv(
        s_x1, g_sconv_ker.transpose(0,1,3,2),
        filter_shape=(2, 1, 3, 1),
    ), (s_dims[0], idim_*4, s_dims[2], s_dims[3]))
    # Append the original input on axis 1, giving idim_*5 channels.
    s_y = T.join(1, s_x2, s_x_)
    # NOTE(review): `init_scale_=ir` is passed next to `**kwargs_`; if
    # kwargs_ itself carries 'init_scale_' this call raises TypeError for a
    # duplicate keyword -- confirm the intended handling.
    return g_mdl.lyr_conv(
        name_, s_y, idim_*5, odim_, fsize_=1, init_scale_=ir, **kwargs_);
项目:fxnn    作者:khaotik    | 项目源码 | 文件源码
# One GRU-style step: gate the input, mix it with the previous state and
# return the next state.
def lyr_gru(
        self, name_,
        s_x_, s_state_,
        idim_, hdim_,
        # NOTE(review): the rest of the parameter list and the closing `):`
        # are not present in this listing. The body reads `axis_`,
        # `lyr_linear_`, `op_gate_` and `op_act_`, so they are presumably
        # keyword parameters declared here -- confirm names and defaults.
        # Fall back to this object's own linear layer when none is supplied.
        if lyr_linear_ is None:
            lyr_linear_ = self.lyr_linear
        # NOTE(review): unlike the `_gate` call below, no tensor is passed
        # between the layer name and the dimensions here -- confirm the
        # intended input.
        s_igate = lyr_linear_(name_+'_igate', idim_+hdim_, idim_)
        # Scale the input elementwise by the input gate, then concatenate it
        # with the previous state along `axis_`.
        s_inp_gated = T.join(axis_, s_x_ * op_gate_(s_igate), s_state_)
        # One layer call (idim_+hdim_ -> hdim_*2), split into the update-gate
        # pre-activation and the candidate-state pre-activation.
        s_gate_lin, s_state_tp1_lin = T.split(lyr_linear_(name_+'_gate', s_inp_gated, idim_+hdim_, hdim_*2), [hdim_,hdim_], 2, axis_)
        s_gate = op_gate_(s_gate_lin)
        # Blend: keep `s_gate` of the old state, take `1 - s_gate` of the
        # activated candidate.
        return s_state_*s_gate + op_act_(s_state_tp1_lin)*(1.-s_gate)
项目:dnc-theano    作者:khaotik    | 项目源码 | 文件源码
# One LSTM step: from the input and the previous (cell, hidden) state, build
# the next (cell, hidden) state.
def lyr_lstm_flat(
        self, name_,
        s_x_, s_cell_, s_hid_,
        idim_, hdim_,
        # NOTE(review): the rest of the parameter list, the closing `):` and
        # the docstring's triple quotes are not present in this listing. The
        # body reads `axis_`, `lyr_linear_`, `op_gate_` and `op_act_`, so
        # they are presumably keyword parameters declared here -- confirm
        # names and defaults. The prose lines below are the remains of the
        # docstring.
        LSTM layer, flat version

        In order to use, you need to provide state variable

            hidden_state, cell_state

        # Concatenate the input with the previous hidden state along `axis_`.
        s_inp = T.join(axis_, s_x_, s_hid_)
        # Fall back to this object's own linear layer when none is supplied.
        if lyr_linear_ is None:
            lyr_linear_ = self.lyr_linear
        # A single layer call (named `<name_>_rec`, idim_+hdim_ -> hdim_*4)
        # produces all pre-activations; split them into the three gates
        # (hdim_*3) and the candidate input (hdim_).
        s_gates_lin, s_inp_lin = T.split(
            lyr_linear_(name_+'_rec', s_inp, idim_+hdim_, hdim_*4),
            [hdim_*3,hdim_], 2, axis=axis_)
        # Apply the gate op once, then cut the result into three hdim_-wide
        # pieces bound to s_igate, s_fgate and s_ogate.
        s_igate, s_fgate, s_ogate = T.split(op_gate_(s_gates_lin), [hdim_]*3, 3, axis=axis_)
        # New cell: gated candidate plus gated previous cell.
        s_cell_tp1 = s_igate*op_act_(s_inp_lin) + s_fgate*s_cell_
        # New hidden state: activated cell scaled by s_ogate.
        s_hid_tp1 = op_act_(s_cell_tp1)*s_ogate
        # Returned order is (cell, hidden).
        return s_cell_tp1, s_hid_tp1
项目:CNNbasedMedicalSegmentation    作者:BRML    | 项目源码 | 文件源码
def max_pool_3d(inpt, inpt_shape, ds, ignore_border=True):
    """Pool a 5-D tensor over three axes using two 2-D pooling passes.

    Axes 3 and 4 are pooled by ``ds[1]`` and ``ds[2]`` in the first pass;
    axis 1 is pooled by ``ds[0]`` in the second pass. Axes 0 and 2 are left
    untouched.

    Parameters
    ----------
    inpt : 5-D symbolic tensor.
    inpt_shape : symbolic shape vector of `inpt` (it is sliced and joined).
    ds : sequence of three pooling factors, for axes 1, 3 and 4 in that order.
    ignore_border : forwarded to both ``pool.pool_2d`` calls.

    Returns
    -------
    5-D symbolic tensor with the same axis order as `inpt`.
    """
    # NOTE(review): the tails of both shape tuples and of both pool_2d calls
    # were missing from the listing this was restored from. The tuples follow
    # from `ndim=4` (merge the two leading axes, keep the other three).
    # Passing `ignore_border` through is an assumption -- confirm.

    # Downsize 'into the depth' by downsizing twice.
    inpt_shape_4d = (
        inpt_shape[0] * inpt_shape[1],
        inpt_shape[2],
        inpt_shape[3],
        inpt_shape[4],
    )

    inpt_as_tensor4 = T.reshape(inpt, inpt_shape_4d, ndim=4)

    # The first pooling only downsizes the height and the width.
    pool_out1 = pool.pool_2d(inpt_as_tensor4, (ds[1], ds[2]),
                             ignore_border=ignore_border)
    out_shape1 = T.join(0, inpt_shape[:-2], pool_out1.shape[-2:])

    inpt_pooled_once = T.reshape(pool_out1, out_shape1, ndim=5)

    # Shuffle dimensions so the depth is the last dimension.
    inpt_shuffled = inpt_pooled_once.dimshuffle(0, 4, 2, 3, 1)

    shuffled_shape = inpt_shuffled.shape
    # Reshape input to be 4 dimensional.
    shuffle_shape_4d = (
        shuffled_shape[0] * shuffled_shape[1],
        shuffled_shape[2],
        shuffled_shape[3],
        shuffled_shape[4],
    )

    inpt_shuffled_4d = T.reshape(inpt_shuffled, shuffle_shape_4d, ndim=4)

    # The second pooling only downsizes the (now last) depth axis.
    pool_out2 = pool.pool_2d(inpt_shuffled_4d, (1, ds[0]),
                             ignore_border=ignore_border)
    out_shape2 = T.join(0, shuffled_shape[:-2], pool_out2.shape[-2:])

    inpt_pooled_twice = T.reshape(pool_out2, out_shape2, ndim=5)
    # The same permutation swaps axes 1 and 4 back to the input layout.
    pool_output_fin = inpt_pooled_twice.dimshuffle(0, 4, 2, 3, 1)

    return pool_output_fin
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def setUp(self):
        """Configure the shared join/split test fixture to run the GPU ops."""
        super(G_Join_and_Split, self).setUp()

        # Ops under test. There is no MakeVector on GPU, so a GpuJoin stands
        # in for the make-vector op as well.
        self.join_op = GpuJoin()
        self.make_vector_op = GpuJoin()
        self.split_op_class = GpuSplit
        self.shared = gpuarray_shared_constructor

        # Compilation settings.
        self.mode = mode_with_gpu.excluding('constant_folding')
        # this is to avoid errors with limited devices
        self.floatX = 'float32'
        debug_modes = ['DebugMode', 'DEBUG_MODE']
        self.hide_error = theano.config.mode not in debug_modes
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def local_gpua_join_1(node):
    """Replace a GpuJoin of a single tensor by that tensor.

    Returns a one-element list holding the joined tensor when `node` is a
    GpuJoin with exactly two inputs (the axis and one tensor), else None.
    """
    if not isinstance(node.op, GpuJoin):
        return None
    if len(node.inputs) != 2:
        return None
    # inputs[0] is the axis; inputs[1] is the only joined tensor.
    return [node.inputs[1]]
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_join(self):
        """Run the R-op/L-op check on a join of `self.x` with a shared vector."""
        # NOTE(review): the second argument of numpy.asarray was cut off in
        # the listing this was restored from; floatX is assumed -- confirm.
        tv = numpy.asarray(self.rng.uniform(size=(10,)),
                           dtype=theano.config.floatX)
        t = theano.shared(tv)
        out = tensor.join(0, self.x, t)
        # The joined vector is 10 entries longer than the input.
        self.check_rop_lop(out, (self.in_shape[0] + 10,))
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_equality_shapes(self):
        # Test equality where one side contains only shape-related
        # expressions.
        if theano.config.mode == "FAST_COMPILE":
            raise SkipTest("Skip opt test as the opt is disabled")
        x = T.vector('x', dtype=config.floatX)
        # NOTE(review): the list literal below is cut off in this listing;
        # its remaining element(s) and the closing `]:` are missing.
        for g in [x.shape[0],
            # Comparing against 0 must still depend on the actual length.
            f = theano.function([x], T.eq(g, 0))
            assert f([3, 3]) == 0
            assert f([]) == 1

            # Comparing against -1: the compiled graph is checked with
            # assert_eqs_const(f, 0) and the result is 0 for this input.
            f = theano.function([x], T.eq(g, -1))
            self.assert_eqs_const(f, 0)
            assert f([3, 3]) == 0

        # NOTE(review): this join call is cut off as well; the remaining
        # operand(s) and the closing `)` are missing.
        g = join(0,
                 x.shape[0:],  # todo test reshape, dimshuffle
        # Same two checks as above, on the vector-valued expression.
        f = theano.function([x], T.eq(g, 0))
        assert (f([3, 3]) == 0).all()
        assert (f([]) == 1).all()

        f = theano.function([x], T.eq(g, -1))
        self.assert_eqs_const(f, 0, op=T.alloc)
        assert (f([3, 3]) == 0).all()
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_constant_merge(self):
        """Regression test for gh-1122.

        The error was caused by the combination of the merge optimizer and
        ShapeFeature.
        """
        x = tensor.constant([0, 0])
        y = x[1:]
        x1 = x - tensor.join(0, y, y)
        # NOTE(review): nothing is compiled or asserted after x1 is built in
        # this listing; the original test presumably continues -- confirm.
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_local_useless_rebroadcast(self):
        """A canonicalized join of two vectors must contain no Rebroadcast."""
        canonicalize_mode = theano.compile.get_default_mode().including(
            'canonicalize')
        left = T.vector()
        right = T.vector()
        f = theano.function([left, right], T.join(0, left, right),
                            mode=canonicalize_mode)
        # Run once to make sure the compiled function executes.
        f([1, 2], [3, 4, 5])

        rebroadcast_nodes = [node for node in f.maker.fgraph.toposort()
                             if isinstance(node.op, T.Rebroadcast)]
        assert len(rebroadcast_nodes) == 0

        assert check_stack_trace(f, ops_to_check='all')
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_opt_gpujoin_onlyajoin():
    """Check where a lone join ends up in the optimized GPU graph.

    With two float32 shared variables the graph must end in
    GpuJoin -> HostFromGpu. With a float64 constant as second operand the
    last node must be a host Join instead.
    """
    # from a bug in normal sampling
    left_val = numpy.asarray([[1, 2], [3, 4]], dtype='float32')
    right_val = numpy.asarray([[5, 6, 7], [8, 9, 10]], dtype='float32')
    a = cuda.shared_constructor(left_val)
    b = cuda.shared_constructor(right_val)

    f = theano.function([], tensor.join(1, a, b), mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()

    assert isinstance(topo[-1].op, cuda.HostFromGpu)
    assert isinstance(topo[-2].op, cuda.GpuJoin)

    expected = numpy.concatenate([left_val, right_val], axis=1)
    assert numpy.all(f() == expected)

    # test mixed dtype
    right_val = numpy.asarray([[5, 6, 7], [8, 9, 10]], dtype='float64')
    b = theano.tensor.constant(right_val)

    f = theano.function([], tensor.join(1, a, b), mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert isinstance(topo[-1].op, theano.tensor.Join)

    expected = numpy.concatenate([left_val, right_val], axis=1)
    assert numpy.all(f() == expected)
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_opt_gpujoin_joinvectors_elemwise_then_minusone():
    """Join two elementwise results, drop the last entry, check the graph.

    The optimized graph must end in GpuJoin -> GpuSubtensor -> HostFromGpu
    and the values must match the numpy computation.
    """
    # from a bug in gpu normal sampling
    first_val = numpy.asarray([1, 2, 3, 4], dtype='float32')
    second_val = numpy.asarray([5, 6, 7, 8], dtype='float32')
    a = cuda.shared_constructor(first_val)
    b = cuda.shared_constructor(second_val)

    joined = tensor.join(0, tensor.cos(a), tensor.sin(b))
    all_but_last = joined[:-1]

    f = theano.function([], all_but_last, mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()

    # Check the three trailing nodes, last one first.
    assert isinstance(topo[-1].op, cuda.HostFromGpu)
    assert isinstance(topo[-2].op, cuda.GpuSubtensor)
    assert isinstance(topo[-3].op, cuda.GpuJoin)

    expected = numpy.concatenate(
        [numpy.cos(first_val), numpy.sin(second_val)], axis=0)[:-1]

    assert numpy.allclose(numpy.asarray(f()), expected)
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_gpujoin_no_rebroadcast():
    """A join of a single shared matrix must compile without Rebroadcast."""
    host_val = numpy.asarray([[1, 2], [3, 4]], dtype='float32')
    shared_val = tcn.shared_constructor(host_val)
    f = theano.function([], T.join(1, shared_val))
    for node in f.maker.fgraph.toposort():
        assert not isinstance(node.op, T.Rebroadcast)
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def unpad_dims(output, input, leftdims, rightdims):
    """Reshapes the output after pad_dims.

    This reverts the padding by `pad_dims`.

    Parameters
    ----------
    output : result computed on the padded input.
    input : the tensor that was originally given to `pad_dims`.
    leftdims : unused here; kept so the signature mirrors `pad_dims`.
    rightdims : number of trailing dimensions whose sizes are taken from
        `output`; the leading sizes are taken from `input`.

    Returns
    -------
    `output` itself when it already has as many dimensions as `input`,
    otherwise `output` reshaped to ``input.ndim`` dimensions.
    """
    if output.ndim == input.ndim:
        return output

    # restore the output to the original shape
    outshp = tensor.join(0, input.shape[:-rightdims], output.shape[-rightdims:])
    return GpuReshape(input.ndim)(output, outshp)
项目:pl-cnn    作者:oval-group    | 项目源码 | 文件源码
def my_pool_2d(input, ds, ignore_border=None, st=None, padding=(0, 0),
               mode='max'):
    """Pool the last two dimensions of `input` with `MyPool`.

    This function is a patch to the maxpool op of Theano:
    contrarily to current implementation of maxpool, the gradient is
    backpropagated to only one input of a given patch if several inputs have
    the same value. This is consistent with the CuDNN implementation (and
    therefore the op is replaced by the CuDNN version when possible).

    Parameters
    ----------
    input : tensor with at least two dimensions.
    ds, st, padding, mode : forwarded unchanged to `MyPool`.
    ignore_border : must be None or truthy; the op always runs with
        ``ignore_border=True``.

    Returns
    -------
    Pooled tensor with the same number of dimensions as `input`.

    Raises
    ------
    NotImplementedError
        If `input` has fewer than two dimensions.
    AssertionError
        If `ignore_border` is given and falsy.
    """
    # NOTE(review): the tail of the signature was missing from the listing
    # this was restored from. The body requires a `mode` parameter; the
    # default 'max' is assumed -- confirm.
    if input.ndim < 2:
        raise NotImplementedError('pool_2d requires a dimension >= 2')

    if ignore_border is not None:
        # check that ignore_border is True if provided
        assert ignore_border
    ignore_border = True

    if input.ndim == 4:
        # Already in the layout the op is applied to; no reshaping needed.
        op = MyPool(ds, ignore_border, st=st, padding=padding, mode=mode)
        output = op(input)
        return output

    # extract image dimensions
    img_shape = input.shape[-2:]

    # count the number of "leading" dimensions, store as dmatrix
    batch_size = tensor.prod(input.shape[:-2])
    batch_size = tensor.shape_padright(batch_size, 1)

    # store as 4D tensor with shape: (batch_size,1,height,width)
    new_shape = tensor.cast(tensor.join(0, batch_size,
                                        tensor.as_tensor([1]),
                                        img_shape), 'int64')
    input_4D = tensor.reshape(input, new_shape, ndim=4)

    # downsample mini-batch of images
    op = MyPool(ds, ignore_border, st=st, padding=padding, mode=mode)
    output = op(input_4D)

    # restore to original shape
    outshp = tensor.join(0, input.shape[:-2], output.shape[-2:])
    return tensor.reshape(output, outshp, ndim=input.ndim)
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def pad_dims(input, leftdims, rightdims):
    """Reshapes the input to a (leftdims + rightdims) tensor

    This helper function is used to convert pooling inputs with arbitrary
    non-pooling dimensions to the correct number of dimensions for the
    GPU pooling ops.

    This reduces or expands the number of dimensions of the input to
    exactly `leftdims`, by adding extra dimensions on the left or by
    combining some existing dimensions on the left of the input.

    Use `unpad_dims` to reshape back to the original dimensions.

    Examples
    --------
    Given input of shape (3, 5, 7), ``pad_dims(input, 2, 2)``
    adds a singleton dimension and reshapes to (1, 3, 5, 7).
    Given that output from pad_dims, ``unpad_dims(output, input, 2, 2)``
    reshapes back to (3, 5, 7).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 2)``
    does not reshape and returns output with shape (3, 5, 7, 9).

    Given input of shape (3, 5, 7, 9, 11), ``pad_dims(input, 2, 2)``
    combines the first two dimensions and reshapes to (15, 7, 9, 11).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 3)``
    adds a singleton dimension and reshapes to (1, 3, 5, 7, 9).

    Parameters
    ----------
    input : tensor with at least `rightdims` dimensions.
    leftdims : number of non-pooling (leading) dimensions wanted.
    rightdims : number of pooling (trailing) dimensions, kept as they are.

    Returns
    -------
    `input` itself when it already has ``leftdims + rightdims`` dimensions,
    otherwise a reshaped tensor with that many dimensions.
    """
    assert input.ndim >= rightdims

    if input.ndim == (leftdims + rightdims):
        return input

    # extract image dimensions
    img_shape = input.shape[-rightdims:]

    non_pool_ndim = input.ndim - rightdims
    if non_pool_ndim < leftdims:
        # too few dimensions, pad on the left
        dummy_dims = tensor.as_tensor([1] * (leftdims - non_pool_ndim))
        new_shape = tensor.join(0, dummy_dims,
                                input.shape[:non_pool_ndim],
                                img_shape)
    else:
        # too many dimensions, combine the leading dimensions
        batched_ndim = non_pool_ndim - leftdims + 1
        batch_size = tensor.prod(input.shape[:batched_ndim])
        # convert to a vector for tensor.join
        batch_size = tensor.shape_padright(batch_size, 1)
        # 1 merged axis + (leftdims - 1) untouched leading axes + rightdims
        new_shape = tensor.join(0, batch_size,
                                input.shape[batched_ndim:non_pool_ndim],
                                img_shape)

    # store in the required shape
    new_shape = tensor.cast(new_shape, 'int64')
    input_ND = GpuReshape(leftdims + rightdims)(input, new_shape)
    return input_ND
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_local_join_empty():
    """Check how empty operands of a join are handled by the optimizer.

    Each case compiles a function and checks the number of inputs of the
    single Join node left in the optimized graph, plus the output dtype.
    """
    def check_single_join(fn, n_inputs):
        # Exactly one Join, with `n_inputs` inputs (axis included), and a
        # floatX output.
        join_nodes = [node for node in fn.maker.fgraph.toposort()
                      if isinstance(node.op, Join)]
        assert len(join_nodes) == 1
        assert all(len(node.inputs) == n_inputs for node in join_nodes)
        assert fn.maker.fgraph.outputs[0].dtype == config.floatX

    # test for vector, vector, empty to vector
    empty_vec = numpy.asarray([], dtype=config.floatX)
    a = tensor.vector('a')
    f = function([a], tensor.join(0, a, a, empty_vec), mode=mode_opt)
    result = f([1])
    assert numpy.all(result == [1])
    check_single_join(f, 3)

    # test for matrix join(1,a)
    empty_mat = numpy.asarray([[]], dtype=config.floatX)
    m = tensor.matrix('m')
    f = function([m], join(1, empty_mat, m, m, m), mode=mode_opt)
    result = f([[1]])
    assert numpy.all(result == [[1]])
    check_single_join(f, 4)

    # test for vector, vector, empty to matrix
    # We can't optimize this case.
    f = function([a], tensor.stack([a, a, empty_vec]), mode=mode_opt)
    result = f([])
    assert numpy.all(result == [1])
    check_single_join(f, 4)

    # test for matrix join(0,a)
    # We can't optimize this case.
    middle = numpy.asarray([[2.]], dtype=config.floatX)
    f = function([m], join(0, m, middle, m), mode=mode_opt)
    result = f([[1]])
    assert numpy.all(result == [[1], [2], [1]])
    check_single_join(f, 4)
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def local_gpu_join(node):
    """Move a host Join onto the GPU when all its tensors can live there.

    Inspired by the opt for convop.

    Very loose notation follows.

    Subgraphs concerned first look like
        [array of HostTensor] -> HostToGpu -> GpuToHost
        -> Join -> HostToGpu -> GpuToHost

    First we apply this Opt:

    join(host_from_gpu) -> host_from_gpu(gpu_join)

    then, as an intermediate result, there should be
    host_from_gpu(gpu_join) -> HostToGpu -> GpuToHost
    this unnecessary GpuToHost -> HostToGpu should be removed
    by other opts, leaving us with
    host_from_gpu(gpu_join)

    For intermediate places in the graph not covered by the first opt, the
    following could be useful:

    gpu_from_host(join) -> gpu_join(gpu_from_host)

    not implemented yet.

    Returns
    -------
    A one-element list with the replacement variable when `node` is a Join
    whose tensors are all float32 and each either the output of HostFromGpu
    or a constant; None otherwise.
    """
    if isinstance(node.op, tensor.Join):
        # optimizing this case:
        # join(host_from_gpu) -> host_from_gpu(gpu_join)

        # inputs[0] is the axis, the rest are the tensors being joined.
        axis_and_tensors = node.inputs

        matches = [t.dtype == 'float32' and
                   ((t.owner is not None and
                     isinstance(t.owner.op, HostFromGpu)) or
                    isinstance(t, gof.Constant)) for t in axis_and_tensors[1:]]

        if all(matches):
            new_tensors = [as_cuda_ndarray_variable(t)
                           for t in axis_and_tensors[1:]]
            new_a_and_t = [axis_and_tensors[0]] + new_tensors

            replacement_node = host_from_gpu(gpu_join(*new_a_and_t))

            return [replacement_node]

# This is a copy of the same opt in tensor to make the tests happy,
# but I'm not convinced it is actually needed.
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def local_gpualloc(node):
    """Replace a host ``tensor.alloc`` by ``host_from_gpu(gpu_alloc(...))``.

    The replacement is made for a float32 fill value when one of these holds:

    * the fill value is the output of HostFromGpu;
    * every client of the alloc is ``gpu_from_host``;
    * every client is a join whose tensors are all produced by
      ``host_from_gpu`` or ``tensor.alloc``.

    Returns
    -------
    A one-element list with the replacement variable, or None when the
    node is left alone.
    """
    replace = False
    if node.op == tensor.alloc:
        if node.inputs[0].owner and \
           isinstance(node.inputs[0].owner.op, HostFromGpu):
            replace = True
        elif all([c != 'output' and c.op == gpu_from_host
                  for c, idx in node.outputs[0].clients]):
            # if all clients are on gpu
            replace = True
        elif all([c != 'output' and
                  c.op == tensor.join and
                  all(i.owner and
                      i.owner.op in [host_from_gpu, tensor.alloc]
                      for i in c.inputs[1:])
                  for c, idx in node.outputs[0].clients]):
            # if the client is on gpu or alloc
            replace = True
        if replace and node.inputs[0].dtype != 'float32':
            replace = False
    if replace:
        val = node.inputs[0]
        shp = node.inputs[1:]
        old_out = node.outputs[0]
        new_out = host_from_gpu(gpu_alloc(val, *shp))

        # Sigh. it's an annoying thing about theano
        # that you can't add information to the graph.
        # If for some reason it has come to light that
        # one of the dimensions is broadcastable, we have to hide that
        # or the optimization won't go through.
        if new_out.type != old_out.type:
            assert new_out.type.ndim == old_out.type.ndim
            assert new_out.type.dtype == old_out.type.dtype
            # it seems to have happened that new_out has some broadcastable
            # dimensions that old_out did not have
            for b_old, b_new in zip(old_out.type.broadcastable,
                                    new_out.type.broadcastable):
                # new_out may gain broadcastable dimensions, never lose one
                assert b_new or (not b_old)
            new_out = tensor.patternbroadcast(new_out, old_out.broadcastable)

        return [new_out]
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def pad_dims(input, leftdims, rightdims):
    """Reshapes the input to a (leftdims + rightdims) tensor

    This helper function is used to convert pooling inputs with arbitrary
    non-pooling dimensions to the correct number of dimensions for the
    GPU pooling ops.

    This reduces or expands the number of dimensions of the input to
    exactly `leftdims`, by adding extra dimensions on the left or by
    combining some existing dimensions on the left of the input.

    Use `unpad_dims` to reshape back to the original dimensions.

    Examples
    --------
    Given input of shape (3, 5, 7), ``pad_dims(input, 2, 2)``
    adds a singleton dimension on the left and reshapes to (1, 3, 5, 7).
    Given that output from pad_dims, ``unpad_dims(output, input, 2, 2)``
    reshapes back to (3, 5, 7).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 2)``
    does not reshape and returns output with shape (3, 5, 7, 9).

    Given input of shape (3, 5, 7, 9, 11), ``pad_dims(input, 2, 2)``
    combines the first two dimensions (3 * 5) and reshapes to
    (15, 7, 9, 11).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 3)``
    adds a singleton dimension on the left and reshapes to (1, 3, 5, 7, 9).

    Parameters
    ----------
    input : tensor with at least `rightdims` dimensions.
    leftdims : number of non-pooling (leading) dimensions wanted.
    rightdims : number of pooling (trailing) dimensions, kept as they are.

    Returns
    -------
    `input` itself when it already has ``leftdims + rightdims`` dimensions,
    otherwise a reshaped tensor with that many dimensions.
    """
    assert input.ndim >= rightdims

    if input.ndim == (leftdims + rightdims):
        return input

    # extract image dimensions
    img_shape = input.shape[-rightdims:]

    non_pool_ndim = input.ndim - rightdims
    if non_pool_ndim < leftdims:
        # too few dimensions, pad on the left
        dummy_dims = tensor.as_tensor([1] * (leftdims - non_pool_ndim))
        new_shape = tensor.join(0, dummy_dims,
                                input.shape[:non_pool_ndim],
                                img_shape)
    else:
        # too many dimensions, combine the leading dimensions
        batched_ndim = non_pool_ndim - leftdims + 1
        batch_size = tensor.prod(input.shape[:batched_ndim])
        # convert to a vector for tensor.join
        batch_size = tensor.shape_padright(batch_size, 1)
        # 1 merged axis + (leftdims - 1) untouched leading axes + rightdims
        new_shape = tensor.join(0, batch_size,
                                input.shape[batched_ndim:non_pool_ndim],
                                img_shape)

    # store in the required shape
    new_shape = tensor.cast(new_shape, 'int64')
    input_ND = GpuReshape(leftdims + rightdims)(input, new_shape)
    return input_ND