Python theano.tensor 模块,Elemwise() 实例源码


项目:pyrl    作者:frsong    | 项目源码 | 文件源码
def get_processor_type():
    """
    Test whether the GPU is being used.

    A shared vector of random values is created, ``exp`` of it is compiled
    with Theano, and the nodes of the compiled graph are inspected.

    NOTE(review): the original docstring said this is "based on the example
    in" a reference that is missing from the source; presumably the Theano
    GPU tutorial -- confirm.

    Returns
    -------
    str
        ``'cpu'`` if any node of the compiled graph has an op that is a
        ``tensor.Elemwise`` whose class name does not contain ``'Gpu'``,
        otherwise ``'gpu'``.
    """
    rng = np.random.RandomState(1234)

    n = 10*30*768
    x = shared(rng.rand(n))
    f = theano.function([], tensor.exp(x))

    # A dedicated loop name keeps the shared variable `x` from being
    # shadowed (and, on Python 2, overwritten) by the iteration variable.
    runs_on_cpu = any(
        isinstance(node.op, tensor.Elemwise) and
        'Gpu' not in type(node.op).__name__
        for node in f.maker.fgraph.toposort())
    return 'cpu' if runs_on_cpu else 'gpu'
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_infer_shape(self):
    """Run ``_compile_and_check`` on ``Elemwise(scalar.add)`` for shape pairs.

    For each pair, a dimension of length 1 is declared broadcastable and the
    inputs are zero-filled arrays of ``theano.config.floatX``.
    """
    shape_pairs = [
        ((5, 6), (5, 6)),
        ((5, 6), (5, 1)),
        ((5, 6), (1, 6)),
        ((5, 1), (5, 6)),
        ((1, 6), (5, 6)),
        ((2, 3, 4, 5), (2, 3, 4, 5)),
        ((2, 3, 4, 5), (2, 3, 1, 5)),
        ((2, 3, 4, 5), (1, 3, 4, 5)),
        ((2, 1, 4, 5), (2, 3, 4, 5)),
        ((2, 3, 4, 1), (2, 3, 4, 5)),
    ]
    for left_shape, right_shape in shape_pairs:
        float_name = theano.config.floatX
        # Length-1 dimensions are the broadcastable ones.
        left = TensorType(float_name, [dim == 1 for dim in left_shape])()
        right = TensorType(float_name, [dim == 1 for dim in right_shape])()
        left_val = numpy.zeros(left_shape, dtype=float_name)
        right_val = numpy.zeros(right_shape, dtype=float_name)
        summed = Elemwise(scalar.add)(left, right)
        self._compile_and_check([left, right], [summed],
                                [left_val, right_val], Elemwise)
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_recursive_lift(self):
        # Builds ((v + 42) * (m + 84)).T, compares the printed graph with an
        # expected string, applies local_dimshuffle_lift to the owner of the
        # output and compares the printed result with a second expected string.
        v = T.vector(dtype="float64")
        m = T.matrix(dtype="float64")
        out = ((v + 42) * (m + 84)).T
        g = FunctionGraph([v, m], [out])
        # NOTE(review): the literal below is never closed (the parenthesis
        # opened here has no matching ")") and its fragments do not form a
        # balanced graph description; lines appear to have been lost from the
        # source. Restore it from the upstream test before running.
        init_str_g = ("[InplaceDimShuffle{1,0}(Elemwise{mul,no_inplace}"
                      "(<TensorType(float64, vector)>, "
                      "InplaceDimShuffle{x}(TensorConstant{42}))), "
                      "(<TensorType(float64, matrix)>, "
        self.assertTrue(str(g) == init_str_g)
        # The local optimizer is applied by hand to the node producing the
        # output; the first returned variable becomes the new graph output.
        new_out = local_dimshuffle_lift.transform(g.outputs[0].owner)[0]
        new_g = FunctionGraph(g.inputs, [new_out])
        # NOTE(review): this literal is unterminated as well -- same damage as
        # init_str_g above.
        opt_str_g = ("[Elemwise{mul,no_inplace}(Elemwise{add,no_inplace}"
                     "(InplaceDimShuffle{0,x}(<TensorType(float64, vector)>), "
                     "InplaceDimShuffle{x,x}(TensorConstant{42})), "
                     "(<TensorType(float64, matrix)>), "
        self.assertTrue(str(new_g) == opt_str_g)
        # Check stacktrace was copied over correctly after opt was applied
        self.assertTrue(check_stack_trace(new_g, ops_to_check='all'))
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_eq(self):
    """Check the compiled graphs of ``eq(x, y)`` and ``eq(x, x)``.

    With two distinct matrices the graph must be a single Elemwise node whose
    scalar op is ``EQ``. With the same matrix twice the result must be all
    ones and the graph must have three nodes ending in an ``Alloc``.
    """
    lhs = T.dmatrix()
    rhs = T.dmatrix()
    compare = theano.function([lhs, rhs], T.eq(lhs, rhs), mode=self.mode)
    lhs_val = numpy.random.rand(5, 4)
    rhs_val = numpy.random.rand(5, 4)
    compare(lhs_val, rhs_val)
    nodes = compare.maker.fgraph.toposort()
    assert len(nodes) == 1
    only_op = nodes[0].op
    assert isinstance(only_op, T.Elemwise)
    assert isinstance(only_op.scalar_op, theano.scalar.EQ)

    self_compare = theano.function([lhs], T.eq(lhs, lhs), mode=self.mode)
    all_ones = numpy.ones((5, 4))
    assert numpy.all(self_compare(lhs_val) == all_ones)
    self_nodes = self_compare.maker.fgraph.toposort()
    # Expected graph: Shape_i{1}(x), Shape_i{0}(x),
    # Alloc([[1]], Shape_i{0}.0, Shape_i{1}.0)
    assert len(self_nodes) == 3
    assert isinstance(self_nodes[-1].op, T.Alloc)
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_neq(self):
    """Check the compiled graphs of ``neq(x, y)`` and ``neq(x, x)``.

    With two distinct matrices the graph must be a single Elemwise node whose
    scalar op is ``NEQ``. With the same matrix twice the result must be all
    zeros and the graph must have three nodes ending in an ``Alloc``.
    """
    lhs = T.dmatrix()
    rhs = T.dmatrix()
    differ = theano.function([lhs, rhs], T.neq(lhs, rhs), mode=self.mode)
    lhs_val = numpy.random.rand(5, 4)
    rhs_val = numpy.random.rand(5, 4)
    differ(lhs_val, rhs_val)
    nodes = differ.maker.fgraph.toposort()
    assert len(nodes) == 1
    only_op = nodes[0].op
    assert isinstance(only_op, T.Elemwise)
    assert isinstance(only_op.scalar_op, theano.scalar.NEQ)

    self_differ = theano.function([lhs], T.neq(lhs, lhs), mode=self.mode)
    all_zeros = numpy.zeros((5, 4))
    assert numpy.all(self_differ(lhs_val) == all_zeros)
    self_nodes = self_differ.maker.fgraph.toposort()
    assert len(self_nodes) == 3
    assert isinstance(self_nodes[-1].op, T.Alloc)
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_mul(self):
    """Check the compiled graphs of one- and two-argument ``mul``.

    ``mul(x)`` must compile to a single ``deep_copy_op`` node. ``mul(x, y)``
    must compute the elementwise product with a single Elemwise node whose
    scalar op is ``Mul``.
    """
    first = T.dmatrix()
    second = T.dmatrix()
    unary = theano.function([first], T.mul(first), mode=self.mode)
    first_val = numpy.random.rand(5, 4)
    second_val = numpy.random.rand(5, 4)
    unary_nodes = unary.maker.fgraph.toposort()
    assert len(unary_nodes) == 1
    assert unary_nodes[0].op == deep_copy_op

    binary = theano.function([first, second], T.mul(first, second),
                             mode=self.mode)
    product = first_val * second_val
    assert numpy.all(binary(first_val, second_val) == product)
    binary_nodes = binary.maker.fgraph.toposort()
    assert len(binary_nodes) == 1
    binary_op = binary_nodes[0].op
    assert isinstance(binary_op, T.Elemwise)
    assert isinstance(binary_op.scalar_op, theano.scalar.Mul)
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test(self):
    """Check that a Cast applied to an already-cast input is a single Elemwise.

    Two cases are compiled: a float32 matrix cast to float64 and a float64
    matrix cast to float32. In both, the compiled graph must have exactly one
    node and its op must be an Elemwise.
    """
    cases = (
        (T.fmatrix, "float64",
         lambda: numpy.random.rand(5, 4).astype("float32")),
        (T.dmatrix, "float32",
         lambda: numpy.random.rand(5, 4)),
    )
    for make_input, target, make_data in cases:
        x = make_input()
        o = T.Elemwise(scal.Cast(scal.Scalar(target)))(x.astype(target))
        f = theano.function([x], o, mode=self.mode)
        # Sample data is drawn as in the original, but never passed to `f`.
        dx = make_data()
        nodes = f.maker.fgraph.toposort()
        assert len(nodes) == 1
        assert isinstance(nodes[0].op, T.Elemwise)
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_local_one_plus_erf(self):
    """Check how ``1 + erf(x)`` and ``erf(x) + 2`` are compiled.

    ``1 + erf(x)`` and ``erf(x) + 1`` must compile to the ops
    ``[T.mul, T.erfc]``. ``erf(x) + 2`` must keep ``T.erf`` followed by an
    Elemwise whose scalar op is ``Add``.
    """
    # NOTE(review): the end of this call was missing from the source, which
    # left the parenthesis open. dtype=floatX is assumed so the data matches
    # T.vector() -- confirm. `val` is not used in the visible part of the
    # test.
    val = numpy.asarray([-30, -3, -2, -1, 0, 1, 2, 3, 30],
                        dtype=theano.config.floatX)
    x = T.vector()

    f = theano.function([x], 1 + T.erf(x), mode=self.mode)
    assert [n.op for n in f.maker.fgraph.toposort()] == [
        T.mul, T.erfc], f.maker.fgraph.toposort()

    f = theano.function([x], T.erf(x) + 1, mode=self.mode)
    assert [n.op for n in f.maker.fgraph.toposort()] == [
        T.mul, T.erfc], f.maker.fgraph.toposort()

    # With a constant other than 1 the erf must stay in the graph.
    f = theano.function([x], T.erf(x) + 2, mode=self.mode)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 2
    assert topo[0].op == T.erf
    assert isinstance(topo[1].op, T.Elemwise)
    assert isinstance(topo[1].op.scalar_op, scal.Add)
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_local_erf_minus_one(self):
    """Check how ``erf(x) - 1`` and ``erf(x) - 2`` are compiled.

    ``erf(x) - 1``, ``erf(x) + (-1)`` and ``-1 + erf(x)`` must compile to the
    ops ``[T.erfc, T.mul]``. ``erf(x) - 2`` must keep ``T.erf`` followed by an
    Elemwise whose scalar op is ``Add`` or ``Sub``.
    """
    # NOTE(review): the end of this call was missing from the source, which
    # left the parenthesis open. dtype=floatX is assumed so the data matches
    # T.vector() -- confirm. `val` is not used in the visible part of the
    # test.
    val = numpy.asarray([-30, -3, -2, -1, 0, 1, 2, 3, 30],
                        dtype=theano.config.floatX)
    x = T.vector()

    f = theano.function([x], T.erf(x) - 1, mode=self.mode)
    assert [n.op for n in f.maker.fgraph.toposort()] == [T.erfc, T.mul]

    f = theano.function([x], T.erf(x) + (-1), mode=self.mode)
    assert [n.op for n in f.maker.fgraph.toposort()] == [T.erfc, T.mul]

    f = theano.function([x], -1 + T.erf(x), mode=self.mode)
    assert [n.op for n in f.maker.fgraph.toposort()] == [T.erfc, T.mul]

    # With a constant other than 1 the erf must stay in the graph.
    f = theano.function([x], T.erf(x) - 2, mode=self.mode)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 2
    assert topo[0].op == T.erf
    assert isinstance(topo[1].op, T.Elemwise)
    assert isinstance(topo[1].op.scalar_op, (scal.Add, scal.Sub))
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_broadcast1(self):
    """Check constant-condition ``switch`` over a matrix and a vector.

    ``switch(1, x, y)`` may only leave Elemwise nodes that are casts and must
    return ``x``. ``switch(0, x, y)`` must leave no Elemwise node at all and
    must compare equal to ``y``.
    """
    # test switch(cst, matrix, row)
    x = theano.tensor.matrix('x', dtype='int32')
    y = theano.tensor.vector('y', dtype='int64')

    pick_first = theano.function([x, y], theano.tensor.switch(1, x, y),
                                 mode=self.mode)
    for node in pick_first.maker.fgraph.toposort():
        if isinstance(node.op, theano.tensor.Elemwise):
            # Only Cast is tolerated among the remaining Elemwise nodes.
            assert isinstance(node.op.scalar_op, theano.scalar.basic.Cast)
    vx = numpy.array([[1, 2, 3], [4, 5, 6]], dtype='int32')
    vy = numpy.array([10, 11, 12], dtype='int64')
    assert numpy.all(pick_first(vx, vy) == vx)

    pick_second = theano.function([x, y], theano.tensor.switch(0, x, y),
                                  mode=self.mode)
    for node in pick_second.maker.fgraph.toposort():
        assert not isinstance(node.op, theano.tensor.Elemwise)
    vx = numpy.array([[1, 2, 3], [4, 5, 6]], dtype='int32')
    vy = numpy.array([10, 11, 12], dtype='int64')
    assert numpy.all(pick_second(vx, vy) == vy)
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_broadcast2(self):
    """Check constant-condition ``switch`` over a vector and a matrix.

    ``switch(1, x, y)`` may only leave Elemwise nodes that are casts and must
    compare equal to ``x``. ``switch(0, x, y)`` must leave no Elemwise node
    and must return ``y``.
    """
    # test switch(cst, vector, matrix)

    # This case is not optimized for now.
    x = theano.tensor.vector('x', dtype='int32')
    y = theano.tensor.matrix('y', dtype='int64')

    pick_first = theano.function([x, y], theano.tensor.switch(1, x, y),
                                 mode=self.mode)
    for node in pick_first.maker.fgraph.toposort():
        if isinstance(node.op, theano.tensor.Elemwise):
            # Only Cast is tolerated among the remaining Elemwise nodes.
            assert isinstance(node.op.scalar_op, theano.scalar.basic.Cast)
    vx = numpy.array([4, 5, 6], dtype='int32')
    vy = numpy.array([[7, 8, 9], [10, 11, 12]], dtype='int64')
    assert numpy.all(pick_first(vx, vy) == vx)

    pick_second = theano.function([x, y], theano.tensor.switch(0, x, y),
                                  mode=self.mode)
    for node in pick_second.maker.fgraph.toposort():
        assert not isinstance(node.op, theano.tensor.Elemwise)
    vx = numpy.array([4, 5, 6], dtype='int32')
    vy = numpy.array([[7, 8, 9], [10, 11, 12]], dtype='int64')
    assert numpy.all(pick_second(vx, vy) == vy)
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_local_zero_div():
    """Check that ``0 / x`` and ``0 // x`` are optimized to a constant 0.

    For each input type and each division op, the optimized graph must hold
    no Elemwise node of that division, its output must keep the ndim and type
    of the unoptimized expression, and its scalar constant value must be 0.
    """
    mode = theano.compile.mode.get_default_mode().including("local_zero_div")
    input_makers = (T.scalar, T.ivector, T.ftensor4)
    division_ops = (T.int_div, T.true_div)
    for make_input in input_makers:
        x = make_input('x')
        for division in division_ops:
            y = division(0, x)
            g = optimize(FunctionGraph([x], [y]))
            # the division should be gone
            division_scalar_type = type(division.scalar_op)
            for node in g.toposort():
                assert not (isinstance(node.op, T.elemwise.Elemwise) and
                            isinstance(node.op.scalar_op,
                                       division_scalar_type))
            # the output type should match the unoptimized one
            optimized = g.outputs[0]
            assert optimized.ndim == y.ndim
            assert optimized.type == y.type
            # and the output should be zero
            assert theano.tensor.get_scalar_constant_value(optimized) == 0
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_incsubtensor_mixed():
    """Check that a float64 increment is cast before being moved to the GPU.

    This catches a bug that occurred when incrementing a float32 tensor by a
    float64 tensor. The result is defined to be float32, so downcasting the
    float64 increment in order to transfer it to the GPU is fine. The bug was
    that the optimization called GpuFromHost without casting first, which
    made the optimization fail.
    """
    X = tensor.fmatrix()
    Y = tensor.dmatrix()
    Z = tensor.inc_subtensor(X[0:1, 0:1], Y)
    f = theano.function([X, Y], Z, mode=mode_with_gpu)

    # The float64 input must have exactly one client: an Elemwise Cast.
    ((cast_node, _),) = f.maker.fgraph.inputs[1].clients
    assert isinstance(cast_node.op, tensor.Elemwise)
    assert isinstance(cast_node.op.scalar_op, theano.scalar.Cast)

    # The cast output must have exactly one client: the host-to-GPU transfer.
    ((transfer_node, _),) = cast_node.outputs[0].clients
    assert isinstance(transfer_node.op, cuda.GpuFromHost)
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_elemwise4():
    """ Test that two vectors can be broadcast to form an outer
    product (by performing rank-1 matrix update"""

    shape = (3, 4)
    a = tcn.shared_constructor(theano._asarray(numpy.random.rand(*shape),
                                               dtype='float32'), 'a')
    b = tensor.fvector()
    c = tensor.fvector()
    # NOTE(review): the end of this call was missing from the source, which
    # left the parenthesis open. mode=mode_with_gpu is assumed because the
    # test asserts that no host Elemwise remains -- confirm.
    f = pfunc([b, c], [],
              updates=[(a, (a + b.dimshuffle('x', 0) * c.dimshuffle(0, 'x')))],
              mode=mode_with_gpu)
    # No plain (host) Elemwise node may be left in the compiled graph.
    assert not any(isinstance(node.op, tensor.Elemwise)
                   for node in f.maker.fgraph.toposort())
    # let debugmode catch errors
    f(theano._asarray(numpy.random.rand(4), dtype='float32'),
      theano._asarray(numpy.random.rand(3), dtype='float32'))
项目:machine-deep_learning    作者:Charleswyt    | 项目源码 | 文件源码
def testTheano():
    """Smoke-test Theano and report whether the CPU or the GPU was used.

    Compiles ``exp`` over a shared random vector, runs it ``iters`` times,
    prints the elapsed time and the last result, then prints 'Used the cpu'
    if any node of the compiled graph is a ``T.Elemwise`` and 'Used the gpu'
    otherwise.
    """
    from theano import function, config, shared, sandbox
    import theano.tensor as T
    import numpy
    import time
    print("Testing Theano library...")
    vlen = 10 * 30 * 768  # 10 x #cores x # threads per core
    iters = 1000

    rng = numpy.random.RandomState(22)
    x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
    f = function([], T.exp(x))
    t0 = time.time()
    for i in range(iters):
        r = f()
    t1 = time.time()
    print("Looping %d times took %f seconds" % (iters, t1 - t0))
    print("Result is %s" % (r,))
    # The two messages are alternatives; without the `else` both were
    # printed unconditionally whenever the condition held.
    if any(isinstance(node.op, T.Elemwise)
           for node in f.maker.fgraph.toposort()):
        print('Used the cpu')
    else:
        print('Used the gpu')
# Perform check:

# ----------------------
# - example:
项目:traffic-prediction    作者:JonnoFTW    | 项目源码 | 文件源码
def check_gpu():
    """Print whether Theano compiled a small ``exp`` graph for CPU or GPU.

    Prints 'Used the cpu' if any node of the compiled graph is a
    ``T.Elemwise`` and 'Used the gpu' otherwise.
    """
    from theano import function
    import theano.tensor as T
    f = function([], T.exp([9]))
    # The two messages are alternatives; without the `else` both were
    # printed unconditionally whenever the condition held.
    if any(isinstance(node.op, T.Elemwise)
           for node in f.maker.fgraph.toposort()):
        print('Used the cpu')
    else:
        print('Used the gpu')
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def with_linker(self, linker, op, type, rand_val):
    """Check ``op(scalar.add)`` against numpy addition for many shape pairs.

    Parameters
    ----------
    linker
        Linker that is copied and used to build each function.
    op
        Callable that wraps ``scalar.add`` into the op under test.
    type
        Callable taking a dtype and a broadcastable pattern and returning a
        variable constructor.
    rand_val
        Callable returning a value for a given shape.

    When ``linker`` is a ``gof.PerformLinker`` the shape of the result is
    checked too.
    """
    shape_pairs = [
        ((3, 5), (3, 5)),
        ((3, 5), (1, 5)),
        ((3, 5), (3, 1)),
        ((1, 5), (5, 1)),
        ((1, 1), (1, 1)),
        ((self.openmp_minsize,), (self.openmp_minsize,)),
        ((2, 3, 4, 5), (2, 3, 4, 5)),
        ((2, 3, 4, 5), (1, 3, 1, 5)),
        ((2, 3, 4, 5), (1, 1, 1, 1)),
        ((), ()),
    ]

    def build_sum(xsh, ysh):
        # Length-1 dimensions are declared broadcastable.
        lhs = type(theano.config.floatX, [dim == 1 for dim in xsh])('x')
        rhs = type(theano.config.floatX, [dim == 1 for dim in ysh])('y')
        return lhs, rhs, op(scalar.add)(lhs, rhs)

    for xsh, ysh in shape_pairs:
        x, y, total = build_sum(xsh, ysh)
        add_fn = copy(linker).accept(
            FunctionGraph([x, y], [total])).make_function()
        xv = rand_val(xsh)
        yv = rand_val(ysh)
        zv = xv + yv

        unittest_tools.assert_allclose(add_fn(xv, yv), zv)

        # test Elemwise.infer_shape
        # the Shape op don't implement c_code!
        if not isinstance(linker, gof.PerformLinker):
            continue
        x, y, total = build_sum(xsh, ysh)
        shape_fn = copy(linker).accept(
            FunctionGraph([x, y], [total.shape])).make_function()
        assert tuple(shape_fn(xv, yv)) == tuple(zv.shape)
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def with_linker_inplace(self, linker, op, type, rand_val):
    """Check the in-place variant of ``Add`` for many shape pairs.

    The op is built with ``scalar.Add(scalar.transfer_type(0))`` and the
    inplace pattern ``{0: 0}``. After the call, the first input value must
    equal the numpy sum computed beforehand.

    Parameters
    ----------
    linker
        Linker that is copied and used to build each function.
    op
        Callable taking a scalar op and an inplace pattern.
    type
        Callable taking a dtype and a broadcastable pattern and returning a
        variable constructor.
    rand_val
        Callable returning a value for a given shape.

    When ``linker`` is a ``gof.PerformLinker`` a shape-returning function is
    built and run on fresh values too.
    """
    shape_pairs = [
        ((5, 5), (5, 5)),
        ((5, 5), (1, 5)),
        ((5, 5), (5, 1)),
        ((1, 1), (1, 1)),
        ((2, 3, 4, 5), (2, 3, 4, 5)),
        ((2, 3, 4, 5), (1, 3, 1, 5)),
        ((2, 3, 4, 5), (1, 1, 1, 1)),
        ((), ()),
    ]

    def build_inplace_sum(xsh, ysh):
        # Length-1 dimensions are declared broadcastable.
        lhs = type(theano.config.floatX, [dim == 1 for dim in xsh])('x')
        rhs = type(theano.config.floatX, [dim == 1 for dim in ysh])('y')
        inplace_add = op(scalar.Add(scalar.transfer_type(0)), {0: 0})
        return lhs, rhs, inplace_add(lhs, rhs)

    for xsh, ysh in shape_pairs:
        x, y, total = build_inplace_sum(xsh, ysh)
        add_fn = copy(linker).accept(
            FunctionGraph([x, y], [total])).make_function()
        xv = rand_val(xsh)
        yv = rand_val(ysh)
        zv = xv + yv

        add_fn(xv, yv)

        self.assertTrue((xv == zv).all())
        # test Elemwise.infer_shape
        # the Shape op don't implement c_code!
        if not isinstance(linker, gof.PerformLinker):
            continue
        x, y, total = build_inplace_sum(xsh, ysh)
        shape_fn = copy(linker).accept(
            FunctionGraph([x, y], [total.shape])).make_function()
        xv = rand_val(xsh)
        yv = rand_val(ysh)
        zv = xv + yv

        shape_fn(xv, yv)

        assert xv.shape == zv.shape
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_input_dimensions_overflow(self):
    """Sum six vectors of length 2 ** 11 without overflowing.

    Elemwise.perform used to compute the product of input shapes to check if
    there was a zero in them, and that product overflowed in this case.
    """
    a, b, c, d, e, f = tensor.vectors('abcdef')
    s = a + b + c + d + e + f
    # NOTE(review): the end of this call was missing from the source, which
    # left the parenthesis open. A Python-linker mode is assumed because the
    # regression concerns Elemwise.perform -- confirm.
    g = theano.function([a, b, c, d, e, f], s,
                        mode=theano.compile.Mode(linker='py'))
    g(*[numpy.zeros(2 ** 11, config.floatX) for _ in range(6)])
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_not_implemented_elemwise_grad():
    """
    Regression test for unimplemented gradient in an Elemwise Op.

    Taking the gradient with respect to the input whose gradient is
    implemented must work; taking it with respect to the input whose gradient
    is ``grad_not_implemented`` must raise ``NullTypeGradError``.
    """

    class TestOp(scalar.ScalarOp):
        # Scalar op computing x * n; only the gradient w.r.t. x is provided.

        def __init__(self):
            self.output_types_preference = scalar.upgrade_to_float

        def impl(self, n, x):
            return x * n

        def grad(self, inputs, gout):
            (n, x) = inputs
            (gz,) = gout
            dy_dx = n
            # Input 0 (n) is deliberately marked as not implemented.
            return [theano.gradient.grad_not_implemented(self, 0, n),
                    gz * dy_dx]

    test_op = tensor.Elemwise(TestOp())
    x = tensor.scalar()
    # The call to `grad` used to crash.
    tensor.grad(test_op(2, x), x)
    # Verify that trying to use the not implemented gradient fails.
    try:
        tensor.grad(test_op(x, 2), x)
    except theano.gradient.NullTypeGradError:
        pass
    else:
        # Raised explicitly so the check survives `python -O`.
        raise AssertionError(
            "grad through a not-implemented gradient did not raise "
            "NullTypeGradError")
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_lift(self):
        # Builds x + y + z from inputs of rank 1, 2 and 3 and compares the
        # printed graph with expected strings before and after an
        # optimization.
        # NOTE(review): this snippet is damaged. The expected strings start
        # mid-expression (their "]" has no matching "["), both
        # self.assertTrue(...) calls below are left unclosed, and no
        # optimizer is applied to `g` between the two checks. Restore the
        # missing lines from the upstream test before running.
        x, y, z = inputs([False] * 1, [False] * 2, [False] * 3)
        e = x + y + z
        g = FunctionGraph([x, y, z], [e])

        # It does not really matter if the DimShuffles are inplace
        # or not.
        init_str_g_inplace = (
            "(Elemwise{add,no_inplace}(InplaceDimShuffle{x,0}(x), y)), z)]")
        init_str_g_noinplace = (
            "(Elemwise{add,no_inplace}(DimShuffle{x,0}(x), y)), z)]")
        self.assertTrue(str(g) in (init_str_g_inplace, init_str_g_noinplace),

        opt_str_g_inplace = (
            "(InplaceDimShuffle{x,x,0}(x), InplaceDimShuffle{x,0,1}(y)), z)]")
        opt_str_g_noinplace = (
            "(DimShuffle{x,x,0}(x), DimShuffle{x,0,1}(y)), z)]")
        self.assertTrue(str(g) in (opt_str_g_inplace, opt_str_g_noinplace),
        # Check stacktrace was copied over correctly after opt was applied
        self.assertTrue(check_stack_trace(g, ops_to_check='all'))
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def setUp(self):
    """Create the ``times_2`` / ``times_3`` ops and a float vector input.

    Sets ``self.scal_times_2``, ``self.times_2``, ``self.scal_times_3``,
    ``self.times_3`` and ``self.x``.
    """
    upgrade_to_float = theano.scalar.basic.upgrade_to_float

    # NOTE(review): the arguments of both Elemwise(...) calls were missing
    # from the source, which left the parentheses open. Wrapping the scalar
    # op built just before each call is assumed; the original may have passed
    # further arguments (e.g. a name) -- confirm.
    self.scal_times_2 = TimesN(2, upgrade_to_float, name='times_2')
    self.times_2 = theano.tensor.elemwise.Elemwise(self.scal_times_2)

    self.scal_times_3 = TimesN(3, upgrade_to_float, name='times_3')
    self.times_3 = theano.tensor.elemwise.Elemwise(self.scal_times_3)

    self.x = fvector()
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_local_IncSubtensor_serialize():
    """Check that no Add node is fed directly by an IncSubtensor-like op.

    A cost is built from several indexings of a shared matrix ``W``, its
    gradient is used in an update of ``W``, and the function is compiled
    with 'fusion' excluded and 'local_IncSubtensor_serialize' included. The
    stack trace of the IncSubtensor-like ops is then checked on a function
    returning the gradient.
    """
    d = numpy.random.normal(0, 0.01, size=(100, 100))
    d = d.astype(theano.config.floatX)

    W = theano.shared(d, name='W')
    i = T.vector('i', dtype='int64')
    j = T.vector('j', dtype='int64')
    t = T.scalar('t')
    # The two expressions are alternatives; the `else` was missing, so the
    # second assignment always overwrote the first.
    if theano.tensor.subtensor.inplace_increment:
        y = (W[i] + W[j] + W[1] + W[i, j]).sum()
    else:
        y = (W[i] + W[j] + W[1]).sum()
    cost = T.sqr(t - y)
    dW = theano.grad(cost, W)
    mode = theano.compile.mode.get_default_mode().excluding('fusion')
    mode = mode.including("local_IncSubtensor_serialize")
    f = theano.function([i, j, t], updates=[(W, W - 0.01 * dW)], mode=mode)
    topo = f.maker.fgraph.toposort()
    adds = [n for n in topo if isinstance(n.op, T.Elemwise) and
            isinstance(n.op.scalar_op, theano.scalar.Add)]

    # NOTE(review): the source listed IncSubtensor and AdvancedIncSubtensor
    # and then broke off after a trailing comma; AdvancedIncSubtensor1 is
    # assumed to be the lost third entry -- confirm.
    inc_ops = (tensor.IncSubtensor, tensor.AdvancedIncSubtensor,
               tensor.AdvancedIncSubtensor1)
    # NOTE(review): the second operand of the check below was missing from
    # the source; an isinstance test against the IncSubtensor-like ops is
    # assumed from the purpose of the test -- confirm.
    for a in adds:
        assert not any(inp.owner and isinstance(inp.owner.op, inc_ops)
                       for inp in a.inputs)

    # Now test that the stack trace is copied over properly,
    # if we return the gradients. We need to use same mode as before.
    f = theano.function([i, j, t], dW, mode=mode)
    assert check_stack_trace(f, ops_to_check=list(inc_ops))
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_identity(self):
    """Check that ``tensor_copy(x)`` compiles to a single ``deep_copy_op``."""
    # scalar.identity is used in 2 Elemwise functions:
    # tensor_copy, and view
    matrix_in = T.matrix()
    copy_fn = theano.function([matrix_in], T.tensor_copy(matrix_in),
                              mode=self.mode)
    # Sample data is drawn as in the original, but never passed to copy_fn.
    vx = numpy.random.rand(5, 4).astype(config.floatX)
    nodes = copy_fn.maker.fgraph.toposort()
    assert len(nodes) == 1
    assert nodes[0].op == deep_copy_op
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_local_one_minus_erf(self):
    """Check how ``1 - erf(x)`` and ``2 - erf(x)`` are compiled.

    ``1 - erf(x)``, ``1 + (-erf(x))`` and ``(-erf(x)) + 1`` must compile to
    the single op ``T.erfc``. ``2 - erf(x)`` must keep ``T.erf`` followed by
    an Elemwise whose scalar op is ``Add`` or ``Sub``.
    """
    # NOTE(review): the end of this call was missing from the source, which
    # left the parenthesis open. dtype=floatX is assumed so the data matches
    # T.vector() -- confirm. `val` is not used in the visible part of the
    # test.
    val = numpy.asarray([-30, -3, -2, -1, 0, 1, 2, 3, 30],
                        dtype=theano.config.floatX)
    x = T.vector()

    f = theano.function([x], 1 - T.erf(x), mode=self.mode)
    assert [n.op for n in f.maker.fgraph.toposort()] == [T.erfc], (
        f.maker.fgraph.toposort())

    f = theano.function([x], 1 + (-T.erf(x)), mode=self.mode)
    assert [n.op for n in f.maker.fgraph.toposort()] == [T.erfc], (
        f.maker.fgraph.toposort())

    f = theano.function([x], (-T.erf(x)) + 1, mode=self.mode)
    assert [n.op for n in f.maker.fgraph.toposort()] == [T.erfc], (
        f.maker.fgraph.toposort())

    # With a constant other than 1 the erf must stay in the graph.
    f = theano.function([x], 2 - T.erf(x), mode=self.mode)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 2, f.maker.fgraph.toposort()
    assert topo[0].op == T.erf, f.maker.fgraph.toposort()
    assert isinstance(topo[1].op, T.Elemwise), f.maker.fgraph.toposort()
    assert isinstance(topo[1].op.scalar_op, (scal.Add, scal.Sub)), (
        f.maker.fgraph.toposort())
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_const0(self):
    """Check ``switch(0, x, y)`` for every int32/int64 dtype pair.

    The compiled graph must hold no Elemwise node whose scalar op is a
    ``Switch``, and the function must return ``y``.
    """
    int_dtypes = ['int32', 'int64']
    dtype_pairs = [(first, second)
                   for first in int_dtypes for second in int_dtypes]
    for dtype1, dtype2 in dtype_pairs:
        x = theano.tensor.matrix('x', dtype=dtype1)
        y = theano.tensor.matrix('y', dtype=dtype2)
        selected = theano.tensor.switch(0, x, y)
        f = theano.function([x, y], selected, mode=self.mode)
        for node in f.maker.fgraph.toposort():
            assert not (isinstance(node.op, theano.tensor.Elemwise) and
                        isinstance(node.op.scalar_op,
                                   theano.scalar.basic.Switch))
        vx = numpy.array([[1, 2, 3], [4, 5, 6]], dtype=dtype1)
        vy = numpy.array([[7, 8, 9], [10, 11, 12]], dtype=dtype2)
        assert numpy.all(f(vx, vy) == vy)
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_const1(self):
    """Check ``switch(1, x, y)`` for every int32/int64 dtype pair.

    The compiled graph must hold no Elemwise node whose scalar op is a
    ``Switch``, and the function must return ``x``.
    """
    int_dtypes = ['int32', 'int64']
    dtype_pairs = [(first, second)
                   for first in int_dtypes for second in int_dtypes]
    for dtype1, dtype2 in dtype_pairs:
        x = theano.tensor.matrix('x', dtype=dtype1)
        y = theano.tensor.matrix('y', dtype=dtype2)
        selected = theano.tensor.switch(1, x, y)
        f = theano.function([x, y], selected, mode=self.mode)
        for node in f.maker.fgraph.toposort():
            assert not (isinstance(node.op, theano.tensor.Elemwise) and
                        isinstance(node.op.scalar_op,
                                   theano.scalar.basic.Switch))
        vx = numpy.array([[1, 2, 3], [4, 5, 6]], dtype=dtype1)
        vy = numpy.array([[7, 8, 9], [10, 11, 12]], dtype=dtype2)
        assert numpy.all(f(vx, vy) == vx)
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_broadcast3(self):
    """Check ``switch(x, y, y)`` with a matrix condition and a vector branch.

    The result must match ``numpy.where(vx, vy, vy)`` and the compiled graph
    must hold no Elemwise node.
    """
    # test switch(matrix, same_vector, same_vector)

    cond = theano.tensor.matrix('x', dtype='int32')
    branch = theano.tensor.vector('y', dtype='int64')
    f = theano.function([cond, branch],
                        theano.tensor.switch(cond, branch, branch),
                        mode=self.mode)
    cond_val = numpy.array([[0, 1], [1, 0]], dtype='int32')
    branch_val = numpy.array([7, 8], dtype='int64')
    expected = numpy.where(cond_val, branch_val, branch_val)
    utt.assert_allclose(f(cond_val, branch_val), expected)
    for node in f.maker.fgraph.toposort():
        assert not isinstance(node.op, theano.tensor.Elemwise)
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_local_reshape_lift():
    """Check that the Reshape ends up before the Elemwise in the graph.

    ``exp(x).reshape([x.size])`` is compiled with 'local_reshape_lift'
    included; the last two nodes of the compiled graph must be a Reshape
    followed by an Elemwise.
    """
    x = tensor.tensor4()
    flattened = T.exp(x).reshape([x.size])
    assert flattened.ndim == 1
    lift_mode = compile.mode.get_default_mode().including(
        'local_reshape_lift')
    f = theano.function([x], flattened, mode=lift_mode)
    sample = numpy.random.rand(5, 4, 3, 2).astype(config.floatX)
    f(sample)
    nodes = f.maker.fgraph.toposort()
    assert isinstance(nodes[-2].op, tensor.Reshape)
    assert isinstance(nodes[-1].op, tensor.Elemwise)
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test1(self):
    """Check that ``y[::-1][::-1]`` compiles without any IntDiv Elemwise.

    The docstring of the original test describes this as removing the extra
    floor_div by 1 introduced by the local_subtensor_merge optimization.
    ``self.mode`` is replaced by a copy that excludes 'fusion'.
    """
    y = T.tensor4('y')
    self.mode = self.mode.excluding('fusion')
    f = theano.function([y], y[::-1][::-1], mode=self.mode)

    for node in f.maker.fgraph.toposort():
        assert not (isinstance(node.op, T.elemwise.Elemwise) and
                    isinstance(node.op.scalar_op, theano.scalar.IntDiv))
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test2(self):
    """Check that ``y // 1`` compiles without any IntDiv Elemwise."""
    y = T.tensor4('y')
    quotient = y // 1
    f = theano.function([y], quotient, mode=self.mode)
    for node in f.maker.fgraph.toposort():
        assert not (isinstance(node.op, T.elemwise.Elemwise) and
                    isinstance(node.op.scalar_op, theano.scalar.IntDiv))
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test3(self):
    """Check that dividing by a tensor of ones leaves no IntDiv Elemwise."""
    y = T.tensor4('y')
    ones = numpy.ones((2, 2, 2, 2))
    quotient = y // ones
    f = theano.function([y], quotient, mode=self.mode)
    for node in f.maker.fgraph.toposort():
        assert not (isinstance(node.op, T.elemwise.Elemwise) and
                    isinstance(node.op.scalar_op, theano.scalar.IntDiv))
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_erfinvgpu():
    """ Test that local_gpu_elemwise_0 replaces Erfinv with ErfinvGPU """
    # NOTE(review): this snippet is damaged. The theano.function(...) call
    # and both isinstance(...) asserts are left unclosed (their remaining
    # arguments are missing), and the random array on the last line is
    # neither assigned nor used. Restore the missing lines from the upstream
    # test before running.
    x = tensor.fmatrix()
    f = theano.function([x],
    assert isinstance(f.maker.fgraph.toposort()[1].op,
    assert isinstance(f.maker.fgraph.toposort()[1].op.scalar_op,
    numpy.random.rand(7, 8).astype('float32')
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_elemwise2():
    """ Several kinds of elemwise expressions with dimension permutations """
    # NOTE(review): the end of both pfunc(...) calls was missing from the
    # source, which left the parentheses open. mode=mode_with_gpu is assumed
    # because the test asserts that no host Elemwise remains -- confirm.
    rng = numpy.random.RandomState(int(time.time()))
    shape = (3, 5)
    for pattern in [(0, 1), (1, 0)]:
        a = tcn.shared_constructor(theano._asarray(rng.rand(*shape),
                                                   dtype='float32'), name=None)
        b = tensor.Tensor(dtype='float32', broadcastable=[0] * len(shape))()
        f = pfunc([b], [], updates=[(a, (a + b).dimshuffle(pattern))],
                  mode=mode_with_gpu)
        # No plain (host) Elemwise node may be left in the compiled graph.
        assert not any(isinstance(node.op, tensor.Elemwise)
                       for node in f.maker.fgraph.toposort())
        # let debugmode catch errors
        f(theano._asarray(rng.rand(*shape), dtype='float32') * .3)

    shape = (3, 4, 5, 6)
    a = tcn.shared_constructor(theano._asarray(rng.rand(*shape),
                                               dtype='float32'), 'a')
    b = tensor.Tensor(dtype='float32', broadcastable=[0] * len(shape))()
    f = pfunc([b], [], updates=[(a, (a + b).dimshuffle([2, 0, 3, 1]) *
              tensor.exp(b ** a).dimshuffle([2, 0, 3, 1]))],
              mode=mode_with_gpu)
    assert not any(isinstance(node.op, tensor.Elemwise)
                   for node in f.maker.fgraph.toposort())
    # let debugmode catch errors
    f(theano._asarray(rng.rand(*shape), dtype='float32'))
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_elemwise_composite_support_code():
    """
    This was generating an error at compile time.
    Commit 3d1690fa346103594356ecaeceeb2c6757b45d2b fixed that.
    """
    # NOTE(review): each shared_constructor(...) call ended in a dangling
    # comma in the source; a trailing argument (possibly a name) appears to
    # have been lost. The calls are closed with `value` only -- confirm.
    X = tcn.shared_constructor(value=numpy.zeros((100, 10), dtype="float32"))
    W = tcn.shared_constructor(value=numpy.zeros((10, 1), dtype="float32"))
    # NOTE(review): the source read `U =, W)`. T.dot(X, W) is assumed:
    # (100, 10) x (10, 1) gives the (100, 1) shape of Y below -- confirm.
    U = T.dot(X, W)
    Y = tcn.shared_constructor(value=numpy.zeros((100, 1), dtype="float32"))
    P = T.exp(-(Y - U) ** 2)
    epsilon = numpy.asarray(0.001, dtype="float32")
    NLL = -T.mean(T.log(P + epsilon))  # SupportCodeError
    G = theano.gradient.grad(NLL, wrt=[W])

    # Silence the 1pexp warning for the compilation only and always restore
    # the previous setting (the try/finally keywords were missing).
    backup = theano.config.warn.identify_1pexp_bug
    theano.config.warn.identify_1pexp_bug = False
    try:
        f_grad = theano.function(inputs=[], outputs=G, mode=mode_with_gpu)
    finally:
        theano.config.warn.identify_1pexp_bug = backup

    topo = f_grad.maker.fgraph.toposort()
    assert sum([isinstance(node.op, T.Elemwise) for node in topo]) == 1
    # I suspect this was failing in the original branch too
    assert sum([isinstance(node.op, tcn.GpuElemwise) for node in topo]) == 1
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def dtype_in_elemwise_supported(op):
    """
    Return True if the Elemwise op is supported on the gpu.
    Return False otherwise.

    We need to check inside the Composite op.

    Only a ``GpuElemwise`` or ``tensor.Elemwise`` whose scalar op is a
    ``Composite`` is inspected; it is rejected when any input or output of a
    collected inner node has a dtype outside
    ``elemwise_cuda_dtype_supported``. Every other op yields True.
    """
    def get_all_basic_scalar(composite_op):
        # Collect the nodes of the Composite's inner graph.
        l = []
        for i in composite_op.fgraph.toposort():
            # NOTE(review): toposort() yields nodes, so this isinstance test
            # on the node itself (rather than on `i.op`) looks like it can
            # never be true -- confirm before changing.
            if isinstance(i, theano.scalar.Composite):
                l += get_all_basic_scalar(i)
            else:
                # NOTE(review): this branch was missing from the source, so
                # nothing was ever collected and the dtype check below was
                # vacuous. It is assumed from the caller, which reads
                # `.inputs` / `.outputs` of the collected items -- confirm.
                l.append(i)
        return l
    if isinstance(op, (GpuElemwise, tensor.Elemwise)):
        if isinstance(op.scalar_op, theano.scalar.Composite):
            scals = get_all_basic_scalar(op.scalar_op)
            for s in scals:
                if any(i.type.dtype not in elemwise_cuda_dtype_supported
                       for i in s.inputs + s.outputs):
                    return False
    return True
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def local_gpu_elemwise_1(node):
    """
    gpu_from_host(Elemwise)) -> GpuElemwise(gpu_from_host(...))

    Returns a one-element list holding the replacement output when ``node``
    is a ``GpuFromHost`` whose input comes from a host Elemwise with a single
    client and only float32 inputs; returns False otherwise.
    """
    if isinstance(node.op, GpuFromHost):
        host_i, = node.inputs
        # NOTE(review): the source condition ended with a dangling `and`; a
        # final operand appears to have been lost (possibly a
        # dtype_in_elemwise_supported check). It is closed here without it
        # -- confirm.
        if (host_i.owner and
                isinstance(host_i.owner.op, tensor.Elemwise) and
                len(host_i.clients) == 1):

            elemwise_node = host_i.owner
            # Don't set any inplace pattern.
            # gpu_inplace_elemwise_optimizer will do it later

            if isinstance(elemwise_node.op.scalar_op, Erfinv):
                new_op = GpuElemwise(erfinv_gpu)
            elif isinstance(elemwise_node.op.scalar_op, Erfcx):
                new_op = GpuElemwise(erfcx_gpu)
            else:
                # The `else:` / `try:` keywords were missing from the source.
                try:
                    new_op = GpuElemwise(elemwise_node.op.scalar_op)
                except SupportCodeError:
                    # This happens when scalar_op requires support code
                    return False

            if all(i.dtype == 'float32' for i in elemwise_node.inputs):
                gpu_elemwise = new_op(*[as_cuda_ndarray_variable(i)
                                        for i in elemwise_node.inputs])
                # split_huge_add_or_mul may return a falsy value, in which
                # case no replacement is made.
                gpu_elemwise = split_huge_add_or_mul(gpu_elemwise.owner)
                if not gpu_elemwise:
                    return False
                return [gpu_elemwise.outputs[0]]
    return False
项目:DeepLearningPython35    作者:MichalDanielDobrzanski    | 项目源码 | 文件源码
def testTheano():
    """Time a compiled ``exp`` over a shared vector and report CPU vs. GPU.

    Compiles ``exp(x)`` for a shared vector of ``vlen`` random values, calls
    it ``iters`` times, prints the elapsed time and the last result, then
    prints 'Used the cpu' if any node of the compiled graph is a
    ``T.Elemwise`` op and 'Used the gpu' otherwise.
    """
    from theano import function, config, shared, sandbox
    import theano.tensor as T
    import numpy
    import time
    print("Testing Theano library...")
    vlen = 10 * 30 * 768  # 10 x #cores x # threads per core
    iters = 1000

    rng = numpy.random.RandomState(22)
    x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
    f = function([], T.exp(x))
    t0 = time.time()
    for i in range(iters):
        r = f()
    t1 = time.time()
    print("Looping %d times took %f seconds" % (iters, t1 - t0))
    print("Result is %s" % (r,))
    # The two messages are mutually exclusive; the ``else`` was missing, so
    # both were printed whenever a host Elemwise was found.
    if any(isinstance(node.op, T.Elemwise)
           for node in f.maker.fgraph.toposort()):
        print('Used the cpu')
    else:
        print('Used the gpu')
# Perform check:

# ----------------------
# - example:
项目:Theano-MPI    作者:uoguelph-mlrg    | 项目源码 | 文件源码
def init_device(device='gpu0'):
    """Initialise ``device`` for Theano and smoke-test a compiled graph.

    Parameters
    ----------
    device : str
        ``'cuda<N>'`` selects the gpuarray path, ``'gpu<N>'`` selects the
        pycuda + ``theano.sandbox.cuda`` path, anything else (e.g. ``'cpu'``)
        sets up no GPU context.

    Returns
    -------
    tuple
        ``(drv, ctx, arr, shared_x, shared_xx)``: the pycuda driver module
        (or ``None``), the context object (or ``None``), the random host
        array, and two Theano shared variables initialised from it.

    Raises
    ------
    ValueError
        If ``device`` starts with ``'cuda'`` and ``THEANO_FLAGS`` is already
        set in the environment.
    """
    if device.startswith('cuda'):

        import os
        if 'THEANO_FLAGS' in os.environ:
            raise ValueError('Use theanorc to set the theano config')

        os.environ['THEANO_FLAGS'] = 'device={0}'.format(device)
        import theano.gpuarray
        # This is a bit of black magic that may stop working in future
        # theano releases
        ctx = theano.gpuarray.type.get_context(None)
        drv = None

    elif device.startswith('gpu'):

        # Parse everything after the prefix so ids above 9 work too.
        gpuid = int(device[len('gpu'):])

        import pycuda.driver as drv
        # pycuda requires init() before any other driver call.
        drv.init()
        dev = drv.Device(gpuid)
        ctx = dev.make_context()
        import theano.sandbox.cuda
        # NOTE(review): the visible source only imported this module (twice);
        # the call binding Theano to the device is restored from the upstream
        # project. Confirm against the full file.
        theano.sandbox.cuda.use(device)

    else:
        # No GPU requested: keep the return values defined and make sure the
        # local name ``theano`` is bound for the code below.
        drv = None
        ctx = None
        import theano

    from theano import function, config, tensor

    vlen = 10 * 30 * 768  # 10 x #cores x # threads per core

    rng = np.random.RandomState(22)
    arr = rng.rand(vlen)

    shared_x = theano.shared(np.asarray(arr, config.floatX))
    shared_xx = theano.shared(np.asarray(arr, config.floatX))

    # Symbolic placeholder substituted by shared_x through ``givens``; it was
    # referenced without ever being defined.
    x = tensor.vector('x', dtype=config.floatX)

    # compile a function so that shared_x will be set to part of a computing graph on GPU (CUDAndarray)
    f = function([], tensor.exp(x), givens=[(x, shared_x)])

    if any(isinstance(node.op, tensor.Elemwise) and
           ('Gpu' not in type(node.op).__name__)
           for node in f.maker.fgraph.toposort()):
        print('Used the cpu')
    else:
        print('Used the gpu')

    # if np.any([isinstance(x.op, tensor.Elemwise) for x in f.maker.fgraph.toposort()]) and device!='cpu':
    #     raise TypeError('graph not compiled on GPU')

    return drv, ctx, arr, shared_x, shared_xx
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_local_gpu_subtensor():
    """Check where Subtensor ends up (host or GPU) in several graph shapes.

    Each case compiles a function with ``mode_with_gpu`` and inspects the ops
    of the optimised graph.
    """
    def compiled_ops(inputs, outputs):
        # Ops of the optimised graph, in topological order.
        fn = theano.function(inputs, outputs, mode=mode_with_gpu)
        return [apply_node.op for apply_node in fn.maker.fgraph.toposort()]

    def has_host_subtensor(ops):
        # Exact type match: subclasses of Subtensor do not count.
        return any([type(op) is tensor.Subtensor for op in ops])

    def has_gpu_subtensor(ops):
        return any([isinstance(op, GpuSubtensor) for op in ops])

    # Test shared forced on CPU.
    t = tensor._shared(numpy.zeros(20, "float32"))
    ops = compiled_ops([], t[3:4])
    assert has_host_subtensor(ops)
    assert not has_gpu_subtensor(ops)

    # Test graph input.
    t = tensor.fmatrix()
    ops = compiled_ops([t], t[3:4])
    assert has_host_subtensor(ops)
    assert not has_gpu_subtensor(ops)

    # Test multiple use of the input
    # We want the subtensor to be on the GPU to prevent multiple transfer.
    t = tensor.fmatrix()
    ops = compiled_ops([t], [t[3:4], t + 1])
    assert not has_host_subtensor(ops)
    assert has_gpu_subtensor(ops)

    # Test multiple use of the input + input as output
    # We want the subtensor to be on the GPU to prevent multiple transfer.
    t = tensor.fmatrix()
    ops = compiled_ops([t], [t[3:4], t + 1, t])
    assert not has_host_subtensor(ops)
    assert has_gpu_subtensor(ops)

    # Test shared forced on CPU and we do computation on the output of
    # the subtensor.
    t = tensor._shared(numpy.zeros(20, "float32"))
    ops = compiled_ops([], t[3:4] + 1)
    assert has_host_subtensor(ops)
    assert not has_gpu_subtensor(ops)
    # Our optimizer isn't smart enough to move the Elemwise to the GPU.
    # If it were just a little bit smarter, it could wrongly move it to the GPU.
    # If it were super smart, it would know it should not move it to the GPU.
    assert any([isinstance(op, tensor.Elemwise) for op in ops])
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_multiple_case_that_fail(self):
    """Canonizer cases that are known not to be implemented (always skipped).

    The body after the ``raise`` is unreachable; it is kept as a record of
    the graphs the Canonizer is expected to simplify one day:
    ``x / y / z -> x / (y * z)`` and ``x / (y / z) -> (x * z) / y``.
    """
    raise SkipTest("Current implementation of Canonizer does not "
                   "implement all cases. Skip the corresponding test.")

    shp = (4, 4)
    fx, fy, fz = fmatrices('xyz')
    dx, dy, dz = dmatrices('xyz')
    fxv = theano._asarray(numpy.random.rand(*shp), dtype='float32')
    fyv = theano._asarray(numpy.random.rand(*shp), dtype='float32')
    fzv = theano._asarray(numpy.random.rand(*shp), dtype='float32')
    # Values fed to the double-precision matrices are built as float64
    # (they were created as float32, which did not match dx/dy/dz).
    dxv = theano._asarray(numpy.random.rand(*shp), dtype='float64')
    dyv = theano._asarray(numpy.random.rand(*shp), dtype='float64')
    dzv = theano._asarray(numpy.random.rand(*shp), dtype='float64')
    # We must be sure that the Canonizer is working, but that we don't have other
    # optimisation that could hide bug in the Canonizer as local_elemwise_fusion
    mode = compile.mode.get_default_mode()

    opt = gof.Query(["canonicalize"])
    # NOTE(review): the argument of excluding() was truncated in the visible
    # source; restored from the comment above.
    opt = opt.excluding('local_elemwise_fusion')
    mode = mode.__class__(linker=mode.linker, optimizer=opt)

    def check(g, sym_inputs, val_inputs, out_dtype, expected):
        # Compile g, compare with the numpy reference and inspect the graph.
        f = compile.function(list(sym_inputs), g, mode=mode)
        out = f(*val_inputs)
        utt.assert_allclose(out, expected)
        topo = f.maker.fgraph.toposort()
        assert len(topo) == 2
        assert isinstance(topo[0].op, (T.Elemwise, ))
        # NOTE(review): the expected scalar op class was truncated in the
        # visible source; ``Inv`` is restored from upstream Theano and
        # should be confirmed.
        assert isinstance(topo[0].op.scalar_op, theano.scalar.basic.Inv)
        assert len(topo[0].inputs) == 1
        assert(out_dtype == out.dtype)

    # test fail!
    # test x / y / z -> x / (y * z)
    for (g, sym_inputs, val_inputs, out_dtype) in [
            ((dx / dy) / dz, [dx, dy, dz], [dxv, dyv, dzv], 'float64'),
            ((fx / fy) / fz, [fx, fy, fz], [fxv, fyv, fzv], 'float32'),
    ]:
        check(g, sym_inputs, val_inputs, out_dtype,
              val_inputs[0] / val_inputs[1] / val_inputs[2])

    # test x / (y / z) -> (x * z) / y
    for (g, sym_inputs, val_inputs, out_dtype) in [
            (dx / (dy / dz), [dx, dy, dz], [dxv, dyv, dzv], 'float64'),
            (fx / (fy / fz), [fx, fy, fz], [fxv, fyv, fzv], 'float32'),
    ]:
        check(g, sym_inputs, val_inputs, out_dtype,
              val_inputs[0] / (val_inputs[1] / val_inputs[2]))
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test0(self):
    """Check ``inc[idx]`` after set/inc_subtensor on ``x[idx]``.

    For every float32/float64 pairing of ``x`` and ``y`` the function is
    compiled with ``self.mode_no_assert``, its numeric result is compared
    with the numpy reference, and the size of the optimised graph is checked.
    """
    for dtype1, dtype2 in [("float32", "float32"),
                           ("float32", "float64"),
                           ("float64", "float32"),
                           ("float64", "float64")]:
        x = tensor.matrix(dtype=dtype1)
        y = tensor.matrix(dtype=dtype2)
        idx = tensor.ivector()

        dx = numpy.random.rand(4, 5).astype(dtype1)
        dy = numpy.random.rand(2, 5).astype(dtype2)
        didx = numpy.asarray([1, 3], "int32")

        # set_subtensor
        inc = tensor.set_subtensor(x[idx], y)
        o = inc[idx]
        f = theano.function([x, y, idx], o, self.mode_no_assert)

        res = f(dx, dy, didx)
        utt.assert_allclose(dy, res)
        topo = f.maker.fgraph.toposort()
        # NOTE(review): ``opt`` is not defined in this method; it is assumed
        # to be provided by the enclosing scope. Confirm.
        if opt:
            assert len(topo) == 1
            assert isinstance(topo[0].op, (compile.DeepCopyOp, T.Elemwise))
        else:
            # The ``else`` was missing, which made both length assertions
            # run unconditionally and contradict each other.
            assert len(topo) == 2

        # inc_subtensor(data[idx], y)
        inc = tensor.inc_subtensor(x[idx], y)
        o = inc[idx]
        f = theano.function([x, y, idx], o, self.mode_no_assert)

        res = f(dx, dy, didx)
        utt.assert_allclose((dx[didx] + dy), res)
        topo = f.maker.fgraph.toposort()
        # NOTE(review): this comparison has no ``assert`` and therefore
        # checks nothing. Left as-is because it is unclear whether the graph
        # really has two nodes for every dtype pairing.
        len(topo) == 2

        # inc_subtensor(0[idx], y)
        inc = tensor.inc_subtensor(x.zeros_like()[idx], y)
        o = inc[idx]
        f = theano.function([x, y, idx], o, self.mode_no_assert)

        res = f(dx, dy, didx)
        utt.assert_allclose(dy, res)
        topo = f.maker.fgraph.toposort()
        if opt:
            assert len(topo) == 1
            assert isinstance(topo[0].op, (compile.DeepCopyOp, T.Elemwise))
        else:
            assert len(topo) > 2
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_argmax_pushdown():
    """Check when max_and_argmax is pushed through a softmax.

    For both ``softmax_graph`` and ``softmax_op``: if only the argmax output
    is used, the optimised graph is expected to contain just
    ``_max_and_argmax``; if the max output is used, the softmax must stay.

    NOTE(review): several lines of this test were truncated in the visible
    source (the argument of ``sm``, the output list of ``FunctionGraph`` and
    the optimisation call). They are restored from upstream Theano and should
    be confirmed against the full file.
    """
    x = tensor.matrix()
    for sm in [softmax_graph, softmax_op]:
        # test that the max_and_argmax is pushed down if the max is not used
        out = tensor.max_and_argmax(
            sm(tensor.exp(tensor.tanh(sigmoid(x)))),
            axis=-1)[1]
        fgraph = gof.FunctionGraph(
            [x],
            [out])
        theano.compile.mode.optdb.query(
            theano.compile.mode.OPT_FAST_RUN).optimize(fgraph)

        # print 'AFTER'
        # for node in fgraph.toposort():
        # print node.op
        assert len(fgraph.toposort()) == 2  # an output_guard is second
        assert fgraph.toposort()[0].op == tensor.basic._max_and_argmax
        assert str(fgraph.toposort()[1].op) == 'OutputGuard'
        assert check_stack_trace(
            fgraph, ops_to_check=tensor.basic._max_and_argmax)
        x = tensor.matrix()
        # test that the max_and_argmax is not pushed down if the max is used
        out = tensor.max_and_argmax(
            sm(tensor.exp(tensor.tanh(sigmoid(x)))),
            axis=-1)[0]
        fgraph = gof.FunctionGraph(
            [x],
            [out])

        assert hasattr(fgraph.outputs[0].tag, 'trace')
        # Silence the pushdown warning while optimising and always restore
        # the previous setting, even if the optimisation raises.
        backup = config.warn.argmax_pushdown_bug
        config.warn.argmax_pushdown_bug = False
        try:
            theano.compile.mode.optdb.query(
                theano.compile.mode.OPT_FAST_RUN).optimize(fgraph)
        finally:
            config.warn.argmax_pushdown_bug = backup

        # print 'AFTER'
        # for node in fgraph.toposort():
            # print node.op
        assert len(fgraph.toposort()) == 4  # an output_guard is last
        assert isinstance(fgraph.toposort()[0].op, tensor.Elemwise)
        assert isinstance(fgraph.toposort()[1].op, Softmax)
        assert isinstance(fgraph.toposort()[2].op, tensor.CAReduce)
        assert isinstance(fgraph.toposort()[2].op.scalar_op, theano.scalar.Maximum)
        assert str(fgraph.toposort()[3].op) == 'OutputGuard'
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_argmax_pushdown_bias():
    """Check argmax pushdown through ``softmax_with_bias``.

    First case: only the argmax is used, so the optimised graph is expected
    to be DimShuffle, Elemwise, MaxAndArgmax, OutputGuard. Second case: the
    max is used, so ``SoftmaxWithBias`` must remain in the graph.

    NOTE(review): the output list of ``FunctionGraph`` and the optimisation
    calls were truncated in the visible source. They are restored from
    upstream Theano and should be confirmed against the full file.
    """
    x = tensor.matrix()
    b = tensor.vector()

    out = tensor.argmax(softmax_with_bias(x, b), axis=-1)
    fgraph = gof.FunctionGraph(
        [x, b],
        [out])

    theano.compile.mode.optdb.query(
        theano.compile.mode.OPT_FAST_RUN).optimize(fgraph)

    # print 'AFTER'
    # for node in fgraph.toposort():
    #    print node.op
    types_to_check = (tensor.DimShuffle, tensor.Elemwise, tensor.MaxAndArgmax)
    assert len(fgraph.toposort()) == 4
    # Loop variable renamed so it no longer shadows the builtin ``type``.
    for i, op_type in enumerate(types_to_check):
        assert isinstance(fgraph.toposort()[i].op, op_type)
    assert str(fgraph.toposort()[3].op) == 'OutputGuard'
    assert check_stack_trace(fgraph, ops_to_check=types_to_check)

    x = tensor.matrix()
    b = tensor.vector()
    out = tensor.max_and_argmax(softmax_with_bias(x, b), axis=-1)[0]
    fgraph = gof.FunctionGraph(
        [x, b],
        [out])

    # Silence the pushdown warning while optimising and always restore the
    # previous setting, even if the optimisation raises.
    backup = config.warn.argmax_pushdown_bug
    config.warn.argmax_pushdown_bug = False
    try:
        theano.compile.mode.optdb.query(
            theano.compile.mode.OPT_FAST_RUN).optimize(fgraph)
    finally:
        config.warn.argmax_pushdown_bug = backup

    # print 'AFTER'
    # for node in fgraph.toposort():
    #    print node.op
    assert len(fgraph.toposort()) == 3
    assert isinstance(fgraph.toposort()[0].op, SoftmaxWithBias)
    assert isinstance(fgraph.toposort()[1].op, tensor.CAReduce)
    assert isinstance(fgraph.toposort()[1].op.scalar_op, theano.scalar.Maximum)
    assert str(fgraph.toposort()[2].op) == 'OutputGuard'
    assert check_stack_trace(
        fgraph, ops_to_check=(SoftmaxWithBias, tensor.CAReduce))
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def local_gpu_elemwise_0(node):
    """
    Elemwise(..., host_from_gpu, ...)
    -> host_from_gpu(elemwise(gpu_from_host, ..., gpu_from_host)

    Local rewrite applied to a host ``tensor.Elemwise`` that has at least one
    input coming from ``HostFromGpu`` and whose outputs are all ``float32``.
    The elemwise is rebuilt as a ``GpuElemwise`` and its outputs are
    transferred back to the host.

    Returns the list of replacement outputs, ``False`` when a check inside
    the rewrite fails, and falls through (returning ``None``) when the node
    does not match at all.
    """
    # NOTE(review): the second operand of this condition was truncated in the
    # visible source. It is restored as the dtype support check used by
    # upstream Theano; confirm the helper name against the full file.
    if (isinstance(node.op, tensor.Elemwise) and
            dtype_in_elemwise_supported(node.op)):
        if any([i.owner and
                isinstance(i.owner.op, HostFromGpu)
                for i in node.inputs]):
            if all([o.type.dtype == 'float32' for o in node.outputs]):
                # Don't set any inplace pattern.
                # gpu_inplace_elemwise_optimizer will do it later

                if isinstance(node.op.scalar_op, Erfinv):
                    new_op = GpuElemwise(erfinv_gpu)
                elif isinstance(node.op.scalar_op, Erfcx):
                    new_op = GpuElemwise(erfcx_gpu)
                else:
                    try:
                        new_op = GpuElemwise(node.op.scalar_op)
                    except SupportCodeError:
                        # This happens when scalar_op requires support code
                        return False

                #   first establish that float32 can store all inputs
                # NOTE(review): the tail of this set was truncated in the
                # visible source; 'uint16' is restored from upstream Theano.
                upcastable = set(['float32', 'int8', 'int16', 'uint8',
                                  'uint16'])
                # case 1 - all inputs are already float32
                if all([i.type.dtype == 'float32' for i in node.inputs]):
                    # TODO: change this when fusion makes Elemwise with
                    # multiple outputs
                    # return_list=True so that gpu_elemwise[0] below is valid
                    # in both cases.
                    gpu_elemwise = new_op(*(as_cuda_ndarray_variable(i)
                                            for i in node.inputs),
                                          return_list=True)
                # case 2 - it is still ok if some inputs were upcast to float32
                elif all([i.type.dtype in upcastable
                          for i in node.inputs]):
                    # second - establish that a new node with upcasted inputs
                    # has the same outputs types as the original node
                    upcasted = node.op.make_node(*[tensor.cast(i, 'float32')
                                                   for i in node.inputs])
                    if [o.type for o in upcasted.outputs] ==\
                       [o.type for o in node.outputs]:

                        new_inputs = [as_cuda_ndarray_variable(tensor.cast(i, 'float32'))
                                      for i in node.inputs]
                        gpu_elemwise = new_op(*new_inputs, return_list=True)
                    else:
                        return False
                else:
                    return False

                gpu_elemwise = split_huge_add_or_mul(gpu_elemwise[0].owner)
                if not gpu_elemwise:
                    return False
                # NOTE(review): the right-hand side of this comparison was
                # truncated in the visible source; restored from upstream
                # Theano (number of inputs of the new GPU node).
                if (max_inputs_to_GpuElemwise(node) <
                        len(gpu_elemwise.inputs)):
                    return False
                return [host_from_gpu(out) for out in gpu_elemwise.outputs]