We extracted the following 45 code examples from open-source Python projects to illustrate how to use theano.tensor.Elemwise().
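Before the extracted examples, here is a minimal sketch (not taken from any of the projects below; the variable names are illustrative) of the pattern most of them rely on: wrap a scalar op in Elemwise to get an element-wise tensor op, then inspect the compiled graph for Elemwise nodes.

import numpy as np
import theano
import theano.tensor as T
from theano import scalar

# Build an element-wise add by wrapping the scalar op in Elemwise.
x = T.matrix('x')
y = T.matrix('y')
z = T.Elemwise(scalar.add)(x, y)   # equivalent to x + y
f = theano.function([x, y], z)

a = np.ones((2, 3), dtype=theano.config.floatX)
b = np.ones((2, 3), dtype=theano.config.floatX)
print(f(a, b))                     # array filled with 2.0

# Many of the examples below inspect the optimized graph for Elemwise nodes.
elemwise_nodes = [node for node in f.maker.fgraph.toposort()
                  if isinstance(node.op, T.Elemwise)]
print(len(elemwise_nodes) >= 1)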
def get_processor_type():
    """
    Test whether the GPU is being used, based on the example in
    http://deeplearning.net/software/theano/tutorial/using_gpu.html
    """
    rng = np.random.RandomState(1234)
    n = 10 * 30 * 768
    x = shared(rng.rand(n))
    f = theano.function([], tensor.exp(x))
    if np.any([isinstance(x.op, tensor.Elemwise) and
               ('Gpu' not in type(x.op).__name__)
               for x in f.maker.fgraph.toposort()]):
        return 'cpu'
    return 'gpu'
def test_infer_shape(self):
    for s_left, s_right in [((5, 6), (5, 6)),
                            ((5, 6), (5, 1)),
                            ((5, 6), (1, 6)),
                            ((5, 1), (5, 6)),
                            ((1, 6), (5, 6)),
                            ((2, 3, 4, 5), (2, 3, 4, 5)),
                            ((2, 3, 4, 5), (2, 3, 1, 5)),
                            ((2, 3, 4, 5), (1, 3, 4, 5)),
                            ((2, 1, 4, 5), (2, 3, 4, 5)),
                            ((2, 3, 4, 1), (2, 3, 4, 5))]:
        dtype = theano.config.floatX
        t_left = TensorType(dtype, [(entry == 1) for entry in s_left])()
        t_right = TensorType(dtype, [(entry == 1) for entry in s_right])()
        t_left_val = numpy.zeros(s_left, dtype=dtype)
        t_right_val = numpy.zeros(s_right, dtype=dtype)
        self._compile_and_check([t_left, t_right],
                                [Elemwise(scalar.add)(t_left, t_right)],
                                [t_left_val, t_right_val], Elemwise)
def test_recursive_lift(self):
    v = T.vector(dtype="float64")
    m = T.matrix(dtype="float64")
    out = ((v + 42) * (m + 84)).T
    g = FunctionGraph([v, m], [out])
    init_str_g = ("[InplaceDimShuffle{1,0}(Elemwise{mul,no_inplace}"
                  "(InplaceDimShuffle{x,0}(Elemwise{add,no_inplace}"
                  "(<TensorType(float64, vector)>, "
                  "InplaceDimShuffle{x}(TensorConstant{42}))), "
                  "Elemwise{add,no_inplace}"
                  "(<TensorType(float64, matrix)>, "
                  "InplaceDimShuffle{x,x}(TensorConstant{84}))))]")
    self.assertTrue(str(g) == init_str_g)

    new_out = local_dimshuffle_lift.transform(g.outputs[0].owner)[0]
    new_g = FunctionGraph(g.inputs, [new_out])
    opt_str_g = ("[Elemwise{mul,no_inplace}(Elemwise{add,no_inplace}"
                 "(InplaceDimShuffle{0,x}(<TensorType(float64, vector)>), "
                 "InplaceDimShuffle{x,x}(TensorConstant{42})), "
                 "Elemwise{add,no_inplace}(InplaceDimShuffle{1,0}"
                 "(<TensorType(float64, matrix)>), "
                 "InplaceDimShuffle{x,x}(TensorConstant{84})))]")
    self.assertTrue(str(new_g) == opt_str_g)

    # Check stacktrace was copied over correctly after opt was applied
    self.assertTrue(check_stack_trace(new_g, ops_to_check='all'))
def test_eq(self):
    x = T.dmatrix()
    y = T.dmatrix()
    f = theano.function([x, y], T.eq(x, y), mode=self.mode)
    vx = numpy.random.rand(5, 4)
    vy = numpy.random.rand(5, 4)
    f(vx, vy)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 1
    assert isinstance(topo[0].op, T.Elemwise)
    assert isinstance(topo[0].op.scalar_op, theano.scalar.EQ)

    f2 = theano.function([x], T.eq(x, x), mode=self.mode)
    assert numpy.all(f2(vx) == numpy.ones((5, 4)))
    topo2 = f2.maker.fgraph.toposort()
    # Shape_i{1}(<TensorType(float64, matrix)>),
    # Shape_i{0}(<TensorType(float64, matrix)>),
    # Alloc([[1]], Shape_i{0}.0, Shape_i{1}.0)
    assert len(topo2) == 3
    assert isinstance(topo2[-1].op, T.Alloc)
def test_neq(self):
    x = T.dmatrix()
    y = T.dmatrix()
    f = theano.function([x, y], T.neq(x, y), mode=self.mode)
    vx = numpy.random.rand(5, 4)
    vy = numpy.random.rand(5, 4)
    f(vx, vy)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 1
    assert isinstance(topo[0].op, T.Elemwise)
    assert isinstance(topo[0].op.scalar_op, theano.scalar.NEQ)

    f2 = theano.function([x], T.neq(x, x), mode=self.mode)
    assert numpy.all(f2(vx) == numpy.zeros((5, 4)))
    topo2 = f2.maker.fgraph.toposort()
    assert len(topo2) == 3
    assert isinstance(topo2[-1].op, T.Alloc)
def test_mul(self):
    x = T.dmatrix()
    y = T.dmatrix()
    f = theano.function([x], T.mul(x), mode=self.mode)
    vx = numpy.random.rand(5, 4)
    vy = numpy.random.rand(5, 4)
    f(vx)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 1
    assert topo[0].op == deep_copy_op

    f2 = theano.function([x, y], T.mul(x, y), mode=self.mode)
    assert numpy.all(f2(vx, vy) == vx * vy)
    topo2 = f2.maker.fgraph.toposort()
    assert len(topo2) == 1
    assert isinstance(topo2[0].op, T.Elemwise)
    assert isinstance(topo2[0].op.scalar_op, theano.scalar.Mul)
def test(self):
    x = T.fmatrix()
    o = T.Elemwise(scal.Cast(scal.Scalar("float64")))(x.astype("float64"))
    f = theano.function([x], o, mode=self.mode)
    dx = numpy.random.rand(5, 4).astype("float32")
    f(dx)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 1
    assert isinstance(topo[0].op, T.Elemwise)

    x = T.dmatrix()
    o = T.Elemwise(scal.Cast(scal.Scalar("float32")))(x.astype("float32"))
    f = theano.function([x], o, mode=self.mode)
    dx = numpy.random.rand(5, 4)
    f(dx)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 1
    assert isinstance(topo[0].op, T.Elemwise)
def test_local_one_plus_erf(self):
    val = numpy.asarray([-30, -3, -2, -1, 0, 1, 2, 3, 30],
                        dtype=config.floatX)
    x = T.vector()

    f = theano.function([x], 1 + T.erf(x), mode=self.mode)
    assert [n.op for n in f.maker.fgraph.toposort()] == [
        T.mul, T.erfc], f.maker.fgraph.toposort()
    f(val)

    f = theano.function([x], T.erf(x) + 1, mode=self.mode)
    assert [n.op for n in f.maker.fgraph.toposort()] == [
        T.mul, T.erfc], f.maker.fgraph.toposort()
    f(val)

    f = theano.function([x], T.erf(x) + 2, mode=self.mode)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 2
    assert topo[0].op == T.erf
    assert isinstance(topo[1].op, T.Elemwise)
    assert isinstance(topo[1].op.scalar_op, scal.Add)
    f(val)
def test_local_erf_minus_one(self):
    val = numpy.asarray([-30, -3, -2, -1, 0, 1, 2, 3, 30],
                        dtype=config.floatX)
    x = T.vector()

    f = theano.function([x], T.erf(x) - 1, mode=self.mode)
    assert [n.op for n in f.maker.fgraph.toposort()] == [T.erfc, T.mul]
    print(f(val))

    f = theano.function([x], T.erf(x) + (-1), mode=self.mode)
    assert [n.op for n in f.maker.fgraph.toposort()] == [T.erfc, T.mul]
    print(f(val))

    f = theano.function([x], -1 + T.erf(x), mode=self.mode)
    assert [n.op for n in f.maker.fgraph.toposort()] == [T.erfc, T.mul]
    print(f(val))

    f = theano.function([x], T.erf(x) - 2, mode=self.mode)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 2
    assert topo[0].op == T.erf
    assert isinstance(topo[1].op, T.Elemwise)
    assert isinstance(topo[1].op.scalar_op, scal.Add) \
        or isinstance(topo[1].op.scalar_op, scal.Sub)
    print(f(val))
def test_broadcast1(self):
    # test switch(cst, matrix, row)
    x = theano.tensor.matrix('x', dtype='int32')
    y = theano.tensor.vector('y', dtype='int64')

    z = theano.tensor.switch(1, x, y)
    f = theano.function([x, y], z, mode=self.mode)
    assert len([node.op for node in f.maker.fgraph.toposort()
                if isinstance(node.op, theano.tensor.Elemwise) and
                not isinstance(node.op.scalar_op,
                               theano.scalar.basic.Cast)]) == 0
    vx = numpy.array([[1, 2, 3], [4, 5, 6]], dtype='int32')
    vy = numpy.array([10, 11, 12], dtype='int64')
    assert numpy.all(f(vx, vy) == vx)

    z = theano.tensor.switch(0, x, y)
    f = theano.function([x, y], z, mode=self.mode)
    assert len([node.op for node in f.maker.fgraph.toposort()
                if isinstance(node.op, theano.tensor.Elemwise)]) == 0
    vx = numpy.array([[1, 2, 3], [4, 5, 6]], dtype='int32')
    vy = numpy.array([10, 11, 12], dtype='int64')
    assert numpy.all(f(vx, vy) == vy)
def test_broadcast2(self):
    # test switch(cst, vector, matrix)
    # This case is not optimized for now.
    x = theano.tensor.vector('x', dtype='int32')
    y = theano.tensor.matrix('y', dtype='int64')

    z = theano.tensor.switch(1, x, y)
    f = theano.function([x, y], z, mode=self.mode)
    assert len([node.op for node in f.maker.fgraph.toposort()
                if isinstance(node.op, theano.tensor.Elemwise) and
                not isinstance(node.op.scalar_op,
                               theano.scalar.basic.Cast)]) == 0
    vx = numpy.array([4, 5, 6], dtype='int32')
    vy = numpy.array([[7, 8, 9], [10, 11, 12]], dtype='int64')
    assert numpy.all(f(vx, vy) == vx)

    z = theano.tensor.switch(0, x, y)
    f = theano.function([x, y], z, mode=self.mode)
    assert len([node.op for node in f.maker.fgraph.toposort()
                if isinstance(node.op, theano.tensor.Elemwise)]) == 0
    vx = numpy.array([4, 5, 6], dtype='int32')
    vy = numpy.array([[7, 8, 9], [10, 11, 12]], dtype='int64')
    assert numpy.all(f(vx, vy) == vy)
def test_local_zero_div():
    """Tests 0/x -> 0"""
    mode = theano.compile.mode.get_default_mode().including("local_zero_div")
    for t in (T.scalar, T.ivector, T.ftensor4):
        x = t('x')
        for op in (T.int_div, T.true_div):
            y = op(0, x)
            g = optimize(FunctionGraph([x], [y]))
            # the division should be gone
            divs = [node for node in g.toposort()
                    if isinstance(node.op, T.elemwise.Elemwise) and
                    isinstance(node.op.scalar_op, type(op.scalar_op))]
            assert len(divs) == 0
            # the output type should match the unoptimized one
            output = g.outputs[0]
            assert output.ndim == y.ndim
            assert output.type == y.type
            # and the output should be zero
            assert theano.tensor.get_scalar_constant_value(output) == 0
def test_incsubtensor_mixed():
    # This catches a bug that occurred when incrementing
    # a float32 tensor by a float64 tensor.
    # The result is defined to be float32, so it is OK
    # to downcast the float64 increment in order to
    # transfer it to the GPU.
    # The bug was that the optimization called GpuFromHost
    # without casting first, causing the optimization to
    # fail.
    X = tensor.fmatrix()
    Y = tensor.dmatrix()
    Z = tensor.inc_subtensor(X[0:1, 0:1], Y)
    f = theano.function([X, Y], Z, mode=mode_with_gpu)
    packed, = f.maker.fgraph.inputs[1].clients
    client, idx = packed
    print(client)
    assert isinstance(client.op, tensor.Elemwise)
    assert isinstance(client.op.scalar_op, theano.scalar.Cast)
    packed, = client.outputs[0].clients
    client, idx = packed
    assert isinstance(client.op, cuda.GpuFromHost)
def test_elemwise4():
    """ Test that two vectors can be broadcast to form an outer
    product (by performing a rank-1 matrix update). """
    shape = (3, 4)
    a = tcn.shared_constructor(theano._asarray(numpy.random.rand(*shape),
                                               dtype='float32'), 'a')
    b = tensor.fvector()
    c = tensor.fvector()
    f = pfunc([b, c], [],
              updates=[(a, (a + b.dimshuffle('x', 0) *
                            c.dimshuffle(0, 'x')))],
              mode=mode_with_gpu)

    has_elemwise = False
    for i, node in enumerate(f.maker.fgraph.toposort()):
        has_elemwise = has_elemwise or isinstance(node.op, tensor.Elemwise)
    assert not has_elemwise
    # let debugmode catch errors
    f(theano._asarray(numpy.random.rand(4), dtype='float32'),
      theano._asarray(numpy.random.rand(3), dtype='float32'))
def testTheano():
    from theano import function, config, shared, sandbox
    import theano.tensor as T
    import numpy
    import time
    print("Testing Theano library...")
    vlen = 10 * 30 * 768  # 10 x #cores x # threads per core
    iters = 1000

    rng = numpy.random.RandomState(22)
    x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
    f = function([], T.exp(x))
    print(f.maker.fgraph.toposort())
    t0 = time.time()
    for i in range(iters):
        r = f()
    t1 = time.time()
    print("Looping %d times took %f seconds" % (iters, t1 - t0))
    print("Result is %s" % (r,))
    if numpy.any([isinstance(x.op, T.Elemwise)
                  for x in f.maker.fgraph.toposort()]):
        print('Used the cpu')
    else:
        print('Used the gpu')

# Perform check:
# testTheano()

# ----------------------
# - network3.py example:
def check_gpu():
    from theano import function
    import theano.tensor as T
    f = function([], T.exp([9]))
    if np.any([isinstance(x.op, T.Elemwise)
               for x in f.maker.fgraph.toposort()]):
        print('Used the cpu')
    else:
        print('Used the gpu')
def with_linker(self, linker, op, type, rand_val):
    for xsh, ysh in [((3, 5), (3, 5)),
                     ((3, 5), (1, 5)),
                     ((3, 5), (3, 1)),
                     ((1, 5), (5, 1)),
                     ((1, 1), (1, 1)),
                     ((self.openmp_minsize,), (self.openmp_minsize,)),
                     ((self.openmp_minsize_sqrt, self.openmp_minsize_sqrt),
                      (self.openmp_minsize_sqrt, self.openmp_minsize_sqrt)),
                     ((2, 3, 4, 5), (2, 3, 4, 5)),
                     ((2, 3, 4, 5), (1, 3, 1, 5)),
                     ((2, 3, 4, 5), (1, 1, 1, 1)),
                     ((), ())]:
        x = type(theano.config.floatX,
                 [(entry == 1) for entry in xsh])('x')
        y = type(theano.config.floatX,
                 [(entry == 1) for entry in ysh])('y')
        e = op(scalar.add)(x, y)
        f = copy(linker).accept(FunctionGraph([x, y], [e])).make_function()
        xv = rand_val(xsh)
        yv = rand_val(ysh)
        zv = xv + yv
        unittest_tools.assert_allclose(f(xv, yv), zv)

        # test Elemwise.infer_shape
        # the Shape op don't implement c_code!
        if isinstance(linker, gof.PerformLinker):
            x = type(theano.config.floatX,
                     [(entry == 1) for entry in xsh])('x')
            y = type(theano.config.floatX,
                     [(entry == 1) for entry in ysh])('y')
            e = op(scalar.add)(x, y)
            f = copy(linker).accept(FunctionGraph(
                [x, y], [e.shape])).make_function()
            assert tuple(f(xv, yv)) == tuple(zv.shape)
def with_linker_inplace(self, linker, op, type, rand_val):
    for xsh, ysh in [((5, 5), (5, 5)),
                     ((5, 5), (1, 5)),
                     ((5, 5), (5, 1)),
                     ((1, 1), (1, 1)),
                     ((2, 3, 4, 5), (2, 3, 4, 5)),
                     ((2, 3, 4, 5), (1, 3, 1, 5)),
                     ((2, 3, 4, 5), (1, 1, 1, 1)),
                     ((), ())]:
        x = type(theano.config.floatX,
                 [(entry == 1) for entry in xsh])('x')
        y = type(theano.config.floatX,
                 [(entry == 1) for entry in ysh])('y')
        e = op(scalar.Add(scalar.transfer_type(0)), {0: 0})(x, y)
        f = copy(linker).accept(FunctionGraph([x, y], [e])).make_function()
        xv = rand_val(xsh)
        yv = rand_val(ysh)
        zv = xv + yv
        f(xv, yv)
        self.assertTrue((xv == zv).all())

        # test Elemwise.infer_shape
        # the Shape op don't implement c_code!
        if isinstance(linker, gof.PerformLinker):
            x = type(theano.config.floatX,
                     [(entry == 1) for entry in xsh])('x')
            y = type(theano.config.floatX,
                     [(entry == 1) for entry in ysh])('y')
            e = op(scalar.Add(scalar.transfer_type(0)), {0: 0})(x, y)
            f = copy(linker).accept(FunctionGraph(
                [x, y], [e.shape])).make_function()
            xv = rand_val(xsh)
            yv = rand_val(ysh)
            zv = xv + yv
            f(xv, yv)
            assert xv.shape == zv.shape
def test_input_dimensions_overflow(self):
    # Elemwise.perform used to compute the product
    # of input shapes to check if there was a zero in them,
    # it overflowed in this case.
    a, b, c, d, e, f = tensor.vectors('abcdef')
    s = a + b + c + d + e + f
    g = theano.function([a, b, c, d, e, f], s,
                        mode=theano.compile.Mode(linker='py'))
    g(*[numpy.zeros(2 ** 11, config.floatX) for i in xrange(6)])
def test_not_implemented_elemwise_grad():
    """
    Regression test for unimplemented gradient in an Elemwise Op.
    """

    class TestOp(scalar.ScalarOp):

        def __init__(self):
            self.output_types_preference = scalar.upgrade_to_float

        def impl(self, n, x):
            return x * n

        def grad(self, inputs, gout):
            (n, x) = inputs
            (gz,) = gout
            dy_dx = n
            return [theano.gradient.grad_not_implemented(self, 0, n),
                    gz * dy_dx]

    test_op = tensor.Elemwise(TestOp())
    x = tensor.scalar()
    # The call to `grad` used to crash.
    tensor.grad(test_op(2, x), x)
    # Verify that trying to use the not implemented gradient fails.
    try:
        tensor.grad(test_op(x, 2), x)
        assert False
    except theano.gradient.NullTypeGradError:
        pass
def test_lift(self):
    x, y, z = inputs([False] * 1, [False] * 2, [False] * 3)
    e = x + y + z
    g = FunctionGraph([x, y, z], [e])

    # It does not really matter if the DimShuffles are inplace
    # or not.
    init_str_g_inplace = (
        "[Elemwise{add,no_inplace}(InplaceDimShuffle{x,0,1}"
        "(Elemwise{add,no_inplace}(InplaceDimShuffle{x,0}(x), y)), z)]")
    init_str_g_noinplace = (
        "[Elemwise{add,no_inplace}(DimShuffle{x,0,1}"
        "(Elemwise{add,no_inplace}(DimShuffle{x,0}(x), y)), z)]")
    self.assertTrue(str(g) in (init_str_g_inplace, init_str_g_noinplace),
                    str(g))

    opt_str_g_inplace = (
        "[Elemwise{add,no_inplace}(Elemwise{add,no_inplace}"
        "(InplaceDimShuffle{x,x,0}(x), InplaceDimShuffle{x,0,1}(y)), z)]")
    opt_str_g_noinplace = (
        "[Elemwise{add,no_inplace}(Elemwise{add,no_inplace}"
        "(DimShuffle{x,x,0}(x), DimShuffle{x,0,1}(y)), z)]")
    dimshuffle_lift.optimize(g)
    self.assertTrue(str(g) in (opt_str_g_inplace, opt_str_g_noinplace),
                    str(g))

    # Check stacktrace was copied over correctly after opt was applied
    self.assertTrue(check_stack_trace(g, ops_to_check='all'))
def setUp(self):
    upgrade_to_float = theano.scalar.basic.upgrade_to_float

    self.scal_times_2 = TimesN(2, upgrade_to_float, name='times_2')
    self.times_2 = theano.tensor.elemwise.Elemwise(
        self.scal_times_2, name='times_2')

    self.scal_times_3 = TimesN(3, upgrade_to_float, name='times_3')
    self.times_3 = theano.tensor.elemwise.Elemwise(
        self.scal_times_3, name='times_3')

    self.x = fvector()
def test_local_IncSubtensor_serialize():
    d = numpy.random.normal(0, 0.01, size=(100, 100))
    d = d.astype(theano.config.floatX)

    W = theano.shared(d, name='W')
    i = T.vector('i', dtype='int64')
    j = T.vector('j', dtype='int64')
    t = T.scalar('t')
    if theano.tensor.subtensor.inplace_increment:
        y = (W[i] + W[j] + W[1] + W[i, j]).sum()
    else:
        y = (W[i] + W[j] + W[1]).sum()
    cost = T.sqr(t - y)
    dW = theano.grad(cost, W)
    mode = theano.compile.mode.get_default_mode().excluding('fusion')
    mode = mode.including("local_IncSubtensor_serialize")
    f = theano.function([i, j, t], updates=[(W, W - 0.01 * dW)], mode=mode)
    topo = f.maker.fgraph.toposort()
    adds = [n for n in topo if isinstance(n.op, T.Elemwise) and
            isinstance(n.op.scalar_op, theano.scalar.Add)]
    for a in adds:
        assert not any([inp.owner and
                        isinstance(inp.owner.op,
                                   (tensor.IncSubtensor,
                                    tensor.AdvancedIncSubtensor,
                                    tensor.AdvancedIncSubtensor1))
                        for inp in a.inputs])

    # Now test that the stack trace is copied over properly,
    # if we return the gradients. We need to use same mode as before.
    f = theano.function([i, j, t], dW, mode=mode)
    assert check_stack_trace(f, ops_to_check=[
        tensor.IncSubtensor, tensor.AdvancedIncSubtensor,
        tensor.AdvancedIncSubtensor1])
def test_identity(self):
    # scalar.identity is used in 2 Elemwise functions:
    # tensor_copy, and view
    x = T.matrix()
    f = theano.function([x], T.tensor_copy(x), mode=self.mode)
    vx = numpy.random.rand(5, 4).astype(config.floatX)
    f(vx)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 1
    assert topo[0].op == deep_copy_op
def test_local_one_minus_erf(self):
    val = numpy.asarray([-30, -3, -2, -1, 0, 1, 2, 3, 30],
                        dtype=config.floatX)
    x = T.vector()

    f = theano.function([x], 1 - T.erf(x), mode=self.mode)
    assert [n.op for n in f.maker.fgraph.toposort()] == [T.erfc], \
        f.maker.fgraph.toposort()
    print(f(val))

    f = theano.function([x], 1 + (-T.erf(x)), mode=self.mode)
    assert [n.op for n in f.maker.fgraph.toposort()] == [T.erfc], \
        f.maker.fgraph.toposort()
    print(f(val))

    f = theano.function([x], (-T.erf(x)) + 1, mode=self.mode)
    assert [n.op for n in f.maker.fgraph.toposort()] == [T.erfc], \
        f.maker.fgraph.toposort()
    print(f(val))

    f = theano.function([x], 2 - T.erf(x), mode=self.mode)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 2, f.maker.fgraph.toposort()
    assert topo[0].op == T.erf, f.maker.fgraph.toposort()
    assert isinstance(topo[1].op, T.Elemwise), f.maker.fgraph.toposort()
    assert isinstance(topo[1].op.scalar_op, scal.Add) \
        or isinstance(topo[1].op.scalar_op, scal.Sub), \
        f.maker.fgraph.toposort()
    print(f(val))
def test_const0(self):
    for dtype1 in ['int32', 'int64']:
        for dtype2 in ['int32', 'int64']:
            x = theano.tensor.matrix('x', dtype=dtype1)
            y = theano.tensor.matrix('y', dtype=dtype2)
            z = theano.tensor.switch(0, x, y)
            f = theano.function([x, y], z, mode=self.mode)
            assert len([node.op for node in f.maker.fgraph.toposort()
                        if (isinstance(node.op, theano.tensor.Elemwise) and
                            isinstance(node.op.scalar_op,
                                       theano.scalar.basic.Switch))]) == 0
            vx = numpy.array([[1, 2, 3], [4, 5, 6]], dtype=dtype1)
            vy = numpy.array([[7, 8, 9], [10, 11, 12]], dtype=dtype2)
            assert numpy.all(f(vx, vy) == vy)
def test_const1(self):
    for dtype1 in ['int32', 'int64']:
        for dtype2 in ['int32', 'int64']:
            x = theano.tensor.matrix('x', dtype=dtype1)
            y = theano.tensor.matrix('y', dtype=dtype2)
            z = theano.tensor.switch(1, x, y)
            f = theano.function([x, y], z, mode=self.mode)
            assert len([node.op for node in f.maker.fgraph.toposort()
                        if (isinstance(node.op, theano.tensor.Elemwise) and
                            isinstance(node.op.scalar_op,
                                       theano.scalar.basic.Switch))]) == 0
            vx = numpy.array([[1, 2, 3], [4, 5, 6]], dtype=dtype1)
            vy = numpy.array([[7, 8, 9], [10, 11, 12]], dtype=dtype2)
            assert numpy.all(f(vx, vy) == vx)
def test_broadcast3(self):
    # test switch(matrix, same_vector, same_vector)
    x = theano.tensor.matrix('x', dtype='int32')
    y = theano.tensor.vector('y', dtype='int64')
    z = theano.tensor.switch(x, y, y)
    f = theano.function([x, y], z, mode=self.mode)
    vx = numpy.array([[0, 1], [1, 0]], dtype='int32')
    vy = numpy.array([7, 8], dtype='int64')
    utt.assert_allclose(f(vx, vy), numpy.where(vx, vy, vy))
    assert len([node.op for node in f.maker.fgraph.toposort()
                if isinstance(node.op, theano.tensor.Elemwise)]) == 0
def test_local_reshape_lift():
    x = tensor.tensor4()
    out = T.exp(x).reshape([x.size])
    assert out.ndim == 1
    mode = compile.mode.get_default_mode()
    mode = mode.including('local_reshape_lift')
    f = theano.function([x], out, mode=mode)
    f(numpy.random.rand(5, 4, 3, 2).astype(config.floatX))
    topo = f.maker.fgraph.toposort()
    assert isinstance(topo[-2].op, tensor.Reshape)
    assert isinstance(topo[-1].op, tensor.Elemwise)
def test1(self):
    """Tests removing the extra floor_div by 1 introduced by
    local_subtensor_merge optimization"""
    y = T.tensor4('y')
    self.mode = self.mode.excluding('fusion')
    f = theano.function([y], y[::-1][::-1], mode=self.mode)

    graph = f.maker.fgraph.toposort()
    divs = [node for node in graph
            if isinstance(node.op, T.elemwise.Elemwise) and
            isinstance(node.op.scalar_op, theano.scalar.IntDiv)]
    assert len(divs) == 0
def test2(self):
    """Simple test case for removing dividing by 1"""
    y = T.tensor4('y')
    z = y // 1
    f = theano.function([y], z, mode=self.mode)

    graph = f.maker.fgraph.toposort()
    divs = [node for node in graph
            if isinstance(node.op, T.elemwise.Elemwise) and
            isinstance(node.op.scalar_op, theano.scalar.IntDiv)]
    assert len(divs) == 0
def test3(self):
    """Simple test case for removing dividing by a tensor of ones"""
    y = T.tensor4('y')
    z = y // numpy.ones((2, 2, 2, 2))
    f = theano.function([y], z, mode=self.mode)

    graph = f.maker.fgraph.toposort()
    divs = [node for node in graph
            if isinstance(node.op, T.elemwise.Elemwise) and
            isinstance(node.op.scalar_op, theano.scalar.IntDiv)]
    assert len(divs) == 0
def test_erfinvgpu():
    """ Test that local_gpu_elemwise_0 replaces Erfinv with ErfinvGPU """
    x = tensor.fmatrix()
    f = theano.function([x], tensor.Elemwise(erfinv)(x),
                        mode=mode_with_gpu)
    theano.function([x], tensor.Elemwise(erfinv)(x),
                    mode=mode_without_gpu)
    assert isinstance(f.maker.fgraph.toposort()[1].op, cuda.GpuElemwise)
    assert isinstance(f.maker.fgraph.toposort()[1].op.scalar_op,
                      cuda.elemwise.ErfinvGPU)
    numpy.random.rand(7, 8).astype('float32')
def test_elemwise2():
    """ Several kinds of elemwise expressions with dimension
    permutations """
    rng = numpy.random.RandomState(int(time.time()))
    shape = (3, 5)
    for pattern in [(0, 1), (1, 0)]:
        a = tcn.shared_constructor(theano._asarray(rng.rand(*shape),
                                                   dtype='float32'),
                                   name=None)
        b = tensor.Tensor(dtype='float32',
                          broadcastable=[0] * len(shape))()
        f = pfunc([b], [], updates=[(a, (a + b).dimshuffle(pattern))],
                  mode=mode_with_gpu)
        has_elemwise = False
        for i, node in enumerate(f.maker.fgraph.toposort()):
            has_elemwise = has_elemwise or isinstance(node.op,
                                                      tensor.Elemwise)
        assert not has_elemwise
        # let debugmode catch errors
        f(theano._asarray(rng.rand(*shape), dtype='float32') * .3)

    shape = (3, 4, 5, 6)
    a = tcn.shared_constructor(theano._asarray(rng.rand(*shape),
                                               dtype='float32'), 'a')
    b = tensor.Tensor(dtype='float32', broadcastable=[0] * len(shape))()
    f = pfunc([b], [],
              updates=[(a, (a + b).dimshuffle([2, 0, 3, 1]) *
                        tensor.exp(b ** a).dimshuffle([2, 0, 3, 1]))],
              mode=mode_with_gpu)
    has_elemwise = False
    for i, node in enumerate(f.maker.fgraph.toposort()):
        has_elemwise = has_elemwise or isinstance(node.op, tensor.Elemwise)
    assert not has_elemwise
    # let debugmode catch errors
    f(theano._asarray(rng.rand(*shape), dtype='float32'))
def test_elemwise_composite_support_code():
    """
    This was generating an error at compile time.
    Commit 3d1690fa346103594356ecaeceeb2c6757b45d2b fixed that.
    """
    X = tcn.shared_constructor(value=numpy.zeros((100, 10), dtype="float32"),
                               name='X')
    W = tcn.shared_constructor(value=numpy.zeros((10, 1), dtype="float32"),
                               name='W')
    U = T.dot(X, W)
    Y = tcn.shared_constructor(value=numpy.zeros((100, 1), dtype="float32"),
                               name='Y')
    P = T.exp(-(Y - U) ** 2)
    epsilon = numpy.asarray(0.001, dtype="float32")
    NLL = -T.mean(T.log(P + epsilon))  # SupportCodeError
    G = theano.gradient.grad(NLL, wrt=[W])

    backup = theano.config.warn.identify_1pexp_bug
    theano.config.warn.identify_1pexp_bug = False
    try:
        f_grad = theano.function(inputs=[], outputs=G, mode=mode_with_gpu)
    finally:
        theano.config.warn.identify_1pexp_bug = backup
    f_grad()

    topo = f_grad.maker.fgraph.toposort()
    assert sum([isinstance(node.op, T.Elemwise) for node in topo]) == 1
    # I suspect this was failing in the original branch too
    assert sum([isinstance(node.op, tcn.GpuElemwise) for node in topo]) == 1
def dtype_in_elemwise_supported(op):
    """
    Return True if the Elemwise op is supported on the gpu.
    Return False otherwise.

    Notes
    -----
    We need to check inside the Composite op.

    """
    def get_all_basic_scalar(composite_op):
        l = []
        for i in composite_op.fgraph.toposort():
            if isinstance(i, theano.scalar.Composite):
                l += get_all_basic_scalar(i)
            else:
                l.append(i)
        return l

    if isinstance(op, GpuElemwise) or isinstance(op, tensor.Elemwise):
        if isinstance(op.scalar_op, theano.scalar.Composite):
            scals = get_all_basic_scalar(op.scalar_op)
            for s in scals:
                if any([i.type.dtype not in elemwise_cuda_dtype_supported
                        for i in s.inputs + s.outputs]):
                    return False
    return True
def local_gpu_elemwise_1(node):
    """
    gpu_from_host(Elemwise) -> GpuElemwise(gpu_from_host(...))

    """
    if isinstance(node.op, GpuFromHost):
        host_i, = node.inputs
        if (host_i.owner and
                isinstance(host_i.owner.op, tensor.Elemwise) and
                len(host_i.clients) == 1 and
                dtype_in_elemwise_supported(node.op)):

            elemwise_node = host_i.owner
            # Don't set any inplace pattern.
            # gpu_inplace_elemwise_optimizer will do it later

            if isinstance(elemwise_node.op.scalar_op, Erfinv):
                new_op = GpuElemwise(erfinv_gpu)
            elif isinstance(elemwise_node.op.scalar_op, Erfcx):
                new_op = GpuElemwise(erfcx_gpu)
            else:
                try:
                    new_op = GpuElemwise(elemwise_node.op.scalar_op)
                except SupportCodeError:
                    # This happens when scalar_op requires support code
                    return False

            if all([i.dtype == 'float32' for i in elemwise_node.inputs]):
                gpu_elemwise = new_op(*[as_cuda_ndarray_variable(i)
                                        for i in elemwise_node.inputs])
                gpu_elemwise = split_huge_add_or_mul(gpu_elemwise.owner)
                if not gpu_elemwise:
                    return False
                return [gpu_elemwise.outputs[0]]
    return False
def init_device(device='gpu0'):
    if device.startswith('cuda'):
        import os
        if 'THEANO_FLAGS' in os.environ:
            raise ValueError('Use theanorc to set the theano config')
        os.environ['THEANO_FLAGS'] = 'device={0}'.format(device)
        import theano.gpuarray
        # This is a bit of black magic that may stop working in future
        # theano releases
        ctx = theano.gpuarray.type.get_context(None)
        drv = None
    elif device.startswith('gpu'):
        gpuid = int(device[-1])
        import pycuda.driver as drv
        drv.init()
        dev = drv.Device(gpuid)
        ctx = dev.make_context()
        import theano.sandbox.cuda
        theano.sandbox.cuda.use(device)
        import theano
    else:
        drv = None
        ctx = None
        import theano.sandbox.cuda
        theano.sandbox.cuda.use(device)
        import theano

    from theano import function, config, shared, sandbox, tensor
    vlen = 10 * 30 * 768  # 10 x #cores x # threads per core
    iters = 1000

    rng = np.random.RandomState(22)
    arr = rng.rand(vlen)
    shared_x = theano.shared(np.asarray(arr, config.floatX))
    shared_xx = theano.shared(np.asarray(arr, config.floatX))

    x = tensor.fvector("x")
    # compile a function so that shared_x will be set to part of a
    # computing graph on GPU (CUDAndarray)
    f = function([], tensor.exp(x), givens=[(x, shared_x)])
    if np.any([isinstance(x.op, tensor.Elemwise) and
               ('Gpu' not in type(x.op).__name__)
               for x in f.maker.fgraph.toposort()]):
        print('Used the cpu')
    else:
        print('Used the gpu')

    # if np.any([isinstance(x.op, tensor.Elemwise) for x in
    #            f.maker.fgraph.toposort()]) and device != 'cpu':
    #     raise TypeError('graph not compiled on GPU')

    return drv, ctx, arr, shared_x, shared_xx
def test_local_gpu_subtensor():
    # Test shared forced on CPU.
    t = tensor._shared(numpy.zeros(20, "float32"))
    f = theano.function([], t[3:4], mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert any([type(node.op) is tensor.Subtensor for node in topo])
    assert not any([isinstance(node.op, GpuSubtensor) for node in topo])

    # Test graph input.
    t = tensor.fmatrix()
    f = theano.function([t], t[3:4], mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert any([type(node.op) is tensor.Subtensor for node in topo])
    assert not any([isinstance(node.op, GpuSubtensor) for node in topo])

    # Test multiple use of the input.
    # We want the subtensor to be on the GPU to prevent multiple transfer.
    t = tensor.fmatrix()
    f = theano.function([t], [t[3:4], t + 1], mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert not any([type(node.op) is tensor.Subtensor for node in topo])
    assert any([isinstance(node.op, GpuSubtensor) for node in topo])

    # Test multiple use of the input + input as output.
    # We want the subtensor to be on the GPU to prevent multiple transfer.
    t = tensor.fmatrix()
    f = theano.function([t], [t[3:4], t + 1, t], mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert not any([type(node.op) is tensor.Subtensor for node in topo])
    assert any([isinstance(node.op, GpuSubtensor) for node in topo])

    # Test shared forced on CPU and we do computation on the output of
    # the subtensor.
    t = tensor._shared(numpy.zeros(20, "float32"))
    f = theano.function([], t[3:4] + 1, mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert any([type(node.op) is tensor.Subtensor for node in topo])
    assert not any([isinstance(node.op, GpuSubtensor) for node in topo])
    # Our optimizer isn't smart enough to move the Elemwise to the GPU.
    # If it were just a little bit smarter, it could wrongly move it to the GPU.
    # If it were super smart, it would know it should not move it to the GPU.
    assert any([isinstance(node.op, tensor.Elemwise) for node in topo])
def test_multiple_case_that_fail(self):
    raise SkipTest("Current implementation of Canonizer does not "
                   "implement all cases. Skip the corresponding test.")

    shp = (4, 4)
    fx, fy, fz = fmatrices('xyz')
    dx, dy, dz = dmatrices('xyz')
    fxv = theano._asarray(numpy.random.rand(*shp), dtype='float32')
    fyv = theano._asarray(numpy.random.rand(*shp), dtype='float32')
    fzv = theano._asarray(numpy.random.rand(*shp), dtype='float32')
    dxv = theano._asarray(numpy.random.rand(*shp), dtype='float32')
    dyv = theano._asarray(numpy.random.rand(*shp), dtype='float32')
    dzv = theano._asarray(numpy.random.rand(*shp), dtype='float32')
    fvv = theano._asarray(numpy.random.rand(shp[0]),
                          dtype='float32').reshape(1, shp[0])

    # We must be sure that the Canonizer is working, but that we don't have
    # other optimisation that could hide bug in the Canonizer as
    # local_elemwise_fusion
    mode = compile.mode.get_default_mode()
    opt = gof.Query(["canonicalize"])
    opt = opt.excluding('local_elemwise_fusion')
    mode = mode.__class__(linker=mode.linker, optimizer=opt)

    # test fail!
    # test x / y / z -> x / (y * z)
    for (g, sym_inputs, val_inputs, out_dtype) in [
            ((dx / dy) / dz, [dx, dy, dz], [dxv, dyv, dzv], 'float64'),
            ((fx / fy) / fz, [fx, fy, fz], [fxv, fyv, fzv], 'float32')]:
        f = compile.function(list(sym_inputs), g, mode=mode)
        out = f(*val_inputs)
        utt.assert_allclose(out, val_inputs[0] /
                            val_inputs[1] / val_inputs[2])
        topo = f.maker.fgraph.toposort()
        assert len(topo) == 2
        assert isinstance(topo[0].op, (T.Elemwise, ))
        assert isinstance(topo[0].op.scalar_op, theano.scalar.basic.Inv)
        assert len(topo[0].inputs) == 1
        assert(out_dtype == out.dtype)

    # test x / (y / z) -> (x * z) / y
    for (g, sym_inputs, val_inputs, out_dtype) in [
            (dx / (dy / dz), [dx, dy, dz], [dxv, dyv, dzv], 'float64'),
            (fx / (fy / fz), [fx, fy, fz], [fxv, fyv, fzv], 'float32')]:
        f = compile.function(list(sym_inputs), g, mode=mode)
        out = f(*val_inputs)
        utt.assert_allclose(out, val_inputs[0] /
                            (val_inputs[1] / val_inputs[2]))
        topo = f.maker.fgraph.toposort()
        assert len(topo) == 2
        assert isinstance(topo[0].op, (T.Elemwise, ))
        assert isinstance(topo[0].op.scalar_op, theano.scalar.basic.Inv)
        assert len(topo[0].inputs) == 1
        assert(out_dtype == out.dtype)
def test0(self):
    for dtype1, dtype2 in [("float32", "float32"),
                           ("float32", "float64"),
                           ("float64", "float32"),
                           ("float64", "float64")]:
        x = tensor.matrix(dtype=dtype1)
        y = tensor.matrix(dtype=dtype2)
        idx = tensor.ivector()

        dx = numpy.random.rand(4, 5).astype(dtype1)
        dy = numpy.random.rand(2, 5).astype(dtype2)
        didx = numpy.asarray([1, 3], "int32")

        # set_subtensor
        inc = tensor.set_subtensor(x[idx], y)
        o = inc[idx]
        f = theano.function([x, y, idx], o, self.mode_no_assert)
        res = f(dx, dy, didx)
        utt.assert_allclose(dy, res)
        topo = f.maker.fgraph.toposort()
        if opt:
            assert len(topo) == 1
            assert isinstance(topo[0].op, (compile.DeepCopyOp, T.Elemwise))
        else:
            assert len(topo) == 2

        # inc_subtensor(data[idx], y)
        inc = tensor.inc_subtensor(x[idx], y)
        o = inc[idx]
        f = theano.function([x, y, idx], o, self.mode_no_assert)
        res = f(dx, dy, didx)
        utt.assert_allclose((dx[didx] + dy), res)
        topo = f.maker.fgraph.toposort()
        len(topo) == 2

        # inc_subtensor(0[idx], y)
        inc = tensor.inc_subtensor(x.zeros_like()[idx], y)
        o = inc[idx]
        f = theano.function([x, y, idx], o, self.mode_no_assert)
        res = f(dx, dy, didx)
        utt.assert_allclose(dy, res)
        topo = f.maker.fgraph.toposort()
        if opt:
            assert len(topo) == 1
            assert isinstance(topo[0].op, (compile.DeepCopyOp, T.Elemwise))
        else:
            assert len(topo) > 2
def test_argmax_pushdown():
    x = tensor.matrix()
    for sm in [softmax_graph, softmax_op]:
        # test that the max_and_argmax is pushed down if the max is not used
        out = tensor.max_and_argmax(
            sm(tensor.exp(tensor.tanh(sigmoid(x)))),
            axis=-1)[1]
        fgraph = gof.FunctionGraph([x], [out])
        theano.compile.mode.optdb.query(
            theano.compile.mode.OPT_FAST_RUN).optimize(fgraph)

        # print 'AFTER'
        # for node in fgraph.toposort():
        #     print node.op
        assert len(fgraph.toposort()) == 2  # an output_guard is second
        assert fgraph.toposort()[0].op == tensor.basic._max_and_argmax
        assert str(fgraph.toposort()[1].op) == 'OutputGuard'
        assert check_stack_trace(
            fgraph, ops_to_check=tensor.basic._max_and_argmax)

        x = tensor.matrix()
        # test that the max_and_argmax is not pushed down if the max is used
        out = tensor.max_and_argmax(
            sm(tensor.exp(tensor.tanh(sigmoid(x)))),
            axis=-1)[0]
        fgraph = gof.FunctionGraph([x], [out])

        assert hasattr(fgraph.outputs[0].tag, 'trace')

        backup = config.warn.argmax_pushdown_bug
        config.warn.argmax_pushdown_bug = False
        try:
            theano.compile.mode.optdb.query(
                theano.compile.mode.OPT_FAST_RUN).optimize(fgraph)
        finally:
            config.warn.argmax_pushdown_bug = backup

        # print 'AFTER'
        # for node in fgraph.toposort():
        #     print node.op
        assert len(fgraph.toposort()) == 4  # an output_guard is second
        assert isinstance(fgraph.toposort()[0].op, tensor.Elemwise)
        assert isinstance(fgraph.toposort()[1].op, Softmax)
        assert isinstance(fgraph.toposort()[2].op, tensor.CAReduce)
        assert isinstance(fgraph.toposort()[2].op.scalar_op,
                          theano.scalar.Maximum)
        assert str(fgraph.toposort()[3].op) == 'OutputGuard'
def test_argmax_pushdown_bias():
    x = tensor.matrix()
    b = tensor.vector()

    out = tensor.argmax(softmax_with_bias(x, b), axis=-1)
    fgraph = gof.FunctionGraph([x, b], [out])

    theano.compile.mode.optdb.query(
        theano.compile.mode.OPT_FAST_RUN).optimize(fgraph)

    # print 'AFTER'
    # for node in fgraph.toposort():
    #     print node.op
    types_to_check = (tensor.DimShuffle, tensor.Elemwise,
                      tensor.MaxAndArgmax)
    assert len(fgraph.toposort()) == 4

    for i, type in enumerate(types_to_check):
        assert isinstance(fgraph.toposort()[i].op, type)
    assert str(fgraph.toposort()[3].op) == 'OutputGuard'
    assert check_stack_trace(fgraph, ops_to_check=types_to_check)

    x = tensor.matrix()
    b = tensor.vector()
    out = tensor.max_and_argmax(softmax_with_bias(x, b), axis=-1)[0]
    fgraph = gof.FunctionGraph([x, b], [out])

    backup = config.warn.argmax_pushdown_bug
    config.warn.argmax_pushdown_bug = False
    try:
        theano.compile.mode.optdb.query(
            theano.compile.mode.OPT_FAST_RUN).optimize(fgraph)
    finally:
        config.warn.argmax_pushdown_bug = backup

    # print 'AFTER'
    # for node in fgraph.toposort():
    #     print node.op
    assert len(fgraph.toposort()) == 3
    assert isinstance(fgraph.toposort()[0].op, SoftmaxWithBias)
    assert isinstance(fgraph.toposort()[1].op, tensor.CAReduce)
    assert isinstance(fgraph.toposort()[1].op.scalar_op,
                      theano.scalar.Maximum)
    assert str(fgraph.toposort()[2].op) == 'OutputGuard'
    assert check_stack_trace(
        fgraph, ops_to_check=(SoftmaxWithBias, tensor.CAReduce))
def local_gpu_elemwise_0(node):
    """
    Elemwise(..., host_from_gpu, ...)
      -> host_from_gpu(elemwise(gpu_from_host, ..., gpu_from_host))

    """
    if (isinstance(node.op, tensor.Elemwise) and
            dtype_in_elemwise_supported(node.op)):
        if any([i.owner and
                isinstance(i.owner.op, HostFromGpu)
                for i in node.inputs]):
            if all([o.type.dtype == 'float32' for o in node.outputs]):
                # Don't set any inplace pattern.
                # gpu_inplace_elemwise_optimizer will do it later

                if isinstance(node.op.scalar_op, Erfinv):
                    new_op = GpuElemwise(erfinv_gpu)
                elif isinstance(node.op.scalar_op, Erfcx):
                    new_op = GpuElemwise(erfcx_gpu)
                else:
                    try:
                        new_op = GpuElemwise(node.op.scalar_op)
                    except SupportCodeError:
                        # This happens when scalar_op requires support code
                        return False

                # first establish that float32 can store all inputs
                upcastable = set(['float32', 'int8', 'int16', 'uint8',
                                  'uint16'])
                # case 1 - all inputs are already float32
                if all([i.type.dtype == 'float32' for i in node.inputs]):
                    # TODO: change this when fusion makes Elemwise with
                    # multiple outputs
                    gpu_elemwise = new_op(*(as_cuda_ndarray_variable(i)
                                            for i in node.inputs),
                                          return_list=True)
                # case 2 - it is still ok if some inputs were upcast to float32
                elif all([i.type.dtype in upcastable
                          for i in node.inputs]):
                    # second - establish that a new node with upcasted inputs
                    # has the same outputs types as the original node
                    upcasted = node.op.make_node(*[tensor.cast(i, 'float32')
                                                   for i in node.inputs])
                    if [o.type for o in upcasted.outputs] ==\
                       [o.type for o in node.outputs]:
                        new_inputs = [as_cuda_ndarray_variable(
                            tensor.cast(i, 'float32')) for i in node.inputs]
                        gpu_elemwise = new_op(*new_inputs, return_list=True)
                    else:
                        return False
                else:
                    return False

                gpu_elemwise = split_huge_add_or_mul(gpu_elemwise[0].owner)
                if not gpu_elemwise:
                    return False
                if (max_inputs_to_GpuElemwise(node) <
                        len(gpu_elemwise.inputs)):
                    return False
                return [host_from_gpu(out) for out in gpu_elemwise.outputs]