The following 50 code examples, extracted from open-source Python projects, illustrate how to use theano.tensor.fvector().
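Before the project examples, here is a minimal sketch (not taken from any of the projects below) of what theano.tensor.fvector() provides: a symbolic one-dimensional float32 tensor that can serve as an input to a compiled Theano function.

import numpy as np
import theano
import theano.tensor as T

# fvector() declares a symbolic 1-D float32 tensor; the optional
# string argument names the variable for debugging.
x = T.fvector('x')

# Build a symbolic expression on the vector and compile it.
f = theano.function([x], (x ** 2).sum())

# The compiled function expects a float32 NumPy vector.
print(f(np.arange(4, dtype='float32')))  # prints 14.0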
def __init__(self, seq_len, n_feature):
    import theano.tensor as T
    self.Input = lasagne.layers.InputLayer(shape=(None, seq_len, n_feature))
    self.buildNetwork()
    self.output = lasagne.layers.get_output(self.network)
    self.params = lasagne.layers.get_all_params(self.network, trainable=True)
    self.output_fn = theano.function([self.Input.input_var], self.output)

    fx = T.fvector().astype("float64")
    choices = T.ivector()
    px = self.output[T.arange(self.output.shape[0]), choices]
    log_px = T.log(px)
    cost = -fx.dot(log_px)
    updates = lasagne.updates.adagrad(cost, self.params, 0.0008)
    Input = lasagne.layers.InputLayer(shape=(None, seq_len, n_feature))
    self.train_fn = theano.function([self.Input.input_var, choices, fx],
                                    [cost, px, log_px], updates=updates)
def test_select_distinct(self):
    """
    Tests that MultinomialWOReplacementFromUniform always selects distinct elements
    """
    p = tensor.fmatrix()
    u = tensor.fvector()
    n = tensor.iscalar()
    m = multinomial.MultinomialWOReplacementFromUniform('auto')(p, u, n)
    f = function([p, u, n], m, allow_input_downcast=True)

    n_elements = 1000
    all_indices = range(n_elements)
    numpy.random.seed(12345)
    for i in [5, 10, 50, 100, 500, n_elements]:
        uni = numpy.random.rand(i).astype(config.floatX)
        pvals = numpy.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
        pvals /= pvals.sum(1)
        res = f(pvals, uni, i)
        res = numpy.squeeze(res)
        assert len(res) == i, res
        assert numpy.all(numpy.in1d(numpy.unique(res), all_indices)), res
def test_Strides1D(self):
    x = T.fvector('x')

    for axis in [0, None, -1]:
        a = np.random.random((42,)).astype("float32")
        cumsum_function = theano.function([x], cumsum(x, axis=axis),
                                          mode=self.mode)

        slicings = [slice(None, None, None),  # Normal strides
                    slice(None, None, 2),     # Stepped strides
                    slice(None, None, -1),    # Negative strides
                    ]

        # Cartesian product of all slicings to test.
        for slicing in itertools.product(slicings, repeat=x.ndim):
            f = theano.function([x], cumsum(x[slicing], axis=axis),
                                mode=self.mode)
            assert [n for n in f.maker.fgraph.toposort()
                    if isinstance(n.op, GpuCumsum)]
            utt.assert_allclose(np.cumsum(a[slicing], axis=axis), f(a))
            utt.assert_allclose(np.cumsum(a[slicing], axis=axis),
                                cumsum_function(a[slicing]))
def test_GpuCumsum1D(self):
    block_max_size = self.max_threads_dim0 * 2

    x = T.fvector('x')
    f = theano.function([x], cumsum(x), mode=self.mode)
    assert [n for n in f.maker.fgraph.toposort()
            if isinstance(n.op, GpuCumsum)]

    # Extensive testing for the first 1025 sizes
    a = np.random.random(1025).astype("float32")
    for i in xrange(a.shape[0]):
        utt.assert_allclose(np.cumsum(a[:i]), f(a[:i]))

    # Use multiple GPU threadblocks
    a = np.random.random((block_max_size + 2,)).astype("float32")
    utt.assert_allclose(np.cumsum(a), f(a))

    # Use recursive cumsum
    a = np.ones((block_max_size * (block_max_size + 1) + 2,),
                dtype="float32")
    utt.assert_allclose(np.cumsum(a), f(a))
def test_cloning_replace_not_strict_copy_inputs(self):
    # This has nothing to do with scan, but it refers to the clone
    # function that scan uses internally and that pfunc uses now and
    # that users might want to use
    x = theano.tensor.vector('x')
    y = theano.tensor.fvector('y')
    y2 = theano.tensor.dvector('y2')
    z = theano.shared(0.25)

    f1 = z * (x + y) ** 2 + 5
    f2 = theano.clone(f1,
                      replace=OrderedDict([(y, y2)]),
                      strict=False,
                      share_inputs=True)
    f2_inp = theano.gof.graph.inputs([f2])
    assert z in f2_inp
    assert x in f2_inp
    assert y2 in f2_inp
def test_cloning_replace_not_strict_not_copy_inputs(self):
    # This has nothing to do with scan, but it refers to the clone
    # function that scan uses internally and that pfunc uses now and
    # that users might want to use
    x = theano.tensor.vector('x')
    y = theano.tensor.fvector('y')
    y2 = theano.tensor.dvector('y2')
    z = theano.shared(0.25)

    f1 = z * (x + y) ** 2 + 5
    f2 = theano.clone(f1,
                      replace=[(y, y2)],
                      strict=False,
                      share_inputs=False)
    f2_inp = theano.gof.graph.inputs([f2])
    assert z not in f2_inp
    assert x not in f2_inp
    assert y2 not in f2_inp

# TEST RE-ordering of inputs
# some rnn with multiple outputs and multiple inputs; other
# dimension instead of scalars/vectors
def test_n_samples_2():
    p = tensor.fmatrix()
    u = tensor.fvector()
    n = tensor.iscalar()
    m = multinomial.MultinomialFromUniform('auto')(p, u, n)

    f = function([p, u, n], m, allow_input_downcast=True)

    numpy.random.seed(12345)
    for i in [1, 5, 10, 100, 1000]:
        uni = numpy.random.rand(i).astype(config.floatX)
        pvals = numpy.random.randint(1, 1000, (1, 1000)).astype(config.floatX)
        pvals /= pvals.sum(1)
        res = f(pvals, uni, i)
        assert res.sum() == i

    for i in [1, 5, 10, 100, 1000]:
        uni = numpy.random.rand(i).astype(config.floatX)
        pvals = numpy.random.randint(
            1, 1000000, (1, 1000000)).astype(config.floatX)
        pvals /= pvals.sum(1)
        res = f(pvals, uni, i)
        assert res.sum() == i
def test_select_distinct(self):
    """
    Tests that MultinomialWOReplacementFromUniform always selects distinct elements
    """
    p = tensor.fmatrix()
    u = tensor.fvector()
    n = tensor.iscalar()
    m = multinomial.MultinomialWOReplacementFromUniform('auto')(p, u, n)
    f = function([p, u, n], m, allow_input_downcast=True)

    n_elements = 1000
    all_indices = range(n_elements)
    numpy.random.seed(12345)
    for i in [5, 10, 50, 100, 500, n_elements]:
        uni = numpy.random.rand(i).astype(config.floatX)
        pvals = numpy.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
        pvals /= pvals.sum(1)
        res = f(pvals, uni, i)
        res = numpy.squeeze(res)
        assert len(res) == i
        assert numpy.all(numpy.in1d(numpy.unique(res), all_indices)), res
def test_fail_select_alot(self):
    """
    Tests that MultinomialWOReplacementFromUniform fails when asked to sample
    more elements than the actual number of elements
    """
    p = tensor.fmatrix()
    u = tensor.fvector()
    n = tensor.iscalar()
    m = multinomial.MultinomialWOReplacementFromUniform('auto')(p, u, n)
    f = function([p, u, n], m, allow_input_downcast=True)

    n_elements = 100
    n_selected = 200
    numpy.random.seed(12345)
    uni = numpy.random.rand(n_selected).astype(config.floatX)
    pvals = numpy.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
    pvals /= pvals.sum(1)
    self.assertRaises(ValueError, f, pvals, uni, n_selected)
def test_GpuCumsum1D(self):
    block_max_size = self.max_threads_dim0 * 2

    x = T.fvector('x')
    f = theano.function([x], cumsum(x), mode=self.mode)
    assert [n for n in f.maker.fgraph.toposort()
            if isinstance(n.op, GpuCumsum)]

    # Extensive testing for the first 1025 sizes
    a = np.random.random(1025).astype("float32")
    for i in xrange(a.shape[0]):
        utt.assert_allclose(np.cumsum(a[:i]), f(a[:i]))

    # Use multiple GPU threadblocks
    a = np.random.random((block_max_size + 2,)).astype("float32")
    utt.assert_allclose(np.cumsum(a), f(a))

    # Use recursive cumsum
    a = np.ones((block_max_size * (block_max_size + 1) + 2,),
                dtype="float32")
    utt.assert_allclose(np.cumsum(a), f(a))
def test_elemwise3():
    """ Several kinds of elemwise expressions with dimension
    permutations and broadcasting"""

    shape = (3, 4, 5, 6)
    a = tcn.shared_constructor(theano._asarray(numpy.random.rand(*shape),
                                               dtype='float32'), 'a')
    b = tensor.fvector()
    new_val = (a + b).dimshuffle([2, 0, 3, 1])
    new_val *= tensor.exp(1 + b ** a).dimshuffle([2, 0, 3, 1])
    f = pfunc([b], [], updates=[(a, new_val)], mode=mode_with_gpu)

    has_elemwise = False
    for i, node in enumerate(f.maker.fgraph.toposort()):
        has_elemwise = has_elemwise or isinstance(node.op, tensor.Elemwise)
    assert not has_elemwise
    # let debugmode catch errors
    f(theano._asarray(numpy.random.rand(6), dtype='float32'))
def test_elemwise4():
    """ Test that two vectors can be broadcast to form an outer
    product (by performing a rank-1 matrix update)"""

    shape = (3, 4)
    a = tcn.shared_constructor(theano._asarray(numpy.random.rand(*shape),
                                               dtype='float32'), 'a')
    b = tensor.fvector()
    c = tensor.fvector()
    f = pfunc([b, c], [],
              updates=[(a, (a + b.dimshuffle('x', 0) * c.dimshuffle(0, 'x')))],
              mode=mode_with_gpu)

    has_elemwise = False
    for i, node in enumerate(f.maker.fgraph.toposort()):
        has_elemwise = has_elemwise or isinstance(node.op, tensor.Elemwise)
    assert not has_elemwise
    # let debugmode catch errors
    f(theano._asarray(numpy.random.rand(4), dtype='float32'),
      theano._asarray(numpy.random.rand(3), dtype='float32'))
def get_tile_coder(min_val, max_val, num_tiles, num_tilings,
                   num_features, learning_rate):
    # x.shape: (num_features), y.shape: ()
    x = T.fvector('x')
    y = T.fscalar('y')

    tile_coding_layer = TileCodingLayer(
        min_val=min_val, max_val=max_val,
        num_tiles=num_tiles, num_tilings=num_tilings,
        num_features=num_features)

    # quantized_x
    q_x = tile_coding_layer.quantize(x)
    y_hat = tile_coding_layer.approximate(q_x)
    updates = tile_coding_layer.update_rule(y, y_hat, 0.1)

    train = theano.function([x, y], y_hat, updates=updates,
                            allow_input_downcast=True)
    eval_ = theano.function([x], y_hat, allow_input_downcast=True)

    return train, eval_
def __init__(self, game_params, arch_params, solver_params, trained_model, sn_dir):

    params = None
    if trained_model:
        params = common.load_params(trained_model)

    self.lr_func = create_learning_rate_func(solver_params)

    self.v_h_0 = tt.fvector('v_h_0')
    self.x_h_0 = tt.fvector('x_h_0')
    self.v_t_0 = tt.fmatrix('v_t_0')
    self.x_t_0 = tt.fmatrix('x_t_0')
    self.a_t_0 = tt.fmatrix('a_t_0')
    self.is_aggressive = tt.fmatrix('is_aggressive')
    self.lr_ = tt.fscalar('lr')
    self.n_steps_ = tt.iscalar('n_steps')

    self.sn_dir = sn_dir
    self.game_params = game_params
    self.arch_params = arch_params
    self.solver_params = solver_params

    self.model = CONTROLLER(self.v_h_0, self.x_h_0, self.v_t_0, self.x_t_0,
                            self.a_t_0, self.is_aggressive, self.lr_,
                            self.n_steps_, self.game_params,
                            self.arch_params, self.solver_params, params)
def setUp(self):
    self.initial = np.array([0.1, 0.1], dtype='float32')
    x = theano.shared(self.initial)

    self.params = [x]

    left_bound = T.fscalar('left bound')
    right_bound = T.fscalar('right bound')

    self.inputs = [left_bound, right_bound]

    y = T.sum(x)
    loss = -T.log(y - left_bound) - T.log(right_bound - y) + \
        1.0e-3 * T.sum(x ** 2)

    self.loss = loss

    x0 = (0.01 + 0.011 + 2.0 + 2.1) / 4.0
    self.approx_solution = np.array([x0 / 2, x0 / 2], dtype='float32')

    self.get_inputs = lambda: [
        np.float32(np.random.uniform(0.01, 0.011)),
        np.float32(np.random.uniform(2.0, 2.1)),
    ]

    x_sub = T.fvector('x sub')
    self.get_loss = theano.function([x_sub] + self.inputs, self.loss,
                                    givens=[(self.params[0], x_sub)])
def define(self, n_units=1):
    self.sample_weights = T.fvector(name='weights')
    self.labels = T.fvector(name='labels')
    self.input = T.fmatrix(name='input')

    input_layer = layers.InputLayer(shape=(None, 1), input_var=self.input)

    dense1 = layers.DenseLayer(
        input_layer,
        num_units=n_units,
        nonlinearity=nonlinearities.sigmoid
    )

    self.net = layers.DenseLayer(
        dense1,
        num_units=1,
        nonlinearity=nonlinearities.sigmoid
    )
def _finalize(self, loss_fn, weight_layers, projection_layers, eval_layer,
              predict_layer=None, additional_params=[]):
    assert hasattr(weight_layers, '__iter__')
    assert hasattr(projection_layers, '__iter__')

    output_layer = eval_layer

    # Be flexible in terms of batch input formats.
    x_batch = T.matrix('input_indices', dtype=self.input_dtype)

    # Do not be flexible w.r.t. output type.
    y_batch = (
        T.ivector('y')
        if self.training_set[1].ndim == 1
        else T.fmatrix('y'))

    # Instance weights for training.
    w_batch = T.fvector('weights')

    objective = WeightedObjective(
        output_layer, loss_function=loss_fn)

    loss_train = objective.get_loss(
        weights=w_batch,
        input=x_batch,
        target=y_batch,
        deterministic=False)

    loss_eval = objective.get_loss(
        input=x_batch,
        target=y_batch,
        deterministic=True)

    loss_train += self._regularization(
        weight_layers, projection_layers)

    self._create_functions(output_layer, loss_train, loss_eval,
                           dict(x_batch=x_batch,
                                y_batch=y_batch,
                                w_batch=w_batch),
                           predict_layer=predict_layer,
                           additional_params=additional_params)
def make_node(self, x, x2, x3, x4):
    # Check that the theano version has support for __props__.
    # This next line looks like it has a typo,
    # but it's actually a way to detect the theano version
    # is sufficiently recent to support the use of __props__.
    assert hasattr(self, '_props'), \
        "Your version of theano is too old to support __props__."
    x = tensor.as_tensor_variable(x)
    x2 = tensor.as_tensor_variable(x2)
    x3 = tensor.as_tensor_variable(x3)
    x4 = tensor.as_tensor_variable(x4)
    return theano.Apply(self, [x, x2, x3, x4],
                        [tensor.fvector().type(), tensor.imatrix().type()])
def train_linreg(X_train, y_train, eta, epochs):
    costs = []
    eta0 = T.fscalar('eta0')
    y = T.fvector(name='y')
    X = T.fmatrix(name='X')
    w = theano.shared(
        np.zeros(shape=(X_train.shape[1] + 1), dtype=theano.config.floatX),
        name='w',
    )

    net_input = T.dot(X, w[1:]) + w[0]
    errors = y - net_input
    cost = T.sum(T.pow(errors, 2))

    gradient = T.grad(cost, wrt=w)
    update = [(w, w - (eta0 * gradient))]

    train = theano.function(
        inputs=[eta0],
        outputs=cost,
        updates=update,
        givens={X: X_train, y: y_train},
    )

    for _ in range(epochs):
        costs.append(train(eta))

    return costs, w
def test_multinomial_0():
    # This tests the MultinomialFromUniform Op directly, not going through the
    # multinomial() call in GPU random generation.
    p = tensor.fmatrix()
    u = tensor.fvector()

    for dtype in ['int64', 'float32', 'auto']:
        m = theano.sandbox.multinomial.MultinomialFromUniform(dtype)(p, u)

        # the m*2 allows the multinomial to reuse output
        f = function([p, u], m * 2, allow_input_downcast=True,
                     mode=mode_with_gpu)

        assert any([type(node.op) is GPUAMultinomialFromUniform
                    for node in f.maker.fgraph.toposort()])

        # test that both first and second samples can be drawn
        utt.assert_allclose(f([[1, 0], [0, 1]], [.1, .1]),
                            [[2, 0], [0, 2]])

        # test that both second labels can be drawn
        r = f([[.2, .8], [.3, .7]], [.31, .31])
        utt.assert_allclose(r, [[0, 2], [0, 2]])

        # test that both first labels can be drawn
        r = f([[.2, .8], [.3, .7]], [.21, .21])
        utt.assert_allclose(r, [[0, 2], [2, 0]])

        # change the size to make sure output gets reallocated ok
        # and also make sure that the GPU version doesn't screw up the
        # transposed-ness
        r = f([[.2, .8]], [.25])
        utt.assert_allclose(r, [[0, 2]])

# TODO: check a bigger example (make sure blocking on GPU is handled correctly)
def test_multinomial_large():
    # DEBUG_MODE will test this on GPU
    p = tensor.fmatrix()
    u = tensor.fvector()
    m = theano.sandbox.multinomial.MultinomialFromUniform('auto')(p, u)
    f = function([p, u], m * 2, allow_input_downcast=True, mode=mode_with_gpu)
    assert any([type(node.op) is GPUAMultinomialFromUniform
                for node in f.maker.fgraph.toposort()])

    pval = numpy.arange(10000 * 4, dtype='float32').reshape((10000, 4)) + 0.1
    pval = pval / pval.sum(axis=1)[:, None]
    uval = numpy.ones_like(pval[:, 0]) * 0.5
    mval = f(pval, uval)

    assert mval.shape == pval.shape
    if config.cast_policy == 'custom':
        assert mval.dtype == pval.dtype
    elif config.cast_policy == 'numpy+floatX':
        assert mval.dtype == config.floatX
    elif config.cast_policy == 'numpy':
        assert mval.dtype == 'float64'
    else:
        raise NotImplementedError(config.cast_policy)
    utt.assert_allclose(mval.sum(axis=1), 2)
    asdf = numpy.asarray([0, 0, 2, 0]) + 0 * pval
    utt.assert_allclose(mval, asdf)  # broadcast over all rows
def test_gpu_opt_dtypes():
    # Test if the returned samples are of the datatype specified
    for dtype in ['uint32', 'float32', 'int64', 'float64']:
        p = tensor.fmatrix()
        u = tensor.fvector()
        m = theano.sandbox.multinomial.MultinomialFromUniform(dtype)(p, u)

        f = function([p, u], m, allow_input_downcast=True, mode=mode_with_gpu)
        assert any([type(node.op) is GPUAMultinomialFromUniform
                    for node in f.maker.fgraph.toposort()])
        pval = numpy.arange(10000 * 4, dtype='float32').reshape((10000, 4)) + 0.1
        pval = pval / pval.sum(axis=1)[:, None]
        uval = numpy.ones_like(pval[:, 0]) * 0.5
        samples = f(pval, uval)
        assert samples.dtype == dtype, "%s != %s" % (samples.dtype, dtype)
def test_gpu_opt():
    # Does have some overlap with test_multinomial_0

    # We test the case where we put the op on the gpu when the output
    # is moved to the gpu.
    p = tensor.fmatrix()
    u = tensor.fvector()
    m = theano.sandbox.multinomial.MultinomialFromUniform('auto')(p, u)
    assert m.dtype == 'float32', m.dtype

    f = function([p, u], m, allow_input_downcast=True, mode=mode_with_gpu)
    assert any([type(node.op) is GPUAMultinomialFromUniform
                for node in f.maker.fgraph.toposort()])
    pval = numpy.arange(10000 * 4, dtype='float32').reshape((10000, 4)) + 0.1
    pval = pval / pval.sum(axis=1)[:, None]
    uval = numpy.ones_like(pval[:, 0]) * 0.5
    f(pval, uval)

    # Test with a row, it was failing in the past.
    r = tensor.frow()
    m = theano.sandbox.multinomial.MultinomialFromUniform('auto')(r, u)
    assert m.dtype == 'float32', m.dtype

    f = function([r, u], m, allow_input_downcast=True, mode=mode_with_gpu)
    assert any([type(node.op) is GPUAMultinomialFromUniform
                for node in f.maker.fgraph.toposort()])
    pval = numpy.arange(1 * 4, dtype='float32').reshape((1, 4)) + 0.1
    pval = pval / pval.sum(axis=1)[:, None]
    uval = numpy.ones_like(pval[:, 0]) * 0.5
    f(pval, uval)
def test_select_proportional_to_weight(self):
    """
    Tests that MultinomialWOReplacementFromUniform selects elements, on
    average, proportional to their probabilities
    """
    p = tensor.fmatrix()
    u = tensor.fvector()
    n = tensor.iscalar()
    m = multinomial.MultinomialWOReplacementFromUniform('auto')(p, u, n)
    f = function([p, u, n], m, allow_input_downcast=True)

    n_elements = 100
    n_selected = 10
    mean_rtol = 0.0005
    numpy.random.seed(12345)
    pvals = numpy.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
    pvals /= pvals.sum(1)
    avg_pvals = numpy.zeros((n_elements,), dtype=config.floatX)

    for rep in range(10000):
        uni = numpy.random.rand(n_selected).astype(config.floatX)
        res = f(pvals, uni, n_selected)
        res = numpy.squeeze(res)
        avg_pvals[res] += 1
    avg_pvals /= avg_pvals.sum()
    avg_diff = numpy.mean(abs(avg_pvals - pvals))
    assert avg_diff < mean_rtol, avg_diff
def test_gpu_opt_wor():
    # We test the case where we put the op on the gpu when the output
    # is moved to the gpu.
    p = tensor.fmatrix()
    u = tensor.fvector()
    n = tensor.iscalar()
    m = multinomial.MultinomialWOReplacementFromUniform('auto')(p, u, n)
    assert m.dtype == 'int64', m.dtype

    f = function([p, u, n], m, allow_input_downcast=True, mode=mode_with_gpu)
    assert any([type(node.op) is GPUAMultinomialWOReplacementFromUniform
                for node in f.maker.fgraph.toposort()])
    n_samples = 3
    pval = numpy.arange(10000 * 4, dtype='float32').reshape((10000, 4)) + 0.1
    pval = pval / pval.sum(axis=1)[:, None]
    uval = numpy.ones(pval.shape[0] * n_samples) * 0.5
    f(pval, uval, n_samples)

    # Test with a row, it was failing in the past.
    r = tensor.frow()
    m = multinomial.MultinomialWOReplacementFromUniform('auto')(r, u, n)
    assert m.dtype == 'int64', m.dtype

    f = function([r, u, n], m, allow_input_downcast=True, mode=mode_with_gpu)
    assert any([type(node.op) is GPUAMultinomialWOReplacementFromUniform
                for node in f.maker.fgraph.toposort()])
    pval = numpy.arange(1 * 4, dtype='float32').reshape((1, 4)) + 0.1
    pval = pval / pval.sum(axis=1)[:, None]
    uval = numpy.ones_like(pval[:, 0]) * 0.5
    f(pval, uval, 1)
def setUp(self):
    upgrade_to_float = theano.scalar.basic.upgrade_to_float

    self.scal_times_2 = TimesN(2, upgrade_to_float, name='times_2')
    self.times_2 = theano.tensor.elemwise.Elemwise(
        self.scal_times_2, name='times_2')

    self.scal_times_3 = TimesN(3, upgrade_to_float, name='times_3')
    self.times_3 = theano.tensor.elemwise.Elemwise(
        self.scal_times_3, name='times_3')

    self.x = fvector()
def setUp(self):
    self.rng = numpy.random.RandomState(seed=utt.fetch_seed())

    self.s = tensor.iscalar()
    self.v = tensor.fvector()
    self.m = tensor.dmatrix()
    self.t = tensor.ctensor3()

    self.adv1q = tensor.lvector()  # advanced 1d query
def setUp(self):
    self.s = iscalar()
    self.v = fvector()
    self.m = dmatrix()
    self.t = ctensor3()
    self.ft4 = ftensor4()

    self.ix1 = lvector()  # advanced 1d query
    self.ix12 = lvector()
    self.ix2 = lmatrix()
    self.ixr = lrow()
def test_allow_downcast_floatX(self):
    a = tensor.fscalar('a')
    b = tensor.fvector('b')

    f = pfunc([a, b], (a + b), allow_input_downcast=True)
    g = pfunc([a, b], (a + b), allow_input_downcast=False)
    h = pfunc([a, b], (a + b), allow_input_downcast=None)

    # If the values can be accurately represented, OK
    assert numpy.all(f(0, [0]) == 0)
    assert numpy.all(g(0, [0]) == 0)
    assert numpy.all(h(0, [0]) == 0)

    # For the vector: OK iff allow_input_downcast is True
    assert numpy.allclose(f(0, [0.1]), 0.1)
    self.assertRaises(TypeError, g, 0, [0.1])
    self.assertRaises(TypeError, h, 0, [0.1])

    # For the scalar: OK if allow_input_downcast is True,
    # or None and floatX==float32
    assert numpy.allclose(f(0.1, [0]), 0.1)
    self.assertRaises(TypeError, g, 0.1, [0])
    if config.floatX == 'float32':
        assert numpy.allclose(h(0.1, [0]), 0.1)
    else:
        self.assertRaises(TypeError, h, 0.1, [0])
def test_using_negative_taps_sequence(self):
    # This test refers to a bug reported on github on May 22 2015 by
    # user june-qijun
    def lp(x, x2):
        return x
    x = tensor.fvector('x')
    res, upd = theano.scan(lp, sequences=dict(input=x, taps=[-2, -1]))
    f = theano.function([x], res, updates=upd)

    output = f([1, 2, 3, 4, 5])
    expected_output = numpy.array([1, 2, 3], dtype="float32")
    utt.assert_allclose(output, expected_output)
def test_hessian_bug_grad_grad_two_scans(self):
    # Bug reported by Bitton Tenessi
    # NOTE : The test to reproduce the bug reported by Bitton Tenessi
    # was modified from its original version to be faster to run.

    W = tensor.fvector(name='W')
    n_steps = tensor.iscalar(name='Nb_steps')

    def loss_outer(sum_outer, W):

        def loss_inner(sum_inner, W):
            return sum_inner + (W ** 2).sum()

        result_inner, _ = theano.scan(
            fn=loss_inner,
            outputs_info=tensor.as_tensor_variable(
                numpy.asarray(0, dtype=numpy.float32)),
            non_sequences=[W],
            n_steps=1,
        )
        return sum_outer + result_inner[-1]

    result_outer, _ = theano.scan(
        fn=loss_outer,
        outputs_info=tensor.as_tensor_variable(
            numpy.asarray(0, dtype=numpy.float32)),
        non_sequences=[W],
        n_steps=n_steps,
    )

    cost = result_outer[-1]
    H = theano.gradient.hessian(cost, W)
    print(".", file=sys.stderr)
    f = theano.function([W, n_steps], H)
    f(numpy.ones((8,), dtype='float32'), 1)
def test_n_samples_1():
    p = tensor.fmatrix()
    u = tensor.fvector()
    n = tensor.iscalar()
    m = multinomial.MultinomialFromUniform('auto')(p, u, n)

    f = function([p, u, n], m, allow_input_downcast=True)

    numpy.random.seed(12345)
    for i in [1, 5, 10, 100, 1000, 10000]:
        uni = numpy.random.rand(2 * i).astype(config.floatX)
        res = f([[1.0, 0.0], [0.0, 1.0]], uni, i)
        utt.assert_allclose(res, [[i * 1.0, 0.0], [0.0, i * 1.0]])
def test_multinomial_0():
    # This tests the MultinomialFromUniform Op directly, not going through the
    # multinomial() call in GPU random generation.

    p = tensor.fmatrix()
    u = tensor.fvector()

    m = multinomial.MultinomialFromUniform('auto')(p, u)

    def body(mode, gpu):
        # the m*2 allows the multinomial to reuse output
        f = function([p, u], m * 2, allow_input_downcast=True, mode=mode)

        if gpu:
            assert any([type(node.op) is multinomial.GpuMultinomialFromUniform
                        for node in f.maker.fgraph.toposort()])

        # test that both first and second samples can be drawn
        utt.assert_allclose(f([[1, 0], [0, 1]], [.1, .1]),
                            [[2, 0], [0, 2]])

        # test that both second labels can be drawn
        r = f([[.2, .8], [.3, .7]], [.31, .31])
        utt.assert_allclose(r, [[0, 2], [0, 2]])

        # test that both first labels can be drawn
        r = f([[.2, .8], [.3, .7]], [.21, .21])
        utt.assert_allclose(r, [[0, 2], [2, 0]])

        # change the size to make sure output gets reallocated ok
        # and also make sure that the GPU version doesn't screw up the
        # transposed-ness
        r = f([[.2, .8]], [.25])
        utt.assert_allclose(r, [[0, 2]])

    run_with_c(body)
    if cuda.cuda_available:
        run_with_c(body, True)

# TODO: check a bigger example (make sure blocking on GPU is handled correctly)
def test_multinomial_dtypes():
    p = tensor.dmatrix()
    u = tensor.dvector()
    m = multinomial.MultinomialFromUniform('auto')(p, u)
    assert m.dtype == 'float64', m.dtype

    p = tensor.fmatrix()
    u = tensor.fvector()
    m = multinomial.MultinomialFromUniform('auto')(p, u)
    assert m.dtype == 'float32', m.dtype

    p = tensor.fmatrix()
    u = tensor.fvector()
    m = multinomial.MultinomialFromUniform('float64')(p, u)
    assert m.dtype == 'float64', m.dtype
def test_gpu_opt():
    if not cuda.cuda_available:
        # Skip test if cuda_ndarray is not available.
        from nose.plugins.skip import SkipTest
        raise SkipTest('Optional package cuda not available')

    # We test the case where we put the op on the gpu when the output
    # is moved to the gpu.
    p = tensor.fmatrix()
    u = tensor.fvector()
    m = multinomial.MultinomialFromUniform('auto')(p, u)
    assert m.dtype == 'float32', m.dtype

    m_gpu = cuda.gpu_from_host(m)

    f = function([p, u], m_gpu, allow_input_downcast=True, mode=get_mode(True))
    assert any([type(node.op) is multinomial.GpuMultinomialFromUniform
                for node in f.maker.fgraph.toposort()])
    pval = numpy.arange(10000 * 4, dtype='float32').reshape((10000, 4)) + 0.1
    pval = pval / pval.sum(axis=1)[:, None]
    uval = numpy.ones_like(pval[:, 0]) * 0.5
    f(pval, uval)

    # Test with a row, it was failing in the past.
    r = tensor.frow()
    m = multinomial.MultinomialFromUniform('auto')(r, u)
    assert m.dtype == 'float32', m.dtype

    m_gpu = cuda.gpu_from_host(m)

    f = function([r, u], m_gpu, allow_input_downcast=True, mode=get_mode(True))
    assert any([type(node.op) is multinomial.GpuMultinomialFromUniform
                for node in f.maker.fgraph.toposort()])
    pval = numpy.arange(1 * 4, dtype='float32').reshape((1, 4)) + 0.1
    pval = pval / pval.sum(axis=1)[:, None]
    uval = numpy.ones_like(pval[:, 0]) * 0.5
    f(pval, uval)
def test_local_softmax_dnn_grad(self):
    """
    Check for optimization error when grad of summed
    softmax is taken over tensor with fixed shape.
    """
    x = T.fvector('x')
    xp = x.reshape((5, 5))
    y = T.nnet.softmax(xp.flatten()).sum()
    g = T.grad(y, x)
    f = theano.function(inputs=[x], outputs=g, mode=self.mode)
    assert any(n for n in f.maker.fgraph.toposort()
               if isinstance(n.op, dnn.GpuDnnSoftmaxGrad))
def __init__(self, computeGradient=True):
    super(GpuCtc, self).__init__()
    self.computeGradient = computeGradient
    self.costs = T.fvector(name="ctc_cost")
    if self.computeGradient:
        self.gradients = CudaNdarrayVariable(
            name="ctc_grad",
            type=CudaNdarrayType(broadcastable=[False, False, False]))
def __init__(self, computeGradient=True):
    super(CpuCtc, self).__init__()
    self.computeGradient = computeGradient
    self.costs = T.fvector(name="ctc_cost")
    if self.computeGradient:
        self.gradients = T.ftensor3(name="ctc_grad")
def __init__(self, seq_len, n_feature):
    self.Input = lasagne.layers.InputLayer(shape=(None, seq_len, n_feature))
    self.buildNetwork()
    self.output = lasagne.layers.get_output(self.network)
    self.params = lasagne.layers.get_all_params(self.network, trainable=True)
    self.output_fn = theano.function([self.Input.input_var], self.output)

    fx = T.fvector().astype("float64")
    choices = T.ivector()
    px = self.output[T.arange(self.output.shape[0]), choices]
    log_px = T.log(px)
    cost = -fx.dot(log_px)
    updates = lasagne.updates.adagrad(cost, self.params, 0.0008)
    Input = lasagne.layers.InputLayer(shape=(None, seq_len, n_feature))
    self.train_fn = theano.function([self.Input.input_var, choices, fx],
                                    [cost, px, log_px], updates=updates)
def build_model_train(tparams, train_opt):
    t_f = T.fmatrix('train_feats')
    t_n = T.ivector('train_nframe')
    omega = T.fvector('omega')

    """ Cost """
    trng = RandomStreams()

    def _cost(i, cost_prev):
        x_start = T.sum(t_n[:i])
        x_end = x_start + t_n[i]
        x = t_f[x_start:x_end]
        outs = SFM(tparams, x, omega, train_opt)
        log_lik = T.nlinalg.trace(
            T.dot(x, T.log(T.nnet.softmax(outs)).transpose()))
        total_cost = cost_prev + (-log_lik)
        return total_cost

    t_l2 = T.arange(t_n.shape[0], dtype='int64')
    costs, updates = theano.scan(_cost,
                                 sequences=[t_l2],
                                 outputs_info=T.alloc(np.float64(0.)),
                                 name='log_likelyhood',
                                 n_steps=t_n.shape[0])
    cost = costs[-1]

    """ Gradient """
    grads = theano.grad(cost, wrt=list(tparams.values()))

    """ Update Function """
    f_grad, f_update = adadelta(tparams, t_f, t_n, omega, cost, grads)

    return f_grad, f_update
def _prepare_networks(self, n_items):
    ''' Prepares the building blocks of the RNN, but does not compile them:
    '''

    self.n_items = n_items

    # The input is composed of two parts: the one-hot encoding of the movie,
    # and the features of the movie
    self.l_in = lasagne.layers.InputLayer(shape=(self.batch_size, self.max_length, self._input_size()))
    # The input is completed by a mask to inform the LSTM of the length of the sequence
    self.l_mask = lasagne.layers.InputLayer(shape=(self.batch_size, self.max_length))

    # recurrent layer
    if not self.use_movies_features:
        l_recurrent = self.recurrent_layer(self.l_in, self.l_mask,
                                           true_input_size=self.n_items + self._n_optional_features(),
                                           only_return_final=True)
    else:
        l_recurrent = self.recurrent_layer(self.l_in, self.l_mask,
                                           true_input_size=None,
                                           only_return_final=True)

    # l_last_slice gets the last output of the recurrent layer
    l_last_slice = l_recurrent
    # l_last_slice = lasagne.layers.SliceLayer(l_recurrent, -1, 1)

    # Theano tensors for the targets
    target = T.ivector('target_output')
    target_popularity = T.fvector('target_popularity')
    self.exclude = T.fmatrix('excluded_items')
    self.theano_inputs = [self.l_in.input_var, self.l_mask.input_var, target,
                          target_popularity, self.exclude]

    # The sliced output is then passed through a linear layer to obtain the right output size
    self.l_out = lasagne.layers.DenseLayer(l_last_slice, num_units=self.n_items,
                                           nonlinearity=lasagne.nonlinearities.softmax)

    # lasagne.layers.get_output produces a variable for the output of the net
    network_output = lasagne.layers.get_output(self.l_out)

    # loss function
    self.cost = (T.nnet.categorical_crossentropy(network_output, target) / target_popularity).mean()

    if self.regularization > 0.:
        self.cost += self.regularization * lasagne.regularization.l2(self.l_out.b)
        # self.cost += self.regularization * lasagne.regularization.regularize_layer_params(self.l_out, lasagne.regularization.l2)
    elif self.regularization < 0.:
        self.cost -= self.regularization * lasagne.regularization.l1(self.l_out.b)
        # self.cost -= self.regularization * lasagne.regularization.regularize_layer_params(self.l_out, lasagne.regularization.l1)
def _prepare_networks(self, n_items):
    ''' Prepares the building blocks of the RNN, but does not compile them:
    self.l_in : input layer
    self.l_mask : mask of the input layer
    self.target : target of the network
    self.l_out : output of the network
    self.cost : cost function
    '''

    self.n_items = n_items
    if self.sampling < 1:
        self.effective_sampling = int(self.sampling * self.n_items)
    else:
        self.effective_sampling = int(self.sampling)

    # The input is composed of two parts: the one-hot encoding of the movie,
    # and the features of the movie
    self.l_in = lasagne.layers.InputLayer(shape=(self.batch_size, self.max_length, self._input_size()))
    # The input is completed by a mask to inform the LSTM of the length of the sequence
    self.l_mask = lasagne.layers.InputLayer(shape=(self.batch_size, self.max_length))

    # recurrent layer
    if not self.use_movies_features:
        l_recurrent = self.recurrent_layer(self.l_in, self.l_mask,
                                           true_input_size=self.n_items + self._n_optional_features(),
                                           only_return_final=True)
    else:
        l_recurrent = self.recurrent_layer(self.l_in, self.l_mask,
                                           true_input_size=None,
                                           only_return_final=True)

    # l_last_slice gets the last output of the recurrent layer
    l_last_slice = l_recurrent
    # l_last_slice = lasagne.layers.SliceLayer(l_recurrent, -1, 1)

    # Theano tensors for the targets
    target = T.ivector('target_output')
    samples = T.ivector('samples')
    self.exclude = T.fmatrix('excluded_items')
    target_popularity = T.fvector('target_popularity')
    self.theano_inputs = [self.l_in.input_var, self.l_mask.input_var, target,
                          samples, target_popularity, self.exclude]

    # The sliced output is then passed through a linear layer to obtain the right output size
    self.l_out = BlackoutLayer(l_last_slice, num_units=self.n_items,
                               num_outputs=self.sampling, nonlinearity=None,
                               W=lasagne.init.GlorotUniform(gain=self.last_layer_init))

    # lasagne.layers.get_output produces a variable for the output of the net
    network_output = lasagne.layers.get_output(self.l_out, targets=target,
                                               samples=samples)

    # loss function
    self.cost = (self.loss_function(network_output, np.arange(self.batch_size)) / target_popularity).mean()
def init_device(device='gpu0'):
    if device.startswith('cuda'):
        import os
        if 'THEANO_FLAGS' in os.environ:
            raise ValueError('Use theanorc to set the theano config')
        os.environ['THEANO_FLAGS'] = 'device={0}'.format(device)
        import theano.gpuarray
        # This is a bit of black magic that may stop working in future
        # theano releases
        ctx = theano.gpuarray.type.get_context(None)
        drv = None
    elif device.startswith('gpu'):
        gpuid = int(device[-1])
        import pycuda.driver as drv
        drv.init()
        dev = drv.Device(gpuid)
        ctx = dev.make_context()
        import theano.sandbox.cuda
        theano.sandbox.cuda.use(device)
        import theano
    else:
        drv = None
        ctx = None
        import theano.sandbox.cuda
        theano.sandbox.cuda.use(device)
        import theano

    from theano import function, config, shared, sandbox, tensor

    vlen = 10 * 30 * 768  # 10 x #cores x # threads per core
    iters = 1000

    rng = np.random.RandomState(22)
    arr = rng.rand(vlen)

    shared_x = theano.shared(np.asarray(arr, config.floatX))
    shared_xx = theano.shared(np.asarray(arr, config.floatX))

    x = tensor.fvector("x")
    # compile a function so that shared_x will be set to part of a computing
    # graph on GPU (CUDAndarray)
    f = function([], tensor.exp(x), givens=[(x, shared_x)])

    if np.any([isinstance(x.op, tensor.Elemwise) and
               ('Gpu' not in type(x.op).__name__)
               for x in f.maker.fgraph.toposort()]):
        print('Used the cpu')
    else:
        print('Used the gpu')

    # if np.any([isinstance(x.op, tensor.Elemwise) for x in
    #            f.maker.fgraph.toposort()]) and device != 'cpu':
    #     raise TypeError('graph not compiled on GPU')

    return drv, ctx, arr, shared_x, shared_xx
def find_sigma(X_shared, sigma_shared, N, perplexity, sigma_iters, verbose=0):
    X = T.fmatrix('X')
    sigma = T.fvector('sigma')

    target = np.log(perplexity)

    P = T.maximum(p_ij_conditional_var(X, sigma), epsilon)

    entropy = -T.sum(P * T.log(P), axis=1)

    # Setting update for binary search interval
    sigmin_shared = theano.shared(np.full(N, np.sqrt(epsilon), dtype=floath))
    sigmax_shared = theano.shared(np.full(N, np.inf, dtype=floath))

    sigmin = T.fvector('sigmin')
    sigmax = T.fvector('sigmax')

    upmin = T.switch(T.lt(entropy, target), sigma, sigmin)
    upmax = T.switch(T.gt(entropy, target), sigma, sigmax)

    givens = {X: X_shared, sigma: sigma_shared, sigmin: sigmin_shared,
              sigmax: sigmax_shared}
    updates = [(sigmin_shared, upmin), (sigmax_shared, upmax)]

    update_intervals = theano.function([], entropy, givens=givens,
                                       updates=updates)

    # Setting update for sigma according to search interval
    upsigma = T.switch(T.isinf(sigmax), sigma * 2, (sigmin + sigmax) / 2.)

    givens = {sigma: sigma_shared, sigmin: sigmin_shared,
              sigmax: sigmax_shared}
    updates = [(sigma_shared, upsigma)]

    update_sigma = theano.function([], sigma, givens=givens, updates=updates)

    for i in range(sigma_iters):
        e = update_intervals()
        update_sigma()
        if verbose:
            print('Finding sigmas... Iteration {0}/{1}: Perplexities in '
                  '[{2:.4f}, {3:.4f}].'.format(i + 1, sigma_iters,
                                               np.exp(e.min()),
                                               np.exp(e.max())), end='\r')

    if np.any(np.isnan(np.exp(e))):
        raise SigmaTooLowException('Invalid sigmas. The perplexity is '
                                   'probably too low.')

    if verbose:
        print('\nDone. Perplexities in [{0:.4f}, {1:.4f}].'.format(
            np.exp(e.min()), np.exp(e.max())))

# Perform momentum-based gradient descent on the cost function with the given
# parameters. Return the vertex coordinates and per-vertex cost.
def __init__(self, num_emb, emb_dim, hidden_dim, output_dim,
             degree=2, learning_rate=0.01, momentum=0.9,
             trainable_embeddings=True,
             labels_on_nonroot_nodes=False,
             irregular_tree=False):
    assert emb_dim > 1 and hidden_dim > 1
    self.num_emb = num_emb
    self.emb_dim = emb_dim
    self.hidden_dim = hidden_dim
    self.output_dim = output_dim
    self.degree = degree
    self.learning_rate = learning_rate
    self.momentum = momentum
    self.irregular_tree = irregular_tree

    self.params = []
    self.embeddings = theano.shared(self.init_matrix([self.num_emb, self.emb_dim]))
    if trainable_embeddings:
        self.params.append(self.embeddings)

    self.x = T.ivector(name='x')  # word indices
    self.tree = T.imatrix(name='tree')  # shape [None, self.degree]
    if labels_on_nonroot_nodes:
        self.y = T.fmatrix(name='y')  # output shape [None, self.output_dim]
        self.y_exists = T.fvector(name='y_exists')  # shape [None]
    else:
        self.y = T.fvector(name='y')  # output shape [self.output_dim]

    self.num_words = self.x.shape[0]  # total number of nodes (leaves + internal) in tree
    emb_x = self.embeddings[self.x]
    emb_x = emb_x * T.neq(self.x, -1).dimshuffle(0, 'x')  # zero-out non-existent embeddings

    self.tree_states = self.compute_tree(emb_x, self.tree)
    self.final_state = self.tree_states[-1]
    if labels_on_nonroot_nodes:
        self.output_fn = self.create_output_fn_multi()
        self.pred_y = self.output_fn(self.tree_states)
        self.loss = self.loss_fn_multi(self.y, self.pred_y, self.y_exists)
    else:
        self.output_fn = self.create_output_fn()
        self.pred_y = self.output_fn(self.final_state)
        self.loss = self.loss_fn(self.y, self.pred_y)

    updates = self.gradient_descent(self.loss)

    train_inputs = [self.x, self.tree, self.y]
    if labels_on_nonroot_nodes:
        train_inputs.append(self.y_exists)
    self._train = theano.function(train_inputs,
                                  [self.loss, self.pred_y],
                                  updates=updates)

    self._evaluate = theano.function([self.x, self.tree],
                                     self.final_state)

    self._predict = theano.function([self.x, self.tree], self.pred_y)
def __init__(self, n_voc, trainset, testset, dataname, classes, prefix):
    if prefix is not None:
        prefix += '/'
    self.trainset = trainset
    self.testset = testset

    docs = T.imatrix()
    label = T.ivector()
    length = T.fvector()
    sentencenum = T.fvector()
    wordmask = T.fmatrix()
    sentencemask = T.fmatrix()
    maxsentencenum = T.iscalar()
    isTrain = T.iscalar()

    rng = numpy.random

    layers = []
    layers.append(EmbLayer(rng, docs, n_voc, 200, 'emblayer', dataname, prefix))
    layers.append(LSTMLayer(rng, layers[-1].output, wordmask, 200, 200, 'wordlstmlayer', prefix))
    layers.append(MeanPoolLayer(layers[-1].output, length))
    layers.append(SentenceSortLayer(layers[-1].output, maxsentencenum))
    layers.append(LSTMLayer(rng, layers[-1].output, sentencemask, 200, 200, 'sentencelstmlayer', prefix))
    layers.append(MeanPoolLayer(layers[-1].output, sentencenum))
    layers.append(HiddenLayer(rng, layers[-1].output, 200, 200, 'fulllayer', prefix))
    layers.append(HiddenLayer(rng, layers[-1].output, 200, int(classes), 'softmaxlayer', prefix, activation=T.nnet.softmax))
    self.layers = layers

    cost = -T.mean(T.log(layers[-1].output)[T.arange(label.shape[0]), label], acc_dtype='float32')
    correct = T.sum(T.eq(T.argmax(layers[-1].output, axis=1), label), acc_dtype='int32')
    err = T.argmax(layers[-1].output, axis=1) - label
    mse = T.sum(err * err)

    params = []
    for layer in layers:
        params += layer.params
    L2_rate = numpy.float32(1e-5)
    for param in params[1:]:
        cost += T.sum(L2_rate * (param * param), acc_dtype='float32')
    gparams = [T.grad(cost, param) for param in params]

    updates = AdaUpdates(params, gparams, 0.95, 1e-6)

    self.train_model = theano.function(
        inputs=[docs, label, length, sentencenum, wordmask, sentencemask, maxsentencenum],
        outputs=cost,
        updates=updates,
    )

    self.test_model = theano.function(
        inputs=[docs, label, length, sentencenum, wordmask, sentencemask, maxsentencenum],
        outputs=[correct, mse],
    )
def test_dnn_batchnorm_train():
    if not dnn.dnn_available(test_ctx_name):
        raise SkipTest(dnn.dnn_available.msg)
    if dnn.version(raises=False) < 5000:
        raise SkipTest("batch normalization requires cudnn v5+")
    utt.seed_rng()

    for mode in ('per-activation', 'spatial'):
        for vartype in (T.ftensor5, T.ftensor4, T.ftensor3, T.fmatrix, T.fvector):
            x, scale, bias = (vartype(n) for n in ('x', 'scale', 'bias'))
            ndim = x.ndim
            eps = 5e-3  # some non-standard value to test if it's used

            # forward pass
            out, x_mean, x_invstd = dnn.dnn_batch_normalization_train(
                x, scale, bias, mode, eps)
            # reference forward pass
            if mode == 'per-activation':
                axes = (0,)
            elif mode == 'spatial':
                axes = (0,) + tuple(range(2, ndim))
            x_mean2 = x.mean(axis=axes, keepdims=True)
            x_invstd2 = T.inv(T.sqrt(x.var(axis=axes, keepdims=True) + eps))
            scale2 = T.addbroadcast(scale, *axes)
            bias2 = T.addbroadcast(bias, *axes)
            out2 = (x - x_mean2) * (scale2 * x_invstd2) + bias2
            # backward pass
            dy = vartype('dy')
            grads = T.grad(None, wrt=[x, scale, bias], known_grads={out: dy})
            # reference backward pass
            grads2 = T.grad(None, wrt=[x, scale, bias], known_grads={out2: dy})
            # compile
            f = theano.function([x, scale, bias, dy],
                                [out, x_mean, x_invstd,
                                 out2, x_mean2, x_invstd2] + grads + grads2,
                                mode=mode_with_gpu)
            # run
            for data_shape in ((5, 10, 30, 40, 10), (4, 3, 1, 1, 1),
                               (1, 1, 5, 5, 5)):
                data_shape = data_shape[:ndim]
                param_shape = tuple(1 if d in axes else s
                                    for d, s in enumerate(data_shape))
                X = 4 + 3 * numpy.random.randn(*data_shape).astype('float32')
                Dy = -1 + 2 * numpy.random.randn(*data_shape).astype('float32')
                Scale = numpy.random.randn(*param_shape).astype('float32')
                Bias = numpy.random.randn(*param_shape).astype('float32')
                outputs = f(X, Scale, Bias, Dy)
                # compare outputs
                utt.assert_allclose(outputs[0], outputs[0 + 3])  # out
                utt.assert_allclose(outputs[1], outputs[1 + 3])  # mean
                utt.assert_allclose(outputs[2], outputs[2 + 3])  # invstd
                # compare gradients
                utt.assert_allclose(outputs[6], outputs[6 + 3], atol=1e-4)  # dx
                utt.assert_allclose(outputs[7], outputs[7 + 3],
                                    rtol=2e-4, atol=1e-4)  # dscale
                utt.assert_allclose(outputs[8], outputs[8 + 3])  # dbias
def test_asymptotic_32():
    """
    This test makes sure that our functions behave sensibly when
    huge values are present
    """

    # TODO: consider adding the optimization of crossentropy into the current
    # mode for the purpose of running this test

    for dtype in 'float32', 'float64':
        if dtype == 'float32':
            x = tensor.fmatrix()
            x2 = tensor.fvector()
        else:
            x = tensor.dmatrix()
            x2 = tensor.dvector()
        y = tensor.lvector()

        c = categorical_crossentropy(softmax(x + x2), y)
        f = theano.function([x, y, x2],
                            [c.sum(), tensor.grad(c.sum(), x)],
                            mode='FAST_RUN')
        if 0:
            for i, n in enumerate(f.maker.fgraph.toposort()):
                print(i, n)

        xval = numpy.zeros((5, 5), dtype=dtype).astype(dtype)
        x2val = numpy.zeros(5, dtype=xval.dtype).astype(dtype)
        for i in xrange(100):
            cval, gxval = f(xval, numpy.arange(5), x2val)
            xval -= 100.3 * gxval
            # print cval, gxval
        assert cval == 0  # no problem going to zero error

        # what about when x gets really big?
        xval = numpy.zeros((5, 5), dtype=dtype)
        x2val = numpy.zeros(5, dtype=xval.dtype)
        for i in xrange(100):
            cval, gxval = f(xval, numpy.arange(5), x2val)
            xval += 100000.3 * gxval
            # print cval, gxval

        assert cval > 61750000
        assert gxval[0, 0] == -1.0
        assert gxval[0, 1] == 0.25