The following 50 code examples, extracted from open-source Python projects, illustrate how to use theano.tensor.tensor3().
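Before the project examples, here is a minimal self-contained sketch (the variable names and the toy (batch, time, features) shape are illustrative, not taken from any project below). It shows that theano.tensor.tensor3() simply declares a symbolic 3-D variable, which can then be wired into a graph and compiled with theano.function:

    import numpy as np
    import theano
    import theano.tensor as T

    # declare a symbolic 3-D tensor, e.g. (batch, time, features)
    x = T.tensor3('x', dtype='float32')

    # a simple graph: sum over the time axis
    y = x.sum(axis=1)

    # compile and run on concrete data
    f = theano.function([x], y)
    data = np.random.rand(2, 3, 4).astype('float32')
    print(f(data).shape)  # (2, 4)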
def build_encoder_bi(tparams, options):
    """
    build bidirectional encoder, given pre-computed word embeddings
    """
    # word embedding (source)
    embedding = tensor.tensor3('embedding', dtype='float32')
    embeddingr = embedding[::-1]
    x_mask = tensor.matrix('x_mask', dtype='float32')
    xr_mask = x_mask[::-1]

    # encoder
    proj = get_layer(options['encoder'])[1](tparams, embedding, options,
                                            prefix='encoder',
                                            mask=x_mask)
    projr = get_layer(options['encoder'])[1](tparams, embeddingr, options,
                                             prefix='encoder_r',
                                             mask=xr_mask)

    ctx = tensor.concatenate([proj[0][-1], projr[0][-1]], axis=1)

    return embedding, x_mask, ctx

# some utilities
def compile(self):
    x_train = T.tensor4('x_train')
    actions_train = T.matrix('actions_train')
    y_train = T.matrix('y_train')

    cost_function = self.squared_error(x_train, actions_train, y_train)

    self.train_function = theano.function([x_train, actions_train, y_train],
                                          cost_function,
                                          updates=self.sgd(cost_function, self.params),
                                          on_unused_input='ignore',
                                          allow_input_downcast=True)

    x_pred = T.tensor3('x_pred')
    actions_pred = T.vector('actions_pred')

    output_function = self.output(x_pred, actions_pred)

    self.predict_function = theano.function([x_pred, actions_pred],
                                            output_function,
                                            on_unused_input='ignore',
                                            allow_input_downcast=True)
    return self
def exe_rnn(use_embedd, length, num_units, position, binominal):
    batch_size = BATCH_SIZE

    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')

    layer_input = lasagne.layers.InputLayer(shape=(None, length, 1), input_var=input_var, name='input')
    if use_embedd:
        layer_position = construct_position_input(batch_size, length, num_units)
        layer_input = lasagne.layers.concat([layer_input, layer_position], axis=2)

    layer_rnn = RecurrentLayer(layer_input, num_units, nonlinearity=nonlinearities.tanh, only_return_final=True,
                               W_in_to_hid=lasagne.init.GlorotUniform(), W_hid_to_hid=lasagne.init.GlorotUniform(),
                               b=lasagne.init.Constant(0.), name='RNN')
    # W = layer_rnn.W_hid_to_hid.sum()
    # U = layer_rnn.W_in_to_hid.sum()
    # b = layer_rnn.b.sum()

    layer_output = DenseLayer(layer_rnn, num_units=1, nonlinearity=nonlinearities.sigmoid, name='output')

    return train(layer_output, layer_rnn, input_var, target_var, batch_size, length, position, binominal)
def __init__(self, input_dim, output_dim,
             init='glorot_uniform', inner_init='orthogonal', activation='sigmoid',
             weights=None, truncate_gradient=-1, return_sequences=False):
    super(SimpleRNN, self).__init__()
    self.init = initializations.get(init)
    self.inner_init = initializations.get(inner_init)
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.truncate_gradient = truncate_gradient
    self.activation = activations.get(activation)
    self.return_sequences = return_sequences
    self.input = T.tensor3()

    self.W = self.init((self.input_dim, self.output_dim))
    self.U = self.inner_init((self.output_dim, self.output_dim))
    self.b = shared_zeros((self.output_dim))
    self.params = [self.W, self.U, self.b]

    if weights is not None:
        self.set_weights(weights)
def __init__(self, input_dim, output_dim, depth=3,
             init='glorot_uniform', inner_init='orthogonal',
             activation='sigmoid', inner_activation='hard_sigmoid',
             weights=None, truncate_gradient=-1, return_sequences=False):
    super(SimpleDeepRNN, self).__init__()
    self.init = initializations.get(init)
    self.inner_init = initializations.get(inner_init)
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.truncate_gradient = truncate_gradient
    self.activation = activations.get(activation)
    self.inner_activation = activations.get(inner_activation)
    self.depth = depth
    self.return_sequences = return_sequences
    self.input = T.tensor3()

    self.W = self.init((self.input_dim, self.output_dim))
    self.Us = [self.inner_init((self.output_dim, self.output_dim)) for _ in range(self.depth)]
    self.b = shared_zeros((self.output_dim))
    self.params = [self.W] + self.Us + [self.b]

    if weights is not None:
        self.set_weights(weights)
def symbolic_input_variables(self):
    features = tensor.tensor3('features')
    features_mask = tensor.matrix('features_mask')
    labels = tensor.imatrix('labels')
    labels_mask = tensor.matrix('labels_mask')

    start_flag = tensor.scalar('start_flag')

    if self.use_speaker:
        speaker = tensor.imatrix('speaker_index')
    else:
        speaker = None

    if self.raw_output:
        raw_sequence = tensor.itensor3('raw_audio')
    else:
        raw_sequence = None

    return features, features_mask, labels, labels_mask, \
        speaker, start_flag, raw_sequence
def test_lnlstm_bck():
    num_batch, seq_len, n_features1 = 2, 3, 4
    num_units = 2
    x = T.tensor3()
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)

    x_in = np.ones(in_shp).astype('float32')

    # need to set random seed.
    lasagne.random.get_rng().seed(1234)
    l_lstm_fwd = LNLSTMLayer(l_inp, num_units=num_units, backwards=False)
    lasagne.random.get_rng().seed(1234)
    l_lstm_bck = LNLSTMLayer(l_inp, num_units=num_units, backwards=True)

    output_fwd = helper.get_output(l_lstm_fwd, x)
    output_bck = helper.get_output(l_lstm_bck, x)

    output_fwd_val = output_fwd.eval({x: x_in})
    output_bck_val = output_bck.eval({x: x_in})

    # test that the backwards model reverses its final input
    np.testing.assert_almost_equal(output_fwd_val, output_bck_val[:, ::-1])
def test_lnlstm_unroll_scan_bck():
    num_batch, seq_len, n_features1 = 2, 3, 4
    num_units = 2
    x = T.tensor3()
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)

    x_in = np.random.random(in_shp).astype('float32')

    # need to set random seed.
    lasagne.random.get_rng().seed(1234)
    l_lstm_scan = LNLSTMLayer(l_inp, num_units=num_units, backwards=True,
                              unroll_scan=False)
    lasagne.random.get_rng().seed(1234)
    l_lstm_unrolled = LNLSTMLayer(l_inp, num_units=num_units, backwards=True,
                                  unroll_scan=True)

    output_scan = helper.get_output(l_lstm_scan, x)
    output_scan_unrolled = helper.get_output(l_lstm_unrolled, x)

    output_scan_val = output_scan.eval({x: x_in})
    output_unrolled_val = output_scan_unrolled.eval({x: x_in})

    np.testing.assert_almost_equal(output_scan_val, output_unrolled_val)
def test_lstm_unroll_scan_bck():
    num_batch, seq_len, n_features1 = 2, 3, 4
    num_units = 2
    x = T.tensor3()
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)

    x_in = np.random.random(in_shp).astype('float32')

    # need to set random seed.
    lasagne.random.get_rng().seed(1234)
    l_lstm_scan = LSTMLayer(l_inp, num_units=num_units, backwards=True,
                            unroll_scan=False)
    lasagne.random.get_rng().seed(1234)
    l_lstm_unrolled = LSTMLayer(l_inp, num_units=num_units, backwards=True,
                                unroll_scan=True)

    output_scan = helper.get_output(l_lstm_scan, x)
    output_scan_unrolled = helper.get_output(l_lstm_unrolled, x)

    output_scan_val = output_scan.eval({x: x_in})
    output_unrolled_val = output_scan_unrolled.eval({x: x_in})

    np.testing.assert_almost_equal(output_scan_val, output_unrolled_val)
def build_encoder_w2v(tparams, options):
    """
    Computation graph for encoder, given pre-trained word embeddings
    """
    opt_ret = dict()

    trng = RandomStreams(1234)

    # word embedding (source)
    embedding = tensor.tensor3('embedding', dtype='float32')
    x_mask = tensor.matrix('x_mask', dtype='float32')

    # encoder
    proj = get_layer(options['encoder'])[1](tparams, embedding, None, options,
                                            prefix='encoder',
                                            mask=x_mask)
    ctx = proj[0][-1]

    return trng, embedding, x_mask, ctx
def build_net(in_shape, out_size, model):
    # input variables
    input_var = (tt.tensor4('input', dtype='float32')
                 if len(in_shape) > 1 else
                 tt.tensor3('input', dtype='float32'))
    target_var = tt.tensor3('target_output', dtype='float32')
    mask_var = tt.matrix('mask_input', dtype='float32')

    # stack more layers
    network = lnn.layers.InputLayer(
        name='input', shape=(None, None) + in_shape, input_var=input_var
    )
    mask_in = lnn.layers.InputLayer(name='mask', input_var=mask_var,
                                    shape=(None, None))

    network = spg.layers.CrfLayer(network, mask_input=mask_in,
                                  num_states=out_size, name='CRF')

    return network, input_var, target_var, mask_var
def build_model(tparams, options):
    alphaHiddenDimSize = options['alphaHiddenDimSize']
    betaHiddenDimSize = options['betaHiddenDimSize']

    x = T.tensor3('x', dtype=config.floatX)

    reverse_emb_t = x[::-1]
    reverse_h_a = gru_layer(tparams, reverse_emb_t, 'a', alphaHiddenDimSize)[::-1] * 0.5
    reverse_h_b = gru_layer(tparams, reverse_emb_t, 'b', betaHiddenDimSize)[::-1] * 0.5

    preAlpha = T.dot(reverse_h_a, tparams['w_alpha']) + tparams['b_alpha']
    preAlpha = preAlpha.reshape((preAlpha.shape[0], preAlpha.shape[1]))
    alpha = (T.nnet.softmax(preAlpha.T)).T

    beta = T.tanh(T.dot(reverse_h_b, tparams['W_beta']) + tparams['b_beta'])

    return x, alpha, beta
def test_local_log_sum_exp1():
    # Tests if optimization is applied by checking the presence of the maximum
    x = tensor3('x')
    check_max_log_sum_exp(x, axis=(0,), dimshuffle_op=None)
    check_max_log_sum_exp(x, axis=(1,), dimshuffle_op=None)
    check_max_log_sum_exp(x, axis=(2,), dimshuffle_op=None)
    check_max_log_sum_exp(x, axis=(0, 1), dimshuffle_op=None)
    check_max_log_sum_exp(x, axis=(0, 1, 2), dimshuffle_op=None)

    # If a transpose is applied to the sum
    transpose_op = DimShuffle((False, False), (1, 0))
    check_max_log_sum_exp(x, axis=2, dimshuffle_op=transpose_op)

    # If the sum is performed with keepdims=True
    x = TensorType(dtype='floatX', broadcastable=(False, True, False))('x')
    sum_keepdims_op = x.sum(axis=(0, 1), keepdims=True).owner.op
    check_max_log_sum_exp(x, axis=(0, 1), dimshuffle_op=sum_keepdims_op)
def test_local_log_sum_exp2():
    # Tests if the optimization works (result is correct) around 1.0
    x = tensor3('x')
    x_val = 1.0 + numpy.random.rand(4, 3, 2).astype(config.floatX) / 10.0

    f = compile_graph_log_sum_exp(x, axis=(1,))
    naive_ret = numpy.log(numpy.sum(numpy.exp(x_val), axis=1))
    optimised_ret = f(x_val)
    assert numpy.allclose(naive_ret, optimised_ret)

    # If a transpose is applied
    transpose_op = DimShuffle((False, False), (1, 0))
    f = compile_graph_log_sum_exp(x, axis=(1,), dimshuffle_op=transpose_op)
    naive_ret = numpy.log(numpy.sum(numpy.exp(x_val), axis=1).T)
    optimised_ret = f(x_val)
    assert numpy.allclose(naive_ret, optimised_ret)
def test_diag(self):
    # test that it builds a matrix with given diagonal when using
    # vector inputs
    x = theano.tensor.vector()
    y = diag(x)
    assert y.owner.op.__class__ == AllocDiag

    # test that it extracts the diagonal when using matrix input
    x = theano.tensor.matrix()
    y = extract_diag(x)
    assert y.owner.op.__class__ == ExtractDiag

    # other types should raise error
    x = theano.tensor.tensor3()
    ok = False
    try:
        y = extract_diag(x)
    except TypeError:
        ok = True
    assert ok

    # not testing the view=True case since it is not used anywhere.
def test_batched_dot():
    first = theano.tensor.tensor3("first")
    second = theano.tensor.tensor3("second")
    output = theano.tensor.basic.batched_dot(first, second)
    first_val = numpy.random.rand(10, 10, 20).astype(config.floatX)
    second_val = numpy.random.rand(10, 20, 5).astype(config.floatX)
    result_fn = theano.function([first, second], output)
    result = result_fn(first_val, second_val)
    assert result.shape[0] == first_val.shape[0]
    assert result.shape[1] == first_val.shape[1]
    assert result.shape[2] == second_val.shape[2]

    first_mat = theano.tensor.dmatrix("first")
    second_mat = theano.tensor.dmatrix("second")
    output = theano.tensor.basic.batched_dot(first_mat, second_mat)
    first_mat_val = numpy.random.rand(10, 10).astype(config.floatX)
    second_mat_val = numpy.random.rand(10, 10).astype(config.floatX)
    result_fn = theano.function([first_mat, second_mat], output)
    result = result_fn(first_mat_val, second_mat_val)

    assert result.shape[0] == first_mat_val.shape[0]
def test_Op_dims(self):
    # _dot is a Dot op instance
    _dot = theano.tensor.basic._dot
    d0 = scalar()
    d1 = vector()
    d2 = matrix()
    d3 = tensor3()

    self.assertRaises(TypeError, _dot, d0, d0)
    self.assertRaises(TypeError, _dot, d0, d1)
    self.assertRaises(TypeError, _dot, d0, d2)
    self.assertRaises(TypeError, _dot, d0, d3)
    self.assertRaises(TypeError, _dot, d1, d0)
    _dot(d1, d1)
    _dot(d1, d2)
    self.assertRaises(TypeError, _dot, d1, d3)
    self.assertRaises(TypeError, _dot, d2, d0)
    _dot(d2, d1)
    _dot(d2, d2)
    self.assertRaises(TypeError, _dot, d2, d3)
    self.assertRaises(TypeError, _dot, d3, d0)
    self.assertRaises(TypeError, _dot, d3, d1)
    self.assertRaises(TypeError, _dot, d3, d2)
    self.assertRaises(TypeError, _dot, d3, d3)
def test_scalar_axes(self):
    # Test matrix-matrix
    amat = fmatrix()
    bmat = dmatrix()
    # We leave bmat at float64 to test a mix of float32 and float64.
    axes = 1
    aval = rand(4, 5).astype('float32')
    bval = rand(5, 3)
    c = tensordot(amat, bmat, axes)
    f3 = inplace_func([amat, bmat], c)
    self.assertTrue(numpy.allclose(numpy.tensordot(aval, bval, axes),
                                   f3(aval, bval)))
    utt.verify_grad(self.TensorDot(axes), [aval, bval])

    # Test tensor-tensor
    amat = tensor3()
    bmat = tensor3()
    axes = 2
    aval = rand(3, 4, 5)
    bval = rand(4, 5, 3)
    c = tensordot(amat, bmat, axes)
    f3 = inplace_func([amat, bmat], c)
    self.assertTrue(numpy.allclose(numpy.tensordot(aval, bval, axes),
                                   f3(aval, bval)))
    utt.verify_grad(self.TensorDot(axes), [aval, bval])
def ___test_infer_shape_tuple(self):
    a = tensor.tensor3(dtype='int32')
    b = tensor.tensor3(dtype='int32')
    c = tensor.tensor3(dtype='int32')

    A = numpy.asarray([1, 0], dtype='int32').reshape((2, 1, 1))
    B = numpy.asarray(numpy.random.rand(1, 4, 1), dtype='int32')
    C = numpy.asarray(numpy.random.rand(1, 1, 7), dtype='int32')

    f = function([a, b, c], choose(a, (b, c)))
    shape = (2, 4, 7)
    assert numpy.allclose(f(A, B, C).shape, shape)

    self._compile_and_check([a, b, c],  # theano.function inputs
                            [self.op(a, (b, c))],  # theano.function outputs
                            # Always use not square matrix!
                            # inputs data
                            [A, B, C],
                            # Op that should be removed from the graph.
                            self.op_class)
def test_correct_answer(self):
    a = T.matrix()
    b = T.matrix()

    x = T.tensor3()
    y = T.tensor3()

    A = numpy.cast[theano.config.floatX](numpy.random.rand(5, 3))
    B = numpy.cast[theano.config.floatX](numpy.random.rand(7, 2))
    X = numpy.cast[theano.config.floatX](numpy.random.rand(5, 6, 1))
    Y = numpy.cast[theano.config.floatX](numpy.random.rand(1, 9, 3))

    make_list((3., 4.))
    c = make_list((a, b))
    z = make_list((x, y))
    fc = theano.function([a, b], c)
    fz = theano.function([x, y], z)
    self.assertTrue((m == n).all() for m, n in zip(fc(A, B), [A, B]))
    self.assertTrue((m == n).all() for m, n in zip(fz(X, Y), [X, Y]))
def setUp(self):
    super(Test_local_elemwise_alloc, self).setUp()
    self.fast_run_mode = mode_with_gpu

    # self.vec = tensor.vector('vec', dtype=dtype)
    # self.mat = tensor.matrix('mat', dtype=dtype)
    # self.tens = tensor.tensor3('tens', dtype=dtype)

    # self.alloc_wo_dep = basic_ops.gpu_alloc(self.vec, 2, 2)
    # self.alloc_w_dep = basic_ops.gpu_alloc(self.vec, *self.mat.shape)
    self.alloc_wo_dep = basic_ops.gpu_alloc(self.vec, 2, 2)
    self.alloc_w_dep = basic_ops.gpu_alloc(self.vec, *self.mat.shape)
    self.alloc_w_dep_tens = basic_ops.gpu_alloc(
        self.vec,
        self.tens.shape[0],
        self.tens.shape[1]
    )
    self.tv_wo_dep = basic_ops.gpu_alloc(self.vec, 5, 5)
    self.tm_wo_dep = basic_ops.gpu_alloc(self.mat, 5, 5, 5)
    self.s = tensor.iscalar('s')
    self.tv_w_dep = basic_ops.gpu_alloc(self.vec, self.s, self.s)
    self.tm_w_dep = basic_ops.gpu_alloc(self.mat, 5, 5, 5)
    self.row = tensor.row(dtype=self.dtype)
    self.o = basic_ops.gpu_alloc(self.row, 5, 5)
def test_shape():
    input_var = T.tensor3('input')
    target_var = T.imatrix('target')
    output_var, _, _ = memory_augmented_neural_network(
        input_var, target_var,
        batch_size=16,
        nb_class=5,
        memory_shape=(128, 40),
        controller_size=200,
        input_size=20 * 20,
        nb_reads=4)

    posterior_fn = theano.function([input_var, target_var], output_var)

    test_input = np.random.rand(16, 50, 20 * 20)
    test_target = np.random.randint(5, size=(16, 50)).astype('int32')
    test_input_invalid_batch_size = np.random.rand(16 + 1, 50, 20 * 20)
    test_input_invalid_depth = np.random.rand(16, 50, 20 * 20 - 1)

    test_output = posterior_fn(test_input, test_target)
    assert test_output.shape == (16, 50, 5)

    with pytest.raises(ValueError) as e_info:
        posterior_fn(test_input_invalid_batch_size, test_target)

    with pytest.raises(ValueError) as e_info:
        posterior_fn(test_input_invalid_depth, test_target)
def test_load_params(self):
    window = T.iscalar('theta')
    inputs1 = T.tensor3('inputs1', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    network = deltanet_majority_vote.load_saved_model(
        '../oulu/results/best_models/1stream_mfcc_w3s3.6.pkl',
        ([500, 200, 100, 50], [rectify, rectify, rectify, linear]),
        (None, None, 91), inputs1, (None, None), mask,
        250, window, 10)
    d = deltanet_majority_vote.extract_encoder_weights(network, ['fc1', 'fc2', 'fc3', 'bottleneck'],
                                                       [('w1', 'b1'), ('w2', 'b2'), ('w3', 'b3'), ('w4', 'b4')])
    b = deltanet_majority_vote.extract_lstm_weights(network, ['f_blstm1', 'b_blstm1'],
                                                    ['flstm', 'blstm'])
    expected_keys = ['w1', 'w2', 'w3', 'w4', 'b1', 'b2', 'b3', 'b4']
    keys = d.keys()
    for k in keys:
        assert k in expected_keys
        assert type(d[k]) == np.ndarray
    save_mat(d, '../oulu/models/oulu_1stream_mfcc_w3s3.mat')
def main():
    """
    test runner, computes delta for an array of sequences
    :return: None
    """
    A = T.tensor3('A', dtype='float32')
    theta = T.iscalar('theta')

    # compute delta coefficients for multiple sequences
    results, updates = theano.scan(append_delta_coeff, sequences=A, non_sequences=theta)
    compute_deltas = theano.function([A, theta], outputs=results, updates=updates)

    seqs = np.array([[[1, 2, 3, 4, 5],
                      [10, 12, 13, 14, 15],
                      [300, 1, 23, 56, 22]],
                     [[1, 1, 1, 1, 1],
                      [1, 1, 100, 1, 1],
                      [1, 1, 1, 1, 1]]], dtype='float32')
    res = compute_deltas(seqs, 1)
    print(res)
def main():
    options = parse_options()
    print(options)
    window = T.iscalar('theta')
    inputs1 = T.tensor3('inputs1', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    shape = [int(i) for i in options['shape'].split(',')]
    nonlinearities = [select_nonlinearity(s) for s in options['nonlinearities'].split(',')]
    network = deltanet_majority_vote.load_saved_model(options['input'],
                                                      (shape, nonlinearities),
                                                      (None, None, options['input_dim']),
                                                      inputs1, (None, None), mask,
                                                      options['lstm_size'], window,
                                                      options['output_classes'],
                                                      use_blstm=options['use_blstm'])
    d = deltanet_majority_vote.extract_encoder_weights(network, ['fc1', 'fc2', 'fc3', 'bottleneck'],
                                                       [('w1', 'b1'), ('w2', 'b2'), ('w3', 'b3'), ('w4', 'b4')])
    expected_keys = ['w1', 'w2', 'w3', 'w4', 'b1', 'b2', 'b3', 'b4']
    keys = d.keys()
    for k in keys:
        assert k in expected_keys
        assert type(d[k]) == np.ndarray
    if 'output' in options:
        print('save extracted weights to {}'.format(options['output']))
        save_mat(d, options['output'])
def test_build_hierachical_stacked_lstm_network_with_merge_correct_slice(self):
    input_shape = 14
    sequence_length = 4
    batch_size = 1
    _, l_lstm, l_slice = build_hierachical_stacked_lstm_network_with_merge(
        input_shape=input_shape,
        sequence_length=sequence_length,
        batch_size=batch_size,
        output_shape=4)

    states = T.tensor3('states')
    lstm_out = lasagne.layers.get_output(l_lstm, states)
    slice_out = lasagne.layers.get_output(l_slice, states)
    run = theano.function([states], [lstm_out, slice_out])

    sample_states = np.zeros((batch_size, sequence_length, input_shape))
    sample_lstm_out, sample_slice_out = run(sample_states)
    self.assertEquals(sample_lstm_out[:, 1::2, :].tolist(), sample_slice_out.tolist())
def test_build_hierachical_stacked_lstm_network_with_merge_correct_slice_short_seq(self):
    input_shape = 14
    sequence_length = 2
    batch_size = 1
    _, l_lstm, l_slice = build_hierachical_stacked_lstm_network_with_merge(
        input_shape=input_shape,
        sequence_length=sequence_length,
        batch_size=batch_size,
        output_shape=4)

    states = T.tensor3('states')
    lstm_out = lasagne.layers.get_output(l_lstm, states)
    slice_out = lasagne.layers.get_output(l_slice, states)
    run = theano.function([states], [lstm_out, slice_out])

    sample_states = np.zeros((batch_size, sequence_length, input_shape))
    sample_lstm_out, sample_slice_out = run(sample_states)
    self.assertEquals(sample_lstm_out[:, 1::2, :].tolist(), sample_slice_out.tolist())
def test_build_hierachical_stacked_lstm_network_with_merge_correct_slice_len_1_seq(self):
    input_shape = 14
    sequence_length = 1
    batch_size = 1
    l_out, l_lstm, l_slice = build_hierachical_stacked_lstm_network_with_merge(
        input_shape=input_shape,
        sequence_length=sequence_length,
        batch_size=batch_size,
        output_shape=4,
        start=0,
        downsample=3)

    states = T.tensor3('states')
    l_out_out = lasagne.layers.get_output(l_out, states)
    lstm_out = lasagne.layers.get_output(l_lstm, states)
    slice_out = lasagne.layers.get_output(l_slice, states)
    run = theano.function([states], [l_out_out, lstm_out, slice_out])

    sample_states = np.zeros((batch_size, sequence_length, input_shape))
    sample_out, sample_lstm_out, sample_slice_out = run(sample_states)
    self.assertEquals(sample_lstm_out[:, 0::3, :].tolist(), sample_slice_out.tolist())
def test_build_hierachical_stacked_lstm_network_with_merge_correct_slice_longer_len_seq(self):
    input_shape = 14
    sequence_length = 7
    batch_size = 1
    l_out, l_lstm, l_slice = build_hierachical_stacked_lstm_network_with_merge(
        input_shape=input_shape,
        sequence_length=sequence_length,
        batch_size=batch_size,
        output_shape=4,
        start=0,
        downsample=3)

    states = T.tensor3('states')
    l_out_out = lasagne.layers.get_output(l_out, states)
    lstm_out = lasagne.layers.get_output(l_lstm, states)
    slice_out = lasagne.layers.get_output(l_slice, states)
    run = theano.function([states], [l_out_out, lstm_out, slice_out])

    sample_states = np.zeros((batch_size, sequence_length, input_shape))
    sample_out, sample_lstm_out, sample_slice_out = run(sample_states)
    self.assertEquals(sample_lstm_out[:, 0::3, :].tolist(), sample_slice_out.tolist())
def test_build_hierachical_stacked_lstm_network_with_merge_correct_slice_shared_var(self):
    input_shape = 14
    sequence_length = 1
    batch_size = 1
    _, l_lstm, l_slice = build_hierachical_stacked_lstm_network_with_merge(
        input_shape=input_shape,
        sequence_length=sequence_length,
        batch_size=batch_size,
        output_shape=4)

    states = T.tensor3('states')
    lstm_out = lasagne.layers.get_output(l_lstm, states)
    slice_out = lasagne.layers.get_output(l_slice, states)
    states_shared = theano.shared(np.zeros((batch_size, sequence_length, input_shape)))
    run = theano.function([], [lstm_out, slice_out], givens={states: states_shared})

    sample_states = np.zeros((batch_size, sequence_length, input_shape))
    states_shared.set_value(sample_states)
    sample_lstm_out, sample_slice_out = run()
    self.assertEquals(sample_lstm_out[:, 1::2, :].tolist(), sample_slice_out.tolist())
def build(self):
    input_dim = self.input_shape[2]
    self.input = T.tensor3()

    self.W_shape = (self.nb_filter, input_dim, self.filter_length, 1)
    self.W = self.init(self.W_shape)
    self.b = shared_zeros((self.nb_filter,))

    self.params = [self.W, self.b]

    self.regularizers = []
    if self.W_regularizer:
        self.W_regularizer.set_param(self.W)
        self.regularizers.append(self.W_regularizer)

    if self.b_regularizer:
        self.b_regularizer.set_param(self.b)
        self.regularizers.append(self.b_regularizer)

    if self.activity_regularizer:
        self.activity_regularizer.set_layer(self)
        self.regularizers.append(self.activity_regularizer)

    if self.initial_weights is not None:
        self.set_weights(self.initial_weights)
        del self.initial_weights
def build(self):
    input_dim = self.input_shape[2]
    self.input = T.tensor3()

    self.W_z = self.init((input_dim, self.output_dim))
    self.U_z = self.inner_init((self.output_dim, self.output_dim))
    self.b_z = shared_zeros((self.output_dim))

    self.W_r = self.init((input_dim, self.output_dim))
    self.U_r = self.inner_init((self.output_dim, self.output_dim))
    self.b_r = shared_zeros((self.output_dim))

    self.W_h = self.init((input_dim, self.output_dim))
    self.U_h = self.inner_init((self.output_dim, self.output_dim))
    self.b_h = shared_zeros((self.output_dim))

    self.params = [
        self.W_z, self.U_z, self.b_z,
        self.W_r, self.U_r, self.b_r,
        self.W_h, self.U_h, self.b_h,
    ]

    if self.initial_weights is not None:
        self.set_weights(self.initial_weights)
        del self.initial_weights
def build(self):
    input_dim = self.input_shape[2]
    self.input = T.tensor3()

    self.W = self.init((input_dim, self.output_dim))
    self.b = shared_zeros((self.output_dim))

    self.params = [self.W, self.b]

    self.regularizers = []
    if self.W_regularizer:
        self.W_regularizer.set_param(self.W)
        self.regularizers.append(self.W_regularizer)

    if self.b_regularizer:
        self.b_regularizer.set_param(self.b)
        self.regularizers.append(self.b_regularizer)

    if self.activity_regularizer:
        self.activity_regularizer.set_layer(self)
        self.regularizers.append(self.activity_regularizer)

    if self.initial_weights is not None:
        self.set_weights(self.initial_weights)
        del self.initial_weights
def build_encoder(tparams, options):
    """
    build an encoder, given pre-computed word embeddings
    """
    # word embedding (source)
    embedding = tensor.tensor3('embedding', dtype='float32')
    x_mask = tensor.matrix('x_mask', dtype='float32')

    # encoder
    proj = get_layer(options['encoder'])[1](tparams, embedding, options,
                                            prefix='encoder',
                                            mask=x_mask)
    ctx = proj[0][-1]

    return embedding, x_mask, ctx
def ndim_tensor(ndim):
    if ndim == 1:
        return T.vector()
    elif ndim == 2:
        return T.matrix()
    elif ndim == 3:
        return T.tensor3()
    elif ndim == 4:
        return T.tensor4()
    return T.matrix()

# get int32 tensor
def init_func(self, img_value, scene_value):
    if self._proj_func is None:
        img = T.tensor3()
        self._proj_func = theano.function([img], self.proj_mlp.compute(img))
    if self._init_func is None:
        init_e = self._feat_shared.mean(axis=1)
        init_state = T.concatenate([init_e, self.init_mlp.compute(init_e)], axis=-1)
        self._init_func = theano.function([], init_state)
    self._feat_shared.set_value(self._proj_func(img_value))
    self._scene_shared.set_value(scene_value)
    return self._init_func()
def init_func(self, img_value):
    if self._proj_func is None:
        img = T.tensor3()
        self._proj_func = theano.function([img], self.proj_mlp.compute(img))
    if self._init_func is None:
        init_e = self._feat_shared.mean(axis=1)
        init_state = T.concatenate([init_e, self.init_mlp.compute(init_e)], axis=-1)
        self._init_func = theano.function([], init_state)
    self._feat_shared.set_value(self._proj_func(img_value))
    return self._init_func()
def build_model(tparams, leavesList, ancestorsList, options):
    dropoutRate = options['dropoutRate']
    trng = RandomStreams(123)
    use_noise = theano.shared(numpy_floatX(0.))

    x = T.tensor3('x', dtype=config.floatX)
    y = T.tensor3('y', dtype=config.floatX)
    mask = T.matrix('mask', dtype=config.floatX)
    lengths = T.vector('lengths', dtype=config.floatX)

    n_timesteps = x.shape[0]
    n_samples = x.shape[1]

    embList = []
    for leaves, ancestors in zip(leavesList, ancestorsList):
        tempAttention = generate_attention(tparams, leaves, ancestors)
        tempEmb = (tparams['W_emb'][ancestors] * tempAttention[:, :, None]).sum(axis=1)
        embList.append(tempEmb)

    emb = T.concatenate(embList, axis=0)

    x_emb = T.tanh(T.dot(x, emb))
    hidden = gru_layer(tparams, x_emb, options)
    hidden = dropout_layer(hidden, use_noise, trng, dropoutRate)
    y_hat = softmax_layer(tparams, hidden) * mask[:, :, None]

    logEps = 1e-8
    cross_entropy = -(y * T.log(y_hat + logEps) + (1. - y) * T.log(1. - y_hat + logEps))
    output_loglikelihood = cross_entropy.sum(axis=2).sum(axis=0) / lengths
    cost_noreg = T.mean(output_loglikelihood)

    if options['L2'] > 0.:
        cost = cost_noreg + options['L2'] * ((tparams['W_output']**2).sum() +
                                             (tparams['W_attention']**2).sum() +
                                             (tparams['v_attention']**2).sum())

    return use_noise, x, y, mask, lengths, cost, cost_noreg, y_hat
def exe_maxru(length, num_units, position, binominal):
    batch_size = BATCH_SIZE

    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')

    layer_input = lasagne.layers.InputLayer(shape=(None, length, 1), input_var=input_var, name='input')

    time_updategate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)

    time_update = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                       b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)

    resetgate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                     W_cell=lasagne.init.GlorotUniform())

    updategate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                      W_cell=lasagne.init.GlorotUniform())

    hiden_update = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                        b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)

    layer_taru = MAXRULayer(layer_input, num_units, max_length=length,
                            P_time=lasagne.init.GlorotUniform(), nonlinearity=nonlinearities.tanh,
                            resetgate=resetgate, updategate=updategate, hidden_update=hiden_update,
                            time_updategate=time_updategate, time_update=time_update,
                            only_return_final=True, name='MAXRU', p=0.)

    # W = layer_taru.W_hid_to_hidden_update.sum()
    # U = layer_taru.W_in_to_hidden_update.sum()
    # b = layer_taru.b_hidden_update.sum()

    layer_output = DenseLayer(layer_taru, num_units=1, nonlinearity=nonlinearities.sigmoid, name='output')

    return train(layer_output, input_var, target_var, batch_size, length, position, binominal)
def exe_lstm(use_embedd, length, num_units, position, binominal):
    batch_size = BATCH_SIZE

    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')

    layer_input = lasagne.layers.InputLayer(shape=(None, length, 1), input_var=input_var, name='input')
    if use_embedd:
        layer_position = construct_position_input(batch_size, length, num_units)
        layer_input = lasagne.layers.concat([layer_input, layer_position], axis=2)

    ingate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                  W_cell=lasagne.init.Uniform(range=0.1))

    outgate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                   W_cell=lasagne.init.Uniform(range=0.1))

    # according to Jozefowicz et al.(2015), init bias of forget gate to 1.
    forgetgate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                      W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))

    # now use tanh for nonlinear function of cell, need to try pure linear cell
    cell = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)

    layer_lstm = LSTMLayer(layer_input, num_units, ingate=ingate, forgetgate=forgetgate, cell=cell, outgate=outgate,
                           peepholes=False, nonlinearity=nonlinearities.tanh, only_return_final=True, name='LSTM')

    # W = layer_lstm.W_hid_to_cell.sum()
    # U = layer_lstm.W_in_to_cell.sum()
    # b = layer_lstm.b_cell.sum()

    layer_output = DenseLayer(layer_lstm, num_units=1, nonlinearity=nonlinearities.sigmoid, name='output')

    return train(layer_output, layer_lstm, input_var, target_var, batch_size, length, position, binominal)
def exe_gru(use_embedd, length, num_units, position, binominal, reset_input):
    batch_size = BATCH_SIZE

    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')

    layer_input = lasagne.layers.InputLayer(shape=(batch_size, length, 1), input_var=input_var, name='input')
    if use_embedd:
        layer_position = construct_position_input(batch_size, length, num_units)
        layer_input = lasagne.layers.concat([layer_input, layer_position], axis=2)

    resetgate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)

    updategate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)

    hiden_update = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                        b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)

    layer_gru = GRULayer_ANA(layer_input, num_units, resetgate=resetgate, updategate=updategate,
                             hidden_update=hiden_update, reset_input=reset_input, only_return_final=True, name='GRU')

    # W = layer_gru.W_hid_to_hidden_update.sum()
    # U = layer_gru.W_in_to_hidden_update.sum()
    # b = layer_gru.b_hidden_update.sum()

    layer_output = DenseLayer(layer_gru, num_units=1, nonlinearity=nonlinearities.sigmoid, name='output')

    return train(layer_output, layer_gru, input_var, target_var, batch_size, length, position, binominal)