The following 50 code examples, extracted from open-source Python projects, illustrate how to use theano.tensor.ivector().
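As a quick orientation before the project excerpts below, here is a minimal, self-contained sketch (not taken from any of the listed projects; the variable names are illustrative only) showing what theano.tensor.ivector() produces: a symbolic one-dimensional int32 variable, typically used for class labels or index arrays.

import numpy as np
import theano
import theano.tensor as T

# ivector() declares a symbolic 1-D int32 vector, commonly used for targets/indices.
y = T.ivector('y')
pred = T.ivector('pred')

# Build an expression on the symbolic vectors, e.g. classification accuracy.
accuracy = T.mean(T.eq(pred, y))

# Compile and call with concrete int32 arrays.
f = theano.function([pred, y], accuracy)
print(f(np.array([0, 1, 2], dtype='int32'),
        np.array([0, 1, 1], dtype='int32')))  # prints 2/3
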
def exe_rnn(use_embedd, length, num_units, position, binominal):
    batch_size = BATCH_SIZE

    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')

    layer_input = lasagne.layers.InputLayer(shape=(None, length, 1), input_var=input_var, name='input')
    if use_embedd:
        layer_position = construct_position_input(batch_size, length, num_units)
        layer_input = lasagne.layers.concat([layer_input, layer_position], axis=2)

    layer_rnn = RecurrentLayer(layer_input, num_units, nonlinearity=nonlinearities.tanh, only_return_final=True,
                               W_in_to_hid=lasagne.init.GlorotUniform(), W_hid_to_hid=lasagne.init.GlorotUniform(),
                               b=lasagne.init.Constant(0.), name='RNN')
    # W = layer_rnn.W_hid_to_hid.sum()
    # U = layer_rnn.W_in_to_hid.sum()
    # b = layer_rnn.b.sum()

    layer_output = DenseLayer(layer_rnn, num_units=1, nonlinearity=nonlinearities.sigmoid, name='output')

    return train(layer_output, layer_rnn, input_var, target_var, batch_size, length, position, binominal)

def compile_grad(nnet, layer):
    """ """

    assert layer.issvm

    y = T.ivector()
    idx = T.iscalar()

    dW, db, loss = symbolic_grad(nnet, layer, idx, y)

    updates = [(layer.dW, dW), (layer.db, db), (layer.loss, loss)]

    # return compiled function
    return theano.function([idx, y], updates=updates, profile=nnet.profile)

def compile_eval_function(nnet):
    X = T.tensor4()
    y = T.ivector()

    # get prediction by fully convolutional network
    prediction = lasagne.layers.get_output(nnet.dense3_conv_layer, deterministic=True, inputs=X)

    # get output scores on first dim
    # before flattening on 2dim and then get scores on second dim
    prediction = prediction.transpose((1, 0, 2, 3))\
        .flatten(2).transpose((1, 0))
    prediction = T.nnet.softmax(prediction)

    # spatial averaging
    prediction = T.mean(prediction, axis=0)

    # compute top1 and top5 accuracies
    sorted_pred = T.argsort(prediction)
    top1_acc = T.mean(T.eq(sorted_pred[-1], y), dtype='floatX')
    top5_acc = T.mean(T.any(T.eq(sorted_pred[-5:], T.shape_padright(y)), axis=1), dtype='floatX')

    return theano.function([X, y], [top1_acc, top5_acc])

def compile_update_svm(nnet, layer):

    assert layer.issvm and not Cfg.store_on_gpu

    X = T.tensor4()
    y = T.ivector()

    XX = layer.get_input_for(X)
    if XX.ndim > 2:
        XX = XX.flatten(2)

    dW, db, loss = grad_svm(nnet, layer, XX, y)

    updates = _update_cps(nnet=nnet, layer=layer, X=XX, dW=dW, db=db, loss=loss)

    return theano.function([X, y], updates=updates, profile=nnet.profile)

def compile_update_conv(nnet, layer):

    assert layer.isconv and Configuration.store_on_gpu

    X = T.tensor4("X")
    y = T.ivector("y")
    idx = T.iscalar("idx")

    dW, db, loss = grad_conv(nnet=nnet, layer=layer, X=X, y=y)

    updates = _update_std(nnet=nnet, layer=layer, dW=dW, db=db, loss=loss, idx=idx)

    return theano.function([idx, X, y], updates=updates, profile=nnet.profile)

def compile_update_svm(nnet, layer):

    assert layer.issvm and Configuration.store_on_gpu

    idx = T.iscalar()
    y = T.ivector()
    X = layer.X_layer[idx, :y.shape[0]]

    dW, db, loss = grad_svm(nnet=nnet, layer=layer, X=X, y=y)

    updates = _update_std(nnet=nnet, layer=layer, dW=dW, db=db, loss=loss, idx=idx)

    return theano.function([idx, y], updates=updates, profile=nnet.profile)

def __init__(self):
    metric_names = ['Loss', 'L2', 'Accuracy']
    super(Fr3dNetTrainer, self).__init__(metric_names)

    tensor5 = T.TensorType(theano.config.floatX, (False,) * 5)
    input_var = tensor5('inputs')
    target_var = T.ivector('targets')

    logging.info("Defining network")
    net = fr3dnet.define_network(input_var)
    self.network = net

    train_fn, val_fn, l_r = fr3dnet.define_updates(net, input_var, target_var)
    self.train_fn = train_fn
    self.val_fn = val_fn
    self.l_r = l_r

def __init__(self, layers, mini_batch_size):
    """Takes a list of `layers`, describing the network architecture, and
    a value for the `mini_batch_size` to be used during training
    by stochastic gradient descent.

    """
    self.layers = layers
    self.mini_batch_size = mini_batch_size
    self.params = [param for layer in self.layers for param in layer.params]
    self.x = T.matrix("x")
    self.y = T.ivector("y")
    init_layer = self.layers[0]
    init_layer.set_inpt(self.x, self.x, self.mini_batch_size)
    for j in range(1, len(self.layers)):  # xrange() was renamed to range() in Python 3.
        prev_layer, layer = self.layers[j-1], self.layers[j]
        layer.set_inpt(
            prev_layer.output, prev_layer.output_dropout, self.mini_batch_size)
    self.output = self.layers[-1].output
    self.output_dropout = self.layers[-1].output_dropout

def __init__(self, batch_size, emb_X, lstm_param, output_size, f1_classes):
    super().__init__(batch_size)
    self.inputs = [T.imatrix('input'), T.matrix('mask')]
    self.target = T.ivector('target')

    l = InputLayer((batch_size, None), self.inputs[0])
    l_mask = InputLayer((batch_size, None), self.inputs[1])
    l = EmbeddingLayer(l, emb_X.shape[0], emb_X.shape[1], W=emb_X)
    l = LSTMLayer(
        l, lstm_param, mask_input=l_mask, grad_clipping=100, nonlinearity=tanh,
        only_return_final=True
    )
    l = DenseLayer(l, output_size, nonlinearity=log_softmax)

    self.pred = T.exp(get_output(l, deterministic=True))
    self.loss = T.mean(categorical_crossentropy_exp(self.target, get_output(l)))
    params = get_all_params(l, trainable=True)
    self.updates = rmsprop(self.loss, params, learning_rate=0.01)
    self.metrics = {'train': [acc], 'val': [acc, f1(f1_classes)]}
    self.network = l
    self.compile()

def __init__(self, batch_size, emb_X, input_size, conv_param, lstm_param, output_size, f1_classes):
    super().__init__(batch_size)
    self.input_size = input_size
    self.conv_param = conv_param
    self.inputs = [T.imatrix('input'), T.matrix('mask')]
    self.target = T.ivector('target')

    l = InputLayer((batch_size, input_size), self.inputs[0])
    l_mask = InputLayer((batch_size, input_size + conv_param - 1), self.inputs[1])
    l = EmbeddingLayer(l, emb_X.shape[0], emb_X.shape[1], W=emb_X)
    l = DimshuffleLayer(l, (0, 2, 1))
    l = Conv1DLayer(l, 300, conv_param, pad='full', nonlinearity=rectify)
    l = DimshuffleLayer(l, (0, 2, 1))
    l = LSTMLayer(
        l, lstm_param, mask_input=l_mask, grad_clipping=100, nonlinearity=tanh,
        only_return_final=True
    )
    l = DenseLayer(l, output_size, nonlinearity=log_softmax)

    self.pred = T.exp(get_output(l, deterministic=True))
    self.loss = T.mean(categorical_crossentropy_exp(self.target, get_output(l)))
    params = get_all_params(l, trainable=True)
    self.updates = adadelta(self.loss, params)
    self.metrics = {'train': [acc], 'val': [acc, f1(f1_classes)]}
    self.network = l
    self.compile()

def __init__(self, models_, output_size):
    super().__init__(models_[0].batch_size)
    self.models = models_
    self.inputs = sum((model.inputs for model in models_), [])
    self.target = T.ivector('target')

    l_features = []
    for model in models_:
        l_features.append(model.network.input_layer)
    l = ConcatLayer(l_features)
    l = DropoutLayer(l)
    l = DenseLayer(l, output_size, nonlinearity=log_softmax)

    self.pred = T.exp(get_output(l, deterministic=True))
    self.loss = T.mean(categorical_crossentropy_exp(self.target, get_output(l)))
    params = get_all_params(l, trainable=True)
    self.updates = adadelta(self.loss, params)
    self.metrics = models_[0].metrics
    self.network = l
    self.compile()

def __init__(self, classifier, args):
    self.y = T.ivector('y')
    self.w = T.vector('w')

    if args.instance_weights_path:
        self.cost = classifier.negative_log_likelihood(self.y, self.w)
    else:
        self.cost = classifier.negative_log_likelihood(self.y)
    if args.L1_reg > 0:
        self.cost = self.cost + args.L1_reg * classifier.L1
    if args.L2_reg > 0:
        self.cost = self.cost + args.L2_reg * classifier.L2_sqr
    if args.alpha and args.alpha > 0:
        self.cost = self.cost + args.alpha * classifier.log_Z_sqr

    self.test = (
        T.mean(classifier.p_y_given_x(self.y))
    )

def __init__(self, classifier, args):
    self.y = T.ivector('y')

    self.cost = (
        classifier.negative_log_likelihood(self.y)
        + args.L1_reg * classifier.L1
        + args.L2_reg * classifier.L2_sqr
    )
    if args.alpha is not None and args.alpha > 0:
        self.cost = self.cost + args.alpha * classifier.log_Z_sqr

    self.test = (
        T.mean(classifier.p_y_given_x(self.y))
    )

def __init__(self, seq_len, n_feature):
    import theano.tensor as T
    self.Input = lasagne.layers.InputLayer(shape=(None, seq_len, n_feature))
    self.buildNetwork()
    self.output = lasagne.layers.get_output(self.network)
    self.params = lasagne.layers.get_all_params(self.network, trainable=True)
    self.output_fn = theano.function([self.Input.input_var], self.output)

    fx = T.fvector().astype("float64")
    choices = T.ivector()
    px = self.output[T.arange(self.output.shape[0]), choices]
    log_px = T.log(px)
    cost = -fx.dot(log_px)
    updates = lasagne.updates.adagrad(cost, self.params, 0.0008)
    Input = lasagne.layers.InputLayer(shape=(None, seq_len, n_feature))
    self.train_fn = theano.function([self.Input.input_var, choices, fx], [cost, px, log_px], updates=updates)

def __init__(self, nn_name, batch_size=1024, freeze=1,
             l_rates=sp.float32(0.05) * sp.ones(512, dtype=sp.float32),
             verbose=1, subnet=None):
    self.nn_name = nn_name
    self.subnet = subnet
    if subnet != None and freeze:
        self.subnet.__freeze__()
    self.batch_size = batch_size
    self.verbose = verbose
    self.l_rates = l_rates
    self.__input_var__ = T.tensor4('X' + self.nn_name[:2])
    self.__target_var__ = T.ivector('y+' + self.nn_name[:2])
    self.max_epochs = self.l_rates.shape[0]
    if self.nn_name == '12-net':
        self.net = self.__build_12_net__()
    elif self.nn_name == '24-net':
        self.net = self.__build_24_net__()
    elif self.nn_name == '48-net':
        self.net = self.__build_48_net__()
    elif self.nn_name == '12-calib_net':
        self.net = self.__build_12_calib_net__()
    elif self.nn_name == '24-calib_net':
        self.net = self.__build_24_calib_net__()
    elif self.nn_name == '48-calib_net':
        self.net = self.__build_48_calib_net__()
    self.__build_loss_train__fn__()

def __init__(self, layers, mini_batch_size):
    '''
    Takes a list of `layers`, describing the network architecture, and
    a value for the `mini_batch_size` to be used during training
    by stochastic gradient descent.
    '''
    self.layers = layers
    self.mini_batch_size = mini_batch_size
    self.params = [param for layer in self.layers for param in layer.params]
    self.x = T.matrix("x")   # input
    self.y = T.ivector("y")  # output
    init_layer = self.layers[0]
    init_layer.set_inpt(self.x, self.x, self.mini_batch_size)
    for i in xrange(1, len(layers)):
        prev_layer, layer = self.layers[i-1], self.layers[i]
        layer.set_inpt(prev_layer.output, prev_layer.output_dropout, self.mini_batch_size)
    self.output = self.layers[-1].output
    self.output_dropout = self.layers[-1].output_dropout

def __init__(self, layers, mini_batch_size):
    """Takes a list of `layers`, describing the network architecture, and
    a value for the `mini_batch_size` to be used during training
    by stochastic gradient descent.

    """
    self.layers = layers
    self.mini_batch_size = mini_batch_size
    self.params = [param for layer in self.layers for param in layer.params]
    self.x = T.matrix("x")
    self.y = T.ivector("y")
    init_layer = self.layers[0]
    init_layer.set_inpt(self.x, self.x, self.mini_batch_size)
    for j in xrange(1, len(self.layers)):
        prev_layer, layer = self.layers[j-1], self.layers[j]
        layer.set_inpt(
            prev_layer.output, prev_layer.output_dropout, self.mini_batch_size)
    self.output = self.layers[-1].output
    self.output_dropout = self.layers[-1].output_dropout

def setupVariables(self):
    floatX = theano.config.floatX  # @UndefinedVariable

    # params
    self.learning_rate = T.scalar('learning_rate', dtype=floatX)
    self.momentum = T.scalar('momentum', dtype=floatX)

    # input
    self.tvIndex = T.lscalar()  # index to a [mini]batch
    #self.tvIndex.tag.test_value = 10
    self.tvX = self.descrNet.inputVar

    # targets
    self.tvY = T.ivector('y')
    self.tvYr = T.tensor4('yr')
    self.tvPairIdx = T.imatrix('pairIdx')
    self.tvPairLabels = T.ivector('pairLabels')
    self.tvTripletIdx = T.imatrix('tripletIdx')
    self.tvTripletThresh = T.scalar('tripletThresh')
    self.tvTripletPoolIdx = T.imatrix('tripletPoolIdx')
    self.tvTripletPoolThresh = T.scalar('tripletPoolThresh')
    self.tvPosTripletPoolSize = T.iscalar('posTripletPoolSize')
    self.tvNegTripletPoolSize = T.iscalar('negTripletPoolSize')

def test_local_csm_properties_csm():
    data = tensor.vector()
    indices, indptr, shape = (tensor.ivector(), tensor.ivector(), tensor.ivector())

    mode = theano.compile.mode.get_default_mode()
    mode = mode.including("specialize", "local_csm_properties_csm")
    for CS, cast in [(sparse.CSC, sp.csc_matrix), (sparse.CSR, sp.csr_matrix)]:
        f = theano.function([data, indices, indptr, shape],
                            sparse.csm_properties(CS(data, indices, indptr, shape)),
                            mode=mode)
        assert not any(
            isinstance(node.op, (sparse.CSM, sparse.CSMProperties))
            for node in f.maker.fgraph.toposort())
        v = cast(random_lil((10, 40), config.floatX, 3))
        f(v.data, v.indices, v.indptr, v.shape)

def test_local_csm_grad_c():
    raise SkipTest("Opt disabled as it don't support unsorted indices")
    if not theano.config.cxx:
        raise SkipTest("G++ not available, so we need to skip this test.")
    data = tensor.vector()
    indices, indptr, shape = (tensor.ivector(), tensor.ivector(), tensor.ivector())
    mode = theano.compile.mode.get_default_mode()

    if theano.config.mode == 'FAST_COMPILE':
        mode = theano.compile.Mode(linker='c|py', optimizer='fast_compile')

    mode = mode.including("specialize", "local_csm_grad_c")
    for CS, cast in [(sparse.CSC, sp.csc_matrix), (sparse.CSR, sp.csr_matrix)]:
        cost = tensor.sum(sparse.DenseFromSparse()(CS(data, indices, indptr, shape)))
        f = theano.function(
            [data, indices, indptr, shape],
            tensor.grad(cost, data),
            mode=mode)
        assert not any(isinstance(node.op, sparse.CSMGrad)
                       for node in f.maker.fgraph.toposort())
        v = cast(random_lil((10, 40), config.floatX, 3))
        f(v.data, v.indices, v.indptr, v.shape)

def test_csm_grad(self):
    for sparsetype in ('csr', 'csc'):
        x = tensor.vector()
        y = tensor.ivector()
        z = tensor.ivector()
        s = tensor.ivector()
        call = getattr(sp, sparsetype + '_matrix')
        spm = call(random_lil((300, 400), config.floatX, 5))
        out = tensor.grad(dense_from_sparse(
            CSM(sparsetype)(x, y, z, s)
        ).sum(), x)
        self._compile_and_check([x, y, z, s],
                                [out],
                                [spm.data, spm.indices, spm.indptr, spm.shape],
                                (CSMGrad, CSMGradC))

def test_csm_unsorted(self):
    """
    Test support for gradients of unsorted inputs.
    """
    sp_types = {'csc': sp.csc_matrix,
                'csr': sp.csr_matrix}

    for format in ['csr', 'csc', ]:
        for dtype in ['float32', 'float64']:
            x = tensor.tensor(dtype=dtype, broadcastable=(False,))
            y = tensor.ivector()
            z = tensor.ivector()
            s = tensor.ivector()
            # Sparse advanced indexing produces unsorted sparse matrices
            a = sparse_random_inputs(format, (4, 3), out_dtype=dtype,
                                     unsorted_indices=True)[1][0]
            # Make sure it's unsorted
            assert not a.has_sorted_indices

            def my_op(x):
                y = tensor.constant(a.indices)
                z = tensor.constant(a.indptr)
                s = tensor.constant(a.shape)
                return tensor.sum(
                    dense_from_sparse(CSM(format)(x, y, z, s) * a))

            verify_grad_sparse(my_op, [a.data])

def test_csm(self):
    sp_types = {'csc': sp.csc_matrix,
                'csr': sp.csr_matrix}

    for format in ['csc', 'csr']:
        for dtype in ['float32', 'float64']:
            x = tensor.tensor(dtype=dtype, broadcastable=(False,))
            y = tensor.ivector()
            z = tensor.ivector()
            s = tensor.ivector()
            f = theano.function([x, y, z, s], CSM(format)(x, y, z, s))

            spmat = sp_types[format](random_lil((4, 3), dtype, 3))

            res = f(spmat.data, spmat.indices, spmat.indptr,
                    numpy.asarray(spmat.shape, 'int32'))

            assert numpy.all(res.data == spmat.data)
            assert numpy.all(res.indices == spmat.indices)
            assert numpy.all(res.indptr == spmat.indptr)
            assert numpy.all(res.shape == spmat.shape)

def test_assert(self):
    x = tensor.matrix("x")
    y = tensor.matrix("y")
    idx = tensor.ivector()

    dx = numpy.random.rand(4, 5).astype(config.floatX)
    dy = numpy.random.rand(2, 5).astype(config.floatX)
    didx = numpy.asarray([1, 3], "int32")

    # set_subtensor
    inc = tensor.set_subtensor(x[idx], y)
    o = inc[idx]
    f = theano.function([x, y, idx], o, self.mode)
    # test wrong index
    for i in [dx.shape[0], -dx.shape[0] - 1]:
        self.assertRaises((AssertionError, IndexError), f, dx, dy, [i, i])
    # test wrong shape
    self.assertRaises((AssertionError, ValueError), f, dx, dy, [1])

def test_stack_trace(self):
    x = tensor.matrix("x")
    # test cases with y.dtype
    # - equal to x.dtype
    # - different from x.dtype (to trigger the cast in
    #   local_adv_sub1_adv_inc_sub1)
    ys = [tensor.matrix("y"), tensor.dmatrix("y")]
    idx = tensor.ivector()

    # set_subtensor and then subtensor with both ys
    incs = [tensor.set_subtensor(x[idx], y) for y in ys]
    outs = [inc[idx] for inc in incs]

    for y, out in zip(ys, outs):
        f = theano.function([x, y, idx], out, self.mode)
        self.assertTrue(check_stack_trace(
            f, ops_to_check=(Assert, scal.Cast)))

def test_local_useless_split():
    x = tensor.matrix('x')
    splits = tensor.ivector('splits')
    opt = tensor.split(x, splits, n_splits=1)
    nonopt = tensor.split(x, splits, n_splits=3)

    mode = compile.get_default_mode().including("local_useless_split")
    f_opt = theano.function([x, splits], opt, mode=mode)
    f_nonopt = theano.function([x, splits], nonopt, mode=mode)

    f_opt(numpy.random.rand(4, 4).astype(config.floatX), [4])
    f_nonopt(numpy.random.rand(4, 4).astype(config.floatX), [1, 2, 1])
    graph_opt = f_opt.maker.fgraph.toposort()
    graph_nonopt = f_nonopt.maker.fgraph.toposort()

    assert isinstance(graph_opt[-1].op, DeepCopyOp)
    assert len(graph_nonopt) == 1
    assert isinstance(graph_nonopt[0].op, tensor.Split)

    assert check_stack_trace(f_opt, ops_to_check=[Assert])
    assert check_stack_trace(f_nonopt, ops_to_check='all')

def test_local_zero_div():
    """Tests 0/x -> 0"""
    mode = theano.compile.mode.get_default_mode().including("local_zero_div")
    for t in (T.scalar, T.ivector, T.ftensor4):
        x = t('x')
        for op in (T.int_div, T.true_div):
            y = op(0, x)
            g = optimize(FunctionGraph([x], [y]))
            # the division should be gone
            divs = [node for node in g.toposort()
                    if isinstance(node.op, T.elemwise.Elemwise) and
                    isinstance(node.op.scalar_op, type(op.scalar_op))]
            assert len(divs) == 0
            # the output type should match the unoptimized one
            output = g.outputs[0]
            assert output.ndim == y.ndim
            assert output.type == y.type
            # and the output should be zero
            assert theano.tensor.get_scalar_constant_value(output) == 0

def test_bad_shape(self):
    a = matrix('a')
    shapes = ivector('shapes')
    rng = numpy.random.RandomState(seed=utt.fetch_seed())
    a_val = rng.uniform(size=(3, 4)).astype(config.floatX)

    # Test reshape to 1 dim
    r = a.reshape(shapes, ndim=1)
    z = zeros_like(r)

    f = self.function([a, shapes], r)
    self.assertRaises(ValueError, f, a_val, [13])

    # Test reshape to 2 dim
    r = a.reshape(shapes, ndim=2)
    z = zeros_like(r)

    f = self.function([a, shapes], r)

    self.assertRaises(ValueError, f, a_val, [-1, 5])
    self.assertRaises(ValueError, f, a_val, [7, -1])
    self.assertRaises(ValueError, f, a_val, [7, 5])
    self.assertRaises(ValueError, f, a_val, [-1, -1])

def test_dim1(self):
    """Test the inversion of one permutation (int vector)"""
    p = ivector()
    inv = inverse_permutation(p)
    assert inv.dtype == p.dtype
    f_inverse = function([p], inv)

    # Generate a random permutation
    rng = numpy.random.RandomState(utt.fetch_seed())
    p_val = rng.permutation(10).astype('int32')
    inv_val = f_inverse(p_val)

    # Check that the inverse of the inverse is the original permutation
    assert numpy.all(f_inverse(inv_val) == p_val)

    # Check that permutation(inverse) == inverse(permutation) = identity
    assert numpy.all(p_val[inv_val] == numpy.arange(10))
    assert numpy.all(inv_val[p_val] == numpy.arange(10))

def test_1_1(self):
    """Test PermuteRowElements(vector, vector)"""
    input = dvector()
    p = ivector()
    out = permute_row_elements(input, p)
    permute = function([input, p], out)

    rng = numpy.random.RandomState(utt.fetch_seed())
    input_val = rng.uniform(size=(5,))
    p_val = rng.permutation(5).astype('int32')
    out_val = permute(input_val, p_val)

    # Should be equivalent to advanced indexing
    out_bis = input_val[p_val]
    assert numpy.all(out_val == out_bis)

    # Verify gradient
    def permute_fixed(s_input):
        """Auxiliary op defined to get rid of gradient wrt p_val"""
        return permute_row_elements(s_input, p_val)
    utt.verify_grad(permute_fixed, [input_val])

def test_2_1(self):
    """Test broadcasting in PermuteRowElements(matrix, vector)"""
    input = matrix()
    p = ivector()
    out = permute_row_elements(input, p)
    permute = function([input, p], out)

    rng = numpy.random.RandomState(utt.fetch_seed())
    input_val = rng.uniform(size=(3, 5)).astype(config.floatX)
    p_val = rng.permutation(5).astype('int32')
    out_val = permute(input_val, p_val)

    # The same permutation should be applied to every row of the input matrix.
    out_bis = numpy.asarray([r[p_val] for r in input_val])
    assert numpy.all(out_val == out_bis)

    # Verify gradient
    def permute_fixed(s_input):
        """Auxiliary op defined to get rid of gradient wrt p_val"""
        return permute_row_elements(s_input, p_val)
    utt.verify_grad(permute_fixed, [input_val])

def test_givens(self):
    x = shared(0)
    assign = pfunc([], x, givens={x: 3})
    assert assign() == 3
    assert x.get_value(borrow=True) == 0

    y = tensor.ivector()
    f = pfunc([y], (y * x), givens={x: 6})
    assert numpy.all(f([1, 1, 1]) == [6, 6, 6])
    assert x.get_value() == 0

    z = tensor.ivector()
    c = z * y
    f = pfunc([y], (c + 7),
              givens={z: theano._asarray([4, 4, 4], dtype='int32')})
    assert numpy.all(f([1, 1, 1]) == [11, 11, 11])
    assert x.get_value() == 0

def __init__(self, n=None, train_batch_size=None, validate_batch_size=None):
    super(CIFARModel, self).__init__(train_batch_size, validate_batch_size)

    n = n or ParamConfig['n']

    self.learning_rate = theano.shared(lasagne.utils.floatX(ParamConfig['init_learning_rate']))

    # Prepare Theano variables for inputs and targets
    self.input_var = T.tensor4('inputs')
    self.target_var = T.ivector('targets')

    self.network = self.build_cnn(self.input_var, n)
    message("number of parameters in model: %d" % lasagne.layers.count_params(self.network, trainable=True))

    self.saved_init_parameters_values = get_all_param_values(self.network, trainable=True)

    self.build_train_function()
    self.build_validate_function()

def __init__(self, train_batch_size=None, valid_batch_size=None):
    super(VanillaCNNModel, self).__init__(train_batch_size, valid_batch_size)

    # Prepare Theano variables for inputs and targets
    self.input_var = T.tensor4('inputs')
    self.target_var = T.ivector('targets')

    self.learning_rate = theano.shared(lasagne.utils.floatX(ParamConfig['init_learning_rate']))

    self.network = self.build_cnn(self.input_var)
    message("number of parameters in model: %d" % lasagne.layers.count_params(self.network, trainable=True))

    self.saved_init_parameters_values = get_all_param_values(self.network, trainable=True)

    self.build_train_function()
    self.build_validate_function()

def __init__(self, n=None, train_batch_size=None, validate_batch_size=None):
    super(ResNetTFModel, self).__init__(train_batch_size, validate_batch_size)

    n = n or ParamConfig['n']

    self.learning_rate = theano.shared(lasagne.utils.floatX(ParamConfig['init_learning_rate']))

    # Prepare Theano variables for inputs and targets
    self.input_var = T.tensor4('inputs')
    self.target_var = T.ivector('targets')

    self.network = self.build_cnn(self.input_var, n)
    message("number of parameters in model: %d" % lasagne.layers.count_params(self.network, trainable=True))

    self.saved_init_parameters_values = get_all_param_values(self.network, trainable=True)

    self.build_train_function()
    self.build_validate_function()

def __init__(self, weight_file=None, forward=False, learning_rate=0.001, dropout=0.4, lamb=0.00001):
    self.input_var = T.tensor4('inputs')
    self.net = self.build_model(self.input_var, forward, dropout)
    if weight_file is not None:
        self.load_weights(weight_file)

    prediction = lasagne.layers.get_output(self.net['prob'])
    self.target_var = T.ivector('targets')
    loss = lasagne.objectives.categorical_crossentropy(prediction, self.target_var)
    loss = loss.mean() + lamb * lasagne.regularization.l2(self.net['prob'].W)
    params = lasagne.layers.get_all_params(self.net['prob'], trainable=True)
    updates = lasagne.updates.adagrad(loss, params, learning_rate)

    test_prediction = lasagne.layers.get_output(self.net['prob'], deterministic=True)
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), self.target_var),
                      dtype=theano.config.floatX)

    self.train_fn = theano.function([self.input_var, self.target_var], [loss, test_acc], updates=updates)
    self.val_fn = theano.function([self.input_var, self.target_var], [loss, test_acc])
    self.predict_fn = theano.function([self.input_var], [T.argmax(test_prediction, axis=1)])

def build_model(model_):
    global fn_predict, fn_record
    global g_ozer, g_mdl

    g_ozer = dict(simple=VanillaSGD, adam=AdamSGD)[OZER]()
    g_ozer.lr = LEARN_RATE

    s_x = T.tensor4('x')
    s_y = T.ivector('y')
    s_pdpo = T.scalar()
    s_out = model_(s_x, s_pdpo)

    s_y_onehot = T.extra_ops.to_one_hot(s_y, len(g_dataset.label_map))
    s_loss = T.mean(-s_y_onehot * T.log(s_out + 1e-3))
    s_accr = T.mean(
        T.switch(
            T.eq(T.argmax(s_out, axis=1), T.argmax(s_y_onehot, axis=1)), 1, 0))

    no_dropout = [(s_pdpo, T.constant(0., dtype=th.config.floatX))]
    fn_predict = th.function(
        [s_x, s_y],
        {'pred': s_out, 'accr': s_accr, 'loss': s_loss},
        givens=no_dropout,
        profile=PROFILE)

    rec_fetches = {
        'x': s_x,
        'y': s_y,
        'pred': s_out}
    rec_fetches.update(g_mdl.params_di)
    fn_record = th.function(
        [s_x, s_y], rec_fetches, givens=no_dropout, profile=PROFILE)

    g_ozer.compile(
        [s_x, s_y],
        s_loss,
        g_mdl.params_di.values(),
        fetches_={'pred': s_out, 'loss': s_loss, 'accr': s_accr},
        givens_=[(s_pdpo, T.constant(TRAIN_PDPO, dtype=th.config.floatX))],
        profile_=PROFILE)

def step_func(self, state_value, w_value):
    if self._step_func is None:
        w = T.ivector()
        state = T.matrix()
        new_state, p = self.compute(state, w, self._scene_shared)
        self._step_func = theano.function([state, w], [new_state, T.log(p)])
    return self._step_func(state_value, w_value)

def step_func(self, state_value, w_value):
    if self._step_func is None:
        w = T.ivector()
        state = T.matrix()
        new_state, p = self.compute(state, w)
        self._step_func = theano.function([state, w], [new_state, T.log(p)])
    return self._step_func(state_value, w_value)

def step_func(self, state_value, w_value):
    if self._step_func is None:
        w = T.ivector()
        state = T.matrix()
        new_state, p, _ = self.compute(state, w, self._feat_shared, self._scene_shared)
        self._step_func = theano.function([state, w], [new_state, T.log(p)])
    return self._step_func(state_value, w_value)

def step_func(self, state_value, w_value):
    if self._step_func is None:
        w = T.ivector()
        state = T.matrix()
        new_state, p, _ = self.compute(state, w, self._feat_shared)
        self._step_func = theano.function([state, w], [new_state, T.log(p)])
    return self._step_func(state_value, w_value)

def setup_variables(self):
    super(L2HingeNeuralClassifier, self).setup_variables()

    self.k = T.ivector('k')
    self.target_variables.append(self.k)

def exe_maxru(length, num_units, position, binominal):
    batch_size = BATCH_SIZE

    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')

    layer_input = lasagne.layers.InputLayer(shape=(None, length, 1), input_var=input_var, name='input')

    time_updategate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    time_update = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                       b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)

    resetgate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                     W_cell=lasagne.init.GlorotUniform())
    updategate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                      W_cell=lasagne.init.GlorotUniform())
    hiden_update = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                        b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)

    layer_taru = MAXRULayer(layer_input, num_units, max_length=length,
                            P_time=lasagne.init.GlorotUniform(), nonlinearity=nonlinearities.tanh,
                            resetgate=resetgate, updategate=updategate, hidden_update=hiden_update,
                            time_updategate=time_updategate, time_update=time_update,
                            only_return_final=True, name='MAXRU', p=0.)
    # W = layer_taru.W_hid_to_hidden_update.sum()
    # U = layer_taru.W_in_to_hidden_update.sum()
    # b = layer_taru.b_hidden_update.sum()

    layer_output = DenseLayer(layer_taru, num_units=1, nonlinearity=nonlinearities.sigmoid, name='output')

    return train(layer_output, input_var, target_var, batch_size, length, position, binominal)

def exe_lstm(use_embedd, length, num_units, position, binominal):
    batch_size = BATCH_SIZE

    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')

    layer_input = lasagne.layers.InputLayer(shape=(None, length, 1), input_var=input_var, name='input')
    if use_embedd:
        layer_position = construct_position_input(batch_size, length, num_units)
        layer_input = lasagne.layers.concat([layer_input, layer_position], axis=2)

    ingate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                  W_cell=lasagne.init.Uniform(range=0.1))
    outgate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                   W_cell=lasagne.init.Uniform(range=0.1))
    # according to Jozefowicz et al.(2015), init bias of forget gate to 1.
    forgetgate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                      W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
    # now use tanh for nonlinear function of cell, need to try pure linear cell
    cell = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)

    layer_lstm = LSTMLayer(layer_input, num_units, ingate=ingate, forgetgate=forgetgate, cell=cell,
                           outgate=outgate, peepholes=False, nonlinearity=nonlinearities.tanh,
                           only_return_final=True, name='LSTM')
    # W = layer_lstm.W_hid_to_cell.sum()
    # U = layer_lstm.W_in_to_cell.sum()
    # b = layer_lstm.b_cell.sum()

    layer_output = DenseLayer(layer_lstm, num_units=1, nonlinearity=nonlinearities.sigmoid, name='output')

    return train(layer_output, layer_lstm, input_var, target_var, batch_size, length, position, binominal)

def exe_gru(use_embedd, length, num_units, position, binominal, reset_input):
    batch_size = BATCH_SIZE

    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')

    layer_input = lasagne.layers.InputLayer(shape=(batch_size, length, 1), input_var=input_var, name='input')
    if use_embedd:
        layer_position = construct_position_input(batch_size, length, num_units)
        layer_input = lasagne.layers.concat([layer_input, layer_position], axis=2)

    resetgate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hiden_update = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                        b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)

    layer_gru = GRULayer_ANA(layer_input, num_units, resetgate=resetgate, updategate=updategate,
                             hidden_update=hiden_update, reset_input=reset_input, only_return_final=True,
                             name='GRU')
    # W = layer_gru.W_hid_to_hidden_update.sum()
    # U = layer_gru.W_in_to_hidden_update.sum()
    # b = layer_gru.b_hidden_update.sum()

    layer_output = DenseLayer(layer_gru, num_units=1, nonlinearity=nonlinearities.sigmoid, name='output')

    return train(layer_output, layer_gru, input_var, target_var, batch_size, length, position, binominal)

def exe_sgru(use_embedd, length, num_units, position, binominal):
    batch_size = BATCH_SIZE

    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')

    layer_input = lasagne.layers.InputLayer(shape=(None, length, 1), input_var=input_var, name='input')
    if use_embedd:
        layer_position = construct_position_input(batch_size, length, num_units)
        layer_input = lasagne.layers.concat([layer_input, layer_position], axis=2)

    resetgate_input = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    resetgate_hidden = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hiden_update = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                        b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)

    layer_sgru = SGRULayer(layer_input, num_units, resetgate_input=resetgate_input,
                           resetgate_hidden=resetgate_hidden, updategate=updategate,
                           hidden_update=hiden_update, only_return_final=True, name='SGRU')
    # W = layer_gru.W_hid_to_hidden_update.sum()
    # U = layer_gru.W_in_to_hidden_update.sum()
    # b = layer_gru.b_hidden_update.sum()

    layer_output = DenseLayer(layer_sgru, num_units=1, nonlinearity=nonlinearities.sigmoid, name='output')

    return train(layer_output, layer_sgru, input_var, target_var, batch_size, length, position, binominal)

def _finalize(self, loss_fn, weight_layers, projection_layers, eval_layer,
              predict_layer=None, additional_params=[]):
    assert hasattr(weight_layers, '__iter__')
    assert hasattr(projection_layers, '__iter__')

    output_layer = eval_layer

    # Be flexible in terms of batch input formats.
    x_batch = T.matrix('input_indices', dtype=self.input_dtype)

    # Do not be flexible w.r.t. output type.
    y_batch = (
        T.ivector('y')
        if self.training_set[1].ndim == 1
        else T.fmatrix('y'))

    # Instance weights for training.
    w_batch = T.fvector('weights')

    objective = WeightedObjective(
        output_layer, loss_function=loss_fn)

    loss_train = objective.get_loss(
        weights=w_batch,
        input=x_batch,
        target=y_batch,
        deterministic=False)

    loss_eval = objective.get_loss(
        input=x_batch,
        target=y_batch,
        deterministic=True)

    loss_train += self._regularization(
        weight_layers, projection_layers)

    self._create_functions(output_layer,
                           loss_train,
                           loss_eval,
                           dict(x_batch=x_batch,
                                y_batch=y_batch,
                                w_batch=w_batch),
                           predict_layer=predict_layer,
                           additional_params=additional_params)

def get_analogy_prediction_model(embeddings, emb_size, vocab_size):
    # The eval feeds three vectors of word ids for a, b, c, each of
    # which is of size bsz, where bsz is the number of analogies we want to
    # evaluate in one batch.
    analogy_a = T.ivector('analogy_a')
    analogy_b = T.ivector('analogy_b')
    analogy_c = T.ivector('analogy_c')

    # Each row of a_emb, b_emb, c_emb is a word's embedding vector.
    a_emb = embeddings[analogy_a]  # a's embs
    b_emb = embeddings[analogy_b]  # b's embs
    c_emb = embeddings[analogy_c]  # c's embs

    # We expect that d's embedding vectors on the unit hyper-sphere is
    # near: c_emb + (b_emb - a_emb), which has the shape [bsz, emb_size].
    target = c_emb + (b_emb - a_emb)

    # Compute cosine distance between each pair of target and vocab.
    # dist has shape [bsz, vocab_size].
    dist = T.dot(target, embeddings.T)

    # For each question (row in dist), find the top 4 words.
    pred_idx = T.argsort(dist, axis=1)[:, -4:]

    prediction_fn = theano.function([analogy_a, analogy_b, analogy_c], pred_idx)

    return prediction_fn