The following 50 code examples, extracted from open-source Python projects, illustrate how to use theano.tensor.matrix().
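Before the extracted examples, here is a minimal, self-contained sketch of the basic pattern most of them share: declare symbolic matrices, build an expression from them, and compile it with theano.function. The variable names here are illustrative only.

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')               # symbolic 2-D variable, dtype theano.config.floatX
y = T.matrix('y')
z = T.dot(x, y)                 # symbolic expression built from the matrices
f = theano.function([x, y], z)  # compile the graph into a callable

a = np.ones((2, 3), dtype=theano.config.floatX)
b = np.ones((3, 4), dtype=theano.config.floatX)
print(f(a, b).shape)            # -> (2, 4)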
def lyr_linear(
        self, name_, s_x_, idim_, odim_,
        init_=None, bias_=0., params_di_='params'):
    '''
    dense matrix multiplication, optionally adding a bias vector
    '''
    name_W = name_+'_w'
    name_B = name_+'_b'
    self.set_vars(params_di_)
    if init_ is None:
        init_ = dict(init_=[1.4/sqrt(idim_+odim_)])
    v_W = self.get_variable(name_W, (idim_, odim_), **init_)
    if bias_ is None:
        s_ret = T.dot(s_x_, v_W)
    else:
        v_B = self.get_variable(name_B, (odim_,), bias_)
        s_ret = T.dot(s_x_, v_W) + v_B
    return s_ret
def build_encoder_bi(tparams, options):
    """
    build bidirectional encoder, given pre-computed word embeddings
    """
    # word embedding (source)
    embedding = tensor.tensor3('embedding', dtype='float32')
    embeddingr = embedding[::-1]
    x_mask = tensor.matrix('x_mask', dtype='float32')
    xr_mask = x_mask[::-1]

    # encoder
    proj = get_layer(options['encoder'])[1](tparams, embedding, options,
                                            prefix='encoder', mask=x_mask)
    projr = get_layer(options['encoder'])[1](tparams, embeddingr, options,
                                             prefix='encoder_r', mask=xr_mask)
    ctx = tensor.concatenate([proj[0][-1], projr[0][-1]], axis=1)

    return embedding, x_mask, ctx


# some utilities
def dot(inp, matrix, bias=None):
    """
    Decide the right type of dot product depending on the input arguments
    """
    if 'int' in inp.dtype and inp.ndim == 2:
        return matrix[inp.flatten()]
    elif 'int' in inp.dtype:
        return matrix[inp]
    elif 'float' in inp.dtype and inp.ndim == 3:
        shape0 = inp.shape[0]
        shape1 = inp.shape[1]
        shape2 = inp.shape[2]
        if bias:
            return (T.dot(inp.reshape((shape0 * shape1, shape2)), matrix) +
                    bias).reshape((shape0, shape1, matrix.shape[1]))
        else:
            return T.dot(inp.reshape((shape0 * shape1, shape2)),
                         matrix).reshape((shape0, shape1, matrix.shape[1]))
    else:
        if bias:
            return T.dot(inp, matrix) + bias
        else:
            return T.dot(inp, matrix)


# Numerically stable log(sum(exp(A))). Can also be used in softmax function.
def lyr_linear(
        self, name_, s_x_, idim_, odim_,
        init_=None, bias_=0., params_group_='params'):
    '''
    dense matrix multiplication, optionally adding a bias vector
    '''
    name_W = name_+'_w'
    name_B = name_+'_b'
    if init_ is None:
        init_ = [1.4/sqrt(idim_+odim_)]
    with self.get_group(params_group_):
        v_W = self.get_variable(name_W, (idim_, odim_), init_=init_)
    if bias_ is None:
        s_ret = T.dot(s_x_, v_W)
    else:
        with self.get_group(params_group_):
            v_B = self.get_variable(name_B, (odim_,), bias_)
        s_ret = T.dot(s_x_, v_W) + v_B
    return s_ret
def __init__(self, input_size, output_size, hidden_sizes, activation=T.nnet.sigmoid):
    self.hidden_layers = []
    self.params = []
    self.input = T.matrix('x')
    self.target = T.matrix('y')

    for i, layer_size in enumerate(hidden_sizes):
        if i == 0:
            layer_input_size = input_size
            layer_input = self.input
        else:
            layer_input_size = hidden_sizes[i - 1]
            layer_input = self.hidden_layers[-1].output
        layer = Layer(layer_input, layer_input_size, layer_size, activation=activation)
        self.hidden_layers.append(layer)
        self.params.extend(layer.params)

    self.output_layer = Layer(self.hidden_layers[-1].output, hidden_sizes[-1], output_size)
    self.params.extend(self.output_layer.params)

    self.output = self.output_layer.output
    self.cost = T.sum((self.output - self.target)**2)
def dist_info(self, obs, state_infos=None):
    if state_infos is None or len(state_infos) == 0:
        return self._f_dist_info(obs)
    if self._f_dist_info_givens is None:
        # compile function
        obs_var = self._mean_network.input_var
        latent_keys = ["latent_%d" % idx for idx in range(self._n_latent_layers)]
        latent_vars = [TT.matrix("latent_%d" % idx) for idx in range(self._n_latent_layers)]
        latent_dict = dict(list(zip(latent_keys, latent_vars)))
        self._f_dist_info_givens = ext.compile_function(
            inputs=[obs_var] + latent_vars,
            outputs=self.dist_info_sym(obs_var, latent_dict),
        )
    latent_vals = []
    for idx in range(self._n_latent_layers):
        latent_vals.append(state_infos["latent_%d" % idx])
    return self._f_dist_info_givens(*[obs] + latent_vals)
def test_infer():
    data_iter = Euclidean(batch_size=27, dim_in=17)
    gbn = test_vae.test_build_GBN(dim_in=data_iter.dims[data_iter.name])

    inference_args = dict(
        n_inference_steps=7,
        pass_gradients=True
    )

    gdir = test_build_gdir(gbn, **inference_args)

    X = T.matrix('x', dtype=floatX)

    rval, constants, updates = gdir.inference(X, X)

    f = theano.function([X], rval.values(), updates=updates)
    x = data_iter.next()[data_iter.name]

    results, samples, full_results, updates = gdir(X, X)

    f = theano.function([X], results.values(), updates=updates)
    print f(x)
def test_sample(n_steps=3, dim_v=13, batch_size=7):
    data_iter = euclidean.Euclidean(dims=dim_v, batch_size=batch_size)
    x = data_iter.next()[data_iter.name]

    model = test_build(dim_v=dim_v)

    X = T.matrix('X', dtype=floatX)
    ph0 = model.ph_v(X)
    r = model.trng.uniform(size=(X.shape[0], model.dim_h))
    h_p = (r <= ph0).astype(floatX)

    outs, updates = model.sample(h_p, n_steps=n_steps)
    keys = outs.keys()
    f = theano.function([X], outs.values(), updates=updates)
    values = f(x)

    outs = model(X, n_chains=batch_size, n_steps=n_steps)
    results, samples, updates, constants = outs
    f = theano.function([X], results.values(), updates=updates)
    f(x)
def compile(self):
    x_train = T.tensor4('x_train')
    actions_train = T.matrix('actions_train')
    y_train = T.matrix('y_train')
    cost_function = self.squared_error(x_train, actions_train, y_train)

    self.train_function = theano.function([x_train, actions_train, y_train],
                                          cost_function,
                                          updates=self.sgd(cost_function, self.params),
                                          on_unused_input='ignore',
                                          allow_input_downcast=True)

    x_pred = T.tensor3('x_pred')
    actions_pred = T.vector('actions_pred')
    output_function = self.output(x_pred, actions_pred)

    self.predict_function = theano.function([x_pred, actions_pred],
                                            output_function,
                                            on_unused_input='ignore',
                                            allow_input_downcast=True)
    return self
def build_theano_function_wdu(self):
    W_static = T.matrix('W_static')
    W_delta = T.matrix('W_delta')
    W_acc = T.matrix('W_acc')

    D_static = T.matrix('D_static')
    D_delta = T.matrix('D_delta')
    D_acc = T.matrix('D_acc')

    U_static = T.matrix('U_static')
    U_delta = T.matrix('U_delta')
    U_acc = T.matrix('U_acc')

    WDU = T.dot(T.dot(W_static.T, D_static), U_static) + \
          T.dot(T.dot(W_delta.T, D_delta), U_delta) + \
          T.dot(T.dot(W_acc.T, D_acc), U_acc)

    fn = theano.function(inputs=[W_static, W_delta, W_acc,
                                 D_static, D_delta, D_acc,
                                 U_static, U_delta, U_acc],
                         outputs=WDU)
    return fn
def symbolic_distance_matrix(A, B):
    """
    Defines the symbolic matrix that contains the distances between the vectors of A and B
    :param A: first matrix of vectors (one vector per row)
    :param B: second matrix of vectors (one vector per row)
    :return: symbolic matrix of pairwise Euclidean distances
    """
    aa = T.sum(A * A, axis=1)
    bb = T.sum(B * B, axis=1)
    AB = T.dot(A, T.transpose(B))

    AA = T.transpose(T.tile(aa, (bb.shape[0], 1)))
    BB = T.tile(bb, (aa.shape[0], 1))

    D = AA + BB - 2 * AB
    D = T.maximum(D, 0)
    D = T.sqrt(D)
    return D
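A minimal sketch of how the symbolic expression above could be compiled and evaluated on concrete data (assumes theano, theano.tensor as T, and numpy as np are imported; the variable names are illustrative):

A = T.matrix('A')
B = T.matrix('B')
dist_fn = theano.function([A, B], symbolic_distance_matrix(A, B))

a = np.random.rand(5, 3).astype(theano.config.floatX)
b = np.random.rand(7, 3).astype(theano.config.floatX)
print(dist_fn(a, b).shape)  # -> (5, 7), one distance per pair of rows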
def build_model(self):
    rng = np.random.RandomState(1234)
    lasagne.random.set_rng(rng)

    # Prepare Theano variables for inputs and targets
    self.noise_var = T.matrix('noise')
    self.input_var = T.tensor4('inputs')

    # Create neural network model
    generator = build_generator(self.noise_var)
    critic = build_critic(self.input_var)

    # Create expression for passing real data through the critic
    self.real_out = lasagne.layers.get_output(critic)
    # Create expression for passing fake data through the critic
    self.fake_out = lasagne.layers.get_output(critic,
                                              lasagne.layers.get_output(generator))

    # Create update expressions for training
    self.generator_params = lasagne.layers.get_all_params(generator, trainable=True)
    self.critic_params = lasagne.layers.get_all_params(critic, trainable=True)

    self.generator = generator
    self.critic = critic
def build_model(self):
    rng = np.random.RandomState(1234)
    lasagne.random.set_rng(rng)

    # Prepare Theano variables for inputs and targets
    self.noise_var = T.matrix('noise')
    self.input_var = T.tensor4('inputs')

    # Create neural network model
    generator = build_generator(self.noise_var, self.verbose)
    critic = build_critic(self.input_var, self.verbose)

    # Create expression for passing real data through the critic
    self.real_out = lasagne.layers.get_output(critic)
    # Create expression for passing fake data through the critic
    self.fake_out = lasagne.layers.get_output(critic,
                                              lasagne.layers.get_output(generator))

    # Create update expressions for training
    self.generator_params = lasagne.layers.get_all_params(generator, trainable=True)
    self.critic_params = lasagne.layers.get_all_params(critic, trainable=True)

    self.generator = generator
    self.critic = critic
def forward_pass(self, input_data, convert_to_class=False):
    """
    Allow the implementer to quickly get outputs from the network.

    Args:
        input_data: Numpy matrix to make the predictions on
        convert_to_class: If true, output the class with highest probability

    Returns:
        Numpy matrix with the output probabilities for each class,
        unless otherwise specified.
    """
    if convert_to_class:
        return get_class(self.output(input_data))
    else:
        return self.output(input_data)
def __init__(self, layers, mini_batch_size):
    """Takes a list of `layers`, describing the network architecture, and
    a value for the `mini_batch_size` to be used during training
    by stochastic gradient descent.
    """
    self.layers = layers
    self.mini_batch_size = mini_batch_size
    self.params = [param for layer in self.layers for param in layer.params]
    self.x = T.matrix("x")
    self.y = T.ivector("y")
    init_layer = self.layers[0]
    init_layer.set_inpt(self.x, self.x, self.mini_batch_size)
    for j in range(1, len(self.layers)):  # xrange() was renamed to range() in Python 3.
        prev_layer, layer = self.layers[j-1], self.layers[j]
        layer.set_inpt(
            prev_layer.output, prev_layer.output_dropout, self.mini_batch_size)
    self.output = self.layers[-1].output
    self.output_dropout = self.layers[-1].output_dropout
def normalize_data(X):
    """
    This function pads X. Padding is added so the data can be converted
    to a matrix data type.

    Parameters
    ----------
    :type X: list of list
    :param X: values to be padded

    :rtype : list containing the actual (unpadded) sizes of the rows of X
    """
    X_sizes = []
    size = max([len(case) for case in X])
    for case in X:
        l = len(case)
        X_sizes.append(l)
        # 9999 is a sentinel, so an index error is raised if anything doesn't work as planned
        case.extend([9999] * (size - l))
    return X_sizes
def test():
    energies_var = T.tensor4('energies', dtype=theano.config.floatX)
    targets_var = T.imatrix('targets')
    masks_var = T.matrix('masks', dtype=theano.config.floatX)
    layer_input = lasagne.layers.InputLayer([2, 2, 3, 3], input_var=energies_var)
    out = lasagne.layers.get_output(layer_input)
    loss = crf_loss(out, targets_var, masks_var)
    prediction, acc = crf_accuracy(energies_var, targets_var)

    fn = theano.function([energies_var, targets_var, masks_var], [loss, prediction, acc])

    energies = np.array([[[[10, 15, 20], [5, 10, 15], [3, 2, 0]],
                          [[5, 10, 1], [5, 10, 1], [5, 10, 1]]],
                         [[[5, 6, 7], [2, 3, 4], [2, 1, 0]],
                          [[0, 0, 0], [0, 0, 0], [0, 0, 0]]]], dtype=np.float32)

    targets = np.array([[0, 1], [0, 2]], dtype=np.int32)

    masks = np.array([[1, 1], [1, 0]], dtype=np.float32)

    l, p, a = fn(energies, targets, masks)
    print l
    print p
    print a
def get_output(self, X):
    convnet_output = 0
    if self.has_convnet:
        # Hack: input_masked is a 2D matrix instead of a 4D tensor, but we have all the information to fix that.
        input_4D = X.reshape((-1, self.nb_channels) + self.image_shape)
        convnet_output = self.convnet.get_output(input_4D)  # Returns the convnet's output preactivation.
        # This will generate a matrix of shape (batch_size, nb_kernels * kernel_height * kernel_width).
        convnet_output = convnet_output.flatten(2)

    fullnet_output = 0
    if self.has_fullnet:
        fullnet_output = self.fullnet.get_output(X)  # Returns the fullnet's output preactivation.

    output = convnet_output + fullnet_output
    # TODO: sigmoid should be applied here instead of within loss function.
    return output
def __init__(self, n_dim, n_out, n_chan=1, n_batch=128, n_superbatch=12800, model='bernoulli',
             opt_alg='adam', opt_params={'lr': 1e-3, 'b1': 0.9, 'b2': 0.99}):
    # save model that will be created
    self.model = model
    self.n_batch = n_batch
    self.n_lat = 100
    self.n_dim = n_dim
    self.n_chan = n_chan

    # invoke parent constructor
    Model.__init__(self, n_dim, n_chan, n_out, n_superbatch, opt_alg, opt_params)

    # sample generation
    Z = T.matrix(dtype=theano.config.floatX)  # noise matrix
    _, _, _, _, l_sample, l_p_z = self.network
    sample = lasagne.layers.get_output(l_sample, {l_p_z: Z}, deterministic=True)
    self.sample = theano.function([Z], sample, on_unused_input='warn')
def __init__(self, n_dim, n_out, n_chan=1, n_batch=128, n_superbatch=12800, model='bernoulli',
             opt_alg='adam', opt_params={'lr': 1e-3, 'b1': 0.9, 'b2': 0.99}):
    # save model that will be created
    self.model = model
    self.n_sample = 1  # adjustable parameter, though 1 works best in practice
    self.n_batch = n_batch
    self.n_lat = 200
    self.n_dim = n_dim
    self.n_chan = n_chan
    self.n_batch = n_batch

    Model.__init__(self, n_dim, n_chan, n_out, n_superbatch, opt_alg, opt_params)

    # sample generation
    Z = T.matrix(dtype=theano.config.floatX)  # noise matrix
    l_px_mu, l_px_logsigma, l_pa_mu, l_pa_logsigma, \
        l_qz_mu, l_qz_logsigma, l_qa_mu, l_qa_logsigma, \
        l_qa, l_qz, l_d = self.network
    sample = lasagne.layers.get_output(l_px_mu, {l_qz: Z}, deterministic=True)
    self.sample = theano.function([Z], sample, on_unused_input='warn')
def __init__(self, n_dim, n_out, n_chan=1, n_batch=128, n_superbatch=12800, model='bernoulli',
             opt_alg='adam', opt_params={'lr': 1e-3, 'b1': 0.9, 'b2': 0.99}):
    # save model that will be created
    self.model = model
    self.n_batch = n_batch
    self.n_lat = 100
    self.n_dim = n_dim
    self.n_chan = n_chan
    self.n_batch = n_batch

    # invoke parent constructor
    Model.__init__(self, n_dim, n_chan, n_out, n_superbatch, opt_alg, opt_params)

    # sample generation
    Z = T.matrix(dtype=theano.config.floatX)  # noise matrix
    _, _, _, _, l_sample, l_p_z = self.network
    sample = lasagne.layers.get_output(l_sample, {l_p_z: Z}, deterministic=True)
    self.sample = theano.function([Z], sample, on_unused_input='warn')
def __init__(self, n_dim, n_out, n_chan=1, n_batch=128, n_superbatch=12800, model='bernoulli',
             opt_alg='adam', opt_params={'lr': 1e-3, 'b1': 0.9, 'b2': 0.99}):
    # save model that will be created
    self.model = model
    self.n_sample = 1  # adjustable parameter, though 1 works best in practice
    self.n_batch = n_batch
    self.n_lat = 200
    self.n_dim = n_dim
    self.n_chan = n_chan
    self.n_batch = n_batch

    Model.__init__(self, n_dim, n_chan, n_out, n_superbatch, opt_alg, opt_params)

    # sample generation
    Z = T.matrix(dtype=theano.config.floatX)  # noise matrix
    l_px_mu, l_px_logsigma, l_pa_mu, l_pa_logsigma, \
        l_qz_mu, l_qz_logsigma, l_qa_mu, l_qa_logsigma, \
        l_qa, l_qz = self.network
    sample = lasagne.layers.get_output(l_px_mu, {l_qz: Z}, deterministic=True)
    self.sample = theano.function([Z], sample, on_unused_input='warn')
def __init__(self, batch_size, emb_X, lstm_param, output_size, f1_classes):
    super().__init__(batch_size)
    self.inputs = [T.imatrix('input'), T.matrix('mask')]
    self.target = T.ivector('target')

    l = InputLayer((batch_size, None), self.inputs[0])
    l_mask = InputLayer((batch_size, None), self.inputs[1])
    l = EmbeddingLayer(l, emb_X.shape[0], emb_X.shape[1], W=emb_X)
    l = LSTMLayer(
        l, lstm_param, mask_input=l_mask, grad_clipping=100, nonlinearity=tanh,
        only_return_final=True
    )
    l = DenseLayer(l, output_size, nonlinearity=log_softmax)

    self.pred = T.exp(get_output(l, deterministic=True))
    self.loss = T.mean(categorical_crossentropy_exp(self.target, get_output(l)))
    params = get_all_params(l, trainable=True)
    self.updates = rmsprop(self.loss, params, learning_rate=0.01)

    self.metrics = {'train': [acc], 'val': [acc, f1(f1_classes)]}
    self.network = l
    self.compile()
def __init__(self, batch_size, emb_X, input_size, conv_param, lstm_param, output_size, f1_classes):
    super().__init__(batch_size)
    self.input_size = input_size
    self.conv_param = conv_param
    self.inputs = [T.imatrix('input'), T.matrix('mask')]
    self.target = T.ivector('target')

    l = InputLayer((batch_size, input_size), self.inputs[0])
    l_mask = InputLayer((batch_size, input_size + conv_param - 1), self.inputs[1])
    l = EmbeddingLayer(l, emb_X.shape[0], emb_X.shape[1], W=emb_X)
    l = DimshuffleLayer(l, (0, 2, 1))
    l = Conv1DLayer(l, 300, conv_param, pad='full', nonlinearity=rectify)
    l = DimshuffleLayer(l, (0, 2, 1))
    l = LSTMLayer(
        l, lstm_param, mask_input=l_mask, grad_clipping=100, nonlinearity=tanh,
        only_return_final=True
    )
    l = DenseLayer(l, output_size, nonlinearity=log_softmax)

    self.pred = T.exp(get_output(l, deterministic=True))
    self.loss = T.mean(categorical_crossentropy_exp(self.target, get_output(l)))
    params = get_all_params(l, trainable=True)
    self.updates = adadelta(self.loss, params)

    self.metrics = {'train': [acc], 'val': [acc, f1(f1_classes)]}
    self.network = l
    self.compile()
def __init__(self, batch_size, emb_X, lstm_params, output_size):
    super().__init__(batch_size)
    self.inputs = [T.imatrix('input'), T.matrix('mask')]
    self.target = T.matrix('target')

    l = InputLayer((batch_size, None), self.inputs[0])
    l_mask = InputLayer((batch_size, None), self.inputs[1])
    l = EmbeddingLayer(l, emb_X.shape[0], emb_X.shape[1], W=emb_X)
    for lstm_param in lstm_params:
        l = LSTMLayer(
            l, lstm_param, grad_clipping=100, nonlinearity=tanh, mask_input=l_mask,
            only_return_final=True
        )
    l = DenseLayer(l, output_size, nonlinearity=identity)

    self.pred = get_output(l, deterministic=True)
    self.loss = T.mean(aggregate(squared_error(get_output(l), self.target)))
    params = get_all_params(l, trainable=True)
    self.update_params = [T.scalar('learning_rate')]
    self.updates = rmsprop(self.loss, params, learning_rate=self.update_params[0])

    self.metrics = {'train': [rmse], 'val': [rmse]}
    self.network = l
    self.compile()
def __init__(self, n_inputs):
    """Constructs a net with a given number of inputs and no layers."""

    assert isposint(n_inputs), 'Number of inputs must be a positive integer.'

    self.n_inputs = n_inputs
    self.n_outputs = n_inputs
    self.n_units = [n_inputs]
    self.n_layers = 0
    self.n_params = 0

    self.Ws = []
    self.bs = []
    self.hs = [tt.matrix('x')]
    self.parms = self.Ws + self.bs
    self.input = self.hs[0]
    self.output = self.hs[-1]

    self.eval_f = None
def squareError(x):
    """Square error loss function."""

    if x.ndim == 1:
        y = tt.vector('y')
        L = tt.mean((x - y) ** 2)

    elif x.ndim == 2:
        y = tt.matrix('y')
        L = tt.mean(tt.sum((x - y) ** 2, axis=1))

    else:
        raise ValueError('x must be either a vector or a matrix.')

    L.name = 'loss'

    return y, L
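A small illustrative sketch of wiring the (y, L) pair returned above into a compiled loss function for a matrix-valued output (assumes theano, theano.tensor as tt, and numpy as np are imported; the names are hypothetical):

x_out = tt.matrix('x_out')      # stand-in for a network's symbolic output
y, L = squareError(x_out)       # y is a fresh target matrix, L the mean per-row squared error
loss_fn = theano.function([x_out, y], L)

zeros = np.zeros((4, 3), dtype=theano.config.floatX)
ones = np.ones((4, 3), dtype=theano.config.floatX)
print(loss_fn(zeros, ones))     # -> 3.0 (sum of 3 unit squared errors per row, averaged)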
def crossEntropy(x):
    """Cross entropy loss function. Only works for networks with one output."""

    if x.ndim == 1:
        pass

    elif x.ndim == 2:
        x = x[:, 0]

    else:
        raise ValueError('x must be either a vector or a matrix.')

    y = tt.vector('y')
    L = -tt.mean(y * tt.log(x) + (1-y) * tt.log(1-x))
    L.name = 'loss'

    return y, L
def savetofile(self, outfile):
    """Save model parameters to file."""

    # Pickle non-matrix params into bytestring, then convert to numpy byte array
    pklbytes = pickle.dumps({'hyper': self.hyper, 'epoch': self.epoch, 'pos': self.pos},
                            protocol=pickle.HIGHEST_PROTOCOL)
    p = np.fromstring(pklbytes, dtype=np.uint8)

    # Gather parameter matrices and names
    pvalues = {n: m.get_value() for n, m in self.params.items()}

    # Now save params and matrices to file
    try:
        np.savez_compressed(outfile, p=p, **pvalues)
    except OSError as e:
        raise e
    else:
        if isinstance(outfile, str):
            stdout.write("Saved model parameters to {0}\n".format(outfile))
def load_data(dataset):
    print('... loading data')

    # Load the dataset
    with gzip.open(dataset, 'rb') as f:
        try:
            train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
        except:
            train_set, valid_set, test_set = pickle.load(f)
    # train_set, valid_set, test_set format: tuple(input, target)
    # input is a numpy.ndarray of 2 dimensions (a matrix)
    # where each row corresponds to an example. target is a
    # numpy.ndarray of 1 dimension (vector) that has the same length as
    # the number of rows in the input. It should give the target
    # to the example with the same index in the input.

    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
            (test_set_x, test_set_y)]
    return rval
def times_diag(input, n_hidden, diag, swap_re_im):
    # input is a I x 2n_hidden matrix, where I is the number
    # of training examples.
    # diag is a n_hidden-dimensional real vector, which creates
    # the 2n_hidden x 2n_hidden complex diagonal matrix using
    # e.^{j.*diag}=cos(diag)+j.*sin(diag)
    d = T.concatenate([diag, -diag])  # d is 2n_hidden

    Re = T.cos(d).dimshuffle('x', 0)
    Im = T.sin(d).dimshuffle('x', 0)

    input_times_Re = input * Re
    input_times_Im = input * Im

    output = input_times_Re + input_times_Im[:, swap_re_im]

    return output
def times_unitary(x, n, swap_re_im, Wparams, Wimpl):
    # multiply tensor x on the right by the unitary matrix W parameterized by Wparams
    if (Wimpl == 'adhoc'):
        theta = Wparams[0]
        reflection = Wparams[1]
        index_permute_long = Wparams[2]
        step1 = times_diag(x, n, theta[0, :], swap_re_im)
        step2 = do_fft(step1, n)
        step3 = times_reflection(step2, n, reflection[0, :])
        step4 = vec_permutation(step3, index_permute_long)
        step5 = times_diag(step4, n, theta[1, :], swap_re_im)
        step6 = do_ifft(step5, n)
        step7 = times_reflection(step6, n, reflection[1, :])
        step8 = times_diag(step7, n, theta[2, :], swap_re_im)
        y = step8
    elif (Wimpl == 'full'):
        Waug = Wparams[0]
        y = T.dot(x, Waug)
    return y
def make_idx_data_cv(revs, word_idx_map, cv, max_l=51, k=100, filter_h=5):
    """
    Transforms sentences into a 2-d matrix.
    """
    train, test = [], []
    for rev in revs:
        sent = get_idx_from_sent(rev["text"], word_idx_map, max_l, k, filter_h)
        sent.append(rev["y"])
        if rev["split"] == cv:
            test.append(sent)
        else:
            train.append(sent)
    train = np.array(train, dtype="int")
    train = np.random.permutation(train)[:10000]
    test = np.array(test, dtype="int")
    test = np.random.permutation(test)[:10000]
    return [train, test]
def word_features(table):
    """
    Extract word features into a normalized matrix
    """
    features = numpy.zeros((len(table), 620), dtype='float32')
    keys = table.keys()
    for i in range(len(table)):
        f = table[keys[i]]
        features[i] = f / norm(f)
    return features
def build_encoder(tparams, options):
    """
    build an encoder, given pre-computed word embeddings
    """
    # word embedding (source)
    embedding = tensor.tensor3('embedding', dtype='float32')
    x_mask = tensor.matrix('x_mask', dtype='float32')

    # encoder
    proj = get_layer(options['encoder'])[1](tparams, embedding, options,
                                            prefix='encoder', mask=x_mask)
    ctx = proj[0][-1]

    return embedding, x_mask, ctx
def ndim_tensor(ndim):
    if ndim == 1:
        return T.vector()
    elif ndim == 2:
        return T.matrix()
    elif ndim == 3:
        return T.tensor3()
    elif ndim == 4:
        return T.tensor4()
    return T.matrix()


# get int32 tensor
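A quick, illustrative check of the helper above (assumes theano.tensor is imported as T): the returned placeholder's ndim matches the request, and matrices are the fallback for unsupported dimensionalities.

x2 = ndim_tensor(2)   # a T.matrix()
x3 = ndim_tensor(3)   # a T.tensor3()
print(x2.ndim, x3.ndim)   # -> 2 3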
def init_func(self, img_value, scene_value):
    if self._init_func is None:
        img = T.matrix()
        init_state = self.proj_mlp.compute(img)
        self._init_func = theano.function([img], init_state)
    self._scene_shared.set_value(scene_value)
    return self._init_func(img_value)
def step_func(self, state_value, w_value):
    if self._step_func is None:
        w = T.ivector()
        state = T.matrix()
        new_state, p = self.compute(state, w, self._scene_shared)
        self._step_func = theano.function([state, w], [new_state, T.log(p)])
    return self._step_func(state_value, w_value)
def __init__(self, name='scene_mlp', layer_sizes=(2048, 1024, 1024, 80), model_file=None):
    self.name = name
    if model_file is not None:
        with h5py.File(model_file, 'r') as f:
            layer_sizes = f.attrs['layer_sizes']
    self.config = {'layer_sizes': layer_sizes}

    # define inputs
    x = T.matrix('x')
    y = T.matrix('y')
    self.inputs = [x, y]

    # define computation graph
    self.mlp = MLP(layer_sizes=layer_sizes, name='mlp', output_type='softmax')
    self.proba = self.mlp.compute(x)
    self.log_proba = T.log(self.proba)

    # define costs
    def kl_divergence(p, q):
        kl = T.mean(T.sum(p * T.log((p+1e-30)/(q+1e-30)), axis=1))
        kl += T.mean(T.sum(q * T.log((q+1e-30)/(p+1e-30)), axis=1))
        return kl
    kl = kl_divergence(self.proba, y)
    acc = T.mean(T.eq(self.proba.argmax(axis=1), y.argmax(axis=1)))
    self.costs = [kl, acc]

    # layers and parameters
    self.layers = [self.mlp]
    self.params = sum([l.params for l in self.layers], [])

    # load weights from file, if model_file is not None
    if model_file is not None:
        self.load_weights(model_file)
def step_func(self, state_value, w_value):
    if self._step_func is None:
        w = T.ivector()
        state = T.matrix()
        new_state, p = self.compute(state, w)
        self._step_func = theano.function([state, w], [new_state, T.log(p)])
    return self._step_func(state_value, w_value)
def step_func(self, state_value, w_value):
    if self._step_func is None:
        w = T.ivector()
        state = T.matrix()
        new_state, p, _ = self.compute(state, w, self._feat_shared, self._scene_shared)
        self._step_func = theano.function([state, w], [new_state, T.log(p)])
    return self._step_func(state_value, w_value)
def step_func(self, state_value, w_value):
    if self._step_func is None:
        w = T.ivector()
        state = T.matrix()
        new_state, p, _ = self.compute(state, w, self._feat_shared)
        self._step_func = theano.function([state, w], [new_state, T.log(p)])
    return self._step_func(state_value, w_value)
def sharedX_mtx(mtx, name=None, borrow=None, dtype=None):
    """Share a matrix value with type theano.config.floatX.

    Parameters:
        value: matrix array
        name: variable name (str)
        borrow: boolean
        dtype: the type of the value when shared. default: theano.config.floatX
    """
    if dtype is None:
        dtype = theano.config.floatX
    return theano.shared(
        np.array(mtx, dtype=dtype), name=name, borrow=borrow)
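A brief illustrative use of the helper above (assumes theano and numpy as np are imported): the shared variable stores the matrix in floatX precision and can hold model parameters updated by compiled functions.

W = sharedX_mtx(np.eye(3), name='W', borrow=True)   # 3x3 identity as a shared floatX matrix
print(W.get_value().dtype)                           # matches theano.config.floatX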