The following 50 code examples, extracted from open-source Python projects, illustrate how to use lasagne.nonlinearities.tanh().
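Before the project excerpts below, here is a minimal standalone sketch of the basic usage pattern: passing lasagne.nonlinearities.tanh as the nonlinearity argument of a layer. This sketch is not taken from any of the listed projects; it assumes only that numpy, theano, and lasagne are installed, and the layer sizes are arbitrary.

# Minimal standalone sketch (hypothetical example, not from the projects below):
# a two-layer MLP whose hidden layer uses lasagne.nonlinearities.tanh.
import numpy as np
import theano
import theano.tensor as T
from lasagne.layers import InputLayer, DenseLayer, get_output
from lasagne.nonlinearities import tanh, softmax

input_var = T.matrix('inputs')
l_in = InputLayer(shape=(None, 100), input_var=input_var)
l_hid = DenseLayer(l_in, num_units=50, nonlinearity=tanh)      # tanh hidden layer
l_out = DenseLayer(l_hid, num_units=10, nonlinearity=softmax)  # softmax output layer

# compile a forward pass and run it on random data
predict = theano.function([input_var], get_output(l_out, deterministic=True))
x = np.random.rand(2, 100).astype(theano.config.floatX)
print(predict(x).shape)  # (2, 10)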
def exe_rnn(use_embedd, length, num_units, position, binominal):
    batch_size = BATCH_SIZE

    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')

    layer_input = lasagne.layers.InputLayer(shape=(None, length, 1), input_var=input_var, name='input')
    if use_embedd:
        layer_position = construct_position_input(batch_size, length, num_units)
        layer_input = lasagne.layers.concat([layer_input, layer_position], axis=2)

    layer_rnn = RecurrentLayer(layer_input, num_units, nonlinearity=nonlinearities.tanh, only_return_final=True,
                               W_in_to_hid=lasagne.init.GlorotUniform(), W_hid_to_hid=lasagne.init.GlorotUniform(),
                               b=lasagne.init.Constant(0.), name='RNN')
    # W = layer_rnn.W_hid_to_hid.sum()
    # U = layer_rnn.W_in_to_hid.sum()
    # b = layer_rnn.b.sum()

    layer_output = DenseLayer(layer_rnn, num_units=1, nonlinearity=nonlinearities.sigmoid, name='output')

    return train(layer_output, layer_rnn, input_var, target_var, batch_size, length, position, binominal)
def build_BiRNN_CNN(incoming1, incoming2, num_units, mask=None, grad_clipping=0, nonlinearity=nonlinearities.tanh,
                    precompute_input=True, num_filters=20, dropout=True, in_to_out=False):
    # first get some necessary dimensions or parameters
    conv_window = 3
    _, sent_length, _ = incoming2.output_shape

    # dropout before cnn?
    if dropout:
        incoming1 = lasagne.layers.DropoutLayer(incoming1, p=0.5)

    # construct convolution layer
    cnn_layer = lasagne.layers.Conv1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
                                           nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # infer the pool size for pooling (pool size should go through all time step of cnn)
    _, _, pool_size = cnn_layer.output_shape
    # construct max pool layer
    pool_layer = lasagne.layers.MaxPool1DLayer(cnn_layer, pool_size=pool_size)
    # reshape the layer to match rnn incoming layer [batch * sent_length, num_filters, 1] --> [batch, sent_length, num_filters]
    output_cnn_layer = lasagne.layers.reshape(pool_layer, (-1, sent_length, [1]))

    # finally, concatenate the two incoming layers together.
    incoming = lasagne.layers.concat([output_cnn_layer, incoming2], axis=2)

    return build_BiRNN(incoming, num_units, mask=mask, grad_clipping=grad_clipping, nonlinearity=nonlinearity,
                       precompute_input=precompute_input, dropout=dropout, in_to_out=in_to_out)
def build_BiLSTM_CNN(incoming1, incoming2, num_units, mask=None, grad_clipping=0, precompute_input=True,
                     peepholes=False, num_filters=20, dropout=True, in_to_out=False):
    # first get some necessary dimensions or parameters
    conv_window = 3
    _, sent_length, _ = incoming2.output_shape

    # dropout before cnn?
    if dropout:
        incoming1 = lasagne.layers.DropoutLayer(incoming1, p=0.5)

    # construct convolution layer
    cnn_layer = lasagne.layers.Conv1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
                                           nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # infer the pool size for pooling (pool size should go through all time step of cnn)
    _, _, pool_size = cnn_layer.output_shape
    # construct max pool layer
    pool_layer = lasagne.layers.MaxPool1DLayer(cnn_layer, pool_size=pool_size)
    # reshape the layer to match lstm incoming layer [batch * sent_length, num_filters, 1] --> [batch, sent_length, num_filters]
    output_cnn_layer = lasagne.layers.reshape(pool_layer, (-1, sent_length, [1]))

    # finally, concatenate the two incoming layers together.
    incoming = lasagne.layers.concat([output_cnn_layer, incoming2], axis=2)

    return build_BiLSTM(incoming, num_units, mask=mask, grad_clipping=grad_clipping, peepholes=peepholes,
                        precompute_input=precompute_input, dropout=dropout, in_to_out=in_to_out)
def __init__(self):
    self.hdn = None
    self.rep = None
    self.fname_in = None
    self.mod2size = None
    self.mod1size = None
    self.fname_out = None

    self.lr = 0.1
    self.act = "tanh"
    self.epochs = 1000
    self.verbosity = 3
    self.dropout = 0.2
    self.momentum = 0.9
    self.untied = False
    self.l2_norm = False
    self.batch_size = 128
    self.load_model = None
    self.save_model = None
    self.write_after = None
    self.crossmodal = False
    self.exec_command = None
    self.ignore_zeroes = False
def setup_transform_net(self, input_var=None):
    transform_net = InputLayer(shape=self.shape, input_var=input_var)
    transform_net = style_conv_block(transform_net, self.num_styles, 32, 9, 1)
    transform_net = style_conv_block(transform_net, self.num_styles, 64, 3, 2)
    transform_net = style_conv_block(transform_net, self.num_styles, 128, 3, 2)
    for _ in range(5):
        transform_net = residual_block(transform_net, self.num_styles)
    transform_net = nn_upsample(transform_net, self.num_styles)
    transform_net = nn_upsample(transform_net, self.num_styles)

    if self.net_type == 0:
        transform_net = style_conv_block(transform_net, self.num_styles, 3, 9, 1, tanh)
        transform_net = ExpressionLayer(transform_net, lambda X: 150.*X, output_shape=None)
    elif self.net_type == 1:
        transform_net = style_conv_block(transform_net, self.num_styles, 3, 9, 1, sigmoid)

    self.network['transform_net'] = transform_net
def build_cnn(self):
    # Building the network
    layer_in = InputLayer(shape=(None, 784), input_var=self.input_var)

    # Hidden layer
    layer = DenseLayer(
        layer_in,
        num_units=self.hidden_size,
        W=lasagne.init.Uniform(
            range=(-np.sqrt(6. / (784 + self.hidden_size)),
                   np.sqrt(6. / (784 + self.hidden_size)))),
        nonlinearity=tanh,
    )

    # LR layer
    layer = DenseLayer(
        layer,
        num_units=self.output_size,
        W=lasagne.init.Constant(0.),
        nonlinearity=softmax,
    )

    return layer
def build_multi_dssm(input_var=None, num_samples=None, num_entries=6, num_ngrams=42**3,
                     num_hid1=300, num_hid2=300, num_out=128):
    """Builds a DSSM structure in a Lasagne/Theano way.

    The built DSSM is the neural network that computes the projection of only one paper.
    The input ``input_var`` should have two dimensions: (``num_samples * num_entries``, ``num_ngrams``).
    The output is then computed in a batch way: one paper at a time, but all papers from the same sample
    in the dataset are grouped (cited papers, citing papers and ``num_entries - 2`` irrelevant papers).

    Args:
        input_var (:class:`theano.tensor.TensorType` or None): symbolic input variable of the DSSM
        num_samples (int): the number of samples in the batch input dataset (number of rows)
        num_entries (int): the number of compared papers in the DSSM structure
        num_ngrams (int): the size of the vocabulary
        num_hid1 (int): the number of units in the first hidden layer
        num_hid2 (int): the number of units in the second hidden layer
        num_out (int): the number of units in the output layer

    Returns:
        :class:`lasagne.layers.Layer`: the output layer of the DSSM
    """

    assert (num_entries > 2)

    # Initialise input layer
    if num_samples is None:
        num_rows = None
    else:
        num_rows = num_samples * num_entries

    l_in = layers.InputLayer(shape=(num_rows, num_ngrams), input_var=input_var)

    # Initialise the hidden and output layers of the DSSM
    l_hid1 = layers.DenseLayer(l_in, num_units=num_hid1, nonlinearity=nonlinearities.tanh, W=init.GlorotUniform())
    l_hid2 = layers.DenseLayer(l_hid1, num_units=num_hid2, nonlinearity=nonlinearities.tanh, W=init.GlorotUniform())
    l_out = layers.DenseLayer(l_hid2, num_units=num_out, nonlinearity=nonlinearities.tanh, W=init.GlorotUniform())

    l_out = layers.ExpressionLayer(l_out, lambda X: X / X.norm(2), output_shape='auto')

    return l_out
def __init__(
        self,
        env_spec,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=NL.tanh,
        num_seq_inputs=1,
        prob_network=None,
):
    """
    :param env_spec: A spec for the mdp.
    :param hidden_sizes: list of sizes for the fully connected hidden layers
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :param prob_network: manually specified network for this policy, other network params are ignored
    :return:
    """
    Serializable.quick_init(self, locals())

    assert isinstance(env_spec.action_space, Discrete)

    if prob_network is None:
        prob_network = MLP(
            input_shape=(env_spec.observation_space.flat_dim * num_seq_inputs,),
            output_dim=env_spec.action_space.n,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=NL.softmax,
        )

    self._l_prob = prob_network.output_layer
    self._l_obs = prob_network.input_layer
    self._f_prob = ext.compile_function([prob_network.input_layer.input_var],
                                        L.get_output(prob_network.output_layer))

    self._dist = Categorical(env_spec.action_space.n)

    super(CategoricalMLPPolicy, self).__init__(env_spec)
    LasagnePowered.__init__(self, [prob_network.output_layer])
def exe_maxru(length, num_units, position, binominal):
    batch_size = BATCH_SIZE

    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')

    layer_input = lasagne.layers.InputLayer(shape=(None, length, 1), input_var=input_var, name='input')

    time_updategate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)

    time_update = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                       b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)

    resetgate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                     W_cell=lasagne.init.GlorotUniform())

    updategate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                      W_cell=lasagne.init.GlorotUniform())

    hiden_update = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                        b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)

    layer_taru = MAXRULayer(layer_input, num_units, max_length=length,
                            P_time=lasagne.init.GlorotUniform(), nonlinearity=nonlinearities.tanh,
                            resetgate=resetgate, updategate=updategate, hidden_update=hiden_update,
                            time_updategate=time_updategate, time_update=time_update,
                            only_return_final=True, name='MAXRU', p=0.)

    # W = layer_taru.W_hid_to_hidden_update.sum()
    # U = layer_taru.W_in_to_hidden_update.sum()
    # b = layer_taru.b_hidden_update.sum()

    layer_output = DenseLayer(layer_taru, num_units=1, nonlinearity=nonlinearities.sigmoid, name='output')

    return train(layer_output, input_var, target_var, batch_size, length, position, binominal)
def exe_lstm(use_embedd, length, num_units, position, binominal):
    batch_size = BATCH_SIZE

    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')

    layer_input = lasagne.layers.InputLayer(shape=(None, length, 1), input_var=input_var, name='input')
    if use_embedd:
        layer_position = construct_position_input(batch_size, length, num_units)
        layer_input = lasagne.layers.concat([layer_input, layer_position], axis=2)

    ingate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                  W_cell=lasagne.init.Uniform(range=0.1))

    outgate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                   W_cell=lasagne.init.Uniform(range=0.1))

    # according to Jozefowicz et al.(2015), init bias of forget gate to 1.
    forgetgate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                      W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))

    # now use tanh for nonlinear function of cell, need to try pure linear cell
    cell = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)

    layer_lstm = LSTMLayer(layer_input, num_units, ingate=ingate, forgetgate=forgetgate, cell=cell, outgate=outgate,
                           peepholes=False, nonlinearity=nonlinearities.tanh, only_return_final=True, name='LSTM')

    # W = layer_lstm.W_hid_to_cell.sum()
    # U = layer_lstm.W_in_to_cell.sum()
    # b = layer_lstm.b_cell.sum()

    layer_output = DenseLayer(layer_lstm, num_units=1, nonlinearity=nonlinearities.sigmoid, name='output')

    return train(layer_output, layer_lstm, input_var, target_var, batch_size, length, position, binominal)
def exe_sgru(use_embedd, length, num_units, position, binominal):
    batch_size = BATCH_SIZE

    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')

    layer_input = lasagne.layers.InputLayer(shape=(None, length, 1), input_var=input_var, name='input')
    if use_embedd:
        layer_position = construct_position_input(batch_size, length, num_units)
        layer_input = lasagne.layers.concat([layer_input, layer_position], axis=2)

    resetgate_input = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    resetgate_hidden = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hiden_update = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                        b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)

    layer_sgru = SGRULayer(layer_input, num_units, resetgate_input=resetgate_input,
                           resetgate_hidden=resetgate_hidden, updategate=updategate, hidden_update=hiden_update,
                           only_return_final=True, name='SGRU')

    # W = layer_gru.W_hid_to_hidden_update.sum()
    # U = layer_gru.W_in_to_hidden_update.sum()
    # b = layer_gru.b_hidden_update.sum()

    layer_output = DenseLayer(layer_sgru, num_units=1, nonlinearity=nonlinearities.sigmoid, name='output')

    return train(layer_output, layer_sgru, input_var, target_var, batch_size, length, position, binominal)
def __init__(self, incoming, num_units,
             ingate=Gate(), forgetgate=Gate(), cell=Gate(W_cell=None, nonlinearity=nonlinearities.tanh),
             outgate=Gate(), nonlinearity=nonlinearities.tanh,
             cell_init=init.Constant(0.), hid_init=init.Constant(0.),
             backwards=False, learn_init=False, peepholes=True,
             gradient_steps=-1, grad_clipping=0, unroll_scan=False,
             precompute_input=True, mask_input=None, **kwargs):
    super(CustomLSTMEncoder, self).__init__(incoming, num_units, ingate, forgetgate, cell, outgate, nonlinearity,
                                            cell_init, hid_init, backwards, learn_init, peepholes, gradient_steps,
                                            grad_clipping, unroll_scan, precompute_input, mask_input, False,
                                            **kwargs)
def build_BiRNN(incoming, num_units, mask=None, grad_clipping=0, nonlinearity=nonlinearities.tanh,
                precompute_input=True, dropout=True, in_to_out=False):
    # construct the forward and backward rnns. Now, Ws are initialized by Glorot initializer with default arguments.
    # Need to try other initializers for specific tasks.

    # dropout for incoming
    if dropout:
        incoming = lasagne.layers.DropoutLayer(incoming, p=0.5)

    rnn_forward = lasagne.layers.RecurrentLayer(incoming, num_units, mask_input=mask, grad_clipping=grad_clipping,
                                                nonlinearity=nonlinearity, precompute_input=precompute_input,
                                                W_in_to_hid=lasagne.init.GlorotUniform(),
                                                W_hid_to_hid=lasagne.init.GlorotUniform(), name='forward')
    rnn_backward = lasagne.layers.RecurrentLayer(incoming, num_units, mask_input=mask, grad_clipping=grad_clipping,
                                                 nonlinearity=nonlinearity, precompute_input=precompute_input,
                                                 W_in_to_hid=lasagne.init.GlorotUniform(),
                                                 W_hid_to_hid=lasagne.init.GlorotUniform(), backwards=True,
                                                 name='backward')

    # concatenate the outputs of forward and backward RNNs to combine them.
    concat = lasagne.layers.concat([rnn_forward, rnn_backward], axis=2, name="bi-rnn")

    # dropout for output
    if dropout:
        concat = lasagne.layers.DropoutLayer(concat, p=0.5)

    if in_to_out:
        concat = lasagne.layers.concat([concat, incoming], axis=2)

    # the shape of BiRNN output (concat) is (batch_size, input_length, 2 * num_hidden_units)
    return concat
def build_BiLSTM_HighCNN(incoming1, incoming2, num_units, mask=None, grad_clipping=0, precompute_input=True,
                         peepholes=False, num_filters=20, dropout=True, in_to_out=False):
    # first get some necessary dimensions or parameters
    conv_window = 3
    _, sent_length, _ = incoming2.output_shape

    # dropout before cnn
    if dropout:
        incoming1 = lasagne.layers.DropoutLayer(incoming1, p=0.5)

    # construct convolution layer
    cnn_layer = lasagne.layers.Conv1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
                                           nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # infer the pool size for pooling (pool size should go through all time step of cnn)
    _, _, pool_size = cnn_layer.output_shape
    # construct max pool layer
    pool_layer = lasagne.layers.MaxPool1DLayer(cnn_layer, pool_size=pool_size)
    # reshape the layer to match highway incoming layer [batch * sent_length, num_filters, 1] --> [batch * sent_length, num_filters]
    output_cnn_layer = lasagne.layers.reshape(pool_layer, ([0], -1))

    # dropout after cnn?
    # if dropout:
    #     output_cnn_layer = lasagne.layers.DropoutLayer(output_cnn_layer, p=0.5)

    # construct highway layer
    highway_layer = HighwayDenseLayer(output_cnn_layer, nonlinearity=nonlinearities.rectify)

    # reshape the layer to match lstm incoming layer [batch * sent_length, num_filters] --> [batch, sent_length, num_filters]
    output_highway_layer = lasagne.layers.reshape(highway_layer, (-1, sent_length, [1]))

    # finally, concatenate the two incoming layers together.
    incoming = lasagne.layers.concat([output_highway_layer, incoming2], axis=2)

    return build_BiLSTM(incoming, num_units, mask=mask, grad_clipping=grad_clipping, peepholes=peepholes,
                        precompute_input=precompute_input, dropout=dropout, in_to_out=in_to_out)
def __init__(self, incomings, hid_state_size, voc_size,
             resetgate=GRU_Gate(), updategate=GRU_Gate(), hid_update=GRU_Gate(nonlinearity=nonlin.tanh),
             W=Normal(), max_answer_word=1, **kwargs):
    super(AnswerModule, self).__init__(incomings, **kwargs)

    self.hid_state_size = hid_state_size

    # FOR GRU
    input_shape = self.input_shapes[0]
    num_inputs = np.prod(input_shape[1]) + voc_size  # concatenation of previous prediction

    def add_gate(gate, gate_name):
        return (self.add_param(gate.W_in, (num_inputs, hid_state_size),
                               name="W_in_to_{}".format(gate_name)),
                self.add_param(gate.W_hid, (hid_state_size, hid_state_size),
                               name="W_hid_to_{}".format(gate_name)),
                self.add_param(gate.b, (hid_state_size,),
                               name="b_{}".format(gate_name), regularizable=False),
                gate.nonlinearity)

    # Add in all parameters from gates
    (self.W_in_to_updategate, self.W_hid_to_updategate, self.b_updategate,
     self.nonlinearity_updategate) = add_gate(updategate, 'updategate')
    (self.W_in_to_resetgate, self.W_hid_to_resetgate, self.b_resetgate,
     self.nonlinearity_resetgate) = add_gate(resetgate, 'resetgate')
    (self.W_in_to_hid_update, self.W_hid_to_hid_update, self.b_hid_update,
     self.nonlinearity_hid) = add_gate(hid_update, 'hid_update')

    self.W = self.add_param(W, (hid_state_size, voc_size), name="W")

    self.max_answer_word = max_answer_word

    self.rand_stream = RandomStreams(np.random.randint(1, 2147462579))
def run_task(*_):
    trpo_stepsize = 0.01
    trpo_subsample_factor = 0.2

    env = PointGatherEnv(apple_reward=10, bomb_cost=1, n_apples=2, activity_range=6)

    policy = GaussianMLPPolicy(env.spec,
                               hidden_sizes=(64, 32))

    baseline = GaussianMLPBaseline(
        env_spec=env.spec,
        regressor_args={
            'hidden_sizes': (64, 32),
            'hidden_nonlinearity': NL.tanh,
            'learn_std': False,
            'step_size': trpo_stepsize,
            'optimizer': ConjugateGradientOptimizer(subsample_factor=trpo_subsample_factor)
        }
    )

    safety_constraint = GatherSafetyConstraint(max_value=0.1)

    algo = PDO(
        env=env,
        policy=policy,
        baseline=baseline,
        safety_constraint=safety_constraint,
        batch_size=50000,
        max_path_length=15,
        n_itr=100,
        gae_lambda=0.95,
        discount=0.995,
        step_size=trpo_stepsize,
        optimizer_args={'subsample_factor': trpo_subsample_factor},
        # plot=True,
    )

    algo.train()
def __init__(
        self,
        env_spec,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=NL.tanh,
        num_seq_inputs=1,
        prob_network=None,
):
    """
    :param env_spec: A spec for the mdp.
    :param hidden_sizes: list of sizes for the fully connected hidden layers
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :param prob_network: manually specified network for this policy, other network params are ignored
    :return:
    """
    Serializable.quick_init(self, locals())

    assert isinstance(env_spec.action_space, Discrete)

    if prob_network is None:
        prob_network = MLP(
            input_shape=(env_spec.observation_space.flat_dim * num_seq_inputs,),
            output_dim=env_spec.action_space.n,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=NL.softmax,
        )

    self._l_prob = prob_network.output_layer
    self._l_obs = prob_network.input_layer
    self._f_prob = ext.compile_function([prob_network.input_layer.input_var],
                                        L.get_output(prob_network.output_layer))

    self._dist = Categorical(env_spec.action_space.n)

    super(CategoricalMLPPolicy, self).__init__(env_spec)
    LasagnePowered.__init__(self, [prob_network.output_layer])
def get_output_for(self, arguments, **kwargs):
    input, hprev, Cprev = arguments

    i = nl.sigmoid(self.Wi * input + self.Ui * hprev + self.bi)
    cand = nl.tanh(self.Wc * input + self.Uc * hprev + self.bc)

    f = nl.sigmoid(self.Wf * input + self.Uf * hprev + self.bf)
    C = i * cand + f * Cprev

    o = nl.sigmoid(self.Wo * input + self.Uo * hprev + self.Vo * C + self.bo)
    h = o * nl.tanh(C)

    return h, C
def build_combination(input_var, output_nodes, input_size, stocks, period, feature_types):
    # Input layer
    input_layer = InputLayer(shape=(None, 1, input_size), input_var=input_var)

    assert input_size == stocks * period * feature_types

    input_layer = ReshapeLayer(input_layer, (([0], stocks, period, feature_types)))

    # slice for partition
    stock_feature_type_layers = []
    for ix in range(stocks):
        stock_layer = SliceLayer(input_layer, indices=ix, axis=1)

        this_stock_feature_type_layers = []
        for rx in range(feature_types):
            this_stock_feature_type_layers.append(SliceLayer(stock_layer, indices=rx, axis=1))

        stock_feature_type_layers.append(this_stock_feature_type_layers)

    stock_networks = []
    for this_stock_feature_type_layers in stock_feature_type_layers:
        this_stock_networks = []
        for feature_type_layer in this_stock_feature_type_layers:
            tmp = DenseLayer(dropout(feature_type_layer, p=.2),
                             num_units=10, nonlinearity=tanh)
            tmp = DenseLayer(dropout(tmp, p=.5),
                             num_units=1, nonlinearity=tanh)
            this_stock_networks.append(tmp)

        this_stock_network = ConcatLayer(this_stock_networks)
        stock_network = DenseLayer(dropout(this_stock_network, p=.5),
                                   num_units=1, nonlinearity=tanh)

        stock_networks.append(stock_network)

    network = ConcatLayer(stock_networks)
    network = DenseLayer(dropout(network, p=.5),
                         num_units=output_nodes, nonlinearity=sigmoid)

    return network, stock_networks
def __init__(self, incoming, num_units,
             ingate=Gate(), forgetgate=Gate(), cell=Gate(W_cell=None, nonlinearity=nonlinearities.tanh),
             outgate=Gate(), nonlinearity=nonlinearities.tanh,
             cell_init=init.Constant(0.), hid_init=init.Constant(0.),
             backwards=False, learn_init=False, peepholes=True,
             gradient_steps=-1, grad_clipping=0, unroll_scan=False,
             precompute_input=True, mask_input=None, only_return_final=False,
             inter_drop=0.05, **kwargs):
    super(DropLSTMLayer, self).__init__(incoming, num_units, ingate, forgetgate, cell, outgate, nonlinearity,
                                        cell_init, hid_init, backwards, learn_init, peepholes, gradient_steps,
                                        grad_clipping, unroll_scan, precompute_input, mask_input,
                                        only_return_final, **kwargs)
    self.inter_retain_prob = 1 - inter_drop
    self._srng = RandomStreams(lasagne.random.get_rng().randint(1, 2147462579))
def generate_lstm_parameters():
    gate_parameters = Gate(
        W_in=las.init.Orthogonal(), W_hid=las.init.Orthogonal(),
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=las.init.Orthogonal(), W_hid=las.init.Orthogonal(),
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)
    return gate_parameters, cell_parameters
def create_model(input_shape, input_var, mask_shape, mask_var, lstm_size=250, output_classes=26,
                 w_init=las.init.Orthogonal()):
    gate_parameters = Gate(
        W_in=w_init, W_hid=w_init,
        b=las.init.Constant(0.))

    cell_parameters = Gate(
        W_in=w_init, W_hid=w_init,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    f_lstm, b_lstm = create_blstm(l_in, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm')
    l_sum = ElemwiseSumLayer([f_lstm, b_lstm], name='sum')
    l_forward_slice1 = SliceLayer(l_sum, -1, 1, name='slice1')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use a dense layer sized to the number of classes.
    l_out = DenseLayer(
        l_forward_slice1, num_units=output_classes,
        nonlinearity=las.nonlinearities.softmax, name='output')

    return l_out
def create_model(input_shape, input_var, mask_shape, mask_var, window, lstm_size=250, output_classes=26,
                 w_init=las.init.GlorotUniform(), use_peepholes=False, use_blstm=True):
    gate_parameters = Gate(
        W_in=w_init, W_hid=w_init,
        b=las.init.Constant(0.))

    cell_parameters = Gate(
        W_in=w_init, W_hid=w_init,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, name='mask')
    symbolic_seqlen = l_in.input_var.shape[1]

    l_delta = DeltaLayer(l_in, window, name='delta')

    if use_blstm:
        f_lstm, b_lstm = create_blstm(l_delta, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm',
                                      use_peepholes)
        l_sum = ElemwiseSumLayer([f_lstm, b_lstm], name='sum')

        # reshape to (num_examples * seq_len, lstm_size)
        l_reshape = ReshapeLayer(l_sum, (-1, lstm_size), name='reshape')
    else:
        l_lstm = create_lstm(l_delta, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm', use_peepholes)
        l_reshape = ReshapeLayer(l_lstm, (-1, lstm_size), name='reshape')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use a dense layer sized to the number of classes.
    l_softmax = DenseLayer(
        l_reshape, num_units=output_classes,
        nonlinearity=las.nonlinearities.softmax, name='softmax')

    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen, output_classes), name='output')

    return l_out
def create_model(substreams, mask_shape, mask_var, lstm_size=250, output_classes=26, fusiontype='concat',
                 w_init_fn=las.init.Orthogonal(), use_peepholes=True):
    gate_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        b=las.init.Constant(0.))

    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_mask = InputLayer(mask_shape, mask_var, 'mask')
    symbolic_seqlen_raw = l_mask.input_var.shape[1]

    # We'll combine the forward and backward layer output by summing.
    # Merge layers take in lists of layers to merge as input.
    if fusiontype == 'adasum':
        l_fuse = AdaptiveElemwiseSumLayer(substreams, name='adasum1')
    elif fusiontype == 'sum':
        l_fuse = ElemwiseSumLayer(substreams, name='sum1')
    elif fusiontype == 'concat':
        l_fuse = ConcatLayer(substreams, axis=-1, name='concat')

    f_lstm_agg, b_lstm_agg = create_blstm(l_fuse, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm_agg')
    l_sum2 = ElemwiseSumLayer([f_lstm_agg, b_lstm_agg], name='sum2')

    # reshape to (num_examples * seq_len, lstm_size)
    l_reshape3 = ReshapeLayer(l_sum2, (-1, lstm_size), name='reshape3')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use a dense layer sized to the number of classes.
    l_softmax = DenseLayer(
        l_reshape3, num_units=output_classes,
        nonlinearity=las.nonlinearities.softmax, name='softmax')

    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen_raw, output_classes), name='output')

    return l_out, l_fuse
def create_model(input_shape, input_var, mask_shape, mask_var, lstm_size=250, output_classes=26,
                 w_init=las.init.GlorotUniform(), use_peepholes=False, use_blstm=True):
    gate_parameters = Gate(
        W_in=w_init, W_hid=w_init,
        b=las.init.Constant(0.))

    cell_parameters = Gate(
        W_in=w_init, W_hid=w_init,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')
    symbolic_seqlen = l_in.input_var.shape[1]

    if use_blstm:
        f_lstm, b_lstm = create_blstm(l_in, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm',
                                      use_peepholes)
        l_sum = ElemwiseSumLayer([f_lstm, b_lstm], name='sum')

        # reshape to (num_examples * seq_len, lstm_size)
        l_reshape = ReshapeLayer(l_sum, (-1, lstm_size), name='reshape')
    else:
        l_lstm = create_lstm(l_in, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm', use_peepholes)
        l_reshape = ReshapeLayer(l_lstm, (-1, lstm_size), name='reshape')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use a dense layer sized to the number of classes.
    l_softmax = DenseLayer(
        l_reshape, num_units=output_classes,
        nonlinearity=las.nonlinearities.softmax, name='softmax')

    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen, output_classes), name='output')

    return l_out
def __init__(self, W_t=init.Normal(0.1), W_x=init.Normal(0.1), b=init.Constant(0.),
             nonlinearity_inside=nonlinearities.tanh,
             nonlinearity_outside=nonlinearities.sigmoid):
    self.W_t = W_t
    self.W_x = W_x
    self.b = b
    self.nonlinearity_inside = nonlinearity_inside
    self.nonlinearity_outside = nonlinearity_outside
def __init__(
        self,
        env_spec,
        latent_dim=0,    # all this is fake
        latent_name='categorical',
        bilinear_integration=False,
        resample=False,  # until here
        hidden_sizes=(32, 32),
        hidden_nonlinearity=NL.tanh,
        prob_network=None,
):
    """
    :param env_spec: A spec for the mdp.
    :param hidden_sizes: list of sizes for the fully connected hidden layers
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :param prob_network: manually specified network for this policy, other network params are ignored
    :return:
    """
    # bullshit
    self.latent_dim = latent_dim  # could I avoid needing this self for the get_action?
    self.latent_name = latent_name
    self.bilinear_integration = bilinear_integration
    self.resample = resample
    self._set_std_to_0 = False

    Serializable.quick_init(self, locals())

    assert isinstance(env_spec.action_space, Discrete)

    if prob_network is None:
        prob_network = MLP(
            input_shape=(env_spec.observation_space.flat_dim,),
            output_dim=env_spec.action_space.n,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=NL.softmax,
        )

    self._l_prob = prob_network.output_layer
    self._l_obs = prob_network.input_layer
    self._f_prob = ext.compile_function([prob_network.input_layer.input_var],
                                        L.get_output(prob_network.output_layer))

    self._dist = Categorical(env_spec.action_space.n)

    super(CategoricalMLPPolicy, self).__init__(env_spec)
    LasagnePowered.__init__(self, [prob_network.output_layer])
def __init__(
        self,
        env_spec,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=NL.rectify,
        hidden_W_init=LI.HeUniform(),
        hidden_b_init=LI.Constant(0.),
        output_nonlinearity=NL.tanh,
        output_W_init=LI.Uniform(-3e-3, 3e-3),
        output_b_init=LI.Uniform(-3e-3, 3e-3),
        bn=False):
    Serializable.quick_init(self, locals())

    l_obs = L.InputLayer(shape=(None, env_spec.observation_space.flat_dim))

    l_hidden = l_obs
    if bn:
        l_hidden = batch_norm(l_hidden)

    for idx, size in enumerate(hidden_sizes):
        l_hidden = L.DenseLayer(
            l_hidden,
            num_units=size,
            W=hidden_W_init,
            b=hidden_b_init,
            nonlinearity=hidden_nonlinearity,
            name="h%d" % idx
        )
        if bn:
            l_hidden = batch_norm(l_hidden)

    l_output = L.DenseLayer(
        l_hidden,
        num_units=env_spec.action_space.flat_dim,
        W=output_W_init,
        b=output_b_init,
        nonlinearity=output_nonlinearity,
        name="output"
    )

    # Note the deterministic=True argument. It makes sure that when getting
    # actions from single observations, we do not update params in the
    # batch normalization layers
    action_var = L.get_output(l_output, deterministic=True)

    self._output_layer = l_output

    self._f_actions = ext.compile_function([l_obs.input_var], action_var)

    super(DeterministicMLPPolicy, self).__init__(env_spec)
    LasagnePowered.__init__(self, [l_output])
def build_recur_dropout(incoming1, incoming2, num_units, num_labels, mask, grad_clipping, num_filters, p):
    # Construct Bi-directional LSTM-CNNs-CRF with recurrent dropout.
    # first get some necessary dimensions or parameters
    conv_window = 3
    # shape = [batch, n-step, c_dim, char_length]

    # construct convolution layer
    # shape = [batch, n-step, c_filters, output_length]
    cnn_layer = ConvTimeStep1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
                                    nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # infer the pool size for pooling (pool size should go through all time step of cnn)
    _, _, _, pool_size = cnn_layer.output_shape
    # construct max pool layer
    # shape = [batch, n-step, c_filters, 1]
    pool_layer = PoolTimeStep1DLayer(cnn_layer, pool_size=pool_size)
    # reshape: [batch, n-step, c_filters, 1] --> [batch, n-step, c_filters]
    output_cnn_layer = lasagne.layers.reshape(pool_layer, ([0], [1], [2]))

    # finally, concatenate the two incoming layers together.
    # shape = [batch, n-step, c_filter&w_dim]
    incoming = lasagne.layers.concat([output_cnn_layer, incoming2], axis=2)

    # dropout for incoming
    incoming = lasagne.layers.DropoutLayer(incoming, p=p, shared_axes=(1,))

    ingate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                          W_cell=lasagne.init.Uniform(range=0.1))
    outgate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                           W_cell=lasagne.init.Uniform(range=0.1))
    # according to Jozefowicz et al.(2015), init bias of forget gate to 1.
    forgetgate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                              W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
    # now use tanh for nonlinear function of cell, need to try pure linear cell
    cell_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                        nonlinearity=nonlinearities.tanh)
    lstm_forward = LSTMLayer(incoming, num_units, mask_input=mask, grad_clipping=grad_clipping,
                             nonlinearity=nonlinearities.tanh, peepholes=False,
                             ingate=ingate_forward, outgate=outgate_forward,
                             forgetgate=forgetgate_forward, cell=cell_forward, p=p, name='forward')

    ingate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                           W_cell=lasagne.init.Uniform(range=0.1))
    outgate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                            W_cell=lasagne.init.Uniform(range=0.1))
    # according to Jozefowicz et al.(2015), init bias of forget gate to 1.
    forgetgate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                               W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
    # now use tanh for nonlinear function of cell, need to try pure linear cell
    cell_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                         nonlinearity=nonlinearities.tanh)
    lstm_backward = LSTMLayer(incoming, num_units, mask_input=mask, grad_clipping=grad_clipping,
                              nonlinearity=nonlinearities.tanh, peepholes=False, backwards=True,
                              ingate=ingate_backward, outgate=outgate_backward,
                              forgetgate=forgetgate_backward, cell=cell_backward, p=p, name='backward')

    # concatenate the outputs of forward and backward LSTMs to combine them.
    bi_lstm_cnn = lasagne.layers.concat([lstm_forward, lstm_backward], axis=2, name="bi-lstm")

    # shape = [batch, n-step, num_units]
    bi_lstm_cnn = lasagne.layers.DropoutLayer(bi_lstm_cnn, p=p, shared_axes=(1,))

    return ChainCRFLayer(bi_lstm_cnn, num_labels, mask_input=mask)
def build_RNN(architec, layer_input, layer_mask, num_units, grad_clipping):
    def build_GRU(reset_input):
        resetgate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
        updategate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
        hiden_update = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                            b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)
        return GRULayer(layer_input, num_units, mask_input=layer_mask, grad_clipping=grad_clipping,
                        resetgate=resetgate, updategate=updategate, hidden_update=hiden_update,
                        reset_input=reset_input, only_return_final=True, p=0.5, name='GRU')

    def build_LSTM():
        ingate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                      W_cell=lasagne.init.Uniform(range=0.1))
        outgate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                       W_cell=lasagne.init.Uniform(range=0.1))
        # according to Jozefowicz et al.(2015), init bias of forget gate to 1.
        forgetgate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                          W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
        # now use tanh for nonlinear function of cell, need to try pure linear cell
        cell = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                    b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)
        return LSTMLayer(layer_input, num_units, mask_input=layer_mask, grad_clipping=grad_clipping,
                         ingate=ingate, forgetgate=forgetgate, cell=cell, outgate=outgate, peepholes=False,
                         nonlinearity=nonlinearities.tanh, only_return_final=True, p=0.5, name='LSTM')

    def build_SGRU():
        resetgate_hidden = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                W_cell=lasagne.init.GlorotUniform())
        resetgate_input = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                               W_cell=lasagne.init.GlorotUniform())
        updategate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                          W_cell=lasagne.init.GlorotUniform())
        hidden_update = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                             b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)
        return SGRULayer(layer_input, num_units, mask_input=layer_mask, grad_clipping=grad_clipping,
                         resetgate_input=resetgate_input, resetgate_hidden=resetgate_hidden,
                         updategate=updategate, hidden_update=hidden_update,
                         only_return_final=True, p=0.5, name='SGRU')

    if architec == 'gru0':
        return build_GRU(False)
    elif architec == 'gru1':
        return build_GRU(True)
    elif architec == 'lstm':
        return build_LSTM()
    elif architec == 'sgru':
        return build_SGRU()
    else:
        raise ValueError('unknown architecture: %s' % architec)
def build_std_dropout_gru(incoming1, incoming2, num_units, num_labels, mask, grad_clipping, num_filters, p,
                          reset_input):
    # Construct Bi-directional GRU-CNNs with standard dropout.
    # first get some necessary dimensions or parameters
    conv_window = 3
    # shape = [batch, n-step, c_dim, char_length]
    incoming1 = lasagne.layers.DropoutLayer(incoming1, p=p)

    # construct convolution layer
    # shape = [batch, n-step, c_filters, output_length]
    cnn_layer = ConvTimeStep1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
                                    nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # infer the pool size for pooling (pool size should go through all time step of cnn)
    _, _, _, pool_size = cnn_layer.output_shape
    # construct max pool layer
    # shape = [batch, n-step, c_filters, 1]
    pool_layer = PoolTimeStep1DLayer(cnn_layer, pool_size=pool_size)
    # reshape: [batch, n-step, c_filters, 1] --> [batch, n-step, c_filters]
    output_cnn_layer = lasagne.layers.reshape(pool_layer, ([0], [1], [2]))

    # finally, concatenate the two incoming layers together.
    # shape = [batch, n-step, c_filter&w_dim]
    incoming = lasagne.layers.concat([output_cnn_layer, incoming2], axis=2)

    # dropout for incoming
    incoming = lasagne.layers.DropoutLayer(incoming, p=0.2)

    resetgate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                                 nonlinearity=nonlinearities.tanh)
    gru_forward = GRULayer(incoming, num_units, mask_input=mask,
                           resetgate=resetgate_forward, updategate=updategate_forward,
                           hidden_update=hidden_update_forward, grad_clipping=grad_clipping,
                           reset_input=reset_input, name='forward')

    resetgate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                  W_cell=None, nonlinearity=nonlinearities.tanh)
    gru_backward = GRULayer(incoming, num_units, mask_input=mask, backwards=True,
                            resetgate=resetgate_backward, updategate=updategate_backward,
                            hidden_update=hidden_update_backward, grad_clipping=grad_clipping,
                            reset_input=reset_input, name='backward')

    # concatenate the outputs of forward and backward GRUs to combine them.
    bi_gru_cnn = lasagne.layers.concat([gru_forward, gru_backward], axis=2, name="bi-gru")

    bi_gru_cnn = lasagne.layers.DropoutLayer(bi_gru_cnn, p=p)

    # reshape bi-rnn-cnn to [batch * max_length, num_units]
    bi_gru_cnn = lasagne.layers.reshape(bi_gru_cnn, (-1, [2]))

    # construct output layer (dense layer with softmax)
    layer_output = lasagne.layers.DenseLayer(bi_gru_cnn, num_units=num_labels,
                                             nonlinearity=nonlinearities.softmax, name='softmax')

    return layer_output
def build_std_dropout_sgru(incoming1, incoming2, num_units, num_labels, mask, grad_clipping, num_filters, p):
    # Construct Bi-directional SGRU-CNNs with standard dropout.
    # first get some necessary dimensions or parameters
    conv_window = 3
    # shape = [batch, n-step, c_dim, char_length]
    incoming1 = lasagne.layers.DropoutLayer(incoming1, p=p)

    # construct convolution layer
    # shape = [batch, n-step, c_filters, output_length]
    cnn_layer = ConvTimeStep1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
                                    nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # infer the pool size for pooling (pool size should go through all time step of cnn)
    _, _, _, pool_size = cnn_layer.output_shape
    # construct max pool layer
    # shape = [batch, n-step, c_filters, 1]
    pool_layer = PoolTimeStep1DLayer(cnn_layer, pool_size=pool_size)
    # reshape: [batch, n-step, c_filters, 1] --> [batch, n-step, c_filters]
    output_cnn_layer = lasagne.layers.reshape(pool_layer, ([0], [1], [2]))

    # finally, concatenate the two incoming layers together.
    # shape = [batch, n-step, c_filter&w_dim]
    incoming = lasagne.layers.concat([output_cnn_layer, incoming2], axis=2)

    # dropout for incoming
    incoming = lasagne.layers.DropoutLayer(incoming, p=0.2)

    resetgate_input_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                   W_cell=None)
    resetgate_hidden_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                    W_cell=None)
    updategate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                                 nonlinearity=nonlinearities.tanh)
    sgru_forward = SGRULayer(incoming, num_units, mask_input=mask,
                             resetgate_input=resetgate_input_forward, resetgate_hidden=resetgate_hidden_forward,
                             updategate=updategate_forward, hidden_update=hidden_update_forward,
                             grad_clipping=grad_clipping, name='forward')

    resetgate_input_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                    W_cell=None)
    resetgate_hidden_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                     W_cell=None)
    updategate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                  W_cell=None, nonlinearity=nonlinearities.tanh)
    sgru_backward = SGRULayer(incoming, num_units, mask_input=mask, backwards=True,
                              resetgate_input=resetgate_input_backward, resetgate_hidden=resetgate_hidden_backward,
                              updategate=updategate_backward, hidden_update=hidden_update_backward,
                              grad_clipping=grad_clipping, name='backward')

    # concatenate the outputs of forward and backward SGRUs to combine them.
    bi_sgru_cnn = lasagne.layers.concat([sgru_forward, sgru_backward], axis=2, name="bi-sgru")

    bi_sgru_cnn = lasagne.layers.DropoutLayer(bi_sgru_cnn, p=p)

    # reshape bi-rnn-cnn to [batch * max_length, num_units]
    bi_sgru_cnn = lasagne.layers.reshape(bi_sgru_cnn, (-1, [2]))

    # construct output layer (dense layer with softmax)
    layer_output = lasagne.layers.DenseLayer(bi_sgru_cnn, num_units=num_labels,
                                             nonlinearity=nonlinearities.softmax, name='softmax')

    return layer_output
def build_recur_dropout_gru(incoming1, incoming2, num_units, num_labels, mask, grad_clipping, num_filters, p,
                            reset_input):
    # Construct Bi-directional GRU-CNNs with recurrent dropout.
    # first get some necessary dimensions or parameters
    conv_window = 3
    # shape = [batch, n-step, c_dim, char_length]

    # construct convolution layer
    # shape = [batch, n-step, c_filters, output_length]
    cnn_layer = ConvTimeStep1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
                                    nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # infer the pool size for pooling (pool size should go through all time step of cnn)
    _, _, _, pool_size = cnn_layer.output_shape
    # construct max pool layer
    # shape = [batch, n-step, c_filters, 1]
    pool_layer = PoolTimeStep1DLayer(cnn_layer, pool_size=pool_size)
    # reshape: [batch, n-step, c_filters, 1] --> [batch, n-step, c_filters]
    output_cnn_layer = lasagne.layers.reshape(pool_layer, ([0], [1], [2]))

    # finally, concatenate the two incoming layers together.
    # shape = [batch, n-step, c_filter&w_dim]
    incoming = lasagne.layers.concat([output_cnn_layer, incoming2], axis=2)

    # dropout for incoming
    incoming = lasagne.layers.DropoutLayer(incoming, p=0.2, shared_axes=(1,))

    resetgate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                                 nonlinearity=nonlinearities.tanh)
    gru_forward = GRULayer(incoming, num_units, mask_input=mask,
                           resetgate=resetgate_forward, updategate=updategate_forward,
                           hidden_update=hidden_update_forward, grad_clipping=grad_clipping,
                           reset_input=reset_input, p=p, name='forward')

    resetgate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                  W_cell=None, nonlinearity=nonlinearities.tanh)
    gru_backward = GRULayer(incoming, num_units, mask_input=mask, backwards=True,
                            resetgate=resetgate_backward, updategate=updategate_backward,
                            hidden_update=hidden_update_backward, grad_clipping=grad_clipping,
                            reset_input=reset_input, p=p, name='backward')

    # concatenate the outputs of forward and backward GRUs to combine them.
    bi_gru_cnn = lasagne.layers.concat([gru_forward, gru_backward], axis=2, name="bi-gru")

    # shape = [batch, n-step, num_units]
    bi_gru_cnn = lasagne.layers.DropoutLayer(bi_gru_cnn, p=p, shared_axes=(1,))

    # reshape bi-rnn-cnn to [batch * max_length, num_units]
    bi_gru_cnn = lasagne.layers.reshape(bi_gru_cnn, (-1, [2]))

    # construct output layer (dense layer with softmax)
    layer_output = lasagne.layers.DenseLayer(bi_gru_cnn, num_units=num_labels,
                                             nonlinearity=nonlinearities.softmax, name='softmax')

    return layer_output
def build_recur_dropout_sgru(incoming1, incoming2, num_units, num_labels, mask, grad_clipping, num_filters, p):
    # Construct Bi-directional SGRU-CNNs with recurrent dropout.
    # first get some necessary dimensions or parameters
    conv_window = 3
    # shape = [batch, n-step, c_dim, char_length]

    # construct convolution layer
    # shape = [batch, n-step, c_filters, output_length]
    cnn_layer = ConvTimeStep1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
                                    nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # infer the pool size for pooling (pool size should go through all time step of cnn)
    _, _, _, pool_size = cnn_layer.output_shape
    # construct max pool layer
    # shape = [batch, n-step, c_filters, 1]
    pool_layer = PoolTimeStep1DLayer(cnn_layer, pool_size=pool_size)
    # reshape: [batch, n-step, c_filters, 1] --> [batch, n-step, c_filters]
    output_cnn_layer = lasagne.layers.reshape(pool_layer, ([0], [1], [2]))

    # finally, concatenate the two incoming layers together.
    # shape = [batch, n-step, c_filter&w_dim]
    incoming = lasagne.layers.concat([output_cnn_layer, incoming2], axis=2)

    # dropout for incoming
    incoming = lasagne.layers.DropoutLayer(incoming, p=0.2, shared_axes=(1,))

    resetgate_input_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                   W_cell=None)
    resetgate_hidden_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                    W_cell=None)
    updategate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                                 nonlinearity=nonlinearities.tanh)
    sgru_forward = SGRULayer(incoming, num_units, mask_input=mask,
                             resetgate_input=resetgate_input_forward, resetgate_hidden=resetgate_hidden_forward,
                             updategate=updategate_forward, hidden_update=hidden_update_forward,
                             grad_clipping=grad_clipping, p=p, name='forward')

    resetgate_input_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                    W_cell=None)
    resetgate_hidden_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                     W_cell=None)
    updategate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                  W_cell=None, nonlinearity=nonlinearities.tanh)
    sgru_backward = SGRULayer(incoming, num_units, mask_input=mask, backwards=True,
                              resetgate_input=resetgate_input_backward, resetgate_hidden=resetgate_hidden_backward,
                              updategate=updategate_backward, hidden_update=hidden_update_backward,
                              grad_clipping=grad_clipping, p=p, name='backward')

    # concatenate the outputs of forward and backward SGRUs to combine them.
    bi_sgru_cnn = lasagne.layers.concat([sgru_forward, sgru_backward], axis=2, name="bi-sgru")

    # shape = [batch, n-step, num_units]
    bi_sgru_cnn = lasagne.layers.DropoutLayer(bi_sgru_cnn, p=p, shared_axes=(1,))

    # reshape bi-rnn-cnn to [batch * max_length, num_units]
    bi_sgru_cnn = lasagne.layers.reshape(bi_sgru_cnn, (-1, [2]))

    # construct output layer (dense layer with softmax)
    layer_output = lasagne.layers.DenseLayer(bi_sgru_cnn, num_units=num_labels,
                                             nonlinearity=nonlinearities.softmax, name='softmax')

    return layer_output
def __init__(self, incoming, num_units,
             ingate=Gate(), forgetgate=Gate(), cell=Gate(W_cell=None, nonlinearity=nonlinearities.tanh),
             outgate=Gate(), nonlinearity=nonlinearities.tanh,
             cell_init=init.Constant(0.), hid_init=init.Constant(0.),
             backwards=False, learn_init=False, peepholes=True,
             gradient_steps=-1, grad_clipping=0, precompute_input=True,
             mask_input=None, encoder_mask_input=None,
             attention=False, word_by_word=False, **kwargs):
    super(CustomLSTMDecoder, self).__init__(incoming, num_units, ingate, forgetgate, cell, outgate, nonlinearity,
                                            cell_init, hid_init, backwards, learn_init, peepholes, gradient_steps,
                                            grad_clipping, False, precompute_input, mask_input, True,
                                            **kwargs)
    self.attention = attention
    self.word_by_word = word_by_word
    # encoder mask
    self.encoder_mask_incoming_index = -1
    if encoder_mask_input is not None:
        self.input_layers.append(encoder_mask_input)
        self.input_shapes.append(encoder_mask_input.output_shape)
        self.encoder_mask_incoming_index = len(self.input_layers) - 1
    # check encoder
    if not isinstance(self.cell_init, CustomLSTMEncoder) \
            or self.num_units != self.cell_init.num_units:
        raise ValueError('cell_init must be CustomLSTMEncoder'
                         ' and num_units should equal')
    self.r_init = None
    self.r_init = self.add_param(init.Constant(0.), (1, num_units), name="r_init",
                                 trainable=False, regularizable=False)
    if self.word_by_word:
        # rewrites
        self.attention = True
    if self.attention:
        if not isinstance(encoder_mask_input, lasagne.layers.Layer):
            raise ValueError('Attention mechanism needs encoder mask layer')
        # initializes attention weights
        self.W_y_attend = self.add_param(init.Normal(0.1), (num_units, num_units), 'V_pointer')
        self.W_h_attend = self.add_param(init.Normal(0.1), (num_units, num_units), 'W_h_attend')
        # doesn't need transpose
        self.w_attend = self.add_param(init.Normal(0.1), (num_units, 1), 'v_pointer')
        self.W_p_attend = self.add_param(init.Normal(0.1), (num_units, num_units), 'W_p_attend')
        self.W_x_attend = self.add_param(init.Normal(0.1), (num_units, num_units), 'W_x_attend')
        if self.word_by_word:
            self.W_r_attend = self.add_param(init.Normal(0.1), (num_units, num_units), 'W_r_attend')
            self.W_t_attend = self.add_param(init.Normal(0.1), (num_units, num_units), 'W_t_attend')
def network_generator(self, input_var, network_weights=None):
    # Input layer
    layers = []
    n_blocks = int(np.log2(self.input_size / 8)) + 1  # end up with 8x8 output
    layers.append(InputLayer(shape=(None, self.hidden_size), input_var=input_var, name='generator/input'))

    # Dense layer up (from h to n*8*8)
    layers.append(dense_layer(layers[-1], n_units=(8 * 8 * self.n_filters), name='generator/dense%d' % len(layers),
                              network_weights=network_weights))
    layers.append(ReshapeLayer(layers[-1], (-1, self.n_filters, 8, 8), name='generator/reshape%d' % len(layers)))

    # Convolutional blocks (decoder)
    for i_block in range(1, n_blocks + 1):
        layers.append(conv_layer(layers[-1], n_filters=self.n_filters, stride=1,
                                 name='generator/conv%d' % len(layers), network_weights=network_weights))
        layers.append(conv_layer(layers[-1], n_filters=self.n_filters, stride=1,
                                 name='generator/conv%d' % len(layers), network_weights=network_weights))
        if i_block != n_blocks:
            layers.append(Upscale2DLayer(layers[-1], scale_factor=2, name='generator/upsample%d' % len(layers)))

    # Final layer (make sure input images are in the range [-1, 1] if tanh used)
    layers.append(conv_layer(layers[-1], n_filters=3, stride=1, name='generator/output',
                             network_weights=network_weights, nonlinearity=sigmoid))

    # Network in dictionary form
    network = {layer.name: layer for layer in layers}

    return network

# def network_generator_alt(self, input_var, network_weights=None):
#
#     # Input layer
#     layers = []
#     n_blocks = int(np.log2(self.input_size / 8)) + 1  # end up with 8x8 output
#     layers.append(InputLayer(shape=(None, self.hidden_size), input_var=input_var, name='generator/input'))
#
#     # Dense layer up (from h to n*8*8)
#     layers.append(dense_layer(layers[-1], n_units=(8 * 8 * self.n_filters*n_blocks), name='generator/dense%d' % len(layers), network_weights=network_weights, nonlinearity=elu, bn=True))
#     layers.append(ReshapeLayer(layers[-1], (-1, self.n_filters*n_blocks, 8, 8), name='generator/reshape%d' % len(layers)))
#
#     # Convolutional blocks (decoder)
#     for i_block in range(1, n_blocks+1)[::-1]:
#         # layers.append(conv_layer(layers[-1], n_filters=self.n_filters*(i_block), stride=1, name='generator/conv%d' % len(layers), network_weights=network_weights, bn=True))
#         # layers.append(conv_layer(layers[-1], n_filters=self.n_filters*(i_block), stride=1, name='generator/conv%d' % len(layers), network_weights=network_weights, bn=True))
#         if i_block != 1:
#             layers.append(transposed_conv_layer(layers[-1], n_filters=self.n_filters*(i_block-1), stride=2, name='generator/upsample%d' % len(layers),
#                                                 output_size=8*2**(n_blocks-i_block+1), network_weights=network_weights, nonlinearity=elu, bn=True))
#
#     # Final layer (make sure input images are in the range [-1, 1]
#     layers.append(conv_layer(layers[-1], n_filters=3, stride=1, name='generator/output', network_weights=network_weights, nonlinearity=tanh, bn=False))
#
#     # Network in dictionary form
#     network = {layer.name: layer for layer in layers}
#
#     return network
def build_BiLSTM(incoming, num_units, mask=None, grad_clipping=0, precompute_input=True, peepholes=False,
                 dropout=True, in_to_out=False):
    # construct the forward and backward rnns. Now, Ws are initialized by Glorot initializer with default arguments.
    # Need to try other initializers for specific tasks.

    # dropout for incoming
    if dropout:
        incoming = lasagne.layers.DropoutLayer(incoming, p=0.5)

    ingate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                          W_cell=lasagne.init.Uniform(range=0.1))
    outgate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                           W_cell=lasagne.init.Uniform(range=0.1))
    # according to Jozefowicz et al.(2015), init bias of forget gate to 1.
    forgetgate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                              W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
    # now use tanh for nonlinear function of cell, need to try pure linear cell
    cell_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                        nonlinearity=nonlinearities.tanh)
    lstm_forward = lasagne.layers.LSTMLayer(incoming, num_units, mask_input=mask, grad_clipping=grad_clipping,
                                            nonlinearity=nonlinearities.tanh, peepholes=peepholes,
                                            precompute_input=precompute_input,
                                            ingate=ingate_forward, outgate=outgate_forward,
                                            forgetgate=forgetgate_forward, cell=cell_forward, name='forward')

    ingate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                           W_cell=lasagne.init.Uniform(range=0.1))
    outgate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                            W_cell=lasagne.init.Uniform(range=0.1))
    # according to Jozefowicz et al.(2015), init bias of forget gate to 1.
    forgetgate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                               W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
    # now use tanh for nonlinear function of cell, need to try pure linear cell
    cell_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                         nonlinearity=nonlinearities.tanh)
    lstm_backward = lasagne.layers.LSTMLayer(incoming, num_units, mask_input=mask, grad_clipping=grad_clipping,
                                             nonlinearity=nonlinearities.tanh, peepholes=peepholes,
                                             precompute_input=precompute_input, backwards=True,
                                             ingate=ingate_backward, outgate=outgate_backward,
                                             forgetgate=forgetgate_backward, cell=cell_backward, name='backward')

    # concatenate the outputs of forward and backward RNNs to combine them.
    concat = lasagne.layers.concat([lstm_forward, lstm_backward], axis=2, name="bi-lstm")

    # dropout for output
    if dropout:
        concat = lasagne.layers.DropoutLayer(concat, p=0.5)

    if in_to_out:
        concat = lasagne.layers.concat([concat, incoming], axis=2)

    # the shape of BiRNN output (concat) is (batch_size, input_length, 2 * num_hidden_units)
    return concat
def build_BiGRU(incoming, num_units, mask=None, grad_clipping=0, precompute_input=True,
                dropout=True, in_to_out=False):
    # construct the forward and backward grus. Now, Ws are initialized by Glorot initializer with default arguments.
    # Need to try other initializers for specific tasks.

    # dropout for incoming
    if dropout:
        incoming = lasagne.layers.DropoutLayer(incoming, p=0.5)

    # according to Jozefowicz et al. (2015), init bias of forget gate to 1.
    resetgate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                             W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
    updategate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                              W_cell=lasagne.init.Uniform(range=0.1))
    # now use tanh for nonlinear function of hidden gate
    hidden_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                          W_cell=None, nonlinearity=nonlinearities.tanh)
    gru_forward = lasagne.layers.GRULayer(incoming, num_units, mask_input=mask, grad_clipping=grad_clipping,
                                          precompute_input=precompute_input,
                                          resetgate=resetgate_forward, updategate=updategate_forward,
                                          hidden_update=hidden_forward, name='forward')

    # according to Jozefowicz et al. (2015), init bias of forget gate to 1.
    resetgate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                              W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
    updategate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                               W_cell=lasagne.init.Uniform(range=0.1))
    # now use tanh for nonlinear function of hidden gate
    hidden_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                           W_cell=None, nonlinearity=nonlinearities.tanh)
    gru_backward = lasagne.layers.GRULayer(incoming, num_units, mask_input=mask, grad_clipping=grad_clipping,
                                           precompute_input=precompute_input, backwards=True,
                                           resetgate=resetgate_backward, updategate=updategate_backward,
                                           hidden_update=hidden_backward, name='backward')

    # concatenate the outputs of forward and backward GRUs to combine them.
    concat = lasagne.layers.concat([gru_forward, gru_backward], axis=2, name="bi-gru")

    # dropout for output
    if dropout:
        concat = lasagne.layers.DropoutLayer(concat, p=0.5)

    if in_to_out:
        concat = lasagne.layers.concat([concat, incoming], axis=2)

    # the shape of BiRNN output (concat) is (batch_size, input_length, 2 * num_hidden_units)
    return concat
def __init__(self, incomings, num_units,
             W_g=init.Normal(0.1), W_h=init.Normal(0.1), W_v=init.Normal(0.1),
             W_s=init.Normal(0.1), W_p=init.Normal(0.1),
             nonlinearity=nonlinearities.tanh, nonlinearity_atten=nonlinearities.softmax,
             **kwargs):
    super(AttenLayer, self).__init__(incomings, **kwargs)

    self.batch_size = self.input_shapes[0][0]  # None
    num_inputs = self.input_shapes[2][1]       # k
    feature_dim = self.input_shapes[0][1]      # d
    self.num_units = num_units
    self.nonlinearity = nonlinearity
    self.nonlinearity_atten = nonlinearity_atten

    self.W_h_to_attenGate = self.add_param(W_h, (num_inputs, 1), name='W_h_to_atten')
    self.W_g_to_attenGate = self.add_param(W_g, (feature_dim, num_inputs), name='W_g_to_atten')
    self.W_v_to_attenGate = self.add_param(W_v, (feature_dim, num_inputs), name='W_v_to_atten')
    self.W_s_to_attenGate = self.add_param(W_s, (feature_dim, num_inputs), name='W_s_to_atten')
    self.W_p = self.add_param(W_p, (feature_dim, num_units), name='W_p_to_atten')
    self.num_inputs = num_inputs
def __init__(self, conf):
    self.conf = conf

    if self.conf.act == "linear":
        self.conf.act = linear
    elif self.conf.act == "sigmoid":
        self.conf.act = sigmoid
    elif self.conf.act == "relu":
        self.conf.act = rectify
    elif self.conf.act == "tanh":
        self.conf.act = tanh
    else:
        raise ValueError("Unknown activation function", self.conf.act)

    input_var_first = T.matrix('inputs1')
    input_var_second = T.matrix('inputs2')
    target_var = T.matrix('targets')

    # create network
    self.autoencoder, encoder_first, encoder_second = self.__create_toplogy__(input_var_first, input_var_second)

    self.out = get_output(self.autoencoder)

    loss = squared_error(self.out, target_var)
    loss = loss.mean()

    params = get_all_params(self.autoencoder, trainable=True)
    updates = nesterov_momentum(loss, params, learning_rate=self.conf.lr, momentum=self.conf.momentum)

    # training function
    self.train_fn = theano.function([input_var_first, input_var_second, target_var], loss, updates=updates)

    # function to reconstruct
    test_reconstruction = get_output(self.autoencoder, deterministic=True)
    self.reconstruction_fn = theano.function([input_var_first, input_var_second], test_reconstruction)

    # encoding function
    test_encode = get_output([encoder_first, encoder_second], deterministic=True)
    self.encoding_fn = theano.function([input_var_first, input_var_second], test_encode)

    # utils
    blas = lambda name, ndarray: scipy.linalg.get_blas_funcs((name,), (ndarray,))[0]
    self.blas_nrm2 = blas('nrm2', np.array([], dtype=float))
    self.blas_scal = blas('scal', np.array([], dtype=float))

    # load weights if necessary
    if self.conf.load_model is not None:
        self.load_model()
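The if/elif chain above that maps the configured activation name to a Lasagne nonlinearity could also be written as a dictionary lookup; a small sketch of that alternative (the ACTIVATIONS table and resolve_activation helper are hypothetical, not part of the original code):

# Hypothetical dict-based lookup, equivalent to the if/elif chain above.
from lasagne.nonlinearities import linear, sigmoid, rectify, tanh

ACTIVATIONS = {"linear": linear, "sigmoid": sigmoid, "relu": rectify, "tanh": tanh}

def resolve_activation(name):
    try:
        return ACTIVATIONS[name]
    except KeyError:
        raise ValueError("Unknown activation function", name)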
def run_task(*_):
    trpo_stepsize = 0.01
    trpo_subsample_factor = 0.2

    env = PointGatherEnv(apple_reward=10, bomb_cost=1, n_apples=2, activity_range=6)

    policy = GaussianMLPPolicy(env.spec,
                               hidden_sizes=(64, 32))

    baseline = GaussianMLPBaseline(
        env_spec=env.spec,
        regressor_args={
            'hidden_sizes': (64, 32),
            'hidden_nonlinearity': NL.tanh,
            'learn_std': False,
            'step_size': trpo_stepsize,
            'optimizer': ConjugateGradientOptimizer(subsample_factor=trpo_subsample_factor)
        }
    )

    safety_baseline = GaussianMLPBaseline(
        env_spec=env.spec,
        regressor_args={
            'hidden_sizes': (64, 32),
            'hidden_nonlinearity': NL.tanh,
            'learn_std': False,
            'step_size': trpo_stepsize,
            'optimizer': ConjugateGradientOptimizer(subsample_factor=trpo_subsample_factor)
        },
        target_key='safety_returns',
    )

    safety_constraint = GatherSafetyConstraint(max_value=0.1, baseline=safety_baseline)

    algo = CPO(
        env=env,
        policy=policy,
        baseline=baseline,
        safety_constraint=safety_constraint,
        safety_gae_lambda=1,
        batch_size=50000,
        max_path_length=15,
        n_itr=100,
        gae_lambda=0.95,
        discount=0.995,
        step_size=trpo_stepsize,
        optimizer_args={'subsample_factor': trpo_subsample_factor},
        # plot=True,
    )

    algo.train()
def build(input_height, input_width, concat_var):
    """
    Build the discriminator, all weights initialized from scratch
    :param input_height:
    :param input_width:
    :param concat_var: Theano symbolic tensor variable
    :return: Dictionary that contains the discriminator
    """
    net = {'input': InputLayer((None, 4, input_height, input_width), input_var=concat_var)}
    print "Input: {}".format(net['input'].output_shape[1:])

    net['merge'] = ConvLayer(net['input'], 3, 1, pad=0, flip_filters=False)
    print "merge: {}".format(net['merge'].output_shape[1:])

    net['conv1'] = ConvLayer(net['merge'], 32, 3, pad=1)
    print "conv1: {}".format(net['conv1'].output_shape[1:])

    net['pool1'] = PoolLayer(net['conv1'], 4)
    print "pool1: {}".format(net['pool1'].output_shape[1:])

    net['conv2_1'] = ConvLayer(net['pool1'], 64, 3, pad=1)
    print "conv2_1: {}".format(net['conv2_1'].output_shape[1:])

    net['conv2_2'] = ConvLayer(net['conv2_1'], 64, 3, pad=1)
    print "conv2_2: {}".format(net['conv2_2'].output_shape[1:])

    net['pool2'] = PoolLayer(net['conv2_2'], 2)
    print "pool2: {}".format(net['pool2'].output_shape[1:])

    net['conv3_1'] = nn.weight_norm(ConvLayer(net['pool2'], 64, 3, pad=1))
    print "conv3_1: {}".format(net['conv3_1'].output_shape[1:])

    net['conv3_2'] = nn.weight_norm(ConvLayer(net['conv3_1'], 64, 3, pad=1))
    print "conv3_2: {}".format(net['conv3_2'].output_shape[1:])

    net['pool3'] = PoolLayer(net['conv3_2'], 2)
    print "pool3: {}".format(net['pool3'].output_shape[1:])

    net['fc4'] = DenseLayer(net['pool3'], num_units=100, nonlinearity=tanh)
    print "fc4: {}".format(net['fc4'].output_shape[1:])

    net['fc5'] = DenseLayer(net['fc4'], num_units=2, nonlinearity=tanh)
    print "fc5: {}".format(net['fc5'].output_shape[1:])

    net['prob'] = DenseLayer(net['fc5'], num_units=1, nonlinearity=sigmoid)
    print "prob: {}".format(net['prob'].output_shape[1:])

    return net
def create_model(dbn, input_shape, input_var, mask_shape, mask_var,
                 lstm_size=250, win=T.iscalar('theta'),
                 output_classes=26, w_init_fn=GlorotUniform,
                 use_peepholes=False, use_blstm=True):
    weights, biases, shapes, nonlinearities = dbn

    gate_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    symbolic_batchsize = l_in.input_var.shape[0]
    symbolic_seqlen = l_in.input_var.shape[1]

    l_reshape1 = ReshapeLayer(l_in, (-1, input_shape[-1]), name='reshape1')
    l_encoder = create_pretrained_encoder(l_reshape1, weights, biases, shapes, nonlinearities,
                                          ['fc1', 'fc2', 'fc3', 'bottleneck'])
    encoder_len = las.layers.get_output_shape(l_encoder)[-1]
    l_reshape2 = ReshapeLayer(l_encoder, (symbolic_batchsize, symbolic_seqlen, encoder_len), name='reshape2')
    l_delta = DeltaLayer(l_reshape2, win, name='delta')

    if use_blstm:
        l_lstm, l_lstm_back = create_blstm(l_delta, l_mask, lstm_size, cell_parameters, gate_parameters,
                                           'blstm1', use_peepholes)

        # We'll combine the forward and backward layer output by summing.
        # Merge layers take in lists of layers to merge as input.
        l_sum1 = ElemwiseSumLayer([l_lstm, l_lstm_back], name='sum1')

        # reshape, flatten to 2 dimensions to run softmax on all timesteps
        l_reshape3 = ReshapeLayer(l_sum1, (-1, lstm_size), name='reshape3')
    else:
        l_lstm = create_lstm(l_delta, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm', use_peepholes)
        l_reshape3 = ReshapeLayer(l_lstm, (-1, lstm_size), name='reshape3')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use the number of classes.
    l_softmax = DenseLayer(
        l_reshape3, num_units=output_classes,
        nonlinearity=las.nonlinearities.softmax, name='softmax')

    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen, output_classes), name='output')

    return l_out
def create_model(dbn, input_shape, input_var, mask_shape, mask_var, lstm_size=250, output_classes=26):
    dbn_layers = dbn.get_all_layers()
    weights = []
    biases = []
    weights.append(dbn_layers[1].W.astype('float32'))
    weights.append(dbn_layers[2].W.astype('float32'))
    weights.append(dbn_layers[3].W.astype('float32'))
    weights.append(dbn_layers[4].W.astype('float32'))
    biases.append(dbn_layers[1].b.astype('float32'))
    biases.append(dbn_layers[2].b.astype('float32'))
    biases.append(dbn_layers[3].b.astype('float32'))
    biases.append(dbn_layers[4].b.astype('float32'))

    gate_parameters = Gate(
        W_in=las.init.Orthogonal(), W_hid=las.init.Orthogonal(),
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=las.init.Orthogonal(), W_hid=las.init.Orthogonal(),
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    symbolic_batchsize = l_in.input_var.shape[0]
    symbolic_seqlen = l_in.input_var.shape[1]

    l_reshape1 = ReshapeLayer(l_in, (-1, input_shape[-1]), name='reshape1')
    l_encoder = create_pretrained_encoder(weights, biases, l_reshape1)
    encoder_len = las.layers.get_output_shape(l_encoder)[-1]
    l_reshape2 = ReshapeLayer(l_encoder, (symbolic_batchsize, symbolic_seqlen, encoder_len), name='reshape2')

    # l_delta = DeltaLayer(l_reshape2, win, name='delta')
    # l_lstm = create_lstm(l_reshape2, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm1')
    l_lstm, l_lstm_back = create_blstm(l_reshape2, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm1')

    # We'll combine the forward and backward layer output by summing.
    # Merge layers take in lists of layers to merge as input.
    l_sum1 = ElemwiseSumLayer([l_lstm, l_lstm_back], name='sum1')
    l_forward_slice1 = SliceLayer(l_sum1, -1, 1, name='slice1')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use the number of classes.
    l_out = DenseLayer(
        l_forward_slice1, num_units=output_classes,
        nonlinearity=las.nonlinearities.softmax, name='output')

    return l_out
def create_pretrained_substream(weights, biases, input_shape, input_var, mask_shape, mask_var, name,
                                lstm_size=250, win=T.iscalar('theta'),
                                nonlinearity=rectify, w_init_fn=las.init.Orthogonal(), use_peepholes=True):
    gate_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_input = InputLayer(input_shape, input_var, 'input_' + name)
    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    symbolic_batchsize_raw = l_input.input_var.shape[0]
    symbolic_seqlen_raw = l_input.input_var.shape[1]

    l_reshape1_raw = ReshapeLayer(l_input, (-1, input_shape[-1]), name='reshape1_' + name)
    l_encoder_raw = create_pretrained_encoder(l_reshape1_raw, weights, biases,
                                              [2000, 1000, 500, 50],
                                              [nonlinearity, nonlinearity, nonlinearity, linear],
                                              ['fc1_' + name, 'fc2_' + name, 'fc3_' + name, 'bottleneck_' + name])
    input_len = las.layers.get_output_shape(l_encoder_raw)[-1]

    l_reshape2 = ReshapeLayer(l_encoder_raw,
                              (symbolic_batchsize_raw, symbolic_seqlen_raw, input_len),
                              name='reshape2_' + name)
    l_delta = DeltaLayer(l_reshape2, win, name='delta_' + name)

    l_lstm = LSTMLayer(
        l_delta, int(lstm_size), peepholes=use_peepholes,
        # We need to specify a separate input for masks
        mask_input=l_mask,
        # Here, we supply the gate parameters for each gate
        ingate=gate_parameters, forgetgate=gate_parameters,
        cell=cell_parameters, outgate=gate_parameters,
        # We'll learn the initialization and use gradient clipping
        learn_init=True, grad_clipping=5.,
        name='lstm_' + name)
    return l_lstm