The following 50 code examples, extracted from open-source Python projects, illustrate how to use lasagne.nonlinearities.softmax().
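Across these examples the recurring pattern is the same: softmax is passed as the nonlinearity of the final DenseLayer (or applied through a NonlinearityLayer) so the network outputs per-class probabilities, usually paired later with categorical crossentropy. Below is a minimal, self-contained sketch of that pattern; the layer sizes and variable names are illustrative only and are not taken from any particular example.

```python
import numpy as np
import theano
import theano.tensor as T
from lasagne.layers import InputLayer, DenseLayer, get_output
from lasagne.nonlinearities import rectify, softmax

# Minimal classifier: the softmax nonlinearity turns the final dense
# layer's activations into class probabilities.
input_var = T.matrix('inputs')
l_in = InputLayer(shape=(None, 100), input_var=input_var)
l_hid = DenseLayer(l_in, num_units=50, nonlinearity=rectify)
l_out = DenseLayer(l_hid, num_units=10, nonlinearity=softmax)

# Compile a function mapping inputs to per-class probabilities.
predict_fn = theano.function([input_var], get_output(l_out, deterministic=True))
probs = predict_fn(np.random.rand(4, 100).astype(theano.config.floatX))
print(probs.shape)  # (4, 10); each row sums to 1
```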
def build_mlp(input_var=None):
    l_in = InputLayer(shape=(None, 1, 28, 28), input_var=input_var)
    l_hid1 = DenseLayer(
        l_in, num_units=500,
        nonlinearity=rectify,
        W=lasagne.init.GlorotUniform())
    l_hid1_drop = DropoutLayer(l_hid1, p=0.4)
    l_hid2 = DenseLayer(
        l_hid1_drop, num_units=300,
        nonlinearity=rectify)
    l_hid2_drop = DropoutLayer(l_hid2, p=0.4)
    l_out = DenseLayer(
        l_hid2_drop, num_units=10,
        nonlinearity=softmax)
    return l_out

# generator giving the batches

def __build_48_net__(self):
    network = layers.InputLayer((None, 3, 48, 48), input_var=self.__input_var__)
    network = layers.Conv2DLayer(network, num_filters=64, filter_size=(5, 5), stride=1, nonlinearity=relu)
    network = layers.MaxPool2DLayer(network, pool_size=(3, 3), stride=2)
    network = layers.batch_norm(network)
    network = layers.Conv2DLayer(network, num_filters=64, filter_size=(5, 5), stride=1, nonlinearity=relu)
    network = layers.batch_norm(network)
    network = layers.MaxPool2DLayer(network, pool_size=(3, 3), stride=2)
    network = layers.Conv2DLayer(network, num_filters=64, filter_size=(3, 3), stride=1, nonlinearity=relu)
    network = layers.batch_norm(network)
    network = layers.MaxPool2DLayer(network, pool_size=(3, 3), stride=2)
    network = layers.DenseLayer(network, num_units=256, nonlinearity=relu)
    network = layers.DenseLayer(network, num_units=2, nonlinearity=softmax)
    return network

def network_classifier(self, input_var):
    network = {}
    network['classifier/input'] = InputLayer(shape=(None, 3, 64, 64), input_var=input_var, name='classifier/input')
    network['classifier/conv1'] = Conv2DLayer(network['classifier/input'], num_filters=32, filter_size=3, stride=1, pad='valid', nonlinearity=rectify, name='classifier/conv1')
    network['classifier/pool1'] = MaxPool2DLayer(network['classifier/conv1'], pool_size=2, stride=2, pad=0, name='classifier/pool1')
    network['classifier/conv2'] = Conv2DLayer(network['classifier/pool1'], num_filters=32, filter_size=3, stride=1, pad='valid', nonlinearity=rectify, name='classifier/conv2')
    network['classifier/pool2'] = MaxPool2DLayer(network['classifier/conv2'], pool_size=2, stride=2, pad=0, name='classifier/pool2')
    network['classifier/conv3'] = Conv2DLayer(network['classifier/pool2'], num_filters=32, filter_size=3, stride=1, pad='valid', nonlinearity=rectify, name='classifier/conv3')
    network['classifier/pool3'] = MaxPool2DLayer(network['classifier/conv3'], pool_size=2, stride=2, pad=0, name='classifier/pool3')
    network['classifier/conv4'] = Conv2DLayer(network['classifier/pool3'], num_filters=32, filter_size=3, stride=1, pad='valid', nonlinearity=rectify, name='classifier/conv4')
    network['classifier/pool4'] = MaxPool2DLayer(network['classifier/conv4'], pool_size=2, stride=2, pad=0, name='classifier/pool4')
    network['classifier/dense1'] = DenseLayer(network['classifier/pool4'], num_units=64, nonlinearity=rectify, name='classifier/dense1')
    network['classifier/output'] = DenseLayer(network['classifier/dense1'], num_units=10, nonlinearity=softmax, name='classifier/output')

    return network

def build_cnn(self):
    # Building the network
    layer_in = InputLayer(shape=(None, 784), input_var=self.input_var)

    # Hidden layer
    layer = DenseLayer(
        layer_in,
        num_units=self.hidden_size,
        W=lasagne.init.Uniform(
            range=(-np.sqrt(6. / (784 + self.hidden_size)),
                   np.sqrt(6. / (784 + self.hidden_size)))),
        nonlinearity=tanh,
    )

    # LR layer
    layer = DenseLayer(
        layer,
        num_units=self.output_size,
        W=lasagne.init.Constant(0.),
        nonlinearity=softmax,
    )

    return layer

def __init__(
        self,
        env_spec,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=NL.tanh,
        num_seq_inputs=1,
        prob_network=None,
):
    """
    :param env_spec: A spec for the mdp.
    :param hidden_sizes: list of sizes for the fully connected hidden layers
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :param prob_network: manually specified network for this policy, other network params
        are ignored
    :return:
    """
    Serializable.quick_init(self, locals())

    assert isinstance(env_spec.action_space, Discrete)

    if prob_network is None:
        prob_network = MLP(
            input_shape=(env_spec.observation_space.flat_dim * num_seq_inputs,),
            output_dim=env_spec.action_space.n,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=NL.softmax,
        )

    self._l_prob = prob_network.output_layer
    self._l_obs = prob_network.input_layer
    self._f_prob = ext.compile_function([prob_network.input_layer.input_var],
                                        L.get_output(prob_network.output_layer))

    self._dist = Categorical(env_spec.action_space.n)

    super(CategoricalMLPPolicy, self).__init__(env_spec)
    LasagnePowered.__init__(self, [prob_network.output_layer])

def calc_loss(prediction, targets):
    # categorical crossentropy is the best choice for a multi-class softmax output
    loss = T.mean(objectives.categorical_crossentropy(prediction, targets))
    return loss

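A loss like this is usually wired into a Theano training function together with lasagne.updates. Below is a hedged sketch of that wiring, assuming a softmax output layer `network` and its `input_var`; the adam optimizer, the function name, and all variable names here are illustrative assumptions rather than part of the example above.

```python
import theano
import theano.tensor as T
import lasagne
from lasagne import objectives

def compile_train_fn(network, input_var, learning_rate=1e-3):
    # `network` is assumed to be a Lasagne layer whose output is a softmax
    # over classes; `targets` are integer class labels.
    targets = T.ivector('targets')
    prediction = lasagne.layers.get_output(network)
    # same loss as calc_loss above: mean categorical crossentropy
    loss = T.mean(objectives.categorical_crossentropy(prediction, targets))
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.adam(loss, params, learning_rate=learning_rate)
    return theano.function([input_var, targets], loss, updates=updates)
```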
def create_network():
    l = 1000
    pool_size = 5
    test_size1 = 13
    test_size2 = 7
    test_size3 = 5
    kernel1 = 128
    kernel2 = 128
    kernel3 = 128
    layer1 = InputLayer(shape=(None, 1, 4, l + 1024))
    layer2_1 = SliceLayer(layer1, indices=slice(0, l), axis=-1)
    layer2_2 = SliceLayer(layer1, indices=slice(l, None), axis=-1)
    layer2_3 = SliceLayer(layer2_2, indices=slice(0, 4), axis=-2)
    layer2_f = FlattenLayer(layer2_3)
    layer3 = Conv2DLayer(layer2_1, num_filters=kernel1, filter_size=(4, test_size1))
    layer4 = Conv2DLayer(layer3, num_filters=kernel1, filter_size=(1, test_size1))
    layer5 = Conv2DLayer(layer4, num_filters=kernel1, filter_size=(1, test_size1))
    layer6 = MaxPool2DLayer(layer5, pool_size=(1, pool_size))
    layer7 = Conv2DLayer(layer6, num_filters=kernel2, filter_size=(1, test_size2))
    layer8 = Conv2DLayer(layer7, num_filters=kernel2, filter_size=(1, test_size2))
    layer9 = Conv2DLayer(layer8, num_filters=kernel2, filter_size=(1, test_size2))
    layer10 = MaxPool2DLayer(layer9, pool_size=(1, pool_size))
    layer11 = Conv2DLayer(layer10, num_filters=kernel3, filter_size=(1, test_size3))
    layer12 = Conv2DLayer(layer11, num_filters=kernel3, filter_size=(1, test_size3))
    layer13 = Conv2DLayer(layer12, num_filters=kernel3, filter_size=(1, test_size3))
    layer14 = MaxPool2DLayer(layer13, pool_size=(1, pool_size))
    layer14_d = DenseLayer(layer14, num_units=256)
    layer3_2 = DenseLayer(layer2_f, num_units=128)
    layer15 = ConcatLayer([layer14_d, layer3_2])
    layer16 = DropoutLayer(layer15, p=0.5)
    layer17 = DenseLayer(layer16, num_units=256)
    network = DenseLayer(layer17, num_units=2, nonlinearity=softmax)
    return network

# random search to initialize the weights

def get_output_for(self, input, **kwargs):
    activation = T.dot(input, self.C)
    if self.b is not None:
        activation = activation + self.b.dimshuffle('x', 0)
    return nonlinearities.softmax(activation)

def __init__(self, x, y, args):
    self.params_theta = []
    self.params_lambda = []
    self.params_weight = []
    if args.dataset == 'mnist':
        input_size = (None, 1, 28, 28)
    elif args.dataset == 'cifar10':
        input_size = (None, 3, 32, 32)
    else:
        raise AssertionError

    layers = [ll.InputLayer(input_size)]
    self.penalty = theano.shared(np.array(0.))

    # conv1
    layers.append(Conv2DLayerWithReg(args, layers[-1], 20, 5))
    self.add_params_to_self(args, layers[-1])
    layers.append(ll.MaxPool2DLayer(layers[-1], pool_size=2, stride=2))
    # conv2
    layers.append(Conv2DLayerWithReg(args, layers[-1], 50, 5))
    self.add_params_to_self(args, layers[-1])
    layers.append(ll.MaxPool2DLayer(layers[-1], pool_size=2, stride=2))
    # fc1
    layers.append(DenseLayerWithReg(args, layers[-1], num_units=500))
    self.add_params_to_self(args, layers[-1])
    # softmax
    layers.append(DenseLayerWithReg(args, layers[-1], num_units=10,
                                    nonlinearity=nonlinearities.softmax))
    self.add_params_to_self(args, layers[-1])

    self.layers = layers
    self.y = ll.get_output(layers[-1], x, deterministic=False)
    self.prediction = T.argmax(self.y, axis=1)
    # self.penalty = penalty if penalty != 0. else T.constant(0.)
    print(self.params_lambda)
    # time.sleep(20)

    # cost function
    self.loss = T.mean(categorical_crossentropy(self.y, y))
    self.lossWithPenalty = T.add(self.loss, self.penalty)
    print "loss and losswithpenalty", type(self.loss), type(self.lossWithPenalty)

def __init__(self, W_g=init.Normal(0.1), W_s=init.Normal(0.1), W_h=init.Normal(0.1),
             W_v=init.Normal(0.1), nonlinearity=nonlinearities.softmax):
    self.W_s = W_s
    self.W_h = W_h
    self.W_g = W_g
    self.W_v = W_v
    if nonlinearity is None:
        self.nonlinearity = nonlinearities.identity
    else:
        self.nonlinearity = nonlinearity

def __build_12_net__(self):
    network = layers.InputLayer((None, 3, 12, 12), input_var=self.__input_var__)
    network = layers.dropout(network, p=0.1)
    network = layers.Conv2DLayer(network, num_filters=16, filter_size=(3, 3), stride=1, nonlinearity=relu)
    network = layers.batch_norm(network)
    network = layers.MaxPool2DLayer(network, pool_size=(3, 3), stride=2)
    network = layers.DropoutLayer(network, p=0.3)
    network = layers.DenseLayer(network, num_units=16, nonlinearity=relu)
    network = layers.batch_norm(network)
    network = layers.DropoutLayer(network, p=0.3)
    network = layers.DenseLayer(network, num_units=2, nonlinearity=softmax)
    return network

def __build_24_net__(self):
    network = layers.InputLayer((None, 3, 24, 24), input_var=self.__input_var__)
    network = layers.dropout(network, p=0.1)
    network = layers.Conv2DLayer(network, num_filters=64, filter_size=(5, 5), stride=1, nonlinearity=relu)
    network = layers.batch_norm(network)
    network = layers.MaxPool2DLayer(network, pool_size=(3, 3), stride=2)
    network = layers.DropoutLayer(network, p=0.5)
    network = layers.batch_norm(network)
    network = layers.DenseLayer(network, num_units=64, nonlinearity=relu)
    network = layers.DropoutLayer(network, p=0.5)
    network = layers.DenseLayer(network, num_units=2, nonlinearity=softmax)
    return network

def __build_12_calib_net__(self):
    network = layers.InputLayer((None, 3, 12, 12), input_var=self.__input_var__)
    network = layers.Conv2DLayer(network, num_filters=16, filter_size=(3, 3), stride=1, nonlinearity=relu)
    network = layers.MaxPool2DLayer(network, pool_size=(3, 3), stride=2)
    network = layers.DenseLayer(network, num_units=128, nonlinearity=relu)
    network = layers.DenseLayer(network, num_units=45, nonlinearity=softmax)
    return network

def __build_24_calib_net__(self):
    network = layers.InputLayer((None, 3, 24, 24), input_var=self.__input_var__)
    network = layers.Conv2DLayer(network, num_filters=32, filter_size=(5, 5), stride=1, nonlinearity=relu)
    network = layers.MaxPool2DLayer(network, pool_size=(3, 3), stride=2)
    network = layers.DenseLayer(network, num_units=64, nonlinearity=relu)
    network = layers.DenseLayer(network, num_units=45, nonlinearity=softmax)
    return network

def network_discriminator(self, features):
    network = {}
    network['discriminator/conv2'] = Conv2DLayer(features, num_filters=32, filter_size=3, stride=1, pad='valid', nonlinearity=rectify, name='discriminator/conv2')
    network['discriminator/pool2'] = MaxPool2DLayer(network['discriminator/conv2'], pool_size=2, stride=2, pad=0, name='discriminator/pool2')
    network['discriminator/conv3'] = Conv2DLayer(network['discriminator/pool2'], num_filters=32, filter_size=3, stride=1, pad='valid', nonlinearity=rectify, name='discriminator/conv3')
    network['discriminator/pool3'] = MaxPool2DLayer(network['discriminator/conv3'], pool_size=2, stride=2, pad=0, name='discriminator/pool3')
    network['discriminator/conv4'] = Conv2DLayer(network['discriminator/pool3'], num_filters=32, filter_size=3, stride=1, pad='valid', nonlinearity=rectify, name='discriminator/conv4')
    network['discriminator/pool4'] = MaxPool2DLayer(network['discriminator/conv4'], pool_size=2, stride=2, pad=0, name='discriminator/pool4')
    network['discriminator/dense1'] = DenseLayer(network['discriminator/pool4'], num_units=64, nonlinearity=rectify, name='discriminator/dense1')
    network['discriminator/output'] = DenseLayer(network['discriminator/dense1'], num_units=2, nonlinearity=softmax, name='discriminator/output')

    return network

def build_model(input_var):
    net = {}
    net['input'] = InputLayer((None, 3, 224, 224), input_var=input_var)
    net['conv1_1'] = ConvLayer(net['input'], 64, 3, pad=1, flip_filters=False)
    net['conv1_2'] = ConvLayer(net['conv1_1'], 64, 3, pad=1, flip_filters=False)
    net['pool1'] = PoolLayer(net['conv1_2'], 2)
    net['conv2_1'] = ConvLayer(net['pool1'], 128, 3, pad=1, flip_filters=False)
    net['conv2_2'] = ConvLayer(net['conv2_1'], 128, 3, pad=1, flip_filters=False)
    net['pool2'] = PoolLayer(net['conv2_2'], 2)
    net['conv3_1'] = ConvLayer(net['pool2'], 256, 3, pad=1, flip_filters=False)
    net['conv3_2'] = ConvLayer(net['conv3_1'], 256, 3, pad=1, flip_filters=False)
    net['conv3_3'] = ConvLayer(net['conv3_2'], 256, 3, pad=1, flip_filters=False)
    net['pool3'] = PoolLayer(net['conv3_3'], 2)
    net['conv4_1'] = ConvLayer(net['pool3'], 512, 3, pad=1, flip_filters=False)
    net['conv4_2'] = ConvLayer(net['conv4_1'], 512, 3, pad=1, flip_filters=False)
    net['conv4_3'] = ConvLayer(net['conv4_2'], 512, 3, pad=1, flip_filters=False)
    net['pool4'] = PoolLayer(net['conv4_3'], 2)
    net['conv5_1'] = ConvLayer(net['pool4'], 512, 3, pad=1, flip_filters=False)
    net['conv5_2'] = ConvLayer(net['conv5_1'], 512, 3, pad=1, flip_filters=False)
    net['conv5_3'] = ConvLayer(net['conv5_2'], 512, 3, pad=1, flip_filters=False)
    net['pool5'] = PoolLayer(net['conv5_3'], 2)
    net['fc6'] = DenseLayer(net['pool5'], num_units=4096)
    net['fc6_dropout'] = DropoutLayer(net['fc6'], p=0.5)
    net['fc7'] = DenseLayer(net['fc6_dropout'], num_units=4096)
    net['fc7_dropout'] = DropoutLayer(net['fc7'], p=0.5)
    net['fc8'] = DenseLayer(net['fc7_dropout'], num_units=1000, nonlinearity=None)
    net['prob'] = NonlinearityLayer(net['fc8'], softmax)

    return net

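For dict-style models like this VGG-16 definition, the softmax probabilities are typically obtained by compiling a Theano function over the 'prob' layer. A brief sketch, assuming the build_model function above is in scope; the zero-filled batch is just a placeholder input.

```python
import numpy as np
import theano
import theano.tensor as T
import lasagne

input_var = T.tensor4('inputs')
net = build_model(input_var)

# 'prob' is the softmax output layer; deterministic=True disables dropout.
prob_fn = theano.function(
    [input_var],
    lasagne.layers.get_output(net['prob'], deterministic=True))

images = np.zeros((2, 3, 224, 224), dtype=theano.config.floatX)  # placeholder batch
print(prob_fn(images).shape)  # (2, 1000)
```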
def __init__(
        self,
        env_spec,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=NL.tanh,
        num_seq_inputs=1,
        prob_network=None,
):
    """
    :param env_spec: A spec for the mdp.
    :param hidden_sizes: list of sizes for the fully connected hidden layers
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :param prob_network: manually specified network for this policy, other network params
        are ignored
    :return:
    """
    Serializable.quick_init(self, locals())

    assert isinstance(env_spec.action_space, Discrete)

    if prob_network is None:
        prob_network = MLP(
            input_shape=(
                env_spec.observation_space.flat_dim * num_seq_inputs,),
            output_dim=env_spec.action_space.n,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=NL.softmax,
        )

    self._l_prob = prob_network.output_layer
    self._l_obs = prob_network.input_layer
    self._f_prob = ext.compile_function([prob_network.input_layer.input_var],
                                        L.get_output(prob_network.output_layer))

    self._dist = Categorical(env_spec.action_space.n)

    super(CategoricalMLPPolicy, self).__init__(env_spec)
    LasagnePowered.__init__(self, [prob_network.output_layer])

def build_classification_network(r1):
    if not isinstance(r1, lasagne.layers.Layer):
        l_in = lasagne.layers.InputLayer((None, glimpse_output_size, recurrent_output_size), r1)
    else:
        l_in = r1
    output = lasagne.layers.DenseLayer(l_in, classification_units, nonlinearity=nl.softmax,
                                       W=class_weights, b=class_bias)
    return output

# input is downsampled batch of images
# output is initial r2, of length glimpse_output_size

def build_cnn(self, input_var=None):
    # Building the network
    layer_in = InputLayer(shape=(None, 3, 32, 32), input_var=input_var)

    # Conv1
    # [NOTE]: normal vs. truncated normal?
    # [NOTE]: conv in lasagne is not same as it in TensorFlow.
    layer = ConvLayer(layer_in, num_filters=64, filter_size=(3, 3), stride=(1, 1),
                      nonlinearity=rectify, pad='same', W=lasagne.init.HeNormal(),
                      flip_filters=False)
    # Pool1
    layer = MaxPool2DLayer(layer, pool_size=(3, 3), stride=(2, 2))
    # Norm1
    layer = LocalResponseNormalization2DLayer(layer, alpha=0.001 / 9.0, k=1.0, beta=0.75)

    # Conv2
    layer = ConvLayer(layer, num_filters=64, filter_size=(5, 5), stride=(1, 1),
                      nonlinearity=rectify, pad='same', W=lasagne.init.HeNormal(),
                      flip_filters=False)
    # Norm2
    # [NOTE]: n must be odd, but n in Chang's code is 4?
    layer = LocalResponseNormalization2DLayer(layer, alpha=0.001 / 9.0, k=1.0, beta=0.75)
    # Pool2
    layer = MaxPool2DLayer(layer, pool_size=(3, 3), stride=(2, 2))

    # Reshape
    layer = lasagne.layers.ReshapeLayer(layer, shape=([0], -1))

    # Dense3
    layer = DenseLayer(layer, num_units=384, W=lasagne.init.HeNormal(), b=lasagne.init.Constant(0.1))

    # Dense4
    layer = DenseLayer(layer, num_units=192, W=lasagne.init.Normal(std=0.04), b=lasagne.init.Constant(0.1))

    # Softmax
    layer = DenseLayer(layer, num_units=self.output_size,
                       W=lasagne.init.Normal(std=1. / 192.0), nonlinearity=softmax)

    return layer

def __init__(
        self,
        env_spec,
        latent_dim=0,    # all this is fake
        latent_name='categorical',
        bilinear_integration=False,
        resample=False,  # until here
        hidden_sizes=(32, 32),
        hidden_nonlinearity=NL.tanh,
        prob_network=None,
):
    """
    :param env_spec: A spec for the mdp.
    :param hidden_sizes: list of sizes for the fully connected hidden layers
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :param prob_network: manually specified network for this policy, other network params
        are ignored
    :return:
    """
    # bullshit
    self.latent_dim = latent_dim  ## could I avoid needing this self for the get_action?
    self.latent_name = latent_name
    self.bilinear_integration = bilinear_integration
    self.resample = resample
    self._set_std_to_0 = False

    Serializable.quick_init(self, locals())

    assert isinstance(env_spec.action_space, Discrete)

    if prob_network is None:
        prob_network = MLP(
            input_shape=(env_spec.observation_space.flat_dim,),
            output_dim=env_spec.action_space.n,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=NL.softmax,
        )

    self._l_prob = prob_network.output_layer
    self._l_obs = prob_network.input_layer
    self._f_prob = ext.compile_function([prob_network.input_layer.input_var],
                                        L.get_output(prob_network.output_layer))

    self._dist = Categorical(env_spec.action_space.n)

    super(CategoricalMLPPolicy, self).__init__(env_spec)
    LasagnePowered.__init__(self, [prob_network.output_layer])

def __init__(self, input_shape, output_dim, hidden_sizes,
             conv_filters, conv_filter_sizes, conv_strides, conv_pads,
             hidden_W_init=LI.GlorotUniform(), hidden_b_init=LI.Constant(0.),
             output_W_init=LI.GlorotUniform(), output_b_init=LI.Constant(0.),
             # conv_W_init=LI.GlorotUniform(), conv_b_init=LI.Constant(0.),
             hidden_nonlinearity=LN.rectify,
             output_nonlinearity=LN.softmax,
             name=None, input_var=None):

    if name is None:
        prefix = ""
    else:
        prefix = name + "_"

    if len(input_shape) == 3:
        l_in = L.InputLayer(shape=(None, np.prod(input_shape)), input_var=input_var)
        l_hid = L.reshape(l_in, ([0],) + input_shape)
    elif len(input_shape) == 2:
        l_in = L.InputLayer(shape=(None, np.prod(input_shape)), input_var=input_var)
        input_shape = (1,) + input_shape
        l_hid = L.reshape(l_in, ([0],) + input_shape)
    else:
        l_in = L.InputLayer(shape=(None,) + input_shape, input_var=input_var)
        l_hid = l_in
    for idx, conv_filter, filter_size, stride, pad in zip(
            range(len(conv_filters)),
            conv_filters,
            conv_filter_sizes,
            conv_strides,
            conv_pads,
    ):
        l_hid = L.Conv2DLayer(
            l_hid,
            num_filters=conv_filter,
            filter_size=filter_size,
            stride=(stride, stride),
            pad=pad,
            nonlinearity=hidden_nonlinearity,
            name="%sconv_hidden_%d" % (prefix, idx),
            convolution=wrapped_conv,
        )
    for idx, hidden_size in enumerate(hidden_sizes):
        l_hid = L.DenseLayer(
            l_hid,
            num_units=hidden_size,
            nonlinearity=hidden_nonlinearity,
            name="%shidden_%d" % (prefix, idx),
            W=hidden_W_init,
            b=hidden_b_init,
        )
    l_out = L.DenseLayer(
        l_hid,
        num_units=output_dim,
        nonlinearity=output_nonlinearity,
        name="%soutput" % (prefix,),
        W=output_W_init,
        b=output_b_init,
    )
    self._l_in = l_in
    self._l_out = l_out
    self._input_var = l_in.input_var

def __init__(
        self,
        name,
        env_spec,
        conv_filters, conv_filter_sizes, conv_strides, conv_pads,
        hidden_sizes=[],
        hidden_nonlinearity=NL.rectify,
        output_nonlinearity=NL.softmax,
        prob_network=None,
):
    """
    :param env_spec: A spec for the mdp.
    :param hidden_sizes: list of sizes for the fully connected hidden layers
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :param prob_network: manually specified network for this policy, other network params
        are ignored
    :return:
    """
    Serializable.quick_init(self, locals())

    assert isinstance(env_spec.action_space, Discrete)

    self._env_spec = env_spec
    if prob_network is None:
        prob_network = ConvNetwork(
            input_shape=env_spec.observation_space.shape,
            output_dim=env_spec.action_space.n,
            conv_filters=conv_filters,
            conv_filter_sizes=conv_filter_sizes,
            conv_strides=conv_strides,
            conv_pads=conv_pads,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=NL.softmax,
            name="prob_network",
        )

    self._l_prob = prob_network.output_layer
    self._l_obs = prob_network.input_layer
    self._f_prob = ext.compile_function(
        [prob_network.input_layer.input_var],
        L.get_output(prob_network.output_layer)
    )

    self._dist = Categorical(env_spec.action_space.n)

    super(CategoricalConvPolicy, self).__init__(env_spec)
    LasagnePowered.__init__(self, [prob_network.output_layer])

def build_std_dropout_gru(incoming1, incoming2, num_units, num_labels, mask, grad_clipping, num_filters, p,
                          reset_input):
    # Construct Bi-directional LSTM-CNNs-CRF with standard dropout.
    # first get some necessary dimensions or parameters
    conv_window = 3
    # shape = [batch, n-step, c_dim, char_length]
    incoming1 = lasagne.layers.DropoutLayer(incoming1, p=p)

    # construct convolution layer
    # shape = [batch, n-step, c_filters, output_length]
    cnn_layer = ConvTimeStep1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
                                    nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # infer the pool size for pooling (pool size should go through all time step of cnn)
    _, _, _, pool_size = cnn_layer.output_shape
    # construct max pool layer
    # shape = [batch, n-step, c_filters, 1]
    pool_layer = PoolTimeStep1DLayer(cnn_layer, pool_size=pool_size)
    # reshape: [batch, n-step, c_filters, 1] --> [batch, n-step, c_filters]
    output_cnn_layer = lasagne.layers.reshape(pool_layer, ([0], [1], [2]))

    # finally, concatenate the two incoming layers together.
    # shape = [batch, n-step, c_filter&w_dim]
    incoming = lasagne.layers.concat([output_cnn_layer, incoming2], axis=2)

    # dropout for incoming
    incoming = lasagne.layers.DropoutLayer(incoming, p=0.2)

    resetgate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                 W_cell=None, nonlinearity=nonlinearities.tanh)
    gru_forward = GRULayer(incoming, num_units, mask_input=mask,
                           resetgate=resetgate_forward, updategate=updategate_forward,
                           hidden_update=hidden_update_forward, grad_clipping=grad_clipping,
                           reset_input=reset_input, name='forward')

    resetgate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                  W_cell=None, nonlinearity=nonlinearities.tanh)
    gru_backward = GRULayer(incoming, num_units, mask_input=mask, backwards=True,
                            resetgate=resetgate_backward, updategate=updategate_backward,
                            hidden_update=hidden_update_backward, grad_clipping=grad_clipping,
                            reset_input=reset_input, name='backward')

    # concatenate the outputs of forward and backward LSTMs to combine them.
    bi_gru_cnn = lasagne.layers.concat([gru_forward, gru_backward], axis=2, name="bi-gru")

    bi_gru_cnn = lasagne.layers.DropoutLayer(bi_gru_cnn, p=p)

    # reshape bi-rnn-cnn to [batch * max_length, num_units]
    bi_gru_cnn = lasagne.layers.reshape(bi_gru_cnn, (-1, [2]))

    # construct output layer (dense layer with softmax)
    layer_output = lasagne.layers.DenseLayer(bi_gru_cnn, num_units=num_labels,
                                             nonlinearity=nonlinearities.softmax, name='softmax')

    return layer_output

def build_std_dropout_sgru(incoming1, incoming2, num_units, num_labels, mask, grad_clipping, num_filters, p):
    # Construct Bi-directional LSTM-CNNs-CRF with standard dropout.
    # first get some necessary dimensions or parameters
    conv_window = 3
    # shape = [batch, n-step, c_dim, char_length]
    incoming1 = lasagne.layers.DropoutLayer(incoming1, p=p)

    # construct convolution layer
    # shape = [batch, n-step, c_filters, output_length]
    cnn_layer = ConvTimeStep1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
                                    nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # infer the pool size for pooling (pool size should go through all time step of cnn)
    _, _, _, pool_size = cnn_layer.output_shape
    # construct max pool layer
    # shape = [batch, n-step, c_filters, 1]
    pool_layer = PoolTimeStep1DLayer(cnn_layer, pool_size=pool_size)
    # reshape: [batch, n-step, c_filters, 1] --> [batch, n-step, c_filters]
    output_cnn_layer = lasagne.layers.reshape(pool_layer, ([0], [1], [2]))

    # finally, concatenate the two incoming layers together.
    # shape = [batch, n-step, c_filter&w_dim]
    incoming = lasagne.layers.concat([output_cnn_layer, incoming2], axis=2)

    # dropout for incoming
    incoming = lasagne.layers.DropoutLayer(incoming, p=0.2)

    resetgate_input_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    resetgate_hidden_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                 W_cell=None, nonlinearity=nonlinearities.tanh)
    sgru_forward = SGRULayer(incoming, num_units, mask_input=mask,
                             resetgate_input=resetgate_input_forward, resetgate_hidden=resetgate_hidden_forward,
                             updategate=updategate_forward, hidden_update=hidden_update_forward,
                             grad_clipping=grad_clipping, name='forward')

    resetgate_input_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    resetgate_hidden_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                  W_cell=None, nonlinearity=nonlinearities.tanh)
    sgru_backward = SGRULayer(incoming, num_units, mask_input=mask, backwards=True,
                              resetgate_input=resetgate_input_backward, resetgate_hidden=resetgate_hidden_backward,
                              updategate=updategate_backward, hidden_update=hidden_update_backward,
                              grad_clipping=grad_clipping, name='backward')

    # concatenate the outputs of forward and backward LSTMs to combine them.
    bi_sgru_cnn = lasagne.layers.concat([sgru_forward, sgru_backward], axis=2, name="bi-sgru")

    bi_sgru_cnn = lasagne.layers.DropoutLayer(bi_sgru_cnn, p=p)

    # reshape bi-rnn-cnn to [batch * max_length, num_units]
    bi_sgru_cnn = lasagne.layers.reshape(bi_sgru_cnn, (-1, [2]))

    # construct output layer (dense layer with softmax)
    layer_output = lasagne.layers.DenseLayer(bi_sgru_cnn, num_units=num_labels,
                                             nonlinearity=nonlinearities.softmax, name='softmax')

    return layer_output

def build_recur_dropout_gru(incoming1, incoming2, num_units, num_labels, mask, grad_clipping, num_filters, p,
                            reset_input):
    # Construct Bi-directional LSTM-CNNs-CRF with recurrent dropout.
    # first get some necessary dimensions or parameters
    conv_window = 3
    # shape = [batch, n-step, c_dim, char_length]

    # construct convolution layer
    # shape = [batch, n-step, c_filters, output_length]
    cnn_layer = ConvTimeStep1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
                                    nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # infer the pool size for pooling (pool size should go through all time step of cnn)
    _, _, _, pool_size = cnn_layer.output_shape
    # construct max pool layer
    # shape = [batch, n-step, c_filters, 1]
    pool_layer = PoolTimeStep1DLayer(cnn_layer, pool_size=pool_size)
    # reshape: [batch, n-step, c_filters, 1] --> [batch, n-step, c_filters]
    output_cnn_layer = lasagne.layers.reshape(pool_layer, ([0], [1], [2]))

    # finally, concatenate the two incoming layers together.
    # shape = [batch, n-step, c_filter&w_dim]
    incoming = lasagne.layers.concat([output_cnn_layer, incoming2], axis=2)

    # dropout for incoming
    incoming = lasagne.layers.DropoutLayer(incoming, p=0.2, shared_axes=(1,))

    resetgate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                 W_cell=None, nonlinearity=nonlinearities.tanh)
    gru_forward = GRULayer(incoming, num_units, mask_input=mask,
                           resetgate=resetgate_forward, updategate=updategate_forward,
                           hidden_update=hidden_update_forward, grad_clipping=grad_clipping,
                           reset_input=reset_input, p=p, name='forward')

    resetgate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                  W_cell=None, nonlinearity=nonlinearities.tanh)
    gru_backward = GRULayer(incoming, num_units, mask_input=mask, backwards=True,
                            resetgate=resetgate_backward, updategate=updategate_backward,
                            hidden_update=hidden_update_backward, grad_clipping=grad_clipping,
                            reset_input=reset_input, p=p, name='backward')

    # concatenate the outputs of forward and backward LSTMs to combine them.
    bi_gru_cnn = lasagne.layers.concat([gru_forward, gru_backward], axis=2, name="bi-gru")

    # shape = [batch, n-step, num_units]
    bi_gru_cnn = lasagne.layers.DropoutLayer(bi_gru_cnn, p=p, shared_axes=(1,))

    # reshape bi-rnn-cnn to [batch * max_length, num_units]
    bi_gru_cnn = lasagne.layers.reshape(bi_gru_cnn, (-1, [2]))

    # construct output layer (dense layer with softmax)
    layer_output = lasagne.layers.DenseLayer(bi_gru_cnn, num_units=num_labels,
                                             nonlinearity=nonlinearities.softmax, name='softmax')

    return layer_output

def build_recur_dropout_sgru(incoming1, incoming2, num_units, num_labels, mask, grad_clipping, num_filters, p):
    # Construct Bi-directional LSTM-CNNs-CRF with recurrent dropout.
    # first get some necessary dimensions or parameters
    conv_window = 3
    # shape = [batch, n-step, c_dim, char_length]

    # construct convolution layer
    # shape = [batch, n-step, c_filters, output_length]
    cnn_layer = ConvTimeStep1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
                                    nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # infer the pool size for pooling (pool size should go through all time step of cnn)
    _, _, _, pool_size = cnn_layer.output_shape
    # construct max pool layer
    # shape = [batch, n-step, c_filters, 1]
    pool_layer = PoolTimeStep1DLayer(cnn_layer, pool_size=pool_size)
    # reshape: [batch, n-step, c_filters, 1] --> [batch, n-step, c_filters]
    output_cnn_layer = lasagne.layers.reshape(pool_layer, ([0], [1], [2]))

    # finally, concatenate the two incoming layers together.
    # shape = [batch, n-step, c_filter&w_dim]
    incoming = lasagne.layers.concat([output_cnn_layer, incoming2], axis=2)

    # dropout for incoming
    incoming = lasagne.layers.DropoutLayer(incoming, p=0.2, shared_axes=(1,))

    resetgate_input_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    resetgate_hidden_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                 W_cell=None, nonlinearity=nonlinearities.tanh)
    sgru_forward = SGRULayer(incoming, num_units, mask_input=mask,
                             resetgate_input=resetgate_input_forward, resetgate_hidden=resetgate_hidden_forward,
                             updategate=updategate_forward, hidden_update=hidden_update_forward,
                             grad_clipping=grad_clipping, p=p, name='forward')

    resetgate_input_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    resetgate_hidden_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                  W_cell=None, nonlinearity=nonlinearities.tanh)
    sgru_backward = SGRULayer(incoming, num_units, mask_input=mask, backwards=True,
                              resetgate_input=resetgate_input_backward, resetgate_hidden=resetgate_hidden_backward,
                              updategate=updategate_backward, hidden_update=hidden_update_backward,
                              grad_clipping=grad_clipping, p=p, name='backward')

    # concatenate the outputs of forward and backward LSTMs to combine them.
    bi_sgru_cnn = lasagne.layers.concat([sgru_forward, sgru_backward], axis=2, name="bi-sgru")

    # shape = [batch, n-step, num_units]
    bi_sgru_cnn = lasagne.layers.DropoutLayer(bi_sgru_cnn, p=p, shared_axes=(1,))

    # reshape bi-rnn-cnn to [batch * max_length, num_units]
    bi_sgru_cnn = lasagne.layers.reshape(bi_sgru_cnn, (-1, [2]))

    # construct output layer (dense layer with softmax)
    layer_output = lasagne.layers.DenseLayer(bi_sgru_cnn, num_units=num_labels,
                                             nonlinearity=nonlinearities.softmax, name='softmax')

    return layer_output

def buildModel(mtype=1):

    print "BUILDING MODEL TYPE", mtype, "..."

    #default settings (Model 1)
    filters = 64
    first_stride = 2
    last_filter_multiplier = 16

    #specific model type settings (see working notes for details)
    if mtype == 2:
        first_stride = 1
    elif mtype == 3:
        filters = 32
        last_filter_multiplier = 8

    #input layer
    net = l.InputLayer((None, IM_DIM, IM_SIZE[1], IM_SIZE[0]))

    #conv layers
    net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters, filter_size=7, pad='same', stride=first_stride, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.MaxPool2DLayer(net, pool_size=2)

    if mtype == 2:
        net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters, filter_size=5, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
        net = l.MaxPool2DLayer(net, pool_size=2)

    net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * 2, filter_size=5, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.MaxPool2DLayer(net, pool_size=2)

    net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * 4, filter_size=3, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.MaxPool2DLayer(net, pool_size=2)

    net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * 8, filter_size=3, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.MaxPool2DLayer(net, pool_size=2)

    net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * last_filter_multiplier, filter_size=3, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.MaxPool2DLayer(net, pool_size=2)

    print "\tFINAL POOL OUT SHAPE:", l.get_output_shape(net)

    #dense layers
    net = l.batch_norm(l.DenseLayer(net, 512, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.DropoutLayer(net, DROPOUT)
    net = l.batch_norm(l.DenseLayer(net, 512, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.DropoutLayer(net, DROPOUT)

    #Classification Layer
    if MULTI_LABEL:
        net = l.DenseLayer(net, NUM_CLASSES, nonlinearity=nonlinearities.sigmoid, W=init.HeNormal(gain=1))
    else:
        net = l.DenseLayer(net, NUM_CLASSES, nonlinearity=nonlinearities.softmax, W=init.HeNormal(gain=1))

    print "...DONE!"

    #model stats
    print "MODEL HAS", (sum(hasattr(layer, 'W') for layer in l.get_all_layers(net))), "WEIGHTED LAYERS"
    print "MODEL HAS", l.count_params(net), "PARAMS"

    return net

def buildModel(mtype=1):

    print "BUILDING MODEL TYPE", mtype, "..."

    #default settings (Model 1)
    filters = 64
    first_stride = 2
    last_filter_multiplier = 16

    #specific model type settings (see working notes for details)
    if mtype == 2:
        first_stride = 1
    elif mtype == 3:
        filters = 32
        last_filter_multiplier = 8

    #input layer
    net = l.InputLayer((None, IM_DIM, IM_SIZE[1], IM_SIZE[0]))

    #conv layers
    net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters, filter_size=7, pad='same', stride=first_stride, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.MaxPool2DLayer(net, pool_size=2)

    if mtype == 2:
        net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters, filter_size=5, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
        net = l.MaxPool2DLayer(net, pool_size=2)

    net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * 2, filter_size=5, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.MaxPool2DLayer(net, pool_size=2)

    net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * 4, filter_size=3, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.MaxPool2DLayer(net, pool_size=2)

    net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * 8, filter_size=3, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.MaxPool2DLayer(net, pool_size=2)

    net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * last_filter_multiplier, filter_size=3, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.MaxPool2DLayer(net, pool_size=2)

    print "\tFINAL POOL OUT SHAPE:", l.get_output_shape(net)

    #dense layers
    net = l.batch_norm(l.DenseLayer(net, 512, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.batch_norm(l.DenseLayer(net, 512, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))

    #Classification Layer
    if MULTI_LABEL:
        net = l.DenseLayer(net, NUM_CLASSES, nonlinearity=nonlinearities.sigmoid, W=init.HeNormal(gain=1))
    else:
        net = l.DenseLayer(net, NUM_CLASSES, nonlinearity=nonlinearities.softmax, W=init.HeNormal(gain=1))

    print "...DONE!"

    #model stats
    print "MODEL HAS", (sum(hasattr(layer, 'W') for layer in l.get_all_layers(net))), "WEIGHTED LAYERS"
    print "MODEL HAS", l.count_params(net), "PARAMS"

    return net

def build_model_resnet50(input_shape):
    net = {}
    net['input'] = InputLayer(input_shape)
    sub_net, parent_layer_name = build_simple_block(
        net['input'], ['conv1', 'bn_conv1', 'conv1_relu'],
        64, 7, 2, 3, use_bias=True)
    net.update(sub_net)
    net['pool1'] = PoolLayer(net[parent_layer_name], pool_size=3, stride=2, pad=0, mode='max', ignore_border=False)

    block_size = list('abc')
    parent_layer_name = 'pool1'
    for c in block_size:
        if c == 'a':
            sub_net, parent_layer_name = build_residual_block(net[parent_layer_name], 1, 1, True, 4, ix='2%s' % c)
        else:
            sub_net, parent_layer_name = build_residual_block(net[parent_layer_name], 1.0/4, 1, False, 4, ix='2%s' % c)
        net.update(sub_net)

    # block_size = ['a'] + ['b'+str(i+1) for i in range(7)]
    block_size = list('abcd')
    for c in block_size:
        if c == 'a':
            sub_net, parent_layer_name = build_residual_block(
                net[parent_layer_name], 1.0/2, 1.0/2, True, 4, ix='3%s' % c)
        else:
            sub_net, parent_layer_name = build_residual_block(net[parent_layer_name], 1.0/4, 1, False, 4, ix='3%s' % c)
        net.update(sub_net)

    # block_size = ['a'] + ['b'+str(i+1) for i in range(35)]
    block_size = list('abcdef')
    for c in block_size:
        if c == 'a':
            sub_net, parent_layer_name = build_residual_block(
                net[parent_layer_name], 1.0/2, 1.0/2, True, 4, ix='4%s' % c)
        else:
            sub_net, parent_layer_name = build_residual_block(net[parent_layer_name], 1.0/4, 1, False, 4, ix='4%s' % c)
        net.update(sub_net)

    block_size = list('abc')
    for c in block_size:
        if c == 'a':
            sub_net, parent_layer_name = build_residual_block(
                net[parent_layer_name], 1.0/2, 1.0/2, True, 4, ix='5%s' % c)
        else:
            sub_net, parent_layer_name = build_residual_block(net[parent_layer_name], 1.0/4, 1, False, 4, ix='5%s' % c)
        net.update(sub_net)

    net['pool5'] = PoolLayer(net[parent_layer_name], pool_size=7, stride=1, pad=0,
                             mode='average_exc_pad', ignore_border=False)
    net['fc1000'] = DenseLayer(net['pool5'], num_units=1000, nonlinearity=None,
                               W=lasagne.init.Normal(std=0.01, mean=0.0))
    net['prob'] = NonlinearityLayer(net['fc1000'], nonlinearity=softmax)

    return net

# model hyperparams

def build_model_resnet152(input_shape):
    net = {}
    net['input'] = InputLayer(input_shape)
    sub_net, parent_layer_name = build_simple_block(
        net['input'], ['conv1', 'bn_conv1', 'conv1_relu'],
        64, 7, 2, 3, use_bias=True)
    net.update(sub_net)
    net['pool1'] = PoolLayer(net[parent_layer_name], pool_size=3, stride=2, pad=0, mode='max', ignore_border=False)

    block_size = list('abc')
    parent_layer_name = 'pool1'
    for c in block_size:
        if c == 'a':
            sub_net, parent_layer_name = build_residual_block(net[parent_layer_name], 1, 1, True, 4, ix='2%s' % c)
        else:
            sub_net, parent_layer_name = build_residual_block(net[parent_layer_name], 1.0/4, 1, False, 4, ix='2%s' % c)
        net.update(sub_net)

    block_size = ['a'] + ['b'+str(i+1) for i in range(7)]
    # block_size = list('abcd')
    for c in block_size:
        if c == 'a':
            sub_net, parent_layer_name = build_residual_block(
                net[parent_layer_name], 1.0/2, 1.0/2, True, 4, ix='3%s' % c)
        else:
            sub_net, parent_layer_name = build_residual_block(net[parent_layer_name], 1.0/4, 1, False, 4, ix='3%s' % c)
        net.update(sub_net)

    block_size = ['a'] + ['b'+str(i+1) for i in range(35)]
    # block_size = list('abcdef')
    for c in block_size:
        if c == 'a':
            sub_net, parent_layer_name = build_residual_block(
                net[parent_layer_name], 1.0/2, 1.0/2, True, 4, ix='4%s' % c)
        else:
            sub_net, parent_layer_name = build_residual_block(net[parent_layer_name], 1.0/4, 1, False, 4, ix='4%s' % c)
        net.update(sub_net)

    block_size = list('abc')
    for c in block_size:
        if c == 'a':
            sub_net, parent_layer_name = build_residual_block(
                net[parent_layer_name], 1.0/2, 1.0/2, True, 4, ix='5%s' % c)
        else:
            sub_net, parent_layer_name = build_residual_block(net[parent_layer_name], 1.0/4, 1, False, 4, ix='5%s' % c)
        net.update(sub_net)

    net['pool5'] = PoolLayer(net[parent_layer_name], pool_size=7, stride=1, pad=0,
                             mode='average_exc_pad', ignore_border=False)
    net['fc1000'] = DenseLayer(net['pool5'], num_units=1000, nonlinearity=None)
    net['prob'] = NonlinearityLayer(net['fc1000'], nonlinearity=softmax)

    print('Total number of layers:', len(lasagne.layers.get_all_layers(net['prob'])))

    return net

# model hyperparams

def main():
    ################
    # LOAD DATASET #
    ################
    dataset = './data/ubiquitous_aug.hkl'
    kfd = './data/ubiquitous_kfold.hkl'
    print('Loading dataset {}...'.format(dataset))
    X, y = hkl.load(open(dataset, 'r'))
    X = X.reshape(-1, 4, 1, 400).astype(floatX)
    y = y.astype('int32')
    print('X shape: {}, y shape: {}'.format(X.shape, y.shape))
    kf = hkl.load(open(kfd, 'r'))
    kfold = [(train, test) for train, test in kf]
    (train, test) = kfold[0]
    print('train_set size: {}, test_set size: {}'.format(len(train), len(test)))

    # shuffle +/- labels in minibatch
    print('shuffling train_set and test_set')
    shuffle(train)
    shuffle(test)
    X_train = X[train]
    X_test = X[test]
    y_train = y[train]
    y_test = y[test]
    print('data prepared!')

    layers = [
        (InputLayer, {'shape': (None, 4, 1, 400)}),
        (Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 4)}),
        (Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 3)}),
        (Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 3)}),
        (MaxPool2DLayer, {'pool_size': (1, 2)}),
        (Conv2DLayer, {'num_filters': 32, 'filter_size': (1, 2)}),
        (Conv2DLayer, {'num_filters': 32, 'filter_size': (1, 2)}),
        (Conv2DLayer, {'num_filters': 32, 'filter_size': (1, 2)}),
        (MaxPool2DLayer, {'pool_size': (1, 2)}),
        (DenseLayer, {'num_units': 64}),
        (DropoutLayer, {}),
        (DenseLayer, {'num_units': 64}),
        (DenseLayer, {'num_units': 2, 'nonlinearity': softmax})]

    net = NeuralNet(
        layers=layers,
        max_epochs=100,
        update=adam,
        update_learning_rate=1e-4,
        train_split=TrainSplit(eval_size=0.1),
        on_epoch_finished=[
            AdjustVariable(1e-4, target=0, half_life=20)],
        verbose=2)

    net.fit(X_train, y_train)
    plot_loss(net)

def main(resume=None):
    l = 300
    dataset = './data/ubiquitous_train.hkl'
    print('Loading dataset {}...'.format(dataset))
    X_train, y_train = hkl.load(dataset)
    X_train = X_train.reshape(-1, 4, 1, l).astype(floatX)
    y_train = np.array(y_train, dtype='int32')
    indice = np.arange(X_train.shape[0])
    np.random.shuffle(indice)
    X_train = X_train[indice]
    y_train = y_train[indice]
    print('X_train shape: {}, y_train shape: {}'.format(X_train.shape, y_train.shape))

    layers = [
        (InputLayer, {'shape': (None, 4, 1, l)}),
        (Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 4)}),
        (Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 3)}),
        (Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 3)}),
        (MaxPool2DLayer, {'pool_size': (1, 2)}),
        (Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 2)}),
        (Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 2)}),
        (Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 2)}),
        (MaxPool2DLayer, {'pool_size': (1, 2)}),
        (DenseLayer, {'num_units': 64}),
        (DropoutLayer, {}),
        (DenseLayer, {'num_units': 64}),
        (DenseLayer, {'num_units': 2, 'nonlinearity': softmax})]

    lr = theano.shared(np.float32(1e-4))

    net = NeuralNet(
        layers=layers,
        max_epochs=100,
        update=adam,
        update_learning_rate=lr,
        train_split=TrainSplit(eval_size=0.1),
        on_epoch_finished=[
            AdjustVariable(lr, target=1e-8, half_life=20)],
        verbose=4)

    if resume != None:
        net.load_params_from(resume)

    net.fit(X_train, y_train)

    net.save_params_to('./models/net_params.pkl')

def get_output_for(self, inputs, **kwargs):
    # typical GRU, but prediction produced by softmax layer is applied to GRU's input
    q = inputs[0]
    m = inputs[1]
    epmem_dropout = inputs[2]

    #q = q * self.rand_stream.binomial(q.shape, p=1-epmem_dropout, dtype=theano.config.floatX)
    m = m * self.rand_stream.binomial(m.shape, p=1-epmem_dropout, dtype=theano.config.floatX)

    W_in_stacked = T.concatenate([self.W_in_to_resetgate, self.W_in_to_updategate, self.W_in_to_hid_update], axis=1)
    W_hid_stacked = T.concatenate([self.W_hid_to_resetgate, self.W_hid_to_updategate, self.W_hid_to_hid_update], axis=1)
    b_stacked = T.concatenate([self.b_resetgate, self.b_updategate, self.b_hid_update], axis=0)

    def slice_w(x, n):
        return x[:, n*self.hid_state_size:(n+1)*self.hid_state_size]

    def get_output(a):
        return nonlin.softmax(T.dot(a, self.W))

    def step(hid_previous, out_previous, *args):
        input_n = T.concatenate([out_previous, q], axis=1)

        hid_input = T.dot(hid_previous, W_hid_stacked)
        input_n = T.dot(input_n, W_in_stacked) + b_stacked

        resetgate = slice_w(hid_input, 0) + slice_w(input_n, 0)
        updategate = slice_w(hid_input, 1) + slice_w(input_n, 1)
        resetgate = self.nonlinearity_resetgate(resetgate)
        updategate = self.nonlinearity_updategate(updategate)

        hid_update_in = slice_w(input_n, 2)
        hid_update_hid = slice_w(hid_input, 2)
        hid_update = hid_update_in + resetgate*hid_update_hid

        hid_update = self.nonlinearity_hid(hid_update)

        # standard GRU update: convex combination of previous and candidate hidden state
        hid = (1 - updategate)*hid_previous + updategate*hid_update
        out = nonlin.softmax(T.dot(hid, self.W))

        return (hid, out)

    non_seqs = [W_in_stacked, b_stacked, W_hid_stacked, q, m, self.W]
    hid_and_out, b = theano.scan(
        fn=step,
        outputs_info=[m, get_output(m)],
        non_sequences=non_seqs,
        strict=True,
        n_steps=self.max_answer_word)

    return T.transpose(hid_and_out[1], (1, 0, 2))

def __init__(self, x, y, args):
    self.params_theta = []
    self.params_lambda = []
    self.params_weight = []
    if args.dataset == 'mnist':
        input_size = (None, 28*28)
    elif args.dataset == 'cifar10':
        input_size = (None, 3, 32*32)
    else:
        raise AssertionError

    layers = [ll.InputLayer(input_size)]
    penalty = theano.shared(np.array(0.))

    for (k, num) in enumerate(args.MLPlayer):

        # the last layer should use softmax
        if k == len(args.MLPlayer) - 1:
            # layers.append(ll.DenseLayer(layers[-1], num, nonlinearity=nonlinearities.softmax))
            layers.append(DenseLayerWithReg(args, layers[-1], num_units=num, nonlinearity=nonlinearities.softmax))
        else:
            # layers.append(ll.DenseLayer(layers[-1], num))
            layers.append(DenseLayerWithReg(args, layers[-1], num_units=num))

        if layers[-1].W is not None:
            self.params_theta += [layers[-1].W, layers[-1].b]
            self.params_weight += [layers[-1].W]

            # define new regularization term for a layer
            if args.regL2 is True:
                tempL2 = layers[-1].L2 * T.sqr(layers[-1].W)
                penalty += T.sum(tempL2)
                self.params_lambda += [layers[-1].L2]
            if args.regL1 is True:
                tempL1 = layers[-1].L1 * layers[-1].W
                penalty += T.sum(tempL1)
                self.params_lambda += [layers[-1].L1]

    self.layers = layers
    self.y = ll.get_output(layers[-1], x, deterministic=False)
    self.prediction = T.argmax(self.y, axis=1)
    self.penalty = penalty
    # self.penalty = penalty if penalty != 0. else T.constant(0.)
    print(self.params_lambda)
    # time.sleep(20)

    # cost function
    self.loss = T.mean(categorical_crossentropy(self.y, y))
    self.lossWithPenalty = T.add(self.loss, self.penalty)
    print "loss and losswithpenalty", type(self.loss), type(self.lossWithPenalty)
    # self.classError = T.mean(T.cast(T.neq(self.prediction, y), 'float32'))

def __init__(
        self,
        incomings,
        num_units,
        W_g=init.Normal(0.1),
        W_h=init.Normal(0.1),
        W_v=init.Normal(0.1),
        W_s=init.Normal(0.1),
        W_p=init.Normal(0.1),
        nonlinearity=nonlinearities.tanh,
        nonlinearity_atten=nonlinearities.softmax,
        **kwargs
):
    super(AttenLayer, self).__init__(incomings, **kwargs)
    self.batch_size = self.input_shapes[0][0]  # None
    num_inputs = self.input_shapes[2][1]  # k
    feature_dim = self.input_shapes[0][1]  # d
    self.num_units = num_units
    self.nonlinearity = nonlinearity
    self.nonlinearity_atten = nonlinearity_atten

    self.W_h_to_attenGate = self.add_param(
        W_h,
        (num_inputs, 1),
        name='W_h_to_atten'
    )
    self.W_g_to_attenGate = self.add_param(
        W_g,
        (feature_dim, num_inputs),
        name='W_g_to_atten'
    )
    self.W_v_to_attenGate = self.add_param(
        W_v,
        (feature_dim, num_inputs),
        name='W_v_to_atten'
    )
    self.W_s_to_attenGate = self.add_param(
        W_s,
        (feature_dim, num_inputs),
        name='W_s_to_atten'
    )
    self.W_p = self.add_param(
        W_p,
        (feature_dim, num_units),
        name='W_p_to_atten'
    )
    self.num_inputs = num_inputs

def get_output_for(self, inputs, **kwargs):
    s_hat_t = inputs[0]
    h_hat_t = inputs[1]
    # s_hat_t = s_hat_t.dimshuffle(1, 0)
    # h_hat_t = h_hat_t.dimshuffle(1, 0)
    H = inputs[2]
    # H = H.dimshuffle(2, 0, 1)
    # H_len = H.shape[-1]

    # z_t 1*none*k
    zt = T.dot(
        self.nonlinearity(
            T.dot(H, self.W_v_to_attenGate) + T.dot(
                T.dot(h_hat_t, self.W_g_to_attenGate).dimshuffle(0, 1, 'x'),
                T.ones((1, self.num_inputs))
            )
        ),
        self.W_h_to_attenGate
    )[:, :, 0]

    vt = T.dot(
        self.nonlinearity(
            T.dot(
                s_hat_t,
                self.W_s_to_attenGate
            ) + T.dot(
                h_hat_t,
                self.W_g_to_attenGate
            )
        ),
        self.W_h_to_attenGate
    )

    alpha_hat_t = self.nonlinearity_atten(T.concatenate(
        [zt, vt], axis=-1
    ))

    feature = T.concatenate(
        [H, s_hat_t.dimshuffle(0, 'x', 1)],
        axis=1
    ).dimshuffle(2, 0, 1)

    c_hat_t = T.sum(alpha_hat_t*feature, axis=-1)

    out = T.dot(
        (c_hat_t.T + h_hat_t),
        self.W_p
    )
    return nonlinearities.softmax(out)

def build_model(self, input_var, forward, dropout):
    net = dict()
    net['input'] = InputLayer((None, 3, None, None), input_var=input_var)
    net['conv1/7x7_s2'] = ConvLayer(net['input'], 64, 7, stride=2, pad=3, flip_filters=False)
    net['pool1/3x3_s2'] = PoolLayer(net['conv1/7x7_s2'], pool_size=3, stride=2, ignore_border=False)
    net['pool1/norm1'] = LRNLayer(net['pool1/3x3_s2'], alpha=0.00002, k=1)
    net['conv2/3x3_reduce'] = ConvLayer(net['pool1/norm1'], 64, 1, flip_filters=False)
    net['conv2/3x3'] = ConvLayer(net['conv2/3x3_reduce'], 192, 3, pad=1, flip_filters=False)
    net['conv2/norm2'] = LRNLayer(net['conv2/3x3'], alpha=0.00002, k=1)
    net['pool2/3x3_s2'] = PoolLayerDNN(net['conv2/norm2'], pool_size=3, stride=2)

    net.update(self.build_inception_module('inception_3a', net['pool2/3x3_s2'], [32, 64, 96, 128, 16, 32]))
    net.update(self.build_inception_module('inception_3b', net['inception_3a/output'], [64, 128, 128, 192, 32, 96]))
    net['pool3/3x3_s2'] = PoolLayerDNN(net['inception_3b/output'], pool_size=3, stride=2)

    net.update(self.build_inception_module('inception_4a', net['pool3/3x3_s2'], [64, 192, 96, 208, 16, 48]))
    net.update(self.build_inception_module('inception_4b', net['inception_4a/output'], [64, 160, 112, 224, 24, 64]))
    net.update(self.build_inception_module('inception_4c', net['inception_4b/output'], [64, 128, 128, 256, 24, 64]))
    net.update(self.build_inception_module('inception_4d', net['inception_4c/output'], [64, 112, 144, 288, 32, 64]))
    net.update(self.build_inception_module('inception_4e', net['inception_4d/output'], [128, 256, 160, 320, 32, 128]))
    net['pool4/3x3_s2'] = PoolLayerDNN(net['inception_4e/output'], pool_size=3, stride=2)

    net.update(self.build_inception_module('inception_5a', net['pool4/3x3_s2'], [128, 256, 160, 320, 32, 128]))
    net.update(self.build_inception_module('inception_5b', net['inception_5a/output'], [128, 384, 192, 384, 48, 128]))
    net['pool5/7x7_s1'] = GlobalPoolLayer(net['inception_5b/output'])

    if forward:
        #net['fc6'] = DenseLayer(net['pool5/7x7_s1'], num_units=1000)
        net['prob'] = DenseLayer(net['pool5/7x7_s1'], num_units=4, nonlinearity=softmax)
    else:
        net['dropout1'] = DropoutLayer(net['pool5/7x7_s1'], p=dropout)
        #net['fc6'] = DenseLayer(net['dropout1'], num_units=1000)
        #net['dropout2'] = DropoutLayer(net['fc6'], p=dropout)
        net['prob'] = DenseLayer(net['dropout1'], num_units=4, nonlinearity=softmax)

    return net