The following 50 code examples, extracted from open-source Python projects, illustrate how to use lasagne.nonlinearities.softmax().
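Across these examples the recurring pattern is the same: softmax is passed as the nonlinearity of the final DenseLayer (or applied through a NonlinearityLayer) so the network outputs per-class probabilities, usually paired later with categorical crossentropy. Below is a minimal, self-contained sketch of that pattern; the layer sizes and variable names are illustrative only and are not taken from any particular example.

```python
import numpy as np
import theano
import theano.tensor as T
from lasagne.layers import InputLayer, DenseLayer, get_output
from lasagne.nonlinearities import rectify, softmax

# Minimal classifier: the softmax nonlinearity turns the final dense
# layer's activations into class probabilities.
input_var = T.matrix('inputs')
l_in = InputLayer(shape=(None, 100), input_var=input_var)
l_hid = DenseLayer(l_in, num_units=50, nonlinearity=rectify)
l_out = DenseLayer(l_hid, num_units=10, nonlinearity=softmax)

# Compile a function mapping inputs to per-class probabilities.
predict_fn = theano.function([input_var], get_output(l_out, deterministic=True))
probs = predict_fn(np.random.rand(4, 100).astype(theano.config.floatX))
print(probs.shape)  # (4, 10); each row sums to 1
```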
def build_mlp(input_var=None):
    l_in = InputLayer(shape=(None, 1, 28, 28), input_var=input_var)
    l_hid1 = DenseLayer(
        l_in, num_units=500,
        nonlinearity=rectify,
        W=lasagne.init.GlorotUniform())
    l_hid1_drop = DropoutLayer(l_hid1, p=0.4)
    l_hid2 = DenseLayer(
        l_hid1_drop, num_units=300,
        nonlinearity=rectify)
    l_hid2_drop = DropoutLayer(l_hid2, p=0.4)
    l_out = DenseLayer(
        l_hid2_drop, num_units=10,
        nonlinearity=softmax)
    return l_out

# generator giving the batches

def __build_48_net__(self):
    network = layers.InputLayer((None, 3, 48, 48), input_var=self.__input_var__)
    network = layers.Conv2DLayer(network, num_filters=64, filter_size=(5, 5), stride=1, nonlinearity=relu)
    network = layers.MaxPool2DLayer(network, pool_size=(3, 3), stride=2)
    network = layers.batch_norm(network)
    network = layers.Conv2DLayer(network, num_filters=64, filter_size=(5, 5), stride=1, nonlinearity=relu)
    network = layers.batch_norm(network)
    network = layers.MaxPool2DLayer(network, pool_size=(3, 3), stride=2)
    network = layers.Conv2DLayer(network, num_filters=64, filter_size=(3, 3), stride=1, nonlinearity=relu)
    network = layers.batch_norm(network)
    network = layers.MaxPool2DLayer(network, pool_size=(3, 3), stride=2)
    network = layers.DenseLayer(network, num_units=256, nonlinearity=relu)
    network = layers.DenseLayer(network, num_units=2, nonlinearity=softmax)
    return network

def network_classifier(self, input_var):
    network = {}
    network['classifier/input'] = InputLayer(shape=(None, 3, 64, 64), input_var=input_var, name='classifier/input')
    network['classifier/conv1'] = Conv2DLayer(network['classifier/input'], num_filters=32, filter_size=3, stride=1, pad='valid', nonlinearity=rectify, name='classifier/conv1')
    network['classifier/pool1'] = MaxPool2DLayer(network['classifier/conv1'], pool_size=2, stride=2, pad=0, name='classifier/pool1')
    network['classifier/conv2'] = Conv2DLayer(network['classifier/pool1'], num_filters=32, filter_size=3, stride=1, pad='valid', nonlinearity=rectify, name='classifier/conv2')
    network['classifier/pool2'] = MaxPool2DLayer(network['classifier/conv2'], pool_size=2, stride=2, pad=0, name='classifier/pool2')
    network['classifier/conv3'] = Conv2DLayer(network['classifier/pool2'], num_filters=32, filter_size=3, stride=1, pad='valid', nonlinearity=rectify, name='classifier/conv3')
    network['classifier/pool3'] = MaxPool2DLayer(network['classifier/conv3'], pool_size=2, stride=2, pad=0, name='classifier/pool3')
    network['classifier/conv4'] = Conv2DLayer(network['classifier/pool3'], num_filters=32, filter_size=3, stride=1, pad='valid', nonlinearity=rectify, name='classifier/conv4')
    network['classifier/pool4'] = MaxPool2DLayer(network['classifier/conv4'], pool_size=2, stride=2, pad=0, name='classifier/pool4')
    network['classifier/dense1'] = DenseLayer(network['classifier/pool4'], num_units=64, nonlinearity=rectify, name='classifier/dense1')
    network['classifier/output'] = DenseLayer(network['classifier/dense1'], num_units=10, nonlinearity=softmax, name='classifier/output')

    return network

def build_cnn(self):
    # Building the network
    layer_in = InputLayer(shape=(None, 784), input_var=self.input_var)

    # Hidden layer
    layer = DenseLayer(
        layer_in,
        num_units=self.hidden_size,
        W=lasagne.init.Uniform(
            range=(-np.sqrt(6. / (784 + self.hidden_size)),
                   np.sqrt(6. / (784 + self.hidden_size)))),
        nonlinearity=tanh,
    )

    # LR layer
    layer = DenseLayer(
        layer,
        num_units=self.output_size,
        W=lasagne.init.Constant(0.),
        nonlinearity=softmax,
    )

    return layer

def __init__(
        self,
        env_spec,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=NL.tanh,
        num_seq_inputs=1,
        prob_network=None,
):
    """
    :param env_spec: A spec for the mdp.
    :param hidden_sizes: list of sizes for the fully connected hidden layers
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :param prob_network: manually specified network for this policy, other network params
        are ignored
    :return:
    """
    Serializable.quick_init(self, locals())

    assert isinstance(env_spec.action_space, Discrete)

    if prob_network is None:
        prob_network = MLP(
            input_shape=(env_spec.observation_space.flat_dim * num_seq_inputs,),
            output_dim=env_spec.action_space.n,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=NL.softmax,
        )

    self._l_prob = prob_network.output_layer
    self._l_obs = prob_network.input_layer
    self._f_prob = ext.compile_function([prob_network.input_layer.input_var],
                                        L.get_output(prob_network.output_layer))

    self._dist = Categorical(env_spec.action_space.n)

    super(CategoricalMLPPolicy, self).__init__(env_spec)
    LasagnePowered.__init__(self, [prob_network.output_layer])

def calc_loss(prediction, targets):
    # categorical crossentropy is the best choice for a multi-class softmax output
    loss = T.mean(objectives.categorical_crossentropy(prediction, targets))
    return loss

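A loss like this is usually wired into a Theano training function together with lasagne.updates. Below is a hedged sketch of that wiring, assuming a softmax output layer `network` and its `input_var`; the adam optimizer, the function name, and all variable names here are illustrative assumptions rather than part of the example above.

```python
import theano
import theano.tensor as T
import lasagne
from lasagne import objectives

def compile_train_fn(network, input_var, learning_rate=1e-3):
    # `network` is assumed to be a Lasagne layer whose output is a softmax
    # over classes; `targets` are integer class labels.
    targets = T.ivector('targets')
    prediction = lasagne.layers.get_output(network)
    # same loss as calc_loss above: mean categorical crossentropy
    loss = T.mean(objectives.categorical_crossentropy(prediction, targets))
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.adam(loss, params, learning_rate=learning_rate)
    return theano.function([input_var, targets], loss, updates=updates)
```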
def create_network():
    l = 1000
    pool_size = 5
    test_size1 = 13
    test_size2 = 7
    test_size3 = 5
    kernel1 = 128
    kernel2 = 128
    kernel3 = 128
    layer1 = InputLayer(shape=(None, 1, 4, l + 1024))
    layer2_1 = SliceLayer(layer1, indices=slice(0, l), axis=-1)
    layer2_2 = SliceLayer(layer1, indices=slice(l, None), axis=-1)
    layer2_3 = SliceLayer(layer2_2, indices=slice(0, 4), axis=-2)
    layer2_f = FlattenLayer(layer2_3)
    layer3 = Conv2DLayer(layer2_1, num_filters=kernel1, filter_size=(4, test_size1))
    layer4 = Conv2DLayer(layer3, num_filters=kernel1, filter_size=(1, test_size1))
    layer5 = Conv2DLayer(layer4, num_filters=kernel1, filter_size=(1, test_size1))
    layer6 = MaxPool2DLayer(layer5, pool_size=(1, pool_size))
    layer7 = Conv2DLayer(layer6, num_filters=kernel2, filter_size=(1, test_size2))
    layer8 = Conv2DLayer(layer7, num_filters=kernel2, filter_size=(1, test_size2))
    layer9 = Conv2DLayer(layer8, num_filters=kernel2, filter_size=(1, test_size2))
    layer10 = MaxPool2DLayer(layer9, pool_size=(1, pool_size))
    layer11 = Conv2DLayer(layer10, num_filters=kernel3, filter_size=(1, test_size3))
    layer12 = Conv2DLayer(layer11, num_filters=kernel3, filter_size=(1, test_size3))
    layer13 = Conv2DLayer(layer12, num_filters=kernel3, filter_size=(1, test_size3))
    layer14 = MaxPool2DLayer(layer13, pool_size=(1, pool_size))
    layer14_d = DenseLayer(layer14, num_units=256)
    layer3_2 = DenseLayer(layer2_f, num_units=128)
    layer15 = ConcatLayer([layer14_d, layer3_2])
    layer16 = DropoutLayer(layer15, p=0.5)
    layer17 = DenseLayer(layer16, num_units=256)
    network = DenseLayer(layer17, num_units=2, nonlinearity=softmax)
    return network

# random search to initialize the weights

def get_output_for(self, input, **kwargs):
    activation = T.dot(input, self.C)
    if self.b is not None:
        activation = activation + self.b.dimshuffle('x', 0)
    return nonlinearities.softmax(activation)

def __init__(self, x, y, args):
    self.params_theta = []
    self.params_lambda = []
    self.params_weight = []
    if args.dataset == 'mnist':
        input_size = (None, 1, 28, 28)
    elif args.dataset == 'cifar10':
        input_size = (None, 3, 32, 32)
    else:
        raise AssertionError

    layers = [ll.InputLayer(input_size)]
    self.penalty = theano.shared(np.array(0.))

    # conv1
    layers.append(Conv2DLayerWithReg(args, layers[-1], 20, 5))
    self.add_params_to_self(args, layers[-1])
    layers.append(ll.MaxPool2DLayer(layers[-1], pool_size=2, stride=2))
    # conv2
    layers.append(Conv2DLayerWithReg(args, layers[-1], 50, 5))
    self.add_params_to_self(args, layers[-1])
    layers.append(ll.MaxPool2DLayer(layers[-1], pool_size=2, stride=2))
    # fc1
    layers.append(DenseLayerWithReg(args, layers[-1], num_units=500))
    self.add_params_to_self(args, layers[-1])
    # softmax
    layers.append(DenseLayerWithReg(args, layers[-1], num_units=10,
                                    nonlinearity=nonlinearities.softmax))
    self.add_params_to_self(args, layers[-1])

    self.layers = layers
    self.y = ll.get_output(layers[-1], x, deterministic=False)
    self.prediction = T.argmax(self.y, axis=1)
    # self.penalty = penalty if penalty != 0. else T.constant(0.)
    print(self.params_lambda)
    # time.sleep(20)

    # cost function
    self.loss = T.mean(categorical_crossentropy(self.y, y))
    self.lossWithPenalty = T.add(self.loss, self.penalty)
    print "loss and losswithpenalty", type(self.loss), type(self.lossWithPenalty)

def __init__(self, W_g=init.Normal(0.1), W_s=init.Normal(0.1), W_h=init.Normal(0.1),
             W_v=init.Normal(0.1), nonlinearity=nonlinearities.softmax):
    self.W_s = W_s
    self.W_h = W_h
    self.W_g = W_g
    self.W_v = W_v
    if nonlinearity is None:
        self.nonlinearity = nonlinearities.identity
    else:
        self.nonlinearity = nonlinearity

def __build_12_net__(self):
    network = layers.InputLayer((None, 3, 12, 12), input_var=self.__input_var__)
    network = layers.dropout(network, p=0.1)
    network = layers.Conv2DLayer(network, num_filters=16, filter_size=(3, 3), stride=1, nonlinearity=relu)
    network = layers.batch_norm(network)
    network = layers.MaxPool2DLayer(network, pool_size=(3, 3), stride=2)
    network = layers.DropoutLayer(network, p=0.3)
    network = layers.DenseLayer(network, num_units=16, nonlinearity=relu)
    network = layers.batch_norm(network)
    network = layers.DropoutLayer(network, p=0.3)
    network = layers.DenseLayer(network, num_units=2, nonlinearity=softmax)
    return network

def __build_24_net__(self):
    network = layers.InputLayer((None, 3, 24, 24), input_var=self.__input_var__)
    network = layers.dropout(network, p=0.1)
    network = layers.Conv2DLayer(network, num_filters=64, filter_size=(5, 5), stride=1, nonlinearity=relu)
    network = layers.batch_norm(network)
    network = layers.MaxPool2DLayer(network, pool_size=(3, 3), stride=2)
    network = layers.DropoutLayer(network, p=0.5)
    network = layers.batch_norm(network)
    network = layers.DenseLayer(network, num_units=64, nonlinearity=relu)
    network = layers.DropoutLayer(network, p=0.5)
    network = layers.DenseLayer(network, num_units=2, nonlinearity=softmax)
    return network

def __build_12_calib_net__(self):
    network = layers.InputLayer((None, 3, 12, 12), input_var=self.__input_var__)
    network = layers.Conv2DLayer(network, num_filters=16, filter_size=(3, 3), stride=1, nonlinearity=relu)
    network = layers.MaxPool2DLayer(network, pool_size=(3, 3), stride=2)
    network = layers.DenseLayer(network, num_units=128, nonlinearity=relu)
    network = layers.DenseLayer(network, num_units=45, nonlinearity=softmax)
    return network

def __build_24_calib_net__(self):
    network = layers.InputLayer((None, 3, 24, 24), input_var=self.__input_var__)
    network = layers.Conv2DLayer(network, num_filters=32, filter_size=(5, 5), stride=1, nonlinearity=relu)
    network = layers.MaxPool2DLayer(network, pool_size=(3, 3), stride=2)
    network = layers.DenseLayer(network, num_units=64, nonlinearity=relu)
    network = layers.DenseLayer(network, num_units=45, nonlinearity=softmax)
    return network

def network_discriminator(self, features):
    network = {}
    network['discriminator/conv2'] = Conv2DLayer(features, num_filters=32, filter_size=3, stride=1, pad='valid', nonlinearity=rectify, name='discriminator/conv2')
    network['discriminator/pool2'] = MaxPool2DLayer(network['discriminator/conv2'], pool_size=2, stride=2, pad=0, name='discriminator/pool2')
    network['discriminator/conv3'] = Conv2DLayer(network['discriminator/pool2'], num_filters=32, filter_size=3, stride=1, pad='valid', nonlinearity=rectify, name='discriminator/conv3')
    network['discriminator/pool3'] = MaxPool2DLayer(network['discriminator/conv3'], pool_size=2, stride=2, pad=0, name='discriminator/pool3')
    network['discriminator/conv4'] = Conv2DLayer(network['discriminator/pool3'], num_filters=32, filter_size=3, stride=1, pad='valid', nonlinearity=rectify, name='discriminator/conv4')
    network['discriminator/pool4'] = MaxPool2DLayer(network['discriminator/conv4'], pool_size=2, stride=2, pad=0, name='discriminator/pool4')
    network['discriminator/dense1'] = DenseLayer(network['discriminator/pool4'], num_units=64, nonlinearity=rectify, name='discriminator/dense1')
    network['discriminator/output'] = DenseLayer(network['discriminator/dense1'], num_units=2, nonlinearity=softmax, name='discriminator/output')

    return network

def build_model(input_var):
    net = {}
    net['input'] = InputLayer((None, 3, 224, 224), input_var=input_var)
    net['conv1_1'] = ConvLayer(net['input'], 64, 3, pad=1, flip_filters=False)
    net['conv1_2'] = ConvLayer(net['conv1_1'], 64, 3, pad=1, flip_filters=False)
    net['pool1'] = PoolLayer(net['conv1_2'], 2)
    net['conv2_1'] = ConvLayer(net['pool1'], 128, 3, pad=1, flip_filters=False)
    net['conv2_2'] = ConvLayer(net['conv2_1'], 128, 3, pad=1, flip_filters=False)
    net['pool2'] = PoolLayer(net['conv2_2'], 2)
    net['conv3_1'] = ConvLayer(net['pool2'], 256, 3, pad=1, flip_filters=False)
    net['conv3_2'] = ConvLayer(net['conv3_1'], 256, 3, pad=1, flip_filters=False)
    net['conv3_3'] = ConvLayer(net['conv3_2'], 256, 3, pad=1, flip_filters=False)
    net['pool3'] = PoolLayer(net['conv3_3'], 2)
    net['conv4_1'] = ConvLayer(net['pool3'], 512, 3, pad=1, flip_filters=False)
    net['conv4_2'] = ConvLayer(net['conv4_1'], 512, 3, pad=1, flip_filters=False)
    net['conv4_3'] = ConvLayer(net['conv4_2'], 512, 3, pad=1, flip_filters=False)
    net['pool4'] = PoolLayer(net['conv4_3'], 2)
    net['conv5_1'] = ConvLayer(net['pool4'], 512, 3, pad=1, flip_filters=False)
    net['conv5_2'] = ConvLayer(net['conv5_1'], 512, 3, pad=1, flip_filters=False)
    net['conv5_3'] = ConvLayer(net['conv5_2'], 512, 3, pad=1, flip_filters=False)
    net['pool5'] = PoolLayer(net['conv5_3'], 2)
    net['fc6'] = DenseLayer(net['pool5'], num_units=4096)
    net['fc6_dropout'] = DropoutLayer(net['fc6'], p=0.5)
    net['fc7'] = DenseLayer(net['fc6_dropout'], num_units=4096)
    net['fc7_dropout'] = DropoutLayer(net['fc7'], p=0.5)
    net['fc8'] = DenseLayer(net['fc7_dropout'], num_units=1000, nonlinearity=None)
    net['prob'] = NonlinearityLayer(net['fc8'], softmax)

    return net

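For dict-style models like this VGG-16 definition, the softmax probabilities are typically obtained by compiling a Theano function over the 'prob' layer. A brief sketch, assuming the build_model function above is in scope; the zero-filled batch is just a placeholder input.

```python
import numpy as np
import theano
import theano.tensor as T
import lasagne

input_var = T.tensor4('inputs')
net = build_model(input_var)

# 'prob' is the softmax output layer; deterministic=True disables dropout.
prob_fn = theano.function(
    [input_var],
    lasagne.layers.get_output(net['prob'], deterministic=True))

images = np.zeros((2, 3, 224, 224), dtype=theano.config.floatX)  # placeholder batch
print(prob_fn(images).shape)  # (2, 1000)
```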
def __init__(
        self,
        env_spec,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=NL.tanh,
        num_seq_inputs=1,
        prob_network=None,
):
    """
    :param env_spec: A spec for the mdp.
    :param hidden_sizes: list of sizes for the fully connected hidden layers
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :param prob_network: manually specified network for this policy, other network params
        are ignored
    :return:
    """
    Serializable.quick_init(self, locals())

    assert isinstance(env_spec.action_space, Discrete)

    if prob_network is None:
        prob_network = MLP(
            input_shape=(
                env_spec.observation_space.flat_dim * num_seq_inputs,),
            output_dim=env_spec.action_space.n,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=NL.softmax,
        )

    self._l_prob = prob_network.output_layer
    self._l_obs = prob_network.input_layer
    self._f_prob = ext.compile_function([prob_network.input_layer.input_var],
                                        L.get_output(prob_network.output_layer))

    self._dist = Categorical(env_spec.action_space.n)

    super(CategoricalMLPPolicy, self).__init__(env_spec)
    LasagnePowered.__init__(self, [prob_network.output_layer])

def build_classification_network(r1):
    if not isinstance(r1, lasagne.layers.Layer):
        l_in = lasagne.layers.InputLayer((None, glimpse_output_size, recurrent_output_size), r1)
    else:
        l_in = r1
    output = lasagne.layers.DenseLayer(l_in, classification_units, nonlinearity=nl.softmax,
                                       W=class_weights, b=class_bias)
    return output

# input is downsampled batch of images
# output is initial r2, of length glimpse_output_size

def build_cnn(self, input_var=None):
    # Building the network
    layer_in = InputLayer(shape=(None, 3, 32, 32), input_var=input_var)

    # Conv1
    # [NOTE]: normal vs. truncated normal?
    # [NOTE]: conv in lasagne is not same as it in TensorFlow.
    layer = ConvLayer(layer_in, num_filters=64, filter_size=(3, 3), stride=(1, 1),
                      nonlinearity=rectify, pad='same', W=lasagne.init.HeNormal(),
                      flip_filters=False)
    # Pool1
    layer = MaxPool2DLayer(layer, pool_size=(3, 3), stride=(2, 2))
    # Norm1
    layer = LocalResponseNormalization2DLayer(layer, alpha=0.001 / 9.0, k=1.0, beta=0.75)

    # Conv2
    layer = ConvLayer(layer, num_filters=64, filter_size=(5, 5), stride=(1, 1),
                      nonlinearity=rectify, pad='same', W=lasagne.init.HeNormal(),
                      flip_filters=False)
    # Norm2
    # [NOTE]: n must be odd, but n in Chang's code is 4?
    layer = LocalResponseNormalization2DLayer(layer, alpha=0.001 / 9.0, k=1.0, beta=0.75)
    # Pool2
    layer = MaxPool2DLayer(layer, pool_size=(3, 3), stride=(2, 2))

    # Reshape
    layer = lasagne.layers.ReshapeLayer(layer, shape=([0], -1))

    # Dense3
    layer = DenseLayer(layer, num_units=384, W=lasagne.init.HeNormal(), b=lasagne.init.Constant(0.1))

    # Dense4
    layer = DenseLayer(layer, num_units=192, W=lasagne.init.Normal(std=0.04), b=lasagne.init.Constant(0.1))

    # Softmax
    layer = DenseLayer(layer, num_units=self.output_size,
                       W=lasagne.init.Normal(std=1. / 192.0), nonlinearity=softmax)

    return layer

def __init__(
        self,
        env_spec,
        latent_dim=0,    # all this is fake
        latent_name='categorical',
        bilinear_integration=False,
        resample=False,  # until here
        hidden_sizes=(32, 32),
        hidden_nonlinearity=NL.tanh,
        prob_network=None,
):
    """
    :param env_spec: A spec for the mdp.
    :param hidden_sizes: list of sizes for the fully connected hidden layers
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :param prob_network: manually specified network for this policy, other network params
        are ignored
    :return:
    """
    # bullshit
    self.latent_dim = latent_dim  ## could I avoid needing this self for the get_action?
    self.latent_name = latent_name
    self.bilinear_integration = bilinear_integration
    self.resample = resample
    self._set_std_to_0 = False

    Serializable.quick_init(self, locals())

    assert isinstance(env_spec.action_space, Discrete)

    if prob_network is None:
        prob_network = MLP(
            input_shape=(env_spec.observation_space.flat_dim,),
            output_dim=env_spec.action_space.n,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=NL.softmax,
        )

    self._l_prob = prob_network.output_layer
    self._l_obs = prob_network.input_layer
    self._f_prob = ext.compile_function([prob_network.input_layer.input_var],
                                        L.get_output(prob_network.output_layer))

    self._dist = Categorical(env_spec.action_space.n)

    super(CategoricalMLPPolicy, self).__init__(env_spec)
    LasagnePowered.__init__(self, [prob_network.output_layer])

def __init__(self, input_shape, output_dim, hidden_sizes,
             conv_filters, conv_filter_sizes, conv_strides, conv_pads,
             hidden_W_init=LI.GlorotUniform(), hidden_b_init=LI.Constant(0.),
             output_W_init=LI.GlorotUniform(), output_b_init=LI.Constant(0.),
             # conv_W_init=LI.GlorotUniform(), conv_b_init=LI.Constant(0.),
             hidden_nonlinearity=LN.rectify,
             output_nonlinearity=LN.softmax,
             name=None, input_var=None):

    if name is None:
        prefix = ""
    else:
        prefix = name + "_"

    if len(input_shape) == 3:
        l_in = L.InputLayer(shape=(None, np.prod(input_shape)), input_var=input_var)
        l_hid = L.reshape(l_in, ([0],) + input_shape)
    elif len(input_shape) == 2:
        l_in = L.InputLayer(shape=(None, np.prod(input_shape)), input_var=input_var)
        input_shape = (1,) + input_shape
        l_hid = L.reshape(l_in, ([0],) + input_shape)
    else:
        l_in = L.InputLayer(shape=(None,) + input_shape, input_var=input_var)
        l_hid = l_in
    for idx, conv_filter, filter_size, stride, pad in zip(
            range(len(conv_filters)),
            conv_filters,
            conv_filter_sizes,
            conv_strides,
            conv_pads,
    ):
        l_hid = L.Conv2DLayer(
            l_hid,
            num_filters=conv_filter,
            filter_size=filter_size,
            stride=(stride, stride),
            pad=pad,
            nonlinearity=hidden_nonlinearity,
            name="%sconv_hidden_%d" % (prefix, idx),
            convolution=wrapped_conv,
        )
    for idx, hidden_size in enumerate(hidden_sizes):
        l_hid = L.DenseLayer(
            l_hid,
            num_units=hidden_size,
            nonlinearity=hidden_nonlinearity,
            name="%shidden_%d" % (prefix, idx),
            W=hidden_W_init,
            b=hidden_b_init,
        )
    l_out = L.DenseLayer(
        l_hid,
        num_units=output_dim,
        nonlinearity=output_nonlinearity,
        name="%soutput" % (prefix,),
        W=output_W_init,
        b=output_b_init,
    )
    self._l_in = l_in
    self._l_out = l_out
    self._input_var = l_in.input_var

def __init__(
        self,
        name,
        env_spec,
        conv_filters, conv_filter_sizes, conv_strides, conv_pads,
        hidden_sizes=[],
        hidden_nonlinearity=NL.rectify,
        output_nonlinearity=NL.softmax,
        prob_network=None,
):
    """
    :param env_spec: A spec for the mdp.
    :param hidden_sizes: list of sizes for the fully connected hidden layers
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :param prob_network: manually specified network for this policy, other network params
        are ignored
    :return:
    """
    Serializable.quick_init(self, locals())

    assert isinstance(env_spec.action_space, Discrete)

    self._env_spec = env_spec
    if prob_network is None:
        prob_network = ConvNetwork(
            input_shape=env_spec.observation_space.shape,
            output_dim=env_spec.action_space.n,
            conv_filters=conv_filters,
            conv_filter_sizes=conv_filter_sizes,
            conv_strides=conv_strides,
            conv_pads=conv_pads,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=NL.softmax,
            name="prob_network",
        )

    self._l_prob = prob_network.output_layer
    self._l_obs = prob_network.input_layer
    self._f_prob = ext.compile_function(
        [prob_network.input_layer.input_var],
        L.get_output(prob_network.output_layer)
    )

    self._dist = Categorical(env_spec.action_space.n)

    super(CategoricalConvPolicy, self).__init__(env_spec)
    LasagnePowered.__init__(self, [prob_network.output_layer])

def build_std_dropout_gru(incoming1, incoming2, num_units, num_labels, mask, grad_clipping, num_filters, p,
                          reset_input):
    # Construct Bi-directional LSTM-CNNs-CRF with standard dropout.
    # first get some necessary dimensions or parameters
    conv_window = 3
    # shape = [batch, n-step, c_dim, char_length]
    incoming1 = lasagne.layers.DropoutLayer(incoming1, p=p)

    # construct convolution layer
    # shape = [batch, n-step, c_filters, output_length]
    cnn_layer = ConvTimeStep1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
                                    nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # infer the pool size for pooling (pool size should go through all time step of cnn)
    _, _, _, pool_size = cnn_layer.output_shape
    # construct max pool layer
    # shape = [batch, n-step, c_filters, 1]
    pool_layer = PoolTimeStep1DLayer(cnn_layer, pool_size=pool_size)
    # reshape: [batch, n-step, c_filters, 1] --> [batch, n-step, c_filters]
    output_cnn_layer = lasagne.layers.reshape(pool_layer, ([0], [1], [2]))

    # finally, concatenate the two incoming layers together.
    # shape = [batch, n-step, c_filter&w_dim]
    incoming = lasagne.layers.concat([output_cnn_layer, incoming2], axis=2)

    # dropout for incoming
    incoming = lasagne.layers.DropoutLayer(incoming, p=0.2)

    resetgate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                 W_cell=None, nonlinearity=nonlinearities.tanh)
    gru_forward = GRULayer(incoming, num_units, mask_input=mask,
                           resetgate=resetgate_forward, updategate=updategate_forward,
                           hidden_update=hidden_update_forward, grad_clipping=grad_clipping,
                           reset_input=reset_input, name='forward')

    resetgate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                  W_cell=None, nonlinearity=nonlinearities.tanh)
    gru_backward = GRULayer(incoming, num_units, mask_input=mask, backwards=True,
                            resetgate=resetgate_backward, updategate=updategate_backward,
                            hidden_update=hidden_update_backward, grad_clipping=grad_clipping,
                            reset_input=reset_input, name='backward')

    # concatenate the outputs of forward and backward LSTMs to combine them.
    bi_gru_cnn = lasagne.layers.concat([gru_forward, gru_backward], axis=2, name="bi-gru")

    bi_gru_cnn = lasagne.layers.DropoutLayer(bi_gru_cnn, p=p)

    # reshape bi-rnn-cnn to [batch * max_length, num_units]
    bi_gru_cnn = lasagne.layers.reshape(bi_gru_cnn, (-1, [2]))

    # construct output layer (dense layer with softmax)
    layer_output = lasagne.layers.DenseLayer(bi_gru_cnn, num_units=num_labels,
                                             nonlinearity=nonlinearities.softmax, name='softmax')

    return layer_output

def build_std_dropout_sgru(incoming1, incoming2, num_units, num_labels, mask, grad_clipping, num_filters, p):
    # Construct Bi-directional LSTM-CNNs-CRF with standard dropout.
    # first get some necessary dimensions or parameters
    conv_window = 3
    # shape = [batch, n-step, c_dim, char_length]
    incoming1 = lasagne.layers.DropoutLayer(incoming1, p=p)

    # construct convolution layer
    # shape = [batch, n-step, c_filters, output_length]
    cnn_layer = ConvTimeStep1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
                                    nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # infer the pool size for pooling (pool size should go through all time step of cnn)
    _, _, _, pool_size = cnn_layer.output_shape
    # construct max pool layer
    # shape = [batch, n-step, c_filters, 1]
    pool_layer = PoolTimeStep1DLayer(cnn_layer, pool_size=pool_size)
    # reshape: [batch, n-step, c_filters, 1] --> [batch, n-step, c_filters]
    output_cnn_layer = lasagne.layers.reshape(pool_layer, ([0], [1], [2]))

    # finally, concatenate the two incoming layers together.
    # shape = [batch, n-step, c_filter&w_dim]
    incoming = lasagne.layers.concat([output_cnn_layer, incoming2], axis=2)

    # dropout for incoming
    incoming = lasagne.layers.DropoutLayer(incoming, p=0.2)

    resetgate_input_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    resetgate_hidden_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                 W_cell=None, nonlinearity=nonlinearities.tanh)
    sgru_forward = SGRULayer(incoming, num_units, mask_input=mask,
                             resetgate_input=resetgate_input_forward, resetgate_hidden=resetgate_hidden_forward,
                             updategate=updategate_forward, hidden_update=hidden_update_forward,
                             grad_clipping=grad_clipping, name='forward')

    resetgate_input_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    resetgate_hidden_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                  W_cell=None, nonlinearity=nonlinearities.tanh)
    sgru_backward = SGRULayer(incoming, num_units, mask_input=mask, backwards=True,
                              resetgate_input=resetgate_input_backward, resetgate_hidden=resetgate_hidden_backward,
                              updategate=updategate_backward, hidden_update=hidden_update_backward,
                              grad_clipping=grad_clipping, name='backward')

    # concatenate the outputs of forward and backward LSTMs to combine them.
    bi_sgru_cnn = lasagne.layers.concat([sgru_forward, sgru_backward], axis=2, name="bi-sgru")

    bi_sgru_cnn = lasagne.layers.DropoutLayer(bi_sgru_cnn, p=p)

    # reshape bi-rnn-cnn to [batch * max_length, num_units]
    bi_sgru_cnn = lasagne.layers.reshape(bi_sgru_cnn, (-1, [2]))

    # construct output layer (dense layer with softmax)
    layer_output = lasagne.layers.DenseLayer(bi_sgru_cnn, num_units=num_labels,
                                             nonlinearity=nonlinearities.softmax, name='softmax')

    return layer_output

def build_recur_dropout_gru(incoming1, incoming2, num_units, num_labels, mask, grad_clipping, num_filters, p,
                            reset_input):
    # Construct Bi-directional LSTM-CNNs-CRF with recurrent dropout.
    # first get some necessary dimensions or parameters
    conv_window = 3
    # shape = [batch, n-step, c_dim, char_length]

    # construct convolution layer
    # shape = [batch, n-step, c_filters, output_length]
    cnn_layer = ConvTimeStep1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
                                    nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # infer the pool size for pooling (pool size should go through all time step of cnn)
    _, _, _, pool_size = cnn_layer.output_shape
    # construct max pool layer
    # shape = [batch, n-step, c_filters, 1]
    pool_layer = PoolTimeStep1DLayer(cnn_layer, pool_size=pool_size)
    # reshape: [batch, n-step, c_filters, 1] --> [batch, n-step, c_filters]
    output_cnn_layer = lasagne.layers.reshape(pool_layer, ([0], [1], [2]))

    # finally, concatenate the two incoming layers together.
    # shape = [batch, n-step, c_filter&w_dim]
    incoming = lasagne.layers.concat([output_cnn_layer, incoming2], axis=2)

    # dropout for incoming
    incoming = lasagne.layers.DropoutLayer(incoming, p=0.2, shared_axes=(1,))

    resetgate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                 W_cell=None, nonlinearity=nonlinearities.tanh)
    gru_forward = GRULayer(incoming, num_units, mask_input=mask,
                           resetgate=resetgate_forward, updategate=updategate_forward,
                           hidden_update=hidden_update_forward, grad_clipping=grad_clipping,
                           reset_input=reset_input, p=p, name='forward')

    resetgate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                  W_cell=None, nonlinearity=nonlinearities.tanh)
    gru_backward = GRULayer(incoming, num_units, mask_input=mask, backwards=True,
                            resetgate=resetgate_backward, updategate=updategate_backward,
                            hidden_update=hidden_update_backward, grad_clipping=grad_clipping,
                            reset_input=reset_input, p=p, name='backward')

    # concatenate the outputs of forward and backward LSTMs to combine them.
    bi_gru_cnn = lasagne.layers.concat([gru_forward, gru_backward], axis=2, name="bi-gru")

    # shape = [batch, n-step, num_units]
    bi_gru_cnn = lasagne.layers.DropoutLayer(bi_gru_cnn, p=p, shared_axes=(1,))

    # reshape bi-rnn-cnn to [batch * max_length, num_units]
    bi_gru_cnn = lasagne.layers.reshape(bi_gru_cnn, (-1, [2]))

    # construct output layer (dense layer with softmax)
    layer_output = lasagne.layers.DenseLayer(bi_gru_cnn, num_units=num_labels,
                                             nonlinearity=nonlinearities.softmax, name='softmax')

    return layer_output

def build_recur_dropout_sgru(incoming1, incoming2, num_units, num_labels, mask, grad_clipping, num_filters, p):
    # Construct Bi-directional LSTM-CNNs-CRF with recurrent dropout.
    # first get some necessary dimensions or parameters
    conv_window = 3
    # shape = [batch, n-step, c_dim, char_length]

    # construct convolution layer
    # shape = [batch, n-step, c_filters, output_length]
    cnn_layer = ConvTimeStep1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
                                    nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # infer the pool size for pooling (pool size should go through all time step of cnn)
    _, _, _, pool_size = cnn_layer.output_shape
    # construct max pool layer
    # shape = [batch, n-step, c_filters, 1]
    pool_layer = PoolTimeStep1DLayer(cnn_layer, pool_size=pool_size)
    # reshape: [batch, n-step, c_filters, 1] --> [batch, n-step, c_filters]
    output_cnn_layer = lasagne.layers.reshape(pool_layer, ([0], [1], [2]))

    # finally, concatenate the two incoming layers together.
    # shape = [batch, n-step, c_filter&w_dim]
    incoming = lasagne.layers.concat([output_cnn_layer, incoming2], axis=2)

    # dropout for incoming
    incoming = lasagne.layers.DropoutLayer(incoming, p=0.2, shared_axes=(1,))

    resetgate_input_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    resetgate_hidden_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                 W_cell=None, nonlinearity=nonlinearities.tanh)
    sgru_forward = SGRULayer(incoming, num_units, mask_input=mask,
                             resetgate_input=resetgate_input_forward, resetgate_hidden=resetgate_hidden_forward,
                             updategate=updategate_forward, hidden_update=hidden_update_forward,
                             grad_clipping=grad_clipping, p=p, name='forward')

    resetgate_input_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    resetgate_hidden_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                  W_cell=None, nonlinearity=nonlinearities.tanh)
    sgru_backward = SGRULayer(incoming, num_units, mask_input=mask, backwards=True,
                              resetgate_input=resetgate_input_backward, resetgate_hidden=resetgate_hidden_backward,
                              updategate=updategate_backward, hidden_update=hidden_update_backward,
                              grad_clipping=grad_clipping, p=p, name='backward')

    # concatenate the outputs of forward and backward LSTMs to combine them.
    bi_sgru_cnn = lasagne.layers.concat([sgru_forward, sgru_backward], axis=2, name="bi-sgru")

    # shape = [batch, n-step, num_units]
    bi_sgru_cnn = lasagne.layers.DropoutLayer(bi_sgru_cnn, p=p, shared_axes=(1,))

    # reshape bi-rnn-cnn to [batch * max_length, num_units]
    bi_sgru_cnn = lasagne.layers.reshape(bi_sgru_cnn, (-1, [2]))

    # construct output layer (dense layer with softmax)
    layer_output = lasagne.layers.DenseLayer(bi_sgru_cnn, num_units=num_labels,
                                             nonlinearity=nonlinearities.softmax, name='softmax')

    return layer_output

def buildModel(mtype=1):

    print "BUILDING MODEL TYPE", mtype, "..."

    #default settings (Model 1)
    filters = 64
    first_stride = 2
    last_filter_multiplier = 16

    #specific model type settings (see working notes for details)
    if mtype == 2:
        first_stride = 1
    elif mtype == 3:
        filters = 32
        last_filter_multiplier = 8

    #input layer
    net = l.InputLayer((None, IM_DIM, IM_SIZE[1], IM_SIZE[0]))

    #conv layers
    net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters, filter_size=7, pad='same', stride=first_stride, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.MaxPool2DLayer(net, pool_size=2)

    if mtype == 2:
        net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters, filter_size=5, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
        net = l.MaxPool2DLayer(net, pool_size=2)

    net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * 2, filter_size=5, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.MaxPool2DLayer(net, pool_size=2)

    net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * 4, filter_size=3, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.MaxPool2DLayer(net, pool_size=2)

    net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * 8, filter_size=3, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.MaxPool2DLayer(net, pool_size=2)

    net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * last_filter_multiplier, filter_size=3, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.MaxPool2DLayer(net, pool_size=2)

    print "\tFINAL POOL OUT SHAPE:", l.get_output_shape(net)

    #dense layers
    net = l.batch_norm(l.DenseLayer(net, 512, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.DropoutLayer(net, DROPOUT)
    net = l.batch_norm(l.DenseLayer(net, 512, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.DropoutLayer(net, DROPOUT)

    #Classification Layer
    if MULTI_LABEL:
        net = l.DenseLayer(net, NUM_CLASSES, nonlinearity=nonlinearities.sigmoid, W=init.HeNormal(gain=1))
    else:
        net = l.DenseLayer(net, NUM_CLASSES, nonlinearity=nonlinearities.softmax, W=init.HeNormal(gain=1))

    print "...DONE!"

    #model stats
    print "MODEL HAS", (sum(hasattr(layer, 'W') for layer in l.get_all_layers(net))), "WEIGHTED LAYERS"
    print "MODEL HAS", l.count_params(net), "PARAMS"

    return net

def buildModel(mtype=1):

    print "BUILDING MODEL TYPE", mtype, "..."

    #default settings (Model 1)
    filters = 64
    first_stride = 2
    last_filter_multiplier = 16

    #specific model type settings (see working notes for details)
    if mtype == 2:
        first_stride = 1
    elif mtype == 3:
        filters = 32
        last_filter_multiplier = 8

    #input layer
    net = l.InputLayer((None, IM_DIM, IM_SIZE[1], IM_SIZE[0]))

    #conv layers
    net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters, filter_size=7, pad='same', stride=first_stride, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.MaxPool2DLayer(net, pool_size=2)

    if mtype == 2:
        net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters, filter_size=5, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
        net = l.MaxPool2DLayer(net, pool_size=2)

    net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * 2, filter_size=5, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.MaxPool2DLayer(net, pool_size=2)

    net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * 4, filter_size=3, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.MaxPool2DLayer(net, pool_size=2)

    net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * 8, filter_size=3, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.MaxPool2DLayer(net, pool_size=2)

    net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * last_filter_multiplier, filter_size=3, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.MaxPool2DLayer(net, pool_size=2)

    print "\tFINAL POOL OUT SHAPE:", l.get_output_shape(net)

    #dense layers
    net = l.batch_norm(l.DenseLayer(net, 512, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.batch_norm(l.DenseLayer(net, 512, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))

    #Classification Layer
    if MULTI_LABEL:
        net = l.DenseLayer(net, NUM_CLASSES, nonlinearity=nonlinearities.sigmoid, W=init.HeNormal(gain=1))
    else:
        net = l.DenseLayer(net, NUM_CLASSES, nonlinearity=nonlinearities.softmax, W=init.HeNormal(gain=1))

    print "...DONE!"

    #model stats
    print "MODEL HAS", (sum(hasattr(layer, 'W') for layer in l.get_all_layers(net))), "WEIGHTED LAYERS"
    print "MODEL HAS", l.count_params(net), "PARAMS"

    return net

def build_model_resnet50(input_shape):
    net = {}
    net['input'] = InputLayer(input_shape)
    sub_net, parent_layer_name = build_simple_block(
        net['input'], ['conv1', 'bn_conv1', 'conv1_relu'],
        64, 7, 2, 3, use_bias=True)
    net.update(sub_net)
    net['pool1'] = PoolLayer(net[parent_layer_name], pool_size=3, stride=2, pad=0, mode='max', ignore_border=False)

    block_size = list('abc')
    parent_layer_name = 'pool1'
    for c in block_size:
        if c == 'a':
            sub_net, parent_layer_name = build_residual_block(net[parent_layer_name], 1, 1, True, 4, ix='2%s' % c)
        else:
            sub_net, parent_layer_name = build_residual_block(net[parent_layer_name], 1.0/4, 1, False, 4, ix='2%s' % c)
        net.update(sub_net)

    # block_size = ['a'] + ['b'+str(i+1) for i in range(7)]
    block_size = list('abcd')
    for c in block_size:
        if c == 'a':
            sub_net, parent_layer_name = build_residual_block(
                net[parent_layer_name], 1.0/2, 1.0/2, True, 4, ix='3%s' % c)
        else:
            sub_net, parent_layer_name = build_residual_block(net[parent_layer_name], 1.0/4, 1, False, 4, ix='3%s' % c)
        net.update(sub_net)

    # block_size = ['a'] + ['b'+str(i+1) for i in range(35)]
    block_size = list('abcdef')
    for c in block_size:
        if c == 'a':
            sub_net, parent_layer_name = build_residual_block(
                net[parent_layer_name], 1.0/2, 1.0/2, True, 4, ix='4%s' % c)
        else:
            sub_net, parent_layer_name = build_residual_block(net[parent_layer_name], 1.0/4, 1, False, 4, ix='4%s' % c)
        net.update(sub_net)

    block_size = list('abc')
    for c in block_size:
        if c == 'a':
            sub_net, parent_layer_name = build_residual_block(
                net[parent_layer_name], 1.0/2, 1.0/2, True, 4, ix='5%s' % c)
        else:
            sub_net, parent_layer_name = build_residual_block(net[parent_layer_name], 1.0/4, 1, False, 4, ix='5%s' % c)
        net.update(sub_net)

    net['pool5'] = PoolLayer(net[parent_layer_name], pool_size=7, stride=1, pad=0,
                             mode='average_exc_pad', ignore_border=False)
    net['fc1000'] = DenseLayer(net['pool5'], num_units=1000, nonlinearity=None,
                               W=lasagne.init.Normal(std=0.01, mean=0.0))
    net['prob'] = NonlinearityLayer(net['fc1000'], nonlinearity=softmax)

    return net

# model hyperparams

def build_model_resnet152(input_shape):
    net = {}
    net['input'] = InputLayer(input_shape)
    sub_net, parent_layer_name = build_simple_block(
        net['input'], ['conv1', 'bn_conv1', 'conv1_relu'],
        64, 7, 2, 3, use_bias=True)
    net.update(sub_net)
    net['pool1'] = PoolLayer(net[parent_layer_name], pool_size=3, stride=2, pad=0, mode='max', ignore_border=False)

    block_size = list('abc')
    parent_layer_name = 'pool1'
    for c in block_size:
        if c == 'a':
            sub_net, parent_layer_name = build_residual_block(net[parent_layer_name], 1, 1, True, 4, ix='2%s' % c)
        else:
            sub_net, parent_layer_name = build_residual_block(net[parent_layer_name], 1.0/4, 1, False, 4, ix='2%s' % c)
        net.update(sub_net)

    block_size = ['a'] + ['b'+str(i+1) for i in range(7)]
    # block_size = list('abcd')
    for c in block_size:
        if c == 'a':
            sub_net, parent_layer_name = build_residual_block(
                net[parent_layer_name], 1.0/2, 1.0/2, True, 4, ix='3%s' % c)
        else:
            sub_net, parent_layer_name = build_residual_block(net[parent_layer_name], 1.0/4, 1, False, 4, ix='3%s' % c)
        net.update(sub_net)

    block_size = ['a'] + ['b'+str(i+1) for i in range(35)]
    # block_size = list('abcdef')
    for c in block_size:
        if c == 'a':
            sub_net, parent_layer_name = build_residual_block(
                net[parent_layer_name], 1.0/2, 1.0/2, True, 4, ix='4%s' % c)
        else:
            sub_net, parent_layer_name = build_residual_block(net[parent_layer_name], 1.0/4, 1, False, 4, ix='4%s' % c)
        net.update(sub_net)

    block_size = list('abc')
    for c in block_size:
        if c == 'a':
            sub_net, parent_layer_name = build_residual_block(
                net[parent_layer_name], 1.0/2, 1.0/2, True, 4, ix='5%s' % c)
        else:
            sub_net, parent_layer_name = build_residual_block(net[parent_layer_name], 1.0/4, 1, False, 4, ix='5%s' % c)
        net.update(sub_net)

    net['pool5'] = PoolLayer(net[parent_layer_name], pool_size=7, stride=1, pad=0,
                             mode='average_exc_pad', ignore_border=False)
    net['fc1000'] = DenseLayer(net['pool5'], num_units=1000, nonlinearity=None)
    net['prob'] = NonlinearityLayer(net['fc1000'], nonlinearity=softmax)

    print('Total number of layers:', len(lasagne.layers.get_all_layers(net['prob'])))

    return net

# model hyperparams

def main():
    ################
    # LOAD DATASET #
    ################
    dataset = './data/ubiquitous_aug.hkl'
    kfd = './data/ubiquitous_kfold.hkl'
    print('Loading dataset {}...'.format(dataset))
    X, y = hkl.load(open(dataset, 'r'))
    X = X.reshape(-1, 4, 1, 400).astype(floatX)
    y = y.astype('int32')
    print('X shape: {}, y shape: {}'.format(X.shape, y.shape))
    kf = hkl.load(open(kfd, 'r'))
    kfold = [(train, test) for train, test in kf]
    (train, test) = kfold[0]
    print('train_set size: {}, test_set size: {}'.format(len(train), len(test)))

    # shuffle +/- labels in minibatch
    print('shuffling train_set and test_set')
    shuffle(train)
    shuffle(test)
    X_train = X[train]
    X_test = X[test]
    y_train = y[train]
    y_test = y[test]
    print('data prepared!')

    layers = [
        (InputLayer, {'shape': (None, 4, 1, 400)}),
        (Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 4)}),
        (Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 3)}),
        (Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 3)}),
        (MaxPool2DLayer, {'pool_size': (1, 2)}),
        (Conv2DLayer, {'num_filters': 32, 'filter_size': (1, 2)}),
        (Conv2DLayer, {'num_filters': 32, 'filter_size': (1, 2)}),
        (Conv2DLayer, {'num_filters': 32, 'filter_size': (1, 2)}),
        (MaxPool2DLayer, {'pool_size': (1, 2)}),
        (DenseLayer, {'num_units': 64}),
        (DropoutLayer, {}),
        (DenseLayer, {'num_units': 64}),
        (DenseLayer, {'num_units': 2, 'nonlinearity': softmax})]

    net = NeuralNet(
        layers=layers,
        max_epochs=100,
        update=adam,
        update_learning_rate=1e-4,
        train_split=TrainSplit(eval_size=0.1),
        on_epoch_finished=[
            AdjustVariable(1e-4, target=0, half_life=20)],
        verbose=2)

    net.fit(X_train, y_train)
    plot_loss(net)

def main(resume=None):
    l = 300
    dataset = './data/ubiquitous_train.hkl'
    print('Loading dataset {}...'.format(dataset))
    X_train, y_train = hkl.load(dataset)
    X_train = X_train.reshape(-1, 4, 1, l).astype(floatX)
    y_train = np.array(y_train, dtype='int32')
    indice = np.arange(X_train.shape[0])
    np.random.shuffle(indice)
    X_train = X_train[indice]
    y_train = y_train[indice]
    print('X_train shape: {}, y_train shape: {}'.format(X_train.shape, y_train.shape))

    layers = [
        (InputLayer, {'shape': (None, 4, 1, l)}),
        (Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 4)}),
        (Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 3)}),
        (Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 3)}),
        (MaxPool2DLayer, {'pool_size': (1, 2)}),
        (Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 2)}),
        (Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 2)}),
        (Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 2)}),
        (MaxPool2DLayer, {'pool_size': (1, 2)}),
        (DenseLayer, {'num_units': 64}),
        (DropoutLayer, {}),
        (DenseLayer, {'num_units': 64}),
        (DenseLayer, {'num_units': 2, 'nonlinearity': softmax})]

    lr = theano.shared(np.float32(1e-4))

    net = NeuralNet(
        layers=layers,
        max_epochs=100,
        update=adam,
        update_learning_rate=lr,
        train_split=TrainSplit(eval_size=0.1),
        on_epoch_finished=[
            AdjustVariable(lr, target=1e-8, half_life=20)],
        verbose=4)

    if resume != None:
        net.load_params_from(resume)

    net.fit(X_train, y_train)

    net.save_params_to('./models/net_params.pkl')

def get_output_for(self, inputs, **kwargs):
    # typical GRU, but prediction produced by softmax layer is applied to GRU's input
    q = inputs[0]
    m = inputs[1]
    epmem_dropout = inputs[2]

    #q = q * self.rand_stream.binomial(q.shape, p=1-epmem_dropout, dtype=theano.config.floatX)
    m = m * self.rand_stream.binomial(m.shape, p=1-epmem_dropout, dtype=theano.config.floatX)

    W_in_stacked = T.concatenate([self.W_in_to_resetgate, self.W_in_to_updategate, self.W_in_to_hid_update], axis=1)
    W_hid_stacked = T.concatenate([self.W_hid_to_resetgate, self.W_hid_to_updategate, self.W_hid_to_hid_update], axis=1)
    b_stacked = T.concatenate([self.b_resetgate, self.b_updategate, self.b_hid_update], axis=0)

    def slice_w(x, n):
        return x[:, n*self.hid_state_size:(n+1)*self.hid_state_size]

    def get_output(a):
        return nonlin.softmax(T.dot(a, self.W))

    def step(hid_previous, out_previous, *args):
        input_n = T.concatenate([out_previous, q], axis=1)

        hid_input = T.dot(hid_previous, W_hid_stacked)
        input_n = T.dot(input_n, W_in_stacked) + b_stacked

        resetgate = slice_w(hid_input, 0) + slice_w(input_n, 0)
        updategate = slice_w(hid_input, 1) + slice_w(input_n, 1)
        resetgate = self.nonlinearity_resetgate(resetgate)
        updategate = self.nonlinearity_updategate(updategate)

        hid_update_in = slice_w(input_n, 2)
        hid_update_hid = slice_w(hid_input, 2)
        hid_update = hid_update_in + resetgate*hid_update_hid

        hid_update = self.nonlinearity_hid(hid_update)

        # standard GRU update: convex combination of previous and candidate hidden state
        hid = (1 - updategate)*hid_previous + updategate*hid_update
        out = nonlin.softmax(T.dot(hid, self.W))

        return (hid, out)

    non_seqs = [W_in_stacked, b_stacked, W_hid_stacked, q, m, self.W]
    hid_and_out, b = theano.scan(
        fn=step,
        outputs_info=[m, get_output(m)],
        non_sequences=non_seqs,
        strict=True,
        n_steps=self.max_answer_word)

    return T.transpose(hid_and_out[1], (1, 0, 2))

def __init__(self, x, y, args):
    self.params_theta = []
    self.params_lambda = []
    self.params_weight = []
    if args.dataset == 'mnist':
        input_size = (None, 28*28)
    elif args.dataset == 'cifar10':
        input_size = (None, 3, 32*32)
    else:
        raise AssertionError

    layers = [ll.InputLayer(input_size)]
    penalty = theano.shared(np.array(0.))

    for (k, num) in enumerate(args.MLPlayer):

        # the last layer should use softmax
        if k == len(args.MLPlayer) - 1:
            # layers.append(ll.DenseLayer(layers[-1], num, nonlinearity=nonlinearities.softmax))
            layers.append(DenseLayerWithReg(args, layers[-1], num_units=num, nonlinearity=nonlinearities.softmax))
        else:
            # layers.append(ll.DenseLayer(layers[-1], num))
            layers.append(DenseLayerWithReg(args, layers[-1], num_units=num))

        if layers[-1].W is not None:
            self.params_theta += [layers[-1].W, layers[-1].b]
            self.params_weight += [layers[-1].W]

            # define new regularization term for a layer
            if args.regL2 is True:
                tempL2 = layers[-1].L2 * T.sqr(layers[-1].W)
                penalty += T.sum(tempL2)
                self.params_lambda += [layers[-1].L2]
            if args.regL1 is True:
                tempL1 = layers[-1].L1 * layers[-1].W
                penalty += T.sum(tempL1)
                self.params_lambda += [layers[-1].L1]

    self.layers = layers
    self.y = ll.get_output(layers[-1], x, deterministic=False)
    self.prediction = T.argmax(self.y, axis=1)
    self.penalty = penalty
    # self.penalty = penalty if penalty != 0. else T.constant(0.)
    print(self.params_lambda)
    # time.sleep(20)

    # cost function
    self.loss = T.mean(categorical_crossentropy(self.y, y))
    self.lossWithPenalty = T.add(self.loss, self.penalty)
    print "loss and losswithpenalty", type(self.loss), type(self.lossWithPenalty)
    # self.classError = T.mean(T.cast(T.neq(self.prediction, y), 'float32'))

def __init__(
        self,
        incomings,
        num_units,
        W_g=init.Normal(0.1),
        W_h=init.Normal(0.1),
        W_v=init.Normal(0.1),
        W_s=init.Normal(0.1),
        W_p=init.Normal(0.1),
        nonlinearity=nonlinearities.tanh,
        nonlinearity_atten=nonlinearities.softmax,
        **kwargs
):
    super(AttenLayer, self).__init__(incomings, **kwargs)
    self.batch_size = self.input_shapes[0][0]  # None
    num_inputs = self.input_shapes[2][1]  # k
    feature_dim = self.input_shapes[0][1]  # d
    self.num_units = num_units
    self.nonlinearity = nonlinearity
    self.nonlinearity_atten = nonlinearity_atten

    self.W_h_to_attenGate = self.add_param(
        W_h,
        (num_inputs, 1),
        name='W_h_to_atten'
    )
    self.W_g_to_attenGate = self.add_param(
        W_g,
        (feature_dim, num_inputs),
        name='W_g_to_atten'
    )
    self.W_v_to_attenGate = self.add_param(
        W_v,
        (feature_dim, num_inputs),
        name='W_v_to_atten'
    )
    self.W_s_to_attenGate = self.add_param(
        W_s,
        (feature_dim, num_inputs),
        name='W_s_to_atten'
    )
    self.W_p = self.add_param(
        W_p,
        (feature_dim, num_units),
        name='W_p_to_atten'
    )
    self.num_inputs = num_inputs

def get_output_for(self, inputs, **kwargs):
    s_hat_t = inputs[0]
    h_hat_t = inputs[1]
    # s_hat_t = s_hat_t.dimshuffle(1, 0)
    # h_hat_t = h_hat_t.dimshuffle(1, 0)
    H = inputs[2]
    # H = H.dimshuffle(2, 0, 1)
    # H_len = H.shape[-1]

    # z_t 1*none*k
    zt = T.dot(
        self.nonlinearity(
            T.dot(H, self.W_v_to_attenGate) + T.dot(
                T.dot(h_hat_t, self.W_g_to_attenGate).dimshuffle(0, 1, 'x'),
                T.ones((1, self.num_inputs))
            )
        ),
        self.W_h_to_attenGate
    )[:, :, 0]

    vt = T.dot(
        self.nonlinearity(
            T.dot(
                s_hat_t,
                self.W_s_to_attenGate
            ) + T.dot(
                h_hat_t,
                self.W_g_to_attenGate
            )
        ),
        self.W_h_to_attenGate
    )

    alpha_hat_t = self.nonlinearity_atten(T.concatenate(
        [zt, vt], axis=-1
    ))

    feature = T.concatenate(
        [H, s_hat_t.dimshuffle(0, 'x', 1)],
        axis=1
    ).dimshuffle(2, 0, 1)

    c_hat_t = T.sum(alpha_hat_t*feature, axis=-1)

    out = T.dot(
        (c_hat_t.T + h_hat_t),
        self.W_p
    )
    return nonlinearities.softmax(out)

def build_model(self, input_var, forward, dropout):
    net = dict()
    net['input'] = InputLayer((None, 3, None, None), input_var=input_var)
    net['conv1/7x7_s2'] = ConvLayer(net['input'], 64, 7, stride=2, pad=3, flip_filters=False)
    net['pool1/3x3_s2'] = PoolLayer(net['conv1/7x7_s2'], pool_size=3, stride=2, ignore_border=False)
    net['pool1/norm1'] = LRNLayer(net['pool1/3x3_s2'], alpha=0.00002, k=1)
    net['conv2/3x3_reduce'] = ConvLayer(net['pool1/norm1'], 64, 1, flip_filters=False)
    net['conv2/3x3'] = ConvLayer(net['conv2/3x3_reduce'], 192, 3, pad=1, flip_filters=False)
    net['conv2/norm2'] = LRNLayer(net['conv2/3x3'], alpha=0.00002, k=1)
    net['pool2/3x3_s2'] = PoolLayerDNN(net['conv2/norm2'], pool_size=3, stride=2)

    net.update(self.build_inception_module('inception_3a', net['pool2/3x3_s2'], [32, 64, 96, 128, 16, 32]))
    net.update(self.build_inception_module('inception_3b', net['inception_3a/output'], [64, 128, 128, 192, 32, 96]))
    net['pool3/3x3_s2'] = PoolLayerDNN(net['inception_3b/output'], pool_size=3, stride=2)

    net.update(self.build_inception_module('inception_4a', net['pool3/3x3_s2'], [64, 192, 96, 208, 16, 48]))
    net.update(self.build_inception_module('inception_4b', net['inception_4a/output'], [64, 160, 112, 224, 24, 64]))
    net.update(self.build_inception_module('inception_4c', net['inception_4b/output'], [64, 128, 128, 256, 24, 64]))
    net.update(self.build_inception_module('inception_4d', net['inception_4c/output'], [64, 112, 144, 288, 32, 64]))
    net.update(self.build_inception_module('inception_4e', net['inception_4d/output'], [128, 256, 160, 320, 32, 128]))
    net['pool4/3x3_s2'] = PoolLayerDNN(net['inception_4e/output'], pool_size=3, stride=2)

    net.update(self.build_inception_module('inception_5a', net['pool4/3x3_s2'], [128, 256, 160, 320, 32, 128]))
    net.update(self.build_inception_module('inception_5b', net['inception_5a/output'], [128, 384, 192, 384, 48, 128]))
    net['pool5/7x7_s1'] = GlobalPoolLayer(net['inception_5b/output'])

    if forward:
        #net['fc6'] = DenseLayer(net['pool5/7x7_s1'], num_units=1000)
        net['prob'] = DenseLayer(net['pool5/7x7_s1'], num_units=4, nonlinearity=softmax)
    else:
        net['dropout1'] = DropoutLayer(net['pool5/7x7_s1'], p=dropout)
        #net['fc6'] = DenseLayer(net['dropout1'], num_units=1000)
        #net['dropout2'] = DropoutLayer(net['fc6'], p=dropout)
        net['prob'] = DenseLayer(net['dropout1'], num_units=4, nonlinearity=softmax)

    return net