The following 43 code examples, extracted from open-source Python projects, illustrate how to use lasagne.init.Normal().
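Before the project snippets, here is a minimal, self-contained sketch (not drawn from any of the projects below) of the two usual ways Normal is used; the layer sizes are arbitrary placeholders.

from lasagne.init import Normal
from lasagne.layers import InputLayer, DenseLayer

# Normal(std, mean) draws weights from a Gaussian; the defaults are std=0.01, mean=0.0.
init = Normal(std=0.05)

# 1) Sample an array directly, e.g. for a hand-managed shared variable.
W_values = init.sample((128, 64))   # numpy array with shape (128, 64)

# 2) Pass the initializer to a layer; Lasagne calls .sample() with the proper shape.
l_in = InputLayer(shape=(None, 128))
l_hid = DenseLayer(l_in, num_units=64, W=Normal(0.05))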
def __init__(self, incoming, num_units, max_steps, peepholes=False,
             mask_input=None, **kwargs):
    """
    initialization
    :param incoming: bidirectional mLSTM for passage
    :param num_units:
    :param max_steps: max num steps to generate answer words, can be tensor scalar variable
    :param peepholes:
    :param mask_input: passage's length mask
    :param kwargs:
    """
    super(AnsPointerLayer, self).__init__(incoming, num_units, peepholes=peepholes,
                                          precompute_input=False, mask_input=mask_input,
                                          only_return_final=False, **kwargs)
    self.max_steps = max_steps

    # initializes attention weights
    input_shape = self.input_shapes[0]
    num_inputs = np.prod(input_shape[2:])

    self.V_pointer = self.add_param(init.Normal(0.1), (num_inputs, num_units), 'V_pointer')
    # doesn't need transpose
    self.v_pointer = self.add_param(init.Normal(0.1), (num_units, 1), 'v_pointer')
    self.W_a_pointer = self.add_param(init.Normal(0.1), (num_units, num_units), 'W_a_pointer')
    self.b_a_pointer = self.add_param(init.Constant(0.), (1, num_units), 'b_a_pointer')
    self.c_pointer = self.add_param(init.Constant(0.), (1, 1), 'c_pointer')
def __init__(self, incoming, num_units, max_steps, peepholes=False,
             mask_input=None, **kwargs):
    """
    initialization
    :param incoming: bidirectional mLSTM for passage
    :param num_units:
    :param max_steps: max num steps to generate answer words, can be tensor scalar variable
    :param peepholes:
    :param mask_input: passage's length mask
    :param kwargs:
    """
    super(AnsPointerLayer, self).__init__(incoming, num_units, peepholes=peepholes,
                                          precompute_input=False, mask_input=mask_input,
                                          only_return_final=False, **kwargs)
    self.max_steps = max_steps

    # initializes attention weights
    input_shape = self.input_shapes[0]
    num_inputs = np.prod(input_shape[2:])

    self.V_pointer = self.add_param(init.Normal(0.1), (num_inputs, num_units), 'V_pointer')
    # doesn't need transpose
    self.v_pointer = self.add_param(init.Normal(0.1), (num_units, 1), 'v_pointer')
    self.W_a_pointer = self.add_param(init.Normal(0.1), (num_units, num_units), 'W_a_pointer')
    self.b_a_pointer = self.add_param(init.Constant(0.), (num_units, ), 'b_a_pointer')

    c_pointer = theano.shared(np.array([0.], dtype='float32'), name='c_pointer', broadcastable=(True, ))
    self.c_pointer = self.add_param(c_pointer, (1,), 'c_pointer')
def __init__(self, incoming, n_slots, d_slots, C=init.GlorotUniform(), M=init.Normal(),
             b=init.Constant(0.), nonlinearity_final=nonlinearities.identity, **kwargs):
    super(MemoryLayer, self).__init__(incoming, **kwargs)
    self.nonlinearity_final = nonlinearity_final
    self.n_slots = n_slots
    self.d_slots = d_slots

    num_inputs = int(np.prod(self.input_shape[1:]))
    self.C = self.add_param(C, (num_inputs, n_slots), name="C")  # controller
    self.M = self.add_param(M, (n_slots, d_slots), name="M")     # memory slots
    if b is None:
        self.b = None
    else:
        self.b = self.add_param(b, (n_slots,), name="b", regularizable=False)
def discriminator(input_var):
    network = lasagne.layers.InputLayer(shape=(None, 1, 28, 28), input_var=input_var)
    network = ll.DropoutLayer(network, p=0.5)
    network = nn.weight_norm(dnn.Conv2DDNNLayer(network, 64, (4, 4), pad='valid', W=Normal(0.05), nonlinearity=nn.lrelu))
    network = nn.weight_norm(dnn.Conv2DDNNLayer(network, 32, (5, 5), stride=2, pad='valid', W=Normal(0.05), nonlinearity=nn.lrelu))
    network = nn.weight_norm(dnn.Conv2DDNNLayer(network, 32, (5, 5), pad='valid', W=Normal(0.05), nonlinearity=nn.lrelu))
    network = nn.weight_norm(dnn.Conv2DDNNLayer(network, 32, (5, 5), pad='valid', W=Normal(0.05), nonlinearity=nn.lrelu))
    network = nn.weight_norm(dnn.Conv2DDNNLayer(network, 16, (3, 3), pad='valid', W=Normal(0.05), nonlinearity=nn.lrelu))
    network = nn.weight_norm(ll.DenseLayer(network, num_units=1, W=Normal(0.05), nonlinearity=None), train_g=True, init_stdv=0.1)
    return network
def generator(input_var):
    network = lasagne.layers.InputLayer(shape=(None, NLAT, 1, 1), input_var=input_var)
    network = ll.DenseLayer(network, num_units=4*4*64, W=Normal(0.05), nonlinearity=nn.relu)
    #print(input_var.shape[0])
    network = ll.ReshapeLayer(network, (batch_size, 64, 4, 4))
    network = nn.Deconv2DLayer(network, (batch_size, 32, 7, 7), (4, 4), stride=(1, 1), pad='valid', W=Normal(0.05), nonlinearity=nn.relu)
    network = nn.Deconv2DLayer(network, (batch_size, 32, 11, 11), (5, 5), stride=(1, 1), pad='valid', W=Normal(0.05), nonlinearity=nn.relu)
    network = nn.Deconv2DLayer(network, (batch_size, 32, 25, 25), (5, 5), stride=(2, 2), pad='valid', W=Normal(0.05), nonlinearity=nn.relu)
    network = nn.Deconv2DLayer(network, (batch_size, 1, 28, 28), (4, 4), stride=(1, 1), pad='valid', W=Normal(0.05), nonlinearity=sigmoid)
    #network = lasagne.layers.Conv2DLayer(network, num_filters=1, filter_size=1, stride=1, nonlinearity=sigmoid)
    return network

# In[23]:
def __init__(self, vocab_size, learning_rate=LEARNING_RATE, grad_clip=GRAD_CLIP,
             init_embedding=Normal()):
    self.vocab_size = vocab_size
    self.lr = learning_rate
    self.gc = grad_clip
    self.W = init_embedding

    if USE_GRU:
        self.rnn_layer = GRULayer
    else:
        self.rnn_layer = LSTMLayer

    if CONSTANTLY_FEED_HIDDEN_STATE:
        self.net = self._get_feed_net()  # seq2seq v2
    else:
        self.net = self._get_net()       # seq2seq v1

    self.train = self._get_train_fun()
    self.predict = self._get_predict_fun()
    # self.encode = self._get_encoder_fun()
    # self.decode = self._get_decoder_fun()
    # self.embedding = self._get_embedding_fun()
    # self.slicing = self._get_slice_fun()
    # self.decoding = self._get_dec_fun()
def __init__(self, W_in=init.Normal(0.1), W_hid=init.Normal(0.1),
             W_cell=init.Normal(0.1), W_to=init.Normal(0.1),
             b=init.Constant(0.), nonlinearity=nonlinearities.sigmoid):
    self.W_in = W_in
    self.W_hid = W_hid
    self.W_to = W_to
    # Don't store a cell weight vector when cell is None
    if W_cell is not None:
        self.W_cell = W_cell
    self.b = b
    # For the nonlinearity, if None is supplied, use identity
    if nonlinearity is None:
        self.nonlinearity = nonlinearities.identity
    else:
        self.nonlinearity = nonlinearity
def __init__(self, incoming, num_filters, filter_size, stride=(1, 1),
             crop=0, untie_biases=False,
             W=initmethod(), b=lasagne.init.Constant(0.),
             nonlinearity=lasagne.nonlinearities.rectify, flip_filters=False,
             **kwargs):
    super(DeconvLayer, self).__init__(
        incoming, num_filters, filter_size, stride, crop, untie_biases,
        W, b, nonlinearity, flip_filters, n=2, **kwargs)
    # rename self.crop to self.pad
    self.crop = self.pad
    del self.pad
def InceptionUpscaleLayer(incoming, param_dict, block_name):
    branch = [0]*len(param_dict)
    # Loop across branches
    for i, dict in enumerate(param_dict):
        for j, style in enumerate(dict['style']):  # Loop up branch
            branch[i] = TC2D(
                incoming = branch[i] if j else incoming,
                num_filters = dict['num_filters'][j],
                filter_size = dict['filter_size'][j],
                crop = dict['pad'][j] if 'pad' in dict else None,
                stride = dict['stride'][j],
                W = initmethod('relu'),
                nonlinearity = dict['nonlinearity'][j],
                name = block_name+'_'+str(i)+'_'+str(j)) if style=='convolutional'\
            else NL(
                incoming = lasagne.layers.dnn.Pool2DDNNLayer(
                    incoming = lasagne.layers.Upscale2DLayer(
                        incoming = incoming if j == 0 else branch[i],
                        scale_factor = dict['stride'][j]),
                    pool_size = dict['filter_size'][j],
                    stride = [1, 1],
                    mode = dict['mode'][j],
                    pad = dict['pad'][j],
                    name = block_name+'_'+str(i)+'_'+str(j)),
                nonlinearity = dict['nonlinearity'][j])
            # Apply Batchnorm
            branch[i] = BN(branch[i], name = block_name+'_bnorm_'+str(i)+'_'+str(j)) if dict['bnorm'][j] else branch[i]
    # Concatenate Sublayers
    return CL(incomings=branch, name=block_name)

# Convenience function to efficiently generate param dictionaries for use with InceptioNlayer
def pd(num_layers=2, num_filters=32, filter_size=(3, 3), pad=1, stride=(1, 1),
       nonlinearity=elu, style='convolutional', bnorm=1, **kwargs):
    input_args = locals()
    input_args.pop('num_layers')
    return {key: entry if type(entry) is list else [entry]*num_layers
            for key, entry in input_args.iteritems()}

# Possible Conv2DDNN convenience function. Remember to delete the C2D import at the top if you use this
# def C2D(incoming=None, num_filters=32, filter_size=[3,3], pad='same', stride=[1,1],
#         W=initmethod('relu'), nonlinearity=elu, name=None):
#     return lasagne.layers.dnn.Conv2DDNNLayer(incoming, num_filters, filter_size, stride, pad, False, W, None, nonlinearity, False)

# Shape-Preserving Gaussian Sample layer for latent vectors with spatial dimensions.
# This is a holdover from an "old" (i.e. I abandoned it last month) idea.
def __init__(self, incoming, num_units, peepholes=True, backwards=False,
             mask_input=None, only_return_final=True,
             encoder_input=None, encoder_mask_input=None, **kwargs):
    super(MatchLSTM, self).__init__(incoming, num_units, peepholes=peepholes,
                                    backwards=backwards, precompute_input=False,
                                    mask_input=mask_input,
                                    only_return_final=only_return_final, **kwargs)
    # encoder mask
    self.encoder_input_incoming_index = -1
    self.encoder_mask_incoming_index = -1
    if encoder_mask_input is not None:
        self.input_layers.append(encoder_mask_input)
        self.input_shapes.append(encoder_mask_input.output_shape)
        self.encoder_mask_incoming_index = len(self.input_layers) - 1
    if encoder_input is not None:
        self.input_layers.append(encoder_input)
        encoder_input_output_shape = encoder_input.output_shape
        self.input_shapes.append(encoder_input_output_shape)
        self.encoder_input_incoming_index = len(self.input_layers) - 1
        # hidden state length should equal to embedding size
        assert encoder_input_output_shape[-1] == num_units
        # input features length should equal to embedding size plus hidden state length
        assert encoder_input_output_shape[-1] + num_units == self.input_shapes[0][-1]

    # initializes attention weights
    self.W_y_attend = self.add_param(init.Normal(0.1), (num_units, num_units), 'V_pointer')
    self.W_h_attend = self.add_param(init.Normal(0.1), (num_units, num_units), 'W_h_attend')
    # doesn't need transpose
    self.w_attend = self.add_param(init.Normal(0.1), (num_units, 1), 'v_pointer')
    self.W_m_attend = self.add_param(init.Normal(0.1), (num_units, num_units), 'W_a_pointer')
def __init__(self, incoming, n_slots, d_slots, M=init.Normal(),
             nonlinearity_final=nonlinearities.identity, **kwargs):
    super(SeparateMemoryLayer, self).__init__(incoming, **kwargs)
    self.nonlinearity_final = nonlinearity_final
    self.n_slots = n_slots
    self.d_slots = d_slots
    self.M = self.add_param(M, (n_slots, d_slots), name="M")  # memory slots
def __init__(self, incomings, voc_size, hid_state_size, W=Normal(), **kwargs):
    # Initialize parameters and create theano variables
    super(SemMemModule, self).__init__(incomings, **kwargs)
    self.hid_state_size = hid_state_size
    self.W = self.add_param(W, (voc_size, hid_state_size), name='Word_Embedding', regularizable=False)
    self.rand_stream = RandomStreams(np.random.randint(1, 2147462579))
def __init__(self, W_in=Normal(0.1), W_hid=Normal(0.1), b=Constant(0.),
             nonlinearity=nonlin.sigmoid):
    self.W_in = W_in
    self.W_hid = W_hid
    self.b = b
    if nonlinearity is None:
        self.nonlinearity = nonlin.identity
    else:
        self.nonlinearity = nonlinearity
def __init__(self, incomings, hid_state_size, voc_size,
             resetgate=GRU_Gate(), updategate=GRU_Gate(),
             hid_update=GRU_Gate(nonlinearity=nonlin.tanh),
             W=Normal(), max_answer_word=1, **kwargs):
    super(AnswerModule, self).__init__(incomings, **kwargs)
    self.hid_state_size = hid_state_size

    # FOR GRU
    input_shape = self.input_shapes[0]
    num_inputs = np.prod(input_shape[1]) + voc_size  # concatenation of previous prediction

    def add_gate(gate, gate_name):
        return (self.add_param(gate.W_in, (num_inputs, hid_state_size),
                               name="W_in_to_{}".format(gate_name)),
                self.add_param(gate.W_hid, (hid_state_size, hid_state_size),
                               name="W_hid_to_{}".format(gate_name)),
                self.add_param(gate.b, (hid_state_size,),
                               name="b_{}".format(gate_name), regularizable=False),
                gate.nonlinearity)

    # Add in all parameters from gates
    (self.W_in_to_updategate, self.W_hid_to_updategate, self.b_updategate,
     self.nonlinearity_updategate) = add_gate(updategate, 'updategate')
    (self.W_in_to_resetgate, self.W_hid_to_resetgate, self.b_resetgate,
     self.nonlinearity_resetgate) = add_gate(resetgate, 'resetgate')
    (self.W_in_to_hid_update, self.W_hid_to_hid_update, self.b_hid_update,
     self.nonlinearity_hid) = add_gate(hid_update, 'hid_update')

    self.W = self.add_param(W, (hid_state_size, voc_size), name="W")
    self.max_answer_word = max_answer_word
    self.rand_stream = RandomStreams(np.random.randint(1, 2147462579))
def __init__(self, W_g=init.Normal(0.1), W_s=init.Normal(0.1), W_h=init.Normal(0.1),
             W_v=init.Normal(0.1), nonlinearity=nonlinearities.softmax):
    self.W_s = W_s
    self.W_h = W_h
    self.W_g = W_g
    self.W_v = W_v
    if nonlinearity is None:
        self.nonlinearity = nonlinearities.identity
    else:
        self.nonlinearity = nonlinearity
def __init__(self, incoming, num_centers, locs=init.Normal(std=1),
             log_sigma=init.Constant(0.), **kwargs):
    super(RBFLayer, self).__init__(incoming, **kwargs)
    self.num_centers = num_centers

    assert len(self.input_shape) == 2
    in_dim = self.input_shape[1]
    self.locs = self.add_param(locs, (num_centers, in_dim), name='locs',
                               regularizable=False)
    self.log_sigma = self.add_param(log_sigma, (), name='log_sigma')
def __init__(self, incoming, num_freqs, freqs=init.Normal(std=1),
             log_sigma=init.Constant(0.), **kwargs):
    super(SmoothedCFLayer, self).__init__(incoming, **kwargs)
    self.num_freqs = num_freqs

    assert len(self.input_shape) == 2
    in_dim = self.input_shape[1]
    self.freqs = self.add_param(freqs, (num_freqs, in_dim), name='freqs')
    self.log_sigma = self.add_param(log_sigma, (), name='log_sigma')
def __init__(self, incoming, b=lasagne.init.Constant(0.), g=lasagne.init.Constant(1.),
             W=lasagne.init.Normal(0.05), train_g=False, init_stdv=1., nonlinearity=relu, **kwargs):
    super(WeightNormLayer, self).__init__(incoming, **kwargs)
    self.nonlinearity = nonlinearity
    self.init_stdv = init_stdv
    k = self.input_shape[1]
    if b is not None:
        self.b = self.add_param(b, (k,), name="b", regularizable=False)
    if g is not None:
        self.g = self.add_param(g, (k,), name="g", regularizable=False, trainable=train_g)
    if len(self.input_shape) == 4:
        self.axes_to_sum = (0, 2, 3)
        self.dimshuffle_args = ['x', 0, 'x', 'x']
    else:
        self.axes_to_sum = 0
        self.dimshuffle_args = ['x', 0]

    # scale weights in layer below
    incoming.W_param = incoming.W
    #incoming.W_param.set_value(W.sample(incoming.W_param.get_value().shape))
    if incoming.W_param.ndim == 4:
        if isinstance(incoming, Deconv2DLayer):
            W_axes_to_sum = (0, 2, 3)
            W_dimshuffle_args = ['x', 0, 'x', 'x']
        else:
            W_axes_to_sum = (1, 2, 3)
            W_dimshuffle_args = [0, 'x', 'x', 'x']
    else:
        W_axes_to_sum = 0
        W_dimshuffle_args = ['x', 0]
    if g is not None:
        incoming.W = incoming.W_param * (self.g / T.sqrt(1e-6 + T.sum(T.square(incoming.W_param), axis=W_axes_to_sum))).dimshuffle(*W_dimshuffle_args)
    else:
        incoming.W = incoming.W_param / T.sqrt(1e-6 + T.sum(T.square(incoming.W_param), axis=W_axes_to_sum, keepdims=True))
def __init__(self, incoming, target_shape, filter_size, stride=(2, 2), pad='half',
             W=lasagne.init.Normal(0.05), b=lasagne.init.Constant(0.), nonlinearity=relu, **kwargs):
    super(Deconv2DLayer, self).__init__(incoming, **kwargs)
    self.target_shape = target_shape
    self.nonlinearity = (lasagne.nonlinearities.identity if nonlinearity is None else nonlinearity)
    self.filter_size = lasagne.layers.dnn.as_tuple(filter_size, 2)
    self.stride = lasagne.layers.dnn.as_tuple(stride, 2)
    self.pad = pad

    self.W_shape = (incoming.output_shape[1], target_shape[1], filter_size[0], filter_size[1])
    self.W = self.add_param(W, self.W_shape, name="W")
    if b is not None:
        self.b = self.add_param(b, (target_shape[1],), name="b")
    else:
        self.b = None
def __init__(self, incoming, num_kernels, dim_per_kernel=5, theta=lasagne.init.Normal(0.05),
             log_weight_scale=lasagne.init.Constant(0.), b=lasagne.init.Constant(-1.), **kwargs):
    super(MinibatchLayer, self).__init__(incoming, **kwargs)
    self.num_kernels = num_kernels
    num_inputs = int(np.prod(self.input_shape[1:]))
    self.theta = self.add_param(theta, (num_inputs, num_kernels, dim_per_kernel), name="theta")
    self.log_weight_scale = self.add_param(log_weight_scale, (num_kernels, dim_per_kernel), name="log_weight_scale")
    self.W = self.theta * (T.exp(self.log_weight_scale) / T.sqrt(T.sum(T.square(self.theta), axis=0))).dimshuffle('x', 0, 1)
    self.b = self.add_param(b, (num_kernels,), name="b")
def __init__(self, incoming, num_units, theta=lasagne.init.Normal(0.1), b=lasagne.init.Constant(0.),
             weight_scale=lasagne.init.Constant(1.), train_scale=False, nonlinearity=relu, **kwargs):
    super(DenseLayer, self).__init__(incoming, **kwargs)
    self.nonlinearity = (lasagne.nonlinearities.identity if nonlinearity is None else nonlinearity)
    self.num_units = num_units
    num_inputs = int(np.prod(self.input_shape[1:]))
    self.theta = self.add_param(theta, (num_inputs, num_units), name="theta")
    self.weight_scale = self.add_param(weight_scale, (num_units,), name="weight_scale", trainable=train_scale)
    self.W = self.theta * (self.weight_scale / T.sqrt(T.sum(T.square(self.theta), axis=0))).dimshuffle('x', 0)
    self.b = self.add_param(b, (num_units,), name="b")
def conv_layer(input_, filter_size, num_filters, stride, pad, nonlinearity=relu,
               W=Normal(0.02), **kwargs):
    return layers.conv.Conv2DDNNLayer(input_, num_filters=num_filters,
                                      stride=parse_tuple(stride),
                                      filter_size=parse_tuple(filter_size), pad=pad,
                                      W=W, nonlinearity=nonlinearity, **kwargs)
def style_conv_block(conv_in, num_styles, num_filters, filter_size, stride,
                     nonlinearity=rectify, normalization=instance_norm):
    sc_network = ReflectLayer(conv_in, filter_size // 2)
    sc_network = normalization(ConvLayer(sc_network, num_filters, filter_size,
                                         stride, nonlinearity=nonlinearity,
                                         W=Normal()), num_styles=num_styles)
    return sc_network
def conv_layer(input_, filter_size, num_filters, stride, pad, nonlinearity=relu,
               W=Normal(0.02), **kwargs):
    return dnn.Conv2DDNNLayer(input_, num_filters=num_filters,
                              stride=parse_tuple(stride),
                              filter_size=parse_tuple(filter_size), pad=pad,
                              W=W, nonlinearity=nonlinearity, **kwargs)
def smart_init(shape):
    if len(shape) > 1:
        return init.GlorotUniform()(shape)
    else:
        return init.Normal()(shape)
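As a quick illustration of the helper above (the shapes here are hypothetical, not from the original project): Lasagne initializers are callable, so smart_init simply dispatches on the rank of the requested shape.

W = smart_init((256, 128))  # 2-D shape -> GlorotUniform sample, e.g. a weight matrix
b = smart_init((128,))      # 1-D shape -> Normal(std=0.01) sample, e.g. a bias vector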
def __init__(self, incomings, coeffs=Normal(std=0.01, mean=1.0), cropping=None, **kwargs):
    super(AdaptiveElemwiseSumLayer, self).__init__(incomings, T.add,
                                                   cropping=cropping, **kwargs)
    '''
    if isinstance(coeffs, list):
        if len(coeffs) != len(incomings):
            raise ValueError("Mismatch: got %d coeffs for %d incomings" %
                             (len(coeffs), len(incomings)))
    else:
        coeffs = [coeffs] * len(incomings)
    '''
    self.coeffs = []
    for i in range(len(incomings)):
        coeff = theano.shared(np.float32(1.0), 'adacoeff{}'.format(i))
        self.coeffs.append(self.add_param(coeff, coeff.shape, trainable=True,
                                          scaling_param=True))
def __init__(self, W_t=init.Normal(0.1), W_x=init.Normal(0.1), b=init.Constant(0.),
             nonlinearity_inside=nonlinearities.tanh,
             nonlinearity_outside=nonlinearities.sigmoid):
    self.W_t = W_t
    self.W_x = W_x
    self.b = b
    self.nonlinearity_inside = nonlinearity_inside
    self.nonlinearity_outside = nonlinearity_outside
def MDCL(incoming, num_filters, scales, name, dnn=True):
    if dnn:
        from lasagne.layers.dnn import Conv2DDNNLayer as C2D
    # W initialization method--this should also work as Orthogonal('relu'), but I have yet to validate that as thoroughly.
    winit = initmethod(0.02)
    # Initialization method for the coefficients
    sinit = lasagne.init.Constant(1.0/(1+len(scales)))
    # Number of incoming channels
    ni = lasagne.layers.get_output_shape(incoming)[1]
    # Weight parameter--the primary parameter for this block
    W = theano.shared(lasagne.utils.floatX(winit.sample((num_filters, lasagne.layers.get_output_shape(incoming)[1], 3, 3))), name=name+'W')
    # Primary Convolution Layer--No Dilation
    n = C2D(incoming = incoming,
            num_filters = num_filters,
            filter_size = [3, 3],
            stride = [1, 1],
            pad = (1, 1),
            W = W*theano.shared(lasagne.utils.floatX(sinit.sample(num_filters)), name+'_coeff_base').dimshuffle(0, 'x', 'x', 'x'),  # Note the broadcasting dimshuffle for the num_filter scalars.
            b = None,
            nonlinearity = None,
            name = name+'base')
    # List of remaining layers. This should probably just all be concatenated into a single list rather than being a separate deal.
    nd = []
    for i, scale in enumerate(scales):
        # I don't think 0 dilation is technically defined (or if it is it's just the regular filter) but I use it here as a convenient keyword to grab the 1x1 mean conv.
        if scale == 0:
            nd.append(C2D(incoming = incoming,
                          num_filters = num_filters,
                          filter_size = [1, 1],
                          stride = [1, 1],
                          pad = (0, 0),
                          W = T.mean(W, axis=[2, 3]).dimshuffle(0, 1, 'x', 'x')*theano.shared(lasagne.utils.floatX(sinit.sample(num_filters)), name+'_coeff_1x1').dimshuffle(0, 'x', 'x', 'x'),
                          b = None,
                          nonlinearity = None,
                          name = name+str(scale)))
        # Note the dimshuffles in this layer--these are critical as the current DilatedConv2D implementation uses a backward pass.
        else:
            nd.append(lasagne.layers.DilatedConv2DLayer(incoming = lasagne.layers.PadLayer(incoming = incoming, width=(scale, scale)),
                                                        num_filters = num_filters,
                                                        filter_size = [3, 3],
                                                        dilation = (scale, scale),
                                                        W = W.dimshuffle(1, 0, 2, 3)*theano.shared(lasagne.utils.floatX(sinit.sample(num_filters)), name+'_coeff_'+str(scale)).dimshuffle('x', 0, 'x', 'x'),
                                                        b = None,
                                                        nonlinearity = None,
                                                        name = name+str(scale)))
    return ESL(nd+[n])

# MDC-based Upsample Layer.
# This is a prototype I don't make use of extensively. It's operational but it doesn't seem to improve results yet.
def InceptionLayer(incoming, param_dict, block_name):
    branch = [0]*len(param_dict)
    # Loop across branches
    for i, dict in enumerate(param_dict):
        for j, style in enumerate(dict['style']):  # Loop up branch
            branch[i] = C2D(
                incoming = branch[i] if j else incoming,
                num_filters = dict['num_filters'][j],
                filter_size = dict['filter_size'][j],
                pad = dict['pad'][j] if 'pad' in dict else None,
                stride = dict['stride'][j],
                W = initmethod('relu'),
                nonlinearity = dict['nonlinearity'][j],
                name = block_name+'_'+str(i)+'_'+str(j)) if style=='convolutional'\
            else NL(lasagne.layers.dnn.Pool2DDNNLayer(
                incoming = incoming if j == 0 else branch[i],
                pool_size = dict['filter_size'][j],
                mode = dict['mode'][j],
                stride = dict['stride'][j],
                pad = dict['pad'][j],
                name = block_name+'_'+str(i)+'_'+str(j)),
                nonlinearity = dict['nonlinearity'][j]) if style=='pool'\
            else lasagne.layers.DilatedConv2DLayer(
                incoming = lasagne.layers.PadLayer(incoming = incoming if j==0 else branch[i], width = dict['pad'][j]) if 'pad' in dict else incoming if j==0 else branch[i],
                num_filters = dict['num_filters'][j],
                filter_size = dict['filter_size'][j],
                dilation = dict['dilation'][j],
                # pad = dict['pad'][j] if 'pad' in dict else None,
                W = initmethod('relu'),
                nonlinearity = dict['nonlinearity'][j],
                name = block_name+'_'+str(i)+'_'+str(j)) if style=='dilation'\
            else DL(
                incoming = incoming if j==0 else branch[i],
                num_units = dict['num_filters'][j],
                W = initmethod('relu'),
                b = None,
                nonlinearity = dict['nonlinearity'][j],
                name = block_name+'_'+str(i)+'_'+str(j))
            # Apply Batchnorm
            branch[i] = BN(branch[i], name = block_name+'_bnorm_'+str(i)+'_'+str(j)) if dict['bnorm'][j] else branch[i]
    # Concatenate Sublayers
    return CL(incomings=branch, name=block_name)

# Convenience function to define an inception-style block with upscaling
def __init__(self, incoming, num_units, ingate=Gate(), forgetgate=Gate(),
             cell=Gate(W_cell=None, nonlinearity=nonlinearities.tanh),
             outgate=Gate(), nonlinearity=nonlinearities.tanh,
             cell_init=init.Constant(0.), hid_init=init.Constant(0.),
             backwards=False, learn_init=False, peepholes=True,
             gradient_steps=-1, grad_clipping=0, precompute_input=True,
             mask_input=None, encoder_mask_input=None,
             attention=False, word_by_word=False, **kwargs):
    super(CustomLSTMDecoder, self).__init__(incoming, num_units, ingate, forgetgate,
                                            cell, outgate, nonlinearity, cell_init,
                                            hid_init, backwards, learn_init, peepholes,
                                            gradient_steps, grad_clipping, False,
                                            precompute_input, mask_input, True, **kwargs)
    self.attention = attention
    self.word_by_word = word_by_word
    # encoder mask
    self.encoder_mask_incoming_index = -1
    if encoder_mask_input is not None:
        self.input_layers.append(encoder_mask_input)
        self.input_shapes.append(encoder_mask_input.output_shape)
        self.encoder_mask_incoming_index = len(self.input_layers) - 1
    # check encoder
    if not isinstance(self.cell_init, CustomLSTMEncoder) \
            or self.num_units != self.cell_init.num_units:
        raise ValueError('cell_init must be CustomLSTMEncoder'
                         ' and num_units should equal')
    self.r_init = None
    self.r_init = self.add_param(init.Constant(0.), (1, num_units), name="r_init",
                                 trainable=False, regularizable=False)
    if self.word_by_word:
        # rewrites
        self.attention = True
    if self.attention:
        if not isinstance(encoder_mask_input, lasagne.layers.Layer):
            raise ValueError('Attention mechanism needs encoder mask layer')
        # initializes attention weights
        self.W_y_attend = self.add_param(init.Normal(0.1), (num_units, num_units), 'V_pointer')
        self.W_h_attend = self.add_param(init.Normal(0.1), (num_units, num_units), 'W_h_attend')
        # doesn't need transpose
        self.w_attend = self.add_param(init.Normal(0.1), (num_units, 1), 'v_pointer')
        self.W_p_attend = self.add_param(init.Normal(0.1), (num_units, num_units), 'W_p_attend')
        self.W_x_attend = self.add_param(init.Normal(0.1), (num_units, num_units), 'W_x_attend')
        if self.word_by_word:
            self.W_r_attend = self.add_param(init.Normal(0.1), (num_units, num_units), 'W_r_attend')
            self.W_t_attend = self.add_param(init.Normal(0.1), (num_units, num_units), 'W_t_attend')
def __init__(self, incomings, num_units, W_g=init.Normal(0.1), W_h=init.Normal(0.1),
             W_v=init.Normal(0.1), W_s=init.Normal(0.1), W_p=init.Normal(0.1),
             nonlinearity=nonlinearities.tanh,
             nonlinearity_atten=nonlinearities.softmax,
             **kwargs):
    super(AttenLayer, self).__init__(incomings, **kwargs)

    self.batch_size = self.input_shapes[0][0]  # None
    num_inputs = self.input_shapes[2][1]       # k
    feature_dim = self.input_shapes[0][1]      # d
    self.num_units = num_units
    self.nonlinearity = nonlinearity
    self.nonlinearity_atten = nonlinearity_atten

    self.W_h_to_attenGate = self.add_param(W_h, (num_inputs, 1), name='W_h_to_atten')
    self.W_g_to_attenGate = self.add_param(W_g, (feature_dim, num_inputs), name='W_g_to_atten')
    self.W_v_to_attenGate = self.add_param(W_v, (feature_dim, num_inputs), name='W_v_to_atten')
    self.W_s_to_attenGate = self.add_param(W_s, (feature_dim, num_inputs), name='W_s_to_atten')
    self.W_p = self.add_param(W_p, (feature_dim, num_units), name='W_p_to_atten')
    self.num_inputs = num_inputs
def _sample_trained_minibatch_gan(params_file, n, batch_size, rs):
    import lasagne
    from lasagne.init import Normal
    import lasagne.layers as ll
    import theano as th
    from theano.sandbox.rng_mrg import MRG_RandomStreams
    import theano.tensor as T

    import nn

    theano_rng = MRG_RandomStreams(rs.randint(2 ** 15))
    lasagne.random.set_rng(np.random.RandomState(rs.randint(2 ** 15)))

    noise_dim = (batch_size, 100)
    noise = theano_rng.uniform(size=noise_dim)
    ls = [ll.InputLayer(shape=noise_dim, input_var=noise)]
    ls.append(nn.batch_norm(
        ll.DenseLayer(ls[-1], num_units=4*4*512, W=Normal(0.05), nonlinearity=nn.relu),
        g=None))
    ls.append(ll.ReshapeLayer(ls[-1], (batch_size, 512, 4, 4)))
    ls.append(nn.batch_norm(
        nn.Deconv2DLayer(ls[-1], (batch_size, 256, 8, 8), (5, 5), W=Normal(0.05),
                         nonlinearity=nn.relu),
        g=None))  # 4 -> 8
    ls.append(nn.batch_norm(
        nn.Deconv2DLayer(ls[-1], (batch_size, 128, 16, 16), (5, 5), W=Normal(0.05),
                         nonlinearity=nn.relu),
        g=None))  # 8 -> 16
    ls.append(nn.weight_norm(
        nn.Deconv2DLayer(ls[-1], (batch_size, 3, 32, 32), (5, 5), W=Normal(0.05),
                         nonlinearity=T.tanh),
        train_g=True, init_stdv=0.1))  # 16 -> 32
    gen_dat = ll.get_output(ls[-1])

    with np.load(params_file) as d:
        params = [d['arr_{}'.format(i)] for i in range(9)]
    ll.set_all_param_values(ls[-1], params, trainable=True)

    sample_batch = th.function(inputs=[], outputs=gen_dat)
    samps = []
    while len(samps) < n:
        samps.extend(sample_batch())
    samps = np.array(samps[:n])
    return samps
def __init__(self, args):
    self.args = args

    rng = np.random.RandomState(self.args.seed)  # fixed random seeds
    theano_rng = MRG_RandomStreams(rng.randint(2 ** 15))
    lasagne.random.set_rng(np.random.RandomState(rng.randint(2 ** 15)))
    data_rng = np.random.RandomState(self.args.seed_data)

    ''' specify pre-trained generator E '''
    self.enc_layers = [LL.InputLayer(shape=(None, 3, 32, 32), input_var=None)]
    enc_layer_conv1 = dnn.Conv2DDNNLayer(self.enc_layers[-1], 64, (5, 5), pad=0, stride=1,
                                         W=Normal(0.01), nonlinearity=nn.relu)
    self.enc_layers.append(enc_layer_conv1)
    enc_layer_pool1 = LL.MaxPool2DLayer(self.enc_layers[-1], pool_size=(2, 2))
    self.enc_layers.append(enc_layer_pool1)
    enc_layer_conv2 = dnn.Conv2DDNNLayer(self.enc_layers[-1], 128, (5, 5), pad=0, stride=1,
                                         W=Normal(0.01), nonlinearity=nn.relu)
    self.enc_layers.append(enc_layer_conv2)
    enc_layer_pool2 = LL.MaxPool2DLayer(self.enc_layers[-1], pool_size=(2, 2))
    self.enc_layers.append(enc_layer_pool2)
    self.enc_layer_fc3 = LL.DenseLayer(self.enc_layers[-1], num_units=256, nonlinearity=T.nnet.relu)
    self.enc_layers.append(self.enc_layer_fc3)
    self.enc_layer_fc4 = LL.DenseLayer(self.enc_layers[-1], num_units=10, nonlinearity=T.nnet.softmax)
    self.enc_layers.append(self.enc_layer_fc4)

    ''' load pretrained weights for encoder '''
    weights_toload = np.load('pretrained/encoder.npz')
    weights_list_toload = [weights_toload['arr_{}'.format(k)] for k in range(len(weights_toload.files))]
    LL.set_all_param_values(self.enc_layers[-1], weights_list_toload)

    ''' input tensor variables '''
    #self.G_weights
    #self.D_weights
    self.dummy_input = T.scalar()
    self.G_layers = []
    self.z = theano_rng.uniform(size=(self.args.batch_size, self.args.z0dim))
    self.x = T.tensor4()
    self.meanx = T.tensor3()
    self.Gen_x = T.tensor4()
    self.D_layers = []
    self.D_layer_adv = []
    self.D_layer_z_recon = []
    self.gen_lr = T.scalar()   # learning rate
    self.disc_lr = T.scalar()  # learning rate
    self.y = T.ivector()
    self.y_1hot = T.matrix()
    self.Gen_x_list = []
    self.y_recon_list = []
    self.mincost = T.scalar()
    #self.enc_layer_fc3 = self.get_enc_layer_fc3()

    self.real_fc3 = LL.get_output(self.enc_layer_fc3, self.x, deterministic=True)
def get_generator(self, meanx, z0, y_1hot):
    ''' specify generator G0, gen_x = G0(z0, h1) '''
    """
    #z0 = theano_rng.uniform(size=(self.args.batch_size, 16)) # uniform noise
    gen0_layers = [LL.InputLayer(shape=(self.args.batch_size, 50), input_var=z0)] # Input layer for z0
    gen0_layers.append(nn.batch_norm(LL.DenseLayer(nn.batch_norm(LL.DenseLayer(gen0_layers[0], num_units=128, W=Normal(0.02), nonlinearity=nn.relu)),
                       num_units=128, W=Normal(0.02), nonlinearity=nn.relu))) # embedding, 50 -> 128
    gen0_layer_z_embed = gen0_layers[-1]

    #gen0_layers.append(LL.InputLayer(shape=(self.args.batch_size, 256), input_var=real_fc3)) # Input layer for real_fc3 in independent training, gen_fc3 in joint training
    gen0_layers.append(LL.InputLayer(shape=(self.args.batch_size, 10), input_var=y_1hot)) # Input layer for real_fc3 in independent training, gen_fc3 in joint training
    gen0_layer_fc3 = gen0_layers[-1]

    gen0_layers.append(LL.ConcatLayer([gen0_layer_fc3, gen0_layer_z_embed], axis=1)) # concatenate noise and fc3 features
    gen0_layers.append(LL.ReshapeLayer(nn.batch_norm(LL.DenseLayer(gen0_layers[-1], num_units=256*5*5, W=Normal(0.02), nonlinearity=T.nnet.relu)),
                       (self.args.batch_size, 256, 5, 5))) # fc
    gen0_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen0_layers[-1], (self.args.batch_size, 256, 10, 10), (5, 5), stride=(2, 2), padding='half',
                       W=Normal(0.02), nonlinearity=nn.relu))) # deconv
    gen0_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen0_layers[-1], (self.args.batch_size, 128, 14, 14), (5, 5), stride=(1, 1), padding='valid',
                       W=Normal(0.02), nonlinearity=nn.relu))) # deconv
    gen0_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen0_layers[-1], (self.args.batch_size, 128, 28, 28), (5, 5), stride=(2, 2), padding='half',
                       W=Normal(0.02), nonlinearity=nn.relu))) # deconv
    gen0_layers.append(nn.Deconv2DLayer(gen0_layers[-1], (self.args.batch_size, 3, 32, 32), (5, 5), stride=(1, 1), padding='valid',
                       W=Normal(0.02), nonlinearity=T.nnet.sigmoid)) # deconv

    gen_x_pre = LL.get_output(gen0_layers[-1], deterministic=False)
    gen_x = gen_x_pre - meanx
    # gen_x_joint = LL.get_output(gen0_layers[-1], {gen0_layer_fc3: gen_fc3}, deterministic=False) - meanx

    return gen0_layers, gen_x
    """
    gen_x_layer_z = LL.InputLayer(shape=(self.args.batch_size, self.args.z0dim), input_var=z0)  # z, 20
    # gen_x_layer_z_embed = nn.batch_norm(LL.DenseLayer(gen_x_layer_z, num_units=128), g=None) # 20 -> 64

    gen_x_layer_y = LL.InputLayer(shape=(self.args.batch_size, 10), input_var=y_1hot)  # conditioned on real fc3 activations
    gen_x_layer_y_z = LL.ConcatLayer([gen_x_layer_y, gen_x_layer_z], axis=1)  #512+256 = 768
    gen_x_layer_pool2 = LL.ReshapeLayer(nn.batch_norm(LL.DenseLayer(gen_x_layer_y_z, num_units=256*5*5)), (self.args.batch_size, 256, 5, 5))
    gen_x_layer_dconv2_1 = nn.batch_norm(nn.Deconv2DLayer(gen_x_layer_pool2, (self.args.batch_size, 256, 10, 10), (5, 5), stride=(2, 2), padding='half',
                                         W=Normal(0.02), nonlinearity=nn.relu))
    gen_x_layer_dconv2_2 = nn.batch_norm(nn.Deconv2DLayer(gen_x_layer_dconv2_1, (self.args.batch_size, 128, 14, 14), (5, 5), stride=(1, 1), padding='valid',
                                         W=Normal(0.02), nonlinearity=nn.relu))

    gen_x_layer_dconv1_1 = nn.batch_norm(nn.Deconv2DLayer(gen_x_layer_dconv2_2, (self.args.batch_size, 128, 28, 28), (5, 5), stride=(2, 2), padding='half',
                                         W=Normal(0.02), nonlinearity=nn.relu))
    gen_x_layer_x = nn.Deconv2DLayer(gen_x_layer_dconv1_1, (self.args.batch_size, 3, 32, 32), (5, 5), stride=(1, 1), padding='valid',
                                     W=Normal(0.02), nonlinearity=T.nnet.sigmoid)
    # gen_x_layer_x = dnn.Conv2DDNNLayer(gen_x_layer_dconv1_2, 3, (1,1), pad=0, stride=1,
    #                                    W=Normal(0.02), nonlinearity=T.nnet.sigmoid)

    gen_x_layers = [gen_x_layer_z, gen_x_layer_y, gen_x_layer_y_z, gen_x_layer_pool2, gen_x_layer_dconv2_1,
                    gen_x_layer_dconv2_2, gen_x_layer_dconv1_1, gen_x_layer_x]

    gen_x_pre = LL.get_output(gen_x_layer_x, deterministic=False)
    gen_x = gen_x_pre - meanx

    return gen_x_layers, gen_x
def get_discriminator(self):
    ''' specify discriminator D0 '''
    """
    disc0_layers = [LL.InputLayer(shape=(self.args.batch_size, 3, 32, 32))]
    disc0_layers.append(LL.GaussianNoiseLayer(disc0_layers[-1], sigma=0.05))
    disc0_layers.append(dnn.Conv2DDNNLayer(disc0_layers[-1], 96, (3,3), pad=1, W=Normal(0.02), nonlinearity=nn.lrelu))
    disc0_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(disc0_layers[-1], 96, (3,3), pad=1, stride=2, W=Normal(0.02), nonlinearity=nn.lrelu))) # 16x16
    disc0_layers.append(LL.DropoutLayer(disc0_layers[-1], p=0.1))
    disc0_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(disc0_layers[-1], 192, (3,3), pad=1, W=Normal(0.02), nonlinearity=nn.lrelu)))
    disc0_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(disc0_layers[-1], 192, (3,3), pad=1, stride=2, W=Normal(0.02), nonlinearity=nn.lrelu))) # 8x8
    disc0_layers.append(LL.DropoutLayer(disc0_layers[-1], p=0.1))
    disc0_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(disc0_layers[-1], 192, (3,3), pad=0, W=Normal(0.02), nonlinearity=nn.lrelu))) # 6x6
    disc0_layer_shared = LL.NINLayer(disc0_layers[-1], num_units=192, W=Normal(0.02), nonlinearity=nn.lrelu) # 6x6
    disc0_layers.append(disc0_layer_shared)

    disc0_layer_z_recon = LL.DenseLayer(disc0_layer_shared, num_units=50, W=Normal(0.02), nonlinearity=None)
    disc0_layers.append(disc0_layer_z_recon) # also need to recover z from x

    disc0_layers.append(LL.GlobalPoolLayer(disc0_layer_shared))
    disc0_layer_adv = LL.DenseLayer(disc0_layers[-1], num_units=10, W=Normal(0.02), nonlinearity=None)
    disc0_layers.append(disc0_layer_adv)

    return disc0_layers, disc0_layer_adv, disc0_layer_z_recon
    """
    disc_x_layers = [LL.InputLayer(shape=(None, 3, 32, 32))]
    disc_x_layers.append(LL.GaussianNoiseLayer(disc_x_layers[-1], sigma=0.2))
    disc_x_layers.append(dnn.Conv2DDNNLayer(disc_x_layers[-1], 96, (3, 3), pad=1, W=Normal(0.01), nonlinearity=nn.lrelu))
    disc_x_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(disc_x_layers[-1], 96, (3, 3), pad=1, stride=2, W=Normal(0.01), nonlinearity=nn.lrelu)))
    disc_x_layers.append(LL.DropoutLayer(disc_x_layers[-1], p=0.5))
    disc_x_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(disc_x_layers[-1], 192, (3, 3), pad=1, W=Normal(0.01), nonlinearity=nn.lrelu)))
    disc_x_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(disc_x_layers[-1], 192, (3, 3), pad=1, stride=2, W=Normal(0.01), nonlinearity=nn.lrelu)))
    disc_x_layers.append(LL.DropoutLayer(disc_x_layers[-1], p=0.5))
    disc_x_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(disc_x_layers[-1], 192, (3, 3), pad=0, W=Normal(0.01), nonlinearity=nn.lrelu)))
    disc_x_layers_shared = LL.NINLayer(disc_x_layers[-1], num_units=192, W=Normal(0.01), nonlinearity=nn.lrelu)
    disc_x_layers.append(disc_x_layers_shared)

    disc_x_layer_z_recon = LL.DenseLayer(disc_x_layers_shared, num_units=self.args.z0dim, nonlinearity=None)
    disc_x_layers.append(disc_x_layer_z_recon)  # also need to recover z from x

    # disc_x_layers.append(nn.MinibatchLayer(disc_x_layers_shared, num_kernels=100))
    disc_x_layers.append(LL.GlobalPoolLayer(disc_x_layers_shared))
    disc_x_layer_adv = LL.DenseLayer(disc_x_layers[-1], num_units=10, W=Normal(0.01), nonlinearity=None)
    disc_x_layers.append(disc_x_layer_adv)

    #output_before_softmax_x = LL.get_output(disc_x_layer_adv, x, deterministic=False)
    #output_before_softmax_gen = LL.get_output(disc_x_layer_adv, gen_x, deterministic=False)

    # temp = LL.get_output(gen_x_layers[-1], deterministic=False, init=True)
    # temp = LL.get_output(disc_x_layers[-1], x, deterministic=False, init=True)
    # init_updates = [u for l in LL.get_all_layers(gen_x_layers)+LL.get_all_layers(disc_x_layers) for u in getattr(l, 'init_updates', [])]

    return disc_x_layers, disc_x_layer_adv, disc_x_layer_z_recon
def load_data():
    xs = []
    ys = []
    for j in range(5):
        d = unpickle('data/cifar-10-python/cifar-10-batches-py/data_batch_' + str(j+1))
        x = d['data']
        y = d['labels']
        xs.append(x)
        ys.append(y)

    d = unpickle('data/cifar-10-python/cifar-10-batches-py/test_batch')
    xs.append(d['data'])
    ys.append(d['labels'])

    x = np.concatenate(xs) / np.float32(255)
    y = np.concatenate(ys)
    x = np.dstack((x[:, :1024], x[:, 1024:2048], x[:, 2048:]))
    x = x.reshape((x.shape[0], 32, 32, 3)).transpose(0, 3, 1, 2)

    # subtract per-pixel mean
    pixel_mean = np.mean(x[0:50000], axis=0)
    #pickle.dump(pixel_mean, open("cifar10-pixel_mean.pkl","wb"))
    x -= pixel_mean

    # create mirrored images
    X_train = x[0:50000, :, :, :]
    Y_train = y[0:50000]
    # X_train_flip = X_train[:, :, :, ::-1]
    # Y_train_flip = Y_train
    # X_train = np.concatenate((X_train, X_train_flip), axis=0)
    # Y_train = np.concatenate((Y_train, Y_train_flip), axis=0)

    X_test = x[50000:, :, :, :]
    Y_test = y[50000:]

    return pixel_mean, dict(
        X_train=lasagne.utils.floatX(X_train),
        Y_train=Y_train.astype('int32'),
        X_test=lasagne.utils.floatX(X_test),
        Y_test=Y_test.astype('int32'),)

## specify generator, gen_pool5 = G(z, y_1hot)
#z = theano_rng.uniform(size=(args.batch_size, 100)) # uniform noise
#y_1hot = T.matrix()
#gen_pool5_layer_z = LL.InputLayer(shape=(args.batch_size, 100), input_var=z) # z, 100
#gen_pool5_layer_z_embed = nn.batch_norm(LL.DenseLayer(gen_pool5_layer_z, num_units=256, W=Normal(0.02), nonlinearity=T.nnet.relu), g=None) # 100 -> 256
#gen_pool5_layer_y = LL.InputLayer(shape=(args.batch_size, 10), input_var=y_1hot) # y, 10
#gen_pool5_layer_y_embed = nn.batch_norm(LL.DenseLayer(gen_pool5_layer_y, num_units=512, W=Normal(0.02), nonlinearity=T.nnet.relu), g=None) # 10 -> 512
#gen_pool5_layer_fc4 = LL.ConcatLayer([gen_pool5_layer_z_embed, gen_pool5_layer_y_embed], axis=1) #512+256 = 768
##gen_pool5_layer_fc4 = nn.batch_norm(LL.DenseLayer(gen_pool5_layer_fc5, num_units=512, nonlinearity=T.nnet.relu))#, g=None)
#gen_pool5_layer_fc3 = nn.batch_norm(LL.DenseLayer(gen_pool5_layer_fc4, num_units=512, W=Normal(0.02), nonlinearity=T.nnet.relu), g=None)
#gen_pool5_layer_pool5_flat = LL.DenseLayer(gen_pool5_layer_fc3, num_units=4*4*32, nonlinearity=T.nnet.relu) # NO batch normalization at output layer
##gen_pool5_layer_pool5_flat = nn.batch_norm(LL.DenseLayer(gen_pool5_layer_fc3, num_units=4*4*32, W=Normal(0.02), nonlinearity=T.nnet.relu), g=None) # no batch-norm at output layer
#gen_pool5_layer_pool5 = LL.ReshapeLayer(gen_pool5_layer_pool5_flat, (args.batch_size, 32, 4, 4))
#gen_pool5_layers = [gen_pool5_layer_z, gen_pool5_layer_z_embed, gen_pool5_layer_y, gen_pool5_layer_y_embed, #gen_pool5_layer_fc5,
#                    gen_pool5_layer_fc4, gen_pool5_layer_fc3, gen_pool5_layer_pool5_flat, gen_pool5_layer_pool5]
#gen_pool5 = LL.get_output(gen_pool5_layer_pool5, deterministic=False)