def compute(self, state, w_idx, feat, scene):
    """Run one decoding step with attention over ``feat`` and a scene vector.

    :param state: packed previous state; it is split into an attention part
        of width ``config['na']`` and two parts of width ``config['nh']``
        (presumably laid out as [e | c | h] -- confirm against ``split_state``)
    :param w_idx: previous word index/indices, passed to the embedding
    :param feat: features handed to the attention module
    :param scene: scene vector appended to the LSTM and predictor inputs
    :return: ``(new_state, p, alpha)`` -- the re-packed state, the output of
        the prediction MLP and the attention module's second output
    """
    prev_word = self.embedding.compute(w_idx)

    # unpack the previous step
    layout = [(1, self.config['na']), (2, self.config['nh'])]
    attn_prev, cell_prev, hid_prev = split_state(state, scheme=layout)

    # attention is queried with the previous context, hidden state and word
    query = T.concatenate([attn_prev, hid_prev, prev_word], axis=1)
    attn_now, alpha = self.attention.compute(feat, query)

    # LSTM step on [context, word, scene]
    lstm_in = T.concatenate([attn_now, prev_word, scene], axis=-1)
    cell_now, hid_now = self.lstm.compute(lstm_in, cell_prev, hid_prev)

    # re-pack the state in the same order it was unpacked
    new_state = T.concatenate([attn_now, cell_now, hid_now], axis=-1)

    # the previous word (w_{t-1}) and the scene are extra predictor features
    predictor_in = T.concatenate([attn_now, hid_now, prev_word, scene], axis=-1)
    p = self.pred_mlp.compute(predictor_in)
    return new_state, p, alpha
def build_encoder_bi(tparams, options):
    """
    Build a bidirectional encoder on top of pre-computed word embeddings.

    Two encoders of type ``options['encoder']`` are applied, one to the
    embeddings as given (prefix ``'encoder'``) and one to the embeddings
    reversed along the first axis (prefix ``'encoder_r'``). The context is
    the concatenation of the last step of each encoder's first output.

    :param tparams: shared parameters handed to the encoder layers
    :param options: model options; ``options['encoder']`` selects the layer
    :return: ``(embedding, x_mask, ctx)`` -- the two symbolic inputs and the
        concatenated context
    """
    # symbolic inputs (source side)
    embedding = tensor.tensor3('embedding', dtype='float32')
    x_mask = tensor.matrix('x_mask', dtype='float32')

    # reversed views along the first axis for the backward encoder
    embedding_rev = embedding[::-1]
    x_mask_rev = x_mask[::-1]

    # forward and backward encoders
    forward_out = get_layer(options['encoder'])[1](
        tparams, embedding, options, prefix='encoder', mask=x_mask)
    backward_out = get_layer(options['encoder'])[1](
        tparams, embedding_rev, options, prefix='encoder_r', mask=x_mask_rev)

    last_forward = forward_out[0][-1]
    last_backward = backward_out[0][-1]
    ctx = tensor.concatenate([last_forward, last_backward], axis=1)
    return embedding, x_mask, ctx


# some utilities
def param_init_gru(options, params, prefix='gru', nin=None, dim=None):
    """
    Parameter init for GRU.

    Adds six entries to ``params`` under names built with ``_p(prefix, ...)``:
    ``W``, ``U`` and ``b`` hold two blocks side by side (presumably the two
    gates -- confirm against the layer that consumes them), while ``Wx``,
    ``Ux`` and ``bx`` hold a single block.

    :param options: model options; ``options['dim_proj']`` is the fallback
        for both sizes
    :param params: mapping that receives the new entries (modified in place)
    :param prefix: name prefix for the created parameters
    :param nin: input dimension, defaults to ``options['dim_proj']``
    :param dim: hidden dimension, defaults to ``options['dim_proj']``
    :return: the same ``params`` mapping
    """
    # Identity test: ``== None`` is unidiomatic and compares element-wise on
    # array-like arguments.
    if nin is None:
        nin = options['dim_proj']
    if dim is None:
        dim = options['dim_proj']

    # input-to-hidden weights and biases for the two stacked blocks
    W = numpy.concatenate([norm_weight(nin, dim),
                           norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W
    params[_p(prefix, 'b')] = numpy.zeros((2 * dim,)).astype('float32')

    # hidden-to-hidden weights for the two stacked blocks
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U

    # weights and bias for the single remaining block
    Wx = norm_weight(nin, dim)
    params[_p(prefix, 'Wx')] = Wx
    Ux = ortho_weight(dim)
    params[_p(prefix, 'Ux')] = Ux
    params[_p(prefix, 'bx')] = numpy.zeros((dim,)).astype('float32')

    return params
def get_output_for(self, input, deterministic=False, **kwargs):
    """Rearrange channel blocks of ``input`` into larger spatial maps.

    The input is split along axis 1 into ``self.c`` equal groups; each group
    is passed through ``_phase_shift`` with factor ``self.r`` and the
    single-channel results are concatenated along axis 1. This looks like a
    sub-pixel (depth-to-space) upscaling -- NOTE(review): confirm against the
    layer's ``output_shape``.

    :param input: symbolic 4-D input
    :param deterministic: accepted for interface compatibility, not used here
    :return: symbolic tensor with ``self.c`` channels
    """
    def _phase_shift(chunk, r):
        batch = chunk.shape[0]
        rows = self.output_shape[2] // r
        cols = self.output_shape[3] // r

        X = T.reshape(chunk, (batch, r, r, rows, cols))
        X = T.transpose(X, (0, 3, 4, 1, 2))  # batch, rows, cols, r, r

        # fold the `rows` axis into the first r axis
        row_parts = T.split(x=X, splits_size=[1] * rows, n_splits=rows, axis=1)
        row_parts = [T.reshape(part, (batch, cols, r, r)) for part in row_parts]
        X = T.concatenate(row_parts, axis=2)  # batch, cols, rows*r, r

        # fold the `cols` axis into the remaining r axis
        col_parts = T.split(x=X, splits_size=[1] * cols, n_splits=cols, axis=1)
        col_parts = [T.reshape(part, (batch, rows * r, r)) for part in col_parts]
        X = T.concatenate(col_parts, axis=2)  # batch, rows*r, cols*r

        # re-insert a singleton channel axis
        return X.dimshuffle(0, 'x', 1, 2)

    group_size = input.shape[1] // self.c
    groups = T.split(x=input, splits_size=[group_size] * self.c,
                     n_splits=self.c, axis=1)
    shifted = [_phase_shift(group, self.r) for group in groups]
    return T.concatenate(shifted, axis=1)


# Multiscale Dilated Convolution Block
# This function (not a layer in and of itself, though you could make it one) returns a set of concatenated conv2d and dilatedconv2d layers.
# Each layer uses the same basic filter W, operating at a different dilation factor (or taken as the mean of W for the 1x1 conv).
# The channel-wise output of each layer is weighted by a set of coefficients, which are initialized to 1 / the total number of dilation scales,
# meaning that we're starting by taking an elementwise mean. These should be learnable parameters.
# NOTES: - I'm considering changing the variable names to be more descriptive, and look less like ridiculous academic code. It's on the to-do list.
#        - I keep the bias and nonlinearity out of the default definition for this layer, as I expect it to be batchnormed and nonlinearized in the model config.
def dist_info_sym(self, obs_var, latent_var=None):
    """Build the symbolic mean and log-std for a batch of observations.

    Notes kept from the original author: this is meant to be used for one
    path; "now this is not doing anything", and it does not work for
    computing the dist_info_vars of npo_snn_rewardMI.

    :param obs_var: symbolic observations, one row per sample
    :param latent_var: symbolic latents, one row per sample; when ``None``
        the fixed latent ``self.latent_fix`` is repeated for every row
    :return: dict with keys ``mean`` and ``log_std``
    """
    if latent_var is None:
        # new fix to avoid putting the latent as an input: just take the
        # fixed one and tile it over the batch
        fixed_latent = theano.shared(np.expand_dims(self.latent_fix, axis=0))
        latent_var = TT.tile(fixed_latent, [obs_var.shape[0], 1])

    # generalized input: observations with the latents appended
    if self.bilinear_integration:
        # additionally append the flattened outer product obs x latent
        outer = obs_var[:, :, np.newaxis] * latent_var[:, np.newaxis, :]
        extended_obs_var = TT.concatenate(
            [obs_var, latent_var, TT.flatten(outer, outdim=2)], axis=1)
    else:
        extended_obs_var = TT.concatenate([obs_var, latent_var], axis=1)

    mean_var, log_std_var = L.get_output(
        [self._l_mean, self._l_log_std], extended_obs_var)

    # clamp the log-std from below when a minimum std is configured
    if self.min_std is not None:
        log_std_var = TT.maximum(log_std_var, np.log(self.min_std))

    return dict(mean=mean_var, log_std=log_std_var)
def ctc_update_log_p(skip_idxs, zeros, active, log_p_curr, log_p_prev):
    """Advance a CTC-style log-probability vector by one step.

    The first ``active`` entries of ``log_p_prev`` are moved to the
    probability domain (rescaled by their maximum), the shift-by-one and
    skip-by-two transitions are accumulated, and the result is taken back to
    the log domain and added to ``log_p_curr``.

    :param skip_idxs: integer positions from which a jump of two is allowed
    :param zeros: buffer whose leading slice is used as the starting value
        (NOTE(review): the name suggests it is all zeros -- confirm at the
        call site)
    :param active: number of leading positions currently in use
    :param log_p_curr: log-probabilities for the current step
    :param log_p_prev: accumulated log-probabilities from the previous step
    :return: ``(active_next, log_p_next)``
    """
    # skip positions that lie inside the active prefix
    usable_skips = skip_idxs[(skip_idxs < active).nonzero()]

    # grow the active prefix by one, or past the furthest skip target,
    # without exceeding the sequence length
    furthest_skip = T.max(T.concatenate([usable_skips, [-1]]))
    grown = T.maximum(active + 1, furthest_skip + 2 + 1)
    active_next = T.cast(T.minimum(grown, log_p_curr.shape[0]), 'int32')

    # leave the log domain, factoring out the maximum
    common_factor = T.max(log_p_prev[:active])
    p_prev = T.exp(log_p_prev[:active] - common_factor)

    acc = zeros[:active_next]
    # copy over
    acc = T.set_subtensor(acc[:active], p_prev)
    # previous transitions
    acc = T.inc_subtensor(acc[1:], acc[:-1])
    # skip transitions
    acc = T.inc_subtensor(acc[usable_skips + 2], p_prev[usable_skips])

    # back to the log domain
    updated_log_p_prev = T.log(acc) + common_factor

    log_p_next = T.set_subtensor(
        zeros[:active_next],
        log_p_curr[:active_next] + updated_log_p_prev
    )
    return active_next, log_p_next
def update_opt(self, f, target, inputs, reg_coeff):
    """Prepare a lazily compiled function for products with the Hessian of ``f``.

    The product is obtained by differentiating ``sum_i <grad_i f, x_i>`` with
    respect to the trainable parameters, where each ``x_i`` is a fresh
    symbolic tensor shaped like the corresponding parameter.

    :param f: symbolic scalar to differentiate
    :param target: object exposing ``get_params(trainable=True)``
    :param inputs: tuple of symbolic inputs of ``f`` (it is concatenated with
        the tuple of ``x`` tensors)
    :param reg_coeff: stored on the instance; not used in this method
    """
    self.target = target
    self.reg_coeff = reg_coeff

    params = target.get_params(trainable=True)
    constraint_grads = theano.grad(f, wrt=params, disconnected_inputs='warn')

    # one placeholder per parameter for the vector being multiplied
    xs = tuple([ext.new_tensor_like("%s x" % p.name, p) for p in params])

    def Hx_plain():
        grad_dot_x = TT.sum(
            [TT.sum(g * x) for g, x in zip(constraint_grads, xs)])
        Hx_plain_splits = TT.grad(
            grad_dot_x, wrt=params, disconnected_inputs='warn')
        # flatten every per-parameter piece and join them into one vector
        return TT.concatenate([TT.flatten(s) for s in Hx_plain_splits])

    def compile_hx():
        return ext.compile_function(
            inputs=inputs + xs,
            outputs=Hx_plain(),
            log_name="f_Hx_plain",
        )

    self.opt_fun = ext.lazydict(f_Hx_plain=compile_hx)
def get_action(self, observation):
    """Sample one action from the recurrent policy and update its state.

    :param observation: raw observation; flattened with the observation space
    :return: ``(action, agent_info)`` where ``agent_info`` holds ``prob`` and,
        when ``self.state_include_action`` is set, ``prev_action``
    """
    include_action = self.state_include_action
    if include_action:
        # the previous action (zeros on the first step) is part of the input
        if self.prev_action is None:
            prev_action = np.zeros((self.action_space.flat_dim,))
        else:
            prev_action = self.action_space.flatten(self.prev_action)
        all_input = np.concatenate([
            self.observation_space.flatten(observation),
            prev_action
        ])
    else:
        all_input = self.observation_space.flatten(observation)
        # should not be used
        prev_action = np.nan

    # the step function is batched: wrap the inputs, take row 0 of each output
    step_out = self.f_step_prob([all_input], [self.prev_hidden])
    probs, hidden_vec = [out[0] for out in step_out]

    action = special.weighted_sample(probs, range(self.action_space.n))

    # remember the recurrent state for the next call
    self.prev_action = action
    self.prev_hidden = hidden_vec

    agent_info = dict(prob=probs)
    if include_action:
        agent_info["prev_action"] = prev_action
    return action, agent_info
def get_action(self, observation):
    """Sample one Gaussian action from the recurrent policy and update its state.

    :param observation: raw observation; flattened with the observation space
    :return: ``(action, agent_info)`` where ``agent_info`` holds ``mean`` and
        ``log_std`` and, when ``self._state_include_action`` is set,
        ``prev_action``
    """
    include_action = self._state_include_action
    if include_action:
        # the previous action (zeros on the first step) is part of the input
        if self._prev_action is None:
            prev_action = np.zeros((self.action_space.flat_dim,))
        else:
            prev_action = self.action_space.flatten(self._prev_action)
        all_input = np.concatenate([
            self.observation_space.flatten(observation),
            prev_action
        ])
    else:
        all_input = self.observation_space.flatten(observation)
        # should not be used
        prev_action = np.nan

    # the step function is batched: wrap the inputs, take row 0 of each output
    step_out = self._f_step_mean_std([all_input], [self._prev_hidden])
    mean, log_std, hidden_vec = [out[0] for out in step_out]

    # action = mean + std * standard normal noise
    rnd = np.random.normal(size=mean.shape)
    action = rnd * np.exp(log_std) + mean

    # remember the recurrent state for the next call
    self._prev_action = action
    self._prev_hidden = hidden_vec

    agent_info = dict(mean=mean, log_std=log_std)
    if include_action:
        agent_info["prev_action"] = prev_action
    return action, agent_info
def __init__(self, incoming, unchanged_W, unchanged_W_shape, oov_in_train_W, oov_in_train_W_shape, p=0.5, rescale=True, dropout_mask=None, **kwargs):
    """Embedding layer whose table is a frozen part stacked on a trainable part.

    :param incoming: incoming layer, forwarded to the parent constructor
    :param unchanged_W: initial value/spec of the frozen rows (registered as
        non-trainable and non-regularizable)
    :param unchanged_W_shape: shape of the frozen rows; its second entry is
        used as ``output_size``
    :param oov_in_train_W: initial value/spec of the trainable rows
    :param oov_in_train_W_shape: shape of the trainable rows
    :param p: dropout probability; the mask keeps entries with probability
        ``1 - p``
    :param rescale: stored on the instance; not used in the constructor
    :param dropout_mask: optional ready-made mask; when ``None`` a binomial
        mask shaped like the full table is created
    """
    super(CustomEmbedding, self).__init__(incoming, **kwargs)

    self.output_size = unchanged_W_shape[1]

    # frozen rows first, trainable rows second
    self.unchanged_W = self.add_param(
        unchanged_W, unchanged_W_shape, name="unchanged_W",
        trainable=False, regularizable=False)
    self.oov_in_train_W = self.add_param(
        oov_in_train_W, oov_in_train_W_shape, name='oov_in_train_W')
    self.W = T.concatenate([self.unchanged_W, self.oov_in_train_W])

    self.p = p
    self.rescale = rescale

    if dropout_mask is None:
        # seed a fresh random stream from the module-level RNG
        stream = RandomStreams(_rng.randint(1, 2147462579))
        dropout_mask = stream.binomial(
            self.W.shape, p=1 - self.p, dtype=self.W.dtype)
    self.dropout_mask = dropout_mask
def forward(self, inputtensor):
    """Apply the residual block: conv-bn-relu-conv-bn plus a shortcut, then relu.

    :param inputtensor: sequence whose first element is the symbolic input
    :return: output of ``relu2.forward`` for the summed branches
    """
    # main branch
    branch = self.conv1.forward(inputtensor)
    branch = self.bn1.forward(branch)
    branch = self.relu1.forward(branch)
    branch = self.conv2.forward(branch)
    branch = self.bn2.forward(branch)

    if self.increaseDim:
        # shortcut is 2x2-pooled and padded with an all-zero copy along
        # axis 1, doubling that axis
        subx = T.signal.pool.pool_2d(inputtensor[0], (2, 2), ignore_border=True)
        retx = T.zeros_like(subx)
        shortcut = T.concatenate([subx, retx], axis=1)
    else:
        shortcut = inputtensor[0]

    return self.relu2.forward([branch[0] + shortcut, ])
def gen_samples(n, nbatch=128):
    """Generate ``n`` samples together with the labels they were conditioned on.

    Samples are produced in batches of ``nbatch``; a final batch covers the
    remainder. That final batch is generated even when the remainder is zero.

    :param n: total number of samples to generate
    :param nbatch: batch size for each call to the generator
    :return: ``(samples, labels)`` concatenated along axis 0
    """
    def _draw(size):
        # Labels are drawn before the noise, so the random stream is
        # consumed in the same order for every batch.
        ymb = floatX(OneHot(np_rng.randint(0, 10, size), ny))
        zmb = floatX(np_rng.uniform(-1., 1., size=(size, nz)))
        return _gen(zmb, ymb), np.argmax(ymb, axis=1)

    samples = []
    labels = []
    n_gen = 0
    # Floor division: ``n / nbatch`` is a float on Python 3 and ``range``
    # rejects it.
    for _ in range(n // nbatch):
        xmb, lmb = _draw(nbatch)
        samples.append(xmb)
        labels.append(lmb)
        n_gen += len(xmb)

    # remainder
    xmb, lmb = _draw(n - n_gen)
    samples.append(xmb)
    labels.append(lmb)
    return np.concatenate(samples, axis=0), np.concatenate(labels, axis=0)
def __init__(self, prev_layers, axis=1):
    """
    Concatenate several previous layers along one axis.

    :param prev_layers: list of previous layers to concatenate (at least two);
        the first one is passed to the parent constructor
    :param axis: axis to concatenate along. For tensor5, the channel
        dimension is axis=2 (due to theano conv3d convention). For image,
        axis=1
    """
    assert (len(prev_layers) > 1)
    super().__init__(prev_layers[0])

    self._axis = axis
    self._prev_layers = prev_layers

    # start from the first layer's shape and grow the concat axis by the
    # size of every further layer
    self._output_shape = self._input_shape.copy()
    for other in prev_layers[1:]:
        self._output_shape[axis] += other._output_shape[axis]

    shape_text = ','.join(str(x) for x in self._output_shape)
    print('Concat the prev layer to [%s]' % shape_text)
def project3Dto2D(self, Li, idxs):
    """
    Project 3D point to 2D

    :param Li: joints in normalized 3D, reshapeable to
        (len(idxs), numJoints, 3)
    :param idxs: frames specified by subset; a single index, a sequence of
        indices or a numpy array of indices
    :return: 2D points, in normalized 2D coordinates, shape
        (len(idxs), numJoints*2)
    """
    # Normalise to a 1-D index array. Wrapping a list in another list would
    # produce a 2-D array of length 1 and break the reshapes below.
    idxs = numpy.atleast_1d(numpy.asarray(idxs))
    n_frames = len(idxs)
    n_points = n_frames * self.numJoints

    # undo the per-frame normalisation: scale, then shift by the 3D offset
    Li_glob3D = (numpy.reshape(Li, (n_frames, self.numJoints, 3))
                 * self.Di_scale[idxs][:, None, None]
                 + self.Di_off3D[idxs][:, None, :]).reshape((n_points, 3))

    # 3D -> 2D projection in homogeneous coordinates
    Li_glob3D_hom = numpy.concatenate(
        [Li_glob3D, numpy.ones((n_points, 1), dtype='float32')], axis=1)
    Li_glob2D_hom = numpy.dot(Li_glob3D_hom, self.cam_proj.T)
    Li_glob2D = (Li_glob2D_hom[:, 0:3]
                 / Li_glob2D_hom[:, 3][:, None]).reshape(
                     (n_frames, self.numJoints, 3))

    # also shift by M (per-frame 2D transform) to the cropped window
    Li_img2D_hom = numpy.einsum('ijk,ikl->ijl', Li_glob2D, self.Di_trans2D[idxs])
    Li_img2D = (Li_img2D_hom[:, :, 0:2]
                / Li_img2D_hom[:, :, 2][:, :, None]).reshape(
                    (n_frames, self.numJoints * 2))

    # normalise with half the crop size (last axis of Di)
    half = self.Di.shape[3] / 2.
    Li_img2Dcrop = (Li_img2D - half) / half
    return Li_img2Dcrop
def _add_blanks(y, blank_symbol, y_mask=None):
    """Add blanks to a matrix and updates mask

    A blank is placed before the first label and after every label; the mask
    (when given) is stretched the same way, with the leading blank always
    marked as valid.

    Input shape: output_seq_len x num_batch
    Output shape: 2*output_seq_len+1 x num_batch
    """
    def _interleave(first, second):
        # pair entries of `first` and `second` along a new last axis, then
        # merge that axis into the sequence axis and go back to time-major
        paired = tensor.concatenate([first, second], axis=2)
        merged = paired.reshape(
            (paired.shape[0], paired.shape[1] * paired.shape[2]))
        return merged.T

    # for y: label, blank, label, blank, ...
    y_extended = y.T.dimshuffle(0, 1, 'x')
    blank_fill = tensor.zeros_like(y_extended) + blank_symbol
    labels_with_blanks = _interleave(y_extended, blank_fill)
    leading_blank = tensor.zeros((1, labels_with_blanks.shape[1])) + blank_symbol
    blanked_y = tensor.concatenate([leading_blank, labels_with_blanks], axis=0)

    # for y_mask: every mask entry is repeated for its trailing blank
    blanked_y_mask = None
    if y_mask is not None:
        y_mask_extended = y_mask.T.dimshuffle(0, 1, 'x')
        doubled_mask = _interleave(y_mask_extended, y_mask_extended)
        leading_one = tensor.ones((1, doubled_mask.shape[1]), dtype=floatX)
        blanked_y_mask = tensor.concatenate([leading_one, doubled_mask], axis=0)

    return blanked_y.astype('int32'), blanked_y_mask
def get_output_for(self, input, **kwargs):
    """Max-pool with dilation along axis 2 by folding that axis into the batch.

    When ``self.dilation[0] > 1`` the input is zero-padded along axis 2 to a
    multiple of the dilation, rearranged so that every dilation phase becomes
    its own batch entry, pooled by the parent layer, and rearranged back with
    the padding removed. Otherwise the parent layer is applied directly.

    :param input: symbolic 4-D input
    :return: symbolic pooled output
    """
    dilation = self.dilation[0]
    dilated = dilation > 1
    in_shape = input.shape

    if dilated:
        # pad such that the time axis length is divisible by the dilation factor
        pad_w = (dilation - in_shape[2] % dilation) % dilation
        padding = T.zeros(
            (in_shape[0], in_shape[1], pad_w, in_shape[3]), input.dtype)
        input = T.concatenate((input, padding), axis=2)

        # rearrange data to fold the time axis into the minibatch dimension
        input = input.reshape(
            (in_shape[0], in_shape[1], -1, dilation, in_shape[3]))
        input = input.transpose(0, 3, 1, 2, 4)
        input = input.reshape((-1,) + tuple(input.shape[2:]))

    output = super(TimeDilatedMaxPool2DLayer, self).get_output_for(input, **kwargs)

    if dilated:
        # restore the time axis from the minibatch dimension
        output = output.reshape(
            (in_shape[0], dilation) + tuple(output.shape[1:]))
        output = output.transpose(0, 2, 3, 1, 4)
        output = output.reshape(
            (in_shape[0], output.shape[1], -1, output.shape[4]))

        # remove the padding
        output = output[:, :, :output.shape[2] - pad_w]

    return output
def _ctc_normal(self, predict, labels):
    """CTC negative log-likelihood computed in the plain probability domain.

    :param predict: per-step class probabilities; columns are selected with
        ``labels``
    :param labels: label sequence (presumably already interleaved with the
        blank symbol ``self.tpo["CTC_blank"]`` -- confirm with the caller)
    :return: symbolic scalar ``-log p(labels)``
    """
    n = labels.shape[0]
    blank = self.tpo["CTC_blank"]

    # a skip over position i+1 is allowed when it is a blank and the labels
    # on either side differ; two blanks are appended so that the shifted
    # views line up
    labels2 = T.concatenate((labels, [blank, blank]))
    differs_two_ahead = T.neq(labels2[:-2], labels2[2:])
    next_is_blank = T.eq(labels2[1:-1], blank)
    sec_diag = differs_two_ahead * next_is_blank

    # transition matrix: stay, advance by one, and (where allowed) skip
    recurrence_relation = \
        T.eye(n) + \
        T.eye(n, k=1) + \
        T.eye(n, k=2) * sec_diag.dimshuffle((0, 'x'))

    pred_y = predict[:, labels]

    def step(curr, accum):
        return curr * T.dot(accum, recurrence_relation)

    # forward recursion starting with all mass on the first position
    probabilities, _ = theano.scan(
        step,
        sequences=[pred_y],
        outputs_info=[T.eye(n)[0]]
    )

    # valid endings are the last two positions at the final step
    labels_probab = T.sum(probabilities[-1, -2:])
    return -T.log(labels_probab)
def visualize_weights(self, layer, imsize, layout):
    """
    Displays the weights of a specified layer as images.

    Layers below ``self.net.n_layers`` are delegated to the wrapped network;
    index ``self.net.n_layers`` shows the output weights (``Wa``, ``Wms`` and
    ``WUs`` side by side).

    :param layer: the layer whose weights to display
    :param imsize: the image size
    :param layout: number of rows and columns for each page
    :return: none
    :raises ValueError: if ``layer`` is beyond the output layer
    """
    if layer < self.net.n_layers:
        self.net.visualize_weights(layer, imsize, layout)
        return

    if layer != self.net.n_layers:
        raise ValueError('Layer {} doesn\'t exist.'.format(layer))

    # output layer: stack all weight matrices column-wise, one image per unit
    output_weights = [self.Wa] + self.Wms + self.WUs
    stacked = np.concatenate([W.get_value() for W in output_weights], axis=1)
    helper.disp_imdata(stacked.T, imsize, layout)
    plt.show(block=False)
def visualize_activations(self, x):
    """
    Visualizes the activations in the mdn caused by a given data minibatch.

    The hidden activations are shown by the wrapped network; three further
    figures show the mixing coefficients, the component means and the
    (flattened) scale matrices.

    :param x: a minibatch of data
    :return: none
    """
    self.net.visualize_activations(x)

    # Three separate outputs, one per title below. The means and the scale
    # matrices used to be joined with `+`, which produced only two outputs
    # (and adds tensors of different widths).
    forwprop = theano.function(
        inputs=[self.input],
        outputs=[
            self.a,
            tt.concatenate(self.ms, axis=1),
            tt.concatenate(
                [tt.reshape(U, [U.shape[0], -1]) for U in self.Us], axis=1),
        ]
    )
    activations = forwprop(x.astype(dtype))

    titles = ['mixing coefficients', 'means', 'scale matrices']
    for a, title in zip(activations, titles):
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.imshow(a, cmap='gray', interpolation='none')
        ax.set_title(title)
        ax.set_xlabel('layer units')
        ax.set_ylabel('data points')

    plt.show(block=False)
def randomize_parameters(params, sigmas, sig_min_perturbations):
    """Create antithetically perturbed copies of every parameter.

    For each parameter, ``n_perturbations // 2`` normal noise tensors are
    drawn and mirrored (``+eps`` and ``-eps``); the noise is scaled by
    ``softplus(sigma) + sig_min_perturbations`` and added to the parameter.

    :param params: sequence of symbolic parameters; axes 1 and 2 give the
        shape of one noise sample
    :param sigmas: per-parameter scale inputs, indexed like ``params``
    :param sig_min_perturbations: lower bound added to every scale
    :return: ``(r_params, r_epsilons)`` -- perturbed parameters and the noise
        used for each of them
    """
    # Floor division: ``n_perturbations / 2`` is a float on Python 3 and is
    # not a valid shape entry.
    half = n_perturbations // 2

    r_params = []
    r_epsilons = []
    for i, param in enumerate(params):
        epsilon_half = theano_rng.normal(
            (half, param.shape[1], param.shape[2]),
            dtype=theano.config.floatX)
        # mirrored noise: first half +eps, second half -eps
        r_epsilon = T.concatenate([epsilon_half, -1.0 * epsilon_half], axis=0)
        scale = T.nnet.softplus(sigmas[i]) + sig_min_perturbations
        r_params.append(param + r_epsilon * scale)
        r_epsilons.append(r_epsilon)
    return r_params, r_epsilons


####################################################################
#
# Create randomly perturbed version of parameters
#
####################################################################
def renet_layer_lr_noscan(X, rnn1, rnn2, w, h, wp, hp):
    """Sweep two RNNs horizontally over image patches, without ``theano.scan``.

    The image is cut into ``h // hp`` rows of ``w // wp`` patches. In every
    row ``rnn1`` reads the patches left to right and ``rnn2`` reads them
    right to left, each starting from its ``H0``.

    NOTE(review): the states of ``rnn2`` are stored in reading order
    (right to left), not re-aligned with the patch columns.

    :param X: symbolic image indexed as [channel, row, column]
    :param rnn1: forward RNN exposing ``H0`` and ``recurrence(x, h_prev)``
    :param rnn2: backward RNN with the same interface
    :param w: image width
    :param h: image height
    :param wp: patch width
    :param hp: patch height
    :return: stacked per-row results with the first two axes swapped
    """
    # floor division keeps the patch counts integral on Python 3 as well
    n_rows = h // hp
    n_cols = w // wp

    list_of_images = []
    for i in range(n_rows):
        rows = slice(i * hp, i * hp + hp)
        h_tm1 = rnn1.H0
        hr_tm1 = rnn2.H0
        h1 = []
        h2 = []
        for j in range(n_cols):
            # forward pass: patch j
            x = X[:, rows, j * wp:(j * wp + wp)].flatten()
            h_t = rnn1.recurrence(x, h_tm1)
            h1.append(h_t)
            h_tm1 = h_t

            # Backward pass: mirrored patch. This used to feed `x` again,
            # leaving `xr` unused and making rnn2 a second forward pass.
            jr = n_cols - j - 1
            xr = X[:, rows, jr * wp:(jr * wp + wp)].flatten()
            hr_t = rnn2.recurrence(xr, hr_tm1)
            h2.append(hr_t)
            hr_tm1 = hr_t

        img = T.concatenate([h1, h2])
        list_of_images.append(img)

    return T.stacklists(list_of_images).dimshuffle((1, 0, 2))
def renet_layer_lr_allscan(X, rnn1, rnn2, w, h, wp, hp):
    """Sweep two RNNs horizontally over image patches using ``theano.scan``.

    The image is reshaped so that every scan step sees one band of ``hp``
    rows; inside a step the band is cut into ``w // wp`` patches which are
    fed to ``rnn1`` and, with ``go_backwards=True``, to ``rnn2``.

    :param X: symbolic image indexed as [channel, row, column]
    :param rnn1: forward RNN exposing ``output(x)``
    :param rnn2: backward RNN exposing ``output(x, go_backwards=True)``
    :param w: image width
    :param h: image height
    :param wp: patch width
    :param hp: patch height
    :return: scan results with the first two axes swapped
    """
    # Floor division: `/` yields floats on Python 3, which are not valid
    # shape entries or step counts.
    n_rows = h // hp
    n_cols = w // wp

    C = X.shape[0]
    # split the rows for the first scan: one band of hp rows per step
    X = X.dimshuffle((1, 0, 2)).reshape((n_rows, hp * C * w))

    def rnn_pass(x):
        # one band -> sequence of flattened patches
        x = x.reshape((hp, C, w)).dimshuffle((2, 1, 0)).reshape(
            (n_cols, C * wp * hp))
        h1 = rnn1.output(x)
        h2 = rnn2.output(x, go_backwards=True)
        img = T.concatenate([h1.T, h2.T])
        return img

    results, _ = theano.scan(
        fn=rnn_pass,
        sequences=X,
        outputs_info=None,
        n_steps=n_rows,
    )
    return results.dimshuffle((1, 0, 2))
def get_output_for(self, input, init=False, **kwargs):
    """Append minibatch-discrimination features to ``input``.

    For every sample and kernel, the L1 distances (summed over axis 2 of the
    projected activations) to all samples of the batch are turned into
    ``exp(-distance)`` and summed. A large constant on the diagonal removes
    each sample's distance to itself.

    :param input: symbolic input; flattened to 2-D if it has more axes
    :param init: when true, data-dependent initialisation is performed and
        the corresponding updates are stored in ``self.init_updates``
    :return: ``input`` with the new features concatenated along axis 1
    """
    if input.ndim > 2:
        # if the input has more than two dimensions, flatten it into a
        # batch of feature vectors.
        input = input.flatten(2)

    activation = T.tensordot(input, self.W, [[1], [0]])

    # pairwise L1 distances between samples; 1e6 on the diagonal makes the
    # self-term vanish after exp(-.)
    pairwise = abs(activation.dimshuffle(0, 1, 2, 'x')
                   - activation.dimshuffle('x', 1, 2, 0))
    diagonal_offset = 1e6 * T.eye(input.shape[0]).dimshuffle(0, 'x', 1)
    abs_dif = T.sum(pairwise, axis=2) + diagonal_offset

    if init:
        # rescale so that half the mean nearest-neighbour distance is 1
        mean_min_abs_dif = 0.5 * T.mean(T.min(abs_dif, axis=2), axis=0)
        abs_dif = abs_dif / mean_min_abs_dif.dimshuffle('x', 0, 'x')
        self.init_updates = [
            (self.log_weight_scale,
             self.log_weight_scale - T.log(mean_min_abs_dif).dimshuffle(0, 'x'))]

    f = T.sum(T.exp(-abs_dif), axis=2)

    if init:
        # centre the features and store the matching bias
        mf = T.mean(f, axis=0)
        f = f - mf.dimshuffle('x', 0)
        self.init_updates.append((self.b, -mf))
    else:
        f = f + self.b.dimshuffle('x', 0)

    return T.concatenate([input, f], axis=1)
def initialize_matrix(n_in, n_out, name, rng, init='rand'):
    """Create a shared ``n_in x n_out`` matrix with the requested initialisation.

    :param n_in: number of rows
    :param n_out: number of columns
    :param name: name given to the shared variable
    :param rng: random generator providing ``uniform(low, high, size)``
    :param init: ``'rand'`` (uniform in +-sqrt(6 / (n_in + n_out))),
        ``'randSmall'`` (the same scaled by 0.01) or ``'identity'`` (ones on
        the main diagonal, zeros elsewhere)
    :return: Theano shared variable holding the matrix
    :raises ValueError: for an unknown ``init``
    """
    if init in ('rand', 'randSmall'):
        bound = np.sqrt(6. / (n_in + n_out))
        values = np.asarray(
            rng.uniform(low=-bound, high=bound, size=(n_in, n_out)),
            dtype=theano.config.floatX)
        if init == 'randSmall':
            values = np.float32(0.01) * values
    elif init == 'identity':
        # rectangular identity: a square identity padded with zero rows or
        # zero columns, whichever is needed
        values = np.eye(n_in, n_out, dtype=theano.config.floatX)
    else:
        raise ValueError("Unknown initialization method ["+init+"]")

    return theano.shared(value=values, name=name)
def times_diag(input, n_hidden, diag, swap_re_im):
    """Multiply by the complex diagonal matrix ``exp(j * diag)``.

    Per the original notes: ``input`` is an I x 2*n_hidden matrix, where I is
    the number of training examples, and ``diag`` is an n_hidden-dimensional
    real vector which defines the 2*n_hidden x 2*n_hidden complex diagonal
    matrix via e^{j*diag} = cos(diag) + j*sin(diag).

    :param input: symbolic matrix holding real and imaginary parts side by side
    :param n_hidden: number of complex units (not used in the computation)
    :param diag: symbolic vector of phases
    :param swap_re_im: column index that exchanges the real and imaginary halves
    :return: symbolic matrix of the same layout as ``input``
    """
    # phases for both halves; d is 2*n_hidden long
    d = T.concatenate([diag, -diag])

    cos_row = T.cos(d).dimshuffle('x', 0)
    sin_row = T.sin(d).dimshuffle('x', 0)

    same_part = input * cos_row
    cross_part = input * sin_row
    return same_part + cross_part[:, swap_re_im]
def forward(self, x, mask, hc):
    """One masked LSTM step using the forward-direction gates.

    :param x: symbolic input for this step
    :param mask: per-sample mask; broadcast over the feature axis
    :param hc: previous state packed as [c | h], either a vector or a
        batch of rows
    :return: new state packed as [c | h] with the same rank as ``hc``
    """
    n_out = self.n_out_t
    batched = hc.ndim > 1

    # unpack the previous cell and hidden state
    if batched:
        c_tm1, h_tm1 = hc[:, :n_out], hc[:, n_out:]
    else:
        c_tm1, h_tm1 = hc[:n_out], hc[n_out:]

    in_t = self.in_gate.forward(x, h_tm1)
    forget_t = self.forget_gate.forward(x, h_tm1)
    out_t = self.out_gate.forward(x, h_tm1)
    candidate = self.input_layer.forward(x, h_tm1)

    keep = mask.dimshuffle(0, 'x')

    c_t = forget_t * c_tm1 + in_t * candidate
    c_t = T.cast(c_t * keep, 'float32')

    h_t = out_t * T.tanh(c_t)
    h_t = T.cast(h_t * keep, 'float32')

    if batched:
        return T.concatenate([c_t, h_t], axis=1)
    return T.concatenate([c_t, h_t])
def backward(self, x, mask, hc):
    """One masked LSTM step using the backward-direction gates.

    :param x: symbolic input for this step
    :param mask: per-sample mask; broadcast over the feature axis
    :param hc: previous state packed as [c | h], either a vector or a
        batch of rows
    :return: new state packed as [c | h] with the same rank as ``hc``
    """
    n_out = self.n_out_t
    batched = hc.ndim > 1

    # unpack the previous cell and hidden state
    if batched:
        c_tm1, h_tm1 = hc[:, :n_out], hc[:, n_out:]
    else:
        c_tm1, h_tm1 = hc[:n_out], hc[n_out:]

    in_t = self.in_gate_b.forward(x, h_tm1)
    forget_t = self.forget_gate_b.forward(x, h_tm1)
    out_t = self.out_gate_b.forward(x, h_tm1)
    candidate = self.input_layer_b.forward(x, h_tm1)

    keep = mask.dimshuffle(0, 'x')

    c_t = forget_t * c_tm1 + in_t * candidate
    c_t = T.cast(c_t * keep, 'float32')

    h_t = out_t * T.tanh(c_t)
    h_t = T.cast(h_t * keep, 'float32')

    if batched:
        return T.concatenate([c_t, h_t], axis=1)
    return T.concatenate([c_t, h_t])
def _getLSTMWeight(self, shape):
    """
    Sample an initial value for an LSTM bias vector or weight matrix.

    http://yyue.blogspot.com/2015/01/a-brief-overview-of-deep-learning.html
    For LSTMs, use orthogonal initializations for the weight matrices and
    set the forget gate biases to be high

    NOTE: biases are currently drawn from ``np.random.laplace``; the variant
    that fills the forget-gate block with ``params['forget_bias']`` is not
    active.

    :param shape: 1-tuple for a bias, 2-tuple ``(nin, 4*nin)`` for a weight
    :return: array of dtype ``config.floatX`` with the given shape
    """
    rank = len(shape)

    if rank == 1:  # bias
        self._p('Sampling biases for LSTM from exponential distribution')
        return np.random.laplace(size=shape).astype(config.floatX)

    if rank == 2:  # weight
        nin, nout = shape[0], shape[1]
        assert int(nout/4)==nin,'Not LSTM weight.'
        # four orthogonal blocks placed side by side
        blocks = [self._getOrthogonalWeight((nin, int(nout/4)))
                  for _ in range(4)]
        return np.concatenate(blocks, axis=1).astype(config.floatX)

    assert False,'Should not get here'
def MakeVisual(X_src, X_tar):
    """Prepare a batch for visualisation (LAB pair).

    Only the target batch is returned, resized to
    ``(batch, nc, npx, npx // 2)``. The side-by-side concatenation of source
    and target is disabled, so ``X_src`` is accepted only for interface
    compatibility.

    :param X_src: source batch (currently unused)
    :param X_tar: target batch
    :return: resized copy of ``X_tar``
    """
    # Floor division: ``npx / 2`` is a float on Python 3 and is not a valid
    # shape entry.
    half_width = npx // 2
    return np.resize(X_tar, (X_tar.shape[0], nc, npx, half_width))


# SET PARAMETERS.
def get_output_for(self, input, init=False, **kwargs):
    """Append minibatch-discrimination features to ``input``.

    For every sample and kernel, the L1 distances (summed over axis 2 of the
    projected activations) to all samples of the batch are turned into
    ``exp(-distance)`` and summed. A large constant on the diagonal removes
    each sample's distance to itself.

    :param input: symbolic input; flattened to 2-D if it has more axes
    :param init: when true, data-dependent initialisation is performed and
        the corresponding updates are stored in ``self.init_updates``
    :return: ``input`` with the new features concatenated along axis 1
    """
    if input.ndim > 2:
        # if the input has more than two dimensions, flatten it into a
        # batch of feature vectors.
        input = input.flatten(2)

    activation = T.tensordot(input, self.W, [[1], [0]])

    # pairwise L1 distances between samples; 1e6 on the diagonal makes the
    # self-term vanish after exp(-.)
    pairwise = abs(activation.dimshuffle(0, 1, 2, 'x')
                   - activation.dimshuffle('x', 1, 2, 0))
    diagonal_offset = 1e6 * T.eye(input.shape[0]).dimshuffle(0, 'x', 1)
    abs_dif = T.sum(pairwise, axis=2) + diagonal_offset

    if init:
        # rescale so that half the mean nearest-neighbour distance is 1
        mean_min_abs_dif = 0.5 * T.mean(T.min(abs_dif, axis=2), axis=0)
        abs_dif = abs_dif / mean_min_abs_dif.dimshuffle('x', 0, 'x')
        self.init_updates = [
            (self.log_weight_scale,
             self.log_weight_scale - T.log(mean_min_abs_dif).dimshuffle(0, 'x'))]

    f = T.sum(T.exp(-abs_dif), axis=2)

    if init:
        # centre the features and store the matching bias
        mf = T.mean(f, axis=0)
        f = f - mf.dimshuffle('x', 0)
        self.init_updates.append((self.b, -mf))
    else:
        f = f + self.b.dimshuffle('x', 0)

    return T.concatenate([input, f], axis=1)


# Input Mixture of Gaussian Layer
def connect(self, inputs):
    """Embed every feature type of ``inputs`` and join the embeddings.

    :param inputs: symbolic 3-D index tensor; the last axis selects the
        feature type
    :return: the single embedding when there is one feature type, otherwise
        all embeddings concatenated along axis 2
    """
    def _embed(i):
        # look up the indices of feature type i and restore the two leading axes
        indices = inputs[:, :, i].flatten()
        proj_shape = [inputs.shape[0], inputs.shape[1],
                      self.embedding_shapes[i][1]]
        return self.embeddings[i][indices].reshape(proj_shape)

    features = [_embed(i) for i in range(self.num_feature_types)]

    if self.num_feature_types == 1:
        return features[0]
    return tensor.concatenate(features, axis=2)
def get_output_for(self, input, **kwargs):
    """Aggregate per-item sigmoid probabilities along axis 1.

    :param input: symbolic pre-sigmoid activations
    :return: concatenation (along axis 0) of ``sum(log(1 - p))`` followed by
        ``sum(p)``, each summed over axis 1
    """
    ps = nonlinearities.sigmoid(input)

    # log-probability that no item fires; the epsilon avoids log(0)
    sum_log = T.sum(T.log(1 - ps + 1.e-12), axis=1)
    # plain sum of the probabilities
    sum_p_r_benign = T.sum(ps, axis=1)

    return T.concatenate([sum_log, sum_p_r_benign])
def compute(self, x_t, c_tm1, h_tm1):
    """One LSTM step with a single fused weight matrix.

    :param x_t: input, (mb, dim_x)
    :param c_tm1: previous cell state
    :param h_tm1: previous hidden state, (mb, dim_h)
    :return: ``(c_t, h_t)``
    """
    n = self.dim_h

    # one affine map produces all four pre-activations at once
    x_and_h = T.concatenate([x_t, h_tm1], axis=1)
    state = T.dot(x_and_h, self.w) + self.b

    def _block(k):
        # k-th block of width dim_h
        return state[:, k * n:(k + 1) * n]

    # blocks are ordered (c, i, o, f)
    c_tilde = T.tanh(_block(0))
    i_t = T.nnet.sigmoid(_block(1))
    o_t = T.nnet.sigmoid(_block(2))
    f_t = T.nnet.sigmoid(_block(3))

    c_t = i_t * c_tilde + f_t * c_tm1
    h_t = o_t * T.tanh(c_t)
    return c_t, h_t
def compute(self, state, w_idx, scene):
    """Run one decoding step of the LSTM conditioned on a scene vector.

    :param state: packed previous state, split into two parts of width
        ``config['nh']``
    :param w_idx: previous word index/indices, passed to the embedding
    :param scene: scene vector appended to the LSTM input
    :return: ``(new_state, p)`` -- the re-packed state and the output of the
        prediction MLP
    """
    prev_word = self.embedding.compute(w_idx)

    # unpack the previous step
    cell_prev, hid_prev = split_state(state, scheme=[(2, self.config['nh'])])

    # LSTM step on [word, scene]
    lstm_in = T.concatenate([prev_word, scene], axis=1)
    cell_now, hid_now = self.lstm.compute(lstm_in, cell_prev, hid_prev)

    # re-pack the state
    new_state = T.concatenate([cell_now, hid_now], axis=-1)

    # the previous word (w_{t-1}) is an extra predictor feature
    predictor_in = T.concatenate([hid_now, prev_word], axis=-1)
    p = self.pred_mlp.compute(predictor_in)
    return new_state, p
def compute(self, state, w_idx):
    """Run one decoding step of the plain LSTM language model.

    :param state: packed previous state, split into two parts of width
        ``config['nh']``
    :param w_idx: previous word index/indices, passed to the embedding
    :return: ``(new_state, p)`` -- the re-packed state and the output of the
        prediction MLP
    """
    prev_word = self.embedding.compute(w_idx)

    # unpack the previous step
    cell_prev, hid_prev = split_state(state, scheme=[(2, self.config['nh'])])

    # LSTM step on the embedded word alone
    cell_now, hid_now = self.lstm.compute(prev_word, cell_prev, hid_prev)

    # re-pack the state
    new_state = T.concatenate([cell_now, hid_now], axis=-1)

    # the previous word (w_{t-1}) is an extra predictor feature
    predictor_in = T.concatenate([hid_now, prev_word], axis=-1)
    p = self.pred_mlp.compute(predictor_in)
    return new_state, p
def compute(self, state, w_idx, feat):
    """Run one decoding step with attention over ``feat``.

    :param state: packed previous state; it is split into an attention part
        of width ``config['na']`` and two parts of width ``config['nh']``
    :param w_idx: previous word index/indices, passed to the embedding
    :param feat: features handed to the attention module
    :return: ``(new_state, p, alpha)`` -- the re-packed state, the output of
        the prediction MLP and the attention module's second output
    """
    prev_word = self.embedding.compute(w_idx)

    # unpack the previous step
    layout = [(1, self.config['na']), (2, self.config['nh'])]
    attn_prev, cell_prev, hid_prev = split_state(state, scheme=layout)

    # attention is queried with the previous context, hidden state and word
    query = T.concatenate([attn_prev, hid_prev, prev_word], axis=1)
    attn_now, alpha = self.attention.compute(feat, query)

    # LSTM step on [context, word]; outputs are (mb, nh)
    lstm_in = T.concatenate([attn_now, prev_word], axis=-1)
    cell_now, hid_now = self.lstm.compute(lstm_in, cell_prev, hid_prev)

    # re-pack the state in the same order it was unpacked
    new_state = T.concatenate([attn_now, cell_now, hid_now], axis=-1)

    # predict the word probability from context, hidden state and word
    predictor_in = T.concatenate([attn_now, hid_now, prev_word], axis=-1)
    p = self.pred_mlp.compute(predictor_in)
    return new_state, p, alpha
def init_func(self, img_value):
    """Project image features, store them, and return the initial state.

    Both Theano functions are compiled on first use and cached on the
    instance.

    :param img_value: numeric image features (a 3-D array, matching the
        ``tensor3`` input of the projection function)
    :return: value of the initial state for the stored features
    """
    # compile the feature projection once
    if self._proj_func is None:
        img_sym = T.tensor3()
        self._proj_func = theano.function(
            [img_sym], self.proj_mlp.compute(img_sym))

    # compile the initial-state function once; it reads the shared features,
    # so it takes no explicit inputs
    if self._init_func is None:
        mean_feat = self._feat_shared.mean(axis=1)
        init_state = T.concatenate(
            [mean_feat, self.init_mlp.compute(mean_feat)], axis=-1)
        self._init_func = theano.function([], init_state)

    # refresh the shared features before evaluating the initial state
    projected = self._proj_func(img_value)
    self._feat_shared.set_value(projected)
    return self._init_func()
def generate_attention(tparams, leaves, ancestors):
    """Compute attention weights of every leaf over its ancestors.

    The embeddings of each (leaf, ancestor) pair are concatenated, passed
    through a tanh layer and reduced to one score per pair; a softmax turns
    the scores into weights.

    :param tparams: parameters with keys ``W_emb``, ``W_attention``,
        ``b_attention`` and ``v_attention``
    :param leaves: integer index array selecting leaf embeddings
    :param ancestors: integer index array of the same shape selecting
        ancestor embeddings
    :return: symbolic attention weights
    """
    W_emb = tparams['W_emb']

    # pair up leaf and ancestor embeddings along the feature axis
    pair_emb = T.concatenate([W_emb[leaves], W_emb[ancestors]], axis=2)

    hidden = T.tanh(
        T.dot(pair_emb, tparams['W_attention']) + tparams['b_attention'])
    scores = T.dot(hidden, tparams['v_attention'])

    return T.nnet.softmax(scores)
def build_model(tparams, leavesList, ancestorsList, options):
    """Build the symbolic training graph of the attention-embedding GRU model.

    :param tparams: shared model parameters (``W_emb``, ``W_attention``,
        ``v_attention``, ``W_output`` and whatever the layer helpers read)
    :param leavesList: list of leaf index arrays, paired with ``ancestorsList``
    :param ancestorsList: list of ancestor index arrays
    :param options: model options; reads ``dropoutRate`` and ``L2``
    :return: ``(use_noise, x, y, mask, lengths, cost, cost_noreg, y_hat)``;
        ``cost`` equals ``cost_noreg`` when no L2 penalty is requested
    """
    dropoutRate = options['dropoutRate']
    trng = RandomStreams(123)
    use_noise = theano.shared(numpy_floatX(0.))

    # symbolic inputs
    x = T.tensor3('x', dtype=config.floatX)
    y = T.tensor3('y', dtype=config.floatX)
    mask = T.matrix('mask', dtype=config.floatX)
    lengths = T.vector('lengths', dtype=config.floatX)

    # final embedding of every leaf: attention-weighted sum of its ancestors
    embList = []
    for leaves, ancestors in zip(leavesList, ancestorsList):
        tempAttention = generate_attention(tparams, leaves, ancestors)
        tempEmb = (tparams['W_emb'][ancestors] * tempAttention[:, :, None]).sum(axis=1)
        embList.append(tempEmb)
    emb = T.concatenate(embList, axis=0)

    x_emb = T.tanh(T.dot(x, emb))
    hidden = gru_layer(tparams, x_emb, options)
    hidden = dropout_layer(hidden, use_noise, trng, dropoutRate)
    y_hat = softmax_layer(tparams, hidden) * mask[:, :, None]

    # binary cross-entropy, summed over axis 2 and axis 0 and normalised
    # per sample by `lengths`
    logEps = 1e-8
    cross_entropy = -(y * T.log(y_hat + logEps) + (1. - y) * T.log(1. - y_hat + logEps))
    output_loglikelihood = cross_entropy.sum(axis=2).sum(axis=0) / lengths
    cost_noreg = T.mean(output_loglikelihood)

    # `cost` must always be bound: without this default, the return below
    # raised UnboundLocalError whenever options['L2'] was not positive.
    cost = cost_noreg
    if options['L2'] > 0.:
        l2_penalty = ((tparams['W_output'] ** 2).sum()
                      + (tparams['W_attention'] ** 2).sum()
                      + (tparams['v_attention'] ** 2).sum())
        cost = cost_noreg + options['L2'] * l2_penalty

    return use_noise, x, y, mask, lengths, cost, cost_noreg, y_hat