The following code examples, extracted from open-source Python projects, illustrate how to use theano.tensor.reshape().
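Before the project examples, here is a minimal self-contained sketch (not taken from any of the projects below) showing the basic behaviour of theano.tensor.reshape(): flattening a 3-D tensor to 2-D with an inferred dimension, then restoring the original shape from a symbolic shape vector.

import numpy as np
import theano
import theano.tensor as T

x = T.tensor3('x')                     # symbolic (batch, rows, cols)
flat = T.reshape(x, (x.shape[0], -1))  # -> (batch, rows * cols); -1 is inferred
restored = T.reshape(flat, x.shape)    # reshape back using the symbolic shape

f = theano.function([x], [flat, restored])
a = np.arange(24, dtype=theano.config.floatX).reshape(2, 3, 4)
flat_v, restored_v = f(a)
print(flat_v.shape)       # (2, 12)
print(restored_v.shape)   # (2, 3, 4)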
def rbf_kernel(X0):
    XY = T.dot(X0, X0.transpose())
    x2 = T.reshape(T.sum(T.square(X0), axis=1), (X0.shape[0], 1))
    X2e = T.repeat(x2, X0.shape[0], axis=1)
    H = T.sub(T.add(X2e, X2e.transpose()), 2 * XY)

    V = H.flatten()

    # median distance
    h = T.switch(T.eq((V.shape[0] % 2), 0),
                 # if even vector
                 T.mean(T.sort(V)[((V.shape[0] // 2) - 1):((V.shape[0] // 2) + 1)]),
                 # if odd vector
                 T.sort(V)[V.shape[0] // 2])

    h = T.sqrt(0.5 * h / T.log(X0.shape[0].astype('float32') + 1.0)) / 2.

    Kxy = T.exp(-H / h ** 2 / 2.0)
    neighbors = T.argsort(H, axis=1)[:, 1]

    return Kxy, neighbors, h
def get_output_for(self, input, deterministic=False, **kwargs):
    def _phase_shift(input, r):
        bsize, c, a, b = input.shape[0], 1, self.output_shape[2] // r, self.output_shape[3] // r
        X = T.reshape(input, (bsize, r, r, a, b))
        X = T.transpose(X, (0, 3, 4, 1, 2))  # bsize, a, b, r2, r1
        X = T.split(x=X, splits_size=[1] * a, n_splits=a, axis=1)  # a, [bsize, b, r, r]
        X = [T.reshape(x, (bsize, b, r, r)) for x in X]
        X = T.concatenate(X, axis=2)  # bsize, b, a*r, r
        X = T.split(x=X, splits_size=[1] * b, n_splits=b, axis=1)  # b, [bsize, a*r, r]
        X = [T.reshape(x, (bsize, a * r, r)) for x in X]
        X = T.concatenate(X, axis=2)  # bsize, a*r, b*r
        return X.dimshuffle(0, 'x', 1, 2)

    Xc = T.split(x=input, splits_size=[input.shape[1] // self.c] * self.c, n_splits=self.c, axis=1)
    return T.concatenate([_phase_shift(xc, self.r) for xc in Xc], axis=1)

# Multiscale Dilated Convolution Block
# This function (not a layer in and of itself, though you could make it one) returns a set of
# concatenated conv2d and dilatedconv2d layers. Each layer uses the same basic filter W, operating
# at a different dilation factor (or taken as the mean of W for the 1x1 conv).
# The channel-wise output of each layer is weighted by a set of coefficients, which are initialized
# to 1 / the total number of dilation scales, meaning that we're starting by taking an elementwise
# mean. These should be learnable parameters.
# NOTES: - I'm considering changing the variable names to be more descriptive, and look less like
#          ridiculous academic code. It's on the to-do list.
#        - I keep the bias and nonlinearity out of the default definition for this layer, as I
#          expect it to be batchnormed and nonlinearized in the model config.
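To make the comment above concrete, here is a rough sketch, in plain Theano rather than the project's own Lasagne code, of the idea it describes: apply the same filter W at several dilation factors and combine the per-scale outputs with learnable channel-wise coefficients initialized to 1 / number of scales (i.e. start from an elementwise mean). The names msd_conv, coeffs, and dilations are hypothetical and not from the project above, and W is assumed to be a shared variable holding a 3x3 filter bank.

import numpy as np
import theano
import theano.tensor as T

def msd_conv(x, W, dilations=(1, 2, 4)):
    # x: (batch, in_ch, H, W); W: shared variable of shape (out_ch, in_ch, 3, 3)
    n_scales = len(dilations)
    out_ch = W.get_value().shape[0]
    # one learnable coefficient per (scale, output channel), initialized to 1/n_scales
    coeffs = theano.shared(
        np.full((n_scales, out_ch), 1.0 / n_scales, dtype=theano.config.floatX),
        name='coeffs')
    out = 0
    for i, d in enumerate(dilations):
        # 'half' padding with an odd kernel keeps the spatial size for any dilation
        y = T.nnet.conv2d(x, W, border_mode='half', filter_dilation=(d, d))
        out = out + y * coeffs[i].dimshuffle('x', 0, 'x', 'x')
    return out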
def get_output_for(self, inputs, attention_only=False, **kwargs):
    # inputs[0]: B x N x D
    # inputs[1]: B x Q x D
    # inputs[2]: B x N x Q / B x Q x N
    # self.mask: B x Q
    if self.transpose:
        M = inputs[2].dimshuffle((0, 2, 1))
    else:
        M = inputs[2]
    alphas = T.nnet.softmax(T.reshape(M, (M.shape[0] * M.shape[1], M.shape[2])))
    alphas_r = T.reshape(alphas, (M.shape[0], M.shape[1], M.shape[2])) * \
        self.mask[:, np.newaxis, :]  # B x N x Q
    alphas_r = alphas_r / alphas_r.sum(axis=2)[:, :, np.newaxis]  # B x N x Q
    q_rep = T.batched_dot(alphas_r, inputs[1])  # B x N x D
    return eval(self.gating_fn)(inputs[0], q_rep)
def svgd_gradient(X0):
    hidden, _, mse = discrim(X0)
    grad = -1.0 * T.grad(mse.sum(), X0)

    kxy, neighbors, h = rbf_kernel(hidden)  # TODO

    coff = T.exp(-T.sum((hidden[neighbors] - hidden) ** 2, axis=1) / h ** 2 / 2.0)
    v = coff.dimshuffle(0, 'x') * (-hidden[neighbors] + hidden) / h ** 2

    X1 = X0[neighbors]
    hidden1, _, _ = discrim(X1)
    dxkxy = T.Lop(hidden1, X1, v)

    # svgd_grad = (T.dot(kxy, T.flatten(grad, 2)).reshape(dxkxy.shape) + dxkxy) / T.sum(kxy, axis=1).dimshuffle(0, 'x', 'x', 'x')
    svgd_grad = grad + dxkxy / 2.
    return grad, svgd_grad, dxkxy
def get_output_for(self, input, **kwargs):
    # [batch, n-step, num_input_channels, input_length]
    input_shape = input.shape

    batch_size = input_shape[0]
    time_steps = input_shape[1]

    # [batch * n-step, num_input_channels, input_length]
    input_shape = (batch_size * time_steps, input_shape[2], input_shape[3])
    output = self.conv1d.get_output_for(T.reshape(input, input_shape), **kwargs)

    # [batch * n-step, num_filters, output_length]
    output_shape = output.shape
    # [batch, n-step, num_filters, output_length]
    output_shape = (batch_size, time_steps, output_shape[1], output_shape[2])
    return T.reshape(output, output_shape)
def get_output_for(self, input, **kwargs):
    # [batch, n-step, num_input_channels, input_length]
    input_shape = input.shape

    batch_size = input_shape[0]
    time_steps = input_shape[1]

    # [batch * n-step, num_input_channels, input_length]
    input_shape = (batch_size * time_steps, input_shape[2], input_shape[3])
    output = super(PoolTimeStep1DLayer, self).get_output_for(T.reshape(input, input_shape), **kwargs)

    # [batch * n-step, num_input_channels, pool_length]
    output_shape = output.shape
    # [batch, n-step, num_input_channels, pool_length]
    output_shape = (batch_size, time_steps, output_shape[1], output_shape[2])
    return T.reshape(output, output_shape)
def get_output_for(self, input, **kwargs):
    # If the input has more than two dimensions, flatten it into a
    # batch of feature vectors.
    input_reshape = input.flatten(2) if input.ndim > 2 else input

    activation = T.dot(input_reshape, self.W_h)
    if self.b_h is not None:
        activation = activation + self.b_h.dimshuffle('x', 0)
    activation = self.nonlinearity(activation)

    transform = T.dot(input_reshape, self.W_t)
    if self.b_t is not None:
        transform = transform + self.b_t.dimshuffle('x', 0)
    transform = nonlinearities.sigmoid(transform)

    carry = 1.0 - transform
    output = activation * transform + input_reshape * carry

    # Reshape output back to the original input shape
    if input.ndim > 2:
        output = T.reshape(output, input.shape)

    return output
def multiclass_hinge_loss(self, predictions, targets, delta=1):
    num_cls = predictions.shape[1]
    if targets.ndim == predictions.ndim - 1:
        targets = T.extra_ops.to_one_hot(targets, num_cls)
    elif targets.ndim != predictions.ndim:
        raise TypeError('rank mismatch between targets and predictions')
    corrects = predictions[targets.nonzero()]
    rest = T.reshape(predictions[(1 - targets).nonzero()], (-1, num_cls - 1))
    rest = T.max(rest, axis=1)
    return T.nnet.relu(rest - corrects + delta).mean()
def set_output(self):
    padding = self._padding
    input_shape = self._input_shape
    padded_input = tensor.alloc(0.0,  # Value to fill the tensor
                                input_shape[0],
                                input_shape[1] + 2 * padding[1],
                                input_shape[2],
                                input_shape[3] + 2 * padding[3],
                                input_shape[4] + 2 * padding[4])

    padded_input = tensor.set_subtensor(
        padded_input[:, padding[1]:padding[1] + input_shape[1], :,
                     padding[3]:padding[3] + input_shape[3],
                     padding[4]:padding[4] + input_shape[4]],
        self._prev_layer.output)

    fc_output = tensor.reshape(
        tensor.dot(self._fc_layer.output, self.Wx.val), self._output_shape)

    self._output = conv3d2d.conv3d(padded_input, self.Wh.val) + \
        fc_output + self.b.val.dimshuffle('x', 'x', 0, 'x', 'x')
def project3Dto2D(self, Li, idxs):
    """
    Project 3D point to 2D
    :param Li: joints in normalized 3D
    :param idxs: frames specified by subset
    :return: 2D points, in normalized 2D coordinates
    """

    if not isinstance(idxs, numpy.ndarray):
        idxs = numpy.asarray([idxs])

    # 3D -> 2D projection, also shift by M to the cropped window
    Li_glob3D = (numpy.reshape(Li, (len(idxs), self.numJoints, 3)) * self.Di_scale[idxs][:, None, None]
                 + self.Di_off3D[idxs][:, None, :]).reshape((len(idxs) * self.numJoints, 3))
    Li_glob3D_hom = numpy.concatenate(
        [Li_glob3D, numpy.ones((len(idxs) * self.numJoints, 1), dtype='float32')], axis=1)
    Li_glob2D_hom = numpy.dot(Li_glob3D_hom, self.cam_proj.T)
    Li_glob2D = (Li_glob2D_hom[:, 0:3] / Li_glob2D_hom[:, 3][:, None]).reshape(
        (len(idxs), self.numJoints, 3))
    Li_img2D_hom = numpy.einsum('ijk,ikl->ijl', Li_glob2D, self.Di_trans2D[idxs])
    Li_img2D = (Li_img2D_hom[:, :, 0:2] / Li_img2D_hom[:, :, 2][:, :, None]).reshape(
        (len(idxs), self.numJoints * 2))
    Li_img2Dcrop = (Li_img2D - (self.Di.shape[3] / 2.)) / (self.Di.shape[3] / 2.)
    return Li_img2Dcrop
def evaluateToGT(self, Li, idxs):
    """
    Evaluate the current estimate to a ground truth
    :param Li: current estimates
    :param idxs: idxs to evaluate
    :return: mean error, max error and MD score
    """

    if not isinstance(idxs, numpy.ndarray):
        idxs = numpy.asarray(idxs)

    if self.gt3D is not None:
        gt3D_subset = self.gt3D[idxs]
        if Li.shape[0] == len(idxs):
            Li_subset = Li
        else:
            Li_subset = Li[idxs]
        mean_error = numpy.mean(
            numpy.sqrt(numpy.square((gt3D_subset - Li_subset.reshape(gt3D_subset.shape))
                                    * self.Di_scale[idxs, None, None]).sum(axis=2)),
            axis=1).mean()
        max_error = numpy.max(
            numpy.sqrt(numpy.square((gt3D_subset - Li_subset.reshape(gt3D_subset.shape))
                                    * self.Di_scale[idxs, None, None]).sum(axis=2)))
        vals = [(numpy.nanmax(
                    numpy.sqrt(numpy.square((gt3D_subset - Li_subset.reshape(gt3D_subset.shape))
                                            * self.Di_scale[idxs, None, None]).sum(axis=2)),
                    axis=1) <= j).sum() / float(gt3D_subset.shape[0])
                for j in range(0, 80)]
        md_score = numpy.asarray(vals).sum() / float(80.)
        return mean_error, max_error, md_score
    else:
        return 0., 0., 0.
def get_output_for(self, inputs, **kwargs):
    input = inputs[0]
    input_word = T.flatten(inputs[1])
    word_dropout = inputs[2]

    # Apply word embedding
    sentence_rep = self.SemMem.get_output_for([input, word_dropout])

    # Apply GRU layer
    gru_outs = self.GRU.get_output_for([sentence_rep])

    # Extract the candidate fact from the GRU's output using the input_word variable,
    # resolving the input with an additional word.
    # e.g. "John went to the hallway nil nil nil" -> [GRU1, ..., GRU8] -> GRU5
    candidate_facts = T.reshape(
        gru_outs[T.arange(gru_outs.shape[0], dtype='int32'), input_word - 1],
        (-1, input.shape[1], self.hid_state_size))
    return candidate_facts
def visualize_activations(self, x):
    """
    Visualizes the activations in the mdn caused by a given data minibatch.
    :param x: a minibatch of data
    :return: none
    """

    self.net.visualize_activations(x)

    forwprop = theano.function(
        inputs=[self.input],
        outputs=[self.a, tt.concatenate(self.ms, axis=1) +
                 tt.concatenate([tt.reshape(U, [U.shape[0], -1]) for U in self.Us], axis=1)]
    )
    activations = forwprop(x.astype(dtype))

    for a, title in izip(activations, ['mixing coefficients', 'means', 'scale matrices']):
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.imshow(a, cmap='gray', interpolation='none')
        ax.set_title(title)
        ax.set_xlabel('layer units')
        ax.set_ylabel('data points')

    plt.show(block=False)
def construct(self, input_tv):
    T_ = self._declare_mat('T', self.in_dim, self.out_dim)
    T_.clip_gradient = self.prm('clip_gradient')
    T_.l2_project = self.prm('l2_project')
    T_.l2_projection_axis = 1
    n_timesteps = input_tv.shape[0]
    window_size = self.prm('win_size')
    self.out_dim = window_size * self.out_dim
    output_tv = T_[input_tv.flatten()].reshape(
        [n_timesteps, self.out_dim])
    if self.prm('do_dropout'):
        T_.dropout_retention_freq = self.prm('dropout_retention_freq')
        dropout_mask = dropout_mask_creator(
            self.out_dim, self.prm('dropout_retention_freq'))
        self.output_tv = output_tv * dropout_mask
    else:
        self.output_tv = output_tv
    return (T_,)
def needed_key(self):
    return self._needed_key_impl('activation_fn')

# class MaxPool(Chip):
#     '''This class_chip collapses the input tensor by max pooling along its last dimension.'''
#     def construct(self, input_tv):
#         pool_size = self.prm('pool_size')
#         y = T.reshape(
#             input_tv,
#             ([input_tv.shape[i] for i in range(input_tv.ndim - 1)]
#              + [T.floor_div(input_tv.shape[input_tv.ndim - 1], pool_size).astype('int32'), pool_size]),
#             ndim=input_tv.ndim + 1)
#         self.output_tv = T.max(y, axis=y.ndim - 1)
#         return tuple()
#
#     def needed_key(self):
#         return self._needed_key_impl('pool_size')
def log_cross_entropy_extended(x, x_theta, log_distribution, k_max, eps=0.0):
    p_k = x_theta["p_k"]
    F = x.shape[1]

    p_k = T.clip(p_k, eps, 1.0)
    x_k = T.clip(x, 0, k_max)

    p_k = T.reshape(p_k, (-1, k_max + 1))
    x_k = T.reshape(x_k, (-1, 1))

    y_cross_entropy = objectives.categorical_crossentropy(p_k, x_k)
    y_cross_entropy = T.reshape(y_cross_entropy, (-1, F))

    y_log_distribution = T.ge(x, k_max) * log_distribution(x - k_max, x_theta, eps)

    # y = - T.lt(x, 0) * y_cross_entropy + y_log_distribution
    y = - y_cross_entropy + T.lt(x, 0) * y_log_distribution
    # y = - y_cross_entropy + y_log_distribution

    return y
def log_softmax_poisson(x, p_k, log_lambda, k_max=10, eps=0.0):
    F = x.shape[1]

    p_k = T.clip(p_k, eps, 1.0 - eps)
    x_k = T.clip(x, 0, k_max)

    p_k = T.reshape(p_k, (-1, k_max + 1))
    x_k = T.reshape(x_k, (-1, 1))

    y_cross_entropy = objectives.categorical_crossentropy(p_k, x_k)
    y_cross_entropy = T.reshape(y_cross_entropy, (-1, F))

    y_log_poisson = T.ge(x, k_max) * log_poisson(x - k_max, log_lambda, eps)

    y = - y_cross_entropy + y_log_poisson

    return y
def log_softmax_negative_binomial(x, p_k, p, log_r, k_max=10, eps=0.0):
    F = x.shape[1]

    p_k = T.clip(p_k, eps, 1.0 - eps)
    x_k = T.clip(x, 0, k_max)

    p_k = T.reshape(p_k, (-1, k_max + 1))
    x_k = T.reshape(x_k, (-1, 1))

    y_cross_entropy = objectives.categorical_crossentropy(p_k, x_k)
    y_cross_entropy = T.reshape(y_cross_entropy, (-1, F))

    y_log_negative_binomial = T.ge(x, k_max) \
        * log_negative_binomial(x - k_max, p, log_r, eps)

    y = - y_cross_entropy + y_log_negative_binomial

    return y
def get_output(self, input_):
    """
    This function overrides the parent's one.
    Creates a symbolic function to compute output from an input.

    Parameters
    ----------
    input_: TensorVariable

    Returns
    -------
    TensorVariable
    """
    result = pool_2d(input_,
                     ws=self.input_shape[1:],
                     ignore_border=True,
                     stride=self.input_shape[1:],
                     pad=self.padding,
                     mode='average_exc_pad')
    # result is still a 4D tensor: (batch size, output channel, 1, 1)
    return T.reshape(result, (input_.shape[0], input_.shape[1]))  # flatten to 2D matrix
def get_output_for(self, inputs, **kwargs):
    p_gru, q_gru, q_mask, feature = tuple(inputs)

    time_p = p_gru.shape[1]
    time_q = q_gru.shape[1]

    p_gru_re = p_gru.dimshuffle(0, 1, 'x', 2)  # (batch, time_p, 1, units)
    q_gru_re = q_gru.dimshuffle(0, 'x', 1, 2)  # (batch, 1, time_q, units)

    gru_merge = T.tanh(p_gru_re * q_gru_re).reshape((-1, time_q, self.units))  # (batch * time_p, time_q, units)

    att = T.dot(gru_merge, self.v1).reshape((-1, time_p, time_q))  # (batch, time_p, time_q)
    att_q = T.dot(q_gru, self.v2).squeeze()  # (batch, time_q)
    att = att + att_q.dimshuffle(0, 'x', 1) + feature  # (batch, time_p, time_q)

    att = T.nnet.softmax(att.reshape((-1, time_q)))  # (batch * time_p, time_q)
    att = att.reshape((-1, time_p, time_q)) * q_mask.dimshuffle(0, 'x', 1)  # (batch, time_p, time_q)
    att = att / (att.sum(axis=2, keepdims=True) + 1e-8)  # (batch, time_p, time_q)
    att = att.reshape((-1, time_q))

    output = T.batched_dot(att, gru_merge)  # (batch * time_p, units)
    output = output.reshape((-1, time_p, self.units))
    return output
def get_output(self, train=False):
    print(len(self.layers))
    u = self.layers[0].get_output(train)
    t = self.layers[1].get_output(train)
    # tp = t[0]
    # tn = t[1]
    # un = T.dot(u, u)
    # return [T.dot(u, tp) / (un * T.dot(tp, tp)), T.dot(u, tn) / (un * T.dot(tn, tn))]
    # theano.printing.pprint('vals')
    # x = T.dvector()
    # printed_u = hello_world_op(x)
    # f = theano.function([x], printed_u)
    # f(['here'])
    # T.reshape(u, [2, 1])
    # T.reshape(t, [1, 2, 2])
    # d = T.dot(t.dimshuffle(1, 0, 2), u)
    # u1 = self.activation(u)
    # t.reshape([2, 2, 2])
    return (([u, u] * t.dimshuffle(1, 0, 2)).dimshuffle(1, 0, 2))  # .reshape([2,2])
    # return d.dimshuffle(1, 0, 2)
    # just dot product
def get_output(self, train=False):
    print(len(self.layers))
    u = self.layers[0].get_output(train)
    t = self.layers[1].get_output(train)
    # tp = t[0]
    # tn = t[1]
    # un = T.dot(u, u)
    # return [T.dot(u, tp) / (un * T.dot(tp, tp)), T.dot(u, tn) / (un * T.dot(tn, tn))]
    # theano.printing.pprint('vals')
    # x = T.dvector()
    # printed_u = hello_world_op(x)
    # f = theano.function([x], printed_u)
    # f(['here'])
    # T.reshape(u, [2, 1])
    # T.reshape(t, [1, 2, 2])
    # d = T.dot(t.dimshuffle(1, 0, 2), u)
    # u1 = self.activation(u)
    # t.reshape([2, 2, 2])
    return T.max((([u, u] * t.dimshuffle(1, 0, 2)).dimshuffle(1, 0, 2)), 2)  # .reshape([2,2])
    # return d.dimshuffle(1, 0, 2)
    # just dot product
def get_output(self, train=False):
    print(len(self.layers))
    u = self.layers[0].get_output(train)
    t = self.layers[1].get_output(train)
    # tp = t[0]
    # tn = t[1]
    # un = T.dot(u, u)
    # return [T.dot(u, tp) / (un * T.dot(tp, tp)), T.dot(u, tn) / (un * T.dot(tn, tn))]
    # theano.printing.pprint('vals')
    # x = T.dvector()
    # printed_u = hello_world_op(x)
    # f = theano.function([x], printed_u)
    # f(['here'])
    # T.reshape(u, [2, 1])
    # T.reshape(t, [1, 2, 2])
    # d = T.dot(t.dimshuffle(1, 0, 2), u)
    # u1 = self.activation(u)
    # t.reshape([2, 2, 2])
    return T.sum((([u, u, u, u, u] * t.dimshuffle(1, 0, 2)).dimshuffle(1, 0, 2)), 2)  # .reshape([2,2])
    # return d.dimshuffle(1, 0, 2)
    # just dot product
def get_output(self, train=False):
    print(len(self.layers))
    u = self.layers[0].get_output(train)
    t = self.layers[1].get_output(train)
    # tp = t[0]
    # tn = t[1]
    # un = T.dot(u, u)
    # return [T.dot(u, tp) / (un * T.dot(tp, tp)), T.dot(u, tn) / (un * T.dot(tn, tn))]
    # theano.printing.pprint('vals')
    # x = T.dvector()
    # printed_u = hello_world_op(x)
    # f = theano.function([x], printed_u)
    # f(['here'])
    # T.reshape(u, [2, 1])
    # T.reshape(t, [1, 2, 2])
    # d = T.dot(t.dimshuffle(1, 0, 2), u)
    # u1 = self.activation(u)
    # t.reshape([2, 2, 2])
    return T.sum((([u, u] * t.dimshuffle(1, 0, 2)).dimshuffle(1, 0, 2)), 2)  # .reshape([2,2])
    # return d.dimshuffle(1, 0, 2)
    # just dot product
def tanimoto_wmap(target_in, prediction, eps=1e-8):
    '''
    Tanimoto distance, see:
    https://en.wikipedia.org/wiki/Jaccard_index#Other_definitions_of_Tanimoto_distance
    '''
    target_in = T.reshape(target_in, (target_in.shape[1], target_in.shape[2]))
    target = target_in[:, :2]
    wmap = T.repeat(target_in[:, 2].dimshuffle(('x', 0)), 2, axis=0).dimshuffle((1, 0))
    prediction = T.reshape(prediction, (prediction.shape[1], prediction.shape[2]))
    prediction = T.clip(prediction, eps, 1 - eps)

    target_w = T.sum(T.sqr(target * wmap), axis=0, keepdims=True)
    pred_w = T.sum(T.sqr(prediction * wmap), axis=0, keepdims=True)
    intersection_w = T.sum(target_w * pred_w, axis=0, keepdims=True)

    intersection = T.sum(target * prediction, axis=0, keepdims=True)
    prediction_sq = T.sum(T.sqr(prediction), axis=0, keepdims=True)
    target_sq = T.sum(T.sqr(target), axis=0, keepdims=True)

    loss = (target_w + pred_w - 2 * intersection_w) / (target_sq + prediction_sq - intersection)
    return loss
def unpack_params(self, hyper):
    t_ind = 0
    a = hyper[0]; t_ind += 1
    b = hyper[1]; t_ind += 1
    c = hyper[2]; t_ind += 1
    l_f = hyper[t_ind:t_ind + self.D * self.S]; t_ind += self.D * self.S
    l_F = TT.reshape(l_f, (self.D, self.S))
    r_f = hyper[t_ind:t_ind + self.M * self.S]; t_ind += self.M * self.S
    r_F = TT.reshape(r_f, (self.M, self.S))
    F = l_F.dot(r_F.T)
    l_p = hyper[t_ind:t_ind + self.S]; t_ind += self.S
    l_P = TT.reshape(l_p, (1, self.S))
    p = hyper[t_ind:t_ind + self.M]; t_ind += self.M
    P = TT.reshape(p, (1, self.M))
    l_FC = l_P - TT.mean(l_F, 0)[None, :]
    FC = P - TT.mean(F, 0)[None, :]
    return a, b, c, l_F, F, l_FC, FC
def forward(self, data, stable_version=False):
    """input has each row as data vector; output also does so"""
    count = 1
    for bias, weight, pre_w, post_w in zip(self.biases, self.weights, self.pre_w, self.post_w):
        size = pre_w[0].shape[0]
        zeros_pre_w = T.zeros((size + 4, size + 4))
        zeros_post_w = T.zeros((size + 4, size + 4))
        pre_w_padding = T.set_subtensor(zeros_pre_w[2: size + 2, 2: size + 2], pre_w[0])
        post_w_padding_T = T.set_subtensor(zeros_post_w[2: size + 2, 2: size + 2], post_w[0])
        pre, updt = scan(process_pre_post_w, sequences=[pre_w_padding, zeros_pre_w])
        post_T, updt = scan(process_pre_post_w, sequences=[post_w_padding_T, zeros_post_w])
        pre, post_T = pre[2:size + 2, :], post_T[2:size + 2, :]
        ori_shape = data.shape
        data = T.reshape(data, (ori_shape[0], pre_w[0].shape[0], pre_w[0].shape[0]))
        product, updt = scan(lambda x, A, B: T.dot(T.dot(A, x), B),
                             sequences=data, non_sequences=[pre, post_T.T])
        data = T.reshape(product, ori_shape)
        if count < self.num_layers - 1:
            data = T.nnet.relu(T.dot(data, weight) + bias)
        elif not stable_version:
            data = T.nnet.softmax(T.dot(data, weight) + bias)
        else:
            data = log_softmax(T.dot(data, weight) + bias)
        count += 1
    return data
def _step_batch(self, x_t, mask, h_t_1, w, u, b):
    """
    step function of forward in batch version
    :param x_t: (batch, in)
    :param mask: (batch, )
    :param h_t_1: (batch, hidden)
    :param w: (hidden, in)
    :param u: (hidden, hidden)
    :param b: (hidden)
    :return: (batch, hidden)
    """
    # (batch, in) (in, hidden) -> (batch, hidden)
    h_t_1 = T.reshape(h_t_1, (h_t_1.shape[0], 8, 8))
    x_t = T.reshape(x_t, (x_t.shape[0], 8, 8))
    x_t = x_t / x_t.norm(2, axis=1)[:, None, :]
    h_t = self.act.activate(T.dot(x_t, w.T) + T.dot(h_t_1, u.T) + b)
    h_t = h_t / h_t.norm(2, axis=1)[:, None, :]
    h_t_1 = T.reshape(h_t_1, (h_t_1.shape[0], 64))
    h_t = T.reshape(h_t, (h_t.shape[0], 64))
    # (batch, hidden) * (batch, None) + (batch, hidden) * (batch, None) -> (batch, hidden)
    return h_t * mask[:, None] + h_t_1 * (1 - mask[:, None])
def model_baseline(s_x_, s_pdpo_):
    '''very simple logistic regression model'''
    global g_mdl, g_dataset
    s_bsize = T.shape(s_x_)[0]
    idim, odim = reduce(int.__mul__, g_dataset.imsize), len(g_dataset.label_map)
    return T.nnet.softmax(
        g_mdl.op_dropout(g_mdl.lyr_linear(
            'm', T.reshape(s_x_, (s_bsize, idim)), idim, odim), s_pdpo_))
def lyr_sconv_gen(name_, s_x_, idim_, odim_, **kwargs_):
    '''
    quick & dirty implementation of fxnn convolution layer
    '''
    global g_mdl
    dilation = kwargs_.get('dilation_')
    if dilation is None:
        dilation = 1
    init_scale = kwargs_.get('init_scale_')
    bias = kwargs_.get('bias_')
    op_conv = partial(
        T.nnet.conv2d,
        border_mode='half',
        filter_dilation=(dilation, dilation))
    ir = 0.5 / sqrt(idim_ * 5 + odim_)
    s_dims = T.shape(s_x_)
    s_x = T.reshape(s_x_, (s_dims[0] * idim_, 1, s_dims[2], s_dims[3]))
    s_x1 = T.reshape(op_conv(
        s_x, g_sconv_ker, filter_shape=(2, 1, 1, 3), **kwargs_),
        (s_dims[0] * idim_ * 2, 1, s_dims[2], s_dims[3]))
    s_x2 = T.reshape(op_conv(
        s_x1, g_sconv_ker.transpose(0, 1, 3, 2), filter_shape=(2, 1, 3, 1),
        ), (s_dims[0], idim_ * 4, s_dims[2], s_dims[3]))
    s_y = T.join(1, s_x2, s_x_)
    return g_mdl.lyr_conv(
        name_, s_y, idim_ * 5, odim_, fsize_=1, init_scale_=ir, **kwargs_)
def compute_loss(output, num_samples, num_entries=6, gamma=500.0):
    """Compute the loss of a dataset, given the output of the DSSM.

    Args:
        output (:class:`lasagne.layers.Layer`): the output of the DSSM
        num_samples (int): the number of samples in the dataset
        num_entries (int): the number of compared papers in the DSSM structure
        gamma (float): the coefficient applied in the softmax of the similarities

    Returns:
        theano.tensor.TensorType: the loss of the dataset
    """
    assert (num_entries > 2)
    assert (num_samples > 0)

    # Post-NN operations to compute the loss
    # First, we extract the first output of each bundle
    mask = np.zeros(num_entries * num_samples)
    mask[::num_entries] = 1
    unmask = np.ones(num_entries * num_samples) - mask
    cited = T.extra_ops.compress(mask, output, axis=0)
    odocs = T.extra_ops.compress(unmask, output, axis=0)

    # We duplicate each row 'x' num_entries-1 times
    cited = T.extra_ops.repeat(cited, num_entries - 1, axis=0)

    # Then we compute element-wise product of x with each y, for each bundle
    sims = T.sum(cited * odocs, axis=1)

    # We reshape the similarities
    sims = T.reshape(sims, (num_samples, num_entries - 1))
    sims = gamma * sims

    # We take the softmax of each row
    probs = T.nnet.softmax(sims)

    # We compute the loss as the sum of element on the first column
    loss_mask = np.zeros(num_entries - 1)
    loss_mask[0] = 1
    loss = T.extra_ops.compress(loss_mask, probs, axis=1)

    return -T.log(T.prod(loss))
def jacobian_h_x(self, inputs):
    h, act_grad = self.act_grads(inputs)
    jacobian = self.hidden.W * act_grad.dimshuffle(0, 'x', 1)
    return (h, T.reshape(jacobian, newshape=(self.nhid, self.nvis)))
def compute_jacobian_h_x(self, inputs):
    inputs = theano.shared(inputs.flatten())
    h = self.encode(inputs)
    # see later
    # h = h.flatten()
    # inputs = inputs.flatten()
    # inputs = T.reshape(inputs, newshape=(self.nvis))
    J = theano.gradient.jacobian(h, inputs)
    return h, J
def flatten(W):
    """
    Get the flattened version of this weight matrix
    :param W:
    :return: W with shape (D, O)
    """
    if W.ndim == 4:
        W = W.reshape(W.shape[0], -1)
    W = W.T
    return W
def get_conv_xy_all(layer, deterministic=True):
    w_np = layer.W.get_value()
    w = layer.W
    if layer.flip_filters:
        w = w[:, :, ::-1, ::-1]

    input_layer = layer.input_layer
    if layer.pad == 'same':
        input_layer = L.PadLayer(layer.input_layer,
                                 width=np.array(w_np.shape[2:]) // 2,
                                 batch_ndim=2)
    input_shape = L.get_output_shape(input_layer)
    output_shape = L.get_output_shape(layer)

    max_x = input_shape[2] - w_np.shape[2] + 1
    max_y = input_shape[3] - w_np.shape[3] + 1
    # print("input_shape shape: ", input_shape)
    # print("output_shape shape: ", output_shape, np.prod(output_shape[2:]))
    # print("pad: \"%s\"" % layer.pad)
    # print(" stride: ", layer.stride)
    # print("max_x %d max_y %d" % (max_x, max_y))

    x_orig = L.get_output(input_layer, deterministic=True)

    x = theano.tensor.nnet.neighbours.images2neibs(x_orig,
                                                   neib_shape=layer.filter_size,
                                                   neib_step=layer.stride,
                                                   mode='valid')
    x = T.reshape(x, (x_orig.shape[0], -1,
                      np.prod(output_shape[2:]), np.prod(w_np.shape[2:])))
    x = T.transpose(x, (0, 2, 1, 3))
    x = T.reshape(x, (-1, T.prod(x.shape[2:])))

    w = T.flatten(w, outdim=2).T  # D,O
    y = T.dot(x, w)  # N,O
    if layer.b is not None:
        y += T.shape_padaxis(layer.b, axis=0)
    return x, y
def get_output_for(self, inputs, **kwargs):
    coefs = inputs[-1]
    output = TT.zeros_like(inputs[0])
    for i, input_arr in enumerate(inputs[:-1]):
        output += input_arr * coefs[:, i].reshape((-1, 1))
    return output
def get_output_for(self, all_obs_var, **kwargs):
    # n_batch = all_obs_var.shape[:-1]
    # out = TT.tile(self.output_var, (n_batch, 1))
    # out = TT.tile(self.output_var, TT.concatenate([n_batch, [1]]))
    # return out
    ndim = all_obs_var.ndim
    reshaped_cnt = TT.reshape(self.output_var, (1,) * (ndim - 1) + self.output_var.get_value().shape)
    tile_arg = TT.concatenate([all_obs_var.shape[:-1], [1]])
    tiled = TT.tile(reshaped_cnt, tile_arg, ndim=ndim)
    return tiled
def output(self, x, a):
    x = T.reshape(x, (-1, self.n_inputs, self.height, self.width))
    return T.tanh(conv2d(x, self.W) + self.b.dimshuffle('x', 0, 'x', 'x'))
def normalize_batch_in_training(x, gamma, beta, reduction_axes, epsilon=0.0001):
    '''Computes mean and std for batch then apply batch_normalization on batch.
    '''
    dev = theano.config.device
    use_cudnn = ndim(x) < 5 and reduction_axes == [0, 2, 3] and (dev.startswith('cuda') or dev.startswith('gpu'))
    if use_cudnn:
        broadcast_beta = beta.dimshuffle('x', 0, 'x', 'x')
        broadcast_gamma = gamma.dimshuffle('x', 0, 'x', 'x')
        try:
            normed, mean, stdinv = theano.sandbox.cuda.dnn.dnn_batch_normalization_train(
                x, broadcast_gamma, broadcast_beta, 'spatial', epsilon)
            var = T.inv(stdinv ** 2)
            return normed, T.flatten(mean), T.flatten(var)
        except AttributeError:
            pass

    var = x.var(reduction_axes)
    mean = x.mean(reduction_axes)

    target_shape = []
    for axis in range(ndim(x)):
        if axis in reduction_axes:
            target_shape.append(1)
        else:
            target_shape.append(x.shape[axis])
    target_shape = T.stack(*target_shape)

    broadcast_mean = T.reshape(mean, target_shape)
    broadcast_var = T.reshape(var, target_shape)
    broadcast_beta = T.reshape(beta, target_shape)
    broadcast_gamma = T.reshape(gamma, target_shape)
    normed = batch_normalization(x, broadcast_mean, broadcast_var,
                                 broadcast_beta, broadcast_gamma,
                                 epsilon)
    return normed, mean, var
def reshape(x, shape):
    return T.reshape(x, shape)
def batch_flatten(x):
    '''Turn a n-D tensor into a 2D tensor where the first dimension is conserved.
    '''
    x = T.reshape(x, (x.shape[0], T.prod(x.shape) // x.shape[0]))
    return x
def squeeze(x, axis):
    '''Remove a 1-dimension from the tensor at index "axis".
    '''
    shape = list(x.shape)
    shape.pop(axis)
    return T.reshape(x, tuple(shape))
def sparse_categorical_crossentropy(output, target, from_logits=False):
    target = T.cast(T.flatten(target), 'int32')
    target = T.extra_ops.to_one_hot(target, nb_class=output.shape[-1])
    target = reshape(target, shape(output))
    return categorical_crossentropy(output, target, from_logits)
def get_output_for(self, input, **kwargs):
    ndim = input.ndim
    reshaped_param = TT.reshape(self.param, (1,) * (ndim - 1) + (self.num_units,))
    tile_arg = TT.concatenate([input.shape[:-1], [1]])
    tiled = TT.tile(reshaped_param, tile_arg, ndim=ndim)
    return tiled