The following 19 code examples, extracted from open source Python projects, illustrate how to use theano.tensor.shape_padaxis().
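Before the project examples, here is a minimal sketch (not taken from any of the projects below) of what shape_padaxis does: it inserts a broadcastable axis of length 1 at the given position, much like numpy.expand_dims. The variable names here are illustrative only.

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')                 # symbolic matrix of shape (N, D)
row = T.shape_padaxis(x, axis=0)  # shape (1, N, D), new axis is broadcastable
col = T.shape_padaxis(x, axis=2)  # shape (N, D, 1), new axis is broadcastable

f = theano.function([x], [row.shape, col.shape])
print(f(np.zeros((3, 4), dtype=theano.config.floatX)))
# -> [array([1, 3, 4]), array([3, 4, 1])]

Because the padded axis is broadcastable, expressions such as adding a (1, O) bias row to an (N, O) matrix broadcast automatically, which is the pattern most of the examples below rely on.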
def get_dense_xy(layer, deterministic=True):
    x = L.get_output(L.FlattenLayer(layer.input_layer),
                     deterministic=deterministic)  # N, D
    w = layer.W  # D, O
    y = T.dot(x, w)  # (N,O)
    if layer.b is not None:
        y += T.shape_padaxis(layer.b, axis=0)
    return x, y
def get_conv_xy(layer, deterministic=True):
    w_np = layer.W.get_value()
    input_layer = layer.input_layer
    if layer.pad == 'same':
        input_layer = L.PadLayer(layer.input_layer,
                                 width=np.array(w_np.shape[2:]) // 2,  # integer padding width
                                 batch_ndim=2)
    input_shape = L.get_output_shape(input_layer)
    max_x = input_shape[2] - w_np.shape[2]
    max_y = input_shape[3] - w_np.shape[3]
    srng = RandomStreams()
    patch_x = srng.random_integers(low=0, high=max_x)
    patch_y = srng.random_integers(low=0, high=max_y)
    #print("input_shape shape: ", input_shape)
    #print("pad: \"%s\"" % (layer.pad,))
    #print(" stride: ", layer.stride)
    #print("max_x %d max_y %d" % (max_x, max_y))
    x = L.get_output(input_layer, deterministic=deterministic)
    x = x[:, :, patch_x:patch_x + w_np.shape[2],
          patch_y:patch_y + w_np.shape[3]]
    x = T.flatten(x, 2)  # N,D

    w = layer.W
    if layer.flip_filters:
        w = w[:, :, ::-1, ::-1]
    w = T.flatten(w, outdim=2).T  # D,O
    y = T.dot(x, w)  # N,O
    if layer.b is not None:
        y += T.shape_padaxis(layer.b, axis=0)
    return x, y
def get_conv_xy_all(layer, deterministic=True):
    w_np = layer.W.get_value()
    w = layer.W
    if layer.flip_filters:
        w = w[:, :, ::-1, ::-1]

    input_layer = layer.input_layer
    if layer.pad == 'same':
        input_layer = L.PadLayer(layer.input_layer,
                                 width=np.array(w_np.shape[2:]) // 2,
                                 batch_ndim=2)
    input_shape = L.get_output_shape(input_layer)
    output_shape = L.get_output_shape(layer)
    max_x = input_shape[2] - w_np.shape[2] + 1
    max_y = input_shape[3] - w_np.shape[3] + 1
    #print("input_shape shape: ", input_shape)
    #print("output_shape shape: ", output_shape, np.prod(output_shape[2:]))
    #print("pad: \"%s\"" % layer.pad)
    #print(" stride: ", layer.stride)
    #print("max_x %d max_y %d" % (max_x, max_y))
    x_orig = L.get_output(input_layer, deterministic=True)

    x = theano.tensor.nnet.neighbours.images2neibs(x_orig,
                                                   neib_shape=layer.filter_size,
                                                   neib_step=layer.stride,
                                                   mode='valid')
    x = T.reshape(x, (x_orig.shape[0], -1,
                      np.prod(output_shape[2:]), np.prod(w_np.shape[2:])))
    x = T.transpose(x, (0, 2, 1, 3))
    x = T.reshape(x, (-1, T.prod(x.shape[2:])))

    w = T.flatten(w, outdim=2).T  # D,O
    y = T.dot(x, w)  # N,O
    if layer.b is not None:
        y += T.shape_padaxis(layer.b, axis=0)
    return x, y
def _get_split(self, layer, deterministic=True, conv_all_patches=True, **kwargs):
    # Get the patches and the outputs without the non-linearities.
    if type(layer) is L.DenseLayer:
        x, y = putils.get_dense_xy(layer, deterministic)
    elif type(layer) is L.Conv2DLayer:
        if conv_all_patches is True:
            x, y = putils.get_conv_xy_all(layer, deterministic)
        else:
            x, y = putils.get_conv_xy(layer, deterministic)
    else:
        raise ValueError("Unknown layer as input")

    # Create an output dictionary
    outputs = dict()

    for name, fun in subtypes:
        outputs[name] = dict()
        mrk_y = 1.0 * T.cast(fun(y), dtype=theano.config.floatX)  # (N,O)
        y_current = y * mrk_y  # This has a binary mask
        cnt_y = T.shape_padaxis(T.sum(mrk_y, axis=0), axis=0)  # (1,O)
        norm = T.maximum(cnt_y, 1.)

        # Count how many datapoints are considered
        outputs[name]['cnt'] = cnt_y

        # The mean of the current batch
        outputs[name]['m_y'] = T.shape_padaxis(y_current.sum(axis=0), axis=0) / norm  # (1,O) mean output for batch
        outputs[name]['m_x'] = T.dot(x.T, mrk_y) / norm  # (D,O) mean input for batch

        # The mean of the current batch
        outputs[name]['yty'] = T.shape_padaxis(T.sum(y_current ** 2., axis=0), axis=0) / norm  # (1,O)
        outputs[name]['xty'] = T.dot(x.T, y_current) / norm  # D,O

    return dict_to_list(outputs)
def get_split(self, layer, deterministic=True, conv_all_patches=True, **kwargs):
    # Get the patches and the outputs without the non-linearities.
    if type(layer) is L.DenseLayer:
        x, y = get_dense_xy(layer, deterministic)
    elif type(layer) is L.Conv2DLayer:
        if conv_all_patches is True:
            x, y = get_conv_xy_all(layer, deterministic)
        else:
            x, y = get_conv_xy(layer, deterministic)
    else:
        raise ValueError("Unknown layer as input")

    # Create an output dictionary
    outputs = dict()

    for name, fun in subtypes:
        outputs[name] = dict()
        mrk_y = 1.0 * T.cast(fun(y), dtype=theano.config.floatX)  # (N,O)
        y_current = y * mrk_y  # This has a binary mask
        cnt_y = T.shape_padaxis(T.sum(mrk_y, axis=0), axis=0)  # (1,O)
        norm = T.maximum(cnt_y, 1.)

        # Count how many datapoints are considered
        outputs[name]['cnt'] = cnt_y

        # The mean of the current batch
        outputs[name]['m_y'] = T.shape_padaxis(y_current.sum(axis=0), axis=0) / norm  # (1,O) mean output for batch
        outputs[name]['m_x'] = T.dot(x.T, mrk_y) / norm  # (D,O) mean input for batch

        # The mean of the current batch
        outputs[name]['yty'] = T.shape_padaxis(T.sum(y_current ** 2., axis=0), axis=0) / norm  # (1,O)
        outputs[name]['xty'] = T.dot(x.T, y_current) / norm  # D,O

    return dict_to_list(outputs)
def get_output_for(self, inputs, deterministic=False, **kwargs):
    return T.shape_padaxis(inputs, axis=self.n_ax).repeat(self.n_rep, self.n_ax)
def _forward(self):
    if theano.config.device.startswith('gpu'):
        from theano.tensor.nnet.abstract_conv import bilinear_upsampling
    else:
        raise AssertionError('Bilinear interpolation requires GPU and cuDNN.')

    inpt = T.reshape(self.inpt, (self.inpt_depth, self.n_inpt,
                                 self.inpt_height, self.inpt_width))
    pre_res = bilinear_upsampling(input=inpt, ratio=self.up_factor)
    shuffle_res = pre_res.dimshuffle((2, 3, 0, 1))
    res = self._bilinear_upsampling_1D(inpt=shuffle_res, ratio=self.up_factor)
    self.output = res.dimshuffle((2, 3, 0, 1))
    self.output = T.shape_padaxis(self.output, axis=0)
    self.output = T.unbroadcast(self.output, 0)
def decode_to_probs(self, activations, relative_position, low_bound, high_bound):
    assert (low_bound % 12 == 0) and (high_bound - low_bound == self.num_octaves * 12), \
        "Circle of thirds must evenly divide into octaves"
    squashed = T.reshape(activations, (-1, self.RAW_ENCODING_WIDTH))

    rsp = T.nnet.softmax(squashed[:, :3])
    c1 = T.nnet.softmax(squashed[:, 3:7])
    c2 = T.nnet.softmax(squashed[:, 7:10])
    octave_choice = T.nnet.softmax(squashed[:, 10:])
    octave_notes = T.tile(c1, (1, 3)) * T.tile(c2, (1, 4))
    full_notes = T.reshape(T.shape_padright(octave_choice) * T.shape_padaxis(octave_notes, 1),
                           (-1, 12 * self.num_octaves))
    full_probs = T.concatenate([rsp[:, :2], T.shape_padright(rsp[:, 2]) * full_notes], 1)

    newshape = T.concatenate([activations.shape[:-1], [2 + high_bound - low_bound]], 0)
    fixed = T.reshape(full_probs, newshape, ndim=activations.ndim)
    return fixed
def queue_transform(feature_strengths, feature_vects, return_strengths=False):
    """
    Process features according to a "fragmented queue", where each timestep
    gets a size-1 window onto a feature queue. Effectively,
        feature_strengths gives how much to push onto queue
        feature_vects gives what to push on
        pop weights are tied to feature_strengths
        output is a size-1 peek (without popping)

    Parameters:
        - feature_strengths: float32 tensor of shape (batch, push_timestep) in [0,1]
        - feature_vects: float32 tensor of shape (batch, push_timestep, feature_dim)

    Returns:
        - peek_vects: float32 tensor of shape (batch, timestep, feature_dim)
    """
    n_batch, n_time, n_feature = feature_vects.shape

    cum_sum_str = T.extra_ops.cumsum(feature_strengths, 1)

    # We will be working in (batch, timestep, push_timestep)
    # For each timestep, if we subtract out the sum of pushes before that timestep
    # and then cap to 0-1 we get the cumsums for just the features active in that
    # timestep
    timestep_adjustments = T.shape_padright(cum_sum_str - feature_strengths)
    push_time_cumsum = T.shape_padaxis(cum_sum_str, 1)
    relative_cumsum = push_time_cumsum - timestep_adjustments
    capped_cumsum = T.minimum(T.maximum(relative_cumsum, 0), 1)

    # Now we can recover the peek strengths by taking a diff
    shifted = T.concatenate([T.zeros((n_batch, n_time, 1)), capped_cumsum[:, :, :-1]], 2)
    peek_strengths = capped_cumsum - shifted
    # Peek strengths is now (batch, timestep, push_timestep)

    result = T.batched_dot(peek_strengths, feature_vects)

    if return_strengths:
        return peek_strengths, result
    else:
        return result
def calc_binaryVal_negative_log_likelihood(data, probabilities, axis_to_sum=1):
    if axis_to_sum != 1:
        # addresses the case where we marginalize
        data = T.extra_ops.repeat(T.shape_padaxis(data, axis=1),
                                  repeats=probabilities.shape[1], axis=1)
    return - T.sum(data * T.log(probabilities) + (1 - data) * T.log(1 - probabilities),
                   axis=axis_to_sum)
def calc_categoricalVal_negative_log_likelihood(data, probabilities, axis_to_sum=1):
    if axis_to_sum != 1:
        # addresses the case where we marginalize
        data = T.extra_ops.repeat(T.shape_padaxis(data, axis=1),
                                  repeats=probabilities.shape[1], axis=1)
    return - T.sum(data * T.log(probabilities), axis=axis_to_sum)
def calc_realVal_negative_log_likelihood(data, recon, axis_to_sum=1):
    if axis_to_sum != 1:
        # addresses the case where we marginalize
        data = T.extra_ops.repeat(T.shape_padaxis(data, axis=1),
                                  repeats=recon.shape[1], axis=1)
    return .5 * T.sum((data - recon) ** 2, axis=axis_to_sum)
def calc_poissonVal_negative_log_likelihood(data, recon, axis_to_sum=1):
    if axis_to_sum != 1:
        # addresses the case where we marginalize
        data = T.extra_ops.repeat(T.shape_padaxis(data, axis=1),
                                  repeats=recon.shape[1], axis=1)
    return T.sum(T.exp(recon) - data * recon, axis=axis_to_sum)
def __init__(self, rng, input, batch_size, in_size, latent_size,
             W_a=None, W_b=None, epsilon=0.01):
    self.srng = theano.tensor.shared_randomstreams.RandomStreams(rng.randint(999999))
    self.input = input

    # setup variational params
    if W_a is None:
        W_values = np.asarray(0.01 * rng.standard_normal(size=(in_size, latent_size - 1)),
                              dtype=theano.config.floatX)
        W_a = theano.shared(value=W_values, name='W_a')
    if W_b is None:
        W_values = np.asarray(0.01 * rng.standard_normal(size=(in_size, latent_size - 1)),
                              dtype=theano.config.floatX)
        W_b = theano.shared(value=W_values, name='W_b')
    self.W_a = W_a
    self.W_b = W_b

    # compute Kumaraswamy samples
    uniform_samples = T.cast(self.srng.uniform(size=(batch_size, latent_size - 1),
                                               low=0.01, high=0.99),
                             theano.config.floatX)
    self.a = Softplus(T.dot(self.input, self.W_a))
    self.b = Softplus(T.dot(self.input, self.W_b))
    v_samples = (1 - (uniform_samples ** (1 / self.b))) ** (1 / self.a)

    # setup variables for recursion
    stick_segment = theano.shared(value=np.zeros((batch_size,), dtype=theano.config.floatX),
                                  name='stick_segment')
    remaining_stick = theano.shared(value=np.ones((batch_size,), dtype=theano.config.floatX),
                                    name='remaining_stick')

    def compute_latent_vars(i, stick_segment, remaining_stick, v_samples):
        # compute stick segment
        stick_segment = v_samples[:, i] * remaining_stick
        remaining_stick *= (1 - v_samples[:, i])
        return (stick_segment, remaining_stick)

    (stick_segments, remaining_sticks), updates = theano.scan(
        fn=compute_latent_vars,
        outputs_info=[stick_segment, remaining_stick],
        sequences=T.arange(latent_size - 1),
        non_sequences=[v_samples],
        strict=True)

    self.avg_used_dims = T.mean(T.sum(remaining_sticks > epsilon, axis=0))
    self.latent_vars = T.transpose(
        T.concatenate([stick_segments,
                       T.shape_padaxis(remaining_sticks[-1, :], axis=1).T],
                      axis=0))

    self.params = [self.W_a, self.W_b]
def repeat_x_row(x, n_times):
    # This is black magic based on broadcasting,
    # that's why variable names don't make any sense.
    a = T.shape_padaxis(x, axis=0)
    padding = [1] * x.ndim
    b = T.alloc(numpy.float32(1), n_times, *padding)
    out = a * b
    return out
def gated_mean(x, p=0.5, axis=2):
    import theano.tensor as T
    thres = T.shape_padaxis(p * T.mean(x, axis=axis) + (1 - p) * T.max(x, axis=axis),
                            axis=-1)
    mask = T.ge(x, thres)
    g_values = mask * x
    g_means = T.sum(g_values, axis=-1) / T.sum(mask, axis=-1)
    return g_means
def make_reduce_f(mode, dtype, ndim):
    t_type = T.TensorType(dtype=dtype, broadcastable=[False] * ndim)
    x = t_type('accum').transfer(None)
    y = t_type('slice').transfer(None)
    if mode == "gather":
        z = T.concatenate([x, y])
    else:
        T_op = getattr(T, mode)
        x_pad = T.shape_padaxis(x, axis=0)
        y_pad = T.shape_padaxis(y, axis=0)
        z = T_op(T.concatenate([x_pad, y_pad], axis=0), axis=0)
    name = mode + "_" + str(dtype)
    return theano.function([x, y], z.transfer(None), name=name,
                           allow_input_downcast=True)
def make_reduce_f(var, mode):
    dtype = var.dtype
    bcast = var.broadcastable
    t_type = T.TensorType(dtype=dtype, broadcastable=bcast)
    x = t_type('accum').transfer(None)
    y = t_type('slice').transfer(None)
    if mode == "gather":
        z = T.concatenate([x, y])
    else:
        T_op = getattr(T, mode)
        x_pad = T.shape_padaxis(x, axis=0)
        y_pad = T.shape_padaxis(y, axis=0)
        z = T_op(T.concatenate([x_pad, y_pad], axis=0), axis=0)
    name = mode + "_" + str(dtype) + broadcastable_string(bcast)
    return theano.function([x, y], z.transfer(None), name=name)
def _step(self, st_s, t, onoise, inoise):
    on_t = onoise[:, :, t]
    in_t = inoise[:, :, t:t+1]

    # get action
    at_s = self.predict(st_s)

    # obtain new steering variables
    A_t1 = self.aAction(st_s, at_s)

    # time-shift steerings 1 into the future
    # (A(t-15),..A(t) -> A(t-14),..,A(t+1))
    st_s3 = st_s[:, 1:].reshape((st_s.shape[0], self.params_task['history'], 4))
    st1_s3 = T.set_subtensor(st_s3[:, :, :3],
                             T.concatenate((st_s3[:, 1:, :3], T.shape_padaxis(A_t1, 1)), axis=1))
    xt1_s = T.concatenate((st_s[:, :1],
                           st1_s3.reshape((st_s.shape[0], st_s.shape[1] - 1))), axis=1)

    # Obtain \delta R(t+1) by BNN
    xt1_s = xt1_s.reshape((self.params['samples'],
                           xt1_s.shape[0] / self.params['samples'],
                           xt1_s.shape[1]))
    drt1_s, vdrt1_s = self.model.predict(xt1_s, mode='symbolic',
                                         provide_noise=True, noise=in_t)
    drt1_s = drt1_s.reshape((drt1_s.shape[0] * drt1_s.shape[1], drt1_s.shape[2]))
    vdrt1_s = vdrt1_s.reshape((vdrt1_s.shape[0] * vdrt1_s.shape[1], vdrt1_s.shape[2]))

    # sample from output noise
    drt1_s = on_t * T.sqrt(vdrt1_s) + drt1_s

    # obtain R(t+1) by adding \delta R(t+1)
    rt1_s = st_s[:, -1:] + drt1_s[:, 0:1]

    # undo log-logit transformation to obtain unnormalized reward
    rew1 = 1. / (1. + T.exp(-rt1_s))  # undo logit
    rew1 = rew1 * (self.model.params['bounds'][3] - self.model.params['bounds'][1]) + self.model.params['bounds'][1]
    rew1 = T.exp(rew1) - 1

    # update time-embedding: R(t-15)..R(t) -> R(t-14)..R(t+1)
    st1_s3 = T.set_subtensor(st1_s3[:, :, 3:],
                             T.concatenate((st1_s3[:, 1:, 3:], T.shape_padaxis(rt1_s, 1)), axis=1))
    st1_s = T.concatenate((st_s[:, :1],
                           st1_s3.reshape((st_s.shape[0], st_s.shape[1] - 1))), axis=1)

    return [st1_s, t + 1, rew1[:, 0]]