# Python theano.tensor module: shape_padaxis() example source code


def get_dense_xy(layer, deterministic=True):
    """Build the symbolic input and linear output of a dense layer.

    Args:
        layer: layer object exposing ``input_layer``, ``W`` and ``b``
            (``b`` may be ``None``).
        deterministic: forwarded to ``L.get_output`` when the layer input
            expression is built.

    Returns:
        Tuple ``(x, y)``: ``x`` is the flattened layer input (N, D) and
        ``y`` is ``x . W`` plus the bias when present (N, O). No
        nonlinearity is applied.
    """
    x = L.get_output(L.FlattenLayer(layer.input_layer),
                     deterministic=deterministic)  # N, D
    w = layer.W  # D, O
    y = T.dot(x, w)  # N, O
    if layer.b is not None:
        # Prepend a broadcastable axis so the bias is added to every row.
        y += T.shape_padaxis(layer.b, axis=0)
    return x, y
def get_conv_xy(layer, deterministic=True):
    """Build one randomly placed input patch and its linear conv output.

    A single filter-sized window is cut from the (optionally padded) layer
    input at a random offset, flattened, and multiplied with the flattened
    filters, so the result mirrors the dense-layer case.

    Args:
        layer: convolutional layer exposing ``W``, ``b``, ``pad``,
            ``flip_filters`` and ``input_layer``.
        deterministic: forwarded to ``L.get_output`` for the layer input.

    Returns:
        Tuple ``(x, y)``: ``x`` is the flattened patch (N, D) and ``y`` the
        linear output for that patch (N, O), bias included when present.
    """
    w_np = layer.W.get_value()
    filter_h, filter_w = w_np.shape[2], w_np.shape[3]

    input_layer = layer.input_layer
    if layer.pad == 'same':
        # NOTE(review): the padding width was missing from this call in the
        # source. Half the filter size per spatial axis is what 'same'
        # padding uses for odd filter sizes -- confirm against upstream.
        input_layer = L.PadLayer(layer.input_layer,
                                 width=[filter_h // 2, filter_w // 2])
    input_shape = L.get_output_shape(input_layer)

    # Largest top-left offsets at which a full filter window still fits.
    max_x = input_shape[2] - filter_h
    max_y = input_shape[3] - filter_w
    srng = RandomStreams()
    patch_x = srng.random_integers(low=0, high=max_x)
    patch_y = srng.random_integers(low=0, high=max_y)

    x = L.get_output(input_layer, deterministic=deterministic)
    x = x[:, :, patch_x:patch_x + filter_h, patch_y:patch_y + filter_w]
    x = T.flatten(x, 2)  # N, D

    w = layer.W
    if layer.flip_filters:
        w = w[:, :, ::-1, ::-1]
    w = T.flatten(w, outdim=2).T  # D, O
    y = T.dot(x, w)  # N, O
    if layer.b is not None:
        # Prepend a broadcastable axis so the bias is added to every row.
        y += T.shape_padaxis(layer.b, axis=0)
    return x, y
def get_conv_xy_all(layer, deterministic=True):
    """Build all input patches of a conv layer and their linear outputs.

    Every filter-sized window of the (optionally padded) layer input is
    extracted and flattened into one row, then multiplied with the
    flattened filters.

    Args:
        layer: convolutional layer exposing ``W``, ``b``, ``pad``,
            ``flip_filters``, ``filter_size``, ``stride`` and
            ``input_layer``.
        deterministic: forwarded to ``L.get_output`` for the layer input.
            (The source hard-coded ``True`` here and ignored the argument;
            the default is unchanged.)

    Returns:
        Tuple ``(x, y)``: ``x`` holds one flattened patch per row (N, D) and
        ``y`` the matching linear outputs (N, O), bias included when present.
    """
    w_np = layer.W.get_value()
    w = layer.W
    if layer.flip_filters:
        w = w[:, :, ::-1, ::-1]

    input_layer = layer.input_layer
    if layer.pad == 'same':
        # NOTE(review): the padding width was missing from this call in the
        # source. Half the filter size per spatial axis is what 'same'
        # padding uses for odd filter sizes -- confirm against upstream.
        input_layer = L.PadLayer(layer.input_layer,
                                 width=[w_np.shape[2] // 2, w_np.shape[3] // 2])
    output_shape = L.get_output_shape(layer)
    x_orig = L.get_output(input_layer, deterministic=deterministic)

    # NOTE(review): the arguments of the next two calls were truncated in
    # the source and are reconstructed from the shapes required further
    # down -- confirm against upstream.
    x = theano.tensor.nnet.neighbours.images2neibs(x_orig,
                                                   neib_shape=layer.filter_size,
                                                   neib_step=layer.stride)
    n_positions = output_shape[2] * output_shape[3]
    patch_size = w_np.shape[2] * w_np.shape[3]
    # images2neibs returns one row per (sample, channel, position); regroup
    # so that all channels of one position end up in the same row.
    x = T.reshape(x, (x_orig.shape[0], -1, n_positions, patch_size))
    x = T.transpose(x, (0, 2, 1, 3))
    x = T.reshape(x, (-1, T.prod(x.shape[2:])))  # N, D

    w = T.flatten(w, outdim=2).T  # D, O
    y = T.dot(x, w)  # N, O
    if layer.b is not None:
        # Prepend a broadcastable axis so the bias is added to every row.
        y += T.shape_padaxis(layer.b, axis=0)
    return x, y
def _get_split(self, layer,
                   deterministic=True, conv_all_patches=True, **kwargs):
        """Build per-subtype batch statistics of a layer's inputs and outputs.

        Args:
            layer: an ``L.DenseLayer`` or ``L.Conv2DLayer`` (exact type match).
            deterministic: forwarded to the ``putils`` extraction helpers.
            conv_all_patches: for conv layers, use every patch
                (``get_conv_xy_all``) when ``True``, otherwise a single
                patch (``get_conv_xy``).
            **kwargs: accepted and ignored.

        Returns:
            ``dict_to_list(outputs)`` where ``outputs[name]`` holds the
            entries ``'cnt'``, ``'m_y'``, ``'m_x'``, ``'yty'`` and ``'xty'``
            for every ``(name, fun)`` pair in ``subtypes``.

        Raises:
            ValueError: if ``layer`` is neither a dense nor a conv layer.
        """
        # Get the patches and the outputs without the non-linearities.
        if type(layer) is L.DenseLayer:
            x, y = putils.get_dense_xy(layer, deterministic)
        elif type(layer) is L.Conv2DLayer:
            if conv_all_patches is True:
                x, y = putils.get_conv_xy_all(layer, deterministic)
            else:
                x, y = putils.get_conv_xy(layer, deterministic)
        else:
            raise ValueError("Unknown layer as input")

        # Create an output dictionary
        outputs = dict()

        for name, fun in subtypes:
            outputs[name] = dict()
            # Mask selecting the outputs that belong to this subtype.
            mrk_y = 1.0 * T.cast(fun(y), dtype=theano.config.floatX)  # (N,O)
            y_current = y * mrk_y  # outputs outside the subtype are zeroed
            cnt_y = T.shape_padaxis(T.sum(mrk_y, axis=0), axis=0)  # (1,O)
            # Clamp to one so outputs without any selected datapoint do not
            # divide by zero.
            norm = T.maximum(cnt_y, 1.)

            # Count how many datapoints are considered
            outputs[name]['cnt'] = cnt_y

            # The means of the current batch
            outputs[name]['m_y'] = T.shape_padaxis(y_current.sum(axis=0), axis=0) / norm  # (1,O)
            outputs[name]['m_x'] = T.dot(x.T, mrk_y) / norm  # (D,O)

            # The second moments of the current batch
            outputs[name]['yty'] = T.shape_padaxis(T.sum(y_current ** 2., axis=0), axis=0) / norm  # (1,O)
            outputs[name]['xty'] = T.dot(x.T, y_current) / norm  # (D,O)

        return dict_to_list(outputs)
def get_output_for(self, inputs, deterministic=False, **kwargs):
        """Insert a new axis at ``self.n_ax`` and repeat along it.

        ``deterministic`` and ``**kwargs`` are accepted but not used.
        Returns ``inputs`` with one extra axis at position ``self.n_ax``,
        repeated ``self.n_rep`` times along that axis.
        """
        target_axis = self.n_ax
        expanded = T.shape_padaxis(inputs, axis=target_axis)
        return expanded.repeat(self.n_rep, target_axis)
def _forward(self):
        """Build ``self.output`` by bilinear upsampling of ``self.inpt``.

        Raises:
            AssertionError: if the configured Theano device name does not
                start with ``'gpu'``.
        """
        # Guard first: in the source the `else:` was missing, so the error
        # was raised unconditionally.
        if not theano.config.device.startswith('gpu'):
            raise AssertionError('Bilinear interpolation requires GPU and cuDNN.')
        # Imported only after the device check has passed.
        from theano.tensor.nnet.abstract_conv import bilinear_upsampling

        inpt = T.reshape(self.inpt, (self.inpt_depth, self.n_inpt, self.inpt_height, self.inpt_width))
        pre_res = bilinear_upsampling(input=inpt, ratio=self.up_factor)
        # Move the first two axes to the back before the 1-D upsampling
        # step, then restore the original axis order.
        shuffle_res = pre_res.dimshuffle((2, 3, 0, 1))
        res = self._bilinear_upsampling_1D(inpt=shuffle_res, ratio=self.up_factor)
        self.output = res.dimshuffle((2, 3, 0, 1))
        # Add a leading axis and mark it as non-broadcastable.
        self.output = T.shape_padaxis(self.output, axis=0)
        self.output = T.unbroadcast(self.output, 0)
def decode_to_probs(self, activations, relative_position, low_bound, high_bound):
        """Turn raw activations into a distribution over 2 + note-range choices.

        The last axis of ``activations`` is read as four softmax groups:
        columns 0-2, 3-6, 7-9 and the remainder (one entry per octave). The
        two middle groups are tiled to 12 entries each and multiplied, then
        combined with the octave group to cover ``12 * self.num_octaves``
        notes. The note block is scaled by the third entry of the first
        group and appended to its first two entries.

        ``relative_position`` is accepted but not used. The result has the
        leading shape of ``activations`` with a last axis of size
        ``2 + high_bound - low_bound``.
        """
        range_is_valid = (low_bound % 12 == 0) and (high_bound - low_bound == self.num_octaves * 12)
        assert range_is_valid, "Circle of thirds must evenly divide into octaves"

        flat = T.reshape(activations, (-1, self.RAW_ENCODING_WIDTH))

        # One softmax per group of the raw encoding.
        head_probs = T.nnet.softmax(flat[:, :3])
        circle_a = T.nnet.softmax(flat[:, 3:7])
        circle_b = T.nnet.softmax(flat[:, 7:10])
        octave_probs = T.nnet.softmax(flat[:, 10:])

        # Tile both circles to 12 entries and combine them elementwise.
        notes_in_octave = T.tile(circle_a, (1, 3)) * T.tile(circle_b, (1, 4))
        # Outer product over (octave, note-in-octave), flattened per row.
        per_octave = T.shape_padright(octave_probs) * T.shape_padaxis(notes_in_octave, 1)
        all_notes = T.reshape(per_octave, (-1, 12 * self.num_octaves))

        scaled_notes = T.shape_padright(head_probs[:, 2]) * all_notes
        flat_probs = T.concatenate([head_probs[:, :2], scaled_notes], 1)

        # Restore the leading dimensions of the input.
        out_shape = T.concatenate([activations.shape[:-1], [2 + high_bound - low_bound]], 0)
        return T.reshape(flat_probs, out_shape, ndim=activations.ndim)
def queue_transform(feature_strengths, feature_vects, return_strengths=False):
        """
        Process features according to a "fragmented queue", where each timestep
        gets a size-1 window onto a feature queue. Effectively,
            feature_strengths gives how much to push onto queue
            feature_vects gives what to push on
            pop weights are tied to feature_strengths
            output is a size-1 peek (without popping)

        Parameters:
            - feature_strengths: float32 tensor of shape (batch, push_timestep) in [0,1]
            - feature_vects: float32 tensor of shape (batch, push_timestep, feature_dim)
            - return_strengths: if true, also return the peek strengths

        Returns:
            - peek_vects: float32 tensor of shape (batch, timestep, feature_dim)
            - when return_strengths is true, the tuple (peek_strengths, peek_vects)
              instead, with peek_strengths of shape (batch, timestep, push_timestep)
        """
        n_batch, n_time, n_feature = feature_vects.shape

        cum_sum_str = T.extra_ops.cumsum(feature_strengths, 1)

        # We will be working in (batch, timestep, push_timestep)
        # For each timestep, if we subtract out the sum of pushes before that timestep
        # and then cap to 0-1 we get the cumsums for just the features active in that
        # timestep
        timestep_adjustments = T.shape_padright(cum_sum_str - feature_strengths)
        push_time_cumsum = T.shape_padaxis(cum_sum_str, 1)
        relative_cumsum = push_time_cumsum - timestep_adjustments
        capped_cumsum = T.minimum(T.maximum(relative_cumsum, 0), 1)

        # Now we can recover the peek strengths by taking a diff
        shifted = T.concatenate([T.zeros((n_batch, n_time, 1)), capped_cumsum[:,:,:-1]],2)
        peek_strengths = capped_cumsum-shifted
        # Peek strengths is now (batch, timestep, push_timestep)

        result = T.batched_dot(peek_strengths, feature_vects)

        if return_strengths:
            return peek_strengths, result
        else:
            return result
def calc_binaryVal_negative_log_likelihood(data, probabilities, axis_to_sum=1):
    """Return the Bernoulli negative log-likelihood of ``data``.

    Args:
        data: binary targets.
        probabilities: predicted probabilities.
        axis_to_sum: axis over which the per-element terms are summed.

    Returns:
        ``-sum(data * log(p) + (1 - data) * log(1 - p))`` along
        ``axis_to_sum``.
    """
    if axis_to_sum != 1:
        # addresses the case where we marginalize: insert a new axis 1 in
        # data and repeat it probabilities.shape[1] times
        data = T.extra_ops.repeat(T.shape_padaxis(data, axis=1), repeats=probabilities.shape[1], axis=1)
    return - T.sum(data * T.log(probabilities) + (1 - data) * T.log(1 - probabilities), axis=axis_to_sum)
def calc_categoricalVal_negative_log_likelihood(data, probabilities, axis_to_sum=1):
    """Return the categorical negative log-likelihood of ``data``.

    Args:
        data: targets, multiplied elementwise with the log-probabilities.
        probabilities: predicted class probabilities.
        axis_to_sum: axis over which the per-element terms are summed.

    Returns:
        ``-sum(data * log(p))`` along ``axis_to_sum``.
    """
    if axis_to_sum != 1:
        # addresses the case where we marginalize: insert a new axis 1 in
        # data and repeat it probabilities.shape[1] times
        data = T.extra_ops.repeat(T.shape_padaxis(data, axis=1), repeats=probabilities.shape[1], axis=1)
    return - T.sum(data * T.log(probabilities), axis=axis_to_sum)
def calc_realVal_negative_log_likelihood(data, recon, axis_to_sum=1):
    """Return half the summed squared error between ``data`` and ``recon``.

    When ``axis_to_sum`` is not 1 (the marginalizing case), ``data`` first
    gets a new axis 1 that is repeated ``recon.shape[1]`` times. The squared
    differences are summed along ``axis_to_sum``.
    """
    target = data
    if axis_to_sum != 1:
        # Marginalizing case: expand data along a new axis 1.
        expanded = T.shape_padaxis(data, axis=1)
        target = T.extra_ops.repeat(expanded, repeats=recon.shape[1], axis=1)
    squared_error = (target - recon) ** 2
    return .5 * T.sum(squared_error, axis=axis_to_sum)
def calc_poissonVal_negative_log_likelihood(data, recon, axis_to_sum=1):
    """Return ``sum(exp(recon) - data * recon)`` along ``axis_to_sum``.

    When ``axis_to_sum`` is not 1 (the marginalizing case), ``data`` first
    gets a new axis 1 that is repeated ``recon.shape[1]`` times.
    """
    target = data
    if axis_to_sum != 1:
        # Marginalizing case: expand data along a new axis 1.
        expanded = T.shape_padaxis(data, axis=1)
        target = T.extra_ops.repeat(expanded, repeats=recon.shape[1], axis=1)
    per_element = T.exp(recon) - target * recon
    return T.sum(per_element, axis=axis_to_sum)
def __init__(self, rng, input, batch_size, in_size, latent_size, W_a = None, W_b = None, epsilon = 0.01):
        """Set up a stick-breaking latent layer with Kumaraswamy samples.

        Args:
            rng: random generator providing ``randint`` and
                ``standard_normal``; seeds the symbolic random stream and
                initializes any missing weights.
            input: symbolic input, multiplied with ``W_a`` and ``W_b``.
            batch_size: number of rows sampled per call.
            in_size: first dimension of the weight matrices.
            latent_size: number of latent dimensions; ``latent_size - 1``
                stick fractions are sampled and the leftover stick forms
                the last dimension.
            W_a: optional shared weights for parameter ``a``.
            W_b: optional shared weights for parameter ``b``.
            epsilon: threshold on the remaining stick used for
                ``avg_used_dims``.
        """
        self.srng = theano.tensor.shared_randomstreams.RandomStreams(rng.randint(999999))
        self.input = input

        # setup variational params
        if W_a is None:
            W_values = np.asarray(0.01 * rng.standard_normal(size=(in_size, latent_size-1)), dtype=theano.config.floatX)
            W_a = theano.shared(value=W_values, name='W_a')
        if W_b is None:
            W_values = np.asarray(0.01 * rng.standard_normal(size=(in_size, latent_size-1)), dtype=theano.config.floatX)
            W_b = theano.shared(value=W_values, name='W_b')
        self.W_a = W_a
        self.W_b = W_b

        # compute Kumaraswamy samples
        uniform_samples = T.cast(self.srng.uniform(size=(batch_size, latent_size-1), low=0.01, high=0.99), theano.config.floatX)
        self.a = Softplus(T.dot(self.input, self.W_a))
        self.b = Softplus(T.dot(self.input, self.W_b))
        v_samples = (1-(uniform_samples**(1/self.b)))**(1/self.a)

        # setup variables for recursion
        stick_segment = theano.shared(value=np.zeros((batch_size,), dtype=theano.config.floatX), name='stick_segment')
        remaining_stick = theano.shared(value=np.ones((batch_size,), dtype=theano.config.floatX), name='remaining_stick')

        def compute_latent_vars(i, stick_segment, remaining_stick, v_samples):
            # Break off a fraction v_i of what is left of the stick, then
            # shrink the remainder accordingly.
            stick_segment = v_samples[:,i] * remaining_stick
            remaining_stick = remaining_stick * (1-v_samples[:,i])
            return (stick_segment, remaining_stick)

        (stick_segments, remaining_sticks), updates = theano.scan(fn=compute_latent_vars,
                                                                  outputs_info=[stick_segment, remaining_stick],sequences=T.arange(latent_size-1),
                                                                  non_sequences=[v_samples], strict=True)

        # Per sample, count the steps whose remaining stick still exceeds
        # epsilon, then average that count.
        self.avg_used_dims = T.mean(T.sum(remaining_sticks > epsilon, axis=0))
        # Append the final remainder as the last latent dimension and
        # transpose the scan output from step-major to sample-major.
        self.latent_vars = T.transpose(T.concatenate([stick_segments, T.shape_padaxis(remaining_sticks[-1, :],axis=1).T], axis=0))

        self.params = [self.W_a, self.W_b]
def repeat_x_row(x, n_times):
    """Repeat ``x`` ``n_times`` along a new axis by broadcasting.

    ``x`` is multiplied with a float32 tensor of ones of shape
    ``(n_times, 1, ..., 1)`` that has ``x.ndim`` trailing ones.
    """
    # NOTE(review): the source bound the function itself
    # (`a = T.shape_padaxis`), so the product below could not work. The call
    # arguments are reconstructed: `ones` has x.ndim + 1 axes with n_times
    # in front, so x needs a new leading broadcastable axis to line up with
    # it -- confirm the intended axis against upstream.
    padded = T.shape_padaxis(x, axis=0)
    ones_shape = [1] * x.ndim
    ones = T.alloc(numpy.float32(1), n_times, *ones_shape)
    return padded * ones
def gated_mean(x, p=0.5, axis=2):
    """Average only the entries of ``x`` that reach a per-slice threshold.

    The threshold is ``p * mean + (1 - p) * max``, taken along ``axis``.
    Entries at or above it are kept; their sum is divided by their count
    along the last axis.

    NOTE(review): the final sums run over the last axis, so the result is
    only coherent when ``axis`` refers to the last axis of ``x`` -- confirm
    with callers.
    """
    import theano.tensor as T
    # NOTE(review): the end of the shape_padaxis call and the comparison
    # used for the mask were truncated in the source. They are reconstructed
    # as `axis=-1` (matching the sums below) and `>=` (so the maximum itself
    # always passes the gate) -- confirm against upstream.
    thres = T.shape_padaxis((p * T.mean(x, axis=axis) +
                            (1 - p) * T.max(x, axis=axis)),
                            axis=-1)
    mask = T.ge(x, thres)
    g_values = mask*x
    g_means = T.sum(g_values, axis=-1) / T.sum(mask, axis=-1)
    return g_means
def make_reduce_f(mode, dtype, ndim):
    """Compile a Theano function that combines two tensors.

    Args:
        mode: ``"gather"`` to concatenate the inputs along their first
            axis; any other value names a function in ``T`` that is
            called with ``axis=0`` on the stacked pair.
        dtype: dtype of both inputs.
        ndim: number of (non-broadcastable) dimensions of both inputs.

    Returns:
        A compiled function ``f(accum, slice)`` named ``mode + "_" + dtype``.
    """
    t_type = T.TensorType(dtype=dtype, broadcastable=[False] * ndim)
    x = t_type('accum').transfer(None)
    y = t_type('slice').transfer(None)
    if mode == "gather":
        z = T.concatenate([x, y])
    else:
        # Stack the two operands on a new leading axis and reduce over it.
        T_op = getattr(T, mode)
        x_pad = T.shape_padaxis(x, axis=0)
        y_pad = T.shape_padaxis(y, axis=0)
        z = T_op(T.concatenate([x_pad, y_pad], axis=0), axis=0)
    name = mode + "_" + str(dtype)
    return theano.function([x, y], z.transfer(None), name=name, allow_input_downcast=True)
def make_reduce_f(var, mode):
    """Compile a Theano function that combines two tensors typed like ``var``.

    Args:
        var: variable whose ``dtype`` and ``broadcastable`` pattern define
            the type of both inputs.
        mode: ``"gather"`` to concatenate the inputs along their first
            axis; any other value names a function in ``T`` that is
            called with ``axis=0`` on the stacked pair.

    Returns:
        A compiled function ``f(accum, slice)`` whose name is built from the
        mode, the dtype and the broadcast pattern.
    """
    dtype = var.dtype
    bcast = var.broadcastable
    t_type = T.TensorType(dtype=dtype, broadcastable=bcast)
    x = t_type('accum').transfer(None)
    y = t_type('slice').transfer(None)
    if mode == "gather":
        z = T.concatenate([x, y])
    else:
        # Stack the two operands on a new leading axis and reduce over it.
        T_op = getattr(T, mode)
        x_pad = T.shape_padaxis(x, axis=0)
        y_pad = T.shape_padaxis(y, axis=0)
        z = T_op(T.concatenate([x_pad, y_pad], axis=0), axis=0)
    name = mode + "_" + str(dtype) + broadcastable_string(bcast)
    return theano.function([x, y], z.transfer(None), name=name)
def _step(self,st_s,t,onoise,inoise):
        """Advance the simulated state by one time step.

        Args:
            st_s: current state matrix; column 0 is carried over unchanged
                and the remaining columns are viewed as
                (history, 4) blocks of three action values and one reward.
            t: current time index into the noise tensors.
            onoise: output noise, indexed as ``onoise[:, :, t]``.
            inoise: input noise, indexed as ``inoise[:, :, t:t+1]``.

        Returns:
            ``[next_state, t + 1, reward]`` where ``reward`` is the first
            column of the de-transformed reward.
        """
        on_t =  onoise[:,:,t]
        in_t = inoise[:,:,t:t+1]

        # get action
        at_s = self.predict(st_s)

        # obtain new steering variables
        A_t1 = self.aAction(st_s,at_s)

        # time-shift steerings 1 into the future
        # (A(t-15),..A(t)  ->  A(t-14),..,A(t+1)
        st_s3 = st_s[:,1:].reshape((st_s.shape[0],self.params_task['history'],4))
        st1_s3 = T.set_subtensor(st_s3[:,:,:3],T.concatenate((st_s3[:,1:,:3],T.shape_padaxis(A_t1,1)),axis=1))
        xt1_s = T.concatenate((st_s[:,:1],st1_s3.reshape((st_s.shape[0],st_s.shape[1]-1))),axis=1)

        # Obtain \delta R(t+1) by BNN
        # Floor division keeps the symbolic shape an integer; plain `/`
        # yields a float under Python 3, which reshape rejects.
        xt1_s  = xt1_s.reshape((self.params['samples'],xt1_s.shape[0]//self.params['samples'],xt1_s.shape[1]))
        drt1_s,vdrt1_s = self.model.predict(xt1_s,mode='symbolic',provide_noise=True,noise=in_t)
        drt1_s = drt1_s.reshape((drt1_s.shape[0]*drt1_s.shape[1],drt1_s.shape[2]))
        vdrt1_s = vdrt1_s.reshape((vdrt1_s.shape[0]*vdrt1_s.shape[1],vdrt1_s.shape[2]))

        # sample from output noise
        drt1_s = on_t * T.sqrt(vdrt1_s) + drt1_s

        #obtain R(t+1) by adding \delta R(t+1)
        rt1_s = st_s[:,-1:] + drt1_s[:,0:1]

        # undo log-logit transformation to obtain unnormalized reward
        rew1 = 1. / (1. +  T.exp(-rt1_s)) # undo logit
        rew1 = rew1  * (self.model.params['bounds'][3] - self.model.params['bounds'][1])  + self.model.params['bounds'][1]
        rew1 = T.exp(rew1) - 1

        # update time-embedding: R(t-15)..R(t) -> R(t-14) .. R(t+1)
        st1_s3 = T.set_subtensor(st1_s3[:,:,3:],T.concatenate((st1_s3[:,1:,3:],T.shape_padaxis(rt1_s,1)),axis=1))
        st1_s = T.concatenate((st_s[:,:1],st1_s3.reshape((st_s.shape[0],st_s.shape[1]-1))),axis=1)

        return [st1_s,t+1,rew1[:,0]]