Python theano.tensor module: maximum() examples

The following code examples, extracted from open-source Python projects, illustrate how to use theano.tensor.maximum().
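
The snippets below are reproduced with each project's own import aliases. A minimal preamble covering the names they assume (this preamble is inferred, not part of the original sources; some snippets additionally rely on project-local helpers such as lasagne.layers imported as L, which are not reproduced here) would be:

import numpy
import numpy as np
import theano
import theano as th
import theano.tensor as T        # most snippets
import theano.tensor as TT       # snn4hrl snippets
import theano.tensor as tensor   # dl4mt-multi snippet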

Project: GELUs    Author: hendrycks    | Project source | File source
def adamax_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    updates = []
    grads = T.grad(cost, params)
    for p, g in zip(params, grads):
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        if mom1>0:
            v_t = mom1*v + (1. - mom1)*g
            updates.append((v,v_t))
        else:
            v_t = g
        mg_t = T.maximum(mom2*mg, abs(g))
        g_t = v_t / (mg_t + 1e-6)
        p_t = p - lr * g_t
        updates.append((mg, mg_t))
        updates.append((p, p_t))
    return updates
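
A minimal usage sketch for the function above (the toy quadratic cost and variable names are illustrative assumptions, not part of the original project):

w = th.shared(np.zeros(3, dtype=th.config.floatX), name='w')
cost = T.sum((w - 1.) ** 2)                      # toy cost with minimum at w = 1
train = theano.function([], cost, updates=adamax_updates([w], cost, lr=0.05))
for _ in range(200):
    train()
print(w.get_value())                             # close to [1. 1. 1.]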
Project: nn-patterns    Author: pikinder    | Project source | File source
def _update_statistics(self, new_stats, stats):
        new_stats = create_dict(new_stats)
        if stats is None:
            stats = new_stats
            return stats

        # update the stats layerwise
        for l_i in range(len(stats)):

            for subtype,_ in subtypes:
                # TODO: Have to check the type to see if this is needed
                cnt_old = 1.0 * stats[l_i][subtype]['cnt']
                stats[l_i][subtype]['cnt'] = (stats[l_i][subtype]['cnt']
                                              + new_stats[l_i][subtype]['cnt'])
                norm = np.maximum(stats[l_i][subtype]['cnt'], 1.0)

                for key in subtype_keys:
                    if key not in subtype_keys_no_aggregation:
                        tmp_old = cnt_old / norm * stats[l_i][subtype][key]
                        tmp_new = (new_stats[l_i][subtype]['cnt']
                                   / norm * new_stats[l_i][subtype][key])
                        stats[l_i][subtype][key] = tmp_old + tmp_new
        return stats
Project: face_detection    Author: chintak    | Project source | File source
def iou_loss(p, t):
    # print "pass"
    tp, tt = p.reshape((p.shape[0], 2, 2)), t.reshape((t.shape[0], 2, 2))
    overlaps_t0 = T.maximum(tp[:, 0, :], tt[:, 0, :])
    overlaps_t1 = T.minimum(tp[:, 1, :], tt[:, 1, :])
    intersection = overlaps_t1 - overlaps_t0
    bool_overlap = T.min(intersection, axis=1) > 0
    intersection = intersection[:, 0] * intersection[:, 1]
    intersection = T.maximum(intersection, np.float32(0.))
    dims_p = tp[:, 1, :] - tp[:, 0, :]
    areas_p = dims_p[:, 0] * dims_p[:, 1]
    dims_t = tt[:, 1, :] - tt[:, 0, :]
    areas_t = dims_t[:, 0] * dims_t[:, 1]
    union = areas_p + areas_t - intersection
    loss = 1. - T.minimum(
        T.exp(T.log(T.abs_(intersection)) -
              T.log(T.abs_(union) + np.float32(1e-5))),
        np.float32(1.)
    )
    # return loss
    return T.mean(loss)
Project: face_detection    Author: chintak    | Project source | File source
def iou_loss_val(p, t):
    tp, tt = p.reshape((p.shape[0], 2, 2)), t.reshape((t.shape[0], 2, 2))
    overlaps = np.zeros_like(tp, dtype=np.float32)
    overlaps[:, 0, :] = np.maximum(tp[:, 0, :], tt[:, 0, :])
    overlaps[:, 1, :] = np.minimum(tp[:, 1, :], tt[:, 1, :])
    intersection = overlaps[:, 1, :] - overlaps[:, 0, :]
    bool_overlap = np.min(intersection, axis=1) > 0
    intersection = intersection[:, 0] * intersection[:, 1]
    intersection = np.maximum(intersection, 0.)
    # print "bool", bool_overlap
    # print "Int", intersection
    dims_p = tp[:, 1, :] - tp[:, 0, :]
    areas_p = dims_p[:, 0] * dims_p[:, 1]
    dims_t = tt[:, 1, :] - tt[:, 0, :]
    areas_t = dims_t[:, 0] * dims_t[:, 1]
    union = areas_p + areas_t - intersection
    # print "un", union
    loss = 1. - np.minimum(
        np.exp(np.log(np.abs(intersection)) - np.log(np.abs(union) + 1e-5)),
        1.
    )
    # print loss
    return np.mean(loss)
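
A quick numeric sanity check of this NumPy counterpart of iou_loss (the box coordinates are illustrative; each row encodes (x1, y1, x2, y2)):

p = np.array([[0., 0., 2., 2.]], dtype=np.float32)   # predicted box
t = np.array([[1., 1., 3., 3.]], dtype=np.float32)   # target box
# intersection = 1, union = 4 + 4 - 1 = 7, so loss = 1 - 1/7 ≈ 0.857
print(iou_loss_val(p, t))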
Project: snn4hrl    Author: florensacc    | Project source | File source
def dist_info_sym(self, obs_var, latent_var=None):  # this is meant to be for one path!
        # now this is not doing anything! And for computing the dist_info_vars of npo_snn_rewardMI it doesn't work
        if latent_var is None:
            latent_var1 = theano.shared(np.expand_dims(self.latent_fix, axis=0))  # new fix to avoid putting the latent as an input: just take the one fixed!
            latent_var = TT.tile(latent_var1, [obs_var.shape[0], 1])

        # generate the generalized input (append latents to obs.)
        if self.bilinear_integration:
            extended_obs_var = TT.concatenate([obs_var, latent_var,
                                               TT.flatten(obs_var[:, :, np.newaxis] * latent_var[:, np.newaxis, :],
                                                          outdim=2)]
                                              , axis=1)
        else:
            extended_obs_var = TT.concatenate([obs_var, latent_var], axis=1)
        mean_var, log_std_var = L.get_output([self._l_mean, self._l_log_std], extended_obs_var)
        if self.min_std is not None:
            log_std_var = TT.maximum(log_std_var, np.log(self.min_std))
        return dict(mean=mean_var, log_std=log_std_var)
Project: LiviaNET    Author: josedolz    | Project source | File source
def applyActivationFunction_PReLU( inputData, PreluActivations ) :
    """Parametric Rectified Linear Unit.
    It follows:
    `f(x) = alpha * x for x < 0`,
    `f(x) = x for x >= 0`,
    where `alpha` is a learned array with the same shape as x.

    - The input is a tensor of shape (batchSize, FeatMaps, xDim, yDim, zDim) """
    preluActivationsAsRow = PreluActivations.dimshuffle('x', 0, 'x', 'x', 'x')

    pos = T.maximum(0, inputData)
    neg = preluActivationsAsRow * (inputData - abs(inputData)) * 0.5
    output = pos + neg

    return (output)

# --- version 2 ---
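
The negative branch uses the identity (x - |x|) / 2 == min(x, 0), so the output equals max(0, x) + alpha * min(0, x), matching the PReLU definition in the docstring. A NumPy check of the same arithmetic (values are illustrative):

x = np.array([-2., -0.5, 0., 3.])
alpha = 0.1
print(np.maximum(0, x) + alpha * (x - np.abs(x)) * 0.5)   # [-0.2  -0.05  0.    3.  ]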
Project: cortex    Author: rdevon    | Project source | File source
def __call__(self, input_):
        m = input_.mean()
        v = input_.std()
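        # note: input_.std() returns the standard deviation, though it is stored as 'var' and square-rooted again below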

        new_m = T.switch(T.eq(self.m, 0.),
                         m,
                         (np.float32(1.) - self.rate) * self.m + self.rate * m)
        new_var = T.switch(T.eq(self.var, 0.),
                           v,
                           (np.float32(1.) - self.rate) * self.var + self.rate * v)

        updates = [(self.m, new_m), (self.var, new_var)]

        input_centered = (
            (input_ - new_m) / T.maximum(1., T.sqrt(new_var)))

        input_ = T.zeros_like(input_) + input_

        outs = OrderedDict(
            x=input_,
            x_centered=input_centered,
            m=new_m,
            var=new_var
        )
        return outs, updates
Project: keras    Author: GeekLiB    | Project source | File source
def ctc_update_log_p(skip_idxs, zeros, active, log_p_curr, log_p_prev):
    active_skip_idxs = skip_idxs[(skip_idxs < active).nonzero()]
    active_next = T.cast(T.minimum(
        T.maximum(
            active + 1,
            T.max(T.concatenate([active_skip_idxs, [-1]])) + 2 + 1
        ), log_p_curr.shape[0]), 'int32')

    common_factor = T.max(log_p_prev[:active])
    p_prev = T.exp(log_p_prev[:active] - common_factor)
    _p_prev = zeros[:active_next]
    # copy over
    _p_prev = T.set_subtensor(_p_prev[:active], p_prev)
    # previous transitions
    _p_prev = T.inc_subtensor(_p_prev[1:], _p_prev[:-1])
    # skip transitions
    _p_prev = T.inc_subtensor(_p_prev[active_skip_idxs + 2], p_prev[active_skip_idxs])
    updated_log_p_prev = T.log(_p_prev) + common_factor

    log_p_next = T.set_subtensor(
        zeros[:active_next],
        log_p_curr[:active_next] + updated_log_p_prev
    )
    return active_next, log_p_next
Project: keras-contrib    Author: farizrahman4u    | Project source | File source
def clip(x, min_value, max_value):
    """Element-wise value clipping.

    If min_value > max_value, the clipping range is [min_value, min_value].

    # Arguments
        x: Tensor or variable.
        min_value: Tensor, float, int, or None.
            If min_value is None, defaults to -infinity.
        max_value: Tensor, float, int, or None.
            If max_value is None, defaults to infinity.

    # Returns
        A tensor.
    """
    if max_value is None:
        max_value = np.inf
    if min_value is None:
        min_value = -np.inf
    max_value = T.maximum(min_value, max_value)
    return T.clip(x, min_value, max_value)
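
A small check of the degenerate case mentioned in the docstring (symbolic variable names are illustrative):

x = T.vector('x')
f = theano.function([x], clip(x, 2.0, 1.0))   # min_value > max_value
print(f(np.array([0., 1.5, 3.], dtype=theano.config.floatX)))   # every element clipped to 2.0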
Project: merlin    Author: CSTR-Edinburgh    | Project source | File source
def apply_activation(self, lin_output, activation):
        if activation == 'SIGMOID':
            final_output = T.nnet.sigmoid(lin_output)

        elif activation == 'TANH':
            final_output = T.tanh(lin_output)

        elif activation == 'LINEAR':
            final_output = lin_output

        elif activation == 'ReLU':  ## rectified linear unit
            final_output = T.maximum(0.0, lin_output)

        elif activation == 'ReSU':  ## rectified smooth unit (softplus)
            final_output = T.log(1.0 + T.exp(lin_output))

        else:
            self.logger.critical('the input activation function: %s is not supported right now. Please modify layers.py to support' % (activation))
            raise ValueError('unsupported activation function: %s' % activation)

        return final_output
Project: yadll    Author: pchavanne    | Project source | File source
def adamax(cost, params, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-6, **kwargs):
    """Adam Gradient Descent
    Scale learning rates by adaptive moment estimation

    References
    ----------
    .. [1] https://arxiv.org/pdf/1412.6980v8.pdf
    """
    gparams = T.grad(cost, params)
    updates = OrderedDict()
    t = shared_variable(to_float_X(0.))
    t_t = 1. + t
    l_r_t = learning_rate / (1. - beta1 ** t_t)
    for param, gparam in zip(params, gparams):
        m = shared_variable(np.zeros(param.get_value(borrow=True).shape), broadcastable=param.broadcastable)
        u = shared_variable(np.zeros(param.get_value(borrow=True).shape), broadcastable=param.broadcastable)
        m_t = beta1 * m + (1. - beta1) * gparam
        u_t = T.maximum(beta2 * u, abs(gparam))
        updates[m] = m_t
        updates[u] = u_t
        updates[param] = param - l_r_t * m_t / (u_t + epsilon)
    updates[t] = t_t
    return updates
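
In equation form, the updates above implement the Adamax recurrences (Algorithm 2 of the Adam paper, with the bias correction folded into the learning rate):

m_t = beta1 * m_{t-1} + (1 - beta1) * g_t
u_t = max(beta2 * u_{t-1}, |g_t|)
param_t = param_{t-1} - (learning_rate / (1 - beta1 ** t)) * m_t / (u_t + epsilon)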
Project: cbof    Author: passalis    | Project source | File source
def symbolic_distance_matrix(A, B):
    """
    Defines the symbolic matrix that contains the distances between the vectors of A and B
    :param A:
    :param B:
    :return:
    """
    aa = T.sum(A * A, axis=1)
    bb = T.sum(B * B, axis=1)
    AB = T.dot(A, T.transpose(B))

    AA = T.transpose(T.tile(aa, (bb.shape[0], 1)))
    BB = T.tile(bb, (aa.shape[0], 1))

    D = AA + BB - 2 * AB
    D = T.maximum(D, 0)
    D = T.sqrt(D)
    return D
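
The T.maximum(D, 0) guards against small negative entries that floating-point cancellation can produce before the square root. A usage sketch (matrix shapes are illustrative):

A = T.matrix('A')
B = T.matrix('B')
dist = theano.function([A, B], symbolic_distance_matrix(A, B))
a = np.random.randn(4, 3).astype(theano.config.floatX)
b = np.random.randn(5, 3).astype(theano.config.floatX)
print(dist(a, b).shape)   # (4, 5): Euclidean distance between every row of a and every row of b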
Project: Theano-MPI    Author: uoguelph-mlrg    | Project source | File source
def __init__(self, input, n_out, W, b,
                 printinfo=True, input_shape=None):

        self.get_input_shape(input,input_shape)
        n_in = self.input_shape[-1]

        if W and b:
            self.W = W
            self.b = b
        else:
            self.W = Normal((n_in, n_out),std=0.005)
            self.b = Constant((n_out,), val=0.1)

        lin_output = T.dot(self.input, self.W.val) + self.b.val
        #ReLU
        self.output = T.maximum(lin_output, 0)
        self.params = [self.W.val, self.b.val]
        self.weight_type = ['W', 'b']
        self.output_shape = self.get_output_shape(self.input_shape)

        self.name = 'FC\t'
        if printinfo: self.print_shape()
Project: theanomodels    Author: clinicalml    | Project source | File source
def _applyNL(self,lin_out):
        if self.params['nonlinearity']=='relu':
            if 'leaky_params' in self.params:
                return T.nnet.relu(lin_out, alpha = self.params['leaky_params'])
            else:
                return T.nnet.relu(lin_out)
        elif self.params['nonlinearity']=='softplus':
            return T.nnet.softplus(lin_out)
        elif self.params['nonlinearity']=='elu':
            return T.switch(lin_out > 0, lin_out, T.exp(lin_out) - 1)
        elif self.params['nonlinearity']=='maxout':
            maxout_out = None
            for i in xrange(self.params['maxout_stride']):
                tmp = lin_out[:,i::self.params['maxout_stride']]
                if maxout_out is None:
                    maxout_out = tmp
                else:
                    maxout_out = T.maximum(maxout_out, tmp)
            return maxout_out
        else:
            return T.tanh(lin_out)
Project: deep-prior-pp    Author: moberweger    | Project source | File source
def RMSProp(self, learning_rate=0.01, decay=0.9, epsilon=1.0 / 100.):
        """
        RMSProp of Tieleman et al.
        :param learning_rate: learning rate
        :param decay: decay rate of gradient history
        :param epsilon: lower bound on the RMS denominator (prevents division by zero)
        :return: update
        """

        for param_i, grad_i in zip(self.params, self.grads):
            # Accumulate gradient
            msg = theano.shared(numpy.zeros(param_i.get_value().shape, dtype=theano.config.floatX))
            self.shared.append(msg)
            new_mean_squared_grad = (decay * msg + (1 - decay) * T.sqr(grad_i))

            # Compute update
            rms_grad_t = T.sqrt(new_mean_squared_grad)
            rms_grad_t = T.maximum(rms_grad_t, epsilon)
            delta_x_t = -learning_rate * grad_i / rms_grad_t

            # Apply update
            self.updates.append((param_i, param_i + delta_x_t))
            self.updates.append((msg, new_mean_squared_grad))

        return self.updates
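
The recurrence implemented above, in equation form (epsilon acts as a floor on the RMS denominator, not as a clip on the gradient itself):

mean_sq_t = decay * mean_sq_{t-1} + (1 - decay) * g_t**2
param_t = param_{t-1} - learning_rate * g_t / max(sqrt(mean_sq_t), epsilon)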
Project: tree_rnn    Author: ofirnachum    | Project source | File source
def gradient_descent(self, loss):
        """Momentum GD with gradient clipping."""
        grad = T.grad(loss, self.params)
        self.momentum_velocity_ = [0.] * len(grad)
        grad_norm = T.sqrt(sum(map(lambda x: T.sqr(x).sum(), grad)))
        updates = OrderedDict()
        not_finite = T.or_(T.isnan(grad_norm), T.isinf(grad_norm))
        scaling_den = T.maximum(5.0, grad_norm)
        for n, (param, grad) in enumerate(zip(self.params, grad)):
            grad = T.switch(not_finite, 0.1 * param,
                            grad * (5.0 / scaling_den))
            velocity = self.momentum_velocity_[n]
            update_step = self.momentum * velocity - self.learning_rate * grad
            self.momentum_velocity_[n] = update_step
            updates[param] = param + update_step
        return updates
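
The scaling rescales the whole gradient to norm 5.0 whenever it exceeds that threshold (and falls back to 0.1 * param when the norm is NaN or Inf). A NumPy sketch of the same norm clipping (the threshold comes from the code above; the gradient values are illustrative):

g = np.array([6., 8.])                                   # ||g|| = 10
clipped = g * (5.0 / np.maximum(5.0, np.linalg.norm(g)))
print(clipped, np.linalg.norm(clipped))                  # [3. 4.] 5.0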
Project: DBQA-KBQA    Author: Lucien-qiang    | Project source | File source
def dynamic_k_max_pooling(input, sent_sizes, k_max_factor, k_max_final):
  """
    k_max_factor -- multiplied by sentence_sizes gives the value of kmax for each sentence
  """
  # Unroll input into (batch_size x nchannels x nwords) x ndim
  nbatches, nchannels, nwords, ndim = input.shape[0], input.shape[1], input.shape[2], input.shape[3]
  x = input.dimshuffle(0,1,3,2)

  sent_sizes = T.cast(T.ceil(sent_sizes * k_max_factor), dtype='int32')
  sent_sizes = T.maximum(sent_sizes, k_max_final)
  # sent_sizes_matrix = T.repeat(sent_sizes, nwords, axis=1)
  sent_sizes_matrix = T.repeat(sent_sizes.dimshuffle(0, 'x'), nwords, axis=1)

  idx = T.arange(nwords).dimshuffle('x', 0)
  idx_matrix = T.repeat(idx, nbatches, axis=0)

  sent_sizes_mask = T.lt(idx_matrix, sent_sizes_matrix)[:,::-1]

  neighborsArgSorted = T.argsort(x, axis=3)
  neighborsArgSorted_masked = ((neighborsArgSorted + 1) * sent_sizes_mask.dimshuffle(0,'x','x',1)) - 1
  neighborsArgSorted_masked_sorted = neighborsArgSorted_masked.sort(axis=3)

  nwords_max = T.cast(T.ceil(nwords * k_max_factor), 'int32')
  # print nwords_max.eval()
  neighborsArgSorted_masked_sorted_clipped = neighborsArgSorted_masked_sorted[:,:,:,-nwords_max:]

  ax0 = T.repeat(T.arange(nbatches), nchannels*ndim*nwords_max)
  ax1 = T.repeat(T.arange(nchannels), ndim * nwords_max).dimshuffle('x', 0)
  ax1 = T.repeat(ax1, nbatches, axis=0).flatten()
  ax2 = T.repeat(T.arange(ndim), nwords_max, axis=0).dimshuffle('x', 'x', 0)
  ax2 = T.repeat(ax2, nchannels, axis=1)
  ax2 = T.repeat(ax2, nbatches, axis=0).flatten()
  ax3 = neighborsArgSorted_masked_sorted_clipped.flatten()

  pooled_out = x[ax0, ax1, ax2, ax3]
  pooled_out = pooled_out.reshape((nbatches, nchannels, ndim, nwords_max)).dimshuffle(0,1,3,2)

  return pooled_out
Project: GELUs    Author: hendrycks    | Project source | File source
def relu(x):
    return T.maximum(x, 0)
Project: GELUs    Author: hendrycks    | Project source | File source
def lrelu(x, a=0.1):
    return T.maximum(x, a*x)
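
For 0 < a < 1, T.maximum(x, a*x) is exactly the leaky ReLU: it returns x for x >= 0 and a*x for x < 0. A NumPy check (values are illustrative):

x = np.array([-2., 0., 3.])
print(np.maximum(x, 0.1 * x))   # [-0.2  0.   3. ]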
Project: dl4mt-multi    Author: nyu-dl    | Project source | File source
def relu(x):
    return tensor.maximum(0.0, x)
Project: pdnn    Author: petered    | Project source | File source
def past_weight_grad_calculator_reloaded(xs, es, kp_x, kd_x, kp_e, kd_e, shapes):
    """
    Do an efficient update of the weights given the two spike-trains.

    This isn't actually implemented as an efficient update, but it will produce the identical result as if it were.

    :param xs: An (n_samples, n_in) array
    :param es: An (n_samples, n_out) array
    :param kp_x: kp for the x units
    :param kd_x: kd for the x units
    :param kp_e: kp for the e units
    :param kd_e: kd for the e units
    :param shapes: (minibatch_size, n_in, n_out)
    :return: An (n_in, n_out) approximate weight gradient.
    """
    # TODO: RESOLVE INSTABILITY ISSUE
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    n_samples, n_in, n_out = shapes
    rx = kd_x/(kp_x+kd_x)
    re = kd_e/(kp_e+kd_e)

    tx_last = create_shared_variable(np.zeros((n_samples, n_in)))
    te_last = create_shared_variable(np.zeros((n_samples, n_out)))
    xr = create_shared_variable(np.zeros((n_samples, n_in)))
    er = create_shared_variable(np.zeros((n_samples, n_out)))

    x_spikes = tt.neq(xs, 0)
    e_spikes = tt.neq(es, 0)
    t_last = tt.maximum(tx_last[:, :, None], te_last[:, None, :])
    sum_to_last = geoseries_sum(rx*re, t_start=t_last, t_end=0)  # Wasteful, since most of this is multiplied by zeros later, but for now it doesn't matter

    spikes = tt.bitwise_or(x_spikes[:, :, None], e_spikes[:, None, :])
    dw_es = (xr[:, :, None]*er[:, None, :]*spikes)*sum_to_last  # PROBLEM HERE!!!! Can be a very small number times a very large number
    # dw_es = (xr[:, :, None]*(x_spikes[:, :, None]-x_spikes[:, :, None]*e_spikes[:, None, :]) * er[:, None, :] + xr[:, :, None] * (er*e_spikes)[:, None, :]) * sum_to_last
    add_update(xr, xr*rx + xs/(kp_x+kd_x))
    add_update(er, er*re + es/(kp_e+kd_e))
    add_update(tx_last, tt.switch(x_spikes, 0, tx_last-1))
    add_update(te_last, tt.switch(e_spikes, 0, te_last-1))

    return dw_es.sum(axis=0)
Project: deligan    Author: val-iisc    | Project source | File source
def relu(x):
    return T.maximum(x, 0)
Project: deligan    Author: val-iisc    | Project source | File source
def lrelu(x, a=0.2):
    return T.maximum(x, a*x)
Project: deep-prior    Author: moberweger    | Project source | File source
def RMSProp(self, learning_rate=0.01, decay=0.9, epsilon=1.0 / 100.):
        """
        RMSProp of Tieleman et al.
        :param learning_rate: learning rate
        :param decay: decay rate of gradient history
        :param epsilon: lower bound on the RMS denominator (prevents division by zero)
        :return: update
        """

        updates = []

        for param_i, grad_i in zip(self.params, self.grads):
            # Accumulate gradient
            msg = theano.shared(numpy.zeros(param_i.get_value().shape, dtype=theano.config.floatX))
            new_mean_squared_grad = (decay * msg + (1 - decay) * T.sqr(grad_i))

            # Compute update
            rms_grad_t = T.sqrt(new_mean_squared_grad)
            rms_grad_t = T.maximum(rms_grad_t, epsilon)
            delta_x_t = -learning_rate * grad_i / rms_grad_t

            # Apply update
            updates.append((param_i, param_i + delta_x_t))
            updates.append((msg, new_mean_squared_grad))

        return updates
Project: CopyNet    Author: MultiPath    | Project source | File source
def squared_hinge(y_true, y_pred):
    return T.sqr(T.maximum(1. - y_true * y_pred, 0.)).mean(axis=-1)
Project: CopyNet    Author: MultiPath    | Project source | File source
def hinge(y_true, y_pred):
    return T.maximum(1. - y_true * y_pred, 0.).mean(axis=-1)
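
Both losses assume targets in {-1, +1}. A usage sketch (tensor names and values are illustrative):

y_true = T.matrix('y_true')
y_pred = T.matrix('y_pred')
f = theano.function([y_true, y_pred], hinge(y_true, y_pred))
yt = np.array([[1., -1.]], dtype=theano.config.floatX)
yp = np.array([[0.3, 0.8]], dtype=theano.config.floatX)
print(f(yt, yp))   # mean of max(1 - [0.3, -0.8], 0) = mean([0.7, 1.8]) = [1.25]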
Project: structured-output-ae    Author: sbelharbi    | Project source | File source
def get_updates(self, learning_rate, params, grads, lr_scalers):
        """Compute the parameters' updates.

        """
        if self._first_time:
            self.mean_square_grads = [
                sharedX_mtx(
                    param.get_value() * 0.,
                    name='mean_square_grad_'+param.name,
                    borrow=True) for param in params]
            self._first_time = False
        updates = []
        for (param, grad, mean_square_grad, lr_sc) in zip(
                params, grads, self.mean_square_grads, lr_scalers):
            new_mean_square_grad = (
                self.decay * mean_square_grad + (1-self.decay) * T.sqr(grad))
            # the update
            rms_grad_t = T.sqrt(new_mean_square_grad)
            rms_grad_t = T.maximum(rms_grad_t, self.epsilon)
            lr_scaled = learning_rate * lr_sc
            delta_x_t = - lr_scaled * grad / rms_grad_t

            new_param = param + delta_x_t
            # updates
            if self.max_colm_norm and param.name in ["W", "w"]:
                new_param_final = norm_constraint(tensor_var=new_param,
                                                  max_norm=self.max_norm)
            else:
                new_param_final = new_param
            updates.append((param, new_param_final))
            updates.append((mean_square_grad, new_mean_square_grad))

        return updates
Project: structured-output-ae    Author: sbelharbi    | Project source | File source
def get_updates(self, learning_rate, params, grads, lr_scalers):
        """Compute the parameters' updates.

        """
        t_prev = theano.shared(floatX(0.))
        updates = OrderedDict()

        # Using theano constant to prevent upcasting of float32
        one = T.constant(1)

        t = t_prev + 1
        a_t = learning_rate/(one-self.beta1**t)

        for param, g_t in zip(params, grads):
            value = param.get_value(borrow=True)
            m_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                                   broadcastable=param.broadcastable)
            u_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                                   broadcastable=param.broadcastable)

            m_t = self.beta1*m_prev + (one-self.beta1)*g_t
            u_t = T.maximum(self.beta2*u_prev, abs(g_t))
            step = a_t*m_t/(u_t + self.epsilon)

            updates[m_prev] = m_t
            updates[u_prev] = u_t
            new_param = param - step
            if self.max_colm_norm and param.name in ["W", "w"]:
                new_param_final = norm_constraint(tensor_var=new_param,
                                                  max_norm=self.max_norm)
            else:
                new_param_final = new_param
            updates[param] = new_param_final

        updates[t_prev] = t

        return updates
Project: structured-output-ae    Author: sbelharbi    | Project source | File source
def nonlinearity_fn(self, d_in=None, recons=False):
        if self.nonlinearity == NonLinearity.SIGMOID:
            return T.nnet.sigmoid(d_in)
        elif self.nonlinearity == NonLinearity.RELU and not recons:
            return T.maximum(d_in, 0)
        elif self.nonlinearity == NonLinearity.RELU and recons:
            return T.nnet.softplus(d_in)
        elif self.nonlinearity == NonLinearity.TANH:
            return T.tanh(d_in)
Project: nn-patterns    Author: pikinder    | Project source | File source
def _get_split(self, layer,
                   deterministic=True, conv_all_patches=True, **kwargs):

        # Get the patches and the outputs without the non-linearities.
        if type(layer) is L.DenseLayer:
            x, y = putils.get_dense_xy(layer, deterministic)
        elif type(layer) is L.Conv2DLayer:
            if conv_all_patches is True:
                x, y = putils.get_conv_xy_all(layer, deterministic)
            else:
                x, y = putils.get_conv_xy(layer, deterministic)
        else:
            raise ValueError("Unknown layer as input")

        # Create an output dictionary
        outputs = dict()

        for name, fun in subtypes:
            outputs[name] = dict()
            mrk_y = 1.0* T.cast(fun(y), dtype=theano.config.floatX)  # (N,O)
            y_current = y*mrk_y # This has a binary mask
            cnt_y = T.shape_padaxis(T.sum(mrk_y, axis=0), axis=0)  # (1,O)
            norm = T.maximum(cnt_y, 1.)

            # Count how many datapoints are considered
            outputs[name]['cnt'] = cnt_y

            # The mean of the current batch
            outputs[name]['m_y'] = T.shape_padaxis(y_current.sum(axis=0), axis=0) / norm  # (1,O) mean output for batch
            outputs[name]['m_x'] = T.dot(x.T, mrk_y) / norm  # (D,O) mean input for batch

            # The mean of the current batch
            outputs[name]['yty'] = T.shape_padaxis(T.sum(y_current ** 2., axis=0), axis=0) / norm  # (1,O)
            outputs[name]['xty'] = T.dot(x.T, y_current) / norm  # D,O

        return dict_to_list(outputs)
Project: nn-patterns    Author: pikinder    | Project source | File source
def get_split(self, layer,
                  deterministic=True, conv_all_patches=True, **kwargs):

        # Get the patches and the outputs without the non-linearities.
        if type(layer) is L.DenseLayer:
            x, y = get_dense_xy(layer, deterministic)
        elif type(layer) is L.Conv2DLayer:
            if conv_all_patches is True:
                x, y = get_conv_xy_all(layer, deterministic)
            else:
                x, y = get_conv_xy(layer, deterministic)
        else:
            raise ValueError("Unknown layer as input")

        # Create an output dictionary
        outputs = dict()

        for name, fun in subtypes:
            outputs[name] = dict()
            mrk_y = 1.0* T.cast(fun(y), dtype=theano.config.floatX)  # (N,O)
            y_current = y*mrk_y # This has a binary mask
            cnt_y = T.shape_padaxis(T.sum(mrk_y, axis=0), axis=0)  # (1,O)
            norm = T.maximum(cnt_y, 1.)

            # Count how many datapoints are considered
            outputs[name]['cnt'] = cnt_y

            # The mean of the current batch
            outputs[name]['m_y'] = T.shape_padaxis(y_current.sum(axis=0), axis=0) / norm  # (1,O) mean output for batch
            outputs[name]['m_x'] = T.dot(x.T, mrk_y) / norm  # (D,O) mean input for batch

            # The mean of the current batch
            outputs[name]['yty'] = T.shape_padaxis(T.sum(y_current ** 2., axis=0), axis=0) / norm  # (1,O)
            outputs[name]['xty'] = T.dot(x.T, y_current) / norm  # D,O

        return dict_to_list(outputs)
Project: lightML    Author: jfzhang95    | Project source | File source
def rectify(X):
    return T.maximum(X, 0.)
Project: lightML    Author: jfzhang95    | Project source | File source
def clip(X, epsilon):
    return T.maximum(T.minimum(X, epsilon), -1*epsilon)
Project: punctuator2    Author: ottokart    | Project source | File source
def PReLU(a, x):
    return T.maximum(0.0, x) + a * T.minimum(0.0, x)
Project: punctuator2    Author: ottokart    | Project source | File source
def ReLU(x):
    return T.maximum(0.0, x)
Project: lowrank-highwaynetwork    Author: Avmb    | Project source | File source
def _cost_func(self, y):
        #y = T.clip(y, EPSILON, 1.0 - EPSILON)
        #return CrossEntropyCost(y, self.k).get()

        k_onehot = T.eye(y.shape[1])[self.k]
        k_centered = 2.0 * k_onehot - 1.0
        loss = T.mean(T.sqr(T.maximum(0.0, 1.0 - y*k_centered)))
        return loss
Project: snn4hrl    Author: florensacc    | Project source | File source
def dist_info_sym(self, obs_var, state_info_var=None):
        mean_var, log_std_var = L.get_output([self._l_mean, self._l_log_std], obs_var)
        if self.min_std is not None:
            log_std_var = TT.maximum(log_std_var, np.log(self.min_std))
        return dict(mean=mean_var, log_std=log_std_var)
Project: snn4hrl    Author: florensacc    | Project source | File source
def dist_info_sym(self, obs_var, state_info_vars=None):
        mean_var, log_std_var = L.get_output([self._l_mean, self._l_log_std], obs_var)
        if self.min_std is not None:
            log_std_var = TT.maximum(log_std_var, np.log(self.min_std))
        return dict(mean=mean_var, log_std=log_std_var)
Project: LiviaNET    Author: josedolz    | Project source | File source
def applyActivationFunction_ReLU_v1(inputData):
    """ inputData is a tensor5D with shape:
    # (batchSize,
    # Number of feature Maps,
    # convolvedImageShape[0],
    # convolvedImageShape[1],
    # convolvedImageShape[2]) """

    return T.maximum(inputData,0)

# --- Version 2 ---
Project: cortex    Author: rdevon    | Project source | File source
def step_kl_divergence(self, q, mu, log_sigma):
        mu_q = _slice(q, 0, self.dim)
        log_sigma_q = _slice(q, 1, self.dim)
        log_sigma_q = T.maximum(log_sigma_q, self.clip)
        log_sigma = T.maximum(log_sigma, self.clip)

        kl = log_sigma - log_sigma_q + 0.5 * (
            (T.exp(2 * log_sigma_q) + (mu - mu_q) ** 2) /
            T.exp(2 * log_sigma)
            - 1)
        return kl.sum(axis=kl.ndim-1)
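
This is the closed-form KL divergence between diagonal Gaussians q = N(mu_q, sigma_q^2) and p = N(mu, sigma^2), written per dimension in terms of log-stds and summed over the last axis:

KL(q || p) = log(sigma) - log(sigma_q) + (sigma_q^2 + (mu - mu_q)^2) / (2 * sigma^2) - 1/2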
Project: cortex    Author: rdevon    | Project source | File source
def _neg_normal_log_prob(x, p, clip=None, sum_probs=True):
    dim = p.shape[p.ndim-1] // 2
    mu = _slice(p, 0, dim)
    log_sigma = _slice(p, 1, dim)
    if clip is not None:
        log_sigma = T.maximum(log_sigma, clip)
    energy = 0.5 * (
        (x - mu)**2 / (T.exp(2 * log_sigma)) + 2 * log_sigma + T.log(2 * pi))
    if sum_probs:
        return energy.sum(axis=energy.ndim-1)
    else:
        return energy
Project: cortex    Author: rdevon    | Project source | File source
def _normal_entropy(p, clip=None):
    dim = p.shape[p.ndim-1] // 2
    log_sigma = _slice(p, 1, dim)
    if clip is not None:
        log_sigma = T.maximum(log_sigma, clip)
    entropy = 0.5 * T.log(2 * pi * e) + log_sigma
    return entropy.sum(axis=entropy.ndim-1)

# LOGISTIC ---------------------------------------------------------------------