The following code examples, extracted from open-source Python projects, illustrate how to use theano.tensor.isnan().
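First, a minimal, self-contained sketch of what theano.tensor.isnan() does (not taken from any of the projects below): it builds a symbolic elementwise NaN mask that can be compiled and evaluated like any other Theano expression.

import numpy as np
import theano
import theano.tensor as T

x = T.dvector('x')                              # symbolic float64 vector
isnan_fn = theano.function([x], T.isnan(x))
print(isnan_fn(np.array([1.0, np.nan, 3.0])))   # elementwise NaN mask (True/1 only at the NaN position)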
# Assumes: import theano.tensor as T; from keras import backend as K
def masked_mse(y_true, y_pred):
    # NaN entries of y_true mark missing targets; exclude them from the loss.
    mask = T.isnan(y_true)
    diff = y_pred - y_true
    squared = K.square(diff)
    sum_squared_error = K.sum(K.switch(mask, 0.0, squared), axis=-1)
    n_valid_per_sample = K.sum(~mask, axis=-1)
    return sum_squared_error / n_valid_per_sample
# Assumes: import theano.tensor as T; from keras import backend as K
def masked_binary_crossentropy(y_true, y_pred):
    # NaN entries of y_true mark missing labels; mask them out of the loss.
    mask = T.isnan(y_true)
    cross_entropy_values = K.binary_crossentropy(output=y_pred, target=y_true)
    sum_cross_entropy_values = K.sum(K.switch(mask, 0.0, cross_entropy_values), axis=-1)
    n_valid_per_sample = K.sum(~mask, axis=-1)
    return sum_cross_entropy_values / n_valid_per_sample
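A hedged sketch of how a masked loss such as masked_mse above is typically wired into Keras (assuming the Theano backend, since the mask is built with T.isnan); the model below is a hypothetical stand-in, and NaN entries of y_true mark targets to be ignored.

from keras.models import Sequential
from keras.layers import Dense

# Hypothetical model; any Keras model with a matching output shape would do.
model = Sequential([Dense(4, input_dim=8)])
model.compile(optimizer='adam', loss=masked_mse)   # NaNs in y_true are excluded from the loss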
# Assumes: import numpy as np; from theano import tensor; `theanotools` and the
# self.* attributes (trainables, means, vars, time) come from the surrounding class.
def get_updates(self, loss, lr, max_norm=1, beta1=0.9, beta2=0.999,
                epsilon=1e-8, grads=None):
    # Gradients
    if grads is None:
        grads = tensor.grad(loss, self.trainables)

    # Clipping
    norm  = tensor.sqrt(sum([tensor.sqr(g).sum() for g in grads]))
    m     = theanotools.clipping_multiplier(norm, max_norm)
    grads = [m*g for g in grads]

    # Safeguard against numerical instability
    new_cond = tensor.or_(tensor.or_(tensor.isnan(norm), tensor.isinf(norm)),
                          tensor.or_(norm < 0, norm > 1e10))
    grads = [tensor.switch(new_cond, np.float32(0), g) for g in grads]

    # Safeguard against numerical instability
    #cond  = tensor.or_(norm < 0, tensor.or_(tensor.isnan(norm), tensor.isinf(norm)))
    #grads = [tensor.switch(cond, np.float32(0), g) for g in grads]

    # New values
    t       = self.time + 1
    lr_t    = lr*tensor.sqrt(1. - beta2**t)/(1. - beta1**t)
    means_t = [beta1*m + (1. - beta1)*g for g, m in zip(grads, self.means)]
    vars_t  = [beta2*v + (1. - beta2)*tensor.sqr(g) for g, v in zip(grads, self.vars)]
    steps   = [lr_t*m_t/(tensor.sqrt(v_t) + epsilon) for m_t, v_t in zip(means_t, vars_t)]

    # Updates
    updates  = [(x, x - step) for x, step in zip(self.trainables, steps)]
    updates += [(m, m_t) for m, m_t in zip(self.means, means_t)]
    updates += [(v, v_t) for v, v_t in zip(self.vars, vars_t)]
    updates += [(self.time, t)]

    return norm, grads, updates
def replace_inf_nan(x, v):
    return tensor.switch(tensor.or_(tensor.isnan(x), tensor.isinf(x)), v, x)

# apply r = x + delta if r is not inf / nan, else return x
def update_inf_nan(x, delta, v):
    r = x + delta
    return tensor.switch(tensor.or_(tensor.isnan(r), tensor.isinf(r)), x, r)

# will check if shuffle is needed
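A small, hedged demonstration of replace_inf_nan from the snippet above (the input array and fill value are made up): non-finite entries are replaced once the expression is compiled and evaluated.

import numpy as np
import theano
from theano import tensor

x = tensor.dvector('x')
clean = theano.function([x], replace_inf_nan(x, 0.0))
print(clean(np.array([1.0, np.inf, np.nan, -2.0])))   # -> [ 1.  0.  0. -2.]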
# Assumes: import theano as th; import theano.tensor as T
def sgd(params, cost=None, gradients=None, learningrate=1e-4):
    """
    Computes the updates for Stochastic Gradient Descent (without momentum).

    :type params: list
    :param params: Network parameters.

    :type cost: theano.tensor.var.TensorVariable
    :param cost: Cost variable (scalar). Optional if the gradients are provided.

    :type gradients: list
    :param gradients: Gradients of the cost w.r.t. the parameters. Optional if the cost is provided.

    :type learningrate: theano.tensor.var.TensorVariable or float
    :param learningrate: Learning rate of SGD. Can be a float (static) or a dynamic theano variable.

    :return: List of updates
    """
    # Validate input
    assert not (cost is None and gradients is None), \
        "Update function sgd requires either a cost scalar or a list of gradients."

    # Compute gradients if requested
    if gradients is None and cost is not None:
        pdC = T.grad(cost, wrt=params)
        # Kill gradients if cost is nan
        dC = [th.ifelse.ifelse(T.isnan(cost), T.zeros_like(dparam), dparam) for dparam in pdC]
    else:
        dC = gradients

    # Compute updates
    upd = [(param, param - learningrate * dparam) for param, dparam in zip(params, dC)]

    # Return
    return upd


# ADAM
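A hedged usage sketch for the sgd updates above, on a made-up toy problem (the shared variable, the quadratic cost, and the iteration count are assumptions for illustration): the returned update list is handed to theano.function.

import numpy as np
import theano as th
import theano.ifelse   # sgd above accesses th.ifelse.ifelse
import theano.tensor as T

w = th.shared(np.asarray([3.0, -1.0], dtype=th.config.floatX), name='w')
cost = T.sum(w ** 2)                                                        # toy quadratic cost
train = th.function([], cost, updates=sgd([w], cost=cost, learningrate=0.1))
for _ in range(50):
    train()
print(w.get_value())                                                        # approaches [0, 0]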
# Assumes: import numpy as np; import theano as th; import theano.tensor as T
def momsgd(params, cost=None, gradients=None, learningrate=0.01, momentum=0.9, nesterov=True):
    # TODO: Docstring
    # Validate input
    assert not (cost is None and gradients is None), \
        "Update function momsgd requires either a cost scalar or a list of gradients."

    # Compute gradients if requested
    if gradients is None and cost is not None:
        pdC = T.grad(cost, wrt=params)
        # Kill gradients if cost is nan
        dC = [th.ifelse.ifelse(T.isnan(cost), T.zeros_like(dparam), dparam) for dparam in pdC]
    else:
        dC = gradients

    # Init update list
    updates = []

    for param, dparam in zip(params, dC):
        # Fetch parameter shape
        paramshape = param.get_value().shape
        # ... and init initial momentum
        mom = th.shared(np.zeros(paramshape, dtype=th.config.floatX))
        # Compute velocity
        vel = momentum * mom - learningrate * dparam
        # Compute new parameters
        if nesterov:
            newparam = param + momentum * vel - learningrate * dparam
        else:
            newparam = param + vel
        # Update update list
        updates.append((param, newparam))
        updates.append((mom, vel))

    # Return
    return updates
# Assumes: import theano as th; import theano.tensor as T
def rmsprop(params, cost=None, gradients=None, learningrate=0.0005, rho=0.9, epsilon=1e-6):
    # Validate input
    assert not (cost is None and gradients is None), \
        "Update function rmsprop requires either a cost scalar or a list of gradients."

    # Compute gradients if requested
    if gradients is None and cost is not None:
        pdC = T.grad(cost, wrt=params)
        # Kill gradients if cost is nan
        dC = [th.ifelse.ifelse(T.isnan(cost), T.zeros_like(dparam), dparam) for dparam in pdC]
    else:
        dC = gradients

    # Init update list
    updates = []

    for p, g in zip(params, dC):
        acc = th.shared(p.get_value() * 0.)
        newacc = rho * acc + (1 - rho) * g ** 2
        gradscale = T.sqrt(newacc + epsilon)
        g = g / gradscale
        updates.append((acc, newacc))
        updates.append((p, p - learningrate * g))

    return updates


# Aliases
# Assumes: import numpy as np; import theano as th; import theano.tensor as T
def nadam(params, cost=None, gradients=None, learningrate=0.002, beta1=0.9, beta2=0.999,
          epsilon=1e-8, scheduledecay=0.004, iterstart=0):
    """See also: https://github.com/fchollet/keras/blob/master/keras/optimizers.py#L441"""
    # Validate input
    assert not (cost is None and gradients is None), \
        "Update function nadam requires either a cost scalar or a list of gradients."

    # Compute gradients if requested
    if gradients is None and cost is not None:
        pdC = T.grad(cost, wrt=params)
        # Kill gradients if cost is nan
        dC = [th.ifelse.ifelse(T.isnan(cost), T.zeros_like(dparam), dparam) for dparam in pdC]
    else:
        dC = gradients

    # Init update list
    updates = []

    tm1 = th.shared(np.asarray(iterstart, dtype=th.config.floatX))
    t = tm1 + 1
    momcachet = beta1 * (1. - 0.5 * 0.96 ** (t * scheduledecay))
    # The remaining Nadam update computations are not implemented in this snippet.
    pass


# Momentum SGD
# Assumes: import numpy as np; import theano as th; import theano.tensor as T
def rmsprop(params, cost=None, gradients=None, learningrate=0.0005, rho=0.9, epsilon=1e-6):
    # Validate input
    assert not (cost is None and gradients is None), \
        "Update function rmsprop requires either a cost scalar or a list of gradients."

    # Compute gradients if requested
    if gradients is None and cost is not None:
        pdC = T.grad(cost, wrt=params)
        # Kill gradients if cost is nan
        dC = [th.ifelse.ifelse(T.isnan(cost), T.zeros_like(dparam), dparam) for dparam in pdC]
    else:
        dC = gradients

    # Init update list
    updates = []

    for param, dparam in zip(params, dC):
        paramshape = param.get_value().shape
        acc = th.shared(np.zeros(paramshape, dtype=th.config.floatX))
        newacc = rho * acc + (1 - rho) * dparam ** 2
        gradscale = T.sqrt(newacc + epsilon)
        dparam = dparam / gradscale
        updates.append((acc, newacc))
        updates.append((param, param - learningrate * dparam))

    return updates


# Aliases
# Assumes: import theano; import theano.tensor as T; from collections import OrderedDict;
# make_normal and make_copy are project-specific helpers, and srng is a RandomStreams instance.
def pseudograd(loss, params, srng=None, temperature=1.0e-1,
               learning_rate=1.0e-2, rho2=0.95):

    one = T.constant(1.0)
    zero = T.constant(0.0)

    deltas = [make_normal(param, srng=srng) for param in params]
    momentum = [make_copy(param) for param in params]

    new_params = [
        param + learning_rate * delta
        for param, delta, m in zip(params, deltas, momentum)
    ]

    new_loss = theano.clone(loss, replace=dict(zip(params, new_params)))

    accepting_p = T.exp((loss - new_loss) / temperature)
    u = srng.uniform(size=(), dtype=loss.dtype)

    cond = T.or_(T.or_(u > accepting_p, T.isnan(new_loss)), T.isinf(new_loss))
    step = T.switch(cond, zero, one)

    updates = OrderedDict()

    for m, delta in zip(momentum, deltas):
        updates[m] = m * rho2 + (one - rho2) * delta * step

    for param, m in zip(params, momentum):
        updates[param] = param + learning_rate * m

    return updates