我们从Python开源项目中，提取了以下9个代码示例，用于说明如何使用keras.backend.update_add()。
def get_updates(self, params, constraints, loss):
    """Build the list of update ops for one optimisation step.

    For every parameter a running sum ``v`` of squared gradients is kept,
    and the gradient step is divided by
    ``v_t + xi_2 * exp(-xi_1 * v_t)``.

    Args:
        params: parameters (backend variables) to optimise.
        constraints: mapping from a parameter to a callable that is applied
            to that parameter's updated value before it is written back.
        loss: loss tensor handed to ``self.get_gradients``.

    Returns:
        ``self.updates``: the iteration-counter increment followed by the
        accumulator and parameter updates.
    """
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
        # Rebind rather than ``*=`` so ``self.lr`` itself is never modified.
        lr = lr * (1. / (1. + self.decay * self.iterations))

    vs = [K.zeros(K.get_variable_shape(p)) for p in params]
    self.weights = [self.iterations] + vs

    for p, g, v in zip(params, grads, vs):
        v_t = v + K.square(g)
        # Step with the decayed rate ``lr``; the earlier code computed it
        # but then used ``self.lr``, so ``decay`` never took effect.
        p_t = p - lr * g / (v_t + self.xi_2 * K.exp(-self.xi_1 * v_t))
        self.updates.append(K.update(v, v_t))

        new_p = p_t
        # apply constraints
        if p in constraints:
            c = constraints[p]
            new_p = c(new_p)
        self.updates.append(K.update(p, new_p))
    return self.updates
def get_updates(self, params, constraints, loss):
    """Build the list of update ops for one optimisation step.

    For every parameter an accumulator ``v`` is blended with the squared
    gradient using the weight ``gamma / t`` (``t`` = iterations + 1), and
    the gradient step is divided by
    ``t * v_t + xi_2 * exp(-xi_1 * t * v_t)``.

    Args:
        params: parameters (backend variables) to optimise.
        constraints: mapping from a parameter to a callable that is applied
            to that parameter's updated value before it is written back.
        loss: loss tensor handed to ``self.get_gradients``.

    Returns:
        ``self.updates``: the iteration-counter increment followed by the
        accumulator and parameter updates.
    """
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
        # Rebind rather than ``*=`` so ``self.lr`` itself is never modified.
        lr = lr * (1. / (1. + self.decay * self.iterations))

    t = self.iterations + 1
    # Same for every parameter, so compute the blend weight once.
    mix = self.gamma / t

    vs = [K.zeros(K.get_variable_shape(p)) for p in params]
    self.weights = [self.iterations] + vs

    for p, g, v in zip(params, grads, vs):
        v_t = (1 - mix) * v + mix * K.square(g)
        # Step with the decayed rate ``lr``; the earlier code computed it
        # but then used ``self.lr``, so ``decay`` never took effect.
        p_t = p - lr * g / (
            t * v_t + self.xi_2 * K.exp(-self.xi_1 * t * v_t))
        self.updates.append(K.update(v, v_t))

        new_p = p_t
        # apply constraints
        if p in constraints:
            c = constraints[p]
            new_p = c(new_p)
        self.updates.append(K.update(p, new_p))
    return self.updates
def get_updates(self, params, constraints, loss):
    """Build the list of update ops for one optimisation step.

    For every parameter an accumulator ``v`` is blended with the squared
    gradient using the weight ``gamma / t`` (``t`` = iterations + 1), and
    the gradient step is divided by ``sqrt(t * v_t) + delta``.

    Args:
        params: parameters (backend variables) to optimise.
        constraints: mapping from a parameter to a callable that is applied
            to that parameter's updated value before it is written back.
        loss: loss tensor handed to ``self.get_gradients``.

    Returns:
        ``self.updates``: the iteration-counter increment followed by the
        accumulator and parameter updates.
    """
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
        # Rebind rather than ``*=`` so ``self.lr`` itself is never modified.
        lr = lr * (1. / (1. + self.decay * self.iterations))

    t = self.iterations + 1
    # Same for every parameter, so compute the blend weight once.
    mix = self.gamma / t

    vs = [K.zeros(K.get_variable_shape(p)) for p in params]
    self.weights = [self.iterations] + vs

    for p, g, v in zip(params, grads, vs):
        v_t = (1 - mix) * v + mix * K.square(g)
        # Step with the decayed rate ``lr``; the earlier code computed it
        # but then used ``self.lr``, so ``decay`` never took effect.
        p_t = p - lr * g / (K.sqrt(t * v_t) + self.delta)
        self.updates.append(K.update(v, v_t))

        new_p = p_t
        # apply constraints
        if p in constraints:
            c = constraints[p]
            new_p = c(new_p)
        self.updates.append(K.update(p, new_p))
    return self.updates
def get_updates(self, params, constraints, loss):
    """Assemble the update ops for one optimisation step.

    Three zero-initialised state tensors are kept per parameter: ``m``
    (running mean of the gradient), ``v`` (running mean of the squared
    gradient) and ``mem`` (which sets the averaging rate
    ``r = 1 / (1 + mem)``). The per-element step size is the smaller of
    the (optionally decayed) learning rate and ``m_t**2 / (v_t + epsilon)``.

    Args:
        params: parameters (backend variables) to optimise.
        constraints: mapping from a parameter to a callable applied to the
            parameter's new value before it is stored.
        loss: loss tensor passed to ``self.get_gradients``.

    Returns:
        ``self.updates``, starting with the iteration-counter increment.
    """
    gradients = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    step_size = self.lr
    if self.initial_decay > 0:
        step_size *= (1. / (1. + self.decay * self.iterations))

    param_shapes = [K.get_variable_shape(param) for param in params]

    def fresh_state():
        # One zero tensor per parameter, shaped like that parameter.
        return [K.zeros(shape) for shape in param_shapes]

    means = fresh_state()
    sq_means = fresh_state()
    memories = fresh_state()
    self.weights = [self.iterations] + means + sq_means + memories

    state = zip(params, gradients, means, sq_means, memories)
    for param, grad, mean, sq_mean, memory in state:
        rate = 1. / (1. + memory)
        keep = 1. - rate
        mean_next = keep * mean + rate * grad
        sq_mean_next = keep * sq_mean + rate * K.square(grad)

        signal = K.square(mean_next) / (sq_mean_next + self.epsilon)
        scale = K.minimum(step_size, signal)
        candidate = param - grad * scale / (
            K.sqrt(sq_mean_next) + self.epsilon)
        memory_next = 1. + memory * (1. - signal)

        self.updates.extend([
            K.update(mean, mean_next),
            K.update(sq_mean, sq_mean_next),
            K.update(memory, memory_next),
        ])

        # A constrained parameter is passed through its constraint first.
        if p_has_constraint := (param in constraints):
            candidate = constraints[param](candidate)
        self.updates.append(K.update(param, candidate))

    return self.updates
def get_updates(self, params, constraints, loss):
    """Assemble the update ops for one optimisation step.

    Four zero-initialised state tensors are kept per parameter: ``m``
    (running mean of the gradient), ``v`` (running mean of the squared
    gradient), ``mem`` (which sets the averaging rate) and ``denoise``
    (a 0.99/0.01 running blend of ``m_t**2 / (v_t + epsilon)``). The
    averaging rate ``1 / (1 + mem)`` is clipped to ``[0.005, 0.2]`` and
    the ``denoise`` blend is used directly as the per-element step size.

    Args:
        params: parameters (backend variables) to optimise.
        constraints: mapping from a parameter to a callable applied to the
            parameter's new value before it is stored.
        loss: loss tensor passed to ``self.get_gradients``.

    Returns:
        ``self.updates``, starting with the iteration-counter increment.
    """
    gradients = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    param_shapes = [K.get_variable_shape(param) for param in params]

    def fresh_state():
        # One zero tensor per parameter, shaped like that parameter.
        return [K.zeros(shape) for shape in param_shapes]

    means = fresh_state()
    sq_means = fresh_state()
    memories = fresh_state()
    signals = fresh_state()
    self.weights = (
        [self.iterations] + means + sq_means + memories + signals)

    state = zip(params, gradients, means, sq_means, memories, signals)
    for param, grad, mean, sq_mean, memory, signal in state:
        rate = K.minimum(0.2, K.maximum(0.005, 1. / (1. + memory)))
        # Memory value implied by the clipped rate.
        clipped_memory = 1. / rate - 1.

        keep = 1. - rate
        mean_next = keep * mean + rate * grad
        sq_mean_next = keep * sq_mean + rate * K.square(grad)

        signal_next = 0.99 * signal + 0.01 * K.square(mean_next) / (
            sq_mean_next + self.epsilon)
        candidate = param - grad * signal_next / (
            K.sqrt(sq_mean_next) + self.epsilon)
        memory_next = K.maximum(
            0., 1. + clipped_memory * (1. - signal_next))

        self.updates.extend([
            K.update(mean, mean_next),
            K.update(sq_mean, sq_mean_next),
            K.update(memory, memory_next),
            K.update(signal, signal_next),
        ])

        # A constrained parameter is passed through its constraint first.
        if param in constraints:
            candidate = constraints[param](candidate)
        self.updates.append(K.update(param, candidate))

    return self.updates
def get_updates(self, loss, params):
    """Build per-device update ops from gradients averaged across devices.

    Gradients are computed once per device in ``self._gdev_list`` (each
    under its own ``tower_<i>`` name scope, reusing variables after the
    first tower), combined with ``all_avg_gradients``, and then applied on
    every device through the wrapped ``self.optimizer``.

    Args:
        loss: loss tensor to differentiate.
        params: variables to compute gradients for.

    Returns:
        ``self.updates``: the iteration-counter increment followed by one
        apply-gradients op per device.
    """
    gdev_list = self._gdev_list
    tower_gradvars = []

    global_scope = tf.get_variable_scope()
    for idev, device in enumerate(gdev_list):
        with tf.device(device), \
                tf.variable_scope(global_scope, reuse=idev > 0), \
                tf.name_scope('tower_%i' % idev):
            # compute_gradients already yields (gradient, variable) pairs;
            # zipping them with ``params`` again (as before) produced
            # ((gradient, variable), param) triples.
            gradvars = self.optimizer.compute_gradients(loss, params)
        tower_gradvars.append(gradvars)

    # NOTE(review): all_avg_gradients is assumed to take and return one
    # list of (gradient, variable) pairs per device -- confirm against its
    # definition.
    tower_gradvars = all_avg_gradients(tower_gradvars,
                                       gdev_list,
                                       usenccl=False)

    self.updates = [K.update_add(self.iterations, 1)]

    for device_num, device in enumerate(gdev_list):
        with tf.device(device):
            # Apply this device's averaged pairs. The earlier code looked
            # them up but then applied the un-averaged ``grads`` left over
            # from the last tower.
            gradvars = tower_gradvars[device_num]
            opt_update = self.optimizer.apply_gradients(
                gradvars, global_step=self.iterations)
        self.updates.append(opt_update)

    return self.updates
def get_updates(self, params, constraints, loss):
    """Build the update list for a plain gradient-descent step.

    Each parameter ``p`` is moved to ``p - self.lr * g``; if ``p`` has an
    entry in ``constraints`` the new value is passed through that callable
    first.

    Args:
        params: parameters (backend variables) to optimise.
        constraints: mapping from a parameter to a callable applied to the
            parameter's new value; may be ``None`` or empty.
        loss: loss tensor passed to ``self.get_gradients``.

    Returns:
        ``self.updates``: the iteration-counter increment followed by one
        ``(parameter, new_value)`` pair per parameter.
    """
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    for p, g in zip(params, grads):
        new_p = p - self.lr * g
        # Honour constraints instead of silently dropping them. The None
        # guard keeps callers that passed no mapping working as before.
        if constraints is not None and p in constraints:
            new_p = constraints[p](new_p)
        self.updates.append((p, new_p))
    return self.updates
def get_updates(self, params, constraints, loss):
    """Assemble the update ops for one optimisation step.

    Besides per-parameter first and second moment estimates (``beta_1`` /
    ``beta_2`` running averages with bias-corrected learning rate), two
    scalar variables are tracked: ``f`` (initialised to 0) and ``d``
    (initialised to 1). ``f`` follows the loss, with its relative change
    per step clipped using ``small_k`` and ``big_K``; ``d`` is a ``beta_3``
    running average of that relative change and scales the denominator of
    every parameter step. On the first step ``f`` is set to the loss and
    ``d`` to 1.

    Args:
        params: parameters (backend variables) to optimise.
        constraints: mapping from a parameter to a callable applied to the
            parameter's new value before it is stored.
        loss: loss tensor; used for the gradients and for tracking ``f``.

    Returns:
        ``self.updates``, starting with the iteration-counter increment.
    """
    gradients = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    base_lr = self.lr
    # ``inital_decay`` (sic) is the attribute name this method reads.
    if self.inital_decay > 0:
        base_lr *= (1. / (1. + self.decay * self.iterations))

    t = self.iterations + 1
    bias_fix = K.sqrt(1. - K.pow(self.beta_2, t)) / (
        1. - K.pow(self.beta_1, t))
    lr_t = base_lr * bias_fix

    param_shapes = [K.get_variable_shape(param) for param in params]
    first_moments = [K.zeros(shape) for shape in param_shapes]
    second_moments = [K.zeros(shape) for shape in param_shapes]
    tracked_loss = K.variable(0)
    damping = K.variable(1)
    self.weights = (
        [self.iterations] + first_moments + second_moments
        + [tracked_loss, damping])

    past_first_step = K.greater(t, K.variable(1))

    # Clipping bounds for the loss ratio depend on whether the loss is
    # above the tracked value.
    lower_bound = K.switch(K.greater(loss, tracked_loss),
                           self.small_k + 1,
                           1. / (self.big_K + 1))
    upper_bound = K.switch(K.greater(loss, tracked_loss),
                           self.big_K + 1,
                           1. / (self.small_k + 1))

    loss_ratio = loss / (tracked_loss + self.epsilon)
    clipped_ratio = K.minimum(K.maximum(lower_bound, loss_ratio),
                              upper_bound)
    tracked_candidate = clipped_ratio * tracked_loss
    relative_change = K.abs(tracked_candidate - tracked_loss) / (
        K.minimum(tracked_candidate, tracked_loss))
    damping_candidate = (
        self.beta_3 * damping + (1 - self.beta_3) * relative_change)

    # First step: seed the tracked loss with the loss, damping with 1.
    tracked_next = K.switch(past_first_step, tracked_candidate, loss)
    damping_next = K.switch(past_first_step, damping_candidate,
                            K.variable(1.))

    self.updates.append(K.update(tracked_loss, tracked_next))
    self.updates.append(K.update(damping, damping_next))

    state = zip(params, gradients, first_moments, second_moments)
    for param, grad, moment1, moment2 in state:
        moment1_next = (self.beta_1 * moment1) + (1. - self.beta_1) * grad
        moment2_next = (
            (self.beta_2 * moment2) + (1. - self.beta_2) * K.square(grad))
        candidate = param - lr_t * moment1_next / (
            damping_next * K.sqrt(moment2_next) + self.epsilon)

        self.updates.append(K.update(moment1, moment1_next))
        self.updates.append(K.update(moment2, moment2_next))

        # A constrained parameter is passed through its constraint first.
        if param in constraints:
            candidate = constraints[param](candidate)
        self.updates.append(K.update(param, candidate))

    return self.updates
def get_updates(self, params, constraints, loss):
    """Assemble update ops that only take effect every ``accum_iters`` calls.

    Gradients are summed into a per-parameter accumulator. A 0/1 switch is
    1 when ``(iterations + 1) % accum_iters == 0``; on those calls the
    moments, ``self.m_schedule`` and the parameters receive their new
    values and the accumulator is reset to zero, otherwise they keep their
    current values and the accumulator grows by the current gradient.

    The gradient accumulators are not added to ``self.weights``.

    Args:
        params: parameters (backend variables) to optimise.
        constraints: mapping from a parameter to a callable applied to the
            parameter's new value before it is blended in.
        loss: loss tensor passed to ``self.get_gradients``.

    Returns:
        ``self.updates``, starting with the iteration-counter increment.
    """
    gradients = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    t = (self.iterations + 1.) / self.accum_iters
    is_boundary = K.equal((self.iterations + 1.) % self.accum_iters, 0)
    accum_switch = K.cast(is_boundary, dtype=K.floatx())
    hold = 1. - accum_switch

    # Due to the recommendations in [2], i.e. warming momentum schedule
    momentum_cache_t = self.beta_1 * (
        1. - 0.5 * (K.pow(0.96, t * self.schedule_decay)))
    momentum_cache_t_1 = self.beta_1 * (
        1. - 0.5 * (K.pow(0.96, (t + 1) * self.schedule_decay)))
    m_schedule_new = self.m_schedule * momentum_cache_t
    m_schedule_next = (
        self.m_schedule * momentum_cache_t * momentum_cache_t_1)
    self.updates.append((
        self.m_schedule,
        accum_switch * m_schedule_new + hold * self.m_schedule,
    ))

    value_shapes = [value.shape for value in K.batch_get_value(params)]
    first_moments = [K.zeros(shape) for shape in value_shapes]
    second_moments = [K.zeros(shape) for shape in value_shapes]
    grad_sums = [K.zeros(shape) for shape in value_shapes]
    self.weights = [self.iterations] + first_moments + second_moments

    state = zip(params, gradients, first_moments, second_moments,
                grad_sums)
    for param, grad_now, moment1, moment2, grad_sum in state:
        grad_total = grad_sum + grad_now
        grad_avg = grad_total / self.accum_iters

        # the following equations given in [1]
        grad_hat = grad_avg / (1. - m_schedule_new)
        moment1_next = (
            self.beta_1 * moment1 + (1. - self.beta_1) * grad_avg)
        moment1_hat = moment1_next / (1. - m_schedule_next)
        moment2_next = (
            self.beta_2 * moment2
            + (1. - self.beta_2) * K.square(grad_avg))
        moment2_hat = moment2_next / (1. - K.pow(self.beta_2, t))
        direction = (
            (1. - momentum_cache_t) * grad_hat
            + momentum_cache_t_1 * moment1_hat)

        self.updates.append(K.update(
            moment1, hold * moment1 + accum_switch * moment1_next))
        self.updates.append(K.update(
            moment2, hold * moment2 + accum_switch * moment2_next))
        # Keep summing between boundaries; reset to zero on a boundary.
        self.updates.append(K.update(grad_sum, hold * grad_total))

        candidate = param - self.lr * direction / (
            K.sqrt(moment2_hat) + self.epsilon)

        # A constrained parameter is passed through its constraint first.
        if param in constraints:
            candidate = constraints[param](candidate)

        self.updates.append(K.update(
            param, hold * param + accum_switch * candidate))

    return self.updates