我们从 Python 开源项目中，提取了以下 50 个代码示例，用于说明如何使用 keras.backend.sum()。
def get_initial_states(self, onto_nse_input, input_mask=None):
    """Return the reader's initial states plus a flattened initial memory.

    The last feature of ``onto_nse_input`` is dropped, the remainder is
    reduced over axes 2 and 3 (plain mean without a mask, mask-weighted sum
    with one), batch-flattened, and appended to the list returned by
    ``self.reader.get_initial_states``.

    Shape comments are carried over from the original author.
    """
    # (batch_size, num_words, num_senses, num_hyps, output_dim + 1)
    reader_input = onto_nse_input
    # (batch_size, num_words, num_senses, num_hyps, output_dim)
    memory_input = reader_input[:, :, :, :, :-1]
    if input_mask is not None:
        memory_mask = input_mask
        if K.ndim(onto_nse_input) != K.ndim(input_mask):
            memory_mask = K.expand_dims(input_mask)
        # NOTE(review): the mask is divided by its sum over *all* axes
        # (whole batch), not per word -- confirm this is intended.
        mask_total = K.sum(memory_mask) + K.epsilon()
        memory_mask = K.cast(memory_mask / mask_total, 'float32')
        # (batch_size, num_words, output_dim)
        mem_0 = K.sum(memory_input * memory_mask, axis=(2, 3))
    else:
        # (batch_size, num_words, output_dim)
        mem_0 = K.mean(memory_input, axis=(2, 3))
    flat_memory = K.batch_flatten(mem_0)
    states = self.reader.get_initial_states(reader_input)
    states += [flat_memory]
    return states
def call(self, x, mask=None):
    """Attend over axis 1 of ``x``, scoring each step against the parent output.

    The parent layer's ``call`` result (named ``mean`` by the original
    author) is tiled along the time axis, combined with ``x`` through two
    projections and a ``tanh``, scored, soft-maxed, and used to take a
    weighted sum of ``x`` over axis 1.

    Shape comments are carried over from the original author.
    """
    # x: (batch_size, input_length, input_dim); mean: (batch_size, input_dim)
    mean = super(IntraAttention, self).call(x, mask)
    # (1, input_length)
    ones = K.expand_dims(K.mean(K.ones_like(x), axis=(0, 2)), dim=0)
    # (batch_size, input_length, input_dim)
    tiled_mean = K.permute_dimensions(
        K.dot(K.expand_dims(mean), ones), (0, 2, 1))

    if mask is not None:
        if K.ndim(mask) > K.ndim(x):
            # Assumed to be caused by a bug in Bidirectional; temporary fix.
            # TODO: Fix Bidirectional.
            mask = K.any(mask, axis=(-2, -1))
        if K.ndim(mask) < K.ndim(x):
            mask = K.expand_dims(mask)
        # Zero out masked positions before projecting.
        x = switch(mask, x, K.zeros_like(x))

    # (batch_size, input_length, proj_dim)
    projected_x = K.dot(x, self.vector_projector)
    projected_mean = K.dot(tiled_mean, self.mean_projector)
    combination = K.tanh(projected_x + projected_mean)
    # (batch_size, input_length)
    weights = K.softmax(K.dot(combination, self.scorer))
    # (batch_size, input_dim)
    return K.sum(K.expand_dims(weights) * x, axis=1)
def _drop_path(self, inputs):
    """Average ``inputs`` weighted by a drop mask.

    The mask comes from ``self._gen_global_path`` when ``self.is_global``
    is set and from ``self._gen_local_drops`` otherwise.  Each input is
    multiplied by its mask entry, the products are accumulated, and the
    total is divided by the sum of the mask unless that sum is zero.
    """
    count = len(inputs)
    drops = K.switch(
        self.is_global,
        self._gen_global_path(count),
        self._gen_local_drops(count, self.p),
    )
    ave = K.zeros(shape=self.average_shape)
    for idx, path in enumerate(inputs):
        ave += path * drops[idx]
    drop_total = K.sum(drops)
    # Global drop-path can zero every entry; skip the division in that
    # case to avoid dividing by zero.
    return K.switch(K.not_equal(drop_total, 0.), ave / drop_total, ave)
def call(self, x, mask=None):
    """Normalise ``x`` by a power of its locally pooled squared activations.

    Computes ``x / (k + (alpha / n) * S) ** beta`` where ``S`` is the
    average-pooled square of ``x`` summed over the channel axis and
    repeated back to the original channel count.

    ``mask`` is accepted for layer-API compatibility and is not used.
    """
    # ``image_dim_ordering`` is a function: it must be *called*.  Comparing
    # the function object itself to "th" is always False, which silently
    # forced the channels-last branch for every configuration.
    channels_first = K.image_dim_ordering() == "th"
    channel_axis = 1 if channels_first else 3
    num_channels = self.shape[channel_axis]

    half_n = self.n // 2
    squared = K.square(x)
    pooled = K.pool2d(squared, (half_n, half_n), strides=(1, 1),
                      padding="same", pool_mode="avg")
    # Collapse channels, then broadcast the sum back to every channel.
    summed = K.sum(pooled, axis=channel_axis, keepdims=True)
    averaged = (self.alpha / self.n) * K.repeat_elements(
        summed, num_channels, axis=channel_axis)
    denom = K.pow(self.k + averaged, self.beta)
    return x / denom
def categorical_crossentropy_3d(y_true, y_predicted):
    """Categorical cross-entropy for a softmax distribution over a 3D volume.

    Both tensors are flattened; the loss is the sum of
    ``y_true * -log(y_predicted + eps)`` divided by ``sum(y_true) + eps``.

    Parameters
    ----------
    y_true : keras.placeholder
        Ground-truth labels in a one-hot representation.
    y_predicted : keras.placeholder
        Softmax distribution over classes; must flatten to the same length
        as ``y_true``.

    Returns
    -------
    scalar
        Categorical cross-entropy loss value.
    """
    truth_flat = K.flatten(y_true)
    # Epsilon keeps log() finite where the prediction is exactly zero.
    neg_log_pred = -K.log(K.flatten(y_predicted) + K.epsilon())
    # With one-hot labels the sum of y_true counts the labelled elements.
    normaliser = K.sum(truth_flat) + K.epsilon()
    total = tf.reduce_sum(tf.multiply(truth_flat, neg_log_pred))
    return total / normaliser
def elementwise_softmax_3d(matrix):
    """Element-wise softmax over the class axis of a 3D volume.

    Parameters
    ----------
    matrix : keras.placeholder
        Tensor of shape (num_samples, dim1, dim2, dim3, num_classes).

    Returns
    -------
    keras.placeholder
        Tensor of the same shape holding the softmax distribution along
        axis 4 (the class axis).
    """
    # Subtracting the per-position maximum leaves the softmax unchanged
    # mathematically but keeps exp() from overflowing to inf (which would
    # turn the result into NaN via inf / inf) for large logits.
    shifted = matrix - K.max(matrix, axis=4, keepdims=True)
    exponentials = K.exp(shifted)
    return exponentials / K.sum(exponentials, axis=4, keepdims=True)
def dice(y_true, y_pred):
    """Sorensen-Dice metric on hard (arg-max) predictions.

                 TP
    Dice = 2 -------
              T + P

    Parameters
    ----------
    y_true : keras.placeholder
        Ground-truth class labels.
    y_pred : keras.placeholder
        Class predictions; the class axis is axis 4.

    Returns
    -------
    scalar
        Dice metric.
    """
    # Dividing by the per-position maximum and flooring turns the soft
    # prediction into a hard decision along axis 4.
    class_max = K.max(y_pred, axis=4, keepdims=True)
    decision = tf.floor((y_pred + K.epsilon()) / class_max)
    overlap = K.sum(y_true * decision)
    numerator = 2. * overlap + K.epsilon()
    denominator = K.sum(y_true) + K.sum(decision) + K.epsilon()
    return numerator / denominator
def step(self, inputs, states):
    """Run one recurrent step of a gated unit with an additive score gate.

    ``inputs`` is split along axis 1 at ``self.h_dim``: the first part feeds
    the score and the context, the second part feeds the gates and the
    candidate state.  Returns the new state twice, as ``(output, [state])``.

    The original author left a softmax-normalised attention variant
    disabled; the raw score is used to scale the context instead.
    """
    prev_state = states[0]  # previous memory
    context_in = inputs[:, :self.h_dim]
    signal_in = inputs[:, self.h_dim:]

    # Additive score from the previous state and the context slice.
    score = K.dot(
        K.tanh(K.dot(prev_state, self.Wa) + K.dot(context_in, self.Ua)),
        self.Va)
    tiled_score = K.repeat_elements(score, self.h_dim, axis=1)
    context = tiled_score * context_in

    # Update and reset gates are computed jointly and sliced apart.
    gates = K.sigmoid(
        K.dot(signal_in, self.Wzr)
        + K.dot(prev_state, self.Uzr)
        + K.dot(context, self.Czr))
    update_gate = gates[:, :self.units]
    reset_gate = gates[:, self.units: 2 * self.units]

    candidate = K.tanh(
        K.dot(signal_in, self.W)
        + K.dot(reset_gate * prev_state, self.U)
        + K.dot(context, self.C))
    new_state = (1 - update_gate) * prev_state + update_gate * candidate
    return new_state, [new_state]
def call(self, x, mask=None):
    """Return the attention-weighted sum of ``x`` over axis 1.

    Scores are ``tanh(dot_product(x, W) [+ b])``, exponentiated, optionally
    masked, and normalised over axis 1 before weighting ``x``.
    """
    scores = dot_product(x, self.W)
    if self.bias:
        scores += self.b
    weights = K.exp(K.tanh(scores))

    # Apply the mask after the exp; the weights are re-normalised next.
    if mask is not None:
        # Cast the mask to floatX to avoid float64 upcasting in Theano.
        weights *= K.cast(mask, K.floatx())

    # Early in training the sum may be almost zero, which produces NaNs;
    # adding a small positive epsilon to the sum works around that.
    normaliser = K.sum(weights, axis=1, keepdims=True) + K.epsilon()
    weights /= K.cast(normaliser, K.floatx())

    return K.sum(x * K.expand_dims(weights), axis=1)
def call(self, x, mask=None):
    """Return the attention-weighted sum of ``x`` over axis 1.

    A hidden representation ``tanh(dot_product(x, W) [+ b])`` is scored
    against the vector ``self.u``; the scores are exponentiated, optionally
    masked, and normalised over axis 1 before weighting ``x``.
    """
    hidden = dot_product(x, self.W)
    if self.bias:
        hidden += self.b
    hidden = K.tanh(hidden)
    weights = K.exp(K.dot(hidden, self.u))

    # Apply the mask after the exp; the weights are re-normalised next.
    if mask is not None:
        # Cast the mask to floatX to avoid float64 upcasting in Theano.
        weights *= K.cast(mask, K.floatx())

    # Early in training the sum may be almost zero, which produces NaNs;
    # adding a small positive epsilon to the sum works around that.
    normaliser = K.sum(weights, axis=1, keepdims=True) + K.epsilon()
    weights /= K.cast(normaliser, K.floatx())

    return K.sum(x * K.expand_dims(weights), axis=1)
def _mean_attentive_vectors(self, x2, cosine_matrix):
    """Mean attentive vectors.

    For every timestep of x1, take the cosine-weighted average of all
    contextual embeddings of x2.

    # Arguments
        x2: sequence vectors, (batch_size, x2_timesteps, embedding_size)
        cosine_matrix: cosine similarities matrix of x1 and x2,
                       (batch_size, x1_timesteps, x2_timesteps)

    # Output shape
        (batch_size, x1_timesteps, embedding_size)
    """
    # (batch_size, x1_timesteps, x2_timesteps, 1)
    weights = K.expand_dims(cosine_matrix, axis=-1)
    # (batch_size, 1, x2_timesteps, embedding_size)
    candidates = K.expand_dims(x2, axis=1)
    # (batch_size, x1_timesteps, embedding_size)
    numerator = K.sum(weights * candidates, axis=2)
    # (batch_size, x1_timesteps, 1); self.epsilon keeps the divisor non-zero
    denominator = K.expand_dims(
        K.sum(cosine_matrix, axis=-1) + self.epsilon, axis=-1)
    # (batch_size, x1_timesteps, embedding_size)
    return numerator / denominator
def vae_loss(self, x, x_decoded_mean):
    """Return reconstruction cross-entropy plus the KL term, per sample.

    The reconstruction term is the binary cross-entropy summed over the
    last axis; the KL term is computed from ``self.z_mean`` and
    ``self.z_log_var`` and also summed over the last axis.
    """
    reconstruction = K.sum(K.binary_crossentropy(x_decoded_mean, x), axis=-1)
    kl_inner = (1 + self.z_log_var
                - K.square(self.z_mean)
                - K.exp(self.z_log_var))
    kl_divergence = - 0.5 * K.sum(kl_inner, axis=-1)
    return reconstruction + kl_divergence

# Disabled draft of a feature-weighted variant, kept from the original:
# def weighted_vae_loss(self, feature_weights):
#     def loss(y_true, y_pred):
#         try:
#             x = K.binary_crossentropy(y_pred, y_true)
#             y = tf.Variable(feature_weights.astype('float32'))
#             # y2 = y_true / K.sum(y_true)
#             # import pdb;pdb.set_trace()
#             xent_loss = K.dot(x, y)
#             kl_loss = - 0.5 * K.sum(1 + self.z_log_var - K.square(self.z_mean) - K.exp(self.z_log_var), axis=-1)
#         except Exception as e:
#             print e
#             import pdb;pdb.set_trace()
#         return xent_loss + kl_loss
#     return loss
def rpn_loss_regr(num_anchors):
    """Build the RPN box-regression loss for ``num_anchors`` anchors.

    The returned function expects ``y_true`` to hold two halves along the
    channel axis: the first ``4 * num_anchors`` channels weight each
    regression target, the remaining channels are the targets themselves.
    The loss is a weighted smooth-L1 between targets and ``y_pred``,
    normalised by the (epsilon-padded) sum of the weights and scaled by
    ``lambda_rpn_regr``.
    """
    def rpn_loss_regr_fixed_num(y_true, y_pred):
        split = 4 * num_anchors
        if K.image_dim_ordering() == 'th':
            # Channels-first: the channel axis is axis 1.
            anchor_weights = y_true[:, :split, :, :]
            targets = y_true[:, split:, :, :]
        else:
            # Channels-last: the channel axis is axis 3.
            anchor_weights = y_true[:, :, :, :split]
            targets = y_true[:, :, :, split:]

        x = targets - y_pred
        x_abs = K.abs(x)
        # Cast in *both* orderings: 'th' ordering does not imply the Theano
        # backend, and TensorFlow cannot multiply a bool tensor by a float.
        x_bool = K.cast(K.less_equal(x_abs, 1.0), 'float32')
        # Quadratic for |x| <= 1, linear beyond.
        smooth_l1 = x_bool * (0.5 * x * x) + (1 - x_bool) * (x_abs - 0.5)
        return (lambda_rpn_regr * K.sum(anchor_weights * smooth_l1)
                / K.sum(epsilon + anchor_weights))

    return rpn_loss_regr_fixed_num
def call(self, x, mask=None):
    """Return ``x`` passed through Theano's ``sort``.

    ``mask`` is accepted but not used.  The original author left several
    alternative outputs (thresholded counts, two-class concatenation, a
    manual softmax) disabled; only the sorted tensor is returned.
    """
    # Imported locally, as in the original, so the module itself does not
    # require Theano at import time.
    import theano.tensor as T
    return T.sort(x)
def call(self, x, mask=None):
    """Return the first ``softmink`` and last ``softmaxk`` sorted columns.

    ``x`` is sorted with ``K.sort``; the leading ``self.softmink`` columns
    and the trailing ``self.softmaxk`` columns of the result are
    concatenated along the last axis.  ``mask`` is accepted but not used.
    """
    ordered = K.sort(x)
    head = ordered[:, :self.softmink]
    tail = ordered[:, ordered.shape[1] - self.softmaxk:]
    return K.concatenate([head, tail], axis=-1)
def to_configs(states, verbose=True, **kwargs):
    """Decode rendered puzzle states into per-panel position indices.

    Arguments:
        states: array of shape (N, height * base, width * base).
        verbose: unused; kept for interface compatibility.
        **kwargs: forwarded to ``Model.predict``.

    Returns whatever ``model.predict`` yields for the model built from
    ``wrap(states, config)``, where ``config`` holds, for each panel, the
    sum of the flat position indices whose error is not above ``threshold``.
    """
    base = setting['base']
    # Rows are axis 1 and columns are axis 2.  Width was previously derived
    # from axis 1 as well, which only agrees with the Input shape declared
    # below when the states are square.
    height = states.shape[1] // base
    width = states.shape[2] // base
    load(width, height)

    def build():
        states_in = Input(shape=(height * base, width * base))
        error = build_error(states_in, height, width, base)
        # 1 where error <= threshold, 0 otherwise
        matches = 1 - K.clip(K.sign(error - threshold), 0, 1)
        # a, h, w, panel -> a, pos, panel
        matches = K.reshape(
            matches, [K.shape(states_in)[0], height * width, -1])
        # a, pos, panel -> a, panel, pos
        matches = K.permute_dimensions(matches, [0, 2, 1])
        # Weight every match by its flat position index and sum them up.
        config = matches * K.arange(height * width, dtype='float')
        config = K.sum(config, axis=-1)
        return Model(states_in, wrap(states_in, config))

    model = build()
    return model.predict(states, **kwargs)
def to_configs(states, verbose=True, **kwargs):
    """Decode swirled, padded states into -1/+1 configurations.

    Arguments:
        states: array whose axis 1 has length ``dim + 2 * pad``.
        verbose: unused; kept for interface compatibility.
        **kwargs: forwarded to ``Model.predict``.

    The states are un-swirled, compared against the panels, and each
    position is mapped to a rounded value in {-1, +1}.
    """
    base = panels.shape[1]
    dim = states.shape[1] - pad * 2
    size = dim // base
    padded = dim + 2 * pad

    def build():
        states_in = Input(shape=(padded, padded))
        # NOTE(review): precedence makes this dim + (2*pad*relative_swirl_radius),
        # not (dim + 2*pad) * relative_swirl_radius -- confirm which is intended.
        unswirled = tensor_swirl(
            states_in,
            radius=dim + 2 * pad * relative_swirl_radius,
            **unswirl_args)
        error = build_errors(unswirled, base, pad, dim, size)
        # 1 where error <= threshold, 0 otherwise
        matches = 1 - K.clip(K.sign(error - threshold), 0, 1)
        # a, h, w, panel -> a, pos, panel
        matches = K.reshape(
            matches, [K.shape(states_in)[0], size * size, -1])
        # Weighting by [0, 1] keeps only the second panel's match.
        config = K.sum(matches * K.arange(2, dtype='float'), axis=-1)
        # These are 0/1 configs; for compatibility we need -1 and 1.
        config = - (config - 0.5) * 2
        return Model(states_in, wrap(states_in, K.round(config)))

    return build().predict(states, **kwargs)
def call(self, x, mask=None):
    """Return ``x`` summed over the time axis, weighted by attention.

    The attention weights are stored on ``self.at``.  When a mask is given
    it is applied after the softmax; the weights are not re-normalised
    afterwards.
    """
    # x: [..., time_steps, features]
    # hidden: [..., time_steps, attention_dims]
    hidden = K.dot(x, self.kernel)
    if self.use_bias:
        hidden = K.bias_add(hidden, self.bias)
    hidden = K.tanh(hidden)
    if self.use_context:
        hidden = hidden * self.context_kernel

    # Collapse `attention_dims` to 1: one weight per time step.
    step_scores = K.sum(hidden, axis=-1, keepdims=True)

    # Turn the weights into a distribution along the time axis, i.e. the
    # alphas along `time_steps` sum to 1.
    self.at = _softmax(step_scores, dim=1)
    if mask is not None:
        self.at *= K.cast(K.expand_dims(mask, -1), K.floatx())

    # Weighted sum along the `time_steps` axis.
    return K.sum(x * self.at, axis=-2)
def dice(y_true, y_pred, smooth=1.0):
    r"""
    The Dice coefficient, defined as ::

        \frac{2 |X \intersect Y|}{|X| + |Y|}

    Parameters
    ----------
    y_true, y_pred : tensors
        The ground-truth and predicted binary classification of an image.
    smooth : float
        Added to both numerator and denominator so that the ratio stays
        defined when both tensors sum to zero.
    """
    # The docstring is raw: in a normal string "\f" is a form feed and "\i"
    # is an invalid escape sequence.
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    return ((2. * intersection + smooth)
            / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth))
def reactionrnn_model(weights_path, num_classes, maxlen=140):
    '''
    Builds the reactionrnn model architecture (embedding -> GRU -> 5-way
    dense output) and loads the pretrained weights from `weights_path`
    by layer name.

    The model is compiled with MSE loss and the nadam optimizer before
    being returned.
    '''
    def _normalized_relu(x):
        # keepdims=True keeps the row sums as (batch, 1) so that they
        # broadcast against (batch, 5).  Without it the sums have shape
        # (batch,) and line up with the wrong axis for any batch size
        # other than 1.  An all-zero row still divides by zero.
        rectified = K.relu(x)
        return rectified / K.sum(rectified, axis=-1, keepdims=True)

    # Layer names are part of the weight file's contract (by_name=True);
    # only the Python variable avoids shadowing the builtin `input`.
    text_input = Input(shape=(maxlen,), name='input')
    embedded = Embedding(num_classes, 100, input_length=maxlen,
                         name='embedding')(text_input)
    rnn = GRU(256, return_sequences=False, name='rnn')(embedded)
    output = Dense(5, name='output', activation=_normalized_relu)(rnn)

    model = Model(inputs=[text_input], outputs=[output])
    model.load_weights(weights_path, by_name=True)
    model.compile(loss='mse', optimizer='nadam')
    return model
def get_initial_states(self, x):
    """Build the five all-zero initial states, sized from ``x``'s batch.

    Zeros shaped like ``x`` are summed over axis 1 and pushed through
    all-zero projection matrices so that every state inherits the batch
    dimension.  Returns ``[p, h, S_re, S_im, time]``.
    """
    batch_zeros = K.sum(K.zeros_like(x), axis=1)

    # All-zero projections; they only serve to produce the right shapes.
    reducer_s = K.zeros((self.input_dim, self.hidden_dim))
    reducer_f = K.zeros((self.hidden_dim, self.freq_dim))
    reducer_p = K.zeros((self.hidden_dim, self.output_dim))

    init_state_h = K.dot(batch_zeros, reducer_s)
    init_state_p = K.dot(init_state_h, reducer_p)

    state_column = K.reshape(
        K.zeros_like(init_state_h), (-1, self.hidden_dim, 1))
    freq_row = K.reshape(
        K.dot(init_state_h, reducer_f), (-1, 1, self.freq_dim))

    # Real and imaginary parts start from the same product.
    init_state_S_re = state_column * freq_row
    init_state_S_im = state_column * freq_row

    init_state_time = K.cast_to_floatx(0.)

    return [init_state_p, init_state_h,
            init_state_S_re, init_state_S_im, init_state_time]
def dice_coef(y_true, y_pred):
    """Return the Dice coefficient of two tensors, smoothed by 1.

    Both tensors are flattened; the smoothing constant is added to the
    numerator and the denominator.
    """
    smooth = 1.
    truth = K.flatten(y_true)
    prediction = K.flatten(y_pred)
    overlap = K.sum(truth * prediction)
    numerator = 2. * overlap + smooth
    denominator = K.sum(truth) + K.sum(prediction) + smooth
    return numerator / denominator
def get_split_averages(input_tensor, input_mask, indices):
    """Split ``input_tensor`` along axis 1 around per-sample indices.

    Returns a 3-tuple: the average of the values before the index, the
    values at the index, and the average of the values after the index.

    Shapes (from the original author):
        input_tensor: (batch_size, input_length, input_dim)
        input_mask: (batch_size, input_length)
        indices: (batch_size, 1)
    """
    # (1, input_length)
    positions = K.expand_dims(K.arange(K.shape(input_tensor)[1]), dim=0)
    # (batch_size, input_length)
    batched_range = K.repeat_elements(positions, K.shape(input_tensor)[0], 0)
    # (batch_size, input_length)
    tiled_indices = K.repeat_elements(indices, K.shape(input_tensor)[1], 1)

    # All three masks: (batch_size, input_length)
    greater_mask = K.greater(batched_range, tiled_indices)
    lesser_mask = K.lesser(batched_range, tiled_indices)
    equal_mask = K.equal(batched_range, tiled_indices)

    # The before/after masks must also respect the input mask.
    if input_mask is not None:
        greater_mask = switch(input_mask, greater_mask,
                              K.zeros_like(greater_mask))
        lesser_mask = switch(input_mask, lesser_mask,
                             K.zeros_like(lesser_mask))

    def _masked_sum(selector):
        # Sum of the selected timesteps: (batch_size, input_dim)
        kept = switch(K.expand_dims(selector), input_tensor,
                      K.zeros_like(input_tensor))
        return K.sum(kept, axis=1)

    post_sum = _masked_sum(greater_mask)
    pre_sum = _masked_sum(lesser_mask)
    values_at_indices = _masked_sum(equal_mask)

    # (batch_size, 1); epsilon guards against empty partitions.
    post_normalizer = K.expand_dims(
        K.sum(greater_mask, axis=1) + K.epsilon(), dim=1)
    pre_normalizer = K.expand_dims(
        K.sum(lesser_mask, axis=1) + K.epsilon(), dim=1)

    pre_average = K.cast(pre_sum / pre_normalizer, 'float32')
    post_average = K.cast(post_sum / post_normalizer, 'float32')
    return pre_average, values_at_indices, post_average
def summarize_memory(o_t, mem_tm1):
    '''
    Selects the relevant parts of the memory given the read output and
    summarizes the memory. Implements Equations 2-3 or 8-11 in the paper.

    Returns the pair (z_t, m_rt): the softmax attention over memory slots
    and the attention-weighted memory summary.
    '''
    # Selecting relevant memory slots, Equation 2
    slot_scores = K.sum(K.expand_dims(o_t, dim=1) * mem_tm1, axis=2)
    z_t = K.softmax(slot_scores)  # (batch_size, input_length)
    # Summarizing memory, Equation 3
    weighted_memory = K.expand_dims(z_t, dim=2) * mem_tm1
    m_rt = K.sum(weighted_memory, axis=1)  # (batch_size, output_dim)
    return z_t, m_rt
def dice_coef(y_true, y_pred):
    """Return the Dice coefficient of two tensors.

    Both tensors are flattened; the module-level ``smooth`` constant is
    added to the numerator and the denominator.
    """
    truth = K.flatten(y_true)
    prediction = K.flatten(y_pred)
    overlap = K.sum(truth * prediction)
    numerator = 2. * overlap + smooth
    denominator = K.sum(truth) + K.sum(prediction) + smooth
    return numerator / denominator
def dice_coef_np(y_true, y_pred):
    """Return the Dice coefficient of two NumPy arrays.

    Predictions below ``DICE_LOW_LIMIT`` are snapped to 0 and predictions
    above ``1 - DICE_LOW_LIMIT`` are snapped to 1 before the coefficient is
    computed.  The module-level ``smooth`` constant is added to the
    numerator and the denominator.  The inputs are not modified.
    """
    truth = y_true.flatten()
    # flatten() returns a copy, so the in-place snapping below does not
    # touch the caller's array.
    prediction = y_pred.flatten()
    prediction[prediction < DICE_LOW_LIMIT] = 0.
    prediction[prediction > 1 - DICE_LOW_LIMIT] = 1.
    overlap = np.sum(truth * prediction)
    numerator = 2. * overlap + smooth
    denominator = np.sum(truth) + np.sum(prediction) + smooth
    return numerator / denominator
def dice_coef_pos_np(y_true, y_pred, pos = 0):
    """Return the Dice coefficient for index ``pos`` along axis 1.

    Both arrays are sliced with ``[:, pos]`` and flattened; the module-level
    ``smooth`` constant is added to the numerator and the denominator.
    """
    truth = y_true[:, pos].flatten()
    prediction = y_pred[:, pos].flatten()
    overlap = np.sum(truth * prediction)
    numerator = 2. * overlap + smooth
    denominator = np.sum(truth) + np.sum(prediction) + smooth
    return numerator / denominator