The following 43 code examples, extracted from open-source Python projects, illustrate how to use keras.backend.squeeze().
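Before the project examples, here is a minimal, self-contained sketch (written for this page, not drawn from any of the projects below) of what keras.backend.squeeze() does: it removes a single size-1 axis from a tensor at the given position.

import numpy as np
from keras import backend as K

# A batch of 4 vectors with a trailing singleton dimension: shape (4, 3, 1).
x = K.constant(np.zeros((4, 3, 1)))

# Remove the size-1 axis at position -1, giving shape (4, 3).
y = K.squeeze(x, axis=-1)

print(K.int_shape(y))  # (4, 3)

The axis being removed must have size 1; squeezing a non-singleton axis is an error. Most examples below use this to drop dummy dimensions introduced by expand_dims, conv2d, or one_hot.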
def dot_product(x, kernel):
    """
    Wrapper for dot product operation, in order to be compatible with both
    Theano and Tensorflow
    Args:
        x (): input
        kernel (): weights
    Returns:
    """
    if K.backend() == 'tensorflow':
        # todo: check that this is correct
        return K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1)
    else:
        return K.dot(x, kernel)
def call(self, x):
    # input shape: (nb_samples, time (padded with zeros), input_dim)
    # note that the .build() method of subclasses MUST define
    # self.input_spec with a complete input shape.
    input_shape = self.input_spec[0].shape
    if self.window_size > 1:
        x = K.temporal_padding(x, (self.window_size - 1, 0))
    x = K.expand_dims(x, 2)  # add a dummy dimension

    # z, g
    output = K.conv2d(x, self.kernel, strides=self.strides,
                      padding='valid',
                      data_format='channels_last')
    output = K.squeeze(output, 2)  # remove the dummy dimension
    if self.use_bias:
        output = K.bias_add(output, self.bias, data_format='channels_last')

    z = output[:, :, :self.output_dim]
    g = output[:, :, self.output_dim:]

    return self.activation(z) * K.sigmoid(g)
def preprocess_input(self, inputs, training=None):
    if self.window_size > 1:
        inputs = K.temporal_padding(inputs, (self.window_size - 1, 0))
    inputs = K.expand_dims(inputs, 2)  # add a dummy dimension

    output = K.conv2d(inputs, self.kernel, strides=self.strides,
                      padding='valid',
                      data_format='channels_last')
    output = K.squeeze(output, 2)  # remove the dummy dimension
    if self.use_bias:
        output = K.bias_add(output, self.bias, data_format='channels_last')

    if self.dropout is not None and 0. < self.dropout < 1.:
        z = output[:, :, :self.units]
        f = output[:, :, self.units:2 * self.units]
        o = output[:, :, 2 * self.units:]
        f = K.in_train_phase(1 - _dropout(1 - f, self.dropout), f, training=training)
        return K.concatenate([z, f, o], -1)
    else:
        return output
def generate_gpu(configs, **kwargs):
    configs = np.array(configs)
    import math
    size = int(math.sqrt(len(configs[0])))
    base = panels.shape[1]
    dim = base * size

    def build():
        P = 2
        configs = Input(shape=(size * size,))
        _configs = 1 - K.round((configs / 2) + 0.5)  # from -1/1 to 1/0
        configs_one_hot = K.one_hot(K.cast(_configs, 'int32'), P)
        configs_one_hot = K.reshape(configs_one_hot, [-1, P])
        _panels = K.variable(panels)
        _panels = K.reshape(_panels, [P, base * base])
        states = tf.matmul(configs_one_hot, _panels)
        states = K.reshape(states, [-1, size, size, base, base])
        states = K.permute_dimensions(states, [0, 1, 3, 2, 4])
        states = K.reshape(states, [-1, size * base, size * base, 1])
        states = K.spatial_2d_padding(states, padding=((pad, pad), (pad, pad)))
        states = K.squeeze(states, -1)
        return Model(configs, wrap(configs, states))

    return preprocess(batch_swirl(build().predict(configs, **kwargs)))
def generate_gpu2(configs, **kwargs):
    configs = np.array(configs)
    import math
    size = int(math.sqrt(len(configs[0])))
    base = panels.shape[1]
    dim = base * size

    def build():
        P = 2
        configs = Input(shape=(size * size,))
        _configs = 1 - K.round((configs / 2) + 0.5)  # from -1/1 to 1/0
        configs_one_hot = K.one_hot(K.cast(_configs, 'int32'), P)
        configs_one_hot = K.reshape(configs_one_hot, [-1, P])
        _panels = K.variable(panels)
        _panels = K.reshape(_panels, [P, base * base])
        states = tf.matmul(configs_one_hot, _panels)
        states = K.reshape(states, [-1, size, size, base, base])
        states = K.permute_dimensions(states, [0, 1, 3, 2, 4])
        states = K.reshape(states, [-1, size * base, size * base, 1])
        states = K.spatial_2d_padding(states, padding=((pad, pad), (pad, pad)))
        states = K.squeeze(states, -1)
        states = tensor_swirl(states, radius=dim + 2 * pad * relative_swirl_radius, **swirl_args)
        return Model(configs, wrap(configs, states))

    return preprocess(build().predict(configs, **kwargs))
def _backward(gamma, mask):
    '''Backward recurrence of the linear chain crf.'''
    gamma = K.cast(gamma, 'int32')

    def _backward_step(gamma_t, states):
        y_tm1 = K.squeeze(states[0], 0)
        y_t = batch_gather(gamma_t, y_tm1)
        return y_t, [K.expand_dims(y_t, 0)]

    initial_states = [K.expand_dims(K.zeros_like(gamma[:, 0, 0]), 0)]
    _, y_rev, _ = K.rnn(_backward_step, gamma, initial_states, go_backwards=True)
    y = K.reverse(y_rev, 1)

    if mask is not None:
        mask = K.cast(mask, dtype='int32')
        # mask output
        y *= mask
        # set masked values to -1
        y += -(1 - mask)
    return y
def _backward(gamma, mask):
    '''Backward recurrence of the linear chain crf.'''
    gamma = K.cast(gamma, 'int32')

    def _backward_step(gamma_t, states):
        y_tm1 = K.squeeze(states[0], 0)
        y_t = KC.batch_gather(gamma_t, y_tm1)
        return y_t, [K.expand_dims(y_t, 0)]

    initial_states = [K.expand_dims(K.zeros_like(gamma[:, 0, 0]), 0)]
    _, y_rev, _ = K.rnn(_backward_step, gamma, initial_states, go_backwards=True)
    y = K.reverse(y_rev, 1)

    if mask is not None:
        mask = K.cast(mask, dtype='int32')
        # mask output
        y *= mask
        # set masked values to -1
        y += -(1 - mask)
    return y
def make_model(state_shape, n_actions):
    in_t = Input(shape=(HISTORY_STEPS,) + state_shape, name='input')
    action_t = Input(shape=(1,), dtype='int32', name='action')
    advantage_t = Input(shape=(1,), name='advantage')

    fl_t = Flatten(name='flat')(in_t)
    l1_t = Dense(SIMPLE_L1_SIZE, activation='relu', name='l1')(fl_t)
    l2_t = Dense(SIMPLE_L2_SIZE, activation='relu', name='l2')(l1_t)
    policy_t = Dense(n_actions, name='policy', activation='softmax')(l2_t)

    def loss_func(args):
        p_t, act_t, adv_t = args
        oh_t = K.one_hot(act_t, n_actions)
        oh_t = K.squeeze(oh_t, 1)
        p_oh_t = K.log(1e-6 + K.sum(oh_t * p_t, axis=-1, keepdims=True))
        res_t = adv_t * p_oh_t
        return -res_t

    loss_t = Lambda(loss_func, output_shape=(1,), name='loss')([policy_t, action_t, advantage_t])

    return Model(input=[in_t, action_t, advantage_t], output=[policy_t, loss_t])
def layout_loss_hard(y_true, y_pred):
    y_pred = K.clip(y_pred, -50., 50.)  # prevent overflow
    exp_pred = K.exp(y_pred - K.max(y_pred, axis=1, keepdims=True))
    y_pred_softmax = exp_pred / K.sum(exp_pred, axis=1, keepdims=True)

    max_pred_softmax = K.max(y_pred_softmax, axis=1, keepdims=True)
    bin_pred_softmax_a = y_pred_softmax / max_pred_softmax
    bin_pred_softmax = bin_pred_softmax_a ** 6.

    final_pred = K.mean(bin_pred_softmax, axis=[2, 3])
    final_pred = final_pred / (K.sum(final_pred, axis=1, keepdims=True) + K.epsilon())

    y_true_s = K.squeeze(y_true, axis=3)
    y_true_s = K.squeeze(y_true_s, axis=2)

    tier_wise_loss_v = -K.clip(K.log(final_pred), -500, 500) * y_true_s
    return K.mean(K.sum(tier_wise_loss_v, axis=1))

# compile
def custom_for_keras(self, ALL_word_embeds):
    ## only the top 20 rows from word_vectors is legit!
    def top_accuracy(true_word_indices, image_vectors):
        l2 = lambda x, axis: K.sqrt(K.sum(K.square(x), axis=axis, keepdims=True))
        l2norm = lambda x, axis: x / l2(x, axis)

        l2_words = l2norm(ALL_word_embeds, axis=1)
        l2_images = l2norm(image_vectors, axis=1)

        tiled_words = K.tile(K.expand_dims(l2_words, axis=1), (1, 200, 1))
        tiled_images = K.tile(K.expand_dims(l2_images, axis=1), (1, 20, 1))

        diff = K.squeeze(l2(l2_words - l2_images, axis=2))

        # slice_top3 = lambda x: x[:, 0:3]
        # slice_top1 = lambda x: x[:, 0:1]

        diff_top5 = metrics.top_k_categorical_accuracy(tiled_images, diff)
        return diff_top5

    return top_accuracy
def _backward(gamma, mask):
    """Backward recurrence of the linear chain crf."""
    gamma = K.cast(gamma, 'int32')

    def _backward_step(gamma_t, states):
        y_tm1 = K.squeeze(states[0], 0)
        y_t = batch_gather(gamma_t, y_tm1)
        return y_t, [K.expand_dims(y_t, 0)]

    initial_states = [K.expand_dims(K.zeros_like(gamma[:, 0, 0]), 0)]
    _, y_rev, _ = K.rnn(_backward_step, gamma, initial_states, go_backwards=True)
    y = K.reverse(y_rev, 1)

    if mask is not None:
        mask = K.cast(mask, dtype='int32')
        # mask output
        y *= mask
        # set masked values to -1
        y += -(1 - mask)
    return y
def lin_interpolation_2d(inp, dim):
    num_rows, num_cols, num_filters = K.int_shape(inp)[1:]
    conv = SeparableConv2D(num_filters, (num_rows, num_cols), use_bias=False)
    x = conv(inp)

    w = conv.get_weights()
    w[0].fill(0)
    w[1].fill(0)
    linspace = linspace_2d(num_rows, num_cols, dim=dim)

    for i in range(num_filters):
        w[0][:, :, i, 0] = linspace[:, :]
        w[1][0, 0, i, i] = 1.

    conv.set_weights(w)
    conv.trainable = False

    x = Lambda(lambda x: K.squeeze(x, axis=1))(x)
    x = Lambda(lambda x: K.squeeze(x, axis=1))(x)
    x = Lambda(lambda x: K.expand_dims(x, axis=-1))(x)

    return x
def augmented_loss(self, y_true, y_pred):
    _y_pred = Activation("softmax")(y_pred)
    loss = K.categorical_crossentropy(_y_pred, y_true)

    # y is (batch x seq x vocab)
    y_indexes = K.argmax(y_true, axis=2)  # turn one hot to index. (batch x seq)
    y_vectors = self.embedding(y_indexes)  # lookup the vector (batch x seq x vector_length)

    #v_length = self.setting.vector_length
    #y_vectors = K.reshape(y_vectors, (-1, v_length))
    #y_t = K.map_fn(lambda v: K.dot(self.embedding.embeddings, K.reshape(v, (-1, 1))), y_vectors)
    #y_t = K.squeeze(y_t, axis=2)  # unknown but necessary operation
    #y_t = K.reshape(y_t, (-1, self.sequence_size, self.vocab_size))

    # vector x embedding dot products (batch x seq x vocab)
    y_t = tf.tensordot(y_vectors, K.transpose(self.embedding.embeddings), 1)
    y_t = K.reshape(y_t, (-1, self.sequence_size, self.vocab_size))  # explicitly set shape

    y_t = K.softmax(y_t / self.temperature)
    _y_pred_t = Activation("softmax")(y_pred / self.temperature)
    aug_loss = kullback_leibler_divergence(y_t, _y_pred_t)
    loss += (self.gamma * self.temperature) * aug_loss

    return loss
def step(self, x_input, states):
    #print "x_input:", x_input, x_input.shape
    # <TensorType(float32, matrix)>

    input_shape = self.input_spec[0].shape
    en_seq = states[-1]
    _, [h, c] = super(PointerLSTM, self).step(x_input, states[:-1])

    # vt*tanh(W1*e+W2*d)
    dec_seq = K.repeat(h, input_shape[1])
    Eij = time_distributed_dense(en_seq, self.W1, output_dim=1)
    Dij = time_distributed_dense(dec_seq, self.W2, output_dim=1)
    U = self.vt * tanh(Eij + Dij)
    U = K.squeeze(U, 2)

    # make probability tensor
    pointer = softmax(U)
    return pointer, [h, c]
def weight_and_reduce(self, inputs):
    """Define a function for a lambda layer of a model."""

    inp, inp_cont = inputs
    reduced = K.batch_dot(inp_cont,
                          K.permute_dimensions(inp, (0, 2, 1)),
                          axes=[1, 2])
    return K.squeeze(reduced, 1)
def terminal_f(self, inp):
    val = np.concatenate((np.zeros((self.max_sequence_length - 1, 1)), np.ones((1, 1))), axis=0)
    kcon = K.constant(value=val, dtype='float32')
    inp = Lambda(lambda x: K.permute_dimensions(x, (0, 2, 1)))(inp)
    last_state = Lambda(lambda x: K.permute_dimensions(K.dot(x, kcon), (0, 2, 1)))(inp)
    return K.squeeze(last_state, 1)
def terminal_b(self, inp):
    val = np.concatenate((np.ones((1, 1)), np.zeros((self.max_sequence_length - 1, 1))), axis=0)
    kcon = K.constant(value=val, dtype='float32')
    inp = Lambda(lambda x: K.permute_dimensions(x, (0, 2, 1)))(inp)
    last_state = Lambda(lambda x: K.permute_dimensions(K.dot(x, kcon), (0, 2, 1)))(inp)
    return K.squeeze(last_state, 1)
def sparse_loss(self, y_true, y_pred):
    '''Linear Chain Conditional Random Field loss function with sparse
    tag sequences.
    '''
    y_true = K.cast(y_true, 'int32')
    y_true = K.squeeze(y_true, 2)
    mask = self._fetch_mask()
    return sparse_chain_crf_loss(y_true, y_pred, self.U, self.b_start, self.b_end, mask)
def _layer_LRN(self):
    self.add_body(0, '''
from keras.layers.core import Layer
class LRN(Layer):

    def __init__(self, size=5, alpha=0.0005, beta=0.75, k=2, **kwargs):
        self.n = size
        self.alpha = alpha
        self.beta = beta
        self.k = k
        super(LRN, self).__init__(**kwargs)

    def build(self, input_shape):
        self.shape = input_shape
        super(LRN, self).build(input_shape)

    def call(self, x, mask=None):
        half_n = self.n - 1
        squared = K.square(x)
        scale = self.k
        norm_alpha = self.alpha / (2 * half_n + 1)
        if K.image_dim_ordering() == "th":
            b, f, r, c = self.shape
            squared = K.expand_dims(squared, 0)
            squared = K.spatial_3d_padding(squared, padding=((half_n, half_n), (0, 0), (0, 0)))
            squared = K.squeeze(squared, 0)
            for i in range(half_n * 2 + 1):
                scale += norm_alpha * squared[:, i:i+f, :, :]
        else:
            b, r, c, f = self.shape
            squared = K.expand_dims(squared, -1)
            squared = K.spatial_3d_padding(squared, padding=((0, 0), (0, 0), (half_n, half_n)))
            squared = K.squeeze(squared, -1)
            for i in range(half_n * 2 + 1):
                scale += norm_alpha * squared[:, :, :, i:i+f]
        scale = K.pow(scale, self.beta)
        return x / scale

    def compute_output_shape(self, input_shape):
        return input_shape''')
def compute_mask(self, inputs, mask=None):
    """Computes an output mask tensor.

    # Arguments
        inputs: Tensor or list of tensors.
        mask: Tensor or list of tensors.

    # Returns
        None or a tensor (or list of tensors, one per output tensor of the layer).
    """
    if mask is None:
        return None

    # dimensions of mask should be (batch_size, time_steps)
    assert mask.ndim == 2

    # add a dummy dimension so that the shape is now
    # (batch_size, time_steps, 1)
    mask = K.expand_dims(mask, 2)

    # now add a fake 2nd spatial dimension
    # (batch_size, time_steps, 1, 1)
    mask = K.expand_dims(mask, 3)

    strides = self.strides + (1,)
    pool_size = self.pool_size + (1,)

    mask = K.pool2d(
        mask,
        pool_size=pool_size,
        strides=strides,
        padding=self.padding,
        data_format="channels_last",
        pool_mode='max')

    # get rid of dummy dimensions
    mask = K.squeeze(mask, 3)
    mask = K.squeeze(mask, 2)
    return mask
def call(self, x, mask=None):
    x = K.permute_dimensions(x, (0, 2, 1))
    x = K.reshape(x, (-1, self.input_length))
    x = K.expand_dims(x, 1)
    x = K.expand_dims(x, -1)
    if self.real_filts is not None:
        conv_out_r = K.conv2d(x, self.W_r, strides=self.subsample,
                              border_mode=self.border_mode,
                              dim_ordering='th')
    else:
        conv_out_r = x

    if self.complex_filts is not None:
        conv_out_c1 = K.conv2d(x, self.W_c1, strides=self.subsample,
                               border_mode=self.border_mode,
                               dim_ordering='th')
        conv_out_c2 = K.conv2d(x, self.W_c2, strides=self.subsample,
                               border_mode=self.border_mode,
                               dim_ordering='th')
        conv_out_c = K.sqrt(K.square(conv_out_c1) + K.square(conv_out_c2) + K.epsilon())
        output = K.concatenate((conv_out_r, conv_out_c), axis=1)
    else:
        output = conv_out_r

    output_shape = self.get_output_shape_for((None, self.input_length, self.input_dim))
    output = K.squeeze(output, 3)  # remove the dummy 3rd dimension
    output = K.permute_dimensions(output, (2, 1, 0))
    output = K.reshape(output, (-1, output_shape[1], output.shape[1] * output.shape[2]))
    return output
def create_policy_loss(policy_t, value_t, n_actions):
    """
    Policy loss
    :param policy_t: policy tensor from prediction part
    :param value_t: value tensor from prediction part
    :param n_actions: count of actions in space
    :param entropy_beta: entropy loss scaling factor
    :return: action_t, advantage_t, policy_loss_t
    """
    action_t = Input(batch_shape=(None, 1), name='action', dtype='int32')
    reward_t = Input(batch_shape=(None, 1), name="reward")

    def policy_loss_func(args):
        p_t, v_t, act_t, rew_t = args
        log_p_t = tf.nn.log_softmax(p_t)
        oh_t = K.one_hot(act_t, n_actions)
        oh_t = K.squeeze(oh_t, 1)
        p_oh_t = K.sum(log_p_t * oh_t, axis=-1, keepdims=True)
        adv_t = (rew_t - K.stop_gradient(v_t))
        tf.summary.scalar("advantage_mean", K.mean(adv_t))
        tf.summary.scalar("advantage_rms", K.sqrt(K.mean(K.square(adv_t))))
        res_t = -adv_t * p_oh_t
        tf.summary.scalar("loss_policy_mean", K.mean(res_t))
        tf.summary.scalar("loss_policy_rms", K.sqrt(K.mean(K.square(res_t))))
        return res_t

    loss_args = [policy_t, value_t, action_t, reward_t]
    policy_loss_t = Lambda(policy_loss_func, output_shape=(1,), name='policy_loss')(loss_args)
    tf.summary.scalar("value_mean", K.mean(value_t))
    tf.summary.scalar("reward_mean", K.mean(reward_t))
    return action_t, reward_t, policy_loss_t
def predictions_and_gradient(self, image, label):
    predictions, gradient = self._pred_grad_fn([
        self._process_input(image[np.newaxis]),
        np.array([label])])
    predictions = np.squeeze(predictions, axis=0)
    gradient = np.squeeze(gradient, axis=0)
    gradient = self._process_gradient(gradient)
    assert predictions.shape == (self.num_classes(),)
    assert gradient.shape == image.shape
    return predictions, gradient
def backward(self, gradient, image):
    assert gradient.ndim == 1
    gradient = np.reshape(gradient, (-1, 1))
    gradient = self._bw_grad_fn([
        gradient,
        self._process_input(image[np.newaxis]),
    ])
    gradient = np.squeeze(gradient, axis=0)
    gradient = self._process_gradient(gradient)
    assert gradient.shape == image.shape
    return gradient
def binarize_label(batch_seg):
    label_tensor_to_return = np.zeros((batch_seg.shape[0], class_number, image_size[0], image_size[1]), dtype=np.bool)
    count = 0
    for i in range(batch_seg.shape[0]):
        label = np.squeeze(batch_seg[i, :, :])
        label_return = np.zeros((class_number, label.shape[0], label.shape[1]), dtype=np.bool)
        it = np.nditer(label, flags=['multi_index'])
        while not it.finished:
            if np.asscalar(it[0]) <= 12 or np.asscalar(it[0]) == 15 or np.asscalar(it[0]) == 17 or np.asscalar(it[0]) == 19 or np.asscalar(it[0]) == 21:
                label_return[it[0], it.multi_index[0], it.multi_index[1]] = True
            it.iternext()
        label_return = label_return[np.newaxis, ...]
        label_tensor_to_return[count, :, :, :] = label_return
        count += 1
    return label_tensor_to_return
def binarize_SP(batch_seg):
    max_dim = np.amax(batch_seg)
    label_tensor_to_return = np.zeros((batch_seg.shape[0], max_dim, image_size[0], image_size[1]), dtype=np.bool)
    count = 0
    for i in range(batch_seg.shape[0]):
        label = np.squeeze(batch_seg[i, :, :])
        label_return = np.zeros((max_dim, label.shape[0], label.shape[1]), dtype=np.bool)
        it = np.nditer(label, flags=['multi_index'])
        while not it.finished:
            label_return[it[0] - 1, it.multi_index[0], it.multi_index[1]] = True
            it.iternext()
        label_return = label_return[np.newaxis, ...]
        label_tensor_to_return[count, :, :, :] = label_return
        count += 1
    return label_tensor_to_return
def step(self, x, states):
    h_tm1, c_tm1, y_tm1, B, U, H = states
    s = K.dot(c_tm1, self.W_h) + self.b_h
    s = K.repeat(s, self.input_length)
    energy = time_distributed_dense(s + H, self.W_a, self.b_a)
    energy = K.squeeze(energy, 2)
    alpha = K.softmax(energy)
    alpha = K.repeat(alpha, self.input_dim)
    alpha = K.permute_dimensions(alpha, (0, 2, 1))
    weighted_H = H * alpha
    v = K.sum(weighted_H, axis=1)
    y, new_states = super(AttentionDecoder, self).step(v, states[:-1])
    return y, new_states
def sparse_loss(self, y_true, y_pred):
    """Linear Chain Conditional Random Field loss function with sparse
    tag sequences.
    """
    y_true = K.cast(y_true, 'int32')
    y_true = K.squeeze(y_true, 2)
    mask = self._fetch_mask()
    return sparse_chain_crf_loss(y_true, y_pred, self.U, self.b_start, self.b_end, mask)
def masked_categorical_crossentropy(y_true, y_pred):
    mask = K.cast(K.expand_dims(K.any(y_true, -1), axis=-1), 'float32')
    y_pred *= mask
    y_pred += 1 - mask
    y_pred += 1 - mask
    losses = K.categorical_crossentropy(y_pred, y_true)
    losses *= K.squeeze(mask, -1)
    ## Normalize by number of real segments, using a small non-zero denominator in cases of padding characters
    ## in order to avoid division by zero
    #losses /= (K.mean(mask) + (1e-10*(1-K.mean(mask))))
    return losses
def masked_categorical_accuracy(y_true, y_pred):
    mask = K.cast(K.expand_dims(K.greater(K.argmax(y_true, axis=-1), 0), axis=-1), 'float32')
    accuracy = K.cast(K.equal(K.argmax(y_true, axis=-1), K.argmax(y_pred, axis=-1)), 'float32')
    accuracy *= K.squeeze(mask, -1)
    ## Normalize by number of real segments, using a small non-zero denominator in cases of padding characters
    ## in order to avoid division by zero
    #accuracy /= (K.mean(mask) + (1e-10*(1-K.mean(mask))))
    return accuracy
def viterbi_decoding(self, X, mask=None):
    input_energy = self.activation(K.dot(X, self.kernel) + self.bias)
    if self.use_boundary:
        input_energy = self.add_boundary_energy(input_energy, mask, self.left_boundary, self.right_boundary)

    argmin_tables = self.recursion(input_energy, mask, return_logZ=False)
    argmin_tables = K.cast(argmin_tables, 'int32')

    # backward to find best path, `initial_best_idx` can be any,
    # as all elements in the last argmin_table are the same
    argmin_tables = K.reverse(argmin_tables, 1)
    initial_best_idx = [K.expand_dims(argmin_tables[:, 0, 0])]  # matrix instead of vector is required by tf `K.rnn`
    if K.backend() == 'theano':
        initial_best_idx = [K.T.unbroadcast(initial_best_idx[0], 1)]

    def gather_each_row(params, indices):
        n = K.shape(indices)[0]
        if K.backend() == 'theano':
            return params[K.T.arange(n), indices]
        else:
            indices = K.transpose(K.stack([K.tf.range(n), indices]))
            return K.tf.gather_nd(params, indices)

    def find_path(argmin_table, best_idx):
        next_best_idx = gather_each_row(argmin_table, best_idx[0][:, 0])
        next_best_idx = K.expand_dims(next_best_idx)
        if K.backend() == 'theano':
            next_best_idx = K.T.unbroadcast(next_best_idx, 1)
        return next_best_idx, [next_best_idx]

    _, best_paths, _ = K.rnn(find_path, argmin_tables, initial_best_idx,
                             input_length=K.int_shape(X)[1], unroll=self.unroll)
    best_paths = K.reverse(best_paths, 1)
    best_paths = K.squeeze(best_paths, 2)

    return K.one_hot(best_paths, self.units)
def gramian(filters):
    c_filters = K.batch_flatten(K.permute_dimensions(K.squeeze(filters, axis=0), pattern=(2, 0, 1)))
    return K.dot(c_filters, K.transpose(c_filters))
def dot_product(x, kernel):
    """
    Wrapper for dot product operation, in order to be compatible with both
    Theano and Tensorflow
    Args:
        x (): input
        kernel (): weights
    Returns:
    """
    if K.backend() == 'tensorflow':
        return K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1)
    else:
        return K.dot(x, kernel)
def get_output(self, train=False):
    X = train
    X = K.expand_dims(X, -1)  # add a dimension of the right
    X = K.permute_dimensions(X, (0, 2, 1, 3))
    conv_out = K.conv2d(X, self.W, strides=self.subsample,
                        border_mode=self.border_mode,
                        dim_ordering='th')

    output = conv_out + K.reshape(self.b, (1, self.nb_filter, 1, 1))
    output = self.activation(output)
    output = K.squeeze(output, 3)  # remove the dummy 3rd dimension
    output = K.permute_dimensions(output, (0, 2, 1))
    return output
def call(self, x, mask=None):
    energy = K.squeeze(self.layer(x), 2)
    p_matrix = softmax(energy)
    if mask is not None:
        mask = self.squash_mask(mask)
        p_matrix = make_safe(p_matrix * mask)  # remove unwanted items
        p_matrix = p_matrix / K.sum(p_matrix, axis=-1, keepdims=True)  # renormalize
    return make_safe(p_matrix)
def call(self, x, mask=None):
    # x: (batch_size, input_length, input_dim) where input_length = head_size + 2
    head_encoding = x[:, :-2, :]  # (batch_size, head_size, input_dim)
    prep_encoding = x[:, -2, :]  # (batch_size, input_dim)
    child_encoding = x[:, -1, :]  # (batch_size, input_dim)
    if self.composition_type == 'HPCD':
        # TODO: The following line may not work with TF.
        # (batch_size, head_size, input_dim, 1) * (1, head_size, input_dim, proj_dim)
        head_proj_prod = K.expand_dims(head_encoding) * K.expand_dims(self.dist_proj_head, dim=0)
        head_projection = K.sum(head_proj_prod, axis=2)  # (batch_size, head_size, proj_dim)
    else:
        head_projection = K.dot(head_encoding, self.proj_head)  # (batch_size, head_size, proj_dim)
    prep_projection = K.expand_dims(K.dot(prep_encoding, self.proj_prep), dim=1)  # (batch_size, 1, proj_dim)
    child_projection = K.expand_dims(K.dot(child_encoding, self.proj_child), dim=1)  # (batch_size, 1, proj_dim)
    #(batch_size, head_size, proj_dim)
    if self.composition_type == 'HPCT':
        composed_projection = K.tanh(head_projection + prep_projection + child_projection)
    elif self.composition_type == 'HPC' or self.composition_type == "HPCD":
        prep_child_projection = K.tanh(prep_projection + child_projection)  # (batch_size, 1, proj_dim)
        composed_projection = K.tanh(head_projection + prep_child_projection)
    else:
        # Composition type in HC
        composed_projection = K.tanh(head_projection + child_projection)
    for hidden_layer in self.hidden_layers:
        composed_projection = K.tanh(K.dot(composed_projection, hidden_layer))  # (batch_size, head_size, proj_dim)
    # (batch_size, head_size)
    head_word_scores = K.squeeze(K.dot(composed_projection, self.scorer), axis=-1)
    if mask is None:
        attachment_probabilities = K.softmax(head_word_scores)  # (batch_size, head_size)
    else:
        if K.ndim(mask) > 2:
            # This means this layer came after a Bidirectional layer. Keras has this bug which
            # concatenates input masks instead of output masks.
            # TODO: Fix Bidirectional instead.
            mask = K.any(mask, axis=(-2, -1))
        # We need to do a masked softmax.
        exp_scores = K.exp(head_word_scores)  # (batch_size, head_size)
        head_mask = mask[:, :-2]  # (batch_size, head_size)
        # (batch_size, head_size)
        masked_exp_scores = switch(head_mask, exp_scores, K.zeros_like(head_encoding[:, :, 0]))
        # (batch_size, 1). Adding epsilon to avoid divison by 0. But epsilon is float64.
        exp_sum = K.cast(K.expand_dims(K.sum(masked_exp_scores, axis=1) + K.epsilon()), 'float32')
        attachment_probabilities = masked_exp_scores / exp_sum  # (batch_size, head_size)
    return attachment_probabilities
def call(self, x, mask=None):
    # x[0]: (batch_size, input_length, input_dim)
    # x[1]: (batch_size, 1) indices of prepositions
    # Optional: x[2]: (batch_size, input_length - 2)
    assert isinstance(x, list) or isinstance(x, tuple)
    encoded_sentence = x[0]
    prep_indices = K.squeeze(x[1], axis=-1)  #(batch_size,)
    batch_indices = K.arange(K.shape(encoded_sentence)[0])  # (batch_size,)
    if self.with_attachment_probs:
        # We're essentially doing K.argmax(x[2]) here, but argmax is not differentiable!
        head_probs = x[2]
        head_probs_padding = K.zeros_like(x[2])[:, :2]  # (batch_size, 2)
        # (batch_size, input_length)
        padded_head_probs = K.concatenate([head_probs, head_probs_padding])
        # (batch_size, 1)
        max_head_probs = K.expand_dims(K.max(padded_head_probs, axis=1))
        # (batch_size, input_length, 1)
        max_head_prob_indices = K.expand_dims(K.equal(padded_head_probs, max_head_probs))
        # (batch_size, input_length, input_dim)
        masked_head_encoding = K.switch(max_head_prob_indices, encoded_sentence, K.zeros_like(encoded_sentence))
        # (batch_size, input_dim)
        head_encoding = K.sum(masked_head_encoding, axis=1)
    else:
        head_indices = prep_indices - 1  # (batch_size,)
        head_encoding = encoded_sentence[batch_indices, head_indices, :]  # (batch_size, input_dim)
    prep_encoding = encoded_sentence[batch_indices, prep_indices, :]  # (batch_size, input_dim)
    child_encoding = encoded_sentence[batch_indices, prep_indices + 1, :]  # (batch_size, input_dim)
    '''
    prep_indices = x[1]
    sentence_mask = mask[0]
    if sentence_mask is not None:
        if K.ndim(sentence_mask) > 2:
            # This means this layer came after a Bidirectional layer. Keras has this bug which
            # concatenates input masks instead of output masks.
            # TODO: Fix Bidirectional instead.
            sentence_mask = K.any(sentence_mask, axis=(-2, -1))
    head_encoding, prep_encoding, child_encoding = self.get_split_averages(encoded_sentence, sentence_mask,
                                                                           prep_indices)
    '''
    head_projection = K.dot(head_encoding, self.proj_head)  # (batch_size, proj_dim)
    prep_projection = K.dot(prep_encoding, self.proj_prep)  # (batch_size, proj_dim)
    child_projection = K.dot(child_encoding, self.proj_child)  # (batch_size, proj_dim)
    #(batch_size, proj_dim)
    if self.composition_type == 'HPCT':
        composed_projection = K.tanh(head_projection + prep_projection + child_projection)
    elif self.composition_type == 'HPC':
        prep_child_projection = K.tanh(prep_projection + child_projection)  # (batch_size, proj_dim)
        composed_projection = K.tanh(head_projection + prep_child_projection)
    else:
        # Composition type in HC
        composed_projection = K.tanh(head_projection + child_projection)
    for hidden_layer in self.hidden_layers:
        composed_projection = K.tanh(K.dot(composed_projection, hidden_layer))  # (batch_size, proj_dim)
    # (batch_size, num_classes)
    class_scores = K.dot(composed_projection, self.scorer)
    label_probabilities = K.softmax(class_scores)
    return label_probabilities
def step(self, inputs, states):
    h_tm1 = states[0]  # previous memory
    #B_U = states[1]  # dropout matrices for recurrent units
    #B_W = states[2]
    h_tm1a = K.dot(h_tm1, self.Wa)
    eij = K.dot(K.tanh(K.repeat(h_tm1a, K.shape(self.h)[1]) + self.ha), self.Va)
    eijs = K.squeeze(eij, -1)
    alphaij = K.softmax(eijs)  # batchsize * lenh    h: batchsize * lenh * ndim
    ci = K.permute_dimensions(K.permute_dimensions(self.h, [2, 0, 1]) * alphaij, [1, 2, 0])
    cisum = K.sum(ci, axis=1)
    #print(K.shape(cisum), cisum.shape, ci.shape, self.h.shape, alphaij.shape, x.shape)

    zr = K.sigmoid(K.dot(inputs, self.Wzr) + K.dot(h_tm1, self.Uzr) + K.dot(cisum, self.Czr))
    zi = zr[:, :self.units]
    ri = zr[:, self.units: 2 * self.units]
    si_ = K.tanh(K.dot(inputs, self.W) + K.dot(ri * h_tm1, self.U) + K.dot(cisum, self.C))
    si = (1 - zi) * h_tm1 + zi * si_
    return si, [si]  #h_tm1, [h_tm1]
    '''if self.consume_less == 'gpu':

        matrix_x = K.dot(x * B_W[0], self.W) + self.b
        matrix_inner = K.dot(h_tm1 * B_U[0], self.U[:, :2 * self.units])

        x_z = matrix_x[:, :self.units]
        x_r = matrix_x[:, self.units: 2 * self.units]
        inner_z = matrix_inner[:, :self.units]
        inner_r = matrix_inner[:, self.units: 2 * self.units]

        z = self.inner_activation(x_z + inner_z)
        r = self.inner_activation(x_r + inner_r)

        x_h = matrix_x[:, 2 * self.units:]
        inner_h = K.dot(r * h_tm1 * B_U[0], self.U[:, 2 * self.units:])
        hh = self.activation(x_h + inner_h)
    else:
        if self.consume_less == 'cpu':
            x_z = x[:, :self.units]
            x_r = x[:, self.units: 2 * self.units]
            x_h = x[:, 2 * self.units:]
        elif self.consume_less == 'mem':
            x_z = K.dot(x * B_W[0], self.W_z) + self.b_z
            x_r = K.dot(x * B_W[1], self.W_r) + self.b_r
            x_h = K.dot(x * B_W[2], self.W_h) + self.b_h
        else:
            raise ValueError('Unknown `consume_less` mode.')
        z = self.inner_activation(x_z + K.dot(h_tm1 * B_U[0], self.U_z))
        r = self.inner_activation(x_r + K.dot(h_tm1 * B_U[1], self.U_r))

        hh = self.activation(x_h + K.dot(r * h_tm1 * B_U[2], self.U_h))
    h = z * h_tm1 + (1 - z) * hh
    return h, [h]'''
def call(self, x, mask=None):
    ndim = K.ndim(x)
    xshape = K.shape(x)
    W = self.kernel_activation(self.kernel)
    if self.filter_axes == self.sum_axes:
        ax1 = [a - 1 for a in self.sum_axes]
        ax1 = ax1 + list(set(range(ndim)) - set(ax1))
        ax2 = list(set(range(ndim)) - set(self.sum_axes))
        permute_dims = list(range(len(ax2)))
        permute_dims.insert(self.sum_axes[0], len(ax2))
        outdims = [-1] + [xshape[a] for a in ax2[1:]] + [self.filters_complex + self.filters_simple]
        ax2 = ax2 + self.sum_axes
        W = K.permute_dimensions(W, ax1)
        W = K.reshape(W, (-1, 2 * self.filters_complex + self.filters_simple))
        x = K.permute_dimensions(x, ax2)
        x = K.reshape(x, (-1, K.shape(W)[0]))
        output = K.dot(x, W)
        output_complex = K.sqrt(K.square(output[:, :self.filters_complex]) + K.square(output[:, self.filters_complex:2 * self.filters_complex]) + K.epsilon())
        output_simple = output[:, 2 * self.filters_complex:]
        output = K.reshape(K.concatenate([output_complex, output_simple], axis=1), outdims)
        if self.use_bias:
            b_broadcast = [i for j, i in enumerate(self.bias_broadcast) if j not in self.sum_axes]
            b = K.squeeze(self.bias, self.sum_axes[0])
            if len(self.sum_axes) > 1:
                b = K.squeeze(b, self.sum_axes[1] - 1)
            if len(self.sum_axes) > 2:
                b = K.squeeze(b, self.sum_axes[2] - 2)
            if K.backend() == 'theano':
                output += K.pattern_broadcast(b, b_broadcast)
            else:
                output += b
        output = K.permute_dimensions(output, permute_dims)
    else:
        # bcast = list(np.where(self.broadcast)[0])
        permute_dims = list(range(ndim + 1))
        permute_dims[self.sum_axes[0]] = ndim
        permute_dims[ndim] = self.sum_axes[0]
        if K.backend() == 'theano':
            output = K.sum(x[..., None] * K.pattern_broadcast(W, self.kernel_broadcast), axis=self.sum_axes, keepdims=True)
        else:
            output = K.sum(x[..., None] * W, axis=self.sum_axes, keepdims=True)
        output_complex = K.sqrt(K.square(output[..., :self.filters_complex]) + K.square(output[..., self.filters_complex:2 * self.filters_complex]) + K.epsilon())
        output_simple = output[..., 2 * self.filters_complex:]
        output = K.concatenate([output_complex, output_simple], axis=-1)
        if self.use_bias:
            if K.backend() == 'theano':
                output += K.pattern_broadcast(self.bias, self.bias_broadcast)
            else:
                output += self.bias
        output = K.squeeze(K.permute_dimensions(output, permute_dims), ndim)
        if len(self.sum_axes) > 1:
            output = K.squeeze(output, self.sum_axes[1])
    return self.activation(output)
def step(self, x, states):
    ytm, stm = states

    # repeat the hidden state to the length of the sequence
    _stm = K.repeat(stm, self.timesteps)

    # now multiplty the weight matrix with the repeated hidden state
    _Wxstm = K.dot(_stm, self.W_a)

    # calculate the attention probabilities
    # this relates how much other timesteps contributed to this one.
    et = K.dot(activations.tanh(_Wxstm + self._uxpb),
               K.expand_dims(self.V_a))
    at = K.exp(et)
    at_sum = K.sum(at, axis=1)
    at_sum_repeated = K.repeat(at_sum, self.timesteps)
    at /= at_sum_repeated  # vector of size (batchsize, timesteps, 1)

    # calculate the context vector
    context = K.squeeze(K.batch_dot(at, self.x_seq, axes=1), axis=1)

    # ~~~> calculate new hidden state
    # first calculate the "r" gate:
    rt = activations.sigmoid(
        K.dot(ytm, self.W_r)
        + K.dot(stm, self.U_r)
        + K.dot(context, self.C_r)
        + self.b_r)

    # now calculate the "z" gate
    zt = activations.sigmoid(
        K.dot(ytm, self.W_z)
        + K.dot(stm, self.U_z)
        + K.dot(context, self.C_z)
        + self.b_z)

    # calculate the proposal hidden state:
    s_tp = activations.tanh(
        K.dot(ytm, self.W_p)
        + K.dot((rt * stm), self.U_p)
        + K.dot(context, self.C_p)
        + self.b_p)

    # new hidden state:
    st = (1 - zt) * stm + zt * s_tp

    yt = activations.softmax(
        K.dot(ytm, self.W_o)
        + K.dot(stm, self.U_o)
        + K.dot(context, self.C_o)
        + self.b_o)

    if self.return_probabilities:
        return at, [yt, st]
    else:
        return yt, [yt, st]
def plot(self, Xs, Xs_mask, Y, logdir, prefix, iteration, batch_size=128):
    ## Initialize plotting objects
    fig = plt.figure()
    fig.set_size_inches(10, 10)
    ax_input = fig.add_subplot(311)
    ax_targ = fig.add_subplot(312)
    ax_pred = fig.add_subplot(313)

    inputs_raw = Xs
    masks_raw = Xs_mask
    preds_raw = self.predict(inputs_raw, masks_raw, batch_size)
    if inputs_raw.shape[-1] == 1 and self.charDim:
        inputs_raw = oneHot(inputs_raw, self.charDim)
    targs_raw = np.expand_dims(Y, -1)

    for u in range(len(Xs)):
        ## Set up plotting canvas
        fig.patch.set_visible(False)
        fig.suptitle('Utterance %d, Checkpoint %d' % (u, iteration))

        ## Plot inputs (heatmap)
        inputs = inputs_raw[u]
        inputs = inputs[np.where(1 - masks_raw[u])]
        inputs = np.swapaxes(inputs, 0, 1)
        ax_input.clear()
        ax_input.axis('off')
        ax_input.set_title('Input', loc='left')
        hm_input = ax_input.pcolor(inputs, cmap=plt.cm.Blues)

        ## Plot targets (bar chart)
        targs = targs_raw[u]
        targs = targs[np.where(1 - masks_raw[u])]
        ax_targ.clear()
        ax_targ.axis('off')
        ax_targ.set_title('Target', loc='left')
        ax_targ.set_ylim([0, 1])
        ax_targ.margins(0)
        hm_targ = ax_targ.bar(np.arange(len(targs)), targs)

        ## Plot predictions (bar chart)
        preds = preds_raw[u]
        preds = preds[np.where(1 - masks_raw[u])]
        preds = np.squeeze(preds, -1)
        ax_pred.clear()
        ax_pred.axis('off')
        ax_pred.set_title('Prediction', loc='left')
        ax_pred.set_ylim([0, 1])
        ax_pred.margins(0)
        hm_pred = ax_pred.bar(np.arange(len(preds)), preds)

        ## Save plot
        fig.savefig(logdir + '/barchart_' + prefix + '_utt' + str(u) + '_iter' + str(iteration) + '.jpg')

    plt.close(fig)
def accumulate(attend_function, inputs, input_length,
               mask=None, return_probabilities=False):
    '''get the running attention over a sequence.

    given a 3dim tensor where the 1st dim is time (or not. whatever.), calculating the running
    attended sum.
    in other words, at the first time step, you only have that item.
                    at the second time step, attend over the first two items.
                    at the third.. the third. so on.

    this basically a mod on keras' rnn implementation
    author: bcm
    '''

    ndim = inputs.ndim
    assert ndim >= 3, 'inputs should be at least 3d'

    axes = [1, 0] + list(range(2, ndim))
    inputs = inputs.dimshuffle(axes)

    indices = list(range(input_length))

    successive_outputs = []
    if mask is not None:
        if mask.ndim == ndim - 1:
            mask = K.expand_dims(mask)
        assert mask.ndim == ndim
        mask = mask.dimshuffle(axes)
        prev_output = None

    successive_outputs = []
    successive_pvecs = []
    uncover_mask = K.zeros_like(inputs)
    uncover_indices = K.arange(input_length)
    for _ in range(ndim - 1):
        uncover_indices = K.expand_dims(uncover_indices)
    make_subset = lambda i, X: K.switch(uncover_indices <= i, X, uncover_mask)

    for i in indices:
        inputs_i = make_subset(i, inputs)
        mask_i = make_subset(i, mask)
        if mask is not None:
            # this should not output the time dimension; it should be marginalized over.
            output = attend_function(inputs_i, mask_i)
        else:
            # this should not output the time dimension; it should be marginalized over.
            output = attend_function(inputs_i)
        if return_probabilities:
            output, p_vectors = output
            successive_pvecs.append(p_vectors)
        assert output.ndim == 2, "Your attention function is malfunctioning; the attention accumulator should return 2 dimensional tensors"
        successive_outputs.append(output)

    outputs = K.pack(successive_outputs)
    K.squeeze(outputs, -1)
    axes = [1, 0] + list(range(2, outputs.ndim))
    outputs = outputs.dimshuffle(axes)

    if return_probabilities:
        out_pvecs = K.pack(successive_pvecs)
        K.squeeze(out_pvecs, -1)
        out_pvecs = out_pvecs.dimshuffle(axes)
        outputs = [outputs, out_pvecs]

    return outputs