We extracted the following 50 code examples from open-source Python projects to illustrate how to use keras.backend.permute_dimensions().
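As a quick orientation before the project examples: keras.backend.permute_dimensions(x, pattern) reorders the axes of a tensor according to pattern, much like numpy.transpose. A minimal sketch (the shapes are illustrative; any Keras 2.x backend behaves the same):

import numpy as np
from keras import backend as K

# Reorder a (batch, time, features) tensor into (batch, features, time),
# the same axis swap most of the examples below perform before a batch_dot
# or a channels-first/channels-last conversion.
x = K.variable(np.zeros((2, 5, 3)))     # shape (2, 5, 3)
y = K.permute_dimensions(x, (0, 2, 1))  # shape (2, 3, 5)
print(K.int_shape(y))                   # (2, 3, 5)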
def call(self, x, mask=None):
    mean = super(IntraAttention, self).call(x, mask)
    # x: (batch_size, input_length, input_dim)
    # mean: (batch_size, input_dim)
    ones = K.expand_dims(K.mean(K.ones_like(x), axis=(0, 2)), dim=0)  # (1, input_length)
    # (batch_size, input_length, input_dim)
    tiled_mean = K.permute_dimensions(K.dot(K.expand_dims(mean), ones), (0, 2, 1))
    if mask is not None:
        if K.ndim(mask) > K.ndim(x):
            # Assuming this is because of the bug in Bidirectional. Temporary fix follows.
            # TODO: Fix Bidirectional.
            mask = K.any(mask, axis=(-2, -1))
        if K.ndim(mask) < K.ndim(x):
            mask = K.expand_dims(mask)
        x = switch(mask, x, K.zeros_like(x))
    # (batch_size, input_length, proj_dim)
    projected_combination = K.tanh(K.dot(x, self.vector_projector) + K.dot(tiled_mean, self.mean_projector))
    scores = K.dot(projected_combination, self.scorer)  # (batch_size, input_length)
    weights = K.softmax(scores)  # (batch_size, input_length)
    attended_x = K.sum(K.expand_dims(weights) * x, axis=1)  # (batch_size, input_dim)
    return attended_x
def call(self, inputs, mask=None):
    input_shape = K.int_shape(inputs)
    outputs = self.layer.call(inputs)
    outputs = K.permute_dimensions(
        outputs,
        self.permute_pattern + [len(input_shape) - 1]
    )
    outputs_shape = self.compute_output_shape(input_shape)
    outputs = K.reshape(
        outputs,
        (-1, outputs_shape[1], outputs_shape[2])
    )
    mask_tensor = self.compute_mask(
        inputs,
        mask
    )
    mask_tensor = K.cast(mask_tensor, K.floatx())
    mask_tensor = K.expand_dims(mask_tensor)
    mask_output = K.repeat_elements(
        mask_tensor,
        outputs_shape[2],
        2
    )
    return outputs * mask_output
def step(self, inputs, states):
    h_tm1 = states[0]  # previous memory
    # B_U = states[1]  # dropout matrices for recurrent units
    # B_W = states[2]
    h_tm1a = K.dot(h_tm1, self.Wa)
    eij = K.dot(K.tanh(h_tm1a + K.dot(inputs[:, :self.h_dim], self.Ua)), self.Va)
    eijs = K.repeat_elements(eij, self.h_dim, axis=1)
    # alphaij = K.softmax(eijs)  # batchsize * lenh h batchsize * lenh * ndim
    # ci = K.permute_dimensions(K.permute_dimensions(self.h, [2, 0, 1]) * alphaij, [1, 2, 0])
    # cisum = K.sum(ci, axis=1)
    cisum = eijs * inputs[:, :self.h_dim]
    # print(K.shape(cisum), cisum.shape, ci.shape, self.h.shape, alphaij.shape, x.shape)
    zr = K.sigmoid(K.dot(inputs[:, self.h_dim:], self.Wzr) +
                   K.dot(h_tm1, self.Uzr) +
                   K.dot(cisum, self.Czr))
    zi = zr[:, :self.units]
    ri = zr[:, self.units: 2 * self.units]
    si_ = K.tanh(K.dot(inputs[:, self.h_dim:], self.W) +
                 K.dot(ri * h_tm1, self.U) +
                 K.dot(cisum, self.C))
    si = (1 - zi) * h_tm1 + zi * si_
    return si, [si]  # h_tm1, [h_tm1]
def create_attention_layer(self, input_dim_a, input_dim_b):
    """Create an attention layer of a model."""
    inp_a = Input(shape=(input_dim_a, self.hidden_dim,))
    inp_b = Input(shape=(input_dim_b, self.hidden_dim,))
    val = np.concatenate((np.zeros((self.max_sequence_length - 1, 1)), np.ones((1, 1))), axis=0)
    kcon = K.constant(value=val, dtype='float32')
    inp_b_perm = Lambda(lambda x: K.permute_dimensions(x, (0, 2, 1)))(inp_b)
    last_state = Lambda(lambda x: K.permute_dimensions(K.dot(x, kcon), (0, 2, 1)))(inp_b_perm)
    ker_in = glorot_uniform(seed=self.seed)
    outp_a = Dense(self.attention_dim,
                   input_shape=(input_dim_a, self.hidden_dim),
                   kernel_initializer=ker_in,
                   activation='relu')(inp_a)
    outp_last = Dense(self.attention_dim,
                      input_shape=(1, self.hidden_dim),
                      kernel_initializer=ker_in,
                      activation='relu')(last_state)
    outp_last_perm = Lambda(lambda x: K.permute_dimensions(x, (0, 2, 1)))(outp_last)
    outp = Lambda(lambda x: K.batch_dot(x[0], x[1], axes=[1, 2]))([outp_last_perm, outp_a])
    outp_norm = Activation('softmax')(outp)
    outp_norm_perm = Lambda(lambda x: K.permute_dimensions(x, (0, 2, 1)))(outp_norm)
    model = Model(inputs=[inp_a, inp_b], outputs=outp_norm_perm, name="attention_generator")
    return model
def create_attention_layer_f(self, input_dim_a, input_dim_b):
    """Create an attention layer of a model."""
    inp_a = Input(shape=(input_dim_a, self.hidden_dim,))
    inp_b = Input(shape=(input_dim_b, self.hidden_dim,))
    val = np.concatenate((np.zeros((self.max_sequence_length - 1, 1)), np.ones((1, 1))), axis=0)
    kcon = K.constant(value=val, dtype='float32')
    inp_b_perm = Lambda(lambda x: K.permute_dimensions(x, (0, 2, 1)))(inp_b)
    last_state = Lambda(lambda x: K.permute_dimensions(K.dot(x, kcon), (0, 2, 1)))(inp_b_perm)
    ker_in = glorot_uniform(seed=self.seed)
    outp_a = Dense(self.attention_dim,
                   input_shape=(input_dim_a, self.hidden_dim),
                   kernel_initializer=ker_in,
                   activation='relu')(inp_a)
    outp_last = Dense(self.attention_dim,
                      input_shape=(1, self.hidden_dim),
                      kernel_initializer=ker_in,
                      activation='relu')(last_state)
    outp_last_perm = Lambda(lambda x: K.permute_dimensions(x, (0, 2, 1)))(outp_last)
    outp = Lambda(lambda x: K.batch_dot(x[0], x[1], axes=[1, 2]))([outp_last_perm, outp_a])
    outp_norm = Activation('softmax')(outp)
    outp_norm_perm = Lambda(lambda x: K.permute_dimensions(x, (0, 2, 1)))(outp_norm)
    model = Model(inputs=[inp_a, inp_b], outputs=outp_norm_perm, name="att_generator_forw")
    return model
def create_attention_layer_b(self, input_dim_a, input_dim_b):
    """Create an attention layer of a model."""
    inp_a = Input(shape=(input_dim_a, self.hidden_dim,))
    inp_b = Input(shape=(input_dim_b, self.hidden_dim,))
    val = np.concatenate((np.ones((1, 1)), np.zeros((self.max_sequence_length - 1, 1))), axis=0)
    kcon = K.constant(value=val, dtype='float32')
    inp_b_perm = Lambda(lambda x: K.permute_dimensions(x, (0, 2, 1)))(inp_b)
    last_state = Lambda(lambda x: K.permute_dimensions(K.dot(x, kcon), (0, 2, 1)))(inp_b_perm)
    ker_in = glorot_uniform(seed=self.seed)
    outp_a = Dense(self.attention_dim,
                   input_shape=(input_dim_a, self.hidden_dim),
                   kernel_initializer=ker_in,
                   activation='relu')(inp_a)
    outp_last = Dense(self.attention_dim,
                      input_shape=(1, self.hidden_dim),
                      kernel_initializer=ker_in,
                      activation='relu')(last_state)
    outp_last_perm = Lambda(lambda x: K.permute_dimensions(x, (0, 2, 1)))(outp_last)
    outp = Lambda(lambda x: K.batch_dot(x[0], x[1], axes=[1, 2]))([outp_last_perm, outp_a])
    outp_norm = Activation('softmax')(outp)
    outp_norm_perm = Lambda(lambda x: K.permute_dimensions(x, (0, 2, 1)))(outp_norm)
    model = Model(inputs=[inp_a, inp_b], outputs=outp_norm_perm, name="att_generator_back")
    return model
def crosschannelnormalization(alpha=1e-4, k=2, beta=0.75, n=5, **kwargs):
    """
    This is the function used for cross channel normalization in the original Alexnet
    """
    def f(X):
        b, ch, r, c = X.shape
        half = n // 2
        square = K.square(X)
        extra_channels = K.spatial_2d_padding(K.permute_dimensions(square, (0, 2, 3, 1)),
                                              (0, half))
        extra_channels = K.permute_dimensions(extra_channels, (0, 3, 1, 2))
        scale = k
        for i in range(n):
            scale += alpha * extra_channels[:, i:i + ch, :, :]
        scale = scale ** beta
        return X / scale

    return Lambda(f, output_shape=lambda input_shape: input_shape, **kwargs)
def to_configs(states, verbose=True, **kwargs):
    base = setting['base']
    width = states.shape[1] // base
    height = states.shape[1] // base
    load(width, height)

    def build():
        P = len(setting['panels'])
        states = Input(shape=(height * base, width * base))
        error = build_error(states, height, width, base)
        matches = 1 - K.clip(K.sign(error - threshold), 0, 1)  # a, h, w, panel
        matches = K.reshape(matches, [K.shape(states)[0], height * width, -1])  # a, pos, panel
        matches = K.permute_dimensions(matches, [0, 2, 1])  # a, panel, pos
        config = matches * K.arange(height * width, dtype='float')
        config = K.sum(config, axis=-1)
        return Model(states, wrap(states, config))

    model = build()
    return model.predict(states, **kwargs)
def generate_gpu(configs, **kwargs):
    configs = np.array(configs)
    import math
    size = int(math.sqrt(len(configs[0])))
    base = panels.shape[1]
    dim = base * size

    def build():
        P = 2
        configs = Input(shape=(size * size,))
        _configs = 1 - K.round((configs / 2) + 0.5)  # from -1/1 to 1/0
        configs_one_hot = K.one_hot(K.cast(_configs, 'int32'), P)
        configs_one_hot = K.reshape(configs_one_hot, [-1, P])
        _panels = K.variable(panels)
        _panels = K.reshape(_panels, [P, base * base])
        states = tf.matmul(configs_one_hot, _panels)
        states = K.reshape(states, [-1, size, size, base, base])
        states = K.permute_dimensions(states, [0, 1, 3, 2, 4])
        states = K.reshape(states, [-1, size * base, size * base, 1])
        states = K.spatial_2d_padding(states, padding=((pad, pad), (pad, pad)))
        states = K.squeeze(states, -1)
        return Model(configs, wrap(configs, states))

    return preprocess(batch_swirl(build().predict(configs, **kwargs)))
def generate_gpu2(configs, **kwargs):
    configs = np.array(configs)
    import math
    size = int(math.sqrt(len(configs[0])))
    base = panels.shape[1]
    dim = base * size

    def build():
        P = 2
        configs = Input(shape=(size * size,))
        _configs = 1 - K.round((configs / 2) + 0.5)  # from -1/1 to 1/0
        configs_one_hot = K.one_hot(K.cast(_configs, 'int32'), P)
        configs_one_hot = K.reshape(configs_one_hot, [-1, P])
        _panels = K.variable(panels)
        _panels = K.reshape(_panels, [P, base * base])
        states = tf.matmul(configs_one_hot, _panels)
        states = K.reshape(states, [-1, size, size, base, base])
        states = K.permute_dimensions(states, [0, 1, 3, 2, 4])
        states = K.reshape(states, [-1, size * base, size * base, 1])
        states = K.spatial_2d_padding(states, padding=((pad, pad), (pad, pad)))
        states = K.squeeze(states, -1)
        states = tensor_swirl(states, radius=dim + 2 * pad * relative_swirl_radius, **swirl_args)
        return Model(configs, wrap(configs, states))

    return preprocess(build().predict(configs, **kwargs))
def generate_gpu(configs, **kwargs):
    import math
    size = int(math.sqrt(len(configs[0])))
    base = panels.shape[1]
    dim = base * size

    def build():
        P = 2
        configs = Input(shape=(size * size,))
        _configs = 1 - K.round((configs / 2) + 0.5)  # from -1/1 to 1/0
        configs_one_hot = K.one_hot(K.cast(_configs, 'int32'), P)
        configs_one_hot = K.reshape(configs_one_hot, [-1, P])
        _panels = K.variable(panels)
        _panels = K.reshape(_panels, [P, base * base])
        states = tf.matmul(configs_one_hot, _panels)
        states = K.reshape(states, [-1, size, size, base, base])
        states = K.permute_dimensions(states, [0, 1, 3, 2, 4])
        states = K.reshape(states, [-1, size * base, size * base])
        return Model(configs, wrap(configs, states))

    return build().predict(np.array(configs), **kwargs)
def _softmax(x, dim):
    """Computes softmax along a specified dim. Keras currently lacks this
    feature.
    """
    if K.backend() == 'tensorflow':
        import tensorflow as tf
        return tf.nn.softmax(x, dim)
    elif K.backend() == 'cntk':
        import cntk
        return cntk.softmax(x, dim)
    elif K.backend() == 'theano':
        # Theano cannot softmax along an arbitrary dim.
        # So, we will shuffle `dim` to -1 and un-shuffle after softmax.
        perm = np.arange(K.ndim(x))
        perm[dim], perm[-1] = perm[-1], perm[dim]
        x_perm = K.permute_dimensions(x, perm)
        output = K.softmax(x_perm)
        # Permute back
        perm[dim], perm[-1] = perm[-1], perm[dim]
        output = K.permute_dimensions(output, perm)
        return output
    else:
        raise ValueError("Backend '{}' not supported".format(K.backend()))
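A quick sanity check of the axis-shuffling trick above (a minimal sketch, assuming a Keras 2.x install on a TensorFlow 1.x backend, where tf.nn.softmax still accepts the dim argument):

import numpy as np
from keras import backend as K

# Softmax over axis 1 of a (2, 3, 4) tensor; every slice along that axis
# should sum to 1 regardless of which backend branch runs.
x = K.variable(np.random.rand(2, 3, 4))
y = _softmax(x, dim=1)
print(K.eval(K.sum(y, axis=1)))  # expect values close to 1.0 everywhere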
def call(self, x, mask=None):
    stride = self.subsample_length
    output_length, feature_dim, nb_filter = self.W_shape
    xs = []
    for i in range(output_length):
        slice_length = slice(i * stride, i * stride + self.filter_length)
        xs.append(K.reshape(x[:, slice_length, :], (1, -1, feature_dim)))
    x_aggregate = K.concatenate(xs, axis=0)
    # (output_length, batch_size, nb_filter)
    output = K.batch_dot(x_aggregate, self.W)
    output = K.permute_dimensions(output, (1, 0, 2))
    if self.bias:
        output += K.reshape(self.b, (1, output_length, nb_filter))
    output = self.activation(output)
    return output
def region_style_loss(style_image, target_image, style_mask, target_mask):
    '''Calculate style loss between style_image and target_image,
    for one common region specified by their (boolean) masks
    '''
    assert 3 == K.ndim(style_image) == K.ndim(target_image)
    assert 2 == K.ndim(style_mask) == K.ndim(target_mask)
    if K.image_dim_ordering() == 'th':
        masked_style = style_image * style_mask
        masked_target = target_image * target_mask
        nb_channels = K.shape(style_image)[0]
    else:
        masked_style = K.permute_dimensions(
            style_image, (2, 0, 1)) * style_mask
        masked_target = K.permute_dimensions(
            target_image, (2, 0, 1)) * target_mask
        nb_channels = K.shape(style_image)[-1]
    s = gram_matrix(masked_style) / K.mean(style_mask) / nb_channels
    c = gram_matrix(masked_target) / K.mean(target_mask) / nb_channels
    return K.mean(K.square(s - c))
def call(self, x, mask=None):
    y = K.dot(x, self.att_W)
    if not self.activation:
        if K.backend() == 'theano':
            weights = K.theano.tensor.tensordot(self.att_v, y, axes=[0, 2])
        elif K.backend() == 'tensorflow':
            weights = K.tensorflow.python.ops.math_ops.tensordot(self.att_v, y, axes=[0, 2])
    elif self.activation == 'tanh':
        if K.backend() == 'theano':
            weights = K.theano.tensor.tensordot(self.att_v, K.tanh(y), axes=[0, 2])
        elif K.backend() == 'tensorflow':
            weights = K.tensorflow.python.ops.math_ops.tensordot(self.att_v, K.tanh(y), axes=[0, 2])
    weights = K.softmax(weights)
    out = x * K.permute_dimensions(K.repeat(weights, x.shape[2]), [0, 2, 1])
    if self.op == 'attsum':
        out = out.sum(axis=1)
    elif self.op == 'attmean':
        out = out.sum(axis=1) / mask.sum(axis=1, keepdims=True)
    return K.cast(out, K.floatx())
def region_style_loss(style_image, target_image, style_mask, target_mask):
    '''Calculate style loss between style_image and target_image,
    for one common region specified by their (boolean) masks
    '''
    assert 3 == K.ndim(style_image) == K.ndim(target_image)
    assert 2 == K.ndim(style_mask) == K.ndim(target_mask)
    if K.image_data_format() == 'channels_first':
        masked_style = style_image * style_mask
        masked_target = target_image * target_mask
        num_channels = K.shape(style_image)[0]
    else:
        masked_style = K.permute_dimensions(
            style_image, (2, 0, 1)) * style_mask
        masked_target = K.permute_dimensions(
            target_image, (2, 0, 1)) * target_mask
        num_channels = K.shape(style_image)[-1]
    s = gram_matrix(masked_style) / K.mean(style_mask) / num_channels
    c = gram_matrix(masked_target) / K.mean(target_mask) / num_channels
    return K.mean(K.square(s - c))
def get_output(self, x):
    """
    Generate filters for given input
    """
    # Assuming 'th' ordering
    # Input shape (batch, channels, rows, columns)
    # Output shape (batch, filter_size ** 2, rows, columns)

    # Use input to generate filter
    # (batch, 15, rows, columns)
    output = K.relu(K.conv2d(x, self.kernel1, border_mode="same"))
    # (batch, rows, columns, 15)
    output = K.permute_dimensions(output, (0, 2, 3, 1))
    # (batch, rows, columns, 20)
    # output = K.tanh(K.dot(output, self.w1) + self.b1)
    # (batch, rows, columns, fs**2)
    output = K.tanh(K.dot(output, self.w2) + self.b2)
    # (batch, fs**2, rows, columns)
    output = K.permute_dimensions(output, (0, 3, 1, 2))
    return output
def _process_input(self, x):
    """Apply logistic and softmax activations to input tensor
    """
    logistic_activate = lambda x: 1.0 / (1.0 + K.exp(-x))

    (batch, w, h, channels) = x.get_shape()
    x_temp = K.permute_dimensions(x, (3, 0, 1, 2))
    x_t = []
    for i in range(self.num):
        k = self._entry_index(i, 0)
        x_t.extend([
            logistic_activate(K.gather(x_temp, (k, k + 1))),  # 0
            K.gather(x_temp, (k + 2, k + 3))])
        if self.background:
            x_t.append(K.gather(x_temp, (k + 4,)))
        else:
            x_t.append(logistic_activate(K.gather(x_temp, (k + 4,))))
        x_t.append(
            softmax(
                K.gather(x_temp, tuple(range(k + 5, k + self.coords + self.classes + 1))),
                axis=0))
    x_t = K.concatenate(x_t, axis=0)
    return K.permute_dimensions(x_t, (1, 2, 3, 0))
def make_patches_grid(x, patch_size, patch_stride):
    '''Break image `x` up into a grid of patches.

    input shape: (channels, rows, cols)
    output shape: (rows, cols, channels, patch_rows, patch_cols)
    '''
    from theano.tensor.nnet.neighbours import images2neibs  # TODO: all K, no T
    x = K.expand_dims(x, 0)
    xs = K.shape(x)
    num_rows = 1 + (xs[-2] - patch_size) // patch_stride
    num_cols = 1 + (xs[-1] - patch_size) // patch_stride
    num_channels = xs[-3]
    patches = images2neibs(x,
                           (patch_size, patch_size),
                           (patch_stride, patch_stride),
                           mode='valid')
    # neibs are sorted per-channel
    patches = K.reshape(patches,
                        (num_channels, K.shape(patches)[0] // num_channels,
                         patch_size, patch_size))
    patches = K.permute_dimensions(patches, (1, 0, 2, 3))
    # arrange in a 2d-grid (rows, cols, channels, px, py)
    patches = K.reshape(patches,
                        (num_rows, num_cols, num_channels, patch_size, patch_size))
    patches_norm = K.sqrt(K.sum(K.square(patches), axis=(2, 3, 4), keepdims=True))
    return patches, patches_norm

# get tensor representations of our images
def semantic_matrix(argv):
    assert len(argv) == 2
    q = argv[0]
    a = argv[1]
    q_sqrt = K.sqrt((q ** 2).sum(axis=2, keepdims=True))
    a_sqrt = K.sqrt((a ** 2).sum(axis=2, keepdims=True))
    denominator = K.batch_dot(q_sqrt, K.permute_dimensions(a_sqrt, [0, 2, 1]))
    return K.batch_dot(q, K.permute_dimensions(a, [0, 2, 1])) / (denominator + SAFE_EPSILON)

# See https://groups.google.com/forum/#!topic/theano-users/7gUdN6E00Dc
def call(self, x, mask=None):
    print("AttentionDecoder.call")
    H = x
    x = K.permute_dimensions(H, (1, 0, 2))[-1, :, :]
    if self.stateful or self.state_input or len(self.state_outputs) > 0:
        initial_states = self.states[:]
    else:
        initial_states = self.get_initial_states(H)
    constants = self.get_constants(H) + [H]
    y_0 = x
    x = K.repeat(x, self.output_length)
    initial_states += [y_0]
    last_output, outputs, states = K.rnn(
        self.step,
        x,
        initial_states,
        go_backwards=self.go_backwards,
        mask=mask,
        constants=constants,
        unroll=self.unroll,
        input_length=self.output_length)
    if self.stateful and not self.state_input:
        self.updates = zip(self.states, states)
    self.states_to_transfer = states
    return outputs
def make_patches_grid(x, patch_size, patch_stride):
    '''Break image `x` up into a grid of patches.

    input shape: (channels, rows, cols)
    output shape: (rows, cols, channels, patch_rows, patch_cols)
    '''
    from theano.tensor.nnet.neighbours import images2neibs  # TODO: all K, no T
    x = K.expand_dims(x, 0)
    xs = K.shape(x)
    num_rows = 1 + (xs[-2] - patch_size) // patch_stride
    num_cols = 1 + (xs[-1] - patch_size) // patch_stride
    num_channels = xs[-3]
    patches = images2neibs(x,
                           (patch_size, patch_size),
                           (patch_stride, patch_stride),
                           mode='valid')
    # neibs are sorted per-channel
    patches = K.reshape(patches,
                        (num_channels, K.shape(patches)[0] // num_channels,
                         patch_size, patch_size))
    patches = K.permute_dimensions(patches, (1, 0, 2, 3))
    # arrange in a 2d-grid (rows, cols, channels, px, py)
    patches = K.reshape(patches,
                        (num_rows, num_cols, num_channels, patch_size, patch_size))
    patches_norm = K.sqrt(K.sum(K.square(patches), axis=(2, 3, 4), keepdims=True))
    return patches, patches_norm
def grams(X):
    dim_ordering = K.image_dim_ordering()
    if dim_ordering == 'tf':
        X = K.permute_dimensions(X, (0, 3, 1, 2))

    (samples, c, h, w) = get_shape(X)

    X_reshaped = K.reshape(X, (-1, c, h * w))
    X_T = K.permute_dimensions(X_reshaped, (0, 2, 1))
    if K._BACKEND == 'theano':
        X_gram = T.batched_dot(X_reshaped, X_T)
    else:
        X_gram = tf.batch_matmul(X_reshaped, X_T)
    X_gram /= c * h * w

    return X_gram
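In later Keras versions the backend-specific branch above is unnecessary: K.batch_dot performs the batched matrix product on every backend. A portable variant (a sketch, assuming the same get_shape helper is in scope):

def grams_portable(X):
    # Same Gram computation, using only the Keras backend API.
    if K.image_dim_ordering() == 'tf':
        X = K.permute_dimensions(X, (0, 3, 1, 2))
    samples, c, h, w = get_shape(X)
    X_reshaped = K.reshape(X, (-1, c, h * w))
    X_T = K.permute_dimensions(X_reshaped, (0, 2, 1))
    # (batch, c, h*w) x (batch, h*w, c) -> (batch, c, c)
    return K.batch_dot(X_reshaped, X_T) / (c * h * w)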
def call(self, x, mask=None):
    input_, flow_layer_ = x
    stride_row, stride_col = self.subsample
    shape = input_._keras_shape
    output_row = shape[1] - self.kernel_size + 1
    output_col = shape[2] - self.kernel_size + 1
    xs = []
    ws = []
    for i in range(output_row):
        for j in range(output_col):
            slice_row = slice(i * stride_row, i * stride_row + self.kernel_size)
            slice_col = slice(j * stride_col, j * stride_col + self.kernel_size)
            xs.append(K.reshape(input_[:, slice_row, slice_col, :],
                                (1, -1, self.kernel_size ** 2, shape[-1])))
            ws.append(K.reshape(flow_layer_[:, i, j, :],
                                (1, -1, self.kernel_size ** 2, 1)))
    x_aggregate = K.concatenate(xs, axis=0)
    x_aggregate = K.permute_dimensions(x_aggregate, (0, 1, 3, 2))
    W = K.concatenate(ws, axis=0)
    output = K.batch_dot(x_aggregate, W)
    output = K.reshape(output, (output_row, output_col, -1, shape[3]))
    output = K.permute_dimensions(output, (2, 0, 1, 3))
    output = self.activation(output)
    return output
def _spectrogram_mono(self, x):
    '''x.shape : (None, 1, len_src),
    returns 2D batch of a mono power-spectrogram'''
    x = K.permute_dimensions(x, [0, 2, 1])
    x = K.expand_dims(x, 3)  # add a dummy dimension (channel axis)
    subsample = (self.n_hop, 1)
    output_real = K.conv2d(x, self.dft_real_kernels,
                           strides=subsample,
                           padding=self.padding,
                           data_format='channels_last')
    output_imag = K.conv2d(x, self.dft_imag_kernels,
                           strides=subsample,
                           padding=self.padding,
                           data_format='channels_last')
    output = output_real ** 2 + output_imag ** 2
    # now shape is (batch_sample, n_frame, 1, freq)
    if self.image_data_format == 'channels_last':
        output = K.permute_dimensions(output, [0, 3, 1, 2])
    else:
        output = K.permute_dimensions(output, [0, 2, 3, 1])
    return output
def call(self, x):
    power_spectrogram = super(Melspectrogram, self).call(x)
    # now,  th: (batch_sample, n_ch, n_freq, n_time)
    #       tf: (batch_sample, n_freq, n_time, n_ch)
    if self.image_data_format == 'channels_first':
        power_spectrogram = K.permute_dimensions(power_spectrogram, [0, 1, 3, 2])
    else:
        power_spectrogram = K.permute_dimensions(power_spectrogram, [0, 3, 2, 1])
    # now, whatever image_data_format, (batch_sample, n_ch, n_time, n_freq)
    output = K.dot(power_spectrogram, self.freq2mel)
    if self.image_data_format == 'channels_first':
        output = K.permute_dimensions(output, [0, 1, 3, 2])
    else:
        output = K.permute_dimensions(output, [0, 3, 2, 1])
    if self.power_melgram != 2.0:
        output = K.pow(K.sqrt(output), self.power_melgram)
    if self.return_decibel_melgram:
        output = backend_keras.amplitude_to_decibel(output)
    return output
def crosschannelnormalization(alpha=1e-4, k=2, beta=0.75, n=5, **kwargs):
    """
    This is the function used for cross channel normalization in the original Alexnet
    """
    def f(X):
        b, ch, r, c = X.shape
        half = n // 2
        square = K.square(X)
        extra_channels = K.spatial_2d_padding(K.permute_dimensions(square, (0, 2, 3, 1)),
                                              (0, half))
        extra_channels = K.permute_dimensions(extra_channels, (0, 3, 1, 2))
        scale = k
        for i in range(n):
            scale += alpha * extra_channels[:, i:i + ch, :, :]
        scale = scale ** beta
        return X / scale

    return Lambda(f, output_shape=lambda input_shape: input_shape, **kwargs)
def update_memory(self, z_t, h_t, mem_tm1):
    '''
    This method takes the attention vector (z_t), writer output (h_t) and previous timestep's memory (mem_tm1)
    and updates the memory. Implements equations 6, 14 or 15.
    '''
    tiled_z_t = K.tile(K.expand_dims(z_t), (self.output_dim))  # (batch_size, input_length, output_dim)
    input_length = K.shape(mem_tm1)[1]
    # (batch_size, input_length, output_dim)
    tiled_h_t = K.permute_dimensions(K.tile(K.expand_dims(h_t), (input_length)), (0, 2, 1))
    # Updating memory. First term in summation corresponds to selective forgetting and the second term to
    # selective addition. Equation 6.
    mem_t = mem_tm1 * (1 - tiled_z_t) + tiled_h_t * tiled_z_t  # (batch_size, input_length, output_dim)
    return mem_t
def compute_mask(self, inputs, mask=None):
    mask_tensor = self.layer.compute_mask(inputs, mask)
    mask_shape = K.int_shape(mask_tensor)
    mask_tensor = K.permute_dimensions(
        mask_tensor,
        self.permute_pattern
    )
    reduce_time = len(mask_shape) - 2
    for _ in range(reduce_time):
        mask_tensor = K.any(mask_tensor, -1)
    return mask_tensor
def transpose(x):
    return K.permute_dimensions(x, (0, 2, 1))
def weighted_with_attention(self, inputs):
    """Define a function for a lambda layer of a model."""
    inp, inp_cont = inputs
    val = np.eye(self.max_sequence_length)
    kcon = K.constant(value=val, dtype='float32')
    diag = K.repeat_elements(inp_cont, self.max_sequence_length, 2) * kcon
    return K.batch_dot(diag, K.permute_dimensions(inp, (0, 2, 1)), axes=[1, 2])
def weight_and_reduce(self, inputs):
    """Define a function for a lambda layer of a model."""
    inp, inp_cont = inputs
    reduced = K.batch_dot(inp_cont,
                          K.permute_dimensions(inp, (0, 2, 1)), axes=[1, 2])
    return K.squeeze(reduced, 1)
def create_full_matching_layer_b(self, input_dim_a, input_dim_b):
    """Create a full-matching layer of a model."""
    inp_a = Input(shape=(input_dim_a, self.hidden_dim,))
    inp_b = Input(shape=(input_dim_b, self.hidden_dim,))
    W = []
    for i in range(self.perspective_num):
        wi = K.random_uniform_variable((1, self.hidden_dim), -1.0, 1.0,
                                       seed=self.seed if self.seed is not None else 243)
        W.append(wi)
    val = np.concatenate((np.ones((1, 1)), np.zeros((self.max_sequence_length - 1, 1))), axis=0)
    kcon = K.constant(value=val, dtype='float32')
    inp_b_perm = Lambda(lambda x: K.permute_dimensions(x, (0, 2, 1)))(inp_b)
    last_state = Lambda(lambda x: K.permute_dimensions(K.dot(x, kcon), (0, 2, 1)))(inp_b_perm)
    m = []
    for i in range(self.perspective_num):
        outp_a = Lambda(lambda x: x * W[i])(inp_a)
        outp_last = Lambda(lambda x: x * W[i])(last_state)
        outp_a = Lambda(lambda x: K.l2_normalize(x, -1))(outp_a)
        outp_last = Lambda(lambda x: K.l2_normalize(x, -1))(outp_last)
        outp_last = Lambda(lambda x: K.permute_dimensions(x, (0, 2, 1)))(outp_last)
        outp = Lambda(lambda x: K.batch_dot(x[0], x[1], axes=[1, 2]))([outp_last, outp_a])
        outp = Lambda(lambda x: K.permute_dimensions(x, (0, 2, 1)))(outp)
        m.append(outp)
    if self.perspective_num > 1:
        persp = Lambda(lambda x: K.concatenate(x, 2))(m)
    else:
        persp = m
    model = Model(inputs=[inp_a, inp_b], outputs=persp)
    return model
def create_maxpool_matching_layer(self, input_dim_a, input_dim_b):
    """Create a maxpooling-matching layer of a model."""
    inp_a = Input(shape=(input_dim_a, self.hidden_dim,))
    inp_b = Input(shape=(input_dim_b, self.hidden_dim,))
    W = []
    for i in range(self.perspective_num):
        wi = K.random_uniform_variable((1, self.hidden_dim), -1.0, 1.0,
                                       seed=self.seed if self.seed is not None else 243)
        W.append(wi)
    m = []
    for i in range(self.perspective_num):
        outp_a = Lambda(lambda x: x * W[i])(inp_a)
        outp_b = Lambda(lambda x: x * W[i])(inp_b)
        outp_a = Lambda(lambda x: K.l2_normalize(x, -1))(outp_a)
        outp_b = Lambda(lambda x: K.l2_normalize(x, -1))(outp_b)
        outp_b = Lambda(lambda x: K.permute_dimensions(x, (0, 2, 1)))(outp_b)
        outp = Lambda(lambda x: K.batch_dot(x[0], x[1], axes=[1, 2]))([outp_b, outp_a])
        outp = Lambda(lambda x: K.permute_dimensions(x, (0, 2, 1)))(outp)
        outp = Lambda(lambda x: K.max(x, -1, keepdims=True))(outp)
        m.append(outp)
    if self.perspective_num > 1:
        persp = Lambda(lambda x: K.concatenate(x, 2))(m)
    else:
        persp = m
    model = Model(inputs=[inp_a, inp_b], outputs=persp)
    return model