我们从Python开源项目中，提取了以下50个代码示例，用于说明如何使用keras.backend.image_data_format()。
def make_generator():
    """Build the generator network.

    The model takes a 100-dimensional noise vector as its "seed" and emits
    single-channel 28x28 images, laid out according to
    ``K.image_data_format()``.

    Returns:
        The (uncompiled) ``Sequential`` generator model.
    """
    channels_first = K.image_data_format() == 'channels_first'
    # Layout of the 128 feature maps (7x7 each) produced by the dense stem.
    feature_shape = (128, 7, 7) if channels_first else (7, 7, 128)
    bn_axis = 1 if channels_first else -1

    model = Sequential()
    stack = [
        Dense(1024, input_dim=100),
        LeakyReLU(),
        Dense(128 * 7 * 7),
        BatchNormalization(),
        LeakyReLU(),
        Reshape(feature_shape, input_shape=(128 * 7 * 7,)),
        # Each stride-2 transposed convolution doubles the spatial size.
        Conv2DTranspose(128, (5, 5), strides=2, padding='same'),
        BatchNormalization(axis=bn_axis),
        LeakyReLU(),
        Convolution2D(64, (5, 5), padding='same'),
        BatchNormalization(axis=bn_axis),
        LeakyReLU(),
        Conv2DTranspose(64, (5, 5), strides=2, padding='same'),
        BatchNormalization(axis=bn_axis),
        LeakyReLU(),
        # Because the training inputs were normalized to lie in [-1, 1],
        # tanh is used on the generator output so that it lies in the
        # same range.
        Convolution2D(1, (5, 5), padding='same', activation='tanh'),
    ]
    for layer in stack:
        model.add(layer)
    return model
def __init__(self, data_format=None):
    """Initialise empty sequence state and resolve the image axis layout.

    Args:
        data_format: ``'channels_last'``, ``'channels_first'`` or ``None``
            to fall back to ``K.image_data_format()``.

    Raises:
        ValueError: if ``data_format`` is not one of the two known layouts.
    """
    self.sequence = None
    self.mask = None
    self.batch_size = None  # We do not know yet
    # NOTE(review): this is the builtin ``id``; possibly meant as an
    # identity transformation -- confirm against the rest of the class.
    self.transformation = id

    if data_format is None:
        data_format = K.image_data_format()
    if data_format not in {'channels_last', 'channels_first'}:
        raise ValueError('`data_format` should be `"channels_last"` (channel after row and '
                         'column) or `"channels_first"` (channel before row and column). '
                         'Received arg: ', data_format)
    self.data_format = data_format

    # Axis indices for a 4D batch; the validation above leaves only two cases.
    if data_format == 'channels_first':
        axes = (1, 2, 3)
    else:
        axes = (3, 1, 2)
    self.channel_axis, self.row_axis, self.col_axis = axes

    # NOTE(review): only the channel axis is shifted down by one here;
    # row/col axes are passed through unchanged -- confirm this is intended.
    self.common_args = dict(row_axis=self.row_axis,
                            col_axis=self.col_axis,
                            channel_axis=self.channel_axis - 1)
def conv2d_bn(x, nb_filter, num_row, num_col, strides=(1, 1), padding='same', name=None):
    """Apply a ReLU-activated 2D convolution followed by FixedBatchNormalization.

    Args:
        x: input tensor.
        nb_filter: number of convolution filters.
        num_row: kernel height.
        num_col: kernel width.
        strides: convolution strides.
        padding: convolution padding mode.
        name: optional base name; the layers are named ``name + '_conv'``
            and ``name + '_bn'``.

    Returns:
        The output tensor of the normalization layer.
    """
    conv_name = None if name is None else name + '_conv'
    bn_name = None if name is None else name + '_bn'
    bn_axis = 1 if K.image_data_format() == 'channels_first' else 3

    conv_layer = Convolution2D(nb_filter, [num_row, num_col],
                               padding=padding,
                               strides=strides,
                               activation='relu',
                               name=conv_name)
    x = conv_layer(x)
    norm_layer = FixedBatchNormalization(axis=bn_axis, name=bn_name)
    return norm_layer(x)
def predict(im, pos, model, k):
    """Run the model on one image/position pair and read back internal maps.

    Args:
        im: image array; axis 0 is moved to the last position when the
            backend uses ``channels_last`` (presumably a channels-first
            image -- confirm against the caller).
        pos: position input fed to the model next to the image.
        model: model exposing layers named ``'reward'`` and ``'value<k>'``.
        k: index used to pick the ``'value{k}'`` layer.

    Returns:
        Tuple ``(action, reward, value)``: the argmax of the prediction and
        the two layer outputs reshaped to ``im.shape[1:]``.
    """
    batch = np.array([im])
    if K.image_data_format() == 'channels_last':
        batch = batch.transpose((0, 2, 3, 1))

    scores = model.predict([batch, np.array([pos])])
    action = np.argmax(scores)

    map_shape = im.shape[1:]
    reward = get_layer_output(model, 'reward', batch)
    value = get_layer_output(model, 'value{}'.format(k), batch)
    return action, np.reshape(reward, map_shape), np.reshape(value, map_shape)

# load data
def make_discriminator():
    """Build the discriminator network.

    The model takes a single-channel 28x28 image and outputs one value
    representing whether the input is real or generated. Unlike normal
    GANs, the output is not a sigmoid and is not a probability: it should
    be as large and negative as possible for generated inputs and as large
    and positive as possible for real inputs.

    The improved WGAN paper suggests that BatchNormalization should not be
    used in the discriminator, so none is added here.

    Returns:
        The (uncompiled) ``Sequential`` discriminator model.
    """
    if K.image_data_format() == 'channels_first':
        image_shape = (1, 28, 28)
    else:
        image_shape = (28, 28, 1)

    model = Sequential()
    stack = (
        Convolution2D(64, (5, 5), padding='same', input_shape=image_shape),
        LeakyReLU(),
        Convolution2D(128, (5, 5), kernel_initializer='he_normal', strides=[2, 2]),
        LeakyReLU(),
        Convolution2D(128, (5, 5), kernel_initializer='he_normal', padding='same',
                      strides=[2, 2]),
        LeakyReLU(),
        Flatten(),
        Dense(1024, kernel_initializer='he_normal'),
        LeakyReLU(),
        # Single linear output unit: no activation.
        Dense(1, kernel_initializer='he_normal'),
    )
    for layer in stack:
        model.add(layer)
    return model
def test_sub_pixel_upscaling():
    """Check SubPixelUpscaling against ``KC.depth_to_space`` for scales 2-4."""
    num_samples, num_row, num_col = 2, 16, 16
    input_dtype = K.floatx()

    for scale_factor in (2, 3, 4):
        # 4 output channels after upscaling => 4 * scale^2 input channels.
        channels = 4 * (scale_factor ** 2)
        input_data = np.random.random((num_samples, channels, num_row, num_col))
        input_data = input_data.astype(input_dtype)

        # The random data is generated channels-first; convert if needed.
        if K.image_data_format() == 'channels_last':
            input_data = input_data.transpose((0, 2, 3, 1))

        reference = KC.depth_to_space(K.variable(input_data), scale=scale_factor)
        expected_output = K.eval(reference)

        layer_test(convolutional.SubPixelUpscaling,
                   kwargs={'scale_factor': scale_factor},
                   input_data=input_data,
                   expected_output=expected_output,
                   expected_output_dtype=K.floatx())
def deprocess_image(x):
    """Turn a flat/processed image array back into a displayable uint8 image.

    The array is reshaped to ``(img_nrows, img_ncols, 3)`` (transposing from
    channels-first when required), the per-channel mean pixel is added back,
    the channel order is reversed and values are clipped to ``[0, 255]``.

    Args:
        x: numpy array holding ``img_nrows * img_ncols * 3`` values.

    Returns:
        A ``uint8`` array of shape ``(img_nrows, img_ncols, 3)``.
    """
    if K.image_data_format() == 'channels_first':
        x = x.reshape((3, img_nrows, img_ncols)).transpose((1, 2, 0))
    else:
        x = x.reshape((img_nrows, img_ncols, 3))

    # Remove zero-center by mean pixel (added in place, channel by channel).
    for channel, mean_pixel in enumerate((103.939, 116.779, 123.68)):
        x[:, :, channel] += mean_pixel

    # 'BGR'->'RGB'
    x = x[:, :, ::-1]
    return np.clip(x, 0, 255).astype('uint8')

# get tensor representations of our images
def region_style_loss(style_image, target_image, style_mask, target_mask):
    """Style loss between two images restricted to one common region.

    Args:
        style_image: 3D tensor of style features.
        target_image: 3D tensor of target features.
        style_mask: 2D mask selecting the region in the style image.
        target_mask: 2D mask selecting the region in the target image.

    Returns:
        Mean squared difference between the two masked, normalized Gram
        matrices.
    """
    assert 3 == K.ndim(style_image) == K.ndim(target_image)
    assert 2 == K.ndim(style_mask) == K.ndim(target_mask)

    channels_first = K.image_data_format() == 'channels_first'

    def apply_mask(image, mask):
        # The mask is multiplied against a channels-first view of the image.
        if not channels_first:
            image = K.permute_dimensions(image, (2, 0, 1))
        return image * mask

    masked_style = apply_mask(style_image, style_mask)
    masked_target = apply_mask(target_image, target_mask)
    num_channels = K.shape(style_image)[0 if channels_first else -1]

    # Normalize each Gram matrix by its mask mean and by the channel count.
    s = gram_matrix(masked_style) / K.mean(style_mask) / num_channels
    c = gram_matrix(masked_target) / K.mean(target_mask) / num_channels
    return K.mean(K.square(s - c))
def style_loss(style_image, target_image, style_masks, target_masks):
    """Style loss between two images, summed over all labelled regions.

    Args:
        style_image: 3D tensor of style features.
        target_image: 3D tensor of target features.
        style_masks: 3D tensor stacking one 2D mask per label for the style
            image (label axis first for ``channels_first``, last otherwise).
        target_masks: 3D tensor of masks for the target image, same layout.

    Returns:
        The sum of ``region_style_loss`` over the ``num_labels`` regions.
    """
    assert 3 == K.ndim(style_image) == K.ndim(target_image)
    assert 3 == K.ndim(style_masks) == K.ndim(target_masks)

    channels_first = K.image_data_format() == 'channels_first'
    loss = K.variable(0)
    # `range` instead of the Python-2-only `xrange`, so this also runs on
    # Python 3; the iteration itself is unchanged.
    for i in range(num_labels):
        if channels_first:
            style_mask = style_masks[i, :, :]
            target_mask = target_masks[i, :, :]
        else:
            style_mask = style_masks[:, :, i]
            target_mask = target_masks[:, :, i]
        loss += region_style_loss(style_image, target_image,
                                  style_mask, target_mask)
    return loss
def total_variation_loss(x):
    """Total variation loss of a 4D image tensor.

    Squared differences between each pixel and its neighbour one row down
    and one column right are summed and raised to the power 1.25.

    Args:
        x: 4D tensor whose spatial size is ``img_nrows`` x ``img_ncols``.

    Returns:
        Scalar tensor with the summed loss.
    """
    assert 4 == K.ndim(x)

    everything = slice(None)
    shifted = slice(1, None)
    rows = slice(None, img_nrows - 1)
    cols = slice(None, img_ncols - 1)

    if K.image_data_format() == 'channels_first':
        base = (everything, everything, rows, cols)
        below = (everything, everything, shifted, cols)
        right = (everything, everything, rows, shifted)
    else:
        base = (everything, rows, cols, everything)
        below = (everything, shifted, cols, everything)
        right = (everything, rows, shifted, everything)

    a = K.square(x[base] - x[below])
    b = K.square(x[base] - x[right])
    return K.sum(K.pow(a + b, 1.25))

# Overall loss is the weighted sum of content_loss, style_loss and tv_loss
# Each individual loss uses features from image/mask models.
def deprocess_image(x):
    """Convert a float array into a displayable uint8 image.

    Note that the normalization steps are applied in place, so the array
    passed by the caller is modified.

    Args:
        x: numpy float array; transposed with ``(1, 2, 0)`` when the backend
            uses ``channels_first``.

    Returns:
        A ``uint8`` array with values in ``[0, 255]``.
    """
    # Normalize in place: center on 0 and scale to a std of about 0.1.
    x -= x.mean()
    x /= x.std() + 1e-5
    x *= 0.1

    # Shift to be centered on 0.5, then clip to [0, 1].
    x += 0.5
    x = np.clip(x, 0, 1)

    # Scale up to the RGB value range.
    x *= 255
    if K.image_data_format() == 'channels_first':
        x = x.transpose((1, 2, 0))
    return np.clip(x, 0, 255).astype('uint8')

# build the VGG16 network with ImageNet weights
def __init__(self, side, num, classes, coords, object_scale, noobject_scale, class_scale, coord_scale, **kwargs):
    """Store the detection configuration and the backend data format.

    All positional arguments are stored verbatim on the instance (``num``
    is stored as ``self.n``); remaining keyword arguments are forwarded to
    the parent initializer.

    Raises:
        ValueError: if the backend reports an unknown image data format.
    """
    backend_format = K.image_data_format()
    if backend_format not in {'channels_last', 'channels_first'}:
        raise ValueError('data_format must be in '
                         '{"channels_last", "channels_first"}')
    self.data_format = backend_format

    self.side, self.n = side, num
    self.classes, self.coords = classes, coords

    # Weights of the individual loss terms.
    self.object_scale, self.noobject_scale = object_scale, noobject_scale
    self.class_scale, self.coord_scale = class_scale, coord_scale

    super(Detection2D, self).__init__(**kwargs)
def fire_module(x, fire_id, squeeze=16, expand=64):
    """Build one SqueezeNet fire module.

    A 1x1 "squeeze" convolution feeds two parallel "expand" convolutions
    (1x1 and 3x3) whose ReLU outputs are concatenated along the channel
    axis.

    Args:
        x: input tensor.
        fire_id: identifier used to prefix the layer names (``fire<id>/``).
        squeeze: number of filters in the squeeze convolution.
        expand: number of filters in each expand convolution.

    Returns:
        The concatenated output tensor.
    """
    prefix = 'fire' + str(fire_id) + '/'
    channel_axis = 1 if K.image_data_format() == 'channels_first' else 3

    squeezed = Convolution2D(squeeze, (1, 1), padding='valid', name=prefix + sq1x1)(x)
    squeezed = Activation('relu', name=prefix + relu + sq1x1)(squeezed)

    branch_1x1 = Convolution2D(expand, (1, 1), padding='valid', name=prefix + exp1x1)(squeezed)
    branch_1x1 = Activation('relu', name=prefix + relu + exp1x1)(branch_1x1)

    branch_3x3 = Convolution2D(expand, (3, 3), padding='same', name=prefix + exp3x3)(squeezed)
    branch_3x3 = Activation('relu', name=prefix + relu + exp3x3)(branch_3x3)

    return concatenate([branch_1x1, branch_3x3], axis=channel_axis, name=prefix + 'concat')

# Original SqueezeNet from paper.
def eval_loss_and_grads(x):
    """Evaluate the loss and its gradients for a flat image vector.

    Args:
        x: numpy array holding one image of ``img_nrows`` x ``img_ncols``
            with 3 channels; it is reshaped to a batch of one.

    Returns:
        Tuple ``(loss_value, grad_values)`` where ``grad_values`` is a flat
        ``float64`` array.
    """
    if K.image_data_format() == 'channels_first':
        batch_shape = (1, 3, img_nrows, img_ncols)
    else:
        batch_shape = (1, img_nrows, img_ncols, 3)

    outs = f_outputs([x.reshape(batch_shape)])
    loss_value = outs[0]

    grads = outs[1:]
    if len(grads) == 1:
        flat = outs[1].flatten()
    else:
        flat = np.array(grads).flatten()
    return loss_value, flat.astype('float64')

# this Evaluator class makes it possible
# to compute loss and gradients in one pass
# while retrieving them via two separate functions,
# "loss" and "grads". This is done because scipy.optimize
# requires separate functions for loss and gradients,
# but computing them separately would be inefficient.
def fire_module(x, fire_id, squeeze=16, expand=64):
    """Build the special "fire" layer group used by SqueezeNet.

    Args:
        x: input tensor.
        fire_id: identifier used to prefix the layer names (``fire<id>/``).
        squeeze: filter count of the 1x1 squeeze convolution.
        expand: filter count of each of the 1x1 and 3x3 expand convolutions.

    Returns:
        Tensor with both expand branches concatenated on the channel axis.
    """
    if K.image_data_format() == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = 3
    s_id = 'fire' + str(fire_id) + '/'

    # Squeeze stage.
    squeeze_conv = Convolution2D(squeeze, (1, 1), padding='valid', name=s_id + sq1x1)
    squeeze_relu = Activation('relu', name=s_id + relu + sq1x1)
    x = squeeze_relu(squeeze_conv(x))

    # Expand stage, 1x1 branch.
    left_conv = Convolution2D(expand, (1, 1), padding='valid', name=s_id + exp1x1)
    left = left_conv(x)
    left = Activation('relu', name=s_id + relu + exp1x1)(left)

    # Expand stage, 3x3 branch.
    right_conv = Convolution2D(expand, (3, 3), padding='same', name=s_id + exp3x3)
    right = right_conv(x)
    right = Activation('relu', name=s_id + relu + exp3x3)(right)

    return concatenate([left, right], axis=channel_axis, name=s_id + 'concat')

# Original SqueezeNet from paper.
def __call__(self, shape, dtype=None):
    """Return an identity-like initial value for a weight of ``shape``.

    * rank 1: all zeros;
    * square rank 2: identity matrix;
    * rank 4 (kw, kh, num_channel, filters): zeros with a one at the kernel
      centre on the channel/filter diagonal.

    Args:
        shape: shape of the weight to initialise.
        dtype: dtype of the returned constant.

    Raises:
        AssertionError: if the backend is not using ``channels_last``.
        Exception: if no rule exists for ``shape``.
    """
    assert K.image_data_format() == 'channels_last'
    constant = K.tensorflow_backend.constant
    rank = len(shape)

    if rank == 1:
        return constant(0., dtype=dtype, shape=shape)
    if rank == 2 and shape[0] == shape[1]:
        return constant(np.identity(shape[0], dtype))
    if rank == 4:
        # kw,kh,num_channel,filters
        if shape[2] == shape[3]:
            # Floor division: `/` yields floats on Python 3, which are not
            # valid array indices.
            cx, cy = shape[0] // 2, shape[1] // 2
        else:
            cx, cy = (shape[0] - 1) // 2, (shape[1] - 1) // 2
        array = np.zeros(shape, dtype=float)
        diagonal = np.arange(min(shape[2], shape[3]))
        array[cx, cy, diagonal, diagonal] = 1
        return constant(array, dtype=dtype)
    raise Exception("no handler")
def conv2d_bn(x, filters, num_row, num_col, padding='same', strides=(1, 1), name=None):
    """Utility function to apply conv + BN + ReLU.

    Args:
        x: input tensor.
        filters: number of convolution filters.
        num_row: kernel height.
        num_col: kernel width.
        padding: convolution padding mode.
        strides: convolution strides.
        name: optional base name; the convolution is named
            ``name + '_conv'``, the batch norm ``name + '_bn'`` and the
            activation ``name``.

    Returns:
        The output tensor of the ReLU activation.
    """
    has_name = name is not None
    conv_name = name + '_conv' if has_name else None
    bn_name = name + '_bn' if has_name else None
    bn_axis = 1 if K.image_data_format() == 'channels_first' else 3

    # No bias and no scale: both are left out of conv / BN respectively.
    conv = Conv2D(filters, (num_row, num_col),
                  strides=strides,
                  padding=padding,
                  use_bias=False,
                  name=conv_name)
    x = conv(x)
    x = BatchNormalization(axis=bn_axis, scale=False, name=bn_name)(x)
    return Activation('relu', name=name)(x)
def __transition_block(ip, nb_filter, compression=1.0, weight_decay=1e-4):
    """Apply BatchNorm, ReLU, a 1x1 Conv2D with optional compression and
    2x2 average pooling.

    Args:
        ip: keras tensor
        nb_filter: number of filters
        compression: factor applied to ``nb_filter`` to get the number of
            feature maps kept by the 1x1 convolution.
        weight_decay: weight decay factor

    Returns:
        keras tensor, after applying batch_norm, relu-conv and average
        pooling
    """
    if K.image_data_format() == 'channels_first':
        concat_axis = 1
    else:
        concat_axis = -1
    reduced_filters = int(nb_filter * compression)

    x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(ip)
    x = Activation('relu')(x)
    bottleneck = Conv2D(reduced_filters, (1, 1),
                        kernel_initializer='he_normal',
                        padding='same',
                        use_bias=False,
                        kernel_regularizer=l2(weight_decay))
    x = bottleneck(x)
    return AveragePooling2D((2, 2), strides=(2, 2))(x)
def _initial_conv_block_inception(input, initial_conv_filters, weight_decay=5e-4):
    """Add the initial conv block (conv, batch norm, relu, max pool) for the DPN.

    Args:
        input: input tensor
        initial_conv_filters: number of filters for initial conv block
        weight_decay: weight decay factor

    Returns:
        a keras tensor
    """
    if K.image_data_format() == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = -1

    # 7x7 convolution with stride 2, without bias.
    stem = Conv2D(initial_conv_filters, (7, 7),
                  padding='same',
                  use_bias=False,
                  kernel_initializer='he_normal',
                  kernel_regularizer=l2(weight_decay),
                  strides=(2, 2))
    x = stem(input)
    x = BatchNormalization(axis=channel_axis)(x)
    x = Activation('relu')(x)
    return MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
def _bn_relu_conv_block(input, filters, kernel=(3, 3), stride=(1, 1), weight_decay=5e-4):
    """Add a convolution / batch norm / relu block for DPN.

    Despite the function name, the layers are applied in the order
    Conv2D -> BatchNormalization -> ReLU.

    Args:
        input: input tensor
        filters: number of output filters
        kernel: convolution kernel size
        stride: stride of convolution
        weight_decay: weight decay factor

    Returns:
        a keras tensor
    """
    if K.image_data_format() == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = -1

    conv = Conv2D(filters, kernel,
                  padding='same',
                  use_bias=False,
                  kernel_initializer='he_normal',
                  kernel_regularizer=l2(weight_decay),
                  strides=stride)
    x = conv(input)
    x = BatchNormalization(axis=channel_axis)(x)
    return Activation('relu')(x)
def conv2d_bn(x, nb_filter, num_row, num_col, padding='same', strides=(1, 1), use_bias=False):
    """Utility function to apply conv + BN + ReLU.

    (Slightly modified from
    https://github.com/fchollet/keras/blob/master/keras/applications/inception_v3.py)

    Args:
        x: input tensor.
        nb_filter: number of convolution filters.
        num_row: kernel height.
        num_col: kernel width.
        padding: convolution padding mode.
        strides: convolution strides.
        use_bias: whether the convolution has a bias term.

    Returns:
        The output tensor of the ReLU activation.
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    kernel_init = initializers.VarianceScaling(scale=2.0, mode='fan_in',
                                               distribution='normal', seed=None)
    conv = Convolution2D(nb_filter, (num_row, num_col),
                         strides=strides,
                         padding=padding,
                         use_bias=use_bias,
                         kernel_regularizer=regularizers.l2(0.00004),
                         kernel_initializer=kernel_init)
    x = conv(x)
    x = BatchNormalization(axis=channel_axis, momentum=0.9997, scale=False)(x)
    return Activation('relu')(x)
def block_inception_a(input):
    """Build an Inception-A block: four parallel branches, concatenated.

    Args:
        input: input tensor.

    Returns:
        Tensor with the four branch outputs joined on the channel axis.
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    def conv_chain(tensor, specs):
        # Apply conv2d_bn once per (filters, rows, cols) spec, in order.
        for filters, rows, cols in specs:
            tensor = conv2d_bn(tensor, filters, rows, cols)
        return tensor

    branch_0 = conv_chain(input, [(96, 1, 1)])
    branch_1 = conv_chain(input, [(64, 1, 1), (96, 3, 3)])
    branch_2 = conv_chain(input, [(64, 1, 1), (96, 3, 3), (96, 3, 3)])

    pooled = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(input)
    branch_3 = conv_chain(pooled, [(96, 1, 1)])

    return concatenate([branch_0, branch_1, branch_2, branch_3], axis=channel_axis)
def block_reduction_a(input):
    """Build a Reduction-A block: three stride-2 branches, concatenated.

    Args:
        input: input tensor.

    Returns:
        Tensor with the three branch outputs joined on the channel axis.
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
    # Every branch ends with a stride-2, 'valid' 3x3 step.
    downsample = dict(strides=(2, 2), padding='valid')

    conv_branch = conv2d_bn(input, 384, 3, 3, **downsample)

    stacked_branch = conv2d_bn(input, 192, 1, 1)
    stacked_branch = conv2d_bn(stacked_branch, 224, 3, 3)
    stacked_branch = conv2d_bn(stacked_branch, 256, 3, 3, **downsample)

    pool_branch = MaxPooling2D((3, 3), **downsample)(input)

    return concatenate([conv_branch, stacked_branch, pool_branch], axis=channel_axis)
def block_inception_b(input):
    """Build an Inception-B block using factorized 1x7 / 7x1 convolutions.

    Args:
        input: input tensor.

    Returns:
        Tensor with the four branch outputs joined on the channel axis.
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    def conv_chain(tensor, specs):
        # Apply conv2d_bn once per (filters, rows, cols) spec, in order.
        for filters, rows, cols in specs:
            tensor = conv2d_bn(tensor, filters, rows, cols)
        return tensor

    branch_0 = conv_chain(input, [(384, 1, 1)])
    branch_1 = conv_chain(input, [(192, 1, 1), (224, 1, 7), (256, 7, 1)])
    branch_2 = conv_chain(input, [(192, 1, 1), (192, 7, 1), (224, 1, 7),
                                  (224, 7, 1), (256, 1, 7)])

    pooled = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(input)
    branch_3 = conv_chain(pooled, [(128, 1, 1)])

    return concatenate([branch_0, branch_1, branch_2, branch_3], axis=channel_axis)
def __initial_conv_block_imagenet(input, weight_decay=5e-4):
    """Add the initial conv block for the inception resnext.

    The block is a 7x7 stride-2 convolution with 64 filters, followed by
    batch norm, a LeakyReLU activation and 3x3 stride-2 max pooling.

    Args:
        input: input tensor
        weight_decay: weight decay factor

    Returns:
        a keras tensor
    """
    if K.image_data_format() == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = -1

    stem = Conv2D(64, (7, 7),
                  padding='same',
                  use_bias=False,
                  kernel_initializer='he_normal',
                  kernel_regularizer=l2(weight_decay),
                  strides=(2, 2))
    x = stem(input)
    x = BatchNormalization(axis=channel_axis)(x)
    x = LeakyReLU()(x)
    return MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
def load_mnist():
    """Load the MNIST training images, reshaped and rescaled.

    Only the training images are used; labels and the test split are
    discarded. The images get an explicit channel axis matching the backend
    data format and are mapped with ``x * 0.96 / 255 + 0.02``.

    Returns:
        Tuple ``(input_shape, x_train)`` where ``input_shape`` is the shape
        of a single image.
    """
    # input image dimensions
    img_rows, img_cols = 28, 28

    # the data, shuffled and split between train and test sets
    (x_train, _y_train), (_x_test, _y_test) = mnist.load_data()

    if k.image_data_format() == 'channels_first':
        input_shape = (1, img_rows, img_cols)
        x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    else:
        input_shape = (img_rows, img_cols, 1)
        x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)

    x_train = x_train.astype(k.floatx())
    # Presumably maps raw [0, 255] pixels into [0.02, 0.98] -- confirm range.
    x_train *= 0.96 / 255
    x_train += 0.02
    return input_shape, x_train
def top(x, input_shape, classes, activation, weight_decay):
    """Add the classification head: a linear 1x1 convolution per class.

    Args:
        x: input tensor.
        input_shape: shape of a single input image, ordered according to
            ``K.image_data_format()``.
        classes: number of output classes.
        activation: name of the final activation; when it equals
            ``'sigmoid'`` the output is flattened to
            ``(row * col * classes,)``.
        weight_decay: L2 regularization factor of the convolution kernel.

    Returns:
        The output tensor (flattened only in the sigmoid case).
    """
    x = Conv2D(classes, (1, 1), activation='linear',
               padding='same', kernel_regularizer=l2(weight_decay),
               use_bias=False)(x)

    if K.image_data_format() == 'channels_first':
        _channel, row, col = input_shape
    else:
        row, col, _channel = input_shape

    # TODO(ahundt) this is modified for the sigmoid case! also use loss_shape
    # Compare by value: `is` tests object identity and only matched when the
    # caller's string happened to be the same interned object.
    if activation == 'sigmoid':
        x = Reshape((row * col * classes,))(x)

    return x
def test_vgg_conv():
    """Check the output shapes of two stacked ``vgg_conv`` blocks."""
    channels_first = K.image_data_format() == 'channels_first'

    def layout(channels, size):
        # Shape of a square feature map in the active data format.
        if channels_first:
            return (channels, size, size)
        return (size, size, channels)

    x = Input(shape=layout(3, 224))
    y1_shape = (None,) + layout(64, 112)
    y2_shape = (None,) + layout(128, 56)

    block1 = vgg_conv(filters=64, convs=2, block_name='block1')
    y = block1(x)
    assert K.int_shape(y) == y1_shape

    block2 = vgg_conv(filters=128, convs=2, block_name='block2')
    y = block2(y)
    assert K.int_shape(y) == y2_shape
def test_vgg_decoder():
    """Check that ``VGGDecoder`` yields a 21-class score map at input size."""
    channels_last = K.image_data_format() == 'channels_last'

    def layout(channels, size):
        # Shape of a square feature map in the active data format.
        if channels_last:
            return (size, size, channels)
        return (channels, size, size)

    inputs = Input(shape=layout(3, 500))
    pool3 = Input(shape=layout(256, 88))
    pool4 = Input(shape=layout(512, 44))
    drop7 = Input(shape=layout(4096, 16))
    score_shape = (None,) + layout(21, 500)

    # Feature pyramid ordered from the deepest feature map to the image.
    pyramid = [drop7, pool4, pool3, inputs]
    scales = [1., 1e-2, 1e-4]
    score = VGGDecoder(pyramid, scales, classes=21)
    assert K.int_shape(score) == score_shape
def test_fcn_vgg16_correctness():
    """Test output not NaN (nor infinite) after one training step."""
    channels_first = K.image_data_format() == 'channels_first'
    input_shape = (3, 500, 500) if channels_first else (500, 500, 3)

    x = np.random.rand(1, *input_shape)
    # Random labels, one-hot encoded on the last axis.
    labels = np.random.randint(21, size=(1, 500, 500))
    y = np.eye(21)[labels]
    if channels_first:
        y = np.transpose(y, (0, 3, 1, 2))

    def assert_finite(values):
        assert not np.any(np.isinf(values))
        assert not np.any(np.isnan(values))

    fcn_vgg16 = FCN(classes=21, input_shape=input_shape)
    fcn_vgg16.compile(optimizer='rmsprop',
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
    fcn_vgg16.fit(x, y, batch_size=1, epochs=1)

    loss = fcn_vgg16.evaluate(x, y, batch_size=1)
    assert_finite(loss)

    y_pred = fcn_vgg16.predict(x, batch_size=1)
    assert_finite(y_pred)
def compute_error_matrix(y_true, y_pred):
    """Compute Confusion matrix (a.k.a. error matrix).

    Example layout (rows: actual class, columns: predicted class)::

                  predicted
                  0   1   2
        actual 0 [[ 5,  3,  0],
               1  [ 2,  3,  1],
               2  [ 0,  2, 11]]

    Note true positives are in the diagonal.

    Args:
        y_true: ground-truth class scores with a channel (class) axis.
        y_pred: predicted class scores, same layout as ``y_true``.

    Returns:
        The result of ``get_confusion`` on the flattened class indices.
    """
    # Find channel axis given backend
    ax_chn = 3 if K.image_data_format() == 'channels_last' else 1
    classes = y_true.shape[ax_chn]

    true_labels = K.argmax(y_true, axis=ax_chn).flatten()
    pred_labels = K.argmax(y_pred, axis=ax_chn).flatten()
    return get_confusion(true_labels, pred_labels, classes)
def __init__(self, n_fbs, trainable_fb, sr=None, init='mel', fmin=0., fmax=None, bins_per_octave=12, image_data_format='default', **kwargs):
    """Store the filterbank configuration.

    TODO: is sr necessary? is fmax necessary? init with None?

    Args:
        n_fbs: stored as ``self.n_fbs``.
        trainable_fb: stored as ``self.trainable_fb``.
        sr: sampling rate; required (not None) for ``'mel'`` / ``'log'``.
        init: one of ``'mel'``, ``'log'``, ``'linear'``, ``'uni_random'``.
        fmin: stored as ``self.fmin``.
        fmax: upper frequency; defaults to ``sr / 2.`` when None.
        bins_per_octave: stored as ``self.bins_per_octave``.
        image_data_format: ``'default'`` (use the backend setting),
            ``'channels_first'`` or ``'channels_last'``.
        **kwargs: forwarded to the parent initializer.
    """
    # NOTE(review): assigned before super().__init__(); confirm the base
    # initializer does not reset this flag.
    self.supports_masking = True
    self.n_fbs = n_fbs
    assert init in ('mel', 'log', 'linear', 'uni_random')

    self.fmax = sr / 2. if fmax is None else fmax
    if init in ('mel', 'log'):
        assert sr is not None

    self.fmin = fmin
    self.init = init
    self.bins_per_octave = bins_per_octave
    self.sr = sr
    self.trainable_fb = trainable_fb

    assert image_data_format in ('default', 'channels_first', 'channels_last')
    use_backend_default = image_data_format == 'default'
    self.image_data_format = (K.image_data_format() if use_backend_default
                              else image_data_format)
    super(Filterbank, self).__init__(**kwargs)
def _spectrogram_mono(self, x):
    """Return a 2D batch of a mono power-spectrogram.

    Args:
        x: tensor of shape (None, 1, len_src).

    Returns:
        The power spectrogram, permuted to match ``self.image_data_format``.
    """
    # Move the source axis before the single input channel, then add a
    # dummy dimension (channel axis) for the 2D convolution.
    x = K.expand_dims(K.permute_dimensions(x, [0, 2, 1]), 3)

    conv_kwargs = dict(strides=(self.n_hop, 1),
                       padding=self.padding,
                       data_format='channels_last')
    real_part = K.conv2d(x, self.dft_real_kernels, **conv_kwargs)
    imag_part = K.conv2d(x, self.dft_imag_kernels, **conv_kwargs)
    power = real_part ** 2 + imag_part ** 2

    # now shape is (batch_sample, n_frame, 1, freq)
    if self.image_data_format == 'channels_last':
        return K.permute_dimensions(power, [0, 3, 1, 2])
    return K.permute_dimensions(power, [0, 2, 3, 1])
def call(self, x):
    """Compute the mel-spectrogram of ``x``.

    The parent layer's power spectrogram is projected with
    ``self.freq2mel``; optionally its power is changed and it is converted
    to decibels.

    Args:
        x: input tensor passed to the parent ``call``.

    Returns:
        The mel-spectrogram in the layout given by
        ``self.image_data_format``.
    """
    power_spectrogram = super(Melspectrogram, self).call(x)
    # now, th: (batch_sample, n_ch, n_freq, n_time)
    #      tf: (batch_sample, n_freq, n_time, n_ch)
    if self.image_data_format == 'channels_first':
        swap_axes = [0, 1, 3, 2]
    else:
        swap_axes = [0, 3, 2, 1]

    # After this permutation, whatever image_data_format:
    # (batch_sample, n_ch, n_time, n_freq)
    power_spectrogram = K.permute_dimensions(power_spectrogram, swap_axes)
    output = K.dot(power_spectrogram, self.freq2mel)
    # The same permutation restores the original layout.
    output = K.permute_dimensions(output, swap_axes)

    if self.power_melgram != 2.0:
        output = K.pow(K.sqrt(output), self.power_melgram)
    if self.return_decibel_melgram:
        output = backend_keras.amplitude_to_decibel(output)
    return output
def identity_block(input_tensor, kernel_size, filters, stage, block):
    """The identity block is the block that has no conv layer at shortcut.

    # Arguments
        input_tensor: input tensor
        kernel_size: default 3, the kernel size of middle conv layer at main path
        filters: list of integers, the filters of 3 conv layer at main path
        stage: integer, current stage label, used for generating layer names
        block: 'a','b'..., current block label, used for generating layer names

    # Returns
        Output tensor for the block.
    """
    filters1, filters2, filters3 = filters
    bn_axis = 3 if K.image_data_format() == 'channels_last' else 1

    stage_block = str(stage) + block
    conv_name_base = 'res' + stage_block + '_branch'
    bn_name_base = 'bn' + stage_block + '_branch'

    # Main path: 1x1 -> kernel_size -> 1x1 convolutions, each followed by BN.
    main = Conv2D(filters1, (1, 1), name=conv_name_base + '2a')(input_tensor)
    main = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(main)
    main = Activation('relu')(main)

    main = Conv2D(filters2, kernel_size, padding='same',
                  name=conv_name_base + '2b')(main)
    main = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(main)
    main = Activation('relu')(main)

    main = Conv2D(filters3, (1, 1), name=conv_name_base + '2c')(main)
    main = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(main)

    # Shortcut: the unchanged input is added before the final activation.
    merged = layers.add([main, input_tensor])
    return Activation('relu')(merged)
def predict(im, pos, model, k):
    """Predict an action and fetch the reward/value maps for one sample.

    Args:
        im: image array; axis 0 is moved to the last position when the
            backend uses ``channels_last`` (presumably a channels-first
            image -- confirm against the caller).
        pos: position input fed to the model next to the image.
        model: model exposing layers named ``'reward'`` and ``'value<k>'``.
        k: index used to pick the ``'value{k}'`` layer.

    Returns:
        Tuple ``(action, reward, value)``; both maps are reshaped to
        ``im.shape[1:]``.
    """
    im_ary = np.array([im])
    if K.image_data_format() == 'channels_last':
        im_ary = im_ary.transpose((0, 2, 3, 1))

    res = model.predict([im_ary, np.array([pos])])
    action = np.argmax(res)

    spatial_shape = im.shape[1:]
    raw_reward = get_layer_output(model, 'reward', im_ary)
    raw_value = get_layer_output(model, 'value{}'.format(k), im_ary)
    reward = np.reshape(raw_reward, spatial_shape)
    value = np.reshape(raw_value, spatial_shape)
    return action, reward, value
def predict(im, pos, model, k):
    """Predict on one sample and return the raw output with derived values.

    Args:
        im: image array; axis 0 is moved to the last position when the
            backend uses ``channels_last`` (presumably a channels-first
            image -- confirm against the caller).
        pos: position input fed to the model next to the image.
        model: model exposing layers named ``'reward'`` and ``'value<k>'``.
        k: index used to pick the ``'value{k}'`` layer.

    Returns:
        Tuple ``(res, action, reward, value)``: the raw prediction, its
        argmax, and the two layer outputs reshaped to ``im.shape[1:]``.
    """
    batch = np.array([im])
    if K.image_data_format() == 'channels_last':
        batch = batch.transpose((0, 2, 3, 1))

    res = model.predict([batch, np.array([pos])])
    action = np.argmax(res)

    grid_shape = im.shape[1:]
    reward = get_layer_output(model, 'reward', batch)
    value = get_layer_output(model, 'value{}'.format(k), batch)
    return (res,
            action,
            np.reshape(reward, grid_shape),
            np.reshape(value, grid_shape))
def compute_output_shape(self, input_shape):
    """Return the output shape: the input shape with the two spatial
    dimensions replaced by ``self.target_h`` and ``self.target_w``.

    Args:
        input_shape: 4D input shape in the backend's data format.

    Returns:
        A 4-tuple with batch and channel sizes taken from the input.
    """
    channels_last = K.image_data_format() == 'channels_last'
    # NOTE(review): K.int_shape is applied to the shape argument itself;
    # confirm it accepts what the framework passes here.
    dims = K.int_shape(input_shape)
    if channels_last:
        batch, _, _, channels = dims
        return batch, self.target_h, self.target_w, channels
    batch, channels, _, _ = dims
    return batch, channels, self.target_h, self.target_w
def valid_shapes(inp):
    """List the spatial sizes obtained by repeatedly halving ``inp``.

    Starting from the height/width of ``inp``, each further entry is the
    previous one halved with rounding up. At most nine halvings are done;
    the loop stops early once both sides are smaller than 2.

    Args:
        inp: 4D tensor in the backend's data format.

    Returns:
        List of ``[height, width]`` pairs, full resolution first.
    """
    dims = K.int_shape(inp)
    row_axis = 1 if K.image_data_format() == 'channels_last' else 2
    col_axis = 2 if K.image_data_format() == 'channels_last' else 3
    height, width = dims[row_axis], dims[col_axis]

    pyramid = [[height, width]]
    for _ in range(9):
        height, width = (height + 1) // 2, (width + 1) // 2
        pyramid.append([height, width])
        if height < 2 and width < 2:
            break
    return pyramid
def conv2d_bn(x, filters, kernel_size, strides=1, padding='same', activation='relu', use_bias=False, name=None):
    """Utility function to apply conv + BN.

    Batch normalization is only added when the convolution has no bias.

    # Arguments
        x: input tensor.
        filters: filters in `Conv2D`.
        kernel_size: kernel size as in `Conv2D`.
        strides: strides in `Conv2D`.
        padding: padding mode in `Conv2D`.
        activation: activation applied after the (optional) batch norm;
            `None` to skip it.
        use_bias: whether to use a bias in `Conv2D`.
        name: name of the ops; will become `name + '_ac'` for the activation
            and `name + '_bn'` for the batch norm layer.

    # Returns
        Output tensor after applying `Conv2D`, and `BatchNormalization` /
        the activation where enabled.
    """
    def suffixed(suffix):
        # Derived layer name, or None when the block is unnamed.
        return None if name is None else name + suffix

    conv = Conv2D(filters, kernel_size,
                  strides=strides,
                  padding=padding,
                  use_bias=use_bias,
                  name=name)
    x = conv(x)

    if not use_bias:
        bn_axis = 1 if K.image_data_format() == 'channels_first' else 3
        x = BatchNormalization(axis=bn_axis, scale=False, name=suffixed('_bn'))(x)

    if activation is not None:
        x = Activation(activation, name=suffixed('_ac'))(x)
    return x
def get_num_filters(layer):
    """Return the total number of filters within `layer`.

    Layers with a 2D output are treated as `keras.layers.Dense`; for them
    this is the total number of outputs. For every other layer the size of
    the channel axis of the output is returned.

    Args:
        layer: layer whose ``output`` tensor is inspected.

    Returns:
        The size of the output's feature/channel dimension.
    """
    # Dense outputs have 2 dimensions; all other layers are expected to have 4.
    if K.ndim(layer.output) == 2:
        return layer.output.shape[1]

    channel_axis = 1 if K.image_data_format() == 'channels_first' else 3
    return K.int_shape(layer.output)[channel_axis]
def test_DSSIM_channels_last():
    """Exercise ``DSSIMObjective`` with the backend set to channels_last.

    For each (input size, kernel size) pair a small conv model is trained
    for one epoch with the DSSIM loss. The loss is then checked to be ~0
    for identical inputs and ~0.5 for all-zeros versus all-ones.
    """
    prev_data = K.image_data_format()
    K.set_image_data_format('channels_last')
    # Restore the global backend setting even when an assertion fails, so a
    # failing run cannot leak the data format into later tests.
    try:
        for input_dim, kernel_size in zip([32, 33], [2, 3]):
            input_shape = [input_dim, input_dim, 3]
            X = np.random.random_sample(4 * input_dim * input_dim * 3).reshape([4] + input_shape)
            y = np.random.random_sample(4 * input_dim * input_dim * 3).reshape([4] + input_shape)

            model = Sequential()
            model.add(Conv2D(32, (3, 3), padding='same', input_shape=input_shape, activation='relu'))
            model.add(Conv2D(3, (3, 3), padding='same', input_shape=input_shape, activation='relu'))
            adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8)
            model.compile(loss=DSSIMObjective(kernel_size=kernel_size), metrics=['mse'], optimizer=adam)
            model.fit(X, y, batch_size=2, epochs=1, shuffle='batch')

            # Test same
            x1 = K.constant(X, 'float32')
            x2 = K.constant(X, 'float32')
            dssim = DSSIMObjective(kernel_size=kernel_size)
            assert_allclose(0.0, K.eval(dssim(x1, x2)), atol=1e-4)

            # Test opposite
            x1 = K.zeros([4] + input_shape)
            x2 = K.ones([4] + input_shape)
            dssim = DSSIMObjective(kernel_size=kernel_size)
            assert_allclose(0.5, K.eval(dssim(x1, x2)), atol=1e-4)
    finally:
        K.set_image_data_format(prev_data)
def test_DSSIM_channels_first():
    """Exercise ``DSSIMObjective`` with the backend set to channels_first.

    For each (input size, kernel size) pair a small conv model is trained
    for one epoch with the DSSIM loss. The loss is then checked to be ~0
    for identical inputs and ~0.5 for all-zeros versus all-ones.
    """
    prev_data = K.image_data_format()
    K.set_image_data_format('channels_first')
    # Restore the global backend setting even when an assertion fails, so a
    # failing run cannot leak the data format into later tests.
    try:
        for input_dim, kernel_size in zip([32, 33], [2, 3]):
            input_shape = [3, input_dim, input_dim]
            X = np.random.random_sample(4 * input_dim * input_dim * 3).reshape([4] + input_shape)
            y = np.random.random_sample(4 * input_dim * input_dim * 3).reshape([4] + input_shape)

            model = Sequential()
            model.add(Conv2D(32, (3, 3), padding='same', input_shape=input_shape, activation='relu'))
            model.add(Conv2D(3, (3, 3), padding='same', input_shape=input_shape, activation='relu'))
            adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8)
            model.compile(loss=DSSIMObjective(kernel_size=kernel_size), metrics=['mse'], optimizer=adam)
            model.fit(X, y, batch_size=2, epochs=1, shuffle='batch')

            # Test same
            x1 = K.constant(X, 'float32')
            x2 = K.constant(X, 'float32')
            dssim = DSSIMObjective(kernel_size=kernel_size)
            assert_allclose(0.0, K.eval(dssim(x1, x2)), atol=1e-4)

            # Test opposite
            x1 = K.zeros([4] + input_shape)
            x2 = K.ones([4] + input_shape)
            dssim = DSSIMObjective(kernel_size=kernel_size)
            assert_allclose(0.5, K.eval(dssim(x1, x2)), atol=1e-4)
    finally:
        K.set_image_data_format(prev_data)