The following 50 code examples, extracted from open-source Python projects, illustrate how to use keras.backend.function().
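Before the project examples, here is a minimal, self-contained sketch of the pattern they all share: K.function compiles a callable that maps backend input tensors to output tensors, with K.learning_phase() passed alongside the data to select train/test behaviour. The toy model and random input below are illustrative only, assuming an old-style Keras with a Theano or TensorFlow 1.x backend.

import numpy as np
from keras import backend as K
from keras.layers import Dense, Input
from keras.models import Model

# Toy model: K.function compiles a callable from backend inputs to outputs.
inp = Input(shape=(4,))
hidden = Dense(8, activation='relu')(inp)
out = Dense(2, activation='softmax')(hidden)
model = Model(inp, out)

# Fetch the hidden layer's activations; the learning phase selects train/test
# behaviour for layers such as Dropout or BatchNormalization (0 = test).
get_hidden = K.function([model.input, K.learning_phase()], [hidden])
x = np.random.rand(3, 4).astype('float32')
hidden_values = get_hidden([x, 0])[0]  # shape (3, 8)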
def get_image_descriptor_for_image(image, model):
    im = cv2.resize(image, (224, 224)).astype(np.float32)
    dim_ordering = K.image_dim_ordering()
    if dim_ordering == 'th':
        # 'RGB'->'BGR'
        im = im[::-1, :, :]
        # Zero-center by mean pixel
        im[0, :, :] -= 103.939
        im[1, :, :] -= 116.779
        im[2, :, :] -= 123.68
    else:
        # 'RGB'->'BGR'
        im = im[:, :, ::-1]
        # Zero-center by mean pixel
        im[:, :, 0] -= 103.939
        im[:, :, 1] -= 116.779
        im[:, :, 2] -= 123.68
        im = im.transpose((2, 0, 1))
    im = np.expand_dims(im, axis=0)
    inputs = [K.learning_phase()] + model.inputs
    _convout1_f = K.function(inputs, [model.layers[33].output])
    return _convout1_f([0] + [im])
def test_softmax():
    '''Test using a reference implementation of softmax.'''
    def softmax(values):
        m = np.max(values)
        e = np.exp(values - m)
        return e / np.sum(e)

    x = K.placeholder(ndim=2)
    f = K.function([x], [activations.softmax(x)])
    test_values = get_standard_values()

    result = f([test_values])[0]
    expected = softmax(test_values)
    assert_allclose(result, expected, rtol=1e-05)
def reverse_generator(generator, X_sample, y_sample, title):
    """Gradient descent to map images back to their latent vectors."""
    latent_vec = np.random.normal(size=(1, 100))

    # Function for figuring out how to bump the input.
    target = K.placeholder()
    loss = K.sum(K.square(generator.outputs[0] - target))
    grad = K.gradients(loss, generator.inputs[0])[0]
    update_fn = K.function(generator.inputs + [target], [grad])

    # Repeatedly apply the update rule.
    xs = []
    for i in range(60):
        print('%d: latent_vec mean=%f, std=%f'
              % (i, np.mean(latent_vec), np.std(latent_vec)))
        xs.append(generator.predict_on_batch([latent_vec, y_sample]))
        for _ in range(10):
            update_vec = update_fn([latent_vec, y_sample, X_sample])[0]
            latent_vec -= update_vec * update_rate

    # Plot the samples.
    xs = np.concatenate(xs, axis=0)
    plot_as_gif(xs, X_sample, title)
def visualizeLayer(model, img, input_image, layerIndex):
    layer = model.layers[layerIndex]

    get_activations = K.function([model.layers[0].input, K.learning_phase()],
                                 [layer.output])
    activations = get_activations([input_image, 0])[0]
    output_image = activations

    # If 4-dimensional, the last dimension holds the number of filters
    if output_image.ndim == 4:
        # Rearrange dimensions so we can plot the result
        o1 = np.rollaxis(output_image, 3, 1)
        output_image = np.rollaxis(o1, 3, 1)

        print("Dumping filter data of layer{} - {}".format(layerIndex, layer.__class__.__name__))
        filters = len(output_image[0, 0, 0, :])

        fig = plt.figure(figsize=(8, 8))
        # This loop plots the filter maps for the input image
        for i in range(filters):
            ax = fig.add_subplot(6, 6, i + 1)
            #ax.imshow(output_image[img, :, :, i], interpolation='none')  # to see the first filter
            ax.imshow(output_image[0, :, :, i], 'gray')
            #ax.set_title("Feature map of layer#{} \ncalled '{}' \nof type {} ".format(layerIndex,
            #             layer.name, layer.__class__.__name__))
            plt.xticks(np.array([]))
            plt.yticks(np.array([]))
        plt.tight_layout()
        #plt.show()
        fig.savefig("img_" + str(img) + "_layer" + str(layerIndex) + "_"
                    + layer.__class__.__name__ + ".png")
        #plt.close(fig)
    else:
        print("Can't dump data of this layer{} - {}".format(layerIndex, layer.__class__.__name__))
def get_feature_map_4(model, im):
    im = im.astype(np.float32)
    dim_ordering = K.image_dim_ordering()
    if dim_ordering == 'th':
        # 'RGB'->'BGR'
        im = im[::-1, :, :]
        # Zero-center by mean pixel
        im[0, :, :] -= 103.939
        im[1, :, :] -= 116.779
        im[2, :, :] -= 123.68
    else:
        # 'RGB'->'BGR'
        im = im[:, :, ::-1]
        # Zero-center by mean pixel
        im[:, :, 0] -= 103.939
        im[:, :, 1] -= 116.779
        im[:, :, 2] -= 123.68
        im = im.transpose((2, 0, 1))
    im = np.expand_dims(im, axis=0)
    inputs = [K.learning_phase()] + model.inputs
    _convout1_f = K.function(inputs, [model.layers[23].output])
    feature_map = _convout1_f([0] + [im])
    feature_map = np.array([feature_map])
    feature_map = feature_map[0, 0, 0, :, :, :]
    return feature_map
def get_conv_image_descriptor_for_image(image, model):
    im = cv2.resize(image, (224, 224)).astype(np.float32)
    dim_ordering = K.image_dim_ordering()
    if dim_ordering == 'th':
        # 'RGB'->'BGR'
        im = im[::-1, :, :]
        # Zero-center by mean pixel
        im[0, :, :] -= 103.939
        im[1, :, :] -= 116.779
        im[2, :, :] -= 123.68
    else:
        # 'RGB'->'BGR'
        im = im[:, :, ::-1]
        # Zero-center by mean pixel
        im[:, :, 0] -= 103.939
        im[:, :, 1] -= 116.779
        im[:, :, 2] -= 123.68
        im = im.transpose((2, 0, 1))
    im = np.expand_dims(im, axis=0)
    inputs = [K.learning_phase()] + model.inputs
    _convout1_f = K.function(inputs, [model.layers[31].output])
    return _convout1_f([0] + [im])
def test_hard_sigmoid():
    '''Test using a reference hard sigmoid implementation.'''
    def ref_hard_sigmoid(x):
        '''Reference hard sigmoid with slope and shift values from theano, see
        https://github.com/Theano/Theano/blob/master/theano/tensor/nnet/sigm.py
        '''
        x = (x * 0.2) + 0.5
        z = 0.0 if x <= 0 else (1.0 if x >= 1 else x)
        return z

    hard_sigmoid = np.vectorize(ref_hard_sigmoid)

    x = K.placeholder(ndim=2)
    f = K.function([x], [activations.hard_sigmoid(x)])
    test_values = get_standard_values()

    result = f([test_values])[0]
    expected = hard_sigmoid(test_values)
    assert_allclose(result, expected, rtol=1e-05)
def __init__(self, mdl, x):
    self.loss_value = None
    self.grad_values = None
    self.mdl = mdl

    loss = K.variable(0.)
    layer_dict = dict([(layer.name, layer) for layer in mdl.layers])

    inp = layer_dict['face'].output
    out = layer_dict['conf'].output

    loss -= K.sum(out)
    # Might want to add some L2-loss in here, depending on output
    # loss += 0.0005 * K.sum(K.square(inp - x))
    grads = K.gradients(loss, inp)

    outputs = [loss]
    if type(grads) in {list, tuple}:
        outputs += grads
    else:
        outputs.append(grads)

    self.f_outputs = K.function([inp, K.learning_phase()], outputs)
def on_epoch_begin(self, epoch, logs={}):
    # rebind the paint function to implement curriculum learning
    if epoch >= 3 and epoch < 6:
        self.paint_func = lambda text: paint_text(text, self.img_w, self.img_h,
                                                  rotate=False, ud=True, multi_fonts=False)
    elif epoch >= 6 and epoch < 9:
        self.paint_func = lambda text: paint_text(text, self.img_w, self.img_h,
                                                  rotate=False, ud=True, multi_fonts=True)
    elif epoch >= 9:
        self.paint_func = lambda text: paint_text(text, self.img_w, self.img_h,
                                                  rotate=True, ud=True, multi_fonts=True)
    if epoch >= 21 and self.max_string_len < 12:
        self.build_word_list(32000, 12, 0.5)

# the actual loss calc occurs here despite it not being
# an internal Keras loss function
def visualize(model, layer_name):
    print('Model loaded.')
    layer_dict = dict([(layer.name, layer) for layer in model.layers])

    for filter_index in sample(range(0, layer_dict[layer_name].nb_filter), 10):
        layer_output = layer_dict[layer_name].output
        loss = K.mean(layer_output[:, filter_index, :, :])
        grads = K.gradients(loss, model.layers[0].input)[0]
        grads /= (K.sqrt(K.mean(K.square(grads))) + 1e-5)
        iterate = K.function([model.layers[0].input, K.learning_phase()], [loss, grads])

        input_img_data = np.asarray([read_image('visimage.jpg')])

        for _ in range(100):
            loss_value, grads_value = iterate([input_img_data, 0])
            input_img_data += grads_value * 3

        img = deprocess_image(input_img_data[0])
        write_image(img, '../activations/out{}.jpg'.format(filter_index))
def get_gradcam(image, model, layer_name, mode):
    layer = model.get_layer(layer_name)
    image = np.expand_dims(image, 0)
    loss = K.variable(0.)
    if mode == "abnormal":
        loss += K.sum(model.output)
    elif mode == "normal":
        loss += K.sum(1 - model.output)
    else:
        raise ValueError("mode must be normal or abnormal")

    # gradients of the prediction wrt the conv layer of choice are used
    upstream_grads = K.gradients(loss, layer.output)[0]
    feature_weights = K.mean(upstream_grads, axis=[1, 2])  # spatial global avg pool
    heatmap = K.relu(K.dot(layer.output, K.transpose(feature_weights)))

    fetch_heatmap = K.function([model.input, K.learning_phase()], [heatmap])
    return fetch_heatmap([image, 0])[0]
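A possible invocation of the get_gradcam helper above. The image array and the layer name 'block5_conv3' are hypothetical and must match your model's preprocessing and architecture:

# Hypothetical usage: `img` is one preprocessed image array (H, W, C);
# 'block5_conv3' is assumed to be the last conv layer of the model.
heatmap = get_gradcam(img, model, 'block5_conv3', mode='abnormal')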
def extract_activation(layer, num, metric, min_threshold=0.):
    """
    Extracts the top `num` neurons per sample, across the channels
    in the CNN's activations.

    Args:
        layer: activation array of a Keras layer (first axis: samples)
        num: int - how many to extract
        metric: function used to score each channel slice

    Returns:
        indices of the top `num` channels
    """
    holder = []
    for ind, t_slice in enumerate(layer[0]):
        if np.mean(t_slice) < min_threshold:
            continue
        holder.append([ind, metric(t_slice)])
    s_list = np.asarray(sorted(holder, key=lambda v_pair: v_pair[1], reverse=True))
    return s_list[:num, 0]
def select_weight(model, data, target_layers, top_n, activation_fn, save_deconv=False):
    """
    Extract the top neurons based on some function of the weight matrix.

    Args:
        model: Model to use.
        data: input data (unused in this function).
        target_layers: list of (layer_name, layer_index) pairs.
        top_n: number of neurons to keep per layer.
        activation_fn: scoring function passed to extract_weight.
        save_deconv: whether to save deconvolution results (unused in this function).

    Returns:
        list of [layer_name, significant-neuron indices] pairs.
    """
    significants = []
    for l_name, i in target_layers:
        L = model.layers[i]
        significants.append([l_name, extract_weight(L, top_n, activation_fn)])
    return significants
def create_fc_model(self):
    # This is where the neural network model is initialized
    init = 'glorot_uniform'
    self.state_in = Input(self.state_dim)
    self.hidden = Dense(256, init=init, activation='elu')(self.state_in)
    self.value = Dense(1)(self.hidden)
    self.policy = Dense(self.action_dim, init=init, activation='softmax')(self.hidden)

    self.q_values = self.entropy_coef * (Theano.log(self.policy + 1e-18) -
                                         Theano.tile(Theano.sum(Theano.log(self.policy + 1e-18) * self.policy,
                                                                axis=[1], keepdims=True),
                                                     (1, self.action_dim)))
    self.q_values = self.q_values + Theano.tile(self.value, (1, self.action_dim))
    self.model = Model(self.state_in, output=[self.policy, self.value])
def deprocess_image(x):
    if K.image_dim_ordering() == "th":
        x = x.reshape((3, img_width, img_height))
        x = x.transpose((1, 2, 0))
    else:
        x = x.reshape((img_width, img_height, 3))

    x[:, :, 0] += 103.939
    x[:, :, 1] += 116.779
    x[:, :, 2] += 123.68

    # BGR -> RGB
    x = x[:, :, ::-1]
    x = np.clip(x, 0, 255).astype('uint8')
    return x

# util function to preserve image color
def content_loss(base, combination):
    channel_dim = 0 if K.image_dim_ordering() == "th" else -1
    channels = K.int_shape(base)[channel_dim]
    size = img_width * img_height

    if args.content_loss_type == 1:
        multiplier = 1. / (2. * (channels ** 0.5) * (size ** 0.5))
    elif args.content_loss_type == 2:
        multiplier = 1. / (channels * size)
    else:
        multiplier = 1.

    return multiplier * K.sum(K.square(combination - base))

# the 3rd loss function, total variation loss,
# designed to keep the generated image locally coherent
def deprocess_image(x):
    if K.image_dim_ordering() == 'th':
        x = x.reshape((3, img_nrows, img_ncols))
        x = x.transpose((1, 2, 0))
    else:
        x = x.reshape((img_nrows, img_ncols, 3))

    x[:, :, 0] += 103.939
    x[:, :, 1] += 116.779
    x[:, :, 2] += 123.68

    # BGR to RGB
    x = x[:, :, ::-1]
    x = np.clip(x, 0, 255).astype('uint8')
    return x

# util function to preserve image color
def preprocess_image(image_path, load_dims=False, style_image=False):
    global img_WIDTH, img_HEIGHT, aspect_ratio, b_scale_ratio_height, b_scale_ratio_width

    img = imread(image_path, mode="RGB")  # Prevents crashes due to PNG images (ARGB)
    if load_dims:
        img_WIDTH = img.shape[0]
        img_HEIGHT = img.shape[1]
        aspect_ratio = img_HEIGHT / img_WIDTH

    if style_image:
        b_scale_ratio_width = float(img.shape[0]) / img_WIDTH
        b_scale_ratio_height = float(img.shape[1]) / img_HEIGHT

    img = imresize(img, (img_width, img_height))
    img = img.transpose((2, 0, 1)).astype('float64')
    img = np.expand_dims(img, axis=0)
    return img

# util function to convert a tensor into a valid image
def find_analogy_patches(a, a_prime, b, patch_size=3, patch_stride=1):
    '''This is for precalculating the analogy_loss.

    Since A, A', and B never change, we only need to calculate the
    patch matches once.
    '''
    # extract patches from feature maps
    a_patches, a_patches_norm = make_patches(K.variable(a), patch_size, patch_stride)
    a_prime_patches, a_prime_patches_norm = make_patches(K.variable(a_prime), patch_size, patch_stride)
    b_patches, b_patches_norm = make_patches(K.variable(b), patch_size, patch_stride)
    # find best patches and calculate loss
    p = find_patch_matches(b_patches, b_patches_norm, a_patches / a_patches_norm)
    #best_patches = a_prime_patches[p]
    best_patches = K.reshape(a_prime_patches[p], K.shape(b_patches))
    f = K.function([], best_patches)
    best_patches = f([])
    return best_patches
def test_sigmoid():
    '''Test using a numerically stable reference sigmoid implementation.'''
    def ref_sigmoid(x):
        if x >= 0:
            return 1 / (1 + np.exp(-x))
        else:
            z = np.exp(x)
            return z / (1 + z)

    sigmoid = np.vectorize(ref_sigmoid)

    x = K.placeholder(ndim=2)
    f = K.function([x], [activations.sigmoid(x)])
    test_values = get_standard_values()

    result = f([test_values])[0]
    expected = sigmoid(test_values)
    assert_allclose(result, expected, rtol=1e-05)
def normalize(x, value):
    """
    Normalizes `value` with respect to the image dimensions. This makes the
    regularizer weight factor more or less uniform across various input
    image sizes.

    Args:
        x: 4D tensor with shape `(samples, channels, rows, cols)` if
           dim_ordering='th', or `(samples, rows, cols, channels)` if
           dim_ordering='tf'.
        value: the expression to normalize.

    Returns:
        The normalized expression.
    """
    return value / np.prod((3, 1024, 768))

# continuity loss util function
def iterate_softmax(model, neuron):
    input_tensor = model.input

    # this is a placeholder tensor that will contain our generated images
    # build a loss function that maximizes the activation
    # of the nth filter of the layer considered
    print('X shape', model.output[:, neuron])
    x = model.output

    loss_weight_continuity = 0.0
    loss_weight_activity = 1.0

    loss = K.mean(x)
    #loss += loss_weight_continuity * total_variation_norm(input_tensor)

    # compute the gradient of the input picture wrt this loss
    grads = K.gradients(loss, input_tensor)[0]
    # normalization trick: we normalize the gradient
    grads /= (K.sqrt(K.mean(K.square(grads))) + 1e-5)
    # this function returns the loss and grads given the input picture
    return K.function([input_tensor], [loss, grads])
def style_loss(style, combination, mask_path=None, nb_channels=None):
    assert K.ndim(style) == 3
    assert K.ndim(combination) == 3

    if mask_path is not None:
        style_mask = load_mask(mask_path, nb_channels)
        style = style * style_mask
        combination = combination * style_mask
        del style_mask

    S = gram_matrix(style)
    C = gram_matrix(combination)
    channels = 3
    size = img_width * img_height
    return K.sum(K.square(S - C)) / (4. * (channels ** 2) * (size ** 2))

# an auxiliary loss function
# designed to maintain the "content" of the
# base image in the generated image
def content_loss(base, combination):
    channel_dim = 0 if K.image_dim_ordering() == "th" else -1
    channels = K.shape(base)[channel_dim]
    size = img_width * img_height

    if args.content_loss_type == 1:
        multiplier = 1 / (2. * channels ** 0.5 * size ** 0.5)
    elif args.content_loss_type == 2:
        multiplier = 1 / (channels * size)
    else:
        multiplier = 1.

    return multiplier * K.sum(K.square(combination - base))

# the 3rd loss function, total variation loss,
# designed to keep the generated image locally coherent
def _define_io_loss_xy(self):
    u, p, q, s = {}, {}, {}, {}
    x, y = Input(shape=(784,)), Input(shape=(10,))
    q['z'], s['z'], p['x'] = self.xy_graph(x, y)
    u['x'] = self.u_net['x'](x)
    q['y'] = self.q_net['y'](u['x'])

    def alpha_loss(y, y_param):
        return K.categorical_crossentropy(q['y'], y)

    def xy_loss(x, x_param):
        return self.labeled_loss(x, q['z'], s['z'], p['x'])

    self._predict = K.function([x, K.learning_phase()], q['y'])
    return self._standardize_io_loss([x, y],
                                     [q['y'], p['x']],
                                     [alpha_loss, xy_loss])
def get_activations(model, inputs, print_shape_only=False, layer_name=None):
    # Documentation is available online on GitHub at the address below.
    # From: https://github.com/philipperemy/keras-visualize-activations
    print('----- activations -----')
    activations = []
    inp = model.input
    if layer_name is None:
        outputs = [layer.output for layer in model.layers]
    else:
        outputs = [layer.output for layer in model.layers if layer.name == layer_name]  # all layer outputs
    funcs = [K.function([inp] + [K.learning_phase()], [out]) for out in outputs]  # evaluation functions
    layer_outputs = [func([inputs, 1.])[0] for func in funcs]
    for layer_activations in layer_outputs:
        activations.append(layer_activations)
        if print_shape_only:
            print(layer_activations.shape)
        else:
            print(layer_activations)
    return activations
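A sketch of calling the helper above; `model`, `x_batch`, and the layer name 'fc1' are placeholders for a compiled Keras model, a matching input batch, and one of its layer names:

# Hypothetical usage: print only the shape of each layer's activations.
acts = get_activations(model, x_batch, print_shape_only=True)
# Restrict to a single layer by name:
fc1_acts = get_activations(model, x_batch, layer_name='fc1')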
def find_analogy_patches(a, a_prime, b, patch_size=3, patch_stride=1):
    '''This is for precalculating the analogy_loss.

    Since A, A', and B never change, we only need to calculate the
    patch matches once.
    '''
    # extract patches from feature maps
    a_patches, a_patches_norm = patches.make_patches(K.variable(a), patch_size, patch_stride)
    a_prime_patches, a_prime_patches_norm = patches.make_patches(K.variable(a_prime), patch_size, patch_stride)
    b_patches, b_patches_norm = patches.make_patches(K.variable(b), patch_size, patch_stride)
    # find best patches and calculate loss
    p = patches.find_patch_matches(b_patches, b_patches_norm, a_patches / a_patches_norm)
    #best_patches = a_prime_patches[p]
    best_patches = K.reshape(a_prime_patches[p], K.shape(b_patches))
    f = K.function([], best_patches)
    best_patches = f([])
    return best_patches
def extract_hypercolumns(model, layer_indexes, image):
    layers = [model.layers[li].output for li in layer_indexes]
    get_feature = K.function([model.layers[0].input], layers)
    feature_maps = get_feature([[image]])
    hypercolumns = []

    for convmap in feature_maps:
        fmaps = [np.float32(convmap[0, :, :, i]) for i in range(convmap.shape[-1])]
        layer = []
        for fmap in fmaps:
            fmap = np.abs(fmap)
            norm = np.max(np.max(fmap, axis=0), axis=0)
            if norm > 0:
                fmap = fmap / norm
            upscaled = scipy.misc.imresize(fmap, size=(66, 200), mode="F", interp='bilinear')
            layer.append(upscaled)
        hypercolumns.append(np.mean(np.float32(layer), axis=0))

    return np.asarray(hypercolumns)
def build_output(self):
    mean = Dense(self.output_size, activation=MeanAct, kernel_initializer=self.init,
                 kernel_regularizer=l1_l2(self.l1_coef, self.l2_coef),
                 name='mean')(self.decoder_output)

    # Plug in dispersion parameters via fake dispersion layer
    disp = ConstantDispersionLayer(name='dispersion')
    mean = disp(mean)

    output = ColWiseMultLayer(name='output')([mean, self.sf_layer])

    nb = NB(disp.theta_exp)
    self.loss = nb.loss
    self.extra_models['dispersion'] = lambda: K.function([], [nb.theta])([])[0].squeeze()
    self.extra_models['mean_norm'] = Model(inputs=self.input_layer, outputs=mean)
    self.extra_models['decoded'] = Model(inputs=self.input_layer, outputs=self.decoder_output)
    self.model = Model(inputs=[self.input_layer, self.sf_layer], outputs=output)

    if self.ae:
        self.encoder = self.get_encoder()
def _make_tfrecord_train_function(self):
    if not hasattr(self, 'train_function'):
        raise RuntimeError('You must compile your model before using it.')
    if self.train_function is None:
        inputs = []
        if self.uses_learning_phase and not isinstance(K.learning_phase(), int):
            inputs += [K.learning_phase()]

        training_updates = self.optimizer.get_updates(
            self._collected_trainable_weights,
            self.constraints,
            self.total_loss)
        updates = self.updates + training_updates

        # Gets loss and metrics. Updates weights at each call.
        self.train_function = K.function(inputs,
                                         [self.total_loss] + self.metrics_tensors,
                                         updates=updates)
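What distinguishes this train function from the plain fetch functions elsewhere on this page is the `updates` argument: each call applies the optimizer's weight updates as a side effect of evaluating the outputs. A minimal sketch of that mechanism, using a bare backend variable rather than real model weights:

# Each call to `step` runs the update (counter <- counter + 1) as a side effect.
counter = K.variable(0.)
step = K.function([], [counter], updates=[(counter, counter + 1)])
step([])
print(K.get_value(counter))  # 1.0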
def __init__(self, layer, linear=False):
    '''
    # Arguments
        layer: an instance of Activation layer, whose configuration
               will be used to initiate DActivation(input_shape,
               output_shape, weights)
    '''
    self.layer = layer
    self.linear = linear
    self.activation = layer.activation
    input = K.placeholder(shape=layer.output_shape)

    output = self.activation(input)
    # According to the original paper,
    # in the forward pass and the backward pass, do the same activation (relu)
    self.up_func = K.function([input, K.learning_phase()], output)
    self.down_func = K.function([input, K.learning_phase()], output)

# Compute activation in forward pass
def _sample_predictive(self, test_x=None, return_stats=False, **kwargs):
    """Draws a new sample from the model."""
    if self._sample_predictive_fn is None:
        self._sample_predictive_fn = K.function([self.model.layers[0].input, K.learning_phase()],
                                                [self.model.layers[-1].output])
    sample = self._sample_predictive_fn([test_x, 1])

    stats = None
    if return_stats:
        stats = SampleStats(time=self._running_time())

    return sample, [stats]
def get_feature_map_8(model, im):
    im = im.astype(np.float32)
    dim_ordering = K.image_dim_ordering()
    if dim_ordering == 'th':
        # 'RGB'->'BGR'
        im = im[::-1, :, :]
        # Zero-center by mean pixel
        im[0, :, :] -= 103.939
        im[1, :, :] -= 116.779
        im[2, :, :] -= 123.68
    else:
        # 'RGB'->'BGR'
        im = im[:, :, ::-1]
        # Zero-center by mean pixel
        im[:, :, 0] -= 103.939
        im[:, :, 1] -= 116.779
        im[:, :, 2] -= 123.68
        im = im.transpose((2, 0, 1))
    im = np.expand_dims(im, axis=0)
    inputs = [K.learning_phase()] + model.inputs
    _convout1_f = K.function(inputs, model.outputs)
    feature_map = _convout1_f([0] + [im])
    feature_map = np.array([feature_map])
    feature_map = feature_map[0, 0, 0, :, :, :]
    return feature_map

# get shallower feature map
def get_activations(model, layer_name, input_img):
    layer_dict = dict([(layer.name, layer) for layer in model.layers[1:]])
    get_activations = K.function([model.layers[0].input, K.learning_phase()],
                                 layer_dict[layer_name].output)
    activations = get_activations([input_img, 0])
    return activations
def get_activations(model, layer, X_batch):
    get_activations = K.function([model.layers[0].input, K.learning_phase()],
                                 model.layers[layer].output)
    activations = get_activations([X_batch, 0])
    return activations
def get_input_mask(model, layer, X_batch):
    get_input_mask = K.function([model.layers[0].input, K.learning_phase()],
                                model.layers[layer].input_mask)
    input_mask = get_input_mask([X_batch, 0])
    return input_mask
def get_output_mask(model, layer, X_batch):
    get_output_mask = K.function([model.layers[0].input, K.learning_phase()],
                                 model.layers[layer].output_mask)
    output_mask = get_output_mask([X_batch, 0])
    return output_mask
def get_input(model, layer, X_batch):
    get_input = K.function([model.layers[0].input, K.learning_phase()],
                           model.layers[layer].input)
    _input = get_input([X_batch, 0])
    return _input
def get_prediction_batch(self):
    return backend.function(self.predictive_net.inputs + [backend.learning_phase()],
                            self.predictive_net.outputs)
def get_predicted_graphemes_and_loss_batch(self):
    return backend.function(self.loss_net.inputs + [backend.learning_phase()],
                            [single(self.decoding_net.outputs), single(self.loss_net.outputs)])
def get_network_layer_output(model, dataInput, layerNum, **kwargs):
    """
    :param model: a Keras model
    :param dataInput: input batch to feed the model
    :param layerNum: index of the layer whose output is fetched
    :param kwargs: 'phase' -- 'test' (default) or 'train'
    :return: the output of the requested layer for dataInput
    """
    get_output = K.function([model.layers[0].input, K.learning_phase()],
                            [model.layers[layerNum].output])

    phase = kwargs.get('phase', None)
    if phase is None or phase == 'test':
        # output in test mode = 0
        layer_output = get_output([dataInput, 0])[0]
    elif phase == 'train':
        # output in train mode = 1
        layer_output = get_output([dataInput, 1])[0]
    else:
        raise RuntimeError("invalid phase passed to get_network_layer_output")

    return layer_output
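A sketch of calling the helper above; `x_batch` and the layer index are illustrative:

# Hypothetical usage: layer 2's output in test mode (default) and train mode
# (the distinction matters when the network contains Dropout or BatchNorm).
feats_test = get_network_layer_output(model, x_batch, 2)
feats_train = get_network_layer_output(model, x_batch, 2, phase='train')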
def get_layer_output(model, layer_name, x):
    return K.function([model.layers[0].input],
                      [model.get_layer(layer_name).output])([x])[0]
def get_batch(self, index, size, train):
    if K.image_dim_ordering() == 'th':
        X_data = np.ones([size, 1, self.img_h, self.img_w])
    else:
        X_data = np.ones([size, self.img_h, self.img_w, 1])

    labels = np.ones([size, self.absolute_max_string_len])
    input_length = np.zeros([size, 1])
    label_length = np.zeros([size, 1])
    source_str = []
    for i in range(0, size):
        # Mix in some blank inputs. This seems to be important for
        # achieving translational invariance
        if train and i > size - 4:
            if K.image_dim_ordering() == 'th':
                X_data[i, 0, :, :] = paint_text('', self.img_w, self.img_h)
            else:
                X_data[i, :, :, 0] = paint_text('', self.img_w, self.img_h)
            labels[i, 0] = self.blank_label
            input_length[i] = self.downsample_width
            label_length[i] = 1
            source_str.append('')
        else:
            if K.image_dim_ordering() == 'th':
                X_data[i, 0, :, :] = paint_text(self.X_text[index + i], self.img_w, self.img_h)
            else:
                X_data[i, :, :, 0] = paint_text(self.X_text[index + i], self.img_w, self.img_h)
            labels[i, :] = self.Y_data[index + i]
            input_length[i] = self.downsample_width
            label_length[i] = self.Y_len[index + i]
            source_str.append(self.X_text[index + i])
    inputs = {'the_input': X_data,
              'the_labels': labels,
              'input_length': input_length,
              'label_length': label_length,
              'source_str': source_str  # used for visualization only
              }
    outputs = {'ctc': np.zeros([size])}  # dummy data for dummy loss function
    return (inputs, outputs)
def preprocess_image(image_path):
    img = load_img(image_path, target_size=(img_nrows, img_ncols))
    img = img_to_array(img)
    img = np.expand_dims(img, axis=0)
    img = vgg16.preprocess_input(img)
    return img

# util function to convert a tensor into a valid image