我们从Python开源项目中，提取了以下33个代码示例，用于说明如何使用theano.tensor.transpose()。
def get_output_for(self, input, deterministic=False, **kwargs):
    """Rearrange channel groups of ``input`` into spatial resolution (phase shift).

    ``input`` is split along axis 1 into ``self.c`` equally sized groups.
    Each group is phase-shifted with factor ``self.r`` into a map with a
    single (broadcastable) channel axis, and the maps are concatenated
    back along axis 1.

    ``deterministic`` and ``kwargs`` are accepted but not used here.
    """
    def _phase_shift(chunk, r):
        batch = chunk.shape[0]
        rows = self.output_shape[2] // r
        cols = self.output_shape[3] // r

        out = T.reshape(chunk, (batch, r, r, rows, cols))
        out = T.transpose(out, (0, 3, 4, 1, 2))  # batch, rows, cols, r, r

        # Peel off one row index at a time and lay the pieces side by side.
        pieces = T.split(x=out, splits_size=[1] * rows, n_splits=rows, axis=1)
        pieces = [T.reshape(piece, (batch, cols, r, r)) for piece in pieces]
        out = T.concatenate(pieces, axis=2)  # batch, cols, rows*r, r

        # Same again for the column index.
        pieces = T.split(x=out, splits_size=[1] * cols, n_splits=cols, axis=1)
        pieces = [T.reshape(piece, (batch, rows * r, r)) for piece in pieces]
        out = T.concatenate(pieces, axis=2)  # batch, rows*r, cols*r

        # Re-insert a broadcastable channel axis.
        return out.dimshuffle(0, 'x', 1, 2)

    group_size = input.shape[1] // self.c
    groups = T.split(x=input, splits_size=[group_size] * self.c,
                     n_splits=self.c, axis=1)
    shifted = [_phase_shift(group, self.r) for group in groups]
    return T.concatenate(shifted, axis=1)

# Multiscale Dilated Convolution Block
# This function (not a layer in and of itself, though you could make it one) returns a set of concatenated conv2d and dilatedconv2d layers.
# Each layer uses the same basic filter W, operating at a different dilation factor (or taken as the mean of W for the 1x1 conv).
# The channel-wise output of each layer is weighted by a set of coefficients, which are initialized to 1 / the total number of dilation scales,
# meaning that we're starting by taking an elementwise mean. These should be learnable parameters.
# NOTES: - I'm considering changing the variable names to be more descriptive, and look less like ridiculous academic code. It's on the to-do list.
#        - I keep the bias and nonlinearity out of the default definition for this layer, as I expect it to be batchnormed and nonlinearized in the model config.
def apply(self, application_call, defs, def_mask):
    """Return one vector per definition using the dictionary-based lookup.

    Word ids below ``self._num_input_words`` are embedded with
    ``self._def_lookup``; all other ids are embedded with ``self._cache``.
    The merged embeddings go through ``self._def_fork`` and
    ``self._def_rnn``; the last step of the first RNN output is returned.
    """
    in_shortlist = T.lt(defs, self._num_input_words)
    out_of_shortlist = T.ge(defs, self._num_input_words)
    unk = self._vocab.unk

    # Short listing: each lookup sees UNK wherever the other one is in charge.
    defs_sl_main = in_shortlist * defs + out_of_shortlist * unk
    defs_sl_cache = out_of_shortlist * defs + in_shortlist * unk

    application_call.add_auxiliary_variable(
        unk_ratio(defs_sl_main, def_mask, unk),
        name='def_unk_ratio')

    embedded_def_words = self._def_lookup.apply(defs_sl_main)
    cached_embeddings = self._cache.apply(defs_sl_cache)

    # Select, per position, the embedding from whichever lookup owns the id.
    final_embeddings = (
        in_shortlist.dimshuffle(0, 1, 'x') * embedded_def_words
        + out_of_shortlist.dimshuffle(0, 1, 'x') * cached_embeddings)

    # The RNN receives the first two axes swapped, with the mask transposed
    # to match.
    rnn_input = T.transpose(self._def_fork.apply(final_embeddings), (1, 0, 2))
    rnn_outputs = self._def_rnn.apply(rnn_input, mask=def_mask.T)
    return rnn_outputs[0][-1]
def symbolic_distance_matrix(A, B):
    """Build the symbolic matrix of Euclidean distances between rows of A and B.

    Uses the expansion ``|a - b|^2 = |a|^2 + |b|^2 - 2 a.b``.

    :param A: symbolic matrix, one vector per row
    :param B: symbolic matrix, one vector per row
    :return: symbolic matrix whose entry (i, j) is the distance between
        row i of A and row j of B
    """
    sq_norm_a = T.sum(A * A, axis=1)
    sq_norm_b = T.sum(B * B, axis=1)
    cross_term = T.dot(A, T.transpose(B))

    # Tile the squared norms so both line up with the cross-term matrix.
    a_grid = T.transpose(T.tile(sq_norm_a, (sq_norm_b.shape[0], 1)))
    b_grid = T.tile(sq_norm_b, (sq_norm_a.shape[0], 1))

    # Clamp at zero before the square root: rounding can push the
    # expansion slightly negative.
    squared = T.maximum(a_grid + b_grid - 2 * cross_term, 0)
    return T.sqrt(squared)
def forward_conv(self, x):
    """Convolve a single sequence with the layer's filters.

    :param x: (length, dim)
    :return: 2-D result with the sequence axis first and hidden_dim second

    NOTE(review): the original notes give ``length + kernel - 1`` output
    rows, which corresponds to a 'full' convolution or to an input that
    was padded by the caller. With ``border_mode='valid'`` the row count
    is ``rows - kernel + 1`` -- confirm against the caller.
    """
    # conv2d wants (batch size, input channels, input rows, input columns);
    # here that is (1, 1, length, in_dim).
    image = x.dimshuffle(['x', 'x', 0, 1])

    # conv2d wants (output channels, input channels, filter rows, filter
    # columns); here that is (hidden_dim, 1, kernel_size, in_dim).
    filters = self.W.dimshuffle([1, 'x', 0, 2])

    feature_maps = T.nnet.conv2d(image, filters, border_mode='valid')

    # Drop the singleton batch and column axes, then put the sequence
    # axis first: (hidden_dim, rows) -> (rows, hidden_dim).
    return T.transpose(feature_maps[0, :, :, 0], (1, 0))
def forward_conv_batch(self, x):
    """Convolve a batch of sequences with the layer's filters.

    :param x: (batch, length, dim)
    :return: (batch, length - kernel + 2*padding_size + 1, hidden_dim)
    """
    # conv2d wants (batch size, input channels, input rows, input columns);
    # here that is (batch size, 1, length, in_dim).
    images = x.dimshuffle([0, 'x', 1, 2])

    # conv2d wants (output channels, input channels, filter rows, filter
    # columns); here that is (hidden_dim, 1, kernel_size, in_dim).
    filters = self.W.dimshuffle([1, 'x', 0, 2])

    feature_maps = T.nnet.conv2d(images, filters, border_mode='valid')

    # Drop the singleton column axis and move hidden_dim last:
    # (batch, hidden_dim, rows) -> (batch, rows, hidden_dim).
    return T.transpose(feature_maps[:, :, :, 0], (0, 2, 1))
def test_logical_shapes(self):
    """Check that conv2d rejects the removed logical-shape arguments.

    Logical shapes are not supported anymore, so passing
    ``imshp_logical`` / ``kshp_logical`` must raise ``ValueError``.
    """
    kshp = (10, 2, 10, 10)
    featshp = (3, 10, 11, 11)
    # Kernel shape with the first two dimensions swapped, matching the
    # symbolic transpose applied to the kernel below.
    kshp_rotated = (kshp[1], kshp[0], kshp[2], kshp[3])

    for stride in range(1, 4):
        images = tensor.ftensor4()
        kernels = tensor.ftensor4()

        # Need to transpose first two dimensions of kernel, and reverse
        # index kernel image dims (for correlation)
        kernel_rotated = tensor.transpose(kernels, axes=[1, 0, 2, 3])
        featshp_logical = (featshp[0], featshp[1],
                           featshp[2] * stride, featshp[3] * stride)

        with self.assertRaises(ValueError):
            tensor.nnet.conv2d(images, kernel_rotated,
                               border_mode='full',
                               image_shape=featshp,
                               filter_shape=kshp_rotated,
                               imshp_logical=featshp_logical[1:],
                               kshp_logical=kshp[2:])
def get_conv_xy_all(layer, deterministic=True):
    """Express a convolution layer as a matrix product over image patches.

    Every receptive field of the layer's input is unrolled into a row
    (via ``images2neibs``), and the layer's filters are flattened into a
    matrix, so that the layer's pre-activation is ``y = x . w (+ b)``.

    :param layer: convolution layer exposing ``W``, ``b``, ``flip_filters``,
        ``pad``, ``stride``, ``filter_size`` and ``input_layer``
    :param deterministic: forwarded to ``L.get_output`` when computing the
        layer's input. The default ``True`` matches the previously
        hard-coded value.
    :return: ``(x, y)`` -- the 2-D patch matrix and its product with the
        flattened filters (plus bias when ``layer.b`` is not None)
    """
    w_np = layer.W.get_value()
    w = layer.W
    if layer.flip_filters:
        # Reverse both spatial axes of the filters.
        w = w[:, :, ::-1, ::-1]

    input_layer = layer.input_layer
    if layer.pad == 'same':
        # Pad by half the filter size on each spatial axis before
        # extracting patches in 'valid' mode.
        input_layer = L.PadLayer(layer.input_layer,
                                 width=np.array(w_np.shape[2:]) // 2,
                                 batch_ndim=2)
    output_shape = L.get_output_shape(layer)

    # Honour the caller's `deterministic` flag (it used to be ignored).
    x_orig = L.get_output(input_layer, deterministic=deterministic)

    x = theano.tensor.nnet.neighbours.images2neibs(
        x_orig,
        neib_shape=layer.filter_size,
        neib_step=layer.stride,
        mode='valid')
    # Regroup the flat patch list as (batch, channels, positions, patch size),
    # bring positions next to batch, then fold channels into the patch axis.
    x = T.reshape(x, (x_orig.shape[0], -1,
                      np.prod(output_shape[2:]), np.prod(w_np.shape[2:])))
    x = T.transpose(x, (0, 2, 1, 3))
    x = T.reshape(x, (-1, T.prod(x.shape[2:])))

    w = T.flatten(w, outdim=2).T  # D,O
    y = T.dot(x, w)  # N,O
    if layer.b is not None:
        y += T.shape_padaxis(layer.b, axis=0)
    return x, y
def transpose(x):
    """Return ``x`` with its axes reversed, as computed by ``T.transpose``."""
    transposed = T.transpose(x)
    return transposed
def depth_to_space(input, scale, data_format=None):
    '''Use the phase shift algorithm to trade channels/depth for spatial resolution.

    The channel axis is divided by ``scale ** 2`` while rows and columns
    are each multiplied by ``scale``. ``data_format`` defaults to
    ``image_data_format()`` and is lower-cased before use.
    '''
    chosen_format = image_data_format() if data_format is None else data_format
    chosen_format = chosen_format.lower()

    tensor_in = _preprocess_conv2d_input(input, chosen_format)
    batch, depth, rows, cols = tensor_in.shape
    out_channels = depth // (scale ** 2)

    # Split depth into (scale, scale, out_channels), then interleave the
    # two scale axes with rows and columns respectively.
    blocks = T.reshape(tensor_in, (batch, scale, scale, out_channels, rows, cols))
    blocks = T.transpose(blocks, (0, 3, 4, 1, 5, 2))
    upscaled = T.reshape(blocks, (batch, out_channels, rows * scale, cols * scale))

    return _postprocess_conv2d_output(upscaled, tensor_in, None, None, None,
                                      chosen_format)
def compute_vector_distances(trainingdata, testdata):
    """Compute squared pairwise distances between candidates and targets.

    Compiles and immediately runs a Theano function. With ``n`` rows in
    ``trainingdata`` and ``m`` rows in ``testdata`` it returns two ``[m, n]``
    arrays: the squared distances, and for each target the candidate
    indices sorted by increasing distance.
    """
    # adapted from https://gist.github.com/danielvarga/d0eeacea92e65b19188c
    # with lamblin's workaround at https://github.com/Theano/Theano/issues/1399
    n = trainingdata.shape[0]  # number of candidates
    assert testdata.shape[1] == trainingdata.shape[1]
    m = testdata.shape[0]  # number of targets

    candidates = T.matrix('x')
    targets = T.matrix('y')

    cand_sq = T.sum(candidates * candidates, axis=-1)  # [n]
    targ_sq = T.sum(targets * targets, axis=-1)  # [m]
    cand_sq_grid = T.zeros((m, n)) + cand_sq  # broadcasting, [m, n]
    targ_sq_grid = T.zeros((n, m)) + targ_sq  # broadcasting, [n, m]

    squared_dists = (cand_sq_grid.T + targ_sq_grid
                     - 2.0 * T.dot(candidates, targets.T))  # [n, m]

    # Disabled nearest-neighbour-only variant kept from the original:
    # lamblinsTrick = False
    # if lamblinsTrick:
    #     s = squaredPairwiseDistances
    #     bestIndices = T.cast( ( T.arange(n).dimshuffle(0, 'x') * T.cast(T.eq(s, s.min(axis=0, keepdims=True)), 'float32') ).sum(axis=0), 'int32')
    # else:
    #     bestIndices = T.argmin(squaredPairwiseDistances, axis=0)
    # nearests_fn = theano.function([x, y], bestIndices, profile=False)
    # return nearests_fn(trainingdata, testdata)

    outputs = [T.transpose(squared_dists),
               T.transpose(T.argsort(squared_dists, axis=0))]
    squaredpwdist_fn = theano.function([candidates, targets], outputs,
                                       profile=False)
    return squaredpwdist_fn(trainingdata, testdata)
def transpose(x):
    """Return the transpose of ``x``, propagating its static Keras shape.

    When ``x`` carries a ``_keras_shape`` attribute, the result gets the
    same attribute with the dimensions reversed, so shape inference
    survives the transpose. Inputs without that attribute are handled
    exactly as before.
    """
    y = T.transpose(x)
    if hasattr(x, '_keras_shape'):
        # T.transpose with no axes reverses all dimensions, so the static
        # shape is simply reversed as well.
        y._keras_shape = tuple(reversed(x._keras_shape))
    return y
def forward_batch(self, x, mask, seqs):
    """Compose each sequence in the batch by scanning ``self.encode_batch``.

    :param x: (batch, length, dim)
    :param mask: (batch, length)
    :param seqs: (batch, length - 1, 3)
    :return: tuple ``(phrases, sum_loss_recs)`` -- the final value of the
        scan's second output, and the scan's third output summed over steps
    """
    batch_size = x.shape[0]
    zeros_rec = T.zeros((batch_size,))

    # (batch, length, dim) -> (batch, 2 * length - 1, dim): the extra
    # length - 1 zero slots are appended after the input vectors.
    # NOTE(review): presumably they hold the composed nodes -- confirm
    # against encode_batch.
    vector = T.concatenate([x, T.zeros_like(x)[:, :-1, :]], axis=1)

    # scan iterates over the leading axis, so put the step axis first.
    # (batch, length - 1, 3) -> (length - 1, batch, 3)
    seqs = T.transpose(seqs, axes=(1, 0, 2))
    # (batch, length - 1) -> (length - 1, batch)
    mask = T.transpose(mask, axes=(1, 0))
    range_index = T.arange(batch_size)

    # NOTE(review): the original comments around this call were lost to
    # an encoding error. The first mask row is skipped via mask[1:].
    result, _ = theano.scan(
        fn=self.encode_batch,
        sequences=[seqs, mask[1:]],
        outputs_info=[vector, vector[:, 0, :], zeros_rec],
        non_sequences=[self.W, self.b, self.Wr, self.br, range_index],
        name="compose_scan")
    _, pres, loss_recs = result

    # (word - 1, batch, dim) -> (batch, dim): keep only the last step.
    phrases = pres[-1]
    sum_loss_recs = T.sum(loss_recs, axis=0)

    # (batch, dim): optionally scale every row to unit L2 norm.
    if self.normalize:
        phrases = phrases / phrases.norm(2, axis=1)[:, None]
    return phrases, sum_loss_recs
def forward_scan_batch(self, x, mask):
    """Run ``self._step_batch`` over a batch of sequences.

    :param x: (batch, max_len, dim)
    :param mask: (batch, max_len)
    :return: hidden states with the batch axis first again
    """
    # scan walks the leading axis, so feed it time-major tensors.
    x_by_step = T.transpose(x, (1, 0, 2))      # (max_len, batch, dim)
    mask_by_step = T.transpose(mask, (1, 0))   # (max_len, batch)
    initial_hidden = T.zeros((x.shape[0], self.hidden_dim))

    hidden_by_step, _ = theano.scan(
        fn=self._step_batch,
        sequences=[x_by_step, mask_by_step],
        outputs_info=[initial_hidden],
        non_sequences=[self.W, self.U, self.b])

    # (max_len, batch, dim) -> (batch, max_len, dim)
    return T.transpose(hidden_by_step, (1, 0, 2))
def forward_scan_batch(self, x, mask):
    """Run ``self._step_batch`` (two recurrent states) over a batch of sequences.

    Both states start at zero with shape ``(x.shape[0], self.hidden_dim)``.
    Only the first scan output is returned, with its first two axes
    swapped back.
    """
    state_shape = (x.shape[0], self.hidden_dim)
    initial_h = T.zeros(state_shape)
    initial_c = T.zeros(state_shape)

    # scan walks the leading axis, so swap the first two axes of the inputs.
    x_by_step = T.transpose(x, (1, 0, 2))
    mask_by_step = T.transpose(mask, (1, 0))

    outputs, _ = theano.scan(
        fn=self._step_batch,
        sequences=[x_by_step, mask_by_step],
        outputs_info=[initial_h, initial_c],
        non_sequences=[self.W, self.U, self.b])

    return T.transpose(outputs[0], (1, 0, 2))
def backward_scan_batch(self, x, mask):
    """Run ``self._step_batch`` over a batch of sequences from last step to first.

    Uses the backward weights ``W_backward`` and ``U_backward``. The
    result has the first two axes swapped back and the step axis
    re-reversed so it lines up with the input order.
    """
    x_by_step = T.transpose(x, (1, 0, 2))
    mask_by_step = T.transpose(mask, (1, 0))
    initial_hidden = T.zeros((x.shape[0], self.hidden_dim))

    hidden_by_step, _ = theano.scan(
        fn=self._step_batch,
        sequences=[x_by_step, mask_by_step],
        outputs_info=[initial_hidden],
        non_sequences=[self.W_backward, self.U_backward],
        go_backwards=True)

    # go_backwards yields outputs in reverse step order; flip axis 1
    # after swapping the first two axes to undo that.
    batch_major = T.transpose(hidden_by_step, (1, 0, 2))
    return batch_major[:, ::-1]
def backward_scan_batch(self, x, mask):
    """Run ``self._step_batch`` over a batch of sequences from last step to first.

    Uses the backward parameters ``W_backward``, ``U_backward`` and
    ``b_backward``. The result has the first two axes swapped back and
    the step axis re-reversed so it lines up with the input order.
    """
    x_by_step = T.transpose(x, (1, 0, 2))
    mask_by_step = T.transpose(mask, (1, 0))
    initial_hidden = T.zeros((x.shape[0], self.hidden_dim))

    hidden_by_step, _ = theano.scan(
        fn=self._step_batch,
        sequences=[x_by_step, mask_by_step],
        outputs_info=[initial_hidden],
        non_sequences=[self.W_backward, self.U_backward, self.b_backward],
        go_backwards=True)

    # go_backwards yields outputs in reverse step order; flip axis 1
    # after swapping the first two axes to undo that.
    batch_major = T.transpose(hidden_by_step, (1, 0, 2))
    return batch_major[:, ::-1]
def backward_scan_batch(self, x, mask):
    """Run the two-state ``self._step_batch`` over a batch from last step to first.

    Both recurrent states start at zero. Only the first scan output is
    returned, with the first two axes swapped back and the step axis
    re-reversed so it lines up with the input order.
    """
    state_shape = (x.shape[0], self.hidden_dim)
    initial_h = T.zeros(state_shape)
    initial_c = T.zeros(state_shape)

    x_by_step = T.transpose(x, (1, 0, 2))
    mask_by_step = T.transpose(mask, (1, 0))

    outputs, _ = theano.scan(
        fn=self._step_batch,
        sequences=[x_by_step, mask_by_step],
        outputs_info=[initial_h, initial_c],
        non_sequences=[self.W_backward, self.U_backward, self.b_backward],
        go_backwards=True)

    # go_backwards yields outputs in reverse step order; flip axis 1
    # after swapping the first two axes to undo that.
    batch_major = T.transpose(outputs[0], (1, 0, 2))
    return batch_major[:, ::-1]
def forward_sequence_batch(self, x, mask, batch_size):
    """Run ``self._step_batch`` over a batch, starting from a shared zero state.

    :param x: (batch, max_len, dim)
    :param mask: (batch, max_len)
    :param batch_size: number of rows of the shared initial state ``h0``
    :return: hidden states with the batch axis first again
    """
    initial_hidden = shared_zero_matrix((batch_size, self.hidden_dim), 'h0')

    # scan walks the leading axis, so feed it time-major tensors.
    x_by_step = T.transpose(x, (1, 0, 2))      # (max_len, batch, dim)
    mask_by_step = T.transpose(mask, (1, 0))   # (max_len, batch)

    hidden_by_step, _ = theano.scan(
        fn=self._step_batch,
        sequences=[x_by_step, mask_by_step],
        outputs_info=[initial_hidden],
        non_sequences=[self.W, self.U, self.b])

    # (max_len, batch, dim) -> (batch, max_len, dim)
    return T.transpose(hidden_by_step, (1, 0, 2))
def transpose(x):
    """Return the transpose of ``x``, carrying over its static Keras shape.

    If ``x`` has a ``_keras_shape`` attribute, the result receives the
    same attribute with its dimensions reversed.
    """
    transposed = T.transpose(x)
    try:
        static_shape = x._keras_shape
    except AttributeError:
        # No static shape recorded on the input: nothing to propagate.
        return transposed
    transposed._keras_shape = tuple(reversed(static_shape))
    return transposed
def __init__(self, rng, input, batch_size, in_size, latent_size, W_a = None, W_b = None, epsilon = 0.01):
    """Build a stick-breaking latent layer driven by Kumaraswamy samples.

    :param rng: random generator providing ``randint`` and
        ``standard_normal`` (used to seed the Theano stream and to
        initialise the weights)
    :param input: symbolic input; multiplied by ``W_a`` / ``W_b``
    :param batch_size: number of rows sampled and tracked by the recursion
    :param in_size: number of rows of ``W_a`` and ``W_b``
    :param latent_size: number of latent dimensions; ``latent_size - 1``
        stick fractions are sampled, the last dimension is the leftover stick
    :param W_a: optional existing shared weights for the ``a`` parameter
    :param W_b: optional existing shared weights for the ``b`` parameter
    :param epsilon: threshold on the remaining stick used for
        ``avg_used_dims``
    """
    self.srng = theano.tensor.shared_randomstreams.RandomStreams(rng.randint(999999))
    self.input = input

    # setup variational params
    # Weights are created only when the caller did not supply them; both
    # use small Gaussian noise of shape (in_size, latent_size - 1).
    if W_a is None:
        W_values = np.asarray(0.01 * rng.standard_normal(size=(in_size, latent_size-1)), dtype=theano.config.floatX)
        W_a = theano.shared(value=W_values, name='W_a')
    if W_b is None:
        W_values = np.asarray(0.01 * rng.standard_normal(size=(in_size, latent_size-1)), dtype=theano.config.floatX)
        W_b = theano.shared(value=W_values, name='W_b')
    self.W_a = W_a
    self.W_b = W_b

    # compute Kumaraswamy samples
    # Uniform noise is kept inside [0.01, 0.99] and pushed through
    # (1 - u^(1/b))^(1/a), with a and b given by Softplus of a linear map.
    uniform_samples = T.cast(self.srng.uniform(size=(batch_size, latent_size-1), low=0.01, high=0.99), theano.config.floatX)
    self.a = Softplus(T.dot(self.input, self.W_a))
    self.b = Softplus(T.dot(self.input, self.W_b))
    v_samples = (1-(uniform_samples**(1/self.b)))**(1/self.a)

    # setup variables for recursion
    # Initial scan state: no segment taken yet, a full stick of length 1
    # for every batch row.
    stick_segment = theano.shared(value=np.zeros((batch_size,), dtype=theano.config.floatX), name='stick_segment')
    remaining_stick = theano.shared(value=np.ones((batch_size,), dtype=theano.config.floatX), name='remaining_stick')

    def compute_latent_vars(i, stick_segment, remaining_stick, v_samples):
        # compute stick segment
        # Break fraction v_samples[:, i] off what is left, then shrink
        # the remainder by the complementary fraction.
        stick_segment = v_samples[:,i] * remaining_stick
        remaining_stick *= (1-v_samples[:,i])
        return (stick_segment, remaining_stick)

    # One scan step per sampled fraction; outputs are stacked along
    # axis 0, giving (latent_size - 1, batch_size) for each.
    (stick_segments, remaining_sticks), updates = theano.scan(fn=compute_latent_vars, outputs_info=[stick_segment, remaining_stick],sequences=T.arange(latent_size-1), non_sequences=[v_samples], strict=True)

    # Per batch row, count the steps whose remaining stick still exceeds
    # epsilon, then average over the batch.
    self.avg_used_dims = T.mean(T.sum(remaining_sticks > epsilon, axis=0))
    # Append the final leftover stick as one extra row, then transpose so
    # the batch axis comes first.
    self.latent_vars = T.transpose(T.concatenate([stick_segments, T.shape_padaxis(remaining_sticks[-1, :],axis=1).T], axis=0))

    self.params = [self.W_a, self.W_b]
def get_output_for(self, inputs, **kwargs):
    """Decode a sequence with a GRU that is fed its own previous prediction.

    :param inputs: sequence ``(q, m, epmem_dropout)``. ``m`` (after
        dropout) is the initial hidden state, ``q`` is concatenated to
        the previous softmax output to form each step's input, and
        ``epmem_dropout`` is the dropout probability applied to ``m``.
    :return: the softmax outputs of all ``self.max_answer_word`` steps,
        with the step axis (axis 0 of the scan output) swapped with axis 1
    """
    # typical GRU, but prediction produced by softmax layer is applied to GRU's input
    q = inputs[0]
    m = inputs[1]
    epmem_dropout = inputs[2]

    # Dropout is applied to m only; the matching line for q was already
    # disabled in the original code.
    m = m * self.rand_stream.binomial(m.shape, p=1-epmem_dropout, dtype=theano.config.floatX)

    # Stack the three gates' parameters so each step needs one dot product
    # per input instead of three.
    W_in_stacked = T.concatenate([self.W_in_to_resetgate,
                                  self.W_in_to_updategate,
                                  self.W_in_to_hid_update], axis=1)
    W_hid_stacked = T.concatenate([self.W_hid_to_resetgate,
                                   self.W_hid_to_updategate,
                                   self.W_hid_to_hid_update], axis=1)
    b_stacked = T.concatenate([self.b_resetgate,
                               self.b_updategate,
                               self.b_hid_update], axis=0)

    def slice_w(x, n):
        # Column block n (0 = reset, 1 = update, 2 = candidate) of a
        # stacked pre-activation.
        return x[:, n*self.hid_state_size:(n+1)*self.hid_state_size]

    def get_output(a):
        return nonlin.softmax(T.dot(a, self.W))

    def step(hid_previous, out_previous, *args):
        input_n = T.concatenate([out_previous, q], axis=1)

        hid_input = T.dot(hid_previous, W_hid_stacked)
        input_n = T.dot(input_n, W_in_stacked) + b_stacked

        resetgate = slice_w(hid_input, 0) + slice_w(input_n, 0)
        updategate = slice_w(hid_input, 1) + slice_w(input_n, 1)
        resetgate = self.nonlinearity_resetgate(resetgate)
        updategate = self.nonlinearity_updategate(updategate)

        hid_update_in = slice_w(input_n, 2)
        hid_update_hid = slice_w(hid_input, 2)
        hid_update = hid_update_in + resetgate*hid_update_hid
        hid_update = self.nonlinearity_hid(hid_update)

        # Interpolate between the old state and the candidate. The
        # original added the gate to the candidate
        # (updategate + hid_update) instead of multiplying.
        hid = (1 - updategate)*hid_previous + updategate*hid_update
        out = get_output(hid)
        return (hid, out)

    # strict=True requires every shared/outer variable used inside `step`
    # to be listed here.
    non_seqs = [W_in_stacked, b_stacked, W_hid_stacked, q, m, self.W]
    hid_and_out, _ = theano.scan(
        fn=step,
        outputs_info=[m, get_output(m)],
        non_sequences=non_seqs,
        strict=True,
        n_steps=self.max_answer_word)

    return T.transpose(hid_and_out[1], (1, 0, 2))
def lstm_layer(shared_params, options, emb_dia, emb_pm, x_mask, time, method, use_pm = 1):
    """Scan an LSTM variant with extra time and previous-step features.

    :param shared_params: mapping holding the weights ``lstm_W``, ``lstm_U``,
        ``lstm_b``, ``lstm_Pf``, ``lstm_Po`` and ``lstm_Z``
    :param options: mapping providing ``dim_prj`` and ``dim_emb``
    :param emb_dia: main input sequence; its leading axis is the step axis
    :param emb_pm: second input sequence, gated by mask row 1 at each step
    :param x_mask: per-step mask; each step's slice is indexed as
        ``m_[0]`` and ``m_[1]``
    :param time: per-step time values, turned into three scaled features
    :param method: per-step values whose reciprocal scales the input gate
    :param use_pm: when 0, the carried ``pm`` state is the raw ``xf_``
        instead of the masked one
    :return: the first scan output (the hidden states ``h``) for all steps
    """
    n_steps = emb_dia.shape[0]
    if emb_dia.ndim == 3:
        n_samples = emb_dia.shape[1]
    else:
        n_samples = 1

    def _slice(_x, n, dim):
        # Take the n-th block of width `dim` along the last axis.
        if _x.ndim == 3:
            return _x[:, :, n * dim:(n+1) * dim]
        return _x[:, n*dim:(n+1)*dim]

    def _step(x_, xf_, m_, time_, method_, h_, c_, pm_):
        # Arguments follow scan's order: the five sequences first, then
        # the three recurrent states (h, c, pm) from the previous step.
        preact = tensor.dot(x_, shared_params['lstm_W']) \
            + tensor.dot(h_, shared_params['lstm_U']) + shared_params['lstm_b']

        # Current-step pm is xf_ gated by the second mask row. The forget
        # gate uses the previous step's pm, the output gate the current one.
        pm = m_[1, :, None] * xf_
        pre_f = tensor.dot(pm_, shared_params['lstm_Pf'])
        pre_o = tensor.dot(pm, shared_params['lstm_Po'])

        # Three time features: t/60, (t/180)^2 and (t/365)^3, stacked and
        # transposed so the features lie on the last axis.
        time = tensor.concatenate([[time_/60.0], [(time_/180.0) ** 2], [(time_/365.0) ** 3]])
        time = tensor.transpose(time)
        pre_t = tensor.dot(time, shared_params['lstm_Z'])

        # The input gate is scaled by 1 / method_; the forget gate also
        # receives the pm and time terms, the output gate the pm term.
        i = tensor.nnet.sigmoid(_slice(preact, 0, options['dim_prj'])) * (1.0 / method_[:, None])
        f = tensor.nnet.sigmoid(_slice(preact, 1, options['dim_prj']) + pre_f + pre_t)
        o = tensor.nnet.sigmoid(_slice(preact, 2, options['dim_prj']) + pre_o)
        c = tensor.tanh(_slice(preact, 3, options['dim_prj']))

        c = f * c_ + i * c
        # Where mask row 0 is zero, keep the previous cell/hidden state.
        c = m_[0, :, None] * c + (1.0 - m_[0])[:, None] * c_

        h = o * tensor.tanh(c)
        h = m_[0, :, None] * h + (1.0 - m_[0])[:, None] * h_

        if use_pm == 0:
            pm = xf_

        return h, c, pm

    dim_prj = options['dim_prj']
    dim_emb = options['dim_emb']
    # Initial states are zeros: h and c of width dim_prj, pm of width dim_emb.
    rval, updates = theano.scan(_step,
                                sequences=[emb_dia, emb_pm, x_mask, time, method],
                                outputs_info=[tensor.alloc(numpy_floatX(0.), n_samples, dim_prj),
                                              tensor.alloc(numpy_floatX(0.), n_samples, dim_prj),
                                              tensor.alloc(numpy_floatX(0.), n_samples, dim_emb)],
                                name='lstm_layer',
                                n_steps=n_steps)
    return rval[0]
def __init__(self, feature_count, transformer, k=8, stdev=0.1):
    """Build the symbolic graph and prediction function of a factorization machine.

    :param feature_count: number of input features (columns of ``X``)
    :param transformer: object whose ``transform`` is applied to the raw
        model output
    :param k: number of interaction factors per feature
    :param stdev: standard deviation of the Gaussian initialisation of
        the interaction factors
    """
    d = feature_count

    # ---- Symbolic variables ------------------------------------------
    self.X = T.matrix()  # design matrix
    self.y = T.vector()  # response
    self.s = T.vector()  # sample weights
    self.e = T.scalar()  # current epoch

    # ---- Model parameters --------------------------------------------
    # Bias term (intercept), first-order coefficients and interaction
    # factors; only the factors start from random values.
    self.w0 = theano.shared(np.zeros(1), allow_downcast=True)
    self.w1 = theano.shared(np.zeros(d), allow_downcast=True)
    self.v = theano.shared(stdev * np.random.randn(k, d), allow_downcast=True)

    # ---- The model ---------------------------------------------------
    # The formula for pairwise interactions is from the bottom left
    # of page 997 of Rendle 2010, "Factorization Machines."
    # This version scales linearly in k and d, as opposed to O(d^2).
    square_of_sums = T.dot(self.X, T.transpose(self.v)) ** 2
    sum_of_squares = T.dot(self.X ** 2, T.transpose(self.v ** 2))
    interactions = 0.5 * T.sum(square_of_sums - sum_of_squares, axis=1)

    raw_output = self.w0[0] + T.dot(self.X, self.w1) + interactions
    self.y_hat = transformer.transform(raw_output)

    # ---- Prediction --------------------------------------------------
    self.theano_predict = theano.function(
        inputs=[self.X],
        outputs=self.y_hat,
        allow_input_downcast=True)
def test_convolution_transpose_th(self):
    """Check ConvolutionTranspose2D against the backend's own op ('th' ordering).

    Builds a one-layer model with all-ones weights and input, then
    recomputes the transposed convolution directly with the active
    backend and compares shapes and values.
    """
    border_mode = 'valid'

    K.set_image_dim_ordering('th')
    batch = 1
    height = 2
    width = 2
    channels_in = 1
    channels_out = 2
    kernel_size = 3
    strides = (1, 1)
    # 'th' ordering: channels come before the spatial axes.
    input_shape = (channels_in, height, width)

    input = Input(shape=input_shape, dtype=K.floatx())
    conv_layer = ConvolutionTranspose2D(channels_out, kernel_size, kernel_size, dim_ordering=K.image_dim_ordering(), init='one', subsample=strides, border_mode=border_mode, activation='linear')
    output = conv_layer(input)
    model = Model(input=[input], output=[output])
    model.compile(loss='mean_squared_error', optimizer='sgd')

    x = np.ones((batch,) + input_shape).astype(K.floatx())
    kernel = conv_layer.W
    output_model = model.predict(x)

    if K._BACKEND == 'theano':
        # Reference: the gradient of conv2d w.r.t. its inputs.
        output_shape = conv_layer.get_output_shape_for(K.shape(x))
        y = T.nnet.abstract_conv.conv2d_grad_wrt_inputs(theano.shared(x), kernel, output_shape, filter_shape=None, border_mode=border_mode, subsample=strides, filter_flip=True)

        output = y.eval()
    else:
        # Any other backend is treated as TensorFlow: the input, kernel
        # and output shape are permuted before conv2d_transpose, and the
        # result is permuted back for the comparison.
        sess = K.get_session()
        output_shape = conv_layer.get_output_shape_for(K.shape(x))
        output_shape = tf.pack([1, output_shape[2], output_shape[3], output_shape[1]])

        x = tf.transpose(x, (0, 2, 3, 1))
        kernel = tf.transpose(kernel, (2, 3, 1, 0))

        y = tf.nn.conv2d_transpose(x, kernel, output_shape, (1, ) + strides + (1, ), padding=border_mode.upper())
        y = tf.transpose(y, (0, 3, 1, 2))

        output = sess.run(y)

    # A 2x2 input with a 3x3 kernel, stride 1 and 'valid' mode is expected
    # to produce a 4x4 map per output channel.
    self.assertEqual(output_model.shape, (1, 2, 4, 4))
    self.assertEqual(output.shape, (1, 2, 4, 4))
    self.assertEqual(True, (output==output_model).all())

    # model.fit(x, x + 1, nb_epoch=1)
def test_convolution_transpose_tf(self):
    """Check ConvolutionTranspose2D against the backend's own op ('tf' ordering).

    Builds a one-layer model with all-ones weights and input, then
    recomputes the transposed convolution directly with the active
    backend and compares shapes and values.
    """
    border_mode = 'valid'

    K.set_image_dim_ordering('tf')
    batch = 1
    height = 2
    width = 2
    channels_in = 1
    channels_out = 2
    kernel_size = 3
    strides = (1, 1)
    # 'tf' ordering: channels come after the spatial axes.
    input_shape = (height, width, channels_in)

    input = Input(shape=input_shape, dtype=K.floatx())
    conv_layer = ConvolutionTranspose2D(channels_out, kernel_size, kernel_size, dim_ordering=K.image_dim_ordering(), init='one', subsample=strides, border_mode=border_mode, activation='linear')
    output = conv_layer(input)
    model = Model(input=[input], output=[output])
    model.compile(loss='mean_squared_error', optimizer='sgd')

    x = np.ones((batch,) + input_shape).astype(K.floatx())
    kernel = conv_layer.W
    output_model = model.predict(x)

    if K._BACKEND == 'theano':
        # The input, kernel and output shape are permuted before the
        # Theano op, and the result is permuted back for the comparison.
        output_shape = conv_layer.get_output_shape_for(K.shape(x))
        output_shape = (1, output_shape[3], output_shape[1], output_shape[2])

        x = np.transpose(x, (0, 3, 1, 2))
        kernel = T.transpose(kernel, (3, 2, 1, 0))

        y = T.nnet.abstract_conv.conv2d_grad_wrt_inputs(theano.shared(x), kernel, output_shape, filter_shape=None, border_mode=border_mode, subsample=strides, filter_flip=True)
        y = T.transpose(y, (0, 2, 3, 1))

        output = y.eval()
    else:
        # Any other backend is treated as TensorFlow; no permutation of
        # the data is applied in this branch.
        sess = K.get_session()
        output_shape = conv_layer.get_output_shape_for(K.shape(x))
        output_shape = tf.pack([1, output_shape[1], output_shape[2], output_shape[3]])

        y = tf.nn.conv2d_transpose(x, kernel, output_shape, (1, ) + strides + (1, ), padding=border_mode.upper())

        output = sess.run(y)

    # A 2x2 input with a 3x3 kernel, stride 1 and 'valid' mode is expected
    # to produce a 4x4 map per output channel.
    self.assertEqual(output_model.shape, (1, 4, 4, 2))
    self.assertEqual(output.shape, (1, 4, 4, 2))
    self.assertEqual(True, (output==output_model).all())

    # model.fit(x, x + 1, nb_epoch=1)
def test_transpose():
    """Compare ``tensor.transpose`` and ``.transpose()`` against numpy.

    Covers 1-, 2- and 3-dimensional inputs, the default axis reversal,
    explicit axes given positionally, as a tuple and as a list, and the
    automatic naming of the transposed variable.
    """
    x1 = tensor.dvector('x1')
    x2 = tensor.dmatrix('x2')
    x3 = tensor.dtensor3('x3')

    x1v = numpy.arange(24)
    x2v = numpy.arange(24).reshape(2, 12)
    x3v = numpy.arange(24).reshape(2, 3, 4)

    outputs = [
        # Function form, default axes.
        tensor.transpose(x1),
        tensor.transpose(x2),
        tensor.transpose(x3),
        # Method form, default axes.
        x1.transpose(),
        x2.transpose(),
        x3.transpose(),
        # Method form, explicit axes (positional and tuple).
        x2.transpose(0, 1),
        x3.transpose((0, 2, 1)),
        # Function form, explicit axes.
        tensor.transpose(x2, [0, 1]),
        tensor.transpose(x3, [0, 2, 1]),
    ]
    f = theano.function([x1, x2, x3], outputs)
    t1, t2, t3, t1b, t2b, t3b, t2c, t3c, t2d, t3d = f(x1v, x2v, x3v)

    # Default transpose, function form: same shape and values as numpy.
    for result, value in ((t1, x1v), (t2, x2v), (t3, x3v)):
        expected = numpy.transpose(value)
        assert result.shape == expected.shape
        assert numpy.all(result == expected)

    # Default transpose, method form.
    for result, value in ((t1b, x1v), (t2b, x2v), (t3b, x3v)):
        assert numpy.all(result == value.transpose())

    # Explicit axes, method form.
    assert t2c.shape == (2, 12)
    assert t3c.shape == (2, 4, 3)
    assert numpy.all(t2c == x2v.transpose([0, 1]))
    assert numpy.all(t3c == x3v.transpose([0, 2, 1]))

    # Explicit axes, function form.
    assert t2d.shape == (2, 12)
    assert t3d.shape == (2, 4, 3)
    assert numpy.all(t2d == numpy.transpose(x2v, [0, 1]))
    assert numpy.all(t3d == numpy.transpose(x3v, [0, 2, 1]))

    # Check that we create a name.
    for variable, expected_name in ((x1, 'x1.T'), (x2, 'x2.T'), (x3, 'x3.T')):
        assert tensor.transpose(variable).name == expected_name
    # An unnamed input must not get an invented name.
    assert tensor.transpose(tensor.dmatrix()).name is None
def gemm_directly(bs, ch, nf, rImg1, rImg2, rFlt1, rFlt2, subsx, subsy, direction):
    """Compare one GpuCorrMM op against the ``py_conv`` reference on random data.

    :param bs: first dimension of the image array
    :param ch: second dimension of both the image and the kernel array
    :param nf: first dimension of the kernel array
    :param rImg1, rImg2: last two dimensions of the image array
    :param rFlt1, rFlt2: last two dimensions of the kernel array
    :param subsx, subsy: subsampling factors passed to both implementations
    :param direction: one of ``'fprop'``, ``'bprop img'``, ``'bprop kern'``,
        selecting which GPU op is exercised

    NOTE(review): any other ``direction`` leaves ``cpuval``/``gpuval``
    unassigned, so the final assertion fails with a NameError-style
    error rather than a clear message.
    """
    ishape = (bs, ch, rImg1, rImg2)
    kshape = (nf, ch, rFlt1, rFlt2)
    subsample = (subsx, subsy)

    npy_img = theano._asarray(numpy.random.rand(*ishape), dtype='float32')
    npy_kern = theano._asarray(numpy.random.rand(*kshape), dtype='float32')

    if direction == 'fprop':
        # Symbolic inputs are broadcastable on every axis of length 1.
        i = cuda.CudaNdarrayType(
            broadcastable=[sh == 1 for sh in npy_img.shape])()
        k = cuda.CudaNdarrayType(
            broadcastable=[sh == 1 for sh in npy_kern.shape])()

        cpuval = py_conv(npy_img, npy_kern, 'valid', subsample)
        op = theano.sandbox.cuda.blas.GpuCorrMM(border_mode='valid',
                                                subsample=subsample)(i, k)
        f = theano.function([i, k], op, mode=theano_mode)
        # The kernel is reversed along both spatial axes for the GPU op --
        # presumably because GpuCorrMM correlates while py_conv convolves;
        # confirm.
        gpuval = f(npy_img, npy_kern[:, :, ::-1, ::-1])
    elif direction == 'bprop img':
        # The op's first input takes the kernel with its first two axes
        # swapped; the second takes the image array.
        i = cuda.CudaNdarrayType(
            broadcastable=[sh == 1 for sh in
                           npy_kern.transpose(1, 0, 2, 3).shape])()
        k = cuda.CudaNdarrayType(
            broadcastable=[sh == 1 for sh in npy_img.shape])()

        # The reference for this direction is a 'full' convolution.
        cpuval = py_conv(npy_img, npy_kern, 'full', subsample)
        op = theano.sandbox.cuda.blas.GpuCorrMM_gradInputs(
            border_mode='valid', subsample=subsample)(i, k)
        f = theano.function([i, k], op, mode=theano_mode)
        gpuval = f(npy_kern.transpose(1, 0, 2, 3), npy_img)
    elif direction == 'bprop kern':
        # Both inputs are passed with their first two axes swapped.
        i = cuda.CudaNdarrayType(
            broadcastable=[sh == 1 for sh in
                           npy_img.transpose(1, 0, 2, 3).shape])()
        k = cuda.CudaNdarrayType(
            broadcastable=[sh == 1 for sh in
                           npy_kern.transpose(1, 0, 2, 3).shape])()

        cpuval = py_conv(npy_img, npy_kern, 'valid', subsample)
        op = theano.sandbox.cuda.blas.GpuCorrMM_gradWeights(
            border_mode='valid', subsample=subsample)(i, k)
        f = theano.function([i, k], op, mode=theano_mode)
        # The kernel is additionally reversed along both spatial axes,
        # and the result's first two axes are swapped back before the
        # comparison.
        gpuval = numpy.array(f(
            npy_img.transpose(1, 0, 2, 3),
            npy_kern.transpose(1, 0, 2, 3)[:, :, ::-1, ::-1])
        ).transpose(1, 0, 2, 3)

    assert_allclose(cpuval, gpuval, rtol=1e-4)