We extracted the following 42 code examples from open-source Python projects to illustrate how to use theano.tensor.shape_padright().
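Before the project examples, here is a minimal standalone sketch (not taken from any of the projects below) of what T.shape_padright does: it appends broadcastable dimensions of length 1 on the right of a tensor's shape, with an optional count argument.

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')            # shape (rows, cols)
y = T.shape_padright(x)      # shape (rows, cols, 1); the new axis is broadcastable
z = T.shape_padright(x, 2)   # shape (rows, cols, 1, 1)

f = theano.function([x], [y.shape, z.shape])
print(f(np.zeros((3, 4), dtype=theano.config.floatX)))
# [array([3, 4, 1]), array([3, 4, 1, 1])]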
def compile_eval_function(nnet):
    X = T.tensor4()
    y = T.ivector()

    # get prediction by fully convolutional network
    prediction = lasagne.layers.get_output(nnet.dense3_conv_layer,
                                           deterministic=True, inputs=X)

    # get output scores on first dim
    # before flattening on 2dim and then get scores on second dim
    prediction = prediction.transpose((1, 0, 2, 3))\
        .flatten(2).transpose((1, 0))
    prediction = T.nnet.softmax(prediction)

    # spatial averaging
    prediction = T.mean(prediction, axis=0)

    # compute top1 and top5 accuracies
    sorted_pred = T.argsort(prediction)
    top1_acc = T.mean(T.eq(sorted_pred[-1], y), dtype='floatX')
    top5_acc = T.mean(T.any(T.eq(sorted_pred[-5:], T.shape_padright(y)),
                            axis=1), dtype='floatX')

    return theano.function([X, y], [top1_acc, top5_acc])
def sym_logdensity(self, x): """ x is a matrix of column datapoints (VxB) V = n_visible, B = batch size """ def density_given_previous_a_and_x(x, w, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma, activations_factor, p_prev, a_prev, x_prev): a = a_prev + T.dot(T.shape_padright(x_prev, 1), T.shape_padleft(w, 1)) h = self.nonlinearity(a * activations_factor) # BxH Alpha = T.nnet.softmax(T.dot(h, V_alpha) + T.shape_padleft(b_alpha)) # BxC Mu = T.dot(h, V_mu) + T.shape_padleft(b_mu) # BxC Sigma = T.exp((T.dot(h, V_sigma) + T.shape_padleft(b_sigma))) # BxC p = p_prev + log_sum_exp(-constantX(0.5) * T.sqr((Mu - T.shape_padright(x, 1)) / Sigma) - T.log(Sigma) - constantX(0.5 * np.log(2 * np.pi)) + T.log(Alpha)) return (p, a, x) # First element is different (it is predicted from the bias only) a0 = T.zeros_like(T.dot(x.T, self.W)) # BxH p0 = T.zeros_like(x[0]) x0 = T.ones_like(x[0]) ([ps, _as, _xs], updates) = theano.scan(density_given_previous_a_and_x, sequences=[x, self.W, self.V_alpha, self.b_alpha, self.V_mu, self.b_mu, self.V_sigma, self.b_sigma, self.activation_rescaling], outputs_info=[p0, a0, x0]) return (ps[-1], updates)
def sym_logdensity(self, x): """ x is a matrix of column datapoints (VxB) V = n_visible, B = batch size """ def density_given_previous_a_and_x(x, w, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma, activations_factor, p_prev, a_prev, x_prev): a = a_prev + T.dot(T.shape_padright(x_prev, 1), T.shape_padleft(w, 1)) h = self.nonlinearity(a * activations_factor) # BxH Alpha = T.nnet.softmax(T.dot(h, V_alpha) + T.shape_padleft(b_alpha)) # BxC Mu = T.dot(h, V_mu) + T.shape_padleft(b_mu) # BxC Sigma = T.exp((T.dot(h, V_sigma) + T.shape_padleft(b_sigma))) # BxC p = p_prev + log_sum_exp(T.log(Alpha) - T.log(2 * Sigma) - T.abs_(Mu - T.shape_padright(x, 1)) / Sigma) return (p, a, x) # First element is different (it is predicted from the bias only) a0 = T.zeros_like(T.dot(x.T, self.W)) # BxH p0 = T.zeros_like(x[0]) x0 = T.ones_like(x[0]) ([ps, _as, _xs], updates) = theano.scan(density_given_previous_a_and_x, sequences=[x, self.W, self.V_alpha, self.b_alpha, self.V_mu, self.b_mu, self.V_sigma, self.b_sigma, self.activation_rescaling], outputs_info=[p0, a0, x0]) return (ps[-1], updates)
def note_to_encoding(self, chosen_note, relative_position, low_bound, high_bound):
    assert chosen_note.ndim == 1
    n_batch = chosen_note.shape[0]

    dont_play_version = T.switch(
        T.shape_padright(T.eq(chosen_note, 0)),
        T.tile(np.array([[1, 0] + [0]*(self.ENCODING_WIDTH-2)], dtype=np.float32), (n_batch, 1)),
        T.tile(np.array([[0, 1] + [0]*(self.ENCODING_WIDTH-2)], dtype=np.float32), (n_batch, 1)))

    rcp = T.tile(np.array([0, 0, 1], dtype=np.float32), (n_batch, 1))
    circle_1 = T.eye(4)[(chosen_note-2) % 4]
    circle_2 = T.eye(3)[(chosen_note-2) % 3]
    octave = T.eye(self.num_octaves)[(chosen_note-2+low_bound-self.octave_start)//12]

    play_version = T.concatenate([rcp, circle_1, circle_2, octave], 1)

    encoded_form = T.switch(
        T.shape_padright(T.lt(chosen_note, 2)),
        dont_play_version,
        play_version
    )
    return encoded_form
def get_loss(self, raw_feature_strengths, raw_feature_vects, extra_info=False):
    raw_losses = self._loss_fun(raw_feature_strengths)
    raw_sum = T.sum(raw_losses)

    n_parallel, n_timestep = raw_feature_strengths.shape
    falloff_arr = np.array(self._falloff_rate, np.float32) ** T.cast(T.arange(n_timestep), 'float32')
    falloff_mat = T.shape_padright(falloff_arr) / T.shape_padleft(falloff_arr)
    falloff_scaling = T.switch(T.ge(falloff_mat, 1), 0, falloff_mat) / self._falloff_rate
    # falloff_scaling is of shape (n_timestep, n_timestep) with 0 along diagonal, and
    # jump to 1 falling off along dimension 1
    # now we want to multiply through on both dimensions
    first_multiply = T.dot(raw_feature_strengths, falloff_scaling)  # shape (n_parallel, n_timestep)
    second_multiply = raw_feature_strengths * first_multiply
    unscaled_falloff_penalty = T.sum(second_multiply)

    full_loss = self._penalty_base * raw_sum + self._penalty_shock * unscaled_falloff_penalty

    if extra_info:
        return full_loss, {"raw_loss_sum": raw_sum}
    else:
        return full_loss
def get_padded_shuffled_mask(self, train, X, pad=0):
    mask = self.get_input_mask(train)
    if mask is None:
        mask = T.ones_like(X.sum(axis=-1))  # is there a better way to do this without a sum?

    # mask is (nb_samples, time)
    mask = T.shape_padright(mask)  # (nb_samples, time, 1)
    mask = T.addbroadcast(mask, -1)  # the new dimension (the '1') is made broadcastable
    # see http://deeplearning.net/software/theano/library/tensor/basic.html#broadcasting-in-theano-vs-numpy
    mask = mask.dimshuffle(1, 0, 2)  # (time, nb_samples, 1)

    if pad > 0:
        # left-pad in time with 0
        padding = alloc_zeros_matrix(pad, mask.shape[1], 1)
        mask = T.concatenate([padding, mask], axis=0)
    return mask.astype('int8')
def fprop(self):
    # The dimension of self.mask is (Timestep, Minibatch).
    # We need to pad it to (Timestep, Minibatch, FeatureDim)
    # and keep the last added dimension broadcastable. TT.shape_padright
    # is thus a good choice.
    if self.mask is None:
        scan_input = [self.input]
        scan_fn = self.step_fprop
    else:
        scan_input = [self.input, TT.shape_padright(self.mask, 1)]
        scan_fn = self.step_masked_fprop

    non_seqs = self.param
    [self.output, self.cell_output], self.output_update = quick_unroll_scan(fn=scan_fn,
    # [self.output, self.cell_output], self.output_update = theano.scan(fn=scan_fn,
                                                                        outputs_info=[self.init_hidden_state, self.init_cell_state],
                                                                        sequences=scan_input,
                                                                        non_sequences=non_seqs,
                                                                        n_steps=self.n_steps
                                                                        )
def loss_func(self, y_true, y_predict):
    active_notes = T.shape_padright(y_true[:, :, :, 0])
    mask = T.concatenate([T.ones_like(active_notes), active_notes,
                          T.repeat(T.ones_like(active_notes), self.output_size-2, -1)], axis=-1)
    loglikelihoods = mask * T.log(2*y_predict*y_true - y_predict - y_true + 1 + self.epsilon)
    return T.neg(T.sum(loglikelihoods))
def sym_mask_logdensity_estimator_intermediate(self, x, mask):
    non_linearity_name = self.parameters["nonlinearity"].get_name()
    assert(non_linearity_name == "sigmoid" or non_linearity_name == "RLU")
    x = x.T  # BxD
    mask = mask.T  # BxD
    output_mask = constantX(1) - mask  # BxD
    D = constantX(self.n_visible)
    d = mask.sum(1)  # d is the 1-based index of the dimension whose value to infer (not the size of the context)
    masked_input = x * mask  # BxD
    h = self.nonlinearity(T.dot(masked_input, self.W1) + T.dot(mask, self.Wflags) + self.b1)  # BxH
    for l in xrange(self.n_layers - 1):
        h = self.nonlinearity(T.dot(h, self.Ws[l]) + self.bs[l])  # BxH
    z_alpha = T.tensordot(h, self.V_alpha, [[1], [1]]) + T.shape_padleft(self.b_alpha)
    z_mu = T.tensordot(h, self.V_mu, [[1], [1]]) + T.shape_padleft(self.b_mu)
    z_sigma = T.tensordot(h, self.V_sigma, [[1], [1]]) + T.shape_padleft(self.b_sigma)
    temp = T.exp(z_alpha)  # + 1e-6
    # temp += T.shape_padright(temp.sum(2)/1e-3)
    Alpha = temp / T.shape_padright(temp.sum(2))  # BxDxC
    Mu = z_mu  # BxDxC
    Sigma = T.exp(z_sigma)  # + 1e-6  # BxDxC

    # Alpha = Alpha * T.shape_padright(output_mask) + T.shape_padright(mask)
    # Mu = Mu * T.shape_padright(output_mask)
    # Sigma = Sigma * T.shape_padright(output_mask) + T.shape_padright(mask)
    # Phi = -constantX(0.5) * T.sqr((Mu - T.shape_padright(x*output_mask)) / Sigma) - T.log(Sigma) - constantX(0.5 * np.log(2*np.pi))  # BxDxC

    Phi = -constantX(0.5) * T.sqr((Mu - T.shape_padright(x)) / Sigma) - T.log(Sigma) - constantX(0.5 * np.log(2 * np.pi))  # BxDxC
    logdensity = (log_sum_exp(Phi + T.log(Alpha), axis=2) * output_mask).sum(1) * D / (D - d)
    return (logdensity, z_alpha, z_mu, z_sigma, Alpha, Mu, Sigma, h)
def sym_masked_neg_loglikelihood_gradient(self, x, mask):
    """ x is a matrix of column datapoints (DxB) D = n_visible, Bfloat = batch size """
    logdensity, z_alpha, z_mu, z_sigma, Alpha, Mu, Sigma, h = self.sym_mask_logdensity_estimator_intermediate(x, mask)

    # nnz = output_mask.sum(0)
    # sparsity_multiplier = T.shape_padright(T.shape_padleft((B+1e-6)/(nnz+1e-6)))

    # wPhi = T.maximum(Phi + T.log(Alpha), constantX(-100.0))  # BxDxC
    # lp_current = log_sum_exp(wPhi, axis=2) * output_mask  # BxD
    # lp_current_sum = (lp_current.sum(1) * D / (D-d)).sum()  # 1

    loglikelihood = logdensity.mean(dtype=floatX)
    loss = -loglikelihood

    dp_dz_alpha = T.grad(loss, z_alpha)  # BxDxC
    gb_alpha = dp_dz_alpha.sum(0)  # DxC
    gV_alpha = T.tensordot(h.T, dp_dz_alpha, [[1], [0]]).dimshuffle((1, 0, 2))  # DxHxC

    dp_dz_mu = T.grad(loss, z_mu)  # BxDxC
    dp_dz_mu = dp_dz_mu * Sigma  # Heuristic
    gb_mu = dp_dz_mu.sum(0)  # DxC
    gV_mu = T.tensordot(h.T, dp_dz_mu, [[1], [0]]).dimshuffle((1, 0, 2))  # DxHxC

    dp_dz_sigma = T.grad(loss, z_sigma)  # BxDxC
    gb_sigma = dp_dz_sigma.sum(0)  # DxC
    gV_sigma = T.tensordot(h.T, dp_dz_sigma, [[1], [0]]).dimshuffle((1, 0, 2))  # DxHxC

    if self.n_layers > 1:
        gWs, gbs, gW1, gWflags, gb1 = T.grad(loss, [self.Ws, self.bs, self.W1, self.Wflags, self.b1])
        gradients = {"V_alpha": gV_alpha, "b_alpha": gb_alpha, "V_mu": gV_mu, "b_mu": gb_mu,
                     "V_sigma": gV_sigma, "b_sigma": gb_sigma, "Ws": gWs, "bs": gbs,
                     "W1": gW1, "b1": gb1, "Wflags": gWflags}
    else:
        gW1, gWflags, gb1 = T.grad(loss, [self.W1, self.Wflags, self.b1])
        gradients = {"V_alpha": gV_alpha, "b_alpha": gb_alpha, "V_mu": gV_mu, "b_mu": gb_mu,
                     "V_sigma": gV_sigma, "b_sigma": gb_sigma, "W1": gW1, "b1": gb1, "Wflags": gWflags}
    # Gradients
    return (loss, gradients)
def log_sum_exp(x, axis=1):
    max_x = T.max(x, axis)
    return max_x + T.log(T.sum(T.exp(x - T.shape_padright(max_x, 1)), axis))
def get_padded_shuffled_mask(self, train, X, pad=0):
    mask = self.get_input_mask(train)
    if mask is None:
        mask = T.ones_like(X.sum(axis=-1))  # is there a better way to do this without a sum?

    # mask is (nb_samples, time)
    mask = T.shape_padright(mask)  # (nb_samples, time, 1)
    mask = T.addbroadcast(mask, -1)  # (time, nb_samples, 1) matrix.
    mask = mask.dimshuffle(1, 0, 2)  # (time, nb_samples, 1)

    if pad > 0:
        # left-pad in time with 0
        padding = alloc_zeros_matrix(pad, mask.shape[1], 1)
        mask = T.concatenate([padding, mask], axis=0)
    return mask.astype('int8')
def _simple_norm(x, eps=1e-5):
    output = (x - tensor.shape_padright(x.mean(-1))) / \
        (eps + tensor.shape_padright(x.std(-1)))

    return output
def one_hot(t, r=None):
    """Compute one hot encoding.

    given a tensor t of dimension d with integer values from range(r), return a
    new tensor of dimension d + 1 with values 0/1, where the last dimension
    gives a one-hot representation of the values in t.

    if r is not given, r is set to max(t) + 1
    """
    if r is None:
        r = tensor.max(t) + 1

    ranges = tensor.shape_padleft(tensor.arange(r), t.ndim)
    return tensor.eq(ranges, tensor.shape_padright(t, 1))
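The example above relies on the padded shapes broadcasting against each other. A minimal standalone check of that trick (a sketch, assuming `tensor` is the usual `theano.tensor` alias and r=5):

import numpy as np
import theano
import theano.tensor as tensor

t = tensor.ivector('t')
# shape_padleft(arange(5), t.ndim) has shape (1, 5); shape_padright(t, 1) has
# shape (len(t), 1), so tensor.eq broadcasts them into a (len(t), 5) one-hot matrix.
ranges = tensor.shape_padleft(tensor.arange(5), t.ndim)
onehot = tensor.eq(ranges, tensor.shape_padright(t, 1))

f = theano.function([t], onehot)
print(f(np.array([0, 2, 4], dtype='int32')))
# [[1 0 0 0 0]
#  [0 0 1 0 0]
#  [0 0 0 0 1]]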
def cost_gmm(y, mu, sig, weight):
    """Gaussian mixture model negative log-likelihood.

    Computes the cost.
    """
    n_dim = y.ndim
    shape_y = y.shape
    k = weight.shape[-1]

    y = y.reshape((-1, shape_y[-1]))
    y = tensor.shape_padright(y)

    mu = mu.reshape((-1, shape_y[-1], k))
    sig = sig.reshape((-1, shape_y[-1], k))
    weight = weight.reshape((-1, k))

    diff = tensor.sqr(y - mu)

    inner = -0.5 * tensor.sum(
        diff / sig**2 +
        2 * tensor.log(sig) + tensor.log(2 * numpy.pi), axis=-2)

    nll = -logsumexp(tensor.log(weight) + inner, axis=-1)

    return nll.reshape(shape_y[:-1], ndim=n_dim - 1)
def bivariate_gmm(y, mu, sigma, corr, coeff, binary, epsilon=1e-5):
    """Bivariate gaussian mixture model negative log-likelihood.

    Parameters
    """
    n_dim = y.ndim
    shape_y = y.shape
    y = y.reshape((-1, shape_y[-1]))
    y = tensor.shape_padright(y)

    data_pen = y[:, 0, :]
    data_x = y[:, 1, :]
    data_y = y[:, 2, :]

    sigma_x = sigma[:, 0, :]
    sigma_y = sigma[:, 1, :]

    std_e_x = (data_x - mu[:, 0, :]) / sigma_x
    std_e_y = (data_y - mu[:, 1, :]) / sigma_y

    binary = (binary + epsilon) * (1. - 2. * epsilon)

    c_b = tensor.sum(
        tensor.xlogx.xlogy0(data_pen, binary) +
        tensor.xlogx.xlogy0(1. - data_pen, 1. - binary), axis=1)

    buff = 1. - corr**2 + epsilon

    z = std_e_x**2 + std_e_y**2 - 2. * corr * std_e_x * std_e_y

    cost = - z / (2. * buff) - 0.5 * tensor.log(buff) - \
        tensor.log(sigma_x) - tensor.log(sigma_y) - tensor.log(2. * numpy.pi)

    nll = -logsumexp(tensor.log(coeff) + cost, axis=1) - c_b

    return nll.reshape(shape_y[:-1], ndim=n_dim - 1)
def emit(self, readouts):
    """Sample from the distribution.

    Parameters:
        readouts: readouts from the rnn + attention
    """
    mu, sigma, corr, coeff, penup = self.components(readouts)

    idx = predict(
        self.theano_rng.multinomial(
            pvals=coeff,
            dtype=coeff.dtype
        ), axis=1)

    mu = mu[tensor.arange(mu.shape[0]), :, idx]
    sigma = sigma[tensor.arange(sigma.shape[0]), :, idx]
    corr = corr[tensor.arange(corr.shape[0]), idx]

    mu_x = mu[:, 0]
    mu_y = mu[:, 1]
    sigma_x = sigma[:, 0]
    sigma_y = sigma[:, 1]

    z = self.theano_rng.normal(
        size=mu.shape, avg=0., std=1., dtype=mu.dtype)

    un = self.theano_rng.uniform(size=penup.shape)
    penup = tensor.cast(un < penup, floatX)

    s_x = tensor.shape_padright(mu_x + sigma_x * z[:, 0])
    s_y = mu_y + sigma_y * (
        (z[:, 0] * corr) + (z[:, 1] * tensor.sqrt(1. - corr**2)))
    s_y = tensor.shape_padright(s_y)
    s = tensor.concatenate([penup, s_x, s_y], axis=1)

    return s
def decode_to_probs(self, activations, relative_position, low_bound, high_bound):
    assert (low_bound % 12 == 0) and (high_bound - low_bound == self.num_octaves * 12), "Circle of thirds must evenly divide into octaves"
    squashed = T.reshape(activations, (-1, self.RAW_ENCODING_WIDTH))

    rsp = T.nnet.softmax(squashed[:, :3])
    c1 = T.nnet.softmax(squashed[:, 3:7])
    c2 = T.nnet.softmax(squashed[:, 7:10])
    octave_choice = T.nnet.softmax(squashed[:, 10:])
    octave_notes = T.tile(c1, (1, 3)) * T.tile(c2, (1, 4))
    full_notes = T.reshape(T.shape_padright(octave_choice) * T.shape_padaxis(octave_notes, 1), (-1, 12*self.num_octaves))
    full_probs = T.concatenate([rsp[:, :2], T.shape_padright(rsp[:, 2]) * full_notes], 1)

    newshape = T.concatenate([activations.shape[:-1], [2 + high_bound - low_bound]], 0)

    fixed = T.reshape(full_probs, newshape, ndim=activations.ndim)
    return fixed
def queue_transform(feature_strengths, feature_vects, return_strengths=False):
    """
    Process features according to a "fragmented queue", where each timestep
    gets a size-1 window onto a feature queue. Effectively,
        feature_strengths gives how much to push onto queue
        feature_vects gives what to push on
        pop weights are tied to feature_strengths
        output is a size-1 peek (without popping)

    Parameters:
        - feature_strengths: float32 tensor of shape (batch, push_timestep) in [0,1]
        - feature_vects: float32 tensor of shape (batch, push_timestep, feature_dim)

    Returns:
        - peek_vects: float32 tensor of shape (batch, timestep, feature_dim)
    """
    n_batch, n_time, n_feature = feature_vects.shape

    cum_sum_str = T.extra_ops.cumsum(feature_strengths, 1)

    # We will be working in (batch, timestep, push_timestep)
    # For each timestep, if we subtract out the sum of pushes before that timestep
    # and then cap to 0-1 we get the cumsums for just the features active in that
    # timestep
    timestep_adjustments = T.shape_padright(cum_sum_str - feature_strengths)
    push_time_cumsum = T.shape_padaxis(cum_sum_str, 1)
    relative_cumsum = push_time_cumsum - timestep_adjustments
    capped_cumsum = T.minimum(T.maximum(relative_cumsum, 0), 1)

    # Now we can recover the peek strengths by taking a diff
    shifted = T.concatenate([T.zeros((n_batch, n_time, 1)), capped_cumsum[:, :, :-1]], 2)
    peek_strengths = capped_cumsum - shifted
    # Peek strengths is now (batch, timestep, push_timestep)

    result = T.batched_dot(peek_strengths, feature_vects)

    if return_strengths:
        return peek_strengths, result
    else:
        return result
def extract_sample_scan_results(self, spec, outputs):
    """
    Extract outputs from the scan results.

    Parameters:
        outputs: The outputs from the scan associated with this stack

    Returns:
        positions, raw_output, sampled_output
    """
    positions = T.concatenate([T.shape_padright(spec.start_pos), outputs[0].transpose((1, 0))[:, :-1]], 1)

    sampled_output = outputs[2].transpose((1, 0, 2))
    raw_output = outputs[-1].transpose((1, 0, 2))

    return positions, raw_output, sampled_output
def _get_output_for(self, input):
    assert input.ndim == 3  # only for 3D

    mask = T.zeros_like(input)  # size (None, w, h)
    tmp = T.concatenate([T.shape_padright(input[:, ::2, ::2]),
                         T.shape_padright(input[:, ::2, 1::2]),
                         T.shape_padright(input[:, 1::2, ::2]),
                         T.shape_padright(input[:, 1::2, 1::2])], axis=-1)
    index = tmp.argmax(axis=-1)  # size (None, w/2, h/2)
    i_r = 2*(np.tile(np.arange(self.i_s[0]/2), (self.i_s[1]/2, 1))).T
    i_r = index/2 + T.shape_padleft(i_r)
    i_c = 2*(np.tile(np.arange(self.i_s[1]/2), (self.i_s[0]/2, 1)))
    i_c = index % 2 + T.shape_padleft(i_c)
    i_b = T.tile(T.arange(self.batch_size*self.n_channels), (self.i_s[0]/2*self.i_s[1]/2, 1)).T
    mask = T.set_subtensor(mask[i_b.flatten(), i_r.flatten(), i_c.flatten()], 1)
    return mask
def apply(self, source, source_mask=None, source_x=None, attention=None):
    """
    :param source: the input tensor you want to put attention on; shape (length, batch, 'embedding_len or feature_len')
    :param source_mask: mask (length, batch)
    :param source_x: this is the (Ua * h_j)
    :param attention: this is the si-1 in the original paper, dynamic
    :return: 2d (batch, 'embedding_len or feature_len')
    """
    # attention is 2d
    if source.ndim != 3 or attention.ndim != 2:
        raise NotImplementedError

    align_matrix = T.tanh(source_x + T.dot(attention, self.Wa)[None, :, :])
    align = theano.dot(align_matrix, self.v)
    align = T.exp(align - align.max(axis=0, keepdims=True))
    # my note: align is the attention scores, like [0.1, 0.2, 0.4, 0.3]
    if source_mask:
        align = align * source_mask
        normalization = align.sum(axis=0) + T.all(1 - source_mask, axis=0)
    else:
        normalization = align.sum(axis=0)
    align = align / normalization
    self.output = (T.shape_padright(align) * source).sum(axis=0)

    return self.output
def apply(self, source, tag):
    if source.ndim != 3:
        raise NotImplementedError

    source_x = T.dot(source, self.Ws) + self.bs
    align_matrix = T.tanh(source_x)
    align = T.dot(align_matrix, self.v[tag])
    align = T.exp(align - align.max(axis=0, keepdims=True))
    normalization = align.sum(axis=0)
    # shape is (length, batch)
    self.align = align / normalization
    self.output = (T.shape_padright(self.align) * source).sum(axis=0)
    return self.output
def get_output(self, train=False):
    X = self.get_input(train)
    return X * T.shape_padright(T.any((1. - T.eq(X, self.mask_value)), axis=-1))
def conv2d_same(input, filters, input_shape=(None, None, None, None), filter_shape=(None, None, None, None),
                padding=None):
    assert input.ndim == 4 and filters.ndim == 4
    assert (4 == len(input_shape)) and (4 == len(filter_shape))
    assert (1 == filter_shape[2] % 2) and (1 == filter_shape[3] % 2)
    if (tuple(input_shape[2:4]) == (1, 1) and tuple(filter_shape[2:4]) == (1, 1)) or (
            tuple(filter_shape[2:4]) == (1, 1) and theano.config.device == "cpu"):
        return tensor4dot(input, filters)
    else:
        new_row_begin = filters.shape[2] / 2
        new_row_end = input.shape[2] + filters.shape[2] / 2
        new_col_begin = filters.shape[3] / 2
        new_col_end = input.shape[3] + filters.shape[3] / 2
        if padding is not None:
            assert 1 == padding.ndim
            padded_input = TT.ones((
                input.shape[0],
                input.shape[1],
                input.shape[2] + filters.shape[2] - 1,
                input.shape[3] + filters.shape[3] - 1)).astype(theano.config.floatX)
            padded_input = TT.set_subtensor(padded_input[:, :, new_row_begin:new_row_end, new_col_begin:new_col_end],
                                            numpy_floatX(0))
            padding = TT.shape_padleft(TT.shape_padright(padding, 2), 1)
            padded_input = padding * padded_input
        else:
            padded_input = TT.zeros((
                input.shape[0],
                input.shape[1],
                input.shape[2] + filters.shape[2] - 1,
                input.shape[3] + filters.shape[3] - 1)).astype(theano.config.floatX)
        padded_input = TT.inc_subtensor(padded_input[:, :, new_row_begin:new_row_end, new_col_begin:new_col_end],
                                        input)
        new_input_shape = [None, None, None, None]
        if input_shape[0] is not None:
            new_input_shape[0] = input_shape[0]
        if input_shape[1] is not None:
            new_input_shape[1] = input_shape[1]
        if input_shape[2] is not None and filter_shape[2] is not None:
            new_input_shape[2] = input_shape[2] + filter_shape[2] - 1
        if input_shape[3] is not None and filter_shape[3] is not None:
            new_input_shape[3] = input_shape[3] + filter_shape[3] - 1
        ret = TT.nnet.conv2d(input=padded_input, filters=filters, border_mode='valid',
                             input_shape=tuple(new_input_shape), filter_shape=filter_shape)
        return ret
def quick_aggregate_pooling(input, pooling_func, mask=None):
    assert input.ndim == 5
    assert mask.ndim == 2 if mask is not None else True
    if pooling_func == "max":
        if mask is None:
            return input.max(axis=0)
    elif pooling_func == "mean":
        if mask is None:
            return TT.cast(input.mean(axis=0), theano.config.floatX)
        else:
            return (input * TT.shape_padright(mask / mask.sum(axis=0), 3)).sum(axis=0)
    elif pooling_func == "L2":
        # TODO Add Lp Pooling proposed by Yann LeCun
        return None
    return None
def theano_one_hot(t, n_classes=None):
    if n_classes is None:
        n_classes = tensor.max(t) + 1
    ranges = tensor.shape_padleft(tensor.arange(n_classes), t.ndim)
    return tensor.eq(ranges, tensor.shape_padright(t, 1))
def my_pool_2d(input, ds, ignore_border=None, st=None, padding=(0, 0), mode='max'):
    """
    This function is a patch to the maxpool op of Theano:
    contrary to the current implementation of maxpool, the gradient is backpropagated
    to only one input of a given patch if several inputs have the same value. This is
    consistent with the CuDNN implementation (and therefore the op is replaced by the
    CuDNN version when possible).
    """
    if input.ndim < 2:
        raise NotImplementedError('pool_2d requires a dimension >= 2')

    if not ignore_border is None:
        # check that ignore_border is True if provided
        assert ignore_border
    ignore_border = True

    if input.ndim == 4:
        op = MyPool(ds, ignore_border, st=st, padding=padding, mode=mode)
        output = op(input)
        return output

    # extract image dimensions
    img_shape = input.shape[-2:]

    # count the number of "leading" dimensions, store as dmatrix
    batch_size = tensor.prod(input.shape[:-2])
    batch_size = tensor.shape_padright(batch_size, 1)

    # store as 4D tensor with shape: (batch_size,1,height,width)
    new_shape = tensor.cast(tensor.join(0, batch_size,
                                        tensor.as_tensor([1]),
                                        img_shape), 'int64')
    input_4D = tensor.reshape(input, new_shape, ndim=4)

    # downsample mini-batch of images
    op = MyPool(ds, ignore_border, st=st, padding=padding, mode=mode)
    output = op(input_4D)

    # restore to original shape
    outshp = tensor.join(0, input.shape[:-2], output.shape[-2:])
    return tensor.reshape(output, outshp, ndim=input.ndim)
def pad_dims(input, leftdims, rightdims):
    """Reshapes the input to a (leftdims + rightdims) tensor

    This helper function is used to convert pooling inputs with arbitrary
    non-pooling dimensions to the correct number of dimensions for the
    GPU pooling ops.

    This reduces or expands the number of dimensions of the input to
    exactly `leftdims`, by adding extra dimensions on the left or by
    combining some existing dimensions on the left of the input.

    Use `unpad_dims` to reshape back to the original dimensions.

    Examples
    --------
    Given input of shape (3, 5, 7), ``pad_dims(input, 2, 2)``
    adds a singleton dimension and reshapes to (1, 3, 5, 7).
    Given that output from pad_dims, ``unpad_dims(output, input, 2, 2)``
    reshapes back to (3, 5, 7).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 2)``
    does not reshape and returns output with shape (3, 5, 7, 9).

    Given input of shape (3, 5, 7, 9, 11), ``pad_dims(input, 2, 2)``
    combines the first two dimensions and reshapes to (15, 7, 9, 11).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 3)``
    adds a singleton dimension and reshapes to (1, 3, 5, 7, 9).
    """
    assert input.ndim >= rightdims

    if input.ndim == (leftdims + rightdims):
        return input

    # extract image dimensions
    img_shape = input.shape[-rightdims:]

    non_pool_ndim = input.ndim - rightdims
    if non_pool_ndim < leftdims:
        # too few dimensions, pad on the left
        dummy_dims = tensor.as_tensor([1] * (leftdims - non_pool_ndim))
        new_shape = tensor.join(0, dummy_dims,
                                input.shape[:non_pool_ndim],
                                img_shape)
    else:
        # too many dimensions, combine the leading dimensions
        batched_ndim = non_pool_ndim - leftdims + 1
        batch_size = tensor.prod(input.shape[:batched_ndim])
        # convert to a vector for tensor.join
        batch_size = tensor.shape_padright(batch_size, 1)
        new_shape = tensor.join(0, batch_size,
                                input.shape[batched_ndim:non_pool_ndim],
                                img_shape)

    # store in the required shape
    new_shape = tensor.cast(new_shape, 'int64')
    input_ND = GpuReshape(leftdims + rightdims)(input, new_shape)
    return input_ND
def pad_dims(input, leftdims, rightdims):
    """Reshapes the input to a (leftdims + rightdims) tensor

    This helper function is used to convert pooling inputs with arbitrary
    non-pooling dimensions to the correct number of dimensions for the
    GPU pooling ops.

    This reduces or expands the number of dimensions of the input to
    exactly `leftdims`, by adding extra dimensions on the left or by
    combining some existing dimensions on the left of the input.

    Use `unpad_dims` to reshape back to the original dimensions.

    Examples
    --------
    Given input of shape (3, 5, 7), ``pad_dims(input, 2, 2)``
    adds a singleton dimension and reshapes to (1, 3, 5, 7).
    Given that output from pad_dims, ``unpad_dims(output, input, 2, 2)``
    reshapes back to (3, 5, 7).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 2)``
    does not reshape and returns output with shape (3, 5, 7, 9).

    Given input of shape (3, 5, 7, 9, 11), ``pad_dims(input, 2, 2)``
    combines the first two dimensions and reshapes to (15, 7, 9, 11).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 3)``
    adds a singleton dimension and reshapes to (1, 3, 5, 7, 9).
    """
    assert input.ndim >= rightdims

    if input.ndim == (leftdims + rightdims):
        return input

    # extract image dimensions
    img_shape = input.shape[-rightdims:]

    non_pool_ndim = input.ndim - rightdims
    if non_pool_ndim < leftdims:
        # too few dimensions, pad on the left
        dummy_dims = tensor.as_tensor([1] * (leftdims - non_pool_ndim))
        new_shape = tensor.join(0, dummy_dims,
                                input.shape[:non_pool_ndim],
                                img_shape)
    else:
        # too many dimensions, combine the leading dimensions
        batched_ndim = non_pool_ndim - leftdims + 1
        batch_size = tensor.prod(input.shape[:batched_ndim])
        # convert to a vector for tensor.join
        batch_size = tensor.shape_padright(batch_size, 1)
        new_shape = tensor.join(0, batch_size,
                                input.shape[batched_ndim:non_pool_ndim],
                                img_shape)

    # store in the required shape
    new_shape = tensor.cast(new_shape, 'int64')
    input_ND = GpuReshape(leftdims + rightdims)(input, new_shape)
    return input_ND