The following seven code examples, extracted from open-source Python projects, illustrate how to use theano.tensor.roll().
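Before the project code, here is a minimal, self-contained sketch of what theano.tensor.roll() does: like numpy.roll, it cyclically shifts the elements of a tensor along an axis, wrapping values that fall off one end back around to the other. As several of the examples below rely on, the shift amount may itself be a symbolic expression.

    import numpy as np
    import theano
    import theano.tensor as T

    x = T.ivector('x')
    shifted = T.roll(x, 2, axis=0)            # cyclic shift right by 2 along axis 0
    f = theano.function([x], shifted)

    print(f(np.arange(5, dtype='int32')))     # prints [3 4 0 1 2]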
def generate(self, relative_position, cur_chord_root, cur_chord_type, **kwargs):
    """
    Generate a chord input for a given timestep.

    Parameters:
        relative_position: A theano tensor (int32) of shape (n_parallel), giving the
            current relative position for this timestep
        cur_chord_root: A theano tensor (int32) of shape (n_parallel) giving the
            unshifted chord root
        cur_chord_type: A theano tensor (int32) of shape (n_parallel, CHORD_WIDTH),
            giving the unshifted chord type representation, parsed from the leadsheet

    Returns:
        piece: A theano tensor (float32) of shape (n_parallel, PART_WIDTH)
    """
    def _map_fn(pos, chord):
        # Now pos is scalar and chord is of shape (CHORD_WIDTH), so we can roll
        return T.roll(chord, (-pos) % 12, 0)

    shifted_chords, _ = theano.map(_map_fn, sequences=[relative_position - cur_chord_root, cur_chord_type])

    # shifted_chords = theano.printing.Print("ChordShiftInputPart")(shifted_chords)
    # shifted_chords = T.opt.Assert()(shifted_chords, T.eq(shifted_chords.shape[1], self.PART_WIDTH))

    return shifted_chords
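As a small illustration of the rolling step above (toy data and names, not from the original project), rolling a 12-bin pitch-class vector by (-pos) % 12 rotates the chord pattern into the network's relative pitch frame:

    import numpy as np
    import theano
    import theano.tensor as T

    chord = T.fvector('chord')   # 12 pitch-class bins
    pos = T.iscalar('pos')       # relative_position - cur_chord_root for one timestep

    rolled = T.roll(chord, (-pos) % 12, 0)    # same expression as in _map_fn above
    shift_chord = theano.function([pos, chord], rolled)

    c_major = np.zeros(12, dtype='float32')
    c_major[[0, 4, 7]] = 1.0                  # C major triad: C, E, G
    print(shift_chord(3, c_major))            # the triad pattern rotated by (-3) % 12 = 9 bins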
def get_output_for(self, input, **kwargs):
    def norm_fn(f, mask, label, previous, W_sim):
        # f: inst * class, mask: inst, previous: inst * class, W_sim: class * class
        next = previous.dimshuffle(0, 1, 'x') + f.dimshuffle(0, 'x', 1) + W_sim.dimshuffle('x', 0, 1)
        if COST:
            next = next + COST_CONST * (1.0 - T.extra_ops.to_one_hot(label, self.num_classes).dimshuffle(0, 'x', 1))
        # next: inst * prev * cur
        next = theano_logsumexp(next, axis=1)
        # next: inst * class
        mask = mask.dimshuffle(0, 'x')
        next = previous * (1.0 - mask) + next * mask
        return next

    f = T.dot(input, self.W)  # f: inst * time * class

    initial = f[:, 0, :]
    if CRF_INIT:
        initial = initial + self.W_init[0].dimshuffle('x', 0)
    if COST:
        initial = initial + COST_CONST * (1.0 - T.extra_ops.to_one_hot(self.label_input[:, 0], self.num_classes))

    outputs, _ = theano.scan(fn=norm_fn,
                             sequences=[f.dimshuffle(1, 0, 2)[1:],
                                        self.mask_input.dimshuffle(1, 0)[1:],
                                        self.label_input.dimshuffle(1, 0)[1:]],
                             outputs_info=initial, non_sequences=[self.W_sim], strict=True)
    norm = T.sum(theano_logsumexp(outputs[-1], axis=1))

    f_pot = (f.reshape((-1, f.shape[-1]))[T.arange(f.shape[0] * f.shape[1]), self.label_input.flatten()]
             * self.mask_input.flatten()).sum()
    if CRF_INIT:
        f_pot += self.W_init[0][self.label_input[:, 0]].sum()

    labels = self.label_input  # labels: inst * time
    shift_labels = T.roll(labels, -1, axis=1)
    mask = self.mask_input  # mask: inst * time
    shift_mask = T.roll(mask, -1, axis=1)

    g_pot = (self.W_sim[labels.flatten(), shift_labels.flatten()] * mask.flatten() * shift_mask.flatten()).sum()

    return - (f_pot + g_pot - norm) / f.shape[0]
def get_output_for(self, input, **kwargs):
    def norm_fn(f, mask, label, previous, W_sim):
        # f: batch * class, mask: batch, label: batch, previous: batch * class, W_sim: class * class
        next = previous.dimshuffle(0, 1, 'x') + f.dimshuffle(0, 'x', 1) + W_sim.dimshuffle('x', 0, 1)
        # next: batch * class * class
        next = theano_logsumexp(next, axis=1)
        # next: batch * class
        mask = mask.dimshuffle(0, 'x')
        next = previous * (1.0 - mask) + next * mask
        return next

    f = input  # batch * time * class
    if self.end_points:
        for i in range(self.num_classes):
            f = T.inc_subtensor(f[:, 0, i], self.W_end_points[0, i])
            f = T.inc_subtensor(f[:, -1, i], self.W_end_points[1, i])

    initial = f[:, 0, :]
    outputs, _ = theano.scan(fn=norm_fn,
                             sequences=[f.dimshuffle(1, 0, 2)[1:],
                                        self.mask_input.dimshuffle(1, 0)[1:],
                                        self.label_input.dimshuffle(1, 0)[1:]],
                             outputs_info=initial, non_sequences=[self.W_sim], strict=True)
    norm = T.sum(theano_logsumexp(outputs[-1], axis=1))

    f_pot = (f.reshape((-1, f.shape[-1]))[T.arange(f.shape[0] * f.shape[1]), self.label_input.flatten()]
             * self.mask_input.flatten()).sum()

    labels = self.label_input  # batch * time
    shift_labels = T.roll(labels, -1, axis=1)
    mask = self.mask_input  # batch * time
    shift_mask = T.roll(mask, -1, axis=1)

    g_pot = (self.W_sim[labels.flatten(), shift_labels.flatten()] * mask.flatten() * shift_mask.flatten()).sum()

    return - (f_pot + g_pot - norm) / f.shape[0] if self.normalize else - (f_pot + g_pot - norm)
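Both CRF-style layers above use the same trick for the pairwise potential: rolling the label matrix by -1 along the time axis aligns every label with its successor, so a single fancy-indexing lookup into W_sim scores all consecutive (y_t, y_{t+1}) pairs at once, and the rolled mask zeroes out pairs that touch padding. A standalone sketch of just that piece, with hypothetical symbolic inputs in place of the layer's attributes:

    import theano
    import theano.tensor as T

    labels = T.imatrix('labels')   # batch * time, gold label ids
    mask = T.fmatrix('mask')       # batch * time, 1.0 for real tokens, 0.0 for padding
    W_sim = T.fmatrix('W_sim')     # class * class transition scores

    shift_labels = T.roll(labels, -1, axis=1)   # label at t + 1, wrapped at the end
    shift_mask = T.roll(mask, -1, axis=1)

    # Transition score of every consecutive pair; multiplying by both masks
    # makes pairs that extend into the padding region contribute zero.
    g_pot = (W_sim[labels.flatten(), shift_labels.flatten()]
             * mask.flatten() * shift_mask.flatten()).sum()

    pairwise_potential = theano.function([labels, mask, W_sim], g_pot)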
def decode_to_probs(self, activations, relative_position, low_bound, high_bound):
    squashed = T.reshape(activations, (-1, self.RAW_ENCODING_WIDTH))
    n_parallel = squashed.shape[0]
    probs = T.nnet.softmax(squashed)

    def _scan_fn(cprobs, cpos):
        if self.with_artic:
            abs_probs = cprobs[:2]
            rel_probs = cprobs[2:]
        else:
            rel_probs = cprobs
            abs_probs = T.ones((2,))

        aligned = T.roll(rel_probs, (cpos - low_bound) % 12)

        num_tile = int(math.ceil((high_bound - low_bound) / self.WINDOW_SIZE))
        tiled = T.tile(aligned, (num_tile,))[:(high_bound - low_bound)]

        full = T.concatenate([abs_probs, tiled], 0)
        return full

    # probs = theano.printing.Print("probs",['shape'])(probs)
    # relative_position = theano.printing.Print("relative_position",['shape'])(relative_position)
    from_scan, _ = theano.map(fn=_scan_fn, sequences=[probs, T.flatten(relative_position)])
    # from_scan = theano.printing.Print("from_scan",['shape'])(from_scan)

    newshape = T.concatenate([activations.shape[:-1], [2 + high_bound - low_bound]], 0)
    fixed = T.reshape(from_scan, newshape, ndim=activations.ndim)
    return fixed
def __init__(self, input_parts, layer_sizes, output_size, window_size=0, dropout=0, mode="drop", unroll_batch_num=None):
    """
    Parameters:
        input_parts: A list of InputParts
        layer_sizes: A list of the form [ (indep, per_note), ... ] where
            indep is the number of non-shifted cells to have, and
            per_note is the number of cells to have per window note, which shift as the network moves
            Alternately can just be [ indep, ... ]
        output_size: An integer, the width of the desired output
        dropout: How much dropout to apply.
        mode: Either "drop" or "roll". If drop, discard memory that goes out of range. If roll, roll it instead
    """
    self.input_parts = input_parts
    self.window_size = window_size

    layer_sizes = [x if isinstance(x, tuple) else (x, 0) for x in layer_sizes]
    self.layer_sizes = layer_sizes
    self.tot_layer_sizes = [(indep + per_note * self.window_size) for indep, per_note in layer_sizes]

    self.output_size = output_size
    self.dropout = dropout

    self.input_size = sum(part.PART_WIDTH for part in input_parts)

    self.cells = StackedCells(self.input_size, celltype=LSTM, activation=T.tanh, layers=self.tot_layer_sizes)
    self.cells.layers.append(Layer(self.tot_layer_sizes[-1], self.output_size, activation=lambda x: x))

    assert mode in ("drop", "roll"), "Must specify either drop or roll mode"
    self.mode = mode

    self.unroll_batch_num = unroll_batch_num
def cost_matrix(self, application_call, outputs, mask=None, **kwargs):
    """Adapted from ``BaseSequenceGenerator.cost_matrix``"""
    # We assume the data has axes (time, batch, features, ...)
    batch_size = outputs.shape[1]

    # Prepare input for the iterative part
    states = dict_subset(kwargs, self._state_names, must_have=False)
    # masks in context are optional (e.g. `attended_mask`)
    contexts = dict_subset(kwargs, self._context_names, must_have=False)
    feedback = self.readout.feedback(outputs)
    inputs = self.fork.apply(feedback, as_dict=True)

    # Run the recurrent network
    results = self.transition.apply(
        mask=mask, return_initial_states=True, as_dict=True,
        **dict_union(inputs, states, contexts))

    # Separate the deliverables. The last states are discarded: they
    # are not used to predict any output symbol. The initial glimpses
    # are discarded because they are not used for prediction.
    # Remember, glimpses are computed _before_ the output stage, states are
    # computed after.
    states = {name: results[name][:-1] for name in self._state_names}
    glimpses = {name: results[name][1:] for name in self._glimpse_names}

    # Compute the cost
    feedback = tensor.roll(feedback, 1, 0)
    feedback = tensor.set_subtensor(
        feedback[0],
        self.readout.feedback(self.readout.initial_outputs(batch_size)))
    readouts = self.readout.readout(
        feedback=feedback, **dict_union(states, glimpses, contexts))
    costs = self.readout.cost(readouts, outputs)
    if mask is not None:
        costs *= mask

    for name, variable in list(glimpses.items()) + list(states.items()):
        application_call.add_auxiliary_variable(
            variable.copy(), name=name)

    # These variables can be used to initialize the initial states of the
    # next batch using the last states of the current batch.
    for name in self._state_names:
        application_call.add_auxiliary_variable(
            results[name][-1].copy(), name=name + "_final_value")

    if not self.pruning_variables_initialized:
        self.results = results
        self.pruning_variables_initialized = True

    return costs
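Both this cost function and the next one shift the feedback sequence in the same way: tensor.roll(feedback, 1, 0) moves every timestep one position later along the time axis, so the readout at step t is conditioned on the output of step t - 1, and the wrapped-around first row is then overwritten with the feedback of the initial outputs. A minimal sketch of that shift, assuming a (time, batch, features) layout and made-up inputs:

    import numpy as np
    import theano
    from theano import tensor

    feedback = tensor.tensor3('feedback')            # (time, batch, features)
    init_feedback = tensor.matrix('init_feedback')   # (batch, features), feedback of the initial outputs

    shifted = tensor.roll(feedback, 1, 0)                       # step t now holds the feedback of step t - 1
    shifted = tensor.set_subtensor(shifted[0], init_feedback)   # replace the wrapped-around first row

    shift_fn = theano.function([feedback, init_feedback], shifted)

    seq = np.arange(6, dtype=theano.config.floatX).reshape(3, 1, 2)   # 3 steps, batch of 1, 2 features
    start = np.zeros((1, 2), dtype=theano.config.floatX)
    print(shift_fn(seq, start))   # step 0 is the initial feedback, steps 1-2 hold the old steps 0-1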
def cost_matrix_nmt(self, application_call, target_char_seq, target_sample_matrix,
                    target_resample_matrix, target_word_mask, target_char_aux,
                    target_prev_char_seq, target_prev_char_aux, **kwargs):
    """Returns generation costs for output sequences.

    See Also
    --------
    :meth:`cost` : Scalar cost.
    """
    # We assume the data has axes (time, batch, features, ...)
    batch_size = target_char_seq.shape[1]

    # Prepare input for the iterative part
    states = dict_subset(kwargs, self._state_names, must_have=False)
    # masks in context are optional (e.g. `attended_mask`)
    contexts = dict_subset(kwargs, self._context_names, must_have=False)
    feedback = self.readout.feedback_apply(target_char_seq, target_sample_matrix, target_char_aux)
    inputs = self.fork.apply(feedback, as_dict=True)

    # Run the recurrent network
    results = self.transition.apply(
        mask=target_word_mask, return_initial_states=True, as_dict=True,
        **dict_union(inputs, states, contexts))

    # Separate the deliverables. The last states are discarded: they
    # are not used to predict any output symbol. The initial glimpses
    # are discarded because they are not used for prediction.
    # Remember, glimpses are computed _before_ the output stage, states are
    # computed after.
    states = {name: results[name][:-1] for name in self._state_names}
    glimpses = {name: results[name][1:] for name in self._glimpse_names}

    feedback = tensor.roll(feedback, 1, 0)
    init_feedback = self.readout.single_feedback(self.readout.initial_outputs(batch_size), batch_size)
    if self.trg_dgru_depth == 1:
        feedback = tensor.set_subtensor(feedback[0], init_feedback)
    else:
        feedback = tensor.set_subtensor(feedback[0], init_feedback[-1])

    decoder_readout_outputs = self.readout.readout(
        feedback=feedback, **dict_union(states, glimpses, contexts))
    resampled_representation = tensor.batched_dot(
        target_resample_matrix, decoder_readout_outputs.dimshuffle([1, 0, 2]))
    resampled_readouts = resampled_representation.dimshuffle([1, 0, 2])
    readouts_chars = self.readout.readout_gru(
        target_prev_char_seq, target_prev_char_aux, resampled_readouts)

    # Compute the cost
    costs = self.readout.cost(readouts_chars, target_char_seq)

    for name, variable in list(glimpses.items()) + list(states.items()):
        application_call.add_auxiliary_variable(
            variable.copy(), name=name)

    # These variables can be used to initialize the initial states of the
    # next batch using the last states of the current batch.
    for name in self._state_names + self._glimpse_names:
        application_call.add_auxiliary_variable(
            results[name][-1].copy(), name=name + "_final_value")

    return costs