The following 50 code examples, extracted from open source Python projects, illustrate how to use theano.tensor.stack().
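Before turning to the project code, here is a minimal, self-contained sketch of the basic API (the variable names and values are illustrative only, not drawn from any of the projects below): theano.tensor.stack() joins a sequence of symbolic tensors along a new axis.

import numpy as np
import theano
import theano.tensor as T

# Two symbolic vectors; stacking along a new leading axis yields a matrix.
a = T.vector('a')
b = T.vector('b')
stacked = T.stack([a, b], axis=0)   # symbolic shape: (2, len(a))

f = theano.function([a, b], stacked)
print(f(np.arange(3, dtype=theano.config.floatX),
        np.ones(3, dtype=theano.config.floatX)))
# -> [[0. 1. 2.]
#     [1. 1. 1.]]

Several of the examples below use the older calling convention T.stack(*tensors); the newer list-plus-axis form shown above is equivalent for a stack along axis 0.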
def unroll_scan(self):
    rval = [self.scan_namespace['outputs_info']]
    for i in range(self.time_size):
        step_inputs = [s[i] for s in self.scan_namespace['sequences']] + \
            rval[-1] + self.scan_namespace['non_sequences']
        scan_out = list(self.step(*step_inputs))
        rval += [scan_out]
    # rval is a list of each returned tuple at each time step
    # scan returns a tuple of all elements that have been joined on the time axis
    new_rval = []
    for i in range(len(rval[1])):
        new_rval += [[]]
        for j in range(1, len(rval)):
            new_rval[i] += [rval[j][i]]
    new_new_rval = []
    for i in new_rval:
        new_new_rval += [T.stack(i, axis=0)]
    return tuple(new_new_rval)
def new_episode(self, mem):
    g, g_updates = theano.scan(fn=self.new_attention_step,
                               sequences=self.inp_c,
                               non_sequences=[mem, self.q_q],
                               outputs_info=T.zeros_like(self.inp_c[0][0]))

    if (self.normalize_attention):
        g = nn_utils.softmax(g)

    e, e_updates = theano.scan(fn=self.new_episode_step,
                               sequences=[self.inp_c, g],
                               outputs_info=T.zeros_like(self.inp_c[0]))

    e_list = []
    for index in range(self.batch_size):
        e_list.append(e[self.fact_count_var[index] - 1, :, index])
    return T.stack(e_list).dimshuffle((1, 0))
def grad(self, inputs, gout):
    (gz,) = gout
    is_continuous = [(inputs[i].dtype in tensor.continuous_dtypes)
                     for i in range(len(inputs))]

    if _is_sparse_variable(gz):
        gz = dense_from_sparse(gz)

    split = tensor.Split(len(inputs))(gz, 1,
                                      tensor.stack(
                                          [x.shape[1] for x in inputs]))
    if not isinstance(split, list):
        split = [split]

    derivative = [SparseFromDense(self.format)(s) for s in split]

    def choose(continuous, derivative):
        if continuous:
            return derivative
        else:
            return None
    return [choose(c, d) for c, d in zip(is_continuous, derivative)]
def grad(self, inputs, gout):
    (gz,) = gout
    is_continuous = [(inputs[i].dtype in tensor.continuous_dtypes)
                     for i in range(len(inputs))]

    if _is_sparse_variable(gz):
        gz = dense_from_sparse(gz)

    split = tensor.Split(len(inputs))(gz, 0,
                                      tensor.stack(
                                          [x.shape[0] for x in inputs]))
    if not isinstance(split, list):
        split = [split]

    derivative = [SparseFromDense(self.format)(s) for s in split]

    def choose(continuous, derivative):
        if continuous:
            return derivative
        else:
            return None
    return [choose(c, d) for c, d in zip(is_continuous, derivative)]
def test_stack_hessian(self):
    # Test the gradient of stack when used in hessian, see gh-1589
    a = tensor.dvector('a')
    b = tensor.dvector('b')
    A = stack([a, b])
    B = A.T.dot(A)
    Ha, Hb = hessian(B.sum(), [a, b])

    # Try some values
    a_v = numpy.random.rand(4)
    b_v = numpy.random.rand(4)
    f = theano.function([a, b], [Ha, Hb])
    Ha_v, Hb_v = f(a_v, b_v)
    # The Hessian is always a matrix full of 2
    assert Ha_v.shape == (4, 4)
    assert Hb_v.shape == (4, 4)
    assert numpy.allclose(Ha_v, 2.)
    assert numpy.allclose(Hb_v, 2.)
def test_get_scalar_constant_value(self):
    a = tensor.stack([1, 2, 3])
    assert get_scalar_constant_value(a[0]) == 1
    assert get_scalar_constant_value(a[1]) == 2
    assert get_scalar_constant_value(a[2]) == 3

    b = tensor.iscalar()
    a = tensor.stack([b, 2, 3])
    self.assertRaises(tensor.basic.NotScalarConstantError,
                      get_scalar_constant_value, a[0])
    assert get_scalar_constant_value(a[1]) == 2
    assert get_scalar_constant_value(a[2]) == 3

    # For now get_scalar_constant_value goes through only MakeVector and Join of
    # scalars.
    v = tensor.ivector()
    a = tensor.stack([v, [2], [3]])
    self.assertRaises(tensor.NotScalarConstantError,
                      get_scalar_constant_value, a[0])
    self.assertRaises(tensor.NotScalarConstantError,
                      get_scalar_constant_value, a[1])
    self.assertRaises(tensor.NotScalarConstantError,
                      get_scalar_constant_value, a[2])

    # Test the case SubTensor(Shape(v)) when the dimensions
    # is broadcastable.
    v = tensor.row()
    assert get_scalar_constant_value(v.shape[0]) == 1
def infer_shape(self, node, in_shapes):
    shape_a = in_shapes[0]
    n = node.inputs[1]
    axis = node.inputs[2]
    if len(shape_a) == 1:
        return [(n,)]
    elif isinstance(axis, tensor.TensorConstant):
        out_shape = (list(shape_a[0:axis.data.item()]) + [n] +
                     list(shape_a[axis.data + 1:]))
    else:
        l = len(shape_a)
        shape_a = tensor.stack(shape_a)
        out_shape = tensor.concatenate((shape_a[0:axis], [n],
                                        shape_a[axis + 1:]))
        n_splits = [1] * l
        out_shape = tensor.split(out_shape, n_splits, l)
        out_shape = [a[0] for a in out_shape]
    return [out_shape]
def new_episode(self, mem):
    g, g_updates = theano.scan(fn=self.new_attention_step,
                               sequences=self.inp_c,
                               non_sequences=[mem, self.q_q],
                               outputs_info=T.zeros_like(self.inp_c[0][0]))

    if (self.normalize_attention):
        g = nn_utils.softmax(g)

    self.attentions.append(g)

    e, e_updates = theano.scan(fn=self.new_episode_step,
                               sequences=[self.inp_c, g],
                               outputs_info=T.zeros_like(self.inp_c[0]))

    e_list = []
    for index in range(self.batch_size):
        e_list.append(e[self.fact_count_var[index] - 1, :, index])
    return T.stack(e_list).dimshuffle((1, 0))
def goroshin_argmax(z, shape, axis=(1, ), beta=3, epsilon=0.0001):
    z = z/(abs(T.max(z))+utils.floatX(epsilon))
    a = ()
    for t in axis:
        a += (slice(0, shape[t]), )
    xyshape = list(shape)+[]
    for i in range(len(shape)):
        if i not in axis:
            xyshape[i] = 1
    xy = T.mgrid[a]
    b = T.exp(beta*z)/T.exp(beta*z).sum(axis, keepdims=True)
    res = []
    for i in range(len(axis)):
        x = ((xy[i].astype(floatX)).reshape(xyshape)*b).sum(axis=axis)
        res += [x]
    return T.stack(res, axis=1)
def normalize_batch_in_training(x, gamma, beta, reduction_axes, epsilon=0.0001):
    '''Compute mean and std for batch then apply batch_normalization on batch.
    '''
    var = x.var(reduction_axes)
    mean = x.mean(reduction_axes)

    target_shape = []
    for axis in range(ndim(x)):
        if axis in reduction_axes:
            target_shape.append(1)
        else:
            target_shape.append(x.shape[axis])
    target_shape = T.stack(*target_shape)

    broadcast_mean = T.reshape(mean, target_shape)
    broadcast_var = T.reshape(var, target_shape)
    broadcast_beta = T.reshape(beta, target_shape)
    broadcast_gamma = T.reshape(gamma, target_shape)
    normed = batch_normalization(x, broadcast_mean, broadcast_var,
                                 broadcast_beta, broadcast_gamma,
                                 epsilon)
    return normed, mean, var
def train_scales(self, x, comp_weight, error_loss='L2'):
    true_activations, _ = self._compute_activations(x, do_round=False)
    approx_activations, rounding_signals = self._compute_activations(x, do_round=True)

    error_loss = get_error_loss(guess=approx_activations.values()[-1].flatten(2),
                                truth=true_activations.values()[-1].flatten(2),
                                loss_type=error_loss)

    param_grad_pairs = []
    for i, (layer_name, sigs) in enumerate(rounding_signals.iteritems()):
        assert isinstance(self.layers[layer_name], ScaledRoundingLayer), "You F'ed up."
        scale_param = self.layers[layer_name].get_scale_param()
        error_grad = tt.grad(error_loss, wrt=scale_param,
                             consider_constant=[other_sigs['epsilon'] for other_sigs in rounding_signals.values()[i+1:]])
        next_layer = self.layers.values()[self.layers.values().index(self.layers[layer_name])+1]
        assert isinstance(next_layer, ConvLayer), "Again"
        layer_comp_loss = tt.switch(sigs['scaled_input'] > 0, sigs['spikes'], -sigs['spikes']).sum() \
            * get_conv_layer_fanout(next_layer.w.shape, conv_mode={0: 'full', 1: 'same'}[next_layer.border_mode])  # NOTE: NOT GENERAL: VGG SPECIFIC!
        # layer_comp_loss = abs(sigs['spikes']).sum() \
        #     * get_conv_layer_fanout(next_layer.w.shape, conv_mode={0: 'full', 1: 'same'}[next_layer.border_mode])  # NOTE: NOT GENERAL: VGG SPECIFIC!
        print '{} fanout: {}'.format(layer_name, get_conv_layer_fanout(next_layer.w.get_value().shape, conv_mode={0: 'full', 1: 'same'}[next_layer.border_mode]))
        comp_grad = tt.grad(layer_comp_loss, wrt=scale_param, consider_constant=[sigs['epsilon']])
        # tdbprint(comp_weight*comp_grad, layer_name+'scaled comp grad')
        # tdbprint(error_grad, layer_name+'scaled error grad')
        layer_grad = error_grad + comp_weight*comp_grad
        param_grad_pairs.append((scale_param, layer_grad))

    # tdbplot(tt.stack([abs(sigs['spikes']).mean() for sigs in rounding_signals.values()]), 'mea spikes', plot_type='line')
    scale_params, grads = zip(*param_grad_pairs)
    self.optimizer.update_from_gradients(parameters=scale_params, gradients=grads)
def process_input(self, data_raw):
    return (nn_utils.pad_zeros(data_raw[0]).astype(np.float32),
            map(len, data_raw[0]),
            np.stack(data_raw[1]))
def jaccard(y_pred, y_true, n_classes, one_hot=False):
    assert (y_pred.ndim == 2) or (y_pred.ndim == 1)

    # y_pred to indices
    if y_pred.ndim == 2:
        y_pred = T.argmax(y_pred, axis=1)

    if one_hot:
        y_true = T.argmax(y_true, axis=1)

    # Compute confusion matrix
    cm = T.zeros((n_classes, n_classes))
    for i in range(n_classes):
        for j in range(n_classes):
            cm = T.set_subtensor(
                cm[i, j], T.sum(T.eq(y_pred, i) * T.eq(y_true, j)))

    # Compute Jaccard Index
    TP_perclass = T.cast(cm.diagonal(), _FLOATX)
    FP_perclass = cm.sum(1) - TP_perclass
    FN_perclass = cm.sum(0) - TP_perclass

    num = TP_perclass
    denom = TP_perclass + FP_perclass + FN_perclass

    return T.stack([num, denom], axis=0)
def get_output_for(self, inputs, **kwargs):
    vals, ref = inputs
    N, _, H, W = ref.shape
    yx = tt.stack(tt.mgrid[0:H, 0:W])[np.newaxis, :, :, :]
    grid = tt.alloc(tt.cast(yx, "float32"), N, 2, H, W)
    stacked = tt.concatenate([grid, ref], axis=1)
    return super(BilateralFilterLayer, self).get_output_for(
        [vals, stacked], **kwargs)
def get_output_for(self, inputs, **kwargs):
    unary, ref = inputs
    N, _, H, W = ref.shape
    yx = tt.cast(tt.stack(tt.mgrid[0:H, 0:W]), "float32")
    grid = tt.alloc(yx[np.newaxis, :, :, :], N, 2, H, W)
    stacked = tt.concatenate([grid, ref], axis=1)

    def _bilateral(V, R):
        o = tt.ones((1, V.shape[1], V.shape[2]), "float32")
        norm = tt.sqrt(gaussian_filter(R, o, self.kstd_bf,
                                       self.ref_dim)) + 1e-8
        return gaussian_filter(R, V/norm, self.kstd_bf, self.ref_dim,
                               self.val_dim) / norm

    def _step(prev_q, U, ref, normalize=True):
        qbf = _bilateral(prev_q, ref)
        qsf = tt.nnet.conv2d(prev_q[np.newaxis, :, :, :],
                             self.W_spatial, border_mode="half")[0]
        q_hat = -self.compat_bf * qbf + -self.compat_spatial * qsf
        q_hat = U - q_hat
        return softmax(q_hat, axis=0) if normalize else q_hat

    def _inference(unary_i, ref_i):
        U = tt.log(tt.clip(unary_i, 1e-5, 1))
        prev_q = softmax(U, axis=0)

        # This is faster than using scan.
        for i in range(self.num_iter):
            normalize = self.normalize_final_iter or i < self.num_iter-1
            prev_q = _step(prev_q, U, ref_i, normalize)
        return prev_q

    return theano.scan(fn=_inference, sequences=[unary, stacked],
                       outputs_info=None)[0]
def normalize_batch_in_training(x, gamma, beta, reduction_axes, epsilon=0.0001):
    '''Computes mean and std for batch then apply batch_normalization on batch.
    '''
    dev = theano.config.device
    use_cudnn = ndim(x) < 5 and reduction_axes == [0, 2, 3] and (dev.startswith('cuda') or dev.startswith('gpu'))
    if use_cudnn:
        broadcast_beta = beta.dimshuffle('x', 0, 'x', 'x')
        broadcast_gamma = gamma.dimshuffle('x', 0, 'x', 'x')
        try:
            normed, mean, stdinv = theano.sandbox.cuda.dnn.dnn_batch_normalization_train(
                x, broadcast_gamma, broadcast_beta, 'spatial', epsilon)
            var = T.inv(stdinv ** 2)
            return normed, T.flatten(mean), T.flatten(var)
        except AttributeError:
            pass

    var = x.var(reduction_axes)
    mean = x.mean(reduction_axes)

    target_shape = []
    for axis in range(ndim(x)):
        if axis in reduction_axes:
            target_shape.append(1)
        else:
            target_shape.append(x.shape[axis])
    target_shape = T.stack(*target_shape)

    broadcast_mean = T.reshape(mean, target_shape)
    broadcast_var = T.reshape(var, target_shape)
    broadcast_beta = T.reshape(beta, target_shape)
    broadcast_gamma = T.reshape(gamma, target_shape)
    normed = batch_normalization(x, broadcast_mean, broadcast_var,
                                 broadcast_beta, broadcast_gamma,
                                 epsilon)
    return normed, mean, var
def pack(x):
    return T.stack(*x)
def get_dropout_mask(var, drop_prob, rng=None, seed=None):
    if not rng and not seed:
        seed = config.default_seed
    if not rng:
        rng = MRG_RandomStreams(seed)

    # we assume that the batch dimension is the first one
    mask_shape = tensor.stack([var.shape[0], var.shape[-1]])
    return rng.binomial(mask_shape, p=1 - drop_prob,
                        dtype=theano.config.floatX)
def parameter_stats(parameters, algorithm):
    vars_ = []
    for name, param in parameters.items():
        num_elements = numpy.product(param.get_value().shape)
        norm = param.norm(2) / num_elements ** 0.5
        grad_norm = algorithm.gradients[param].norm(2) / num_elements ** 0.5
        step_norm = algorithm.steps[param].norm(2) / num_elements ** 0.5
        stats = tensor.stack(norm, grad_norm, step_norm,
                             step_norm / grad_norm)
        stats.name = name + '_stats'
        vars_.append(stats)
    return vars_
def _predict_step_note(self, in_data_from_time, *states):
    hiddens = list(states[:-1])
    in_data_from_prev = states[-1]
    in_data = T.concatenate([in_data_from_time, in_data_from_prev])

    if self.dropout > 0:
        masks = [1 - self.dropout for layer in self.pitch_model.layers]
        masks[0] = None
    else:
        masks = []

    new_states = self.pitch_model.forward(in_data, prev_hiddens=hiddens,
                                          dropout=masks)

    probabilities = get_last_layer(new_states)

    shouldPlay = self.srng.uniform() < (probabilities[0] ** self.conservativity)
    shouldArtic = shouldPlay * (self.srng.uniform() < probabilities[1])

    chosen = T.stack([T.cast(shouldPlay, 'int8'), T.cast(shouldArtic, 'int8')])

    return ensure_list(new_states) + [chosen]
def stack(self, x):
    return T.stack(*x)


# NN OPERATIONS
def stack(x):
    return T.stack(*x)
def stack(self, tlist):
    return T.stack(tlist)
def _dirac_truncated_rfft(self, point):
    """
    Returns the truncated real FFT of a dirac at position 'point',
    as a (2+1)-d array of size "K.shape//2+1" + (4,),.
    See real_fft._irfft_2d to understand the format of the output.
    The code may seem quite circonvoluted but hey, it's not my fault
    if theano forces us to use real-valued FFT...
    """
    su, di = self._phase_shifts(point)
    re_re = T.cos(di) + T.cos(su)  # 2 cos(a)cos(b) = cos(a-b) + cos(a+b)
    re_im = T.sin(su) + T.sin(di)  # 2 sin(a)cos(b) = sin(a+b) + sin(a-b)
    im_re = T.sin(su) - T.sin(di)  # 2 cos(a)sin(b) = sin(a+b) - sin(a-b)
    im_im = T.cos(di) - T.cos(su)  # 2 sin(a)sin(b) = cos(a-b) - cos(a+b)
    return .5 * T.stack([re_re, re_im, im_re, im_im], axis=2)  # Don't forget the .5 !
def dense_grid(self):
    """
    Outputs the dense image 'meshgrid'.
    """
    x = np.arange(self.image_shape[1])
    y = np.arange(self.image_shape[0])
    X = np.ones((self.image_shape[0], 1)) * x
    Y = (np.ones((self.image_shape[1], 1)) * y).T
    # np.ones = float64 -> float32, the only type supported by Theano on GPU
    return np.stack([Y, X], axis=2).astype(theano.config.floatX)
def get_output_for(self, input, **kwargs):
    out = T.stack([input for i in range(self.copies)], axis=0)
    out = out.dimshuffle(1, 0, 2)
    return out
def _old_normalize_batch_in_training(x, gamma, beta, reduction_axes, epsilon=1e-3):
    '''Computes mean and std for batch then apply batch_normalization on batch.
    '''
    dev = theano.config.device
    use_cudnn = ndim(x) < 5 and reduction_axes == [0, 2, 3] and (dev.startswith('cuda') or dev.startswith('gpu'))
    if use_cudnn:
        broadcast_beta = beta.dimshuffle('x', 0, 'x', 'x')
        broadcast_gamma = gamma.dimshuffle('x', 0, 'x', 'x')
        try:
            normed, mean, stdinv = theano.sandbox.cuda.dnn.dnn_batch_normalization_train(
                x, broadcast_gamma, broadcast_beta, 'spatial', epsilon)
            var = T.inv(stdinv ** 2)
            return normed, T.flatten(mean), T.flatten(var)
        except AttributeError:
            pass

    var = x.var(reduction_axes)
    mean = x.mean(reduction_axes)

    target_shape = []
    for axis in range(ndim(x)):
        if axis in reduction_axes:
            target_shape.append(1)
        else:
            target_shape.append(x.shape[axis])
    target_shape = T.stack(*target_shape)

    broadcast_mean = T.reshape(mean, target_shape)
    broadcast_var = T.reshape(var, target_shape)
    broadcast_beta = T.reshape(beta, target_shape)
    broadcast_gamma = T.reshape(gamma, target_shape)
    normed = batch_normalization(x, broadcast_mean, broadcast_var,
                                 broadcast_beta, broadcast_gamma,
                                 epsilon)
    return normed, mean, var


# TODO remove this if statement when Theano without
# T.nnet.bn.batch_normalization_test is deprecated
def Recurrence(processed_frames, h0, reset):
    """
    processed_frames.shape: (batch size, n frames, DIM)
    h0.shape: (batch size, N_GRUS, DIM)
    reset.shape: ()
    output.shape: (batch size, n frames, DIM)
    """
    # print "warning no recurrence"
    # return T.zeros_like(processed_frames), h0

    learned_h0 = lib.param(
        'Recurrence.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX)
    )
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_GRUS, DIM)
    learned_h0 = T.patternbroadcast(learned_h0, [False] * learned_h0.ndim)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)

    gru0 = lib.ops.LowMemGRU('Recurrence.GRU0', DIM, DIM,
                             processed_frames, h0=h0[:, 0])
    grus = [gru0]
    for i in xrange(1, N_GRUS):
        gru = lib.ops.LowMemGRU('Recurrence.GRU'+str(i), DIM, DIM,
                                grus[-1], h0=h0[:, i])
        grus.append(gru)

    last_hidden = T.stack([gru[:, -1] for gru in grus], axis=1)

    return (grus[-1], last_hidden)
def pack(x):
    return T.stack(*x)


# VALUE MANIPULATION
def _get_L_cov(L_cov_21, floatX, Uniform, tt):
    if type(L_cov_21) in (float, int):
        return np.array([[1.0, L_cov_21], [L_cov_21, 1.0]]).astype(floatX)
    elif _is_uniform(L_cov_21):
        r = parse('U({:f},{:f})', L_cov_21.replace(' ', ''))
        L_cov_21_ = Uniform('L_cov_21_', lower=r[0], upper=r[1])
        return tt.stack([1.0, L_cov_21_, L_cov_21_, 1.0]).reshape((2, 2))
def new_attention_step(self, ct, prev_g, mem, q_q):
    #cWq = T.stack([T.dot(T.dot(ct, self.W_b), q_q)])
    #cWm = T.stack([T.dot(T.dot(ct, self.W_b), mem)])
    z = T.concatenate([ct, mem, q_q, ct * q_q, ct * mem,
                       (ct - q_q) ** 2, (ct - mem) ** 2])  # , cWq, cWm])

    l_1 = T.dot(self.W_1, z) + self.b_1
    l_1 = T.tanh(l_1)
    l_2 = T.dot(self.W_2, l_1) + self.b_2
    G = T.nnet.sigmoid(l_2)[0]
    return G
def new_attention_step(self, ct, prev_g, mem, q_q):
    cWq = T.stack([T.dot(T.dot(ct, self.W_b), q_q)])
    cWm = T.stack([T.dot(T.dot(ct, self.W_b), mem)])
    z = T.concatenate([ct, mem, q_q, ct * q_q, ct * mem,
                       T.abs_(ct - q_q), T.abs_(ct - mem), cWq, cWm])

    l_1 = T.dot(self.W_1, z) + self.b_1
    l_1 = T.tanh(l_1)
    l_2 = T.dot(self.W_2, l_1) + self.b_2
    G = T.nnet.sigmoid(l_2)[0]
    return G
def new_attention_step(self, ct, prev_g, mem, q_q):
    #cWq = T.stack([T.dot(T.dot(ct, self.W_b), q_q)])
    #cWm = T.stack([T.dot(T.dot(ct, self.W_b), mem)])
    #z = T.concatenate([ct, mem, q_q, ct * q_q, ct * mem, (ct - q_q) ** 2, (ct - mem) ** 2])#, cWq, cWm])
    cmq = T.concatenate([ct, mem, q_q])

    l_1 = (T.dot(T.dot(self.W_111, ct), mem) +
           T.dot(T.dot(self.W_112, q_q), mem) +
           T.dot(T.dot(self.W_113, ct), q_q) +
           T.dot(self.W_12, cmq) + self.b_1)
    #l_1 = T.dot(self.W_1, z) + self.b_1
    l_1 = T.tanh(l_1)
    l_2 = T.dot(self.W_2, l_1) + self.b_2
    G = T.nnet.sigmoid(l_2)[0]
    return G
def extract_sample_scan_results(self, spec, outputs):
    """
    Extract outputs from the scan results.

    Parameters:
        outputs: The outputs from the scan associated with this stack

    Returns:
        positions, raw_output, sampled_output
    """
    positions = T.concatenate([T.shape_padright(spec.start_pos),
                               outputs[0].transpose((1, 0))[:, :-1]], 1)
    sampled_output = outputs[2].transpose((1, 0, 2))
    raw_output = outputs[-1].transpose((1, 0, 2))

    return positions, raw_output, sampled_output
def stack(x, axis=0):
    return T.stack(x, axis=axis)
def test_gpueye():
    def check(dtype, N, M_=None):
        # Theano does not accept None as a tensor.
        # So we must use a real value.
        M = M_
        # Currently DebugMode does not support None as inputs even if this is
        # allowed.
        if M is None:
            M = N
        N_symb = T.iscalar()
        M_symb = T.iscalar()
        k_symb = numpy.asarray(0)
        out = T.eye(N_symb, M_symb, k_symb, dtype=dtype)
        f = theano.function([N_symb, M_symb], T.stack(out),
                            mode=mode_with_gpu)
        result = numpy.asarray(f(N, M))
        assert numpy.allclose(result, numpy.eye(N, M_, dtype=dtype))
        assert result.dtype == numpy.dtype(dtype)
        assert any([isinstance(node.op, GpuEye)
                    for node in f.maker.fgraph.toposort()])

    for dtype in ['float32', 'int32', 'float16']:
        yield check, dtype, 3
        # M != N, k = 0
        yield check, dtype, 3, 5
        yield check, dtype, 5, 3
def test_local_set_to_inc_subtensor():
    v = theano.tensor.fmatrix()
    s = v[[2, 1]]
    g = s + 3
    r = theano.tensor.set_subtensor(s, g)
    moder = compile.get_default_mode().excluding('local_set_to_inc_subtensor')
    modet = compile.get_default_mode().including('local_set_to_inc_subtensor')
    f1 = theano.function([v], r, mode=moder)
    f2 = theano.function([v], r, mode=modet)

    advi1 = [n for n in f1.maker.fgraph.toposort()
             if isinstance(n.op, tensor.AdvancedIncSubtensor1)]

    advi2 = [n for n in f2.maker.fgraph.toposort()
             if isinstance(n.op, tensor.AdvancedIncSubtensor1)]

    # We only have SetSubtensor in f1
    assert all(n.op.set_instead_of_inc for n in advi1)
    # We don't have any SetSubtensor in f2
    assert all(not n.op.set_instead_of_inc for n in advi2)

    val = numpy.random.randn(3, 2).astype('float32')
    r1 = f1(val)
    r2 = f2(val)

    utt.assert_allclose(r1, r2)

    # Finally, test that the stack trace is copied over properly,
    # before and after optimization.
    assert check_stack_trace(f1, ops_to_check=tensor.AdvancedIncSubtensor1)
    assert check_stack_trace(f2, ops_to_check='all')
def test_local_subtensor_of_dot():
    m1 = theano.tensor.matrix()
    m2 = theano.tensor.matrix()
    d1 = numpy.arange(6).reshape((3, 2)).astype(config.floatX)
    d2 = numpy.arange(8).reshape((2, 4)).astype(config.floatX) + 10
    mode = compile.get_default_mode().including("local_subtensor_of_dot")

    def test_equality(a, b):
        return a.shape == b.shape and numpy.allclose(a, b)

    # [cst]
    f = theano.function([m1, m2], theano.dot(m1, m2)[1], mode=mode)
    topo = f.maker.fgraph.toposort()
    assert test_equality(f(d1, d2), numpy.dot(d1, d2)[1])
    # DimShuffle happen in FAST_COMPILE
    assert isinstance(topo[-1].op, (T.blas_c.CGemv, T.blas.Gemv, T.DimShuffle))

    # slice
    f = theano.function([m1, m2], theano.dot(m1, m2)[1:2], mode=mode)
    topo = f.maker.fgraph.toposort()
    assert test_equality(f(d1, d2), numpy.dot(d1, d2)[1:2])
    assert isinstance(topo[-1].op, (T.blas.Dot22))

    m1 = theano.tensor.tensor3()
    m2 = theano.tensor.tensor3()
    idx = theano.tensor.iscalar()
    d1 = numpy.arange(30).reshape(2, 5, 3).astype(config.floatX)
    d2 = numpy.arange(72).reshape(4, 3, 6).astype(config.floatX) + 100

    f = theano.function([m1, m2, idx], theano.dot(m1, m2)[idx, 1:4, :, idx:], mode=mode)
    assert test_equality(f(d1, d2, 1), numpy.dot(d1, d2)[1, 1:4, :, 1:])
    # if we return the gradients. We need to use same mode as before.
    assert check_stack_trace(f, ops_to_check='last')

    f = theano.function([m1, m2, idx], theano.dot(m1, m2)[1:4, :, idx:, idx], mode=mode)
    assert test_equality(f(d1, d2, 1), numpy.dot(d1, d2)[1:4, :, 1:, 1])

    # Now test that the stack trace is copied over properly,
    # if we return the gradients. We need to use same mode as before.
    assert check_stack_trace(f, ops_to_check='last')
def test_local_join_1():
    # test for vector
    a = tensor.vector('a')
    s = tensor.stack([a])
    f = function([a], s, mode=mode_opt)
    val = f([1])
    assert numpy.all(val == [1])
    e = f.maker.fgraph.toposort()
    assert len([n for n in e if isinstance(n.op, Join)]) == 0
    assert f.maker.fgraph.outputs[0].dtype == config.floatX

    # test for matrix join(0,a)
    a = tensor.matrix('a')
    s = join(0, a)
    f = function([a], s, mode=mode_opt)
    val = f([[1]])
    assert numpy.all(val == [[1]])
    e = f.maker.fgraph.toposort()
    assert len([n for n in e if isinstance(n.op, Join)]) == 0
    assert f.maker.fgraph.outputs[0].dtype == config.floatX

    # test for matrix join(1,a)
    s = join(1, a)
    f = function([a], s, mode=mode_opt)
    val = f([[1]])
    assert numpy.all(val == [[1]])
    e = f.maker.fgraph.toposort()
    assert len([n for n in e if isinstance(n.op, Join)]) == 0
    assert f.maker.fgraph.outputs[0].dtype == config.floatX

    # test we don't apply when their is 2 inputs
    s = join(1, a, a)
    f = function([a], s, mode=mode_opt)
    val = f([[1]])
    assert numpy.all(val == [[1]])
    e = f.maker.fgraph.toposort()
    assert len([n for n in e if isinstance(n.op, Join)]) == 1
    assert f.maker.fgraph.outputs[0].dtype == config.floatX
def test_stack_mixed_type_constants(self):
    # tested only on cpu as gpu support only float32
    a = as_tensor_variable(1)
    b = as_tensor_variable(2.0)
    c = tensor._shared(numpy.asarray(3.0, dtype=self.floatX))
    s = stack([a, b, c])
    want = numpy.array([1, 2, 3])
    out = self.eval_outputs_and_check_vector([s], opt.MakeVector())
    self.assertTrue((out == want).all())
def test_stack_scalar_make_vector(self):
    """Test that calling stack() on scalars instantiates MakeVector,
    not Join. Test that the floatX dtype stay floatX, not downcasted
    to int64"""
    a = tensor.scalar('a', dtype=self.floatX)
    b = tensor.scalar('b', dtype=self.floatX)
    s = stack([a, b, a, b])
    f = function([a, b], s, mode=self.mode)
    val = f(1, 2)
    # print val
    self.assertTrue(numpy.all(val == [1, 2, 1, 2]))
    topo = f.maker.fgraph.toposort()
    assert len([n for n in topo if isinstance(n.op, opt.MakeVector)]) > 0
    assert len([n for n in topo if isinstance(n, type(self.join_op))]) == 0
    assert f.maker.fgraph.outputs[0].dtype == self.floatX
def test_stack_scalar_make_vector_dtype(self):
    '''Test that calling stack() on scalars instantiates MakeVector,
    event when the scalar don't have the same dtype.'''
    a = tensor.iscalar('a')
    b = tensor.lscalar('b')
    s = stack([a, b, a, b])
    f = function([a, b], s, mode=self.mode)
    val = f(1, 2)
    self.assertTrue(numpy.all(val == [1, 2, 1, 2]))
    topo = f.maker.fgraph.toposort()
    assert len([n for n in topo if isinstance(n.op, opt.MakeVector)]) > 0
    assert len([n for n in topo if isinstance(n, type(self.join_op))]) == 0
    assert f.maker.fgraph.outputs[0].dtype == 'int64'
def test_stack_scalar_make_vector_constant(self):
    '''Test that calling stack() on scalars instantiates MakeVector,
    event when the scalar are simple int type.'''
    a = tensor.iscalar('a')
    b = tensor.lscalar('b')
    # test when the constant is the first element.
    # The first element is used in a special way
    s = stack([10, a, b, numpy.int8(3)])
    f = function([a, b], s, mode=self.mode)
    val = f(1, 2)
    self.assertTrue(numpy.all(val == [10, 1, 2, 3]))
    topo = f.maker.fgraph.toposort()
    assert len([n for n in topo if isinstance(n.op, opt.MakeVector)]) > 0
    assert len([n for n in topo if isinstance(n, type(self.join_op))]) == 0
    assert f.maker.fgraph.outputs[0].dtype == 'int64'