The following 43 code examples, extracted from open-source Python projects, illustrate how to use theano.tensor.outer().
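
Before the project examples, a minimal self-contained sketch (not taken from any of the projects below) may help: T.outer(a, b) returns the outer-product matrix whose (i, j) entry is a[i] * b[j].

# Minimal usage sketch of theano.tensor.outer() (illustrative only).
import numpy as np
import theano
import theano.tensor as T

a = T.vector('a')
b = T.vector('b')
outer = theano.function([a, b], T.outer(a, b))

x = np.asarray([1., 2., 3.], dtype=theano.config.floatX)
y = np.asarray([4., 5.], dtype=theano.config.floatX)
print(outer(x, y))  # 3x2 matrix; row i equals x[i] * y
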
def sample(self, n_samples):
    '''
    Inspired by jbornschein's implementation.
    '''
    z0 = T.zeros((n_samples, self.dim,)).astype(floatX) + T.shape_padleft(self.b)
    rs = self.trng.uniform((self.dim, n_samples), dtype=floatX)

    def _step_sample(i, W_i, r_i, z):
        p_i = T.nnet.sigmoid(z[:, i]) * 0.9999 + 0.000005
        x_i = (r_i <= p_i).astype(floatX)
        z = z + T.outer(x_i, W_i)
        return z, x_i

    seqs = [T.arange(self.dim), self.W, rs]
    outputs_info = [z0, None]
    non_seqs = []

    (zs, x), updates = scan(_step_sample, seqs, outputs_info, non_seqs, self.dim)

    return x.T, updates

def times_reflection(input, n_hidden, reflection):
    input_re = input[:, :n_hidden]
    input_im = input[:, n_hidden:]
    reflect_re = reflection[:n_hidden]
    reflect_im = reflection[n_hidden:]

    vstarv = (reflection**2).sum()

    input_re_reflect_re = T.dot(input_re, reflect_re)
    input_re_reflect_im = T.dot(input_re, reflect_im)
    input_im_reflect_re = T.dot(input_im, reflect_re)
    input_im_reflect_im = T.dot(input_im, reflect_im)

    a = T.outer(input_re_reflect_re - input_im_reflect_im, reflect_re)
    b = T.outer(input_re_reflect_im + input_im_reflect_re, reflect_im)
    c = T.outer(input_re_reflect_re - input_im_reflect_im, reflect_im)
    d = T.outer(input_re_reflect_im + input_im_reflect_re, reflect_re)

    output = input
    output = T.inc_subtensor(output[:, :n_hidden], - 2. / vstarv * (a + b))
    output = T.inc_subtensor(output[:, n_hidden:], - 2. / vstarv * (d - c))

    return output

def temporal_padding_mask(mask, kernel_size, padding_size):
    """Pad the middle dimension of a 2D matrix
    with "padding" zeros left and right.

    Apologies for the inane API, but Theano makes this really hard.

    Code from https://github.com/fchollet/keras/blob/master/keras/backend/theano_backend.py
    x: (batch, length)
    """
    mask_shape = mask.shape
    mask_sum = T.sum(mask, axis=1)
    output_length = mask_sum - kernel_size + 2 * padding_size + 1
    max_output_length = mask_shape[1] - kernel_size + 2 * padding_size + 1
    real_output_length = T.maximum(output_length, 1)
    range_base = T.arange(max_output_length)
    range_matrix = T.outer(T.ones((mask_shape[0],)), range_base)
    mask = (range_matrix < real_output_length[:, None]) * T.constant(1.0)
    return mask

def perform(self, node, inputs, outputs):
    # Kalbfleisch and Lawless, J. Am. Stat. Assoc. 80 (1985) Equation 3.4
    # Kind of... You need to do some algebra from there to arrive at
    # this expression.
    (A, gA) = inputs
    (out,) = outputs
    w, V = scipy.linalg.eig(A, right=True)
    U = scipy.linalg.inv(V).T

    exp_w = numpy.exp(w)
    X = numpy.subtract.outer(exp_w, exp_w) / numpy.subtract.outer(w, w)
    numpy.fill_diagonal(X, exp_w)
    Y = U.dot(V.T.dot(gA).dot(U) * X).dot(V.T)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", numpy.ComplexWarning)
        out[0] = Y.astype(A.dtype)

def test_grad(self):
    """
    Test the combined graph of the graph of outer
    with broadcastable dimensions, just in case.
    """
    for shp0, shp1 in [((1,), (2,)),
                       ((3,), (1,)),
                       ((1,), (1,)),
                       ((3,), (2,)),
                       ((3, 2), (1, 1)),
                       ((3, 2), (1, 4)),
                       ((3, 2), (4, 1)),
                       ((3, 2), (4, 5)),
                       ((1, 2), (4, 5)),
                       ((3, 1), (4, 5)),
                       ((1, 1), (4, 5)),
                       ((1, 1), (1, 1)),
                       ]:
        data0 = numpy.random.rand(*shp0).astype(floatX)
        data1 = numpy.random.rand(*shp1).astype(floatX)
        utt.verify_grad(tensor.outer, [data0, data1])

def batch_sim4(w, M, eps=1e-6):
    """
    w: matrix with shape (batch, memory_elem)
    M: tensor with shape (batch, memory_size, memory_elem)
    eps: numerical stability parameter
    """
    M = M[0]  # only one true memory
    # M = M.dimshuffle(1, 0)  # (memory_elem, memory_size)

    def norm(A):
        """
        Calculate the column norm of matrix A
        A: matrix with shape (N, M)
        return: vector with shape (N,)
        """
        return T.sqrt(T.dot(A, A.T).diagonal())

    norm = T.outer(norm(w), norm(M))  # (batch, memory_size)
    batch_sim = T.dot(w, M.T) / (norm + eps)  # (batch, memory_size)
    return batch_sim

def batch_sim6(w, M, eps=1e-6):
    """
    w: matrix with shape (batch, memory_elem)
    M: tensor with shape (batch, memory_size, memory_elem)
    eps: numerical stability parameter
    """
    M = M[0]  # only one true memory
    # M = M.dimshuffle(1, 0)  # (memory_elem, memory_size)

    def norm(A):
        """
        Calculate the column norm of matrix A
        A: matrix with shape (N, M)
        return: vector with shape (N,)
        """
        n, _ = theano.map(fn=lambda a: T.sqrt((a*a).sum()), sequences=[A])
        return n

    norm = T.outer(norm(w), norm(M))  # (batch, memory_size)
    batch_sim = T.dot(w, M.T) / (norm + eps)  # (batch, memory_size)
    return batch_sim

def f(self, x, sampling=True, **kwargs):
    x /= np.cast[theano.config.floatX](np.sqrt(self.dim_in))
    indx, indy = self.params[3], self.params[4]
    indx /= np.cast[theano.config.floatX](np.sqrt(self.dim_in))
    if sampling:
        stdx, stdy = self._get_stds()
        noisex, noisey = sample_mult_noise(stdx, indx.shape), sample_mult_noise(stdy, indy.shape)
        indy *= noisey
        indx *= noisex
    Rr, Rc = T.exp(self.params[1]), T.exp(self.params[2])
    U = T.sqr(Rr)
    sigma11 = T.dot(indx * U.dimshuffle('x', 0), indx.T) + eps_ind * T.eye(self.n_inducing)
    sigma22 = T.dot(x * U.dimshuffle('x', 0), x.T)
    sigma12 = T.dot(indx * U.dimshuffle('x', 0), x.T)
    mu_ind = T.dot(indx, self.params[0])
    inv_sigma11 = Tn.matrix_inverse(sigma11)
    mu_x = T.dot(x, self.params[0]) + T.dot(sigma12.T, inv_sigma11).dot(indy - mu_ind)
    if not sampling:
        return mu_x
    sigma_x = Tn.extract_diag(sigma22 - T.dot(sigma12.T, inv_sigma11).dot(sigma12))
    std = T.outer(T.sqrt(sigma_x), Rc)
    out_sample = sample_gauss(mu_x, std)
    return out_sample

def f(self, x, sampling=True, **kwargs):
    x /= np.cast[theano.config.floatX](np.sqrt(self.dim_in))
    indx, indy = self.params[3], self.params[4]
    indx /= np.cast[theano.config.floatX](np.sqrt(self.dim_in))
    if sampling:
        noisex = sample_mult_noise(T.exp(self.params[-2]), indx.shape)
        noisey = sample_mult_noise(T.exp(self.params[-1]), indy.shape)
        indy *= noisey
        indx *= noisex
    Rr, Rc = T.exp(self.params[1]), T.exp(self.params[2])
    U = T.sqr(Rr)
    sigma11 = T.dot(indx * U.dimshuffle('x', 0), indx.T) + eps_ind * T.eye(self.n_inducing)
    sigma22 = T.dot(x * U.dimshuffle('x', 0), x.T)
    sigma12 = T.dot(indx * U.dimshuffle('x', 0), x.T)
    mu_ind = T.dot(indx, self.params[0])
    inv_sigma11 = Tn.matrix_inverse(sigma11)
    mu_x = T.dot(x, self.params[0]) + T.dot(sigma12.T, inv_sigma11).dot(indy - mu_ind)
    if not sampling:
        return mu_x
    sigma_x = Tn.extract_diag(sigma22 - T.dot(sigma12.T, inv_sigma11).dot(sigma12))
    std = T.outer(T.sqrt(sigma_x), Rc)
    out_sample = sample_gauss(mu_x, std)
    return out_sample

def sample(self, c, n_samples=1, return_probs=False):
    if c.ndim == 1:
        c = c[None, :]
    elif c.ndim > 2:
        raise ValueError()

    x = T.zeros((n_samples, self.dim_out)).astype(floatX)
    z = T.zeros((n_samples, self.dim_out,)).astype(floatX) + self.bar[None, :]
    z = z[None, :, :] + c[:, None, :]
    z = z.reshape((z.shape[0] * z.shape[1], z.shape[2]))
    rs = self.trng.uniform((self.dim_out, z.shape[0]), dtype=floatX)

    def _step_sample(i, W_i, r_i, z):
        p_i = T.nnet.sigmoid(z[:, i])
        x_i = (r_i <= p_i).astype(floatX)
        z += T.outer(x_i, W_i)
        return z, x_i, p_i

    seqs = [T.arange(self.dim_out), self.War, rs]
    outputs_info = [z, None, None]
    non_seqs = []

    (zs, x, p), updates = scan(_step_sample, seqs, outputs_info, non_seqs,
                               self.dim_out, name='darn_sample')

    if c.ndim == 1:
        x = x.T[None, :, :]
        p = p.T[None, :, :]
    else:
        x = x.T
        x = x.reshape((n_samples, x.shape[0] // n_samples, x.shape[1]))
        p = p.T
        p = p.reshape((n_samples, p.shape[0] // n_samples, p.shape[1]))

    if return_probs:
        return p, updates
    else:
        return x, updates

def erase(memory_t_1, weight_t, eraser_t):
    '''
    :param memory_t_1:
    :param weight_t:
    :param eraser_t:
    :return:
    '''
    memory = memory_t_1 - T.outer(eraser_t, weight_t)
    # memory = memory_t_1 * (1 - weight_t * eraser_t)
    return memory

def add(_memory_t, weight_t, adder_t):
    '''
    :param _memory_t:
    :param weight_t:
    :param adder_t:
    :return:
    '''
    memory_t = _memory_t + T.outer(adder_t, weight_t)
    # memory_t = _memory_t + weight_t * adder_t
    return memory_t

def times_reflection_sub(input, n_hidden, n_sub, reflection):
    # print "n_hidden=%d, n_sub=%d" % (n_hidden, n_sub)
    input_re = input[:, :n_hidden]
    input_im = input[:, n_hidden:]
    n_start = n_hidden - n_sub
    # print "n_start=%d" % n_start
    reflect_re = reflection[n_start:n_hidden]
    reflect_im = reflection[(n_hidden + n_start):]

    vstarv = (reflect_re**2).sum() + (reflect_im**2).sum()

    input_re_reflect_re = T.dot(input_re[:, n_start:], reflect_re)
    input_re_reflect_im = T.dot(input_re[:, n_start:], reflect_im)
    input_im_reflect_re = T.dot(input_im[:, n_start:], reflect_re)
    input_im_reflect_im = T.dot(input_im[:, n_start:], reflect_im)

    a = T.outer(input_re_reflect_re - input_im_reflect_im, reflect_re)
    b = T.outer(input_re_reflect_im + input_im_reflect_re, reflect_im)
    c = T.outer(input_re_reflect_re - input_im_reflect_im, reflect_im)
    d = T.outer(input_re_reflect_im + input_im_reflect_re, reflect_re)

    output = input
    output = T.inc_subtensor(output[:, n_start:n_hidden], - 2. / vstarv * (a + b))
    output = T.inc_subtensor(output[:, (n_hidden + n_start):], - 2. / vstarv * (d - c))

    return output

def build_model(tparams, options):
    # for training
    p = tensor.tensor3('p', dtype=config.floatX)  # Problems, n_sizes * n_samples * data_dim
    p_mask = tensor.matrix('p_mask', dtype=config.floatX)
    x = tensor.matrix('x', dtype='int64')  # n_steps * n_samples
    x_mask = tensor.matrix('x_mask', dtype=config.floatX)
    y = tensor.matrix('y', dtype='int64')  # n_steps * n_samples
    y_mask = tensor.matrix('y_mask', dtype=config.floatX)

    # for generation
    hidi = tensor.matrix('hidi', dtype=config.floatX)
    celi = tensor.matrix('celi', dtype=config.floatX)
    hids = tensor.tensor3('hids', dtype=config.floatX)
    xi = tensor.vector('xi', dtype='int64')
    xi_mask = tensor.vector('xi_mask', dtype=config.floatX)

    n_steps = x.shape[0]
    n_samples = x.shape[1]

    preds, f_encode, f_decode, f_probi = ptr_network(tparams, p, p_mask, x, x_mask,
                                                     xi, xi_mask, hidi, celi, hids, options)

    idx_steps = tensor.outer(tensor.arange(n_steps, dtype='int64'),
                             tensor.ones((n_samples,), dtype='int64'))
    idx_samples = tensor.outer(tensor.ones((n_steps,), dtype='int64'),
                               tensor.arange(n_samples, dtype='int64'))
    probs = preds[idx_steps, y, idx_samples]
    # probs *= y_mask

    off = 1e-8
    if probs.dtype == 'float16':
        off = 1e-6
    # probs += (1 - y_mask)  # change unmasked position to 1, since log(1) = 0
    probs += off
    # probs_printed = theano.printing.Print('this is probs')(probs)
    cost = -tensor.log(probs)
    cost *= y_mask
    cost = cost.sum(axis=0) / y_mask.sum(axis=0)
    cost = cost.mean()

    return p, p_mask, x, x_mask, y, y_mask, preds, cost, f_encode, f_decode, f_probi

def output_probabilistic(self, mx_previous, vx_previous):
    # create place holders
    mout = []
    vout = []

    # compute the psi terms
    psi0 = self.kern.compute_psi0_theano(
        self.ls, self.sf, mx_previous, vx_previous
    )
    for d in range(self.Dout):
        psi1 = self.kern.compute_psi1_theano(
            self.ls, self.sf, mx_previous, vx_previous, self.zu[d]
        )
        psi1psi1T = T.outer(psi1, psi1.T)
        psi2 = self.kern.compute_psi2_theano(
            self.ls, self.sf, mx_previous, vx_previous, self.zu[d]
        )

        # precompute some terms
        psi1Kinv = T.dot(psi1, self.Kuuinv[d])
        Kinvpsi2 = T.dot(self.Kuuinv[d], psi2)
        Kinvpsi2Kinv = T.dot(Kinvpsi2, self.Kuuinv[d])
        vconst = T.exp(2.0 * self.sn) + (psi0 - Talg.trace(Kinvpsi2))
        mud = self.mu[d]
        Sud = self.Su[d]
        moutd = T.sum(T.dot(psi1Kinv, mud))
        mout.append(moutd)

        Splusmm = Sud + T.outer(mud, mud)
        voutd = vconst + Talg.trace(T.dot(Splusmm, Kinvpsi2Kinv)) - moutd ** 2
        vout.append(T.sum(voutd))

    return mout, vout

def output_probabilistic_sep(self, mx_previous, vx_previous):
    # create place holders
    mout = []
    vout = []

    # compute the psi0 term
    psi0 = self.kern.compute_psi0_theano(
        self.ls, self.sf, mx_previous, vx_previous
    )
    for d in range(self.Dout):
        # compute the psi1 and psi2 term
        psi1 = self.kern.compute_psi1_theano(
            self.ls, self.sf, mx_previous, vx_previous, self.zu[d]
        )
        psi1psi1T = T.outer(psi1, psi1.T)
        psi2 = self.kern.compute_psi2_theano(
            self.ls, self.sf, mx_previous, vx_previous, self.zu[d]
        )

        # precompute some terms
        psi1Kinv = T.dot(psi1, self.Kuuinv[d])
        Kinvpsi2 = T.dot(self.Kuuinv[d], psi2)
        Kinvpsi2Kinv = T.dot(Kinvpsi2, self.Kuuinv[d])
        vconst = T.exp(2 * self.sn) + (psi0 - Talg.trace(Kinvpsi2))
        mud = self.muhat[d]
        Sud = self.Suhat[d]
        moutd = T.sum(T.dot(psi1Kinv, mud))
        mout.append(moutd)

        Splusmm = Sud + T.outer(mud, mud)
        voutd = vconst + Talg.trace(T.dot(Splusmm, Kinvpsi2Kinv)) - moutd ** 2
        vout.append(T.sum(voutd))

    return mout, vout

def grad(self, inputs, gradients):
    """
    Cholesky decomposition reverse-mode gradient update.

    Symbolic expression for reverse-mode Cholesky gradient taken from [0]_

    References
    ----------
    .. [0] I. Murray, "Differentiation of the Cholesky decomposition",
       http://arxiv.org/abs/1602.07527
    """
    x = inputs[0]
    dz = gradients[0]
    chol_x = self(x)

    # deal with upper triangular by converting to lower triangular
    if not self.lower:
        chol_x = chol_x.T
        dz = dz.T

    def tril_and_halve_diagonal(mtx):
        """Extracts lower triangle of square matrix and halves diagonal."""
        return tensor.tril(mtx) - tensor.diag(tensor.diagonal(mtx) / 2.)

    def conjugate_solve_triangular(outer, inner):
        """Computes L^{-T} P L^{-1} for lower-triangular L."""
        return solve_upper_triangular(
            outer.T, solve_upper_triangular(outer.T, inner.T).T)

    s = conjugate_solve_triangular(
        chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))

    if self.lower:
        return [tensor.tril(s + s.T) - tensor.diag(tensor.diagonal(s))]
    else:
        return [tensor.triu(s + s.T) - tensor.diag(tensor.diagonal(s))]

def grad(self, inputs, output_gradients):
    """
    Reverse-mode gradient updates for matrix solve operation c = A \ b.

    Symbolic expression for updates taken from [1]_.

    References
    ----------
    .. [1] M. B. Giles, "An extended collection of matrix derivative results
       for forward and reverse mode automatic differentiation",
       http://eprints.maths.ox.ac.uk/1079/
    """
    A, b = inputs
    c = self(A, b)
    c_bar = output_gradients[0]
    trans_map = {
        'lower_triangular': 'upper_triangular',
        'upper_triangular': 'lower_triangular'
    }
    trans_solve_op = Solve(
        # update A_structure and lower to account for a transpose operation
        A_structure=trans_map.get(self.A_structure, self.A_structure),
        lower=not self.lower
    )
    b_bar = trans_solve_op(A.T, c_bar)
    # force outer product if vector second input
    A_bar = -tensor.outer(b_bar, c) if c.ndim == 1 else -b_bar.dot(c.T)
    if self.A_structure == 'lower_triangular':
        A_bar = tensor.tril(A_bar)
    elif self.A_structure == 'upper_triangular':
        A_bar = tensor.triu(A_bar)
    return [A_bar, b_bar]

def kron(a, b):
    """ Kronecker product.

    Same as scipy.linalg.kron(a, b).

    Parameters
    ----------
    a: array_like
    b: array_like

    Returns
    -------
    array_like with a.ndim + b.ndim - 2 dimensions

    Notes
    -----
    numpy.kron(a, b) != scipy.linalg.kron(a, b)!
    They don't have the same shape and order when
    a.ndim != b.ndim != 2.
    """
    a = tensor.as_tensor_variable(a)
    b = tensor.as_tensor_variable(b)
    if (a.ndim + b.ndim <= 2):
        raise TypeError('kron: inputs dimensions must sum to 3 or more. '
                        'You passed %d and %d.' % (a.ndim, b.ndim))
    o = tensor.outer(a, b)
    o = o.reshape(tensor.concatenate((a.shape, b.shape)),
                  a.ndim + b.ndim)
    shf = o.dimshuffle(0, 2, 1, *list(range(3, o.ndim)))
    if shf.ndim == 3:
        shf = o.dimshuffle(1, 0, 2)
        o = shf.flatten()
    else:
        o = shf.reshape((o.shape[0] * o.shape[2],
                         o.shape[1] * o.shape[3]) +
                        tuple(o.shape[i] for i in xrange(4, o.ndim)))
    return o

def test_outer(self):
    f = self.function([self.x, self.y], tensor.outer(self.x, self.y))
    self.assertFunctionContains(f, ScipyGer(destructive=True))

def test_A_plus_scaled_outer(self):
    f = self.function([self.A, self.x, self.y],
                      self.A + 0.1 * tensor.outer(self.x, self.y))
    self.assertFunctionContains(f, ScipyGer(destructive=False))
    self.run_f(f)  # DebugMode tests correctness

def test_scaled_A_plus_scaled_outer(self):
    f = self.function([self.A, self.x, self.y],
                      0.2 * self.A + 0.1 * tensor.outer(self.x, self.y))
    self.assertFunctionContains(f, gemm_no_inplace)
    self.run_f(f)  # DebugMode tests correctness

def test_outer(self):
    f = self.function([self.x, self.y], T.outer(self.x, self.y))
    self.assertFunctionContains(f, self.ger_destructive)
    f(numpy.random.rand(5).astype(self.dtype),
      numpy.random.rand(4).astype(self.dtype))

def test_A_plus_outer(self):
    f = self.function([self.A, self.x, self.y],
                      self.A + T.outer(self.x, self.y))
    self.assertFunctionContains(f, self.ger)
    f(numpy.random.rand(5, 4).astype(self.dtype),
      numpy.random.rand(5).astype(self.dtype),
      numpy.random.rand(4).astype(self.dtype))
    f(numpy.random.rand(5, 4).astype(self.dtype)[::-1, ::-1],
      numpy.random.rand(5).astype(self.dtype),
      numpy.random.rand(4).astype(self.dtype))

def test_A_plus_scaled_outer(self):
    f = self.function([self.A, self.x, self.y],
                      self.A + 0.1 * T.outer(self.x, self.y))
    self.assertFunctionContains(f, self.ger)
    f(numpy.random.rand(5, 4).astype(self.dtype),
      numpy.random.rand(5).astype(self.dtype),
      numpy.random.rand(4).astype(self.dtype))
    f(numpy.random.rand(5, 4).astype(self.dtype)[::-1, ::-1],
      numpy.random.rand(5).astype(self.dtype),
      numpy.random.rand(4).astype(self.dtype))

def given_dtype(self, dtype, M, N):
    """ test corner case shape and dtype"""
    f = self.function([self.A, self.x, self.y],
                      self.A + 0.1 * T.outer(self.x, self.y))
    self.assertFunctionContains(f, self.ger)
    f(numpy.random.rand(M, N).astype(self.dtype),
      numpy.random.rand(M).astype(self.dtype),
      numpy.random.rand(N).astype(self.dtype))
    f(numpy.random.rand(M, N).astype(self.dtype)[::-1, ::-1],
      numpy.random.rand(M).astype(self.dtype),
      numpy.random.rand(N).astype(self.dtype))

def test_inplace(self):
    A = self.shared(numpy.random.rand(4, 5).astype(self.dtype))
    f = self.function([self.x, self.y], [],
                      updates=[(A, A + T.constant(0.1, dtype=self.dtype) *
                                T.outer(self.x, self.y))])
    self.assertFunctionContains(f, self.ger_destructive)
    f(numpy.random.rand(4).astype(self.dtype),
      numpy.random.rand(5).astype(self.dtype))

    A.set_value(
        A.get_value(borrow=True, return_internal_type=True)[::-1, ::-1],
        borrow=True)
    f(numpy.random.rand(4).astype(self.dtype),
      numpy.random.rand(5).astype(self.dtype))

def test_optimization_pipeline(self):
    f = self.function([self.x, self.y], tensor.outer(self.x, self.y))
    self.assertFunctionContains(f, CGer(destructive=True))
    f(self.xval, self.yval)  # DebugMode tests correctness

def test_optimization_pipeline_float(self):
    self.setUp('float32')
    f = self.function([self.x, self.y], tensor.outer(self.x, self.y))
    self.assertFunctionContains(f, CGer(destructive=True))
    f(self.xval, self.yval)  # DebugMode tests correctness

def test_A_plus_outer(self):
    f = self.function([self.A, self.x, self.y],
                      self.A + tensor.outer(self.x, self.y))
    self.assertFunctionContains(f, CGer(destructive=False))
    self.run_f(f)  # DebugMode tests correctness

def test_A_plus_scaled_outer(self):
    f = self.function([self.A, self.x, self.y],
                      self.A + 0.1 * tensor.outer(self.x, self.y))
    self.assertFunctionContains(f, CGer(destructive=False))
    self.run_f(f)  # DebugMode tests correctness

def test_outer(self):
    for m in range(4):
        for n in range(4):
            x = tensor.tensor(dtype='floatX', broadcastable=(False,) * m)
            y = tensor.tensor(dtype='floatX', broadcastable=(False,) * n)
            s1 = numpy.random.randint(1, 10, m)
            s2 = numpy.random.randint(1, 10, n)
            v1 = numpy.asarray(numpy.random.rand(*s1)).astype(floatX)
            v2 = numpy.asarray(numpy.random.rand(*s2)).astype(floatX)
            o = tensor.outer(x, y).eval({x: v1, y: v2})
            assert_allclose(o, numpy.outer(v1, v2))

def grad(self, inputs, cost_grad):
    """
    In defining the gradient, the Finite Fourier Transform is viewed as
    a complex-differentiable function of a complex variable
    """
    a = inputs[0]
    n = inputs[1]
    axis = inputs[2]
    grad = cost_grad[0]
    if not isinstance(axis, tensor.TensorConstant):
        raise NotImplementedError('%s: gradient is currently implemented'
                                  ' only for axis being a Theano constant'
                                  % self.__class__.__name__)
    axis = int(axis.data)
    # notice that the number of actual elements in wrto is independent of
    # possible padding or truncation:
    elem = tensor.arange(0, tensor.shape(a)[axis], 1)
    # accounts for padding:
    freq = tensor.arange(0, n, 1)
    outer = tensor.outer(freq, elem)
    pow_outer = tensor.exp(((-2 * math.pi * 1j) * outer) / (1. * n))
    res = tensor.tensordot(grad, pow_outer, (axis, 0))

    # This would be simpler but not implemented by theano:
    # res = tensor.switch(tensor.lt(n, tensor.shape(a)[axis]),
    #                     tensor.set_subtensor(res[..., n::], 0, False, False),
    #                     res)

    # Instead we resort to that to account for truncation:
    flip_shape = list(numpy.arange(0, a.ndim)[::-1])
    res = res.dimshuffle(flip_shape)
    res = tensor.switch(tensor.lt(n, tensor.shape(a)[axis]),
                        tensor.set_subtensor(res[n::, ], 0, False, False),
                        res)
    res = res.dimshuffle(flip_shape)

    # insures that gradient shape conforms to input shape:
    out_shape = (list(numpy.arange(0, axis)) + [a.ndim - 1] +
                 list(numpy.arange(axis, a.ndim - 1)))
    res = res.dimshuffle(*out_shape)
    return [res, None, None]

def _get_log_partition_func(dim, nparams):
    np1, np2, np3, np4 = nparams
    idxs = np.arange(dim) + 1
    W = T.nlinalg.matrix_inverse(np1 - (1. / np3) * T.outer(np2, np2))
    log_Z = .5 * (np4 + dim) * T.log(T.nlinalg.det(W))
    log_Z += .5 * (np4 + dim) * dim * np.log(2)
    log_Z += .5 * dim * (dim - 4)
    log_Z += T.sum(T.gammaln(.5 * (np4 + dim + 1 - idxs)))
    log_Z += -.5 * dim * T.log(np3)
    return log_Z, theano.function([], log_Z)

def SFM(tparams, x, omega, opts):
    nsteps = x.shape[0]

    def _recurrence(x_, t_, Re_s_, Im_s_, z_):
        f_ste = T.nnet.sigmoid(T.dot(tparams['W_ste'], z_) + T.dot(tparams['V_ste'], x_) + tparams['b_ste'])
        f_fre = T.nnet.sigmoid(T.dot(tparams['W_fre'], z_) + T.dot(tparams['V_fre'], x_) + tparams['b_fre'])
        f = T.outer(f_ste, f_fre)

        g = T.nnet.sigmoid(T.dot(tparams['W_g'], z_) + T.dot(tparams['V_g'], x_) + tparams['b_g'])
        i = T.tanh(T.dot(tparams['W_i'], z_) + T.dot(tparams['V_i'], x_) + tparams['b_i'])

        Re_s = f * Re_s_ + T.outer(g * i, T.cos(omega * t_))
        Im_s = f * Im_s_ + T.outer(g * i, T.sin(omega * t_))

        A = T.sqrt(Re_s**2 + Im_s**2)

        def __feq(U_o, W_o, V_o, b_o, W_z, b_z, A_k, z_k):
            o = T.nnet.sigmoid(T.dot(U_o, A_k) + T.dot(W_o, z_) + T.dot(V_o, x_) + b_o)
            zz = z_k + o * T.tanh(T.dot(W_z, A_k) + b_z)
            return zz

        res, upd = theano.scan(__feq,
                               sequences=[tparams['U_o'], tparams['W_o'], tparams['V_o'],
                                          tparams['b_o'], tparams['W_z'], tparams['b_z'],
                                          A.transpose()],
                               outputs_info=[T.zeros_like(z_)],
                               name='__feq',
                               n_steps=omega.shape[0])

        return Re_s, Im_s, res[-1]

    rval, updates = theano.scan(_recurrence,
                                sequences=[x, (T.arange(nsteps) + 1) / nsteps],
                                outputs_info=[T.zeros((opts['dim'], opts['dim_feq'])),
                                              T.zeros((opts['dim'], opts['dim_feq'])),
                                              T.zeros((opts['dim_pitch'],))],
                                name='MFO_SFM',
                                n_steps=nsteps)

    return rval[2]

def Adaptive_SFM(tparams, x, omega, opts):
    nsteps = x.shape[0]

    def _recurrence(x_, t_, omg_, Re_s_, Im_s_, z_):
        f_ste = T.nnet.sigmoid(T.dot(tparams['W_ste'], z_) + T.dot(tparams['V_ste'], x_) + tparams['b_ste'])
        f_fre = T.nnet.sigmoid(T.dot(tparams['W_fre'], z_) + T.dot(tparams['V_fre'], x_) + tparams['b_fre'])
        f = T.outer(f_ste, f_fre)

        g = T.nnet.sigmoid(T.dot(tparams['W_g'], z_) + T.dot(tparams['V_g'], x_) + tparams['b_g'])
        i = T.tanh(T.dot(tparams['W_i'], z_) + T.dot(tparams['V_i'], x_) + tparams['b_i'])
        omg = T.dot(tparams['W_omg'], z_) + T.dot(tparams['V_omg'], x_) + tparams['b_omg']

        Re_s = f * Re_s_ + T.outer(g * i, T.cos(omg_ * t_))
        Im_s = f * Im_s_ + T.outer(g * i, T.sin(omg_ * t_))

        A = T.sqrt(Re_s**2 + Im_s**2)

        def __feq(U_o, W_o, V_o, b_o, W_z, b_z, A_k, z_k):
            o = T.nnet.sigmoid(T.dot(U_o, A_k) + T.dot(W_o, z_) + T.dot(V_o, x_) + b_o)
            zz = z_k + o * T.tanh(T.dot(W_z, A_k) + b_z)
            return zz

        res, upd = theano.scan(__feq,
                               sequences=[tparams['U_o'], tparams['W_o'], tparams['V_o'],
                                          tparams['b_o'], tparams['W_z'], tparams['b_z'],
                                          A.transpose()],
                               outputs_info=[T.zeros_like(z_)],
                               name='__feq',
                               n_steps=omega.shape[0])

        return omg, Re_s, Im_s, res[-1]

    rval, updates = theano.scan(_recurrence,
                                sequences=[x, (T.arange(nsteps) + 1) / nsteps],
                                outputs_info=[T.ones(omega.shape) * omega,
                                              T.zeros((opts['dim'], opts['dim_feq'])),
                                              T.zeros((opts['dim'], opts['dim_feq'])),
                                              T.zeros((opts['dim_pitch'],))],
                                name='MFO_SFM',
                                n_steps=nsteps)

    return rval[3]

def _get_stds(self):
    dx, dy, dpp = T.exp(self.params[-3]), T.exp(self.params[-2]), T.exp(self.params[-1])
    stdx, stdy = T.outer(dpp, dx), T.outer(dpp, dy)
    return stdx, stdy

def kldiv_m(self, mu, std_r, std_c):
    pmu, pstdr, pstdc = self.get_priors()
    var_r, var_c = T.sqr(std_r), T.sqr(std_c)
    # first kl term
    fa = T.sum((1. / (pstdc**2)) * var_c) * T.sum((1. / (pstdr**2)) * var_r)
    # second kl term
    prior_sigma = T.outer(T.ones((mu.shape[0],)) * (pstdr**2),
                          T.ones((mu.shape[1],)) * (pstdc**2))
    fb = T.sum(T.sqr(mu - pmu) / prior_sigma)
    # third kl term
    fc = mu.shape[1] * (mu.shape[0] * T.log(pstdr**2) - T.sum(T.log(var_r))) + \
         mu.shape[0] * (mu.shape[1] * T.log(pstdc**2) - T.sum(T.log(var_c)))
    return - 0.5 * (fa + fb - T.prod(mu.shape) + fc)

def gen_model(p, p_mask, f_encode, f_probi, options):
    # p: n_sizes * n_samples * data_dim
    n_sizes = p.shape[0]
    n_samples = p.shape[1] if p.ndim == 3 else 1
    beam_width = n_sizes  # for beam search
    hprev = f_encode(p_mask, p)  # n_sizes * n_samples * data_dim
    c0 = numpy.zeros((n_samples, options['dim_proj']), dtype=config.floatX)
    xi = numpy.zeros((n_samples,), dtype='int64')
    # xi_mask = numpy.zeros((n_samples,), dtype=config.floatX)
    h, c, probi = f_probi(p_mask[0], xi, hprev[-1], c0, hprev, p_mask, p)  # probi n_sizes * n_samples
    route = -numpy.ones((beam_width, n_samples, n_sizes), dtype='int64')
    costi = -numpy.log(probi)
    idx = costi.argsort(axis=0)[:beam_width]  # beam_width * n_samples
    route[:, :, 0] = idx
    costs = costi[idx, numpy.arange(n_samples)]

    # tile to beam numbers
    hprev = numpy.tile(hprev[:, None, :, :], (1, beam_width, 1, 1))  # n_sizes * beam_width * n_samples * dim_proj
    h = numpy.tile(h[None, :, :], (beam_width, 1, 1))
    c = numpy.tile(c[None, :, :], (beam_width, 1, 1))
    probi = numpy.tile(probi[:, None, :], (1, beam_width, 1))
    # costs = numpy.tile(costs[:, None, :], (1, beam_width, 1))

    idr = numpy.tile(numpy.arange(n_sizes), (beam_width, 1)).T.flatten()
    idc = numpy.tile(numpy.arange(beam_width), (n_sizes, 1)).flatten()
    ids = numpy.tile(numpy.arange(n_samples)[None, :], (beam_width, 1))

    for i in range(1, n_sizes):
        for b in range(beam_width):
            # h: beam_width * n_samples * dim_proj
            # c: beam_width * n_samples * dim_proj
            # probi: n_sizes * beam_width * n_samples
            h[b], c[b], probi[:, b, :] = f_probi(p_mask[i], idx[b], h[b], c[b], hprev[:, b, :, :], p_mask, p)
            probi[:, b, :] *= p_mask[i]  # set unmasked to 0
            probi[:, b, :] += (1 - p_mask[i])  # then set to 1, since log(1) = 0 for calculating cost
        costi = -numpy.log(probi)  # costi: n_sizes * beam_width * n_samples
        costs = numpy.tile(costs[None, :, :], (n_sizes, 1, 1))  # duplicate costs x n_sizes
        costu = costi + costs
        # idb = numpy.outer(numpy.arange(beam_width), numpy.ones((i,))).astype('int64')
        # idbn = numpy.tile(idb[:, None, :], (1, n_samples, 1))
        idbn = numpy.tile(numpy.arange(beam_width)[:, None, None], (1, n_samples, i))
        idsn = numpy.tile(numpy.arange(n_samples)[None, :, None], (beam_width, 1, i))
        costu[route[:, :, :i], idbn, idsn] = numpy.inf
        idx = costu.reshape(n_sizes * beam_width, n_samples).argsort(axis=0)[:beam_width]  # duplication can be selected
        h = h[idc[idx], ids, :]
        c = c[idc[idx], ids, :]
        route = route[idc[idx], ids, :]
        route[:, :, i] = idr[idx]
        costi += costs
        costs = costi[idr[idx], idc[idx], ids]
        idx = idr[idx]

    costs /= numpy.tile((p_mask.sum(axis=0) + numpy.ones(p_mask[0].shape)), (beam_width, 1))
    # route: beam_width * n_samples * route
    # costs: beam_width * n_samples
    return route, costs

def cmp_ger(self, a_shp, b_shp, c_shp):
    av = self.rand(*a_shp)
    bv = self.rand(b_shp)
    cv = self.rand(c_shp)
    l = numpy.float32(0.2)
    a = self.shared(av, 'a')
    b = self.shared(bv, 'b')
    c = self.shared(cv, 'c')
    a_t = self.shared(av.T, 'a.T')
    a_dev = a.get_value(borrow=False, return_internal_type=True)
    b_dev = b.get_value(borrow=False, return_internal_type=True)
    c_dev = c.get_value(borrow=False, return_internal_type=True)

    f_n = theano.function([], [],
                          updates=[(a, (a + l * tensor.outer(b, c)))],
                          mode=self.mode)
    f_t = theano.function([], [],
                          updates=[(a_t, (a_t + l * tensor.outer(b, c).T))],
                          mode=self.mode)

    # Try with all stride patterns, and all transposed patterns
    for step_signs in itertools_product((1, -1), repeat=4):
        for step in (1, 2):
            a_step1, a_step2, b_step, c_step = (s * step for s in step_signs)

            a.set_value(a_dev.copy()[::a_step1, ::a_step2], borrow=True)
            a_t.set_value(transpose(a_dev.copy())[::a_step1, ::a_step2],
                          borrow=True)
            b.set_value(b_dev.copy()[::b_step], borrow=True)
            c.set_value(c_dev.copy()[::c_step], borrow=True)

            f_n()
            n_n = (av[::a_step1, ::a_step2] +
                   l * numpy.outer(bv[::b_step], cv[::c_step]))
            assert numpy.allclose(a.get_value(), n_n), (a.get_value(), n_n)

            f_t()
            n_t = (av.T[::a_step1, ::a_step2] +
                   l * numpy.outer(bv[::b_step], cv[::c_step]).T)
            assert numpy.allclose(a_t.get_value(), n_t), \
                (a_t.get_value(), n_t)