Python numpy 模块,fill_diagonal() 实例源码
我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用numpy.fill_diagonal()。
def layout_tree(correlation):
"""Layout tree for visualization with e.g. matplotlib.
Args:
correlation: A [V, V]-shaped numpy array of latent correlations.
Returns:
A [V, 3]-shaped numpy array of spectral positions of vertices.
"""
assert len(correlation.shape) == 2
assert correlation.shape[0] == correlation.shape[1]
assert correlation.dtype == np.float32
laplacian = -correlation
np.fill_diagonal(laplacian, 0)
np.fill_diagonal(laplacian, -laplacian.sum(axis=0))
evals, evects = scipy.linalg.eigh(laplacian, eigvals=[1, 2, 3])
assert np.all(evals > 0)
assert evects.shape[1] == 3
return evects
def test_diagonal_mpa(nr_sites, local_dim, _, rgen, dtype):
randfunc = factory._randfuncs[dtype]
entries = randfunc((local_dim,), randstate=rgen)
mpa_mp = factory.diagonal_mpa(entries, nr_sites)
if nr_sites > 1:
mpa_np = np.zeros((local_dim,) * nr_sites, dtype=dtype)
np.fill_diagonal(mpa_np, entries)
else:
mpa_np = entries
assert len(mpa_mp) == nr_sites
assert mpa_mp.dtype is dtype
assert_array_almost_equal(mpa_mp.to_array(), mpa_np)
assert_correct_normalization(mpa_mp, nr_sites - 1, nr_sites)
if nr_sites > 1:
assert max(mpa_mp.ranks) == local_dim
def __parse_pairs__(self, filepath, delimiter = ',', target_col = 2, column_names = list(), sequence_length = None):
assert("target" in column_names)
with open(filepath, "r") as f:
lines = f.readlines()
try:
if sequence_length is None:
dataframe = pd.read_csv(filepath, sep = delimiter, skip_blank_lines = True,
header = None, names = column_names, index_col = False)
sequence_length = np.asarray(dataframe[["i", "j"]]).max()
except ValueError:
return None
data = np.full((sequence_length, sequence_length), np.nan, dtype = np.double)
np.fill_diagonal(data, Params.DISTANCE_WITH_ITSELF)
for line in lines:
elements = line.rstrip("\r\n").split(delimiter)
i, j, k = int(elements[0]) - 1, int(elements[1]) - 1, float(elements[target_col])
data[i, j] = data[j, i] = k
if np.isnan(data).any():
# sequence_length is wrong or the input file has missing pairs
warnings.warn("Warning: Pairs of residues are missing from the contacts text file")
warnings.warn("Number of missing pairs: %i " % np.isnan(data).sum())
return data
def getW(D, K, Mu = 0.5):
"""
Return affinity matrix
[1] Wang, Bo, et al. "Similarity network fusion for aggregating data types on a genomic scale."
Nature methods 11.3 (2014): 333-337.
:param D: Self-similarity matrix
:param K: Number of nearest neighbors
"""
#W(i, j) = exp(-Dij^2/(mu*epsij))
DSym = 0.5*(D + D.T)
np.fill_diagonal(DSym, 0)
Neighbs = np.partition(DSym, K+1, 1)[:, 0:K+1]
MeanDist = np.mean(Neighbs, 1)*float(K+1)/float(K) #Need this scaling
#to exclude diagonal element in mean
#Equation 1 in SNF paper [1] for estimating local neighborhood radii
#by looking at k nearest neighbors, not including point itself
Eps = MeanDist[:, None] + MeanDist[None, :] + DSym
Eps = Eps/3
W = np.exp(-DSym**2/(2*(Mu*Eps)**2))
return W
def jaccard_similarity_weighted(F, fill_diagonal=True):
assert F.format == 'csr'
if not F.has_sorted_indices:
F.sort_indices()
ind = F.indices
ptr = F.indptr
dat = F.data.astype(np.float64, copy=False) # dtype needed for jaccard computation
shift = 1 if fill_diagonal else 0
data, rows, cols = _jaccard_similarity_weighted_tri(dat, ind, ptr, shift)
S = sp.sparse.coo_matrix((data, (rows, cols)), shape=(F.shape[0],)*2).tocsc()
S += S.T # doubles diagonal values if fill_diagonal is False
if fill_diagonal:
set_diagonal_values(S, 1)
else:
set_diagonal_values(S, np.sign(S.diagonal())) # set to 1, preserve zeros
return S
def select_negtive(self, i_feat, s_feat, sess, topN=50):
'''
Select the triplets with the largest losses \n
return i_feat_pos, s_feat_pos, i_feat_neg, s_feat_neg
'''
feed_dict = {self.image_feat: i_feat, self.sentence_feat:s_feat}
i_embed, s_embed = sess.run([self.image_fc2, self.sentence_fc2], feed_dict=feed_dict)
S = np.matmul(i_embed, s_embed.T)
i_feat_pos = i_feat.repeat(topN, axis=0)
s_feat_pos = s_feat.repeat(topN, axis=0)
N = S.shape[0]
np.fill_diagonal(S, -2*np.ones(N))
neg_s_idx = S.argsort(axis=1)[:, -topN:]
neg_i_idx = S.argsort(axis=0)[-topN:, :]
s_feat_neg = s_feat[neg_s_idx.flatten('C')]
i_feat_neg = i_feat[neg_i_idx.flatten('F')]
return i_feat_pos, s_feat_pos, i_feat_neg, s_feat_neg
def select_negtive(self, i_feat, s_feat, sess, topN=50):
'''
Select the triplets with the largest losses \n
return i_feat_pos, s_feat_pos, i_feat_neg, s_feat_neg
'''
feed_dict = {self.image_feat: i_feat, self.sentence_feat:s_feat}
i_embed, s_embed = sess.run([self.image_fc2, self.sentence_fc2], feed_dict=feed_dict)
S = np.matmul(i_embed, s_embed.T)
i_feat_pos = i_feat.repeat(topN, axis=0)
s_feat_pos = s_feat.repeat(topN, axis=0)
N = S.shape[0]
np.fill_diagonal(S, -2*np.ones(N))
neg_s_idx = S.argsort(axis=1)[:, -topN:]
neg_i_idx = S.argsort(axis=0)[-topN:, :]
s_feat_neg = s_feat[neg_s_idx.flatten('C')]
i_feat_neg = i_feat[neg_i_idx.flatten('F')]
return i_feat_pos, s_feat_pos, i_feat_neg, s_feat_neg
def compute_sims(inputs: mx.nd.NDArray, normalize: bool) -> mx.nd.NDArray:
"""
Returns a matrix with pair-wise similarity scores between inputs.
Similarity score is (normalized) Euclidean distance. 'Similarity with self' is masked
to large negative value.
:param inputs: NDArray of inputs.
:param normalize: Whether to normalize to unit-length.
:return: NDArray with pairwise similarities of same shape as inputs.
"""
if normalize:
logger.info("Normalizing embeddings to unit length")
inputs = mx.nd.L2Normalization(inputs, mode='instance')
sims = mx.nd.dot(inputs, inputs, transpose_b=True)
sims_np = sims.asnumpy()
np.fill_diagonal(sims_np, -9999999.)
sims = mx.nd.array(sims_np)
return sims
def sharpenOld(s, kernelFunc, dist=None, scale=None,
normalize=False, m1=False, *args, **kwargs):
s = util.colmat(s)
if dist is None:
dist = np.arange(s.shape[1])+1.0
dist = np.abs(dist[None,:]-dist[:,None])
#dist = np.insert(spsig.triang(s.shape[1]-1, sym=False), 0, 0.0)
#dist = np.vstack([np.roll(dist, i) for i in xrange(dist.size)])
if scale is None:
# minimum off-diagonal distance
scale = np.min(dist[np.asarray(1.0-np.eye(dist.shape[0]), dtype=np.bool)])
kernel = kernelFunc(dist.T/scale, *args, **kwargs)
if m1:
np.fill_diagonal(kernel, 0.0)
if normalize:
kernel = kernel/np.abs(kernel.sum(axis=0))
return s - s.dot(kernel)
def load_data(filename):
df = pd.read_csv(filename, compression='zip')
selected = ['Category', 'Descript']
non_selected = list(set(df.columns) - set(selected))
df = df.drop(non_selected, axis=1)
df = df.dropna(axis=0, how='any', subset=selected)
df = df.reindex(np.random.permutation(df.index))
labels = sorted(list(set(df[selected[0]].tolist())))
num_labels = len(labels)
one_hot = np.zeros((num_labels, num_labels), int)
np.fill_diagonal(one_hot, 1)
label_dict = dict(zip(labels, one_hot))
x_raw= df[selected[1]].apply(lambda x: clean_str(x).split(' ')).tolist()
y_raw = df[selected[0]].apply(lambda y: label_dict[y]).tolist()
x_raw = pad_sentences(x_raw)
vocabulary, vocabulary_inv = build_vocab(x_raw)
x = np.array([[vocabulary[word] for word in sentence] for sentence in x_raw])
y = np.array(y_raw)
return x, y, vocabulary, vocabulary_inv, df, labels
def load_test_data(test_file, labels):
df = pd.read_csv(test_file, sep='|')
select = ['Descript']
df = df.dropna(axis=0, how='any', subset=select)
test_examples = df[select[0]].apply(lambda x: data_helper.clean_str(x).split(' ')).tolist()
num_labels = len(labels)
one_hot = np.zeros((num_labels, num_labels), int)
np.fill_diagonal(one_hot, 1)
label_dict = dict(zip(labels, one_hot))
y_ = None
if 'Category' in df.columns:
select.append('Category')
y_ = df[select[1]].apply(lambda x: label_dict[x]).tolist()
not_select = list(set(df.columns) - set(select))
df = df.drop(not_select, axis=1)
return test_examples, y_, df
def select_negtive(self, i_feat, s_feat, sess, topN=50):
'''
Select the triplets with the largest losses \n
return i_feat_pos, s_feat_pos, i_feat_neg, s_feat_neg
'''
feed_dict = {self.image_feat: i_feat, self.sentence_feat:s_feat}
i_embed, s_embed = sess.run([self.image_fc2, self.sentence_fc2], feed_dict=feed_dict)
S = np.matmul(i_embed, s_embed.T)
i_feat_pos = i_feat.repeat(topN, axis=0)
s_feat_pos = s_feat.repeat(topN, axis=0)
N = S.shape[0]
np.fill_diagonal(S, -2*np.ones(N))
neg_s_idx = S.argsort(axis=1)[:, -topN:]
neg_i_idx = S.argsort(axis=0)[-topN:, :]
s_feat_neg = s_feat[neg_s_idx.flatten('C')]
i_feat_neg = i_feat[neg_i_idx.flatten('F')]
return i_feat_pos, s_feat_pos, i_feat_neg, s_feat_neg
def select_negtive(self, i_feat, s_feat, sess, topN=50):
'''
Select the triplets with the largest losses \n
return i_feat_pos, s_feat_pos, i_feat_neg, s_feat_neg
'''
feed_dict = {self.image_feat: i_feat, self.sentence_feat:s_feat}
i_embed, s_embed = sess.run([self.image_fc2, self.sentence_fc2], feed_dict=feed_dict)
S = np.matmul(i_embed, s_embed.T)
i_feat_pos = i_feat.repeat(topN, axis=0)
s_feat_pos = s_feat.repeat(topN, axis=0)
N = S.shape[0]
np.fill_diagonal(S, -2*np.ones(N))
neg_s_idx = S.argsort(axis=1)[:, -topN:]
neg_i_idx = S.argsort(axis=0)[-topN:, :]
s_feat_neg = s_feat[neg_s_idx.flatten('C')]
i_feat_neg = i_feat[neg_i_idx.flatten('F')]
return i_feat_pos, s_feat_pos, i_feat_neg, s_feat_neg
def get_gcovmat(h2, rg):
"""
Args: h2: vector with SNP heritabilities
rg: vector with genetic correlations
Returns: numpy trait by trait array with h2 on diagonal and genetic covariance on offdiagnoals
"""
mat = numpy.zeros((len(h2), len(h2)))
mat[numpy.triu_indices(len(h2), 1)] = rg
mat = mat + mat.T
mat = mat * numpy.sqrt(numpy.outer(h2, h2))
numpy.fill_diagonal(mat, h2)
return numpy.array(mat)
# When input files are score files, not beta files, mtot may be unknown.
# Here mtot=1e6 is assumed. The absolute value of the expected variances for each trait is irrelevant for the multi-trait weighting, so it doesn't matter too much what this value is, expecially if M > N.
def ols_weights(n, h2, rg, mtot=1e6):
"""
Args: n: vector with sample size for each trait
h2: vector with SNP heritabilities
rg: vector with rg for each pair of traits (3 traits: 1,2; 1,3; 2,3)
mtot: total number of markers (doesn't change result much)
Returns: ntraits * ntraits array with ols weights. weights in each row are for are for a multi-trait predictor of the trait in this row
"""
ntraits = len(n)
gcovmat = get_gcovmat(h2, rg)
print(gcovmat)
V = gcovmat / mtot
numpy.fill_diagonal(V, ols_variances(n, h2, mtot))
C = gcovmat / mtot
weights = numpy.zeros([ntraits, ntraits])
for i in range(ntraits):
nonzero = V[i,] != 0
Vi = V[numpy.array(numpy.where(nonzero)[0])[:, None], nonzero]
Vinv = numpy.linalg.inv(Vi)
weights[i, nonzero] = numpy.dot(Vinv, C[i, nonzero])
print(weights)
return weights
def __get_prolongation_matrix(ndofs_coarse, ndofs_fine):
"""Helper routine for the prolongation operator
Args:
ndofs_fine (int): number of DOFs on the fine grid
ndofs_coarse (int): number of DOFs on the coarse grid
Returns:
scipy.sparse.csc_matrix: sparse prolongation matrix of size
`ndofs_fine` x `ndofs_coarse`
"""
# This is a workaround, since I am not aware of a suitable way to do
# this directly with sparse matrices.
P = np.zeros((ndofs_fine, ndofs_coarse))
np.fill_diagonal(P[1::2, :], 1)
np.fill_diagonal(P[0::2, :], 1.0/2.0)
np.fill_diagonal(P[2::2, :], 1.0/2.0)
return sp.csc_matrix(P)
def _select_target_neighbors(self):
"""Find the target neighbors of each sample, that stay fixed during training.
Returns
-------
array_like
An array of neighbors indices for each sample with shape (n_samples, n_neighbors).
"""
self.logger.info('Finding target neighbors...')
target_neighbors = np.empty((self.X_.shape[0], self.n_neighbors_), dtype=int)
for class_ in self.classes_:
class_ind, = np.where(np.equal(self.y_, class_))
dist = euclidean_distances(self.X_[class_ind], squared=True)
np.fill_diagonal(dist, np.inf)
neigh_ind = np.argpartition(dist, self.n_neighbors_ - 1, axis=1)
neigh_ind = neigh_ind[:, :self.n_neighbors_]
# argpartition doesn't guarantee sorted order, so we sort again but only the k neighbors
row_ind = np.arange(len(class_ind))[:, None]
neigh_ind = neigh_ind[row_ind, np.argsort(dist[row_ind, neigh_ind])]
target_neighbors[class_ind] = class_ind[neigh_ind]
return target_neighbors
def set_diagonal(G, val=0):
"""
Generally diagonal is set to 0. This function helps set the diagonal across time.
**PARAMETERS**
:G: temporal network (graphlet)
:val: value to set diagonal to (default 0).
**OUTPUT**
:G: Graphlet representation of G with new diagonal
**HISTORY**
:Modified: Dec 2016, WHT (documentation)
:Created: Nov 2016, WHT
"""
for t in range(0, G.shape[2]):
np.fill_diagonal(G[:, :, t], val)
return G
def newScore(movie):
critic_num = len(token_dict[movie["movieTitle"]]["critics"])
N = len(token_dict[movie["movieTitle"]]["reviews"])
C = cosine[movie["movieTitle"]][critic_num:, critic_num:]
R = map(lambda x: x['score'], movie['reviews'])
print C.shape
# exclude self similarity
# np.fill_diagonal(C, 0)
# normalize
row_sums = C.sum(axis=1)
C = C / row_sums[:, np.newaxis]
# calculate new score
new_score = np.dot(C, R)
# update new score
new_review = movie['reviews']
map(lambda x, y: x.update({'newScore': y}), new_review, new_score)
testing = map(lambda x: abs(x['score'] - x['newScore']) < 5, new_review)
print np.sum(testing)
return new_review
def get_masked(self, percent_hole, diag_off=1):
""" Construct a random mask.
Random training set on 20% on Data / debug5 - debug11 -- Unbalanced
"""
data = self.data
if type(data) is np.ndarray:
#self.data_mat = sp.sparse.csr_matrix(data)
pass
else:
raise NotImplementedError('type %s unknow as corpus' % type(data))
n = int(data.size * percent_hole)
mask_index = np.unravel_index(np.random.permutation(data.size)[:n], data.shape)
mask = np.zeros(data.shape, dtype=data.dtype)
mask[mask_index] = 1
if self.is_symmetric():
mask = np.tril(mask) + np.tril(mask, -1).T
data_ma = ma.array(data, mask=mask)
if diag_off == 1:
np.fill_diagonal(data_ma, ma.masked)
return data_ma
def get_masked_zeros(self, diag_off=1):
''' Take out all zeros '''
data = self.data
if type(data) is np.ndarray:
#self.data_mat = sp.sparse.csr_matrix(data)
pass
else:
raise NotImplementedError('type %s unknow as corpus' % type(data))
mask = np.zeros(data.shape, dtype=data.dtype)
mask[data == 0] = 1
if self.is_symmetric():
mask = np.tril(mask) + np.tril(mask, -1).T
data_ma = ma.array(data, mask=mask)
if diag_off == 1:
np.fill_diagonal(data_ma, ma.masked)
return data_ma
def _solve_hessian(G, Y, thY, precon, lambda_min):
N, T = Y.shape
# Compute the derivative of the score
psidY = ne.evaluate('(- thY ** 2 + 1.) / 2.') # noqa
# Build the diagonal of the Hessian, a.
Y_squared = Y ** 2
if precon == 2:
a = np.inner(psidY, Y_squared) / float(T)
elif precon == 1:
sigma2 = np.mean(Y_squared, axis=1)
psidY_mean = np.mean(psidY, axis=1)
a = psidY_mean[:, None] * sigma2[None, :]
diagonal_term = np.mean(Y_squared * psidY) + 1.
a[np.diag_indices_from(a)] = diagonal_term
else:
raise ValueError('precon should be 1 or 2')
# Compute the eigenvalues of the Hessian
eigenvalues = 0.5 * (a + a.T - np.sqrt((a - a.T) ** 2 + 4.))
# Regularize
problematic_locs = eigenvalues < lambda_min
np.fill_diagonal(problematic_locs, False)
i_pb, j_pb = np.where(problematic_locs)
a[i_pb, j_pb] += lambda_min - eigenvalues[i_pb, j_pb]
# Invert the transform
return (G * a.T - G.T) / (a * a.T - 1.)
def grad(self, inp, cost_grad):
"""
Notes
-----
The gradient is currently implemented for matrices only.
"""
a, val = inp
grad = cost_grad[0]
if (a.dtype.startswith('complex')):
return [None, None]
elif a.ndim > 2:
raise NotImplementedError('%s: gradient is currently implemented'
' for matrices only' %
self.__class__.__name__)
wr_a = fill_diagonal(grad, 0) # valid for any number of dimensions
# diag is only valid for matrices
wr_val = theano.tensor.nlinalg.diag(grad).sum()
return [wr_a, wr_val]
def test_perform(self):
x = tensor.matrix()
y = tensor.scalar()
f = function([x, y], fill_diagonal(x, y))
for shp in [(8, 8), (5, 8), (8, 5)]:
a = numpy.random.rand(*shp).astype(config.floatX)
val = numpy.cast[config.floatX](numpy.random.rand())
out = f(a, val)
# We can't use numpy.fill_diagonal as it is bugged.
assert numpy.allclose(numpy.diag(out), val)
assert (out == val).sum() == min(a.shape)
# test for 3d tensor
a = numpy.random.rand(3, 3, 3).astype(config.floatX)
x = tensor.tensor3()
y = tensor.scalar()
f = function([x, y], fill_diagonal(x, y))
val = numpy.cast[config.floatX](numpy.random.rand() + 10)
out = f(a, val)
# We can't use numpy.fill_diagonal as it is bugged.
assert out[0, 0, 0] == val
assert out[1, 1, 1] == val
assert out[2, 2, 2] == val
assert (out == val).sum() == min(a.shape)
def test_perform(self):
x = tensor.matrix()
y = tensor.scalar()
z = tensor.iscalar()
f = function([x, y, z], fill_diagonal_offset(x, y, z))
for test_offset in (-5, -4, -1, 0, 1, 4, 5):
for shp in [(8, 8), (5, 8), (8, 5), (5, 5)]:
a = numpy.random.rand(*shp).astype(config.floatX)
val = numpy.cast[config.floatX](numpy.random.rand())
out = f(a, val, test_offset)
# We can't use numpy.fill_diagonal as it is bugged.
assert numpy.allclose(numpy.diag(out, test_offset), val)
if test_offset >= 0:
assert (out == val).sum() == min(min(a.shape),
a.shape[1] - test_offset)
else:
assert (out == val).sum() == min(min(a.shape),
a.shape[0] + test_offset)
def constant(self):
delta = np.min(self.rho) - 0.01
cormat = np.full((self.nkdim, self.nkdim), delta)
epsilon = 0.99 - np.max(self.rho)
for i in np.arange(self.k):
cor = np.full((self.nk[i], self.nk[i]), self.rho[i])
if i == 0:
cormat[0:self.nk[0], 0:self.nk[0]] = cor
if i != 0:
cormat[np.sum(self.nk[0:i]):np.sum(self.nk[0:i + 1]),
np.sum(self.nk[0:i]):np.sum(self.nk[0:i + 1])] = cor
np.fill_diagonal(cormat, 1 - epsilon)
cormat = self._generate_noise(cormat, self.nkdim, self.M, epsilon)
return cormat
def toepz(self):
cormat = np.zeros((self.nkdim, self.nkdim))
epsilon = (1 - np.max(self.rho)) / (1 + np.max(self.rho)) - .01
for i in np.arange(self.k):
t = np.insert(np.power(self.rho[i], np.arange(1, self.nk[i])), 0, 1)
cor = toeplitz(t)
if i == 0:
cormat[0:self.nk[0], 0:self.nk[0]] = cor
if i != 0:
cormat[np.sum(self.nk[0:i]):np.sum(self.nk[0:i + 1]),
np.sum(self.nk[0:i]):np.sum(self.nk[0:i + 1])] = cor
np.fill_diagonal(cormat, 1 - epsilon)
cormat = self._generate_noise(cormat, self.nkdim, self.M, epsilon)
return cormat
def hub(self):
cormat = np.zeros((self.nkdim, self.nkdim))
for i in np.arange(self.k):
cor = toeplitz(self._fill_hub_matrix(self.rho[i,0],self.rho[i,1], self.power, self.nk[i]))
if i == 0:
cormat[0:self.nk[0], 0:self.nk[0]] = cor
if i != 0:
cormat[np.sum(self.nk[0:i]):np.sum(self.nk[0:i + 1]),
np.sum(self.nk[0:i]):np.sum(self.nk[0:i + 1])] = cor
tau = (np.max(self.rho[i]) - np.min(self.rho[i])) / (self.nk[i] - 2)
epsilon = 0.08 #(1 - np.min(rho) - 0.75 * np.min(tau)) - 0.01
np.fill_diagonal(cormat, 1 - epsilon)
cormat = self._generate_noise(cormat, self.nkdim, self.M, epsilon)
return cormat
def load_data_and_labels():
articles = np.load('data/bin/all_articles.npy')
labels = np.load('data/bin/all_labels.npy')
articles = [clean_str(article) for article in articles]
# Map the actual labels to one hot labels
label_list = sorted(list(set(labels)))
one_hot = np.zeros((len(label_list), len(label_list)), int)
np.fill_diagonal(one_hot, 1)
label_dict = dict(zip(label_list, one_hot))
labels = one_hot_encode(labels, label_dict)
x_raw = articles
y_raw = labels
return x_raw, y_raw, label_list
def fill_diagonal_(mat: T.Tensor, val: T.Scalar) -> T.Tensor:
"""
Fill the diagonal of the matirx with a specified value.
Note:
Modifies mat in place.
Args:
mat: A tensor.
val: The value to put along the diagonal.
Returns:
None
"""
numpy.fill_diagonal(mat, val)
def make_one_hot(X, onehot_size):
"""
DESCRIPTION:
Make a one-hot version of X
PARAM:
X: 1d numpy with each value in X representing the class of X
onehot_size: length of the one hot vector
RETURN:
2d numpy tensor, with each row been the onehot vector
"""
if onehot_size < 450:
dig_one = np.zeros((onehot_size, onehot_size))
np.fill_diagonal(dig_one, 1)
rX = dig_one[np.asarray(X)]
else:
# for large onehot size, this is faster
rX = np.zeros((len(X), onehot_size))
for i in range(len(X)):
rX[i, X[i]] = 1
return rX
def test_binary_perplexity_stability():
# Binary perplexity search should be stable.
# The binary_search_perplexity had a bug wherein the P array
# was uninitialized, leading to sporadically failing tests.
k = 10
n_samples = 100
random_state = check_random_state(0)
distances = random_state.randn(n_samples, 2).astype(np.float32)
# Distances shouldn't be negative
distances = np.abs(distances.dot(distances.T))
np.fill_diagonal(distances, 0.0)
last_P = None
neighbors_nn = np.argsort(distances, axis=1)[:, :k].astype(np.int64)
for _ in range(100):
P = _binary_search_perplexity(distances.copy(), neighbors_nn.copy(),
3, verbose=0)
P1 = _joint_probabilities_nn(distances, neighbors_nn, 3, verbose=0)
if last_P is None:
last_P = P
last_P1 = P1
else:
assert_array_almost_equal(P, last_P, decimal=4)
assert_array_almost_equal(P1, last_P1, decimal=4)
def test_gradient():
# Test gradient of Kullback-Leibler divergence.
random_state = check_random_state(0)
n_samples = 50
n_features = 2
n_components = 2
alpha = 1.0
distances = random_state.randn(n_samples, n_features).astype(np.float32)
distances = distances.dot(distances.T)
np.fill_diagonal(distances, 0.0)
X_embedded = random_state.randn(n_samples, n_components)
P = _joint_probabilities(distances, desired_perplexity=25.0,
verbose=0)
fun = lambda params: _kl_divergence(params, P, alpha, n_samples,
n_components)[0]
grad = lambda params: _kl_divergence(params, P, alpha, n_samples,
n_components)[1]
assert_almost_equal(check_grad(fun, grad, X_embedded.ravel()), 0.0,
decimal=5)
def _check_fix_Q(self, fixed_mu=False):
"""
Check the main diagonal of Q and fix it in case it does not corresond
the definition of the rate matrix. Should be run every time when creating
custom GTR model.
"""
# fix Q
self.Pi /= self.Pi.sum() # correct the Pi manually
# NEEDED TO BREAK RATE MATRIX DEGENERACY AND FORCE NP TO RETURN REAL ORTHONORMAL EIGENVECTORS
self.W += self.break_degen + self.break_degen.T
# fix W
np.fill_diagonal(self.W, 0)
Wdiag = -(self.Q).sum(axis=0)/self.Pi
np.fill_diagonal(self.W, Wdiag)
scale_factor = -np.sum(np.diagonal(self.Q)*self.Pi)
self.W /= scale_factor
if not fixed_mu:
self.mu *= scale_factor
if (self.Q.sum(axis=0) < 1e-10).sum() < self.alphabet.shape[0]: # fix failed
print ("Cannot fix the diagonal of the GTR rate matrix. Should be all zero", self.Q.sum(axis=0))
import ipdb; ipdb.set_trace()
raise ArithmeticError("Cannot fix the diagonal of the GTR rate matrix.")
def compute_db_index(matrix, kmeans):
'''
Compute Davies-Bouldin index, a measure of clustering quality.
Faster and possibly more reliable than silhouette score.
'''
(n, m) = matrix.shape
k = kmeans.n_clusters
centers = kmeans.cluster_centers_
labels = kmeans.labels_
centroid_dists = sp_dist.squareform(sp_dist.pdist(centers))
# Avoid divide-by-zero
centroid_dists[np.abs(centroid_dists) < MIN_CENTROID_DIST] = MIN_CENTROID_DIST
wss = np.zeros(k)
counts = np.zeros(k)
for i in xrange(n):
label = labels[i]
# note: this is 2x faster than scipy sqeuclidean
sqdist = np.square(matrix[i,:] - centers[label,:]).sum()
wss[label] += sqdist
counts[label] += 1
# Handle empty clusters
counts[counts == 0] = 1
scatter = np.sqrt(wss / counts)
mixitude = (scatter + scatter[:, np.newaxis]) / centroid_dists
np.fill_diagonal(mixitude, 0.0)
worst_case_mixitude = np.max(mixitude, axis=1)
db_score = worst_case_mixitude.sum() / k
return db_score
def cor2cov(cor, var=None, sd=None, copy=True):
sd = np.sqrt(var) if var is not None else sd
if isinstance(cor, (DiagonalArray, SubdiagonalArray)):
cor = cor.tonumpyarray()
cor = npu.tondim2(cor, copy=copy)
dim = len(var)
assert dim == np.shape(cor)[0] and dim == np.shape(cor)[1]
np.fill_diagonal(cor, 1.)
cor = (sd.T * (sd * cor).T).T
npu.lowertosymmetric(cor, copy=False)
return cor
def find_distance_matrix(self, vector, metric='cosine'):
'''
compute distance matrix between topis using cosine or euclidean
distance (default=cosine distance)
'''
if metric == 'cosine':
distance_matrix = pairwise_distances(vector,
metric='cosine')
# diagonals should be exactly zero, so remove rounding errors
numpy.fill_diagonal(distance_matrix, 0)
if metric == 'euclidean':
distance_matrix = pairwise_distances(vector,
metric='euclidean')
return distance_matrix
def diagonal_mpa(entries, sites):
"""Returns an MPA with ``entries`` on the diagonal and zeros otherwise.
:param numpy.ndarray entries: one-dimensional array
:returns: :class:`~mpnum.mparray.MPArray` with rank ``len(entries)``.
"""
assert sites > 0
if entries.ndim != 1:
raise NotImplementedError("Currently only supports diagonal MPA with "
"one leg per site.")
if sites < 2:
return mp.MPArray.from_array(entries)
ldim = len(entries)
leftmost_ltens = np.eye(ldim).reshape((1, ldim, ldim))
rightmost_ltens = np.diag(entries).reshape((ldim, ldim, 1))
center_ltens = np.zeros((ldim,) * 3)
np.fill_diagonal(center_ltens, 1)
ltens = it.chain((leftmost_ltens,), it.repeat(center_ltens, sites - 2),
(rightmost_ltens,))
return mp.MPArray(LocalTensors(ltens, cform=(sites - 1, sites)))
#########################
# More physical stuff #
#########################
def test_ignore_no_data_ints(self):
arr = np.ones((1, 16, 16), int)
np.fill_diagonal(arr[0], NO_DATA_INT)
tile = Tile(arr, 'INT', NO_DATA_INT)
rdd = BaseTestClass.pysc.parallelize([(self.projected_extent, tile)])
raster_rdd = RasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd)
value_map = {1: 0}
result = raster_rdd.reclassify(value_map, int, replace_nodata_with=1).to_numpy_rdd().first()[1].cells
self.assertTrue((result == np.identity(16, int)).all())
def test_ignore_no_data_floats(self):
arr = np.ones((1, 4, 4))
np.fill_diagonal(arr[0], float('nan'))
tile = Tile(arr, 'FLOAT', float('nan'))
rdd = BaseTestClass.pysc.parallelize([(self.projected_extent, tile)])
raster_rdd = RasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd)
value_map = {1.0: 0.0}
result = raster_rdd.reclassify(value_map, float, replace_nodata_with=1.0).to_numpy_rdd().first()[1].cells
self.assertTrue((result == np.identity(4)).all())
def _update_syn(self, x, eta=0.5):
"""Perform one update of the weights and re-calculate moments in the SYNERGISTIC case."""
m = self.moments
H = (1. / m["X_i^2 | Y"] * m["X_i Z_j"].T).dot(m["X_i Z_j"])
np.fill_diagonal(H, 0)
R = m["X_i Z_j"].T / m["X_i^2 | Y"]
S = np.dot(H, self.ws)
ws = (1. - eta) * self.ws + eta * (R - S)
m = self._calculate_moments_syn(x, ws)
return ws, m
def get_covariance(self):
# This uses E(Xi|Y) formula for non-synergistic relationships
m = self.moments
if self.discourage_overlap:
z = m['rhoinvrho'] / (1 + m['Si'])
cov = np.dot(z.T, z)
cov /= (1. - self.eps**2)
np.fill_diagonal(cov, 1)
return self.theta[1][:, np.newaxis] * self.theta[1] * cov
else:
cov = np.einsum('ij,kj->ik', m["X_i Z_j"], m["X_i Y_j"])
np.fill_diagonal(cov, 1)
return self.theta[1][:, np.newaxis] * self.theta[1] * cov
def seriation(self, A):
n_components = 2
eigen_tol = 0.00001
if sparse.issparse(A):
A = A.todense()
np.fill_diagonal(A, 0)
laplacian, dd = graph_laplacian(A, return_diag=True)
laplacian *= -1
lambdas, diffusion_map = eigsh(laplacian, k=n_components, sigma=1.0, which='LM', tol=eigen_tol)
embedding = diffusion_map.T[n_components::-1] # * dd
sort_index = np.argsort(embedding[1])
return sort_index
def fill_diagonal(a, val, wrap=False):
"""Fills the main diagonal of the given array of any dimensionality.
For an array `a` with ``a.ndim > 2``, the diagonal is the list of
locations with indices ``a[i, i, ..., i]`` all identical. This function
modifies the input array in-place, it does not return a value.
Args:
a (cupy.ndarray): The array, at least 2-D.
val (scalar): The value to be written on the diagonal.
Its type must be compatible with that of the array a.
wrap (bool): If specified, the diagonal is "wrapped" after N columns.
This affects only tall matrices.
Examples
--------
>>> a = cupy.zeros((3, 3), int)
>>> cupy.fill_diagonal(a, 5)
>>> a
array([[5, 0, 0],
[0, 5, 0],
[0, 0, 5]])
.. seealso:: :func:`numpy.fill_diagonal`
"""
# The followings are imported from the original numpy
if a.ndim < 2:
raise ValueError('array must be at least 2-d')
end = None
if a.ndim == 2:
step = a.shape[1] + 1
if not wrap:
end = a.shape[1] * a.shape[1]
else:
if not numpy.alltrue(numpy.diff(a.shape) == 0):
raise ValueError('All dimensions of input must be of equal length')
step = 1 + numpy.cumprod(a.shape[:-1]).sum()
# Since the current cupy does not support a.flat,
# we use a.ravel() instead of a.flat
a.ravel()[:end:step] = val
def knn_initialize(
X,
missing_mask,
verbose=False,
min_dist=1e-6,
max_dist_multiplier=1e6):
"""
Fill X with NaN values if necessary, construct the n_samples x n_samples
distance matrix and set the self-distance of each row to infinity.
Returns contents of X laid out in row-major, the distance matrix,
and an "effective infinity" which is larger than any entry of the
distance matrix.
"""
X_row_major = X.copy("C")
if missing_mask.sum() != np.isnan(X_row_major).sum():
# if the missing values have already been zero-filled need
# to put NaN's back in the data matrix for the distances function
X_row_major[missing_mask] = np.nan
D = all_pairs_normalized_distances(X_row_major)
D_finite_flat = D[np.isfinite(D)]
if len(D_finite_flat) > 0:
max_dist = max_dist_multiplier * max(1, D_finite_flat.max())
else:
max_dist = max_dist_multiplier
# set diagonal of distance matrix to a large value since we don't want
# points considering themselves as neighbors
np.fill_diagonal(D, max_dist)
D[D < min_dist] = min_dist # prevents 0s
D[D > max_dist] = max_dist # prevents infinities
return X_row_major, D, max_dist
def getDiffusionMap(SSM, Kappa, t = -1, includeDiag = True, thresh = 5e-4, NEigs = 51):
"""
:param SSM: Metric between all pairs of points
:param Kappa: Number in (0, 1) indicating a fraction of nearest neighbors
used to autotune neighborhood size
:param t: Diffusion parameter. If -1, do Autotuning
:param includeDiag: If true, include recurrence to diagonal in the markov
chain. If false, zero out diagonal
:param thresh: Threshold below which to zero out entries in markov chain in
the sparse approximation
:param NEigs: The number of eigenvectors to use in the approximation
"""
N = SSM.shape[0]
#Use the letters from the delaPorte paper
K = getW(SSM, int(Kappa*N))
if not includeDiag:
np.fill_diagonal(K, np.zeros(N))
RowSumSqrt = np.sqrt(np.sum(K, 1))
DInvSqrt = sparse.diags([1/RowSumSqrt], [0])
#Symmetric normalized Laplacian
Pp = (K/RowSumSqrt[None, :])/RowSumSqrt[:, None]
Pp[Pp < thresh] = 0
Pp = sparse.csr_matrix(Pp)
lam, X = sparse.linalg.eigsh(Pp, NEigs, which='LM')
lam = lam/lam[-1] #In case of numerical instability
#Check to see if autotuning
if t > -1:
lamt = lam**t
else:
#Autotuning diffusion time
lamt = np.array(lam)
lamt[0:-1] = lam[0:-1]/(1-lam[0:-1])
#Do eigenvector version
V = DInvSqrt.dot(X) #Right eigenvectors
M = V*lamt[None, :]
return M/RowSumSqrt[:, None] #Put back into orthogonal Euclidean coordinates
def coulomb_matrix(self, system):
"""Creates the Coulomb matrix for the given system.
"""
# Calculate offdiagonals
q = system.get_initial_charges()
qiqj = q[None, :]*q[:, None]
idmat = system.get_inverse_distance_matrix()
np.fill_diagonal(idmat, 0)
cmat = qiqj*idmat
# Set diagonal
np.fill_diagonal(cmat, 0.5 * q ** 2.4)
return cmat
def sine_matrix(self, system):
"""Creates the Sine matrix for the given system.
"""
# Cell and inverse cell
B = system.get_cell()
B_inv = system.get_cell_inverse()
# Difference vectors in tensor 3D-tensor-form
diff_tensor = system.get_displacement_tensor()
# Calculate phi
arg_to_sin = np.pi * np.dot(diff_tensor, B_inv)
phi = np.linalg.norm(np.dot(np.sin(arg_to_sin)**2, B), axis=2)
with np.errstate(divide='ignore'):
phi = np.reciprocal(phi)
# Calculate Z_i*Z_j
q = system.get_initial_charges()
qiqj = q[None, :]*q[:, None]
np.fill_diagonal(phi, 0)
# Multiply by charges
smat = qiqj*phi
# Set diagonal
np.fill_diagonal(smat, 0.5 * q ** 2.4)
return smat
def _calc_subsystem_energies(self, ewald_matrix):
"""Modify the give matrix that consists of the eral and reciprocal sums
so that each entry x_ij is the full Ewald sum energy of a system
consisting of atoms i and j.
"""
q = self.q
# Create the self-term array where q1[i,j] is qi**2 + qj**2, except for
# the diagonal, where it is qi**2
q1 = q[None, :]**2 + q[:, None]**2
diag = np.diag(q1)/2
np.fill_diagonal(q1, diag)
q1_prefactor = -self.a/self.sqrt_pi
# Create the charge correction array where q2[i,j] is (qi + qj)**2,
# except for the diagonal where it is qi**2
q2 = q[None, :] + q[:, None]
q2 **= 2
diag = np.diag(q2)/4
np.fill_diagonal(q2, diag)
q2_prefactor = -np.pi/(2*self.volume*self.a_squared)
correction_matrix = q1_prefactor*q1 + q2_prefactor*q2
# Add the terms coming from x_ii and x_jj to the off-diagonal along
# with the corrections
n_atoms = self.n_atoms
final_matrix = np.zeros((n_atoms, n_atoms))
for i in range(n_atoms):
for j in range(n_atoms):
if i == j:
final_matrix[i, j] = ewald_matrix[i, j]
else:
pair_term = 2*ewald_matrix[i, j]
self_term_ii = ewald_matrix[i, i]
self_term_jj = ewald_matrix[j, j]
energy_total = pair_term + self_term_ii + self_term_jj
final_matrix[i, j] = energy_total
final_matrix += correction_matrix
return final_matrix
def makeT(self,cp):
# cp: [(k*k*k) x 3] control points
# T: [((k*k*k)+4) x ((k*k*k)+4)]
K = cp.shape[0]
T = np.zeros((K+4, K+4))
T[:K, 0] = 1; T[:K, 1:4] = cp; T[K, 4:] = 1; T[K+1:, 4:] = cp.T
R = squareform(pdist(cp, metric='euclidean'))
R = R * R;R[R == 0] = 1 # a trick to make R ln(R) 0
R = R * np.log(R)
np.fill_diagonal(R, 0)
T[:K, 4:] = R
return T