Python torch 模块,log() 实例源码
我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用torch.log()。
def train_ae(self, train_X, optimizer, epochs, verbose=True):
    """Train the autoencoder with a summed binary cross-entropy reconstruction loss.

    Args:
        train_X: input whose ``.data.size()[0]`` is the number of samples and
            which supports row slicing (presumably values in [0, 1] -- TODO confirm).
        optimizer: object exposing ``zero_grad()`` and ``step()``.
        epochs: number of passes over ``train_X``.
        verbose: when true, print the mean batch cost after every epoch.

    Only full batches are processed; trailing rows that do not fill a batch
    are skipped.
    """
    N = train_X.data.size()[0]
    # Floor division: range() needs an int and "/" yields a float on Python 3.
    num_batches = N // self.batch_size
    for e in range(epochs):
        agg_cost = 0.
        for k in range(num_batches):
            start, end = k * self.batch_size, (k + 1) * self.batch_size
            bX = train_X[start:end]
            optimizer.zero_grad()
            # Encode then decode to obtain the reconstruction.
            Z = self.forward(bX)
            Z = self.decode(Z)
            # Per-sample binary cross-entropy, summed over dimension 1.
            loss = -torch.sum(bX * torch.log(Z) + (1.0 - bX) * torch.log(1.0 - Z), 1)
            cost = torch.mean(loss)
            cost.backward()
            optimizer.step()
            agg_cost += cost
        agg_cost /= num_batches
        if verbose:
            print("Epoch:", e, "cost:", agg_cost.data[0])
def encode(matched, priors, variances):
    """Encode matched ground-truth boxes as offsets relative to prior boxes.

    Args:
        matched: (tensor) ground-truth box per prior, point form
            (x_min, y_min, x_max, y_max). Shape: [num_priors, 4].
        priors: (tensor) prior boxes in center-offset form (cx, cy, w, h).
            Shape: [num_priors, 4].
        variances: (list[float]) two scaling factors; index 0 scales the
            center offsets, index 1 scales the log size ratios.
    Return:
        (tensor) encoded boxes, Shape: [num_priors, 4].
    """
    prior_centers = priors[:, :2]
    prior_sizes = priors[:, 2:]
    top_left = matched[:, :2]
    bottom_right = matched[:, 2:]

    # Offset of the matched box center from the prior center, scaled by
    # variance[0] and the prior size.
    center_offset = (top_left + bottom_right) / 2 - prior_centers
    center_offset /= (variances[0] * prior_sizes)

    # Log ratio of matched size to prior size, scaled by variance[1].
    size_ratio = (bottom_right - top_left) / prior_sizes
    log_size = torch.log(size_ratio) / variances[1]

    # Regression target, [num_priors, 4].
    return torch.cat([center_offset, log_size], 1)
# Adapted from https://github.com/Hakuyume/chainer-ssd
def _gaussian(self, enc_output):
    """Sample a latent code from the encoder output and record the latent loss.

    ``self._enc_mu`` gives the mean and ``self._enc_log_sigma`` is passed
    through ``exp(0.5 * .)`` to give sigma. The value
    ``0.5 * mean(mu^2 + sigma^2 - log(sigma^2) - 1)`` is stored on
    ``self.latent_loss``. Returns ``mu + sigma * noise`` where the noise is
    drawn with ``np.random.normal``.
    """
    mu = self._enc_mu(enc_output)
    sigma = torch.exp(.5 * self._enc_log_sigma(enc_output))

    variance = sigma * sigma
    self.latent_loss = 0.5 * torch.mean(mu * mu + variance - torch.log(variance) - 1)

    # Allocate the noise buffer from an existing parameter so it shares that
    # parameter's tensor type.
    reference = next(self.parameters()).data
    noise = Variable(reference.new(*sigma.size()), requires_grad=False)
    drawn = np.random.normal(size=sigma.size())
    noise.data.copy_(torch.from_numpy(drawn))
    return mu + sigma * noise
def setUp(self):
    """Build the fixture for the normal-normal model with known covariance."""
    # precision and mean of the prior
    self.lam0 = Variable(torch.Tensor([0.1, 0.1]))
    self.mu0 = Variable(torch.Tensor([0.0, 0.5]))
    # known precision of observation noise
    self.lam = Variable(torch.Tensor([6.0, 4.0]))

    observations = [[-0.1, 0.3], [0.00, 0.4], [0.20, 0.5], [0.10, 0.7]]
    self.data = [Variable(torch.Tensor(obs)) for obs in observations]
    self.n_data = Variable(torch.Tensor([len(self.data)]))
    first, second, third, fourth = self.data
    self.sum_data = first + second + third + fourth

    # Closed-form reference values stored for the tests to compare against.
    n_expanded = self.n_data.expand_as(self.lam)
    self.analytic_lam_n = self.lam0 + n_expanded * self.lam
    self.analytic_log_sig_n = -0.5 * torch.log(self.analytic_lam_n)
    data_weight = self.lam / self.analytic_lam_n
    prior_weight = self.lam0 / self.analytic_lam_n
    self.analytic_mu_n = self.sum_data * data_weight + self.mu0 * prior_weight
    self.batch_size = 4
def setUp(self):
    """Build the fixture for the poisson-gamma model."""
    # gamma prior hyperparameters
    self.alpha0 = Variable(torch.Tensor([1.0]))
    self.beta0 = Variable(torch.Tensor([1.0]))

    self.data = [Variable(torch.Tensor([count])) for count in (1.0, 2.0, 3.0)]
    self.n_data = len(self.data)
    first, second, third = self.data
    observed_total = first + second + third

    # posterior alpha: prior alpha plus the sum of the observations
    self.alpha_n = self.alpha0 + observed_total
    # posterior beta: prior beta plus the number of observations
    n_obs = Variable(torch.Tensor([self.n_data]))
    self.beta_n = self.beta0 + n_obs
    self.log_alpha_n = torch.log(self.alpha_n)
    self.log_beta_n = torch.log(self.beta_n)
def setUp(self):
    """Build the fixture for the bernoulli-beta model."""
    # beta prior hyperparameters
    self.alpha0 = Variable(torch.Tensor([1.0]))
    self.beta0 = Variable(torch.Tensor([1.0]))

    self.data = [Variable(torch.Tensor([flip])) for flip in (0.0, 1.0, 1.0, 1.0)]
    self.n_data = len(self.data)
    self.batch_size = None

    first, second, third, fourth = self.data
    successes = first + second + third + fourth
    # posterior alpha: prior alpha plus the sum of the observations
    self.alpha_n = self.alpha0 + successes
    # posterior beta: prior beta minus that sum plus the number of observations
    n_obs = Variable(torch.Tensor([self.n_data]))
    self.beta_n = self.beta0 - successes + n_obs
    self.log_alpha_n = torch.log(self.alpha_n)
    self.log_beta_n = torch.log(self.beta_n)
def setUp(self):
    """Build the fixture for the lognormal-normal model."""
    # normal prior hyperparameters
    self.mu0 = Variable(torch.Tensor([1.0]))
    self.tau0 = Variable(torch.Tensor([1.0]))
    # known precision for observation likelihood
    self.tau = Variable(torch.Tensor([2.5]))
    self.n_data = 2
    # two observations
    self.data = Variable(torch.Tensor([[1.5], [2.2]]))

    # posterior tau
    n_obs = Variable(torch.Tensor([self.n_data]))
    self.tau_n = self.tau0 + n_obs * self.tau
    # posterior mu: precision-weighted prior mean plus summed log observations
    log_data_total = torch.sum(torch.log(self.data))
    weighted_total = self.mu0 * self.tau0 + self.tau * log_data_total
    self.mu_n = weighted_total / self.tau_n
    self.log_mu_n = torch.log(self.mu_n)
    self.log_tau_n = torch.log(self.tau_n)
def setUp(self):
    """Build the fixture for the normal-normal model with known covariance."""
    # precision and mean of the prior
    self.lam0 = Variable(torch.Tensor([0.1, 0.1]))
    self.mu0 = Variable(torch.Tensor([0.0, 0.5]))
    # known precision of observation noise
    self.lam = Variable(torch.Tensor([6.0, 4.0]))

    rows = ([-0.1, 0.3], [0.00, 0.4], [0.20, 0.5], [0.10, 0.7])
    self.data = [Variable(torch.Tensor(list(row))) for row in rows]
    self.n_data = Variable(torch.Tensor([len(self.data)]))
    d0, d1, d2, d3 = self.data
    self.sum_data = d0 + d1 + d2 + d3

    # Closed-form reference values stored for the tests to compare against.
    count = self.n_data.expand_as(self.lam)
    self.analytic_lam_n = self.lam0 + count * self.lam
    self.analytic_log_sig_n = -0.5 * torch.log(self.analytic_lam_n)
    likelihood_share = self.lam / self.analytic_lam_n
    prior_share = self.lam0 / self.analytic_lam_n
    self.analytic_mu_n = self.sum_data * likelihood_share + self.mu0 * prior_share
    self.verbose = True
def setUp(self):
    """Build the fixture for the normal-normal-normal model with known covariance."""
    # precision and mean of the prior
    self.lam0 = Variable(torch.Tensor([0.1, 0.1]))
    self.mu0 = Variable(torch.Tensor([0.0, 0.5]))
    # known precision of observation noise
    self.lam = Variable(torch.Tensor([6.0, 4.0]))

    points = ([-0.1, 0.3], [0.00, 0.4], [0.20, 0.5], [0.10, 0.7])
    self.data = [Variable(torch.Tensor(list(point))) for point in points]
    self.n_data = Variable(torch.Tensor([len(self.data)]))
    a, b, c, d = self.data
    self.sum_data = a + b + c + d

    # Closed-form reference values stored for the tests to compare against.
    n_per_dim = self.n_data.expand_as(self.lam)
    self.analytic_lam_n = self.lam0 + n_per_dim * self.lam
    self.analytic_log_sig_n = -0.5 * torch.log(self.analytic_lam_n)
    obs_weight = self.lam / self.analytic_lam_n
    prior_weight = self.lam0 / self.analytic_lam_n
    self.analytic_mu_n = self.sum_data * obs_weight + self.mu0 * prior_weight
    self.verbose = True
def setUp(self):
    """Build the fixture for the bernoulli-beta model."""
    # beta prior hyperparameters
    self.alpha0 = Variable(torch.Tensor([1.0]))
    self.beta0 = Variable(torch.Tensor([1.0]))

    self.data = [Variable(torch.Tensor([outcome])) for outcome in (0.0, 1.0, 1.0, 1.0)]
    self.n_data = len(self.data)

    d0, d1, d2, d3 = self.data
    total = d0 + d1 + d2 + d3
    # posterior alpha: prior alpha plus the sum of the observations
    self.alpha_n = self.alpha0 + total
    # posterior beta: prior beta minus that sum plus the number of observations
    count = Variable(torch.Tensor([self.n_data]))
    self.beta_n = self.beta0 - total + count
    self.log_alpha_n = torch.log(self.alpha_n)
    self.log_beta_n = torch.log(self.beta_n)
    self.verbose = True
def setUp(self):
    """Build the fixture for the lognormal-normal model."""
    # normal prior hyperparameters
    self.mu0 = Variable(torch.Tensor([1.0]))
    self.tau0 = Variable(torch.Tensor([1.0]))
    # known precision for observation likelihood
    self.tau = Variable(torch.Tensor([2.5]))
    self.n_data = 2
    # two observations
    self.data = Variable(torch.Tensor([[1.5], [2.2]]))

    # posterior tau
    count = Variable(torch.Tensor([self.n_data]))
    self.tau_n = self.tau0 + count * self.tau
    # posterior mu: precision-weighted prior mean plus summed log observations
    summed_logs = torch.sum(torch.log(self.data))
    numerator = self.mu0 * self.tau0 + self.tau * summed_logs
    self.mu_n = numerator / self.tau_n
    self.log_mu_n = torch.log(self.mu_n)
    self.log_tau_n = torch.log(self.tau_n)
    self.verbose = True
def setUp(self):
    """Build the normal-normal fixture with randomly drawn, nested-list data.

    ``self.data`` is a list of ``n_outer`` lists, each holding ``n_inner``
    two-element samples; ``self.sum_data`` accumulates all of them.
    """
    # precision and mean of the prior
    self.lam0 = Variable(torch.Tensor([0.1, 0.1]))
    self.mu0 = Variable(torch.Tensor([0.0, 0.5]))
    # known precision of observation noise
    self.lam = Variable(torch.Tensor([6.0, 4.0]))
    self.n_outer = 3
    self.n_inner = 3
    self.n_data = Variable(torch.Tensor([self.n_outer * self.n_inner]))
    self.data = []
    self.sum_data = ng_zeros(2)
    for _ in range(self.n_outer):
        inner = []
        for _ in range(self.n_inner):
            # Standard-normal noise divided by sqrt(precision), added to a
            # fixed offset of [-0.1, 0.3].
            noise = torch.randn(2) / torch.sqrt(self.lam.data)
            sample = Variable(torch.Tensor([-0.1, 0.3]) + noise)
            inner.append(sample)
            self.sum_data += sample
        self.data.append(inner)

    # Closed-form reference values stored for the tests to compare against.
    count = self.n_data.expand_as(self.lam)
    self.analytic_lam_n = self.lam0 + count * self.lam
    self.analytic_log_sig_n = -0.5 * torch.log(self.analytic_lam_n)
    data_weight = self.lam / self.analytic_lam_n
    prior_weight = self.lam0 / self.analytic_lam_n
    self.analytic_mu_n = self.sum_data * data_weight + self.mu0 * prior_weight
    self.verbose = True
# this tests rao-blackwellization in elbo for nested list map_datas
def batch_log_pdf(self, x):
    """
    Diagonal Normal log-likelihood

    Ref: :py:meth:`pyro.distributions.distribution.Distribution.batch_log_pdf`

    Returns the per-element log densities summed over the last dimension and
    reshaped to ``self.batch_shape(x) + (1,)``.
    """
    # Broadcast the parameters to the shape reported for the input.
    target_shape = self.shape(x)
    mu = self.mu.expand(target_shape)
    sigma = self.sigma.expand(self.shape(x))

    log_scale = torch.log(sigma)
    half_log_two_pi = 0.5 * np.log(2.0 * np.pi)
    half_sq_z = 0.5 * torch.pow((x - mu) / sigma, 2)
    log_pxs = -1 * (log_scale + half_log_two_pi + half_sq_z)

    # An optional mask lets callers zero out or rescale parts of the score
    # (the original notes ragged data and KL annealing as uses).
    if self.log_pdf_mask is not None:
        log_pxs = log_pxs * self.log_pdf_mask

    summed = torch.sum(log_pxs, -1)
    out_shape = self.batch_shape(x) + (1,)
    return summed.contiguous().view(out_shape)
def log_pdf(self, y, *args, **kwargs):
    """
    :param y: a value sampled from the transformed distribution
    :type y: torch.autograd.Variable
    :returns: the score (the log pdf) of y
    :rtype: torch.autograd.Variable

    Inverts the bijectors from last to first, scores the fully inverted value
    under the base distribution, and subtracts the accumulated log det
    jacobian of every bijector.
    """
    last_to_first = list(reversed(self.bijectors))

    # inverses[i] is the value after undoing the first i + 1 bijectors in
    # last_to_first.
    inverses = []
    current = y
    for bijector in last_to_first:
        current = bijector.inverse(current)
        inverses.append(current)

    log_pdf_base = self.base_dist.log_pdf(inverses[-1], *args, **kwargs)

    # The last bijector is evaluated at y; each earlier one is evaluated at
    # the value produced by inverting the bijectors applied after it.
    log_det_total = self.bijectors[-1].log_det_jacobian(y, *args, **kwargs)
    for bijector, point in zip(last_to_first[1:], inverses[:-1]):
        log_det_total += bijector.log_det_jacobian(point, *args, **kwargs)
    return log_pdf_base - log_det_total
def log_gamma(xx):
    """Approximate log Gamma(xx) elementwise with a six-coefficient series.

    A plain ``torch.Tensor`` input is wrapped in a ``Variable`` first. The
    constants look like the Lanczos approximation from Numerical Recipes --
    TODO confirm the source.
    """
    if isinstance(xx, torch.Tensor):
        xx = Variable(xx)
    tensor_type = xx.data.type()

    coefficients = (
        76.18009172947146,
        -86.50532032941677,
        24.01409824083091,
        -1.231739572450155,
        0.1208650973866179e-2,
        -0.5395239384953e-5,
    )
    series_start = 1.000000000190015
    scale = 2.5066282746310005

    x = xx - 1.0
    t = x + 5.5
    t = t - (x + 0.5) * torch.log(t)

    # Series accumulator, created with the same tensor type as the input.
    ser = Variable(torch.ones(x.size()).type(tensor_type)) * series_start
    for coeff in coefficients:
        x = x + 1.0
        # (x / coeff) ** -1 equals coeff / x.
        ser = ser + torch.pow(x / coeff, -1)
    return torch.log(ser * scale) - t
def log_beta(t):
    """
    Computes log Beta function.

    For a 1-d input the log-gammas are summed over all entries; for a 2-d
    input they are summed along dimension 1, giving one value per row.

    :param t: values to evaluate.
    :type t: torch.autograd.Variable of dimension 1 or 2
    :rtype: torch.autograd.Variable of float (if t.dim() == 1) or torch.Tensor (if t.dim() == 2)
    """
    assert t.dim() in (1, 2)
    if t.dim() == 1:
        return torch.sum(log_gamma(t)) - log_gamma(torch.sum(t))
    return torch.sum(log_gamma(t), 1) - log_gamma(torch.sum(t, 1))
def batch_log_pdf(self, x):
    """
    Evaluates log probability density over one or a batch of samples.

    Each of alpha and x can be either a single value or a batch of values batched along dimension 0.
    If they are both batches, their batch sizes must agree.
    In any case, the rightmost size must agree.

    :param torch.autograd.Variable x: A value (if x.dim() == 1) or batch of values (if x.dim() == 2).
    :return: log probability densities of each element in the batch, shaped
        ``self.batch_shape(x) + (1,)``.
    :rtype: torch.autograd.Variable
    """
    alpha = self.alpha.expand(self.shape(x))
    # sum_i (alpha_i - 1) * log(x_i) over the last dimension
    weighted_logs = torch.mul(alpha - 1, torch.log(x))
    x_sum = torch.sum(weighted_logs, -1)
    # log normalizer
    beta = log_beta(alpha)
    out_shape = self.batch_shape(x) + (1,)
    return (x_sum - beta).contiguous().view(out_shape)
def encode(matched, priors, variances):
    """Encode matched ground-truth boxes as offsets relative to prior boxes.

    Args:
        matched: (tensor) ground-truth box per prior, point form
            (x_min, y_min, x_max, y_max). Shape: [num_priors, 4].
        priors: (tensor) prior boxes in center-offset form (cx, cy, w, h).
            Shape: [num_priors, 4].
        variances: (list[float]) two scaling factors; index 0 scales the
            center offsets, index 1 scales the log size ratios.
    Return:
        (tensor) encoded boxes, Shape: [num_priors, 4].
    """
    prior_cxcy = priors[:, :2]
    prior_wh = priors[:, 2:]
    box_min = matched[:, :2]
    box_max = matched[:, 2:]

    # Center distance between matched box and prior, scaled by variance[0]
    # and the prior size.
    delta_cxcy = (box_min + box_max) / 2 - prior_cxcy
    delta_cxcy /= (variances[0] * prior_wh)

    # Log of matched size over prior size, scaled by variance[1].
    wh_ratio = (box_max - box_min) / prior_wh
    delta_wh = torch.log(wh_ratio) / variances[1]

    # Regression target, [num_priors, 4].
    return torch.cat([delta_cxcy, delta_wh], 1)
# Adapted from https://github.com/Hakuyume/chainer-ssd
def test_forward_backward(self):
    """Check the OIMLoss value and input gradient against a softmax reference.

    With the lookup table set to the identity, the test expects the loss to
    equal the negated sum of the log-probabilities of the true classes, and
    the gradient with respect to ``x`` to equal ``softmax(x) - I``.
    """
    import torch
    import torch.nn.functional as F
    from torch.autograd import Variable
    from reid.loss import OIMLoss
    criterion = OIMLoss(3, 3, scalar=1.0, size_average=False)
    criterion.lut = torch.eye(3)
    x = Variable(torch.randn(3, 3), requires_grad=True)
    # torch.range is deprecated; arange(0, 3) yields the same labels 0, 1, 2.
    y = Variable(torch.arange(0, 3).long())
    loss = criterion(x, y)
    loss.backward()
    # Explicit dim=1 matches what the implicit-dim call chose for 2-d input.
    probs = F.softmax(x, dim=1)
    grads = probs.data - torch.eye(3)
    abs_diff = torch.abs(grads - x.grad.data)
    # assertEquals was a deprecated alias and is removed in Python 3.12.
    self.assertEqual(torch.log(probs).diag().sum(), -loss)
    self.assertTrue(torch.max(abs_diff) < 1e-6)
def encode(matched, priors, variances):
    """Encode matched ground-truth boxes as offsets relative to prior boxes.

    Args:
        matched: (tensor) ground-truth box per prior, point form
            (x_min, y_min, x_max, y_max). Shape: [num_priors, 4].
        priors: (tensor) prior boxes in center-offset form (cx, cy, w, h).
            Shape: [num_priors, 4].
        variances: (list[float]) two scaling factors; index 0 scales the
            center offsets, index 1 scales the log size ratios.
    Return:
        (tensor) encoded boxes, Shape: [num_priors, 4].
    """
    center_scale, size_scale = variances[0], variances[1]
    anchor_center = priors[:, :2]
    anchor_size = priors[:, 2:]
    lower = matched[:, :2]
    upper = matched[:, 2:]

    # Matched-box center minus prior center, then scaled.
    offsets = (lower + upper) / 2 - anchor_center
    offsets /= (center_scale * anchor_size)

    # Log of the width/height ratio, then scaled.
    ratios = (upper - lower) / anchor_size
    log_ratios = torch.log(ratios) / size_scale

    # Regression target, [num_priors, 4].
    return torch.cat([offsets, log_ratios], 1)
# Adapted from https://github.com/Hakuyume/chainer-ssd
def _forward_alg(self, feats):
    """Run the forward recursion in the log domain and return the final score.

    ``feats`` is iterated one row at a time (the original comment describes
    it as len(sentence) * tagset_size). The return value is
    ``log_sum_exp`` of the terminal scores, labelled Z(x) in the original.
    """
    # Start with every tag at -10000 except START_TAG, which is 0.
    start_scores = torch.Tensor(1, self.tagset_size).fill_(-10000.)
    start_scores[0][self.tag_to_ix[START_TAG]] = 0.
    forward_var = autograd.Variable(start_scores)
    if self.use_gpu:
        forward_var = forward_var.cuda()

    for feat in feats:
        emission = feat.view(-1, 1)
        scores = forward_var + self.transitions + emission
        # Subtract the per-row maximum before exponentiating and add it back
        # after the log.
        row_max, _ = torch.max(scores, dim=1)
        shifted = scores - row_max.view(-1, 1)
        exp_total = torch.sum(torch.exp(shifted), dim=1)
        forward_var = row_max + torch.log(exp_total).view(1, -1)

    stop_row = self.transitions[self.tag_to_ix[STOP_TAG]]
    terminal_var = (forward_var + stop_row).view(1, -1)
    # Z(x)
    return log_sum_exp(terminal_var)
def __init__(self, traces, sort=True):
self.batch = traces
self.length = len(traces)
self.traces_lengths = []
self.traces_max_length = 0
self.observes_max_length = 0
sb = {}
for trace in traces:
if trace.length is None:
util.logger.log('Batch: Received a trace of length zero.')
if trace.length > self.traces_max_length:
self.traces_max_length = trace.length
if trace.observes_tensor.size(0) > self.observes_max_length:
self.observes_max_length = trace.observes_tensor.size(0)
h = hash(trace.addresses_suffixed())
if not h in sb:
sb[h] = []
sb[h].append(trace)
self.sub_batches = []
for _, t in sb.items():
self.sub_batches.append(t)
if sort:
# Sort the batch in decreasing trace length.
self.batch = sorted(self.batch, reverse=True, key=lambda t: t.length)
self.traces_lengths = [t.length for t in self.batch]
def loss(self, x, samples):
    """Sum the Gaussian negative log-likelihood of each sample's value.

    Column 0 of the proposal output is used as the mean and column 1 as the
    standard deviation; ``util.epsilon`` is added for numerical safety.
    """
    _, proposal_output = self.forward(x, samples)
    means = proposal_output[:, 0]
    stds = proposal_output[:, 1]

    two_std_squares = 2 * stds * stds + util.epsilon
    two_pi_std_squares = math.pi * two_std_squares
    half_log_terms = 0.5 * torch.log(two_pi_std_squares + util.epsilon)

    total = 0
    for index, sample in enumerate(samples):
        value = sample.value[0]
        squared_error = (value - means[index])**2
        total += half_log_terms[index] + squared_error / two_std_squares[index]
    return total
def loss(self, x, samples):
    """Sum the negative log-density of each sample under a rescaled Beta proposal.

    Column 0 of the proposal output is mapped into [0, 1] using each sample's
    ``prior_min``/``prior_max`` and used as the mode; column 1 plus 2 is used
    as the certainty. Alpha and beta are derived from the mode and certainty.
    """
    _, proposal_output = self.forward(x, samples)
    prior_mins = Variable(util.Tensor([s.distribution.prior_min for s in samples]), requires_grad=False)
    prior_maxs = Variable(util.Tensor([s.distribution.prior_max for s in samples]), requires_grad=False)

    modes = (proposal_output[:, 0] - prior_mins) / (prior_maxs - prior_mins)
    certainties = proposal_output[:, 1] + 2
    alphas = modes * (certainties - 2) + 1
    betas = (1 - modes) * (certainties - 2) + 1
    beta_funs = util.beta(alphas, betas)

    total = 0
    for index, sample in enumerate(samples):
        value = sample.value[0]
        low = sample.distribution.prior_min
        high = sample.distribution.prior_max
        # Map the value into the unit interval.
        unit_value = (value - low) / (high - low)

        log_density = (
            (alphas[index] - 1) * np.log(unit_value + util.epsilon)
            + (betas[index] - 1) * np.log(1 - unit_value + util.epsilon)
            - torch.log(beta_funs[index] + util.epsilon)
            - np.log(high - low + util.epsilon)
        )
        total -= log_density
    return total
def loss(self, x, samples):
    """Sum the negative log-likelihood under a mixture of truncated normals.

    The proposal output is split into three consecutive groups of
    ``self.mixture_components`` columns: means, standard deviations and
    mixture coefficients. Each component density is normalized by the mass
    between the sample's ``prior_min`` and ``prior_max``.
    """
    _, proposal_output = self.forward(x, samples)
    k = self.mixture_components
    means = proposal_output[:, 0:k]
    stds = proposal_output[:, k:2*k]
    coeffs = proposal_output[:, 2*k:3*k]

    total = 0
    for row, sample in enumerate(samples):
        value = sample.value[0]
        low = sample.distribution.prior_min
        high = sample.distribution.prior_max
        likelihood = 0
        for c in range(self.mixture_components):
            mean = means[row, c]
            std = stds[row, c]
            xi = (value - mean) / std
            # Normal CDF evaluated at the lower and upper bounds.
            cdf_low = 0.5 * (1 + util.erf(((low - mean) / std) * util.one_over_sqrt_two))
            cdf_high = 0.5 * (1 + util.erf(((high - mean) / std) * util.one_over_sqrt_two))
            likelihood += coeffs[row, c] * util.one_over_sqrt_two_pi * torch.exp(-0.5 * xi * xi) / (std * (cdf_high - cdf_low))
        total -= torch.log(likelihood + util.epsilon)
    return total
def loss(self, x, samples):
    """Sum the negative FoldedNormal log-density of each sample's value.

    See https://en.wikipedia.org/wiki/Folded_normal_distribution.
    Column 0 of the proposal output is the location and column 1 the scale.
    Samples with a negative value leave the running total unchanged.
    """
    _, proposal_output = self.forward(x, samples)
    locations = proposal_output[:, 0]
    scales = proposal_output[:, 1]
    two_scales = 2 * scales + util.epsilon
    half_log_terms = 0.5 * torch.log(math.pi * two_scales + util.epsilon)

    total = 0
    for index, sample in enumerate(samples):
        value = sample.value[0]
        if value < 0:
            total -= 0
            continue
        location = locations[index]
        two_scale = two_scales[index]
        half_log_term = half_log_terms[index]
        # The density is the sum of two normal terms centred at +location and
        # -location; combine them in log space.
        log_plus = -half_log_term - ((value - location)**2) / two_scale
        log_minus = -half_log_term - ((value + location)**2) / two_scale
        total -= util.logsumexp(torch.cat([log_plus, log_minus]))
    return total
def loss(self, x, samples):
    """Sum the negative Beta log-density of each sample's value.

    Column 0 of the proposal output is used as the mode and column 1 plus 2
    as the certainty; alpha and beta are derived from them.
    """
    _, proposal_output = self.forward(x, samples)
    modes = proposal_output[:, 0]
    certainties = proposal_output[:, 1] + 2
    alphas = modes * (certainties - 2) + 1
    betas = (1 - modes) * (certainties - 2) + 1
    beta_funs = util.beta(alphas, betas)

    total = 0
    for index, sample in enumerate(samples):
        value = sample.value[0]
        log_density = (
            (alphas[index] - 1) * np.log(value + util.epsilon)
            + (betas[index] - 1) * np.log(1 - value + util.epsilon)
            - torch.log(beta_funs[index] + util.epsilon)
        )
        total -= log_density
    return total
def __init__(self, input_example_non_batch, output_dim, reshape=None, dropout=0):
    """Set up six 3x3 2-d convolutions and a dropout layer.

    Args:
        input_example_non_batch: one example observation without a batch
            dimension, either 2-d (height x width) or 3-d
            (num_channels x height x width).
        output_dim: stored on ``self.output_dim``.
        reshape: optional sequence of sizes the example is viewed as first.
            ``self.reshape`` holds a copy with -1 prepended for the batch
            dimension; the caller's sequence is left unmodified.
        dropout: probability handed to ``nn.Dropout``.
    """
    super(ObserveEmbeddingCNN2D6C, self).__init__()
    if reshape is not None:
        input_example_non_batch = input_example_non_batch.view(reshape)
        # Build a new list instead of insert()-ing into the argument, which
        # would mutate the caller's list. The leading -1 is for correct
        # handling of the batch dimension in self.forward.
        reshape = [-1] + list(reshape)
    self.reshape = reshape
    if input_example_non_batch.dim() == 2:
        # Add a channel dimension to a height x width example.
        self.input_sample = input_example_non_batch.unsqueeze(0).cpu()
    elif input_example_non_batch.dim() == 3:
        self.input_sample = input_example_non_batch.cpu()
    else:
        util.logger.log('ObserveEmbeddingCNN2D6C: Expecting a 3d input_example_non_batch (num_channels x height x width) or a 2d input_example_non_batch (height x width). Received: {0}'.format(input_example_non_batch.size()))
    self.input_channels = self.input_sample.size(0)
    self.output_dim = output_dim
    self.conv1 = nn.Conv2d(self.input_channels, 64, 3)
    self.conv2 = nn.Conv2d(64, 64, 3)
    self.conv3 = nn.Conv2d(64, 128, 3)
    self.conv4 = nn.Conv2d(128, 128, 3)
    self.conv5 = nn.Conv2d(128, 128, 3)
    self.conv6 = nn.Conv2d(128, 128, 3)
    self.drop = nn.Dropout(dropout)
def __init__(self, input_example_non_batch, output_dim, reshape=None, dropout=0):
    """Set up four 3x3x3 3-d convolutions and a dropout layer.

    Args:
        input_example_non_batch: one example observation without a batch
            dimension, either 3-d (depth x height x width) or 4-d
            (num_channels x depth x height x width).
        output_dim: stored on ``self.output_dim``.
        reshape: optional sequence of sizes the example is viewed as first.
            ``self.reshape`` holds a copy with -1 prepended for the batch
            dimension; the caller's sequence is left unmodified.
        dropout: probability handed to ``nn.Dropout``.
    """
    super(ObserveEmbeddingCNN3D4C, self).__init__()
    if reshape is not None:
        input_example_non_batch = input_example_non_batch.view(reshape)
        # Build a new list instead of insert()-ing into the argument, which
        # would mutate the caller's list. The leading -1 is for correct
        # handling of the batch dimension in self.forward.
        reshape = [-1] + list(reshape)
    self.reshape = reshape
    if input_example_non_batch.dim() == 3:
        # Add a channel dimension to a depth x height x width example.
        self.input_sample = input_example_non_batch.unsqueeze(0).cpu()
    elif input_example_non_batch.dim() == 4:
        self.input_sample = input_example_non_batch.cpu()
    else:
        util.logger.log('ObserveEmbeddingCNN3D4C: Expecting a 4d input_example_non_batch (num_channels x depth x height x width) or a 3d input_example_non_batch (depth x height x width). Received: {0}'.format(input_example_non_batch.size()))
    self.input_channels = self.input_sample.size(0)
    self.output_dim = output_dim
    self.conv1 = nn.Conv3d(self.input_channels, 64, 3)
    self.conv2 = nn.Conv3d(64, 64, 3)
    self.conv3 = nn.Conv3d(64, 128, 3)
    self.conv4 = nn.Conv3d(128, 128, 3)
    self.drop = nn.Dropout(dropout)
def set_observe_embedding(self, example_observes, obs_emb, obs_emb_dim, obs_reshape=None):
    """Create the observation embedding layer named by ``obs_emb``.

    Supported names are 'fc', 'cnn1d2c', 'cnn2d6c', 'cnn3d4c' and 'lstm'.
    The CNN variants are additionally ``configure()``-d, and only the 2-d and
    3-d CNNs receive ``obs_reshape``. The layer is stored on
    ``self.observe_layer``; any other name is reported through
    ``util.logger.log``.
    """
    self.obs_emb = obs_emb
    self.obs_emb_dim = obs_emb_dim
    if obs_emb == 'fc':
        layer = ObserveEmbeddingFC(Variable(example_observes), obs_emb_dim, dropout=self.dropout)
    elif obs_emb == 'cnn1d2c':
        layer = ObserveEmbeddingCNN1D2C(Variable(example_observes), obs_emb_dim, dropout=self.dropout)
        layer.configure()
    elif obs_emb == 'cnn2d6c':
        layer = ObserveEmbeddingCNN2D6C(Variable(example_observes), obs_emb_dim, obs_reshape, dropout=self.dropout)
        layer.configure()
    elif obs_emb == 'cnn3d4c':
        layer = ObserveEmbeddingCNN3D4C(Variable(example_observes), obs_emb_dim, obs_reshape, dropout=self.dropout)
        layer.configure()
    elif obs_emb == 'lstm':
        layer = ObserveEmbeddingLSTM(Variable(example_observes), obs_emb_dim, dropout=self.dropout)
    else:
        util.logger.log('set_observe_embedding: Unsupported observation embedding: ' + obs_emb)
    self.observe_layer = layer
def KLDGaussian(Q, N, eps=1e-8):
    r"""KL Divergence between two Gaussians

    Assuming Q ~ N(mu0, A\sigma_0A') where A = I + vr^{T}
    and N ~ N(mu1, \sigma_1)

    Args:
        Q: object exposing ``mu``, ``v``, ``r``, ``sigma`` and ``logsigma``.
        N: object exposing ``mu``, ``sigma`` and ``logsigma``.
        eps: small constant added to the variances and inside the log.

    Returns:
        One value per row (all reductions are over dim 1).
    """
    # Row-wise sum; a named helper avoids shadowing the builtin ``sum``.
    def _row_sum(x):
        return torch.sum(x, dim=1)

    k = float(Q.mu.size()[1])  # dimension of distribution
    mu0, v, r, mu1 = Q.mu, Q.v, Q.r, N.mu
    s02, s12 = (Q.sigma).pow(2) + eps, (N.sigma).pow(2) + eps
    # trace term
    a = _row_sum(s02 * (1. + 2. * v * r) / s12) + _row_sum(v.pow(2) / s12) * _row_sum(r.pow(2) * s02)
    # difference-of-means term
    b = _row_sum((mu1 - mu0).pow(2) / s12)
    # ratio-of-determinants term
    c = 2. * (_row_sum(N.logsigma - Q.logsigma) - torch.log(1. + _row_sum(v * r) + eps))
    return 0.5 * (a + b - k + c)
def _boxes2delta(self, box, anchor):
"""
box: (x_min, y_min, x_max, y_max)
anchor: (cx, cy, w, h)
"""
# change (x_min, y_min, x_max, y_max) to (cx, cy, w, h)
box_wh = box.clone()
box_wh[:2] = (box[:2] + box[2:]) / 2
box_wh[2:] = box[2:] - box[:2]
box_wh[0::2] *= self.W
box_wh[1::2] *= self.H
# calc (dcx, dcy, dw, dh)
box_delta = box.clone().fill_(0)
box_delta[:2] = box_wh[:2] - anchor[:2]
box_delta[2:] = torch.log(box_wh[2:]/anchor[2:])
return box_delta
def encode(matched, priors, variances):
    """Encode matched ground-truth boxes as offsets relative to prior boxes.

    Args:
        matched: (tensor) ground-truth box per prior, point form
            (x_min, y_min, x_max, y_max). Shape: [num_priors, 4].
        priors: (tensor) prior boxes in center-offset form (cx, cy, w, h).
            Shape: [num_priors, 4].
        variances: (list[float]) two scaling factors; index 0 scales the
            center offsets, index 1 scales the log size ratios.
    Return:
        (tensor) encoded boxes, Shape: [num_priors, 4].
    """
    xy_min, xy_max = matched[:, :2], matched[:, 2:]
    p_center, p_size = priors[:, :2], priors[:, 2:]

    # Distance between the matched center and the prior center, scaled by
    # variance[0] and the prior size.
    d_center = (xy_min + xy_max) / 2 - p_center
    d_center /= (variances[0] * p_size)

    # Matched size over prior size, in log space, scaled by variance[1].
    relative_size = (xy_max - xy_min) / p_size
    d_size = torch.log(relative_size) / variances[1]

    # Regression target, [num_priors, 4].
    return torch.cat([d_center, d_size], 1)
# Adapted from https://github.com/Hakuyume/chainer-ssd
def poisson_loss(observed_ratings, predicted_ratings):
    """
    Poisson loss.

    Computes the mean of ``predicted - observed * log(predicted)``.

    Parameters
    ----------

    observed_ratings: tensor
        Tensor containing observed ratings.
    predicted_ratings: tensor
        Tensor containing rating predictions.

    Returns
    -------

    loss, float
        The mean value of the loss function.
    """
    assert_no_grad(observed_ratings)
    log_predictions = torch.log(predicted_ratings)
    pointwise = predicted_ratings - observed_ratings * log_predictions
    return pointwise.mean()
def backward(self, grad_output):
    """Return the gradient of log Phi(z) with respect to z, times ``grad_output``.

    Entries with ``z < -1`` use the saved ``self.denominator`` /
    ``self.numerator`` ratio (presumably stored by the forward pass -- TODO
    confirm); all other entries use
    ``exp(-z^2 / 2 - log_phi_z + log(0.5)) * sqrt(2 / pi)``.
    """
    z, log_phi_z = self.saved_tensors
    log_phi_z_grad = z.new().resize_as_(z).zero_()
    z_is_small = z.lt(-1)
    # eq(0) inverts the mask for both byte and bool tensors; "1 - mask" is
    # rejected for bool tensors by current torch.
    z_is_not_small = z_is_small.eq(0)
    if z_is_small.sum() > 0:
        log_phi_z_grad[z_is_small] = torch.abs(self.denominator.div(self.numerator)).mul(math.sqrt(2 / math.pi))
    exp = z[z_is_not_small].pow(2) \
        .div(-2) \
        .sub(log_phi_z[z_is_not_small]) \
        .add(math.log(0.5))
    log_phi_z_grad[z_is_not_small] = torch.exp(exp).mul(math.sqrt(2 / math.pi))
    return log_phi_z_grad.mul(grad_output)
def logsumexp(x, dim=None):
    """
    Args:
        x: A pytorch tensor (any dimension will do)
        dim: int or None, over which to perform the summation. `None`, the
             default, performs over all axes.
    Returns: The result of the log(sum(exp(...))) operation.
    """
    if dim is None:
        xmax = x.max()
        total = torch.exp(x - xmax).sum()
        # Stay inside torch when the reduction is a tensor, so the result
        # keeps its autograd history; numpy.log is only a fallback for
        # versions where the full reduction is not a tensor.
        if torch.is_tensor(total):
            return xmax + torch.log(total)
        return xmax + numpy.log(total)
    else:
        # keepdim=True version broadcasts against x; the squeezed version
        # matches the shape of the reduced sum.
        xmax, _ = x.max(dim, keepdim=True)
        xmax_, _ = x.max(dim)
        return xmax_ + torch.log(torch.exp(x - xmax).sum(dim))
def bbox_transform(ex_rois, gt_rois):
    """Compute box regression targets from example boxes to ground-truth boxes.

    Both inputs are indexed as [:, 0..3] = (x1, y1, x2, y2); widths and
    heights include a +1.0 term. Returns the stacked columns
    (dx, dy, dw, dh), where dx/dy are center offsets divided by the example
    size and dw/dh are log size ratios.
    """
    def _center_form(rois):
        # widths/heights with +1.0, then centers.
        widths = rois[:, 2] - rois[:, 0] + 1.0
        heights = rois[:, 3] - rois[:, 1] + 1.0
        ctr_x = rois[:, 0] + 0.5 * widths
        ctr_y = rois[:, 1] + 0.5 * heights
        return widths, heights, ctr_x, ctr_y

    ex_w, ex_h, ex_cx, ex_cy = _center_form(ex_rois)
    gt_w, gt_h, gt_cx, gt_cy = _center_form(gt_rois)

    dx = (gt_cx - ex_cx) / ex_w
    dy = (gt_cy - ex_cy) / ex_h
    dw = torch.log(gt_w / ex_w)
    dh = torch.log(gt_h / ex_h)
    return torch.stack((dx, dy, dw, dh), 1)
def forward(self, y, weights, mean, std):
    """
    Negative log-likelihood of 'y' under a Gaussian mixture, averaged over
    the result of summing the weighted component densities along dim 1.

    :param y: Non-linear target; expanded to the shape of ``mean``.
    :param weights: Predicted mixture components.
    :param mean: Predicted mixture means.
    :param std: Predicted mixture standard deviations.
    :return: The mean negative log-likelihood.
    """
    normalization = 1.0 / ((2.0 * math.pi) ** 0.5)
    inv_std = torch.reciprocal(std)
    # Standardized residual of y against every component.
    z = (y.expand_as(mean) - mean) * inv_std
    # Gaussian density of each component at y.
    density = normalization * torch.reciprocal(std) * torch.exp(-0.5 * z ** 2)
    mixture = torch.sum(weights * density, dim=1)
    return -torch.mean(torch.log(mixture))
def experiments_randseeds(opt, start = 0, end = 5):
    """Run ``train`` once per random seed and append the results to a CSV file.

    Seeds ``random_seeds[start:end]`` are used in order. Each run gets its own
    experiment directory below the original ``opt.experiment``; the seed and
    returned log-probability are appended to the CSV and the best
    configuration so far is printed. A failing run has its traceback printed
    and the loop moves on to the next seed.

    Note: ``opt.manualSeed`` and ``opt.experiment`` are overwritten on ``opt``.
    """
    random_seeds = [1, 101, 512, 1001, 10001]
    original_exp = opt.experiment
    file_name = '{0}_{1}_{2}_{3}_{4}_experiments.csv'.format(opt.dataset, opt.D, opt.A, opt.H, opt.critic_last_layer)
    csv_file = os.path.join(opt.experiment, file_name)
    with open(csv_file, 'a') as out:
        # None marks "no result yet"; a sentinel of 0 would let a worse
        # result replace a genuine best log-probability of exactly 0.
        max_logprob = None
        best_config = ''
        for i in range(start, end):
            rand_seed = random_seeds[i]
            opt.manualSeed = rand_seed
            try:
                opt.experiment = os.path.join(original_exp, '{0}_{1}_{2}_{3}_{4}_{5}'.format(opt.dataset, opt.D, opt.A, opt.H, opt.manualSeed, opt.critic_last_layer))
                if not os.path.exists(opt.experiment):
                    os.makedirs(opt.experiment)
                logprob = train(opt=opt, log_file_path=os.path.join(opt.experiment, '{0}_{1}_{2}_{3}_{4}_{5}_experiments.log'.format(opt.dataset, opt.D, opt.A, opt.H, opt.manualSeed, opt.critic_last_layer)))
                config = '{0},{1}\n'.format(rand_seed, logprob)
                if max_logprob is None or logprob > max_logprob:
                    max_logprob = logprob
                    best_config = config
                out.write(config)
                out.flush()
                print('best %s ' % best_config)
            except Exception:
                # Keep going with the next seed, but let KeyboardInterrupt and
                # SystemExit propagate.
                traceback.print_exc()
def accumulate_gradient(self, batch_sz, states, actions, rewards,
                        next_states, mask):
    """ Compute the difference between the return distributions of Q(s,a)
        and TQ(s_,a), and backpropagate it.

        The loss is the cross-entropy between the target distribution from
        ``self._get_categorical`` and the policy's distribution for the
        taken actions. Gradients are accumulated by ``loss.backward()``;
        nothing is returned.
    """
    states = Variable(states)
    actions = Variable(actions)
    next_states = Variable(next_states, volatile=True)

    # Compute probabilities of Q(s,a*)
    q_probs = self.policy(states)
    # Expand the action index over the atoms so gather picks the whole
    # distribution of the chosen action.
    actions = actions.view(batch_sz, 1, 1)
    gather_index = actions.expand(batch_sz, 1, self.atoms_no)
    qa_probs = q_probs.gather(1, gather_index).squeeze()

    # Compute distribution of Q(s_,a)
    target_qa_probs = self._get_categorical(next_states, rewards, mask)

    # Compute the cross-entropy of phi(TZ(x_,a)) || Z(x,a)
    # Clamp in place to keep the log finite (the original calls this
    # "Tudor's trick for avoiding nans").
    qa_probs.data.clamp_(0.01, 0.99)
    cross_entropy = - torch.sum(target_qa_probs * torch.log(qa_probs))

    # Accumulate gradients
    cross_entropy.backward()
def logsumexp(x, axis=None, keepdims=False):
    """Apply log(sum(exp(x))) along ``axis`` through ``get_op``.

    The computation and the output-shape rule are both handed to ``get_op``
    together with ``[axis, keepdims]``; the resulting op is called on ``x``.
    """
    def _logsumexp(x, axis=axis, keepdims=keepdims):
        reduced = torch.log(torch.sum(torch.exp(x), axis))
        if keepdims:
            return reduced
        return torch.squeeze(reduced, axis)

    def _compute_output_shape(x, axis=axis, keepdims=keepdims):
        # Reduction over everything yields a scalar shape.
        if axis is None:
            return ()
        dims = list(_get_shape(x))
        if keepdims:
            dims[axis] = 1
        else:
            del dims[axis]
        return tuple(dims)

    op = get_op(_logsumexp, output_shape=_compute_output_shape, arguments=[axis, keepdims])
    return op(x)
def compute_loss(self, input, e, b, clusters, it=0):
    """Accumulate a diameter-weighted log-probability loss over cluster pairs.

    For each ``cl`` in ``range(clusters // 2)`` the value ``L`` and the
    terms ``m1``/``m2`` come from ``self.compute_diameter``. Returns
    ``(Loss, Ls)``, both reduced with ``mean(0)``.
    NOTE(review): the meaning of ``e`` and ``b`` is not visible here --
    presumably cluster assignments and split probabilities; confirm.
    """
    total_loss = Variable(torch.zeros((self.batch_size))).type(dtype)
    total_diameter = Variable(torch.zeros((self.batch_size))).type(dtype)
    for cl in range(clusters // 2):
        L, m1, m2 = self.compute_diameter(input, e, cl, it=it)
        # 1 where e / 2 (cast to dtype_l) equals this cluster index.
        mask = ((e / 2).type(dtype_l) == cl).type(dtype)
        # Per-row member count, with zeros bumped to one to avoid dividing
        # by zero.
        count = mask.sum(1).squeeze()
        count += (count == 0).type(dtype)
        # Entries outside the mask get +1 inside the log.
        mixed = (1 - b) * m1 + b * m2 + (1 - mask) + 1e-8
        log_probs = torch.log(mixed)
        total_loss += L * log_probs.sum(1) / count
        total_diameter += L
    Ls = total_diameter.mean(0)
    Loss = total_loss.mean(0)
    return Loss, Ls
###########################################################################
# Split Phase #
###########################################################################
def logaddexp(x1: T.FloatTensor, x2: T.FloatTensor) -> T.FloatTensor:
    """
    Elementwise logaddexp function: log(exp(x1) + exp(x2))

    Args:
        x1: A tensor.
        x2: A tensor.

    Returns:
        tensor: Elementwise logaddexp.

    """
    # Factor out the larger argument so only a non-positive value is
    # exponentiated:
    #   log(exp(x1) + exp(x2)) = max(x1, x2) + log(1 + exp(-|x1 - x2|))
    gap = x2 - x1
    diff = torch.min(gap, -gap)
    larger = torch.max(x1, x2)
    return larger + torch.log1p(exp(diff))
def cross_entropy_loss(self, x, y):
    '''Cross entropy loss w/o averaging across all samples.

    Args:
      x: (tensor) sized [N,D].
      y: (tensor) sized [N,].

    Return:
      (tensor) cross entropy loss, one value per sample, shaped [N,1]
      by the final view/gather.
    '''
    # Shift by the global maximum before exponentiating, then add it back.
    global_max = x.data.max()
    shifted_exp_sum = torch.sum(torch.exp(x - global_max), 1)
    log_sum_exp = torch.log(shifted_exp_sum) + global_max
    # Score of the target class for each row, shaped [N, 1].
    target_scores = x.gather(1, y.view(-1, 1))
    return log_sum_exp.view(-1, 1) - target_scores
def bbox_transform(ex_rois, gt_rois):
    """Compute box regression targets from example boxes to ground-truth boxes.

    Both inputs are indexed as [:, 0..3] = (x1, y1, x2, y2); widths and
    heights include a +1.0 term. Returns the stacked columns
    (dx, dy, dw, dh), where dx/dy are center offsets divided by the example
    size and dw/dh are log size ratios.
    """
    # Example boxes: size and center.
    ex_x1, ex_y1 = ex_rois[:, 0], ex_rois[:, 1]
    ex_widths = ex_rois[:, 2] - ex_x1 + 1.0
    ex_heights = ex_rois[:, 3] - ex_y1 + 1.0
    ex_ctr_x = ex_x1 + 0.5 * ex_widths
    ex_ctr_y = ex_y1 + 0.5 * ex_heights

    # Ground-truth boxes: size and center.
    gt_x1, gt_y1 = gt_rois[:, 0], gt_rois[:, 1]
    gt_widths = gt_rois[:, 2] - gt_x1 + 1.0
    gt_heights = gt_rois[:, 3] - gt_y1 + 1.0
    gt_ctr_x = gt_x1 + 0.5 * gt_widths
    gt_ctr_y = gt_y1 + 0.5 * gt_heights

    columns = (
        (gt_ctr_x - ex_ctr_x) / ex_widths,
        (gt_ctr_y - ex_ctr_y) / ex_heights,
        torch.log(gt_widths / ex_widths),
        torch.log(gt_heights / ex_heights),
    )
    return torch.stack(columns, 1)
def logp(x, mean, std):
    """Return the elementwise Gaussian log-density of ``x`` given ``mean`` and ``std``.

    Computed as ``-(0.5 * ((x - mean) / std)**2 + 0.5 * LOG2PI + log(std))``.
    """
    standardized = (x - mean) / (std)
    neg_log_density = 0.5 * standardized**2 + 0.5 * LOG2PI + th.log(std)
    return -neg_log_density
def EntropicConfusion(features):
    """Return sum(features * log(features)) divided by the batch size.

    The batch size is ``features.size(0)``.
    """
    batch_size = features.size(0)
    weighted_logs = torch.mul(features, torch.log(features))
    return weighted_logs.sum() * (1.0 / batch_size)
def forward(self, output, target):
    """Return the balanced focal loss for ``output`` logits and ``target`` labels.

    ``F.cross_entropy`` returns ``-log(p_t)`` (averaged over the batch), so
    the probability of the true class is recovered as ``exp(-CE)`` before
    applying ``-(1 - p_t) ** focusing_param * log(p_t)``. The result is
    scaled by ``self.balance_param``.
    """
    # The cross-entropy is a negative log-probability, not a probability:
    # negate it for log(p_t) and exponentiate for p_t.
    log_pt = -F.cross_entropy(output, target)
    pt = torch.exp(log_pt)
    focal_loss = -((1 - pt) ** self.focusing_param) * log_pt
    balanced_focal_loss = self.balance_param * focal_loss
    return balanced_focal_loss
def pretrain(self, x, pt_epochs, verbose=True):
    """Greedily pre-train each autoencoder in ``self.autoencoders_ref``.

    Args:
        x: input whose ``.data.size()[0]`` is the number of samples and
            which supports row slicing.
        pt_epochs: number of epochs used for every autoencoder.
        verbose: when true, print the mean batch cost after every epoch.

    Every autoencoder after the first is trained on the (noise-free)
    encoding produced by the previous one. Only full batches are used;
    trailing rows that do not fill a batch are skipped.
    """
    n = x.data.size()[0]
    # Floor division: range() needs an int and "/" yields a float on Python 3.
    num_batches = n // self.batch_size
    t = x
    # Pre-train 1 autoencoder at a time
    for i, ae_re in enumerate(self.autoencoders_ref):
        # Get the current autoencoder
        ae = getattr(self.sequential, ae_re)
        # Getting encoded output from the previous autoencoder
        if i > 0:
            # Set the requires_grad to False so that backprop doesn't
            # travel all the way back to the previous autoencoder
            temp = Variable(torch.FloatTensor(n, ae.d_in), requires_grad=False)
            for k in range(num_batches):
                start, end = k * self.batch_size, (k + 1) * self.batch_size
                prev_ae = getattr(self.sequential, self.autoencoders_ref[i - 1])
                temp.data[start:end] = prev_ae.encode(t[start:end], add_noise=False).data
            t = temp
        optimizer = SGD(ae.parameters(), lr=self.pre_lr)
        # Pre-training
        print("Pre-training Autoencoder:", i)
        for ep in range(pt_epochs):
            agg_cost = 0.
            for k in range(num_batches):
                start, end = k * self.batch_size, (k + 1) * self.batch_size
                bt = t[start:end]
                optimizer.zero_grad()
                # Noisy encode, then decode to obtain the reconstruction.
                z = ae.encode(bt, add_noise=True)
                z = ae.decode(z)
                # Per-sample binary cross-entropy, summed over dimension 1.
                loss = -torch.sum(bt * torch.log(z) + (1.0 - bt) * torch.log(1.0 - z), 1)
                cost = torch.mean(loss)
                cost.backward()
                optimizer.step()
                agg_cost += cost
            agg_cost /= num_batches
            if verbose:
                print("Pre-training Autoencoder:", i, "Epoch:", ep, "Cost:", agg_cost.data[0])