Python torch module: tanh() code examples
The following code examples, extracted from open-source Python projects, illustrate how to use torch.tanh().
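For quick reference, torch.tanh applies the element-wise hyperbolic tangent, squashing every value into the open interval (-1, 1); a minimal sketch:

import torch

x = torch.linspace(-3.0, 3.0, steps=5)
y = torch.tanh(x)  # element-wise, each value in (-1, 1); tanh(0) == 0
print(y)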
def _combine_last(self, r, h_t):
'''
inputs:
r : batch x n_dim
h_t : batch x n_dim (this is the output from the gru unit)
params :
W_x : n_dim x n_dim
W_p : n_dim x n_dim
out :
h_star : batch x n_dim
'''
W_p_r = torch.mm(r, self.W_p) # batch x n_dim
W_x_h = torch.mm(h_t, self.W_x) # batch x n_dim
h_star = F.tanh(W_p_r + W_x_h) # batch x n_dim
return h_star
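A minimal standalone sketch of the combination above, with made-up sizes (batch = 2, n_dim = 4) and plain random tensors standing in for the module parameters W_p and W_x:

import torch

batch, n_dim = 2, 4
r = torch.randn(batch, n_dim)    # attention-weighted representation
h_t = torch.randn(batch, n_dim)  # last GRU output
W_p = torch.randn(n_dim, n_dim)
W_x = torch.randn(n_dim, n_dim)
h_star = torch.tanh(torch.mm(r, W_p) + torch.mm(h_t, W_x))  # batch x n_dim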
def forward(self, x, hidden):
h, c = hidden
h = h.view(h.size(1), -1)
c = c.view(c.size(1), -1)
x = x.view(x.size(1), -1)
# Linear mappings
i_t = th.mm(x, self.w_xi) + th.mm(h, self.w_hi) + self.b_i
f_t = th.mm(x, self.w_xf) + th.mm(h, self.w_hf) + self.b_f
o_t = th.mm(x, self.w_xo) + th.mm(h, self.w_ho) + self.b_o
# activations
i_t.sigmoid_()
f_t.sigmoid_()
o_t.sigmoid_()
# cell computations
c_t = th.mm(x, self.w_xc) + th.mm(h, self.w_hc) + self.b_c
c_t.tanh_()
c_t = th.mul(c, f_t) + th.mul(i_t, c_t)
h_t = th.mul(o_t, th.tanh(c_t))
# Reshape for compatibility
h_t = h_t.view(1, h_t.size(0), -1)
c_t = c_t.view(1, c_t.size(0), -1)
if self.dropout > 0.0:
F.dropout(h_t, p=self.dropout, training=self.training, inplace=True)
return h_t, (h_t, c_t)
def _transform_decoder_init_state(self, hn):
if isinstance(hn, tuple):
hn, cn = hn
# hn [2 * num_layers, batch, hidden_size]
num_dir, batch, hidden_size = cn.size()
# first convert cn to [batch, 2 * num_layers, hidden_size]
cn = cn.transpose(0, 1).contiguous()
# then view to [batch, num_layers, 2 * hidden_size] --> [num_layers, batch, 2 * hidden_size]
cn = cn.view(batch, num_dir // 2, 2 * hidden_size).transpose(0, 1)
# take hx_dense to [num_layers, batch, hidden_size]
cn = self.hx_dense(cn)
# hn is tanh(cn)
hn = F.tanh(cn)
hn = (hn, cn)
else:
# hn [2 * num_layers, batch, hidden_size]
num_dir, batch, hidden_size = hn.size()
# first convert hn to [batch, 2 * num_layers, hidden_size]
hn = hn.transpose(0, 1).contiguous()
# then view to [batch, num_layers, 2 * hidden_size] --> [num_layers, batch, 2 * hidden_size]
hn = hn.view(batch, num_dir // 2, 2 * hidden_size).transpose(0, 1)
# take hx_dense to [num_layers, batch, hidden_size]
hn = F.tanh(self.hx_dense(hn))
return hn
def _step(self, H_t, T_t, C_t, h0, h_mask, t_mask, c_mask):
s_lm1, rnns = h0, [self.rnn_h, self.rnn_t, self.rnn_c]
for l, (rnn_h, rnn_t, rnn_c) in enumerate(zip(*rnns)):
s_lm1_H = h_mask.expand_as(s_lm1) * s_lm1
s_lm1_T = t_mask.expand_as(s_lm1) * s_lm1
s_lm1_C = c_mask.expand_as(s_lm1) * s_lm1
if l == 0:
H_t = F.tanh(H_t + rnn_h(s_lm1_H))
T_t = F.sigmoid(T_t + rnn_t(s_lm1_T))
C_t = F.sigmoid(C_t + rnn_c(s_lm1_C))
else:
H_t = F.tanh(rnn_h(s_lm1_H))
T_t = F.sigmoid(rnn_t(s_lm1_T))
C_t = F.sigmoid(rnn_c(s_lm1_C))
s_l = H_t * T_t + s_lm1 * C_t
s_lm1 = s_l
return s_l
def forward(self, x, hidden):
h, c = hidden
h = h.view(h.size(0), -1)
c = c.view(c.size(0), -1)
x = x.view(x.size(0), -1)
# Linear mappings
i_t = th.mm(x, self.w_xi) + th.mm(h, self.w_hi) + self.b_i
f_t = th.mm(x, self.w_xf) + th.mm(h, self.w_hf) + self.b_f
o_t = th.mm(x, self.w_xo) + th.mm(h, self.w_ho) + self.b_o
# activations
i_t.sigmoid_()
f_t.sigmoid_()
o_t.sigmoid_()
# cell computations
c_t = th.mm(x, self.w_xc) + th.mm(h, self.w_hc) + self.b_c
c_t.tanh_()
c_t = th.mul(c, f_t) + th.mul(i_t, c_t)
h_t = th.mul(o_t, th.tanh(c_t))
# Reshape for compatibility
h_t = h_t.view(h_t.size(0), 1, -1)
c_t = c_t.view(c_t.size(0), 1, -1)
if self.dropout > 0.0:
F.dropout(h_t, p=self.dropout, training=self.training, inplace=True)
return h_t, (h_t, c_t)
def attention(self, hidden, W1xe, hidden_encoder):
# train
W2xdn = torch.mm(hidden, self.W2)
W2xdn = W2xdn.unsqueeze(1).expand(self.batch_size, self.n + 1,
self.hidden_size)
u = (torch.bmm(torch.tanh(W1xe + W2xdn), self.v.unsqueeze(0)
.expand(self.batch_size, self.hidden_size, 1)))
u = u.squeeze()
# test
# W2xdn = torch.mm(hidden, self.W2)
# u = Variable(torch.zeros(self.batch_size, self.n + 1)).type(dtype)
# for n in range(self.n + 1):
# aux = torch.tanh(W1xe[:, n].squeeze() + W2xdn) # size bs x hidd
# aux2 = (torch.bmm(aux.unsqueeze(1), self.v.unsqueeze(0)
# .expand(self.batch_size, self.hidden_size, 1)))
# u[:, n] = aux2.squeeze()
return u
def batch_matmul_bias(seq, weight, bias, nonlinearity=''):
s = None
bias_dim = bias.size()
for i in range(seq.size(0)):
_s = torch.mm(seq[i], weight)
_s_bias = _s + bias.expand(bias_dim[0], _s.size()[0]).transpose(0,1)
if(nonlinearity=='tanh'):
_s_bias = torch.tanh(_s_bias)
_s_bias = _s_bias.unsqueeze(0)
if(s is None):
s = _s_bias
else:
s = torch.cat((s,_s_bias),0)
return s.squeeze()
def batch_matmul(seq, weight, nonlinearity=''):
s = None
for i in range(seq.size(0)):
_s = torch.mm(seq[i], weight)
if(nonlinearity=='tanh'):
_s = torch.tanh(_s)
_s = _s.unsqueeze(0)
if(s is None):
s = _s
else:
s = torch.cat((s,_s),0)
return s.squeeze()
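A hedged usage sketch for the two helpers above, assuming a sequence of T timesteps with batch size b and width d (batch_matmul_bias additionally expects a bias of shape (d, 1) so that its expand/transpose succeeds):

import torch

T, b, d = 3, 2, 5
seq = torch.randn(T, b, d)
weight = torch.randn(d, d)
out = batch_matmul(seq, weight, nonlinearity='tanh')  # T x b x d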
def forward(self, embed, state_word):
# embeddings
embedded = self.lookup(embed)
# word level gru
output_word, state_word = self.word_gru(embedded, state_word)
word_squish = batch_matmul_bias(output_word, self.weight_W_word, self.bias_word, nonlinearity='tanh')
word_attn = batch_matmul(word_squish, self.weight_proj_word)
word_attn_norm = self.softmax_word(word_attn.transpose(1,0))
word_attn_vectors = attention_mul(output_word, word_attn_norm.transpose(1,0))
return word_attn_vectors, state_word, word_attn_norm
def forward(self, inputs, z, hidden_cell=None):
if hidden_cell is None:
# then we must init from z
hidden,cell = torch.split(F.tanh(self.fc_hc(z)),hp.dec_hidden_size,1)
hidden_cell = (hidden.unsqueeze(0).contiguous(), cell.unsqueeze(0).contiguous())
outputs,(hidden,cell) = self.lstm(inputs, hidden_cell)
# in training we feed the lstm with the whole input in one shot
# and use all outputs contained in 'outputs', while in generate
# mode we just feed with the last generated sample:
if self.training:
y = self.fc_params(outputs.view(-1, hp.dec_hidden_size))
else:
y = self.fc_params(hidden.view(-1, hp.dec_hidden_size))
# separate pen and mixture params:
params = torch.split(y,6,1)
params_mixture = torch.stack(params[:-1]) # trajectory
params_pen = params[-1] # pen up/down
# identify mixture params:
pi,mu_x,mu_y,sigma_x,sigma_y,rho_xy = torch.split(params_mixture,1,2)
# preprocess params:
if self.training:
len_out = Nmax+1
else:
len_out = 1
pi = F.softmax(pi.t().squeeze()).view(len_out,-1,hp.M)
sigma_x = torch.exp(sigma_x.t().squeeze()).view(len_out,-1,hp.M)
sigma_y = torch.exp(sigma_y.t().squeeze()).view(len_out,-1,hp.M)
rho_xy = torch.tanh(rho_xy.t().squeeze()).view(len_out,-1,hp.M)
mu_x = mu_x.t().squeeze().contiguous().view(len_out,-1,hp.M)
mu_y = mu_y.t().squeeze().contiguous().view(len_out,-1,hp.M)
q = F.softmax(params_pen).view(len_out,-1,3)
return pi,mu_x,mu_y,sigma_x,sigma_y,rho_xy,q,hidden,cell
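A small shape check of the parameter split above, assuming hp.M = 2 mixture components, so that fc_params emits 6 * M + 3 columns:

import torch

M, rows = 2, 5
y = torch.randn(rows, 6 * M + 3)
params = torch.split(y, 6, 1)       # M chunks of width 6, then a final chunk of width 3
print([p.size() for p in params])   # torch.Size([5, 6]) twice, then torch.Size([5, 3])
mixture = torch.stack(params[:-1])  # M x rows x 6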
def visualize():
# initialise the model
discriminator = ArcBinaryClassifier(num_glimpses=opt.numGlimpses,
glimpse_h=opt.glimpseSize,
glimpse_w=opt.glimpseSize,
controller_out=opt.numStates)
discriminator.load_state_dict(torch.load(os.path.join("saved_models", opt.name, opt.load)))
arc = discriminator.arc
sample = get_sample(discriminator)
all_hidden = arc._forward(sample[None, :, :])[:, 0, :] # (2*numGlimpses, controller_out)
glimpse_params = torch.tanh(arc.glimpser(all_hidden))
masks = arc.glimpse_window.get_attention_mask(glimpse_params, mask_h=opt.imageSize, mask_w=opt.imageSize)
# separate the masks of each image.
masks1 = []
masks2 = []
for i, mask in enumerate(masks):
if i % 2 == 1: # the first image outputs the hidden state for the next image
masks1.append(mask)
else:
masks2.append(mask)
for i, (mask1, mask2) in enumerate(zip(masks1, masks2)):
display(sample[0], mask1, sample[1], mask2, "img_{}".format(i))
def forward(self, input_, hx):
"""
Args:
input_: A (batch, input_size) tensor containing input
features.
hx: A tuple (h_0, c_0), which contains the initial hidden
and cell state, where the size of both states is
(batch, hidden_size).
Returns:
h_1, c_1: Tensors containing the next hidden and cell state.
"""
h_0, c_0 = hx
batch_size = h_0.size(0)
bias_batch = (self.bias.unsqueeze(0)
.expand(batch_size, *self.bias.size()))
wh_b = torch.addmm(bias_batch, h_0, self.weight_hh)
wi = torch.mm(input_, self.weight_ih)
f, i, o, g = torch.split(wh_b + wi,
split_size=self.hidden_size, dim=1)
c_1 = torch.sigmoid(f)*c_0 + torch.sigmoid(i)*torch.tanh(g)
h_1 = torch.sigmoid(o) * torch.tanh(c_1)
return h_1, c_1
def forward(self, input_, hx, time):
"""
Args:
input_: A (batch, input_size) tensor containing input
features.
hx: A tuple (h_0, c_0), which contains the initial hidden
and cell state, where the size of both states is
(batch, hidden_size).
time: The current timestep value, which is used to
get appropriate running statistics.
Returns:
h_1, c_1: Tensors containing the next hidden and cell state.
"""
h_0, c_0 = hx
batch_size = h_0.size(0)
bias_batch = (self.bias.unsqueeze(0)
.expand(batch_size, *self.bias.size()))
wh = torch.mm(h_0, self.weight_hh)
wi = torch.mm(input_, self.weight_ih)
bn_wh = self.bn_hh(wh, time=time)
bn_wi = self.bn_ih(wi, time=time)
f, i, o, g = torch.split(bn_wh + bn_wi + bias_batch,
split_size=self.hidden_size, dim=1)
c_1 = torch.sigmoid(f)*c_0 + torch.sigmoid(i)*torch.tanh(g)
h_1 = torch.sigmoid(o) * torch.tanh(self.bn_c(c_1, time=time))
return h_1, c_1
def tanh_rescale(x, x_min=-1., x_max=1.):
return (torch.tanh(x) + 1) * 0.5 * (x_max - x_min) + x_min
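For example, squashing raw network outputs into pixel range (a sketch):

import torch

x = torch.randn(2, 3)
pixels = tanh_rescale(x, x_min=0., x_max=255.)  # values now in (0, 255)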
def _attention_forward(self, Y, mask_Y, h, r_tm1=None):
'''
Computes the Attention Weights over Y using h (and r_tm1 if given)
Returns an attention weighted representation of Y, and the alphas
inputs:
Y : T x batch x n_dim
mask_Y : T x batch
h : batch x n_dim
r_tm1 : batch x n_dim
params:
W_y : n_dim x n_dim
W_h : n_dim x n_dim
W_r : n_dim x n_dim
W_alpha : n_dim x 1
outputs :
r : batch x n_dim
alpha : batch x T
'''
Y = Y.transpose(1, 0) # batch x T x n_dim
mask_Y = mask_Y.transpose(1, 0) # batch x T
Wy = torch.bmm(Y, self.W_y.unsqueeze(0).expand(Y.size(0), *self.W_y.size())) # batch x T x n_dim
Wh = torch.mm(h, self.W_h) # batch x n_dim
if r_tm1 is not None:
W_r_tm1 = torch.mm(r_tm1, self.W_r)
Wh += W_r_tm1
M = torch.tanh(Wy + Wh.unsqueeze(1).expand(Wh.size(0), Y.size(1), Wh.size(1))) # batch x T x n_dim
alpha = torch.bmm(M, self.W_alpha.unsqueeze(0).expand(Y.size(0), *self.W_alpha.size())).squeeze(-1) # batch x T
alpha = alpha + (-1000.0 * (1. - mask_Y)) # To ensure probability mass doesn't fall on non-tokens
alpha = F.softmax(alpha)
return torch.bmm(alpha.unsqueeze(1), Y).squeeze(1), alpha
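A standalone sketch of the same attention computation with made-up sizes (T = 4, batch = 2, n_dim = 3) and plain tensors in place of the module parameters; each row of alpha sums to 1 over the T positions:

import torch
import torch.nn.functional as F

T, batch, n_dim = 4, 2, 3
Y = torch.randn(batch, T, n_dim)  # already batch-first
h = torch.randn(batch, n_dim)
W_y = torch.randn(n_dim, n_dim)
W_h = torch.randn(n_dim, n_dim)
W_alpha = torch.randn(n_dim, 1)

M = torch.tanh(Y.matmul(W_y) + torch.mm(h, W_h).unsqueeze(1))  # batch x T x n_dim
alpha = F.softmax(M.matmul(W_alpha).squeeze(-1), dim=1)        # batch x T
r = torch.bmm(alpha.unsqueeze(1), Y).squeeze(1)                # batch x n_dim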
def _attention_forward(self, Y, mask_Y, h, r_tm1=None, index=None):
'''
Computes the Attention Weights over Y using h (and r_tm1 if given)
Returns an attention weighted representation of Y, and the alphas
inputs:
Y : T x batch x n_dim
mask_Y : T x batch
h : batch x n_dim
r_tm1 : batch x n_dim
index : int : The timestep
params:
W_y : n_dim x n_dim
W_h : n_dim x n_dim
W_r : n_dim x n_dim
W_alpha : n_dim x 1
outputs :
r : batch x n_dim
alpha : batch x T
'''
Y = Y.transpose(1, 0) # batch x T x n_dim
mask_Y = mask_Y.transpose(1, 0) # batch x T
Wy = torch.bmm(Y, self.W_y.unsqueeze(0).expand(Y.size(0), *self.W_y.size())) # batch x T x n_dim
Wh = torch.mm(h, self.W_h) # batch x n_dim
if r_tm1 is not None:
W_r_tm1 = self.batch_norm_r_r(torch.mm(r_tm1, self.W_r), index) if hasattr(self, 'batch_norm_r_r') else torch.mm(r_tm1, self.W_r)
Wh = self.batch_norm_h_r(Wh, index) if hasattr(self, 'batch_norm_h_r') else Wh
Wh += W_r_tm1
M = torch.tanh(Wy + Wh.unsqueeze(1).expand(Wh.size(0), Y.size(1), Wh.size(1))) # batch x T x n_dim
alpha = torch.bmm(M, self.W_alpha.unsqueeze(0).expand(Y.size(0), *self.W_alpha.size())).squeeze(-1) # batch x T
alpha = alpha + (-1000.0 * (1. - mask_Y)) # To ensure probability mass doesn't fall on non-tokens
alpha = F.softmax(alpha)
if r_tm1 is not None:
r = torch.bmm(alpha.unsqueeze(1), Y).squeeze(1) + F.tanh(torch.mm(r_tm1, self.W_t)) # batch x n_dim
else:
r = torch.bmm(alpha.unsqueeze(1), Y).squeeze(1) # batch x n_dim
return r, alpha
def forward(self, input_, hx):
"""
Args:
input_: A (batch, input_size) tensor containing input
features.
hx: A tuple (h_0, c_0), which contains the initial hidden
and cell state, where the size of both states is
(batch, hidden_size).
Returns:
h_1, c_1: Tensors containing the next hidden and cell state.
"""
h_0, c_0 = hx
batch_size = h_0.size(0)
bias_batch = (self.bias.unsqueeze(0)
.expand(batch_size, *self.bias.size()))
wh_b = torch.addmm(bias_batch, h_0, self.weight_hh)
wi = torch.mm(input_, self.weight_ih)
f, i, o, g = torch.split(wh_b + wi,
split_size=self.hidden_size, dim=1)
c_1 = torch.sigmoid(f)*c_0 + torch.sigmoid(i)*torch.tanh(g)
h_1 = torch.sigmoid(o) * torch.tanh(c_1)
return h_1, c_1
def forward(self, input_, hx, time):
"""
Args:
input_: A (batch, input_size) tensor containing input
features.
hx: A tuple (h_0, c_0), which contains the initial hidden
and cell state, where the size of both states is
(batch, hidden_size).
time: The current timestep value, which is used to
get appropriate running statistics.
Returns:
h_1, c_1: Tensors containing the next hidden and cell state.
"""
h_0, c_0 = hx
batch_size = h_0.size(0)
bias_batch = (self.bias.unsqueeze(0)
.expand(batch_size, *self.bias.size()))
wh = torch.mm(h_0, self.weight_hh)
wi = torch.mm(input_, self.weight_ih)
bn_wh = self.bn_hh(wh, time=time)
bn_wi = self.bn_ih(wi, time=time)
f, i, o, g = torch.split(bn_wh + bn_wi + bias_batch,
split_size=self.hidden_size, dim=1)
c_1 = torch.sigmoid(f)*c_0 + torch.sigmoid(i)*torch.tanh(g)
h_1 = torch.sigmoid(o) * torch.tanh(self.bn_c(c_1, time=time))
return h_1, c_1
def forward(self, x, hidden):
do_dropout = self.training and self.dropout > 0.0
h, c = hidden
h = h.view(h.size(1), -1)
c = c.view(c.size(1), -1)
x = x.view(x.size(1), -1)
# Linear mappings
preact = self.i2h(x) + self.h2h(h)
# activations
gates = preact[:, :3 * self.hidden_size].sigmoid()
g_t = preact[:, 3 * self.hidden_size:].tanh()
i_t = gates[:, :self.hidden_size]
f_t = gates[:, self.hidden_size:2 * self.hidden_size]
o_t = gates[:, -self.hidden_size:]
# cell computations
if do_dropout and self.dropout_method == 'semeniuta':
g_t = F.dropout(g_t, p=self.dropout, training=self.training)
c_t = th.mul(c, f_t) + th.mul(i_t, g_t)
if do_dropout and self.dropout_method == 'moon':
c_t.data.set_(th.mul(c_t, self.mask).data)
c_t.data *= 1.0/(1.0 - self.dropout)
h_t = th.mul(o_t, c_t.tanh())
# Reshape for compatibility
if do_dropout:
if self.dropout_method == 'pytorch':
F.dropout(h_t, p=self.dropout, training=self.training, inplace=True)
if self.dropout_method == 'gal':
h_t.data.set_(th.mul(h_t, self.mask).data)
h_t.data *= 1.0/(1.0 - self.dropout)
h_t = h_t.view(1, h_t.size(0), -1)
c_t = c_t.view(1, c_t.size(0), -1)
return h_t, (h_t, c_t)
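The four dropout_method values differ only in where the mask is applied: 'pytorch' resamples a mask on h_t at every step, 'gal' reuses one locked mask on h_t across the whole sequence, 'semeniuta' drops only the candidate g_t, and 'moon' drops the cell state c_t. A hedged sketch of how the locked mask assumed in self.mask could be sampled once per sequence:

import torch as th

batch, hidden_size, dropout = 4, 8, 0.5
mask = th.bernoulli(th.full((batch, hidden_size), 1.0 - dropout))  # fixed for all timesteps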
def forward(self, input):
return torch.tanh(input)
def getCoef(outputs):
'''
Extracts the mean, standard deviation and correlation
params:
outputs : Output of the SRNN model
'''
mux, muy, sx, sy, corr = outputs[:, :, 0], outputs[:, :, 1], outputs[:, :, 2], outputs[:, :, 3], outputs[:, :, 4]
# Exponential to get a positive value for std dev
sx = torch.exp(sx)
sy = torch.exp(sy)
# tanh to get a value between [-1, 1] for correlation
corr = torch.tanh(corr)
return mux, muy, sx, sy, corr
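The five outputs parameterise a bivariate Gaussian over (x, y) offsets; a hedged sketch of drawing one sample from scalar parameters (numpy stands in here for whatever sampler the surrounding project actually uses):

import numpy as np

def sample_point(mux, muy, sx, sy, corr):
    mean = [mux, muy]
    cov = [[sx * sx, corr * sx * sy],
           [corr * sx * sy, sy * sy]]
    return np.random.multivariate_normal(mean, cov)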
def getCoef_train(outputs):
mux, muy, sx, sy, corr = outputs[:, 0], outputs[:, 1], outputs[:, 2], outputs[:, 3], outputs[:, 4]
sx = torch.exp(sx)
sy = torch.exp(sy)
corr = torch.tanh(corr)
return mux, muy, sx, sy, corr
def test_simple(self):
x = Variable(torch.Tensor([0.4]), requires_grad=True)
y = Variable(torch.Tensor([0.7]), requires_grad=True)
def f(x, y):
return torch.sigmoid(torch.tanh(x * (x + y)))
trace, z = torch.jit.trace(f, (x, y), nderivs=0)
torch._C._jit_pass_lint(trace)
torch._C._jit_pass_onnx(trace)
torch._C._jit_pass_lint(trace)
self.assertExpected(str(trace))
def test_lstm_fusion(self):
input = Variable(torch.randn(3, 10).cuda())
hx = Variable(torch.randn(3, 20).cuda())
cx = Variable(torch.randn(3, 20).cuda())
module = nn.LSTMCell(10, 20).cuda() # Just to allocate weights with correct sizes
def LSTMCell(input, hidden, w_ih, w_hh, b_ih=None, b_hh=None):
hx, cx = hidden
gates = F.linear(input, w_ih, b_ih) + F.linear(hx, w_hh, b_hh)
ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1)
ingate = F.sigmoid(ingate)
forgetgate = F.sigmoid(forgetgate)
cellgate = F.tanh(cellgate)
outgate = F.sigmoid(outgate)
cy = (forgetgate * cx) + (ingate * cellgate)
hy = outgate * F.tanh(cy)
return hy, cy
trace, _ = torch.jit.trace(LSTMCell, (input, (hx, cx)) + tuple(module.parameters()))
torch._C._jit_pass_lint(trace)
torch._C._jit_pass_onnx(trace)
torch._C._jit_pass_lint(trace)
torch._C._jit_pass_fuse(trace)
torch._C._jit_pass_lint(trace)
self.assertExpected(str(trace))
def test_cse(self):
x = Variable(torch.Tensor([0.4, 0.3]), requires_grad=True)
y = Variable(torch.Tensor([0.7, 0.5]), requires_grad=True)
trace = torch._C._tracer_enter((x, y), 0)
w = (x + y) * (x + y) * (x + y)
t = torch.tanh(w) + torch.tanh(w)
z = (x + y) * (x + y) * (x + y) + t
torch._C._tracer_exit((z,))
torch._C._jit_pass_lint(trace)
torch._C._jit_pass_onnx(trace)
torch._C._jit_pass_lint(trace)
torch._C._jit_pass_cse(trace)
self.assertExpected(str(trace))
def test_verify(self):
x = Variable(torch.Tensor([0.4]), requires_grad=True)
y = Variable(torch.Tensor([0.7]), requires_grad=True)
@torch.jit.compile(verify=True, optimize=False)
def doit(x, y):
return torch.sigmoid(torch.tanh(x * (x + y)))
z = doit(x, y)
z2 = doit(x, y)
self.assertEqual(z, torch.sigmoid(torch.tanh(x * (x + y))))
self.assertEqual(z, z2)
def test_disabled_traced_function(self):
x = Variable(torch.Tensor([0.4]), requires_grad=True)
y = Variable(torch.Tensor([0.7]), requires_grad=True)
@torch.jit.compile(enabled=False)
def doit(x, y):
return torch.sigmoid(torch.tanh(x * (x + y)))
z = doit(x, y)
z2 = doit(x, y)
self.assertEqual(z, torch.sigmoid(torch.tanh(x * (x + y))))
self.assertEqual(z, z2)
def test_python_ir(self):
x = Variable(torch.Tensor([0.4]), requires_grad=True)
y = Variable(torch.Tensor([0.7]), requires_grad=True)
def doit(x, y):
return torch.sigmoid(torch.tanh(x * (x + y)))
traced, _ = torch.jit.trace(doit, (x, y))
g = torch._C._jit_get_graph(traced)
g2 = torch._C.Graph()
g_to_g2 = {}
for node in g.inputs():
g_to_g2[node] = g2.addInput()
for node in g.nodes():
if node.kind() == "PythonOp":
n_ = g2.create(node.pyname(),
[g_to_g2[i] for i in node.inputs()]) \
.setType(node.typeOption()) \
.s_("note", "from_pyop") \
.i_("some_value", len(node.scalar_args()))
assert(n_.i("some_value") == len(node.scalar_args()))
else:
n_ = g2.createClone(node, lambda x: g_to_g2[x])
assert(n_.kindOf("Offset") == "i")
g_to_g2[node] = g2.appendNode(n_)
for node in g.outputs():
g2.registerOutput(g_to_g2[node])
t_node = g2.create("TensorTest").t_("a", torch.ones([2, 2]))
assert(t_node.attributeNames() == ["a"])
g2.appendNode(t_node)
assert(torch.equal(torch.ones([2, 2]), t_node.t("a")))
self.assertExpected(str(g2))
def update_buffer(self, S_tm1, c_t, o_tm1, ident):
# concat previous output & context
idt = torch.tanh(self.F_u(ident))
o_tm1 = o_tm1.squeeze(0)
z_t = torch.cat([c_t + idt, o_tm1/30], 1)
z_t = z_t.unsqueeze(2)
Sp = torch.cat([z_t, S_tm1[:, :, :-1]], 2)
# update S
u = self.N_u(Sp.view(Sp.size(0), -1))
u[:, :idt.size(1)] = u[:, :idt.size(1)] + idt
u = u.unsqueeze(2)
S = torch.cat([u, S_tm1[:, :, :-1]], 2)
return S
def getCoef(outputs):
'''
Extracts the mean, standard deviation and correlation
params:
outputs : Output of the SRNN model
'''
mux, muy, sx, sy, corr = outputs[:, :, 0], outputs[:, :, 1], outputs[:, :, 2], outputs[:, :, 3], outputs[:, :, 4]
sx = torch.exp(sx)
sy = torch.exp(sy)
corr = torch.tanh(corr)
return mux, muy, sx, sy, corr
def _get_rnn_output(self, input_word, input_char, mask=None, length=None, hx=None):
# hack length from mask
# we do not hack mask from length for special reasons.
# Thus, always provide mask if it is necessary.
if length is None and mask is not None:
length = mask.data.sum(dim=1).long()
# [batch, length, word_dim]
word = self.word_embedd(input_word)
# [batch, length, char_length, char_dim]
char = self.char_embedd(input_char)
char_size = char.size()
# first transform to [batch *length, char_length, char_dim]
# then transpose to [batch * length, char_dim, char_length]
char = char.view(char_size[0] * char_size[1], char_size[2], char_size[3]).transpose(1, 2)
# put into cnn [batch*length, char_filters, char_length]
# then put into maxpooling [batch * length, char_filters]
char, _ = self.conv1d(char).max(dim=2)
# reshape to [batch, length, char_filters]
char = torch.tanh(char).view(char_size[0], char_size[1], -1)
# concatenate word and char [batch, length, word_dim+char_filter]
input = torch.cat([word, char], dim=2)
# apply dropout
input = self.dropout_in(input)
# prepare packed_sequence
if length is not None:
seq_input, hx, rev_order, mask = utils.prepare_rnn_seq(input, length, hx=hx, masks=mask, batch_first=True)
seq_output, hn = self.rnn(seq_input, hx=hx)
output, hn = utils.recover_rnn_seq(seq_output, rev_order, hx=hn, batch_first=True)
else:
# output from rnn [batch, length, hidden_size]
output, hn = self.rnn(input, hx=hx)
output = self.dropout_rnn(output)
if self.dense is not None:
# [batch, length, tag_space]
output = F.elu(self.dense(output))
return output, hn, mask, length
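A minimal sketch of the character-level CNN block used above (embedding -> Conv1d -> max over characters -> tanh), with made-up sizes:

import torch
import torch.nn as nn

batch, length, char_length = 2, 5, 7
char_dim, char_filters = 8, 16
char_embedd = nn.Embedding(100, char_dim)
conv1d = nn.Conv1d(char_dim, char_filters, kernel_size=3, padding=1)

input_char = torch.randint(0, 100, (batch, length, char_length))
char = char_embedd(input_char)  # [batch, length, char_length, char_dim]
char = char.view(batch * length, char_length, char_dim).transpose(1, 2)
char, _ = conv1d(char).max(dim=2)  # max pooling over the character dimension
char = torch.tanh(char).view(batch, length, -1)  # [batch, length, char_filters]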
def _get_rnn_output(self, input_word, input_char, mask=None, length=None, hx=None):
# [batch, length, word_dim]
word = self.word_embedd(input_word)
# [batch, length, char_length, char_dim]
char = self.char_embedd(input_char)
char_size = char.size()
# first transform to [batch *length, char_length, char_dim]
# then transpose to [batch * length, char_dim, char_length]
char = char.view(char_size[0] * char_size[1], char_size[2], char_size[3]).transpose(1, 2)
# put into cnn [batch*length, char_filters, char_length]
# then put into maxpooling [batch * length, char_filters]
char, _ = self.conv1d(char).max(dim=2)
# reshape to [batch, length, char_filters]
char = torch.tanh(char).view(char_size[0], char_size[1], -1)
# concatenate word and char [batch, length, word_dim+char_filter]
input = torch.cat([word, char], dim=2)
# output from rnn [batch, length, hidden_size]
output, hn = self.rnn(input, mask, hx=hx)
# apply dropout for the output of rnn
output = self.dropout_rnn(output.transpose(1, 2)).transpose(1, 2)
if self.dense is not None:
# [batch, length, tag_space]
output = F.elu(self.dense(output))
return output, hn, mask, length
def _get_encoder_output(self, input_word, input_char, input_pos, mask_e=None, length_e=None, hx=None):
# [batch, length, word_dim]
word = self.word_embedd(input_word)
# [batch, length, pos_dim]
pos = self.pos_embedd(input_pos)
# [batch, length, char_length, char_dim]
char = self.char_embedd(input_char)
char_size = char.size()
# first transform to [batch *length, char_length, char_dim]
# then transpose to [batch * length, char_dim, char_length]
char = char.view(char_size[0] * char_size[1], char_size[2], char_size[3]).transpose(1, 2)
# put into cnn [batch*length, char_filters, char_length]
# then put into maxpooling [batch * length, char_filters]
char, _ = self.conv1d(char).max(dim=2)
# reshape to [batch, length, char_filters]
char = torch.tanh(char).view(char_size[0], char_size[1], -1)
# apply dropout on input
word = self.dropout_in(word)
pos = self.dropout_in(pos)
char = self.dropout_in(char)
# concatenate word and char [batch, length, word_dim+char_filter]
src_encoding = torch.cat([word, char, pos], dim=2)
# output from rnn [batch, length, hidden_size]
output, hn = self.encoder(src_encoding, mask_e, hx=hx)
# apply dropout
# [batch, length, hidden_size] --> [batch, hidden_size, length] --> [batch, length, hidden_size]
output = self.dropout_out(output.transpose(1, 2)).transpose(1, 2)
return src_encoding, output, hn, mask_e, length_e
def _step(self, H_t, T_t, h0, h_mask, t_mask):
s_lm1 = h0
for l, (rnn_h, rnn_t) in enumerate(zip(self.rnn_h, self.rnn_t)):
s_lm1_H = h_mask.expand_as(s_lm1) * s_lm1
s_lm1_T = t_mask.expand_as(s_lm1) * s_lm1
if l == 0:
H_t = F.tanh(H_t + rnn_h(s_lm1_H))
T_t = F.sigmoid(T_t + rnn_t(s_lm1_T))
else:
H_t = F.tanh(rnn_h(s_lm1_H))
T_t = F.sigmoid(rnn_t(s_lm1_T))
s_l = (H_t - s_lm1) * T_t + s_lm1
s_lm1 = s_l
return s_l
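The last line is the coupled highway update s_l = (H_t - s_{l-1}) * T_t + s_{l-1}: when the transform gate T_t is 0 the layer copies its input through unchanged, and when it is 1 it outputs the candidate H_t. A scalar sketch:

import torch

s = torch.tensor([1.0])              # carried state
H = torch.tanh(torch.tensor([0.5]))  # candidate
for gate in (0.0, 0.5, 1.0):
    T = torch.tensor([gate])
    print((H - s) * T + s)  # s at T=0, H at T=1, halfway in between at T=0.5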
def LSTMCell(input, hidden, w_ih, w_hh, b_ih=None, b_hh=None):
hx, cx = hidden
gates = F.linear(input, w_ih, b_ih) + F.linear(hx, w_hh, b_hh)
ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1)
ingate = F.sigmoid(ingate)
forgetgate = F.sigmoid(forgetgate)
cellgate = F.tanh(cellgate)
outgate = F.sigmoid(outgate)
cy = (forgetgate * cx) + (ingate * cellgate)
hy = outgate * F.tanh(cy)
return hy, cy
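A hedged usage sketch for the functional cell above, with randomly initialised weights in the shapes F.linear expects (4 * hidden_size rows, gates ordered i, f, g, o):

import torch

batch, input_size, hidden_size = 3, 10, 20
input = torch.randn(batch, input_size)
hx = torch.randn(batch, hidden_size)
cx = torch.randn(batch, hidden_size)
w_ih = torch.randn(4 * hidden_size, input_size)
w_hh = torch.randn(4 * hidden_size, hidden_size)
hy, cy = LSTMCell(input, (hx, cx), w_ih, w_hh)  # both batch x hidden_size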
def test_simple(self):
x = Variable(torch.Tensor([0.4]), requires_grad=True)
y = Variable(torch.Tensor([0.7]), requires_grad=True)
def f(x, y):
return torch.sigmoid(torch.tanh(x * (x + y)))
trace, z = torch.jit.trace(f, (x, y), nderivs=0)
self.assertExpectedTrace(trace)
def test_scopes(self):
x = Variable(torch.Tensor([0.4]), requires_grad=True)
y = Variable(torch.Tensor([0.7]), requires_grad=True)
def f(x, y):
out = x + y
with torch.jit.scope('Foo', out):
out = x * out
with torch.jit.scope('Bar', out):
out = torch.tanh(out)
out = torch.sigmoid(out)
return out
trace, z = torch.jit.trace(f, (x, y), nderivs=0)
self.assertExpectedTrace(trace)
def test_cse(self):
x = Variable(torch.Tensor([0.4, 0.3]), requires_grad=True)
y = Variable(torch.Tensor([0.7, 0.5]), requires_grad=True)
trace = torch._C._tracer_enter((x, y), 0)
w = (x + y) * (x + y) * (x + y)
t = torch.tanh(w) + torch.tanh(w)
z = (x + y) * (x + y) * (x + y) + t
torch._C._tracer_exit((z,))
torch._C._jit_pass_lint(trace)
torch._C._jit_pass_cse(trace)
self.assertExpectedTrace(trace)
def test_compile_addc(self):
x = Variable(torch.Tensor([0.4]), requires_grad=True).float().cuda()
y = Variable(torch.Tensor([0.7]), requires_grad=True).float().cuda()
@torch.jit.compile(nderivs=0)
def doit(x, y):
return torch.sigmoid(torch.tanh(x * (x + y) + 1))
z = doit(x, y)
with self.assertCompiled(doit):
z2 = doit(x, y)
self.assertEqual(z, torch.sigmoid(torch.tanh(x * (x + y) + 1)))
self.assertEqual(z, z2)
def test_traced_function(self):
x = Variable(torch.Tensor([0.4]), requires_grad=True)
y = Variable(torch.Tensor([0.7]), requires_grad=True)
@torch.jit.compile(nderivs=0)
def doit(x, y):
return torch.sigmoid(torch.tanh(x * (x + y)))
z = doit(x, y)
with self.assertCompiled(doit):
z2 = doit(x, y)
self.assertEqual(z, torch.sigmoid(torch.tanh(x * (x + y))))
self.assertEqual(z, z2)
def test_python_ir(self):
x = Variable(torch.Tensor([0.4]), requires_grad=True)
y = Variable(torch.Tensor([0.7]), requires_grad=True)
def doit(x, y):
return torch.sigmoid(torch.tanh(x * (x + y)))
traced, _ = torch.jit.trace(doit, (x, y))
g = torch._C._jit_get_graph(traced)
g2 = torch._C.Graph()
g_to_g2 = {}
for node in g.inputs():
g_to_g2[node] = g2.addInput()
for node in g.nodes():
n_ = g2.createClone(node, lambda x: g_to_g2[x])
g2.appendNode(n_)
for o, no in zip(node.outputs(), n_.outputs()):
g_to_g2[o] = no
for node in g.outputs():
g2.registerOutput(g_to_g2[node])
t_node = g2.create("TensorTest").t_("a", torch.ones([2, 2]))
assert(t_node.attributeNames() == ["a"])
g2.appendNode(t_node)
assert(torch.equal(torch.ones([2, 2]), t_node.t("a")))
self.assertExpected(str(g2))
def tanh_quantize(input, bits):
assert bits >= 1, bits
if bits == 1:
return torch.sign(input)
input = torch.tanh(input) # [-1, 1]
input_rescale = (input + 1.0) / 2 #[0, 1]
n = math.pow(2.0, bits) - 1
v = torch.floor(input_rescale * n + 0.5) / n
v = 2 * v - 1 # [-1, 1]
v = 0.5 * torch.log((1 + v) / (1 - v)) # arctanh
return v
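A quick numeric check of the round trip (tanh, uniform quantisation of the rescaled value, then arctanh back to the original scale), staying away from the tails where arctanh diverges:

import torch

x = torch.linspace(-0.5, 0.5, steps=9)
xq = tanh_quantize(x, bits=3)  # 2**3 - 1 = 7 quantisation levels
print((x - xq).abs().max())    # small reconstruction error on this range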
def duplicate_model_with_quant(model, bits, overflow_rate=0.0, counter=10, type='linear'):
"""assume that original model has at least a nn.Sequential"""
assert type in ['linear', 'minmax', 'log', 'tanh']
if isinstance(model, nn.Sequential):
l = OrderedDict()
for k, v in model._modules.items():
if isinstance(v, (nn.Conv2d, nn.Linear, nn.BatchNorm1d, nn.BatchNorm2d, nn.AvgPool2d)):
l[k] = v
if type == 'linear':
quant_layer = LinearQuant('{}_quant'.format(k), bits=bits, overflow_rate=overflow_rate, counter=counter)
elif type == 'log':
# quant_layer = LogQuant('{}_quant'.format(k), bits=bits, overflow_rate=overflow_rate, counter=counter)
quant_layer = NormalQuant('{}_quant'.format(k), bits=bits, quant_func=log_minmax_quantize)
elif type == 'minmax':
quant_layer = NormalQuant('{}_quant'.format(k), bits=bits, quant_func=min_max_quantize)
else:
quant_layer = NormalQuant('{}_quant'.format(k), bits=bits, quant_func=tanh_quantize)
l['{}_{}_quant'.format(k, type)] = quant_layer
else:
l[k] = duplicate_model_with_quant(v, bits, overflow_rate, counter, type)
m = nn.Sequential(l)
return m
else:
for k, v in model._modules.items():
model._modules[k] = duplicate_model_with_quant(v, bits, overflow_rate, counter, type)
return model