The following code examples, extracted from open-source Python projects, illustrate how to use torch.nn.GRUCell().
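Before the project snippets, a minimal sketch of the GRUCell API itself: unlike nn.GRU, the cell processes one time step per call, so the time loop is written by hand. The sizes below are arbitrary, chosen only for illustration.

import torch
import torch.nn as nn

# One GRUCell step per call; the time loop is unrolled manually.
cell = nn.GRUCell(input_size=10, hidden_size=20)
x = torch.randn(4, 6, 10)   # (batch, seq_len, input_size)
hx = torch.zeros(4, 20)     # (batch, hidden_size)
for t in range(x.size(1)):
    hx = cell(x[:, t], hx)  # returns the next hidden state, shape (4, 20)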
def __init__(self, num_inputs, action_space, use_gru):
    super(CNNPolicy, self).__init__()
    self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4)
    self.conv2 = nn.Conv2d(32, 64, 4, stride=2)
    self.conv3 = nn.Conv2d(64, 32, 3, stride=1)

    self.linear1 = nn.Linear(32 * 7 * 7, 512)

    if use_gru:
        self.gru = nn.GRUCell(512, 512)

    self.critic_linear = nn.Linear(512, 1)

    if action_space.__class__.__name__ == "Discrete":
        num_outputs = action_space.n
        self.dist = Categorical(512, num_outputs)
    elif action_space.__class__.__name__ == "Box":
        num_outputs = action_space.shape[0]
        self.dist = DiagGaussian(512, num_outputs)
    else:
        raise NotImplementedError

    self.train()
    self.reset_parameters()
def __init__(self, question_size, passage_size, hidden_size, attn_size=None,
             cell_type=nn.GRUCell, num_layers=1, dropout=0, residual=False,
             **kwargs):
    super().__init__()
    self.num_layers = num_layers
    if attn_size is None:
        attn_size = question_size

    # TODO: what is V_q? (section 3.4)
    v_q_size = question_size
    self.question_pooling = AttentionPooling(question_size,
                                             v_q_size, attn_size=attn_size)
    self.passage_pooling = AttentionPooling(passage_size,
                                            question_size, attn_size=attn_size)

    self.V_q = nn.Parameter(torch.randn(1, 1, v_q_size), requires_grad=True)
    self.cell = StackedCell(question_size, question_size, num_layers=num_layers,
                            dropout=dropout, rnn_cell=cell_type,
                            residual=residual, **kwargs)
def __init__(self, fea_size, dropout=False, gate_width=128, use_region=True,
             use_kernel_function=False):
    super(Hierarchical_Message_Passing_Structure_base, self).__init__()
    # self.w_object = Parameter()
    if use_kernel_function:
        Message_Passing_Unit = Message_Passing_Unit_v2
    else:
        Message_Passing_Unit = Message_Passing_Unit_v1

    self.gate_sub2pred = Message_Passing_Unit(fea_size, gate_width)
    self.gate_obj2pred = Message_Passing_Unit(fea_size, gate_width)
    self.gate_pred2sub = Message_Passing_Unit(fea_size, gate_width)
    self.gate_pred2obj = Message_Passing_Unit(fea_size, gate_width)

    self.GRU_object = Gated_Recurrent_Unit(fea_size, dropout)  # nn.GRUCell(fea_size, fea_size)
    self.GRU_phrase = Gated_Recurrent_Unit(fea_size, dropout)

    if use_region:
        self.gate_pred2reg = Message_Passing_Unit(fea_size, gate_width)
        self.gate_reg2pred = Message_Passing_Unit(fea_size, gate_width)
        self.GRU_region = Gated_Recurrent_Unit(fea_size, dropout)
def __init__(self, T, A, B, z_size, N, dec_size, enc_size):
    super(DrawModel, self).__init__()
    self.T = T
    # self.batch_size = batch_size
    self.A = A
    self.B = B
    self.z_size = z_size
    self.N = N
    self.dec_size = dec_size
    self.enc_size = enc_size
    self.cs = [0] * T
    self.logsigmas, self.sigmas, self.mus = [0] * T, [0] * T, [0] * T

    self.encoder = nn.LSTMCell(2 * N * N + dec_size, enc_size)
    self.encoder_gru = nn.GRUCell(2 * N * N + dec_size, enc_size)
    self.mu_linear = nn.Linear(dec_size, z_size)
    self.sigma_linear = nn.Linear(dec_size, z_size)

    self.decoder = nn.LSTMCell(z_size, dec_size)
    self.decoder_gru = nn.GRUCell(z_size, dec_size)
    self.dec_linear = nn.Linear(dec_size, 5)
    self.dec_w_linear = nn.Linear(dec_size, N * N)

    self.sigmoid = nn.Sigmoid()
def __init__(self, input_size, hidden_size, batch_size):
    super(PtrNet_tanh, self).__init__()
    self.rnn_layers = 1
    self.hidden_size = hidden_size
    self.batch_size = batch_size
    self.input_size = input_size
    self.n = 16
    self.init_var = 0.08

    self.init_token = nn.Parameter(torch.zeros((self.input_size)))
    self.W1 = nn.Parameter(torch.randn((self.hidden_size, self.hidden_size)) * self.init_var)
    self.W2 = nn.Parameter(torch.randn((self.hidden_size, self.hidden_size)) * self.init_var)
    self.v = nn.Parameter(torch.randn((self.hidden_size, 1)) * self.init_var)

    # cells
    self.encoder_cell = nn.GRUCell(input_size, hidden_size)
    self.decoder_cell = nn.GRUCell(input_size, hidden_size)
    self.NLLoss = nn.NLLLoss(size_average=True)  # size_average is deprecated; current PyTorch uses reduction='mean'

    # initialize weights
    self.init_weights()
def __init__(self, input_size, hidden_size, batch_size):
    super(PtrNet_tanh, self).__init__()
    print('Initializing Parameters Merge')
    self.hidden_size = hidden_size
    self.batch_size = batch_size
    self.input_size = input_size
    self.n = 12
    self.init_var = 0.08

    self.init_token = nn.Parameter(-1 * torch.ones((self.input_size)))
    self.pad_token = nn.Parameter(-1 * torch.ones((self.input_size)))
    self.end_state = nn.Parameter(-1 * torch.ones((self.hidden_size)))
    self.W1 = nn.Parameter(torch.randn((self.hidden_size, self.hidden_size)) * self.init_var)
    self.W2 = nn.Parameter(torch.randn((self.hidden_size, self.hidden_size)) * self.init_var)
    self.v = nn.Parameter(torch.randn((self.hidden_size, 1)) * self.init_var)

    # cells
    self.encoder_cell = nn.GRUCell(input_size, hidden_size)
    self.decoder_cell = nn.GRUCell(input_size, hidden_size)
    self.NLLoss = nn.NLLLoss(size_average=True)

    # initialize weights
    self.init_weights()
def __init__(self, num_characters, dim):
    super(RNNDecoder, self).__init__()
    self.embedding = nn.Embedding(num_characters, dim)
    self.rnn_cell = nn.GRUCell(dim, dim)
def test_RNN_cell(self):
    # this is just a smoke test; these modules are implemented through
    # autograd so no Jacobian test is needed
    for module in (nn.RNNCell, nn.GRUCell):
        for bias in (True, False):
            input = Variable(torch.randn(3, 10))
            hx = Variable(torch.randn(3, 20))
            cell = module(10, 20, bias=bias)
            for i in range(6):
                hx = cell(input, hx)
            hx.sum().backward()
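This test predates PyTorch 0.4, where Variable was merged into Tensor; on current releases the same smoke test runs with plain tensors. A hedged modern equivalent:

import torch
import torch.nn as nn

# Same smoke test without the legacy Variable wrapper.
for module in (nn.RNNCell, nn.GRUCell):
    for bias in (True, False):
        inp = torch.randn(3, 10)
        hx = torch.randn(3, 20)
        cell = module(10, 20, bias=bias)
        for _ in range(6):
            hx = cell(inp, hx)
        hx.sum().backward()  # gradients flow into the cell's parameters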
def __init__(self, B, T, N, F, L):
    """
    Constructing blocks of the model based on the sparse
    skip-filtering connections.
    Args:
        B : (int) Batch size.
        T : (int) Length of the time-sequence.
        N : (int) Original dimensionality of the input.
        F : (int) Dimensionality of the input (number of frequency sub-bands).
        L : (int) Length of the half context time-sequence.
    """
    super(BiGRUEncoder, self).__init__()
    self._B = B
    self._T = T
    self._N = N
    self._F = F
    self._L = L
    self._alpha = 1.

    # Bi-GRU encoder
    self.gruEncF = nn.GRUCell(self._F, self._F)
    self.gruEncB = nn.GRUCell(self._F, self._F)

    # Initialize the weights
    self.initialize_encoder()
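The encoder above only constructs the two cells. A hedged sketch of how such forward/backward cells are typically unrolled into a bi-directional encoding (the function name and state handling are assumptions, not this project's code):

import torch

def bi_gru_encode(gru_f, gru_b, x):
    """x: (B, T, F). Returns forward and backward state sequences."""
    B, T, F = x.shape
    h_f = x.new_zeros(B, F)
    h_b = x.new_zeros(B, F)
    states_f, states_b = [], []
    for t in range(T):
        h_f = gru_f(x[:, t], h_f)          # forward in time
        h_b = gru_b(x[:, T - 1 - t], h_b)  # backward in time
        states_f.append(h_f)
        states_b.append(h_b)
    states_b.reverse()                      # align backward states with time
    return torch.stack(states_f, 1), torch.stack(states_b, 1)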
def __init__(self, B, T, N, F, L, infr):
    """
    Constructing blocks of the model based on the sparse
    skip-filtering connections.
    Args:
        B    : (int)  Batch size.
        T    : (int)  Length of the time-sequence.
        N    : (int)  Original dimensionality of the input.
        F    : (int)  Dimensionality of the input (number of frequency sub-bands).
        L    : (int)  Length of the half context time-sequence.
        infr : (bool) Whether the decoder uses recurrent inference.
    """
    super(Decoder, self).__init__()
    self._B = B
    self._T = T
    self._N = N
    self._F = F
    self._L = L
    if infr:
        self._gruout = 2 * self._F
    else:
        self._gruout = self._F

    # GRU decoder
    self.gruDec = nn.GRUCell(2 * self._F, self._gruout)

    # Initialize the weights
    self.initialize_decoder()
def __init__(self, input_size, hidden_size, num_layers=1,
             dropout=0, bias=True, rnn_cell=nn.GRUCell, residual=False):
    super(StackedCell, self).__init__()

    self.dropout = nn.Dropout(dropout)
    self.num_layers = num_layers
    self.hidden_size = hidden_size
    self.residual = residual

    self.layers = nn.ModuleList()
    for _ in range(num_layers):
        self.layers.append(rnn_cell(input_size, hidden_size, bias=bias))
        input_size = hidden_size
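For context, a hypothetical single-step forward for a stack like this, feeding each layer's output to the next with inter-layer dropout and an optional residual skip. The method body and hidden-state layout are assumptions for illustration, not the project's implementation:

def forward(self, inputs, hidden):
    # hidden: (num_layers, batch, hidden_size) stacked per-layer states
    next_hidden = []
    for i, layer in enumerate(self.layers):
        h_i = layer(inputs, hidden[i])      # one GRUCell step for layer i
        next_hidden.append(h_i)
        if self.residual and inputs.size(-1) == h_i.size(-1):
            inputs = inputs + h_i           # residual skip connection
        else:
            inputs = h_i
        if i + 1 < self.num_layers:
            inputs = self.dropout(inputs)   # dropout between layers only
    return inputs, torch.stack(next_hidden)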
def __init__(self, question_embed_size, passage_embed_size, hidden_size,
             attention_layer_factory, attn_args, attn_kwags,
             attn_mode="pair_encoding", num_layers=1, dropout=0,
             bias=True, rnn_cell=nn.GRUCell, residual=False, gated=True):
    input_size = question_embed_size + passage_embed_size
    super().__init__(input_size, hidden_size, num_layers,
                     dropout, bias, rnn_cell, residual)
    self.attention = attention_layer_factory(*attn_args, **attn_kwags)
    self.gated = gated
    self.attn_mode = attn_mode
    if gated:
        self.gate = nn.Sequential(
            nn.Linear(input_size, input_size, bias=False),
            nn.Sigmoid()
        )
def __init__(self, num_layers, input_size, rnn_size, dropout):
    super(StackedGRU, self).__init__()
    self.dropout = nn.Dropout(dropout)
    self.num_layers = num_layers
    self.layers = nn.ModuleList()

    for i in range(num_layers):
        self.layers.append(nn.GRUCell(input_size, rnn_size))
        input_size = rnn_size
def __init__(self, *args, **kwargs):
    super(MaskedGRU, self).__init__(nn.GRUCell, *args, **kwargs)
def __init__(self, num_layers, input_size, rnn_size, dropout):
    super(StackedGRUCell, self).__init__()
    self.dropout = nn.Dropout(dropout)
    self.num_layers = num_layers
    self.layers = nn.ModuleList()

    for i in range(num_layers):
        self.layers.append(nn.GRUCell(input_size=input_size, hidden_size=rnn_size))
        input_size = rnn_size
def __init__(self, *args, **kwargs):
    super(StackedGRU, self).__init__('GRUCell', *args, **kwargs)
def __init__(self, input_size, hidden_size, bias=True):
    super(NormalizedGRUCell, self).__init__(input_size, hidden_size, bias)
    # match GRUCell params for gates (reset, update) and input
    self.gamma_ih = nn.Parameter(torch.ones(3 * self.hidden_size))
    self.gamma_hh = nn.Parameter(torch.ones(3 * self.hidden_size))
    self.gamma_ih.custom, self.gamma_hh.custom = True, True
    self.eps = 0
def test_RNN_cell_no_broadcasting(self):
    def test(cell_module, input, hx, input_size, hidden_size):
        cell = cell_module(input_size, hidden_size)
        self.assertRaises(RuntimeError, lambda: cell(input, hx))

    def test_all(hidden_size, bad_hx, good_hx, input_size, input):
        test(nn.RNNCell, input, bad_hx, input_size, hidden_size)
        test(nn.GRUCell, input, bad_hx, input_size, hidden_size)
        test(nn.LSTMCell, input, (bad_hx, good_hx), input_size, hidden_size)
        test(nn.LSTMCell, input, (good_hx, bad_hx), input_size, hidden_size)

    hidden_size = 20
    input_size = 10
    input = Variable(torch.randn(3, input_size))
    bad_hx = Variable(torch.randn(1, hidden_size))
    good_hx = Variable(torch.randn(3, hidden_size))

    # Test hidden/input batch size broadcasting
    test_all(hidden_size, bad_hx, good_hx, input_size, input)

    # Test hx's hidden_size vs module's hidden_size broadcasting
    bad_hx = Variable(torch.randn(3, 1))
    test_all(hidden_size, bad_hx, good_hx, input_size, input)

    # Test input's input_size vs module's input_size broadcasting
    bad_input = Variable(torch.randn(3, 1))
    test_all(hidden_size, good_hx, good_hx, input_size, bad_input)
def init_weights(self):
    for m in self.modules():
        if isinstance(m, nn.LSTMCell) or isinstance(m, nn.GRUCell):
            m.weight_ih.data.uniform_(-self.init_var, self.init_var)
            m.weight_hh.data.uniform_(-self.init_var, self.init_var)
            m.bias_ih.data.uniform_(-self.init_var, self.init_var)
            m.bias_hh.data.uniform_(-self.init_var, self.init_var)
        if isinstance(m, nn.Linear):
            # m.weight.data.normal_(0, self.init_var)
            m.weight.data.uniform_(-self.init_var, self.init_var)

    self.W1.data.uniform_(-self.init_var, self.init_var)
    self.W2.data.uniform_(-self.init_var, self.init_var)
    self.v.data.uniform_(-self.init_var, self.init_var)
def init_weights(self):
    for m in self.modules():
        if isinstance(m, nn.LSTMCell) or isinstance(m, nn.GRUCell):
            m.weight_ih.data.uniform_(-self.init_var, self.init_var)
            m.weight_hh.data.uniform_(-self.init_var, self.init_var)
            m.bias_ih.data.uniform_(-self.init_var, self.init_var)
            m.bias_hh.data.uniform_(-self.init_var, self.init_var)
        if isinstance(m, nn.Linear):
            # m.weight.data.normal_(0, self.init_var)
            m.weight.data.uniform_(-self.init_var, self.init_var)
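Both init_weights variants write through .data, which sidesteps autograd tracking. The torch.nn.init API does the same job without that; a minimal sketch, assuming the same uniform range:

import torch.nn as nn

def init_weights(self):
    for m in self.modules():
        if isinstance(m, (nn.LSTMCell, nn.GRUCell)):
            # covers weight_ih, weight_hh, bias_ih, bias_hh
            for param in m.parameters():
                nn.init.uniform_(param, -self.init_var, self.init_var)
        elif isinstance(m, nn.Linear):
            nn.init.uniform_(m.weight, -self.init_var, self.init_var)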
def __init__(self, cell_type="lstm", input_size=1, hidden_size=20,
             output_size=1, nonlinearity="tanh"):
    super(lstm_rnn_gru, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.nonlinearity = nonlinearity.lower()
    assert self.nonlinearity in ['tanh', 'relu']
    self.cell_type = cell_type.lower()

    if self.cell_type == "lstm":
        self.layer1 = nn.LSTMCell(input_size=self.input_size, hidden_size=self.hidden_size)
        self.layer2 = nn.LSTMCell(input_size=self.hidden_size, hidden_size=self.output_size)
    elif self.cell_type == "rnn":
        self.layer1 = nn.RNNCell(input_size=self.input_size, hidden_size=self.hidden_size,
                                 nonlinearity=self.nonlinearity)
        self.layer2 = nn.RNNCell(input_size=self.hidden_size, hidden_size=self.output_size,
                                 nonlinearity=self.nonlinearity)
    elif self.cell_type == "gru":
        self.layer1 = nn.GRUCell(input_size=self.input_size, hidden_size=self.hidden_size)
        self.layer2 = nn.GRUCell(input_size=self.hidden_size, hidden_size=self.output_size)
    else:
        # raising a bare string is a TypeError in Python 3; raise an exception instead
        raise ValueError("Please enter a valid cell type (LSTM/RNN/GRU)")

    self.layer1.weight_hh.data.normal_(0.0, 0.1)
    self.layer1.weight_ih.data.normal_(0.0, 0.1)
    self.layer2.weight_hh.data.normal_(0.0, 0.1)
    self.layer2.weight_ih.data.normal_(0.0, 0.1)
    # Should I do something about the biases here?
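The class above only builds the two cells. A hypothetical unroll for the "gru" configuration shows how layer1/layer2 would be chained over a sequence; the loop and state handling are assumptions for illustration, not the project's code:

import torch

model = lstm_rnn_gru(cell_type="gru", input_size=1, hidden_size=20, output_size=1)
x = torch.randn(8, 50, 1)   # (batch, seq_len, input_size)
h1 = torch.zeros(8, 20)
h2 = torch.zeros(8, 1)
outputs = []
for t in range(x.size(1)):
    h1 = model.layer1(x[:, t], h1)   # first GRUCell
    h2 = model.layer2(h1, h2)        # second GRUCell produces the output state
    outputs.append(h2)
y = torch.stack(outputs, dim=1)      # (batch, seq_len, output_size)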