We have extracted the following 50 code examples from open-source Python projects to illustrate how to use torch.nn.GRU.
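Before the project code, here is a minimal, self-contained sketch of how torch.nn.GRU is constructed and called. It is not taken from any of the projects below; the sizes and shapes are illustrative assumptions.

import torch
import torch.nn as nn

# Illustrative sizes only (assumptions, not from the examples below).
gru = nn.GRU(input_size=10, hidden_size=20, num_layers=2, batch_first=True)

x = torch.randn(3, 5, 10)    # (batch, seq_len, input_size) because batch_first=True
h0 = torch.zeros(2, 3, 20)   # (num_layers * num_directions, batch, hidden_size)

output, hn = gru(x, h0)      # output: (3, 5, 20); hn: (2, 3, 20)

If the initial hidden state is omitted, it defaults to zeros, as one of the test snippets below also checks.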
def test_wrapper_stateful_single_state_gru(self):
    gru = GRU(bidirectional=True, num_layers=2, input_size=3, hidden_size=7, batch_first=True)
    encoder = PytorchSeq2SeqWrapper(gru, stateful=True)
    batch_sizes = [10, 5]
    states = []
    for batch_size in batch_sizes:
        tensor = Variable(torch.rand([batch_size, 5, 3]))
        mask = Variable(torch.ones(batch_size, 5))
        mask.data[0, 3:] = 0
        encoder_output = encoder(tensor, mask)
        states.append(encoder._states)  # pylint: disable=protected-access
    assert_almost_equal(encoder_output[0, 3:, :].data.numpy(), numpy.zeros((2, 14)))
    assert_almost_equal(
        states[-1][0][:, -5:, :].data.numpy(),
        states[-2][0][:, -5:, :].data.numpy()
    )
def __init__(self, hidden_size, output_size, r_factor=2, dropout_p=0.5):
    super(AttnDecoderRNN, self).__init__()
    self.r_factor = r_factor
    self.prenet = nn.Sequential(
        nn.Linear(output_size, 2 * hidden_size),
        nn.ReLU(),
        nn.Dropout(dropout_p),
        nn.Linear(2 * hidden_size, hidden_size),
        nn.ReLU(),
        nn.Dropout(dropout_p)
    )
    self.linear_dec = nn.Linear(2 * hidden_size, 2 * hidden_size)
    self.gru_att = nn.GRU(hidden_size, 2 * hidden_size, batch_first=True)
    self.attn = nn.Linear(2 * hidden_size, 1)  # TODO: change name...
    self.short_cut = nn.Linear(4 * hidden_size, 2 * hidden_size)
    self.gru_dec1 = nn.GRU(4 * hidden_size, 2 * hidden_size, num_layers=1, batch_first=True)
    self.gru_dec2 = nn.GRU(2 * hidden_size, 2 * hidden_size, num_layers=1, batch_first=True)
    self.out = nn.Linear(2 * hidden_size, r_factor * output_size)
def __init__(self, input_dim, conv_bank_dim, conv_dim1, conv_dim2, gru_dim, num_filters, is_masked):
    super(CBHG, self).__init__()
    self.num_filters = num_filters

    bank_out_dim = num_filters * conv_bank_dim
    self.conv_bank = nn.ModuleList()
    for i in range(num_filters):
        self.conv_bank.append(nn.Conv1d(input_dim, conv_bank_dim, i + 1, stride=1,
                                        padding=int(np.ceil(i / 2))))

    # define batch normalization layer, we use BN1D since the sequence length is not fixed
    self.bn_list = nn.ModuleList()
    self.bn_list.append(nn.BatchNorm1d(bank_out_dim))
    self.bn_list.append(nn.BatchNorm1d(conv_dim1))
    self.bn_list.append(nn.BatchNorm1d(conv_dim2))

    self.conv1 = nn.Conv1d(bank_out_dim, conv_dim1, 3, stride=1, padding=1)
    self.conv2 = nn.Conv1d(conv_dim1, conv_dim2, 3, stride=1, padding=1)

    if input_dim != conv_dim2:
        self.residual_proj = nn.Linear(input_dim, conv_dim2)

    self.highway = Highway(conv_dim2, 4)
    self.BGRU = nn.GRU(input_size=conv_dim2, hidden_size=gru_dim, num_layers=1,
                       batch_first=True, bidirectional=True)
def __init__(self, args):
    super(GRU, self).__init__()
    self.args = args
    # print(args)

    self.hidden_dim = args.lstm_hidden_dim
    self.num_layers = args.lstm_num_layers
    V = args.embed_num
    D = args.embed_dim
    C = args.class_num
    # self.embed = nn.Embedding(V, D, max_norm=args.max_norm)
    self.embed = nn.Embedding(V, D)
    # word embedding
    if args.word_Embedding:
        pretrained_weight = np.array(args.pretrained_weight)
        self.embed.weight.data.copy_(torch.from_numpy(pretrained_weight))
    # gru
    self.gru = nn.GRU(D, self.hidden_dim, dropout=args.dropout, num_layers=self.num_layers)
    # linear
    self.hidden2label = nn.Linear(self.hidden_dim, C)
    # hidden
    self.hidden = self.init_hidden(self.num_layers, args.batch_size)
    # dropout
    self.dropout = nn.Dropout(args.dropout)
def __init__(self, args):
    super(BiGRU, self).__init__()
    self.args = args
    # print(args)

    self.hidden_dim = args.lstm_hidden_dim
    self.num_layers = args.lstm_num_layers
    V = args.embed_num
    D = args.embed_dim
    C = args.class_num
    # self.embed = nn.Embedding(V, D, max_norm=args.max_norm)
    self.embed = nn.Embedding(V, D)
    # word embedding
    if args.word_Embedding:
        pretrained_weight = np.array(args.pretrained_weight)
        self.embed.weight.data.copy_(torch.from_numpy(pretrained_weight))
    # gru
    self.bigru = nn.GRU(D, self.hidden_dim, dropout=args.dropout,
                        num_layers=self.num_layers, bidirectional=True)
    # linear
    self.hidden2label = nn.Linear(self.hidden_dim * 2, C)
    # hidden
    self.hidden = self.init_hidden(self.num_layers, args.batch_size)
    # dropout
    self.dropout = nn.Dropout(args.dropout)
def forward(self, x):
    embed = self.embed(x)
    # CNN
    cnn_x = embed
    cnn_x = self.dropout(cnn_x)
    cnn_x = cnn_x.unsqueeze(1)
    cnn_x = [F.relu(conv(cnn_x)).squeeze(3) for conv in self.convs1]  # [(N, Co, W), ...] * len(Ks)
    cnn_x = torch.cat(cnn_x, 0)
    cnn_x = torch.transpose(cnn_x, 1, 2)
    # GRU
    lstm_out, self.hidden = self.gru(cnn_x, self.hidden)
    lstm_out = torch.transpose(lstm_out, 0, 1)
    lstm_out = torch.transpose(lstm_out, 1, 2)
    lstm_out = F.max_pool1d(lstm_out, lstm_out.size(2)).squeeze(2)
    # linear
    cnn_lstm_out = self.hidden2label1(F.tanh(lstm_out))
    cnn_lstm_out = self.hidden2label2(F.tanh(cnn_lstm_out))
    # output
    logit = cnn_lstm_out
    return logit
def __init__(self, vocab_dict, dropout_rate, embed_dim, hidden_dim, bidirectional=True):
    super(AoAReader, self).__init__()
    self.vocab_dict = vocab_dict
    self.hidden_dim = hidden_dim
    self.embed_dim = embed_dim
    self.dropout_rate = dropout_rate

    self.embedding = nn.Embedding(vocab_dict.size(), self.embed_dim, padding_idx=Constants.PAD)
    self.embedding.weight.data.uniform_(-0.05, 0.05)

    input_size = self.embed_dim
    self.gru = nn.GRU(input_size, hidden_size=self.hidden_dim, dropout=dropout_rate,
                      bidirectional=bidirectional, batch_first=True)

    # try independent gru
    # self.query_gru = nn.GRU(input_size, hidden_size=self.hidden_dim, dropout=dropout_rate,
    #                         bidirectional=bidirectional, batch_first=True)

    for weight in self.gru.parameters():
        if len(weight.size()) > 1:
            weigth_init.orthogonal(weight.data)
def forward(self, inputs, hidden):
    def select_layer(h_state, i):  # To work on both LSTM / GRU, RNN
        if isinstance(h_state, tuple):
            return tuple([select_layer(s, i) for s in h_state])
        else:
            return h_state[i]

    next_hidden = []
    for i, layer in enumerate(self.layers):
        next_hidden_i = layer(inputs, select_layer(hidden, i))
        output = next_hidden_i[0] if isinstance(next_hidden_i, tuple) \
            else next_hidden_i
        if i + 1 != self.num_layers:
            output = self.dropout(output)
        if self.residual:
            inputs = output + inputs
        else:
            inputs = output
        next_hidden.append(next_hidden_i)
    if isinstance(hidden, tuple):
        next_hidden = tuple([torch.stack(h) for h in zip(*next_hidden)])
    else:
        next_hidden = torch.stack(next_hidden)
    return inputs, next_hidden
def __init__(self, word_dim, num_words, char_dim, num_chars, num_filters, kernel_size,
             rnn_mode, hidden_size, num_layers, num_labels, tag_space=0,
             embedd_word=None, embedd_char=None, p_in=0.2, p_rnn=0.5):
    super(BiVarRecurrentConv, self).__init__(word_dim, num_words, char_dim, num_chars,
                                             num_filters, kernel_size, rnn_mode, hidden_size,
                                             num_layers, num_labels, tag_space=tag_space,
                                             embedd_word=embedd_word, embedd_char=embedd_char,
                                             p_in=p_in, p_rnn=p_rnn)

    self.dropout_in = None
    self.dropout_rnn = nn.Dropout2d(p_rnn)

    if rnn_mode == 'RNN':
        RNN = VarMaskedRNN
    elif rnn_mode == 'LSTM':
        RNN = VarMaskedLSTM
    elif rnn_mode == 'GRU':
        RNN = VarMaskedGRU
    else:
        raise ValueError('Unknown RNN mode: %s' % rnn_mode)

    self.rnn = RNN(word_dim + num_filters, hidden_size, num_layers=num_layers,
                   batch_first=True, bidirectional=True, dropout=(p_in, p_rnn))
def __init__(self, hidden_size, output_size, n_layers=1, dropout_p=0.1):
    super(BahdanauAttnDecoderRNN, self).__init__()

    # Define parameters
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.n_layers = n_layers
    self.dropout_p = dropout_p
    self.max_length = max_length  # NOTE: max_length is not a constructor argument; it must exist in the enclosing scope

    # Define layers
    self.embedding = nn.Embedding(output_size, hidden_size)
    self.dropout = nn.Dropout(dropout_p)
    self.attn = Attn('concat', hidden_size)
    self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=dropout_p)
    self.out = nn.Linear(hidden_size, output_size)
def __init__(self, attn_model, hidden_size, output_size, n_layers=1, dropout=0.1):
    super(LuongAttnDecoderRNN, self).__init__()

    # Keep for reference
    self.attn_model = attn_model
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.n_layers = n_layers
    self.dropout = dropout

    # Define layers
    self.embedding = nn.Embedding(output_size, hidden_size)
    self.embedding_dropout = nn.Dropout(dropout)
    self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=dropout)
    self.concat = nn.Linear(hidden_size * 2, hidden_size)
    self.out = nn.Linear(hidden_size, output_size)

    # Choose attention model
    if attn_model != 'none':
        self.attn = Attn(attn_model, hidden_size)
def __init__(self, vocab_size, max_len, hidden_size, input_dropout_p, dropout_p, n_layers, rnn_cell):
    super(BaseRNN, self).__init__()
    self.vocab_size = vocab_size
    self.max_len = max_len
    self.hidden_size = hidden_size
    self.n_layers = n_layers
    self.input_dropout_p = input_dropout_p
    self.input_dropout = nn.Dropout(p=input_dropout_p)
    if rnn_cell.lower() == 'lstm':
        self.rnn_cell = nn.LSTM
    elif rnn_cell.lower() == 'gru':
        self.rnn_cell = nn.GRU
    else:
        raise ValueError("Unsupported RNN Cell: {0}".format(rnn_cell))
    self.dropout_p = dropout_p
def __init__(self, hidden_size, output_size, n_layers=1, dropout_p=0.1, max_length=MAX_LENGTH):
    super(AttnDecoderRNN, self).__init__()
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.n_layers = n_layers
    self.dropout_p = dropout_p
    self.max_length = max_length

    if use_cuda:
        self.embedding = nn.Embedding(self.output_size, self.hidden_size).cuda()
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length).cuda()
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size).cuda()
        self.dropout = nn.Dropout(self.dropout_p).cuda()
        self.gru = nn.GRU(self.hidden_size, self.hidden_size).cuda()
        self.out = nn.Linear(self.hidden_size, self.output_size).cuda()
    else:
        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)
def __init__(self, lexicon, H=32, layers=1, seedDimensionality=None):
    super(self.__class__, self).__init__()
    seedDimensionality = seedDimensionality or H

    assert "START" in lexicon
    assert "END" in lexicon

    self.lexicon = lexicon
    self.model = nn.GRU(H, H, layers)
    self.encoder = nn.Embedding(len(lexicon), H)
    self.decoder = nn.Linear(H, len(lexicon))
    self.layers = layers
    self.h0 = nn.Linear(seedDimensionality, H * layers)
    self.H = H
def __init__(self, freq_dim, vocab_size, config):
    super(Seq2Seq, self).__init__(freq_dim, config)

    # For decoding
    decoder_cfg = config["decoder"]
    rnn_dim = self.encoder_dim
    embed_dim = decoder_cfg["embedding_dim"]
    self.embedding = nn.Embedding(vocab_size, embed_dim)
    self.dec_rnn = nn.GRU(input_size=embed_dim,
                          hidden_size=rnn_dim,
                          num_layers=decoder_cfg["layers"],
                          batch_first=True,
                          dropout=config["dropout"])

    self.attend = NNAttention(rnn_dim, log_t=decoder_cfg.get("log_t", False))

    self.sample_prob = decoder_cfg.get("sample_prob", 0)
    self.scheduled_sampling = (self.sample_prob != 0)

    # *NB* we predict vocab_size - 1 classes since we
    # never need to predict the start of sequence token.
    self.fc = model.LinearND(rnn_dim, vocab_size - 1)
def __init__(self, start_tag_index, stop_tag_index, tag_size, embedding_dim, hidden_dim):
    super(EncoderCRF, self).__init__()
    self.hidden_dim = hidden_dim
    self.start_tag_index = start_tag_index
    self.stop_tag_index = stop_tag_index
    self.tag_size = tag_size

    self.encoder = nn.GRU(embedding_dim, hidden_dim // 2, num_layers=1, bidirectional=True)
    self.tag_projection = nn.Linear(hidden_dim, self.tag_size)

    self.transitions = nn.Parameter(
        torch.randn(self.tag_size, self.tag_size))

    self.hidden = self.init_hidden()
def __init__(self, input_size, hidden_size, num_layers, output_size=0, rnntype='RNN'):
    super(CharModel, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.rnntype = rnntype
    if rnntype == 'RNN':
        self.rnn = nn.RNN(self.input_size, self.hidden_size, self.num_layers)
    elif rnntype == 'LSTM':
        self.rnn = nn.LSTM(self.input_size, self.hidden_size, self.num_layers)
    elif rnntype == 'GRU':
        self.rnn = nn.GRU(self.input_size, self.hidden_size, self.num_layers)
    else:
        raise ValueError('Wrong RNN type, {} is not supported'.format(rnntype))
    if output_size > 0:
        self.output = nn.Linear(hidden_size, output_size)
        num = hidden_size * output_size
        self.output.weight.data.normal_(0, math.sqrt(2. / num))
def __init__(self, attn_model, hidden_size, output_size, n_layers=1, dropout_p=0.1, max_length=10):
    super(AttnDecoderRNN, self).__init__()

    self.attn_model = attn_model
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.n_layers = n_layers
    self.dropout_p = dropout_p
    self.max_length = max_length

    self.embedding = nn.Embedding(output_size, hidden_size)
    self.gru = nn.GRU(hidden_size * 2, hidden_size, n_layers, dropout=dropout_p)
    self.out = nn.Linear(hidden_size * 2, output_size)

    if attn_model != 'none':
        self.attn = Attn(attn_model, hidden_size, self.max_length)
def __init__(self, attention_model, hidden_size, output_size, n_layers=1, dropout_p=.1):
    super(AttentionDecoderRNN, self).__init__()
    self.attention_model = attention_model
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.n_layers = n_layers
    self.dropout_p = dropout_p

    # Define layers
    self.embedding = nn.Embedding(output_size, hidden_size)
    self.gru = nn.GRU(hidden_size * 2, hidden_size, n_layers, dropout=dropout_p)
    self.out = nn.Linear(hidden_size * 2, output_size)

    # Choose attention model
    if attention_model is not None:
        self.attention = Attention(attention_model, hidden_size)
def __init__(self, embedding_dim, hidden_dim, vocab_size, max_seq_len, gpu=False, oracle_init=False):
    super(Generator, self).__init__()
    self.hidden_dim = hidden_dim
    self.embedding_dim = embedding_dim
    self.max_seq_len = max_seq_len
    self.vocab_size = vocab_size
    self.gpu = gpu

    self.embeddings = nn.Embedding(vocab_size, embedding_dim)
    self.gru = nn.GRU(embedding_dim, hidden_dim)
    self.gru2out = nn.Linear(hidden_dim, vocab_size)

    # initialise oracle network with N(0,1)
    # otherwise variance of initialisation is very small => high NLL for data sampled from the same model
    if oracle_init:
        for p in self.parameters():
            init.normal(p, 0, 1)
def __init__(self, batch_size, num_tokens, embed_size, word_gru_hidden,
             bidirectional=True, init_range=0.1, use_lstm=False):
    super(AttentionWordRNN, self).__init__()

    self.batch_size = batch_size
    self.num_tokens = num_tokens
    self.embed_size = embed_size
    self.word_gru_hidden = word_gru_hidden
    self.bidirectional = bidirectional
    self.use_lstm = use_lstm

    self.lookup = nn.Embedding(num_tokens, embed_size)
    if bidirectional == True:
        if use_lstm:
            print("inside using LSTM")
            self.word_gru = nn.LSTM(embed_size, word_gru_hidden, bidirectional=True)
        else:
            self.word_gru = nn.GRU(embed_size, word_gru_hidden, bidirectional=True)
        self.weight_W_word = nn.Parameter(torch.Tensor(2 * word_gru_hidden, 2 * word_gru_hidden))
        self.bias_word = nn.Parameter(torch.Tensor(2 * word_gru_hidden, 1))
        self.weight_proj_word = nn.Parameter(torch.Tensor(2 * word_gru_hidden, 1))
    else:
        if use_lstm:
            self.word_gru = nn.LSTM(embed_size, word_gru_hidden, bidirectional=False)
        else:
            self.word_gru = nn.GRU(embed_size, word_gru_hidden, bidirectional=False)
        self.weight_W_word = nn.Parameter(torch.Tensor(word_gru_hidden, word_gru_hidden))
        self.bias_word = nn.Parameter(torch.Tensor(word_gru_hidden, 1))
        self.weight_proj_word = nn.Parameter(torch.Tensor(word_gru_hidden, 1))

    self.softmax_word = nn.Softmax()
    self.weight_W_word.data.uniform_(-init_range, init_range)
    self.weight_proj_word.data.uniform_(-init_range, init_range)
def __init__(self, num_characters, dim):
    super(RNNEncoder, self).__init__()
    self.embedding = nn.Embedding(num_characters, dim)
    # TODO
    self.rnn = nn.GRU()
def __init__(self, input_size, hidden_size, n_layers=1):
    super(EncoderRNN, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.n_layers = n_layers

    self.embedding = nn.Embedding(input_size, hidden_size)
    self.gru = nn.GRU(hidden_size, hidden_size, n_layers, batch_first=True)
def __init__(self, attn_model, hidden_size, output_size, n_layers=1, dropout_p=0.1):
    super(AttnDecoderRNN, self).__init__()

    # Keep parameters for reference
    self.attn_model = attn_model
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.n_layers = n_layers
    self.dropout_p = dropout_p

    # Define layers
    self.embedding = nn.Embedding(output_size, hidden_size)
    self.gru = nn.GRU(hidden_size * 2, hidden_size, n_layers, dropout=dropout_p, batch_first=True)
    self.out = nn.Linear(hidden_size * 2, output_size)

    # Choose attention model
    if attn_model != 'none':
        self.attn = Attn(attn_model, hidden_size)
def auto_rnn_bigru(gru: nn.GRU, seqs, lengths):
    batch_size = seqs.size(1)
    state_shape = gru.num_layers * 2, batch_size, gru.hidden_size
    h0 = Variable(seqs.data.new(*state_shape).zero_())

    packed_pinputs, r_index = pack_for_rnn_seq(seqs, lengths)
    output, hn = gru(packed_pinputs, h0)
    output = unpack_from_rnn_seq(output, r_index)
    return output
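The helper above relies on the project's own pack_for_rnn_seq / unpack_from_rnn_seq functions. As a rough sketch of the same idea with PyTorch's built-in packing utilities (shapes and lengths here are illustrative assumptions, and lengths are pre-sorted in decreasing order as older PyTorch versions require):

import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

# Sketch only: sizes are not taken from the project above.
gru = nn.GRU(input_size=8, hidden_size=16, bidirectional=True)

seqs = torch.randn(7, 4, 8)   # (seq_len, batch, input_size)
lengths = [7, 5, 3, 2]        # true length of each batch element, sorted descending

packed = pack_padded_sequence(seqs, lengths)
packed_out, hn = gru(packed)                   # hidden state defaults to zeros when omitted
output, _ = pad_packed_sequence(packed_out)    # back to (seq_len, batch, 2 * hidden_size)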
def __init__(self, opt, data_agent):
    super().__init__()
    self.opt = opt

    self.input_emb = nn.Embedding(data_agent.wordcnt, opt['embedding_dim'], padding_idx=0)
    self.action_type_emb = nn.Embedding(data_agent.get_num_actions(), opt['action_type_emb_dim'])
    self.encoder = nn.GRU(opt['embedding_dim'], opt['rnn_h'], opt['rnn_layers'],
                          batch_first=True, bidirectional=opt['bidir'])
    self.decoder = nn.Sequential(
        nn.Linear(opt['rnn_h'], 1),
    )
    self.log_softmax = nn.LogSoftmax()
    self.trans = nn.Sequential(
        nn.Linear(opt['rnn_h'] * (2 if opt['bidir'] else 1), opt['embedding_dim']),
        nn.Tanh(),
    )
    counter_emb = opt['counter_emb_dim']
    if opt['counter_ablation']:
        counter_emb = 0
    self.dec_gru = nn.GRU(opt['rnn_h'] * (2 if opt['bidir'] else 1) + counter_emb
                          + (opt['embedding_dim'] if not opt['room_ablation'] else 0)
                          + opt['action_type_emb_dim']
                          + opt['action_type_emb_dim']
                          + opt['embedding_dim']
                          + opt['embedding_dim']
                          + opt['rnn_h'] * (2 if opt['bidir'] else 1),
                          opt['rnn_h'], opt['rnn_layers'], batch_first=True)
    self.merge = nn.Sequential(
        nn.Linear(opt['rnn_h'] * 2, opt['rnn_h']),
        nn.Tanh(),
    )
    self.counter_emb = nn.Embedding(opt['counter_max'] + 1, opt['counter_emb_dim'])
def __init__(self, opt, data_agent):
    super().__init__()
    self.opt = opt
    self.y_dim = data_agent.y_dim

    self.input_emb = nn.Embedding(data_agent.wordcnt, opt['embedding_dim'], padding_idx=0)
    self.encoder = nn.GRU(opt['embedding_dim'], opt['rnn_h'], opt['rnn_layers'], batch_first=True)
    self.decoder = nn.GRU(self.y_dim, opt['rnn_h'], opt['rnn_layers'], batch_first=True)
    self.mapping = nn.Sequential(
        nn.Linear(opt['rnn_h'] * 2, self.y_dim),
        nn.LogSoftmax(),
    )
def __init__(self, input_size, hidden_size, num_layers, opt, dictionary):
    super().__init__()
    self.dict = dictionary
    self.h2o = nn.Linear(hidden_size, len(dictionary))
    self.dropout = nn.Dropout(opt['dropout'])
    self.rnn = nn.GRU(input_size, hidden_size, num_layers)
def __init__(self, vocab_size, hidden_size, embedding_size, num_layers=1):
    super().__init__()
    self.embed = nn.Embedding(vocab_size, embedding_size)
    self.gru = nn.GRU(embedding_size, hidden_size, num_layers)
def __init__(self, vocab_size, hidden_size, embedding_size, num_layers=1):
    super().__init__()
    self.embed = nn.Embedding(vocab_size, embedding_size)
    self.gru = nn.GRU(embedding_size, hidden_size, num_layers)
    self.fc = nn.Linear(hidden_size, vocab_size)
    self.softmax = nn.LogSoftmax()
def init_rnn_cell(cell, gain=1):
    if isinstance(cell, nn.LSTM):
        init_lstm(cell, gain)
    elif isinstance(cell, nn.GRU):
        init_gru(cell, gain)
    else:
        cell.reset_parameters()
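init_lstm and init_gru above are helpers defined elsewhere in that project. Purely as an illustration of what such a GRU initializer commonly does (an assumption, not the project's actual code), one could loop over the GRU's named parameters and apply orthogonal / Xavier / zero initialization using the in-place initializers of recent PyTorch versions:

import torch.nn as nn
from torch.nn import init

def init_gru_sketch(gru: nn.GRU, gain: float = 1.0):
    # Hypothetical helper: orthogonal recurrent weights, Xavier input weights, zero biases.
    for name, param in gru.named_parameters():
        if 'weight_hh' in name:
            init.orthogonal_(param, gain=gain)
        elif 'weight_ih' in name:
            init.xavier_uniform_(param, gain=gain)
        elif 'bias' in name:
            init.constant_(param, 0.0)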
def get_cell_cls(rnn_cell):
    if rnn_cell == "lstm":
        cell_cls = nn.LSTM
    elif rnn_cell == "gru":
        cell_cls = nn.GRU
    elif rnn_cell == "sru":
        cell_cls = SRU
    else:
        raise ValueError("Unrecognized rnn cell: {}".format(rnn_cell))
    return cell_cls
def __init__(self, vocab_size, hidden_size):
    super(QuestionModule, self).__init__()
    self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
def __init__(self, vocab_size, hidden_size):
    super(InputModule, self).__init__()
    self.hidden_size = hidden_size
    self.gru = nn.GRU(hidden_size, hidden_size, bidirectional=True, batch_first=True)
    for name, param in self.gru.state_dict().items():
        if 'weight' in name:
            init.xavier_normal(param)
    self.dropout = nn.Dropout(0.1)
def __init__(self, args):
    super(CNN_BiGRU, self).__init__()
    self.args = args
    self.hidden_dim = args.lstm_hidden_dim
    self.num_layers = args.lstm_num_layers
    V = args.embed_num
    D = args.embed_dim
    C = args.class_num
    self.C = C
    Ci = 1
    Co = args.kernel_num
    Ks = args.kernel_sizes
    self.embed = nn.Embedding(V, D)
    # self.embed = nn.Embedding(V, D, max_norm=args.max_norm)
    if args.word_Embedding:
        pretrained_weight = np.array(args.pretrained_weight)
        self.embed.weight.data.copy_(torch.from_numpy(pretrained_weight))
    # CNN
    self.convs1 = [nn.Conv2d(Ci, Co, (K, D), padding=(K // 2, 0), stride=1) for K in Ks]
    print(self.convs1)
    # BiGRU
    self.bigru = nn.GRU(D, self.hidden_dim, num_layers=self.num_layers, dropout=args.dropout,
                        bidirectional=True, bias=True)
    self.hidden = self.init_hidden(self.num_layers, args.batch_size)
    # linear
    L = len(Ks) * Co + self.hidden_dim * 2
    self.hidden2label1 = nn.Linear(L, L // 2)
    self.hidden2label2 = nn.Linear(L // 2, C)
    # dropout
    self.dropout = nn.Dropout(args.dropout)
def __init__(self, args):
    super(CGRU, self).__init__()
    self.args = args
    self.hidden_dim = args.lstm_hidden_dim
    self.num_layers = args.lstm_num_layers
    V = args.embed_num
    D = args.embed_dim
    C = args.class_num
    Ci = 1
    Co = args.kernel_num
    Ks = args.kernel_sizes
    self.embed = nn.Embedding(V, D)
    if args.word_Embedding:
        pretrained_weight = np.array(args.pretrained_weight)
        self.embed.weight.data.copy_(torch.from_numpy(pretrained_weight))
    # CNN
    KK = []
    for K in Ks:
        KK.append(K + 1 if K % 2 == 0 else K)
    # self.convs1 = [nn.Conv2d(Ci, Co, (K, D), stride=1, padding=(K // 2, 0)) for K in KK]
    self.convs1 = [nn.Conv2d(Ci, D, (K, D), stride=1, padding=(K // 2, 0)) for K in KK]
    # GRU
    self.gru = nn.GRU(D, self.hidden_dim, num_layers=self.num_layers, dropout=args.dropout)
    self.hidden = self.init_hidden(self.num_layers, args.batch_size)
    # linear
    self.hidden2label1 = nn.Linear(self.hidden_dim, self.hidden_dim // 2)
    self.hidden2label2 = nn.Linear(self.hidden_dim // 2, C)
    # dropout
    self.dropout = nn.Dropout(args.dropout)
def __init__(self, n, k, nembed, nhid, init_range, device_id):
    super(RnnContextEncoder, self).__init__(device_id)
    self.nhid = nhid

    # use the same embedding for counts and values
    self.embeder = nn.Embedding(n, nembed)
    # an RNN to encode a sequence of counts and values
    self.encoder = nn.GRU(
        input_size=nembed,
        hidden_size=nhid,
        bias=True)

    self.embeder.weight.data.uniform_(-init_range, init_range)
    init_rnn(self.encoder, init_range)
def __init__(self, l_en, options):
    # TODO : Remove the embedding layer
    super(mGRU, self).__init__()
    self.options = options
    self.l_en = l_en
    self.n_embed = options['EMBEDDING_DIM']
    self.n_dim = options['HIDDEN_DIM'] if options['HIDDEN_DIM'] % 2 == 0 else options['HIDDEN_DIM'] - 1
    self.n_out = len(options['CLASSES_2_IX'])
    self.embedding = nn.Embedding(len(l_en) + 1, self.n_embed).type(dtype)
    if self.options['USE_PRETRAINED']:
        embedding_matrix = self.l_en.get_embedding_matrix()
        if embedding_matrix is not None:
            print('EMBEDDING MATRIX SIZE (%d,%d)' % (embedding_matrix.shape[0], embedding_matrix.shape[1]))
            self.embedding.weight = nn.Parameter(torch.Tensor(embedding_matrix).type(dtype))
    self.p_gru = nn.GRU(self.n_embed, self.n_dim, bidirectional=False).type(dtype)
    self.h_gru = nn.GRU(self.n_embed, self.n_dim, bidirectional=False).type(dtype)
    self.out = nn.Linear(self.n_dim, self.n_out).type(dtype)

    # Attention Parameters
    self.W_y = nn.Parameter(torch.randn(self.n_dim, self.n_dim).cuda()) if use_cuda else nn.Parameter(torch.randn(self.n_dim, self.n_dim))  # n_dim x n_dim
    self.register_parameter('W_y', self.W_y)
    self.W_h = nn.Parameter(torch.randn(self.n_dim, self.n_dim).cuda()) if use_cuda else nn.Parameter(torch.randn(self.n_dim, self.n_dim))  # n_dim x n_dim
    self.register_parameter('W_h', self.W_h)
    self.W_alpha = nn.Parameter(torch.randn(self.n_dim, 1).cuda()) if use_cuda else nn.Parameter(torch.randn(self.n_dim, 1))  # n_dim x 1
    self.register_parameter('W_alpha', self.W_alpha)
    self.W_r = nn.Parameter(torch.randn(self.n_dim, self.n_dim).cuda()) if use_cuda else nn.Parameter(torch.randn(self.n_dim, self.n_dim))  # n_dim x n_dim
    self.register_parameter('W_r', self.W_r)

    # Match GRU parameters.
    self.m_gru = nn.GRU(self.n_dim + self.n_dim, self.n_dim, bidirectional=False).type(dtype)
def _gru_forward(self, gru, encoded_s, mask_s, h_0):
    '''
    inputs :
        gru : The GRU unit for which the forward pass is to be computed
        encoded_s : T x batch x n_embed
        mask_s : T x batch
        h_0 : 1 x batch x n_dim
    outputs :
        o_s : T x batch x n_dim
        h_n : 1 x batch x n_dim
    '''
    seq_len = encoded_s.size(0)
    batch_size = encoded_s.size(1)
    o_s = Variable(torch.zeros(seq_len, batch_size, self.n_dim).type(dtype))
    h_tm1 = h_0.squeeze(0)  # batch x n_dim
    o_tm1 = None

    for ix, (x_t, mask_t) in enumerate(zip(encoded_s, mask_s)):
        '''
        x_t : batch x n_embed
        mask_t : batch,
        '''
        o_t, h_t = gru(x_t.unsqueeze(0), h_tm1.unsqueeze(0))  # o_t : 1 x batch x n_dim
                                                              # h_t : 1 x batch x n_dim
        mask_t = mask_t.unsqueeze(1)  # batch x 1
        h_t = self.mask_mult(h_t[0], h_tm1, mask_t)  # batch x n_dim
        if o_tm1 is not None:
            o_t = self.mask_mult(o_t[0], o_tm1, mask_t)
        o_tm1 = o_t[0] if o_tm1 is None else o_t
        h_tm1 = h_t
        o_s[ix] = o_t

    return o_s, h_t.unsqueeze(0)
def __init__(self, num_proposals=16, num_rnn_layers=1, h_width=512, input_size=500, dropout=0):
    super(SST_AD_Prop, self).__init__()
    self.rnn = nn.GRU(input_size=input_size, hidden_size=h_width,
                      num_layers=num_rnn_layers, dropout=dropout)
    self.lin_out = nn.Linear(h_width, num_proposals)
    self.nonlin_final = nn.Sigmoid()
    self.num_rnn_layers = num_rnn_layers
    self.h_width = h_width
def __init__(self, num_classes=201, num_rnn_layers=1, h_width=512, input_size=500,
             dropout=0, init_range=None, **kwargs):
    super(SST_AD_SegAct, self).__init__()
    self.rnn = nn.GRU(input_size=input_size, hidden_size=h_width,
                      num_layers=num_rnn_layers, dropout=dropout)  # , batch_first=True)
    self.lin_out = nn.Linear(h_width, num_classes)
    self.nonlin_eval = torch.nn.Softmax()
    self.num_rnn_layers = num_rnn_layers
    self.h_width = h_width
    self.init_weights(init_range)
def test_rnn_initial_hidden_state(self):
    rnn_modes = ['RNN', 'GRU', 'LSTM']
    for mode in rnn_modes:
        rnn = getattr(nn, mode)(30, 20, 2)
        input = Variable(torch.randn(10, 32, 30))
        hidden = Variable(torch.Tensor(2, 32, 20).zero_())

        if mode == 'LSTM':
            hidden = (hidden, hidden)
        output1, hidden1 = rnn(input, hidden)
        output2, hidden2 = rnn(input)

        self.assertEqual(output1, output2)
        self.assertEqual(hidden1, hidden2)
def prepare(self, p):
    self.embed_word = nn.Embedding(p.num_input_tokens, p.embedding_size)
    self.gru = nn.GRU(p.embedding_size, p.rnn_size, bidirectional=True)

    # Decoder modules
    self.embed_action = nn.Embedding(p.num_labels, p.action_embedding_size)
    self.combine_arh = nn.Linear(p.action_embedding_size + p.rnn_size * 2 + p.hidden_size, p.hidden_size)
    self.policy = nn.Linear(p.hidden_size, p.num_labels)
    self.loss_fn = torch.nn.MSELoss(size_average=False)

    if p.cuda:
        for module in [self.embed_word, self.gru, self.embed_action,
                       self.combine_arh, self.policy, self.loss_fn]:
            module.cuda()
        self.LongTensor = torch.cuda.LongTensor
        cast = lambda t: t.cuda()
    else:
        self.LongTensor = torch.LongTensor
        cast = lambda t: t
    self.cast = cast

    self.initial_h = Variable(cast(torch.zeros(1, p.hidden_size)), requires_grad=True)
    self.initial_actemb = Variable(cast(torch.zeros(1, p.action_embedding_size)), requires_grad=True)

    # Sample input tokens and labels for each sentence
    tokens = [cast(torch.LongTensor(l).random_(p.num_input_tokens)) for l in self.sequence_lengths]
    # NOTE: we don't cast labels to CUDA, because they're only used for indexing
    labels = [torch.LongTensor(l).random_(p.num_labels) for l in self.sequence_lengths]

    # Batch sentences in groups of minibatch_size
    self.batches = [(tokens[n:n + p.minibatch_size], labels[n:n + p.minibatch_size])
                    for n in range(0, len(tokens), p.minibatch_size)]
def __init__(self, embed_ndim):
    super(VisualSemanticEmbedding, self).__init__()
    self.embed_ndim = embed_ndim

    # image feature
    self.img_encoder = models.vgg16(pretrained=True)
    for param in self.img_encoder.parameters():
        param.requires_grad = False
    self.feat_extractor = nn.Sequential(*(self.img_encoder.classifier[i] for i in range(6)))
    self.W = nn.Linear(4096, embed_ndim, False)

    # text feature
    self.txt_encoder = nn.GRU(embed_ndim, embed_ndim, 1)
def _test_rnn_retain_variables(self, dtype):
    rnns = [nn.LSTM(10, 20, num_layers=2).type(dtype),
            nn.GRU(10, 20, num_layers=2).type(dtype),
            nn.RNN(10, 20, num_layers=2).type(dtype)]
    for rnn in rnns:
        input = Variable(torch.randn(5, 6, 10).type(dtype), requires_grad=True)
        output = rnn(input)
        output[0].sum().backward(retain_graph=True)
        grads = [input.grad.data.clone()] + [p.grad.data.clone() for p in rnn.parameters()]
        for i in range(4):
            rnn.zero_grad()
            input.grad.data.zero_()
            output[0].sum().backward(retain_graph=True)
            grads2 = [input.grad.data] + [p.grad.data for p in rnn.parameters()]
            self.assertEqual(grads, grads2)