The following 50 code examples, extracted from open-source Python projects, illustrate how to use torch.nn.LSTM.
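Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below; all sizes are arbitrary illustration values) showing the basic nn.LSTM call and the tensor shapes involved.

import torch
import torch.nn as nn

# Minimal sketch: a 2-layer LSTM over batch-first input.
rnn = nn.LSTM(input_size=10, hidden_size=20, num_layers=2, batch_first=True)
x = torch.randn(3, 5, 10)        # (batch, seq_len, input_size)
out, (h_n, c_n) = rnn(x)         # zero initial states by default
print(out.shape)                 # torch.Size([3, 5, 20])  last layer's output at every step
print(h_n.shape, c_n.shape)      # torch.Size([2, 3, 20])  (num_layers, batch, hidden_size)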
def __init__(self, input_size, feature_size=128, hidden_size=256, num_layers=1, dropout=0.9):
    super(SeqEncoder, self).__init__()
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    # set up modules for recurrent neural networks
    self.rnn = nn.LSTM(input_size=input_size,
                       hidden_size=hidden_size,
                       num_layers=num_layers,
                       batch_first=True,
                       dropout=dropout,
                       bidirectional=True)
    self.rnn.apply(weights_init)
    # set up modules to compute features
    self.feature = nn.Linear(hidden_size * 2, feature_size)
    self.feature.apply(weights_init)
def __init__(self, input_size, num_classes, hidden_size=256, num_layers=1, dropout=0.9):
    super(SeqLabeler, self).__init__()
    self.num_classes = num_classes
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    # set up modules for recurrent neural networks
    self.rnn = nn.LSTM(input_size=input_size,
                       hidden_size=hidden_size,
                       num_layers=num_layers,
                       batch_first=True,
                       dropout=dropout,
                       bidirectional=True)
    self.rnn.apply(weights_init)
    # set up modules to compute classification
    self.classifier = nn.Linear(hidden_size * 2, num_classes)
    self.classifier.apply(weights_init)
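Both encoders above pair a bidirectional LSTM with nn.Linear(hidden_size * 2, ...). The sketch below (illustrative only, with made-up sizes, not from the project) shows why: with bidirectional=True the per-step output concatenates the forward and backward directions, doubling the feature dimension.

import torch
import torch.nn as nn

rnn = nn.LSTM(input_size=32, hidden_size=256, num_layers=1,
              batch_first=True, bidirectional=True)
x = torch.randn(4, 10, 32)        # (batch, seq_len, input_size)
out, (h_n, c_n) = rnn(x)
print(out.shape)                  # torch.Size([4, 10, 512])  -> hidden_size * 2 features per step
print(h_n.shape)                  # torch.Size([2, 4, 256])   -> (num_layers * num_directions, batch, hidden_size)
head = nn.Linear(256 * 2, 128)    # same pattern as self.feature / self.classifier above
print(head(out).shape)            # torch.Size([4, 10, 128])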
def __init__(self, n_layers=2, h_size=512):
    super(ResLSTM, self).__init__()
    print('Building ResNet + LSTM model...')
    self.h_size = h_size
    self.n_layers = n_layers

    resnet = models.resnet50(pretrained=True)
    self.conv = nn.Sequential(*list(resnet.children())[:-1])
    self.lstm = nn.LSTM(1280, h_size, dropout=0.2, num_layers=n_layers)
    self.fc = nn.Sequential(
        nn.Linear(h_size, 64),
        nn.ReLU(),
        nn.Dropout(0.2),
        nn.Linear(64, 1)
    )
def __init__(self, h_size=512, n_layers=3):
    super(DenseLSTM, self).__init__()
    print('Building DenseNet + LSTM model...')
    self.h_size = h_size
    self.n_layers = n_layers

    densenet = models.densenet201(pretrained=True)
    self.conv = nn.Sequential(*list(densenet.children())[:-1])
    self.lstm = nn.LSTM(23040, h_size, dropout=0.2, num_layers=n_layers)
    self.fc = nn.Sequential(
        nn.Linear(512, 256),
        nn.ReLU(),
        nn.Dropout(0.2),
        nn.Linear(256, 1)
    )
def __init__(self, n_layers=2, h_size=420):
    super(AlexLSTM, self).__init__()
    print('Building AlexNet + LSTM model...')
    self.h_size = h_size
    self.n_layers = n_layers

    alexnet = models.alexnet(pretrained=True)
    self.conv = nn.Sequential(*list(alexnet.children())[:-1])
    self.lstm = nn.LSTM(1280, h_size, dropout=0.2, num_layers=n_layers)
    self.fc = nn.Sequential(
        nn.Linear(h_size, 64),
        nn.ReLU(),
        nn.Dropout(0.2),
        nn.Linear(64, 1)
    )
def setUp(self):
    super(TestEncoderBase, self).setUp()
    self.lstm = LSTM(bidirectional=True, num_layers=3, input_size=3, hidden_size=7, batch_first=True)
    self.encoder_base = _EncoderBase(stateful=True)

    tensor = Variable(torch.rand([5, 7, 3]))
    tensor[1, 6:, :] = 0
    tensor[3, 2:, :] = 0
    self.tensor = tensor
    mask = Variable(torch.ones(5, 7))
    mask[1, 6:] = 0
    mask[2, :] = 0  # <= completely masked
    mask[3, 2:] = 0
    mask[4, :] = 0  # <= completely masked
    self.mask = mask

    self.batch_size = 5
    self.num_valid = 3
    sequence_lengths = get_lengths_from_binary_sequence_mask(mask)
    _, _, restoration_indices, sorting_indices = sort_batch_by_length(tensor, sequence_lengths)
    self.sorting_indices = sorting_indices
    self.restoration_indices = restoration_indices
def test_forward_works_even_with_empty_sequences(self):
    lstm = LSTM(bidirectional=True, num_layers=3, input_size=3, hidden_size=11, batch_first=True)
    encoder = PytorchSeq2VecWrapper(lstm)

    tensor = torch.autograd.Variable(torch.rand([5, 7, 3]))
    tensor[1, 6:, :] = 0
    tensor[2, :, :] = 0
    tensor[3, 2:, :] = 0
    tensor[4, :, :] = 0
    mask = torch.autograd.Variable(torch.ones(5, 7))
    mask[1, 6:] = 0
    mask[2, :] = 0
    mask[3, 2:] = 0
    mask[4, :] = 0

    results = encoder(tensor, mask)

    for i in (0, 1, 3):
        assert not (results[i] == 0.).data.all()
    for i in (2, 4):
        assert (results[i] == 0.).data.all()
def test_forward_works_even_with_empty_sequences(self):
    lstm = LSTM(bidirectional=True, num_layers=3, input_size=3, hidden_size=7, batch_first=True)
    encoder = PytorchSeq2SeqWrapper(lstm)

    tensor = torch.autograd.Variable(torch.rand([5, 7, 3]))
    tensor[1, 6:, :] = 0
    tensor[2, :, :] = 0
    tensor[3, 2:, :] = 0
    tensor[4, :, :] = 0
    mask = torch.autograd.Variable(torch.ones(5, 7))
    mask[1, 6:] = 0
    mask[2, :] = 0
    mask[3, 2:] = 0
    mask[4, :] = 0

    results = encoder(tensor, mask)

    for i in (0, 1, 3):
        assert not (results[i] == 0.).data.all()
    for i in (2, 4):
        assert (results[i] == 0.).data.all()
def test_forward_pulls_out_correct_tensor_with_sequence_lengths(self):
    lstm = LSTM(bidirectional=True, num_layers=3, input_size=3, hidden_size=7, batch_first=True)
    encoder = PytorchSeq2SeqWrapper(lstm)

    tensor = torch.rand([5, 7, 3])
    tensor[1, 6:, :] = 0
    tensor[2, 4:, :] = 0
    tensor[3, 2:, :] = 0
    tensor[4, 1:, :] = 0
    mask = torch.ones(5, 7)
    mask[1, 6:] = 0
    mask[2, 4:] = 0
    mask[3, 2:] = 0
    mask[4, 1:] = 0

    input_tensor = Variable(tensor)
    mask = Variable(mask)
    sequence_lengths = get_lengths_from_binary_sequence_mask(mask)
    packed_sequence = pack_padded_sequence(input_tensor, sequence_lengths.data.tolist(), batch_first=True)
    lstm_output, _ = lstm(packed_sequence)
    encoder_output = encoder(input_tensor, mask)
    lstm_tensor, _ = pad_packed_sequence(lstm_output, batch_first=True)
    assert_almost_equal(encoder_output.data.numpy(), lstm_tensor.data.numpy())
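The test above relies on the standard pack_padded_sequence / pad_packed_sequence round trip for variable-length batches. A stripped-down, stand-alone version of that pattern (illustrative sizes, sequences pre-sorted by decreasing length) looks like this:

import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

lstm = nn.LSTM(input_size=3, hidden_size=7, num_layers=1,
               batch_first=True, bidirectional=True)

padded = torch.rand(5, 7, 3)      # (batch, max_len, input_size), zero-padded on the right
lengths = [7, 6, 4, 2, 1]         # true lengths, sorted in decreasing order

packed = pack_padded_sequence(padded, lengths, batch_first=True)
packed_out, _ = lstm(packed)
out, out_lengths = pad_packed_sequence(packed_out, batch_first=True)
print(out.shape)                  # torch.Size([5, 7, 14]); steps past each length are zeros
print(out_lengths)                # tensor([7, 6, 4, 2, 1])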
def test_wrapper_works_when_passed_state_with_zero_length_sequences(self):
    lstm = LSTM(bidirectional=True, num_layers=3, input_size=3, hidden_size=7, batch_first=True)
    encoder = PytorchSeq2SeqWrapper(lstm)
    tensor = torch.rand([5, 7, 3])
    mask = torch.ones(5, 7)
    mask[0, 3:] = 0
    mask[1, 4:] = 0
    mask[2, 0:] = 0
    mask[3, 6:] = 0

    # Initial states are of shape (num_layers * num_directions, batch_size, hidden_dim)
    initial_states = (Variable(torch.randn(6, 5, 7)),
                      Variable(torch.randn(6, 5, 7)))

    input_tensor = Variable(tensor)
    mask = Variable(mask)
    _ = encoder(input_tensor, mask, initial_states)
def test_wrapper_stateful(self):
    lstm = LSTM(bidirectional=True, num_layers=2, input_size=3, hidden_size=7, batch_first=True)
    encoder = PytorchSeq2SeqWrapper(lstm, stateful=True)

    # To test the stateful functionality we need to call the encoder multiple times.
    # Different batch sizes further test some of the logic.
    batch_sizes = [5, 10, 8]
    sequence_lengths = [4, 6, 7]
    states = []
    for batch_size, sequence_length in zip(batch_sizes, sequence_lengths):
        tensor = Variable(torch.rand([batch_size, sequence_length, 3]))
        mask = Variable(torch.ones(batch_size, sequence_length))
        mask.data[0, 3:] = 0
        encoder_output = encoder(tensor, mask)
        states.append(encoder._states)  # pylint: disable=protected-access

    # Check that the output is masked properly.
    assert_almost_equal(encoder_output[0, 3:, :].data.numpy(), numpy.zeros((4, 14)))

    for k in range(2):
        assert_almost_equal(
            states[-1][k][:, -2:, :].data.numpy(),
            states[-2][k][:, -2:, :].data.numpy()
        )
def __init__(self, input_size, hidden_size, num_layers,
             dropout_rate=0, dropout_output=False, rnn_type=nn.LSTM,
             concat_layers=False, padding=False):
    super(StackedBRNN, self).__init__()
    self.padding = padding
    self.dropout_output = dropout_output
    self.dropout_rate = dropout_rate
    self.num_layers = num_layers
    self.concat_layers = concat_layers
    self.rnns = nn.ModuleList()
    for i in range(num_layers):
        input_size = input_size if i == 0 else 2 * hidden_size
        # self.rnns.append(rnn_type(input_size, hidden_size,
        #                           num_layers=1,
        #                           bidirectional=True))
        self.rnns.append(MF.SRUCell(input_size, hidden_size,
                                    dropout=dropout_rate,
                                    rnn_dropout=dropout_rate,
                                    use_tanh=1,
                                    bidirectional=True))
def __init__(self, args, mapping):
    super(CharLM, self).__init__()
    self.batch_size = args.batch_size
    self.seq_length = args.seq_length
    self.vocab_size = args.vocab_size
    self.embedding_dim = args.embedding_dim
    self.layer_num = args.layer_num
    self.dropout_prob = args.dropout_prob
    self.lr = args.lr

    self.char_embedding = nn.Embedding(self.vocab_size, self.embedding_dim)
    self.dropout = nn.Dropout(self.dropout_prob)
    self.lstm = nn.LSTM(input_size=self.embedding_dim,
                        hidden_size=self.embedding_dim,
                        num_layers=self.layer_num,
                        dropout=self.dropout_prob)
    self.fc = nn.Linear(self.embedding_dim, self.vocab_size)
    self.optimizer = optim.Adam(self.parameters(), lr=self.lr)
    self.mapping = mapping
def __init__(self, args, attr_size, node_size):
    super(TreeLM, self).__init__()
    self.batch_size = args.batch_size
    self.seq_length = args.seq_length
    self.attr_size = attr_size
    self.node_size = node_size
    self.embedding_dim = args.embedding_dim
    self.layer_num = args.layer_num
    self.dropout_prob = args.dropout_prob
    self.lr = args.lr

    self.attr_embedding = nn.Embedding(self.attr_size, self.embedding_dim)
    self.dropout = nn.Dropout(self.dropout_prob)
    self.lstm = nn.LSTM(input_size=self.embedding_dim,
                        hidden_size=self.embedding_dim,
                        num_layers=self.layer_num,
                        dropout=self.dropout_prob)
    self.fc = nn.Linear(self.embedding_dim, self.node_size)
    self.optimizer = optim.Adam(self.parameters(), lr=self.lr)
    # self.node_mapping = node_mapping
def __init__(self, args):
    super(BiLSTM_1, self).__init__()
    self.args = args
    self.hidden_dim = args.lstm_hidden_dim
    self.num_layers = args.lstm_num_layers
    V = args.embed_num
    D = args.embed_dim
    C = args.class_num
    # self.embed = nn.Embedding(V, D, max_norm=args.max_norm)
    self.embed = nn.Embedding(V, D)
    if args.word_Embedding:
        pretrained_weight = np.array(args.pretrained_weight)
        self.embed.weight.data.copy_(torch.from_numpy(pretrained_weight))
    self.bilstm = nn.LSTM(D, self.hidden_dim, num_layers=self.num_layers,
                          dropout=args.dropout, bidirectional=True, bias=True)
    print(self.bilstm)
    self.hidden2label = nn.Linear(self.hidden_dim * 2 * 2, C, bias=True)
    self.hidden = self.init_hidden(self.num_layers, args.batch_size)
    self.dropout = nn.Dropout(args.dropout)
def forward(self, x):
    embed = self.embed(x)
    # CNN
    cnn_x = embed
    cnn_x = self.dropout(cnn_x)
    cnn_x = cnn_x.unsqueeze(1)
    cnn_x = [F.relu(conv(cnn_x)).squeeze(3) for conv in self.convs1]  # [(N, Co, W), ...] * len(Ks)
    cnn_x = torch.cat(cnn_x, 0)
    cnn_x = torch.transpose(cnn_x, 1, 2)
    # LSTM
    lstm_out, self.hidden = self.lstm(cnn_x, self.hidden)
    lstm_out = torch.transpose(lstm_out, 0, 1)
    lstm_out = torch.transpose(lstm_out, 1, 2)
    lstm_out = F.max_pool1d(lstm_out, lstm_out.size(2)).squeeze(2)
    # linear
    cnn_lstm_out = self.hidden2label1(F.tanh(lstm_out))
    cnn_lstm_out = self.hidden2label2(F.tanh(cnn_lstm_out))
    # output
    logit = cnn_lstm_out
    return logit
def __init__(self, args):
    super(BiLSTM, self).__init__()
    self.args = args
    self.hidden_dim = args.lstm_hidden_dim
    self.num_layers = args.lstm_num_layers
    V = args.embed_num
    D = args.embed_dim
    C = args.class_num
    # self.embed = nn.Embedding(V, D, max_norm=args.max_norm)
    self.embed = nn.Embedding(V, D)
    if args.word_Embedding:
        pretrained_weight = np.array(args.pretrained_weight)
        self.embed.weight.data.copy_(torch.from_numpy(pretrained_weight))
    self.bilstm = nn.LSTM(D, self.hidden_dim // 2, num_layers=1,
                          dropout=args.dropout, bidirectional=True, bias=False)
    print(self.bilstm)
    self.hidden2label1 = nn.Linear(self.hidden_dim, self.hidden_dim // 2)
    self.hidden2label2 = nn.Linear(self.hidden_dim // 2, C)
    self.hidden = self.init_hidden(self.num_layers, args.batch_size)
    # self.dropout = nn.Dropout(args.dropout)
def __init__(self, embeddings, num_layers):
    """
    embeddings should be a torch tensor, of dimension max_idx - 1 x num_hidden

    we'll derive num_hidden from the second dimension of embeddings
    """
    super().__init__()
    self.num_hidden = embeddings.shape[1]
    self.num_layers = num_layers
    self.embedding = nn.Embedding(
        embeddings.shape[0],
        self.num_hidden
    )
    self.embedding.weight.data = embeddings
    self.embedding.weight.requires_grad = False
    self.lstm = nn.LSTM(
        input_size=self.num_hidden,
        hidden_size=self.num_hidden,
        num_layers=num_layers)
    self.initial_state = None
    self.initial_cell = None
    self.linear = nn.Linear(self.num_hidden, 1)  # modeling as a regression
def __init__(self, embeddings, num_layers, pad_id):
    super().__init__()
    self.num_hidden = embeddings.shape[1]
    self.num_layers = num_layers
    self.embedding = nn.Embedding(
        embeddings.shape[0],
        self.num_hidden
    )
    self.embedding.weight.data = embeddings
    self.embedding.weight.requires_grad = False
    self.lstm = nn.LSTM(
        input_size=self.num_hidden,
        hidden_size=self.num_hidden,
        num_layers=num_layers,
        bidirectional=True)
    self.initial_state = None
    self.initial_cell = None
    # self.rationales = None
    self.linear = nn.Linear(self.num_hidden * 2, 1)
    self.pad_id = pad_id
def __init__(self, vocab_size, tag_to_ix, embedding_dim, hidden_dim):
    super(BiLSTM_CRF, self).__init__()
    self.embedding_dim = embedding_dim
    self.hidden_dim = hidden_dim
    self.vocab_size = vocab_size
    self.tag_to_ix = tag_to_ix
    self.tagset_size = len(tag_to_ix)

    self.word_embeds = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
    self.lstm = nn.LSTM(embedding_dim, hidden_dim // 2,
                        num_layers=1, bidirectional=True)

    # Maps the output of the LSTM into tag space.
    self.hidden2tag = nn.Linear(hidden_dim, self.tagset_size)

    # Matrix of transition parameters. Entry i,j is the score of
    # transitioning *to* i *from* j.
    self.transitions = nn.Parameter(
        torch.randn(self.tagset_size, self.tagset_size))

    self.hidden = self.init_hidden()
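The init_hidden() helper called above is not shown in this snippet. A typical companion for a single-layer bidirectional LSTM like this one (a hypothetical sketch, not the project's actual code; it assumes torch is imported and hidden_dim is even) allocates states of shape (num_layers * num_directions, batch, hidden_dim // 2):

# Hypothetical init_hidden for the BiLSTM_CRF above; batch size 1, as in the
# classic PyTorch BiLSTM-CRF tutorial this snippet resembles.
def init_hidden(self):
    return (torch.randn(2, 1, self.hidden_dim // 2),
            torch.randn(2, 1, self.hidden_dim // 2))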
def __init__(self,
             encoder_vocab_size=100,
             decoder_vocab_size=100,
             wordvec_dim=300,
             hidden_dim=256,
             rnn_num_layers=2,
             rnn_dropout=0,
             null_token=0,
             start_token=1,
             end_token=2,
             encoder_embed=None):
    super(Seq2Seq, self).__init__()
    self.encoder_embed = nn.Embedding(encoder_vocab_size, wordvec_dim)
    self.encoder_rnn = nn.LSTM(wordvec_dim, hidden_dim, rnn_num_layers,
                               dropout=rnn_dropout, batch_first=True)
    self.decoder_embed = nn.Embedding(decoder_vocab_size, wordvec_dim)
    self.decoder_rnn = nn.LSTM(wordvec_dim + hidden_dim, hidden_dim, rnn_num_layers,
                               dropout=rnn_dropout, batch_first=True)
    self.decoder_linear = nn.Linear(hidden_dim, decoder_vocab_size)
    self.NULL = null_token
    self.START = start_token
    self.END = end_token
    self.multinomial_outputs = None
def __init__(self, opt):
    self.name = "TextLstm"
    super(TextLSTM, self).__init__()
    self.opt = opt

    self.embedding = nn.Embedding(opt.vocab_size, opt.embed_dim)
    self.lstm = nn.LSTM(input_size=opt.embed_dim,
                        hidden_size=opt.hidden_size,
                        num_layers=1,
                        batch_first=True,
                        bidirectional=False)
    self.linears = nn.Sequential(
        nn.Linear(opt.hidden_size, opt.linear_hidden_size),
        nn.ReLU(),
        nn.Dropout(0.25),
        nn.Linear(opt.linear_hidden_size, opt.num_classes),
        # nn.Softmax()
    )

    if opt.embedding_path:
        self.embedding.weight.data.copy_(torch.from_numpy(np.load(opt.embedding_path)))
        # self.embedding.weight.requires_grad = False
def __init__(self, opt):
    super(RNN, self).__init__()
    if opt.type_ == 'word':
        pass

    self.lstm = nn.LSTM(input_size=opt.embedding_dim,
                        hidden_size=opt.hidden_size,
                        num_layers=opt.num_layers,
                        bias=True,
                        batch_first=False,
                        # dropout=0.5,
                        bidirectional=True)

    self.fc = nn.Sequential(
        nn.Linear(opt.hidden_size * 2 * 2, opt.linear_hidden_size),
        nn.BatchNorm1d(opt.linear_hidden_size),
        nn.ReLU(inplace=True),
        nn.Linear(opt.linear_hidden_size, opt.num_classes)
    )
def __init__(self):
    super(POSTag, self).__init__()

    self.w = nn.Parameter(torch.randn(postag_nb_layers * 2,
                                      max_sentence_size,
                                      postag_hn_size))
    self.h = nn.Parameter(torch.randn(postag_nb_layers * 2,
                                      max_sentence_size,
                                      postag_hn_size))

    # Bidirectional LSTM
    self.bi_lstm = nn.LSTM(embedding_size,
                           postag_hn_size,
                           postag_nb_layers,
                           bidirectional=True)

    self.fc = nn.Linear(postag_hn_size * 2, nb_postags)
def __init__(self):
    super(Chunking, self).__init__()

    self.input_size = embedding_size \
                      + nb_postags \
                      + postag_hn_size * 2

    self.w = nn.Parameter(torch.randn(chunking_nb_layers * 2,
                                      max_sentence_size,
                                      chunking_hn_size))
    self.h = nn.Parameter(torch.randn(chunking_nb_layers * 2,
                                      max_sentence_size,
                                      chunking_hn_size))

    self.embedding = nn.Embedding(nb_postags, chunking_postag_emb_size)
    self.aux_emb = torch.arange(0, nb_postags)
    self.aux_emb = Variable(self.aux_emb).long()

    self.bi_lstm = nn.LSTM(self.input_size,
                           chunking_hn_size,
                           chunking_nb_layers,
                           bidirectional=True)

    self.fc = nn.Linear(chunking_hn_size * 2, nb_chunktags)
def __init__(self):
    super(Dependency, self).__init__()

    self.input_size = embedding_size \
                      + nb_postags \
                      + nb_chunktags \
                      + postag_hn_size * 2 \
                      + chunking_hn_size * 2

    self.w = nn.Parameter(torch.randn(dependency_nb_layers * 2,
                                      max_sentence_size,
                                      dependency_hn_size))
    self.h = nn.Parameter(torch.randn(dependency_nb_layers * 2,
                                      max_sentence_size,
                                      dependency_hn_size))

    self.bi_lstm = nn.LSTM(self.input_size,
                           dependency_hn_size,
                           dependency_nb_layers,
                           bidirectional=True)

    self.wd = nn.Parameter(torch.randn(dependency_hn_size * 2))
    self.fc = nn.Linear(dependency_hn_size * 2, 1)
def __init__(self):
    super(SentimentClassification, self).__init__()

    self.input_size = embedding_size \
                      + nb_postags \
                      + nb_chunktags \
                      + max_sentence_size \
                      + postag_hn_size * 2 \
                      + chunking_hn_size * 2 \
                      + dependency_hn_size * 2

    self.w = nn.Parameter(torch.randn(sentiment_nb_layers * 2,
                                      max_sentence_size,
                                      sentiment_hn_size))
    self.h = nn.Parameter(torch.randn(sentiment_nb_layers * 2,
                                      max_sentence_size,
                                      sentiment_hn_size))

    self.bi_lstm = nn.LSTM(self.input_size,
                           sentiment_hn_size,
                           sentiment_nb_layers,
                           bidirectional=True)

    self.fc = nn.Linear(sentiment_hn_size * 2, 1)
def __init__(self, num_items, embedding_dim=32,
             item_embedding_layer=None, sparse=False):
    super(LSTMNet, self).__init__()

    self.embedding_dim = embedding_dim

    if item_embedding_layer is not None:
        self.item_embeddings = item_embedding_layer
    else:
        self.item_embeddings = ScaledEmbedding(num_items, embedding_dim,
                                               padding_idx=PADDING_IDX,
                                               sparse=sparse)

    self.item_biases = ZeroEmbedding(num_items, 1, sparse=sparse,
                                     padding_idx=PADDING_IDX)

    self.lstm = nn.LSTM(batch_first=True,
                        input_size=embedding_dim,
                        hidden_size=embedding_dim)
def forward(self, inputs, hidden):
    def select_layer(h_state, i):  # To work on both LSTM / GRU, RNN
        if isinstance(h_state, tuple):
            return tuple([select_layer(s, i) for s in h_state])
        else:
            return h_state[i]

    next_hidden = []
    for i, layer in enumerate(self.layers):
        next_hidden_i = layer(inputs, select_layer(hidden, i))
        output = next_hidden_i[0] if isinstance(next_hidden_i, tuple) \
            else next_hidden_i
        if i + 1 != self.num_layers:
            output = self.dropout(output)
        if self.residual:
            inputs = output + inputs
        else:
            inputs = output
        next_hidden.append(next_hidden_i)
    if isinstance(hidden, tuple):
        next_hidden = tuple([torch.stack(h) for h in zip(*next_hidden)])
    else:
        next_hidden = torch.stack(next_hidden)
    return inputs, next_hidden
def __init__(self):
    super(SuccessorPredictor, self).__init__()

    def identity(v):
        return lambda x: x

    bn2d = nn.InstanceNorm2d
    bn1d = identity

    self.input_size = 9
    self.hidden_size = 512
    self.nb_layers = 1
    self.hidden_fc1 = nn.Linear(512, self.nb_layers * 2 * self.hidden_size)
    self.hidden_fc1_bn = bn1d(self.nb_layers * 2 * self.hidden_size)
    self.rnn = nn.LSTM(self.input_size, self.hidden_size, self.nb_layers,
                       dropout=0.1, batch_first=False)
    self.fc1 = nn.Linear(self.hidden_size, 512)

    init_weights(self)
def __init__(self, input_size, hidden_size, vocab_size, wordEmbed):
    super(PointerAttentionDecoder, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.vocab_size = vocab_size
    self.word_embed = wordEmbed
    # self.decoderRNN = LSTMCell(self.input_size, self.hidden_size)
    self.decoderRNN = LSTM(self.input_size, self.hidden_size, batch_first=True)

    # params for attention
    self.Wh = Linear(2 * self.hidden_size, 2 * self.hidden_size)
    self.Ws = Linear(self.hidden_size, 2 * self.hidden_size)
    self.w_c = Linear(1, 2 * self.hidden_size)
    self.v = Linear(2 * self.hidden_size, 1)

    # parameters for p_gen
    self.w_h = Linear(2 * self.hidden_size, 1)  # double due to concat of BiDi encoder states
    self.w_s = Linear(self.hidden_size, 1)
    self.w_x = Linear(self.input_size, 1)

    # params for output proj
    self.V = Linear(self.hidden_size * 3, self.vocab_size)
    self.min_length = 40
def __init__(self, word_dim, num_words, char_dim, num_chars, num_filters, kernel_size,
             rnn_mode, hidden_size, num_layers, num_labels, tag_space=0,
             embedd_word=None, embedd_char=None, p_in=0.2, p_rnn=0.5):
    super(BiVarRecurrentConv, self).__init__(word_dim, num_words, char_dim, num_chars,
                                             num_filters, kernel_size,
                                             rnn_mode, hidden_size, num_layers, num_labels,
                                             tag_space=tag_space,
                                             embedd_word=embedd_word, embedd_char=embedd_char,
                                             p_in=p_in, p_rnn=p_rnn)

    self.dropout_in = None
    self.dropout_rnn = nn.Dropout2d(p_rnn)

    if rnn_mode == 'RNN':
        RNN = VarMaskedRNN
    elif rnn_mode == 'LSTM':
        RNN = VarMaskedLSTM
    elif rnn_mode == 'GRU':
        RNN = VarMaskedGRU
    else:
        raise ValueError('Unknown RNN mode: %s' % rnn_mode)

    self.rnn = RNN(word_dim + num_filters, hidden_size, num_layers=num_layers,
                   batch_first=True, bidirectional=True, dropout=(p_in, p_rnn))
def __init__(self, vocab_size, max_len, hidden_size, input_dropout_p, dropout_p, n_layers, rnn_cell):
    super(BaseRNN, self).__init__()
    self.vocab_size = vocab_size
    self.max_len = max_len
    self.hidden_size = hidden_size
    self.n_layers = n_layers
    self.input_dropout_p = input_dropout_p
    self.input_dropout = nn.Dropout(p=input_dropout_p)
    if rnn_cell.lower() == 'lstm':
        self.rnn_cell = nn.LSTM
    elif rnn_cell.lower() == 'gru':
        self.rnn_cell = nn.GRU
    else:
        raise ValueError("Unsupported RNN Cell: {0}".format(rnn_cell))
    self.dropout_p = dropout_p
def __init__(self):
    super(CaptionDecoder, self).__init__()

    IMAGEFEATURESIZE = 2560
    EMBEDDINGSIZE = 64
    INPUTSIZE = IMAGEFEATURESIZE + EMBEDDINGSIZE
    HIDDEN = 1024
    LAYERS = 2

    # self.embedding : list of N indices (BxW) -> (B,W,EMBEDDINGSIZE)
    self.embedding = nn.Embedding(len(LEXICON), EMBEDDINGSIZE)

    # The embedding is combined with the image features at each time step
    self.rnn = nn.LSTM(INPUTSIZE, HIDDEN, LAYERS, batch_first=True)

    self.tokenPrediction = nn.Linear(HIDDEN, len(LEXICON))
def __init__(self, input_size, recurrent_layer_size, number_of_layers=1,
             nonlinearity='tanh', dropout=0, bidirectional=False):
    super(Encoder, self).__init__()
    # nn.LSTM does not accept a `nonlinearity` argument (only nn.RNN does),
    # so it is not forwarded to the LSTM here.
    self.encoder = nn.LSTM(input_size,
                           recurrent_layer_size,
                           num_layers=number_of_layers,
                           dropout=dropout,
                           bidirectional=bidirectional)
def __init__(self, input_size, recurrent_layer_size, output_size, number_of_layers=1,
             nonlinearity='tanh', dropout=0, bidirectional=False):
    super(Decoder, self).__init__()
    # nn.LSTM does not accept a `nonlinearity` argument (only nn.RNN does),
    # so it is not forwarded to the LSTM here.
    self.decoder = nn.LSTM(input_size,
                           recurrent_layer_size,
                           num_layers=number_of_layers,
                           dropout=dropout,
                           bidirectional=bidirectional)
    self.linear = nn.Linear(recurrent_layer_size, output_size)
def __init__(self, input_size, hidden_size, num_layers, output_size=0, rnntype='RNN'):
    super(CharModel, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.rnntype = rnntype
    if rnntype == 'RNN':
        self.rnn = nn.RNN(self.input_size, self.hidden_size, self.num_layers)
    elif rnntype == 'LSTM':
        self.rnn = nn.LSTM(self.input_size, self.hidden_size, self.num_layers)
    elif rnntype == 'GRU':
        self.rnn = nn.GRU(self.input_size, self.hidden_size, self.num_layers)
    else:
        raise ValueError('Wrong RNN type, {} is not supported'.format(rnntype))
    if output_size > 0:
        self.output = nn.Linear(hidden_size, output_size)
        num = hidden_size * output_size
        self.output.weight.data.normal_(0, math.sqrt(2. / num))
def __init__(self, preTrained='True'):
    super(MultiModel, self).__init__()
    # Output
    self.output = nn.Sequential(nn.Linear(256, 256),
                                nn.ReLU(inplace=True),
                                nn.Linear(256, 256),
                                nn.ReLU(inplace=True),
                                nn.Linear(256, 2))
    self._initialize_weights()
    # Vision Model
    self.vision = resnet34(pretrained=preTrained, num_classes=128)
    # LSTM Model (temporal)
    self.rnn = nn.LSTM(128, 128, 2, batch_first=True)
    # Language Model
    self.lang = nn.LSTM(100, 128, 3, batch_first=True)
def __init__(self, char_embeddings, hidden_size, bidirectional=True,
             train_char_embeddings=False, cuda=True):
    super(LSTMCharEncoder, self).__init__()
    self.char_embeddings = char_embeddings
    self.char_embedding_dim = self.char_embeddings.embedding_dim
    self.train_char_embeddings = train_char_embeddings
    self.n_layers = 1
    self.num_dirs = 2 if bidirectional else 1
    self.char_hidden_dim = hidden_size
    self.use_cuda = cuda

    self.char_lstm = nn.LSTM(self.char_embedding_dim, self.char_hidden_dim,
                             self.n_layers, bidirectional=False, dropout=0.0)
def __init__(self, embedding_dim, n_layers, hidden_size, bidirectional,
             dropout, separate_lstms=False, cuda=True):
    super(ContextRepresentationLayer, self).__init__()
    self.embedding_dim = embedding_dim
    self.n_layers = n_layers
    self.hidden_size = hidden_size
    self.bidirectional = bidirectional
    self.num_dirs = 1 if not self.bidirectional else 2
    self.dropout = dropout
    self.separate_lstms = separate_lstms
    self.use_cuda = cuda

    self.lstm = nn.LSTM(self.embedding_dim, self.hidden_size, self.n_layers,
                        bidirectional=self.bidirectional, dropout=self.dropout)

    if self.separate_lstms:
        self.lstm_hypo = nn.LSTM(self.embedding_dim, self.hidden_size, self.n_layers,
                                 bidirectional=self.bidirectional, dropout=self.dropout)
def __init__(self, parameter):
    super(BiLSTM_CRF, self).__init__()
    self.embedding_dim = parameter['embedding_dim']
    self.hidden_dim = parameter['hidden_dim']
    self.vocab_size = parameter['vocab_size']
    self.tagset_size = parameter['tagset_size']
    self.decode_method = parameter['decode_method']
    self.loss_function = parameter['loss_function']
    self.freeze = parameter['freeze']

    self.word_embeds = nn.Embedding(self.vocab_size, self.embedding_dim)
    self.dropout = nn.Dropout(p=DROP_OUT)
    # Integer division so that hidden_size stays an int under Python 3.
    self.lstm = nn.LSTM(self.embedding_dim, self.hidden_dim // 2,
                        num_layers=1, bidirectional=True)

    # Maps the output of the LSTM into tag space.
    # We add 2 here, because of START_TAG and STOP_TAG
    self.hidden2tag = nn.Linear(self.hidden_dim, self.tagset_size + 2)

    self.CRF = CRF(self.tagset_size)
def __init__(self, parameter):
    super(LSTMTagger, self).__init__()
    self.hidden_dim = parameter['hidden_dim']
    self.word_embeddings = nn.Embedding(parameter['vocab_size'],
                                        parameter['embedding_dim'])
    self.embedding_dim = parameter['embedding_dim']

    # The LSTM takes word embeddings and capitalization embeddings as inputs,
    # and outputs hidden states with dimensionality hidden_dim.
    self.lstm = nn.LSTM(self.embedding_dim, parameter['hidden_dim'])

    # The linear layer that maps from hidden state space to tag space
    self.hidden2tag = nn.Linear(parameter['hidden_dim'], parameter['tagset_size'])
    self.hidden = self.init_hidden()
    self.loss_function = nn.NLLLoss()
def __init__(self, config):
    super(BiLSTM, self).__init__()
    self.drop = nn.Dropout(config['dropout'])
    self.encoder = nn.Embedding(config['ntoken'], config['ninp'])
    self.bilstm = nn.LSTM(config['ninp'], config['nhid'], config['nlayers'],
                          dropout=config['dropout'], bidirectional=True)
    self.nlayers = config['nlayers']
    self.nhid = config['nhid']
    self.pooling = config['pooling']
    self.dictionary = config['dictionary']
    # self.init_weights()
    self.encoder.weight.data[self.dictionary.word2idx['<pad>']] = 0
    if os.path.exists(config['word-vector']):
        print('Loading word vectors from', config['word-vector'])
        vectors = torch.load(config['word-vector'])
        assert vectors[2] >= config['ninp']
        vocab = vectors[0]
        vectors = vectors[1]
        loaded_cnt = 0
        for word in self.dictionary.word2idx:
            if word not in vocab:
                continue
            real_id = self.dictionary.word2idx[word]
            loaded_id = vocab[word]
            self.encoder.weight.data[real_id] = vectors[loaded_id][:config['ninp']]
            loaded_cnt += 1
        print('%d words from external word vectors loaded.' % loaded_cnt)

# note: init_range constrains the value of initial weights
def __init__(self, nIn, nHidden, nOut):
    super(BidirectionalLSTM, self).__init__()

    self.rnn = nn.LSTM(nIn, nHidden, bidirectional=True)
    self.embedding = nn.Linear(nHidden * 2, nOut)
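This is the BidirectionalLSTM block used in CRNN-style text recognizers. The forward pass that usually accompanies it (a hedged sketch of the common pattern, not necessarily this project's exact code) flattens the time and batch dimensions before the linear projection:

# Hypothetical forward for the module above: input is (T, b, nIn), output is (T, b, nOut).
def forward(self, input):
    recurrent, _ = self.rnn(input)      # (T, b, nHidden * 2)
    T, b, h = recurrent.size()
    t_rec = recurrent.view(T * b, h)
    output = self.embedding(t_rec)      # (T * b, nOut)
    output = output.view(T, b, -1)
    return output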
def __init__(self, batch_size, num_tokens, embed_size, word_gru_hidden,
             bidirectional=True, init_range=0.1, use_lstm=False):
    super(AttentionWordRNN, self).__init__()

    self.batch_size = batch_size
    self.num_tokens = num_tokens
    self.embed_size = embed_size
    self.word_gru_hidden = word_gru_hidden
    self.bidirectional = bidirectional
    self.use_lstm = use_lstm

    self.lookup = nn.Embedding(num_tokens, embed_size)
    if bidirectional == True:
        if use_lstm:
            print("inside using LSTM")
            self.word_gru = nn.LSTM(embed_size, word_gru_hidden, bidirectional=True)
        else:
            self.word_gru = nn.GRU(embed_size, word_gru_hidden, bidirectional=True)
        self.weight_W_word = nn.Parameter(torch.Tensor(2 * word_gru_hidden, 2 * word_gru_hidden))
        self.bias_word = nn.Parameter(torch.Tensor(2 * word_gru_hidden, 1))
        self.weight_proj_word = nn.Parameter(torch.Tensor(2 * word_gru_hidden, 1))
    else:
        if use_lstm:
            self.word_gru = nn.LSTM(embed_size, word_gru_hidden, bidirectional=False)
        else:
            self.word_gru = nn.GRU(embed_size, word_gru_hidden, bidirectional=False)
        self.weight_W_word = nn.Parameter(torch.Tensor(word_gru_hidden, word_gru_hidden))
        self.bias_word = nn.Parameter(torch.Tensor(word_gru_hidden, 1))
        self.weight_proj_word = nn.Parameter(torch.Tensor(word_gru_hidden, 1))

    self.softmax_word = nn.Softmax()
    self.weight_W_word.data.uniform_(-init_range, init_range)
    self.weight_proj_word.data.uniform_(-init_range, init_range)
def __init__(self, on_gpu=False):
    # Current architecture for policy is 3 5x5 convolutions
    # followed by 2 LSTM layers followed by 2 5x5 convolutions
    # and a final 1x1 convolution.
    # This architecture is fully convolutional with no max pooling.
    super(CNNLSTMPolicy, self).__init__()
    self.lstm_layer = 3
    self.hidden_dim = 100
    self.on_gpu = on_gpu

    self.conv1 = nn.Conv2d(11, self.hidden_dim, 5, padding=2)
    self.conv2 = nn.Conv2d(self.hidden_dim, self.hidden_dim, 5, padding=2)
    self.conv3 = nn.Conv2d(self.hidden_dim, self.hidden_dim, 5, padding=2)

    self.pre_lstm_bn = nn.BatchNorm2d(self.hidden_dim)

    self.lstm = nn.LSTM(self.hidden_dim, self.hidden_dim, self.lstm_layer)

    self.lstm_batch_norm = nn.BatchNorm2d(self.hidden_dim)

    self.conv4 = nn.Conv2d(self.hidden_dim, self.hidden_dim, 5, padding=2)
    self.conv5 = nn.Conv2d(self.hidden_dim, self.hidden_dim, 5, padding=2)

    self.begin_conv = nn.Conv2d(self.hidden_dim, 1, 1)
    self.end_conv = nn.Conv2d(self.hidden_dim, 2, 1)
def forward(self, input):
    # TODO perhaps add batch normalization or layer normalization
    x = F.elu(self.conv1(input))
    x = F.elu(self.conv2(x))
    x = F.elu(self.conv3(x))

    # Next flatten the output to be batched into LSTM layers
    # The shape of x is batch_size, channels, height, width
    x = self.pre_lstm_bn(x)
    x = torch.transpose(x, 1, 3)
    x = torch.transpose(x, 1, 2)
    x = x.contiguous()
    x = x.view(x.size(0), self.batch, self.hidden_dim)

    x, hidden = self.lstm(x, (self.hidden_state, self.cell_state))
    self.hidden_state, self.cell_state = hidden

    x = torch.transpose(x, 2, 1)
    x = x.contiguous()
    x = x.view(x.size(0), self.hidden_dim, self.height, self.width)

    x = self.lstm_batch_norm(x)

    x = F.elu(self.conv4(x))
    x = F.elu(self.conv5(x))

    o_begin = self.begin_conv(x)
    o_end = self.end_conv(x)

    o_begin = o_begin.view(o_begin.size(0), -1)
    o_end = o_end.view(o_end.size(0), -1)

    o_begin = F.log_softmax(o_begin)
    o_end = F.log_softmax(o_end)

    return o_begin, o_end
def __init__(self, on_gpu=False):
    # Current architecture for policy is 3 5x5 convolutions
    # followed by LSTM layers followed by 2 5x5 convolutions
    # and a final 1x1 convolution.
    # This architecture is fully convolutional with no max pooling.
    super(ActorCritic, self).__init__()
    self.lstm_layer = 1
    self.hidden_dim = 150
    self.on_gpu = on_gpu

    self.conv1 = nn.Conv2d(11, self.hidden_dim, 5, padding=2)
    self.conv2 = nn.Conv2d(self.hidden_dim, self.hidden_dim, 5, padding=2)
    self.conv3 = nn.Conv2d(self.hidden_dim, self.hidden_dim, 5, padding=2)

    self.pre_lstm_bn = nn.BatchNorm2d(self.hidden_dim)

    self.lstm = nn.LSTM(self.hidden_dim, self.hidden_dim, self.lstm_layer)

    self.lstm_batch_norm = nn.BatchNorm2d(self.hidden_dim)

    self.conv4 = nn.Conv2d(self.hidden_dim, self.hidden_dim, 5, padding=2)
    self.conv5 = nn.Conv2d(self.hidden_dim, self.hidden_dim, 5, padding=2)

    self.move_conv = nn.Conv2d(self.hidden_dim, 8, 1)
    self.value_conv = nn.Conv2d(self.hidden_dim, self.hidden_dim, 1)
    self.value_linear = nn.Linear(self.hidden_dim, 1)
def forward(self, input):
    x = F.elu(self.conv1(input))
    x = F.elu(self.conv2(x))
    x = F.elu(self.conv3(x))

    # Next flatten the output to be batched into LSTM layers
    # The shape of x is batch_size, channels, height, width
    x = self.pre_lstm_bn(x)
    x = torch.transpose(x, 1, 3)
    x = torch.transpose(x, 1, 2)
    x = x.contiguous()
    x = x.view(x.size(0), self.batch, self.hidden_dim)

    x, hidden = self.lstm(x, (self.hidden_state, self.cell_state))
    self.hidden_state, self.cell_state = hidden

    x = torch.transpose(x, 2, 1)
    x = x.contiguous()
    x = x.view(x.size(0), self.hidden_dim, self.height, self.width)

    x = self.lstm_batch_norm(x)

    x = F.elu(self.conv4(x))
    x = F.elu(self.conv5(x))

    logit = self.move_conv(x)
    logit = logit.view(logit.size(0), -1)

    x = self.value_conv(x)
    x = x.view(x.size(0), self.hidden_dim, self.batch)
    x = F.max_pool1d(x, self.batch)
    x = x.squeeze()

    val = self.value_linear(x)

    return val, logit