Python torch.nn module: Dropout() usage examples from source code

The following 50 code examples, extracted from open-source Python projects, illustrate how to use torch.nn.Dropout().
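
Before the project excerpts, a minimal standalone sketch (not taken from any of the projects below) of how nn.Dropout behaves: during training each element is zeroed with probability p and the survivors are scaled by 1/(1-p); in eval mode the layer is an identity.

import torch
import torch.nn as nn

drop = nn.Dropout(p=0.5)   # zero each element with probability 0.5
x = torch.ones(2, 4)

drop.train()               # training mode: dropout is active
print(drop(x))             # roughly half the entries are 0.0, the rest are scaled to 2.0

drop.eval()                # evaluation mode: dropout is a no-op
print(drop(x))             # identical to x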

Project: convNet.pytorch    Author: eladhoffer    | Project source | File source
def __init__(self):
        super(mnist_model, self).__init__()
        self.feats = nn.Sequential(
            nn.Conv2d(1, 32, 5, 1, 1),
            nn.MaxPool2d(2, 2),
            nn.ReLU(True),
            nn.BatchNorm2d(32),

            nn.Conv2d(32, 64, 3,  1, 1),
            nn.ReLU(True),
            nn.BatchNorm2d(64),

            nn.Conv2d(64, 64, 3,  1, 1),
            nn.MaxPool2d(2, 2),
            nn.ReLU(True),
            nn.BatchNorm2d(64),

            nn.Conv2d(64, 128, 3, 1, 1),
            nn.ReLU(True),
            nn.BatchNorm2d(128)
        )

        self.classifier = nn.Conv2d(128, 10, 1)
        self.avgpool = nn.AvgPool2d(6, 6)
        self.dropout = nn.Dropout(0.5)
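
The excerpt above only shows the constructor; a plausible forward pass (a sketch assuming the dropout sits between the pooled features and the 1x1 classifier, not the project's verbatim code) would be:

def forward(self, inputs):
    out = self.feats(inputs)           # (N, 128, H, W) feature maps
    out = self.dropout(out)            # element-wise dropout also works on conv feature maps
    out = self.classifier(out)         # 1x1 conv producing 10 score maps
    out = self.avgpool(out)            # 6x6 average pooling
    return out.view(out.size(0), -1)   # flatten to (N, 10) class scores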
Project: speed    Author: keon    | Project source | File source
def __init__(self, n_layers=2, h_size=512):
        super(ResLSTM, self).__init__()
        print('Building ResNet + LSTM model...')
        self.h_size = h_size
        self.n_layers = n_layers

        resnet = models.resnet50(pretrained=True)
        self.conv = nn.Sequential(*list(resnet.children())[:-1])

        self.lstm = nn.LSTM(1280, h_size, dropout=0.2, num_layers=n_layers)
        self.fc = nn.Sequential(
            nn.Linear(h_size, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 1)
        )
Project: speed    Author: keon    | Project source | File source
def __init__(self, h_size=512, n_layers=3):
        super(DenseLSTM, self).__init__()
        print('Building DenseNet + LSTM model...')
        self.h_size = h_size
        self.n_layers = n_layers

        densenet = models.densenet201(pretrained=True)
        self.conv = nn.Sequential(*list(densenet.children())[:-1])

        self.lstm = nn.LSTM(23040, h_size, dropout=0.2, num_layers=n_layers)
        self.fc = nn.Sequential(
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256, 1)
        )
Project: speed    Author: keon    | Project source | File source
def __init__(self, n_layers=2, h_size=420):
        super(AlexLSTM, self).__init__()
        print('Building AlexNet + LSTM model...')
        self.h_size = h_size
        self.n_layers = n_layers

        alexnet = models.alexnet(pretrained=True)
        self.conv = nn.Sequential(*list(alexnet.children())[:-1])

        self.lstm = nn.LSTM(1280, h_size, dropout=0.2, num_layers=n_layers)
        self.fc = nn.Sequential(
            nn.Linear(h_size, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 1)
        )
Project: DistanceGAN    Author: sagiebenaim    | Project source | File source
def build_conv_block(self, dim, padding_type, norm_layer, use_dropout):
        conv_block = []
        p = 0
        # TODO: support padding types
        assert(padding_type == 'zero')
        p = 1

        # TODO: InstanceNorm
        conv_block += [nn.Conv2d(dim, dim, kernel_size=3, padding=p),
                       norm_layer(dim, affine=True),
                       nn.ReLU(True)]
        if use_dropout:
            conv_block += [nn.Dropout(0.5)]
        conv_block += [nn.Conv2d(dim, dim, kernel_size=3, padding=p),
                       norm_layer(dim, affine=True)]

        return nn.Sequential(*conv_block)
Project: allennlp    Author: allenai    | Project source | File source
def __init__(self,
                 num_heads: int,
                 input_dim: int,
                 attention_dim: int,
                 values_dim: int,
                 output_projection_dim: int = None,
                 attention_dropout_prob: float = 0.1) -> None:
        super(MultiHeadSelfAttention, self).__init__()

        self._num_heads = num_heads
        self._input_dim = input_dim
        self._output_dim = output_projection_dim or input_dim
        self._attention_dim = attention_dim
        self._values_dim = values_dim

        self._query_projections = Parameter(torch.FloatTensor(num_heads, input_dim, attention_dim))
        self._key_projections = Parameter(torch.FloatTensor(num_heads, input_dim, attention_dim))
        self._value_projections = Parameter(torch.FloatTensor(num_heads, input_dim, values_dim))

        self._scale = input_dim ** 0.5
        self._output_projection = Linear(num_heads * values_dim,
                                         self._output_dim)
        self._attention_dropout = Dropout(attention_dropout_prob)

        self.reset_parameters()
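
For orientation, a simplified sketch of the usual multi-head attention pattern (not the AllenNLP forward method; similarities and values stand in for the per-head intermediate tensors): the attention dropout defined above is applied to the softmax-normalized attention weights, not to the inputs or outputs.

# inside forward(), per head (sketch):
attention = torch.nn.functional.softmax(similarities / self._scale, dim=-1)
attention = self._attention_dropout(attention)   # randomly zero some attention weights during training
outputs = torch.bmm(attention, values)           # weighted sum of the value projections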
Project: MIL.pytorch    Author: gujiuxiang    | Project source | File source
def __init__(self, opt):
        super(resnet_mil, self).__init__()
        import model.resnet as resnet
        resnet = resnet.resnet101()
        resnet.load_state_dict(torch.load('/media/jxgu/d2tb/model/resnet/resnet101.pth'))
        self.conv = torch.nn.Sequential()
        self.conv.add_module("conv1", resnet.conv1)
        self.conv.add_module("bn1", resnet.bn1)
        self.conv.add_module("relu", resnet.relu)
        self.conv.add_module("maxpool", resnet.maxpool)
        self.conv.add_module("layer1", resnet.layer1)
        self.conv.add_module("layer2", resnet.layer2)
        self.conv.add_module("layer3", resnet.layer3)
        self.conv.add_module("layer4", resnet.layer4)
        self.l1 = nn.Sequential(nn.Linear(2048, 1000),
                                nn.ReLU(True),
                                nn.Dropout(0.5))
        self.att_size = 7
        self.pool_mil = nn.MaxPool2d(kernel_size=self.att_size, stride=0)
Project: Video-Classification-Action-Recognition    Author: qijiezhao    | Project source | File source
def __init__(self,out_size,gpu_id,num_seg):
        super(VC_inception_v4,self).__init__()
        sys.path.insert(0,'../tool/models_zoo/')
        from inceptionv4.pytorch_load import inceptionv4
        self.inception_v4=inceptionv4(pretrained=True).cuda()
        mod=[nn.Dropout(p=0.8)]#.cuda(self.gpu_id)]
        mod.append(nn.Linear(1536,101))#.cuda(self.gpu_id))
        new_fc=nn.Sequential(*mod)#.cuda(self.gpu_id)
        self.inception_v4.classif=new_fc
        self.num_seg=num_seg
        #self.resnet101.fc=nn.Linear(2048,101).cuda(gpu_id)

        self.avg_pool2d=nn.AvgPool2d(kernel_size=(3,1))#.cuda(self.gpu_id)
        # for params in self.inception_v4.parameters():
        #     params.requires_grad=False
        # for params in self.inception_v4.features[21].parameters():
        #     params.requires_grad=True
Project: Video-Classification-Action-Recognition    Author: qijiezhao    | Project source | File source
def __init__(self):
        super(C3D_net,self).__init__()
        self.conv1=nn.Conv3d(3,64,kernel_size=(3,3,3),stride=1,padding=(1,1,1))
        self.relu=nn.ReLU()
        self.maxpool1=nn.MaxPool3d(kernel_size=(1,2,2),stride=(1,2,2))
        self.conv2=nn.Conv3d(64,128,kernel_size=(3,3,3),stride=1,padding=(1,1,1))
        self.maxpool2=nn.MaxPool3d(kernel_size=(2,2,2),stride=(2,2,2))
        self.conv3=nn.Conv3d(128,256,kernel_size=(3,3,3),stride=1,padding=(1,1,1))
        self.maxpool3=nn.MaxPool3d(kernel_size=(2,2,2),stride=(2,2,2))
        self.conv4=nn.Conv3d(256,256,kernel_size=(3,3,3),stride=1,padding=(1,1,1))
        self.maxpool4=nn.MaxPool3d(kernel_size=(2,2,2),stride=(2,2,2))
        self.conv5=nn.Conv3d(256,256,kernel_size=(3,3,3),stride=1,padding=(1,1,1))
        self.maxpool5=nn.MaxPool3d(kernel_size=(2,2,2),stride=(2,2,2))
        self.num_out_maxpool5=2304
        self.fc6=nn.Linear(self.num_out_maxpool5,2048)#TBA
        self.fc7=nn.Linear(2048,2048)
        #self.dropout=nn.Dropout(p=0.5)
        self.fc8=nn.Linear(2048,101)
        self._initialize_weights()
Project: PyTorchDemystified    Author: hhsecond    | Project source | File source
def __init__(self, config):
        super(SNLIClassifier, self).__init__()
        self.config = config
        self.embed = nn.Embedding(config.n_embed, config.d_embed)
        self.projection = Linear(config.d_embed, config.d_proj)
        self.embed_bn = BatchNorm(config.d_proj)
        self.embed_dropout = nn.Dropout(p=config.embed_dropout)
        self.encoder = SPINN(config) if config.spinn else Encoder(config)
        feat_in_size = config.d_hidden * (
            2 if self.config.birnn and not self.config.spinn else 1)
        self.feature = Feature(feat_in_size, config.mlp_dropout)
        self.mlp_dropout = nn.Dropout(p=config.mlp_dropout)
        self.relu = nn.ReLU()
        mlp_in_size = 4 * feat_in_size
        mlp = [nn.Linear(mlp_in_size, config.d_mlp), self.relu,
               nn.BatchNorm1d(config.d_mlp), self.mlp_dropout]
        for i in range(config.n_mlp_layers - 1):
            mlp.extend([nn.Linear(config.d_mlp, config.d_mlp), self.relu,
                        nn.BatchNorm1d(config.d_mlp), self.mlp_dropout])
        mlp.append(nn.Linear(config.d_mlp, config.d_out))
        self.out = nn.Sequential(*mlp)
Project: cnn-text-classification-pytorch    Author: Shawn1993    | Project source | File source
def __init__(self, args):
        super(CNN_Text,self).__init__()
        self.args = args

        V = args.embed_num
        D = args.embed_dim
        C = args.class_num
        Ci = 1
        Co = args.kernel_num
        Ks = args.kernel_sizes

        self.embed = nn.Embedding(V, D)
        #self.convs1 = [nn.Conv2d(Ci, Co, (K, D)) for K in Ks]
        self.convs1 = nn.ModuleList([nn.Conv2d(Ci, Co, (K, D)) for K in Ks])
        '''
        self.conv13 = nn.Conv2d(Ci, Co, (3, D))
        self.conv14 = nn.Conv2d(Ci, Co, (4, D))
        self.conv15 = nn.Conv2d(Ci, Co, (5, D))
        '''
        self.dropout = nn.Dropout(args.dropout)
        self.fc1 = nn.Linear(len(Ks)*Co, C)
Project: attention-is-all-you-need-pytorch    Author: jadore801120    | Project source | File source
def __init__(self, n_head, d_model, d_k, d_v, dropout=0.1):
        super(MultiHeadAttention, self).__init__()

        self.n_head = n_head
        self.d_k = d_k
        self.d_v = d_v

        self.w_qs = nn.Parameter(torch.FloatTensor(n_head, d_model, d_k))
        self.w_ks = nn.Parameter(torch.FloatTensor(n_head, d_model, d_k))
        self.w_vs = nn.Parameter(torch.FloatTensor(n_head, d_model, d_v))

        self.attention = ScaledDotProductAttention(d_model)
        self.layer_norm = LayerNormalization(d_model)
        self.proj = Linear(n_head*d_v, d_model)

        self.dropout = nn.Dropout(dropout)

        init.xavier_normal(self.w_qs)
        init.xavier_normal(self.w_ks)
        init.xavier_normal(self.w_vs)
Project: attention-is-all-you-need-pytorch    Author: jadore801120    | Project source | File source
def __init__(
            self, n_tgt_vocab, n_max_seq, n_layers=6, n_head=8, d_k=64, d_v=64,
            d_word_vec=512, d_model=512, d_inner_hid=1024, dropout=0.1):

        super(Decoder, self).__init__()
        n_position = n_max_seq + 1
        self.n_max_seq = n_max_seq
        self.d_model = d_model

        self.position_enc = nn.Embedding(
            n_position, d_word_vec, padding_idx=Constants.PAD)
        self.position_enc.weight.data = position_encoding_init(n_position, d_word_vec)

        self.tgt_word_emb = nn.Embedding(
            n_tgt_vocab, d_word_vec, padding_idx=Constants.PAD)
        self.dropout = nn.Dropout(dropout)

        self.layer_stack = nn.ModuleList([
            DecoderLayer(d_model, d_inner_hid, n_head, d_k, d_v, dropout=dropout)
            for _ in range(n_layers)])
Project: Tree-LSTM-LM    Author: vgene    | Project source | File source
def __init__(self, args, mapping):
        super(CharLM, self).__init__()

        self.batch_size = args.batch_size
        self.seq_length = args.seq_length
        self.vocab_size = args.vocab_size
        self.embedding_dim = args.embedding_dim
        self.layer_num = args.layer_num
        self.dropout_prob = args.dropout_prob
        self.lr = args.lr
        self.char_embedding = nn.Embedding(self.vocab_size, self.embedding_dim)
        self.dropout = nn.Dropout(self.dropout_prob)

        self.lstm = nn.LSTM(input_size = self.embedding_dim,
                            hidden_size = self.embedding_dim,
                            num_layers= self.layer_num,
                            dropout = self.dropout_prob)
        self.fc = nn.Linear(self.embedding_dim, self.vocab_size)
        self.optimizer = optim.Adam(self.parameters(), lr=self.lr)
        self.mapping = mapping
Project: Tree-LSTM-LM    Author: vgene    | Project source | File source
def __init__(self, args, attr_size, node_size):
        super(TreeLM, self).__init__()

        self.batch_size = args.batch_size
        self.seq_length = args.seq_length
        self.attr_size = attr_size
        self.node_size = node_size

        self.embedding_dim = args.embedding_dim
        self.layer_num = args.layer_num
        self.dropout_prob = args.dropout_prob
        self.lr = args.lr

        self.attr_embedding = nn.Embedding(self.attr_size, self.embedding_dim)
        self.dropout = nn.Dropout(self.dropout_prob)

        self.lstm = nn.LSTM(input_size = self.embedding_dim,
                            hidden_size = self.embedding_dim,
                            num_layers= self.layer_num,
                            dropout = self.dropout_prob)

        self.fc = nn.Linear(self.embedding_dim, self.node_size)
        self.optimizer = optim.Adam(self.parameters(), lr=self.lr)
        # self.node_mapping = node_mapping
Project: Tacotron_pytorch    Author: root20    | Project source | File source
def __init__(self, hidden_size, output_size, r_factor=2, dropout_p=0.5):
        super(AttnDecoderRNN, self).__init__()
        self.r_factor = r_factor

        self.prenet = nn.Sequential(
            nn.Linear(output_size, 2 * hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout_p),
            nn.Linear(2 * hidden_size, hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout_p)
        )
        self.linear_dec = nn.Linear(2 * hidden_size, 2 * hidden_size)
        self.gru_att = nn.GRU(hidden_size, 2 * hidden_size, batch_first=True)

        self.attn = nn.Linear(2 * hidden_size, 1)       # TODO: change name...

        self.short_cut = nn.Linear(4 * hidden_size, 2 * hidden_size)
        self.gru_dec1 = nn.GRU(4 * hidden_size, 2 * hidden_size, num_layers=1, batch_first=True)
        self.gru_dec2 = nn.GRU(2 * hidden_size, 2 * hidden_size, num_layers=1, batch_first=True)

        self.out = nn.Linear(2 * hidden_size, r_factor * output_size)
Project: cnn-lstm-bilstm-deepcnn-clstm-in-pytorch    Author: bamtercelboo    | Project source | File source
def __init__(self, args):
        super(GRU, self).__init__()
        self.args = args
        # print(args)

        self.hidden_dim = args.lstm_hidden_dim
        self.num_layers = args.lstm_num_layers
        V = args.embed_num
        D = args.embed_dim
        C = args.class_num
        # self.embed = nn.Embedding(V, D, max_norm=args.max_norm)
        self.embed = nn.Embedding(V, D)
        # word embedding
        if args.word_Embedding:
            pretrained_weight = np.array(args.pretrained_weight)
            self.embed.weight.data.copy_(torch.from_numpy(pretrained_weight))
        # gru
        self.gru = nn.GRU(D, self.hidden_dim, dropout=args.dropout, num_layers=self.num_layers)
        # linear
        self.hidden2label = nn.Linear(self.hidden_dim, C)
        # hidden
        self.hidden = self.init_hidden(self.num_layers, args.batch_size)
        # dropout
        self.dropout = nn.Dropout(args.dropout)
Project: cnn-lstm-bilstm-deepcnn-clstm-in-pytorch    Author: bamtercelboo    | Project source | File source
def __init__(self, args):
        super(CNN_Text,self).__init__()
        self.args = args

        V = args.embed_num
        D = args.embed_dim
        C = args.class_num
        Ci = 1
        Co = args.kernel_num
        Ks = args.kernel_sizes

        self.embed = nn.Embedding(V, D)
        # print("aaaaaaaa", self.embed.weight)
        pretrained_weight = np.array(args.pretrained_weight)
        self.embed.weight.data.copy_(torch.from_numpy(pretrained_weight))
        # print("bbbbbbbb", self.embed.weight)

        self.convs1 = nn.ModuleList([nn.Conv2d(Ci, Co, (K, D)) for K in Ks])  # ModuleList registers the convs as sub-modules so their parameters are trained
        '''
        self.conv13 = nn.Conv2d(Ci, Co, (3, D))
        self.conv14 = nn.Conv2d(Ci, Co, (4, D))
        self.conv15 = nn.Conv2d(Ci, Co, (5, D))
        '''
        self.dropout = nn.Dropout(args.dropout)
        self.fc1 = nn.Linear(len(Ks)*Co, C)
Project: cnn-lstm-bilstm-deepcnn-clstm-in-pytorch    Author: bamtercelboo    | Project source | File source
def __init__(self, args):
        super(BiGRU, self).__init__()
        self.args = args
        # print(args)

        self.hidden_dim = args.lstm_hidden_dim
        self.num_layers = args.lstm_num_layers
        V = args.embed_num
        D = args.embed_dim
        C = args.class_num
        # self.embed = nn.Embedding(V, D, max_norm=args.max_norm)
        self.embed = nn.Embedding(V, D)
        # word embedding
        if args.word_Embedding:
            pretrained_weight = np.array(args.pretrained_weight)
            self.embed.weight.data.copy_(torch.from_numpy(pretrained_weight))
        # gru
        self.bigru = nn.GRU(D, self.hidden_dim, dropout=args.dropout, num_layers=self.num_layers, bidirectional=True)
        # linear
        self.hidden2label = nn.Linear(self.hidden_dim * 2, C)
        # hidden
        self.hidden = self.init_hidden(self.num_layers, args.batch_size)
        # dropout
        self.dropout = nn.Dropout(args.dropout)
Project: FewShotLearning    Author: gitabcworld    | Project source | File source
def convLayer(opt, layer_pos, nInput, nOutput, k ):
    "3x3 convolution with padding"
    #if 'BN_momentum' in opt.keys():
    #    batchNorm = nn.BatchNorm2d(nOutput,momentum=opt['BN_momentum'])
    #else:
    #    batchNorm = nn.BatchNorm2d(nOutput)

    seq = nn.Sequential(
        nn.Conv2d(nInput, nOutput, kernel_size=k,
                  stride=1, padding=1, bias=True),
        #batchNorm,
        opt['bnorm2d'][layer_pos],
        nn.ReLU(True),
        nn.MaxPool2d(kernel_size=2, stride=2)
    )
    if opt['useDropout']: # Add dropout module
        list_seq = list(seq.modules())[1:]
        list_seq.append(nn.Dropout(0.1))
        seq = nn.Sequential(*list_seq)
    return seq
Project: FewShotLearning    Author: gitabcworld    | Project source | File source
def __init__(self, cell_class, input_size, hidden_size, num_layers=1,
                 use_bias=True, batch_first=False, dropout=0, **kwargs):
        super(LSTM, self).__init__()
        self.cell_class = cell_class
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.use_bias = use_bias
        self.batch_first = batch_first
        self.dropout = dropout

        self.cells = []
        for layer in range(num_layers):
            layer_input_size = input_size if layer == 0 else hidden_size
            cell = cell_class(input_size=layer_input_size,
                              hidden_size=hidden_size,
                              **kwargs)
            self.cells.append(cell)
            setattr(self, 'cell_{}'.format(layer), cell)
        self.dropout_layer = nn.Dropout(dropout)
        self.reset_parameters()
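
The constructor stores a single shared nn.Dropout; a hedged sketch of the stacking loop it would typically appear in (run_layer is a hypothetical per-layer recurrence helper, not part of the project) applies it between layers, mirroring the dropout argument of nn.LSTM:

# sketch of the layer-stacking loop inside forward():
output = input_
for layer in range(self.num_layers):
    cell = getattr(self, 'cell_{}'.format(layer))
    output, (h_n, c_n) = run_layer(cell, output)   # hypothetical per-layer recurrence
    if layer + 1 < self.num_layers:
        output = self.dropout_layer(output)        # inter-layer dropout, skipped after the last layer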
Project: clevr-iep    Author: facebookresearch    | Project source | File source
def build_mlp(input_dim, hidden_dims, output_dim,
              use_batchnorm=False, dropout=0):
  layers = []
  D = input_dim
  if dropout > 0:
    layers.append(nn.Dropout(p=dropout))
  if use_batchnorm:
    layers.append(nn.BatchNorm1d(input_dim))
  for dim in hidden_dims:
    layers.append(nn.Linear(D, dim))
    if use_batchnorm:
      layers.append(nn.BatchNorm1d(dim))
    if dropout > 0:
      layers.append(nn.Dropout(p=dropout))
    layers.append(nn.ReLU(inplace=True))
    D = dim
  layers.append(nn.Linear(D, output_dim))
  return nn.Sequential(*layers)
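
A quick usage sketch for the helper above (dimensions are arbitrary and torch is assumed imported): with dropout > 0, Dropout(p) is placed on the input and after every hidden BatchNorm1d, but never after the final Linear.

mlp = build_mlp(input_dim=512, hidden_dims=[1024, 1024], output_dim=28,
                use_batchnorm=True, dropout=0.5)
scores = mlp(torch.randn(32, 512))   # -> (32, 28); call mlp.eval() at test time to disable dropout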
Project: NeuralMT    Author: hlt-mt    | Project source | File source
def __init__(self):
            self.layers = 2  # Number of layers in the LSTM encoder/decoder
            self.rnn_size = 500  # Size of hidden states
            self.rnn_type = 'LSTM'  # The gate type used in the RNNs
            self.word_vec_size = 500  # Word embedding sizes
            self.input_feed = 1  # Feed the context vector at each time step as additional input to the decoder
            self.brnn = True  # Use a bidirectional encoder
            self.brnn_merge = 'sum'  # Merge action for the bidirectional hidden states: [concat|sum]
            self.context_gate = None  # Type of context gate to use [source|target|both] or None.
            self.dropout = 0.3  # Dropout probability; applied between LSTM stacks.

            # Optimization options -------------------------------------------------------------------------------------
            self.optim = 'sgd'  # Optimization method. [sgd|adagrad|adadelta|adam]
            self.max_grad_norm = 5  # If norm(gradient vector) > max_grad_norm, re-normalize
            self.learning_rate = 1.0
            self.learning_rate_decay = 0.9
            self.start_decay_at = 10
Project: NeuralMT    Author: hlt-mt    | Project source | File source
def __init__(self, opt, dicts):
        self.layers = opt.layers
        self.input_feed = opt.input_feed
        input_size = opt.word_vec_size
        if self.input_feed:
            input_size += opt.rnn_size

        super(Decoder, self).__init__()
        self.word_lut = nn.Embedding(dicts.size(),
                                     opt.word_vec_size,
                                     padding_idx=onmt.Constants.PAD)

        stackedCell = StackedLSTM if opt.rnn_type == "LSTM" else StackedGRU
        self.rnn = stackedCell(opt.layers, input_size,
                               opt.rnn_size, opt.dropout)
        self.attn = onmt.modules.GlobalAttention(opt.rnn_size)
        self.context_gate = None
        if opt.context_gate is not None:
            self.context_gate = ContextGateFactory(
                opt.context_gate, opt.word_vec_size,
                opt.rnn_size, opt.rnn_size, opt.rnn_size
            )
        self.dropout = nn.Dropout(opt.dropout)

        self.hidden_size = opt.rnn_size
Project: action-detection    Author: yjxiong    | Project source | File source
def _prepare_ssn(self, num_class, stpp_cfg):
        feature_dim = getattr(self.base_model, self.base_model.last_layer_name).in_features
        if self.dropout == 0:
            setattr(self.base_model, self.base_model.last_layer_name, Identity())
        else:
            setattr(self.base_model, self.base_model.last_layer_name, nn.Dropout(p=self.dropout))

        self.stpp = StructuredTemporalPyramidPooling(feature_dim, True, configs=stpp_cfg)
        self.activity_fc = nn.Linear(self.stpp.activity_feat_dim(), num_class + 1)
        self.completeness_fc = nn.Linear(self.stpp.completeness_feat_dim(), num_class)

        nn.init.normal(self.activity_fc.weight.data, 0, 0.001)
        nn.init.constant(self.activity_fc.bias.data, 0)
        nn.init.normal(self.completeness_fc.weight.data, 0, 0.001)
        nn.init.constant(self.completeness_fc.bias.data, 0)

        self.test_fc = None
        if self.with_regression:
            self.regressor_fc = nn.Linear(self.stpp.completeness_feat_dim(), 2 * num_class)
            nn.init.normal(self.regressor_fc.weight.data, 0, 0.001)
            nn.init.constant(self.regressor_fc.bias.data, 0)
        else:
            self.regressor_fc = None

        return feature_dim
Project: e2e-model-learning    Author: locuslab    | Project source | File source
def __init__(self, X, Y, hidden_layer_sizes):
        super(Net, self).__init__()

        # Initialize linear layer with least squares solution
        X_ = np.hstack([X, np.ones((X.shape[0],1))])
        Theta = np.linalg.solve(X_.T.dot(X_), X_.T.dot(Y))

        self.lin = nn.Linear(X.shape[1], Y.shape[1])
        W,b = self.lin.parameters()
        W.data = torch.Tensor(Theta[:-1,:].T)
        b.data = torch.Tensor(Theta[-1,:])

        # Set up non-linear network of 
        # Linear -> BatchNorm -> ReLU -> Dropout layers
        layer_sizes = [X.shape[1]] + hidden_layer_sizes
        layers = reduce(operator.add, 
            [[nn.Linear(a,b), nn.BatchNorm1d(b), nn.ReLU(), nn.Dropout(p=0.2)] 
                for a,b in zip(layer_sizes[0:-1], layer_sizes[1:])])
        layers += [nn.Linear(layer_sizes[-1], Y.shape[1])]
        self.net = nn.Sequential(*layers)
        self.sig = Parameter(torch.ones(1, Y.shape[1]).cuda())
Project: alpha-dimt-icmlws    Author: sotetsuk    | Project source | File source
def __init__(self, opt, dicts):
        self.layers = opt.layers
        self.input_feed = opt.input_feed
        input_size = opt.word_vec_size
        if self.input_feed:
            input_size += opt.rnn_size

        super(Decoder, self).__init__()
        self.word_lut = nn.Embedding(dicts.size(),
                                  opt.word_vec_size,
                                  padding_idx=onmt.Constants.PAD)
        self.rnn = StackedLSTM(opt.layers, input_size, opt.rnn_size, opt.dropout)
        self.attn = onmt.modules.GlobalAttention(opt.rnn_size)
        self.dropout = nn.Dropout(opt.dropout)

        self.hidden_size = opt.rnn_size
Project: pyprob    Author: probprog    | Project source | File source
def __init__(self, input_example_non_batch, output_dim, reshape=None, dropout=0):
        super(ObserveEmbeddingCNN2D6C, self).__init__()
        self.reshape = reshape
        if self.reshape is not None:
            input_example_non_batch = input_example_non_batch.view(self.reshape)
            self.reshape.insert(0, -1) # For correct handling of the batch dimension in self.forward
        if input_example_non_batch.dim() == 2:
            self.input_sample = input_example_non_batch.unsqueeze(0).cpu()
        elif input_example_non_batch.dim() == 3:
            self.input_sample = input_example_non_batch.cpu()
        else:
            util.logger.log('ObserveEmbeddingCNN2D6C: Expecting a 3d input_example_non_batch (num_channels x height x width) or a 2d input_example_non_batch (height x width). Received: {0}'.format(input_example_non_batch.size()))
        self.input_channels = self.input_sample.size(0)
        self.output_dim = output_dim
        self.conv1 = nn.Conv2d(self.input_channels, 64, 3)
        self.conv2 = nn.Conv2d(64, 64, 3)
        self.conv3 = nn.Conv2d(64, 128, 3)
        self.conv4 = nn.Conv2d(128, 128, 3)
        self.conv5 = nn.Conv2d(128, 128, 3)
        self.conv6 = nn.Conv2d(128, 128, 3)
        self.drop = nn.Dropout(dropout)
Project: pyprob    Author: probprog    | Project source | File source
def __init__(self, input_example_non_batch, output_dim, reshape=None, dropout=0):
        super(ObserveEmbeddingCNN3D4C, self).__init__()
        self.reshape = reshape
        if self.reshape is not None:
            input_example_non_batch = input_example_non_batch.view(self.reshape)
            self.reshape.insert(0, -1) # For correct handling of the batch dimension in self.forward
        if input_example_non_batch.dim() == 3:
            self.input_sample = input_example_non_batch.unsqueeze(0).cpu()
        elif input_example_non_batch.dim() == 4:
            self.input_sample = input_example_non_batch.cpu()
        else:
            util.logger.log('ObserveEmbeddingCNN3D4C: Expecting a 4d input_example_non_batch (num_channels x depth x height x width) or a 3d input_example_non_batch (depth x height x width). Received: {0}'.format(input_example_non_batch.size()))
        self.input_channels = self.input_sample.size(0)
        self.output_dim = output_dim
        self.conv1 = nn.Conv3d(self.input_channels, 64, 3)
        self.conv2 = nn.Conv3d(64, 64, 3)
        self.conv3 = nn.Conv3d(64, 128, 3)
        self.conv4 = nn.Conv3d(128, 128, 3)
        self.drop = nn.Dropout(dropout)
Project: semanaly    Author: zqhZY    | Project source | File source
def __init__(self, opt):
        self.name = "TextLstm"
        super(TextLSTM, self).__init__()
        self.opt = opt

        self.embedding = nn.Embedding(opt.vocab_size, opt.embed_dim)

        self.lstm = nn.LSTM(input_size=opt.embed_dim,
                            hidden_size=opt.hidden_size,
                            num_layers=1,
                            batch_first=True,
                            bidirectional=False)

        self.linears = nn.Sequential(
            nn.Linear(opt.hidden_size, opt.linear_hidden_size),
            nn.ReLU(),
            nn.Dropout(0.25),
            nn.Linear(opt.linear_hidden_size, opt.num_classes),
            # nn.Softmax()
        )

        if opt.embedding_path:
            self.embedding.weight.data.copy_(torch.from_numpy(np.load(opt.embedding_path)))
        #     # self.embedding.weight.requires_grad = False
Project: semanaly    Author: zqhZY    | Project source | File source
def __init__(self, args):
        super(TextCNN, self).__init__()

        self.args = args
        V = args.vocab_size
        D = args.embed_dim
        C = args.num_classes
        Cin = 1
        Cout = args.kernel_num
        Ks = args.kernel_sizes


        self.embeding = nn.Embedding(V, D)
        self.convs = nn.ModuleList([nn.Conv2d(Cin, Cout, (K, D)) for K in Ks])

        self.dropout = nn.Dropout(args.dropout)
        self.fc = nn.Linear(len(Ks)*Cout, C)
Project: benchmark    Author: pytorch    | Project source | File source
def __init__(self, cell_class, input_size, hidden_size, num_layers=1,
                 use_bias=True, batch_first=False, dropout=0, **kwargs):
        super(LSTM, self).__init__()
        self.cell_class = cell_class
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.use_bias = use_bias
        self.batch_first = batch_first
        self.dropout = dropout

        for layer in range(num_layers):
            layer_input_size = input_size if layer == 0 else hidden_size
            cell = cell_class(input_size=layer_input_size,
                              hidden_size=hidden_size,
                              **kwargs)
            setattr(self, 'cell_{}'.format(layer), cell)
        self.dropout_layer = nn.Dropout(dropout)
        self.reset_parameters()
Project: tsn-pytorch    Author: yjxiong    | Project source | File source
def _prepare_tsn(self, num_class):
        feature_dim = getattr(self.base_model, self.base_model.last_layer_name).in_features
        if self.dropout == 0:
            setattr(self.base_model, self.base_model.last_layer_name, nn.Linear(feature_dim, num_class))
            self.new_fc = None
        else:
            setattr(self.base_model, self.base_model.last_layer_name, nn.Dropout(p=self.dropout))
            self.new_fc = nn.Linear(feature_dim, num_class)

        std = 0.001
        if self.new_fc is None:
            normal(getattr(self.base_model, self.base_model.last_layer_name).weight, 0, std)
            constant(getattr(self.base_model, self.base_model.last_layer_name).bias, 0)
        else:
            normal(self.new_fc.weight, 0, std)
            constant(self.new_fc.bias, 0)
        return feature_dim
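
The helper above either keeps a plain Linear classifier (dropout == 0) or replaces the backbone's last layer with nn.Dropout and attaches a fresh Linear head; a hedged sketch of how the two cases are consumed later (frames is an assumed input name, not the project's verbatim forward):

base_out = self.base_model(frames)    # ends in Linear(...) or in Dropout(p=self.dropout)
if self.new_fc is not None:
    base_out = self.new_fc(base_out)  # class scores are computed after the dropout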
Project: Seq2Seq-PyTorch    Author: MaximumEntropy    | Project source | File source
def __init__(
        self,
        input_size,
        rnn_size,
        num_layers,
        batch_first=True,
        dropout=0.
    ):
        """Initialize params."""
        super(StackedAttentionLSTM, self).__init__()
        self.dropout = nn.Dropout(dropout)
        self.input_size = input_size
        self.rnn_size = rnn_size
        self.batch_first = batch_first

        self.layers = []
        for i in range(num_layers):
            layer = LSTMAttentionDot(
                input_size, rnn_size, batch_first=self.batch_first
            )
            self.add_module('layer_%d' % i, layer)
            self.layers += [layer]
            input_size = rnn_size
Project: pytorch-semantic-segmentation    Author: ZijunDeng    | Project source | File source
def __init__(self, num_classes, pretrained=True):
        super(FCN32VGG, self).__init__()
        vgg = models.vgg16()
        if pretrained:
            vgg.load_state_dict(torch.load(vgg16_caffe_path))
        features, classifier = list(vgg.features.children()), list(vgg.classifier.children())

        features[0].padding = (100, 100)

        for f in features:
            if 'MaxPool' in f.__class__.__name__:
                f.ceil_mode = True
            elif 'ReLU' in f.__class__.__name__:
                f.inplace = True

        self.features5 = nn.Sequential(*features)

        fc6 = nn.Conv2d(512, 4096, kernel_size=7)
        fc6.weight.data.copy_(classifier[0].weight.data.view(4096, 512, 7, 7))
        fc6.bias.data.copy_(classifier[0].bias.data)
        fc7 = nn.Conv2d(4096, 4096, kernel_size=1)
        fc7.weight.data.copy_(classifier[3].weight.data.view(4096, 4096, 1, 1))
        fc7.bias.data.copy_(classifier[3].bias.data)
        score_fr = nn.Conv2d(4096, num_classes, kernel_size=1)
        score_fr.weight.data.zero_()
        score_fr.bias.data.zero_()
        self.score_fr = nn.Sequential(
            fc6, nn.ReLU(inplace=True), nn.Dropout(), fc7, nn.ReLU(inplace=True), nn.Dropout(), score_fr
        )

        self.upscore = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=64, stride=32, bias=False)
        self.upscore.weight.data.copy_(get_upsampling_weight(num_classes, num_classes, 64))
Project: seq2seq.pytorch    Author: eladhoffer    | Project source | File source
def __init__(self, vocab_size, hidden_size=512, embedding_size=None,
                 num_layers=6, num_heads=8, inner_linear=1024,
                 mask_symbol=PAD, dropout=0):

        super(TransformerAttentionEncoder, self).__init__()
        embedding_size = embedding_size or hidden_size
        self.hidden_size = hidden_size
        self.batch_first = True
        self.mask_symbol = mask_symbol
        self.embedder = nn.Embedding(
            vocab_size, embedding_size, padding_idx=PAD)
        self.scale_embedding = hidden_size ** 0.5
        self.dropout = nn.Dropout(dropout, inplace=True)
        self.blocks = nn.ModuleList([EncoderBlock(hidden_size, num_heads, inner_linear, dropout)
                                     for _ in range(num_layers)
                                     ])
Project: seq2seq.pytorch    Author: eladhoffer    | Project source | File source
def __init__(self, vocab_size, hidden_size=512, embedding_size=None,
                 num_layers=6, num_heads=8, dropout=0, inner_linear=1024,
                 mask_symbol=PAD, tie_embedding=True):

        super(TransformerAttentionDecoder, self).__init__()
        embedding_size = embedding_size or hidden_size
        self.batch_first = True
        self.mask_symbol = mask_symbol
        self.embedder = nn.Embedding(
            vocab_size, embedding_size, padding_idx=PAD)
        self.scale_embedding = hidden_size ** 0.5
        self.dropout = nn.Dropout(dropout, inplace=True)
        self.blocks = nn.ModuleList([DecoderBlock(hidden_size, num_heads, inner_linear, dropout)
                                     for _ in range(num_layers)
                                     ])
        self.classifier = nn.Linear(hidden_size, vocab_size)
        if tie_embedding:
            self.embedder.weight = self.classifier.weight
Project: CycleGANwithPerceptionLoss    Author: EliasVansteenkiste    | Project source | File source
def __init__(self, block, layers, num_classes=1000):
        self.inplanes = 64
        super(ResNet34, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7)
        self.fc_drop = nn.Dropout(p=0.75)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
Project: pytorch_cycle_gan    Author: jinfagang    | Project source | File source
def build_conv_block(self, dim, padding_type, norm_layer, use_dropout):
        conv_block = []
        p = 0
        # TODO: support padding types
        assert (padding_type == 'zero')
        p = 1

        # TODO: InstanceNorm
        conv_block += [nn.Conv2d(dim, dim, kernel_size=3, padding=p),
                       norm_layer(dim, affine=True),
                       nn.ReLU(True)]
        if use_dropout:
            conv_block += [nn.Dropout(0.5)]
        conv_block += [nn.Conv2d(dim, dim, kernel_size=3, padding=p),
                       norm_layer(dim, affine=True)]

        return nn.Sequential(*conv_block)
Project: Seq2Seq-on-Word-Sense-Disambiguition    Author: lbwbowenLi    | Project source | File source
def __init__(self, hidden_size, output_size, n_layers=1, dropout_p=0.1):
        super(BahdanauAttnDecoderRNN, self).__init__()

        # Define parameters
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout_p = dropout_p
        self.max_length = max_length  # assumes a module-level max_length defined elsewhere in the original file

        # Define layers
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.dropout = nn.Dropout(dropout_p)
        self.attn = Attn('concat', hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=dropout_p)
        self.out = nn.Linear(hidden_size, output_size)
Project: Seq2Seq-on-Word-Sense-Disambiguition    Author: lbwbowenLi    | Project source | File source
def __init__(self, attn_model, hidden_size, output_size, n_layers=1, dropout=0.1):
        super(LuongAttnDecoderRNN, self).__init__()

        # Keep for reference
        self.attn_model = attn_model
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout = dropout

        # Define layers
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.embedding_dropout = nn.Dropout(dropout)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=dropout)
        self.concat = nn.Linear(hidden_size * 2, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)

        # Choose attention model
        if attn_model != 'none':
            self.attn = Attn(attn_model, hidden_size)
Project: Seq2Seq-on-Word-Sense-Disambiguition    Author: lbwbowenLi    | Project source | File source
def __init__(self, hidden_size, output_size, n_layers=1, dropout_p=0.1):
        super(BahdanauAttnDecoderRNN, self).__init__()

        # Define parameters
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout_p = dropout_p
        self.max_length = max_length  # assumes a module-level max_length defined elsewhere in the original file

        # Define layers
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.dropout = nn.Dropout(dropout_p)
        self.attn = Attn('concat', hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=dropout_p)
        self.out = nn.Linear(hidden_size, output_size)
Project: Seq2Seq-on-Word-Sense-Disambiguition    Author: lbwbowenLi    | Project source | File source
def __init__(self, attn_model, hidden_size, output_size, n_layers=1, dropout=0.1):
        super(LuongAttnDecoderRNN, self).__init__()

        # Keep for reference
        self.attn_model = attn_model
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout = dropout

        # Define layers
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.embedding_dropout = nn.Dropout(dropout)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=dropout)
        self.concat = nn.Linear(hidden_size * 2, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)

        # Choose attention model
        if attn_model != 'none':
            self.attn = Attn(attn_model, hidden_size)
Project: baseline    Author: dpressel    | Project source | File source
def _init_pool(self, dsz, **kwargs):
        filtsz = kwargs['filtsz']
        cmotsz = kwargs['cmotsz']
        convs = []
        for i, fsz in enumerate(filtsz):
            pad = fsz//2
            conv = nn.Sequential(
                nn.Conv1d(dsz, cmotsz, fsz, padding=pad),
                pytorch_activation("relu")
            )
            convs.append(conv)
        # Add the modules to an nn.ModuleList so they are registered and managed correctly
        self.convs = nn.ModuleList(convs)
        # Width of concat of parallel convs
        self.conv_drop = nn.Dropout(self.pdrop)

        return cmotsz * len(filtsz)
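
The returned width cmotsz * len(filtsz) corresponds to concatenating one max-over-time vector per filter size; a sketch of the matching pooling step (btc is an assumed (batch, time, dsz) input tensor, not the project's exact variable name) shows where conv_drop is applied:

mots = []
for conv in self.convs:
    conv_out = conv(btc.transpose(1, 2))   # (B, cmotsz, T)
    mot, _ = conv_out.max(2)               # max over time
    mots.append(mot)
mots = torch.cat(mots, 1)                  # (B, cmotsz * len(filtsz))
mots = self.conv_drop(mots)                # dropout on the pooled features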
Project: baseline    Author: dpressel    | Project source | File source
def __init__(self, embeddings_in, embeddings_out, **kwargs):
        super(Seq2SeqAttnModel, self).__init__(embeddings_in, embeddings_out)
        self.hsz = kwargs['hsz']
        nlayers = kwargs['layers']
        rnntype = kwargs['rnntype']
        pdrop = kwargs.get('dropout', 0.5)
        dsz = embeddings_in.dsz
        self.gpu = kwargs.get('gpu', True)
        self.encoder_rnn = pytorch_rnn(dsz, self.hsz, rnntype, nlayers, pdrop)
        self.dropout = nn.Dropout(pdrop)
        self.decoder_rnn = pytorch_rnn_cell(self.hsz + dsz, self.hsz, rnntype, nlayers, pdrop)
        self.preds = nn.Linear(self.hsz, self.nc)
        self.probs = nn.LogSoftmax()
        self.output_to_attn = nn.Linear(self.hsz, self.hsz, bias=False)
        self.attn_softmax = nn.Softmax()
        self.attn_out = nn.Linear(2 * self.hsz, self.hsz, bias=False)
        self.attn_tanh = pytorch_activation("tanh")
        self.nlayers = nlayers
Project: pytorch-seq2seq    Author: IBM    | Project source | File source
def __init__(self, vocab_size, max_len, hidden_size, input_dropout_p, dropout_p, n_layers, rnn_cell):
        super(BaseRNN, self).__init__()
        self.vocab_size = vocab_size
        self.max_len = max_len
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.input_dropout_p = input_dropout_p
        self.input_dropout = nn.Dropout(p=input_dropout_p)
        if rnn_cell.lower() == 'lstm':
            self.rnn_cell = nn.LSTM
        elif rnn_cell.lower() == 'gru':
            self.rnn_cell = nn.GRU
        else:
            raise ValueError("Unsupported RNN Cell: {0}".format(rnn_cell))

        self.dropout_p = dropout_p
Project: self-critical.pytorch    Author: ruotianluo    | Project source | File source
def __init__(self, opt):
        super(OldModel, self).__init__()
        self.vocab_size = opt.vocab_size
        self.input_encoding_size = opt.input_encoding_size
        self.rnn_type = opt.rnn_type
        self.rnn_size = opt.rnn_size
        self.num_layers = opt.num_layers
        self.drop_prob_lm = opt.drop_prob_lm
        self.seq_length = opt.seq_length
        self.fc_feat_size = opt.fc_feat_size
        self.att_feat_size = opt.att_feat_size

        self.ss_prob = 0.0 # Schedule sampling probability

        self.linear = nn.Linear(self.fc_feat_size, self.num_layers * self.rnn_size) # feature to rnn_size
        self.embed = nn.Embedding(self.vocab_size + 1, self.input_encoding_size)
        self.logit = nn.Linear(self.rnn_size, self.vocab_size + 1)
        self.dropout = nn.Dropout(self.drop_prob_lm)

        self.init_weights()
Project: self-critical.pytorch    Author: ruotianluo    | Project source | File source
def __init__(self, opt):
        super(AdaAtt_attention, self).__init__()
        self.input_encoding_size = opt.input_encoding_size
        #self.rnn_type = opt.rnn_type
        self.rnn_size = opt.rnn_size
        self.drop_prob_lm = opt.drop_prob_lm
        self.att_hid_size = opt.att_hid_size

        # fake region embed
        self.fr_linear = nn.Sequential(
            nn.Linear(self.rnn_size, self.input_encoding_size),
            nn.ReLU(), 
            nn.Dropout(self.drop_prob_lm))
        self.fr_embed = nn.Linear(self.input_encoding_size, self.att_hid_size)

        # h out embed
        self.ho_linear = nn.Sequential(
            nn.Linear(self.rnn_size, self.input_encoding_size),
            nn.Tanh(), 
            nn.Dropout(self.drop_prob_lm))
        self.ho_embed = nn.Linear(self.input_encoding_size, self.att_hid_size)

        self.alpha_net = nn.Linear(self.att_hid_size, 1)
        self.att2h = nn.Linear(self.rnn_size, self.rnn_size)
Project: self-critical.pytorch    Author: ruotianluo    | Project source | File source
def __init__(self, opt):
        super(Att2in2Core, self).__init__()
        self.input_encoding_size = opt.input_encoding_size
        #self.rnn_type = opt.rnn_type
        self.rnn_size = opt.rnn_size
        #self.num_layers = opt.num_layers
        self.drop_prob_lm = opt.drop_prob_lm
        self.fc_feat_size = opt.fc_feat_size
        self.att_feat_size = opt.att_feat_size
        self.att_hid_size = opt.att_hid_size

        # Build a LSTM
        self.a2c = nn.Linear(self.rnn_size, 2 * self.rnn_size)
        self.i2h = nn.Linear(self.input_encoding_size, 5 * self.rnn_size)
        self.h2h = nn.Linear(self.rnn_size, 5 * self.rnn_size)
        self.dropout = nn.Dropout(self.drop_prob_lm)

        self.attention = Attention(opt)
Project: Structured-Self-Attentive-Sentence-Embedding    Author: ExplorerFreda    | Project source | File source
def __init__(self, config):
        super(BiLSTM, self).__init__()
        self.drop = nn.Dropout(config['dropout'])
        self.encoder = nn.Embedding(config['ntoken'], config['ninp'])
        self.bilstm = nn.LSTM(config['ninp'], config['nhid'], config['nlayers'], dropout=config['dropout'],
                              bidirectional=True)
        self.nlayers = config['nlayers']
        self.nhid = config['nhid']
        self.pooling = config['pooling']
        self.dictionary = config['dictionary']
#        self.init_weights()
        self.encoder.weight.data[self.dictionary.word2idx['<pad>']] = 0
        if os.path.exists(config['word-vector']):
            print('Loading word vectors from', config['word-vector'])
            vectors = torch.load(config['word-vector'])
            assert vectors[2] >= config['ninp']
            vocab = vectors[0]
            vectors = vectors[1]
            loaded_cnt = 0
            for word in self.dictionary.word2idx:
                if word not in vocab:
                    continue
                real_id = self.dictionary.word2idx[word]
                loaded_id = vocab[word]
                self.encoder.weight.data[real_id] = vectors[loaded_id][:config['ninp']]
                loaded_cnt += 1
            print('%d words from external word vectors loaded.' % loaded_cnt)

    # note: init_range constraints the value of initial weights