microsoft / MMdnn

MMdnn is a set of tools to help users interoperate among different deep learning frameworks, e.g. model conversion and visualization. Convert models between Caffe, Keras, MXNet, TensorFlow, CNTK, PyTorch, ONNX and CoreML.
MIT License

Problem with converting custom PyTorch model to TensorFlow #877

Open · nestyme opened this issue 4 years ago

nestyme commented 4 years ago

Platform (like ubuntu 16.04/win10): Ubuntu 16.04.5 LTS

Python version: Python 3.7

Source framework with version (like Tensorflow 1.4.1 with GPU): PyTorch 1.6.0

Destination framework with version (like CNTK 2.3 with GPU): TensorFlow 2.2.0

Pre-trained model path (webpath or webdisk path): A Structured Self-attentive Sentence Embedding model

import torch
import torch.nn as nn
import torch.nn.functional as F

# Assumed to be defined earlier in the original script; shown here so the
# snippet is self-contained (values are illustrative).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_classes = 2


class SentenceEmbeddingsModel(nn.Module):
    def __init__(self,
                 vocab_size,
                 embedding_dim,
                 max_length=40,
                 use_pretrained_word_vectors=True,
                 word_vectors=None,
                 device=device,
                 C=0.001,
                 d_a=10,
                 r_a=4,
                 hidden_size=100,
                 need_masking=True,
                 need_attention=True,
                 need_lstm=False):

        super(SentenceEmbeddingsModel, self).__init__()

        self.embeddings = nn.Embedding(vocab_size, embedding_dim)

        self.d_a = d_a
        self.C = C
        self.r_a = r_a
        self.rnn_hidden_size = hidden_size

        if use_pretrained_word_vectors:
            w = torch.FloatTensor(word_vectors)
            # from_pretrained freezes the weights by default
            self.embeddings = self.embeddings.from_pretrained(w)
            self.embeddings.weight.requires_grad = False

        self.need_masking = need_masking
        self.need_attention = need_attention
        self.need_lstm = need_lstm

        if self.need_lstm:
            ws_d = 2 * self.rnn_hidden_size
        else:
            ws_d = embedding_dim

        # self-attention parameters Ws1 and Ws2 from the structured
        # self-attentive embedding formulation
        self.ws1 = nn.Parameter(torch.FloatTensor(1, self.d_a, ws_d))
        nn.init.xavier_uniform_(self.ws1)
        self.ws1.requires_grad = True

        self.ws2 = nn.Parameter(torch.FloatTensor(1, self.r_a, self.d_a))
        nn.init.xavier_uniform_(self.ws2)
        self.ws2.requires_grad = True

        self.dropout1 = nn.Dropout(0.1)

        self.device = device

        self.dense = nn.Sequential(
            nn.Linear(ws_d, 20, bias=True),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(20, num_classes, bias=True),
        )

        self.linear = nn.Linear(ws_d * self.r_a, ws_d)

    def forward(self, inputs):
        e = self.embeddings(inputs)
        if self.need_masking:
            # zero out embeddings of padding tokens (index 0)
            mask = (inputs != 0)[:, :, None].float().to(self.device)
            masked = e.mul(mask)
            r = self.dropout1(masked)
        else:
            r = self.dropout1(e)

        z = r

        if self.need_attention:
            a1 = torch.tanh(self.ws1.matmul(z.transpose(dim0=1, dim1=2)))

            attention = F.softmax(self.ws2.matmul(a1), dim=2)  # n_batch x r_a x max_length
            m = attention.matmul(z)  # n_batch x r_a x ws_d

            # here we get an r_a * ws_d embedding matrix per sentence
            flatten = m.view(z.shape[0], -1, 1)[:, :, 0]

            m = self.linear(flatten)

        else:
            m = z.sum(1)
            attention = None

        out = torch.sigmoid(self.dense(m))
        return out

# vocab_len, vocab_dim and word_vectors come from the preprocessing step (not shown)
model = SentenceEmbeddingsModel(vocab_len,
                                vocab_dim,
                                50,
                                use_pretrained_word_vectors=True,
                                word_vectors=word_vectors,
                                device=device,
                                hidden_size=100,
                                need_masking=True,
                                need_attention=True,
                                need_lstm=False)

I'm saving my model with torch.save(model, model_path), then trying to convert the PyTorch model to TF via

mmconvert -sf pytorch -in model.pth -df tensorflow -om model.ckpt

and I get:

Traceback (most recent call last):
  File "/usr/local/bin/mmconvert", line 10, in <module>
    sys.exit(_main())
  File "/usr/local/lib/python3.6/dist-packages/mmdnn/conversion/_script/convert.py", line 102, in _main
    ret = convertToIR._convert(ir_args)
  File "/usr/local/lib/python3.6/dist-packages/mmdnn/conversion/_script/convertToIR.py", line 97, in _convert
    parser = PytorchParser151(model, inputshape[0])
  File "/usr/local/lib/python3.6/dist-packages/mmdnn/conversion/pytorch/pytorch_parser.py", line 526, in __init__
    super(PytorchParser151, self).__init__(model_file_name, input_shape)
  File "/usr/local/lib/python3.6/dist-packages/mmdnn/conversion/pytorch/pytorch_parser.py", line 83, in __init__
    model = torch.load(model_file_name, map_location='cpu')
  File "/usr/local/lib/python3.6/dist-packages/torch/serialization.py", line 584, in load
    return _load(opened_zipfile, map_location, pickle_module, **pickle_load_args)
  File "/usr/local/lib/python3.6/dist-packages/torch/serialization.py", line 842, in _load
    result = unpickler.load()
AttributeError: Can't get attribute 'SentenceEmbeddingsModel' on <module '__main__' from '/usr/local/bin/mmconvert'>
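To illustrate what the last traceback frame points at, here is a minimal sketch of the failure mode (illustrative names, not my actual model):

import torch
import torch.nn as nn

class TinyModel(nn.Module):  # defined in this script's __main__
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(4, 2)

    def forward(self, x):
        return self.fc(x)

torch.save(TinyModel(), 'tiny.pth')  # the pickle records __main__.TinyModel
# torch.load('tiny.pth') from any other entry point (e.g. the mmconvert
# script, whose __main__ does not define TinyModel) raises:
#   AttributeError: Can't get attribute 'TinyModel' on <module '__main__' ...>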

Could anyone help me, please?

linmajia commented 4 years ago

@nestyme, thank you very much for the feedback. torch.save(model, ...) pickles the whole model, including a reference to its class, so you need to keep the module that defines the model class importable (e.g. in the same folder) when loading the serialized model. Please refer to https://github.com/pytorch/pytorch/issues/18325 for a solution.
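For example, a minimal sketch of the usual workarounds, assuming the class definition has been moved into its own module (the file names below are illustrative, not from this issue):

import torch
import __main__
from sentence_model import SentenceEmbeddingsModel  # assumed module holding the class

# Workaround 1: the existing pickle recorded the class under __main__, so
# expose it there before loading, then re-save; the new pickle references
# sentence_model.SentenceEmbeddingsModel, which mmconvert can resolve when
# the folder containing sentence_model.py is on PYTHONPATH.
__main__.SentenceEmbeddingsModel = SentenceEmbeddingsModel
model = torch.load('model.pth', map_location='cpu')
torch.save(model, 'model_importable.pth')

# Workaround 2: persist only the weights and rebuild the model in code,
# which avoids pickling a class reference at all. (Per the traceback above,
# MMdnn's PyTorch parser calls torch.load on a full-model file, so for
# mmconvert you would still re-save the rebuilt model as in Workaround 1.)
torch.save(model.state_dict(), 'model_state.pth')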