pytorch / TensorRT

PyTorch/TorchScript/FX compiler for NVIDIA GPUs using TensorRT
https://pytorch.org/TensorRT
BSD 3-Clause "New" or "Revised" License
2.49k stars 344 forks source link

🐛 [Bug] Is dynamic shape supported for layer norm? #1576

Closed narendasan closed 1 year ago

narendasan commented 1 year ago

Bug Description

I am trying to run the following reproducer and am hitting issues.

To Reproduce


import torch
from torch import nn
from torch.nn import functional as F

def torch_tensorrt_export(model, d_model, fp16, static_shape=True,
                          min_opt_max_len=(10, 171, 500), min_opt_max_bs=(8, 8, 8),
                          debug=False):
    """Compile ``model`` with Torch-TensorRT and return the compiled module.

    Two integer inputs are declared for the compiler: a 2-D input whose
    dimensions are taken from ``min_opt_max_bs`` and ``min_opt_max_len``, and
    a 1-D input whose single dimension is taken from ``min_opt_max_len``.

    Args:
        model: Module handed to ``torch_tensorrt.compile``.
        d_model: Not used by this function; kept so existing call sites that
            pass it positionally keep working.
        fp16: If true, ``torch.half`` is the enabled precision, otherwise
            ``torch.float``.
        static_shape: If true, the two range arguments below are ignored and
            every min/opt/max shape is pinned to batch 8 and length 171.
        min_opt_max_len: ``(min, opt, max)`` triple for the length dimension.
        min_opt_max_bs: ``(min, opt, max)`` triple for the first dimension of
            the 2-D input.
        debug: If true, raise the Torch-TensorRT reportable log level to
            ``Debug``; otherwise the library's current level is left alone.

    Returns:
        Whatever ``torch_tensorrt.compile`` returns for ``model``.
    """
    # Function-scope import: this also binds the top-level ``torch_tensorrt``
    # name used below.
    import torch_tensorrt.logging

    # Only touch the log level when asked to. Previously the level was also
    # set to Debug unconditionally, which made the ``debug`` flag a no-op.
    if debug:
        torch_tensorrt.logging.set_reportable_log_level(torch_tensorrt.logging.Level.Debug)

    trt_dtype = torch.half if fp16 else torch.float

    if static_shape:
        # Collapse both ranges so that min == opt == max.
        min_opt_max_bs = (8,) * 3
        min_opt_max_len = (171,) * 3
    inputs = [
        # 2-D integer input: (min_opt_max_bs[i], min_opt_max_len[i]).
        torch_tensorrt.Input(
            min_shape=[min_opt_max_bs[0], min_opt_max_len[0]],
            opt_shape=[min_opt_max_bs[1], min_opt_max_len[1]],
            max_shape=[min_opt_max_bs[2], min_opt_max_len[2]],
            dtype=torch.int),
        # 1-D integer input: (min_opt_max_len[i],).
        torch_tensorrt.Input(
            min_shape=[min_opt_max_len[0]],
            opt_shape=[min_opt_max_len[1]],
            max_shape=[min_opt_max_len[2]],
            dtype=torch.int),
    ]

    print(f'Compiling with Torch-TRT for dtype {trt_dtype}'
          f' for bs {min_opt_max_bs} and lengths {min_opt_max_len}')

    compile_settings = {
        "inputs": inputs,
        "enabled_precisions": {trt_dtype},
        "truncate_long_and_double": True,
        "require_full_compilation": True,
    }
    trt_model = torch_tensorrt.compile(model, **compile_settings)
    print('Compilation successful!')
    print('=' * 100)
    return trt_model

class PositionalEmbedding(nn.Module):
    """Sinusoidal positional embedding built from a fixed frequency table.

    ``demb`` is the embedding width; the registered ``inv_freq`` buffer holds
    one inverse frequency per pair of output channels.
    """

    def __init__(self, demb):
        super().__init__()
        self.demb = demb
        # Exponents 0, 2/demb, 4/demb, ... for the 10000 ** (.) frequency base.
        exponents = torch.arange(0.0, demb, 2.0) / demb
        self.register_buffer('inv_freq', 1 / (10000 ** exponents))

    def forward(self, pos_seq):
        """Return sin/cos features for ``pos_seq`` with a leading axis of size 1.

        Assumes ``pos_seq`` is a 1-D tensor of positions (TODO confirm against
        callers); it is cast to float before use.
        """
        positions = pos_seq.float()
        # Written as a matmul of a column by a row; the equivalent
        # torch.outer(pos_seq, self.inv_freq) is noted as unsupported.
        column = positions.unsqueeze(-1)
        row = self.inv_freq.unsqueeze(0)
        angles = torch.matmul(column, row)
        # Sine features first, cosine features second, joined along dim 1.
        features = torch.cat([angles.sin(), angles.cos()], dim=1)
        return features.unsqueeze(0)

class ToyTransformer(nn.Module):
    """Small single-head attention model used to reproduce the reported issues.

    The ``ISSUE n`` markers in ``forward`` tag the stages called out in the
    report; they are kept so discussion can refer to them by number.
    """

    def __init__(self, d_model, d_head):
        super().__init__()

        self.d_head = d_head
        # One linear layer produces q, k and v side by side on the last axis.
        self.qkv_net = nn.Linear(d_model, 3 * d_head)
        self.out_net = nn.Linear(d_head, d_model, bias=False)
        self.layer_norm = nn.LayerNorm(d_model)
        self.pos_emb = PositionalEmbedding(d_model)
        self.word_emb = nn.Embedding(150, d_model)

    def forward(self, inp, pos_seq):
        """Embed ``inp``, add positions, normalise and apply masked attention.

        ``inp`` holds integer token ids (it is fed to ``nn.Embedding``) and
        ``pos_seq`` the positions passed to the positional embedding.
        """
        hidden = self.word_emb(inp)

        # ISSUE 1
        hidden = hidden + self.pos_emb(pos_seq)

        # ISSUE 2
        hidden = self.layer_norm(hidden)

        # ISSUE 3
        projected = self.qkv_net(hidden)
        # Three equal-width slices of the last axis: q, then k, then v.
        q = projected[:, :, :self.d_head]
        k = projected[:, :, self.d_head:2 * self.d_head]
        v = projected[:, :, 2 * self.d_head:]
        # alternative to the above (but chunk is not supported):
        # q, k, v = torch.chunk(projected, 3, dim=2)
        # temporary workaround for debugging (requires changing `self.qkv_net` output size to `1 * d_head`)
        # q, k, v = projected, projected, projected

        scores = torch.bmm(q, k.transpose(1, 2))

        # ISSUE 4
        # Entries of `inp` equal to 0 are masked out of every score row.
        zero_token_mask = inp.eq(0).unsqueeze(1).expand_as(scores)
        scores = scores.masked_fill(zero_token_mask, -float('inf'))

        weights = F.softmax(scores, dim=2)
        context = torch.bmm(weights, v)
        return self.out_net(context)

if __name__ == '__main__':
    # Build the toy model, sanity-check it eagerly and under TorchScript,
    # then hand it to the Torch-TensorRT export with dynamic shapes.
    d_model = 384
    model = ToyTransformer(d_model, 64)

    token_ids = torch.randint(0, 128, (8, 171))
    positions = torch.arange(171)
    model(token_ids, positions)
    torch.jit.script(model)(token_ids, positions)
    print('PyT and TorchScript versions are OK')

    # uncomment for static shapes:
    # torch_tensorrt_export(model, d_model, fp16=False, static_shape=True)

    torch_tensorrt_export(model, d_model, fp16=False, static_shape=False)

Expected behavior

This should be supported in at least the dynamic batch case

Environment

Build information about Torch-TensorRT can be found by turning on debug messages

Additional context

peri044 commented 1 year ago

Seeing this issue with the latest main

File "/home/dperi/Downloads/env/lib/python3.8/site-packages/torch_tensorrt/_compile.py", line 125, in compile
    return torch_tensorrt.ts.compile(
  File "/home/dperi/Downloads/env/lib/python3.8/site-packages/torch_tensorrt/ts/_compiler.py", line 136, in compile
    compiled_cpp_mod = _C.compile_graph(module._c, _parse_compile_spec(spec))
RuntimeError: required keyword attribute 'test_element' is undefined

I will try the 1.3 branch.

peri044 commented 1 year ago

@narendasan Is this the error message you got?

RuntimeError: [Error thrown at core/conversion/converters/impl/matrix_multiply.cpp:50] Expected selfDims.nbDims == 3 to be true but got false
Expected 3-dimensional tensor, but got -1-dimensional tensor for argument #1 'batch1' (while checking arguments for bmm)
peri044 commented 1 year ago
DEBUG: [Torch-TensorRT TorchScript Conversion Context] - Freezing tensor 0x36d63b10 as an IConstantLayer
DEBUG: [Torch-TensorRT] - Output tensor shape: [8, -1]
INFO: [Torch-TensorRT TorchScript Conversion Context] - Adding Layer %115 : Tensor = aten::unsqueeze(%114, %11) # layer_norm.py:96:20 (ctx.AddLayer)
DEBUG: [Torch-TensorRT TorchScript Conversion Context] - Node input is an already converted tensor
DEBUG: [Torch-TensorRT TorchScript Conversion Context] - Node input is a result of a previously evaluated value
DEBUG: [Torch-TensorRT] - ITensor name: (Unnamed Layer* 199) [ElementWise]_output
DEBUG: [Torch-TensorRT] - ITensor shape: [8, -1]
DEBUG: [Torch-TensorRT] - ITensor type: Bool
ERROR: [Torch-TensorRT TorchScript Conversion Context] - 3: (Unnamed Layer* 200) [Shuffle]: has zero placeholder that refers to invalid axis 2.
DEBUG: [Torch-TensorRT] - Output tensor shape: []

The intermediate unsqueeze layer errors out, so there is probably a bug in that converter. I will need to debug this.

github-actions[bot] commented 1 year ago

This issue has not seen activity for 90 days. Remove the stale label or add a comment, or this issue will be closed in 10 days.