NVIDIA / trt-samples-for-hackathon-cn

Simple samples for TensorRT programming
Apache License 2.0
1.47k stars 337 forks source link

TensorRT-LLM LayerNorm layer with no weights error (Hackathon 2023) #92

Open col-in-coding opened 11 months ago

col-in-coding commented 11 months ago
from tensorrt_llm.layers import LayerNorm

class TestModel(Module):
    """Single-layer test module wrapping a LayerNorm built without affine weights.

    The layer is created with ``elementwise_affine=False`` and a float32 TRT
    dtype; the module exists only to push one input through that layer.
    """

    def __init__(self):
        super().__init__()
        trt_dtype = str_dtype_to_trt('float32')
        self.dtype = trt_dtype
        # First positional argument is 1280, matching the last input dimension
        # declared in prepare_inputs().
        self.layernorm = LayerNorm(
            1280,
            dtype=trt_dtype,
            elementwise_affine=False,
        )

    def forward(self, inp):
        """Apply the LayerNorm to ``inp``, mark the result as "output", return it."""
        normalized = self.layernorm(inp)
        normalized.mark_output("output", self.dtype)
        return normalized

    def prepare_inputs(self):
        """Return a 1-tuple holding the network input tensor named "input"."""
        input_shape = [1, 64, 64, 1280]
        input_tensor = Tensor(name="input", dtype=self.dtype, shape=input_shape)
        return (input_tensor, )

# Instantiate the test module; construction also creates its LayerNorm layer.
llm_model = TestModel()
col-in-coding commented 11 months ago
import time
from pathlib import Path
from tensorrt_llm.network import net_guard
from tensorrt_llm.builder import Builder
from tensorrt_llm.logger import logger
from tensorrt_llm.layers import LayerNorm
from tensorrt_llm.module import Module
from tensorrt_llm.functional import Tensor
from tensorrt_llm._utils import str_dtype_to_trt

# Set the imported TensorRT-LLM logger to the "info" level before building.
logger.set_level("info")

class TestModel(Module):
    """Minimal module that runs its input through one LayerNorm layer.

    The LayerNorm is constructed with ``elementwise_affine=False`` and the
    TRT dtype obtained from ``str_dtype_to_trt('float32')``.
    """

    def __init__(self):
        super().__init__()
        float32_trt = str_dtype_to_trt('float32')
        self.dtype = float32_trt
        # 1280 matches the trailing dimension of the tensor built in
        # prepare_inputs().
        self.layernorm = LayerNorm(
            1280,
            dtype=float32_trt,
            elementwise_affine=False,
        )

    def forward(self, inp):
        """Normalize ``inp``, register the result as network output "output"."""
        result = self.layernorm(inp)
        result.mark_output("output", self.dtype)
        return result

    def prepare_inputs(self):
        """Build the single input tensor ("input") and return it in a tuple."""
        dims = [1, 64, 64, 1280]
        return (Tensor(name="input", dtype=self.dtype, shape=dims), )

def serialize_engine(engine, path):
    """Write ``engine`` to ``path`` in binary mode and log the elapsed time.

    Args:
        engine: Object convertible with ``bytearray(...)``; its bytes are
            written to the file verbatim.
        path: Destination passed to ``open(path, 'wb')``; an existing file is
            overwritten.
    """
    logger.info(f'Serializing engine to {path}...')
    started_at = time.time()
    with open(path, 'wb') as engine_file:
        engine_file.write(bytearray(engine))
    elapsed_seconds = time.time() - started_at
    # Format the duration as HH:MM:SS by treating it as seconds since epoch.
    elapsed_text = time.strftime('%H:%M:%S', time.gmtime(elapsed_seconds))
    logger.info(f'Engine serialized. Total time: {elapsed_text}')

if __name__ == "__main__":
    # Reproduction script: wrap TestModel in a TensorRT-LLM network, build an
    # engine from it, and write the engine next to the script.

    engine_dir = Path("")
    engine_name = "test.engine"
    dtype = "float32"
    engine_path = engine_dir / engine_name

    # Build TRT network
    trt_llm_model = TestModel()

    # Module -> Network
    builder = Builder()
    builder_config = builder.create_builder_config(
        name="test",
        # Reuse the single dtype setting instead of repeating the literal.
        precision=dtype,
        timing_cache=None,
        tensor_parallel=1,
        parallel_build=False,
    )
    network = builder.create_network()
    network.trt_network.name = engine_name

    with net_guard(network):
        # Prepare
        network.set_named_parameters(trt_llm_model.named_parameters())
        # Forward
        inputs = trt_llm_model.prepare_inputs()
        trt_llm_model(*inputs)

    # Network -> Engine
    engine = builder.build_engine(network, builder_config)
    # NOTE(review): Builder.build_engine is understood to return None when the
    # build fails -- confirm against the installed TensorRT-LLM version. Without
    # this guard a failed build surfaces as a TypeError from bytearray(None)
    # after the output file has already been truncated.
    if engine is None:
        raise RuntimeError(
            f"Engine build failed for {engine_name}; see the log output above.")
    serialize_engine(engine, engine_path)