# Issue status: Open (opened by col-in-coding 11 months ago)
import time
from pathlib import Path
from tensorrt_llm.network import net_guard
from tensorrt_llm.builder import Builder
from tensorrt_llm.logger import logger
from tensorrt_llm.layers import LayerNorm
from tensorrt_llm.module import Module
from tensorrt_llm.functional import Tensor
from tensorrt_llm._utils import str_dtype_to_trt
# Set the TensorRT-LLM logger to the "info" level; serialize_engine reports
# its progress through logger.info.
logger.set_level("info")
class TestModel(Module):
    """Minimal module that applies a single LayerNorm to one input tensor.

    The layer is created with size 1280, float32 dtype and
    ``elementwise_affine=False``. The result of the layer is registered as
    the network output named "output".
    """

    def __init__(self):
        """Create the float32 LayerNorm layer and remember the TRT dtype."""
        super().__init__()
        trt_dtype = str_dtype_to_trt('float32')
        self.dtype = trt_dtype
        self.layernorm = LayerNorm(
            1280,
            dtype=trt_dtype,
            elementwise_affine=False,
        )

    def forward(self, inp):
        """Run ``inp`` through the LayerNorm and mark the result as an output.

        Args:
            inp: Tensor handed to ``self.layernorm``.

        Returns:
            The tensor produced by ``self.layernorm``, after it has been
            marked as the output "output" with dtype ``self.dtype``.
        """
        normalized = self.layernorm(inp)
        normalized.mark_output("output", self.dtype)
        return normalized

    def prepare_inputs(self):
        """Declare the network input.

        Returns:
            A one-element tuple holding the tensor named "input" with
            shape [1, 64, 64, 1280] and dtype ``self.dtype``.
        """
        input_tensor = Tensor(
            name="input",
            dtype=self.dtype,
            shape=[1, 64, 64, 1280],
        )
        return (input_tensor, )
def serialize_engine(engine, path):
    """Write a serialized engine to ``path`` and log how long it took.

    Args:
        engine: The serialized engine; any object ``bytearray()`` accepts.
            Presumably the value returned by ``Builder.build_engine`` --
            confirm against the caller.
        path: Destination file (``str`` or path-like). It is opened in
            binary write mode, so an existing file is overwritten.

    Raises:
        TypeError: If ``engine`` is ``None``. This is raised before the
            destination is opened, so no empty file is left behind.
    """
    if engine is None:
        # Validate before open(): opening in 'wb' mode first would create or
        # truncate the target and leave a zero-byte engine file on disk.
        raise TypeError(
            'Cannot serialize engine: got None (did the engine build fail?)')

    logger.info(f'Serializing engine to {path}...')
    # perf_counter is unaffected by system clock adjustments, unlike time.time.
    start = time.perf_counter()
    with open(path, 'wb') as f:
        f.write(bytearray(engine))
    elapsed = time.perf_counter() - start
    t = time.strftime('%H:%M:%S', time.gmtime(elapsed))
    logger.info(f'Engine serialized. Total time: {t}')
def main():
    """Build TestModel into an engine and write it to "test.engine".

    Raises:
        RuntimeError: If ``Builder.build_engine`` returns ``None``.
    """
    engine_dir = Path("")
    engine_name = "test.engine"
    dtype = "float32"
    engine_path = engine_dir / engine_name

    # Build TRT network
    trt_llm_model = TestModel()

    # Module -> Network
    builder = Builder()
    builder_config = builder.create_builder_config(
        name="test",
        precision=dtype,
        timing_cache=None,
        tensor_parallel=1,
        parallel_build=False,
    )
    network = builder.create_network()
    network.trt_network.name = engine_name
    with net_guard(network):
        # Prepare
        network.set_named_parameters(trt_llm_model.named_parameters())
        # Forward
        inputs = trt_llm_model.prepare_inputs()
        trt_llm_model(*inputs)

    # Network -> Engine
    engine = builder.build_engine(network, builder_config)
    if engine is None:
        # Stop here with a clear message instead of failing inside the
        # serialization step.
        raise RuntimeError(
            'Engine build failed: Builder.build_engine returned None')
    serialize_engine(engine, engine_path)


if __name__ == "__main__":
    main()