Open zhanghaoie opened 2 years ago
I'm doing batch inference on CPU in combination with FastAPI. It works fine.
# env.DEVICE is CPU
class MyModelManager(ManagedModel):
    """Serve a TorchScript classifier, loading it once and running batched inference.

    Assumes module-level ``env`` (MODEL_PATH, DEVICE) and ``logger`` are provided
    by the surrounding application — not visible here.
    """

    def init_model(self):
        """Load the TorchScript model from ``env.MODEL_PATH`` onto ``env.DEVICE``.

        The model is loaded onto CPU first, then switched to eval mode
        (freezes dropout / batch-norm) and moved to the serving device.
        """
        classifier = torch.jit.load(env.MODEL_PATH, map_location="cpu")
        self.classifier = classifier.eval().to(env.DEVICE)
        logger.info("model init Done")

    @torch.inference_mode()
    def predict(self, inputs: T.List[torch.Tensor]) -> T.List[torch.Tensor]:
        """Run one batched forward pass over ``inputs``.

        Args:
            inputs: per-sample tensors that are concatenated along dim 0
                into a single batch (shapes must therefore agree past dim 0
                — presumably guaranteed by the batching layer; TODO confirm).

        Returns:
            One sigmoid-activated output tensor per input sample, moved to
            CPU. Returns an empty list on an empty batch or on any
            inference error (best-effort contract of the original kept).
        """
        # Lazy %-formatting: avoids f-string work when the level is disabled.
        logger.info("batch size: %d", len(inputs))
        results: T.List[torch.Tensor] = []
        if not inputs:
            # torch.cat raises on an empty sequence; short-circuit instead
            # of tripping the generic error handler below.
            return results
        try:
            batch = torch.cat(inputs, 0).to(env.DEVICE)
            # Was a bare print(batch.shape) — demote debug output to the logger.
            logger.debug("batch shape: %s", tuple(batch.shape))
            outputs = torch.sigmoid(self.classifier(batch)).cpu()
            # Iterating a tensor yields one row (sample) per element.
            results = list(outputs)
        except Exception:
            # logger.exception records the full traceback, not just str(e),
            # which the original logger.error(f"... {e}") silently dropped.
            logger.exception("Error %s", self.__class__.__name__)
        return results
I'm doing batch inference on CPU in combination with FastAPI. It works fine.