I have the following TorchServe handler and Dockerfile, but I'm getting a "Prediction failed" error:
```python
from ts.torch_handler.base_handler import BaseHandler
from transformers import AutoModelWithLMHead, AutoModelForCausalLM, AutoTokenizer
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
import datetime
import logging
import json
import os

logger = logging.getLogger(__name__)


class TransformersHandler(BaseHandler):
    """
    The handler takes an input string and returns the classification text
    based on the serialized transformers checkpoint.
    """

    def __init__(self):
        super(TransformersHandler, self).__init__()
        self.initialized = False
        self.chat_history_ids = None

    def initialize(self, ctx):
        """Loads the model.pt file and initializes the model object.
        Instantiates the tokenizer for the preprocessor to use.
        Loads the label-to-name mapping file for post-processing the inference response.
        """
        self.manifest = ctx.manifest
        properties = ctx.system_properties
        model_dir = properties.get("model_dir")
        self.device = torch.device(
            "cuda:" + str(properties.get("gpu_id")) if torch.cuda.is_available() else "cpu")

        # Read model serialized/pt file
        serialized_file = self.manifest["model"]["serializedFile"]
        model_pt_path = os.path.join(model_dir, serialized_file)
        if not os.path.isfile(model_pt_path):
            raise RuntimeError(
                "Missing the model.pt or pytorch_model.bin file")

        # Load model
        self.model = AutoModelWithLMHead.from_pretrained(model_dir)
        self.model.to(self.device)
        self.model.eval()
        logger.debug(
            'Transformer model from path {0} loaded successfully'.format(model_dir))

        # Ensure to use the same tokenizer used during training
        self.tokenizer = AutoTokenizer.from_pretrained(
            'microsoft/DialoGPT-small')

    def preprocess(self, data):
        """Preprocesses the input request by tokenizing it.
        Extend with your own preprocessing steps as needed.
        """
        user_message = data[0].get("data")
        if user_message is None:
            user_message = data[0].get("body")
        user_message = text.decode('utf-8')
        ####
        ####
        logger.info("Received text: '%s'", user_message)

        # Tokenize the texts
        # encode the new user message to be used by our model
        inputs = self.tokenizer.encode(
            user_message + self.tokenizer.eos_token, return_tensors='pt')

        # append the encoded message to the past history so the model is aware of past context
        if self.chat_history_ids is not None:
            inputs = torch.cat([self.chat_history_ids, inputs], dim=-1)
        return inputs

    def inference(self, inputs):
        """Predicts the reply for a text using the trained transformer model."""
        self.chat_history_ids = self.model.generate(
            inputs,
            pad_token_id=self.tokenizer.eos_token_id,
            do_sample=True,
            max_length=1000,
            top_k=100,
            top_p=0.8,
            temperature=0.8,
        )
        decoded_message = self.tokenizer.decode(
            self.chat_history_ids[:, inputs.shape[-1]:][0], skip_special_tokens=True)
        return decoded_message

    def postprocess(self, inference_output):
        return inference_output


_service = TransformersHandler()


def handle(data, context):
    try:
        if not _service.initialized:
            _service.initialize(context)
        if data is None:
            return None
        data = _service.preprocess(data)
        data = _service.inference(data)
        data = _service.postprocess(data)
        return data
    except Exception as e:
        raise e
```
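For reference, this is roughly the single-turn generation that `preprocess()` and `inference()` are meant to perform, sketched outside TorchServe against the public `microsoft/DialoGPT-small` checkpoint (an assumption for illustration; the handler loads my fine-tuned `model_dir` instead):

```python
# Minimal sketch of the intended generation step, run outside TorchServe.
# Uses the public DialoGPT-small weights as a stand-in for the fine-tuned model.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-small")
model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-small")
model.eval()

user_message = "What is your name?"
# Encode the user message followed by the end-of-sequence token, as in preprocess()
inputs = tokenizer.encode(user_message + tokenizer.eos_token, return_tensors="pt")

# Generate a reply with the same sampling parameters used in inference()
with torch.no_grad():
    chat_history_ids = model.generate(
        inputs,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True,
        max_length=1000,
        top_k=100,
        top_p=0.8,
        temperature=0.8,
    )

# Decode only the newly generated tokens (everything after the prompt)
reply = tokenizer.decode(chat_history_ids[:, inputs.shape[-1]:][0], skip_special_tokens=True)
print(reply)
```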
```bash
%%bash -s $APP_NAME
APP_NAME=$1

cat << EOF > ./predictor/Dockerfile

FROM pytorch/torchserve:latest-cpu

# install dependencies
RUN python3 -m pip install --upgrade pip
RUN pip3 install transformers

USER model-server

# copy model artifacts, custom handler and other dependencies
COPY ./custom_handler.py /home/model-server/
COPY ./model/config.json /home/model-server/
COPY ./model/eval_results.txt /home/model-server/
COPY ./model/merges.txt /home/model-server/
COPY ./model/pytorch_model.bin /home/model-server/
COPY ./model/special_tokens_map.json /home/model-server/
COPY ./model/tokenizer_config.json /home/model-server/
COPY ./model/tokenizer.json /home/model-server/
COPY ./model/training_args.bin /home/model-server/
COPY ./model/vocab.json /home/model-server/
COPY ./model/$APP_NAME/ /home/model-server/

# create torchserve configuration file
USER root
RUN printf "\nservice_envelope=json" >> /home/model-server/config.properties
RUN printf "\ninference_address=http://0.0.0.0:7080" >> /home/model-server/config.properties
RUN printf "\nmanagement_address=http://0.0.0.0:7081" >> /home/model-server/config.properties
USER model-server

# expose health and prediction listener ports from the image
EXPOSE 7080
EXPOSE 7081

# create model archive file packaging model artifacts and dependencies
RUN torch-model-archiver -f \
  --model-name=$APP_NAME \
  --version=1.0 \
  --serialized-file=/home/model-server/pytorch_model.bin \
  --handler=/home/model-server/custom_handler.py \
  --extra-files "/home/model-server/config.json,/home/model-server/tokenizer.json,/home/model-server/training_args.bin,/home/model-server/tokenizer_config.json,/home/model-server/special_tokens_map.json,/home/model-server/vocab.json,/home/model-server/merges.txt,/home/model-server/eval_results.txt" \
  --export-path=/home/model-server/model-store

# run Torchserve HTTP serve to respond to prediction requests
CMD ["torchserve", \
     "--start", \
     "--ts-config=/home/model-server/config.properties", \
     "--models", \
     "$APP_NAME=$APP_NAME.mar", \
     "--model-store", \
     "/home/model-server/model-store"]

EOF

echo "Writing ./predictor/Dockerfile"
```
```bash
%%bash -s $APP_NAME
APP_NAME=$1

cat > ./predictor/instances.json <<END
{
  "instances": [
    {
      "data": {
        "b64": "$(echo 'What is your name?.' | base64 --wrap=0)"
      }
    }
  ]
}
END

curl -s -X POST \
  -H "Content-Type: application/json; charset=utf-8" \
  -d @./predictor/instances.json \
  http://localhost:7080/predictions/$APP_NAME/
```
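Before sending the prediction request, the model can also be checked for successful registration via the TorchServe management API (a sketch, assuming port 7081 is reachable from the host):

```bash
# List the models TorchServe has registered (management API, assumed mapped to localhost:7081)
curl -s http://localhost:7081/models
```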
Output error message:

```json
{
  "code": 503,
  "type": "InternalServerException",
  "message": "Prediction failed"
}
```
What could be the problem?