pytorch / serve

Serve, optimize and scale PyTorch models in production
https://pytorch.org/serve/
Apache License 2.0

Torchserve prediction failed for transformer model #1922

Open AllenAkhaumere opened 1 year ago

AllenAkhaumere commented 1 year ago

I have the following TorchServe handler and Dockerfile, but I'm getting a "Prediction failed" error:

```python
from ts.torch_handler.base_handler import BaseHandler
from transformers import AutoModelWithLMHead, AutoModelForCausalLM, AutoTokenizer
from transformers import AutoModelForSequenceClassification
import torch
import datetime
import logging
import json
import os

logger = logging.getLogger(__name__)

class TransformersHandler(BaseHandler):
    """
    The handler takes an input string and returns the classification text
    based on the serialized transformers checkpoint.
    """

    def __init__(self):
        super(TransformersHandler, self).__init__()
        self.initialized = False
        self.chat_history_ids = None

    def initialize(self, ctx):
        """Loads the model.pt file and initializes the model object.
        Instantiates the tokenizer for the preprocessor to use.
        Loads the labels-to-names mapping file for post-processing the inference response.
        """
        self.manifest = ctx.manifest

        properties = ctx.system_properties
        model_dir = properties.get("model_dir")
        self.device = torch.device(
            "cuda:" + str(properties.get("gpu_id")) if torch.cuda.is_available() else "cpu")

        # Read the serialized model/pt file
        serialized_file = self.manifest["model"]["serializedFile"]
        model_pt_path = os.path.join(model_dir, serialized_file)
        if not os.path.isfile(model_pt_path):
            raise RuntimeError(
                "Missing the model.pt or pytorch_model.bin file")

        # Load model
        self.model = AutoModelWithLMHead.from_pretrained(model_dir)
        self.model.to(self.device)
        self.model.eval()
        logger.debug(
            'Transformer model from path {0} loaded successfully'.format(model_dir))

        # Ensure to use the same tokenizer used during training
        self.tokenizer = AutoTokenizer.from_pretrained(
            'microsoft/DialoGPT-small')

    def preprocess(self, data):
        """Preprocesses the input request by tokenizing it.
        Extend with your own preprocessing steps as needed.
        """
        user_message = data[0].get("data")
        if user_message is None:
            user_message = data[0].get("body")

        # The request payload may arrive as raw bytes; decode it to a string
        if isinstance(user_message, (bytes, bytearray)):
            user_message = user_message.decode('utf-8')

        logger.info("Received text: '%s'", user_message)

        # Tokenize the text: encode the new user message to be used by the model
        inputs = self.tokenizer.encode(
            user_message + self.tokenizer.eos_token, return_tensors='pt')

        # Append the encoded message to the past history so the model is aware of past context
        if self.chat_history_ids is not None:
            inputs = torch.cat([self.chat_history_ids, inputs], dim=-1)

        return inputs

    def inference(self, inputs):
        """Predict the class of a text using a trained transformer model.
        """
        self.chat_history_ids = self.model.generate(inputs,
                                                    pad_token_id=self.tokenizer.eos_token_id,
                                                    do_sample=True,
                                                    max_length=1000,
                                                    top_k=100,
                                                    top_p=0.8,
                                                    temperature=0.8,
                                                    )
        decoded_message = self.tokenizer.decode(
            self.chat_history_ids[:, inputs.shape[-1]:][0], skip_special_tokens=True)

        return decoded_message

    def postprocess(self, inference_output):
        return inference_output

_service = TransformersHandler()

def handle(data, context):
    try:
        if not _service.initialized:
            _service.initialize(context)

        if data is None:
            return None

        data = _service.preprocess(data)
        data = _service.inference(data)
        data = _service.postprocess(data)

        return data
    except Exception as e:
        raise e
```
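
Before building the image, the handler can be sanity-checked outside TorchServe so that syntax or import errors show up locally rather than as worker start-up failures inside the container (this assumes `torch`, `transformers`, and `torchserve` are installed in the local environment):

```bash
# Run from the directory containing custom_handler.py
python3 -m py_compile custom_handler.py   # catches syntax errors
python3 -c "import custom_handler"        # catches import-time errors (missing packages, bad names)
```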

```bash
%%bash -s $APP_NAME

APP_NAME=$1

cat << EOF > ./predictor/Dockerfile

FROM pytorch/torchserve:latest-cpu

# install dependencies
RUN python3 -m pip install --upgrade pip
RUN pip3 install transformers

USER model-server

# copy model artifacts, custom handler and other dependencies
COPY ./custom_handler.py /home/model-server/
COPY ./model/config.json /home/model-server/
COPY ./model/eval_results.txt /home/model-server/
COPY ./model/merges.txt /home/model-server/
COPY ./model/pytorch_model.bin /home/model-server/
COPY ./model/special_tokens_map.json /home/model-server/
COPY ./model/tokenizer_config.json /home/model-server/
COPY ./model/tokenizer.json /home/model-server/
COPY ./model/training_args.bin /home/model-server/
COPY ./model/vocab.json /home/model-server/

COPY ./model/$APP_NAME/ /home/model-server/

# create torchserve configuration file
USER root
RUN printf "\nservice_envelope=json" >> /home/model-server/config.properties
RUN printf "\ninference_address=http://0.0.0.0:7080" >> /home/model-server/config.properties
RUN printf "\nmanagement_address=http://0.0.0.0:7081" >> /home/model-server/config.properties
USER model-server

# expose health and prediction listener ports from the image
EXPOSE 7080
EXPOSE 7081

# create model archive file packaging model artifacts and dependencies
RUN torch-model-archiver -f \
    --model-name=$APP_NAME \
    --version=1.0 \
    --serialized-file=/home/model-server/pytorch_model.bin \
    --handler=/home/model-server/custom_handler.py \
    --extra-files "/home/model-server/config.json,/home/model-server/tokenizer.json,/home/model-server/training_args.bin,/home/model-server/tokenizer_config.json,/home/model-server/special_tokens_map.json,/home/model-server/vocab.json,/home/model-server/merges.txt,/home/model-server/eval_results.txt" \
    --export-path=/home/model-server/model-store

# run TorchServe HTTP server to respond to prediction requests
CMD ["torchserve", \
     "--start", \
     "--ts-config=/home/model-server/config.properties", \
     "--models", \
     "$APP_NAME=$APP_NAME.mar", \
     "--model-store", \
     "/home/model-server/model-store"]
EOF

echo "Writing ./predictor/Dockerfile"
```
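
To reproduce outside the notebook, the image can be built and run locally along these lines (the image and container names here are just placeholders):

```bash
# Build the serving image from the predictor directory
docker build -t torchserve-predictor ./predictor

# Run it with the inference and management ports mapped to the host
docker run -d --name predictor -p 7080:7080 -p 7081:7081 torchserve-predictor

# TorchServe health check on the inference port
curl http://localhost:7080/ping
```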

```bash
%%bash -s $APP_NAME

APP_NAME=$1

cat > ./predictor/instances.json <<END
{
  "instances": [
    {
      "data": {
        "b64": "$(echo 'What is your name?.' | base64 --wrap=0)"
      }
    }
  ]
}
END

curl -s -X POST \
  -H "Content-Type: application/json; charset=utf-8" \
  -d @./predictor/instances.json \
  http://localhost:7080/predictions/$APP_NAME/
```

Output error message:
{ "code": 503, "type": "InternalServerException", "message": "Prediction failed" }

What could be the problem?
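
One quick check (assuming the management port 7081 is reachable from the host, as configured above) is to ask the TorchServe management API whether the model registered and whether its workers are healthy:

```bash
# List all registered models
curl -s http://localhost:7081/models

# Worker status and details for this model
curl -s http://localhost:7081/models/$APP_NAME
```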

msaroufim commented 1 year ago

Can you check logs/model_log.log for anything suspicious? This can indicate a Python error in your handler.
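
For example, something along these lines pulls that log out of a running container (assuming TorchServe is started from its default /home/model-server working directory and the container is named predictor, as in the local run example above):

```bash
# Worker/handler errors (Python tracebacks) end up in model_log.log
docker exec predictor tail -n 100 /home/model-server/logs/model_log.log

# The frontend log is also worth checking
docker exec predictor tail -n 100 /home/model-server/logs/ts_log.log
```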