pytorch / serve

Serve, optimize and scale PyTorch models in production
https://pytorch.org/serve/
Apache License 2.0

Accelerated preprocessing #1546

Open msaroufim opened 2 years ago

msaroufim commented 2 years ago

This recent PR by @min-jean-cho https://github.com/pytorch/serve/pull/1545 made me realize how much performance we're leaving on the table when it comes to accelerating preprocessing operations. We could more easily support more torchvision backends and faster tokenizers, and add more hardware-specific configurations so that developers can pick and choose.
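
As a concrete illustration of the kind of win on the table, here is a minimal sketch (not TorchServe's handler API; it assumes torchvision >= 0.10 for the `device` argument to `decode_jpeg`) of decoding JPEG bytes with torchvision's native ops, on GPU when one is available, instead of round-tripping through PIL:

```python
import torch
from torchvision.io import decode_jpeg

def preprocess_image(raw: bytes) -> torch.Tensor:
    # bytearray() gives torch.frombuffer a writable buffer, avoiding the
    # read-only-buffer warning it emits on plain bytes.
    data = torch.frombuffer(bytearray(raw), dtype=torch.uint8)
    # torchvision >= 0.10 can decode directly on GPU via nvJPEG;
    # fall back to CPU decoding otherwise.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return decode_jpeg(data, device=device)
```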

This will also help our #1457 efforts, since there we had so far only considered the performance of TorchServe and inference().

msaroufim commented 2 years ago

For accelerated decoding, these are all the call sites we could improve (see the sketch after the listing):

(base) ➜  serve git:(decode) ✗ find . -name "*.py" | xargs grep "decode"
./test/pytest/test_handler.py:    response = response.content.decode("utf-8")
./test/pytest/test_gRPC_inference_api.py:    prediction = response.prediction.decode('utf-8')
./ts_scripts/print_env_info.py:    output = output.decode(enc)
./ts_scripts/print_env_info.py:    err = err.decode(enc)
./ts_scripts/torchserve_grpc_client.py:        prediction = response.prediction.decode('utf-8')
./examples/nmt_transformer/model_handler_generalized.py:            decoded_text = text.decode('utf-8')
./examples/nmt_transformer/model_handler_generalized.py:            textInput.append(decoded_text)
./examples/text_to_speech_synthesizer/waveglow_handler.py:        text = text.decode('utf-8')
./examples/Workflows/dog_breed_classification/dog_breed_classification_handler.py:            cat_dog_response = row.get("cat_dog_classification").decode()
./examples/Workflows/dog_breed_classification/dog_breed_classification_handler.py:            input_data = row.get("pre_processing").decode()
./examples/Workflows/dog_breed_classification/cat_dog_classification_handler.py:            b64_data.append({"body": base64.b64decode(input_data)})
./examples/Workflows/dog_breed_classification/workflow_dog_breed_classification_handler.py:        b64_data.append(base64.b64encode(input_data).decode())
./examples/Huggingface_Transformers/Transformer_handler_generalized.py:                input_text = input_text.decode("utf-8")
./examples/Huggingface_Transformers/Transformer_handler_generalized.py:                    self.tokenizer.decode(input_ids_batch[i])
./examples/Huggingface_Transformers/Transformer_handler_generalized.py:                    self.tokenizer.decode(outputs[i], skip_special_tokens=True)
./examples/Huggingface_Transformers/Transformer_handler_generalized.py:            text = text.decode("utf-8")
./examples/MMF-activity-recognition/handler.py:            script = raw_script.decode('utf-8')
./examples/MMF-activity-recognition/handler.py:            video_label = raw_label.decode('utf-8')
./benchmarks/windows_install_dependencies.py:    output = raw_output.decode(enc)
./benchmarks/windows_install_dependencies.py:    err = raw_err.decode(enc)
./kubernetes/kserve/image_transformer/image_transformer/image_transformer.py:    byte_array = base64.b64decode(instance["data"])
./kubernetes/kserve/kf_request_json/v1/img2bytearray.py:bytes_array = image_64_encode.decode('utf-8')
./kubernetes/kserve/kf_request_json/v2/mnist/tobytes.py:bytes_array = image_64_encode.decode("utf-8")
./kubernetes/kserve/kf_request_json/v2/bert/Transformer_kserve_handler.py:                    input_text = input_text.decode("utf-8")
./ts/service.py:            req_id = request_batch.get('requestId').decode("utf-8")
./ts/service.py:                    model_in_headers.update({h['name'].decode('utf-8'): h['value'].decode('utf-8')})
./ts/torch_handler/request_envelope/kservev2.py:            body_list = [json.loads(body.decode()) for body in body_list]
./ts/torch_handler/request_envelope/kserve.py:            data = data.decode()
./ts/torch_handler/request_envelope/json.py:from base64 import b64decode
./ts/torch_handler/request_envelope/json.py:                    rows[row_i] = b64decode(row['b64'])
./ts/torch_handler/request_envelope/json.py:                            row[col] = b64decode(col_value['b64'])
./ts/torch_handler/vision_handler.py:                image = base64.b64decode(image)
./ts/torch_handler/unit_tests/test_envelopes.py:    envelope = JSONEnvelope(lambda x, y: [row.decode('utf-8') for row in x])
./ts/torch_handler/text_classifier.py:            text = text.decode('utf-8')
./ts/torch_handler/text_handler.py:            .decode("utf-8", "ignore")
./ts/model_service_worker.py:            model_dir = load_model_request["modelPath"].decode("utf-8")
./ts/model_service_worker.py:            model_name = load_model_request["modelName"].decode("utf-8")
./ts/model_service_worker.py:                load_model_request["handler"].decode("utf-8")
./ts/model_service_worker.py:                load_model_request["envelope"].decode("utf-8")
./ts/protocol/otf_message_handler.py:    decode_req = os.environ.get("TS_DECODE_INPUT_REQUEST")
./ts/protocol/otf_message_handler.py:    model_input["name"] = _retrieve_buffer(conn, length).decode("utf-8")
./ts/protocol/otf_message_handler.py:    content_type = _retrieve_buffer(conn, length).decode("utf-8")
./ts/protocol/otf_message_handler.py:    if content_type == "application/json" and (decode_req is None or decode_req == "true"):
./ts/protocol/otf_message_handler.py:        model_input["value"] = json.loads(value.decode("utf-8"))
./ts/protocol/otf_message_handler.py:    elif content_type.startswith("text") and (decode_req is None or decode_req == "true"):
./ts/protocol/otf_message_handler.py:        model_input["value"] = value.decode("utf-8")
./ts/model_service/model_service.py:                form_data = ast.literal_eval(form_data.decode("utf-8"))
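
As one example of what the Huggingface call sites above could gain, here is a minimal sketch (the checkpoint name is illustrative, and this is not the handler's actual code) of replacing the per-sequence `tokenizer.decode` loop in `Transformer_handler_generalized.py` with the Rust-backed fast tokenizer's batched decode:

```python
from transformers import AutoTokenizer

# use_fast=True selects the Rust-backed tokenizer when one exists for the
# checkpoint ("bert-base-uncased" is illustrative only).
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased", use_fast=True)

def postprocess(output_ids):
    # One batched call into the Rust backend instead of a Python-level
    # loop of tokenizer.decode(outputs[i], skip_special_tokens=True).
    return tokenizer.batch_decode(output_ids, skip_special_tokens=True)
```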