Open msaroufim opened 2 years ago
For accelerated decoding, these are all the endpoints we can improve with faster decoding
(base) ➜ serve git:(decode) ✗ find . -name "*.py" | xargs grep "decode"
./test/pytest/test_handler.py: response = response.content.decode("utf-8")
./test/pytest/test_gRPC_inference_api.py: prediction = response.prediction.decode('utf-8')
./ts_scripts/print_env_info.py: output = output.decode(enc)
./ts_scripts/print_env_info.py: err = err.decode(enc)
./ts_scripts/torchserve_grpc_client.py: prediction = response.prediction.decode('utf-8')
./examples/nmt_transformer/model_handler_generalized.py: decoded_text = text.decode('utf-8')
./examples/nmt_transformer/model_handler_generalized.py: textInput.append(decoded_text)
./examples/text_to_speech_synthesizer/waveglow_handler.py: text = text.decode('utf-8')
./examples/Workflows/dog_breed_classification/dog_breed_classification_handler.py: cat_dog_response = row.get("cat_dog_classification").decode()
./examples/Workflows/dog_breed_classification/dog_breed_classification_handler.py: input_data = row.get("pre_processing").decode()
./examples/Workflows/dog_breed_classification/cat_dog_classification_handler.py: b64_data.append({"body": base64.b64decode(input_data)})
./examples/Workflows/dog_breed_classification/workflow_dog_breed_classification_handler.py: b64_data.append(base64.b64encode(input_data).decode())
./examples/Huggingface_Transformers/Transformer_handler_generalized.py: input_text = input_text.decode("utf-8")
./examples/Huggingface_Transformers/Transformer_handler_generalized.py: self.tokenizer.decode(input_ids_batch[i])
./examples/Huggingface_Transformers/Transformer_handler_generalized.py: self.tokenizer.decode(outputs[i], skip_special_tokens=True)
./examples/Huggingface_Transformers/Transformer_handler_generalized.py: text = text.decode("utf-8")
./examples/MMF-activity-recognition/handler.py: script = raw_script.decode('utf-8')
./examples/MMF-activity-recognition/handler.py: video_label = raw_label.decode('utf-8')
./benchmarks/windows_install_dependencies.py: output = raw_output.decode(enc)
./benchmarks/windows_install_dependencies.py: err = raw_err.decode(enc)
./kubernetes/kserve/image_transformer/image_transformer/image_transformer.py: byte_array = base64.b64decode(instance["data"])
./kubernetes/kserve/kf_request_json/v1/img2bytearray.py:bytes_array = image_64_encode.decode('utf-8')
./kubernetes/kserve/kf_request_json/v2/mnist/tobytes.py:bytes_array = image_64_encode.decode("utf-8")
./kubernetes/kserve/kf_request_json/v2/bert/Transformer_kserve_handler.py: input_text = input_text.decode("utf-8")
./ts/service.py: req_id = request_batch.get('requestId').decode("utf-8")
./ts/service.py: model_in_headers.update({h['name'].decode('utf-8'): h['value'].decode('utf-8')})
./ts/torch_handler/request_envelope/kservev2.py: body_list = [json.loads(body.decode()) for body in body_list]
./ts/torch_handler/request_envelope/kserve.py: data = data.decode()
./ts/torch_handler/request_envelope/json.py:from base64 import b64decode
./ts/torch_handler/request_envelope/json.py: rows[row_i] = b64decode(row['b64'])
./ts/torch_handler/request_envelope/json.py: row[col] = b64decode(col_value['b64'])
./ts/torch_handler/vision_handler.py: image = base64.b64decode(image)
./ts/torch_handler/unit_tests/test_envelopes.py: envelope = JSONEnvelope(lambda x, y: [row.decode('utf-8') for row in x])
./ts/torch_handler/text_classifier.py: text = text.decode('utf-8')
./ts/torch_handler/text_handler.py: .decode("utf-8", "ignore")
./ts/model_service_worker.py: model_dir = load_model_request["modelPath"].decode("utf-8")
./ts/model_service_worker.py: model_name = load_model_request["modelName"].decode("utf-8")
./ts/model_service_worker.py: load_model_request["handler"].decode("utf-8")
./ts/model_service_worker.py: load_model_request["envelope"].decode("utf-8")
./ts/protocol/otf_message_handler.py: decode_req = os.environ.get("TS_DECODE_INPUT_REQUEST")
./ts/protocol/otf_message_handler.py: model_input["name"] = _retrieve_buffer(conn, length).decode("utf-8")
./ts/protocol/otf_message_handler.py: content_type = _retrieve_buffer(conn, length).decode("utf-8")
./ts/protocol/otf_message_handler.py: if content_type == "application/json" and (decode_req is None or decode_req == "true"):
./ts/protocol/otf_message_handler.py: model_input["value"] = json.loads(value.decode("utf-8"))
./ts/protocol/otf_message_handler.py: elif content_type.startswith("text") and (decode_req is None or decode_req == "true"):
./ts/protocol/otf_message_handler.py: model_input["value"] = value.decode("utf-8")
./ts/model_service/model_service.py: form_data = ast.literal_eval(form_data.decode("utf-8"))
This recent PR by @min-jean-cho https://github.com/pytorch/serve/pull/1545 has made me realize how much performance we're leaving on the table when it comes to accelerating preprocessing operations. We could more easily support more torchvision backends, add faster tokenizers, and add more hardware-specific configurations to make it easier for developers to pick and choose.
This will also help our #1457 efforts, since there we had so far only considered the performance of torchserve's inference() path.