Can someone please provide a guide on how to deploy a SetFit model on Amazon SageMaker? I don't think Hugging Face supports this out of the box. I have tried the following script, but it's not working.
Note: the SetFit model is in .safetensors format and was created with setfit==1.0.3 and transformers==4.39.0.
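For context, this is roughly how I package the artifacts before uploading them to S3. It is a minimal sketch that assumes the usual SageMaker Hugging Face layout (the SetFit files at the root of model.tar.gz plus a code/ directory containing inference.py and an optional requirements.txt); the local model/ paths are placeholders from my setup.

import tarfile

# Assumed local layout (placeholder paths):
#   model/                        <- files written by SetFitModel.save_pretrained (incl. the .safetensors weights and head)
#   model/code/inference.py       <- the inference script shown further down
#   model/code/requirements.txt   <- optional, e.g. pinning setfit==1.0.3
with tarfile.open("model.tar.gz", "w:gz") as tar:
    tar.add("model", arcname=".")  # archive contents end up at the root of model.tar.gz

# then upload, e.g.: aws s3 cp model.tar.gz s3://path/model.tar.gz

The deployment script is below.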
from sagemaker.huggingface.model import HuggingFaceModel

# create Hugging Face Model Class
huggingface_model = HuggingFaceModel(
    model_data="s3://path/model.tar.gz",  # path to your trained SageMaker model
    role=role,                            # IAM role with permissions to create an endpoint
    transformers_version="4.37",          # Transformers version used
    pytorch_version="2.1",                # PyTorch version used
    py_version="py310",                   # Python version used
    # transformers_version="4.26",
    # pytorch_version="1.13",
    # py_version="py39",
    entry_point="model/code/inference.py",
)

# deploy model to SageMaker Inference
predictor = huggingface_model.deploy(
    endpoint_name=endpoint_name,
    initial_instance_count=1,
    instance_type="ml.g4dn.xlarge",
)

import json

data = {
    "inputs": ["Camera - You are awarded a SiPix Digital Camera! call 09061221066 fromm landline. Delivery within 28 days."]
}

# request
predictor.predict(data)
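Once the endpoint exists, I also invoke it directly with boto3 to look at the raw response and errors outside the SDK wrapper. A sketch, assuming the same endpoint_name and data as above:

import json

import boto3

runtime = boto3.client("sagemaker-runtime")
response = runtime.invoke_endpoint(
    EndpointName=endpoint_name,        # same endpoint as deployed above
    ContentType="application/json",
    Body=json.dumps(data).encode("utf-8"),
)
print(response["Body"].read().decode("utf-8"))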
Here is the inference script (model/code/inference.py, referenced by entry_point above):
import subprocess
import sys


def manage_packages(transformers_version="4.39.0", setfit_version="1.0.3"):
    """
    Uninstall the existing transformers package and install the specified
    versions of transformers and setfit.

    Args:
        transformers_version (str): The version of the transformers package to install.
        setfit_version (str): The version of the setfit package to install.
    """
    try:
        # Uninstall existing transformers package
        subprocess.check_call([sys.executable, "-m", "pip", "uninstall", "-y", "transformers"])
        print("Transformers uninstalled successfully.")

        # Install specified version of setfit package
        subprocess.check_call([sys.executable, "-m", "pip", "install", f"setfit=={setfit_version}"])
        print(f"SetFit {setfit_version} installed successfully.")

        # Install specified version of transformers package
        subprocess.check_call([sys.executable, "-m", "pip", "install", f"transformers=={transformers_version}"])
        print(f"Transformers {transformers_version} installed successfully.")
    except subprocess.CalledProcessError as e:
        print(f"Error during package management: {e}")
        sys.exit(1)


manage_packages()

import ast

import torch
from sagemaker_inference import decoder, encoder
from setfit import SetFitModel


def model_fn(model_dir):
    model = SetFitModel.from_pretrained(model_dir)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    print(f"model loaded successfully {model}")
    return model


def input_fn(input_data, content_type):
    """A default input_fn that can handle JSON, CSV and NPZ formats.

    Args:
        input_data: the request payload serialized in the content_type format
        content_type: the request content_type

    Returns: input_data deserialized into the expected format. Currently the expected
        format is {"inputs": ["q1", "q2", ...]}
    """
    decoded = None
    try:
        print(f"input_data: {input_data}, content_type: {content_type}")
        decoded = decoder.decode(input_data, content_type)
        print(f"decoded input: {decoded}, content_type: {content_type}")
        return ast.literal_eval(str(decoded))
    except Exception as e:
        print(f"invalid input. input: {decoded}, error: {e}")
        raise e


def output_fn(prediction, accept):
    """A default output_fn for PyTorch. Serializes predictions from predict_fn to JSON, CSV or NPY format.

    Args:
        prediction: a prediction result from predict_fn
        accept: type which the output data needs to be serialized to

    Returns: output data serialized
    """
    print(f"prediction: {prediction}, prediction type: {type(prediction)}, accept: {accept}")
    encoded = encoder.encode(prediction, accept)
    print(f"encoded output: {encoded}, content_type: {accept}")
    return encoded


def predict_fn(data, model):
    """A default predict_fn for PyTorch. Calls the model on data deserialized in input_fn.
    Runs prediction on GPU if CUDA is available.

    Args:
        data: input data for prediction deserialized by input_fn
        model: PyTorch model loaded in memory by model_fn

    Returns: a prediction
    """
    try:
        print(f"data: {data}, data_type: {type(data)}")
        inputs = data.get("inputs", None)
        if inputs is None:
            raise Exception(f'"inputs" not found: {data}')
        return model.predict(inputs)
    except Exception as e:
        print(f"predict_fn error: {e}")
        raise e
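To separate packaging problems from handler bugs, I also exercise the four handlers locally before deploying. This is a minimal smoke test, assuming the unpacked SetFit artifacts are in ./model and that it is acceptable for the import of inference.py to run manage_packages() (i.e. reinstall setfit/transformers) in the local environment:

import json

from inference import input_fn, model_fn, output_fn, predict_fn

model = model_fn("./model")  # hypothetical local copy of the files that go into model.tar.gz

payload = json.dumps(
    {"inputs": ["Camera - You are awarded a SiPix Digital Camera! call 09061221066 fromm landline. Delivery within 28 days."]}
)
data = input_fn(payload, "application/json")
prediction = predict_fn(data, model)
print(output_fn(prediction, "application/json"))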