I am running a benchmarking script for the Google T5 Small Text Translation model using both eager and torch.compile modes. However, the compile mode is performing worse than eager mode on a c8g.4xlarge instance (AMI: ami-0d486650b94f4c69b, region: us-east-1), which is unexpected given that compiled mode should typically offer better performance.
import argparse

from transformers import T5Tokenizer, T5Model
import torch
from torch.profiler import profile, record_function, ProfilerActivity
import torch._inductor.config as config

config.cpp.weight_prepack = True
config.freezing = True


def test_inference(mode, num_iter):
    tokenizer = T5Tokenizer.from_pretrained("t5-small")
    model = T5Model.from_pretrained("t5-small")

    input_ids = tokenizer(
        "Studies have been shown that owning a dog is good for you", return_tensors="pt"
    ).input_ids  # Batch size 1
    decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids  # Batch size 1

    if mode == 'compile':
        model = torch.compile(model)

    with torch.no_grad():
        for _ in range(50):
            outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)

        with profile(activities=[ProfilerActivity.CPU]) as prof:
            with record_function("model_inference"):
                for _ in range(num_iter):
                    outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)

    print(prof.key_averages().table(sort_by="self_cpu_time_total"))


def main() -> None:
    global m, args
    parser = argparse.ArgumentParser(__doc__)
    parser.add_argument(
        "-m",
        "--mode",
        choices=["eager", "compile"],
        default="eager",
        help="Which test to run.",
    )
    parser.add_argument(
        "-n",
        "--number",
        type=int,
        default=100,
        help="how many iterations to run.",
    )
    args = parser.parse_args()
    test_inference(args.mode, args.number)


if __name__ == "__main__":
    main()
Based on the blog post "Accelerated PyTorch inference with torch.compile on AWS Graviton processors", I can't reproduce its results:
Self CPU time total: 30.509ms (eager mode)
Self CPU time total: 12.226s (compile mode)
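For reference, here is a minimal wall-clock timing sketch I would use to cross-check the profiler numbers (my assumption: same model, inputs, and 50-iteration warm-up as the script above, but timed with time.perf_counter), so that torch.compile's one-time compilation cost is excluded from both modes:

import time
import torch
from transformers import T5Tokenizer, T5Model

tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = T5Model.from_pretrained("t5-small")
input_ids = tokenizer(
    "Studies have been shown that owning a dog is good for you", return_tensors="pt"
).input_ids
decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids


def bench(m, num_iter=100, warmup=50):
    # Warm-up absorbs one-time costs (torch.compile tracing/codegen, caches)
    # before the timed loop starts.
    with torch.no_grad():
        for _ in range(warmup):
            m(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
        start = time.perf_counter()
        for _ in range(num_iter):
            m(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
    return (time.perf_counter() - start) / num_iter


print(f"eager:   {bench(model) * 1e3:.3f} ms/iter")
print(f"compile: {bench(torch.compile(model)) * 1e3:.3f} ms/iter")

I kept the per-iteration average so the two modes can be compared directly with the numbers above.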