Powering AWS purpose-built machine learning chips. Blazing fast and cost effective, natively integrated into PyTorch and TensorFlow and integrated with your favorite AWS services
import torch
import torch_neuron
from torchvision import models
# Reproducer: trace a torchvision ResNet-50 with torch.neuron, wrap the traced
# model in torch.neuron.DataParallel over three NeuronCores, and run a single
# batched inference call.
# Load the model and set it to evaluation mode
model = models.resnet50(pretrained=True)
model.eval()
# Compile with an example input
# The example input has shape [1, 3, 224, 224], i.e. a leading (batch) dimension of 1.
image = torch.rand([1, 3, 224, 224])
model_neuron = torch.neuron.trace(model, image)
# Create the DataParallel module, run on the first three NeuronCores
# Equivalent to model_parallel = torch.neuron.DataParallel(model_neuron, device_ids=[0, 1, 2])
model_parallel = torch.neuron.DataParallel(model_neuron, device_ids=['nc:0', 'nc:1', 'nc:2'])
# Create a batched input
# NOTE: batch_size (5) is not a multiple of the number of entries in device_ids (3),
# so the batch cannot be split evenly across the requested NeuronCores.
batch_size = 5
image_batched = torch.rand([batch_size, 3, 224, 224])
# Run inference with a batched input
# NOTE(review): this is the call that fails in the report; presumably DataParallel
# splits the input along dim 0 across the NeuronCores — confirm against the
# torch.neuron.DataParallel documentation.
output = model_parallel(image_batched)
INFO:Neuron:All operators are compiled by neuron-cc (this does not guarantee that neuron-cc will successfully compile)
INFO:Neuron:Number of arithmetic operators (pre-compilation) before = 175, fused = 175, percent fused = 100.0%
INFO:Neuron:Compiling function _NeuronGraph$557 with neuron-cc
INFO:Neuron:Compiling with command line: '/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/bin/neuron-cc compile /tmp/tmpdgidly2q/graph_def.pb --framework TENSORFLOW --pipeline compile SaveTemps --output /tmp/tmpdgidly2q/graph_def.neff --io-config {"inputs": {"0:0": [[1, 3, 224, 224], "float32"]}, "outputs": ["Linear_22/aten_linear/Add:0"]} --verbose 35'
...
Compiler status PASS
INFO:Neuron:skip_inference_context for tensorboard symbols at /home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/torch_neuron/tensorboard.py:305 tb_parse
INFO:Neuron:Number of arithmetic operators (post-compilation) before = 175, compiled = 175, percent compiled = 100.0%
INFO:Neuron:The neuron partitioner created 1 sub-graphs
INFO:Neuron:Neuron successfully compiled 1 sub-graphs, Total fused subgraphs = 1, Percent of model sub-graphs successfully compiled = 100.0%
INFO:Neuron:Compiled these operators (and operator counts) to Neuron:
INFO:Neuron: => aten::_convolution: 53
INFO:Neuron: => aten::adaptive_avg_pool2d: 1
INFO:Neuron: => aten::add: 16
INFO:Neuron: => aten::batch_norm: 53
INFO:Neuron: => aten::flatten: 1
INFO:Neuron: => aten::linear: 1
INFO:Neuron: => aten::max_pool2d: 1
INFO:Neuron: => aten::relu: 49
INFO:Neuron:skip_inference_context for tensorboard symbols at /home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/torch_neuron/tensorboard.py:305 tb_parse
INFO:Neuron:Number of neuron graph operations 1 did not match traced graph 4 - using heuristic matching of hierarchical information
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/torch_neuron/data_parallel.py:216: UserWarning: The NeuronCores are not being fully utilized because `inputs.shape[dim]` is not divisible by the number of NeuronCores given in `device_ids`. In order to get optimal performance, please try to ensure that the shape your inputs at `dim` is divisible by the number of NeuronCores that DataParallel is using, such that `input.shape[dim] % len(device_ids) == 0).`
warnings.warn('The NeuronCores are not being fully utilized because '
2022-Sep-13 22:10:27.0842 19662:20376 ERROR TDRV:notification_subscribe Only one subscriber allowed!
2022-Sep-13 22:10:27.0842 19662:20376 ERROR NRT:nrt_profile_notification_subscribe failed to subscribe to notification type 0
2022-Sep-13 22:10:27.0877 19662:20375 ERROR TDRV:notification_subscribe Only one subscriber allowed!
2022-Sep-13 22:10:27.0877 19662:20375 ERROR NRT:nrt_profile_notification_subscribe failed to subscribe to notification type 0
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
/tmp/ipykernel_19662/688662629.py in <module>
20
21 # Run inference with a batched input
---> 22 output = model_parallel(image_batched)
~/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1108 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1109 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1110 return forward_call(*input, **kwargs)
1111 # Do not call functions when jit is used
1112 full_backward_hooks, non_full_backward_hooks = [], []
~/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/torch_neuron/data_parallel.py in forward(self, *inputs)
222 if len(self.loaded_modules) == 1:
223 return self.loaded_modules[0](*inputs[0])
--> 224 outputs = parallel_apply(modules=self.loaded_modules, inputs=inputs, num_workers=self.num_workers)
225 return gather(outputs, self.dim)
226
~/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/torch_neuron/data_parallel.py in parallel_apply(modules, inputs, num_workers)
320 output = results[i]
321 if isinstance(output, ExceptionWrapper):
--> 322 output.reraise()
323 outputs.append(output)
324 return outputs
~/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/torch/_utils.py in reraise(self)
455 # instantiate since we don't know how to
456 raise RuntimeError(msg) from None
--> 457 raise exception
458
459
RuntimeError: Caught RuntimeError on neuroncore 3.
Original Traceback (most recent call last):
File "/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/torch_neuron/data_parallel.py", line 305, in _worker
output = module(*input)
File "/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
RuntimeError: The following operation failed in the TorchScript interpreter.
Traceback of TorchScript (most recent call last):
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/torch_neuron/decorators.py(372): forward
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/torch/nn/modules/module.py(1098): _slow_forward
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/torch/nn/modules/module.py(1110): _call_impl
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/torch_neuron/graph.py(548): __call__
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/torch_neuron/graph.py(207): run_op
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/torch_neuron/graph.py(196): __call__
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/torch_neuron/runtime.py(69): forward
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/torch/nn/modules/module.py(1098): _slow_forward
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/torch/nn/modules/module.py(1110): _call_impl
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/torch/jit/_trace.py(965): trace_module
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/torch/jit/_trace.py(750): trace
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/torch_neuron/tensorboard.py(307): tb_parse
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/torch_neuron/tensorboard.py(533): tb_graph
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/torch_neuron/decorators.py(482): maybe_generate_tb_graph_def
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/torch_neuron/convert.py(513): maybe_determine_names_from_tensorboard
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/torch_neuron/convert.py(200): trace
/tmp/ipykernel_19662/688662629.py(11): <module>
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/IPython/core/interactiveshell.py(3552): run_code
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/IPython/core/interactiveshell.py(3472): run_ast_nodes
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/IPython/core/interactiveshell.py(3257): run_cell_async
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/IPython/core/async_helpers.py(78): _pseudo_sync_runner
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/IPython/core/interactiveshell.py(3029): _run_cell
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/IPython/core/interactiveshell.py(2975): run_cell
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/ipykernel/zmqshell.py(528): run_cell
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/ipykernel/ipkernel.py(387): do_execute
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/ipykernel/kernelbase.py(730): execute_request
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/ipykernel/kernelbase.py(406): dispatch_shell
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/ipykernel/kernelbase.py(499): process_one
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/ipykernel/kernelbase.py(510): dispatch_queue
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/asyncio/events.py(88): _run
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/asyncio/base_events.py(1786): _run_once
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/asyncio/base_events.py(541): run_forever
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/tornado/platform/asyncio.py(199): start
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/ipykernel/kernelapp.py(712): start
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/traitlets/config/application.py(976): launch_instance
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/site-packages/ipykernel_launcher.py(17): <module>
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/runpy.py(85): _run_code
/home/ubuntu/anaconda3/envs/aws_neuron_pytorch_p37/lib/python3.7/runpy.py(193): _run_module_as_main
RuntimeError: Failed to start profiling status=1 message=Unknown Failure
The behavior I am getting seems to indicate that dynamic batching is not working, whereas it should be (model_parallel.set_dynamic_batching returns True).
Dear AWS team,
I am following the example here: https://awsdocs-neuron.readthedocs-hosted.com/en/latest/neuron-guide/appnotes/perf/torch-neuron-dataparallel-app-note.html#specifying-neuroncores
This is precisely the code I am running on an inf1.6xlarge EC2 instance using the AMI described at https://aws.amazon.com/releasenotes/aws-deep-learning-ami-ubuntu-18-04/. I am getting this error (the code seems to work for batch sizes that are multiples of 3 but fails for batch sizes that are not multiples of 3).
The behavior I am getting seems to indicate that dynamic batching is not working, whereas it should be (model_parallel.set_dynamic_batching returns True). What am I doing wrong, and how do I fix this?