# BenchmarkAI meta
# Descriptor for one benchmark job: MXNet 1.6 (CPU, Python 2) training of
# resnet18_v2 on cifar10 on a single node, reporting training throughput.
# Version of the descriptor format this file is written against.
spec_version = "0.1.0"
[info]
# Free-text summary of the job. The line-ending backslashes fold the line
# breaks, so the value is a single line; the space written before each
# backslash is kept, so the value has one leading and one trailing space.
description = """ \
MXNet training single-node 1.6 CPU Py2 resnet18v2 cifar10 \
"""
# NOTE(review): presumably 'single_run' means the job runs once rather than on
# a recurring schedule - confirm against the BenchmarkAI descriptor docs.
scheduling = 'single_run'
[info.labels]
# Labels and values must be 63 characters or less, beginning and ending with an alphanumeric character
# ([a-z0-9A-Z]) with dashes (-), underscores (_), dots (.), and alphanumerics between
# task_name is mandatory
task_name = "mx_train_single_node_cpu_py2_resnet18v2_cifar10"
[hardware]
# Instance the job runs on; strategy "single_node" matches the single-node
# training named in the description.
instance_type = "c5.18xlarge"
strategy = "single_node"
# This is an AWS Availability Zone ID (use1-az2), not a zone name such as
# us-east-1a.
aws_zone_id="use1-az2"
[env]
# Container image for the run; the tag (1.6-cpu-py2) matches the framework
# version, device and Python version named in the description.
docker_image = "028651357192.dkr.ecr.us-east-1.amazonaws.com/mxnet-training:1.6-cpu-py2"
[env.vars]
# NOTE(review): presumably exported as environment variables inside the
# container, which is why the thread count is a string - confirm.
# OMP_NUM_THREADS sets the OpenMP thread count.
# NOTE(review): 36 presumably matches the physical core count of
# c5.18xlarge - confirm before changing instance_type.
OMP_NUM_THREADS="36"
# Thread-affinity setting for the Intel OpenMP runtime; "verbose" makes the
# runtime print its binding decisions to the job log.
KMP_AFFINITY='granularity=fine,verbose,compact,1,0'
[ml]
# Command that launches the benchmark.
# NOTE(review): $(BAI_SCRIPTS_PATH) is presumably expanded by BenchmarkAI to
# the location of the scripts supplied at submit time (--script) - confirm.
benchmark_code = "python $(BAI_SCRIPTS_PATH)/deeplearning-benchmark/image_classification/image_classification.py"
# Command-line arguments passed to benchmark_code.
# NOTE(review): this is a CPU job; confirm how the script interprets
# "--gpus 0" (GPU count vs. device id) so training does not target a GPU.
args = "--model resnet18_v2 --dataset cifar10 --mode symbolic --gpus 0 --epochs 25 --log-interval 50 --kvstore local --dtype='float32' --batch-size=64"
[output]
[[output.metrics]]
# Name of the metric that will appear in the dashboards.
name = "throughput"
# Reported unit is img/sec although the matched log text says samples/sec.
units = "img/sec"
# Regex applied to the job output. It is a TOML literal string, so the
# backslashes reach the regex engine unchanged. The capture group takes the
# decimal or integer number between "Speed: " and " samples/sec".
pattern = 'Speed: (\d+\.\d+|\d+) samples\/sec'
it uses [script-mode]
1. Save the above file as a <NAME>.toml file on your system
2. Download benchmark scripts
git clone https://github.com/awslabs/deeplearning-benchmark.git
3. cd to the benchmark-ai directory and run:
./bai-bff/bin/anubis --submit <PATH_TO_TOML_FILE> --script <PATH_TO_SCRIPTS_DIRECTORY>
Example:
8c8590431d24:benchmark-ai surakota$ ./bai-bff/bin/anubis --submit </path/to/toml>/mx_train_single_node_1.6_cpu_py2_resnet18v2_cifar10.toml
The MXNet run had a significant drop in performance: 72 img/sec vs. 7 img/sec.
A similar TensorFlow CPU run also had little difference in performance.
MXNet run
it uses [script-mode]
The MXNet run had a significant drop in performance: 72 img/sec vs. 7 img/sec.
A similar TensorFlow CPU run also had little difference in performance.