undertherain / benchmarker

modular framework for [not only] deep learning performance benchmarking
http://blackbird.pw/performance
Mozilla Public License 2.0
9 stars 5 forks source link

Epyc mi100 #186

Closed vatai closed 2 years ago

vatai commented 2 years ago

See sample output below. This just adds the "brand" (i.e. device name) and does not add other details such as memory, cores, clock speed, etc., because either I don't know how to do it or torch.cuda doesn't support it...

docr_user@container_epyc:/workdir$ python3 -m benchmarker --gpus=0 --framework=pytorch --problem=resnet50 --problem_size=32
/usr/local/lib/python3.8/dist-packages/torch/nn/functional.py:718: UserWarning: Named tensors and all their associated APIs are an experimental feature and subject to change. Please do not use them for anything important until they are released as stable. (Triggered internally at  /pytorch/c10/core/TensorImpl.h:1156.)
  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
Train Epoch: 1 [0/1 (0%)]       Loss: 6.887599
Train Epoch: 2 [0/1 (0%)]       Loss: 5.317784
Train Epoch: 3 [0/1 (0%)]       Loss: 4.018268
Train Epoch: 4 [0/1 (0%)]       Loss: 2.858533
Train Epoch: 5 [0/1 (0%)]       Loss: 1.861015
Train Epoch: 6 [0/1 (0%)]       Loss: 1.092951
Train Epoch: 7 [0/1 (0%)]       Loss: 0.597454
Train Epoch: 8 [0/1 (0%)]       Loss: 0.325032
Train Epoch: 9 [0/1 (0%)]       Loss: 0.188242
Train Epoch: 10 [0/1 (0%)]      Loss: 0.117798
{
    "backend": "native",
    "batch_size": 32,
    "batch_size_per_device": 32,
    "channels_first": true,
    "cudnn_benchmark": true,
    "device": "AMD Mi100",
    "framework": "pytorch",
    "framework_full": "PyTorch-1.9.0+rocm4.2",
    "gpus": [
        0
    ],
    "mode": "training",
    "nb_epoch": 10,
    "nb_gpus": 1,
    "path_ext": "training",
    "path_out": "./logs",
    "platform": {
        "cpu": {
            "brand": "AMD EPYC 7452 32-Core Processor",
            "cache": {
                "1": 2097152,
                "2": 33554432,
                "3": 524288
            },
            "clock": 1752.0227968749998,
            "clock_max": 2350.0,
            "clock_min": 1500.0,
            "logical_cores": 64,
            "physical_cores": 64
        },
        "gpus": [
            {
                "brand": "AMD Mi100"
            }
        ],
        "hdds": {
            "/dev/sda": {
                "model": "INTEL SSDSC2KB48",
                "size": 937703088
            },
            "/dev/sdb": {
                "model": "KINGSTON SEDC500",
                "size": 7501476528
            }
        },
        "host": "container_epyc.m.gsic.titech.ac.jp",
        "os": "Linux-4.18.0-305.19.1.el8_4.x86_64-x86_64-with-glibc2.29",
        "ram": {
            "total": 270051356672
        },
        "swap": 4294963200
    },
    "power": {
        "avg_watt_total": 0,
        "joules_total": 0,
        "sampling_ms": 100
    },
    "preheat": false,
    "problem": {
        "cnt_batches_per_epoch": 1,
        "cnt_samples": 32,
        "name": "resnet50",
        "precision": "FP32",
        "size": [
            32,
            3,
            224,
            224
        ]
    },
    "profile_pytorch": false,
    "samples_per_second": 3.2585156763245204,
    "start_time": "21.10.07_02.16.27",
    "tensor_layout": "native",
    "time_batch": 9.820422296109609,
    "time_epoch": 9.820422296109609,
    "time_sample": 0.30688819675342527,
    "time_total": 98.2042229610961
}