Oneflow-Inc / OneAutoTest

Auto-Test System
Apache License 2.0
5 stars 6 forks source link

202303_OneFlow_libai_Report_On_master #69

Open github-actions[bot] opened 1 year ago

github-actions[bot] commented 1 year ago

202303_OneFlow_libai_Report_On_master

github-actions[bot] commented 1 year ago
NVIDIA GeForce RTX 3080 Ti oneflow@7d07caf + libai@50a973
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp1_pp1_zerofalse_stage0_mbs16_gbs128_acc8_1n1g 6857 MiB / 31.27 samples/s
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp1_pp1_zerofalse_stage0_mbs32_gbs32_acc1_1n1g 7939 MiB / 32.0 samples/s
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp8_pp1_zerofalse_stage0_mbs32_gbs256_acc8_1n8g 2934 MiB / 122.96 samples/s
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp2_mp1_pp4_zerotrue_stage2_mbs32_gbs512_acc8_1n8g 7217 MiB / 1354.16 samples/s
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp2_mp2_pp2_zerotrue_stage2_mbs32_gbs512_acc8_1n8g 5989 MiB / 457.44 samples/s
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp1_pp1_zerotrue_stage2_mbs32_gbs128_acc1_1n4g 4250 MiB / 385.2 samples/s
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs32_gbs1024_acc8_1n8g 5408 MiB / 571.44 samples/s
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs32_gbs128_acc1_1n8g 4931 MiB / 508.0 samples/s
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp8_mp1_pp1_zerotrue_stage2_mbs32_gbs2048_acc8_1n8g 5116 MiB / 1791.84 samples/s
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp8_mp1_pp1_zerotrue_stage2_mbs32_gbs256_acc1_1n8g 3830 MiB / 1504.72 samples/s
libai_bert_large_pretrain_graph_nl48_nah16_hs1024_fp16_actrue_dp1_mp1_pp4_zerofalse_stage0_mbs16_gbs128_acc8_1n4g 6618 MiB / 190.64 samples/s
libai_bert_large_pretrain_graph_nl48_nah16_hs1024_fp16_actrue_dp1_mp1_pp8_zerofalse_stage0_mbs24_gbs384_acc16_1n8g 6974 MiB / 589.36 samples/s
libai_bert_large_pretrain_graph_nl48_nah16_hs1024_fp16_actrue_dp1_mp4_pp1_zerofalse_stage0_mbs32_gbs256_acc8_1n4g 6204 MiB / 33.4 samples/s
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp1_pp1_zerofalse_stage0_mbs8_gbs64_acc8_1n1g 8235 MiB / 13.12 samples/s
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp1_pp1_zerofalse_stage0_mbs8_gbs8_acc1_1n1g 8237 MiB / 12.77 samples/s
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp1_pp4_zerofalse_stage0_mbs12_gbs96_acc8_1n4g 7954 MiB / 158.28 samples/s
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp8_pp1_zerofalse_stage0_mbs8_gbs64_acc8_1n8g 2602 MiB / 60.32 samples/s
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp2_mp1_pp4_zerotrue_stage2_mbs8_gbs128_acc8_1n8g 6576 MiB / 522.8 samples/s
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp2_mp2_pp2_zerotrue_stage2_mbs8_gbs128_acc8_1n8g 4738 MiB / 195.36 samples/s
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp1_pp1_zerotrue_stage2_mbs8_gbs256_acc8_1n4g 5648 MiB / 168.76 samples/s
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp1_pp1_zerotrue_stage2_mbs8_gbs32_acc1_1n4g 4216 MiB / 131.96 samples/s
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs8_gbs256_acc8_1n8g 5212 MiB / 259.04 samples/s
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs8_gbs32_acc1_1n8g 4472 MiB / 214.32 samples/s
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp8_mp1_pp1_zerotrue_stage2_mbs8_gbs512_acc8_1n8g 5124 MiB / 662.64 samples/s
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp8_mp1_pp1_zerotrue_stage2_mbs8_gbs64_acc1_1n8g 3780 MiB / 511.36 samples/s
libai_gpt2_pretrain_graph_nl48_nah16_hs1024_fp16_actrue_dp1_mp1_pp8_zerofalse_stage0_mbs6_gbs96_acc16_1n8g 6340 MiB / 270.16 samples/s
libai_gpt2_pretrain_graph_nl48_nah16_hs1024_fp16_actrue_dp1_mp4_pp1_zerofalse_stage0_mbs8_gbs64_acc8_1n4g 5648 MiB / 16.24 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp1_pp1_zerotrue_stage2_mbs128_gbs1024_acc8_1n1g 6957 MiB / 343.3 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp1_pp1_zerotrue_stage2_mbs256_gbs256_acc1_1n1g 6051 MiB / 352.1 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp1_pp4_zerotrue_stage2_mbs128_gbs1024_acc8_1n4g 11789 MiB / 949.12 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp1_pp4_zerotrue_stage2_mbs256_gbs256_acc1_1n4g 5929 MiB / 1027.68 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp2_pp2_zerotrue_stage2_mbs128_gbs1024_acc8_1n4g 9849 MiB / 922.72 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp2_pp2_zerotrue_stage2_mbs256_gbs256_acc1_1n4g 5505 MiB / 1007.52 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp4_pp1_zerotrue_stage2_mbs128_gbs1024_acc8_1n4g 6179 MiB / 774.64 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp4_pp1_zerotrue_stage2_mbs256_gbs256_acc1_1n4g 4985 MiB / 817.64 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp1_pp2_zerotrue_stage2_mbs128_gbs256_acc1_1n4g 4081 MiB / 2095.92 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp1_pp2_zerotrue_stage2_mbs64_gbs1024_acc8_1n4g 6063 MiB / 1882.32 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp2_pp1_zerotrue_stage2_mbs128_gbs256_acc1_1n4g 3573 MiB / 1735.08 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp2_pp1_zerotrue_stage2_mbs64_gbs1024_acc8_1n4g 4093 MiB / 1584.52 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp2_pp2_zerotrue_stage2_mbs128_gbs2048_acc8_1n8g 9699 MiB / 1795.28 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp2_pp2_zerotrue_stage2_mbs256_gbs512_acc1_1n8g 9661 MiB / 2232.4 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp1_pp1_zerotrue_stage2_mbs32_gbs1024_acc8_1n4g 2969 MiB / 3421.6 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp1_pp1_zerotrue_stage2_mbs64_gbs256_acc1_1n4g 2839 MiB / 4229.24 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp1_pp2_zerotrue_stage2_mbs128_gbs512_acc1_1n8g 3404 MiB / 4783.04 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp1_pp2_zerotrue_stage2_mbs64_gbs2048_acc8_1n8g 5711 MiB / 4032.64 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs128_gbs512_acc1_1n8g 2952 MiB / 4783.28 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs64_gbs2048_acc8_1n8g 3875 MiB / 3607.84 samples/s
github-actions[bot] commented 1 year ago
NVIDIA GeForce RTX 3080 Ti oneflow@7d07caf + libai@50a973
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp1_pp1_zerofalse_stage0_mbs16_gbs128_acc8_1n1g 6857 MiB / 31.27 samples/s
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp1_pp1_zerofalse_stage0_mbs32_gbs32_acc1_1n1g 7939 MiB / 32.0 samples/s
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp8_pp1_zerofalse_stage0_mbs32_gbs256_acc8_1n8g 2934 MiB / 122.96 samples/s
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp2_mp1_pp4_zerotrue_stage2_mbs32_gbs512_acc8_1n8g 7217 MiB / 1354.16 samples/s
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp2_mp2_pp2_zerotrue_stage2_mbs32_gbs512_acc8_1n8g 5989 MiB / 457.44 samples/s
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp1_pp1_zerotrue_stage2_mbs32_gbs128_acc1_1n4g 4250 MiB / 385.2 samples/s
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs32_gbs1024_acc8_1n8g 5408 MiB / 571.44 samples/s
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs32_gbs128_acc1_1n8g 4931 MiB / 508.0 samples/s
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp8_mp1_pp1_zerotrue_stage2_mbs32_gbs2048_acc8_1n8g 5116 MiB / 1791.84 samples/s
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp8_mp1_pp1_zerotrue_stage2_mbs32_gbs256_acc1_1n8g 3830 MiB / 1504.72 samples/s
libai_bert_large_pretrain_graph_nl48_nah16_hs1024_fp16_actrue_dp1_mp1_pp4_zerofalse_stage0_mbs16_gbs128_acc8_1n4g 6618 MiB / 190.64 samples/s
libai_bert_large_pretrain_graph_nl48_nah16_hs1024_fp16_actrue_dp1_mp1_pp8_zerofalse_stage0_mbs24_gbs384_acc16_1n8g 6974 MiB / 589.36 samples/s
libai_bert_large_pretrain_graph_nl48_nah16_hs1024_fp16_actrue_dp1_mp4_pp1_zerofalse_stage0_mbs32_gbs256_acc8_1n4g 6204 MiB / 33.4 samples/s
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp1_pp1_zerofalse_stage0_mbs8_gbs64_acc8_1n1g 8235 MiB / 13.12 samples/s
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp1_pp1_zerofalse_stage0_mbs8_gbs8_acc1_1n1g 8237 MiB / 12.77 samples/s
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp1_pp4_zerofalse_stage0_mbs12_gbs96_acc8_1n4g 7954 MiB / 158.28 samples/s
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp8_pp1_zerofalse_stage0_mbs8_gbs64_acc8_1n8g 2602 MiB / 60.32 samples/s
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp2_mp1_pp4_zerotrue_stage2_mbs8_gbs128_acc8_1n8g 6576 MiB / 522.8 samples/s
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp2_mp2_pp2_zerotrue_stage2_mbs8_gbs128_acc8_1n8g 4738 MiB / 195.36 samples/s
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp1_pp1_zerotrue_stage2_mbs8_gbs256_acc8_1n4g 5648 MiB / 168.76 samples/s
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp1_pp1_zerotrue_stage2_mbs8_gbs32_acc1_1n4g 4216 MiB / 131.96 samples/s
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs8_gbs256_acc8_1n8g 5212 MiB / 259.04 samples/s
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs8_gbs32_acc1_1n8g 4472 MiB / 214.32 samples/s
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp8_mp1_pp1_zerotrue_stage2_mbs8_gbs512_acc8_1n8g 5124 MiB / 662.64 samples/s
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp8_mp1_pp1_zerotrue_stage2_mbs8_gbs64_acc1_1n8g 3780 MiB / 511.36 samples/s
libai_gpt2_pretrain_graph_nl48_nah16_hs1024_fp16_actrue_dp1_mp1_pp8_zerofalse_stage0_mbs6_gbs96_acc16_1n8g 6340 MiB / 270.16 samples/s
libai_gpt2_pretrain_graph_nl48_nah16_hs1024_fp16_actrue_dp1_mp4_pp1_zerofalse_stage0_mbs8_gbs64_acc8_1n4g 5648 MiB / 16.24 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp1_pp1_zerotrue_stage2_mbs128_gbs1024_acc8_1n1g 6957 MiB / 343.3 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp1_pp1_zerotrue_stage2_mbs256_gbs256_acc1_1n1g 6051 MiB / 352.1 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp1_pp4_zerotrue_stage2_mbs128_gbs1024_acc8_1n4g 11789 MiB / 949.12 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp1_pp4_zerotrue_stage2_mbs256_gbs256_acc1_1n4g 5929 MiB / 1027.68 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp2_pp2_zerotrue_stage2_mbs128_gbs1024_acc8_1n4g 9849 MiB / 922.72 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp2_pp2_zerotrue_stage2_mbs256_gbs256_acc1_1n4g 5505 MiB / 1007.52 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp4_pp1_zerotrue_stage2_mbs128_gbs1024_acc8_1n4g 6179 MiB / 774.64 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp4_pp1_zerotrue_stage2_mbs256_gbs256_acc1_1n4g 4985 MiB / 817.64 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp1_pp2_zerotrue_stage2_mbs128_gbs256_acc1_1n4g 4081 MiB / 2095.92 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp1_pp2_zerotrue_stage2_mbs64_gbs1024_acc8_1n4g 6063 MiB / 1882.32 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp2_pp1_zerotrue_stage2_mbs128_gbs256_acc1_1n4g 3573 MiB / 1735.08 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp2_pp1_zerotrue_stage2_mbs64_gbs1024_acc8_1n4g 4093 MiB / 1584.52 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp2_pp2_zerotrue_stage2_mbs128_gbs2048_acc8_1n8g 9699 MiB / 1795.28 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp2_pp2_zerotrue_stage2_mbs256_gbs512_acc1_1n8g 9661 MiB / 2232.4 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp1_pp1_zerotrue_stage2_mbs32_gbs1024_acc8_1n4g 2969 MiB / 3421.6 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp1_pp1_zerotrue_stage2_mbs64_gbs256_acc1_1n4g 2839 MiB / 4229.24 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp1_pp2_zerotrue_stage2_mbs128_gbs512_acc1_1n8g 3404 MiB / 4783.04 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp1_pp2_zerotrue_stage2_mbs64_gbs2048_acc8_1n8g 5711 MiB / 4032.64 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs128_gbs512_acc1_1n8g 2952 MiB / 4783.28 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs64_gbs2048_acc8_1n8g 3875 MiB / 3607.84 samples/s
github-actions[bot] commented 1 year ago
oneflow@1ea2bb7 + libai@50a973d
github-actions[bot] commented 1 year ago
NVIDIA GeForce RTX 3080 Ti oneflow@1ea2bb7 + libai@50a973d
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp1_pp1_zerofalse_stage0_mbs16_gbs128_acc8_1n1g 6857 MiB / 31.23 samples/s
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp1_pp1_zerofalse_stage0_mbs32_gbs32_acc1_1n1g 7939 MiB / 32.04 samples/s
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp8_pp1_zerofalse_stage0_mbs32_gbs256_acc8_1n8g 2934 MiB / 123.12 samples/s
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp2_mp1_pp4_zerotrue_stage2_mbs32_gbs512_acc8_1n8g 6744 MiB / 1353.76 samples/s
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp2_mp2_pp2_zerotrue_stage2_mbs32_gbs512_acc8_1n8g 5362 MiB / 458.56 samples/s
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp1_pp1_zerotrue_stage2_mbs32_gbs128_acc1_1n4g 4250 MiB / 383.32 samples/s
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs32_gbs1024_acc8_1n8g 5374 MiB / 571.52 samples/s
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs32_gbs128_acc1_1n8g 4496 MiB / 510.72 samples/s
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp8_mp1_pp1_zerotrue_stage2_mbs32_gbs2048_acc8_1n8g 5114 MiB / 1785.36 samples/s
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp8_mp1_pp1_zerotrue_stage2_mbs32_gbs256_acc1_1n8g 3830 MiB / 1500.56 samples/s
libai_bert_large_pretrain_graph_nl48_nah16_hs1024_fp16_actrue_dp1_mp1_pp4_zerofalse_stage0_mbs16_gbs128_acc8_1n4g 6594 MiB / 190.92 samples/s
libai_bert_large_pretrain_graph_nl48_nah16_hs1024_fp16_actrue_dp1_mp1_pp8_zerofalse_stage0_mbs24_gbs384_acc16_1n8g 6974 MiB / 591.12 samples/s
libai_bert_large_pretrain_graph_nl48_nah16_hs1024_fp16_actrue_dp1_mp4_pp1_zerofalse_stage0_mbs32_gbs256_acc8_1n4g 6204 MiB / 33.48 samples/s
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp1_pp1_zerofalse_stage0_mbs8_gbs64_acc8_1n1g 8137 MiB / 13.12 samples/s
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp1_pp1_zerofalse_stage0_mbs8_gbs8_acc1_1n1g 8237 MiB / 12.76 samples/s
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp1_pp4_zerofalse_stage0_mbs12_gbs96_acc8_1n4g 7514 MiB / 158.6 samples/s
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp8_pp1_zerofalse_stage0_mbs8_gbs64_acc8_1n8g 2602 MiB / 60.4 samples/s
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp2_mp1_pp4_zerotrue_stage2_mbs8_gbs128_acc8_1n8g 6584 MiB / 521.2 samples/s
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp2_mp2_pp2_zerotrue_stage2_mbs8_gbs128_acc8_1n8g 4740 MiB / 196.24 samples/s
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp1_pp1_zerotrue_stage2_mbs8_gbs256_acc8_1n4g 5648 MiB / 167.88 samples/s
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp1_pp1_zerotrue_stage2_mbs8_gbs32_acc1_1n4g 4216 MiB / 131.56 samples/s
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs8_gbs256_acc8_1n8g 5212 MiB / 278.0 samples/s
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs8_gbs32_acc1_1n8g 4472 MiB / 216.32 samples/s
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp8_mp1_pp1_zerotrue_stage2_mbs8_gbs512_acc8_1n8g 5124 MiB / 660.0 samples/s
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp8_mp1_pp1_zerotrue_stage2_mbs8_gbs64_acc1_1n8g 3780 MiB / 509.68 samples/s
libai_gpt2_pretrain_graph_nl48_nah16_hs1024_fp16_actrue_dp1_mp1_pp8_zerofalse_stage0_mbs6_gbs96_acc16_1n8g 5876 MiB / 271.28 samples/s
libai_gpt2_pretrain_graph_nl48_nah16_hs1024_fp16_actrue_dp1_mp4_pp1_zerofalse_stage0_mbs8_gbs64_acc8_1n4g 5648 MiB / 16.24 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp1_pp1_zerotrue_stage2_mbs128_gbs1024_acc8_1n1g 5992 MiB / 344.02 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp1_pp1_zerotrue_stage2_mbs256_gbs256_acc1_1n1g 5454 MiB / 359.13 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp1_pp4_zerotrue_stage2_mbs128_gbs1024_acc8_1n4g 10788 MiB / 943.2 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp1_pp4_zerotrue_stage2_mbs256_gbs256_acc1_1n4g 5332 MiB / 966.64 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp2_pp2_zerotrue_stage2_mbs128_gbs1024_acc8_1n4g 9158 MiB / 940.64 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp2_pp2_zerotrue_stage2_mbs256_gbs256_acc1_1n4g 4676 MiB / 1086.52 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp4_pp1_zerotrue_stage2_mbs128_gbs1024_acc8_1n4g 5510 MiB / 764.4 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp4_pp1_zerotrue_stage2_mbs256_gbs256_acc1_1n4g 4246 MiB / 893.04 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp1_pp2_zerotrue_stage2_mbs128_gbs256_acc1_1n4g 3116 MiB / 2155.96 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp1_pp2_zerotrue_stage2_mbs64_gbs1024_acc8_1n4g 5282 MiB / 2197.96 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp2_pp1_zerotrue_stage2_mbs128_gbs256_acc1_1n4g 2860 MiB / 1888.36 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp2_pp1_zerotrue_stage2_mbs64_gbs1024_acc8_1n4g 3440 MiB / 1634.4 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp2_pp2_zerotrue_stage2_mbs128_gbs2048_acc8_1n8g 9196 MiB / 2215.28 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp2_pp2_zerotrue_stage2_mbs256_gbs512_acc1_1n8g 4702 MiB / 2461.36 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp1_pp1_zerotrue_stage2_mbs32_gbs1024_acc8_1n4g 2280 MiB / 3541.0 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp1_pp1_zerotrue_stage2_mbs64_gbs256_acc1_1n4g 2058 MiB / 3768.28 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp1_pp2_zerotrue_stage2_mbs128_gbs512_acc1_1n8g 3038 MiB / 5050.16 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp1_pp2_zerotrue_stage2_mbs64_gbs2048_acc8_1n8g 4986 MiB / 4420.88 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs128_gbs512_acc1_1n8g 2836 MiB / 4617.28 samples/s
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs64_gbs2048_acc8_1n8g 3444 MiB / 4346.56 samples/s