Open — github-actions[bot] opened 1 year ago

NVIDIA GeForce RTX 3080 Ti | oneflow@7d07caf + libai@50a973 |
---|---|
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp1_pp1_zerofalse_stage0_mbs16_gbs128_acc8_1n1g | 6857 MiB / 31.27 samples/s |
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp1_pp1_zerofalse_stage0_mbs32_gbs32_acc1_1n1g | 7939 MiB / 32.0 samples/s |
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp8_pp1_zerofalse_stage0_mbs32_gbs256_acc8_1n8g | 2934 MiB / 122.96 samples/s |
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp2_mp1_pp4_zerotrue_stage2_mbs32_gbs512_acc8_1n8g | 7217 MiB / 1354.16 samples/s |
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp2_mp2_pp2_zerotrue_stage2_mbs32_gbs512_acc8_1n8g | 5989 MiB / 457.44 samples/s |
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp1_pp1_zerotrue_stage2_mbs32_gbs128_acc1_1n4g | 4250 MiB / 385.2 samples/s |
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs32_gbs1024_acc8_1n8g | 5408 MiB / 571.44 samples/s |
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs32_gbs128_acc1_1n8g | 4931 MiB / 508.0 samples/s |
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp8_mp1_pp1_zerotrue_stage2_mbs32_gbs2048_acc8_1n8g | 5116 MiB / 1791.84 samples/s |
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp8_mp1_pp1_zerotrue_stage2_mbs32_gbs256_acc1_1n8g | 3830 MiB / 1504.72 samples/s |
libai_bert_large_pretrain_graph_nl48_nah16_hs1024_fp16_actrue_dp1_mp1_pp4_zerofalse_stage0_mbs16_gbs128_acc8_1n4g | 6618 MiB / 190.64 samples/s |
libai_bert_large_pretrain_graph_nl48_nah16_hs1024_fp16_actrue_dp1_mp1_pp8_zerofalse_stage0_mbs24_gbs384_acc16_1n8g | 6974 MiB / 589.36 samples/s |
libai_bert_large_pretrain_graph_nl48_nah16_hs1024_fp16_actrue_dp1_mp4_pp1_zerofalse_stage0_mbs32_gbs256_acc8_1n4g | 6204 MiB / 33.4 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp1_pp1_zerofalse_stage0_mbs8_gbs64_acc8_1n1g | 8235 MiB / 13.12 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp1_pp1_zerofalse_stage0_mbs8_gbs8_acc1_1n1g | 8237 MiB / 12.77 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp1_pp4_zerofalse_stage0_mbs12_gbs96_acc8_1n4g | 7954 MiB / 158.28 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp8_pp1_zerofalse_stage0_mbs8_gbs64_acc8_1n8g | 2602 MiB / 60.32 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp2_mp1_pp4_zerotrue_stage2_mbs8_gbs128_acc8_1n8g | 6576 MiB / 522.8 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp2_mp2_pp2_zerotrue_stage2_mbs8_gbs128_acc8_1n8g | 4738 MiB / 195.36 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp1_pp1_zerotrue_stage2_mbs8_gbs256_acc8_1n4g | 5648 MiB / 168.76 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp1_pp1_zerotrue_stage2_mbs8_gbs32_acc1_1n4g | 4216 MiB / 131.96 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs8_gbs256_acc8_1n8g | 5212 MiB / 259.04 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs8_gbs32_acc1_1n8g | 4472 MiB / 214.32 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp8_mp1_pp1_zerotrue_stage2_mbs8_gbs512_acc8_1n8g | 5124 MiB / 662.64 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp8_mp1_pp1_zerotrue_stage2_mbs8_gbs64_acc1_1n8g | 3780 MiB / 511.36 samples/s |
libai_gpt2_pretrain_graph_nl48_nah16_hs1024_fp16_actrue_dp1_mp1_pp8_zerofalse_stage0_mbs6_gbs96_acc16_1n8g | 6340 MiB / 270.16 samples/s |
libai_gpt2_pretrain_graph_nl48_nah16_hs1024_fp16_actrue_dp1_mp4_pp1_zerofalse_stage0_mbs8_gbs64_acc8_1n4g | 5648 MiB / 16.24 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp1_pp1_zerotrue_stage2_mbs128_gbs1024_acc8_1n1g | 6957 MiB / 343.3 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp1_pp1_zerotrue_stage2_mbs256_gbs256_acc1_1n1g | 6051 MiB / 352.1 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp1_pp4_zerotrue_stage2_mbs128_gbs1024_acc8_1n4g | 11789 MiB / 949.12 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp1_pp4_zerotrue_stage2_mbs256_gbs256_acc1_1n4g | 5929 MiB / 1027.68 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp2_pp2_zerotrue_stage2_mbs128_gbs1024_acc8_1n4g | 9849 MiB / 922.72 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp2_pp2_zerotrue_stage2_mbs256_gbs256_acc1_1n4g | 5505 MiB / 1007.52 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp4_pp1_zerotrue_stage2_mbs128_gbs1024_acc8_1n4g | 6179 MiB / 774.64 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp4_pp1_zerotrue_stage2_mbs256_gbs256_acc1_1n4g | 4985 MiB / 817.64 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp1_pp2_zerotrue_stage2_mbs128_gbs256_acc1_1n4g | 4081 MiB / 2095.92 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp1_pp2_zerotrue_stage2_mbs64_gbs1024_acc8_1n4g | 6063 MiB / 1882.32 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp2_pp1_zerotrue_stage2_mbs128_gbs256_acc1_1n4g | 3573 MiB / 1735.08 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp2_pp1_zerotrue_stage2_mbs64_gbs1024_acc8_1n4g | 4093 MiB / 1584.52 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp2_pp2_zerotrue_stage2_mbs128_gbs2048_acc8_1n8g | 9699 MiB / 1795.28 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp2_pp2_zerotrue_stage2_mbs256_gbs512_acc1_1n8g | 9661 MiB / 2232.4 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp1_pp1_zerotrue_stage2_mbs32_gbs1024_acc8_1n4g | 2969 MiB / 3421.6 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp1_pp1_zerotrue_stage2_mbs64_gbs256_acc1_1n4g | 2839 MiB / 4229.24 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp1_pp2_zerotrue_stage2_mbs128_gbs512_acc1_1n8g | 3404 MiB / 4783.04 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp1_pp2_zerotrue_stage2_mbs64_gbs2048_acc8_1n8g | 5711 MiB / 4032.64 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs128_gbs512_acc1_1n8g | 2952 MiB / 4783.28 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs64_gbs2048_acc8_1n8g | 3875 MiB / 3607.84 samples/s |

NVIDIA GeForce RTX 3080 Ti | oneflow@7d07caf + libai@50a973 |
---|---|
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp1_pp1_zerofalse_stage0_mbs16_gbs128_acc8_1n1g | 6857 MiB / 31.27 samples/s |
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp1_pp1_zerofalse_stage0_mbs32_gbs32_acc1_1n1g | 7939 MiB / 32.0 samples/s |
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp8_pp1_zerofalse_stage0_mbs32_gbs256_acc8_1n8g | 2934 MiB / 122.96 samples/s |
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp2_mp1_pp4_zerotrue_stage2_mbs32_gbs512_acc8_1n8g | 7217 MiB / 1354.16 samples/s |
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp2_mp2_pp2_zerotrue_stage2_mbs32_gbs512_acc8_1n8g | 5989 MiB / 457.44 samples/s |
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp1_pp1_zerotrue_stage2_mbs32_gbs128_acc1_1n4g | 4250 MiB / 385.2 samples/s |
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs32_gbs1024_acc8_1n8g | 5408 MiB / 571.44 samples/s |
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs32_gbs128_acc1_1n8g | 4931 MiB / 508.0 samples/s |
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp8_mp1_pp1_zerotrue_stage2_mbs32_gbs2048_acc8_1n8g | 5116 MiB / 1791.84 samples/s |
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp8_mp1_pp1_zerotrue_stage2_mbs32_gbs256_acc1_1n8g | 3830 MiB / 1504.72 samples/s |
libai_bert_large_pretrain_graph_nl48_nah16_hs1024_fp16_actrue_dp1_mp1_pp4_zerofalse_stage0_mbs16_gbs128_acc8_1n4g | 6618 MiB / 190.64 samples/s |
libai_bert_large_pretrain_graph_nl48_nah16_hs1024_fp16_actrue_dp1_mp1_pp8_zerofalse_stage0_mbs24_gbs384_acc16_1n8g | 6974 MiB / 589.36 samples/s |
libai_bert_large_pretrain_graph_nl48_nah16_hs1024_fp16_actrue_dp1_mp4_pp1_zerofalse_stage0_mbs32_gbs256_acc8_1n4g | 6204 MiB / 33.4 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp1_pp1_zerofalse_stage0_mbs8_gbs64_acc8_1n1g | 8235 MiB / 13.12 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp1_pp1_zerofalse_stage0_mbs8_gbs8_acc1_1n1g | 8237 MiB / 12.77 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp1_pp4_zerofalse_stage0_mbs12_gbs96_acc8_1n4g | 7954 MiB / 158.28 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp8_pp1_zerofalse_stage0_mbs8_gbs64_acc8_1n8g | 2602 MiB / 60.32 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp2_mp1_pp4_zerotrue_stage2_mbs8_gbs128_acc8_1n8g | 6576 MiB / 522.8 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp2_mp2_pp2_zerotrue_stage2_mbs8_gbs128_acc8_1n8g | 4738 MiB / 195.36 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp1_pp1_zerotrue_stage2_mbs8_gbs256_acc8_1n4g | 5648 MiB / 168.76 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp1_pp1_zerotrue_stage2_mbs8_gbs32_acc1_1n4g | 4216 MiB / 131.96 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs8_gbs256_acc8_1n8g | 5212 MiB / 259.04 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs8_gbs32_acc1_1n8g | 4472 MiB / 214.32 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp8_mp1_pp1_zerotrue_stage2_mbs8_gbs512_acc8_1n8g | 5124 MiB / 662.64 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp8_mp1_pp1_zerotrue_stage2_mbs8_gbs64_acc1_1n8g | 3780 MiB / 511.36 samples/s |
libai_gpt2_pretrain_graph_nl48_nah16_hs1024_fp16_actrue_dp1_mp1_pp8_zerofalse_stage0_mbs6_gbs96_acc16_1n8g | 6340 MiB / 270.16 samples/s |
libai_gpt2_pretrain_graph_nl48_nah16_hs1024_fp16_actrue_dp1_mp4_pp1_zerofalse_stage0_mbs8_gbs64_acc8_1n4g | 5648 MiB / 16.24 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp1_pp1_zerotrue_stage2_mbs128_gbs1024_acc8_1n1g | 6957 MiB / 343.3 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp1_pp1_zerotrue_stage2_mbs256_gbs256_acc1_1n1g | 6051 MiB / 352.1 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp1_pp4_zerotrue_stage2_mbs128_gbs1024_acc8_1n4g | 11789 MiB / 949.12 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp1_pp4_zerotrue_stage2_mbs256_gbs256_acc1_1n4g | 5929 MiB / 1027.68 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp2_pp2_zerotrue_stage2_mbs128_gbs1024_acc8_1n4g | 9849 MiB / 922.72 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp2_pp2_zerotrue_stage2_mbs256_gbs256_acc1_1n4g | 5505 MiB / 1007.52 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp4_pp1_zerotrue_stage2_mbs128_gbs1024_acc8_1n4g | 6179 MiB / 774.64 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp4_pp1_zerotrue_stage2_mbs256_gbs256_acc1_1n4g | 4985 MiB / 817.64 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp1_pp2_zerotrue_stage2_mbs128_gbs256_acc1_1n4g | 4081 MiB / 2095.92 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp1_pp2_zerotrue_stage2_mbs64_gbs1024_acc8_1n4g | 6063 MiB / 1882.32 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp2_pp1_zerotrue_stage2_mbs128_gbs256_acc1_1n4g | 3573 MiB / 1735.08 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp2_pp1_zerotrue_stage2_mbs64_gbs1024_acc8_1n4g | 4093 MiB / 1584.52 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp2_pp2_zerotrue_stage2_mbs128_gbs2048_acc8_1n8g | 9699 MiB / 1795.28 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp2_pp2_zerotrue_stage2_mbs256_gbs512_acc1_1n8g | 9661 MiB / 2232.4 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp1_pp1_zerotrue_stage2_mbs32_gbs1024_acc8_1n4g | 2969 MiB / 3421.6 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp1_pp1_zerotrue_stage2_mbs64_gbs256_acc1_1n4g | 2839 MiB / 4229.24 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp1_pp2_zerotrue_stage2_mbs128_gbs512_acc1_1n8g | 3404 MiB / 4783.04 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp1_pp2_zerotrue_stage2_mbs64_gbs2048_acc8_1n8g | 5711 MiB / 4032.64 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs128_gbs512_acc1_1n8g | 2952 MiB / 4783.28 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs64_gbs2048_acc8_1n8g | 3875 MiB / 3607.84 samples/s |

NVIDIA GeForce RTX 3080 Ti | oneflow@1ea2bb7 + libai@50a973d |
---|---|
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp1_pp1_zerofalse_stage0_mbs16_gbs128_acc8_1n1g | 6857 MiB / 31.23 samples/s |
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp1_pp1_zerofalse_stage0_mbs32_gbs32_acc1_1n1g | 7939 MiB / 32.04 samples/s |
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp8_pp1_zerofalse_stage0_mbs32_gbs256_acc8_1n8g | 2934 MiB / 123.12 samples/s |
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp2_mp1_pp4_zerotrue_stage2_mbs32_gbs512_acc8_1n8g | 6744 MiB / 1353.76 samples/s |
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp2_mp2_pp2_zerotrue_stage2_mbs32_gbs512_acc8_1n8g | 5362 MiB / 458.56 samples/s |
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp1_pp1_zerotrue_stage2_mbs32_gbs128_acc1_1n4g | 4250 MiB / 383.32 samples/s |
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs32_gbs1024_acc8_1n8g | 5374 MiB / 571.52 samples/s |
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs32_gbs128_acc1_1n8g | 4496 MiB / 510.72 samples/s |
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp8_mp1_pp1_zerotrue_stage2_mbs32_gbs2048_acc8_1n8g | 5114 MiB / 1785.36 samples/s |
libai_bert_large_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp8_mp1_pp1_zerotrue_stage2_mbs32_gbs256_acc1_1n8g | 3830 MiB / 1500.56 samples/s |
libai_bert_large_pretrain_graph_nl48_nah16_hs1024_fp16_actrue_dp1_mp1_pp4_zerofalse_stage0_mbs16_gbs128_acc8_1n4g | 6594 MiB / 190.92 samples/s |
libai_bert_large_pretrain_graph_nl48_nah16_hs1024_fp16_actrue_dp1_mp1_pp8_zerofalse_stage0_mbs24_gbs384_acc16_1n8g | 6974 MiB / 591.12 samples/s |
libai_bert_large_pretrain_graph_nl48_nah16_hs1024_fp16_actrue_dp1_mp4_pp1_zerofalse_stage0_mbs32_gbs256_acc8_1n4g | 6204 MiB / 33.48 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp1_pp1_zerofalse_stage0_mbs8_gbs64_acc8_1n1g | 8137 MiB / 13.12 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp1_pp1_zerofalse_stage0_mbs8_gbs8_acc1_1n1g | 8237 MiB / 12.76 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp1_pp4_zerofalse_stage0_mbs12_gbs96_acc8_1n4g | 7514 MiB / 158.6 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp1_mp8_pp1_zerofalse_stage0_mbs8_gbs64_acc8_1n8g | 2602 MiB / 60.4 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp2_mp1_pp4_zerotrue_stage2_mbs8_gbs128_acc8_1n8g | 6584 MiB / 521.2 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp2_mp2_pp2_zerotrue_stage2_mbs8_gbs128_acc8_1n8g | 4740 MiB / 196.24 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp1_pp1_zerotrue_stage2_mbs8_gbs256_acc8_1n4g | 5648 MiB / 167.88 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp1_pp1_zerotrue_stage2_mbs8_gbs32_acc1_1n4g | 4216 MiB / 131.56 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs8_gbs256_acc8_1n8g | 5212 MiB / 278.0 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs8_gbs32_acc1_1n8g | 4472 MiB / 216.32 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp8_mp1_pp1_zerotrue_stage2_mbs8_gbs512_acc8_1n8g | 5124 MiB / 660.0 samples/s |
libai_gpt2_pretrain_graph_nl24_nah16_hs1024_fp16_actrue_dp8_mp1_pp1_zerotrue_stage2_mbs8_gbs64_acc1_1n8g | 3780 MiB / 509.68 samples/s |
libai_gpt2_pretrain_graph_nl48_nah16_hs1024_fp16_actrue_dp1_mp1_pp8_zerofalse_stage0_mbs6_gbs96_acc16_1n8g | 5876 MiB / 271.28 samples/s |
libai_gpt2_pretrain_graph_nl48_nah16_hs1024_fp16_actrue_dp1_mp4_pp1_zerofalse_stage0_mbs8_gbs64_acc8_1n4g | 5648 MiB / 16.24 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp1_pp1_zerotrue_stage2_mbs128_gbs1024_acc8_1n1g | 5992 MiB / 344.02 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp1_pp1_zerotrue_stage2_mbs256_gbs256_acc1_1n1g | 5454 MiB / 359.13 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp1_pp4_zerotrue_stage2_mbs128_gbs1024_acc8_1n4g | 10788 MiB / 943.2 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp1_pp4_zerotrue_stage2_mbs256_gbs256_acc1_1n4g | 5332 MiB / 966.64 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp2_pp2_zerotrue_stage2_mbs128_gbs1024_acc8_1n4g | 9158 MiB / 940.64 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp2_pp2_zerotrue_stage2_mbs256_gbs256_acc1_1n4g | 4676 MiB / 1086.52 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp4_pp1_zerotrue_stage2_mbs128_gbs1024_acc8_1n4g | 5510 MiB / 764.4 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp1_mp4_pp1_zerotrue_stage2_mbs256_gbs256_acc1_1n4g | 4246 MiB / 893.04 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp1_pp2_zerotrue_stage2_mbs128_gbs256_acc1_1n4g | 3116 MiB / 2155.96 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp1_pp2_zerotrue_stage2_mbs64_gbs1024_acc8_1n4g | 5282 MiB / 2197.96 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp2_pp1_zerotrue_stage2_mbs128_gbs256_acc1_1n4g | 2860 MiB / 1888.36 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp2_pp1_zerotrue_stage2_mbs64_gbs1024_acc8_1n4g | 3440 MiB / 1634.4 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp2_pp2_zerotrue_stage2_mbs128_gbs2048_acc8_1n8g | 9196 MiB / 2215.28 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp2_mp2_pp2_zerotrue_stage2_mbs256_gbs512_acc1_1n8g | 4702 MiB / 2461.36 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp1_pp1_zerotrue_stage2_mbs32_gbs1024_acc8_1n4g | 2280 MiB / 3541.0 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp1_pp1_zerotrue_stage2_mbs64_gbs256_acc1_1n4g | 2058 MiB / 3768.28 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp1_pp2_zerotrue_stage2_mbs128_gbs512_acc1_1n8g | 3038 MiB / 5050.16 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp1_pp2_zerotrue_stage2_mbs64_gbs2048_acc8_1n8g | 4986 MiB / 4420.88 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs128_gbs512_acc1_1n8g | 2836 MiB / 4617.28 samples/s |
libai_swin_imagenet_graph_nl12_nah12_hs768_fp16_actrue_dp4_mp2_pp1_zerotrue_stage2_mbs64_gbs2048_acc8_1n8g | 3444 MiB / 4346.56 samples/s |

202303_OneFlow_libai_Report_On_master