pytorch / tensordict

TensorDict is a pytorch dedicated tensor container.
MIT License
808 stars 66 forks source link

[BugFix] Fix map #862

Status: Closed (vmoens closed this 2 months ago)

github-actions[bot] commented 2 months ago

$\color{#D29922}\textsf{\Large ⚠\kern{0.2cm}\normalsize Warning}$ Result of CPU Benchmark Tests

Total Benchmarks: 144. Improved: $\large\color{#35bf28}15$. Worsened: $\large\color{#d91a1a}4$.

Expand to view detailed results | Name | Max | Mean | Ops | Ops on Repo `HEAD` | Change | | ------------------------------------------ | --------- | --------- | --------------- | ------------------ | ----------------------------------- | | test_plain_set_nested | 46.3260μs | 18.8402μs | 53.0780 KOps/s | 56.8595 KOps/s | $\textbf{\color{#d91a1a}-6.65\\%}$ | | test_plain_set_stack_nested | 48.8410μs | 17.4501μs | 57.3062 KOps/s | 56.7592 KOps/s | $\color{#35bf28}+0.96\\%$ | | test_plain_set_nested_inplace | 62.4460μs | 19.7109μs | 50.7334 KOps/s | 50.3280 KOps/s | $\color{#35bf28}+0.81\\%$ | | test_plain_set_stack_nested_inplace | 60.3220μs | 19.5818μs | 51.0677 KOps/s | 51.1594 KOps/s | $\color{#d91a1a}-0.18\\%$ | | test_items | 17.3620μs | 2.7003μs | 370.3317 KOps/s | 382.4535 KOps/s | $\color{#d91a1a}-3.17\\%$ | | test_items_nested | 0.4410ms | 0.2728ms | 3.6659 KOps/s | 3.6587 KOps/s | $\color{#35bf28}+0.20\\%$ | | test_items_nested_locked | 1.4728ms | 0.2771ms | 3.6088 KOps/s | 3.7153 KOps/s | $\color{#d91a1a}-2.86\\%$ | | test_items_nested_leaf | 0.1669ms | 82.0979μs | 12.1806 KOps/s | 12.5551 KOps/s | $\color{#d91a1a}-2.98\\%$ | | test_items_stack_nested | 0.4832ms | 0.2761ms | 3.6214 KOps/s | 3.6610 KOps/s | $\color{#d91a1a}-1.08\\%$ | | test_items_stack_nested_leaf | 0.1772ms | 80.7402μs | 12.3854 KOps/s | 12.2935 KOps/s | $\color{#35bf28}+0.75\\%$ | | test_items_stack_nested_locked | 0.5337ms | 0.2754ms | 3.6308 KOps/s | 3.7048 KOps/s | $\color{#d91a1a}-2.00\\%$ | | test_keys | 22.4320μs | 3.7989μs | 263.2347 KOps/s | 236.1487 KOps/s | $\textbf{\color{#35bf28}+11.47\\%}$ | | test_keys_nested | 0.2534ms | 0.1415ms | 7.0671 KOps/s | 7.2178 KOps/s | $\color{#d91a1a}-2.09\\%$ | | test_keys_nested_locked | 0.7914ms | 0.1465ms | 6.8268 KOps/s | 6.9291 KOps/s | $\color{#d91a1a}-1.48\\%$ | | test_keys_nested_leaf | 0.2281ms | 0.1205ms | 8.2953 KOps/s | 8.4485 KOps/s | $\color{#d91a1a}-1.81\\%$ | | test_keys_stack_nested | 0.2318ms | 0.1384ms | 7.2272 KOps/s | 
7.1180 KOps/s | $\color{#35bf28}+1.53\\%$ | | test_keys_stack_nested_leaf | 0.1994ms | 0.1193ms | 8.3833 KOps/s | 8.5284 KOps/s | $\color{#d91a1a}-1.70\\%$ | | test_keys_stack_nested_locked | 0.2701ms | 0.1425ms | 7.0198 KOps/s | 6.9810 KOps/s | $\color{#35bf28}+0.56\\%$ | | test_values | 7.5340μs | 1.1598μs | 862.2039 KOps/s | 811.8251 KOps/s | $\textbf{\color{#35bf28}+6.21\\%}$ | | test_values_nested | 0.1080ms | 51.8327μs | 19.2928 KOps/s | 19.3361 KOps/s | $\color{#d91a1a}-0.22\\%$ | | test_values_nested_locked | 0.1126ms | 50.4906μs | 19.8057 KOps/s | 19.4840 KOps/s | $\color{#35bf28}+1.65\\%$ | | test_values_nested_leaf | 98.9650μs | 46.3757μs | 21.5630 KOps/s | 21.4535 KOps/s | $\color{#35bf28}+0.51\\%$ | | test_values_stack_nested | 0.1763ms | 53.1157μs | 18.8268 KOps/s | 19.0278 KOps/s | $\color{#d91a1a}-1.06\\%$ | | test_values_stack_nested_leaf | 0.1091ms | 46.6129μs | 21.4533 KOps/s | 21.4780 KOps/s | $\color{#d91a1a}-0.12\\%$ | | test_values_stack_nested_locked | 99.0440μs | 52.5383μs | 19.0337 KOps/s | 19.0653 KOps/s | $\color{#d91a1a}-0.17\\%$ | | test_membership | 49.9530μs | 1.3441μs | 744.0073 KOps/s | 727.2551 KOps/s | $\color{#35bf28}+2.30\\%$ | | test_membership_nested | 28.5630μs | 3.4974μs | 285.9265 KOps/s | 291.9307 KOps/s | $\color{#d91a1a}-2.06\\%$ | | test_membership_nested_leaf | 47.8290μs | 3.4954μs | 286.0895 KOps/s | 292.4005 KOps/s | $\color{#d91a1a}-2.16\\%$ | | test_membership_stacked_nested | 29.3450μs | 3.5260μs | 283.6097 KOps/s | 296.2127 KOps/s | $\color{#d91a1a}-4.25\\%$ | | test_membership_stacked_nested_leaf | 29.4550μs | 3.4239μs | 292.0657 KOps/s | 293.9534 KOps/s | $\color{#d91a1a}-0.64\\%$ | | test_membership_nested_last | 50.4050μs | 4.2542μs | 235.0634 KOps/s | 238.8571 KOps/s | $\color{#d91a1a}-1.59\\%$ | | test_membership_nested_leaf_last | 34.2330μs | 4.2922μs | 232.9830 KOps/s | 239.1461 KOps/s | $\color{#d91a1a}-2.58\\%$ | | test_membership_stacked_nested_last | 40.1250μs | 9.2352μs | 108.2811 KOps/s | 161.8493 
KOps/s | $\textbf{\color{#d91a1a}-33.10\\%}$ | | test_membership_stacked_nested_leaf_last | 47.6780μs | 9.2346μs | 108.2884 KOps/s | 162.3737 KOps/s | $\textbf{\color{#d91a1a}-33.31\\%}$ | | test_nested_getleaf | 38.9630μs | 10.6653μs | 93.7622 KOps/s | 94.3839 KOps/s | $\color{#d91a1a}-0.66\\%$ | | test_nested_get | 46.0450μs | 10.0632μs | 99.3724 KOps/s | 98.7366 KOps/s | $\color{#35bf28}+0.64\\%$ | | test_stacked_getleaf | 57.5870μs | 10.5353μs | 94.9189 KOps/s | 94.9621 KOps/s | $\color{#d91a1a}-0.05\\%$ | | test_stacked_get | 36.5180μs | 9.9848μs | 100.1522 KOps/s | 100.9036 KOps/s | $\color{#d91a1a}-0.74\\%$ | | test_nested_getitemleaf | 59.8910μs | 11.2265μs | 89.0751 KOps/s | 90.2929 KOps/s | $\color{#d91a1a}-1.35\\%$ | | test_nested_getitem | 54.1310μs | 10.3094μs | 96.9985 KOps/s | 97.6355 KOps/s | $\color{#d91a1a}-0.65\\%$ | | test_stacked_getitemleaf | 47.4090μs | 11.1987μs | 89.2960 KOps/s | 90.2104 KOps/s | $\color{#d91a1a}-1.01\\%$ | | test_stacked_getitem | 58.3990μs | 10.1890μs | 98.1448 KOps/s | 98.1055 KOps/s | $\color{#35bf28}+0.04\\%$ | | test_lock_nested | 0.9544ms | 0.3408ms | 2.9346 KOps/s | 2.9679 KOps/s | $\color{#d91a1a}-1.12\\%$ | | test_lock_stack_nested | 0.5032ms | 0.2951ms | 3.3890 KOps/s | 3.3043 KOps/s | $\color{#35bf28}+2.56\\%$ | | test_unlock_nested | 0.8957ms | 0.3385ms | 2.9546 KOps/s | 2.8834 KOps/s | $\color{#35bf28}+2.47\\%$ | | test_unlock_stack_nested | 0.4714ms | 0.3047ms | 3.2823 KOps/s | 3.2237 KOps/s | $\color{#35bf28}+1.82\\%$ | | test_flatten_speed | 0.6127ms | 0.1009ms | 9.9114 KOps/s | 9.9277 KOps/s | $\color{#d91a1a}-0.16\\%$ | | test_unflatten_speed | 0.5315ms | 0.4155ms | 2.4069 KOps/s | 2.4477 KOps/s | $\color{#d91a1a}-1.67\\%$ | | test_common_ops | 1.7852ms | 0.7364ms | 1.3580 KOps/s | 1.2998 KOps/s | $\color{#35bf28}+4.48\\%$ | | test_creation | 29.6550μs | 1.8772μs | 532.7035 KOps/s | 506.1089 KOps/s | $\textbf{\color{#35bf28}+5.25\\%}$ | | test_creation_empty | 35.5670μs | 10.5856μs | 94.4681 KOps/s | 
86.3989 KOps/s | $\textbf{\color{#35bf28}+9.34\\%}$ | | test_creation_nested_1 | 55.3830μs | 13.1172μs | 76.2357 KOps/s | 69.9460 KOps/s | $\textbf{\color{#35bf28}+8.99\\%}$ | | test_creation_nested_2 | 53.6100μs | 16.7585μs | 59.6711 KOps/s | 56.5470 KOps/s | $\textbf{\color{#35bf28}+5.52\\%}$ | | test_clone | 0.1517ms | 12.9401μs | 77.2790 KOps/s | 73.7936 KOps/s | $\color{#35bf28}+4.72\\%$ | | test_getitem[int] | 29.4950μs | 11.0103μs | 90.8242 KOps/s | 90.2156 KOps/s | $\color{#35bf28}+0.67\\%$ | | test_getitem[slice_int] | 57.2970μs | 21.6295μs | 46.2332 KOps/s | 42.3391 KOps/s | $\textbf{\color{#35bf28}+9.20\\%}$ | | test_getitem[range] | 81.1110μs | 60.5688μs | 16.5102 KOps/s | 17.0476 KOps/s | $\color{#d91a1a}-3.15\\%$ | | test_getitem[tuple] | 58.3290μs | 18.1520μs | 55.0904 KOps/s | 52.8804 KOps/s | $\color{#35bf28}+4.18\\%$ | | test_getitem[list] | 0.1725ms | 41.6377μs | 24.0167 KOps/s | 24.3116 KOps/s | $\color{#d91a1a}-1.21\\%$ | | test_setitem_dim[int] | 69.0490μs | 34.1839μs | 29.2535 KOps/s | 28.7500 KOps/s | $\color{#35bf28}+1.75\\%$ | | test_setitem_dim[slice_int] | 0.1571ms | 61.1070μs | 16.3647 KOps/s | 16.2362 KOps/s | $\color{#35bf28}+0.79\\%$ | | test_setitem_dim[range] | 0.1527ms | 83.9731μs | 11.9086 KOps/s | 12.0420 KOps/s | $\color{#d91a1a}-1.11\\%$ | | test_setitem_dim[tuple] | 0.1198ms | 50.1746μs | 19.9304 KOps/s | 20.0785 KOps/s | $\color{#d91a1a}-0.74\\%$ | | test_setitem | 50.4740μs | 19.4858μs | 51.3194 KOps/s | 48.3025 KOps/s | $\textbf{\color{#35bf28}+6.25\\%}$ | | test_set | 65.3420μs | 19.1605μs | 52.1906 KOps/s | 47.6700 KOps/s | $\textbf{\color{#35bf28}+9.48\\%}$ | | test_set_shared | 3.3178ms | 0.1714ms | 5.8333 KOps/s | 5.8248 KOps/s | $\color{#35bf28}+0.15\\%$ | | test_update | 0.2590ms | 22.2003μs | 45.0444 KOps/s | 42.7877 KOps/s | $\textbf{\color{#35bf28}+5.27\\%}$ | | test_update_nested | 0.1334ms | 30.4821μs | 32.8061 KOps/s | 31.2532 KOps/s | $\color{#35bf28}+4.97\\%$ | | test_update__nested | 89.8780μs | 24.9843μs | 
40.0252 KOps/s | 39.9789 KOps/s | $\color{#35bf28}+0.12\\%$ | | test_set_nested | 95.8990μs | 20.8440μs | 47.9755 KOps/s | 44.9197 KOps/s | $\textbf{\color{#35bf28}+6.80\\%}$ | | test_set_nested_new | 81.8620μs | 24.8626μs | 40.2210 KOps/s | 38.3990 KOps/s | $\color{#35bf28}+4.74\\%$ | | test_select | 95.4880μs | 39.4920μs | 25.3216 KOps/s | 24.6536 KOps/s | $\color{#35bf28}+2.71\\%$ | | test_select_nested | 0.1513ms | 56.9840μs | 17.5488 KOps/s | 17.7537 KOps/s | $\color{#d91a1a}-1.15\\%$ | | test_exclude_nested | 0.2226ms | 0.1194ms | 8.3776 KOps/s | 8.6018 KOps/s | $\color{#d91a1a}-2.61\\%$ | | test_empty[True] | 0.8276ms | 0.4022ms | 2.4866 KOps/s | 2.5370 KOps/s | $\color{#d91a1a}-1.99\\%$ | | test_empty[False] | 6.1736μs | 1.0227μs | 977.8399 KOps/s | 996.8528 KOps/s | $\color{#d91a1a}-1.91\\%$ | | test_unbind_speed | 0.3111ms | 0.2463ms | 4.0593 KOps/s | 4.0407 KOps/s | $\color{#35bf28}+0.46\\%$ | | test_unbind_speed_stack0 | 0.3070ms | 0.2376ms | 4.2094 KOps/s | 4.1259 KOps/s | $\color{#35bf28}+2.02\\%$ | | test_unbind_speed_stack1 | 0.6886ms | 0.6034ms | 1.6574 KOps/s | 1.3932 KOps/s | $\textbf{\color{#35bf28}+18.96\\%}$ | | test_split | 73.3520ms | 1.5788ms | 633.3758 Ops/s | 636.6743 Ops/s | $\color{#d91a1a}-0.52\\%$ | | test_chunk | 75.7375ms | 1.5830ms | 631.6928 Ops/s | 643.1117 Ops/s | $\color{#d91a1a}-1.78\\%$ | | test_creation[device0] | 0.2683ms | 97.8234μs | 10.2225 KOps/s | 10.5565 KOps/s | $\color{#d91a1a}-3.16\\%$ | | test_creation_from_tensor | 3.9928ms | 0.1003ms | 9.9749 KOps/s | 10.0682 KOps/s | $\color{#d91a1a}-0.93\\%$ | | test_add_one[memmap_tensor0] | 0.1215ms | 5.3813μs | 185.8301 KOps/s | 185.0274 KOps/s | $\color{#35bf28}+0.43\\%$ | | test_contiguous[memmap_tensor0] | 23.9140μs | 0.6364μs | 1.5714 MOps/s | 1.5777 MOps/s | $\color{#d91a1a}-0.40\\%$ | | test_stack[memmap_tensor0] | 20.9090μs | 3.5058μs | 285.2437 KOps/s | 276.7902 KOps/s | $\color{#35bf28}+3.05\\%$ | | test_memmaptd_index | 1.0656ms | 0.2581ms | 3.8742 KOps/s | 3.9442 
KOps/s | $\color{#d91a1a}-1.78\\%$ | | test_memmaptd_index_astensor | 0.9466ms | 0.3315ms | 3.0166 KOps/s | 3.0565 KOps/s | $\color{#d91a1a}-1.30\\%$ | | test_memmaptd_index_op | 1.0625ms | 0.6101ms | 1.6392 KOps/s | 1.5999 KOps/s | $\color{#35bf28}+2.45\\%$ | | test_serialize_model | 0.1981s | 0.1379s | 7.2496 Ops/s | 7.2472 Ops/s | $\color{#35bf28}+0.03\\%$ | | test_serialize_model_pickle | 0.4453s | 0.3922s | 2.5500 Ops/s | 2.4779 Ops/s | $\color{#35bf28}+2.91\\%$ | | test_serialize_weights | 0.1290s | 0.1224s | 8.1730 Ops/s | 7.8466 Ops/s | $\color{#35bf28}+4.16\\%$ | | test_serialize_weights_returnearly | 0.1821s | 0.1680s | 5.9521 Ops/s | 6.1042 Ops/s | $\color{#d91a1a}-2.49\\%$ | | test_serialize_weights_pickle | 0.4581s | 0.3926s | 2.5473 Ops/s | 2.4957 Ops/s | $\color{#35bf28}+2.07\\%$ | | test_serialize_weights_filesystem | 0.1536s | 0.1422s | 7.0307 Ops/s | 6.7719 Ops/s | $\color{#35bf28}+3.82\\%$ | | test_serialize_model_filesystem | 0.2216s | 0.1618s | 6.1801 Ops/s | 6.1623 Ops/s | $\color{#35bf28}+0.29\\%$ | | test_reshape_pytree | 81.2210μs | 26.0182μs | 38.4346 KOps/s | 38.4366 KOps/s | $-0.01\\%$ | | test_reshape_td | 67.9470μs | 33.9854μs | 29.4244 KOps/s | 29.9192 KOps/s | $\color{#d91a1a}-1.65\\%$ | | test_view_pytree | 71.0230μs | 25.7266μs | 38.8703 KOps/s | 38.8755 KOps/s | $\color{#d91a1a}-0.01\\%$ | | test_view_td | 0.1151ms | 40.7197μs | 24.5581 KOps/s | 25.6414 KOps/s | $\color{#d91a1a}-4.22\\%$ | | test_unbind_pytree | 83.0850μs | 29.6222μs | 33.7584 KOps/s | 33.2417 KOps/s | $\color{#35bf28}+1.55\\%$ | | test_unbind_td | 0.3740ms | 36.7478μs | 27.2125 KOps/s | 26.5742 KOps/s | $\color{#35bf28}+2.40\\%$ | | test_split_pytree | 67.7870μs | 29.3855μs | 34.0304 KOps/s | 33.6424 KOps/s | $\color{#35bf28}+1.15\\%$ | | test_split_td | 0.1238ms | 39.6269μs | 25.2354 KOps/s | 25.1813 KOps/s | $\color{#35bf28}+0.21\\%$ | | test_add_pytree | 90.9700μs | 34.4941μs | 28.9904 KOps/s | 28.2526 KOps/s | $\color{#35bf28}+2.61\\%$ | | test_add_td | 
0.1715ms | 53.6464μs | 18.6406 KOps/s | 17.2847 KOps/s | $\textbf{\color{#35bf28}+7.84\\%}$ | | test_distributed | 0.7051ms | 0.1304ms | 7.6681 KOps/s | 7.4772 KOps/s | $\color{#35bf28}+2.55\\%$ | | test_tdmodule | 33.2020μs | 18.5717μs | 53.8454 KOps/s | 54.5327 KOps/s | $\color{#d91a1a}-1.26\\%$ | | test_tdmodule_dispatch | 80.7900μs | 36.3531μs | 27.5080 KOps/s | 27.9164 KOps/s | $\color{#d91a1a}-1.46\\%$ | | test_tdseq | 45.4840μs | 21.5755μs | 46.3488 KOps/s | 46.4898 KOps/s | $\color{#d91a1a}-0.30\\%$ | | test_tdseq_dispatch | 74.0280μs | 41.7753μs | 23.9376 KOps/s | 23.4334 KOps/s | $\color{#35bf28}+2.15\\%$ | | test_instantiation_functorch | 1.5048ms | 1.3642ms | 733.0233 Ops/s | 760.6957 Ops/s | $\color{#d91a1a}-3.64\\%$ | | test_instantiation_td | 1.8186ms | 1.0667ms | 937.4799 Ops/s | 967.9496 Ops/s | $\color{#d91a1a}-3.15\\%$ | | test_exec_functorch | 0.3294ms | 0.1627ms | 6.1470 KOps/s | 5.9997 KOps/s | $\color{#35bf28}+2.45\\%$ | | test_exec_functional_call | 0.2477ms | 0.1514ms | 6.6046 KOps/s | 6.4533 KOps/s | $\color{#35bf28}+2.34\\%$ | | test_exec_td | 0.4038ms | 0.1488ms | 6.7197 KOps/s | 6.7029 KOps/s | $\color{#35bf28}+0.25\\%$ | | test_exec_td_decorator | 0.7918ms | 0.2234ms | 4.4766 KOps/s | 4.4871 KOps/s | $\color{#d91a1a}-0.23\\%$ | | test_vmap_mlp_speed[True-True] | 0.8955ms | 0.4922ms | 2.0316 KOps/s | 1.9827 KOps/s | $\color{#35bf28}+2.47\\%$ | | test_vmap_mlp_speed[True-False] | 0.7283ms | 0.4905ms | 2.0386 KOps/s | 2.0189 KOps/s | $\color{#35bf28}+0.97\\%$ | | test_vmap_mlp_speed[False-True] | 0.7697ms | 0.4016ms | 2.4898 KOps/s | 2.5042 KOps/s | $\color{#d91a1a}-0.58\\%$ | | test_vmap_mlp_speed[False-False] | 0.7057ms | 0.3995ms | 2.5029 KOps/s | 2.4819 KOps/s | $\color{#35bf28}+0.84\\%$ | | test_vmap_mlp_speed_decorator[True-True] | 1.2206ms | 0.5611ms | 1.7822 KOps/s | 1.7516 KOps/s | $\color{#35bf28}+1.75\\%$ | | test_vmap_mlp_speed_decorator[True-False] | 0.9001ms | 0.5638ms | 1.7736 KOps/s | 1.7504 KOps/s | 
$\color{#35bf28}+1.32\\%$ | | test_vmap_mlp_speed_decorator[False-True] | 0.8480ms | 0.4685ms | 2.1343 KOps/s | 2.1503 KOps/s | $\color{#d91a1a}-0.75\\%$ | | test_vmap_mlp_speed_decorator[False-False] | 0.6616ms | 0.4651ms | 2.1503 KOps/s | 2.1460 KOps/s | $\color{#35bf28}+0.20\\%$ | | test_to_module_speed[True] | 2.0400ms | 1.6796ms | 595.3959 Ops/s | 592.2043 Ops/s | $\color{#35bf28}+0.54\\%$ | | test_to_module_speed[False] | 2.1969ms | 1.6444ms | 608.1070 Ops/s | 607.2622 Ops/s | $\color{#35bf28}+0.14\\%$ | | test_tc_init | 98.4830μs | 58.6903μs | 17.0386 KOps/s | 16.0676 KOps/s | $\textbf{\color{#35bf28}+6.04\\%}$ | | test_tc_init_nested | 0.1920ms | 0.1198ms | 8.3452 KOps/s | 8.3820 KOps/s | $\color{#d91a1a}-0.44\\%$ | | test_tc_first_layer_tensor | 42.9810μs | 8.2791μs | 120.7863 KOps/s | 121.0705 KOps/s | $\color{#d91a1a}-0.23\\%$ | | test_tc_first_layer_nontensor | 48.7450μs | 8.1887μs | 122.1191 KOps/s | 123.8268 KOps/s | $\color{#d91a1a}-1.38\\%$ | | test_tc_second_layer_tensor | 22.1410μs | 2.5447μs | 392.9675 KOps/s | 397.4861 KOps/s | $\color{#d91a1a}-1.14\\%$ | | test_tc_second_layer_nontensor | 42.1390μs | 9.3917μs | 106.4775 KOps/s | 108.0048 KOps/s | $\color{#d91a1a}-1.41\\%$ | | test_unbind | 97.4201ms | 15.5040ms | 64.4996 Ops/s | 66.0644 Ops/s | $\color{#d91a1a}-2.37\\%$ | | test_full_like | 10.0977ms | 8.0111ms | 124.8263 Ops/s | 73.2100 Ops/s | $\textbf{\color{#35bf28}+70.50\\%}$ | | test_zeros_like | 13.0018ms | 7.4553ms | 134.1323 Ops/s | 133.4485 Ops/s | $\color{#35bf28}+0.51\\%$ | | test_ones_like | 15.7528ms | 8.0995ms | 123.4638 Ops/s | 131.1709 Ops/s | $\textbf{\color{#d91a1a}-5.88\\%}$ | | test_clone | 13.1905ms | 9.6236ms | 103.9109 Ops/s | 100.4154 Ops/s | $\color{#35bf28}+3.48\\%$ | | test_squeeze | 89.3870μs | 13.2607μs | 75.4110 KOps/s | 78.1652 KOps/s | $\color{#d91a1a}-3.52\\%$ | | test_unsqueeze | 0.1848ms | 0.1005ms | 9.9463 KOps/s | 10.1152 KOps/s | $\color{#d91a1a}-1.67\\%$ | | test_split | 0.4981ms | 0.2817ms | 3.5494 
KOps/s | 3.6162 KOps/s | $\color{#d91a1a}-1.85\\%$ | | test_permute | 0.3850ms | 0.2267ms | 4.4102 KOps/s | 4.3342 KOps/s | $\color{#35bf28}+1.75\\%$ | | test_stack | 31.6326ms | 25.8099ms | 38.7448 Ops/s | 38.2362 Ops/s | $\color{#35bf28}+1.33\\%$ | | test_cat | 33.2209ms | 26.0438ms | 38.3968 Ops/s | 38.8380 Ops/s | $\color{#d91a1a}-1.14\\%$ |
github-actions[bot] commented 2 months ago

$\color{#D29922}\textsf{\Large ⚠\kern{0.2cm}\normalsize Warning}$ Result of GPU Benchmark Tests

Total Benchmarks: 152. Improved: $\large\color{#35bf28}7$. Worsened: $\large\color{#d91a1a}14$.

Expand to view detailed results | Name | Max | Mean | Ops | Ops on Repo `HEAD` | Change | | -------------------------------------------------- | --------- | --------- | --------------- | ------------------ | ----------------------------------- | | test_plain_set_nested | 22.5110μs | 12.0215μs | 83.1840 KOps/s | 86.9441 KOps/s | $\color{#d91a1a}-4.32\\%$ | | test_plain_set_stack_nested | 30.3420μs | 12.2582μs | 81.5779 KOps/s | 85.8685 KOps/s | $\color{#d91a1a}-5.00\\%$ | | test_plain_set_nested_inplace | 36.6820μs | 13.4940μs | 74.1070 KOps/s | 77.6618 KOps/s | $\color{#d91a1a}-4.58\\%$ | | test_plain_set_stack_nested_inplace | 30.9020μs | 13.3636μs | 74.8303 KOps/s | 78.0134 KOps/s | $\color{#d91a1a}-4.08\\%$ | | test_items | 29.6420μs | 4.6525μs | 214.9375 KOps/s | 210.9447 KOps/s | $\color{#35bf28}+1.89\\%$ | | test_items_nested | 0.3965ms | 0.3388ms | 2.9517 KOps/s | 2.8940 KOps/s | $\color{#35bf28}+1.99\\%$ | | test_items_nested_locked | 0.4080ms | 0.3438ms | 2.9090 KOps/s | 2.9054 KOps/s | $\color{#35bf28}+0.12\\%$ | | test_items_nested_leaf | 0.1128ms | 82.7491μs | 12.0847 KOps/s | 11.9409 KOps/s | $\color{#35bf28}+1.20\\%$ | | test_items_stack_nested | 0.4025ms | 0.3425ms | 2.9201 KOps/s | 2.9131 KOps/s | $\color{#35bf28}+0.24\\%$ | | test_items_stack_nested_leaf | 0.1116ms | 84.3508μs | 11.8553 KOps/s | 11.9854 KOps/s | $\color{#d91a1a}-1.09\\%$ | | test_items_stack_nested_locked | 0.3794ms | 0.3435ms | 2.9112 KOps/s | 2.8751 KOps/s | $\color{#35bf28}+1.25\\%$ | | test_keys | 18.7010μs | 4.4108μs | 226.7176 KOps/s | 227.0810 KOps/s | $\color{#d91a1a}-0.16\\%$ | | test_keys_nested | 0.1262ms | 69.6100μs | 14.3658 KOps/s | 14.4280 KOps/s | $\color{#d91a1a}-0.43\\%$ | | test_keys_nested_locked | 0.8099ms | 75.3480μs | 13.2718 KOps/s | 13.2839 KOps/s | $\color{#d91a1a}-0.09\\%$ | | test_keys_nested_leaf | 86.4960μs | 59.7016μs | 16.7500 KOps/s | 16.7779 KOps/s | $\color{#d91a1a}-0.17\\%$ | | test_keys_stack_nested | 87.4750μs | 68.8523μs | 14.5238 KOps/s | 
14.5420 KOps/s | $\color{#d91a1a}-0.13\\%$ | | test_keys_stack_nested_leaf | 79.2950μs | 59.8049μs | 16.7210 KOps/s | 16.6661 KOps/s | $\color{#35bf28}+0.33\\%$ | | test_keys_stack_nested_locked | 97.5650μs | 74.4976μs | 13.4232 KOps/s | 13.4476 KOps/s | $\color{#d91a1a}-0.18\\%$ | | test_values | 9.9140μs | 1.8176μs | 550.1665 KOps/s | 556.5136 KOps/s | $\color{#d91a1a}-1.14\\%$ | | test_values_nested | 58.5730μs | 35.2737μs | 28.3497 KOps/s | 27.9634 KOps/s | $\color{#35bf28}+1.38\\%$ | | test_values_nested_locked | 62.2540μs | 37.1779μs | 26.8977 KOps/s | 26.6125 KOps/s | $\color{#35bf28}+1.07\\%$ | | test_values_nested_leaf | 53.8230μs | 31.2507μs | 31.9993 KOps/s | 31.6280 KOps/s | $\color{#35bf28}+1.17\\%$ | | test_values_stack_nested | 56.8030μs | 35.1896μs | 28.4175 KOps/s | 26.9681 KOps/s | $\textbf{\color{#35bf28}+5.37\\%}$ | | test_values_stack_nested_leaf | 62.7040μs | 31.6656μs | 31.5801 KOps/s | 30.7460 KOps/s | $\color{#35bf28}+2.71\\%$ | | test_values_stack_nested_locked | 0.2198ms | 37.3120μs | 26.8010 KOps/s | 25.9570 KOps/s | $\color{#35bf28}+3.25\\%$ | | test_membership | 27.7874μs | 0.7159μs | 1.3968 MOps/s | 1.4132 MOps/s | $\color{#d91a1a}-1.16\\%$ | | test_membership_nested | 20.7720μs | 2.5810μs | 387.4497 KOps/s | 390.7481 KOps/s | $\color{#d91a1a}-0.84\\%$ | | test_membership_nested_leaf | 0.1928ms | 2.5563μs | 391.1852 KOps/s | 390.3133 KOps/s | $\color{#35bf28}+0.22\\%$ | | test_membership_stacked_nested | 23.8520μs | 2.5868μs | 386.5708 KOps/s | 391.5779 KOps/s | $\color{#d91a1a}-1.28\\%$ | | test_membership_stacked_nested_leaf | 15.5110μs | 2.5949μs | 385.3715 KOps/s | 394.1246 KOps/s | $\color{#d91a1a}-2.22\\%$ | | test_membership_nested_last | 22.4510μs | 3.1467μs | 317.7976 KOps/s | 322.2938 KOps/s | $\color{#d91a1a}-1.40\\%$ | | test_membership_nested_leaf_last | 26.3420μs | 3.1364μs | 318.8411 KOps/s | 321.8228 KOps/s | $\color{#d91a1a}-0.93\\%$ | | test_membership_stacked_nested_last | 20.8810μs | 3.1657μs | 315.8904 KOps/s | 
326.8258 KOps/s | $\color{#d91a1a}-3.35\\%$ | | test_membership_stacked_nested_leaf_last | 36.8120μs | 3.1377μs | 318.7025 KOps/s | 323.0300 KOps/s | $\color{#d91a1a}-1.34\\%$ | | test_nested_getleaf | 26.7920μs | 8.4086μs | 118.9254 KOps/s | 119.3451 KOps/s | $\color{#d91a1a}-0.35\\%$ | | test_nested_get | 23.1420μs | 7.8927μs | 126.7000 KOps/s | 127.2801 KOps/s | $\color{#d91a1a}-0.46\\%$ | | test_stacked_getleaf | 22.8320μs | 8.4194μs | 118.7737 KOps/s | 119.6092 KOps/s | $\color{#d91a1a}-0.70\\%$ | | test_stacked_get | 27.7110μs | 7.9253μs | 126.1785 KOps/s | 127.6082 KOps/s | $\color{#d91a1a}-1.12\\%$ | | test_nested_getitemleaf | 33.0120μs | 8.5917μs | 116.3910 KOps/s | 116.9960 KOps/s | $\color{#d91a1a}-0.52\\%$ | | test_nested_getitem | 29.6720μs | 8.0741μs | 123.8526 KOps/s | 124.4074 KOps/s | $\color{#d91a1a}-0.45\\%$ | | test_stacked_getitemleaf | 27.5710μs | 8.5663μs | 116.7369 KOps/s | 117.2655 KOps/s | $\color{#d91a1a}-0.45\\%$ | | test_stacked_getitem | 21.8910μs | 8.0775μs | 123.8014 KOps/s | 124.1893 KOps/s | $\color{#d91a1a}-0.31\\%$ | | test_lock_nested | 58.3546ms | 0.4054ms | 2.4667 KOps/s | 2.5055 KOps/s | $\color{#d91a1a}-1.55\\%$ | | test_lock_stack_nested | 0.3489ms | 0.2997ms | 3.3363 KOps/s | 3.3547 KOps/s | $\color{#d91a1a}-0.55\\%$ | | test_unlock_nested | 60.4905ms | 0.4047ms | 2.4710 KOps/s | 2.4956 KOps/s | $\color{#d91a1a}-0.99\\%$ | | test_unlock_stack_nested | 0.3724ms | 0.3084ms | 3.2424 KOps/s | 3.2799 KOps/s | $\color{#d91a1a}-1.14\\%$ | | test_flatten_speed | 0.4171ms | 0.1021ms | 9.7943 KOps/s | 9.7413 KOps/s | $\color{#35bf28}+0.54\\%$ | | test_unflatten_speed | 0.3542ms | 0.2902ms | 3.4455 KOps/s | 3.4709 KOps/s | $\color{#d91a1a}-0.73\\%$ | | test_common_ops | 1.0698ms | 0.5894ms | 1.6967 KOps/s | 1.7941 KOps/s | $\textbf{\color{#d91a1a}-5.43\\%}$ | | test_creation | 36.4130μs | 1.6043μs | 623.3292 KOps/s | 616.4253 KOps/s | $\color{#35bf28}+1.12\\%$ | | test_creation_empty | 25.3410μs | 7.0767μs | 141.3079 KOps/s | 
162.2084 KOps/s | $\textbf{\color{#d91a1a}-12.88\\%}$ | | test_creation_nested_1 | 25.5810μs | 8.9147μs | 112.1738 KOps/s | 127.1938 KOps/s | $\textbf{\color{#d91a1a}-11.81\\%}$ | | test_creation_nested_2 | 25.7520μs | 11.1415μs | 89.7545 KOps/s | 99.0414 KOps/s | $\textbf{\color{#d91a1a}-9.38\\%}$ | | test_clone | 78.9350μs | 12.5871μs | 79.4463 KOps/s | 80.8121 KOps/s | $\color{#d91a1a}-1.69\\%$ | | test_getitem[int] | 25.3720μs | 10.9009μs | 91.7355 KOps/s | 92.7424 KOps/s | $\color{#d91a1a}-1.09\\%$ | | test_getitem[slice_int] | 37.2720μs | 20.7178μs | 48.2678 KOps/s | 48.4292 KOps/s | $\color{#d91a1a}-0.33\\%$ | | test_getitem[range] | 71.6740μs | 49.2008μs | 20.3249 KOps/s | 20.7198 KOps/s | $\color{#d91a1a}-1.91\\%$ | | test_getitem[tuple] | 41.3020μs | 18.7134μs | 53.4378 KOps/s | 53.2715 KOps/s | $\color{#35bf28}+0.31\\%$ | | test_getitem[list] | 0.1130ms | 34.4092μs | 29.0620 KOps/s | 29.0283 KOps/s | $\color{#35bf28}+0.12\\%$ | | test_setitem_dim[int] | 45.2830μs | 25.7581μs | 38.8228 KOps/s | 40.7516 KOps/s | $\color{#d91a1a}-4.73\\%$ | | test_setitem_dim[slice_int] | 78.1540μs | 45.9924μs | 21.7427 KOps/s | 22.1751 KOps/s | $\color{#d91a1a}-1.95\\%$ | | test_setitem_dim[range] | 81.9350μs | 63.3426μs | 15.7872 KOps/s | 16.2503 KOps/s | $\color{#d91a1a}-2.85\\%$ | | test_setitem_dim[tuple] | 64.2640μs | 39.0410μs | 25.6141 KOps/s | 26.2332 KOps/s | $\color{#d91a1a}-2.36\\%$ | | test_setitem | 59.1140μs | 16.3152μs | 61.2927 KOps/s | 62.9430 KOps/s | $\color{#d91a1a}-2.62\\%$ | | test_set | 43.8520μs | 15.8551μs | 63.0713 KOps/s | 65.8756 KOps/s | $\color{#d91a1a}-4.26\\%$ | | test_set_shared | 1.6571ms | 0.1013ms | 9.8737 KOps/s | 9.8280 KOps/s | $\color{#35bf28}+0.46\\%$ | | test_update | 60.5040μs | 17.7957μs | 56.1933 KOps/s | 60.8413 KOps/s | $\textbf{\color{#d91a1a}-7.64\\%}$ | | test_update_nested | 62.9540μs | 23.2501μs | 43.0106 KOps/s | 46.3961 KOps/s | $\textbf{\color{#d91a1a}-7.30\\%}$ | | test_update__nested | 62.6740μs | 23.4096μs | 42.7175 
KOps/s | 42.9291 KOps/s | $\color{#d91a1a}-0.49\\%$ | | test_set_nested | 54.4130μs | 16.9454μs | 59.0130 KOps/s | 61.1086 KOps/s | $\color{#d91a1a}-3.43\\%$ | | test_set_nested_new | 60.7640μs | 19.5627μs | 51.1177 KOps/s | 52.8261 KOps/s | $\color{#d91a1a}-3.23\\%$ | | test_select | 71.1240μs | 31.9741μs | 31.2753 KOps/s | 31.5696 KOps/s | $\color{#d91a1a}-0.93\\%$ | | test_select_nested | 87.1550μs | 52.2653μs | 19.1332 KOps/s | 19.6052 KOps/s | $\color{#d91a1a}-2.41\\%$ | | test_exclude_nested | 0.6961ms | 0.1085ms | 9.2134 KOps/s | 9.1899 KOps/s | $\color{#35bf28}+0.26\\%$ | | test_empty[True] | 0.4166ms | 0.3467ms | 2.8841 KOps/s | 2.8938 KOps/s | $\color{#d91a1a}-0.33\\%$ | | test_empty[False] | 2.8691μs | 0.8080μs | 1.2376 MOps/s | 1.2405 MOps/s | $\color{#d91a1a}-0.24\\%$ | | test_to | 90.7560μs | 59.7279μs | 16.7426 KOps/s | 17.0959 KOps/s | $\color{#d91a1a}-2.07\\%$ | | test_to_nonblocking | 80.8550μs | 37.4910μs | 26.6731 KOps/s | 24.5594 KOps/s | $\textbf{\color{#35bf28}+8.61\\%}$ | | test_unbind_speed | 0.3176ms | 0.2640ms | 3.7885 KOps/s | 3.8087 KOps/s | $\color{#d91a1a}-0.53\\%$ | | test_unbind_speed_stack0 | 0.3375ms | 0.2631ms | 3.8007 KOps/s | 3.8787 KOps/s | $\color{#d91a1a}-2.01\\%$ | | test_unbind_speed_stack1 | 75.1106ms | 0.7936ms | 1.2601 KOps/s | 1.2776 KOps/s | $\color{#d91a1a}-1.37\\%$ | | test_split | 76.1588ms | 1.6654ms | 600.4641 Ops/s | 606.9591 Ops/s | $\color{#d91a1a}-1.07\\%$ | | test_chunk | 1.6357ms | 1.5478ms | 646.0759 Ops/s | 609.0055 Ops/s | $\textbf{\color{#35bf28}+6.09\\%}$ | | test_creation[device0] | 0.1161ms | 59.0523μs | 16.9341 KOps/s | 16.1258 KOps/s | $\textbf{\color{#35bf28}+5.01\\%}$ | | test_creation_from_tensor | 0.1418ms | 54.4867μs | 18.3531 KOps/s | 17.3388 KOps/s | $\textbf{\color{#35bf28}+5.85\\%}$ | | test_add_one[memmap_tensor0] | 93.6960μs | 7.8164μs | 127.9355 KOps/s | 130.1966 KOps/s | $\color{#d91a1a}-1.74\\%$ | | test_contiguous[memmap_tensor0] | 12.1710μs | 0.6670μs | 1.4993 MOps/s | 1.5163 MOps/s 
| $\color{#d91a1a}-1.12\\%$ | | test_stack[memmap_tensor0] | 30.3610μs | 5.1629μs | 193.6894 KOps/s | 199.9680 KOps/s | $\color{#d91a1a}-3.14\\%$ | | test_memmaptd_index | 0.9720ms | 0.2774ms | 3.6047 KOps/s | 3.6398 KOps/s | $\color{#d91a1a}-0.96\\%$ | | test_memmaptd_index_astensor | 0.6156ms | 0.3391ms | 2.9490 KOps/s | 2.9492 KOps/s | $-0.01\\%$ | | test_memmaptd_index_op | 76.4493ms | 0.6932ms | 1.4426 KOps/s | 1.6035 KOps/s | $\textbf{\color{#d91a1a}-10.03\\%}$ | | test_serialize_model | 94.0774ms | 90.3113ms | 11.0728 Ops/s | 10.5370 Ops/s | $\textbf{\color{#35bf28}+5.08\\%}$ | | test_serialize_model_pickle | 1.3482s | 1.2352s | 0.8096 Ops/s | 0.8084 Ops/s | $\color{#35bf28}+0.14\\%$ | | test_serialize_weights | 0.1683s | 97.4524ms | 10.2614 Ops/s | 10.6883 Ops/s | $\color{#d91a1a}-3.99\\%$ | | test_serialize_weights_returnearly | 0.2634s | 76.3366ms | 13.0999 Ops/s | 13.7230 Ops/s | $\color{#d91a1a}-4.54\\%$ | | test_serialize_weights_pickle | 1.4116s | 1.2550s | 0.7968 Ops/s | 0.7966 Ops/s | $\color{#35bf28}+0.03\\%$ | | test_reshape_pytree | 59.7040μs | 27.0790μs | 36.9290 KOps/s | 37.0440 KOps/s | $\color{#d91a1a}-0.31\\%$ | | test_reshape_td | 67.6840μs | 33.0771μs | 30.2324 KOps/s | 30.3612 KOps/s | $\color{#d91a1a}-0.42\\%$ | | test_view_pytree | 54.1230μs | 26.6279μs | 37.5545 KOps/s | 37.8282 KOps/s | $\color{#d91a1a}-0.72\\%$ | | test_view_td | 0.1437ms | 37.9103μs | 26.3781 KOps/s | 26.4839 KOps/s | $\color{#d91a1a}-0.40\\%$ | | test_unbind_pytree | 0.1889ms | 33.5340μs | 29.8204 KOps/s | 29.5437 KOps/s | $\color{#35bf28}+0.94\\%$ | | test_unbind_td | 0.5017ms | 40.2134μs | 24.8673 KOps/s | 24.9019 KOps/s | $\color{#d91a1a}-0.14\\%$ | | test_split_pytree | 60.1440μs | 35.5283μs | 28.1466 KOps/s | 27.9765 KOps/s | $\color{#35bf28}+0.61\\%$ | | test_split_td | 0.5054ms | 39.3674μs | 25.4017 KOps/s | 25.3646 KOps/s | $\color{#35bf28}+0.15\\%$ | | test_add_pytree | 66.9440μs | 40.1048μs | 24.9347 KOps/s | 25.1519 KOps/s | $\color{#d91a1a}-0.86\\%$ | | 
test_add_td | 82.0550μs | 48.9481μs | 20.4298 KOps/s | 20.5126 KOps/s | $\color{#d91a1a}-0.40\\%$ | | test_distributed | 1.9179ms | 99.5947μs | 10.0407 KOps/s | 10.9733 KOps/s | $\textbf{\color{#d91a1a}-8.50\\%}$ | | test_tdmodule | 29.0520μs | 14.2763μs | 70.0462 KOps/s | 73.1573 KOps/s | $\color{#d91a1a}-4.25\\%$ | | test_tdmodule_dispatch | 45.5120μs | 27.5359μs | 36.3163 KOps/s | 38.0731 KOps/s | $\color{#d91a1a}-4.61\\%$ | | test_tdseq | 32.8620μs | 16.1601μs | 61.8809 KOps/s | 67.3369 KOps/s | $\textbf{\color{#d91a1a}-8.10\\%}$ | | test_tdseq_dispatch | 56.9930μs | 30.8181μs | 32.4485 KOps/s | 34.5770 KOps/s | $\textbf{\color{#d91a1a}-6.16\\%}$ | | test_instantiation_functorch | 1.5396ms | 1.4353ms | 696.6983 Ops/s | 699.9056 Ops/s | $\color{#d91a1a}-0.46\\%$ | | test_instantiation_td | 1.4815ms | 0.9966ms | 1.0034 KOps/s | 919.4939 Ops/s | $\textbf{\color{#35bf28}+9.12\\%}$ | | test_exec_functorch | 0.1888ms | 0.1492ms | 6.7034 KOps/s | 6.4980 KOps/s | $\color{#35bf28}+3.16\\%$ | | test_exec_functional_call | 0.1934ms | 0.1418ms | 7.0524 KOps/s | 7.1253 KOps/s | $\color{#d91a1a}-1.02\\%$ | | test_exec_td | 0.1897ms | 0.1406ms | 7.1138 KOps/s | 7.2082 KOps/s | $\color{#d91a1a}-1.31\\%$ | | test_exec_td_decorator | 79.8192ms | 0.2355ms | 4.2466 KOps/s | 4.7525 KOps/s | $\textbf{\color{#d91a1a}-10.64\\%}$ | | test_vmap_mlp_speed[True-True] | 1.3335ms | 0.5893ms | 1.6970 KOps/s | 1.7040 KOps/s | $\color{#d91a1a}-0.41\\%$ | | test_vmap_mlp_speed[True-False] | 0.6546ms | 0.5873ms | 1.7026 KOps/s | 1.6977 KOps/s | $\color{#35bf28}+0.29\\%$ | | test_vmap_mlp_speed[False-True] | 0.6224ms | 0.5519ms | 1.8119 KOps/s | 1.9141 KOps/s | $\textbf{\color{#d91a1a}-5.34\\%}$ | | test_vmap_mlp_speed[False-False] | 0.6533ms | 0.5470ms | 1.8282 KOps/s | 1.8463 KOps/s | $\color{#d91a1a}-0.98\\%$ | | test_vmap_mlp_speed_decorator[True-True] | 1.0105ms | 0.6464ms | 1.5471 KOps/s | 1.5363 KOps/s | $\color{#35bf28}+0.70\\%$ | | test_vmap_mlp_speed_decorator[True-False] | 0.7452ms | 
0.6451ms | 1.5502 KOps/s | 1.5051 KOps/s | $\color{#35bf28}+3.00\\%$ | | test_vmap_mlp_speed_decorator[False-True] | 0.7276ms | 0.5749ms | 1.7394 KOps/s | 1.7350 KOps/s | $\color{#35bf28}+0.25\\%$ | | test_vmap_mlp_speed_decorator[False-False] | 0.7009ms | 0.5736ms | 1.7433 KOps/s | 1.7204 KOps/s | $\color{#35bf28}+1.33\\%$ | | test_vmap_transformer_speed[True-True] | 7.9176ms | 7.7477ms | 129.0706 Ops/s | 128.2974 Ops/s | $\color{#35bf28}+0.60\\%$ | | test_vmap_transformer_speed[True-False] | 8.8296ms | 7.7875ms | 128.4101 Ops/s | 128.6013 Ops/s | $\color{#d91a1a}-0.15\\%$ | | test_vmap_transformer_speed[False-True] | 7.8118ms | 7.6926ms | 129.9950 Ops/s | 129.7494 Ops/s | $\color{#35bf28}+0.19\\%$ | | test_vmap_transformer_speed[False-False] | 7.8776ms | 7.6826ms | 130.1643 Ops/s | 129.6089 Ops/s | $\color{#35bf28}+0.43\\%$ | | test_vmap_transformer_speed_decorator[True-True] | 19.1181ms | 18.8592ms | 53.0246 Ops/s | 53.1259 Ops/s | $\color{#d91a1a}-0.19\\%$ | | test_vmap_transformer_speed_decorator[True-False] | 19.0108ms | 18.7975ms | 53.1985 Ops/s | 53.1766 Ops/s | $\color{#35bf28}+0.04\\%$ | | test_vmap_transformer_speed_decorator[False-True] | 19.6186ms | 18.7350ms | 53.3762 Ops/s | 53.3892 Ops/s | $\color{#d91a1a}-0.02\\%$ | | test_vmap_transformer_speed_decorator[False-False] | 19.7107ms | 18.7531ms | 53.3244 Ops/s | 53.3417 Ops/s | $\color{#d91a1a}-0.03\\%$ | | test_to_module_speed[True] | 1.6083ms | 1.4922ms | 670.1528 Ops/s | 669.3444 Ops/s | $\color{#35bf28}+0.12\\%$ | | test_to_module_speed[False] | 1.5720ms | 1.4754ms | 677.7808 Ops/s | 678.1456 Ops/s | $\color{#d91a1a}-0.05\\%$ | | test_tc_init | 78.4150μs | 52.4286μs | 19.0736 KOps/s | 20.9623 KOps/s | $\textbf{\color{#d91a1a}-9.01\\%}$ | | test_tc_init_nested | 0.1374ms | 98.0439μs | 10.1995 KOps/s | 10.7745 KOps/s | $\textbf{\color{#d91a1a}-5.34\\%}$ | | test_tc_first_layer_tensor | 14.2910μs | 3.7519μs | 266.5317 KOps/s | 269.4211 KOps/s | $\color{#d91a1a}-1.07\\%$ | | 
test_tc_first_layer_nontensor | 16.0310μs | 3.7506μs | 266.6249 KOps/s | 267.6424 KOps/s | $\color{#d91a1a}-0.38\\%$ | | test_tc_second_layer_tensor | 13.5710μs | 1.2811μs | 780.5744 KOps/s | 761.9605 KOps/s | $\color{#35bf28}+2.44\\%$ | | test_tc_second_layer_nontensor | 21.8410μs | 4.2807μs | 233.6069 KOps/s | 233.2602 KOps/s | $\color{#35bf28}+0.15\\%$ | | test_unbind | 0.1099s | 14.6824ms | 68.1085 Ops/s | 67.4441 Ops/s | $\color{#35bf28}+0.99\\%$ | | test_full_like | 14.0392ms | 13.6905ms | 73.0432 Ops/s | 72.9142 Ops/s | $\color{#35bf28}+0.18\\%$ | | test_zeros_like | 8.2416ms | 7.9666ms | 125.5246 Ops/s | 126.2135 Ops/s | $\color{#d91a1a}-0.55\\%$ | | test_ones_like | 8.1939ms | 7.9655ms | 125.5412 Ops/s | 125.8045 Ops/s | $\color{#d91a1a}-0.21\\%$ | | test_clone | 9.9117ms | 9.5658ms | 104.5391 Ops/s | 105.2013 Ops/s | $\color{#d91a1a}-0.63\\%$ | | test_squeeze | 60.5240μs | 10.7119μs | 93.3545 KOps/s | 89.0347 KOps/s | $\color{#35bf28}+4.85\\%$ | | test_unsqueeze | 0.1397ms | 89.3567μs | 11.1911 KOps/s | 11.0189 KOps/s | $\color{#35bf28}+1.56\\%$ | | test_split | 3.5163ms | 3.1726ms | 315.1978 Ops/s | 322.9878 Ops/s | $\color{#d91a1a}-2.41\\%$ | | test_permute | 0.2588ms | 0.2084ms | 4.7994 KOps/s | 4.7179 KOps/s | $\color{#35bf28}+1.73\\%$ | | test_stack | 27.5122ms | 27.2337ms | 36.7193 Ops/s | 36.8495 Ops/s | $\color{#d91a1a}-0.35\\%$ | | test_cat | 27.1236ms | 26.9480ms | 37.1085 Ops/s | 37.0464 Ops/s | $\color{#35bf28}+0.17\\%$ |