pytorch / tensordict

TensorDict is a PyTorch-dedicated tensor container.
MIT License
832 stars 74 forks source link

[BugFix] Fix wheels #856

Closed vmoens closed 4 months ago

github-actions[bot] commented 4 months ago

$\color{#D29922}\textsf{\Large ⚠\kern{0.2cm}\normalsize Warning}$ Result of CPU Benchmark Tests

Total Benchmarks: 144. Improved: $\large\color{#35bf28}9$. Worsened: $\large\color{#d91a1a}5$.

Expand to view detailed results | Name | Max | Mean | Ops | Ops on Repo `HEAD` | Change | | ------------------------------------------ | --------- | --------- | --------------- | ------------------ | ----------------------------------- | | test_plain_set_nested | 37.8300μs | 17.1571μs | 58.2848 KOps/s | 58.9564 KOps/s | $\color{#d91a1a}-1.14\\%$ | | test_plain_set_stack_nested | 43.5620μs | 17.3883μs | 57.5099 KOps/s | 58.0029 KOps/s | $\color{#d91a1a}-0.85\\%$ | | test_plain_set_nested_inplace | 56.4150μs | 19.8298μs | 50.4291 KOps/s | 51.3320 KOps/s | $\color{#d91a1a}-1.76\\%$ | | test_plain_set_stack_nested_inplace | 48.5410μs | 19.7488μs | 50.6360 KOps/s | 51.9336 KOps/s | $\color{#d91a1a}-2.50\\%$ | | test_items | 29.4850μs | 2.5963μs | 385.1645 KOps/s | 363.7849 KOps/s | $\textbf{\color{#35bf28}+5.88\\%}$ | | test_items_nested | 0.5126ms | 0.2701ms | 3.7021 KOps/s | 3.6184 KOps/s | $\color{#35bf28}+2.31\\%$ | | test_items_nested_locked | 0.3956ms | 0.2719ms | 3.6775 KOps/s | 3.6062 KOps/s | $\color{#35bf28}+1.98\\%$ | | test_items_nested_leaf | 0.1641ms | 78.6958μs | 12.7072 KOps/s | 12.6450 KOps/s | $\color{#35bf28}+0.49\\%$ | | test_items_stack_nested | 0.6125ms | 0.2741ms | 3.6485 KOps/s | 3.5614 KOps/s | $\color{#35bf28}+2.45\\%$ | | test_items_stack_nested_leaf | 0.1659ms | 81.0368μs | 12.3401 KOps/s | 12.7045 KOps/s | $\color{#d91a1a}-2.87\\%$ | | test_items_stack_nested_locked | 0.5566ms | 0.2720ms | 3.6761 KOps/s | 3.5844 KOps/s | $\color{#35bf28}+2.56\\%$ | | test_keys | 23.0330μs | 3.8237μs | 261.5277 KOps/s | 246.2930 KOps/s | $\textbf{\color{#35bf28}+6.19\\%}$ | | test_keys_nested | 0.2555ms | 0.1391ms | 7.1874 KOps/s | 7.1906 KOps/s | $\color{#d91a1a}-0.04\\%$ | | test_keys_nested_locked | 0.6893ms | 0.1440ms | 6.9420 KOps/s | 6.9526 KOps/s | $\color{#d91a1a}-0.15\\%$ | | test_keys_nested_leaf | 0.2126ms | 0.1192ms | 8.3924 KOps/s | 8.5472 KOps/s | $\color{#d91a1a}-1.81\\%$ | | test_keys_stack_nested | 0.2886ms | 0.1388ms | 7.2072 KOps/s | 7.2189 
KOps/s | $\color{#d91a1a}-0.16\\%$ | | test_keys_stack_nested_leaf | 0.2310ms | 0.1174ms | 8.5154 KOps/s | 8.5004 KOps/s | $\color{#35bf28}+0.18\\%$ | | test_keys_stack_nested_locked | 0.2402ms | 0.1433ms | 6.9791 KOps/s | 6.9952 KOps/s | $\color{#d91a1a}-0.23\\%$ | | test_values | 9.9538μs | 1.1580μs | 863.5671 KOps/s | 876.3181 KOps/s | $\color{#d91a1a}-1.46\\%$ | | test_values_nested | 93.1040μs | 51.3058μs | 19.4910 KOps/s | 19.8287 KOps/s | $\color{#d91a1a}-1.70\\%$ | | test_values_nested_locked | 92.5030μs | 51.6032μs | 19.3786 KOps/s | 19.7143 KOps/s | $\color{#d91a1a}-1.70\\%$ | | test_values_nested_leaf | 0.1166ms | 46.2247μs | 21.6334 KOps/s | 21.8632 KOps/s | $\color{#d91a1a}-1.05\\%$ | | test_values_stack_nested | 96.4810μs | 51.6785μs | 19.3504 KOps/s | 19.7621 KOps/s | $\color{#d91a1a}-2.08\\%$ | | test_values_stack_nested_leaf | 89.7680μs | 46.1062μs | 21.6890 KOps/s | 21.9664 KOps/s | $\color{#d91a1a}-1.26\\%$ | | test_values_stack_nested_locked | 0.1127ms | 51.0096μs | 19.6041 KOps/s | 20.0742 KOps/s | $\color{#d91a1a}-2.34\\%$ | | test_membership | 14.9380μs | 1.3577μs | 736.5406 KOps/s | 744.5049 KOps/s | $\color{#d91a1a}-1.07\\%$ | | test_membership_nested | 27.2410μs | 3.4534μs | 289.5707 KOps/s | 288.6814 KOps/s | $\color{#35bf28}+0.31\\%$ | | test_membership_nested_leaf | 34.2340μs | 3.4441μs | 290.3520 KOps/s | 291.9745 KOps/s | $\color{#d91a1a}-0.56\\%$ | | test_membership_stacked_nested | 25.3480μs | 3.4053μs | 293.6632 KOps/s | 296.7860 KOps/s | $\color{#d91a1a}-1.05\\%$ | | test_membership_stacked_nested_leaf | 20.7690μs | 3.4160μs | 292.7399 KOps/s | 293.8297 KOps/s | $\color{#d91a1a}-0.37\\%$ | | test_membership_nested_last | 30.3260μs | 4.2492μs | 235.3384 KOps/s | 240.4265 KOps/s | $\color{#d91a1a}-2.12\\%$ | | test_membership_nested_leaf_last | 34.0940μs | 4.1856μs | 238.9151 KOps/s | 239.2006 KOps/s | $\color{#d91a1a}-0.12\\%$ | | test_membership_stacked_nested_last | 43.1410μs | 4.1872μs | 238.8250 KOps/s | 243.8683 KOps/s | 
$\color{#d91a1a}-2.07\\%$ | | test_membership_stacked_nested_leaf_last | 55.8140μs | 4.2121μs | 237.4099 KOps/s | 242.3558 KOps/s | $\color{#d91a1a}-2.04\\%$ | | test_nested_getleaf | 57.5520μs | 10.7670μs | 92.8767 KOps/s | 94.6377 KOps/s | $\color{#d91a1a}-1.86\\%$ | | test_nested_get | 35.5660μs | 10.1103μs | 98.9090 KOps/s | 98.7487 KOps/s | $\color{#35bf28}+0.16\\%$ | | test_stacked_getleaf | 33.2620μs | 10.7868μs | 92.7060 KOps/s | 94.7351 KOps/s | $\color{#d91a1a}-2.14\\%$ | | test_stacked_get | 31.5490μs | 10.1706μs | 98.3227 KOps/s | 99.4156 KOps/s | $\color{#d91a1a}-1.10\\%$ | | test_nested_getitemleaf | 29.7450μs | 11.3270μs | 88.2846 KOps/s | 89.6496 KOps/s | $\color{#d91a1a}-1.52\\%$ | | test_nested_getitem | 36.7880μs | 10.4441μs | 95.7481 KOps/s | 96.6070 KOps/s | $\color{#d91a1a}-0.89\\%$ | | test_stacked_getitemleaf | 47.6900μs | 11.1530μs | 89.6619 KOps/s | 88.9562 KOps/s | $\color{#35bf28}+0.79\\%$ | | test_stacked_getitem | 42.3600μs | 10.3196μs | 96.9025 KOps/s | 96.4702 KOps/s | $\color{#35bf28}+0.45\\%$ | | test_lock_nested | 53.0733ms | 0.3862ms | 2.5894 KOps/s | 2.9028 KOps/s | $\textbf{\color{#d91a1a}-10.80\\%}$ | | test_lock_stack_nested | 0.3809ms | 0.3022ms | 3.3095 KOps/s | 3.2039 KOps/s | $\color{#35bf28}+3.30\\%$ | | test_unlock_nested | 0.7893ms | 0.3384ms | 2.9550 KOps/s | 2.8517 KOps/s | $\color{#35bf28}+3.63\\%$ | | test_unlock_stack_nested | 0.5958ms | 0.3103ms | 3.2227 KOps/s | 3.1150 KOps/s | $\color{#35bf28}+3.46\\%$ | | test_flatten_speed | 0.2449ms | 98.2774μs | 10.1753 KOps/s | 10.2752 KOps/s | $\color{#d91a1a}-0.97\\%$ | | test_unflatten_speed | 0.7399ms | 0.4071ms | 2.4564 KOps/s | 2.4766 KOps/s | $\color{#d91a1a}-0.81\\%$ | | test_common_ops | 1.5824ms | 0.7322ms | 1.3657 KOps/s | 1.3558 KOps/s | $\color{#35bf28}+0.73\\%$ | | test_creation | 18.6450μs | 1.8776μs | 532.6034 KOps/s | 530.2769 KOps/s | $\color{#35bf28}+0.44\\%$ | | test_creation_empty | 62.6270μs | 11.5579μs | 86.5206 KOps/s | 88.9782 KOps/s | 
$\color{#d91a1a}-2.76\\%$ | | test_creation_nested_1 | 0.1422ms | 14.2198μs | 70.3245 KOps/s | 70.9767 KOps/s | $\color{#d91a1a}-0.92\\%$ | | test_creation_nested_2 | 58.2490μs | 17.6739μs | 56.5806 KOps/s | 58.2509 KOps/s | $\color{#d91a1a}-2.87\\%$ | | test_clone | 0.2347ms | 12.7467μs | 78.4514 KOps/s | 75.7821 KOps/s | $\color{#35bf28}+3.52\\%$ | | test_getitem[int] | 76.4970μs | 10.8373μs | 92.2740 KOps/s | 87.7076 KOps/s | $\textbf{\color{#35bf28}+5.21\\%}$ | | test_getitem[slice_int] | 67.0350μs | 21.7173μs | 46.0463 KOps/s | 44.8398 KOps/s | $\color{#35bf28}+2.69\\%$ | | test_getitem[range] | 87.3930μs | 60.1804μs | 16.6167 KOps/s | 16.4387 KOps/s | $\color{#35bf28}+1.08\\%$ | | test_getitem[tuple] | 47.9090μs | 17.8544μs | 56.0085 KOps/s | 53.8716 KOps/s | $\color{#35bf28}+3.97\\%$ | | test_getitem[list] | 0.1670ms | 37.6757μs | 26.5423 KOps/s | 24.7780 KOps/s | $\textbf{\color{#35bf28}+7.12\\%}$ | | test_setitem_dim[int] | 64.2000μs | 34.0078μs | 29.4050 KOps/s | 28.8093 KOps/s | $\color{#35bf28}+2.07\\%$ | | test_setitem_dim[slice_int] | 92.4330μs | 60.9464μs | 16.4079 KOps/s | 16.0799 KOps/s | $\color{#35bf28}+2.04\\%$ | | test_setitem_dim[range] | 0.1322ms | 83.0922μs | 12.0348 KOps/s | 11.8856 KOps/s | $\color{#35bf28}+1.26\\%$ | | test_setitem_dim[tuple] | 96.9320μs | 49.0961μs | 20.3682 KOps/s | 19.7465 KOps/s | $\color{#35bf28}+3.15\\%$ | | test_setitem | 59.8120μs | 19.6638μs | 50.8549 KOps/s | 49.8736 KOps/s | $\color{#35bf28}+1.97\\%$ | | test_set | 62.2060μs | 19.5251μs | 51.2162 KOps/s | 51.0170 KOps/s | $\color{#35bf28}+0.39\\%$ | | test_set_shared | 3.8163ms | 0.1477ms | 6.7696 KOps/s | 6.8350 KOps/s | $\color{#d91a1a}-0.96\\%$ | | test_update | 0.1433ms | 22.4174μs | 44.6081 KOps/s | 44.3350 KOps/s | $\color{#35bf28}+0.62\\%$ | | test_update_nested | 0.1072ms | 31.7079μs | 31.5379 KOps/s | 32.2635 KOps/s | $\color{#d91a1a}-2.25\\%$ | | test_update__nested | 89.7680μs | 24.8200μs | 40.2902 KOps/s | 40.1226 KOps/s | $\color{#35bf28}+0.42\\%$ 
| | test_set_nested | 95.9600μs | 21.0250μs | 47.5624 KOps/s | 46.9139 KOps/s | $\color{#35bf28}+1.38\\%$ | | test_set_nested_new | 87.0830μs | 25.0524μs | 39.9163 KOps/s | 38.9633 KOps/s | $\color{#35bf28}+2.45\\%$ | | test_select | 0.1238ms | 39.8894μs | 25.0693 KOps/s | 24.1585 KOps/s | $\color{#35bf28}+3.77\\%$ | | test_select_nested | 0.1486ms | 57.4740μs | 17.3992 KOps/s | 16.8430 KOps/s | $\color{#35bf28}+3.30\\%$ | | test_exclude_nested | 0.2590ms | 0.1207ms | 8.2825 KOps/s | 8.4060 KOps/s | $\color{#d91a1a}-1.47\\%$ | | test_empty[True] | 0.8579ms | 0.3989ms | 2.5070 KOps/s | 2.5090 KOps/s | $\color{#d91a1a}-0.08\\%$ | | test_empty[False] | 5.4362μs | 1.0480μs | 954.2085 KOps/s | 851.9516 KOps/s | $\textbf{\color{#35bf28}+12.00\\%}$ | | test_unbind_speed | 1.8202ms | 0.2474ms | 4.0425 KOps/s | 3.9262 KOps/s | $\color{#35bf28}+2.96\\%$ | | test_unbind_speed_stack0 | 0.4720ms | 0.2434ms | 4.1079 KOps/s | 3.9418 KOps/s | $\color{#35bf28}+4.21\\%$ | | test_unbind_speed_stack1 | 78.5300ms | 0.7347ms | 1.3611 KOps/s | 1.3489 KOps/s | $\color{#35bf28}+0.91\\%$ | | test_split | 74.5628ms | 1.6039ms | 623.4856 Ops/s | 624.0840 Ops/s | $\color{#d91a1a}-0.10\\%$ | | test_chunk | 76.8229ms | 1.6043ms | 623.3212 Ops/s | 620.1291 Ops/s | $\color{#35bf28}+0.51\\%$ | | test_creation[device0] | 0.2757ms | 86.1491μs | 11.6078 KOps/s | 11.8368 KOps/s | $\color{#d91a1a}-1.93\\%$ | | test_creation_from_tensor | 3.6214ms | 87.4344μs | 11.4372 KOps/s | 11.4513 KOps/s | $\color{#d91a1a}-0.12\\%$ | | test_add_one[memmap_tensor0] | 89.8380μs | 5.2334μs | 191.0816 KOps/s | 189.8401 KOps/s | $\color{#35bf28}+0.65\\%$ | | test_contiguous[memmap_tensor0] | 14.3460μs | 0.6238μs | 1.6030 MOps/s | 1.6044 MOps/s | $\color{#d91a1a}-0.09\\%$ | | test_stack[memmap_tensor0] | 29.3950μs | 3.5228μs | 283.8641 KOps/s | 284.9494 KOps/s | $\color{#d91a1a}-0.38\\%$ | | test_memmaptd_index | 1.0450ms | 0.2502ms | 3.9976 KOps/s | 3.9576 KOps/s | $\color{#35bf28}+1.01\\%$ | | 
test_memmaptd_index_astensor | 0.9495ms | 0.3297ms | 3.0331 KOps/s | 3.0776 KOps/s | $\color{#d91a1a}-1.44\\%$ | | test_memmaptd_index_op | 1.7043ms | 0.6177ms | 1.6189 KOps/s | 1.6081 KOps/s | $\color{#35bf28}+0.68\\%$ | | test_serialize_model | 0.1683s | 0.1070s | 9.3433 Ops/s | 9.3802 Ops/s | $\color{#d91a1a}-0.39\\%$ | | test_serialize_model_pickle | 0.4481s | 0.3786s | 2.6412 Ops/s | 2.6234 Ops/s | $\color{#35bf28}+0.68\\%$ | | test_serialize_weights | 0.1053s | 99.0472ms | 10.0962 Ops/s | 9.1649 Ops/s | $\textbf{\color{#35bf28}+10.16\\%}$ | | test_serialize_weights_returnearly | 0.1292s | 0.1215s | 8.2289 Ops/s | 8.2572 Ops/s | $\color{#d91a1a}-0.34\\%$ | | test_serialize_weights_pickle | 0.7547s | 0.5060s | 1.9764 Ops/s | 2.4217 Ops/s | $\textbf{\color{#d91a1a}-18.39\\%}$ | | test_serialize_weights_filesystem | 0.1569s | 99.6076ms | 10.0394 Ops/s | 9.7015 Ops/s | $\color{#35bf28}+3.48\\%$ | | test_serialize_model_filesystem | 0.1047s | 95.0253ms | 10.5235 Ops/s | 10.3420 Ops/s | $\color{#35bf28}+1.75\\%$ | | test_reshape_pytree | 70.2420μs | 25.2116μs | 39.6643 KOps/s | 39.4274 KOps/s | $\color{#35bf28}+0.60\\%$ | | test_reshape_td | 79.0280μs | 33.7201μs | 29.6559 KOps/s | 29.2258 KOps/s | $\color{#35bf28}+1.47\\%$ | | test_view_pytree | 69.7410μs | 25.1434μs | 39.7719 KOps/s | 39.4547 KOps/s | $\color{#35bf28}+0.80\\%$ | | test_view_td | 90.3590μs | 38.6733μs | 25.8576 KOps/s | 25.8241 KOps/s | $\color{#35bf28}+0.13\\%$ | | test_unbind_pytree | 77.5560μs | 29.2011μs | 34.2453 KOps/s | 34.0264 KOps/s | $\color{#35bf28}+0.64\\%$ | | test_unbind_td | 0.4233ms | 36.3680μs | 27.4967 KOps/s | 26.2811 KOps/s | $\color{#35bf28}+4.63\\%$ | | test_split_pytree | 73.0370μs | 28.7446μs | 34.7891 KOps/s | 34.5240 KOps/s | $\color{#35bf28}+0.77\\%$ | | test_split_td | 0.1239ms | 39.2608μs | 25.4707 KOps/s | 25.0756 KOps/s | $\color{#35bf28}+1.58\\%$ | | test_add_pytree | 80.2700μs | 33.7300μs | 29.6472 KOps/s | 29.2765 KOps/s | $\color{#35bf28}+1.27\\%$ | | test_add_td 
| 0.1162ms | 53.2398μs | 18.7829 KOps/s | 18.3011 KOps/s | $\color{#35bf28}+2.63\\%$ | | test_distributed | 0.2165ms | 0.1020ms | 9.8016 KOps/s | 9.6870 KOps/s | $\color{#35bf28}+1.18\\%$ | | test_tdmodule | 90.4990μs | 18.4298μs | 54.2599 KOps/s | 53.2364 KOps/s | $\color{#35bf28}+1.92\\%$ | | test_tdmodule_dispatch | 63.9400μs | 36.0092μs | 27.7706 KOps/s | 27.5505 KOps/s | $\color{#35bf28}+0.80\\%$ | | test_tdseq | 39.0230μs | 21.4867μs | 46.5403 KOps/s | 43.6303 KOps/s | $\textbf{\color{#35bf28}+6.67\\%}$ | | test_tdseq_dispatch | 83.9770μs | 42.3995μs | 23.5852 KOps/s | 23.4228 KOps/s | $\color{#35bf28}+0.69\\%$ | | test_instantiation_functorch | 1.4389ms | 1.3162ms | 759.7881 Ops/s | 777.8161 Ops/s | $\color{#d91a1a}-2.32\\%$ | | test_instantiation_td | 74.9619ms | 1.1292ms | 885.5926 Ops/s | 998.0708 Ops/s | $\textbf{\color{#d91a1a}-11.27\\%}$ | | test_exec_functorch | 0.2518ms | 0.1592ms | 6.2802 KOps/s | 6.1117 KOps/s | $\color{#35bf28}+2.76\\%$ | | test_exec_functional_call | 0.2933ms | 0.1490ms | 6.7101 KOps/s | 6.5938 KOps/s | $\color{#35bf28}+1.76\\%$ | | test_exec_td | 0.2395ms | 0.1424ms | 7.0240 KOps/s | 6.8462 KOps/s | $\color{#35bf28}+2.60\\%$ | | test_exec_td_decorator | 0.8729ms | 0.2218ms | 4.5079 KOps/s | 4.5114 KOps/s | $\color{#d91a1a}-0.08\\%$ | | test_vmap_mlp_speed[True-True] | 0.6326ms | 0.4913ms | 2.0354 KOps/s | 2.0609 KOps/s | $\color{#d91a1a}-1.24\\%$ | | test_vmap_mlp_speed[True-False] | 0.6796ms | 0.4882ms | 2.0482 KOps/s | 2.0528 KOps/s | $\color{#d91a1a}-0.22\\%$ | | test_vmap_mlp_speed[False-True] | 0.6707ms | 0.3953ms | 2.5296 KOps/s | 2.5401 KOps/s | $\color{#d91a1a}-0.41\\%$ | | test_vmap_mlp_speed[False-False] | 0.7717ms | 0.3965ms | 2.5218 KOps/s | 2.5519 KOps/s | $\color{#d91a1a}-1.18\\%$ | | test_vmap_mlp_speed_decorator[True-True] | 1.0744ms | 0.5658ms | 1.7674 KOps/s | 1.7918 KOps/s | $\color{#d91a1a}-1.36\\%$ | | test_vmap_mlp_speed_decorator[True-False] | 0.8871ms | 0.5653ms | 1.7690 KOps/s | 1.7894 KOps/s | 
$\color{#d91a1a}-1.14\\%$ | | test_vmap_mlp_speed_decorator[False-True] | 0.7110ms | 0.4558ms | 2.1941 KOps/s | 2.1775 KOps/s | $\color{#35bf28}+0.76\\%$ | | test_vmap_mlp_speed_decorator[False-False] | 0.7696ms | 0.4602ms | 2.1732 KOps/s | 2.1880 KOps/s | $\color{#d91a1a}-0.68\\%$ | | test_to_module_speed[True] | 1.9641ms | 1.6929ms | 590.7122 Ops/s | 599.9929 Ops/s | $\color{#d91a1a}-1.55\\%$ | | test_to_module_speed[False] | 1.8165ms | 1.6585ms | 602.9553 Ops/s | 608.6518 Ops/s | $\color{#d91a1a}-0.94\\%$ | | test_tc_init | 0.1104ms | 61.0585μs | 16.3777 KOps/s | 16.9801 KOps/s | $\color{#d91a1a}-3.55\\%$ | | test_tc_init_nested | 0.2894ms | 0.1161ms | 8.6149 KOps/s | 8.4628 KOps/s | $\color{#35bf28}+1.80\\%$ | | test_tc_first_layer_tensor | 29.0640μs | 8.4894μs | 117.7935 KOps/s | 120.7952 KOps/s | $\color{#d91a1a}-2.48\\%$ | | test_tc_first_layer_nontensor | 30.1570μs | 8.3678μs | 119.5062 KOps/s | 120.3956 KOps/s | $\color{#d91a1a}-0.74\\%$ | | test_tc_second_layer_tensor | 0.1087ms | 2.6376μs | 379.1342 KOps/s | 402.1792 KOps/s | $\textbf{\color{#d91a1a}-5.73\\%}$ | | test_tc_second_layer_nontensor | 35.0760μs | 9.5091μs | 105.1624 KOps/s | 109.2087 KOps/s | $\color{#d91a1a}-3.71\\%$ | | test_unbind | 90.7102ms | 14.8484ms | 67.3473 Ops/s | 108.4917 Ops/s | $\textbf{\color{#d91a1a}-37.92\\%}$ | | test_full_like | 9.7073ms | 7.7603ms | 128.8611 Ops/s | 84.2686 Ops/s | $\textbf{\color{#35bf28}+52.92\\%}$ | | test_zeros_like | 15.1264ms | 6.1405ms | 162.8535 Ops/s | 159.9881 Ops/s | $\color{#35bf28}+1.79\\%$ | | test_ones_like | 13.5298ms | 6.4008ms | 156.2308 Ops/s | 155.0665 Ops/s | $\color{#35bf28}+0.75\\%$ | | test_clone | 15.9519ms | 8.4392ms | 118.4941 Ops/s | 116.4902 Ops/s | $\color{#35bf28}+1.72\\%$ | | test_squeeze | 0.1466ms | 13.3461μs | 74.9284 KOps/s | 76.7759 KOps/s | $\color{#d91a1a}-2.41\\%$ | | test_unsqueeze | 0.1744ms | 97.0523μs | 10.3037 KOps/s | 9.9654 KOps/s | $\color{#35bf28}+3.40\\%$ | | test_split | 0.4997ms | 0.2757ms | 3.6266 KOps/s 
| 3.5952 KOps/s | $\color{#35bf28}+0.87\\%$ | | test_permute | 0.4437ms | 0.2194ms | 4.5576 KOps/s | 4.4764 KOps/s | $\color{#35bf28}+1.81\\%$ | | test_stack | 33.6485ms | 24.4114ms | 40.9645 Ops/s | 40.9179 Ops/s | $\color{#35bf28}+0.11\\%$ | | test_cat | 28.7371ms | 22.8525ms | 43.7590 Ops/s | 41.1223 Ops/s | $\textbf{\color{#35bf28}+6.41\\%}$ |
github-actions[bot] commented 4 months ago

$\color{#D29922}\textsf{\Large ⚠\kern{0.2cm}\normalsize Warning}$ Result of GPU Benchmark Tests

Total Benchmarks: 152. Improved: $\large\color{#35bf28}2$. Worsened: $\large\color{#d91a1a}10$.

Expand to view detailed results | Name | Max | Mean | Ops | Ops on Repo `HEAD` | Change | | -------------------------------------------------- | --------- | --------- | --------------- | ------------------ | ----------------------------------- | | test_plain_set_nested | 32.1200μs | 12.5403μs | 79.7429 KOps/s | 82.4244 KOps/s | $\color{#d91a1a}-3.25\\%$ | | test_plain_set_stack_nested | 31.6300μs | 12.4874μs | 80.0806 KOps/s | 82.2607 KOps/s | $\color{#d91a1a}-2.65\\%$ | | test_plain_set_nested_inplace | 41.1200μs | 13.7016μs | 72.9844 KOps/s | 74.6172 KOps/s | $\color{#d91a1a}-2.19\\%$ | | test_plain_set_stack_nested_inplace | 44.6110μs | 13.9241μs | 71.8179 KOps/s | 74.4182 KOps/s | $\color{#d91a1a}-3.49\\%$ | | test_items | 19.8310μs | 4.6144μs | 216.7135 KOps/s | 216.4212 KOps/s | $\color{#35bf28}+0.14\\%$ | | test_items_nested | 0.4021ms | 0.3428ms | 2.9168 KOps/s | 2.9776 KOps/s | $\color{#d91a1a}-2.04\\%$ | | test_items_nested_locked | 0.4077ms | 0.3551ms | 2.8162 KOps/s | 2.9397 KOps/s | $\color{#d91a1a}-4.20\\%$ | | test_items_nested_leaf | 0.1077ms | 82.5224μs | 12.1179 KOps/s | 12.1749 KOps/s | $\color{#d91a1a}-0.47\\%$ | | test_items_stack_nested | 0.3983ms | 0.3429ms | 2.9162 KOps/s | 2.9312 KOps/s | $\color{#d91a1a}-0.51\\%$ | | test_items_stack_nested_leaf | 0.1067ms | 84.5686μs | 11.8247 KOps/s | 11.8783 KOps/s | $\color{#d91a1a}-0.45\\%$ | | test_items_stack_nested_locked | 0.4024ms | 0.3454ms | 2.8953 KOps/s | 2.9107 KOps/s | $\color{#d91a1a}-0.53\\%$ | | test_keys | 21.7500μs | 4.3708μs | 228.7905 KOps/s | 228.9421 KOps/s | $\color{#d91a1a}-0.07\\%$ | | test_keys_nested | 95.3110μs | 68.5510μs | 14.5877 KOps/s | 14.8792 KOps/s | $\color{#d91a1a}-1.96\\%$ | | test_keys_nested_locked | 2.3551ms | 74.9331μs | 13.3452 KOps/s | 13.1872 KOps/s | $\color{#35bf28}+1.20\\%$ | | test_keys_nested_leaf | 83.1310μs | 59.9307μs | 16.6859 KOps/s | 16.8099 KOps/s | $\color{#d91a1a}-0.74\\%$ | | test_keys_stack_nested | 98.9720μs | 69.2767μs | 14.4349 KOps/s | 
14.3551 KOps/s | $\color{#35bf28}+0.56\\%$ | | test_keys_stack_nested_leaf | 86.3110μs | 59.6913μs | 16.7529 KOps/s | 16.7370 KOps/s | $\color{#35bf28}+0.10\\%$ | | test_keys_stack_nested_locked | 0.1009ms | 74.4940μs | 13.4239 KOps/s | 13.3039 KOps/s | $\color{#35bf28}+0.90\\%$ | | test_values | 11.5555μs | 1.8076μs | 553.2165 KOps/s | 549.3754 KOps/s | $\color{#35bf28}+0.70\\%$ | | test_values_nested | 54.7710μs | 35.5826μs | 28.1036 KOps/s | 28.6254 KOps/s | $\color{#d91a1a}-1.82\\%$ | | test_values_nested_locked | 70.3110μs | 37.2122μs | 26.8729 KOps/s | 26.8891 KOps/s | $\color{#d91a1a}-0.06\\%$ | | test_values_nested_leaf | 55.7610μs | 31.5216μs | 31.7243 KOps/s | 32.1683 KOps/s | $\color{#d91a1a}-1.38\\%$ | | test_values_stack_nested | 70.2100μs | 36.0577μs | 27.7333 KOps/s | 27.9717 KOps/s | $\color{#d91a1a}-0.85\\%$ | | test_values_stack_nested_leaf | 55.1710μs | 32.4442μs | 30.8222 KOps/s | 31.5489 KOps/s | $\color{#d91a1a}-2.30\\%$ | | test_values_stack_nested_locked | 58.0410μs | 37.6945μs | 26.5291 KOps/s | 26.4303 KOps/s | $\color{#35bf28}+0.37\\%$ | | test_membership | 3.2059μs | 0.7280μs | 1.3736 MOps/s | 1.1918 MOps/s | $\textbf{\color{#35bf28}+15.25\\%}$ | | test_membership_nested | 24.4510μs | 2.5431μs | 393.2243 KOps/s | 397.6289 KOps/s | $\color{#d91a1a}-1.11\\%$ | | test_membership_nested_leaf | 22.7010μs | 2.5329μs | 394.7972 KOps/s | 392.4935 KOps/s | $\color{#35bf28}+0.59\\%$ | | test_membership_stacked_nested | 42.2510μs | 2.5230μs | 396.3475 KOps/s | 391.1953 KOps/s | $\color{#35bf28}+1.32\\%$ | | test_membership_stacked_nested_leaf | 22.8610μs | 2.5261μs | 395.8707 KOps/s | 393.0860 KOps/s | $\color{#35bf28}+0.71\\%$ | | test_membership_nested_last | 23.5310μs | 3.0426μs | 328.6611 KOps/s | 328.7851 KOps/s | $\color{#d91a1a}-0.04\\%$ | | test_membership_nested_leaf_last | 22.3100μs | 3.0441μs | 328.5031 KOps/s | 329.0815 KOps/s | $\color{#d91a1a}-0.18\\%$ | | test_membership_stacked_nested_last | 24.0000μs | 3.0977μs | 322.8206 KOps/s | 
325.0909 KOps/s | $\color{#d91a1a}-0.70\\%$ | | test_membership_stacked_nested_leaf_last | 21.2400μs | 3.0302μs | 330.0161 KOps/s | 326.4070 KOps/s | $\color{#35bf28}+1.11\\%$ | | test_nested_getleaf | 26.1710μs | 8.3007μs | 120.4722 KOps/s | 119.3701 KOps/s | $\color{#35bf28}+0.92\\%$ | | test_nested_get | 31.2110μs | 7.8150μs | 127.9583 KOps/s | 128.0467 KOps/s | $\color{#d91a1a}-0.07\\%$ | | test_stacked_getleaf | 28.0700μs | 8.3691μs | 119.4877 KOps/s | 119.5418 KOps/s | $\color{#d91a1a}-0.05\\%$ | | test_stacked_get | 28.7900μs | 8.0151μs | 124.7647 KOps/s | 127.5827 KOps/s | $\color{#d91a1a}-2.21\\%$ | | test_nested_getitemleaf | 28.1200μs | 8.5438μs | 117.0444 KOps/s | 117.6237 KOps/s | $\color{#d91a1a}-0.49\\%$ | | test_nested_getitem | 26.9910μs | 8.0189μs | 124.7049 KOps/s | 125.1578 KOps/s | $\color{#d91a1a}-0.36\\%$ | | test_stacked_getitemleaf | 27.7490μs | 8.5788μs | 116.5666 KOps/s | 117.4012 KOps/s | $\color{#d91a1a}-0.71\\%$ | | test_stacked_getitem | 26.2410μs | 7.9828μs | 125.2692 KOps/s | 125.3258 KOps/s | $\color{#d91a1a}-0.05\\%$ | | test_lock_nested | 58.4324ms | 0.3957ms | 2.5271 KOps/s | 2.5283 KOps/s | $\color{#d91a1a}-0.05\\%$ | | test_lock_stack_nested | 0.3250ms | 0.2925ms | 3.4193 KOps/s | 3.4391 KOps/s | $\color{#d91a1a}-0.58\\%$ | | test_unlock_nested | 60.5241ms | 0.3976ms | 2.5148 KOps/s | 2.5338 KOps/s | $\color{#d91a1a}-0.75\\%$ | | test_unlock_stack_nested | 0.3655ms | 0.3013ms | 3.3186 KOps/s | 3.3425 KOps/s | $\color{#d91a1a}-0.71\\%$ | | test_flatten_speed | 0.4040ms | 0.1012ms | 9.8797 KOps/s | 9.7892 KOps/s | $\color{#35bf28}+0.92\\%$ | | test_unflatten_speed | 0.3380ms | 0.2891ms | 3.4596 KOps/s | 3.5001 KOps/s | $\color{#d91a1a}-1.16\\%$ | | test_common_ops | 1.0177ms | 0.5607ms | 1.7836 KOps/s | 1.8169 KOps/s | $\color{#d91a1a}-1.83\\%$ | | test_creation | 39.3300μs | 1.5937μs | 627.4615 KOps/s | 641.3189 KOps/s | $\color{#d91a1a}-2.16\\%$ | | test_creation_empty | 29.9010μs | 7.7427μs | 129.1541 KOps/s | 137.2612 KOps/s 
| $\textbf{\color{#d91a1a}-5.91\\%}$ | | test_creation_nested_1 | 34.7800μs | 9.5239μs | 104.9985 KOps/s | 111.0567 KOps/s | $\textbf{\color{#d91a1a}-5.45\\%}$ | | test_creation_nested_2 | 34.1410μs | 11.6355μs | 85.9442 KOps/s | 88.7792 KOps/s | $\color{#d91a1a}-3.19\\%$ | | test_clone | 64.0520μs | 11.2587μs | 88.8198 KOps/s | 90.9566 KOps/s | $\color{#d91a1a}-2.35\\%$ | | test_getitem[int] | 25.4800μs | 10.5186μs | 95.0696 KOps/s | 95.5219 KOps/s | $\color{#d91a1a}-0.47\\%$ | | test_getitem[slice_int] | 42.0400μs | 19.7384μs | 50.6626 KOps/s | 50.8211 KOps/s | $\color{#d91a1a}-0.31\\%$ | | test_getitem[range] | 64.0510μs | 46.2786μs | 21.6083 KOps/s | 22.3250 KOps/s | $\color{#d91a1a}-3.21\\%$ | | test_getitem[tuple] | 41.9310μs | 18.1403μs | 55.1260 KOps/s | 55.1825 KOps/s | $\color{#d91a1a}-0.10\\%$ | | test_getitem[list] | 0.1188ms | 30.8821μs | 32.3812 KOps/s | 31.5194 KOps/s | $\color{#35bf28}+2.73\\%$ | | test_setitem_dim[int] | 55.1700μs | 25.6982μs | 38.9133 KOps/s | 38.9098 KOps/s | $+0.01\\%$ | | test_setitem_dim[slice_int] | 77.3100μs | 47.3925μs | 21.1004 KOps/s | 21.9614 KOps/s | $\color{#d91a1a}-3.92\\%$ | | test_setitem_dim[range] | 0.1031ms | 65.8670μs | 15.1821 KOps/s | 16.1102 KOps/s | $\textbf{\color{#d91a1a}-5.76\\%}$ | | test_setitem_dim[tuple] | 62.2610μs | 39.5350μs | 25.2940 KOps/s | 24.8272 KOps/s | $\color{#35bf28}+1.88\\%$ | | test_setitem | 56.1210μs | 15.6985μs | 63.7003 KOps/s | 66.2969 KOps/s | $\color{#d91a1a}-3.92\\%$ | | test_set | 45.8020μs | 15.6586μs | 63.8625 KOps/s | 68.7863 KOps/s | $\textbf{\color{#d91a1a}-7.16\\%}$ | | test_set_shared | 1.6817ms | 97.0789μs | 10.3009 KOps/s | 10.2898 KOps/s | $\color{#35bf28}+0.11\\%$ | | test_update | 91.9400μs | 17.5977μs | 56.8257 KOps/s | 58.9036 KOps/s | $\color{#d91a1a}-3.53\\%$ | | test_update_nested | 53.6600μs | 22.5605μs | 44.3252 KOps/s | 45.5989 KOps/s | $\color{#d91a1a}-2.79\\%$ | | test_update__nested | 62.3200μs | 21.7430μs | 45.9918 KOps/s | 47.8661 KOps/s | 
$\color{#d91a1a}-3.92\\%$ | | test_set_nested | 68.4510μs | 15.9701μs | 62.6170 KOps/s | 64.9158 KOps/s | $\color{#d91a1a}-3.54\\%$ | | test_set_nested_new | 66.3110μs | 18.6061μs | 53.7457 KOps/s | 55.2783 KOps/s | $\color{#d91a1a}-2.77\\%$ | | test_select | 71.5200μs | 30.9316μs | 32.3294 KOps/s | 32.0931 KOps/s | $\color{#35bf28}+0.74\\%$ | | test_select_nested | 0.8083ms | 52.4747μs | 19.0568 KOps/s | 19.1988 KOps/s | $\color{#d91a1a}-0.74\\%$ | | test_exclude_nested | 0.1435ms | 0.1075ms | 9.3048 KOps/s | 9.0452 KOps/s | $\color{#35bf28}+2.87\\%$ | | test_empty[True] | 0.3844ms | 0.3474ms | 2.8789 KOps/s | 2.8827 KOps/s | $\color{#d91a1a}-0.13\\%$ | | test_empty[False] | 2.4590μs | 0.8007μs | 1.2489 MOps/s | 1.2399 MOps/s | $\color{#35bf28}+0.73\\%$ | | test_to | 90.0710μs | 58.3173μs | 17.1476 KOps/s | 17.4816 KOps/s | $\color{#d91a1a}-1.91\\%$ | | test_to_nonblocking | 68.3210μs | 35.6244μs | 28.0707 KOps/s | 28.4772 KOps/s | $\color{#d91a1a}-1.43\\%$ | | test_unbind_speed | 0.9339ms | 0.2522ms | 3.9650 KOps/s | 3.9846 KOps/s | $\color{#d91a1a}-0.49\\%$ | | test_unbind_speed_stack0 | 0.3033ms | 0.2533ms | 3.9479 KOps/s | 3.9381 KOps/s | $\color{#35bf28}+0.25\\%$ | | test_unbind_speed_stack1 | 75.5660ms | 0.7741ms | 1.2919 KOps/s | 1.2950 KOps/s | $\color{#d91a1a}-0.24\\%$ | | test_split | 76.0859ms | 1.6148ms | 619.2801 Ops/s | 616.7657 Ops/s | $\color{#35bf28}+0.41\\%$ | | test_chunk | 76.0512ms | 1.6091ms | 621.4525 Ops/s | 614.1972 Ops/s | $\color{#35bf28}+1.18\\%$ | | test_creation[device0] | 0.1364ms | 56.8046μs | 17.6042 KOps/s | 17.6634 KOps/s | $\color{#d91a1a}-0.33\\%$ | | test_creation_from_tensor | 0.1415ms | 53.4060μs | 18.7245 KOps/s | 19.0173 KOps/s | $\color{#d91a1a}-1.54\\%$ | | test_add_one[memmap_tensor0] | 76.5410μs | 6.6668μs | 149.9977 KOps/s | 150.1082 KOps/s | $\color{#d91a1a}-0.07\\%$ | | test_contiguous[memmap_tensor0] | 25.0210μs | 0.6669μs | 1.4994 MOps/s | 1.4998 MOps/s | $\color{#d91a1a}-0.03\\%$ | | test_stack[memmap_tensor0] | 
29.6400μs | 4.7668μs | 209.7842 KOps/s | 213.3941 KOps/s | $\color{#d91a1a}-1.69\\%$ | | test_memmaptd_index | 1.2173ms | 0.2722ms | 3.6744 KOps/s | 3.7048 KOps/s | $\color{#d91a1a}-0.82\\%$ | | test_memmaptd_index_astensor | 0.6977ms | 0.3309ms | 3.0218 KOps/s | 3.0038 KOps/s | $\color{#35bf28}+0.60\\%$ | | test_memmaptd_index_op | 1.0412ms | 0.6144ms | 1.6277 KOps/s | 1.6649 KOps/s | $\color{#d91a1a}-2.23\\%$ | | test_serialize_model | 93.2035ms | 89.6252ms | 11.1576 Ops/s | 10.6355 Ops/s | $\color{#35bf28}+4.91\\%$ | | test_serialize_model_pickle | 1.3470s | 1.2350s | 0.8097 Ops/s | 0.8090 Ops/s | $\color{#35bf28}+0.09\\%$ | | test_serialize_weights | 0.1732s | 97.1816ms | 10.2900 Ops/s | 9.7002 Ops/s | $\textbf{\color{#35bf28}+6.08\\%}$ | | test_serialize_weights_returnearly | 0.2500s | 74.1243ms | 13.4909 Ops/s | 13.2709 Ops/s | $\color{#35bf28}+1.66\\%$ | | test_serialize_weights_pickle | 1.3518s | 1.2477s | 0.8015 Ops/s | 0.7966 Ops/s | $\color{#35bf28}+0.61\\%$ | | test_reshape_pytree | 56.3200μs | 25.9765μs | 38.4963 KOps/s | 38.8156 KOps/s | $\color{#d91a1a}-0.82\\%$ | | test_reshape_td | 57.7800μs | 32.2457μs | 31.0119 KOps/s | 31.9239 KOps/s | $\color{#d91a1a}-2.86\\%$ | | test_view_pytree | 55.9110μs | 26.1277μs | 38.2736 KOps/s | 39.2819 KOps/s | $\color{#d91a1a}-2.57\\%$ | | test_view_td | 0.1698ms | 36.3237μs | 27.5302 KOps/s | 27.5921 KOps/s | $\color{#d91a1a}-0.22\\%$ | | test_unbind_pytree | 0.1877ms | 32.7946μs | 30.4928 KOps/s | 31.5464 KOps/s | $\color{#d91a1a}-3.34\\%$ | | test_unbind_td | 0.4714ms | 39.3285μs | 25.4268 KOps/s | 24.8119 KOps/s | $\color{#35bf28}+2.48\\%$ | | test_split_pytree | 69.5600μs | 35.4049μs | 28.2447 KOps/s | 29.0787 KOps/s | $\color{#d91a1a}-2.87\\%$ | | test_split_td | 0.1007ms | 38.0000μs | 26.3158 KOps/s | 26.5001 KOps/s | $\color{#d91a1a}-0.70\\%$ | | test_add_pytree | 63.3120μs | 37.3766μs | 26.7547 KOps/s | 26.0014 KOps/s | $\color{#35bf28}+2.90\\%$ | | test_add_td | 93.5820μs | 48.7282μs | 20.5220 KOps/s | 
20.0106 KOps/s | $\color{#35bf28}+2.56\\%$ | | test_distributed | 0.2123ms | 69.2265μs | 14.4453 KOps/s | 13.8834 KOps/s | $\color{#35bf28}+4.05\\%$ | | test_tdmodule | 89.8410μs | 14.7247μs | 67.9133 KOps/s | 67.3725 KOps/s | $\color{#35bf28}+0.80\\%$ | | test_tdmodule_dispatch | 47.4320μs | 27.9854μs | 35.7330 KOps/s | 35.2737 KOps/s | $\color{#35bf28}+1.30\\%$ | | test_tdseq | 36.0200μs | 16.1132μs | 62.0610 KOps/s | 61.8742 KOps/s | $\color{#35bf28}+0.30\\%$ | | test_tdseq_dispatch | 54.0200μs | 31.2485μs | 32.0015 KOps/s | 32.8229 KOps/s | $\color{#d91a1a}-2.50\\%$ | | test_instantiation_functorch | 1.5193ms | 1.4076ms | 710.4447 Ops/s | 711.6433 Ops/s | $\color{#d91a1a}-0.17\\%$ | | test_instantiation_td | 79.1831ms | 1.0848ms | 921.8359 Ops/s | 1.0124 KOps/s | $\textbf{\color{#d91a1a}-8.94\\%}$ | | test_exec_functorch | 0.1865ms | 0.1432ms | 6.9819 KOps/s | 7.1078 KOps/s | $\color{#d91a1a}-1.77\\%$ | | test_exec_functional_call | 0.3550ms | 0.1333ms | 7.5005 KOps/s | 7.6852 KOps/s | $\color{#d91a1a}-2.40\\%$ | | test_exec_td | 0.1854ms | 0.1334ms | 7.4947 KOps/s | 7.7151 KOps/s | $\color{#d91a1a}-2.86\\%$ | | test_exec_td_decorator | 0.6529ms | 0.2042ms | 4.8960 KOps/s | 5.0259 KOps/s | $\color{#d91a1a}-2.58\\%$ | | test_vmap_mlp_speed[True-True] | 93.7163ms | 0.6287ms | 1.5906 KOps/s | 1.7731 KOps/s | $\textbf{\color{#d91a1a}-10.29\\%}$ | | test_vmap_mlp_speed[True-False] | 0.6719ms | 0.5567ms | 1.7963 KOps/s | 1.8211 KOps/s | $\color{#d91a1a}-1.37\\%$ | | test_vmap_mlp_speed[False-True] | 0.5755ms | 0.4989ms | 2.0045 KOps/s | 2.0604 KOps/s | $\color{#d91a1a}-2.71\\%$ | | test_vmap_mlp_speed[False-False] | 0.7766ms | 0.5191ms | 1.9263 KOps/s | 1.9757 KOps/s | $\color{#d91a1a}-2.50\\%$ | | test_vmap_mlp_speed_decorator[True-True] | 0.9221ms | 0.6139ms | 1.6291 KOps/s | 1.5606 KOps/s | $\color{#35bf28}+4.39\\%$ | | test_vmap_mlp_speed_decorator[True-False] | 0.7457ms | 0.6139ms | 1.6289 KOps/s | 1.6335 KOps/s | $\color{#d91a1a}-0.28\\%$ | | 
test_vmap_mlp_speed_decorator[False-True] | 0.6909ms | 0.5455ms | 1.8332 KOps/s | 1.8417 KOps/s | $\color{#d91a1a}-0.46\\%$ | | test_vmap_mlp_speed_decorator[False-False] | 0.6818ms | 0.5514ms | 1.8136 KOps/s | 1.8588 KOps/s | $\color{#d91a1a}-2.43\\%$ | | test_vmap_transformer_speed[True-True] | 8.3058ms | 7.3273ms | 136.4761 Ops/s | 138.9382 Ops/s | $\color{#d91a1a}-1.77\\%$ | | test_vmap_transformer_speed[True-False] | 8.3733ms | 7.6210ms | 131.2164 Ops/s | 139.0297 Ops/s | $\textbf{\color{#d91a1a}-5.62\\%}$ | | test_vmap_transformer_speed[False-True] | 8.0682ms | 7.3879ms | 135.3569 Ops/s | 138.9709 Ops/s | $\color{#d91a1a}-2.60\\%$ | | test_vmap_transformer_speed[False-False] | 8.0791ms | 7.3886ms | 135.3434 Ops/s | 137.9425 Ops/s | $\color{#d91a1a}-1.88\\%$ | | test_vmap_transformer_speed_decorator[True-True] | 19.8625ms | 18.3035ms | 54.6345 Ops/s | 56.1728 Ops/s | $\color{#d91a1a}-2.74\\%$ | | test_vmap_transformer_speed_decorator[True-False] | 18.9053ms | 18.0558ms | 55.3839 Ops/s | 56.5778 Ops/s | $\color{#d91a1a}-2.11\\%$ | | test_vmap_transformer_speed_decorator[False-True] | 18.7306ms | 17.9556ms | 55.6928 Ops/s | 56.5038 Ops/s | $\color{#d91a1a}-1.44\\%$ | | test_vmap_transformer_speed_decorator[False-False] | 18.6826ms | 17.8720ms | 55.9536 Ops/s | 56.0546 Ops/s | $\color{#d91a1a}-0.18\\%$ | | test_to_module_speed[True] | 2.1383ms | 1.5108ms | 661.8827 Ops/s | 673.4207 Ops/s | $\color{#d91a1a}-1.71\\%$ | | test_to_module_speed[False] | 1.6629ms | 1.4787ms | 676.2784 Ops/s | 683.8100 Ops/s | $\color{#d91a1a}-1.10\\%$ | | test_tc_init | 0.1879ms | 53.3310μs | 18.7508 KOps/s | 19.5814 KOps/s | $\color{#d91a1a}-4.24\\%$ | | test_tc_init_nested | 0.2477ms | 0.1011ms | 9.8952 KOps/s | 9.9033 KOps/s | $\color{#d91a1a}-0.08\\%$ | | test_tc_first_layer_tensor | 31.3500μs | 4.0324μs | 247.9925 KOps/s | 274.8859 KOps/s | $\textbf{\color{#d91a1a}-9.78\\%}$ | | test_tc_first_layer_nontensor | 0.7825ms | 3.7414μs | 267.2783 KOps/s | 270.9814 KOps/s | 
$\color{#d91a1a}-1.37\\%$ | | test_tc_second_layer_tensor | 14.0810μs | 1.2497μs | 800.1823 KOps/s | 855.3384 KOps/s | $\textbf{\color{#d91a1a}-6.45\\%}$ | | test_tc_second_layer_nontensor | 29.1310μs | 4.2830μs | 233.4800 KOps/s | 236.8599 KOps/s | $\color{#d91a1a}-1.43\\%$ | | test_unbind | 0.1106s | 14.5846ms | 68.5654 Ops/s | 74.8917 Ops/s | $\textbf{\color{#d91a1a}-8.45\\%}$ | | test_full_like | 10.7642ms | 9.3307ms | 107.1736 Ops/s | 107.7979 Ops/s | $\color{#d91a1a}-0.58\\%$ | | test_zeros_like | 8.3859ms | 7.9574ms | 125.6689 Ops/s | 125.4858 Ops/s | $\color{#35bf28}+0.15\\%$ | | test_ones_like | 8.5581ms | 8.0357ms | 124.4451 Ops/s | 125.5660 Ops/s | $\color{#d91a1a}-0.89\\%$ | | test_clone | 9.9612ms | 9.4823ms | 105.4600 Ops/s | 106.9270 Ops/s | $\color{#d91a1a}-1.37\\%$ | | test_squeeze | 0.1273ms | 10.7495μs | 93.0272 KOps/s | 95.1898 KOps/s | $\color{#d91a1a}-2.27\\%$ | | test_unsqueeze | 0.2392ms | 89.1504μs | 11.2170 KOps/s | 11.5742 KOps/s | $\color{#d91a1a}-3.09\\%$ | | test_split | 3.6391ms | 3.1597ms | 316.4902 Ops/s | 318.6430 Ops/s | $\color{#d91a1a}-0.68\\%$ | | test_permute | 0.3397ms | 0.2048ms | 4.8831 KOps/s | 4.9466 KOps/s | $\color{#d91a1a}-1.28\\%$ | | test_stack | 27.6750ms | 27.2727ms | 36.6667 Ops/s | 37.2083 Ops/s | $\color{#d91a1a}-1.46\\%$ | | test_cat | 27.3951ms | 27.0503ms | 36.9682 Ops/s | 37.6282 Ops/s | $\color{#d91a1a}-1.75\\%$ |