pytorch / tensordict

TensorDict is a pytorch dedicated tensor container.
MIT License
832 stars 74 forks source link

[Refactor] Refactor is_dynamo_compile imports #916

Closed vmoens closed 3 months ago

github-actions[bot] commented 3 months ago

$\color{#D29922}\textsf{\Large ⚠\kern{0.2cm}\normalsize Warning}$ Result of CPU Benchmark Tests

Total Benchmarks: 213. Improved: $\large\color{#35bf28}35$. Worsened: $\large\color{#d91a1a}10$.

Expand to view detailed results | Name | Max | Mean | Ops | Ops on Repo `HEAD` | Change | | ------------------------------------------------- | --------- | --------- | --------------- | ------------------ | ----------------------------------- | | test_plain_set_nested | 0.1055ms | 21.8848μs | 45.6938 KOps/s | 46.1489 KOps/s | $\color{#d91a1a}-0.99\\%$ | | test_plain_set_stack_nested | 49.0810μs | 22.2977μs | 44.8477 KOps/s | 46.3028 KOps/s | $\color{#d91a1a}-3.14\\%$ | | test_plain_set_nested_inplace | 71.0730μs | 24.0784μs | 41.5309 KOps/s | 42.3798 KOps/s | $\color{#d91a1a}-2.00\\%$ | | test_plain_set_stack_nested_inplace | 68.6780μs | 23.7222μs | 42.1546 KOps/s | 42.2366 KOps/s | $\color{#d91a1a}-0.19\\%$ | | test_items | 55.5940μs | 2.6762μs | 373.6573 KOps/s | 362.3975 KOps/s | $\color{#35bf28}+3.11\\%$ | | test_items_nested | 0.4322ms | 0.3406ms | 2.9364 KOps/s | 2.6535 KOps/s | $\textbf{\color{#35bf28}+10.66\\%}$ | | test_items_nested_locked | 1.3684ms | 0.3409ms | 2.9334 KOps/s | 2.7477 KOps/s | $\textbf{\color{#35bf28}+6.76\\%}$ | | test_items_nested_leaf | 0.1452ms | 86.7940μs | 11.5215 KOps/s | 11.4763 KOps/s | $\color{#35bf28}+0.39\\%$ | | test_items_stack_nested | 0.4185ms | 0.3392ms | 2.9481 KOps/s | 2.7264 KOps/s | $\textbf{\color{#35bf28}+8.13\\%}$ | | test_items_stack_nested_leaf | 0.1587ms | 86.9443μs | 11.5016 KOps/s | 11.1776 KOps/s | $\color{#35bf28}+2.90\\%$ | | test_items_stack_nested_locked | 0.4149ms | 0.3420ms | 2.9240 KOps/s | 2.7131 KOps/s | $\textbf{\color{#35bf28}+7.77\\%}$ | | test_keys | 45.6350μs | 3.9195μs | 255.1321 KOps/s | 259.8191 KOps/s | $\color{#d91a1a}-1.80\\%$ | | test_keys_nested | 0.2054ms | 0.1457ms | 6.8655 KOps/s | 6.8887 KOps/s | $\color{#d91a1a}-0.34\\%$ | | test_keys_nested_locked | 0.7002ms | 0.1506ms | 6.6397 KOps/s | 6.5903 KOps/s | $\color{#35bf28}+0.75\\%$ | | test_keys_nested_leaf | 0.1783ms | 0.1285ms | 7.7802 KOps/s | 7.9699 KOps/s | $\color{#d91a1a}-2.38\\%$ | | test_keys_stack_nested | 0.3017ms | 0.1472ms 
| 6.7939 KOps/s | 6.8073 KOps/s | $\color{#d91a1a}-0.20\\%$ | | test_keys_stack_nested_leaf | 0.2497ms | 0.1259ms | 7.9413 KOps/s | 7.9122 KOps/s | $\color{#35bf28}+0.37\\%$ | | test_keys_stack_nested_locked | 0.2144ms | 0.1521ms | 6.5732 KOps/s | 6.5992 KOps/s | $\color{#d91a1a}-0.39\\%$ | | test_values | 15.0438μs | 1.1401μs | 877.1227 KOps/s | 846.8326 KOps/s | $\color{#35bf28}+3.58\\%$ | | test_values_nested | 97.3920μs | 51.5116μs | 19.4131 KOps/s | 19.8213 KOps/s | $\color{#d91a1a}-2.06\\%$ | | test_values_nested_locked | 0.1105ms | 51.3368μs | 19.4792 KOps/s | 19.7375 KOps/s | $\color{#d91a1a}-1.31\\%$ | | test_values_nested_leaf | 0.1260ms | 46.2712μs | 21.6117 KOps/s | 21.8839 KOps/s | $\color{#d91a1a}-1.24\\%$ | | test_values_stack_nested | 0.1270ms | 51.7680μs | 19.3170 KOps/s | 19.0206 KOps/s | $\color{#35bf28}+1.56\\%$ | | test_values_stack_nested_leaf | 95.1870μs | 46.5927μs | 21.4626 KOps/s | 22.1899 KOps/s | $\color{#d91a1a}-3.28\\%$ | | test_values_stack_nested_locked | 1.3801ms | 51.4775μs | 19.4260 KOps/s | 19.5600 KOps/s | $\color{#d91a1a}-0.69\\%$ | | test_membership | 2.3013μs | 0.7508μs | 1.3320 MOps/s | 1.0804 MOps/s | $\textbf{\color{#35bf28}+23.28\\%}$ | | test_membership_nested | 43.7920μs | 2.6168μs | 382.1402 KOps/s | 369.1745 KOps/s | $\color{#35bf28}+3.51\\%$ | | test_membership_nested_leaf | 44.7640μs | 2.6341μs | 379.6317 KOps/s | 367.0717 KOps/s | $\color{#35bf28}+3.42\\%$ | | test_membership_stacked_nested | 19.1450μs | 2.5930μs | 385.6511 KOps/s | 368.5043 KOps/s | $\color{#35bf28}+4.65\\%$ | | test_membership_stacked_nested_leaf | 34.4340μs | 2.6589μs | 376.0945 KOps/s | 367.5792 KOps/s | $\color{#35bf28}+2.32\\%$ | | test_membership_nested_last | 37.3600μs | 3.9880μs | 250.7543 KOps/s | 245.9287 KOps/s | $\color{#35bf28}+1.96\\%$ | | test_membership_nested_leaf_last | 40.1840μs | 4.0230μs | 248.5726 KOps/s | 244.7149 KOps/s | $\color{#35bf28}+1.58\\%$ | | test_membership_stacked_nested_last | 23.3630μs | 4.0410μs | 247.4623 
KOps/s | 216.3138 KOps/s | $\textbf{\color{#35bf28}+14.40\\%}$ | | test_membership_stacked_nested_leaf_last | 32.1500μs | 4.0193μs | 248.7980 KOps/s | 216.5794 KOps/s | $\textbf{\color{#35bf28}+14.88\\%}$ | | test_nested_getleaf | 38.1310μs | 10.6721μs | 93.7020 KOps/s | 93.5342 KOps/s | $\color{#35bf28}+0.18\\%$ | | test_nested_get | 47.3490μs | 10.1323μs | 98.6938 KOps/s | 98.7561 KOps/s | $\color{#d91a1a}-0.06\\%$ | | test_stacked_getleaf | 39.7440μs | 10.6267μs | 94.1026 KOps/s | 95.4005 KOps/s | $\color{#d91a1a}-1.36\\%$ | | test_stacked_get | 36.5280μs | 10.1108μs | 98.9043 KOps/s | 101.1318 KOps/s | $\color{#d91a1a}-2.20\\%$ | | test_nested_getitemleaf | 52.8390μs | 11.2456μs | 88.9236 KOps/s | 89.1149 KOps/s | $\color{#d91a1a}-0.21\\%$ | | test_nested_getitem | 93.2340μs | 10.3041μs | 97.0489 KOps/s | 96.7961 KOps/s | $\color{#35bf28}+0.26\\%$ | | test_stacked_getitemleaf | 42.2590μs | 11.0611μs | 90.4071 KOps/s | 89.0505 KOps/s | $\color{#35bf28}+1.52\\%$ | | test_stacked_getitem | 38.3820μs | 10.1821μs | 98.2115 KOps/s | 97.2206 KOps/s | $\color{#35bf28}+1.02\\%$ | | test_lock_nested | 91.4561ms | 0.6015ms | 1.6625 KOps/s | 1.9117 KOps/s | $\textbf{\color{#d91a1a}-13.04\\%}$ | | test_lock_stack_nested | 0.7230ms | 0.4759ms | 2.1014 KOps/s | 2.0583 KOps/s | $\color{#35bf28}+2.09\\%$ | | test_unlock_nested | 91.6270ms | 0.5138ms | 1.9463 KOps/s | 2.2543 KOps/s | $\textbf{\color{#d91a1a}-13.66\\%}$ | | test_unlock_stack_nested | 0.5789ms | 0.3831ms | 2.6100 KOps/s | 2.5090 KOps/s | $\color{#35bf28}+4.03\\%$ | | test_flatten_speed | 0.6708ms | 0.1044ms | 9.5775 KOps/s | 9.5687 KOps/s | $\color{#35bf28}+0.09\\%$ | | test_unflatten_speed | 0.7755ms | 0.4329ms | 2.3099 KOps/s | 2.2697 KOps/s | $\color{#35bf28}+1.77\\%$ | | test_common_ops | 7.5651ms | 1.0668ms | 937.4193 Ops/s | 885.7756 Ops/s | $\textbf{\color{#35bf28}+5.83\\%}$ | | test_creation | 0.1188ms | 2.0505μs | 487.6792 KOps/s | 395.6058 KOps/s | $\textbf{\color{#35bf28}+23.27\\%}$ | | 
test_creation_empty | 45.6560μs | 17.3303μs | 57.7022 KOps/s | 54.7342 KOps/s | $\textbf{\color{#35bf28}+5.42\\%}$ | | test_creation_nested_1 | 71.0020μs | 20.5589μs | 48.6408 KOps/s | 45.1000 KOps/s | $\textbf{\color{#35bf28}+7.85\\%}$ | | test_creation_nested_2 | 74.3890μs | 24.1537μs | 41.4015 KOps/s | 38.4099 KOps/s | $\textbf{\color{#35bf28}+7.79\\%}$ | | test_clone | 0.1162ms | 16.4163μs | 60.9152 KOps/s | 59.9310 KOps/s | $\color{#35bf28}+1.64\\%$ | | test_getitem[int] | 0.9746ms | 16.5297μs | 60.4970 KOps/s | 61.0554 KOps/s | $\color{#d91a1a}-0.91\\%$ | | test_getitem[slice_int] | 0.1389ms | 30.7645μs | 32.5050 KOps/s | 30.2837 KOps/s | $\textbf{\color{#35bf28}+7.34\\%}$ | | test_getitem[range] | 0.3479ms | 55.2577μs | 18.0970 KOps/s | 17.6901 KOps/s | $\color{#35bf28}+2.30\\%$ | | test_getitem[tuple] | 0.1332ms | 25.0645μs | 39.8971 KOps/s | 40.4350 KOps/s | $\color{#d91a1a}-1.33\\%$ | | test_getitem[list] | 0.3168ms | 50.4212μs | 19.8329 KOps/s | 19.0163 KOps/s | $\color{#35bf28}+4.29\\%$ | | test_setitem_dim[int] | 87.6430μs | 39.5155μs | 25.3065 KOps/s | 25.0165 KOps/s | $\color{#35bf28}+1.16\\%$ | | test_setitem_dim[slice_int] | 0.1275ms | 69.9154μs | 14.3030 KOps/s | 13.9561 KOps/s | $\color{#35bf28}+2.49\\%$ | | test_setitem_dim[range] | 0.1602ms | 90.7030μs | 11.0250 KOps/s | 10.7265 KOps/s | $\color{#35bf28}+2.78\\%$ | | test_setitem_dim[tuple] | 0.1502ms | 55.8764μs | 17.8966 KOps/s | 17.3767 KOps/s | $\color{#35bf28}+2.99\\%$ | | test_setitem | 0.1831ms | 28.2142μs | 35.4431 KOps/s | 33.5397 KOps/s | $\textbf{\color{#35bf28}+5.68\\%}$ | | test_set | 0.2014ms | 28.2512μs | 35.3967 KOps/s | 34.7622 KOps/s | $\color{#35bf28}+1.83\\%$ | | test_set_shared | 3.8998ms | 0.2135ms | 4.6828 KOps/s | 4.4863 KOps/s | $\color{#35bf28}+4.38\\%$ | | test_update | 0.1803ms | 34.6817μs | 28.8337 KOps/s | 27.8871 KOps/s | $\color{#35bf28}+3.39\\%$ | | test_update_nested | 0.1838ms | 44.5049μs | 22.4694 KOps/s | 21.4961 KOps/s | $\color{#35bf28}+4.53\\%$ | | 
test_update__nested | 0.1619ms | 34.4100μs | 29.0613 KOps/s | 29.1178 KOps/s | $\color{#d91a1a}-0.19\\%$ | | test_set_nested | 0.1659ms | 30.6340μs | 32.6435 KOps/s | 31.7234 KOps/s | $\color{#35bf28}+2.90\\%$ | | test_set_nested_new | 1.0828ms | 35.3840μs | 28.2614 KOps/s | 27.0415 KOps/s | $\color{#35bf28}+4.51\\%$ | | test_select | 0.1850ms | 50.9502μs | 19.6270 KOps/s | 17.9864 KOps/s | $\textbf{\color{#35bf28}+9.12\\%}$ | | test_select_nested | 0.1094ms | 60.0709μs | 16.6470 KOps/s | 16.0132 KOps/s | $\color{#35bf28}+3.96\\%$ | | test_exclude_nested | 0.1327ms | 78.2233μs | 12.7839 KOps/s | 12.0257 KOps/s | $\textbf{\color{#35bf28}+6.31\\%}$ | | test_empty[True] | 0.7219ms | 0.3236ms | 3.0898 KOps/s | 2.8667 KOps/s | $\textbf{\color{#35bf28}+7.78\\%}$ | | test_empty[False] | 12.8540μs | 1.2161μs | 822.2682 KOps/s | 777.8026 KOps/s | $\textbf{\color{#35bf28}+5.72\\%}$ | | test_unbind_speed | 0.6619ms | 0.3317ms | 3.0150 KOps/s | 3.0811 KOps/s | $\color{#d91a1a}-2.15\\%$ | | test_unbind_speed_stack0 | 0.5572ms | 0.3027ms | 3.3041 KOps/s | 3.1745 KOps/s | $\color{#35bf28}+4.08\\%$ | | test_unbind_speed_stack1 | 88.4201ms | 0.8015ms | 1.2477 KOps/s | 1.3162 KOps/s | $\textbf{\color{#d91a1a}-5.20\\%}$ | | test_split | 94.6326ms | 2.1359ms | 468.1879 Ops/s | 446.2687 Ops/s | $\color{#35bf28}+4.91\\%$ | | test_chunk | 95.1243ms | 2.2313ms | 448.1749 Ops/s | 442.9122 Ops/s | $\color{#35bf28}+1.19\\%$ | | test_creation[device0] | 0.2802ms | 0.1157ms | 8.6465 KOps/s | 7.6328 KOps/s | $\textbf{\color{#35bf28}+13.28\\%}$ | | test_creation_from_tensor | 3.7475ms | 0.1183ms | 8.4532 KOps/s | 8.0571 KOps/s | $\color{#35bf28}+4.92\\%$ | | test_add_one[memmap_tensor0] | 0.4213ms | 7.4726μs | 133.8214 KOps/s | 132.5449 KOps/s | $\color{#35bf28}+0.96\\%$ | | test_contiguous[memmap_tensor0] | 19.6660μs | 1.9859μs | 503.5598 KOps/s | 487.5065 KOps/s | $\color{#35bf28}+3.29\\%$ | | test_stack[memmap_tensor0] | 84.3980μs | 5.8931μs | 169.6913 KOps/s | 176.2603 KOps/s | 
$\color{#d91a1a}-3.73\\%$ | | test_memmaptd_index | 1.2375ms | 0.3967ms | 2.5210 KOps/s | 2.4682 KOps/s | $\color{#35bf28}+2.14\\%$ | | test_memmaptd_index_astensor | 1.1045ms | 0.4829ms | 2.0709 KOps/s | 2.0267 KOps/s | $\color{#35bf28}+2.18\\%$ | | test_memmaptd_index_op | 2.3629ms | 1.0069ms | 993.1828 Ops/s | 951.0915 Ops/s | $\color{#35bf28}+4.43\\%$ | | test_serialize_model | 0.1422s | 0.1282s | 7.8012 Ops/s | 6.4162 Ops/s | $\textbf{\color{#35bf28}+21.59\\%}$ | | test_serialize_model_pickle | 0.4662s | 0.4048s | 2.4703 Ops/s | 2.4836 Ops/s | $\color{#d91a1a}-0.54\\%$ | | test_serialize_weights | 0.2292s | 0.1416s | 7.0625 Ops/s | 7.6663 Ops/s | $\textbf{\color{#d91a1a}-7.88\\%}$ | | test_serialize_weights_returnearly | 0.1926s | 0.1734s | 5.7679 Ops/s | 5.9196 Ops/s | $\color{#d91a1a}-2.56\\%$ | | test_serialize_weights_pickle | 0.4471s | 0.4048s | 2.4702 Ops/s | 2.5015 Ops/s | $\color{#d91a1a}-1.25\\%$ | | test_serialize_weights_filesystem | 0.1526s | 0.1453s | 6.8824 Ops/s | 6.2007 Ops/s | $\textbf{\color{#35bf28}+10.99\\%}$ | | test_serialize_model_filesystem | 0.2607s | 0.1692s | 5.9099 Ops/s | 6.4754 Ops/s | $\textbf{\color{#d91a1a}-8.73\\%}$ | | test_reshape_pytree | 0.1079ms | 41.3693μs | 24.1725 KOps/s | 25.5296 KOps/s | $\textbf{\color{#d91a1a}-5.32\\%}$ | | test_reshape_td | 0.1124ms | 46.7530μs | 21.3890 KOps/s | 21.0621 KOps/s | $\color{#35bf28}+1.55\\%$ | | test_view_pytree | 0.1212ms | 40.1319μs | 24.9178 KOps/s | 25.3124 KOps/s | $\color{#d91a1a}-1.56\\%$ | | test_view_td | 0.1088ms | 53.1006μs | 18.8322 KOps/s | 18.7124 KOps/s | $\color{#35bf28}+0.64\\%$ | | test_unbind_pytree | 93.4640μs | 37.0765μs | 26.9713 KOps/s | 26.4922 KOps/s | $\color{#35bf28}+1.81\\%$ | | test_unbind_td | 0.3953ms | 44.8118μs | 22.3156 KOps/s | 20.6987 KOps/s | $\textbf{\color{#35bf28}+7.81\\%}$ | | test_split_pytree | 86.8220μs | 40.0283μs | 24.9823 KOps/s | 25.3264 KOps/s | $\color{#d91a1a}-1.36\\%$ | | test_split_td | 0.5510ms | 57.6043μs | 17.3598 KOps/s | 
16.9086 KOps/s | $\color{#35bf28}+2.67\\%$ | | test_add_pytree | 0.1294ms | 47.0561μs | 21.2512 KOps/s | 22.2342 KOps/s | $\color{#d91a1a}-4.42\\%$ | | test_add_td | 0.1698ms | 80.9575μs | 12.3522 KOps/s | 12.2388 KOps/s | $\color{#35bf28}+0.93\\%$ | | test_compile_add_one_nested[tensordict-compile] | 0.1208ms | 52.8359μs | 18.9265 KOps/s | 18.5433 KOps/s | $\color{#35bf28}+2.07\\%$ | | test_compile_add_one_nested[tensordict-eager] | 0.3330ms | 0.1900ms | 5.2630 KOps/s | 5.1930 KOps/s | $\color{#35bf28}+1.35\\%$ | | test_compile_add_one_nested[pytree-compile] | 0.1996ms | 54.9557μs | 18.1965 KOps/s | 18.3271 KOps/s | $\color{#d91a1a}-0.71\\%$ | | test_compile_add_one_nested[pytree-eager] | 0.3521ms | 0.1463ms | 6.8351 KOps/s | 7.0039 KOps/s | $\color{#d91a1a}-2.41\\%$ | | test_compile_copy_nested[tensordict-compile] | 57.5770μs | 20.4975μs | 48.7864 KOps/s | 49.0006 KOps/s | $\color{#d91a1a}-0.44\\%$ | | test_compile_copy_nested[tensordict-eager] | 0.1432ms | 65.0606μs | 15.3703 KOps/s | 15.3232 KOps/s | $\color{#35bf28}+0.31\\%$ | | test_compile_copy_nested[pytree-compile] | 0.1787ms | 80.9465μs | 12.3538 KOps/s | 12.5744 KOps/s | $\color{#d91a1a}-1.75\\%$ | | test_compile_copy_nested[pytree-eager] | 0.1499ms | 73.1273μs | 13.6748 KOps/s | 13.8718 KOps/s | $\color{#d91a1a}-1.42\\%$ | | test_compile_add_one_flat[tensordict-compile] | 0.3891ms | 0.1768ms | 5.6550 KOps/s | 5.6107 KOps/s | $\color{#35bf28}+0.79\\%$ | | test_compile_add_one_flat[tensordict-eager] | 0.3363ms | 0.1937ms | 5.1631 KOps/s | 5.1602 KOps/s | $\color{#35bf28}+0.06\\%$ | | test_compile_add_one_flat[tensorclass-compile] | 0.1177ms | 38.7381μs | 25.8144 KOps/s | 24.2654 KOps/s | $\textbf{\color{#35bf28}+6.38\\%}$ | | test_compile_add_one_flat[tensorclass-eager] | 1.3060ms | 68.9515μs | 14.5029 KOps/s | 14.2256 KOps/s | $\color{#35bf28}+1.95\\%$ | | test_compile_add_one_flat[pytree-compile] | 0.3690ms | 0.1763ms | 5.6734 KOps/s | 5.5937 KOps/s | $\color{#35bf28}+1.42\\%$ | | 
test_compile_add_one_flat[pytree-eager] | 0.5373ms | 0.2908ms | 3.4388 KOps/s | 3.4741 KOps/s | $\color{#d91a1a}-1.02\\%$ | | test_compile_add_self_flat[tensordict-eager] | 0.3927ms | 0.2094ms | 4.7758 KOps/s | 4.7640 KOps/s | $\color{#35bf28}+0.25\\%$ | | test_compile_add_self_flat[tensordict-compile] | 0.4662ms | 0.1836ms | 5.4461 KOps/s | 5.5517 KOps/s | $\color{#d91a1a}-1.90\\%$ | | test_compile_add_self_flat[tensorclass-eager] | 0.8599ms | 65.6965μs | 15.2215 KOps/s | 15.3884 KOps/s | $\color{#d91a1a}-1.08\\%$ | | test_compile_add_self_flat[tensorclass-compile] | 0.1134ms | 40.0958μs | 24.9403 KOps/s | 24.3724 KOps/s | $\color{#35bf28}+2.33\\%$ | | test_compile_add_self_flat[pytree-eager] | 0.3294ms | 0.2375ms | 4.2113 KOps/s | 4.2186 KOps/s | $\color{#d91a1a}-0.17\\%$ | | test_compile_add_self_flat[pytree-compile] | 0.3963ms | 0.1745ms | 5.7300 KOps/s | 5.6546 KOps/s | $\color{#35bf28}+1.33\\%$ | | test_compile_copy_flat[tensordict-compile] | 0.2007ms | 0.1072ms | 9.3289 KOps/s | 9.2274 KOps/s | $\color{#35bf28}+1.10\\%$ | | test_compile_copy_flat[tensordict-eager] | 0.1106ms | 57.9707μs | 17.2501 KOps/s | 17.6968 KOps/s | $\color{#d91a1a}-2.52\\%$ | | test_compile_copy_flat[pytree-compile] | 0.1543ms | 79.6987μs | 12.5473 KOps/s | 12.7064 KOps/s | $\color{#d91a1a}-1.25\\%$ | | test_compile_copy_flat[pytree-eager] | 0.1421ms | 72.2270μs | 13.8452 KOps/s | 13.9896 KOps/s | $\color{#d91a1a}-1.03\\%$ | | test_compile_assign_and_add[tensordict-compile] | 0.4118ms | 0.1966ms | 5.0861 KOps/s | 5.1495 KOps/s | $\color{#d91a1a}-1.23\\%$ | | test_compile_assign_and_add[tensordict-eager] | 1.8342ms | 1.6274ms | 614.4882 Ops/s | 613.4010 Ops/s | $\color{#35bf28}+0.18\\%$ | | test_compile_assign_and_add[pytree-compile] | 0.3482ms | 0.1908ms | 5.2415 KOps/s | 5.1642 KOps/s | $\color{#35bf28}+1.50\\%$ | | test_compile_assign_and_add[pytree-eager] | 1.2571ms | 1.0812ms | 924.8814 Ops/s | 934.8607 Ops/s | $\color{#d91a1a}-1.07\\%$ | | 
test_compile_assign_and_add_stack[compile] | 0.5857ms | 0.4317ms | 2.3163 KOps/s | 2.2918 KOps/s | $\color{#35bf28}+1.07\\%$ | | test_compile_assign_and_add_stack[eager] | 4.1121ms | 3.7680ms | 265.3924 Ops/s | 243.6414 Ops/s | $\textbf{\color{#35bf28}+8.93\\%}$ | | test_compile_indexing[tensor-tensordict-compile] | 0.3722ms | 31.7703μs | 31.4759 KOps/s | 29.9633 KOps/s | $\textbf{\color{#35bf28}+5.05\\%}$ | | test_compile_indexing[tensor-tensordict-eager] | 1.2034ms | 47.8236μs | 20.9102 KOps/s | 20.0525 KOps/s | $\color{#35bf28}+4.28\\%$ | | test_compile_indexing[tensor-tensorclass-compile] | 0.2608ms | 28.4556μs | 35.1424 KOps/s | 33.9164 KOps/s | $\color{#35bf28}+3.61\\%$ | | test_compile_indexing[tensor-tensorclass-eager] | 0.3394ms | 29.7118μs | 33.6566 KOps/s | 33.2111 KOps/s | $\color{#35bf28}+1.34\\%$ | | test_compile_indexing[tensor-pytree-compile] | 0.2986ms | 28.2066μs | 35.4527 KOps/s | 34.6954 KOps/s | $\color{#35bf28}+2.18\\%$ | | test_compile_indexing[tensor-pytree-eager] | 0.3316ms | 30.0963μs | 33.2266 KOps/s | 32.7429 KOps/s | $\color{#35bf28}+1.48\\%$ | | test_compile_indexing[slice-tensordict-compile] | 3.3569ms | 71.0336μs | 14.0778 KOps/s | 13.6552 KOps/s | $\color{#35bf28}+3.09\\%$ | | test_compile_indexing[slice-tensordict-eager] | 0.4384ms | 27.9403μs | 35.7906 KOps/s | 35.5155 KOps/s | $\color{#35bf28}+0.77\\%$ | | test_compile_indexing[slice-tensorclass-compile] | 0.1510ms | 65.9228μs | 15.1693 KOps/s | 14.4440 KOps/s | $\textbf{\color{#35bf28}+5.02\\%}$ | | test_compile_indexing[slice-tensorclass-eager] | 79.1780μs | 24.3743μs | 41.0268 KOps/s | 41.6740 KOps/s | $\color{#d91a1a}-1.55\\%$ | | test_compile_indexing[slice-pytree-compile] | 0.1474ms | 66.1758μs | 15.1113 KOps/s | 14.4440 KOps/s | $\color{#35bf28}+4.62\\%$ | | test_compile_indexing[slice-pytree-eager] | 63.2080μs | 24.0692μs | 41.5469 KOps/s | 42.3882 KOps/s | $\color{#d91a1a}-1.98\\%$ | | test_compile_indexing[int-tensordict-compile] | 0.1478ms | 71.8897μs | 13.9102 KOps/s 
| 13.6557 KOps/s | $\color{#35bf28}+1.86\\%$ | | test_compile_indexing[int-tensordict-eager] | 0.8751ms | 28.0164μs | 35.6934 KOps/s | 35.5694 KOps/s | $\color{#35bf28}+0.35\\%$ | | test_compile_indexing[int-tensorclass-compile] | 0.1411ms | 66.1629μs | 15.1142 KOps/s | 14.4837 KOps/s | $\color{#35bf28}+4.35\\%$ | | test_compile_indexing[int-tensorclass-eager] | 69.7300μs | 24.6060μs | 40.6405 KOps/s | 41.9548 KOps/s | $\color{#d91a1a}-3.13\\%$ | | test_compile_indexing[int-pytree-compile] | 0.1344ms | 66.6052μs | 15.0138 KOps/s | 14.4261 KOps/s | $\color{#35bf28}+4.07\\%$ | | test_compile_indexing[int-pytree-eager] | 4.6760ms | 23.7181μs | 42.1620 KOps/s | 42.0636 KOps/s | $\color{#35bf28}+0.23\\%$ | | test_mod_add[eager] | 71.3430μs | 24.9565μs | 40.0698 KOps/s | 40.3870 KOps/s | $\color{#d91a1a}-0.79\\%$ | | test_mod_add[compile] | 0.1107ms | 36.9866μs | 27.0368 KOps/s | 25.6256 KOps/s | $\textbf{\color{#35bf28}+5.51\\%}$ | | test_mod_add[compile-overhead] | 0.1146ms | 36.9598μs | 27.0564 KOps/s | 25.4170 KOps/s | $\textbf{\color{#35bf28}+6.45\\%}$ | | test_mod_wrap[eager] | 0.4646ms | 0.2066ms | 4.8402 KOps/s | 4.7302 KOps/s | $\color{#35bf28}+2.33\\%$ | | test_mod_wrap[compile] | 2.1733ms | 0.2251ms | 4.4431 KOps/s | 4.2851 KOps/s | $\color{#35bf28}+3.69\\%$ | | test_mod_wrap[compile-overhead] | 0.4197ms | 0.2224ms | 4.4966 KOps/s | 4.3095 KOps/s | $\color{#35bf28}+4.34\\%$ | | test_mod_wrap_and_backward[eager] | 14.5788ms | 12.2966ms | 81.3232 Ops/s | 82.2468 Ops/s | $\color{#d91a1a}-1.12\\%$ | | test_mod_wrap_and_backward[compile] | 16.5175ms | 12.6069ms | 79.3219 Ops/s | 73.9517 Ops/s | $\textbf{\color{#35bf28}+7.26\\%}$ | | test_mod_wrap_and_backward[compile-overhead] | 15.3944ms | 12.6511ms | 79.0446 Ops/s | 77.3222 Ops/s | $\color{#35bf28}+2.23\\%$ | | test_seq_add[eager] | 0.1691ms | 88.2448μs | 11.3321 KOps/s | 11.2231 KOps/s | $\color{#35bf28}+0.97\\%$ | | test_seq_add[compile] | 0.1628ms | 60.0498μs | 16.6528 KOps/s | 16.0912 KOps/s | 
$\color{#35bf28}+3.49\\%$ | | test_seq_add[compile-overhead] | 0.1585ms | 58.8802μs | 16.9836 KOps/s | 16.2101 KOps/s | $\color{#35bf28}+4.77\\%$ | | test_seq_wrap[eager] | 0.5698ms | 0.3713ms | 2.6929 KOps/s | 2.6401 KOps/s | $\color{#35bf28}+2.00\\%$ | | test_seq_wrap[compile] | 0.4687ms | 0.2592ms | 3.8586 KOps/s | 3.6897 KOps/s | $\color{#35bf28}+4.58\\%$ | | test_seq_wrap[compile-overhead] | 0.4309ms | 0.2597ms | 3.8499 KOps/s | 3.7048 KOps/s | $\color{#35bf28}+3.92\\%$ | | test_func_call_runtime[False-eager] | 0.8271ms | 0.5246ms | 1.9063 KOps/s | 1.8844 KOps/s | $\color{#35bf28}+1.16\\%$ | | test_func_call_runtime[False-compile] | 0.7103ms | 0.4884ms | 2.0475 KOps/s | 1.9787 KOps/s | $\color{#35bf28}+3.47\\%$ | | test_func_call_runtime[False-compile-overhead] | 0.6180ms | 0.4868ms | 2.0542 KOps/s | 1.9799 KOps/s | $\color{#35bf28}+3.75\\%$ | | test_func_call_runtime[True-eager] | 1.5161ms | 0.8288ms | 1.2066 KOps/s | 1.1813 KOps/s | $\color{#35bf28}+2.14\\%$ | | test_func_call_runtime[True-compile] | 0.7656ms | 0.5100ms | 1.9608 KOps/s | 1.9065 KOps/s | $\color{#35bf28}+2.85\\%$ | | test_func_call_runtime[True-compile-overhead] | 0.8040ms | 0.5088ms | 1.9654 KOps/s | 1.8992 KOps/s | $\color{#35bf28}+3.49\\%$ | | test_distributed | 0.3134ms | 0.1332ms | 7.5082 KOps/s | 7.4491 KOps/s | $\color{#35bf28}+0.79\\%$ | | test_tdmodule | 0.1447ms | 17.4838μs | 57.1957 KOps/s | 56.6180 KOps/s | $\color{#35bf28}+1.02\\%$ | | test_tdmodule_dispatch | 55.9750μs | 35.1763μs | 28.4283 KOps/s | 27.0164 KOps/s | $\textbf{\color{#35bf28}+5.23\\%}$ | | test_tdseq | 49.5220μs | 19.2880μs | 51.8457 KOps/s | 51.1200 KOps/s | $\color{#35bf28}+1.42\\%$ | | test_tdseq_dispatch | 78.2060μs | 39.5555μs | 25.2810 KOps/s | 24.3520 KOps/s | $\color{#35bf28}+3.81\\%$ | | test_instantiation_functorch | 2.6316ms | 1.6828ms | 594.2387 Ops/s | 621.1267 Ops/s | $\color{#d91a1a}-4.33\\%$ | | test_instantiation_td | 2.1776ms | 1.2154ms | 822.7700 Ops/s | 836.8985 Ops/s | 
$\color{#d91a1a}-1.69\\%$ | | test_exec_functorch | 0.4029ms | 0.1795ms | 5.5703 KOps/s | 5.4263 KOps/s | $\color{#35bf28}+2.66\\%$ | | test_exec_functional_call | 0.4634ms | 0.1681ms | 5.9486 KOps/s | 5.7767 KOps/s | $\color{#35bf28}+2.98\\%$ | | test_exec_td | 0.3040ms | 0.1720ms | 5.8123 KOps/s | 5.6439 KOps/s | $\color{#35bf28}+2.98\\%$ | | test_exec_td_decorator | 0.9857ms | 0.2535ms | 3.9452 KOps/s | 3.8558 KOps/s | $\color{#35bf28}+2.32\\%$ | | test_vmap_mlp_speed[True-True] | 0.8269ms | 0.5852ms | 1.7089 KOps/s | 1.6752 KOps/s | $\color{#35bf28}+2.01\\%$ | | test_vmap_mlp_speed[True-False] | 0.8891ms | 0.5830ms | 1.7154 KOps/s | 1.6850 KOps/s | $\color{#35bf28}+1.80\\%$ | | test_vmap_mlp_speed[False-True] | 0.7614ms | 0.4856ms | 2.0594 KOps/s | 2.0221 KOps/s | $\color{#35bf28}+1.85\\%$ | | test_vmap_mlp_speed[False-False] | 0.8708ms | 0.4856ms | 2.0595 KOps/s | 2.0215 KOps/s | $\color{#35bf28}+1.88\\%$ | | test_vmap_mlp_speed_decorator[True-True] | 1.8591ms | 0.6822ms | 1.4659 KOps/s | 1.4488 KOps/s | $\color{#35bf28}+1.18\\%$ | | test_vmap_mlp_speed_decorator[True-False] | 0.9924ms | 0.6758ms | 1.4797 KOps/s | 1.4506 KOps/s | $\color{#35bf28}+2.01\\%$ | | test_vmap_mlp_speed_decorator[False-True] | 0.8289ms | 0.5616ms | 1.7805 KOps/s | 1.7736 KOps/s | $\color{#35bf28}+0.39\\%$ | | test_vmap_mlp_speed_decorator[False-False] | 0.7272ms | 0.5617ms | 1.7804 KOps/s | 1.7581 KOps/s | $\color{#35bf28}+1.27\\%$ | | test_to_module_speed[True] | 2.6052ms | 1.8345ms | 545.1201 Ops/s | 540.6731 Ops/s | $\color{#35bf28}+0.82\\%$ | | test_to_module_speed[False] | 2.4996ms | 1.7936ms | 557.5511 Ops/s | 543.8679 Ops/s | $\color{#35bf28}+2.52\\%$ | | test_tc_init | 82.1430μs | 44.8472μs | 22.2980 KOps/s | 22.9846 KOps/s | $\color{#d91a1a}-2.99\\%$ | | test_tc_init_nested | 0.1646ms | 90.7456μs | 11.0198 KOps/s | 11.2109 KOps/s | $\color{#d91a1a}-1.70\\%$ | | test_tc_first_layer_tensor | 30.3570μs | 1.4815μs | 674.9800 KOps/s | 662.9918 KOps/s | $\color{#35bf28}+1.81\\%$ | 
| test_tc_first_layer_nontensor | 21.1700μs | 4.2409μs | 235.7997 KOps/s | 229.9814 KOps/s | $\color{#35bf28}+2.53\\%$ | | test_tc_second_layer_tensor | 48.7720μs | 2.7033μs | 369.9159 KOps/s | 360.5925 KOps/s | $\color{#35bf28}+2.59\\%$ | | test_tc_second_layer_nontensor | 67.1250μs | 5.4143μs | 184.6974 KOps/s | 180.2452 KOps/s | $\color{#35bf28}+2.47\\%$ | | test_unbind | 0.4792s | 15.1063ms | 66.1976 Ops/s | 54.1270 Ops/s | $\textbf{\color{#35bf28}+22.30\\%}$ | | test_full_like | 22.0846ms | 13.5797ms | 73.6393 Ops/s | 104.0355 Ops/s | $\textbf{\color{#d91a1a}-29.22\\%}$ | | test_zeros_like | 13.1630ms | 7.7064ms | 129.7630 Ops/s | 269.5568 Ops/s | $\textbf{\color{#d91a1a}-51.86\\%}$ | | test_ones_like | 12.6322ms | 8.0372ms | 124.4220 Ops/s | 243.3658 Ops/s | $\textbf{\color{#d91a1a}-48.87\\%}$ | | test_clone | 14.8597ms | 9.8459ms | 101.5647 Ops/s | 154.6391 Ops/s | $\textbf{\color{#d91a1a}-34.32\\%}$ | | test_squeeze | 69.1890μs | 12.8692μs | 77.7050 KOps/s | 73.0598 KOps/s | $\textbf{\color{#35bf28}+6.36\\%}$ | | test_unsqueeze | 0.2039ms | 91.8882μs | 10.8828 KOps/s | 10.5596 KOps/s | $\color{#35bf28}+3.06\\%$ | | test_split | 0.5038ms | 0.1971ms | 5.0737 KOps/s | 4.8072 KOps/s | $\textbf{\color{#35bf28}+5.55\\%}$ | | test_permute | 0.3654ms | 0.2202ms | 4.5418 KOps/s | 4.5032 KOps/s | $\color{#35bf28}+0.86\\%$ | | test_stack | 30.6518ms | 26.0080ms | 38.4496 Ops/s | 34.6168 Ops/s | $\textbf{\color{#35bf28}+11.07\\%}$ | | test_cat | 31.1697ms | 26.1374ms | 38.2594 Ops/s | 34.9764 Ops/s | $\textbf{\color{#35bf28}+9.39\\%}$ |
github-actions[bot] commented 3 months ago

$\color{#D29922}\textsf{\Large ⚠\kern{0.2cm}\normalsize Warning}$ Result of GPU Benchmark Tests

Total Benchmarks: 219. Improved: $\large\color{#35bf28}18$. Worsened: $\large\color{#d91a1a}17$.

Expand to view detailed results | Name | Max | Mean | Ops | Ops on Repo `HEAD` | Change | | -------------------------------------------------- | --------- | --------- | --------------- | ------------------ | ----------------------------------- | | test_plain_set_nested | 0.1265ms | 17.7444μs | 56.3557 KOps/s | 59.0988 KOps/s | $\color{#d91a1a}-4.64\\%$ | | test_plain_set_stack_nested | 34.5410μs | 17.9767μs | 55.6275 KOps/s | 58.4449 KOps/s | $\color{#d91a1a}-4.82\\%$ | | test_plain_set_nested_inplace | 39.2010μs | 18.9268μs | 52.8351 KOps/s | 55.3911 KOps/s | $\color{#d91a1a}-4.61\\%$ | | test_plain_set_stack_nested_inplace | 40.2510μs | 19.0516μs | 52.4891 KOps/s | 55.1837 KOps/s | $\color{#d91a1a}-4.88\\%$ | | test_items | 18.7100μs | 4.6262μs | 216.1592 KOps/s | 215.9731 KOps/s | $\color{#35bf28}+0.09\\%$ | | test_items_nested | 0.3959ms | 0.3628ms | 2.7561 KOps/s | 2.5910 KOps/s | $\textbf{\color{#35bf28}+6.37\\%}$ | | test_items_nested_locked | 0.4241ms | 0.3670ms | 2.7250 KOps/s | 2.5309 KOps/s | $\textbf{\color{#35bf28}+7.67\\%}$ | | test_items_nested_leaf | 0.1067ms | 83.5386μs | 11.9705 KOps/s | 11.6192 KOps/s | $\color{#35bf28}+3.02\\%$ | | test_items_stack_nested | 0.4334ms | 0.3636ms | 2.7503 KOps/s | 2.5461 KOps/s | $\textbf{\color{#35bf28}+8.02\\%}$ | | test_items_stack_nested_leaf | 0.1082ms | 84.0433μs | 11.8986 KOps/s | 11.4812 KOps/s | $\color{#35bf28}+3.64\\%$ | | test_items_stack_nested_locked | 0.4336ms | 0.3664ms | 2.7295 KOps/s | 2.5395 KOps/s | $\textbf{\color{#35bf28}+7.48\\%}$ | | test_keys | 14.8400μs | 4.3637μs | 229.1609 KOps/s | 227.9772 KOps/s | $\color{#35bf28}+0.52\\%$ | | test_keys_nested | 88.2620μs | 66.2250μs | 15.1000 KOps/s | 14.7549 KOps/s | $\color{#35bf28}+2.34\\%$ | | test_keys_nested_locked | 0.9273ms | 72.5108μs | 13.7910 KOps/s | 13.7589 KOps/s | $\color{#35bf28}+0.23\\%$ | | test_keys_nested_leaf | 80.3920μs | 57.8681μs | 17.2807 KOps/s | 17.4480 KOps/s | $\color{#d91a1a}-0.96\\%$ | | test_keys_stack_nested | 
84.5120μs | 67.7207μs | 14.7665 KOps/s | 14.8295 KOps/s | $\color{#d91a1a}-0.42\\%$ | | test_keys_stack_nested_leaf | 77.0610μs | 57.8930μs | 17.2732 KOps/s | 17.0618 KOps/s | $\color{#35bf28}+1.24\\%$ | | test_keys_stack_nested_locked | 0.1025ms | 72.5153μs | 13.7902 KOps/s | 13.6854 KOps/s | $\color{#35bf28}+0.77\\%$ | | test_values | 8.4803μs | 1.7539μs | 570.1540 KOps/s | 567.9012 KOps/s | $\color{#35bf28}+0.40\\%$ | | test_values_nested | 88.1620μs | 33.8190μs | 29.5692 KOps/s | 29.5004 KOps/s | $\color{#35bf28}+0.23\\%$ | | test_values_nested_locked | 49.2610μs | 35.3275μs | 28.3065 KOps/s | 28.0528 KOps/s | $\color{#35bf28}+0.90\\%$ | | test_values_nested_leaf | 47.8710μs | 29.8024μs | 33.5543 KOps/s | 33.0745 KOps/s | $\color{#35bf28}+1.45\\%$ | | test_values_stack_nested | 53.2610μs | 33.9613μs | 29.4453 KOps/s | 28.7774 KOps/s | $\color{#35bf28}+2.32\\%$ | | test_values_stack_nested_leaf | 56.6110μs | 30.1132μs | 33.2081 KOps/s | 32.5224 KOps/s | $\color{#35bf28}+2.11\\%$ | | test_values_stack_nested_locked | 97.8120μs | 35.5901μs | 28.0977 KOps/s | 27.6872 KOps/s | $\color{#35bf28}+1.48\\%$ | | test_membership | 1.1905μs | 0.5380μs | 1.8589 MOps/s | 1.8496 MOps/s | $\color{#35bf28}+0.50\\%$ | | test_membership_nested | 17.4700μs | 2.0477μs | 488.3438 KOps/s | 470.7178 KOps/s | $\color{#35bf28}+3.74\\%$ | | test_membership_nested_leaf | 10.6455μs | 1.9542μs | 511.7258 KOps/s | 492.7585 KOps/s | $\color{#35bf28}+3.85\\%$ | | test_membership_stacked_nested | 19.5910μs | 2.0333μs | 491.8182 KOps/s | 471.5269 KOps/s | $\color{#35bf28}+4.30\\%$ | | test_membership_stacked_nested_leaf | 24.9610μs | 1.9927μs | 501.8350 KOps/s | 474.7144 KOps/s | $\textbf{\color{#35bf28}+5.71\\%}$ | | test_membership_nested_last | 21.6610μs | 2.9117μs | 343.4378 KOps/s | 332.0424 KOps/s | $\color{#35bf28}+3.43\\%$ | | test_membership_nested_leaf_last | 22.7900μs | 2.8927μs | 345.7022 KOps/s | 326.2193 KOps/s | $\textbf{\color{#35bf28}+5.97\\%}$ | | 
test_membership_stacked_nested_last | 22.7300μs | 2.8883μs | 346.2300 KOps/s | 328.1983 KOps/s | $\textbf{\color{#35bf28}+5.49\\%}$ | | test_membership_stacked_nested_leaf_last | 15.5900μs | 2.8894μs | 346.0898 KOps/s | 327.9345 KOps/s | $\textbf{\color{#35bf28}+5.54\\%}$ | | test_nested_getleaf | 24.2610μs | 7.9558μs | 125.6949 KOps/s | 124.4645 KOps/s | $\color{#35bf28}+0.99\\%$ | | test_nested_get | 22.9500μs | 7.4734μs | 133.8082 KOps/s | 131.9923 KOps/s | $\color{#35bf28}+1.38\\%$ | | test_stacked_getleaf | 26.6700μs | 7.9174μs | 126.3034 KOps/s | 123.5987 KOps/s | $\color{#35bf28}+2.19\\%$ | | test_stacked_get | 29.6300μs | 7.4707μs | 133.8555 KOps/s | 133.1137 KOps/s | $\color{#35bf28}+0.56\\%$ | | test_nested_getitemleaf | 23.1600μs | 8.1559μs | 122.6108 KOps/s | 121.7666 KOps/s | $\color{#35bf28}+0.69\\%$ | | test_nested_getitem | 22.6800μs | 7.6236μs | 131.1715 KOps/s | 130.3763 KOps/s | $\color{#35bf28}+0.61\\%$ | | test_stacked_getitemleaf | 24.0510μs | 8.1413μs | 122.8306 KOps/s | 120.9036 KOps/s | $\color{#35bf28}+1.59\\%$ | | test_stacked_getitem | 22.4400μs | 7.6536μs | 130.6573 KOps/s | 129.8766 KOps/s | $\color{#35bf28}+0.60\\%$ | | test_lock_nested | 0.9481ms | 0.4643ms | 2.1537 KOps/s | 2.0968 KOps/s | $\color{#35bf28}+2.71\\%$ | | test_lock_stack_nested | 0.4673ms | 0.4260ms | 2.3473 KOps/s | 2.2994 KOps/s | $\color{#35bf28}+2.08\\%$ | | test_unlock_nested | 0.8107ms | 0.3857ms | 2.5925 KOps/s | 2.4940 KOps/s | $\color{#35bf28}+3.95\\%$ | | test_unlock_stack_nested | 0.3948ms | 0.3459ms | 2.8914 KOps/s | 2.8156 KOps/s | $\color{#35bf28}+2.69\\%$ | | test_flatten_speed | 0.5649ms | 0.1015ms | 9.8515 KOps/s | 9.4219 KOps/s | $\color{#35bf28}+4.56\\%$ | | test_unflatten_speed | 0.3432ms | 0.2888ms | 3.4622 KOps/s | 3.3811 KOps/s | $\color{#35bf28}+2.40\\%$ | | test_common_ops | 1.5783ms | 1.3372ms | 747.8378 Ops/s | 764.8980 Ops/s | $\color{#d91a1a}-2.23\\%$ | | test_creation | 16.7300μs | 1.6471μs | 607.1126 KOps/s | 495.0469 KOps/s | 
$\textbf{\color{#35bf28}+22.64\\%}$ | | test_creation_empty | 36.3810μs | 18.7820μs | 53.2425 KOps/s | 58.4441 KOps/s | $\textbf{\color{#d91a1a}-8.90\\%}$ | | test_creation_nested_1 | 46.0310μs | 20.9198μs | 47.8016 KOps/s | 52.6743 KOps/s | $\textbf{\color{#d91a1a}-9.25\\%}$ | | test_creation_nested_2 | 57.8620μs | 23.0510μs | 43.3822 KOps/s | 45.5790 KOps/s | $\color{#d91a1a}-4.82\\%$ | | test_clone | 59.5210μs | 30.4263μs | 32.8663 KOps/s | 32.8826 KOps/s | $\color{#d91a1a}-0.05\\%$ | | test_getitem[int] | 1.2923ms | 17.1187μs | 58.4156 KOps/s | 57.2749 KOps/s | $\color{#35bf28}+1.99\\%$ | | test_getitem[slice_int] | 0.1557ms | 29.5634μs | 33.8256 KOps/s | 32.2030 KOps/s | $\textbf{\color{#35bf28}+5.04\\%}$ | | test_getitem[range] | 0.2815ms | 0.1152ms | 8.6841 KOps/s | 8.6846 KOps/s | $-0.01\\%$ | | test_getitem[tuple] | 91.5871ms | 31.4977μs | 31.7483 KOps/s | 37.7105 KOps/s | $\textbf{\color{#d91a1a}-15.81\\%}$ | | test_getitem[list] | 0.2167ms | 0.1037ms | 9.6422 KOps/s | 9.5145 KOps/s | $\color{#35bf28}+1.34\\%$ | | test_setitem_dim[int] | 74.8820μs | 53.8178μs | 18.5812 KOps/s | 18.6304 KOps/s | $\color{#d91a1a}-0.26\\%$ | | test_setitem_dim[slice_int] | 0.1031ms | 78.7743μs | 12.6945 KOps/s | 12.6940 KOps/s | $+0.00\\%$ | | test_setitem_dim[range] | 0.1764ms | 0.1435ms | 6.9678 KOps/s | 7.0576 KOps/s | $\color{#d91a1a}-1.27\\%$ | | test_setitem_dim[tuple] | 0.1085ms | 75.5833μs | 13.2304 KOps/s | 14.0538 KOps/s | $\textbf{\color{#d91a1a}-5.86\\%}$ | | test_setitem | 72.0320μs | 48.3366μs | 20.6883 KOps/s | 22.9547 KOps/s | $\textbf{\color{#d91a1a}-9.87\\%}$ | | test_set | 81.3420μs | 43.6158μs | 22.9275 KOps/s | 23.4798 KOps/s | $\color{#d91a1a}-2.35\\%$ | | test_set_shared | 0.3854ms | 54.0983μs | 18.4849 KOps/s | 18.1227 KOps/s | $\color{#35bf28}+2.00\\%$ | | test_update | 93.5720μs | 54.2245μs | 18.4418 KOps/s | 19.4313 KOps/s | $\textbf{\color{#d91a1a}-5.09\\%}$ | | test_update_nested | 0.1071ms | 62.5275μs | 15.9930 KOps/s | 16.9258 KOps/s | 
$\textbf{\color{#d91a1a}-5.51\\%}$ | | test_update__nested | 96.8320μs | 61.7672μs | 16.1898 KOps/s | 16.0474 KOps/s | $\color{#35bf28}+0.89\\%$ | | test_set_nested | 89.5620μs | 47.0962μs | 21.2331 KOps/s | 22.0068 KOps/s | $\color{#d91a1a}-3.52\\%$ | | test_set_nested_new | 96.4620μs | 53.0365μs | 18.8549 KOps/s | 20.2089 KOps/s | $\textbf{\color{#d91a1a}-6.70\\%}$ | | test_select | 0.1023ms | 67.9125μs | 14.7248 KOps/s | 14.6303 KOps/s | $\color{#35bf28}+0.65\\%$ | | test_select_nested | 0.3853ms | 51.1553μs | 19.5483 KOps/s | 19.0707 KOps/s | $\color{#35bf28}+2.50\\%$ | | test_exclude_nested | 0.1075ms | 68.3868μs | 14.6227 KOps/s | 13.7718 KOps/s | $\textbf{\color{#35bf28}+6.18\\%}$ | | test_empty[True] | 0.3276ms | 0.2819ms | 3.5473 KOps/s | 3.3659 KOps/s | $\textbf{\color{#35bf28}+5.39\\%}$ | | test_empty[False] | 2.4391μs | 0.8640μs | 1.1574 MOps/s | 1.0741 MOps/s | $\textbf{\color{#35bf28}+7.75\\%}$ | | test_to | 72.8510μs | 38.2303μs | 26.1572 KOps/s | 26.4055 KOps/s | $\color{#d91a1a}-0.94\\%$ | | test_to_nonblocking | 43.4510μs | 23.4781μs | 42.5929 KOps/s | 41.0848 KOps/s | $\color{#35bf28}+3.67\\%$ | | test_unbind_speed | 0.9375ms | 0.2962ms | 3.3763 KOps/s | 3.2343 KOps/s | $\color{#35bf28}+4.39\\%$ | | test_unbind_speed_stack0 | 0.3525ms | 0.2933ms | 3.4093 KOps/s | 3.2631 KOps/s | $\color{#35bf28}+4.48\\%$ | | test_unbind_speed_stack1 | 90.8489ms | 0.7613ms | 1.3135 KOps/s | 1.2661 KOps/s | $\color{#35bf28}+3.74\\%$ | | test_split | 93.2089ms | 2.3305ms | 429.0882 Ops/s | 422.3889 Ops/s | $\color{#35bf28}+1.59\\%$ | | test_chunk | 93.0951ms | 2.3167ms | 431.6557 Ops/s | 419.8878 Ops/s | $\color{#35bf28}+2.80\\%$ | | test_creation[device0] | 0.1867ms | 0.1056ms | 9.4677 KOps/s | 9.4972 KOps/s | $\color{#d91a1a}-0.31\\%$ | | test_creation_from_tensor | 0.1810ms | 0.1020ms | 9.8014 KOps/s | 9.7730 KOps/s | $\color{#35bf28}+0.29\\%$ | | test_add_one[memmap_tensor0] | 57.8210μs | 8.6982μs | 114.9667 KOps/s | 109.6228 KOps/s | $\color{#35bf28}+4.87\\%$ | 
| test_contiguous[memmap_tensor0] | 26.8000μs | 2.1946μs | 455.6618 KOps/s | 451.7378 KOps/s | $\color{#35bf28}+0.87\\%$ | | test_stack[memmap_tensor0] | 24.1310μs | 6.8468μs | 146.0545 KOps/s | 145.2337 KOps/s | $\color{#35bf28}+0.57\\%$ | | test_memmaptd_index | 1.1792ms | 0.4384ms | 2.2813 KOps/s | 2.2207 KOps/s | $\color{#35bf28}+2.73\\%$ | | test_memmaptd_index_astensor | 0.8259ms | 0.5022ms | 1.9912 KOps/s | 1.9518 KOps/s | $\color{#35bf28}+2.02\\%$ | | test_memmaptd_index_op | 1.5155ms | 1.0838ms | 922.6949 Ops/s | 922.4421 Ops/s | $\color{#35bf28}+0.03\\%$ | | test_serialize_model | 0.1007s | 95.9221ms | 10.4251 Ops/s | 10.1813 Ops/s | $\color{#35bf28}+2.39\\%$ | | test_serialize_model_pickle | 1.3530s | 1.2365s | 0.8087 Ops/s | 0.8071 Ops/s | $\color{#35bf28}+0.21\\%$ | | test_serialize_weights | 94.7339ms | 91.6239ms | 10.9142 Ops/s | 9.2013 Ops/s | $\textbf{\color{#35bf28}+18.62\\%}$ | | test_serialize_weights_returnearly | 0.2623s | 87.2317ms | 11.4637 Ops/s | 11.5148 Ops/s | $\color{#d91a1a}-0.44\\%$ | | test_serialize_weights_pickle | 1.3554s | 1.2363s | 0.8089 Ops/s | 0.8086 Ops/s | $\color{#35bf28}+0.03\\%$ | | test_reshape_pytree | 78.6410μs | 38.3122μs | 26.1014 KOps/s | 25.7036 KOps/s | $\color{#35bf28}+1.55\\%$ | | test_reshape_td | 0.2551ms | 43.2396μs | 23.1269 KOps/s | 23.0337 KOps/s | $\color{#35bf28}+0.40\\%$ | | test_view_pytree | 72.7820μs | 38.0793μs | 26.2610 KOps/s | 25.3271 KOps/s | $\color{#35bf28}+3.69\\%$ | | test_view_td | 0.2544ms | 49.2678μs | 20.2972 KOps/s | 19.8410 KOps/s | $\color{#35bf28}+2.30\\%$ | | test_unbind_pytree | 99.6820μs | 36.5302μs | 27.3746 KOps/s | 26.7228 KOps/s | $\color{#35bf28}+2.44\\%$ | | test_unbind_td | 0.4114ms | 44.3999μs | 22.5226 KOps/s | 22.0452 KOps/s | $\color{#35bf28}+2.17\\%$ | | test_split_pytree | 90.2320μs | 51.2906μs | 19.4967 KOps/s | 18.5035 KOps/s | $\textbf{\color{#35bf28}+5.37\\%}$ | | test_split_td | 0.2782ms | 59.7502μs | 16.7363 KOps/s | 15.0572 KOps/s | 
$\textbf{\color{#35bf28}+11.15\\%}$ | | test_add_pytree | 0.1199ms | 60.2282μs | 16.6035 KOps/s | 15.1949 KOps/s | $\textbf{\color{#35bf28}+9.27\\%}$ | | test_add_td | 0.3172ms | 0.1004ms | 9.9572 KOps/s | 9.9248 KOps/s | $\color{#35bf28}+0.33\\%$ | | test_compile_add_one_nested[tensordict-compile] | 0.4451ms | 0.2153ms | 4.6440 KOps/s | 4.7485 KOps/s | $\color{#d91a1a}-2.20\\%$ | | test_compile_add_one_nested[tensordict-eager] | 0.2572ms | 0.1687ms | 5.9293 KOps/s | 5.7618 KOps/s | $\color{#35bf28}+2.91\\%$ | | test_compile_add_one_nested[pytree-compile] | 0.1874ms | 0.1476ms | 6.7741 KOps/s | 6.7564 KOps/s | $\color{#35bf28}+0.26\\%$ | | test_compile_add_one_nested[pytree-eager] | 0.2677ms | 0.1997ms | 5.0077 KOps/s | 4.9929 KOps/s | $\color{#35bf28}+0.30\\%$ | | test_compile_copy_nested[tensordict-compile] | 44.3410μs | 21.4257μs | 46.6729 KOps/s | 45.4033 KOps/s | $\color{#35bf28}+2.80\\%$ | | test_compile_copy_nested[tensordict-eager] | 0.1276ms | 47.0200μs | 21.2675 KOps/s | 20.6984 KOps/s | $\color{#35bf28}+2.75\\%$ | | test_compile_copy_nested[pytree-compile] | 0.1115ms | 73.2082μs | 13.6597 KOps/s | 13.5808 KOps/s | $\color{#35bf28}+0.58\\%$ | | test_compile_copy_nested[pytree-eager] | 80.1520μs | 59.3472μs | 16.8500 KOps/s | 16.7102 KOps/s | $\color{#35bf28}+0.84\\%$ | | test_compile_add_one_flat[tensordict-compile] | 0.4842ms | 0.3287ms | 3.0424 KOps/s | 3.0387 KOps/s | $\color{#35bf28}+0.12\\%$ | | test_compile_add_one_flat[tensordict-eager] | 0.2688ms | 0.2199ms | 4.5469 KOps/s | 4.5524 KOps/s | $\color{#d91a1a}-0.12\\%$ | | test_compile_add_one_flat[tensorclass-compile] | 0.1831ms | 0.1351ms | 7.4028 KOps/s | 7.5975 KOps/s | $\color{#d91a1a}-2.56\\%$ | | test_compile_add_one_flat[tensorclass-eager] | 0.1202ms | 62.6908μs | 15.9513 KOps/s | 15.5177 KOps/s | $\color{#35bf28}+2.79\\%$ | | test_compile_add_one_flat[pytree-compile] | 0.3909ms | 0.3274ms | 3.0543 KOps/s | 3.0334 KOps/s | $\color{#35bf28}+0.69\\%$ | | test_compile_add_one_flat[pytree-eager] 
| 0.7324ms | 0.6491ms | 1.5405 KOps/s | 1.5414 KOps/s | $\color{#d91a1a}-0.06\\%$ | | test_compile_add_self_flat[tensordict-eager] | 0.3288ms | 0.2678ms | 3.7347 KOps/s | 3.6691 KOps/s | $\color{#35bf28}+1.79\\%$ | | test_compile_add_self_flat[tensordict-compile] | 0.4014ms | 0.3291ms | 3.0382 KOps/s | 3.0091 KOps/s | $\color{#35bf28}+0.96\\%$ | | test_compile_add_self_flat[tensorclass-eager] | 0.1622ms | 76.1110μs | 13.1387 KOps/s | 13.0401 KOps/s | $\color{#35bf28}+0.76\\%$ | | test_compile_add_self_flat[tensorclass-compile] | 0.2005ms | 0.1332ms | 7.5061 KOps/s | 7.5218 KOps/s | $\color{#d91a1a}-0.21\\%$ | | test_compile_add_self_flat[pytree-eager] | 0.7215ms | 0.5605ms | 1.7843 KOps/s | 1.8065 KOps/s | $\color{#d91a1a}-1.23\\%$ | | test_compile_add_self_flat[pytree-compile] | 0.3624ms | 0.3264ms | 3.0636 KOps/s | 3.0430 KOps/s | $\color{#35bf28}+0.68\\%$ | | test_compile_copy_flat[tensordict-compile] | 42.5410μs | 18.7318μs | 53.3851 KOps/s | 54.3814 KOps/s | $\color{#d91a1a}-1.83\\%$ | | test_compile_copy_flat[tensordict-eager] | 55.1410μs | 32.4125μs | 30.8523 KOps/s | 31.1418 KOps/s | $\color{#d91a1a}-0.93\\%$ | | test_compile_copy_flat[pytree-compile] | 0.1122ms | 76.6567μs | 13.0452 KOps/s | 12.9703 KOps/s | $\color{#35bf28}+0.58\\%$ | | test_compile_copy_flat[pytree-eager] | 85.0110μs | 60.0618μs | 16.6495 KOps/s | 16.5187 KOps/s | $\color{#35bf28}+0.79\\%$ | | test_compile_assign_and_add[tensordict-compile] | 2.5868ms | 0.9410ms | 1.0627 KOps/s | 1.0781 KOps/s | $\color{#d91a1a}-1.43\\%$ | | test_compile_assign_and_add[tensordict-eager] | 3.5148ms | 3.3757ms | 296.2318 Ops/s | 292.9341 Ops/s | $\color{#35bf28}+1.13\\%$ | | test_compile_assign_and_add[pytree-compile] | 2.5556ms | 0.9279ms | 1.0777 KOps/s | 1.0824 KOps/s | $\color{#d91a1a}-0.44\\%$ | | test_compile_assign_and_add[pytree-eager] | 3.7335ms | 3.5646ms | 280.5350 Ops/s | 297.3396 Ops/s | $\textbf{\color{#d91a1a}-5.65\\%}$ | | test_compile_indexing[tensor-tensordict-compile] | 0.1645ms | 
0.1187ms | 8.4261 KOps/s | 8.8992 KOps/s | $\textbf{\color{#d91a1a}-5.32\\%}$ | | test_compile_indexing[tensor-tensordict-eager] | 0.2386ms | 67.7557μs | 14.7589 KOps/s | 16.1026 KOps/s | $\textbf{\color{#d91a1a}-8.34\\%}$ | | test_compile_indexing[tensor-tensorclass-compile] | 0.1563ms | 0.1118ms | 8.9406 KOps/s | 9.5375 KOps/s | $\textbf{\color{#d91a1a}-6.26\\%}$ | | test_compile_indexing[tensor-tensorclass-eager] | 80.6710μs | 49.0655μs | 20.3809 KOps/s | 21.6812 KOps/s | $\textbf{\color{#d91a1a}-6.00\\%}$ | | test_compile_indexing[tensor-pytree-compile] | 0.1616ms | 0.1114ms | 8.9783 KOps/s | 9.5358 KOps/s | $\textbf{\color{#d91a1a}-5.85\\%}$ | | test_compile_indexing[tensor-pytree-eager] | 84.2920μs | 49.6800μs | 20.1288 KOps/s | 21.7989 KOps/s | $\textbf{\color{#d91a1a}-7.66\\%}$ | | test_compile_indexing[slice-tensordict-compile] | 0.1884ms | 0.1481ms | 6.7501 KOps/s | 7.1322 KOps/s | $\textbf{\color{#d91a1a}-5.36\\%}$ | | test_compile_indexing[slice-tensordict-eager] | 0.1970ms | 26.8168μs | 37.2900 KOps/s | 37.1550 KOps/s | $\color{#35bf28}+0.36\\%$ | | test_compile_indexing[slice-tensorclass-compile] | 0.1848ms | 0.1393ms | 7.1778 KOps/s | 7.5474 KOps/s | $\color{#d91a1a}-4.90\\%$ | | test_compile_indexing[slice-tensorclass-eager] | 62.2410μs | 23.6452μs | 42.2920 KOps/s | 41.7713 KOps/s | $\color{#35bf28}+1.25\\%$ | | test_compile_indexing[slice-pytree-compile] | 0.1765ms | 0.1327ms | 7.5340 KOps/s | 7.4513 KOps/s | $\color{#35bf28}+1.11\\%$ | | test_compile_indexing[slice-pytree-eager] | 47.9210μs | 23.1463μs | 43.2034 KOps/s | 42.1672 KOps/s | $\color{#35bf28}+2.46\\%$ | | test_compile_indexing[int-tensordict-compile] | 0.1851ms | 0.1408ms | 7.1043 KOps/s | 7.1751 KOps/s | $\color{#d91a1a}-0.99\\%$ | | test_compile_indexing[int-tensordict-eager] | 0.4667ms | 26.7534μs | 37.3785 KOps/s | 36.8087 KOps/s | $\color{#35bf28}+1.55\\%$ | | test_compile_indexing[int-tensorclass-compile] | 0.2306ms | 0.1365ms | 7.3239 KOps/s | 7.6023 KOps/s | 
$\color{#d91a1a}-3.66\\%$ | | test_compile_indexing[int-tensorclass-eager] | 50.5610μs | 23.8142μs | 41.9917 KOps/s | 42.6219 KOps/s | $\color{#d91a1a}-1.48\\%$ | | test_compile_indexing[int-pytree-compile] | 0.1652ms | 0.1328ms | 7.5275 KOps/s | 7.5526 KOps/s | $\color{#d91a1a}-0.33\\%$ | | test_compile_indexing[int-pytree-eager] | 48.7010μs | 23.0953μs | 43.2988 KOps/s | 41.9273 KOps/s | $\color{#35bf28}+3.27\\%$ | | test_mod_add[eager] | 77.3010μs | 39.8652μs | 25.0845 KOps/s | 25.9498 KOps/s | $\color{#d91a1a}-3.33\\%$ | | test_mod_add[compile] | 0.2336ms | 71.9368μs | 13.9011 KOps/s | 14.0781 KOps/s | $\color{#d91a1a}-1.26\\%$ | | test_mod_add[compile-overhead] | 0.2619ms | 0.1476ms | 6.7760 KOps/s | 6.6754 KOps/s | $\color{#35bf28}+1.51\\%$ | | test_mod_wrap[eager] | 0.3599ms | 0.2667ms | 3.7489 KOps/s | 3.9962 KOps/s | $\textbf{\color{#d91a1a}-6.19\\%}$ | | test_mod_wrap[compile] | 0.4120ms | 0.3059ms | 3.2689 KOps/s | 3.2593 KOps/s | $\color{#35bf28}+0.29\\%$ | | test_mod_wrap[compile-overhead] | 7.7414ms | 4.1452ms | 241.2417 Ops/s | 231.9929 Ops/s | $\color{#35bf28}+3.99\\%$ | | test_mod_wrap_and_backward[eager] | 1.5228ms | 1.4176ms | 705.3929 Ops/s | 705.6743 Ops/s | $\color{#d91a1a}-0.04\\%$ | | test_mod_wrap_and_backward[compile] | 1.6593ms | 1.4549ms | 687.3329 Ops/s | 687.0094 Ops/s | $\color{#35bf28}+0.05\\%$ | | test_mod_wrap_and_backward[compile-overhead] | 1.4560ms | 0.9883ms | 1.0119 KOps/s | 990.6075 Ops/s | $\color{#35bf28}+2.15\\%$ | | test_seq_add[eager] | 0.1771ms | 0.1154ms | 8.6657 KOps/s | 8.8983 KOps/s | $\color{#d91a1a}-2.61\\%$ | | test_seq_add[compile] | 0.2052ms | 87.6831μs | 11.4047 KOps/s | 11.3690 KOps/s | $\color{#35bf28}+0.31\\%$ | | test_seq_add[compile-overhead] | 0.1915ms | 0.1252ms | 7.9882 KOps/s | 7.9168 KOps/s | $\color{#35bf28}+0.90\\%$ | | test_seq_wrap[eager] | 0.4992ms | 0.4310ms | 2.3203 KOps/s | 2.3586 KOps/s | $\color{#d91a1a}-1.63\\%$ | | test_seq_wrap[compile] | 0.4428ms | 0.3286ms | 3.0428 KOps/s | 3.0069 
KOps/s | $\color{#35bf28}+1.19\\%$ | | test_seq_wrap[compile-overhead] | 0.3101s | 0.1483s | 6.7419 Ops/s | 6.7413 Ops/s | $+0.01\\%$ | | test_func_call_runtime[False-eager] | 0.9672ms | 0.7371ms | 1.3567 KOps/s | 1.3518 KOps/s | $\color{#35bf28}+0.36\\%$ | | test_func_call_runtime[False-compile] | 0.8798ms | 0.8094ms | 1.2355 KOps/s | 1.2334 KOps/s | $\color{#35bf28}+0.17\\%$ | | test_func_call_runtime[False-compile-overhead] | 0.4484ms | 0.3662ms | 2.7309 KOps/s | 2.7109 KOps/s | $\color{#35bf28}+0.74\\%$ | | test_func_call_runtime[True-eager] | 1.0532ms | 0.9835ms | 1.0168 KOps/s | 1.0088 KOps/s | $\color{#35bf28}+0.80\\%$ | | test_func_call_runtime[True-compile] | 0.9318ms | 0.8555ms | 1.1689 KOps/s | 1.1699 KOps/s | $\color{#d91a1a}-0.08\\%$ | | test_func_call_runtime[True-compile-overhead] | 0.4942ms | 0.4091ms | 2.4442 KOps/s | 2.4461 KOps/s | $\color{#d91a1a}-0.08\\%$ | | test_distributed | 0.2722ms | 69.2042μs | 14.4500 KOps/s | 13.9663 KOps/s | $\color{#35bf28}+3.46\\%$ | | test_tdmodule | 32.5110μs | 16.4148μs | 60.9208 KOps/s | 62.4909 KOps/s | $\color{#d91a1a}-2.51\\%$ | | test_tdmodule_dispatch | 50.4010μs | 34.1088μs | 29.3180 KOps/s | 30.7156 KOps/s | $\color{#d91a1a}-4.55\\%$ | | test_tdseq | 32.9310μs | 17.2349μs | 58.0219 KOps/s | 59.7421 KOps/s | $\color{#d91a1a}-2.88\\%$ | | test_tdseq_dispatch | 63.4110μs | 36.4712μs | 27.4189 KOps/s | 28.5763 KOps/s | $\color{#d91a1a}-4.05\\%$ | | test_instantiation_functorch | 2.0664ms | 2.0128ms | 496.8127 Ops/s | 502.6243 Ops/s | $\color{#d91a1a}-1.16\\%$ | | test_instantiation_td | 1.9444ms | 1.2870ms | 777.0301 Ops/s | 772.0757 Ops/s | $\color{#35bf28}+0.64\\%$ | | test_exec_functorch | 0.3085ms | 0.2258ms | 4.4295 KOps/s | 4.4575 KOps/s | $\color{#d91a1a}-0.63\\%$ | | test_exec_functional_call | 0.2509ms | 0.2175ms | 4.5975 KOps/s | 4.6188 KOps/s | $\color{#d91a1a}-0.46\\%$ | | test_exec_td | 0.3151ms | 0.2143ms | 4.6665 KOps/s | 4.6563 KOps/s | $\color{#35bf28}+0.22\\%$ | | test_exec_td_decorator | 
0.6303ms | 0.2888ms | 3.4627 KOps/s | 3.4346 KOps/s | $\color{#35bf28}+0.82\\%$ | | test_vmap_mlp_speed[True-True] | 0.7909ms | 0.6581ms | 1.5196 KOps/s | 1.5262 KOps/s | $\color{#d91a1a}-0.43\\%$ | | test_vmap_mlp_speed[True-False] | 0.7309ms | 0.6541ms | 1.5289 KOps/s | 1.5250 KOps/s | $\color{#35bf28}+0.25\\%$ | | test_vmap_mlp_speed[False-True] | 0.6899ms | 0.5692ms | 1.7568 KOps/s | 1.7526 KOps/s | $\color{#35bf28}+0.24\\%$ | | test_vmap_mlp_speed[False-False] | 0.6860ms | 0.5687ms | 1.7585 KOps/s | 1.7549 KOps/s | $\color{#35bf28}+0.20\\%$ | | test_vmap_mlp_speed_decorator[True-True] | 1.4382ms | 0.7333ms | 1.3637 KOps/s | 1.3577 KOps/s | $\color{#35bf28}+0.44\\%$ | | test_vmap_mlp_speed_decorator[True-False] | 0.8865ms | 0.7295ms | 1.3707 KOps/s | 1.3631 KOps/s | $\color{#35bf28}+0.56\\%$ | | test_vmap_mlp_speed_decorator[False-True] | 0.7639ms | 0.6287ms | 1.5905 KOps/s | 1.5695 KOps/s | $\color{#35bf28}+1.34\\%$ | | test_vmap_mlp_speed_decorator[False-False] | 0.7114ms | 0.6269ms | 1.5950 KOps/s | 1.5716 KOps/s | $\color{#35bf28}+1.49\\%$ | | test_vmap_transformer_speed[True-True] | 8.8563ms | 8.6030ms | 116.2379 Ops/s | 115.6897 Ops/s | $\color{#35bf28}+0.47\\%$ | | test_vmap_transformer_speed[True-False] | 8.8649ms | 8.5951ms | 116.3460 Ops/s | 115.9579 Ops/s | $\color{#35bf28}+0.33\\%$ | | test_vmap_transformer_speed[False-True] | 8.5995ms | 8.4995ms | 117.6543 Ops/s | 117.7574 Ops/s | $\color{#d91a1a}-0.09\\%$ | | test_vmap_transformer_speed[False-False] | 8.5857ms | 8.4863ms | 117.8371 Ops/s | 117.9419 Ops/s | $\color{#d91a1a}-0.09\\%$ | | test_vmap_transformer_speed_decorator[True-True] | 20.5417ms | 20.4392ms | 48.9255 Ops/s | 48.6927 Ops/s | $\color{#35bf28}+0.48\\%$ | | test_vmap_transformer_speed_decorator[True-False] | 20.5361ms | 20.4141ms | 48.9859 Ops/s | 48.9261 Ops/s | $\color{#35bf28}+0.12\\%$ | | test_vmap_transformer_speed_decorator[False-True] | 20.3899ms | 20.2307ms | 49.4298 Ops/s | 48.9343 Ops/s | $\color{#35bf28}+1.01\\%$ | | 
test_vmap_transformer_speed_decorator[False-False] | 20.3895ms | 20.2662ms | 49.3431 Ops/s | 49.1219 Ops/s | $\color{#35bf28}+0.45\\%$ | | test_to_module_speed[True] | 3.0010ms | 1.4799ms | 675.7226 Ops/s | 668.3207 Ops/s | $\color{#35bf28}+1.11\\%$ | | test_to_module_speed[False] | 1.9783ms | 1.4526ms | 688.4429 Ops/s | 683.4840 Ops/s | $\color{#35bf28}+0.73\\%$ | | test_tc_init | 59.3210μs | 39.6529μs | 25.2188 KOps/s | 25.7358 KOps/s | $\color{#d91a1a}-2.01\\%$ | | test_tc_init_nested | 0.1450ms | 80.4076μs | 12.4366 KOps/s | 12.7550 KOps/s | $\color{#d91a1a}-2.50\\%$ | | test_tc_first_layer_tensor | 3.7050μs | 0.7933μs | 1.2605 MOps/s | 1.0786 MOps/s | $\textbf{\color{#35bf28}+16.86\\%}$ | | test_tc_first_layer_nontensor | 17.6700μs | 2.5244μs | 396.1307 KOps/s | 392.2045 KOps/s | $\color{#35bf28}+1.00\\%$ | | test_tc_second_layer_tensor | 8.5903μs | 1.6084μs | 621.7515 KOps/s | 606.2722 KOps/s | $\color{#35bf28}+2.55\\%$ | | test_tc_second_layer_nontensor | 22.0810μs | 3.3675μs | 296.9599 KOps/s | 293.4639 KOps/s | $\color{#35bf28}+1.19\\%$ | | test_unbind | 0.3209s | 12.5388ms | 79.7522 Ops/s | 80.1298 Ops/s | $\color{#d91a1a}-0.47\\%$ | | test_full_like | 0.6617ms | 0.5774ms | 1.7318 KOps/s | 1.7250 KOps/s | $\color{#35bf28}+0.39\\%$ | | test_zeros_like | 0.2679ms | 0.1977ms | 5.0588 KOps/s | 5.0588 KOps/s | $-0.00\\%$ | | test_ones_like | 0.3335ms | 0.1975ms | 5.0639 KOps/s | 5.0642 KOps/s | $-0.01\\%$ | | test_clone | 0.4479ms | 0.4141ms | 2.4151 KOps/s | 2.4137 KOps/s | $\color{#35bf28}+0.06\\%$ | | test_squeeze | 28.1510μs | 10.8169μs | 92.4477 KOps/s | 92.0824 KOps/s | $\color{#35bf28}+0.40\\%$ | | test_unsqueeze | 0.2401ms | 80.3603μs | 12.4440 KOps/s | 12.8382 KOps/s | $\color{#d91a1a}-3.07\\%$ | | test_split | 0.4634ms | 0.1794ms | 5.5743 KOps/s | 5.5114 KOps/s | $\color{#35bf28}+1.14\\%$ | | test_permute | 0.2337ms | 0.1859ms | 5.3785 KOps/s | 5.2502 KOps/s | $\color{#35bf28}+2.44\\%$ | | test_stack | 1.2713ms | 0.9093ms | 1.0997 KOps/s | 1.1046 
KOps/s | $\color{#d91a1a}-0.44\\%$ | | test_cat | 1.2557ms | 1.2312ms | 812.2043 Ops/s | 812.0023 Ops/s | $\color{#35bf28}+0.02\\%$ |