pytorch / tensordict

TensorDict is a dedicated tensor container for PyTorch.
MIT License
832 stars 74 forks source link

[Test] Test non-tensor propagation in arithmetic ops #926

Closed vmoens closed 3 months ago

github-actions[bot] commented 3 months ago

⚠ Warning: Result of CPU Benchmark Tests

Total Benchmarks: 219. Improved: $\large\color{#35bf28}6$. Worsened: $\large\color{#d91a1a}59$.

Expand to view detailed results | Name | Max | Mean | Ops | Ops on Repo `HEAD` | Change | | ------------------------------------------------- | --------- | --------- | --------------- | ------------------ | ----------------------------------- | | test_plain_set_nested | 51.8870μs | 22.7407μs | 43.9741 KOps/s | 48.5035 KOps/s | $\textbf{\color{#d91a1a}-9.34\\%}$ | | test_plain_set_stack_nested | 68.8080μs | 22.9602μs | 43.5536 KOps/s | 48.6321 KOps/s | $\textbf{\color{#d91a1a}-10.44\\%}$ | | test_plain_set_nested_inplace | 66.9450μs | 24.6842μs | 40.5117 KOps/s | 44.5307 KOps/s | $\textbf{\color{#d91a1a}-9.03\\%}$ | | test_plain_set_stack_nested_inplace | 57.6980μs | 24.4266μs | 40.9389 KOps/s | 45.0116 KOps/s | $\textbf{\color{#d91a1a}-9.05\\%}$ | | test_items | 19.4870μs | 2.7663μs | 361.4898 KOps/s | 378.3748 KOps/s | $\color{#d91a1a}-4.46\\%$ | | test_items_nested | 0.5415ms | 0.3339ms | 2.9950 KOps/s | 2.9740 KOps/s | $\color{#35bf28}+0.71\\%$ | | test_items_nested_locked | 1.7447ms | 0.3373ms | 2.9646 KOps/s | 2.9713 KOps/s | $\color{#d91a1a}-0.23\\%$ | | test_items_nested_leaf | 0.1803ms | 84.7195μs | 11.8037 KOps/s | 11.9047 KOps/s | $\color{#d91a1a}-0.85\\%$ | | test_items_stack_nested | 0.6986ms | 0.3353ms | 2.9822 KOps/s | 2.9946 KOps/s | $\color{#d91a1a}-0.42\\%$ | | test_items_stack_nested_leaf | 0.1677ms | 84.2093μs | 11.8752 KOps/s | 11.4488 KOps/s | $\color{#35bf28}+3.72\\%$ | | test_items_stack_nested_locked | 0.4209ms | 0.3330ms | 3.0031 KOps/s | 2.9472 KOps/s | $\color{#35bf28}+1.90\\%$ | | test_keys | 20.4080μs | 3.8782μs | 257.8549 KOps/s | 256.6093 KOps/s | $\color{#35bf28}+0.49\\%$ | | test_keys_nested | 0.2828ms | 0.1432ms | 6.9844 KOps/s | 6.8983 KOps/s | $\color{#35bf28}+1.25\\%$ | | test_keys_nested_locked | 0.7098ms | 0.1481ms | 6.7530 KOps/s | 6.5754 KOps/s | $\color{#35bf28}+2.70\\%$ | | test_keys_nested_leaf | 0.2334ms | 0.1219ms | 8.2037 KOps/s | 7.9502 KOps/s | $\color{#35bf28}+3.19\\%$ | | test_keys_stack_nested | 0.2122ms | 
0.1423ms | 7.0258 KOps/s | 6.6933 KOps/s | $\color{#35bf28}+4.97\\%$ | | test_keys_stack_nested_leaf | 0.2323ms | 0.1211ms | 8.2588 KOps/s | 7.9991 KOps/s | $\color{#35bf28}+3.25\\%$ | | test_keys_stack_nested_locked | 0.2476ms | 0.1479ms | 6.7605 KOps/s | 6.5175 KOps/s | $\color{#35bf28}+3.73\\%$ | | test_values | 7.8697μs | 1.2138μs | 823.8716 KOps/s | 828.7081 KOps/s | $\color{#d91a1a}-0.58\\%$ | | test_values_nested | 95.4990μs | 50.2229μs | 19.9112 KOps/s | 19.8050 KOps/s | $\color{#35bf28}+0.54\\%$ | | test_values_nested_locked | 0.2453ms | 51.2291μs | 19.5202 KOps/s | 19.7170 KOps/s | $\color{#d91a1a}-1.00\\%$ | | test_values_nested_leaf | 0.1101ms | 45.8984μs | 21.7872 KOps/s | 21.9754 KOps/s | $\color{#d91a1a}-0.86\\%$ | | test_values_stack_nested | 96.5710μs | 50.3966μs | 19.8426 KOps/s | 19.2674 KOps/s | $\color{#35bf28}+2.99\\%$ | | test_values_stack_nested_leaf | 91.1650μs | 45.6273μs | 21.9167 KOps/s | 21.4142 KOps/s | $\color{#35bf28}+2.35\\%$ | | test_values_stack_nested_locked | 93.8660μs | 50.2251μs | 19.9104 KOps/s | 19.3326 KOps/s | $\color{#35bf28}+2.99\\%$ | | test_membership | 13.7560μs | 0.8997μs | 1.1114 MOps/s | 1.3079 MOps/s | $\textbf{\color{#d91a1a}-15.02\\%}$ | | test_membership_nested | 33.1110μs | 2.5767μs | 388.0874 KOps/s | 383.9148 KOps/s | $\color{#35bf28}+1.09\\%$ | | test_membership_nested_leaf | 31.1080μs | 2.5920μs | 385.8094 KOps/s | 376.5789 KOps/s | $\color{#35bf28}+2.45\\%$ | | test_membership_stacked_nested | 29.9160μs | 2.5418μs | 393.4250 KOps/s | 386.9100 KOps/s | $\color{#35bf28}+1.68\\%$ | | test_membership_stacked_nested_leaf | 98.4240μs | 2.5821μs | 387.2792 KOps/s | 381.3957 KOps/s | $\color{#35bf28}+1.54\\%$ | | test_membership_nested_last | 65.9940μs | 3.8554μs | 259.3755 KOps/s | 255.4165 KOps/s | $\color{#35bf28}+1.55\\%$ | | test_membership_nested_leaf_last | 27.1610μs | 3.9106μs | 255.7168 KOps/s | 256.7624 KOps/s | $\color{#d91a1a}-0.41\\%$ | | test_membership_stacked_nested_last | 23.1640μs | 3.8656μs | 
258.6891 KOps/s | 210.7775 KOps/s | $\textbf{\color{#35bf28}+22.73\\%}$ | | test_membership_stacked_nested_leaf_last | 35.5170μs | 3.8657μs | 258.6866 KOps/s | 220.7107 KOps/s | $\textbf{\color{#35bf28}+17.21\\%}$ | | test_nested_getleaf | 36.3280μs | 10.8732μs | 91.9695 KOps/s | 94.5363 KOps/s | $\color{#d91a1a}-2.72\\%$ | | test_nested_get | 39.3740μs | 10.2616μs | 97.4512 KOps/s | 99.3027 KOps/s | $\color{#d91a1a}-1.86\\%$ | | test_stacked_getleaf | 44.0920μs | 10.7946μs | 92.6388 KOps/s | 94.4423 KOps/s | $\color{#d91a1a}-1.91\\%$ | | test_stacked_get | 42.7110μs | 10.2990μs | 97.0970 KOps/s | 99.0810 KOps/s | $\color{#d91a1a}-2.00\\%$ | | test_nested_getitemleaf | 49.7760μs | 11.4999μs | 86.9577 KOps/s | 90.4189 KOps/s | $\color{#d91a1a}-3.83\\%$ | | test_nested_getitem | 39.2850μs | 10.6846μs | 93.5929 KOps/s | 98.3636 KOps/s | $\color{#d91a1a}-4.85\\%$ | | test_stacked_getitemleaf | 42.2800μs | 11.4524μs | 87.3177 KOps/s | 90.1878 KOps/s | $\color{#d91a1a}-3.18\\%$ | | test_stacked_getitem | 94.2780μs | 10.5899μs | 94.4293 KOps/s | 97.9061 KOps/s | $\color{#d91a1a}-3.55\\%$ | | test_lock_nested | 94.7517ms | 0.6043ms | 1.6547 KOps/s | 2.0474 KOps/s | $\textbf{\color{#d91a1a}-19.18\\%}$ | | test_lock_stack_nested | 0.8618ms | 0.4764ms | 2.0990 KOps/s | 2.1631 KOps/s | $\color{#d91a1a}-2.96\\%$ | | test_unlock_nested | 96.8077ms | 0.5239ms | 1.9088 KOps/s | 2.4685 KOps/s | $\textbf{\color{#d91a1a}-22.67\\%}$ | | test_unlock_stack_nested | 0.4703ms | 0.3861ms | 2.5902 KOps/s | 2.6447 KOps/s | $\color{#d91a1a}-2.06\\%$ | | test_flatten_speed | 0.2540ms | 0.1035ms | 9.6606 KOps/s | 9.6428 KOps/s | $\color{#35bf28}+0.18\\%$ | | test_unflatten_speed | 0.9564ms | 0.4320ms | 2.3149 KOps/s | 2.3322 KOps/s | $\color{#d91a1a}-0.74\\%$ | | test_common_ops | 6.1941ms | 1.1432ms | 874.7433 Ops/s | 1.0014 KOps/s | $\textbf{\color{#d91a1a}-12.65\\%}$ | | test_creation | 23.3340μs | 1.9878μs | 503.0790 KOps/s | 493.0351 KOps/s | $\color{#35bf28}+2.04\\%$ | | 
test_creation_empty | 61.7570μs | 19.5634μs | 51.1159 KOps/s | 64.2910 KOps/s | $\textbf{\color{#d91a1a}-20.49\\%}$ | | test_creation_nested_1 | 72.7880μs | 22.4331μs | 44.5770 KOps/s | 53.0403 KOps/s | $\textbf{\color{#d91a1a}-15.96\\%}$ | | test_creation_nested_2 | 77.5970μs | 25.9336μs | 38.5599 KOps/s | 45.5959 KOps/s | $\textbf{\color{#d91a1a}-15.43\\%}$ | | test_clone | 0.1335ms | 17.6665μs | 56.6042 KOps/s | 62.5437 KOps/s | $\textbf{\color{#d91a1a}-9.50\\%}$ | | test_getitem[int] | 1.2816ms | 16.3163μs | 61.2886 KOps/s | 62.5449 KOps/s | $\color{#d91a1a}-2.01\\%$ | | test_getitem[slice_int] | 0.1734ms | 32.1258μs | 31.1276 KOps/s | 32.7726 KOps/s | $\textbf{\color{#d91a1a}-5.02\\%}$ | | test_getitem[range] | 0.1911ms | 60.9977μs | 16.3941 KOps/s | 18.2784 KOps/s | $\textbf{\color{#d91a1a}-10.31\\%}$ | | test_getitem[tuple] | 0.1669ms | 25.6442μs | 38.9952 KOps/s | 41.3604 KOps/s | $\textbf{\color{#d91a1a}-5.72\\%}$ | | test_getitem[list] | 0.4470ms | 57.9319μs | 17.2617 KOps/s | 20.1679 KOps/s | $\textbf{\color{#d91a1a}-14.41\\%}$ | | test_setitem_dim[int] | 89.6200μs | 46.2215μs | 21.6349 KOps/s | 27.8382 KOps/s | $\textbf{\color{#d91a1a}-22.28\\%}$ | | test_setitem_dim[slice_int] | 0.1440ms | 78.9008μs | 12.6741 KOps/s | 14.8701 KOps/s | $\textbf{\color{#d91a1a}-14.77\\%}$ | | test_setitem_dim[range] | 0.1800ms | 0.1020ms | 9.7992 KOps/s | 11.2979 KOps/s | $\textbf{\color{#d91a1a}-13.27\\%}$ | | test_setitem_dim[tuple] | 0.1290ms | 63.9489μs | 15.6375 KOps/s | 18.7468 KOps/s | $\textbf{\color{#d91a1a}-16.59\\%}$ | | test_setitem | 0.1956ms | 31.6839μs | 31.5618 KOps/s | 37.4676 KOps/s | $\textbf{\color{#d91a1a}-15.76\\%}$ | | test_set | 0.1580ms | 30.3689μs | 32.9284 KOps/s | 38.7331 KOps/s | $\textbf{\color{#d91a1a}-14.99\\%}$ | | test_set_shared | 6.3674ms | 0.2260ms | 4.4254 KOps/s | 4.7563 KOps/s | $\textbf{\color{#d91a1a}-6.96\\%}$ | | test_update | 0.9290ms | 38.0244μs | 26.2989 KOps/s | 31.8936 KOps/s | $\textbf{\color{#d91a1a}-17.54\\%}$ | | 
test_update_nested | 0.1829ms | 47.3658μs | 21.1123 KOps/s | 24.2671 KOps/s | $\textbf{\color{#d91a1a}-13.00\\%}$ | | test_update__nested | 0.1484ms | 35.0821μs | 28.5046 KOps/s | 29.8542 KOps/s | $\color{#d91a1a}-4.52\\%$ | | test_set_nested | 0.1388ms | 32.1668μs | 31.0880 KOps/s | 34.7442 KOps/s | $\textbf{\color{#d91a1a}-10.52\\%}$ | | test_set_nested_new | 0.1978ms | 37.3970μs | 26.7401 KOps/s | 30.2185 KOps/s | $\textbf{\color{#d91a1a}-11.51\\%}$ | | test_select | 0.1774ms | 54.3432μs | 18.4016 KOps/s | 19.4750 KOps/s | $\textbf{\color{#d91a1a}-5.51\\%}$ | | test_select_nested | 0.1235ms | 58.5683μs | 17.0741 KOps/s | 17.0392 KOps/s | $\color{#35bf28}+0.20\\%$ | | test_exclude_nested | 0.1412ms | 76.4114μs | 13.0871 KOps/s | 12.9514 KOps/s | $\color{#35bf28}+1.05\\%$ | | test_empty[True] | 0.5422ms | 0.3202ms | 3.1234 KOps/s | 3.0796 KOps/s | $\color{#35bf28}+1.42\\%$ | | test_empty[False] | 7.9325μs | 1.2049μs | 829.9344 KOps/s | 834.5503 KOps/s | $\color{#d91a1a}-0.55\\%$ | | test_unbind_speed | 0.4991ms | 0.3045ms | 3.2839 KOps/s | 3.3040 KOps/s | $\color{#d91a1a}-0.61\\%$ | | test_unbind_speed_stack0 | 0.5595ms | 0.3028ms | 3.3021 KOps/s | 3.3411 KOps/s | $\color{#d91a1a}-1.17\\%$ | | test_unbind_speed_stack1 | 95.4509ms | 0.8160ms | 1.2254 KOps/s | 1.3934 KOps/s | $\textbf{\color{#d91a1a}-12.06\\%}$ | | test_split | 95.7187ms | 2.1464ms | 465.9044 Ops/s | 477.1650 Ops/s | $\color{#d91a1a}-2.36\\%$ | | test_chunk | 0.1003s | 2.1783ms | 459.0635 Ops/s | 474.5052 Ops/s | $\color{#d91a1a}-3.25\\%$ | | test_creation[device0] | 0.2371ms | 0.1218ms | 8.2109 KOps/s | 8.3127 KOps/s | $\color{#d91a1a}-1.22\\%$ | | test_creation_from_tensor | 3.8647ms | 0.1237ms | 8.0860 KOps/s | 8.3507 KOps/s | $\color{#d91a1a}-3.17\\%$ | | test_add_one[memmap_tensor0] | 0.3760ms | 8.0835μs | 123.7092 KOps/s | 131.6257 KOps/s | $\textbf{\color{#d91a1a}-6.01\\%}$ | | test_contiguous[memmap_tensor0] | 23.7750μs | 2.0072μs | 498.1981 KOps/s | 503.5431 KOps/s | 
$\color{#d91a1a}-1.06\\%$ | | test_stack[memmap_tensor0] | 84.3900μs | 5.9079μs | 169.2635 KOps/s | 173.2772 KOps/s | $\color{#d91a1a}-2.32\\%$ | | test_memmaptd_index | 1.2684ms | 0.4365ms | 2.2912 KOps/s | 2.5362 KOps/s | $\textbf{\color{#d91a1a}-9.66\\%}$ | | test_memmaptd_index_astensor | 1.0627ms | 0.5087ms | 1.9657 KOps/s | 2.0998 KOps/s | $\textbf{\color{#d91a1a}-6.39\\%}$ | | test_memmaptd_index_op | 1.7080ms | 1.1263ms | 887.8628 Ops/s | 1.0257 KOps/s | $\textbf{\color{#d91a1a}-13.44\\%}$ | | test_serialize_model | 0.1314s | 0.1208s | 8.2761 Ops/s | 7.5280 Ops/s | $\textbf{\color{#35bf28}+9.94\\%}$ | | test_serialize_model_pickle | 0.4592s | 0.3980s | 2.5128 Ops/s | 2.5007 Ops/s | $\color{#35bf28}+0.48\\%$ | | test_serialize_weights | 0.2207s | 0.1315s | 7.6021 Ops/s | 8.3756 Ops/s | $\textbf{\color{#d91a1a}-9.23\\%}$ | | test_serialize_weights_returnearly | 0.1791s | 0.1637s | 6.1091 Ops/s | 6.2011 Ops/s | $\color{#d91a1a}-1.48\\%$ | | test_serialize_weights_pickle | 1.0030s | 0.6803s | 1.4699 Ops/s | 2.4533 Ops/s | $\textbf{\color{#d91a1a}-40.09\\%}$ | | test_serialize_weights_filesystem | 0.1613s | 0.1479s | 6.7608 Ops/s | 6.4822 Ops/s | $\color{#35bf28}+4.30\\%$ | | test_serialize_model_filesystem | 0.2372s | 0.1588s | 6.2958 Ops/s | 6.5828 Ops/s | $\color{#d91a1a}-4.36\\%$ | | test_reshape_pytree | 92.8350μs | 39.7905μs | 25.1316 KOps/s | 24.7339 KOps/s | $\color{#35bf28}+1.61\\%$ | | test_reshape_td | 97.5050μs | 47.1173μs | 21.2236 KOps/s | 21.6660 KOps/s | $\color{#d91a1a}-2.04\\%$ | | test_view_pytree | 0.1233ms | 39.5978μs | 25.2539 KOps/s | 25.0795 KOps/s | $\color{#35bf28}+0.70\\%$ | | test_view_td | 0.1463ms | 55.0624μs | 18.1612 KOps/s | 18.9609 KOps/s | $\color{#d91a1a}-4.22\\%$ | | test_unbind_pytree | 89.4290μs | 36.8217μs | 27.1579 KOps/s | 27.0009 KOps/s | $\color{#35bf28}+0.58\\%$ | | test_unbind_td | 0.4175ms | 46.4922μs | 21.5090 KOps/s | 22.3856 KOps/s | $\color{#d91a1a}-3.92\\%$ | | test_split_pytree | 89.5700μs | 39.5532μs | 
25.2824 KOps/s | 24.9586 KOps/s | $\color{#35bf28}+1.30\\%$ | | test_split_td | 0.5413ms | 58.0639μs | 17.2224 KOps/s | 17.3872 KOps/s | $\color{#d91a1a}-0.95\\%$ | | test_add_pytree | 0.1092ms | 48.1655μs | 20.7617 KOps/s | 22.1472 KOps/s | $\textbf{\color{#d91a1a}-6.26\\%}$ | | test_add_td | 0.1791ms | 90.8077μs | 11.0123 KOps/s | 12.9851 KOps/s | $\textbf{\color{#d91a1a}-15.19\\%}$ | | test_compile_add_one_nested[tensordict-compile] | 0.1122ms | 53.9998μs | 18.5186 KOps/s | 19.1073 KOps/s | $\color{#d91a1a}-3.08\\%$ | | test_compile_add_one_nested[tensordict-eager] | 0.3277ms | 0.1922ms | 5.2028 KOps/s | 5.2874 KOps/s | $\color{#d91a1a}-1.60\\%$ | | test_compile_add_one_nested[pytree-compile] | 0.3369ms | 56.4396μs | 17.7180 KOps/s | 18.5160 KOps/s | $\color{#d91a1a}-4.31\\%$ | | test_compile_add_one_nested[pytree-eager] | 0.4197ms | 0.1493ms | 6.6993 KOps/s | 6.9673 KOps/s | $\color{#d91a1a}-3.85\\%$ | | test_compile_copy_nested[tensordict-compile] | 56.1470μs | 21.0799μs | 47.4386 KOps/s | 50.0497 KOps/s | $\textbf{\color{#d91a1a}-5.22\\%}$ | | test_compile_copy_nested[tensordict-eager] | 0.1171ms | 64.7406μs | 15.4463 KOps/s | 15.5979 KOps/s | $\color{#d91a1a}-0.97\\%$ | | test_compile_copy_nested[pytree-compile] | 0.1459ms | 78.1829μs | 12.7905 KOps/s | 12.4183 KOps/s | $\color{#35bf28}+3.00\\%$ | | test_compile_copy_nested[pytree-eager] | 0.2192ms | 72.4302μs | 13.8064 KOps/s | 13.6843 KOps/s | $\color{#35bf28}+0.89\\%$ | | test_compile_add_one_flat[tensordict-compile] | 0.4102ms | 0.1763ms | 5.6711 KOps/s | 5.7413 KOps/s | $\color{#d91a1a}-1.22\\%$ | | test_compile_add_one_flat[tensordict-eager] | 0.4812ms | 0.2025ms | 4.9393 KOps/s | 5.1831 KOps/s | $\color{#d91a1a}-4.70\\%$ | | test_compile_add_one_flat[tensorclass-compile] | 0.1477ms | 40.0218μs | 24.9864 KOps/s | 26.8272 KOps/s | $\textbf{\color{#d91a1a}-6.86\\%}$ | | test_compile_add_one_flat[tensorclass-eager] | 1.0167ms | 71.7383μs | 13.9396 KOps/s | 14.1676 KOps/s | $\color{#d91a1a}-1.61\\%$ | | 
test_compile_add_one_flat[pytree-compile] | 0.3635ms | 0.1770ms | 5.6499 KOps/s | 5.6203 KOps/s | $\color{#35bf28}+0.53\\%$ | | test_compile_add_one_flat[pytree-eager] | 0.6808ms | 0.3081ms | 3.2459 KOps/s | 3.4556 KOps/s | $\textbf{\color{#d91a1a}-6.07\\%}$ | | test_compile_add_self_flat[tensordict-eager] | 0.3871ms | 0.2115ms | 4.7284 KOps/s | 4.8109 KOps/s | $\color{#d91a1a}-1.71\\%$ | | test_compile_add_self_flat[tensordict-compile] | 0.4280ms | 0.1897ms | 5.2703 KOps/s | 5.8088 KOps/s | $\textbf{\color{#d91a1a}-9.27\\%}$ | | test_compile_add_self_flat[tensorclass-eager] | 0.7902ms | 66.7378μs | 14.9840 KOps/s | 16.0252 KOps/s | $\textbf{\color{#d91a1a}-6.50\\%}$ | | test_compile_add_self_flat[tensorclass-compile] | 0.1106ms | 41.4181μs | 24.1441 KOps/s | 26.1935 KOps/s | $\textbf{\color{#d91a1a}-7.82\\%}$ | | test_compile_add_self_flat[pytree-eager] | 0.4904ms | 0.2498ms | 4.0025 KOps/s | 4.2010 KOps/s | $\color{#d91a1a}-4.73\\%$ | | test_compile_add_self_flat[pytree-compile] | 0.3626ms | 0.1764ms | 5.6690 KOps/s | 5.7711 KOps/s | $\color{#d91a1a}-1.77\\%$ | | test_compile_copy_flat[tensordict-compile] | 0.2738ms | 0.1104ms | 9.0567 KOps/s | 9.3468 KOps/s | $\color{#d91a1a}-3.10\\%$ | | test_compile_copy_flat[tensordict-eager] | 0.1364ms | 56.3377μs | 17.7501 KOps/s | 17.9180 KOps/s | $\color{#d91a1a}-0.94\\%$ | | test_compile_copy_flat[pytree-compile] | 0.1715ms | 79.8160μs | 12.5288 KOps/s | 12.2053 KOps/s | $\color{#35bf28}+2.65\\%$ | | test_compile_copy_flat[pytree-eager] | 0.2128ms | 70.6493μs | 14.1544 KOps/s | 13.3505 KOps/s | $\textbf{\color{#35bf28}+6.02\\%}$ | | test_compile_assign_and_add[tensordict-compile] | 0.3386ms | 0.1967ms | 5.0831 KOps/s | 5.1263 KOps/s | $\color{#d91a1a}-0.84\\%$ | | test_compile_assign_and_add[tensordict-eager] | 2.3482ms | 1.6454ms | 607.7447 Ops/s | 616.8568 Ops/s | $\color{#d91a1a}-1.48\\%$ | | test_compile_assign_and_add[pytree-compile] | 0.3329ms | 0.1924ms | 5.1984 KOps/s | 5.2940 KOps/s | $\color{#d91a1a}-1.80\\%$ | 
| test_compile_assign_and_add[pytree-eager] | 1.3223ms | 1.1347ms | 881.2624 Ops/s | 933.9080 Ops/s | $\textbf{\color{#d91a1a}-5.64\\%}$ | | test_compile_assign_and_add_stack[compile] | 0.5785ms | 0.4172ms | 2.3968 KOps/s | 2.3883 KOps/s | $\color{#35bf28}+0.36\\%$ | | test_compile_assign_and_add_stack[eager] | 4.3190ms | 4.0829ms | 244.9254 Ops/s | 276.9924 Ops/s | $\textbf{\color{#d91a1a}-11.58\\%}$ | | test_compile_indexing[tensor-tensordict-compile] | 0.1072ms | 33.4364μs | 29.9075 KOps/s | 31.8772 KOps/s | $\textbf{\color{#d91a1a}-6.18\\%}$ | | test_compile_indexing[tensor-tensordict-eager] | 0.5640ms | 50.7816μs | 19.6922 KOps/s | 21.3549 KOps/s | $\textbf{\color{#d91a1a}-7.79\\%}$ | | test_compile_indexing[tensor-tensorclass-compile] | 89.1080μs | 29.4765μs | 33.9253 KOps/s | 34.6887 KOps/s | $\color{#d91a1a}-2.20\\%$ | | test_compile_indexing[tensor-tensorclass-eager] | 0.1230ms | 31.5537μs | 31.6920 KOps/s | 33.4702 KOps/s | $\textbf{\color{#d91a1a}-5.31\\%}$ | | test_compile_indexing[tensor-pytree-compile] | 0.1135ms | 29.0629μs | 34.4081 KOps/s | 35.3834 KOps/s | $\color{#d91a1a}-2.76\\%$ | | test_compile_indexing[tensor-pytree-eager] | 0.1061ms | 31.8526μs | 31.3946 KOps/s | 33.1936 KOps/s | $\textbf{\color{#d91a1a}-5.42\\%}$ | | test_compile_indexing[slice-tensordict-compile] | 0.1933ms | 72.2429μs | 13.8422 KOps/s | 13.8490 KOps/s | $\color{#d91a1a}-0.05\\%$ | | test_compile_indexing[slice-tensordict-eager] | 0.6170ms | 28.3552μs | 35.2669 KOps/s | 35.6702 KOps/s | $\color{#d91a1a}-1.13\\%$ | | test_compile_indexing[slice-tensorclass-compile] | 0.1684ms | 67.5062μs | 14.8135 KOps/s | 14.8670 KOps/s | $\color{#d91a1a}-0.36\\%$ | | test_compile_indexing[slice-tensorclass-eager] | 0.1097ms | 24.3701μs | 41.0339 KOps/s | 40.4937 KOps/s | $\color{#35bf28}+1.33\\%$ | | test_compile_indexing[slice-pytree-compile] | 0.1488ms | 67.9957μs | 14.7068 KOps/s | 14.9440 KOps/s | $\color{#d91a1a}-1.59\\%$ | | test_compile_indexing[slice-pytree-eager] | 88.9480μs | 
24.2001μs | 41.3221 KOps/s | 40.2114 KOps/s | $\color{#35bf28}+2.76\\%$ | | test_compile_indexing[int-tensordict-compile] | 0.2005ms | 71.8403μs | 13.9198 KOps/s | 13.8622 KOps/s | $\color{#35bf28}+0.42\\%$ | | test_compile_indexing[int-tensordict-eager] | 1.2667ms | 28.0770μs | 35.6164 KOps/s | 35.8571 KOps/s | $\color{#d91a1a}-0.67\\%$ | | test_compile_indexing[int-tensorclass-compile] | 0.2066ms | 66.8900μs | 14.9499 KOps/s | 14.9869 KOps/s | $\color{#d91a1a}-0.25\\%$ | | test_compile_indexing[int-tensorclass-eager] | 0.1055ms | 24.0604μs | 41.5620 KOps/s | 41.2782 KOps/s | $\color{#35bf28}+0.69\\%$ | | test_compile_indexing[int-pytree-compile] | 0.1596ms | 67.6820μs | 14.7750 KOps/s | 15.3047 KOps/s | $\color{#d91a1a}-3.46\\%$ | | test_compile_indexing[int-pytree-eager] | 0.1094ms | 23.9150μs | 41.8147 KOps/s | 40.7561 KOps/s | $\color{#35bf28}+2.60\\%$ | | test_mod_add[eager] | 87.0850μs | 26.4131μs | 37.8600 KOps/s | 45.9727 KOps/s | $\textbf{\color{#d91a1a}-17.65\\%}$ | | test_mod_add[compile] | 0.1048ms | 38.8045μs | 25.7702 KOps/s | 27.4710 KOps/s | $\textbf{\color{#d91a1a}-6.19\\%}$ | | test_mod_add[compile-overhead] | 95.2700μs | 37.7753μs | 26.4723 KOps/s | 27.0139 KOps/s | $\color{#d91a1a}-2.00\\%$ | | test_mod_wrap[eager] | 0.3620ms | 0.2199ms | 4.5469 KOps/s | 4.8675 KOps/s | $\textbf{\color{#d91a1a}-6.59\\%}$ | | test_mod_wrap[compile] | 2.1006ms | 0.2318ms | 4.3136 KOps/s | 4.3096 KOps/s | $\color{#35bf28}+0.09\\%$ | | test_mod_wrap[compile-overhead] | 0.3142ms | 0.2274ms | 4.3968 KOps/s | 4.3982 KOps/s | $\color{#d91a1a}-0.03\\%$ | | test_mod_wrap_and_backward[eager] | 12.5184ms | 11.4225ms | 87.5463 Ops/s | 87.6852 Ops/s | $\color{#d91a1a}-0.16\\%$ | | test_mod_wrap_and_backward[compile] | 12.4183ms | 11.5443ms | 86.6228 Ops/s | 85.8275 Ops/s | $\color{#35bf28}+0.93\\%$ | | test_mod_wrap_and_backward[compile-overhead] | 12.6900ms | 11.5616ms | 86.4933 Ops/s | 85.4627 Ops/s | $\color{#35bf28}+1.21\\%$ | | test_seq_add[eager] | 0.2080ms | 88.5404μs 
| 11.2943 KOps/s | 12.0889 KOps/s | $\textbf{\color{#d91a1a}-6.57\\%}$ | | test_seq_add[compile] | 0.1663ms | 60.8163μs | 16.4430 KOps/s | 15.9680 KOps/s | $\color{#35bf28}+2.97\\%$ | | test_seq_add[compile-overhead] | 0.2156ms | 59.9453μs | 16.6819 KOps/s | 16.4822 KOps/s | $\color{#35bf28}+1.21\\%$ | | test_seq_wrap[eager] | 0.7665ms | 0.3868ms | 2.5856 KOps/s | 2.6982 KOps/s | $\color{#d91a1a}-4.17\\%$ | | test_seq_wrap[compile] | 0.4132ms | 0.2621ms | 3.8150 KOps/s | 3.7383 KOps/s | $\color{#35bf28}+2.05\\%$ | | test_seq_wrap[compile-overhead] | 0.4988ms | 0.2646ms | 3.7795 KOps/s | 3.8199 KOps/s | $\color{#d91a1a}-1.06\\%$ | | test_func_call_runtime[False-eager] | 0.9404ms | 0.5507ms | 1.8158 KOps/s | 1.8723 KOps/s | $\color{#d91a1a}-3.02\\%$ | | test_func_call_runtime[False-compile] | 0.6761ms | 0.5073ms | 1.9711 KOps/s | 1.9804 KOps/s | $\color{#d91a1a}-0.47\\%$ | | test_func_call_runtime[False-compile-overhead] | 0.7987ms | 0.5020ms | 1.9922 KOps/s | 1.9904 KOps/s | $\color{#35bf28}+0.09\\%$ | | test_func_call_runtime[True-eager] | 1.1500ms | 0.7786ms | 1.2844 KOps/s | 1.2968 KOps/s | $\color{#d91a1a}-0.95\\%$ | | test_func_call_runtime[True-compile] | 0.7644ms | 0.5114ms | 1.9556 KOps/s | 1.9333 KOps/s | $\color{#35bf28}+1.15\\%$ | | test_func_call_runtime[True-compile-overhead] | 0.6395ms | 0.5165ms | 1.9362 KOps/s | 1.9288 KOps/s | $\color{#35bf28}+0.38\\%$ | | test_func_call_cm_runtime[False-eager] | 0.8792ms | 0.5438ms | 1.8390 KOps/s | 1.8859 KOps/s | $\color{#d91a1a}-2.49\\%$ | | test_func_call_cm_runtime[False-compile] | 0.6690ms | 0.5073ms | 1.9713 KOps/s | 1.9615 KOps/s | $\color{#35bf28}+0.50\\%$ | | test_func_call_cm_runtime[False-compile-overhead] | 0.6577ms | 0.5009ms | 1.9963 KOps/s | 1.9646 KOps/s | $\color{#35bf28}+1.61\\%$ | | test_func_call_cm_runtime[True-eager] | 1.1356ms | 0.8967ms | 1.1152 KOps/s | 1.0982 KOps/s | $\color{#35bf28}+1.54\\%$ | | test_func_call_cm_runtime[True-compile] | 1.3113ms | 0.8535ms | 1.1717 KOps/s | 1.1532 
KOps/s | $\color{#35bf28}+1.60\\%$ | | test_func_call_cm_runtime[True-compile-overhead] | 0.9694ms | 0.8514ms | 1.1745 KOps/s | 1.1765 KOps/s | $\color{#d91a1a}-0.17\\%$ | | test_distributed | 0.3232ms | 0.1346ms | 7.4296 KOps/s | 7.3278 KOps/s | $\color{#35bf28}+1.39\\%$ | | test_tdmodule | 32.4910μs | 17.8660μs | 55.9723 KOps/s | 63.7847 KOps/s | $\textbf{\color{#d91a1a}-12.25\\%}$ | | test_tdmodule_dispatch | 61.7870μs | 37.1401μs | 26.9251 KOps/s | 30.9494 KOps/s | $\textbf{\color{#d91a1a}-13.00\\%}$ | | test_tdseq | 63.4090μs | 20.0742μs | 49.8151 KOps/s | 55.8149 KOps/s | $\textbf{\color{#d91a1a}-10.75\\%}$ | | test_tdseq_dispatch | 82.0950μs | 41.9542μs | 23.8355 KOps/s | 26.4087 KOps/s | $\textbf{\color{#d91a1a}-9.74\\%}$ | | test_instantiation_functorch | 1.8603ms | 1.6648ms | 600.6873 Ops/s | 608.3058 Ops/s | $\color{#d91a1a}-1.25\\%$ | | test_instantiation_td | 2.7772ms | 1.2291ms | 813.5962 Ops/s | 818.6569 Ops/s | $\color{#d91a1a}-0.62\\%$ | | test_exec_functorch | 0.3242ms | 0.1834ms | 5.4516 KOps/s | 5.3850 KOps/s | $\color{#35bf28}+1.24\\%$ | | test_exec_functional_call | 0.3429ms | 0.1751ms | 5.7106 KOps/s | 5.8430 KOps/s | $\color{#d91a1a}-2.27\\%$ | | test_exec_td | 0.3899ms | 0.1768ms | 5.6548 KOps/s | 5.7618 KOps/s | $\color{#d91a1a}-1.86\\%$ | | test_exec_td_decorator | 0.8007ms | 0.2306ms | 4.3365 KOps/s | 4.3660 KOps/s | $\color{#d91a1a}-0.67\\%$ | | test_vmap_mlp_speed[True-True] | 1.0213ms | 0.5949ms | 1.6809 KOps/s | 1.6968 KOps/s | $\color{#d91a1a}-0.94\\%$ | | test_vmap_mlp_speed[True-False] | 1.0044ms | 0.5890ms | 1.6978 KOps/s | 1.6972 KOps/s | $\color{#35bf28}+0.04\\%$ | | test_vmap_mlp_speed[False-True] | 0.7671ms | 0.4890ms | 2.0450 KOps/s | 2.0542 KOps/s | $\color{#d91a1a}-0.45\\%$ | | test_vmap_mlp_speed[False-False] | 0.7122ms | 0.4851ms | 2.0614 KOps/s | 2.0478 KOps/s | $\color{#35bf28}+0.66\\%$ | | test_vmap_mlp_speed_decorator[True-True] | 1.7532ms | 0.6456ms | 1.5490 KOps/s | 1.5663 KOps/s | $\color{#d91a1a}-1.11\\%$ | | 
test_vmap_mlp_speed_decorator[True-False] | 1.0406ms | 0.6692ms | 1.4943 KOps/s | 1.5650 KOps/s | $\color{#d91a1a}-4.51\\%$ | | test_vmap_mlp_speed_decorator[False-True] | 0.8137ms | 0.5398ms | 1.8525 KOps/s | 1.8847 KOps/s | $\color{#d91a1a}-1.71\\%$ | | test_vmap_mlp_speed_decorator[False-False] | 0.8889ms | 0.5424ms | 1.8438 KOps/s | 1.8794 KOps/s | $\color{#d91a1a}-1.90\\%$ | | test_to_module_speed[True] | 2.1308ms | 1.3216ms | 756.6441 Ops/s | 747.9613 Ops/s | $\color{#35bf28}+1.16\\%$ | | test_to_module_speed[False] | 2.0339ms | 1.2910ms | 774.5968 Ops/s | 767.3302 Ops/s | $\color{#35bf28}+0.95\\%$ | | test_tc_init | 90.7420μs | 44.4902μs | 22.4768 KOps/s | 22.9718 KOps/s | $\color{#d91a1a}-2.15\\%$ | | test_tc_init_nested | 0.1733ms | 88.4652μs | 11.3039 KOps/s | 11.4527 KOps/s | $\color{#d91a1a}-1.30\\%$ | | test_tc_first_layer_tensor | 17.7340μs | 1.4654μs | 682.3860 KOps/s | 698.1459 KOps/s | $\color{#d91a1a}-2.26\\%$ | | test_tc_first_layer_nontensor | 43.1620μs | 4.3084μs | 232.1046 KOps/s | 231.5244 KOps/s | $\color{#35bf28}+0.25\\%$ | | test_tc_second_layer_tensor | 38.2320μs | 2.7650μs | 361.6639 KOps/s | 364.2803 KOps/s | $\color{#d91a1a}-0.72\\%$ | | test_tc_second_layer_nontensor | 34.6350μs | 5.4953μs | 181.9730 KOps/s | 180.0619 KOps/s | $\color{#35bf28}+1.06\\%$ | | test_unbind | 0.4970s | 14.4060ms | 69.4154 Ops/s | 65.4917 Ops/s | $\textbf{\color{#35bf28}+5.99\\%}$ | | test_full_like | 11.1846ms | 7.7238ms | 129.4695 Ops/s | 118.6229 Ops/s | $\textbf{\color{#35bf28}+9.14\\%}$ | | test_zeros_like | 13.5511ms | 7.8413ms | 127.5306 Ops/s | 136.2599 Ops/s | $\textbf{\color{#d91a1a}-6.41\\%}$ | | test_ones_like | 17.0217ms | 8.3426ms | 119.8666 Ops/s | 126.4310 Ops/s | $\textbf{\color{#d91a1a}-5.19\\%}$ | | test_clone | 18.4379ms | 10.2449ms | 97.6096 Ops/s | 102.2218 Ops/s | $\color{#d91a1a}-4.51\\%$ | | test_squeeze | 64.0710μs | 12.9702μs | 77.0999 KOps/s | 78.1143 KOps/s | $\color{#d91a1a}-1.30\\%$ | | test_unsqueeze | 0.1687ms | 92.9018μs | 
10.7641 KOps/s | 10.6573 KOps/s | $\color{#35bf28}+1.00\\%$ | | test_split | 0.5007ms | 0.2000ms | 5.0008 KOps/s | 5.0157 KOps/s | $\color{#d91a1a}-0.30\\%$ | | test_permute | 0.3077ms | 0.2200ms | 4.5462 KOps/s | 4.5033 KOps/s | $\color{#35bf28}+0.95\\%$ | | test_stack | 29.4839ms | 26.6664ms | 37.5003 Ops/s | 37.4617 Ops/s | $\color{#35bf28}+0.10\\%$ | | test_cat | 30.9331ms | 27.1873ms | 36.7818 Ops/s | 36.2981 Ops/s | $\color{#35bf28}+1.33\\%$ |
github-actions[bot] commented 3 months ago

⚠ Warning: Result of GPU Benchmark Tests

Total Benchmarks: 225. Improved: $\large\color{#35bf28}5$. Worsened: $\large\color{#d91a1a}44$.

Expand to view detailed results | Name | Max | Mean | Ops | Ops on Repo `HEAD` | Change | | -------------------------------------------------- | --------- | --------- | --------------- | ------------------ | ----------------------------------- | | test_plain_set_nested | 48.4530μs | 16.2803μs | 61.4239 KOps/s | 64.1206 KOps/s | $\color{#d91a1a}-4.21\\%$ | | test_plain_set_stack_nested | 34.6820μs | 16.4805μs | 60.6777 KOps/s | 64.6102 KOps/s | $\textbf{\color{#d91a1a}-6.09\\%}$ | | test_plain_set_nested_inplace | 37.5230μs | 17.3316μs | 57.6980 KOps/s | 60.2723 KOps/s | $\color{#d91a1a}-4.27\\%$ | | test_plain_set_stack_nested_inplace | 0.1158ms | 17.9927μs | 55.5782 KOps/s | 60.7878 KOps/s | $\textbf{\color{#d91a1a}-8.57\\%}$ | | test_items | 0.1722ms | 4.6916μs | 213.1460 KOps/s | 212.6814 KOps/s | $\color{#35bf28}+0.22\\%$ | | test_items_nested | 0.4663ms | 0.3582ms | 2.7920 KOps/s | 2.7174 KOps/s | $\color{#35bf28}+2.74\\%$ | | test_items_nested_locked | 0.4057ms | 0.3658ms | 2.7337 KOps/s | 2.7265 KOps/s | $\color{#35bf28}+0.26\\%$ | | test_items_nested_leaf | 0.1078ms | 83.6619μs | 11.9529 KOps/s | 11.9494 KOps/s | $\color{#35bf28}+0.03\\%$ | | test_items_stack_nested | 0.5193ms | 0.3672ms | 2.7236 KOps/s | 2.6795 KOps/s | $\color{#35bf28}+1.65\\%$ | | test_items_stack_nested_leaf | 0.2767ms | 85.4622μs | 11.7011 KOps/s | 11.8020 KOps/s | $\color{#d91a1a}-0.86\\%$ | | test_items_stack_nested_locked | 0.5767ms | 0.3668ms | 2.7264 KOps/s | 2.6887 KOps/s | $\color{#35bf28}+1.40\\%$ | | test_keys | 0.2031ms | 4.3985μs | 227.3517 KOps/s | 227.7193 KOps/s | $\color{#d91a1a}-0.16\\%$ | | test_keys_nested | 0.2467ms | 66.4777μs | 15.0426 KOps/s | 15.2925 KOps/s | $\color{#d91a1a}-1.63\\%$ | | test_keys_nested_locked | 2.3197ms | 72.7801μs | 13.7400 KOps/s | 13.7835 KOps/s | $\color{#d91a1a}-0.32\\%$ | | test_keys_nested_leaf | 84.1160μs | 55.9426μs | 17.8755 KOps/s | 17.3934 KOps/s | $\color{#35bf28}+2.77\\%$ | | test_keys_stack_nested | 98.5460μs | 67.1097μs | 
14.9010 KOps/s | 15.2011 KOps/s | $\color{#d91a1a}-1.97\\%$ | | test_keys_stack_nested_leaf | 80.1450μs | 56.2817μs | 17.7678 KOps/s | 17.7384 KOps/s | $\color{#35bf28}+0.17\\%$ | | test_keys_stack_nested_locked | 87.8660μs | 72.0474μs | 13.8798 KOps/s | 14.0172 KOps/s | $\color{#d91a1a}-0.98\\%$ | | test_values | 6.4603μs | 1.7624μs | 567.3924 KOps/s | 567.6842 KOps/s | $\color{#d91a1a}-0.05\\%$ | | test_values_nested | 51.5040μs | 33.8588μs | 29.5344 KOps/s | 29.7581 KOps/s | $\color{#d91a1a}-0.75\\%$ | | test_values_nested_locked | 0.1099ms | 35.6231μs | 28.0717 KOps/s | 28.1957 KOps/s | $\color{#d91a1a}-0.44\\%$ | | test_values_nested_leaf | 45.3120μs | 30.0808μs | 33.2437 KOps/s | 33.1172 KOps/s | $\color{#35bf28}+0.38\\%$ | | test_values_stack_nested | 65.1640μs | 34.4598μs | 29.0193 KOps/s | 29.4040 KOps/s | $\color{#d91a1a}-1.31\\%$ | | test_values_stack_nested_leaf | 47.2530μs | 30.8532μs | 32.4116 KOps/s | 33.0330 KOps/s | $\color{#d91a1a}-1.88\\%$ | | test_values_stack_nested_locked | 0.1725ms | 36.5679μs | 27.3464 KOps/s | 27.9056 KOps/s | $\color{#d91a1a}-2.00\\%$ | | test_membership | 1.5796μs | 0.5524μs | 1.8102 MOps/s | 1.8529 MOps/s | $\color{#d91a1a}-2.30\\%$ | | test_membership_nested | 0.1935ms | 1.9833μs | 504.1981 KOps/s | 522.2639 KOps/s | $\color{#d91a1a}-3.46\\%$ | | test_membership_nested_leaf | 83.2450μs | 1.8926μs | 528.3853 KOps/s | 517.8198 KOps/s | $\color{#35bf28}+2.04\\%$ | | test_membership_stacked_nested | 14.6510μs | 1.9454μs | 514.0455 KOps/s | 503.0295 KOps/s | $\color{#35bf28}+2.19\\%$ | | test_membership_stacked_nested_leaf | 20.7710μs | 1.9603μs | 510.1319 KOps/s | 498.6231 KOps/s | $\color{#35bf28}+2.31\\%$ | | test_membership_nested_last | 0.1991ms | 2.9269μs | 341.6557 KOps/s | 339.8407 KOps/s | $\color{#35bf28}+0.53\\%$ | | test_membership_nested_leaf_last | 21.3110μs | 2.9112μs | 343.5018 KOps/s | 335.9681 KOps/s | $\color{#35bf28}+2.24\\%$ | | test_membership_stacked_nested_last | 19.0710μs | 2.9248μs | 341.8988 KOps/s 
| 299.4703 KOps/s | $\textbf{\color{#35bf28}+14.17\\%}$ | | test_membership_stacked_nested_leaf_last | 26.1610μs | 2.8876μs | 346.3140 KOps/s | 296.4750 KOps/s | $\textbf{\color{#35bf28}+16.81\\%}$ | | test_nested_getleaf | 60.0330μs | 7.9312μs | 126.0845 KOps/s | 125.5384 KOps/s | $\color{#35bf28}+0.43\\%$ | | test_nested_get | 24.5010μs | 7.4356μs | 134.4877 KOps/s | 134.1835 KOps/s | $\color{#35bf28}+0.23\\%$ | | test_stacked_getleaf | 23.6310μs | 7.9699μs | 125.4718 KOps/s | 124.9371 KOps/s | $\color{#35bf28}+0.43\\%$ | | test_stacked_get | 25.2520μs | 7.4881μs | 133.5452 KOps/s | 134.5119 KOps/s | $\color{#d91a1a}-0.72\\%$ | | test_nested_getitemleaf | 0.1151ms | 8.0532μs | 124.1738 KOps/s | 122.8955 KOps/s | $\color{#35bf28}+1.04\\%$ | | test_nested_getitem | 28.8720μs | 7.5721μs | 132.0639 KOps/s | 130.7431 KOps/s | $\color{#35bf28}+1.01\\%$ | | test_stacked_getitemleaf | 69.1050μs | 8.0784μs | 123.7874 KOps/s | 123.6032 KOps/s | $\color{#35bf28}+0.15\\%$ | | test_stacked_getitem | 23.4110μs | 7.5968μs | 131.6342 KOps/s | 131.5297 KOps/s | $\color{#35bf28}+0.08\\%$ | | test_lock_nested | 7.9240ms | 0.4741ms | 2.1095 KOps/s | 2.1552 KOps/s | $\color{#d91a1a}-2.12\\%$ | | test_lock_stack_nested | 0.6069ms | 0.4353ms | 2.2975 KOps/s | 2.3297 KOps/s | $\color{#d91a1a}-1.38\\%$ | | test_unlock_nested | 0.8731ms | 0.3860ms | 2.5907 KOps/s | 2.5842 KOps/s | $\color{#35bf28}+0.25\\%$ | | test_unlock_stack_nested | 0.4997ms | 0.3549ms | 2.8176 KOps/s | 2.8534 KOps/s | $\color{#d91a1a}-1.26\\%$ | | test_flatten_speed | 0.4519ms | 0.1042ms | 9.5945 KOps/s | 9.6087 KOps/s | $\color{#d91a1a}-0.15\\%$ | | test_unflatten_speed | 0.3133ms | 0.2838ms | 3.5235 KOps/s | 3.5361 KOps/s | $\color{#d91a1a}-0.36\\%$ | | test_common_ops | 1.6552ms | 1.3288ms | 752.5428 Ops/s | 827.8052 Ops/s | $\textbf{\color{#d91a1a}-9.09\\%}$ | | test_creation | 23.5920μs | 1.6252μs | 615.2921 KOps/s | 617.7642 KOps/s | $\color{#d91a1a}-0.40\\%$ | | test_creation_empty | 36.7020μs | 15.9603μs | 
62.6556 KOps/s | 69.9136 KOps/s | $\textbf{\color{#d91a1a}-10.38\\%}$ | | test_creation_nested_1 | 44.9020μs | 17.8330μs | 56.0757 KOps/s | 61.4562 KOps/s | $\textbf{\color{#d91a1a}-8.75\\%}$ | | test_creation_nested_2 | 78.2150μs | 20.7466μs | 48.2006 KOps/s | 54.1295 KOps/s | $\textbf{\color{#d91a1a}-10.95\\%}$ | | test_clone | 0.2023ms | 31.3598μs | 31.8880 KOps/s | 35.0012 KOps/s | $\textbf{\color{#d91a1a}-8.89\\%}$ | | test_getitem[int] | 1.0404ms | 16.8719μs | 59.2702 KOps/s | 59.7138 KOps/s | $\color{#d91a1a}-0.74\\%$ | | test_getitem[slice_int] | 0.1442ms | 29.4335μs | 33.9749 KOps/s | 35.6834 KOps/s | $\color{#d91a1a}-4.79\\%$ | | test_getitem[range] | 0.3466ms | 0.1144ms | 8.7414 KOps/s | 9.3287 KOps/s | $\textbf{\color{#d91a1a}-6.30\\%}$ | | test_getitem[tuple] | 0.1575ms | 26.3090μs | 38.0098 KOps/s | 40.5047 KOps/s | $\textbf{\color{#d91a1a}-6.16\\%}$ | | test_getitem[list] | 0.2715ms | 0.1098ms | 9.1077 KOps/s | 10.1135 KOps/s | $\textbf{\color{#d91a1a}-9.95\\%}$ | | test_setitem_dim[int] | 0.2045ms | 56.7587μs | 17.6184 KOps/s | 20.5194 KOps/s | $\textbf{\color{#d91a1a}-14.14\\%}$ | | test_setitem_dim[slice_int] | 0.2233ms | 80.6052μs | 12.4061 KOps/s | 14.0757 KOps/s | $\textbf{\color{#d91a1a}-11.86\\%}$ | | test_setitem_dim[range] | 0.1781ms | 0.1463ms | 6.8372 KOps/s | 7.6567 KOps/s | $\textbf{\color{#d91a1a}-10.70\\%}$ | | test_setitem_dim[tuple] | 0.2116ms | 75.0843μs | 13.3184 KOps/s | 15.4224 KOps/s | $\textbf{\color{#d91a1a}-13.64\\%}$ | | test_setitem | 0.2235ms | 46.3503μs | 21.5748 KOps/s | 25.3522 KOps/s | $\textbf{\color{#d91a1a}-14.90\\%}$ | | test_set | 0.2361ms | 44.4718μs | 22.4862 KOps/s | 25.8540 KOps/s | $\textbf{\color{#d91a1a}-13.03\\%}$ | | test_set_shared | 95.3700ms | 63.6242μs | 15.7173 KOps/s | 19.0349 KOps/s | $\textbf{\color{#d91a1a}-17.43\\%}$ | | test_update | 0.2381ms | 51.5369μs | 19.4036 KOps/s | 20.6146 KOps/s | $\textbf{\color{#d91a1a}-5.87\\%}$ | | test_update_nested | 0.2653ms | 61.1404μs | 16.3558 KOps/s | 
17.3425 KOps/s | $\textbf{\color{#d91a1a}-5.69\\%}$ | | test_update__nested | 0.2786ms | 65.3079μs | 15.3121 KOps/s | 16.4018 KOps/s | $\textbf{\color{#d91a1a}-6.64\\%}$ | | test_set_nested | 0.1960ms | 48.3602μs | 20.6782 KOps/s | 23.8247 KOps/s | $\textbf{\color{#d91a1a}-13.21\\%}$ | | test_set_nested_new | 0.1965ms | 47.9446μs | 20.8574 KOps/s | 21.4374 KOps/s | $\color{#d91a1a}-2.71\\%$ | | test_select | 0.2178ms | 65.8269μs | 15.1913 KOps/s | 16.1706 KOps/s | $\textbf{\color{#d91a1a}-6.06\\%}$ | | test_select_nested | 0.5405ms | 51.7549μs | 19.3218 KOps/s | 19.4308 KOps/s | $\color{#d91a1a}-0.56\\%$ | | test_exclude_nested | 0.1569ms | 68.1645μs | 14.6704 KOps/s | 14.4824 KOps/s | $\color{#35bf28}+1.30\\%$ | | test_empty[True] | 0.3568ms | 0.2781ms | 3.5964 KOps/s | 3.5411 KOps/s | $\color{#35bf28}+1.56\\%$ | | test_empty[False] | 2.3931μs | 0.8537μs | 1.1714 MOps/s | 1.1334 MOps/s | $\color{#35bf28}+3.35\\%$ | | test_to | 0.1461ms | 39.5288μs | 25.2980 KOps/s | 25.5897 KOps/s | $\color{#d91a1a}-1.14\\%$ | | test_to_nonblocking | 0.1154ms | 26.1888μs | 38.1843 KOps/s | 38.8523 KOps/s | $\color{#d91a1a}-1.72\\%$ | | test_unbind_speed | 1.3572ms | 0.3004ms | 3.3293 KOps/s | 3.3755 KOps/s | $\color{#d91a1a}-1.37\\%$ | | test_unbind_speed_stack0 | 0.3745ms | 0.2967ms | 3.3704 KOps/s | 3.3645 KOps/s | $\color{#35bf28}+0.18\\%$ | | test_unbind_speed_stack1 | 92.9723ms | 0.7745ms | 1.2912 KOps/s | 1.2941 KOps/s | $\color{#d91a1a}-0.23\\%$ | | test_split | 95.7410ms | 2.3161ms | 431.7591 Ops/s | 437.1914 Ops/s | $\color{#d91a1a}-1.24\\%$ | | test_chunk | 97.8474ms | 2.3286ms | 429.4372 Ops/s | 437.7341 Ops/s | $\color{#d91a1a}-1.90\\%$ | | test_creation[device0] | 0.2732ms | 0.1043ms | 9.5898 KOps/s | 9.5841 KOps/s | $\color{#35bf28}+0.06\\%$ | | test_creation_from_tensor | 0.3084ms | 0.1074ms | 9.3126 KOps/s | 9.9162 KOps/s | $\textbf{\color{#d91a1a}-6.09\\%}$ | | test_add_one[memmap_tensor0] | 0.1476ms | 9.0277μs | 110.7703 KOps/s | 119.5423 KOps/s | 
$\textbf{\color{#d91a1a}-7.34\\%}$ | | test_contiguous[memmap_tensor0] | 26.2920μs | 2.1885μs | 456.9407 KOps/s | 461.7576 KOps/s | $\color{#d91a1a}-1.04\\%$ | | test_stack[memmap_tensor0] | 0.1923ms | 6.8728μs | 145.5007 KOps/s | 151.3316 KOps/s | $\color{#d91a1a}-3.85\\%$ | | test_memmaptd_index | 1.1817ms | 0.4399ms | 2.2731 KOps/s | 2.3483 KOps/s | $\color{#d91a1a}-3.20\\%$ | | test_memmaptd_index_astensor | 0.7869ms | 0.5024ms | 1.9905 KOps/s | 2.0390 KOps/s | $\color{#d91a1a}-2.38\\%$ | | test_memmaptd_index_op | 1.4907ms | 1.0427ms | 959.0343 Ops/s | 1.0195 KOps/s | $\textbf{\color{#d91a1a}-5.93\\%}$ | | test_serialize_model | 94.3539ms | 90.3705ms | 11.0656 Ops/s | 10.6259 Ops/s | $\color{#35bf28}+4.14\\%$ | | test_serialize_model_pickle | 1.3691s | 1.2390s | 0.8071 Ops/s | 0.8081 Ops/s | $\color{#d91a1a}-0.13\\%$ | | test_serialize_weights | 0.1874s | 98.2765ms | 10.1754 Ops/s | 10.9051 Ops/s | $\textbf{\color{#d91a1a}-6.69\\%}$ | | test_serialize_weights_returnearly | 0.2784s | 67.1940ms | 14.8823 Ops/s | 15.3949 Ops/s | $\color{#d91a1a}-3.33\\%$ | | test_serialize_weights_pickle | 1.3554s | 1.2373s | 0.8082 Ops/s | 0.8071 Ops/s | $\color{#35bf28}+0.13\\%$ | | test_reshape_pytree | 0.1751ms | 38.2053μs | 26.1744 KOps/s | 27.5125 KOps/s | $\color{#d91a1a}-4.86\\%$ | | test_reshape_td | 0.1785ms | 45.1324μs | 22.1570 KOps/s | 24.9055 KOps/s | $\textbf{\color{#d91a1a}-11.04\\%}$ | | test_view_pytree | 0.1258ms | 37.8464μs | 26.4226 KOps/s | 27.6477 KOps/s | $\color{#d91a1a}-4.43\\%$ | | test_view_td | 0.1849ms | 49.4465μs | 20.2239 KOps/s | 21.4157 KOps/s | $\textbf{\color{#d91a1a}-5.57\\%}$ | | test_unbind_pytree | 0.1130ms | 36.2998μs | 27.5483 KOps/s | 28.3458 KOps/s | $\color{#d91a1a}-2.81\\%$ | | test_unbind_td | 0.4063ms | 45.4824μs | 21.9865 KOps/s | 22.8317 KOps/s | $\color{#d91a1a}-3.70\\%$ | | test_split_pytree | 0.2195ms | 55.9341μs | 17.8782 KOps/s | 20.8348 KOps/s | $\textbf{\color{#d91a1a}-14.19\\%}$ | | test_split_td | 0.6338ms | 59.9769μs | 
16.6731 KOps/s | 15.0120 KOps/s | $\textbf{\color{#35bf28}+11.07\\%}$ | | test_add_pytree | 0.2134ms | 60.0721μs | 16.6467 KOps/s | 16.7297 KOps/s | $\color{#d91a1a}-0.50\\%$ | | test_add_td | 0.2610ms | 95.3955μs | 10.4827 KOps/s | 11.9317 KOps/s | $\textbf{\color{#d91a1a}-12.14\\%}$ | | test_compile_add_one_nested[tensordict-compile] | 0.4160ms | 0.2118ms | 4.7220 KOps/s | 4.7014 KOps/s | $\color{#35bf28}+0.44\\%$ | | test_compile_add_one_nested[tensordict-eager] | 0.3549ms | 0.1716ms | 5.8260 KOps/s | 5.7806 KOps/s | $\color{#35bf28}+0.78\\%$ | | test_compile_add_one_nested[pytree-compile] | 0.3114ms | 0.1472ms | 6.7927 KOps/s | 6.6642 KOps/s | $\color{#35bf28}+1.93\\%$ | | test_compile_add_one_nested[pytree-eager] | 0.3755ms | 0.1927ms | 5.1895 KOps/s | 5.2144 KOps/s | $\color{#d91a1a}-0.48\\%$ | | test_compile_copy_nested[tensordict-compile] | 0.1547ms | 21.0872μs | 47.4220 KOps/s | 48.0070 KOps/s | $\color{#d91a1a}-1.22\\%$ | | test_compile_copy_nested[tensordict-eager] | 0.1472ms | 46.4915μs | 21.5093 KOps/s | 21.5072 KOps/s | $+0.01\\%$ | | test_compile_copy_nested[pytree-compile] | 0.1001ms | 71.8414μs | 13.9195 KOps/s | 13.9900 KOps/s | $\color{#d91a1a}-0.50\\%$ | | test_compile_copy_nested[pytree-eager] | 79.2150μs | 58.7237μs | 17.0289 KOps/s | 16.6208 KOps/s | $\color{#35bf28}+2.46\\%$ | | test_compile_add_one_flat[tensordict-compile] | 0.4567ms | 0.3271ms | 3.0570 KOps/s | 3.0571 KOps/s | $-0.00\\%$ | | test_compile_add_one_flat[tensordict-eager] | 0.3791ms | 0.2234ms | 4.4761 KOps/s | 4.5555 KOps/s | $\color{#d91a1a}-1.74\\%$ | | test_compile_add_one_flat[tensorclass-compile] | 0.2805ms | 0.1304ms | 7.6675 KOps/s | 7.6986 KOps/s | $\color{#d91a1a}-0.40\\%$ | | test_compile_add_one_flat[tensorclass-eager] | 0.2222ms | 64.5829μs | 15.4840 KOps/s | 16.3331 KOps/s | $\textbf{\color{#d91a1a}-5.20\\%}$ | | test_compile_add_one_flat[pytree-compile] | 0.4835ms | 0.3289ms | 3.0409 KOps/s | 3.0607 KOps/s | $\color{#d91a1a}-0.65\\%$ | | 
test_compile_add_one_flat[pytree-eager] | 0.8083ms | 0.6412ms | 1.5595 KOps/s | 1.6260 KOps/s | $\color{#d91a1a}-4.08\\%$ | | test_compile_add_self_flat[tensordict-eager] | 0.4752ms | 0.2724ms | 3.6707 KOps/s | 3.7387 KOps/s | $\color{#d91a1a}-1.82\\%$ | | test_compile_add_self_flat[tensordict-compile] | 0.4389ms | 0.3275ms | 3.0532 KOps/s | 3.0546 KOps/s | $\color{#d91a1a}-0.05\\%$ | | test_compile_add_self_flat[tensorclass-eager] | 0.2109ms | 73.6973μs | 13.5690 KOps/s | 14.0145 KOps/s | $\color{#d91a1a}-3.18\\%$ | | test_compile_add_self_flat[tensorclass-compile] | 0.2863ms | 0.1313ms | 7.6160 KOps/s | 7.6120 KOps/s | $\color{#35bf28}+0.05\\%$ | | test_compile_add_self_flat[pytree-eager] | 0.7368ms | 0.5503ms | 1.8172 KOps/s | 1.9052 KOps/s | $\color{#d91a1a}-4.62\\%$ | | test_compile_add_self_flat[pytree-compile] | 0.4698ms | 0.3279ms | 3.0499 KOps/s | 3.0453 KOps/s | $\color{#35bf28}+0.15\\%$ | | test_compile_copy_flat[tensordict-compile] | 0.1520ms | 17.6025μs | 56.8102 KOps/s | 57.6115 KOps/s | $\color{#d91a1a}-1.39\\%$ | | test_compile_copy_flat[tensordict-eager] | 54.2740μs | 31.9921μs | 31.2577 KOps/s | 31.2465 KOps/s | $\color{#35bf28}+0.04\\%$ | | test_compile_copy_flat[pytree-compile] | 0.2191ms | 74.7479μs | 13.3783 KOps/s | 13.2524 KOps/s | $\color{#35bf28}+0.95\\%$ | | test_compile_copy_flat[pytree-eager] | 86.9160μs | 59.6784μs | 16.7565 KOps/s | 16.5270 KOps/s | $\color{#35bf28}+1.39\\%$ | | test_compile_assign_and_add[tensordict-compile] | 2.5244ms | 0.9258ms | 1.0801 KOps/s | 1.0810 KOps/s | $\color{#d91a1a}-0.08\\%$ | | test_compile_assign_and_add[tensordict-eager] | 3.6174ms | 3.2937ms | 303.6082 Ops/s | 307.4196 Ops/s | $\color{#d91a1a}-1.24\\%$ | | test_compile_assign_and_add[pytree-compile] | 2.4746ms | 0.9098ms | 1.0992 KOps/s | 1.0966 KOps/s | $\color{#35bf28}+0.24\\%$ | | test_compile_assign_and_add[pytree-eager] | 3.6422ms | 3.3658ms | 297.1044 Ops/s | 309.0696 Ops/s | $\color{#d91a1a}-3.87\\%$ | | 
test_compile_indexing[tensor-tensordict-compile] | 0.3055ms | 0.1119ms | 8.9337 KOps/s | 9.1362 KOps/s | $\color{#d91a1a}-2.22\\%$ | | test_compile_indexing[tensor-tensordict-eager] | 0.3123ms | 63.3452μs | 15.7865 KOps/s | 16.8852 KOps/s | $\textbf{\color{#d91a1a}-6.51\\%}$ | | test_compile_indexing[tensor-tensorclass-compile] | 0.2881ms | 0.1060ms | 9.4312 KOps/s | 9.6344 KOps/s | $\color{#d91a1a}-2.11\\%$ | | test_compile_indexing[tensor-tensorclass-eager] | 0.2405ms | 46.9367μs | 21.3053 KOps/s | 23.1405 KOps/s | $\textbf{\color{#d91a1a}-7.93\\%}$ | | test_compile_indexing[tensor-pytree-compile] | 0.2767ms | 0.1058ms | 9.4561 KOps/s | 9.7077 KOps/s | $\color{#d91a1a}-2.59\\%$ | | test_compile_indexing[tensor-pytree-eager] | 0.1985ms | 47.4686μs | 21.0665 KOps/s | 23.2222 KOps/s | $\textbf{\color{#d91a1a}-9.28\\%}$ | | test_compile_indexing[slice-tensordict-compile] | 0.2930ms | 0.1410ms | 7.0924 KOps/s | 7.2591 KOps/s | $\color{#d91a1a}-2.30\\%$ | | test_compile_indexing[slice-tensordict-eager] | 0.1954ms | 25.9286μs | 38.5674 KOps/s | 40.7772 KOps/s | $\textbf{\color{#d91a1a}-5.42\\%}$ | | test_compile_indexing[slice-tensorclass-compile] | 0.2815ms | 0.1334ms | 7.4952 KOps/s | 7.6635 KOps/s | $\color{#d91a1a}-2.20\\%$ | | test_compile_indexing[slice-tensorclass-eager] | 0.2200ms | 26.3013μs | 38.0209 KOps/s | 45.9603 KOps/s | $\textbf{\color{#d91a1a}-17.27\\%}$ | | test_compile_indexing[slice-pytree-compile] | 0.3066ms | 0.1327ms | 7.5378 KOps/s | 7.6768 KOps/s | $\color{#d91a1a}-1.81\\%$ | | test_compile_indexing[slice-pytree-eager] | 61.7140μs | 22.8826μs | 43.7014 KOps/s | 45.9494 KOps/s | $\color{#d91a1a}-4.89\\%$ | | test_compile_indexing[int-tensordict-compile] | 0.2889ms | 0.1402ms | 7.1330 KOps/s | 7.3024 KOps/s | $\color{#d91a1a}-2.32\\%$ | | test_compile_indexing[int-tensordict-eager] | 0.4980ms | 26.1101μs | 38.2993 KOps/s | 41.0320 KOps/s | $\textbf{\color{#d91a1a}-6.66\\%}$ | | test_compile_indexing[int-tensorclass-compile] | 0.2876ms | 0.1327ms | 
7.5349 KOps/s | 7.6702 KOps/s | $\color{#d91a1a}-1.76\\%$ | | test_compile_indexing[int-tensorclass-eager] | 0.1344ms | 22.7527μs | 43.9509 KOps/s | 46.2371 KOps/s | $\color{#d91a1a}-4.94\\%$ | | test_compile_indexing[int-pytree-compile] | 0.3066ms | 0.1330ms | 7.5191 KOps/s | 7.6909 KOps/s | $\color{#d91a1a}-2.23\\%$ | | test_compile_indexing[int-pytree-eager] | 0.2098ms | 22.7048μs | 44.0436 KOps/s | 46.0832 KOps/s | $\color{#d91a1a}-4.43\\%$ | | test_mod_add[eager] | 0.2411ms | 37.6321μs | 26.5731 KOps/s | 29.3673 KOps/s | $\textbf{\color{#d91a1a}-9.51\\%}$ | | test_mod_add[compile] | 0.2248ms | 69.1503μs | 14.4612 KOps/s | 15.0015 KOps/s | $\color{#d91a1a}-3.60\\%$ | | test_mod_add[compile-overhead] | 0.2650ms | 0.1478ms | 6.7660 KOps/s | 6.3058 KOps/s | $\textbf{\color{#35bf28}+7.30\\%}$ | | test_mod_wrap[eager] | 0.4007ms | 0.2495ms | 4.0083 KOps/s | 4.1069 KOps/s | $\color{#d91a1a}-2.40\\%$ | | test_mod_wrap[compile] | 1.3662ms | 0.2911ms | 3.4350 KOps/s | 3.4394 KOps/s | $\color{#d91a1a}-0.13\\%$ | | test_mod_wrap[compile-overhead] | 8.0948ms | 4.3670ms | 228.9927 Ops/s | 226.6435 Ops/s | $\color{#35bf28}+1.04\\%$ | | test_mod_wrap_and_backward[eager] | 1.6292ms | 1.4265ms | 700.9933 Ops/s | 749.5573 Ops/s | $\textbf{\color{#d91a1a}-6.48\\%}$ | | test_mod_wrap_and_backward[compile] | 1.6256ms | 1.4475ms | 690.8435 Ops/s | 696.2106 Ops/s | $\color{#d91a1a}-0.77\\%$ | | test_mod_wrap_and_backward[compile-overhead] | 1.5251ms | 1.0144ms | 985.8503 Ops/s | 1.0005 KOps/s | $\color{#d91a1a}-1.47\\%$ | | test_seq_add[eager] | 0.2586ms | 0.1088ms | 9.1920 KOps/s | 9.5507 KOps/s | $\color{#d91a1a}-3.76\\%$ | | test_seq_add[compile] | 0.2327ms | 86.2739μs | 11.5910 KOps/s | 11.6551 KOps/s | $\color{#d91a1a}-0.55\\%$ | | test_seq_add[compile-overhead] | 0.2719ms | 0.1227ms | 8.1531 KOps/s | 8.1711 KOps/s | $\color{#d91a1a}-0.22\\%$ | | test_seq_wrap[eager] | 0.5744ms | 0.4120ms | 2.4272 KOps/s | 2.4946 KOps/s | $\color{#d91a1a}-2.70\\%$ | | test_seq_wrap[compile] | 
1.6686ms | 0.3237ms | 3.0896 KOps/s | 3.1477 KOps/s | $\color{#d91a1a}-1.84\\%$ | | test_seq_wrap[compile-overhead] | 0.3312s | 0.1531s | 6.5337 Ops/s | 6.5067 Ops/s | $\color{#35bf28}+0.41\\%$ | | test_func_call_runtime[False-eager] | 0.9387ms | 0.7438ms | 1.3444 KOps/s | 1.3917 KOps/s | $\color{#d91a1a}-3.40\\%$ | | test_func_call_runtime[False-compile] | 0.9697ms | 0.8003ms | 1.2495 KOps/s | 1.2659 KOps/s | $\color{#d91a1a}-1.29\\%$ | | test_func_call_runtime[False-compile-overhead] | 0.5616ms | 0.3637ms | 2.7497 KOps/s | 2.7295 KOps/s | $\color{#35bf28}+0.74\\%$ | | test_func_call_runtime[True-eager] | 1.0750ms | 0.9198ms | 1.0872 KOps/s | 1.0623 KOps/s | $\color{#35bf28}+2.34\\%$ | | test_func_call_runtime[True-compile] | 1.0114ms | 0.8418ms | 1.1879 KOps/s | 1.1594 KOps/s | $\color{#35bf28}+2.46\\%$ | | test_func_call_runtime[True-compile-overhead] | 0.5650ms | 0.4065ms | 2.4600 KOps/s | 2.4250 KOps/s | $\color{#35bf28}+1.44\\%$ | | test_func_call_cm_runtime[False-eager] | 0.9580ms | 0.7278ms | 1.3739 KOps/s | 1.3493 KOps/s | $\color{#35bf28}+1.83\\%$ | | test_func_call_cm_runtime[False-compile] | 0.9605ms | 0.8021ms | 1.2467 KOps/s | 1.2233 KOps/s | $\color{#35bf28}+1.91\\%$ | | test_func_call_cm_runtime[False-compile-overhead] | 0.5236ms | 0.3624ms | 2.7590 KOps/s | 2.7394 KOps/s | $\color{#35bf28}+0.72\\%$ | | test_func_call_cm_runtime[True-eager] | 1.1985ms | 1.0350ms | 966.1638 Ops/s | 946.6142 Ops/s | $\color{#35bf28}+2.07\\%$ | | test_func_call_cm_runtime[True-compile] | 1.2440ms | 0.9980ms | 1.0020 KOps/s | 985.3290 Ops/s | $\color{#35bf28}+1.70\\%$ | | test_func_call_cm_runtime[True-compile-overhead] | 1.1774ms | 1.0027ms | 997.2729 Ops/s | 970.7819 Ops/s | $\color{#35bf28}+2.73\\%$ | | test_distributed | 1.9564ms | 73.2947μs | 13.6436 KOps/s | 11.2077 KOps/s | $\textbf{\color{#35bf28}+21.73\\%}$ | | test_tdmodule | 28.9420μs | 14.7324μs | 67.8776 KOps/s | 69.9611 KOps/s | $\color{#d91a1a}-2.98\\%$ | | test_tdmodule_dispatch | 53.2040μs | 30.5797μs | 
32.7015 KOps/s | 35.8527 KOps/s | $\textbf{\color{#d91a1a}-8.79\\%}$ | | test_tdseq | 32.2520μs | 15.7082μs | 63.6611 KOps/s | 67.6043 KOps/s | $\textbf{\color{#d91a1a}-5.83\\%}$ | | test_tdseq_dispatch | 54.6940μs | 33.0001μs | 30.3029 KOps/s | 32.9123 KOps/s | $\textbf{\color{#d91a1a}-7.93\\%}$ | | test_instantiation_functorch | 2.1885ms | 2.0185ms | 495.4178 Ops/s | 513.4165 Ops/s | $\color{#d91a1a}-3.51\\%$ | | test_instantiation_td | 2.0514ms | 1.3116ms | 762.4408 Ops/s | 792.0615 Ops/s | $\color{#d91a1a}-3.74\\%$ | | test_exec_functorch | 0.4161ms | 0.2279ms | 4.3879 KOps/s | 4.5455 KOps/s | $\color{#d91a1a}-3.47\\%$ | | test_exec_functional_call | 0.4225ms | 0.2182ms | 4.5824 KOps/s | 4.5447 KOps/s | $\color{#35bf28}+0.83\\%$ | | test_exec_td | 0.4343ms | 0.2203ms | 4.5391 KOps/s | 4.6996 KOps/s | $\color{#d91a1a}-3.42\\%$ | | test_exec_td_decorator | 1.0834ms | 0.2733ms | 3.6585 KOps/s | 3.7244 KOps/s | $\color{#d91a1a}-1.77\\%$ | | test_vmap_mlp_speed[True-True] | 0.8714ms | 0.6416ms | 1.5587 KOps/s | 1.6057 KOps/s | $\color{#d91a1a}-2.93\\%$ | | test_vmap_mlp_speed[True-False] | 0.8028ms | 0.6368ms | 1.5704 KOps/s | 1.6099 KOps/s | $\color{#d91a1a}-2.45\\%$ | | test_vmap_mlp_speed[False-True] | 0.7632ms | 0.5547ms | 1.8029 KOps/s | 1.8070 KOps/s | $\color{#d91a1a}-0.23\\%$ | | test_vmap_mlp_speed[False-False] | 0.7222ms | 0.5542ms | 1.8044 KOps/s | 1.7917 KOps/s | $\color{#35bf28}+0.71\\%$ | | test_vmap_mlp_speed_decorator[True-True] | 1.3264ms | 0.6822ms | 1.4659 KOps/s | 1.4867 KOps/s | $\color{#d91a1a}-1.40\\%$ | | test_vmap_mlp_speed_decorator[True-False] | 0.8564ms | 0.6828ms | 1.4645 KOps/s | 1.4863 KOps/s | $\color{#d91a1a}-1.47\\%$ | | test_vmap_mlp_speed_decorator[False-True] | 0.8156ms | 0.5972ms | 1.6746 KOps/s | 1.6916 KOps/s | $\color{#d91a1a}-1.01\\%$ | | test_vmap_mlp_speed_decorator[False-False] | 0.7500ms | 0.5980ms | 1.6722 KOps/s | 1.6940 KOps/s | $\color{#d91a1a}-1.28\\%$ | | test_vmap_transformer_speed[True-True] | 8.8474ms | 8.6124ms 
| 116.1119 Ops/s | 117.9282 Ops/s | $\color{#d91a1a}-1.54\\%$ | | test_vmap_transformer_speed[True-False] | 8.9264ms | 8.6113ms | 116.1260 Ops/s | 118.5274 Ops/s | $\color{#d91a1a}-2.03\\%$ | | test_vmap_transformer_speed[False-True] | 8.7950ms | 8.4978ms | 117.6770 Ops/s | 118.7370 Ops/s | $\color{#d91a1a}-0.89\\%$ | | test_vmap_transformer_speed[False-False] | 9.0198ms | 8.5394ms | 117.1037 Ops/s | 119.5339 Ops/s | $\color{#d91a1a}-2.03\\%$ | | test_vmap_transformer_speed_decorator[True-True] | 20.5822ms | 20.2190ms | 49.4585 Ops/s | 50.7651 Ops/s | $\color{#d91a1a}-2.57\\%$ | | test_vmap_transformer_speed_decorator[True-False] | 21.0705ms | 20.2793ms | 49.3114 Ops/s | 50.3965 Ops/s | $\color{#d91a1a}-2.15\\%$ | | test_vmap_transformer_speed_decorator[False-True] | 20.3681ms | 20.1117ms | 49.7224 Ops/s | 51.0410 Ops/s | $\color{#d91a1a}-2.58\\%$ | | test_vmap_transformer_speed_decorator[False-False] | 20.4868ms | 20.0792ms | 49.8029 Ops/s | 50.9834 Ops/s | $\color{#d91a1a}-2.32\\%$ | | test_to_module_speed[True] | 1.2586ms | 1.1490ms | 870.3539 Ops/s | 889.8300 Ops/s | $\color{#d91a1a}-2.19\\%$ | | test_to_module_speed[False] | 1.5297ms | 1.1116ms | 899.5740 Ops/s | 911.6258 Ops/s | $\color{#d91a1a}-1.32\\%$ | | test_tc_init | 0.1770ms | 37.0435μs | 26.9953 KOps/s | 28.3460 KOps/s | $\color{#d91a1a}-4.77\\%$ | | test_tc_init_nested | 0.1625ms | 74.7101μs | 13.3851 KOps/s | 13.8983 KOps/s | $\color{#d91a1a}-3.69\\%$ | | test_tc_first_layer_tensor | 3.1389μs | 0.7801μs | 1.2818 MOps/s | 1.2917 MOps/s | $\color{#d91a1a}-0.76\\%$ | | test_tc_first_layer_nontensor | 18.1110μs | 2.5598μs | 390.6608 KOps/s | 395.6131 KOps/s | $\color{#d91a1a}-1.25\\%$ | | test_tc_second_layer_tensor | 9.0707μs | 1.6226μs | 616.2861 KOps/s | 628.9301 KOps/s | $\color{#d91a1a}-2.01\\%$ | | test_tc_second_layer_nontensor | 18.1810μs | 3.3953μs | 294.5212 KOps/s | 298.9809 KOps/s | $\color{#d91a1a}-1.49\\%$ | | test_unbind | 0.3427s | 12.4859ms | 80.0901 Ops/s | 79.7312 Ops/s | 
$\color{#35bf28}+0.45\\%$ | | test_full_like | 0.7730ms | 0.5803ms | 1.7233 KOps/s | 1.7200 KOps/s | $\color{#35bf28}+0.19\\%$ | | test_zeros_like | 0.4433ms | 0.1983ms | 5.0440 KOps/s | 5.0416 KOps/s | $\color{#35bf28}+0.05\\%$ | | test_ones_like | 0.3604ms | 0.1980ms | 5.0500 KOps/s | 5.0492 KOps/s | $\color{#35bf28}+0.02\\%$ | | test_clone | 0.5889ms | 0.4159ms | 2.4043 KOps/s | 2.4010 KOps/s | $\color{#35bf28}+0.14\\%$ | | test_squeeze | 0.1370ms | 11.0068μs | 90.8530 KOps/s | 95.8381 KOps/s | $\textbf{\color{#d91a1a}-5.20\\%}$ | | test_unsqueeze | 0.3193ms | 79.2892μs | 12.6121 KOps/s | 13.4251 KOps/s | $\textbf{\color{#d91a1a}-6.06\\%}$ | | test_split | 0.3659ms | 0.1726ms | 5.7929 KOps/s | 6.0339 KOps/s | $\color{#d91a1a}-3.99\\%$ | | test_permute | 0.3264ms | 0.1825ms | 5.4795 KOps/s | 5.5572 KOps/s | $\color{#d91a1a}-1.40\\%$ | | test_stack | 1.3835ms | 0.8883ms | 1.1258 KOps/s | 1.0957 KOps/s | $\color{#35bf28}+2.74\\%$ | | test_cat | 1.3663ms | 1.2323ms | 811.5014 Ops/s | 811.9582 Ops/s | $\color{#d91a1a}-0.06\\%$ |