pytorch / tensordict

TensorDict is a PyTorch-dedicated tensor container.
MIT License
803 stars 65 forks source link

[BugFix] use as_subclass in Buffer #913

Closed vmoens closed 1 month ago

github-actions[bot] commented 1 month ago

$\color{#D29922}\textsf{\Large ⚠\kern{0.2cm}\normalsize Warning}$ Result of CPU Benchmark Tests

Total Benchmarks: 144. Improved: $\large\color{#35bf28}7$. Worsened: $\large\color{#d91a1a}14$.

Expand to view detailed results | Name | Max | Mean | Ops | Ops on Repo `HEAD` | Change | | ------------------------------------------ | --------- | --------- | --------------- | ------------------ | ----------------------------------- | | test_plain_set_nested | 41.2970μs | 22.2434μs | 44.9571 KOps/s | 47.1177 KOps/s | $\color{#d91a1a}-4.59\\%$ | | test_plain_set_stack_nested | 89.8870μs | 22.1120μs | 45.2244 KOps/s | 47.4379 KOps/s | $\color{#d91a1a}-4.67\\%$ | | test_plain_set_nested_inplace | 56.7360μs | 24.2451μs | 41.2454 KOps/s | 43.0947 KOps/s | $\color{#d91a1a}-4.29\\%$ | | test_plain_set_stack_nested_inplace | 0.1023ms | 24.1732μs | 41.3681 KOps/s | 43.3564 KOps/s | $\color{#d91a1a}-4.59\\%$ | | test_items | 18.3950μs | 2.7545μs | 363.0475 KOps/s | 378.2035 KOps/s | $\color{#d91a1a}-4.01\\%$ | | test_items_nested | 0.5696ms | 0.3739ms | 2.6745 KOps/s | 2.7115 KOps/s | $\color{#d91a1a}-1.37\\%$ | | test_items_nested_locked | 1.9901ms | 0.3759ms | 2.6602 KOps/s | 2.6912 KOps/s | $\color{#d91a1a}-1.15\\%$ | | test_items_nested_leaf | 0.1955ms | 86.5076μs | 11.5597 KOps/s | 11.5988 KOps/s | $\color{#d91a1a}-0.34\\%$ | | test_items_stack_nested | 0.6464ms | 0.3730ms | 2.6811 KOps/s | 2.7091 KOps/s | $\color{#d91a1a}-1.03\\%$ | | test_items_stack_nested_leaf | 0.1559ms | 89.3355μs | 11.1938 KOps/s | 11.4692 KOps/s | $\color{#d91a1a}-2.40\\%$ | | test_items_stack_nested_locked | 0.6687ms | 0.3782ms | 2.6439 KOps/s | 2.7071 KOps/s | $\color{#d91a1a}-2.33\\%$ | | test_keys | 40.3250μs | 3.7989μs | 263.2358 KOps/s | 258.8235 KOps/s | $\color{#35bf28}+1.70\\%$ | | test_keys_nested | 0.2492ms | 0.1456ms | 6.8682 KOps/s | 6.8419 KOps/s | $\color{#35bf28}+0.38\\%$ | | test_keys_nested_locked | 0.8711ms | 0.1521ms | 6.5765 KOps/s | 6.6413 KOps/s | $\color{#d91a1a}-0.98\\%$ | | test_keys_nested_leaf | 0.2812ms | 0.1263ms | 7.9168 KOps/s | 8.0403 KOps/s | $\color{#d91a1a}-1.54\\%$ | | test_keys_stack_nested | 0.2788ms | 0.1467ms | 6.8147 KOps/s | 6.9538 KOps/s | 
$\color{#d91a1a}-2.00\\%$ | | test_keys_stack_nested_leaf | 0.2103ms | 0.1241ms | 8.0579 KOps/s | 8.0745 KOps/s | $\color{#d91a1a}-0.20\\%$ | | test_keys_stack_nested_locked | 0.2578ms | 0.1529ms | 6.5409 KOps/s | 6.6621 KOps/s | $\color{#d91a1a}-1.82\\%$ | | test_values | 13.4677μs | 1.1437μs | 874.3766 KOps/s | 864.0483 KOps/s | $\color{#35bf28}+1.20\\%$ | | test_values_nested | 0.1006ms | 50.7974μs | 19.6860 KOps/s | 19.5517 KOps/s | $\color{#35bf28}+0.69\\%$ | | test_values_nested_locked | 0.1170ms | 50.4507μs | 19.8213 KOps/s | 19.7343 KOps/s | $\color{#35bf28}+0.44\\%$ | | test_values_nested_leaf | 0.1087ms | 45.7480μs | 21.8589 KOps/s | 22.0306 KOps/s | $\color{#d91a1a}-0.78\\%$ | | test_values_stack_nested | 0.1215ms | 50.5318μs | 19.7895 KOps/s | 19.2068 KOps/s | $\color{#35bf28}+3.03\\%$ | | test_values_stack_nested_leaf | 0.1006ms | 45.3059μs | 22.0722 KOps/s | 21.9648 KOps/s | $\color{#35bf28}+0.49\\%$ | | test_values_stack_nested_locked | 0.1143ms | 50.4244μs | 19.8317 KOps/s | 19.7738 KOps/s | $\color{#35bf28}+0.29\\%$ | | test_membership | 30.9480μs | 0.9242μs | 1.0820 MOps/s | 1.0946 MOps/s | $\color{#d91a1a}-1.15\\%$ | | test_membership_nested | 64.3900μs | 2.7233μs | 367.2076 KOps/s | 361.8631 KOps/s | $\color{#35bf28}+1.48\\%$ | | test_membership_nested_leaf | 26.2390μs | 2.7136μs | 368.5170 KOps/s | 357.4545 KOps/s | $\color{#35bf28}+3.09\\%$ | | test_membership_stacked_nested | 18.9860μs | 2.6971μs | 370.7621 KOps/s | 358.4540 KOps/s | $\color{#35bf28}+3.43\\%$ | | test_membership_stacked_nested_leaf | 55.7740μs | 2.7462μs | 364.1349 KOps/s | 361.7840 KOps/s | $\color{#35bf28}+0.65\\%$ | | test_membership_nested_last | 42.1890μs | 3.9923μs | 250.4802 KOps/s | 240.2733 KOps/s | $\color{#35bf28}+4.25\\%$ | | test_membership_nested_leaf_last | 55.6530μs | 4.0049μs | 249.6928 KOps/s | 244.1805 KOps/s | $\color{#35bf28}+2.26\\%$ | | test_membership_stacked_nested_last | 23.0230μs | 3.9728μs | 251.7094 KOps/s | 244.6523 KOps/s | 
$\color{#35bf28}+2.88\\%$ | | test_membership_stacked_nested_leaf_last | 59.7020μs | 3.9812μs | 251.1801 KOps/s | 247.0110 KOps/s | $\color{#35bf28}+1.69\\%$ | | test_nested_getleaf | 46.3170μs | 11.3174μs | 88.3591 KOps/s | 89.5563 KOps/s | $\color{#d91a1a}-1.34\\%$ | | test_nested_get | 57.7480μs | 10.8152μs | 92.4627 KOps/s | 95.9980 KOps/s | $\color{#d91a1a}-3.68\\%$ | | test_stacked_getleaf | 61.6250μs | 10.9641μs | 91.2064 KOps/s | 90.5403 KOps/s | $\color{#35bf28}+0.74\\%$ | | test_stacked_get | 41.9880μs | 10.6988μs | 93.4687 KOps/s | 96.3832 KOps/s | $\color{#d91a1a}-3.02\\%$ | | test_nested_getitemleaf | 67.0250μs | 11.8559μs | 84.3459 KOps/s | 85.6674 KOps/s | $\color{#d91a1a}-1.54\\%$ | | test_nested_getitem | 62.7970μs | 10.7816μs | 92.7502 KOps/s | 93.0845 KOps/s | $\color{#d91a1a}-0.36\\%$ | | test_stacked_getitemleaf | 43.4410μs | 11.4559μs | 87.2910 KOps/s | 86.7408 KOps/s | $\color{#35bf28}+0.63\\%$ | | test_stacked_getitem | 64.7700μs | 10.6286μs | 94.0853 KOps/s | 92.8398 KOps/s | $\color{#35bf28}+1.34\\%$ | | test_lock_nested | 1.2121ms | 0.5363ms | 1.8645 KOps/s | 1.6206 KOps/s | $\textbf{\color{#35bf28}+15.05\\%}$ | | test_lock_stack_nested | 0.7136ms | 0.4940ms | 2.0242 KOps/s | 1.9938 KOps/s | $\color{#35bf28}+1.52\\%$ | | test_unlock_nested | 1.0051ms | 0.4451ms | 2.2465 KOps/s | 2.2653 KOps/s | $\color{#d91a1a}-0.83\\%$ | | test_unlock_stack_nested | 0.5192ms | 0.4061ms | 2.4626 KOps/s | 2.4335 KOps/s | $\color{#35bf28}+1.20\\%$ | | test_flatten_speed | 0.2669ms | 0.1059ms | 9.4433 KOps/s | 9.2685 KOps/s | $\color{#35bf28}+1.89\\%$ | | test_unflatten_speed | 0.9676ms | 0.4554ms | 2.1956 KOps/s | 2.1961 KOps/s | $\color{#d91a1a}-0.02\\%$ | | test_common_ops | 1.9939ms | 1.1505ms | 869.1584 Ops/s | 871.8557 Ops/s | $\color{#d91a1a}-0.31\\%$ | | test_creation | 26.1490μs | 2.5035μs | 399.4411 KOps/s | 393.5729 KOps/s | $\color{#35bf28}+1.49\\%$ | | test_creation_empty | 49.9230μs | 18.4348μs | 54.2451 KOps/s | 58.6454 KOps/s | 
$\textbf{\color{#d91a1a}-7.50\\%}$ | | test_creation_nested_1 | 0.1015ms | 22.1886μs | 45.0681 KOps/s | 48.8615 KOps/s | $\textbf{\color{#d91a1a}-7.76\\%}$ | | test_creation_nested_2 | 77.8350μs | 26.1432μs | 38.2508 KOps/s | 41.0188 KOps/s | $\textbf{\color{#d91a1a}-6.75\\%}$ | | test_clone | 0.1787ms | 17.6546μs | 56.6425 KOps/s | 57.0916 KOps/s | $\color{#d91a1a}-0.79\\%$ | | test_getitem[int] | 1.0254ms | 13.0537μs | 76.6066 KOps/s | 77.0961 KOps/s | $\color{#d91a1a}-0.63\\%$ | | test_getitem[slice_int] | 0.1636ms | 33.3960μs | 29.9437 KOps/s | 30.0668 KOps/s | $\color{#d91a1a}-0.41\\%$ | | test_getitem[range] | 0.3453ms | 59.6920μs | 16.7527 KOps/s | 16.9899 KOps/s | $\color{#d91a1a}-1.40\\%$ | | test_getitem[tuple] | 0.1542ms | 27.0179μs | 37.0125 KOps/s | 37.2589 KOps/s | $\color{#d91a1a}-0.66\\%$ | | test_getitem[list] | 0.2914ms | 54.0390μs | 18.5051 KOps/s | 18.3949 KOps/s | $\color{#35bf28}+0.60\\%$ | | test_setitem_dim[int] | 64.4900μs | 34.8528μs | 28.6921 KOps/s | 31.0830 KOps/s | $\textbf{\color{#d91a1a}-7.69\\%}$ | | test_setitem_dim[slice_int] | 0.1217ms | 73.5098μs | 13.6036 KOps/s | 14.0143 KOps/s | $\color{#d91a1a}-2.93\\%$ | | test_setitem_dim[range] | 0.1604ms | 93.2677μs | 10.7218 KOps/s | 10.7809 KOps/s | $\color{#d91a1a}-0.55\\%$ | | test_setitem_dim[tuple] | 0.1099ms | 60.0580μs | 16.6506 KOps/s | 17.4312 KOps/s | $\color{#d91a1a}-4.48\\%$ | | test_setitem | 0.1959ms | 30.0455μs | 33.2828 KOps/s | 33.9698 KOps/s | $\color{#d91a1a}-2.02\\%$ | | test_set | 0.1769ms | 29.8218μs | 33.5325 KOps/s | 35.2611 KOps/s | $\color{#d91a1a}-4.90\\%$ | | test_set_shared | 1.3151ms | 0.2188ms | 4.5712 KOps/s | 4.4510 KOps/s | $\color{#35bf28}+2.70\\%$ | | test_update | 0.2246ms | 37.5698μs | 26.6171 KOps/s | 28.6963 KOps/s | $\textbf{\color{#d91a1a}-7.25\\%}$ | | test_update_nested | 0.2044ms | 47.3812μs | 21.1054 KOps/s | 20.9628 KOps/s | $\color{#35bf28}+0.68\\%$ | | test_update__nested | 0.1811ms | 35.2992μs | 28.3293 KOps/s | 28.3525 KOps/s | 
$\color{#d91a1a}-0.08\\%$ | | test_set_nested | 0.1614ms | 31.8992μs | 31.3487 KOps/s | 32.2546 KOps/s | $\color{#d91a1a}-2.81\\%$ | | test_set_nested_new | 0.1692ms | 37.2491μs | 26.8463 KOps/s | 27.5050 KOps/s | $\color{#d91a1a}-2.39\\%$ | | test_select | 0.2372ms | 55.1785μs | 18.1230 KOps/s | 18.5125 KOps/s | $\color{#d91a1a}-2.10\\%$ | | test_select_nested | 0.1223ms | 61.7227μs | 16.2015 KOps/s | 16.2507 KOps/s | $\color{#d91a1a}-0.30\\%$ | | test_exclude_nested | 0.1756ms | 82.0807μs | 12.1831 KOps/s | 12.3200 KOps/s | $\color{#d91a1a}-1.11\\%$ | | test_empty[True] | 0.4453ms | 0.3501ms | 2.8566 KOps/s | 2.9133 KOps/s | $\color{#d91a1a}-1.95\\%$ | | test_empty[False] | 16.3682μs | 1.2751μs | 784.2333 KOps/s | 796.4139 KOps/s | $\color{#d91a1a}-1.53\\%$ | | test_unbind_speed | 0.4122ms | 0.3281ms | 3.0474 KOps/s | 3.0233 KOps/s | $\color{#35bf28}+0.80\\%$ | | test_unbind_speed_stack0 | 0.4787ms | 0.3219ms | 3.1067 KOps/s | 3.0781 KOps/s | $\color{#35bf28}+0.93\\%$ | | test_unbind_speed_stack1 | 90.7700ms | 0.8574ms | 1.1664 KOps/s | 1.2524 KOps/s | $\textbf{\color{#d91a1a}-6.87\\%}$ | | test_split | 84.6696ms | 2.2959ms | 435.5541 Ops/s | 395.4596 Ops/s | $\textbf{\color{#35bf28}+10.14\\%}$ | | test_chunk | 92.8015ms | 2.3264ms | 429.8479 Ops/s | 428.5402 Ops/s | $\color{#35bf28}+0.31\\%$ | | test_creation[device0] | 4.7031ms | 0.1263ms | 7.9152 KOps/s | 7.9116 KOps/s | $\color{#35bf28}+0.04\\%$ | | test_creation_from_tensor | 0.2918ms | 0.1225ms | 8.1638 KOps/s | 8.0701 KOps/s | $\color{#35bf28}+1.16\\%$ | | test_add_one[memmap_tensor0] | 0.4360ms | 7.8951μs | 126.6605 KOps/s | 125.5730 KOps/s | $\color{#35bf28}+0.87\\%$ | | test_contiguous[memmap_tensor0] | 26.3290μs | 2.1719μs | 460.4348 KOps/s | 453.7158 KOps/s | $\color{#35bf28}+1.48\\%$ | | test_stack[memmap_tensor0] | 0.1116ms | 6.2424μs | 160.1942 KOps/s | 163.3973 KOps/s | $\color{#d91a1a}-1.96\\%$ | | test_memmaptd_index | 1.3858ms | 0.4341ms | 2.3037 KOps/s | 2.2217 KOps/s | 
$\color{#35bf28}+3.69\\%$ | | test_memmaptd_index_astensor | 1.2307ms | 0.5161ms | 1.9375 KOps/s | 1.8688 KOps/s | $\color{#35bf28}+3.67\\%$ | | test_memmaptd_index_op | 1.7450ms | 1.0723ms | 932.5623 Ops/s | 922.6910 Ops/s | $\color{#35bf28}+1.07\\%$ | | test_serialize_model | 0.2283s | 0.1505s | 6.6425 Ops/s | 7.4711 Ops/s | $\textbf{\color{#d91a1a}-11.09\\%}$ | | test_serialize_model_pickle | 0.4468s | 0.3922s | 2.5495 Ops/s | 2.4916 Ops/s | $\color{#35bf28}+2.33\\%$ | | test_serialize_weights | 0.1382s | 0.1275s | 7.8437 Ops/s | 6.9975 Ops/s | $\textbf{\color{#35bf28}+12.09\\%}$ | | test_serialize_weights_returnearly | 0.2557s | 0.1803s | 5.5451 Ops/s | 5.8333 Ops/s | $\color{#d91a1a}-4.94\\%$ | | test_serialize_weights_pickle | 0.5644s | 0.4632s | 2.1589 Ops/s | 2.5415 Ops/s | $\textbf{\color{#d91a1a}-15.06\\%}$ | | test_serialize_weights_filesystem | 0.1557s | 0.1473s | 6.7904 Ops/s | 6.8930 Ops/s | $\color{#d91a1a}-1.49\\%$ | | test_serialize_model_filesystem | 0.1565s | 0.1507s | 6.6376 Ops/s | 5.6802 Ops/s | $\textbf{\color{#35bf28}+16.86\\%}$ | | test_reshape_pytree | 97.2810μs | 39.6020μs | 25.2512 KOps/s | 24.9966 KOps/s | $\color{#35bf28}+1.02\\%$ | | test_reshape_td | 0.1369ms | 50.5441μs | 19.7847 KOps/s | 19.9158 KOps/s | $\color{#d91a1a}-0.66\\%$ | | test_view_pytree | 82.0630μs | 39.3533μs | 25.4108 KOps/s | 25.1493 KOps/s | $\color{#35bf28}+1.04\\%$ | | test_view_td | 0.1533ms | 56.5073μs | 17.6968 KOps/s | 17.4365 KOps/s | $\color{#35bf28}+1.49\\%$ | | test_unbind_pytree | 0.1025ms | 36.4026μs | 27.4705 KOps/s | 27.6941 KOps/s | $\color{#d91a1a}-0.81\\%$ | | test_unbind_td | 0.4446ms | 49.7758μs | 20.0901 KOps/s | 20.9130 KOps/s | $\color{#d91a1a}-3.93\\%$ | | test_split_pytree | 2.1983ms | 39.5598μs | 25.2782 KOps/s | 25.6945 KOps/s | $\color{#d91a1a}-1.62\\%$ | | test_split_td | 0.2072ms | 61.8817μs | 16.1599 KOps/s | 16.2371 KOps/s | $\color{#d91a1a}-0.48\\%$ | | test_add_pytree | 0.1216ms | 44.6040μs | 22.4195 KOps/s | 21.9809 KOps/s | 
$\color{#35bf28}+2.00\\%$ | | test_add_td | 0.1474ms | 82.7943μs | 12.0781 KOps/s | 11.9269 KOps/s | $\color{#35bf28}+1.27\\%$ | | test_distributed | 0.2988ms | 0.1310ms | 7.6336 KOps/s | 7.4085 KOps/s | $\color{#35bf28}+3.04\\%$ | | test_tdmodule | 0.1347ms | 17.5159μs | 57.0911 KOps/s | 61.3115 KOps/s | $\textbf{\color{#d91a1a}-6.88\\%}$ | | test_tdmodule_dispatch | 65.4620μs | 35.5861μs | 28.1009 KOps/s | 28.4240 KOps/s | $\color{#d91a1a}-1.14\\%$ | | test_tdseq | 55.3230μs | 19.6292μs | 50.9446 KOps/s | 53.2116 KOps/s | $\color{#d91a1a}-4.26\\%$ | | test_tdseq_dispatch | 75.5110μs | 41.3938μs | 24.1582 KOps/s | 25.8958 KOps/s | $\textbf{\color{#d91a1a}-6.71\\%}$ | | test_instantiation_functorch | 2.4711ms | 1.6351ms | 611.5804 Ops/s | 622.4575 Ops/s | $\color{#d91a1a}-1.75\\%$ | | test_instantiation_td | 96.7942ms | 1.2934ms | 773.1759 Ops/s | 854.7845 Ops/s | $\textbf{\color{#d91a1a}-9.55\\%}$ | | test_exec_functorch | 0.2835ms | 0.1834ms | 5.4513 KOps/s | 5.3612 KOps/s | $\color{#35bf28}+1.68\\%$ | | test_exec_functional_call | 0.3546ms | 0.1703ms | 5.8719 KOps/s | 5.6317 KOps/s | $\color{#35bf28}+4.27\\%$ | | test_exec_td | 0.3713ms | 0.1731ms | 5.7771 KOps/s | 5.5822 KOps/s | $\color{#35bf28}+3.49\\%$ | | test_exec_td_decorator | 0.5508ms | 0.2572ms | 3.8880 KOps/s | 3.7484 KOps/s | $\color{#35bf28}+3.72\\%$ | | test_vmap_mlp_speed[True-True] | 0.8525ms | 0.6137ms | 1.6295 KOps/s | 1.6385 KOps/s | $\color{#d91a1a}-0.54\\%$ | | test_vmap_mlp_speed[True-False] | 0.9423ms | 0.6174ms | 1.6197 KOps/s | 1.6451 KOps/s | $\color{#d91a1a}-1.55\\%$ | | test_vmap_mlp_speed[False-True] | 0.6145ms | 0.5039ms | 1.9844 KOps/s | 1.9810 KOps/s | $\color{#35bf28}+0.18\\%$ | | test_vmap_mlp_speed[False-False] | 0.7519ms | 0.5097ms | 1.9618 KOps/s | 1.9798 KOps/s | $\color{#d91a1a}-0.91\\%$ | | test_vmap_mlp_speed_decorator[True-True] | 1.2929ms | 0.7167ms | 1.3952 KOps/s | 1.4227 KOps/s | $\color{#d91a1a}-1.93\\%$ | | test_vmap_mlp_speed_decorator[True-False] | 1.1337ms | 
0.7178ms | 1.3931 KOps/s | 1.4142 KOps/s | $\color{#d91a1a}-1.50\\%$ | | test_vmap_mlp_speed_decorator[False-True] | 1.0374ms | 0.5990ms | 1.6695 KOps/s | 1.6949 KOps/s | $\color{#d91a1a}-1.49\\%$ | | test_vmap_mlp_speed_decorator[False-False] | 0.9523ms | 0.5971ms | 1.6746 KOps/s | 1.7066 KOps/s | $\color{#d91a1a}-1.87\\%$ | | test_to_module_speed[True] | 2.9428ms | 1.8156ms | 550.7798 Ops/s | 538.0614 Ops/s | $\color{#35bf28}+2.36\\%$ | | test_to_module_speed[False] | 2.0241ms | 1.7813ms | 561.3956 Ops/s | 552.3160 Ops/s | $\color{#35bf28}+1.64\\%$ | | test_tc_init | 99.6860μs | 45.6515μs | 21.9051 KOps/s | 24.6648 KOps/s | $\textbf{\color{#d91a1a}-11.19\\%}$ | | test_tc_init_nested | 0.1624ms | 90.2520μs | 11.0801 KOps/s | 11.9182 KOps/s | $\textbf{\color{#d91a1a}-7.03\\%}$ | | test_tc_first_layer_tensor | 72.1820μs | 9.0980μs | 109.9144 KOps/s | 109.7436 KOps/s | $\color{#35bf28}+0.16\\%$ | | test_tc_first_layer_nontensor | 59.3100μs | 9.0150μs | 110.9258 KOps/s | 109.8718 KOps/s | $\color{#35bf28}+0.96\\%$ | | test_tc_second_layer_tensor | 28.6030μs | 2.9035μs | 344.4077 KOps/s | 348.0677 KOps/s | $\color{#d91a1a}-1.05\\%$ | | test_tc_second_layer_nontensor | 37.7600μs | 10.2735μs | 97.3378 KOps/s | 97.2219 KOps/s | $\color{#35bf28}+0.12\\%$ | | test_unbind | 0.1120s | 15.1319ms | 66.0855 Ops/s | 68.1737 Ops/s | $\color{#d91a1a}-3.06\\%$ | | test_full_like | 12.2564ms | 9.1459ms | 109.3386 Ops/s | 102.6380 Ops/s | $\textbf{\color{#35bf28}+6.53\\%}$ | | test_zeros_like | 15.6679ms | 7.7153ms | 129.6132 Ops/s | 134.7602 Ops/s | $\color{#d91a1a}-3.82\\%$ | | test_ones_like | 17.7371ms | 8.4597ms | 118.2070 Ops/s | 128.8162 Ops/s | $\textbf{\color{#d91a1a}-8.24\\%}$ | | test_clone | 16.0711ms | 9.6325ms | 103.8157 Ops/s | 93.4572 Ops/s | $\textbf{\color{#35bf28}+11.08\\%}$ | | test_squeeze | 80.6010μs | 14.4928μs | 68.9997 KOps/s | 68.0006 KOps/s | $\color{#35bf28}+1.47\\%$ | | test_unsqueeze | 0.1688ms | 97.1989μs | 10.2882 KOps/s | 10.2341 KOps/s | 
$\color{#35bf28}+0.53\\%$ | | test_split | 0.4883ms | 0.2078ms | 4.8119 KOps/s | 4.6712 KOps/s | $\color{#35bf28}+3.01\\%$ | | test_permute | 0.4576ms | 0.2322ms | 4.3065 KOps/s | 4.3376 KOps/s | $\color{#d91a1a}-0.72\\%$ | | test_stack | 32.3780ms | 27.0992ms | 36.9015 Ops/s | 34.9750 Ops/s | $\textbf{\color{#35bf28}+5.51\\%}$ | | test_cat | 32.6051ms | 27.3610ms | 36.5483 Ops/s | 34.9417 Ops/s | $\color{#35bf28}+4.60\\%$ |
github-actions[bot] commented 1 month ago

$\color{#D29922}\textsf{\Large ⚠\kern{0.2cm}\normalsize Warning}$ Result of GPU Benchmark Tests

Total Benchmarks: 219. Improved: $\large\color{#35bf28}8$. Worsened: $\large\color{#d91a1a}25$.

Expand to view detailed results | Name | Max | Mean | Ops | Ops on Repo `HEAD` | Change | | -------------------------------------------------- | --------- | --------- | --------------- | ------------------ | ----------------------------------- | | test_plain_set_nested | 0.1697ms | 17.7877μs | 56.2188 KOps/s | 61.7655 KOps/s | $\textbf{\color{#d91a1a}-8.98\\%}$ | | test_plain_set_stack_nested | 43.7120μs | 17.9506μs | 55.7086 KOps/s | 61.1088 KOps/s | $\textbf{\color{#d91a1a}-8.84\\%}$ | | test_plain_set_nested_inplace | 50.9130μs | 19.0684μs | 52.4427 KOps/s | 57.0908 KOps/s | $\textbf{\color{#d91a1a}-8.14\\%}$ | | test_plain_set_stack_nested_inplace | 38.6320μs | 19.0675μs | 52.4454 KOps/s | 57.6896 KOps/s | $\textbf{\color{#d91a1a}-9.09\\%}$ | | test_items | 22.2210μs | 4.7173μs | 211.9838 KOps/s | 212.4243 KOps/s | $\color{#d91a1a}-0.21\\%$ | | test_items_nested | 0.4242ms | 0.3932ms | 2.5433 KOps/s | 2.5710 KOps/s | $\color{#d91a1a}-1.08\\%$ | | test_items_nested_locked | 0.4384ms | 0.3930ms | 2.5447 KOps/s | 2.5403 KOps/s | $\color{#35bf28}+0.17\\%$ | | test_items_nested_leaf | 0.1196ms | 86.9254μs | 11.5041 KOps/s | 11.6025 KOps/s | $\color{#d91a1a}-0.85\\%$ | | test_items_stack_nested | 0.4406ms | 0.4010ms | 2.4937 KOps/s | 2.5451 KOps/s | $\color{#d91a1a}-2.02\\%$ | | test_items_stack_nested_leaf | 0.2012ms | 87.0569μs | 11.4867 KOps/s | 11.6019 KOps/s | $\color{#d91a1a}-0.99\\%$ | | test_items_stack_nested_locked | 0.4252ms | 0.3921ms | 2.5503 KOps/s | 2.5438 KOps/s | $\color{#35bf28}+0.26\\%$ | | test_keys | 27.6320μs | 4.3972μs | 227.4199 KOps/s | 227.4379 KOps/s | $-0.01\\%$ | | test_keys_nested | 94.6650μs | 66.0578μs | 15.1383 KOps/s | 15.3052 KOps/s | $\color{#d91a1a}-1.09\\%$ | | test_keys_nested_locked | 0.7958ms | 72.8216μs | 13.7322 KOps/s | 13.6902 KOps/s | $\color{#35bf28}+0.31\\%$ | | test_keys_nested_leaf | 80.1650μs | 57.6190μs | 17.3554 KOps/s | 17.3324 KOps/s | $\color{#35bf28}+0.13\\%$ | | test_keys_stack_nested | 0.2595ms | 65.7431μs | 
15.2107 KOps/s | 14.7691 KOps/s | $\color{#35bf28}+2.99\\%$ | | test_keys_stack_nested_leaf | 85.6750μs | 57.2861μs | 17.4562 KOps/s | 17.1747 KOps/s | $\color{#35bf28}+1.64\\%$ | | test_keys_stack_nested_locked | 0.2342ms | 72.1727μs | 13.8557 KOps/s | 13.7196 KOps/s | $\color{#35bf28}+0.99\\%$ | | test_values | 6.5170μs | 1.7719μs | 564.3742 KOps/s | 565.9636 KOps/s | $\color{#d91a1a}-0.28\\%$ | | test_values_nested | 58.9830μs | 34.1413μs | 29.2901 KOps/s | 29.4854 KOps/s | $\color{#d91a1a}-0.66\\%$ | | test_values_nested_locked | 71.3840μs | 35.8314μs | 27.9085 KOps/s | 27.9115 KOps/s | $\color{#d91a1a}-0.01\\%$ | | test_values_nested_leaf | 45.9920μs | 30.2061μs | 33.1059 KOps/s | 32.9442 KOps/s | $\color{#35bf28}+0.49\\%$ | | test_values_stack_nested | 0.1668ms | 34.7840μs | 28.7488 KOps/s | 28.9708 KOps/s | $\color{#d91a1a}-0.77\\%$ | | test_values_stack_nested_leaf | 73.9840μs | 30.9622μs | 32.2975 KOps/s | 32.7749 KOps/s | $\color{#d91a1a}-1.46\\%$ | | test_values_stack_nested_locked | 74.7140μs | 36.5627μs | 27.3503 KOps/s | 27.5791 KOps/s | $\color{#d91a1a}-0.83\\%$ | | test_membership | 1.7150μs | 0.5450μs | 1.8347 MOps/s | 1.8572 MOps/s | $\color{#d91a1a}-1.21\\%$ | | test_membership_nested | 19.5510μs | 2.0932μs | 477.7460 KOps/s | 489.2019 KOps/s | $\color{#d91a1a}-2.34\\%$ | | test_membership_nested_leaf | 16.2210μs | 2.0325μs | 491.9968 KOps/s | 480.7795 KOps/s | $\color{#35bf28}+2.33\\%$ | | test_membership_stacked_nested | 30.8410μs | 2.1177μs | 472.2099 KOps/s | 473.5617 KOps/s | $\color{#d91a1a}-0.29\\%$ | | test_membership_stacked_nested_leaf | 20.5110μs | 2.0989μs | 476.4381 KOps/s | 472.6801 KOps/s | $\color{#35bf28}+0.80\\%$ | | test_membership_nested_last | 22.8810μs | 3.0571μs | 327.1060 KOps/s | 326.4756 KOps/s | $\color{#35bf28}+0.19\\%$ | | test_membership_nested_leaf_last | 28.6310μs | 3.0322μs | 329.7931 KOps/s | 326.8910 KOps/s | $\color{#35bf28}+0.89\\%$ | | test_membership_stacked_nested_last | 25.5920μs | 3.0490μs | 327.9755 
KOps/s | 328.3809 KOps/s | $\color{#d91a1a}-0.12\\%$ | | test_membership_stacked_nested_leaf_last | 32.3910μs | 3.0542μs | 327.4170 KOps/s | 331.6851 KOps/s | $\color{#d91a1a}-1.29\\%$ | | test_nested_getleaf | 28.4020μs | 8.0311μs | 124.5156 KOps/s | 124.4235 KOps/s | $\color{#35bf28}+0.07\\%$ | | test_nested_get | 40.4720μs | 7.5628μs | 132.2255 KOps/s | 132.1033 KOps/s | $\color{#35bf28}+0.09\\%$ | | test_stacked_getleaf | 36.2320μs | 8.1199μs | 123.1541 KOps/s | 123.4371 KOps/s | $\color{#d91a1a}-0.23\\%$ | | test_stacked_get | 21.6110μs | 7.5742μs | 132.0279 KOps/s | 131.2150 KOps/s | $\color{#35bf28}+0.62\\%$ | | test_nested_getitemleaf | 34.9020μs | 8.2352μs | 121.4302 KOps/s | 121.2500 KOps/s | $\color{#35bf28}+0.15\\%$ | | test_nested_getitem | 37.1830μs | 7.7148μs | 129.6216 KOps/s | 129.2469 KOps/s | $\color{#35bf28}+0.29\\%$ | | test_stacked_getitemleaf | 27.8210μs | 8.2674μs | 120.9569 KOps/s | 121.1810 KOps/s | $\color{#d91a1a}-0.18\\%$ | | test_stacked_getitem | 35.9420μs | 7.7396μs | 129.2061 KOps/s | 129.0317 KOps/s | $\color{#35bf28}+0.14\\%$ | | test_lock_nested | 10.2891ms | 0.5049ms | 1.9804 KOps/s | 2.0399 KOps/s | $\color{#d91a1a}-2.91\\%$ | | test_lock_stack_nested | 0.4810ms | 0.4474ms | 2.2352 KOps/s | 2.2140 KOps/s | $\color{#35bf28}+0.96\\%$ | | test_unlock_nested | 0.9235ms | 0.4097ms | 2.4408 KOps/s | 2.4421 KOps/s | $\color{#d91a1a}-0.05\\%$ | | test_unlock_stack_nested | 0.4792ms | 0.3661ms | 2.7318 KOps/s | 2.7034 KOps/s | $\color{#35bf28}+1.05\\%$ | | test_flatten_speed | 0.5030ms | 0.1058ms | 9.4547 KOps/s | 9.3641 KOps/s | $\color{#35bf28}+0.97\\%$ | | test_unflatten_speed | 0.3302ms | 0.2964ms | 3.3736 KOps/s | 3.3736 KOps/s | $+0.00\\%$ | | test_common_ops | 1.6043ms | 1.3905ms | 719.1738 Ops/s | 713.5260 Ops/s | $\color{#35bf28}+0.79\\%$ | | test_creation | 21.0610μs | 2.0348μs | 491.4531 KOps/s | 490.5727 KOps/s | $\color{#35bf28}+0.18\\%$ | | test_creation_empty | 50.2730μs | 19.2471μs | 51.9558 KOps/s | 62.9823 KOps/s | 
$\textbf{\color{#d91a1a}-17.51\\%}$ | | test_creation_nested_1 | 42.7020μs | 21.6608μs | 46.1664 KOps/s | 55.8373 KOps/s | $\textbf{\color{#d91a1a}-17.32\\%}$ | | test_creation_nested_2 | 57.5430μs | 24.8780μs | 40.1961 KOps/s | 48.6568 KOps/s | $\textbf{\color{#d91a1a}-17.39\\%}$ | | test_clone | 0.1873ms | 32.1120μs | 31.1410 KOps/s | 31.7525 KOps/s | $\color{#d91a1a}-1.93\\%$ | | test_getitem[int] | 1.2454ms | 18.3679μs | 54.4428 KOps/s | 56.3934 KOps/s | $\color{#d91a1a}-3.46\\%$ | | test_getitem[slice_int] | 0.1702ms | 29.1019μs | 34.3620 KOps/s | 34.0573 KOps/s | $\color{#35bf28}+0.89\\%$ | | test_getitem[range] | 0.3404ms | 0.1201ms | 8.3273 KOps/s | 8.4925 KOps/s | $\color{#d91a1a}-1.95\\%$ | | test_getitem[tuple] | 91.7240ms | 32.8794μs | 30.4141 KOps/s | 38.7955 KOps/s | $\textbf{\color{#d91a1a}-21.60\\%}$ | | test_getitem[list] | 0.2761ms | 0.1091ms | 9.1656 KOps/s | 9.2405 KOps/s | $\color{#d91a1a}-0.81\\%$ | | test_setitem_dim[int] | 81.9050μs | 56.6732μs | 17.6450 KOps/s | 19.2310 KOps/s | $\textbf{\color{#d91a1a}-8.25\\%}$ | | test_setitem_dim[slice_int] | 0.1125ms | 80.3738μs | 12.4419 KOps/s | 12.8907 KOps/s | $\color{#d91a1a}-3.48\\%$ | | test_setitem_dim[range] | 0.2961ms | 0.1488ms | 6.7198 KOps/s | 7.0234 KOps/s | $\color{#d91a1a}-4.32\\%$ | | test_setitem_dim[tuple] | 92.0950μs | 72.5559μs | 13.7825 KOps/s | 14.2731 KOps/s | $\color{#d91a1a}-3.44\\%$ | | test_setitem | 0.1971ms | 47.1853μs | 21.1931 KOps/s | 22.6064 KOps/s | $\textbf{\color{#d91a1a}-6.25\\%}$ | | test_set | 0.1905ms | 45.6457μs | 21.9079 KOps/s | 23.1959 KOps/s | $\textbf{\color{#d91a1a}-5.55\\%}$ | | test_set_shared | 0.3724ms | 60.8880μs | 16.4236 KOps/s | 17.8844 KOps/s | $\textbf{\color{#d91a1a}-8.17\\%}$ | | test_update | 0.2870ms | 60.2989μs | 16.5840 KOps/s | 19.7835 KOps/s | $\textbf{\color{#d91a1a}-16.17\\%}$ | | test_update_nested | 0.2015ms | 65.4030μs | 15.2898 KOps/s | 16.6028 KOps/s | $\textbf{\color{#d91a1a}-7.91\\%}$ | | test_update__nested | 0.2144ms | 
64.2532μs | 15.5634 KOps/s | 14.3224 KOps/s | $\textbf{\color{#35bf28}+8.66\\%}$ | | test_set_nested | 0.2390ms | 53.1185μs | 18.8258 KOps/s | 20.1395 KOps/s | $\textbf{\color{#d91a1a}-6.52\\%}$ | | test_set_nested_new | 0.2374ms | 56.8664μs | 17.5851 KOps/s | 18.3915 KOps/s | $\color{#d91a1a}-4.38\\%$ | | test_select | 0.2409ms | 72.7089μs | 13.7535 KOps/s | 14.1657 KOps/s | $\color{#d91a1a}-2.91\\%$ | | test_select_nested | 0.3162ms | 52.6331μs | 18.9994 KOps/s | 18.9115 KOps/s | $\color{#35bf28}+0.46\\%$ | | test_exclude_nested | 0.1672ms | 72.4604μs | 13.8006 KOps/s | 13.6959 KOps/s | $\color{#35bf28}+0.77\\%$ | | test_empty[True] | 0.3281ms | 0.3011ms | 3.3212 KOps/s | 3.3362 KOps/s | $\color{#d91a1a}-0.45\\%$ | | test_empty[False] | 2.9351μs | 0.9382μs | 1.0659 MOps/s | 1.0542 MOps/s | $\color{#35bf28}+1.11\\%$ | | test_to | 92.8850μs | 38.7006μs | 25.8394 KOps/s | 25.3500 KOps/s | $\color{#35bf28}+1.93\\%$ | | test_to_nonblocking | 0.1916ms | 24.4542μs | 40.8928 KOps/s | 40.1079 KOps/s | $\color{#35bf28}+1.96\\%$ | | test_unbind_speed | 1.2747ms | 0.3158ms | 3.1664 KOps/s | 2.9094 KOps/s | $\textbf{\color{#35bf28}+8.83\\%}$ | | test_unbind_speed_stack0 | 0.3462ms | 0.3160ms | 3.1648 KOps/s | 3.0390 KOps/s | $\color{#35bf28}+4.14\\%$ | | test_unbind_speed_stack1 | 91.0724ms | 0.7911ms | 1.2641 KOps/s | 1.2278 KOps/s | $\color{#35bf28}+2.96\\%$ | | test_split | 93.0374ms | 2.3605ms | 423.6401 Ops/s | 425.6572 Ops/s | $\color{#d91a1a}-0.47\\%$ | | test_chunk | 2.2711ms | 2.1553ms | 463.9703 Ops/s | 426.3660 Ops/s | $\textbf{\color{#35bf28}+8.82\\%}$ | | test_creation[device0] | 0.2750ms | 0.1125ms | 8.8909 KOps/s | 9.3964 KOps/s | $\textbf{\color{#d91a1a}-5.38\\%}$ | | test_creation_from_tensor | 0.2903ms | 0.1077ms | 9.2875 KOps/s | 9.6579 KOps/s | $\color{#d91a1a}-3.84\\%$ | | test_add_one[memmap_tensor0] | 73.9040μs | 9.3931μs | 106.4608 KOps/s | 104.9174 KOps/s | $\color{#35bf28}+1.47\\%$ | | test_contiguous[memmap_tensor0] | 23.6010μs | 2.3074μs | 433.3837 
KOps/s | 436.3004 KOps/s | $\color{#d91a1a}-0.67\\%$ | | test_stack[memmap_tensor0] | 0.1474ms | 7.0882μs | 141.0805 KOps/s | 137.8836 KOps/s | $\color{#35bf28}+2.32\\%$ | | test_memmaptd_index | 1.1768ms | 0.4411ms | 2.2670 KOps/s | 2.2801 KOps/s | $\color{#d91a1a}-0.58\\%$ | | test_memmaptd_index_astensor | 0.7630ms | 0.5020ms | 1.9919 KOps/s | 1.9903 KOps/s | $\color{#35bf28}+0.08\\%$ | | test_memmaptd_index_op | 1.5223ms | 1.1184ms | 894.1301 Ops/s | 937.3237 Ops/s | $\color{#d91a1a}-4.61\\%$ | | test_serialize_model | 0.2017s | 0.1100s | 9.0939 Ops/s | 10.0975 Ops/s | $\textbf{\color{#d91a1a}-9.94\\%}$ | | test_serialize_model_pickle | 1.3509s | 1.2368s | 0.8085 Ops/s | 0.8057 Ops/s | $\color{#35bf28}+0.35\\%$ | | test_serialize_weights | 96.7063ms | 92.6646ms | 10.7916 Ops/s | 9.1006 Ops/s | $\textbf{\color{#35bf28}+18.58\\%}$ | | test_serialize_weights_returnearly | 80.4186ms | 71.2900ms | 14.0272 Ops/s | 13.9422 Ops/s | $\color{#35bf28}+0.61\\%$ | | test_serialize_weights_pickle | 1.3525s | 1.2366s | 0.8087 Ops/s | 0.8085 Ops/s | $\color{#35bf28}+0.03\\%$ | | test_reshape_pytree | 0.2425ms | 39.3620μs | 25.4052 KOps/s | 25.3460 KOps/s | $\color{#35bf28}+0.23\\%$ | | test_reshape_td | 80.7640μs | 45.3558μs | 22.0479 KOps/s | 22.5682 KOps/s | $\color{#d91a1a}-2.31\\%$ | | test_view_pytree | 0.1549ms | 38.6472μs | 25.8751 KOps/s | 25.4654 KOps/s | $\color{#35bf28}+1.61\\%$ | | test_view_td | 0.1741ms | 49.5505μs | 20.1814 KOps/s | 19.8426 KOps/s | $\color{#35bf28}+1.71\\%$ | | test_unbind_pytree | 0.1106ms | 39.0326μs | 25.6196 KOps/s | 25.8359 KOps/s | $\color{#d91a1a}-0.84\\%$ | | test_unbind_td | 0.4019ms | 49.6429μs | 20.1439 KOps/s | 20.5056 KOps/s | $\color{#d91a1a}-1.76\\%$ | | test_split_pytree | 0.4284ms | 56.4551μs | 17.7132 KOps/s | 18.7847 KOps/s | $\textbf{\color{#d91a1a}-5.70\\%}$ | | test_split_td | 0.2042ms | 62.3318μs | 16.0432 KOps/s | 16.1907 KOps/s | $\color{#d91a1a}-0.91\\%$ | | test_add_pytree | 0.2525ms | 66.2708μs | 15.0896 KOps/s | 
15.5355 KOps/s | $\color{#d91a1a}-2.87\\%$ | | test_add_td | 0.2774ms | 0.1077ms | 9.2879 KOps/s | 9.8825 KOps/s | $\textbf{\color{#d91a1a}-6.02\\%}$ | | test_compile_add_one_nested[tensordict-compile] | 0.4164ms | 0.2187ms | 4.5719 KOps/s | 4.6824 KOps/s | $\color{#d91a1a}-2.36\\%$ | | test_compile_add_one_nested[tensordict-eager] | 0.3798ms | 0.1775ms | 5.6352 KOps/s | 5.7349 KOps/s | $\color{#d91a1a}-1.74\\%$ | | test_compile_add_one_nested[pytree-compile] | 0.4086ms | 0.1505ms | 6.6436 KOps/s | 6.6702 KOps/s | $\color{#d91a1a}-0.40\\%$ | | test_compile_add_one_nested[pytree-eager] | 0.3518ms | 0.2018ms | 4.9551 KOps/s | 4.9659 KOps/s | $\color{#d91a1a}-0.22\\%$ | | test_compile_copy_nested[tensordict-compile] | 0.2339ms | 23.4570μs | 42.6313 KOps/s | 44.1014 KOps/s | $\color{#d91a1a}-3.33\\%$ | | test_compile_copy_nested[tensordict-eager] | 0.2380ms | 47.8233μs | 20.9103 KOps/s | 20.3872 KOps/s | $\color{#35bf28}+2.57\\%$ | | test_compile_copy_nested[pytree-compile] | 0.2865ms | 73.0748μs | 13.6846 KOps/s | 13.6849 KOps/s | $-0.00\\%$ | | test_compile_copy_nested[pytree-eager] | 0.2578ms | 59.8356μs | 16.7124 KOps/s | 16.7963 KOps/s | $\color{#d91a1a}-0.50\\%$ | | test_compile_add_one_flat[tensordict-compile] | 0.4773ms | 0.3332ms | 3.0010 KOps/s | 2.9803 KOps/s | $\color{#35bf28}+0.69\\%$ | | test_compile_add_one_flat[tensordict-eager] | 0.5308ms | 0.2247ms | 4.4505 KOps/s | 4.4380 KOps/s | $\color{#35bf28}+0.28\\%$ | | test_compile_add_one_flat[tensorclass-compile] | 0.3045ms | 0.1349ms | 7.4151 KOps/s | 7.4415 KOps/s | $\color{#d91a1a}-0.36\\%$ | | test_compile_add_one_flat[tensorclass-eager] | 0.3114ms | 65.7596μs | 15.2069 KOps/s | 15.1780 KOps/s | $\color{#35bf28}+0.19\\%$ | | test_compile_add_one_flat[pytree-compile] | 0.5955ms | 0.3308ms | 3.0230 KOps/s | 3.0111 KOps/s | $\color{#35bf28}+0.40\\%$ | | test_compile_add_one_flat[pytree-eager] | 0.8671ms | 0.6592ms | 1.5170 KOps/s | 1.5139 KOps/s | $\color{#35bf28}+0.21\\%$ | | 
test_compile_add_self_flat[tensordict-eager] | 0.4996ms | 0.2744ms | 3.6441 KOps/s | 3.6120 KOps/s | $\color{#35bf28}+0.89\\%$ | | test_compile_add_self_flat[tensordict-compile] | 0.4750ms | 0.3342ms | 2.9922 KOps/s | 2.9751 KOps/s | $\color{#35bf28}+0.57\\%$ | | test_compile_add_self_flat[tensorclass-eager] | 0.3880ms | 79.3415μs | 12.6037 KOps/s | 12.7519 KOps/s | $\color{#d91a1a}-1.16\\%$ | | test_compile_add_self_flat[tensorclass-compile] | 0.2949ms | 0.1363ms | 7.3389 KOps/s | 7.3782 KOps/s | $\color{#d91a1a}-0.53\\%$ | | test_compile_add_self_flat[pytree-eager] | 0.7964ms | 0.5658ms | 1.7674 KOps/s | 1.7762 KOps/s | $\color{#d91a1a}-0.50\\%$ | | test_compile_add_self_flat[pytree-compile] | 0.5730ms | 0.3303ms | 3.0272 KOps/s | 3.0120 KOps/s | $\color{#35bf28}+0.50\\%$ | | test_compile_copy_flat[tensordict-compile] | 0.2634ms | 19.3578μs | 51.6589 KOps/s | 52.5692 KOps/s | $\color{#d91a1a}-1.73\\%$ | | test_compile_copy_flat[tensordict-eager] | 0.2353ms | 32.0285μs | 31.2222 KOps/s | 30.5934 KOps/s | $\color{#35bf28}+2.06\\%$ | | test_compile_copy_flat[pytree-compile] | 0.2833ms | 77.3944μs | 12.9208 KOps/s | 12.9741 KOps/s | $\color{#d91a1a}-0.41\\%$ | | test_compile_copy_flat[pytree-eager] | 0.2565ms | 60.8509μs | 16.4336 KOps/s | 16.5648 KOps/s | $\color{#d91a1a}-0.79\\%$ | | test_compile_assign_and_add[tensordict-compile] | 2.5878ms | 0.9451ms | 1.0581 KOps/s | 1.0511 KOps/s | $\color{#35bf28}+0.66\\%$ | | test_compile_assign_and_add[tensordict-eager] | 3.5739ms | 3.4230ms | 292.1432 Ops/s | 289.5352 Ops/s | $\color{#35bf28}+0.90\\%$ | | test_compile_assign_and_add[pytree-compile] | 2.5562ms | 0.9324ms | 1.0725 KOps/s | 1.0727 KOps/s | $\color{#d91a1a}-0.02\\%$ | | test_compile_assign_and_add[pytree-eager] | 3.6378ms | 3.4617ms | 288.8792 Ops/s | 288.8164 Ops/s | $\color{#35bf28}+0.02\\%$ | | test_compile_indexing[tensor-tensordict-compile] | 0.2518ms | 0.1158ms | 8.6363 KOps/s | 8.7074 KOps/s | $\color{#d91a1a}-0.82\\%$ | | 
test_compile_indexing[tensor-tensordict-eager] | 0.2346ms | 65.8541μs | 15.1851 KOps/s | 14.7364 KOps/s | $\color{#35bf28}+3.04\\%$ | | test_compile_indexing[tensor-tensorclass-compile] | 0.2504ms | 0.1081ms | 9.2547 KOps/s | 9.3004 KOps/s | $\color{#d91a1a}-0.49\\%$ | | test_compile_indexing[tensor-tensorclass-eager] | 0.1982ms | 47.9709μs | 20.8460 KOps/s | 20.9416 KOps/s | $\color{#d91a1a}-0.46\\%$ | | test_compile_indexing[tensor-pytree-compile] | 0.2609ms | 0.1078ms | 9.2803 KOps/s | 9.3361 KOps/s | $\color{#d91a1a}-0.60\\%$ | | test_compile_indexing[tensor-pytree-eager] | 0.1941ms | 47.4483μs | 21.0756 KOps/s | 20.3952 KOps/s | $\color{#35bf28}+3.34\\%$ | | test_compile_indexing[slice-tensordict-compile] | 0.2962ms | 0.1437ms | 6.9580 KOps/s | 6.9455 KOps/s | $\color{#35bf28}+0.18\\%$ | | test_compile_indexing[slice-tensordict-eager] | 0.2004ms | 28.8214μs | 34.6964 KOps/s | 36.1554 KOps/s | $\color{#d91a1a}-4.04\\%$ | | test_compile_indexing[slice-tensorclass-compile] | 0.2903ms | 0.1364ms | 7.3326 KOps/s | 7.3536 KOps/s | $\color{#d91a1a}-0.29\\%$ | | test_compile_indexing[slice-tensorclass-eager] | 0.1682ms | 23.4409μs | 42.6605 KOps/s | 43.3663 KOps/s | $\color{#d91a1a}-1.63\\%$ | | test_compile_indexing[slice-pytree-compile] | 0.3016ms | 0.1360ms | 7.3537 KOps/s | 7.3645 KOps/s | $\color{#d91a1a}-0.15\\%$ | | test_compile_indexing[slice-pytree-eager] | 0.1237ms | 23.0536μs | 43.3772 KOps/s | 42.9517 KOps/s | $\color{#35bf28}+0.99\\%$ | | test_compile_indexing[int-tensordict-compile] | 0.3376ms | 0.1429ms | 6.9987 KOps/s | 6.9641 KOps/s | $\color{#35bf28}+0.50\\%$ | | test_compile_indexing[int-tensordict-eager] | 0.5340ms | 27.1928μs | 36.7744 KOps/s | 36.4893 KOps/s | $\color{#35bf28}+0.78\\%$ | | test_compile_indexing[int-tensorclass-compile] | 0.2847ms | 0.1357ms | 7.3712 KOps/s | 7.3698 KOps/s | $\color{#35bf28}+0.02\\%$ | | test_compile_indexing[int-tensorclass-eager] | 46.2330μs | 23.0525μs | 43.3793 KOps/s | 43.6348 KOps/s | 
$\color{#d91a1a}-0.59\\%$ | | test_compile_indexing[int-pytree-compile] | 0.2847ms | 0.1357ms | 7.3707 KOps/s | 7.3583 KOps/s | $\color{#35bf28}+0.17\\%$ | | test_compile_indexing[int-pytree-eager] | 43.4630μs | 23.1124μs | 43.2667 KOps/s | 43.7461 KOps/s | $\color{#d91a1a}-1.10\\%$ | | test_mod_add[eager] | 0.2052ms | 40.4829μs | 24.7018 KOps/s | 25.6929 KOps/s | $\color{#d91a1a}-3.86\\%$ | | test_mod_add[compile] | 0.2164ms | 72.0847μs | 13.8726 KOps/s | 13.9827 KOps/s | $\color{#d91a1a}-0.79\\%$ | | test_mod_add[compile-overhead] | 0.2626ms | 0.1510ms | 6.6244 KOps/s | 6.5261 KOps/s | $\color{#35bf28}+1.51\\%$ | | test_mod_wrap[eager] | 0.4120ms | 0.2634ms | 3.7958 KOps/s | 3.7725 KOps/s | $\color{#35bf28}+0.62\\%$ | | test_mod_wrap[compile] | 0.4538ms | 0.3018ms | 3.3136 KOps/s | 3.3011 KOps/s | $\color{#35bf28}+0.38\\%$ | | test_mod_wrap[compile-overhead] | 8.2200ms | 4.2730ms | 234.0297 Ops/s | 225.4402 Ops/s | $\color{#35bf28}+3.81\\%$ | | test_mod_wrap_and_backward[eager] | 1.6419ms | 1.4772ms | 676.9467 Ops/s | 704.9638 Ops/s | $\color{#d91a1a}-3.97\\%$ | | test_mod_wrap_and_backward[compile] | 1.7212ms | 1.4964ms | 668.2749 Ops/s | 670.7199 Ops/s | $\color{#d91a1a}-0.36\\%$ | | test_mod_wrap_and_backward[compile-overhead] | 1.4839ms | 1.0049ms | 995.0989 Ops/s | 989.3708 Ops/s | $\color{#35bf28}+0.58\\%$ | | test_seq_add[eager] | 0.2679ms | 0.1176ms | 8.5012 KOps/s | 8.4617 KOps/s | $\color{#35bf28}+0.47\\%$ | | test_seq_add[compile] | 0.2226ms | 88.0497μs | 11.3572 KOps/s | 11.0495 KOps/s | $\color{#35bf28}+2.78\\%$ | | test_seq_add[compile-overhead] | 0.2613ms | 0.1271ms | 7.8663 KOps/s | 7.9021 KOps/s | $\color{#d91a1a}-0.45\\%$ | | test_seq_wrap[eager] | 0.5859ms | 0.4412ms | 2.2663 KOps/s | 2.2915 KOps/s | $\color{#d91a1a}-1.10\\%$ | | test_seq_wrap[compile] | 0.5285ms | 0.3432ms | 2.9138 KOps/s | 2.9857 KOps/s | $\color{#d91a1a}-2.41\\%$ | | test_seq_wrap[compile-overhead] | 0.3118s | 0.1485s | 6.7351 Ops/s | 6.6572 Ops/s | $\color{#35bf28}+1.17\\%$ 
| | test_func_call_runtime[False-eager] | 1.0858ms | 0.7644ms | 1.3083 KOps/s | 1.2752 KOps/s | $\color{#35bf28}+2.59\\%$ | | test_func_call_runtime[False-compile] | 0.9767ms | 0.8397ms | 1.1909 KOps/s | 1.1955 KOps/s | $\color{#d91a1a}-0.38\\%$ | | test_func_call_runtime[False-compile-overhead] | 0.5185ms | 0.3706ms | 2.6985 KOps/s | 2.6857 KOps/s | $\color{#35bf28}+0.47\\%$ | | test_func_call_runtime[True-eager] | 1.1795ms | 1.0219ms | 978.5248 Ops/s | 972.4702 Ops/s | $\color{#35bf28}+0.62\\%$ | | test_func_call_runtime[True-compile] | 1.0621ms | 0.8760ms | 1.1415 KOps/s | 1.1423 KOps/s | $\color{#d91a1a}-0.07\\%$ | | test_func_call_runtime[True-compile-overhead] | 0.5317ms | 0.4103ms | 2.4370 KOps/s | 2.4174 KOps/s | $\color{#35bf28}+0.81\\%$ | | test_distributed | 0.2563ms | 68.8509μs | 14.5241 KOps/s | 13.4968 KOps/s | $\textbf{\color{#35bf28}+7.61\\%}$ | | test_tdmodule | 89.4550μs | 17.3741μs | 57.5568 KOps/s | 65.6932 KOps/s | $\textbf{\color{#d91a1a}-12.39\\%}$ | | test_tdmodule_dispatch | 55.2230μs | 35.4367μs | 28.2193 KOps/s | 32.0719 KOps/s | $\textbf{\color{#d91a1a}-12.01\\%}$ | | test_tdseq | 34.3820μs | 17.9182μs | 55.8090 KOps/s | 61.7182 KOps/s | $\textbf{\color{#d91a1a}-9.57\\%}$ | | test_tdseq_dispatch | 53.8130μs | 37.4954μs | 26.6699 KOps/s | 29.7574 KOps/s | $\textbf{\color{#d91a1a}-10.38\\%}$ | | test_instantiation_functorch | 2.1547ms | 2.0318ms | 492.1742 Ops/s | 489.8269 Ops/s | $\color{#35bf28}+0.48\\%$ | | test_instantiation_td | 1.9997ms | 1.3098ms | 763.4849 Ops/s | 753.1998 Ops/s | $\color{#35bf28}+1.37\\%$ | | test_exec_functorch | 0.2600ms | 0.2315ms | 4.3202 KOps/s | 4.3127 KOps/s | $\color{#35bf28}+0.17\\%$ | | test_exec_functional_call | 0.3869ms | 0.2268ms | 4.4085 KOps/s | 4.3525 KOps/s | $\color{#35bf28}+1.29\\%$ | | test_exec_td | 0.4128ms | 0.2375ms | 4.2098 KOps/s | 4.2989 KOps/s | $\color{#d91a1a}-2.07\\%$ | | test_exec_td_decorator | 1.0252ms | 0.3027ms | 3.3034 KOps/s | 3.1398 KOps/s | 
$\textbf{\color{#35bf28}+5.21\\%}$ | | test_vmap_mlp_speed[True-True] | 0.8466ms | 0.6842ms | 1.4615 KOps/s | 1.3988 KOps/s | $\color{#35bf28}+4.48\\%$ | | test_vmap_mlp_speed[True-False] | 0.8256ms | 0.6815ms | 1.4673 KOps/s | 1.4003 KOps/s | $\color{#35bf28}+4.78\\%$ | | test_vmap_mlp_speed[False-True] | 0.7414ms | 0.5954ms | 1.6795 KOps/s | 1.5811 KOps/s | $\textbf{\color{#35bf28}+6.22\\%}$ | | test_vmap_mlp_speed[False-False] | 0.7555ms | 0.5943ms | 1.6827 KOps/s | 1.5815 KOps/s | $\textbf{\color{#35bf28}+6.40\\%}$ | | test_vmap_mlp_speed_decorator[True-True] | 1.4559ms | 0.7623ms | 1.3119 KOps/s | 1.2636 KOps/s | $\color{#35bf28}+3.82\\%$ | | test_vmap_mlp_speed_decorator[True-False] | 1.0127ms | 0.7616ms | 1.3130 KOps/s | 1.2591 KOps/s | $\color{#35bf28}+4.29\\%$ | | test_vmap_mlp_speed_decorator[False-True] | 0.8173ms | 0.6588ms | 1.5179 KOps/s | 1.4536 KOps/s | $\color{#35bf28}+4.42\\%$ | | test_vmap_mlp_speed_decorator[False-False] | 0.8380ms | 0.6603ms | 1.5145 KOps/s | 1.4459 KOps/s | $\color{#35bf28}+4.74\\%$ | | test_vmap_transformer_speed[True-True] | 9.1052ms | 8.9297ms | 111.9863 Ops/s | 109.2214 Ops/s | $\color{#35bf28}+2.53\\%$ | | test_vmap_transformer_speed[True-False] | 9.0568ms | 8.9211ms | 112.0932 Ops/s | 111.2678 Ops/s | $\color{#35bf28}+0.74\\%$ | | test_vmap_transformer_speed[False-True] | 8.9743ms | 8.8456ms | 113.0511 Ops/s | 111.9881 Ops/s | $\color{#35bf28}+0.95\\%$ | | test_vmap_transformer_speed[False-False] | 8.9646ms | 8.8309ms | 113.2391 Ops/s | 112.1812 Ops/s | $\color{#35bf28}+0.94\\%$ | | test_vmap_transformer_speed_decorator[True-True] | 21.5644ms | 21.3143ms | 46.9168 Ops/s | 46.4015 Ops/s | $\color{#35bf28}+1.11\\%$ | | test_vmap_transformer_speed_decorator[True-False] | 21.5673ms | 21.3181ms | 46.9084 Ops/s | 46.5155 Ops/s | $\color{#35bf28}+0.84\\%$ | | test_vmap_transformer_speed_decorator[False-True] | 22.0038ms | 21.2294ms | 47.1044 Ops/s | 46.9341 Ops/s | $\color{#35bf28}+0.36\\%$ | | 
test_vmap_transformer_speed_decorator[False-False] | 21.2808ms | 21.1058ms | 47.3804 Ops/s | 46.9727 Ops/s | $\color{#35bf28}+0.87\\%$ | | test_to_module_speed[True] | 2.0079ms | 1.4905ms | 670.9020 Ops/s | 662.2367 Ops/s | $\color{#35bf28}+1.31\\%$ | | test_to_module_speed[False] | 1.9293ms | 1.4751ms | 677.9317 Ops/s | 670.0963 Ops/s | $\color{#35bf28}+1.17\\%$ | | test_tc_init | 68.2540μs | 39.3384μs | 25.4205 KOps/s | 29.4891 KOps/s | $\textbf{\color{#d91a1a}-13.80\\%}$ | | test_tc_init_nested | 0.1062ms | 82.3166μs | 12.1482 KOps/s | 15.1062 KOps/s | $\textbf{\color{#d91a1a}-19.58\\%}$ | | test_tc_first_layer_tensor | 16.5310μs | 3.9872μs | 250.8019 KOps/s | 249.5390 KOps/s | $\color{#35bf28}+0.51\\%$ | | test_tc_first_layer_nontensor | 17.2910μs | 4.0102μs | 249.3616 KOps/s | 250.2909 KOps/s | $\color{#d91a1a}-0.37\\%$ | | test_tc_second_layer_tensor | 26.0265μs | 1.3014μs | 768.4294 KOps/s | 775.2611 KOps/s | $\color{#d91a1a}-0.88\\%$ | | test_tc_second_layer_nontensor | 19.8310μs | 4.5865μs | 218.0324 KOps/s | 218.2825 KOps/s | $\color{#d91a1a}-0.11\\%$ | | test_unbind | 0.3255s | 12.8791ms | 77.6452 Ops/s | 76.2516 Ops/s | $\color{#35bf28}+1.83\\%$ | | test_full_like | 0.7661ms | 0.5782ms | 1.7294 KOps/s | 1.7296 KOps/s | $-0.01\\%$ | | test_zeros_like | 0.3594ms | 0.1982ms | 5.0455 KOps/s | 5.0566 KOps/s | $\color{#d91a1a}-0.22\\%$ | | test_ones_like | 0.3502ms | 0.1979ms | 5.0541 KOps/s | 5.0558 KOps/s | $\color{#d91a1a}-0.03\\%$ | | test_clone | 0.5553ms | 0.4145ms | 2.4126 KOps/s | 2.4110 KOps/s | $\color{#35bf28}+0.07\\%$ | | test_squeeze | 0.1472ms | 12.0729μs | 82.8304 KOps/s | 84.0813 KOps/s | $\color{#d91a1a}-1.49\\%$ | | test_unsqueeze | 0.2478ms | 84.7951μs | 11.7931 KOps/s | 11.7059 KOps/s | $\color{#35bf28}+0.74\\%$ | | test_split | 0.4690ms | 0.1848ms | 5.4100 KOps/s | 5.4184 KOps/s | $\color{#d91a1a}-0.15\\%$ | | test_permute | 0.2454ms | 0.1998ms | 5.0059 KOps/s | 4.9196 KOps/s | $\color{#35bf28}+1.75\\%$ | | test_stack | 1.3759ms | 
0.9053ms | 1.1046 KOps/s | 1.1143 KOps/s | $\color{#d91a1a}-0.88\\%$ | | test_cat | 1.3553ms | 1.2317ms | 811.9018 Ops/s | 811.7240 Ops/s | $\color{#35bf28}+0.02\\%$ |