pytorch / tensordict

TensorDict is a pytorch dedicated tensor container.
MIT License
803 stars 65 forks source link

[BugFix] Allow any tensorclass to have a data field #906

Closed: vmoens closed this pull request 1 month ago

github-actions[bot] commented 1 month ago

$\color{#D29922}\textsf{\Large ⚠\kern{0.2cm}\normalsize Warning}$ Result of CPU Benchmark Tests

Total Benchmarks: 144. Improved: $\large\color{#35bf28}8$. Worsened: $\large\color{#d91a1a}21$.

Expand to view detailed results | Name | Max | Mean | Ops | Ops on Repo `HEAD` | Change | | ------------------------------------------ | --------- | --------- | --------------- | ------------------ | ----------------------------------- | | test_plain_set_nested | 75.1210μs | 24.1447μs | 41.4169 KOps/s | 46.3057 KOps/s | $\textbf{\color{#d91a1a}-10.56\\%}$ | | test_plain_set_stack_nested | 54.7530μs | 23.2648μs | 42.9835 KOps/s | 45.6048 KOps/s | $\textbf{\color{#d91a1a}-5.75\\%}$ | | test_plain_set_nested_inplace | 75.8220μs | 25.4850μs | 39.2388 KOps/s | 41.9465 KOps/s | $\textbf{\color{#d91a1a}-6.46\\%}$ | | test_plain_set_stack_nested_inplace | 58.8500μs | 25.6942μs | 38.9193 KOps/s | 42.3824 KOps/s | $\textbf{\color{#d91a1a}-8.17\\%}$ | | test_items | 16.4310μs | 2.6399μs | 378.8059 KOps/s | 379.0794 KOps/s | $\color{#d91a1a}-0.07\\%$ | | test_items_nested | 0.6079ms | 0.3660ms | 2.7324 KOps/s | 2.7174 KOps/s | $\color{#35bf28}+0.55\\%$ | | test_items_nested_locked | 1.6325ms | 0.3678ms | 2.7186 KOps/s | 2.6815 KOps/s | $\color{#35bf28}+1.39\\%$ | | test_items_nested_leaf | 0.1576ms | 88.2295μs | 11.3341 KOps/s | 11.5881 KOps/s | $\color{#d91a1a}-2.19\\%$ | | test_items_stack_nested | 0.6051ms | 0.3703ms | 2.7003 KOps/s | 2.7180 KOps/s | $\color{#d91a1a}-0.65\\%$ | | test_items_stack_nested_leaf | 0.2429ms | 92.6227μs | 10.7965 KOps/s | 11.5589 KOps/s | $\textbf{\color{#d91a1a}-6.60\\%}$ | | test_items_stack_nested_locked | 2.3419ms | 0.3772ms | 2.6514 KOps/s | 2.7093 KOps/s | $\color{#d91a1a}-2.14\\%$ | | test_keys | 19.1660μs | 3.9427μs | 253.6345 KOps/s | 259.7874 KOps/s | $\color{#d91a1a}-2.37\\%$ | | test_keys_nested | 0.2193ms | 0.1460ms | 6.8488 KOps/s | 6.9243 KOps/s | $\color{#d91a1a}-1.09\\%$ | | test_keys_nested_locked | 0.7524ms | 0.1515ms | 6.6019 KOps/s | 6.6416 KOps/s | $\color{#d91a1a}-0.60\\%$ | | test_keys_nested_leaf | 0.2120ms | 0.1247ms | 8.0191 KOps/s | 8.0248 KOps/s | $\color{#d91a1a}-0.07\\%$ | | test_keys_stack_nested | 0.2458ms | 
0.1462ms | 6.8377 KOps/s | 6.9208 KOps/s | $\color{#d91a1a}-1.20\\%$ | | test_keys_stack_nested_leaf | 0.2821ms | 0.1255ms | 7.9707 KOps/s | 8.1381 KOps/s | $\color{#d91a1a}-2.06\\%$ | | test_keys_stack_nested_locked | 0.2960ms | 0.1526ms | 6.5526 KOps/s | 6.6890 KOps/s | $\color{#d91a1a}-2.04\\%$ | | test_values | 8.0728μs | 1.1766μs | 849.8810 KOps/s | 893.4795 KOps/s | $\color{#d91a1a}-4.88\\%$ | | test_values_nested | 99.3950μs | 51.3007μs | 19.4929 KOps/s | 19.8551 KOps/s | $\color{#d91a1a}-1.82\\%$ | | test_values_nested_locked | 89.1760μs | 50.8439μs | 19.6680 KOps/s | 19.7445 KOps/s | $\color{#d91a1a}-0.39\\%$ | | test_values_nested_leaf | 0.1413ms | 45.7151μs | 21.8746 KOps/s | 22.3410 KOps/s | $\color{#d91a1a}-2.09\\%$ | | test_values_stack_nested | 84.0480μs | 52.0252μs | 19.2214 KOps/s | 19.9332 KOps/s | $\color{#d91a1a}-3.57\\%$ | | test_values_stack_nested_leaf | 83.4370μs | 46.0154μs | 21.7319 KOps/s | 22.6590 KOps/s | $\color{#d91a1a}-4.09\\%$ | | test_values_stack_nested_locked | 97.4330μs | 51.3049μs | 19.4913 KOps/s | 19.9427 KOps/s | $\color{#d91a1a}-2.26\\%$ | | test_membership | 3.9387μs | 0.7491μs | 1.3349 MOps/s | 1.0903 MOps/s | $\textbf{\color{#35bf28}+22.44\\%}$ | | test_membership_nested | 22.2120μs | 2.7746μs | 360.4091 KOps/s | 369.1447 KOps/s | $\color{#d91a1a}-2.37\\%$ | | test_membership_nested_leaf | 28.0220μs | 2.8173μs | 354.9490 KOps/s | 365.7796 KOps/s | $\color{#d91a1a}-2.96\\%$ | | test_membership_stacked_nested | 21.3900μs | 2.7659μs | 361.5404 KOps/s | 373.1302 KOps/s | $\color{#d91a1a}-3.11\\%$ | | test_membership_stacked_nested_leaf | 26.9400μs | 2.8409μs | 352.0006 KOps/s | 367.6752 KOps/s | $\color{#d91a1a}-4.26\\%$ | | test_membership_nested_last | 48.6140μs | 4.1599μs | 240.3918 KOps/s | 246.3561 KOps/s | $\color{#d91a1a}-2.42\\%$ | | test_membership_nested_leaf_last | 30.4170μs | 4.2074μs | 237.6792 KOps/s | 243.7546 KOps/s | $\color{#d91a1a}-2.49\\%$ | | test_membership_stacked_nested_last | 40.2660μs | 5.3338μs | 
187.4843 KOps/s | 247.2349 KOps/s | $\textbf{\color{#d91a1a}-24.17\\%}$ | | test_membership_stacked_nested_leaf_last | 28.1620μs | 5.3935μs | 185.4096 KOps/s | 244.3903 KOps/s | $\textbf{\color{#d91a1a}-24.13\\%}$ | | test_nested_getleaf | 38.7930μs | 10.9383μs | 91.4216 KOps/s | 90.1078 KOps/s | $\color{#35bf28}+1.46\\%$ | | test_nested_get | 33.1520μs | 10.4247μs | 95.9260 KOps/s | 97.4124 KOps/s | $\color{#d91a1a}-1.53\\%$ | | test_stacked_getleaf | 55.8820μs | 10.9857μs | 91.0274 KOps/s | 91.3636 KOps/s | $\color{#d91a1a}-0.37\\%$ | | test_stacked_get | 33.2430μs | 10.3711μs | 96.4214 KOps/s | 96.7201 KOps/s | $\color{#d91a1a}-0.31\\%$ | | test_nested_getitemleaf | 50.9260μs | 11.3535μs | 88.0785 KOps/s | 88.0886 KOps/s | $\color{#d91a1a}-0.01\\%$ | | test_nested_getitem | 42.6110μs | 10.5825μs | 94.4953 KOps/s | 95.9017 KOps/s | $\color{#d91a1a}-1.47\\%$ | | test_stacked_getitemleaf | 50.8490μs | 11.4646μs | 87.2250 KOps/s | 89.7708 KOps/s | $\color{#d91a1a}-2.84\\%$ | | test_stacked_getitem | 40.9370μs | 10.3848μs | 96.2942 KOps/s | 96.6552 KOps/s | $\color{#d91a1a}-0.37\\%$ | | test_lock_nested | 0.9993ms | 0.5298ms | 1.8875 KOps/s | 1.6580 KOps/s | $\textbf{\color{#35bf28}+13.84\\%}$ | | test_lock_stack_nested | 0.7641ms | 0.4976ms | 2.0098 KOps/s | 2.0220 KOps/s | $\color{#d91a1a}-0.60\\%$ | | test_unlock_nested | 0.9754ms | 0.4477ms | 2.2337 KOps/s | 2.2924 KOps/s | $\color{#d91a1a}-2.56\\%$ | | test_unlock_stack_nested | 0.5058ms | 0.4117ms | 2.4292 KOps/s | 2.4565 KOps/s | $\color{#d91a1a}-1.11\\%$ | | test_flatten_speed | 0.6104ms | 0.1077ms | 9.2847 KOps/s | 9.5295 KOps/s | $\color{#d91a1a}-2.57\\%$ | | test_unflatten_speed | 0.9707ms | 0.4510ms | 2.2171 KOps/s | 2.2130 KOps/s | $\color{#35bf28}+0.19\\%$ | | test_common_ops | 6.3796ms | 1.2049ms | 829.9519 Ops/s | 859.0020 Ops/s | $\color{#d91a1a}-3.38\\%$ | | test_creation | 19.6170μs | 2.4630μs | 406.0016 KOps/s | 398.4733 KOps/s | $\color{#35bf28}+1.89\\%$ | | test_creation_empty | 66.5740μs | 
21.2279μs | 47.1078 KOps/s | 53.4715 KOps/s | $\textbf{\color{#d91a1a}-11.90\\%}$ | | test_creation_nested_1 | 87.2040μs | 24.9258μs | 40.1191 KOps/s | 44.4607 KOps/s | $\textbf{\color{#d91a1a}-9.76\\%}$ | | test_creation_nested_2 | 71.7440μs | 28.9752μs | 34.5123 KOps/s | 38.0885 KOps/s | $\textbf{\color{#d91a1a}-9.39\\%}$ | | test_clone | 89.6880μs | 18.1327μs | 55.1490 KOps/s | 54.9509 KOps/s | $\color{#35bf28}+0.36\\%$ | | test_getitem[int] | 0.9285ms | 13.3032μs | 75.1701 KOps/s | 75.1025 KOps/s | $\color{#35bf28}+0.09\\%$ | | test_getitem[slice_int] | 0.1679ms | 33.1909μs | 30.1288 KOps/s | 28.7917 KOps/s | $\color{#35bf28}+4.64\\%$ | | test_getitem[range] | 0.2150ms | 59.1132μs | 16.9167 KOps/s | 17.0207 KOps/s | $\color{#d91a1a}-0.61\\%$ | | test_getitem[tuple] | 0.1413ms | 27.0761μs | 36.9329 KOps/s | 35.7911 KOps/s | $\color{#35bf28}+3.19\\%$ | | test_getitem[list] | 0.2890ms | 53.9488μs | 18.5361 KOps/s | 18.7747 KOps/s | $\color{#d91a1a}-1.27\\%$ | | test_setitem_dim[int] | 85.3400μs | 37.2496μs | 26.8459 KOps/s | 29.4039 KOps/s | $\textbf{\color{#d91a1a}-8.70\\%}$ | | test_setitem_dim[slice_int] | 0.1118ms | 75.7156μs | 13.2073 KOps/s | 13.5263 KOps/s | $\color{#d91a1a}-2.36\\%$ | | test_setitem_dim[range] | 0.1781ms | 97.9818μs | 10.2060 KOps/s | 10.5785 KOps/s | $\color{#d91a1a}-3.52\\%$ | | test_setitem_dim[tuple] | 98.5450μs | 62.0693μs | 16.1110 KOps/s | 16.2254 KOps/s | $\color{#d91a1a}-0.70\\%$ | | test_setitem | 0.1276ms | 31.9563μs | 31.2928 KOps/s | 32.5069 KOps/s | $\color{#d91a1a}-3.73\\%$ | | test_set | 0.1067ms | 31.2686μs | 31.9810 KOps/s | 33.4120 KOps/s | $\color{#d91a1a}-4.28\\%$ | | test_set_shared | 2.9041ms | 0.2203ms | 4.5395 KOps/s | 4.5501 KOps/s | $\color{#d91a1a}-0.23\\%$ | | test_update | 0.1787ms | 39.7797μs | 25.1385 KOps/s | 26.4736 KOps/s | $\textbf{\color{#d91a1a}-5.04\\%}$ | | test_update_nested | 0.1354ms | 49.8414μs | 20.0636 KOps/s | 20.6678 KOps/s | $\color{#d91a1a}-2.92\\%$ | | test_update__nested | 0.1192ms | 
36.3824μs | 27.4858 KOps/s | 27.3257 KOps/s | $\color{#35bf28}+0.59\\%$ | | test_set_nested | 98.4350μs | 33.9878μs | 29.4223 KOps/s | 30.5709 KOps/s | $\color{#d91a1a}-3.76\\%$ | | test_set_nested_new | 0.1540ms | 39.5077μs | 25.3115 KOps/s | 26.0610 KOps/s | $\color{#d91a1a}-2.88\\%$ | | test_select | 0.1366ms | 56.4041μs | 17.7292 KOps/s | 17.9889 KOps/s | $\color{#d91a1a}-1.44\\%$ | | test_select_nested | 0.1285ms | 60.7845μs | 16.4516 KOps/s | 16.4611 KOps/s | $\color{#d91a1a}-0.06\\%$ | | test_exclude_nested | 0.1645ms | 80.9896μs | 12.3473 KOps/s | 12.4711 KOps/s | $\color{#d91a1a}-0.99\\%$ | | test_empty[True] | 0.5351ms | 0.3406ms | 2.9359 KOps/s | 2.9248 KOps/s | $\color{#35bf28}+0.38\\%$ | | test_empty[False] | 6.8202μs | 1.2264μs | 815.4039 KOps/s | 778.5207 KOps/s | $\color{#35bf28}+4.74\\%$ | | test_unbind_speed | 0.4759ms | 0.3360ms | 2.9764 KOps/s | 3.0045 KOps/s | $\color{#d91a1a}-0.94\\%$ | | test_unbind_speed_stack0 | 0.4589ms | 0.3317ms | 3.0144 KOps/s | 3.0458 KOps/s | $\color{#d91a1a}-1.03\\%$ | | test_unbind_speed_stack1 | 76.2018ms | 0.8364ms | 1.1955 KOps/s | 1.1767 KOps/s | $\color{#35bf28}+1.60\\%$ | | test_split | 81.3536ms | 2.3494ms | 425.6480 Ops/s | 463.9704 Ops/s | $\textbf{\color{#d91a1a}-8.26\\%}$ | | test_chunk | 78.2386ms | 2.3437ms | 426.6777 Ops/s | 398.2070 Ops/s | $\textbf{\color{#35bf28}+7.15\\%}$ | | test_creation[device0] | 3.7940ms | 0.1264ms | 7.9136 KOps/s | 8.1993 KOps/s | $\color{#d91a1a}-3.48\\%$ | | test_creation_from_tensor | 0.2911ms | 0.1220ms | 8.1959 KOps/s | 8.0943 KOps/s | $\color{#35bf28}+1.25\\%$ | | test_add_one[memmap_tensor0] | 0.2217ms | 8.1530μs | 122.6545 KOps/s | 121.6949 KOps/s | $\color{#35bf28}+0.79\\%$ | | test_contiguous[memmap_tensor0] | 22.1520μs | 2.2755μs | 439.4612 KOps/s | 460.8909 KOps/s | $\color{#d91a1a}-4.65\\%$ | | test_stack[memmap_tensor0] | 59.9930μs | 6.1353μs | 162.9918 KOps/s | 162.5652 KOps/s | $\color{#35bf28}+0.26\\%$ | | test_memmaptd_index | 1.2245ms | 0.4545ms | 2.2002 
KOps/s | 2.2415 KOps/s | $\color{#d91a1a}-1.84\\%$ | | test_memmaptd_index_astensor | 1.1235ms | 0.5417ms | 1.8459 KOps/s | 1.9079 KOps/s | $\color{#d91a1a}-3.25\\%$ | | test_memmaptd_index_op | 1.5185ms | 1.1349ms | 881.1704 Ops/s | 923.7391 Ops/s | $\color{#d91a1a}-4.61\\%$ | | test_serialize_model | 0.1992s | 0.1393s | 7.1805 Ops/s | 7.4404 Ops/s | $\color{#d91a1a}-3.49\\%$ | | test_serialize_model_pickle | 0.4404s | 0.3922s | 2.5498 Ops/s | 2.4975 Ops/s | $\color{#35bf28}+2.09\\%$ | | test_serialize_weights | 0.1303s | 0.1249s | 8.0050 Ops/s | 7.0313 Ops/s | $\textbf{\color{#35bf28}+13.85\\%}$ | | test_serialize_weights_returnearly | 0.1848s | 0.1714s | 5.8328 Ops/s | 6.2086 Ops/s | $\textbf{\color{#d91a1a}-6.05\\%}$ | | test_serialize_weights_pickle | 0.4779s | 0.4147s | 2.4117 Ops/s | 2.5462 Ops/s | $\textbf{\color{#d91a1a}-5.28\\%}$ | | test_serialize_weights_filesystem | 0.2264s | 0.1565s | 6.3909 Ops/s | 6.8290 Ops/s | $\textbf{\color{#d91a1a}-6.42\\%}$ | | test_serialize_model_filesystem | 0.1542s | 0.1490s | 6.7100 Ops/s | 5.9527 Ops/s | $\textbf{\color{#35bf28}+12.72\\%}$ | | test_reshape_pytree | 96.1900μs | 41.5289μs | 24.0796 KOps/s | 25.1604 KOps/s | $\color{#d91a1a}-4.30\\%$ | | test_reshape_td | 0.1103ms | 50.2262μs | 19.9099 KOps/s | 20.1817 KOps/s | $\color{#d91a1a}-1.35\\%$ | | test_view_pytree | 96.5010μs | 40.4680μs | 24.7109 KOps/s | 25.4993 KOps/s | $\color{#d91a1a}-3.09\\%$ | | test_view_td | 0.1168ms | 57.2649μs | 17.4627 KOps/s | 17.7476 KOps/s | $\color{#d91a1a}-1.61\\%$ | | test_unbind_pytree | 83.5170μs | 37.3991μs | 26.7386 KOps/s | 27.6140 KOps/s | $\color{#d91a1a}-3.17\\%$ | | test_unbind_td | 0.3673ms | 50.2633μs | 19.8952 KOps/s | 20.7545 KOps/s | $\color{#d91a1a}-4.14\\%$ | | test_split_pytree | 0.1011ms | 40.2486μs | 24.8456 KOps/s | 25.4193 KOps/s | $\color{#d91a1a}-2.26\\%$ | | test_split_td | 0.5391ms | 64.4650μs | 15.5123 KOps/s | 15.6725 KOps/s | $\color{#d91a1a}-1.02\\%$ | | test_add_pytree | 96.1710μs | 44.8067μs | 
22.3181 KOps/s | 22.0231 KOps/s | $\color{#35bf28}+1.34\\%$ | | test_add_td | 0.1734ms | 88.2813μs | 11.3274 KOps/s | 11.6069 KOps/s | $\color{#d91a1a}-2.41\\%$ | | test_distributed | 0.2292ms | 0.1312ms | 7.6245 KOps/s | 7.4982 KOps/s | $\color{#35bf28}+1.68\\%$ | | test_tdmodule | 44.1830μs | 18.8988μs | 52.9135 KOps/s | 61.0421 KOps/s | $\textbf{\color{#d91a1a}-13.32\\%}$ | | test_tdmodule_dispatch | 55.8850μs | 38.5754μs | 25.9232 KOps/s | 28.8591 KOps/s | $\textbf{\color{#d91a1a}-10.17\\%}$ | | test_tdseq | 50.3340μs | 20.2496μs | 49.3838 KOps/s | 55.1012 KOps/s | $\textbf{\color{#d91a1a}-10.38\\%}$ | | test_tdseq_dispatch | 63.8500μs | 42.7170μs | 23.4099 KOps/s | 25.7866 KOps/s | $\textbf{\color{#d91a1a}-9.22\\%}$ | | test_instantiation_functorch | 2.3998ms | 1.6582ms | 603.0483 Ops/s | 610.5732 Ops/s | $\color{#d91a1a}-1.23\\%$ | | test_instantiation_td | 2.2135ms | 1.1534ms | 866.9947 Ops/s | 850.9578 Ops/s | $\color{#35bf28}+1.88\\%$ | | test_exec_functorch | 0.3916ms | 0.1867ms | 5.3554 KOps/s | 5.2513 KOps/s | $\color{#35bf28}+1.98\\%$ | | test_exec_functional_call | 0.2993ms | 0.1787ms | 5.5975 KOps/s | 5.6177 KOps/s | $\color{#d91a1a}-0.36\\%$ | | test_exec_td | 0.2815ms | 0.1768ms | 5.6570 KOps/s | 5.5807 KOps/s | $\color{#35bf28}+1.37\\%$ | | test_exec_td_decorator | 0.5711ms | 0.2630ms | 3.8023 KOps/s | 3.7864 KOps/s | $\color{#35bf28}+0.42\\%$ | | test_vmap_mlp_speed[True-True] | 0.8998ms | 0.6260ms | 1.5973 KOps/s | 1.6007 KOps/s | $\color{#d91a1a}-0.21\\%$ | | test_vmap_mlp_speed[True-False] | 0.9255ms | 0.6222ms | 1.6073 KOps/s | 1.6034 KOps/s | $\color{#35bf28}+0.24\\%$ | | test_vmap_mlp_speed[False-True] | 0.7069ms | 0.5130ms | 1.9495 KOps/s | 1.9343 KOps/s | $\color{#35bf28}+0.79\\%$ | | test_vmap_mlp_speed[False-False] | 0.8019ms | 0.5162ms | 1.9374 KOps/s | 1.9509 KOps/s | $\color{#d91a1a}-0.69\\%$ | | test_vmap_mlp_speed_decorator[True-True] | 1.3257ms | 0.7207ms | 1.3876 KOps/s | 1.3920 KOps/s | $\color{#d91a1a}-0.32\\%$ | | 
test_vmap_mlp_speed_decorator[True-False] | 1.5553ms | 0.7194ms | 1.3901 KOps/s | 1.3994 KOps/s | $\color{#d91a1a}-0.67\\%$ | | test_vmap_mlp_speed_decorator[False-True] | 0.8875ms | 0.5911ms | 1.6918 KOps/s | 1.6835 KOps/s | $\color{#35bf28}+0.49\\%$ | | test_vmap_mlp_speed_decorator[False-False] | 0.9378ms | 0.5919ms | 1.6894 KOps/s | 1.6760 KOps/s | $\color{#35bf28}+0.80\\%$ | | test_to_module_speed[True] | 2.2201ms | 1.8277ms | 547.1224 Ops/s | 537.5509 Ops/s | $\color{#35bf28}+1.78\\%$ | | test_to_module_speed[False] | 1.9580ms | 1.7944ms | 557.2767 Ops/s | 546.5006 Ops/s | $\color{#35bf28}+1.97\\%$ | | test_tc_init | 0.1061ms | 47.8902μs | 20.8811 KOps/s | 22.1792 KOps/s | $\textbf{\color{#d91a1a}-5.85\\%}$ | | test_tc_init_nested | 0.1685ms | 95.4204μs | 10.4799 KOps/s | 10.7490 KOps/s | $\color{#d91a1a}-2.50\\%$ | | test_tc_first_layer_tensor | 39.2030μs | 9.3121μs | 107.3876 KOps/s | 108.8043 KOps/s | $\color{#d91a1a}-1.30\\%$ | | test_tc_first_layer_nontensor | 53.2800μs | 9.2950μs | 107.5852 KOps/s | 109.1078 KOps/s | $\color{#d91a1a}-1.40\\%$ | | test_tc_second_layer_tensor | 25.4270μs | 2.7810μs | 359.5831 KOps/s | 354.9288 KOps/s | $\color{#35bf28}+1.31\\%$ | | test_tc_second_layer_nontensor | 39.0430μs | 10.5601μs | 94.6960 KOps/s | 97.3652 KOps/s | $\color{#d91a1a}-2.74\\%$ | | test_unbind | 8.8957ms | 8.6921ms | 115.0473 Ops/s | 73.6357 Ops/s | $\textbf{\color{#35bf28}+56.24\\%}$ | | test_full_like | 8.0755ms | 7.1390ms | 140.0765 Ops/s | 126.4530 Ops/s | $\textbf{\color{#35bf28}+10.77\\%}$ | | test_zeros_like | 11.2823ms | 6.4729ms | 154.4907 Ops/s | 132.4493 Ops/s | $\textbf{\color{#35bf28}+16.64\\%}$ | | test_ones_like | 14.4500ms | 7.5467ms | 132.5082 Ops/s | 130.1348 Ops/s | $\color{#35bf28}+1.82\\%$ | | test_clone | 18.6204ms | 9.1407ms | 109.4007 Ops/s | 106.4577 Ops/s | $\color{#35bf28}+2.76\\%$ | | test_squeeze | 64.2710μs | 15.3785μs | 65.0257 KOps/s | 65.6813 KOps/s | $\color{#d91a1a}-1.00\\%$ | | test_unsqueeze | 0.2662ms | 97.3906μs | 
10.2679 KOps/s | 10.3030 KOps/s | $\color{#d91a1a}-0.34\\%$ | | test_split | 0.4151ms | 0.2087ms | 4.7926 KOps/s | 4.7657 KOps/s | $\color{#35bf28}+0.57\\%$ | | test_permute | 0.3503ms | 0.2269ms | 4.4069 KOps/s | 4.4287 KOps/s | $\color{#d91a1a}-0.49\\%$ | | test_stack | 34.4359ms | 24.7380ms | 40.4237 Ops/s | 39.8593 Ops/s | $\color{#35bf28}+1.42\\%$ | | test_cat | 28.7904ms | 24.3533ms | 41.0622 Ops/s | 39.9918 Ops/s | $\color{#35bf28}+2.68\\%$ |
github-actions[bot] commented 1 month ago

$\color{#D29922}\textsf{\Large ⚠\kern{0.2cm}\normalsize Warning}$ Result of GPU Benchmark Tests

Total Benchmarks: 219. Improved: $\large\color{#35bf28}13$. Worsened: $\large\color{#d91a1a}14$.

Expand to view detailed results | Name | Max | Mean | Ops | Ops on Repo `HEAD` | Change | | -------------------------------------------------- | --------- | --------- | --------------- | ------------------ | ------------------------------------ | | test_plain_set_nested | 0.4894ms | 17.9837μs | 55.6058 KOps/s | 58.4476 KOps/s | $\color{#d91a1a}-4.86\\%$ | | test_plain_set_stack_nested | 1.7104ms | 18.3041μs | 54.6327 KOps/s | 58.2828 KOps/s | $\textbf{\color{#d91a1a}-6.26\\%}$ | | test_plain_set_nested_inplace | 38.3010μs | 18.9222μs | 52.8480 KOps/s | 54.4586 KOps/s | $\color{#d91a1a}-2.96\\%$ | | test_plain_set_stack_nested_inplace | 36.6310μs | 18.8991μs | 52.9125 KOps/s | 54.8865 KOps/s | $\color{#d91a1a}-3.60\\%$ | | test_items | 26.3100μs | 4.7548μs | 210.3157 KOps/s | 211.5822 KOps/s | $\color{#d91a1a}-0.60\\%$ | | test_items_nested | 0.4177ms | 0.3955ms | 2.5283 KOps/s | 2.5269 KOps/s | $\color{#35bf28}+0.05\\%$ | | test_items_nested_locked | 0.4258ms | 0.3995ms | 2.5033 KOps/s | 2.5464 KOps/s | $\color{#d91a1a}-1.70\\%$ | | test_items_nested_leaf | 0.1151ms | 87.3588μs | 11.4470 KOps/s | 11.4210 KOps/s | $\color{#35bf28}+0.23\\%$ | | test_items_stack_nested | 0.4503ms | 0.4000ms | 2.4998 KOps/s | 2.5338 KOps/s | $\color{#d91a1a}-1.34\\%$ | | test_items_stack_nested_leaf | 0.1148ms | 87.7003μs | 11.4025 KOps/s | 11.6322 KOps/s | $\color{#d91a1a}-1.98\\%$ | | test_items_stack_nested_locked | 0.4405ms | 0.4003ms | 2.4978 KOps/s | 2.5573 KOps/s | $\color{#d91a1a}-2.33\\%$ | | test_keys | 19.5910μs | 4.4008μs | 227.2327 KOps/s | 228.3211 KOps/s | $\color{#d91a1a}-0.48\\%$ | | test_keys_nested | 97.6910μs | 68.8970μs | 14.5144 KOps/s | 15.1835 KOps/s | $\color{#d91a1a}-4.41\\%$ | | test_keys_nested_locked | 0.9564ms | 74.1480μs | 13.4865 KOps/s | 13.5830 KOps/s | $\color{#d91a1a}-0.71\\%$ | | test_keys_nested_leaf | 80.9020μs | 56.7034μs | 17.6356 KOps/s | 17.8677 KOps/s | $\color{#d91a1a}-1.30\\%$ | | test_keys_stack_nested | 0.1013ms | 68.8646μs | 14.5213 
KOps/s | 15.3932 KOps/s | $\textbf{\color{#d91a1a}-5.66\\%}$ | | test_keys_stack_nested_leaf | 77.0010μs | 58.5157μs | 17.0894 KOps/s | 17.3584 KOps/s | $\color{#d91a1a}-1.55\\%$ | | test_keys_stack_nested_locked | 93.5310μs | 73.9529μs | 13.5221 KOps/s | 13.9643 KOps/s | $\color{#d91a1a}-3.17\\%$ | | test_values | 9.1603μs | 1.7604μs | 568.0576 KOps/s | 564.3839 KOps/s | $\color{#35bf28}+0.65\\%$ | | test_values_nested | 52.8810μs | 34.1994μs | 29.2403 KOps/s | 29.3087 KOps/s | $\color{#d91a1a}-0.23\\%$ | | test_values_nested_locked | 55.9910μs | 36.1005μs | 27.7004 KOps/s | 27.5063 KOps/s | $\color{#35bf28}+0.71\\%$ | | test_values_nested_leaf | 52.2510μs | 30.3854μs | 32.9106 KOps/s | 33.1151 KOps/s | $\color{#d91a1a}-0.62\\%$ | | test_values_stack_nested | 68.5510μs | 34.7177μs | 28.8038 KOps/s | 28.6930 KOps/s | $\color{#35bf28}+0.39\\%$ | | test_values_stack_nested_leaf | 54.0010μs | 30.8477μs | 32.4173 KOps/s | 32.1246 KOps/s | $\color{#35bf28}+0.91\\%$ | | test_values_stack_nested_locked | 57.7510μs | 36.5486μs | 27.3608 KOps/s | 27.0902 KOps/s | $\color{#35bf28}+1.00\\%$ | | test_membership | 1.2810μs | 0.5399μs | 1.8522 MOps/s | 1.8510 MOps/s | $\color{#35bf28}+0.06\\%$ | | test_membership_nested | 15.7300μs | 2.0917μs | 478.0689 KOps/s | 473.8803 KOps/s | $\color{#35bf28}+0.88\\%$ | | test_membership_nested_leaf | 10.2600μs | 2.0553μs | 486.5387 KOps/s | 483.5920 KOps/s | $\color{#35bf28}+0.61\\%$ | | test_membership_stacked_nested | 23.6200μs | 2.1061μs | 474.8214 KOps/s | 485.5792 KOps/s | $\color{#d91a1a}-2.22\\%$ | | test_membership_stacked_nested_leaf | 16.1300μs | 2.0659μs | 484.0405 KOps/s | 481.9617 KOps/s | $\color{#35bf28}+0.43\\%$ | | test_membership_nested_last | 20.8110μs | 3.0544μs | 327.4009 KOps/s | 331.2761 KOps/s | $\color{#d91a1a}-1.17\\%$ | | test_membership_nested_leaf_last | 18.6900μs | 3.0542μs | 327.4224 KOps/s | 330.0220 KOps/s | $\color{#d91a1a}-0.79\\%$ | | test_membership_stacked_nested_last | 36.1810μs | 3.4774μs | 287.5738 
KOps/s | 107.8330 KOps/s | $\textbf{\color{#35bf28}+166.68\\%}$ | | test_membership_stacked_nested_leaf_last | 20.1210μs | 3.4779μs | 287.5283 KOps/s | 108.1986 KOps/s | $\textbf{\color{#35bf28}+165.74\\%}$ | | test_nested_getleaf | 25.9810μs | 8.0492μs | 124.2353 KOps/s | 123.0623 KOps/s | $\color{#35bf28}+0.95\\%$ | | test_nested_get | 23.3700μs | 7.6059μs | 131.4761 KOps/s | 131.2801 KOps/s | $\color{#35bf28}+0.15\\%$ | | test_stacked_getleaf | 24.8100μs | 8.0530μs | 124.1780 KOps/s | 124.2933 KOps/s | $\color{#d91a1a}-0.09\\%$ | | test_stacked_get | 22.4800μs | 7.5775μs | 131.9696 KOps/s | 132.4825 KOps/s | $\color{#d91a1a}-0.39\\%$ | | test_nested_getitemleaf | 21.5310μs | 8.2009μs | 121.9375 KOps/s | 122.0549 KOps/s | $\color{#d91a1a}-0.10\\%$ | | test_nested_getitem | 25.0700μs | 7.7418μs | 129.1694 KOps/s | 129.1027 KOps/s | $\color{#35bf28}+0.05\\%$ | | test_stacked_getitemleaf | 27.3810μs | 8.2530μs | 121.1688 KOps/s | 121.4940 KOps/s | $\color{#d91a1a}-0.27\\%$ | | test_stacked_getitem | 23.5400μs | 7.7710μs | 128.6832 KOps/s | 128.7931 KOps/s | $\color{#d91a1a}-0.09\\%$ | | test_lock_nested | 1.3000ms | 0.4818ms | 2.0756 KOps/s | 2.0723 KOps/s | $\color{#35bf28}+0.16\\%$ | | test_lock_stack_nested | 0.4955ms | 0.4423ms | 2.2608 KOps/s | 2.2985 KOps/s | $\color{#d91a1a}-1.64\\%$ | | test_unlock_nested | 0.8256ms | 0.3957ms | 2.5269 KOps/s | 2.4633 KOps/s | $\color{#35bf28}+2.58\\%$ | | test_unlock_stack_nested | 0.5009ms | 0.3584ms | 2.7902 KOps/s | 2.8156 KOps/s | $\color{#d91a1a}-0.90\\%$ | | test_flatten_speed | 0.2057ms | 0.1067ms | 9.3733 KOps/s | 9.3557 KOps/s | $\color{#35bf28}+0.19\\%$ | | test_unflatten_speed | 0.3327ms | 0.2955ms | 3.3843 KOps/s | 3.3822 KOps/s | $\color{#35bf28}+0.06\\%$ | | test_common_ops | 1.7973ms | 1.4351ms | 696.8128 Ops/s | 732.8676 Ops/s | $\color{#d91a1a}-4.92\\%$ | | test_creation | 17.6100μs | 2.0410μs | 489.9464 KOps/s | 487.3441 KOps/s | $\color{#35bf28}+0.53\\%$ | | test_creation_empty | 41.8400μs | 19.2156μs | 
52.0410 KOps/s | 55.9380 KOps/s | $\textbf{\color{#d91a1a}-6.97\\%}$ | | test_creation_nested_1 | 38.7810μs | 21.2844μs | 46.9828 KOps/s | 50.6431 KOps/s | $\textbf{\color{#d91a1a}-7.23\\%}$ | | test_creation_nested_2 | 0.1928ms | 24.0049μs | 41.6581 KOps/s | 44.5547 KOps/s | $\textbf{\color{#d91a1a}-6.50\\%}$ | | test_clone | 63.4110μs | 34.0724μs | 29.3493 KOps/s | 30.3786 KOps/s | $\color{#d91a1a}-3.39\\%$ | | test_getitem[int] | 1.1611ms | 17.6809μs | 56.5584 KOps/s | 56.6221 KOps/s | $\color{#d91a1a}-0.11\\%$ | | test_getitem[slice_int] | 0.1570ms | 30.4462μs | 32.8448 KOps/s | 32.9723 KOps/s | $\color{#d91a1a}-0.39\\%$ | | test_getitem[range] | 0.2945ms | 0.1209ms | 8.2741 KOps/s | 8.3908 KOps/s | $\color{#d91a1a}-1.39\\%$ | | test_getitem[tuple] | 0.1528ms | 26.3236μs | 37.9887 KOps/s | 37.9445 KOps/s | $\color{#35bf28}+0.12\\%$ | | test_getitem[list] | 0.2372ms | 0.1097ms | 9.1140 KOps/s | 8.9997 KOps/s | $\color{#35bf28}+1.27\\%$ | | test_setitem_dim[int] | 77.9120μs | 57.2353μs | 17.4717 KOps/s | 17.0231 KOps/s | $\color{#35bf28}+2.64\\%$ | | test_setitem_dim[slice_int] | 0.1127ms | 82.6383μs | 12.1009 KOps/s | 12.3045 KOps/s | $\color{#d91a1a}-1.65\\%$ | | test_setitem_dim[range] | 0.1752ms | 0.1467ms | 6.8183 KOps/s | 6.5660 KOps/s | $\color{#35bf28}+3.84\\%$ | | test_setitem_dim[tuple] | 0.1013ms | 75.4410μs | 13.2554 KOps/s | 13.4929 KOps/s | $\color{#d91a1a}-1.76\\%$ | | test_setitem | 94.2320μs | 46.6644μs | 21.4296 KOps/s | 21.0788 KOps/s | $\color{#35bf28}+1.66\\%$ | | test_set | 70.1410μs | 45.1199μs | 22.1632 KOps/s | 21.8592 KOps/s | $\color{#35bf28}+1.39\\%$ | | test_set_shared | 0.3868ms | 55.8149μs | 17.9164 KOps/s | 17.6462 KOps/s | $\color{#35bf28}+1.53\\%$ | | test_update | 98.9230μs | 54.9657μs | 18.1932 KOps/s | 18.4083 KOps/s | $\color{#d91a1a}-1.17\\%$ | | test_update_nested | 95.7810μs | 66.8269μs | 14.9640 KOps/s | 16.0038 KOps/s | $\textbf{\color{#d91a1a}-6.50\\%}$ | | test_update__nested | 0.1075ms | 69.9336μs | 14.2993 KOps/s | 
15.2341 KOps/s | $\textbf{\color{#d91a1a}-6.14\\%}$ | | test_set_nested | 0.4695ms | 51.1338μs | 19.5565 KOps/s | 20.7837 KOps/s | $\textbf{\color{#d91a1a}-5.90\\%}$ | | test_set_nested_new | 93.0520μs | 55.1227μs | 18.1414 KOps/s | 18.7293 KOps/s | $\color{#d91a1a}-3.14\\%$ | | test_select | 0.1047ms | 70.4632μs | 14.1918 KOps/s | 14.1451 KOps/s | $\color{#35bf28}+0.33\\%$ | | test_select_nested | 77.8010μs | 54.3300μs | 18.4060 KOps/s | 18.9318 KOps/s | $\color{#d91a1a}-2.78\\%$ | | test_exclude_nested | 92.5710μs | 72.3394μs | 13.8237 KOps/s | 13.7927 KOps/s | $\color{#35bf28}+0.23\\%$ | | test_empty[True] | 0.3524ms | 0.3015ms | 3.3166 KOps/s | 3.3462 KOps/s | $\color{#d91a1a}-0.88\\%$ | | test_empty[False] | 2.4291μs | 0.9309μs | 1.0742 MOps/s | 1.0747 MOps/s | $\color{#d91a1a}-0.04\\%$ | | test_to | 65.5320μs | 38.1935μs | 26.1825 KOps/s | 26.4149 KOps/s | $\color{#d91a1a}-0.88\\%$ | | test_to_nonblocking | 51.8310μs | 24.2673μs | 41.2076 KOps/s | 42.0147 KOps/s | $\color{#d91a1a}-1.92\\%$ | | test_unbind_speed | 0.3590ms | 0.3112ms | 3.2134 KOps/s | 3.2261 KOps/s | $\color{#d91a1a}-0.39\\%$ | | test_unbind_speed_stack0 | 0.3618ms | 0.3080ms | 3.2463 KOps/s | 3.2611 KOps/s | $\color{#d91a1a}-0.45\\%$ | | test_unbind_speed_stack1 | 86.9174ms | 0.7888ms | 1.2678 KOps/s | 1.2790 KOps/s | $\color{#d91a1a}-0.87\\%$ | | test_split | 89.0993ms | 2.3581ms | 424.0649 Ops/s | 416.3638 Ops/s | $\color{#35bf28}+1.85\\%$ | | test_chunk | 2.3252ms | 2.1887ms | 456.8836 Ops/s | 416.3445 Ops/s | $\textbf{\color{#35bf28}+9.74\\%}$ | | test_creation[device0] | 0.1569ms | 0.1062ms | 9.4174 KOps/s | 8.9936 KOps/s | $\color{#35bf28}+4.71\\%$ | | test_creation_from_tensor | 0.1604ms | 0.1032ms | 9.6936 KOps/s | 9.2338 KOps/s | $\color{#35bf28}+4.98\\%$ | | test_add_one[memmap_tensor0] | 22.2210μs | 9.3923μs | 106.4705 KOps/s | 96.6671 KOps/s | $\textbf{\color{#35bf28}+10.14\\%}$ | | test_contiguous[memmap_tensor0] | 20.3110μs | 2.2575μs | 442.9765 KOps/s | 437.2904 KOps/s | 
$\color{#35bf28}+1.30\\%$ | | test_stack[memmap_tensor0] | 31.9110μs | 6.7117μs | 148.9945 KOps/s | 141.8527 KOps/s | $\textbf{\color{#35bf28}+5.03\\%}$ | | test_memmaptd_index | 90.2815ms | 0.4998ms | 2.0007 KOps/s | 2.2001 KOps/s | $\textbf{\color{#d91a1a}-9.06\\%}$ | | test_memmaptd_index_astensor | 0.7773ms | 0.5062ms | 1.9755 KOps/s | 1.9230 KOps/s | $\color{#35bf28}+2.73\\%$ | | test_memmaptd_index_op | 1.5167ms | 1.1004ms | 908.7743 Ops/s | 894.3267 Ops/s | $\color{#35bf28}+1.62\\%$ | | test_serialize_model | 0.1007s | 96.7170ms | 10.3394 Ops/s | 10.1723 Ops/s | $\color{#35bf28}+1.64\\%$ | | test_serialize_model_pickle | 1.3684s | 1.2364s | 0.8088 Ops/s | 0.7464 Ops/s | $\textbf{\color{#35bf28}+8.36\\%}$ | | test_serialize_weights | 0.1878s | 0.1024s | 9.7669 Ops/s | 9.2226 Ops/s | $\textbf{\color{#35bf28}+5.90\\%}$ | | test_serialize_weights_returnearly | 82.7812ms | 72.3278ms | 13.8259 Ops/s | 11.2790 Ops/s | $\textbf{\color{#35bf28}+22.58\\%}$ | | test_serialize_weights_pickle | 1.3471s | 1.2360s | 0.8091 Ops/s | 0.8035 Ops/s | $\color{#35bf28}+0.70\\%$ | | test_reshape_pytree | 73.3210μs | 39.1696μs | 25.5300 KOps/s | 25.3131 KOps/s | $\color{#35bf28}+0.86\\%$ | | test_reshape_td | 88.8620μs | 44.9046μs | 22.2694 KOps/s | 22.3750 KOps/s | $\color{#d91a1a}-0.47\\%$ | | test_view_pytree | 64.0620μs | 38.8112μs | 25.7658 KOps/s | 25.1216 KOps/s | $\color{#35bf28}+2.56\\%$ | | test_view_td | 82.0920μs | 51.0189μs | 19.6006 KOps/s | 19.2695 KOps/s | $\color{#35bf28}+1.72\\%$ | | test_unbind_pytree | 0.1904ms | 38.4662μs | 25.9968 KOps/s | 26.2086 KOps/s | $\color{#d91a1a}-0.81\\%$ | | test_unbind_td | 0.4763ms | 47.7490μs | 20.9429 KOps/s | 20.6963 KOps/s | $\color{#35bf28}+1.19\\%$ | | test_split_pytree | 81.8120μs | 51.6126μs | 19.3751 KOps/s | 19.1219 KOps/s | $\color{#35bf28}+1.32\\%$ | | test_split_td | 88.5572ms | 69.5585μs | 14.3764 KOps/s | 15.8405 KOps/s | $\textbf{\color{#d91a1a}-9.24\\%}$ | | test_add_pytree | 0.1018ms | 60.9340μs | 16.4112 KOps/s 
| 16.0067 KOps/s | $\color{#35bf28}+2.53\\%$ | | test_add_td | 0.1598ms | 0.1020ms | 9.8075 KOps/s | 10.1099 KOps/s | $\color{#d91a1a}-2.99\\%$ | | test_compile_add_one_nested[tensordict-compile] | 0.4160ms | 0.2122ms | 4.7118 KOps/s | 4.6636 KOps/s | $\color{#35bf28}+1.03\\%$ | | test_compile_add_one_nested[tensordict-eager] | 0.2620ms | 0.1778ms | 5.6249 KOps/s | 5.5824 KOps/s | $\color{#35bf28}+0.76\\%$ | | test_compile_add_one_nested[pytree-compile] | 0.1809ms | 0.1477ms | 6.7722 KOps/s | 6.7829 KOps/s | $\color{#d91a1a}-0.16\\%$ | | test_compile_add_one_nested[pytree-eager] | 0.2655ms | 0.1964ms | 5.0926 KOps/s | 4.9974 KOps/s | $\color{#35bf28}+1.91\\%$ | | test_compile_copy_nested[tensordict-compile] | 92.9120μs | 22.0486μs | 45.3545 KOps/s | 45.0575 KOps/s | $\color{#35bf28}+0.66\\%$ | | test_compile_copy_nested[tensordict-eager] | 67.9610μs | 50.1598μs | 19.9363 KOps/s | 20.3097 KOps/s | $\color{#d91a1a}-1.84\\%$ | | test_compile_copy_nested[pytree-compile] | 99.9720μs | 72.8188μs | 13.7327 KOps/s | 14.0106 KOps/s | $\color{#d91a1a}-1.98\\%$ | | test_compile_copy_nested[pytree-eager] | 80.0210μs | 60.1926μs | 16.6133 KOps/s | 16.7698 KOps/s | $\color{#d91a1a}-0.93\\%$ | | test_compile_add_one_flat[tensordict-compile] | 0.3746ms | 0.3345ms | 2.9899 KOps/s | 3.0085 KOps/s | $\color{#d91a1a}-0.62\\%$ | | test_compile_add_one_flat[tensordict-eager] | 0.3238ms | 0.2241ms | 4.4616 KOps/s | 4.5343 KOps/s | $\color{#d91a1a}-1.60\\%$ | | test_compile_add_one_flat[tensorclass-compile] | 0.1832ms | 0.1333ms | 7.5014 KOps/s | 7.3633 KOps/s | $\color{#35bf28}+1.88\\%$ | | test_compile_add_one_flat[tensorclass-eager] | 0.1309ms | 64.7912μs | 15.4342 KOps/s | 14.6652 KOps/s | $\textbf{\color{#35bf28}+5.24\\%}$ | | test_compile_add_one_flat[pytree-compile] | 0.3872ms | 0.3333ms | 3.0001 KOps/s | 2.9980 KOps/s | $\color{#35bf28}+0.07\\%$ | | test_compile_add_one_flat[pytree-eager] | 0.7064ms | 0.6444ms | 1.5518 KOps/s | 1.5329 KOps/s | $\color{#35bf28}+1.23\\%$ | | 
test_compile_add_self_flat[tensordict-eager] | 0.3456ms | 0.2728ms | 3.6653 KOps/s | 3.7000 KOps/s | $\color{#d91a1a}-0.94\\%$ | | test_compile_add_self_flat[tensordict-compile] | 0.3738ms | 0.3378ms | 2.9599 KOps/s | 2.9750 KOps/s | $\color{#d91a1a}-0.51\\%$ | | test_compile_add_self_flat[tensorclass-eager] | 0.1611ms | 77.3605μs | 12.9265 KOps/s | 12.8175 KOps/s | $\color{#35bf28}+0.85\\%$ | | test_compile_add_self_flat[tensorclass-compile] | 0.2811ms | 0.1333ms | 7.5000 KOps/s | 7.4314 KOps/s | $\color{#35bf28}+0.92\\%$ | | test_compile_add_self_flat[pytree-eager] | 0.6103ms | 0.5503ms | 1.8173 KOps/s | 1.7576 KOps/s | $\color{#35bf28}+3.40\\%$ | | test_compile_add_self_flat[pytree-compile] | 0.3683ms | 0.3323ms | 3.0091 KOps/s | 3.0088 KOps/s | $+0.01\\%$ | | test_compile_copy_flat[tensordict-compile] | 36.4710μs | 19.3224μs | 51.7535 KOps/s | 52.3361 KOps/s | $\color{#d91a1a}-1.11\\%$ | | test_compile_copy_flat[tensordict-eager] | 51.8210μs | 31.7711μs | 31.4751 KOps/s | 31.0560 KOps/s | $\color{#35bf28}+1.35\\%$ | | test_compile_copy_flat[pytree-compile] | 98.5510μs | 75.6689μs | 13.2155 KOps/s | 13.1901 KOps/s | $\color{#35bf28}+0.19\\%$ | | test_compile_copy_flat[pytree-eager] | 78.1020μs | 60.8864μs | 16.4240 KOps/s | 16.4987 KOps/s | $\color{#d91a1a}-0.45\\%$ | | test_compile_assign_and_add[tensordict-compile] | 2.5584ms | 0.9482ms | 1.0546 KOps/s | 1.0566 KOps/s | $\color{#d91a1a}-0.19\\%$ | | test_compile_assign_and_add[tensordict-eager] | 3.8813ms | 3.4869ms | 286.7854 Ops/s | 287.4858 Ops/s | $\color{#d91a1a}-0.24\\%$ | | test_compile_assign_and_add[pytree-compile] | 2.5300ms | 0.9370ms | 1.0672 KOps/s | 1.0742 KOps/s | $\color{#d91a1a}-0.65\\%$ | | test_compile_assign_and_add[pytree-eager] | 3.5384ms | 3.4700ms | 288.1825 Ops/s | 287.0704 Ops/s | $\color{#35bf28}+0.39\\%$ | | test_compile_indexing[tensor-tensordict-compile] | 0.1474ms | 0.1131ms | 8.8401 KOps/s | 8.8150 KOps/s | $\color{#35bf28}+0.28\\%$ | | 
test_compile_indexing[tensor-tensordict-eager] | 0.2385ms | 66.7049μs | 14.9914 KOps/s | 14.7177 KOps/s | $\color{#35bf28}+1.86\\%$ | | test_compile_indexing[tensor-tensorclass-compile] | 0.1506ms | 0.1059ms | 9.4391 KOps/s | 9.4435 KOps/s | $\color{#d91a1a}-0.05\\%$ | | test_compile_indexing[tensor-tensorclass-eager] | 73.0820μs | 47.2233μs | 21.1760 KOps/s | 20.8144 KOps/s | $\color{#35bf28}+1.74\\%$ | | test_compile_indexing[tensor-pytree-compile] | 0.1500ms | 0.1047ms | 9.5511 KOps/s | 9.0179 KOps/s | $\textbf{\color{#35bf28}+5.91\\%}$ | | test_compile_indexing[tensor-pytree-eager] | 0.1171ms | 47.1000μs | 21.2314 KOps/s | 19.4722 KOps/s | $\textbf{\color{#35bf28}+9.03\\%}$ | | test_compile_indexing[slice-tensordict-compile] | 0.1837ms | 0.1425ms | 7.0182 KOps/s | 6.9865 KOps/s | $\color{#35bf28}+0.45\\%$ | | test_compile_indexing[slice-tensordict-eager] | 0.1959ms | 27.0429μs | 36.9783 KOps/s | 35.5292 KOps/s | $\color{#35bf28}+4.08\\%$ | | test_compile_indexing[slice-tensorclass-compile] | 0.2136ms | 0.1341ms | 7.4548 KOps/s | 7.1832 KOps/s | $\color{#35bf28}+3.78\\%$ | | test_compile_indexing[slice-tensorclass-eager] | 53.1510μs | 23.0247μs | 43.4316 KOps/s | 42.1337 KOps/s | $\color{#35bf28}+3.08\\%$ | | test_compile_indexing[slice-pytree-compile] | 0.1852ms | 0.1374ms | 7.2792 KOps/s | 7.2638 KOps/s | $\color{#35bf28}+0.21\\%$ | | test_compile_indexing[slice-pytree-eager] | 47.5600μs | 22.6176μs | 44.2134 KOps/s | 42.5948 KOps/s | $\color{#35bf28}+3.80\\%$ | | test_compile_indexing[int-tensordict-compile] | 0.2041ms | 0.1417ms | 7.0588 KOps/s | 7.0131 KOps/s | $\color{#35bf28}+0.65\\%$ | | test_compile_indexing[int-tensordict-eager] | 0.5501ms | 26.6618μs | 37.5068 KOps/s | 35.5832 KOps/s | $\textbf{\color{#35bf28}+5.41\\%}$ | | test_compile_indexing[int-tensorclass-compile] | 0.1594ms | 0.1331ms | 7.5107 KOps/s | 7.4530 KOps/s | $\color{#35bf28}+0.77\\%$ | | test_compile_indexing[int-tensorclass-eager] | 48.9910μs | 22.7280μs | 43.9986 KOps/s | 42.5840 
KOps/s | $\color{#35bf28}+3.32\\%$ | | test_compile_indexing[int-pytree-compile] | 0.1835ms | 0.1334ms | 7.4961 KOps/s | 7.3896 KOps/s | $\color{#35bf28}+1.44\\%$ | | test_compile_indexing[int-pytree-eager] | 46.0610μs | 22.7864μs | 43.8858 KOps/s | 42.5553 KOps/s | $\color{#35bf28}+3.13\\%$ | | test_mod_add[eager] | 71.1410μs | 39.7918μs | 25.1308 KOps/s | 25.1097 KOps/s | $\color{#35bf28}+0.08\\%$ | | test_mod_add[compile] | 0.1154ms | 69.4774μs | 14.3932 KOps/s | 14.3967 KOps/s | $\color{#d91a1a}-0.02\\%$ | | test_mod_add[compile-overhead] | 0.2630ms | 0.1491ms | 6.7071 KOps/s | 6.7164 KOps/s | $\color{#d91a1a}-0.14\\%$ | | test_mod_wrap[eager] | 0.3498ms | 0.2641ms | 3.7859 KOps/s | 3.6592 KOps/s | $\color{#35bf28}+3.46\\%$ | | test_mod_wrap[compile] | 1.2029ms | 0.2993ms | 3.3412 KOps/s | 3.3055 KOps/s | $\color{#35bf28}+1.08\\%$ | | test_mod_wrap[compile-overhead] | 7.7715ms | 4.1275ms | 242.2792 Ops/s | 236.0045 Ops/s | $\color{#35bf28}+2.66\\%$ | | test_mod_wrap_and_backward[eager] | 1.5865ms | 1.4740ms | 678.4348 Ops/s | 725.7881 Ops/s | $\textbf{\color{#d91a1a}-6.52\\%}$ | | test_mod_wrap_and_backward[compile] | 1.5682ms | 1.4797ms | 675.8340 Ops/s | 670.6832 Ops/s | $\color{#35bf28}+0.77\\%$ | | test_mod_wrap_and_backward[compile-overhead] | 1.4544ms | 1.0019ms | 998.0865 Ops/s | 999.8928 Ops/s | $\color{#d91a1a}-0.18\\%$ | | test_seq_add[eager] | 0.1834ms | 0.1168ms | 8.5625 KOps/s | 8.4198 KOps/s | $\color{#35bf28}+1.70\\%$ | | test_seq_add[compile] | 0.2130ms | 86.9830μs | 11.4965 KOps/s | 11.2143 KOps/s | $\color{#35bf28}+2.52\\%$ | | test_seq_add[compile-overhead] | 0.1605ms | 0.1254ms | 7.9726 KOps/s | 8.0322 KOps/s | $\color{#d91a1a}-0.74\\%$ | | test_seq_wrap[eager] | 0.5105ms | 0.4439ms | 2.2528 KOps/s | 2.2826 KOps/s | $\color{#d91a1a}-1.30\\%$ | | test_seq_wrap[compile] | 1.4858ms | 0.3319ms | 3.0132 KOps/s | 2.9896 KOps/s | $\color{#35bf28}+0.79\\%$ | | test_seq_wrap[compile-overhead] | 0.3072s | 0.1439s | 6.9493 Ops/s | 6.9115 Ops/s | 
$\color{#35bf28}+0.55\\%$ | | test_func_call_runtime[False-eager] | 0.8706ms | 0.7722ms | 1.2951 KOps/s | 1.3070 KOps/s | $\color{#d91a1a}-0.91\\%$ | | test_func_call_runtime[False-compile] | 0.9434ms | 0.8435ms | 1.1856 KOps/s | 1.2046 KOps/s | $\color{#d91a1a}-1.58\\%$ | | test_func_call_runtime[False-compile-overhead] | 0.4557ms | 0.3673ms | 2.7225 KOps/s | 2.7019 KOps/s | $\color{#35bf28}+0.76\\%$ | | test_func_call_runtime[True-eager] | 1.1821ms | 1.0239ms | 976.6486 Ops/s | 973.9417 Ops/s | $\color{#35bf28}+0.28\\%$ | | test_func_call_runtime[True-compile] | 0.9562ms | 0.8716ms | 1.1473 KOps/s | 1.1471 KOps/s | $\color{#35bf28}+0.02\\%$ | | test_func_call_runtime[True-compile-overhead] | 0.4877ms | 0.4080ms | 2.4509 KOps/s | 2.4432 KOps/s | $\color{#35bf28}+0.32\\%$ | | test_distributed | 0.8512ms | 70.8252μs | 14.1193 KOps/s | 14.6739 KOps/s | $\color{#d91a1a}-3.78\\%$ | | test_tdmodule | 33.5110μs | 17.0287μs | 58.7242 KOps/s | 60.7354 KOps/s | $\color{#d91a1a}-3.31\\%$ | | test_tdmodule_dispatch | 52.8400μs | 34.6784μs | 28.8364 KOps/s | 30.1436 KOps/s | $\color{#d91a1a}-4.34\\%$ | | test_tdseq | 33.3400μs | 17.7097μs | 56.4661 KOps/s | 58.6927 KOps/s | $\color{#d91a1a}-3.79\\%$ | | test_tdseq_dispatch | 53.3410μs | 36.7850μs | 27.1850 KOps/s | 28.3537 KOps/s | $\color{#d91a1a}-4.12\\%$ | | test_instantiation_functorch | 2.1093ms | 2.0245ms | 493.9424 Ops/s | 491.7527 Ops/s | $\color{#35bf28}+0.45\\%$ | | test_instantiation_td | 2.0532ms | 1.3169ms | 759.3381 Ops/s | 754.6258 Ops/s | $\color{#35bf28}+0.62\\%$ | | test_exec_functorch | 0.3382ms | 0.2288ms | 4.3716 KOps/s | 4.3864 KOps/s | $\color{#d91a1a}-0.34\\%$ | | test_exec_functional_call | 0.2678ms | 0.2288ms | 4.3698 KOps/s | 4.4382 KOps/s | $\color{#d91a1a}-1.54\\%$ | | test_exec_td | 0.2721ms | 0.2264ms | 4.4165 KOps/s | 4.4334 KOps/s | $\color{#d91a1a}-0.38\\%$ | | test_exec_td_decorator | 0.5252ms | 0.3046ms | 3.2834 KOps/s | 3.3261 KOps/s | $\color{#d91a1a}-1.29\\%$ | | 
test_vmap_mlp_speed[True-True] | 0.7418ms | 0.6882ms | 1.4531 KOps/s | 1.4575 KOps/s | $\color{#d91a1a}-0.31\\%$ | | test_vmap_mlp_speed[True-False] | 0.7644ms | 0.6846ms | 1.4607 KOps/s | 1.4622 KOps/s | $\color{#d91a1a}-0.10\\%$ | | test_vmap_mlp_speed[False-True] | 0.6524ms | 0.5997ms | 1.6676 KOps/s | 1.6775 KOps/s | $\color{#d91a1a}-0.59\\%$ | | test_vmap_mlp_speed[False-False] | 0.6385ms | 0.5998ms | 1.6672 KOps/s | 1.6730 KOps/s | $\color{#d91a1a}-0.34\\%$ | | test_vmap_mlp_speed_decorator[True-True] | 1.3071ms | 0.7672ms | 1.3034 KOps/s | 1.3102 KOps/s | $\color{#d91a1a}-0.52\\%$ | | test_vmap_mlp_speed_decorator[True-False] | 0.9152ms | 0.7656ms | 1.3062 KOps/s | 1.3177 KOps/s | $\color{#d91a1a}-0.87\\%$ | | test_vmap_mlp_speed_decorator[False-True] | 0.8842ms | 0.6671ms | 1.4991 KOps/s | 1.5113 KOps/s | $\color{#d91a1a}-0.81\\%$ | | test_vmap_mlp_speed_decorator[False-False] | 0.8055ms | 0.6654ms | 1.5028 KOps/s | 1.5155 KOps/s | $\color{#d91a1a}-0.84\\%$ | | test_vmap_transformer_speed[True-True] | 9.1757ms | 8.9715ms | 111.4642 Ops/s | 111.5347 Ops/s | $\color{#d91a1a}-0.06\\%$ | | test_vmap_transformer_speed[True-False] | 9.3763ms | 9.0226ms | 110.8328 Ops/s | 110.9224 Ops/s | $\color{#d91a1a}-0.08\\%$ | | test_vmap_transformer_speed[False-True] | 9.0375ms | 8.8897ms | 112.4896 Ops/s | 112.7141 Ops/s | $\color{#d91a1a}-0.20\\%$ | | test_vmap_transformer_speed[False-False] | 8.9739ms | 8.8903ms | 112.4815 Ops/s | 112.9106 Ops/s | $\color{#d91a1a}-0.38\\%$ | | test_vmap_transformer_speed_decorator[True-True] | 22.4365ms | 21.4286ms | 46.6667 Ops/s | 46.9578 Ops/s | $\color{#d91a1a}-0.62\\%$ | | test_vmap_transformer_speed_decorator[True-False] | 21.4672ms | 21.3936ms | 46.7430 Ops/s | 46.9427 Ops/s | $\color{#d91a1a}-0.43\\%$ | | test_vmap_transformer_speed_decorator[False-True] | 21.2463ms | 21.1746ms | 47.2264 Ops/s | 47.4754 Ops/s | $\color{#d91a1a}-0.52\\%$ | | test_vmap_transformer_speed_decorator[False-False] | 21.3033ms | 21.2345ms | 47.0932 Ops/s 
| 47.3390 Ops/s | $\color{#d91a1a}-0.52\\%$ | | test_to_module_speed[True] | 1.6134ms | 1.4939ms | 669.4010 Ops/s | 675.5912 Ops/s | $\color{#d91a1a}-0.92\\%$ | | test_to_module_speed[False] | 1.6195ms | 1.4799ms | 675.7418 Ops/s | 681.5817 Ops/s | $\color{#d91a1a}-0.86\\%$ | | test_tc_init | 62.6410μs | 40.4992μs | 24.6918 KOps/s | 26.8175 KOps/s | $\textbf{\color{#d91a1a}-7.93\\%}$ | | test_tc_init_nested | 0.1642ms | 85.1739μs | 11.7407 KOps/s | 13.3440 KOps/s | $\textbf{\color{#d91a1a}-12.02\\%}$ | | test_tc_first_layer_tensor | 19.4900μs | 3.9863μs | 250.8596 KOps/s | 253.6091 KOps/s | $\color{#d91a1a}-1.08\\%$ | | test_tc_first_layer_nontensor | 19.2500μs | 4.0667μs | 245.9016 KOps/s | 251.2515 KOps/s | $\color{#d91a1a}-2.13\\%$ | | test_tc_second_layer_tensor | 4.8952μs | 1.2866μs | 777.2181 KOps/s | 768.9088 KOps/s | $\color{#35bf28}+1.08\\%$ | | test_tc_second_layer_nontensor | 22.4010μs | 4.6432μs | 215.3710 KOps/s | 219.2738 KOps/s | $\color{#d91a1a}-1.78\\%$ | | test_unbind | 0.3159s | 13.1512ms | 76.0386 Ops/s | 82.7279 Ops/s | $\textbf{\color{#d91a1a}-8.09\\%}$ | | test_full_like | 0.6570ms | 0.5780ms | 1.7300 KOps/s | 1.7303 KOps/s | $\color{#d91a1a}-0.01\\%$ | | test_zeros_like | 0.2592ms | 0.1979ms | 5.0535 KOps/s | 5.0582 KOps/s | $\color{#d91a1a}-0.09\\%$ | | test_ones_like | 0.3464ms | 0.1978ms | 5.0559 KOps/s | 5.0618 KOps/s | $\color{#d91a1a}-0.12\\%$ | | test_clone | 0.4369ms | 0.4138ms | 2.4168 KOps/s | 2.4153 KOps/s | $\color{#35bf28}+0.06\\%$ | | test_squeeze | 28.6510μs | 11.8294μs | 84.5354 KOps/s | 84.3698 KOps/s | $\color{#35bf28}+0.20\\%$ | | test_unsqueeze | 0.2980ms | 84.6329μs | 11.8157 KOps/s | 11.2204 KOps/s | $\textbf{\color{#35bf28}+5.31\\%}$ | | test_split | 0.4748ms | 0.1858ms | 5.3813 KOps/s | 5.5506 KOps/s | $\color{#d91a1a}-3.05\\%$ | | test_permute | 0.2332ms | 0.1958ms | 5.1061 KOps/s | 5.2228 KOps/s | $\color{#d91a1a}-2.24\\%$ | | test_stack | 1.2505ms | 0.9192ms | 1.0879 KOps/s | 1.1145 KOps/s | 
$\color{#d91a1a}-2.39\\%$ | | test_cat | 1.2489ms | 1.2317ms | 811.9126 Ops/s | 811.8387 Ops/s | $+0.01\\%$ |