pytorch / tensordict

TensorDict is a pytorch dedicated tensor container.
MIT License
832 stars 74 forks source link

[BugFix] Fix builds #849

Closed vmoens closed 4 months ago

github-actions[bot] commented 4 months ago

$\color{#D29922}\textsf{\Large ⚠\kern{0.2cm}\normalsize Warning}$ Result of CPU Benchmark Tests

Total Benchmarks: 144. Improved: $\large\color{#35bf28}31$. Worsened: $\large\color{#d91a1a}4$.

Expand to view detailed results | Name | Max | Mean | Ops | Ops on Repo `HEAD` | Change | | ------------------------------------------ | --------- | --------- | --------------- | ------------------ | ------------------------------------ | | test_plain_set_nested | 37.5500μs | 16.4402μs | 60.8264 KOps/s | 59.6111 KOps/s | $\color{#35bf28}+2.04\\%$ | | test_plain_set_stack_nested | 70.0100μs | 16.3873μs | 61.0229 KOps/s | 59.3746 KOps/s | $\color{#35bf28}+2.78\\%$ | | test_plain_set_nested_inplace | 57.3870μs | 19.1361μs | 52.2573 KOps/s | 50.9060 KOps/s | $\color{#35bf28}+2.65\\%$ | | test_plain_set_stack_nested_inplace | 68.3370μs | 18.8321μs | 53.1009 KOps/s | 50.8278 KOps/s | $\color{#35bf28}+4.47\\%$ | | test_items | 13.1450μs | 2.5665μs | 389.6364 KOps/s | 392.0279 KOps/s | $\color{#d91a1a}-0.61\\%$ | | test_items_nested | 0.4845ms | 0.2728ms | 3.6652 KOps/s | 3.6063 KOps/s | $\color{#35bf28}+1.63\\%$ | | test_items_nested_locked | 0.8488ms | 0.2744ms | 3.6443 KOps/s | 3.6370 KOps/s | $\color{#35bf28}+0.20\\%$ | | test_items_nested_leaf | 0.1372ms | 78.1931μs | 12.7889 KOps/s | 12.8802 KOps/s | $\color{#d91a1a}-0.71\\%$ | | test_items_stack_nested | 0.4040ms | 0.2746ms | 3.6416 KOps/s | 3.6269 KOps/s | $\color{#35bf28}+0.41\\%$ | | test_items_stack_nested_leaf | 0.1781ms | 80.6742μs | 12.3955 KOps/s | 13.0039 KOps/s | $\color{#d91a1a}-4.68\\%$ | | test_items_stack_nested_locked | 0.5065ms | 0.2780ms | 3.5972 KOps/s | 3.6157 KOps/s | $\color{#d91a1a}-0.51\\%$ | | test_keys | 26.9400μs | 3.8303μs | 261.0744 KOps/s | 252.0130 KOps/s | $\color{#35bf28}+3.60\\%$ | | test_keys_nested | 0.2000ms | 0.1363ms | 7.3380 KOps/s | 7.3675 KOps/s | $\color{#d91a1a}-0.40\\%$ | | test_keys_nested_locked | 0.7318ms | 0.1431ms | 6.9885 KOps/s | 7.0029 KOps/s | $\color{#d91a1a}-0.21\\%$ | | test_keys_nested_leaf | 0.3806ms | 0.1213ms | 8.2474 KOps/s | 8.6328 KOps/s | $\color{#d91a1a}-4.46\\%$ | | test_keys_stack_nested | 0.2350ms | 0.1372ms | 7.2907 KOps/s | 7.3898 KOps/s | 
$\color{#d91a1a}-1.34\\%$ | | test_keys_stack_nested_leaf | 0.2229ms | 0.1203ms | 8.3113 KOps/s | 8.7230 KOps/s | $\color{#d91a1a}-4.72\\%$ | | test_keys_stack_nested_locked | 0.1955ms | 0.1418ms | 7.0501 KOps/s | 7.2420 KOps/s | $\color{#d91a1a}-2.65\\%$ | | test_values | 9.7255μs | 1.1419μs | 875.7537 KOps/s | 863.7771 KOps/s | $\color{#35bf28}+1.39\\%$ | | test_values_nested | 95.3680μs | 50.6371μs | 19.7484 KOps/s | 19.6536 KOps/s | $\color{#35bf28}+0.48\\%$ | | test_values_nested_locked | 0.1017ms | 51.0638μs | 19.5833 KOps/s | 19.6011 KOps/s | $\color{#d91a1a}-0.09\\%$ | | test_values_nested_leaf | 89.4470μs | 45.9616μs | 21.7573 KOps/s | 21.8401 KOps/s | $\color{#d91a1a}-0.38\\%$ | | test_values_stack_nested | 97.1210μs | 52.0718μs | 19.2043 KOps/s | 19.1871 KOps/s | $\color{#35bf28}+0.09\\%$ | | test_values_stack_nested_leaf | 0.1001ms | 45.9222μs | 21.7760 KOps/s | 22.0246 KOps/s | $\color{#d91a1a}-1.13\\%$ | | test_values_stack_nested_locked | 99.1250μs | 51.8997μs | 19.2679 KOps/s | 19.0174 KOps/s | $\color{#35bf28}+1.32\\%$ | | test_membership | 16.1100μs | 1.3461μs | 742.8724 KOps/s | 738.3671 KOps/s | $\color{#35bf28}+0.61\\%$ | | test_membership_nested | 18.9550μs | 3.4230μs | 292.1381 KOps/s | 293.9890 KOps/s | $\color{#d91a1a}-0.63\\%$ | | test_membership_nested_leaf | 20.4480μs | 3.4431μs | 290.4357 KOps/s | 292.4090 KOps/s | $\color{#d91a1a}-0.67\\%$ | | test_membership_stacked_nested | 41.1070μs | 3.4420μs | 290.5283 KOps/s | 290.5298 KOps/s | $-0.00\\%$ | | test_membership_stacked_nested_leaf | 20.1180μs | 3.4527μs | 289.6297 KOps/s | 294.8417 KOps/s | $\color{#d91a1a}-1.77\\%$ | | test_membership_nested_last | 50.0840μs | 4.1962μs | 238.3119 KOps/s | 236.8862 KOps/s | $\color{#35bf28}+0.60\\%$ | | test_membership_nested_leaf_last | 21.9110μs | 4.2031μs | 237.9194 KOps/s | 235.3009 KOps/s | $\color{#35bf28}+1.11\\%$ | | test_membership_stacked_nested_last | 25.6380μs | 4.1888μs | 238.7317 KOps/s | 74.8969 KOps/s | 
$\textbf{\color{#35bf28}+218.75\\%}$ | | test_membership_stacked_nested_leaf_last | 44.9840μs | 4.2050μs | 237.8121 KOps/s | 74.4232 KOps/s | $\textbf{\color{#35bf28}+219.54\\%}$ | | test_nested_getleaf | 46.8880μs | 10.9276μs | 91.5113 KOps/s | 90.5520 KOps/s | $\color{#35bf28}+1.06\\%$ | | test_nested_get | 29.7550μs | 10.3160μs | 96.9366 KOps/s | 95.0521 KOps/s | $\color{#35bf28}+1.98\\%$ | | test_stacked_getleaf | 51.6570μs | 10.8320μs | 92.3191 KOps/s | 90.3274 KOps/s | $\color{#35bf28}+2.21\\%$ | | test_stacked_get | 47.3880μs | 10.0558μs | 99.4450 KOps/s | 96.0756 KOps/s | $\color{#35bf28}+3.51\\%$ | | test_nested_getitemleaf | 30.9670μs | 11.4804μs | 87.1048 KOps/s | 87.5678 KOps/s | $\color{#d91a1a}-0.53\\%$ | | test_nested_getitem | 49.1110μs | 10.5583μs | 94.7119 KOps/s | 92.9004 KOps/s | $\color{#35bf28}+1.95\\%$ | | test_stacked_getitemleaf | 52.4280μs | 11.2809μs | 88.6451 KOps/s | 86.4550 KOps/s | $\color{#35bf28}+2.53\\%$ | | test_stacked_getitem | 53.0390μs | 10.4020μs | 96.1349 KOps/s | 93.8250 KOps/s | $\color{#35bf28}+2.46\\%$ | | test_lock_nested | 53.1045ms | 0.3839ms | 2.6046 KOps/s | 2.9697 KOps/s | $\textbf{\color{#d91a1a}-12.30\\%}$ | | test_lock_stack_nested | 0.4105ms | 0.2988ms | 3.3470 KOps/s | 3.4579 KOps/s | $\color{#d91a1a}-3.21\\%$ | | test_unlock_nested | 0.8661ms | 0.3363ms | 2.9736 KOps/s | 2.9161 KOps/s | $\color{#35bf28}+1.97\\%$ | | test_unlock_stack_nested | 0.7066ms | 0.3082ms | 3.2451 KOps/s | 3.3376 KOps/s | $\color{#d91a1a}-2.77\\%$ | | test_flatten_speed | 0.2029ms | 99.5012μs | 10.0501 KOps/s | 10.1974 KOps/s | $\color{#d91a1a}-1.44\\%$ | | test_unflatten_speed | 0.7223ms | 0.4183ms | 2.3904 KOps/s | 2.4315 KOps/s | $\color{#d91a1a}-1.69\\%$ | | test_common_ops | 1.4076ms | 0.6989ms | 1.4308 KOps/s | 1.3504 KOps/s | $\textbf{\color{#35bf28}+5.95\\%}$ | | test_creation | 15.2790μs | 1.8849μs | 530.5311 KOps/s | 530.7117 KOps/s | $\color{#d91a1a}-0.03\\%$ | | test_creation_empty | 49.4220μs | 9.9734μs | 100.2668 KOps/s | 
85.6545 KOps/s | $\textbf{\color{#35bf28}+17.06\\%}$ | | test_creation_nested_1 | 34.5140μs | 12.5438μs | 79.7208 KOps/s | 71.5337 KOps/s | $\textbf{\color{#35bf28}+11.45\\%}$ | | test_creation_nested_2 | 43.0900μs | 16.1010μs | 62.1080 KOps/s | 57.1791 KOps/s | $\textbf{\color{#35bf28}+8.62\\%}$ | | test_clone | 67.6460μs | 12.8864μs | 77.6011 KOps/s | 74.2317 KOps/s | $\color{#35bf28}+4.54\\%$ | | test_getitem[int] | 65.4930μs | 10.8995μs | 91.7472 KOps/s | 89.2974 KOps/s | $\color{#35bf28}+2.74\\%$ | | test_getitem[slice_int] | 74.0980μs | 21.5739μs | 46.3522 KOps/s | 43.8911 KOps/s | $\textbf{\color{#35bf28}+5.61\\%}$ | | test_getitem[range] | 77.9550μs | 57.4732μs | 17.3994 KOps/s | 17.1270 KOps/s | $\color{#35bf28}+1.59\\%$ | | test_getitem[tuple] | 53.4800μs | 18.4084μs | 54.3231 KOps/s | 53.5307 KOps/s | $\color{#35bf28}+1.48\\%$ | | test_getitem[list] | 0.1080ms | 39.5364μs | 25.2931 KOps/s | 24.6960 KOps/s | $\color{#35bf28}+2.42\\%$ | | test_setitem_dim[int] | 63.5990μs | 32.1228μs | 31.1305 KOps/s | 27.6347 KOps/s | $\textbf{\color{#35bf28}+12.65\\%}$ | | test_setitem_dim[slice_int] | 86.8320μs | 58.8495μs | 16.9925 KOps/s | 15.5128 KOps/s | $\textbf{\color{#35bf28}+9.54\\%}$ | | test_setitem_dim[range] | 0.1090ms | 81.2096μs | 12.3138 KOps/s | 11.6159 KOps/s | $\textbf{\color{#35bf28}+6.01\\%}$ | | test_setitem_dim[tuple] | 0.1268ms | 48.6331μs | 20.5621 KOps/s | 19.2382 KOps/s | $\textbf{\color{#35bf28}+6.88\\%}$ | | test_setitem | 66.3740μs | 19.2028μs | 52.0759 KOps/s | 48.7925 KOps/s | $\textbf{\color{#35bf28}+6.73\\%}$ | | test_set | 50.2940μs | 18.3368μs | 54.5353 KOps/s | 50.0308 KOps/s | $\textbf{\color{#35bf28}+9.00\\%}$ | | test_set_shared | 4.2724ms | 0.1431ms | 6.9883 KOps/s | 6.9717 KOps/s | $\color{#35bf28}+0.24\\%$ | | test_update | 0.1141ms | 21.3122μs | 46.9215 KOps/s | 43.0948 KOps/s | $\textbf{\color{#35bf28}+8.88\\%}$ | | test_update_nested | 0.1082ms | 31.1299μs | 32.1234 KOps/s | 31.0443 KOps/s | $\color{#35bf28}+3.48\\%$ | | 
test_update__nested | 72.5960μs | 23.9704μs | 41.7182 KOps/s | 39.6289 KOps/s | $\textbf{\color{#35bf28}+5.27\\%}$ | | test_set_nested | 81.4110μs | 20.4100μs | 48.9955 KOps/s | 45.8428 KOps/s | $\textbf{\color{#35bf28}+6.88\\%}$ | | test_set_nested_new | 64.2300μs | 24.7152μs | 40.4610 KOps/s | 37.7731 KOps/s | $\textbf{\color{#35bf28}+7.12\\%}$ | | test_select | 87.4230μs | 38.9398μs | 25.6807 KOps/s | 24.5926 KOps/s | $\color{#35bf28}+4.42\\%$ | | test_select_nested | 0.1280ms | 57.9326μs | 17.2614 KOps/s | 17.8217 KOps/s | $\color{#d91a1a}-3.14\\%$ | | test_exclude_nested | 0.2187ms | 0.1187ms | 8.4243 KOps/s | 8.4579 KOps/s | $\color{#d91a1a}-0.40\\%$ | | test_empty[True] | 0.5091ms | 0.3965ms | 2.5218 KOps/s | 2.5523 KOps/s | $\color{#d91a1a}-1.20\\%$ | | test_empty[False] | 8.9986μs | 1.0218μs | 978.6744 KOps/s | 977.5279 KOps/s | $\color{#35bf28}+0.12\\%$ | | test_unbind_speed | 0.4321ms | 0.2450ms | 4.0816 KOps/s | 4.0967 KOps/s | $\color{#d91a1a}-0.37\\%$ | | test_unbind_speed_stack0 | 0.4070ms | 0.2409ms | 4.1506 KOps/s | 4.2057 KOps/s | $\color{#d91a1a}-1.31\\%$ | | test_unbind_speed_stack1 | 68.7021ms | 0.6967ms | 1.4352 KOps/s | 1.4593 KOps/s | $\color{#d91a1a}-1.65\\%$ | | test_split | 62.4319ms | 1.5679ms | 637.7931 Ops/s | 621.9247 Ops/s | $\color{#35bf28}+2.55\\%$ | | test_chunk | 69.5509ms | 1.5838ms | 631.3971 Ops/s | 620.3504 Ops/s | $\color{#35bf28}+1.78\\%$ | | test_creation[device0] | 0.1975ms | 83.6131μs | 11.9598 KOps/s | 11.8360 KOps/s | $\color{#35bf28}+1.05\\%$ | | test_creation_from_tensor | 3.1436ms | 86.7335μs | 11.5296 KOps/s | 11.4560 KOps/s | $\color{#35bf28}+0.64\\%$ | | test_add_one[memmap_tensor0] | 52.6380μs | 5.4271μs | 184.2609 KOps/s | 175.8543 KOps/s | $\color{#35bf28}+4.78\\%$ | | test_contiguous[memmap_tensor0] | 20.1580μs | 0.6307μs | 1.5855 MOps/s | 1.5596 MOps/s | $\color{#35bf28}+1.66\\%$ | | test_stack[memmap_tensor0] | 20.5780μs | 3.5588μs | 280.9967 KOps/s | 273.6108 KOps/s | $\color{#35bf28}+2.70\\%$ | | 
test_memmaptd_index | 0.9583ms | 0.2567ms | 3.8949 KOps/s | 3.8872 KOps/s | $\color{#35bf28}+0.20\\%$ | | test_memmaptd_index_astensor | 0.6856ms | 0.3303ms | 3.0279 KOps/s | 3.0125 KOps/s | $\color{#35bf28}+0.51\\%$ | | test_memmaptd_index_op | 0.9672ms | 0.6056ms | 1.6513 KOps/s | 1.5536 KOps/s | $\textbf{\color{#35bf28}+6.28\\%}$ | | test_serialize_model | 0.1653s | 0.1058s | 9.4526 Ops/s | 9.0803 Ops/s | $\color{#35bf28}+4.10\\%$ | | test_serialize_model_pickle | 0.6100s | 0.3785s | 2.6418 Ops/s | 2.6452 Ops/s | $\color{#d91a1a}-0.13\\%$ | | test_serialize_weights | 0.1029s | 96.8570ms | 10.3245 Ops/s | 8.7830 Ops/s | $\textbf{\color{#35bf28}+17.55\\%}$ | | test_serialize_weights_returnearly | 0.1348s | 0.1222s | 8.1859 Ops/s | 8.1645 Ops/s | $\color{#35bf28}+0.26\\%$ | | test_serialize_weights_pickle | 1.1002s | 0.6167s | 1.6216 Ops/s | 2.4574 Ops/s | $\textbf{\color{#d91a1a}-34.01\\%}$ | | test_serialize_weights_filesystem | 0.1692s | 0.1014s | 9.8585 Ops/s | 9.3572 Ops/s | $\textbf{\color{#35bf28}+5.36\\%}$ | | test_serialize_model_filesystem | 0.1008s | 93.4343ms | 10.7027 Ops/s | 10.1587 Ops/s | $\textbf{\color{#35bf28}+5.35\\%}$ | | test_reshape_pytree | 55.5330μs | 25.7880μs | 38.7777 KOps/s | 38.1472 KOps/s | $\color{#35bf28}+1.65\\%$ | | test_reshape_td | 72.5160μs | 34.0104μs | 29.4028 KOps/s | 29.2843 KOps/s | $\color{#35bf28}+0.40\\%$ | | test_view_pytree | 86.9820μs | 26.0886μs | 38.3309 KOps/s | 39.0694 KOps/s | $\color{#d91a1a}-1.89\\%$ | | test_view_td | 82.0830μs | 39.6677μs | 25.2094 KOps/s | 25.2245 KOps/s | $\color{#d91a1a}-0.06\\%$ | | test_unbind_pytree | 58.1780μs | 29.6111μs | 33.7711 KOps/s | 33.6912 KOps/s | $\color{#35bf28}+0.24\\%$ | | test_unbind_td | 69.0719ms | 41.9501μs | 23.8378 KOps/s | 27.7716 KOps/s | $\textbf{\color{#d91a1a}-14.16\\%}$ | | test_split_pytree | 62.1860μs | 29.2970μs | 34.1332 KOps/s | 33.4382 KOps/s | $\color{#35bf28}+2.08\\%$ | | test_split_td | 0.1254ms | 39.1055μs | 25.5718 KOps/s | 25.2744 KOps/s | 
$\color{#35bf28}+1.18\\%$ | | test_add_pytree | 78.4160μs | 34.5234μs | 28.9659 KOps/s | 28.3243 KOps/s | $\color{#35bf28}+2.26\\%$ | | test_add_td | 0.1050ms | 53.0292μs | 18.8575 KOps/s | 17.1408 KOps/s | $\textbf{\color{#35bf28}+10.02\\%}$ | | test_distributed | 0.2348ms | 0.1032ms | 9.6866 KOps/s | 9.5217 KOps/s | $\color{#35bf28}+1.73\\%$ | | test_tdmodule | 68.2170μs | 17.5320μs | 57.0386 KOps/s | 52.9903 KOps/s | $\textbf{\color{#35bf28}+7.64\\%}$ | | test_tdmodule_dispatch | 59.6310μs | 34.6248μs | 28.8810 KOps/s | 26.1593 KOps/s | $\textbf{\color{#35bf28}+10.40\\%}$ | | test_tdseq | 35.7470μs | 20.3869μs | 49.0511 KOps/s | 45.1629 KOps/s | $\textbf{\color{#35bf28}+8.61\\%}$ | | test_tdseq_dispatch | 67.0250μs | 39.6938μs | 25.1929 KOps/s | 23.1399 KOps/s | $\textbf{\color{#35bf28}+8.87\\%}$ | | test_instantiation_functorch | 1.9926ms | 1.3123ms | 762.0415 Ops/s | 759.0549 Ops/s | $\color{#35bf28}+0.39\\%$ | | test_instantiation_td | 1.8106ms | 1.0197ms | 980.7187 Ops/s | 978.4463 Ops/s | $\color{#35bf28}+0.23\\%$ | | test_exec_functorch | 0.2287ms | 0.1583ms | 6.3176 KOps/s | 5.9612 KOps/s | $\textbf{\color{#35bf28}+5.98\\%}$ | | test_exec_functional_call | 0.3610ms | 0.1512ms | 6.6134 KOps/s | 6.5171 KOps/s | $\color{#35bf28}+1.48\\%$ | | test_exec_td | 0.2263ms | 0.1440ms | 6.9463 KOps/s | 6.7234 KOps/s | $\color{#35bf28}+3.31\\%$ | | test_exec_td_decorator | 0.8587ms | 0.2180ms | 4.5877 KOps/s | 4.4204 KOps/s | $\color{#35bf28}+3.78\\%$ | | test_vmap_mlp_speed[True-True] | 0.6435ms | 0.4858ms | 2.0584 KOps/s | 2.0185 KOps/s | $\color{#35bf28}+1.98\\%$ | | test_vmap_mlp_speed[True-False] | 0.6534ms | 0.4820ms | 2.0747 KOps/s | 2.0232 KOps/s | $\color{#35bf28}+2.55\\%$ | | test_vmap_mlp_speed[False-True] | 0.6678ms | 0.3958ms | 2.5265 KOps/s | 2.4624 KOps/s | $\color{#35bf28}+2.60\\%$ | | test_vmap_mlp_speed[False-False] | 0.8202ms | 0.3966ms | 2.5212 KOps/s | 2.4595 KOps/s | $\color{#35bf28}+2.51\\%$ | | test_vmap_mlp_speed_decorator[True-True] | 
1.0074ms | 0.5575ms | 1.7937 KOps/s | 1.7481 KOps/s | $\color{#35bf28}+2.61\\%$ | | test_vmap_mlp_speed_decorator[True-False] | 1.1006ms | 0.5567ms | 1.7963 KOps/s | 1.7541 KOps/s | $\color{#35bf28}+2.41\\%$ | | test_vmap_mlp_speed_decorator[False-True] | 1.0082ms | 0.4748ms | 2.1063 KOps/s | 1.9688 KOps/s | $\textbf{\color{#35bf28}+6.99\\%}$ | | test_vmap_mlp_speed_decorator[False-False] | 0.7632ms | 0.4606ms | 2.1710 KOps/s | 2.1246 KOps/s | $\color{#35bf28}+2.18\\%$ | | test_to_module_speed[True] | 2.3401ms | 1.6690ms | 599.1506 Ops/s | 595.6487 Ops/s | $\color{#35bf28}+0.59\\%$ | | test_to_module_speed[False] | 2.4456ms | 1.6490ms | 606.4427 Ops/s | 598.9688 Ops/s | $\color{#35bf28}+1.25\\%$ | | test_tc_init | 58.3890μs | 28.5138μs | 35.0707 KOps/s | 32.6455 KOps/s | $\textbf{\color{#35bf28}+7.43\\%}$ | | test_tc_init_nested | 0.1197ms | 58.9411μs | 16.9661 KOps/s | 16.2252 KOps/s | $\color{#35bf28}+4.57\\%$ | | test_tc_first_layer_tensor | 5.3844μs | 0.7327μs | 1.3648 MOps/s | 1.3838 MOps/s | $\color{#d91a1a}-1.38\\%$ | | test_tc_first_layer_nontensor | 6.9816μs | 0.7062μs | 1.4160 MOps/s | 1.4179 MOps/s | $\color{#d91a1a}-0.13\\%$ | | test_tc_second_layer_tensor | 0.1133ms | 1.8984μs | 526.7602 KOps/s | 530.3637 KOps/s | $\color{#d91a1a}-0.68\\%$ | | test_tc_second_layer_nontensor | 69.8537μs | 1.5785μs | 633.5039 KOps/s | 581.0227 KOps/s | $\textbf{\color{#35bf28}+9.03\\%}$ | | test_unbind | 96.1717ms | 6.6492ms | 150.3938 Ops/s | 133.8052 Ops/s | $\textbf{\color{#35bf28}+12.40\\%}$ | | test_full_like | 21.3535ms | 12.7912ms | 78.1790 Ops/s | 85.8025 Ops/s | $\textbf{\color{#d91a1a}-8.88\\%}$ | | test_zeros_like | 13.2092ms | 5.8900ms | 169.7799 Ops/s | 176.3223 Ops/s | $\color{#d91a1a}-3.71\\%$ | | test_ones_like | 15.4736ms | 6.7610ms | 147.9077 Ops/s | 155.6281 Ops/s | $\color{#d91a1a}-4.96\\%$ | | test_clone | 15.2197ms | 8.5412ms | 117.0795 Ops/s | 118.7653 Ops/s | $\color{#d91a1a}-1.42\\%$ | | test_squeeze | 66.7840μs | 13.6640μs | 73.1850 KOps/s | 
70.6369 KOps/s | $\color{#35bf28}+3.61\\%$ | | test_unsqueeze | 0.1167ms | 60.5171μs | 16.5243 KOps/s | 16.6953 KOps/s | $\color{#d91a1a}-1.02\\%$ | | test_split | 0.2460ms | 0.1103ms | 9.0621 KOps/s | 8.7331 KOps/s | $\color{#35bf28}+3.77\\%$ | | test_permute | 0.2241ms | 0.1284ms | 7.7880 KOps/s | 7.7440 KOps/s | $\color{#35bf28}+0.57\\%$ | | test_stack | 31.5565ms | 23.1094ms | 43.2725 Ops/s | 43.7231 Ops/s | $\color{#d91a1a}-1.03\\%$ | | test_cat | 27.4592ms | 22.9739ms | 43.5277 Ops/s | 43.6503 Ops/s | $\color{#d91a1a}-0.28\\%$ |
github-actions[bot] commented 4 months ago

$\color{#D29922}\textsf{\Large ⚠\kern{0.2cm}\normalsize Warning}$ Result of GPU Benchmark Tests

Total Benchmarks: 152. Improved: $\large\color{#35bf28}6$. Worsened: $\large\color{#d91a1a}10$.

Expand to view detailed results | Name | Max | Mean | Ops | Ops on Repo `HEAD` | Change | | -------------------------------------------------- | --------- | --------- | --------------- | ------------------ | ----------------------------------- | | test_plain_set_nested | 31.4100μs | 13.4214μs | 74.5080 KOps/s | 77.3599 KOps/s | $\color{#d91a1a}-3.69\\%$ | | test_plain_set_stack_nested | 27.2010μs | 13.4804μs | 74.1817 KOps/s | 76.1553 KOps/s | $\color{#d91a1a}-2.59\\%$ | | test_plain_set_nested_inplace | 39.7300μs | 14.6556μs | 68.2334 KOps/s | 69.7058 KOps/s | $\color{#d91a1a}-2.11\\%$ | | test_plain_set_stack_nested_inplace | 51.6910μs | 14.7826μs | 67.6472 KOps/s | 69.9549 KOps/s | $\color{#d91a1a}-3.30\\%$ | | test_items | 26.9300μs | 4.6366μs | 215.6738 KOps/s | 215.9045 KOps/s | $\color{#d91a1a}-0.11\\%$ | | test_items_nested | 0.4446ms | 0.3373ms | 2.9650 KOps/s | 2.8984 KOps/s | $\color{#35bf28}+2.30\\%$ | | test_items_nested_locked | 0.4500ms | 0.3393ms | 2.9470 KOps/s | 2.9251 KOps/s | $\color{#35bf28}+0.75\\%$ | | test_items_nested_leaf | 0.1026ms | 82.4324μs | 12.1312 KOps/s | 12.0601 KOps/s | $\color{#35bf28}+0.59\\%$ | | test_items_stack_nested | 0.3937ms | 0.3415ms | 2.9286 KOps/s | 2.9426 KOps/s | $\color{#d91a1a}-0.48\\%$ | | test_items_stack_nested_leaf | 0.1135ms | 84.0368μs | 11.8996 KOps/s | 11.9843 KOps/s | $\color{#d91a1a}-0.71\\%$ | | test_items_stack_nested_locked | 0.4737ms | 0.3408ms | 2.9347 KOps/s | 2.9297 KOps/s | $\color{#35bf28}+0.17\\%$ | | test_keys | 29.1410μs | 4.3847μs | 228.0679 KOps/s | 228.8605 KOps/s | $\color{#d91a1a}-0.35\\%$ | | test_keys_nested | 0.1023ms | 69.0609μs | 14.4800 KOps/s | 14.3619 KOps/s | $\color{#35bf28}+0.82\\%$ | | test_keys_nested_locked | 2.3708ms | 73.5891μs | 13.5890 KOps/s | 13.2579 KOps/s | $\color{#35bf28}+2.50\\%$ | | test_keys_nested_leaf | 82.6910μs | 60.1666μs | 16.6205 KOps/s | 16.5140 KOps/s | $\color{#35bf28}+0.65\\%$ | | test_keys_stack_nested | 0.1017ms | 68.7882μs | 14.5374 KOps/s | 
14.3773 KOps/s | $\color{#35bf28}+1.11\\%$ | | test_keys_stack_nested_leaf | 93.6920μs | 59.5082μs | 16.8044 KOps/s | 16.4518 KOps/s | $\color{#35bf28}+2.14\\%$ | | test_keys_stack_nested_locked | 0.1088ms | 73.6053μs | 13.5860 KOps/s | 13.1863 KOps/s | $\color{#35bf28}+3.03\\%$ | | test_values | 8.6803μs | 1.8063μs | 553.6292 KOps/s | 556.8726 KOps/s | $\color{#d91a1a}-0.58\\%$ | | test_values_nested | 61.9920μs | 35.6944μs | 28.0156 KOps/s | 28.4012 KOps/s | $\color{#d91a1a}-1.36\\%$ | | test_values_nested_locked | 64.2120μs | 37.4104μs | 26.7305 KOps/s | 26.7715 KOps/s | $\color{#d91a1a}-0.15\\%$ | | test_values_nested_leaf | 57.5610μs | 31.5167μs | 31.7292 KOps/s | 31.7975 KOps/s | $\color{#d91a1a}-0.21\\%$ | | test_values_stack_nested | 64.8310μs | 36.4236μs | 27.4547 KOps/s | 27.6835 KOps/s | $\color{#d91a1a}-0.83\\%$ | | test_values_stack_nested_leaf | 61.7010μs | 32.3681μs | 30.8947 KOps/s | 31.1956 KOps/s | $\color{#d91a1a}-0.96\\%$ | | test_values_stack_nested_locked | 73.7410μs | 38.3975μs | 26.0433 KOps/s | 26.2683 KOps/s | $\color{#d91a1a}-0.86\\%$ | | test_membership | 3.8000μs | 0.7194μs | 1.3901 MOps/s | 1.2972 MOps/s | $\textbf{\color{#35bf28}+7.16\\%}$ | | test_membership_nested | 22.0000μs | 2.5211μs | 396.6566 KOps/s | 394.7751 KOps/s | $\color{#35bf28}+0.48\\%$ | | test_membership_nested_leaf | 38.1110μs | 2.5197μs | 396.8737 KOps/s | 392.3725 KOps/s | $\color{#35bf28}+1.15\\%$ | | test_membership_stacked_nested | 25.4600μs | 2.5485μs | 392.3934 KOps/s | 389.8907 KOps/s | $\color{#35bf28}+0.64\\%$ | | test_membership_stacked_nested_leaf | 26.8000μs | 2.4957μs | 400.6823 KOps/s | 398.6045 KOps/s | $\color{#35bf28}+0.52\\%$ | | test_membership_nested_last | 17.9800μs | 3.0462μs | 328.2831 KOps/s | 324.2776 KOps/s | $\color{#35bf28}+1.24\\%$ | | test_membership_nested_leaf_last | 21.3610μs | 3.0354μs | 329.4496 KOps/s | 321.3174 KOps/s | $\color{#35bf28}+2.53\\%$ | | test_membership_stacked_nested_last | 19.4400μs | 3.0511μs | 327.7484 KOps/s | 
257.7352 KOps/s | $\textbf{\color{#35bf28}+27.16\\%}$ | | test_membership_stacked_nested_leaf_last | 22.8300μs | 3.0553μs | 327.2948 KOps/s | 262.1398 KOps/s | $\textbf{\color{#35bf28}+24.86\\%}$ | | test_nested_getleaf | 37.8810μs | 8.3721μs | 119.4439 KOps/s | 119.4673 KOps/s | $\color{#d91a1a}-0.02\\%$ | | test_nested_get | 29.9710μs | 7.8075μs | 128.0819 KOps/s | 127.0395 KOps/s | $\color{#35bf28}+0.82\\%$ | | test_stacked_getleaf | 30.3600μs | 8.3283μs | 120.0725 KOps/s | 119.4373 KOps/s | $\color{#35bf28}+0.53\\%$ | | test_stacked_get | 36.0310μs | 7.8189μs | 127.8947 KOps/s | 127.1555 KOps/s | $\color{#35bf28}+0.58\\%$ | | test_nested_getitemleaf | 23.8100μs | 8.5117μs | 117.4855 KOps/s | 116.6067 KOps/s | $\color{#35bf28}+0.75\\%$ | | test_nested_getitem | 33.7410μs | 8.0036μs | 124.9441 KOps/s | 123.9745 KOps/s | $\color{#35bf28}+0.78\\%$ | | test_stacked_getitemleaf | 36.9210μs | 8.5655μs | 116.7472 KOps/s | 116.7554 KOps/s | $-0.01\\%$ | | test_stacked_getitem | 23.4900μs | 7.9697μs | 125.4747 KOps/s | 124.8633 KOps/s | $\color{#35bf28}+0.49\\%$ | | test_lock_nested | 59.2213ms | 0.3989ms | 2.5069 KOps/s | 2.4974 KOps/s | $\color{#35bf28}+0.38\\%$ | | test_lock_stack_nested | 0.3423ms | 0.2951ms | 3.3891 KOps/s | 3.3731 KOps/s | $\color{#35bf28}+0.47\\%$ | | test_unlock_nested | 60.9163ms | 0.4008ms | 2.4953 KOps/s | 2.4779 KOps/s | $\color{#35bf28}+0.70\\%$ | | test_unlock_stack_nested | 0.3405ms | 0.3038ms | 3.2920 KOps/s | 3.2834 KOps/s | $\color{#35bf28}+0.26\\%$ | | test_flatten_speed | 0.3890ms | 0.1030ms | 9.7124 KOps/s | 9.7842 KOps/s | $\color{#d91a1a}-0.73\\%$ | | test_unflatten_speed | 0.3517ms | 0.2892ms | 3.4576 KOps/s | 3.3690 KOps/s | $\color{#35bf28}+2.63\\%$ | | test_common_ops | 1.0585ms | 0.5928ms | 1.6870 KOps/s | 1.7206 KOps/s | $\color{#d91a1a}-1.95\\%$ | | test_creation | 14.7610μs | 1.5819μs | 632.1448 KOps/s | 628.8054 KOps/s | $\color{#35bf28}+0.53\\%$ | | test_creation_empty | 25.7010μs | 9.6246μs | 103.9009 KOps/s | 111.9658 
KOps/s | $\textbf{\color{#d91a1a}-7.20\\%}$ | | test_creation_nested_1 | 87.4120μs | 11.3413μs | 88.1730 KOps/s | 93.9644 KOps/s | $\textbf{\color{#d91a1a}-6.16\\%}$ | | test_creation_nested_2 | 38.6010μs | 13.4055μs | 74.5960 KOps/s | 77.0498 KOps/s | $\color{#d91a1a}-3.18\\%$ | | test_clone | 84.5920μs | 11.4590μs | 87.2677 KOps/s | 86.5365 KOps/s | $\color{#35bf28}+0.84\\%$ | | test_getitem[int] | 32.2210μs | 10.5882μs | 94.4448 KOps/s | 95.1545 KOps/s | $\color{#d91a1a}-0.75\\%$ | | test_getitem[slice_int] | 45.5110μs | 20.2338μs | 49.4222 KOps/s | 49.4884 KOps/s | $\color{#d91a1a}-0.13\\%$ | | test_getitem[range] | 64.7910μs | 46.5710μs | 21.4726 KOps/s | 20.7845 KOps/s | $\color{#35bf28}+3.31\\%$ | | test_getitem[tuple] | 38.0110μs | 17.9335μs | 55.7616 KOps/s | 54.0325 KOps/s | $\color{#35bf28}+3.20\\%$ | | test_getitem[list] | 0.1333ms | 32.2573μs | 31.0007 KOps/s | 30.0332 KOps/s | $\color{#35bf28}+3.22\\%$ | | test_setitem_dim[int] | 47.3210μs | 30.0033μs | 33.3297 KOps/s | 33.7587 KOps/s | $\color{#d91a1a}-1.27\\%$ | | test_setitem_dim[slice_int] | 67.6610μs | 50.5501μs | 19.7824 KOps/s | 19.9545 KOps/s | $\color{#d91a1a}-0.86\\%$ | | test_setitem_dim[range] | 89.6920μs | 67.8647μs | 14.7352 KOps/s | 14.6287 KOps/s | $\color{#35bf28}+0.73\\%$ | | test_setitem_dim[tuple] | 65.1610μs | 45.3048μs | 22.0727 KOps/s | 22.4335 KOps/s | $\color{#d91a1a}-1.61\\%$ | | test_setitem | 45.1700μs | 16.8270μs | 59.4282 KOps/s | 60.3045 KOps/s | $\color{#d91a1a}-1.45\\%$ | | test_set | 58.6220μs | 15.9770μs | 62.5899 KOps/s | 64.2304 KOps/s | $\color{#d91a1a}-2.55\\%$ | | test_set_shared | 1.6493ms | 97.8030μs | 10.2246 KOps/s | 10.3174 KOps/s | $\color{#d91a1a}-0.90\\%$ | | test_update | 93.1020μs | 19.5377μs | 51.1830 KOps/s | 53.7136 KOps/s | $\color{#d91a1a}-4.71\\%$ | | test_update_nested | 70.3810μs | 25.1132μs | 39.8197 KOps/s | 41.0676 KOps/s | $\color{#d91a1a}-3.04\\%$ | | test_update__nested | 68.4210μs | 22.2070μs | 45.0308 KOps/s | 45.6019 KOps/s | 
$\color{#d91a1a}-1.25\\%$ | | test_set_nested | 61.9410μs | 17.2694μs | 57.9057 KOps/s | 58.4590 KOps/s | $\color{#d91a1a}-0.95\\%$ | | test_set_nested_new | 66.4510μs | 19.8366μs | 50.4119 KOps/s | 50.7575 KOps/s | $\color{#d91a1a}-0.68\\%$ | | test_select | 79.0720μs | 32.2623μs | 30.9959 KOps/s | 31.4466 KOps/s | $\color{#d91a1a}-1.43\\%$ | | test_select_nested | 85.7920μs | 53.3808μs | 18.7333 KOps/s | 19.0710 KOps/s | $\color{#d91a1a}-1.77\\%$ | | test_exclude_nested | 0.1581ms | 0.1086ms | 9.2056 KOps/s | 9.2880 KOps/s | $\color{#d91a1a}-0.89\\%$ | | test_empty[True] | 0.4377ms | 0.3443ms | 2.9046 KOps/s | 2.9060 KOps/s | $\color{#d91a1a}-0.05\\%$ | | test_empty[False] | 2.8371μs | 0.8005μs | 1.2493 MOps/s | 1.2464 MOps/s | $\color{#35bf28}+0.23\\%$ | | test_to | 87.5710μs | 58.1996μs | 17.1822 KOps/s | 17.1996 KOps/s | $\color{#d91a1a}-0.10\\%$ | | test_to_nonblocking | 70.3610μs | 35.1277μs | 28.4675 KOps/s | 28.3573 KOps/s | $\color{#35bf28}+0.39\\%$ | | test_unbind_speed | 1.4886ms | 0.2591ms | 3.8592 KOps/s | 3.9157 KOps/s | $\color{#d91a1a}-1.44\\%$ | | test_unbind_speed_stack0 | 0.3415ms | 0.2577ms | 3.8810 KOps/s | 3.8549 KOps/s | $\color{#35bf28}+0.68\\%$ | | test_unbind_speed_stack1 | 76.3876ms | 0.7818ms | 1.2792 KOps/s | 1.2786 KOps/s | $\color{#35bf28}+0.04\\%$ | | test_split | 76.9917ms | 1.6220ms | 616.5046 Ops/s | 616.7984 Ops/s | $\color{#d91a1a}-0.05\\%$ | | test_chunk | 76.8450ms | 1.6204ms | 617.1345 Ops/s | 618.6716 Ops/s | $\color{#d91a1a}-0.25\\%$ | | test_creation[device0] | 0.1129ms | 55.6431μs | 17.9717 KOps/s | 17.7555 KOps/s | $\color{#35bf28}+1.22\\%$ | | test_creation_from_tensor | 0.1317ms | 52.3595μs | 19.0987 KOps/s | 19.0559 KOps/s | $\color{#35bf28}+0.22\\%$ | | test_add_one[memmap_tensor0] | 84.5820μs | 7.0036μs | 142.7835 KOps/s | 146.3643 KOps/s | $\color{#d91a1a}-2.45\\%$ | | test_contiguous[memmap_tensor0] | 13.1400μs | 0.6483μs | 1.5425 MOps/s | 1.5274 MOps/s | $\color{#35bf28}+0.99\\%$ | | test_stack[memmap_tensor0] | 
29.1500μs | 4.6900μs | 213.2218 KOps/s | 218.2985 KOps/s | $\color{#d91a1a}-2.33\\%$ | | test_memmaptd_index | 1.1608ms | 0.2724ms | 3.6707 KOps/s | 3.7038 KOps/s | $\color{#d91a1a}-0.89\\%$ | | test_memmaptd_index_astensor | 0.6273ms | 0.3359ms | 2.9773 KOps/s | 2.9976 KOps/s | $\color{#d91a1a}-0.68\\%$ | | test_memmaptd_index_op | 1.0553ms | 0.6508ms | 1.5365 KOps/s | 1.5948 KOps/s | $\color{#d91a1a}-3.65\\%$ | | test_serialize_model | 98.5105ms | 93.8868ms | 10.6511 Ops/s | 10.2841 Ops/s | $\color{#35bf28}+3.57\\%$ | | test_serialize_model_pickle | 1.6317s | 1.2383s | 0.8076 Ops/s | 0.7920 Ops/s | $\color{#35bf28}+1.96\\%$ | | test_serialize_weights | 96.2573ms | 92.3499ms | 10.8284 Ops/s | 9.4565 Ops/s | $\textbf{\color{#35bf28}+14.51\\%}$ | | test_serialize_weights_returnearly | 0.2497s | 83.2237ms | 12.0158 Ops/s | 12.1054 Ops/s | $\color{#d91a1a}-0.74\\%$ | | test_serialize_weights_pickle | 1.3523s | 1.2486s | 0.8009 Ops/s | 0.8012 Ops/s | $\color{#d91a1a}-0.03\\%$ | | test_reshape_pytree | 0.1736ms | 28.2260μs | 35.4283 KOps/s | 38.6622 KOps/s | $\textbf{\color{#d91a1a}-8.36\\%}$ | | test_reshape_td | 0.1635ms | 35.1540μs | 28.4462 KOps/s | 31.9600 KOps/s | $\textbf{\color{#d91a1a}-10.99\\%}$ | | test_view_pytree | 0.1377ms | 27.5354μs | 36.3169 KOps/s | 38.7660 KOps/s | $\textbf{\color{#d91a1a}-6.32\\%}$ | | test_view_td | 70.9210μs | 37.0292μs | 27.0057 KOps/s | 27.7528 KOps/s | $\color{#d91a1a}-2.69\\%$ | | test_unbind_pytree | 68.8310μs | 32.8446μs | 30.4464 KOps/s | 31.2685 KOps/s | $\color{#d91a1a}-2.63\\%$ | | test_unbind_td | 0.4629ms | 42.5930μs | 23.4780 KOps/s | 24.2761 KOps/s | $\color{#d91a1a}-3.29\\%$ | | test_split_pytree | 77.6410μs | 34.7589μs | 28.7696 KOps/s | 28.2862 KOps/s | $\color{#35bf28}+1.71\\%$ | | test_split_td | 0.4985ms | 40.2772μs | 24.8280 KOps/s | 26.3488 KOps/s | $\textbf{\color{#d91a1a}-5.77\\%}$ | | test_add_pytree | 83.9920μs | 37.6691μs | 26.5470 KOps/s | 27.0713 KOps/s | $\color{#d91a1a}-1.94\\%$ | | test_add_td | 
96.2820μs | 56.2367μs | 17.7820 KOps/s | 19.9356 KOps/s | $\textbf{\color{#d91a1a}-10.80\\%}$ | | test_distributed | 0.1818ms | 68.3017μs | 14.6409 KOps/s | 13.9583 KOps/s | $\color{#35bf28}+4.89\\%$ | | test_tdmodule | 91.2720μs | 16.0569μs | 62.2783 KOps/s | 62.3242 KOps/s | $\color{#d91a1a}-0.07\\%$ | | test_tdmodule_dispatch | 51.9610μs | 30.9770μs | 32.2820 KOps/s | 32.1154 KOps/s | $\color{#35bf28}+0.52\\%$ | | test_tdseq | 31.7110μs | 16.9994μs | 58.8256 KOps/s | 55.9330 KOps/s | $\textbf{\color{#35bf28}+5.17\\%}$ | | test_tdseq_dispatch | 50.1910μs | 34.0211μs | 29.3935 KOps/s | 28.7261 KOps/s | $\color{#35bf28}+2.32\\%$ | | test_instantiation_functorch | 1.4532ms | 1.3843ms | 722.3966 Ops/s | 717.9234 Ops/s | $\color{#35bf28}+0.62\\%$ | | test_instantiation_td | 79.9066ms | 1.0658ms | 938.2633 Ops/s | 939.5785 Ops/s | $\color{#d91a1a}-0.14\\%$ | | test_exec_functorch | 0.1839ms | 0.1424ms | 7.0241 KOps/s | 7.1181 KOps/s | $\color{#d91a1a}-1.32\\%$ | | test_exec_functional_call | 0.1686ms | 0.1303ms | 7.6761 KOps/s | 7.7229 KOps/s | $\color{#d91a1a}-0.61\\%$ | | test_exec_td | 0.1694ms | 0.1282ms | 7.8024 KOps/s | 8.0369 KOps/s | $\color{#d91a1a}-2.92\\%$ | | test_exec_td_decorator | 0.4059ms | 0.2008ms | 4.9812 KOps/s | 5.1143 KOps/s | $\color{#d91a1a}-2.60\\%$ | | test_vmap_mlp_speed[True-True] | 0.6754ms | 0.5637ms | 1.7739 KOps/s | 1.7849 KOps/s | $\color{#d91a1a}-0.62\\%$ | | test_vmap_mlp_speed[True-False] | 0.6421ms | 0.5756ms | 1.7374 KOps/s | 1.7785 KOps/s | $\color{#d91a1a}-2.31\\%$ | | test_vmap_mlp_speed[False-True] | 0.5613ms | 0.4973ms | 2.0108 KOps/s | 1.9457 KOps/s | $\color{#35bf28}+3.34\\%$ | | test_vmap_mlp_speed[False-False] | 0.5563ms | 0.4912ms | 2.0357 KOps/s | 2.0379 KOps/s | $\color{#d91a1a}-0.11\\%$ | | test_vmap_mlp_speed_decorator[True-True] | 94.6669ms | 0.6811ms | 1.4683 KOps/s | 1.5509 KOps/s | $\textbf{\color{#d91a1a}-5.33\\%}$ | | test_vmap_mlp_speed_decorator[True-False] | 0.7567ms | 0.6208ms | 1.6107 KOps/s | 1.5988 KOps/s 
| $\color{#35bf28}+0.75\\%$ | | test_vmap_mlp_speed_decorator[False-True] | 0.6575ms | 0.5477ms | 1.8257 KOps/s | 1.7492 KOps/s | $\color{#35bf28}+4.38\\%$ | | test_vmap_mlp_speed_decorator[False-False] | 0.7657ms | 0.5645ms | 1.7714 KOps/s | 1.7443 KOps/s | $\color{#35bf28}+1.55\\%$ | | test_vmap_transformer_speed[True-True] | 8.0549ms | 7.6077ms | 131.4450 Ops/s | 135.5080 Ops/s | $\color{#d91a1a}-3.00\\%$ | | test_vmap_transformer_speed[True-False] | 8.0074ms | 7.5066ms | 133.2161 Ops/s | 136.2546 Ops/s | $\color{#d91a1a}-2.23\\%$ | | test_vmap_transformer_speed[False-True] | 8.2979ms | 7.4760ms | 133.7615 Ops/s | 137.5766 Ops/s | $\color{#d91a1a}-2.77\\%$ | | test_vmap_transformer_speed[False-False] | 7.8739ms | 7.4290ms | 134.6085 Ops/s | 137.7453 Ops/s | $\color{#d91a1a}-2.28\\%$ | | test_vmap_transformer_speed_decorator[True-True] | 18.8511ms | 18.1883ms | 54.9804 Ops/s | 56.1100 Ops/s | $\color{#d91a1a}-2.01\\%$ | | test_vmap_transformer_speed_decorator[True-False] | 19.0121ms | 18.1618ms | 55.0607 Ops/s | 56.1573 Ops/s | $\color{#d91a1a}-1.95\\%$ | | test_vmap_transformer_speed_decorator[False-True] | 19.1135ms | 18.1274ms | 55.1650 Ops/s | 56.4872 Ops/s | $\color{#d91a1a}-2.34\\%$ | | test_vmap_transformer_speed_decorator[False-False] | 18.9138ms | 17.7703ms | 56.2735 Ops/s | 56.6451 Ops/s | $\color{#d91a1a}-0.66\\%$ | | test_to_module_speed[True] | 1.5853ms | 1.4809ms | 675.2451 Ops/s | 666.3477 Ops/s | $\color{#35bf28}+1.34\\%$ | | test_to_module_speed[False] | 1.5547ms | 1.4631ms | 683.4929 Ops/s | 685.1783 Ops/s | $\color{#d91a1a}-0.25\\%$ | | test_tc_init | 51.4410μs | 27.0673μs | 36.9450 KOps/s | 38.7672 KOps/s | $\color{#d91a1a}-4.70\\%$ | | test_tc_init_nested | 93.5410μs | 55.0268μs | 18.1730 KOps/s | 20.6672 KOps/s | $\textbf{\color{#d91a1a}-12.07\\%}$ | | test_tc_first_layer_tensor | 1.3425μs | 0.3544μs | 2.8219 MOps/s | 2.8148 MOps/s | $\color{#35bf28}+0.25\\%$ | | test_tc_first_layer_nontensor | 9.3779μs | 0.3825μs | 2.6142 MOps/s | 2.5818 
MOps/s | $\color{#35bf28}+1.25\\%$ | | test_tc_second_layer_tensor | 5.9920μs | 0.9662μs | 1.0350 MOps/s | 953.7822 KOps/s | $\textbf{\color{#35bf28}+8.51\\%}$ | | test_tc_second_layer_nontensor | 2.9317μs | 0.8221μs | 1.2164 MOps/s | 1.2252 MOps/s | $\color{#d91a1a}-0.72\\%$ | | test_unbind | 0.1102s | 7.8435ms | 127.4942 Ops/s | 149.1612 Ops/s | $\textbf{\color{#d91a1a}-14.53\\%}$ | | test_full_like | 13.6441ms | 13.1766ms | 75.8923 Ops/s | 75.3176 Ops/s | $\color{#35bf28}+0.76\\%$ | | test_zeros_like | 8.3918ms | 7.8086ms | 128.0637 Ops/s | 127.4793 Ops/s | $\color{#35bf28}+0.46\\%$ | | test_ones_like | 8.2336ms | 7.9010ms | 126.5656 Ops/s | 126.9130 Ops/s | $\color{#d91a1a}-0.27\\%$ | | test_clone | 9.3115ms | 9.1859ms | 108.8622 Ops/s | 109.2064 Ops/s | $\color{#d91a1a}-0.32\\%$ | | test_squeeze | 77.4720μs | 10.8880μs | 91.8444 KOps/s | 91.1167 KOps/s | $\color{#35bf28}+0.80\\%$ | | test_unsqueeze | 95.7220μs | 51.5144μs | 19.4120 KOps/s | 19.3468 KOps/s | $\color{#35bf28}+0.34\\%$ | | test_split | 0.1438ms | 97.3744μs | 10.2696 KOps/s | 10.5195 KOps/s | $\color{#d91a1a}-2.38\\%$ | | test_permute | 0.1557ms | 0.1111ms | 8.9999 KOps/s | 9.1326 KOps/s | $\color{#d91a1a}-1.45\\%$ | | test_stack | 26.7257ms | 26.5709ms | 37.6351 Ops/s | 37.3712 Ops/s | $\color{#35bf28}+0.71\\%$ | | test_cat | 26.7605ms | 26.5517ms | 37.6623 Ops/s | 37.7017 Ops/s | $\color{#d91a1a}-0.10\\%$ |