pytorch / tensordict

TensorDict is a tensor container dedicated to PyTorch.
MIT License
803 stars 65 forks source link

[Benchmark] Fix recursion and cache errors in benchmarks #900

Closed vmoens closed 1 month ago

github-actions[bot] commented 1 month ago

$\color{#D29922}\textsf{\Large ⚠\kern{0.2cm}\normalsize Warning}$ Result of CPU Benchmark Tests

Total Benchmarks: 144. Improved: $\large\color{#35bf28}9$. Worsened: $\large\color{#d91a1a}35$.

Expand to view detailed results | Name | Max | Mean | Ops | Ops on Repo `HEAD` | Change | | ------------------------------------------ | --------- | --------- | --------------- | ------------------ | ------------------------------------ | | test_plain_set_nested | 41.2670μs | 22.3327μs | 44.7775 KOps/s | 47.2705 KOps/s | $\textbf{\color{#d91a1a}-5.27\\%}$ | | test_plain_set_stack_nested | 54.2420μs | 22.7727μs | 43.9123 KOps/s | 46.2512 KOps/s | $\textbf{\color{#d91a1a}-5.06\\%}$ | | test_plain_set_nested_inplace | 59.3310μs | 24.5552μs | 40.7245 KOps/s | 42.8104 KOps/s | $\color{#d91a1a}-4.87\\%$ | | test_plain_set_stack_nested_inplace | 68.1480μs | 24.5523μs | 40.7294 KOps/s | 43.0467 KOps/s | $\textbf{\color{#d91a1a}-5.38\\%}$ | | test_items | 20.4090μs | 2.6164μs | 382.2037 KOps/s | 372.8860 KOps/s | $\color{#35bf28}+2.50\\%$ | | test_items_nested | 0.6352ms | 0.3751ms | 2.6657 KOps/s | 2.7320 KOps/s | $\color{#d91a1a}-2.43\\%$ | | test_items_nested_locked | 0.7902ms | 0.3743ms | 2.6718 KOps/s | 2.6903 KOps/s | $\color{#d91a1a}-0.69\\%$ | | test_items_nested_leaf | 0.1700ms | 85.9521μs | 11.6344 KOps/s | 11.5076 KOps/s | $\color{#35bf28}+1.10\\%$ | | test_items_stack_nested | 0.5892ms | 0.3756ms | 2.6624 KOps/s | 2.6872 KOps/s | $\color{#d91a1a}-0.92\\%$ | | test_items_stack_nested_leaf | 0.1484ms | 88.5361μs | 11.2948 KOps/s | 11.6748 KOps/s | $\color{#d91a1a}-3.25\\%$ | | test_items_stack_nested_locked | 0.4859ms | 0.3779ms | 2.6459 KOps/s | 2.7008 KOps/s | $\color{#d91a1a}-2.03\\%$ | | test_keys | 22.7820μs | 3.9652μs | 252.1950 KOps/s | 263.8008 KOps/s | $\color{#d91a1a}-4.40\\%$ | | test_keys_nested | 0.2794ms | 0.1451ms | 6.8910 KOps/s | 6.8613 KOps/s | $\color{#35bf28}+0.43\\%$ | | test_keys_nested_locked | 1.6184ms | 0.1511ms | 6.6178 KOps/s | 6.6606 KOps/s | $\color{#d91a1a}-0.64\\%$ | | test_keys_nested_leaf | 0.2407ms | 0.1251ms | 7.9949 KOps/s | 8.1068 KOps/s | $\color{#d91a1a}-1.38\\%$ | | test_keys_stack_nested | 0.2622ms | 0.1458ms | 6.8602 
KOps/s | 6.8795 KOps/s | $\color{#d91a1a}-0.28\\%$ | | test_keys_stack_nested_leaf | 0.2288ms | 0.1253ms | 7.9820 KOps/s | 7.9948 KOps/s | $\color{#d91a1a}-0.16\\%$ | | test_keys_stack_nested_locked | 0.2893ms | 0.1505ms | 6.6450 KOps/s | 6.6886 KOps/s | $\color{#d91a1a}-0.65\\%$ | | test_values | 10.6725μs | 1.2037μs | 830.7718 KOps/s | 875.2848 KOps/s | $\textbf{\color{#d91a1a}-5.09\\%}$ | | test_values_nested | 0.1030ms | 50.3576μs | 19.8580 KOps/s | 19.8722 KOps/s | $\color{#d91a1a}-0.07\\%$ | | test_values_nested_locked | 0.1770ms | 49.7373μs | 20.1056 KOps/s | 19.8393 KOps/s | $\color{#35bf28}+1.34\\%$ | | test_values_nested_leaf | 87.9840μs | 45.5609μs | 21.9486 KOps/s | 22.0497 KOps/s | $\color{#d91a1a}-0.46\\%$ | | test_values_stack_nested | 99.3850μs | 50.9628μs | 19.6221 KOps/s | 19.3357 KOps/s | $\color{#35bf28}+1.48\\%$ | | test_values_stack_nested_leaf | 0.1063ms | 45.5034μs | 21.9764 KOps/s | 20.6632 KOps/s | $\textbf{\color{#35bf28}+6.36\\%}$ | | test_values_stack_nested_locked | 0.1043ms | 50.5534μs | 19.7811 KOps/s | 19.1629 KOps/s | $\color{#35bf28}+3.23\\%$ | | test_membership | 3.2135μs | 0.7112μs | 1.4061 MOps/s | 1.1081 MOps/s | $\textbf{\color{#35bf28}+26.89\\%}$ | | test_membership_nested | 48.0900μs | 2.7289μs | 366.4462 KOps/s | 377.2467 KOps/s | $\color{#d91a1a}-2.86\\%$ | | test_membership_nested_leaf | 36.2280μs | 2.7952μs | 357.7532 KOps/s | 372.4986 KOps/s | $\color{#d91a1a}-3.96\\%$ | | test_membership_stacked_nested | 51.7770μs | 2.7433μs | 364.5292 KOps/s | 373.3364 KOps/s | $\color{#d91a1a}-2.36\\%$ | | test_membership_stacked_nested_leaf | 31.1690μs | 2.7492μs | 363.7399 KOps/s | 371.4185 KOps/s | $\color{#d91a1a}-2.07\\%$ | | test_membership_nested_last | 56.5660μs | 4.0768μs | 245.2934 KOps/s | 253.0430 KOps/s | $\color{#d91a1a}-3.06\\%$ | | test_membership_nested_leaf_last | 44.4340μs | 4.0943μs | 244.2418 KOps/s | 255.0172 KOps/s | $\color{#d91a1a}-4.23\\%$ | | test_membership_stacked_nested_last | 27.9120μs | 5.1562μs | 
193.9407 KOps/s | 78.6652 KOps/s | $\textbf{\color{#35bf28}+146.54\\%}$ | | test_membership_stacked_nested_leaf_last | 36.8390μs | 5.1627μs | 193.6968 KOps/s | 77.9643 KOps/s | $\textbf{\color{#35bf28}+148.44\\%}$ | | test_nested_getleaf | 63.0680μs | 10.8609μs | 92.0734 KOps/s | 90.6370 KOps/s | $\color{#35bf28}+1.58\\%$ | | test_nested_get | 43.9620μs | 10.3972μs | 96.1799 KOps/s | 96.6965 KOps/s | $\color{#d91a1a}-0.53\\%$ | | test_stacked_getleaf | 65.0620μs | 10.4712μs | 95.5003 KOps/s | 91.9871 KOps/s | $\color{#35bf28}+3.82\\%$ | | test_stacked_get | 55.7750μs | 10.2647μs | 97.4212 KOps/s | 100.5363 KOps/s | $\color{#d91a1a}-3.10\\%$ | | test_nested_getitemleaf | 40.6760μs | 11.3255μs | 88.2961 KOps/s | 87.5949 KOps/s | $\color{#35bf28}+0.80\\%$ | | test_nested_getitem | 59.2310μs | 10.4746μs | 95.4688 KOps/s | 96.4757 KOps/s | $\color{#d91a1a}-1.04\\%$ | | test_stacked_getitemleaf | 61.7860μs | 11.2892μs | 88.5802 KOps/s | 87.7158 KOps/s | $\color{#35bf28}+0.99\\%$ | | test_stacked_getitem | 55.9450μs | 10.4089μs | 96.0718 KOps/s | 95.8771 KOps/s | $\color{#35bf28}+0.20\\%$ | | test_lock_nested | 2.1975ms | 0.5217ms | 1.9169 KOps/s | 1.6271 KOps/s | $\textbf{\color{#35bf28}+17.82\\%}$ | | test_lock_stack_nested | 1.0682ms | 0.4870ms | 2.0533 KOps/s | 2.1447 KOps/s | $\color{#d91a1a}-4.26\\%$ | | test_unlock_nested | 0.9227ms | 0.4339ms | 2.3047 KOps/s | 1.9242 KOps/s | $\textbf{\color{#35bf28}+19.77\\%}$ | | test_unlock_stack_nested | 0.5992ms | 0.3997ms | 2.5019 KOps/s | 2.6452 KOps/s | $\textbf{\color{#d91a1a}-5.42\\%}$ | | test_flatten_speed | 0.2355ms | 0.1060ms | 9.4320 KOps/s | 9.1870 KOps/s | $\color{#35bf28}+2.67\\%$ | | test_unflatten_speed | 0.9553ms | 0.4445ms | 2.2496 KOps/s | 2.2782 KOps/s | $\color{#d91a1a}-1.26\\%$ | | test_common_ops | 5.1704ms | 1.1786ms | 848.4909 Ops/s | 906.1683 Ops/s | $\textbf{\color{#d91a1a}-6.36\\%}$ | | test_creation | 49.9240μs | 2.4577μs | 406.8869 KOps/s | 394.0119 KOps/s | $\color{#35bf28}+3.27\\%$ | | 
test_creation_empty | 51.5360μs | 19.7085μs | 50.7396 KOps/s | 56.0719 KOps/s | $\textbf{\color{#d91a1a}-9.51\\%}$ | | test_creation_nested_1 | 59.5010μs | 23.3456μs | 42.8346 KOps/s | 46.3229 KOps/s | $\textbf{\color{#d91a1a}-7.53\\%}$ | | test_creation_nested_2 | 73.1370μs | 27.1621μs | 36.8160 KOps/s | 39.6547 KOps/s | $\textbf{\color{#d91a1a}-7.16\\%}$ | | test_clone | 0.1198ms | 17.4142μs | 57.4245 KOps/s | 56.8936 KOps/s | $\color{#35bf28}+0.93\\%$ | | test_getitem[int] | 1.0193ms | 13.0495μs | 76.6314 KOps/s | 79.5205 KOps/s | $\color{#d91a1a}-3.63\\%$ | | test_getitem[slice_int] | 0.1368ms | 32.4720μs | 30.7958 KOps/s | 30.7712 KOps/s | $\color{#35bf28}+0.08\\%$ | | test_getitem[range] | 0.2295ms | 58.4195μs | 17.1176 KOps/s | 17.4586 KOps/s | $\color{#d91a1a}-1.95\\%$ | | test_getitem[tuple] | 0.1267ms | 26.7037μs | 37.4479 KOps/s | 38.5783 KOps/s | $\color{#d91a1a}-2.93\\%$ | | test_getitem[list] | 0.3692ms | 53.4265μs | 18.7173 KOps/s | 19.3139 KOps/s | $\color{#d91a1a}-3.09\\%$ | | test_setitem_dim[int] | 78.2970μs | 35.0458μs | 28.5341 KOps/s | 32.0081 KOps/s | $\textbf{\color{#d91a1a}-10.85\\%}$ | | test_setitem_dim[slice_int] | 0.1502ms | 74.2317μs | 13.4713 KOps/s | 14.5400 KOps/s | $\textbf{\color{#d91a1a}-7.35\\%}$ | | test_setitem_dim[range] | 0.1749ms | 94.6450μs | 10.5658 KOps/s | 11.1054 KOps/s | $\color{#d91a1a}-4.86\\%$ | | test_setitem_dim[tuple] | 0.1107ms | 61.1951μs | 16.3412 KOps/s | 17.8738 KOps/s | $\textbf{\color{#d91a1a}-8.57\\%}$ | | test_setitem | 0.1572ms | 31.3353μs | 31.9129 KOps/s | 34.0949 KOps/s | $\textbf{\color{#d91a1a}-6.40\\%}$ | | test_set | 0.1920ms | 30.1939μs | 33.1192 KOps/s | 34.9456 KOps/s | $\textbf{\color{#d91a1a}-5.23\\%}$ | | test_set_shared | 1.4086ms | 0.2160ms | 4.6300 KOps/s | 4.5868 KOps/s | $\color{#35bf28}+0.94\\%$ | | test_update | 0.1601ms | 37.5484μs | 26.6323 KOps/s | 28.3464 KOps/s | $\textbf{\color{#d91a1a}-6.05\\%}$ | | test_update_nested | 0.1579ms | 47.8228μs | 20.9105 KOps/s | 21.8993 KOps/s | 
$\color{#d91a1a}-4.52\\%$ | | test_update__nested | 0.1804ms | 35.1588μs | 28.4424 KOps/s | 29.0287 KOps/s | $\color{#d91a1a}-2.02\\%$ | | test_set_nested | 0.1489ms | 32.9493μs | 30.3496 KOps/s | 32.3335 KOps/s | $\textbf{\color{#d91a1a}-6.14\\%}$ | | test_set_nested_new | 0.1438ms | 38.8985μs | 25.7079 KOps/s | 27.4840 KOps/s | $\textbf{\color{#d91a1a}-6.46\\%}$ | | test_select | 1.1193ms | 57.4133μs | 17.4176 KOps/s | 18.5922 KOps/s | $\textbf{\color{#d91a1a}-6.32\\%}$ | | test_select_nested | 0.1221ms | 60.0285μs | 16.6588 KOps/s | 16.4910 KOps/s | $\color{#35bf28}+1.02\\%$ | | test_exclude_nested | 0.1476ms | 80.5500μs | 12.4146 KOps/s | 12.4628 KOps/s | $\color{#d91a1a}-0.39\\%$ | | test_empty[True] | 0.9470ms | 0.3435ms | 2.9110 KOps/s | 2.9176 KOps/s | $\color{#d91a1a}-0.23\\%$ | | test_empty[False] | 10.9930μs | 1.2343μs | 810.1875 KOps/s | 808.8310 KOps/s | $\color{#35bf28}+0.17\\%$ | | test_unbind_speed | 0.4811ms | 0.3256ms | 3.0716 KOps/s | 3.0591 KOps/s | $\color{#35bf28}+0.41\\%$ | | test_unbind_speed_stack0 | 0.7317ms | 0.3222ms | 3.1037 KOps/s | 3.2264 KOps/s | $\color{#d91a1a}-3.80\\%$ | | test_unbind_speed_stack1 | 82.4024ms | 0.8295ms | 1.2056 KOps/s | 1.3603 KOps/s | $\textbf{\color{#d91a1a}-11.37\\%}$ | | test_split | 84.6888ms | 2.2808ms | 438.4507 Ops/s | 438.1662 Ops/s | $\color{#35bf28}+0.06\\%$ | | test_chunk | 88.4839ms | 2.2955ms | 435.6347 Ops/s | 436.1372 Ops/s | $\color{#d91a1a}-0.12\\%$ | | test_creation[device0] | 0.3047ms | 0.1224ms | 8.1667 KOps/s | 8.4004 KOps/s | $\color{#d91a1a}-2.78\\%$ | | test_creation_from_tensor | 4.1154ms | 0.1239ms | 8.0741 KOps/s | 8.2803 KOps/s | $\color{#d91a1a}-2.49\\%$ | | test_add_one[memmap_tensor0] | 0.2328ms | 8.1489μs | 122.7153 KOps/s | 129.9311 KOps/s | $\textbf{\color{#d91a1a}-5.55\\%}$ | | test_contiguous[memmap_tensor0] | 30.2470μs | 2.2302μs | 448.3908 KOps/s | 458.2103 KOps/s | $\color{#d91a1a}-2.14\\%$ | | test_stack[memmap_tensor0] | 80.6110μs | 6.0135μs | 166.2922 KOps/s | 169.2746 
KOps/s | $\color{#d91a1a}-1.76\\%$ | | test_memmaptd_index | 1.0794ms | 0.4529ms | 2.2082 KOps/s | 2.2854 KOps/s | $\color{#d91a1a}-3.38\\%$ | | test_memmaptd_index_astensor | 1.3037ms | 0.5430ms | 1.8417 KOps/s | 1.9308 KOps/s | $\color{#d91a1a}-4.62\\%$ | | test_memmaptd_index_op | 1.5440ms | 1.1093ms | 901.4740 Ops/s | 959.0837 Ops/s | $\textbf{\color{#d91a1a}-6.01\\%}$ | | test_serialize_model | 0.2171s | 0.1426s | 7.0118 Ops/s | 7.1754 Ops/s | $\color{#d91a1a}-2.28\\%$ | | test_serialize_model_pickle | 0.4469s | 0.3965s | 2.5220 Ops/s | 2.5297 Ops/s | $\color{#d91a1a}-0.30\\%$ | | test_serialize_weights | 0.1309s | 0.1269s | 7.8820 Ops/s | 8.1250 Ops/s | $\color{#d91a1a}-2.99\\%$ | | test_serialize_weights_returnearly | 0.1853s | 0.1669s | 5.9917 Ops/s | 5.6774 Ops/s | $\textbf{\color{#35bf28}+5.54\\%}$ | | test_serialize_weights_pickle | 0.6087s | 0.4566s | 2.1900 Ops/s | 2.4105 Ops/s | $\textbf{\color{#d91a1a}-9.15\\%}$ | | test_serialize_weights_filesystem | 0.1481s | 0.1437s | 6.9583 Ops/s | 7.1232 Ops/s | $\color{#d91a1a}-2.31\\%$ | | test_serialize_model_filesystem | 0.1713s | 0.1495s | 6.6909 Ops/s | 6.6329 Ops/s | $\color{#35bf28}+0.88\\%$ | | test_reshape_pytree | 0.1021ms | 39.4145μs | 25.3714 KOps/s | 26.1432 KOps/s | $\color{#d91a1a}-2.95\\%$ | | test_reshape_td | 0.1076ms | 50.7443μs | 19.7066 KOps/s | 20.0686 KOps/s | $\color{#d91a1a}-1.80\\%$ | | test_view_pytree | 90.2490μs | 39.1116μs | 25.5679 KOps/s | 26.3620 KOps/s | $\color{#d91a1a}-3.01\\%$ | | test_view_td | 0.1496ms | 56.2542μs | 17.7765 KOps/s | 18.1518 KOps/s | $\color{#d91a1a}-2.07\\%$ | | test_unbind_pytree | 95.4890μs | 36.2241μs | 27.6059 KOps/s | 27.4883 KOps/s | $\color{#35bf28}+0.43\\%$ | | test_unbind_td | 0.3484ms | 48.6959μs | 20.5356 KOps/s | 20.5432 KOps/s | $\color{#d91a1a}-0.04\\%$ | | test_split_pytree | 96.5000μs | 38.1404μs | 26.2189 KOps/s | 25.8132 KOps/s | $\color{#35bf28}+1.57\\%$ | | test_split_td | 0.5239ms | 60.2857μs | 16.5877 KOps/s | 16.2975 KOps/s | 
$\color{#35bf28}+1.78\\%$ | | test_add_pytree | 0.1377ms | 44.9285μs | 22.2576 KOps/s | 23.3308 KOps/s | $\color{#d91a1a}-4.60\\%$ | | test_add_td | 0.1716ms | 87.5172μs | 11.4263 KOps/s | 12.3397 KOps/s | $\textbf{\color{#d91a1a}-7.40\\%}$ | | test_distributed | 0.2913ms | 0.1302ms | 7.6819 KOps/s | 7.6349 KOps/s | $\color{#35bf28}+0.62\\%$ | | test_tdmodule | 36.3580μs | 17.5388μs | 57.0164 KOps/s | 61.4635 KOps/s | $\textbf{\color{#d91a1a}-7.24\\%}$ | | test_tdmodule_dispatch | 62.2460μs | 36.7649μs | 27.1999 KOps/s | 29.2958 KOps/s | $\textbf{\color{#d91a1a}-7.15\\%}$ | | test_tdseq | 44.9240μs | 19.4919μs | 51.3035 KOps/s | 54.8414 KOps/s | $\textbf{\color{#d91a1a}-6.45\\%}$ | | test_tdseq_dispatch | 64.4610μs | 41.8155μs | 23.9146 KOps/s | 26.1242 KOps/s | $\textbf{\color{#d91a1a}-8.46\\%}$ | | test_instantiation_functorch | 2.5133ms | 1.6209ms | 616.9437 Ops/s | 631.5782 Ops/s | $\color{#d91a1a}-2.32\\%$ | | test_instantiation_td | 1.8071ms | 1.1629ms | 859.9118 Ops/s | 781.5795 Ops/s | $\textbf{\color{#35bf28}+10.02\\%}$ | | test_exec_functorch | 0.2921ms | 0.1879ms | 5.3232 KOps/s | 5.4640 KOps/s | $\color{#d91a1a}-2.58\\%$ | | test_exec_functional_call | 0.5209ms | 0.1801ms | 5.5533 KOps/s | 5.6595 KOps/s | $\color{#d91a1a}-1.88\\%$ | | test_exec_td | 0.3162ms | 0.1798ms | 5.5633 KOps/s | 5.8318 KOps/s | $\color{#d91a1a}-4.60\\%$ | | test_exec_td_decorator | 0.6039ms | 0.2594ms | 3.8546 KOps/s | 3.9272 KOps/s | $\color{#d91a1a}-1.85\\%$ | | test_vmap_mlp_speed[True-True] | 0.9316ms | 0.6301ms | 1.5871 KOps/s | 1.6939 KOps/s | $\textbf{\color{#d91a1a}-6.30\\%}$ | | test_vmap_mlp_speed[True-False] | 0.8243ms | 0.6112ms | 1.6361 KOps/s | 1.7053 KOps/s | $\color{#d91a1a}-4.06\\%$ | | test_vmap_mlp_speed[False-True] | 0.8420ms | 0.5050ms | 1.9801 KOps/s | 2.0541 KOps/s | $\color{#d91a1a}-3.60\\%$ | | test_vmap_mlp_speed[False-False] | 0.7360ms | 0.5069ms | 1.9729 KOps/s | 2.0591 KOps/s | $\color{#d91a1a}-4.19\\%$ | | test_vmap_mlp_speed_decorator[True-True] | 
1.3213ms | 0.7118ms | 1.4049 KOps/s | 1.4569 KOps/s | $\color{#d91a1a}-3.57\\%$ | | test_vmap_mlp_speed_decorator[True-False] | 1.1727ms | 0.7084ms | 1.4116 KOps/s | 1.4616 KOps/s | $\color{#d91a1a}-3.42\\%$ | | test_vmap_mlp_speed_decorator[False-True] | 0.8884ms | 0.5908ms | 1.6925 KOps/s | 1.7600 KOps/s | $\color{#d91a1a}-3.84\\%$ | | test_vmap_mlp_speed_decorator[False-False] | 1.0691ms | 0.5929ms | 1.6866 KOps/s | 1.7624 KOps/s | $\color{#d91a1a}-4.30\\%$ | | test_to_module_speed[True] | 89.6009ms | 2.0605ms | 485.3098 Ops/s | 550.3214 Ops/s | $\textbf{\color{#d91a1a}-11.81\\%}$ | | test_to_module_speed[False] | 2.4419ms | 1.7757ms | 563.1674 Ops/s | 556.3118 Ops/s | $\color{#35bf28}+1.23\\%$ | | test_tc_init | 98.2330μs | 45.6403μs | 21.9104 KOps/s | 23.8864 KOps/s | $\textbf{\color{#d91a1a}-8.27\\%}$ | | test_tc_init_nested | 0.1584ms | 93.3428μs | 10.7132 KOps/s | 11.4032 KOps/s | $\textbf{\color{#d91a1a}-6.05\\%}$ | | test_tc_first_layer_tensor | 41.8480μs | 9.0932μs | 109.9719 KOps/s | 106.7136 KOps/s | $\color{#35bf28}+3.05\\%$ | | test_tc_first_layer_nontensor | 38.0810μs | 9.0644μs | 110.3217 KOps/s | 107.0521 KOps/s | $\color{#35bf28}+3.05\\%$ | | test_tc_second_layer_tensor | 28.9340μs | 2.8170μs | 354.9869 KOps/s | 337.3531 KOps/s | $\textbf{\color{#35bf28}+5.23\\%}$ | | test_tc_second_layer_nontensor | 57.9580μs | 10.1290μs | 98.7267 KOps/s | 96.1739 KOps/s | $\color{#35bf28}+2.65\\%$ | | test_unbind | 99.7243ms | 13.1491ms | 76.0509 Ops/s | 76.2895 Ops/s | $\color{#d91a1a}-0.31\\%$ | | test_full_like | 9.9851ms | 7.7908ms | 128.3560 Ops/s | 146.9490 Ops/s | $\textbf{\color{#d91a1a}-12.65\\%}$ | | test_zeros_like | 14.7553ms | 7.3622ms | 135.8290 Ops/s | 161.8183 Ops/s | $\textbf{\color{#d91a1a}-16.06\\%}$ | | test_ones_like | 14.1131ms | 7.7282ms | 129.3957 Ops/s | 133.8703 Ops/s | $\color{#d91a1a}-3.34\\%$ | | test_clone | 18.4513ms | 9.0467ms | 110.5378 Ops/s | 116.1391 Ops/s | $\color{#d91a1a}-4.82\\%$ | | test_squeeze | 71.1130μs | 14.4248μs | 
69.3249 KOps/s | 70.3237 KOps/s | $\color{#d91a1a}-1.42\\%$ | | test_unsqueeze | 0.2836ms | 98.2902μs | 10.1740 KOps/s | 10.1829 KOps/s | $\color{#d91a1a}-0.09\\%$ | | test_split | 0.3801ms | 0.2069ms | 4.8337 KOps/s | 4.8461 KOps/s | $\color{#d91a1a}-0.25\\%$ | | test_permute | 0.3698ms | 0.2267ms | 4.4120 KOps/s | 4.3654 KOps/s | $\color{#35bf28}+1.07\\%$ | | test_stack | 29.5012ms | 24.8108ms | 40.3051 Ops/s | 42.5380 Ops/s | $\textbf{\color{#d91a1a}-5.25\\%}$ | | test_cat | 30.4265ms | 24.8243ms | 40.2831 Ops/s | 42.6001 Ops/s | $\textbf{\color{#d91a1a}-5.44\\%}$ |