pytorch / tensordict

TensorDict is a tensor container dedicated to PyTorch.
MIT License
821 stars 67 forks source link

[Feature] Compile - nn compatibility #881

Closed vmoens closed 3 months ago

vmoens commented 3 months ago

Stack from ghstack (oldest at bottom):

github-actions[bot] commented 3 months ago

$\color{#D29922}\textsf{\Large ⚠\kern{0.2cm}\normalsize Warning}$ Result of CPU Benchmark Tests

Total Benchmarks: 133. Improved: $\large\color{#35bf28}16$. Worsened: $\large\color{#d91a1a}5$.

Expand to view detailed results | Name | Max | Mean | Ops | Ops on Repo `HEAD` | Change | | ------------------------------------------ | --------- | --------- | --------------- | ------------------ | ----------------------------------- | | test_plain_set_nested | 42.2990μs | 16.5714μs | 60.3448 KOps/s | 60.5313 KOps/s | $\color{#d91a1a}-0.31\\%$ | | test_plain_set_stack_nested | 38.4520μs | 16.9089μs | 59.1403 KOps/s | 59.3080 KOps/s | $\color{#d91a1a}-0.28\\%$ | | test_plain_set_nested_inplace | 65.1110μs | 18.5298μs | 53.9671 KOps/s | 54.0026 KOps/s | $\color{#d91a1a}-0.07\\%$ | | test_plain_set_stack_nested_inplace | 88.4720μs | 18.3459μs | 54.5080 KOps/s | 54.9210 KOps/s | $\color{#d91a1a}-0.75\\%$ | | test_items | 37.4000μs | 2.7396μs | 365.0127 KOps/s | 377.4656 KOps/s | $\color{#d91a1a}-3.30\\%$ | | test_items_nested | 0.5244ms | 0.3640ms | 2.7470 KOps/s | 2.5528 KOps/s | $\textbf{\color{#35bf28}+7.61\\%}$ | | test_items_nested_locked | 1.1757ms | 0.3619ms | 2.7629 KOps/s | 2.5637 KOps/s | $\textbf{\color{#35bf28}+7.77\\%}$ | | test_items_nested_leaf | 0.1457ms | 83.8413μs | 11.9273 KOps/s | 11.3952 KOps/s | $\color{#35bf28}+4.67\\%$ | | test_items_stack_nested | 0.7713ms | 0.3701ms | 2.7018 KOps/s | 2.5589 KOps/s | $\textbf{\color{#35bf28}+5.58\\%}$ | | test_items_stack_nested_leaf | 0.3561ms | 88.1216μs | 11.3480 KOps/s | 11.5009 KOps/s | $\color{#d91a1a}-1.33\\%$ | | test_items_stack_nested_locked | 0.5250ms | 0.3618ms | 2.7642 KOps/s | 2.5372 KOps/s | $\textbf{\color{#35bf28}+8.95\\%}$ | | test_keys | 32.9210μs | 3.8507μs | 259.6918 KOps/s | 239.2727 KOps/s | $\textbf{\color{#35bf28}+8.53\\%}$ | | test_keys_nested | 0.2844ms | 0.1436ms | 6.9636 KOps/s | 6.9455 KOps/s | $\color{#35bf28}+0.26\\%$ | | test_keys_nested_locked | 0.7427ms | 0.1494ms | 6.6919 KOps/s | 6.6365 KOps/s | $\color{#35bf28}+0.84\\%$ | | test_keys_nested_leaf | 0.2335ms | 0.1236ms | 8.0927 KOps/s | 8.1201 KOps/s | $\color{#d91a1a}-0.34\\%$ | | test_keys_stack_nested | 0.2233ms | 
0.1436ms | 6.9653 KOps/s | 6.8876 KOps/s | $\color{#35bf28}+1.13\\%$ | | test_keys_stack_nested_leaf | 0.4269ms | 0.1246ms | 8.0231 KOps/s | 8.1295 KOps/s | $\color{#d91a1a}-1.31\\%$ | | test_keys_stack_nested_locked | 0.1989ms | 0.1495ms | 6.6886 KOps/s | 6.6408 KOps/s | $\color{#35bf28}+0.72\\%$ | | test_values | 9.7533μs | 1.1576μs | 863.8761 KOps/s | 833.4934 KOps/s | $\color{#35bf28}+3.65\\%$ | | test_values_nested | 0.1005ms | 48.6817μs | 20.5416 KOps/s | 20.2637 KOps/s | $\color{#35bf28}+1.37\\%$ | | test_values_nested_locked | 95.6280μs | 48.5398μs | 20.6017 KOps/s | 20.4752 KOps/s | $\color{#35bf28}+0.62\\%$ | | test_values_nested_leaf | 0.2261ms | 43.5590μs | 22.9574 KOps/s | 22.3650 KOps/s | $\color{#35bf28}+2.65\\%$ | | test_values_stack_nested | 0.2074ms | 48.4044μs | 20.6593 KOps/s | 20.3661 KOps/s | $\color{#35bf28}+1.44\\%$ | | test_values_stack_nested_leaf | 94.8070μs | 42.8340μs | 23.3459 KOps/s | 22.6150 KOps/s | $\color{#35bf28}+3.23\\%$ | | test_values_stack_nested_locked | 97.1210μs | 48.7471μs | 20.5140 KOps/s | 20.1752 KOps/s | $\color{#35bf28}+1.68\\%$ | | test_membership | 2.5303μs | 0.7254μs | 1.3785 MOps/s | 1.1056 MOps/s | $\textbf{\color{#35bf28}+24.68\\%}$ | | test_membership_nested | 29.4650μs | 2.6303μs | 380.1832 KOps/s | 372.5918 KOps/s | $\color{#35bf28}+2.04\\%$ | | test_membership_nested_leaf | 33.8940μs | 2.7118μs | 368.7559 KOps/s | 369.0634 KOps/s | $\color{#d91a1a}-0.08\\%$ | | test_membership_stacked_nested | 21.2490μs | 2.6126μs | 382.7570 KOps/s | 370.1731 KOps/s | $\color{#35bf28}+3.40\\%$ | | test_membership_stacked_nested_leaf | 21.2500μs | 2.6385μs | 379.0081 KOps/s | 367.6561 KOps/s | $\color{#35bf28}+3.09\\%$ | | test_membership_nested_last | 36.1670μs | 3.9230μs | 254.9051 KOps/s | 254.1105 KOps/s | $\color{#35bf28}+0.31\\%$ | | test_membership_nested_leaf_last | 53.1690μs | 3.8853μs | 257.3834 KOps/s | 254.0577 KOps/s | $\color{#35bf28}+1.31\\%$ | | test_membership_stacked_nested_last | 30.7970μs | 3.8880μs | 
257.2033 KOps/s | 254.7226 KOps/s | $\color{#35bf28}+0.97\\%$ | | test_membership_stacked_nested_leaf_last | 26.2390μs | 3.8827μs | 257.5542 KOps/s | 250.0929 KOps/s | $\color{#35bf28}+2.98\\%$ | | test_nested_getleaf | 48.9620μs | 10.9124μs | 91.6386 KOps/s | 89.9109 KOps/s | $\color{#35bf28}+1.92\\%$ | | test_nested_get | 51.6070μs | 10.2736μs | 97.3365 KOps/s | 95.0108 KOps/s | $\color{#35bf28}+2.45\\%$ | | test_stacked_getleaf | 48.9410μs | 10.8490μs | 92.1745 KOps/s | 89.4645 KOps/s | $\color{#35bf28}+3.03\\%$ | | test_stacked_get | 46.5270μs | 10.5087μs | 95.1594 KOps/s | 95.0807 KOps/s | $\color{#35bf28}+0.08\\%$ | | test_nested_getitemleaf | 50.9350μs | 11.4212μs | 87.5564 KOps/s | 86.2396 KOps/s | $\color{#35bf28}+1.53\\%$ | | test_nested_getitem | 66.8820μs | 10.3518μs | 96.6018 KOps/s | 94.1454 KOps/s | $\color{#35bf28}+2.61\\%$ | | test_stacked_getitemleaf | 57.5780μs | 11.4285μs | 87.5008 KOps/s | 87.2832 KOps/s | $\color{#35bf28}+0.25\\%$ | | test_stacked_getitem | 47.0180μs | 10.5699μs | 94.6085 KOps/s | 94.4909 KOps/s | $\color{#35bf28}+0.12\\%$ | | test_lock_nested | 6.9568ms | 0.4440ms | 2.2523 KOps/s | 2.1836 KOps/s | $\color{#35bf28}+3.15\\%$ | | test_lock_stack_nested | 0.6300ms | 0.4083ms | 2.4489 KOps/s | 2.3906 KOps/s | $\color{#35bf28}+2.44\\%$ | | test_unlock_nested | 0.7852ms | 0.3558ms | 2.8106 KOps/s | 2.3101 KOps/s | $\textbf{\color{#35bf28}+21.67\\%}$ | | test_unlock_stack_nested | 0.4912ms | 0.3220ms | 3.1060 KOps/s | 2.9815 KOps/s | $\color{#35bf28}+4.18\\%$ | | test_flatten_speed | 0.5386ms | 0.1051ms | 9.5178 KOps/s | 9.2456 KOps/s | $\color{#35bf28}+2.94\\%$ | | test_unflatten_speed | 0.9928ms | 0.4368ms | 2.2892 KOps/s | 2.2230 KOps/s | $\color{#35bf28}+2.98\\%$ | | test_common_ops | 6.7217ms | 0.7375ms | 1.3560 KOps/s | 1.3516 KOps/s | $\color{#35bf28}+0.33\\%$ | | test_creation | 79.0080μs | 2.2889μs | 436.8912 KOps/s | 426.4835 KOps/s | $\color{#35bf28}+2.44\\%$ | | test_creation_empty | 40.9170μs | 10.0421μs | 99.5811 KOps/s 
| 106.6076 KOps/s | $\textbf{\color{#d91a1a}-6.59\\%}$ | | test_creation_nested_1 | 48.7910μs | 12.8013μs | 78.1168 KOps/s | 80.3587 KOps/s | $\color{#d91a1a}-2.79\\%$ | | test_creation_nested_2 | 47.9200μs | 16.7258μs | 59.7879 KOps/s | 61.7806 KOps/s | $\color{#d91a1a}-3.23\\%$ | | test_clone | 76.7440μs | 12.7451μs | 78.4615 KOps/s | 75.9648 KOps/s | $\color{#35bf28}+3.29\\%$ | | test_getitem[int] | 41.5980μs | 11.8097μs | 84.6765 KOps/s | 85.1796 KOps/s | $\color{#d91a1a}-0.59\\%$ | | test_getitem[slice_int] | 66.3440μs | 23.7867μs | 42.0404 KOps/s | 41.5710 KOps/s | $\color{#35bf28}+1.13\\%$ | | test_getitem[range] | 91.0400μs | 44.8589μs | 22.2921 KOps/s | 21.3222 KOps/s | $\color{#35bf28}+4.55\\%$ | | test_getitem[tuple] | 58.1390μs | 19.5289μs | 51.2062 KOps/s | 51.4421 KOps/s | $\color{#d91a1a}-0.46\\%$ | | test_getitem[list] | 0.2229ms | 40.1500μs | 24.9066 KOps/s | 24.6465 KOps/s | $\color{#35bf28}+1.06\\%$ | | test_setitem_dim[int] | 77.9050μs | 33.8269μs | 29.5623 KOps/s | 32.4531 KOps/s | $\textbf{\color{#d91a1a}-8.91\\%}$ | | test_setitem_dim[slice_int] | 0.1188ms | 60.5507μs | 16.5151 KOps/s | 17.0095 KOps/s | $\color{#d91a1a}-2.91\\%$ | | test_setitem_dim[range] | 0.1398ms | 81.0878μs | 12.3323 KOps/s | 12.9311 KOps/s | $\color{#d91a1a}-4.63\\%$ | | test_setitem_dim[tuple] | 90.7790μs | 48.5974μs | 20.5772 KOps/s | 21.1346 KOps/s | $\color{#d91a1a}-2.64\\%$ | | test_setitem | 69.0290μs | 18.7285μs | 53.3944 KOps/s | 52.8399 KOps/s | $\color{#35bf28}+1.05\\%$ | | test_set | 86.1310μs | 18.4923μs | 54.0764 KOps/s | 54.3498 KOps/s | $\color{#d91a1a}-0.50\\%$ | | test_set_shared | 2.1703ms | 0.1664ms | 6.0096 KOps/s | 5.9381 KOps/s | $\color{#35bf28}+1.20\\%$ | | test_update | 0.1492ms | 20.8648μs | 47.9276 KOps/s | 49.0355 KOps/s | $\color{#d91a1a}-2.26\\%$ | | test_update_nested | 0.1347ms | 28.9799μs | 34.5067 KOps/s | 34.3876 KOps/s | $\color{#35bf28}+0.35\\%$ | | test_update__nested | 96.4910μs | 25.5225μs | 39.1810 KOps/s | 39.7739 KOps/s | 
$\color{#d91a1a}-1.49\\%$ | | test_set_nested | 71.1030μs | 20.1163μs | 49.7109 KOps/s | 49.7462 KOps/s | $\color{#d91a1a}-0.07\\%$ | | test_set_nested_new | 98.6040μs | 24.6271μs | 40.6057 KOps/s | 40.0569 KOps/s | $\color{#35bf28}+1.37\\%$ | | test_select | 0.1560ms | 40.6712μs | 24.5875 KOps/s | 24.3617 KOps/s | $\color{#35bf28}+0.93\\%$ | | test_select_nested | 0.1271ms | 60.5657μs | 16.5110 KOps/s | 16.3878 KOps/s | $\color{#35bf28}+0.75\\%$ | | test_exclude_nested | 0.1873ms | 81.0471μs | 12.3385 KOps/s | 12.0832 KOps/s | $\color{#35bf28}+2.11\\%$ | | test_empty[True] | 0.6374ms | 0.3459ms | 2.8911 KOps/s | 2.8427 KOps/s | $\color{#35bf28}+1.70\\%$ | | test_empty[False] | 19.1910μs | 1.2664μs | 789.6235 KOps/s | 752.3049 KOps/s | $\color{#35bf28}+4.96\\%$ | | test_unbind_speed | 0.4403ms | 0.2535ms | 3.9446 KOps/s | 3.7876 KOps/s | $\color{#35bf28}+4.15\\%$ | | test_unbind_speed_stack0 | 0.5768ms | 0.2646ms | 3.7789 KOps/s | 3.8377 KOps/s | $\color{#d91a1a}-1.53\\%$ | | test_unbind_speed_stack1 | 79.7280ms | 0.7512ms | 1.3312 KOps/s | 1.3394 KOps/s | $\color{#d91a1a}-0.62\\%$ | | test_split | 83.0425ms | 1.6402ms | 609.6794 Ops/s | 609.6858 Ops/s | $-0.00\\%$ | | test_chunk | 80.1615ms | 1.6386ms | 610.2758 Ops/s | 609.9515 Ops/s | $\color{#35bf28}+0.05\\%$ | | test_creation[device0] | 0.1967ms | 94.4395μs | 10.5888 KOps/s | 10.5210 KOps/s | $\color{#35bf28}+0.64\\%$ | | test_creation_from_tensor | 3.9791ms | 99.1527μs | 10.0855 KOps/s | 10.1783 KOps/s | $\color{#d91a1a}-0.91\\%$ | | test_add_one[memmap_tensor0] | 0.2304ms | 5.2869μs | 189.1470 KOps/s | 183.6093 KOps/s | $\color{#35bf28}+3.02\\%$ | | test_contiguous[memmap_tensor0] | 15.9200μs | 0.6297μs | 1.5882 MOps/s | 1.5526 MOps/s | $\color{#35bf28}+2.29\\%$ | | test_stack[memmap_tensor0] | 41.2970μs | 3.4577μs | 289.2067 KOps/s | 264.5609 KOps/s | $\textbf{\color{#35bf28}+9.32\\%}$ | | test_memmaptd_index | 1.0633ms | 0.2601ms | 3.8451 KOps/s | 3.8333 KOps/s | $\color{#35bf28}+0.31\\%$ | | 
test_memmaptd_index_astensor | 0.8057ms | 0.3342ms | 2.9922 KOps/s | 3.0060 KOps/s | $\color{#d91a1a}-0.46\\%$ | | test_memmaptd_index_op | 0.9433ms | 0.5958ms | 1.6784 KOps/s | 1.7017 KOps/s | $\color{#d91a1a}-1.37\\%$ | | test_serialize_model | 0.1388s | 0.1240s | 8.0631 Ops/s | 7.4904 Ops/s | $\textbf{\color{#35bf28}+7.64\\%}$ | | test_serialize_model_pickle | 0.4488s | 0.3947s | 2.5338 Ops/s | 2.5124 Ops/s | $\color{#35bf28}+0.85\\%$ | | test_serialize_weights | 0.1298s | 0.1228s | 8.1427 Ops/s | 7.7653 Ops/s | $\color{#35bf28}+4.86\\%$ | | test_serialize_weights_returnearly | 0.1805s | 0.1639s | 6.1030 Ops/s | 6.1632 Ops/s | $\color{#d91a1a}-0.98\\%$ | | test_serialize_weights_pickle | 0.4505s | 0.3957s | 2.5274 Ops/s | 2.3388 Ops/s | $\textbf{\color{#35bf28}+8.06\\%}$ | | test_serialize_weights_filesystem | 0.1503s | 0.1438s | 6.9557 Ops/s | 6.4196 Ops/s | $\textbf{\color{#35bf28}+8.35\\%}$ | | test_serialize_model_filesystem | 0.1590s | 0.1485s | 6.7327 Ops/s | 6.6238 Ops/s | $\color{#35bf28}+1.64\\%$ | | test_reshape_pytree | 59.8510μs | 25.5246μs | 39.1779 KOps/s | 38.7104 KOps/s | $\color{#35bf28}+1.21\\%$ | | test_reshape_td | 0.1080ms | 33.9512μs | 29.4540 KOps/s | 28.6374 KOps/s | $\color{#35bf28}+2.85\\%$ | | test_view_pytree | 74.7390μs | 25.3967μs | 39.3751 KOps/s | 39.2349 KOps/s | $\color{#35bf28}+0.36\\%$ | | test_view_td | 81.4430μs | 38.5290μs | 25.9545 KOps/s | 25.0444 KOps/s | $\color{#35bf28}+3.63\\%$ | | test_unbind_pytree | 80.6610μs | 29.2488μs | 34.1894 KOps/s | 33.6483 KOps/s | $\color{#35bf28}+1.61\\%$ | | test_unbind_td | 0.4086ms | 37.7449μs | 26.4937 KOps/s | 25.8399 KOps/s | $\color{#35bf28}+2.53\\%$ | | test_split_pytree | 64.3400μs | 29.3070μs | 34.1216 KOps/s | 33.7134 KOps/s | $\color{#35bf28}+1.21\\%$ | | test_split_td | 0.1209ms | 39.7172μs | 25.1780 KOps/s | 24.5433 KOps/s | $\color{#35bf28}+2.59\\%$ | | test_add_pytree | 90.8200μs | 33.9928μs | 29.4180 KOps/s | 28.4323 KOps/s | $\color{#35bf28}+3.47\\%$ | | test_add_td | 
0.1569ms | 55.4627μs | 18.0301 KOps/s | 18.1182 KOps/s | $\color{#d91a1a}-0.49\\%$ | | test_distributed | 0.2706ms | 0.1296ms | 7.7170 KOps/s | 7.6171 KOps/s | $\color{#35bf28}+1.31\\%$ | | test_tdmodule | 28.0220μs | 15.7324μs | 63.5632 KOps/s | 58.0843 KOps/s | $\textbf{\color{#35bf28}+9.43\\%}$ | | test_tdmodule_dispatch | 54.0310μs | 32.9476μs | 30.3512 KOps/s | 28.4751 KOps/s | $\textbf{\color{#35bf28}+6.59\\%}$ | | test_tdseq | 31.2590μs | 17.6600μs | 56.6250 KOps/s | 50.8431 KOps/s | $\textbf{\color{#35bf28}+11.37\\%}$ | | test_tdseq_dispatch | 64.3600μs | 37.6192μs | 26.5821 KOps/s | 25.6210 KOps/s | $\color{#35bf28}+3.75\\%$ | | test_instantiation_functorch | 2.0790ms | 1.3494ms | 741.0887 Ops/s | 745.8275 Ops/s | $\color{#d91a1a}-0.64\\%$ | | test_instantiation_td | 1.5099ms | 1.0199ms | 980.5261 Ops/s | 951.0144 Ops/s | $\color{#35bf28}+3.10\\%$ | | test_exec_functorch | 0.2633ms | 0.1592ms | 6.2829 KOps/s | 6.1555 KOps/s | $\color{#35bf28}+2.07\\%$ | | test_exec_functional_call | 0.3752ms | 0.1469ms | 6.8065 KOps/s | 6.5741 KOps/s | $\color{#35bf28}+3.53\\%$ | | test_exec_td | 0.2852ms | 0.1455ms | 6.8751 KOps/s | 6.5419 KOps/s | $\textbf{\color{#35bf28}+5.09\\%}$ | | test_exec_td_decorator | 0.2961ms | 0.2294ms | 4.3587 KOps/s | 4.4099 KOps/s | $\color{#d91a1a}-1.16\\%$ | | test_vmap_mlp_speed[True-True] | 0.6505ms | 0.4794ms | 2.0860 KOps/s | 2.0469 KOps/s | $\color{#35bf28}+1.91\\%$ | | test_vmap_mlp_speed[True-False] | 0.7866ms | 0.4771ms | 2.0959 KOps/s | 2.0644 KOps/s | $\color{#35bf28}+1.53\\%$ | | test_vmap_mlp_speed[False-True] | 0.5079ms | 0.3929ms | 2.5451 KOps/s | 2.4979 KOps/s | $\color{#35bf28}+1.89\\%$ | | test_vmap_mlp_speed[False-False] | 0.6842ms | 0.3942ms | 2.5369 KOps/s | 2.4760 KOps/s | $\color{#35bf28}+2.46\\%$ | | test_vmap_mlp_speed_decorator[True-True] | 1.1630ms | 0.5828ms | 1.7158 KOps/s | 1.7422 KOps/s | $\color{#d91a1a}-1.52\\%$ | | test_vmap_mlp_speed_decorator[True-False] | 0.8084ms | 0.5708ms | 1.7518 KOps/s | 1.7228 
KOps/s | $\color{#35bf28}+1.68\\%$ | | test_vmap_mlp_speed_decorator[False-True] | 0.7722ms | 0.4729ms | 2.1145 KOps/s | 2.0846 KOps/s | $\color{#35bf28}+1.43\\%$ | | test_vmap_mlp_speed_decorator[False-False] | 0.7375ms | 0.4715ms | 2.1208 KOps/s | 2.0968 KOps/s | $\color{#35bf28}+1.14\\%$ | | test_to_module_speed[True] | 2.3177ms | 1.7796ms | 561.9163 Ops/s | 565.2773 Ops/s | $\color{#d91a1a}-0.59\\%$ | | test_to_module_speed[False] | 85.0951ms | 1.9254ms | 519.3605 Ops/s | 562.8469 Ops/s | $\textbf{\color{#d91a1a}-7.73\\%}$ | | test_tc_init | 0.1557ms | 53.9141μs | 18.5480 KOps/s | 20.3480 KOps/s | $\textbf{\color{#d91a1a}-8.85\\%}$ | | test_tc_init_nested | 0.3461ms | 0.1093ms | 9.1475 KOps/s | 10.0064 KOps/s | $\textbf{\color{#d91a1a}-8.58\\%}$ | | test_tc_first_layer_tensor | 31.0880μs | 8.3624μs | 119.5829 KOps/s | 117.6354 KOps/s | $\color{#35bf28}+1.66\\%$ | | test_tc_first_layer_nontensor | 55.9150μs | 8.2286μs | 121.5275 KOps/s | 117.6042 KOps/s | $\color{#35bf28}+3.34\\%$ | | test_tc_second_layer_tensor | 22.9740μs | 2.5078μs | 398.7492 KOps/s | 373.4646 KOps/s | $\textbf{\color{#35bf28}+6.77\\%}$ | | test_tc_second_layer_nontensor | 52.0870μs | 9.5345μs | 104.8822 KOps/s | 104.1380 KOps/s | $\color{#35bf28}+0.71\\%$ |
github-actions[bot] commented 3 months ago

$\color{#D29922}\textsf{\Large ⚠\kern{0.2cm}\normalsize Warning}$ Result of GPU Benchmark Tests

Total Benchmarks: 141. Improved: $\large\color{#35bf28}13$. Worsened: $\large\color{#d91a1a}9$.

Expand to view detailed results | Name | Max | Mean | Ops | Ops on Repo `HEAD` | Change | | -------------------------------------------------- | --------- | --------- | --------------- | ------------------ | ----------------------------------- | | test_plain_set_nested | 30.2810μs | 12.9955μs | 76.9497 KOps/s | 77.0928 KOps/s | $\color{#d91a1a}-0.19\\%$ | | test_plain_set_stack_nested | 0.1949ms | 13.1047μs | 76.3085 KOps/s | 76.4912 KOps/s | $\color{#d91a1a}-0.24\\%$ | | test_plain_set_nested_inplace | 39.2110μs | 14.1256μs | 70.7936 KOps/s | 71.6351 KOps/s | $\color{#d91a1a}-1.17\\%$ | | test_plain_set_stack_nested_inplace | 0.2069ms | 14.0333μs | 71.2593 KOps/s | 71.1847 KOps/s | $\color{#35bf28}+0.10\\%$ | | test_items | 0.1867ms | 4.7675μs | 209.7552 KOps/s | 209.8342 KOps/s | $\color{#d91a1a}-0.04\\%$ | | test_items_nested | 0.4441ms | 0.3906ms | 2.5603 KOps/s | 2.5437 KOps/s | $\color{#35bf28}+0.65\\%$ | | test_items_nested_locked | 0.5826ms | 0.3992ms | 2.5049 KOps/s | 2.5376 KOps/s | $\color{#d91a1a}-1.29\\%$ | | test_items_nested_leaf | 0.2665ms | 90.0719μs | 11.1022 KOps/s | 11.4730 KOps/s | $\color{#d91a1a}-3.23\\%$ | | test_items_stack_nested | 0.5805ms | 0.3944ms | 2.5355 KOps/s | 2.5012 KOps/s | $\color{#35bf28}+1.37\\%$ | | test_items_stack_nested_leaf | 0.2639ms | 84.7326μs | 11.8018 KOps/s | 11.4350 KOps/s | $\color{#35bf28}+3.21\\%$ | | test_items_stack_nested_locked | 0.5809ms | 0.3923ms | 2.5488 KOps/s | 2.5147 KOps/s | $\color{#35bf28}+1.36\\%$ | | test_keys | 0.1856ms | 4.3738μs | 228.6320 KOps/s | 229.0570 KOps/s | $\color{#d91a1a}-0.19\\%$ | | test_keys_nested | 96.4410μs | 68.0563μs | 14.6937 KOps/s | 14.6960 KOps/s | $\color{#d91a1a}-0.02\\%$ | | test_keys_nested_locked | 0.6820ms | 74.5692μs | 13.4104 KOps/s | 13.2059 KOps/s | $\color{#35bf28}+1.55\\%$ | | test_keys_nested_leaf | 0.2390ms | 57.9546μs | 17.2549 KOps/s | 16.8269 KOps/s | $\color{#35bf28}+2.54\\%$ | | test_keys_stack_nested | 0.2491ms | 67.6459μs | 14.7829 KOps/s | 14.9517 
KOps/s | $\color{#d91a1a}-1.13\\%$ | | test_keys_stack_nested_leaf | 0.2406ms | 59.2998μs | 16.8635 KOps/s | 17.1533 KOps/s | $\color{#d91a1a}-1.69\\%$ | | test_keys_stack_nested_locked | 0.2544ms | 73.5154μs | 13.6026 KOps/s | 13.4569 KOps/s | $\color{#35bf28}+1.08\\%$ | | test_values | 61.6510μs | 1.7648μs | 566.6367 KOps/s | 497.3293 KOps/s | $\textbf{\color{#35bf28}+13.94\\%}$ | | test_values_nested | 0.2154ms | 34.6594μs | 28.8522 KOps/s | 29.0397 KOps/s | $\color{#d91a1a}-0.65\\%$ | | test_values_nested_locked | 0.2155ms | 36.7248μs | 27.2296 KOps/s | 27.3572 KOps/s | $\color{#d91a1a}-0.47\\%$ | | test_values_nested_leaf | 53.3910μs | 30.6999μs | 32.5734 KOps/s | 32.4935 KOps/s | $\color{#35bf28}+0.25\\%$ | | test_values_stack_nested | 0.2199ms | 35.3505μs | 28.2881 KOps/s | 28.2801 KOps/s | $\color{#35bf28}+0.03\\%$ | | test_values_stack_nested_leaf | 0.2109ms | 31.5341μs | 31.7117 KOps/s | 31.8114 KOps/s | $\color{#d91a1a}-0.31\\%$ | | test_values_stack_nested_locked | 0.2254ms | 37.3723μs | 26.7578 KOps/s | 26.6371 KOps/s | $\color{#35bf28}+0.45\\%$ | | test_membership | 9.5047μs | 0.5420μs | 1.8449 MOps/s | 1.8585 MOps/s | $\color{#d91a1a}-0.73\\%$ | | test_membership_nested | 22.9800μs | 2.0610μs | 485.2097 KOps/s | 480.8359 KOps/s | $\color{#35bf28}+0.91\\%$ | | test_membership_nested_leaf | 95.6765μs | 2.0338μs | 491.6931 KOps/s | 488.6423 KOps/s | $\color{#35bf28}+0.62\\%$ | | test_membership_stacked_nested | 0.1901ms | 2.0947μs | 477.4066 KOps/s | 474.9091 KOps/s | $\color{#35bf28}+0.53\\%$ | | test_membership_stacked_nested_leaf | 17.0290μs | 2.0927μs | 477.8410 KOps/s | 486.3279 KOps/s | $\color{#d91a1a}-1.75\\%$ | | test_membership_nested_last | 18.4700μs | 2.9777μs | 335.8281 KOps/s | 330.0847 KOps/s | $\color{#35bf28}+1.74\\%$ | | test_membership_nested_leaf_last | 0.1906ms | 2.9850μs | 335.0115 KOps/s | 330.4788 KOps/s | $\color{#35bf28}+1.37\\%$ | | test_membership_stacked_nested_last | 24.1300μs | 9.1703μs | 109.0480 KOps/s | 160.6687 KOps/s 
| $\textbf{\color{#d91a1a}-32.13\\%}$ | | test_membership_stacked_nested_leaf_last | 0.1981ms | 9.1624μs | 109.1423 KOps/s | 160.9778 KOps/s | $\textbf{\color{#d91a1a}-32.20\\%}$ | | test_nested_getleaf | 0.1888ms | 7.9489μs | 125.8029 KOps/s | 124.6604 KOps/s | $\color{#35bf28}+0.92\\%$ | | test_nested_get | 0.1879ms | 7.5325μs | 132.7589 KOps/s | 132.1862 KOps/s | $\color{#35bf28}+0.43\\%$ | | test_stacked_getleaf | 23.9010μs | 7.9854μs | 125.2280 KOps/s | 123.7568 KOps/s | $\color{#35bf28}+1.19\\%$ | | test_stacked_get | 0.1923ms | 7.5052μs | 133.2410 KOps/s | 132.2353 KOps/s | $\color{#35bf28}+0.76\\%$ | | test_nested_getitemleaf | 50.4910μs | 8.1602μs | 122.5455 KOps/s | 122.4741 KOps/s | $\color{#35bf28}+0.06\\%$ | | test_nested_getitem | 0.1912ms | 7.6724μs | 130.3381 KOps/s | 129.5516 KOps/s | $\color{#35bf28}+0.61\\%$ | | test_stacked_getitemleaf | 0.1939ms | 8.1582μs | 122.5763 KOps/s | 122.0586 KOps/s | $\color{#35bf28}+0.42\\%$ | | test_stacked_getitem | 24.0510μs | 7.6998μs | 129.8731 KOps/s | 129.5338 KOps/s | $\color{#35bf28}+0.26\\%$ | | test_lock_nested | 9.3627ms | 0.4205ms | 2.3783 KOps/s | 2.4361 KOps/s | $\color{#d91a1a}-2.37\\%$ | | test_lock_stack_nested | 0.4089ms | 0.3721ms | 2.6872 KOps/s | 2.6522 KOps/s | $\color{#35bf28}+1.32\\%$ | | test_unlock_nested | 0.7355ms | 0.3289ms | 3.0408 KOps/s | 3.0365 KOps/s | $\color{#35bf28}+0.14\\%$ | | test_unlock_stack_nested | 0.3271ms | 0.2913ms | 3.4332 KOps/s | 3.4044 KOps/s | $\color{#35bf28}+0.85\\%$ | | test_flatten_speed | 0.4028ms | 0.1068ms | 9.3623 KOps/s | 9.2561 KOps/s | $\color{#35bf28}+1.15\\%$ | | test_unflatten_speed | 0.4797ms | 0.2966ms | 3.3713 KOps/s | 3.3839 KOps/s | $\color{#d91a1a}-0.37\\%$ | | test_common_ops | 1.0028ms | 0.5931ms | 1.6862 KOps/s | 1.5145 KOps/s | $\textbf{\color{#35bf28}+11.34\\%}$ | | test_creation | 38.4100μs | 1.8813μs | 531.5476 KOps/s | 539.2590 KOps/s | $\color{#d91a1a}-1.43\\%$ | | test_creation_empty | 25.2300μs | 9.6727μs | 103.3841 KOps/s | 105.2331 
KOps/s | $\color{#d91a1a}-1.76\\%$ | | test_creation_nested_1 | 0.1998ms | 11.5864μs | 86.3077 KOps/s | 88.4715 KOps/s | $\color{#d91a1a}-2.45\\%$ | | test_creation_nested_2 | 30.1900μs | 14.0464μs | 71.1924 KOps/s | 72.2935 KOps/s | $\color{#d91a1a}-1.52\\%$ | | test_clone | 0.1966ms | 11.0120μs | 90.8097 KOps/s | 92.1964 KOps/s | $\color{#d91a1a}-1.50\\%$ | | test_getitem[int] | 24.1700μs | 10.0299μs | 99.7015 KOps/s | 100.3568 KOps/s | $\color{#d91a1a}-0.65\\%$ | | test_getitem[slice_int] | 1.7600ms | 19.7815μs | 50.5522 KOps/s | 52.0367 KOps/s | $\color{#d91a1a}-2.85\\%$ | | test_getitem[range] | 0.1563ms | 36.3065μs | 27.5433 KOps/s | 28.0905 KOps/s | $\color{#d91a1a}-1.95\\%$ | | test_getitem[tuple] | 0.2082ms | 17.2946μs | 57.8215 KOps/s | 57.6930 KOps/s | $\color{#35bf28}+0.22\\%$ | | test_getitem[list] | 0.1658ms | 31.8808μs | 31.3669 KOps/s | 32.3588 KOps/s | $\color{#d91a1a}-3.07\\%$ | | test_setitem_dim[int] | 45.2310μs | 26.1867μs | 38.1873 KOps/s | 40.3954 KOps/s | $\textbf{\color{#d91a1a}-5.47\\%}$ | | test_setitem_dim[slice_int] | 63.4010μs | 46.1712μs | 21.6585 KOps/s | 21.5769 KOps/s | $\color{#35bf28}+0.38\\%$ | | test_setitem_dim[range] | 81.9910μs | 62.8779μs | 15.9038 KOps/s | 16.0628 KOps/s | $\color{#d91a1a}-0.99\\%$ | | test_setitem_dim[tuple] | 57.4810μs | 40.5692μs | 24.6493 KOps/s | 24.9634 KOps/s | $\color{#d91a1a}-1.26\\%$ | | test_setitem | 70.9710μs | 16.2490μs | 61.5421 KOps/s | 59.9863 KOps/s | $\color{#35bf28}+2.59\\%$ | | test_set | 63.2520μs | 15.5648μs | 64.2474 KOps/s | 62.3666 KOps/s | $\color{#35bf28}+3.02\\%$ | | test_set_shared | 2.7623ms | 95.5984μs | 10.4604 KOps/s | 10.4026 KOps/s | $\color{#35bf28}+0.56\\%$ | | test_update | 0.2071ms | 19.1226μs | 52.2941 KOps/s | 50.6924 KOps/s | $\color{#35bf28}+3.16\\%$ | | test_update_nested | 62.8010μs | 24.5426μs | 40.7455 KOps/s | 38.6573 KOps/s | $\textbf{\color{#35bf28}+5.40\\%}$ | | test_update__nested | 0.2070ms | 21.3920μs | 46.7464 KOps/s | 44.7752 KOps/s | 
$\color{#35bf28}+4.40\\%$ | | test_set_nested | 63.0910μs | 16.8324μs | 59.4093 KOps/s | 56.2662 KOps/s | $\textbf{\color{#35bf28}+5.59\\%}$ | | test_set_nested_new | 85.2400μs | 19.7182μs | 50.7146 KOps/s | 48.8617 KOps/s | $\color{#35bf28}+3.79\\%$ | | test_select | 1.0005ms | 33.4006μs | 29.9396 KOps/s | 29.9149 KOps/s | $\color{#35bf28}+0.08\\%$ | | test_select_nested | 0.2337ms | 52.7633μs | 18.9526 KOps/s | 18.7007 KOps/s | $\color{#35bf28}+1.35\\%$ | | test_exclude_nested | 90.1120μs | 72.4295μs | 13.8065 KOps/s | 13.9001 KOps/s | $\color{#d91a1a}-0.67\\%$ | | test_empty[True] | 0.4842ms | 0.3019ms | 3.3121 KOps/s | 3.3600 KOps/s | $\color{#d91a1a}-1.43\\%$ | | test_empty[False] | 18.8634μs | 0.9149μs | 1.0930 MOps/s | 1.0898 MOps/s | $\color{#35bf28}+0.30\\%$ | | test_to | 88.5420μs | 58.0461μs | 17.2277 KOps/s | 17.3089 KOps/s | $\color{#d91a1a}-0.47\\%$ | | test_to_nonblocking | 0.2356ms | 35.0986μs | 28.4912 KOps/s | 26.9676 KOps/s | $\textbf{\color{#35bf28}+5.65\\%}$ | | test_unbind_speed | 0.3732ms | 0.2492ms | 4.0128 KOps/s | 4.0913 KOps/s | $\color{#d91a1a}-1.92\\%$ | | test_unbind_speed_stack0 | 0.4355ms | 0.2455ms | 4.0738 KOps/s | 4.0128 KOps/s | $\color{#35bf28}+1.52\\%$ | | test_unbind_speed_stack1 | 92.0416ms | 0.7715ms | 1.2962 KOps/s | 1.4169 KOps/s | $\textbf{\color{#d91a1a}-8.52\\%}$ | | test_split | 90.2440ms | 1.5583ms | 641.7168 Ops/s | 646.0443 Ops/s | $\color{#d91a1a}-0.67\\%$ | | test_chunk | 1.6230ms | 1.4324ms | 698.1349 Ops/s | 702.4465 Ops/s | $\color{#d91a1a}-0.61\\%$ | | test_creation[device0] | 0.1275ms | 54.4476μs | 18.3663 KOps/s | 18.6270 KOps/s | $\color{#d91a1a}-1.40\\%$ | | test_creation_from_tensor | 0.2695ms | 51.7818μs | 19.3118 KOps/s | 19.5748 KOps/s | $\color{#d91a1a}-1.34\\%$ | | test_add_one[memmap_tensor0] | 0.1396ms | 6.9901μs | 143.0586 KOps/s | 152.8512 KOps/s | $\textbf{\color{#d91a1a}-6.41\\%}$ | | test_contiguous[memmap_tensor0] | 27.1010μs | 0.5672μs | 1.7629 MOps/s | 1.7385 MOps/s | 
$\color{#35bf28}+1.40\\%$ | | test_stack[memmap_tensor0] | 18.9990μs | 4.4877μs | 222.8335 KOps/s | 222.8703 KOps/s | $\color{#d91a1a}-0.02\\%$ | | test_memmaptd_index | 1.0852ms | 0.2542ms | 3.9337 KOps/s | 3.4016 KOps/s | $\textbf{\color{#35bf28}+15.64\\%}$ | | test_memmaptd_index_astensor | 0.6178ms | 0.3155ms | 3.1700 KOps/s | 3.1907 KOps/s | $\color{#d91a1a}-0.65\\%$ | | test_memmaptd_index_op | 0.8957ms | 0.6279ms | 1.5925 KOps/s | 1.6622 KOps/s | $\color{#d91a1a}-4.19\\%$ | | test_serialize_model | 93.4288ms | 90.4592ms | 11.0547 Ops/s | 10.4769 Ops/s | $\textbf{\color{#35bf28}+5.51\\%}$ | | test_serialize_model_pickle | 1.3472s | 1.2348s | 0.8098 Ops/s | 0.8063 Ops/s | $\color{#35bf28}+0.43\\%$ | | test_serialize_weights | 91.7796ms | 87.8048ms | 11.3889 Ops/s | 9.5971 Ops/s | $\textbf{\color{#35bf28}+18.67\\%}$ | | test_serialize_weights_returnearly | 0.1952s | 71.5253ms | 13.9811 Ops/s | 13.1204 Ops/s | $\textbf{\color{#35bf28}+6.56\\%}$ | | test_serialize_weights_pickle | 1.4165s | 1.2562s | 0.7961 Ops/s | 0.8007 Ops/s | $\color{#d91a1a}-0.58\\%$ | | test_reshape_pytree | 82.9420μs | 24.7197μs | 40.4536 KOps/s | 39.1549 KOps/s | $\color{#35bf28}+3.32\\%$ | | test_reshape_td | 57.3510μs | 29.6617μs | 33.7135 KOps/s | 34.2681 KOps/s | $\color{#d91a1a}-1.62\\%$ | | test_view_pytree | 0.1468ms | 24.8171μs | 40.2948 KOps/s | 40.8847 KOps/s | $\color{#d91a1a}-1.44\\%$ | | test_view_td | 0.1793ms | 37.5534μs | 26.6287 KOps/s | 27.6693 KOps/s | $\color{#d91a1a}-3.76\\%$ | | test_unbind_pytree | 0.1104ms | 30.3802μs | 32.9162 KOps/s | 33.0676 KOps/s | $\color{#d91a1a}-0.46\\%$ | | test_unbind_td | 0.4587ms | 38.0688μs | 26.2682 KOps/s | 26.9021 KOps/s | $\color{#d91a1a}-2.36\\%$ | | test_split_pytree | 59.0110μs | 33.4222μs | 29.9202 KOps/s | 30.3958 KOps/s | $\color{#d91a1a}-1.56\\%$ | | test_split_td | 0.1016ms | 36.4783μs | 27.4136 KOps/s | 27.7165 KOps/s | $\color{#d91a1a}-1.09\\%$ | | test_add_pytree | 72.1010μs | 37.5560μs | 26.6269 KOps/s | 27.0692 KOps/s 
| $\color{#d91a1a}-1.63\\%$ | | test_add_td | 84.1720μs | 51.6076μs | 19.3770 KOps/s | 20.5606 KOps/s | $\textbf{\color{#d91a1a}-5.76\\%}$ | | test_distributed | 0.1831ms | 67.5376μs | 14.8066 KOps/s | 14.0259 KOps/s | $\textbf{\color{#35bf28}+5.57\\%}$ | | test_tdmodule | 29.8910μs | 14.2207μs | 70.3198 KOps/s | 64.0265 KOps/s | $\textbf{\color{#35bf28}+9.83\\%}$ | | test_tdmodule_dispatch | 44.5510μs | 29.2572μs | 34.1796 KOps/s | 32.3532 KOps/s | $\textbf{\color{#35bf28}+5.65\\%}$ | | test_tdseq | 43.8910μs | 15.3442μs | 65.1712 KOps/s | 57.8969 KOps/s | $\textbf{\color{#35bf28}+12.56\\%}$ | | test_tdseq_dispatch | 60.4610μs | 31.6409μs | 31.6047 KOps/s | 30.1952 KOps/s | $\color{#35bf28}+4.67\\%$ | | test_instantiation_functorch | 1.4223ms | 1.3730ms | 728.3323 Ops/s | 727.9760 Ops/s | $\color{#35bf28}+0.05\\%$ | | test_instantiation_td | 92.2051ms | 1.0859ms | 920.8749 Ops/s | 1.0257 KOps/s | $\textbf{\color{#d91a1a}-10.22\\%}$ | | test_exec_functorch | 0.1767ms | 0.1477ms | 6.7711 KOps/s | 7.0172 KOps/s | $\color{#d91a1a}-3.51\\%$ | | test_exec_functional_call | 0.1780ms | 0.1342ms | 7.4533 KOps/s | 7.6033 KOps/s | $\color{#d91a1a}-1.97\\%$ | | test_exec_td | 0.1745ms | 0.1336ms | 7.4825 KOps/s | 7.6604 KOps/s | $\color{#d91a1a}-2.32\\%$ | | test_exec_td_decorator | 0.6946ms | 0.2084ms | 4.7980 KOps/s | 5.0159 KOps/s | $\color{#d91a1a}-4.34\\%$ | | test_vmap_mlp_speed[True-True] | 0.7521ms | 0.5844ms | 1.7112 KOps/s | 1.6689 KOps/s | $\color{#35bf28}+2.53\\%$ | | test_vmap_mlp_speed[True-False] | 0.6240ms | 0.5820ms | 1.7181 KOps/s | 1.7064 KOps/s | $\color{#35bf28}+0.69\\%$ | | test_vmap_mlp_speed[False-True] | 0.5966ms | 0.5156ms | 1.9396 KOps/s | 1.8812 KOps/s | $\color{#35bf28}+3.10\\%$ | | test_vmap_mlp_speed[False-False] | 0.5726ms | 0.5169ms | 1.9348 KOps/s | 1.8822 KOps/s | $\color{#35bf28}+2.79\\%$ | | test_vmap_mlp_speed_decorator[True-True] | 1.0078ms | 0.6609ms | 1.5131 KOps/s | 1.5308 KOps/s | $\color{#d91a1a}-1.16\\%$ | | 
test_vmap_mlp_speed_decorator[True-False] | 0.7890ms | 0.6585ms | 1.5187 KOps/s | 1.5207 KOps/s | $\color{#d91a1a}-0.13\\%$ | | test_vmap_mlp_speed_decorator[False-True] | 0.6678ms | 0.5795ms | 1.7255 KOps/s | 1.7422 KOps/s | $\color{#d91a1a}-0.96\\%$ | | test_vmap_mlp_speed_decorator[False-False] | 0.7278ms | 0.5792ms | 1.7264 KOps/s | 1.6452 KOps/s | $\color{#35bf28}+4.94\\%$ | | test_vmap_transformer_speed[True-True] | 8.4463ms | 7.9718ms | 125.4424 Ops/s | 130.3081 Ops/s | $\color{#d91a1a}-3.73\\%$ | | test_vmap_transformer_speed[True-False] | 9.0218ms | 7.8267ms | 127.7673 Ops/s | 130.8116 Ops/s | $\color{#d91a1a}-2.33\\%$ | | test_vmap_transformer_speed[False-True] | 7.6654ms | 7.6175ms | 131.2767 Ops/s | 131.7592 Ops/s | $\color{#d91a1a}-0.37\\%$ | | test_vmap_transformer_speed[False-False] | 7.6772ms | 7.6149ms | 131.3220 Ops/s | 132.3523 Ops/s | $\color{#d91a1a}-0.78\\%$ | | test_vmap_transformer_speed_decorator[True-True] | 19.0743ms | 19.0137ms | 52.5936 Ops/s | 53.0149 Ops/s | $\color{#d91a1a}-0.79\\%$ | | test_vmap_transformer_speed_decorator[True-False] | 19.0437ms | 18.9819ms | 52.6819 Ops/s | 52.9195 Ops/s | $\color{#d91a1a}-0.45\\%$ | | test_vmap_transformer_speed_decorator[False-True] | 18.9118ms | 18.8577ms | 53.0288 Ops/s | 53.4867 Ops/s | $\color{#d91a1a}-0.86\\%$ | | test_vmap_transformer_speed_decorator[False-False] | 18.9242ms | 18.8354ms | 53.0914 Ops/s | 53.4910 Ops/s | $\color{#d91a1a}-0.75\\%$ | | test_to_module_speed[True] | 2.8696ms | 1.5682ms | 637.6643 Ops/s | 672.0408 Ops/s | $\textbf{\color{#d91a1a}-5.12\\%}$ | | test_to_module_speed[False] | 2.0094ms | 1.5477ms | 646.1331 Ops/s | 679.6448 Ops/s | $\color{#d91a1a}-4.93\\%$ | | test_tc_init | 75.4920μs | 53.5653μs | 18.6688 KOps/s | 18.3770 KOps/s | $\color{#35bf28}+1.59\\%$ | | test_tc_init_nested | 0.1389ms | 0.1052ms | 9.5069 KOps/s | 9.1469 KOps/s | $\color{#35bf28}+3.93\\%$ | | test_tc_first_layer_tensor | 19.4800μs | 3.5279μs | 283.4542 KOps/s | 284.5060 KOps/s | 
$\color{#d91a1a}-0.37\\%$ | | test_tc_first_layer_nontensor | 17.0900μs | 3.5719μs | 279.9642 KOps/s | 283.0654 KOps/s | $\color{#d91a1a}-1.10\\%$ | | test_tc_second_layer_tensor | 16.9200μs | 1.2123μs | 824.8468 KOps/s | 905.6879 KOps/s | $\textbf{\color{#d91a1a}-8.93\\%}$ | | test_tc_second_layer_nontensor | 19.8900μs | 4.2118μs | 237.4301 KOps/s | 248.0658 KOps/s | $\color{#d91a1a}-4.29\\%$ |