pytorch / tensordict

TensorDict is a dedicated tensor container for PyTorch.
MIT License
803 stars 65 forks source link

[Feature] Compile integration - nn #866

Closed vmoens closed 1 month ago

vmoens commented 1 month ago

Stack from ghstack (oldest at bottom):

github-actions[bot] commented 1 month ago

$\color{#D29922}\textsf{\Large ⚠\kern{0.2cm}\normalsize Warning}$ Result of CPU Benchmark Tests

Total Benchmarks: 133. Improved: $\large\color{#35bf28}8$. Worsened: $\large\color{#d91a1a}13$.

Expand to view detailed results | Name | Max | Mean | Ops | Ops on Repo `HEAD` | Change | | ------------------------------------------ | --------- | --------- | --------------- | ------------------ | ----------------------------------- | | test_plain_set_nested | 48.5300μs | 18.6161μs | 53.7170 KOps/s | 53.5989 KOps/s | $\color{#35bf28}+0.22\\%$ | | test_plain_set_stack_nested | 48.7500μs | 19.1472μs | 52.2270 KOps/s | 53.3060 KOps/s | $\color{#d91a1a}-2.02\\%$ | | test_plain_set_nested_inplace | 48.8710μs | 21.1220μs | 47.3440 KOps/s | 48.6338 KOps/s | $\color{#d91a1a}-2.65\\%$ | | test_plain_set_stack_nested_inplace | 52.2880μs | 20.9895μs | 47.6430 KOps/s | 48.9430 KOps/s | $\color{#d91a1a}-2.66\\%$ | | test_items | 45.2350μs | 2.6182μs | 381.9438 KOps/s | 360.9082 KOps/s | $\textbf{\color{#35bf28}+5.83\\%}$ | | test_items_nested | 0.4964ms | 0.3767ms | 2.6548 KOps/s | 2.7496 KOps/s | $\color{#d91a1a}-3.45\\%$ | | test_items_nested_locked | 0.9860ms | 0.3766ms | 2.6553 KOps/s | 2.7504 KOps/s | $\color{#d91a1a}-3.46\\%$ | | test_items_nested_leaf | 0.1527ms | 86.6052μs | 11.5466 KOps/s | 11.7409 KOps/s | $\color{#d91a1a}-1.65\\%$ | | test_items_stack_nested | 0.5920ms | 0.3775ms | 2.6492 KOps/s | 2.7428 KOps/s | $\color{#d91a1a}-3.41\\%$ | | test_items_stack_nested_leaf | 0.1648ms | 87.5248μs | 11.4253 KOps/s | 11.9174 KOps/s | $\color{#d91a1a}-4.13\\%$ | | test_items_stack_nested_locked | 1.2936ms | 0.3771ms | 2.6521 KOps/s | 2.7096 KOps/s | $\color{#d91a1a}-2.12\\%$ | | test_keys | 52.2670μs | 3.9957μs | 250.2666 KOps/s | 262.3158 KOps/s | $\color{#d91a1a}-4.59\\%$ | | test_keys_nested | 0.2478ms | 0.1464ms | 6.8285 KOps/s | 6.9095 KOps/s | $\color{#d91a1a}-1.17\\%$ | | test_keys_nested_locked | 2.8386ms | 0.1534ms | 6.5180 KOps/s | 6.6507 KOps/s | $\color{#d91a1a}-1.99\\%$ | | test_keys_nested_leaf | 0.2071ms | 0.1265ms | 7.9044 KOps/s | 8.0144 KOps/s | $\color{#d91a1a}-1.37\\%$ | | test_keys_stack_nested | 0.2439ms | 0.1467ms | 6.8151 KOps/s | 6.9687 KOps/s | 
$\color{#d91a1a}-2.20\\%$ | | test_keys_stack_nested_leaf | 0.2671ms | 0.1254ms | 7.9745 KOps/s | 8.1595 KOps/s | $\color{#d91a1a}-2.27\\%$ | | test_keys_stack_nested_locked | 0.2700ms | 0.1497ms | 6.6790 KOps/s | 6.7508 KOps/s | $\color{#d91a1a}-1.06\\%$ | | test_values | 11.4663μs | 1.1730μs | 852.5450 KOps/s | 864.8108 KOps/s | $\color{#d91a1a}-1.42\\%$ | | test_values_nested | 0.1410ms | 49.5888μs | 20.1658 KOps/s | 20.1307 KOps/s | $\color{#35bf28}+0.17\\%$ | | test_values_nested_locked | 0.1062ms | 50.7101μs | 19.7200 KOps/s | 20.2595 KOps/s | $\color{#d91a1a}-2.66\\%$ | | test_values_nested_leaf | 87.6430μs | 44.4448μs | 22.4998 KOps/s | 22.4074 KOps/s | $\color{#35bf28}+0.41\\%$ | | test_values_stack_nested | 82.3230μs | 51.2349μs | 19.5179 KOps/s | 19.5486 KOps/s | $\color{#d91a1a}-0.16\\%$ | | test_values_stack_nested_leaf | 0.1280ms | 44.6918μs | 22.3755 KOps/s | 22.8264 KOps/s | $\color{#d91a1a}-1.98\\%$ | | test_values_stack_nested_locked | 0.1060ms | 50.4041μs | 19.8396 KOps/s | 19.3212 KOps/s | $\color{#35bf28}+2.68\\%$ | | test_membership | 15.4390μs | 0.9027μs | 1.1078 MOps/s | 1.3854 MOps/s | $\textbf{\color{#d91a1a}-20.04\\%}$ | | test_membership_nested | 20.7590μs | 2.7976μs | 357.4548 KOps/s | 362.9795 KOps/s | $\color{#d91a1a}-1.52\\%$ | | test_membership_nested_leaf | 32.1090μs | 2.8270μs | 353.7348 KOps/s | 362.1377 KOps/s | $\color{#d91a1a}-2.32\\%$ | | test_membership_stacked_nested | 23.6940μs | 2.7829μs | 359.3423 KOps/s | 365.5504 KOps/s | $\color{#d91a1a}-1.70\\%$ | | test_membership_stacked_nested_leaf | 20.7980μs | 2.8025μs | 356.8303 KOps/s | 362.3054 KOps/s | $\color{#d91a1a}-1.51\\%$ | | test_membership_nested_last | 37.3090μs | 4.1327μs | 241.9753 KOps/s | 247.4583 KOps/s | $\color{#d91a1a}-2.22\\%$ | | test_membership_nested_leaf_last | 38.3010μs | 4.1594μs | 240.4200 KOps/s | 244.4472 KOps/s | $\color{#d91a1a}-1.65\\%$ | | test_membership_stacked_nested_last | 44.3030μs | 4.1571μs | 240.5499 KOps/s | 213.2636 KOps/s | 
$\textbf{\color{#35bf28}+12.79\\%}$ | | test_membership_stacked_nested_leaf_last | 30.0560μs | 4.1847μs | 238.9686 KOps/s | 212.2009 KOps/s | $\textbf{\color{#35bf28}+12.61\\%}$ | | test_nested_getleaf | 39.1330μs | 10.9365μs | 91.4370 KOps/s | 93.3042 KOps/s | $\color{#d91a1a}-2.00\\%$ | | test_nested_get | 37.5300μs | 10.3404μs | 96.7085 KOps/s | 97.2120 KOps/s | $\color{#d91a1a}-0.52\\%$ | | test_stacked_getleaf | 58.0890μs | 10.9379μs | 91.4250 KOps/s | 93.4791 KOps/s | $\color{#d91a1a}-2.20\\%$ | | test_stacked_get | 36.4480μs | 10.3827μs | 96.3141 KOps/s | 98.5513 KOps/s | $\color{#d91a1a}-2.27\\%$ | | test_nested_getitemleaf | 46.5260μs | 11.6201μs | 86.0578 KOps/s | 89.4480 KOps/s | $\color{#d91a1a}-3.79\\%$ | | test_nested_getitem | 47.4180μs | 10.6121μs | 94.2324 KOps/s | 96.1082 KOps/s | $\color{#d91a1a}-1.95\\%$ | | test_stacked_getitemleaf | 35.9070μs | 11.4975μs | 86.9751 KOps/s | 89.6408 KOps/s | $\color{#d91a1a}-2.97\\%$ | | test_stacked_getitem | 47.6080μs | 10.7198μs | 93.2856 KOps/s | 96.2773 KOps/s | $\color{#d91a1a}-3.11\\%$ | | test_lock_nested | 6.3031ms | 0.3519ms | 2.8420 KOps/s | 2.3329 KOps/s | $\textbf{\color{#35bf28}+21.82\\%}$ | | test_lock_stack_nested | 0.5650ms | 0.3108ms | 3.2177 KOps/s | 3.1547 KOps/s | $\color{#35bf28}+2.00\\%$ | | test_unlock_nested | 0.9184ms | 0.3583ms | 2.7911 KOps/s | 2.7773 KOps/s | $\color{#35bf28}+0.50\\%$ | | test_unlock_stack_nested | 0.5370ms | 0.3183ms | 3.1415 KOps/s | 3.1030 KOps/s | $\color{#35bf28}+1.24\\%$ | | test_flatten_speed | 0.2199ms | 0.1072ms | 9.3282 KOps/s | 9.4353 KOps/s | $\color{#d91a1a}-1.14\\%$ | | test_unflatten_speed | 0.6408ms | 0.4443ms | 2.2507 KOps/s | 2.2667 KOps/s | $\color{#d91a1a}-0.71\\%$ | | test_common_ops | 4.8330ms | 0.7769ms | 1.2872 KOps/s | 1.2747 KOps/s | $\color{#35bf28}+0.97\\%$ | | test_creation | 20.4280μs | 1.8730μs | 533.8890 KOps/s | 539.1658 KOps/s | $\color{#d91a1a}-0.98\\%$ | | test_creation_empty | 36.9990μs | 13.2062μs | 75.7220 KOps/s | 78.6381 
KOps/s | $\color{#d91a1a}-3.71\\%$ | | test_creation_nested_1 | 42.5890μs | 16.0237μs | 62.4075 KOps/s | 64.0298 KOps/s | $\color{#d91a1a}-2.53\\%$ | | test_creation_nested_2 | 55.2830μs | 19.4969μs | 51.2901 KOps/s | 52.9841 KOps/s | $\color{#d91a1a}-3.20\\%$ | | test_clone | 0.1077ms | 13.1012μs | 76.3291 KOps/s | 76.5140 KOps/s | $\color{#d91a1a}-0.24\\%$ | | test_getitem[int] | 66.8840μs | 11.9311μs | 83.8149 KOps/s | 86.6933 KOps/s | $\color{#d91a1a}-3.32\\%$ | | test_getitem[slice_int] | 61.1340μs | 25.0762μs | 39.8785 KOps/s | 41.1646 KOps/s | $\color{#d91a1a}-3.12\\%$ | | test_getitem[range] | 0.3231ms | 47.5110μs | 21.0477 KOps/s | 21.3739 KOps/s | $\color{#d91a1a}-1.53\\%$ | | test_getitem[tuple] | 61.1140μs | 19.5890μs | 51.0490 KOps/s | 50.3055 KOps/s | $\color{#35bf28}+1.48\\%$ | | test_getitem[list] | 0.4719ms | 42.4876μs | 23.5363 KOps/s | 24.1313 KOps/s | $\color{#d91a1a}-2.47\\%$ | | test_setitem_dim[int] | 86.7620μs | 36.5773μs | 27.3393 KOps/s | 29.1990 KOps/s | $\textbf{\color{#d91a1a}-6.37\\%}$ | | test_setitem_dim[slice_int] | 0.1187ms | 66.4166μs | 15.0565 KOps/s | 15.2240 KOps/s | $\color{#d91a1a}-1.10\\%$ | | test_setitem_dim[range] | 0.1383ms | 86.7303μs | 11.5300 KOps/s | 11.8593 KOps/s | $\color{#d91a1a}-2.78\\%$ | | test_setitem_dim[tuple] | 87.4530μs | 54.1537μs | 18.4660 KOps/s | 19.0542 KOps/s | $\color{#d91a1a}-3.09\\%$ | | test_setitem | 0.1968ms | 21.8182μs | 45.8333 KOps/s | 47.5669 KOps/s | $\color{#d91a1a}-3.64\\%$ | | test_set | 0.1542ms | 21.0713μs | 47.4579 KOps/s | 49.2993 KOps/s | $\color{#d91a1a}-3.74\\%$ | | test_set_shared | 7.0615ms | 0.1462ms | 6.8386 KOps/s | 6.8515 KOps/s | $\color{#d91a1a}-0.19\\%$ | | test_update | 0.1726ms | 25.6125μs | 39.0435 KOps/s | 41.3649 KOps/s | $\textbf{\color{#d91a1a}-5.61\\%}$ | | test_update_nested | 0.1521ms | 35.2281μs | 28.3864 KOps/s | 30.0172 KOps/s | $\textbf{\color{#d91a1a}-5.43\\%}$ | | test_update__nested | 0.1027ms | 25.3975μs | 39.3740 KOps/s | 40.0292 KOps/s | 
$\color{#d91a1a}-1.64\\%$ | | test_set_nested | 0.1611ms | 22.9357μs | 43.6001 KOps/s | 45.6768 KOps/s | $\color{#d91a1a}-4.55\\%$ | | test_set_nested_new | 0.1467ms | 27.4820μs | 36.3875 KOps/s | 37.2288 KOps/s | $\color{#d91a1a}-2.26\\%$ | | test_select | 0.3224ms | 43.5412μs | 22.9667 KOps/s | 23.3439 KOps/s | $\color{#d91a1a}-1.62\\%$ | | test_select_nested | 0.1436ms | 62.4884μs | 16.0030 KOps/s | 16.6949 KOps/s | $\color{#d91a1a}-4.14\\%$ | | test_exclude_nested | 0.1607ms | 81.8016μs | 12.2247 KOps/s | 12.6486 KOps/s | $\color{#d91a1a}-3.35\\%$ | | test_empty[True] | 0.6379ms | 0.3495ms | 2.8610 KOps/s | 2.9230 KOps/s | $\color{#d91a1a}-2.12\\%$ | | test_empty[False] | 7.5365μs | 1.1523μs | 867.8668 KOps/s | 876.9385 KOps/s | $\color{#d91a1a}-1.03\\%$ | | test_unbind_speed | 0.4071ms | 0.2511ms | 3.9819 KOps/s | 3.9047 KOps/s | $\color{#35bf28}+1.98\\%$ | | test_unbind_speed_stack0 | 0.4968ms | 0.2488ms | 4.0192 KOps/s | 4.0260 KOps/s | $\color{#d91a1a}-0.17\\%$ | | test_unbind_speed_stack1 | 0.1233s | 0.7673ms | 1.3032 KOps/s | 1.4659 KOps/s | $\textbf{\color{#d91a1a}-11.10\\%}$ | | test_split | 81.6523ms | 1.6747ms | 597.1062 Ops/s | 595.3492 Ops/s | $\color{#35bf28}+0.30\\%$ | | test_chunk | 0.1033s | 1.7126ms | 583.8985 Ops/s | 577.5397 Ops/s | $\color{#35bf28}+1.10\\%$ | | test_creation[device0] | 5.7013ms | 90.0450μs | 11.1056 KOps/s | 11.7002 KOps/s | $\textbf{\color{#d91a1a}-5.08\\%}$ | | test_creation_from_tensor | 0.3826ms | 87.3023μs | 11.4545 KOps/s | 11.7937 KOps/s | $\color{#d91a1a}-2.88\\%$ | | test_add_one[memmap_tensor0] | 0.1375ms | 5.2827μs | 189.2962 KOps/s | 183.2743 KOps/s | $\color{#35bf28}+3.29\\%$ | | test_contiguous[memmap_tensor0] | 13.7350μs | 0.6346μs | 1.5759 MOps/s | 1.5173 MOps/s | $\color{#35bf28}+3.86\\%$ | | test_stack[memmap_tensor0] | 30.6670μs | 3.5976μs | 277.9654 KOps/s | 268.6297 KOps/s | $\color{#35bf28}+3.48\\%$ | | test_memmaptd_index | 0.9656ms | 0.2618ms | 3.8194 KOps/s | 3.8585 KOps/s | $\color{#d91a1a}-1.01\\%$ 
| | test_memmaptd_index_astensor | 0.6155ms | 0.3377ms | 2.9610 KOps/s | 2.9603 KOps/s | $\color{#35bf28}+0.02\\%$ | | test_memmaptd_index_op | 1.2233ms | 0.6532ms | 1.5308 KOps/s | 1.5482 KOps/s | $\color{#d91a1a}-1.12\\%$ | | test_serialize_model | 0.1266s | 0.1065s | 9.3896 Ops/s | 8.7304 Ops/s | $\textbf{\color{#35bf28}+7.55\\%}$ | | test_serialize_model_pickle | 0.4537s | 0.3752s | 2.6649 Ops/s | 2.6024 Ops/s | $\color{#35bf28}+2.40\\%$ | | test_serialize_weights | 0.1157s | 0.1086s | 9.2087 Ops/s | 9.2200 Ops/s | $\color{#d91a1a}-0.12\\%$ | | test_serialize_weights_returnearly | 0.1381s | 0.1249s | 8.0050 Ops/s | 7.3334 Ops/s | $\textbf{\color{#35bf28}+9.16\\%}$ | | test_serialize_weights_pickle | 0.9600s | 0.6166s | 1.6218 Ops/s | 2.4669 Ops/s | $\textbf{\color{#d91a1a}-34.26\\%}$ | | test_serialize_weights_filesystem | 0.1938s | 0.1069s | 9.3503 Ops/s | 9.7997 Ops/s | $\color{#d91a1a}-4.59\\%$ | | test_serialize_model_filesystem | 0.1080s | 0.1010s | 9.8982 Ops/s | 9.0965 Ops/s | $\textbf{\color{#35bf28}+8.81\\%}$ | | test_reshape_pytree | 63.3580μs | 25.8044μs | 38.7531 KOps/s | 38.5503 KOps/s | $\color{#35bf28}+0.53\\%$ | | test_reshape_td | 84.5980μs | 35.0576μs | 28.5245 KOps/s | 28.7586 KOps/s | $\color{#d91a1a}-0.81\\%$ | | test_view_pytree | 0.1450ms | 27.2300μs | 36.7242 KOps/s | 39.0679 KOps/s | $\textbf{\color{#d91a1a}-6.00\\%}$ | | test_view_td | 87.7230μs | 41.9576μs | 23.8336 KOps/s | 25.0030 KOps/s | $\color{#d91a1a}-4.68\\%$ | | test_unbind_pytree | 72.8560μs | 29.8168μs | 33.5382 KOps/s | 33.0023 KOps/s | $\color{#35bf28}+1.62\\%$ | | test_unbind_td | 0.3658ms | 37.6356μs | 26.5706 KOps/s | 26.2139 KOps/s | $\color{#35bf28}+1.36\\%$ | | test_split_pytree | 0.1386ms | 29.8521μs | 33.4985 KOps/s | 33.3224 KOps/s | $\color{#35bf28}+0.53\\%$ | | test_split_td | 0.2628ms | 42.0422μs | 23.7856 KOps/s | 24.2933 KOps/s | $\color{#d91a1a}-2.09\\%$ | | test_add_pytree | 0.1127ms | 36.1203μs | 27.6853 KOps/s | 27.5867 KOps/s | $\color{#35bf28}+0.36\\%$ 
| | test_add_td | 0.1472ms | 62.1882μs | 16.0802 KOps/s | 17.0344 KOps/s | $\textbf{\color{#d91a1a}-5.60\\%}$ | | test_distributed | 0.2572ms | 0.1067ms | 9.3695 KOps/s | 9.7235 KOps/s | $\color{#d91a1a}-3.64\\%$ | | test_tdmodule | 0.1349ms | 18.4028μs | 54.3395 KOps/s | 51.8330 KOps/s | $\color{#35bf28}+4.84\\%$ | | test_tdmodule_dispatch | 68.3970μs | 37.9356μs | 26.3605 KOps/s | 25.7000 KOps/s | $\color{#35bf28}+2.57\\%$ | | test_tdseq | 49.6730μs | 20.8125μs | 48.0480 KOps/s | 43.6743 KOps/s | $\textbf{\color{#35bf28}+10.01\\%}$ | | test_tdseq_dispatch | 78.2860μs | 44.3164μs | 22.5650 KOps/s | 22.4556 KOps/s | $\color{#35bf28}+0.49\\%$ | | test_instantiation_functorch | 2.3162ms | 1.3661ms | 731.9912 Ops/s | 750.2092 Ops/s | $\color{#d91a1a}-2.43\\%$ | | test_instantiation_td | 87.0941ms | 1.1765ms | 849.9445 Ops/s | 972.3136 Ops/s | $\textbf{\color{#d91a1a}-12.59\\%}$ | | test_exec_functorch | 0.2971ms | 0.1667ms | 6.0006 KOps/s | 6.0594 KOps/s | $\color{#d91a1a}-0.97\\%$ | | test_exec_functional_call | 0.2847ms | 0.1517ms | 6.5918 KOps/s | 6.5459 KOps/s | $\color{#35bf28}+0.70\\%$ | | test_exec_td | 0.2811ms | 0.1536ms | 6.5114 KOps/s | 6.6048 KOps/s | $\color{#d91a1a}-1.41\\%$ | | test_exec_td_decorator | 0.3643ms | 0.2400ms | 4.1674 KOps/s | 4.3980 KOps/s | $\textbf{\color{#d91a1a}-5.24\\%}$ | | test_vmap_mlp_speed[True-True] | 0.9835ms | 0.5090ms | 1.9646 KOps/s | 1.9855 KOps/s | $\color{#d91a1a}-1.06\\%$ | | test_vmap_mlp_speed[True-False] | 0.6946ms | 0.5038ms | 1.9850 KOps/s | 1.9740 KOps/s | $\color{#35bf28}+0.56\\%$ | | test_vmap_mlp_speed[False-True] | 0.6780ms | 0.4116ms | 2.4293 KOps/s | 2.4543 KOps/s | $\color{#d91a1a}-1.02\\%$ | | test_vmap_mlp_speed[False-False] | 0.5949ms | 0.4113ms | 2.4311 KOps/s | 2.4711 KOps/s | $\color{#d91a1a}-1.62\\%$ | | test_vmap_mlp_speed_decorator[True-True] | 1.0821ms | 0.5846ms | 1.7105 KOps/s | 1.7250 KOps/s | $\color{#d91a1a}-0.84\\%$ | | test_vmap_mlp_speed_decorator[True-False] | 1.0748ms | 0.5793ms | 1.7263 
KOps/s | 1.7172 KOps/s | $\color{#35bf28}+0.53\\%$ | | test_vmap_mlp_speed_decorator[False-True] | 0.7345ms | 0.4719ms | 2.1190 KOps/s | 2.0959 KOps/s | $\color{#35bf28}+1.10\\%$ | | test_vmap_mlp_speed_decorator[False-False] | 0.7650ms | 0.4730ms | 2.1142 KOps/s | 2.1030 KOps/s | $\color{#35bf28}+0.54\\%$ | | test_to_module_speed[True] | 2.8871ms | 1.8718ms | 534.2536 Ops/s | 575.0435 Ops/s | $\textbf{\color{#d91a1a}-7.09\\%}$ | | test_to_module_speed[False] | 2.8887ms | 1.8242ms | 548.1913 Ops/s | 592.9386 Ops/s | $\textbf{\color{#d91a1a}-7.55\\%}$ | | test_tc_init | 0.1615ms | 62.2010μs | 16.0769 KOps/s | 16.9044 KOps/s | $\color{#d91a1a}-4.89\\%$ | | test_tc_init_nested | 0.2674ms | 0.1219ms | 8.2068 KOps/s | 8.3707 KOps/s | $\color{#d91a1a}-1.96\\%$ | | test_tc_first_layer_tensor | 31.6290μs | 8.5449μs | 117.0288 KOps/s | 121.6987 KOps/s | $\color{#d91a1a}-3.84\\%$ | | test_tc_first_layer_nontensor | 31.9500μs | 8.4068μs | 118.9516 KOps/s | 123.8330 KOps/s | $\color{#d91a1a}-3.94\\%$ | | test_tc_second_layer_tensor | 23.1630μs | 2.5816μs | 387.3553 KOps/s | 396.6350 KOps/s | $\color{#d91a1a}-2.34\\%$ | | test_tc_second_layer_nontensor | 72.9740μs | 9.1843μs | 108.8820 KOps/s | 108.1579 KOps/s | $\color{#35bf28}+0.67\\%$ |
github-actions[bot] commented 1 month ago

$\color{#D29922}\textsf{\Large ⚠\kern{0.2cm}\normalsize Warning}$ Result of GPU Benchmark Tests

Total Benchmarks: 141. Improved: $\large\color{#35bf28}10$. Worsened: $\large\color{#d91a1a}15$.

Expand to view detailed results | Name | Max | Mean | Ops | Ops on Repo `HEAD` | Change | | -------------------------------------------------- | --------- | --------- | --------------- | ------------------ | ----------------------------------- | | test_plain_set_nested | 68.8920μs | 12.3239μs | 81.1432 KOps/s | 87.7845 KOps/s | $\textbf{\color{#d91a1a}-7.57\\%}$ | | test_plain_set_stack_nested | 35.0300μs | 12.2627μs | 81.5480 KOps/s | 88.0079 KOps/s | $\textbf{\color{#d91a1a}-7.34\\%}$ | | test_plain_set_nested_inplace | 31.6410μs | 13.2598μs | 75.4161 KOps/s | 80.7128 KOps/s | $\textbf{\color{#d91a1a}-6.56\\%}$ | | test_plain_set_stack_nested_inplace | 31.4110μs | 13.2100μs | 75.7002 KOps/s | 81.6307 KOps/s | $\textbf{\color{#d91a1a}-7.26\\%}$ | | test_items | 19.5800μs | 4.7001μs | 212.7597 KOps/s | 216.9318 KOps/s | $\color{#d91a1a}-1.92\\%$ | | test_items_nested | 0.4112ms | 0.3902ms | 2.5627 KOps/s | 2.5645 KOps/s | $\color{#d91a1a}-0.07\\%$ | | test_items_nested_locked | 0.5335ms | 0.3870ms | 2.5841 KOps/s | 2.5331 KOps/s | $\color{#35bf28}+2.01\\%$ | | test_items_nested_leaf | 0.1071ms | 86.7596μs | 11.5261 KOps/s | 11.5861 KOps/s | $\color{#d91a1a}-0.52\\%$ | | test_items_stack_nested | 0.4110ms | 0.3902ms | 2.5630 KOps/s | 2.5253 KOps/s | $\color{#35bf28}+1.49\\%$ | | test_items_stack_nested_leaf | 0.1079ms | 88.7737μs | 11.2646 KOps/s | 11.3447 KOps/s | $\color{#d91a1a}-0.71\\%$ | | test_items_stack_nested_locked | 0.5895ms | 0.3902ms | 2.5626 KOps/s | 2.5483 KOps/s | $\color{#35bf28}+0.56\\%$ | | test_keys | 0.1948ms | 4.3673μs | 228.9756 KOps/s | 228.2178 KOps/s | $\color{#35bf28}+0.33\\%$ | | test_keys_nested | 86.7530μs | 67.3231μs | 14.8537 KOps/s | 14.5204 KOps/s | $\color{#35bf28}+2.30\\%$ | | test_keys_nested_locked | 2.2382ms | 74.5767μs | 13.4090 KOps/s | 13.5802 KOps/s | $\color{#d91a1a}-1.26\\%$ | | test_keys_nested_leaf | 74.0720μs | 59.1930μs | 16.8939 KOps/s | 16.8998 KOps/s | $\color{#d91a1a}-0.04\\%$ | | test_keys_stack_nested | 
91.7520μs | 69.1711μs | 14.4569 KOps/s | 14.7630 KOps/s | $\color{#d91a1a}-2.07\\%$ | | test_keys_stack_nested_leaf | 80.3120μs | 58.9791μs | 16.9551 KOps/s | 17.4749 KOps/s | $\color{#d91a1a}-2.97\\%$ | | test_keys_stack_nested_locked | 98.4320μs | 73.7336μs | 13.5623 KOps/s | 13.5434 KOps/s | $\color{#35bf28}+0.14\\%$ | | test_values | 6.1833μs | 1.7737μs | 563.7779 KOps/s | 567.6275 KOps/s | $\color{#d91a1a}-0.68\\%$ | | test_values_nested | 48.6620μs | 34.2382μs | 29.2072 KOps/s | 29.0245 KOps/s | $\color{#35bf28}+0.63\\%$ | | test_values_nested_locked | 0.1573ms | 36.2408μs | 27.5932 KOps/s | 27.7214 KOps/s | $\color{#d91a1a}-0.46\\%$ | | test_values_nested_leaf | 94.2030μs | 30.3787μs | 32.9177 KOps/s | 32.8818 KOps/s | $\color{#35bf28}+0.11\\%$ | | test_values_stack_nested | 0.1872ms | 34.5004μs | 28.9852 KOps/s | 28.8997 KOps/s | $\color{#35bf28}+0.30\\%$ | | test_values_stack_nested_leaf | 55.1110μs | 30.6348μs | 32.6426 KOps/s | 32.2369 KOps/s | $\color{#35bf28}+1.26\\%$ | | test_values_stack_nested_locked | 0.1888ms | 36.2744μs | 27.5676 KOps/s | 27.3600 KOps/s | $\color{#35bf28}+0.76\\%$ | | test_membership | 1.5880μs | 0.5356μs | 1.8670 MOps/s | 1.8541 MOps/s | $\color{#35bf28}+0.70\\%$ | | test_membership_nested | 18.3000μs | 2.0620μs | 484.9553 KOps/s | 483.4127 KOps/s | $\color{#35bf28}+0.32\\%$ | | test_membership_nested_leaf | 12.1105μs | 1.9881μs | 502.9950 KOps/s | 505.4331 KOps/s | $\color{#d91a1a}-0.48\\%$ | | test_membership_stacked_nested | 25.1010μs | 2.0358μs | 491.2032 KOps/s | 483.6654 KOps/s | $\color{#35bf28}+1.56\\%$ | | test_membership_stacked_nested_leaf | 22.4310μs | 2.0052μs | 498.7028 KOps/s | 482.7219 KOps/s | $\color{#35bf28}+3.31\\%$ | | test_membership_nested_last | 20.7300μs | 3.0111μs | 332.1092 KOps/s | 331.8743 KOps/s | $\color{#35bf28}+0.07\\%$ | | test_membership_nested_leaf_last | 25.6600μs | 2.9998μs | 333.3551 KOps/s | 333.3530 KOps/s | $+0.00\\%$ | | test_membership_stacked_nested_last | 0.1260ms | 2.9983μs | 
333.5229 KOps/s | 290.5853 KOps/s | $\textbf{\color{#35bf28}+14.78\\%}$ | | test_membership_stacked_nested_leaf_last | 23.5810μs | 2.9964μs | 333.7335 KOps/s | 289.2693 KOps/s | $\textbf{\color{#35bf28}+15.37\\%}$ | | test_nested_getleaf | 71.8020μs | 8.0458μs | 124.2889 KOps/s | 124.0507 KOps/s | $\color{#35bf28}+0.19\\%$ | | test_nested_get | 20.8310μs | 7.5917μs | 131.7220 KOps/s | 132.1216 KOps/s | $\color{#d91a1a}-0.30\\%$ | | test_stacked_getleaf | 25.5810μs | 8.0030μs | 124.9526 KOps/s | 124.1990 KOps/s | $\color{#35bf28}+0.61\\%$ | | test_stacked_get | 23.8600μs | 7.5743μs | 132.0259 KOps/s | 132.4895 KOps/s | $\color{#d91a1a}-0.35\\%$ | | test_nested_getitemleaf | 74.3220μs | 8.1724μs | 122.3631 KOps/s | 122.1775 KOps/s | $\color{#35bf28}+0.15\\%$ | | test_nested_getitem | 30.2510μs | 7.7648μs | 128.7859 KOps/s | 129.0057 KOps/s | $\color{#d91a1a}-0.17\\%$ | | test_stacked_getitemleaf | 68.5410μs | 8.2192μs | 121.6663 KOps/s | 121.9496 KOps/s | $\color{#d91a1a}-0.23\\%$ | | test_stacked_getitem | 22.3800μs | 7.7264μs | 129.4271 KOps/s | 128.8729 KOps/s | $\color{#35bf28}+0.43\\%$ | | test_lock_nested | 4.2613ms | 0.3300ms | 3.0305 KOps/s | 3.0366 KOps/s | $\color{#d91a1a}-0.20\\%$ | | test_lock_stack_nested | 0.3755ms | 0.2900ms | 3.4480 KOps/s | 3.4539 KOps/s | $\color{#d91a1a}-0.17\\%$ | | test_unlock_nested | 0.7173ms | 0.3290ms | 3.0396 KOps/s | 3.0486 KOps/s | $\color{#d91a1a}-0.29\\%$ | | test_unlock_stack_nested | 0.4809ms | 0.3015ms | 3.3167 KOps/s | 3.3524 KOps/s | $\color{#d91a1a}-1.06\\%$ | | test_flatten_speed | 0.3522ms | 0.1078ms | 9.2792 KOps/s | 9.3662 KOps/s | $\color{#d91a1a}-0.93\\%$ | | test_unflatten_speed | 0.4795ms | 0.2867ms | 3.4882 KOps/s | 3.4817 KOps/s | $\color{#35bf28}+0.19\\%$ | | test_common_ops | 1.0327ms | 0.5397ms | 1.8530 KOps/s | 1.9618 KOps/s | $\textbf{\color{#d91a1a}-5.54\\%}$ | | test_creation | 34.1910μs | 1.4802μs | 675.5674 KOps/s | 671.0261 KOps/s | $\color{#35bf28}+0.68\\%$ | | test_creation_empty | 0.1973ms | 
7.6641μs | 130.4785 KOps/s | 167.1872 KOps/s | $\textbf{\color{#d91a1a}-21.96\\%}$ | | test_creation_nested_1 | 23.5910μs | 9.3874μs | 106.5259 KOps/s | 131.0809 KOps/s | $\textbf{\color{#d91a1a}-18.73\\%}$ | | test_creation_nested_2 | 32.5910μs | 11.7815μs | 84.8791 KOps/s | 102.8636 KOps/s | $\textbf{\color{#d91a1a}-17.48\\%}$ | | test_clone | 95.1230μs | 11.0292μs | 90.6687 KOps/s | 87.1130 KOps/s | $\color{#35bf28}+4.08\\%$ | | test_getitem[int] | 28.9310μs | 10.3758μs | 96.3783 KOps/s | 94.7032 KOps/s | $\color{#35bf28}+1.77\\%$ | | test_getitem[slice_int] | 36.7000μs | 19.5144μs | 51.2442 KOps/s | 51.0695 KOps/s | $\color{#35bf28}+0.34\\%$ | | test_getitem[range] | 0.2157ms | 34.2752μs | 29.1756 KOps/s | 27.2864 KOps/s | $\textbf{\color{#35bf28}+6.92\\%}$ | | test_getitem[tuple] | 33.7410μs | 17.1041μs | 58.4657 KOps/s | 56.1045 KOps/s | $\color{#35bf28}+4.21\\%$ | | test_getitem[list] | 0.1847ms | 29.5697μs | 33.8183 KOps/s | 32.4202 KOps/s | $\color{#35bf28}+4.31\\%$ | | test_setitem_dim[int] | 44.5610μs | 22.2953μs | 44.8525 KOps/s | 45.9976 KOps/s | $\color{#d91a1a}-2.49\\%$ | | test_setitem_dim[slice_int] | 0.1872ms | 44.0841μs | 22.6839 KOps/s | 23.7769 KOps/s | $\color{#d91a1a}-4.60\\%$ | | test_setitem_dim[range] | 77.5710μs | 56.9576μs | 17.5569 KOps/s | 17.6832 KOps/s | $\color{#d91a1a}-0.71\\%$ | | test_setitem_dim[tuple] | 54.7210μs | 37.1189μs | 26.9405 KOps/s | 27.6989 KOps/s | $\color{#d91a1a}-2.74\\%$ | | test_setitem | 0.1758ms | 14.9025μs | 67.1030 KOps/s | 69.0046 KOps/s | $\color{#d91a1a}-2.76\\%$ | | test_set | 0.1995ms | 14.3075μs | 69.8933 KOps/s | 72.4928 KOps/s | $\color{#d91a1a}-3.59\\%$ | | test_set_shared | 3.2519ms | 95.3954μs | 10.4827 KOps/s | 10.6060 KOps/s | $\color{#d91a1a}-1.16\\%$ | | test_update | 0.1615ms | 17.5713μs | 56.9110 KOps/s | 64.4128 KOps/s | $\textbf{\color{#d91a1a}-11.65\\%}$ | | test_update_nested | 74.6520μs | 21.9872μs | 45.4810 KOps/s | 47.7646 KOps/s | $\color{#d91a1a}-4.78\\%$ | | test_update__nested | 
0.1156ms | 20.8639μs | 47.9298 KOps/s | 45.8646 KOps/s | $\color{#35bf28}+4.50\\%$ | | test_set_nested | 82.4320μs | 15.4482μs | 64.7324 KOps/s | 65.7351 KOps/s | $\color{#d91a1a}-1.53\\%$ | | test_set_nested_new | 82.4820μs | 17.8307μs | 56.0832 KOps/s | 56.9795 KOps/s | $\color{#d91a1a}-1.57\\%$ | | test_select | 80.2620μs | 30.6999μs | 32.5734 KOps/s | 34.2647 KOps/s | $\color{#d91a1a}-4.94\\%$ | | test_select_nested | 0.1493ms | 52.9767μs | 18.8762 KOps/s | 19.0750 KOps/s | $\color{#d91a1a}-1.04\\%$ | | test_exclude_nested | 88.4020μs | 71.5480μs | 13.9766 KOps/s | 14.3407 KOps/s | $\color{#d91a1a}-2.54\\%$ | | test_empty[True] | 0.3246ms | 0.3011ms | 3.3216 KOps/s | 3.3803 KOps/s | $\color{#d91a1a}-1.74\\%$ | | test_empty[False] | 2.8041μs | 0.8616μs | 1.1606 MOps/s | 1.1911 MOps/s | $\color{#d91a1a}-2.56\\%$ | | test_to | 85.4430μs | 55.2201μs | 18.1093 KOps/s | 17.8934 KOps/s | $\color{#35bf28}+1.21\\%$ | | test_to_nonblocking | 1.8302ms | 33.4901μs | 29.8596 KOps/s | 29.7897 KOps/s | $\color{#35bf28}+0.23\\%$ | | test_unbind_speed | 0.3741ms | 0.2558ms | 3.9087 KOps/s | 4.0025 KOps/s | $\color{#d91a1a}-2.34\\%$ | | test_unbind_speed_stack0 | 0.2868ms | 0.2564ms | 3.9009 KOps/s | 3.9811 KOps/s | $\color{#d91a1a}-2.02\\%$ | | test_unbind_speed_stack1 | 95.9189ms | 0.7907ms | 1.2646 KOps/s | 1.4083 KOps/s | $\textbf{\color{#d91a1a}-10.20\\%}$ | | test_split | 92.3896ms | 1.6277ms | 614.3808 Ops/s | 605.7430 Ops/s | $\color{#35bf28}+1.43\\%$ | | test_chunk | 1.6203ms | 1.4813ms | 675.0974 Ops/s | 608.3274 Ops/s | $\textbf{\color{#35bf28}+10.98\\%}$ | | test_creation[device0] | 0.1996ms | 54.3036μs | 18.4150 KOps/s | 18.2297 KOps/s | $\color{#35bf28}+1.02\\%$ | | test_creation_from_tensor | 0.1923ms | 50.9393μs | 19.6312 KOps/s | 19.2573 KOps/s | $\color{#35bf28}+1.94\\%$ | | test_add_one[memmap_tensor0] | 0.1293ms | 6.4652μs | 154.6746 KOps/s | 148.4490 KOps/s | $\color{#35bf28}+4.19\\%$ | | test_contiguous[memmap_tensor0] | 24.2110μs | 0.5460μs | 1.8316 MOps/s 
| 1.7966 MOps/s | $\color{#35bf28}+1.95\\%$ | | test_stack[memmap_tensor0] | 20.2410μs | 4.5939μs | 217.6798 KOps/s | 205.6744 KOps/s | $\textbf{\color{#35bf28}+5.84\\%}$ | | test_memmaptd_index | 92.6999ms | 0.2950ms | 3.3894 KOps/s | 3.8527 KOps/s | $\textbf{\color{#d91a1a}-12.03\\%}$ | | test_memmaptd_index_astensor | 0.5678ms | 0.3126ms | 3.1992 KOps/s | 3.1183 KOps/s | $\color{#35bf28}+2.60\\%$ | | test_memmaptd_index_op | 0.8373ms | 0.5706ms | 1.7525 KOps/s | 1.7707 KOps/s | $\color{#d91a1a}-1.03\\%$ | | test_serialize_model | 99.2915ms | 91.1020ms | 10.9767 Ops/s | 10.5352 Ops/s | $\color{#35bf28}+4.19\\%$ | | test_serialize_model_pickle | 1.6882s | 1.3883s | 0.7203 Ops/s | 0.8064 Ops/s | $\textbf{\color{#d91a1a}-10.68\\%}$ | | test_serialize_weights | 0.1845s | 99.0889ms | 10.0919 Ops/s | 9.4758 Ops/s | $\textbf{\color{#35bf28}+6.50\\%}$ | | test_serialize_weights_returnearly | 0.2910s | 78.2706ms | 12.7762 Ops/s | 13.3315 Ops/s | $\color{#d91a1a}-4.17\\%$ | | test_serialize_weights_pickle | 1.4050s | 1.2540s | 0.7975 Ops/s | 0.8018 Ops/s | $\color{#d91a1a}-0.54\\%$ | | test_reshape_pytree | 84.7420μs | 24.7237μs | 40.4470 KOps/s | 39.6205 KOps/s | $\color{#35bf28}+2.09\\%$ | | test_reshape_td | 0.2723ms | 30.3433μs | 32.9562 KOps/s | 32.0465 KOps/s | $\color{#35bf28}+2.84\\%$ | | test_view_pytree | 0.1219ms | 24.6493μs | 40.5690 KOps/s | 38.4220 KOps/s | $\textbf{\color{#35bf28}+5.59\\%}$ | | test_view_td | 0.2546ms | 36.3866μs | 27.4827 KOps/s | 27.7033 KOps/s | $\color{#d91a1a}-0.80\\%$ | | test_unbind_pytree | 0.1840ms | 30.5464μs | 32.7371 KOps/s | 32.2427 KOps/s | $\color{#35bf28}+1.53\\%$ | | test_unbind_td | 0.4527ms | 38.5038μs | 25.9714 KOps/s | 25.8588 KOps/s | $\color{#35bf28}+0.44\\%$ | | test_split_pytree | 0.1423ms | 33.0025μs | 30.3007 KOps/s | 29.3696 KOps/s | $\color{#35bf28}+3.17\\%$ | | test_split_td | 0.4790ms | 37.1180μs | 26.9411 KOps/s | 26.4400 KOps/s | $\color{#35bf28}+1.90\\%$ | | test_add_pytree | 0.2731ms | 35.4768μs | 28.1874 
KOps/s | 26.9519 KOps/s | $\color{#35bf28}+4.58\\%$ | | test_add_td | 0.2183ms | 45.3025μs | 22.0738 KOps/s | 23.1529 KOps/s | $\color{#d91a1a}-4.66\\%$ | | test_distributed | 6.1870ms | 95.5527μs | 10.4654 KOps/s | 11.5262 KOps/s | $\textbf{\color{#d91a1a}-9.20\\%}$ | | test_tdmodule | 0.1597ms | 13.5544μs | 73.7770 KOps/s | 75.5901 KOps/s | $\color{#d91a1a}-2.40\\%$ | | test_tdmodule_dispatch | 41.7410μs | 26.4254μs | 37.8425 KOps/s | 39.0130 KOps/s | $\color{#d91a1a}-3.00\\%$ | | test_tdseq | 29.9600μs | 14.3481μs | 69.6954 KOps/s | 68.0419 KOps/s | $\color{#35bf28}+2.43\\%$ | | test_tdseq_dispatch | 49.8910μs | 29.3627μs | 34.0568 KOps/s | 35.3614 KOps/s | $\color{#d91a1a}-3.69\\%$ | | test_instantiation_functorch | 1.5264ms | 1.3700ms | 729.9158 Ops/s | 730.8387 Ops/s | $\color{#d91a1a}-0.13\\%$ | | test_instantiation_td | 1.5334ms | 0.9675ms | 1.0336 KOps/s | 1.0239 KOps/s | $\color{#35bf28}+0.94\\%$ | | test_exec_functorch | 0.2566ms | 0.1368ms | 7.3109 KOps/s | 7.1323 KOps/s | $\color{#35bf28}+2.50\\%$ | | test_exec_functional_call | 0.2194ms | 0.1247ms | 8.0182 KOps/s | 7.6260 KOps/s | $\textbf{\color{#35bf28}+5.14\\%}$ | | test_exec_td | 0.2203ms | 0.1237ms | 8.0847 KOps/s | 7.9804 KOps/s | $\color{#35bf28}+1.31\\%$ | | test_exec_td_decorator | 0.8111ms | 0.2012ms | 4.9699 KOps/s | 5.0688 KOps/s | $\color{#d91a1a}-1.95\\%$ | | test_vmap_mlp_speed[True-True] | 0.7669ms | 0.5513ms | 1.8140 KOps/s | 1.8081 KOps/s | $\color{#35bf28}+0.32\\%$ | | test_vmap_mlp_speed[True-False] | 0.7289ms | 0.5513ms | 1.8141 KOps/s | 1.7850 KOps/s | $\color{#35bf28}+1.63\\%$ | | test_vmap_mlp_speed[False-True] | 0.6580ms | 0.4843ms | 2.0649 KOps/s | 1.9597 KOps/s | $\textbf{\color{#35bf28}+5.37\\%}$ | | test_vmap_mlp_speed[False-False] | 0.6761ms | 0.4868ms | 2.0544 KOps/s | 1.9582 KOps/s | $\color{#35bf28}+4.91\\%$ | | test_vmap_mlp_speed_decorator[True-True] | 1.2138ms | 0.6166ms | 1.6217 KOps/s | 1.6031 KOps/s | $\color{#35bf28}+1.16\\%$ | | 
test_vmap_mlp_speed_decorator[True-False] | 0.7821ms | 0.6149ms | 1.6263 KOps/s | 1.5978 KOps/s | $\color{#35bf28}+1.79\\%$ | | test_vmap_mlp_speed_decorator[False-True] | 0.7127ms | 0.5434ms | 1.8403 KOps/s | 1.7808 KOps/s | $\color{#35bf28}+3.34\\%$ | | test_vmap_mlp_speed_decorator[False-False] | 0.7117ms | 0.5428ms | 1.8422 KOps/s | 1.8433 KOps/s | $\color{#d91a1a}-0.06\\%$ | | test_vmap_transformer_speed[True-True] | 7.4600ms | 7.3208ms | 136.5965 Ops/s | 125.3903 Ops/s | $\textbf{\color{#35bf28}+8.94\\%}$ | | test_vmap_transformer_speed[True-False] | 7.6500ms | 7.3229ms | 136.5585 Ops/s | 131.6737 Ops/s | $\color{#35bf28}+3.71\\%$ | | test_vmap_transformer_speed[False-True] | 7.5683ms | 7.2481ms | 137.9670 Ops/s | 133.8116 Ops/s | $\color{#35bf28}+3.11\\%$ | | test_vmap_transformer_speed[False-False] | 7.5182ms | 7.2416ms | 138.0910 Ops/s | 133.8973 Ops/s | $\color{#35bf28}+3.13\\%$ | | test_vmap_transformer_speed_decorator[True-True] | 18.0647ms | 17.7480ms | 56.3444 Ops/s | 54.9072 Ops/s | $\color{#35bf28}+2.62\\%$ | | test_vmap_transformer_speed_decorator[True-False] | 18.0698ms | 17.7468ms | 56.3482 Ops/s | 54.9437 Ops/s | $\color{#35bf28}+2.56\\%$ | | test_vmap_transformer_speed_decorator[False-True] | 17.9473ms | 17.6476ms | 56.6651 Ops/s | 55.0954 Ops/s | $\color{#35bf28}+2.85\\%$ | | test_vmap_transformer_speed_decorator[False-False] | 17.9669ms | 17.6299ms | 56.7218 Ops/s | 55.1872 Ops/s | $\color{#35bf28}+2.78\\%$ | | test_to_module_speed[True] | 2.0189ms | 1.5151ms | 660.0316 Ops/s | 672.1637 Ops/s | $\color{#d91a1a}-1.80\\%$ | | test_to_module_speed[False] | 1.6607ms | 1.5093ms | 662.5644 Ops/s | 679.7212 Ops/s | $\color{#d91a1a}-2.52\\%$ | | test_tc_init | 0.1643ms | 51.7417μs | 19.3268 KOps/s | 20.6415 KOps/s | $\textbf{\color{#d91a1a}-6.37\\%}$ | | test_tc_init_nested | 0.2087ms | 0.1035ms | 9.6635 KOps/s | 10.6215 KOps/s | $\textbf{\color{#d91a1a}-9.02\\%}$ | | test_tc_first_layer_tensor | 17.1800μs | 3.5121μs | 284.7266 KOps/s | 280.3081 
KOps/s | $\color{#35bf28}+1.58\\%$ | | test_tc_first_layer_nontensor | 0.1119ms | 3.5373μs | 282.6986 KOps/s | 273.6134 KOps/s | $\color{#35bf28}+3.32\\%$ | | test_tc_second_layer_tensor | 5.4902μs | 1.1077μs | 902.7722 KOps/s | 881.8796 KOps/s | $\color{#35bf28}+2.37\\%$ | | test_tc_second_layer_nontensor | 17.8100μs | 4.0524μs | 246.7663 KOps/s | 246.8324 KOps/s | $\color{#d91a1a}-0.03\\%$ |