pytorch / tensordict

TensorDict is a PyTorch-dedicated tensor container.
MIT License
803 stars 65 forks source link

[Feature] Compile integration - basics #865

Closed vmoens closed 1 month ago

vmoens commented 1 month ago

Stack from ghstack (oldest at bottom):

github-actions[bot] commented 1 month ago

$\color{#D29922}\textsf{\Large⚠\kern{0.2cm}\normalsize Warning}$ Result of CPU Benchmark Tests

Total Benchmarks: 133. Improved: $\large\color{#35bf28}41$. Worsened: $\large\color{#d91a1a}20$.

Expand to view detailed results | Name | Max | Mean | Ops | Ops on Repo `HEAD` | Change | | ------------------------------------------ | --------- | --------- | --------------- | ------------------ | ------------------------------------ | | test_plain_set_nested | 29.3140μs | 15.9485μs | 62.7017 KOps/s | 56.4410 KOps/s | $\textbf{\color{#35bf28}+11.09\\%}$ | | test_plain_set_stack_nested | 39.9450μs | 16.3465μs | 61.1751 KOps/s | 56.1688 KOps/s | $\textbf{\color{#35bf28}+8.91\\%}$ | | test_plain_set_nested_inplace | 53.4890μs | 17.6745μs | 56.5787 KOps/s | 50.6005 KOps/s | $\textbf{\color{#35bf28}+11.81\\%}$ | | test_plain_set_stack_nested_inplace | 69.4820μs | 17.6533μs | 56.6465 KOps/s | 50.8277 KOps/s | $\textbf{\color{#35bf28}+11.45\\%}$ | | test_items | 18.0630μs | 2.6178μs | 381.9979 KOps/s | 388.9715 KOps/s | $\color{#d91a1a}-1.79\\%$ | | test_items_nested | 0.5976ms | 0.3620ms | 2.7625 KOps/s | 3.7210 KOps/s | $\textbf{\color{#d91a1a}-25.76\\%}$ | | test_items_nested_locked | 1.7993ms | 0.3657ms | 2.7348 KOps/s | 3.7115 KOps/s | $\textbf{\color{#d91a1a}-26.32\\%}$ | | test_items_nested_leaf | 0.1762ms | 85.9387μs | 11.6362 KOps/s | 12.7373 KOps/s | $\textbf{\color{#d91a1a}-8.64\\%}$ | | test_items_stack_nested | 0.5642ms | 0.3662ms | 2.7304 KOps/s | 3.6887 KOps/s | $\textbf{\color{#d91a1a}-25.98\\%}$ | | test_items_stack_nested_leaf | 0.1565ms | 88.4286μs | 11.3086 KOps/s | 12.8154 KOps/s | $\textbf{\color{#d91a1a}-11.76\\%}$ | | test_items_stack_nested_locked | 0.5469ms | 0.3658ms | 2.7335 KOps/s | 3.6324 KOps/s | $\textbf{\color{#d91a1a}-24.75\\%}$ | | test_keys | 39.8850μs | 3.8222μs | 261.6307 KOps/s | 262.0466 KOps/s | $\color{#d91a1a}-0.16\\%$ | | test_keys_nested | 0.2508ms | 0.1422ms | 7.0332 KOps/s | 7.1951 KOps/s | $\color{#d91a1a}-2.25\\%$ | | test_keys_nested_locked | 0.7554ms | 0.1487ms | 6.7258 KOps/s | 7.0000 KOps/s | $\color{#d91a1a}-3.92\\%$ | | test_keys_nested_leaf | 0.2144ms | 0.1225ms | 8.1629 KOps/s | 8.4998 KOps/s | 
$\color{#d91a1a}-3.96\\%$ | | test_keys_stack_nested | 0.2729ms | 0.1449ms | 6.8992 KOps/s | 7.3789 KOps/s | $\textbf{\color{#d91a1a}-6.50\\%}$ | | test_keys_stack_nested_leaf | 0.2613ms | 0.1230ms | 8.1325 KOps/s | 8.7221 KOps/s | $\textbf{\color{#d91a1a}-6.76\\%}$ | | test_keys_stack_nested_locked | 0.2577ms | 0.1492ms | 6.7009 KOps/s | 7.1948 KOps/s | $\textbf{\color{#d91a1a}-6.86\\%}$ | | test_values | 10.4723μs | 1.1395μs | 877.5759 KOps/s | 815.6687 KOps/s | $\textbf{\color{#35bf28}+7.59\\%}$ | | test_values_nested | 0.1004ms | 49.3508μs | 20.2631 KOps/s | 19.5496 KOps/s | $\color{#35bf28}+3.65\\%$ | | test_values_nested_locked | 96.2390μs | 49.6424μs | 20.1441 KOps/s | 19.4780 KOps/s | $\color{#35bf28}+3.42\\%$ | | test_values_nested_leaf | 92.4520μs | 44.8626μs | 22.2903 KOps/s | 21.5028 KOps/s | $\color{#35bf28}+3.66\\%$ | | test_values_stack_nested | 91.2400μs | 50.3259μs | 19.8705 KOps/s | 19.2275 KOps/s | $\color{#35bf28}+3.34\\%$ | | test_values_stack_nested_leaf | 91.4300μs | 44.8502μs | 22.2964 KOps/s | 21.8877 KOps/s | $\color{#35bf28}+1.87\\%$ | | test_values_stack_nested_locked | 96.9110μs | 50.1301μs | 19.9481 KOps/s | 19.3696 KOps/s | $\color{#35bf28}+2.99\\%$ | | test_membership | 4.6101μs | 0.7522μs | 1.3294 MOps/s | 711.8334 KOps/s | $\textbf{\color{#35bf28}+86.75\\%}$ | | test_membership_nested | 37.7480μs | 2.6354μs | 379.4460 KOps/s | 291.9263 KOps/s | $\textbf{\color{#35bf28}+29.98\\%}$ | | test_membership_nested_leaf | 21.8310μs | 2.6706μs | 374.4511 KOps/s | 238.4037 KOps/s | $\textbf{\color{#35bf28}+57.07\\%}$ | | test_membership_stacked_nested | 28.1020μs | 2.6685μs | 374.7418 KOps/s | 287.3490 KOps/s | $\textbf{\color{#35bf28}+30.41\\%}$ | | test_membership_stacked_nested_leaf | 20.9080μs | 2.6855μs | 372.3758 KOps/s | 286.5162 KOps/s | $\textbf{\color{#35bf28}+29.97\\%}$ | | test_membership_nested_last | 28.1230μs | 3.9554μs | 252.8162 KOps/s | 240.9798 KOps/s | $\color{#35bf28}+4.91\\%$ | | test_membership_nested_leaf_last | 
24.7660μs | 3.9926μs | 250.4638 KOps/s | 241.8200 KOps/s | $\color{#35bf28}+3.57\\%$ | | test_membership_stacked_nested_last | 25.1570μs | 3.9667μs | 252.0996 KOps/s | 75.5366 KOps/s | $\textbf{\color{#35bf28}+233.74\\%}$ | | test_membership_stacked_nested_leaf_last | 25.5180μs | 3.9719μs | 251.7711 KOps/s | 75.9843 KOps/s | $\textbf{\color{#35bf28}+231.35\\%}$ | | test_nested_getleaf | 42.8800μs | 10.8838μs | 91.8794 KOps/s | 92.3555 KOps/s | $\color{#d91a1a}-0.52\\%$ | | test_nested_get | 36.7580μs | 10.1419μs | 98.6005 KOps/s | 95.4919 KOps/s | $\color{#35bf28}+3.26\\%$ | | test_stacked_getleaf | 32.8610μs | 10.7449μs | 93.0671 KOps/s | 97.4131 KOps/s | $\color{#d91a1a}-4.46\\%$ | | test_stacked_get | 31.6290μs | 10.2188μs | 97.8591 KOps/s | 98.6072 KOps/s | $\color{#d91a1a}-0.76\\%$ | | test_nested_getitemleaf | 54.0580μs | 11.5563μs | 86.5331 KOps/s | 88.0532 KOps/s | $\color{#d91a1a}-1.73\\%$ | | test_nested_getitem | 38.5220μs | 10.5774μs | 94.5410 KOps/s | 95.3094 KOps/s | $\color{#d91a1a}-0.81\\%$ | | test_stacked_getitemleaf | 54.7040μs | 11.3440μs | 88.1522 KOps/s | 89.2775 KOps/s | $\color{#d91a1a}-1.26\\%$ | | test_stacked_getitem | 28.5730μs | 10.6291μs | 94.0814 KOps/s | 96.2569 KOps/s | $\color{#d91a1a}-2.26\\%$ | | test_lock_nested | 7.2154ms | 0.4354ms | 2.2969 KOps/s | 3.0021 KOps/s | $\textbf{\color{#d91a1a}-23.49\\%}$ | | test_lock_stack_nested | 0.5644ms | 0.4063ms | 2.4615 KOps/s | 3.4136 KOps/s | $\textbf{\color{#d91a1a}-27.89\\%}$ | | test_unlock_nested | 0.6984ms | 0.3469ms | 2.8826 KOps/s | 2.9566 KOps/s | $\color{#d91a1a}-2.50\\%$ | | test_unlock_stack_nested | 0.4991ms | 0.3201ms | 3.1241 KOps/s | 3.3210 KOps/s | $\textbf{\color{#d91a1a}-5.93\\%}$ | | test_flatten_speed | 0.4842ms | 0.1059ms | 9.4404 KOps/s | 10.1824 KOps/s | $\textbf{\color{#d91a1a}-7.29\\%}$ | | test_unflatten_speed | 0.6355ms | 0.4358ms | 2.2944 KOps/s | 2.4766 KOps/s | $\textbf{\color{#d91a1a}-7.36\\%}$ | | test_common_ops | 6.9445ms | 0.7032ms | 1.4221 KOps/s | 
1.2886 KOps/s | $\textbf{\color{#35bf28}+10.36\\%}$ | | test_creation | 16.0500μs | 2.2775μs | 439.0829 KOps/s | 519.1516 KOps/s | $\textbf{\color{#d91a1a}-15.42\\%}$ | | test_creation_empty | 27.6010μs | 8.9883μs | 111.2554 KOps/s | 82.4994 KOps/s | $\textbf{\color{#35bf28}+34.86\\%}$ | | test_creation_nested_1 | 47.0880μs | 11.5996μs | 86.2102 KOps/s | 67.9615 KOps/s | $\textbf{\color{#35bf28}+26.85\\%}$ | | test_creation_nested_2 | 35.2050μs | 15.7429μs | 63.5207 KOps/s | 54.8733 KOps/s | $\textbf{\color{#35bf28}+15.76\\%}$ | | test_clone | 1.5104ms | 13.0286μs | 76.7543 KOps/s | 76.2628 KOps/s | $\color{#35bf28}+0.64\\%$ | | test_getitem[int] | 37.3600μs | 11.2000μs | 89.2854 KOps/s | 89.6577 KOps/s | $\color{#d91a1a}-0.42\\%$ | | test_getitem[slice_int] | 58.0180μs | 22.7714μs | 43.9147 KOps/s | 44.6172 KOps/s | $\color{#d91a1a}-1.57\\%$ | | test_getitem[range] | 0.1400ms | 43.6672μs | 22.9005 KOps/s | 16.8606 KOps/s | $\textbf{\color{#35bf28}+35.82\\%}$ | | test_getitem[tuple] | 47.5280μs | 18.5875μs | 53.7995 KOps/s | 53.7037 KOps/s | $\color{#35bf28}+0.18\\%$ | | test_getitem[list] | 0.1598ms | 38.2195μs | 26.1646 KOps/s | 24.2054 KOps/s | $\textbf{\color{#35bf28}+8.09\\%}$ | | test_setitem_dim[int] | 64.6710μs | 28.9764μs | 34.5109 KOps/s | 28.4304 KOps/s | $\textbf{\color{#35bf28}+21.39\\%}$ | | test_setitem_dim[slice_int] | 0.1007ms | 55.6283μs | 17.9765 KOps/s | 15.9395 KOps/s | $\textbf{\color{#35bf28}+12.78\\%}$ | | test_setitem_dim[range] | 0.1074ms | 76.2242μs | 13.1192 KOps/s | 11.6294 KOps/s | $\textbf{\color{#35bf28}+12.81\\%}$ | | test_setitem_dim[tuple] | 68.2670μs | 44.2388μs | 22.6046 KOps/s | 19.4279 KOps/s | $\textbf{\color{#35bf28}+16.35\\%}$ | | test_setitem | 82.9340μs | 18.2556μs | 54.7778 KOps/s | 48.2077 KOps/s | $\textbf{\color{#35bf28}+13.63\\%}$ | | test_set | 77.4340μs | 17.8284μs | 56.0902 KOps/s | 50.8344 KOps/s | $\textbf{\color{#35bf28}+10.34\\%}$ | | test_set_shared | 1.9189ms | 0.1629ms | 6.1381 KOps/s | 5.8642 KOps/s | 
$\color{#35bf28}+4.67\\%$ | | test_update | 0.1208ms | 19.5066μs | 51.2647 KOps/s | 43.0425 KOps/s | $\textbf{\color{#35bf28}+19.10\\%}$ | | test_update_nested | 82.0130μs | 28.0700μs | 35.6252 KOps/s | 31.1393 KOps/s | $\textbf{\color{#35bf28}+14.41\\%}$ | | test_update__nested | 1.0802ms | 24.8273μs | 40.2782 KOps/s | 40.5080 KOps/s | $\color{#d91a1a}-0.57\\%$ | | test_set_nested | 75.1190μs | 19.3307μs | 51.7312 KOps/s | 46.3985 KOps/s | $\textbf{\color{#35bf28}+11.49\\%}$ | | test_set_nested_new | 83.3950μs | 23.8948μs | 41.8502 KOps/s | 38.8997 KOps/s | $\textbf{\color{#35bf28}+7.58\\%}$ | | test_select | 0.1117ms | 39.4271μs | 25.3633 KOps/s | 24.5769 KOps/s | $\color{#35bf28}+3.20\\%$ | | test_select_nested | 0.1282ms | 61.0213μs | 16.3877 KOps/s | 17.3248 KOps/s | $\textbf{\color{#d91a1a}-5.41\\%}$ | | test_exclude_nested | 0.1708ms | 80.7141μs | 12.3894 KOps/s | 8.2686 KOps/s | $\textbf{\color{#35bf28}+49.84\\%}$ | | test_empty[True] | 0.5119ms | 0.3420ms | 2.9240 KOps/s | 2.5183 KOps/s | $\textbf{\color{#35bf28}+16.11\\%}$ | | test_empty[False] | 6.3843μs | 1.2421μs | 805.0658 KOps/s | 970.5680 KOps/s | $\textbf{\color{#d91a1a}-17.05\\%}$ | | test_unbind_speed | 0.3991ms | 0.2569ms | 3.8920 KOps/s | 4.0819 KOps/s | $\color{#d91a1a}-4.65\\%$ | | test_unbind_speed_stack0 | 0.4499ms | 0.2546ms | 3.9273 KOps/s | 4.1632 KOps/s | $\textbf{\color{#d91a1a}-5.66\\%}$ | | test_unbind_speed_stack1 | 76.5417ms | 0.7384ms | 1.3542 KOps/s | 1.4490 KOps/s | $\textbf{\color{#d91a1a}-6.54\\%}$ | | test_split | 74.0760ms | 1.6095ms | 621.3176 Ops/s | 633.2550 Ops/s | $\color{#d91a1a}-1.89\\%$ | | test_chunk | 75.2905ms | 1.6175ms | 618.2292 Ops/s | 626.0501 Ops/s | $\color{#d91a1a}-1.25\\%$ | | test_creation[device0] | 4.5425ms | 95.7333μs | 10.4457 KOps/s | 10.3189 KOps/s | $\color{#35bf28}+1.23\\%$ | | test_creation_from_tensor | 0.2388ms | 96.7623μs | 10.3346 KOps/s | 10.3571 KOps/s | $\color{#d91a1a}-0.22\\%$ | | test_add_one[memmap_tensor0] | 0.1466ms | 5.4261μs | 
184.2939 KOps/s | 182.7253 KOps/s | $\color{#35bf28}+0.86\\%$ | | test_contiguous[memmap_tensor0] | 11.6220μs | 0.6565μs | 1.5231 MOps/s | 1.5292 MOps/s | $\color{#d91a1a}-0.40\\%$ | | test_stack[memmap_tensor0] | 41.5470μs | 3.8062μs | 262.7298 KOps/s | 277.2742 KOps/s | $\textbf{\color{#d91a1a}-5.25\\%}$ | | test_memmaptd_index | 0.9520ms | 0.2578ms | 3.8785 KOps/s | 3.8519 KOps/s | $\color{#35bf28}+0.69\\%$ | | test_memmaptd_index_astensor | 0.8709ms | 0.3325ms | 3.0071 KOps/s | 2.9785 KOps/s | $\color{#35bf28}+0.96\\%$ | | test_memmaptd_index_op | 0.8533ms | 0.5784ms | 1.7290 KOps/s | 1.5471 KOps/s | $\textbf{\color{#35bf28}+11.75\\%}$ | | test_serialize_model | 0.1318s | 0.1228s | 8.1407 Ops/s | 7.3952 Ops/s | $\textbf{\color{#35bf28}+10.08\\%}$ | | test_serialize_model_pickle | 0.4480s | 0.3870s | 2.5838 Ops/s | 2.5317 Ops/s | $\color{#35bf28}+2.06\\%$ | | test_serialize_weights | 0.1279s | 0.1222s | 8.1860 Ops/s | 8.1815 Ops/s | $\color{#35bf28}+0.06\\%$ | | test_serialize_weights_returnearly | 0.1713s | 0.1640s | 6.0994 Ops/s | 6.2474 Ops/s | $\color{#d91a1a}-2.37\\%$ | | test_serialize_weights_pickle | 0.5885s | 0.4524s | 2.2104 Ops/s | 1.1155 Ops/s | $\textbf{\color{#35bf28}+98.16\\%}$ | | test_serialize_weights_filesystem | 0.1452s | 0.1426s | 7.0133 Ops/s | 7.0573 Ops/s | $\color{#d91a1a}-0.62\\%$ | | test_serialize_model_filesystem | 0.1638s | 0.1540s | 6.4922 Ops/s | 6.5098 Ops/s | $\color{#d91a1a}-0.27\\%$ | | test_reshape_pytree | 83.6250μs | 25.6047μs | 39.0554 KOps/s | 38.3836 KOps/s | $\color{#35bf28}+1.75\\%$ | | test_reshape_td | 0.1080ms | 34.0050μs | 29.4075 KOps/s | 28.7873 KOps/s | $\color{#35bf28}+2.15\\%$ | | test_view_pytree | 73.1770μs | 25.7248μs | 38.8729 KOps/s | 38.8334 KOps/s | $\color{#35bf28}+0.10\\%$ | | test_view_td | 0.1172ms | 38.9905μs | 25.6472 KOps/s | 25.5114 KOps/s | $\color{#35bf28}+0.53\\%$ | | test_unbind_pytree | 63.3180μs | 29.2619μs | 34.1742 KOps/s | 33.4317 KOps/s | $\color{#35bf28}+2.22\\%$ | | test_unbind_td | 
0.4276ms | 37.8509μs | 26.4195 KOps/s | 26.9891 KOps/s | $\color{#d91a1a}-2.11\\%$ | | test_split_pytree | 66.8540μs | 29.4564μs | 33.9485 KOps/s | 33.8307 KOps/s | $\color{#35bf28}+0.35\\%$ | | test_split_td | 0.5398ms | 40.2600μs | 24.8386 KOps/s | 25.0518 KOps/s | $\color{#d91a1a}-0.85\\%$ | | test_add_pytree | 87.2220μs | 34.5938μs | 28.9069 KOps/s | 28.3295 KOps/s | $\color{#35bf28}+2.04\\%$ | | test_add_td | 0.1122ms | 53.1108μs | 18.8286 KOps/s | 17.0787 KOps/s | $\textbf{\color{#35bf28}+10.25\\%}$ | | test_distributed | 0.2468ms | 0.1303ms | 7.6768 KOps/s | 7.6907 KOps/s | $\color{#d91a1a}-0.18\\%$ | | test_tdmodule | 35.3360μs | 16.7725μs | 59.6215 KOps/s | 54.5449 KOps/s | $\textbf{\color{#35bf28}+9.31\\%}$ | | test_tdmodule_dispatch | 54.2310μs | 34.3619μs | 29.1020 KOps/s | 27.6778 KOps/s | $\textbf{\color{#35bf28}+5.15\\%}$ | | test_tdseq | 34.9650μs | 19.5100μs | 51.2559 KOps/s | 47.2032 KOps/s | $\textbf{\color{#35bf28}+8.59\\%}$ | | test_tdseq_dispatch | 66.0230μs | 39.2796μs | 25.4585 KOps/s | 23.8711 KOps/s | $\textbf{\color{#35bf28}+6.65\\%}$ | | test_instantiation_functorch | 2.0350ms | 1.3449ms | 743.5705 Ops/s | 754.0046 Ops/s | $\color{#d91a1a}-1.38\\%$ | | test_instantiation_td | 1.5477ms | 1.0334ms | 967.7151 Ops/s | 976.8091 Ops/s | $\color{#d91a1a}-0.93\\%$ | | test_exec_functorch | 0.3032ms | 0.1638ms | 6.1055 KOps/s | 6.0833 KOps/s | $\color{#35bf28}+0.37\\%$ | | test_exec_functional_call | 0.3669ms | 0.1506ms | 6.6388 KOps/s | 6.6636 KOps/s | $\color{#d91a1a}-0.37\\%$ | | test_exec_td | 0.2871ms | 0.1488ms | 6.7196 KOps/s | 6.5867 KOps/s | $\color{#35bf28}+2.02\\%$ | | test_exec_td_decorator | 0.6704ms | 0.2313ms | 4.3227 KOps/s | 4.5225 KOps/s | $\color{#d91a1a}-4.42\\%$ | | test_vmap_mlp_speed[True-True] | 0.7892ms | 0.4863ms | 2.0562 KOps/s | 1.9955 KOps/s | $\color{#35bf28}+3.04\\%$ | | test_vmap_mlp_speed[True-False] | 0.8813ms | 0.4805ms | 2.0813 KOps/s | 1.9865 KOps/s | $\color{#35bf28}+4.77\\%$ | | 
test_vmap_mlp_speed[False-True] | 0.6773ms | 0.3914ms | 2.5551 KOps/s | 2.4821 KOps/s | $\color{#35bf28}+2.94\\%$ | | test_vmap_mlp_speed[False-False] | 0.4816ms | 0.3911ms | 2.5571 KOps/s | 2.4947 KOps/s | $\color{#35bf28}+2.50\\%$ | | test_vmap_mlp_speed_decorator[True-True] | 1.0808ms | 0.5631ms | 1.7760 KOps/s | 1.7594 KOps/s | $\color{#35bf28}+0.94\\%$ | | test_vmap_mlp_speed_decorator[True-False] | 0.9032ms | 0.5642ms | 1.7724 KOps/s | 1.7559 KOps/s | $\color{#35bf28}+0.94\\%$ | | test_vmap_mlp_speed_decorator[False-True] | 0.6749ms | 0.4622ms | 2.1638 KOps/s | 2.1540 KOps/s | $\color{#35bf28}+0.45\\%$ | | test_vmap_mlp_speed_decorator[False-False] | 0.7890ms | 0.4613ms | 2.1678 KOps/s | 2.1571 KOps/s | $\color{#35bf28}+0.49\\%$ | | test_to_module_speed[True] | 1.8513ms | 1.7085ms | 585.3141 Ops/s | 601.0719 Ops/s | $\color{#d91a1a}-2.62\\%$ | | test_to_module_speed[False] | 1.7848ms | 1.6813ms | 594.7734 Ops/s | 602.5229 Ops/s | $\color{#d91a1a}-1.29\\%$ | | test_tc_init | 0.3033ms | 51.2913μs | 19.4965 KOps/s | 16.3952 KOps/s | $\textbf{\color{#35bf28}+18.92\\%}$ | | test_tc_init_nested | 0.6281ms | 0.1054ms | 9.4918 KOps/s | 8.3231 KOps/s | $\textbf{\color{#35bf28}+14.04\\%}$ | | test_tc_first_layer_tensor | 53.9200μs | 8.0859μs | 123.6728 KOps/s | 119.9872 KOps/s | $\color{#35bf28}+3.07\\%$ | | test_tc_first_layer_nontensor | 37.0790μs | 8.0239μs | 124.6275 KOps/s | 117.5762 KOps/s | $\textbf{\color{#35bf28}+6.00\\%}$ | | test_tc_second_layer_tensor | 39.3430μs | 2.4634μs | 405.9433 KOps/s | 391.8318 KOps/s | $\color{#35bf28}+3.60\\%$ | | test_tc_second_layer_nontensor | 31.1680μs | 9.1991μs | 108.7060 KOps/s | 107.1135 KOps/s | $\color{#35bf28}+1.49\\%$ |
github-actions[bot] commented 1 month ago

$\color{#D29922}\textsf{\Large⚠\kern{0.2cm}\normalsize Warning}$ Result of GPU Benchmark Tests

Total Benchmarks: 141. Improved: $\large\color{#35bf28}62$. Worsened: $\large\color{#d91a1a}12$.

Expand to view detailed results | Name | Max | Mean | Ops | Ops on Repo `HEAD` | Change | | -------------------------------------------------- | --------- | --------- | --------------- | ------------------ | ----------------------------------- | | test_plain_set_nested | 31.8510μs | 12.1547μs | 82.2726 KOps/s | 76.2622 KOps/s | $\textbf{\color{#35bf28}+7.88\\%}$ | | test_plain_set_stack_nested | 35.8710μs | 12.1812μs | 82.0935 KOps/s | 75.4363 KOps/s | $\textbf{\color{#35bf28}+8.83\\%}$ | | test_plain_set_nested_inplace | 36.2900μs | 13.2817μs | 75.2917 KOps/s | 68.6664 KOps/s | $\textbf{\color{#35bf28}+9.65\\%}$ | | test_plain_set_stack_nested_inplace | 45.2910μs | 13.1423μs | 76.0902 KOps/s | 69.1801 KOps/s | $\textbf{\color{#35bf28}+9.99\\%}$ | | test_items | 21.0200μs | 4.7215μs | 211.7959 KOps/s | 214.8179 KOps/s | $\color{#d91a1a}-1.41\\%$ | | test_items_nested | 0.4312ms | 0.3860ms | 2.5905 KOps/s | 2.9599 KOps/s | $\textbf{\color{#d91a1a}-12.48\\%}$ | | test_items_nested_locked | 0.4320ms | 0.3916ms | 2.5535 KOps/s | 2.9346 KOps/s | $\textbf{\color{#d91a1a}-12.99\\%}$ | | test_items_nested_leaf | 0.1052ms | 86.9486μs | 11.5010 KOps/s | 12.0629 KOps/s | $\color{#d91a1a}-4.66\\%$ | | test_items_stack_nested | 0.4243ms | 0.3827ms | 2.6132 KOps/s | 2.9219 KOps/s | $\textbf{\color{#d91a1a}-10.57\\%}$ | | test_items_stack_nested_leaf | 0.1111ms | 86.7763μs | 11.5239 KOps/s | 11.8834 KOps/s | $\color{#d91a1a}-3.03\\%$ | | test_items_stack_nested_locked | 0.4256ms | 0.3903ms | 2.5620 KOps/s | 2.8771 KOps/s | $\textbf{\color{#d91a1a}-10.95\\%}$ | | test_keys | 32.2410μs | 4.3552μs | 229.6112 KOps/s | 229.0343 KOps/s | $\color{#35bf28}+0.25\\%$ | | test_keys_nested | 93.0610μs | 67.8015μs | 14.7489 KOps/s | 14.2099 KOps/s | $\color{#35bf28}+3.79\\%$ | | test_keys_nested_locked | 0.6610ms | 73.5395μs | 13.5981 KOps/s | 13.1641 KOps/s | $\color{#35bf28}+3.30\\%$ | | test_keys_nested_leaf | 87.9420μs | 59.0542μs | 16.9336 KOps/s | 17.0243 KOps/s | 
$\color{#d91a1a}-0.53\\%$ | | test_keys_stack_nested | 96.7920μs | 68.1307μs | 14.6777 KOps/s | 14.2832 KOps/s | $\color{#35bf28}+2.76\\%$ | | test_keys_stack_nested_leaf | 88.0510μs | 59.7844μs | 16.7268 KOps/s | 16.5215 KOps/s | $\color{#35bf28}+1.24\\%$ | | test_keys_stack_nested_locked | 96.9710μs | 74.2493μs | 13.4681 KOps/s | 13.4148 KOps/s | $\color{#35bf28}+0.40\\%$ | | test_values | 9.3100μs | 1.7528μs | 570.5024 KOps/s | 541.4977 KOps/s | $\textbf{\color{#35bf28}+5.36\\%}$ | | test_values_nested | 58.7110μs | 33.9331μs | 29.4698 KOps/s | 28.0507 KOps/s | $\textbf{\color{#35bf28}+5.06\\%}$ | | test_values_nested_locked | 55.6300μs | 36.0608μs | 27.7309 KOps/s | 26.5864 KOps/s | $\color{#35bf28}+4.31\\%$ | | test_values_nested_leaf | 53.7800μs | 30.2036μs | 33.1086 KOps/s | 31.3016 KOps/s | $\textbf{\color{#35bf28}+5.77\\%}$ | | test_values_stack_nested | 61.3710μs | 34.9902μs | 28.5794 KOps/s | 27.4304 KOps/s | $\color{#35bf28}+4.19\\%$ | | test_values_stack_nested_leaf | 60.0710μs | 31.3341μs | 31.9141 KOps/s | 30.3524 KOps/s | $\textbf{\color{#35bf28}+5.15\\%}$ | | test_values_stack_nested_locked | 65.3810μs | 37.0486μs | 26.9916 KOps/s | 26.2211 KOps/s | $\color{#35bf28}+2.94\\%$ | | test_membership | 1.5786μs | 0.5423μs | 1.8441 MOps/s | 1.4226 MOps/s | $\textbf{\color{#35bf28}+29.63\\%}$ | | test_membership_nested | 17.8200μs | 2.0844μs | 479.7641 KOps/s | 399.1911 KOps/s | $\textbf{\color{#35bf28}+20.18\\%}$ | | test_membership_nested_leaf | 17.6050μs | 2.0102μs | 497.4713 KOps/s | 400.6551 KOps/s | $\textbf{\color{#35bf28}+24.16\\%}$ | | test_membership_stacked_nested | 16.5200μs | 2.0713μs | 482.7899 KOps/s | 398.4314 KOps/s | $\textbf{\color{#35bf28}+21.17\\%}$ | | test_membership_stacked_nested_leaf | 38.8910μs | 2.0992μs | 476.3761 KOps/s | 395.0589 KOps/s | $\textbf{\color{#35bf28}+20.58\\%}$ | | test_membership_nested_last | 19.8710μs | 2.9504μs | 338.9418 KOps/s | 328.2209 KOps/s | $\color{#35bf28}+3.27\\%$ | | 
test_membership_nested_leaf_last | 33.7510μs | 2.9366μs | 340.5350 KOps/s | 329.7335 KOps/s | $\color{#35bf28}+3.28\\%$ | | test_membership_stacked_nested_last | 21.4910μs | 3.4134μs | 292.9627 KOps/s | 324.8696 KOps/s | $\textbf{\color{#d91a1a}-9.82\\%}$ | | test_membership_stacked_nested_leaf_last | 34.4600μs | 3.3886μs | 295.1058 KOps/s | 328.7028 KOps/s | $\textbf{\color{#d91a1a}-10.22\\%}$ | | test_nested_getleaf | 41.8200μs | 8.0080μs | 124.8750 KOps/s | 120.2027 KOps/s | $\color{#35bf28}+3.89\\%$ | | test_nested_get | 29.7300μs | 7.5048μs | 133.2473 KOps/s | 127.8244 KOps/s | $\color{#35bf28}+4.24\\%$ | | test_stacked_getleaf | 36.0310μs | 8.0491μs | 124.2378 KOps/s | 119.2991 KOps/s | $\color{#35bf28}+4.14\\%$ | | test_stacked_get | 23.3400μs | 7.4937μs | 133.4463 KOps/s | 126.4042 KOps/s | $\textbf{\color{#35bf28}+5.57\\%}$ | | test_nested_getitemleaf | 31.8310μs | 8.1976μs | 121.9864 KOps/s | 116.9388 KOps/s | $\color{#35bf28}+4.32\\%$ | | test_nested_getitem | 71.7410μs | 7.6605μs | 130.5391 KOps/s | 124.2740 KOps/s | $\textbf{\color{#35bf28}+5.04\\%}$ | | test_stacked_getitemleaf | 23.7400μs | 8.1929μs | 122.0564 KOps/s | 117.1544 KOps/s | $\color{#35bf28}+4.18\\%$ | | test_stacked_getitem | 35.7310μs | 7.6704μs | 130.3717 KOps/s | 124.6899 KOps/s | $\color{#35bf28}+4.56\\%$ | | test_lock_nested | 9.6842ms | 0.4206ms | 2.3774 KOps/s | 2.4173 KOps/s | $\color{#d91a1a}-1.65\\%$ | | test_lock_stack_nested | 0.4153ms | 0.3789ms | 2.6393 KOps/s | 3.3162 KOps/s | $\textbf{\color{#d91a1a}-20.41\\%}$ | | test_unlock_nested | 0.8159ms | 0.3285ms | 3.0441 KOps/s | 2.4275 KOps/s | $\textbf{\color{#35bf28}+25.40\\%}$ | | test_unlock_stack_nested | 0.4464ms | 0.2953ms | 3.3860 KOps/s | 3.2332 KOps/s | $\color{#35bf28}+4.73\\%$ | | test_flatten_speed | 0.4309ms | 0.1054ms | 9.4860 KOps/s | 9.6165 KOps/s | $\color{#d91a1a}-1.36\\%$ | | test_unflatten_speed | 0.3178ms | 0.2891ms | 3.4591 KOps/s | 3.4744 KOps/s | $\color{#d91a1a}-0.44\\%$ | | test_common_ops | 0.9892ms 
| 0.5521ms | 1.8112 KOps/s | 1.6919 KOps/s | $\textbf{\color{#35bf28}+7.05\\%}$ | | test_creation | 19.9600μs | 1.8513μs | 540.1754 KOps/s | 627.2236 KOps/s | $\textbf{\color{#d91a1a}-13.88\\%}$ | | test_creation_empty | 36.1310μs | 8.0427μs | 124.3366 KOps/s | 110.8460 KOps/s | $\textbf{\color{#35bf28}+12.17\\%}$ | | test_creation_nested_1 | 28.3800μs | 9.8850μs | 101.1630 KOps/s | 92.7940 KOps/s | $\textbf{\color{#35bf28}+9.02\\%}$ | | test_creation_nested_2 | 29.7310μs | 12.4578μs | 80.2710 KOps/s | 76.8289 KOps/s | $\color{#35bf28}+4.48\\%$ | | test_clone | 90.7610μs | 10.8662μs | 92.0289 KOps/s | 86.5838 KOps/s | $\textbf{\color{#35bf28}+6.29\\%}$ | | test_getitem[int] | 27.4310μs | 10.1214μs | 98.8006 KOps/s | 93.2686 KOps/s | $\textbf{\color{#35bf28}+5.93\\%}$ | | test_getitem[slice_int] | 37.5410μs | 19.7093μs | 50.7374 KOps/s | 48.3206 KOps/s | $\textbf{\color{#35bf28}+5.00\\%}$ | | test_getitem[range] | 0.2477ms | 37.5554μs | 26.6273 KOps/s | 21.0310 KOps/s | $\textbf{\color{#35bf28}+26.61\\%}$ | | test_getitem[tuple] | 47.1500μs | 17.4006μs | 57.4693 KOps/s | 52.8534 KOps/s | $\textbf{\color{#35bf28}+8.73\\%}$ | | test_getitem[list] | 0.2407ms | 33.3736μs | 29.9638 KOps/s | 28.7255 KOps/s | $\color{#35bf28}+4.31\\%$ | | test_setitem_dim[int] | 58.0500μs | 23.8018μs | 42.0135 KOps/s | 36.1920 KOps/s | $\textbf{\color{#35bf28}+16.09\\%}$ | | test_setitem_dim[slice_int] | 62.1310μs | 44.0312μs | 22.7112 KOps/s | 20.6018 KOps/s | $\textbf{\color{#35bf28}+10.24\\%}$ | | test_setitem_dim[range] | 81.6710μs | 61.7073μs | 16.2055 KOps/s | 15.0460 KOps/s | $\textbf{\color{#35bf28}+7.71\\%}$ | | test_setitem_dim[tuple] | 56.1610μs | 37.4819μs | 26.6796 KOps/s | 23.4475 KOps/s | $\textbf{\color{#35bf28}+13.78\\%}$ | | test_setitem | 96.0320μs | 15.2033μs | 65.7753 KOps/s | 58.7795 KOps/s | $\textbf{\color{#35bf28}+11.90\\%}$ | | test_set | 76.1520μs | 14.5533μs | 68.7128 KOps/s | 61.8004 KOps/s | $\textbf{\color{#35bf28}+11.19\\%}$ | | test_set_shared | 2.7718ms | 
95.1313μs | 10.5118 KOps/s | 10.0359 KOps/s | $\color{#35bf28}+4.74\\%$ | | test_update | 0.4829ms | 17.4067μs | 57.4490 KOps/s | 51.9509 KOps/s | $\textbf{\color{#35bf28}+10.58\\%}$ | | test_update_nested | 0.1139ms | 22.8386μs | 43.7854 KOps/s | 40.0063 KOps/s | $\textbf{\color{#35bf28}+9.45\\%}$ | | test_update__nested | 0.1027ms | 21.3867μs | 46.7580 KOps/s | 45.1572 KOps/s | $\color{#35bf28}+3.54\\%$ | | test_set_nested | 0.1024ms | 15.8230μs | 63.1990 KOps/s | 57.9915 KOps/s | $\textbf{\color{#35bf28}+8.98\\%}$ | | test_set_nested_new | 0.1014ms | 18.3597μs | 54.4670 KOps/s | 50.8511 KOps/s | $\textbf{\color{#35bf28}+7.11\\%}$ | | test_select | 0.1240ms | 31.2074μs | 32.0437 KOps/s | 31.7247 KOps/s | $\color{#35bf28}+1.01\\%$ | | test_select_nested | 74.9100μs | 52.5303μs | 19.0366 KOps/s | 19.1007 KOps/s | $\color{#d91a1a}-0.34\\%$ | | test_exclude_nested | 0.1025ms | 72.3162μs | 13.8282 KOps/s | 9.1395 KOps/s | $\textbf{\color{#35bf28}+51.30\\%}$ | | test_empty[True] | 0.3395ms | 0.2986ms | 3.3484 KOps/s | 2.8828 KOps/s | $\textbf{\color{#35bf28}+16.15\\%}$ | | test_empty[False] | 3.0891μs | 0.9221μs | 1.0845 MOps/s | 1.2643 MOps/s | $\textbf{\color{#d91a1a}-14.22\\%}$ | | test_to | 89.0610μs | 58.5100μs | 17.0911 KOps/s | 16.9946 KOps/s | $\color{#35bf28}+0.57\\%$ | | test_to_nonblocking | 71.2400μs | 36.0469μs | 27.7416 KOps/s | 28.3907 KOps/s | $\color{#d91a1a}-2.29\\%$ | | test_unbind_speed | 0.2881ms | 0.2528ms | 3.9550 KOps/s | 3.8820 KOps/s | $\color{#35bf28}+1.88\\%$ | | test_unbind_speed_stack0 | 0.3012ms | 0.2485ms | 4.0249 KOps/s | 3.7991 KOps/s | $\textbf{\color{#35bf28}+5.94\\%}$ | | test_unbind_speed_stack1 | 89.9838ms | 0.7117ms | 1.4051 KOps/s | 1.2740 KOps/s | $\textbf{\color{#35bf28}+10.29\\%}$ | | test_split | 91.1388ms | 1.5865ms | 630.2991 Ops/s | 602.7288 Ops/s | $\color{#35bf28}+4.57\\%$ | | test_chunk | 1.5143ms | 1.4509ms | 689.2246 Ops/s | 604.0087 Ops/s | $\textbf{\color{#35bf28}+14.11\\%}$ | | test_creation[device0] | 0.1716ms | 
58.5231μs | 17.0873 KOps/s | 17.4153 KOps/s | $\color{#d91a1a}-1.88\\%$ | | test_creation_from_tensor | 0.1417ms | 56.0071μs | 17.8549 KOps/s | 17.6801 KOps/s | $\color{#35bf28}+0.99\\%$ | | test_add_one[memmap_tensor0] | 71.4400μs | 6.5869μs | 151.8166 KOps/s | 139.7278 KOps/s | $\textbf{\color{#35bf28}+8.65\\%}$ | | test_contiguous[memmap_tensor0] | 24.8710μs | 0.6112μs | 1.6362 MOps/s | 1.4467 MOps/s | $\textbf{\color{#35bf28}+13.10\\%}$ | | test_stack[memmap_tensor0] | 30.5910μs | 4.6150μs | 216.6858 KOps/s | 197.1986 KOps/s | $\textbf{\color{#35bf28}+9.88\\%}$ | | test_memmaptd_index | 1.1349ms | 0.2551ms | 3.9207 KOps/s | 3.5952 KOps/s | $\textbf{\color{#35bf28}+9.05\\%}$ | | test_memmaptd_index_astensor | 0.5739ms | 0.3169ms | 3.1552 KOps/s | 2.9496 KOps/s | $\textbf{\color{#35bf28}+6.97\\%}$ | | test_memmaptd_index_op | 0.8730ms | 0.5855ms | 1.7080 KOps/s | 1.5544 KOps/s | $\textbf{\color{#35bf28}+9.88\\%}$ | | test_serialize_model | 0.1910s | 0.1005s | 9.9495 Ops/s | 10.5732 Ops/s | $\textbf{\color{#d91a1a}-5.90\\%}$ | | test_serialize_model_pickle | 1.3740s | 1.2354s | 0.8094 Ops/s | 0.8089 Ops/s | $\color{#35bf28}+0.07\\%$ | | test_serialize_weights | 92.2200ms | 87.9650ms | 11.3682 Ops/s | 10.6786 Ops/s | $\textbf{\color{#35bf28}+6.46\\%}$ | | test_serialize_weights_returnearly | 65.3685ms | 60.8223ms | 16.4413 Ops/s | 13.0839 Ops/s | $\textbf{\color{#35bf28}+25.66\\%}$ | | test_serialize_weights_pickle | 1.3499s | 1.2497s | 0.8002 Ops/s | 0.8091 Ops/s | $\color{#d91a1a}-1.09\\%$ | | test_reshape_pytree | 40.8000μs | 25.2132μs | 39.6617 KOps/s | 37.5228 KOps/s | $\textbf{\color{#35bf28}+5.70\\%}$ | | test_reshape_td | 67.7410μs | 30.4494μs | 32.8414 KOps/s | 31.0624 KOps/s | $\textbf{\color{#35bf28}+5.73\\%}$ | | test_view_pytree | 45.2510μs | 24.8860μs | 40.1832 KOps/s | 37.7534 KOps/s | $\textbf{\color{#35bf28}+6.44\\%}$ | | test_view_td | 68.7310μs | 36.4000μs | 27.4725 KOps/s | 27.2512 KOps/s | $\color{#35bf28}+0.81\\%$ | | test_unbind_pytree | 
51.0310μs | 30.5343μs | 32.7500 KOps/s | 31.1860 KOps/s | $\textbf{\color{#35bf28}+5.02\\%}$ | | test_unbind_td | 0.4518ms | 37.5049μs | 26.6632 KOps/s | 25.1039 KOps/s | $\textbf{\color{#35bf28}+6.21\\%}$ | | test_split_pytree | 57.2400μs | 33.1702μs | 30.1475 KOps/s | 27.1761 KOps/s | $\textbf{\color{#35bf28}+10.93\\%}$ | | test_split_td | 0.1002ms | 37.1365μs | 26.9277 KOps/s | 23.1531 KOps/s | $\textbf{\color{#35bf28}+16.30\\%}$ | | test_add_pytree | 65.4610μs | 36.3655μs | 27.4986 KOps/s | 26.1923 KOps/s | $\color{#35bf28}+4.99\\%$ | | test_add_td | 80.8420μs | 47.3998μs | 21.0972 KOps/s | 19.9142 KOps/s | $\textbf{\color{#35bf28}+5.94\\%}$ | | test_distributed | 1.7200ms | 70.4123μs | 14.2021 KOps/s | 14.9164 KOps/s | $\color{#d91a1a}-4.79\\%$ | | test_tdmodule | 29.2900μs | 14.7929μs | 67.5999 KOps/s | 67.1570 KOps/s | $\color{#35bf28}+0.66\\%$ | | test_tdmodule_dispatch | 46.7210μs | 29.6123μs | 33.7697 KOps/s | 33.5435 KOps/s | $\color{#35bf28}+0.67\\%$ | | test_tdseq | 27.9910μs | 16.7085μs | 59.8498 KOps/s | 57.1555 KOps/s | $\color{#35bf28}+4.71\\%$ | | test_tdseq_dispatch | 63.1000μs | 32.7655μs | 30.5199 KOps/s | 31.0856 KOps/s | $\color{#d91a1a}-1.82\\%$ | | test_instantiation_functorch | 1.4542ms | 1.3786ms | 725.3941 Ops/s | 697.9743 Ops/s | $\color{#35bf28}+3.93\\%$ | | test_instantiation_td | 1.4434ms | 0.9686ms | 1.0324 KOps/s | 1.0269 KOps/s | $\color{#35bf28}+0.54\\%$ | | test_exec_functorch | 0.1818ms | 0.1413ms | 7.0794 KOps/s | 6.9275 KOps/s | $\color{#35bf28}+2.19\\%$ | | test_exec_functional_call | 0.1716ms | 0.1289ms | 7.7553 KOps/s | 7.5860 KOps/s | $\color{#35bf28}+2.23\\%$ | | test_exec_td | 0.1598ms | 0.1267ms | 7.8909 KOps/s | 7.5874 KOps/s | $\color{#35bf28}+4.00\\%$ | | test_exec_td_decorator | 0.7355ms | 0.1937ms | 5.1629 KOps/s | 4.9000 KOps/s | $\textbf{\color{#35bf28}+5.36\\%}$ | | test_vmap_mlp_speed[True-True] | 0.6585ms | 0.5847ms | 1.7103 KOps/s | 1.7795 KOps/s | $\color{#d91a1a}-3.89\\%$ | | 
test_vmap_mlp_speed[True-False] | 0.6400ms | 0.5886ms | 1.6990 KOps/s | 1.7771 KOps/s | $\color{#d91a1a}-4.40\\%$ | | test_vmap_mlp_speed[False-True] | 0.6273ms | 0.5159ms | 1.9383 KOps/s | 1.9651 KOps/s | $\color{#d91a1a}-1.36\\%$ | | test_vmap_mlp_speed[False-False] | 0.5679ms | 0.5190ms | 1.9269 KOps/s | 2.0437 KOps/s | $\textbf{\color{#d91a1a}-5.72\\%}$ | | test_vmap_mlp_speed_decorator[True-True] | 1.1017ms | 0.6581ms | 1.5196 KOps/s | 1.6019 KOps/s | $\textbf{\color{#d91a1a}-5.14\\%}$ | | test_vmap_mlp_speed_decorator[True-False] | 0.7931ms | 0.6433ms | 1.5545 KOps/s | 1.6026 KOps/s | $\color{#d91a1a}-3.00\\%$ | | test_vmap_mlp_speed_decorator[False-True] | 0.7205ms | 0.5656ms | 1.7679 KOps/s | 1.8193 KOps/s | $\color{#d91a1a}-2.83\\%$ | | test_vmap_mlp_speed_decorator[False-False] | 0.7104ms | 0.5717ms | 1.7492 KOps/s | 1.8078 KOps/s | $\color{#d91a1a}-3.25\\%$ | | test_vmap_transformer_speed[True-True] | 7.8668ms | 7.5280ms | 132.8378 Ops/s | 135.9293 Ops/s | $\color{#d91a1a}-2.27\\%$ | | test_vmap_transformer_speed[True-False] | 7.7692ms | 7.5368ms | 132.6826 Ops/s | 135.5212 Ops/s | $\color{#d91a1a}-2.09\\%$ | | test_vmap_transformer_speed[False-True] | 8.7141ms | 7.3579ms | 135.9086 Ops/s | 130.9587 Ops/s | $\color{#35bf28}+3.78\\%$ | | test_vmap_transformer_speed[False-False] | 7.6729ms | 7.3516ms | 136.0241 Ops/s | 134.4154 Ops/s | $\color{#35bf28}+1.20\\%$ | | test_vmap_transformer_speed_decorator[True-True] | 19.0848ms | 18.3105ms | 54.6136 Ops/s | 54.9386 Ops/s | $\color{#d91a1a}-0.59\\%$ | | test_vmap_transformer_speed_decorator[True-False] | 19.0506ms | 18.2691ms | 54.7374 Ops/s | 55.0277 Ops/s | $\color{#d91a1a}-0.53\\%$ | | test_vmap_transformer_speed_decorator[False-True] | 18.8502ms | 18.2077ms | 54.9217 Ops/s | 55.2970 Ops/s | $\color{#d91a1a}-0.68\\%$ | | test_vmap_transformer_speed_decorator[False-False] | 18.8759ms | 18.0343ms | 55.4498 Ops/s | 55.3499 Ops/s | $\color{#35bf28}+0.18\\%$ | | test_to_module_speed[True] | 1.5978ms | 1.4724ms | 
679.1777 Ops/s | 657.4898 Ops/s | $\color{#35bf28}+3.30\\%$ | | test_to_module_speed[False] | 1.5873ms | 1.4663ms | 682.0030 Ops/s | 666.1449 Ops/s | $\color{#35bf28}+2.38\\%$ | | test_tc_init | 88.6720μs | 50.0576μs | 19.9770 KOps/s | 18.6142 KOps/s | $\textbf{\color{#35bf28}+7.32\\%}$ | | test_tc_init_nested | 0.1352ms | 0.1028ms | 9.7287 KOps/s | 9.0024 KOps/s | $\textbf{\color{#35bf28}+8.07\\%}$ | | test_tc_first_layer_tensor | 16.6710μs | 3.5291μs | 283.3560 KOps/s | 252.4428 KOps/s | $\textbf{\color{#35bf28}+12.25\\%}$ | | test_tc_first_layer_nontensor | 22.7910μs | 3.5300μs | 283.2830 KOps/s | 251.5042 KOps/s | $\textbf{\color{#35bf28}+12.64\\%}$ | | test_tc_second_layer_tensor | 5.3080μs | 1.1151μs | 896.7592 KOps/s | 772.2341 KOps/s | $\textbf{\color{#35bf28}+16.13\\%}$ | | test_tc_second_layer_nontensor | 24.8010μs | 4.0323μs | 247.9965 KOps/s | 224.5357 KOps/s | $\textbf{\color{#35bf28}+10.45\\%}$ |