pytorch / tensordict

TensorDict is a PyTorch-dedicated tensor container.
MIT License
832 stars 74 forks source link

[BugFix] Fix vmap compatibility with torch<2.2 #925

Closed vmoens closed 3 months ago

github-actions[bot] commented 3 months ago

$\color{#D29922}\textsf{\Large⚠\kern{0.2cm}\normalsize Warning}$ Result of CPU Benchmark Tests

Total Benchmarks: 219. Improved: $\large\color{#35bf28}32$. Worsened: $\large\color{#d91a1a}19$.

Expand to view detailed results | Name | Max | Mean | Ops | Ops on Repo `HEAD` | Change | | ------------------------------------------------- | --------- | --------- | --------------- | ------------------ | ----------------------------------- | | test_plain_set_nested | 54.1310μs | 20.9604μs | 47.7089 KOps/s | 44.9579 KOps/s | $\textbf{\color{#35bf28}+6.12\\%}$ | | test_plain_set_stack_nested | 53.3400μs | 21.1659μs | 47.2458 KOps/s | 44.5958 KOps/s | $\textbf{\color{#35bf28}+5.94\\%}$ | | test_plain_set_nested_inplace | 0.1793ms | 22.8456μs | 43.7722 KOps/s | 40.9915 KOps/s | $\textbf{\color{#35bf28}+6.78\\%}$ | | test_plain_set_stack_nested_inplace | 69.2690μs | 22.8684μs | 43.7284 KOps/s | 40.6492 KOps/s | $\textbf{\color{#35bf28}+7.58\\%}$ | | test_items | 37.5100μs | 2.6313μs | 380.0365 KOps/s | 383.7121 KOps/s | $\color{#d91a1a}-0.96\\%$ | | test_items_nested | 0.7812ms | 0.3400ms | 2.9414 KOps/s | 3.0009 KOps/s | $\color{#d91a1a}-1.98\\%$ | | test_items_nested_locked | 1.3444ms | 0.3357ms | 2.9792 KOps/s | 2.9586 KOps/s | $\color{#35bf28}+0.70\\%$ | | test_items_nested_leaf | 0.2384ms | 92.7642μs | 10.7800 KOps/s | 11.9391 KOps/s | $\textbf{\color{#d91a1a}-9.71\\%}$ | | test_items_stack_nested | 0.4393ms | 0.3324ms | 3.0083 KOps/s | 2.9937 KOps/s | $\color{#35bf28}+0.49\\%$ | | test_items_stack_nested_leaf | 0.3171ms | 89.9624μs | 11.1158 KOps/s | 11.7122 KOps/s | $\textbf{\color{#d91a1a}-5.09\\%}$ | | test_items_stack_nested_locked | 0.4864ms | 0.3337ms | 2.9966 KOps/s | 2.9947 KOps/s | $\color{#35bf28}+0.07\\%$ | | test_keys | 39.7740μs | 4.0696μs | 245.7258 KOps/s | 257.3184 KOps/s | $\color{#d91a1a}-4.51\\%$ | | test_keys_nested | 0.2381ms | 0.1460ms | 6.8495 KOps/s | 7.0473 KOps/s | $\color{#d91a1a}-2.81\\%$ | | test_keys_nested_locked | 0.7029ms | 0.1505ms | 6.6450 KOps/s | 6.6907 KOps/s | $\color{#d91a1a}-0.68\\%$ | | test_keys_nested_leaf | 0.2101ms | 0.1246ms | 8.0275 KOps/s | 8.1535 KOps/s | $\color{#d91a1a}-1.55\\%$ | | test_keys_stack_nested | 
0.2446ms | 0.1464ms | 6.8320 KOps/s | 6.9483 KOps/s | $\color{#d91a1a}-1.67\\%$ | | test_keys_stack_nested_leaf | 0.2226ms | 0.1241ms | 8.0586 KOps/s | 8.0853 KOps/s | $\color{#d91a1a}-0.33\\%$ | | test_keys_stack_nested_locked | 0.2345ms | 0.1485ms | 6.7337 KOps/s | 6.6329 KOps/s | $\color{#35bf28}+1.52\\%$ | | test_values | 11.1212μs | 1.2774μs | 782.8596 KOps/s | 830.7219 KOps/s | $\textbf{\color{#d91a1a}-5.76\\%}$ | | test_values_nested | 92.2820μs | 49.6562μs | 20.1385 KOps/s | 19.9201 KOps/s | $\color{#35bf28}+1.10\\%$ | | test_values_nested_locked | 99.8660μs | 50.1533μs | 19.9389 KOps/s | 20.0205 KOps/s | $\color{#d91a1a}-0.41\\%$ | | test_values_nested_leaf | 99.1350μs | 44.9906μs | 22.2268 KOps/s | 21.9292 KOps/s | $\color{#35bf28}+1.36\\%$ | | test_values_stack_nested | 96.2300μs | 50.1057μs | 19.9578 KOps/s | 19.5551 KOps/s | $\color{#35bf28}+2.06\\%$ | | test_values_stack_nested_leaf | 93.4240μs | 44.8633μs | 22.2899 KOps/s | 22.0596 KOps/s | $\color{#35bf28}+1.04\\%$ | | test_values_stack_nested_locked | 0.1081ms | 49.9196μs | 20.0322 KOps/s | 19.7525 KOps/s | $\color{#35bf28}+1.42\\%$ | | test_membership | 21.3800μs | 0.8977μs | 1.1139 MOps/s | 1.3558 MOps/s | $\textbf{\color{#d91a1a}-17.84\\%}$ | | test_membership_nested | 22.0010μs | 2.7348μs | 365.6625 KOps/s | 390.6830 KOps/s | $\textbf{\color{#d91a1a}-6.40\\%}$ | | test_membership_nested_leaf | 43.4510μs | 2.7288μs | 366.4586 KOps/s | 388.1993 KOps/s | $\textbf{\color{#d91a1a}-5.60\\%}$ | | test_membership_stacked_nested | 16.1700μs | 2.7188μs | 367.8127 KOps/s | 391.2320 KOps/s | $\textbf{\color{#d91a1a}-5.99\\%}$ | | test_membership_stacked_nested_leaf | 24.7860μs | 2.7134μs | 368.5392 KOps/s | 388.2347 KOps/s | $\textbf{\color{#d91a1a}-5.07\\%}$ | | test_membership_nested_last | 36.2180μs | 4.1171μs | 242.8867 KOps/s | 258.8189 KOps/s | $\textbf{\color{#d91a1a}-6.16\\%}$ | | test_membership_nested_leaf_last | 26.7100μs | 4.0359μs | 247.7768 KOps/s | 258.0189 KOps/s | $\color{#d91a1a}-3.97\\%$ 
| | test_membership_stacked_nested_last | 26.6400μs | 4.0343μs | 247.8756 KOps/s | 259.9306 KOps/s | $\color{#d91a1a}-4.64\\%$ | | test_membership_stacked_nested_leaf_last | 45.6250μs | 4.0219μs | 248.6380 KOps/s | 257.5786 KOps/s | $\color{#d91a1a}-3.47\\%$ | | test_nested_getleaf | 53.1500μs | 10.2597μs | 97.4687 KOps/s | 95.4551 KOps/s | $\color{#35bf28}+2.11\\%$ | | test_nested_get | 39.9350μs | 9.7325μs | 102.7486 KOps/s | 101.2874 KOps/s | $\color{#35bf28}+1.44\\%$ | | test_stacked_getleaf | 40.7260μs | 10.2541μs | 97.5218 KOps/s | 94.6829 KOps/s | $\color{#35bf28}+3.00\\%$ | | test_stacked_get | 46.0660μs | 9.6376μs | 103.7607 KOps/s | 100.0909 KOps/s | $\color{#35bf28}+3.67\\%$ | | test_nested_getitemleaf | 29.4250μs | 10.7805μs | 92.7604 KOps/s | 91.5407 KOps/s | $\color{#35bf28}+1.33\\%$ | | test_nested_getitem | 47.7890μs | 9.9220μs | 100.7860 KOps/s | 99.0396 KOps/s | $\color{#35bf28}+1.76\\%$ | | test_stacked_getitemleaf | 55.1250μs | 10.6590μs | 93.8172 KOps/s | 94.0065 KOps/s | $\color{#d91a1a}-0.20\\%$ | | test_stacked_getitem | 31.1180μs | 9.8715μs | 101.3022 KOps/s | 98.7732 KOps/s | $\color{#35bf28}+2.56\\%$ | | test_lock_nested | 76.1187ms | 0.5736ms | 1.7435 KOps/s | 1.9810 KOps/s | $\textbf{\color{#d91a1a}-11.99\\%}$ | | test_lock_stack_nested | 0.9892ms | 0.4722ms | 2.1180 KOps/s | 2.1016 KOps/s | $\color{#35bf28}+0.78\\%$ | | test_unlock_nested | 76.4517ms | 0.4917ms | 2.0337 KOps/s | 2.4161 KOps/s | $\textbf{\color{#d91a1a}-15.83\\%}$ | | test_unlock_stack_nested | 0.4638ms | 0.3881ms | 2.5769 KOps/s | 2.5398 KOps/s | $\color{#35bf28}+1.46\\%$ | | test_flatten_speed | 0.2468ms | 0.1103ms | 9.0641 KOps/s | 9.7596 KOps/s | $\textbf{\color{#d91a1a}-7.13\\%}$ | | test_unflatten_speed | 0.5382ms | 0.4271ms | 2.3414 KOps/s | 2.3433 KOps/s | $\color{#d91a1a}-0.08\\%$ | | test_common_ops | 5.7217ms | 1.0589ms | 944.3992 Ops/s | 872.5350 Ops/s | $\textbf{\color{#35bf28}+8.24\\%}$ | | test_creation | 24.5350μs | 2.0680μs | 483.5510 KOps/s | 500.7033 
KOps/s | $\color{#d91a1a}-3.43\\%$ | | test_creation_empty | 55.9150μs | 16.9921μs | 58.8510 KOps/s | 52.6653 KOps/s | $\textbf{\color{#35bf28}+11.75\\%}$ | | test_creation_nested_1 | 57.9680μs | 20.2409μs | 49.4050 KOps/s | 45.2741 KOps/s | $\textbf{\color{#35bf28}+9.12\\%}$ | | test_creation_nested_2 | 52.6180μs | 23.6876μs | 42.2162 KOps/s | 38.9789 KOps/s | $\textbf{\color{#35bf28}+8.31\\%}$ | | test_clone | 0.1030ms | 17.2321μs | 58.0312 KOps/s | 61.6223 KOps/s | $\textbf{\color{#d91a1a}-5.83\\%}$ | | test_getitem[int] | 1.3469ms | 16.2138μs | 61.6759 KOps/s | 60.5401 KOps/s | $\color{#35bf28}+1.88\\%$ | | test_getitem[slice_int] | 0.1263ms | 30.9026μs | 32.3598 KOps/s | 32.0878 KOps/s | $\color{#35bf28}+0.85\\%$ | | test_getitem[range] | 0.1714ms | 56.4635μs | 17.7106 KOps/s | 17.2548 KOps/s | $\color{#35bf28}+2.64\\%$ | | test_getitem[tuple] | 0.1414ms | 25.4558μs | 39.2838 KOps/s | 39.9445 KOps/s | $\color{#d91a1a}-1.65\\%$ | | test_getitem[list] | 0.2016ms | 51.7207μs | 19.3346 KOps/s | 18.7928 KOps/s | $\color{#35bf28}+2.88\\%$ | | test_setitem_dim[int] | 68.7390μs | 39.4193μs | 25.3683 KOps/s | 23.1244 KOps/s | $\textbf{\color{#35bf28}+9.70\\%}$ | | test_setitem_dim[slice_int] | 0.1138ms | 69.6941μs | 14.3484 KOps/s | 13.3356 KOps/s | $\textbf{\color{#35bf28}+7.59\\%}$ | | test_setitem_dim[range] | 0.1433ms | 89.5920μs | 11.1617 KOps/s | 10.3055 KOps/s | $\textbf{\color{#35bf28}+8.31\\%}$ | | test_setitem_dim[tuple] | 83.3660μs | 56.5715μs | 17.6767 KOps/s | 16.1156 KOps/s | $\textbf{\color{#35bf28}+9.69\\%}$ | | test_setitem | 82.5840μs | 28.0594μs | 35.6387 KOps/s | 33.2191 KOps/s | $\textbf{\color{#35bf28}+7.28\\%}$ | | test_set | 86.2510μs | 27.4279μs | 36.4592 KOps/s | 33.8819 KOps/s | $\textbf{\color{#35bf28}+7.61\\%}$ | | test_set_shared | 1.2071ms | 0.2101ms | 4.7588 KOps/s | 4.5255 KOps/s | $\textbf{\color{#35bf28}+5.15\\%}$ | | test_update | 0.7964ms | 34.4417μs | 29.0345 KOps/s | 27.0923 KOps/s | $\textbf{\color{#35bf28}+7.17\\%}$ | | 
test_update_nested | 0.1149ms | 43.6452μs | 22.9120 KOps/s | 21.6017 KOps/s | $\textbf{\color{#35bf28}+6.07\\%}$ | | test_update__nested | 84.3770μs | 33.5380μs | 29.8169 KOps/s | 29.5432 KOps/s | $\color{#35bf28}+0.93\\%$ | | test_set_nested | 0.1013ms | 29.9014μs | 33.4432 KOps/s | 31.3721 KOps/s | $\textbf{\color{#35bf28}+6.60\\%}$ | | test_set_nested_new | 78.8870μs | 34.6001μs | 28.9017 KOps/s | 27.1432 KOps/s | $\textbf{\color{#35bf28}+6.48\\%}$ | | test_select | 0.1085ms | 51.4463μs | 19.4377 KOps/s | 18.5076 KOps/s | $\textbf{\color{#35bf28}+5.03\\%}$ | | test_select_nested | 0.1123ms | 59.4132μs | 16.8313 KOps/s | 16.8278 KOps/s | $\color{#35bf28}+0.02\\%$ | | test_exclude_nested | 0.1275ms | 77.8422μs | 12.8465 KOps/s | 12.8784 KOps/s | $\color{#d91a1a}-0.25\\%$ | | test_empty[True] | 0.4398ms | 0.3182ms | 3.1430 KOps/s | 3.1215 KOps/s | $\color{#35bf28}+0.69\\%$ | | test_empty[False] | 10.8628μs | 1.1619μs | 860.6822 KOps/s | 868.1025 KOps/s | $\color{#d91a1a}-0.85\\%$ | | test_unbind_speed | 0.5207ms | 0.3035ms | 3.2951 KOps/s | 3.1736 KOps/s | $\color{#35bf28}+3.83\\%$ | | test_unbind_speed_stack0 | 0.6789ms | 0.3059ms | 3.2691 KOps/s | 3.2044 KOps/s | $\color{#35bf28}+2.02\\%$ | | test_unbind_speed_stack1 | 79.1103ms | 0.7805ms | 1.2813 KOps/s | 1.3493 KOps/s | $\textbf{\color{#d91a1a}-5.04\\%}$ | | test_split | 80.3016ms | 2.1152ms | 472.7724 Ops/s | 427.2623 Ops/s | $\textbf{\color{#35bf28}+10.65\\%}$ | | test_chunk | 81.9298ms | 2.1264ms | 470.2710 Ops/s | 500.2764 Ops/s | $\textbf{\color{#d91a1a}-6.00\\%}$ | | test_creation[device0] | 0.2236ms | 0.1175ms | 8.5135 KOps/s | 8.3778 KOps/s | $\color{#35bf28}+1.62\\%$ | | test_creation_from_tensor | 4.3804ms | 0.1199ms | 8.3397 KOps/s | 8.1736 KOps/s | $\color{#35bf28}+2.03\\%$ | | test_add_one[memmap_tensor0] | 0.1820ms | 7.6513μs | 130.6968 KOps/s | 127.5428 KOps/s | $\color{#35bf28}+2.47\\%$ | | test_contiguous[memmap_tensor0] | 26.0880μs | 1.9830μs | 504.2978 KOps/s | 491.0153 KOps/s | 
$\color{#35bf28}+2.71\\%$ | | test_stack[memmap_tensor0] | 56.7960μs | 5.6291μs | 177.6484 KOps/s | 167.4513 KOps/s | $\textbf{\color{#35bf28}+6.09\\%}$ | | test_memmaptd_index | 1.1436ms | 0.4074ms | 2.4545 KOps/s | 2.4503 KOps/s | $\color{#35bf28}+0.17\\%$ | | test_memmaptd_index_astensor | 0.9986ms | 0.4897ms | 2.0419 KOps/s | 2.0418 KOps/s | $+0.01\\%$ | | test_memmaptd_index_op | 1.4928ms | 1.0303ms | 970.5634 Ops/s | 947.3562 Ops/s | $\color{#35bf28}+2.45\\%$ | | test_serialize_model | 0.1928s | 0.1275s | 7.8403 Ops/s | 7.7699 Ops/s | $\color{#35bf28}+0.90\\%$ | | test_serialize_model_pickle | 0.4470s | 0.3923s | 2.5488 Ops/s | 2.4838 Ops/s | $\color{#35bf28}+2.62\\%$ | | test_serialize_weights | 0.1222s | 0.1170s | 8.5473 Ops/s | 8.5660 Ops/s | $\color{#d91a1a}-0.22\\%$ | | test_serialize_weights_returnearly | 0.1715s | 0.1573s | 6.3587 Ops/s | 6.1748 Ops/s | $\color{#35bf28}+2.98\\%$ | | test_serialize_weights_pickle | 1.1575s | 0.7081s | 1.4121 Ops/s | 2.4558 Ops/s | $\textbf{\color{#d91a1a}-42.50\\%}$ | | test_serialize_weights_filesystem | 0.2291s | 0.1548s | 6.4586 Ops/s | 6.6344 Ops/s | $\color{#d91a1a}-2.65\\%$ | | test_serialize_model_filesystem | 0.1633s | 0.1443s | 6.9281 Ops/s | 6.6643 Ops/s | $\color{#35bf28}+3.96\\%$ | | test_reshape_pytree | 90.2790μs | 39.9690μs | 25.0194 KOps/s | 25.0975 KOps/s | $\color{#d91a1a}-0.31\\%$ | | test_reshape_td | 94.2060μs | 47.1547μs | 21.2068 KOps/s | 20.7770 KOps/s | $\color{#35bf28}+2.07\\%$ | | test_view_pytree | 93.3340μs | 39.5076μs | 25.3116 KOps/s | 25.0182 KOps/s | $\color{#35bf28}+1.17\\%$ | | test_view_td | 0.1005ms | 53.4381μs | 18.7132 KOps/s | 18.3025 KOps/s | $\color{#35bf28}+2.24\\%$ | | test_unbind_pytree | 86.6210μs | 37.8656μs | 26.4092 KOps/s | 27.2500 KOps/s | $\color{#d91a1a}-3.09\\%$ | | test_unbind_td | 0.3582ms | 45.8979μs | 21.7875 KOps/s | 21.7150 KOps/s | $\color{#35bf28}+0.33\\%$ | | test_split_pytree | 92.2720μs | 39.7164μs | 25.1785 KOps/s | 24.2152 KOps/s | 
$\color{#35bf28}+3.98\\%$ | | test_split_td | 0.4693ms | 58.1606μs | 17.1938 KOps/s | 17.4990 KOps/s | $\color{#d91a1a}-1.74\\%$ | | test_add_pytree | 92.0620μs | 47.4335μs | 21.0822 KOps/s | 21.7492 KOps/s | $\color{#d91a1a}-3.07\\%$ | | test_add_td | 0.1468ms | 80.1387μs | 12.4784 KOps/s | 11.8241 KOps/s | $\textbf{\color{#35bf28}+5.53\\%}$ | | test_compile_add_one_nested[tensordict-compile] | 0.1394ms | 54.0781μs | 18.4918 KOps/s | 18.1941 KOps/s | $\color{#35bf28}+1.64\\%$ | | test_compile_add_one_nested[tensordict-eager] | 0.2975ms | 0.1949ms | 5.1298 KOps/s | 5.1923 KOps/s | $\color{#d91a1a}-1.20\\%$ | | test_compile_add_one_nested[pytree-compile] | 0.1042ms | 54.7470μs | 18.2658 KOps/s | 18.5236 KOps/s | $\color{#d91a1a}-1.39\\%$ | | test_compile_add_one_nested[pytree-eager] | 0.2719ms | 0.1481ms | 6.7509 KOps/s | 6.8798 KOps/s | $\color{#d91a1a}-1.87\\%$ | | test_compile_copy_nested[tensordict-compile] | 52.7280μs | 20.4212μs | 48.9686 KOps/s | 47.3116 KOps/s | $\color{#35bf28}+3.50\\%$ | | test_compile_copy_nested[tensordict-eager] | 0.1271ms | 65.1757μs | 15.3431 KOps/s | 15.4558 KOps/s | $\color{#d91a1a}-0.73\\%$ | | test_compile_copy_nested[pytree-compile] | 0.1500ms | 81.1980μs | 12.3156 KOps/s | 12.6006 KOps/s | $\color{#d91a1a}-2.26\\%$ | | test_compile_copy_nested[pytree-eager] | 0.1412ms | 73.0705μs | 13.6854 KOps/s | 14.0329 KOps/s | $\color{#d91a1a}-2.48\\%$ | | test_compile_add_one_flat[tensordict-compile] | 0.2720ms | 0.1767ms | 5.6580 KOps/s | 5.6635 KOps/s | $\color{#d91a1a}-0.10\\%$ | | test_compile_add_one_flat[tensordict-eager] | 0.4420ms | 0.2031ms | 4.9244 KOps/s | 5.2053 KOps/s | $\textbf{\color{#d91a1a}-5.40\\%}$ | | test_compile_add_one_flat[tensorclass-compile] | 94.9370μs | 37.9200μs | 26.3713 KOps/s | 24.3685 KOps/s | $\textbf{\color{#35bf28}+8.22\\%}$ | | test_compile_add_one_flat[tensorclass-eager] | 0.8014ms | 70.6034μs | 14.1636 KOps/s | 14.0489 KOps/s | $\color{#35bf28}+0.82\\%$ | | test_compile_add_one_flat[pytree-compile] | 
0.3061ms | 0.1776ms | 5.6311 KOps/s | 5.6676 KOps/s | $\color{#d91a1a}-0.65\\%$ | | test_compile_add_one_flat[pytree-eager] | 0.4834ms | 0.2958ms | 3.3810 KOps/s | 3.3827 KOps/s | $\color{#d91a1a}-0.05\\%$ | | test_compile_add_self_flat[tensordict-eager] | 0.4494ms | 0.2293ms | 4.3606 KOps/s | 4.8054 KOps/s | $\textbf{\color{#d91a1a}-9.26\\%}$ | | test_compile_add_self_flat[tensordict-compile] | 0.3655ms | 0.1773ms | 5.6398 KOps/s | 5.6592 KOps/s | $\color{#d91a1a}-0.34\\%$ | | test_compile_add_self_flat[tensorclass-eager] | 0.2820ms | 62.7408μs | 15.9386 KOps/s | 15.7592 KOps/s | $\color{#35bf28}+1.14\\%$ | | test_compile_add_self_flat[tensorclass-compile] | 0.1002ms | 38.3427μs | 26.0806 KOps/s | 25.0449 KOps/s | $\color{#35bf28}+4.14\\%$ | | test_compile_add_self_flat[pytree-eager] | 0.4761ms | 0.2451ms | 4.0799 KOps/s | 4.1904 KOps/s | $\color{#d91a1a}-2.64\\%$ | | test_compile_add_self_flat[pytree-compile] | 0.3531ms | 0.1750ms | 5.7143 KOps/s | 5.7644 KOps/s | $\color{#d91a1a}-0.87\\%$ | | test_compile_copy_flat[tensordict-compile] | 0.2391ms | 0.1112ms | 8.9923 KOps/s | 9.0733 KOps/s | $\color{#d91a1a}-0.89\\%$ | | test_compile_copy_flat[tensordict-eager] | 0.1123ms | 56.0873μs | 17.8294 KOps/s | 18.1129 KOps/s | $\color{#d91a1a}-1.57\\%$ | | test_compile_copy_flat[pytree-compile] | 6.1305ms | 83.0288μs | 12.0440 KOps/s | 12.6191 KOps/s | $\color{#d91a1a}-4.56\\%$ | | test_compile_copy_flat[pytree-eager] | 0.1558ms | 72.1543μs | 13.8592 KOps/s | 14.3437 KOps/s | $\color{#d91a1a}-3.38\\%$ | | test_compile_assign_and_add[tensordict-compile] | 0.3036ms | 0.1915ms | 5.2221 KOps/s | 5.2384 KOps/s | $\color{#d91a1a}-0.31\\%$ | | test_compile_assign_and_add[tensordict-eager] | 1.9093ms | 1.6275ms | 614.4566 Ops/s | 607.0470 Ops/s | $\color{#35bf28}+1.22\\%$ | | test_compile_assign_and_add[pytree-compile] | 0.2760ms | 0.1887ms | 5.3004 KOps/s | 5.4157 KOps/s | $\color{#d91a1a}-2.13\\%$ | | test_compile_assign_and_add[pytree-eager] | 1.8024ms | 1.0923ms | 915.4587 
Ops/s | 898.8698 Ops/s | $\color{#35bf28}+1.85\\%$ | | test_compile_assign_and_add_stack[compile] | 0.5799ms | 0.4130ms | 2.4215 KOps/s | 2.4570 KOps/s | $\color{#d91a1a}-1.44\\%$ | | test_compile_assign_and_add_stack[eager] | 3.8665ms | 3.7485ms | 266.7763 Ops/s | 252.8695 Ops/s | $\textbf{\color{#35bf28}+5.50\\%}$ | | test_compile_indexing[tensor-tensordict-compile] | 96.3190μs | 31.7923μs | 31.4542 KOps/s | 30.0779 KOps/s | $\color{#35bf28}+4.58\\%$ | | test_compile_indexing[tensor-tensordict-eager] | 0.7285ms | 47.3809μs | 21.1055 KOps/s | 20.3901 KOps/s | $\color{#35bf28}+3.51\\%$ | | test_compile_indexing[tensor-tensorclass-compile] | 97.3200μs | 28.4079μs | 35.2015 KOps/s | 34.8496 KOps/s | $\color{#35bf28}+1.01\\%$ | | test_compile_indexing[tensor-tensorclass-eager] | 73.9680μs | 30.3310μs | 32.9696 KOps/s | 32.9520 KOps/s | $\color{#35bf28}+0.05\\%$ | | test_compile_indexing[tensor-pytree-compile] | 0.1042ms | 28.8299μs | 34.6862 KOps/s | 35.6071 KOps/s | $\color{#d91a1a}-2.59\\%$ | | test_compile_indexing[tensor-pytree-eager] | 0.1114ms | 30.4809μs | 32.8074 KOps/s | 32.9414 KOps/s | $\color{#d91a1a}-0.41\\%$ | | test_compile_indexing[slice-tensordict-compile] | 0.1606ms | 72.5496μs | 13.7837 KOps/s | 13.7458 KOps/s | $\color{#35bf28}+0.28\\%$ | | test_compile_indexing[slice-tensordict-eager] | 0.5209ms | 27.4954μs | 36.3698 KOps/s | 36.5208 KOps/s | $\color{#d91a1a}-0.41\\%$ | | test_compile_indexing[slice-tensorclass-compile] | 6.2036ms | 67.9292μs | 14.7212 KOps/s | 14.6153 KOps/s | $\color{#35bf28}+0.72\\%$ | | test_compile_indexing[slice-tensorclass-eager] | 79.0680μs | 24.9279μs | 40.1157 KOps/s | 41.0474 KOps/s | $\color{#d91a1a}-2.27\\%$ | | test_compile_indexing[slice-pytree-compile] | 0.1465ms | 67.6101μs | 14.7907 KOps/s | 14.9835 KOps/s | $\color{#d91a1a}-1.29\\%$ | | test_compile_indexing[slice-pytree-eager] | 86.6620μs | 24.3128μs | 41.1306 KOps/s | 40.9975 KOps/s | $\color{#35bf28}+0.32\\%$ | | test_compile_indexing[int-tensordict-compile] 
| 0.1300ms | 71.5155μs | 13.9830 KOps/s | 13.7406 KOps/s | $\color{#35bf28}+1.76\\%$ | | test_compile_indexing[int-tensordict-eager] | 0.9068ms | 27.6889μs | 36.1155 KOps/s | 36.3024 KOps/s | $\color{#d91a1a}-0.51\\%$ | | test_compile_indexing[int-tensorclass-compile] | 0.1383ms | 66.9693μs | 14.9322 KOps/s | 14.8989 KOps/s | $\color{#35bf28}+0.22\\%$ | | test_compile_indexing[int-tensorclass-eager] | 89.3470μs | 24.2716μs | 41.2005 KOps/s | 41.5485 KOps/s | $\color{#d91a1a}-0.84\\%$ | | test_compile_indexing[int-pytree-compile] | 0.1384ms | 67.0075μs | 14.9237 KOps/s | 14.8790 KOps/s | $\color{#35bf28}+0.30\\%$ | | test_compile_indexing[int-pytree-eager] | 64.6000μs | 24.3185μs | 41.1210 KOps/s | 41.8352 KOps/s | $\color{#d91a1a}-1.71\\%$ | | test_mod_add[eager] | 66.3630μs | 23.9178μs | 41.8098 KOps/s | 38.2715 KOps/s | $\textbf{\color{#35bf28}+9.25\\%}$ | | test_mod_add[compile] | 91.8720μs | 35.5338μs | 28.1422 KOps/s | 26.9907 KOps/s | $\color{#35bf28}+4.27\\%$ | | test_mod_add[compile-overhead] | 76.4920μs | 35.8299μs | 27.9097 KOps/s | 26.6368 KOps/s | $\color{#35bf28}+4.78\\%$ | | test_mod_wrap[eager] | 0.4005ms | 0.2079ms | 4.8101 KOps/s | 4.7254 KOps/s | $\color{#35bf28}+1.79\\%$ | | test_mod_wrap[compile] | 1.1937ms | 0.2239ms | 4.4660 KOps/s | 4.3224 KOps/s | $\color{#35bf28}+3.32\\%$ | | test_mod_wrap[compile-overhead] | 0.4246ms | 0.2231ms | 4.4821 KOps/s | 4.4474 KOps/s | $\color{#35bf28}+0.78\\%$ | | test_mod_wrap_and_backward[eager] | 12.3672ms | 10.9093ms | 91.6647 Ops/s | 92.9440 Ops/s | $\color{#d91a1a}-1.38\\%$ | | test_mod_wrap_and_backward[compile] | 12.2824ms | 10.9172ms | 91.5985 Ops/s | 92.2178 Ops/s | $\color{#d91a1a}-0.67\\%$ | | test_mod_wrap_and_backward[compile-overhead] | 12.4196ms | 11.0244ms | 90.7079 Ops/s | 91.6079 Ops/s | $\color{#d91a1a}-0.98\\%$ | | test_seq_add[eager] | 0.1959ms | 85.6925μs | 11.6696 KOps/s | 11.2877 KOps/s | $\color{#35bf28}+3.38\\%$ | | test_seq_add[compile] | 0.1688ms | 59.3448μs | 16.8507 KOps/s | 15.7548 
KOps/s | $\textbf{\color{#35bf28}+6.96\\%}$ | | test_seq_add[compile-overhead] | 0.1465ms | 58.6546μs | 17.0490 KOps/s | 16.6157 KOps/s | $\color{#35bf28}+2.61\\%$ | | test_seq_wrap[eager] | 0.5927ms | 0.3716ms | 2.6914 KOps/s | 2.6652 KOps/s | $\color{#35bf28}+0.98\\%$ | | test_seq_wrap[compile] | 0.4684ms | 0.2576ms | 3.8823 KOps/s | 3.8002 KOps/s | $\color{#35bf28}+2.16\\%$ | | test_seq_wrap[compile-overhead] | 0.3995ms | 0.2566ms | 3.8968 KOps/s | 3.8292 KOps/s | $\color{#35bf28}+1.77\\%$ | | test_func_call_runtime[False-eager] | 0.6375ms | 0.5322ms | 1.8790 KOps/s | 1.8803 KOps/s | $\color{#d91a1a}-0.07\\%$ | | test_func_call_runtime[False-compile] | 0.8901ms | 0.4916ms | 2.0343 KOps/s | 2.0223 KOps/s | $\color{#35bf28}+0.59\\%$ | | test_func_call_runtime[False-compile-overhead] | 0.8986ms | 0.4907ms | 2.0378 KOps/s | 2.0335 KOps/s | $\color{#35bf28}+0.21\\%$ | | test_func_call_runtime[True-eager] | 1.1819ms | 0.7703ms | 1.2982 KOps/s | 1.3203 KOps/s | $\color{#d91a1a}-1.67\\%$ | | test_func_call_runtime[True-compile] | 0.6349ms | 0.5052ms | 1.9793 KOps/s | 1.9644 KOps/s | $\color{#35bf28}+0.76\\%$ | | test_func_call_runtime[True-compile-overhead] | 0.6408ms | 0.5069ms | 1.9728 KOps/s | 1.9598 KOps/s | $\color{#35bf28}+0.67\\%$ | | test_func_call_cm_runtime[False-eager] | 0.7723ms | 0.5352ms | 1.8685 KOps/s | 1.8916 KOps/s | $\color{#d91a1a}-1.22\\%$ | | test_func_call_cm_runtime[False-compile] | 0.8997ms | 0.4908ms | 2.0376 KOps/s | 2.0364 KOps/s | $\color{#35bf28}+0.06\\%$ | | test_func_call_cm_runtime[False-compile-overhead] | 0.6780ms | 0.4880ms | 2.0491 KOps/s | 2.0387 KOps/s | $\color{#35bf28}+0.51\\%$ | | test_func_call_cm_runtime[True-eager] | 1.7823ms | 0.9157ms | 1.0921 KOps/s | 1.1222 KOps/s | $\color{#d91a1a}-2.68\\%$ | | test_func_call_cm_runtime[True-compile] | 1.0490ms | 0.8447ms | 1.1839 KOps/s | 1.1938 KOps/s | $\color{#d91a1a}-0.83\\%$ | | test_func_call_cm_runtime[True-compile-overhead] | 1.0253ms | 0.8463ms | 1.1816 KOps/s | 1.1839 KOps/s | 
$\color{#d91a1a}-0.19\\%$ | | test_distributed | 0.2988ms | 0.1325ms | 7.5483 KOps/s | 7.5413 KOps/s | $\color{#35bf28}+0.09\\%$ | | test_tdmodule | 43.8020μs | 16.7466μs | 59.7137 KOps/s | 55.6052 KOps/s | $\textbf{\color{#35bf28}+7.39\\%}$ | | test_tdmodule_dispatch | 62.3560μs | 34.4530μs | 29.0250 KOps/s | 25.8527 KOps/s | $\textbf{\color{#35bf28}+12.27\\%}$ | | test_tdseq | 46.1160μs | 18.5528μs | 53.9002 KOps/s | 50.5517 KOps/s | $\textbf{\color{#35bf28}+6.62\\%}$ | | test_tdseq_dispatch | 72.8760μs | 38.6432μs | 25.8778 KOps/s | 24.2796 KOps/s | $\textbf{\color{#35bf28}+6.58\\%}$ | | test_instantiation_functorch | 2.6121ms | 1.6788ms | 595.6781 Ops/s | 607.1023 Ops/s | $\color{#d91a1a}-1.88\\%$ | | test_instantiation_td | 2.0884ms | 1.2074ms | 828.2482 Ops/s | 840.2459 Ops/s | $\color{#d91a1a}-1.43\\%$ | | test_exec_functorch | 0.3235ms | 0.1810ms | 5.5252 KOps/s | 5.5816 KOps/s | $\color{#d91a1a}-1.01\\%$ | | test_exec_functional_call | 0.3027ms | 0.1728ms | 5.7882 KOps/s | 5.8253 KOps/s | $\color{#d91a1a}-0.64\\%$ | | test_exec_td | 0.3700ms | 0.1751ms | 5.7100 KOps/s | 5.8496 KOps/s | $\color{#d91a1a}-2.39\\%$ | | test_exec_td_decorator | 0.7403ms | 0.2278ms | 4.3889 KOps/s | 4.4575 KOps/s | $\color{#d91a1a}-1.54\\%$ | | test_vmap_mlp_speed[True-True] | 0.6890ms | 0.5819ms | 1.7185 KOps/s | 1.7053 KOps/s | $\color{#35bf28}+0.78\\%$ | | test_vmap_mlp_speed[True-False] | 0.7884ms | 0.5781ms | 1.7298 KOps/s | 1.7350 KOps/s | $\color{#d91a1a}-0.30\\%$ | | test_vmap_mlp_speed[False-True] | 0.7590ms | 0.4818ms | 2.0755 KOps/s | 2.1129 KOps/s | $\color{#d91a1a}-1.77\\%$ | | test_vmap_mlp_speed[False-False] | 0.6759ms | 0.4826ms | 2.0722 KOps/s | 2.1024 KOps/s | $\color{#d91a1a}-1.43\\%$ | | test_vmap_mlp_speed_decorator[True-True] | 0.8477ms | 0.6405ms | 1.5614 KOps/s | 1.5445 KOps/s | $\color{#35bf28}+1.09\\%$ | | test_vmap_mlp_speed_decorator[True-False] | 0.9613ms | 0.6456ms | 1.5489 KOps/s | 1.5611 KOps/s | $\color{#d91a1a}-0.78\\%$ | | 
test_vmap_mlp_speed_decorator[False-True] | 0.7304ms | 0.5322ms | 1.8791 KOps/s | 1.9143 KOps/s | $\color{#d91a1a}-1.84\\%$ | | test_vmap_mlp_speed_decorator[False-False] | 9.5191ms | 0.5357ms | 1.8666 KOps/s | 1.9220 KOps/s | $\color{#d91a1a}-2.89\\%$ | | test_to_module_speed[True] | 1.5909ms | 1.3120ms | 762.2116 Ops/s | 742.4166 Ops/s | $\color{#35bf28}+2.67\\%$ | | test_to_module_speed[False] | 1.3914ms | 1.2805ms | 780.9359 Ops/s | 761.6256 Ops/s | $\color{#35bf28}+2.54\\%$ | | test_tc_init | 77.8560μs | 43.3174μs | 23.0854 KOps/s | 22.0695 KOps/s | $\color{#35bf28}+4.60\\%$ | | test_tc_init_nested | 0.1862ms | 88.3161μs | 11.3230 KOps/s | 10.9457 KOps/s | $\color{#35bf28}+3.45\\%$ | | test_tc_first_layer_tensor | 17.7630μs | 1.4151μs | 706.6551 KOps/s | 693.6412 KOps/s | $\color{#35bf28}+1.88\\%$ | | test_tc_first_layer_nontensor | 25.1570μs | 4.2364μs | 236.0477 KOps/s | 237.8846 KOps/s | $\color{#d91a1a}-0.77\\%$ | | test_tc_second_layer_tensor | 23.8140μs | 2.6703μs | 374.4882 KOps/s | 377.2250 KOps/s | $\color{#d91a1a}-0.73\\%$ | | test_tc_second_layer_nontensor | 32.9210μs | 5.4670μs | 182.9142 KOps/s | 182.5732 KOps/s | $\color{#35bf28}+0.19\\%$ | | test_unbind | 0.4402s | 14.2582ms | 70.1350 Ops/s | 76.6348 Ops/s | $\textbf{\color{#d91a1a}-8.48\\%}$ | | test_full_like | 8.8644ms | 6.7047ms | 149.1485 Ops/s | 145.4468 Ops/s | $\color{#35bf28}+2.55\\%$ | | test_zeros_like | 12.7254ms | 7.1925ms | 139.0332 Ops/s | 145.0430 Ops/s | $\color{#d91a1a}-4.14\\%$ | | test_ones_like | 16.0652ms | 7.3069ms | 136.8575 Ops/s | 135.9409 Ops/s | $\color{#35bf28}+0.67\\%$ | | test_clone | 12.7755ms | 8.7144ms | 114.7522 Ops/s | 109.1009 Ops/s | $\textbf{\color{#35bf28}+5.18\\%}$ | | test_squeeze | 62.9770μs | 13.2629μs | 75.3985 KOps/s | 76.0013 KOps/s | $\color{#d91a1a}-0.79\\%$ | | test_unsqueeze | 0.3024ms | 93.0746μs | 10.7441 KOps/s | 10.4723 KOps/s | $\color{#35bf28}+2.60\\%$ | | test_split | 0.3292ms | 0.2014ms | 4.9665 KOps/s | 5.0490 KOps/s | 
$\color{#d91a1a}-1.63\\%$ | | test_permute | 0.3503ms | 0.2168ms | 4.6134 KOps/s | 4.5921 KOps/s | $\color{#35bf28}+0.46\\%$ | | test_stack | 28.8144ms | 23.5643ms | 42.4370 Ops/s | 41.7845 Ops/s | $\color{#35bf28}+1.56\\%$ | | test_cat | 29.3304ms | 23.4731ms | 42.6019 Ops/s | 41.9776 Ops/s | $\color{#35bf28}+1.49\\%$ |
github-actions[bot] commented 3 months ago

$\color{#D29922}\textsf{\Large⚠\kern{0.2cm}\normalsize Warning}$ Result of GPU Benchmark Tests

Total Benchmarks: 225. Improved: $\large\color{#35bf28}30$. Worsened: $\large\color{#d91a1a}7$.

Expand to view detailed results | Name | Max | Mean | Ops | Ops on Repo `HEAD` | Change | | -------------------------------------------------- | --------- | --------- | --------------- | ------------------ | ----------------------------------- | | test_plain_set_nested | 0.1502ms | 16.0900μs | 62.1502 KOps/s | 56.9121 KOps/s | $\textbf{\color{#35bf28}+9.20\\%}$ | | test_plain_set_stack_nested | 45.3310μs | 16.2433μs | 61.5637 KOps/s | 56.9546 KOps/s | $\textbf{\color{#35bf28}+8.09\\%}$ | | test_plain_set_nested_inplace | 43.6110μs | 17.1845μs | 58.1920 KOps/s | 53.7550 KOps/s | $\textbf{\color{#35bf28}+8.25\\%}$ | | test_plain_set_stack_nested_inplace | 45.1410μs | 17.3489μs | 57.6407 KOps/s | 53.8545 KOps/s | $\textbf{\color{#35bf28}+7.03\\%}$ | | test_items | 19.6600μs | 4.7848μs | 208.9948 KOps/s | 210.7639 KOps/s | $\color{#d91a1a}-0.84\\%$ | | test_items_nested | 0.4035ms | 0.3698ms | 2.7043 KOps/s | 2.7188 KOps/s | $\color{#d91a1a}-0.53\\%$ | | test_items_nested_locked | 0.3917ms | 0.3704ms | 2.6999 KOps/s | 2.6987 KOps/s | $\color{#35bf28}+0.04\\%$ | | test_items_nested_leaf | 0.1084ms | 83.8797μs | 11.9218 KOps/s | 11.9369 KOps/s | $\color{#d91a1a}-0.13\\%$ | | test_items_stack_nested | 0.3905ms | 0.3654ms | 2.7367 KOps/s | 2.6986 KOps/s | $\color{#35bf28}+1.41\\%$ | | test_items_stack_nested_leaf | 0.1056ms | 84.5560μs | 11.8265 KOps/s | 11.7530 KOps/s | $\color{#35bf28}+0.62\\%$ | | test_items_stack_nested_locked | 0.4063ms | 0.3733ms | 2.6790 KOps/s | 2.6850 KOps/s | $\color{#d91a1a}-0.22\\%$ | | test_keys | 25.2100μs | 4.3829μs | 228.1592 KOps/s | 226.9734 KOps/s | $\color{#35bf28}+0.52\\%$ | | test_keys_nested | 89.8610μs | 65.2803μs | 15.3186 KOps/s | 15.2360 KOps/s | $\color{#35bf28}+0.54\\%$ | | test_keys_nested_locked | 0.7537ms | 72.2857μs | 13.8340 KOps/s | 13.6001 KOps/s | $\color{#35bf28}+1.72\\%$ | | test_keys_nested_leaf | 83.5020μs | 57.4157μs | 17.4168 KOps/s | 17.5222 KOps/s | $\color{#d91a1a}-0.60\\%$ | | test_keys_stack_nested | 
92.1020μs | 65.4102μs | 15.2881 KOps/s | 14.9907 KOps/s | $\color{#35bf28}+1.98\\%$ | | test_keys_stack_nested_leaf | 79.1920μs | 57.8242μs | 17.2938 KOps/s | 17.2638 KOps/s | $\color{#35bf28}+0.17\\%$ | | test_keys_stack_nested_locked | 91.6820μs | 71.8378μs | 13.9202 KOps/s | 13.5691 KOps/s | $\color{#35bf28}+2.59\\%$ | | test_values | 15.2505μs | 1.7965μs | 556.6521 KOps/s | 568.0413 KOps/s | $\color{#d91a1a}-2.00\\%$ | | test_values_nested | 60.3720μs | 33.9506μs | 29.4546 KOps/s | 29.2249 KOps/s | $\color{#35bf28}+0.79\\%$ | | test_values_nested_locked | 56.6310μs | 36.0266μs | 27.7573 KOps/s | 27.6266 KOps/s | $\color{#35bf28}+0.47\\%$ | | test_values_nested_leaf | 51.5310μs | 30.2881μs | 33.0162 KOps/s | 32.7607 KOps/s | $\color{#35bf28}+0.78\\%$ | | test_values_stack_nested | 53.5900μs | 34.9378μs | 28.6223 KOps/s | 28.3592 KOps/s | $\color{#35bf28}+0.93\\%$ | | test_values_stack_nested_leaf | 58.8920μs | 30.9257μs | 32.3356 KOps/s | 31.9700 KOps/s | $\color{#35bf28}+1.14\\%$ | | test_values_stack_nested_locked | 58.7710μs | 36.8472μs | 27.1391 KOps/s | 27.0669 KOps/s | $\color{#35bf28}+0.27\\%$ | | test_membership | 1.5945μs | 0.5504μs | 1.8168 MOps/s | 1.8431 MOps/s | $\color{#d91a1a}-1.42\\%$ | | test_membership_nested | 23.7710μs | 2.0155μs | 496.1650 KOps/s | 514.8940 KOps/s | $\color{#d91a1a}-3.64\\%$ | | test_membership_nested_leaf | 15.5950μs | 1.9534μs | 511.9346 KOps/s | 504.8265 KOps/s | $\color{#35bf28}+1.41\\%$ | | test_membership_stacked_nested | 18.6390μs | 2.0345μs | 491.5245 KOps/s | 502.1618 KOps/s | $\color{#d91a1a}-2.12\\%$ | | test_membership_stacked_nested_leaf | 30.9400μs | 2.0074μs | 498.1535 KOps/s | 497.1899 KOps/s | $\color{#35bf28}+0.19\\%$ | | test_membership_nested_last | 16.2200μs | 2.9595μs | 337.8984 KOps/s | 341.5688 KOps/s | $\color{#d91a1a}-1.07\\%$ | | test_membership_nested_leaf_last | 40.7700μs | 2.9496μs | 339.0238 KOps/s | 338.6451 KOps/s | $\color{#35bf28}+0.11\\%$ | | test_membership_stacked_nested_last | 32.4510μs 
| 3.3525μs | 298.2816 KOps/s | 202.9676 KOps/s | $\textbf{\color{#35bf28}+46.96\\%}$ | | test_membership_stacked_nested_leaf_last | 16.9400μs | 3.3567μs | 297.9147 KOps/s | 205.2717 KOps/s | $\textbf{\color{#35bf28}+45.13\\%}$ | | test_nested_getleaf | 35.4510μs | 8.0050μs | 124.9225 KOps/s | 125.8719 KOps/s | $\color{#d91a1a}-0.75\\%$ | | test_nested_get | 34.9010μs | 7.4913μs | 133.4884 KOps/s | 133.4519 KOps/s | $\color{#35bf28}+0.03\\%$ | | test_stacked_getleaf | 31.9200μs | 7.9841μs | 125.2489 KOps/s | 125.2124 KOps/s | $\color{#35bf28}+0.03\\%$ | | test_stacked_get | 34.5310μs | 7.4662μs | 133.9368 KOps/s | 133.3783 KOps/s | $\color{#35bf28}+0.42\\%$ | | test_nested_getitemleaf | 32.3110μs | 8.1650μs | 122.4743 KOps/s | 123.1481 KOps/s | $\color{#d91a1a}-0.55\\%$ | | test_nested_getitem | 22.1210μs | 7.6392μs | 130.9042 KOps/s | 130.8645 KOps/s | $\color{#35bf28}+0.03\\%$ | | test_stacked_getitemleaf | 34.8610μs | 8.1509μs | 122.6856 KOps/s | 123.4292 KOps/s | $\color{#d91a1a}-0.60\\%$ | | test_stacked_getitem | 28.6810μs | 7.6307μs | 131.0488 KOps/s | 130.6554 KOps/s | $\color{#35bf28}+0.30\\%$ | | test_lock_nested | 0.9259ms | 0.4699ms | 2.1283 KOps/s | 2.1193 KOps/s | $\color{#35bf28}+0.42\\%$ | | test_lock_stack_nested | 0.4871ms | 0.4282ms | 2.3351 KOps/s | 2.3093 KOps/s | $\color{#35bf28}+1.12\\%$ | | test_unlock_nested | 0.7949ms | 0.3880ms | 2.5775 KOps/s | 2.5410 KOps/s | $\color{#35bf28}+1.44\\%$ | | test_unlock_stack_nested | 0.3925ms | 0.3470ms | 2.8814 KOps/s | 2.8315 KOps/s | $\color{#35bf28}+1.76\\%$ | | test_flatten_speed | 89.0000ms | 0.1181ms | 8.4693 KOps/s | 9.5228 KOps/s | $\textbf{\color{#d91a1a}-11.06\\%}$ | | test_unflatten_speed | 0.3069ms | 0.2850ms | 3.5089 KOps/s | 3.4626 KOps/s | $\color{#35bf28}+1.34\\%$ | | test_common_ops | 1.4975ms | 1.2935ms | 773.0751 Ops/s | 715.6903 Ops/s | $\textbf{\color{#35bf28}+8.02\\%}$ | | test_creation | 26.0810μs | 1.6607μs | 602.1665 KOps/s | 600.6007 KOps/s | $\color{#35bf28}+0.26\\%$ | | 
test_creation_empty | 35.6400μs | 15.5879μs | 64.1525 KOps/s | 54.0288 KOps/s | $\textbf{\color{#35bf28}+18.74\\%}$ | | test_creation_nested_1 | 47.7510μs | 17.3261μs | 57.7164 KOps/s | 48.6345 KOps/s | $\textbf{\color{#35bf28}+18.67\\%}$ | | test_creation_nested_2 | 39.5410μs | 20.0183μs | 49.9543 KOps/s | 43.6481 KOps/s | $\textbf{\color{#35bf28}+14.45\\%}$ | | test_clone | 63.0210μs | 31.4528μs | 31.7937 KOps/s | 31.2099 KOps/s | $\color{#35bf28}+1.87\\%$ | | test_getitem[int] | 1.1787ms | 17.3034μs | 57.7921 KOps/s | 57.3929 KOps/s | $\color{#35bf28}+0.70\\%$ | | test_getitem[slice_int] | 0.1440ms | 29.5836μs | 33.8025 KOps/s | 33.1606 KOps/s | $\color{#35bf28}+1.94\\%$ | | test_getitem[range] | 0.2486ms | 0.1152ms | 8.6839 KOps/s | 8.6403 KOps/s | $\color{#35bf28}+0.50\\%$ | | test_getitem[tuple] | 0.1422ms | 25.9545μs | 38.5290 KOps/s | 38.4315 KOps/s | $\color{#35bf28}+0.25\\%$ | | test_getitem[list] | 0.2172ms | 0.1058ms | 9.4515 KOps/s | 9.3521 KOps/s | $\color{#35bf28}+1.06\\%$ | | test_setitem_dim[int] | 74.4120μs | 52.1394μs | 19.1793 KOps/s | 17.8883 KOps/s | $\textbf{\color{#35bf28}+7.22\\%}$ | | test_setitem_dim[slice_int] | 0.1048ms | 76.7827μs | 13.0238 KOps/s | 12.3321 KOps/s | $\textbf{\color{#35bf28}+5.61\\%}$ | | test_setitem_dim[range] | 0.1626ms | 0.1394ms | 7.1723 KOps/s | 6.9574 KOps/s | $\color{#35bf28}+3.09\\%$ | | test_setitem_dim[tuple] | 87.3120μs | 69.0663μs | 14.4788 KOps/s | 13.6780 KOps/s | $\textbf{\color{#35bf28}+5.85\\%}$ | | test_setitem | 72.4920μs | 44.3833μs | 22.5310 KOps/s | 21.6921 KOps/s | $\color{#35bf28}+3.87\\%$ | | test_set | 80.4120μs | 43.3943μs | 23.0445 KOps/s | 22.2570 KOps/s | $\color{#35bf28}+3.54\\%$ | | test_set_shared | 0.3221ms | 55.4646μs | 18.0295 KOps/s | 17.7548 KOps/s | $\color{#35bf28}+1.55\\%$ | | test_update | 80.2320μs | 51.4503μs | 19.4362 KOps/s | 18.1673 KOps/s | $\textbf{\color{#35bf28}+6.98\\%}$ | | test_update_nested | 86.9620μs | 59.0757μs | 16.9274 KOps/s | 16.0214 KOps/s | 
$\textbf{\color{#35bf28}+5.66\\%}$ | | test_update__nested | 92.4720μs | 63.6224μs | 15.7177 KOps/s | 15.3622 KOps/s | $\color{#35bf28}+2.31\\%$ | | test_set_nested | 74.3420μs | 46.6195μs | 21.4502 KOps/s | 20.8533 KOps/s | $\color{#35bf28}+2.86\\%$ | | test_set_nested_new | 84.0320μs | 50.3890μs | 19.8456 KOps/s | 19.1694 KOps/s | $\color{#35bf28}+3.53\\%$ | | test_select | 90.4620μs | 65.1456μs | 15.3502 KOps/s | 14.8751 KOps/s | $\color{#35bf28}+3.19\\%$ | | test_select_nested | 68.3610μs | 50.9917μs | 19.6111 KOps/s | 19.6532 KOps/s | $\color{#d91a1a}-0.21\\%$ | | test_exclude_nested | 90.8420μs | 70.0139μs | 14.2829 KOps/s | 14.5215 KOps/s | $\color{#d91a1a}-1.64\\%$ | | test_empty[True] | 0.3334ms | 0.2840ms | 3.5210 KOps/s | 3.5219 KOps/s | $\color{#d91a1a}-0.03\\%$ | | test_empty[False] | 2.9301μs | 0.8814μs | 1.1345 MOps/s | 1.1692 MOps/s | $\color{#d91a1a}-2.96\\%$ | | test_to | 70.7010μs | 40.5436μs | 24.6648 KOps/s | 24.5484 KOps/s | $\color{#35bf28}+0.47\\%$ | | test_to_nonblocking | 55.0110μs | 25.6536μs | 38.9809 KOps/s | 38.3656 KOps/s | $\color{#35bf28}+1.60\\%$ | | test_unbind_speed | 0.3446ms | 0.3015ms | 3.3164 KOps/s | 3.2730 KOps/s | $\color{#35bf28}+1.32\\%$ | | test_unbind_speed_stack0 | 0.3324ms | 0.2993ms | 3.3406 KOps/s | 3.2912 KOps/s | $\color{#35bf28}+1.50\\%$ | | test_unbind_speed_stack1 | 89.5953ms | 0.7533ms | 1.3275 KOps/s | 1.2848 KOps/s | $\color{#35bf28}+3.32\\%$ | | test_split | 92.1063ms | 2.3673ms | 422.4280 Ops/s | 418.0385 Ops/s | $\color{#35bf28}+1.05\\%$ | | test_chunk | 91.8888ms | 2.3850ms | 419.2941 Ops/s | 455.4405 Ops/s | $\textbf{\color{#d91a1a}-7.94\\%}$ | | test_creation[device0] | 0.1521ms | 0.1062ms | 9.4188 KOps/s | 9.4395 KOps/s | $\color{#d91a1a}-0.22\\%$ | | test_creation_from_tensor | 0.1530ms | 0.1020ms | 9.8041 KOps/s | 9.5210 KOps/s | $\color{#35bf28}+2.97\\%$ | | test_add_one[memmap_tensor0] | 70.8420μs | 8.8468μs | 113.0353 KOps/s | 110.5543 KOps/s | $\color{#35bf28}+2.24\\%$ | | 
test_contiguous[memmap_tensor0] | 27.3510μs | 2.2317μs | 448.0939 KOps/s | 447.5683 KOps/s | $\color{#35bf28}+0.12\\%$ | | test_stack[memmap_tensor0] | 22.9410μs | 6.6497μs | 150.3823 KOps/s | 144.7002 KOps/s | $\color{#35bf28}+3.93\\%$ | | test_memmaptd_index | 1.1796ms | 0.4411ms | 2.2673 KOps/s | 2.2968 KOps/s | $\color{#d91a1a}-1.28\\%$ | | test_memmaptd_index_astensor | 0.7724ms | 0.5011ms | 1.9957 KOps/s | 1.8051 KOps/s | $\textbf{\color{#35bf28}+10.56\\%}$ | | test_memmaptd_index_op | 1.4546ms | 1.0495ms | 952.8723 Ops/s | 913.0604 Ops/s | $\color{#35bf28}+4.36\\%$ | | test_serialize_model | 92.5832ms | 88.6969ms | 11.2743 Ops/s | 11.0001 Ops/s | $\color{#35bf28}+2.49\\%$ | | test_serialize_model_pickle | 1.3630s | 1.2386s | 0.8073 Ops/s | 0.8083 Ops/s | $\color{#d91a1a}-0.12\\%$ | | test_serialize_weights | 0.1813s | 96.5329ms | 10.3592 Ops/s | 11.1469 Ops/s | $\textbf{\color{#d91a1a}-7.07\\%}$ | | test_serialize_weights_returnearly | 0.2610s | 67.1796ms | 14.8855 Ops/s | 17.9868 Ops/s | $\textbf{\color{#d91a1a}-17.24\\%}$ | | test_serialize_weights_pickle | 1.3547s | 1.2368s | 0.8085 Ops/s | 0.8087 Ops/s | $\color{#d91a1a}-0.02\\%$ | | test_reshape_pytree | 0.2954ms | 38.3608μs | 26.0683 KOps/s | 25.5602 KOps/s | $\color{#35bf28}+1.99\\%$ | | test_reshape_td | 74.9810μs | 43.2667μs | 23.1125 KOps/s | 22.6365 KOps/s | $\color{#35bf28}+2.10\\%$ | | test_view_pytree | 0.2329ms | 38.3970μs | 26.0437 KOps/s | 26.0567 KOps/s | $\color{#d91a1a}-0.05\\%$ | | test_view_td | 79.2520μs | 50.1056μs | 19.9578 KOps/s | 20.1507 KOps/s | $\color{#d91a1a}-0.96\\%$ | | test_unbind_pytree | 0.2312ms | 36.7712μs | 27.1952 KOps/s | 26.0561 KOps/s | $\color{#35bf28}+4.37\\%$ | | test_unbind_td | 0.4167ms | 45.1269μs | 22.1597 KOps/s | 21.3261 KOps/s | $\color{#35bf28}+3.91\\%$ | | test_split_pytree | 78.6410μs | 50.3589μs | 19.8574 KOps/s | 19.2869 KOps/s | $\color{#35bf28}+2.96\\%$ | | test_split_td | 0.4684ms | 61.0611μs | 16.3770 KOps/s | 14.1495 KOps/s | 
$\textbf{\color{#35bf28}+15.74\\%}$ | | test_add_pytree | 0.2799ms | 64.0768μs | 15.6063 KOps/s | 15.9620 KOps/s | $\color{#d91a1a}-2.23\\%$ | | test_add_td | 0.2249ms | 91.7217μs | 10.9025 KOps/s | 9.8669 KOps/s | $\textbf{\color{#35bf28}+10.50\\%}$ | | test_compile_add_one_nested[tensordict-compile] | 0.4095ms | 0.2146ms | 4.6598 KOps/s | 4.6418 KOps/s | $\color{#35bf28}+0.39\\%$ | | test_compile_add_one_nested[tensordict-eager] | 0.2706ms | 0.1752ms | 5.7092 KOps/s | 5.6823 KOps/s | $\color{#35bf28}+0.47\\%$ | | test_compile_add_one_nested[pytree-compile] | 0.1855ms | 0.1512ms | 6.6154 KOps/s | 6.6065 KOps/s | $\color{#35bf28}+0.13\\%$ | | test_compile_add_one_nested[pytree-eager] | 0.2481ms | 0.1966ms | 5.0852 KOps/s | 4.5952 KOps/s | $\textbf{\color{#35bf28}+10.66\\%}$ | | test_compile_copy_nested[tensordict-compile] | 49.9820μs | 22.7096μs | 44.0342 KOps/s | 45.8830 KOps/s | $\color{#d91a1a}-4.03\\%$ | | test_compile_copy_nested[tensordict-eager] | 76.0020μs | 48.2818μs | 20.7117 KOps/s | 20.6144 KOps/s | $\color{#35bf28}+0.47\\%$ | | test_compile_copy_nested[pytree-compile] | 0.1009ms | 73.1931μs | 13.6625 KOps/s | 13.7818 KOps/s | $\color{#d91a1a}-0.87\\%$ | | test_compile_copy_nested[pytree-eager] | 0.1580ms | 60.2170μs | 16.6066 KOps/s | 16.7897 KOps/s | $\color{#d91a1a}-1.09\\%$ | | test_compile_add_one_flat[tensordict-compile] | 0.3741ms | 0.3399ms | 2.9424 KOps/s | 2.9383 KOps/s | $\color{#35bf28}+0.14\\%$ | | test_compile_add_one_flat[tensordict-eager] | 0.2870ms | 0.2224ms | 4.4958 KOps/s | 4.4112 KOps/s | $\color{#35bf28}+1.92\\%$ | | test_compile_add_one_flat[tensorclass-compile] | 0.1884ms | 0.1346ms | 7.4277 KOps/s | 7.2589 KOps/s | $\color{#35bf28}+2.33\\%$ | | test_compile_add_one_flat[tensorclass-eager] | 0.1231ms | 63.7873μs | 15.6771 KOps/s | 14.7829 KOps/s | $\textbf{\color{#35bf28}+6.05\\%}$ | | test_compile_add_one_flat[pytree-compile] | 0.3872ms | 0.3373ms | 2.9649 KOps/s | 2.9565 KOps/s | $\color{#35bf28}+0.28\\%$ | | 
test_compile_add_one_flat[pytree-eager] | 0.9004ms | 0.7028ms | 1.4230 KOps/s | 1.5208 KOps/s | $\textbf{\color{#d91a1a}-6.43\\%}$ | | test_compile_add_self_flat[tensordict-eager] | 0.3210ms | 0.2721ms | 3.6755 KOps/s | 3.6403 KOps/s | $\color{#35bf28}+0.97\\%$ | | test_compile_add_self_flat[tensordict-compile] | 0.4095ms | 0.3389ms | 2.9511 KOps/s | 2.9384 KOps/s | $\color{#35bf28}+0.43\\%$ | | test_compile_add_self_flat[tensorclass-eager] | 0.1571ms | 75.1699μs | 13.3032 KOps/s | 12.9624 KOps/s | $\color{#35bf28}+2.63\\%$ | | test_compile_add_self_flat[tensorclass-compile] | 0.2375ms | 0.1346ms | 7.4321 KOps/s | 7.3727 KOps/s | $\color{#35bf28}+0.81\\%$ | | test_compile_add_self_flat[pytree-eager] | 0.6655ms | 0.5999ms | 1.6671 KOps/s | 1.7636 KOps/s | $\textbf{\color{#d91a1a}-5.47\\%}$ | | test_compile_add_self_flat[pytree-compile] | 0.3994ms | 0.3372ms | 2.9655 KOps/s | 2.9595 KOps/s | $\color{#35bf28}+0.20\\%$ | | test_compile_copy_flat[tensordict-compile] | 43.1600μs | 19.0503μs | 52.4926 KOps/s | 52.3876 KOps/s | $\color{#35bf28}+0.20\\%$ | | test_compile_copy_flat[tensordict-eager] | 60.4410μs | 32.8165μs | 30.4725 KOps/s | 30.4797 KOps/s | $\color{#d91a1a}-0.02\\%$ | | test_compile_copy_flat[pytree-compile] | 0.1200ms | 76.2040μs | 13.1227 KOps/s | 13.0972 KOps/s | $\color{#35bf28}+0.19\\%$ | | test_compile_copy_flat[pytree-eager] | 76.3920μs | 60.3190μs | 16.5785 KOps/s | 16.5313 KOps/s | $\color{#35bf28}+0.29\\%$ | | test_compile_assign_and_add[tensordict-compile] | 2.5742ms | 0.9483ms | 1.0545 KOps/s | 1.0531 KOps/s | $\color{#35bf28}+0.14\\%$ | | test_compile_assign_and_add[tensordict-eager] | 3.8209ms | 3.4401ms | 290.6851 Ops/s | 290.7710 Ops/s | $\color{#d91a1a}-0.03\\%$ | | test_compile_assign_and_add[pytree-compile] | 2.5375ms | 0.9316ms | 1.0735 KOps/s | 1.0644 KOps/s | $\color{#35bf28}+0.85\\%$ | | test_compile_assign_and_add[pytree-eager] | 3.5955ms | 3.4107ms | 293.1943 Ops/s | 289.3054 Ops/s | $\color{#35bf28}+1.34\\%$ | | 
test_compile_indexing[tensor-tensordict-compile] | 0.1696ms | 0.1138ms | 8.7884 KOps/s | 8.7172 KOps/s | $\color{#35bf28}+0.82\\%$ | | test_compile_indexing[tensor-tensordict-eager] | 0.2342ms | 65.2613μs | 15.3230 KOps/s | 15.2010 KOps/s | $\color{#35bf28}+0.80\\%$ | | test_compile_indexing[tensor-tensorclass-compile] | 0.1726ms | 0.1094ms | 9.1406 KOps/s | 9.2247 KOps/s | $\color{#d91a1a}-0.91\\%$ | | test_compile_indexing[tensor-tensorclass-eager] | 84.7210μs | 48.2628μs | 20.7199 KOps/s | 20.5407 KOps/s | $\color{#35bf28}+0.87\\%$ | | test_compile_indexing[tensor-pytree-compile] | 0.1721ms | 0.1089ms | 9.1845 KOps/s | 9.2590 KOps/s | $\color{#d91a1a}-0.80\\%$ | | test_compile_indexing[tensor-pytree-eager] | 92.5620μs | 47.8325μs | 20.9063 KOps/s | 20.0011 KOps/s | $\color{#35bf28}+4.53\\%$ | | test_compile_indexing[slice-tensordict-compile] | 0.1920ms | 0.1432ms | 6.9854 KOps/s | 6.7353 KOps/s | $\color{#35bf28}+3.71\\%$ | | test_compile_indexing[slice-tensordict-eager] | 0.1956ms | 26.8823μs | 37.1992 KOps/s | 36.5555 KOps/s | $\color{#35bf28}+1.76\\%$ | | test_compile_indexing[slice-tensorclass-compile] | 0.2953ms | 0.1374ms | 7.2799 KOps/s | 7.3125 KOps/s | $\color{#d91a1a}-0.45\\%$ | | test_compile_indexing[slice-tensorclass-eager] | 44.6610μs | 23.0403μs | 43.4021 KOps/s | 42.1954 KOps/s | $\color{#35bf28}+2.86\\%$ | | test_compile_indexing[slice-pytree-compile] | 0.1844ms | 0.1397ms | 7.1567 KOps/s | 7.3167 KOps/s | $\color{#d91a1a}-2.19\\%$ | | test_compile_indexing[slice-pytree-eager] | 58.7410μs | 23.2441μs | 43.0217 KOps/s | 43.0450 KOps/s | $\color{#d91a1a}-0.05\\%$ | | test_compile_indexing[int-tensordict-compile] | 0.2024ms | 0.1460ms | 6.8493 KOps/s | 6.8750 KOps/s | $\color{#d91a1a}-0.37\\%$ | | test_compile_indexing[int-tensordict-eager] | 0.5041ms | 26.6336μs | 37.5466 KOps/s | 33.7894 KOps/s | $\textbf{\color{#35bf28}+11.12\\%}$ | | test_compile_indexing[int-tensorclass-compile] | 0.2183ms | 0.1370ms | 7.2979 KOps/s | 7.3260 KOps/s | 
$\color{#d91a1a}-0.38\\%$ | | test_compile_indexing[int-tensorclass-eager] | 58.3010μs | 22.8639μs | 43.7370 KOps/s | 43.0849 KOps/s | $\color{#35bf28}+1.51\\%$ | | test_compile_indexing[int-pytree-compile] | 0.1872ms | 0.1380ms | 7.2477 KOps/s | 7.3242 KOps/s | $\color{#d91a1a}-1.05\\%$ | | test_compile_indexing[int-pytree-eager] | 0.4339ms | 22.8043μs | 43.8514 KOps/s | 43.2191 KOps/s | $\color{#35bf28}+1.46\\%$ | | test_mod_add[eager] | 81.9710μs | 38.2912μs | 26.1157 KOps/s | 25.2394 KOps/s | $\color{#35bf28}+3.47\\%$ | | test_mod_add[compile] | 0.1201ms | 71.4563μs | 13.9946 KOps/s | 13.8035 KOps/s | $\color{#35bf28}+1.38\\%$ | | test_mod_add[compile-overhead] | 0.2624ms | 0.1555ms | 6.4307 KOps/s | 6.5896 KOps/s | $\color{#d91a1a}-2.41\\%$ | | test_mod_wrap[eager] | 0.3424ms | 0.2591ms | 3.8594 KOps/s | 3.8253 KOps/s | $\color{#35bf28}+0.89\\%$ | | test_mod_wrap[compile] | 1.3318ms | 0.2956ms | 3.3829 KOps/s | 3.3360 KOps/s | $\color{#35bf28}+1.41\\%$ | | test_mod_wrap[compile-overhead] | 8.0700ms | 4.3617ms | 229.2706 Ops/s | 232.8541 Ops/s | $\color{#d91a1a}-1.54\\%$ | | test_mod_wrap_and_backward[eager] | 1.5839ms | 1.4928ms | 669.9028 Ops/s | 712.2151 Ops/s | $\textbf{\color{#d91a1a}-5.94\\%}$ | | test_mod_wrap_and_backward[compile] | 1.6978ms | 1.4763ms | 677.3904 Ops/s | 682.7385 Ops/s | $\color{#d91a1a}-0.78\\%$ | | test_mod_wrap_and_backward[compile-overhead] | 1.4720ms | 1.0086ms | 991.4721 Ops/s | 989.9702 Ops/s | $\color{#35bf28}+0.15\\%$ | | test_seq_add[eager] | 0.1685ms | 0.1115ms | 8.9658 KOps/s | 8.7409 KOps/s | $\color{#35bf28}+2.57\\%$ | | test_seq_add[compile] | 0.2153ms | 89.1509μs | 11.2169 KOps/s | 11.1012 KOps/s | $\color{#35bf28}+1.04\\%$ | | test_seq_add[compile-overhead] | 0.1612ms | 0.1244ms | 8.0397 KOps/s | 7.9419 KOps/s | $\color{#35bf28}+1.23\\%$ | | test_seq_wrap[eager] | 0.4845ms | 0.4210ms | 2.3751 KOps/s | 2.2818 KOps/s | $\color{#35bf28}+4.09\\%$ | | test_seq_wrap[compile] | 1.5220ms | 0.3274ms | 3.0546 KOps/s | 3.0237 
KOps/s | $\color{#35bf28}+1.02\\%$ | | test_seq_wrap[compile-overhead] | 0.3043s | 0.1457s | 6.8640 Ops/s | 6.8950 Ops/s | $\color{#d91a1a}-0.45\\%$ | | test_func_call_runtime[False-eager] | 0.8403ms | 0.7662ms | 1.3051 KOps/s | 1.2603 KOps/s | $\color{#35bf28}+3.56\\%$ | | test_func_call_runtime[False-compile] | 0.9259ms | 0.8421ms | 1.1875 KOps/s | 1.2122 KOps/s | $\color{#d91a1a}-2.04\\%$ | | test_func_call_runtime[False-compile-overhead] | 0.4451ms | 0.3731ms | 2.6805 KOps/s | 2.6767 KOps/s | $\color{#35bf28}+0.14\\%$ | | test_func_call_runtime[True-eager] | 1.0436ms | 0.9599ms | 1.0418 KOps/s | 1.0437 KOps/s | $\color{#d91a1a}-0.18\\%$ | | test_func_call_runtime[True-compile] | 1.0091ms | 0.8666ms | 1.1539 KOps/s | 1.1439 KOps/s | $\color{#35bf28}+0.88\\%$ | | test_func_call_runtime[True-compile-overhead] | 0.4677ms | 0.4159ms | 2.4043 KOps/s | 2.4121 KOps/s | $\color{#d91a1a}-0.33\\%$ | | test_func_call_cm_runtime[False-eager] | 0.8977ms | 0.7567ms | 1.3216 KOps/s | 1.3199 KOps/s | $\color{#35bf28}+0.13\\%$ | | test_func_call_cm_runtime[False-compile] | 0.9038ms | 0.8263ms | 1.2103 KOps/s | 1.2078 KOps/s | $\color{#35bf28}+0.21\\%$ | | test_func_call_cm_runtime[False-compile-overhead] | 0.5285ms | 0.3750ms | 2.6669 KOps/s | 2.6768 KOps/s | $\color{#d91a1a}-0.37\\%$ | | test_func_call_cm_runtime[True-eager] | 1.2092ms | 1.0750ms | 930.2599 Ops/s | 924.7851 Ops/s | $\color{#35bf28}+0.59\\%$ | | test_func_call_cm_runtime[True-compile] | 1.1120ms | 1.0403ms | 961.2380 Ops/s | 963.9603 Ops/s | $\color{#d91a1a}-0.28\\%$ | | test_func_call_cm_runtime[True-compile-overhead] | 1.1169ms | 1.0433ms | 958.4667 Ops/s | 962.8566 Ops/s | $\color{#d91a1a}-0.46\\%$ | | test_distributed | 1.2678ms | 70.3138μs | 14.2220 KOps/s | 14.3464 KOps/s | $\color{#d91a1a}-0.87\\%$ | | test_tdmodule | 90.7220μs | 15.1184μs | 66.1445 KOps/s | 60.2071 KOps/s | $\textbf{\color{#35bf28}+9.86\\%}$ | | test_tdmodule_dispatch | 50.6620μs | 30.8784μs | 32.3851 KOps/s | 29.7763 KOps/s | 
$\textbf{\color{#35bf28}+8.76\\%}$ | | test_tdseq | 33.1400μs | 15.9956μs | 62.5173 KOps/s | 56.5644 KOps/s | $\textbf{\color{#35bf28}+10.52\\%}$ | | test_tdseq_dispatch | 53.0320μs | 33.1039μs | 30.2079 KOps/s | 27.5205 KOps/s | $\textbf{\color{#35bf28}+9.77\\%}$ | | test_instantiation_functorch | 2.1590ms | 2.0441ms | 489.2071 Ops/s | 482.5494 Ops/s | $\color{#35bf28}+1.38\\%$ | | test_instantiation_td | 2.0711ms | 1.3364ms | 748.2867 Ops/s | 748.2778 Ops/s | $+0.00\\%$ | | test_exec_functorch | 0.3032ms | 0.2303ms | 4.3430 KOps/s | 4.3337 KOps/s | $\color{#35bf28}+0.22\\%$ | | test_exec_functional_call | 0.2844ms | 0.2278ms | 4.3902 KOps/s | 4.4419 KOps/s | $\color{#d91a1a}-1.16\\%$ | | test_exec_td | 0.3113ms | 0.2271ms | 4.4031 KOps/s | 4.4274 KOps/s | $\color{#d91a1a}-0.55\\%$ | | test_exec_td_decorator | 1.1267ms | 0.2843ms | 3.5177 KOps/s | 3.5379 KOps/s | $\color{#d91a1a}-0.57\\%$ | | test_vmap_mlp_speed[True-True] | 0.7044ms | 0.6575ms | 1.5210 KOps/s | 1.4797 KOps/s | $\color{#35bf28}+2.79\\%$ | | test_vmap_mlp_speed[True-False] | 0.7206ms | 0.6565ms | 1.5233 KOps/s | 1.4622 KOps/s | $\color{#35bf28}+4.18\\%$ | | test_vmap_mlp_speed[False-True] | 0.8425ms | 0.5785ms | 1.7287 KOps/s | 1.6507 KOps/s | $\color{#35bf28}+4.73\\%$ | | test_vmap_mlp_speed[False-False] | 0.6362ms | 0.5806ms | 1.7222 KOps/s | 1.6475 KOps/s | $\color{#35bf28}+4.54\\%$ | | test_vmap_mlp_speed_decorator[True-True] | 0.8368ms | 0.7090ms | 1.4105 KOps/s | 1.3906 KOps/s | $\color{#35bf28}+1.44\\%$ | | test_vmap_mlp_speed_decorator[True-False] | 0.9498ms | 0.7096ms | 1.4093 KOps/s | 1.4003 KOps/s | $\color{#35bf28}+0.64\\%$ | | test_vmap_mlp_speed_decorator[False-True] | 0.7632ms | 0.6229ms | 1.6055 KOps/s | 1.5963 KOps/s | $\color{#35bf28}+0.58\\%$ | | test_vmap_mlp_speed_decorator[False-False] | 0.7667ms | 0.6246ms | 1.6010 KOps/s | 1.5970 KOps/s | $\color{#35bf28}+0.25\\%$ | | test_vmap_transformer_speed[True-True] | 9.1688ms | 8.8642ms | 112.8130 Ops/s | 110.1782 Ops/s | 
$\color{#35bf28}+2.39\\%$ | | test_vmap_transformer_speed[True-False] | 9.1242ms | 8.8511ms | 112.9808 Ops/s | 111.8724 Ops/s | $\color{#35bf28}+0.99\\%$ | | test_vmap_transformer_speed[False-True] | 9.0477ms | 8.7584ms | 114.1767 Ops/s | 112.4577 Ops/s | $\color{#35bf28}+1.53\\%$ | | test_vmap_transformer_speed[False-False] | 8.9631ms | 8.7523ms | 114.2562 Ops/s | 113.0604 Ops/s | $\color{#35bf28}+1.06\\%$ | | test_vmap_transformer_speed_decorator[True-True] | 20.9527ms | 20.8282ms | 48.0119 Ops/s | 47.6118 Ops/s | $\color{#35bf28}+0.84\\%$ | | test_vmap_transformer_speed_decorator[True-False] | 21.9089ms | 20.8237ms | 48.0221 Ops/s | 47.5388 Ops/s | $\color{#35bf28}+1.02\\%$ | | test_vmap_transformer_speed_decorator[False-True] | 21.8025ms | 20.6882ms | 48.3368 Ops/s | 48.1034 Ops/s | $\color{#35bf28}+0.49\\%$ | | test_vmap_transformer_speed_decorator[False-False] | 21.9683ms | 20.6868ms | 48.3400 Ops/s | 47.9727 Ops/s | $\color{#35bf28}+0.77\\%$ | | test_to_module_speed[True] | 1.6284ms | 1.1416ms | 875.9338 Ops/s | 866.1782 Ops/s | $\color{#35bf28}+1.13\\%$ | | test_to_module_speed[False] | 1.5427ms | 1.1164ms | 895.7131 Ops/s | 882.4759 Ops/s | $\color{#35bf28}+1.50\\%$ | | test_tc_init | 71.9510μs | 36.0477μs | 27.7410 KOps/s | 25.7752 KOps/s | $\textbf{\color{#35bf28}+7.63\\%}$ | | test_tc_init_nested | 0.1080ms | 74.0011μs | 13.5133 KOps/s | 12.4153 KOps/s | $\textbf{\color{#35bf28}+8.84\\%}$ | | test_tc_first_layer_tensor | 3.3402μs | 0.7916μs | 1.2632 MOps/s | 1.2628 MOps/s | $\color{#35bf28}+0.03\\%$ | | test_tc_first_layer_nontensor | 17.9210μs | 2.5681μs | 389.3929 KOps/s | 365.7090 KOps/s | $\textbf{\color{#35bf28}+6.48\\%}$ | | test_tc_second_layer_tensor | 6.5267μs | 1.6029μs | 623.8603 KOps/s | 589.7871 KOps/s | $\textbf{\color{#35bf28}+5.78\\%}$ | | test_tc_second_layer_nontensor | 21.3400μs | 3.3911μs | 294.8874 KOps/s | 279.1376 KOps/s | $\textbf{\color{#35bf28}+5.64\\%}$ | | test_unbind | 0.3259s | 12.3756ms | 80.8044 Ops/s | 80.6518 Ops/s | 
$\color{#35bf28}+0.19\\%$ | | test_full_like | 0.6581ms | 0.5796ms | 1.7253 KOps/s | 1.7258 KOps/s | $\color{#d91a1a}-0.03\\%$ | | test_zeros_like | 0.2571ms | 0.1977ms | 5.0594 KOps/s | 5.0592 KOps/s | $+0.00\\%$ | | test_ones_like | 0.2297ms | 0.1975ms | 5.0628 KOps/s | 5.0631 KOps/s | $-0.01\\%$ | | test_clone | 0.4480ms | 0.4146ms | 2.4118 KOps/s | 2.4115 KOps/s | $\color{#35bf28}+0.01\\%$ | | test_squeeze | 34.9210μs | 11.0136μs | 90.7970 KOps/s | 90.1926 KOps/s | $\color{#35bf28}+0.67\\%$ | | test_unsqueeze | 0.2568ms | 77.7078μs | 12.8687 KOps/s | 12.6097 KOps/s | $\color{#35bf28}+2.05\\%$ | | test_split | 0.4330ms | 0.1822ms | 5.4884 KOps/s | 5.6704 KOps/s | $\color{#d91a1a}-3.21\\%$ | | test_permute | 0.2583ms | 0.1979ms | 5.0537 KOps/s | 5.2492 KOps/s | $\color{#d91a1a}-3.72\\%$ | | test_stack | 1.2512ms | 0.9347ms | 1.0698 KOps/s | 1.1097 KOps/s | $\color{#d91a1a}-3.59\\%$ | | test_cat | 1.2523ms | 1.2316ms | 811.9726 Ops/s | 811.9051 Ops/s | $+0.01\\%$ |