pygame-community / pygame-ce

🐍🎮 pygame - Community Edition is a FOSS Python library for multimedia applications (like games). Built on top of the excellent SDL library.
https://pyga.me
767 stars 120 forks source link

Optimized all `Rect/FRect` methods via `pgRect_FromObject` #2908

Open itzpr3d4t0r opened 4 weeks ago

itzpr3d4t0r commented 4 weeks ago

This PR speeds up virtually all Rect/Frect functions. It achieves so thanks to modifications to the pgRect_FromObject function that are:

  1. There are now 2 separate paths for converting an exact Rect/Frect or a subclass of those (before there was only one path which dramatically slowed down the checking because of PyObject_IsInstance)
  2. The sequence/fast_sequence path is now before the subclass path
  3. pgRect_FromObject is now PG_FORCEINLINE
  4. Add a missing CheckExact macro

I've seen variable benefits, but they're generally quite good:

Results

Comparison(%) r fr 4a 1a 2a 1a 2s r attr r meth
instantiation 90.2497 112.902 69.6948 123.114 50.9064 77.5938 16.9079 11.1682
update 12.9699 60.3089 0.419622 84.5456 0.665257 38.1523 16.0923 2.08808
union -3.35265 39.6183 -7.08364 39.6552 -6.6613 29.6631 1.16245 1.19518
union_ip 13.7956 57.369 8.42032 91.5685 9.82252 45.2054 1.32454 3.21933
clip 12.0048 55.4796 -1.92402 57.8898 1.44587 35.6202 -0.563568 7.77687
colliderect 26.0991 57.6645 1.5716 90.9065 2.21566 33.9786 0.438323 4.02949
collidelist 85.8232 398.445 91.3725 45.6847
collidelistall 58.4675 227.912 86.181 47.8682

Data to construct those results

OLD Rect (ms) r fr 4a 1a 2a 1a 2s r attr r meth
instantiation 121.145 137.337 154.239 171.228 168.181 195.555 251.448 317.761
update 35.0886 51.577 54.4191 91.917 79.9019 108.409 195.927 249.407
union 59.5419 87.6092 85.5246 119.548 108.41 149.448 186.48 259.991
union_ip 35.6593 50.6645 60.6495 96.2779 86.2006 115.153 170.107 245.785
clip 59.0407 84.0932 87.0206 123.984 108.167 143.012 187.454 278.148
colliderect 41.1564 53.9675 58.235 98.4715 81.6568 107.365 168.818 242.713
collidelist 109.354 337.397 913.59 1394.42
collidelistall 147.567 389.563 962.418 1438.81
NEW Rect (ms) r fr 4a 1a 2a 1a 2s r attr r meth
instantiation 63.6769 64.5072 90.8921 76.7447 111.447 110.114 215.082 285.837
update 31.0601 32.1735 54.1917 49.8072 79.3739 78.4708 168.768 244.306
union 61.6074 62.7491 92.0447 85.6023 116.147 115.259 184.337 256.92
union_ip 31.3363 32.1947 55.9392 50.2577 78.4909 79.3035 167.884 238.119
clip 52.7127 54.0863 88.7277 78.5259 106.625 105.45 188.517 258.078
colliderect 32.6381 34.2293 57.3339 51.581 79.8868 80.1359 168.082 233.312
collidelist 58.8485 67.6898 477.388 957.151
collidelistall 93.1211 118.801 516.926 973.038

Test program to generate the data:

import json
import timeit
from statistics import fmean

from pygame.rect import Rect, FRect
from tabulate import tabulate

def create_table(title: str, *headers, data: list):
    table_headers = [title, *headers]
    table = []
    for row in data:
        table.append(row)

    print(tabulate(table, headers=table_headers, tablefmt="github", numalign="left",
                   stralign="left", ))

NUMBER = 1_000_000

r1 = Rect(799, 799, 10, 10)
r2 = Rect(0, 0, 5, 5)
rects = [
    (0, 0, 100, 100),
    (50, 50, 100, 100),
    (100, 100, 100, 100),
    (150, 150, 100, 100),
    (200, 200, 100, 100),
    (250, 250, 100, 100),
    (300, 300, 100, 100),
    (350, 350, 100, 100),
    (400, 400, 100, 100),
    (450, 450, 100, 100),
    (500, 500, 100, 100),
    (550, 550, 100, 100),
    (600, 600, 100, 100),
    (650, 650, 100, 100),
    (700, 700, 100, 100),
]
rects2 = [((a, b), (c, d)) for a, b, c, d in rects]

rects_objs = [
    Rect(*r) for r in rects
]
frects_objs = [
    FRect(*r) for r in rects
]

class RectAttr:
    def __init__(self, x, y, w, h):
        self.rect = Rect(x, y, w, h)

r3 = RectAttr(799, 799, 10, 10)

class RectMethod:
    def __init__(self, x, y, w, h):
        self.p = Rect(x, y, w, h)

    def rect(self):
        return self.p

r4 = RectMethod(799, 799, 10, 10)

GLOB = globals()

def test(name: str, funcs: list[str], data_table) -> None:
    t = [name]
    for func in funcs:
        val = fmean(timeit.repeat(func, globals=GLOB, number=NUMBER))
        t.append(val * 1000)
    data_table.append(t)
    print(f"{name} done")

data = []

rect = Rect(10, 10, 2, 2)
frect = FRect(10, 10, 2, 2)

test("instantiation",
     [
         "Rect(rect)",
         "Rect(frect)",
         "Rect(10.0, 10.0, 2.0, 2.0)",
         "Rect((10.0, 10.0, 2.0, 2.0))",
         "Rect((10.0, 10.0), (2.0, 2.0))",
         "Rect(((10.0, 10.0), (2.0, 2.0)))",
         "Rect(r3)",
         "Rect(r4)",
     ],
     data
     )
test("update",
     [
         "r1.update(rect)",
         "r1.update(frect)",
         "r1.update(10.0, 10.0, 2.0, 2.0)",
         "r1.update((10.0, 10.0, 2.0, 2.0))",
         "r1.update((10.0, 10.0), (2.0, 2.0))",
         "r1.update(((10.0, 10.0), (2.0, 2.0)))",
         "r1.update(r3)",
         "r1.update(r4)",
     ],
     data
     )
test("union",
     [
         "r1.union(rect)",
         "r1.union(frect)",
         "r1.union(10.0, 10.0, 2.0, 2.0)",
         "r1.union((10.0, 10.0, 2.0, 2.0))",
         "r1.union((10.0, 10.0), (2.0, 2.0))",
         "r1.union(((10.0, 10.0), (2.0, 2.0)))",
         "r1.union(r3)",
         "r1.union(r4)",
     ],
     data
     )
test("union_ip",
     [
         "r2.union_ip(rect)",
         "r2.union_ip(frect)",
         "r2.union_ip(10.0, 10.0, 2.0, 2.0)",
         "r2.union_ip((10.0, 10.0, 2.0, 2.0))",
         "r2.union_ip((10.0, 10.0), (2.0, 2.0))",
         "r2.union_ip(((10.0, 10.0), (2.0, 2.0)))",
         "r2.union_ip(r3)",
         "r2.union_ip(r4)",
     ],
     data
     )
test("clip",
     [
         "r1.clip(rect)",
         "r1.clip(frect)",
         "r1.clip(10.0, 10.0, 2.0, 2.0)",
         "r1.clip((10.0, 10.0, 2.0, 2.0))",
         "r1.clip((10.0, 10.0), (2.0, 2.0))",
         "r1.clip(((10.0, 10.0), (2.0, 2.0)))",
         "r1.clip(r3)",
         "r1.clip(r4)",
     ],
     data
     )
test("colliderect",
     [
         "r1.colliderect(rect)",
         "r1.colliderect(frect)",
         "r1.colliderect(10.0, 10.0, 2.0, 2.0)",
         "r1.colliderect((10.0, 10.0, 2.0, 2.0))",
         "r1.colliderect((10.0, 10.0), (2.0, 2.0))",
         "r1.colliderect(((10.0, 10.0), (2.0, 2.0)))",
         "r1.colliderect(r3)",
         "r1.colliderect(r4)",
     ],
     data
     )
test("collidelist",
     [
         "r1.collidelist(rects_objs)",
         "r1.collidelist(frects_objs)",
         "r1.collidelist(rects)",
         "r1.collidelist(rects2)",
     ],
     data
     )
test("collidelistall",
     [
         "r1.collidelistall(rects_objs)",
         "r1.collidelistall(frects_objs)",
         "r1.collidelistall(rects)",
         "r1.collidelistall(rects2)",
     ],
     data
     )

create_table(
    "Rect (ms)", "r", "fr", "4a", "1a", "2a", "1a 2s", "r attr", "r meth",
    data=data
)

# Save the data to a JSON file
def save_to_json(filename, data):
    with open(filename, 'w') as f:
        json.dump(data, f)

# Load the data from a JSON file
def load_from_json(filename):
    with open(filename, 'r') as f:
        return json.load(f)

# Compare two sets of data and create a table of percentage improvements
def compare_tables(data1, data2):
    comparison_table = []

    for row1, row2 in zip(data1, data2):
        name = row1[0]
        improvements = [name]
        for val1, val2 in zip(row1[1:], row2[1:]):
            improvement = ((val2 - val1) / val1) * 100
            improvements.append(improvement)
        comparison_table.append(improvements)

    return comparison_table

# Save the data to a JSON file
save_to_json("benchmark_results_new.json", data)

# Load two sets of data from JSON files (for comparison example)
data1 = load_from_json("benchmark_results_new.json")
data2 = load_from_json("benchmark_results_old.json")

# Compare the two sets of data
comparison_table = compare_tables(data1, data2)
print("\n\n")
create_table("Comparison(%)", "r", "fr", "4a", "1a", "2a", "1a 2s", "r attr", "r meth",
             data=comparison_table)
itzpr3d4t0r commented 3 weeks ago

I must report that this change has drawbacks, specifically with Rect/Frect subclasses. You can easily see it from this graph, so even tho we see significant improvements in basically all cases, rect subclasses get significantly slower.

I think we shouldn't prioritize Subclasses performance over the most prevalent usecases such as direct Rect/Frect usage or tuples/lists/sequences/separate args passed as rects. image