karpetrosyan / hishel

An elegant HTTP Cache implementation for HTTPX and HTTP Core.
https://hishel.com
BSD 3-Clause "New" or "Revised" License
175 stars 22 forks source link

Number of syscalls in file cache storage scanning can be reduced #252

Closed Simon-Will closed 3 months ago

Simon-Will commented 3 months ago

When using the library recently, I came across the code that removes expired cache entries from the file storage. And I saw that it calls is_file and stat on each of them, both of which result in a system call. We can use os.scandir to save the syscalls to is_file.

This is demonstrated by this script here:

#!/usr/bin/env bash

DIR=/home/gorgor/tmp/hishel_iterdir_test

cat >scan_current.py <<EOF
#!/usr/bin/env python3

import os, time
from pathlib import Path

for f in Path("$DIR").iterdir():
    if f.is_file():
        age = time.time() - f.stat().st_mtime
EOF

cat >scan_new.py <<EOF
#!/usr/bin/env python3

import os, time

with os.scandir("$DIR") as entries:
    for entry in entries:
        if entry.is_file():
            age = time.time() - entry.stat().st_mtime
EOF

trace() {
    local script="$1"
    mkdir -p "$DIR"
    rm -rf "$DIR"/*
    touch "$DIR"/{1,2,3}
    # Call with syscall tracing and discard all the noise at the beginning.
    strace python3 "$script" "$DIR" 2>&1 | sed -n '/hishel_iterdir_test/,$p'
}

echo "3 files with current version:"
echo
trace scan_current.py

echo
echo "============================================================"
echo

echo "3 files with os.scandir:"
echo
trace scan_new.py

From the output, we can see that the current version makes two syscalls per file, while a version with os.scandir only makes one:

3 files with current version:

openat(AT_FDCWD, "/home/gorgor/tmp/hishel_iterdir_test", O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_DIRECTORY) = 3
fstat(3, {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
getdents64(3, 0x585d10521c70 /* 5 entries */, 32768) = 120
getdents64(3, 0x585d10521c70 /* 0 entries */, 32768) = 0
close(3)                                = 0
newfstatat(AT_FDCWD, "/home/gorgor/tmp/hishel_iterdir_test/2", {st_mode=S_IFREG|0644, st_size=0, ...}, 0) = 0
newfstatat(AT_FDCWD, "/home/gorgor/tmp/hishel_iterdir_test/2", {st_mode=S_IFREG|0644, st_size=0, ...}, 0) = 0
newfstatat(AT_FDCWD, "/home/gorgor/tmp/hishel_iterdir_test/1", {st_mode=S_IFREG|0644, st_size=0, ...}, 0) = 0
newfstatat(AT_FDCWD, "/home/gorgor/tmp/hishel_iterdir_test/1", {st_mode=S_IFREG|0644, st_size=0, ...}, 0) = 0
newfstatat(AT_FDCWD, "/home/gorgor/tmp/hishel_iterdir_test/3", {st_mode=S_IFREG|0644, st_size=0, ...}, 0) = 0
newfstatat(AT_FDCWD, "/home/gorgor/tmp/hishel_iterdir_test/3", {st_mode=S_IFREG|0644, st_size=0, ...}, 0) = 0
rt_sigaction(SIGINT, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=SA_RESTORER|SA_ONSTACK, sa_restorer=0x749729e50ae0}, {sa_handler=0x74972a1183fb, sa_mask=[], sa_flags=SA_RESTORER|SA_ONSTACK, sa_restorer=0x749729e50ae0}, 8) = 0
munmap(0x74972a706000, 16384)           = 0
exit_group(0)                           = ?
+++ exited with 0 +++

============================================================

3 files with os.scandir:

openat(AT_FDCWD, "/home/gorgor/tmp/hishel_iterdir_test", O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_DIRECTORY) = 3
fstat(3, {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
getdents64(3, 0x58236813c760 /* 5 entries */, 32768) = 120
newfstatat(AT_FDCWD, "/home/gorgor/tmp/hishel_iterdir_test/2", {st_mode=S_IFREG|0644, st_size=0, ...}, AT_SYMLINK_NOFOLLOW) = 0
newfstatat(AT_FDCWD, "/home/gorgor/tmp/hishel_iterdir_test/1", {st_mode=S_IFREG|0644, st_size=0, ...}, AT_SYMLINK_NOFOLLOW) = 0
newfstatat(AT_FDCWD, "/home/gorgor/tmp/hishel_iterdir_test/3", {st_mode=S_IFREG|0644, st_size=0, ...}, AT_SYMLINK_NOFOLLOW) = 0
getdents64(3, 0x58236813c760 /* 0 entries */, 32768) = 0
close(3)                                = 0
brk(0x582368156000)                     = 0x582368156000
rt_sigaction(SIGINT, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=SA_RESTORER|SA_ONSTACK, sa_restorer=0x7bf704850ae0}, {sa_handler=0x7bf704b183fb, sa_mask=[], sa_flags=SA_RESTORER|SA_ONSTACK, sa_restorer=0x7bf704850ae0}, 8) = 0
munmap(0x7bf70521e000, 16384)           = 0
exit_group(0)                           = ?
+++ exited with 0 +++

So, I also checked what the impact of that is with this script:

#!/usr/bin/env python3

import os
from pathlib import Path
import time
from time import perf_counter

def create_dir(d, n_files):
    os.makedirs(d, exist_ok=True)
    for i in range(n_files):
        open(os.path.join(d, str(i)), "w")

def iter_dir_old(d):
    for file in Path(d).iterdir():
        if file.is_file():
            age = time.time() - file.stat().st_mtime

def iter_dir_new(d):
    with os.scandir(d) as entries:
        for entry in entries:
            if entry.is_file():
                age = time.time() - entry.stat().st_mtime

def main():
    topdir = "/home/gorgor/tmp/hishel_iterdir_test/"
    n_files = 10000

    d = os.path.join(topdir, "new")
    create_dir(d, n_files)

    for _ in range(3):
        start = perf_counter()
        iter_dir_new(d)
        delta = perf_counter() - start
        print(f"{n_files} files with os.scandir: {delta:.4f}")

    print()

    d = os.path.join(topdir, "old")
    create_dir(d, n_files)

    for _ in range(3):
        start = perf_counter()
        iter_dir_old(d)
        delta = perf_counter() - start
        print(f"{n_files} files current version: {delta:.4f}")

if __name__ == "__main__":
   main()

The following output shows that the os.scandir version takes less than half the time:

10000 files with os.scandir: 0.0191
10000 files with os.scandir: 0.0227
10000 files with os.scandir: 0.0274

10000 files current version: 0.0540
10000 files current version: 0.0607
10000 files current version: 0.0621

It's definitely not a big thing and spending time with it was more a bit of play for me, but I'd say it can be improved a bit with minimal effort. So I'm preparing a PR for it.