troydhanson / uthash

C macros for hash tables and more
Other
4.18k stars 926 forks source link

Why Performance is Bad? #119

Closed QuestionPython closed 7 years ago

QuestionPython commented 7 years ago

Hello, Why Performance is Bad? @troydhanson

Performance of Array PHP vs UtHash C

$ time php test.php

<?php
$list=array();
for($i=0; $i<=9999999;$i++)
{
$list[$i]=$i*$i;
}
?>

real    0m0.335s
user    0m0.244s
sys 0m0.088s

$ cc -I../src -g -Wall -o custom custom.c $ time ./custom

#include "uthash.h"
#include <stdlib.h>
#include <stdio.h>
typedef struct example_user_t {
    int id;
    int cookie;
    UT_hash_handle hh;
} example_user_t;
int main(int argc,char *argv[])
{
    int i;
    example_user_t *user, *users=NULL;
    for(i=0; i<=9999999; i++) {
        user = (example_user_t*)malloc(sizeof(example_user_t));
        if (user == NULL) {
            exit(-1);
        }
        user->id = i;
        user->cookie = i*i;
        HASH_ADD_INT(users,id,user);
    }
    return 0;
}

real    0m4.529s
user    0m4.288s
sys 0m0.240s
Quuxplusone commented 7 years ago

AFAIK, your PHP code isn't doing the same thing as the uthash code, though! To write C code matching your PHP code (repeatedly appending to an array), you should be doing something like this:

#include "utarray.h"

typedef struct example_user_t {
    int id;
    int cookie;
} example_user_t;

static UT_icd icd = {
    .sz = sizeof(example_user_t),
    .init = NULL,
    .copy = NULL,
    .dtor = NULL
};

int main()
{
    UT_array users;
    utarray_init(&users, &icd);
    for (int i=0; i<=9999999; i++) {
        example_user_t user = {i, i*i};
        utarray_push_back(&users, &user);
    }
}

which is just as fast as your timing of the PHP:

$ time ./a.out

real    0m0.066s
user    0m0.027s
sys 0m0.033s

Also, notice that when you invoke gcc you ought to be passing in at least -O2; the default is -O0, i.e., very unoptimized. Changing from -O0 to -O3 in your example changed my measurements from "6.2 seconds" to "4.5 seconds".

Lastly, are you sure you measured correctly? On my own Macbook Pro, php test.php produces the error message Fatal error: Allowed memory size of 134217728 bytes exhausted (tried to allocate 32 bytes) in /private/tmp/test.php on line 5, unless I edit the code to look like this:

<?php
ini_set('memory_limit', '-1');
$list=array();
for($i=0; $i<=9999999;$i++)
{
    $list[$i] = $i*$i;
}
?>

And when I do that, I see "4.8 seconds" for the PHP code. You really see a 100x speedup on your machine? Or did you drop a couple of 9s from the loop bound for testing and forget to put them back before running the final time command?

real    0m4.878s
user    0m4.059s
sys 0m0.744s
QuestionPython commented 7 years ago

your code is array. may do this with HashMap and send Code? :+1: @troydhanson

QuestionPython commented 7 years ago

$ cc -I../src -g -Wall -o custom2 custom2.c -O3

$ time ./custom2

real    0m3.959s
user    0m3.708s
sys 0m0.248s

Source :

#include <stdlib.h>
#include <stdio.h>
#include "uthash.h"
typedef struct example_user_t {
    int id;
    int cookie;
    UT_hash_handle hh;
} example_user_t;
int main(int argc,char *argv[])
{
    example_user_t *user,*users=NULL;
    for (int i=1; i<=100000000/10; i++)
    {
        user = (example_user_t*)malloc(sizeof(example_user_t));
        user->id = i;
        user->cookie = i*i;
        HASH_ADD_INT(users,id,user);
    }
    return 0;
}

@troydhanson

Quuxplusone commented 7 years ago

And when I do that, I see "4.8 seconds" for the PHP code. You really see a 100x speedup on your machine? Or did you drop a couple of 9s from the loop bound for testing and forget to put them back before running the final time command?

I'm still seeing 4.8 seconds for the PHP code, even when I use PHP 7.0.3 on http://sandbox.onlinephpfunctions.com . Where are you getting the 0.335-second number from?

Please try the following script on your computer and post the results:

cat >test.php <<'EOF'
<?php
ini_set('memory_limit', '-1');
$list = array();
$count = 0+($argv[1]);
for ($i = 0; $i < $count; $i++) {
    $list[$i] = $i*$i;
}
?>
EOF
time php test.php 1000
time php test.php 10000
time php test.php 100000
time php test.php 1000000
time php test.php 10000000

Then try this one:

cat >test.c <<'EOF'
#include "uthash.h"
#include <stdlib.h>
#include <stdio.h>

typedef struct example_user_t {
    int id;
    int cookie;
    UT_hash_handle hh;
} example_user_t;

int main(int argc, char **argv)
{
    int count = atoi(argv[1]);
    example_user_t *user, *users=NULL;
    for (int i = 0; i < count; ++i) {
        user = (example_user_t*)malloc(sizeof(example_user_t));
        user->id = i;
        user->cookie = i*i;
        HASH_ADD_INT(users,id,user);
    }
}
EOF
gcc -O3 test.c -o a.out
time ./a.out 1000
time ./a.out 10000
time ./a.out 100000
time ./a.out 1000000
time ./a.out 10000000
QuestionPython commented 7 years ago

Result PHP :

$ php -v

PHP 7.1.4-1+deb.sury.org~yakkety+1 (cli) (built: Apr 11 2017 22:13:33) ( NTS )
Copyright (c) 1997-2017 The PHP Group
Zend Engine v3.1.0, Copyright (c) 1998-2017 Zend Technologies
    with Zend OPcache v7.1.4-1+deb.sury.org~yakkety+1, Copyright (c) 1999-2017, by Zend Technologies

$ time php file.php 1000

real    0m0.097s
user    0m0.020s
sys 0m0.020s

$ time php file.php 10000

real    0m0.050s
user    0m0.032s
sys 0m0.012s

$ time php file.php 100000

real    0m0.049s
user    0m0.032s
sys 0m0.016s

$ time php file.php 1000000

real    0m0.078s
user    0m0.048s
sys 0m0.028s

$ time php file.php 10000000

real    0m0.526s
user    0m0.316s
sys 0m0.204s
QuestionPython commented 7 years ago

Result Uthash+c :

$ cc -I../src -g -Wall -o custom0 custom0.c -O3

$ time ./custom0 1000

real    0m0.002s
user    0m0.000s
sys 0m0.000s

$ time ./custom0 10000

real    0m0.005s
user    0m0.004s
sys 0m0.000s

$ time ./custom0 100000

real    0m0.012s
user    0m0.008s
sys 0m0.000s

$ time ./custom0 1000000

real    0m0.268s
user    0m0.228s
sys 0m0.040s

$ time ./custom0 10000000

real    0m3.938s
user    0m3.660s
sys 0m0.276s
QuestionPython commented 7 years ago

Results of PHP vs Uthash+C

php : $ time php file.php 1000000

real    0m0.078s
user    0m0.048s
sys 0m0.028s

c :‌ $ time ./custom0 1000000

real    0m0.268s
user    0m0.228s
sys 0m0.040s

===> why php have better performance from C+Uthash here?


php : $ time php file.php 10000000

real    0m0.526s
user    0m0.316s
sys 0m0.204s

c : $ time ./custom0 10000000

real    0m3.938s
user    0m3.660s
sys 0m0.276s

===> why php have better performance from C+Uthash here?

tbeu commented 7 years ago

To have it comparable, PHP should be forced to hash maps, too.

<?php
ini_set('memory_limit', '-1');
$list = array();
$count = 0+($argv[1]);
for ($i = 0; $i < $count; $i++) {
    $user = array($i => $i*$i);
    array_push($list, $user);
}
?>
QuestionPython commented 7 years ago

Why array_push in php ?

QuestionPython commented 7 years ago

php1 file :+1:

<?php
ini_set('memory_limit', '-1');
$list = array();
$count = 0+($argv[1]);
for ($i = 0; $i < $count; $i++) {
    $user = array($i => $i*$i);
    array_push($list, $user);
}
?>

$ time php php1 1000000

real    0m0.264s
user    0m0.196s
sys 0m0.064s

$ time php php1 10000000

real    0m2.826s
user    0m2.168s
sys 0m0.652s

also php is better performance vs c+uthash.

tbeu commented 7 years ago

To force some searching when you insert as compared to

<?php
ini_set('memory_limit', '-1');
$list = array();
$count = 0+($argv[1]);
for ($i = 0; $i < $count; $i++) {
    $user = array($i => $i*$i);
    $list[$i] = $user;
}
?>
tbeu commented 7 years ago

also php is better performance vs c+uthash.

OK, thanks for confirmation. At least it is no longer factor 8.

tbeu commented 7 years ago

If test.c = custom0 is compiled with -DHASH_FUNCTION=HASH_BER or -DHASH_FUNCTION=HASH_SAX performance increases.

tbeu commented 7 years ago

Interesting read: https://nikic.github.io/2014/12/22/PHPs-new-hashtable-implementation.html

QuestionPython commented 7 years ago

custom.c:

#include "uthash.h"
#include <stdlib.h>
#include <stdio.h>

typedef struct example_user_t {
    int id;
    int cookie;
    UT_hash_handle hh;
} example_user_t;

int main(int argc, char **argv)
{
    int count = atoi(argv[1]);
    example_user_t *user, *users=NULL;
    for (int i = 0; i < count; ++i) {
        user = (example_user_t*)malloc(sizeof(example_user_t));
        user->id = i;
        user->cookie = i*i;
        HASH_ADD_INT(users,id,user);
    }
}

$ cc -I../src -g -Wall -DHASH_FUNCTION=HASH_BER -o custom0 custom0.c -O3

$ time ./custom0 10000000

real    0m0.568s
user    0m0.380s
sys 0m0.184s

$ time ./custom0 1000000

real    0m0.087s
user    0m0.064s
sys 0m0.020s

also php have better performance...

QuestionPython commented 7 years ago

file.php:

<?php
ini_set('memory_limit', '-1');
$list = array();
$count = 0+($argv[1]);
for ($i = 0; $i < $count; $i++) {
    $list[$i] = $i*$i;
}
?>
$ time php file.php 10000000
real    0m0.404s
user    0m0.328s
sys 0m0.072s

$ time php file.php 10000000
real    0m0.432s
user    0m0.348s
sys 0m0.080s

$ time php file.php 10000000
real    0m0.398s
user    0m0.308s
sys 0m0.088s

$ cc -I../src -g -Wall -DHASH_FUNCTION=HASH_BER -o custom0 custom0.c -O3
$ time ./custom0 10000000
real    0m0.553s
user    0m0.344s
sys 0m0.208s

$ time ./custom0 10000000
real    0m0.550s
user    0m0.344s
sys 0m0.204s

$ time ./custom0 10000000
real    0m0.550s
user    0m0.364s
sys 0m0.184s

also php have better performance..

$ time php file.php 100000000
real    0m4.917s
user    0m2.916s
sys 0m1.208s

$ time ./custom0  100000000
real    0m37.633s
user    0m4.276s
sys 0m2.648s
Quuxplusone commented 7 years ago

@questionpython: I believe @tbeu's link to https://nikic.github.io/2014/12/22/PHPs-new-hashtable-implementation.html has sufficiently answered the question of what makes PHP 7 so much faster than PHP 5.

This is basically a question about PHP: "What changed between 5 and 7 to make PHP 7's performance on integer-indexed arrays outpace the performance of a naive hashtable?" Uthash doesn't purport to do the integer-index optimization. But you can do something like it yourself, if you happen to know your keys are integers with very few hash collisions. (Remember, if your keys are integers with very few hash collisions, then a hashtable is a supremely bad data structure for your purposes.)

Once you've removed all the hash-function overhead, a good chunk of the remaining time is in malloc; so Amdahl's Law suggests that you should focus on optimizing malloc. I don't have the ability to test with tcmalloc, but I wrote a quick-and-dirty wrapper that reduces our number of round-trips-to-malloc from 10,000,000 to 16.

$ gcc testarr.c -O3 -o testarr
$ time ./testarr 10000000
real    0m4.842s
user    0m4.261s
sys 0m0.472s
$ gcc testarr.c -O3 -o testarr -DHASH_FUNCTION=HASH_TRIVIAL 
$ time ./testarr 10000000
real    0m1.355s
user    0m1.015s
sys 0m0.313s
$ gcc testarr.c -O3 -o testarr -DHASH_FUNCTION=HASH_TRIVIAL -DFAST_ALLOC
$ time ./testarr 10000000
real    0m0.859s
user    0m0.525s
sys 0m0.301s

Here's the code I used for these benchmarks:

#include "uthash.h"
#include <stdlib.h>
#include <stdio.h>

#define HASH_TRIVIAL(key,keylen,hashv)                                           \
do {                                                                             \
  (hashv) = *(key);                                                              \
} while (0)

typedef struct example_user_t {
    int id;
    int cookie;
    UT_hash_handle hh;
} example_user_t;

#ifdef FAST_ALLOC
#define ALLOC() alloc_quickly()
#else
#define ALLOC() (example_user_t*)malloc(sizeof (example_user_t))
#endif

static inline example_user_t *alloc_quickly()
{
    static example_user_t *data = NULL;
    static int capacity = 0;
    static int length = 0;
    if (length == capacity) {
        capacity = 2*capacity + 100;
        length = 0;
        data = malloc(capacity * sizeof *data);
    }
    return &data[length++];
}

int main(int argc, char **argv)
{
    int count = atoi(argv[1]);
    example_user_t *user, *users=NULL;
    for (int i = 0; i < count; ++i) {
        user = ALLOC();
        user->id = i;
        user->cookie = i*i;
        HASH_ADD_INT(users,id,user);
    }
}
QuestionPython commented 7 years ago

Question : also can HASH_ADD_STR with good performance? because php also support string key.

QuestionPython commented 7 years ago

$ time php file.php 10000000

real    0m0.427s
user    0m0.336s
sys 0m0.068s

$ time php file.php 10000000

real    0m0.408s
user    0m0.340s
sys 0m0.064s

$ gcc fast.c -O3 -o fast -DHASH_FUNCTION=HASH_TRIVIAL -DFAST_ALLOC

$ time ./fast 10000000

real    0m0.499s
user    0m0.452s
sys 0m0.040s

$ time ./fast 10000000

real    0m0.445s
user    0m0.368s
sys 0m0.072s

$ time ./fast 10000000

real    0m0.511s
user    0m0.440s
sys 0m0.068s

also php have better performance!!!!!!!

QuestionPython commented 7 years ago

@tbeu @Quuxplusone @troydhanson

QuestionPython commented 7 years ago

! can not make better performance for this? @tbeu @Quuxplusone @troydhanson

QuestionPython commented 7 years ago

???!

@tbeu @Quuxplusone @troydhanson

liudf0716 commented 7 years ago

it seems uthash has performance problem.

QuestionPython commented 7 years ago

yes , but should try to make better. but owner of this plugin, not answer me.

liudf0716 commented 7 years ago

I think uthash think more convenient than performance