Closed ret2libc closed 5 years ago
there are build errors. not gonna review something that is not even building
it is building... as you can see the make
exits with 0. It's just because this PR depends on https://github.com/radare/sdb/pull/158 to make tests work.
wat nao
I will rebase this soon!
Performance test:
#include "minunit.h"
#include <stdlib.h>
#include <sdb.h>
#include <time.h>
#define SEED 0xdeadbeef
#define INSERT_SZ 10000
#define FIND_SZ 1000000
#define INSERT_SZ2 10000000
#define FIND_SZ2 100000000
#define MOD_VAL 100000
void free_key(HtKv *kv) {
free (kv->key);
free (kv);
}
int main(int argc, char **argv) {
// on master this defaults to allocating 1024 elements
// on my PR it starts with 3 elements
SdbHt *ht = ht_new (NULL, free_key, NULL);
int i;
clock_t start, end;
double cpu_time_used, time_taken;
// few elements
srand(SEED);
start = clock();
for (i = 0; i < INSERT_SZ; ++i) {
int val = rand() % MOD_VAL;
char buf[20];
snprintf (buf, 20, "%d", val);
ht_insert (ht, buf, (void *)(ut64)val);
}
end = clock();
time_taken = ((double)end - start)/CLOCKS_PER_SEC; // in seconds
printf("insert time: %f seconds\n", time_taken);
long long sum = 0;
start = clock();
for (i = 0; i < FIND_SZ; ++i) {
int val = rand() % MOD_VAL;
char buf[20];
snprintf (buf, 20, "%d", val);
int res = (int)(ut64)ht_find (ht, buf, NULL);
sum += res;
}
end = clock();
time_taken = ((double)end - start)/CLOCKS_PER_SEC; // in seconds
printf("find time: %f seconds, sum = %lld\n", time_taken, sum);
ht_free (ht);
// many elements
ht = ht_new (NULL, free_key, NULL);
srand(SEED);
start = clock();
for (i = 0; i < INSERT_SZ2; ++i) {
int val = rand() % MOD_VAL;
char buf[20];
snprintf (buf, 20, "%d", val);
ht_insert (ht, buf, (void *)(ut64)val);
}
end = clock();
time_taken = ((double)end - start)/CLOCKS_PER_SEC; // in seconds
printf("insert time (2 time): %f seconds\n", time_taken);
sum = 0;
start = clock();
for (i = 0; i < FIND_SZ2; ++i) {
int val = rand() % MOD_VAL;
char buf[20];
snprintf (buf, 20, "%d", val);
int res = (int)(ut64)ht_find (ht, buf, NULL);
sum += res;
}
end = clock();
time_taken = ((double)end - start)/CLOCKS_PER_SEC; // in seconds
printf("find time (2 time): %f seconds, sum = %lld\n", time_taken, sum);
ht_free (ht);
return 0;
}
Results on master:
BASEDIR=$(pwd)/../../build-master make test_hash_performance ; ./test_hash_performance
cc -I/home/rschiron/projects/sdb/test/../src -I/home/rschiron/projects/sdb/test/unit/../../build-master -g -O0 test_hash_performance.c -o test_hash_performance /home/rschiron/projects/sdb/test/unit/../../build-master/libsdb.a
insert time: 0.004317 seconds
find time: 0.413766 seconds, sum = 4738954741
insert time (2 time): 36.084946 seconds
find time (2 time): 348.878303 seconds, sum = 5000114468030
Results on my branch:
BASEDIR=$(pwd)/../../build make test_hash_performance ; ./test_hash_performance
cc -I/home/rschiron/projects/sdb/test/../src -I/home/rschiron/projects/sdb/test/unit/../../build -g -O0 test_hash_performance.c -o test_hash_performance /home/rschiron/projects/sdb/test/unit/../../build/libsdb.a
insert time: 0.013456 seconds
find time: 0.175061 seconds, sum = 4738954741
insert time (2 time): 5.590546 seconds
find time (2 time): 43.648521 seconds, sum = 5000114468030
As you can see, this PR makes it just so much better in everything. And this is not even a fair comparison since the ht
in my PR will start with 3 elements and not with 1024 as it is in master (which is many time a waste of space, as one of the issue has shown)
The only waste is u allocate tons of sdb. Which is not the case. Fixed size is fine for most use cases
On 5 Oct 2018, at 19:00, Riccardo Schirone notifications@github.com wrote:
Performance test:
include "minunit.h"
include
include
include
define SEED 0xdeadbeef
define INSERT_SZ 10000
define FIND_SZ 1000000
define INSERT_SZ2 10000000
define FIND_SZ2 100000000
define MOD_VAL 100000
void free_key(HtKv *kv) { free (kv->key); free (kv); }
int main(int argc, char *argv) { // on master this defaults to allocating 1024 elements // on my PR it starts with 3 elements SdbHt ht = ht_new (NULL, free_key, NULL); int i;
clock_t start, end; double cpu_time_used, time_taken;
// few elements srand(SEED); start = clock(); for (i = 0; i < INSERT_SZ; ++i) { int val = rand() % MOD_VAL; char buf[20]; snprintf (buf, 20, "%d", val); ht_insert (ht, buf, (void *)(ut64)val); } end = clock(); time_taken = ((double)end - start)/CLOCKS_PER_SEC; // in seconds printf("insert time: %f seconds\n", time_taken);
long long sum = 0; start = clock(); for (i = 0; i < FIND_SZ; ++i) { int val = rand() % MOD_VAL; char buf[20]; snprintf (buf, 20, "%d", val); int res = (int)(ut64)ht_find (ht, buf, NULL); sum += res; } end = clock(); time_taken = ((double)end - start)/CLOCKS_PER_SEC; // in seconds printf("find time: %f seconds, sum = %lld\n", time_taken, sum); ht_free (ht);
// many elements ht = ht_new (NULL, free_key, NULL); srand(SEED); start = clock(); for (i = 0; i < INSERT_SZ2; ++i) { int val = rand() % MOD_VAL; char buf[20]; snprintf (buf, 20, "%d", val); ht_insert (ht, buf, (void *)(ut64)val); } end = clock(); time_taken = ((double)end - start)/CLOCKS_PER_SEC; // in seconds printf("insert time (2 time): %f seconds\n", time_taken);
sum = 0; start = clock(); for (i = 0; i < FIND_SZ2; ++i) { int val = rand() % MOD_VAL; char buf[20]; snprintf (buf, 20, "%d", val); int res = (int)(ut64)ht_find (ht, buf, NULL); sum += res; } end = clock(); time_taken = ((double)end - start)/CLOCKS_PER_SEC; // in seconds printf("find time (2 time): %f seconds, sum = %lld\n", time_taken, sum);
ht_free (ht); return 0; } Results on master:
BASEDIR=$(pwd)/../../build-master make test_hash_performance ; ./test_hash_performance
cc -I/home/rschiron/projects/sdb/test/../src -I/home/rschiron/projects/sdb/test/unit/../../build-master -g -O0 test_hash_performance.c -o test_hash_performance /home/rschiron/projects/sdb/test/unit/../../build-master/libsdb.a insert time: 0.004317 seconds find time: 0.413766 seconds, sum = 4738954741 insert time (2 time): 36.084946 seconds find time (2 time): 348.878303 seconds, sum = 5000114468030 Results on my branch:BASEDIR=$(pwd)/../../build make test_hash_performance ; ./test_hash_performance
cc -I/home/rschiron/projects/sdb/test/../src -I/home/rschiron/projects/sdb/test/unit/../../build -g -O0 test_hash_performance.c -o test_hash_performance /home/rschiron/projects/sdb/test/unit/../../build/libsdb.a insert time: 0.013456 seconds find time: 0.175061 seconds, sum = 4738954741 insert time (2 time): 5.590546 seconds find time (2 time): 43.648521 seconds, sum = 5000114468030 As you can see, this PR makes it just so much better in everything. And this is not even a fair comparison since the ht in my PR will start with 3 elements and not with 1024 as it is in master (which is many time a waste of space, as one of the issue has shown)— You are receiving this because you commented. Reply to this email directly, view it on GitHub, or mute the thread.
The only waste is u allocate tons of sdb. Which is not the case. Fixed size is fine for most use cases
Yes, that's a problem that I'm going to fix in the next PR, where I will use an approach similar to dict
. But for now, I think this PR is already good as it is, since it definitely improves over the previous implementation (which already used sdblists)
there are build errors. not gonna review something that is not even building