Open xdeng opened 4 years ago
The performance of the plist_to_bin function is poor when the plist file has 320000 nodes, I tried to modify the hashtable function and it was half faster.
hashtable.h
typedef struct hashentry_t { void *key; void *value; } hashentry_t; typedef unsigned int(*hash_func_t)(const void* key); typedef int (*compare_func_t)(const void *a, const void *b); typedef void (*free_func_t)(void *ptr); typedef struct he_array_t { hashentry_t *pdata; long len; long capacity; long capacity_step; } he_array_t; typedef struct hashtable_t { he_array_t *entries[4096]; size_t count; hash_func_t hash_func; compare_func_t compare_func; free_func_t free_func; } hashtable_t; hashtable_t* hash_table_new(hash_func_t hash_func, compare_func_t compare_func, free_func_t free_func); void hash_table_destroy(hashtable_t *ht); void hash_table_insert(hashtable_t* ht, void *key, void *value); void* hash_table_lookup(hashtable_t* ht, void *key); void hash_table_remove(hashtable_t* ht, void *key);
hashtable.c
he_array_t *he_array_new(int capacity) { he_array_t *ha = (he_array_t*)malloc(sizeof(he_array_t)); ha->pdata = (hashentry_t*)malloc(sizeof(hashentry_t) * capacity); ha->capacity = capacity; ha->capacity_step = (capacity > 4096) ? 4096 : capacity; ha->len = 0; return ha; } void he_array_free(he_array_t *ha) { if (!ha) return; if (ha->pdata) { free(ha->pdata); } free(ha); } void he_array_insert(he_array_t *ha, hashentry_t *data, long array_index) { long remaining; if (!ha || !ha->pdata) return; remaining = ha->capacity-ha->len; if (remaining == 0) { ha->pdata = realloc(ha->pdata, sizeof(hashentry_t) * (ha->capacity + ha->capacity_step)); ha->capacity += ha->capacity_step; } if (array_index < 0 || array_index >= ha->len) { memcpy(&ha->pdata[ha->len], data, sizeof(hashentry_t)); } else { memmove(&ha->pdata[array_index+1], &ha->pdata[array_index], (ha->len-array_index) * sizeof(hashentry_t)); memcpy(&ha->pdata[array_index], data, sizeof(hashentry_t)); } ha->len++; } void he_array_add(he_array_t *ha, hashentry_t *data) { he_array_insert(ha, data, -1); } void he_array_remove(he_array_t *ha, long array_index) { if (!ha || !ha->pdata || array_index < 0) return; if (ha->len == 0 || array_index >= ha->len) return; if (ha->len == 1) { memset(&ha->pdata[0], 0, sizeof(hashentry_t)); } else { memmove(&ha->pdata[array_index], &ha->pdata[array_index+1], (ha->len-array_index-1) * sizeof(hashentry_t)); } ha->len--; } void he_array_set(he_array_t *ha, hashentry_t *data, long array_index) { if (!ha || !ha->pdata || array_index < 0) return; if (ha->len == 0 || array_index >= ha->len) return; memcpy(&ha->pdata[array_index], data, sizeof(hashentry_t)); } hashentry_t* he_array_index(he_array_t *ha, long array_index) { if (!ha) return NULL; if (array_index < 0 || array_index >= ha->len) { return NULL; } return &ha->pdata[array_index]; } long he_array_size(he_array_t *ha) { return ha->len; } hashtable_t* hash_table_new(hash_func_t hash_func, compare_func_t compare_func, free_func_t free_func) { hashtable_t* ht = (hashtable_t*)malloc(sizeof(hashtable_t)); int i; for (i = 0; i < 4096; i++) { ht->entries[i] = he_array_new(8); } ht->count = 0; ht->hash_func = hash_func; ht->compare_func = compare_func; ht->free_func = free_func; return ht; } void hash_table_destroy(hashtable_t *ht) { int i = 0; if (!ht) return; for (i = 0; i < 4096; i++) { he_array_t* a = ht->entries[i]; if(ht->free_func){ int j; for (j = 0; j < he_array_size(a); j++) { hashentry_t* e = he_array_index(a, j); ht->free_func(e->value); } } he_array_free(a); } free(ht); } void hash_table_insert(hashtable_t* ht, void *key, void *value) { unsigned int hash; int idx0, i; he_array_t* a; hashentry_t entry; if (!ht || !key) return; hash = ht->hash_func(key); idx0 = hash & 0xFFF; // get the idx0 list a = ht->entries[idx0]; for (i = 0; i < he_array_size(a); i++) { hashentry_t* e = he_array_index(a, i); if(ht->compare_func(e->key, key)){ // element already present. replace value. e->value = value; return; } } // if we get here, the element is not yet in the list. // make a new entry. entry.key = key; entry.value = value; he_array_add(a, &entry); ht->count++; } void* hash_table_lookup(hashtable_t* ht, void *key) { unsigned int hash; int idx0, i; he_array_t* a; if (!ht || !key) return NULL; hash = ht->hash_func(key); idx0 = hash & 0xFFF; a = ht->entries[idx0]; for (i = 0; i < he_array_size(a); i++) { hashentry_t* e = he_array_index(a, i); if(ht->compare_func(e->key, key)){ return e->value; } } return NULL; } void hash_table_remove(hashtable_t* ht, void *key) { unsigned int hash; int idx0, i; he_array_t* a; if (!ht || !key) return; hash = ht->hash_func(key); idx0 = hash & 0xFFF; // get the idx0 list a = ht->entries[idx0]; for (i = 0; i < he_array_size(a); i++) { hashentry_t* e = he_array_index(a, i); if(ht->compare_func(e->key, key)){ // found element, remove it from the list if(ht->free_func){ ht->free_func(e->value); } he_array_remove(a, i); return; } } }
The code is very poorly written for reference only. you can try try
Any interest in finishing this? The array thing looks interesting.
Yes. This will be addressed after the next release which is due in a few days.
The performance of the plist_to_bin function is poor when the plist file has 320000 nodes, I tried to modify the hashtable function and it was half faster.
hashtable.h
hashtable.c