Example for string/int, String/string khash usage

joeatbayes commented 9 years ago

It took me a while to figure out how to use khash from the built in example. I saw similar complaints on other blog pages so I don't think I am unique. I suggest adding the following as file to the repository as an example and modifying the home page of the repository to link to it. It compiled and ran fine with gcc with the std=c11 flag set.

// khash example for string key int payload.  And string key with string payload.
//
// javadoc style docs
//   http://samtools.sourceforge.net/samtools/khash/index.html?PDefines/PDefines.html
// https://attractivechaos.wordpress.com/2008/09/02/implementing-generic-hash-library-in-c/
// https://attractivechaos.wordpress.com/2009/09/29/khash-h/
// https://attractivechaos.wordpress.com/tag/hash/
// Wrapper code for khash which is a good start for wrapping khash for ffi access from luajit.
//   https://github.com/attractivechaos/klib/issues/44

#include <stdio.h>
#include <stdlib.h>
#include "khash.h"

// shorthand way to get the key from hashtable or defVal if not found
#define kh_get_val(kname, hash, key, defVal) ({k=kh_get(kname, hash, key);(k!=kh_end(hash)?kh_val(hash,k):defVal);})

// shorthand way to set value in hash with single line command.  Returns value
// returns 0=replaced existing item, 1=bucket empty (new key), 2-adding element previously deleted
#define kh_set(kname, hash, key, val) ({int ret; k = kh_put(kname, hash,key,&ret); kh_value(hash,k) = val; ret;})

// name part of init must be unique for the key, value types.
// in this instance 33 is arbitrary symbolic name for a hashtable
// that contains string keys and int values.
const int khStrInt = 33;
KHASH_MAP_INIT_STR(khStrInt, int) // setup khash to handle string key with int payload
int string_key_int_body_example()
{
   printf("\n\nBegin khash test string key with int payload\n");
   int ret, is_missing;
   khiter_t k;
   khash_t(khStrInt) *h = kh_init(khStrInt); // create a hashtable

   // Add a value Key "apple" to the hashtable
   k = kh_put(khStrInt, h, "apple", &ret); // add the key
   kh_value(h, k) = 10; // set the value of the key

   // shorthand way to set a value in the
   // with a macro 
   kh_set(khStrInt, h, "jimbo", 99);
   kh_set(khStrInt, h, "john haze", 98);
   kh_set(khStrInt, h, "jaki joiner", 97);

   // Retrieve the value for key "apple"
   k = kh_get(khStrInt, h, "apple");  // first have to get ieter
   if (k == kh_end(h)) {  // k will be equal to kh_end if key not present
      printf("no key found for apple");
   } else {
      printf ("key at apple=%d\n", kh_val(h,k)); // next have to fetch  the actual value
   }

   // Retrieve the value for key "apple"
   k = kh_get(khStrInt, h, "john haze");  // first have to get ieter
   if (k == kh_end(h)) {  // k will be equal to kh_end if key not present
      printf("no key found for john haze\n");
   } else {
      printf ("key at john haze=%d\n", kh_val(h,k)); // next have to fetch  the actual value
   }

   // retreive key for the key "not_present"
   // which should print the error message
   k = kh_get(khStrInt, h, "not_present");  // first have to get ieter
   if (k == kh_end(h)) {  // k will be equal to kh_end if key not present
      printf("no key found for not_present\n");
   } else {
      printf ("key at not_present=%d\n", kh_val(h,k)); // next have to fetch  the actual value
   }

   // shorthand method to get value that returns value found
   // or specified default value if not present.
   int tval = kh_get_val(khStrInt, h, "apple", -1);
   printf ("shortcut tval for apple = %d\n", tval);

   tval = kh_get_val(khStrInt, h, "john haze", -1);
   printf ("shortcut tval for john haze = %d\n", tval);

   // missing key should return default value of -1
   tval = kh_get_val(khStrInt, h, "not_present", -1);
   printf ("shortcut tval for not_present = %d\n", tval);

   // Try to delete a key and then retrieve it
   // to see if it is really gone.
   k = kh_get(khStrInt, h, "john haze"); // get the ieterator
   if (k != kh_end(h)) {  // if it is found
      printf("deleting key john_haze\n");
      kh_del(khStrInt, h, k);  // then delete it.
   }
   // now see if it is really gone
   tval = tval = kh_get_val(khStrInt, h, "john haze", -1);
   printf ("after delete tval for john haze = %d\n", tval);

   // Ieterate the hash table by key, value and print out
   // the values found.
   printf("\nIeterate all keys\n");
   for (k = kh_begin(h); k != kh_end(h); ++k) {
      if (kh_exist(h, k)) {
         const char *key = kh_key(h,k);
         tval = kh_value(h, k);
         printf("key=%s  val=%d\n", key, tval);
      }
   }

   // cleanup and remove our hashtable
   kh_destroy(khStrInt, h);
   return 0;
}

char *missing = "MISSING";
const int khStrStr = 32;
KHASH_MAP_INIT_STR(khStrStr, char*); // setup khash to handle string key with string body
int str_key_str_body_example()
{
   printf("\n\nBegin khash test string key with string payload\n");
   int ret, is_missing;
   khiter_t k;
   khash_t(khStrStr) *h = kh_init(khStrStr); // create a hashtable

   // Add a value Key "apple" to the hashtable
   k = kh_put(khStrStr, h, "apple", &ret); // add the key
   kh_value(h, k) = "fruit"; // set the value of the key

   // shorthand way to set a value in hash with single line. 
   kh_set(khStrStr, h, "jimbo", "artist");
   kh_set(khStrStr, h, "john haze", "engineer");
   kh_set(khStrStr, h, "jaki joiner", "architect");

   // Retrieve the value for key "apple"
   k = kh_get(khStrStr, h, "apple");  // first have to get ieter
   if (k == kh_end(h)) {  // k will be equal to kh_end if key not present
      printf("no key found for apple");
   } else {
      printf ("key at apple=%s\n", kh_val(h,k)); // next have to fetch  the actual value
   }

   // Retrieve the value for key "apple"
   k = kh_get(khStrStr, h, "john haze");  // first have to get ieter
   if (k == kh_end(h)) {  // k will be equal to kh_end if key not present
      printf("no key found for john haze\n");
   } else {
      printf ("key at john haze=%s\n", kh_val(h,k)); // next have to fetch  the actual value
   }

   // retreive key for the key "not_present"
   // which should print the error message
   k = kh_get(khStrStr, h, "not_present");  // first have to get ieter
   if (k == kh_end(h)) {  // k will be equal to kh_end if key not present
      printf("no key found for not_present\n");
   } else {
      printf ("key at not_present=%s\n", kh_val(h,k)); // next have to fetch  the actual value
   }

   // shorthand method to get value that returns value found
   // or specified default value if not present.   Would normally
   // use NULL default if key is not present but easier to printf
   // a real value.
   char *tval = kh_get_val(khStrStr, h, "apple", missing);
   printf ("shortcut tval for apple = %s\n", tval);

   tval = kh_get_val(khStrStr, h, "john haze", missing);
   printf ("shortcut tval for john haze = %s\n", tval);

   // missing key should return default value of -1
   tval = kh_get_val(khStrStr, h, "not_present", missing);
   printf ("shortcut tval for not_present = %s\n", tval);

   // Try to delete a key and then retrieve it
   // to see if it is really gone.
   k = kh_get(khStrStr, h, "john haze"); // get the ieterator
   if (k != kh_end(h)) {  // if it is found
      printf("deleting key john_haze\n");
      kh_del(khStrStr, h, k);  // then delete it.
   }
   // now see if it is really gone
   tval = tval = kh_get_val(khStrStr, h, "john haze", missing);
   printf ("after delete tval for john haze = %s\n", tval);

   // Ieterate the hash table by key, value and print out
   // the values found.
   printf("\nIeterate all keys\n");
   for (k = kh_begin(h); k != kh_end(h); ++k) {
      if (kh_exist(h, k)) {
         const char *key = kh_key(h,k);
         tval = kh_value(h, k);
         printf("key=%s  val=%s\n", key, tval);
      }
   }

   // cleanup and remove our hashtable
   kh_destroy(khStrStr, h);
   return 0;
}

int main()
{
   string_key_int_body_example();
   str_key_str_body_example();
}

jvonmitchell commented 9 years ago

Kudos! It's so not obvious. We need a table of numbers and what type of hash table they correspond with.

mberacochea commented 9 years ago

Really helpful. This should go on the wiki

ArcaneEngineer commented 9 years ago

I agree with all of the above. Why has this not at least been put into the README as a link, @attractivechaos? It would really help adoption; I'd wanted to use this library since the end of last year but ended up writing my own map implementation because khash looked so user-unfriendly! (Of course mine is still nowhere near as fast.)

attractivechaos commented 9 years ago

ArcaneEngineer commented 9 years ago

@attractivechaos: that page doesn't show how to use string keys with khash, which is what 80%+ of users will want, hence I suggest putting joeatbayes's example on that page directly and in source. HTH.

attractivechaos commented 9 years ago

@ArcaneEngineer check out the 2nd example. It is more general as it shows how to work with strings allocated on heap (i.e. with malloc() and new). The example above only works with constant string keys.

joeatbayes commented 9 years ago

You make a good point. I should have added examples that show dynamically allocated keys and values. I actually use khash mostly with dynamically allocated strings I had used the constant keys as a simplification for the example. I could post a new file that includes the dynamically allocated keys and values. Once you add the file to the kHash repository and grant me write access I will update it to include the dynamically allocated values and generate a pull request. I like the http://attractivechaos.github.io/klib/#Khash%3A%20generic%20hash%20table and could not have written my examples without it. I would love to see my examples added to that page but I think they are OK in Github as a file where they are easier to test. I propose you add these examples to /examples/khash/simple_example.c

joeatbayes commented 9 years ago

Hey @jvonmitchell A simple table showing all object types stored would be problematic since you end up creating a number for each new combination of key and body. This table will vary from project to project. I do agree that there are several such as string, string, string,float, string,long that should be predefined and have standard numbers.

What I think would be handy is a union that could store a variety of different structures and a byte or int indicator that indicates what we have actually stored in the union. It would move us closer to the bag style hash we get in Javascript and Python but you would have to edit the union every time you add a new structure type. Another option would be to use a small structure that includes a byte level object type indicator and a void pointer. You would then need helpers to convert to the underlying objects into and out of the void pointer.

The advantage of the union is it is a fully supported cast operation but it entails a little more explicit maintenance where with the void pointer style you can add new object types just by writing new helpers and all the helpers don't have to live in the same source module. I think those comfortable with K&R would prefer the void pointer style while those more comfortable with Java and C++ would prefer the union.

If you send more a little more data I on exactly what you are trying to accomplish I may be able to add them to the examples. I figure this is my duty since I am getting value from the excellent work done by @attractivechaos on khash I should try add a little value back.

joeatbayes commented 8 years ago

You can easily use structs. I have used it many times.

From: Arcane Engineer [mailto:notifications@github.com] Sent: Wednesday, November 4, 2015 2:11 PM To: attractivechaos/klib klib@noreply.github.com Cc: Joseph Ellsworth joee@bayesanalytic.com Subject: Re: [klib] Example for string/int, String/string khash usage (#49)

Is it safe to assume that khash does not support struct values presently? I've tried and tried and every time I write a new struct value into the hash, all other keys/values seem to get cleared.

— Reply to this email directly or view it on GitHub https://github.com/attractivechaos/klib/issues/49#issuecomment-153885217 . https://github.com/notifications/beacon/AGaG8BkqODa3SAFYAhzgIYu2TBrFao9iks5pCnnigaJpZM4ER8sM.gif

ghost commented 8 years ago

@joeatbayes thanks a lot for your example with every operation commented! I'm new to C and don't want to mess my head up with pointers to multidimensional arrays. Fortunately, khash fits perfectly, but the official documentation as well as comments in headers didn't help a lot.

mathog commented 8 years ago

While I appreciate the example @joeatbayes provided it led me merrily down the wrong path. The problem is that the line marked "add the key" with the key "apple" does not, in that instance add the key. What it adds is a pointer to the key. In the example with static strings it doesn't matter. In my program where keys were being generated dynamically and not stored things did not go well. This is because when all keys sent to kh_put() are from the same buffer this line of code in that routine

!(strcmp (h->keys[i], key) == 0)

always has the same value.

The documentation on kh_put() in khash.h currently says: @param k Key [type of keys] perhaps it might say: @param k Key [type of keys], if the key is a pointer its contents must remain in memory or something along those lines.

attractivechaos commented 8 years ago

@mathog You need to use strdup() to duplicate the key and free the memory when you delete the key or when you destroy the entire hash table. See example 3 here.

attractivechaos commented 8 years ago

PS: when you keep the pointers to strings elsewhere, example 2 in the documentation page is preferred as it does not duplicate the string contents.

mathog commented 8 years ago

On 22-Aug-2016 06:33, Attractive Chaos wrote:

PS: when you keep the pointers to strings elsewhere, example 2 in the documentation page is preferred as it does not duplicate the string contents.

Thanks for the pointers and the code. I managed to get it working by myself eventually, it just took longer than I thought it would.

A couple of other comments and suggestions:

kh_get returns a kh_iter_t, and kh_put returns a kh_int_t. That isn't consistent, even though the value returned is used in the same way. It happens that kh_int_t and kh_iter_t are the same thing when the compiler acts on khash.h in my example. It wasn't clear if that was guaranteed to always be the case.
In order to figure out what

KHASH_SET_INIT_STR(str);

did I ran my program through the preprocessor. Only at that point was it clear to me that multiple string key -> int value hash tables could share "str", and not need "strA", "strB", "strC". No way to tell from the examples because they just use one hash table each. I found it easier to work with the code in the expanded form and left it that way in my program. It let me drop print statements and use profiling and the debugger more easily to trace things into your code. (Remember, I had to figure out where I was going wrong.) I also added and used these inline functions (sorry about the wrap):

static inline void kh_set_byIter_khStrUint32(kh_StrUint32_t * h, khiter_t k, uint32_t val) { h->vals[k] = val; }

static inline uint32_t kh_get_byIter_khStrUint32(kh_StrUint32_t * h, khiter_t k) { return(h->vals[k]); }

In the documentation there are no suggestions for the initial size of the array if the number elements to be inserted is known. I had to look at the code produced by the preprocessor to see exactly how things were being stored - some hash methods chain values out from the bucket, others use only buckets, which is the case here. Knowing that, an initial size equal to the number of keys would have worked (after it resized) but would have been a bad choice. In my application ~280 million key/value pairs were being loaded, and there are a couple of ancillary hash tables, with the final size of all the structures ~35G, so minimizing the rounds of reallocation seemed like a good idea. I used 1.4 * number of keys for storage, and it seems to work fairly well
load everything then release all memory and quit takes about 12m40s, with no unexpected realloc() events. It would be nice if the documentation gave a hint or two on this topic.
The example might mention that using a separate strdup() for each string key or pointer to a value struct which is stored is not as efficient for either speed or space as placing the keys one after the other in one or a few large buffers. Doing it in big chunks eliminates the overhead of all the strdup() and free() calls and the memory wasted in the memory bookkeeping structures. The down side is that doing so makes removing keys or pointer values difficult. However, a lot of programs just load, process, and quit, so that issue never comes up in that context.

Thanks,

David Mathog mathog@caltech.edu Manager, Sequence Analysis Facility, Biology Division, Caltech

makmanalp commented 8 years ago

Yes! I just spent an hour debugging why kh_exist doesn't do the thing I want and the k == kh_end(h) method does the trick. Looks like kh_del will update the flags, but I don't know why kh_exist still returns true after that.

attractivechaos commented 8 years ago

@makmanalp Sorry. kh_exist tests if a bucket in the hash table has ever been filled before. If an element is deleted with kh_del, it still takes up the bucket, so kh_exist returns true.

Anyway, kh_exist is not properly named. It is confusing. I would use another name if I started from scratch again.

attractivechaos commented 8 years ago

@mathog Thanks for the suggestions.

In the current code, khiter_t is only used in the example, not elsewhere. Both kh_put and kh_get returns khint_t.
When I was debugging macros, I always expand it with gcc -E and indent. I wish C also had template. Then I wouldn't have written long macros just for generics.
The khash doc page says: "generic hash table with open addressing". Open addressing means the hash table uses a single array rather than chaining.
What you said is true, but before recommending against strdup, I need to do a micro benchmark to see how much time is really spent on strdup. It would be a premature optimization if strdup took much less time than hash table operations.

makmanalp commented 8 years ago

@attractivechaos No apology needed - thanks for an awesome library!

ghost commented 8 years ago

Thank you a lot for this helpful examples! I was not able to figure it out myself.

Do you maybe know if it is possible to add two values into the hash table, so that a key has two values stored like: key string --> value_one, value_two

joeatbayes commented 8 years ago

Not directly but it is pretty easy to accomplish. Simply create an object such as a linked list that contains pointers to the object you want to store. You add the linked list to the hash at key. You can just store a linked list in every node but that entails additional overhead if you do not expect most nodes to contain multiple elements. To avoid the overhead you can create as base type then convert to linked list when you add the 2nd item. You can use a union that contains a 1 bytes flag indicating type of object stored. EG: When the byte=1 then the pointer stored to a single object of based object type. When the byte=2 then the pointer store points to a linked list.

I am accepting new consulting projects please contact me 206-842-1008 if you have a problem and budget.

ghost commented 8 years ago

That is an interesting idea, thank you. But the part what made me confuse is more, how would you define the type of your hashtable than, as in your examples you use something like:

KHASH_MAP_INIT_STR(khStrInt, int)

So if your value is an object, would you just define it as a void* pointer?

KHASH_MAP_INIT_STR(khStrInt, void*)

attractivechaos commented 8 years ago

@joeatbayes Thanks for the explanation. I will show a tiny example to see it helps (not tested).

#include "khash.h"
typedef struct {
  int x, y;
} myval_t;
KHASH_MAP_INIT_STR(str, myval_t)
int main(void) {
  khash_t(str) *h;
  khint_t k;
  int absent;
  h = kh_init(str);
  k = kh_put(str, h, "foo", &absent);
  kh_val(h, k).x = 1;
  kh_val(h, k).y = 2;
  k = kh_get(str, h, "foo");
  if (k != kh_end(h))
    printf("%d,%d\n", kh_val(h,k).x, kh_val(h,k).y);
  // if keys are allocated on heap, free them before calling kh_destroy()
  kh_destroy(str, h);
  return 0;
}

joeatbayes commented 8 years ago

I like the example. You could also use the following if you wanted a variable length object.

typedef struct { linkedList *ll; } myval_t;

I most often work on engines and parser where the dominant use case is 0,1,N where N could be anywhere from 2 to millions of sub objects depending on what you are parsing so the LL or nested hash is a common use case.

Another common tactic is a object type value combined with a object pointer so the code can check to see what is stored and cast the pointer approximately. This allows nearly infinite flexibly in what is stored in the child buckets and they can be defined by the app programmer. You see a lot of this kind of code in the very low level OS internals where performance is critical. The problem is that it can also be difficult for other engineers to diagnose. You can recognize code written by a C programmer who grew up writing assembler because it will often contain something similar. typedef struct { byte objType; void *objptr; } myval_t;

A more maintainable way uses a union so the object type byte still indicates what is stored but you have a specific subset of allowed data types and you can reference them with lest casting mess in the underlying access code.

union DataBucket { int i; float f; char str[20]; linkedlist *ll; } dataBucket;

typedef struct { byte objType; dataBucket myData; } myval_t;

when objtype == 0 then databucket contains int when objtype == 1 then databucket contains float when objtype == 2 then databucket contains str[20] when objtype == 3 then databucket contains linkedlist * any other values would be invalid.

TinaZe commented 7 years ago

I was wondering if your const khStrInt is defining the length of your char* array you want to add to the hashtable?

I saw the example before for adding my own defined structure and for me it was really usefull but i am getting a weird valgrind error of "invalid read of size 1" so I thought maybe you could help me with it?

So in my header I have defined following:

const int khStrInt = 64; //i thought I have to set it to the length of the key I want to use, or ?

typedef khash_t(khStrInt) hashTable;

typedef struct { uint64_t ID; uint32_t pos; } entry;

And I have a function adding an entry:

void addEntry(hashTable* hT, char* key, uint32_t pos, uint64_t ID){ int ret; khiter_t k = kh_put(khStrInt, hT, key, &ret); //i am checking if it is added, just skipping it here kh_val(h, k).ID = ID; kh_val(h, k).pos = pos; }

And now if i want to retrieve the entry i do:

`bool is_inside_hashTable(hashTable hT, char key){

khiter_t k; k = kh_get(khStrInt, hT, key); }`

The kh_get is causing the "invalid read of size 1" error. I dont know what i am missing here ...

joeatbayes commented 7 years ago

@tinaze please create a free repository with your example code. It is hard to read in the comment.

WinterLi1993 commented 7 years ago

@attractivechaos is there a few examples of multi-key usage with using khash ?

lh3 commented 7 years ago

There are a few possible ways. I would recommend to keep actual values in a separate array. In the hash map, the value is the index of the value in the value array. There are also other approaches. For example, you can use two hash tables. With the first table, you map multiple keys to a unique key. In the second table, you map the unique keys to values. The implementation also depends on the query you want to perform. If you want to get all keys associated with a value, you should build the hash table in the reverse way. If multiple keys associated with the same value are not equivalent, you will need multiple hash tables. "Multi-key" is somewhat ambiguous to me. The optimal implementation depends on the actual problem.

WinterLi1993 commented 7 years ago

@lh3 thank you ！ "Multi-key" means multiple keys as you understood .

filly86 commented 7 years ago

Could some please check my code and tell me if I am leaking memory? What my code does ist to malloc 2 structs and add it to HashTable. Now I dont know, if I need to free the two malloced structs and if yes, how?

#include "khash.h"
typedef struct {
    int id;
    char *stream;
    int len;
} myval_t;

int type = 0;
KHASH_MAP_INIT_INT(type, myval_t)
int main(void) {
    khash_t(type) *h;
    khint_t k;
    int absent;
    h = kh_init(type);

    //create struct1 and add it to hashtable
    myval_t *cl1 = malloc(sizeof(myval_t));
    cl1->id = 55;
    cl1->len = 3;
    cl1->stream = malloc(sizeof(char)*(cl1->len+1));
    memcpy(cl1->stream, "abc", cl1->len);
    cl1->stream[cl1->len] = '\0';
    k = kh_put(type, h, cl1->id, &absent);
    kh_val(h, k) = *cl1;

    //create struct2 and add it to hashtable
    myval_t *cl2 = malloc(sizeof(myval_t));
    cl2->id = 56;
    cl2->len = 3;
    cl2->stream = malloc(sizeof(char)*(cl2->len+1));
    memcpy(cl2->stream, "xyz", cl2->len);
    cl2->stream[cl2->len] = '\0';
    k = kh_put(type, h, cl2->id, &absent);
    kh_val(h, k) = *cl2;

    //search for specific index
    printf("Searching index %d\n", 55);
    k = kh_get(type, h, 55);
    if (k != kh_end(h)){
        printf("Index %d found: ", kh_val(h,k).id);
        printf("%d,%s\n", kh_val(h,k).id, kh_val(h,k).stream);
    }

    //traverse all indexes:
    printf("\nPrint all Indexes\n");
    for (k = kh_begin(h); k != kh_end(h); ++k){
        if (kh_exist(h, k)){
            printf("%d,%s\n", kh_val(h,k).id, kh_val(h,k).stream);

        }
    }

    printf("\nremove all indexes and free malloced space\n");
    //remove all and free before
    for (k = kh_begin(h); k != kh_end(h); ++k){
        if (kh_exist(h, k)){
            free(kh_val(h,k).stream); //freeing malloced char within structure
            //free(kh_val(h,k)); //<-- How to free the structs malloced before?
        }
    }

    printf("\ndone\n");

    kh_destroy(type, h);
    return 0;
}

mathog commented 7 years ago

@filly86 This is a general answer for "is my program leaking". With a package like khash which does a lot of preprocessor expansion, so that one line may become dozens, I have found the easiest way to debug an issue like that is to first run the program through the C preprocessor only and save that. Then compile with "gcc -g -O0". Finally, run it in valgrind. If there is a leak valgrind will find it and the line numbers in the expanded program will have enough notes from the compiler during the preprocessing stage to tell you which khash lines in the original are the source of the leak.

As for your program in particular, I have not analyzed it carefully but do see one kh_destroy() call and that is usually all that is needed, one of those for each khash_init() line. Use the method described above though to see if there is some other kind of leak. For instance, it is common to use strdup() to make a copy of the key stored by khash and later freed by khash_destroy(). But you still have to free the original of the key elsewhere, outside of khash.

attractivechaos commented 7 years ago

You have never freed cl1 and cl2. These are two leaks. In fact, you don't need to allocate for the two pointers. The better way is:

khint_t k;
k = kh_put(type, h, 55, &absent);
if (absent) {
  myval_t *cl1 = &kh_val(h, k);
  cl1->id = 55;
  cl1->len = 3;
  cl1->stream = strdup("abc");
}

PS: this works because kh_val() is a macro.

filly86 commented 7 years ago

@attractivechaos Thank you very much for pointing out that mallocing is not needed. So I suppose I have two possibilities for what I want to do. (Both options do not leak mem accordingly to valgrind) 1. Initialise KHASH_MAP with struct (no mallocing needed)

#include "khash.h"
typedef struct {
    int id;
    char *stream;
    int len;
} myval_t;

int type = 0;
KHASH_MAP_INIT_INT(type, myval_t) //**<-- Struct!**
int main(void) {
    khash_t(type) *h;
    khint_t k;
    int absent;
    h = kh_init(type);
    myval_t *clx;

    //create struct1 and add it to hashtable
    k = kh_put(type, h, 55, &absent);
    if (absent) {
        myval_t *cl1 = &kh_val(h, k);
        cl1->id = 55;
        cl1->len = 3;
        cl1->stream = strdup("abc");
    }

    printf("\nremove all indexes and free malloced space\n");
    //remove all and free before
    for (k = kh_begin(h); k != kh_end(h); ++k){
        if (kh_exist(h, k)){
            free(kh_val(h,k).stream); //freeing malloced char within structure
        }
    }
    kh_destroy(type, h);
    return 0;
}

2. Initialize KHASH_MAP with struct pointer with mallocing and freeing

#include "khash.h"
typedef struct {
    int id;
    char *stream;
    int len;
} myval_t;

int type = 0;
KHASH_MAP_INIT_INT(type, myval_t*)// **<-- Struct Pointer!**
int main(void) {
    khash_t(type) *h;
    khint_t k;
    int absent;
    h = kh_init(type);
    myval_t *clx;

    //create struct1 and add it to hashtable
    myval_t *cl1 = malloc(sizeof(myval_t));
    cl1->id = 55;
    cl1->len = 3;
    cl1->stream = strdup("abc");
    k = kh_put(type, h, cl1->id, &absent);
    if(absent){
        kh_val(h, k) = cl1;
    }

    printf("\nremove all indexes and free malloced space\n");
    //remove all and free before
    for (k = kh_begin(h); k != kh_end(h); ++k){
        if (kh_exist(h, k)){
            free(kh_val(h,k)->stream); //freeing malloced char within structure
            free(kh_value(h, k)); //freeing malloced myval_t structures
        }
    }

    kh_destroy(type, h);
    return 0;
}

--> Any disadvantages in using the one or the other regarding for example performance, ... ?

Then another topic. I would like to setup a Khash with Klists. I was not able to do this directly. Instead I succeeded in creating a Khash with structs containing one Klists like the following:

#include "khash.h"
#include <stdio.h>
#include "klist.h"

//Klist contains only integers
#define __int_free(x)
KLIST_INIT(32, int, __int_free)

//KHash contains structs which contain one Klist
typedef struct {
    int len;
    klist_t(32) *kl;
} myval_t;
KHASH_MAP_INIT_STR(chash, myval_t*)

int main(void) {
    //create khash
    khash_t(chash) *h;
    khint_t k;
    int absent;
    h = kh_init(chash);

    //create struct1 with klist1 and add it to khash with key "list1"
    myval_t *cl1 = malloc(sizeof(myval_t));
    cl1->len = 3;
    cl1->kl = kl_init(32);
    *kl_pushp(32, cl1->kl) = 1;
    *kl_pushp(32, cl1->kl) = 2;
    *kl_pushp(32, cl1->kl) = 3;

    k = kh_put(chash, h, "list1", &absent);
    if(absent){
        kh_val(h, k) = cl1;
    }

    //create struct2 with klist2 and add it to khash with key "list2"
    myval_t *cl2 = malloc(sizeof(myval_t));
    cl2->len = 3;
    cl2->kl = kl_init(32);
    *kl_pushp(32, cl2->kl) = 5;
    *kl_pushp(32, cl2->kl) = 6;
    *kl_pushp(32, cl2->kl) = 7;
    k = kh_put(chash, h, "list2", &absent);
    if(absent){
        kh_val(h, k) = cl2;
    }

    //get klist of khash entry with key "list1"
    k = kh_get(chash, h, "list1");
    if (k != kh_end(h)){
        printf("%s\n", kh_key(h,k));
        kliter_t(32) *p;
        for (p = kl_begin(kh_val(h,k)->kl); p != kl_end(kh_val(h,k)->kl); p = kl_next(p)){
            printf("%d\n", kl_val(p));
        }
    }

    //get klists of all khash keys
    for (k = kh_begin(h); k != kh_end(h); ++k){
        if (kh_exist(h, k)){
            printf("%s\n", kh_key(h,k));
            kliter_t(32) *p;
            for (p = kl_begin(kh_val(h,k)->kl); p != kl_end(kh_val(h,k)->kl); p = kl_next(p)){
                printf("%d\n", kl_val(p));
            }

        }
    }

    //remove all klists and its structures and free before
    for (k = kh_begin(h); k != kh_end(h); ++k){
        if (kh_exist(h, k)){
            //destroy klist of each khash key
            kl_destroy(32, kh_val(h,k)->kl);
            //free the struct containing the klist
            free(kh_value(h, k));
        }
    }

    //free khash
    kh_destroy(chash, h);
    return 0;
}

--> This is also leak free accordingly to valgrind. But as you can see in the code, its using a struct to store the klists. How can I add klists to khashes directly without using structs?

attractivechaos / klib

Example for string/int, String/string khash usage #49