yuenshome / yuenshome.github.io

https://yuenshome.github.io
MIT License
84 stars 15 forks source link

哈希查找 #46

Open ysh329 opened 5 years ago

ysh329 commented 5 years ago

算法09 五大查找之:哈希查找 - nnngu - 博客园 https://www.cnblogs.com/nnngu/p/8307743.html#autoid-0-0-0

ysh329 commented 5 years ago

哈希查找

五大查找(顺序查找、二分查找、索引查找、二叉排序树)的最后一个:哈希查找(也称为散列查找)。Java有Hashtable类,它是由 key/value 的键值对组成的集合,它应用了哈希技术。

哈希技术

六种哈希函数 f(key) 的构造方法

  1. 直接定址法:哈希地址:f(key) = a*key+b (a,b为常数)。需事先知 key 的分布,适合查找表小且连续的情况;
  2. 数字分析法:如手机号,前3位,中间m位,最后n位表示地址,即每段有不同的含义;
  3. 平方取中法:对数m平方,之后取结果的中间n位作为地址;
  4. 折叠法:假设哈希表长为999,对数m分成4段,每段加和得到n,取n的后3位作地址;
  5. 除留余数法:地址f(key) = key mod p (p<=m) m为哈希表表长,最常用
  6. 随机数法:哈希地址f(key) = random(key),random是随机函数,当 key 长度不等时,采用这种方法比较合适。

哈希函数冲突的两种解决方法

哈希函数不可能完全避免冲突,当发现有 key1 != key2,但却有 f(key1) = f(key2) ,即发生冲突。

  1. 开放定址法:发生了冲突,寻找下一个空的哈希地址。只要哈希表足够大,空的哈希地址总是能找到,然后将记录插入。这种方法是最常用的解决冲突的方法。
  2. 链地址法:链地址法不做详细展开。
ysh329 commented 5 years ago

实现代码

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>

typedef struct hash_table
{
    int length;
    int *data;
    int used_length;
} hash_table_t;

hash_table_t* init_hash_table(const int table_length);
void destroy_hash_table(hash_table_t *t);
void print_hash_table(const hash_table_t *t);
void insert_data(hash_table_t *t, const int value);
int find_data(const hash_table_t *t, const int value);
int get_address(const hash_table_t *t, const int key);

hash_table_t* init_hash_table(const int table_length)
{
    hash_table_t *table = calloc(1, sizeof(hash_table_t));
    table->length = table_length;
    table->data = calloc(table_length, sizeof(int));
    printf("[INFO] create hash table successfully\n");
    return table;
}

void destroy_hash_table(hash_table_t *t)
{
    if(t->data) free(t->data);
    t->data = NULL;
    if(t) free(t);
    t = NULL;
    printf("[INFO] destroy hash table successfully\n");
    return;
}

void print_hash_table(const hash_table_t *t)
{
    assert(t);
    printf("[INFO] ---- print hash table ----\n");
    printf("[INFO] t->length:%d\n", t->length);
    printf("[INFO] t->used_length:%d\n", t->used_length);
    printf("addr value\n");
    int *data = t->data;
    for(int eidx = 0; eidx < t->length; ++eidx)
    {
        if(data[eidx] != 0)
            ;//printf("%d %d\n", eidx, data[eidx]);
        printf("%4d %4d\n", eidx, data[eidx]);
    }
    printf("\n");
    return;
}

void insert_data(hash_table_t *t, const int value)
{
    assert(t);
    int address = get_address(t, value);
    t->data[address] = value;
    ++t->used_length;
    return;
}

int find_data(const hash_table_t *t, const int value)
{
    int start_address = value % t->length;
    int address = start_address;
    int *data = t->data;
    while(data[address] != value)
    {
        address = (1+address) % t->length;
        if(address == start_address || data[address]==0)
        {
            printf("[WARN] %d not found in hash table\n", value);
            return -1;
        }
    }
    printf("[INFO] %d found in hash table address: %d\n", value, address);
    return address;
}

int get_address(const hash_table_t *t, const int key)
{
    assert(t);
    int start_address = key % t->length;
    int address = start_address;
    while(t->data[address])
    {
        address = (1+address) % t->length; // 开放定址法
        if(address == start_address)
        {
            printf("[ERROR] hash table has no space\n");
            exit(0);
        }
    }
    return address;
}

int main()
{
    const int table_length = 15;
    hash_table_t *t = init_hash_table(table_length);
    print_hash_table(t);

    insert_data(t, 93);
    insert_data(t, 95);
    insert_data(t, 12);
    print_hash_table(t);
    find_data(t, 30);
    find_data(t, 12);

    destroy_hash_table(t);
    return 0;
}

执行结果

[INFO] create hash table successfully
[INFO] ---- print hash table ----
[INFO] t->length:15
[INFO] t->used_length:0
addr value
   0    0
   1    0
   2    0
   3    0
   4    0
   5    0
   6    0
   7    0
   8    0
   9    0
  10    0
  11    0
  12    0
  13    0
  14    0

[INFO] ---- print hash table ----
[INFO] t->length:15
[INFO] t->used_length:3
addr value
   0    0
   1    0
   2    0
   3   93
   4    0
   5   95
   6    0
   7    0
   8    0
   9    0
  10    0
  11    0
  12   12
  13    0
  14    0

[WARN] 30 not found in hash table
[INFO] 12 found in hash table address: 12
[INFO] destroy hash table successfully