redis hyperloglog 源码分析

LH0702 commented 6 years ago

有关hyperloglog 论文 http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf 主要介绍基数统计算法及数学原理

https://arxiv.org/pdf/1702.01284.pdf 新的计算方法来改进原算法在数量较少或者较大时候的误差。

:question: #define HLL_DENSE_SIZE (HLL_HDR_SIZE+((HLL_REGISTERS*HLL_BITS+7)/8)) 为什么是+7

LH0702 commented 6 years ago

源码分析

源码文件头部大概有180行左右的注释在解释当前算法。为了节约内存，redis使用了两种表示方法，稠密的表示，稀疏表示。

首先看一下 hyperlog 数据结构

struct hllhdr {
    char magic[4];      /* "HYLL" */
    uint8_t encoding;   /* HLL_DENSE or HLL_SPARSE. 确定当前使用稠密表示还是稀疏表示方法 */
    uint8_t notused[3]; /* Reserved for future use, must be zero.  保留字段*/
    uint8_t card[8];    /* Cached cardinality, little endian.  用作缓存*/
    uint8_t registers[]; /* Data bytes.  实际数据存储位置 当前桶的个数为16384  数组的大小为 16384*6 / 8*/
};

这里做两点解释： 1，以前提到过的，结构体的大小使用sizeof 是不计算最后数组的大小。并且数组也没有指明大小，仅能用在结构体的最后一行。所以计算结构体大小要sizeof(hllhdr) + 8 registers.length。 2，实际数组的大小为163846/8。这是因为redis 设置基数统计m的大小为16384 。每一个桶使用6 bit 来表示大小。为了节约内存的使用。没有直接把register数组的大小定义为16384，而是尽量利用数组中的每一位。举一个例子

+---------+----------+----------+------// //--+
|11000000|22221111|33333322|55444444 .... |
+----------+----------+----------+------// //--+

数组大小为4 ，那么数组第一个元素的后六位，就表示点一个桶数值大小。高两位 + 数组第二个元素的后四位表示第二个桶数值大小，一次类推。这样做的好处: 使用16384*6/8 个u_int8 就可以表示16384个桶。 *注意数组的长度是163846/8不是16384** ，节省了25%的内存使用。

这里就需要巧妙的位操作来计算每一个桶的大小

宏定义获取相应桶大小的函数

#define HLL_DENSE_GET_REGISTER(target,p,regnum) do { \
    uint8_t *_p = (uint8_t*) p; \ register 数组
    unsigned long _byte = regnum*HLL_BITS/8; \ 起始位置
    unsigned long _fb = regnum*HLL_BITS&7; \   偏移量
    unsigned long _fb8 = 8 - _fb; \                横跨数组相邻两个元素 第二个元素需要移动的偏移量
    unsigned long b0 = _p[_byte]; \               数组中保存当前桶的第一个元素
    unsigned long b1 = _p[_byte+1]; \          数组中保存当前桶的第二个元素
    target = ((b0 >> _fb) | (b1 << _fb8)) & HLL_REGISTER_MAX; \  具体的计算算法
} while(0)

举例来说输入值hash后取后14位确定position位1，即第一个桶。第一个桶数据具体保存在数组的哪一个位置起始位置 b0 = 6 pos / 8 = 0 偏移量 fb = 6 pos % 8 = 6 数组 |11000000|00001111| ，要获取的第一个桶数值保存在数组第一个元素的高两位 + 第二个元素的后四位。

数组第一个元素右移6 11000000=》 00000011
数组第二个元素左移 8-6= 2 位 00001111=》 00111100
使用或运算合并两个元素 00000011 | 00111100 =》 00111111
新元素后六位即第一个桶的大小 0011111 & 00111111 => 111111 取后六位

LH0702 commented 6 years ago

宏定义设置桶大小

/* Set the value of the register at position 'regnum' to 'val'.
 * 'p' is an array of unsigned bytes. */
#define HLL_DENSE_SET_REGISTER(p,regnum,val) do { \
    uint8_t *_p = (uint8_t*) p; \  register 数组
    unsigned long _byte = regnum*HLL_BITS/8; \ regnum 桶的索引值  计算 在register数组中的位置
    unsigned long _fb = regnum*HLL_BITS&7; \ 首偏移量
    unsigned long _fb8 = 8 - _fb; \   第二个元素的偏移量
    unsigned long _v = val; \         
    _p[_byte] &= ~(HLL_REGISTER_MAX << _fb); \    清零第一个元素相应位
    _p[_byte] |= _v << _fb; \                                        赋值
    _p[_byte+1] &= ~(HLL_REGISTER_MAX >> _fb8); \ 清零第一个元素相应位
    _p[_byte+1] |= _v >> _fb8; \                                   赋值
} while(0)

首先也要找到在数组中的元素的位置 eg: unsigned long _byte = regnumHLL_BITS/8; \ regnum 桶的索引值计算在register数组中的位置 unsigned long _fb = regnumHLL_BITS&7; \ 首偏移量 unsigned long _fb8 = 8 - _fb; \ 第二个元素的偏移量起始位置 b0 = 6 pos / 8 = 0 偏移量 fb = 6 pos % 8 = 6 数组 |11000000|00001111| _p[_byte] &= ~(HLL_REGISTER_MAX << _fb) = 》 HLL_REGISTER_MAX << _fb => 11000000 ~ 11000000 => 0011111 _p[_byte] & 0011111 =》 0000000 把该元素包含当前桶的bit位清零

_p[_byte+1] &= ~(HLL_REGISTER_MAX >> _fb8); _p[_byte+1] |= _v >> _fb8; 基本同上，先清零然后赋值。

LH0702 commented 6 years ago

稀疏表示方式和稠密的表示方式

 * Sparse representation
 * ===
 *
 * The sparse representation encodes registers using a run length
 * encoding composed of three opcodes, two using one byte, and one using
 * of two bytes. The opcodes are called ZERO, XZERO and VAL.
 *
 * ZERO opcode is represented as 00xxxxxx. The 6-bit integer represented
 * by the six bits 'xxxxxx', plus 1, means that there are N registers set
 * to 0. This opcode can represent from 1 to 64 contiguous registers set
 * to the value of 0.
 *
 * XZERO opcode is represented by two bytes 01xxxxxx yyyyyyyy. The 14-bit
 * integer represented by the bits 'xxxxxx' as most significant bits and
 * 'yyyyyyyy' as least significant bits, plus 1, means that there are N
 * registers set to 0. This opcode can represent from 0 to 16384 contiguous
 * registers set to the value of 0.
 *
 * VAL opcode is represented as 1vvvvvxx. It contains a 5-bit integer
 * representing the value of a register, and a 2-bit integer representing
 * the number of contiguous registers set to that value 'vvvvv'.
 * To obtain the value and run length, the integers vvvvv and xx must be
 * incremented by one. This opcode can represent values from 1 to 32,
 * repeated from 1 to 4 times.
 *
 * The sparse representation can't represent registers with a value greater
 * than 32, however it is very unlikely that we find such a register in an
 * HLL with a cardinality where the sparse representation is still more
 * memory efficient than the dense representation. When this happens the
 * HLL is converted to the dense representation.
 *
 * The sparse representation is purely positional. For example a sparse
 * representation of an empty HLL is just: XZERO:16384.
 *
 * An HLL having only 3 non-zero registers at position 1000, 1020, 1021
 * respectively set to 2, 3, 3, is represented by the following three
 * opcodes:
 *
 * XZERO:1000 (Registers 0-999 are set to 0)
 * VAL:2,1    (1 register set to value 2, that is register 1000)
 * ZERO:19    (Registers 1001-1019 set to 0)
 * VAL:3,2    (2 registers set to value 3, that is registers 1020,1021)
 * XZERO:15362 (Registers 1022-16383 set to 0)
 *
 * In the example the sparse representation used just 7 bytes instead
 * of 12k in order to represent the HLL registers. In general for low
 * cardinality there is a big win in terms of space efficiency, traded
 * with CPU time since the sparse representation is slower to access:
 *
 * The following table shows average cardinality vs bytes used, 100
 * samples per cardinality (when the set was not representable because
 * of registers with too big value, the dense representation size was used
 * as a sample).
 *
 * 100 267
 * 200 485
 * 300 678
 * 400 859
 * 500 1033
 * 600 1205
 * 700 1375
 * 800 1544
 * 900 1713
 * 1000 1882
 * 2000 3480
 * 3000 4879
 * 4000 6089
 * 5000 7138
 * 6000 8042
 * 7000 8823
 * 8000 9500
 * 9000 10088
 * 10000 10591
 *
 * The dense representation uses 12288 bytes, so there is a big win up to
 * a cardinality of ~2000-3000. For bigger cardinalities the constant times
 * involved in updating the sparse representation is not justified by the
 * memory savings. The exact maximum length of the sparse representation
 * when this implementation switches to the dense representation is
 * configured via the define server.hll_sparse_max_bytes.
 */

大致翻译如下：

稠密的表示方式就是上面所描述的那种方式。使用6位来计数。

稀疏的表示方式有三个操作码组成，其中两个使用一个字节，一个使用两个字节。操作码被称为 ZERO，XZERO, VAL

ZERO 操作码 00xxxxxx ，后六位表示整形数字，然后加1. 这个操作码可以表示1到64个邻近的register设置为0.

XZERO 使用两个字节表示 01xxxxxx yyyyyyyy 由首字节后六位和第二个字节 8位组成的数字加1 。可以表示有 1~16384 个邻近的register设置为0

VAL opcode 一个字节表示 1vvvvvxx 包含5 bit 表示register的值， 2 bit 表示邻近的register 设置为这个值，即包含value 和长度。 value 和长度也要加1， VAL 操作码可以表示的值1~32. 长度1~4。

LH0702 commented 6 years ago

稀疏模式不能表示register值大于32. 我们几乎不可能碰到稀疏模式register值大于32. 如果发生这种情况，会做稀疏模式到稠密模式的转换。稀疏模式相比稠密模式更加节约内存。

举例来说：

现在有registers 在1000，1020 ， 1021 索引位置上的值为 2,3,3，其余位置的值为0 稀疏模式表示如下:

 - XZERO:1000 0~999 索引位置为 0
 - VAL:2,1    index= 1000  value = 2
 - ZERO:19   index 1001-1019 value = 0
 - VAL 3,2     index 1020，1021  value= 3
 - XZERO 15362 index 1022-16383 = 0

只使用了7个字节而不是12K 来表示registers

LH0702 commented 6 years ago

稀疏模式宏定义

#define HLL_SPARSE_XZERO_BIT 0x40 /* 01xxxxxx */
#define HLL_SPARSE_VAL_BIT 0x80 /* 1vvvvvxx */
#define HLL_SPARSE_IS_ZERO(p) (((*(p)) & 0xc0) == 0) /* 00xxxxxx */
#define HLL_SPARSE_IS_XZERO(p) (((*(p)) & 0xc0) == HLL_SPARSE_XZERO_BIT)
#define HLL_SPARSE_IS_VAL(p) ((*(p)) & HLL_SPARSE_VAL_BIT)
#define HLL_SPARSE_ZERO_LEN(p) (((*(p)) & 0x3f)+1)
#define HLL_SPARSE_XZERO_LEN(p) (((((*(p)) & 0x3f) << 8) | (*((p)+1)))+1)
#define HLL_SPARSE_VAL_VALUE(p) ((((*(p)) >> 2) & 0x1f)+1)
#define HLL_SPARSE_VAL_LEN(p) (((*(p)) & 0x3)+1)
#define HLL_SPARSE_VAL_MAX_VALUE 32
#define HLL_SPARSE_VAL_MAX_LEN 4
#define HLL_SPARSE_ZERO_MAX_LEN 64
#define HLL_SPARSE_XZERO_MAX_LEN 16384
#define HLL_SPARSE_VAL_SET(p,val,len) do { \
    *(p) = (((val)-1)<<2|((len)-1))|HLL_SPARSE_VAL_BIT; \
} while(0)
#define HLL_SPARSE_ZERO_SET(p,len) do { \
    *(p) = (len)-1; \
} while(0)
#define HLL_SPARSE_XZERO_SET(p,len) do { \
    int _l = (len)-1; \
    *(p) = (_l>>8) | HLL_SPARSE_XZERO_BIT; \
    *((p)+1) = (_l&0xff); \
} while(0)

就是对三个操作码的位运算，取值和设置

LH0702 commented 6 years ago

uint64_t MurmurHash64A (const void * key, int len, unsigned int seed) murmushash64a算法详细介绍可以参看维基百科 https://zh.wikipedia.org/wiki/Murmur%E5%93%88%E5%B8%8C

LH0702 commented 6 years ago

int hllPatLen(unsigned char ele, size_t elesize, long regp)

入参输入字符串，字符串长度，计算后的索引值指针(感觉这个函数设计不是特别好，没有单一职责，不仅获取hash后首个1 的位置，还返回了索引值) 返回值计算后首个1 的位置

字符串进行hash后，得到64 bit 
<-----------50--------------->  <---14----->
  00000000 00000000..00000      00000..0000
------------------------------------------------
低14位用作index  0~16384
64-14+1 位用作计算首个1出现时0的个数 即value值
int hllPatLen(unsigned char *ele, size_t elesize, long *regp) {
    uint64_t hash, bit, index;
    int count;

    /* Count the number of zeroes starting from bit HLL_REGISTERS
     * (that is a power of two corresponding to the first bit we don't use
     * as index). The max run can be 64-P+1 = Q+1 bits.
     *
     * Note that the final "1" ending the sequence of zeroes must be
     * included in the count, so if we find "001" the count is 3, and
     * the smallest count possible is no zeroes at all, just a 1 bit
     * at the first position, that is a count of 1.
     *
     * This may sound like inefficient, but actually in the average case
     * there are high probabilities to find a 1 after a few iterations. */
    hash = MurmurHash64A(ele,elesize,0xadc83b19ULL);
    index = hash & HLL_P_MASK; /* Register index. */
    hash >>= HLL_P; /* Remove bits used to address the register. */
    hash |= ((uint64_t)1<<HLL_Q); /* Make sure the loop terminates
                                     and count will be <= Q+1.  确保循环正常终止*/
    bit = 1;
    count = 1; /* Initialized to 1 since we count the "00000...1" pattern. */
    while((hash & bit) == 0) {
        count++;
        bit <<= 1;
    }
    *regp = (int) index;
    return count;
}

LH0702 commented 6 years ago

int hllDenseSet(uint8_t *registers, long index, uint8_t count)

稠密模式下设置指定index的值 uint8_t oldcount;

HLL_DENSE_GET_REGISTER(oldcount,registers,index);   获取旧的value
if (count > oldcount) {                                                     做比较如果新值大于 oldvalue
    HLL_DENSE_SET_REGISTER(registers,index,count);      重新设置
    return 1;
} else {
    return 0;
}

返回值1表示基数改变，0表示未改变

添加element 到基数结构里面，函数比较简单，首先调用hllpatlen 获取index 和value 然后设到具体的register里面 int hllDenseAdd(uint8_t registers, unsigned char ele, size_t elesize) { long index; uint8_t count = hllPatLen(ele,elesize,&index); / Update the register if this element produced a longer run of zeroes. / return hllDenseSet(registers,index,count); }

LH0702 commented 6 years ago

hllDenseRegHisto 统计regisater 相同数值的数量

 判断是register 宏定义书否为 16384 。 遍历 registers 数组 数组大小16384*6/8，以12组为一个单位，遍历1024次。将相同的值存在reghisto数组里，数组下标表示值的大小，数组值表示数量。 reghisto 数组的大小为51。
如果if条件判断为false，逐次遍历整个数组。
 int j;

    /* Redis default is to use 16384 registers 6 bits each. The code works
     * with other values by modifying the defines, but for our target value
     * we take a faster path with unrolled loops. */
    if (HLL_REGISTERS == 16384 && HLL_BITS == 6) {
        uint8_t *r = registers;
        unsigned long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9,
                      r10, r11, r12, r13, r14, r15;
        for (j = 0; j < 1024; j++) {
            /* Handle 16 registers per iteration. */
            r0 = r[0] & 63;
            r1 = (r[0] >> 6 | r[1] << 2) & 63;
            r2 = (r[1] >> 4 | r[2] << 4) & 63;
            r3 = (r[2] >> 2) & 63;
            r4 = r[3] & 63;
            r5 = (r[3] >> 6 | r[4] << 2) & 63;
            r6 = (r[4] >> 4 | r[5] << 4) & 63;
            r7 = (r[5] >> 2) & 63;
            r8 = r[6] & 63;
            r9 = (r[6] >> 6 | r[7] << 2) & 63;
            r10 = (r[7] >> 4 | r[8] << 4) & 63;
            r11 = (r[8] >> 2) & 63;
            r12 = r[9] & 63;
            r13 = (r[9] >> 6 | r[10] << 2) & 63;
            r14 = (r[10] >> 4 | r[11] << 4) & 63;
            r15 = (r[11] >> 2) & 63;

            reghisto[r0]++;
            reghisto[r1]++;
            reghisto[r2]++;
            reghisto[r3]++;
            reghisto[r4]++;
            reghisto[r5]++;
            reghisto[r6]++;
            reghisto[r7]++;
            reghisto[r8]++;
            reghisto[r9]++;
            reghisto[r10]++;
            reghisto[r11]++;
            reghisto[r12]++;
            reghisto[r13]++;
            reghisto[r14]++;
            reghisto[r15]++;

            r += 12;
        }
    } else {
        for(j = 0; j < HLL_REGISTERS; j++) {
            unsigned long reg;
            HLL_DENSE_GET_REGISTER(reg,registers,j);
            reghisto[reg]++;
        }
    }

LH0702 commented 6 years ago

int hllSparseToDense(robj *o) 稀疏模式到稠密模式转换

初始化变量操作

    sds sparse = o->ptr, dense;    
    struct hllhdr *hdr, *oldhdr = (struct hllhdr*)sparse;
    int idx = 0, runlen, regval;
    uint8_t *p = (uint8_t*)sparse, *end = p+sdslen(sparse);

如果当前encoding 已经是稠密模式，直接返回。

    hdr = (struct hllhdr*) sparse;
    if (hdr->encoding == HLL_DENSE) return C_OK;

创建一个新的 dense 模式的 hllhdr，并且把原有的稀疏模式magin 和cached 拷贝到新的结构里面把encoding 设置成新的dense模式

    dense = sdsnewlen(NULL,HLL_DENSE_SIZE);
     hdr = (struct hllhdr*) dense;
     *hdr = *oldhdr; /* This will copy the magic and cached cardinality. */
      hdr->encoding = HLL_DENSE;

        p += HLL_HDR_SIZE;  // 获取registers 指针
    /*
   遍历register。 找到VAL opcode ，设置到相应dense模式下的registers里面

  **/
      while(p < end) {
        if (HLL_SPARSE_IS_ZERO(p)) {
            runlen = HLL_SPARSE_ZERO_LEN(p);
            idx += runlen;
            p++;
        } else if (HLL_SPARSE_IS_XZERO(p)) {
            runlen = HLL_SPARSE_XZERO_LEN(p);
            idx += runlen;
            p += 2;
        } else {
            runlen = HLL_SPARSE_VAL_LEN(p);
            regval = HLL_SPARSE_VAL_VALUE(p);
            while(runlen--) {
                HLL_DENSE_SET_REGISTER(hdr->registers,idx,regval);
                idx++;
            }
            p++;
        }
    }

校验最后idx 是否等于16384，如果不是，表示稀疏模式无效，释放已分配的dense 内存返回error 如果有效，释放原来稀疏模式内存，放指针指向dense模式，返回ok

 if (idx != HLL_REGISTERS) {
        sdsfree(dense);
        return C_ERR;
    }

sdsfree(o->ptr);
    o->ptr = dense;
    return C_OK;

LH0702 commented 6 years ago

int hllSparseSet(robj *o, long index, uint8_t count) 稀疏模式下值的设置

代码比较长，大概150行，相比较dense模式更加麻烦，使用 cpu 换内存。这个过程会触发sparse到dense的转换

关键代码如下如果count值大于32，触发转换 if (count > HLL_SPARSE_VAL_MAX_VALUE) goto promote;

promote相关代码

promote: /* Promote to dense representation. */
   转换失败 返回-1
    if (hllSparseToDense(o) == C_ERR) return -1; /* Corrupted HLL. */
    hdr = o->ptr;

    /* We need to call hllDenseAdd() to perform the operation after the
     * conversion. However the result must be 1, since if we need to
     * convert from sparse to dense a register requires to be updated.
     *
     * Note that this in turn means that PFADD will make sure the command
     * is propagated to slaves / AOF, so if there is a sparse -> dense
     * conversion, it will be performed in all the slaves as well. */
    int dense_retval = hllDenseSet(hdr->registers,index,count);
    serverAssert(dense_retval == 1);
    return dense_retval;
}

//TODO :question: serverAssert(dense_retval == 1); 注释上解说是为了确保命令传播到slaves。需要确保所有的slaves 都执行不是很明白

LH0702 / Single-learn