Open LH0702 opened 6 years ago
源码文件头部大概有180行左右的注释在解释当前算法。为了节约内存,redis使用了两种表示方法, 稠密的表示, 稀疏表示。
首先看一下 hyperlog 数据结构
struct hllhdr {
char magic[4]; /* "HYLL" */
uint8_t encoding; /* HLL_DENSE or HLL_SPARSE. 确定当前使用稠密表示还是稀疏表示方法 */
uint8_t notused[3]; /* Reserved for future use, must be zero. 保留字段*/
uint8_t card[8]; /* Cached cardinality, little endian. 用作缓存*/
uint8_t registers[]; /* Data bytes. 实际数据存储位置 当前桶的个数为16384 数组的大小为 16384*6 / 8*/
};
这里做两点解释: 1, 以前提到过的,结构体的大小 使用sizeof 是不计算最后数组的大小。 并且数组也没有指明大小, 仅能用在结构体的最后一行。 所以计算结构体大小要sizeof(hllhdr) + 8 registers.length。 2, 实际数组的大小为163846/8。 这是因为redis 设置基数统计m的大小为16384 。 每一个桶使用6 bit 来表示大小。 为了节约内存的使用。没有直接把register数组的大小定义为16384, 而是尽量利用数组中的每一位。 举一个例子
数组大小为4 ,那么数组第一个元素的后六位,就表示点一个桶数值大小。 高两位 + 数组第二个元素的后四位表示第二个桶数值大小,一次类推。 这样做的好处: 使用16384*6/8 个u_int8 就可以表示16384个桶。 *注意数组的长度是163846/8不是16384** ,节省了25%的内存使用。
这里就需要巧妙的位操作来计算每一个桶的大小
宏定义 获取相应桶大小的函数
#define HLL_DENSE_GET_REGISTER(target,p,regnum) do { \
uint8_t *_p = (uint8_t*) p; \ register 数组
unsigned long _byte = regnum*HLL_BITS/8; \ 起始位置
unsigned long _fb = regnum*HLL_BITS&7; \ 偏移量
unsigned long _fb8 = 8 - _fb; \ 横跨数组相邻两个元素 第二个元素需要移动的偏移量
unsigned long b0 = _p[_byte]; \ 数组中保存当前桶的第一个元素
unsigned long b1 = _p[_byte+1]; \ 数组中保存当前桶的第二个元素
target = ((b0 >> _fb) | (b1 << _fb8)) & HLL_REGISTER_MAX; \ 具体的计算算法
} while(0)
举例来说 输入值hash后取后14位 确定position位1,即第一个桶。 第一个桶数据具体保存在数组的哪一个位置起始位置 b0 = 6 pos / 8 = 0 偏移量 fb = 6 pos % 8 = 6 数组 |11000000|00001111| ,要获取的 第一个桶数值保存在数组第一个元素的高两位 + 第二个元素的后四位。
/* Set the value of the register at position 'regnum' to 'val'.
* 'p' is an array of unsigned bytes. */
#define HLL_DENSE_SET_REGISTER(p,regnum,val) do { \
uint8_t *_p = (uint8_t*) p; \ register 数组
unsigned long _byte = regnum*HLL_BITS/8; \ regnum 桶的索引值 计算 在register数组中的位置
unsigned long _fb = regnum*HLL_BITS&7; \ 首偏移量
unsigned long _fb8 = 8 - _fb; \ 第二个元素的偏移量
unsigned long _v = val; \
_p[_byte] &= ~(HLL_REGISTER_MAX << _fb); \ 清零第一个元素相应位
_p[_byte] |= _v << _fb; \ 赋值
_p[_byte+1] &= ~(HLL_REGISTER_MAX >> _fb8); \ 清零第一个元素相应位
_p[_byte+1] |= _v >> _fb8; \ 赋值
} while(0)
首先也要找到在数组中的元素的位置 eg: unsigned long _byte = regnumHLL_BITS/8; \ regnum 桶的索引值 计算 在register数组中的位置 unsigned long _fb = regnumHLL_BITS&7; \ 首偏移量 unsigned long _fb8 = 8 - _fb; \ 第二个元素的偏移量 起始位置 b0 = 6 pos / 8 = 0 偏移量 fb = 6 pos % 8 = 6 数组 |11000000|00001111| _p[_byte] &= ~(HLL_REGISTER_MAX << _fb) = 》 HLL_REGISTER_MAX << _fb => 11000000 ~ 11000000 => 0011111 _p[_byte] & 0011111 =》 0000000 把该元素包含当前桶的bit位清零
_p[_byte+1] &= ~(HLL_REGISTER_MAX >> _fb8); _p[_byte+1] |= _v >> _fb8; 基本同上,先清零然后赋值。
* Sparse representation
* ===
*
* The sparse representation encodes registers using a run length
* encoding composed of three opcodes, two using one byte, and one using
* of two bytes. The opcodes are called ZERO, XZERO and VAL.
*
* ZERO opcode is represented as 00xxxxxx. The 6-bit integer represented
* by the six bits 'xxxxxx', plus 1, means that there are N registers set
* to 0. This opcode can represent from 1 to 64 contiguous registers set
* to the value of 0.
*
* XZERO opcode is represented by two bytes 01xxxxxx yyyyyyyy. The 14-bit
* integer represented by the bits 'xxxxxx' as most significant bits and
* 'yyyyyyyy' as least significant bits, plus 1, means that there are N
* registers set to 0. This opcode can represent from 0 to 16384 contiguous
* registers set to the value of 0.
*
* VAL opcode is represented as 1vvvvvxx. It contains a 5-bit integer
* representing the value of a register, and a 2-bit integer representing
* the number of contiguous registers set to that value 'vvvvv'.
* To obtain the value and run length, the integers vvvvv and xx must be
* incremented by one. This opcode can represent values from 1 to 32,
* repeated from 1 to 4 times.
*
* The sparse representation can't represent registers with a value greater
* than 32, however it is very unlikely that we find such a register in an
* HLL with a cardinality where the sparse representation is still more
* memory efficient than the dense representation. When this happens the
* HLL is converted to the dense representation.
*
* The sparse representation is purely positional. For example a sparse
* representation of an empty HLL is just: XZERO:16384.
*
* An HLL having only 3 non-zero registers at position 1000, 1020, 1021
* respectively set to 2, 3, 3, is represented by the following three
* opcodes:
*
* XZERO:1000 (Registers 0-999 are set to 0)
* VAL:2,1 (1 register set to value 2, that is register 1000)
* ZERO:19 (Registers 1001-1019 set to 0)
* VAL:3,2 (2 registers set to value 3, that is registers 1020,1021)
* XZERO:15362 (Registers 1022-16383 set to 0)
*
* In the example the sparse representation used just 7 bytes instead
* of 12k in order to represent the HLL registers. In general for low
* cardinality there is a big win in terms of space efficiency, traded
* with CPU time since the sparse representation is slower to access:
*
* The following table shows average cardinality vs bytes used, 100
* samples per cardinality (when the set was not representable because
* of registers with too big value, the dense representation size was used
* as a sample).
*
* 100 267
* 200 485
* 300 678
* 400 859
* 500 1033
* 600 1205
* 700 1375
* 800 1544
* 900 1713
* 1000 1882
* 2000 3480
* 3000 4879
* 4000 6089
* 5000 7138
* 6000 8042
* 7000 8823
* 8000 9500
* 9000 10088
* 10000 10591
*
* The dense representation uses 12288 bytes, so there is a big win up to
* a cardinality of ~2000-3000. For bigger cardinalities the constant times
* involved in updating the sparse representation is not justified by the
* memory savings. The exact maximum length of the sparse representation
* when this implementation switches to the dense representation is
* configured via the define server.hll_sparse_max_bytes.
*/
大致翻译如下:
稠密的表示方式就是 上面所描述的那种方式。 使用6位来计数。
稀疏的表示方式 有三个操作码组成,其中两个使用一个字节,一个使用两个字节。 操作码被称为 ZERO,XZERO, VAL
ZERO 操作码 00xxxxxx ,后六位表示整形数字 ,然后加1. 这个操作码可以表示1到64个邻近的register设置为0.
XZERO 使用两个字节表示 01xxxxxx yyyyyyyy 由 首字节后六位 和第二个字节 8位组成的数字 加1 。 可以表示有 1~16384 个邻近的register设置为0
VAL opcode 一个字节表示 1vvvvvxx 包含5 bit 表示register的值, 2 bit 表示 邻近的register 设置为 这个值, 即包含value 和 长度。 value 和长度也要加1, VAL 操作码可以表示的值1~32. 长度1~4。
稀疏模式 不能表示register值大于32. 我们几乎不可能碰到稀疏模式register值大于32. 如果发生这种情况,会做稀疏模式到稠密模式的转换。稀疏模式相比稠密模式更加节约内存。
现在有registers 在1000,1020 , 1021 索引位置上的值为 2,3,3,其余位置的值为0 稀疏模式表示如下:
- XZERO:1000 0~999 索引位置为 0
- VAL:2,1 index= 1000 value = 2
- ZERO:19 index 1001-1019 value = 0
- VAL 3,2 index 1020,1021 value= 3
- XZERO 15362 index 1022-16383 = 0
只使用了7个字节而不是12K 来表示registers
#define HLL_SPARSE_XZERO_BIT 0x40 /* 01xxxxxx */
#define HLL_SPARSE_VAL_BIT 0x80 /* 1vvvvvxx */
#define HLL_SPARSE_IS_ZERO(p) (((*(p)) & 0xc0) == 0) /* 00xxxxxx */
#define HLL_SPARSE_IS_XZERO(p) (((*(p)) & 0xc0) == HLL_SPARSE_XZERO_BIT)
#define HLL_SPARSE_IS_VAL(p) ((*(p)) & HLL_SPARSE_VAL_BIT)
#define HLL_SPARSE_ZERO_LEN(p) (((*(p)) & 0x3f)+1)
#define HLL_SPARSE_XZERO_LEN(p) (((((*(p)) & 0x3f) << 8) | (*((p)+1)))+1)
#define HLL_SPARSE_VAL_VALUE(p) ((((*(p)) >> 2) & 0x1f)+1)
#define HLL_SPARSE_VAL_LEN(p) (((*(p)) & 0x3)+1)
#define HLL_SPARSE_VAL_MAX_VALUE 32
#define HLL_SPARSE_VAL_MAX_LEN 4
#define HLL_SPARSE_ZERO_MAX_LEN 64
#define HLL_SPARSE_XZERO_MAX_LEN 16384
#define HLL_SPARSE_VAL_SET(p,val,len) do { \
*(p) = (((val)-1)<<2|((len)-1))|HLL_SPARSE_VAL_BIT; \
} while(0)
#define HLL_SPARSE_ZERO_SET(p,len) do { \
*(p) = (len)-1; \
} while(0)
#define HLL_SPARSE_XZERO_SET(p,len) do { \
int _l = (len)-1; \
*(p) = (_l>>8) | HLL_SPARSE_XZERO_BIT; \
*((p)+1) = (_l&0xff); \
} while(0)
就是对三个 操作码的位运算,取值和设置
uint64_t MurmurHash64A (const void * key, int len, unsigned int seed) murmushash64a算法 详细介绍可以参看维基百科 https://zh.wikipedia.org/wiki/Murmur%E5%93%88%E5%B8%8C
入参 输入字符串,字符串长度 ,计算后的索引值指针(感觉这个函数设计不是特别好,没有单一职责,不仅获取hash后首个1 的 位置,还返回了索引值) 返回值 计算后首个1 的位置
字符串进行hash后,得到64 bit
<-----------50---------------> <---14----->
00000000 00000000..00000 00000..0000
------------------------------------------------
低14位用作index 0~16384
64-14+1 位用作计算首个1出现时0的个数 即value值
int hllPatLen(unsigned char *ele, size_t elesize, long *regp) {
uint64_t hash, bit, index;
int count;
/* Count the number of zeroes starting from bit HLL_REGISTERS
* (that is a power of two corresponding to the first bit we don't use
* as index). The max run can be 64-P+1 = Q+1 bits.
*
* Note that the final "1" ending the sequence of zeroes must be
* included in the count, so if we find "001" the count is 3, and
* the smallest count possible is no zeroes at all, just a 1 bit
* at the first position, that is a count of 1.
*
* This may sound like inefficient, but actually in the average case
* there are high probabilities to find a 1 after a few iterations. */
hash = MurmurHash64A(ele,elesize,0xadc83b19ULL);
index = hash & HLL_P_MASK; /* Register index. */
hash >>= HLL_P; /* Remove bits used to address the register. */
hash |= ((uint64_t)1<<HLL_Q); /* Make sure the loop terminates
and count will be <= Q+1. 确保循环正常终止*/
bit = 1;
count = 1; /* Initialized to 1 since we count the "00000...1" pattern. */
while((hash & bit) == 0) {
count++;
bit <<= 1;
}
*regp = (int) index;
return count;
}
稠密模式下设置指定index的值 uint8_t oldcount;
HLL_DENSE_GET_REGISTER(oldcount,registers,index); 获取旧的value
if (count > oldcount) { 做比较如果新值大于 oldvalue
HLL_DENSE_SET_REGISTER(registers,index,count); 重新设置
return 1;
} else {
return 0;
}
返回值1表示 基数改变,0表示未改变
添加element 到基数 结构里面,函数比较简单, 首先调用hllpatlen 获取index 和value 然后设到具体的register里面 int hllDenseAdd(uint8_t registers, unsigned char ele, size_t elesize) { long index; uint8_t count = hllPatLen(ele,elesize,&index); / Update the register if this element produced a longer run of zeroes. / return hllDenseSet(registers,index,count); }
判断是register 宏定义书否为 16384 。 遍历 registers 数组 数组大小16384*6/8,以12组为一个单位,遍历1024次。将相同的值存在reghisto数组里,数组下标表示值的大小,数组值表示数量。 reghisto 数组的大小为51。
如果if条件判断为false,逐次遍历整个数组。
int j;
/* Redis default is to use 16384 registers 6 bits each. The code works
* with other values by modifying the defines, but for our target value
* we take a faster path with unrolled loops. */
if (HLL_REGISTERS == 16384 && HLL_BITS == 6) {
uint8_t *r = registers;
unsigned long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9,
r10, r11, r12, r13, r14, r15;
for (j = 0; j < 1024; j++) {
/* Handle 16 registers per iteration. */
r0 = r[0] & 63;
r1 = (r[0] >> 6 | r[1] << 2) & 63;
r2 = (r[1] >> 4 | r[2] << 4) & 63;
r3 = (r[2] >> 2) & 63;
r4 = r[3] & 63;
r5 = (r[3] >> 6 | r[4] << 2) & 63;
r6 = (r[4] >> 4 | r[5] << 4) & 63;
r7 = (r[5] >> 2) & 63;
r8 = r[6] & 63;
r9 = (r[6] >> 6 | r[7] << 2) & 63;
r10 = (r[7] >> 4 | r[8] << 4) & 63;
r11 = (r[8] >> 2) & 63;
r12 = r[9] & 63;
r13 = (r[9] >> 6 | r[10] << 2) & 63;
r14 = (r[10] >> 4 | r[11] << 4) & 63;
r15 = (r[11] >> 2) & 63;
reghisto[r0]++;
reghisto[r1]++;
reghisto[r2]++;
reghisto[r3]++;
reghisto[r4]++;
reghisto[r5]++;
reghisto[r6]++;
reghisto[r7]++;
reghisto[r8]++;
reghisto[r9]++;
reghisto[r10]++;
reghisto[r11]++;
reghisto[r12]++;
reghisto[r13]++;
reghisto[r14]++;
reghisto[r15]++;
r += 12;
}
} else {
for(j = 0; j < HLL_REGISTERS; j++) {
unsigned long reg;
HLL_DENSE_GET_REGISTER(reg,registers,j);
reghisto[reg]++;
}
}
初始化变量操作
sds sparse = o->ptr, dense;
struct hllhdr *hdr, *oldhdr = (struct hllhdr*)sparse;
int idx = 0, runlen, regval;
uint8_t *p = (uint8_t*)sparse, *end = p+sdslen(sparse);
如果当前encoding 已经是稠密模式,直接返回。
hdr = (struct hllhdr*) sparse;
if (hdr->encoding == HLL_DENSE) return C_OK;
创建一个新的 dense 模式的 hllhdr,并且把原有的稀疏模式magin 和cached 拷贝到新的结构里面 把encoding 设置成新的dense模式
dense = sdsnewlen(NULL,HLL_DENSE_SIZE);
hdr = (struct hllhdr*) dense;
*hdr = *oldhdr; /* This will copy the magic and cached cardinality. */
hdr->encoding = HLL_DENSE;
p += HLL_HDR_SIZE; // 获取registers 指针
/*
遍历register。 找到VAL opcode ,设置到相应dense模式下的registers里面
**/
while(p < end) {
if (HLL_SPARSE_IS_ZERO(p)) {
runlen = HLL_SPARSE_ZERO_LEN(p);
idx += runlen;
p++;
} else if (HLL_SPARSE_IS_XZERO(p)) {
runlen = HLL_SPARSE_XZERO_LEN(p);
idx += runlen;
p += 2;
} else {
runlen = HLL_SPARSE_VAL_LEN(p);
regval = HLL_SPARSE_VAL_VALUE(p);
while(runlen--) {
HLL_DENSE_SET_REGISTER(hdr->registers,idx,regval);
idx++;
}
p++;
}
}
校验最后idx 是否等于16384, 如果不是,表示稀疏模式无效,释放已分配的dense 内存 返回error 如果有效,释放原来稀疏模式内存,放指针指向dense模式 ,返回ok
if (idx != HLL_REGISTERS) {
sdsfree(dense);
return C_ERR;
}
sdsfree(o->ptr);
o->ptr = dense;
return C_OK;
代码比较长,大概150行,相比较dense模式更加麻烦,使用 cpu 换内存。 这个过程会触发sparse到dense的转换
关键代码如下 如果count值大于32,触发转换 if (count > HLL_SPARSE_VAL_MAX_VALUE) goto promote;
promote相关代码
promote: /* Promote to dense representation. */
转换失败 返回-1
if (hllSparseToDense(o) == C_ERR) return -1; /* Corrupted HLL. */
hdr = o->ptr;
/* We need to call hllDenseAdd() to perform the operation after the
* conversion. However the result must be 1, since if we need to
* convert from sparse to dense a register requires to be updated.
*
* Note that this in turn means that PFADD will make sure the command
* is propagated to slaves / AOF, so if there is a sparse -> dense
* conversion, it will be performed in all the slaves as well. */
int dense_retval = hllDenseSet(hdr->registers,index,count);
serverAssert(dense_retval == 1);
return dense_retval;
}
//TODO :question: serverAssert(dense_retval == 1); 注释上解说是为了确保命令传播到slaves。 需要确保所有的slaves 都执行 不是很明白
有关hyperloglog 论文 http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf 主要介绍基数统计算法及数学原理
https://arxiv.org/pdf/1702.01284.pdf 新的计算方法来改进原算法在数量较少或者较大时候的误差。
:question: #define HLL_DENSE_SIZE (HLL_HDR_SIZE+((HLL_REGISTERS*HLL_BITS+7)/8)) 为什么是+7