redis6.0.5之HyperLogLog阅读笔记2-基数估算宏定义

/* Header placed in front of the HyperLogLog register data. */
struct hllhdr {
    char magic[4];      /* Four-character magic string "HYLL". */
    uint8_t encoding;   /* HLL_DENSE or HLL_SPARSE. */
    uint8_t notused[3]; /* Reserved for future use, must be zero. */
    uint8_t card[8];    /* Cached cardinality, little endian. */
    uint8_t registers[]; /* Data bytes (flexible array member). */
};
以上是基数估算算法(HyperLogLog)的数据结构

/* The cached cardinality MSB is used to signal validity of the cached value.
 * The flag is bit 7 of card[7]. 'hdr' must be a pointer to a structure that
 * has a card[8] byte array. */

/* Mark the cached value as stale by setting the flag bit
 * (xxxxxxxx | 10000000). */
#define HLL_INVALIDATE_CACHE(hdr) ((hdr)->card[7] |= (1<<7))
/* Evaluate to non-zero when the flag bit is clear, i.e. the cached value
 * has not been invalidated. */
#define HLL_VALID_CACHE(hdr) (((hdr)->card[7] & (1<<7)) == 0)

#define HLL_P 14 /* The greater is P, the smaller the error. */
#define HLL_Q (64-HLL_P) /* The number of bits of the hash value used for
                            determining the number of leading zeros. */
#define HLL_REGISTERS (1<<HLL_P) /* With P=14, 16384 registers (2^14). */
#define HLL_P_MASK (HLL_REGISTERS-1) /* Mask to index register: the low
                                        HLL_P bits are all set. */
#define HLL_BITS 6 /* Enough to count up to 63 leading zeroes. */
#define HLL_REGISTER_MAX ((1<<HLL_BITS)-1) /* Largest value a register can hold. */
#define HLL_HDR_SIZE sizeof(struct hllhdr) /* Size in bytes of the header structure. */
/* Bytes needed by the dense representation: the header plus HLL_REGISTERS
 * registers of HLL_BITS bits each; the +7 rounds the bit count up to a
 * whole number of bytes. */
#define HLL_DENSE_SIZE (HLL_HDR_SIZE+((HLL_REGISTERS*HLL_BITS+7)/8))
#define HLL_DENSE 0 /* Dense encoding. */
#define HLL_SPARSE 1 /* Sparse encoding. */
#define HLL_RAW 255 /* Only used internally, never exposed. */
#define HLL_MAX_ENCODING 1 /* Largest encoding value (same value as HLL_SPARSE). */

/* Error text for a corrupted HLL object; the literal starts with
 * "-INVALIDOBJ" and is terminated by CRLF. */
static char *invalid_hll_err = "-INVALIDOBJ Corrupted HLL object detected\r\n";

/* =========================== Low level bit macros ========================= */
低层次的比特宏

/* Macros to access the dense representation.
用于存取密集表示的宏
 * We need to get and set 6 bit counters in an array of 8 bit bytes.
 * We use macros to make sure the code is inlined since speed is critical
 * especially in order to compute the approximated cardinality in
 * HLLCOUNT where we need to access all the registers at once.
 * For the same reason we also want to avoid conditionals in this code path.
我们需要在一个由8比特字节组成的数组里读取和设置6比特的计数器。
我们使用宏定义使得代码是内联的,因为速度是关键,尤其是在HLLCOUNT中计算近似基数时,
我们需要一次性访问所有的寄存器。
出于同样的原因(为了提高速度),我们还要避免在这条代码路径中使用条件判断
 * +--------+--------+--------+------//
 * |11000000|22221111|33333322|55444444
 * +--------+--------+--------+------//
 * Note: in the above representation the most significant bit (MSB)
 * of every byte is on the left. We start using bits from the LSB to MSB,
 * and so forth passing to the next byte.
注意:在上面的表示中,每个字节的最高有效位(MSB)在左边。我们从LSB到MSB依次使用比特,用完一个字节后再进入下一个字节。
 * Example, we want to access to counter at pos = 1 ("111111" in the
 * illustration above).
举例如下,我们想要去获取在位置1上的数值 (就是上图解释的"111111"的数值)
 * The index of the first byte b0 containing our data is:
包含我们数据的第一个字节b0的索引是: b0 = 6 * 1 / 8 = 0
 *  b0 = 6 * pos / 8 = 0
 *
 *   +--------+
 *   |11000000|  <- Our byte at b0
 *   +--------+
 *
 * The position of the first bit (counting from the LSB = 0) in the byte
 * is given by:
第一个bit的位置(从LSB=0开始计数)在字节中由如下公式给出
 *
 *  fb = 6 * pos % 8 -> 6   fb = (6 * 1 )% 8 = 6
 *
 * Right shift b0 of 'fb' bits.
右移b0'fb'个比特
 *   +--------+
 *   |11000000|  <- Initial value of b0         初始的b0值
 *   |00000011|  <- After right shift of 6 pos. 向右移动'fb'个(这里是6个)比特位置的值
 *   +--------+
 *
 * Left shift b1 of bits 8-fb bits (2 bits)
对b1向左移动8-fb个比特(这里是8-6=2比特)
 *   +--------+
 *   |22221111|  <- Initial value of b1         初始的b1
 *   |22111100|  <- After left shift of 2 bits. 向左移动2比特之后
 *   +--------+
 *
 * OR the two bits, and finally AND with 111111 (63 in decimal) to
 * clean the higher order bits we are not interested in:

 *   +--------+
 *   |00000011|  <- b0 right shifted
 *   |22111100|  <- b1 left shifted
 *   |22111111|  <- b0 OR b1
 *   |  111111|  <- (b0 OR b1) AND 63, our value.
 *   +--------+
对上述得到的两个字节进行或操作,再将结果和111111(十进制的63)进行与操作,用来清除我们不感兴趣的高位比特
 * We can try with a different example, like pos = 0. In this case
 * the 6-bit counter is actually contained in a single byte.
我们可以尝试用不同的案例,比如位置为0的情况,在这个案例中,6比特的计数恰好全部在一个字节中。
 *  b0 = 6 * pos / 8 = 0  b0 = (6 * 0)/8 = 0
 *
 *   +--------+
 *   |11000000|  <- Our byte at b0  我们的数据在b0
 *   +--------+
 *
 *  fb = 6 * pos % 8 = 0    fb = ( 6 * 0 ) % 8 = 0

 *  So we right shift of 0 bits (no shift in practice) and
 *  left shift the next byte of 8 bits, even if we don't use it,
 *  but this has the effect of clearing the bits so the result
 *  will not be affected after the OR.
所以我们右移0个比特(实际上不移动),并把下一个字节左移8个比特,即使我们用不到它;
但这个操作会把这些比特清除,所以OR之后结果不会受到影响
 * -------------------------------------------------------------------------
 *
 * Setting the register is a bit more complex, let's assume that 'val'
 * is the value we want to set, already in the right range.
设置寄存器的值比获取更加复杂一点,让我们假设val是我们将要设置的值,已经在正确的范围。
 *
 * We need two steps, in one we need to clear the bits, and in the other
 * we need to bitwise-OR the new bits.
我们需要两步,第一步我们需要清除比特,第二步我们需要对新比特使用比特位的OR操作

 * Let's try with 'pos' = 1, so our first byte at 'b' is 0,
让我们用pos=1来尝试,此时第一个字节的索引b是0
 * "fb" is 6 in this case.  fb是6在这个案例中
 *
 *   +--------+
 *   |11000000|  <- Our byte at b0
 *   +--------+
 *
 * To create an AND-mask to clear the bits about this position, we just
 * initialize the mask with the value 63, left shift it of "fb" bits,
 * and finally invert the result.
为了创建一个用于清除该位置比特的与掩码,我们用十进制值63来初始化掩码,向左移动fb个比特,最后将结果按位取反
 *
 *   +--------+
 *   |00111111|  <- "mask" starts at 63                    掩码初始化63
 *   |11000000|  <- "mask" after left shift of "fb" bits. 掩码向左移动fb个比特
 *   |00111111|  <- "mask" after invert.      翻转结果            
 *   +--------+
 *
 * Now we can bitwise-AND the byte at "b" with the mask, and bitwise-OR
 * it with "val" left-shifted of "fb" bits to set the new bits.
现在我们可以将字节b与掩码做按位与操作,然后再与左移fb个比特后的val做按位或操作,以设置新的比特
 * Now let's focus on the next byte b1:
现在让我们聚焦在下个字节b1:
 *   +--------+
 *   |22221111|  <- Initial value of b1  初始化的b1
 *   +--------+
 *
 * To build the AND mask we start again with the 63 value, right shift
 * it by 8-fb bits, and invert it.
为了创建与掩码,我们使用值63开始,向右移动8-fb个比特,然后翻转这个值
 *   +--------+
 *   |00111111|  <- "mask" set at 2^6-1                            初始化的掩码值
 *   |00001111|  <- "mask" after the right shift by 8-fb = 2 bits  向右移动8-fb个比特
 *   |11110000|  <- "mask" after bitwise not.  翻转掩码
 *   +--------+
 *
 * Now we can mask it with b+1 to clear the old bits, and bitwise-OR
 * with "val" right-shifted by 8-fb bits to set the new value.
 */
现在我们可以用这个掩码和b+1做按位与来清除旧的比特位,然后和右移8-fb个比特后的val做按位或操作来设置新值

/* Note: if we access the last counter, we will also access the b+1 byte
 * that is out of the array, but sds strings always have an implicit null
 * term, so the byte exists, and we can skip the conditional (or the need
 * to allocate 1 byte more explicitly). */
注意:如果我们访问最后一个计数器,我们也会访问超出数组范围的b+1字节,但是sds字符串总是有一个隐式的空字符作为结尾,
所以这个字节是存在的,因此我们可以省去条件判断(也不需要显式地多分配1个字节)

/* Store the value of the register at position 'regnum' into variable 'target'.
 * 'p' is an array of unsigned bytes.
 *
 * Registers are HLL_BITS wide and packed back to back, so one register may
 * span two bytes: both p[byte] and p[byte+1] are always read, the byte at
 * index byte+1 must therefore be readable. 'p' and 'regnum' are parenthesized
 * so that expressions such as 'i+1' can be passed safely. */
#define HLL_DENSE_GET_REGISTER(target,p,regnum) do { \
    uint8_t *_p = (uint8_t*) (p); \
    unsigned long _byte = (regnum)*HLL_BITS/8; /* Byte holding the first bit. */ \
    unsigned long _fb = (regnum)*HLL_BITS&7; /* First bit inside that byte. */ \
    unsigned long _fb8 = 8 - _fb; /* Left shift applied to the next byte. */ \
    unsigned long _b0 = _p[_byte]; \
    unsigned long _b1 = _p[_byte+1]; \
    (target) = ((_b0 >> _fb) | (_b1 << _fb8)) & HLL_REGISTER_MAX; \
} while(0)

/* Set the value of the register at position 'regnum' to 'val'.
 * 'p' is an array of unsigned bytes.
 *
 * Both p[byte] and p[byte+1] are always written (the second one is left
 * unchanged when the register fits in the first byte), so the byte at index
 * byte+1 must be writable. 'p', 'regnum' and 'val' are parenthesized so that
 * arbitrary expressions can be passed safely. */
#define HLL_DENSE_SET_REGISTER(p,regnum,val) do { \
    uint8_t *_p = (uint8_t*) (p); \
    unsigned long _byte = (regnum)*HLL_BITS/8; /* Byte holding the first bit. */ \
    unsigned long _fb = (regnum)*HLL_BITS&7; /* First bit inside that byte. */ \
    unsigned long _fb8 = 8 - _fb; /* Right shift applied for the next byte. */ \
    unsigned long _v = (val); /* Value to store. */ \
    _p[_byte] &= ~(HLL_REGISTER_MAX << _fb); /* Clear old bits in first byte. */ \
    _p[_byte] |= _v << _fb; /* Store the low part of the value. */ \
    _p[_byte+1] &= ~(HLL_REGISTER_MAX >> _fb8); /* Clear old bits in next byte. */ \
    _p[_byte+1] |= _v >> _fb8; /* Store the remaining high part. */ \
} while(0)

/* Macros to access the sparse representation.
 * The macros parameter is expected to be an uint8_t pointer. */

#define HLL_SPARSE_XZERO_BIT 0x40 /* 01xxxxxx: marks an XZERO opcode. */
#define HLL_SPARSE_VAL_BIT 0x80 /* 1vvvvvxx: marks a VAL opcode. */
/* Opcode type tests. HLL_SPARSE_IS_VAL yields 0x80 (not 1) when true. */
#define HLL_SPARSE_IS_ZERO(p) (((*(p)) & 0xc0) == 0) /* 00xxxxxx */
#define HLL_SPARSE_IS_XZERO(p) (((*(p)) & 0xc0) == HLL_SPARSE_XZERO_BIT)
#define HLL_SPARSE_IS_VAL(p) ((*(p)) & HLL_SPARSE_VAL_BIT)
/* Run length of a one-byte ZERO opcode: low 6 bits plus one (1..64). */
#define HLL_SPARSE_ZERO_LEN(p) (((*(p)) & 0x3f)+1)
/* Run length of a two-byte XZERO opcode: low 6 bits of the first byte are the
 * high part, the second byte is the low part, plus one (1..16384). */
#define HLL_SPARSE_XZERO_LEN(p) (((((*(p)) & 0x3f) << 8) | (*((p)+1)))+1)
/* Register value of a VAL opcode: the 5 'v' bits plus one (1..32). */
#define HLL_SPARSE_VAL_VALUE(p) ((((*(p)) >> 2) & 0x1f)+1)
/* Run length of a VAL opcode: the low 2 bits plus one (1..4). */
#define HLL_SPARSE_VAL_LEN(p) (((*(p)) & 0x3)+1)
#define HLL_SPARSE_VAL_MAX_VALUE 32 /* Largest value a VAL opcode can hold. */
#define HLL_SPARSE_VAL_MAX_LEN 4 /* Longest run a VAL opcode can hold. */
#define HLL_SPARSE_ZERO_MAX_LEN 64 /* Longest run a ZERO opcode can hold. */
#define HLL_SPARSE_XZERO_MAX_LEN 16384 /* Longest run an XZERO opcode can hold. */
/* Write a one-byte VAL opcode (layout 1vvvvvxx) at 'p': 'val'-1 goes into
 * the five 'v' bits, 'len'-1 into the two low bits. */
#define HLL_SPARSE_VAL_SET(p,val,len) do { \
    *(p) = (((val)-1)<<2|((len)-1))|HLL_SPARSE_VAL_BIT; \
} while(0)
/* Write a one-byte ZERO opcode at 'p': the stored byte is 'len'-1. */
#define HLL_SPARSE_ZERO_SET(p,len) do { \
    *(p) = (len)-1; \
} while(0)
/* Write a two-byte XZERO opcode at 'p' encoding 'len'-1: the first byte gets
 * the high part OR-ed with HLL_SPARSE_XZERO_BIT, the second byte gets the
 * low 8 bits. */
#define HLL_SPARSE_XZERO_SET(p,len) do { \
    int _l = (len)-1; \
    *(p) = (_l>>8) | HLL_SPARSE_XZERO_BIT; /* High byte. */ \
    *((p)+1) = (_l&0xff); /* Low byte. */ \
} while(0)

/* Floating point literal holding the value of 0.5/ln(2). */
#define HLL_ALPHA_INF 0.721347520444481703680 /* constant for 0.5/ln(2) */

 

posted on 2021-03-16 19:13  子虚乌有  阅读(163)  评论(0)    收藏  举报