位图

位图

所谓的BitMap就是用一个bit位来标记某个元素所对应的value,而key即是该元素,由于BitMap使用了bit位来存储数据,因此可以大大节省存储空间。

位操作

移位

对于正整数,左移一位,就是将数值乘2;右移一位就运算数值除2;但是位操作的效率要比运算符高。

判断奇偶
func isOdd(n int) bool{
	return n&1 != 0
}

异或操作

一个数和另一个数异或两次返回原来的数

//不用临时变量,交换a,b两个数
a = a^b
b = a^b
a = a^b

n&(-n)

该操作返回n的最后0的个数k的2次方
计算机里整数使用源码表示,负数使用补码表示

//int8(10)二进制为
1010
//int8(-10)二进制表示
//先求反码,原码取反
11110101
//再求补码,反码+1
11110110
//n&(-n)
00001010
11110110
00000010

n&(n-1)

该操作将n的最后一位变为0

统计一个整数n的二进制中1的个数
func count(n int) int{
	ret := 0
	for n>0 {
		n = n&(n-1)
		ret++
	}
	return ret
}
判断一个整数是不是2的幂
func Is2Pow(n int) bool{
	return n&(n-1) == 0
}
判断一个整数是不是4的幂
func Is4Pow(n int32) bool {
	if n&(n-1) != 0 {
		if (n & 0x55555555) == 0 {
			return true
		}
	}
	return false
}
判断两个整数m和n,需要改变多少位使得m变为n
func Count(m,n int) int{
	//异或,算出两个之间的不同位个数
	m = m^n
	//统计一下不同个数
	ret := 0
	for m > 0 {
		m = m & (m-1)
		ret++
	}
	return ret
}
不适用+,-,*,/完成整数相加
func Add(n,m uint) uint{
	sum,c := 0,0
	if m == 0 {
		return n
	}
	if n == 0 {
		return m
	}
	for m != 0 {
		sum = n^m
		c = (n&m)<<1
		n = sum
		m = c
	}
	return sum
}

bitmap

github上较为欢迎的bitmap实现

package bitset

import (
	"bufio"
	"bytes"
	"encoding/base64"
	"encoding/binary"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"strconv"
)

const wordSize = uint(64)

const log2WordSize = uint(6)

const allBits uint64 = 0xffffffffffffffff

var binaryOrder binary.ByteOrder = binary.BigEndian

var base64Encoding = base64.URLEncoding

func Base64StdEncoding() { base64Encoding = base64.StdEncoding }

func LittleEndian() { binaryOrder = binary.LittleEndian }
//bitset的结构
type BitSet struct {
	length uint
	set    []uint64
}

type Error string

func (b *BitSet) safeSet() []uint64 {
	if b.set == nil {
		b.set = make([]uint64, wordsNeeded(0))
	}
	return b.set
}

func From(buf []uint64) *BitSet {
	return &BitSet{uint(len(buf)) * 64, buf}
}

func (b *BitSet) Bytes() []uint64 {
	return b.set
}
//计算大小为i需要多少个64位存储
func wordsNeeded(i uint) int {
	if i > (Cap() - wordSize + 1) {
		return int(Cap() >> log2WordSize)
	}
	return int((i + (wordSize - 1)) >> log2WordSize)
}

func New(length uint) (bset *BitSet) {
	defer func() {
		if r := recover(); r != nil {
			bset = &BitSet{
				0,
				make([]uint64, 0),
			}
		}
	}()

	bset = &BitSet{
		length,
		make([]uint64, wordsNeeded(length)),
	}

	return bset
}
//返回最大值
func Cap() uint {
	return ^uint(0)
}
//返回bitset的存储长度
func (b *BitSet) Len() uint {
	return b.length
}
//扩展
func (b *BitSet) extendSetMaybe(i uint) {
	if i >= b.length { 
		nsize := wordsNeeded(i + 1)
		if b.set == nil {
			b.set = make([]uint64, nsize)
		} else if cap(b.set) >= nsize {
			b.set = b.set[:nsize] 
		} else if len(b.set) < nsize {
			newset := make([]uint64, nsize, 2*nsize) // increase capacity 2x
			copy(newset, b.set)
			b.set = newset
		}
		b.length = i + 1
	}
}
//测试bitset里面第i位是否为1
func (b *BitSet) Test(i uint) bool {
	if i >= b.length {
		return false
	}
	//i除以64得到set中的下标,接着应该取i%64的左移1的位置
	//i&(wordSize-1)即表示了i%64
	return b.set[i>>log2WordSize]&(1<<(i&(wordSize-1))) != 0
}
//设置第i位为1
func (b *BitSet) Set(i uint) *BitSet {
	b.extendSetMaybe(i)
	b.set[i>>log2WordSize] |= 1 << (i & (wordSize - 1))
	return b
}
//清除第i位,设置第i位为0
func (b *BitSet) Clear(i uint) *BitSet {
	if i >= b.length {
		return b
	}
	b.set[i>>log2WordSize] &^= 1 << (i & (wordSize - 1))
	return b
}
//设置第i位的值
func (b *BitSet) SetTo(i uint, value bool) *BitSet {
	if value {
		return b.Set(i)
	}
	return b.Clear(i)
}
//反转第i位,如果没有则视为0,扩容后设置为1
func (b *BitSet) Flip(i uint) *BitSet {
	if i >= b.length {
		return b.Set(i)
	}
	b.set[i>>log2WordSize] ^= 1 << (i & (wordSize - 1))
	return b
}
//缩放成length的长度bitset
func (b *BitSet) Shrink(length uint) *BitSet {
	idx := wordsNeeded(length + 1)
	if idx > len(b.set) {
		//已经小于length的长度,则直接返回
		return b
	}
	shrunk := make([]uint64, idx)
	copy(shrunk, b.set[:idx])
	b.set = shrunk
	b.length = length + 1
	//最后一个64位的几位必须保持不变
	b.set[idx-1] &= (allBits >> (uint64(64) - uint64(length&(wordSize-1)) - 1))
	return b
}
//idx的位置插入一位0值
func (b *BitSet) InsertAt(idx uint) *BitSet {
	insertAtElement := (idx >> log2WordSize)

	//查看bitset的长度是否64的整数倍,如果是,则没有多余的1位可以往后移动
	//需要加一个64位
	if b.isLenExactMultiple() {
		b.set = append(b.set, uint64(0))
	}

	var i uint
	//从后往前,移动
	//处理idx对应64位之前的64位
	for i = uint(len(b.set) - 1); i > insertAtElement; i-- {
		//左移一位	
		b.set[i] <<= 1
		//前一个64位的最头上一位补在这个64位的最后
		b.set[i] |= (b.set[i-1] & 0x8000000000000000) >> 63
	}
	//生成掩码,idx对应位置之后的全为0
	//i=5则对应oxfffffffffffffff0
	dataMask := ^(uint64(1)<<uint64(idx&(wordSize-1)) - 1)
	//拿到要处理的64位的idx之前的数据
	data := b.set[i] & dataMask
	//idx位置对应的后面数据不变化,前面的全部置为0
	b.set[i] &= ^dataMask
	//data左移一位,数据放回
	//处理完idx对应的64位
	b.set[i] |= data << 1
	//length+1因为idx位置插入了一位
	b.length++
	return b
}
//字符串化
func (b *BitSet) String() string {
	// follows code from https://github.com/RoaringBitmap/roaring
	var buffer bytes.Buffer
	start := []byte("{")
	buffer.Write(start)
	counter := 0
	i, e := b.NextSet(0)
	for e {
		counter = counter + 1
		//太多了用...代替,避免溢出
		if counter > 0x40000 {
			buffer.WriteString("...")
			break
		}
		buffer.WriteString(strconv.FormatInt(int64(i), 10))
		i, e = b.NextSet(i + 1)
		if e {
			buffer.WriteString(",")
		}
	}
	buffer.WriteString("}")
	return buffer.String()
}

func (b *BitSet) DeleteAt(i uint) *BitSet {
	//找到对应i处理的64位
	deleteAtElement := i >> log2WordSize
	//生成掩码,这里i对应位置之前的都是1
	//i=5对应为oxffffffffffffffff0
	dataMask := ^((uint64(1) << (i & (wordSize - 1))) - 1)
	//获取原来的数据,去除i对应位置之后的
	data := b.set[deleteAtElement] & dataMask
	//i对应位置之后的不变其他都为0了
	b.set[deleteAtElement] &= ^dataMask
	//data右移,剔除i对应的数据,然后做或运算,数据回传
	//这里好像不用再&dataMask
	b.set[deleteAtElement] |= (data >> 1) & dataMask
	//处理后面的64位
	for i := int(deleteAtElement) + 1; i < len(b.set); i++ {
		//前一个64位的最高位设置成后一个64位的最低位
		b.set[i-1] |= (b.set[i] & 1) << 63
		//右移一位,因为最低的已经设置到前一个64位数据上了
		b.set[i] >>= 1
	}
	//长度减一
	b.length = b.length - 1
	return b
}
//返回下一个非0的位置
func (b *BitSet) NextSet(i uint) (uint, bool) {
	//确定数组下标
	x := int(i >> log2WordSize)
	//不存在,返回0,false
	if x >= len(b.set) {
		return 0, false
	}
	//拿出这个64位元素
	w := b.set[x]
	//右移,剔除i和i之前的位
	w = w >> (i & (wordSize - 1))
	if w != 0 {
		//不为0,则该元素上存在剩余不为0的数据
		//返回下一个不为0的位置,即w的末尾0的个数的2次方+i
		//求末尾0的个数的2次方,即n&(-n)
		return i + trailingZeroes64(w), true
	}
	//w等于0了,则说明下一位在下一个64元素上
	x = x + 1
	//for循环找下一个不为0的64位元素
	for x < len(b.set) {
		if b.set[x] != 0 {
			//返回下一个不为0的位置
			return uint(x)*wordSize + trailingZeroes64(b.set[x]), true
		}
		x = x + 1

	}
	//没有,返回0,false
	return 0, false
}
//一次获取多个位置
func (b *BitSet) NextSetMany(i uint, buffer []uint) (uint, []uint) {
	myanswer := buffer
	capacity := cap(buffer)
	//i对应的下标x
	x := int(i >> log2WordSize)
	//超出范围,直接返回空的
	if x >= len(b.set) || capacity == 0 {
		return 0, myanswer[:0]
	}
	//需要跳过的字节
	skip := i & (wordSize - 1)
	word := b.set[x] >> skip
	myanswer = myanswer[:capacity]
	size := int(0)
	for word != 0 {
		//r为末尾连续0的个数
		r := trailingZeroes64(word)
		//t = 2^r
		t := word & ((^word) + 1)
		//存入数组
		myanswer[size] = r + i
		size++
		if size == capacity {
			goto End
		}
		//清空最后一位1
		word = word ^ t
	}
	x++
	for idx, word := range b.set[x:] {
		for word != 0 {
			r := trailingZeroes64(word)
			t := word & ((^word) + 1)
			myanswer[size] = r + (uint(x+idx) << 6)
			size++
			if size == capacity {
				goto End
			}
			word = word ^ t
		}
	}
End:
	if size > 0 {
		return myanswer[size-1], myanswer[:size]
	}
	return 0, myanswer[:0]
}
//返回下一个清除位置,即返回下一个0的位置 
func (b *BitSet) NextClear(i uint) (uint, bool) {
	//i对应的下标x
	x := int(i >> log2WordSize)
	//x不存在
	if x >= len(b.set) {
		return 0, false
	}
	//取出
	w := b.set[x]
	//移动i对应多余位
	w = w >> (i & (wordSize - 1))
	//判断是否剩余全是1
	wA := allBits >> (i & (wordSize - 1))
	//跳过连续的1后的位置
	index := i + trailingZeroes64(^w)
	//还在这个64位中,则返回
	if w != wA && index < b.length {
		return index, true
	}
	x++
	for x < len(b.set) {
		index = uint(x)*wordSize + trailingZeroes64(^b.set[x])
		if b.set[x] != allBits && index < b.length {
			return index, true
		}
		x++
	}
	return 0, false
}
//清空所有位置
func (b *BitSet) ClearAll() *BitSet {
	if b != nil && b.set != nil {
		for i := range b.set {
			b.set[i] = 0
		}
	}
	return b
}
//返回bitset的64字节的个数
func (b *BitSet) wordCount() int {
	return len(b.set)
}
//复制当前bitset
func (b *BitSet) Clone() *BitSet {
	c := New(b.length)
	if b.set != nil { 
		copy(c.set, b.set)
	}
	return c
}
//拷贝c位图到当前bitset
func (b *BitSet) Copy(c *BitSet) (count uint) {
	if c == nil {
		return
	}
	if b.set != nil { // Copy should not modify current object
		copy(c.set, b.set)
	}
	count = c.length
	if b.length < c.length {
		count = b.length
	}
	return
}
//返回所有1的个数
func (b *BitSet) Count() uint {
	if b != nil && b.set != nil {
		return uint(popcntSlice(b.set))
	}
	return 0
}
//判断位图是否相等
func (b *BitSet) Equal(c *BitSet) bool {
	if c == nil {
		return false
	}
	if b.length != c.length {
		return false
	}
	if b.length == 0 { 
		return true
	}
	for p, v := range b.set {
		if c.set[p] != v {
			return false
		}
	}
	return true
}

func panicIfNull(b *BitSet) {
	if b == nil {
		panic(Error("BitSet must not be null"))
	}
}
//&^操作,相异的保留,相同的清除
//compare中和当前bitset不同的位保留在新位图中返回
func (b *BitSet) Difference(compare *BitSet) (result *BitSet) {
	panicIfNull(b)
	panicIfNull(compare)
	result = b.Clone() // clone b (in case b is bigger than compare)
	//compare的64位数组大小
	l := int(compare.wordCount())
	//找两个之间的数组长度小的
	if l > int(b.wordCount()) {
		l = int(b.wordCount())
	}
	for i := 0; i < l; i++ {
		//清空compare中有1的,存入result
		result.set[i] = b.set[i] &^ compare.set[i]
	}
	return
}
//返回当前位图和其他位图之间不同位的个数
func (b *BitSet) DifferenceCardinality(compare *BitSet) uint {
	panicIfNull(b)
	panicIfNull(compare)
	l := int(compare.wordCount())
	if l > int(b.wordCount()) {
		l = int(b.wordCount())
	}
	cnt := uint64(0)
	cnt += popcntMaskSlice(b.set[:l], compare.set[:l])
	cnt += popcntSlice(b.set[l:])
	return uint(cnt)
}
//直接把compare中和当前bitset不同的保留,相同的清空
func (b *BitSet) InPlaceDifference(compare *BitSet) {
	panicIfNull(b)
	panicIfNull(compare)
	l := int(compare.wordCount())
	if l > int(b.wordCount()) {
		l = int(b.wordCount())
	}
	for i := 0; i < l; i++ {
		b.set[i] &^= compare.set[i]
	}
}
//按长度交换
func sortByLength(a *BitSet, b *BitSet) (ap *BitSet, bp *BitSet) {
	if a.length <= b.length {
		ap, bp = a, b
	} else {
		ap, bp = b, a
	}
	return
}
//两个位图相与操作,放回的新位图是两者中小的长度
func (b *BitSet) Intersection(compare *BitSet) (result *BitSet) {
	panicIfNull(b)
	panicIfNull(compare)
	b, compare = sortByLength(b, compare)
	result = New(b.length)
	for i, word := range b.set {
		result.set[i] = word & compare.set[i]
	}
	return
}
//两个位图相与后的1的个数
func (b *BitSet) IntersectionCardinality(compare *BitSet) uint {
	panicIfNull(b)
	panicIfNull(compare)
	b, compare = sortByLength(b, compare)
	cnt := popcntAndSlice(b.set, compare.set)
	return uint(cnt)
}
//当前位图直接和comapre相与操作,并且如果compare的长度比b大
//b对应扩展到compare的长度
func (b *BitSet) InPlaceIntersection(compare *BitSet) {
	panicIfNull(b)
	panicIfNull(compare)
	l := int(compare.wordCount())
	if l > int(b.wordCount()) {
		l = int(b.wordCount())
	}
	for i := 0; i < l; i++ {
		b.set[i] &= compare.set[i]
	}
	for i := l; i < len(b.set); i++ {
		b.set[i] = 0
	}
	if compare.length > 0 {
		b.extendSetMaybe(compare.length - 1)
	}
}
//两个位图或操作
func (b *BitSet) Union(compare *BitSet) (result *BitSet) {
	panicIfNull(b)
	panicIfNull(compare)
	b, compare = sortByLength(b, compare)
	result = compare.Clone()
	for i, word := range b.set {
		result.set[i] = word | compare.set[i]
	}
	return
}

func (b *BitSet) UnionCardinality(compare *BitSet) uint {
	panicIfNull(b)
	panicIfNull(compare)
	b, compare = sortByLength(b, compare)
	cnt := popcntOrSlice(b.set, compare.set)
	if len(compare.set) > len(b.set) {
		cnt += popcntSlice(compare.set[len(b.set):])
	}
	return uint(cnt)
}

func (b *BitSet) InPlaceUnion(compare *BitSet) {
	panicIfNull(b)
	panicIfNull(compare)
	l := int(compare.wordCount())
	if l > int(b.wordCount()) {
		l = int(b.wordCount())
	}
	if compare.length > 0 {
		b.extendSetMaybe(compare.length - 1)
	}
	for i := 0; i < l; i++ {
		b.set[i] |= compare.set[i]
	}
	if len(compare.set) > l {
		for i := l; i < len(compare.set); i++ {
			b.set[i] = compare.set[i]
		}
	}
}
//两个位图异或操作
func (b *BitSet) SymmetricDifference(compare *BitSet) (result *BitSet) {
	panicIfNull(b)
	panicIfNull(compare)
	b, compare = sortByLength(b, compare)
	// compare is bigger, so clone it
	result = compare.Clone()
	for i, word := range b.set {
		result.set[i] = word ^ compare.set[i]
	}
	return
}

func (b *BitSet) SymmetricDifferenceCardinality(compare *BitSet) uint {
	panicIfNull(b)
	panicIfNull(compare)
	b, compare = sortByLength(b, compare)
	cnt := popcntXorSlice(b.set, compare.set)
	if len(compare.set) > len(b.set) {
		cnt += popcntSlice(compare.set[len(b.set):])
	}
	return uint(cnt)
}

func (b *BitSet) InPlaceSymmetricDifference(compare *BitSet) {
	panicIfNull(b)
	panicIfNull(compare)
	l := int(compare.wordCount())
	if l > int(b.wordCount()) {
		l = int(b.wordCount())
	}
	if compare.length > 0 {
		b.extendSetMaybe(compare.length - 1)
	}
	for i := 0; i < l; i++ {
		b.set[i] ^= compare.set[i]
	}
	if len(compare.set) > l {
		for i := l; i < len(compare.set); i++ {
			b.set[i] = compare.set[i]
		}
	}
}
//长度是否是64的整数倍
func (b *BitSet) isLenExactMultiple() bool {
	return b.length%wordSize == 0
}
//位图最后一个64位,没有使用的置0
func (b *BitSet) cleanLastWord() {
	if !b.isLenExactMultiple() {
		b.set[len(b.set)-1] &= allBits >> (wordSize - b.length%wordSize)
	}
}
//位图取反操作
func (b *BitSet) Complement() (result *BitSet) {
	panicIfNull(b)
	result = New(b.length)
	for i, word := range b.set {
		result.set[i] = ^word
	}
	result.cleanLastWord()
	return
}
//是否所有的位都用到了
func (b *BitSet) All() bool {
	panicIfNull(b)
	return b.Count() == b.length
}
//是否全都是空的
func (b *BitSet) None() bool {
	panicIfNull(b)
	if b != nil && b.set != nil {
		for _, word := range b.set {
			if word > 0 {
				return false
			}
		}
		return true
	}
	return true
}
//是否存在使用位
func (b *BitSet) Any() bool {
	panicIfNull(b)
	return !b.None()
}

//判断当前位图是否是other位图的超集
//即other为1的b中必然为1
func (b *BitSet) IsSuperSet(other *BitSet) bool {
	for i, e := other.NextSet(0); e; i, e = other.NextSet(i + 1) {
		if !b.Test(i) {
			return false
		}
	}
	return true
}
//严格超集,b存在other没有的置1位
func (b *BitSet) IsStrictSuperSet(other *BitSet) bool {
	return b.Count() > other.Count() && b.IsSuperSet(other)
}
//bitset输出字符串
func (b *BitSet) DumpAsBits() string {
	if b.set == nil {
		return "."
	}
	buffer := bytes.NewBufferString("")
	i := len(b.set) - 1
	for ; i >= 0; i-- {
		fmt.Fprintf(buffer, "%064b.", b.set[i])
	}
	return buffer.String()
}
//返回序列化后的字节总数
func (b *BitSet) BinaryStorageSize() int {
	return binary.Size(uint64(0)) + binary.Size(b.set)
}
//写入stream
func (b *BitSet) WriteTo(stream io.Writer) (int64, error) {
	length := uint64(b.length)

	// Write length
	err := binary.Write(stream, binaryOrder, length)
	if err != nil {
		return 0, err
	}

	// Write set
	err = binary.Write(stream, binaryOrder, b.set)
	return int64(b.BinaryStorageSize()), err
}
//stream读取
func (b *BitSet) ReadFrom(stream io.Reader) (int64, error) {
	var length uint64
	err := binary.Read(stream, binaryOrder, &length)
	if err != nil {
		return 0, err
	}
	newset := New(uint(length))
	if uint64(newset.length) != length {
		return 0, errors.New("Unmarshalling error: type mismatch")
	}
	err = binary.Read(stream, binaryOrder, newset.set)
	if err != nil {
		return 0, err
	}
	*b = *newset
	return int64(b.BinaryStorageSize()), nil
}
//输出字节流
func (b *BitSet) MarshalBinary() ([]byte, error) {
	var buf bytes.Buffer
	writer := bufio.NewWriter(&buf)

	_, err := b.WriteTo(writer)
	if err != nil {
		return []byte{}, err
	}

	err = writer.Flush()

	return buf.Bytes(), err
}
//字节流输入还原bitset
func (b *BitSet) UnmarshalBinary(data []byte) error {
	buf := bytes.NewReader(data)
	reader := bufio.NewReader(buf)

	_, err := b.ReadFrom(reader)

	return err
}
//字节流编码后输出json处理
func (b *BitSet) MarshalJSON() ([]byte, error) {
	buffer := bytes.NewBuffer(make([]byte, 0, b.BinaryStorageSize()))
	_, err := b.WriteTo(buffer)
	if err != nil {
		return nil, err
	}
	return json.Marshal(base64Encoding.EncodeToString(buffer.Bytes()))
}
//json结构体解析bitset
func (b *BitSet) UnmarshalJSON(data []byte) error {
	// Unmarshal as string
	var s string
	err := json.Unmarshal(data, &s)
	if err != nil {
		return err
	}

	// URLDecode string
	buf, err := base64Encoding.DecodeString(s)
	if err != nil {
		return err
	}

	_, err = b.ReadFrom(bytes.NewReader(buf))
	return err
}
posted @ 2020-03-13 14:42  星灬期灬五  阅读(268)  评论(0编辑  收藏  举报