[原创]大数据:布隆过滤器C#版简单实现。

    /// <summary>
    /// A minimal Bloom filter for strings backed by a <see cref="BitArray"/>.
    /// Each item is mapped to <see cref="BitIndexCount"/> bit positions by seeding a
    /// <see cref="Random"/> with the item's hash code and drawing that many indexes.
    /// </summary>
    /// <remarks>
    /// The positions depend on <see cref="object.GetHashCode"/> of the string, which is not
    /// guaranteed to be stable across processes or runtime versions, so the bit array should
    /// be treated as valid only inside the process that filled it.
    /// </remarks>
    public class BloomFilter
    {
        /// <summary>Bit storage; a set bit means at least one added item mapped to that position.</summary>
        public BitArray _BloomArray;

        /// <summary>Number of bits in the filter (m in the false-positive formula).</summary>
        public Int64 BloomArryLength { get; }

        /// <summary>Expected number of items to be stored (n in the false-positive formula).</summary>
        public Int64 DataArrayLeng { get; }

        /// <summary>Number of bit positions probed per item (k in the false-positive formula).</summary>
        public Int64 BitIndexCount { get; }

        /// <summary>
        /// Creates an empty filter.
        /// </summary>
        /// <param name="BloomArryLength">Size of the bit array; must be positive.</param>
        /// <param name="DataArrayLeng">Expected number of items; must not be negative.</param>
        /// <param name="bitIndexCount">Number of bit positions per item; must be positive.</param>
        /// <exception cref="ArgumentOutOfRangeException">An argument is outside its valid range.</exception>
        public BloomFilter(int BloomArryLength,int DataArrayLeng,int bitIndexCount)
        {
            if (BloomArryLength <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(BloomArryLength), "The bit array length must be positive.");
            }
            if (DataArrayLeng < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(DataArrayLeng), "The expected item count must not be negative.");
            }
            if (bitIndexCount <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(bitIndexCount), "The number of bit positions per item must be positive.");
            }

            _BloomArray = new BitArray(BloomArryLength);
            this.BloomArryLength = BloomArryLength;
            this.DataArrayLeng = DataArrayLeng;
            this.BitIndexCount = bitIndexCount;
        }

        /// <summary>
        /// Records <paramref name="str"/> in the filter by setting all of its bit positions.
        /// </summary>
        /// <param name="str">The item to add.</param>
        /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
        public void Add(string str)
        {
            if (str == null)
            {
                throw new ArgumentNullException(nameof(str));
            }

            Random random = CreateIndexSource(str);
            for (long i = 0; i < BitIndexCount; i++)
            {
                _BloomArray[NextIndex(random)] = true;
            }
        }

        /// <summary>
        /// Tests whether <paramref name="str"/> may have been added.
        /// </summary>
        /// <param name="str">The item to look up.</param>
        /// <returns>
        /// <c>false</c> if at least one of the item's bit positions is clear (the item was never added);
        /// <c>true</c> if all of them are set (the item was added, or this is a false positive).
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
        public bool isExist(string str)
        {
            if (str == null)
            {
                throw new ArgumentNullException(nameof(str));
            }

            Random random = CreateIndexSource(str);
            for (long i = 0; i < BitIndexCount; i++)
            {
                if (!_BloomArray[NextIndex(random)])
                {
                    return false;
                }
            }
            return true;
        }

        /// <summary>
        /// Returns the hash code of <paramref name="value"/>, used as the seed for index generation.
        /// </summary>
        /// <param name="value">The object to hash.</param>
        /// <returns>The result of <c>value.GetHashCode()</c>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
        public int GetHashCode(object value)
        {
            if (value == null)
            {
                throw new ArgumentNullException(nameof(value));
            }
            return value.GetHashCode();
        }

        /// <summary>
        /// Estimates the false-positive rate once <see cref="DataArrayLeng"/> items have been added,
        /// using the standard approximation (1 - e^(-k * n / m)) ^ k.
        /// </summary>
        /// <returns>The estimated probability, between 0 and 1.</returns>
        public double getFalsePositiveProbability()
        {
            // (1 - e^(-k * n / m)) ^ k
            double exponent = -BitIndexCount * (double)DataArrayLeng / BloomArryLength;
            return Math.Pow(1 - Math.Exp(exponent), BitIndexCount);
        }

        // Builds the deterministic index sequence for an item: the same string always yields
        // the same seed within a process, so Add and isExist visit the same positions.
        private Random CreateIndexSource(string str)
        {
            return new Random(GetHashCode(str));
        }

        // Draws the next bit position. Random.Next(max) excludes max, so the full length is
        // passed to make every bit, including the last one, reachable.
        private int NextIndex(Random random)
        {
            return random.Next((int)BloomArryLength);
        }
    }

 

        /// <summary>
        /// Demo driver: fills a Bloom filter with the decimal strings of 0 .. DataArrayLeng - 1,
        /// then answers membership queries read from the console until "e" or end of input.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // 200 million bits, 3 bit positions per item, sized for 50 million entries.
            Bloom_Filter.BloomFilter bloom = new Bloom_Filter.BloomFilter(200000000, 50000000, 3);

            // Insert the 50 million entries.
            for (int i = 0; i < bloom.DataArrayLeng; i++)
            {
                bloom.Add(i.ToString());
            }

            while (true)
            {
                var c = Console.ReadLine();

                // ReadLine returns null when the input stream ends (redirected or closed stdin);
                // treat that like the explicit "e" exit command instead of querying with null.
                if (c == null || c == "e")
                {
                    break;
                }

                // Time only the lookup itself; the elapsed value is printed in Stopwatch ticks.
                Stopwatch sw = Stopwatch.StartNew();
                var temp = bloom.isExist(c);
                sw.Stop();
                Console.WriteLine($"查找:{c}\n结果:{temp}\n总耗时:{sw.ElapsedTicks}\n错误概率:{bloom.getFalsePositiveProbability()}");
            }
        }

结果:使用内存27MB,查找结果一般在100毫秒以内。

 

posted @ 2018-11-29 12:38  Red Cat  阅读(1750)  评论(4)  编辑  收藏  举报

Copyright © 2022 LyShark Powered by .NET 6 on Kubernetes
Theme - LyTheme 1.0