【源码】浅看Dictionary(.Net Framework)源码

注:本文参考的源码来自mscorlib.dll,会有部分代码逻辑不同于.Net Core的Dictionary,已在另一篇博文对比介绍【源码】浅看Dictionary(System.Private.CoreLib)源码

在自定义类重写Equals()GetHashCode()的时候有些好奇,所以想看看Dictionary查找Key和存储键值对的原理。

下面通过看看几个最常用的Dictionary的方法,了解一下Dictionary的实现原理。

GetHashCode

首先看看如果在重写Equals()的时候不重写GetHashCode()会出现什么情况

class Cat
{
    public string Name;
    public string Birth;

    public bool Equals(Cat cat)
    {
        if (cat is null)
            return false;
        Console.WriteLine("自定义_Cat");
        return Name == cat.Name && Birth == cat.Birth;
    }
    public override bool Equals(object obj) => Equals(obj as Cat);
}
static void Main(string[] args)
{
    Dictionary<Cat, string> cats = new Dictionary<Cat, string>()
    {
        { new Cat { Name="小黑", Birth = "2020-2-1" }, "小黑" },
        { new Cat { Name="小黄", Birth = "2020-4-1" }, "小黄" },
        { new Cat { Name="憨豆", Birth = "2020-5-1" }, "憨豆" },
        { new Cat { Name="小黑", Birth = "2020-2-1" }, "小黑2" },
    };
    var xiaohei = new Cat { Name = "小黑", Birth = "2020-2-1" };
    Console.WriteLine(cats.ContainsKey(xiaohei));
}

// 输出
// False

可以看到虽然有两个“一样”的Key,{ new Cat { Name="小黑", Birth = "2020-2-1" }, "小黑" }{ new Cat { Name="小黑", Birth = "2020-2-1" }, "小黑2" },但是Dictionary还是正常初始化了,而在ContainsKey()时找不到Key。

ContainsKey

源码中的ContainsKey()直接returnFindEntry(key) >= 0;,再看看FindEntry()的实现

private int FindEntry(TKey key) {
    if( key == null) {
        ThrowHelper.ThrowArgumentNullException(ExceptionArgument.key);
    }

    if (buckets != null) {
        int hashCode = comparer.GetHashCode(key) & 0x7FFFFFFF;
        for (int i = buckets[hashCode % buckets.Length]; i >= 0; i = entries[i].next) {
            if (entries[i].hashCode == hashCode && comparer.Equals(entries[i].key, key)) return i;
        }
    }
    return -1;
}

if (entries[i].hashCode == hashCode && comparer.Equals(entries[i].key, key)) return i;

所以需要hashCode相同并且comparer.Equals(entries[i].key, key)返回true才认为两个Key一样,因为重写了Equals(),所以comparer.Equals(entries[i].key, key)返回的是true,而在没有重写GetHashCode()时,默认使用Object.GetHashCode()获取hashCode,我没有找到这个函数的实现,但基本可以确定跟对象的存储地址是有关系的,而我们是new了一个对象去找Key的,所以得到的hashCode必然跟字典中的Key的hashCode不一致,所以会出现这样的现象。

所以必须同时重写Equals()GetHashCode(),比如在这个例子中,根据Name和Birth确定是否同一只Cat,

public override int GetHashCode() {
    return Name.GetHashCode() ^ Birth.GetHashCode();
}

此时调用的是string.GetHashCode(),生成的哈希值只与string有关,只要string一样,得到的hashCode是一样的。



Dictionary的基本原理其实就是利用一个hash表对Key和Key_Value进行存储和寻址。要看懂源码先要有点哈希表相关的基础,没有数据结构基础的慢慢看也不难懂。

Add

先上图

左边的Dictionary有一个元素,entries[0].hashCode = 8,8 % buckets.Length = 1,因此,entries[0].next = buckets[1] = -1,buckets[1] = 0,实际指向entries[0]。

此时插入第二个元素,entries[1].hashCode = 15,15 % buckets.Length = 1,因此,entries[1].next = buckets[1] = 0,buckets[1] = 1,实际指向entries[1]。

带着图看源码

public void Add(TKey key, TValue value) {
    Insert(key, value, true);
}

再看Insert()

这里有两个关键的变量entriesbuckets

private Entry[] entries;	// 按下标顺序存储每一个元素、对应的hashCode、下一个entry的下标
private int[] buckets;	// 存储entries下标的哈希表,用元素的hashCode对buckets.Length取余作为下标可快速找到对应的entry

private struct Entry {
    public int hashCode;    // Lower 31 bits of hash code, -1 if unused
    public int next;        // Index of next entry, -1 if last
    public TKey key;           // Key of entry
    public TValue value;         // Value of entry
}
private void Insert(TKey key, TValue value, bool add) {

    if( key == null ) {
        ThrowHelper.ThrowArgumentNullException(ExceptionArgument.key);
    }

    if (buckets == null) Initialize(0);	// 如果buckets为null要先初始化
    int hashCode = comparer.GetHashCode(key) & 0x7FFFFFFF;	
    int targetBucket = hashCode % buckets.Length;
    // hashCode取余作为新元素的buckets下标

#if FEATURE_RANDOMIZED_STRING_HASHING
    int collisionCount = 0;
#endif

    // 下面整个for从buckets[targetBucket]开始,取entries[buckets[targetBucket]]跟新元素的Key对比,如果没有遇到相同的Key才添加新元素
    for (int i = buckets[targetBucket]; i >= 0; i = entries[i].next) {
        // 只有在hashCode相等和Equals返回true的同时才认为两个元素相同
        if (entries[i].hashCode == hashCode && comparer.Equals(entries[i].key, key)) {
            if (add) {	// 在用`[]`进行操作时add为false,更新元素value
                ThrowHelper.ThrowArgumentException(ExceptionResource.Argument_AddingDuplicate); // 存在相同元素抛出异常
            }
            entries[i].value = value;
            version++;	// 当对Dictionary进行Add或Update或Remove“增、改、删”操作时,Dictionary的version加一
            return;
        } 

#if FEATURE_RANDOMIZED_STRING_HASHING
        collisionCount++;	// 记录冲突数,理解这个需要先了解上面提到的哈希表结构
#endif
    }
    int index;
    // freeCount只有在Remove元素之后才可能大于0,此时将新元素插入entries空位
    if (freeCount > 0) {
        index = freeList;	// 取上次被Remove的元素的下标
        freeList = entries[index].next;	// 指向上一个被Remove的元素的坐标,若没有则为-1
        freeCount--;	// 闲置数减一
    }
    else {
        // 如果entries空间不够用了,就对entries和buckets进行扩容
        if (count == entries.Length)
        {
            Resize();
            targetBucket = hashCode % buckets.Length;
        }
        index = count;
        count++;
    }

    entries[index].hashCode = hashCode;
    entries[index].next = buckets[targetBucket];	// [1]
    entries[index].key = key;
    entries[index].value = value;
    buckets[targetBucket] = index;	// [2]
    // 这里标记的[1][2]两句是关键,
    // 当存储的元素作为当前hashCode的第一个元素时,next = buckets[targetBucket] = -1,buckets[targetBucket]置为该元素的下标
    // 而后每一次存储有带着相同哈希值Key的元素时,next指向上一个带着相同哈希值Key的元素,buckets[targetBucket]置为新元素的下标
    version++;	// 当对Dictionary进行Add或Update或Remove“增、改、删”操作时,Dictionary的version加一

// 下面代码是当collisionCount(新增元素Key的hashCode的冲突数) > HashHelpers.HashCollisionThreshold(设定的最大冲突数)时会进行对表的Resize处理,我懒得细看了。
#if FEATURE_RANDOMIZED_STRING_HASHING

#if FEATURE_CORECLR
    // In case we hit the collision threshold we'll need to switch to the comparer which is using randomized string hashing
    // in this case will be EqualityComparer<string>.Default.
    // Note, randomized string hashing is turned on by default on coreclr so EqualityComparer<string>.Default will 
    // be using randomized string hashing

    if (collisionCount > HashHelpers.HashCollisionThreshold && comparer == NonRandomizedStringEqualityComparer.Default) 
    {s
        comparer = (IEqualityComparer<TKey>) EqualityComparer<string>.Default;
        Resize(entries.Length, true);
    }
#else
    if(collisionCount > HashHelpers.HashCollisionThreshold && HashHelpers.IsWellKnownEqualityComparer(comparer)) 
    {
        comparer = (IEqualityComparer<TKey>) HashHelpers.GetRandomizedEqualityComparer(comparer);
        Resize(entries.Length, true);
    }
#endif // FEATURE_CORECLR

#endif

}

Resize

Resize()会在字典,也就是entries容量不够时执行,对字典进行扩展。

private void Resize() {
    Resize(HashHelpers.ExpandPrime(count), false);
    // HashHelpers.ExpandPrime(count)方法调用GetPrime(2 * oldSize)取大于当前元素数量2倍的最小素数作为新哈希表的长度。
}

private void Resize(int newSize, bool forceNewHashCodes) {
    Contract.Assert(newSize >= entries.Length);
    int[] newBuckets = new int[newSize];
    for (int i = 0; i < newBuckets.Length; i++) newBuckets[i] = -1;	// 初始化哈希表内坐标
    Entry[] newEntries = new Entry[newSize];
    Array.Copy(entries, 0, newEntries, 0, count);	// 复制旧元素到新表
    if(forceNewHashCodes) {
        for (int i = 0; i < count; i++) {
            if(newEntries[i].hashCode != -1) {
                newEntries[i].hashCode = (comparer.GetHashCode(newEntries[i].key) & 0x7FFFFFFF);
            }
        }
    }
    // // 对所有元素hashCode重新取余,重设下标
    for (int i = 0; i < count; i++) {
        if (newEntries[i].hashCode >= 0) {	// 过滤Remove后的闲置位
            int bucket = newEntries[i].hashCode % newSize;
            newEntries[i].next = newBuckets[bucket];
            newBuckets[bucket] = i;
        }
    }
    buckets = newBuckets;
    entries = newEntries;
}

Remove

public bool Remove(TKey key) {
    if(key == null) {
        ThrowHelper.ThrowArgumentNullException(ExceptionArgument.key);
    }

    if (buckets != null) {
        int hashCode = comparer.GetHashCode(key) & 0x7FFFFFFF;
        int bucket = hashCode % buckets.Length;
        int last = -1;
        for (int i = buckets[bucket]; i >= 0; last = i, i = entries[i].next) {
            if (entries[i].hashCode == hashCode && comparer.Equals(entries[i].key, key)) {
                if (last < 0) {
                    buckets[bucket] = entries[i].next;
                }
                else {
                    entries[last].next = entries[i].next;
                }
                entries[i].hashCode = -1;	// hashCode设为-1,做个标记
                entries[i].next = freeList;	// next指向上一个被Remove的元素的坐标,若没有则为-1
                entries[i].key = default(TKey);
                entries[i].value = default(TValue);
                freeList = i;	// 置为最新被移除元素的坐标
                freeCount++;	// entries闲置数加一
                version++;	// 当对Dictionary进行Add或Update或Remove(“增、改、删”)操作时,Dictionary的version加一
                return true;
            }
        }
    }
    return false;
}

Clear

为了提高效率,Clear函数没有对entries和buckets置null,只是把相关的变量重新初始化了。也符合逻辑,如果置null,那我为什么不直接new一个新的Dictionary呢。

public void Clear() {
    if (count > 0) {
        for (int i = 0; i < buckets.Length; i++) buckets[i] = -1;
        Array.Clear(entries, 0, count);
        freeList = -1;
        count = 0;
        freeCount = 0;
        version++;
    }
}

官方源码:Dictionary.cs

posted @ 2021-01-10 10:41  Dirt·in·firework  阅读(217)  评论(1)    收藏  举报