让Linq的Distinct更给力

Linq的Distinct含义就是去除重复项,那么如何定义重复项呢?微软给出了默认的方案:给出一个IEqualityComparer<T>的实例。

如果你不传IEqualityComparer<T>的实例,那么会使用EqualityComparer<T>.Default:类型若实现了IEquatable<T>或重写了Equals/GetHashCode,就按其规则比较;否则对引用类型来说,默认就是比较引用。

这个方案很不方便,详见:Linq的Distinct太不给力了

鹤冲天 给出了他的方案,c# 扩展方法奇思妙用基础篇八:Distinct 扩展

受到启发,我们可以从0开始构造自己的扩展。其实说白了就是传一个委托,委托可以获取属性,根据那个属性(比如ID)来进行比较。

第一版:

     /// <summary>
     /// Key-based overload of LINQ's Distinct.
     /// </summary>
     public static class DistinctExtensions
     {
         /// <summary>
         /// Runs <see cref="Enumerable.Distinct{TSource}(IEnumerable{TSource}, IEqualityComparer{TSource})"/>
         /// over <paramref name="source"/>, delegating equality to a DynamicComparer built
         /// from <paramref name="keySelector"/>. Usage: <c>source.Distinct(o =&gt; o.ID)</c>.
         /// </summary>
         /// <param name="source">Sequence to filter.</param>
         /// <param name="keySelector">Projects each element to the value used for comparison.</param>
         /// <returns>A lazily evaluated sequence produced by Enumerable.Distinct.</returns>
         public static IEnumerable<T> Distinct<T, TProperty>(this IEnumerable<T> source, Func<T, TProperty> keySelector)
         {
             IEqualityComparer<T> byKey = new DynamicComparer<T, TProperty>(keySelector);

             // Call the framework method explicitly so it is obvious this is not a recursive call.
             return Enumerable.Distinct<T>(source, byKey);
         }
     }

/// <summary>
/// Equality comparer that treats two <typeparamref name="T"/> values as equal when the
/// key projected by a selector delegate is equal under
/// <see cref="EqualityComparer{TResult}.Default"/>.
/// </summary>
/// <typeparam name="T">Type of the elements being compared.</typeparam>
/// <typeparam name="TResult">Type of the projected key.</typeparam>
public class DynamicComparer<T, TResult> : IEqualityComparer<T>
{
    private readonly Func<T, TResult> _selector;

    /// <summary>Creates a comparer that compares elements by the selected key.</summary>
    /// <param name="selector">Projects an element to its comparison key.</param>
    /// <exception cref="ArgumentNullException"><paramref name="selector"/> is null.</exception>
    public DynamicComparer(Func<T, TResult> selector)
    {
        if (selector == null)
        {
            throw new ArgumentNullException("selector");
        }

        this._selector = selector;
    }

    /// <summary>Returns true when both elements project to equal keys.</summary>
    public bool Equals(T x, T y)
    {
        return EqualityComparer<TResult>.Default.Equals(_selector(x), _selector(y));
    }

    /// <summary>Returns the hash code of the projected key.</summary>
    public int GetHashCode(T obj)
    {
        // The hash must come from the same key that Equals compares. Hash-based
        // consumers such as Enumerable.Distinct only call Equals on items whose hash
        // codes match, so hashing the whole object would keep key-equal duplicates.
        return EqualityComparer<TResult>.Default.GetHashCode(_selector(obj));
    }
}

第二版:

    /// <summary>
    /// Key-based overload of LINQ's Distinct with an optional key comparer.
    /// </summary>
    public static class DistinctExtensions
    {
        /// <summary>
        /// Runs Enumerable.Distinct over <paramref name="source"/> using a
        /// CommonEqualityComparer built from <paramref name="keySelector"/> and
        /// <paramref name="comparer"/>.
        /// </summary>
        /// <param name="source">Sequence to filter.</param>
        /// <param name="keySelector">Projects each element to the value used for comparison.</param>
        /// <param name="comparer">
        /// Comparer for the projected keys; may be null, in which case the value is
        /// passed through unchanged to CommonEqualityComparer.
        /// </param>
        /// <returns>A lazily evaluated sequence produced by Enumerable.Distinct.</returns>
        public static IEnumerable<T> Distinct<T, TProperty>(this IEnumerable<T> source, Func<T, TProperty> keySelector, IEqualityComparer<TProperty> comparer = null)
        {
            IEqualityComparer<T> byKey = new CommonEqualityComparer<T, TProperty>(keySelector, comparer);

            // Call the framework method explicitly so it is obvious this is not a recursive call.
            return Enumerable.Distinct<T>(source, byKey);
        }
    }

/// <summary>
/// Equality comparer for <typeparamref name="T"/> that projects each element to a key
/// and compares and hashes that key with an <see cref="IEqualityComparer{TProperty}"/>.
/// </summary>
/// <typeparam name="T">Type of the elements being compared.</typeparam>
/// <typeparam name="TProperty">Type of the projected key.</typeparam>
public class CommonEqualityComparer<T, TProperty> : IEqualityComparer<T>
{
    private readonly Func<T, TProperty> _keySelector;
    private readonly IEqualityComparer<TProperty> _comparer;

    /// <summary>Creates a comparer that uses the default comparer for the key type.</summary>
    /// <param name="keySelector">Projects an element to its comparison key.</param>
    public CommonEqualityComparer(Func<T, TProperty> keySelector)
        : this(keySelector, EqualityComparer<TProperty>.Default)
    {
    }

    /// <summary>Creates a comparer that uses the supplied key comparer.</summary>
    /// <param name="keySelector">Projects an element to its comparison key.</param>
    /// <param name="comparer">
    /// Comparer for the keys; when null, <see cref="EqualityComparer{TProperty}.Default"/> is used.
    /// </param>
    public CommonEqualityComparer(Func<T, TProperty> keySelector, IEqualityComparer<TProperty> comparer)
    {
        _keySelector = keySelector;

        if (comparer == null)
        {
            _comparer = EqualityComparer<TProperty>.Default;
        }
        else
        {
            _comparer = comparer;
        }
    }

    /// <summary>Returns true when the key comparer considers both projected keys equal.</summary>
    public bool Equals(T x, T y)
    {
        TProperty leftKey = _keySelector(x);
        TProperty rightKey = _keySelector(y);
        return _comparer.Equals(leftKey, rightKey);
    }

    /// <summary>Returns the key comparer's hash code for the projected key.</summary>
    public int GetHashCode(T obj)
    {
        TProperty key = _keySelector(obj);
        return _comparer.GetHashCode(key);
    }
}

这个是最简单的版本。根据快速创建 IEqualityComparer<T> 和 IComparer<T> 的实例一文,可以封装一下new的逻辑。

第三版:

    /// <summary>
    /// Factory for key-based equality comparers over <typeparamref name="T"/>.
    /// </summary>
    /// <example>
    /// <code>
    /// var equalityComparer1 = Equality&lt;Person&gt;.CreateComparer(p =&gt; p.ID);
    /// var equalityComparer2 = Equality&lt;Person&gt;.CreateComparer(p =&gt; p.Name);
    /// var equalityComparer3 = Equality&lt;Person&gt;.CreateComparer(p =&gt; p.Birthday.Year);
    /// var equalityComparer4 = Equality&lt;Person&gt;.CreateComparer(p =&gt; p.Name, StringComparer.CurrentCultureIgnoreCase);
    /// </code>
    /// </example>
    public static class Equality<T>
    {
        /// <summary>
        /// Creates a comparer that compares elements by the selected key using
        /// <see cref="EqualityComparer{V}.Default"/>.
        /// </summary>
        /// <param name="keySelector">Projects an element to its comparison key.</param>
        public static IEqualityComparer<T> CreateComparer<V>(Func<T, V> keySelector)
        {
            return new KeyEqualityComparer<V>(keySelector, EqualityComparer<V>.Default);
        }

        /// <summary>
        /// Creates a comparer that compares elements by the selected key using
        /// <paramref name="comparer"/>.
        /// </summary>
        /// <param name="keySelector">Projects an element to its comparison key.</param>
        /// <param name="comparer">
        /// Comparer for the keys; when null, <see cref="EqualityComparer{V}.Default"/> is used.
        /// </param>
        public static IEqualityComparer<T> CreateComparer<V>(Func<T, V> keySelector, IEqualityComparer<V> comparer)
        {
            return new KeyEqualityComparer<V>(keySelector, comparer);
        }

        /// <summary>Private comparer that applies a key comparer to projected keys.</summary>
        private sealed class KeyEqualityComparer<V> : IEqualityComparer<T>
        {
            private readonly Func<T, V> _project;
            private readonly IEqualityComparer<V> _keys;

            public KeyEqualityComparer(Func<T, V> keySelector, IEqualityComparer<V> comparer)
            {
                _project = keySelector;
                _keys = comparer != null ? comparer : EqualityComparer<V>.Default;
            }

            public bool Equals(T x, T y)
            {
                V leftKey = _project(x);
                V rightKey = _project(y);
                return _keys.Equals(leftKey, rightKey);
            }

            public int GetHashCode(T obj)
            {
                V key = _project(obj);
                return _keys.GetHashCode(key);
            }
        }
    }

前面这些方法都是取某一个属性来确定唯一性;但如果我们用的是联合主键,由 Code + Name 一起确定唯一性,那么这些就都不适用了。

但是我们通过上面的原理:传递委托,可以用以下方式实现。(注意,下面这个方法是错的!!)

    /// <summary>
    /// Distinct overloads that decide uniqueness with a caller-supplied equality
    /// callback, e.g. for a composite key made of several properties.
    /// </summary>
    public static class DistinctExtensions
    {
        /// <summary>
        /// Removes duplicates from <paramref name="source"/>, using
        /// <paramref name="compareCallback"/> to decide whether two elements are equal.
        /// Usage: <c>source.Distinct((x, y) =&gt; (x.Code + x.Name).Equals(y.Code + y.Name));</c>
        /// </summary>
        /// <param name="source">Sequence to filter.</param>
        /// <param name="compareCallback">Returns true when the two elements are duplicates.</param>
        /// <param name="getHashCodeCallback">
        /// Hash function consistent with <paramref name="compareCallback"/>: elements the
        /// callback considers equal must hash to the same value. When omitted, a constant
        /// hash is used, which is always consistent but makes the operation quadratic.
        /// </param>
        /// <returns>A lazily evaluated sequence produced by Enumerable.Distinct.</returns>
        public static IEnumerable<T> Distinct<T>(this IEnumerable<T> source, Func<T, T, bool> compareCallback, Func<T, int> getHashCodeCallback = null)
        {
            // Enumerable.Distinct only calls Equals on items whose hash codes match.
            // Without a caller-supplied hash, the only hash guaranteed to agree with an
            // arbitrary compareCallback is a constant, so every pair reaches Equals.
            Func<T, int> hash = getHashCodeCallback ?? (item => 0);

            return Enumerable.Distinct<T>(source, new DelegatingEqualityComparer<T>(compareCallback, hash));
        }

        /// <summary>
        /// Legacy signature kept for callers that pass both type arguments explicitly.
        /// <typeparamref name="TProperty"/> is unused and cannot be inferred, so prefer
        /// the overload with a single type parameter.
        /// </summary>
        public static IEnumerable<T> Distinct<T, TProperty>(this IEnumerable<T> source, Func<T, T, bool> compareCallback, Func<T, int> getHashCodeCallback = null)
        {
            return Distinct<T>(source, compareCallback, getHashCodeCallback);
        }
    }

/// <summary>
/// Equality comparer whose Equals and GetHashCode are supplied as delegates.
/// </summary>
/// <remarks>
/// This is the deliberately flawed teaching version: when no hash callback is given,
/// GetHashCode falls back to the default hash of the whole object, which is unrelated
/// to what the comparison callback compares. Do not use it as-is with hash-based
/// consumers such as Enumerable.Distinct.
/// </remarks>
/// <typeparam name="T">Type of the elements being compared.</typeparam>
public sealed class DelegatingEqualityComparer<T> : IEqualityComparer<T>, IEqualityComparer
{
    private readonly Func<T, T, bool> _compareCallback;
    private readonly Func<T, int> _getHashCodeCallback;

    /// <summary>Creates a comparer with a comparison callback and no hash callback.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="compareCallback"/> is null.</exception>
    public DelegatingEqualityComparer(Func<T, T, bool> compareCallback)
        : this(compareCallback, null)
    {
    }

    /// <summary>Creates a comparer with a comparison callback and an optional hash callback.</summary>
    /// <param name="compareCallback">Returns true when the two elements are equal.</param>
    /// <param name="getHashCodeCallback">Hash function; may be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="compareCallback"/> is null.</exception>
    public DelegatingEqualityComparer(Func<T, T, bool> compareCallback, Func<T, int> getHashCodeCallback)
    {
        if (compareCallback == null) throw new ArgumentNullException("compareCallback");
        this._compareCallback = compareCallback;
        this._getHashCodeCallback = getHashCodeCallback;
    }

    /// <summary>
    /// Creates a comparer that compares elements by one member, using the default
    /// comparer of the member type. No hash callback is supplied.
    /// </summary>
    public static DelegatingEqualityComparer<T> Default<TMember>(Func<T, TMember> memberSelector)
    {
        return new DelegatingEqualityComparer<T>((x, y) =>
            EqualityComparer<TMember>.Default.Equals(memberSelector(x), memberSelector(y)));
    }

    /// <summary>Returns the result of the comparison callback.</summary>
    public bool Equals(T x, T y)
    {
        return this._compareCallback(x, y);
    }

    /// <summary>
    /// Returns the hash callback's result, or the default hash of the object when no
    /// hash callback was supplied.
    /// </summary>
    public int GetHashCode(T obj)
    {
        // There is a bug here - can you spot it?
        if (this._getHashCodeCallback == null) return EqualityComparer<T>.Default.GetHashCode(obj);
        return this._getHashCodeCallback(obj);
    }

    /// <summary>Non-generic Equals: false unless both arguments are of type T.</summary>
    bool IEqualityComparer.Equals(object x, object y)
    {
        return (x is T) && (y is T) && this.Equals((T)x, (T)y);
    }

    /// <summary>
    /// Non-generic GetHashCode: uses the typed overload for T instances, 0 for null,
    /// and the object's own hash code otherwise.
    /// </summary>
    int IEqualityComparer.GetHashCode(object obj)
    {
        if (obj is T) return this.GetHashCode((T)obj);
        if (obj == null) return 0;
        return obj.GetHashCode();
    }
}

上面这个方法看起来很美,其实有个bug呢!

你知道是什么bug么?关键点在GetHashCode(T obj)的实现上。

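呵呵,一开始我也没看出来。其实是泛型不对:EqualityComparer<T>.Default 算出来的是整个对象 obj 的 hashcode,而我们要取的应该是用来比较的那个属性(TProperty)的 hashcode。Distinct 会先比较 hashcode,hashcode 不同的两项根本不会再去调用 Equals,所以 Equals 认为相等的两个对象必须返回相同的 hashcode。

所以应该是EqualityComparer<TProperty>.Default。但是这里没有TProperty,于是我们只能在Default<TMember>里面创建计算属性 hashcode 的方法,同时要求构造函数必须传入 getHashCodeCallback。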

完整代码如下:

     /// <summary>
     /// Equality comparer whose Equals and GetHashCode are both supplied as delegates.
     /// Both delegates are mandatory.
     /// </summary>
     /// <typeparam name="T">Type of the elements being compared.</typeparam>
     public sealed class DelegatingEqualityComparer<T> : IEqualityComparer<T>, IEqualityComparer
     {
         private readonly Func<T, T, bool> _compareCallback;
         private readonly Func<T, int> _getHashCodeCallback;

         /// <summary>Creates a comparer from a comparison callback and a hash callback.</summary>
         /// <param name="compareCallback">Returns true when the two elements are equal.</param>
         /// <param name="getHashCodeCallback">Returns the hash code of an element.</param>
         /// <exception cref="ArgumentNullException">Either callback is null.</exception>
         public DelegatingEqualityComparer(Func<T, T, bool> compareCallback, Func<T, int> getHashCodeCallback)
         {
             if (compareCallback == null)
             {
                 throw new ArgumentNullException("compareCallback");
             }

             if (getHashCodeCallback == null)
             {
                 throw new ArgumentNullException("getHashCodeCallback");
             }

             _compareCallback = compareCallback;
             _getHashCodeCallback = getHashCodeCallback;
         }

         /// <summary>
         /// Creates a comparer that compares and hashes elements by a single member,
         /// using the default comparer of the member type for both operations.
         /// </summary>
         /// <param name="memberSelector">Projects an element to the member to compare.</param>
         public static DelegatingEqualityComparer<T> Default<TMember>(Func<T, TMember> memberSelector)
         {
             EqualityComparer<TMember> memberComparer = EqualityComparer<TMember>.Default;

             Func<T, T, bool> sameMember =
                 (left, right) => memberComparer.Equals(memberSelector(left), memberSelector(right));
             Func<T, int> memberHash =
                 item => memberComparer.GetHashCode(memberSelector(item));

             return new DelegatingEqualityComparer<T>(sameMember, memberHash);
         }

         /// <summary>Returns the result of the comparison callback.</summary>
         public bool Equals(T x, T y)
         {
             return _compareCallback(x, y);
         }

         /// <summary>Returns the result of the hash callback.</summary>
         public int GetHashCode(T obj)
         {
             return _getHashCodeCallback(obj);
         }

         /// <summary>Non-generic Equals: false unless both arguments are of type T.</summary>
         bool IEqualityComparer.Equals(object x, object y)
         {
             if (!(x is T) || !(y is T))
             {
                 return false;
             }

             return _compareCallback((T)x, (T)y);
         }

         /// <summary>
         /// Non-generic GetHashCode: 0 for null, the hash callback for T instances,
         /// and the object's own hash code otherwise.
         /// </summary>
         int IEqualityComparer.GetHashCode(object obj)
         {
             if (obj == null)
             {
                 return 0;
             }

             return obj is T ? _getHashCodeCallback((T)obj) : obj.GetHashCode();
         }
     }


关于EqualityComparer<T>.Default,可以参考.Net 相等性:集合类 Contains 方法 深入详解

此文研究的比较深入。

扩展方法给我们带来了乐趣,让我们代码越写越少,越写越爽。C#还是很强悍的,同情可怜的Java。

posted @ 2011-12-13 16:11  primeli  阅读(487)  评论(0)  编辑  收藏