数据去重

可根据一个或者多个属性去重

方法一(推荐,速度更快):

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace LotteryCustomerService.Common
{
    /// <summary>
    /// Extension methods for <see cref="IEnumerable{T}"/> sequences.
    /// </summary>
    public static class EnumerableExtensions
    {
        /// <summary>
        /// Returns the elements of <paramref name="source"/> whose key, as produced by
        /// <paramref name="keySelector"/>, has not appeared earlier in the sequence.
        /// The first element carrying a given key is kept; later ones are skipped.
        /// Keys are compared with the default equality comparer of <typeparamref name="TKey"/>.
        /// </summary>
        /// <typeparam name="TSource">Type of the elements in the sequence.</typeparam>
        /// <typeparam name="TKey">Type of the key used to decide whether two elements are duplicates.</typeparam>
        /// <param name="source">Sequence to de-duplicate.</param>
        /// <param name="keySelector">Function that extracts the de-duplication key from an element.</param>
        /// <returns>A lazily evaluated sequence containing the first element for each distinct key, in source order.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="source"/> or <paramref name="keySelector"/> is <c>null</c>.
        /// </exception>
        public static IEnumerable<TSource> DistinctBy<TSource, TKey>(this IEnumerable<TSource> source, Func<TSource, TKey> keySelector)
        {
            // Validate here rather than in the iterator: an iterator body does not run
            // until the first MoveNext, which would postpone the failure to the point
            // of enumeration and surface it as a NullReferenceException.
            if (source == null)
            {
                throw new ArgumentNullException(nameof(source));
            }

            if (keySelector == null)
            {
                throw new ArgumentNullException(nameof(keySelector));
            }

            return DistinctByIterator(source, keySelector);
        }

        /// <summary>
        /// Iterator that performs the actual filtering once the arguments have been validated.
        /// </summary>
        private static IEnumerable<TSource> DistinctByIterator<TSource, TKey>(IEnumerable<TSource> source, Func<TSource, TKey> keySelector)
        {
            HashSet<TKey> seenKeys = new HashSet<TKey>();
            foreach (TSource element in source)
            {
                // HashSet.Add returns false when the key is already present,
                // so an element is yielded only the first time its key is seen.
                if (seenKeys.Add(keySelector(element)))
                {
                    yield return element;
                }
            }
        }
    }
}

 

方法二:

    /// <summary>
    /// Equality comparer that treats two <typeparamref name="T"/> values as equal when the
    /// field extracted from each of them by a delegate is equal.
    /// </summary>
    /// <typeparam name="T">Type of the objects being compared.</typeparam>
    /// <typeparam name="C">Type of the extracted field that drives the comparison.</typeparam>
    public class Compare<T, C> : IEqualityComparer<T>
    {
        // Delegate that extracts the comparison field; assigned once in the constructor.
        private readonly Func<T, C> _getField;

        /// <summary>
        /// Creates a comparer that compares objects by the value returned from <paramref name="getfield"/>.
        /// </summary>
        /// <param name="getfield">Delegate that extracts the field to compare.</param>
        /// <exception cref="ArgumentNullException"><paramref name="getfield"/> is <c>null</c>.</exception>
        public Compare(Func<T, C> getfield)
        {
            // Fail at construction instead of with a NullReferenceException on first use.
            if (getfield == null)
            {
                throw new ArgumentNullException(nameof(getfield));
            }

            _getField = getfield;
        }

        /// <summary>
        /// Returns <c>true</c> when the extracted fields of <paramref name="x"/> and
        /// <paramref name="y"/> are equal according to the default comparer of <typeparamref name="C"/>.
        /// </summary>
        public bool Equals(T x, T y)
        {
            return EqualityComparer<C>.Default.Equals(_getField(x), _getField(y));
        }

        /// <summary>
        /// Returns the hash code of the field extracted from <paramref name="obj"/>, so that
        /// objects considered equal by <see cref="Equals(T, T)"/> share a hash code.
        /// </summary>
        public int GetHashCode(T obj)
        {
            return EqualityComparer<C>.Default.GetHashCode(_getField(obj));
        }
    }
    /// <summary>
    /// Helper extension methods for collections.
    /// </summary>
    public static class CommonHelper
    {
        /// <summary>
        /// Custom Distinct extension method that de-duplicates a sequence by a selected field.
        /// </summary>
        /// <typeparam name="T">Type of the objects to de-duplicate.</typeparam>
        /// <typeparam name="C">Type of the field used for de-duplication.</typeparam>
        /// <param name="source">Sequence to de-duplicate.</param>
        /// <param name="getfield">Delegate that extracts the de-duplication field from an object.</param>
        /// <returns>
        /// The result of <c>Enumerable.Distinct</c> applied to <paramref name="source"/> with a
        /// <see cref="Compare{T, C}"/> comparer built from <paramref name="getfield"/>.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="getfield"/> is <c>null</c>.</exception>
        public static IEnumerable<T> MyDistinct<T, C>(this IEnumerable<T> source, Func<T, C> getfield)
        {
            // Reject a missing delegate up front; otherwise the failure would only
            // appear when the resulting sequence is enumerated.
            if (getfield == null)
            {
                throw new ArgumentNullException(nameof(getfield));
            }

            return source.Distinct(new Compare<T, C>(getfield));
        }
    }

demo:

        // Demo: de-duplicates three teachers, first by ID alone and then by ID and Name together.
        static void Main(string[] args)
        {
            Teacher t1 = new Teacher() { ID = 1, Name = "小军" };
            Teacher t2 = new Teacher() { ID = 1, Name = "小明" };
            Teacher t3 = new Teacher() { ID = 1, Name = "小军" };
            List<Teacher> list = new List<Teacher>() { t1, t2, t3 };

            // Both queries run against the untouched original list. Reassigning `list`
            // after the first query would leave only one element for the second query,
            // so the ID + Name example would have nothing left to de-duplicate.
            List<Teacher> byId = list.MyDistinct(m => m.ID).ToList(); // de-duplicate by ID
            List<Teacher> byIdAndName = list.MyDistinct(m => new { m.ID, m.Name }).ToList(); // de-duplicate by ID and Name

            Console.WriteLine(byId.Count);        // 1: all three teachers share ID 1
            Console.WriteLine(byIdAndName.Count); // 2: t1 and t3 have the same ID and Name
            Console.ReadKey();
        }

 

速度比较:

新建一个包含 1000 万(10,000,000)条数据的集合,比较方法一、方法二的用时

        // Benchmark: times DistinctBy against MyDistinct on the same 10,000,000-element list.
        // For each method the elapsed milliseconds are printed right after the stopwatch
        // starts and again after both queries have been materialized.
        static void Main(string[] args)
        {
            // Build the test data; IDs cycle through 0..3 and every Name is identical.
            List<Teacher> teachers = new List<Teacher>();
            int index = 0;
            while (index < 10000000)
            {
                teachers.Add(new Teacher() { ID = index % 4, Name = "小军" });
                index++;
            }

            // Round one: the HashSet-based DistinctBy.
            Stopwatch timer = Stopwatch.StartNew();
            Console.WriteLine(timer.ElapsedMilliseconds);
            var distinctById = teachers.DistinctBy(m => m.ID).ToList(); // de-duplicate by ID
            var distinctByIdAndName = teachers.DistinctBy(m => new { m.ID, m.Name }).ToList(); // de-duplicate by ID and Name
            timer.Stop();
            Console.WriteLine(timer.ElapsedMilliseconds);

            // Clear the stopwatch while it is stopped so the separator line is not timed.
            timer.Reset();
            Console.WriteLine("-----------------------------");

            // Round two: the comparer-based MyDistinct.
            timer.Start();
            Console.WriteLine(timer.ElapsedMilliseconds);
            var myDistinctById = teachers.MyDistinct(m => m.ID).ToList(); // de-duplicate by ID
            var myDistinctByIdAndName = teachers.MyDistinct(m => new { m.ID, m.Name }).ToList(); // de-duplicate by ID and Name
            timer.Stop();
            Console.WriteLine(timer.ElapsedMilliseconds);

            Console.ReadKey();
        }

 

结论:在上述测试中,方法一的速度大约是方法二的两倍(耗时约为方法二的一半)。

posted @ 2017-06-16 11:31  花生打代码会头痛  阅读(105)  评论(0)    收藏  举报