去除 DataTable中重复的Row (2)

3.  linq group by with a custom class

Declare Custom class

/// <summary>
/// A composite grouping key made of named values (for example column name -> cell value).
/// Two keys are equal when they hold the same set of names and every name maps to an
/// equal value, so instances can be used as keys in GroupBy, Dictionary or HashSet.
/// </summary>
/// <remarks>
/// The hash code is computed from the current contents of <see cref="keyBag"/>; changing
/// the dictionary after the key has been placed in a hash-based collection will make the
/// key unfindable.
/// </remarks>
public class PortableKey
{
    /// <summary>The name/value pairs that make up this key.</summary>
    public Dictionary<string, object> keyBag { get; set; }

    /// <summary>Creates a key that wraps (does not copy) the supplied dictionary.</summary>
    /// <param name="Keys">Name/value pairs identifying the item.</param>
    public PortableKey(Dictionary<string, object> Keys)
    {
        this.keyBag = Keys;
    }

    /// <summary>
    /// Value-based equality: same entry count, same names, and equal values per name.
    /// </summary>
    /// <param name="obj">Object to compare with; null or a non-PortableKey yields false.</param>
    /// <returns>true when both keys contain equal name/value pairs.</returns>
    public override bool Equals(object obj)
    {
        PortableKey other = obj as PortableKey;
        if (ReferenceEquals(other, null))
        {
            return false;
        }
        if (ReferenceEquals(this, other) || ReferenceEquals(keyBag, other.keyBag))
        {
            return true;
        }
        if (keyBag == null || other.keyBag == null)
        {
            return false;
        }
        // Without the count check a key with fewer entries would equal a superset key,
        // but not the other way round, which breaks the symmetry Equals must have.
        if (keyBag.Count != other.keyBag.Count)
        {
            return false;
        }

        foreach (KeyValuePair<string, object> entry in keyBag)
        {
            object otherValue;
            if (!other.keyBag.TryGetValue(entry.Key, out otherValue))
            {
                return false;
            }
            // object.Equals compares boxed values by value and is null-safe; the
            // != operator on two object references only compares identity.
            if (!object.Equals(entry.Value, otherValue))
            {
                return false;
            }
        }
        return true;
    }

    /// <summary>
    /// Combines the hashes of all entries. The combination is order-independent so that
    /// keys which are equal according to <see cref="Equals(object)"/> always hash alike,
    /// regardless of the order in which entries were inserted.
    /// </summary>
    /// <returns>A hash code; not unique, only consistent with Equals.</returns>
    public override int GetHashCode()
    {
        if (keyBag == null)
        {
            return 0;
        }

        unchecked
        {
            int combined = keyBag.Count;
            foreach (KeyValuePair<string, object> entry in keyBag)
            {
                int valueHash = entry.Value == null ? 0 : entry.Value.GetHashCode();
                // Mix name and value inside one entry, then add entries together:
                // addition is commutative, so enumeration order does not matter.
                combined += (entry.Key.GetHashCode() * 397) ^ valueHash;
            }
            return combined;
        }
    }
}

Create a Dictionary<string, object> containing only the key columns that we are interested in.

/// <summary>
/// Builds a name -> value map from the given row, restricted to the requested columns.
/// </summary>
/// <param name="row">Row whose cells are read through its column-name indexer.</param>
/// <param name="keys">Column names to copy; a repeated name simply overwrites its entry.</param>
/// <returns>A new dictionary with one entry per distinct name in <paramref name="keys"/>.</returns>
protected Dictionary<string, object> SetDictionary(DataRow row,  string[] keys)
{
    var picked = new Dictionary<string, object>();

    for (int i = 0; i < keys.Length; i++)
    {
        string column = keys[i];
        picked[column] = row[column];
    }

    return picked;
}

Dedup 

/// <summary>
/// Returns a new table that keeps, for every distinct combination of values in the
/// given key columns, only the first row encountered in <paramref name="dt"/>.
/// </summary>
/// <param name="dt">Source table; it is read but not modified.</param>
/// <param name="keys">Names of the columns whose combined values identify a duplicate.</param>
/// <returns>
/// A new <see cref="DataTable"/> with the surviving rows, or an empty table with the
/// same schema when <paramref name="dt"/> has no rows.
/// </returns>
protected DataTable Dedup(DataTable dt, params string[] keys)
{
    // Materialize once so the grouping runs a single time and we can test for emptiness.
    List<DataRow> firstOfEachGroup =
        (from row in dt.AsEnumerable()
         group row by new PortableKey(SetDictionary(row, keys))
         into g
         select g.First()).ToList();

    // CopyToDataTable throws InvalidOperationException for an empty sequence,
    // so an empty input is answered with a schema-only copy instead.
    if (firstOfEachGroup.Count == 0)
    {
        return dt.Clone();
    }

    return firstOfEachGroup.CopyToDataTable();
}

 

 

posted @ 2012-06-26 07:18  gunsmoke  阅读(265)  评论(0编辑  收藏  举报