XY

没有任何借口!!!
  博客园  :: 首页  :: 新随笔  :: 联系 :: 订阅 订阅  :: 管理

找出n个数中重复最多的10个数

Posted on 2017-09-29 21:58  路缘  阅读(460)  评论(0)  编辑  收藏  举报

题目很清晰，直接上 Python 代码：

import pandas as pd
import copy

class BenchMark:
    """Record of the smallest count found in the top-10 table and its owner.

    ``MIN`` holds the smallest occurrence count recorded and ``data`` the
    value that has that count.  A fresh or reset instance has
    ``MIN == BenchMark.UNSET``.
    """

    # Sentinel meaning "no minimum recorded yet".  Infinity is greater than
    # any real count, so it can never be confused with one; the former
    # sentinel (10000) was an ordinary count that long inputs do reach.
    UNSET = float("inf")

    def __init__(self):
        self.Reset()

    def Reset(self):
        """Forget any recorded minimum."""
        self.MIN = BenchMark.UNSET
        self.data = 0


# value -> number of times it has been passed to FindTop10
dictCounts = {}
# current top-10 candidates: value -> count (holds at most 10 entries
# between calls to FindTop10)
dictTop10_D2C = {}
# scratch record filled in while looking for the entry to evict
BENCH_MARK = BenchMark()
# the entry most recently evicted from dictTop10_D2C
LAST_BENCH_MARK = BenchMark()
# instrumentation only: number of FindTop10 calls / number of table entries
# examined while choosing eviction victims
run_count1 = 0
run_count2 = 0


def FindTop10(data):
    """Count one occurrence of *data* and update the running top-10 table.

    ``dictCounts`` always receives the new count.  ``dictTop10_D2C`` is
    updated unless the new count is below the count of the most recently
    evicted entry.  When the table grows to 11 entries, the entry with the
    smallest count (the earliest-inserted one on ties) is evicted and
    remembered in ``LAST_BENCH_MARK``.

    Args:
        data: A hashable value from the input stream.
    """
    global BENCH_MARK, LAST_BENCH_MARK, run_count1, run_count2

    temp = dictCounts.get(data, 0) + 1
    dictCounts[data] = temp

    # just record run times
    run_count1 += 1

    # Skip values still below the count of the last evicted entry; this is
    # what keeps the eviction scan below from running on every call.
    if LAST_BENCH_MARK.MIN != BenchMark.UNSET and temp < LAST_BENCH_MARK.MIN:
        return

    dictTop10_D2C[data] = temp
    if len(dictTop10_D2C) <= 10:
        return

    # just record run times (one tick per entry examined)
    run_count2 += len(dictTop10_D2C)

    # min() returns the first smallest entry in iteration order and has no
    # upper bound on the counts it can compare, unlike a fixed sentinel.
    BENCH_MARK.Reset()
    victim = min(dictTop10_D2C, key=dictTop10_D2C.get)
    BENCH_MARK.MIN = dictTop10_D2C[victim]
    BENCH_MARK.data = victim

    # Keep an independent snapshot: BENCH_MARK is reset on the next eviction.
    LAST_BENCH_MARK = BenchMark()
    LAST_BENCH_MARK.MIN = BENCH_MARK.MIN
    LAST_BENCH_MARK.data = BENCH_MARK.data

    del dictTop10_D2C[victim]

def PrintData2Count(aDict):
    """Print each entry of *aDict* as ``<key>:<count>``, one per line.

    Keys are formatted with one decimal place and counts as integers;
    entries are printed in the mapping's iteration order.
    """
    for value, count in aDict.items():
        line = '%.1f:%d' % (value, count)
        print(line)

if __name__ == '__main__':
    # Run every value of the 'LastPx' column through the counter.
    frame = pd.read_csv('D:/data/ctp_data/rb/201709/rb1801_20170905.csv')
    prices = frame['LastPx']
    for price in prices:
        FindTop10(price)

    # Print the complete frequency table, its size, then the top-10 table.
    PrintData2Count(dictCounts)
    print("==============dictCounts length:", len(dictCounts))
    PrintData2Count(dictTop10_D2C)

    # Report the instrumentation counters collected by FindTop10.
    summary = "run_count1:%d,run_count2:%d" % (run_count1, run_count2)
    print(summary)

运行结果如下(前面的部分输出已省略):

……

4121.0:206
4123.0:278
4124.0:180
4122.0:244
4125.0:118
4126.0:34
4127.0:4
4081.0:1366
4080.0:1073
4077.0:1072
4078.0:1091
4079.0:800
4076.0:874
4075.0:886
4074.0:1108
4071.0:719
4073.0:1281
4072.0:1049
4070.0:567
4069.0:442
4068.0:290
4067.0:199
4066.0:204
4065.0:109
4064.0:60
4063.0:80
4062.0:57
4061.0:70
4060.0:70
4059.0:32
4057.0:6
4058.0:22
4129.0:6
4137.0:2
4135.0:2
4133.0:2
==============dictCounts length: 75
4109.0:2080
4108.0:2047
4095.0:3009
4096.0:2785
4094.0:2265
4099.0:2573
4098.0:2702
4097.0:2491
4100.0:2147
4107.0:1809
run_count1:70684,run_count2:19679