import hashlib
def gen_test_data(count=30000000, path="./md5.txt"):
    """Write ``count`` MD5-derived integers to ``path``, one per line.

    For every ``i`` in ``[0, count)`` the string ``'adsf' + str(i)`` is hashed
    with MD5; the hex digest is read as a base-16 integer and written in
    decimal followed by a newline.  Every 10000 iterations the number of
    distinct values produced so far is printed as a progress indicator.

    Args:
        count: Number of lines to generate.  Defaults to 30000000, the value
            that used to be hard-coded.
        path: File to create or truncate.  Defaults to ``./md5.txt``, the
            path that used to be hard-coded.
    """
    progress_interval = 10000
    # Only the number of distinct values is ever reported, so a set suffices
    # (no need to remember which ``i`` produced each value).
    seen = set()
    # The context manager guarantees the file is flushed and closed, even if
    # the loop is interrupted part-way through.
    with open(path, "w") as out:
        i = 0
        # A while loop keeps this safe on Python 2, where range() would
        # materialise the whole sequence as a list.
        while i < count:
            # hashlib requires bytes on Python 3; the payload is pure ASCII,
            # so the digest is identical on Python 2 and 3.
            payload = ("adsf" + str(i)).encode("ascii")
            n = int(hashlib.md5(payload).hexdigest(), 16)
            seen.add(n)
            if i % progress_interval == 0:
                print(len(seen))
            out.write("%d\n" % n)
            i += 1
def test(in_path="./md5.txt", out_path="./result.txt"):
    """Count repeated lines of ``in_path`` and write a running tally.

    Each input line (including its line terminator) is hashed with MD5 and
    the digest, read as a base-16 integer, is used as a dictionary key.  For
    every line one record ``<line>\\t<occurrences so far>\\n`` is written to
    ``out_path``.  The zero-based line index is printed every 10000 lines as
    a progress indicator.

    The previous implementation wrote the line with its trailing newline
    still attached and no newline after the count, so consecutive records ran
    together; the terminator is now moved to the end of the record.

    Args:
        in_path: File to read.  Defaults to ``./md5.txt``, the path that used
            to be hard-coded.
        out_path: File to create or truncate.  Defaults to ``./result.txt``,
            the path that used to be hard-coded.
    """
    progress_interval = 10000
    counts = {}
    # Binary mode yields bytes on both Python 2 and 3, which is what hashlib
    # requires; the context manager closes both files on every exit path.
    with open(in_path, "rb") as src, open(out_path, "wb") as dst:
        for index, line in enumerate(src):
            key = int(hashlib.md5(line).hexdigest(), 16)
            seen = counts.get(key, 0) + 1
            counts[key] = seen
            # Strip the terminator from the echoed line so that each record
            # occupies exactly one output line.
            dst.write(b"%s\t%d\n" % (line.rstrip(b"\r\n"), seen))
            if index % progress_interval == 0:
                print(index)


# This is a script routine, not a unit test: stop pytest from collecting it
# by name when the module's contents are imported into a test module.
test.__test__ = False
# Script entry point.  test() reads ./md5.txt, which gen_test_data() writes;
# uncomment the call below to (re)generate that file before running test().
# gen_test_data()
test()