# label_cal.py
# count function; lambda; sort the dict; save the dict
#https://www.cnblogs.com/wind-wang/p/6090708.html
#https://www.cnblogs.com/wind-wang/p/6090708.html
"""Count how often each category label occurs in a label file.

Every line of the label file is expected to carry its category as the last
double-quoted field.  The script prints the categories that occur more than
``MIN_COUNT`` times, first in the order they were first seen and then sorted
from the most to the least frequent.
"""
from collections import Counter

# Label file that is read when the script is run directly.
file_name = "./label_0417.txt"
# A category is reported only if it occurs strictly more often than this.
MIN_COUNT = 200


def _extract_label(line):
    """Return the text just before the last double quote in *line*.

    Returns ``None`` when *line* contains no double quote at all (for example
    a blank line), so that callers can skip it instead of failing with an
    ``IndexError``.
    """
    parts = line.strip('\n').split('"')
    if len(parts) < 2:
        return None
    return parts[-2]


def count_labels(lines):
    """Count the occurrences of every label found in *lines*.

    *lines* is any iterable of strings (an open text file works).  Lines
    without a double quote are ignored.  The result is a ``Counter`` mapping
    label -> number of occurrences.
    """
    # Counter de-duplicates and counts in a single pass; dict.fromkeys(labels)
    # followed by list.count() per key gives the same numbers but rescans the
    # whole list once for every category.
    labels = (_extract_label(line) for line in lines)
    return Counter(label for label in labels if label is not None)


def filter_frequent(counts, min_count=MIN_COUNT):
    """Return the entries of *counts* whose value is greater than *min_count*."""
    return {label: n for label, n in counts.items() if n > min_count}


def sort_by_count(counts):
    """Return the ``(label, count)`` pairs of *counts*, highest count first.

    The result is a list of tuples, not a dict.  Labels with equal counts keep
    their relative order from *counts* because the sort is stable.
    """
    # key= works on both Python 2 and 3; the cmp-style comparator that
    # sorted() accepted positionally in Python 2 no longer exists in Python 3.
    return sorted(counts.items(), key=lambda item: item[1], reverse=True)


def main(path=file_name, min_count=MIN_COUNT):
    """Read the label file at *path* and print the frequent categories."""
    with open(path, 'r') as f:
        result = count_labels(f)

    frequent = filter_frequent(result, min_count)
    for k, v in frequent.items():
        print('"{}": {}'.format(k, v))

    print("\nThere are {} classes which have more than {} images".format(
        len(frequent), min_count))
    print("Below are sorted dict from high to low\n")

    ranked = sort_by_count(frequent)
    for k, v in ranked:
        print('"{}": {}'.format(k, v))
    # Kept from the original output: shows that the sorted result is a list.
    print(type(ranked))


if __name__ == "__main__":
    main()