# Batch operations to speed up the computation.
import os
# Order-preserving de-duplication of the submission rows.
# Every incoming row is compared against the rows kept so far using list
# membership, i.e. a linear scan per row.
submission100_10_f = 'submission100_10.csv'
submission100_10_f_uniq = 'submission100_10_uniq.csv'

l = []
with open(submission100_10_f, 'r', encoding='utf-8') as src:
    for row in src:
        if row in l:
            continue  # an identical row was already kept; skip the repeat
        l.append(row)

# Rows still carry their own line terminators, so they are written back as-is.
with open(submission100_10_f_uniq, 'w', encoding='utf-8') as dst:
    dst.writelines(l)
import os
# De-duplicate the submission rows in a single pass, keeping each row at the
# position where it first appears in the input.
#
# A plain set() also removes duplicates, but its iteration order for strings
# is not tied to the input order and can differ between runs: rows (including
# any header row) would be shuffled, and a final row without a trailing
# newline could end up in the middle of the output and fuse with the row
# written after it. Dict keys give the same hashed duplicate check while
# keeping insertion order (guaranteed from Python 3.7 on).
submission100_10_f = 'submission100_10.csv'
with open(submission100_10_f, 'r', encoding='utf-8') as fr:
    l = list(dict.fromkeys(fr))

submission100_10_f_uniq = 'submission100_10_uniq.csv'
with open(submission100_10_f_uniq, 'w', encoding='utf-8') as fw:
    # Rows keep their original line terminators, so no separator is added.
    fw.writelines(l)
# Keep only the submission rows whose key (the first two comma-separated
# fields) is listed in test1.csv, drop exact duplicate rows, write the result
# and stop the script.
t_f = 'test1.csv'
with open(t_f, 'r', encoding='utf-8') as fr:
    # Each line of test1.csv, minus its line terminator, is one allowed key.
    t_l = [line.replace('\n', '') for line in fr]

# Build the lookup set once: testing membership against the list would
# rescan it for every submission row.
t_keys = set(t_l)

submission100_10_f = 'submission100_10.csv'
with open(submission100_10_f, 'r', encoding='utf-8') as fr:
    # NOTE(review): a row with only two fields keeps its '\n' inside the key
    # and therefore never matches an allowed key; unchanged from the original.
    kept = [
        line for line in fr
        if ','.join(line.split(',')[0:2]) in t_keys
    ]

# dict.fromkeys drops duplicate rows while keeping first-seen order
# (guaranteed from Python 3.7 on). A set would shuffle the rows between runs
# and could move an unterminated final row into the middle of the output.
l = list(dict.fromkeys(kept))
print(len(l))

submission100_10_f_uniq = 'submission100_10_uniq.csv'
with open(submission100_10_f_uniq, 'w', encoding='utf-8') as fw:
    fw.writelines(l)

# Exit with status 211 so nothing below this point runs. Unlike os._exit,
# SystemExit lets the interpreter flush buffered stdout first, so the count
# printed above is not lost when output is redirected to a file or pipe.
raise SystemExit(211)
# Filtering implemented with set operations, computed once.
# Filter the submission by key with a single set difference instead of a
# membership test per row. Rows are keyed by their first two comma-separated
# fields; when a key repeats, the last row read for that key wins while the
# key keeps the position of its first occurrence.
t_f = 'test1.csv'
with open(t_f, 'r', encoding='utf-8') as fr:
    # Each line of test1.csv, minus its line terminator, is one allowed key.
    t_l = [line.replace('\n', '') for line in fr]

# key -> remainder of the row (third field onwards, line terminator included)
v_d = {}
submission100_10_f = 'submission100_10.csv'
with open(submission100_10_f, 'r', encoding='utf-8') as fr:
    for line in fr:
        if not line.endswith('\n'):
            # Only an unterminated final line gets here. Terminate it so that,
            # if its key repeats an earlier row and it is therefore written
            # mid-file, it cannot fuse with the row that follows it.
            line += '\n'
        ll = line.split(',')
        v_d[','.join(ll[0:2])] = ','.join(ll[2:])

# Keys present in the submission but missing from test1.csv, computed once.
set_sub = v_d.keys() - set(t_l)
print(len(v_d))
for k in set_sub:
    del v_d[k]
print(len(v_d))

# Re-join key and remainder; the remainder already ends with '\n'.
l = [k + ',' + v for k, v in v_d.items()]
print(len(l))

submission100_10_f_uniq = 'submission100_10_uniq.csv'
with open(submission100_10_f_uniq, 'w', encoding='utf-8') as fw:
    fw.writelines(l)

浙公网安备 33010602011771号