1 from itertools import combinations
2
# Sample transaction database: each inner list is one transaction and holds
# the identifiers of the items that occur together in it.
data = [
    ['I1', 'I2', 'I5'],
    ['I2', 'I4'],
    ['I2', 'I3'],
    ['I1', 'I2', 'I4'],
    ['I1', 'I3'],
    ['I2', 'I3'],
    ['I1', 'I3'],
    ['I1', 'I2', 'I3', 'I5'],
    ['I1', 'I2', 'I3'],
]
5
6
def apriori_gen(f_set, k):
    """Generate the candidate itemsets of size ``k`` that can be built from ``f_set``.

    Every ``k``-element combination of the entries of ``f_set`` is turned
    into a frozenset; a combination is kept unless has_infrequent_subset()
    reports that one of its members is missing from ``f_set``.

    Args:
        f_set: Collection of individual items to combine (the caller in this
            file passes the distinct items found in the transactions).
        k: Number of items in each generated candidate.

    Returns:
        A list of frozensets, one per surviving candidate, in the order
        ``itertools.combinations`` produces them.
    """
    # combinations() snapshots its input when it is created, so the candidates
    # can be filtered lazily without first materialising a temporary list.
    return [
        candidate
        for candidate in map(frozenset, combinations(f_set, k))
        if not has_infrequent_subset(candidate, f_set)
    ]
21
def has_infrequent_subset(c_set, f_set):
    """Report whether ``c_set`` contains an element that is absent from ``f_set``.

    Motivated by the Apriori property (any superset of an infrequent itemset
    is itself infrequent).  Note that the check is made per single element of
    ``c_set``; larger subsets of ``c_set`` are not examined.

    Args:
        c_set: Candidate itemset (iterable of hashable items).
        f_set: Collection of items regarded as frequent.

    Returns:
        True if at least one element of ``c_set`` is not in ``f_set``,
        otherwise False (including when ``c_set`` is empty).
    """
    return any(
        not frozenset([member]).issubset(f_set)
        for member in c_set
    )
28
def get_f_set(min_sup=2, transactions=None):
    """Return every frequent itemset with at least two items, with its support.

    Candidates of size ``k`` (starting at 2) are produced by apriori_gen()
    and counted against the transactions.  An itemset is frequent when it is
    contained in at least ``min_sup`` transactions.  Single-item sets are not
    reported.

    Args:
        min_sup: Absolute minimum support (a transaction count, not a ratio).
        transactions: Optional iterable of transactions, each an iterable of
            hashable items.  Defaults to the module-level ``data``.

    Returns:
        A list of ``(itemset, count)`` tuples, where ``itemset`` is a
        frozenset and ``count`` is the number of transactions containing it.
        Entries are grouped by ascending itemset size.
    """
    if transactions is None:
        transactions = data

    rows = [tuple(row) for row in transactions]
    # Build each transaction's set once instead of once per candidate.
    baskets = [frozenset(row) for row in rows]
    # Distinct items in first-seen order, so the order of the result is
    # reproducible (iterating a set of strings can differ between runs).
    items = list(dict.fromkeys(item for row in rows for item in row))

    all_f_set = []
    k = 2
    while k <= len(items):
        still_frequent = set()
        for candidate in apriori_gen(items, k):
            count = sum(1 for basket in baskets if candidate.issubset(basket))
            if count >= min_sup:
                all_f_set.append((candidate, count))
                still_frequent.update(candidate)
        # Apriori property: every item of a frequent (k+1)-itemset belongs to
        # some frequent k-itemset, so items that dropped out of all frequent
        # k-itemsets cannot contribute to larger ones.  When a level yields
        # nothing, ``items`` becomes empty and the loop stops.
        items = [item for item in items if item in still_frequent]
        k += 1
    return all_f_set
47
if __name__ == '__main__':
    # Script entry point: mine the sample transactions with the default
    # support threshold and print one (itemset, count) tuple per line.
    all_frequent_set = get_f_set()
    for itemset_with_count in all_frequent_set:
        print(itemset_with_count)
![]()