# 第八章 (Chapter 8)

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

from numpy import *
 
def loadDataSet():
    """Return the built-in sample transaction database.

    Returns:
        A fresh list of ten transactions; each transaction is a list of
        single-character item labels taken from 'a'..'e'.
    """
    # One string per transaction; every character is one item label.
    transactions = (
        'ace', 'bd', 'bc', 'abcd', 'ab',
        'bc', 'ab', 'abce', 'abc', 'ace',
    )
    return [list(transaction) for transaction in transactions]
 
def createC1(dataSet):
    """Build the candidate 1-itemsets from a collection of transactions.

    Args:
        dataSet: iterable of transactions, each an iterable of items.

    Returns:
        A list with one single-element frozenset per distinct item, ordered
        by ascending item value.
    """
    # Dict keys de-duplicate while remembering first-seen order.
    distinct = dict.fromkeys(
        item for transaction in dataSet for item in transaction
    )
    return [frozenset([item]) for item in sorted(distinct)]
    

def scanD(D, Ck, minSupport):
    """Count candidate itemsets over the transactions and keep the frequent ones.

    Args:
        D: sized collection of transactions (sets of items).
        Ck: iterable of candidate itemsets (frozensets).
        minSupport: minimum fraction of transactions a candidate must occur in.

    Returns:
        A tuple ``(retList, supportData)``: ``retList`` lists the candidates
        whose support is at least ``minSupport`` (most recently first-seen
        candidate first), and ``supportData`` maps each of them to its support.
    """
    hits = {}
    for transaction in D:
        for candidate in Ck:
            if not candidate.issubset(transaction):
                continue
            hits[candidate] = hits.get(candidate, 0) + 1

    total = float(len(D))
    supportData = {}
    for candidate, count in hits.items():
        ratio = count / total
        if ratio >= minSupport:
            supportData[candidate] = ratio

    # Survivors are returned in reverse of the order they were first counted.
    retList = list(supportData)
    retList.reverse()
    return retList, supportData
 
def calSupport(D, Ck, min_support):
    """Compute candidate supports and return those meeting the threshold.

    Args:
        D: sized collection of transactions (sets of items).
        Ck: iterable of candidate itemsets (frozensets).
        min_support: minimum fraction of transactions a candidate must occur in.

    Returns:
        A tuple ``(relist, supportData)``: ``relist`` holds the qualifying
        candidates in the order they were first counted, and ``supportData``
        maps each of them to its support.
    """
    tally = {}
    for record in D:
        for itemset in Ck:
            if itemset.issubset(record):
                tally.setdefault(itemset, 0)
                tally[itemset] += 1

    n_records = float(len(D))
    supportData = {
        itemset: count / n_records
        for itemset, count in tally.items()
        if count / n_records >= min_support
    }
    return list(supportData), supportData
 

def aprioriGen(Lk, k):
    """Generate candidate k-itemsets from the frequent (k-1)-itemsets.

    Join step: two itemsets are merged when their first ``k - 2`` items, taken
    in sorted order, are identical. Prune step: a merged candidate is kept
    only if every subset obtained by dropping one item is itself in ``Lk``.

    Args:
        Lk: list of frequent itemsets (frozensets), all of size ``k - 1``.
            Items within an itemset must be mutually orderable.
        k: size of the candidates to generate (expected to be >= 2).

    Returns:
        A list of distinct candidate frozensets, in the order their
        generating pairs are encountered.
    """
    itemsets = [frozenset(itemset) for itemset in Lk]
    frequent = set(itemsets)  # O(1) membership for the prune step

    # Sort BEFORE slicing. A frozenset iterates in arbitrary order, so slicing
    # first and sorting afterwards compares unrelated "prefixes": valid
    # candidates get missed, or the same candidate is produced by several
    # pairs, which inflates its count when the result is scanned.
    prefixes = [sorted(itemset)[:k - 2] for itemset in itemsets]

    retList = []
    emitted = set()
    lenLk = len(itemsets)
    for i in range(lenLk):
        for j in range(i + 1, lenLk):
            if prefixes[i] != prefixes[j]:
                continue
            candidate = itemsets[i] | itemsets[j]
            if candidate in emitted:
                continue
            emitted.add(candidate)
            # Prune: every one-item-smaller subset must be frequent.
            # set.issuperset is used instead of all(...) because the
            # module-level `from numpy import *` may rebind `all`.
            if frequent.issuperset(candidate - {item} for item in candidate):
                retList.append(candidate)
    return retList

def apriori(dataSet, minSupport=0.2):
    """Mine all frequent itemsets of ``dataSet`` level by level.

    Args:
        dataSet: iterable of transactions, each an iterable of items.
        minSupport: minimum fraction of transactions an itemset must occur in.

    Returns:
        A tuple ``(L, supportData)``: ``L[i]`` is the non-empty list of
        frequent itemsets found at level ``i`` (level 0 comes from the
        1-item candidates), and ``supportData`` maps every frequent itemset
        found to its support.
    """
    C1 = createC1(dataSet)
    D = list(map(set, dataSet))
    current, supportData = calSupport(D, C1, minSupport)

    L = []
    k = 2
    # Keep growing until a level yields no frequent itemsets; the empty
    # level is never stored.
    while current:
        L.append(current)
        candidates = aprioriGen(current, k)
        current, level_support = scanD(D, candidates, minSupport)
        supportData.update(level_support)
        k += 1
    return L, supportData


def getSubset(fromList, toList):
    """Collect into ``toList`` the subsets reached by repeatedly dropping one item.

    For each item of ``fromList`` the set of the remaining items is appended
    to ``toList`` (as a frozenset) unless already present; subsets with more
    than one item are then expanded recursively in the same way.

    Args:
        fromList: list of items to take subsets of.
        toList: list that is extended in place with the subsets found.

    Returns:
        None; results are delivered through ``toList``.
    """
    pool = set(fromList)
    for item in fromList:
        smaller = frozenset(pool - {item})
        if smaller in toList:
            continue
        toList.append(smaller)
        if len(smaller) > 1:
            getSubset(list(smaller), toList)
def calcConf(freqSet, H, supportData, ruleList, minConf=0.7):
    """Evaluate the rules ``(freqSet - conseq) --> conseq`` for each ``conseq`` in ``H``.

    A rule is printed and appended to ``ruleList`` as
    ``(antecedent, conseq, confidence)`` when its confidence is at least
    ``minConf`` and its lift is greater than 1.

    Args:
        freqSet: the frequent itemset (frozenset) the rules are derived from.
        H: iterable of candidate consequents (frozensets).
        supportData: mapping from itemset to support; must contain
            ``freqSet``, every consequent and every antecedent.
        ruleList: list extended in place with the accepted rules.
        minConf: minimum confidence for a rule to be accepted.

    Returns:
        None; results are delivered through ``ruleList``.
    """
    for conseq in H:
        antecedent = freqSet - conseq
        # confidence = support(whole set) / support(antecedent)
        conf = supportData[freqSet] / supportData[antecedent]
        # lift = support(whole set) / (support(consequent) * support(antecedent))
        lift = supportData[freqSet] / (supportData[conseq] * supportData[antecedent])

        if not (conf >= minConf and lift > 1):
            continue
        print(antecedent, '-->', conseq, '支持度', round(supportData[freqSet], 6), '置信度:', round(conf, 6),
              'lift值为:', round(lift, 6))
        ruleList.append((antecedent, conseq, conf))
 
# Generate association rules
def gen_rule(L, supportData, minConf = 0.7):
    """Derive association rules from the frequent itemsets in ``L``.

    Args:
        L: list of levels of frequent itemsets; the first level (``L[0]``)
            is skipped.
        supportData: mapping from itemset to support.
        minConf: minimum confidence passed on to ``calcConf``.

    Returns:
        The list of rules accumulated by ``calcConf``, each a tuple
        ``(antecedent, consequent, confidence)``.
    """
    bigRuleList = []
    for level in L[1:]:
        for freqSet in level:
            # Every subset collected for this itemset is tried as a consequent.
            consequents = []
            getSubset(list(freqSet), consequents)
            calcConf(freqSet, consequents, supportData, bigRuleList, minConf)
    return bigRuleList
 
if __name__ == '__main__':
    # Demo driver: mine the frequent itemsets, then derive and print the rules.
    # NOTE(review): `data_translation` is not defined in the part of this file
    # visible here; presumably it comes from an earlier preprocessing step --
    # confirm. When it is missing, fall back to the bundled sample
    # transactions instead of failing with NameError.
    try:
        dataSet = data_translation
    except NameError:
        dataSet = loadDataSet()
    L, supportData = apriori(dataSet, minSupport = 0.02)
    rule = gen_rule(L, supportData, minConf = 0.35)

 

 

 

 

 

 

 

# posted @ 2023-03-19 21:58  必杀技welllee  阅读(28)  评论(0)    收藏  举报