频繁模式挖掘:FP-growth(频繁模式增长)算法

原创作品,转载请指明出处,谢谢!
// FP-growth style frequent pattern mining over a small transaction database.
//
// Input (read from "in.txt"):
//   <min_support> <transaction_count>
//   then, per transaction: <item_count> <item> <item> ...
//
// The FP-tree is stored in a vector in child/sibling form; all links are
// indices into `Tree`, with -1 meaning "none".
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>
#include <cstring>
#include <stdio.h>
#include <algorithm>
using namespace std;

// One node of the FP-tree (child/sibling representation).
struct CSNode
{
    // Item identifier.
    string item;
    // Number of transactions whose sorted prefix passes through this node.
    int count;
    // Parent, first child and next sibling. `parent` is -1 both for the root
    // and for the root's direct children, so upward walks stop before the root.
    int parent, firstchild, nextsibling;
    // Next tree node holding the same item (chain starts in headTable).
    int next;
};

// An item of a transaction together with its global support, used for sorting.
struct sortNode
{
    string s;
    int count;
};

// Header table entry: item, global support, and the head of its node chain.
struct headNode
{
    string s;
    int count;
    int next;
};

// Order by descending support. Ties are broken by item name so that every
// transaction uses the same global item order; without the tie-break,
// equal-support items could appear in different orders in different
// transactions and would not share tree prefixes.
bool operator < (const headNode & a, const headNode & b)
{
    if (a.count != b.count)
        return a.count > b.count;
    return a.s < b.s;
}

bool operator < (const sortNode & a, const sortNode & b)
{
    if (a.count != b.count)
        return a.count > b.count;
    return a.s < b.s;
}

// Number of transactions actually read, and the minimum support threshold.
int events, min_spt;
// Scratch map used while mining one suffix item: pattern -> support.
map<string, int> freqD;
// Transactions reduced to their frequent items, sorted by descending support.
vector <vector<sortNode> > D;
vector <headNode> headTable;
vector <CSNode> Tree;

// Reads the transaction database from stdin, builds the header table of
// frequent items and fills D with the filtered, sorted transactions.
// An item repeated inside one transaction is counted once, because support
// is the number of transactions that contain the item.
void read_proc_data()
{
    map<string, int> support;
    vector <vector<string> > transactions;

    int declared = 0;
    if (!(cin >> min_spt >> declared))
        declared = 0;

    for (int i = 0; i < declared; i++)
    {
        int n = 0;
        if (!(cin >> n))
            break;  // input ended early: keep what was read so far
        vector<string> items;
        set<string> seen;
        for (int j = 0; j < n; j++)
        {
            string item;
            if (!(cin >> item))
                break;
            if (seen.insert(item).second)
            {
                items.push_back(item);
                support[item] += 1;
            }
        }
        transactions.push_back(items);
    }
    // Later loops index D by `events`, so it must match what was really read.
    events = static_cast<int>(transactions.size());

    for (map<string, int>::iterator it = support.begin(); it != support.end(); ++it)
    {
        if (it->second >= min_spt)
        {
            headNode entry;
            entry.s = it->first;
            entry.count = it->second;
            entry.next = -1;
            headTable.push_back(entry);
        }
    }
    sort(headTable.begin(), headTable.end());

    for (size_t i = 0; i < transactions.size(); i++)
    {
        vector<sortNode> filtered;
        for (vector<string>::iterator it = transactions[i].begin(); it != transactions[i].end(); ++it)
        {
            const int itemSupport = support[*it];
            if (itemSupport >= min_spt)
            {
                sortNode node;
                node.s = *it;
                node.count = itemSupport;
                filtered.push_back(node);
            }
        }
        sort(filtered.begin(), filtered.end());
        D.push_back(filtered);
    }
}

// Returns the position of `str` in headTable, or -1 when it is not present.
int find_index(const string & str)
{
    for (size_t i = 0; i < headTable.size(); i++)
    {
        if (headTable[i].s == str)
            return static_cast<int>(i);
    }
    return -1;
}

// Appends a fresh tree node for `item` below `parent`, pushes it onto the
// front of the item's header chain and returns its index in Tree.
// The caller is responsible for linking it as a child or sibling.
static int new_tree_node(int parent, const string & item)
{
    CSNode node;
    node.item = item;
    node.count = 1;
    node.firstchild = -1;
    node.nextsibling = -1;
    node.parent = (parent == 0) ? -1 : parent;

    const int index = find_index(item);
    node.next = (index >= 0) ? headTable[index].next : -1;
    Tree.push_back(node);

    const int id = static_cast<int>(Tree.size()) - 1;
    if (index >= 0)
        headTable[index].next = id;
    return id;
}

// Inserts items j..len-1 of transaction D[i] below tree node `parent`.
//   parent - index of the node to insert under (0 is the root)
//   p      - first child of `parent`, or -1 when it has no children
//   i, j   - transaction index and position of the next item to insert
//   len    - number of items in D[i]
void setNode(int parent, int p, int i, int j, int len)
{
    if (j >= len)
        return;

    const string & item = D[i][j].s;

    // Look for an existing child holding this item, remembering the last
    // sibling visited so a new node can be appended after it.
    int last = -1;
    while (p != -1 && Tree[p].item != item)
    {
        last = p;
        p = Tree[p].nextsibling;
    }

    if (p != -1)
    {
        Tree[p].count++;
    }
    else
    {
        p = new_tree_node(parent, item);
        if (last == -1)
            Tree[parent].firstchild = p;
        else
            Tree[last].nextsibling = p;
    }

    setNode(p, Tree[p].firstchild, i, j + 1, len);
}

// Creates the root node and inserts every transaction of D into the tree.
void creat_PT_Tree()
{
    CSNode root;
    root.count = 0;
    root.item = "";
    root.next = -1;
    root.parent = -1;
    root.nextsibling = -1;
    root.firstchild = -1;
    Tree.push_back(root);
    for (int i = 0; i < events; i++)
    {
        setNode(0, Tree[0].firstchild, i, 0, static_cast<int>(D[i].size()));
    }
}

// Returns true when some string in `tmp` consists of exactly the same
// characters as `s`, in any order. Sorted copies are compared; comparing
// sums of character codes would also match unrelated strings such as
// "AD" and "BC".
bool judge_exist(string s, vector<string> tmp)
{
    sort(s.begin(), s.end());
    for (size_t i = 0; i < tmp.size(); i++)
    {
        if (tmp[i].length() != s.length())
            continue;
        sort(tmp[i].begin(), tmp[i].end());
        if (tmp[i] == s)
            return true;
    }
    return false;
}

// Combines the patterns collected in freqD for suffix item `s` and prints
// every pattern whose support reaches min_spt as "{<pattern><s>} : <support>".
// NOTE(review): a combined pattern gets the smaller of its two parts'
// supports, which is only an upper bound of its true support, and patterns
// are split into single characters, so items are assumed to be one character
// long. Both behaviours are kept from the original implementation.
void print_freqD(string s)
{
    vector<string> keys;
    for (map<string, int>::iterator it = freqD.begin(); it != freqD.end(); ++it)
    {
        keys.push_back(it->first);
    }

    // `keys` grows inside the loop, so newly formed patterns are combined too.
    for (size_t i = 0; i < keys.size(); i++)
    {
        for (size_t j = i + 1; j < keys.size(); j++)
        {
            // Copies, because push_back below may reallocate `keys`.
            const string a = keys[i];
            const string b = keys[j];
            const string joined = a + b;

            set<string> symbols;
            for (size_t k = 0; k < joined.size(); k++)
                symbols.insert(joined.substr(k, 1));

            string merged;
            for (set<string>::iterator it = symbols.begin(); it != symbols.end(); ++it)
                merged.append(*it);

            // Skip when the union is no larger than one of the two parts.
            if (merged.length() != a.length() && merged.length() != b.length())
            {
                if (!judge_exist(merged, keys))
                {
                    const int supportA = freqD[a];
                    const int supportB = freqD[b];
                    freqD[merged] = supportA > supportB ? supportB : supportA;
                    keys.push_back(merged);
                }
            }
        }
    }

    for (map<string, int>::iterator it = freqD.begin(); it != freqD.end(); ++it)
    {
        if (it->second >= min_spt)
            cout << "{" << it->first << s << "}" << " : " << it->second << endl;
    }
}

// For every header item, from the last table entry to the first, collects
// the contiguous segments of each prefix path leading to that item into
// freqD and prints the resulting patterns. Each segment is weighted by the
// count of the item's node, i.e. by the number of transactions that share
// the prefix path.
void get_freqD()
{
    for (int i = static_cast<int>(headTable.size()) - 1; i >= 0; i--)
    {
        const string suffix = headTable[i].s;
        for (int p = headTable[i].next; p != -1; p = Tree[p].next)
        {
            const int pathCount = Tree[p].count;
            for (int q = Tree[p].parent; q != -1; q = Tree[q].parent)
            {
                string segment;
                for (int t = q; t != -1; t = Tree[t].parent)
                {
                    segment = Tree[t].item + segment;
                    freqD[segment] += pathCount;
                }
            }
        }
        cout << "以" << suffix << "结尾的频繁模式为:" << endl;
        print_freqD(suffix);
        freqD.clear();
    }
}

// Prints, for every node on every header chain, the path from that node up
// to the top of the tree as "<item> <count> " pairs, one path per line.
void print_Tree()
{
    for (int i = static_cast<int>(headTable.size()) - 1; i >= 0; i--)
    {
        for (int p = headTable[i].next; p != -1; p = Tree[p].next)
        {
            for (int q = p; q != -1; q = Tree[q].parent)
            {
                cout << Tree[q].item << " " << Tree[q].count << " ";
            }
            cout << endl;
        }
    }
}

int main()
{
    // All input is taken from in.txt; stop early when it cannot be opened
    // instead of running on an unusable stdin.
    if (freopen("in.txt", "r", stdin) == NULL)
    {
        cerr << "cannot open in.txt" << endl;
        return 1;
    }

    cout << "输入数据处理中... ..." << endl;
    read_proc_data();

    cout << "原始数据排序... ..." << endl;
    for (int i = 0; i < events; i++)
    {
        for (vector<sortNode>::iterator it = D[i].begin(); it != D[i].end(); ++it)
        {
            cout << it->s << " " << it->count << " ";
        }
        cout << endl;
    }
    for (vector<headNode>::iterator it = headTable.begin(); it != headTable.end(); ++it)
    {
        cout << it->s << " " << it->count << " ";
    }
    cout << endl;

    cout << "正在构造FP树... ..." << endl;
    creat_PT_Tree();

    cout << "正在生成条件模式基... ..." << endl;
    print_Tree();

    cout << "正在合取频繁项集... ..." << endl;
    get_freqD();
    return 0;
}

下面给一个测试数据:

2 5
6 M O N K E Y
6 D O N K E Y
4 M A K E
5 M U C K Y
6 C O O K I E

数据格式与上一篇相同:第一行依次是最小支持度和事务数,之后每行先给出该事务的商品个数,再列出各个商品;程序从 in.txt 读取这些数据。顺便说一下,我的代码是在 Code::Blocks 下运行通过的,如果遇到运行问题请自行排查。有意见欢迎与我沟通!

posted on 2012-06-14 10:50  _Clarence  阅读(487)  评论(0)  编辑  收藏  举报

导航