1 <meta http-equiv="Content-Type" content="text/html; charset=GBK" />
2 <?php
/*
 * Apriori algorithm (an implementation of association-rule mining)
 */
6
/*
 * Remove every item whose support count is less than or equal to 1
 */
10 //--------------------------------------------------------------------
/**
 * Prune items that cannot be frequent from the transaction matrix.
 *
 * Row 0 of $train is treated as the header (column 0 is skipped, the other
 * columns are item names); every following row is one transaction whose
 * cells compare equal to 1 when the item is present. Every item column whose
 * support count is less than or equal to 1 is removed from all rows.
 *
 * @param array $train Transaction matrix including the header row.
 * @return array Copy of $train without the pruned item columns.
 */
function first($train)
{
    // Nothing to prune without a usable header row.
    if (empty($train) || !isset($train[0]) || !is_array($train[0])) {
        return $train;
    }

    $rowCount = count($train);
    $columnCount = count($train[0]);

    // Collect the indexes of the columns whose support count is 0 or 1.
    $infrequent = array();
    for ($col = 1; $col < $columnCount; $col++) {
        $support = 0;
        for ($row = 1; $row < $rowCount; $row++) {
            // Loose comparison on purpose: cells read from a text file are strings.
            // isset() keeps short rows from raising "undefined offset".
            if (isset($train[$row][$col]) && $train[$row][$col] == 1) {
                $support++;
            }
        }
        if ($support <= 1) {
            $infrequent[] = $col;
        }
    }

    $pruned = $train;
    // Remove from right to left so the remaining column indexes stay valid.
    for ($k = count($infrequent) - 1; $k >= 0; $k--) {
        for ($row = 0; $row < $rowCount; $row++) {
            if (is_array($pruned[$row])) {
                array_splice($pruned[$row], $infrequent[$k], 1);
            }
        }
    }

    return $pruned;
}
43 //--------------------------------------------------------------------
44
/*
 * Number of transactions in the full transaction set D that contain item set X
 */
48 //--------------------------------------------------------------------
/**
 * Count the transactions that contain every item of an item set.
 *
 * Each entry of $array is looked up (loose comparison) in the header row
 * $train[0], starting at column 1; entries that are not found in the header
 * are ignored. A data row (index 1 and up) is counted when none of the
 * resolved columns compares equal to 0 in that row.
 *
 * @param array $train Transaction matrix whose row 0 is the header.
 * @param array $array Item names, indexed 0..n-1.
 * @return int Number of matching rows.
 */
function search($train, $array)
{
    // Translate item names into column positions of the header row.
    $columns = array();
    $wanted = count($array);
    for ($a = 0; $a < $wanted; $a++) {
        $width = count($train[0]);
        $col = 1;
        while ($col < $width) {
            if ($train[0][$col] == $array[$a]) {
                $columns[] = $col;
                break; // only the first matching column is used
            }
            $col++;
        }
    }

    // Count the rows in which all resolved columns are set.
    $matches = 0;
    $rowTotal = count($train);
    for ($r = 1; $r < $rowTotal; $r++) {
        $hasAll = true;
        foreach ($columns as $col) {
            if ($train[$r][$col] == 0) {
                $hasAll = false;
                break;
            }
        }
        if ($hasAll) {
            $matches++;
        }
    }

    return $matches;
}
82 //--------------------------------------------------------------------
83
/*
 * Merge two arrays, removing the elements they have in common (duplicates)
 */
87 //--------------------------------------------------------------------
/**
 * Merge two arrays into one list without duplicate values.
 *
 * The elements of $arrayA come first, followed by those of $arrayB; only the
 * first occurrence of each value is kept (array_unique semantics) and the
 * result is re-indexed from 0. Two empty inputs yield an empty array.
 *
 * @param array $arrayA First list.
 * @param array $arrayB Second list.
 * @return array Re-indexed list of the distinct values.
 */
function merge($arrayA, $arrayB)
{
    // array_values() on the inputs makes the concatenation independent of keys.
    $combined = array_merge(array_values($arrayA), array_values($arrayB));

    // array_unique() preserves keys, so re-index to get a clean 0..n-1 list.
    return array_values(array_unique($combined));
}
114 //--------------------------------------------------------------------
115
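/*
 * Check whether two one-dimensional arrays are equal (key positions are
 * ignored, and duplicate elements inside an array are not taken into account)
 */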
119 //--------------------------------------------------------------------
/**
 * Tell whether two lists hold the same values regardless of order.
 *
 * The lists are considered equal when they have the same length and every
 * element of $arrayA (read at indexes 0..n-1) occurs in $arrayB under strict
 * comparison. Repeated values inside a list are not checked separately.
 *
 * @param array $arrayA First list.
 * @param array $arrayB Second list.
 * @return bool True when the lists are considered equal.
 */
function judgeequal($arrayA, $arrayB)
{
    $size = count($arrayA);

    // Different lengths can never match.
    if ($size != count($arrayB)) {
        return false;
    }

    for ($pos = 0; $pos < $size; $pos++) {
        $present = in_array($arrayA[$pos], $arrayB, true);
        if (!$present) {
            return false;
        }
    }

    return true;
}
145 //--------------------------------------------------------------------
146
/*
 * Compute the support and the confidence
 */
150 //--------------------------------------------------------------------
/**
 * Compute support and confidence of the rule $arrayA => $arrayB.
 *
 * support    = count(A u B) / number of data rows (count($train) - 1)
 * confidence = count(A u B) / count(A)
 *
 * Both results are written through the reference parameters. When a
 * denominator is zero (no data rows, or no row contains A) the corresponding
 * value is set to 0 instead of dividing by zero.
 *
 * @param array $arrayA      Antecedent item set.
 * @param array $arrayB      Consequent item set.
 * @param array $train       Transaction matrix whose row 0 is the header.
 * @param mixed $support     Receives the support value.
 * @param mixed $confidence  Receives the confidence value.
 * @return int Always 0.
 */
function support_confidence($arrayA, $arrayB, $train, &$support, &$confidence)
{
    $union = merge($arrayA, $arrayB);
    $countxy = search($train, $union);

    // Row 0 is the header, so it is excluded from the transaction total.
    $transactions = count($train) - 1;
    $support = ($transactions > 0) ? $countxy / $transactions : 0;

    $countx = search($train, $arrayA);
    $confidence = ($countx > 0) ? $countxy / $countx : 0;

    return 0;
}
161 //--------------------------------------------------------------------
162
/*
 * Apriori algorithm
 */
166 //--------------------------------------------------------------------
/**
 * Run the Apriori-style search for frequent item sets.
 *
 * After pruning the matrix with first(), every pair of items whose support
 * and confidence reach the thresholds forms the first level. Each following
 * level is built by merging every pair of item sets of the previous level,
 * keeping the merged sets that reach the thresholds and dropping duplicates
 * (as decided by judgeequal()). The loop stops once a level holds at most
 * one item set.
 *
 * @param array $train Transaction matrix whose row 0 is the header.
 * @param mixed $sup   Minimum support (compared with >=).
 * @param mixed $con   Minimum confidence (compared with >=).
 * @return array List of levels; each level is a list of item sets. The last
 *               level may be empty.
 */
function Apriori($train, $sup, $con)
{
    $aprioriAll = array(); // every level that was generated
    $apriori = array();    // the level currently being processed

    $train = first($train);

    // Without a header row there are no items to combine.
    if (!isset($train[0]) || !is_array($train[0])) {
        $aprioriAll[] = $apriori;
        return $aprioriAll;
    }

    // Level 1: all pairs of single items that reach both thresholds.
    $columnCount = count($train[0]);
    for ($i = 1; $i < $columnCount - 1; $i++) {
        $arrayA = array($train[0][$i]);
        for ($j = $i + 1; $j < $columnCount; $j++) {
            $arrayB = array($train[0][$j]);
            support_confidence($arrayA, $arrayB, $train, $support, $confidence);
            if (($support >= $sup) && ($confidence >= $con)) {
                $apriori[] = merge($arrayA, $arrayB);
            }
        }
    }
    $aprioriAll[] = $apriori;

    while (count($apriori) > 1) {
        // Candidates for the next level, rebuilt from scratch on every pass so
        // that item sets of earlier levels never leak into the new level.
        $candidates = array();
        $levelSize = count($apriori);
        for ($i = 0; $i < $levelSize - 1; $i++) {
            for ($j = $i + 1; $j < $levelSize; $j++) {
                $merged = merge($apriori[$i], $apriori[$j]);
                support_confidence($apriori[$i], $merged, $train, $support, $confidence);
                if (($support >= $sup) && ($confidence >= $con)) {
                    $candidates[] = $merged;
                }
            }
        }

        // Drop duplicates; scanning from the end keeps lower indexes stable,
        // and array_splice() re-indexes the list after each removal.
        for ($i = 0; $i < count($candidates) - 1; $i++) {
            for ($j = count($candidates) - 1; $j >= $i + 1; $j--) {
                if (judgeequal($candidates[$i], $candidates[$j])) {
                    array_splice($candidates, $j, 1);
                }
            }
        }

        $apriori = $candidates;
        $aprioriAll[] = $apriori;
    }

    return $aprioriAll;
}
231 //--------------------------------------------------------------------
232
/*
 * Read the contents of a .txt file into an array
 * $filename: name of the file
 */
237 //--------------------------------------------------------------------
/**
 * Read a space-separated text file into a list of rows.
 *
 * The file is split into lines on any of "\r\n", "\r" or "\n"; blank lines
 * (such as the one produced by a trailing newline) are skipped so they are
 * not mistaken for transactions. Every remaining line is split on single
 * spaces.
 *
 * @param string $filename Path of the file to read.
 * @return array List of rows, each a list of string fields.
 * @throws RuntimeException When the file cannot be read.
 */
function getFileContent($filename)
{
    $content = file_get_contents($filename);
    if ($content === false) {
        throw new RuntimeException('Unable to read file: ' . $filename);
    }

    $rows = array();
    foreach (preg_split('/\r\n|\r|\n/', $content) as $line) {
        if (trim($line) === '') {
            continue; // ignore empty lines
        }
        $rows[] = explode(' ', $line);
    }

    return $rows;
}
253 //--------------------------------------------------------------------
254
255
/*
 * Write the contents of an array to a .txt file
 * $result: the array contents to store
 * $filename: name of the file
 */
261 //--------------------------------------------------------------------
/**
 * Write a three-level array (levels -> item sets -> items) to a text file.
 *
 * Every item set becomes one line: each item is followed by a tab and the
 * line is terminated with "\r\n". Levels that are not arrays (for example
 * null) are skipped instead of failing in count().
 *
 * @param array  $result   List of levels, each a list of item sets.
 * @param string $filename Path of the file to (over)write.
 * @return void
 * @throws RuntimeException When the file cannot be opened for writing.
 */
function Array_Totxt($result, $filename)
{
    $fp = fopen($filename, 'wb');
    if ($fp === false) {
        throw new RuntimeException('Unable to open file for writing: ' . $filename);
    }

    foreach ((array) $result as $level) {
        if (!is_array($level)) {
            continue;
        }
        foreach ($level as $itemset) {
            $line = '';
            foreach ((array) $itemset as $item) {
                $line .= $item . "\t";
            }
            fwrite($fp, $line . "\r\n");
        }
    }

    fclose($fp);
}
282 //--------------------------------------------------------------------
// Driver: load the transactions, mine them with minimum support 0.5 and
// minimum confidence 0.6, print the result and store it in result.txt.
$train = getFileContent("train.txt");
// $train = getFileContent("er.txt");
$apriori = Apriori($train, 0.5, 0.6);
echo "<pre>";
print_r($apriori);
echo "</pre>"; // close the tag so the emitted HTML stays well-formed
Array_Totxt($apriori, "result.txt");
// Array_Totxt($apriori,"erresult.txt")
290 ?>