1 <?php
2
3 namespace App\Http\Controllers\Caiji;
4
5 use Illuminate\Http\Request;
6 use App\Http\Controllers\Controller;
7 use Illuminate\Support\Facades\DB;
8
9
10 class CollectionCotontroller extends Controller
11 {
/** @var string Base URL of the site being scraped. */
public $url;

/** @var mixed Body of the most recent film_get() call made without $returnCookie. */
public $data;

/**
 * Configure the scraper and make sure the id/url index file exists.
 *
 * The index lives at AllIdData/GetId.txt, relative to the current working
 * directory, and is read and written by the other methods of this class.
 */
public function __construct()
{
    // Raise PHP's maximum execution time for long scraping runs.
    ini_set('max_execution_time', '1000000');
    // Error-reporting override (left disabled).
    // error_reporting(0);
    // Site to scrape.
    $this->url = "http://33uudy.com";

    if (!is_dir('AllIdData')) {
        mkdir('AllIdData', 0777);
    }
    // Create the index even when the directory already exists; readers
    // of this file assume it is present.
    if (!is_file('AllIdData/GetId.txt')) {
        file_put_contents('AllIdData/GetId.txt', '');
    }
}
26
/**
 * Fetch a URL with cURL (GET).
 *
 * @param string $url          Address to fetch; falls back to $this->url when empty.
 * @param string $proxy        Proxy passed to CURLOPT_PROXY (empty string = none).
 * @param string $cookie       Cookie header value to send, if any.
 * @param int    $returnCookie When truthy, response headers are captured and the
 *                             first Set-Cookie value is returned with the body.
 *
 * @return string|array The response body (also stored in $this->data), or
 *                      ['cookie' => string, 'content' => string] when
 *                      $returnCookie is truthy. On a cURL failure the cURL
 *                      error message is returned as a string.
 */
public function film_get($url = "", $proxy = "", $cookie = "", $returnCookie = 0)
{
    if (!$url) {
        $url = $this->url;
    }

    $curl = curl_init();
    curl_setopt($curl, CURLOPT_PROXY, $proxy);
    curl_setopt($curl, CURLOPT_URL, $url);
    // Present a browser User-Agent string.
    curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)');
    // NOTE(review): TLS peer/host verification is disabled, so HTTPS
    // responses are not authenticated. Kept as in the original.
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, false);
    // Follow redirects and set the Referer automatically while doing so.
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($curl, CURLOPT_AUTOREFERER, 1);
    if ($cookie) {
        curl_setopt($curl, CURLOPT_COOKIE, $cookie);
    }
    curl_setopt($curl, CURLOPT_HEADER, $returnCookie);
    curl_setopt($curl, CURLOPT_TIMEOUT, 10);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);

    $data = curl_exec($curl);
    if (curl_errno($curl)) {
        $error = curl_error($curl);
        // Release the handle on the failure path too.
        curl_close($curl);
        return $error;
    }
    // Total size of all header blocks (one per redirect hop); must be read
    // before the handle is closed.
    $headerSize = (int) curl_getinfo($curl, CURLINFO_HEADER_SIZE);
    curl_close($curl);

    if ($returnCookie) {
        // Split on the reported header size: splitting on the first blank
        // line breaks once redirects add more than one header block.
        $header = (string) substr($data, 0, $headerSize);
        $body = (string) substr($data, $headerSize);

        $info = [];
        // Header names are case-insensitive; tolerate a missing cookie and
        // a cookie without trailing attributes.
        $info['cookie'] = preg_match('/Set-Cookie:\s*([^;\r\n]*)/i', $header, $match) ? $match[1] : '';
        $info['content'] = $body;
        return $info;
    }

    return $this->data = $data;
}
62
/**
 * Determine how many listing pages the site has.
 *
 * Fetches the default URL, locates the pager link whose text is "尾页"
 * and reads the last run of digits from its href.
 *
 * @return int The last page number, or 1 when it cannot be determined.
 */
public function page()
{
    $html = $this->film_get();
    if (!is_string($html)) {
        return 1;
    }

    // Each step only runs when the previous one matched, so a missing pager
    // no longer reads undefined offsets.
    if (preg_match("/<a.*class=\"pagelink_a.*<\/a>/", $html, $pager)
        && preg_match('/<a\b[^>]+\bhref="([^"]*)"[^>]*>尾页<\/a>/', $pager[0], $lastLink)
        // Last run of digits in the href; unlike /[0-9].*[0-9]/ this also
        // matches a single-digit page number.
        && preg_match('/(\d+)\D*$/', $lastLink[1], $digits)
    ) {
        $lastPage = (int) $digits[1];
        if ($lastPage > 0) {
            return $lastPage;
        }
    }

    return 1;
}
83
/**
 * Scrape the listing pages and the detail page of every listed video.
 *
 * @param int $set_max_page Last listing page to scrape; when falsy the page
 *                          count is detected with page().
 * @param int $set_min_page First listing page to scrape.
 *
 * @return array List of rows (one associative array per video) with the keys
 *               last, name, letter, note, state, downurl, pic, subname,
 *               director, actor, area, lang, score, des, type_name, year,
 *               playfrom and dd.
 */
public function all_data($set_max_page, $set_min_page = 1)
{
    // Only ask the site for the page count when the caller did not give one.
    $maxpage = $set_max_page ? $set_max_page : $this->page();

    // Rows are appended to one flat list. Keying them by their position on
    // the page made every page overwrite the previous one.
    $rows = [];
    for ($page = $set_min_page; $page <= $maxpage; $page++) {
        $pageUrl = $this->url . '/?m=vod-index-pg-' . $page . '.html';
        $html = $this->film_get($pageUrl);

        if (is_string($html) && preg_match_all("/<span class=[\"|']tt[\"|'].*<\/span>/i", $html, $items)) {
            foreach ($items[0] as $itemHtml) {
                $row = $this->buildVodRow($itemHtml);
                if ($row !== null) {
                    $rows[] = $row;
                }
            }
        }

        // Pause after every fifth page, but not after the final one.
        if ($page % 5 == 0 && $page < $maxpage) {
            sleep(10);
        }
    }

    return $rows;
}

/**
 * Build one row from a listing entry and its detail page.
 *
 * @param string $itemHtml HTML of one listing entry.
 *
 * @return array|null The row, or null when the entry has no detail link.
 */
private function buildVodRow($itemHtml)
{
    if (!preg_match('/href="([^"]+)/', $itemHtml, $href)) {
        return null;
    }

    // Anchor text is split on the first space: title, then status note.
    preg_match('/<a href="[^"]*"[^>]*>(.*)<\/a>/', $itemHtml, $anchor);
    $parts = explode(' ', isset($anchor[1]) ? $anchor[1] : '');
    $name = $parts[0];
    $note = isset($parts[1]) ? $parts[1] : '';

    $row = [];
    // NOTE(review): the original computed this as
    // intval(substr(<xing_vb6/7 span text>, 3, 0)), which is always 0 because
    // the requested length is 0. The intended value is unclear, so 0 is kept.
    $row['last'] = 0;
    $row['name'] = $name;
    $row['letter'] = $this->getFirstCharter($name);
    $row['note'] = $note;
    // First number in the note; /\d+/ also accepts a single digit.
    $row['state'] = preg_match('/\d+/', $note, $number) ? intval($number[0]) : 0;

    $detailUrl = $this->url . $href[1];
    $detail = $this->get_link_data($this->film_get($detailUrl));
    if (!is_array($detail)) {
        $detail = [];
    }
    $row['downurl'] = $detailUrl;

    $columns = [
        'pic' => 'vod_pic',
        'subname' => 'vod_ename',
        'director' => 'vod_director',
        'actor' => 'vod_actor',
        'area' => 'vod_area',
        'lang' => 'vod_language',
        'score' => 'score',
        'des' => 'vod_content',
    ];
    foreach ($columns as $column => $source) {
        $row[$column] = isset($detail[$source]) ? $detail[$source] : '';
    }

    // Only the first space-separated type is mapped; '福利片' is the
    // fallback when the detail page exposes no type.
    $row['type_name'] = $this->type_tf(isset($detail['vod_type']) ? explode(' ', $detail['vod_type'])[0] : '福利片');
    // A missing year is treated like the site's "未知" marker and stored as 1.
    $year = isset($detail['vod_year']) ? $detail['vod_year'] : "未知";
    $row['year'] = $year == "未知" ? 1 : $year;
    $row['playfrom'] = '';

    // Play sources: each source list is prefixed with '$' and its entries
    // are joined with '#'.
    $playUrls = '';
    if (isset($detail['Episodes']) && is_array($detail['Episodes'])) {
        foreach ($detail['Episodes'] as $episodeList) {
            $playUrls .= '$' . implode('#', $episodeList);
        }
    }
    $row['dd'] = $playUrls;

    return $row;
}
157
/**
 * Extract the fields of one video from the HTML of its detail page.
 *
 * @param string $url HTML of the detail page (despite the name, this is the
 *                    page content, not an address).
 *
 * @return array Always contains vod_pic, score, vod_content and Episodes
 *               (a list of source lists, possibly empty). When the info list
 *               is found it also contains vod_ename, vod_director, vod_actor,
 *               vod_type, vod_area, vod_language, vod_year and vod_addtime.
 */
public function get_link_data($url)
{
    $html = (string) $url;
    $arr = [];

    // Poster image: src attribute of the first lazy-loaded <img>.
    $arr['vod_pic'] = '';
    if (preg_match('/<img class=\"lazy.*?\/>/', $html, $img)
        && preg_match('/src=\"([^ \t]+)\"/', $img[0], $src)
    ) {
        $arr['vod_pic'] = $src[1];
    }

    // Score: text of the first <label>.
    $arr['score'] = preg_match('/<label.*?<\/label>/', $html, $score) ? strip_tags($score[0]) : '';

    // Description; a single space stands in for an empty description.
    $description = '';
    if (preg_match('/<div class=\"vodplayinfo\"><!--介绍开始-->.*?<\/div>/ism', $html, $content)) {
        $description = strip_tags($content[0]);
    }
    $arr['vod_content'] = $description ? $description : " ";

    // Info list: the first <ul> inside the vodinfobox block. Each <li> starts
    // with a three-character label, which is cut off.
    if (preg_match("/<div class=[\"|']vodinfobox.*<\/div>/ism", $html, $box)
        && preg_match('/<ul>.*?<\/ul>/ism', $box[0], $list)
        && preg_match_all('/<li>.*?<\/li>/', $list[0], $items)
    ) {
        $fields = ['vod_ename', 'vod_director', 'vod_actor', 'vod_type', 'vod_area', 'vod_language', 'vod_year'];
        foreach ($fields as $index => $field) {
            $arr[$field] = isset($items[0][$index])
                ? mb_substr(strip_tags($items[0][$index]), 3, null, 'UTF-8')
                : '';
        }
        $arr['vod_addtime'] = time();
    }

    // Play sources: every <ul> after the "来源" heading is one source list.
    // The key is always present so callers can iterate it safely.
    $arr['Episodes'] = [];
    if (preg_match('/<h3>来源.*<\/h3>.*<ul>.*<\/ul>/ism', $html, $sources)
        && preg_match_all('/<ul>.*?<\/ul>/s', $sources[0], $lists)
    ) {
        foreach ($lists[0] as $listIndex => $listHtml) {
            preg_match_all('/<li.*?<\/li>/ism', $listHtml, $entries);
            foreach ($entries[0] as $entryIndex => $entryHtml) {
                $arr['Episodes'][$listIndex][$entryIndex] = strip_tags($entryHtml);
            }
        }
    }

    return $arr;
}
209
/**
 * Map a scraped type label onto the local category name.
 *
 * The label is checked for each known fragment in order; the first fragment
 * it contains decides the category. Labels containing none of the fragments
 * are returned unchanged.
 *
 * @param string $type Type label taken from the detail page.
 *
 * @return string Local category name, or $type itself when nothing matches.
 */
public function type_tf($type)
{
    // fragment => category, in the order the fragments are tested.
    $categoryByFragment = [
        '动漫' => '动漫',
        '动画片' => '动漫',
        '奇幻片' => '剧情片',
        '伦理' => '伦理片',
        '韩剧' => '日韩剧',
        '韩国剧' => '日韩剧',
        '其他剧' => '电视剧',
        '海外剧' => '欧美剧',
        '日剧' => '日韩剧',
        '日本剧' => '日韩剧',
        '台剧' => '港台剧',
        '台湾剧' => '港台剧',
        '港剧' => '港台剧',
        '香港剧' => '港台剧',
        '泰剧' => '电视剧',
        '泰国剧' => '电视剧',
        '视讯美女' => '福利片',
        '腿模写真' => '福利片',
    ];

    foreach ($categoryByFragment as $fragment => $category) {
        if (strstr($type, $fragment)) {
            return $category;
        }
    }

    return $type;
}
254
/**
 * Return the index letter for a title.
 *
 * Titles starting with an ASCII letter or digit yield that character
 * (letters upper-cased, matching the upper-case letters returned for Chinese
 * titles). Otherwise the first character is converted to GBK and its code is
 * looked up in the pinyin initial ranges.
 *
 * @param string $str Title (UTF-8).
 *
 * @return string|int '' for an empty title, 'A'..'Z' or '0'..'9' when an
 *                    initial can be determined, 0 otherwise.
 */
public function getFirstCharter($str)
{
    if ($str === null || (string) $str === '') {
        return '';
    }
    $str = str_replace('・', '', (string) $str);
    // The title may consist solely of the separator removed above.
    if ($str === '') {
        return '';
    }

    // Square-bracket offsets: the $str{0} form is a parse error on PHP 8.
    $first = strtoupper($str[0]);
    $code = ord($first);
    // 'Z' is 90; an upper bound of 91 also accepted '['.
    if (($code >= 65 && $code <= 90) || ($code >= 48 && $code <= 57)) {
        return $first;
    }

    // Convert only the first character, so characters later in the title
    // that GBK cannot represent do not break the lookup.
    $gbk = mb_convert_encoding(mb_substr($str, 0, 1, 'UTF-8'), 'GBK', 'UTF-8');
    if (!is_string($gbk) || strlen($gbk) < 2) {
        return 0;
    }
    $asc = ord($gbk[0]) * 256 + ord($gbk[1]) - 65536;
    if ($asc < -20319) {
        return 0;
    }

    // Inclusive upper bound of each letter's range; the ranges are
    // contiguous and start at -20319.
    $upperBounds = [
        'A' => -20284, 'B' => -19776, 'C' => -19219, 'D' => -18711,
        'E' => -18527, 'F' => -18240, 'G' => -17923, 'H' => -17418,
        'J' => -16475, 'K' => -16213, 'L' => -15641, 'M' => -15166,
        'N' => -14923, 'O' => -14915, 'P' => -14631, 'Q' => -14150,
        'R' => -14091, 'S' => -13319, 'T' => -12839, 'W' => -12557,
        'X' => -11848, 'Y' => -11056, 'Z' => -10247,
    ];
    foreach ($upperBounds as $letter => $upperBound) {
        if ($asc <= $upperBound) {
            return $letter;
        }
    }

    return 0;
}
290
291
/**
 * Scrape the listing pages and store the result (active de-duplication).
 *
 * Videos whose detail URL is already recorded in the id/url index get their
 * play sources updated in the vods table; all others are inserted and added
 * to the index.
 *
 * @param int $page Passed to all_data() as the last listing page to scrape.
 *
 * @return void
 */
public function insert_into($page = 1)
{
    $rows = $this->all_data($page);
    if (!is_array($rows)) {
        return;
    }

    // id => detail URL of everything stored so far.
    $known = $this->updateLink();

    foreach ($rows as $row) {
        // The stored row id is the index key of this URL. The original used
        // a loop variable that an inner foreach had overwritten, so it
        // updated the wrong row.
        $id = array_search($row['downurl'], $known, true);

        if ($id !== false) {
            // The row already carries the play sources scraped moments ago;
            // fetching the detail page a second time is not needed.
            DB::table('vods')->where('id', "=", $id)
                ->update(['dd' => isset($row['dd']) ? $row['dd'] : '']);
        } else {
            $newId = DB::table('vods')->insertGetId($row);
            $this->getLastId($newId, $row['downurl']);
            // Keep the in-memory index current so a URL repeated later in
            // this batch is updated instead of inserted twice.
            $known[$newId] = $row['downurl'];
        }
    }
}
317
/**
 * Store already-scraped rows (passive de-duplication).
 *
 * Rows whose detail URL is already recorded in the id/url index get their
 * play sources updated in the vods table; all others are inserted and added
 * to the index.
 *
 * @param array $data Rows as produced by the scraping methods; each needs a
 *                    'downurl' key.
 *
 * @return array Ids of the rows that were newly inserted.
 */
public function set_to_db($data)
{
    $insertedIds = [];
    if (!is_array($data)) {
        return $insertedIds;
    }

    // id => detail URL of everything stored so far.
    $known = $this->updateLink();

    foreach ($data as $row) {
        // The stored row id is the index key of this URL. The original used
        // a loop variable that an inner foreach had overwritten, so it
        // updated the wrong row.
        $id = array_search($row['downurl'], $known, true);

        if ($id !== false) {
            if (isset($row['dd'])) {
                $playUrls = $row['dd'];
            } else {
                // Row came without play sources: read them from the detail page.
                $detail = $this->get_link_data($this->film_get($row['downurl']));
                $playUrls = '';
                if (isset($detail['Episodes']) && is_array($detail['Episodes'])) {
                    foreach ($detail['Episodes'] as $episodeList) {
                        $playUrls .= '$' . implode('#', $episodeList);
                    }
                }
            }
            DB::table('vods')->where('id', "=", $id)
                ->update(['dd' => $playUrls]);
        } else {
            $newId = DB::table('vods')->insertGetId($row);
            $insertedIds[] = $newId;
            $this->getLastId($newId, $row['downurl']);
            // Keep the in-memory index current so a URL repeated later in
            // this batch is updated instead of inserted twice.
            $known[$newId] = $row['downurl'];
        }
    }

    return $insertedIds;
}
341
/**
 * Append an "id@url$" record to the id/url index file.
 *
 * @param int|string $getId   Id of the stored row.
 * @param string     $downurl Detail URL of that row.
 *
 * @return string|null The full index content after the append, or null when
 *                     the write failed.
 */
public function getLastId($getId, $downurl)
{
    $file = 'AllIdData/GetId.txt';
    $record = $getId . "@" . $downurl . '$';

    if (!is_dir('AllIdData')) {
        mkdir('AllIdData', 0777);
    }

    // Only read the file when it exists; the content is needed for the
    // return value.
    $existing = is_file($file) ? (string) file_get_contents($file) : '';

    // Append under an exclusive lock instead of rewriting the whole file, so
    // a failed read or a concurrent writer cannot wipe existing records.
    if (file_put_contents($file, $record, FILE_APPEND | LOCK_EX)) {
        return $existing . $record;
    }

    return null;
}
357
/**
 * Load the id/url index file.
 *
 * The file holds records of the form "id@url" separated by '$'.
 *
 * @return array Map of stored row id => detail URL; empty when the file is
 *               missing, unreadable or empty.
 */
public function updateLink()
{
    $file = "AllIdData/GetId.txt";
    $links = [];

    $raw = is_file($file) ? file_get_contents($file) : false;
    if ($raw === false || $raw === '') {
        return $links;
    }

    foreach (explode('$', $raw) as $record) {
        // Split on the first '@' only, so a URL that itself contains '@'
        // stays intact.
        $pair = explode('@', $record, 2);
        // Skip blank and malformed records (no '@' or no id).
        if (count($pair) === 2 && $pair[0] !== '') {
            $links[$pair[0]] = $pair[1];
        }
    }

    return $links;
}
376
/**
 * Search the remote site for the posted keyword and store the hits.
 *
 * Reads the keyword from $_POST['wd'], posts it to the site's search
 * endpoint, scrapes every hit together with its detail page and passes the
 * rows to set_to_db().
 *
 * @return array|string Ids of the newly inserted rows, or "" when the
 *                      request failed or returned no hits.
 */
public function searchNameAllDate()
{
    // $_POST itself is always set; the key is what has to be checked.
    $wd = (isset($_POST['wd']) && is_string($_POST['wd'])) ? $_POST['wd'] : "";

    // Send the keyword to the search endpoint as a POST request.
    $post_url = $this->url . '/index.php?m=vod-search';
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $post_url);
    curl_setopt($ch, CURLOPT_POST, 1);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    // Same timeout as film_get(), so a stalled server cannot hang the request.
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);
    curl_setopt($ch, CURLOPT_POSTFIELDS, ['wd' => $wd]);
    $str = curl_exec($ch);
    curl_close($ch);

    if (!is_string($str) || !preg_match_all("/<span class=[\"|']tt[\"|'].*<\/span>/i", $str, $span_array)) {
        return "";
    }

    $rows = [];
    foreach ($span_array[0] as $itemHtml) {
        $row = $this->buildSearchRow($itemHtml);
        if ($row !== null) {
            $rows[] = $row;
        }
    }

    return $this->set_to_db($rows);
}

/**
 * Build one row from a search hit and its detail page.
 *
 * @param string $itemHtml HTML of one search hit.
 *
 * @return array|null The row, or null when the hit has no detail link.
 */
private function buildSearchRow($itemHtml)
{
    if (!preg_match('/href="([^"]+)/', $itemHtml, $href)) {
        return null;
    }

    // Anchor text is split on the first space: title, then status note.
    preg_match('/<a href="[^"]*"[^>]*>(.*)<\/a>/', $itemHtml, $anchor);
    $parts = explode(' ', isset($anchor[1]) ? $anchor[1] : '');
    $name = $parts[0];
    $note = isset($parts[1]) ? $parts[1] : '';

    $row = [];
    // NOTE(review): the original computed this as
    // intval(substr(<xing_vb6/7 span text>, 3, 0)), which is always 0 because
    // the requested length is 0. The intended value is unclear, so 0 is kept.
    $row['last'] = 0;
    $row['name'] = $name;
    $row['letter'] = $this->getFirstCharter($name);
    $row['note'] = $note;
    // First number in the note; /\d+/ also accepts a single digit.
    $row['state'] = preg_match('/\d+/', $note, $number) ? intval($number[0]) : 0;

    $detailUrl = $this->url . $href[1];
    $detail = $this->get_link_data($this->film_get($detailUrl));
    if (!is_array($detail)) {
        $detail = [];
    }
    $row['downurl'] = $detailUrl;

    $columns = [
        'pic' => 'vod_pic',
        'subname' => 'vod_ename',
        'director' => 'vod_director',
        'actor' => 'vod_actor',
        'area' => 'vod_area',
        'lang' => 'vod_language',
        'score' => 'score',
        'des' => 'vod_content',
    ];
    foreach ($columns as $column => $source) {
        $row[$column] = isset($detail[$source]) ? $detail[$source] : '';
    }

    // Only the first space-separated type is mapped; '福利片' is the
    // fallback when the detail page exposes no type.
    $row['type_name'] = $this->type_tf(isset($detail['vod_type']) ? explode(' ', $detail['vod_type'])[0] : '福利片');
    // A missing year is treated like the site's "未知" marker and stored as 1.
    $year = isset($detail['vod_year']) ? $detail['vod_year'] : "未知";
    $row['year'] = $year == "未知" ? 1 : $year;
    $row['playfrom'] = '';

    // Play sources: each source list is prefixed with '$' and its entries
    // are joined with '#'.
    $playUrls = '';
    if (isset($detail['Episodes']) && is_array($detail['Episodes'])) {
        foreach ($detail['Episodes'] as $episodeList) {
            $playUrls .= '$' . implode('#', $episodeList);
        }
    }
    $row['dd'] = $playUrls;

    return $row;
}
455
/**
 * Remove a video's record from the id/url index file.
 *
 * Only the index file is rewritten; no database call is made here.
 *
 * @param int|string $id Id whose record should be removed.
 *
 * @return array ['status' => int, 'msg' => string]: 400 for a missing id,
 *               200 once the index has been rewritten, 500 when the index
 *               could not be read or written.
 */
public function delDate($id)
{
    if (!$id) {
        return [
            "status" => 400,
            "msg" => "非法访问"
        ];
    }

    $file = 'AllIdData/GetId.txt';
    // A missing index is treated as empty; a failed read aborts so the file
    // is not overwritten with partial data.
    $raw = is_file($file) ? file_get_contents($file) : '';
    if ($raw === false) {
        return [
            "status" => 500,
            "msg" => "删除失败"
        ];
    }

    // id => [id, url]; a later record for the same id replaces an earlier one.
    $entries = [];
    foreach (explode('$', $raw) as $record) {
        // Split on the first '@' only, so URLs containing '@' survive the
        // rewrite; blank and malformed records are dropped.
        $pair = explode('@', $record, 2);
        if (count($pair) === 2 && $pair[0] !== '') {
            $entries[$pair[0]] = $pair;
        }
    }
    unset($entries[$id]);

    $all_string = "";
    foreach ($entries as $pair) {
        $all_string .= $pair[0] . "@" . $pair[1] . '$';
    }

    // Compare against false: removing the last record writes 0 bytes, which
    // is still a successful write.
    if (file_put_contents($file, $all_string, LOCK_EX) !== false) {
        return [
            "status" => 200,
            "msg" => "删除成功"
        ];
    }

    return [
        "status" => 500,
        "msg" => "删除失败"
    ];
}
494
/**
 * Restore a video's record in the id/url index file.
 *
 * Appends "id@<site url>/?m=<downurl>$" to the index.
 *
 * @param int|string $id      Id of the stored row.
 * @param string     $downurl Value placed after "/?m=" in the detail URL.
 *
 * @return array ['status' => int, 'msg' => string]: 400 when either argument
 *               is missing, 200 once the record is written, 500 when the
 *               write failed.
 */
public function recoveryData($id, $downurl)
{
    // Both values are required: a record without an id or without a URL
    // cannot be matched later.
    if (!$id || !$downurl) {
        return [
            "status" => 400,
            "msg" => "非法访问"
        ];
    }

    $record = $id . "@" . $this->url . "/?m=" . $downurl . '$';

    // Append under an exclusive lock instead of reading and rewriting the
    // whole file.
    if (file_put_contents('AllIdData/GetId.txt', $record, FILE_APPEND | LOCK_EX) !== false) {
        return [
            "status" => 200,
            "msg" => "恢复成功"
        ];
    }

    return [
        "status" => 500,
        "msg" => "恢复失败"
    ];
}
522 }