// 1. Fetch a page that has no access control

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, "http://localhost/mytest/phpinfo.php");
curl_setopt($ch, CURLOPT_HEADER, false);
// Return the response as a string; if this option is left out, curl_exec() prints the body directly.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
$result = curl_exec($ch);
if ($result === false) {
    // curl_exec() returns false on failure; report the reason while the handle is still open.
    trigger_error('cURL request failed: ' . curl_error($ch), E_USER_WARNING);
}
curl_close($ch);

// 2. Fetch a page through a proxy

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, "http://blog.51yip.com");
curl_setopt($ch, CURLOPT_HEADER, false);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_HTTPPROXYTUNNEL, true);
// The proxy must be passed as a "host:port" string; a bare 125.21.23.6:8080 is a parse error.
curl_setopt($ch, CURLOPT_PROXY, '125.21.23.6:8080');
// If the proxy requires credentials, add:
//curl_setopt($ch, CURLOPT_PROXYUSERPWD, 'user:password');
$result = curl_exec($ch);
if ($result === false) {
    // curl_exec() returns false on failure; report the reason while the handle is still open.
    trigger_error('cURL request failed: ' . curl_error($ch), E_USER_WARNING);
}
curl_close($ch);

// 3. POST data, then fetch the response

$ch = curl_init();
/* Note: the data to submit must not be a two-dimensional (or deeper) array.
 * OK:    array('name'=>serialize(array('tank','zhang')),'sex'=>1,'birth'=>'20101010')
 * Error: array('name'=>array('tank','zhang'),'sex'=>1,'birth'=>'20101010') */
$data = array('name' => 'test', 'sex' => 1, 'birth' => '20101010');
curl_setopt($ch, CURLOPT_URL, 'http://localhost/mytest/curl/upload.php');
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
// CURLOPT_RETURNTRANSFER is not set, so the response body is printed directly
// and curl_exec() only reports success or failure.
if (curl_exec($ch) === false) {
    trigger_error('cURL request failed: ' . curl_error($ch), E_USER_WARNING);
}
// Release the handle; the original left it open.
curl_close($ch);

// 4. Fetch a page that is protected by access control

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, "http://club-china");
/* CURLOPT_USERPWD supplies the credentials needed to get past page access control,
 * for example pages protected with an htpasswd file. */
//curl_setopt($ch, CURLOPT_USERPWD, '231144:2091XTAjmd=');
curl_setopt($ch, CURLOPT_HTTPGET, 1);
curl_setopt($ch, CURLOPT_REFERER, "http://club-china");
curl_setopt($ch, CURLOPT_HEADER, 0);
// CURLOPT_RETURNTRANSFER is not set here, so the body is printed directly and
// $result is only a boolean success flag.
$result = curl_exec($ch);
if ($result === false) {
    // Report the reason while the handle is still open.
    trigger_error('cURL request failed: ' . curl_error($ch), E_USER_WARNING);
}
curl_close($ch);

// 5. Simulate logging in to sina

/**
 * Attempt a Sina mail login and report whether it succeeded.
 *
 * Posts the credentials to the login CGI and treats a "Location:" response
 * header pointing at /cgi/index.php?check_time=... as a successful login.
 *
 * Reads the USERAGENT, COOKIEJAR and TIMEOUT constants, which must be defined
 * before this function is called.
 *
 * @param string $user     Account name; an empty value returns 0 without any request.
 * @param string $password Account password; an empty value returns 0 without any request.
 * @return int 1 when the expected redirect header is found, 0 otherwise.
 */
function checklogin($user, $password)
{
    if (empty($user) || empty($password)) {
        return 0;
    }

    // Build the form body with http_build_query() so that both the user name and
    // the password are URL-encoded; the original sent the password raw, which
    // corrupts the body when it contains '&', '=', '+' or '%'.
    $fields = array(
        'logintype' => 'uid',
        'u'         => $user,
        'psw'       => $password,
    );

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, "http://mail.sina.com.cn/cgi-bin/login.cgi");
    curl_setopt($ch, CURLOPT_REFERER, "http://mail.sina.com.cn/index.html");
    // Include the response headers in the returned string; the check below inspects them.
    curl_setopt($ch, CURLOPT_HEADER, true);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_USERAGENT, USERAGENT);
    curl_setopt($ch, CURLOPT_COOKIEJAR, COOKIEJAR);
    curl_setopt($ch, CURLOPT_TIMEOUT, TIMEOUT);
    curl_setopt($ch, CURLOPT_POST, true);
    curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($fields, '', '&'));
    $contents = curl_exec($ch);
    curl_close($ch);

    // A transport failure yields false rather than a string; treat it as a failed login.
    if ($contents === false) {
        return 0;
    }

    $redirected = preg_match('/Location: (.*)\/cgi\/index\.php\?check_time=(.*)\n/', $contents);

    return $redirected ? 1 : 0;
}

// Settings read by checklogin().
// HTTP_USER_AGENT is absent when the script runs from the CLI, so fall back to an empty string.
define("USERAGENT", isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '');
// Create the cookie jar in the system temp directory instead of a hard-coded /tmp.
define("COOKIEJAR", tempnam(sys_get_temp_dir(), "cookie"));
// Maximum time, in seconds, that a cURL transfer is allowed to take.
define("TIMEOUT", 500);

// Prints 1 on a successful login and 0 otherwise.
echo checklogin("zhangying215", "xtaj227");