Tips:点此可运行HTML源码

模拟登录神器之PHP基于cURL实现自动模拟登录类

一、构思

  • 从Firefox浏览器拷贝cURL命令(初始页、提交、提交后)
  • 自动分析curl形成模拟登录代码
  • 默认参数:ssl/302/gzip

二、实现

接口

(一)根据curl信息执行并解析结果 public function execCurl($curlContent, $callbackBefore = false, $callbackAfter = false)
(二)解析curl信息 protected function _parseCurl($curlContent)
(三)执行curl请求 protected function _execCurl($parseCurlResult)
(四)获取上一次存储cookie的文件 public function getLastCookieFile()
(五)设置上一次存储cookie的文件 protected function setLastCookieFile($cookieFile)
(六)登录成功后,锁定上一次存储cookie的文件,避免覆盖 public function lockLastCookieFile()
(七)解锁上一次存储cookie的文件 public function unlockLastCookieFile()
(八)登录成功, get 方式获取url信息 public function getUrl($url, $header = false)
(九)登录成功, post 方式获取url信息 public function postUrl($url, $postData = false, $header = false)
(十)记录日志 protected function _log($msg)

<?php

namespace PhpUtility;

/**
 * Class CurlAutoLogin
 *
 * Simulates a browser login by replaying cURL commands copied from Firefox
 * ("Copy as cURL"): each command is parsed into url / headers / post data and
 * executed with PHP's cURL extension, while cookies are carried from one
 * request to the next through temporary cookie-jar files.
 *
 * @author Zjmainstay
 * @website http://www.zjmainstay.cn
 */
class CurlAutoLogin {
    // Path of the cookie jar written by the most recent execCurl() request.
    protected $lastCookieFile = '';
    // When true (after a successful login), $lastCookieFile is no longer replaced.
    protected $lockedLastCookieFile = false;

    /**
     * Parse a copied cURL command, execute it and return the response body.
     *
     * @param  string         $curlContent    cURL command copied from the browser
     * @param  callable|false $callbackBefore optional hook receiving the parsed request
     *                                        (url/header/post) and returning the request to
     *                                        send, e.g. to swap the user name or password;
     *                                        non-callable values are ignored
     * @param  callable|false $callbackAfter  optional hook receiving (parsed request, response)
     *                                        and returning the final result, e.g. to extract a
     *                                        CSRF token; non-callable values are ignored
     * @return mixed response body, false when the transfer failed, '' when no URL could be
     *               determined, or whatever $callbackAfter returns
     */
    public function execCurl($curlContent, $callbackBefore = false, $callbackAfter = false) {
        $parseCurlResult = $this->_parseCurl($curlContent);
        if (is_callable($callbackBefore)) {
            $parseCurlResult = call_user_func($callbackBefore, $parseCurlResult);
        }

        $execCurlResult = $this->_execCurl($parseCurlResult);
        if (is_callable($callbackAfter)) {
            $execCurlResult = call_user_func($callbackAfter, $parseCurlResult, $execCurlResult);
        }

        return $execCurlResult;
    }

    /**
     * Extract url, headers and post data from a copied cURL command.
     *
     * Only single-quoted arguments are recognised. Any "Cookie:" header is dropped
     * because cookies are supplied from the cookie-jar files instead.
     *
     * @param  string $curlContent cURL command copied from the browser
     * @return bool|array false when no "curl '<url>'" part is found, otherwise an array
     *                    with the keys url (string), header (string[]) and post (string)
     */
    protected function _parseCurl($curlContent) {
        if (!is_string($curlContent)) {
            return false;
        }

        if (!preg_match("#curl '([^']*?)'#is", $curlContent, $matchUrl)) {
            return false;
        }

        // Remove the cookie header so stale browser cookies are not replayed.
        $curlContent = preg_replace("#-H 'Cookie:[^']*'#is", '', $curlContent);

        $httpHeader = [];
        if (preg_match_all("#-H '([^']*?)'#is", $curlContent, $headerMatches)) {
            $httpHeader = $headerMatches[1];
        }

        // Besides --data, accept the --data-raw / --data-binary / --data-ascii spellings;
        // previously such commands lost their body and were sent as GET requests.
        $postData = '';
        if (preg_match("#--data(?:-raw|-binary|-ascii)? '([^']*?)'#is", $curlContent, $postDataMatch)) {
            $postData = $postDataMatch[1];
        }

        return [
            'url'       => $matchUrl[1],
            'header'    => $httpHeader,
            'post'      => $postData,
        ];
    }

    /**
     * Execute a parsed request and remember the cookie jar it produced.
     *
     * @param  array $parseCurlResult parsed request with the keys url / header / post
     * @return string|false response body, false when the transfer failed,
     *                      '' when the request has no url
     */
    protected function _execCurl($parseCurlResult) {
        if (!is_array($parseCurlResult) || empty($parseCurlResult['url'])) {
            return '';
        }

        $header   = isset($parseCurlResult['header']) ? $parseCurlResult['header'] : [];
        $postData = isset($parseCurlResult['post']) ? $parseCurlResult['post'] : false;

        // A fresh, uniquely named jar per request avoids concurrent processes
        // overwriting each other's cookies.
        $cookieFile = tempnam(sys_get_temp_dir(), 'autologin');

        $content = $this->_request($parseCurlResult['url'], $header, $postData, $cookieFile);

        if ($cookieFile !== false) {
            $this->setLastCookieFile($cookieFile);
            // While the cookie file is locked the new jar is not adopted; remove it
            // so locked sessions do not leave orphaned temp files behind.
            if ($this->lastCookieFile !== $cookieFile && is_file($cookieFile)) {
                unlink($cookieFile);
            }
        }

        return $content;
    }

    /**
     * Perform one HTTP request with the shared defaults (gzip, redirects, cookies).
     *
     * @param  string       $url       target url
     * @param  array|mixed  $header    list of "Name: value" header lines; empty values send none
     * @param  mixed        $postData  post body (string or array); empty values send a GET
     * @param  string|false $cookieJar file that receives the cookies after the request,
     *                                 or false to store none
     * @return string|false response body, or false when the transfer failed
     */
    private function _request($url, $header, $postData, $cookieJar) {
        $ch = curl_init($url);
        if ($ch === false) {
            $this->_log('curl_init failed for url: ' . $url);
            return false;
        }

        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // return the body instead of printing it
        curl_setopt($ch, CURLOPT_ENCODING, "gzip");  // accept gzip-compressed responses

        // Headers must be set on the handle itself; they used to be assigned to a
        // non-existent $this->curl property and were therefore never sent.
        if (!empty($header)) {
            curl_setopt($ch, CURLOPT_HTTPHEADER, array_values((array) $header));
        }

        // NOTE(security): certificate verification is disabled for https urls, as in the
        // original design (work-around for SSL errors). Do not use against hosts where
        // a man-in-the-middle is a concern.
        if (strncasecmp((string) $url, 'https', 5) === 0) {
            curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
            curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
        }

        // Follow 302 redirects.
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);

        // Store cookies received by this request.
        if (!empty($cookieJar)) {
            curl_setopt($ch, CURLOPT_COOKIEJAR, $cookieJar);
        }
        // Send cookies collected by the previous request; an empty name just enables
        // cookie handling without loading anything.
        curl_setopt($ch, CURLOPT_COOKIEFILE, (string) $this->lastCookieFile);

        if (!empty($postData)) {
            curl_setopt($ch, CURLOPT_POST, 1);
            curl_setopt($ch, CURLOPT_POSTFIELDS, $postData);
        }

        // Initialised up front so a failure never leaves the result undefined.
        $content = false;
        try {
            $content = curl_exec($ch);
        } catch (\Exception $e) {
            $this->_log($e->getMessage());
        }

        $curlError = curl_error($ch);
        if (!empty($curlError)) {
            $this->_log($curlError);
        }

        // The cookie jar is written when the handle is released. curl_close() has no
        // effect since PHP 8.0, so also drop the only reference to the handle.
        if (PHP_VERSION_ID < 80000) {
            curl_close($ch);
        }
        unset($ch);

        return $content;
    }

    /**
     * Append a message to run.log next to this file.
     *
     * @param  string $msg message to record
     * @return void
     */
    protected function _log($msg) {
        file_put_contents(__DIR__ . '/run.log', $msg . "\n", FILE_APPEND);
    }

    /**
     * Get the cookie jar written by the most recent execCurl() request.
     *
     * @return string path of the cookie file, '' when none has been stored yet
     */
    public function getLastCookieFile() {
        return $this->lastCookieFile;
    }

    /**
     * Remember the given cookie jar as the most recent one, unless locked.
     *
     * @param string $cookieFile path of the cookie file
     */
    protected function setLastCookieFile($cookieFile) {
        if (!$this->lockedLastCookieFile) {
            $this->lastCookieFile = $cookieFile;
        }
    }

    /**
     * Lock the current cookie file (call after a successful login) so later
     * requests cannot replace it.
     *
     * @return void
     */
    public function lockLastCookieFile() {
        $this->lockedLastCookieFile = true;
    }

    /**
     * Allow the cookie file to be replaced by later requests again.
     *
     * @return void
     */
    public function unlockLastCookieFile() {
        $this->lockedLastCookieFile = false;
    }

    /**
     * Fetch a url with GET, sending the cookies of the last stored cookie file.
     *
     * @param  string      $url    target url
     * @param  array|false $header optional list of "Name: value" header lines
     * @return string|false response body, or false when the transfer failed
     */
    public function getUrl($url, $header = false) {
        return $this->_request($url, $header, false, false);
    }

    /**
     * Fetch a url with POST, sending the cookies of the last stored cookie file.
     *
     * @param  string             $url      target url
     * @param  string|array|false $postData post body; when empty a GET is sent instead
     * @param  array|false        $header   optional list of "Name: value" header lines
     * @return string|false response body, or false when the transfer failed
     */
    public function postUrl($url, $postData = false, $header = false) {
        return $this->_request($url, $header, $postData, false);
    }
}

三、演示

运行:PHP cURL自动模拟登录演示

<?php

require_once __DIR__.'/../vendor/autoload.php';

$client = new PhpUtility\CurlAutoLogin();

// Step 0: request the protected page before logging in, for comparison.
$protectedUrl = 'http://demo.zjmainstay.cn/js/simpleAjax/loginResult.php';
echo 'Before Login: ', $client->getUrl($protectedUrl), "\n";

// Step 1: open the login page (command copied from the browser) to obtain the initial cookies.
$loginPageCommand = "curl 'http://demo.zjmainstay.cn/js/simpleAjax/' -H 'Host: demo.zjmainstay.cn' -H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:47.0) Gecko/20100101 Firefox/47.0' -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' -H 'Accept-Language: zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3' -H 'Accept-Encoding: gzip, deflate' -H 'Cookie: Hm_lvt_1526d5aecf5561ef9401f7c7b7842a97=1468327822,1468327904,1468341636,1468411918; Hm_lpvt_1526d5aecf5561ef9401f7c7b7842a97=1468421526' -H 'Connection: keep-alive' -H 'If-Modified-Since: Mon, 27 Oct 2014 08:31:18 GMT' -H 'If-None-Match: \"32e-453-506635ac5e180\"' -H 'Cache-Control: max-age=0'";
$client->execCurl($loginPageCommand);

// Step 2: submit the login form. The copied command carries a placeholder user name,
// which the pre-request hook replaces with the real one.
$loginSubmitCommand = "curl 'http://demo.zjmainstay.cn/js/simpleAjax/doPost.php' -H 'Host: demo.zjmainstay.cn' -H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:47.0) Gecko/20100101 Firefox/47.0' -H 'Accept: application/json, text/javascript, */*; q=0.01' -H 'Accept-Language: zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3' -H 'Accept-Encoding: gzip, deflate' -H 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8' -H 'X-Requested-With: XMLHttpRequest' -H 'Referer: http://demo.zjmainstay.cn/js/simpleAjax/' -H 'Cookie: Hm_lvt_1526d5aecf5561ef9401f7c7b7842a97=1468327822,1468327904,1468341636,1468411918; Hm_lpvt_1526d5aecf5561ef9401f7c7b7842a97=1468421526' -H 'Connection: keep-alive' --data 'username=demousername'";
$actualUsername = 'Zjmainstay';
$swapUsername = function($request) use ($actualUsername) {
    $request['post'] = str_replace('=demousername', '=' . $actualUsername, $request['post']);
    return $request;
};
$client->execCurl($loginSubmitCommand, $swapUsername);

// Step 3: logged in. Freeze the cookie file so it is not replaced, then read the protected page again.
$client->lockLastCookieFile();
echo 'After Login: ', $client->getUrl($protectedUrl), "\n";

四、更多

请关注github项目 php-utility-class 上面的更新。

文章首发自Zjmainstay学习笔记《PHP基于cURL实现自动模拟登录》

posted @ 2016-09-10 17:51 Zjmainstay 阅读(...) 评论(...) 编辑 收藏
实现请参考《为博客园添加标签云动画》