一、构思
- 从Firefox浏览器拷贝cURL命令(初始页、提交、提交后)
- 自动分析cURL命令形成模拟登录代码
- 默认参数:ssl/302/gzip
二、实现
<?php

namespace PhpUtility;

/**
 * Class CurlAutoLogin
 *
 * Replays "Copy as cURL" commands taken from a browser in order to simulate
 * a login flow: each command is parsed into url/header/post parts, executed
 * with PHP's cURL extension, and the cookies received are carried over to
 * the next request through a cookie-jar file.
 *
 * @author Zjmainstay
 * @website http://www.zjmainstay.cn
 */
class CurlAutoLogin
{
    /**
     * Path of the cookie-jar file written by the most recent execCurl() request.
     *
     * @var string
     */
    protected $lastCookieFile = '';

    /**
     * When true (typically after a successful login), $lastCookieFile is frozen
     * and later requests no longer replace it.
     *
     * @var bool
     */
    protected $lockedLastCookieFile = false;

    /**
     * Parse a cURL command, run it and return the response body.
     *
     * @param string         $curlContent    cURL command copied from the browser
     * @param callable|false $callbackBefore optional hook receiving the parse result (array, or false when
     *                                       parsing failed) and returning the value to execute, e.g. to
     *                                       swap in a real user name / password
     * @param callable|false $callbackAfter  optional hook receiving ($parseResult, $responseBody) and
     *                                       returning the final result, e.g. to extract a CSRF token
     * @return mixed response body ('' when no URL could be determined, false when curl_exec() failed),
     *               or whatever $callbackAfter returns
     */
    public function execCurl($curlContent, $callbackBefore = false, $callbackAfter = false)
    {
        $parseCurlResult = $this->_parseCurl($curlContent);

        if (!empty($callbackBefore)) {
            $parseCurlResult = $callbackBefore($parseCurlResult);
        }

        $execCurlResult = $this->_execCurl($parseCurlResult);

        if (!empty($callbackAfter)) {
            $execCurlResult = $callbackAfter($parseCurlResult, $execCurlResult);
        }

        return $execCurlResult;
    }

    /**
     * Split a cURL command into its url, header list and post body.
     *
     * Cookie headers are dropped on purpose: cookies are supplied from the
     * cookie-jar file of the previous request instead of the copied command.
     *
     * @param string $curlContent cURL command copied from the browser
     * @return bool|array false when no quoted URL follows "curl", otherwise
     *                    ['url' => string, 'header' => string[], 'post' => string]
     */
    protected function _parseCurl($curlContent)
    {
        if (!preg_match("#curl '([^']*?)'#is", $curlContent, $matchUrl)) {
            return false;
        }

        // Remove cookie data in header.
        $curlContent = preg_replace("#-H 'Cookie:[^']*'#is", '', $curlContent);

        $httpHeader = [];
        if (preg_match_all("#-H '([^']*?)'#is", $curlContent, $headerMatches)) {
            $httpHeader = $headerMatches[1];
        }

        // Besides --data, accept the --data-raw / --data-binary / --data-ascii
        // spellings that browsers also emit when copying a request as cURL.
        $postData = '';
        if (preg_match("#--data(?:-raw|-binary|-ascii)? '([^']*?)'#is", $curlContent, $postDataMatch)) {
            $postData = $postDataMatch[1];
        }

        return [
            'url'    => $matchUrl[1],
            'header' => $httpHeader,
            'post'   => $postData,
        ];
    }

    /**
     * Execute a parsed cURL command and remember the cookies it produced.
     *
     * @param array $parseCurlResult parse result holding the keys url / header / post
     * @return string|false response body, '' when the url is missing/empty,
     *                      false when curl_exec() failed
     */
    protected function _execCurl($parseCurlResult)
    {
        if (empty($parseCurlResult['url'])) {
            return '';
        }

        // A fresh jar per request avoids concurrent processes overwriting each
        // other's cookies. While the cookie file is locked the new jar would be
        // discarded anyway, so none is created (this avoids orphaned temp files).
        $cookieFile = '';
        if (!$this->lockedLastCookieFile) {
            $cookieFile = tempnam(sys_get_temp_dir(), 'autologin');
            if ($cookieFile === false) {
                $this->_log('Unable to create a temporary cookie file');
                $cookieFile = '';
            }
        }

        $header = isset($parseCurlResult['header']) ? $parseCurlResult['header'] : [];
        $post = isset($parseCurlResult['post']) ? $parseCurlResult['post'] : '';

        $content = $this->_request($parseCurlResult['url'], $header, $post, $cookieFile);

        // Update last cookie file (only when a jar was actually written).
        if ($cookieFile !== '') {
            $this->setLastCookieFile($cookieFile);
        }

        return $content;
    }

    /**
     * Shared request routine used by _execCurl(), getUrl() and postUrl().
     *
     * Applies the defaults of this class: response returned instead of echoed,
     * gzip decoding, redirects followed, and cookies read from $lastCookieFile.
     *
     * @param string       $url       target URL
     * @param array|bool   $header    list of raw "Name: value" header lines, or false/empty for none
     * @param mixed        $postData  request body; a non-empty value turns the request into a POST
     * @param string       $cookieJar file that receives the cookies of this request, '' for none
     * @return string|false response body, or false when the request failed
     */
    protected function _request($url, $header = false, $postData = false, $cookieJar = '')
    {
        $ch = curl_init($url);
        if ($ch === false) {
            $this->_log('curl_init failed for ' . $url);
            return false;
        }

        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // return the body instead of printing it
        curl_setopt($ch, CURLOPT_ENCODING, "gzip");  // accept gzip-compressed responses

        // Add header.
        if (!empty($header)) {
            curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
        }

        // Add ssl support.
        // NOTE(review): certificate and host verification are switched off for
        // https URLs (kept from the original to tolerate SSL errors). This is
        // insecure against man-in-the-middle attacks; enable verification
        // when the target has a valid certificate.
        if (stripos($url, 'https') === 0) {
            curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
            curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
        }

        // Add 302 support.
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);

        // Add cookie support: store the cookies received by this request ...
        if (!empty($cookieJar)) {
            curl_setopt($ch, CURLOPT_COOKIEJAR, $cookieJar);
        }
        // ... and send the ones kept from the previous request. An empty file
        // name loads nothing but still turns cookie handling on.
        curl_setopt($ch, CURLOPT_COOKIEFILE, $this->lastCookieFile);

        // Add post data support.
        if (!empty($postData)) {
            curl_setopt($ch, CURLOPT_POST, 1);
            curl_setopt($ch, CURLOPT_POSTFIELDS, $postData);
        }

        // Pre-set so a caught exception yields false rather than an undefined variable.
        $content = false;
        try {
            $content = curl_exec($ch);
        } catch (\Exception $e) {
            $this->_log($e->getMessage());
        }

        $curlError = curl_error($ch);
        if (!empty($curlError)) {
            $this->_log($curlError);
        }

        curl_close($ch);

        return $content;
    }

    /**
     * Append a message line to run.log next to this file.
     *
     * @param string $msg text to record
     * @return void
     */
    protected function _log($msg)
    {
        file_put_contents(__DIR__ . '/run.log', $msg . "\n", FILE_APPEND);
    }

    /**
     * Get the cookie file stored by the last request.
     *
     * @return string path of the cookie file, '' when none has been stored yet
     */
    public function getLastCookieFile()
    {
        return $this->lastCookieFile;
    }

    /**
     * Remember the cookie file of the last request unless it is locked.
     *
     * @param string $cookieFile path of the cookie-jar file
     * @return void
     */
    protected function setLastCookieFile($cookieFile)
    {
        if (!$this->lockedLastCookieFile) {
            $this->lastCookieFile = $cookieFile;
        }
    }

    /**
     * Lock the last cookie file (call after a successful login) so later
     * requests do not overwrite it.
     *
     * @return void
     */
    public function lockLastCookieFile()
    {
        $this->lockedLastCookieFile = true;
    }

    /**
     * Unlock the last cookie file so requests update it again.
     *
     * @return void
     */
    public function unlockLastCookieFile()
    {
        $this->lockedLastCookieFile = false;
    }

    /**
     * Fetch a URL with GET using the stored login cookies.
     *
     * @param string     $url    target URL
     * @param array|bool $header list of raw header lines, or false for none
     * @return string|false response body, or false when the request failed
     */
    public function getUrl($url, $header = false)
    {
        return $this->_request($url, $header);
    }

    /**
     * Fetch a URL with POST using the stored login cookies.
     *
     * @param string     $url      target URL
     * @param mixed      $postData request body; when empty the request is sent without a body (GET)
     * @param array|bool $header   list of raw header lines, or false for none
     * @return string|false response body, or false when the request failed
     */
    public function postUrl($url, $postData = false, $header = false)
    {
        return $this->_request($url, $header, $postData);
    }
}
三、演示
<?php

// Demo: replay two cURL commands copied from Firefox to log in, then read a
// page that is only available to logged-in users.
require_once __DIR__.'/../vendor/autoload.php';

$client = new PhpUtility\CurlAutoLogin();

// Page whose content depends on the login state.
$resultPageUrl = 'http://demo.zjmainstay.cn/js/simpleAjax/loginResult.php';

// cURL command of the initial login page.
$loginPageCurl = "curl 'http://demo.zjmainstay.cn/js/simpleAjax/' -H 'Host: demo.zjmainstay.cn' -H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:47.0) Gecko/20100101 Firefox/47.0' -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' -H 'Accept-Language: zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3' -H 'Accept-Encoding: gzip, deflate' -H 'Cookie: Hm_lvt_1526d5aecf5561ef9401f7c7b7842a97=1468327822,1468327904,1468341636,1468411918; Hm_lpvt_1526d5aecf5561ef9401f7c7b7842a97=1468421526' -H 'Connection: keep-alive' -H 'If-Modified-Since: Mon, 27 Oct 2014 08:31:18 GMT' -H 'If-None-Match: \"32e-453-506635ac5e180\"' -H 'Cache-Control: max-age=0'";

// cURL command of the login form submission (carries a placeholder user name).
$submitFormCurl = "curl 'http://demo.zjmainstay.cn/js/simpleAjax/doPost.php' -H 'Host: demo.zjmainstay.cn' -H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:47.0) Gecko/20100101 Firefox/47.0' -H 'Accept: application/json, text/javascript, */*; q=0.01' -H 'Accept-Language: zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3' -H 'Accept-Encoding: gzip, deflate' -H 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8' -H 'X-Requested-With: XMLHttpRequest' -H 'Referer: http://demo.zjmainstay.cn/js/simpleAjax/' -H 'Cookie: Hm_lvt_1526d5aecf5561ef9401f7c7b7842a97=1468327822,1468327904,1468341636,1468411918; Hm_lpvt_1526d5aecf5561ef9401f7c7b7842a97=1468421526' -H 'Connection: keep-alive' --data 'username=demousername'";

$actualUsername = 'Zjmainstay';

// Pre-processing hook: replace the placeholder user name with the real one.
$injectUsername = function($request) use ($actualUsername) {
    $request['post'] = str_replace('=demousername', '=' . $actualUsername, $request['post']);

    return $request;
};

// 0. Not logged in yet.
echo 'Before Login: ' . $client->getUrl($resultPageUrl) . "\n";

// 1. Initialise the login page.
$client->execCurl($loginPageCurl);

// 2. Submit the login form.
$client->execCurl($submitFormCurl, $injectUsername);

// 3. Logged in: lock the cookie file against updates and read the
//    logged-in page directly.
$client->lockLastCookieFile();
echo 'After Login: ' . $client->getUrl($resultPageUrl) . "\n";
四、更多
请关注 GitHub 项目 php-curl 上面的更新。
如果需要查看更多关于PHP cURL应用的内容,请参考本站博客《PHP cURL实现模拟登录与采集使用方法详解》。
未经同意禁止转载!
转载请附带本文原文地址:PHP基于cURL实现自动模拟登录,首发自 Zjmainstay学习笔记




