2016年07月13日 16:30:08

PHP基于cURL实现自动模拟登录 Featured

作者: Zjmainstay
本文介绍如何用 PHP 解析从浏览器复制的 cURL 命令,并基于解析结果自动完成模拟登录。

一、构思

  • 从Firefox浏览器拷贝cURL命令(初始页、提交、提交后)
  • 自动分析cURL命令形成模拟登录代码
  • 默认支持:SSL(HTTPS)、302 跳转跟随、gzip 解压

二、实现

  1. <?php
  2. namespace PhpUtility;
  /**
   * Class CurlAutoLogin
   *
   * Replays "Copy as cURL" commands captured from a browser so that a login
   * flow (landing page, form submission, follow-up pages) can be automated.
   * Cookies received by each execCurl() request are written to a temporary
   * cookie jar, and that jar is sent with the next request.
   *
   * Cookie jar files are created in the system temp directory and are not
   * deleted by this class.
   *
   * @author Zjmainstay
   * @website http://www.zjmainstay.cn
   */
  class CurlAutoLogin {
      // Path of the cookie jar written by the most recent execCurl() request ('' until one has run).
      protected $lastCookieFile = '';
      // When true, execCurl() no longer replaces $lastCookieFile (used after a successful login).
      protected $lockedLastCookieFile = false;
      // Whether HTTPS certificates are verified. Off by default to keep the
      // historical behaviour; NOTE(security): enable it via setVerifySsl(true)
      // whenever the target site has a valid certificate.
      protected $verifySsl = false;

      /**
       * Parse a cURL command, run the request and return the response body.
       *
       * @param string         $curlContent    cURL command copied from the browser
       * @param callable|false $callbackBefore optional hook: receives the parsed request array
       *                                       (url/header/post) and must return the array to send,
       *                                       e.g. with the username or password replaced
       * @param callable|false $callbackAfter  optional hook: receives (parsed request, response body)
       *                                       and its return value becomes the result,
       *                                       e.g. to extract a CSRF token
       * @return mixed response body, '' when no URL could be parsed or the URL is empty,
       *               false when the transfer failed, or whatever $callbackAfter returns
       */
      public function execCurl($curlContent, $callbackBefore = false, $callbackAfter = false) {
          $parseCurlResult = $this->_parseCurl($curlContent);
          if ($parseCurlResult === false) {
              // Nothing to request; never hand `false` to callbacks that expect an array.
              return '';
          }
          if (!empty($callbackBefore)) {
              $parseCurlResult = $callbackBefore($parseCurlResult);
          }
          $execCurlResult = $this->_execCurl($parseCurlResult);
          if (!empty($callbackAfter)) {
              $execCurlResult = $callbackAfter($parseCurlResult, $execCurlResult);
          }
          return $execCurlResult;
      }

      /**
       * Extract URL, headers and POST body from a cURL command line.
       *
       * Only single-quoted arguments are recognised. The browser's own Cookie
       * header is discarded because cookies are managed through the cookie jar.
       *
       * @param string $curlContent cURL command copied from the browser
       * @return bool|array false when no quoted URL follows "curl", otherwise
       *                    ['url' => string, 'header' => string[], 'post' => string]
       */
      protected function _parseCurl($curlContent) {
          if (!preg_match("#curl '([^']*?)'#is", $curlContent, $matchUrl)) {
              return false;
          }

          // Remove cookie data from the headers before collecting them.
          $curlContent = preg_replace("#-H 'Cookie:[^']*'#is", '', $curlContent);

          $httpHeader = [];
          if (preg_match_all("#-H '([^']*?)'#is", $curlContent, $headerMatches)) {
              $httpHeader = $headerMatches[1];
          }

          // Accept --data as well as the --data-raw / --data-binary spellings.
          $postData = '';
          if (preg_match("#--data(?:-raw|-binary)? '([^']*?)'#is", $curlContent, $postDataMatch)) {
              $postData = $postDataMatch[1];
          }

          return [
              'url'    => $matchUrl[1],
              'header' => $httpHeader,
              'post'   => $postData,
          ];
      }

      /**
       * Execute a parsed request and remember the cookies it produced.
       *
       * @param array $parseCurlResult parsed request with the keys url / header / post
       * @return string|bool response body, '' when the url is empty, false when the transfer failed
       */
      protected function _execCurl($parseCurlResult) {
          if (empty($parseCurlResult['url'])) {
              return '';
          }
          $header = isset($parseCurlResult['header']) ? $parseCurlResult['header'] : [];
          $post   = isset($parseCurlResult['post']) ? $parseCurlResult['post'] : '';

          return $this->_request($parseCurlResult['url'], $header, $post, true);
      }

      /**
       * Shared cURL transfer used by execCurl(), getUrl() and postUrl().
       *
       * @param string      $url          target URL
       * @param array|false $header       request header lines, or an empty value for none
       * @param mixed       $postData     POST body; an empty value ('' / false / null / []) sends a GET
       * @param bool        $storeCookies whether the received cookies should become the new
       *                                  "last cookie file" (skipped while the file is locked)
       * @return string|bool response body, or false when the transfer failed
       */
      private function _request($url, $header, $postData, $storeCookies) {
          $ch = curl_init($url);
          if ($ch === false) {
              $this->_log('curl_init failed for ' . $url);
              return false;
          }

          curl_setopt($ch, CURLOPT_HEADER, 0);
          curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // return the body instead of printing it
          curl_setopt($ch, CURLOPT_ENCODING, "gzip");  // request and decode gzip

          if (!empty($header)) {
              curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
          }

          // Certificate checks are only relaxed for https URLs, and only while
          // verification is switched off.
          if (stripos($url, 'https://') === 0) {
              curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, $this->verifySsl);
              curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, $this->verifySsl ? 2 : 0);
          }

          // Follow 302 redirects.
          curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);

          // Send the cookies of the previous request. An empty file name loads
          // nothing but still enables cURL's cookie handling.
          curl_setopt($ch, CURLOPT_COOKIEFILE, $this->lastCookieFile);

          // A fresh jar per request keeps concurrent processes from overwriting
          // each other. While locked, no jar is created, so no file is orphaned.
          $cookieFile = false;
          if ($storeCookies && !$this->lockedLastCookieFile) {
              $cookieFile = tempnam(sys_get_temp_dir(), 'autologin');
              if ($cookieFile === false) {
                  $this->_log('Unable to create a temporary cookie file');
              } else {
                  curl_setopt($ch, CURLOPT_COOKIEJAR, $cookieFile);
              }
          }

          // Strict emptiness checks so that a body of '0' is still posted.
          $hasPostData = $postData !== false && $postData !== null
              && $postData !== '' && $postData !== [];
          if ($hasPostData) {
              curl_setopt($ch, CURLOPT_POST, 1);
              curl_setopt($ch, CURLOPT_POSTFIELDS, $postData);
          }

          $content = curl_exec($ch); // false on failure

          $curlError = curl_error($ch);
          if (!empty($curlError)) {
              $this->_log($curlError);
          }

          // Before PHP 8 curl_close() releases the handle; from PHP 8 on it has
          // no effect, so the handle is dropped explicitly. Releasing the
          // handle is what writes the cookie jar to disk.
          if (PHP_VERSION_ID < 80000) {
              curl_close($ch);
          }
          unset($ch);

          if ($cookieFile !== false) {
              $this->setLastCookieFile($cookieFile);
          }

          return $content;
      }

      /**
       * Append a line to run.log next to this file.
       *
       * @param string $msg message to record
       * @return void
       */
      protected function _log($msg) {
          file_put_contents(__DIR__ . '/run.log', $msg . "\n", FILE_APPEND | LOCK_EX);
      }

      /**
       * Get the cookie file written by the last stored request.
       *
       * @return string file path, or '' when no request has stored cookies yet
       */
      public function getLastCookieFile() {
          return $this->lastCookieFile;
      }

      /**
       * Replace the remembered cookie file unless it is locked.
       *
       * @param string $cookieFile path of the new cookie jar
       * @return void
       */
      protected function setLastCookieFile($cookieFile) {
          if (!$this->lockedLastCookieFile) {
              $this->lastCookieFile = $cookieFile;
          }
      }

      /**
       * Freeze the remembered cookie file (call after a successful login) so
       * later execCurl() requests cannot overwrite the logged-in cookies.
       *
       * @return void
       */
      public function lockLastCookieFile() {
          $this->lockedLastCookieFile = true;
      }

      /**
       * Allow execCurl() to update the remembered cookie file again.
       *
       * @return void
       */
      public function unlockLastCookieFile() {
          $this->lockedLastCookieFile = false;
      }

      /**
       * Switch HTTPS certificate verification on or off for later requests.
       *
       * @param bool $verify true to verify peer and host name, false to skip both checks
       * @return void
       */
      public function setVerifySsl($verify) {
          $this->verifySsl = (bool) $verify;
      }

      /**
       * Fetch a URL with GET, sending the remembered cookies.
       *
       * @param string      $url    target URL
       * @param array|false $header optional request header lines
       * @return string|bool response body, or false when the transfer failed
       */
      public function getUrl($url, $header = false) {
          return $this->_request($url, $header, false, false);
      }

      /**
       * Fetch a URL with POST, sending the remembered cookies.
       *
       * When $postData is empty no body is set and the request goes out as a GET.
       *
       * @param string             $url      target URL
       * @param string|array|false $postData POST body
       * @param array|false        $header   optional request header lines
       * @return string|bool response body, or false when the transfer failed
       */
      public function postUrl($url, $postData = false, $header = false) {
          return $this->_request($url, $header, $postData, false);
      }
  }

三、演示

运行:PHP cURL自动模拟登录演示

  1. <?php
  require_once __DIR__.'/../vendor/autoload.php';

  // Demo: fetch a protected page before and after replaying a recorded login.
  $client = new PhpUtility\CurlAutoLogin();

  // 0. Not logged in yet.
  $resultUrl = 'http://demo.zjmainstay.cn/js/simpleAjax/loginResult.php';
  echo 'Before Login: ', $client->getUrl($resultUrl), "\n";

  // 1. Open the login page first.
  $loginPageCurl = "curl 'http://demo.zjmainstay.cn/js/simpleAjax/' -H 'Host: demo.zjmainstay.cn' -H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:47.0) Gecko/20100101 Firefox/47.0' -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' -H 'Accept-Language: zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3' -H 'Accept-Encoding: gzip, deflate' -H 'Cookie: Hm_lvt_1526d5aecf5561ef9401f7c7b7842a97=1468327822,1468327904,1468341636,1468411918; Hm_lpvt_1526d5aecf5561ef9401f7c7b7842a97=1468421526' -H 'Connection: keep-alive' -H 'If-Modified-Since: Mon, 27 Oct 2014 08:31:18 GMT' -H 'If-None-Match: \"32e-453-506635ac5e180\"' -H 'Cache-Control: max-age=0'";
  $client->execCurl($loginPageCurl);

  // 2. Submit the login form.
  $submitCurl = "curl 'http://demo.zjmainstay.cn/js/simpleAjax/doPost.php' -H 'Host: demo.zjmainstay.cn' -H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:47.0) Gecko/20100101 Firefox/47.0' -H 'Accept: application/json, text/javascript, */*; q=0.01' -H 'Accept-Language: zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3' -H 'Accept-Encoding: gzip, deflate' -H 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8' -H 'X-Requested-With: XMLHttpRequest' -H 'Referer: http://demo.zjmainstay.cn/js/simpleAjax/' -H 'Cookie: Hm_lvt_1526d5aecf5561ef9401f7c7b7842a97=1468327822,1468327904,1468341636,1468411918; Hm_lpvt_1526d5aecf5561ef9401f7c7b7842a97=1468421526' -H 'Connection: keep-alive' --data 'username=demousername'";
  $actualUsername = 'Zjmainstay';

  // Before-hook: replace the placeholder username recorded in the command with the real one.
  $useRealUsername = static function ($request) use ($actualUsername) {
      $request['post'] = strtr($request['post'], ['=demousername' => '=' . $actualUsername]);
      return $request;
  };
  $client->execCurl($submitCurl, $useRealUsername);

  // 3. Logged in: freeze the cookie file, then read the logged-in page.
  $client->lockLastCookieFile();
  echo 'After Login: ', $client->getUrl($resultUrl), "\n";

四、更多

请关注github项目 php-curl 上面的更新。

如果需要查看更多关于PHP cURL应用的内容,请参考本站博客《PHP cURL实现模拟登录与采集使用方法详解》。



未经同意禁止转载!
转载请附带本文原文地址:PHP基于cURL实现自动模拟登录,首发自 Zjmainstay学习笔记
阅读( 3391 )
看完顺手点个赞呗:
(18 votes)

1.PHP cURL群:PHP cURL高级技术
2.正则表达式群:专精正则表达式
3. QQ联系(加请说明):QQ联系博主(951086941)
4. 邮箱:zjmainstay@163.com
5. 打赏博主: 捐赠支持本博客

阿里云幸运券分享
网站总访问量: