PHP基于cURL实现自动模拟登录

 作者: Zjmainstay

本文介绍如何用 PHP 解析从浏览器复制的 cURL 命令,并据此自动完成模拟登录。

一、构思

  • 从Firefox浏览器拷贝cURL命令(初始页、提交、提交后)
  • 自动分析cURL命令形成模拟登录代码
  • 默认行为:支持 SSL(https 请求不校验证书)、自动跟随 302 跳转、自动解压 gzip 响应
 

二、实现

 
  1. <?php
  2.  
  3. namespace PhpUtility;
  4.  
  /**
   * class CurlAutoLogin
   * @author Zjmainstay
   * @website http://www.zjmainstay.cn
   *
   * Performs a simulated login by parsing and replaying cURL commands copied
   * from a browser ("Copy as cURL").
   *
   * Each replayed command stores the cookies it receives in a fresh temporary
   * file and feeds the previous file into the next request. Once the login has
   * succeeded, call lockLastCookieFile() so later requests keep using the
   * logged-in cookies instead of replacing them.
   *
   * Cookie files are created in the system temporary directory and are never
   * deleted by this class.
   */
  class CurlAutoLogin {
      /** @var string Path of the cookie file written by the most recent replayed command ('' if none yet). */
      protected $lastCookieFile = '';

      /** @var bool While true, $lastCookieFile is frozen and replayed commands no longer replace it. */
      protected $lockedLastCookieFile = false;

      /**
       * Parses a copied cURL command, executes it, and returns the response body.
       *
       * @param string         $curlContent    cURL command copied from the browser.
       * @param callable|false $callbackBefore Optional hook called with the parse result (array, or
       *                                       false when parsing failed); must return the (possibly
       *                                       modified) parse result, e.g. to swap user name/password.
       * @param callable|false $callbackAfter  Optional hook called with the parse result and the
       *                                       response; its return value becomes the result, e.g. to
       *                                       extract a CSRF token.
       * @return mixed Response body, false when the transfer failed, '' when there was no URL to
       *               request, or whatever $callbackAfter returns.
       */
      public function execCurl($curlContent, $callbackBefore = false, $callbackAfter = false) {
          $parseCurlResult = $this->_parseCurl($curlContent);
          if (!empty($callbackBefore)) {
              $parseCurlResult = $callbackBefore($parseCurlResult);
          }

          $execCurlResult = $this->_execCurl($parseCurlResult);
          if (!empty($callbackAfter)) {
              $execCurlResult = $callbackAfter($parseCurlResult, $execCurlResult);
          }

          return $execCurlResult;
      }

      /**
       * Extracts URL, headers and POST body from a copied cURL command.
       *
       * Only single-quoted arguments are recognised. The Cookie header is dropped
       * because cookies are supplied from the cookie file instead. The POST body is
       * taken from the first --data, --data-raw or --data-binary argument.
       *
       * @param string $curlContent cURL command copied from the browser.
       * @return bool|array false when no "curl '<url>'" is found, otherwise an array
       *                    with the keys url (string), header (string[]) and post (string).
       */
      protected function _parseCurl($curlContent) {
          if (!preg_match("#curl '([^']*?)'#is", $curlContent, $matchUrl)) {
              return false;
          }

          // Remove cookie data from the headers; the cookie file takes over.
          $curlContent = preg_replace("#-H 'Cookie:[^']*'#is", '', $curlContent);

          $httpHeader = [];
          if (preg_match_all("#-H '([^']*?)'#is", $curlContent, $headerMatches)) {
              $httpHeader = $headerMatches[1];
          }

          // Current browsers emit --data-raw (or --data-binary) rather than --data.
          $postData = '';
          if (preg_match("#--data(?:-raw|-binary)? '([^']*?)'#is", $curlContent, $postDataMatch)) {
              $postData = $postDataMatch[1];
          }

          return [
              'url'    => $matchUrl[1],
              'header' => $httpHeader,
              'post'   => $postData,
          ];
      }

      /**
       * Executes a parsed cURL command and records the cookies it yields.
       *
       * @param array|bool $parseCurlResult Parse result with the keys url/header/post.
       * @return string|bool Response body, false when the transfer failed, or '' when
       *                     the parse result carries no URL.
       */
      protected function _execCurl($parseCurlResult) {
          if (empty($parseCurlResult['url'])) {
              return '';
          }

          // A new jar is only needed while the cookie file may still be replaced;
          // creating one while locked would just leave an orphaned temp file behind.
          $cookieJar = null;
          if (!$this->lockedLastCookieFile) {
              // A unique file per request avoids concurrent processes overwriting
              // each other's cookies.
              $cookieJar = tempnam(sys_get_temp_dir(), 'autologin');
              if ($cookieJar === false) {
                  $this->_log('Unable to create a temporary cookie file');
                  $cookieJar = null;
              }
          }

          $header = isset($parseCurlResult['header']) ? $parseCurlResult['header'] : [];
          $post   = isset($parseCurlResult['post']) ? $parseCurlResult['post'] : '';

          $content = $this->_sendRequest($parseCurlResult['url'], $header, $post, $cookieJar);

          if ($cookieJar !== null) {
              $this->setLastCookieFile($cookieJar);
          }

          return $content;
      }

      /**
       * Shared request routine behind _execCurl(), getUrl() and postUrl().
       *
       * Sends cookies from the last cookie file, follows redirects, accepts gzip,
       * and logs any cURL error.
       *
       * @param string            $url       Target URL.
       * @param array|false       $header    Raw "Name: value" header lines; skipped when empty.
       * @param string|array|bool $postData  POST payload; a GET request is sent when empty.
       * @param string|null       $cookieJar File to store received cookies in, or null to store none.
       * @return string|bool Response body, or false on failure.
       */
      private function _sendRequest($url, $header, $postData, $cookieJar = null) {
          $ch = curl_init($url);
          if ($ch === false) {
              $this->_log('curl_init failed for url: ' . $url);
              return false;
          }

          curl_setopt($ch, CURLOPT_HEADER, 0);
          curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // return the body instead of printing it
          curl_setopt($ch, CURLOPT_ENCODING, "gzip");  // accept gzip-compressed responses

          if (!empty($header)) {
              curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
          }

          // NOTE(review): certificate and host verification are switched off for
          // https URLs, which exposes the login to man-in-the-middle attacks.
          // Kept as-is for backward compatibility.
          if (substr($url, 0, 5) == 'https') {
              curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
              curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
          }

          // Follow redirects (302 etc.).
          curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);

          // An empty file name loads nothing but still enables cookie handling.
          curl_setopt($ch, CURLOPT_COOKIEFILE, $this->lastCookieFile);
          if ($cookieJar !== null) {
              curl_setopt($ch, CURLOPT_COOKIEJAR, $cookieJar);
          }

          if (!empty($postData)) {
              curl_setopt($ch, CURLOPT_POST, 1);
              curl_setopt($ch, CURLOPT_POSTFIELDS, $postData);
          }

          // curl_exec() reports failure by returning false, so no try/catch is needed.
          $content = curl_exec($ch);

          $curlError = curl_error($ch);
          if (!empty($curlError)) {
              $this->_log($curlError);
          }

          // The cookie jar is written when the handle is released. curl_close() does
          // that before PHP 8; from PHP 8 on it is a no-op, so also drop the reference.
          curl_close($ch);
          unset($ch);

          return $content;
      }

      /**
       * Appends a message to run.log next to this file.
       *
       * @param string $msg Message to record.
       * @return void
       */
      protected function _log($msg) {
          file_put_contents(__DIR__ . '/run.log', $msg . "\n", FILE_APPEND);
      }

      /**
       * Returns the path of the cookie file used for subsequent requests.
       *
       * @return string Cookie file path, or '' when no command has been replayed yet.
       */
      public function getLastCookieFile() {
          return $this->lastCookieFile;
      }

      /**
       * Records a new cookie file unless the current one is locked.
       *
       * @param string $cookieFile Path of the cookie file written by the latest request.
       * @return void
       */
      protected function setLastCookieFile($cookieFile) {
          if (!$this->lockedLastCookieFile) {
              $this->lastCookieFile = $cookieFile;
          }
      }

      /**
       * Freezes the current cookie file (call after a successful login) so later
       * replayed commands do not replace it.
       *
       * @return void
       */
      public function lockLastCookieFile() {
          $this->lockedLastCookieFile = true;
      }

      /**
       * Allows replayed commands to replace the cookie file again.
       *
       * @return void
       */
      public function unlockLastCookieFile() {
          $this->lockedLastCookieFile = false;
      }

      /**
       * Fetches a URL with GET, sending the cookies from the last cookie file.
       *
       * @param string      $url    Target URL.
       * @param array|false $header Optional raw "Name: value" header lines.
       * @return string|bool Response body, or false on failure.
       */
      public function getUrl($url, $header = false) {
          return $this->_sendRequest($url, $header, false);
      }

      /**
       * Sends a POST request, sending the cookies from the last cookie file.
       * Falls back to GET when $postData is empty.
       *
       * @param string            $url      Target URL.
       * @param string|array|bool $postData POST payload.
       * @param array|false       $header   Optional raw "Name: value" header lines.
       * @return string|bool Response body, or false on failure.
       */
      public function postUrl($url, $postData = false, $header = false) {
          return $this->_sendRequest($url, $header, $postData);
      }
  }
 

三、演示

运行:PHP cURL自动模拟登录演示

 
  1. <?php
  2.  
  require_once __DIR__.'/../vendor/autoload.php';

  $client = new PhpUtility\CurlAutoLogin();

  // Step 0: request the protected page before logging in.
  $protectedUrl = 'http://demo.zjmainstay.cn/js/simpleAjax/loginResult.php';
  $beforeLogin = $client->getUrl($protectedUrl);
  echo 'Before Login: ', $beforeLogin, "\n";

  // Step 1: load the login page so the initial cookies are collected.
  $loginPageCurl = "curl 'http://demo.zjmainstay.cn/js/simpleAjax/' -H 'Host: demo.zjmainstay.cn' -H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:47.0) Gecko/20100101 Firefox/47.0' -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' -H 'Accept-Language: zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3' -H 'Accept-Encoding: gzip, deflate' -H 'Cookie: Hm_lvt_1526d5aecf5561ef9401f7c7b7842a97=1468327822,1468327904,1468341636,1468411918; Hm_lpvt_1526d5aecf5561ef9401f7c7b7842a97=1468421526' -H 'Connection: keep-alive' -H 'If-Modified-Since: Mon, 27 Oct 2014 08:31:18 GMT' -H 'If-None-Match: \"32e-453-506635ac5e180\"' -H 'Cache-Control: max-age=0'";
  $client->execCurl($loginPageCurl);

  // Step 2: submit the login form.
  $submitFormCurl = "curl 'http://demo.zjmainstay.cn/js/simpleAjax/doPost.php' -H 'Host: demo.zjmainstay.cn' -H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:47.0) Gecko/20100101 Firefox/47.0' -H 'Accept: application/json, text/javascript, */*; q=0.01' -H 'Accept-Language: zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3' -H 'Accept-Encoding: gzip, deflate' -H 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8' -H 'X-Requested-With: XMLHttpRequest' -H 'Referer: http://demo.zjmainstay.cn/js/simpleAjax/' -H 'Cookie: Hm_lvt_1526d5aecf5561ef9401f7c7b7842a97=1468327822,1468327904,1468341636,1468411918; Hm_lpvt_1526d5aecf5561ef9401f7c7b7842a97=1468421526' -H 'Connection: keep-alive' --data 'username=demousername'";
  $actualUsername = 'Zjmainstay';
  // Pre-processing hook: replace the placeholder user name captured in the
  // copied command with the real one before the request is sent.
  $swapUsername = function ($parsed) use ($actualUsername) {
      $parsed['post'] = str_replace('=demousername', '=' . $actualUsername, $parsed['post']);

      return $parsed;
  };
  $client->execCurl($submitFormCurl, $swapUsername);

  // Step 3: logged in - freeze the cookie file and fetch the protected page again.
  $client->lockLastCookieFile();
  $afterLogin = $client->getUrl($protectedUrl);
  echo 'After Login: ', $afterLogin, "\n";
 

四、更多

请关注github项目 php-utility-class 上面的更新。

如果需要查看更多关于PHP cURL应用的内容,请参考本站博客《PHP cURL实现模拟登录与采集使用方法详解》。

原文链接:http://www.zjmainstay.cn/php-curl-auto-login