VideoCrawler.php 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
  1. <?php
  2. namespace douyin;
  /**
   * Looks up Douyin videos from share links.
   *
   * A share link is resolved through its HTTP redirect, the item id is read
   * from the third path segment of the resolved URL, and the item record is
   * fetched from the iesdouyin.com "iteminfo" endpoint.
   */
  class VideoCrawler
  {
      // Constants carry no visibility modifier so the class stays PHP 7.0 compatible.

      /** Item-info endpoint; the URL-encoded item id is appended to it. */
      const ITEM_INFO_ENDPOINT = 'https://www.iesdouyin.com/web/api/v2/aweme/iteminfo/?item_ids=';

      /** User agent sent when requesting the play address. */
      const MOBILE_USER_AGENT = 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Mobile Safari/537.36';

      /** User agent sent when the caller does not supply one. */
      const DESKTOP_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36';

      /** Upper bound on forward_id hops, so a cyclic forward chain cannot loop forever. */
      const MAX_FORWARD_HOPS = 10;

      /**
       * Resolves a share link to the video's title, description and play URL.
       *
       * While the fetched item carries a non-empty forward_id, that id is looked
       * up in turn (at most MAX_FORWARD_HOPS lookups in total).
       *
       * Side effect: emits CORS and JSON content-type response headers.
       *
       * @param string $url Share link.
       * @return array|false ['title' => ..., 'desc' => ..., 'play' => ...], or false
       *                     when no item id can be extracted or a lookup yields no item.
       */
      public function crawByUrl($url)
      {
          $this->sendJsonHeaders();

          $itemId = $this->extractItemId($url);
          if ($itemId === null) {
              return false;
          }

          $item = null;
          for ($hop = 0; $hop < self::MAX_FORWARD_HOPS; $hop++) {
              $item = $this->fetchItem($itemId);
              if ($item === null) {
                  return false;
              }
              // Covers a missing forward_id as well as 0, "0" and "".
              if (empty($item->forward_id) || !is_scalar($item->forward_id)) {
                  break;
              }
              $itemId = $item->forward_id;
          }

          return $this->buildResult($item);
      }

      /**
       * Fetches a Douyin video and stores a copy under ./upload.
       *
       * Unlike crawByUrl(), only the item named by the link is looked up;
       * forward_id is not followed.
       *
       * Side effects: emits CORS and JSON content-type response headers and writes
       * an .mp4 file into ./upload (the directory is not created here).
       *
       * NOTE(review): json() is not defined in this file; presumably a framework
       * response helper. The failure payload (code 1) is new; confirm that it
       * matches the application's error convention.
       *
       * @param string $link Share link.
       * @return mixed Whatever json() returns; code 0 with the data on success.
       */
      public function catch($link)
      {
          $this->sendJsonHeaders();

          $itemId = $this->extractItemId($link);
          $item = $itemId === null ? null : $this->fetchItem($itemId);
          if ($item === null) {
              return $this->failureResponse();
          }

          $data = $this->buildResult($item);
          if ($data['play'] === null) {
              return $this->failureResponse();
          }

          // Download the video file.
          // NOTE(review): the URL comes from a remote response and the whole body
          // is buffered in memory before it is written out.
          $video = file_get_contents($data['play']);
          if ($video === false) {
              return $this->failureResponse();
          }

          $filename = './upload' . DIRECTORY_SEPARATOR . uniqid() . '.mp4';
          if (file_put_contents($filename, $video) === false) {
              return $this->failureResponse();
          }

          // Strip the leading "." so the stored path starts at the directory separator.
          $data['video'] = ltrim($filename, '.');

          return json(['code' => 0, 'msg' => '获取成功', 'data' => $data]);
      }

      /**
       * Emits the CORS and JSON content-type headers unless output has already started.
       */
      private function sendJsonHeaders()
      {
          if (headers_sent()) {
              return;
          }
          header('Access-Control-Allow-Origin:*');
          header('Content-type:application/json; charset=utf-8');
      }

      /**
       * Builds the failure payload returned by catch().
       *
       * @return mixed Whatever json() returns.
       */
      private function failureResponse()
      {
          return json(['code' => 1, 'msg' => '获取失败', 'data' => []]);
      }

      /**
       * Resolves the link and returns the third "/"-separated piece of its path.
       *
       * @param mixed $url Share link.
       * @return string|null The path segment, or null when it is missing or empty.
       */
      private function extractItemId($url)
      {
          if (!is_string($url) || $url === '') {
              return null;
          }

          $resolved = $this->getrealurl($url);
          if (!is_string($resolved)) {
              return null;
          }

          $path = parse_url($resolved, PHP_URL_PATH);
          if (!is_string($path)) {
              return null;
          }

          // For a path such as "/video/123" the pieces are ['', 'video', '123'].
          $segments = explode('/', $path);

          return (isset($segments[2]) && $segments[2] !== '') ? $segments[2] : null;
      }

      /**
       * Requests the item-info endpoint and returns the first entry of item_list.
       *
       * @param int|string $itemId Item id to look up.
       * @return object|null Decoded item, or null when the request fails, the body
       *                     is not the expected JSON, or item_list is empty.
       */
      private function fetchItem($itemId)
      {
          $body = $this->get_curl(self::ITEM_INFO_ENDPOINT . rawurlencode((string) $itemId));
          if (!is_string($body) || $body === '') {
              return null;
          }

          $payload = json_decode($body);
          $items = is_object($payload) ? ($payload->item_list ?? null) : null;
          if (!is_array($items) || !isset($items[0]) || !is_object($items[0])) {
              return null;
          }

          return $items[0];
      }

      /**
       * Maps a decoded item onto the title/desc/play array returned to callers.
       *
       * @param object $item Decoded item record.
       * @return array Keys title, desc and play; absent fields become null.
       */
      private function buildResult($item)
      {
          return [
              'title' => $item->desc ?? null,
              'desc' => $item->share_info->share_weibo_desc ?? null,
              'play' => $this->resolvePlayUrl($item),
          ];
      }

      /**
       * Determines the play URL of an item.
       *
       * The first play address is requested with "playwm" replaced by "play"; if
       * the response contains an <a href="http(s)://..."> link, that link is
       * returned. Otherwise the original play address is returned unchanged.
       *
       * @param object $item Decoded item record.
       * @return string|null Play URL, or null when the item has no play address.
       */
      private function resolvePlayUrl($item)
      {
          $original = $item->video->play_addr->url_list[0] ?? null;
          if (!is_string($original) || $original === '') {
              return null;
          }

          $page = $this->get_curl(
              str_ireplace('playwm', 'play', $original),
              ['ua' => self::MOBILE_USER_AGENT]
          );

          $pattern = '/<a[^<>]+href *= *["\']?(https?:\/\/[^ \'"<>]+)/i';
          if (is_string($page) && preg_match($pattern, $page, $match) === 1) {
              return $match[1];
          }

          return $original;
      }

      /**
       * Performs an HTTP request with cURL and returns the response body.
       *
       * Recognised $paras keys:
       *  - httpheader: list of request header lines (replaces the defaults)
       *  - ctime:      connect timeout in milliseconds
       *  - rtime:      total transfer timeout in milliseconds
       *  - post:       request body; switches the request to POST
       *  - header:     when set, response headers are included in the result
       *  - cookie:     Cookie header value
       *  - refer:      Referer value; 1, '1' or true selects the built-in referer
       *  - ua:         User-Agent value
       *  - nobody:     when set, the response body is not requested
       *
       * @param string $url   Request URL.
       * @param array  $paras Optional request settings (see above).
       * @return string|false Response body, or false on failure.
       */
      private function get_curl($url, $paras = array())
      {
          $ch = curl_init();
          if ($ch === false) {
              return false;
          }

          curl_setopt($ch, CURLOPT_URL, $url);

          // NOTE(review): TLS certificate and host verification are switched off,
          // as in the original code, so responses can be spoofed by a
          // man-in-the-middle. Enable both once the host has a usable CA bundle.
          curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
          curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);

          // No Accept-Encoding here: cURL sends its own (see CURLOPT_ENCODING
          // below), so only encodings it can decode are advertised.
          $httpheader = $paras['httpheader'] ?? [
              'Accept:*/*',
              'Accept-Language:zh-CN,zh;q=0.8',
              'Connection:close',
          ];
          curl_setopt($ch, CURLOPT_HTTPHEADER, $httpheader);

          if (isset($paras['ctime'])) { // connect timeout
              curl_setopt($ch, CURLOPT_CONNECTTIMEOUT_MS, $paras['ctime']);
          }
          if (isset($paras['rtime'])) { // read timeout
              curl_setopt($ch, CURLOPT_TIMEOUT_MS, $paras['rtime']);
          }
          if (isset($paras['post'])) {
              curl_setopt($ch, CURLOPT_POST, 1);
              curl_setopt($ch, CURLOPT_POSTFIELDS, $paras['post']);
          }
          if (isset($paras['header'])) {
              curl_setopt($ch, CURLOPT_HEADER, true);
          }
          if (isset($paras['cookie'])) {
              curl_setopt($ch, CURLOPT_COOKIE, $paras['cookie']);
          }
          if (isset($paras['refer'])) {
              $refer = $paras['refer'];
              // Compared strictly so a referer string that merely starts with "1"
              // is not mistaken for the flag.
              $useDefault = ($refer === 1 || $refer === '1' || $refer === true);
              curl_setopt(
                  $ch,
                  CURLOPT_REFERER,
                  $useDefault ? 'http://m.qzone.com/infocenter?g_f=' : $refer
              );
          }

          curl_setopt($ch, CURLOPT_USERAGENT, $paras['ua'] ?? self::DESKTOP_USER_AGENT);

          if (isset($paras['nobody'])) {
              curl_setopt($ch, CURLOPT_NOBODY, 1);
          }

          // An empty string advertises every encoding this cURL build supports
          // and enables decoding of the response.
          curl_setopt($ch, CURLOPT_ENCODING, '');
          curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

          $ret = curl_exec($ch);
          curl_close($ch);

          return $ret;
      }

      /**
       * Returns the redirect target of a URL.
       *
       * When the first response is a redirect (301, 302, 303, 307 or 308) the
       * last Location header received is returned; get_headers() reports several
       * Location values as an array when more than one redirect was followed.
       * In every other case, including request failure and non-HTTP(S) input,
       * the URL is returned unchanged.
       *
       * @param string $url URL to resolve.
       * @return string Resolved URL.
       */
      private function getrealurl($url)
      {
          // Only HTTP(S) URLs are probed; anything else is handed back as-is.
          if (preg_match('#^https?://#i', $url) !== 1) {
              return $url;
          }

          $headers = get_headers($url, 1);
          if (!is_array($headers) || !isset($headers[0]) || !is_string($headers[0])) {
              return $url;
          }

          if (preg_match('#^HTTP/\S+\s+30[12378]\b#i', $headers[0]) !== 1) {
              return $url;
          }

          // Header names are case-insensitive; normalise before the lookup.
          $headers = array_change_key_case($headers, CASE_LOWER);
          if (empty($headers['location'])) {
              return $url;
          }

          $location = $headers['location'];

          return is_array($location) ? end($location) : $location;
      }
  }