123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140 |
- <?php
- namespace douyin;
/**
 * Resolves a Douyin share link to the video's title, description and a
 * playable video URL, and can optionally download the video to ./upload.
 *
 * Metadata comes from the iesdouyin "iteminfo" web endpoint. The play URL is
 * derived from the first entry of video.play_addr.url_list with "playwm"
 * replaced by "play"; the response to that request is scanned for the first
 * <a href> link, which is used as the final play URL when present.
 */
class VideoCrawler
{
    /** Endpoint queried for item metadata; the item ID is appended. */
    const ITEM_INFO_ENDPOINT = 'https://www.iesdouyin.com/web/api/v2/aweme/iteminfo/?item_ids=';

    /** Mobile user agent sent when requesting the play URL. */
    const MOBILE_USER_AGENT = 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Mobile Safari/537.36';

    /** User agent used by get_curl() when the caller supplies none. */
    const DEFAULT_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36';

    /** Upper bound on forward_id hops so a forwarding cycle cannot loop forever. */
    const MAX_FORWARD_DEPTH = 10;

    /** Default connect / total timeouts (milliseconds) so a stalled peer cannot hang the request. */
    const DEFAULT_CONNECT_TIMEOUT_MS = 10000;
    const DEFAULT_TIMEOUT_MS = 30000;

    /**
     * Resolve a share URL to video metadata, following forward_id references
     * until an item whose forward_id is empty/0 is reached.
     *
     * Emits CORS and JSON content-type headers as a side effect.
     *
     * @param string $url Share link (short links are expanded via their redirect).
     *
     * @return array|false ['title' => ..., 'desc' => ..., 'play' => ...], or false
     *                     when no item could be found for the link. 'play' is null
     *                     when the item carries no play address.
     */
    public function crawByUrl($url)
    {
        $this->sendJsonHeaders();

        return $this->resolveVideo($url, true);
    }

    /**
     * Resolve a share link and download the video into ./upload.
     *
     * Unlike crawByUrl() this does not follow forward_id references.
     * Emits CORS and JSON content-type headers as a side effect.
     *
     * @param string $link Share link.
     *
     * @return mixed Result of json(): code 0 with title/desc/play/video on
     *               success, code 1 when the item or the download is unavailable.
     */
    public function catch($link)
    {
        $this->sendJsonHeaders();

        $data = $this->resolveVideo($link, false);
        if ($data === false || !is_string($data['play']) || $data['play'] === '') {
            return $this->failureResponse();
        }

        // Stream the remote file straight to disk instead of buffering the
        // whole video in memory, and report a failed download instead of
        // silently writing an empty file.
        $filename = './upload' . DIRECTORY_SEPARATOR . uniqid() . '.mp4';
        if (!copy($data['play'], $filename)) {
            if (is_file($filename)) {
                unlink($filename);
            }

            return $this->failureResponse();
        }

        // Strip the leading "." so the stored value starts at "/upload".
        $data['video'] = ltrim($filename, '.');

        return json(['code' => 0, 'msg' => '获取成功', 'data' => $data]);
    }

    /**
     * Build the error payload returned by catch().
     *
     * @return mixed Result of json().
     */
    private function failureResponse()
    {
        return json(['code' => 1, 'msg' => '获取失败', 'data' => null]);
    }

    /**
     * Send the CORS / JSON headers unless output has already started
     * (calling header() after output only produces a warning).
     */
    private function sendJsonHeaders()
    {
        if (headers_sent()) {
            return;
        }

        header('Access-Control-Allow-Origin:*');
        header('Content-type:application/json; charset=utf-8');
    }

    /**
     * Shared resolution pipeline: expand the link, look up the item and work
     * out its play URL.
     *
     * @param mixed $url            Share link.
     * @param bool  $followForwards Whether to follow forward_id references.
     *
     * @return array|false See crawByUrl().
     */
    private function resolveVideo($url, $followForwards)
    {
        $itemId = $this->extractItemId($this->getrealurl($url));
        if ($itemId === '') {
            return false;
        }

        $item = $this->fetchItem($itemId, $followForwards);
        if ($item === null) {
            return false;
        }

        return [
            'title' => $item->desc ?? null,
            'desc' => $item->share_info->share_weibo_desc ?? null,
            'play' => $this->resolvePlayUrl($item),
        ];
    }

    /**
     * Extract the item ID from a resolved URL: the element at index 2 after
     * splitting the URL path on "/" (e.g. "/video/<id>").
     *
     * @param mixed $url Resolved URL.
     *
     * @return string The ID, or '' when the URL has no such path segment.
     */
    private function extractItemId($url)
    {
        if (!is_string($url) || $url === '') {
            return '';
        }

        $path = parse_url($url, PHP_URL_PATH);
        if (!is_string($path)) {
            return '';
        }

        $segments = explode('/', $path);

        return isset($segments[2]) ? $segments[2] : '';
    }

    /**
     * Fetch item metadata, optionally following forward_id references.
     *
     * @param string $itemId         Item ID to look up.
     * @param bool   $followForwards Whether to follow forward_id references.
     *
     * @return object|null The item, or null when any lookup returns no item.
     */
    private function fetchItem($itemId, $followForwards)
    {
        $item = null;

        for ($depth = 0; $depth < self::MAX_FORWARD_DEPTH; $depth++) {
            $item = $this->firstItem($this->get_curl(self::ITEM_INFO_ENDPOINT . rawurlencode($itemId)));
            if ($item === null) {
                return null;
            }

            $next = (isset($item->forward_id) && is_scalar($item->forward_id))
                ? (string) $item->forward_id
                : '0';
            if (!$followForwards || $next === '' || $next === '0') {
                return $item;
            }

            $itemId = $next;
        }

        // Hop limit reached: hand back the last item that was fetched.
        return $item;
    }

    /**
     * Decode an iteminfo response body and return its first item.
     *
     * @param mixed $body Raw response body (false when the request failed).
     *
     * @return object|null First element of item_list, or null when the body is
     *                     not valid JSON or carries no items.
     */
    private function firstItem($body)
    {
        if (!is_string($body) || $body === '') {
            return null;
        }

        $decoded = json_decode($body);
        if (!is_object($decoded) || !isset($decoded->item_list) || !is_array($decoded->item_list)) {
            return null;
        }

        $first = $decoded->item_list[0] ?? null;

        return is_object($first) ? $first : null;
    }

    /**
     * Work out the play URL for an item.
     *
     * Requests the first play address with "playwm" replaced by "play" and
     * uses the first link found in the response; falls back to the original
     * play address when the response contains no link.
     *
     * @param object $item Item returned by fetchItem().
     *
     * @return string|null Play URL, or null when the item has no play address.
     */
    private function resolvePlayUrl($item)
    {
        $original = $item->video->play_addr->url_list[0] ?? null;
        if (!is_string($original) || $original === '') {
            return null;
        }

        $response = $this->get_curl(
            str_ireplace('playwm', 'play', $original),
            ['ua' => self::MOBILE_USER_AGENT]
        );
        $link = $this->extractFirstLink($response);

        return $link !== '' ? $link : $original;
    }

    /**
     * Return the target of the first <a href="http(s)://..."> in an HTML body.
     *
     * @param mixed $html Response body (false when the request failed).
     *
     * @return string The link with HTML entities decoded, or '' when none is found.
     */
    private function extractFirstLink($html)
    {
        if (!is_string($html) || $html === '') {
            return '';
        }

        if (preg_match('~<a[^<>]+href *= *["\']?(https?://[^ \'"<>]+)~i', $html, $match) !== 1) {
            return '';
        }

        // Attribute values are HTML-escaped ("&amp;"); decode to get the real URL.
        return html_entity_decode($match[1], ENT_QUOTES, 'UTF-8');
    }

    /**
     * Perform an HTTP request with cURL and return the response body.
     *
     * Recognised $paras keys:
     *  - httpheader: array of request header lines (replaces the defaults)
     *  - ctime:      connect timeout in milliseconds
     *  - rtime:      total transfer timeout in milliseconds
     *  - post:       request body; switches the request to POST
     *  - header:     when set, response headers are included in the output
     *  - cookie:     Cookie header value
     *  - refer:      Referer value; 1 selects the built-in qzone referer
     *  - ua:         User-Agent value
     *  - nobody:     when set, the response body is not requested
     *  - insecure:   when truthy, TLS certificate verification is disabled
     *
     * @param string $url   Request URL.
     * @param array  $paras Options, see above.
     *
     * @return string|false Response body, or false on failure.
     */
    private function get_curl($url, $paras = [])
    {
        $ch = curl_init();
        if ($ch === false) {
            return false;
        }

        // No manual Accept-Encoding header: CURLOPT_ENCODING below advertises
        // exactly the encodings cURL is able to decode.
        $headers = isset($paras['httpheader'])
            ? $paras['httpheader']
            : ['Accept:*/*', 'Accept-Language:zh-CN,zh;q=0.8', 'Connection:close'];

        // Verify certificates by default; callers must opt out explicitly.
        $verifyTls = empty($paras['insecure']);

        $options = [
            CURLOPT_URL => $url,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_HTTPHEADER => $headers,
            CURLOPT_ENCODING => '',
            CURLOPT_SSL_VERIFYPEER => $verifyTls,
            CURLOPT_SSL_VERIFYHOST => $verifyTls ? 2 : 0,
            CURLOPT_CONNECTTIMEOUT_MS => isset($paras['ctime']) ? $paras['ctime'] : self::DEFAULT_CONNECT_TIMEOUT_MS,
            CURLOPT_TIMEOUT_MS => isset($paras['rtime']) ? $paras['rtime'] : self::DEFAULT_TIMEOUT_MS,
            CURLOPT_USERAGENT => isset($paras['ua']) ? $paras['ua'] : self::DEFAULT_USER_AGENT,
        ];

        if (isset($paras['post'])) {
            $options[CURLOPT_POST] = true;
            $options[CURLOPT_POSTFIELDS] = $paras['post'];
        }
        if (isset($paras['header'])) {
            $options[CURLOPT_HEADER] = true;
        }
        if (isset($paras['cookie'])) {
            $options[CURLOPT_COOKIE] = $paras['cookie'];
        }
        if (isset($paras['refer'])) {
            $options[CURLOPT_REFERER] = in_array($paras['refer'], [1, '1', true], true)
                ? 'http://m.qzone.com/infocenter?g_f='
                : $paras['refer'];
        }
        if (isset($paras['nobody'])) {
            $options[CURLOPT_NOBODY] = true;
        }

        curl_setopt_array($ch, $options);
        $ret = curl_exec($ch);
        curl_close($ch);

        return $ret;
    }

    /**
     * Resolve the real address behind a (short) link.
     *
     * When the first response is a redirect, the last Location header seen is
     * returned; otherwise the input is returned unchanged. Only http/https
     * URLs are probed.
     *
     * @param mixed $url Link to resolve.
     *
     * @return mixed Redirect target, or $url when there is no redirect or the
     *               headers could not be fetched.
     */
    private function getrealurl($url)
    {
        if (!is_string($url)) {
            return $url;
        }

        $scheme = parse_url($url, PHP_URL_SCHEME);
        if (!is_string($scheme) || !in_array(strtolower($scheme), ['http', 'https'], true)) {
            return $url;
        }

        $headers = get_headers($url, 1);
        if (!is_array($headers) || !isset($headers[0])) {
            return $url;
        }

        // $headers[0] is the status line of the first response.
        if (preg_match('~^HTTP/\S+\s+30[12378]\b~', (string) $headers[0]) !== 1) {
            return $url;
        }

        // Header names are case-insensitive; normalise before the lookup.
        $headers = array_change_key_case($headers, CASE_LOWER);
        if (empty($headers['location'])) {
            return $url;
        }

        // Several redirects yield an array of Location values; the last wins.
        $location = $headers['location'];

        return is_array($location) ? end($location) : $location;
    }
}
|