hyperf协程加速获取外部接口数据

使用一下协程,感受一下协程的魅力:用 hyperf 协程加速一个爬虫程序。仅供交流学习,请勿用于商业用途。

有时候需要跨服务器调用另一个表的内容,或者遇到一些喜欢的视频、图片等内容却不能批量下载。网上有许多抓包软件可以获取 app 的请求接口,通过软件解析出接口地址后,会发现大部分接口以 JSON 形式返回数据。那么如何用 PHP 调用外部接口的 JSON 数据?

获取方法

 //跨服务器高级用法
    /**
     * Fetch a URL with an HTTP GET request and decode the JSON response body.
     *
     * @param string $url Address of the remote JSON endpoint.
     *
     * @return mixed Decoded JSON (objects become associative arrays), or null
     *               when the transfer fails or the body is not valid JSON.
     */
    public function getapktj($url)
    {
        $curl = curl_init();
        curl_setopt_array($curl, [
            CURLOPT_CUSTOMREQUEST => 'GET',
            CURLOPT_URL => $url,
            // Do not abort on HTTP status >= 400; the body is still returned.
            CURLOPT_FAILONERROR => false,
            // Hand the body back as a string instead of printing it.
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_HEADER => false,
        ]);

        $body = curl_exec($curl);
        if (!is_string($body)) {
            // curl_exec() returns false on transport failure. Passing that to
            // json_decode() would raise a TypeError under strict_types, so
            // report the failure as null, the same value callers already get
            // for an undecodable body.
            return null;
        }

        return json_decode($body, true);
    }

调用获取方法

<?php

declare(strict_types=1);
/**
 * This file is part of Hyperf.
 *
 * @link     https://www.hyperf.io
 * @document https://hyperf.wiki
 * @contact  group@hyperf.io
 * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
 */
namespace App\Controller;
use QL\QueryList;
use Hyperf\DbConnection\Db;
/**
 * Demo controller: fetches one page of the remote video list and dumps it.
 */
class VideosController extends AbstractController
{
    /**
     * Fetch a single page of the remote video list and dump its rows.
     */
    public function getvideos()
    {
        // The page number used to come from an undefined $i, which produced
        // "page=&limit=30" plus an undefined-variable warning. Request the
        // first page explicitly instead.
        $page = 1;
        $url = "http://81.70.181.238/%2Fhot_video%2FgetList?page=" . $page . "&limit=30";

        $eq = $this->getapktj($url);
        // getapktj() returns null when the request or decoding fails; ?? keeps
        // that case from triggering array-offset warnings or errors.
        $res = $eq['data']['list'] ?? null;

        var_dump($res);
    }

    /**
     * Advanced cross-server usage: fetch a URL with an HTTP GET request and
     * decode the JSON response body.
     *
     * @param string $url Address of the remote JSON endpoint.
     *
     * @return mixed Decoded JSON (objects become associative arrays), or null
     *               when the transfer fails or the body is not valid JSON.
     */
    public function getapktj($url)
    {
        $curl = curl_init();
        curl_setopt_array($curl, [
            CURLOPT_CUSTOMREQUEST => 'GET',
            CURLOPT_URL => $url,
            // Do not abort on HTTP status >= 400; the body is still returned.
            CURLOPT_FAILONERROR => false,
            // Hand the body back as a string instead of printing it.
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_HEADER => false,
        ]);

        $body = curl_exec($curl);
        if (!is_string($body)) {
            // curl_exec() returns false on transport failure; json_decode(false)
            // would raise a TypeError under strict_types.
            return null;
        }

        return json_decode($body, true);
    }
}

100页数据无协程只爬取数据,不插入数据库

<?php

declare(strict_types=1);
/**
 * This file is part of Hyperf.
 *
 * @link     https://www.hyperf.io
 * @document https://hyperf.wiki
 * @contact  group@hyperf.io
 * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
 */
namespace App\Controller;
use QL\QueryList;
use Hyperf\DbConnection\Db;
/**
 * Sequential crawler: walks the remote video list page by page and stores
 * every row in the `videos` table.
 */
class VideosController extends AbstractController
{
    /**
     * Crawl pages 800 down to 1 one after another, insert their rows, and
     * dump the elapsed time in whole seconds.
     */
    public function getvideos()
    {
        $startedAt = time();

        for ($page = 800; $page >= 1; $page--) {
            $this->importPage($page);
        }

        var_dump(time() - $startedAt);
    }

    /**
     * Fetch one page of the video list and bulk-insert its rows.
     *
     * Pages whose response is missing, malformed or empty are skipped.
     *
     * @param int $page Page number to request.
     */
    private function importPage(int $page): void
    {
        echo($page . PHP_EOL);
        $url = "http://81.70.181.238/%2Fhot_video%2FgetList?page=" . $page . "&limit=30";

        $payload = $this->getapktj($url);
        // getapktj() returns null on failure; ?? also covers responses that
        // lack the data/list keys.
        $list = $payload['data']['list'] ?? null;
        if (!is_array($list) || $list === []) {
            return;
        }

        $rows = [];
        foreach ($list as $item) {
            // Build a fresh row each time so no value can leak over from the
            // previous item.
            $rows[] = [
                'title' => "",
                'coverpath' => $item['pic'],
                'videopath' => $item['src'],
                'created_at' => time(),
            ];
        }

        // One multi-row insert per page.
        Db::table('videos')->insert($rows);
    }

    /**
     * Advanced cross-server usage: fetch a URL with an HTTP GET request and
     * decode the JSON response body.
     *
     * @param string $url Address of the remote JSON endpoint.
     *
     * @return mixed Decoded JSON (objects become associative arrays), or null
     *               when the transfer fails or the body is not valid JSON.
     */
    public function getapktj($url)
    {
        $curl = curl_init();
        curl_setopt_array($curl, [
            CURLOPT_CUSTOMREQUEST => 'GET',
            CURLOPT_URL => $url,
            // Do not abort on HTTP status >= 400; the body is still returned.
            CURLOPT_FAILONERROR => false,
            // Hand the body back as a string instead of printing it.
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_HEADER => false,
        ]);

        $body = curl_exec($curl);
        if (!is_string($body)) {
            // curl_exec() returns false on transport failure; json_decode(false)
            // would raise a TypeError under strict_types.
            return null;
        }

        return json_decode($body, true);
    }
}

耗时12秒

协程优化:100页一眨眼就完成,几乎感觉不到耗时,所以改为对800页数据进行爬取与储存插入。为什么是800页?因为这个接口目前只有800页。

swoole官网一键协程

修改config/autoload/server.php

在 'settings' 中加入

//使curl和db阻塞函数一键协程化 无效是看看swoole版本对不对
'hook_flags' => SWOOLE_HOOK_ALL | SWOOLE_HOOK_CURL,

之后加入协程代码

<?php

declare(strict_types=1);
/**
 * This file is part of Hyperf.
 *
 * @link     https://www.hyperf.io
 * @document https://hyperf.wiki
 * @contact  group@hyperf.io
 * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
 */
namespace App\Controller;
use QL\QueryList;
use Hyperf\DbConnection\Db;
/**
 * Coroutine crawler: fetches the remote video list pages concurrently and
 * stores every row in the `videos` table.
 */
class VideosController extends AbstractController
{
    /**
     * Upper bound on page-import coroutines running at the same time.
     */
    private const MAX_CONCURRENCY = 100;

    /**
     * Crawl pages 800 down to 1, each in its own coroutine, with at most
     * MAX_CONCURRENCY pages in flight at any moment.
     *
     * Spawning all 800 coroutines at once keeps every response body and row
     * buffer alive together, which can exceed the PHP memory limit. The
     * channel below acts as a counting semaphore to prevent that.
     */
    public function getvideos()
    {
        $slots = new \Swoole\Coroutine\Channel(self::MAX_CONCURRENCY);

        for ($i = 800; $i >= 1; $i--) {
            // Suspends this coroutine while MAX_CONCURRENCY imports are running.
            $slots->push(true);

            co(function () use ($i, $slots) {
                try {
                    $this->importPage($i);
                } finally {
                    // Always release the slot, even if the import throws,
                    // so the spawning loop cannot stall.
                    $slots->pop();
                }
            });
        }
    }

    /**
     * Fetch one page of the video list and bulk-insert its rows.
     *
     * Pages whose response is missing, malformed or empty are skipped.
     *
     * @param int $page Page number to request.
     */
    private function importPage(int $page): void
    {
        echo($page . PHP_EOL);
        $url = "http://81.70.181.238/%2Fhot_video%2FgetList?page=" . $page . "&limit=30";

        $payload = $this->getapktj($url);
        // getapktj() returns null on failure; ?? also covers responses that
        // lack the data/list keys.
        $list = $payload['data']['list'] ?? null;
        if (!is_array($list) || $list === []) {
            return;
        }

        $rows = [];
        foreach ($list as $item) {
            $rows[] = [
                'title' => "",
                'coverpath' => $item['pic'],
                'videopath' => $item['src'],
                'created_at' => time(),
            ];
        }

        // One multi-row insert per page.
        Db::table('videos')->insert($rows);
    }

    /**
     * Advanced cross-server usage: fetch a URL with an HTTP GET request and
     * decode the JSON response body.
     *
     * @param string $url Address of the remote JSON endpoint.
     *
     * @return mixed Decoded JSON (objects become associative arrays), or null
     *               when the transfer fails or the body is not valid JSON.
     */
    public function getapktj($url)
    {
        $curl = curl_init();
        curl_setopt_array($curl, [
            CURLOPT_CUSTOMREQUEST => 'GET',
            CURLOPT_URL => $url,
            // Do not abort on HTTP status >= 400; the body is still returned.
            CURLOPT_FAILONERROR => false,
            // Hand the body back as a string instead of printing it.
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_HEADER => false,
        ]);

        $body = curl_exec($curl);
        if (!is_string($body)) {
            // curl_exec() returns false on transport failure; json_decode(false)
            // would raise a TypeError under strict_types.
            return null;
        }

        return json_decode($body, true);
    }
}

测试很多次800页 5000左右数据爬取与储存耗时3秒以内

这篇文章没有解决 Allowed memory size of 268435456 bytes exhausted 内存溢出问题,可以按下面这篇文章的思路做适当修改:藏羚骸的博客~hyperf协程免费查询快递物流。思路就是加一个协程等待:一次执行一百个协程,等待这一百个协程执行成功后,再执行下一批一百个协程。

有数据就可以衍生出一个马甲网站:

爬虫小马甲–短视频类

5 评论
内联反馈
查看所有评论