用协程写爬虫是真的香。之前的文章对协程有过介绍,感兴趣的小伙伴可以翻看。不使用协程时,200 条数据匹配物流信息并入库需要 20 秒;而使用协程后,几万条数据也能轻松入库!需求:数据库中有八万条圆通单号,现在想批量匹配物流信息。
php7.2+swoole4+hyperf2.0
以圆通接口为例:
圆通接口Post提交: https://www.yto.net.cn/api/trace/waybill?waybillNo=快递单号
Postman 测试接口的结果如下图
Hyperf\config\autoload\server.php配置为
<?php
declare(strict_types=1);
/**
* This file is part of Hyperf.
*
* @link https://www.hyperf.io
* @document https://hyperf.wiki
* @contact group@hyperf.io
* @license https://github.com/hyperf/hyperf/blob/master/LICENSE
*/
use Hyperf\Server\Server;
use Hyperf\Server\SwooleEvent;
// Server configuration array returned to whatever includes this file.
// It declares one HTTP server plus the Swoole settings and lifecycle callbacks.
return [
'mode' => SWOOLE_PROCESS,
'servers' => [
[
// Single HTTP server listening on every interface (0.0.0.0), TCP port 9501.
'name' => 'http',
'type' => Server::SERVER_HTTP,
'host' => '0.0.0.0',
'port' => 9501,
'sock_type' => SWOOLE_SOCK_TCP,
'callbacks' => [
// Incoming requests are handed to Hyperf\HttpServer\Server::onRequest.
SwooleEvent::ON_REQUEST => [Hyperf\HttpServer\Server::class, 'onRequest'],
],
],
],
'settings' => [
'enable_coroutine' => true,
// Worker count is whatever swoole_cpu_num() reports on this machine.
'worker_num' => swoole_cpu_num(),
'pid_file' => BASE_PATH . '/runtime/hyperf.pid',
'open_tcp_nodelay' => true,
// NOTE(review): presumably the ceiling on concurrently existing coroutines;
// the controller's batch job spawns many coroutines, so confirm this value fits.
'max_coroutine' => 100000,
'open_http2_protocol' => true,
'max_request' => 100000,
// Both buffers are 2 * 1024 * 1024 = 2 MiB.
'socket_buffer_size' => 2 * 1024 * 1024,
'buffer_output_size' => 2 * 1024 * 1024,
// SWOOLE_HOOK_CURL is OR-ed in explicitly on top of SWOOLE_HOOK_ALL.
// NOTE(review): newer Swoole releases may already include the curl hook in
// SWOOLE_HOOK_ALL — confirm against the installed version.
'hook_flags' => SWOOLE_HOOK_ALL | SWOOLE_HOOK_CURL,
// NOTE(review): presumably runs the server detached in the background, in which
// case output written with echo would not reach a terminal — confirm.
'daemonize' => true,
],
// Framework bootstrap hooks for worker start, pipe messages and worker exit.
'callbacks' => [
SwooleEvent::ON_WORKER_START => [Hyperf\Framework\Bootstrap\WorkerStartCallback::class, 'onWorkerStart'],
SwooleEvent::ON_PIPE_MESSAGE => [Hyperf\Framework\Bootstrap\PipeMessageCallback::class, 'onPipeMessage'],
SwooleEvent::ON_WORKER_EXIT => [Hyperf\Framework\Bootstrap\WorkerExitCallback::class, 'onWorkerExit'],
],
];
Hyperf\config\routes.php 配置为
// GET /getkdstatus is dispatched to KuaidiController::getkdstatus().
Router::get(
    '/getkdstatus',
    [\App\Controller\KuaidiController::class, 'getkdstatus']
);
Hyperf\app\Controller\KuaidiController.php
<?php
declare(strict_types=1);
/**
* This file is part of Hyperf.
*
* @link https://www.hyperf.io
* @document https://hyperf.wiki
* @contact group@hyperf.io
* @license https://github.com/hyperf/hyperf/blob/master/LICENSE
*/
namespace App\Controller;
use QL\QueryList;
use Hyperf\DbConnection\Db;
/**
 * Batch-refreshes the tracking status of YTO (圆通) parcels stored in the `kds` table.
 *
 * Each pending row is looked up against the YTO waybill endpoint in its own
 * coroutine and the resulting status code is written back to `kds.kdstatus`.
 */
class KuaidiController extends AbstractController
{
    /**
     * Maximum number of lookups allowed to be in flight at the same time.
     *
     * Spawning one coroutine per row with no upper bound is what exhausts memory
     * on large batches, so the number of concurrent lookups is capped.
     */
    private const MAX_CONCURRENCY = 100;

    /**
     * Look up every pending YTO parcel and store its current status.
     *
     * Selects up to 500 rows with status = 2, carrier 圆通, created within the
     * last 12 days and whose kdstatus is not already isE / isL / isS, then
     * schedules one lookup coroutine per row, at most MAX_CONCURRENCY at a time.
     *
     * The method returns once every lookup has been scheduled; up to
     * MAX_CONCURRENCY lookups may still be running at that point.
     *
     * @return string always 'ok'; returned (not echoed) so that it becomes the HTTP response body
     */
    public function getkdstatus()
    {
        $since = date('Y-m-d H:i:s', strtotime('-12 day'));
        $pending = Db::table('kds')
            ->where('status', 2)
            ->orderBy('id', 'desc')
            ->where('kdname', '圆通')
            ->whereDate('created_at', '>', $since)
            ->whereNotIn('kdstatus', ['isE', 'isL', 'isS'])
            ->limit(500)
            ->get(['id', 'kdstatus', 'kdid']);

        // Round-trip through JSON so every row is a plain associative array.
        $encoded = json_encode($pending);
        $rows = is_string($encoded) ? json_decode($encoded, true) : null;
        if (! is_array($rows) || $rows === []) {
            return 'ok';
        }

        // The channel is used as a counting semaphore: push() suspends this
        // coroutine while MAX_CONCURRENCY lookups are already running.
        $slots = new \Swoole\Coroutine\Channel(self::MAX_CONCURRENCY);

        // Rows are processed last-to-first, as the original index loop did.
        foreach (array_reverse($rows) as $row) {
            $slots->push(true);
            co(function () use ($row, $slots) {
                try {
                    $this->refreshStatus($row);
                } finally {
                    // Always free the slot, even if the lookup threw, so the
                    // scheduling loop can never stall on a full channel.
                    $slots->pop();
                }
            });
        }

        return 'ok';
    }

    /**
     * POST a waybill query to the given URL and return the raw response body.
     *
     * NOTE(review): TLS peer and host verification are disabled below, which allows
     * man-in-the-middle tampering with the response. Kept as in the original because
     * enabling it depends on the CA bundle available on the host — confirm and enable.
     *
     * @param mixed $kdorder waybill number, sent in the form body
     * @param string $url full endpoint URL
     * @return string|false response body, or false when the transfer failed
     */
    public function post($kdorder, $url)
    {
        // The key is `waybillNo`, matching the query-string parameter the endpoint uses.
        $data = ['waybillNo' => $kdorder, 'verify' => true];
        $headers = ['Content-Type: application/x-www-form-urlencoded'];

        $curl = curl_init();
        if ($curl === false) {
            return false;
        }

        curl_setopt($curl, CURLOPT_URL, $url);
        curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, 0); // do not verify the peer certificate (see note above)
        curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, 0); // do not verify the certificate host name (see note above)
        curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1); // follow redirects
        curl_setopt($curl, CURLOPT_AUTOREFERER, 1); // set Referer automatically when following redirects
        curl_setopt($curl, CURLOPT_POST, 1); // regular form POST
        curl_setopt($curl, CURLOPT_POSTFIELDS, http_build_query($data));
        curl_setopt($curl, CURLOPT_TIMEOUT, 30); // give up after 30 seconds
        curl_setopt($curl, CURLOPT_HEADER, 0); // do not include response headers in the returned body
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1); // return the body instead of printing it
        curl_setopt($curl, CURLOPT_HTTPHEADER, $headers);

        $result = curl_exec($curl);
        if (curl_errno($curl)) {
            echo 'Errno'.curl_error($curl);
        }
        curl_close($curl);

        return $result;
    }

    /**
     * Query the YTO endpoint for one row and persist the resulting status.
     *
     * When the HTTP transfer itself fails, the row is left untouched so that a
     * later run retries it instead of recording a transport error as a status.
     *
     * @param array $row a `kds` row containing at least `id` and `kdid`
     */
    private function refreshStatus(array $row): void
    {
        $waybillNo = trim((string) $row['kdid']);
        // YTO endpoint; the waybill number is encoded so it cannot break the query string.
        $url = 'https://www.yto.net.cn/api/trace/waybill?verify=true&waybillNo=' . rawurlencode($waybillNo);

        $body = $this->post($waybillNo, $url);
        if (! is_string($body)) {
            return;
        }

        $kdstatus = $this->resolveStatus(json_decode($body, true));
        Db::table('kds')->where('id', $row['id'])->update(['kdstatus' => $kdstatus]);
    }

    /**
     * Map a decoded endpoint response to the status code stored in `kds.kdstatus`.
     *
     * @param mixed $payload decoded JSON response; null when the body was not valid JSON
     * @return string one of isE, isS, isD, isQ, isC, isL
     */
    private function resolveStatus($payload): string
    {
        if (! is_array($payload) || ($payload['code'] ?? null) !== 'success') {
            // The endpoint returned nothing usable.
            return 'isE';
        }

        $flags = $payload['data'][0] ?? [];
        if (! is_array($flags) || empty($flags['isS'])) {
            // No such order.
            return 'isS';
        }
        if (! empty($flags['isD'])) {
            // Delivered successfully.
            return 'isD';
        }
        if (! empty($flags['isQ'])) {
            // Intercepted.
            return 'isQ';
        }
        if (! empty($flags['isC'])) {
            // Parcel lost.
            return 'isC';
        }

        // Still in transit.
        return 'isL';
    }
}
效果图
这篇文章没有解决 Allowed memory size of 268435456 bytes exhausted 内存溢出问题,可以按照下面这篇文章的思路做适当修改:藏羚骸的博客《hyperf协程免费查询快递物流》。思路就是加一个协程等待:一次执行一百个协程,等这一百个协程执行完成后,再执行下一批一百个协程。