Hyperf协程免费查询快递物流

小伙伴们可以先看《免费查询快递物流》这篇文章,该文详细介绍了用 PHP 常规方式爬取物流信息的思路。本文是在上一篇文章的基础上加入协程,属于升级版。之前还写过一篇《hyperf协程大批量匹配快递物流信息》的文章,其中也介绍了协程的执行速度相比传统方式有质的飞跃,这里用 Hyperf 协程实现免费查询快递物流。

php7.2+swoole4+hyperf2.0

Hyperf\config\autoload\server.php配置为

<?php

declare(strict_types=1);
/**
 * This file is part of Hyperf.
 *
 * @link     https://www.hyperf.io
 * @document https://hyperf.wiki
 * @contact  group@hyperf.io
 * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
 */
use Hyperf\Server\Server;
use Hyperf\Server\SwooleEvent;

// Hyperf server configuration: a single HTTP server plus the Swoole settings
// and lifecycle callbacks returned to the framework as one array.
return [
    'mode' => SWOOLE_PROCESS,
    'servers' => [
        [
            'name' => 'http',
            'type' => Server::SERVER_HTTP,
            // Listens on every interface, TCP port 8080.
            'host' => '0.0.0.0',
            'port' => 8080,
            'sock_type' => SWOOLE_SOCK_TCP,
            'callbacks' => [
                // Incoming requests are handed to Hyperf's HTTP server class.
                SwooleEvent::ON_REQUEST => [Hyperf\HttpServer\Server::class, 'onRequest'],
            ],
        ],
    ],
    'settings' => [
        'enable_coroutine' => true,
        // Worker count is taken from swoole_cpu_num().
        'worker_num' => swoole_cpu_num(),
        'pid_file' => BASE_PATH . '/runtime/hyperf.pid',
        'open_tcp_nodelay' => true,
        'max_coroutine' => 100000,
        'open_http2_protocol' => true,
        'max_request' => 100000,
        // 2 * 1024 * 1024 = 2097152 (2 MiB if the unit is bytes — TODO confirm).
        'socket_buffer_size' => 2 * 1024 * 1024,
        'buffer_output_size' => 2 * 1024 * 1024,
        // The curl hook is OR-ed in explicitly on top of SWOOLE_HOOK_ALL; the
        // controller in this article calls curl_* inside coroutines.
        // NOTE(review): presumably SWOOLE_HOOK_ALL alone does not cover curl on
        // the targeted Swoole version — confirm.
        'hook_flags' => SWOOLE_HOOK_ALL | SWOOLE_HOOK_CURL,
        // NOTE(review): presumably detaches the server into the background;
        // confirm, and consider false while debugging.
        'daemonize' => true,
        
    ],
    'callbacks' => [
        SwooleEvent::ON_WORKER_START => [Hyperf\Framework\Bootstrap\WorkerStartCallback::class, 'onWorkerStart'],
        SwooleEvent::ON_PIPE_MESSAGE => [Hyperf\Framework\Bootstrap\PipeMessageCallback::class, 'onPipeMessage'],
        SwooleEvent::ON_WORKER_EXIT => [Hyperf\Framework\Bootstrap\WorkerExitCallback::class, 'onWorkerExit'],
    ],
];

Hyperf\config\routes.php 配置为

// Route GET /getkdstatus2 to KuaidiController::getkdstatus2.
Router::get('/getkdstatus2', 'App\Controller\KuaidiController::getkdstatus2');

Hyperf\app\Controller\KuaidiController.php

<?php

declare(strict_types=1);
/**
 * This file is part of Hyperf.
 *
 * @link     https://www.hyperf.io
 * @document https://hyperf.wiki
 * @contact  group@hyperf.io
 * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
 */
namespace App\Controller;
use QL\QueryList;
use Hyperf\DbConnection\Db;
use  Hyperf\HttpServer\Contract\RequestInterface;
class KuaidiController extends AbstractController
{
    /** Number of orders looked up concurrently in one batch of coroutines. */
    private const BATCH_SIZE = 100;

    /** Connect/total timeout (seconds) applied to every outgoing HTTP request. */
    private const HTTP_TIMEOUT = 10;

    /**
     * Refresh the `current` tracking text of every `yunorders` row that has
     * status 3 and the requested `current` value.
     *
     * Rows are processed in batches of BATCH_SIZE. Each batch starts one
     * coroutine per order and waits for all of them before the next batch
     * begins, which bounds the number of in-flight requests and the memory
     * held at any time.
     *
     * @param RequestInterface $request reads the optional `current` input (default '未获取')
     * @return string 'ok' when every batch was processed, or a short error
     *                message when the Baidu token could not be obtained
     */
    public function getkdstatus2(RequestInterface $request)
    {
        $current = $request->input('current', '未获取');

        // Count in the database instead of loading every row just to count it.
        $total = Db::table('yunorders')
            ->where('status', 3)
            ->where('current', $current)
            ->count();
        $pages = (int) ceil($total / self::BATCH_SIZE);

        // Walk the pages from last to first: a successful update changes the
        // row's `current` value, so the row leaves the filtered set. Starting
        // at the end keeps the offsets of the not-yet-visited pages stable.
        for ($page = $pages - 1; $page >= 0; $page--) {
            $orders = Db::table('yunorders')
                ->where('status', 3)
                ->where('current', $current)
                ->orderBy('id') // offset/limit paging needs a deterministic order
                ->offset($page * self::BATCH_SIZE)
                ->limit(self::BATCH_SIZE)
                ->get(['id', 'expressCode']);

            if ($orders->isEmpty()) {
                continue;
            }

            $info = $this->getinfo();
            $token = $info['tokenV2'];
            if (! is_string($token) || $token === '') {
                // Without a token every lookup would fail; stop early.
                return 'tokenV2 not found';
            }

            $wg = new \Hyperf\Utils\WaitGroup();
            foreach ($orders as $order) {
                // Rows may arrive as objects or arrays depending on the fetch mode.
                $row = (array) $order;

                $wg->add(1);
                co(function () use ($row, $token, $wg) {
                    try {
                        $this->refreshOrder($row['id'], (string) $row['expressCode'], $token);
                    } finally {
                        // Always release the wait group, even when the lookup
                        // throws; otherwise wait() below would block forever.
                        $wg->done();
                    }
                });
            }

            // Block until every coroutine of this batch has finished.
            $wg->wait();
        }

        return 'ok';
    }

    /**
     * Build a random string of distinct characters taken from [A-Za-z0-9].
     *
     * @param int $length number of characters wanted (at most 62, the alphabet size)
     * @return string
     */
    public function getrandstr($length)
    {
        $alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890';

        // Shuffle the alphabet and keep its first $length characters.
        return substr(str_shuffle($alphabet), 0, $length);
    }

    /**
     * Fetch the Baidu search page for "快递" and extract the `tokenV2` value
     * that the express API expects.
     *
     * @return array{tokenV2: string|null} tokenV2 is null when the page could
     *                                     not be fetched or contains no token
     */
    public function getinfo()
    {
        // tn = source site of the search request; results are unstable without it.
        $url = 'https://www.baidu.com/s?tn=02003390_43_hao_pg&ie=utf-8&wd=%E5%BF%AB%E9%80%92';
        $header = [
            'Host:www.baidu.com',
            'Content-Type:application/x-www-form-urlencoded',
            'Connection: keep-alive',
            'Referer:http://www.baidu.com',
            // A realistic User-Agent matters: Baidu asks for verification otherwise.
            'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36',
            // If tokenV2 can no longer be extracted, copy a fresh cookie from a browser session.
            'cookie:BIDUPSID=99230F5F14C63D007473C3D9F3787EA9; PSTM=1615876171; __yjs_duid=1_f42a699a56a29eb0b3b574aaf9ff6b971618551519092; BAIDUID=5AFE833F73F7A99737622161E381961A:FG=1; H_WISE_SIDS=110085_127969_174434_179348_184716_188333_188742_189755_190625_194085_196427_197242_197471_197711_199023_199568_201193_203310_203504_203880_203882_203885_204123_204713_204715_204717_204720_204817_204859_204902_205218_205414_205420_205424_205909_206927_206929_207234_207573_207716_207830_208065_208310_208721_209160_209394_209512_209568_209748_210127_210580_210669_210732_210736_210852_210890_210892_210895_210907_211059_211062_211113_211172_211180_211208_211296_211301_211350_211414_211442_211457_211580_211737_211754_211783_211985_212177_212295_212416_212532_212618_212778_212913_212924_212962_212967_213003_213036_213051_213059_213069_213140; MSA_WH=414_896; sugstore=1; BDORZ=FFFB88E999055A3F8A630C64834BD6D0; BD_UPN=12314753; MCITY=-150:; BAIDUID_BFESS=5AFE833F73F7A99737622161E381961A:FG=1; BA_HECTOR=2520252h012l81a5201h9945p15; ZFY=HW4om4BEMLRXyqfICzhl6K9WVCw:A49nZ4LGe7Sy7XSg:C; BDRCVFR[n9IS1zhFc9f]=mk3SLVN4HKm; delPer=0; PSINO=1; H_PS_PSSID=31254_26350; BDRCVFR[Ter2S3H5o_D]=mk3SLVN4HKm; BD_CK_SAM=1; H_PS_645EC=15a7eRjMVUdlLYA1lxLlzLU8A6RFQ20GGNI/PxnbcG8ev4RWhCEGU93iZ1E+YTdrFv47rdm9ywHP; baikeVisitId=b5175df9-f1c6-4b85-9607-ed08055147d8; COOKIE_SESSION=619_0_6_6_3_0_1_0_6_0_0_0_43_0_0_0_1653636800_0_1653960303|9#3743_18_1653547610|9',
        ];

        // NOTE(review): certificate verification is disabled and the TLS
        // version is pinned to 1.0, exactly as before. Both weaken transport
        // security; consider removing these three options.
        $content = $this->httpGet($url, $header, [
            CURLOPT_HEADER => 1,
            CURLOPT_SSL_VERIFYPEER => false,
            CURLOPT_SSL_VERIFYHOST => false,
            CURLOPT_SSLVERSION => CURL_SSLVERSION_TLSv1_0,
        ]);

        $match = [];
        if ($content !== null) {
            // The token appears in the page as `tokenV2=<value>"`.
            preg_match('/tokenV2=(.*?)"/i', $content, $match);
        }

        return ['tokenV2' => $match[1] ?? null];
    }

    /**
     * Query the Baidu express API for one order and store the returned
     * `current` text on the matching `yunorders` row.
     *
     * Nothing is written when the request fails or the response carries no
     * tracking info.
     *
     * @param int|string $id primary key of the `yunorders` row
     * @param string $expressCode tracking number sent as the `nu` parameter
     * @param string $tokenV2 token obtained from getinfo()
     */
    private function refreshOrder($id, string $expressCode, string $tokenV2): void
    {
        // The token is copied verbatim from a URL in the search page; only the
        // tracking number, which comes from the database, is encoded here.
        $url = 'https://express.baidu.com/express/api/express?tokenV2=' . $tokenV2
            . '&nu=' . rawurlencode($expressCode);

        // A fresh pseudo-random BAIDUID cookie is generated for every request.
        $baiduid = ucfirst(md5($this->getrandstr(6) . rand(10000, 99999)));
        $header = [
            'Host:express.baidu.com',
            'Content-Type:application/x-www-form-urlencoded',
            'Connection: keep-alive',
            'Referer:http://www.baidu.com',
            // A realistic User-Agent matters: Baidu asks for verification otherwise.
            'User-Agent: Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.146 Safari/537.36',
            'Cookie:BAIDUID=' . $baiduid . ':FG=1;',
        ];

        $content = $this->httpGet($url, $header);
        if ($content === null) {
            return;
        }

        $res = json_decode($content, true);
        if (! is_array($res) || ! isset($res['data']['info']['current'])) {
            return;
        }

        Db::table('yunorders')->where('id', $id)->update([
            'updated_at' => date('Y-m-d H:i:s'),
            'current' => $res['data']['info']['current'],
        ]);
    }

    /**
     * Perform a GET request with curl and return the response body.
     *
     * @param string $url target URL
     * @param string[] $headers raw header lines
     * @param array<int, mixed> $options extra CURLOPT_* => value pairs, applied last
     * @return string|null the response, or null on transport failure or an empty body
     */
    private function httpGet(string $url, array $headers, array $options = []): ?string
    {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        // Bound the request time so one stalled peer cannot stall a whole batch.
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, self::HTTP_TIMEOUT);
        curl_setopt($ch, CURLOPT_TIMEOUT, self::HTTP_TIMEOUT);
        foreach ($options as $option => $value) {
            curl_setopt($ch, $option, $value);
        }

        $content = curl_exec($ch);
        if ($content === false) {
            echo 'error:' . curl_error($ch);
        }
        curl_close($ch);

        // Never hand `false` to string functions: under strict_types that
        // raises a TypeError instead of being coerced.
        return is_string($content) && $content !== '' ? $content : null;
    }
}

普通流程下 PHP curl 大约一秒一个请求,爬取二十个就用了二十秒;加了协程后,1000 单大概只需 3 秒左右。上面加了一个等待协程(WaitGroup):如果不加这个,一下执行几千几万条,会出现 Allowed memory size of 268435456 bytes exhausted(内存溢出),所以我们一次只执行一百个协程,等这一百个协程执行成功后再执行下一批一百个协程。

6 评论
内联反馈
查看所有评论