Hyperf2.1框架中GuzzleHttp组件的使用方式，实现采集网站文章的功能demo

2023-06-01 00:00:00 框架组件采集

Hyperf 框架中 GuzzleHttp 组件的使用分为原生写法和协程化写法两种，下面分别演示这两种写法。

使用 GuzzleHttp 组件包测试功能：因为后面我需要做一个采集功能，

所以这里先简单模拟采集某个网站上一篇文章的数据。

安装：

Hyperf 2.1 默认已经安装了该组件包，无需额外安装，直接使用即可。

开始开发功能

测试控制器中

<?php
declare(strict_types=1);
namespace App\Controller;
use Hyperf\HttpServer\Contract\ResponseInterface;
use Hyperf\Guzzle\ClientFactory;
use Hyperf\HttpServer\Annotation\AutoController;
/**
 * @AutoController()
 */
class TestController
{
    /**
     * @var \Hyperf\Guzzle\ClientFactory
     */
     
    private $clientFactory;
    
    /**
     * Stores the injected client factory so that methods of this controller
     * can create Guzzle clients through it.
     *
     * @param ClientFactory $clientFactory Factory used later to create Guzzle clients.
     */
    public function __construct(ClientFactory $clientFactory)
    {
        $this->clientFactory = $clientFactory;
    }
    
    public function index(ResponseInterface $response)
    {
        return $response->json('测试专用控制器');
    }
    
    /**
     * GuzzleHttp原生写法
    */
     public function test1(RequestInterface $request,ResponseInterface $response)
     {
        //原生写法
        $client = new \GuzzleHttp\Client();
        $resp = $client->request('GET', 'http://www.zongscan.com/demo333/178.html',[]);
        //获取页面数据
        $content = $resp->getBody()->getContents();
        //通过 preg_replace 函数使页面源码由多行变单行
        $htmlOneLine = preg_replace("/\r|\n|\t/","",$content);
        //获取这个标签及里面的内容
        preg_match("/<div class=\"jumbotron\">(.*)<\/div>/iU",$htmlOneLine,$titleArr);
        $string = $titleArr[0];
        //去掉html标签
        $string = strip_tags($string);
        $string = strtr($string, array("&nbsp" => ''));

        return $response->json($string);
        //return 'test-Hyperf';
     }
	
   /**
    * GuzzleHttp协程写法
    */
    protected function Crawler($url)
    {
        // $options 等同于 GuzzleHttp\Client 构造函数的 $config 参数
        $options = [];
        // $client 为协程化的 GuzzleHttp\Client 对象
        $client = $this->clientFactory->create($options);
        $resp = $client->request('GET', $url,[]);
        //响应状态码
        //$http_status = $resp->getStatusCode();
        //获取页面数据
        $content = $resp->getBody()->getContents();
        //通过 preg_replace 函数使页面源码由多行变单行
        $htmlOneLine = preg_replace("/\r|\n|\t/","",$content);
        //获取这个标签及里面的内容
        preg_match("/<div class=\"jumbotron\">(.*)<\/div>/iU",$htmlOneLine,$titleArr);
        $string = $titleArr[0];
        //去掉html标签
        $string = strip_tags($string);
        $string = strtr($string, array("&nbsp" => ''));

        return $response->json($string);
    }

看看效果：两种写法的输出完全一样。

完

相关文章