PHP 8 通过 URL 获取飞书公开文档的内容

私有文档只能通过飞书开放平台的接口获取;公开的文档则可以直接从网页源码中提取内容。需要注意的是,访问飞书文档链接时会经过多次 302 重定向,因此请求时必须允许自动跟随重定向。

require_once 'vendor/autoload.php';

use GuzzleHttp\Client;
use GuzzleHttp\Cookie\CookieJar;

/**
 * Perform a GET request with Guzzle, following redirects and collecting cookies.
 *
 * The client is configured to follow at most 10 redirects with redirect
 * tracking enabled, and a fresh cookie jar is attached to the request.
 *
 * @param string $url Address to fetch.
 *
 * @return array|false On success an array with the keys 'status_code'
 *                     (final HTTP status), 'body' (response body contents)
 *                     and 'cookies' (cookie jar exported as an array).
 *                     On any Exception the message is echoed and false is returned.
 */
function requestWithGuzzle($url)
{
    $redirectPolicy = [
        'max' => 10,
        'track_redirects' => true
    ];
    $httpClient = new Client(['allow_redirects' => $redirectPolicy]);

    $cookieJar = new CookieJar();

    try {
        $reply = $httpClient->get($url, ['cookies' => $cookieJar]);

        $statusCode = $reply->getStatusCode();
        $payload = $reply->getBody()->getContents();
        $cookieList = $cookieJar->toArray();
    } catch (Exception $failure) {
        // Report the problem on standard output and signal failure to the caller.
        echo "Error: " . $failure->getMessage();
        return false;
    }

    return [
        'status_code' => $statusCode,
        'body' => $payload,
        'cookies' => $cookieList
    ];
}

// Usage example: fetch a public Feishu document page, extract the embedded
// JSON payload, concatenate the text of every block and save it to a file.
$result = requestWithGuzzle('https://qjaus99bpl.feishu.cn/docx/XXXXXXXXXXXXXXXXX');

// requestWithGuzzle() returns false on failure (after echoing the error),
// so stop here instead of indexing into a boolean.
if ($result === false) {
    throw new \RuntimeException('Failed to fetch the Feishu document page.');
}

// The document content is embedded in the page source as a JSON string;
// pull it out with a regular expression using a named capture group.
$pattern3 = '/clientVars:\s*Object\((?<content>.*?)\)\s*\}\);\s*window\.docxSSREditable/';
$matches = [];
if (preg_match($pattern3, $result['body'], $matches) !== 1) {
    // Covers both "no match" (0) and a PCRE error (false).
    throw new \RuntimeException('Could not locate the clientVars payload in the page body.');
}
echo "使用命名组匹配: " . $matches['content'] . "\n";

// Decode the JSON into an associative array; malformed JSON raises JsonException
// instead of silently producing null.
$array = json_decode($matches['content'], true, 512, JSON_THROW_ON_ERROR);

$block_map = $array['data']['block_map'] ?? null;
$block_sequence = $array['data']['block_sequence'] ?? null;
if (!is_array($block_map) || !is_array($block_sequence)) {
    throw new \RuntimeException('Decoded payload has no data.block_map / data.block_sequence arrays.');
}

$txt = '';
foreach ($block_sequence as $value) {
    // A sequence entry may reference a block that is missing or has no text;
    // such entries are skipped rather than triggering warnings.
    $pieces = $block_map[$value]['data']['text']['initialAttributedTexts']['text'] ?? null;
    if (is_array($pieces) && !empty($pieces)) {
        $txt .= join(' ', array_values($pieces));
    }
}
var_dump($txt);

// file_put_contents() returns false on failure; surface that instead of ignoring it.
if (file_put_contents('./public/test.txt', $txt) === false) {
    throw new \RuntimeException('Failed to write ./public/test.txt.');
}

  

 

posted @ 2025-07-30 15:25  忙于厮杀  阅读(41)  评论(0)    收藏  举报