1 import requests
2 from bs4 import BeautifulSoup
3 import pandas as pd
4 from openpyxl import Workbook
5 import concurrent.futures
6
7 # 读取 .txt 文件中的 URL
8 with open("urls.txt", "r") as file:
9 urls = file.read().splitlines()
10
11 # 存储 URL 和 title
12 data = []
13
def fetch_title(url):
    """Fetch *url* and return a ``(url, title)`` tuple.

    Raises ``requests.RequestException`` on network failure or timeout;
    callers collecting results from a thread pool should handle that.
    """
    # Without a timeout a single stalled server would pin a worker thread
    # forever; 10 s is generous for fetching one HTML page.
    response = requests.get(url, timeout=10)
    soup = BeautifulSoup(response.text, "html.parser")
    # Pages without a <title> made soup.find(...) return None and the
    # original `.text` access raised AttributeError — guard for that.
    tag = soup.find("title")
    title = tag.text.strip() if tag is not None else ""
    return (url, title)
19
# Fan out the fetches; 20 workers is fine for I/O-bound HTTP requests
# (the GIL is released while threads wait on the network).
with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
    # Map each future back to its URL so a failed fetch can still be reported.
    futures = {executor.submit(fetch_title, url): url for url in urls}

    for future in concurrent.futures.as_completed(futures):
        try:
            data.append(future.result())
        except Exception as exc:
            # One bad URL must not abort the whole run and discard every
            # result gathered so far; record the error in its place.
            data.append((futures[future], f"ERROR: {exc}"))

# Write URL/title pairs to an Excel file. pandas creates the workbook
# itself — the old pattern of assigning `writer.book = Workbook()` breaks
# on pandas >= 1.5 (`book` became a read-only property) and
# `writer.save()` was removed in pandas 2.0.
df = pd.DataFrame(data, columns=["URL", "Title"])
df.to_excel("titles.xlsx", index=False, engine="openpyxl")
# NOTE: all results are held in memory and written to Excel in a single
# pass at the end, so keep the number of URLs per run modest.