from bs4 import BeautifulSoup
import requests
from fake_useragent import UserAgent # user-agent池
def getHTMLText(url: str) -> str:
    """Fetch a web page and return its body as text.

    Sends a GET request with a randomly chosen ``User-Agent`` header and a
    30-second timeout, then decodes the body using the encoding detected
    from the content itself (``apparent_encoding``).

    Args:
        url: Address of the page to download.

    Returns:
        The decoded page text on success. On any ordinary failure (building
        the header, malformed URL, network error, timeout, 4xx/5xx status)
        the sentinel string ``"产生异常"`` is returned instead of raising.
    """
    try:
        # Pick a random User-Agent for this request.
        headers = {'User-Agent': UserAgent().random}
        r = requests.get(url, timeout=30, headers=headers)
        # Turn 4xx/5xx responses into an HTTPError so they take the
        # failure path below.
        r.raise_for_status()
        # Decode with the encoding guessed from the body.
        r.encoding = r.apparent_encoding
        return r.text
    except Exception:
        # Deliberately not a bare ``except:``: KeyboardInterrupt and
        # SystemExit must propagate so the user can still abort a slow
        # request; every other error maps to the sentinel callers expect.
        return "产生异常"
def main():
    """Ask the user for a URL, download it and save the text to ``res.txt``.

    The URL is read from standard input and passed to ``getHTMLText``;
    whatever string that returns is written to ``res.txt`` in the current
    working directory as UTF-8, replacing any previous content.
    """
    target_url = input("请输入网址:")
    page_text = getHTMLText(target_url)

    # The file is opened only after the fetch has returned, so an aborted
    # prompt or download leaves an existing res.txt untouched.
    with open("res.txt", mode='w', encoding="utf-8") as out_file:
        out_file.write(page_text)
# Run the interactive fetch only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()