import requests
import re
from bs4 import BeautifulSoup as bs
import traceback
def getHTMLtext(url, code="utf-8"):
    """Fetch a page and return its text, or "" on any request error."""
    try:
        r = requests.get(url)
        r.raise_for_status()
        r.encoding = code          # decode with the caller-supplied encoding
        return r.text
    except Exception:
        return ""

def getstocklist(lst, stockURL):
    """Collect the codes of listed stocks from the listing page."""
    html = getHTMLtext(stockURL, "GB2312")   # the East Money listing page is GB2312-encoded
    soup = bs(html, "lxml")
    a = soup.find_all("a")                   # every <a> tag on the page
    for i in a:
        try:
            href = i.attrs["href"]
            # keep hrefs that contain "s" followed by "h" or "z" and six digits
            lst.append(re.findall(r"[s][hz]\d{6}", href)[0])
        except Exception:
            continue                         # skip <a> tags without a matching href

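# A quick illustration of the regex above; the href value is a made-up example,
# not taken from the real listing page:
# >>> re.findall(r"[s][hz]\d{6}", "http://quote.eastmoney.com/sh600000.html")
# ['sh600000']
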
def getstockinfo(lst, stockURL, fpath):
    """For each stock code, fetch its detail page and append the parsed fields to fpath."""
    count = 0
    for stock in lst:
        url = stockURL + stock + ".html"    # build the full URL for this stock's page
        html = getHTMLtext(url)             # fetch this stock's page
        try:
            if html == "":                  # empty page: skip it
                continue
            infoDict = {}
            soup = bs(html, "lxml")         # parse this stock's page
            stockinfo = soup.find("div", attrs={"class": "stock-bets"})  # block holding the stock data
            name = stockinfo.find_all(attrs={"class": "bets-name"})[0]   # the stock's name
            infoDict.update({"股票名称": name.text.split()[0]})          # add the name ("股票名称" = stock name) to the dict
            keylist = stockinfo.find_all("dt")     # field names
            valuelist = stockinfo.find_all("dd")   # field values, aligned with keylist
            for i in range(len(keylist)):
                key = keylist[i].text
                val = valuelist[i].text
                infoDict[key] = val                # store each key/value pair in the dict
            with open(fpath, "a", encoding="utf-8") as f:   # "a" opens the file in append mode
                f.write(str(infoDict) + "\n")
            count = count + 1
            # "\r" returns the cursor to the start of the line and end="" suppresses
            # the newline, so each print overwrites the last one and acts as a progress bar
            print("\r当前进度:{:.2f}%".format(count * 100 / len(lst)), end="")
        except Exception:
            count = count + 1
            print("\r当前进度:{:.2f}%".format(count * 100 / len(lst)), end="")
            continue

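# Minimal standalone sketch of the carriage-return progress bar used above:
# "{:.2f}" formats the percentage with two decimals, and "\r" plus end=""
# keep the output on a single line. total=50 is an arbitrary demo value;
# this helper is illustrative only and is never called by the script.
def _progress_demo(total=50):
    for done in range(1, total + 1):
        print("\rProgress: {:.2f}%".format(done * 100 / total), end="")
    print()
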
def main():
    stock_list_url = "http://quote.eastmoney.com/stocklist.html"
    stock_info_url = "https://gupiao.baidu.com/stock/"
    output_file = "F:\\stockinfo.txt"
    slist = []                                        # list of stock codes
    getstocklist(slist, stock_list_url)               # collect the stock codes
    getstockinfo(slist, stock_info_url, output_file)  # fetch each stock's details and save them

if __name__ == "__main__":
    main()