#-*- codeing =utf-8 -*-
#@Time : 2022/5/30 16:14
#@Author : huaobin
#@File : fayuan2.py
#@Software: PyCharm
import requests
from bs4 import BeautifulSoup
import openpyxl as op
f=open('./a.txt','a+',encoding='utf-8')
ws = op.Workbook()
wb = ws.create_sheet(index=0)
wb.cell(row=1,column=1,value="案件名称")
wb.cell(row=1,column=2,value='日期')
def getdata(url,count):
headers = {
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36'
}
response = requests.get(url,headers = headers) #请求访问网站
response.encoding='utf-8'
soup=BeautifulSoup(response.text,'html.parser')
print("--------------------------------")
spans_left=soup.select('span.left')
span_rigth=soup.select('span.right')
print("出")
print(len(span_rigth))
for i in range(1,len(span_rigth)):
print(spans_left[i].text+" "+span_rigth[i].text+'\n')
wb.cell(row=count,column=1,value=spans_left[i].text)
wb.cell(row=count,column=2,value=span_rigth[i].text)
count=count+1
print("++++++++++++++++++++++++++++++++++++++++")
'''
for span in soup.find_all(name='span'):
for a in span.find_all(name='a'):
print(a.string)
'''
print("*********************************")
if __name__ == '__main__':
urls=urls = [
'https://www.chinacourt.org/article/index/id/MzAwNDAwMjAwMSACAAA/page/{}.shtml'.format(str(i)) for i in range(1, 50)]
count=2
for url in urls:
print(url)
getdata(url,count)
count=count+49
# 保存数据
ws.save('案件7.xlsx')