python 法院案例爬取

#-*- codeing =utf-8 -*-
#@Time : 2022/5/30 16:14
#@Author : huaobin
#@File : fayuan2.py
#@Software: PyCharm

import requests
from bs4 import BeautifulSoup
import openpyxl as op


f=open('./a.txt','a+',encoding='utf-8')
ws = op.Workbook()
wb = ws.create_sheet(index=0)
wb.cell(row=1,column=1,value="案件名称")
wb.cell(row=1,column=2,value='日期')

def getdata(url,count):

    headers = {
         'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36'

    }

    response = requests.get(url,headers = headers)       #请求访问网站
    response.encoding='utf-8'
    soup=BeautifulSoup(response.text,'html.parser')

    print("--------------------------------")
    spans_left=soup.select('span.left')
    span_rigth=soup.select('span.right')
    print("出")
    print(len(span_rigth))
    for i in range(1,len(span_rigth)):
        print(spans_left[i].text+" "+span_rigth[i].text+'\n')
        wb.cell(row=count,column=1,value=spans_left[i].text)
        wb.cell(row=count,column=2,value=span_rigth[i].text)
        count=count+1
        print("++++++++++++++++++++++++++++++++++++++++")

    '''
    
        for span in soup.find_all(name='span'):
        for a in span.find_all(name='a'):
            print(a.string)
    
    '''



    print("*********************************")




if __name__ == '__main__':
    urls=urls = [
        'https://www.chinacourt.org/article/index/id/MzAwNDAwMjAwMSACAAA/page/{}.shtml'.format(str(i)) for i in range(1, 50)]
    count=2
    for url in urls:
        print(url)
        getdata(url,count)
        count=count+49
    # 保存数据
    ws.save('案件7.xlsx')

  

posted @ 2022-05-30 08:00  青竹之下  阅读(95)  评论(0)    收藏  举报