Python之简单页面爬取

import requests
import re
import bs4
import pandas as pd
import csv

def _clean(text):
    """Return *text* with newlines, carriage returns and spaces removed."""
    return text.replace("\n", "").replace("\r", "").replace(" ", "")


# Scraped table rows: the header row first, then one list of cell strings
# per data row.
a = []


url = "http://www.tianqihoubao.com/lishi/shijiazhuang/month/202004.html"

# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops an HTTP error page from being parsed as data.
r = requests.get(url, timeout=30)
r.raise_for_status()

# requests falls back to ISO-8859-1 for text responses whose Content-Type
# header carries no charset; in that case use the encoding detected from the
# body so non-Latin text is not garbled.
# NOTE(review): confirm which charset this site actually serves.
if r.encoding is None or r.encoding.lower() == "iso-8859-1":
    r.encoding = r.apparent_encoding

soup = bs4.BeautifulSoup(r.text, "html.parser")

datas_tr = soup.find_all("tr")

# Number of header cells collected so far; while it is 0 the current <tr> is
# still treated as the header row.
head = 0
for data_tr in datas_tr:
    b = []
    if head == 0:
        # Header row: the column titles are the <b> elements.
        for data_b in data_tr.find_all("b"):
            title = data_b.get_text()
            print(title)
            b.append(title)
            head = head + 1
    else:
        for index, data_td in enumerate(data_tr.find_all("td")):
            # The first cell wraps its text in a link; fall back to the
            # cell's own text when the link is missing.
            link = data_td.find("a") if index == 0 else None
            # get_text() is used instead of .string because .string is None
            # whenever a cell has more than one child node.
            raw = link.text if link is not None else data_td.get_text()
            print(raw)
            b.append(_clean(raw))

    if not b:
        # Skip rows that produced no cells so later stages never index into
        # an empty list.
        continue

    print(b)
    print("*"*20)
    a.append(b)
print(a)
print("爬取完毕,向csv输入ing")

# Write the first four columns of every scraped row to the CSV file.
# An explicit encoding makes the output independent of the machine's locale;
# "utf-8-sig" adds a BOM so spreadsheet tools detect UTF-8.
with open('D:\\test.csv', "w", newline='', encoding="utf-8-sig") as csvfile:
    writer = csv.writer(csvfile)
    for row in a:
        # Slicing tolerates rows with fewer than four cells, and None cells
        # are written as empty strings instead of breaking the join below.
        cells = ["" if cell is None else cell for cell in row[:4]]
        if not cells:
            continue
        print("".join(cells))
        writer.writerow(cells)

 

posted @ 2023-07-19 23:49  子过杨梅  阅读(30)  评论(0)    收藏  举报