0基础爬虫
1、配环境
点击下载python:https://www.python.org/ftp/python/3.7.7/python-3.7.7-amd64.exe
D:\Python\Python37\Scripts\
然后跟着视频做。
2、装pandas、retrying和lxml库(time是Python自带的标准库,不用装;lxml是pandas.read_html解析网页表格时要用的)
pip install pandas retrying lxml
3、跑代码
import pandas as pd
import time
from retrying import retry
# Bounded retry with a pause between attempts: a bare @retry would retry
# forever with no delay, which hangs on a permanent failure and hammers the
# server. wait_fixed is in milliseconds.
@retry(stop_max_attempt_number=10, wait_fixed=2000)
def post(i):
    """Fetch result page ``i`` and return the first HTML table on it.

    Args:
        i: Page number substituted into the ``page=`` query parameter.

    Returns:
        The first table that ``pandas.read_html`` finds on the page, as a
        DataFrame.

    Raises:
        Whatever ``pandas.read_html`` raised on the final attempt (or an
        ``IndexError`` from the ``[0]`` lookup) once all retry attempts
        are used up.
    """
    url = f'http://www.sxredcross.org.cn/index/donation/lovedetails/id/13.html?name=&donor=&time_start=2021-10-06&time_end=2021-10-10&id=13&page={i}'
    df = pd.read_html(url, encoding='utf-8')[0]
    return df
# Crawl the result pages and append each page's table to a single CSV file.
# `end` is an exclusive upper bound: range(1, end) fetches pages 1 .. end - 1.
# NOTE(review): the original comment said "pages 1-11", but with end = 11 the
# last page fetched is page 10 -- raise `end` to 12 if page 11 is wanted.
end = 11
for i in range(1, end):
    df = post(i)
    # Append without header/index so successive pages concatenate into one
    # table. Because the file is opened in append mode, re-running the script
    # adds the rows again; delete the CSV first for a clean run.
    df.to_csv('1-11.csv', mode='a+', encoding='utf-8', header=False, index=False)
    # Only ASCII directives are passed to strftime; the non-ASCII label is
    # added by the f-string. NOTE(review): non-ASCII text inside a strftime
    # format is reported to fail under some Windows locales -- confirm.
    stamp = time.strftime('%Y-%m-%d %H:%M:%S')
    print(f'-********-第{i}页', f'时间:{stamp}-********-')
# Reached only after every page has been fetched and written.
print('\n--------搞定啦啦,1-11.csv')

浙公网安备 33010602011771号