Python 爬取历史天气数据

网站原始数据

https://www.tianqishi.com/hangzhou/20240214.html

image

源码

import csv
import datetime
import os

import requests
from bs4 import BeautifulSoup


def getEachDayWeather(loaction, timeStamp, timeout=10):
    """
    Fetch the weather table for one location and one day.

    Builds the page URL "https://www.tianqishi.com/<loaction>/<timeStamp>.html",
    downloads it and collects the text of every cell of the table whose CSS
    class is "yuBaoTable".

    Args:
        loaction: Location segment of the URL, e.g. "hangzhou".
        timeStamp: Day segment of the URL, e.g. "20240214".
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list with one entry per <tr> row of the table; each entry is the
        list of the texts of that row's <td> cells (an empty list for a row
        that has no <td> cells).

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the page contains no "yuBaoTable" table.
    """
    # 1. Build the page address
    urlHead = "https://www.tianqishi.com"
    urlFoot = ".html"
    weatherHtml = "{}/{}/{}{}".format(urlHead, loaction, timeStamp, urlFoot)

    # 2. Download the page; without a timeout a stalled server would block
    #    the whole crawl forever, and an error page must not be parsed as data
    response = requests.get(weatherHtml, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "lxml")
    yuBaoTable = soup.find("table", class_="yuBaoTable")
    if yuBaoTable is None:
        # soup.find returns None when nothing matches; fail with a message
        # that names the page instead of an AttributeError on None
        raise ValueError(
            "no yuBaoTable weather table found at {}".format(weatherHtml)
        )

    # 3. Record the cell texts, row by row
    return [
        [cell.text for cell in row.find_all("td")]
        for row in yuBaoTable.find_all("tr")
    ]

def writeEachDayWeather(eachDayData,csvFilePath):
    """
    Append one day's weather rows to a CSV file.

    Each row becomes one comma-separated line. Cells containing a comma, a
    quote or a line break are quoted by the csv module, and no trailing comma
    is written, so every row has exactly as many fields as it has cells.

    Args:
        eachDayData: Iterable of rows; each row is an iterable of cell values.
        csvFilePath: Path of the CSV file to append to (created if missing).
    """
    # The file is opened in plain text mode and rows are terminated with "\n"
    # so line endings stay the same as those produced by a plain f.write("\n")
    # on the same platform.
    with open(csvFilePath, "a") as f:
        csv.writer(f, lineterminator="\n").writerows(eachDayData)

def writeTitle(csvFilePath):
    """
    Create (or overwrite) the CSV file and write its header line.

    The header names eight columns: date-time, temperature, wind direction,
    wind force, wind speed, air pressure, humidity and precipitation
    probability.

    Args:
        csvFilePath: Path of the CSV file; any existing content is discarded.
    """
    title = "日期时间,气温,风向,风力,风速,气压,湿度,降水概率\n"
    # The with-statement closes the file on exit; no explicit close is needed
    with open(csvFilePath, "w") as f:
        f.write(title)

def getCsvFilePath(rootPath, loaction, timeStampStart, timeStampEnd):
    """
    Build the path of the CSV file for a location and a date range.

    The file is named "<loaction>_<timeStampStart>_to_<timeStampEnd>.csv" and
    placed under rootPath. The parts are combined with os.path.join, so a
    rootPath that already ends with a separator (e.g. "./") does not yield a
    doubled separator, and an empty rootPath yields a path relative to the
    current directory.

    Args:
        rootPath: Directory that will contain the file.
        loaction: Location name used in the file name.
        timeStampStart: First day of the range, used in the file name.
        timeStampEnd: Last day of the range, used in the file name.

    Returns:
        The path of the CSV file as a string.
    """
    fileName = "{}_{}_to_{}.csv".format(loaction, timeStampStart, timeStampEnd)
    return os.path.join(rootPath, fileName)

def getTimeStampList(timeStampStart, timeStampEnd, daysDelta=1):
    """
    List the days from timeStampStart to timeStampEnd, both inclusive.

    Args:
        timeStampStart: First day, formatted as "%Y%m%d" (e.g. "20231106").
        timeStampEnd: Last day, formatted as "%Y%m%d".
        daysDelta: Step between consecutive listed days, in days.

    Returns:
        A list of "%Y%m%d" strings starting at timeStampStart and advancing
        by daysDelta days while not past timeStampEnd. The list is empty when
        timeStampEnd is earlier than timeStampStart.

    Raises:
        ValueError: If a timestamp does not match "%Y%m%d" or daysDelta is
            smaller than 1.
    """
    if daysDelta < 1:
        raise ValueError(
            "daysDelta must be a positive integer, got {!r}".format(daysDelta)
        )

    dateFormat = "%Y%m%d"
    firstDay = datetime.datetime.strptime(timeStampStart, dateFormat)
    lastDay = datetime.datetime.strptime(timeStampEnd, dateFormat)
    totalDays = (lastDay - firstDay).days

    # daysDelta is the range step; a separate loop name keeps it from being
    # overwritten by the iteration variable
    return [
        (firstDay + datetime.timedelta(days=offset)).strftime(dateFormat)
        for offset in range(0, totalDays + 1, daysDelta)
    ]

if __name__ == "__main__":
    # Crawl settings: location as it appears in the site's URLs, the
    # inclusive date range (YYYYMMDD) and the output directory
    loaction = "hangzhou"
    timeStampStart, timeStampEnd = "20231106", "20240204"
    rootPath = "./"

    # Expand the range into single days, then start a fresh CSV file that
    # holds only the header line
    allDays = getTimeStampList(timeStampStart, timeStampEnd)
    csvFilePath = getCsvFilePath(rootPath, loaction, timeStampStart, timeStampEnd)
    writeTitle(csvFilePath)

    print("program starting.")
    # Fetch each day in order and append its rows to the CSV file
    for timeStamp in allDays:
        print("getting weather data for {}".format(timeStamp))
        writeEachDayWeather(getEachDayWeather(loaction, timeStamp), csvFilePath)
    print("program finished.")

结果

image

上图中风力一列的数据被显示成了日期,这是 Excel 的显示问题;原始 csv 文件中的数据是正常的:

image

posted @ 2024-02-15 19:50  GShang  阅读(164)  评论(0)  编辑  收藏  举报