# 12306爬取基本车次信息(需下载chromedriver)

from selenium import webdriver
from selenium.webdriver.common.by import By
import re
import time
import csv
import pandas as pd
import sys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
import os

# Matches every character outside the range U+4E00-U+9FA5; used to reduce a
# suggestion entry's text to its Chinese station name.
_NON_CHINESE = re.compile(r"[^\u4e00-\u9fa5]")

# Upper bounds, in seconds, for the explicit waits used below.
_SUGGESTION_TIMEOUT = 3
_RESULT_TIMEOUT = 10


def _make_driver(driver_path):
    """Start Chrome through the chromedriver executable at *driver_path*."""
    chrome_options = Options()
    chrome_options.add_argument("--no-sandbox")  # disable the sandbox
    chrome_options.add_argument("--disable-dev-shm-usage")  # do not use /dev/shm
    # Add "--headless" here to run the browser without a visible window.
    return webdriver.Chrome(service=Service(driver_path), options=chrome_options)


def _pick_suggestion(driver, city):
    """Click the suggestion entry whose Chinese name equals *city*.

    Only the elements with ids ``citem_0`` .. ``citem_5`` are inspected.
    Returns True when an entry was clicked, False otherwise.
    """
    from selenium.common.exceptions import WebDriverException

    for index in range(6):
        try:
            found = driver.find_elements(By.ID, "citem_%d" % index)
            if found and _NON_CHINESE.sub("", found[0].text) == city:
                found[0].click()
                return True
        except WebDriverException:
            # Reading or clicking one entry failed; keep the lookup
            # best-effort and move on to the next entry.
            continue
    return False


def _fill_station(driver, field_id, city):
    """Type *city* into the input with id *field_id* and pick it from the list."""
    from selenium.common.exceptions import TimeoutException
    from selenium.webdriver.support.ui import WebDriverWait

    driver.find_element(By.ID, field_id).send_keys(city)
    try:
        # Poll until the suggestion list offers the city instead of relying
        # on a fixed 0.2 s sleep.
        WebDriverWait(driver, _SUGGESTION_TIMEOUT).until(
            lambda d: _pick_suggestion(d, city)
        )
    except TimeoutException:
        # Best effort: the query still runs with whatever the field contains.
        print("未能在下拉框中选中站点:%s" % city)


def _split_pair(text):
    """Split a two-value cell text into ``(first, last)``.

    NOTE(review): assumes the two values are separated by whitespace or a
    newline in the element text -- confirm against the live page.
    """
    parts = text.split()
    if not parts:
        return "", ""
    if len(parts) == 1:
        return parts[0], ""
    return parts[0], parts[-1]


def _collect_rows(driver):
    """Read the result table and return one tuple per train.

    Each tuple is (number, start station, terminal station, departure time,
    arrival time, duration). If the browser raises while reading, the rows
    gathered so far are returned.
    """
    from selenium.common.exceptions import WebDriverException

    rows = []
    try:
        # Query each column once instead of once per row.
        numbers = driver.find_elements(By.XPATH, "//a[@class='number']")
        stations = driver.find_elements(By.XPATH, "//div[@class='cdz']")
        clocks = driver.find_elements(By.XPATH, "//div[@class='cds']")
        durations = driver.find_elements(By.XPATH, "//div[@class='ls']")
        for number, station, clock, duration in zip(
            numbers, stations, clocks, durations
        ):
            # The previous code derived the second value by removing the
            # cell's full text from itself, which always gave "".
            start, terminal = _split_pair(station.text)
            start_time, end_time = _split_pair(clock.text)
            rows.append(
                (number.text, start, terminal, start_time, end_time, duration.text)
            )
    except WebDriverException as exc:
        print("读取车次信息时出错,仅保留已读取的部分:%s" % exc)
    return rows


def request(fromCity, toCity, *, driver_path="chromedriver.exe",
            output_path="data.csv"):
    """Query 12306 for trains between two cities and save them as CSV.

    Opens the 12306 ticket page in Chrome, enters the two cities, runs the
    query for today's date, writes the listed trains to *output_path* and
    then terminates the program.

    Args:
        fromCity: Chinese name of the departure city.
        toCity: Chinese name of the destination city.
        driver_path: Path of the chromedriver executable.
        output_path: Path of the CSV file to write.

    Raises:
        SystemExit: always, after the CSV has been written.
    """
    from selenium.common.exceptions import TimeoutException
    from selenium.webdriver.support.ui import WebDriverWait

    # Use the full current date; a hard-coded year makes the query date
    # fall in the past once the year changes.
    query_date = time.strftime("%Y-%m-%d", time.localtime())
    url = (
        "https://kyfw.12306.cn/otn/leftTicket/init?linktypeid=dc"
        "&fs=,SHH&ts=,TJP&date=%s&flag=N,N,Y" % query_date
    )

    driver = _make_driver(driver_path)
    try:
        driver.get(url)

        # Clear both station inputs before typing the requested cities.
        driver.find_element(By.ID, "fromStationText").clear()
        driver.find_element(By.ID, "toStationText").clear()

        _fill_station(driver, "fromStationText", fromCity)
        _fill_station(driver, "toStationText", toCity)
        time.sleep(0.5)  # pause kept from the original before submitting
        driver.find_element(By.ID, "query_ticket").click()

        try:
            # Wait for at least one train-number element to appear.
            WebDriverWait(driver, _RESULT_TIMEOUT).until(
                lambda d: d.find_elements(By.CLASS_NAME, "number")
            )
        except TimeoutException:
            print("未查询到车次信息!")
            rows = []
        else:
            rows = _collect_rows(driver)
    finally:
        # Always close the browser, including when an error propagates.
        driver.quit()

    # NOTE(review): the trailing ":" in "历时:" looks like a typo, but it is
    # the existing CSV header, so it is kept unchanged.
    columns = ["车次", "出发站", "到达站", "出发时间", "到达时间", "历时:"]
    pd.DataFrame(rows, columns=columns).to_csv(output_path, encoding="utf_8_sig")
    print("已生成车次信息csv表格!")
    time.sleep(1)
    # Ends the program; the caller's input loop relies on this to stop.
    sys.exit()
    
def main():
    """Prompt for both station names until neither is empty, then run the query.

    Both prompts are always shown before the inputs are checked. When either
    answer is an empty string a message is printed and the prompts repeat;
    otherwise the pair is handed to ``request``.
    """
    while True:
        origin = input("请输入起点站:")
        destination = input("请输入到达站:")
        if origin == "" or destination == "":
            print("站名不能为空!")
            continue
        request(origin, destination)
    
# Start the interactive prompt only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
# posted @ 2025-12-08 23:22  hhcgchpspk  阅读(0)  评论(0)    收藏  举报