新发地菜价(已过期)- 用协程抓取表格里的内容,以及 url 第 1 到 n 页的写法

import asyncio
import csv
import io
import os

import aiofiles
import aiohttp
from lxml import etree


async def xin(url):
    """Fetch one listing page and append its price-table rows to the CSV file.

    The page is downloaded with aiohttp, parsed with lxml, and every ``<tr>``
    of the ``hq_table`` table that carries at least seven text cells is
    appended to ``./文件里优/菜价.csv`` as one CSV record, keeping the original
    column order (name, low, jun, high, spec, comp, date).

    Args:
        url: Address of the listing page to scrape.

    Returns:
        None. The only effect is the data appended to the CSV file.
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            html = await resp.text(encoding='utf-8')

    page = etree.HTML(html)
    if page is None:
        # Nothing parseable came back (presumably an empty body), so there
        # are no rows to record for this page.
        return

    # Render the whole page into an in-memory CSV first. The csv module
    # quotes fields that contain commas or quotes, which a plain ",".join
    # would silently turn into extra columns.
    buffer = io.StringIO()
    writer = csv.writer(buffer, lineterminator="\n")
    for tr in page.xpath("//table[@class='hq_table']/tr"):
        cells = tr.xpath("./td/text()")  # text nodes of the row's <td> cells
        if len(cells) < 7:
            # Header/spacer rows do not have the seven data cells; skip them
            # instead of failing with IndexError.
            continue
        writer.writerow(cells[:7])

    payload = buffer.getvalue()
    if not payload:
        return

    # Create the output directory on demand instead of requiring a manual step.
    os.makedirs("./文件里优", exist_ok=True)

    # Open the file once per page and emit everything in a single write, so
    # rows produced by other concurrently running tasks are far less likely
    # to end up interleaved with this page's rows.
    async with aiofiles.open("./文件里优/菜价.csv", mode="a", encoding="utf-8") as f:
        await f.write(payload)


async def main(first_page=1, last_page=4593, max_concurrency=20):
    """Scrape listing pages ``first_page`` .. ``last_page`` (inclusive) via ``xin``.

    Args:
        first_page: Number of the first page to fetch.
        last_page: Number of the last page to fetch (inclusive).
        max_concurrency: Upper bound on pages being fetched at the same time.

    Raises:
        ValueError: If ``max_concurrency`` is smaller than 1.
    """
    if max_concurrency < 1:
        raise ValueError("max_concurrency must be at least 1")

    # Cap the number of in-flight requests; starting thousands of downloads
    # at once exhausts sockets and hammers the remote server.
    limiter = asyncio.Semaphore(max_concurrency)

    async def crawl_page(page_no):
        url = f"http://xinfadi.com.cn/marketanalysis/2/list/{page_no}.shtml"
        async with limiter:
            await xin(url)

    pages = range(first_page, last_page + 1)
    # return_exceptions=True keeps the crawl best-effort (one bad page does
    # not abort the rest) while still handing every failure back to us.
    outcomes = await asyncio.gather(
        *(crawl_page(page_no) for page_no in pages),
        return_exceptions=True,
    )
    for page_no, outcome in zip(pages, outcomes):
        if isinstance(outcome, Exception):
            print(f"page {page_no} failed: {outcome!r}")


if __name__ == "__main__":
    # Script entry point: build the top-level coroutine, run it to
    # completion on a fresh event loop, then announce that we are done.
    crawl = main()
    asyncio.run(crawl)
    print("over!!")
posted @ 2023-07-26 15:47  严永富  阅读(11)  评论(0)    收藏  举报