使用 Python 根据网页生成 RSS
安装依赖:pip install requests beautifulsoup4 lxml(下面的示例代码使用内置的 html.parser 解析器,lxml 为可选)
import requests
from bs4 import BeautifulSoup
import xml.etree.ElementTree as ET
def fetch_news_from_url(url):
    """Scrape a news listing page and return its entries as RSS-ready dicts.

    Downloads ``url``, parses the HTML, and takes the first ``<a>`` inside
    every element matched by the CSS selector ``.list li`` as one news entry.

    Args:
        url: Address of the listing page to scrape.

    Returns:
        A list of dicts with the keys ``'title'`` (anchor text, stripped),
        ``'link'`` (absolute URL) and ``'description'`` (always ``''``).
        Entries without an ``<a href=...>`` are skipped.

    Raises:
        requests.RequestException: On network failure, timeout, or a
            non-success HTTP status.
    """
    from urllib.parse import urljoin

    # Without a timeout requests may block forever on an unresponsive server.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    # When the server does not declare a charset, requests falls back to
    # ISO-8859-1 for text responses, which garbles non-Latin pages. Let it
    # guess the encoding from the body in that case.
    content_type = response.headers.get('content-type', '')
    if 'charset' not in content_type.lower():
        response.encoding = response.apparent_encoding

    soup = BeautifulSoup(response.text, 'html.parser')

    news_items = []
    for entry in soup.select('.list li'):
        anchor = entry.select_one('a')
        # Separator or placeholder rows may carry no usable link; skip them
        # instead of crashing the whole scrape.
        if anchor is None or not anchor.get('href'):
            continue
        news_items.append({
            'title': anchor.get_text(strip=True),
            # Listing pages commonly use relative hrefs; feed readers need
            # absolute URLs, resolved against the final (post-redirect) URL.
            'link': urljoin(response.url, anchor['href']),
            'description': '',
        })
    return news_items
def generate_rss(news_items, rss_filename, *, channel_title="News feed",
                 channel_link="", channel_description=""):
    """Write ``news_items`` to ``rss_filename`` as an RSS 2.0 document.

    Args:
        news_items: Iterable of dicts, each with the keys ``'title'``,
            ``'link'`` and ``'description'``.
        rss_filename: Path of the file to create or overwrite.
        channel_title: Text of the channel's ``<title>`` element.
        channel_link: Text of the channel's ``<link>`` element; pass the URL
            of the page the feed was generated from.
        channel_description: Text of the channel's ``<description>`` element.

    Raises:
        KeyError: If an item lacks one of the three expected keys.
        OSError: If the output file cannot be written.
    """
    root = ET.Element("rss", version="2.0")
    channel = ET.SubElement(root, "channel")

    # RSS 2.0 requires every channel to carry title, link and description;
    # feed readers and validators may reject a channel without them.
    ET.SubElement(channel, "title").text = channel_title
    ET.SubElement(channel, "link").text = channel_link
    ET.SubElement(channel, "description").text = channel_description

    for item in news_items:
        item_elem = ET.SubElement(channel, "item")
        for field in ("title", "link", "description"):
            ET.SubElement(item_elem, field).text = item[field]

    tree = ET.ElementTree(root)
    tree.write(rss_filename, encoding='utf-8', xml_declaration=True)
# Example usage: scrape the announcement listing page and save it as an RSS file.
# Replace the URL below with the news page you actually want to convert.
news_url = "https://gdstc.gd.gov.cn/zwgk_n/tzgg/index.html"
news_items = fetch_news_from_url(url=news_url)
generate_rss(news_items=news_items, rss_filename="gdkxjsnews.rss")
posted on 2024-05-11 22:32 Tencent/Tim 阅读(101) 评论(0) 收藏 举报
浙公网安备 33010602011771号