爬取网页文本
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Fetch the InforEuro exchange-rate page and print its currency option labels.

The script downloads the European Commission InforEuro page, parses the
returned HTML with lxml, and prints the text nodes found under the
``<option>`` elements of the ``fromCurrency`` ``<select>``.
"""
import os
import sys

import requests
from lxml import etree

# Put the parent of this file's directory on the import path. Nothing in the
# visible script imports from it; kept because it is a module-level side effect.
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))

# Not used by the visible script; retained in case later code relies on it.
text = []

# Browser-like User-Agent sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36'
}
target = 'https://ec.europa.eu/info/funding-tenders/procedures-guidelines-tenders/information-contractors-and-beneficiaries/exchange-rate-inforeuro_en'

# Download the page; the timeout keeps the script from hanging indefinitely.
response = requests.get(target, headers=headers, timeout=30)
# Fail loudly on 4xx/5xx instead of silently parsing an error page.
response.raise_for_status()
# Force UTF-8 decoding so the page text is not garbled.
response.encoding = "utf-8"
html = response.text

# Parse the HTML source into an element tree that can be queried with XPath.
selector = etree.HTML(html)

# Collect every text node under the <option> elements of the "fromCurrency"
# <select>.
# NOTE(review): ng-model is an AngularJS directive, so the options may be
# filled in client-side and be absent from the static HTML -- confirm.
info = selector.xpath('//select[@ng-model="fromCurrency"]/option//text()')
print(info)