from scrapy import signals
import random
class Test001UseragentMiddleware(object):
    """Middleware hook that assigns a randomly chosen User-Agent to each request."""

    # Pool of User-Agent strings to choose from.
    USER_AGENT = [
        # Chrome
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
        # Firefox
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
        # Internet Explorer
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; GTB7.0)",
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)",
        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)",
        # The closing parenthesis was missing here, which produced a malformed
        # User-Agent header whenever this entry was selected.
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
        "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0;rv:11.0) like Gecko",
        # Sogou browser
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1;Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)",
        # 360 browser
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1;360SE)",
        # QQ browser
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1;TencentTraveler 4.0)",
    ]

    def process_request(self, request, spider):
        """Set ``request.headers["user-agent"]`` to a random ``USER_AGENT`` entry.

        Args:
            request: Object exposing a mutable ``headers`` mapping.
            spider: Unused by this method.

        Returns:
            None (implicitly); the request is modified in place.
        """
        user_agent = random.choice(self.USER_AGENT)
        request.headers["user-agent"] = user_agent
class CheckUA:
    """Middleware hook that marks requests whose response status is not 200."""

    def process_response(self, request, response, spider):
        """Flag the request as unfiltered when the response is not HTTP 200.

        Args:
            request: The request that produced ``response``; its
                ``dont_filter`` attribute is set to True on a non-200 status.
            response: Object exposing an integer ``status`` attribute.
            spider: Unused by this method.

        Returns:
            The ``response`` object, unchanged.
        """
        # The status is an integer; the previous comparison against the string
        # '200' was always unequal, so every request was flagged.
        if response.status != 200:
            # Lets a re-sent request object enter the queue again.
            # NOTE(review): this method only sets the flag and still returns
            # the response; it does not re-schedule the request itself.
            request.dont_filter = True
        return response  # Must not be omitted!
class RandomProxy(object):
    """Middleware hook that assigns a randomly chosen proxy to each request."""

    # Proxy endpoints written without the scheme name ("//host:port");
    # process_request prepends "http:" to form the final URL.
    IP = [
        '//118.31.250.72:8080'
    ]

    def process_request(self, request, spider):
        """Store a proxy URL built from a random ``IP`` entry in ``request.meta['proxy']``.

        Args:
            request: Object exposing a mutable ``meta`` mapping.
            spider: Unused by this method.

        Returns:
            None (implicitly); the request is modified in place.
        """
        endpoint = random.choice(self.IP)
        proxy_url = "http:{}".format(endpoint)
        request.meta['proxy'] = proxy_url