# -*- coding: utf-8 -*-
import scrapy
import scrapy
from scrapy.http import Request,FormRequest
import urllib.request
class Mini1Spider(scrapy.Spider):
    """Spider that submits the Taobao login form and prints the landing page.

    Flow: ``start_requests`` fetches the login page, ``parse`` fills in the
    login form (asking the operator to type a captcha when the page shows
    one), and ``next`` prints the title and note text of the response that
    follows the form submission.
    """

    name = 'mini1'
    allowed_domains = ['taobao.com']
    # The key must be the bare header name; a trailing colon would send a
    # header literally called "User-Agent:" instead of overriding User-Agent.
    header = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.22 Safari/537.36 SE 2.X MetaSr 1.0"}

    # start_urls is intentionally not defined: start_requests() below issues
    # the first request itself so that it can attach a cookiejar.

    def start_requests(self):
        """Return the initial request for the login page.

        The ``cookiejar`` meta key is set so the session cookies obtained
        here can be carried over to the login submission.
        """
        return [Request(
            "https://login.taobao.com/member/login.jhtml?f=top&redirectURL=https%3A%2F%2Fwww.taobao.com%2F",
            headers=self.header,
            callback=self.parse,
            meta={"cookiejar": 1},
        )]

    def parse(self, response):
        """Build and return the login form submission.

        If the page contains a captcha image, the image is downloaded to a
        local file and the operator is asked to type its value on stdin;
        that value is then added to the submitted form data.

        Returns a one-element list holding the ``FormRequest`` whose
        response is handled by ``next``.
        """
        # NOTE(review): credentials are hard-coded in source; consider
        # loading them from settings or spider arguments instead.
        data = {
            "TPL_username": "兔子的尾巴mini",
            "TPL_password": "mini123",
            "redirectURL": "https://www.taobao.com",
        }

        # NOTE(review): confirm that this selector and the "captcha-solution"
        # field name match the markup of the Taobao login form.
        captcha = response.xpath("//img[@id='captcha_image']/@src").extract()
        if captcha:
            print("u have a captcha")
            localpath = "E:/m/mini1/captcha.png"
            # The src attribute may be relative or protocol-relative, which
            # urlretrieve cannot open; resolve it against the page URL first.
            urllib.request.urlretrieve(response.urljoin(captcha[0]), filename=localpath)
            # Report the path the file was actually written to.
            print("check the pic at'%s',and input the captcha here" % localpath)
            data["captcha-solution"] = input()
        else:
            print("no captcha")

        print("loading……")
        return [FormRequest.from_response(
            response,
            # Reuse the same cookiejar so the login keeps the session cookies.
            meta={"cookiejar": response.meta["cookiejar"]},
            headers=self.header,
            formdata=data,
            callback=self.next,
        )]

    def next(self, response):
        """Print the page title and the first ``div.note`` text, if present.

        Either element may be missing from the response (for example when
        the login did not succeed), so each is checked before printing
        instead of indexing blindly.
        """
        print("finished web crawling")
        title = response.xpath("/html/head/title/text()").extract()
        note = response.xpath("//div[@class='note']/text()").extract()
        if title:
            print(title[0])
        else:
            print("no title found")
        if note:
            print(note[0])
        else:
            print("no note found")