闲鱼

 1 import requests
 2 from lxml import etree
 3 from selenium import webdriver
 4 import time
 5 import pymongo
 6 
 7 
 8 client=pymongo.MongoClient('localhost',27017)
 9 DB=client['闲鱼']
10 #url_list=DB['shop_list']
11 shop_info=DB['shop_info']
12 
13 url="https://s.2.taobao.com/list/list.htm?spm=2007.1000337.0.0.735ad9c1MhZfTa&st_trust=1&ist=0"
14 broswer=webdriver.PhantomJS()
15 broswer.get(url)
16 #time.sleep(2)
17 button=broswer.find_element_by_xpath('//*[@id="J_CategoryFilters"]/div/a')
18 button.click()
19 #time.sleep(1)
20 r=broswer.page_source
21 
22 
def labelparse(r):
    """Extract the category link targets from the listing page HTML.

    Args:
        r: HTML source text of the listing page.

    Returns:
        A list with the ``href`` value of every ``<a>`` that sits in an
        ``<li>`` directly under a ``<ul>`` whose class attribute is exactly
        ``"J_HiddenAreaContent clearfix"``.
    """
    html = etree.HTML(r)
    # Only the link targets are needed; the link texts were previously
    # extracted with a second XPath query and then thrown away.
    return html.xpath('//ul[@class="J_HiddenAreaContent clearfix"]/li/a/@href')
28 
29 
def shopparse(url, page):
    """Fetch one result page of a category and store its items in MongoDB.

    Args:
        url: Protocol-relative category link as returned by ``labelparse``.
            Its last five characters are cut off before the paging query
            is appended -- presumably a trailing ``ist=0``; verify against
            the links the site actually serves.
        page: Page number placed in the ``page`` query parameter.

    Every item found is written to the ``shop_info`` collection as a
    document with the keys ``shopname``, ``shopprice`` and ``shoplocation``.

    Raises:
        requests.exceptions.RequestException: if the page cannot be fetched
            (including when the server does not answer within the timeout).
    """
    fulurl = '{0}{1}page={2}&ist=0'.format('https:', url[:-5], str(page))
    headers = {'user-agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'}
    # Without a timeout a single stalled connection blocks the crawl forever.
    response = requests.get(fulurl, headers=headers, timeout=10)
    html = etree.HTML(response.text)
    names = html.xpath('//h4[@class="item-title"]/a/text()')
    prices = html.xpath('//span[@class="price"]/em/text()')
    locations = html.xpath('//div[@class="seller-location"]/text()')
    # The three lists are extracted independently and paired by position.
    # zip() stops at the shortest one, so an item without a price or a
    # location can no longer raise IndexError halfway through the page.
    documents = [
        {
            'shopname': name,
            'shopprice': price,
            'shoplocation': location,
        }
        for name, price, location in zip(names, prices, locations)
    ]
    # insert_many() rejects an empty list, so skip pages without items.
    if documents:
        shop_info.insert_many(documents)
47 
48 
49 
50 
def main(page):
    """Scrape result page ``page`` of every category link.

    The links are taken from the module-level page source ``r`` via
    ``labelparse``; each one is handed to ``shopparse`` together with
    ``page``.
    """
    for label_url in labelparse(r):
        shopparse(label_url, page)
56 
57 
58 
if __name__ == "__main__":
    # Walk through result pages 0..99 of every category.
    page = 0
    while page < 100:
        main(page)
        page += 1

 

posted @ 2017-12-15 17:51  不可叽叽歪歪  阅读(342)  评论(0)  编辑  收藏  举报