#coding=utf-8
import requests
from pymongo import MongoClient
from lxml import etree
import datetime
# Open a connection to the MongoDB server on localhost:27017 and select the
# "wanfang" database.
client = MongoClient(host="localhost", port=27017)
db = client.wanfang

# Source collection (journal names / counts) and destination collection
# (raw article pages fetched by the crawl below).
collection = db.journal_name
collection1 = db.journal_foreign_2014

# Credentials are blank here.
db.authenticate("", "")

# NOTE: despite its name, ``cursor`` is a single document -- the one at
# index 1 (i.e. the second) of an unfiltered find() on ``journal_name``.
# The crawl below reads its 'name_list' and 'number_list' fields.
cursor = collection.find()[1]
# Crawl wanfangdata search results for every journal listed in ``cursor`` and
# store the raw HTML of each linked article page in ``collection1``.
#
# ``cursor['name_list']`` and ``cursor['number_list']`` are read in lockstep:
# entry k of the first is a journal name, entry k of the second is a string
# holding that journal's result count.

PAGE_SIZE = 50       # results per search page; also sent as pageSize= in the URL
MAX_JOURNALS = 2645  # at most this many journals are processed
SITE_ROOT = "http://new.wanfangdata.com.cn"

journal_names = cursor['name_list'][:MAX_JOURNALS]
journal_counts = cursor['number_list'][:MAX_JOURNALS]

for name, raw_count in zip(journal_names, journal_counts):
    # The first and last characters are stripped before parsing the number
    # (presumably surrounding brackets such as "(123)" -- TODO confirm).
    num = int(raw_count[1:-1])

    # Ceiling division: number of result pages needed for ``num`` hits.
    # ``//`` keeps the result an int, so range() works on Python 2 and 3.
    page_count = (num + PAGE_SIZE - 1) // PAGE_SIZE

    # Search pages are numbered from 1.
    for page in range(1, page_count + 1):
        url = (
            SITE_ROOT
            + "/search/searchList.do?searchType=perio&pageSize=" + str(PAGE_SIZE)
            + "&page=" + str(page)
            + u"&searchWord= 摘要:is 起始年:2014 结束年:2014 刊名:" + name
            + "&order=correlation&showType=detail&isCheck=check&isHit=&isHitUnit=&firstAuthor=false&rangeParame=all"
        )
        listing = requests.post(url)
        tree = etree.HTML(listing.text)

        # For each <strong> directly inside a div.title, take the href of
        # the element that immediately follows it.
        article_links = tree.xpath(
            "//div[@class='title']/strong/following-sibling::*[1]/@href"
        )

        for href in article_links:
            url1 = SITE_ROOT + href
            # Fetch the article page itself (url1), not the listing page
            # again, so the stored HTML matches the stored URL.
            article = requests.post(url1)
            record = {
                'date': datetime.datetime.now(),  # time of retrieval
                'url': url1,
                'html': article.text,
                'year': "2014",
            }
            # NOTE(review): Collection.insert is the legacy PyMongo API; it is
            # kept because the script also relies on db.authenticate. Switch
            # to insert_one when upgrading PyMongo.
            collection1.insert(record)