Counting the Articles on a Personal CSDN Blog

 

Version 1

The original version is fairly simple.

It only counts the first page of articles and does not sort the results.

 

# coding:utf-8
import urllib2
from bs4 import BeautifulSoup
import sys
reload(sys)
sys.setdefaultencoding('utf-8')

def getPage(): #伪装成浏览器登陆,获取网页源代码
	url = 'http://blog.csdn.net/qiqiyingse?viewmode=contents'

	totalList=[]
	contentList=[]
	headers = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}  
	req = urllib2.Request(url=url,headers=headers)
	try:
		html = urllib2.urlopen(req).read()
	except urllib2.HTTPError,e:
		print e.code
		print e.reason
	fd=open('counter.txt','w')
	page = BeautifulSoup(html,'lxml')
	mytimes=page.find(id='blog_rank')
	i =1
	for aa in mytimes.find_all('li'):
		if i<3:
			print aa.text
			fd.write(aa.text)
			fd.write('\n')
			totalList.append(aa.text)
		i +=1


	items = page.find_all('div',class_ ='list_item list_view')
	print '总共有文章%d 篇' % len(items)
	for item in items:
		content=item.find('a')
		read_time=item.find('span',class_ ='link_view')
		comments_time=item.find('span',class_ ='link_comments')
		
		totalcontent=content.text.strip()+read_time.text.strip()+comments_time.text.strip()
		print totalcontent
		contentList.append(totalcontent)
		fd.write(totalcontent)
		fd.write('\n')

	fd.close()
	return totalList,contentList
urls=getPage()

 

Version 2

 

Here is another version.

This version sorts the articles directly by view count.

The code was updated again on 2017-04-11. Changes in this update:

The statistics are now written into a separate folder created next to the script, into a text file named after the current time.

This avoids each new run overwriting the data from the previous run.
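That change boils down to a small pattern, sketched here on its own (the folder name and timestamp format mirror the code below; the demo content is made up):

# -*- coding: utf-8 -*-
import os, datetime

out_dir = 'count'                     # same folder name the script below uses
if not os.path.exists(out_dir):       # create it once, next to the script
    os.makedirs(out_dir)

# Name the file after the current time, so every run writes a fresh file
fname = out_dir + '/' + datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S') + '.txt'
fd = open(fname, 'w')
fd.write('demo line\n')
fd.close()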

The code for the second version:

 

# coding:utf-8
import urllib2,re,datetime,os
from bs4 import BeautifulSoup
import sys
reload(sys)
sys.setdefaultencoding('utf-8')

def getPage(): #伪装成浏览器登陆,获取网页源代码
	url = 'http://blog.csdn.net/qiqiyingse?viewmode=contents'
	baseurl='http://blog.csdn.net'
	totalList=[]
	contentList=[]
	sortlist=[]
	sortlist1=[]
	headers = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}  
	req = urllib2.Request(url=url,headers=headers)
	try:
		html = urllib2.urlopen(req).read()
	except urllib2.HTTPError,e:
		print e.code
		print e.reason
	path='count'
	if not os.path.exists(path):
		os.makedirs(path)
	fname=path+'/'+datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')+'.txt'
	fd=open(fname,'w')
	page = BeautifulSoup(html,'lxml')
	mytimes=page.find(id='blog_rank')
	i =1
	for aa in mytimes.find_all('li'):
		if i<3:
			print aa.text
			fd.write(aa.text)
			fd.write('\n')
			totalList.append(aa.text)
		i +=1


	items = page.find_all('div',class_ ='list_item list_view')
	print '总共有文章%d 篇' % len(items)
	fd.write('总共有文章%d 篇' % len(items))
	fd.write('\n')
	for item in items:
		aa={}
		content=item.find('a')
		contemtUrl=baseurl+content.get('href')
		
		read_time=item.find('span',class_ ='link_view')
		tmp=str(read_time.text.strip())

		number = int(filter(str.isdigit, tmp))
		sortlist1.append(number)

		comments_time=item.find('span',class_ ='link_comments')
		aa['indexs']=number
		aa['content']=content.text.strip()
		aa['read_time']=tmp
		aa['comments_time']=comments_time.text.strip()
		aa['contemtUrl']=contemtUrl
		sortlist.append(aa)
	sortlist1.sort()
	print sortlist1
	
	for i in sortlist1:
		for a in sortlist:
			if int(i) == int(a['indexs']):
				totalcontent=a['content']+'\t'+a['read_time']+'\t'+a['comments_time']+'\t'+a['contemtUrl']
				print totalcontent
				fd.write(totalcontent)
				fd.write('\n')
				contentList.append(totalcontent)
	fd.close()
	return contentList
urls=getPage()

Version 3

This one is a bit more fun: after counting, it keeps opening the collected article URLs in new browser tabs in a loop.

 

#coding:utf-8  
import urllib2,re,time,random,os,datetime
from bs4 import BeautifulSoup
import webbrowser as web
import sys  
reload(sys)  
sys.setdefaultencoding('utf-8')  
  
def getPage(): #伪装成浏览器登陆,获取网页源代码  
    url = 'http://blog.csdn.net/qiqiyingse?viewmode=contents'  
    baseurl='http://blog.csdn.net' 
    contentList=[]  
    sortlist=[]
    sortlist1=[]
    urlList=[]
    headers = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}    
    req = urllib2.Request(url=url,headers=headers)  
    try:  
        html = urllib2.urlopen(req).read()  
    except urllib2.HTTPError,e:  
        print e.code  
        print e.reason  
    path=u'count'
    if not os.path.exists(path):  
            os.makedirs(path) 	
    fname=path+'/'+datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')+'.txt'	
    print fname	
    fd=open(fname,'w')
    page = BeautifulSoup(html,'lxml')
    items = page.find_all('div',class_ ='list_item list_view')  
    print u'总共有文章%d 篇' % len(items)  
    fd.write('总共有文章%d 篇' % len(items))
    fd.write('\n')
    for item in items:  
        aa={}  
        content=item.find('a')
        
        contemtUrl=baseurl+content.get('href')
        #print contemtUrl
          
        read_time=item.find('span',class_ ='link_view')  
        readtime=str(read_time.text.strip())
        #print readtime
  
        readtimeNumber = int(filter(str.isdigit, readtime))
        #print readtimeNumber
        sortlist1.append(readtimeNumber)  
        #time.sleep(2)
        aa['indexs']=readtimeNumber  
        aa['content']=content.text.strip()  
        aa['read_time']=readtime   
        aa['contemtUrl']=contemtUrl  
        sortlist.append(aa)  
    sortlist1.sort()  
    print sortlist1  
      
    for i in sortlist1:  
        for a in sortlist:  
            if int(i) == int(a['indexs']):  
                totalcontent=a['content']+'\t'+a['read_time']+'\t'+a['contemtUrl']  
                print totalcontent  
                fd.write(totalcontent)  
                fd.write('\n')
                urlList.append(a['contemtUrl'])
                contentList.append(totalcontent)  
    fd.close()  
    return urlList  

urls=getPage()

count=random.randint(10,50)
print u'将要打开关闭浏览器次数为:',count
for i in range(5):
	print urls[i]

j=0
while j< count:
    if j == 15:
        j=0
    for i in range(5):
        web.open_new_tab(urls[i+38])
        time.sleep(1)
        web.open_new_tab(urls[random.randint(1,44)])
        time.sleep(1)
    web.open_new_tab('http://blog.csdn.net/qiqiyingse/article/details/51801918')
    time.sleep(3)
    os.system('taskkill /f /IM Chrome.exe')
    j = j+1

Version 4

This update handles blogs with more than 50 articles, which need a second page to display; however, it can still only count two pages of content.

Hence this new revision.

 

#coding:utf-8  
import urllib2,re,time,random,os,datetime
from bs4 import BeautifulSoup
import webbrowser as web
import sys  
reload(sys)  
sys.setdefaultencoding('utf-8')  
  
def getPage(): #伪装成浏览器登陆,获取网页源代码  
    url1 = 'http://blog.csdn.net/qiqiyingse/article/list/1?viewmode=contents'  
    url2 = 'http://blog.csdn.net/qiqiyingse/article/list/2?viewmode=contents'  
    baseurl='http://blog.csdn.net' 
    contentList=[]  
    sortlist=[]
    sortlist1=[]
    urlList=[]
    headers = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}    
    req1 = urllib2.Request(url=url1,headers=headers)  
    req2 = urllib2.Request(url=url2,headers=headers)  
    try:  
        html1 = urllib2.urlopen(req1).read()  
        html2 = urllib2.urlopen(req2).read()  
    except urllib2.HTTPError,e:  
        print e.code  
        print e.reason  
    path=u'count'
    if not os.path.exists(path):  
            os.makedirs(path) 	
    fname=path+'/'+datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')+'.txt'	
    print fname	
    fd=open(fname,'w')
    page1 = BeautifulSoup(html1,'lxml')
    page2 = BeautifulSoup(html2,'lxml')
    items1 = page1.find_all('div',class_ ='list_item list_view')  
    items2 = page2.find_all('div',class_ ='list_item list_view')  
    cont_print= u'总共有文章%d 篇' % (len(items1)+len(items2))
    print cont_print
    fd.write(cont_print)  
    fd.write('\n')
    for item in items1:  
        aa={}  
        content=item.find('a')
        
        contemtUrl=baseurl+content.get('href')
        #print contemtUrl
          
        read_time=item.find('span',class_ ='link_view')  
        readtime=str(read_time.text.strip())
        #print readtime
  
        readtimeNumber = int(filter(str.isdigit, readtime))
        #print readtimeNumber
        sortlist1.append(readtimeNumber)  
        #time.sleep(2)
        aa['indexs']=readtimeNumber  
        aa['content']=content.text.strip()  
        aa['read_time']=readtime   
        aa['contemtUrl']=contemtUrl  
        sortlist.append(aa)
    for item in items2:  
        aa={}  
        content=item.find('a')
        
        contemtUrl=baseurl+content.get('href')
        #print contemtUrl
          
        read_time=item.find('span',class_ ='link_view')  
        readtime=str(read_time.text.strip())
        #print readtime
  
        readtimeNumber = int(filter(str.isdigit, readtime))
        #print readtimeNumber
        sortlist1.append(readtimeNumber)  
        #time.sleep(2)
        aa['indexs']=readtimeNumber  
        aa['content']=content.text.strip()  
        aa['read_time']=readtime   
        aa['contemtUrl']=contemtUrl  
        sortlist.append(aa)  		
    sortlist1.sort()  
    print sortlist1  
      
    for i in sortlist1:  
        for a in sortlist:  
            if int(i) == int(a['indexs']):  
                totalcontent=a['content']+'\t'+a['read_time']+'\t'+a['contemtUrl']  
                print totalcontent  
                fd.write(totalcontent)  
                fd.write('\n')
                urlList.append(a['contemtUrl'])
                contentList.append(totalcontent)  
    fd.close()  
    return urlList  

urls=getPage()

 

Version 5

This version restructures the whole script:

1. Each part is easier to read.

2. It can now count every article under an account, no matter how many articles or pages the blog has.

3. The sorting logic has been rewritten, which fixes a bug in the earlier versions (a short standalone sketch follows).
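
The earlier versions sorted the raw read counts first and then scanned the article list for a matching count; when two posts share the same read count, that match-up prints duplicates. Sorting the list of dicts directly avoids the problem. A minimal sketch with made-up records (the key= form is equivalent to the cmp= call used in the code below and also works on Python 3):

# -*- coding: utf-8 -*-
# Records shaped like the dicts that handle_items() builds below
articles = [
    {'indexs': 120, 'title': 'post A'},
    {'indexs': 35,  'title': 'post B'},
    {'indexs': 120, 'title': 'post C'},   # same read count as post A
]

# Sort the dicts themselves by read count; equal counts stay as two distinct entries
articles = sorted(articles, key=lambda a: a['indexs'])
for rank, a in enumerate(articles, 1):
    print('%d  %s  %d' % (rank, a['title'], a['indexs']))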

 

The code:

 

#coding:utf-8
import urllib2,re,time,random,os,datetime
from bs4 import BeautifulSoup
import webbrowser as web
import sys
reload(sys)  
sys.setdefaultencoding('utf-8')

#自定义打印函数
def self_log(msg):
	print u'%s: %s' % (time.strftime('%Y-%m-%d %H:%M:%S'), msg)

#获取页面内容
def  get_html(url):
	headers = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}
	req = urllib2.Request(url=url,headers=headers)
	html = None	# stays None if the request fails, so the caller's "if not html" check works
	try:
		html = urllib2.urlopen(req).read()
	except urllib2.HTTPError,e:
		print e.code
	return html
	
#得到博客页面总数
def get_last_page(html,fd):
	if not html:
		self_log(u'页面错误,停止运行') 
		return
	page = BeautifulSoup(html,'lxml')
	if page.find('div',class_ ='pagelist').find_all('a'):
		last_page=page.find('div',class_ ='pagelist').find_all('a')
		last_page= last_page[len(last_page)-1].get('href')[-1:]
		self_log('总共有%s 页博客' % last_page)
		fd.write('总共有%s 页博客\n' % last_page)

		return last_page
	else:
		return 1
		
#获取积分内容		
def get_rank(html,fd):
	if not html:
		self_log(u'页面错误,停止运行') 
		return
	page = BeautifulSoup(html,'lxml')
	rank_list=[]
	if page.find(id='blog_rank'):
		
		rank_content=page.find(id='blog_rank')
		i =1
		for rank in rank_content.find_all('li'):
			if i<3:
				self_log(rank.text)
				fd.write(rank.text)
				fd.write('\n')
				rank_list.append(rank.text)
			i +=1
	return rank_list
	
#获取页面列表
def get_items(url):
	content_html=get_html(url)
	page = BeautifulSoup(content_html,'lxml')
	items = page.find_all('div',class_ ='list_item list_view')
	return items

#根据每一个items list 提取需要的元素
def handle_items(items,content_list,read_num_for_sort):
	for item in items:
		temp={}#临时变量
		
		title=item.find('a')#标题
		content_url='http://blog.csdn.net'+title.get('href')#标题对应文章的地址
		read_times=item.find('span',class_ ='link_view').text.strip()#阅读次数
		comments_time=item.find('span',class_ ='link_comments')#评论次数
		
		read_number = int(filter(str.isdigit, str(read_times)))	#提取出来具体阅读次数的数字,为之后的排序做准备
		read_num_for_sort.append(read_number)

		#将数据打包
		temp['indexs']=read_number
		temp['title']=title.text.strip()
		temp['read_times']=read_times
		temp['comments_time']=comments_time.text.strip()
		temp['content_url']=content_url
		content_list.append(temp)

#创建文件夹
def mkdir_folder(path):
	if not os.path.exists(path):  
		os.makedirs(path) 

#程序运行主函数		
def run(url):
	read_num_for_sort=[]
	content_list=[]
	content_totle_list=[]
	
	#定义文件夹名字并创建文件夹
	dir_path='count'
	mkdir_folder(dir_path)
	
	#定义文件名字
	count_file_name=dir_path+'/'+datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')+'.txt'
	fd=open(count_file_name,'w')
	
	#1.从主页进入获取页面总数
	main_html=get_html(url)
	last_page=get_last_page(main_html,fd)
	
	#2.获取积分内容
	rank_list=get_rank(main_html,fd)
	
	#3.组装url,分别加载每页的页面,同时在每一个页面提取我们需要的内容
	for i in range(1,int(last_page)+1):
		main_url=url.split('?')[0]+'/article/list/%d?viewmode=contents' % i
		self_log('即将获取第%d页的内容,地址是:%s' % (i,main_url))
				
		items=get_items(main_url)#获取每一页的页面内容,根据页面内容得到文章item list
		handle_items(items,content_list,read_num_for_sort)#处理item list
	
	#4.根据阅读次数 进行排序
	read_num_for_sort.sort()
	print read_num_for_sort
	'''
	这也是一种排序思想,其中有一些缺陷
	for i in read_num_for_sort:
		for a in content_list:
			if int(i) == int(a['indexs']):
				totalcontent=a['content']+'\t|'+a['read_time']+'\t|'+a['comments_time']+'\t|'+a['contemtUrl']
	'''
	self_log('总共有%d 篇文章' % len(content_list))#根据得到的数据,统计文章总数
	content_list = sorted(content_list,cmp=lambda x,y:cmp(x['indexs'],y['indexs']),reverse=0)#根据 indexs(阅读次数)这个索引值进行排序
	
	article_index = 1
	for a in content_list:
		#组装打印语句
		totalcontent= '第'+str(article_index)+'篇  |'+a['title']+'\t|'+a['read_times']+'\t|'+a['comments_time']+'\t|'+a['content_url']
		self_log(totalcontent)
		#将其存贮到本地
		fd.write(totalcontent)
		fd.write('\n')
		article_index +=1
		content_totle_list.append(totalcontent)
	fd.close()		

	return content_totle_list
	
if __name__ == '__main__': 
	print '''
            ***************************************** 
            **    Welcome to Spider of Count CSDN  ** 
            **      Created on 2017-04-12          ** 
            **      @author: Jimy_Fengqi           ** 
            *****************************************'''  
	url='http://blog.csdn.net/qiqiyingse?viewmode=contents'
	run(url)

Version 6

This update makes a few small improvements.

The main one is that each article's content can now be saved locally.

In addition, the statistics are written into an Excel file as well (a minimal sketch of that pattern follows).
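
A minimal sketch of that Excel write-out, here using xlwt, which exposes the same Workbook / add_sheet / write / save calls as the pyExcelerator import below (file and sheet names are only examples):

# -*- coding: utf-8 -*-
import xlwt

wb = xlwt.Workbook()
ws = wb.add_sheet('2017-04-12_demo')           # the script below names the sheet after the current time

# Header row, matching the columns written by run_to_save_info_in_excel()
header = [u'编号', u'标题', u'阅读次数', u'评论次数', u'文章地址']
for col, text in enumerate(header):
    ws.write(0, col, text)

ws.write(1, 0, 1)                              # first data row: number, title, ... one cell per column
ws.write(1, 1, u'example title')

wb.save('count/count_demo.xls')                # .xls, the format both libraries produce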

The code:

 

#coding:utf-8
import urllib2,re,time,random,os,datetime
from bs4 import BeautifulSoup
from pyExcelerator import * #导入excel相关包 
import sys
reload(sys)  
sys.setdefaultencoding('utf-8')

#自定义打印函数
def self_log(msg):
	print u'%s: %s' % (time.strftime('%Y-%m-%d %H:%M:%S'), msg)

#获取页面内容
def  get_html(url):
	headers = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}
	req = urllib2.Request(url=url,headers=headers)
	html = None	# stays None if the request fails, so the caller's "if not html" check works
	try:
		html = urllib2.urlopen(req).read()
	except urllib2.HTTPError,e:
		print e.code
	return html
	
#得到博客页面总数
def get_last_page(html,fd):
	if not html:
		self_log(u'页面错误,停止运行') 
		return
	page = BeautifulSoup(html,'lxml')
	if page.find('div',class_ ='pagelist').find_all('a'):
		last_page=page.find('div',class_ ='pagelist').find_all('a')
		last_page= last_page[len(last_page)-1].get('href')[-1:]
		self_log('总共有%s 页博客' % last_page)
		fd.write('总共有%s 页博客\n' % last_page)

		return last_page
	else:
		return 1
		
#获取积分内容		
def get_rank(html,fd):
	if not html:
		self_log(u'页面错误,停止运行') 
		return
	page = BeautifulSoup(html,'lxml')
	rank_list=[]
	if page.find(id='blog_rank'):
		
		rank_content=page.find(id='blog_rank')
		i =1
		for rank in rank_content.find_all('li'):
			if i<3:
				self_log(rank.text)
				fd.write(rank.text)
				fd.write('\n')
				rank_list.append(rank.text)
			i +=1
	return rank_list
	
#获取页面列表
def get_items(url):
	content_html=get_html(url)
	page = BeautifulSoup(content_html,'lxml')
	items = page.find_all('div',class_ ='list_item list_view')
	return items

#根据每一个items list 提取需要的元素
def handle_items(items,content_list,read_num_for_sort):
	for item in items:
		temp={}#临时变量
		
		title=item.find('a')#标题
		content_url='http://blog.csdn.net'+title.get('href')#标题对应文章的地址
		read_times=item.find('span',class_ ='link_view').text.strip()#阅读次数
		comments_time=item.find('span',class_ ='link_comments')#评论次数
		
		read_number = int(filter(str.isdigit, str(read_times)))	#提取出来具体阅读次数的数字,为之后的排序做准备
		read_num_for_sort.append(read_number)

		#将数据打包
		temp['indexs']=read_number
		temp['title']=title.text.strip()
		temp['read_times']=read_times
		temp['comments_time']=comments_time.text.strip()
		temp['content_url']=content_url
		content_list.append(temp)

#创建文件夹
def mkdir_folder(path):
	if not os.path.exists(path):  
		os.makedirs(path) 

#获取页面信息
def getContent(html):
	page = BeautifulSoup(html,'lxml')
	try:
		title=page.find('div',class_='article_title').find('a').text
		title=title.strip()
	except Exception,e:
		print e
	try:
		content=page.find('div',class_='article_content')
		dir_path='count'
		artitle_name_path=dir_path+'/'+title+'.txt'
		with open(artitle_name_path,'w') as f:
			f.write(content.text)
		self_log(u'存贮文章:%s 完毕' % title)
	except Exception,e:
		print e

#存贮每一篇文章到本地
def run_to_get_article(content_total_list):
	self_log('start save every article  ')
	for article_content in content_total_list:
		article_url=article_content.split('|')[4]
		self_log( '将要存贮的地址是: %s ...' % article_url)
		artitle_html=get_html(article_url)
		getContent(artitle_html)
	
#将内容存贮到excel中	
def run_to_save_info_in_excel(content_total_list):
	self_log('start save info into excel')
	excel_w=Workbook()
	excel_sheet_name=time.strftime('%Y-%m-%d_%H-%M-%S')
	excel_content_handler=excel_w.add_sheet(excel_sheet_name) 
	
	first_line=[u'编号',u'标题',u'阅读次数',u'评论次数',u'文章地址']
	cols=0
	for content in first_line:
		excel_content_handler.write(0,cols,content)
		cols +=1
		
	index=1
	for article_content in content_total_list:
		cols =0
		for a in article_content.split('|'):
			excel_content_handler.write(index,cols,a)
			cols +=1
		index +=1
	excel_w.save('count/'+'count'+time.strftime('%Y-%m-%d')+'.xls')

		
#程序运行主函数		
def run(url):
	read_num_for_sort=[]
	content_list=[]
	content_total_list=[]
	
	#定义文件夹名字并创建文件夹
	dir_path='count'
	mkdir_folder(dir_path)
	
	#定义文件名字
	count_file_name=dir_path+'/'+datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')+'.txt'
	fd=open(count_file_name,'w')
	
	#1.从主页进入获取页面总数
	main_html=get_html(url)
	last_page=get_last_page(main_html,fd)
	
	#2.获取积分内容
	rank_list=get_rank(main_html,fd)
	
	#3.组装url,分别加载每页的页面,同时在每一个页面提取我们需要的内容
	for i in range(1,int(last_page)+1):
		main_url=url.split('?')[0]+'/article/list/%d?viewmode=contents' % i
		self_log('即将获取第%d页的内容,地址是:%s' % (i,main_url))
				
		items=get_items(main_url)#获取每一页的页面内容,根据页面内容得到文章item list
		handle_items(items,content_list,read_num_for_sort)#处理item list
	
	#4.根据阅读次数 进行排序
	read_num_for_sort.sort()
	print read_num_for_sort
	'''
	这也是一种排序思想,其中有一些缺陷
	for i in read_num_for_sort:
		for a in content_list:
			if int(i) == int(a['indexs']):
				totalcontent=a['content']+'\t|'+a['read_time']+'\t|'+a['comments_time']+'\t|'+a['contemtUrl']
	'''
	self_log('总共有%d 篇文章' % len(content_list))#根据得到的数据,统计文章总数
	#根据 indexs(阅读次数)这个索引值进行排序
	#非常好的一个根据列表中字典数据进行排序的方法
	content_list = sorted(content_list,cmp=lambda x,y:cmp(x['indexs'],y['indexs']),reverse=0)
	
	article_index = 1
	for a in content_list:
		#组装打印语句
		totalcontent= '第'+str(article_index)+'篇  |'+a['title']+'\t|'+a['read_times']+'\t|'+a['comments_time']+'\t|'+a['content_url']
		#self_log(totalcontent)
		#将其存贮到本地
		fd.write(totalcontent)
		fd.write('\n')
		article_index +=1
		content_total_list.append(totalcontent)
	fd.close()		

	return content_total_list
	
if __name__ == '__main__': 
	print '''
            ***************************************** 
            **    Welcome to Spider of Count CSDN  ** 
            **      Created on 2017-04-12          ** 
            **      @author: Jimy_Fengqi           ** 
            *****************************************''' 
	url=raw_input(u'请输入将要统计的个人csdn主页地址,类似如下:\n http://blog.csdn.net/qiqiyingse?viewmode=contents')
	if not url:
		url='http://blog.csdn.net/qiqiyingse?viewmode=contents'
	content_total_list=run(url)
	run_to_save_info_in_excel(content_total_list)
	run_to_get_article(content_total_list)

 

 

 

Version 7

This version updates the Excel handling: if a workbook for the current day already exists, it is copied and a new sheet is appended to it instead of the file being overwritten (see the sketch below).
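
The core of this version is the append flow: if today's .xls already exists, open it with xlrd (keeping formatting), make a writable copy with xlutils.copy, add one more sheet, and save it back; otherwise start a fresh workbook. A minimal sketch of just that flow (the path is illustrative):

# -*- coding: utf-8 -*-
import os, time
import xlrd, xlwt
from xlutils.copy import copy

xls_path = 'count/' + time.strftime('%Y-%m-%d') + '.xls'    # one workbook per day

if os.path.exists(xls_path):
    # Re-open the existing workbook; formatting_info=True keeps the cell styles
    rb = xlrd.open_workbook(xls_path, formatting_info=True)
    wb = copy(rb)                     # writable xlwt copy of the whole workbook
else:
    wb = xlwt.Workbook()              # first run of the day: brand-new workbook

sheet = wb.add_sheet(time.strftime('%Y-%m-%d_%H-%M-%S'))    # one sheet per run
sheet.write(0, 0, u'编号')
wb.save(xls_path)                     # rewrites the file with the extra sheet included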

 

#coding:utf-8
import urllib2,re,time,random,os,datetime
from bs4 import BeautifulSoup
from pyExcelerator import * #导入excel相关包 
import xlrd 
import xlwt
from xlutils.copy import copy
import sys
reload(sys)  
sys.setdefaultencoding('utf-8')


def create_excel(data):
	excle_file_name=str(time.strftime('%Y-%m-%d')+'.xls')#以当天日期创建excel表
	
	#判断一个文件是否存在	
	def file_is_exist(file_name):
		path = os.path.join(os.getcwd()+'/count/'+file_name)
		print 'current file [%s] path is [%s]' % (file_name,path)
		is_exists = os.path.exists(path)
		return is_exists

	#读取复制一份,并且增加一张新表	
	def read_and_copy_excle(excle_file_name):
		read_excel_flag=xlrd.open_workbook('count/'+excle_file_name,formatting_info=True)#保存原有格式
		count = len(read_excel_flag.sheets()) #sheet数量
		for r in read_excel_flag.sheets():
			print r.name #sheet名称
		worksheet_copy=copy(read_excel_flag)#复制一份excel
		write_excel(worksheet_copy,excle_file_name)#之后再次插入一份
		
	#写excel
	def write_excel(excel_flag,excle_file_name):
		sheet_name=str(time.strftime('%Y-%m-%d_%H-%M-%S'))
		sheet_flag = excel_flag.add_sheet(sheet_name,cell_overwrite_ok=True) #创建sheet
		first_line=[u'编号',u'标题',u'阅读次数',u'评论次数',u'文章地址']
		
		#生成第一行
		for i in range(0,len(first_line)):
			sheet_flag.write(0,i,first_line[i],set_style('Times New Roman',220,True,40))
			if i== 1:#设置行宽
				sheet_flag.col(i).width=256*150
			elif i == 4:
				sheet_flag.col(i).width=256*80
			else:
				sheet_flag.col(i).width=256*15
			
		row_index=1	
		for article_content in data:
			cols_index =0
			for data_detail in article_content.split('|'):
				sheet_flag.write(row_index,cols_index,data_detail,set_style('Arial',300,False,cols_index))
				#sheet_flag.col(cols_index).width=sheet_flag.col(cols_index+1).width
				cols_index +=1
			row_index +=1
		style = xlwt.easyxf('font:height 240, color-index red, bold on;align: wrap on, vert centre, horiz center');  
		sheet_flag.write(row_index+1,cols_index+1, 'hello world', style) 
		sheet_flag.write(row_index+2,cols_index+2,'start',set_style(u'宋体',300,False,20))
		excel_flag.save('count/'+excle_file_name) #保存文件
		
	#设置单元格格式
	def set_style(name,height,bold,color_index):
		style = xlwt.XFStyle() # 初始化样式
		
		font = xlwt.Font() # 为样式创建字体
		font.name = name # 字体名称
		font.bold = bold #字体加粗
		font.color_index = color_index #字体颜色, 但是貌似无效
		font.height = height
		
	 
		borders= xlwt.Borders()#字体边框
		borders.left= 6
		borders.right= 6
		borders.top= 6
		borders.bottom= 6
		
		style.font = font
		if bold:
			style.borders = borders
		return style
	
	#文件存在就复制一份,并在其表的后面插入一个,不存在就新创建一个
	if file_is_exist(excle_file_name):
		print 'file 【%s】 exist ' % excle_file_name
		read_and_copy_excle(excle_file_name)#复制一个excle并追加一个sheet页
	else:
		print 'file 【%s】is not  exist, will create it ' % excle_file_name
		excel_flag=xlwt.Workbook()#新建excel工作薄
		write_excel(excel_flag,excle_file_name)
		


#自定义打印函数
def self_log(msg):
	print u'%s: %s' % (time.strftime('%Y-%m-%d %H:%M:%S'), msg)

#获取页面内容
def  get_html(url):
	headers = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}
	req = urllib2.Request(url=url,headers=headers)
	html = None	# stays None if the request fails, so the caller's "if not html" check works
	try:
		html = urllib2.urlopen(req).read()
	except urllib2.HTTPError,e:
		print e.code
	return html
	
#得到博客页面总数
def get_last_page(html,fd):
	if not html:
		self_log(u'页面错误,停止运行') 
		return
	page = BeautifulSoup(html,'lxml')
	if page.find('div',class_ ='pagelist').find_all('a'):
		last_page=page.find('div',class_ ='pagelist').find_all('a')
		last_page= last_page[len(last_page)-1].get('href')[-1:]
		self_log('总共有%s 页博客' % last_page)
		fd.write('总共有%s 页博客\n' % last_page)

		return last_page
	else:
		return 1
		
#获取积分内容		
def get_rank(html,fd):
	if not html:
		self_log(u'页面错误,停止运行') 
		return
	page = BeautifulSoup(html,'lxml')
	rank_list=[]
	if page.find(id='blog_rank'):
		
		rank_content=page.find(id='blog_rank')
		i =1
		for rank in rank_content.find_all('li'):
			if i<3:
				self_log(rank.text)
				fd.write(rank.text)
				fd.write('\n')
				rank_list.append(rank.text)
			i +=1
	return rank_list
	
#获取页面列表
def get_items(url):
	content_html=get_html(url)
	page = BeautifulSoup(content_html,'lxml')
	items = page.find_all('div',class_ ='list_item list_view')
	return items

#根据每一个items list 提取需要的元素
def handle_items(items,content_list,read_num_for_sort):
	for item in items:
		temp={}#临时变量
		
		title=item.find('a')#标题
		content_url='http://blog.csdn.net'+title.get('href')#标题对应文章的地址
		read_times=item.find('span',class_ ='link_view').text.strip()#阅读次数
		comments_time=item.find('span',class_ ='link_comments')#评论次数
		
		read_number = int(filter(str.isdigit, str(read_times)))	#提取出来具体阅读次数的数字,为之后的排序做准备
		read_num_for_sort.append(read_number)

		#将数据打包
		temp['indexs']=read_number
		temp['title']=title.text.strip()
		temp['read_times']=read_times
		temp['comments_time']=comments_time.text.strip()
		temp['content_url']=content_url
		content_list.append(temp)

#创建文件夹
def mkdir_folder(path):
	if not os.path.exists(path):  
		os.makedirs(path) 

#获取页面信息
def getContent(html):
	page = BeautifulSoup(html,'lxml')
	try:
		title=page.find('div',class_='article_title').find('a').text
		title=title.strip()
	except Exception,e:
		print e
	try:
		content=page.find('div',class_='article_content')
		dir_path='count'
		artitle_name_path=dir_path+'/'+title+'.txt'
		with open(artitle_name_path,'w') as f:
			f.write(content.text)
		self_log(u'存贮文章:%s 完毕' % title)
	except Exception,e:
		print e

#存贮每一篇文章到本地
def run_to_get_article(content_total_list):
	self_log('start save every article  ')
	for article_content in content_total_list:
		article_url=article_content.split('|')[4]
		self_log( '将要存贮的地址是: %s ...' % article_url)
		artitle_html=get_html(article_url)
		getContent(artitle_html)
	
#将内容存贮到excel中	
def run_to_save_info_in_excel(content_total_list):
	self_log('start save info into excel')
	excel_w=Workbook()
	excel_sheet_name=time.strftime('%Y-%m-%d_%H-%M-%S')
	excel_content_handler=excel_w.add_sheet(excel_sheet_name) 
	
	first_line=[u'编号',u'标题',u'阅读次数',u'评论次数',u'文章地址']
	cols=0
	for content in first_line:
		excel_content_handler.write(0,cols,content)
		cols +=1
		
	index=1
	for article_content in content_total_list:
		cols =0
		for a in article_content.split('|'):
			excel_content_handler.write(index,cols,a)
			cols +=1
		index +=1
	excel_w.save('count/'+'count_'+time.strftime('%Y-%m-%d_%H-%M')+'.xls')

		
#程序运行主函数		
def run(url):
	read_num_for_sort=[]
	content_list=[]
	content_total_list=[]
	
	#定义文件夹名字并创建文件夹
	dir_path='count'
	mkdir_folder(dir_path)
	
	#定义文件名字
	count_file_name=dir_path+'/'+datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')+'.txt'
	fd=open(count_file_name,'w')
	
	#1.从主页进入获取页面总数
	main_html=get_html(url)
	last_page=get_last_page(main_html,fd)
	
	#2.获取积分内容
	rank_list=get_rank(main_html,fd)
	
	#3.组装url,分别加载每页的页面,同时在每一个页面提取我们需要的内容
	for i in range(1,int(last_page)+1):
		main_url=url.split('?')[0]+'/article/list/%d?viewmode=contents' % i
		self_log('即将获取第%d页的内容,地址是:%s' % (i,main_url))
				
		items=get_items(main_url)#获取每一页的页面内容,根据页面内容得到文章item list
		handle_items(items,content_list,read_num_for_sort)#处理item list
	
	#4.根据阅读次数 进行排序
	read_num_for_sort.sort()
	print read_num_for_sort
	'''
	这也是一种排序思想,其中有一些缺陷
	for i in read_num_for_sort:
		for a in content_list:
			if int(i) == int(a['indexs']):
				totalcontent=a['content']+'\t|'+a['read_time']+'\t|'+a['comments_time']+'\t|'+a['contemtUrl']
	'''
	self_log('总共有%d 篇文章' % len(content_list))#根据得到的数据,统计文章总数
	#根据 indexs(阅读次数)这个索引值进行排序
	#非常好的一个根据列表中字典数据进行排序的方法
	content_list = sorted(content_list,cmp=lambda x,y:cmp(x['indexs'],y['indexs']),reverse=0)
	
	article_index = 1
	for a in content_list:
		#组装打印语句
		totalcontent= '第'+str(article_index)+'篇|'+a['title']+'|'+a['read_times']+'|'+a['comments_time']+'|'+a['content_url']
		#self_log(totalcontent)
		print totalcontent
		#将其存贮到本地
		fd.write(totalcontent)
		fd.write('\n')
		article_index +=1
		content_total_list.append(totalcontent)
	fd.close()		

	return content_total_list
	
if __name__ == '__main__': 
	print '''
            ***************************************** 
            **    Welcome to Spider of Count CSDN  ** 
            **      Created on 2017-04-12          ** 
            **      @author: Jimy_Fengqi           ** 
            *****************************************''' 
	url=raw_input(u'请输入将要统计的个人csdn主页地址,类似如下:\n http://blog.csdn.net/qiqiyingse?viewmode=contents')
	if not url:
		url='http://blog.csdn.net/qiqiyingse?viewmode=contents'
	content_total_list=run(url)
	#run_to_save_info_in_excel(content_total_list)
	create_excel(content_total_list)
	#run_to_get_article(content_total_list)

Version 8

This version runs on Python 3 and updates a number of details.

The crawling logic and the data-handling logic are now split into separate classes.

# -*- coding: utf-8 -*-
# @Date    : 2018-08-20 13:48:04
# @Author  : Jimy_Fengqi (jmps515@163.com)
# @Link    : https://blog.csdn.net/qiqiyingse/
# @Version : v1.0

import sys
import os
import time 
import re
import json
from xlwt import Workbook

from pyquery import PyQuery as pq

from functools import cmp_to_key
import requests
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

#自定义log函数,主要是加上时间
def logger(msg):
		print ('%s: %s' % (time.strftime('%Y-%m-%d_%H-%M-%S'), msg))

class CSDNSpider():
	def __init__(self):
		#自己博客主页
		self.csdn_url = 'http://blog.csdn.net/qiqiyingse?viewmode=contents'
		self.page_base_url="http://blog.csdn.net/qiqiyingse/article/list/"
		self.contentList=[]
		self.contentLists=[]
								
		# 爬虫伪装头部设置
		self.headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0'}
		
		# 设置操作超时时长
		self.timeout = 5
		
		# 爬虫模拟在一个request.session中完成
		self.mySession = requests.Session()

		self.phantomjs_path=r'C:\Users\Administrator\AppData\Local\Programs\Python\Python36\selenium\phantomjs-2.1.1-windows\bin\phantomjs.exe'
		self.chromedriver_path=	r'C:\Users\Administrator\AppData\Local\Programs\Python\Python36\selenium\chromedriver.exe'

		self.mylisttest=[{'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/81980458', 'acticle_title': '转 python 获取操作系统信息或者用户名', 'read_num': 20, 'index': 1}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/81776735', 'acticle_title': '原 NodeJS学习(2)构建第一个爬虫', 'read_num': 49, 'index': 2}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/81668517', 'acticle_title': '转 Python Webdriver 重新使用已经打开的浏览器实例', 'read_num': 55, 'index': 3}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/81281658', 'acticle_title': '原 python 小工具--像打字机一样输出内容', 'read_num': 55, 'index': 4}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/81567080', 'acticle_title': '原 python实现屏幕录制', 'read_num': 64, 'index': 5}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/81565263', 'acticle_title': '转 解决Python读取文件时出现编码异常', 'read_num': 67, 'index': 6}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/79442645', 'acticle_title': '转 安装scrapy报错 Python.h: 没有那个文件或目录', 'read_num': 97, 'index': 7}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/80269610', 'acticle_title': '原 Android 编译,烧机的一些方法', 'read_num': 108, 'index': 8}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/81668345', 'acticle_title': '原 python实现实时电脑监控', 'read_num': 113, 'index': 9}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/79473830', 'acticle_title': '转 GitHub上README.md教程', 'read_num': 121, 'index': 10}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/78141474', 'acticle_title': '转 设计模式(七)策略模式详解', 'read_num': 189, 'index': 11}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/78141487', 'acticle_title': '转 设计模式(八)适配器模式详解', 'read_num': 210, 'index': 12}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/79471056', 'acticle_title': '原 使用 Python时常用的安装包', 'read_num': 221, 'index': 13}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/77855473', 'acticle_title': '原 python实现的一种排序方法', 'read_num': 221, 'index': 14}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/79471634', 'acticle_title': '转 微信小程序集合', 'read_num': 249, 'index': 15}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/71543110', 'acticle_title': '原 Mongodb学习(2)概念学习——ACID原则', 'read_num': 365, 'index': 16}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/74004388', 'acticle_title': '转 设计模式(五)抽象工厂模式详解', 'read_num': 367, 'index': 17}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/72540866', 'acticle_title': '原 python学习——邮件发送程序', 'read_num': 370, 'index': 18}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/78210646', 'acticle_title': '原 python 实现文件查找功能', 'read_num': 400, 'index': 19}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/71544282', 'acticle_title': '原 Mongodb学习(2)概念学习——基本内容', 'read_num': 411, 'index': 20}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/72961382', 'acticle_title': '原 redis学习(1)python连接redis', 'read_num': 454, 'index': 21}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/72453537', 'acticle_title': '原 Mongodb学习(5)pymongdb的使用', 'read_num': 471, 'index': 22}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/71643828', 'acticle_title': '原 Python挑战游戏汇总', 'read_num': 485, 'index': 23}, 
{'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/78132072', 'acticle_title': '转 用Python实现一个简单的文件传输协议', 'read_num': 486, 'index': 24}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/71647261', 'acticle_title': '原 Python挑战游戏( PythonChallenge)闯关之路Level 0', 'read_num': 490, 'index': 25}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/77747777', 'acticle_title': '转 python数据持久存储:pickle模块的基本使用', 'read_num': 507, 'index': 26}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/77835705', 'acticle_title': '原 Mongodb学习(10)一个小例子', 'read_num': 520, 'index': 27}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/72566001', 'acticle_title': '原 Mongodb学习(6)pymongdb的数据库的拷贝', 'read_num': 542, 'index': 28}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/72898831', 'acticle_title': '原 Node.js学习(1)牛刀小试', 'read_num': 568, 'index': 29}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/77745548', 'acticle_title': '原 Python挑战游戏( PythonChallenge)闯关之路Level- 5', 'read_num': 572, 'index': 30}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/72558839', 'acticle_title': '原 pythonUI学习实践(1)制作自己的闹钟', 'read_num': 575, 'index': 31}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/71514942', 'acticle_title': '原 Mongodb学习(1)安装以及配置', 'read_num': 577, 'index': 32}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/71757964', 'acticle_title': '原 Python挑战游戏( PythonChallenge)闯关之路Level- 3', 'read_num': 598, 'index': 33}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/74004472', 'acticle_title': '转 设计模式(六)观察者模式详解(包含观察者模式JDK的漏洞以及事件驱动模型)', 'read_num': 609, 'index': 34}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/71580496', 'acticle_title': '转 设计模式(四)工厂方法模式详解(另附简单工厂的死亡之路)', 'read_num': 614, 'index': 35}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/71756381', 'acticle_title': '原 python练习题——string模块', 'read_num': 622, 'index': 36}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/72953832', 'acticle_title': '原 Mongodb学习(9)集群搭建以及错误处理', 'read_num': 637, 'index': 37}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/72633533', 'acticle_title': '原 Mongodb学习(7)pymongdb的使用——打印数据库名和table名', 'read_num': 734, 'index': 38}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/71226818', 'acticle_title': '转 设计模式详解(总纲)', 'read_num': 777, 'index': 39}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/71747671', 'acticle_title': '原 Python挑战游戏( PythonChallenge)闯关之路Level- 2', 'read_num': 835, 'index': 40}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/71547716', 'acticle_title': '原 Mongodb学习(3)基本操作——增删改查', 'read_num': 855, 'index': 41}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/71678011', 'acticle_title': '原 Python挑战游戏( PythonChallenge)闯关之路Level- 1', 'read_num': 859, 'index': 42}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/78225394', 'acticle_title': '原 Python 实现替换文件里面的内容', 'read_num': 898, 'index': 43}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/77749109', 'acticle_title': '原 Python挑战游戏( PythonChallenge)闯关之路Level- 6', 'read_num': 926, 'index': 44}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/71232015', 'acticle_title': '原 使用python一键登录博客', 
'read_num': 1033, 'index': 45}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/72553276', 'acticle_title': '转 Python图像处理库PIL的ImageFilter模块介绍', 'read_num': 1072, 'index': 46}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/71216102', 'acticle_title': '原 python excel使用进阶篇', 'read_num': 1128, 'index': 47}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/79487514', 'acticle_title': '原 linux环境 安装chromedriver 和 phantomjs的方法', 'read_num': 1179, 'index': 48}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/72285927', 'acticle_title': '原 Python挑战游戏( PythonChallenge)闯关之路Level- 4', 'read_num': 1251, 'index': 49}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/71405436', 'acticle_title': '转 python 的日志logging模块学习', 'read_num': 1323, 'index': 50}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/71218711', 'acticle_title': '原 在python上使用wordcloud制作自己的词云', 'read_num': 1515, 'index': 51}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/71172347', 'acticle_title': '原 使用python装饰器计算函数运行时间', 'read_num': 1519, 'index': 52}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/72553279', 'acticle_title': '原 python技巧——自己做验证码', 'read_num': 1525, 'index': 53}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/71123322', 'acticle_title': '转 Python下调用Linux的Shell命令', 'read_num': 2118, 'index': 54}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/78501034', 'acticle_title': '原 python爬虫(19)爬取论坛网站——网络上常见的gif动态图', 'read_num': 2199, 'index': 55}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/72633711', 'acticle_title': '原 Mongodb学习(8)pymongdb的使用——数据去重', 'read_num': 2584, 'index': 56}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/71640203', 'acticle_title': '原 python爬虫(10)身边的翻译专家——获取有道翻译结果', 'read_num': 2600, 'index': 57}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/59483706', 'acticle_title': '原 08_python_练习题——乘法表', 'read_num': 2912, 'index': 58}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/59488018', 'acticle_title': '原 python——利用python通过浏览器打开博客页面', 'read_num': 2987, 'index': 59}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/71227451', 'acticle_title': '转 (一)单例模式详解', 'read_num': 2994, 'index': 60}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/70048820', 'acticle_title': '转 Python中PyQuery库的使用总结', 'read_num': 3007, 'index': 61}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/71122905', 'acticle_title': '原 python小工具——下载更新代码工具', 'read_num': 3035, 'index': 62}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/59112217', 'acticle_title': '原 01_python_练习题_使用python直接打开网页', 'read_num': 3053, 'index': 63}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/59483629', 'acticle_title': '原 07_python_练习题——数值排序', 'read_num': 3063, 'index': 64}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/70143777', 'acticle_title': '原 17_python_练习题——打印指定目录下的文件和文件夹(相当于tree命令)', 'read_num': 3078, 'index': 65}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/60132246', 'acticle_title': '转 [Python] xrange和range的使用区别', 'read_num': 3090, 'index': 66}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/60144027', 'acticle_title': '原 
13_python_练习题——文件重定向', 'read_num': 3098, 'index': 67}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/67641261', 'acticle_title': '原 12_python爬虫——下载个人CSDN博客内容', 'read_num': 3102, 'index': 68}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/59112479', 'acticle_title': '原 02_python_练习题——图形界面', 'read_num': 3142, 'index': 69}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/60582816', 'acticle_title': '原 python爬虫(5)黑板客第三关', 'read_num': 3168, 'index': 70}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/59481693', 'acticle_title': '原 05_python_练习题——平方数', 'read_num': 3169, 'index': 71}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/60129630', 'acticle_title': '原 12_python_练习题——统计输入字符里面有多少', 'read_num': 3209, 'index': 72}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/55260352', 'acticle_title': '原 Python的安装', 'read_num': 3213, 'index': 73}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/55517278', 'acticle_title': '转 python version 2.7 required,which was not found in the registry', 'read_num': 3274, 'index': 74}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/62427733', 'acticle_title': '原 15_python_练习题——使用webdriver查询IP地址', 'read_num': 3290, 'index': 75}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/60582751', 'acticle_title': '原 15_python_练习题——打印日历', 'read_num': 3329, 'index': 76}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/61197123', 'acticle_title': '原 统计个人CSDN的博客文章数量', 'read_num': 3340, 'index': 77}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/60572338', 'acticle_title': '原 python爬虫(4)四种方法通过黑板客第二关', 'read_num': 3350, 'index': 78}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/70208940', 'acticle_title': '原 19_python_练习题——CSV文件读写练习', 'read_num': 3375, 'index': 79}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/70172218', 'acticle_title': '原 18_python_练习题——写入文件到word文档中', 'read_num': 3378, 'index': 80}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/64522690', 'acticle_title': '原 python爬虫(7)爬取糗事百科段子(UI版)', 'read_num': 3378, 'index': 81}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/70125444', 'acticle_title': '转 linux后台运行和关闭、查看后台任务', 'read_num': 3406, 'index': 82}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/70145804', 'acticle_title': '转 SSH 的详细使用方法', 'read_num': 3434, 'index': 83}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/70855457', 'acticle_title': '转 python的一个好玩模块wordcloud', 'read_num': 3438, 'index': 84}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/70843626', 'acticle_title': '转 shell脚本:Syntax error: Bad for loop variable错误解决方法', 'read_num': 3439, 'index': 85}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/55259304', 'acticle_title': '原 py2exe的使用', 'read_num': 3487, 'index': 86}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/70161637', 'acticle_title': '转 卸载win10 自带应用', 'read_num': 3514, 'index': 87}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/68926007', 'acticle_title': '原 python——一个投票器', 'read_num': 3514, 'index': 88}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/59113090', 'acticle_title': '原 04_python_练习题——企业利润', 'read_num': 3533, 
'index': 89}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/70138912', 'acticle_title': '转 Python爬虫防封杀方法集合', 'read_num': 3639, 'index': 90}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/60144578', 'acticle_title': '原 python爬虫(3)五种方法通过黑板客第一关', 'read_num': 3826, 'index': 91}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/71131120', 'acticle_title': '原 将python代码和注释分离', 'read_num': 3998, 'index': 92}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/71308389', 'acticle_title': '转 (二)代理模式详解(包含原理详解)', 'read_num': 4186, 'index': 93}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/71344110', 'acticle_title': '转 (三)简单工厂模式详解', 'read_num': 4198, 'index': 94}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/62427155', 'acticle_title': '原 python爬虫(14)获取淘宝MM个人信息及照片(上)', 'read_num': 4217, 'index': 95}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/62896264', 'acticle_title': '原 16_python_练习题——使用webdriver获取当前页面截屏以及滑动页面', 'read_num': 4311, 'index': 96}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/69944173', 'acticle_title': '原 将自己的python程序打包成exe', 'read_num': 4478, 'index': 97}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/71616418', 'acticle_title': '原 Mongodb学习(4)通过配置文件启动mongod', 'read_num': 4503, 'index': 98}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/46800751', 'acticle_title': '原 几行代码解决大端小端的问题', 'read_num': 4725, 'index': 99}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/60583419', 'acticle_title': '原 14_python_练习题——excel操作', 'read_num': 4890, 'index': 100}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/70046543', 'acticle_title': '原 quote函数什么意思,怎么用', 'read_num': 4936, 'index': 101}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/71123348', 'acticle_title': '转 黄聪:Python 字符串操作(string替换、删除、截取、复制、连接、比较、查找、包含、大小写转换、分割等 )', 'read_num': 4957, 'index': 102}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/62236845', 'acticle_title': '原 python爬虫(12)获取七天内的天气', 'read_num': 5102, 'index': 103}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/59526995', 'acticle_title': '原 11_python_练习题——日期格式显示', 'read_num': 5301, 'index': 104}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/70855068', 'acticle_title': '转 抓取网易云音乐歌曲 热门评论生成词云(转)', 'read_num': 5312, 'index': 105}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/71325615', 'acticle_title': '原 python_随机调用一个浏览器打开网页', 'read_num': 5543, 'index': 106}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/70049591', 'acticle_title': '原 json.dumps和 json.loads 区别,如此简单', 'read_num': 5649, 'index': 107}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/59502402', 'acticle_title': '原 10_python_练习题——兔子问题与斐波那契數列', 'read_num': 5831, 'index': 108}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/59483966', 'acticle_title': '原 09_python_练习题——暂停一秒', 'read_num': 5879, 'index': 109}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/59483536', 'acticle_title': '原 06_python_练习题——查找一年之中第几天', 'read_num': 5930, 'index': 110}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/59112715', 'acticle_title': '原 03_python_练习题——排列组合', 'read_num': 5949, 'index': 111}, 
{'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/62231679', 'acticle_title': '原 python爬虫(8)爬取tuchong网站美图', 'read_num': 6060, 'index': 112}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/68944885', 'acticle_title': '原 python爬虫——爬取链家房价信息(未完待续)', 'read_num': 6185, 'index': 113}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/70209450', 'acticle_title': '原 python使用代理访问网站', 'read_num': 6193, 'index': 114}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/71168993', 'acticle_title': '转 webdriver+selenium面试总结', 'read_num': 6374, 'index': 115}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/68061256', 'acticle_title': '原 02_python安装错误——2502、2503错误', 'read_num': 6483, 'index': 116}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/71123066', 'acticle_title': '原 python——接收处理外带的参数', 'read_num': 7459, 'index': 117}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/68496603', 'acticle_title': '转 Python面试必须要看的15个问题', 'read_num': 7477, 'index': 118}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/70175619', 'acticle_title': '原 python爬虫(13)爬取百度贴吧帖子', 'read_num': 7619, 'index': 119}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/70766993', 'acticle_title': '原 python_获取当前代码行号_获取当前运行的类名和函数名的方法', 'read_num': 7645, 'index': 120}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/71173514', 'acticle_title': '原 python爬虫(11)身边的搜索专家——获取百度搜索结果', 'read_num': 7770, 'index': 121}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/70674353', 'acticle_title': '原 python_制作自己的函数库', 'read_num': 7908, 'index': 122}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/62985485', 'acticle_title': '原 python爬虫(14)获取淘宝MM个人信息及照片(下)(windows版本)', 'read_num': 7990, 'index': 123}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/71326756', 'acticle_title': '原 使用notepad++开发python的配置——代码缩进、自动补齐、运行', 'read_num': 8389, 'index': 124}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/60583129', 'acticle_title': '原 python爬虫(6)爬取糗事百科', 'read_num': 8788, 'index': 125}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/71081165', 'acticle_title': '转 python安装scipy 遇到的问题', 'read_num': 9595, 'index': 126}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/70766654', 'acticle_title': '转 python_python中try except处理程序异常的三种常用方法', 'read_num': 9897, 'index': 127}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/70843655', 'acticle_title': '转 shell for循环1到100', 'read_num': 10421, 'index': 128}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/60146843', 'acticle_title': "原 python编译错误(1)字符编码问题UnicodeDecodeError: 'ascii' codec", 'read_num': 10838, 'index': 129}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/62894826', 'acticle_title': '原 python爬虫(14)获取淘宝MM个人信息及照片(中)', 'read_num': 11136, 'index': 130}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/62418857', 'acticle_title': '原 python爬虫(9)获取动态搞笑图片', 'read_num': 11543, 'index': 131}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/51801918', 'acticle_title': '原 python爬虫(2)爬取游民星空网的图片', 'read_num': 13661, 'index': 132}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/71082263', 'acticle_title': '原 
python爬虫(20)使用真实浏览器打开网页的两种方法', 'read_num': 16160, 'index': 133}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/51879501', 'acticle_title': '原 python爬虫(1)下载任意网页图片', 'read_num': 16323, 'index': 134}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/51798833', 'acticle_title': '原 python爬虫(15)爬取百度百科字条_精品', 'read_num': 17306, 'index': 135}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/65631698', 'acticle_title': '原 python爬虫(16)使用scrapy框架爬取顶点小说网', 'read_num': 17652, 'index': 136}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/46800537', 'acticle_title': '原 C语言常见面试题(经典中的经典)', 'read_num': 19962, 'index': 137}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/71126591', 'acticle_title': '原 python爬虫(18)爬取微信公众号内容——绘制词云', 'read_num': 20565, 'index': 138}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/70160059', 'acticle_title': '原 python爬虫(17)爬出新高度_抓取微信公众号文章(selenium+phantomjs)(下)(windows版本)', 'read_num': 22474, 'index': 139}, {'new_acrticle_url': 'https://blog.csdn.net/qiqiyingse/article/details/70050113', 'acticle_title': '原 python爬虫(17)爬出新高度_抓取微信公众号文章(selenium+phantomjs)(上)', 'read_num': 32994, 'index': 140}]

	def get_html_by_request(self,url):
		logger(u'开始使用 request 获取的网页为:%s' %  url)
		try:
			html = self.mySession.get(url, headers=self.headers, timeout=self.timeout)
			return html.content
		except Exception as e:
			logger(e)	

	def get_selenium_js_html(self, url):
		driver = webdriver.PhantomJS(executable_path=self.phantomjs_path)
		'''
		chrome_options = Options()
		#chrome_options.add_argument('--headless')
		chrome_options.add_argument('--user-agent=iphone')
		chrome_options.add_argument('--disable-gpu')
		driver = webdriver.Chrome(executable_path=self.chromedriver_path,chrome_options=chrome_options)
		'''
		logger(u'开始使用 phantomjs 加载网页:%s' %  url)
		try:
			driver.get(url) 
			time.sleep(2) 
			# 执行js得到整个页面内容
			html = driver.execute_script("return document.documentElement.outerHTML")
			
			driver.close()
			return html
		except Exception as e:
			logger(e)
	
	def parse_html_by_pyquery(self, html):
		if not html:
			logger("获得的网页有问题, 请检查。。。")
		else:
			logger('转换网页成功')
			return pq(html)

	def get_pageNumber(self,doc):
		#print (doc)
		#numcontent=doc('div[class=ui-paging-container]')
		numcontent=doc('li[class=ui-pager]')
		page_number=numcontent[len(numcontent)-1].text.strip()
		page_number =int(int(page_number) * 2.5)+1
		logger('current blogger has page num is  %s' % page_number)
		return page_number

	def get_artilcle_list(self,doc):
		logger('start to get article list...')
		acrticle_list=doc('div[class="article-item-box csdn-tracking-statistics"]')
		logger(len(acrticle_list))
		for acrticle in acrticle_list.items():
			tmp={}
			tmp['acticle_title']=acrticle('h4')('a').text().strip()

			read_num_content=acrticle('span[class=read-num]')[0].text
			tmp['read_num']=int(re.sub("\D","",read_num_content))
			#logger("read_num is %d" % tmp['read_num'])
			
			#acrticle_url=acrticle('p[class=content]')('a').attr('href')
			tmp['new_acrticle_url']=acrticle('h4')('a').attr('href')
			logger(tmp['new_acrticle_url'])
			
			self.contentList.append(tmp)
			
	#自定义排序功能,按照关键字对列表进行排序,然后将索引添加到字典中
	def mysort(self,listname,keywords,reverse=1):
		'''
		Parater:
			listname:需要处理的list
			keywords:按照keywords进行排序
			reverse:正序还是逆序,1为正序,0为逆序
		'''
		from functools import cmp_to_key
		if reverse:
			newlist=sorted(listname,key=cmp_to_key(lambda x,y:x[keywords]-y[keywords]) )
		else:
			newlist=sorted(listname,key=cmp_to_key(lambda x,y:y[keywords]-x[keywords]))
		for item in newlist:
			item['index'] = newlist.index(item)+1
		return newlist

	def run(self):
		#第一步,获取页码数目
		main_html=self.get_selenium_js_html(self.csdn_url)
		main_doc=self.parse_html_by_pyquery(main_html)
		page_number=self.get_pageNumber(main_doc)

		#第二步,查找文章list
		for i in range(1,int(page_number)):
			new_url=self.page_base_url+str(i)
			acrticle_html=self.get_html_by_request(new_url)
			acrticle_doc=self.parse_html_by_pyquery(acrticle_html)
			self.get_artilcle_list(acrticle_doc)

		logger(len(self.contentList))
		self.contentLists = self.mysort(self.contentList,"read_num",1 )
		logger(self.contentLists)
		return self.contentLists

	def testA(self):		
		logger(len(self.mylisttest))
		contentList = self.mysort(self.mylisttest,"read_num",1)
		aa=[]
		for i in contentList:
			logger("index is  %d ...read num is: %d .... url is %s" % (i['index'],i['read_num'],i['new_acrticle_url']))
			tmp="index is  %d ...read num is: %d .... url is %s" % (i['index'],i['read_num'],i['new_acrticle_url'])
			aa.append(tmp)
		return aa

	def testForSortFunc(self):
		Tilist = self.mysort(self.mylisttest,'read_num',1)
		for i in Tilist:
			print (i['read_num'])

		Tilist = self.mysort(self.mylisttest,'read_num',0)
		for i in Tilist:
			print (i['read_num'])


class HandleMyData():
	def __init__(self,data):
		self.myfoldername='qiqiyingse'
		self.needhandledata=data

	def dealData(self):
		self.create_dir(self.myfoldername)
		filename=self.myfoldername+'/'+self.myfoldername+'.txt'
		self.save_content_to_file(filename,self.needhandledata)
		logger("data write in [text]  finished.....")
		self.run_to_save_info_in_excel(self.needhandledata)
		logger("data write in [excel]  finished.....")
		
	def test(self):
		logger('just a test')
		logger('not just a test ')

	#将内容存贮到excel中
	def run_to_save_info_in_excel(self,data):
		logger('start save info into excel')
		excel_w=Workbook()
		excel_sheet_name=time.strftime('%Y-%m-%d_%H-%M-%S')
		excel_table_name=time.strftime('%Y-%m-%d_%H-%M')
		excel_content_handler=excel_w.add_sheet(excel_sheet_name) 
		
		first_line=[u'标题',u'阅读次数',u'文章地址',u'排名']
		cols=0
		for content in first_line:
			excel_content_handler.write(0,cols,content)
			cols +=1
		
		index=1
		for data_dict in data:
			cols=0
			for data_details in data_dict:
				#logger((data_details,data_dict[data_details],"内容写入excel"))
				excel_content_handler.write(index,cols,data_dict[data_details])
				cols +=1
			index +=1
		excel_w.save(self.myfoldername+'/'+self.myfoldername+excel_table_name+'.xls')

		
	#存储文章到本地	
	def save_content_to_file(self,title,content):
		logger('start to write info in [text]...')
		with open(title, 'w',encoding='utf-8') as f:
			for info_dict in content:
				f.write(str(info_dict)+'\n')
				for info in info_dict:
					#logger((info,info_dict[info]))
					#f.write(str(info)+str(info_dict[info])+'\n')
					f.write(str(info_dict[info])+'\t\t\t\t')
				f.write('\n')
	def create_dir(self,dirname):
		if not os.path.exists(dirname):
			os.makedirs(dirname)
			logger(" %s  not exists, create it" % dirname)
		else:
			logger("dirname already exist, didn't need to creat")


class RunUrlInBrowser():
	def __init__(self,data):
		self.listData=data
		
	def getUrlFromData(self):
		'''
		#用这种方式提取列表比较常规,但是代码也多
		urllist=[]
		for url in self.listData:
			urllist.append(url['new_acrticle_url'])
		return urllist	
		'''	
		return  [data_info['new_acrticle_url']  for data_info in self.listData]
	
	def LoadUrlAccordingtoLocalBrowser(self):
		import webbrowser as web
		urllist=self.getUrlFromData()
		t=5000
		while t:
			for i in range(1,9):
				logger(urllist[i])
				web.open_new_tab(urllist[i])
				time.sleep(5)
			os.system('taskkill /f /IM firefox.exe')
			t=t-1
		
def welcom():
	print(''.center(50,'*'))
	print('Welcome to Spider of CSDN'.center(50,'*'))
	print('Created on 2018-08-20'.center(50,'*'))
	print('@author: Jimy _Fengqi'.center(50,'*'))
	print(''.center(50,'*'))
		
if __name__ == '__main__':
	print ('''
			************************************************** 
			**        Welcome to Spider of CSDN             ** 
			**         Created on 2018-08-20                ** 
			**         @author: Jimy _Fengqi                ** 
			**************************************************
	''')
	welcom()
	
	mycsdn=CSDNSpider()
	data = mycsdn.run()
	myHandler=HandleMyData(data)
	myHandler.dealData()
	myBrowser=RunUrlInBrowser(data)
	myBrowser.LoadUrlAccordingtoLocalBrowser()
	
	
	
	#data=mycsdn.mylisttest
	#myHandler=HandleMyData(data)
	#myHandler.test()                    #引用类中的方法的时候,这里类本身不能有括号 ,就是不能写成myHandler().test() 
	#myHandler.dealData()

	#mycsdn.run()
	#mycsdn.testfunc()

 

 

 

 

posted @ 2017-04-01 12:46  枫奇丶宛南