1 # -*- coding:utf-8-*-
 2 import xlwt
 3 import urllib2
 4 import requests
 5 from lxml import etree
 6 c=0
 7 workbook=xlwt.Workbook(encoding='utf-8')
 8 booksheet=workbook.add_sheet('Sheet1', cell_overwrite_ok=True)#建立表1
 9 usr_agent='Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
10 headers={'Usr_Agent':usr_agent}
11 HTML="http://list.pptv.com/?type=1&sort=1"
12 request=urllib2.Request(HTML,headers=headers)
13 html=requests.get(HTML)
14 seletor=etree.HTML(html.text)
15 content_field=seletor.xpath('//div[@class="sear-menu"]')
16 for each in content_field:
17     type=each.xpath('dl/dd/a/@title')[1:26]
18     href=each.xpath('dl/dd/a/@href')[1:26]
19     for j in zip(type,href):
20         kind=j[0]
21         MOVIE_HTML=j[1]
22         for p in range(1,30):
23             Movie_html="http://list.pptv.com/channel_list.html?page=%s"%p+MOVIE_HTML[-17:]
24             html2=requests.get(Movie_html)
25             seletor2=etree.HTML(html2.text)
26             name=seletor2.xpath('// p[@class="ui-txt"]/span/text()')
27             grade=seletor2.xpath('// p[@class="ui-txt"]/em/text()')
28             phtho_url=seletor2.xpath('//p[@class="ui-pic"]/img/@data-src2')
29             movie_html_urls=seletor2.xpath('//a[@class="ui-list-ct"]/@href')
30             for n in zip(name,phtho_url,grade,movie_html_urls):
31                 c+=1
32                 names=n[0]
33                 row0 = ['类型','图片网址','分数','网址','电影网址']
34                 for i in range(0,len(row0)):
35                        booksheet.write(0,i,row0[i]) #生成表头
36                 booksheet.write(c,0,kind)#插入每行数据
37                 booksheet.write(c,1,names)
38                 booksheet.write(c,2,n[1])
39                 booksheet.write(c,3,n[2])
40                 booksheet.write(c,4,n[3])
41                 workbook.save('movies.xls')

结果展示: