Python爬虫小试牛刀

学了几天 Python 爬虫,写了一个无聊的小爬虫……

# -*- coding: utf-8 -*-
import html
import re
import urllib.request

# Scrape the "now showing" movie titles and ratings from the Douban movie
# front page and write them to a small standalone HTML list.

SOURCE_URL = "https://movie.douban.com/"
OUTPUT_PATH = "douban.html"

# First-pass filters. The capture groups pull out the text directly, so no
# second regex pass or fixed-offset slicing is needed.
NAME_RE = re.compile(
    r'href="https://movie\.douban\.com/subject/\d+?/\?from=showing" class="">(.+?)</a>'
)
RATE_RE = re.compile(
    r'<span class="subject-rate">(\d\.\d)</span>'
    r'|<span class="text-tip">(暂无评分)</span>'
    r'|<span class="rating-type-score">(\d\.\d)</span>'
)

HTML_HEAD = """<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<style>
h3{
color:#71c084
}
</style>
</head>
<body>
<h3>豆瓣网热门集锦-Powered by python</h3>
<ul>
"""

HTML_TAIL = """</ul>
</body>
</html>
"""


def fetch_page(url=SOURCE_URL) -> str:
    """Download *url* and return its body decoded as UTF-8.

    Raises whatever ``urllib.request.urlopen`` raises on network/HTTP errors,
    and ``UnicodeDecodeError`` if the body is not valid UTF-8.
    """
    # NOTE(review): the request goes out with urllib's default headers; if the
    # site rejects that, a custom User-Agent may be needed - confirm.
    with urllib.request.urlopen(url) as response:  # closes the connection
        return response.read().decode("utf-8")


def extract_names(page_text: str) -> list:
    """Return the link text of every ``?from=showing`` subject link, in order."""
    return NAME_RE.findall(page_text)


def extract_rates(page_text: str) -> list:
    """Return every rating string (``d.d`` or the "no rating" text), in order."""
    # Exactly one alternative (hence one group) participates in each match, so
    # ``lastindex`` always points at the group holding the captured text.
    return [m.group(m.lastindex) for m in RATE_RE.finditer(page_text)]


def render_html(entries) -> str:
    """Build the output document from an iterable of ``(name, rate)`` pairs.

    The values come from scraped markup and may already contain entities, so
    they are unescaped first and then escaped once. This keeps ``&amp;`` from
    being double-encoded while stopping stray ``<``/``>`` from being
    interpreted as tags.
    """
    items = [
        "<li>"
        + html.escape(html.unescape(name))
        + "\t"
        + html.escape(html.unescape(rate))
        + "</li>\n"
        for name, rate in entries
    ]
    return HTML_HEAD + "".join(items) + HTML_TAIL


def main(url=SOURCE_URL, output_path=OUTPUT_PATH):
    """Fetch the page, print the two match counts, and write the HTML file."""
    page_text = fetch_page(url)
    names = extract_names(page_text)
    rates = extract_rates(page_text)

    # Names and rates are matched independently; the counts are printed so a
    # mismatch is visible. zip() below stops at the shorter of the two lists.
    print(len(names), len(rates))

    # Plain write mode is enough (the file is never read back), and the
    # ``with`` block closes the file, so no explicit close() is needed.
    with open(output_path, "w", encoding="utf-8") as out:
        out.write(render_html(zip(names, rates)))


if __name__ == "__main__":
    main()

效果是这样滴:

 

posted @ 2018-04-01 22:20  木子阁君  阅读(160)  评论(0)  编辑  收藏  举报