import requests
import re
import csv

def get_html(url, timeout=10):
    """Download *url* and return its body decoded as UTF-8.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The response text, or ``None`` when the request fails (connection
        problem, timeout, or an HTTP error status reported by
        ``raise_for_status``).
    """
    # Browser-like User-Agent; presumably the target site rejects the default
    # one sent by requests -- TODO confirm.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 '
               '(KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36'}
    try:
        resp = requests.get(url, headers=headers, timeout=timeout)
        resp.raise_for_status()
    except requests.RequestException:
        # Only request-related failures are turned into None; anything else
        # (e.g. KeyboardInterrupt, programming errors) is allowed to surface.
        return None
    resp.encoding = 'utf-8'
    return resp.text

def main():
    """Scrape https://movie.douban.com/top250 and save the result as CSV.

    The page is fetched once with ``get_html``. Every movie entry matched by
    the regular expression yields one row (title, year, number of ratings)
    in ``douban.csv``, below a header row. If the page cannot be downloaded,
    a message is printed and no file is written.
    """
    url = 'https://movie.douban.com/top250'
    html = get_html(url)
    if html is None:
        # get_html() reports failure with None; stop here rather than
        # crashing inside the regex with a TypeError.
        print('failed to fetch', url)
        return

    # re.S lets ".*?" span line breaks, since one <li> entry covers many lines.
    pattern = re.compile('<li>.*?<span class="title">(?P<title>.*?)</span>.*?<br>(?P<year>.*?)'
                         '&nbsp.*?<span>(?P<num>.*?)人评价</span>.*?</li>', re.S)

    # newline='' keeps the csv module from producing blank lines between rows.
    # The encoding is explicit so the non-ASCII header and titles are written
    # the same way on every platform. The with-block closes the file even if
    # a row fails to write.
    with open('douban.csv', newline='', mode='w', encoding='utf-8') as f:
        csv_writer = csv.writer(f)
        csv_writer.writerow(['名称', '年份', '评论人数'])
        for it in pattern.finditer(html):
            csv_writer.writerow([
                it.group('title'),
                it.group('year').strip(),
                it.group('num').strip(),
            ])
    print('done!')

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

 

writerow() 和 writerows() 的区别

writerow() 将一个列表中的所有元素写入 CSV 文件的同一行

# writerow() example: the whole list ends up as a single CSV row.
csv_list = [1, 2, 3, 4]
with open(
    "/Users/apple/Downloads/test.csv",
    mode="w",
    encoding="UTF-8",
    newline="",
) as csvfile:
    csv.writer(csvfile).writerow(csv_list)

 

writerows() 将二维列表中的每一个子列表分别写为 CSV 文件的一行

# writerows() example: each inner list of the 2-D list becomes one CSV row.
csv_list = [
    [1, 11, 111],
    [2, 22, 222],
    [3, 33, 333],
    [4, 44, 444],
]
with open("douban.csv", mode="w", encoding="UTF-8", newline="") as csvfile:
    csv.writer(csvfile).writerows(csv_list)

 

posted on 2021-11-16 22:59  math98  阅读(206)  评论(0)    收藏  举报