import csv
import re

import requests


def get_html(url):
    """Download *url* and return the response body decoded as UTF-8.

    Args:
        url: Address of the page to fetch.

    Returns:
        The page text, or ``None`` when the request fails (connection
        error, timeout, or a non-2xx status code).
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 '
                             '(KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36'}
    try:
        # A timeout keeps the script from hanging forever on a stalled server.
        resp = requests.get(url, headers=headers, timeout=10)
        resp.raise_for_status()
    except requests.RequestException:
        # Only request-level failures are turned into None; anything else
        # (e.g. KeyboardInterrupt) propagates to the caller.
        return None
    resp.encoding = 'utf-8'
    return resp.text


def main():
    """Fetch the Douban Top 250 page and write the matches to ``douban.csv``.

    Each regex match contributes one row of title, year and number of
    ratings, below a header row. If the page cannot be downloaded, a
    message is printed and no file is written.
    """
    url = 'https://movie.douban.com/top250'
    html = get_html(url)
    if html is None:
        # get_html signals failure with None; finditer(None) would raise.
        print('request failed:', url)
        return

    # NOTE(review): the separator after the year group is a literal space
    # here; the live page may use a different character -- confirm.
    pattern = re.compile('<li>.*?<span class="title">(?P<title>.*?)</span>.*?<br>(?P<year>.*?)'
                         ' .*?<span>(?P<num>.*?)人评价</span>.*?</li>', re.S)

    # newline='' stops the csv module from emitting blank lines between rows;
    # the explicit encoding keeps the Chinese text independent of the locale.
    with open('douban.csv', newline='', mode='w', encoding='utf-8') as f:
        csv_writer = csv.writer(f)
        csv_writer.writerow(['名称', '年份', '评论人数'])
        for it in pattern.finditer(html):
            csv_writer.writerow([
                it.group('title'),
                it.group('year').strip(),
                it.group('num').strip(),
            ])
    print('done!')


if __name__ == '__main__':
    main()
writerow() 和 writerows() 的区别
writerow()将一个列表全部写入csv的同一行
# writerow() receives one flat sequence and writes it as a single CSV row.
csv_list = [1, 2, 3, 4]
with open(
    "/Users/apple/Downloads/test.csv",
    mode="w",
    encoding="UTF-8",
    newline="",
) as out_file:
    csv.writer(out_file).writerow(csv_list)
writerows()将一个二维列表中的每一个列表写为一行
# writerows() receives a list of rows and writes each inner list as one line.
csv_list = [
    [1, 11, 111],
    [2, 22, 222],
    [3, 33, 333],
    [4, 44, 444],
]
with open("douban.csv", mode="w", encoding="UTF-8", newline="") as out_file:
    csv.writer(out_file).writerows(csv_list)
浙公网安备 33010602011771号