python写csv文件,解决中文乱码问题

# pip install beautifulsoup4 lxml requests

from bs4 import BeautifulSoup
import requests
import json, time, datetime
import csv, codecs

def getUrlText(url, timeout=30):
    """Fetch *url* and return the response body decoded as UTF-8 text.

    The request is retried indefinitely: whenever ``requests`` reports an
    error, a message is printed and the call is repeated after a 3 second
    pause. The function therefore only returns once a response was received.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before the attempt is
            treated as failed and retried. Without a timeout a stalled
            connection would block forever.

    Returns:
        The response body as ``str``, decoded as UTF-8.
    """
    while True:
        try:
            response = requests.get(url, timeout=timeout)
        except requests.exceptions.ConnectionError:
            print('ConnectionError -- please wait 3 seconds')
        except requests.exceptions.ChunkedEncodingError:
            print('ChunkedEncodingError -- please wait 3 seconds')
        except requests.exceptions.RequestException:
            # Catch only request failures (timeouts included). A bare
            # ``except:`` would also swallow KeyboardInterrupt/SystemExit and
            # make this retry loop impossible to stop with Ctrl-C.
            print('Unfortunitely -- An Unknow Error Happened, Please wait 3 seconds')
        else:
            # Force UTF-8 instead of relying on the encoding guessed from the
            # response headers.
            response.encoding = 'utf-8'
            return response.text
        time.sleep(3)

def test():
    """Scrape the first table of a fixed page into ``1.csv`` (GBK encoded).

    The page is downloaded with ``getUrlText`` and parsed with the lxml
    parser. A fixed header row is written, then every table row after the
    first whose first cell is purely numeric is written using its first
    five cells. The file is encoded as GBK, following the original script.

    Raises:
        RuntimeError: If the downloaded page contains no ``<table>``.
    """
    url = 'http://www.sxkszx.cn/news/201989/n378377624.html'
    html = getUrlText(url)

    soup = BeautifulSoup(html, features="lxml")
    table = soup.table
    if table is None:
        # Fail before creating the output file instead of crashing later with
        # an opaque AttributeError on ``None``.
        raise RuntimeError('no <table> element found in ' + url)

    # newline="" lets the csv module control line endings itself; the context
    # manager closes the file even if a row fails to encode or parse.
    with open("1.csv", "w", newline="", encoding="gbk") as f:
        w = csv.writer(f)
        w.writerow(['院校代码','院校名称','科类','计划性质','最低分'])
        # Skip the first <tr>, which the original code treats as the header.
        for tr in table.select('tr')[1:]:
            cells = [td.text.strip() for td in tr.select('td')]
            # Rows with fewer than five cells (e.g. <th>-only or merged rows)
            # are skipped rather than raising IndexError.
            if len(cells) >= 5 and cells[0].isdigit():
                w.writerow(cells[:5])
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    test()

 

posted @ 2020-03-19 14:25  liuyong0076  阅读(287)  评论(0)    收藏  举报