def tiebaSpider():
    """Prompt for a forum keyword and a page range, then start the crawl.

    Reads the keyword and the start/end page numbers from standard input,
    builds the base listing URL with the keyword URL-encoded into the query
    string, and passes everything to ``spider``.

    Raises:
        ValueError: if either page number entered is not an integer.
    """
    forum_name = input("请输入搜索的贴吧:")
    first_page = int(input("开始页数:"))
    last_page = int(input("结束页数:"))

    # urlencode percent-escapes the keyword so non-ASCII names are safe in the URL.
    query = urllib.parse.urlencode({"kw": forum_name})
    base_url = "".join(("http://tieba.baidu.com/f?ie=utf-8&", query))

    spider(base_url, first_page, last_page)
def spider(url, beginPage, endPage):
    """Fetch each page from beginPage through endPage (inclusive) and save it.

    For every page number the ``pn`` query offset is computed, the page is
    downloaded with ``loadPage``, decoded as UTF-8, echoed to stdout and
    written to a file named after the page number via ``writeFile``.

    Args:
        url: Base listing URL that already carries the encoded keyword.
        beginPage: First page number to fetch.
        endPage: Last page number to fetch; this page is included.

    Raises:
        UnicodeDecodeError: if a downloaded page is not valid UTF-8.
    """
    # endPage + 1 so the page the user named as the end page is fetched too;
    # a plain range(beginPage, endPage) would silently skip it.
    for page in range(beginPage, endPage + 1):
        # The pn offset grows by 50 for each successive page.
        pn = (page - 1) * 50
        filename = "第" + str(page) + "页.HTML"
        fullurl = url + "&pn=" + str(pn)
        print(fullurl)
        html = loadPage(fullurl, filename)
        # In Python 3 the response body is bytes rather than readable text,
        # so decode it with str(bytes, encoding) before printing and saving.
        html = str(html, 'utf-8')
        print(repr(html))
        writeFile(html, filename)
def loadPage(fullurl, filename):
    """Download the resource at fullurl and return its raw body.

    Args:
        fullurl: Complete URL to request.
        filename: Unused here; kept so existing callers keep working.

    Returns:
        The response body as bytes.

    Raises:
        urllib.error.URLError: if the request fails.
    """
    print("loading")
    # Send a browser-style User-Agent header with the request.
    headers = {"User-Agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0;"}
    request = urllib.request.Request(fullurl, headers=headers)
    # The context manager closes the HTTP response (and its socket) even if
    # reading fails, instead of leaving it open until garbage collection.
    with urllib.request.urlopen(request) as response:
        return response.read()
def writeFile(html, filename):
    """Write html to filename as UTF-8 text, replacing any existing file.

    Args:
        html: Content to save; it is passed through ``str()`` before writing.
        filename: Path of the file to create or overwrite.
    """
    print("printing")
    # Explicit UTF-8: the platform default encoding (e.g. GBK/cp1252 on
    # Windows) may be unable to encode the page text or would produce a file
    # whose encoding differs from the one the page was decoded with.
    with open(filename, "w", encoding="utf-8") as f:
        f.write(str(html))