二.爬虫的开端:

爬虫的一些小的例子:(5)

利用python的原始的库写爬虫:

  1. urlopen()

    # Basic GET request: urlopen() returns an http.client.HTTPResponse object.
    from urllib import request
    resp = request.urlopen('http://www.baidu.com')
    # Bug fix: read() must be called on the response object; a bare read()
    # is an undefined name and raised NameError in the original.
    print(resp.read())

2. urlretrieve()

   from urllib import  request
   request.urlretrieve('http://www.baidu.com','baidu.html')
  1. 查找刘德华

    # Build a Baidu search URL for "刘德华" and fetch the result page.
    from urllib import request
    from urllib import parse
    url = "https://www.baidu.com/s"
    params = {'wd': "刘德华"}
    # urlencode percent-encodes the query so non-ASCII text is URL-safe.
    qs = parse.urlencode(params)
    url = url + "?" + qs
    # Bug fix: urlretrieve() returns a (filename, headers) tuple, which has
    # no read() method — the original crashed on resp.read(). urlopen()
    # returns a response object that supports read().
    resp = request.urlopen(url)
    print(resp.read())
  2. parse_qs函数

    # Demonstrate urlencode() / parse_qs() as (almost) inverse operations.
    from urllib import parse

    params = {'name': '张三', "age": 18, 'greet': 'hello world'}
    qs = parse.urlencode(params)       # dict -> "k=v&k2=v2" percent-encoded string
    result = parse.parse_qs(qs)        # back to a dict; every value becomes a list of str
    print(qs)
    print(result)
  3. 例子

    # urlsplit vs urlparse: urlsplit has no params attribute
    # (urlparse additionally splits out the obsolete ";params" path segment).
    from urllib import request,parse
    url = 'http://www.baidu.com/s;hello?username=zhiliao'
    result = parse.urlsplit(url)
    #result = parse.urlparse(url)
    # Bug fix: removed a stray trailing "s" after the closing paren that made
    # this line a SyntaxError in the original.
    print('scheme', result.scheme)
    print('netloc', result.netloc)
    print('path', result.path)
    print('query', result.query)
    url = 'http://www.baidu.com/s?wd=python&username=abc#1'
    result = parse.urlparse(url)
    print(result)

    # Fetch lagou.com with a browser User-Agent header so the request is not
    # rejected/redirected as an obvious Python-urllib bot.
    from urllib import request
    url='https://www.lagou.com/landing-page/pc/search.html?utm_source=m_cf_cpt_baidu_pcbt'
    headers = {
       # Bug fix: the header name was misspelled 'Use-Agent', so urllib kept
       # sending its default "Python-urllib/x.y" agent string.
       'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:97.0) Gecko/20100101 Firefox/97.0'
    }
    # Request() lets us attach custom headers before opening the URL.
    req = request.Request(url, headers=headers)
    resp = request.urlopen(req)
    print(resp.read())
posted on 2022-03-31 11:47  Steam残酷  阅读(35)  评论(0)    收藏  举报