Python Learning Day3

爬虫练习

基于urllib实现

import urllib.request
import re

url = "https://www.zhihu.com/question/21100397"  # page whose images we want to download

# Step 1: open the URL. The response is used as a context manager so that it
# is closed even if reading or decoding raises.
with urllib.request.urlopen(url) as page:
    # Step 2: read the raw bytes and decode them into the HTML source text.
    html = page.read().decode("utf-8")

# Step 3: collect every URL that appears as img src="http..." in the HTML.
imglist = re.findall(r'img src="(http.*?)"', html)

# Step 4: download each image; files are numbered pic0.jpg, pic1.jpg, ...
for x, imgurl in enumerate(imglist):
    urllib.request.urlretrieve(imgurl, 'pic%s.jpg' % x)

定义函数的三种方式

# Function without parameters:
# it does not need any value passed in by the caller.
def foo():
    """Print a fixed message to standard output."""
    message = 'from foo..'
    print(message)


foo()
#

# Function with parameters:
# it needs values passed in by the caller.
def login(user, pwd):
    """Print ``user`` and ``pwd`` on one line, separated by a space."""
    credentials = (user, pwd)
    print(*credentials)

# 传参多一或少一不可
#login('tank', '123')
# login('tank', '123', 111)  # 多，报错
# login('tank')  # 少，报错

# # x = 10
# # y = 20
# # if x > y:
# #     print(x)
# # else:
# #     print(y)
# Compare two numbers.
def max2(x, y):
    """Print ``x`` when it is strictly greater than ``y``; otherwise print ``y``."""
    larger = x if x > y else y
    print(larger)


max2(10, 30)

# Empty function:
# some features are hard to implement right away and would otherwise block
# further coding, so every planned feature is usually declared first as an
# empty function and filled in later.
def func():
    """Placeholder for a feature that is not implemented yet."""
    ...  # like `pass`, a bare `...` does nothing

函数的返回值
在调用函数时，需要接收函数体内部产生的结果，则return返回值。

def max2(x, y):
    """Return the larger of ``x`` and ``y``.

    ``x`` is returned only when it is strictly greater than ``y``; when the
    two compare equal, ``y`` is returned.
    """
    if x > y:
        return x
    else:
        return y


# Capture the value produced inside the function via its return value.
res = max2(10, 5)
print(res)

函数对象
指的是函数名指向的内存地址。

 def func():
     """First placeholder feature; does nothing."""
     return None
 # print(func)  # <function func at 0x101dd2e18>
 #
 # func()
 def func2():
     """Second placeholder feature; does nothing."""
     return None
 # Store the function objects in a dict, keyed by menu number.
 dict1 = {'1': func, '2': func2}
 choice = input('请输入功能编号:').strip()
 # The if/elif chain that the dict lookup replaces:
 # if choice == '1':
 #     func()
 # elif choice == '2':
 #     func2()
 #
 # If the user's input is one of the keys, call the function stored under it;
 # any other input is ignored.
 selected = dict1.get(choice)
 if selected is not None:
     selected()  # e.g. dict1['1']()

函数嵌套:
　　嵌套定义:
　　　　在函数内，定义函数。

　　嵌套调用:
　　　　在函数内，调用函数。

# Nested definition: each function defines the next one and returns it
# without calling it.
def func1():
    """Print a marker and return the nested function ``func2``."""
    print('func1...')

    def func2():
        """Print a marker and return the nested function ``func3``."""
        print('func2...')

        def func3():
            """Print a marker; innermost level."""
            print('func3...')
            # ....

        return func3

    return func2


# Reach each inner function through the value returned by the outer one.
func2 = func1()
func3 = func2()
func3()


# Nested calls: each function calls the function it defines.
# This deliberately rebinds the name func1 to the second variant.
def func1():
    """Print a marker, then define and immediately call ``func2``.

    ``func2`` in turn prints its marker and calls ``func3``. Nothing is
    returned.
    """
    print('func1...')

    def func2():
        print('func2...')

        def func3():
            print('func3...')
            # ....

        func3()

    func2()


func1()

名称空间
python解释器自带的: 内置名称空间
自定义的py文件内，顶着最左边定义的：全局名称空间
函数内部定义的: 局部名称空间

name = 'tank'  # defined at module level


def func1():
    """Print an empty line; also defines a local ``func2`` that is never called."""
    def func2():
        # func2 is defined inside func1 and is not called anywhere in it.
        print('func2...')

    # name = 'abc'
    print()


# print(name, 'printed from the global scope')

func1()

引用自己编辑的包以及文件

import B

# from ... import ...
# 导入B包中的a模块（a.py文件）
# 导入时会自动执行a模块中的代码
from B import a

# __name__: B.a
# a

常用模块（内置模块）

time 时间模块

import time  # bring the time module into scope

# Read the current timestamp.
started_at = time.time()
print(started_at)
# Wait for 2 seconds, then read the timestamp again.
time.sleep(2)
finished_at = time.time()
print(finished_at)

os 模块

import os  # os.path is used below, so the module has to be imported first

# Interact with files on the operating system.
# Check whether test.txt exists (relative paths are resolved against the
# current working directory). The trailing comments are the results recorded
# in the original notes.
print(os.path.exists('test.txt'))  # True
print(os.path.exists('test1.txt'))  # False
print(os.path.exists(r'C:\Users\liubin\Desktop\test.txt'))  # True
# Directory that contains the current file.
print(os.path.dirname(__file__))  # D:/python_files/day03

sys模块

import os  # needed for os.path.dirname below
import sys

# Show the list of directories Python searches when importing modules.
print(sys.path)
# Append the directory containing this file to that search path.
sys.path.append(os.path.dirname(__file__))
print(sys.path)

json模块

import json

# The data to serialize. It must be defined before json.dumps() is called.
user_info = {
    'name': 'tank',
    'pwd': '123',
}

# dumps: serialization.
# Converts the dict into a JSON-formatted string.
res = json.dumps(user_info)
print(res)
print(type(res))
with open('user.json', 'wt', encoding='utf-8') as f:
    f.write(res)

# loads: deserialization.
# Read the JSON file's text into memory first.
with open('user.json', 'r', encoding='utf-8') as f:
    # Reading the file yields a string.
    res = f.read()
    # print(type(res))
# loads converts the JSON-formatted string into a dict.
user_dict = json.loads(res)
print(user_dict)  # {'name': 'tank', 'pwd': '123'}
print(type(user_dict))  # <class 'dict'>


# dump
user_info = {
    'name': 'tank',
    'pwd': '123',
}
with open('user_info.json', 'w', encoding='utf-8') as f:
    # Manual equivalent:
    # str1 = json.dumps(user_info)
    # f.write(str1)
    # dump serializes the object and writes it to the file in one call.
    json.dump(user_info, f)


# load
with open('user_info.json', 'r', encoding='utf-8') as f:
    # Manual equivalent:
    # res = f.read()
    # user_dict = json.loads(res)
    # print(user_dict)

    # load reads from the file and parses the JSON in one call.
    user_dict = json.load(f)
    print(user_dict)

http协议:
请求url:
https://www.baidu.com/

请求方式:
GET

请求头:
　　Cookie：可能需要关注。
　　User-Agent: 用来证明你是浏览器
　　　　注意: 去浏览器的request headers中查找
　　Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36
　　Host: www.baidu.com

requests模块使用

 import requests

# Send a GET request to the Baidu home page.
response = requests.get('https://www.baidu.com/')
response.encoding = 'utf-8'
print(response)  # <Response [200]>
# Status code of the response.
print(response.status_code)  # 200
# Text of the response.
page_text = response.text
# print(page_text)
print(type(page_text))  # <class 'str'>
with open('baidu.html', 'w', encoding='utf-8') as f:
    f.write(page_text)

爬取梨视频

import requests

# NOTE(review): '视频地址' looks like a placeholder for the real video URL —
# confirm and substitute before running.
res = requests.get('视频地址')
video_bytes = res.content  # body of the response, as returned by res.content
print(video_bytes)
with open('视频.mp4', 'wb') as f:
    f.write(video_bytes)

posted @ 2019-06-13 19:08 走投无路只能来敲敲代码 阅读(222) 评论(0) 收藏 举报

刷新页面返回顶部

走投无路只能来敲敲代码

Python Learning Day3

基于urllib实现

公告