学习Python--Day12

八，random模块

>>> import random
#随机小数
>>> random.random()      # 大于0且小于1之间的小数
0.7664338663654585
>>> random.uniform(1,3) #大于1小于3的小数
1.6270147180533838#恒富：发红包

#随机整数
>>> random.randint(1,5)  # 大于等于1且小于等于5之间的整数
>>> random.randrange(1,10,2) # 大于等于1且小于10之间的奇数


#随机选择一个返回
>>> random.choice([1,'23',[4,5]])  # #1或者23或者[4,5]
#随机选择多个返回，返回的个数为函数的第二个参数
>>> random.sample([1,'23',[4,5]],2) # #列表元素任意2个组合
[[4, 5], '23']


#打乱列表顺序
>>> item=[1,3,5,7,9]
>>> random.shuffle(item) # 打乱次序
>>> item
[5, 1, 3, 7, 9]
>>> random.shuffle(item)
>>> item
[5, 9, 7, 1, 3]

练习：生成随机验证码

import random

def v_code(length=5):
    """Return a random verification code of digits and uppercase letters.

    Each character is, with equal probability, a digit 0-9 or an uppercase
    ASCII letter A-Z.

    Args:
        length: Number of characters to generate. Defaults to 5, the length
            that used to be hard-coded.

    Returns:
        A string of ``length`` characters; the empty string when ``length``
        is 0.

    Raises:
        ValueError: If ``length`` is negative.

    Note:
        ``random`` is not cryptographically secure. For codes that protect
        anything sensitive, draw the characters with the ``secrets`` module.
    """
    if length < 0:
        raise ValueError("length must be non-negative")

    chars = []
    for _ in range(length):
        # Draw both candidates before choosing so the sequence of calls into
        # the random generator -- and hence any seeded output -- is unchanged.
        digit = str(random.randint(0, 9))
        letter = chr(random.randint(65, 90))  # 65..90 are the code points of 'A'..'Z'
        chars.append(random.choice([digit, letter]))
    return "".join(chars)

print(v_code())

九，os模块

os模块是与操作系统交互的一个接口

当前执行这个python文件的工作目录相关的工作路径

os.getcwd() 获取当前工作目录，即当前python脚本工作的目录路径
os.chdir("dirname")  改变当前脚本工作目录；相当于shell下cd
os.curdir  返回当前目录: ('.')
os.pardir  获取当前目录的父目录字符串名：('..')
#和文件夹相关
os.makedirs('dirname1/dirname2')    可生成多层递归目录
os.removedirs('dirname1')    若目录为空，则删除，并递归到上一级目录，如若也为空，则删除，依此类推
os.mkdir('dirname')    生成单级目录；相当于shell中mkdir dirname
os.rmdir('dirname')    删除单级空目录，若目录不为空则无法删除，报错；相当于shell中rmdir dirname
os.listdir('dirname')    列出指定目录下的所有文件和子目录，包括隐藏文件，并以列表方式打印
# 和文件相关
os.remove()  删除一个文件
os.rename("oldname","newname")  重命名文件/目录
os.stat('path/filename')  获取文件/目录信息
# 和操作系统差异相关
os.sep    输出操作系统特定的路径分隔符，win下为"\\",Linux下为"/"
os.linesep    输出当前平台使用的行终止符，win下为"\r\n",Linux下为"\n"
os.pathsep    输出用于分割文件路径的字符串 win下为;,Linux下为:
os.name    输出字符串指示当前使用平台。win->'nt'; Linux->'posix'
# 和执行系统命令相关
os.system("bash command")  运行shell命令，直接显示
os.popen("bash command").read()  运行shell命令，获取执行结果
os.environ  获取系统环境变量
#path系列，和路径相关
os.path.abspath(path) 返回path规范化的绝对路径
os.path.split(path) 将path分割成目录和文件名二元组返回
os.path.dirname(path) 返回path的目录。其实就是os.path.split(path)的第一个元素
os.path.basename(path) 返回path最后的文件名。如果path以／或\结尾，那么就会返回空值，即os.path.split(path)的第二个元素。
os.path.exists(path)  如果path存在，返回True；如果path不存在，返回False
os.path.isabs(path)  如果path是绝对路径，返回True
os.path.isfile(path)  如果path是一个存在的文件，返回True。否则返回False
os.path.isdir(path)  如果path是一个存在的目录，则返回True。否则返回False
os.path.join(path1[, path2[, ...]])  将多个路径组合后返回，最后一个绝对路径之前的参数将被忽略
os.path.getatime(path)  返回path所指向的文件或者目录的最后访问时间
os.path.getmtime(path)  返回path所指向的文件或者目录的最后修改时间
os.path.getsize(path) 返回path的大小

注意：os.stat('path/filename') 获取文件/目录信息的结构说明

stat 结构:

st_mode: inode 保护模式
st_ino: inode 节点号。
st_dev: inode 驻留的设备。
st_nlink: inode 的链接数。
st_uid: 所有者的用户ID。
st_gid: 所有者的组ID。
st_size: 普通文件以字节为单位的大小；对于某些特殊文件，则是等待读取的数据量。
st_atime: 上次访问的时间。
st_mtime: 最后一次修改的时间。
st_ctime: 由操作系统报告的"ctime"。在某些系统上（如Unix）是最新的元数据更改的时间，在其它系统上（如Windows）是创建时间（详细信息参见平台的文档）。

十，sys模块

sys模块是与python解释器交互的一个接口

sys.argv           命令行参数List，第一个元素是程序本身路径
sys.exit(n)        退出程序，正常退出时exit(0),错误退出sys.exit(1)
sys.version        获取Python解释程序的版本信息
sys.path           返回模块的搜索路径，初始化时使用PYTHONPATH环境变量的值
sys.platform       返回操作系统平台名称

import sys
# sys.exit() does not kill the interpreter directly: it raises SystemExit,
# which can be intercepted like any other exception.
try:
    sys.exit(1)
except SystemExit as exit_request:
    # Printing the exception shows the exit status passed to sys.exit().
    print(exit_request)

十一，re模块

1，什么是正则？

　正则就是用一些具有特殊含义的符号组合到一起（称为正则表达式）来描述字符或者字符串的方法。或者说：正则就是用来描述一类事物的规则。在Python中，它内嵌于语言之中，并通过 re 模块实现。正则表达式模式被编译成一系列的字节码，然后由用 C 编写的匹配引擎执行。

`元字符`	`匹配内容`
\w	匹配字母（包含中文）或数字或下划线
\W	匹配除字母（包含中文）、数字、下划线以外的字符
\s	匹配任意的空白符
\S	匹配任意非空白符
\d	匹配数字
\D	匹配非数字
\A	从字符串开头匹配
\Z	仅匹配字符串的结尾（Python 的 re 模块使用大写的 \Z；小写的 \z 直到 Python 3.14 才被支持）
\n	匹配一个换行符
\t	匹配一个制表符
^	匹配字符串的开始
$	匹配字符串的结尾
.	匹配任意字符，除了换行符，当re.DOTALL标记被指定时，则可以匹配包括换行符的任意字符。
[...]	匹配字符组中的字符
[^...]	匹配除了字符组中的字符的所有字符
*	匹配0个或者多个左边的字符。
+	匹配一个或者多个左边的字符。
?	匹配0个或者1个左边的字符；跟在其他量词后面（如 *?、+?）时表示非贪婪方式。
{n}	精准匹配n个前面的表达式。
{n,m}	匹配n到m次由前面的正则表达式定义的片段，贪婪方式
a\|b	匹配a或者b。
()	匹配括号内的表达式，也表示一个组

2，匹配模式举例

# ----------------匹配模式--------------------

# 1,之前学过的字符串的常用操作：一对一匹配
# s1 = 'fdskahf太白金星'
# print(s1.find('太白'))  # 7

# 2，正则匹配：

# 单个字符匹配
import re
# \w 与 \W
# print(re.findall('\w', '太白jx 12*() _'))  # ['太', '白', 'j', 'x', '1', '2', '_']
# print(re.findall('\W', '太白jx 12*() _'))  # [' ', '*', '(', ')', ' ']


# \s 与\S
# print(re.findall('\s','太白barry*(_ \t \n'))  # [' ', '\t', ' ', '\n']
# print(re.findall('\S','太白barry*(_ \t \n'))  # ['太', '白', 'b', 'a', 'r', 'r', 'y', '*', '(', '_']


# \d 与 \D
# print(re.findall('\d','1234567890 alex *（_'))  # ['1', '2', '3', '4', '5', '6', '7', '8', '9', '0']
# print(re.findall('\D','1234567890 alex *（_'))  # [' ', 'a', 'l', 'e', 'x', ' ', '*', '（', '_']

# \A 与 ^
# print(re.findall('\Ahel','hello 太白金星 -_- 666'))  # ['hel']
# print(re.findall('^hel','hello 太白金星 -_- 666'))  # ['hel']


# \Z、\z 与 $  @@
# print(re.findall('666\Z','hello 太白金星 *-_-* \n666'))  # ['666']
# print(re.findall('666\z','hello 太白金星 *-_-* \n666'))  # Python 3.14 之前会抛出 re.error（bad escape \z）；re 模块应使用 \Z
# print(re.findall('666$','hello 太白金星 *-_-* \n666'))  # ['666']

# \n 与 \t
# print(re.findall('\n','hello \n 太白金星 \t*-_-*\t \n666'))  # ['\n', '\n']
# print(re.findall('\t','hello \n 太白金星 \t*-_-*\t \n666'))  # ['\t', '\t']


# 重复匹配

# . ? * + {m,n} .* .*?

# . 匹配任意字符，除了换行符（re.DOTALL 这个参数可以匹配\n）。
# print(re.findall('a.b', 'ab aab a*b a2b a牛b a\nb'))  # ['aab', 'a*b', 'a2b', 'a牛b']
# print(re.findall('a.b', 'ab aab a*b a2b a牛b a\nb',re.DOTALL))  # ['aab', 'a*b', 'a2b', 'a牛b', 'a\nb']


# ？匹配0个或者1个由左边字符定义的片段。
# print(re.findall('a?b', 'ab aab abb aaaab a牛b aba**b'))  # ['ab', 'ab', 'ab', 'b', 'ab', 'b', 'ab', 'b']


# * 匹配0个或者多个左边字符表达式。 满足贪婪匹配 @@
# print(re.findall('a*b', 'ab aab aaab abbb'))  # ['ab', 'aab', 'aaab', 'ab', 'b', 'b']
# print(re.findall('ab*', 'ab aab aaab abbbbb'))  # ['ab', 'a', 'ab', 'a', 'a', 'ab', 'abbbbb']


# + 匹配1个或者多个左边字符表达式。 满足贪婪匹配  @@
# print(re.findall('a+b', 'ab aab aaab abbb'))  # ['ab', 'aab', 'aaab', 'ab']


# {m,n}  匹配m个至n个左边字符表达式。 满足贪婪匹配  @@
# print(re.findall('a{2,4}b', 'ab aab aaab aaaaabb'))  # ['aab', 'aaab', 'aaaab']


# .* 贪婪匹配 从头到尾.
# print(re.findall('a.*b', 'ab aab a*()b'))  # ['ab aab a*()b']


# .*? 此时的?不是对左边的字符进行0次或者1次的匹配,
# 而只是针对.*这种贪婪匹配的模式进行一种限定:告知他要遵从非贪婪匹配 推荐使用!
# print(re.findall('a.*?b', 'ab a1b a*()b, aaaaaab'))  # ['ab', 'a1b', 'a*()b', 'aaaaaab']


# []: 括号中可以放任意一个字符,一个中括号代表一个字符
# - 在[]中表示范围,如果想要匹配上- 那么这个-符号不能放在中间.
# ^ 在[]中表示取反的意思.
# print(re.findall('a.b', 'a1b a3b aeb a*b arb a_b'))  # ['a1b', 'a3b', 'aeb', 'a*b', 'arb', 'a_b']
# print(re.findall('a[abc]b', 'aab abb acb adb afb a_b'))  # ['aab', 'abb', 'acb']
# print(re.findall('a[0-9]b', 'a1b a3b aeb a*b arb a_b'))  # ['a1b', 'a3b']
# print(re.findall('a[a-z]b', 'a1b a3b aeb a*b arb a_b'))  # ['aeb', 'arb']
# print(re.findall('a[a-zA-Z]b', 'aAb aWb aeb a*b arb a_b'))  # ['aAb', 'aWb', 'aeb', 'arb']
# print(re.findall('a[0-9][0-9]b', 'a11b a12b a34b a*b arb a_b'))  # ['a11b', 'a12b', 'a34b']
# print(re.findall('a[*-+]b','a-b a*b a+b a/b a6b'))  # ['a*b', 'a+b']
# - 在[]中表示范围,如果想要匹配上- 那么这个-符号不能放在中间.
# print(re.findall('a[-*+]b','a-b a*b a+b a/b a6b'))  # ['a-b', 'a*b', 'a+b']
# print(re.findall('a[^a-z]b', 'acb adb a3b a*b'))  # ['a3b', 'a*b']

# 练习:
# 找到字符串中'alex_sb ale123_sb wu12sir_sb wusir_sb ritian_sb' 的 alex wusir ritian
# print(re.findall('([a-z]+)_sb','alex_sb ale123_sb wusir12_sb wusir_sb ritian_sb'))


# 分组:

# () 制定一个规则,将满足规则的结果匹配出来
# print(re.findall('(.*?)_sb', 'alex_sb wusir_sb 日天_sb'))  # ['alex', ' wusir', ' 日天']

# 应用举例:
# print(re.findall('href="(.*?)"','<a href="http://www.baidu.com">点击</a>'))#['http://www.baidu.com']


# | 匹配 左边或者右边
# print(re.findall('alex|太白|wusir', 'alex太白wusiraleeeex太太白odlb'))  # ['alex', '太白', 'wusir', '太白']
# print(re.findall('compan(y|ies)','Too many companies have gone bankrupt, and the next one is my company'))  # ['ies', 'y']
# print(re.findall('compan(?:y|ies)','Too many companies have gone bankrupt, and the next one is my company'))  # ['companies', 'company']
# 分组() 中加入?: 表示将整体匹配出来而不只是()里面的内容。

3，常用方法举例

import re

#1 findall 全部找到返回一个列表。
# print(re.findall('a', 'alexwusirbarryeval'))  # ['a', 'a', 'a']


# 2 search 只到找到第一个匹配然后返回一个包含匹配信息的对象,该对象可以通过调用group()方法得到匹配的字符串,如果字符串没有匹配，则返回None。
# print(re.search('sb|alex', 'alex sb sb barry 日天'))  # <_sre.SRE_Match object; span=(0, 4), match='alex'>
# print(re.search('alex', 'alex sb sb barry 日天').group())  # alex


# 3 match：同search,不过只在字符串开始处进行匹配,没有匹配则返回None,完全可以用search+^代替match
# print(re.match('barry', 'barry alex wusir 日天'))  # <_sre.SRE_Match object; span=(0, 5), match='barry'>
# print(re.match('barry', 'barry alex wusir 日天').group()) # barry


# 4 split 分割 可按照任意分割符进行分割
# print(re.split('[ ：:,;；，]','alex wusir,日天，太白;女神;肖锋：吴超'))  # ['alex', 'wusir', '日天', '太白', '女神', '肖锋', '吴超']


# 5 sub 替换

# print(re.sub('barry', '太白', 'barry是最好的讲师，barry就是一个普通老师，请不要将barry当男神对待。'))
# 太白是最好的讲师，太白就是一个普通老师，请不要将太白当男神对待。
# print(re.sub('barry', '太白', 'barry是最好的讲师，barry就是一个普通老师，请不要将barry当男神对待。',2))
# 太白是最好的讲师，太白就是一个普通老师，请不要将barry当男神对待。
# print(re.sub('([a-zA-Z]+)([^a-zA-Z]+)([a-zA-Z]+)([^a-zA-Z]+)([a-zA-Z]+)', r'\5\2\3\4\1', r'alex is sb'))
# sb is alex

# 6 compile 预编译正则表达式，得到可重复使用的对象
# obj=re.compile('\d{2}')
#
# print(obj.search('abc123eeee').group()) #12
# print(obj.findall('abc123eeee')) #['12'],重用了obj


# import re
# ret = re.finditer('\d', 'ds3sy4784a')   #finditer返回一个存放匹配结果的迭代器
# print(ret)  # <callable_iterator object at 0x10195f940>
# print(next(ret).group())  #查看第一个结果
# print(next(ret).group())  #查看第二个结果
# print([i.group() for i in ret])  #查看剩余的所有结果

4，命名分组举例（了解）

# Named-group matching: (?P<tag_name>...) captures the opening tag name and
# (?P=tag_name) requires the closing tag to repeat exactly that text.
# A raw string keeps \w from being parsed as an (invalid) string escape.
ret = re.search(r"<(?P<tag_name>\w+)>\w+</(?P=tag_name)>", "<h1>hello</h1>")
# #还可以在分组中利用?P<name>的形式给分组起名字
# #获取的匹配结果可以直接用group('名字')拿到对应的值
# print(ret.group('tag_name'))  #结果 ：h1
# print(ret.group())  #结果 ：<h1>hello</h1>
#
# ret = re.search(r"<(\w+)>\w+</\1>","<h1>hello</h1>")
# #如果不给组起名字，也可以用\序号来找到对应的组，表示要找的内容和前面的组内容一致
# #获取的匹配结果可以直接用group(序号)拿到对应的值
# print(ret.group(1))
# print(ret.group())  #结果 ：<h1>hello</h1>

5，相关小练习

# 相关练习题
# 1，"1-2*(60+(-40.35/5)-(-4*3))"
    # 1.1 匹配所有的整数
# print(re.findall('\d+',"1-2*(60+(-40.35/5)-(-4*3))"))
    # 1.2 匹配所有的数字（包含小数）
# print(re.findall(r'\d+\.?\d*|\d*\.?\d+', "1-2*(60+(-40.35/5)-(-4*3))"))
    # 1.3 匹配所有的数字（包含小数包含负号）
# print(re.findall(r'-?\d+\.?\d*|\d*\.?\d+', "1-2*(60+(-40.35/5)-(-4*3))"))

# 2,匹配一段文本中的每行的邮箱
    # http://blog.csdn.net/make164492212/article/details/51656638 匹配所有邮箱
    
# 3，匹配一段文本中的每行的时间字符串 这样的形式：'1995-04-27'

s1 = '''
时间就是1995-04-27,2005-04-27
1999-04-27 老男孩教育创始人
老男孩老师 alex 1980-04-27:1980-04-27
2018-12-08
'''
# print(re.findall('\d{4}-\d{2}-\d{2}', s1))

# 4 匹配 一个浮点数
# print(re.findall('\d+\.\d*','1.17'))

# 5 匹配qq号：腾讯从10000开始：
# print(re.findall('[1-9][0-9]{4,}', '2413545136'))

s1 = '''
<p><a style="text-decoration: underline;" href="http://www.cnblogs.com/jin-xin/articles/7459977.html" target="_blank">python基础一</a></p>
<p><a style="text-decoration: underline;" href="http://www.cnblogs.com/jin-xin/articles/7562422.html" target="_blank">python基础二</a></p>
<p><a style="text-decoration: underline;" href="https://www.cnblogs.com/jin-xin/articles/9439483.html" target="_blank">Python最详细，最深入的代码块小数据池剖析</a></p>
<p><a style="text-decoration: underline;" href="http://www.cnblogs.com/jin-xin/articles/7738630.html" target="_blank">python集合,深浅copy</a></p>
<p><a style="text-decoration: underline;" href="http://www.cnblogs.com/jin-xin/articles/8183203.html" target="_blank">python文件操作</a></p>
<h4 style="background-color: #f08080;">python函数部分</h4>
<p><a style="text-decoration: underline;" href="http://www.cnblogs.com/jin-xin/articles/8241942.html" target="_blank">python函数初识</a></p>
<p><a style="text-decoration: underline;" href="http://www.cnblogs.com/jin-xin/articles/8259929.html" target="_blank">python函数进阶</a></p>
<p><a style="text-decoration: underline;" href="http://www.cnblogs.com/jin-xin/articles/8305011.html" target="_blank">python装饰器</a></p>
<p><a style="text-decoration: underline;" href="http://www.cnblogs.com/jin-xin/articles/8423526.html" target="_blank">python迭代器,生成器</a></p>
<p><a style="text-decoration: underline;" href="http://www.cnblogs.com/jin-xin/articles/8423937.html" target="_blank">python内置函数,匿名函数</a></p>
<p><a style="text-decoration: underline;" href="http://www.cnblogs.com/jin-xin/articles/8743408.html" target="_blank">python递归函数</a></p>
<p><a style="text-decoration: underline;" href="https://www.cnblogs.com/jin-xin/articles/8743595.html" target="_blank">python二分查找算法</a></p>

'''
# 1,找到所有的p标签
# ret = re.findall('<p>.*?</p>', s1)
# print(ret)


# 2,找到所有a标签对应的url
# print(re.findall('<a.*?href="(.*?)".*?</a>',s1))

十二，shutil模块

高级的文件、文件夹、压缩包处理模块

shutil.copyfileobj(fsrc, fdst[, length])
将文件内容拷贝到另一个文件中

import shutil

shutil.copyfileobj(open('old.xml','r'), open('new.xml', 'w'))

shutil.copyfile(src, dst)
拷贝文件

shutil.copyfile('f1.log', 'f2.log') #目标文件无需存在

shutil.copymode(src, dst)
仅拷贝权限。内容、组、用户均不变

shutil.copymode('f1.log', 'f2.log') #目标文件必须存在

shutil.copystat(src, dst)
仅拷贝状态的信息，包括：mode bits, atime, mtime, flags

shutil.copystat('f1.log', 'f2.log') #目标文件必须存在

shutil.copy(src, dst)
拷贝文件和权限

import shutil

shutil.copy('f1.log', 'f2.log')

shutil.copy2(src, dst)
拷贝文件和状态信息

import shutil

shutil.copy2('f1.log', 'f2.log')

shutil.ignore_patterns(*patterns)
shutil.copytree(src, dst, symlinks=False, ignore=None)
递归的去拷贝文件夹

import shutil

shutil.copytree('folder1', 'folder2', ignore=shutil.ignore_patterns('*.pyc', 'tmp*')) #目标目录不能存在，注意对folder2目录父级目录要有可写权限，ignore的意思是排除

import shutil

# Recursively copy directory 'f1' to 'f2', skipping entries whose names match
# '*.pyc' or 'tmp*'. symlinks=True asks copytree to copy symbolic links as
# links instead of copying the files they point to.
shutil.copytree('f1', 'f2', symlinks=True, ignore=shutil.ignore_patterns('*.pyc', 'tmp*'))

'''
默认情况下（symlinks=False）软连接指向的文件会被当作普通文件拷贝，即在目标目录中创建新的文件；指定 symlinks=True 则保留软连接本身
'''

拷贝软连接

shutil.rmtree(path[, ignore_errors[, onerror]])
递归的去删除文件

import shutil

shutil.rmtree('folder1')

shutil.move(src, dst)
递归的去移动文件，它类似mv命令，其实就是重命名。

import shutil

shutil.move('folder1', 'folder3')

shutil.make_archive(base_name, format,...)

创建压缩包并返回文件路径，例如：zip、tar

- base_name：压缩包的文件名，也可以是压缩包的路径。只是文件名时，则保存至当前目录，否则保存至指定路径，
  如 data_bak =>保存至当前路径
  如：/tmp/data_bak =>保存至/tmp/
- format：压缩包种类，“zip”, “tar”, “bztar”，“gztar”
- root_dir：要压缩的文件夹路径（默认当前目录）
- owner：用户，默认当前用户
- group：组，默认当前组
- logger：用于记录日志，通常是logging.Logger对象

# Pack the contents of /data into a gzip-compressed tar archive named
# "data_bak", placed in the current working directory.
import shutil
ret = shutil.make_archive(base_name="data_bak", format="gztar", root_dir="/data")


# Pack the contents of /data into the same kind of archive, placed under /tmp/.
import shutil
ret = shutil.make_archive(base_name="/tmp/data_bak", format="gztar", root_dir="/data")

shutil 对压缩包的处理是调用 zipfile 和 tarfile 两个模块来进行的，详细：

import zipfile

# Compress: the with-block closes the archive (which finalises the zip file)
# even if one of the write() calls raises, e.g. because a file is missing.
with zipfile.ZipFile('laxi.zip', 'w') as z:
    z.write('a.log')
    z.write('data.data')

# Extract: unpack every member of the archive into the current directory.
with zipfile.ZipFile('laxi.zip', 'r') as z:
    z.extractall(path='.')

zipfile压缩解压缩

import tarfile

# 压缩
>>> t=tarfile.open('/tmp/egon.tar','w')
>>> t.add('/test1/a.py',arcname='a.bak')
>>> t.add('/test1/b.py',arcname='b.bak')
>>> t.close()


# 解压
>>> t=tarfile.open('/tmp/egon.tar','r')
>>> t.extractall('/egon')
>>> t.close()

tarfile压缩解压缩

十三，xml模块（了解）

　　xml是实现不同语言或程序之间进行数据交换的协议，跟json差不多，但json使用起来更简单，不过，古时候，在json还没诞生的黑暗年代，
大家只能选择用xml呀，至今很多传统公司如金融行业的很多系统的接口还主要是xml。
现在这种格式的文件比较少了，但是还是存在的所以大家简单了解一下，以备不时之需。

<?xml version="1.0"?>
<data>
    <country name="Liechtenstein">
        <rank updated="yes">2</rank>
        <year>2008</year>
        <gdppc>141100</gdppc>
        <neighbor name="Austria" direction="E"/>
        <neighbor name="Switzerland" direction="W"/>
    </country>
    <country name="Singapore">
        <rank updated="yes">5</rank>
        <year>2011</year>
        <gdppc>59900</gdppc>
        <neighbor name="Malaysia" direction="N"/>
    </country>
    <country name="Panama">
        <rank updated="yes">69</rank>
        <year>2011</year>
        <gdppc>13600</gdppc>
        <neighbor name="Costa Rica" direction="W"/>
        <neighbor name="Colombia" direction="E"/>
    </country>
</data>

xml数据

# 增删改查
# 在进行操作之前，都应该进行这两步：

# import xml.etree.ElementTree as ET
# tree = ET.parse('a.xml')  # 形成树形结构
# root = tree.getroot()  # 得到树的根系
# print(root)
# 循环打印：
# for i in root:
#     print(i)
# <Element 'country' at 0x00000196B51191D8>
# <Element 'country' at 0x00000196B5124B88>
# <Element 'country' at 0x00000196B5124D18>

# 所有的增删改查都是基于这个root根系去操作

# 查：
# 1,全文搜索 year 将所有的year标签全部找
# print(root.iter('year'))
# print([i for i in root.iter('year')])
# 2,只找第一个，找到就返回
# print(root.find('country'))
# 3,在root的子节点找，找所有的
# print(root.findall('country'))

# 练习
# 找到标签也可以找到标签相应的内容：tag,attrib,text

# 1,找所有的rank标签，以及 attrib 和 text (这里利用列表推导式比较方便)
# print([i for i in root.iter('rank')])
# [<Element 'rank' at 0x000001367D0D49F8>, <Element 'rank' at 0x000001367D0D4BD8>, <Element 'rank' at 0x000001367D0D4D68>]
# print([i.attrib for i in root.iter('rank')])
# [{'updated': 'yes'}, {'updated': 'yes'}, {'updated': 'yes'}]
# print([i.text for i in root.iter('rank')])  # ['2', '5', '69']

# 2,找到第二个country的 neighbor标签以及他的属性
# print([tag for tag in root.findall('country')][1].find('neighbor').attrib)
# {'direction': 'N', 'name': 'Malaysia'}


# 增 append
# import xml.etree.ElementTree as ET
# tree = ET.parse('a.xml')  # 形成树形结构
# root = tree.getroot()  # 得到树的根系

# 给 year 大于2010年的所有标签下面添加一个month标签，属性为name:month 内容为30days

# for country in root.findall('country'):
#     for year in country.findall('year'):
#         if int(year.text) > 2010:
#             month = ET.Element('month')
#             month.text = '30days'
#             month.attrib = {'name': 'month'}
#             country.append(month)
# tree.write('b.xml')

#改

# import xml.etree.ElementTree as ET
# tree = ET.parse('a.xml')  # 形成树形结构
# root = tree.getroot()  # 得到树的根系
# 对所有的year属性以及值进行修改
# for node in root.iter('year'):
#     new_year=int(node.text)+1
#     node.text=str(new_year)
#     node.set('updated','yes')
#     node.set('version','1.0')
# tree.write('test.xml')


# 删
# import xml.etree.ElementTree as ET
# tree = ET.parse('a.xml')  # 形成树形结构
# root = tree.getroot()  # 得到树的根系
#
# # 将 rank值大于50的country标签删除
# for country in root.findall('country'):
#    rank = int(country.find('rank').text)
#    if rank > 50:
#      root.remove(country)
#
# tree.write('output.xml')

import xml.etree.ElementTree as ET
 
 
# Build the tree top-down: <namelist> is the root, each <name> a child record.
new_xml = ET.Element("namelist")

# First record: enrolled="yes", holding an empty <age checked="no"/> and <sex>33</sex>.
name = ET.SubElement(new_xml, "name", enrolled="yes")
age = ET.SubElement(name, "age", checked="no")
sex = ET.SubElement(name, "sex")
sex.text = '33'

# Second record: enrolled="no" with <age>19</age>; note that `age` is rebound here.
name2 = ET.SubElement(new_xml, "name", enrolled="no")
age = ET.SubElement(name2, "age")
age.text = '19'

# Wrap the root in a document object and save it with an XML declaration.
et = ET.ElementTree(new_xml)
et.write("test.xml", encoding="utf-8", xml_declaration=True)

# Print the generated markup to stdout for inspection.
ET.dump(new_xml)

十四，subprocess

import subprocess

'''
sh-3.2# ls /Users/egon/Desktop |grep txt$
mysql.txt
tt.txt
事物.txt
'''

res1=subprocess.Popen('ls /Users/jieli/Desktop',shell=True,stdout=subprocess.PIPE)
res=subprocess.Popen('grep txt$',shell=True,stdin=res1.stdout,
                 stdout=subprocess.PIPE)

print(res.stdout.read().decode('utf-8'))


#等同于上面,但是上面的优势在于,一个数据流可以和另外一个数据流交互,可以通过爬虫得到结果然后交给grep
res1=subprocess.Popen('ls /Users/jieli/Desktop |grep txt$',shell=True,stdout=subprocess.PIPE)
print(res1.stdout.read().decode('utf-8'))


#windows下:
# dir | findstr 'test*'
# dir | findstr 'txt$'
import subprocess
res1=subprocess.Popen(r'dir C:\Users\Administrator\PycharmProjects\test\函数备课',shell=True,stdout=subprocess.PIPE)
res=subprocess.Popen('findstr test*',shell=True,stdin=res1.stdout,
                 stdout=subprocess.PIPE)

print(res.stdout.read().decode('gbk')) #subprocess使用当前系统默认编码，得到结果为bytes类型，在windows下需要用gbk解码

#举例说明：
import subprocess

# Run `dir` through the shell, capturing stdout and stderr in separate pipes.
obj = subprocess.Popen(
    'dir',
    shell=True,
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
)

# communicate() drains both pipes together and waits for the child to exit.
# Calling stdout.read() and then stderr.read() can deadlock if the child
# fills the stderr pipe while we are still blocked reading stdout.
out, err = obj.communicate()

# The pipes yield bytes; 'gbk' is the console encoding on Chinese-locale
# Windows -- adjust the codec on other systems.
print(out.decode('gbk'))  # output of a successful command
print(err.decode('gbk'))  # error text when the command fails

# shell:  run the command through the command interpreter (cmd on Windows).
# stdout: normal output is sent to this pipe.
# stderr: error output is sent to a separate pipe.

posted @ 2020-06-04 17:13 KK丶A_Tong 阅读(67) 评论(0) 收藏举报

刷新页面返回顶部