Python 个人常用汇总

http://www.runoob.com/python/python-tutorial.html Python 基础教程
# -*- coding:utf-8 -*- 支持coding中文
Python 常用文件操作总结：
通常需要安装numpy, scipy,sklearn,可以通过类似numpy.__version__查看对应的版本，查看路径numpy.__path__
pip install -U numpy==1.12.0 更新或者降低版本到指定版本
查看服务器上安装哪些安装包及版本：pip list
查看软件安装路径：pip show {packege_name}
pip install tmux -i https://pypi.tuna.tsinghua.edu.cn/simple #指定清华源下载

导入库路径：sys.path.append('/usr/local/lib/python2.7/site-packages')

from random import shuffle
shuffle(lines) #随机打乱

Path:该模块提供表示文件系统路径的类
from pathlib import Path
path_str = Path(r"/usr/HinGwenWoong/demo.py")
path_str.name # .name提取文件名,demo.py
path_str.parent # .parent提取父文件路径 /usr/HinGwenWoong
path_str.suffix # .suffix提取文件后缀 .py (包括".")
path_str.stem # .stem提取无后缀的文件名 demo
path_str.with_suffix(".json") # .with_suffix更改文件后缀
for path in path_str.iterdir(): # .iterdir 遍历文件
path_str.is_absolute() # .is_absolute是否绝对路径
path_str.is_dir() ，path_str.is_file() # 是否文件夹或者文件
path_str.exists() # 是否存在
path_str.glob('*.py') # .glob 路径下的后缀.py的所有文件

判断文件夹是否有效：
if not os.path.isdir(targetDir):
print 'Invalid target directory: {}'.format(targetDir)
sys.exit(2)
或者使用断言：assert(os.path.isdir(targetDir)), 'Invalid target directory: {}'.format(targetDir)
判断文件夹是否存在：
if not os.path.exists(targetDir):
os.makedirs(targetDir)
os.makedirs(targetDir, exist_ok=True)

列出文件夹下的所有文件的名字，不包括路径：for file in os.listdir(sourceDir):
获取文件名：os.path.basename(path) 包括后缀名
获取当前文件的路径：os.path.realpath(__file__)
获取路径名：os.path.dirname(p)
文件重命名：os.rename(old, new)
创建多级目录文件夹：os.makedirs(path)；创建单个目录文件夹：os.mkdir(path)
文件删除：os.remove(targetFile)
删除目录：shutil.rmtree(path) 递归删除一个目录（有内容，空的均可）
文件复制：shutil.copy(sourceDir, targetDir) #复制源文件到指定目录，或者可以复制源文件到指定目录，这种复制形式使用的前提是必须要有 os.chdir(你要处理的路径)
文件复制：shutil.copytree(source, destination)（这种复制形式无需 os.chdir() 便可操作）
复制文件：shutil.copyfile(src, dst) 复制数据从src到dst（src和dst均为文件）；shutil.copy(src, dst) 复制数据从src到dst（src为文件，dst可以为目录）
移动目录（文件）：shutil.move(src, dst) 递归移动一个文件或目录到另一个位置，类似于"mv"命令
得到当前目录路径：os.getcwd()
建立软链接：os.symlink(org_file,link_name)
分离扩展名，扩展名可能为空：os.path.splitext(p)，得到('/home/test/imagename', '.jpg')
当前时间为文件夹命名：
now = datetime.now()
local = now.strftime('%Y-%m-%d-%H-%M-%S-%f')

打印一个数据类型的成员和值：
def prn_obj(obj):
print('\n'.join(['%s:%s' % item for item in obj.__dict__.items()]))

循环获得文件目录结构下的各个文件：[http://www.cnblogs.com/herbert/archive/2013/01/07/2848892.html]
for dir_info in os.walk(image_dir):
root_dir, sub_dirs, file_names = dir_info
for each in dir_info[2]:
xmlName = each.replace('.jpg', '.xml') #如果目录下都是jpg文件，则将其名字提取，后缀替换为.xml,然后赋值给XMLName，当然，原来的each 名字不变，
file_paths = glob.glob(os.path.join('./test', '**/*.jpg'), recursive=True) #可以循环的获取'./test'文件夹及所有子文件夹的以‘.jpg’结尾的文件的全路径
for index, file_path in enumerate(file_names):
print(index, file_path)
#递归函数，遍历该文档目录和子目录下的所有文件，获取其path
def get_file(root_path,all_files=[]):
files = os.listdir(root_path)
for file in files:
if not os.path.isdir(root_path + '/' + file): # not a dir
all_files.append(root_path + '/' + file)
else: # is a dir
get_file((root_path+'/'+file),all_files)
return all_files

if os.path.isfile(in_image = os.path.join(root_dir, file_names)): #文件拼接，获得全路径，并判断文件是否存在
print 'the file with full path is', in_image
if not os.path.exists(targetFile): #判断文件是否存在
if (os.path.getsize(targetFile) != os.path.getsize(sourceFile))):
open(targetFile, "wb").write(open(sourceFile, "rb").read()) #文件读写：打开源文件，写入目标文件

# 多线程/多进程处理[https://docs.python.org/zh-cn/3/library/concurrent.futures.html]
import concurrent.futures
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor: #多进程使用concurrent.futures.ProcessPoolExecutor() 具体使用参考文档
future_results = {executor.submit(pow, 323, 1235), executor.submit(pow, 226, 888)} # 也可以使用responses = executor.map(func, param_lists)
for future in concurrent.futures.as_completed(future_results):
future_result = future_results[future]
try:
data = future_result.result()
except Exception as exc:
print('generated an exception: %s' % (exc))
else：
print(future_result.result())

# 使用multiprocessing多线程显示绘制图像（可以在多个CPU之间运行）：
import numpy as np
import matplotlib.pyplot as plt
from multiprocessing import Process, Lock

def plot_sine_wave(lock, x, y):
lock.acquire()
plt.plot(x, np.sin(y))
plt.show()
lock.release()

x = np.linspace(0, 2*np.pi, 100)
processes = []
lock = Lock()
for i in range(4):
p = Process(target=plot_sine_wave, args=(lock, x, x+i*0.1))
processes.append(p)

for p in processes:
p.start()
for p in processes:
p.join()

# 文件读写
file=open('labels.txt','r')
for eachline in file: #或者for eachline in file.readlines()
filename1 = eachline.strip().split(',') #strip移除首尾指定的字符，默认为空格，每一行文件格式：785,533,905,644,14794983.jpg,Car
filename2 = filename1.replace('.jpg', '.xml')
newline = 'dir_path' + '/' + filename2
txt = open('xml.txt','a')
txt.writelines(newline)
txt.write('\n') # txt.write('{:5}{:25}\n'.format(key, string2Write)) #格式化写入
txt.close()
file.close()

文件写入exel表格 https://www.jianshu.com/p/4e39444d5ebc
import xlwt
workbook = xlwt.Workbook("my.xls")
worksheet = workbook.add_sheet('Sheet1')
# write head
# worksheet.write(row_num, col_num, label = str'contents')
worksheet.write(0, 0, label = 'FileName')
worksheet.write(0, 1, label = 'label')
workbook.save('my.xls')
从exel表格读取内容：
import xlrd
workbook = xlrd.open_workbook('my.xls', encoding='utf8')
worksheet = workbook.sheets()[0] #通过索引顺序获取
worksheet = workbook.sheet_by_index(0) #通过索引顺序获取
worksheet = workbook.sheet_by_name(u'Sheet1')#通过名称获取
worksheet.row_values(i) worksheet.col_values(i)获取整行和整列的值（数组）
nrows = worksheet.nrows ncols = worksheet.ncols 获取行数和列数

json数据处理
a = {'name': 'wang', 'age': 29}
b = json.dumps(a, ensure_ascii=False) # dumps是将dict转化成str格式
json.loads(b) # loads是将str转化成dict格式
json.dump(a, file('test.txt', 'w'), ensure_ascii=False, indent=4) # dump和load也是类似的功能，只是与文件操作结合起来了
a = json.load(fp)

文件压缩解压
# coding=utf-8
# -*- coding: UTF-8 -*-
#!/usr/bin/env python
import zipfile
z=zipfile.ZipFile('file.zip')
for i, f in enumerate(z.filelist):
# 这里的gdk和UTF-8,可以依据具体的情况修改
# 或者修改成两个运行参数
f.filename = f.filename.decode('gbk').encode("UTF-8")
z.extract(f)

使用jupyter-notebook 编写和转换ipynb文件：
jupyter nbconvert --to python my_file.ipynb 或者 --to [md|html|pdf]
jupyter-notebook --ip=0.0.0.0 --allow-root # 打开jupyter远程访问

有序字典：
from collections import OrderedDict
typenames = OrderedDict([('name1', 0, 0), ('name1', 1, 0)]) 进行初始化
typenames['name1'] = [0, 0]
typenames['name2'] = [1, 2]
所以有typenames[3] 为[1, 2] typenames[3][0] 为1， typnames[3][1] 值为2
for typenamesKey, typenamesValue1, typenamesValue2 in typenames.items():
或者 for key in typenames.keys() 进行遍历

对输入的处理：
import argparse
def get_parse_args():
parser = argparse.ArgumentParser(description='get the args')
parser.add_argument('--device', dest='device_type', help='device to use', default='cpu', type=str)# parser.add_argument('device_type'）
if len(sys.argv) == 1:
parser.print_help() #直接可以调用对应的help输出对应的描述
sys.exit(1)

args = parser.parse_args()
return args

# 使用的时候
if __name__ == '__main__':
args = parse_args()
print(args)
if args.device_type is not None:
dosomething(args.device_type)

++++++++++++++++++++++++++++++++++++++++
正则表达式，具体详细见：https://docs.python.org/zh-cn/3/library/re.html prog = re.compile(pattern), result = prog.match(string) 等价于 result = re.match(pattern, string)
正则表达包含变量的写法：
re.compile(r’表达式’)
包含变量的正则表达式写法
re.compile(r’表达式’+变量+’表达式’) re.compile(r’表达式(%s)表达式’ %变量)
.:(点) 在默认模式，匹配除了换行的任意字符;
^:(插入符号) 匹配字符串的开头，并且在 MULTILINE 模式也匹配换行后的首个符号;
$:匹配字符串尾或者换行符的前一个字符, 在 'foo1\nfoo2\n' 搜索 foo.$ ，通常匹配 'foo2';
*:对它前面的正则式匹配0到任意次重复，尽量多的匹配字符串。ab* 会匹配 'a'，'ab'，或者 'a'``后面跟随任意个 ``'b';
+:对它前面的正则式匹配1到任意次重复。 ab+ 会匹配 'a' 后面跟随1个以上到任意个 'b'，它不会匹配 'a';
?:对它前面的正则式匹配0到1次重复。 ab? 会匹配 'a' 或者 'ab';
{m}:对其之前的正则式指定匹配 m 个重复；少于 m 的话就会导致匹配失败。比如， a{6} 将匹配6个 'a' , 但是不能是5个;
{m,n}:对正则式进行 m 到 n 次匹配，在 m 和 n 之间取尽量多。比如，a{3,5} 将匹配 3 到 5个 'a'。忽略 m 意为指定下界为0，忽略 n 指定上界为无限次。逗号不能省略，否则无法辨别修饰符应该忽略哪个边界。
{m,n}?:前一个修饰符的非贪婪模式，只匹配尽量少的字符次数。比如，对于 'aaaaaa'， a{3,5} 匹配 5个 'a' ，而 a{3,5}? 只匹配3个 'a';
\:转义特殊字符（允许你匹配 '*', '?', 或者此类其他）
[]:用于表示一个字符集合;
\d:匹配任何Unicode十进制数（就是在Unicode字符目录[Nd]里的字符）。这包括了[0-9] ，和很多其他的数字字符。如果设置了 ASCII 标志，就只匹配 [0-9] 。
++++++++++++++++++++++++++++++++++++++++++

格式化空格对齐:
rjust，向右对其，在左边补空格: s = "123".rjust(5) -> assert s == " 123"
ljust，向左对其，在右边补空格: s = "123".ljust(5) -> assert s == "123 "
center，让字符串居中，在左右补空格: s = "123".center(5) -> assert s == " 123 "
列表操作:http://www.runoob.com/python/python-lists.html
li = ['a', 'b', 'new'] ; print li.index("new") 输出为2； print "c" in li 输出为False
list.append('Google') 添加元素；del li[2] 删除元素；li[1:] 取列表第二个之后的元素；

用Pyinstaller打包发布exe应用：https://jingyan.baidu.com/article/a378c960b47034b3282830bb.html
pyinstaller -F test.py 打包成不需要安装依赖库的EXE文件，最后生成的执行文件在dist文件夹下，只需要将dist文件拷贝给用户即可。

posted on 2017-12-26 21:02 Sanny.Liu-CV&&ML 阅读(236) 评论(0) 收藏举报

刷新页面返回顶部

hansjorn

Python 个人常用汇总

导航

公告