Python cookbook-读书笔记01 - 大哉昆仑

公告

1 数据结构和算法

1.1 Unpacking a sequence into separate variable（解包，赋值）

>>> data = [ 'ACME', 50, 91.1, (2012, 12, 21) ]
>>> name, shares, price, (year, mon, day) = data

#可以用下划线作为占位变量名，丢弃不需要的值
>>> data = [ 'ACME', 50, 91.1, (2012, 12, 21) ]
>>> _, shares, price, _ = data

1.2 Unpacking elements from iterables of arbitrary length (从任意长度解包)

*middle可以表示中间的所有域，middle是一个list

def drop_first_last(grades):
    """Return the average of ``grades`` with the first and last entries dropped.

    Args:
        grades: An iterable of numbers containing at least three items.

    Returns:
        The arithmetic mean of the items between the first and the last one.

    Raises:
        ValueError: If fewer than three grades are given, so that nothing is
            left to average once the first and last are removed.
    """
    # Star-unpacking always binds ``middle`` to a list, even an empty one.
    _, *middle, _ = grades
    if not middle:
        raise ValueError('need at least three grades to drop the first and last')
    # No ``avg`` helper exists in this file, so the mean is computed inline.
    return sum(middle) / len(middle)

>>> record = ('Dave', 'dave@example.com', '773-555-1212', '847-555-1212')
>>> name, email, *phone_numbers = record
>>> phone_numbers
['773-555-1212', '847-555-1212']

处理变长的数据结构和类型：

# Tagged records: the first field names the handler, the rest are its arguments.
records = [
    ('foo', 1, 2),
    ('bar', 'hello'),
    ('foo', 3, 4),
]


def do_foo(x, y):
    """Print the two payload values of a 'foo' record."""
    print('foo', x, y)


def do_bar(s):
    """Print the single payload value of a 'bar' record."""
    print('bar', s)


# Split each record into its tag and a list of the remaining fields, then
# forward those fields to the matching handler. Records with any other tag
# are skipped.
for tag, *args in records:
    if tag == 'bar':
        do_bar(*args)
    elif tag == 'foo':
        do_foo(*args)

1.3 Keeping the last N items

　　deque 是双端队列（double-ended queue），可以用 maxlen 指定最大长度。

>>> q = deque(maxlen=3)
>>> q.append(1)
>>> q.append(2)
>>> q.append(3)
>>> q
deque([1, 2, 3], maxlen=3)
>>> q.append(4)
>>> q
deque([2, 3, 4], maxlen=3)
#队列已满时，从右边追加新元素会把最左边（最早加入）的元素抛出

在文件中查找匹配的行，并保留每个匹配行之前最近的5行记录：

from collections import deque

def search(lines, pattern, history=5):
    """Yield each line containing ``pattern`` along with the lines before it.

    Args:
        lines: Iterable of text lines (for example an open file).
        pattern: Substring looked for in each line.
        history: Maximum number of preceding lines to remember.

    Yields:
        ``(line, previous_lines)`` tuples, where ``previous_lines`` is a deque
        holding up to ``history`` lines seen before ``line``. The same deque
        object is reused and keeps changing as iteration continues, so copy it
        if it has to outlive the current step.
    """
    previous_lines = deque(maxlen=history)
    for line in lines:
        if pattern in line:
            yield line, previous_lines
        # Record every line inside the loop, after the check so that a match
        # is not part of its own history. Appending after the loop left the
        # history empty for every match and failed on empty input.
        previous_lines.append(line)
# Example use on a file
if __name__ == '__main__':
    with open('somefile.txt') as f:
        # Each result pairs a matching line with the lines that preceded it.
        for line, prevlines in search(f, 'python', 5):
            # Everything is printed with end=''/sep='' so no extra newline
            # or separator is added to the lines themselves.
            print(*prevlines, sep='', end='')
            print(line, end='')
            print('-' * 20)

1.4 Finding the largest or smallest N items (找到最大或最小的N个值)

　　heapq 模块含有nlargest(),nsmallest()

import heapq
nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
# Ask for the three largest values, then the three smallest.
print(heapq.nlargest(n=3, iterable=nums))   # Prints [42, 37, 23]
print(heapq.nsmallest(n=3, iterable=nums))  # Prints [-4, 1, 2]

这两个函数可以接受参数key,用来指定复杂结构的排序方法

import heapq

# Sample holdings: one dict per stock.
portfolio = [
    {'name': 'IBM', 'shares': 100, 'price': 91.1},
    {'name': 'AAPL', 'shares': 50, 'price': 543.22},
    {'name': 'FB', 'shares': 200, 'price': 21.09},
    {'name': 'HPQ', 'shares': 35, 'price': 31.75},
    {'name': 'YHOO', 'shares': 45, 'price': 16.35},
    {'name': 'ACME', 'shares': 75, 'price': 115.65},
]
# key= selects the field to rank by: here the three lowest-priced entries.
cheap = heapq.nsmallest(3, portfolio, key=lambda stock: stock['price'])
print(cheap)
#return
[{'price': 16.35, 'shares': 45, 'name': 'YHOO'}, {'price': 21.09, 'shares': 200, 'name': 'FB'}, {'price': 31.75, 'shares': 35, 'name': 'HPQ'}]

1.6 defaultdict()

from collections import defaultdict

# Group values by key into lists. ``pairs`` is not defined in this snippet;
# it stands for any iterable of (key, value) tuples supplied by the reader.
d = defaultdict(list)
for key, value in pairs:
    # A missing key is created with an empty list on first access, so no
    # explicit "is the key already there?" check is needed.
    d[key].append(value)

1.7 Keeping dictionaries in order

OrderedDict()

from collections import OrderedDict
# An OrderedDict yields its keys in the order they were first inserted.
d = OrderedDict()
d['foo'] = 1
d['bar'] = 2
d['spam'] = 3
d['grok'] = 4

for key in d:
    # The loop body has to be indented; at column 0 it is an IndentationError.
    print(key, d[key])
# Outputs "foo 1", "bar 2", "spam 3", "grok 4"

1.8 Calculating with dictionaries

# Share prices keyed by ticker symbol.
prices = {
    'ACME': 45.23,
    'AAPL': 612.78,
    'IBM': 205.55,
    'HPQ': 37.20,
    'FB': 10.75,
}
# Pair each price with its ticker so that min()/max() compare by price first.
# Iterating a dict yields its keys, in the same order as .values().
min_price = min(zip(prices.values(), prices))
# min_price is (10.75, 'FB')
max_price = max(zip(prices.values(), prices))
# max_price is (612.78, 'AAPL')

# A zip() iterator can be consumed only once; build a new one for each use.

1.9 Finding commonalities in two dictionaries (找到两个字典的相同点)

a = {'x': 1, 'y': 2, 'z': 3}
b = {'w': 10, 'x': 11, 'y': 2}

# Key views and item views support set operations directly.
# Find keys in common
a.keys() & b.keys()    # { 'x', 'y' }
# Find keys in a that are not in b
a.keys() - b.keys()    # { 'z' }
# Find (key,value) pairs in common
a.items() & b.items()  # { ('y', 2) }

# The same operations can filter out keys:
# make a new dictionary with certain keys removed
c = {k: a[k] for k in a.keys() - {'z', 'w'}}
# c is {'x': 1, 'y': 2}

1.10 Removing duplicates and maintaining order (去重且保持原来的顺序)

#如果序列中的值是可哈希的（hashable），例如 int、str、tuple（注意 list 不可哈希）

def dedupe(items, key=None):
    """Yield the elements of ``items`` in their original order, skipping repeats.

    Args:
        items: Iterable to filter.
        key: Optional one-argument function that maps an item to a hashable
            value used for the duplicate check. Supply it when the items
            themselves are unhashable (e.g. dicts or lists). When omitted,
            the items are compared directly and must be hashable.

    Yields:
        Each item the first time it (or its key value) is encountered.
    """
    seen = set()
    for item in items:
        marker = item if key is None else key(item)
        if marker not in seen:
            yield item
            seen.add(marker)

>>> a = [1, 5, 2, 1, 9, 1, 5, 10]
>>> list(dedupe(a))
[1, 5, 2, 9, 10]

#如果只是为了去重而不关心顺序，则可以用set（结果不保证保持原来的顺序）

>>> a
[1, 5, 2, 1, 9, 1, 5, 10]
>>> set(a)
{1, 2, 10, 5, 9}

#一个有用的场景：读文件的时候过滤掉重复的行

# NOTE(review): ``somefile`` is not defined in this snippet; presumably it
# holds the path of the file to read. Confirm before running.
with open(somefile, 'r') as f:
    # Passing the file object to dedupe() yields each distinct line once.
    for line in dedupe(f):
        ...  # placeholder body: handle each distinct line here

posted on 2014-01-08 22:04 大哉昆仑阅读(296) 评论(0) 收藏举报

刷新页面返回顶部