collections

collections模块包含了内建类型之外的一些有用的工具,例如Counter、defaultdict、OrderedDict、deque以及namedtuple。

 

1. Counter(计数器)

from collections import Counter

# Sample data mixing strings and integers; any hashable value can be counted.
li = ["Dog", "Cat", "Mouse", 42, "Dog", 42, "Cat", "Dog"]

# Tally how many times each distinct element occurs.
a = Counter()
a.update(li)

print(a)

# Number of distinct elements in the list.
distinct = set(li)
print(len(distinct))

from collections import Counter

li = ["Dog", "Cat", "Mouse", "Dog", "Cat", "Dog"]
a = Counter(li)
print(a)
# Counter({'Dog': 3, 'Cat': 2, 'Mouse': 1})

# On Python 3, values() and keys() return view objects; wrap them in list()
# so that plain lists are printed.
print("{0} : {1}".format(list(a.values()), list(a.keys())))
# [3, 2, 1] : ['Dog', 'Cat', 'Mouse']

# The three most frequent elements together with their counts.
print(a.most_common(3))
# [('Dog', 3), ('Cat', 2), ('Mouse', 1)]

Counter可以作为序列与字典之间的桥梁:它把序列中的每个元素映射为"元素 → 出现次数"的字典。下面的例子统计一段文本中每个单词出现的次数:

import re
from collections import Counter

# Sample text whose word frequencies are counted below.
string = """   Lorem ipsum dolor sit amet, consectetur
    adipiscing elit. Nunc ut elit id mi ultricies
    adipiscing. Nulla facilisi. Praesent pulvinar,
    sapien vel feugiat vestibulum, nulla dui pretium orci,
    non ultricies elit lacus quis ante. Lorem ipsum dolor
    sit amet, consectetur adipiscing elit. Aliquam
    pretium ullamcorper urna quis iaculis. Etiam ac massa
    sed turpis tempor luctus. Curabitur sed nibh eu elit
    mollis congue. Praesent ipsum diam, consectetur vitae
    ornare a, aliquam a nunc. In id magna pellentesque
    tellus posuere adipiscing. Sed non mi metus, at lacinia
    augue. Sed magna nisi, ornare in mollis in, mollis
    sed nunc. Etiam at justo in leo congue mollis.
    Nullam in neque eget metus hendrerit scelerisque
    eu non enim. Ut malesuada lacus eu nulla bibendum
    id euismod urna sodales.  """

# Every maximal run of word characters counts as one word.
word_pattern = re.compile(r'\w+')
words = word_pattern.findall(string)

# Normalise case so that "Lorem" and "lorem" are counted together.
lower_words = list(map(str.lower, words))

# Map each lower-cased word to the number of times it occurs.
word_counts = Counter()
word_counts.update(lower_words)
print(word_counts)

  

2.deque(双端队列)

import time
from collections import deque

# Size of each benchmark container and the number of operations that each
# benchmark function performs on it.
num = 100000


def append(c, count=None):
    """Append consecutive integers, starting at 0, to the right end of ``c``.

    Args:
        c: Container exposing ``append`` (for example a ``list`` or ``deque``).
        count: Number of items to append. When omitted, the module-level
            ``num`` is used, so the one-argument call keeps working.
    """
    total = num if count is None else count
    for i in range(total):
        c.append(i)


def appendleft(c, count=None):
    """Insert consecutive integers, starting at 0, at the left end of ``c``.

    A ``deque`` is filled through ``appendleft``; any other container is
    filled through ``insert(0, ...)``.

    Args:
        c: A ``deque``, or a container exposing ``insert`` such as a ``list``.
        count: Number of items to insert. When omitted, the module-level
            ``num`` is used, so the one-argument call keeps working.
    """
    total = num if count is None else count
    if isinstance(c, deque):
        for i in range(total):
            c.appendleft(i)
    else:
        for i in range(total):
            c.insert(0, i)


def pop(c, count=None):
    """Remove items from the right end of ``c``, one ``pop()`` call at a time.

    Args:
        c: Container exposing ``pop`` (for example a ``list`` or ``deque``).
        count: Number of items to remove. When omitted, the module-level
            ``num`` is used, so the one-argument call keeps working.

    Raises:
        IndexError: If ``c`` runs out of items before ``count`` removals.
    """
    total = num if count is None else count
    for _ in range(total):
        c.pop()


def popleft(c, count=None):
    """Remove items from the left end of ``c``, one call at a time.

    A ``deque`` is drained through ``popleft()``; any other container is
    drained through ``pop(0)``.

    Args:
        c: A ``deque``, or a container supporting ``pop(0)`` such as a ``list``.
        count: Number of items to remove. When omitted, the module-level
            ``num`` is used, so the one-argument call keeps working.

    Raises:
        IndexError: If ``c`` runs out of items before ``count`` removals.
    """
    total = num if count is None else count
    if isinstance(c, deque):
        for _ in range(total):
            c.popleft()
    else:
        for _ in range(total):
            c.pop(0)


# Time every operation on a fresh ``num``-element container of each type and
# report the elapsed time together with the resulting throughput.
for container in [deque, list]:
    for operation in [append, appendleft, pop, popleft]:
        c = container(range(num))
        # perf_counter() is the high-resolution clock intended for measuring
        # short durations; time.time() is a wall clock and can be too coarse.
        start = time.perf_counter()
        operation(c)
        elapsed = time.perf_counter() - start
        # A very fast run can still measure as zero; avoid ZeroDivisionError.
        rate = num / elapsed if elapsed > 0 else float("inf")
        print("Completed {0}/{1} in {2} seconds: {3} ops/sec".format(
            container.__name__, operation.__name__, elapsed, rate))

deque 在头尾两端的插入和删除都是 O(1) 操作,而 list 在头部插入或删除需要移动其余所有元素(O(n)),因此 deque 在两端操作上的效率高于 list。

from collections import deque

# Start from deque([0, 1, 2, 3, 4]) and add one item at each end.
q = deque(range(5))
q.append(5)
q.appendleft(6)
print(q)

# pop() / popleft() remove and return the rightmost / leftmost item.
print(q.pop())
print(q.popleft())
print(q)

# rotate() shifts the deque in place and returns None, so call it first and
# print the deque afterwards instead of printing the call's return value.
q.rotate(3)   # positive n: rotate n steps to the right
print(q)
q.rotate(-1)  # negative n: rotate to the left
print(q)

 

3.defaultdict(特殊的dict:构造时传入一个工厂函数(如list、int),访问不存在的key时会自动用它生成默认value)

from collections import defaultdict

s = "the quick brown fox jumps over the lazy dog"

# Split the sentence on whitespace.
words = s.split()

# Map each word to the list of positions at which it occurs; missing keys
# start out as an empty list thanks to the ``list`` default factory.
location = defaultdict(list)
for position, word in enumerate(words):
    positions = location[word]
    positions.append(position)

print(location)

 结果为:

defaultdict(<class 'list'>, {'the': [0, 6], 'quick': [1], 'brown': [2], 'fox': [3], 'jumps': [4], 'over': [5], 'lazy': [7], 'dog': [8]})

import collections

s = [('yellow', 1), ('blue', 2), ('yellow', 3), ('blue', 4), ('red', 1)]

d = collections.defaultdict(list)  # grouping with defaultdict
g = {}                             # grouping with a plain dict and setdefault
e = {}                             # plain assignment: later values overwrite earlier ones

for key, value in s:
    # defaultdict creates the empty list on first access to a missing key.
    d[key].append(value)
    # setdefault stores [] for a missing key, then returns the stored list.
    g.setdefault(key, []).append(value)
    # Plain assignment keeps only the last value seen for each key.
    e[key] = value

for mapping in (d, g, e):
    print(list(mapping.items()))

结果为

[('yellow', [1, 3]), ('blue', [2, 4]), ('red', [1])]
[('yellow', [1, 3]), ('blue', [2, 4]), ('red', [1])]
[('yellow', 3), ('blue', 4), ('red', 1)]

 

4.namedtuple(带字段名的不可变元组,可以像字典一样按名称访问字段)

from collections import namedtuple

# Define a namedtuple type ``User`` with the fields name, sex and age.
User = namedtuple('User', ['name', 'sex', 'age'])

# Create a User from keyword arguments.
user = User(name='kongxx', sex='male', age=21)
print(user)
# User(name='kongxx', sex='male', age=21)

# A User can also be created from an existing list via the "_make" classmethod.
ser = User._make(['kongxx', 'male', 21])
print(ser)
# User(name='kongxx', sex='male', age=21)

# Read every field by name, in declaration order: name, sex, age.
for field_name in User._fields:
    print(getattr(user, field_name))

# A namedtuple is immutable: "_replace" returns a new object with the given
# fields changed, so the result has to be rebound.
user = user._replace(age=22)
print(user)
# User(name='kongxx', sex='male', age=22)

# Convert the User into a mapping with "_asdict".
print(user._asdict())
# {'name': 'kongxx', 'sex': 'male', 'age': 22} on Python 3.8+;
# OrderedDict([('name', 'kongxx'), ('sex', 'male'), ('age', 22)]) on older versions

结果为

User(name='kongxx', sex='male', age=21)
User(name='kongxx', sex='male', age=21)
kongxx
male
21
User(name='kongxx', sex='male', age=22)
OrderedDict([('name', 'kongxx'), ('sex', 'male'), ('age', 22)])
(注:Python 3.8 及以上版本中 _asdict() 返回普通 dict,最后一行输出为 {'name': 'kongxx', 'sex': 'male', 'age': 22})

 

5.OrderedDict(有序字典)

from collections import OrderedDict

dd = {'banana': 3, 'apple':4, 'pear': 1, 'orange': 2}

# Ordered by key: walk the keys in sorted order and look each value up.
kd = OrderedDict((name, dd[name]) for name in sorted(dd))
print(kd)

# Ordered by value: sort the keys by their associated value.
keys_by_value = sorted(dd, key=dd.get)
vd = OrderedDict((name, dd[name]) for name in keys_by_value)
print(vd)

#输出
OrderedDict([('apple', 4), ('banana', 3), ('orange', 2), ('pear', 1)])
OrderedDict([('pear', 1), ('orange', 2), ('banana', 3), ('apple', 4)])