列表、元组、字典、集合
1.列表的增删查改
>>> ls=list('112233123') >>> ls ['1', '1', '2', '2', '3', '3', '1', '2', '3'] >>> ls.sort() >>> ls ['1', '1', '1', '2', '2', '2', '3', '3', '3'] >>> ls.pop() '3' >>> ls ['1', '1', '1', '2', '2', '2', '3', '3'] >>> ls.append(5) >>> ls ['1', '1', '1', '2', '2', '2', '3', '3', 5] >>> ls.insert(1,4) >>> ls ['1', 4, '1', '1', '2', '2', '2', '3', '3', 5] >>> ls.index(4) 1 >>> ls[1]=6 >>> ls ['1', 6, '1', '1', '2', '2', '2', '3', '3', 5] >>>
>>> s=list('turtle')
>>> s
['t', 'u', 'r', 't', 'l', 'e']
>>>
2.字典
>>> d={'广东':'广州','广西':'南宁','福建':'福州','江西':'南昌'}
>>> d
{'广东': '广州', '广西': '南宁', '福建': '福州', '江西': '南昌'}
>>> d[广东]
Traceback (most recent call last):
File "<pyshell#17>", line 1, in <module>
d[广东]
NameError: name '广东' is not defined
>>> d['广东']
'广州'
>>> d.pop('广州')
Traceback (most recent call last):
File "<pyshell#19>", line 1, in <module>
d.pop('广州')
KeyError: '广州'
>>> d
{'广东': '广州', '广西': '南宁', '福建': '福州', '江西': '南昌'}
>>> d.pop('广东')
'广州'
>>> d
{'广西': '南宁', '福建': '福州', '江西': '南昌'}
>>> d.keys()
dict_keys(['广西', '福建', '江西'])
>>> d.values()
dict_values(['南宁', '福州', '南昌'])
>>> d.items()
dict_items([('广西', '南宁'), ('福建', '福州'), ('江西', '南昌')])
>>> d.get('江西')
'南昌'
>>> d['湖南']='长沙'
>>> d
{'广西': '南宁', '福建': '福州', '江西': '南昌', '湖南': '长沙'}
>>>
3.列表、元组、集合、字典。
>>> d {'广西': '南宁', '福建': '福州', '江西': '南昌', '湖南': '长沙'} >>> s=set(ls) >>> s {'1', 5, 6, '2', '3'} >>> s=set('112233123') >>> >>> s {'1', '3', '2'} >>> a=set(d) >>> a {'江西', '广西', '福建', '湖南'} >>> tu=tuple('112233123456') >>> tu ('1', '1', '2', '2', '3', '3', '1', '2', '3', '4', '5', '6') >>> for i in ls: print(i) 1 6 1 1 2 2 2 3 3 5 >>> for i in tu: print(i,end=' ') 1 1 2 2 3 3 1 2 3 4 5 6 >>> for i in s: print(i) 1 3 2 >>> for i in d: print(i) 广西 福建 江西 湖南 >>> for i in d: print(d[i]) 南宁 福州 南昌 长沙 >>> for i in d: printi,(d[i]) Traceback (most recent call last): File "<pyshell#53>", line 2, in <module> printi,(d[i]) NameError: name 'printi' is not defined >>> for i in d: print(i,d[i]) 广西 南宁 福建 福州 江西 南昌 湖南 长沙 >>>
4.词频统计
# Word-frequency count: print the ten most common words of a short passage.
from collections import Counter

news='''My father was a self-taught mandolin player. He was one of the best string instrument players in our town. He could not read music, but if he heard a tune a few times, he could play it. When he was younger, he was a member of a small country music band. They would play at local dances and on a few occasions would play for the local radio station. He often told us how he had auditioned and earned a position in a band that featured Patsy Cline as their lead singer. He told the family that after he was hired he never went back. Dad was a very religious man. He stated that there was a lot of drinking and cursing the day of his audition and he did not want to be around that type of environment. '''

# Normalise case and turn the punctuation used in the passage (',' and '.')
# into spaces so that "player." and "player" count as the same word.
news = news.lower().translate(str.maketrans(',.', '  '))

# split() with no argument collapses runs of whitespace.  split(' ') would
# emit an empty string for every doubled space left behind by the punctuation
# replacement, and '' would then be counted as a "word".
words = news.split()

# Counter tallies every word in a single pass (calling words.count() once per
# word is quadratic) and avoids shadowing the builtin name `dict`.
word_counts = Counter(words)

# (word, occurrences) pairs, most frequent first; words with equal counts keep
# the order in which they first appear in the text.
count = word_counts.most_common()

# Slice instead of indexing range(10): a text with fewer than ten distinct
# words then prints what it has rather than raising IndexError.
for item in count[:10]:
    print(item)




# Sort in place by the second element of each item, largest first.
# NOTE(review): `wc` is not defined anywhere in the visible source; presumably
# it is a list of (word, count) pairs -- confirm before running this line.
wc.sort(key=lambda x: x[1], reverse=True)
# Word-frequency count, file-based variant: write a passage to a.txt, read it
# back, and print the ten most common words after dropping function words.
from collections import Counter

# `with` closes the file even if the write fails part-way.
# NOTE(review): the passage contains "Dad was -a very", which looks like a
# typo; it is left unchanged here, so "-a" is counted as its own token.
with open('a.txt', 'w', encoding='utf-8') as fo:
    fo.write('''My father was a self-taught mandolin player. He was one of the best string instrument players in our town. He could not read music, but if he heard a tune a few times, he could play it. When he was younger, he was a member of a small country music band. They would play at local dances and on a few occasions would play for the local radio station. He often told us how he had auditioned and earned a position in a band that featured Patsy Cline as their lead singer. He told the family that after he was hired he never went back. Dad was -a very religious man. He stated that there was a lot of drinking and cursing the day of his audition and he did not want to be around that type of environment.''')

with open('a.txt', 'r', encoding='utf-8') as fo:
    news = fo.read()

# Normalise case and replace the punctuation used in the passage with spaces.
news = news.lower().translate(str.maketrans(',.', '  '))

# All words in the text; split() without an argument never yields ''.
words = news.split()

# Grammatical words to leave out of the statistics.
exp = {'', 'the', 'a', 'was', 'of', 'and', 'that', 'he', 'in'}

# Tally the remaining words in one pass.  Counting while walking `words`
# (rather than iterating a set of keys) keeps the order of tied words
# deterministic from run to run.
word_counts = Counter(word for word in words if word not in exp)

# (word, occurrences) pairs sorted by occurrences, most frequent first.
count = word_counts.most_common()

# Print the top 10; slicing avoids IndexError on very short texts.
for item in count[:10]:
    print(item)


浙公网安备 33010602011771号