统计文章单词频率

 

 

 1 Str = """!@#¥%……&*()—i'm,possible— cfor the of Information Of 16+522 the the region out of the  poverty by December."""
 2 print(Str)
 3 NewStr = ""
 4 for i in range(len(Str)):
 5     if Str[i].isalpha():#如果是字母,直接添加 if 65 <= Str[i] and Str[i] <= 90 or 97 <= Str[i] and Str[i] <= 122
 6         NewStr += Str[i]
 7     elif len(NewStr) > 0 and NewStr[-1] != " ":
 8         #例如一开都不是字母,那么就该忽略,否则NewStr[-1] != " "报BUG,因为NewStr此时是空,不存在任何一项
 9         # 如果给NewStr赋值空字符,那么len(NewStr) > 0这句话是可以省略的
10              NewStr += " " #由于数字,标点,各种符号被忽略了,为了防止字母直接粘连在一起形成新的单子,要加空格隔开,例如:i'm,possioble
11     else:
12         continue
13 """
14         #下面这段代码只能吃掉多余的空格
15         for i in range(len(Str)):
16             if Str[i] == " " and i < (len(Str) - 1) and Str[i+1] == " ":
17                  continue
18             NewStr +=Str[i]
19 """
20 '''
21 print(NewStr)
22 WordList = NewStr.split(" ")
23 WordSet = set(WordList)
24 print(WordSet)
25 WordDict = {}
26 WordFreq = 0
27 
28 for Word in WordSet:
29     for word in WordList:
30         if Word == word:
31             WordFreq = WordFreq +1
32     WordDict[word] = WordFreq
33     WordFreq = 0
34 
35 print(WordDict)
36 '''
37 WordList = NewStr.split(" ")
38 WordFreq = []
39 for word in WordList:
40     WordFreq.append(WordList.count(word))
41 #dict(zip(['one', 'two', 'three'], [1, 2, 3]))   # 映射函数方式来构造字典
42 #dict([('one', 1), ('two', 2), ('three', 3)])    # 可迭代对象方式来构造字典
43 d = dict(zip(WordList, WordFreq))
44 print(d)

 

posted @ 2020-12-21 23:05  Σωκράτης  阅读(59)  评论(4)    收藏  举报