
# Build a frequency table of the tokens in `words` (defined earlier in the file).
wordsDict = {}  # maps word -> number of occurrences
for word in words:
    if len(word) == 1:
        # Single characters are not treated as words, so they are not counted.
        continue
    # First sighting starts from 0; every occurrence adds 1.
    wordsDict[word] = wordsDict.get(word, 0) + 1

# (word, count) pairs ordered by count, highest first.
wordsDict_seq = sorted(wordsDict.items(), key=lambda x: x[1], reverse=True)
# Bare expression (top 15 entries): only displayed by an interactive
# shell/notebook; it is a no-op when the file is run as a script.
wordsDict_seq[:15]
# Blacklist filtering: remove stop words from the frequency table so they
# do not appear in the ranking below.
stopWords = ["公司","行业","000","用于","情况","方面","一种","要求","对于","进行","一般","212","实现","处理","通过","投入","随着"]
for word in stopWords:
    # pop() with a default removes the entry if present and is a no-op
    # otherwise, so no separate membership check is needed.
    wordsDict.pop(word, None)

# Re-rank after filtering: (word, count) pairs ordered by count, highest first.
wordsDict_seq = sorted(wordsDict.items(), key=lambda x: x[1], reverse=True)
# Bare expression (top 15 entries): only displayed by an interactive
# shell/notebook; it is a no-op when the file is run as a script.
wordsDict_seq[:15]



