# Count how often each word occurs in `words` (an iterable of strings defined
# earlier in the file). Words of exactly one character are skipped.
wordsDict = {}  # maps word -> number of occurrences
for word in words:
    if len(word) == 1:  # a single character is not treated as a word
        continue
    # First sighting starts from 0; every occurrence adds 1.
    wordsDict[word] = wordsDict.get(word, 0) + 1

# Rank the (word, count) pairs by count, highest first. sorted() is stable,
# so words with equal counts keep their first-seen order.
wordsDict_seq = sorted(wordsDict.items(), key=lambda x: x[1], reverse=True)
# Top 15 entries. This is a bare expression: it shows a value only in an
# interactive session and has no effect when the file runs as a script.
wordsDict_seq[:15]

# Blacklist filtering: words that should not appear in the ranking.
stopWords = ["公司", "行业", "000", "用于", "情况", "方面", "一种", "要求", "对于", "进行", "一般", "212", "实现", "处理", "通过", "投入", "随着"]
for word in stopWords:
    # Remove the word if it was counted; words that never occurred are ignored.
    wordsDict.pop(word, None)

# Re-rank after filtering, again by count in descending order.
wordsDict_seq = sorted(wordsDict.items(), key=lambda x: x[1], reverse=True)
wordsDict_seq[:15]