# [python] 代码库 (code library)
# Count how often each multi-character word occurs in `words`.
wordsDict = {}  # maps word -> number of occurrences
for word in words:
    # Single-character tokens are not treated as words.
    if len(word) == 1:
        continue
    # dict.get supplies 0 the first time a word is seen.
    wordsDict[word] = wordsDict.get(word, 0) + 1

# (word, count) pairs ordered by count, highest first.
wordsDict_seq = sorted(wordsDict.items(), key=lambda x: x[1], reverse=True)
# Bare expression: displays the 15 most frequent entries when evaluated in a
# REPL/notebook cell; it has no effect when run as a plain script.
wordsDict_seq[:15]
# Blacklist filtering: remove the listed stop words from the frequency table.
stopWords = ["公司","行业","000","用于","情况","方面","一种","要求","对于","进行","一般","212","实现","处理","通过","投入","随着"]
for word in stopWords:
    # pop with a default deletes the entry if present and is a no-op otherwise.
    wordsDict.pop(word, None)

# Re-rank the remaining (word, count) pairs by count, highest first.
wordsDict_seq = sorted(wordsDict.items(), key=lambda x: x[1], reverse=True)
# Bare expression: displays the top 15 entries in a REPL/notebook cell only.
wordsDict_seq[:15]