from collections import Counter

import jieba
import matplotlib as mpl
import matplotlib.pyplot as plt
from wordcloud import WordCloud
# Print the most frequent multi-character words found in the text file.
# Location of the input text file (the word-cloud step further down reuses it).
txt = r"C:\Users\hua'wei\Desktop\zt\text.txt"

# Read the raw bytes inside a context manager so the handle is always closed.
# The bytes are passed to jieba undecoded, exactly as before.
with open(txt, "rb") as source:
    txt1 = source.read()

words = jieba.lcut(txt1)

# Tally every token whose length is not 1 (single-character tokens are skipped).
counts = Counter(word for word in words if len(word) != 1)

# most_common() orders by descending count and keeps first-seen order for ties,
# matching the previous stable reverse sort on the count.
items = counts.most_common()

# Slice instead of indexing items[0..9] so a text with fewer than ten distinct
# words prints what it has rather than raising IndexError.
for word, count in items[:10]:
    print("{0:<10}{1:>5}".format(word, count))
# Render a word cloud from the text file and save it as a PNG.
# Segment the file line by line (read as UTF-8) and join all tokens with single
# spaces in one pass; growing a string with repeated `+` is quadratic.
# `txt` is the input path defined earlier in this script.
with open(txt, "r", encoding="utf-8") as f:
    final = " ".join(token for line in f for token in jieba.cut(line))

# Build the word-cloud image from the space-separated tokens.
word_pic = WordCloud(
    # Background colour of the image.
    background_color="white",
    # Maximum number of words shown in the cloud.
    max_words=500,
    # Font file used to draw the words.
    font_path=r"C:\Windows\Fonts\simkai.ttf",
    width=2000,
    height=1000,
    # Largest font size used for any word.
    max_font_size=300,
    # Fixed random state (a seed, not a count of colour schemes) -- see the
    # wordcloud API docs; keeps the generated output repeatable.
    random_state=100,
).generate(final)

plt.imshow(word_pic)
# Hide the axes.
plt.axis("off")
# Save the figure into the output folder.
plt.savefig(r"C:\Users\hua'wei\Desktop\zt\zt.png")