分享

wordcloud词云分析及词频统计绘图

 小飞苑 2017-11-15
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Date    : 2017-10-18 17:52:25
# @Author  : awakeljw (liujw15@mails.tsinghua.edu.cn)
# @Link    : http://blog.csdn.net/awakeljw/
# @Version : $Id$
"""Word-frequency charts and a word cloud for a Chinese text file.

The script segments a text file with jieba, plots the most frequent
keywords as a horizontal bar chart and as a bar chart on polar axes, and
finally renders a word cloud shaped by a mask image.
"""

from collections import Counter

import jieba
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import PIL.Image  # importing the submodule guarantees PIL.Image is loaded
from pylab import mpl
from wordcloud import WordCloud

mpl.rcParams['font.sans-serif'] = ['SentyTang']  # default font family
mpl.rcParams['axes.unicode_minus'] = False
matplotlib.rc('xtick', labelsize=14)
matplotlib.rc('ytick', labelsize=14)

# Tokens (punctuation, whitespace and common function words) that are
# excluded from the keyword statistics.
rem = [',','、','。','的','和','\u3000','图','串','“','”',' ','与','是','端','在','中','了','\n']


def wordcloudplot(txt, font_path='字体文件', mask_path='图片文件',
                  output_path='输出文件'):
    """Render, save and display a word cloud for *txt*.

    Args:
        txt: Space-separated words to draw.
        font_path: Font file handed to ``WordCloud``. The default is a
            placeholder and must be replaced with a real font file.
        mask_path: Image whose pixel array is used as the cloud mask. The
            default is a placeholder.
        output_path: File the rendered cloud is written to. The default is
            a placeholder.
    """
    # Load the mask inside a context manager so the image file is closed.
    with PIL.Image.open(mask_path) as image:
        mask = np.array(image)

    cloud = WordCloud(
        font_path=font_path,
        background_color="white",
        margin=5,
        width=1800,
        height=800,
        mask=mask,
        max_words=2000,
        max_font_size=60,
        random_state=42,
    ).generate(txt)
    cloud.to_file(output_path)

    # Draw on a fresh figure so the image never lands on axes left over
    # from an earlier chart.
    plt.figure()
    plt.imshow(cloud)
    plt.axis("off")
    plt.show()


def _top_keywords(words, stopwords, limit):
    """Return the *limit* most frequent words not in *stopwords* as a dict."""
    excluded = set(stopwords)
    counts = Counter(word for word in words if word not in excluded)
    return dict(counts.most_common(limit))


def _plot_bar_chart(labels, counts, output='输出词频图标'):
    """Save a horizontal bar chart of keyword counts to *output*."""
    fig, ax = plt.subplots()
    positions = np.arange(len(counts))
    height = 0.8
    # Edge alignment is explicit so the tick offset below puts each label
    # at the middle of its bar regardless of matplotlib's default.
    ax.barh(positions, counts, height=height, align='edge')
    ax.set_yticks(positions + height / 2)
    ax.set_yticklabels(labels)
    ax.set_xlabel('出现次数', fontsize=20, labelpad=5)
    ax.set_ylabel('关键词', fontsize=20, labelpad=5)
    ax.set_title('涡流发生器对激波串振荡的控制', fontsize=25)
    fig.savefig(output)
    # The bar chart is only saved, not displayed.
    plt.close(fig)


def _plot_polar_chart(labels, counts, output='输出pie极坐标图'):
    """Save a bar chart of keyword counts on polar axes to *output*."""
    # linspace yields exactly len(counts) angles; arange with a float step
    # can produce one extra element.
    theta = np.linspace(0.0, 2 * np.pi, len(counts), endpoint=False)
    width = np.pi / 6

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='polar')
    bars = ax.bar(theta, counts, width=width, bottom=0.0, align='edge')
    ax.set_xticks(theta + width / 2)
    ax.set_xticklabels(labels)

    # Scale colours by the largest count so they span the colormap instead
    # of saturating for every count above a fixed threshold.
    peak = max(counts)
    for count, bar in zip(counts, bars):
        bar.set_facecolor(plt.cm.viridis(count / peak))
        bar.set_alpha(0.5)
    fig.savefig(output)


def main(text_path=r'I:\tensorflow\ciyun\paper.txt', encoding='gb18030',
         top_n=20):
    """Plot keyword statistics and a word cloud for a text file.

    Args:
        text_path: Text file to analyse.
        encoding: Encoding used to decode *text_path*.
        top_n: Maximum number of keywords shown in the frequency charts.
    """
    with open(text_path, 'r', encoding=encoding) as handle:
        text = handle.read()
    words = list(jieba.cut(text))

    # Stop words are dropped before ranking so that up to top_n real
    # keywords are charted.
    freq = _top_keywords(words, rem, top_n)
    print(freq)

    if freq:  # nothing to chart when every token was a stop word
        labels = list(freq)
        counts = list(freq.values())
        _plot_bar_chart(labels, counts)
        _plot_polar_chart(labels, counts)
        plt.show()

    # Only words longer than one character go into the word cloud.
    txt = ' '.join(word for word in words if len(word) > 1)
    wordcloudplot(txt)  # render the word cloud


if __name__ == '__main__':
    main()

    本站是提供个人知识管理的网络存储空间,所有内容均由用户发布,不代表本站观点。请注意甄别内容中的联系方式、诱导购买等信息,谨防诈骗。如发现有害或侵权内容,请点击一键举报。
    转藏 分享 献花(0)

    0条评论

    发表

    请遵守用户 评论公约