python 词云 实例

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Created on Wed Jan 23 16:03:41 2019

@author: Administrator
"""


import os
import sys
import docx
import jieba
import jieba.posseg as pseg
import re
import collections
from PIL import Image
import numpy as np
from docx import Document
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import wordcloud

# Work from the project folder. A raw string is used because '\w' inside a
# normal string literal is an invalid escape sequence (it triggers a warning
# on current Python versions); the resulting path is unchanged.
os.chdir(r'E:\wordcloud')
# NOTE(review): n and wd_lists are not referenced anywhere in the visible
# script; kept in case code outside this view relies on them.
n = 0
wd_lists = ''

# Dump the text of the Word document into words.txt. Every paragraph is split
# on whitespace and the pieces are written back-to-back with no separator, so
# the file ends up as one whitespace-free run of text.
document = Document('bid_document.docx')
with open('words.txt', 'w', encoding='utf-8') as f:
    for paragraph in document.paragraphs:
        f.writelines(paragraph.text.split())

# Load the stop-word list, one entry per line. The mapping's values are all
# None; it is used for membership tests on its keys.
# `with` guarantees the file is closed even if reading fails.
with open('chineseStopWords.txt', 'r', encoding='utf-8') as f:
    stopwords = dict.fromkeys(f.read().split('\n'))

# Load the user-defined dictionary into jieba.
jieba.load_userdict('userdict.txt')
# Individual terms can alternatively be registered with jieba.add_word(...).

# Read back the text previously written to words.txt and split it on
# whitespace into a list of pieces.
with open('words.txt', 'r', encoding='utf-8') as f:
    text = f.read().split()

# Segment the whole text. Joining the pieces equals text[0] when the text
# contains a single whitespace-free run, but unlike text[0] it does not raise
# IndexError for an empty list and does not drop any piece after the first.
segs = jieba.cut("".join(text))

# Keep only tokens that are not stop words and are at least two characters
# long; spaces inside a kept token are removed.
mytext_list = [
    seg.replace(" ", "")
    for seg in segs
    if seg not in stopwords and len(seg) > 1
]
# Slash-separated form of the kept tokens.
cloud_text = "/".join(mytext_list)

# Word-frequency statistics
word_counts = collections.Counter()
word_counts.update(mytext_list)  # tally every kept token
# The ten most frequent tokens, as (token, count) pairs
word_counts_top10 = word_counts.most_common(10)
print(word_counts_top10)  # print for a quick sanity check
       
# Render the word frequencies as a word cloud
mask = np.array(Image.open('mask.png'))  # image array used as the cloud mask

# NOTE(review): per the wordcloud documentation, width is ignored when a mask
# is supplied (the mask's shape is used instead) - confirm.
cloud_options = dict(
    background_color="black",                  # background colour
    max_words=30,                              # maximum number of words shown
    font_path="C:/Windows/Fonts/simfang.ttf",  # font file used for rendering
    min_font_size=15,
    max_font_size=100,
    width=400,                                 # canvas width
    mask=mask,
)
wc = WordCloud(**cloud_options)

# Build the cloud from the counted frequencies rather than from raw text.
wc.generate_from_frequencies(word_counts)

# Recolour the words using the colours of the mask image.
image_colors = wordcloud.ImageColorGenerator(mask)
wc.recolor(color_func=image_colors)

# Show the result without axes, then save it to disk.
plt.imshow(wc)
plt.axis('off')
plt.show()
wc.to_file("pic.png")

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值