1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257
|
# -*- coding: utf-8 -*-
"""Command-line tool interface to generate word clouds.
"""
from __future__ import absolute_import
import sys
import textwrap
# Guard against running this file directly as a script: the module relies on a
# package-relative import further down, which presumably fails outside the
# package context, so exit early with a message naming the supported entry
# points. sys.exit() with a string prints it to stderr and exits non-zero.
if __name__ == '__main__': # pragma: no cover
    sys.exit(textwrap.dedent(
"""
To execute the CLI, instead consider running:
wordcloud_cli --help
or
python -m wordcloud --help
"""))
import io
import re
import argparse
import wordcloud as wc
import numpy as np
from PIL import Image
from . import __version__
class FileType(object):
    """Factory for creating file object types.

    Port from argparse so we can support unicode file reading in Python2

    Instances of FileType are typically passed as type= arguments to the
    ArgumentParser add_argument() method. Text-mode files are opened with
    UTF-8 encoding; binary-mode files are opened without an encoding.

    Keyword Arguments:
        - mode -- A string indicating how the file is to be opened. Accepts the
            same values as the builtin open() function.
        - bufsize -- The file's desired buffer size. Accepts the same values as
            the builtin open() function.
    """

    def __init__(self, mode='r', bufsize=-1):
        self._mode = mode
        self._bufsize = bufsize

    def __call__(self, string):
        """Return an open file object for the command-line value ``string``.

        The value "-" selects a standard stream instead of a named file:
        stdin for read modes, stdout for write modes. For binary modes the
        underlying byte stream (``.buffer``) is returned in both directions.

        Raises:
            ValueError: if ``string`` is "-" and the mode is neither a read
                nor a write mode.
            argparse.ArgumentTypeError: if the named file cannot be opened.
        """
        binary = 'b' in self._mode

        # the special argument "-" means sys.std{in,out}
        if string == '-':
            if 'r' in self._mode:
                # A binary reader must get bytes, not decoded text; this
                # mirrors what the write branch does for stdout.
                return sys.stdin.buffer if binary else sys.stdin
            elif 'w' in self._mode:
                return sys.stdout.buffer if binary else sys.stdout
            else:
                msg = 'argument "-" with mode %r' % self._mode
                raise ValueError(msg)

        # all other arguments are used as file names
        try:
            # io.open() rejects an encoding for binary modes, so only pass
            # one for text modes.
            encoding = None if binary else "UTF-8"
            return io.open(string, self._mode, self._bufsize, encoding=encoding)
        except IOError as e:
            message = "can't open '%s': %s"
            raise argparse.ArgumentTypeError(message % (string, e))

    def __repr__(self):
        """Show the constructor call, omitting a default (-1) buffer size."""
        args = self._mode, self._bufsize
        args_str = ', '.join(repr(arg) for arg in args if arg != -1)
        return '%s(%s)' % (type(self).__name__, args_str)
class RegExpAction(argparse.Action):
    """argparse action that accepts a value only if it compiles as a regex.

    The pattern is compiled purely as a validity check; the original string
    (not the compiled pattern object) is what gets stored on the namespace.
    """

    def __init__(self, option_strings, dest, **kwargs):
        super(RegExpAction, self).__init__(
            option_strings=option_strings, dest=dest, **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        """Validate ``values`` as a regular expression and store it.

        Raises:
            argparse.ArgumentError: if ``values`` is not a valid pattern.
        """
        try:
            re.compile(values)
        except re.error as compile_error:
            reason = str(compile_error)
            raise argparse.ArgumentError(
                self, 'Invalid regular expression: ' + reason)
        else:
            setattr(namespace, self.dest, values)
def main(args, text, imagefile):
    """Generate a word cloud from ``text`` and write it as a PNG.

    Arguments:
        - args -- dict of keyword arguments forwarded to ``wc.WordCloud``.
        - text -- the text to build the word cloud from.
        - imagefile -- an already-open, writable binary file object that the
            PNG is saved to. It is closed when this function returns.
    """
    # imagefile was opened by the caller before generation started, so keep
    # the whole pipeline inside the ``with`` block: the handle is then closed
    # even if building or rendering the cloud raises, not only after a
    # successful save.
    with imagefile:
        wordcloud = wc.WordCloud(**args)
        wordcloud.generate(text)
        image = wordcloud.to_image()
        image.save(imagefile, format='png', optimize=True)
def make_parser():
    """Build and return the argparse parser for the word cloud CLI.

    The options are declared as an ordered table of ``(flag, keyword
    arguments)`` pairs and registered in that order, so the ``--help``
    listing follows the table.
    """
    pil_color_note = ' accepts any value from PIL.ImageColor.getcolor'

    option_table = (
        ('--text', dict(
            metavar='file', type=FileType(), default='-',
            help='specify file of words to build the word cloud (default: stdin)')),
        ('--regexp', dict(
            metavar='regexp', default=None, action=RegExpAction,
            help='override the regular expression defining what constitutes a word')),
        ('--stopwords', dict(
            metavar='file', type=FileType(),
            help='specify file of stopwords (containing one word per line)'
                 ' to remove from the given text after parsing')),
        ('--imagefile', dict(
            metavar='file', type=FileType('wb'), default='-',
            help='file the completed PNG image should be written to'
                 ' (default: stdout)')),
        ('--fontfile', dict(
            metavar='path', dest='font_path',
            help='path to font file you wish to use (default: DroidSansMono)')),
        ('--mask', dict(
            metavar='file', type=argparse.FileType('rb'),
            help='mask to use for the image form')),
        ('--colormask', dict(
            metavar='file', type=argparse.FileType('rb'),
            help='color mask to use for image coloring')),
        ('--contour_width', dict(
            metavar='width', default=0, type=float, dest='contour_width',
            help='if greater than 0, draw mask contour (default: 0)')),
        ('--contour_color', dict(
            metavar='color', default='black', type=str, dest='contour_color',
            help='use given color as mask contour color -' + pil_color_note)),
        ('--relative_scaling', dict(
            type=float, default=0, metavar='rs',
            help=' scaling of words by frequency (0 - 1)')),
        ('--margin', dict(
            type=int, default=2, metavar='width',
            help='spacing to leave around words')),
        ('--width', dict(
            type=int, default=400, metavar='width',
            help='define output image width')),
        ('--height', dict(
            type=int, default=200, metavar='height',
            help='define output image height')),
        ('--color', dict(
            metavar='color',
            help='use given color as coloring for the image -' + pil_color_note)),
        ('--background', dict(
            metavar='color', default='black', type=str, dest='background_color',
            help='use given color as background color for the image -'
                 + pil_color_note)),
        ('--no_collocations', dict(
            action='store_false', dest='collocations',
            help='do not add collocations (bigrams) to word cloud '
                 '(default: add unigrams and bigrams)')),
        ('--include_numbers', dict(
            action='store_true', dest='include_numbers',
            help='include numbers in wordcloud?')),
        ('--min_word_length', dict(
            type=int, default=0, metavar='min_word_length',
            dest='min_word_length',
            help='only include words with more than X letters')),
        ('--prefer_horizontal', dict(
            type=float, default=0.9, metavar='ratio',
            help='ratio of times to try horizontal fitting as opposed to vertical')),
        ('--scale', dict(
            type=float, default=1, metavar='scale',
            help='scaling between computation and drawing')),
        ('--colormap', dict(
            type=str, default='viridis', metavar='map',
            help='matplotlib colormap name')),
        ('--mode', dict(
            type=str, default='RGB', metavar='mode',
            help='use RGB or RGBA for transparent background')),
        ('--max_words', dict(
            type=int, default=200, metavar='N',
            help='maximum number of words')),
        ('--min_font_size', dict(
            type=int, default=4, metavar='size',
            help='smallest font size to use')),
        ('--max_font_size', dict(
            type=int, default=None, metavar='size',
            help='maximum font size for the largest word')),
        ('--font_step', dict(
            type=int, default=1, metavar='step',
            help='step size for the font')),
        ('--random_state', dict(
            type=int, default=None, metavar='seed',
            help='random seed')),
        ('--no_normalize_plurals', dict(
            action='store_false', dest='normalize_plurals',
            help='whether to remove trailing \'s\' from words')),
        ('--repeat', dict(
            action='store_true', dest='repeat',
            help='whether to repeat words and phrases')),
        ('--version', dict(
            action='version',
            version='%(prog)s {version}'.format(version=__version__))),
    )

    cli = argparse.ArgumentParser(
        description='A simple command line interface for wordcloud module.')
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)
    return cli
def parse_args(arguments):
    """Parse command-line arguments into inputs for ``main``.

    Arguments:
        - arguments -- list of command-line argument strings (without the
            program name), as accepted by ``ArgumentParser.parse_args``.

    Returns:
        A tuple ``(args, text, imagefile)``:
        - args -- dict of the remaining parsed options plus a ``color_func``
            entry; ``stopwords`` is replaced by a set of stripped lines and
            ``mask`` by a numpy array when those options were given.
        - text -- the full contents of the ``--text`` input.
        - imagefile -- the open file object the image should be written to.

    Raises:
        ValueError: if both ``--colormask`` and ``--color`` are given.
    """
    parser = make_parser()
    args = parser.parse_args(arguments)

    # The literal string 'None' on the command line stands for "no
    # background color".
    if args.background_color == 'None':
        args.background_color = None

    if args.colormask and args.color:
        raise ValueError('specify either a color mask or a color function')

    args = vars(args)

    with args.pop('text') as f:
        text = f.read()

    if args['stopwords']:
        with args.pop('stopwords') as f:
            args['stopwords'] = {line.strip() for line in f}

    if args['mask']:
        # np.array() pulls the pixel data out of the image, so the file
        # handle opened by the parser can be closed right afterwards.
        with args.pop('mask') as mask_file:
            args['mask'] = np.array(Image.open(mask_file))

    # Pick the coloring strategy: random by default, overridden by a color
    # mask image or by a single fixed color (the two are mutually exclusive,
    # as enforced above).
    color_func = wc.random_color_func
    colormask = args.pop('colormask')
    color = args.pop('color')
    if colormask:
        with colormask:
            image = np.array(Image.open(colormask))
        color_func = wc.ImageColorGenerator(image)
    if color:
        color_func = wc.get_single_color_func(color)
    args['color_func'] = color_func

    imagefile = args.pop('imagefile')
    return args, text, imagefile
|