File: wordcloud_cli.py

package info (click to toggle)
python-wordcloud 1.8.2.2+dfsg-1
links: PTS, VCS
area: main
in suites: bookworm
size: 328 kB
sloc: python: 2,790; sh: 40; makefile: 9
file content (257 lines) | stat: -rw-r--r-- 9,006 bytes
parent folder | download | duplicates (2)
# -*- coding: utf-8 -*-
"""Command-line tool interface to generate word clouds.
"""
from __future__ import absolute_import

import sys
import textwrap

# Refuse direct execution of this file: exit right away with a hint that points
# at the supported entry points. The check sits above the package-relative
# import further down, which cannot be resolved when the file is run as a
# plain script.
if __name__ == '__main__':  # pragma: no cover
    # sys.exit() with a string argument prints that string to stderr and
    # terminates with exit status 1.
    sys.exit(textwrap.dedent(
        """
        To execute the CLI, instead consider running:

          wordcloud_cli --help

        or

          python -m wordcloud --help
        """))

import io
import re
import argparse
import wordcloud as wc
import numpy as np
from PIL import Image

from . import __version__


class FileType(object):
    """Factory for creating file object types.

    Port from argparse so we can support unicode file reading in Python2

    Instances of FileType are typically passed as type= arguments to the
    ArgumentParser add_argument() method.

    Text-mode files are always opened as UTF-8; binary-mode files are opened
    without an encoding.

    Keyword Arguments:
        - mode -- A string indicating how the file is to be opened. Accepts the
            same values as the builtin open() function.
        - bufsize -- The file's desired buffer size. Accepts the same values as
            the builtin open() function.

    """

    def __init__(self, mode='r', bufsize=-1):
        self._mode = mode
        self._bufsize = bufsize

    def __call__(self, string):
        """Return an open file object for the command-line value ``string``.

        ``"-"`` selects a standard stream: stdin for read modes, stdout for
        write/append/exclusive-create modes. In binary mode the underlying
        byte stream (``.buffer``) is returned when the stream exposes one.

        Raises:
            ValueError: if ``string`` is ``"-"`` and the mode contains none
                of ``r``, ``w``, ``a`` or ``x``.
            argparse.ArgumentTypeError: if the named file cannot be opened.
        """
        binary = 'b' in self._mode

        # the special argument "-" means sys.std{in,out}
        if string == '-':
            if 'r' in self._mode:
                stream = sys.stdin
            elif any(flag in self._mode for flag in 'wax'):
                stream = sys.stdout
            else:
                msg = 'argument "-" with mode %r' % self._mode
                raise ValueError(msg)
            # Binary callers need the byte-level stream. Fall back to the
            # stream itself when it has no ``buffer`` attribute (streams that
            # are already byte-oriented, or replaced stand-ins).
            return getattr(stream, 'buffer', stream) if binary else stream

        # all other arguments are used as file names
        try:
            encoding = None if binary else "UTF-8"
            return io.open(string, self._mode, self._bufsize, encoding=encoding)
        except IOError as e:
            message = "can't open '%s': %s"
            raise argparse.ArgumentTypeError(message % (string, e))

    def __repr__(self):
        # A bufsize of -1 (the default) is left out of the representation.
        args = self._mode, self._bufsize
        args_str = ', '.join(repr(arg) for arg in args if arg != -1)
        return '%s(%s)' % (type(self).__name__, args_str)


class RegExpAction(argparse.Action):
    """argparse action that accepts an option value only if it compiles as a regex.

    The value is stored on the namespace unchanged (as the pattern string, not
    a compiled pattern object).
    """

    def __init__(self, option_strings, dest, **kwargs):
        # Plain pass-through to argparse.Action; no extra configuration.
        super(RegExpAction, self).__init__(option_strings, dest, **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        """Validate ``values`` as a regular expression, then store it.

        Raises:
            argparse.ArgumentError: if ``re.compile`` rejects the pattern.
        """
        try:
            # Compiled only to check validity; the result is discarded.
            re.compile(values)
        except re.error as err:
            reason = str(err)
            raise argparse.ArgumentError(
                self, 'Invalid regular expression: ' + reason)
        else:
            setattr(namespace, self.dest, values)


def main(args, text, imagefile):
    """Build a word cloud from ``text`` and write it to ``imagefile`` as PNG.

    Arguments:
        - args -- mapping of keyword arguments forwarded to ``wc.WordCloud``.
        - text -- the text the cloud is generated from.
        - imagefile -- writable file object receiving the PNG data; it is used
            as a context manager around the save call.
    """
    cloud = wc.WordCloud(**args)
    cloud.generate(text)
    picture = cloud.to_image()

    with imagefile:
        picture.save(imagefile, optimize=True, format='png')


def make_parser():
    """Build the argparse parser for the word cloud command-line interface.

    Returns:
        argparse.ArgumentParser: parser with all CLI options registered. Options
        without an explicit ``dest`` are stored under their own name; a few are
        renamed via ``dest`` (``--fontfile`` -> ``font_path``,
        ``--background`` -> ``background_color``,
        ``--no_collocations`` -> ``collocations``,
        ``--no_normalize_plurals`` -> ``normalize_plurals``).
    """
    description = 'A simple command line interface for wordcloud module.'
    parser = argparse.ArgumentParser(description=description)
    # Input text and output image both default to the standard streams ("-").
    parser.add_argument(
        '--text', metavar='file', type=FileType(), default='-',
        help='specify file of words to build the word cloud (default: stdin)')
    parser.add_argument(
        '--regexp', metavar='regexp', default=None, action=RegExpAction,
        help='override the regular expression defining what constitutes a word')
    parser.add_argument(
        '--stopwords', metavar='file', type=FileType(),
        help='specify file of stopwords (containing one word per line)'
             ' to remove from the given text after parsing')
    parser.add_argument(
        '--imagefile', metavar='file', type=FileType('wb'),
        default='-',
        help='file the completed PNG image should be written to'
             ' (default: stdout)')
    parser.add_argument(
        '--fontfile', metavar='path', dest='font_path',
        help='path to font file you wish to use (default: DroidSansMono)')
    parser.add_argument(
        '--mask', metavar='file', type=argparse.FileType('rb'),
        help='mask to use for the image form')
    parser.add_argument(
        '--colormask', metavar='file', type=argparse.FileType('rb'),
        help='color mask to use for image coloring')
    parser.add_argument(
        '--contour_width', metavar='width', default=0, type=float,
        dest='contour_width',
        help='if greater than 0, draw mask contour (default: 0)')
    parser.add_argument(
        '--contour_color', metavar='color', default='black', type=str,
        dest='contour_color',
        help='use given color as mask contour color -'
             ' accepts any value from PIL.ImageColor.getcolor')
    parser.add_argument(
        '--relative_scaling', type=float, default=0,
        metavar='rs', help='scaling of words by frequency (0 - 1)')
    parser.add_argument(
        '--margin', type=int, default=2,
        metavar='width', help='spacing to leave around words')
    parser.add_argument(
        '--width', type=int, default=400,
        metavar='width', help='define output image width')
    parser.add_argument(
        '--height', type=int, default=200,
        metavar='height', help='define output image height')
    parser.add_argument(
        '--color', metavar='color',
        help='use given color as coloring for the image -'
             ' accepts any value from PIL.ImageColor.getcolor')
    parser.add_argument(
        '--background', metavar='color', default='black', type=str,
        dest='background_color',
        help='use given color as background color for the image -'
             ' accepts any value from PIL.ImageColor.getcolor')
    # store_false: the destination defaults to True and the flag turns it off.
    parser.add_argument(
        '--no_collocations', action='store_false', dest='collocations',
        help='do not add collocations (bigrams) to word cloud '
             '(default: add unigrams and bigrams)')
    parser.add_argument(
        '--include_numbers',
        action='store_true',
        dest='include_numbers',
        help='include numbers in wordcloud?')
    parser.add_argument(
        '--min_word_length',
        type=int,
        default=0,
        metavar='min_word_length',
        dest='min_word_length',
        help='only include words with more than X letters')
    parser.add_argument(
        '--prefer_horizontal',
        type=float, default=.9, metavar='ratio',
        help='ratio of times to try horizontal fitting as opposed to vertical')
    parser.add_argument(
        '--scale',
        type=float, default=1, metavar='scale',
        help='scaling between computation and drawing')
    parser.add_argument(
        '--colormap',
        type=str, default='viridis', metavar='map',
        help='matplotlib colormap name')
    parser.add_argument(
        '--mode',
        type=str, default='RGB', metavar='mode',
        help='use RGB or RGBA for transparent background')
    parser.add_argument(
        '--max_words',
        type=int, default=200, metavar='N',
        help='maximum number of words')
    parser.add_argument(
        '--min_font_size',
        type=int, default=4, metavar='size',
        help='smallest font size to use')
    parser.add_argument(
        '--max_font_size',
        type=int, default=None, metavar='size',
        help='maximum font size for the largest word')
    parser.add_argument(
        '--font_step',
        type=int, default=1, metavar='step',
        help='step size for the font')
    parser.add_argument(
        '--random_state',
        type=int, default=None, metavar='seed',
        help='random seed')
    # store_false again: passing the flag switches plural normalization off,
    # so the help text describes what the flag does rather than the option it
    # negates.
    parser.add_argument(
        '--no_normalize_plurals',
        action='store_false',
        dest='normalize_plurals',
        help='do not remove trailing \'s\' from words')
    parser.add_argument(
        '--repeat',
        action='store_true',
        dest='repeat',
        help='whether to repeat words and phrases')
    parser.add_argument(
        '--version', action='version',
        version='%(prog)s {version}'.format(version=__version__))
    return parser


def parse_args(arguments):
    """Parse CLI arguments and turn them into inputs for ``main``.

    Arguments:
        - arguments -- list of command-line argument strings, handed to the
            parser built by ``make_parser``.

    Returns:
        A ``(args, text, imagefile)`` tuple: ``args`` is a dict of the
        remaining parsed options (with ``stopwords`` read into a set, ``mask``
        loaded into a numpy array and ``color_func`` added), ``text`` is the
        full content of the ``--text`` stream, and ``imagefile`` is the opened
        output file object.

    Raises:
        ValueError: if both ``--colormask`` and ``--color`` are given.

    Every input stream opened during parsing (text, stopwords, mask, color
    mask) is closed once its content has been read.
    """
    parser = make_parser()
    args = parser.parse_args(arguments)
    # The literal string 'None' on the command line selects no background.
    if args.background_color == 'None':
        args.background_color = None

    if args.colormask and args.color:
        raise ValueError('specify either a color mask or a color function')

    args = vars(args)

    with args.pop('text') as f:
        text = f.read()

    if args['stopwords']:
        with args.pop('stopwords') as f:
            args['stopwords'] = {line.strip() for line in f}

    if args['mask']:
        # Convert to an array while the file is still open, then close it
        # instead of leaking the handle opened by argparse.
        with args.pop('mask') as f:
            args['mask'] = np.array(Image.open(f))

    color_func = wc.random_color_func
    colormask = args.pop('colormask')
    color = args.pop('color')
    if colormask:
        # Same as for the mask: read the pixels, then release the handle.
        with colormask:
            image = np.array(Image.open(colormask))
        color_func = wc.ImageColorGenerator(image)
    if color:
        color_func = wc.get_single_color_func(color)
    args['color_func'] = color_func

    imagefile = args.pop('imagefile')

    return args, text, imagefile