"""Count the frequencies of words in a string"""

def wordfreq(text, is_filename=False):
    """Return a dictionary of words and word counts in a string."""
    if is_filename:
        with open(text) as f:
            text = f.read()
    freqs = {}
    for word in text.split():
        lword = word.lower()
        freqs[lword] = freqs.get(lword, 0) + 1
    return freqs
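
# Example: counting is case-insensitive and whitespace-delimited (doctest-style):
#
#   >>> wordfreq("the cat and The hat")
#   {'the': 2, 'cat': 1, 'and': 1, 'hat': 1}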


def print_wordfreq(freqs, n=10):
    """Print the n most common words and counts in the freqs dict."""
    words, counts = freqs.keys(), freqs.values()
    # Sort (count, word) pairs so the most frequent words come first.
    items = zip(counts, words)
    items = sorted(items, reverse=True)
    for count, word in items[:n]:
        print(word, count)
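
# Example: printing the top two words of a short phrase (doctest-style):
#
#   >>> print_wordfreq(wordfreq("to be or not to be"), n=2)
#   to 2
#   be 2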


def wordfreq_to_weightsize(
    worddict, minsize=25, maxsize=50, minalpha=0.5, maxalpha=1.0
):
    """Map each word's count to an (alpha, fontsize) pair by linear scaling."""
    mincount = min(worddict.values())
    maxcount = max(worddict.values())
    weights = {}
    for k, v in worddict.items():
        # Normalize the count to [0, 1], then interpolate alpha and size.
        w = (v - mincount) / (maxcount - mincount)
        alpha = minalpha + (maxalpha - minalpha) * w
        size = minsize + (maxsize - minsize) * w
        weights[k] = (alpha, size)
    return weights
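
# Example: with counts {'a': 1, 'b': 2, 'c': 3} and the default parameters,
# 'a' (the least frequent word) maps to (0.5, 25.0), 'b' to (0.75, 37.5),
# and 'c' (the most frequent) to (1.0, 50.0).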


def tagcloud(worddict, n=10, minsize=25, maxsize=50, minalpha=0.5, maxalpha=1.0):
    """Plot a tag cloud of the n most heavily weighted words in worddict."""
    import random

    from matplotlib import pyplot as plt

    worddict = wordfreq_to_weightsize(worddict, minsize, maxsize, minalpha, maxalpha)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.set_position([0.0, 0.0, 1.0, 1.0])
    plt.xticks([])
    plt.yticks([])

    words = worddict.keys()
    alphas = [v[0] for v in worddict.values()]
    sizes = [v[1] for v in worddict.values()]
    # Sort by (alpha, size) so the most heavily weighted words are drawn.
    items = sorted(zip(alphas, sizes, words), reverse=True)
    for alpha, size, word in items[:n]:
        # xpos = random.normalvariate(0.5, 0.3)
        # ypos = random.normalvariate(0.5, 0.3)
        xpos = random.uniform(0.0, 1.0)
        ypos = random.uniform(0.0, 1.0)
        ax.text(xpos, ypos, word.lower(), alpha=alpha, fontsize=size)
    ax.autoscale_view()
    return ax
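

if __name__ == "__main__":
    # Quick usage sketch: count words in a text file, print the top 10,
    # and draw a tag cloud. "sample.txt" is a placeholder; point it at
    # any plain-text file on your machine.
    from matplotlib import pyplot as plt

    freqs = wordfreq("sample.txt", is_filename=True)
    print_wordfreq(freqs, n=10)
    tagcloud(freqs)
    plt.show()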