File: test_wordcount.py

package info (click to toggle)
toolz 1.0.0-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 672 kB
  • sloc: python: 5,573; makefile: 136; sh: 2
file content (18 lines) | stat: -rw-r--r-- 479 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
from toolz.curried import *
import os

# Fetch the Shakespeare text from gutenberg.org the first time this module is
# imported; later runs reuse the copy already saved under bench/.
if not os.path.exists('bench/shakespeare.txt'):
    # os.system returns a non-zero status when the command fails.  wget's -O
    # creates the output file before any data arrives, so a failed download
    # would otherwise leave an empty or partial file behind that satisfies
    # the existence check above on every later run.
    if os.system('wget http://www.gutenberg.org/files/100/100-0.txt'
                 ' -O bench/shakespeare.txt') != 0:
        if os.path.exists('bench/shakespeare.txt'):
            os.remove('bench/shakespeare.txt')


def stem(word):
    """ Normalise a word: lowercase it and trim surrounding punctuation

    Trailing ``, . ! : ; ' - "`` characters and leading quote characters
    (``'`` and ``"``) are removed; everything in between is left untouched.
    """
    lowered = word.lower()
    # Trim the tail first, then any opening quotes at the head.
    tail_trimmed = lowered.rstrip(",.!:;'-\"")
    return tail_trimmed.lstrip("'\"")

# Word-count pipeline, listed in the order the data flows through it:
# split each line into words, flatten the per-line lists into one stream,
# stem every word, then tally how often each stem occurs.
wordcount = compose_left(map(str.split), concat, map(stem), frequencies)


def test_shakespeare():
    """ Run ``wordcount`` over the downloaded Shakespeare text

    This is a smoke/benchmark test: it only checks that the pipeline runs to
    completion on the file; the resulting counts are not inspected.
    """
    # Decode explicitly instead of relying on the locale-dependent default
    # encoding.  NOTE(review): the Gutenberg "100-0.txt" file is expected to
    # be UTF-8 -- confirm if the download URL ever changes.
    with open('bench/shakespeare.txt', encoding='utf-8') as f:
        # Iterating the file yields lines, which wordcount splits into words.
        counts = wordcount(f)