File: wordsToNum.py

package info (click to toggle)
pyparsing 1.5.6%2Bdfsg1-2
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 8,220 kB
  • sloc: python: 13,752; makefile: 33; sh: 17
file content (108 lines) | stat: -rw-r--r-- 3,200 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# wordsToNum.py
# Copyright 2006, Paul McGuire
#
# Sample parser grammar to read a number given in words, and return the numeric value.
#
from pyparsing import *
from operator import mul

def makeLit(s,val):
    ret = CaselessLiteral(s).setName(s)
    return ret.setParseAction( replaceWith(val) )

unitDefinitions = [
    ("zero",       0),
    ("oh",         0),
    ("zip",        0),
    ("zilch",      0),
    ("nada",       0),
    ("bupkis",     0),
    ("one",        1),
    ("two",        2),
    ("three",      3),
    ("four",       4),
    ("five",       5),
    ("six",        6),
    ("seven",      7),
    ("eight",      8),
    ("nine",       9),
    ("ten",       10),
    ("eleven",    11),
    ("twelve",    12),
    ("thirteen",  13),
    ("fourteen",  14),
    ("fifteen",   15),
    ("sixteen",   16),
    ("seventeen", 17),
    ("eighteen",  18),
    ("nineteen",  19),
    ]
units = Or( [ makeLit(s,v) for s,v in unitDefinitions ] )

tensDefinitions = [
    ("ten",     10),
    ("twenty",  20),
    ("thirty",  30),
    ("forty",   40),
    ("fourty",  40), # for the spelling-challenged...
    ("fifty",   50),
    ("sixty",   60),
    ("seventy", 70),
    ("eighty",  80),
    ("ninety",  90),
    ]
tens = Or( [ makeLit(s,v) for s,v in tensDefinitions ] )

hundreds = makeLit("hundred", 100)

majorDefinitions = [
    ("thousand",    int(1e3)),
    ("million",     int(1e6)),
    ("billion",     int(1e9)),
    ("trillion",    int(1e12)),
    ("quadrillion", int(1e15)),
    ("quintillion", int(1e18)),
    ]
mag = Or( [ makeLit(s,v) for s,v in majorDefinitions ] )

wordprod = lambda t: reduce(mul,t)
wordsum = lambda t: sum(t)
numPart = (((( units + Optional(hundreds) ).setParseAction(wordprod) + 
               Optional(tens)).setParseAction(wordsum) 
               ^ tens )
               + Optional(units) ).setParseAction(wordsum)
numWords = OneOrMore( (numPart + Optional(mag)).setParseAction(wordprod) 
                    ).setParseAction(wordsum) + StringEnd()
numWords.ignore("-")
numWords.ignore(CaselessLiteral("and"))

def test(s,expected):
    try:
        val = numWords.parseString(s)[0]
    except ParseException, pe:
        print "Parsing failed:"
        print s
        print "%s^" % (' '*(pe.col-1))
        print pe.msg
    else:
        print "'%s' -> %d" % (s, val),
        if val == expected:
            print "CORRECT"
        else:
            print "***WRONG***, expected %d" % expected

test("one hundred twenty hundred", 120)
test("one hundred and twennty", 120)
test("one hundred and twenty", 120)
test("one hundred and three", 103)
test("one hundred twenty-three", 123)
test("one hundred and twenty three", 123)
test("one hundred twenty three million", 123000000)
test("one hundred and twenty three million", 123000000)
test("one hundred twenty three million and three", 123000003)
test("fifteen hundred and sixty five", 1565)
test("seventy-seven thousand eight hundred and nineteen", 77819)
test("seven hundred seventy-seven thousand seven hundred and seventy-seven", 777777)
test("zero", 0)
test("forty two", 42)
test("fourty two", 42)