File: wordsToNum.py

package info (click to toggle)
pyparsing 2.0.3+dfsg1-1
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 5,196 kB
  • ctags: 6,279
  • sloc: python: 11,708; makefile: 35; sh: 33
file content (109 lines) | stat: -rw-r--r-- 3,259 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# wordsToNum.py
# Copyright 2006, Paul McGuire
#
# Sample parser grammar to read a number given in words, and return the numeric value.
#
from pyparsing import *
from operator import mul
from functools import reduce

def makeLit(s, val):
    """Return a case-insensitive literal for the word `s`.

    The literal is named after `s`, and its parse action is
    `replaceWith(val)`, so `val` is what appears in the parse results.
    """
    return (
        CaselessLiteral(s)
        .setName(s)
        .setParseAction(replaceWith(val))
    )

# (word, value) pairs for 0 through 19: the spellings accepted for zero come
# first, then "one" .. "nineteen" numbered by their position.
unitDefinitions = [
    (word, 0)
    for word in ("zero", "oh", "zip", "zilch", "nada", "bupkis")
] + [
    (word, value)
    for value, word in enumerate(
        (
            "one", "two", "three", "four", "five",
            "six", "seven", "eight", "nine", "ten",
            "eleven", "twelve", "thirteen", "fourteen", "fifteen",
            "sixteen", "seventeen", "eighteen", "nineteen",
        ),
        start=1,
    )
]
# Alternation over every unit word; each alternative is built by makeLit and
# therefore yields the word's numeric value.
units = Or([makeLit(*entry) for entry in unitDefinitions])

# (word, value) pairs for the multiples of ten. "fourty" is kept alongside
# "forty" as a tolerated misspelling; both map to 40.
tensDefinitions = list(zip(
    ("ten", "twenty", "thirty", "forty", "fourty",
     "fifty", "sixty", "seventy", "eighty", "ninety"),
    (10, 20, 30, 40, 40,
     50, 60, 70, 80, 90),
))
# Alternation over every tens word; each alternative yields its numeric value.
tens = Or([makeLit(*entry) for entry in tensDefinitions])

# The single word "hundred", yielding 100.
hundreds = makeLit("hundred", 100)

# (word, value) pairs for the magnitude words: each successive word is a
# further factor of one thousand, from 10**3 up to 10**18.
majorDefinitions = [
    (word, 10 ** (3 * group))
    for group, word in enumerate(
        ("thousand", "million", "billion",
         "trillion", "quadrillion", "quintillion"),
        start=1,
    )
]
# Alternation over every magnitude word; each alternative yields its value.
mag = Or([makeLit(*entry) for entry in majorDefinitions])

def wordprod(t):
    """Parse action: return the product of all matched values in `t`."""
    return reduce(mul, t)


def wordsum(t):
    """Parse action: return the sum of all matched values in `t`."""
    return sum(t)


# A unit word optionally followed by "hundred"; the values are multiplied,
# so "fifteen hundred" becomes 15 * 100.
_unitHundreds = (units + Optional(hundreds)).setParseAction(wordprod)

# The above optionally followed by a tens word, which is added on.
_hundredsAndTens = (_unitHundreds + Optional(tens)).setParseAction(wordsum)

# Either that form or a bare tens word, then an optional trailing unit word;
# all values are summed into one number for the group.
numPart = ((_hundredsAndTens ^ tens) + Optional(units)).setParseAction(wordsum)

# Each group may be followed by a magnitude word that multiplies it.
_scaledPart = (numPart + Optional(mag)).setParseAction(wordprod)

# One or more scaled groups, summed, followed by the end of the input string.
numWords = OneOrMore(_scaledPart).setParseAction(wordsum) + StringEnd()

# Register hyphens and the word "and" as ignorable text for the grammar.
numWords.ignore("-")
numWords.ignore(CaselessLiteral("and"))

def test(s, expected):
    """Parse `s` with `numWords` and print how the result compares to `expected`.

    If parsing raises ParseException, print the input, a caret placed at the
    exception's reported column, and the exception message. Otherwise print
    the parsed value followed by CORRECT or a ***WRONG*** note showing the
    expected value. Nothing is returned.
    """
    try:
        tokens = numWords.parseString(s)
    except ParseException as pe:
        print("Parsing failed:")
        print(s)
        # pe.col is 1-based, so pad with col-1 spaces to line the caret up.
        print(' ' * (pe.col - 1) + '^')
        print(pe.msg)
        return

    val = tokens[0]
    # Keep the verdict on the same output line as the parsed value.
    print("'%s' -> %d" % (s, val), end=' ')
    verdict = "CORRECT" if val == expected else "***WRONG***, expected %d" % expected
    print(verdict)

# Sample inputs paired with the value each should produce, run in order.
# NOTE(review): the first two inputs ("... twenty hundred", "twennty") look
# deliberately malformed, to exercise the parse-failure report - confirm.
_samples = [
    ("one hundred twenty hundred", 120),
    ("one hundred and twennty", 120),
    ("one hundred and twenty", 120),
    ("one hundred and three", 103),
    ("one hundred twenty-three", 123),
    ("one hundred and twenty three", 123),
    ("one hundred twenty three million", 123000000),
    ("one hundred and twenty three million", 123000000),
    ("one hundred twenty three million and three", 123000003),
    ("fifteen hundred and sixty five", 1565),
    ("seventy-seven thousand eight hundred and nineteen", 77819),
    ("seven hundred seventy-seven thousand seven hundred and seventy-seven", 777777),
    ("zero", 0),
    ("forty two", 42),
    ("fourty two", 42),
]
for _text, _expected in _samples:
    test(_text, _expected)