File: number_words.py

package info (click to toggle)
pyparsing 3.3.2-1
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 12,200 kB
  • sloc: python: 30,867; ansic: 422; sh: 112; makefile: 24
file content (145 lines) | stat: -rw-r--r-- 4,108 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# number_words.py
#
# Copyright 2020, Paul McGuire
#
# Parser/evaluator for expressions of numbers as written out in words:
#  - one
#  - seven
#  - twelve
#  - twenty six
#  - forty-two
#  - one hundred and seven
#
#
#  BNF:
#    optional_and ::= ["and" | "-"]
#    optional_dash ::= ["-"]
#    units ::= "one" | "two" | "three" | ... | "nine"
#    ten ::= "ten"
#    tens ::= "twenty" | "thirty" | ... | "ninety"
#    one_to_99 ::= units | ten | teens | (tens [optional_dash units])
#    teens ::= "eleven" | "twelve" | ... | "nineteen"
#    hundreds ::= (units | teens | tens optional_dash units) "hundred"
#    one_to_999 ::= [hundreds [optional_and]] one_to_99 | hundreds
#    thousands ::= one_to_999 "thousand"
#
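#    # number, nominally from 1-999,999 (multi-hundred forms such as
#    # "twelve hundred" or "nineteen hundred thousand" also parse, so
#    # larger values are possible)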
#    number ::= [thousands [optional_and]] [hundreds[optional_and]] one_to_99
#               | [thousands [optional_and]] hundreds
#               | thousands
#

import pyparsing as pp
from operator import mul


def define_numeric_word_range(
    names: str, from_: int, to_: int = None, step: int = 1
) -> pp.MatchFirst:
    """
    Build a MatchFirst of CaselessKeywords from a whitespace-separated string
    of number words. The words are paired, in order, with the values of
    range(from_, to_ + 1, step), and each keyword's parse action returns its
    paired value in place of the matched word.

    If to_ is omitted it defaults to from_, giving a single-value range.
    The returned expression is named after the word itself when there is only
    one, otherwise "<first word>-<last word>".
    """

    def keyword_for(word: str, number: int):
        # Built in a helper so that every keyword closes over its own
        # `number`; the action deliberately takes no arguments.
        return pp.CaselessKeyword(word).add_parse_action(lambda: number)

    word_list = names.split()
    last = from_ if to_ is None else to_

    # zip() stops at the shorter of the two sequences
    alternatives = pp.MatchFirst(
        [
            keyword_for(word, number)
            for word, number in zip(word_list, range(from_, last + 1, step))
        ]
    )

    label = (
        word_list[0]
        if len(word_list) == 1
        else f"{word_list[0]}-{word_list[-1]}"
    )
    alternatives.set_name(label)

    return alternatives


def multiply(t):
    """
    Parse action for hundreds and thousands: given the two parsed values
    (a count followed by the value of its scale word), return their product.
    """
    count_and_scale = tuple(t)
    return mul(*count_and_scale)


# Optional separators between number words; both are suppressed so that they
# contribute no tokens to the values being summed/multiplied below.
#  - opt_dash: an optional "-" (as in "forty-two")
#  - opt_and: an optional "and" keyword or "-" (as in "one hundred and seven")
opt_dash = pp.Opt(pp.Suppress("-")).set_name("'-'")
opt_and = pp.Opt((pp.CaselessKeyword("and") | "-").suppress()).set_name("'and/-'")

# Word tables: each expression matches one of the listed words and yields the
# corresponding integer value.
units = define_numeric_word_range("one two three four five six seven eight nine", 1, 9)
teens = define_numeric_word_range(
    "eleven twelve thirteen fourteen fifteen sixteen seventeen eighteen nineteen",
    11,
    19,
)
ten = define_numeric_word_range("ten", 10)

# multiples of ten from 20 to 90 (step of 10)
tens = define_numeric_word_range(
    "twenty thirty forty fifty sixty seventy eighty ninety", 20, 90, 10
)

# scale words, yielding 100 and 1000 for use by the multiply parse action
hundred = define_numeric_word_range("hundred", 100)
thousand = define_numeric_word_range("thousand", 1000)

# units, teens, or a tens word followed by a units word ("twenty six",
# "forty-two"); a bare "ten" or bare tens word is not included here, so this is
# the form allowed in front of "hundred" (the sample inputs at the bottom of
# the file list "twenty hundred" as invalid). The parsed values are summed.
one_to_99_except_tens = (units | teens | (tens + opt_dash + units)).set_name("1-99 except tens")
one_to_99_except_tens.add_parse_action(sum)
# the full 1-99 range adds back "ten" and the bare tens words
one_to_99 = (one_to_99_except_tens | ten | tens).set_name("1-99")
one_to_99.add_parse_action(sum)

# a count followed by "hundred" (e.g. "two hundred", "twelve hundred")
hundreds = one_to_99_except_tens + hundred
hundreds.set_name("100s")

# the count that may precede "thousand": optional hundreds followed by 1-99,
# or hundreds alone; the parts are summed into a single value
one_to_999 = (
    (pp.Opt(hundreds + opt_and) + one_to_99 | hundreds).add_parse_action(sum)
).set_name("1-999")

# a count followed by "thousand" (e.g. "seven thousand")
thousands = one_to_999 + thousand
thousands.set_name("1000s")

# for hundreds and thousands, must scale up (multiply) accordingly
# (each yields two values - the count and the scale word's value - which
# multiply reduces to their product)
hundreds.add_parse_action(multiply)
thousands.add_parse_action(multiply)

# Top-level expression; alternatives are tried in order:
#   1. [thousands] [hundreds] one_to_99
#   2. [thousands] hundreds
#   3. thousands
# with an optional "and"/"-" permitted after thousands and hundreds.
numeric_expression = (
    pp.Opt(thousands + opt_and) + pp.Opt(hundreds + opt_and) + one_to_99
    | pp.Opt(thousands + opt_and) + hundreds
    | thousands
).set_name("numeric_words")

# sum all sub-results into total
numeric_expression.add_parse_action(sum)


if __name__ == "__main__":
    from contextlib import suppress

    # Railroad diagram generation is best-effort: any exception it raises is
    # ignored so that the sample run below still executes.
    with suppress(Exception):
        numeric_expression.create_diagram("number_words_diagram.html", vertical=5)

    # Parse each sample phrase and display the resulting number with
    # thousands separators.
    numeric_expression.run_tests(
        """
        one
        seven
        twelve
        twenty six
        forty-two
        two hundred
        twelve hundred
        one hundred and eleven
        seven thousand and six
        twenty five hundred
        twenty five hundred and one
        ninety nine thousand nine hundred and ninety nine
        nine hundred thousand nine hundred and ninety nine
        nine hundred and ninety nine thousand nine hundred and ninety nine
        nineteen hundred thousand nineteen hundred and ninety nine
        
        # invalid
        twenty hundred
        """,
        post_parse=lambda _, tokens: f"{tokens[0]:,}",
    )