1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
|
# number_words.py
#
# Copyright 2020, Paul McGuire
#
# Parser/evaluator for expressions of numbers as written out in words:
# - one
# - seven
# - twelve
# - twenty six
# - forty-two
# - one hundred and seven
#
#
# BNF:
"""
optional_and ::= ["and" | "-"]
optional_dash ::= ["-"]
units ::= one | two | three | ... | nine
teens ::= ten | teens_only
tens ::= twenty | thirty | ... | ninety
one_to_99 ::= units | teens | (tens [optional_dash units])
teens_only ::= eleven | twelve | ... | nineteen
hundreds ::= (units | teens_only | tens optional_dash units) "hundred"
one_to_999 ::= [hundreds [optional_and]] one_to_99 | hundreds
thousands ::= one_to_999 "thousand"
# number from 1-999,999
number ::= [thousands [optional_and]] [hundreds[optional_and]] one_to_99
| [thousands [optional_and]] hundreds
| thousands
"""
import pyparsing as pp
from operator import mul
def define_numeric_word_range(
    names: str, from_: int, to_: int = None, step: int = 1
) -> pp.MatchFirst:
    """
    Compose a MatchFirst of CaselessKeywords, given their names and values,
    which when parsed, are converted to their value.

    Args:
        names: whitespace-separated number words, listed in the same order
            as the values they stand for.
        from_: value of the first word.
        to_: value of the last word (inclusive); defaults to ``from_``,
            which is the single-word case.
        step: increment between the values of consecutive words.

    Returns:
        A MatchFirst over one CaselessKeyword per word. It is named after
        the word itself when there is only one, otherwise "first-last".

    Raises:
        ValueError: if ``names`` contains no words, or contains more words
            than ``range(from_, to_ + 1, step)`` has values.
    """

    def define_numeric_word(nm: str, val: int):
        # val is this helper's own parameter, so every lambda captures the
        # value of its own word (no late-binding surprise).
        return pp.CaselessKeyword(nm).add_parse_action(lambda: val)

    words = names.split()
    if not words:
        raise ValueError("names must contain at least one word")

    if to_ is None:
        to_ = from_
    values = range(from_, to_ + 1, step)

    # zip() would silently drop the trailing words, leaving them unparseable
    # while the expression name still advertises them.
    if len(words) > len(values):
        raise ValueError(
            f"{len(words)} names given but range({from_}, {to_ + 1}, {step}) "
            f"only provides {len(values)} values"
        )

    ret = pp.MatchFirst(
        [define_numeric_word(word, value) for word, value in zip(words, values)]
    )
    if len(words) == 1:
        ret.set_name(words[0])
    else:
        ret.set_name(f"{words[0]}-{words[-1]}")
    return ret
def multiply(t):
    """
    Parse action for hundreds and thousands.

    The last token is the magnitude word's value (100 or 1000); every token
    before it belongs to the count being scaled. The count may arrive as more
    than one token: the hundreds expression accepts a tens word followed by a
    units word ("twenty one hundred" -> [20, 1, 100]), so the leading tokens
    are summed before scaling.

    Returns the sum of the leading tokens multiplied by the last token.

    Raises TypeError if fewer than two tokens are supplied.
    """
    tokens = list(t)
    if len(tokens) < 2:
        raise TypeError(f"multiply expected at least 2 tokens, got {len(tokens)}")
    *count_parts, magnitude = tokens
    return mul(sum(count_parts), magnitude)
# Optional separators between number words. Both are suppressed, so they
# never show up among the tokens passed to the sum/multiply parse actions.
opt_dash = pp.Opt(pp.Suppress("-")).set_name("'-'")
opt_and = pp.Opt((pp.CaselessKeyword("and") | "-").suppress()).set_name("'and/-'")

# Word groups; each word is converted to its integer value when parsed.
units = define_numeric_word_range("one two three four five six seven eight nine", 1, 9)
teens_only = define_numeric_word_range(
    "eleven twelve thirteen fourteen fifteen sixteen seventeen eighteen nineteen",
    11,
    19,
)
ten = define_numeric_word_range("ten", 10)
teens = ten | teens_only
tens = define_numeric_word_range(
    "twenty thirty forty fifty sixty seventy eighty ninety", 20, 90, 10
)

# 1-99: a units word, a teens word, or a tens word optionally followed by a
# units word ("twenty", "twenty six", "forty-two"). The matched values are
# summed into a single token.
one_to_99 = (units | teens | (tens + pp.Opt(opt_dash + units))).set_name("1-99")
one_to_99.add_parse_action(sum)

hundred = define_numeric_word_range("hundred", 100)
thousand = define_numeric_word_range("thousand", 1000)

# Count of hundreds: "ten" is left out (teens_only, not teens) and a tens
# word must be followed by a units word, so "twenty hundred" does not match.
# NOTE(review): the tens+units alternative contributes two tokens, so the
# multiply parse action attached to `hundreds` below receives three tokens
# for input such as "twenty one hundred" - confirm multiply copes with that.
hundreds = (units | teens_only | (tens + opt_dash + units)) + hundred
hundreds.set_name("100s")

# `+` binds tighter than `|`: either [hundreds [and]] one_to_99, or hundreds
# alone. The resulting values are summed into a single token.
one_to_999 = (
    (pp.Opt(hundreds + opt_and) + one_to_99 | hundreds).add_parse_action(sum)
).set_name("1-999")

thousands = one_to_999 + thousand
thousands.set_name("1000s")

# for hundreds and thousands, must scale up (multiply) accordingly
# (the parse actions are added to the same objects already embedded in
# one_to_999 above, so they take effect there as well)
hundreds.add_parse_action(multiply)
thousands.add_parse_action(multiply)

# Full expression, alternatives tried in order:
#   [thousands [and]] [hundreds [and]] one_to_99
#   [thousands [and]] hundreds
#   thousands
numeric_expression = (
    pp.Opt(thousands + opt_and) + pp.Opt(hundreds + opt_and) + one_to_99
    | pp.Opt(thousands + opt_and) + hundreds
    | thousands
).set_name("numeric_words")

# sum all sub-results into total
numeric_expression.add_parse_action(sum)
if __name__ == "__main__":
    # Self-test: parse every sample phrase and print the parsed value with
    # thousands separators. Lines starting with '#' are comments to run_tests;
    # "twenty hundred" is expected to be reported as a parse failure.
    # post_parse is the pyparsing 3 name of the keyword (formerly postParse),
    # matching the snake_case API used throughout this file.
    numeric_expression.run_tests(
        """
one
seven
twelve
twenty six
forty-two
two hundred
twelve hundred
one hundred and eleven
ninety nine thousand nine hundred and ninety nine
nine hundred thousand nine hundred and ninety nine
nine hundred and ninety nine thousand nine hundred and ninety nine
nineteen hundred thousand nineteen hundred and ninety nine
# invalid
twenty hundred
""",
        post_parse=lambda _, s: f"{s[0]:,}",
    )

    # create railroad diagram (writes numeric_words_diagram.html; needs
    # pyparsing's optional diagram dependencies to be installed)
    numeric_expression.create_diagram("numeric_words_diagram.html", vertical=5)
|