File: chemical_formulas.py

package info (click to toggle)
pyparsing 3.3.2-1
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 12,200 kB
  • sloc: python: 30,867; ansic: 422; sh: 112; makefile: 24
file content (134 lines) | stat: -rw-r--r-- 3,415 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#
# chemical_formulas.py
#
# Copyright (c) 2003,2019 Paul McGuire
#

import pyparsing as pp

# Atomic weights, keyed by element symbol, for the few elements that appear
# in the sample formulas in this file.
atomic_weight = {
    "O": 15.9994,
    "H": 1.00794,
    "Na": 22.9897,
    "Cl": 35.4527,
    "C": 12.0107,
}

# Characters accepted as a plain-ASCII quantity after an element symbol.
digits = "0123456789"

# Version 1
# An element symbol: one uppercase letter, optionally followed by a lowercase
# letter (max=2 limits the matched word to two characters).
element = pp.Word(pp.alphas.upper(), pp.alphas.lower(), max=2)
element.set_name("element")
# For stricter matching that only accepts specific element symbols, use this
# Regex instead:
# element = pp.Regex("A[cglmrstu]|B[aehikr]?|C[adeflmorsu]?|D[bsy]|"
#                    "E[rsu]|F[emr]?|G[ade]|H[efgos]?|I[nr]?|Kr?|L[airu]|"
#                    "M[dgnot]|N[abdeiop]?|Os?|P[abdmortu]?|R[abefghnu]|"
#                    "S[bcegimnr]?|T[abcehilm]|U(u[bhopqst])?|V|W|Xe|Yb?|Z[nr]")

# Each element reference is grouped as [symbol, qty]; when no quantity is
# written, the string "1" is supplied as the default.
element_ref = pp.Group(element + pp.Optional(pp.Word(digits), default="1"))
# A formula is zero or more consecutive element references.
formula = pp.ZeroOrMore(element_ref)


def sum_atomic_weights(element_list):
    """Return the combined weight of the elements in a parsed formula.

    ``element_list`` is iterated as ``(symbol, qty)`` pairs. Each symbol is
    looked up in ``atomic_weight`` and its weight is multiplied by
    ``int(qty)``.

    Raises ``KeyError`` when a symbol has no entry in ``atomic_weight``.
    """
    weighted = (
        atomic_weight[symbol] * int(count) for symbol, count in element_list
    )
    return sum(weighted)


# Run the sample formulas through the current grammar; post_parse adds the
# computed weight to each test's printed output.
formula.run_tests(
    """\
    NaCl
    H2O
    C6H5OH
    """,
    post_parse=lambda _test, tokens: (
        f"Molecular weight: {sum_atomic_weights(tokens)}"
    ),
    full_dump=False,
)
print()


# Version 2 - access parsed items by results name
# Same grammar shape as before, but each group now exposes its parts as
# "symbol" and "qty" (qty is still a string, defaulting to "1").
element_ref = pp.Group(
    element.set_results_name("symbol")
    + pp.Optional(pp.Word(digits), default="1").set_results_name("qty")
)
formula = pp.ZeroOrMore(element_ref)


def sum_atomic_weights_by_results_name(element_list):
    """Return the combined weight of a formula parsed with results names.

    Each item of ``element_list`` must expose ``symbol`` and ``qty``
    attributes. The symbol's entry in ``atomic_weight`` is multiplied by
    ``int(qty)``.

    Raises ``KeyError`` when a symbol has no entry in ``atomic_weight``.
    """
    contributions = (
        atomic_weight[ref.symbol] * int(ref.qty) for ref in element_list
    )
    return sum(contributions)


# Run the sample formulas through the current grammar, reading each group's
# parts by results name when computing the weight.
formula.run_tests(
    """\
    NaCl
    H2O
    C6H5OH
    """,
    post_parse=lambda _test, tokens: (
        f"Molecular weight: {sum_atomic_weights_by_results_name(tokens)}"
    ),
    full_dump=False,
)
print()

# Version 3 - convert integers during parsing process
# The parse action turns the matched digit string into an int at parse time,
# so the default quantity is the int 1 rather than the string "1".
integer = (
    pp.Word(digits)
    .set_name("integer")
    .add_parse_action(lambda t: int(t[0]))
)
element_ref = pp.Group(
    element.set_results_name("symbol")
    + pp.Optional(integer, default=1).set_results_name("qty")
)
formula = pp.ZeroOrMore(element_ref).set_name("chemical_formula")


def sum_atomic_weights_by_results_name_with_converted_ints(element_list):
    """Return the combined weight of a formula whose quantities are numeric.

    Each item of ``element_list`` must expose ``symbol`` and ``qty``
    attributes. ``qty`` is still passed through ``int()``, which leaves an
    already-converted integer unchanged.

    Raises ``KeyError`` when a symbol has no entry in ``atomic_weight``.
    """
    contributions = (
        atomic_weight[ref.symbol] * int(ref.qty) for ref in element_list
    )
    return sum(contributions)


# Run the sample formulas through the current grammar, whose quantities are
# converted to ints while parsing.
formula.run_tests(
    """\
    NaCl
    H2O
    C6H5OH
    """,
    post_parse=lambda _test, tokens: (
        f"Molecular weight: {sum_atomic_weights_by_results_name_with_converted_ints(tokens)}"
    ),
    full_dump=False,
)
print()

# Version 4 - parse and convert integers as subscript digits
# Unicode subscript digits, ordered so each character's index is its value.
subscript_digits = "₀₁₂₃₄₅₆₇₈₉"
# Lookup table from a subscript character to the integer it stands for.
subscript_int_map = dict(zip(subscript_digits, range(len(subscript_digits))))


def cvt_subscript_int(s):
    """Convert a string of subscript digits to the integer it represents.

    Only ``s[0]`` is read, so ``s`` may be any indexable whose first item is
    the subscript text. An empty string gives 0.

    Raises ``KeyError`` if a character is not in ``subscript_int_map``.
    """
    subscript_text = s[0]
    width = len(subscript_text)
    # Weight each digit by its decimal place, most significant digit first.
    return sum(
        subscript_int_map[ch] * 10 ** (width - 1 - pos)
        for pos, ch in enumerate(subscript_text)
    )


# A run of subscript digits, converted to an int by cvt_subscript_int.
subscript_int = pp.Word(subscript_digits)
subscript_int.set_name("subscript").add_parse_action(cvt_subscript_int)

# An element symbol with an optional subscript quantity (default: int 1),
# exposed as "symbol" and "qty".
element_ref = pp.Group(
    element.set_results_name("symbol")
    + pp.Optional(subscript_int, default=1).set_results_name("qty")
)
# Unlike the earlier versions, at least one element reference is required.
formula = pp.OneOrMore(element_ref).set_name("chemical_formula")

if __name__ == "__main__":
    import sys

    # Writing the railroad diagram is a best-effort extra: a failure here
    # must not stop the tests below from running. Instead of swallowing the
    # error silently, report it on stderr so stdout output is unchanged.
    # NOTE(review): presumably this guards against the optional diagramming
    # dependencies not being installed - confirm.
    try:
        formula.create_diagram("chemical_formulas.html")
    except Exception as exc:
        print(
            f"chemical_formulas.html not generated: {type(exc).__name__}: {exc}",
            file=sys.stderr,
        )

    # Exercise the subscript-digit grammar; the lines starting with "#" inside
    # the test string are comments shown alongside each test.
    formula.run_tests(
        """\
        # sodium chloride
        NaCl
        # hydrogen hydroxide
        H₂O
        # phenol
        C₆H₅OH
        # ethanol
        C₂H₅OH
        # decanol
        C₁₀H₂₁OH
        """,
        full_dump=False,
        post_parse=lambda _, tokens: (
            f"Molecular weight: {sum_atomic_weights_by_results_name_with_converted_ints(tokens)}"
        ),
    )

    print()