File: moultipart.py

package info (click to toggle)
babeltrace2 2.1.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 42,660 kB
  • sloc: cpp: 106,162; ansic: 78,276; python: 27,115; sh: 9,053; makefile: 1,807; xml: 46
file content (147 lines) | stat: -rw-r--r-- 3,735 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# SPDX-License-Identifier: GPL-2.0-only
#
# Copyright (C) 2023 EfficiOS Inc.
#
# pyright: strict, reportTypeCommentUsage=false

import re
from typing import TextIO


# One part of a moultipart document.
#
# For example, for this part of which the header is at line 37:
#
#     --- Another Oscar Wilde quote
#     I can resist everything except temptation.
#
# The corresponding `Part` object is:
#
#     Part('Another Oscar Wilde quote',
#          'I can resist everything except temptation.\n',
#          38)
class Part:
    # Builds a part from the custom information of its header line
    # (`header_info`), its text (`content`), and the number of the
    # first line of this text (`first_content_line_no`).
    def __init__(self, header_info: str, content: str, first_content_line_no: int):
        self._header_info = header_info
        self._content = content
        self._first_content_line_no = first_content_line_no

    # Custom information of the header line of this part.
    @property
    def header_info(self) -> str:
        return self._header_info

    # Text of this part.
    @property
    def content(self) -> str:
        return self._content

    # Number of the first line, relative to the beginning of the
    # containing moultipart document, of the content of this part.
    @property
    def first_content_line_no(self) -> int:
        return self._first_content_line_no

    # Constructor-like representation, handy when printing the result
    # of a parsing operation.
    def __repr__(self) -> str:
        shown_fields = (
            repr(self.header_info),
            repr(self.content),
            self.first_content_line_no,
        )

        return "Part({}, {}, {})".format(*shown_fields)


def _try_parse_header(line: str):
    # Returns the custom information of `line`, stripped of its
    # surrounding whitespace, if `line` is a header line, or `None`
    # otherwise.
    #
    # A header line starts with `---` which is followed, until the end
    # of the line, by either only whitespace or a space and some text.
    header_match = re.match(r"---(\s*| .+)$", line)

    if header_match is not None:
        return header_match.group(1).strip()

    return None


# Parses the moultipart document file `in_file` and returns its parts
# (list of `Part` objects).
#
# A moultipart document is a sequence of parts.
#
# A moultipart part is:
#
# 1. A header line, that is, in this order:
#
#    a) Exactly `---`.
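#    b) Zero or more whitespace characters; a space is mandatory right
#       after `---` when custom information follows (`---foo` isn't a
#       header line).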
#    c) Optional: custom information until the end of the line.
#
# 2. Zero or more lines of text which aren't header lines.
#
# For example, consider the following moultipart document:
#
#     --- Victoria
#     Parenteau
#     ---
#     Taillon
#     --- This part is empty
#     --- Josianne
#     Gervais
#
# Then this function would return the following part objects:
#
#     [
#         Part('Victoria',           'Parenteau\n', 2),
#         Part('',                   'Taillon\n',   4),
#         Part('This part is empty', '',            6),
#         Part('Josianne',           'Gervais\n',   7),
#     ]
#
# Raises `RuntimeError` on any parsing error.
def parse(in_file: TextIO):
    # Reads the lines of `in_file` and returns the list of `Part`
    # objects of the moultipart document, in document order.
    #
    # Raises `RuntimeError` if `in_file` has no lines or if its first
    # line isn't a header line.

    # Read the first header.
    #
    # Passing a default value to next() makes an empty document a
    # regular parsing error instead of leaking `StopIteration` to the
    # caller.
    first_line = next(in_file, None)

    if first_line is None:
        raise RuntimeError(
            "Expecting header line starting with `---`, got an empty document"
        )

    cur_part_header_info = _try_parse_header(first_line)

    if cur_part_header_info is None:
        raise RuntimeError(
            "Expecting header line starting with `---`, got `{}`".format(
                first_line.strip("\n")
            )
        )

    parts = []  # type: list[Part]

    # Content lines of the current part: they're joined only once,
    # when the part ends, to avoid repeated string concatenations.
    cur_part_lines = []  # type: list[str]

    # The first header is line 1, therefore the content of the first
    # part, if any, starts at line 2.
    cur_first_content_line_no = 2

    for line_no, line in enumerate(in_file, start=2):
        maybe_part_header_info = _try_parse_header(line)

        if maybe_part_header_info is None:
            # Accumulate content lines
            cur_part_lines.append(line)
            continue

        # New header: finish the current part and start the next one
        parts.append(
            Part(
                cur_part_header_info,
                "".join(cur_part_lines),
                cur_first_content_line_no,
            )
        )
        cur_part_lines = []
        cur_part_header_info = maybe_part_header_info

        # The content of the new part starts right after its header
        cur_first_content_line_no = line_no + 1

    # Last part (always exists)
    parts.append(
        Part(
            cur_part_header_info,
            "".join(cur_part_lines),
            cur_first_content_line_no,
        )
    )

    return parts


if __name__ == "__main__":
    import pprint
    import sys

    # Parse the moultipart document of which the path is the first
    # command-line argument, and then pretty-print its parts.
    with open(sys.argv[1]) as moultipart_file:
        parsed_parts = parse(moultipart_file)

    pprint.pprint(parsed_parts)