File: text2rednotebook.py

package info (click to toggle)
rednotebook 2.42%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 5,976 kB
  • sloc: python: 9,842; javascript: 4,103; xml: 128; sh: 58; makefile: 12
file content (197 lines) | stat: -rw-r--r-- 7,078 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
"""
Mass import diary/journal entries from Markdown, plain text, and ODT into RedNotebook.

You will probably need to adapt the script to your needs before it works for your
existing old journal entries. The script is meant as a starting point, not as an
off-the-shelf solution. In any case, please back up your RedNotebook journal
before importing old entries.

Dates, currently in the format "Dayoftheweek, day-number month-name year"
(for example "Monday, 1 January 2024"), are used to split the text into
days, and each day's text is stored as the RedNotebook entry for that date.
If an entry already exists for a date, the --existing option selects whether
it is overwritten, appended to, skipped, or treated as an error (the default).

Any text before the first date is ignored.

The day-name/day-number combination is checked for consistency.

A list of days with missing entries is also printed by default. This can be
deactivated.
"""

import argparse
import datetime
import os
import pathlib
import re
import sys


# Make the first candidate directory that exists importable, so that the
# rednotebook package can be found by the import below.
for inst in ("/usr/share/rednotebook", "../rednotebook"):
    if not os.path.isdir(inst):
        continue
    sys.path.append(inst)
    break

# must import rednotebook after munging path in order to find it, as it is
# not (at least in Debian) installed in the default python path
import rednotebook.storage  # pylint: disable=wrong-import-position, unused-import  # noqa: F401


def list_missing_entries(mindate, maxdate, months, existing_entries):
    """Print the days and months between two dates that have no entry.

    Every date from ``mindate`` up to, but not including, ``maxdate`` is
    looked up first in ``months`` and then in ``existing_entries``. Both are
    mappings from "YYYY-MM" strings to month objects that provide
    ``get_day(day_number)``. A month found in neither mapping is reported
    once as a whole; otherwise each day whose ``empty`` attribute is true is
    reported individually.

    Nothing is printed if either bound is ``None`` (for example when the
    input contained no dated entries at all).
    """

    if mindate is None or maxdate is None:
        return

    reported_months = set()
    for offset in range((maxdate - mindate).days):
        dateobj = mindate + datetime.timedelta(days=offset)
        monthstr = dateobj.strftime("%Y-%m")
        if monthstr in months:
            month = months[monthstr]
        elif monthstr in existing_entries:
            month = existing_entries[monthstr]
        else:
            # Report a completely missing month once, not once per day.
            if monthstr not in reported_months:
                reported_months.add(monthstr)
                print(f"Missing entries for {dateobj.strftime('%B %Y')}")
            continue
        if month.get_day(dateobj.day).empty:
            print(f"Missing entry for {dateobj.strftime('%d %B %Y')}")


def _parse_entry_date(header):
    """Convert a heading such as "Monday, 1 January 2024" into a datetime.

    Raises ValueError if the heading cannot be parsed or if the weekday name
    does not agree with the calendar date.
    """
    try:
        dateobj = datetime.datetime.strptime(header, "%A, %d %B %Y")
    except ValueError as err:
        raise ValueError(f"'{header}' is not a valid date") from err

    # strptime seems to be rather lax about parsing day/date combinations,
    # so check manually. The comparison ignores case because the regular
    # expression that found the heading is case-insensitive as well.
    weekday = re.search(r"(\w+day)", header, flags=re.IGNORECASE)
    if weekday is None or dateobj.strftime("%A").casefold() != weekday[0].casefold():
        raise ValueError(
            f"'{header}' is not a valid date. {dateobj.strftime('%d %B %Y')} "
            + f"was a {dateobj.strftime('%A')}."
        )
    return dateobj


def _tidy_entry_text(entry):
    """Return the entry text without blank headers and surrounding blank lines."""
    # remove blank headers (lines consisting only of '#' characters)
    entry = re.sub(r"^\s*[#]+\s*$", "", entry, flags=re.MULTILINE)
    # remove leading blank lines
    entry = re.sub(r"^\s*\n", "", entry)
    # remove the newline(s) at the very end of the entry
    return re.sub(r"\n$", "", entry)


def main():
    """Parse the command line and import the dated entries into RedNotebook.

    The input file is split at date headings; the text following each heading
    becomes the entry for that date. What happens to dates that already have
    an entry is controlled by ``--existing``: "error" (the default) aborts
    with exit status 1, "skip" leaves the old entry alone, "append" adds the
    new text after the old text and "overwrite" replaces the day's content.

    Unless ``--dry-run`` is given, the modified months are written back to
    the journal directory.

    Raises ValueError if a date heading is invalid.
    """

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-n",
        "--dry-run",
        action="store_true",
        help="Parse the text and check the dates without writing to rednotebook.",
    )
    parser.add_argument(
        "--echo-entries",
        action="store_true",
        help="Echo the parsed entries to the terminal.",
    )
    parser.add_argument(
        "--existing",
        choices=["overwrite", "append", "skip", "error"],
        default="error",
        help="Define the behaviour if rednotebook already has an entry for the date.",
    )
    parser.add_argument(
        "--no-list-missing-entries",
        action="store_true",
        help="Don't list the dates with missing entries.",
    )
    parser.add_argument("--data-dir", help="Path to RedNotebook journal.")
    parser.add_argument("infile", help="Input file, either plain text, markdown, or odt.")
    args = parser.parse_args()

    if not args.data_dir:
        # have to import *after* munging argv as rednotebook does its own argv parsing
        sys.argv = [sys.argv[0]]
        # Use "from ... import" so that only ``journal_module`` is bound
        # locally. A plain "import rednotebook.journal" would turn
        # ``rednotebook`` into a local variable of main() and leave it
        # unbound whenever --data-dir is given.
        from rednotebook import journal as journal_module  # pylint: disable=import-outside-toplevel

        args.data_dir = journal_module.Journal().get_journal_path()
    print(f"Reading journal from {args.data_dir}.")

    if args.infile.endswith(".odt"):
        try:
            from odf import text as odf_text  # pylint: disable=import-outside-toplevel
            from odf.opendocument import load  # pylint: disable=import-outside-toplevel
        except ImportError:
            print("Please install 'python3-odf' in order to import entries from an .odt file")
            sys.exit(1)
        textdoc = load(args.infile)
        alltext = "\n\n".join(str(para) for para in textdoc.getElementsByType(odf_text.P))
    else:
        alltext = pathlib.Path(args.infile).read_text()

    # remove non-printable characters (DEL and the C1 control range)
    control_chars = "".join(map(chr, range(0x7F, 0xA0)))
    control_char_re = re.compile(f"[{re.escape(control_chars)}]")
    alltext = control_char_re.sub("", alltext)

    # Because of the capture group, re.split returns
    # [leading text, heading 1, text 1, heading 2, text 2, ...].
    days = re.split(r"(\w+day[,]?\s*\d+\s+\w+\s+\d+)\.?\s*", alltext, flags=re.IGNORECASE)
    print(f"Found entries for {(len(days) - 1) // 2} days")
    if days[0] != "":
        print(f"Ignoring leading text: {days[0]}")

    existing_entries = rednotebook.storage.load_all_months_from_disk(args.data_dir)
    months = {}
    mindate, maxdate = None, None
    for header, body in zip(days[1::2], days[2::2]):
        if args.echo_entries:
            print(f"Date: {header}")
        dateobj = _parse_entry_date(header)

        if mindate is None or dateobj < mindate:
            mindate = dateobj
        if maxdate is None or dateobj > maxdate:
            maxdate = dateobj

        monthstr = dateobj.strftime("%Y-%m")
        if monthstr in months:
            month = months[monthstr]
        elif monthstr in existing_entries:
            month = existing_entries[monthstr]
        else:
            month = rednotebook.data.Month(dateobj.year, dateobj.month)
        day = month.get_day(dateobj.day)

        already_exists = not day.empty
        if already_exists:
            if args.existing == "error":
                print(f"Error: entry for {header} already exists.")
                sys.exit(1)
            if args.existing == "skip":
                print(f"Entry for {header} already exists. Skipping.")
                continue
            # "append" and "overwrite" fall through and modify the entry.

        body = _tidy_entry_text(body)
        if args.echo_entries:
            print(body)
        if already_exists and args.existing == "append":
            # assumes day.content is a dict whose "text" key may be absent
            old_text = day.content.get("text", "")
            day.content["text"] = f"{old_text}\n\n{body}" if old_text else body
        else:
            day.content = {"text": body}
        months[monthstr] = month

    # Without any dated entry there is no date range to inspect.
    if not args.no_list_missing_entries and mindate is not None:
        list_missing_entries(mindate, maxdate, months, existing_entries)

    if not args.dry_run:
        rednotebook.storage.save_months_to_disk(months, args.data_dir, True, True)


if __name__ == "__main__":
    main()