File: commonmark.pas

package info (click to toggle)
tomboy-ng 0.42-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 4,872 kB
  • sloc: pascal: 56,089; sh: 1,571; makefile: 63; xml: 23
file content (394 lines) | stat: -rw-r--r-- 16,763 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
unit commonmark;

{$mode objfpc}{$H+}

{   Copyright (C) 2017-2024 David Bannon

    License:
    This code is licensed under MIT License, see the file License.txt
    or https://spdx.org/licenses/MIT.html  SPDX short identifier: MIT

    Exports a note in a subset of commonmark


    Create the object, optionally give it a directory to look in (and set DoPOFile ?). Call GetMDcontent()
    with an ID (that is, a filename without extension) and a created list to fill in with content.

    Has some limitations that relate, to some extent, to the MarkDown/CommonMark.
    1) Monospace may be presented as either "leading spaces" or wrapped in BackTicks.
       The Leading Space model is used where the whole line (ie para) is mono, we add
       four spaces and strip out any other markup in the line. Looks better for blocks.
       Backticks are used where the mono is in line. It can have extra markup but
       has to have backticks closest to text, does not work here ...
       So, summary, blocks of Mono cannot show any other markup.
    2) Large or Huge font (in tomboy) is only honoured if on a line by itself, become
       heading lines. Any in line Large or Huge is discarded. Small is preserved
       but cannot be displayed in github flavour of MD.
    3) Only bullet cha allowed in MD list here is * (officially others allowed but not here)
    4) When bullets (lists) mix, lists get priority, the mono drops back to inline mono.
    5) Cannot do highlight (but I try to preserve it during a Tb->GH->TB cycle).

    HISTORY
    2020-12-22  Extracted from the NextCloud Notes Branch
    2020-??-??  Moved the Normalising code into a stand alone unit.
    2021/06/15  Format lines that are all mono differenly so they show as a block.
    2021/06/29  Merged this file back to tomboy-ng
    2021/07/22  Make GetMDcontent more tolerent of passed ID/FFN
    2021/07/30  Now use someutuls fro tb_util instead of implementing itself. Must sync to TB-NG
    2021/07/30  Use the RemoveNoteMetaData( from TT_Utils, need merge TT_utils with TB_Utils
    2021/08/19  Rewrite ProcessMarkup to use ST.Replace() approach
    2021/09/28  Enabled multilevel bullets
    2021/10/17  Rewrite most of monospace code.
    2022/10/17  Remove underline tags around Title when exporting, confuses Importer.
}

interface

uses
        Classes, SysUtils;

type

{ TExportCommon }

TExportCommon = class

    private
                        {returns -1 if there is not a tag starting at index (1 based), else ret length
                        of tag if there is a tag but its not the one we want. Ret 0 if right tag found.}
        function CheckForTag(index: integer; const Tag, St: string): integer;
        function ConvertBullets(Str: string): string;
        function ConvertMono(var InSt: string): boolean;
		function FindInStringList(const StL: TStringList; const FindMe: string): integer;
                                    // Make content suitable to write out as a PO file, no merging is going to happen !

		procedure ProcessHeadings(StL: TStringList);
		procedure ProcessMarkUp(StL: TStringList);

    public
        DebugMode : boolean;
        ErrorMsg : string;
        NotesDir : string;       // dir were we expect to find our TB notes
        // A token is $$STRING where STRING is one of more latin uppercase letters, numbers
        // or tokens. eg $$MYTOKEN, $$ATOKEN$$MYTOKEN, $$BLAR27
        // Minium of 4 char and surrounded by newline, space, comma (but NOT lower case)
        HasToken : boolean;
                        // Takes a note ID (no extension) or a FFN inc path and .note
                        // and fills out the passed StringList that must have been created)
                        // with a commonmark version of the note.
                        // returns an empty list on error.
        function GetMDcontent(ID : string; STL : TstringList) : boolean;
end;


implementation

uses LazFileUtils{$ifdef LCL}, lazlogger {$endif}, laz2_DOM, laz2_XMLRead, notenormal, tb_utils;


function TExportCommon.GetMDcontent(ID : string; STL : TStringList): boolean;
var
    Normaliser : TNoteNormaliser;
    Single : boolean = false;
begin
        if FileExists(ID) then begin          // eg, single note mode
                StL.LoadFromFile(ID);
                Single := True;
        end
        else
            if FileExists(NotesDir + ID + '.note') then      // eg in notes repo
                   StL.LoadFromFile(NotesDir + ID + '.note')
            else exit(False);
        //  OK, now first line contains the title but some lines may have tags wrong side of \n, so Normalise
        Normaliser := TNoteNormaliser.Create;
        Normaliser.NormaliseList(StL);
        Normaliser.Free;
        StL.Delete(0);
        if Single then
             STL.Insert(0, GetTitleFromFFN(ID, False))
        else
             STL.Insert(0, GetTitleFromFFN(NotesDir + ID + '.note', False));
        RemoveNoteMetaData(STL);
        if StL.Count = 0 then begin
            ErrorMsg := 'ERROR : invalid note content [' + GetTitleFromFFN(NotesDir + ID + '.note', False) + '] : ' + NotesDir + ID + '.note';
            debugln(ErrorMsg);
            exit(False);
        end;
        ProcessHeadings(StL);                                    // Makes Title big too !
        ProcessMarkUp(StL);
        result := (Stl.Count > 2);
end;

function TExportCommon.FindInStringList(const StL : TStringList; const FindMe : string) : integer;
var
    I : integer = 0;
begin
    while i < StL.Count -1 do begin
        if pos(FindMe, StL.strings[i]) > 0 then
            exit(i);
        inc(i);
	end;
	result := -1;
end;


{We have to deal with two sorts of mono, full line where we apply 4 spaces to left
and in-line where we use back ticks.

A four leading space line can have additional spaces and they are preserved. The
'four' is my choice, when converting back, I'll assume, if there are at least four
its mono, tag it up and remove first four spaces.  No other codes are allowed on that
line including backticks. A full line mono is a line that has text, has the mono html
tags at beginning and end of line. But other tags and whitespace are allowed between the start
and mono tag and between the </monospace> and end of line.

github will only display other font styles with mono if we use back tick and then
only if the other tags appear, initiall, before the backtick, thus **`bold mono`**
As the backtick looks very ugly in a block, I will use it only in-line and therefore
leading space mono will need to be stripped of any other enhancements.
}


function TExportCommon.CheckForTag(index : integer; const Tag, St : string) : integer;
begin
    if (St.Length < Index) or (St[Index] <> '<') then exit(-1);
    if copy(St, Index, Tag.Length) = Tag then exit(0);
    // OK, so it should be a tag but not the one we want.
    Result := 1;
    while (St.Length >= (Index + Result)) do begin
        if St[Index+Result] = '>' then exit(Result+1);
        inc(Result);
    end;
    result := -1;
end;

function TExportCommon.ConvertMono(var InSt : string) : boolean;
var
    St : string;
    RetValue, i : integer;
begin
    result := false;
    if pos('<list>', InSt) > 0 then begin          // Lists have priority over Mono
        InSt := InSt.Replace('<monospace>', '`', [rfReplaceAll]);
        InSt := inSt.Replace('</monospace>', '`', [rfReplaceAll]);
    end;
    if (pos('<monospace>', InSt) = 0) or (pos('</monospace>', inSt) = 0) then
        exit;
    St := InSt;
    i := 1;
    while St.Length >= i do
        if St[i] = ' ' then inc(i)
        else break;                                 // whitespace allowed and retained
    RetValue := CheckForTag(i, '<monospace>', St);  // start with first non-space we find
    while RetValue > 0 do begin
        St := St.Remove(i, RetValue);               // Remove any tags that appear before <m>
        RetValue := CheckForTag(i, '<monospace>', St);
    end;
    if RetValue <> 0 then begin
        InSt := InSt.Replace('<monospace>', '`', [rfReplaceAll]);
        InSt := inSt.Replace('</monospace>', '`', [rfReplaceAll]);
        exit;
    end;
    // OK, we now have a leading mono tag, i points to its start, add 11 for next char after tag
    i := St.IndexOf('<', i+11-1) +1;               // we know there is at least one there.
    RetValue :=  CheckForTag(i, '</monospace>', St);         // one based
    while RetValue > 0 do begin                     // a non-target tag, remove
        St := St.Remove(i-1, RetValue);             // zero based
        RetValue := St.IndexOf('<', i) +1;          // 0 based. Can we find another ?
        if RetValue > 0 then begin                  // i remains one based.
            i := RetValue;
            RetValue := CheckForTag(i, '</monospace>', St);
        end;
    end;
    // OK, here i should be pointing to </m>, add tag length and clear away any trailing tags

    // i := pos('</monospace>', St) + 12;              // must still be there.
    i := i + 12;                                    // length </m> tag
    RetValue := CheckForTag(i, '<xxxx>', St);       // only interested in pos or neg numbers
    while RetValue > 0 do begin
        St := St.Remove(i-1, RetValue);             // remove is zero based.
        RetValue :=  CheckForTag(i-1, '<xxxx>', St);       // Kek ?  why -1 ?????
    end;
    if St.Length < i then begin
        St := St.Replace('<monospace>', '', [rfReplaceAll]);
        St := St.Replace('</monospace>', '', [rfReplaceAll]);
        InSt := '    ' + St;                        // yes, we passed all the tests, change to leading space mono
        Result := true;                             // ToDo - should i remove any tags between <m> and </m> ?
    end else begin
        InSt := InSt.Replace('<monospace>', '`', [rfReplaceAll]);
        InSt := inSt.Replace('</monospace>', '`', [rfReplaceAll]);
    end;
    //writeln(InSt);
end;

// This version uses the CommonMark model of noting heading with ---- ===== on line underneath
procedure TExportCommon.ProcessHeadings(StL : TStringList);
var
    i : integer = 1;
    PosI, L : integer;
    AddedHeading : Boolean = false;
begin
    // The Title will be wrapped with underline tags, upsets import, get rid of them
    STL[0] := (STL[0]).Replace('<underline>', '');
    STL[0] := (STL[0]).Replace('</underline>', '');
    // We now have a clean title in first st, lets mark it up as really big.
    StL.Insert(1, '===========');
    repeat
        inc(i);
        if not AddedHeading then begin    // this adds a blank line between paras, MD style
            StL.Insert(i, '');
            inc(i);
		end;
        AddedHeading := False;
		if (StL.Strings[i] = '') or (StL.strings[i][1] <> '<') then continue;
		if copy(Stl.Strings[i], 1, length('<size:large>')) = '<size:large>' then begin
            PosI := pos('</size:large>', Stl.Strings[i]);
            if PosI = 0 then continue;
            L := length(Stl.Strings[i]);
            if PosI -1 + length('</size:large>') = L then begin
                StL.insert(i, copy(Stl.Strings[i], length('<size:large>')+1,
                        L - length('<size:large></size:large>')));
                StL.Delete(i+1);
                inc(i);
                StL.Insert(i, '--------');
                AddedHeading := True;
			end;
		end;
        if copy(Stl.Strings[i], 1, length('<size:huge>')) = '<size:huge>' then begin
            PosI := pos('</size:huge>', Stl.Strings[i]);
            if PosI = 0 then continue;
            L := length(Stl.Strings[i]);
            if PosI -1 + length('</size:huge>') = L then begin
                StL.insert(i, copy(Stl.Strings[i], length('<size:huge>')+1,
                        L - length('<size:huge></size:huge>')));
                StL.Delete(i+1);
                inc(i);
                StL.Insert(i, '========');
                AddedHeading := True;
			end;
		end;
	until I >= StL.Count-1;
end;

// This version does heading in the leading ### model
(* procedure TExportNote.ProcessHeadings(StL : TStringList);
var
    i : integer = -1;
    PosI, L : integer;
    //Blar : string;
begin
    repeat
        inc(i);
        if (StL.Strings[i] = '') or (StL.strings[i][1] <> '<') then continue;
        if copy(Stl.Strings[i], 1, length('<size:large><bold>')) = '<size:large><bold>' then begin
            //blar := Stl.Strings[i];
            PosI := pos('</bold></size:large>', Stl.Strings[i]);
            if PosI = 0 then continue;
            L := length(Stl.Strings[i]);
            if PosI -1 + length('</bold></size:large>') = L then begin
                StL.insert(i, '### ' + copy(Stl.Strings[i], length('<size:large><bold>')+1,
                        L - length('<size:large><bold></bold></size:large>')));
                StL.Delete(i+1);
			end;
		end;
        if copy(Stl.Strings[i], 1, length('<size:huge><bold>')) = '<size:huge><bold>' then begin
            //blar := Stl.Strings[i];
            PosI := pos('</bold></size:huge>', Stl.Strings[i]);
            if PosI = 0 then continue;
            L := length(Stl.Strings[i]);
            if PosI -1 + length('</bold></size:huge>') = L then begin
                StL.insert(i, '## ' + copy(Stl.Strings[i], length('<size:huge><bold>')+1,
                        L - length('<size:huge><bold></bold></size:huge>')));
                StL.Delete(i+1);
			end;
		end;
	until I >= StL.Count-1;
end;          *)

{ must convert upto level 6 bullets to md. We use 3 spaces, ahead of marker to
indicate each level. In the xml, each level is indicated by an additional
wrap of <list><list-item dir="ltr">CONTENT</list-item></list>. Must start with
the deepest bullet and work back up. }
function TExportCommon.ConvertBullets(Str : string) : string;
var
    Pre, Post, Spaces : string;
    i : integer = 5;
    j : integer;
begin
    Result := Str;
    while i >= 0 do begin
        Pre := '';
        Post := '';
        Spaces := '';
        for j := 0 to i do begin
            Pre := Pre + '<list><list-item dir="ltr">';
            Post := Post + '</list-item></list>';
        end;
        for j := 1 to (i*3) do
            Spaces := Spaces + ' ';
        Result := Result.Replace(Pre, Spaces + '* ');
        Result := Result.Replace(Post, '');
        dec(i);
    end
end;



procedure TExportCommon.ProcessMarkUp(StL : TStringList);
var
    StIndex : integer;
    TempSt: string;
    DeleteNext : boolean = false;       // no blank lines following Monospace
    Token : string;  // found token, not used in this method
    Where : integer; // index to a found token, not used in this method
begin
    StIndex := -1;
    while StIndex < StL.Count -1 do begin
        inc(StIndex);
        if DeleteNext and (Stl[StIndex] = '') then begin
            Stl.Delete(StIndex);
            DeleteNext := False;
            dec(StIndex);
            continue;
        end;
        if (length(StL.Strings[StIndex]) < 2) then continue;     // no room for a tag in there.
        TempSt := StL.Strings[StIndex];

        Where := 0;
        if not HasToken then         // we only want to know if there is one or more in the doc.
            if FindToken(TempSt, Where, Token) then begin
                HasToken := True;
                // debugln('Document has at least one token');
            end;

{        while FindToken(TempSt, Where, Token) do begin
            writeln('Line contains token [' + token + ']');
        end;  }

        DeleteNext := ConvertMono(TempSt);
        TempSt := TempSt.Replace('<bold>', '**', [rfReplaceAll]);
        TempSt := TempSt.Replace('</bold>', '**', [rfReplaceAll]);
        TempSt := TempSt.Replace('<italic>', '*', [rfReplaceAll]);
        TempSt := TempSt.Replace('</italic>', '*', [rfReplaceAll]);
//        TempSt := TempSt.Replace('<monospace>', '`', [rfReplaceAll]);
//        TempSt := TempSt.Replace('</monospace>', '`', [rfReplaceAll]);
        TempSt := TempSt.Replace('<size:small>', '<sub>', [rfReplaceAll]);
        TempSt := TempSt.Replace('</size:small>', '</sub>', [rfReplaceAll]);
        TempSt := TempSt.Replace('<strikeout>', '~~', [rfReplaceAll]);
        TempSt := TempSt.Replace('</strikeout>', '~~', [rfReplaceAll]);
        TempSt := TempSt.Replace('<size:large>', '', [rfReplaceAll]);
        TempSt := TempSt.Replace('</size:large>', '', [rfReplaceAll]);
        TempSt := TempSt.Replace('<size:huge>', '', [rfReplaceAll]);
        TempSt := TempSt.Replace('</size:huge>', '', [rfReplaceAll]);
        TempSt := ConvertBullets(TempSt);
//        TempSt := TempSt.Replace('<list><list-item dir="ltr">', '* ');
//        TempSt := TempSt.Replace('</list-item></list>', '');
        TempSt := RestoreBadXMLChar(TempSt);
        StL.Insert(StIndex, TempSt);
        StL.Delete(StIndex + 1);
	end;
end;


end.