1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394
|
unit commonmark;
{$mode objfpc}{$H+}
{ Copyright (C) 2017-2024 David Bannon
License:
This code is licensed under MIT License, see the file License.txt
or https://spdx.org/licenses/MIT.html SPDX short identifier: MIT
Exports a note in a subset of commonmark
Create the object, optionally give it a directory to look in (and set DoPOFile ?). Call GetMDcontent()
with an ID (that is, a filename without extension) and a created list to fill in with content.
Has some limitations that relate, to some extent, to the MarkDown/CommonMark.
1) Monospace may be presented as either "leading spaces" or wrapped in BackTicks.
The Leading Space model is used where the whole line (ie para) is mono, we add
four spaces and strip out any other markup in the line. Looks better for blocks.
Backticks are used where the mono is in line. It can have extra markup but
has to have backticks closest to text, does not work here ...
So, summary, blocks of Mono cannot show any other markup.
2) Large or Huge font (in tomboy) is only honoured if on a line by itself, become
heading lines. Any in line Large or Huge is discarded. Small is preserved
but cannot be displayed in github flavour of MD.
3) Only bullet cha allowed in MD list here is * (officially others allowed but not here)
4) When bullets (lists) mix, lists get priority, the mono drops back to inline mono.
5) Cannot do highlight (but I try to preserve it during a Tb->GH->TB cycle).
HISTORY
2020-12-22 Extracted from the NextCloud Notes Branch
2020-??-?? Moved the Normalising code into a stand alone unit.
2021/06/15 Format lines that are all mono differenly so they show as a block.
2021/06/29 Merged this file back to tomboy-ng
2021/07/22 Make GetMDcontent more tolerent of passed ID/FFN
2021/07/30 Now use someutuls fro tb_util instead of implementing itself. Must sync to TB-NG
2021/07/30 Use the RemoveNoteMetaData( from TT_Utils, need merge TT_utils with TB_Utils
2021/08/19 Rewrite ProcessMarkup to use ST.Replace() approach
2021/09/28 Enabled multilevel bullets
2021/10/17 Rewrite most of monospace code.
2022/10/17 Remove underline tags around Title when exporting, confuses Importer.
}
interface
uses
Classes, SysUtils;
type
{ TExportCommon }
TExportCommon = class
private
{returns -1 if there is not a tag starting at index (1 based), else ret length
of tag if there is a tag but its not the one we want. Ret 0 if right tag found.}
function CheckForTag(index: integer; const Tag, St: string): integer;
function ConvertBullets(Str: string): string;
function ConvertMono(var InSt: string): boolean;
function FindInStringList(const StL: TStringList; const FindMe: string): integer;
// Make content suitable to write out as a PO file, no merging is going to happen !
procedure ProcessHeadings(StL: TStringList);
procedure ProcessMarkUp(StL: TStringList);
public
DebugMode : boolean;
ErrorMsg : string;
NotesDir : string; // dir were we expect to find our TB notes
// A token is $$STRING where STRING is one of more latin uppercase letters, numbers
// or tokens. eg $$MYTOKEN, $$ATOKEN$$MYTOKEN, $$BLAR27
// Minium of 4 char and surrounded by newline, space, comma (but NOT lower case)
HasToken : boolean;
// Takes a note ID (no extension) or a FFN inc path and .note
// and fills out the passed StringList that must have been created)
// with a commonmark version of the note.
// returns an empty list on error.
function GetMDcontent(ID : string; STL : TstringList) : boolean;
end;
implementation
uses LazFileUtils{$ifdef LCL}, lazlogger {$endif}, laz2_DOM, laz2_XMLRead, notenormal, tb_utils;
function TExportCommon.GetMDcontent(ID : string; STL : TStringList): boolean;
var
Normaliser : TNoteNormaliser;
Single : boolean = false;
begin
if FileExists(ID) then begin // eg, single note mode
StL.LoadFromFile(ID);
Single := True;
end
else
if FileExists(NotesDir + ID + '.note') then // eg in notes repo
StL.LoadFromFile(NotesDir + ID + '.note')
else exit(False);
// OK, now first line contains the title but some lines may have tags wrong side of \n, so Normalise
Normaliser := TNoteNormaliser.Create;
Normaliser.NormaliseList(StL);
Normaliser.Free;
StL.Delete(0);
if Single then
STL.Insert(0, GetTitleFromFFN(ID, False))
else
STL.Insert(0, GetTitleFromFFN(NotesDir + ID + '.note', False));
RemoveNoteMetaData(STL);
if StL.Count = 0 then begin
ErrorMsg := 'ERROR : invalid note content [' + GetTitleFromFFN(NotesDir + ID + '.note', False) + '] : ' + NotesDir + ID + '.note';
debugln(ErrorMsg);
exit(False);
end;
ProcessHeadings(StL); // Makes Title big too !
ProcessMarkUp(StL);
result := (Stl.Count > 2);
end;
function TExportCommon.FindInStringList(const StL : TStringList; const FindMe : string) : integer;
var
I : integer = 0;
begin
while i < StL.Count -1 do begin
if pos(FindMe, StL.strings[i]) > 0 then
exit(i);
inc(i);
end;
result := -1;
end;
{We have to deal with two sorts of mono, full line where we apply 4 spaces to left
and in-line where we use back ticks.
A four leading space line can have additional spaces and they are preserved. The
'four' is my choice, when converting back, I'll assume, if there are at least four
its mono, tag it up and remove first four spaces. No other codes are allowed on that
line including backticks. A full line mono is a line that has text, has the mono html
tags at beginning and end of line. But other tags and whitespace are allowed between the start
and mono tag and between the </monospace> and end of line.
github will only display other font styles with mono if we use back tick and then
only if the other tags appear, initiall, before the backtick, thus **`bold mono`**
As the backtick looks very ugly in a block, I will use it only in-line and therefore
leading space mono will need to be stripped of any other enhancements.
}
function TExportCommon.CheckForTag(index : integer; const Tag, St : string) : integer;
begin
if (St.Length < Index) or (St[Index] <> '<') then exit(-1);
if copy(St, Index, Tag.Length) = Tag then exit(0);
// OK, so it should be a tag but not the one we want.
Result := 1;
while (St.Length >= (Index + Result)) do begin
if St[Index+Result] = '>' then exit(Result+1);
inc(Result);
end;
result := -1;
end;
function TExportCommon.ConvertMono(var InSt : string) : boolean;
var
St : string;
RetValue, i : integer;
begin
result := false;
if pos('<list>', InSt) > 0 then begin // Lists have priority over Mono
InSt := InSt.Replace('<monospace>', '`', [rfReplaceAll]);
InSt := inSt.Replace('</monospace>', '`', [rfReplaceAll]);
end;
if (pos('<monospace>', InSt) = 0) or (pos('</monospace>', inSt) = 0) then
exit;
St := InSt;
i := 1;
while St.Length >= i do
if St[i] = ' ' then inc(i)
else break; // whitespace allowed and retained
RetValue := CheckForTag(i, '<monospace>', St); // start with first non-space we find
while RetValue > 0 do begin
St := St.Remove(i, RetValue); // Remove any tags that appear before <m>
RetValue := CheckForTag(i, '<monospace>', St);
end;
if RetValue <> 0 then begin
InSt := InSt.Replace('<monospace>', '`', [rfReplaceAll]);
InSt := inSt.Replace('</monospace>', '`', [rfReplaceAll]);
exit;
end;
// OK, we now have a leading mono tag, i points to its start, add 11 for next char after tag
i := St.IndexOf('<', i+11-1) +1; // we know there is at least one there.
RetValue := CheckForTag(i, '</monospace>', St); // one based
while RetValue > 0 do begin // a non-target tag, remove
St := St.Remove(i-1, RetValue); // zero based
RetValue := St.IndexOf('<', i) +1; // 0 based. Can we find another ?
if RetValue > 0 then begin // i remains one based.
i := RetValue;
RetValue := CheckForTag(i, '</monospace>', St);
end;
end;
// OK, here i should be pointing to </m>, add tag length and clear away any trailing tags
// i := pos('</monospace>', St) + 12; // must still be there.
i := i + 12; // length </m> tag
RetValue := CheckForTag(i, '<xxxx>', St); // only interested in pos or neg numbers
while RetValue > 0 do begin
St := St.Remove(i-1, RetValue); // remove is zero based.
RetValue := CheckForTag(i-1, '<xxxx>', St); // Kek ? why -1 ?????
end;
if St.Length < i then begin
St := St.Replace('<monospace>', '', [rfReplaceAll]);
St := St.Replace('</monospace>', '', [rfReplaceAll]);
InSt := ' ' + St; // yes, we passed all the tests, change to leading space mono
Result := true; // ToDo - should i remove any tags between <m> and </m> ?
end else begin
InSt := InSt.Replace('<monospace>', '`', [rfReplaceAll]);
InSt := inSt.Replace('</monospace>', '`', [rfReplaceAll]);
end;
//writeln(InSt);
end;
// This version uses the CommonMark model of noting heading with ---- ===== on line underneath
procedure TExportCommon.ProcessHeadings(StL : TStringList);
var
i : integer = 1;
PosI, L : integer;
AddedHeading : Boolean = false;
begin
// The Title will be wrapped with underline tags, upsets import, get rid of them
STL[0] := (STL[0]).Replace('<underline>', '');
STL[0] := (STL[0]).Replace('</underline>', '');
// We now have a clean title in first st, lets mark it up as really big.
StL.Insert(1, '===========');
repeat
inc(i);
if not AddedHeading then begin // this adds a blank line between paras, MD style
StL.Insert(i, '');
inc(i);
end;
AddedHeading := False;
if (StL.Strings[i] = '') or (StL.strings[i][1] <> '<') then continue;
if copy(Stl.Strings[i], 1, length('<size:large>')) = '<size:large>' then begin
PosI := pos('</size:large>', Stl.Strings[i]);
if PosI = 0 then continue;
L := length(Stl.Strings[i]);
if PosI -1 + length('</size:large>') = L then begin
StL.insert(i, copy(Stl.Strings[i], length('<size:large>')+1,
L - length('<size:large></size:large>')));
StL.Delete(i+1);
inc(i);
StL.Insert(i, '--------');
AddedHeading := True;
end;
end;
if copy(Stl.Strings[i], 1, length('<size:huge>')) = '<size:huge>' then begin
PosI := pos('</size:huge>', Stl.Strings[i]);
if PosI = 0 then continue;
L := length(Stl.Strings[i]);
if PosI -1 + length('</size:huge>') = L then begin
StL.insert(i, copy(Stl.Strings[i], length('<size:huge>')+1,
L - length('<size:huge></size:huge>')));
StL.Delete(i+1);
inc(i);
StL.Insert(i, '========');
AddedHeading := True;
end;
end;
until I >= StL.Count-1;
end;
// This version does heading in the leading ### model
(* procedure TExportNote.ProcessHeadings(StL : TStringList);
var
i : integer = -1;
PosI, L : integer;
//Blar : string;
begin
repeat
inc(i);
if (StL.Strings[i] = '') or (StL.strings[i][1] <> '<') then continue;
if copy(Stl.Strings[i], 1, length('<size:large><bold>')) = '<size:large><bold>' then begin
//blar := Stl.Strings[i];
PosI := pos('</bold></size:large>', Stl.Strings[i]);
if PosI = 0 then continue;
L := length(Stl.Strings[i]);
if PosI -1 + length('</bold></size:large>') = L then begin
StL.insert(i, '### ' + copy(Stl.Strings[i], length('<size:large><bold>')+1,
L - length('<size:large><bold></bold></size:large>')));
StL.Delete(i+1);
end;
end;
if copy(Stl.Strings[i], 1, length('<size:huge><bold>')) = '<size:huge><bold>' then begin
//blar := Stl.Strings[i];
PosI := pos('</bold></size:huge>', Stl.Strings[i]);
if PosI = 0 then continue;
L := length(Stl.Strings[i]);
if PosI -1 + length('</bold></size:huge>') = L then begin
StL.insert(i, '## ' + copy(Stl.Strings[i], length('<size:huge><bold>')+1,
L - length('<size:huge><bold></bold></size:huge>')));
StL.Delete(i+1);
end;
end;
until I >= StL.Count-1;
end; *)
{ must convert upto level 6 bullets to md. We use 3 spaces, ahead of marker to
indicate each level. In the xml, each level is indicated by an additional
wrap of <list><list-item dir="ltr">CONTENT</list-item></list>. Must start with
the deepest bullet and work back up. }
function TExportCommon.ConvertBullets(Str : string) : string;
var
Pre, Post, Spaces : string;
i : integer = 5;
j : integer;
begin
Result := Str;
while i >= 0 do begin
Pre := '';
Post := '';
Spaces := '';
for j := 0 to i do begin
Pre := Pre + '<list><list-item dir="ltr">';
Post := Post + '</list-item></list>';
end;
for j := 1 to (i*3) do
Spaces := Spaces + ' ';
Result := Result.Replace(Pre, Spaces + '* ');
Result := Result.Replace(Post, '');
dec(i);
end
end;
procedure TExportCommon.ProcessMarkUp(StL : TStringList);
var
StIndex : integer;
TempSt: string;
DeleteNext : boolean = false; // no blank lines following Monospace
Token : string; // found token, not used in this method
Where : integer; // index to a found token, not used in this method
begin
StIndex := -1;
while StIndex < StL.Count -1 do begin
inc(StIndex);
if DeleteNext and (Stl[StIndex] = '') then begin
Stl.Delete(StIndex);
DeleteNext := False;
dec(StIndex);
continue;
end;
if (length(StL.Strings[StIndex]) < 2) then continue; // no room for a tag in there.
TempSt := StL.Strings[StIndex];
Where := 0;
if not HasToken then // we only want to know if there is one or more in the doc.
if FindToken(TempSt, Where, Token) then begin
HasToken := True;
// debugln('Document has at least one token');
end;
{ while FindToken(TempSt, Where, Token) do begin
writeln('Line contains token [' + token + ']');
end; }
DeleteNext := ConvertMono(TempSt);
TempSt := TempSt.Replace('<bold>', '**', [rfReplaceAll]);
TempSt := TempSt.Replace('</bold>', '**', [rfReplaceAll]);
TempSt := TempSt.Replace('<italic>', '*', [rfReplaceAll]);
TempSt := TempSt.Replace('</italic>', '*', [rfReplaceAll]);
// TempSt := TempSt.Replace('<monospace>', '`', [rfReplaceAll]);
// TempSt := TempSt.Replace('</monospace>', '`', [rfReplaceAll]);
TempSt := TempSt.Replace('<size:small>', '<sub>', [rfReplaceAll]);
TempSt := TempSt.Replace('</size:small>', '</sub>', [rfReplaceAll]);
TempSt := TempSt.Replace('<strikeout>', '~~', [rfReplaceAll]);
TempSt := TempSt.Replace('</strikeout>', '~~', [rfReplaceAll]);
TempSt := TempSt.Replace('<size:large>', '', [rfReplaceAll]);
TempSt := TempSt.Replace('</size:large>', '', [rfReplaceAll]);
TempSt := TempSt.Replace('<size:huge>', '', [rfReplaceAll]);
TempSt := TempSt.Replace('</size:huge>', '', [rfReplaceAll]);
TempSt := ConvertBullets(TempSt);
// TempSt := TempSt.Replace('<list><list-item dir="ltr">', '* ');
// TempSt := TempSt.Replace('</list-item></list>', '');
TempSt := RestoreBadXMLChar(TempSt);
StL.Insert(StIndex, TempSt);
StL.Delete(StIndex + 1);
end;
end;
end.
|