File: LazUnicodeConsole.lpr

package info (click to toggle)
lazarus 2.0.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 214,460 kB
  • sloc: pascal: 1,862,622; xml: 265,709; cpp: 56,595; sh: 3,008; java: 609; makefile: 535; perl: 297; sql: 222; ansic: 137
file content (107 lines) | stat: -rw-r--r-- 3,516 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
{
  This is a test program for the LazUnicode unit.
  It works with UTF-8 and UTF-16 encodings; switch between them with the UseUTF16 define.
  It also works in Delphi, where String = UnicodeString. Just rename this file to *.dpr.
  For Delphi you must copy the units LazUnicode and LazUTF16, both part of the LazUtils package.
}
program LazUnicodeConsole;

{$IFDEF FPC}
 {$mode objfpc}{$H+}
{$ENDIF}

{$APPTYPE CONSOLE}       // Mostly for Delphi

// For testing the UTF16 version.
{$IF DEFINED(FPC) and DEFINED(UseUTF16)}
{$ModeSwitch UnicodeStrings}   // Sets also FPC_UNICODESTRINGS.
{$ENDIF}

{$IF DEFINED(FPC_UNICODESTRINGS) or not DEFINED(FPC)}
 {$DEFINE ReallyUseUTF16}       // FPC with UTF-16 or Delphi
{$ENDIF}

uses
  LazUnicode;

// Test literals for the main block.
// Eyes, Thai, WineGlass and Heart are concatenated into the first test string;
// WineGlass is additionally used as the search needle for CodePointPos.
// Combining is the second test string, walked by TUnicodeCharacterEnumerator
// and by the for-in loop.
const
  Eyes = '👀';
  Thai = 'ฃ';   // No idea what it means.
  WineGlass = '🍷';
  Heart = '💓';  // or '♡';
  // Accents in combining codepoints. Last one has 2 consecutive combining marks.
  // The trailing #$CC#$81 and #$CC#$B2 are the UTF-8 byte sequences of
  // U+0301 (combining acute accent) and U+0332 (combining low line), so the
  // final 'C' carries two combining marks.
  // NOTE(review): how the compiler treats these raw byte constants when the
  // UnicodeStrings mode switch is active is not visible here - confirm.
  Combining = 'ÓÓỐỐỚỚÒÒỒỒỎỎỔỔỞỞỌỌBあC'#$CC#$81#$CC#$B2;
  // Disabled mixed Arabic/Latin sample; not referenced by the main block.
  //ArEnStr1 = 'مAرBحCبDاE';

// Working variables of the main block.
var
  // Holds the literal test data: AnsiString under FPC, String on other compilers.
  // Assigning it to s converts the encoding when the two string types differ.
  s_UTF8: {$IFDEF FPC}AnsiString{$ELSE}String{$ENDIF};
  // s is the string under test; ch receives the CodePointCopy result and the
  // elements of the for-in loop.
  s, ch: String;
  // Only needed by the commented-out code-unit for-in loop at the end of the program.
  //CodeUnit: Char;
  // Receives the results of CodePointLength and CodePointPos.
  i: NativeInt;
  // Enumerators that the main block creates, iterates and frees explicitly.
  cpIter: TCodePointEnumerator;
  ucIter: TUnicodeCharacterEnumerator;
// Main program: exercises the LazUnicode routines (CodePointCopy,
// CodePointLength, CodePointPos) and both enumerator classes on two test
// strings, printing every result to the console.
begin
  // First test string: Latin text mixed with the emoji / Thai constants.
  s_UTF8 := 'Pöö ' + Eyes + Thai + '. Have ' + WineGlass + ' for FPC' + Heart + 'Lazarus';
  s := s_UTF8;                              // Converts encoding when needed.
  // Report which string type / encoding this build was compiled for.
  {$IFDEF ReallyUseUTF16}
  writeln('Using UnicodeString + UTF-16');
  {$ELSE}
  writeln('Using AnsiString + UTF-8');
  {$ENDIF}
  writeln('Sizeof(Char) = ', Sizeof(Char));
  writeln('Testing with string: ', s);

  writeln('');
  ch := CodePointCopy(s, 14, 1);       // Should return the wine glass.
  writeln('Testing CodePointCopy. SubString = "', ch + '"');

  i := CodePointLength(s);             // Should return 30.
  writeln('Testing CodePointLength. Result = ', i);

  // Constant must be assigned to AnsiString when using the UTF-8 system.
  s_UTF8 := WineGlass;
  i := CodePointPos(s_UTF8, s);        // Should return 14.
  writeln('Testing CodePointPos. Result = ', i);
  s_UTF8 := '☐';
  i := CodePointPos(s_UTF8, s);        // Should return 0.
  writeln('Testing CodePointPos for non-existent char. Result = ', i);

  // Use CodePoint enumerator explicitly
  writeln('');
  writeln('*** Using CodePoint enumerator explicitly: ***');
  cpIter := TCodePointEnumerator.Create(s);
  try
    while cpIter.MoveNext do
      writeln('ch=', cpIter.Current, '  has ', cpIter.CurrentCodeUnitCount, ' codeunits.');
  finally
    // Release the enumerator even if iteration or console output raises.
    cpIter.Free;
  end;

  // Second test string: letters carrying combining marks.
  s_UTF8 := Combining;
  s := s_UTF8;                              // Converts encoding when needed.
  writeln('');
  writeln('Testing with string: ', s);

  // Use UnicodeCharacter enumerator explicitly
  writeln('');
  writeln('*** Using UnicodeCharacter enumerator explicitly: ***');
  ucIter := TUnicodeCharacterEnumerator.Create(s);
  try
    while ucIter.MoveNext do
      writeln('ch=', ucIter.Current, '  has ', ucIter.CurrentCodePointCount, ' codepoints and ', ucIter.CurrentCodeUnitCount, ' codeunits.');
  finally
    // Release the enumerator even if iteration or console output raises.
    ucIter.Free;
  end;

  {$IFDEF FPC}
  // Use for-in loop for Unicode Characters.
  // The loop variable is a String, so each element is a whole character
  // rather than a single Char code unit. Compiled for FPC only.
  writeln('');
  writeln('*** Using for-in loop for Unicode Characters : ***');
  for ch in s do
    writeln('ch=',ch);
  {$ENDIF}

  // for-in loop for codeunits using a Char variable still works.
  // (Also requires enabling the CodeUnit declaration in the var section.)
  {    Uncomment to test.
  writeln('');
  writeln('*** Using for-in loop for codeunits: ***');
  for CodeUnit in s do
    writeln('CodeUnit=',CodeUnit);        // The output makes no sense obviously.
  }

end.