File: lazunicodeunit.pas

package info (click to toggle)
lazarus 2.0.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 214,460 kB
  • sloc: pascal: 1,862,622; xml: 265,709; cpp: 56,595; sh: 3,008; java: 609; makefile: 535; perl: 297; sql: 222; ansic: 137
file content (130 lines) | stat: -rw-r--r-- 3,839 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
unit LazUnicodeUnit;

{$IFDEF FPC}
 {$mode objfpc}{$H+}
{$ENDIF}

// For testing the UTF-16 version with FPC: define UseUTF16 (e.g. in the
// project's custom options) to turn on the UnicodeStrings mode switch below.
{$IF DEFINED(FPC) and DEFINED(UseUTF16)}
{$ModeSwitch UnicodeStrings}   // Sets also FPC_UNICODESTRINGS.
{$ENDIF}

// ReallyUseUTF16 is defined when FPC_UNICODESTRINGS is set or when the
// compiler is not FPC. It is only used in this unit to select which
// "Using ..." banner line Button1Click prints.
{$IF DEFINED(FPC_UNICODESTRINGS) or not DEFINED(FPC)}
 {$DEFINE ReallyUseUTF16}       // FPC with UTF-16 or Delphi
{$ENDIF}

interface

uses
  Classes, SysUtils, FileUtil, Forms, Controls, Graphics, Dialogs, StdCtrls,
  LazUnicode;

type

  { TForm1 }

  // Main form of the LazUnicode demo.
  // The component fields and the event handler live in the default-visibility
  // section at the top of the class; presumably they are bound by name to the
  // form resource pulled in by the $R directive in the implementation part.
  TForm1 = class(TForm)
    Button1: TButton;   // Presumably wired to Button1Click in the form resource.
    Memo1: TMemo;       // Receives every line of output written by Button1Click.
    // Runs all LazUnicode test cases and lists the results in Memo1.
    procedure Button1Click(Sender: TObject);
  end;

var
  Form1: TForm1;

const
  // Test characters used to build the demo string in Button1Click.
  // The three emoji lie outside the Basic Multilingual Plane, so each one needs
  // 4 bytes in UTF-8 and a surrogate pair (2 code units) in UTF-16.
  Eyes = '👀';   // U+1F440 EYES.
  Thai = 'ฃ';   // U+0E03 THAI CHARACTER KHO KHUAT (inside the BMP; 3 bytes in UTF-8).
  WineGlass = '🍷';   // U+1F377 WINE GLASS.
  Heart = '💓';  // U+1F493 BEATING HEART; alternative: '♡' (U+2661 WHITE HEART SUIT).
  // Accents in combining codepoints. Last one has 2 consecutive combining marks.
  Combining = 'ÓÓỐỐỚỚÒÒỒỒỎỎỔỔỞỞỌỌBあC'#$CC#$81#$CC#$B2;
  //ArEnStr1 = 'مAرBحCبDاE';

implementation

{$R *.dfm}

{ TForm1 }

{ Click handler that exercises the LazUnicode API and logs the results to Memo1.

  Steps, in order:
    1. Build a test string containing non-BMP emoji and a Thai letter, and
       print which string type / encoding is active plus SizeOf(Char).
    2. Call CodePointCopy, CodePointLength and CodePointPos on that string.
    3. Walk the string with an explicitly created TCodePointEnumerator.
    4. Switch to the Combining test string and walk it with an explicitly
       created TUnicodeCharacterEnumerator.
    5. (FPC only) Walk the same string with a for-in loop using a String
       loop variable.

  Sender is not used.

  Both enumerators are plain objects created here, so each one is released in
  a try..finally block; otherwise an exception raised while formatting or
  adding a line to the memo would leak the enumerator. }
procedure TForm1.Button1Click(Sender: TObject);
var
  s_UTF8: {$IFDEF FPC}AnsiString{$ELSE}String{$ENDIF};
  s, ch: String;
  //CodeUnit: Char;      // Needed only by the commented-out test at the end.
  i: NativeInt;
  cpIter: TCodePointEnumerator;
  ucIter: TUnicodeCharacterEnumerator;
begin
  Memo1.Lines.Clear;
  s_UTF8 := 'Pöö ' + Eyes + Thai + '. Have ' + WineGlass + ' for FPC' + Heart + 'Lazarus';
  s := s_UTF8;                              // Converts encoding when needed.
  {$IFDEF ReallyUseUTF16}
  Memo1.Lines.Add('Using UnicodeString + UTF-16');
  {$ELSE}
  Memo1.Lines.Add('Using AnsiString + UTF-8');
  {$ENDIF}
  Memo1.Lines.Add(Format('Sizeof(Char) = %d.', [Sizeof(Char)]));
  Memo1.Lines.Add('Testing with string: ' + s);

  Memo1.Lines.Add('');
  ch := CodePointCopy(s, 14, 1);       // Should return the wine glass.
  Memo1.Lines.Add('Testing CodePointCopy. SubString = "' + ch + '"');

  i := CodePointLength(s);             // Should return 30.
  Memo1.Lines.Add(Format('Testing CodePointLength. Result = %d.', [i]));

  // Constant must be assigned to AnsiString when using the UTF-8 system.
  s_UTF8 := WineGlass;
  i := CodePointPos(s_UTF8, s);        // Should return 14.
  Memo1.Lines.Add(Format('Testing CodePointPos. Result = %d.', [i]));
  s_UTF8 := '☐';
  i := CodePointPos(s_UTF8, s);        // Should return 0.
  Memo1.Lines.Add(Format('Testing CodePointPos for non-existent char. Result = %d.', [i]));

  // Use CodePoint enumerator explicitly
  Memo1.Lines.Add('');
  Memo1.Lines.Add('*** Using CodePoint enumerator explicitly: ***');
  cpIter := TCodePointEnumerator.Create(s);
  try
    while cpIter.MoveNext do
      Memo1.Lines.Add(Format('ch=%s has %d codeunits.',
                             [cpIter.Current, cpIter.CurrentCodeUnitCount]));
  finally
    cpIter.Free;                       // Released even if a line above raises.
  end;

  s_UTF8 := Combining;
  s := s_UTF8;                              // Converts encoding when needed.
  Memo1.Lines.Add('');
  Memo1.Lines.Add('Testing with string: ' + s);

  // Use UnicodeCharacter enumerator explicitly
  Memo1.Lines.Add('');
  Memo1.Lines.Add('*** Using UnicodeCharacter enumerator explicitly: ***');
  ucIter := TUnicodeCharacterEnumerator.Create(s);
  try
    while ucIter.MoveNext do
      Memo1.Lines.Add(Format('ch=%s has %d codepoints and %d codeunits.',
             [ucIter.Current, ucIter.CurrentCodePointCount, ucIter.CurrentCodeUnitCount]));
  finally
    ucIter.Free;                       // Released even if a line above raises.
  end;

  {$IFDEF FPC}
  // Use for-in loop for Unicode Characters.
  Memo1.Lines.Add('');
  Memo1.Lines.Add('*** Using for-in loop for Unicode Characters : ***');
  for ch in s do
    Memo1.Lines.Add('ch=' + ch);
  {$ENDIF}

  // for-in loop for codeunits using a Char variable still works.
  {    Uncomment to test (also uncomment the CodeUnit variable above).
  Memo1.Lines.Add('');
  Memo1.Lines.Add('*** Using for-in loop for codeunits: ***');
  for CodeUnit in s do
    Memo1.Lines.Add('CodeUnit=' + CodeUnit);      // The output makes no sense obviously.
  }
end;

end.