// Demo form unit for the LazUnicode API: CodePointCopy, CodePointLength,
// CodePointPos, and the code point / Unicode character enumerators.
unit LazUnicodeUnit;
{$IFDEF FPC}
{$mode objfpc}{$H+}
{$ENDIF}
// For testing the UTF16 version.
{$IF DEFINED(FPC) and DEFINED(UseUTF16)}
{$ModeSwitch UnicodeStrings} // Sets also FPC_UNICODESTRINGS.
{$ENDIF}
{$IF DEFINED(FPC_UNICODESTRINGS) or not DEFINED(FPC)}
{$DEFINE ReallyUseUTF16} // FPC with UTF-16 or Delphi
{$ENDIF}
interface
uses
Classes, SysUtils, FileUtil, Forms, Controls, Graphics, Dialogs, StdCtrls,
LazUnicode;
type
{ TForm1 }
TForm1 = class(TForm)
  // Components declared in the default-visibility section of the form class.
  Button1: TButton;
  Memo1: TMemo;   // Receives every line of demo output.
  // Runs the LazUnicode demo and writes its results to Memo1.
  // NOTE(review): presumably wired to Button1.OnClick in the form resource - confirm.
  procedure Button1Click(Sender: TObject);
private
  { Private declarations }
public
  { Public declarations }
end;
var
Form1: TForm1;
const
Eyes = '👀';
Thai = 'ฃ'; // No idea what it means.
WineGlass = '🍷';
Heart = '💓'; // or '♡';
// Accents written as combining code points. The final 'C' is followed by two
// consecutive combining marks (U+0301 and U+0332, given as raw UTF-8 bytes).
Combining = 'ÓÓỐỐỚỚÒÒỒỒỎỎỔỔỞỞỌỌBあC'#$CC#$81#$CC#$B2;
//ArEnStr1 = 'مAرBحCبDاE';
implementation
{$R *.dfm}
{ TForm1 }
{ Runs the LazUnicode demonstration and writes every result into Memo1.

  Steps, in order:
    1. Builds a test string containing non-BMP code points and reports the
       string type in use and Sizeof(Char).
    2. Exercises CodePointCopy, CodePointLength and CodePointPos on it.
    3. Walks the string with TCodePointEnumerator.
    4. Switches to the Combining constant and walks it with
       TUnicodeCharacterEnumerator.
    5. Under FPC only, repeats the walk with a for-in loop over a String
       loop variable.

  Sender is not used.

  Both enumerators are released in try..finally blocks so that an exception
  raised while formatting or adding a line cannot leak them. }
procedure TForm1.Button1Click(Sender: TObject);
var
  // Always holds UTF-8 under FPC; plain String under Delphi.
  s_UTF8: {$IFDEF FPC}AnsiString{$ELSE}String{$ENDIF};
  s, ch: String;
  //CodeUnit: Char;   // Needed only by the commented-out demo at the end.
  i: NativeInt;
  cpIter: TCodePointEnumerator;
  ucIter: TUnicodeCharacterEnumerator;
begin
  Memo1.Lines.Clear;
  s_UTF8 := 'Pöö ' + Eyes + Thai + '. Have ' + WineGlass + ' for FPC' + Heart + 'Lazarus';
  s := s_UTF8;            // Converts encoding when needed.
  {$IFDEF ReallyUseUTF16}
  Memo1.Lines.Add('Using UnicodeString + UTF-16');
  {$ELSE}
  Memo1.Lines.Add('Using AnsiString + UTF-8');
  {$ENDIF}
  Memo1.Lines.Add(Format('Sizeof(Char) = %d.', [Sizeof(Char)]));
  Memo1.Lines.Add('Testing with string: ' + s);

  Memo1.Lines.Add('');
  ch := CodePointCopy(s, 14, 1);     // Should return the wine glass.
  Memo1.Lines.Add('Testing CodePointCopy. SubString = "' + ch + '"');

  i := CodePointLength(s);           // Should return 30.
  Memo1.Lines.Add(Format('Testing CodePointLength. Result = %d.', [i]));

  // Constant must be assigned to AnsiString when using the UTF-8 system.
  s_UTF8 := WineGlass;
  i := CodePointPos(s_UTF8, s);      // Should return 14.
  Memo1.Lines.Add(Format('Testing CodePointPos. Result = %d.', [i]));
  s_UTF8 := '☐';
  i := CodePointPos(s_UTF8, s);      // Should return 0.
  Memo1.Lines.Add(Format('Testing CodePointPos for non-existent char. Result = %d.', [i]));

  // Use CodePoint enumerator explicitly
  Memo1.Lines.Add('');
  Memo1.Lines.Add('*** Using CodePoint enumerator explicitly: ***');
  cpIter := TCodePointEnumerator.Create(s);
  try
    while cpIter.MoveNext do
      Memo1.Lines.Add(Format('ch=%s has %d codeunits.',
        [cpIter.Current, cpIter.CurrentCodeUnitCount]));
  finally
    // Release the enumerator even if adding a line raised.
    cpIter.Free;
  end;

  s_UTF8 := Combining;
  s := s_UTF8;            // Converts encoding when needed.
  Memo1.Lines.Add('');
  Memo1.Lines.Add('Testing with string: ' + s);

  // Use UnicodeCharacter enumerator explicitly
  Memo1.Lines.Add('');
  Memo1.Lines.Add('*** Using UnicodeCharacter enumerator explicitly: ***');
  ucIter := TUnicodeCharacterEnumerator.Create(s);
  try
    while ucIter.MoveNext do
      Memo1.Lines.Add(Format('ch=%s has %d codepoints and %d codeunits.',
        [ucIter.Current, ucIter.CurrentCodePointCount, ucIter.CurrentCodeUnitCount]));
  finally
    // Release the enumerator even if adding a line raised.
    ucIter.Free;
  end;

  {$IFDEF FPC}
  // Use for-in loop for Unicode Characters.
  Memo1.Lines.Add('');
  Memo1.Lines.Add('*** Using for-in loop for Unicode Characters : ***');
  for ch in s do
    Memo1.Lines.Add('ch=' + ch);
  {$ENDIF}

  // for-in loop for codeunits using a Char variable still works.
  { Uncomment to test (also uncomment the CodeUnit declaration above).
  Memo1.Lines.Add('');
  Memo1.Lines.Add('*** Using for-in loop for codeunits: ***');
  for CodeUnit in s do
    Memo1.Lines.Add('CodeUnit=' + CodeUnit); // The output makes no sense obviously.
  }
end;
end.
|