File: lazeditmiscprocs.pas

package info (click to toggle)
lazarus 4.0%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 275,760 kB
  • sloc: pascal: 2,341,904; xml: 509,420; makefile: 348,726; cpp: 93,608; sh: 3,387; java: 609; perl: 297; sql: 222; ansic: 137
file content (90 lines) | stat: -rw-r--r-- 3,111 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
{
 *****************************************************************************
  This file is part of the LazEdit package from the Lazarus IDE.

  This content of this file is licensensed: Modified LGPL-2
  Or at the users choice: Modified LGPL-3
  See the file COPYING.modifiedLGPL.txt, included in the Lazarus distribution,
  for details about the license.

  Alternatively, the contents of this file may be used under the terms of the
  Mozilla Public License Version 1.1 http://www.mozilla.org/MPL/

  A copy used under either License can have the other Licenses removed from this
  header. A note should be added that the original file is available with the
  above choice of License.
 *****************************************************************************

  Written by Martin Friebe
}
unit LazEditMiscProcs;

{$mode objfpc}{$H+}

interface

uses
  Classes, SysUtils;

function IsCombiningCodePoint(const AChar: PChar): Boolean;

function CountChars(AText: PChar; AByteLen: integer): integer;
function CountBytes(AText: PChar; ACharLen: Integer; AMaxBytes: integer = high(Integer)): integer;

implementation

function IsCombiningCodePoint(const AChar: PChar): Boolean;
begin
  Result := (
   ( (AChar[0] = #$CC) ) or                                                       // Combining Diacritical Marks (belongs to previos char) 0300-036F
   ( (AChar[0] = #$CD) and (AChar[1] in [#$80..#$AF]) ) or                        // Combining Diacritical Marks
   ( (AChar[0] = #$D8) and (AChar[1] in [#$90..#$9A]) ) or                        // Arabic 0610 (d890)..061A (d89a)
   ( (AChar[0] = #$D9) and (AChar[1] in [#$8b..#$9f, #$B0]) ) or                  // Arabic 064B (d98b)..065F (d99f) // 0670 (d9b0)
   ( (AChar[0] = #$DB) and (AChar[1] in [#$96..#$9C, #$9F..#$A4, #$A7..#$A8, #$AA..#$AD]) ) or // Arabic 06D6 (db96)..  .. ..06EA (dbaa)
   ( (AChar[0] = #$E0) and (AChar[1] = #$A3) and (AChar[2] in [#$A4..#$BE]) ) or  // Arabic 08E4 (e0a3a4) ..08FE (e0a3be)
   ( (AChar[0] = #$E1) and (AChar[1] = #$B7) ) or                                 // Combining Diacritical Marks Supplement 1DC0-1DFF
   ( (AChar[0] = #$E2) and (AChar[1] = #$83) and (AChar[2] in [#$90..#$FF]) ) or  // Combining Diacritical Marks for Symbols 20D0-20FF
   ( (AChar[0] = #$EF) and (AChar[1] = #$B8) and (AChar[2] in [#$A0..#$AF]) )     // Combining half Marks FE20-FE2F
  );
end;

function CountChars(AText: PChar; AByteLen: integer): integer;
var
  b: Byte;
begin
  Result := 0;
  while AByteLen > 0 do begin
    b := Byte(AText^);
    if (b < 128) or
       ((b >= 192) and not IsCombiningCodePoint(AText))
    then
      inc(Result);
    inc(AText);
    dec(AByteLen);
  end;
end;

function CountBytes(AText: PChar; ACharLen: Integer; AMaxBytes: integer): integer;
var
  b: Byte;
begin
  Result := 0;
  while AMaxBytes > 0 do begin
    b := Byte(AText^);
    if b = 0 then
      exit;
    if (b < 128) or
       ((b >= 192) and not IsCombiningCodePoint(AText))
    then begin
      if ACharLen = 0 then
        exit;
      dec(ACharLen);
    end;
    inc(AText);
    inc(Result);
    dec(AMaxBytes);
  end;
end;

end.