File: unicode-ces-basic_8bit.ads

package info (click to toggle)
libxmlada 18-4
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 7,324 kB
  • sloc: ada: 32,766; makefile: 480; xml: 111; sh: 43; python: 35
file content (123 lines) | stat: -rw-r--r-- 5,074 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
------------------------------------------------------------------------------
--                     XML/Ada - An XML suite for Ada95                     --
--                                                                          --
--                     Copyright (C) 2001-2017, AdaCore                     --
--                                                                          --
-- This library is free software;  you can redistribute it and/or modify it --
-- under terms of the  GNU General Public License  as published by the Free --
-- Software  Foundation;  either version 3,  or (at your  option) any later --
-- version. This library is distributed in the hope that it will be useful, --
-- but WITHOUT ANY WARRANTY;  without even the implied warranty of MERCHAN- --
-- TABILITY or FITNESS FOR A PARTICULAR PURPOSE.                            --
--                                                                          --
--                                                                          --
--                                                                          --
--                                                                          --
--                                                                          --
-- You should have received a copy of the GNU General Public License and    --
-- a copy of the GCC Runtime Library Exception along with this program;     --
-- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
-- <http://www.gnu.org/licenses/>.                                          --
--                                                                          --
------------------------------------------------------------------------------

--  This package implements a basic 8bit encoding.
--  Only code points from 16#00# to 16#FF# can be encoded in such strings.
--  These are the standard Ada Strings.
--
--  However, they can be used to read files that contain accented characters,
--  in combination with Unicode.CCS.Iso_8859_1 for instance.

with Unicode.CES.Utf32;
with Unicode.CCS;
with Unchecked_Deallocation;

package Unicode.CES.Basic_8bit is

   -----------
   -- Types --
   -----------

   subtype Basic_8bit_String is String;
   type Basic_8bit_String_Access is access Basic_8bit_String;
   --  An eight-bit string. Basic_8bit_String is simply a standard Ada String,
   --  and byte order is not relevant for such strings.
   --  Values allocated through Basic_8bit_String_Access can be released with
   --  Free, declared at the end of this package.

   -------------------------------------------
   -- Conversion to and from byte sequences --
   -------------------------------------------

   procedure Encode
     (Char   : Unicode_Char;
      Output : in out Byte_Sequence;
      Index  : in out Natural);
   --  Store in Output the byte sequence representing Char in the 8bit
   --  character encoding form. The result is written into Output (this is a
   --  procedure, nothing is returned), and Index is updated accordingly.
   --  NOTE(review): the exact convention for Index (position before or after
   --  the bytes written) is not visible in this spec; presumably it follows
   --  the one documented for Encode_Function in Unicode.CES - confirm there.
   --  Invalid_Encoding is raised if Char can not be converted.

   procedure Read
     (Str   : Basic_8bit_String;
      Index : in out Positive;
      Char  : out Unicode_Char);
   --  Set Char to the character starting at location Index in Str.
   --  Index is an in out parameter: presumably it is moved to the start of
   --  the next character on exit - confirm against the package body.

   function Width (Char : Unicode_Char) return Natural;
   --  Return the number of bytes occupied by the 8bit representation of Char

   function Length (Str : Basic_8bit_String) return Natural;
   --  Return the number of characters in Str

   ------------------------------------------
   -- Conversion to and from 8bit-encoding --
   ------------------------------------------

   function From_Utf32
     (Str : Unicode.CES.Utf32.Utf32_LE_String)
      return Basic_8bit_String;
   --  Return a new 8bit string, built from the utf32-encoded string Str.

   function To_Utf32
     (Str : Basic_8bit_String)
      return Unicode.CES.Utf32.Utf32_LE_String;
   --  Return a new utf32-encoded string, built from the standard Ada string
   --  Str.

   ---------------------------------------------
   -- Byte order and character set conversion --
   ---------------------------------------------

   function To_Unicode_LE
     (Str   : Basic_8bit_String;
      Cs    : Unicode.CCS.Character_Set := Unicode.CCS.Unicode_Character_Set;
      Order : Byte_Order := Default_Byte_Order) return Basic_8bit_String;
   --  Convert Str to a Unicode string, assuming it contains code points from
   --  the character set Cs (by default Unicode.CCS.Unicode_Character_Set).
   --  Byte-order is irrelevant for 8bit strings, but Order is kept for
   --  interface compatibility with other similar functions.

   function To_CS
     (Str   : Basic_8bit_String;
      Cs    : Unicode.CCS.Character_Set := Unicode.CCS.Unicode_Character_Set;
      Order : Byte_Order := Default_Byte_Order) return Basic_8bit_String;
   --  Convert Str to the character set Cs (by default
   --  Unicode.CCS.Unicode_Character_Set), assuming it contains Unicode
   --  characters.
   --  Order has the same signature and default as in To_Unicode_LE.

   ---------------------
   -- Encoding Scheme --
   ---------------------

   --  Encoding scheme descriptor for this package: it bundles the Read,
   --  Width, Encode and Length subprograms declared above, with the BOM
   --  component set to Unknown.
   --  Encode'Access is wrapped in a qualified expression so that its
   --  expected type, Encode_Function, is stated explicitly.
   Basic_8bit_Encoding : constant Encoding_Scheme :=
     (BOM    => Unknown,
      Read   => Read'Access,
      Width  => Width'Access,
      Encode => Encode_Function'(Encode'Access),
      Length => Length'Access);

   ------------------
   -- Deallocation --
   ------------------

   procedure Free is new Unchecked_Deallocation
     (Basic_8bit_String, Basic_8bit_String_Access);
   --  Release the string designated by a Basic_8bit_String_Access value.
   --  This is an instance of Unchecked_Deallocation, so the caller must make
   --  sure no other access value still designates the freed string.

private
   --  Ask the compiler to inline calls to Width.
   pragma Inline (Width);
end Unicode.CES.Basic_8bit;