File: unicode-ces-basic_8bit.ads

package info (click to toggle)
libxmlada1 1.0-2
  • links: PTS
  • area: main
  • in suites: sarge
  • size: 2,704 kB
  • ctags: 94
  • sloc: ada: 22,582; sh: 1,804; makefile: 142; xml: 140; perl: 128
file content (128 lines) | stat: -rw-r--r-- 5,327 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
-----------------------------------------------------------------------
--                XML/Ada - An XML suite for Ada95                   --
--                                                                   --
--                       Copyright (C) 2001-2002                     --
--                            ACT-Europe                             --
--                                                                   --
-- This library is free software; you can redistribute it and/or     --
-- modify it under the terms of the GNU General Public               --
-- License as published by the Free Software Foundation; either      --
-- version 2 of the License, or (at your option) any later version.  --
--                                                                   --
-- This library is distributed in the hope that it will be useful,   --
-- but WITHOUT ANY WARRANTY; without even the implied warranty of    --
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU --
-- General Public License for more details.                          --
--                                                                   --
-- You should have received a copy of the GNU General Public         --
-- License along with this library; if not, write to the             --
-- Free Software Foundation, Inc., 59 Temple Place - Suite 330,      --
-- Boston, MA 02111-1307, USA.                                       --
--                                                                   --
-- As a special exception, if other files instantiate generics from  --
-- this unit, or you link this unit with other files to produce an   --
-- executable, this  unit  does not  by itself cause  the resulting  --
-- executable to be covered by the GNU General Public License. This  --
-- exception does not however invalidate any other reasons why the   --
-- executable file  might be covered by the  GNU Public License.     --
-----------------------------------------------------------------------

--  This package implements a basic 8bit encoding.
--  Only code points from 16#00# to 16#FF# can be encoded in such strings.
--  These are the standard Ada Strings.
--
--  However, they can be used to read files that contain accented characters,
--  in combination with Unicode.CCS.Iso_8859_1 for instance.

with Unicode.CES.Utf32;
with Unicode.CCS;
with Unchecked_Deallocation;

package Unicode.CES.Basic_8bit is

   --  Specification of the 8bit character encoding scheme: strings are
   --  standard Ada Strings. The package declares the per-character
   --  primitives (Encode, Read, Width, Length), conversions to and from
   --  utf32-encoded strings, character set conversions, and the
   --  Encoding_Scheme record that groups the primitives.
   --
   --  Unicode_Char, Byte_Sequence, Byte_Order, Default_Byte_Order,
   --  Encoding_Scheme and Encode_Function are used unqualified and are not
   --  declared in this unit; they are expected to be visible from the
   --  ancestor packages of this child unit.

   -----------
   -- Types --
   -----------

   subtype Basic_8bit_String is String;
   type Basic_8bit_String_Access is access Basic_8bit_String;
   --  An eight-bit string, undefined byte-order.
   --  Basic_8bit_String is an unconstrained subtype of String, so it denotes
   --  the same type as String.
   --  Objects allocated through Basic_8bit_String_Access can be released
   --  with the Free procedure declared at the end of this specification.

   -------------------------------------------
   -- Conversion to and from byte sequences --
   -------------------------------------------

   procedure Encode
     (Char   : Unicode_Char;
      Output : in out Byte_Sequence;
      Index  : in out Natural);
   --  Store in Output the byte sequence representing Char in the 8bit
   --  character encoding form.
   --  Char   : the character to encode.
   --  Output : the buffer that receives the encoded form.
   --  Index  : position within Output.
   --           NOTE(review): since the mode is "in out", Index is presumably
   --           updated to reflect what was written; the exact convention is
   --           not visible in this specification, confirm in the body.
   --  Invalid_Encoding is raised if Char can not be converted.

   procedure Read
     (Str   : Basic_8bit_String;
      Index : in out Positive;
      Char  : out Unicode_Char);
   --  Return in Char the character starting at location Index in Str.
   --  NOTE(review): Index has mode "in out", so it is presumably moved past
   --  the character that was read; confirm in the body.

   function Width (Char : Unicode_Char) return Natural;
   --  Return the number of bytes occupied by the 8bit representation of Char

   function Length (Str : Basic_8bit_String) return Natural;
   --  Return the number of characters in Str

   ------------------------------------------
   -- Conversion to and from 8bit-encoding --
   ------------------------------------------

   function From_Utf32
     (Str : Unicode.CES.Utf32.Utf32_LE_String)
      return Basic_8bit_String;
   --  Return a new string, from a utf32-encoded string.
   --  NOTE(review): what happens for characters that have no 8bit
   --  representation is not stated here; presumably Invalid_Encoding is
   --  raised as for Encode, to be confirmed in the body.

   function To_Utf32
     (Str : Basic_8bit_String)
      return Unicode.CES.Utf32.Utf32_LE_String;
   --  Return a new utf32-encoded string, from a standard Ada string.

   ---------------------------------------------
   -- Byte order and character set conversion --
   ---------------------------------------------

   function To_Unicode_LE
     (Str   : Basic_8bit_String;
      Cs    : Unicode.CCS.Character_Set := Unicode.CCS.Unicode_Character_Set;
      Order : Byte_Order := Default_Byte_Order) return Basic_8bit_String;
   --  Convert Str to a Unicode string, assuming it contains code points from
   --  the character set Cs.
   --  Cs defaults to Unicode.CCS.Unicode_Character_Set.
   --  Byte-order is irrelevant for 8bit strings, but Order is kept for
   --  interface compatibility with other similar functions.

   function To_CS
     (Str   : Basic_8bit_String;
      Cs    : Unicode.CCS.Character_Set := Unicode.CCS.Unicode_Character_Set;
      Order : Byte_Order := Default_Byte_Order) return Basic_8bit_String;
   --  Convert Str to the character set Cs, assuming it contains Unicode
   --  characters.
   --  Cs defaults to Unicode.CCS.Unicode_Character_Set.
   --  NOTE(review): Order has the same declaration as in To_Unicode_LE and
   --  is presumably ignored for the same reason; confirm in the body.

   ---------------------
   -- Encoding Scheme --
   ---------------------

   Basic_8bit_Encoding : constant Encoding_Scheme :=
     (Read   => Read'Access,
      Width  => Width'Access,
      Encode => Encode_Function'(Encode'Access),
      Length => Length'Access);
   --  Groups the four primitives declared above into a single
   --  Encoding_Scheme value.
   --  NOTE(review): only the Encode component uses a qualified expression
   --  (Encode_Function'(...)); the reason is not visible in this unit,
   --  presumably it is needed to resolve the expected access type.

   ------------------
   -- Deallocation --
   ------------------

   procedure Free is new Unchecked_Deallocation
     (Basic_8bit_String, Basic_8bit_String_Access);
   --  Release the string designated by a Basic_8bit_String_Access value.

private
   pragma Inline (Width);
   --  Request inline expansion of calls to Width.
end Unicode.CES.Basic_8bit;